归并排序：如果其中一个数字是 2 个字节，如何排序

发布于 2024-09-27 17:10:53 字数 2300 浏览 7 评论 0原文

我正在尝试使用 void* 进行合并排序。如果我保留数字，我想将其排序为 1 字节，那么它工作得很好。但是，如果数字超过 1 个字节，则效果不佳。我相信它把它当作2个数字。我用数字 500 进行了测试，最后我有 256 和 244 未排序....

#include "msort.h"
#include <iostream>

using namespace std;

void Mergesort( void* base, size_t nelem, size_t width,
                    int (*fcmp)( const void*, const void* ) )
{
    if (nelem <=1)
        return;

    int lhsnelem = nelem/2; 
    int rhsnelem = nelem - lhsnelem;
    char* midpoint = (char*)(base) + ((nelem/2)*width);

    cout << "width is: " << width << endl;

    Mergesort(base, lhsnelem, width, fcmp);
    Mergesort(midpoint, rhsnelem, width, fcmp);

    //ItemType* temp= new ItemType[nelem];
    int temp[20];
    char* lhs = (char*)(base);
    char* rhs = midpoint;
    char* end = rhs + (rhsnelem*width);        
    int count = 0;

    while ((lhs != midpoint) && (rhs != end))
    {
         int num = fcmp( lhs, rhs ); 
         if (num <= 0)
         {
             temp[count] = *lhs;
             lhs = lhs + width;
         }
         else
         {
             temp[count] = *rhs; 
             rhs = rhs + width;
         }
         count++; 
    }

    if (lhs == midpoint)
    {
        while (rhs != end)
        {
            temp[count] = *rhs;
            rhs = rhs + width;
            count++;
        }
    }
    else
    {

        while (lhs != midpoint) 
        {
            temp[count] = *lhs; 
            lhs = lhs + width;
            count++;
        }

    }

    for (int i=0; i<nelem; i++)
    {
        lhs = (char*)(base)+ (width*i);
        *lhs = temp[i];
        lhs = lhs + width;
    }
}

main.cpp

/////// main.cpp ///////////////////////
// here is my comparison function in main.cpp

int compare(const void *one, const void *two)
{
    if (*(int*)one > *(int*)two) return 1;
    if (*(int*)one < *(int*)two) return -1;
    return 0;
}

1 字节数字的输出正常：

In:    5 8 7 4 1 6 11 41 160 47 38 120 40 58 12 43 66 98 17 140
Out:   1 4 5 6 7 8 11 12 17 38 40 41 43 47 58 66 98 120 140 160

如果有 2 字节数字则输出不正常，排序不正确效果不好：

In:    500 50 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Out:   256 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 50 244

原文

I am trying to do merge sort using void*. If I keep numbers I want to sort to 1-byte it works perfectly. However if the number is located in more than 1 byte it doesn't work well. I believe it takes it as 2 numbers. I tested it with number 500 and at the end I have 256 and 244 not sorted ....

#include "msort.h"
#include <iostream>

using namespace std;

void Mergesort( void* base, size_t nelem, size_t width,
                    int (*fcmp)( const void*, const void* ) )
{
    if (nelem <=1)
        return;

    int lhsnelem = nelem/2; 
    int rhsnelem = nelem - lhsnelem;
    char* midpoint = (char*)(base) + ((nelem/2)*width);

    cout << "width is: " << width << endl;

    Mergesort(base, lhsnelem, width, fcmp);
    Mergesort(midpoint, rhsnelem, width, fcmp);

    //ItemType* temp= new ItemType[nelem];
    int temp[20];
    char* lhs = (char*)(base);
    char* rhs = midpoint;
    char* end = rhs + (rhsnelem*width);        
    int count = 0;

    while ((lhs != midpoint) && (rhs != end))
    {
         int num = fcmp( lhs, rhs ); 
         if (num <= 0)
         {
             temp[count] = *lhs;
             lhs = lhs + width;
         }
         else
         {
             temp[count] = *rhs; 
             rhs = rhs + width;
         }
         count++; 
    }

    if (lhs == midpoint)
    {
        while (rhs != end)
        {
            temp[count] = *rhs;
            rhs = rhs + width;
            count++;
        }
    }
    else
    {

        while (lhs != midpoint) 
        {
            temp[count] = *lhs; 
            lhs = lhs + width;
            count++;
        }

    }

    for (int i=0; i<nelem; i++)
    {
        lhs = (char*)(base)+ (width*i);
        *lhs = temp[i];
        lhs = lhs + width;
    }
}

main.cpp

/////// main.cpp ///////////////////////
// here is my comparison function in main.cpp

int compare(const void *one, const void *two)
{
    if (*(int*)one > *(int*)two) return 1;
    if (*(int*)one < *(int*)two) return -1;
    return 0;
}

The output for 1-byte numbers is OK:

In:    5 8 7 4 1 6 11 41 160 47 38 120 40 58 12 43 66 98 17 140
Out:   1 4 5 6 7 8 11 12 17 38 40 41 43 47 58 66 98 120 140 160

The output if there is a 2-byte number is not OK the sorting doesn't work well:

In:    500 50 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Out:   256 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 50 244

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

○愚か者の日 2024-10-04 17:10:54

您面临的直接问题是您正在使用 char *lhs 来复制整数。在进行复制之前，您需要将指针转换回 int *。

#include <iostream>

using namespace std;

static void Mergesort(void *base, size_t nelem, size_t width,
                      int (*fcmp)(const void *, const void *))
{
    if (nelem <= 1)
        return;

    int lhsnelem = nelem/2; 
    int rhsnelem = nelem - lhsnelem;
    char* midpoint = (char*)(base) + ((nelem/2)*width);

    Mergesort(base, lhsnelem, width, fcmp);
    Mergesort(midpoint, rhsnelem, width, fcmp);

    int temp[20];
    char* lhs = (char*)(base);
    char* rhs = midpoint;
    char* end = rhs + (rhsnelem*width);        
    int count = 0;

    while ((lhs != midpoint) && (rhs != end))
    {
        int num = fcmp(lhs, rhs); 
        if (num <= 0)
        {
            temp[count] = *(int *)lhs;          // Here!
            lhs += width;
        }
        else
        {
            temp[count] = *(int *)rhs;          // Here!
            rhs += width;
        }
        count++; 
    }


    while (rhs != end)
    {
        temp[count] = *(int *)rhs;          // Here!
        rhs = rhs + width;
        count++;
    }
    while (lhs != midpoint) 
    {
        temp[count] = *(int *)lhs;          // Here!
        lhs = lhs + width;
        count++;
    }

    for (int i = 0; i < nelem; i++)
    {
        lhs = (char *)(base)+ (width*i);
        *(int *)lhs = temp[i];                  // Here!
        lhs = lhs + width;
    }
}

测试代码

static int compare(const void *one, const void *two)
{
    if (*(int*)one > *(int*)two) return 1;
    if (*(int*)one < *(int*)two) return -1;
    return 0;
}

#define DIM(x) (sizeof(x)/sizeof(*(x)))

int array1[] = { 5, 8, 7, 4, 1, 6, 11, 41, 160, 47, 38, 120,
                 40, 58, 12, 43, 66, 98, 17, 140 };
int array2[] = { 500, 50, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0 };

static void PrintArray(int *array, size_t n_items)
{
    const char *pad = "";
    for (size_t i = 0; i < n_items; i++)
    {
        cout << pad << array[i];
        pad = ", ";
    }
    cout << endl;
}

int main()
{
    PrintArray(array1, DIM(array1));
    Mergesort(array1, DIM(array1), sizeof(array1[0]), compare);
    PrintArray(array1, DIM(array1));

    PrintArray(array2, DIM(array2));
    Mergesort(array2, DIM(array2), sizeof(array2[0]), compare);
    PrintArray(array2, DIM(array2));

    return 0;
}

你很不幸使用的是little-endian（Intel）机器；如果您使用的是大端机器（SPARC、PPC），您将获得小数量测试用例的零数组。

还有一个更严重、更深层的问题。该代码实际上只能用于对最多 20 个整数的数组进行排序，因为 int temp[20]; （它将大小限制为 20，并将类型限制为 int）并且由于“固定”分配。

分配应替换为内存移动（可能调用 memmove()），这又意味着代码仅适用于 POD（普通数据）类型。 temp 数组需要分配为适当大小 (nelem * width) 的字节数组。而且内存分配很糟糕；它会减慢代码速度。 “也许是个好消息”是，将临时数组复制到原始数组上的最终循环可以用 memmove() 替换。

目前尚不清楚这是不是好的 C++ 代码 - 我认为模板化的实现会更明智。作为C代码，解决了内存移动问题就可以了。

通用代码

static void Mergesort(void *base, size_t nelem, size_t width,
                      int (*fcmp)(const void *, const void *))
{
    if (nelem <= 1)
        return;

    int lhsnelem = nelem/2; 
    int rhsnelem = nelem - lhsnelem;
    char* midpoint = (char*)(base) + ((nelem/2)*width);

    Mergesort(base, lhsnelem, width, fcmp);
    Mergesort(midpoint, rhsnelem, width, fcmp);

    char *temp = new char[nelem * width];
    char* lhs = (char*)(base);
    char* rhs = midpoint;
    char* end = rhs + (rhsnelem*width);        
    int count = 0;

    while ((lhs != midpoint) && (rhs != end))
    {
        int num = fcmp(lhs, rhs); 
        if (num <= 0)
        {
            memmove(&temp[count], lhs, width);
            lhs += width;
        }
        else
        {
            memmove(&temp[count], rhs, width);
            rhs += width;
        }
        count += width;
    }

    while (rhs != end)
    {
        memmove(&temp[count], rhs, width);
        rhs += width;
        count += width;
    }
    while (lhs != midpoint) 
    {
        memmove(&temp[count], lhs, width);
        lhs += width;
        count += width;
    }

    memmove(base, temp, nelem * width);
    delete[] temp;
}

Your immediate problem is that you are using char *lhs to copy the integers around. You need to convert the pointer back to an int * before doing the copying.

#include <iostream>

using namespace std;

static void Mergesort(void *base, size_t nelem, size_t width,
                      int (*fcmp)(const void *, const void *))
{
    if (nelem <= 1)
        return;

    int lhsnelem = nelem/2; 
    int rhsnelem = nelem - lhsnelem;
    char* midpoint = (char*)(base) + ((nelem/2)*width);

    Mergesort(base, lhsnelem, width, fcmp);
    Mergesort(midpoint, rhsnelem, width, fcmp);

    int temp[20];
    char* lhs = (char*)(base);
    char* rhs = midpoint;
    char* end = rhs + (rhsnelem*width);        
    int count = 0;

    while ((lhs != midpoint) && (rhs != end))
    {
        int num = fcmp(lhs, rhs); 
        if (num <= 0)
        {
            temp[count] = *(int *)lhs;          // Here!
            lhs += width;
        }
        else
        {
            temp[count] = *(int *)rhs;          // Here!
            rhs += width;
        }
        count++; 
    }


    while (rhs != end)
    {
        temp[count] = *(int *)rhs;          // Here!
        rhs = rhs + width;
        count++;
    }
    while (lhs != midpoint) 
    {
        temp[count] = *(int *)lhs;          // Here!
        lhs = lhs + width;
        count++;
    }

    for (int i = 0; i < nelem; i++)
    {
        lhs = (char *)(base)+ (width*i);
        *(int *)lhs = temp[i];                  // Here!
        lhs = lhs + width;
    }
}

Test code

static int compare(const void *one, const void *two)
{
    if (*(int*)one > *(int*)two) return 1;
    if (*(int*)one < *(int*)two) return -1;
    return 0;
}

#define DIM(x) (sizeof(x)/sizeof(*(x)))

int array1[] = { 5, 8, 7, 4, 1, 6, 11, 41, 160, 47, 38, 120,
                 40, 58, 12, 43, 66, 98, 17, 140 };
int array2[] = { 500, 50, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0 };

static void PrintArray(int *array, size_t n_items)
{
    const char *pad = "";
    for (size_t i = 0; i < n_items; i++)
    {
        cout << pad << array[i];
        pad = ", ";
    }
    cout << endl;
}

int main()
{
    PrintArray(array1, DIM(array1));
    Mergesort(array1, DIM(array1), sizeof(array1[0]), compare);
    PrintArray(array1, DIM(array1));

    PrintArray(array2, DIM(array2));
    Mergesort(array2, DIM(array2), sizeof(array2[0]), compare);
    PrintArray(array2, DIM(array2));

    return 0;
}

You are unlucky to be using a little-endian (Intel) machine; had you been using a big-endian machine (SPARC, PPC), you would have gotten an array of zeroes for the small number test case.

There is a more serious, more deep-seated problem too. The code is actually only usable to sort arrays of up to 20 integers, because of the int temp[20]; (which limits the size to 20 and which restricts the type to int) and because of the 'fixed' assignments.

The assignments should be replaced with memory moves (probably calls to memmove()) which in turn means that the code is only going to work for POD (plain ol' data) types. The temp array would need to be allocated as an array of bytes of the appropriate size (nelem * width). And the memory allocation is nasty; it will slow the code down. The 'maybe good news' is that the final loop which copies the temp array over the original array can be replaced by a memmove().

It is not clear that this is good C++ code - I think a templated implementation would be more sensible. As C code, with the memory move issues sorted out, it is OK.

General code

static void Mergesort(void *base, size_t nelem, size_t width,
                      int (*fcmp)(const void *, const void *))
{
    if (nelem <= 1)
        return;

    int lhsnelem = nelem/2; 
    int rhsnelem = nelem - lhsnelem;
    char* midpoint = (char*)(base) + ((nelem/2)*width);

    Mergesort(base, lhsnelem, width, fcmp);
    Mergesort(midpoint, rhsnelem, width, fcmp);

    char *temp = new char[nelem * width];
    char* lhs = (char*)(base);
    char* rhs = midpoint;
    char* end = rhs + (rhsnelem*width);        
    int count = 0;

    while ((lhs != midpoint) && (rhs != end))
    {
        int num = fcmp(lhs, rhs); 
        if (num <= 0)
        {
            memmove(&temp[count], lhs, width);
            lhs += width;
        }
        else
        {
            memmove(&temp[count], rhs, width);
            rhs += width;
        }
        count += width;
    }

    while (rhs != end)
    {
        memmove(&temp[count], rhs, width);
        rhs += width;
        count += width;
    }
    while (lhs != midpoint) 
    {
        memmove(&temp[count], lhs, width);
        lhs += width;
        count += width;
    }

    memmove(base, temp, nelem * width);
    delete[] temp;
}

回复收藏 0 原文

~没有更多了~