openmp中的希尔排序

发布于 2024-11-05 11:41:33 字数 858 浏览 1 评论 0原文

有谁熟悉 openmp，我没有得到排序列表。我做错了什么。我在最后使用了 Critical，因此在排序后只有一个线程可以访问该部分。我想我的私人价值观是不正确的。他们是否应该在那里，或者我最好只使用#pragma omp for。

void shellsort(int a[])
{
    int i, j, k, m, temp;

    omp_set_num_threads(10);
    for(m = 2; m > 0; m = m/2)
    {
            #pragma omp parallel for private (j, m)
            for(j = m; j < 100; j++)
            {
                    #pragma omp critical
                    for(i = j-m; i >= 0; i = i-m)
                    {
                            if(a[i+m] >= a[i])
                                break;
                            else
                            {
                                temp = a[i];
                                a[i] = a[i+m];
                                a[i+m] = temp;
                            }

                    }
            }
    }
}

原文

Is anyone familiar with openmp, I don't get a sorted list. what am I doing wrong. I am using critical at the end so only one thread can access that section when it's been sorted. I guess my private values are not correct. Should they even be there or am I better off with just #pragma omp for.

void shellsort(int a[])
{
    int i, j, k, m, temp;

    omp_set_num_threads(10);
    for(m = 2; m > 0; m = m/2)
    {
            #pragma omp parallel for private (j, m)
            for(j = m; j < 100; j++)
            {
                    #pragma omp critical
                    for(i = j-m; i >= 0; i = i-m)
                    {
                            if(a[i+m] >= a[i])
                                break;
                            else
                            {
                                temp = a[i];
                                a[i] = a[i+m];
                                a[i+m] = temp;
                            }

                    }
            }
    }
}

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

欢烬 2024-11-12 11:41:33

所以这里有很多问题。

因此，首先，正如已经指出的，i 和 j（以及 temp）需要是私有的； m 和需要共享。使用 openmp 的一个有用的方法是使用 default(none)，这样您就必须思考并行部分中使用的每个变量的作用以及它需要是什么。所以这

   #pragma omp parallel for private (i,j,temp) shared(a,m) default(none)

是一个好的开始。尤其将 m 设为私有有点灾难，因为这意味着 m 在并行区域内未定义。顺便说一下，循环应该以 m = n/2 开始，而不是 m=2。

此外，对于希尔排序来说，您不需要临界区——或者说您不应该这样做。我们稍后会看到，问题并不在于多个线程处理相同的元素。因此，如果你摆脱这些东西，你最终会得到一些几乎有效的东西，但并不总是有效。这给我们带来了更根本的问题。

shell 排序的工作方式基本上是，将数组分成许多（这里，< code>m) 子数组，并对它们进行插入排序（对于小数组来说非常快），然后重新组装；然后继续将它们分成越来越少的子数组和插入排序（非常快，因为它们是部分排序的）。对这些许多子数组进行排序是可以并行完成的事情。（实际上，内存争用将是这种简单方法的一个问题，但仍然如此）。

现在，您获得的代码以串行方式执行此操作，但如果您只是将 j 循环包装在 omp parallel for 中，则不能指望它能够工作。原因是 j 循环的每次迭代都会执行其中一个插入排序的一个步骤。第 j+m 个循环迭代执行下一步。但不能保证它们是由同一个线程或按顺序完成的！如果另一个线程在第一个线程执行第 j 次迭代之前已经完成了第 j+m 次迭代，则插入排序就会混乱并且排序失败。

因此，实现这项工作的方法是重写希尔排序以使并行性更加明确 - 不要将插入排序分解为一堆串行步骤。

#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>

void insertionsort(int a[], int n, int stride) {
    for (int j=stride; j<n; j+=stride) {
        int key = a[j];
        int i = j - stride;
        while (i >= 0 && a[i] > key) {
            a[i+stride] = a[i];
            i-=stride;
        }
        a[i+stride] = key;
    }
}

void shellsort(int a[], int n)
{
    int i, m;

    for(m = n/2; m > 0; m /= 2)
    {
            #pragma omp parallel for shared(a,m,n) private (i) default(none)
            for(i = 0; i < m; i++)
                insertionsort(&(a[i]), n-i, m);
    }
}

void printlist(char *s, int a[], int n) {
    printf("%s\n",s);
    for (int i=0; i<n; i++) {
        printf("%d ", a[i]);
    }
    printf("\n");
}

int checklist(int a[], int n) {
    int result = 0;
    for (int i=0; i<n; i++) {
        if (a[i] != i) {
            result++;
        }
    }
    return result;
}

void seedprng() {
    struct timeval t;

    /* seed prng */
    gettimeofday(&t, NULL);
    srand((unsigned int)(1000000*(t.tv_sec)+t.tv_usec));
}

int main(int argc, char **argv) {
    const int n=100;
    int *data;
    int missorted;

    data = (int *)malloc(n*sizeof(int));
    for (int i=0; i<n; i++)
        data[i] = i;

    seedprng();
    /* shuffle */ 
    for (int i=0; i<n; i++) {
        int i1 = rand() % n;
        int i2 = rand() % n;
        int tmp = data[i1];
        data[i1] = data[i2];
        data[i2] = tmp;
    }

    printlist("Unsorted List:",data,n);

    shellsort2(data,n);

    printlist("Sorted List:",data,n);
    missorted = checklist(data,n);
    if (missorted != 0) printf("%d missorted nubmers\n",missorted);

    return 0;
}

So there's a number of issues here.

So first, as has been pointed out, i and j (and temp) need to be private; m and a need to be shared. A useful thing to do with openmp is to use default(none), that way you are forced to think through what each variable you use in the parallel section does, and what it needs to be. So this

   #pragma omp parallel for private (i,j,temp) shared(a,m) default(none)

is a good start. Making m private in particular is a bit of a disaster, because it means that m is undefined inside the parallel region. The loop, by the way, should start with m = n/2, not m=2.

In addition, you don't need the critical region -- or you shouldn't, for a shell sort. The issue, we'll see in a second, is not so much multiple threads working on the same elements. So if you get rid of those things, you end up with something that almost works, but not always. And that brings us to the more fundamental problem.

The way a shell sort works is, basically, you break the array up into many (here, m) subarrays, and insertion-sort them (very fast for small arrays), and then reassemble; then continue by breaking them up into fewer and fewer subarrays and insertion sort (very fast, because they're partly sorted). Sorting those many subarrays is somethign that can be done in parallel. (In practice, memory contention will be a problem with this simple approach, but still).

Now, the code you've got does that in serial, but it can't be counted on to work if you just wrap the j loop in an omp parallel for. The reason is that each iteration through the j loop does one step of one of the insertion sorts. The j+m'th loop iteration does the next step. But there's no guarantee that they're done by the same thread, or in order! If another thread has already done the j+m'th iteration before the first does the j'th, then the insertion sort is messed up and the sort fails.

So the way to make this work is to rewrite the shell sort to make the parallelism more explicit - to not break up the insertion sort into a bunch of serial steps.

#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>

void insertionsort(int a[], int n, int stride) {
    for (int j=stride; j<n; j+=stride) {
        int key = a[j];
        int i = j - stride;
        while (i >= 0 && a[i] > key) {
            a[i+stride] = a[i];
            i-=stride;
        }
        a[i+stride] = key;
    }
}

void shellsort(int a[], int n)
{
    int i, m;

    for(m = n/2; m > 0; m /= 2)
    {
            #pragma omp parallel for shared(a,m,n) private (i) default(none)
            for(i = 0; i < m; i++)
                insertionsort(&(a[i]), n-i, m);
    }
}

void printlist(char *s, int a[], int n) {
    printf("%s\n",s);
    for (int i=0; i<n; i++) {
        printf("%d ", a[i]);
    }
    printf("\n");
}

int checklist(int a[], int n) {
    int result = 0;
    for (int i=0; i<n; i++) {
        if (a[i] != i) {
            result++;
        }
    }
    return result;
}

void seedprng() {
    struct timeval t;

    /* seed prng */
    gettimeofday(&t, NULL);
    srand((unsigned int)(1000000*(t.tv_sec)+t.tv_usec));
}

int main(int argc, char **argv) {
    const int n=100;
    int *data;
    int missorted;

    data = (int *)malloc(n*sizeof(int));
    for (int i=0; i<n; i++)
        data[i] = i;

    seedprng();
    /* shuffle */ 
    for (int i=0; i<n; i++) {
        int i1 = rand() % n;
        int i2 = rand() % n;
        int tmp = data[i1];
        data[i1] = data[i2];
        data[i2] = tmp;
    }

    printlist("Unsorted List:",data,n);

    shellsort2(data,n);

    printlist("Sorted List:",data,n);
    missorted = checklist(data,n);
    if (missorted != 0) printf("%d missorted nubmers\n",missorted);

    return 0;
}

回复收藏 0 原文