使用 OpenMP 编译 Smallpt 会导致运行时无限循环

发布于 2024-12-01 02:59:29 字数 3931 浏览 2 评论 0原文

我目前正在查看 Keavin Beason 编写的 smallpt 代码。我使用 g++ -O3 -fopenmpsmallpt.cpp 编译了代码,但我遇到了似乎是无限循环或死锁的情况。

仅使用 g++ -O3smallpt.cpp 编译代码会生成在他的页面上看到的图像,但我根本无法让 OpenMP 并行化工作。

作为参考,我正在使用 Cygwin 和 GCC 4.5.0 在 Windows 7 64 位计算机上进行编译。 作者本人表示他运行了完全相同的代码并且没有遇到任何问题,但是当程序完成跟踪图像时,我无法让程序真正退出。

这可能是我的特定编译器和环境的问题,还是我在这里做错了什么?以下是使用 OpenMP 并行化的特定代码片段。我只是用一些小的格式对其进行了修改,以使其更具可读性。


int main(int argc, char *argv[])
{
  int w=1024, h=768, samps = argc==2 ? atoi(argv[1])/4 : 1;

  Ray cam(Vec(50,52,295.6), Vec(0,-0.042612,-1).norm()); // cam pos, dir
  Vec cx=Vec(w*.5135/h);
  Vec cy=(cx%cam.d).norm()*.5135, r, *c=new Vec[w*h];

  #pragma omp parallel for schedule(dynamic, 1) private(r)       // OpenMP
  for (int y=0; y<h; y++)                       // Loop over image rows
  {
    fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samps*4,100.*y/(h-1));
    for (unsigned short x=0, Xi[3]={0,0,y*y*y}; x<w; x++)   // Loop cols
    {
      for (int sy=0, i=(h-y-1)*w+x; sy<2; sy++)     // 2x2 subpixel rows
      {
        for (int sx=0; sx<2; sx++, r=Vec())        // 2x2 subpixel cols
        {
          for (int s=0; s<samps; s++)
          {
            double r1=2*erand48(Xi), dx=r1<1 ? sqrt(r1)-1: 1-sqrt(2-r1);
            double r2=2*erand48(Xi), dy=r2<1 ? sqrt(r2)-1: 1-sqrt(2-r2);
            Vec d = cx*( ( (sx+.5 + dx)/2 + x)/w - .5) +
                    cy*( ( (sy+.5 + dy)/2 + y)/h - .5) + cam.d;
            r = r + radiance(Ray(cam.o+d*140,d.norm()),0,Xi)*(1./samps);
          } // Camera rays are pushed ^^^^^ forward to start in interior
          c[i] = c[i] + Vec(clamp(r.x),clamp(r.y),clamp(r.z))*.25;
        }
      }
    }
  }

  /*  PROBLEM HERE!
      The code never seems to reach here
      PROBLEM HERE!
  */
  FILE *f = fopen("image.ppm", "w");         // Write image to PPM file.
  fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
  for (int i=0; i<w*h; i++)
    fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
}

这是程序运行完成时生成的输出:

$ time ./a
Rendering (4 spp) 100.00%spp)  spp)   00..0026%%

以下是可以重现上述行为的最基本代码

#include <cstdio>
#include <cstdlib>
#include <cmath>

struct Vector
{
  double x, y, z;
  Vector() : x(0), y(0), z(0) {}
};

int toInt(double x)
{
  return (int)(255 * x);
}

double clamp(double x)
{
  if (x < 0) return 0;
  if (x > 1) return 1;
  return x;
}

int main(int argc, char *argv[])
{
  int w = 1024;
  int h = 768;
  int samples = 1;

  Vector r, *c = new Vector[w * h];

  #pragma omp parallel for schedule(dynamic, 1) private(r)
  for (int y = 0; y < h; y++)
  {
    fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samples * 4, 100. * y / (h - 1));
    for (unsigned short x = 0, Xi[3]= {0, 0, y*y*y}; x < w; x++)
    {
      for (int sy = 0, i = (h - y - 1) * w + x; sy < 2; sy++)
      {
        for (int sx = 0; sx < 2; sx++, r = Vector())
        {
          for (int s = 0; s < samples; s++)
          {
            double r1 = 2 * erand48(Xi), dx = r1 < 1 ? sqrt(r1) - 1 : 1 - sqrt(2 - r1);
            double r2 = 2 * erand48(Xi), dy = r2 < 1 ? sqrt(r2) - 1 : 1 - sqrt(2 - r2);
            r.x += r1;
            r.y += r2;
          }

          c[i].x += clamp(r.x) / 4;
          c[i].y += clamp(r.y) / 4;
        }
      }
    }
  }

  FILE *f = fopen("image.ppm", "w");         // Write image to PPM file.
  fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
  for (int i=0; i<w*h; i++)
    fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
}

这是从以下示例获得的输出程序:

$ g++ test.cpp
$ ./a
Rendering (4 spp) 100.00%

$ g++ test.cpp -fopenmp
$ ./a
Rendering (4 spp) 100.00%spp)  spp)   00..0052%%

I'm currently looking at the smallpt code by Keavin Beason. I compiled the code with what it says on the tin using g++ -O3 -fopenmp smallpt.cpp, and I'm running into what seems like either an infinite loop or a deadlock.

Compiling the code using just g++ -O3 smallpt.cpp produces the images seen on his page, but I can't get the OpenMP parallelization to work at all.

For reference, I'm compiling on a Windows 7 64-bit machine using Cygwin with GCC 4.5.0. The author himself has stated he's run the same exact code and has run into no issues whatsoever, but I can't get the program to actually exit when it's done tracing the image.

Could this be an issue with my particular compiler and environment, or am I doing something wrong here? Here's the particular snippet of code that's parallelized using OpenMP. I've only modified it with some minor formatting to make it more readable.


int main(int argc, char *argv[])
{
  int w=1024, h=768, samps = argc==2 ? atoi(argv[1])/4 : 1;

  Ray cam(Vec(50,52,295.6), Vec(0,-0.042612,-1).norm()); // cam pos, dir
  Vec cx=Vec(w*.5135/h);
  Vec cy=(cx%cam.d).norm()*.5135, r, *c=new Vec[w*h];

  #pragma omp parallel for schedule(dynamic, 1) private(r)       // OpenMP
  for (int y=0; y<h; y++)                       // Loop over image rows
  {
    fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samps*4,100.*y/(h-1));
    for (unsigned short x=0, Xi[3]={0,0,y*y*y}; x<w; x++)   // Loop cols
    {
      for (int sy=0, i=(h-y-1)*w+x; sy<2; sy++)     // 2x2 subpixel rows
      {
        for (int sx=0; sx<2; sx++, r=Vec())        // 2x2 subpixel cols
        {
          for (int s=0; s<samps; s++)
          {
            double r1=2*erand48(Xi), dx=r1<1 ? sqrt(r1)-1: 1-sqrt(2-r1);
            double r2=2*erand48(Xi), dy=r2<1 ? sqrt(r2)-1: 1-sqrt(2-r2);
            Vec d = cx*( ( (sx+.5 + dx)/2 + x)/w - .5) +
                    cy*( ( (sy+.5 + dy)/2 + y)/h - .5) + cam.d;
            r = r + radiance(Ray(cam.o+d*140,d.norm()),0,Xi)*(1./samps);
          } // Camera rays are pushed ^^^^^ forward to start in interior
          c[i] = c[i] + Vec(clamp(r.x),clamp(r.y),clamp(r.z))*.25;
        }
      }
    }
  }

  /*  PROBLEM HERE!
      The code never seems to reach here
      PROBLEM HERE!
  */
  FILE *f = fopen("image.ppm", "w");         // Write image to PPM file.
  fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
  for (int i=0; i<w*h; i++)
    fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
}

Here's the output that the program produces, when it runs to completion:

$ time ./a
Rendering (4 spp) 100.00%spp)  spp)   00..0026%%

The following is the most basic code that can reproduce the above behavior

#include <cstdio>
#include <cstdlib>
#include <cmath>

struct Vector
{
  double x, y, z;
  Vector() : x(0), y(0), z(0) {}
};

int toInt(double x)
{
  return (int)(255 * x);
}

double clamp(double x)
{
  if (x < 0) return 0;
  if (x > 1) return 1;
  return x;
}

int main(int argc, char *argv[])
{
  int w = 1024;
  int h = 768;
  int samples = 1;

  Vector r, *c = new Vector[w * h];

  #pragma omp parallel for schedule(dynamic, 1) private(r)
  for (int y = 0; y < h; y++)
  {
    fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samples * 4, 100. * y / (h - 1));
    for (unsigned short x = 0, Xi[3]= {0, 0, y*y*y}; x < w; x++)
    {
      for (int sy = 0, i = (h - y - 1) * w + x; sy < 2; sy++)
      {
        for (int sx = 0; sx < 2; sx++, r = Vector())
        {
          for (int s = 0; s < samples; s++)
          {
            double r1 = 2 * erand48(Xi), dx = r1 < 1 ? sqrt(r1) - 1 : 1 - sqrt(2 - r1);
            double r2 = 2 * erand48(Xi), dy = r2 < 1 ? sqrt(r2) - 1 : 1 - sqrt(2 - r2);
            r.x += r1;
            r.y += r2;
          }

          c[i].x += clamp(r.x) / 4;
          c[i].y += clamp(r.y) / 4;
        }
      }
    }
  }

  FILE *f = fopen("image.ppm", "w");         // Write image to PPM file.
  fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
  for (int i=0; i<w*h; i++)
    fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
}

This is the output obtained from the following sample program:

$ g++ test.cpp
$ ./a
Rendering (4 spp) 100.00%

$ g++ test.cpp -fopenmp
$ ./a
Rendering (4 spp) 100.00%spp)  spp)   00..0052%%

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

一刻暧昧 2024-12-08 02:59:29

fprintf 不受临界区或#pragma omp single/master 保护。如果在 Windows 上这个东西弄乱了控制台,我不会感到惊讶。

fprintf is not guarded by a critical section or a #pragma omp single/master. I wouldn't be surprised if on Windows this thing messes up the console.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文