使用 OpenMP 编译 Smallpt 会导致运行时无限循环
我目前正在查看 Kevin Beason 编写的 smallpt 代码。我按照说明使用 g++ -O3 -fopenmp smallpt.cpp 编译了代码,但我遇到了似乎是无限循环或死锁的情况。
仅使用 g++ -O3 smallpt.cpp
编译代码会生成在他的页面上看到的图像,但我根本无法让 OpenMP 并行化工作。
作为参考,我正在使用 Cygwin 和 GCC 4.5.0 在 Windows 7 64 位计算机上进行编译。 作者本人表示他运行了完全相同的代码并且没有遇到任何问题,但是当程序完成跟踪图像时,我无法让程序真正退出。
这可能是我的特定编译器和环境的问题,还是我在这里做错了什么?以下是使用 OpenMP 并行化的特定代码片段。我只是用一些小的格式对其进行了修改,以使其更具可读性。
// Entry point of the renderer: fills a w*h buffer of Vec colours row by row
// (rows are distributed across OpenMP threads) and then writes the buffer to
// "image.ppm" as a plain-text P3 image.
//
// argv[1], when it is the only argument, is the requested samples per pixel;
// it is divided by 4 because each pixel is sampled on a 2x2 subpixel grid.
int main(int argc, char *argv[])
{
int w=1024, h=768, samps = argc==2 ? atoi(argv[1])/4 : 1;
Ray cam(Vec(50,52,295.6), Vec(0,-0.042612,-1).norm()); // cam pos, dir
Vec cx=Vec(w*.5135/h);
// r accumulates one subpixel's result; c is the image buffer, one Vec per pixel.
Vec cy=(cx%cam.d).norm()*.5135, r, *c=new Vec[w*h];
// Rows are handed out one at a time (dynamic schedule, chunk size 1) and each
// thread works on its own copy of r.
// NOTE(review): r is only reset in the sx loop's increment expression, so the
// first subpixel of each thread relies on how the private copy of r is
// initialised (presumably Vec's default constructor) - confirm.
#pragma omp parallel for schedule(dynamic, 1) private(r) // OpenMP
for (int y=0; y<h; y++) // Loop over image rows
{
// Progress output. Every thread executes this call and nothing in this loop
// serialises it (no critical/single/master region), so the "\r"-based
// progress line can be written by several threads at once.
fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samps*4,100.*y/(h-1));
// Xi is the per-row erand48 state, seeded from y; the int value y*y*y is
// converted to unsigned short when stored.
for (unsigned short x=0, Xi[3]={0,0,y*y*y}; x<w; x++) // Loop cols
{
// i indexes the pixel with the row order flipped (row y is stored at h-y-1).
// Different y values map to disjoint ranges of i.
for (int sy=0, i=(h-y-1)*w+x; sy<2; sy++) // 2x2 subpixel rows
{
for (int sx=0; sx<2; sx++, r=Vec()) // 2x2 subpixel cols
{
for (int s=0; s<samps; s++)
{
// r1 and r2 are twice an erand48 draw; dx and dy remap them to sample
// offsets via the sqrt expressions below.
double r1=2*erand48(Xi), dx=r1<1 ? sqrt(r1)-1: 1-sqrt(2-r1);
double r2=2*erand48(Xi), dy=r2<1 ? sqrt(r2)-1: 1-sqrt(2-r2);
Vec d = cx*( ( (sx+.5 + dx)/2 + x)/w - .5) +
cy*( ( (sy+.5 + dy)/2 + y)/h - .5) + cam.d;
r = r + radiance(Ray(cam.o+d*140,d.norm()),0,Xi)*(1./samps);
} // Camera rays are pushed ^^^^^ forward to start in interior
// Each of the four subpixels adds its clamped colour with weight .25.
c[i] = c[i] + Vec(clamp(r.x),clamp(r.y),clamp(r.z))*.25;
}
}
}
}
/* PROBLEM HERE!
The code never seems to reach here
PROBLEM HERE!
*/
// NOTE(review): the fopen result is not checked, and f is not closed (nor c
// released) anywhere in this function.
FILE *f = fopen("image.ppm", "w"); // Write image to PPM file.
// Header: "P3", then width and height, then the maximum channel value 255.
fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
for (int i=0; i<w*h; i++)
fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
}
这是程序运行完成时生成的输出:
$ time ./a
Rendering (4 spp) 100.00%spp) spp) 00..0026%%
以下是可以重现上述行为的最基本代码
#include <cstdio>
#include <cstdlib>
#include <cmath>
// Three-component value type used by the reproduction program; a
// default-constructed Vector has all components equal to zero.
struct Vector
{
    double x;
    double y;
    double z;

    Vector()
        : x(0.0),
          y(0.0),
          z(0.0)
    {
    }
};
// Scales x by 255 and converts the product to int (truncating toward zero).
// No range clamping is applied here.
int toInt(double x)
{
    int channel = (int)(255 * x);
    return channel;
}
// Limits x to the closed interval [0, 1]: values below 0 become 0, values
// above 1 become 1, and everything else is returned unchanged.
double clamp(double x)
{
    return (x < 0) ? 0 : ((x > 1) ? 1 : x);
}
// Minimal reproduction: runs the same loop nest as the renderer, but only
// accumulates erand48 draws into a w*h buffer (rows distributed across OpenMP
// threads) and then writes the buffer to "image.ppm" as a plain-text P3 image.
int main(int argc, char *argv[])
{
int w = 1024;
int h = 768;
int samples = 1;
// r accumulates one subpixel's result; c is the image buffer, one Vector per pixel.
Vector r, *c = new Vector[w * h];
// Rows are handed out one at a time (dynamic schedule, chunk size 1) and each
// thread works on its own copy of r.
#pragma omp parallel for schedule(dynamic, 1) private(r)
for (int y = 0; y < h; y++)
{
// Progress output. Every thread executes this call and nothing in this loop
// serialises it (no critical/single/master region), so the "\r"-based
// progress line can be written by several threads at once.
fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samples * 4, 100. * y / (h - 1));
// Xi is the per-row erand48 state, seeded from y; the int value y*y*y is
// converted to unsigned short when stored.
for (unsigned short x = 0, Xi[3]= {0, 0, y*y*y}; x < w; x++)
{
// i indexes the pixel with the row order flipped (row y is stored at h-y-1).
// Different y values map to disjoint ranges of i.
for (int sy = 0, i = (h - y - 1) * w + x; sy < 2; sy++)
{
// r is reset to a zeroed Vector after each subpixel.
for (int sx = 0; sx < 2; sx++, r = Vector())
{
for (int s = 0; s < samples; s++)
{
// dx and dy are computed as in the full renderer but are not used below.
double r1 = 2 * erand48(Xi), dx = r1 < 1 ? sqrt(r1) - 1 : 1 - sqrt(2 - r1);
double r2 = 2 * erand48(Xi), dy = r2 < 1 ? sqrt(r2) - 1 : 1 - sqrt(2 - r2);
r.x += r1;
r.y += r2;
}
// Only x and y are accumulated; c[i].z keeps its constructed value of 0.
c[i].x += clamp(r.x) / 4;
c[i].y += clamp(r.y) / 4;
}
}
}
}
// NOTE(review): the fopen result is not checked, and f is not closed (nor c
// released) anywhere in this function.
FILE *f = fopen("image.ppm", "w"); // Write image to PPM file.
// Header: "P3", then width and height, then the maximum channel value 255.
fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
for (int i=0; i<w*h; i++)
fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
}
这是上述示例程序的输出:
$ g++ test.cpp
$ ./a
Rendering (4 spp) 100.00%
$ g++ test.cpp -fopenmp
$ ./a
Rendering (4 spp) 100.00%spp) spp) 00..0052%%
I'm currently looking at the smallpt code by Kevin Beason. I compiled the code with what it says on the tin, using g++ -O3 -fopenmp smallpt.cpp, and I'm running into what seems like either an infinite loop or a deadlock.
Compiling the code using just g++ -O3 smallpt.cpp produces the images seen on his page, but I can't get the OpenMP parallelization to work at all.
For reference, I'm compiling on a Windows 7 64-bit machine using Cygwin with GCC 4.5.0. The author himself has stated he's run the same exact code and has run into no issues whatsoever, but I can't get the program to actually exit when it's done tracing the image.
Could this be an issue with my particular compiler and environment, or am I doing something wrong here? Here's the particular snippet of code that's parallelized using OpenMP. I've only modified it with some minor formatting to make it more readable.
// Entry point of the renderer: fills a w*h buffer of Vec colours row by row
// (rows are distributed across OpenMP threads) and then writes the buffer to
// "image.ppm" as a plain-text P3 image.
//
// argv[1], when it is the only argument, is the requested samples per pixel;
// it is divided by 4 because each pixel is sampled on a 2x2 subpixel grid.
int main(int argc, char *argv[])
{
int w=1024, h=768, samps = argc==2 ? atoi(argv[1])/4 : 1;
Ray cam(Vec(50,52,295.6), Vec(0,-0.042612,-1).norm()); // cam pos, dir
Vec cx=Vec(w*.5135/h);
// r accumulates one subpixel's result; c is the image buffer, one Vec per pixel.
Vec cy=(cx%cam.d).norm()*.5135, r, *c=new Vec[w*h];
// Rows are handed out one at a time (dynamic schedule, chunk size 1) and each
// thread works on its own copy of r.
// NOTE(review): r is only reset in the sx loop's increment expression, so the
// first subpixel of each thread relies on how the private copy of r is
// initialised (presumably Vec's default constructor) - confirm.
#pragma omp parallel for schedule(dynamic, 1) private(r) // OpenMP
for (int y=0; y<h; y++) // Loop over image rows
{
// Progress output. Every thread executes this call and nothing in this loop
// serialises it (no critical/single/master region), so the "\r"-based
// progress line can be written by several threads at once.
fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samps*4,100.*y/(h-1));
// Xi is the per-row erand48 state, seeded from y; the int value y*y*y is
// converted to unsigned short when stored.
for (unsigned short x=0, Xi[3]={0,0,y*y*y}; x<w; x++) // Loop cols
{
// i indexes the pixel with the row order flipped (row y is stored at h-y-1).
// Different y values map to disjoint ranges of i.
for (int sy=0, i=(h-y-1)*w+x; sy<2; sy++) // 2x2 subpixel rows
{
for (int sx=0; sx<2; sx++, r=Vec()) // 2x2 subpixel cols
{
for (int s=0; s<samps; s++)
{
// r1 and r2 are twice an erand48 draw; dx and dy remap them to sample
// offsets via the sqrt expressions below.
double r1=2*erand48(Xi), dx=r1<1 ? sqrt(r1)-1: 1-sqrt(2-r1);
double r2=2*erand48(Xi), dy=r2<1 ? sqrt(r2)-1: 1-sqrt(2-r2);
Vec d = cx*( ( (sx+.5 + dx)/2 + x)/w - .5) +
cy*( ( (sy+.5 + dy)/2 + y)/h - .5) + cam.d;
r = r + radiance(Ray(cam.o+d*140,d.norm()),0,Xi)*(1./samps);
} // Camera rays are pushed ^^^^^ forward to start in interior
// Each of the four subpixels adds its clamped colour with weight .25.
c[i] = c[i] + Vec(clamp(r.x),clamp(r.y),clamp(r.z))*.25;
}
}
}
}
/* PROBLEM HERE!
The code never seems to reach here
PROBLEM HERE!
*/
// NOTE(review): the fopen result is not checked, and f is not closed (nor c
// released) anywhere in this function.
FILE *f = fopen("image.ppm", "w"); // Write image to PPM file.
// Header: "P3", then width and height, then the maximum channel value 255.
fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
for (int i=0; i<w*h; i++)
fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
}
Here's the output that the program produces, when it runs to completion:
$ time ./a
Rendering (4 spp) 100.00%spp) spp) 00..0026%%
The following is the most basic code that can reproduce the above behavior
#include <cstdio>
#include <cstdlib>
#include <cmath>
// Three-component value type used by the reproduction program; a
// default-constructed Vector has all components equal to zero.
struct Vector
{
    double x;
    double y;
    double z;

    Vector()
        : x(0.0),
          y(0.0),
          z(0.0)
    {
    }
};
// Scales x by 255 and converts the product to int (truncating toward zero).
// No range clamping is applied here.
int toInt(double x)
{
    int channel = (int)(255 * x);
    return channel;
}
// Limits x to the closed interval [0, 1]: values below 0 become 0, values
// above 1 become 1, and everything else is returned unchanged.
double clamp(double x)
{
    return (x < 0) ? 0 : ((x > 1) ? 1 : x);
}
// Minimal reproduction: runs the same loop nest as the renderer, but only
// accumulates erand48 draws into a w*h buffer (rows distributed across OpenMP
// threads) and then writes the buffer to "image.ppm" as a plain-text P3 image.
int main(int argc, char *argv[])
{
int w = 1024;
int h = 768;
int samples = 1;
// r accumulates one subpixel's result; c is the image buffer, one Vector per pixel.
Vector r, *c = new Vector[w * h];
// Rows are handed out one at a time (dynamic schedule, chunk size 1) and each
// thread works on its own copy of r.
#pragma omp parallel for schedule(dynamic, 1) private(r)
for (int y = 0; y < h; y++)
{
// Progress output. Every thread executes this call and nothing in this loop
// serialises it (no critical/single/master region), so the "\r"-based
// progress line can be written by several threads at once.
fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samples * 4, 100. * y / (h - 1));
// Xi is the per-row erand48 state, seeded from y; the int value y*y*y is
// converted to unsigned short when stored.
for (unsigned short x = 0, Xi[3]= {0, 0, y*y*y}; x < w; x++)
{
// i indexes the pixel with the row order flipped (row y is stored at h-y-1).
// Different y values map to disjoint ranges of i.
for (int sy = 0, i = (h - y - 1) * w + x; sy < 2; sy++)
{
// r is reset to a zeroed Vector after each subpixel.
for (int sx = 0; sx < 2; sx++, r = Vector())
{
for (int s = 0; s < samples; s++)
{
// dx and dy are computed as in the full renderer but are not used below.
double r1 = 2 * erand48(Xi), dx = r1 < 1 ? sqrt(r1) - 1 : 1 - sqrt(2 - r1);
double r2 = 2 * erand48(Xi), dy = r2 < 1 ? sqrt(r2) - 1 : 1 - sqrt(2 - r2);
r.x += r1;
r.y += r2;
}
// Only x and y are accumulated; c[i].z keeps its constructed value of 0.
c[i].x += clamp(r.x) / 4;
c[i].y += clamp(r.y) / 4;
}
}
}
}
// NOTE(review): the fopen result is not checked, and f is not closed (nor c
// released) anywhere in this function.
FILE *f = fopen("image.ppm", "w"); // Write image to PPM file.
// Header: "P3", then width and height, then the maximum channel value 255.
fprintf(f, "P3\n%d %d\n%d\n", w, h, 255);
for (int i=0; i<w*h; i++)
fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z));
}
This is the output obtained from the sample program above:
$ g++ test.cpp
$ ./a
Rendering (4 spp) 100.00%
$ g++ test.cpp -fopenmp
$ ./a
Rendering (4 spp) 100.00%spp) spp) 00..0052%%
如果你对这篇内容有疑问,欢迎到本站社区发帖提问、参与讨论,获取更多帮助,或者扫描二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
fprintf 不受临界区或 #pragma omp single/master 保护。如果在 Windows 上这个东西弄乱了控制台,我不会感到惊讶。
fprintf is not guarded by a critical section or a #pragma omp single/master. I wouldn't be surprised if on Windows this thing messes up the console.