C/CUDA 中 3D 旋转的问题
我正在尝试使用 C/CUDA 实现图像堆栈的 3D 旋转例程(主要是为了加快计算时间)。我使用 ImageJ 源代码作为代码的基础,因此旋转不是围绕原点自由旋转,而是沿着轴旋转。不过,我遇到了一个有趣的问题。我实现了一个对象绕 Y 轴的旋转,几乎没有问题。但是,当我尝试使用非常相似的代码绕 X 轴旋转时,会出现问题。我注意到在 X 旋转中,存在明显的条纹,例如以下示例:
https://i.sstatic.net/BdkDf.png
这在我正在进行的 Y 旋转中没有发生。
下面给出了绕各轴旋转所用的 CUDA 内核(rotationY 工作正常,rotationX 会产生条纹)。既然两者的实现非常相似,我想知道是否有人能解释为什么其中一个有问题而另一个没有。
编辑:我已将问题范围缩小到 atomicMin() 无法正常工作。即使所有偏移量均已正确设置,zbuffer 也没有被正确更新。如果有人知道这可能是什么原因,请告诉我。
/*
 * rotationY - nearest-surface projection of an image stack rotated about the Y axis.
 *
 * Launch layout: 1D grid, one thread per pixel of a single slice
 * (i = threadIdx.x + blockDim.x*blockIdx.x indexes a width*height slice).
 * The grid may be larger than width*height; surplus threads do no memory
 * access but still take part in every barrier.
 *
 * input          - stack read as depth consecutive slices of width*height ints
 * projArray      - output projection, indexed by the computed offset
 * costheta,
 * sintheta       - rotation coefficients; products are divided by 8192, so they
 *                  are presumably cos/sin scaled by 8192 (TODO confirm with caller)
 * xcenter,
 * zcenter        - rotation centre along x and z
 * projectionwidth- row stride used when computing the projection offset
 * projectionsize - exclusive upper bound for offsets into zbuffer/projArray;
 *                  out-of-range offsets are redirected to element 0
 * zbuffer        - per-projection-pixel minimum znew; reset to 32767 here
 * adjCue, depthCueSurf, zmax, zdiff - terms of the depth-cued output value
 *
 * NOTE(review): __syncthreads() is a barrier for ONE block only. The zbuffer
 * reset, the atomicMin pass and the projArray write are therefore ordered only
 * among threads of the same block. If pixels handled by different blocks can
 * map to the same projection offset, another block may reset or read zbuffer
 * at the wrong time and the result is racy. Making that robust needs separate
 * launches (reset / min pass / write pass) or a cooperative grid-wide sync,
 * which cannot be done without changing how this kernel is launched.
 */
__global__ void rotationY(int *input, int *projArray, int costheta, int sintheta, int width, int height, int depth, int xcenter, int zcenter,
int projectionwidth, int projectionsize, int *zbuffer, int adjCue, int depthCueSurf, int zmax, int zdiff){
    const int i = threadIdx.x + blockDim.x*blockIdx.x;
    const int slicesize = width*height;
    // Surplus threads must not read input, but they may not return early either:
    // every thread of the block has to reach the barriers below.
    const bool inslice = (i < slicesize);

    const int y = i/width;
    const int x = i - y*width - xcenter;
    const int xcostheta = x*costheta;
    const int xsintheta = x*sintheta;
    const int offsetinit = y*projectionwidth;

    int k, z, value;
    int xnew = 0, znew = 0, offset = 0;
    bool hit;

    // Reset only entries that lie inside the projection buffer.
    if (i < projectionsize) zbuffer[i] = 32767;
    __syncthreads();

    for (k = 1; k <= depth; k++) {
        // Slice index relative to the rotation centre (integer form of (int)(k-1+.5)).
        z = (k-1) - zcenter;

        // Load the voxel once; the kernel never writes to input.
        value = inslice ? input[i + (k-1)*slicesize] : 0;
        hit = (value > 0);

        if (hit) {
            xnew = (xcostheta + z*sintheta)/8192 + xcenter;
            znew = (z*costheta - xsintheta)/8192 + zcenter;
            offset = offsetinit + xnew;
            if (offset<0 || offset>=projectionsize) offset = 0;
            atomicMin(&zbuffer[offset], znew);
        }

        // Block-level barrier between the min pass and the write pass of this slice.
        __syncthreads();

        if (hit) {
            // Only the voxel that owns the current minimum depth writes its depth-cued value.
            if (znew <= zbuffer[offset]) projArray[offset] = adjCue*value/100 + depthCueSurf*value*(zmax-znew)/zdiff;
        }
    }
}
/*
 * rotationX - nearest-surface projection of an image stack rotated about the X axis.
 *
 * Launch layout: 1D grid, one thread per pixel of a single slice
 * (i = threadIdx.x + blockDim.x*blockIdx.x indexes a width*height slice).
 * The grid may be larger than width*height; surplus threads do no memory
 * access but still take part in every barrier.
 *
 * input          - stack read as depth consecutive slices of width*height ints
 * projArray      - output projection, indexed by offset = x + ynew*width
 * costheta,
 * sintheta       - rotation coefficients; products are divided by 8192, so they
 *                  are presumably cos/sin scaled by 8192 (TODO confirm with caller)
 * ycenter,
 * zcenter        - rotation centre along y and z
 * projectionsize - exclusive upper bound for offsets into zbuffer/projArray;
 *                  out-of-range offsets are redirected to element 0
 * zbuffer        - per-projection-pixel minimum znew; reset to 32767 here
 * adjCue, depthCueSurf, zmax, zdiff - terms of the depth-cued output value
 *
 * NOTE(review): __syncthreads() is a barrier for ONE block only. Here the
 * target offset moves to a different row (ynew*width), so a pixel routinely
 * lands on a projection element whose zbuffer entry is reset and tested by
 * threads of another block. Those accesses are not ordered by the barriers
 * below, which makes the z-test racy across blocks. Making that robust needs
 * separate launches (reset / min pass / write pass) or a cooperative grid-wide
 * sync, which cannot be done without changing how this kernel is launched.
 */
__global__ void rotationX(int *input, int *projArray, int costheta, int sintheta, int width, int height, int depth, int ycenter, int zcenter,
int projectionsize, int *zbuffer, int adjCue, int depthCueSurf, int zmax, int zdiff) {
    const int i = threadIdx.x + blockDim.x*blockIdx.x;
    const int slicesize = width*height;
    // Surplus threads must not read input, but they may not return early either:
    // every thread of the block has to reach the barriers below.
    const bool inslice = (i < slicesize);

    const int row = i/width;
    const int x = i - row*width;
    const int y = row - ycenter;
    const int ycostheta = y*costheta;
    const int ysintheta = y*sintheta;

    int k, z, value;
    int ynew = 0, znew = 0, offset = 0;
    bool hit;

    // Reset only entries that lie inside the projection buffer.
    if (i < projectionsize) zbuffer[i] = 32767;
    __syncthreads();

    for (k = 1; k <= depth; k++) {
        // Slice index relative to the rotation centre (integer form of (int)(k-1+.5)).
        z = (k-1) - zcenter;

        // Load the voxel once; the kernel never writes to input.
        value = inslice ? input[i + (k-1)*slicesize] : 0;
        hit = (value > 0);

        if (hit) {
            ynew = (ycostheta - z*sintheta)/8192 + ycenter;
            znew = (ysintheta + z*costheta)/8192 + zcenter;
            offset = x + ynew*width;
            if (offset<0 || offset>=projectionsize) offset = 0;
            atomicMin(&zbuffer[offset], znew);
        }

        // Block-level barrier between the min pass and the write pass of this slice.
        __syncthreads();

        if (hit) {
            // Only the voxel that owns the current minimum depth writes its depth-cued value.
            if (znew <= zbuffer[offset]) projArray[offset] = adjCue*value/100 + depthCueSurf*value*(zmax-znew)/zdiff;
        }
    }
}
I am trying to implement a 3D rotation routine for an image stack using C/CUDA (mostly to speed up calculation times). I used the ImageJ source code as a basis for the code, so the rotation isn't freely about the origin, but rather along axes. There is an interesting problem I have come upon though. I implemented a rotation of an object about the Y-axis with little problem. However, when I attempt to rotate about the X-axis, with very similar code, there are issues. I noticed that in the X rotation, there was significant striping, such as this example:
https://i.sstatic.net/BdkDf.png
which was not occurring in the Y-rotation I was doing.
I have provided the CUDA kernels that are run to do the rotation about each axis (rotationY is the one that works, rotationX is the one that gives the striping). I was wondering if anybody could provide any suggestions as to why I would be getting problems with one and not the other, given that they are very similar in implementation.
EDIT: I have narrowed the problem down to atomicMin() not working correctly. zbuffer is not changing correctly even though all the offsets are being set correctly. If anybody knows why this might not be working it would be good to know.
/*
 * rotationY - nearest-surface projection of an image stack rotated about the Y axis.
 *
 * Launch layout: 1D grid, one thread per pixel of a single slice
 * (i = threadIdx.x + blockDim.x*blockIdx.x indexes a width*height slice).
 * The grid may be larger than width*height; surplus threads do no memory
 * access but still take part in every barrier.
 *
 * input          - stack read as depth consecutive slices of width*height ints
 * projArray      - output projection, indexed by the computed offset
 * costheta,
 * sintheta       - rotation coefficients; products are divided by 8192, so they
 *                  are presumably cos/sin scaled by 8192 (TODO confirm with caller)
 * xcenter,
 * zcenter        - rotation centre along x and z
 * projectionwidth- row stride used when computing the projection offset
 * projectionsize - exclusive upper bound for offsets into zbuffer/projArray;
 *                  out-of-range offsets are redirected to element 0
 * zbuffer        - per-projection-pixel minimum znew; reset to 32767 here
 * adjCue, depthCueSurf, zmax, zdiff - terms of the depth-cued output value
 *
 * NOTE(review): __syncthreads() is a barrier for ONE block only. The zbuffer
 * reset, the atomicMin pass and the projArray write are therefore ordered only
 * among threads of the same block. If pixels handled by different blocks can
 * map to the same projection offset, another block may reset or read zbuffer
 * at the wrong time and the result is racy. Making that robust needs separate
 * launches (reset / min pass / write pass) or a cooperative grid-wide sync,
 * which cannot be done without changing how this kernel is launched.
 */
__global__ void rotationY(int *input, int *projArray, int costheta, int sintheta, int width, int height, int depth, int xcenter, int zcenter,
int projectionwidth, int projectionsize, int *zbuffer, int adjCue, int depthCueSurf, int zmax, int zdiff){
    const int i = threadIdx.x + blockDim.x*blockIdx.x;
    const int slicesize = width*height;
    // Surplus threads must not read input, but they may not return early either:
    // every thread of the block has to reach the barriers below.
    const bool inslice = (i < slicesize);

    const int y = i/width;
    const int x = i - y*width - xcenter;
    const int xcostheta = x*costheta;
    const int xsintheta = x*sintheta;
    const int offsetinit = y*projectionwidth;

    int k, z, value;
    int xnew = 0, znew = 0, offset = 0;
    bool hit;

    // Reset only entries that lie inside the projection buffer.
    if (i < projectionsize) zbuffer[i] = 32767;
    __syncthreads();

    for (k = 1; k <= depth; k++) {
        // Slice index relative to the rotation centre (integer form of (int)(k-1+.5)).
        z = (k-1) - zcenter;

        // Load the voxel once; the kernel never writes to input.
        value = inslice ? input[i + (k-1)*slicesize] : 0;
        hit = (value > 0);

        if (hit) {
            xnew = (xcostheta + z*sintheta)/8192 + xcenter;
            znew = (z*costheta - xsintheta)/8192 + zcenter;
            offset = offsetinit + xnew;
            if (offset<0 || offset>=projectionsize) offset = 0;
            atomicMin(&zbuffer[offset], znew);
        }

        // Block-level barrier between the min pass and the write pass of this slice.
        __syncthreads();

        if (hit) {
            // Only the voxel that owns the current minimum depth writes its depth-cued value.
            if (znew <= zbuffer[offset]) projArray[offset] = adjCue*value/100 + depthCueSurf*value*(zmax-znew)/zdiff;
        }
    }
}
/*
 * rotationX - nearest-surface projection of an image stack rotated about the X axis.
 *
 * Launch layout: 1D grid, one thread per pixel of a single slice
 * (i = threadIdx.x + blockDim.x*blockIdx.x indexes a width*height slice).
 * The grid may be larger than width*height; surplus threads do no memory
 * access but still take part in every barrier.
 *
 * input          - stack read as depth consecutive slices of width*height ints
 * projArray      - output projection, indexed by offset = x + ynew*width
 * costheta,
 * sintheta       - rotation coefficients; products are divided by 8192, so they
 *                  are presumably cos/sin scaled by 8192 (TODO confirm with caller)
 * ycenter,
 * zcenter        - rotation centre along y and z
 * projectionsize - exclusive upper bound for offsets into zbuffer/projArray;
 *                  out-of-range offsets are redirected to element 0
 * zbuffer        - per-projection-pixel minimum znew; reset to 32767 here
 * adjCue, depthCueSurf, zmax, zdiff - terms of the depth-cued output value
 *
 * NOTE(review): __syncthreads() is a barrier for ONE block only. Here the
 * target offset moves to a different row (ynew*width), so a pixel routinely
 * lands on a projection element whose zbuffer entry is reset and tested by
 * threads of another block. Those accesses are not ordered by the barriers
 * below, which makes the z-test racy across blocks. Making that robust needs
 * separate launches (reset / min pass / write pass) or a cooperative grid-wide
 * sync, which cannot be done without changing how this kernel is launched.
 */
__global__ void rotationX(int *input, int *projArray, int costheta, int sintheta, int width, int height, int depth, int ycenter, int zcenter,
int projectionsize, int *zbuffer, int adjCue, int depthCueSurf, int zmax, int zdiff) {
    const int i = threadIdx.x + blockDim.x*blockIdx.x;
    const int slicesize = width*height;
    // Surplus threads must not read input, but they may not return early either:
    // every thread of the block has to reach the barriers below.
    const bool inslice = (i < slicesize);

    const int row = i/width;
    const int x = i - row*width;
    const int y = row - ycenter;
    const int ycostheta = y*costheta;
    const int ysintheta = y*sintheta;

    int k, z, value;
    int ynew = 0, znew = 0, offset = 0;
    bool hit;

    // Reset only entries that lie inside the projection buffer.
    if (i < projectionsize) zbuffer[i] = 32767;
    __syncthreads();

    for (k = 1; k <= depth; k++) {
        // Slice index relative to the rotation centre (integer form of (int)(k-1+.5)).
        z = (k-1) - zcenter;

        // Load the voxel once; the kernel never writes to input.
        value = inslice ? input[i + (k-1)*slicesize] : 0;
        hit = (value > 0);

        if (hit) {
            ynew = (ycostheta - z*sintheta)/8192 + ycenter;
            znew = (ysintheta + z*costheta)/8192 + zcenter;
            offset = x + ynew*width;
            if (offset<0 || offset>=projectionsize) offset = 0;
            atomicMin(&zbuffer[offset], znew);
        }

        // Block-level barrier between the min pass and the write pass of this slice.
        __syncthreads();

        if (hit) {
            // Only the voxel that owns the current minimum depth writes its depth-cued value.
            if (znew <= zbuffer[offset]) projArray[offset] = adjCue*value/100 + depthCueSurf*value*(zmax-znew)/zdiff;
        }
    }
}
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
rotationX的函数原型中缺少参数projectionwidth。
这是我目前认为最可能的错误原因。
The parameter projectionwidth is missing in the function prototype of rotationX.
This is my best candidate for the error right now.