使用纹理内存对图像进行下采样

发布于 2024-10-19 15:20:41 字数 1742 浏览 1 评论 0原文

我需要对图像进行下采样。我查到的资料说,如果使用纹理内存,插值功能由硬件免费提供,而且速度更快(我需要的是双线性插值)。有人可以告诉我具体该如何为此编写内核吗?这是我目前的代码:(我使用 (1,1) 大小的线程块)

// Kernel as posted in the question: each thread fetches one value from the
// 2D float texture `refTexture` and stores it, converted to int, in final_red.
// The post launches it with (1,1) thread blocks.
//
// final_red: device output buffer, indexed row-major with a row length of 256.
__global__ void texturekernel( int * final_red){
     // Global thread index along x (f) and along y (c).
     int f = (blockIdx.x * blockDim.x) + threadIdx.x;
     int c = (blockIdx.y * blockDim.y) + threadIdx.y;
     // The index is built from blockIdx only, so it is unique per thread only
     // when every block holds a single thread.
     int id=blockIdx.x+256*blockIdx.y;//256 is the width of the downsampled image; the original was 512
     // NOTE(review): tex2D takes the x coordinate before y, but the y-derived
     // c is passed first here - looks transposed; confirm.
     // NOTE(review): (c+0.5, f+0.5) addresses consecutive texel centres
     // (stride 1), so no 2:1 reduction happens in this fetch - confirm the
     // intended sampling positions.
     final_red[id]=tex2D( refTexture,c+0.5f,f+0.5f);//This is just for the red channel
     //where refTexture is defined as texture <float, 2, cudaReadModeElementType> refTexture;

  };

此版本当前在输出中给出全 0。

编辑(在此版本中,我尝试将 2 张 2000*512 大小的图像下采样为 2 张 1000*256 的图像):

  // Host-side setup fragment from the question: allocates a CUDA array,
  // fills it row by row, binds it to refTexture and launches texturekernel.
  // 2D texture reference of float texels, read without type conversion
  // (cudaReadModeElementType).
  texture <float, 2, cudaReadModeElementType> refTexture; // global variable !
    // Backing CUDA array: float channel format, 512 wide by 2000*2 high
    // (presumably the two 2000*512 input images stacked vertically).
    cudaArray* myArray; 
    cudaChannelFormatDesc description = cudaCreateChannelDesc<float>();
    // NOTE(review): rs is not inspected anywhere in the code shown.
    cudaError rs=cudaMallocArray (   &myArray,&description, 512,2000*2);//



// The line below is part of a loop in which the input image is read row by row;
// rowchecker keeps track of the destination row.
// NOTE(review): the array holds float texels but the byte count is
// sizeof(int)*test_columns. If array_temp_red contains ints, their bit patterns
// would be reinterpreted as floats (small integers become near-zero values that
// convert back to 0) - a plausible cause of the all-zero output; confirm the
// element type of array_temp_red.
// NOTE(review): cudaMemcpHostToDevice looks like a typo for cudaMemcpyHostToDevice.

cudaMemcpyToArray(myArray,0,rowchecker++,array_temp_red,sizeof(int)*test_columns,cudaMemcpHostToDevice);

     // Unnormalized (texel-unit) coordinates, clamped at the edges in both dimensions.
     refTexture.normalized=false;
     refTexture.addressMode[0]=cudaAddressModeClamp;
     refTexture.addressMode[1]=cudaAddressModeClamp;
     // NOTE(review): point filtering returns the nearest texel; the bilinear
     // interpolation asked for in the post needs cudaFilterModeLinear.
     refTexture.filterMode=cudaFilterModePoint;

     cudaBindTextureToArray( refTexture,myArray);

              // One single-thread block per element: 256 blocks in x, 1000*2 in y.
              dim3 blockSize(1,1);
              int n_blocks_x=256;
              int n_blocks_y=1000*2;
              dim3 gridSize(n_blocks_x,n_blocks_y);
              // (2000)*(512)*2/4 = 512000 ints, i.e. 256 * 2000: one int per block of the grid.
              cudaMalloc((void**)&finalarray,(2000)*(512)*2/4*sizeof(int));
              // NOTE(review): no error check follows the launch in the code shown.
              texturekernel<<<gridSize,blockSize>>>(finalarray );

I need to downsample an image. Some reading suggested that if I use texture memory, this functionality comes for free and is faster (I am looking for bilinear interpolation). Could someone tell me exactly how to write the kernel for this? Here is what I currently have:
(I use (1,1) thread blocks)

// Kernel as posted in the question: each thread fetches one value from the
// 2D float texture `refTexture` and stores it, converted to int, in final_red.
// The post launches it with (1,1) thread blocks.
//
// final_red: device output buffer, indexed row-major with a row length of 256.
__global__ void texturekernel( int * final_red){
     // Global thread index along x (f) and along y (c).
     int f = (blockIdx.x * blockDim.x) + threadIdx.x;
     int c = (blockIdx.y * blockDim.y) + threadIdx.y;
     // The index is built from blockIdx only, so it is unique per thread only
     // when every block holds a single thread.
     int id=blockIdx.x+256*blockIdx.y;//256 is the width of the downsampled image; the original was 512
     // NOTE(review): tex2D takes the x coordinate before y, but the y-derived
     // c is passed first here - looks transposed; confirm.
     // NOTE(review): (c+0.5, f+0.5) addresses consecutive texel centres
     // (stride 1), so no 2:1 reduction happens in this fetch - confirm the
     // intended sampling positions.
     final_red[id]=tex2D( refTexture,c+0.5f,f+0.5f);//This is just for the red channel
     //where refTexture is defined as texture <float, 2, cudaReadModeElementType> refTexture;

  };

This version currently gives me all 0's in the output.

Edit (in this version I am trying to downsample two 2000*512 images into two 1000*256 images):

  // Host-side setup fragment from the question: allocates a CUDA array,
  // fills it row by row, binds it to refTexture and launches texturekernel.
  // 2D texture reference of float texels, read without type conversion
  // (cudaReadModeElementType).
  texture <float, 2, cudaReadModeElementType> refTexture; // global variable !
    // Backing CUDA array: float channel format, 512 wide by 2000*2 high
    // (presumably the two 2000*512 input images stacked vertically).
    cudaArray* myArray; 
    cudaChannelFormatDesc description = cudaCreateChannelDesc<float>();
    // NOTE(review): rs is not inspected anywhere in the code shown.
    cudaError rs=cudaMallocArray (   &myArray,&description, 512,2000*2);//



// The line below is part of a loop in which the input image is read row by row;
// rowchecker keeps track of the destination row.
// NOTE(review): the array holds float texels but the byte count is
// sizeof(int)*test_columns. If array_temp_red contains ints, their bit patterns
// would be reinterpreted as floats (small integers become near-zero values that
// convert back to 0) - a plausible cause of the all-zero output; confirm the
// element type of array_temp_red.
// NOTE(review): cudaMemcpHostToDevice looks like a typo for cudaMemcpyHostToDevice.

cudaMemcpyToArray(myArray,0,rowchecker++,array_temp_red,sizeof(int)*test_columns,cudaMemcpHostToDevice);

     // Unnormalized (texel-unit) coordinates, clamped at the edges in both dimensions.
     refTexture.normalized=false;
     refTexture.addressMode[0]=cudaAddressModeClamp;
     refTexture.addressMode[1]=cudaAddressModeClamp;
     // NOTE(review): point filtering returns the nearest texel; the bilinear
     // interpolation asked for in the post needs cudaFilterModeLinear.
     refTexture.filterMode=cudaFilterModePoint;

     cudaBindTextureToArray( refTexture,myArray);

              // One single-thread block per element: 256 blocks in x, 1000*2 in y.
              dim3 blockSize(1,1);
              int n_blocks_x=256;
              int n_blocks_y=1000*2;
              dim3 gridSize(n_blocks_x,n_blocks_y);
              // (2000)*(512)*2/4 = 512000 ints, i.e. 256 * 2000: one int per block of the grid.
              cudaMalloc((void**)&finalarray,(2000)*(512)*2/4*sizeof(int));
              // NOTE(review): no error check follows the launch in the code shown.
              texturekernel<<<gridSize,blockSize>>>(finalarray );

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

时光暖心i 2024-10-26 15:20:41

int id=blockIdx.x+256*blockIdx.y;

此语句超出了 final_red 的范围。

试试这个:

// Downsamples the image bound to `refTexture` by a factor of two in each
// dimension and writes the result (red channel) to `final_red`.
//
// Launch layout: one thread per OUTPUT pixel; x indexes output columns and
// y indexes output rows. Any grid/block shape that covers
// kOutWidth x kOutHeight works (for example a 256 x 2000 grid of 1x1 blocks);
// surplus threads leave through the bounds check.
//
// Preconditions:
//   - refTexture is bound and uses unnormalized coordinates
//     (refTexture.normalized == false).
//   - final_red points to at least kOutWidth * kOutHeight ints in device memory.
//
// Filtering: the fetch is placed on the shared corner of the 2x2 source
// footprint. With cudaFilterModeLinear this returns the mean of the four
// source texels; with cudaFilterModePoint it returns texel (2x+1, 2y+1).
__global__ void texturekernel( int * final_red){
    // Output size: 256 columns (half of the 512-wide source) and 2000 rows.
    // assumes two 1000-row outputs stored back to back - adjust if the
    // source dimensions differ.
    const int kOutWidth  = 256;
    const int kOutHeight = 2000;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;   // output column
    const int y = blockIdx.y * blockDim.y + threadIdx.y;   // output row

    // Guard against grids that overshoot the output image.
    if (x >= kOutWidth || y >= kOutHeight) {
        return;
    }

    // tex2D expects (x, y) in that order. Texel i is centred at i + 0.5, so
    // 2*x + 1 lies exactly between source texels 2*x and 2*x + 1.
    const float sample = tex2D(refTexture, 2.0f * x + 1.0f, 2.0f * y + 1.0f);

    // Exactly one thread writes each output element; round to nearest
    // instead of truncating so averaged values are not biased downwards.
    final_red[y * kOutWidth + x] = __float2int_rn(sample);
}

int id=blockIdx.x+256*blockIdx.y;

This statement crosses the limits of your final_red.

Try this:

// Downsamples the image bound to `refTexture` by a factor of two in each
// dimension and writes the result (red channel) to `final_red`.
//
// Launch layout: one thread per OUTPUT pixel; x indexes output columns and
// y indexes output rows. Any grid/block shape that covers
// kOutWidth x kOutHeight works (for example a 256 x 2000 grid of 1x1 blocks);
// surplus threads leave through the bounds check.
//
// Preconditions:
//   - refTexture is bound and uses unnormalized coordinates
//     (refTexture.normalized == false).
//   - final_red points to at least kOutWidth * kOutHeight ints in device memory.
//
// Filtering: the fetch is placed on the shared corner of the 2x2 source
// footprint. With cudaFilterModeLinear this returns the mean of the four
// source texels; with cudaFilterModePoint it returns texel (2x+1, 2y+1).
__global__ void texturekernel( int * final_red){
    // Output size: 256 columns (half of the 512-wide source) and 2000 rows.
    // assumes two 1000-row outputs stored back to back - adjust if the
    // source dimensions differ.
    const int kOutWidth  = 256;
    const int kOutHeight = 2000;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;   // output column
    const int y = blockIdx.y * blockDim.y + threadIdx.y;   // output row

    // Guard against grids that overshoot the output image.
    if (x >= kOutWidth || y >= kOutHeight) {
        return;
    }

    // tex2D expects (x, y) in that order. Texel i is centred at i + 0.5, so
    // 2*x + 1 lies exactly between source texels 2*x and 2*x + 1.
    const float sample = tex2D(refTexture, 2.0f * x + 1.0f, 2.0f * y + 1.0f);

    // Exactly one thread writes each output element; round to nearest
    // instead of truncating so averaged values are not biased downwards.
    final_red[y * kOutWidth + x] = __float2int_rn(sample);
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文