错误cl_invalid_value在简单的C＆＃x2b;＆＃x2B上OPENCL图像操纵程序

发布于 2025-02-12 06:36:21 字数 3668 浏览 1 评论 0原文

我在C ++中编写一个简单的OpenCL程序，其中我需要颠倒地翻转输入图像，我使用CIMG读取和编写图像文件。问题在于，即使程序编译并运行而没有任何错误，输出文件也为空白。

这是CL内核代码：

const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

__kernel void img_turn(
    read_only image2d_t I,
    write_only image2d_t O
)
{
    int gid_x = get_global_id(0);
    int gid_y = get_global_id(1);

    int w = get_image_width(I);
    int h = get_image_height(I);

    if (gid_x >= w || gid_y >= h)
        return;
    
    uint4 p = read_imageui(I, sampler, (int2)(gid_x, gid_y));
    write_imageui(O, (int2)(gid_x, h - gid_y), p);
    
}

这是主机代码的位，首先是输入映像（编辑）：

CImg<unsigned char> img_in(img_file_name);

cl_image_format format = {
    CL_RGBA,
    CL_UNSIGNED_INT8,
};

cl_image_desc desc = {
    .image_type = CL_MEM_OBJECT_IMAGE2D,
    .image_width = (size_t) img_in.width(),
    .image_height = (size_t) img_in.height(),
    .image_row_pitch = 0,
    .image_slice_pitch = 0,
    .num_mip_levels = 0,
    .num_samples = 0,
    .buffer = NULL,
};

cl_mem input_img = clCreateImage(
    context,
    CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
    (const cl_image_format *) &format,
    (const cl_image_desc *) &desc,
    img_in.data(),
    &errNum
);

输出图像的定义（编辑）：

CImg<unsigned char> img_out(img_in.width(), img_in.height(), 1, 4);

format = {
    CL_RGBA,
    CL_UNSIGNED_INT8,
};

desc = {
    .image_type = CL_MEM_OBJECT_IMAGE2D,
    .image_width = (size_t) img_out.width(),
    .image_height = (size_t) img_out.height(),
    .image_row_pitch = 0,
    .image_slice_pitch = 0,
    .num_mip_levels = 0,
    .num_samples = 0,
    .buffer = NULL,
};

cl_mem output_img = clCreateImage(
    context,
    CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
    (const cl_image_format *) &format,
    (const cl_image_desc *) &desc,
    img_out.data(),
    NULL
);

代码的最后一部分，我在其中加入图像并运行程序（编辑）：

size_t origins[3] = {0, 0, 0};
size_t region_in[3] = {(size_t) img_in.width(), (size_t) img_in.height(), (size_t) 1};

errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), input_img);
errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), output_img);

size_t global[2] = {(size_t) img_in.width(), (size_t) img_in.height()};
clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global, NULL, 0, NULL, &kernel_event);

errNum = clEnqueueWriteImage(command_queue, input_img, CL_TRUE, origins, region_in, 0, 0, img_in.data(), 0, NULL, NULL);

size_t region_out[3] = {(size_t) img_out.width(), (size_t) img_out.height(), (size_t) 1};
errNum = clEnqueueReadImage(command_queue, output_img, CL_TRUE, origins, region_out, 0, 0, img_out.data(), 0, NULL, NULL);

clWaitForEvents(1, &kernel_event);
img_out.save("./output_img.png");

编译和运行程序后，创建了“ output_img.png”图像文件，但使用文本编辑器打开时，它是空白的：0bytes，没有任何数据。

编辑：因此，在Petert的建议（以及对我犯了一些愚蠢错误的修正）之后，该程序现在似乎正在做某事（它执行了3秒钟），但仍然什么也没有产生。

编辑2：调试后，我指出了问题：clenqueuereadimage返回错误cl_invalid_value和如果由原点指定的区域读取的区域不超出范围... 但是我不知道为什么。它的大小与输入映像的大小相同，但是clenquewriteImage即使使用相同的参数调用，也不会返回任何错误。

编辑3：问题已通过Egor的回应解决。但是现在它不会输出所需的结果：输入图像：

输出图像：

原文

I'm writing a simple OpenCL program in C++ where i need to flip an input image upside-down, i'm using CImg to read and write image files.
the problem is that even though the program compiles and run without any error, the output file is blank.

Here's the cl kernel code:

const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

__kernel void img_turn(
    read_only image2d_t I,
    write_only image2d_t O
)
{
    int gid_x = get_global_id(0);
    int gid_y = get_global_id(1);

    int w = get_image_width(I);
    int h = get_image_height(I);

    if (gid_x >= w || gid_y >= h)
        return;
    
    uint4 p = read_imageui(I, sampler, (int2)(gid_x, gid_y));
    write_imageui(O, (int2)(gid_x, h - gid_y), p);
    
}

and here's bits of the host code, first the input image (Edited):

CImg<unsigned char> img_in(img_file_name);

cl_image_format format = {
    CL_RGBA,
    CL_UNSIGNED_INT8,
};

cl_image_desc desc = {
    .image_type = CL_MEM_OBJECT_IMAGE2D,
    .image_width = (size_t) img_in.width(),
    .image_height = (size_t) img_in.height(),
    .image_row_pitch = 0,
    .image_slice_pitch = 0,
    .num_mip_levels = 0,
    .num_samples = 0,
    .buffer = NULL,
};

cl_mem input_img = clCreateImage(
    context,
    CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
    (const cl_image_format *) &format,
    (const cl_image_desc *) &desc,
    img_in.data(),
    &errNum
);

the definition of the output image (Edited):

CImg<unsigned char> img_out(img_in.width(), img_in.height(), 1, 4);

format = {
    CL_RGBA,
    CL_UNSIGNED_INT8,
};

desc = {
    .image_type = CL_MEM_OBJECT_IMAGE2D,
    .image_width = (size_t) img_out.width(),
    .image_height = (size_t) img_out.height(),
    .image_row_pitch = 0,
    .image_slice_pitch = 0,
    .num_mip_levels = 0,
    .num_samples = 0,
    .buffer = NULL,
};

cl_mem output_img = clCreateImage(
    context,
    CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
    (const cl_image_format *) &format,
    (const cl_image_desc *) &desc,
    img_out.data(),
    NULL
);

and the last part of the code, where i enqueue the images and run the program (Edited):

size_t origins[3] = {0, 0, 0};
size_t region_in[3] = {(size_t) img_in.width(), (size_t) img_in.height(), (size_t) 1};

errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), input_img);
errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), output_img);

size_t global[2] = {(size_t) img_in.width(), (size_t) img_in.height()};
clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global, NULL, 0, NULL, &kernel_event);

errNum = clEnqueueWriteImage(command_queue, input_img, CL_TRUE, origins, region_in, 0, 0, img_in.data(), 0, NULL, NULL);

size_t region_out[3] = {(size_t) img_out.width(), (size_t) img_out.height(), (size_t) 1};
errNum = clEnqueueReadImage(command_queue, output_img, CL_TRUE, origins, region_out, 0, 0, img_out.data(), 0, NULL, NULL);

clWaitForEvents(1, &kernel_event);
img_out.save("./output_img.png");

after compiling and running the program the 'output_img.png' image file is created but it's blank: 0Bytes and no data whatsoever when opened with a text editor.

Edit:
So after PeterT's suggestion (and after some corrections of some dumb mistakes i made), the program now seems to be doing something (it executes for 3 seconds), but still produces nothing.

Edit 2:
After a bit of debugging, i pinpointed the problem: clEnqueueReadImage returns the error CL_INVALID_VALUE, and the documentation specifies that it returns that error if the region being read specified by origin and region is out of bounds ...
But i don't know why. It's the same size of the input image, but clEnqueueWriteImage doesn't return any error, even if called with the same parameters.

Edit 3:
The problem has been fixed by Egor's response. But now it doesn't output the wanted result:
Input image:

Output image:

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

谈情不如逗狗 2025-02-19 06:36:24

首先，您使用cl_rgba格式创建OpenCl Image对象，然后将指针传递给CIMG Pixel数据。但是cimg使用“平面”结构来保留数据，并且颜色通道的值未交错（有关更多信息，请参见如何使用CIMG存储像素数据？）。例如，带有alpha通道的彩色图像将存储在内存中：

r1r2r3 ... b1b2b3 ... g1g2g3 ... a1a2a3 ...

，但cl_rgba格式意味着图像的交错通道：R1G1B1A1R2G2B2B2A2R3G3B3A3 ...。因此，在将图像复制到设备内存之前，必须将图像转换为cl_rgba格式。例如，使用以下函数：

struct rgba_pixel {
    unsigned char r;
    unsigned char g;
    unsigned char b;
    unsigned char a;
};

constexpr unsigned int r_channel_idx = 0;
constexpr unsigned int g_channel_idx = 1;
constexpr unsigned int b_channel_idx = 2;
constexpr unsigned int a_channel_idx = 3;

std::vector<rgba_pixel>
convert_cimg_to_rgba_buffer(const cimg_library::CImg<unsigned char>& img) {
    const unsigned int img_height = static_cast<unsigned int>(img.height());
    const unsigned int img_width = static_cast<unsigned int>(img.width());
    const unsigned int number_of_channels = static_cast<unsigned int>(img.spectrum());

    const bool has_r_channel = number_of_channels > r_channel_idx;
    const bool has_g_channel = number_of_channels > g_channel_idx;
    const bool has_b_channel = number_of_channels > b_channel_idx;
    const bool has_a_channel = number_of_channels > a_channel_idx;

    std::vector<rgba_pixel> rgba_buf(static_cast<std::size_t>(img_width) * img_height);
    for (unsigned int y = 0; y < img_height; ++y) {
        for (unsigned int x = 0; x < img_width; ++x) {
            const std::size_t pixel_idx = static_cast<std::size_t>(img_width) * y + x;
            rgba_buf[pixel_idx].r = has_r_channel ? *img.data(x, y, 0, r_channel_idx) : 0;
            rgba_buf[pixel_idx].g = has_g_channel ? *img.data(x, y, 0, g_channel_idx) : 0;
            rgba_buf[pixel_idx].b = has_b_channel ? *img.data(x, y, 0, b_channel_idx) : 0;
            rgba_buf[pixel_idx].a = has_a_channel ? *img.data(x, y, 0, a_channel_idx) : UCHAR_MAX;
        }
    }
    return rgba_buf;
}

因此，将图像复制到设备的代码看起来像：

    size_t origins[3] = { 0, 0, 0 };
    size_t region[3] = { (size_t)img_in.width(), (size_t)img_in.height(), (size_t)1 };
    auto rgba_buf = convert_cimg_to_rgba_buffer(img_in);

    ret = clEnqueueWriteImage(command_queue, input_img, CL_TRUE, origins, region, 0, 0, rgba_buf.data(), 0, NULL, NULL);

另外，在保存之前必须转换输出图像。例如，使用以下函数：

void
copy_rgba_buffer_to_cimg(const std::vector<rgba_pixel>& rgba_buf, cimg_library::CImg<unsigned char>& img) {
    const unsigned int img_height = static_cast<unsigned int>(img.height());
    const unsigned int img_width = static_cast<unsigned int>(img.width());
    const unsigned int number_of_channels = static_cast<unsigned int>(img.spectrum());

    const bool has_r_channel = number_of_channels > r_channel_idx;
    const bool has_g_channel = number_of_channels > g_channel_idx;
    const bool has_b_channel = number_of_channels > b_channel_idx;
    const bool has_a_channel = number_of_channels > a_channel_idx;

    for (unsigned int y = 0; y < img_height; ++y) {
        for (unsigned int x = 0; x < img_width; ++x) {
            const std::size_t pixel_idx = static_cast<std::size_t>(img_width) * y + x;
            if (has_r_channel) *img.data(x, y, 0, r_channel_idx) = rgba_buf[pixel_idx].r;
            if (has_g_channel) *img.data(x, y, 0, g_channel_idx) = rgba_buf[pixel_idx].g;
            if (has_b_channel) *img.data(x, y, 0, b_channel_idx) = rgba_buf[pixel_idx].b;
            if (has_a_channel) *img.data(x, y, 0, a_channel_idx) = rgba_buf[pixel_idx].a;
        }
    }
}

和从设备复制图像的代码看起来像：

    ret = clEnqueueReadImage(command_queue, output_img, CL_TRUE, origins, region, 0, 0, rgba_buf.data(), 0, NULL, NULL);
    copy_rgba_buffer_to_cimg(rgba_buf, img_out);
    img_out.save("./output_img.png");

接下来，您可以使用默认属性创建命令标题。这意味着将按顺序执行命令标题的命令。另外，您可以使用阻止读写（blocking_read和blocking_write flags设置为cl_true for clenqueEeReadImage and code> and clenquewriteImage函数调用）。在这种情况下，代码可以无需使用OpenCL事件即可同步命令的执行。只需要以正确的顺序加入命令并使用阻止读取命令以获取结果：

    size_t origins[3] = { 0, 0, 0 };
    size_t region[3] = { (size_t)img_in.width(), (size_t)img_in.height(), (size_t)1 };
    auto rgba_buf = convert_cimg_to_rgba_buffer(img_in);

    ret = clEnqueueWriteImage(command_queue, input_img, CL_FALSE, origins, region, 0, 0, rgba_buf.data(), 0, NULL, NULL);

    size_t global[2] = { (size_t)img_in.width(), (size_t)img_in.height() };
    clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global, NULL, 0, NULL, NULL);

    ret = clEnqueueReadImage(command_queue, output_img, CL_TRUE, origins, region, 0, 0, rgba_buf.data(), 0, NULL, NULL);
    copy_rgba_buffer_to_cimg(rgba_buf, img_out);
    img_out.save("./output_img.png");

最后，应该将新的y位置计算为get_image_height（） - （gid_y） + 1）是因为gid_y是间隔[0，get_image_height（））。因此，内核代码应该看起来像：

    write_imageui(O, (int2)(gid_x, h - gid_y - 1), p);

次要注意，如果您使用ClenqueWriteImage直接将图像复制到设备，则可以省略cl_mem_use_host_ptr_ptr for clcreateeimage flag flag称呼。

First, you create OpenCL image object using CL_RGBA format and pass the pointer to CImg pixel data. But CImg uses "planar" structure to keep the data and the values for color channels are not interleaved (for more information please see How pixel data are stored with CImg?). For example, colored image with alpha channel will be stored in memory as:

R1R2R3...B1B2B3...G1G2G3...A1A2A3...

But CL_RGBA format implies the interleaved channels for the image: R1G1B1A1R2G2B2A2R3G3B3A3.... Therefore, it is necessary to convert the image to CL_RGBA format before copying it to the device memory. For example, using following function:

struct rgba_pixel {
    unsigned char r;
    unsigned char g;
    unsigned char b;
    unsigned char a;
};

constexpr unsigned int r_channel_idx = 0;
constexpr unsigned int g_channel_idx = 1;
constexpr unsigned int b_channel_idx = 2;
constexpr unsigned int a_channel_idx = 3;

std::vector<rgba_pixel>
convert_cimg_to_rgba_buffer(const cimg_library::CImg<unsigned char>& img) {
    const unsigned int img_height = static_cast<unsigned int>(img.height());
    const unsigned int img_width = static_cast<unsigned int>(img.width());
    const unsigned int number_of_channels = static_cast<unsigned int>(img.spectrum());

    const bool has_r_channel = number_of_channels > r_channel_idx;
    const bool has_g_channel = number_of_channels > g_channel_idx;
    const bool has_b_channel = number_of_channels > b_channel_idx;
    const bool has_a_channel = number_of_channels > a_channel_idx;

    std::vector<rgba_pixel> rgba_buf(static_cast<std::size_t>(img_width) * img_height);
    for (unsigned int y = 0; y < img_height; ++y) {
        for (unsigned int x = 0; x < img_width; ++x) {
            const std::size_t pixel_idx = static_cast<std::size_t>(img_width) * y + x;
            rgba_buf[pixel_idx].r = has_r_channel ? *img.data(x, y, 0, r_channel_idx) : 0;
            rgba_buf[pixel_idx].g = has_g_channel ? *img.data(x, y, 0, g_channel_idx) : 0;
            rgba_buf[pixel_idx].b = has_b_channel ? *img.data(x, y, 0, b_channel_idx) : 0;
            rgba_buf[pixel_idx].a = has_a_channel ? *img.data(x, y, 0, a_channel_idx) : UCHAR_MAX;
        }
    }
    return rgba_buf;
}

So the code to copy the image to the device will look like:

    size_t origins[3] = { 0, 0, 0 };
    size_t region[3] = { (size_t)img_in.width(), (size_t)img_in.height(), (size_t)1 };
    auto rgba_buf = convert_cimg_to_rgba_buffer(img_in);

    ret = clEnqueueWriteImage(command_queue, input_img, CL_TRUE, origins, region, 0, 0, rgba_buf.data(), 0, NULL, NULL);

Also, it will be necessary to convert the output image before saving it. For example using following function:

void
copy_rgba_buffer_to_cimg(const std::vector<rgba_pixel>& rgba_buf, cimg_library::CImg<unsigned char>& img) {
    const unsigned int img_height = static_cast<unsigned int>(img.height());
    const unsigned int img_width = static_cast<unsigned int>(img.width());
    const unsigned int number_of_channels = static_cast<unsigned int>(img.spectrum());

    const bool has_r_channel = number_of_channels > r_channel_idx;
    const bool has_g_channel = number_of_channels > g_channel_idx;
    const bool has_b_channel = number_of_channels > b_channel_idx;
    const bool has_a_channel = number_of_channels > a_channel_idx;

    for (unsigned int y = 0; y < img_height; ++y) {
        for (unsigned int x = 0; x < img_width; ++x) {
            const std::size_t pixel_idx = static_cast<std::size_t>(img_width) * y + x;
            if (has_r_channel) *img.data(x, y, 0, r_channel_idx) = rgba_buf[pixel_idx].r;
            if (has_g_channel) *img.data(x, y, 0, g_channel_idx) = rgba_buf[pixel_idx].g;
            if (has_b_channel) *img.data(x, y, 0, b_channel_idx) = rgba_buf[pixel_idx].b;
            if (has_a_channel) *img.data(x, y, 0, a_channel_idx) = rgba_buf[pixel_idx].a;
        }
    }
}

And the code to copy the image from the device will look like:

    ret = clEnqueueReadImage(command_queue, output_img, CL_TRUE, origins, region, 0, 0, rgba_buf.data(), 0, NULL, NULL);
    copy_rgba_buffer_to_cimg(rgba_buf, img_out);
    img_out.save("./output_img.png");

Next, you create the command-queue with default properties. It means that the commands enqueued to the command-queue will be executed in order. Also, you use blocking read and write (blocking_read and blocking_write flags are set to CL_TRUE for clEnqueueReadImage and clEnqueueWriteImage function calls). In this case the code can work without using OpenCL events to synchronize the execution of the commands. It is just necessary to enqueue the commands in the correct order and use blocking read command to get the result:

    size_t origins[3] = { 0, 0, 0 };
    size_t region[3] = { (size_t)img_in.width(), (size_t)img_in.height(), (size_t)1 };
    auto rgba_buf = convert_cimg_to_rgba_buffer(img_in);

    ret = clEnqueueWriteImage(command_queue, input_img, CL_FALSE, origins, region, 0, 0, rgba_buf.data(), 0, NULL, NULL);

    size_t global[2] = { (size_t)img_in.width(), (size_t)img_in.height() };
    clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global, NULL, 0, NULL, NULL);

    ret = clEnqueueReadImage(command_queue, output_img, CL_TRUE, origins, region, 0, 0, rgba_buf.data(), 0, NULL, NULL);
    copy_rgba_buffer_to_cimg(rgba_buf, img_out);
    img_out.save("./output_img.png");

Finally, new y position for the pixel should be calculated as get_image_height() - (gid_y + 1) because gid_y is in interval [0, get_image_height()). So the kernel code should look like:

    write_imageui(O, (int2)(gid_x, h - gid_y - 1), p);

Minor note, if you directly copy the image to the device using clEnqueueWriteImage you can omit CL_MEM_USE_HOST_PTR flag for clCreateImage call.

回复收藏 0 原文

~没有更多了~