无法在 OpenCL 中从 SPIR-V 文件创建内核

发布于 2025-01-30 05:24:38 字数 5608 浏览 2 评论 0原文

我正在尝试编译一个使用 Clang 的 -cl-std=clc++2021 编写的内核。用 clang 编译该内核没有问题：

clang --target=spirv32 -Xclang -no-opaque-pointers -cl-std=clc++2021 ERK.cpp -o ERK.spv

我的内核代码实现了显式四阶 Runge-Kutta（RK4）积分方案：


// Adds the first Nx elements of b onto a in place (a[i] becomes a[i] + b[i]).
// Does nothing when Nx is zero or negative.
void addVec(float* a, const float*b, const int Nx)
{
    int idx = 0;
    while (idx < Nx)
    {
        a[idx] = a[idx] + b[idx];
        ++idx;
    }
}

// Generic explicit Runge-Kutta single-step integrator.
//
// Template parameters:
//   Nstage_ERK - number of stages of the scheme.
//   f_ODE      - right-hand-side provider; must expose a compile-time
//                constant NX (state dimension) and a static function
//                solve(t, x, xdot, P) that writes NX derivatives into xdot.
//
// The Butcher tableau is supplied as raw pointers that are stored, not
// copied, so the arrays must outlive the integrator:
//   _A - Nstage_ERK x Nstage_ERK coefficients, row-major (A[i][j] at
//        index i * Nstage_ERK + j); only entries with j < i are read.
//   _b - Nstage_ERK output weights.
//   _c - Nstage_ERK stage time offsets; c[0] is not read, the first stage
//        is always evaluated at time t.
template <int Nstage_ERK, class f_ODE>
struct impl_ERK {

    constexpr static int NX = f_ODE::NX;
    impl_ERK(const float* _A, const float* _b, const float* _c): p_A(_A), p_b(_b), p_c(_c){}

    // Advances the state by one step of size dt.
    //   xk   - current state, NX elements.
    //   xk_1 - output, NX elements; fully overwritten with
    //          xk + dt * sum_i b[i] * k_i (previous contents are ignored).
    //   t    - time at the start of the step.
    //   dt   - step size.
    //   P    - parameter vector forwarded unchanged to f_ODE::solve.
    void solve(constant float* xk, float* xk_1, float t, float dt, const float* P) 
    {
        // K holds the stage derivatives back to back: stage i occupies
        // K[i * NX] .. K[i * NX + NX - 1].
        float K[Nstage_ERK * NX];
        float xk_stage[NX];

        // Stage 0 is evaluated at the unmodified state.
        for (int n = 0; n < NX; n++)
        {
            xk_stage[n] = xk[n];
        }
        f_ODE::solve(t, xk_stage, K, P);

        // Initialise the output with the state plus the stage-0 contribution,
        // so the result never depends on what the caller left in xk_1.
        for (int n = 0; n < NX; n++)
        {
            xk_1[n] = xk[n] + dt * p_b[0] * K[n];
        }

        for (int i = 1; i < Nstage_ERK; i++) {
            float* k_i = &K[i * NX];
            float c_i = p_c[i];

            // Stage state: for every component n,
            //   x_stage[n] = xk[n] + dt * sum_{j < i} A[i][j] * k_j[n].
            for (int n = 0; n < NX; n++) {
                float ak_sum = 0.0f;
                for (int j = 0; j < i; j++) {
                    ak_sum += p_A[i * Nstage_ERK + j] * K[j * NX + n];
                }
                xk_stage[n] = xk[n] + dt * ak_sum;
            }

            f_ODE::solve(t + c_i * dt, xk_stage, k_i,  P);

            for (int n = 0; n < NX; n++) {
                xk_1[n] += dt * p_b[i] * k_i[n];
            }
        }
    }
    private:
    const float* p_A; 
    const float* p_b;
    const float* p_c;
};


// Four-stage variant of impl_ERK: fixes the stage count to 4 and forwards
// the three tableau pointers unchanged to the base-class constructor.
template <class f_ODE>
struct ERK4 : impl_ERK<4, f_ODE>
{
    ERK4(const float* _A, const float* _b, const float* _c)
        : impl_ERK<4, f_ODE>(_A, _b, _c)
    {
    }
};


// Right-hand side of a three-state ODE with three parameters.
// The equations have the form of an SIR compartment model (presumably
// xk = {S, I, R}; the source does not name the states).
struct f_ODE_1
{
    constexpr static int NX = 3; // number of state variables
    constexpr static int NP = 3; // number of parameters read from P

    // Writes the three time derivatives for state xk into xdot.
    //   t    - time; not used by this system.
    //   xk   - state, NX elements.
    //   xdot - output derivatives, NX elements.
    //   P    - parameters: P[0] = alpha, P[1] = beta, P[2] = N_pop.
    static void solve(const float t, const float* xk, float* xdot, const float* P)
    {
        const float recovery_rate = P[0];
        const float transmission_rate = P[1];
        const float population = P[2];

        xdot[0] = -(transmission_rate * xk[0] * xk[1] / population);
        xdot[1] = transmission_rate * xk[0] * xk[1] / population - recovery_rate * xk[1];
        xdot[2] = recovery_rate * xk[1];
    }
};

// Kernel entry point: advances the f_ODE_1 system by one step of size dt,
// starting from the state in x0, using the classic four-stage Runge-Kutta
// tableau, and stores the resulting state in x1.
//   x0 - input state, f_ODE_1::NX floats.
//   x1 - output state, f_ODE_1::NX floats.
// The work-item id is not used, so every work-item computes and writes the
// same values.
__kernel void compute(constant float* x0, global float* x1) 
{
    // Butcher tableau of the classic RK4 scheme (A is row-major, 4 x 4).
    const float A[4*4] = {.0f,.0f,.0f,.0f,
    .5f,.0f,.0f,.0f,
    .0f,.5f,.0f,.0f,
    .0f,.0f,1.f,.0f};
    const float b[4] = {1.f/6, 1.f/3, 1.f/3, 1.f/6};
    const float c[4] = {.0f,.5f,.5f,1.f};
    const float dt = .5f;
    const float t = .0f;
    // Float literals avoid double-precision constants in device code.
    const float R0 = 1.2f;
    const float alpha = .9f;
    const float beta = R0*alpha;
    const float N_pop = 1e6f;
    const float P[f_ODE_1::NP] = {alpha, beta, N_pop};

    // Start from the initial state so res is fully defined even if solve()
    // only accumulates into its output argument.
    float res[f_ODE_1::NX];
    for (int j = 0; j < f_ODE_1::NX; j++)
    {
        res[j] = x0[j];
    }

    ERK4<f_ODE_1> integrator(A, b, c);
    integrator.solve(x0, res, t, dt, P);

    // Publish the result; without this the kernel has no observable output.
    for (int j = 0; j < f_ODE_1::NX; j++)
    {
        x1[j] = res[j];
    }
}

使用 clCreateProgramWithIL(.) 创建程序并构建它都没有问题，但是我无法使用 clCreateKernelsInProgram(.) 创建任何内核。

    // Create the program object from the intermediate-language module held in programBinary.
    // NOTE(review): programBinary must contain the complete .spv file (read in binary mode) — confirm at the loading site.
    clInstance.program = clCreateProgramWithIL(clInstance.context, (const void*) programBinary.data(), sizeof(char)*programBinary.length(), &err);

    assert(err == CL_SUCCESS);

    std::string build_options = "-I " + cl_generator_dir + " -I " + ERK_Kernel_dir;
    /*Step 6: Build program. */
    int status = clBuildProgram(clInstance.program, 1, clInstance.device_ids.data(), build_options.c_str(), NULL, NULL);
    if (status != CL_SUCCESS)
    {
        // Print the build log for any build error, not only CL_BUILD_PROGRAM_FAILURE.
        // Determine the size of the log
        size_t log_size = 0;
        clGetProgramBuildInfo(clInstance.program, clInstance.device_ids[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

        // Allocate memory for the log (one extra byte guarantees termination)
        char *log = (char *)malloc(log_size + 1);
        if (log != NULL)
        {
            // Get the log
            clGetProgramBuildInfo(clInstance.program, clInstance.device_ids[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
            log[log_size] = '\0';

            // Print the log
            printf("%s\n", log);

            // Release the log buffer; it was leaked before.
            free(log);
        }
    }
    // Do not continue with a program that failed to build.
    assert(status == CL_SUCCESS);



    float x0[3] = {1.0,2.0,3.0};
    float x_res[3] = {-10,-10,-10};

    size_t inputBufferSize = sizeof(float)*3;
    size_t outputBufferSize = sizeof(float)*3;

    cl_mem inputBuffer = clCreateBuffer(clInstance.context, CL_MEM_READ_ONLY, inputBufferSize, NULL, &err);
    assert(err == CL_SUCCESS);

    cl_mem outputBuffer = clCreateBuffer(clInstance.context, CL_MEM_WRITE_ONLY, outputBufferSize, NULL, &err);
    assert(err == CL_SUCCESS);

    // Blocking write: x0 has been copied to the device when the call returns.
    err = clEnqueueWriteBuffer(clInstance.commandQueue, inputBuffer, CL_TRUE, 0, inputBufferSize, x0, 0, NULL, NULL);

    assert(err == CL_SUCCESS);

    // cl_kernel kernel = clCreateKernel(clInstance.program, "_ZNU3AS48impl_ERKILi4E7f_ODE_1E5solveEPU3AS2fPU3AS4fffPU3AS4Kf", &err);
    // assert(err == CL_SUCCESS);
    cl_kernel kernel;
    cl_uint num_kernels_ret = 0;
    // First call only queries how many kernels the program contains.
    err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);
    assert(err == CL_SUCCESS);

    // Only one cl_kernel slot is available below: a count of 0 would make the
    // next call invalid, and a count above 1 would write past 'kernel'.
    assert(num_kernels_ret == 1);

    err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);
    assert(err == CL_SUCCESS);

为什么我的内核函数 __kernel void compute(.) 没有被 OpenCL 识别？

编辑：将以下代码

err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);


err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);

替换为

cl_kernel kernel = clCreateKernel(clInstance.program, "compute", &err);

会导致错误代码 CL_INVALID_KERNEL_NAME

Khronos 注册表提到了 cl_khr_spir，它用于支持从 SPIRV 创建 OpenCL 程序对象。使用 clang 以 -cl-std=clc++2021 编译时，是否需要这个扩展？

原文

I'm trying to compile a kernel written using Clang cl-std=clc++2021.
The clang compilation of the kernel runs fine:

clang --target=spirv32 -Xclang -no-opaque-pointers -cl-std=clc++2021 ERK.cpp -o ERK.spv

My kernel code implements an explicit Runge-Kutta 4 integrator scheme:


// Adds the first Nx elements of b onto a in place (a[i] becomes a[i] + b[i]).
// Does nothing when Nx is zero or negative.
void addVec(float* a, const float*b, const int Nx)
{
    int idx = 0;
    while (idx < Nx)
    {
        a[idx] = a[idx] + b[idx];
        ++idx;
    }
}

// Generic explicit Runge-Kutta single-step integrator.
//
// Template parameters:
//   Nstage_ERK - number of stages of the scheme.
//   f_ODE      - right-hand-side provider; must expose a compile-time
//                constant NX (state dimension) and a static function
//                solve(t, x, xdot, P) that writes NX derivatives into xdot.
//
// The Butcher tableau is supplied as raw pointers that are stored, not
// copied, so the arrays must outlive the integrator:
//   _A - Nstage_ERK x Nstage_ERK coefficients, row-major (A[i][j] at
//        index i * Nstage_ERK + j); only entries with j < i are read.
//   _b - Nstage_ERK output weights.
//   _c - Nstage_ERK stage time offsets; c[0] is not read, the first stage
//        is always evaluated at time t.
template <int Nstage_ERK, class f_ODE>
struct impl_ERK {

    constexpr static int NX = f_ODE::NX;
    impl_ERK(const float* _A, const float* _b, const float* _c): p_A(_A), p_b(_b), p_c(_c){}

    // Advances the state by one step of size dt.
    //   xk   - current state, NX elements.
    //   xk_1 - output, NX elements; fully overwritten with
    //          xk + dt * sum_i b[i] * k_i (previous contents are ignored).
    //   t    - time at the start of the step.
    //   dt   - step size.
    //   P    - parameter vector forwarded unchanged to f_ODE::solve.
    void solve(constant float* xk, float* xk_1, float t, float dt, const float* P) 
    {
        // K holds the stage derivatives back to back: stage i occupies
        // K[i * NX] .. K[i * NX + NX - 1].
        float K[Nstage_ERK * NX];
        float xk_stage[NX];

        // Stage 0 is evaluated at the unmodified state.
        for (int n = 0; n < NX; n++)
        {
            xk_stage[n] = xk[n];
        }
        f_ODE::solve(t, xk_stage, K, P);

        // Initialise the output with the state plus the stage-0 contribution,
        // so the result never depends on what the caller left in xk_1.
        for (int n = 0; n < NX; n++)
        {
            xk_1[n] = xk[n] + dt * p_b[0] * K[n];
        }

        for (int i = 1; i < Nstage_ERK; i++) {
            float* k_i = &K[i * NX];
            float c_i = p_c[i];

            // Stage state: for every component n,
            //   x_stage[n] = xk[n] + dt * sum_{j < i} A[i][j] * k_j[n].
            for (int n = 0; n < NX; n++) {
                float ak_sum = 0.0f;
                for (int j = 0; j < i; j++) {
                    ak_sum += p_A[i * Nstage_ERK + j] * K[j * NX + n];
                }
                xk_stage[n] = xk[n] + dt * ak_sum;
            }

            f_ODE::solve(t + c_i * dt, xk_stage, k_i,  P);

            for (int n = 0; n < NX; n++) {
                xk_1[n] += dt * p_b[i] * k_i[n];
            }
        }
    }
    private:
    const float* p_A; 
    const float* p_b;
    const float* p_c;
};


// Four-stage variant of impl_ERK: fixes the stage count to 4 and forwards
// the three tableau pointers unchanged to the base-class constructor.
template <class f_ODE>
struct ERK4 : impl_ERK<4, f_ODE>
{
    ERK4(const float* _A, const float* _b, const float* _c)
        : impl_ERK<4, f_ODE>(_A, _b, _c)
    {
    }
};


// Right-hand side of a three-state ODE with three parameters.
// The equations have the form of an SIR compartment model (presumably
// xk = {S, I, R}; the source does not name the states).
struct f_ODE_1
{
    constexpr static int NX = 3; // number of state variables
    constexpr static int NP = 3; // number of parameters read from P

    // Writes the three time derivatives for state xk into xdot.
    //   t    - time; not used by this system.
    //   xk   - state, NX elements.
    //   xdot - output derivatives, NX elements.
    //   P    - parameters: P[0] = alpha, P[1] = beta, P[2] = N_pop.
    static void solve(const float t, const float* xk, float* xdot, const float* P)
    {
        const float recovery_rate = P[0];
        const float transmission_rate = P[1];
        const float population = P[2];

        xdot[0] = -(transmission_rate * xk[0] * xk[1] / population);
        xdot[1] = transmission_rate * xk[0] * xk[1] / population - recovery_rate * xk[1];
        xdot[2] = recovery_rate * xk[1];
    }
};

// Kernel entry point: advances the f_ODE_1 system by one step of size dt,
// starting from the state in x0, using the classic four-stage Runge-Kutta
// tableau, and stores the resulting state in x1.
//   x0 - input state, f_ODE_1::NX floats.
//   x1 - output state, f_ODE_1::NX floats.
// The work-item id is not used, so every work-item computes and writes the
// same values.
__kernel void compute(constant float* x0, global float* x1) 
{
    // Butcher tableau of the classic RK4 scheme (A is row-major, 4 x 4).
    const float A[4*4] = {.0f,.0f,.0f,.0f,
    .5f,.0f,.0f,.0f,
    .0f,.5f,.0f,.0f,
    .0f,.0f,1.f,.0f};
    const float b[4] = {1.f/6, 1.f/3, 1.f/3, 1.f/6};
    const float c[4] = {.0f,.5f,.5f,1.f};
    const float dt = .5f;
    const float t = .0f;
    // Float literals avoid double-precision constants in device code.
    const float R0 = 1.2f;
    const float alpha = .9f;
    const float beta = R0*alpha;
    const float N_pop = 1e6f;
    const float P[f_ODE_1::NP] = {alpha, beta, N_pop};

    // Start from the initial state so res is fully defined even if solve()
    // only accumulates into its output argument.
    float res[f_ODE_1::NX];
    for (int j = 0; j < f_ODE_1::NX; j++)
    {
        res[j] = x0[j];
    }

    ERK4<f_ODE_1> integrator(A, b, c);
    integrator.solve(x0, res, t, dt, P);

    // Publish the result; without this the kernel has no observable output.
    for (int j = 0; j < f_ODE_1::NX; j++)
    {
        x1[j] = res[j];
    }
}

Creating a program with clCreateProgramWithIL(.) and building it works fine, but I'm however not able to create any kernels using clCreateKernelsInProgram(.)

    // Create the program object from the intermediate-language module held in programBinary.
    // NOTE(review): programBinary must contain the complete .spv file (read in binary mode) — confirm at the loading site.
    clInstance.program = clCreateProgramWithIL(clInstance.context, (const void*) programBinary.data(), sizeof(char)*programBinary.length(), &err);

    assert(err == CL_SUCCESS);

    std::string build_options = "-I " + cl_generator_dir + " -I " + ERK_Kernel_dir;
    /*Step 6: Build program. */
    int status = clBuildProgram(clInstance.program, 1, clInstance.device_ids.data(), build_options.c_str(), NULL, NULL);
    if (status != CL_SUCCESS)
    {
        // Print the build log for any build error, not only CL_BUILD_PROGRAM_FAILURE.
        // Determine the size of the log
        size_t log_size = 0;
        clGetProgramBuildInfo(clInstance.program, clInstance.device_ids[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

        // Allocate memory for the log (one extra byte guarantees termination)
        char *log = (char *)malloc(log_size + 1);
        if (log != NULL)
        {
            // Get the log
            clGetProgramBuildInfo(clInstance.program, clInstance.device_ids[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
            log[log_size] = '\0';

            // Print the log
            printf("%s\n", log);

            // Release the log buffer; it was leaked before.
            free(log);
        }
    }
    // Do not continue with a program that failed to build.
    assert(status == CL_SUCCESS);



    float x0[3] = {1.0,2.0,3.0};
    float x_res[3] = {-10,-10,-10};

    size_t inputBufferSize = sizeof(float)*3;
    size_t outputBufferSize = sizeof(float)*3;

    cl_mem inputBuffer = clCreateBuffer(clInstance.context, CL_MEM_READ_ONLY, inputBufferSize, NULL, &err);
    assert(err == CL_SUCCESS);

    cl_mem outputBuffer = clCreateBuffer(clInstance.context, CL_MEM_WRITE_ONLY, outputBufferSize, NULL, &err);
    assert(err == CL_SUCCESS);

    // Blocking write: x0 has been copied to the device when the call returns.
    err = clEnqueueWriteBuffer(clInstance.commandQueue, inputBuffer, CL_TRUE, 0, inputBufferSize, x0, 0, NULL, NULL);

    assert(err == CL_SUCCESS);

    // cl_kernel kernel = clCreateKernel(clInstance.program, "_ZNU3AS48impl_ERKILi4E7f_ODE_1E5solveEPU3AS2fPU3AS4fffPU3AS4Kf", &err);
    // assert(err == CL_SUCCESS);
    cl_kernel kernel;
    cl_uint num_kernels_ret = 0;
    // First call only queries how many kernels the program contains.
    err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);
    assert(err == CL_SUCCESS);

    // Only one cl_kernel slot is available below: a count of 0 would make the
    // next call invalid, and a count above 1 would write past 'kernel'.
    assert(num_kernels_ret == 1);

    err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);
    assert(err == CL_SUCCESS);

Why isn't my kernel function __kernel void compute(.) recognized by openCL?

Edit:
Replacing

err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);


err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);

with

cl_kernel kernel = clCreateKernel(clInstance.program, "compute", &err);

results in error code CL_INVALID_KERNEL_NAME

The Khronos registry mentions cl_khr_spir, which is used to add support for creating OpenCL program objects from SPIRV. Is this extension necessary when compiling with clang using -cl-std=clc++2021?

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

九厘米的零° 2025-02-06 05:24:38

事实证明，该问题与内核的创建无关，而是与文件读取过早终止有关。

正确加载 ERK.spv 之后，OpenCL 创建内核没有任何问题，无论是使用

err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);


err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);

还是使用

cl_kernel kernel = clCreateKernel(clInstance.program, "compute", &err);

The problem turned out to be unrelated to the creation of the kernel, and rather related to issues with an early termination of file reading.

With ERK.spv loaded properly OpenCL has no issues with kernel creation, using both

err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);


err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);

and

cl_kernel kernel = clCreateKernel(clInstance.program, "compute", &err);

回复收藏 0 原文

~没有更多了~

关于作者

简美

暂无简介

文章

26 人气

关注发私信

友情链接

文江博客

无法在Opencl中从Spir-File创建内核

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

评论（1）

关于作者

相关话题

热门标签

推荐作者

Mr.HU

疯到世界奔溃

隔纱相望

萌无敌

梦幻的味道

自在安然

友情链接

无法在Opencl中从Spir-File创建内核

如果你对这篇内容有疑问，欢迎到本站社区发帖提问 参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

评论（1）

关于作者

相关话题

热门标签

推荐作者

Mr.HU

疯到世界奔溃

隔纱相望

萌无敌

梦幻的味道

自在安然

友情链接

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。