无法在 OpenCL 中从 SPIR-V 文件创建内核

发布于 2025-01-30 05:24:38 字数 5608 浏览 2 评论 0原文

我正在尝试使用 Clang 以 -cl-std=clc++2021 编译一个内核。内核的 Clang 编译过程运行正常:

clang --target=spirv32 -Xclang -no-opaque-pointers -cl-std=clc++2021 ERK.cpp -o ERK.spv

我的内核代码实现了显式四阶 Runge-Kutta(RK4)积分格式:


// In-place element-wise accumulation: adds b[k] onto a[k] for k = 0 .. Nx-1,
// walking the arrays front to back. A non-positive Nx leaves `a` untouched.
void addVec(float* a, const float*b, const int Nx)
{
    int idx = 0;
    while (idx < Nx)
    {
        a[idx] = a[idx] + b[idx];
        ++idx;
    }
}

// Generic explicit Runge-Kutta (ERK) single-step integrator.
//
// Template parameters:
//   Nstage_ERK - number of stages of the scheme (must be at least 1).
//   f_ODE      - right-hand-side provider; must expose `static constexpr int NX`
//                (state dimension) and `static void solve(t, x, xdot, P)`.
template <int Nstage_ERK, class f_ODE>
struct impl_ERK {

    static_assert(Nstage_ERK >= 1, "an explicit Runge-Kutta scheme needs at least one stage");

    constexpr static int NX = f_ODE::NX;

    // _A - row-major Nstage_ERK x Nstage_ERK coefficient matrix; only entries
    //      with column index < row index are read.
    // _b - Nstage_ERK stage weights.
    // _c - Nstage_ERK stage nodes (the entry for stage 0 is not read).
    // Only the pointers are stored, so the arrays must outlive this object.
    impl_ERK(const float* _A, const float* _b, const float* _c): p_A(_A), p_b(_b), p_c(_c){}

    // Performs one step of size dt starting at time t.
    //   xk   - current state, NX values (read only).
    //   xk_1 - output, NX values; fully overwritten with
    //          xk + dt * sum_i b[i] * k_i, so it needs no initialisation.
    //   P    - parameter vector forwarded unchanged to f_ODE::solve.
    void solve(constant float* xk, float* xk_1, float t, float dt, const float* P) 
    {
        // Stage derivatives, stage i occupying K[i*NX .. i*NX + NX - 1].
        float K[Nstage_ERK * NX];
        float xk_stage[NX];

        // Stage 0 is evaluated at the unmodified state.
        for (int n = 0; n < NX; n++)
        {
            xk_stage[n] = xk[n];
        }
        f_ODE::solve(t, xk_stage, K, P);

        // Seed the result with xk plus the stage-0 contribution; the remaining
        // stages are accumulated on top inside the loop below.
        for (int n = 0; n < NX; n++)
        {
            xk_1[n] = xk[n] + dt * p_b[0] * K[n];
        }

        for (int i = 1; i < Nstage_ERK; i++) {
            float* k_i = &K[i * NX];
            float c_i = p_c[i];

            // Stage state: xk + dt * sum_{j<i} A[i][j] * k_j, built per state
            // component n from the same component of every earlier stage.
            for (int n = 0; n < NX; n++) {
                float ak_sum = 0.0f;
                for (int j = 0; j < i; j++) {
                    ak_sum += p_A[i * Nstage_ERK + j] * K[j * NX + n];
                }
                xk_stage[n] = xk[n] + dt * ak_sum;
            }

            f_ODE::solve(t + c_i * dt, xk_stage, k_i,  P);

            for (int n = 0; n < NX; n++) {
                xk_1[n] += dt * p_b[i] * k_i[n];
            }
        }
    }
    private:
    const float* p_A; 
    const float* p_b;
    const float* p_c;
};


// Four-stage instantiation of impl_ERK for the right-hand side f_ODE.
// The three coefficient pointers are forwarded unchanged to the base class.
template <class f_ODE>
struct ERK4 : impl_ERK<4, f_ODE>
{
    ERK4(const float* _A, const float* _b, const float* _c)
        : impl_ERK<4, f_ODE>(_A, _b, _c)
    {
    }
};


// Right-hand side of a three-state ODE system with three parameters.
// The equations have the form of an SIR compartmental model.
struct f_ODE_1
{
    static constexpr int NX = 3; // number of state variables
    static constexpr int NP = 3; // number of parameters

    // Evaluates the time derivative of the state.
    //   t    - time (unused: the system is autonomous)
    //   xk   - state, NX values
    //   xdot - output derivative, NX values
    //   P    - parameters, read as {P[0], P[1], P[2]}
    static void solve(const float t, const float* xk, float* xdot, const float* P)
    {
        const float removal_rate = P[0];
        const float contact_rate = P[1];
        const float population = P[2];

        xdot[0] = -contact_rate * xk[0] * xk[1] / population;
        xdot[1] = contact_rate * xk[0] * xk[1] / population - removal_rate * xk[1];
        xdot[2] = removal_rate * xk[1];
    }
};

// Kernel entry point: runs one ERK4 step of the f_ODE_1 system with fixed,
// hard-coded coefficients and parameters.
//   x0 - input state, f_ODE_1::NX floats in constant memory.
//   x1 - output, f_ODE_1::NX floats; receives the values that
//        ERK4<f_ODE_1>::solve leaves in its result array.
// The kernel does not query any work-item id, so every work-item computes and
// stores the same values.
__kernel void compute(constant float* x0, global float* x1) 
{
    // Coefficient tables of the four-stage scheme: row-major matrix A,
    // weights b and nodes c.
    const float A[4*4] = {.0f,.0f,.0f,.0f,
    .5f,.0f,.0f,.0f,
    .0f,.5f,.0f,.0f,
    .0f,.0f,1.f,.0f};
    const float b[4] = {1.f/6, 1.f/3, 1.f/3, 1.f/6};
    const float c[4] = {.0f,.5f,.5f,1.f};
    const float dt = .5f;
    const float t = .0f;
    // Single-precision literals: avoids relying on double-precision support
    // in the device compiler.
    const float R0 = 1.2f;
    const float alpha = .9f;
    const float beta = R0*alpha;
    const float N_pop = 1e6f;
    const float P[f_ODE_1::NP] = {alpha, beta, N_pop};
    // Zero-initialised so that no indeterminate value can ever be read from
    // or accumulated into the result array.
    float res[f_ODE_1::NX] = {};
    ERK4<f_ODE_1> integrator(A, b, c);
    integrator.solve(x0, res, t, dt, P);

    // Publish the result; without this store the host never sees any output.
    for (int i = 0; i < f_ODE_1::NX; i++) {
        x1[i] = res[i];
    }
}

使用 clCreateProgramWithIL(.) 创建程序并构建它都没有问题,但是我无法使用 clCreateKernelsInProgram(.) 创建任何内核。

    clInstance.program = clCreateProgramWithIL(clInstance.context, (const void*) programBinary.data(), sizeof(char)*programBinary.length(), &err);

    assert(err == CL_SUCCESS);

    std::string build_options = "-I " + cl_generator_dir + " -I " + ERK_Kernel_dir;
    /*Step 6: Build program. */
    int status = clBuildProgram(clInstance.program, 1, clInstance.device_ids.data(), build_options.c_str(), NULL, NULL);
    if (status == CL_BUILD_PROGRAM_FAILURE)
    {
        // Determine the size of the log
        size_t log_size;
        clGetProgramBuildInfo(clInstance.program, clInstance.device_ids[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

        // Allocate memory for the log
        char *log = (char *)malloc(log_size);

        // Get the log
        clGetProgramBuildInfo(clInstance.program, clInstance.device_ids[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);

        // Print the log
        printf("%s\n", log);
    }



    float x0[3] = {1.0,2.0,3.0};
    float x_res[3] = {-10,-10,-10};

    size_t inputBufferSize = sizeof(float)*3;
    size_t outputBufferSize = sizeof(float)*3;

    cl_mem inputBuffer = clCreateBuffer(clInstance.context, CL_MEM_READ_ONLY, inputBufferSize, NULL, &err);
    assert(err == CL_SUCCESS);

    cl_mem outputBuffer = clCreateBuffer(clInstance.context, CL_MEM_WRITE_ONLY, outputBufferSize, NULL, &err);
    assert(err == CL_SUCCESS);

    err = clEnqueueWriteBuffer(clInstance.commandQueue, inputBuffer, CL_TRUE, 0, inputBufferSize, x0, 0, NULL, NULL);

    assert(err == CL_SUCCESS);

    // cl_kernel kernel = clCreateKernel(clInstance.program, "_ZNU3AS48impl_ERKILi4E7f_ODE_1E5solveEPU3AS2fPU3AS4fffPU3AS4Kf", &err);
    // assert(err == CL_SUCCESS);
    cl_kernel kernel;
    cl_uint num_kernels_ret = 0;
    err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);


    err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);

为什么我的内核函数 __kernel void compute(.) 没有被 OpenCL 识别?

编辑:将

err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);


err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);

替换为

cl_kernel kernel = clCreateKernel(clInstance.program, "compute", &err);

会导致错误代码 CL_INVALID_KERNEL_NAME

Khronos 注册表提到了 cl_khr_spir,它用于支持从 SPIR-V 创建 OpenCL 程序对象。使用 Clang 以 -cl-std=clc++2021 编译时,是否必须使用它?

I'm trying to compile a kernel with Clang using -cl-std=clc++2021.
The clang compilation of the kernel runs fine:

clang --target=spirv32 -Xclang -no-opaque-pointers -cl-std=clc++2021 ERK.cpp -o ERK.spv

My kernel code implements an explicit Runge-Kutta 4 integrator scheme:


// In-place element-wise accumulation: adds b[k] onto a[k] for k = 0 .. Nx-1,
// walking the arrays front to back. A non-positive Nx leaves `a` untouched.
void addVec(float* a, const float*b, const int Nx)
{
    int idx = 0;
    while (idx < Nx)
    {
        a[idx] = a[idx] + b[idx];
        ++idx;
    }
}

// Generic explicit Runge-Kutta (ERK) single-step integrator.
//
// Template parameters:
//   Nstage_ERK - number of stages of the scheme (must be at least 1).
//   f_ODE      - right-hand-side provider; must expose `static constexpr int NX`
//                (state dimension) and `static void solve(t, x, xdot, P)`.
template <int Nstage_ERK, class f_ODE>
struct impl_ERK {

    static_assert(Nstage_ERK >= 1, "an explicit Runge-Kutta scheme needs at least one stage");

    constexpr static int NX = f_ODE::NX;

    // _A - row-major Nstage_ERK x Nstage_ERK coefficient matrix; only entries
    //      with column index < row index are read.
    // _b - Nstage_ERK stage weights.
    // _c - Nstage_ERK stage nodes (the entry for stage 0 is not read).
    // Only the pointers are stored, so the arrays must outlive this object.
    impl_ERK(const float* _A, const float* _b, const float* _c): p_A(_A), p_b(_b), p_c(_c){}

    // Performs one step of size dt starting at time t.
    //   xk   - current state, NX values (read only).
    //   xk_1 - output, NX values; fully overwritten with
    //          xk + dt * sum_i b[i] * k_i, so it needs no initialisation.
    //   P    - parameter vector forwarded unchanged to f_ODE::solve.
    void solve(constant float* xk, float* xk_1, float t, float dt, const float* P) 
    {
        // Stage derivatives, stage i occupying K[i*NX .. i*NX + NX - 1].
        float K[Nstage_ERK * NX];
        float xk_stage[NX];

        // Stage 0 is evaluated at the unmodified state.
        for (int n = 0; n < NX; n++)
        {
            xk_stage[n] = xk[n];
        }
        f_ODE::solve(t, xk_stage, K, P);

        // Seed the result with xk plus the stage-0 contribution; the remaining
        // stages are accumulated on top inside the loop below.
        for (int n = 0; n < NX; n++)
        {
            xk_1[n] = xk[n] + dt * p_b[0] * K[n];
        }

        for (int i = 1; i < Nstage_ERK; i++) {
            float* k_i = &K[i * NX];
            float c_i = p_c[i];

            // Stage state: xk + dt * sum_{j<i} A[i][j] * k_j, built per state
            // component n from the same component of every earlier stage.
            for (int n = 0; n < NX; n++) {
                float ak_sum = 0.0f;
                for (int j = 0; j < i; j++) {
                    ak_sum += p_A[i * Nstage_ERK + j] * K[j * NX + n];
                }
                xk_stage[n] = xk[n] + dt * ak_sum;
            }

            f_ODE::solve(t + c_i * dt, xk_stage, k_i,  P);

            for (int n = 0; n < NX; n++) {
                xk_1[n] += dt * p_b[i] * k_i[n];
            }
        }
    }
    private:
    const float* p_A; 
    const float* p_b;
    const float* p_c;
};


// Four-stage instantiation of impl_ERK for the right-hand side f_ODE.
// The three coefficient pointers are forwarded unchanged to the base class.
template <class f_ODE>
struct ERK4 : impl_ERK<4, f_ODE>
{
    ERK4(const float* _A, const float* _b, const float* _c)
        : impl_ERK<4, f_ODE>(_A, _b, _c)
    {
    }
};


// Right-hand side of a three-state ODE system with three parameters.
// The equations have the form of an SIR compartmental model.
struct f_ODE_1
{
    static constexpr int NX = 3; // number of state variables
    static constexpr int NP = 3; // number of parameters

    // Evaluates the time derivative of the state.
    //   t    - time (unused: the system is autonomous)
    //   xk   - state, NX values
    //   xdot - output derivative, NX values
    //   P    - parameters, read as {P[0], P[1], P[2]}
    static void solve(const float t, const float* xk, float* xdot, const float* P)
    {
        const float removal_rate = P[0];
        const float contact_rate = P[1];
        const float population = P[2];

        xdot[0] = -contact_rate * xk[0] * xk[1] / population;
        xdot[1] = contact_rate * xk[0] * xk[1] / population - removal_rate * xk[1];
        xdot[2] = removal_rate * xk[1];
    }
};

// Kernel entry point: runs one ERK4 step of the f_ODE_1 system with fixed,
// hard-coded coefficients and parameters.
//   x0 - input state, f_ODE_1::NX floats in constant memory.
//   x1 - output, f_ODE_1::NX floats; receives the values that
//        ERK4<f_ODE_1>::solve leaves in its result array.
// The kernel does not query any work-item id, so every work-item computes and
// stores the same values.
__kernel void compute(constant float* x0, global float* x1) 
{
    // Coefficient tables of the four-stage scheme: row-major matrix A,
    // weights b and nodes c.
    const float A[4*4] = {.0f,.0f,.0f,.0f,
    .5f,.0f,.0f,.0f,
    .0f,.5f,.0f,.0f,
    .0f,.0f,1.f,.0f};
    const float b[4] = {1.f/6, 1.f/3, 1.f/3, 1.f/6};
    const float c[4] = {.0f,.5f,.5f,1.f};
    const float dt = .5f;
    const float t = .0f;
    // Single-precision literals: avoids relying on double-precision support
    // in the device compiler.
    const float R0 = 1.2f;
    const float alpha = .9f;
    const float beta = R0*alpha;
    const float N_pop = 1e6f;
    const float P[f_ODE_1::NP] = {alpha, beta, N_pop};
    // Zero-initialised so that no indeterminate value can ever be read from
    // or accumulated into the result array.
    float res[f_ODE_1::NX] = {};
    ERK4<f_ODE_1> integrator(A, b, c);
    integrator.solve(x0, res, t, dt, P);

    // Publish the result; without this store the host never sees any output.
    for (int i = 0; i < f_ODE_1::NX; i++) {
        x1[i] = res[i];
    }
}

Creating a program with clCreateProgramWithIL(.) and building it works fine; however, I am not able to create any kernels using clCreateKernelsInProgram(.)

    clInstance.program = clCreateProgramWithIL(clInstance.context, (const void*) programBinary.data(), sizeof(char)*programBinary.length(), &err);

    assert(err == CL_SUCCESS);

    std::string build_options = "-I " + cl_generator_dir + " -I " + ERK_Kernel_dir;
    /*Step 6: Build program. */
    int status = clBuildProgram(clInstance.program, 1, clInstance.device_ids.data(), build_options.c_str(), NULL, NULL);
    if (status == CL_BUILD_PROGRAM_FAILURE)
    {
        // Determine the size of the log
        size_t log_size;
        clGetProgramBuildInfo(clInstance.program, clInstance.device_ids[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

        // Allocate memory for the log
        char *log = (char *)malloc(log_size);

        // Get the log
        clGetProgramBuildInfo(clInstance.program, clInstance.device_ids[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);

        // Print the log
        printf("%s\n", log);
    }



    float x0[3] = {1.0,2.0,3.0};
    float x_res[3] = {-10,-10,-10};

    size_t inputBufferSize = sizeof(float)*3;
    size_t outputBufferSize = sizeof(float)*3;

    cl_mem inputBuffer = clCreateBuffer(clInstance.context, CL_MEM_READ_ONLY, inputBufferSize, NULL, &err);
    assert(err == CL_SUCCESS);

    cl_mem outputBuffer = clCreateBuffer(clInstance.context, CL_MEM_WRITE_ONLY, outputBufferSize, NULL, &err);
    assert(err == CL_SUCCESS);

    err = clEnqueueWriteBuffer(clInstance.commandQueue, inputBuffer, CL_TRUE, 0, inputBufferSize, x0, 0, NULL, NULL);

    assert(err == CL_SUCCESS);

    // cl_kernel kernel = clCreateKernel(clInstance.program, "_ZNU3AS48impl_ERKILi4E7f_ODE_1E5solveEPU3AS2fPU3AS4fffPU3AS4Kf", &err);
    // assert(err == CL_SUCCESS);
    cl_kernel kernel;
    cl_uint num_kernels_ret = 0;
    err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);


    err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);

Why isn't my kernel function __kernel void compute(.) recognized by openCL?

Edit:
Replacing

err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);


err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);

with

cl_kernel kernel = clCreateKernel(clInstance.program, "compute", &err);

results in error code CL_INVALID_KERNEL_NAME

The Khronos registry mentions cl_khr_spir, which is used to add support for creating OpenCL program objects from SPIR-V. Is this necessary when compiling with Clang using -cl-std=clc++2021?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

九厘米的零° 2025-02-06 05:24:38

事实证明,该问题与内核的创建无关,而是与文件读取过早终止有关。

正确加载 ERK.spv 之后,OpenCL 创建内核就没有任何问题,无论是使用

err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);


err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);

还是使用

cl_kernel kernel = clCreateKernel(clInstance.program, "compute", &err);

The problem turned out to be unrelated to the creation of the kernel, and rather related to issues with an early termination of file reading.

With ERK.spv loaded properly OpenCL has no issues with kernel creation, using both

err = clCreateKernelsInProgram(clInstance.program, 0, NULL, &num_kernels_ret);


err = clCreateKernelsInProgram(clInstance.program, num_kernels_ret, &kernel, NULL);

and

cl_kernel kernel = clCreateKernel(clInstance.program, "compute", &err);

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文