CUDA - 将设备数据复制到主机？

发布于 2025-01-02 06:14:47 字数 1216 浏览 6 评论 0原文

我有设备变量，在这个变量中，我在设备中分配并填充一个数组，但在将数据获取到主机时遇到问题。 cudaMemcpy() 返回 cudaErrorInvalidValue 错误。我该怎么办？

PS：代码只是示例，我知道，在这种特殊情况下，我可以使用 cudaMalloc 因为我知道数组的大小，但在我的真实代码中，它计算数组的大小设备，它需要立即分配内存。

PS2：我发现了类似的问题，但我还是不知道，如何解决？ - 从设备复制设备中分配的数据主机

PS3：我已经更新了代码，但仍然无法工作：{

PS4：我只是尝试在具有 Nvidia GT 520MX（最新游戏驱动程序）的笔记本上运行此代码，但也无法工作：（

谢谢

#include <cuda.h>
#include <stdio.h>

#define N 400
__device__ int* d_array;

__global__ void allocDeviceMemory()
{
    d_array = new int[N];
    for(int i=0; i < N; i++)
         d_array[i] = 123;
}

int main()
{
    allocDeviceMemory<<<1, 1>>>();

    cudaDeviceSynchronize();

    int* d_a = NULL;
    cudaMemcpyFromSymbol((void**)&d_a, "d_array", sizeof(d_a), 0, cudaMemcpyDeviceToHost);
    printf("gpu adress: %lld\n", d_a);


    int* h_array = (int*)malloc(N*sizeof(int));
    cudaError_t errr = cudaMemcpy(h_array, d_a, N*sizeof(int), cudaMemcpyDeviceToHost);
    printf("h_array: %d, %d\n", h_array[0], errr);

    getchar();
    return 0;
}

原文

I have device variable and in this variable, I allocate and fill an array in the device, but I have a problem to get data to host. cudaMemcpy() return cudaErrorInvalidValue error. how can I do it?

PS: The Code is just example, I know, that In this particular case I can use cudaMalloc because I know the size of the array, but In my REAL code, It computes the size of the array in the device and it needs immediately allocate memory.

PS2: I found a similar problem, but I still don't know, how can I solve it? - copy data which is allocated in device from device to host

PS3: I have updated code, but still doesn't work:{

PS4: I am just trying to run this code on a notebook with Nvidia GT 520MX(latest game driver) and doesn't work too :(

thx

#include <cuda.h>
#include <stdio.h>

#define N 400
__device__ int* d_array;

__global__ void allocDeviceMemory()
{
    d_array = new int[N];
    for(int i=0; i < N; i++)
         d_array[i] = 123;
}

int main()
{
    allocDeviceMemory<<<1, 1>>>();

    cudaDeviceSynchronize();

    int* d_a = NULL;
    cudaMemcpyFromSymbol((void**)&d_a, "d_array", sizeof(d_a), 0, cudaMemcpyDeviceToHost);
    printf("gpu adress: %lld\n", d_a);


    int* h_array = (int*)malloc(N*sizeof(int));
    cudaError_t errr = cudaMemcpy(h_array, d_a, N*sizeof(int), cudaMemcpyDeviceToHost);
    printf("h_array: %d, %d\n", h_array[0], errr);

    getchar();
    return 0;
}

分享到QQ

分享到微博