通过引用传递 CUDA 随机生成器状态

发布于 2024-10-01 13:44:01 字数 1502 浏览 5 评论 0原文

在函数CalculateValue(curandState *localStat)和GetExponential(curandState *localState)中通过引用传递随机生成器状态（CUDA工具包3.2 curand.lib）时，以下代码是否正确？

谢谢

__device__ double GetExponential(curandState *localState) { 
    double u1 = curand_uniform_double(localState); } 


__device__  double CalculateValue(curandState *localStat)  { 
  double x = GetExponential(localState);  
  return x; } 


__global__ void RunMonteCarloKernel(curandState *state, double *results) { 
    int i = threadIdx.x + blockIdx.x * blockDim.x; 

    /* Copy state to local memory for efficiency */ 
    curandState localState = state[threadIdx.x + blockIdx.x * blockDim.x];    

    results[i] = CalculateValue(&localState); 

    /* Copy state back to global memory */ 
    state[threadIdx.x + blockIdx.x * blockDim.x] = localState; }

__global__ void setup_kernel(curandState *state) { 
    int i = threadIdx.x + blockIdx.x * blockDim.x; 

    /* Each thread gets different seed, a different sequence number, no offset */ 
    curand_init(i, i, 0, &state[i]); } 

int main(void) { 
    double *devResults; 
    curandState *devStates; 

    /* Allocate space for prng states on device */ 
    CUDA_CALL(cudaMalloc((void **)&devStates, totalThreads * sizeof(curandState))); 

    /* Setup prng states */ 
   setup_kernel<<<totalBlocks, threadsPerBlock>>>(devStates); 

    for(int i=0; i< 1000; i++) 
    { 
            RunMonteCarloKernel(devStates, devResults); 
    } }

原文

Is the following code correct when passing the random generator state(CUDA toolkit 3.2 curand.lib) by reference in function CalculateValue(curandState *localStat) and GetExponential(curandState *localState)?

Thanks

__device__ double GetExponential(curandState *localState) { 
    double u1 = curand_uniform_double(localState); } 


__device__  double CalculateValue(curandState *localStat)  { 
  double x = GetExponential(localState);  
  return x; } 


__global__ void RunMonteCarloKernel(curandState *state, double *results) { 
    int i = threadIdx.x + blockIdx.x * blockDim.x; 

    /* Copy state to local memory for efficiency */ 
    curandState localState = state[threadIdx.x + blockIdx.x * blockDim.x];    

    results[i] = CalculateValue(&localState); 

    /* Copy state back to global memory */ 
    state[threadIdx.x + blockIdx.x * blockDim.x] = localState; }

__global__ void setup_kernel(curandState *state) { 
    int i = threadIdx.x + blockIdx.x * blockDim.x; 

    /* Each thread gets different seed, a different sequence number, no offset */ 
    curand_init(i, i, 0, &state[i]); } 

int main(void) { 
    double *devResults; 
    curandState *devStates; 

    /* Allocate space for prng states on device */ 
    CUDA_CALL(cudaMalloc((void **)&devStates, totalThreads * sizeof(curandState))); 

    /* Setup prng states */ 
   setup_kernel<<<totalBlocks, threadsPerBlock>>>(devStates); 

    for(int i=0; i< 1000; i++) 
    { 
            RunMonteCarloKernel(devStates, devResults); 
    } }

分享到QQ

分享到微博