CUDA 代码不起作用，为什么？

发布于 2025-01-08 04:43:48 字数 1909 浏览 3 评论 0原文

我有这段代码，但有时它有效，有时不行（写 printf("ERR:%d\n", id))。我在 CUDA 4.1 下工作，并拥有计算能力 2.1 的 GTS450。

该代码没有更高的目的，我只是想找出为什么它不起作用，因为我的想法告诉我，没错:]

如果你想运行它，也许你需要执行几次，当“错误”出现或更改网格时尺寸！

class MAN
{
public:
    int m_id;
    int m_use;

    __device__
    MAN()
    {
        m_id = -1;
        m_use = 0;
    }
};

__device__ int* d_ids = NULL;
__device__ int d_last_ids = 0;

__device__ MAN* d_mans = NULL;


__global__ void init()
{
    d_mans = new MAN[500];  //note: 500 is more than enough!
    d_ids = new int[500];

    for(int i=0; i < 500; i++)
        d_ids[i] = 0;
}


__device__ int getMAN() //every block get unique number, so at one moment all running blocks has different id
{
    while(true)
    {
        for(int i=0; i < 500; i++)
            if(atomicCAS(&(d_mans[i].m_use), 0, 1)==0)
                return i;
    }
}
__device__ void returnMAN(int id)
{
    int s = atomicExch(&(d_mans[id].m_use), 0);
}



__global__ void testIt()
{
    if(threadIdx.x==0)
    {
        int man = getMAN();

        int id = d_mans[man].m_id;
        if(id == -1)    //If It never works with this "id", its creating new
        {
            id = atomicAdd(&d_last_ids, 2);

            d_ids[id] = 10; //set to non-zero
            d_mans[man].m_id = id;  //save new id for next time

            printf("ADD:%d\n", id);
        }

        if(d_ids[id]==0)
            printf("ERR:%d\n", id); //THIS SHOULD NEVER HAPPEN, BUT BECOMES !!!

        returnMAN(man);
    }
}



int main()
{
    init<<<1, 1>>>();
    printf("init() err: %d\n", cudaDeviceSynchronize());

    testIt<<<20000, 512>>>();
    printf("testIt() err: %d\n", cudaDeviceSynchronize());

    getchar();
    return 0;
}

原文

I have this code, but sometimes It works, sometimes NOT(write printf("ERR:%d\n", id)).
I work under CUDA 4.1 and have GTS450 which is compute capability 2.1.

The code doesnt have higher purpose, Iam just trying to find why Its not working, because My mind tell me, that right :]

If you want to run it, maybe you need to execute few times, when "error" appear or change grid size!

PS: here you can download exe file for win64 - you need to have cuda4.1 driver

class MAN
{
public:
    int m_id;
    int m_use;

    __device__
    MAN()
    {
        m_id = -1;
        m_use = 0;
    }
};

__device__ int* d_ids = NULL;
__device__ int d_last_ids = 0;

__device__ MAN* d_mans = NULL;


__global__ void init()
{
    d_mans = new MAN[500];  //note: 500 is more than enough!
    d_ids = new int[500];

    for(int i=0; i < 500; i++)
        d_ids[i] = 0;
}


__device__ int getMAN() //every block get unique number, so at one moment all running blocks has different id
{
    while(true)
    {
        for(int i=0; i < 500; i++)
            if(atomicCAS(&(d_mans[i].m_use), 0, 1)==0)
                return i;
    }
}
__device__ void returnMAN(int id)
{
    int s = atomicExch(&(d_mans[id].m_use), 0);
}



__global__ void testIt()
{
    if(threadIdx.x==0)
    {
        int man = getMAN();

        int id = d_mans[man].m_id;
        if(id == -1)    //If It never works with this "id", its creating new
        {
            id = atomicAdd(&d_last_ids, 2);

            d_ids[id] = 10; //set to non-zero
            d_mans[man].m_id = id;  //save new id for next time

            printf("ADD:%d\n", id);
        }

        if(d_ids[id]==0)
            printf("ERR:%d\n", id); //THIS SHOULD NEVER HAPPEN, BUT BECOMES !!!

        returnMAN(man);
    }
}



int main()
{
    init<<<1, 1>>>();
    printf("init() err: %d\n", cudaDeviceSynchronize());

    testIt<<<20000, 512>>>();
    printf("testIt() err: %d\n", cudaDeviceSynchronize());

    getchar();
    return 0;
}

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

帅哥哥的热头脑 2025-01-15 04:43:48

这似乎发生了，因为
此代码

    int id = d_mans[man].m_id;
    if(id == -1)    //If It never works with this "id", its creating new
    {
        id = atomicAdd(&d_last_ids, 2);

        d_ids[id] = 10; //set to non-zero
        d_mans[man].m_id = id;  //save new id for next time

        printf("ADD:%d\n", id);
    }

    if(d_ids[id]==0)
        printf("ERR:%d\n", id); //THIS SHOULD NEVER HAPPEN, BUT BECOMES !!!

如果某个块写入 d_mans[man].m_id，但仍未写入 d_ids[id]，则包含竞争条件。编译器可能会交换指令“设置为非零”和“保存新的 id 以供下次使用”，或者缓存只是没有及时更新。

实际上，问题出在你的分配器上——记住最后使用的“man”的索引比寻找它更好。

This seems to happen, because
this code

    int id = d_mans[man].m_id;
    if(id == -1)    //If It never works with this "id", its creating new
    {
        id = atomicAdd(&d_last_ids, 2);

        d_ids[id] = 10; //set to non-zero
        d_mans[man].m_id = id;  //save new id for next time

        printf("ADD:%d\n", id);
    }

    if(d_ids[id]==0)
        printf("ERR:%d\n", id); //THIS SHOULD NEVER HAPPEN, BUT BECOMES !!!

Contains race condition if some block wrote to d_mans[man].m_id, but still haven't wrote to d_ids[id]. Probably compiler exchanges instruction "set to non-zero" and "save new id for next time" or cache just don't get updated in-time.

Actually, problem is with your allocator -- it is better to remember index of last used 'man' than look for it.

回复收藏 0 原文