如何让 CUDA 11.6 在 CMake 3.23 和 MSVC 2019 下正常工作

发布于 2025-01-18 18:18:24 字数 14760 浏览 3 评论 0 原文

我找不到办法在 Windows 上使用标准 MSVC 2019 编译器的 CMake 项目中使用 CUDA 语言。

我正在尝试配置和编译 hello-cmake-cuda 存储库(这篇博客文章中也对它进行了描述)。

CMakeLists.txt 文件内容:

cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project(hello LANGUAGES CXX CUDA)
enable_language(CUDA)
add_executable(hello hello.cu)

这是从构建目录中运行的 cmake .. 命令的输出:

PS C:\GitRepo\cuda_hello\build> cmake ..
-- Selecting Windows SDK version 10.0.18362.0 to target Windows 10.0.22000.
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/CMakeDetermineCUDACompiler.cmake:311 (message):
  CMAKE_CUDA_ARCHITECTURES must be valid if set.
Call Stack (most recent call first):
  CMakeLists.txt:5 (project)


-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeError.log".

这意味着 architectures_tested 来自CMakeDetermineCUDACompiler.cmake:311 为空...

我怎样才能让 CMake 完成其配置和构建简单的程序?

我的开发环境

  • 操作系统:Windows 11版本10.0.22000 Build 22000
  • 编译器:Microsoft Visual Studio Community 2019版本16.11.11
  • CMake版本是3.23
  • CUDA版本是11.6

我尝试了每个软件的不同版本,但一直遇到相同的问题。我目前决定保留这些版本。

我的 GPU 已正确配置:它能在 nvidia-smi 中显示,并且我还能够构建并运行 deviceQuery CUDA 示例:

CUDA Device Query (Runtime API) version (CUDART static linking)

Detected 1 CUDA Capable device(s)

Device 0: "NVIDIA GeForce GTX 1650"
  CUDA Driver Version / Runtime Version          11.6 / 11.6
  CUDA Capability Major/Minor version number:    7.5
  etc. etc. ...

deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 11.6, CUDA Runtime Version = 11.6, NumDevs = 1
Result = PASS

我的环境 PATH 变量:

PS C:\GitRepo\hello-cuda-cmake-master> $env:path -split ";"
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\bin
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\libnvvp
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\bin
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\libnvvp

C:\Program Files (x86)\Common Files\Oracle\Java\javapath
C:\Python38\Scripts\
C:\Python38\
C:\Windows\system32
C:\Windows
C:\Windows\System32\Wbem
C:\Windows\System32\WindowsPowerShell\v1.0\
C:\Windows\System32\OpenSSH\
C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common
C:\Program Files\NVIDIA Corporation\NVIDIA NvDLISR
C:\Program Files\PuTTY\
C:\Program Files (x86)\PuTTY\
C:\Program Files\Microsoft SQL Server\110\Tools\Binn\
C:\Program Files\TortoiseSVN\bin
C:\Program Files\TortoiseGit\bin
C:\Program Files\Microsoft VS Code\bin
C:\WINDOWS\system32
C:\WINDOWS
C:\WINDOWS\System32\Wbem
C:\WINDOWS\System32\WindowsPowerShell\v1.0\
C:\WINDOWS\System32\OpenSSH\
C:\Program Files\Docker\Docker\resources\bin
C:\ProgramData\DockerDesktop\version-bin
C:\Program Files\Git\cmd
C:\WINDOWS\system32
C:\WINDOWS
C:\WINDOWS\System32\Wbem
C:\WINDOWS\System32\WindowsPowerShell\v1.0\
C:\WINDOWS\System32\OpenSSH\
C:\Program Files\NVIDIA Corporation\Nsight Compute 2022.1.1\
C:\Program Files\CMake\bin
C:\Ruby30-x64\bin
C:\Users\Thibault GEFFROY\.cargo\bin
C:\Users\Thibault GEFFROY\AppData\Local\Microsoft\WindowsApps
C:\Program Files\OpenCppCoverage
C:\intelFPGA\20.1\modelsim_ase\win32aloem

我已经尝试过但没有成功的方法

如果我尝试插入所需的 CMAKE_CUDA_ARCHITECTURES

set(CMAKE_CUDA_ARCHITECTURES 75)

我得到:

PS C:\GitRepo\cuda_hello\build> cmake ..
-- Selecting Windows SDK version 10.0.18362.0 to target Windows 10.0.22000.
-- The CUDA compiler identification is unknown
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/CMakeDetermineCUDACompiler.cmake:654 (message):
  The CMAKE_CUDA_ARCHITECTURES:

    75

  do not all work with this compiler.  Try:



  instead.
Call Stack (most recent call first):
  CMakeLists.txt:5 (project)


-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeError.log".

如果我尝试使用 FindCUDA 模块设置 CMAKE_CUDA_ARCHITECTURES - @alfC 此处给出的解决方案 - 我得到:

PS C:\GitRepo\cuda_hello\build> cmake ..
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/FindCUDA/select_compute_arch.cmake:120 (file):
  file failed to open for writing (Permission denied):

    /detect_cuda_compute_capabilities.cpp
Call Stack (most recent call first):
  CMakeLists.txt:4 (CUDA_DETECT_INSTALLED_GPUS)


CMake Error: The source directory "CMAKE_FLAGS" does not exist.
Specify --help for usage, or press the help button on the CMake GUI.
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/FindCUDA/select_compute_arch.cmake:141 (try_run):
  Failed to configure test project build system.
Call Stack (most recent call first):
  CMakeLists.txt:4 (CUDA_DETECT_INSTALLED_GPUS)


CMake Error: TRY_COMPILE attempt to remove -rf directory that does not contain CMakeTmp:/detect_cuda_compute_capabilities.cpp
-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeError.log".

最后,如果我尝试调用 find_package(CUDA),我得到:

PS C:\GitRepo\cuda_hello\build> cmake ..
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/FindCUDA.cmake:677 (cmake_initialize_per_config_variable):
  Unknown CMake command "cmake_initialize_per_config_variable".
Call Stack (most recent call first):
  CMakeLists.txt:2 (find_package)


-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeError.log".

编辑 1:

回复 @einpoklum 的解决方案:

感谢您的提议,但它也不起作用。

以下是在您的存储库中运行 cmake -B build 命令的输出:

PS C:\GitRepo\hello-cuda-cmake-master> cmake -B build
-- Building for: Visual Studio 16 2019
-- Selecting Windows SDK version 10.0.18362.0 to target Windows 10.0.22000.
-- The CUDA compiler identification is unknown
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/CMakeDetermineCUDACompiler.cmake:633 (message):
  Failed to detect a default CUDA architecture.



  Compiler output:

Call Stack (most recent call first):
  CMakeLists.txt:2 (project)


-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/hello-cuda-cmake-master/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/hello-cuda-cmake-master/build/CMakeFiles/CMakeError.log".

使用 PowerShell 或 MSVC 命令提示符的输出相同。


以下是使用 cmake-gui 时的 cmake 变量及其值:

Cmake Gui


当使用简单的 nvcc 构建命令时:来自 MSVC 命令提示符的 nvcc hello.cu 我得到:

nvcc fatal   : Could not set up the environment for Microsoft Visual Studio using 'c:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.29.30133/bin/HostX86/x86/../../../../../../../VC/Auxiliary/Build/vcvars64.bat'

不过该路径是有效的,并且脚本 vcvars64.bat 确实存在于此位置。


如果我将 find_package(CUDAToolkit) 添加到 CMakeLists.txt 中,会发生什么

新的 CMakeLists.txt

cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
find_package(CUDAToolkit)
project(hello LANGUAGES CUDA)
add_executable(hello hello.cu)

输出:

PS C:\GitRepo\hello-cuda-cmake-master> cmake -B build
-- Building for: Visual Studio 16 2019
-- Found CUDAToolkit: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.6/include (found version "11.6.124")
-- Selecting Windows SDK version 10.0.18362.0 to target Windows 10.0.22000.
-- The CUDA compiler identification is unknown
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/CMakeDetermineCUDACompiler.cmake:633 (message):
  Failed to detect a default CUDA architecture.



  Compiler output:

Call Stack (most recent call first):
  CMakeLists.txt:3 (project)


-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/hello-cuda-cmake-master/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/hello-cuda-cmake-master/build/CMakeFiles/CMakeError.log".

编辑 2:

我尝试不使用 CMake,而是用所提供的 MSVC 2019 解决方案来编译 CUDA 示例 BlackScholes。

我最终遇到此错误:

Severity        Code        Description        Project        File        Line        Suppression State
Error        MSB3721        The command ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\bin\nvcc.exe" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_37,code=\"sm_37,compute_37\" -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_52,code=\"sm_52,compute_52\" -gencode=arch=compute_60,code=\"sm_60,compute_60\" -gencode=arch=compute_61,code=\"sm_61,compute_61\" -gencode=arch=compute_70,code=\"sm_70,compute_70\" -gencode=arch=compute_75,code=\"sm_75,compute_75\" -gencode=arch=compute_80,code=\"sm_80,compute_80\" -gencode=arch=compute_86,code=\"sm_86,compute_86\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\bin\HostX86\x64" -x cu   -I./ -I../../../Common -I./ -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\/include" -I../../../Common -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include"  -G   --keep-dir x64\Debug  -maxrregcount=0  --machine 64 --compile -cudart static -Xcompiler "/wd 4819"  --threads 0 -g  -DWIN32 -DWIN32 -D_MBCS -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64/Debug/vc142.pdb /FS /Zi /RTC1 /MTd " -o "C:\ProgramData\NVIDIA Corporation\CUDA Samples\v11.6\cuda-samples\Samples\5_Domain_Specific\BlackScholes\x64\Debug\BlackScholes.cu.obj" "C:\ProgramData\NVIDIA Corporation\CUDA Samples\v11.6\cuda-samples\Samples\5_Domain_Specific\BlackScholes\BlackScholes.cu"" exited with code 1.        BlackScholes        C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\MSBuild\Microsoft\VC\v160\BuildCustomizations\CUDA 11.6.targets        790

而在使用 WSL 2 Ubuntu 20.4,并按照以下 CUDA 安装步骤以及这些构建 BlackScholes 示例的说明操作时,我得到了这个输出:

$ sudo make BlackScholes
/usr/local/cuda/bin/nvcc -ccbin g++ -I../../../Common  -m64    -maxrregcount=16 --threads 0 --std=c++11 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_86,code=compute_86 -o BlackScholes.o -c BlackScholes.cu
nvcc warning : The 'compute_35', 'compute_37', 'sm_35', and 'sm_37' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
ptxas warning : For profile sm_86 adjusting per thread register count of 16 to lower bound of 24
ptxas warning : For profile sm_80 adjusting per thread register count of 16 to lower bound of 24
ptxas warning : For profile sm_70 adjusting per thread register count of 16 to lower bound of 24
ptxas warning : For profile sm_75 adjusting per thread register count of 16 to lower bound of 24
/usr/local/cuda/bin/nvcc -ccbin g++ -I../../../Common  -m64    -maxrregcount=16 --threads 0 --std=c++11 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_86,code=compute_86 -o BlackScholes_gold.o -c BlackScholes_gold.cpp
nvcc warning : The 'compute_35', 'compute_37', 'sm_35', and 'sm_37' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
/usr/local/cuda/bin/nvcc -ccbin g++   -m64      -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_86,code=compute_86 -o BlackScholes BlackScholes.o BlackScholes_gold.o
nvcc warning : The 'compute_35', 'compute_37', 'sm_35', and 'sm_37' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
mkdir -p ../../../bin/x86_64/linux/release
cp BlackScholes ../../../bin/x86_64/linux/release


$ ./BlackScholes
[./BlackScholes] - Starting...
GPU Device 0: "Turing" with compute capability 7.5

Initializing data...
...allocating CPU memory for options.
...allocating GPU memory for options.
...generating input data in CPU mem.
...copying input data to GPU mem.
Data init done.

Executing Black-Scholes GPU kernel (512 iterations)...
Options count             : 8000000
BlackScholesGPU() time    : 0.722482 msec
Effective memory bandwidth: 110.729334 GB/s
Gigaoptions per second    : 11.072933

BlackScholes, Throughput = 11.0729 GOptions/s, Time = 0.00072 s, Size = 8000000 options, NumDevsUsed = 1, Workgroup = 128

Reading back GPU results...
Checking the results...
...running CPU calculations.

Comparing the results...
L1 norm: 1.741792E-07
Max absolute error: 1.192093E-05

Shutting down...
...releasing GPU memory.
...releasing CPU memory.
Shutdown done.

[BlackScholes] - Test Summary

NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.

Test passed

I cannot find a way to use the CUDA language in a CMake project on Windows with the standard MSVC 2019 compiler.

I am trying to configure and compile this hello-cmake-cuda repository (also described in this blog post).

CMakeLists.txt file contents:

cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project(hello LANGUAGES CXX CUDA)
enable_language(CUDA)
add_executable(hello hello.cu)

Here is the output to the cmake .. command, run from within the build directory:

PS C:\GitRepo\cuda_hello\build> cmake ..
-- Selecting Windows SDK version 10.0.18362.0 to target Windows 10.0.22000.
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/CMakeDetermineCUDACompiler.cmake:311 (message):
  CMAKE_CUDA_ARCHITECTURES must be valid if set.
Call Stack (most recent call first):
  CMakeLists.txt:5 (project)


-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeError.log".

It means that architectures_tested from CMakeDetermineCUDACompiler.cmake:311 is empty...

How can I get CMake to complete its configuration and the simple program to build?

My development environment

  • Operating system: Windows 11 Version 10.0.22000 Build 22000
  • Compiler: Microsoft Visual Studio Community 2019 Version 16.11.11
  • CMake version is 3.23
  • CUDA version is 11.6

I have tried different versions of each piece of software and keep running into the same issue. I have decided to stay with these versions for the moment.

My GPU is properly configured: It shows up with nvidia-smi, and I am also able to build and run the deviceQuery CUDA sample:

CUDA Device Query (Runtime API) version (CUDART static linking)

Detected 1 CUDA Capable device(s)

Device 0: "NVIDIA GeForce GTX 1650"
  CUDA Driver Version / Runtime Version          11.6 / 11.6
  CUDA Capability Major/Minor version number:    7.5
  etc. etc. ...

deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 11.6, CUDA Runtime Version = 11.6, NumDevs = 1
Result = PASS

My environment PATH variable:

PS C:\GitRepo\hello-cuda-cmake-master> $env:path -split ";"
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\bin
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\libnvvp
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\bin
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\libnvvp

C:\Program Files (x86)\Common Files\Oracle\Java\javapath
C:\Python38\Scripts\
C:\Python38\
C:\Windows\system32
C:\Windows
C:\Windows\System32\Wbem
C:\Windows\System32\WindowsPowerShell\v1.0\
C:\Windows\System32\OpenSSH\
C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common
C:\Program Files\NVIDIA Corporation\NVIDIA NvDLISR
C:\Program Files\PuTTY\
C:\Program Files (x86)\PuTTY\
C:\Program Files\Microsoft SQL Server\110\Tools\Binn\
C:\Program Files\TortoiseSVN\bin
C:\Program Files\TortoiseGit\bin
C:\Program Files\Microsoft VS Code\bin
C:\WINDOWS\system32
C:\WINDOWS
C:\WINDOWS\System32\Wbem
C:\WINDOWS\System32\WindowsPowerShell\v1.0\
C:\WINDOWS\System32\OpenSSH\
C:\Program Files\Docker\Docker\resources\bin
C:\ProgramData\DockerDesktop\version-bin
C:\Program Files\Git\cmd
C:\WINDOWS\system32
C:\WINDOWS
C:\WINDOWS\System32\Wbem
C:\WINDOWS\System32\WindowsPowerShell\v1.0\
C:\WINDOWS\System32\OpenSSH\
C:\Program Files\NVIDIA Corporation\Nsight Compute 2022.1.1\
C:\Program Files\CMake\bin
C:\Ruby30-x64\bin
C:\Users\Thibault GEFFROY\.cargo\bin
C:\Users\Thibault GEFFROY\AppData\Local\Microsoft\WindowsApps
C:\Program Files\OpenCppCoverage
C:\intelFPGA\20.1\modelsim_ase\win32aloem

What I've tried that hasn't worked

If I try to insert the wanted CMAKE_CUDA_ARCHITECTURES :

set(CMAKE_CUDA_ARCHITECTURES 75)

I get:

PS C:\GitRepo\cuda_hello\build> cmake ..
-- Selecting Windows SDK version 10.0.18362.0 to target Windows 10.0.22000.
-- The CUDA compiler identification is unknown
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/CMakeDetermineCUDACompiler.cmake:654 (message):
  The CMAKE_CUDA_ARCHITECTURES:

    75

  do not all work with this compiler.  Try:



  instead.
Call Stack (most recent call first):
  CMakeLists.txt:5 (project)


-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeError.log".

If I try to use the FindCUDA module to set CMAKE_CUDA_ARCHITECTURES - the solution given by @alfC here - I get:

PS C:\GitRepo\cuda_hello\build> cmake ..
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/FindCUDA/select_compute_arch.cmake:120 (file):
  file failed to open for writing (Permission denied):

    /detect_cuda_compute_capabilities.cpp
Call Stack (most recent call first):
  CMakeLists.txt:4 (CUDA_DETECT_INSTALLED_GPUS)


CMake Error: The source directory "CMAKE_FLAGS" does not exist.
Specify --help for usage, or press the help button on the CMake GUI.
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/FindCUDA/select_compute_arch.cmake:141 (try_run):
  Failed to configure test project build system.
Call Stack (most recent call first):
  CMakeLists.txt:4 (CUDA_DETECT_INSTALLED_GPUS)


CMake Error: TRY_COMPILE attempt to remove -rf directory that does not contain CMakeTmp:/detect_cuda_compute_capabilities.cpp
-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeError.log".

Finally, if I try to invoke find_package(CUDA), I get:

PS C:\GitRepo\cuda_hello\build> cmake ..
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/FindCUDA.cmake:677 (cmake_initialize_per_config_variable):
  Unknown CMake command "cmake_initialize_per_config_variable".
Call Stack (most recent call first):
  CMakeLists.txt:2 (find_package)


-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/cuda_hello/build/CMakeFiles/CMakeError.log".

Edit 1:

In reply to @einpoklum's solution:

Thanks for the proposal but it doesn't work either.

Here is the output of the cmake -B build command in your repository:

PS C:\GitRepo\hello-cuda-cmake-master> cmake -B build
-- Building for: Visual Studio 16 2019
-- Selecting Windows SDK version 10.0.18362.0 to target Windows 10.0.22000.
-- The CUDA compiler identification is unknown
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/CMakeDetermineCUDACompiler.cmake:633 (message):
  Failed to detect a default CUDA architecture.



  Compiler output:

Call Stack (most recent call first):
  CMakeLists.txt:2 (project)


-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/hello-cuda-cmake-master/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/hello-cuda-cmake-master/build/CMakeFiles/CMakeError.log".

The output is the same using PowerShell or a MSVC command prompt.


Here are the cmake variables and their value when using cmake-gui:

Cmake Gui


When using the simple nvcc build command: nvcc hello.cu from MSVC command prompt I get:

nvcc fatal   : Could not set up the environment for Microsoft Visual Studio using 'c:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.29.30133/bin/HostX86/x86/../../../../../../../VC/Auxiliary/Build/vcvars64.bat'

The PATH is valid though, and the script vcvars64.bat exists at this location.


What happens if I add the find_package(CUDAToolkit) to the CMakeLists.txt

The new CMakeLists.txt:

cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
find_package(CUDAToolkit)
project(hello LANGUAGES CUDA)
add_executable(hello hello.cu)

The output :

PS C:\GitRepo\hello-cuda-cmake-master> cmake -B build
-- Building for: Visual Studio 16 2019
-- Found CUDAToolkit: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.6/include (found version "11.6.124")
-- Selecting Windows SDK version 10.0.18362.0 to target Windows 10.0.22000.
-- The CUDA compiler identification is unknown
CMake Error at C:/Program Files/CMake/share/cmake-3.23/Modules/CMakeDetermineCUDACompiler.cmake:633 (message):
  Failed to detect a default CUDA architecture.



  Compiler output:

Call Stack (most recent call first):
  CMakeLists.txt:3 (project)


-- Configuring incomplete, errors occurred!
See also "C:/GitRepo/hello-cuda-cmake-master/build/CMakeFiles/CMakeOutput.log".
See also "C:/GitRepo/hello-cuda-cmake-master/build/CMakeFiles/CMakeError.log".

Edit 2:

I am trying to compile the CUDA sample BlackScholes without CMake, with the MSVC 2019 solution provided.

I end up with this error:

Severity        Code        Description        Project        File        Line        Suppression State
Error        MSB3721        The command ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\bin\nvcc.exe" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_37,code=\"sm_37,compute_37\" -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_52,code=\"sm_52,compute_52\" -gencode=arch=compute_60,code=\"sm_60,compute_60\" -gencode=arch=compute_61,code=\"sm_61,compute_61\" -gencode=arch=compute_70,code=\"sm_70,compute_70\" -gencode=arch=compute_75,code=\"sm_75,compute_75\" -gencode=arch=compute_80,code=\"sm_80,compute_80\" -gencode=arch=compute_86,code=\"sm_86,compute_86\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\bin\HostX86\x64" -x cu   -I./ -I../../../Common -I./ -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\/include" -I../../../Common -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include"  -G   --keep-dir x64\Debug  -maxrregcount=0  --machine 64 --compile -cudart static -Xcompiler "/wd 4819"  --threads 0 -g  -DWIN32 -DWIN32 -D_MBCS -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64/Debug/vc142.pdb /FS /Zi /RTC1 /MTd " -o "C:\ProgramData\NVIDIA Corporation\CUDA Samples\v11.6\cuda-samples\Samples\5_Domain_Specific\BlackScholes\x64\Debug\BlackScholes.cu.obj" "C:\ProgramData\NVIDIA Corporation\CUDA Samples\v11.6\cuda-samples\Samples\5_Domain_Specific\BlackScholes\BlackScholes.cu"" exited with code 1.        BlackScholes        C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\MSBuild\Microsoft\VC\v160\BuildCustomizations\CUDA 11.6.targets        790

While using WSL 2 Ubuntu 20.4 and the following CUDA installation and these instructions to build the BlackScholes sample I get this output:

$ sudo make BlackScholes
/usr/local/cuda/bin/nvcc -ccbin g++ -I../../../Common  -m64    -maxrregcount=16 --threads 0 --std=c++11 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_86,code=compute_86 -o BlackScholes.o -c BlackScholes.cu
nvcc warning : The 'compute_35', 'compute_37', 'sm_35', and 'sm_37' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
ptxas warning : For profile sm_86 adjusting per thread register count of 16 to lower bound of 24
ptxas warning : For profile sm_80 adjusting per thread register count of 16 to lower bound of 24
ptxas warning : For profile sm_70 adjusting per thread register count of 16 to lower bound of 24
ptxas warning : For profile sm_75 adjusting per thread register count of 16 to lower bound of 24
/usr/local/cuda/bin/nvcc -ccbin g++ -I../../../Common  -m64    -maxrregcount=16 --threads 0 --std=c++11 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_86,code=compute_86 -o BlackScholes_gold.o -c BlackScholes_gold.cpp
nvcc warning : The 'compute_35', 'compute_37', 'sm_35', and 'sm_37' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
/usr/local/cuda/bin/nvcc -ccbin g++   -m64      -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_86,code=compute_86 -o BlackScholes BlackScholes.o BlackScholes_gold.o
nvcc warning : The 'compute_35', 'compute_37', 'sm_35', and 'sm_37' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
mkdir -p ../../../bin/x86_64/linux/release
cp BlackScholes ../../../bin/x86_64/linux/release


$ ./BlackScholes
[./BlackScholes] - Starting...
GPU Device 0: "Turing" with compute capability 7.5

Initializing data...
...allocating CPU memory for options.
...allocating GPU memory for options.
...generating input data in CPU mem.
...copying input data to GPU mem.
Data init done.

Executing Black-Scholes GPU kernel (512 iterations)...
Options count             : 8000000
BlackScholesGPU() time    : 0.722482 msec
Effective memory bandwidth: 110.729334 GB/s
Gigaoptions per second    : 11.072933

BlackScholes, Throughput = 11.0729 GOptions/s, Time = 0.00072 s, Size = 8000000 options, NumDevsUsed = 1, Workgroup = 128

Reading back GPU results...
Checking the results...
...running CPU calculations.

Comparing the results...
L1 norm: 1.741792E-07
Max absolute error: 1.192093E-05

Shutting down...
...releasing GPU memory.
...releasing CPU memory.
Shutdown done.

[BlackScholes] - Test Summary

NOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.

Test passed

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(5)

情绪少女 2025-01-25 18:18:24

从 CMake 3.18 开始,我们不再使用 FindCUDA.cmake 模块,既不直接使用,也不通过 find_package(CUDA) 使用。它已被 find_package(CUDAToolkit) 取代(后者使用 FindCUDAToolkit.cmake 模块)。

但是实际上,对于您这个简单的 hello-world 项目,您甚至不需要这样做,因为从 CMake 3.8 开始,CUDA 就是 CMake 的“一流公民”语言。好吧,算是吧。因此,这是您可以使用的 CMakeLists.txt 文件:

cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
PROJECT(cuda_hello LANGUAGES CUDA)
add_executable(hello hello.cu)

我使用CUDA 11.6和Visual Studio 16在Windows 10(企业评估)VM上进行了测试(aka vs 2019)。

注意:cmake_minimum_required() 行中的版本号可能至关重要!使用 cuda_hello 存储库中的版本号时,它对我不起作用,因为要求必须提供 CMAKE_CUDA_ARCHITECTURES 值。

现在,使用 CMake 完成配置后,您可以运行 ccmake,在其中您会看到 CMAKE_CUDA_ARCHITECTURES 的值。将其更改为您要使用的值。再说一遍,我为您提供的是最简单、最基本的做法,而不一定是最花哨、最健壮的做法。


我已经在 hello-cuda-cmake 存储库的一个分支(fork)中为您设置好了这一切。

Beginning with CMake 3.18, we no longer use the FindCUDA.cmake module - neither directly nor via find_package(CUDA). This has been replaced with find_package(CUDAToolkit) (which used the FindCUDAToolkit.cmake module).

But actually, for your simple hello-world project - you don't even need to do that, since starting with CMake 3.8, CUDA is a "first-class citizen" language for CMake. Well, kind of. So, here's a CMakeLists.txt file you can use:

cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
PROJECT(cuda_hello LANGUAGES CUDA)
add_executable(hello hello.cu)

I've tested this on a Windows 10 (Enterprise Evaluation) VM, using CUDA 11.6 and Visual Studio 16 (a.k.a. VS 2019).

Note: The version number in the cmake_minimum_required() line may be critical! With the version number at the cuda_hello repository - it doesn't work for me, since a CMAKE_CUDA_ARCHITECTURES value is demanded to be present.

Now, after you configure using CMake, you can run ccmake, where you'll see the CMAKE_CUDA_ARCHITECTURES value. Change it to what you want to use. Again, I'm offering you the simplest and most basic way to do things, not necessarily the fanciest and most robust.


I've set all of this up for you in a fork of the hello-cuda-cmake repository.

风尘浪孓 2025-01-25 18:18:24

我遇到了同样的问题,主要问题是,在CMAKE 3.23.2上它只是不起作用。

我解决此问题的步骤是:

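  1. 删除机器上已安装的所有 CUDA 版本
  2. 删除所有与 CUDA 相关的环境变量
  3. 安装 CUDA v12.2
  4. 安装最新的 CMake 3.27.4(GUI 版本)
  5. 将 CUDA 的 Visual Studio 集成文件复制到 Visual Studio 2022 中(链接)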

I had the same issue and the main issue was, that on CMake 3.23.2 it was just not working.

My steps to solve this problem were:

  1. Remove all CUDA versions that were installed on the machine
  2. Remove all CUDA-related environmental variables
  3. Install CUDA v12.2
  4. Install newest CMAKE 3.27.4 (GUI version)
  5. Copy CUDA Visual Studio Integration files into Visual Studio 2022 link
泪冰清 2025-01-25 18:18:24

尝试添加:

set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 72 75 86)
set(CMAKE_CUDA_COMPILER /usr/local/cuda-11.6/bin/nvcc)

在 https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ 中查看您的 CUDA 架构,并相应更改 CMAKE_CUDA_ARCHITECTURES 参数。

并将 CMAKE_CUDA_COMPILER 链接到 nvcc。

这是我的完整 CMakeLists.txt:

cmake_minimum_required(VERSION 3.20 FATAL_ERROR)

set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 72 75 86)
set(CMAKE_CUDA_COMPILER /usr/local/cuda-11.6/bin/nvcc)

project(cudatest CUDA)
find_package(CUDAToolkit)

set(CMAKE_CUDA_STANDARD 14)

add_executable(cudatest main.cu)

set_target_properties(cudatest PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON)

我的 GPU 是 GeForce GTX 1660,CMake 版本 3.23,CUDA 版本 11.6。

这是我为开发一些项目而制作的 Docker 镜像:https://github.com/GuangchenJ/cuda-dev,你可以尝试使用它。

Try to add:

set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 72 75 86)
set(CMAKE_CUDA_COMPILER /usr/local/cuda-11.6/bin/nvcc)

check your CUDA arch in https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ and change the parameter of CMAKE_CUDA_ARCHITECTURES.

And link the CMAKE_CUDA_COMPILER to nvcc.

this is my full CMakeLists.txt:

cmake_minimum_required(VERSION 3.20 FATAL_ERROR)

set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 72 75 86)
set(CMAKE_CUDA_COMPILER /usr/local/cuda-11.6/bin/nvcc)

project(cudatest CUDA)
find_package(CUDAToolkit)

set(CMAKE_CUDA_STANDARD 14)

add_executable(cudatest main.cu)

set_target_properties(cudatest PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON)

My GPU is GeForce GTX 1660, CMake version 3.23, CUDA Version 11.6.

And this is a Docker image I made for developing some projects: https://github.com/GuangchenJ/cuda-dev, you can try to use it.

哑剧 2025-01-25 18:18:24

OS Env:

  1. Windows 10(Visual Studio 2022 社区版)
  2. CUDA:CUDA 11.6,NVCC
  3. CPP标准:17
  4. IDE:VSCODE
  5. CMAKE。

此项目名称是: hellogpu

cmake文件:

cmake_minimum_required(VERSION 3.0.0)
project(hellogpu CUDA)

include(CTest)
enable_testing()

add_executable(${PROJECT_NAME} main.cu)

set_target_properties(${PROJECT_NAME} PROPERTIES
        CUDA_SEPARABLE_COMPILATION ON)
set(CPACK_PROJECT_NAME ${PROJECT_NAME})
set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
include(CPack)

os env :

  1. Windows 10 (Visual Studio 2022 Community)
  2. cuda: cuda 11.6 , nvcc
  3. cpp standard: 17
  4. ide: vscode
  5. cmake.

this project name is :hellogpu

cmake file:

cmake_minimum_required(VERSION 3.0.0)
project(hellogpu CUDA)

include(CTest)
enable_testing()

add_executable(${PROJECT_NAME} main.cu)

set_target_properties(${PROJECT_NAME} PROPERTIES
        CUDA_SEPARABLE_COMPILATION ON)
set(CPACK_PROJECT_NAME ${PROJECT_NAME})
set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
include(CPack)
陌伤ぢ 2025-01-25 18:18:24

我遇到了同样的问题,我通过安装旧版本的CMake解决了它。更准确地说: 3.18之前的版本

显然,CMake 在 3.18 中添加了对 CUDA 的第一方语言支持,这些荒谬的问题(“Try: instead”)就是由此而来的。

I had the same problem and I solved it by installing older version of CMake. More precisely: a version before 3.18.

Apparently CMake added first party language support for CUDA in 3.18 and that is where these nonsensical problems ("Try: instead") were coming from.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文