diff options
author | 3gg <3gg@shellblade.net> | 2024-11-27 13:41:09 -0800 |
---|---|---|
committer | 3gg <3gg@shellblade.net> | 2024-11-27 13:41:09 -0800 |
commit | c8be8496c8a15d0ede8338939a7512109b8e5e46 (patch) | |
tree | 1e60112652e9f3c3a20e6bf4cc0b8bef0ebc81fd /hello |
Initial commit.
Diffstat (limited to 'hello')
-rw-r--r-- | hello/CMakeLists.txt | 11 | ||||
-rw-r--r-- | hello/hello.cu | 59 |
2 files changed, 70 insertions, 0 deletions
diff --git a/hello/CMakeLists.txt b/hello/CMakeLists.txt new file mode 100644 index 0000000..e4b4acc --- /dev/null +++ b/hello/CMakeLists.txt | |||
@@ -0,0 +1,11 @@ | |||
cmake_minimum_required(VERSION 3.28)

project(cuda_hello LANGUAGES CUDA CXX)

# Single-source executable built from the CUDA translation unit.
add_executable(cuda_hello hello.cu)

# Enable the usual warning sets, but keep -Wpedantic off: nvcc emits
# non-standard (gcc-specific) host code, which -Wpedantic warns about.
# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
target_compile_options(
  cuda_hello
  PRIVATE
    -Wall
    -Wextra
    -Wno-pedantic)
diff --git a/hello/hello.cu b/hello/hello.cu new file mode 100644 index 0000000..691b18c --- /dev/null +++ b/hello/hello.cu | |||
@@ -0,0 +1,59 @@ | |||
1 | #include <cstdio> | ||
2 | |||
// Prints the number of CUDA devices reported by the runtime, followed by the
// name of each device, to stdout.
//
// If the device count cannot be queried, or the properties of a device cannot
// be read, the CUDA error string is included in the message so that a genuine
// "no devices" condition can be told apart from other runtime failures.
void logDevices() {
  int count = 0;  // Defined value even if the query below fails.
  const cudaError_t count_status = cudaGetDeviceCount(&count);
  if (count_status != cudaSuccess) {
    printf("No CUDA devices found (%s)\n", cudaGetErrorString(count_status));
    return;
  }

  printf("CUDA devices found: %d\n", count);
  for (int i = 0; i < count; ++i) {
    cudaDeviceProp properties;
    const cudaError_t status = cudaGetDeviceProperties(&properties, i);
    if (status == cudaSuccess) {
      printf("Device [%d]: %s\n", i, properties.name);
    } else {
      // Report the device instead of silently skipping it.
      printf(
          "Device [%d]: <properties unavailable: %s>\n", i,
          cudaGetErrorString(status));
    }
  }
}
18 | |||
// Fills array[i] = i for every i in [0, N).
//
// Launch layout: any 1D grid of 1D blocks. The work is distributed with a
// grid-stride loop, so the result is the same for <<<1, 1>>> as for a launch
// with many blocks and threads; the grid does not need to cover N exactly.
// Shared memory: none.
// Preconditions: `array` must be dereferenceable from device code and hold at
// least N ints. N <= 0 writes nothing.
__global__ void kernel(int* array, int N) {
  // 64-bit arithmetic so blockIdx.x * blockDim.x cannot overflow on large
  // grids.
  const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
  const long long first =
      static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

  for (long long i = first; i < N; i += stride) {
    array[i] = static_cast<int>(i);
  }
}
24 | |||
// Entry point: logs the available CUDA devices, fills an N-element array on
// the device with 0..N-1, copies it back to the host and prints it to stdout.
//
// Returns 0 on success. Returns 1 if the device allocation, the kernel launch
// or the device-to-host copy fails; the failing step and the CUDA error string
// are written to stderr.
int main() {
  logDevices();

  constexpr int N = 100;

  int* host_array = new int[N];
  int* device_array = nullptr;
  bool success = false;
  // Declared before the first `goto` so that no jump bypasses an
  // initialization.
  cudaError_t status = cudaSuccess;

  status = cudaMalloc(&device_array, N * sizeof(int));
  if (status != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(status));
    goto cleanup;
  }

  kernel<<<1, 1>>>(device_array, N);

  // A kernel launch returns no status; launch errors must be queried
  // explicitly.
  status = cudaGetLastError();
  if (status != cudaSuccess) {
    fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(status));
    goto cleanup;
  }

  // Blocking copy: errors raised while the kernel was executing are reported
  // here as well.
  status = cudaMemcpy(
      host_array, device_array, N * sizeof(int), cudaMemcpyDeviceToHost);
  if (status != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(status));
    goto cleanup;
  }

  for (int i = 0; i < N; ++i) {
    printf("%d ", host_array[i]);
  }
  printf("\n");

  success = true;

cleanup:
  delete[] host_array;
  if (device_array != nullptr) {
    cudaFree(device_array);
  }
  return success ? 0 : 1;
}