From c8be8496c8a15d0ede8338939a7512109b8e5e46 Mon Sep 17 00:00:00 2001
From: 3gg <3gg@shellblade.net>
Date: Wed, 27 Nov 2024 13:41:09 -0800
Subject: Initial commit.

---
 vector_sum/CMakeLists.txt | 11 ++++++
 vector_sum/main.cu        | 89 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 vector_sum/CMakeLists.txt
 create mode 100644 vector_sum/main.cu

(limited to 'vector_sum')

diff --git a/vector_sum/CMakeLists.txt b/vector_sum/CMakeLists.txt
new file mode 100644
index 0000000..1eea51b
--- /dev/null
+++ b/vector_sum/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.28)
+
+project(vector_sum LANGUAGES CUDA CXX)
+
+add_executable(vector_sum
+  main.cu)
+
+# -Wpedantic causes warnings due to nvcc emitting non-standard (gcc-specific)
+# host code.
+# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
+target_compile_options(vector_sum PRIVATE -Wall -Wextra -Wno-pedantic)
diff --git a/vector_sum/main.cu b/vector_sum/main.cu
new file mode 100644
index 0000000..ba2e964
--- /dev/null
+++ b/vector_sum/main.cu
@@ -0,0 +1,89 @@
+#include <cstdio>
+
+// Element-wise vector sum: out[i] = a[i] + b[i] for every i in [0, N).
+//
+// Works with any 1D launch configuration that supplies at least N threads in
+// total; threads whose global index falls outside [0, N) do nothing.
+// a, b and out must point to device memory holding at least N ints each.
+__global__ void add(int N, const int* a, const int* b, int* out) {
+  const int id = blockIdx.x * blockDim.x + threadIdx.x;
+  if (id < N) {
+    out[id] = a[id] + b[id];
+  }
+}
+
+// Adds the vector 0..N-1 to itself on the GPU and prints the result.
+//
+// Returns 0 on success; on any CUDA failure, reports the error on stderr and
+// returns 1. Device memory is released on every path.
+int main() {
+  constexpr int N = 100;
+  constexpr int kThreadsPerBlock = 256;
+  // Round up so that a partially filled last block covers the tail.
+  constexpr int kNumBlocks = (N + kThreadsPerBlock - 1) / kThreadsPerBlock;
+
+  bool success = false;
+  cudaError_t err = cudaSuccess;
+  int host_array[N] = {0};
+  int* dev_arrays[3] = {nullptr};
+
+  // Allocate device arrays.
+  for (int i = 0; i < 3; ++i) {
+    err = cudaMalloc(&dev_arrays[i], N * sizeof(int));
+    if (err != cudaSuccess) {
+      goto cleanup;
+    }
+  }
+
+  // Fill the host array with values 0..N-1.
+  for (int i = 0; i < N; ++i) {
+    host_array[i] = i;
+  }
+
+  // Copy the host array to each of the first two device arrays.
+  for (int i = 0; i < 2; ++i) {
+    err = cudaMemcpy(
+        dev_arrays[i], host_array, N * sizeof(int), cudaMemcpyHostToDevice);
+    if (err != cudaSuccess) {
+      goto cleanup;
+    }
+  }
+
+  // Add the first two arrays.
+  add<<<kNumBlocks, kThreadsPerBlock>>>(
+      N, dev_arrays[0], dev_arrays[1], dev_arrays[2]);
+
+  // A kernel launch returns no status; a bad launch configuration is only
+  // visible through cudaGetLastError().
+  err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    goto cleanup;
+  }
+
+  // Copy the result from the third array to the host. This blocking copy also
+  // waits for the kernel and reports any error raised while it ran.
+  err = cudaMemcpy(
+      host_array, dev_arrays[2], N * sizeof(int), cudaMemcpyDeviceToHost);
+  if (err != cudaSuccess) {
+    goto cleanup;
+  }
+
+  // Print the result.
+  for (int i = 0; i < N; ++i) {
+    printf("%d ", host_array[i]);
+  }
+  printf("\n");
+
+  success = true;
+
+cleanup:
+  if (err != cudaSuccess) {
+    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
+  }
+  for (int i = 0; i < 3; ++i) {
+    if (dev_arrays[i] != nullptr) {
+      cudaFree(dev_arrays[i]);
+    }
+  }
+  return success ? 0 : 1;
+}
-- 
cgit v1.2.3