diff options
| author | 3gg <3gg@shellblade.net> | 2025-12-27 12:03:39 -0800 |
|---|---|---|
| committer | 3gg <3gg@shellblade.net> | 2025-12-27 12:03:39 -0800 |
| commit | 5a079a2d114f96d4847d1ee305d5b7c16eeec50e (patch) | |
| tree | 8926ab44f168acf787d8e19608857b3af0f82758 /contrib/SDL-3.2.8/test/testautomation_intrinsics.c | |
Initial commit
Diffstat (limited to 'contrib/SDL-3.2.8/test/testautomation_intrinsics.c')
| -rw-r--r-- | contrib/SDL-3.2.8/test/testautomation_intrinsics.c | 702 |
1 files changed, 702 insertions, 0 deletions
diff --git a/contrib/SDL-3.2.8/test/testautomation_intrinsics.c b/contrib/SDL-3.2.8/test/testautomation_intrinsics.c new file mode 100644 index 0000000..8338d33 --- /dev/null +++ b/contrib/SDL-3.2.8/test/testautomation_intrinsics.c | |||
| @@ -0,0 +1,702 @@ | |||
| 1 | /** | ||
| 2 | * Intrinsics test suite | ||
| 3 | */ | ||
| 4 | |||
| 5 | #ifdef HAVE_BUILD_CONFIG | ||
| 6 | /* Disable intrinsics that are unsupported by the current compiler */ | ||
| 7 | #include "SDL_build_config.h" | ||
| 8 | #endif | ||
| 9 | |||
| 10 | #include <SDL3/SDL.h> | ||
| 11 | #include <SDL3/SDL_intrin.h> | ||
| 12 | #include <SDL3/SDL_test.h> | ||
| 13 | #include "testautomation_suites.h" | ||
| 14 | |||
| 15 | // FIXME: missing tests for loongarch lsx/lasx | ||
| 16 | // FIXME: missing tests for powerpc altivec | ||
| 17 | |||
| 18 | /* ================= Test Case Implementation ================== */ | ||
| 19 | |||
| 20 | /* Helper functions */ | ||
| 21 | |||
| 22 | static int allocate_random_uint_arrays(Uint32 **dest, Uint32 **a, Uint32 **b, size_t *size) { | ||
| 23 | size_t i; | ||
| 24 | |||
| 25 | *size = (size_t)SDLTest_RandomIntegerInRange(127, 999); | ||
| 26 | *dest = SDL_malloc(sizeof(Uint32) * *size); | ||
| 27 | *a = SDL_malloc(sizeof(Uint32) * *size); | ||
| 28 | *b = SDL_malloc(sizeof(Uint32) * *size); | ||
| 29 | |||
| 30 | if (!*dest || !*a || !*b) { | ||
| 31 | SDLTest_AssertCheck(false, "SDL_malloc failed"); | ||
| 32 | return -1; | ||
| 33 | } | ||
| 34 | |||
| 35 | for (i = 0; i < *size; ++i) { | ||
| 36 | (*a)[i] = SDLTest_RandomUint32(); | ||
| 37 | (*b)[i] = SDLTest_RandomUint32(); | ||
| 38 | } | ||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | |||
| 42 | static int allocate_random_float_arrays(float **dest, float **a, float **b, size_t *size) { | ||
| 43 | size_t i; | ||
| 44 | |||
| 45 | *size = (size_t)SDLTest_RandomIntegerInRange(127, 999); | ||
| 46 | *dest = SDL_malloc(sizeof(float) * *size); | ||
| 47 | *a = SDL_malloc(sizeof(float) * *size); | ||
| 48 | *b = SDL_malloc(sizeof(float) * *size); | ||
| 49 | |||
| 50 | if (!*dest || !*a || !*b) { | ||
| 51 | SDLTest_AssertCheck(false, "SDL_malloc failed"); | ||
| 52 | return -1; | ||
| 53 | } | ||
| 54 | |||
| 55 | for (i = 0; i < *size; ++i) { | ||
| 56 | (*a)[i] = SDLTest_RandomUnitFloat(); | ||
| 57 | (*b)[i] = SDLTest_RandomUnitFloat(); | ||
| 58 | } | ||
| 59 | |||
| 60 | return 0; | ||
| 61 | } | ||
| 62 | |||
| 63 | static int allocate_random_double_arrays(double **dest, double **a, double **b, size_t *size) { | ||
| 64 | size_t i; | ||
| 65 | |||
| 66 | *size = (size_t)SDLTest_RandomIntegerInRange(127, 999); | ||
| 67 | *dest = SDL_malloc(sizeof(double) * *size); | ||
| 68 | *a = SDL_malloc(sizeof(double) * *size); | ||
| 69 | *b = SDL_malloc(sizeof(double) * *size); | ||
| 70 | |||
| 71 | if (!*dest || !*a || !*b) { | ||
| 72 | SDLTest_AssertCheck(false, "SDL_malloc failed"); | ||
| 73 | return -1; | ||
| 74 | } | ||
| 75 | |||
| 76 | for (i = 0; i < *size; ++i) { | ||
| 77 | (*a)[i] = SDLTest_RandomUnitDouble(); | ||
| 78 | (*b)[i] = SDLTest_RandomUnitDouble(); | ||
| 79 | } | ||
| 80 | |||
| 81 | return 0; | ||
| 82 | } | ||
| 83 | |||
/**
 * Release the three buffers handed out by one of the allocate_random_*_arrays
 * helpers. Each pointer is passed to SDL_free() in the order dest, a, b.
 */
static void free_arrays(void *dest, void *a, void *b) {
    void *buffers[3];
    size_t n;

    buffers[0] = dest;
    buffers[1] = a;
    buffers[2] = b;

    for (n = 0; n < SDL_arraysize(buffers); ++n) {
        SDL_free(buffers[n]);
    }
}
| 89 | |||
| 90 | /** | ||
| 91 | * Verify element-wise addition of 2 int arrays. | ||
| 92 | */ | ||
| 93 | static void verify_uints_addition(const Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size, const char *desc) { | ||
| 94 | size_t i; | ||
| 95 | int all_good = 1; | ||
| 96 | |||
| 97 | for (i = 0; i < size; ++i) { | ||
| 98 | Uint32 expected = a[i] + b[i]; | ||
| 99 | if (dest[i] != expected) { | ||
| 100 | SDLTest_AssertCheck(false, "%" SDL_PRIs32 " + %" SDL_PRIs32 " = %" SDL_PRIs32 ", expected %" SDL_PRIs32 " ([%" SDL_PRIu32 "/%" SDL_PRIu32 "] %s)", | ||
| 101 | a[i], b[i], dest[i], expected, (Uint32)i, (Uint32)size, desc); | ||
| 102 | all_good = 0; | ||
| 103 | } | ||
| 104 | } | ||
| 105 | if (all_good) { | ||
| 106 | SDLTest_AssertCheck(true, "All int additions were correct (%s)", desc); | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | /** | ||
| 111 | * Verify element-wise multiplication of 2 uint arrays. | ||
| 112 | */ | ||
| 113 | static void verify_uints_multiplication(const Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size, const char *desc) { | ||
| 114 | size_t i; | ||
| 115 | int all_good = 1; | ||
| 116 | |||
| 117 | for (i = 0; i < size; ++i) { | ||
| 118 | Uint32 expected = a[i] * b[i]; | ||
| 119 | if (dest[i] != expected) { | ||
| 120 | SDLTest_AssertCheck(false, "%" SDL_PRIu32 " * %" SDL_PRIu32 " = %" SDL_PRIu32 ", expected %" SDL_PRIu32 " ([%" SDL_PRIu32 "/%" SDL_PRIu32 "] %s)", | ||
| 121 | a[i], b[i], dest[i], expected, (Uint32)i, (Uint32)size, desc); | ||
| 122 | all_good = 0; | ||
| 123 | } | ||
| 124 | } | ||
| 125 | if (all_good) { | ||
| 126 | SDLTest_AssertCheck(true, "All int multiplication were correct (%s)", desc); | ||
| 127 | } | ||
| 128 | } | ||
| 129 | |||
| 130 | /** | ||
| 131 | * Verify element-wise addition of 2 float arrays. | ||
| 132 | */ | ||
| 133 | static void verify_floats_addition(const float *dest, const float *a, const float *b, size_t size, const char *desc) { | ||
| 134 | size_t i; | ||
| 135 | int all_good = 1; | ||
| 136 | |||
| 137 | for (i = 0; i < size; ++i) { | ||
| 138 | float expected = a[i] + b[i]; | ||
| 139 | float abs_error = SDL_fabsf(dest[i] - expected); | ||
| 140 | if (abs_error > 1.0e-5f) { | ||
| 141 | SDLTest_AssertCheck(false, "%g + %g = %g, expected %g (error = %g) ([%" SDL_PRIu32 "/%" SDL_PRIu32 "] %s)", | ||
| 142 | a[i], b[i], dest[i], expected, abs_error, (Uint32) i, (Uint32) size, desc); | ||
| 143 | all_good = 0; | ||
| 144 | } | ||
| 145 | } | ||
| 146 | if (all_good) { | ||
| 147 | SDLTest_AssertCheck(true, "All float additions were correct (%s)", desc); | ||
| 148 | } | ||
| 149 | } | ||
| 150 | |||
| 151 | /** | ||
| 152 | * Verify element-wise addition of 2 double arrays. | ||
| 153 | */ | ||
| 154 | static void verify_doubles_addition(const double *dest, const double *a, const double *b, size_t size, const char *desc) { | ||
| 155 | size_t i; | ||
| 156 | int all_good = 1; | ||
| 157 | |||
| 158 | for (i = 0; i < size; ++i) { | ||
| 159 | double expected = a[i] + b[i]; | ||
| 160 | double abs_error = SDL_fabs(dest[i] - expected); | ||
| 161 | if (abs_error > 1.0e-5) { | ||
| 162 | SDLTest_AssertCheck(abs_error < 1.0e-5f, "%g + %g = %g, expected %g (error = %g) ([%" SDL_PRIu32 "/%" SDL_PRIu32 "] %s)", | ||
| 163 | a[i], b[i], dest[i], expected, abs_error, (Uint32) i, (Uint32) size, desc); | ||
| 164 | all_good = false; | ||
| 165 | } | ||
| 166 | } | ||
| 167 | if (all_good) { | ||
| 168 | SDLTest_AssertCheck(true, "All double additions were correct (%s)", desc); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | |||
| 172 | /* Intrinsic kernels */ | ||
| 173 | |||
| 174 | static void kernel_uints_add_cpu(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size) { | ||
| 175 | for (; size; --size, ++dest, ++a, ++b) { | ||
| 176 | *dest = *a + *b; | ||
| 177 | } | ||
| 178 | } | ||
| 179 | |||
| 180 | static void kernel_uints_mul_cpu(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size) { | ||
| 181 | for (; size; --size, ++dest, ++a, ++b) { | ||
| 182 | *dest = *a * *b; | ||
| 183 | } | ||
| 184 | } | ||
| 185 | |||
/* Scalar reference kernel: dest[i] = a[i] + b[i] for the first `size` elements. */
static void kernel_floats_add_cpu(float *dest, const float *a, const float *b, size_t size) {
    size_t idx;

    for (idx = 0; idx < size; ++idx) {
        dest[idx] = a[idx] + b[idx];
    }
}
| 191 | |||
/* Scalar reference kernel: dest[i] = a[i] + b[i] for the first `size` elements. */
static void kernel_doubles_add_cpu(double *dest, const double *a, const double *b, size_t size) {
    size_t idx;

    for (idx = 0; idx < size; ++idx) {
        dest[idx] = a[idx] + b[idx];
    }
}
| 197 | |||
| 198 | #ifdef SDL_MMX_INTRINSICS | ||
| 199 | SDL_TARGETING("mmx") static void kernel_uints_add_mmx(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size) { | ||
| 200 | for (; size >= 2; size -= 2, dest += 2, a += 2, b += 2) { | ||
| 201 | *(__m64*)dest = _mm_add_pi32(*(__m64*)a, *(__m64*)b); | ||
| 202 | } | ||
| 203 | if (size) { | ||
| 204 | *dest = *a + *b; | ||
| 205 | } | ||
| 206 | _mm_empty(); | ||
| 207 | } | ||
| 208 | #endif | ||
| 209 | |||
| 210 | #ifdef SDL_SSE_INTRINSICS | ||
| 211 | SDL_TARGETING("sse") static void kernel_floats_add_sse(float *dest, const float *a, const float *b, size_t size) { | ||
| 212 | for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) { | ||
| 213 | _mm_storeu_ps(dest, _mm_add_ps(_mm_loadu_ps(a), _mm_loadu_ps (b))); | ||
| 214 | } | ||
| 215 | for (; size; size--, ++dest, ++a, ++b) { | ||
| 216 | *dest = *a + *b; | ||
| 217 | } | ||
| 218 | } | ||
| 219 | #endif | ||
| 220 | |||
| 221 | #ifdef SDL_SSE2_INTRINSICS | ||
| 222 | SDL_TARGETING("sse2") static void kernel_doubles_add_sse2(double *dest, const double *a, const double *b, size_t size) { | ||
| 223 | for (; size >= 2; size -= 2, dest += 2, a += 2, b += 2) { | ||
| 224 | _mm_storeu_pd(dest, _mm_add_pd(_mm_loadu_pd(a), _mm_loadu_pd(b))); | ||
| 225 | } | ||
| 226 | if (size) { | ||
| 227 | *dest = *a + *b; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | #endif | ||
| 231 | |||
| 232 | #ifdef SDL_SSE3_INTRINSICS | ||
| 233 | SDL_TARGETING("sse3") static void kernel_uints_add_sse3(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size) { | ||
| 234 | for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) { | ||
| 235 | _mm_storeu_si128((__m128i*)dest, _mm_add_epi32(_mm_lddqu_si128((__m128i*)a), _mm_lddqu_si128((__m128i*)b))); | ||
| 236 | } | ||
| 237 | for (;size; --size, ++dest, ++a, ++b) { | ||
| 238 | *dest = *a + *b; | ||
| 239 | } | ||
| 240 | } | ||
| 241 | #endif | ||
| 242 | |||
| 243 | #ifdef SDL_SSE4_1_INTRINSICS | ||
| 244 | SDL_TARGETING("sse4.1") static void kernel_uints_mul_sse4_1(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size) { | ||
| 245 | for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) { | ||
| 246 | _mm_storeu_si128((__m128i*)dest, _mm_mullo_epi32(_mm_lddqu_si128((__m128i*)a), _mm_lddqu_si128((__m128i*)b))); | ||
| 247 | } | ||
| 248 | for (;size; --size, ++dest, ++a, ++b) { | ||
| 249 | *dest = *a * *b; | ||
| 250 | } | ||
| 251 | } | ||
| 252 | #endif | ||
| 253 | |||
| 254 | #ifdef SDL_SSE4_2_INTRINSICS | ||
| 255 | SDL_TARGETING("sse4.2") static Uint32 calculate_crc32c_sse4_2(const char *text) { | ||
| 256 | Uint32 crc32c = ~0u; | ||
| 257 | size_t len = SDL_strlen(text); | ||
| 258 | |||
| 259 | #if defined(__x86_64__) || defined(_M_X64) | ||
| 260 | for (; len >= 8; len -= 8, text += 8) { | ||
| 261 | crc32c = (Uint32)_mm_crc32_u64(crc32c, *(Sint64*)text); | ||
| 262 | } | ||
| 263 | if (len >= 4) { | ||
| 264 | crc32c = (Uint32)_mm_crc32_u32(crc32c, *(Sint32*)text); | ||
| 265 | len -= 4; | ||
| 266 | text += 4; | ||
| 267 | } | ||
| 268 | #else | ||
| 269 | for (; len >= 4; len -= 4, text += 4) { | ||
| 270 | crc32c = (Uint32)_mm_crc32_u32(crc32c, *(Sint32*)text); | ||
| 271 | } | ||
| 272 | #endif | ||
| 273 | if (len >= 2) { | ||
| 274 | crc32c = (Uint32)_mm_crc32_u16(crc32c, *(Sint16*)text); | ||
| 275 | len -= 2; | ||
| 276 | text += 2; | ||
| 277 | } | ||
| 278 | if (len) { | ||
| 279 | crc32c = (Uint32)_mm_crc32_u8(crc32c, *text); | ||
| 280 | } | ||
| 281 | return ~crc32c; | ||
| 282 | } | ||
| 283 | #endif | ||
| 284 | |||
| 285 | #ifdef SDL_AVX_INTRINSICS | ||
| 286 | SDL_TARGETING("avx") static void kernel_floats_add_avx(float *dest, const float *a, const float *b, size_t size) { | ||
| 287 | for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) { | ||
| 288 | _mm256_storeu_ps(dest, _mm256_add_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b))); | ||
| 289 | } | ||
| 290 | for (; size; size--, ++dest, ++a, ++b) { | ||
| 291 | *dest = *a + *b; | ||
| 292 | } | ||
| 293 | } | ||
| 294 | #endif | ||
| 295 | |||
| 296 | #ifdef SDL_AVX2_INTRINSICS | ||
| 297 | SDL_TARGETING("avx2") static void kernel_uints_add_avx2(Uint32 *dest, const Uint32 *a, const Uint32 *b, size_t size) { | ||
| 298 | for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) { | ||
| 299 | _mm256_storeu_si256((__m256i*)dest, _mm256_add_epi32(_mm256_loadu_si256((__m256i*)a), _mm256_loadu_si256((__m256i*)b))); | ||
| 300 | } | ||
| 301 | for (; size; size--, ++dest, ++a, ++b) { | ||
| 302 | *dest = *a + *b; | ||
| 303 | } | ||
| 304 | } | ||
| 305 | #endif | ||
| 306 | |||
| 307 | #ifdef SDL_AVX512F_INTRINSICS | ||
| 308 | SDL_TARGETING("avx512f") static void kernel_floats_add_avx512f(float *dest, const float *a, const float *b, size_t size) { | ||
| 309 | for (; size >= 16; size -= 16, dest += 16, a += 16, b += 16) { | ||
| 310 | _mm512_storeu_ps(dest, _mm512_add_ps(_mm512_loadu_ps(a), _mm512_loadu_ps(b))); | ||
| 311 | } | ||
| 312 | for (; size; --size) { | ||
| 313 | *dest++ = *a++ + *b++; | ||
| 314 | } | ||
| 315 | } | ||
| 316 | #endif | ||
| 317 | |||
| 318 | /* Test case functions */ | ||
| 319 | |||
| 320 | static int SDLCALL intrinsics_selftest(void *arg) | ||
| 321 | { | ||
| 322 | { | ||
| 323 | size_t size; | ||
| 324 | Uint32 *dest, *a, *b; | ||
| 325 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
| 326 | free_arrays(dest, a, b); | ||
| 327 | return TEST_ABORTED; | ||
| 328 | } | ||
| 329 | kernel_uints_mul_cpu(dest, a, b, size); | ||
| 330 | verify_uints_multiplication(dest, a, b, size, "CPU"); | ||
| 331 | free_arrays(dest, a, b); | ||
| 332 | } | ||
| 333 | { | ||
| 334 | size_t size; | ||
| 335 | Uint32 *dest, *a, *b; | ||
| 336 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
| 337 | free_arrays(dest, a, b); | ||
| 338 | return TEST_ABORTED; | ||
| 339 | } | ||
| 340 | kernel_uints_add_cpu(dest, a, b, size); | ||
| 341 | verify_uints_addition(dest, a, b, size, "CPU"); | ||
| 342 | free_arrays(dest, a, b); | ||
| 343 | } | ||
| 344 | { | ||
| 345 | size_t size; | ||
| 346 | float *dest, *a, *b; | ||
| 347 | if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) { | ||
| 348 | free_arrays(dest, a, b); | ||
| 349 | return TEST_ABORTED; | ||
| 350 | } | ||
| 351 | kernel_floats_add_cpu(dest, a, b, size); | ||
| 352 | verify_floats_addition(dest, a, b, size, "CPU"); | ||
| 353 | free_arrays(dest, a, b); | ||
| 354 | } | ||
| 355 | { | ||
| 356 | size_t size; | ||
| 357 | double *dest, *a, *b; | ||
| 358 | if (allocate_random_double_arrays(&dest, &a, &b, &size) < 0) { | ||
| 359 | free_arrays(dest, a, b); | ||
| 360 | return TEST_ABORTED; | ||
| 361 | } | ||
| 362 | kernel_doubles_add_cpu(dest, a, b, size); | ||
| 363 | verify_doubles_addition(dest, a, b, size, "CPU"); | ||
| 364 | free_arrays(dest, a, b); | ||
| 365 | } | ||
| 366 | return TEST_COMPLETED; | ||
| 367 | } | ||
| 368 | |||
| 369 | static int SDLCALL intrinsics_testMMX(void *arg) | ||
| 370 | { | ||
| 371 | if (SDL_HasMMX()) { | ||
| 372 | SDLTest_AssertCheck(true, "CPU of test machine has MMX support."); | ||
| 373 | #ifdef SDL_MMX_INTRINSICS | ||
| 374 | { | ||
| 375 | size_t size; | ||
| 376 | Uint32 *dest, *a, *b; | ||
| 377 | |||
| 378 | SDLTest_AssertCheck(true, "Test executable uses MMX intrinsics."); | ||
| 379 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
| 380 | free_arrays(dest, a, b); | ||
| 381 | return TEST_ABORTED; | ||
| 382 | } | ||
| 383 | kernel_uints_add_mmx(dest, a, b, size); | ||
| 384 | verify_uints_addition(dest, a, b, size, "MMX"); | ||
| 385 | free_arrays(dest, a, b); | ||
| 386 | |||
| 387 | return TEST_COMPLETED; | ||
| 388 | } | ||
| 389 | #else | ||
| 390 | SDLTest_AssertCheck(true, "Test executable does NOT use MMX intrinsics."); | ||
| 391 | #endif | ||
| 392 | } else { | ||
| 393 | SDLTest_AssertCheck(true, "CPU of test machine has NO MMX support."); | ||
| 394 | } | ||
| 395 | return TEST_SKIPPED; | ||
| 396 | } | ||
| 397 | |||
| 398 | static int SDLCALL intrinsics_testSSE(void *arg) | ||
| 399 | { | ||
| 400 | if (SDL_HasSSE()) { | ||
| 401 | SDLTest_AssertCheck(true, "CPU of test machine has SSE support."); | ||
| 402 | #ifdef SDL_SSE_INTRINSICS | ||
| 403 | { | ||
| 404 | size_t size; | ||
| 405 | float *dest, *a, *b; | ||
| 406 | |||
| 407 | SDLTest_AssertCheck(true, "Test executable uses SSE intrinsics."); | ||
| 408 | if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) { | ||
| 409 | free_arrays(dest, a, b); | ||
| 410 | return TEST_ABORTED; | ||
| 411 | } | ||
| 412 | kernel_floats_add_sse(dest, a, b, size); | ||
| 413 | verify_floats_addition(dest, a, b, size, "SSE"); | ||
| 414 | free_arrays(dest, a, b); | ||
| 415 | |||
| 416 | return TEST_COMPLETED; | ||
| 417 | } | ||
| 418 | #else | ||
| 419 | SDLTest_AssertCheck(true, "Test executable does NOT use SSE intrinsics."); | ||
| 420 | #endif | ||
| 421 | } else { | ||
| 422 | SDLTest_AssertCheck(true, "CPU of test machine has NO SSE support."); | ||
| 423 | } | ||
| 424 | return TEST_SKIPPED; | ||
| 425 | } | ||
| 426 | |||
| 427 | static int SDLCALL intrinsics_testSSE2(void *arg) | ||
| 428 | { | ||
| 429 | if (SDL_HasSSE2()) { | ||
| 430 | SDLTest_AssertCheck(true, "CPU of test machine has SSE2 support."); | ||
| 431 | #ifdef SDL_SSE2_INTRINSICS | ||
| 432 | { | ||
| 433 | size_t size; | ||
| 434 | double *dest, *a, *b; | ||
| 435 | |||
| 436 | SDLTest_AssertCheck(true, "Test executable uses SSE2 intrinsics."); | ||
| 437 | if (allocate_random_double_arrays(&dest, &a, &b, &size) < 0) { | ||
| 438 | free_arrays(dest, a, b); | ||
| 439 | return TEST_ABORTED; | ||
| 440 | } | ||
| 441 | kernel_doubles_add_sse2(dest, a, b, size); | ||
| 442 | verify_doubles_addition(dest, a, b, size, "SSE2"); | ||
| 443 | free_arrays(dest, a, b); | ||
| 444 | |||
| 445 | return TEST_COMPLETED; | ||
| 446 | } | ||
| 447 | #else | ||
| 448 | SDLTest_AssertCheck(true, "Test executable does NOT use SSE2 intrinsics."); | ||
| 449 | #endif | ||
| 450 | } else { | ||
| 451 | SDLTest_AssertCheck(true, "CPU of test machine has NO SSE2 support."); | ||
| 452 | } | ||
| 453 | return TEST_SKIPPED; | ||
| 454 | } | ||
| 455 | |||
| 456 | static int SDLCALL intrinsics_testSSE3(void *arg) | ||
| 457 | { | ||
| 458 | if (SDL_HasSSE3()) { | ||
| 459 | SDLTest_AssertCheck(true, "CPU of test machine has SSE3 support."); | ||
| 460 | #ifdef SDL_SSE3_INTRINSICS | ||
| 461 | { | ||
| 462 | size_t size; | ||
| 463 | Uint32 *dest, *a, *b; | ||
| 464 | |||
| 465 | SDLTest_AssertCheck(true, "Test executable uses SSE3 intrinsics."); | ||
| 466 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
| 467 | free_arrays(dest, a, b); | ||
| 468 | return TEST_ABORTED; | ||
| 469 | } | ||
| 470 | kernel_uints_add_sse3(dest, a, b, size); | ||
| 471 | verify_uints_addition(dest, a, b, size, "SSE3"); | ||
| 472 | free_arrays(dest, a, b); | ||
| 473 | |||
| 474 | return TEST_COMPLETED; | ||
| 475 | } | ||
| 476 | #else | ||
| 477 | SDLTest_AssertCheck(true, "Test executable does NOT use SSE3 intrinsics."); | ||
| 478 | #endif | ||
| 479 | } else { | ||
| 480 | SDLTest_AssertCheck(true, "CPU of test machine has NO SSE3 support."); | ||
| 481 | } | ||
| 482 | return TEST_SKIPPED; | ||
| 483 | } | ||
| 484 | |||
| 485 | static int SDLCALL intrinsics_testSSE4_1(void *arg) | ||
| 486 | { | ||
| 487 | if (SDL_HasSSE41()) { | ||
| 488 | SDLTest_AssertCheck(true, "CPU of test machine has SSE4.1 support."); | ||
| 489 | #ifdef SDL_SSE4_1_INTRINSICS | ||
| 490 | { | ||
| 491 | size_t size; | ||
| 492 | Uint32 *dest, *a, *b; | ||
| 493 | |||
| 494 | SDLTest_AssertCheck(true, "Test executable uses SSE4.1 intrinsics."); | ||
| 495 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
| 496 | free_arrays(dest, a, b); | ||
| 497 | return TEST_ABORTED; | ||
| 498 | } | ||
| 499 | kernel_uints_mul_sse4_1(dest, a, b, size); | ||
| 500 | verify_uints_multiplication(dest, a, b, size, "SSE4.1"); | ||
| 501 | free_arrays(dest, a, b); | ||
| 502 | |||
| 503 | return TEST_COMPLETED; | ||
| 504 | } | ||
| 505 | #else | ||
| 506 | SDLTest_AssertCheck(true, "Test executable does NOT use SSE4.1 intrinsics."); | ||
| 507 | #endif | ||
| 508 | } else { | ||
| 509 | SDLTest_AssertCheck(true, "CPU of test machine has NO SSE4.1 support."); | ||
| 510 | } | ||
| 511 | return TEST_SKIPPED; | ||
| 512 | } | ||
| 513 | |||
| 514 | static int SDLCALL intrinsics_testSSE4_2(void *arg) | ||
| 515 | { | ||
| 516 | if (SDL_HasSSE42()) { | ||
| 517 | SDLTest_AssertCheck(true, "CPU of test machine has SSE4.2 support."); | ||
| 518 | #ifdef SDL_SSE4_2_INTRINSICS | ||
| 519 | { | ||
| 520 | struct { | ||
| 521 | const char *input; | ||
| 522 | Uint32 crc32c; | ||
| 523 | } references[] = { | ||
| 524 | {"", 0x00000000}, | ||
| 525 | {"Hello world", 0x72b51f78}, | ||
| 526 | {"Simple DirectMedia Layer", 0x56f85341, }, | ||
| 527 | }; | ||
| 528 | size_t i; | ||
| 529 | |||
| 530 | SDLTest_AssertCheck(true, "Test executable uses SSE4.2 intrinsics."); | ||
| 531 | |||
| 532 | for (i = 0; i < SDL_arraysize(references); ++i) { | ||
| 533 | Uint32 actual = calculate_crc32c_sse4_2(references[i].input); | ||
| 534 | SDLTest_AssertCheck(actual == references[i].crc32c, "CRC32-C(\"%s\")=0x%08x, got 0x%08x", | ||
| 535 | references[i].input, references[i].crc32c, actual); | ||
| 536 | } | ||
| 537 | |||
| 538 | return TEST_COMPLETED; | ||
| 539 | } | ||
| 540 | #else | ||
| 541 | SDLTest_AssertCheck(true, "Test executable does NOT use SSE4.2 intrinsics."); | ||
| 542 | #endif | ||
| 543 | } else { | ||
| 544 | SDLTest_AssertCheck(true, "CPU of test machine has NO SSE4.2 support."); | ||
| 545 | } | ||
| 546 | return TEST_SKIPPED; | ||
| 547 | } | ||
| 548 | |||
| 549 | static int SDLCALL intrinsics_testAVX(void *arg) | ||
| 550 | { | ||
| 551 | if (SDL_HasAVX()) { | ||
| 552 | SDLTest_AssertCheck(true, "CPU of test machine has AVX support."); | ||
| 553 | #ifdef SDL_AVX_INTRINSICS | ||
| 554 | { | ||
| 555 | size_t size; | ||
| 556 | float *dest, *a, *b; | ||
| 557 | |||
| 558 | SDLTest_AssertCheck(true, "Test executable uses AVX intrinsics."); | ||
| 559 | if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) { | ||
| 560 | free_arrays(dest, a, b); | ||
| 561 | return TEST_ABORTED; | ||
| 562 | } | ||
| 563 | kernel_floats_add_avx(dest, a, b, size); | ||
| 564 | verify_floats_addition(dest, a, b, size, "AVX"); | ||
| 565 | free_arrays(dest, a, b); | ||
| 566 | |||
| 567 | return TEST_COMPLETED; | ||
| 568 | } | ||
| 569 | #else | ||
| 570 | SDLTest_AssertCheck(true, "Test executable does NOT use AVX intrinsics."); | ||
| 571 | #endif | ||
| 572 | } else { | ||
| 573 | SDLTest_AssertCheck(true, "CPU of test machine has NO AVX support."); | ||
| 574 | } | ||
| 575 | return TEST_SKIPPED; | ||
| 576 | } | ||
| 577 | |||
| 578 | static int SDLCALL intrinsics_testAVX2(void *arg) | ||
| 579 | { | ||
| 580 | if (SDL_HasAVX2()) { | ||
| 581 | SDLTest_AssertCheck(true, "CPU of test machine has AVX2 support."); | ||
| 582 | #ifdef SDL_AVX2_INTRINSICS | ||
| 583 | { | ||
| 584 | size_t size; | ||
| 585 | Uint32 *dest, *a, *b; | ||
| 586 | |||
| 587 | SDLTest_AssertCheck(true, "Test executable uses AVX2 intrinsics."); | ||
| 588 | if (allocate_random_uint_arrays(&dest, &a, &b, &size) < 0) { | ||
| 589 | free_arrays(dest, a, b); | ||
| 590 | return TEST_ABORTED; | ||
| 591 | } | ||
| 592 | kernel_uints_add_avx2(dest, a, b, size); | ||
| 593 | verify_uints_addition(dest, a, b, size, "AVX2"); | ||
| 594 | free_arrays(dest, a, b); | ||
| 595 | |||
| 596 | return TEST_COMPLETED; | ||
| 597 | } | ||
| 598 | #else | ||
| 599 | SDLTest_AssertCheck(true, "Test executable does NOT use AVX2 intrinsics."); | ||
| 600 | #endif | ||
| 601 | } else { | ||
| 602 | SDLTest_AssertCheck(true, "CPU of test machine has NO AVX2 support."); | ||
| 603 | } | ||
| 604 | return TEST_SKIPPED; | ||
| 605 | } | ||
| 606 | |||
| 607 | static int SDLCALL intrinsics_testAVX512F(void *arg) | ||
| 608 | { | ||
| 609 | if (SDL_HasAVX512F()) { | ||
| 610 | SDLTest_AssertCheck(true, "CPU of test machine has AVX512F support."); | ||
| 611 | #ifdef SDL_AVX512F_INTRINSICS | ||
| 612 | { | ||
| 613 | size_t size; | ||
| 614 | float *dest, *a, *b; | ||
| 615 | |||
| 616 | SDLTest_AssertCheck(true, "Test executable uses AVX512F intrinsics."); | ||
| 617 | if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) { | ||
| 618 | free_arrays(dest, a, b); | ||
| 619 | return TEST_ABORTED; | ||
| 620 | } | ||
| 621 | kernel_floats_add_avx512f(dest, a, b, size); | ||
| 622 | verify_floats_addition(dest, a, b, size, "AVX512F"); | ||
| 623 | free_arrays(dest, a, b); | ||
| 624 | |||
| 625 | return TEST_COMPLETED; | ||
| 626 | } | ||
| 627 | #else | ||
| 628 | SDLTest_AssertCheck(true, "Test executable does NOT use AVX512F intrinsics."); | ||
| 629 | #endif | ||
| 630 | } else { | ||
| 631 | SDLTest_AssertCheck(true, "CPU of test machine has NO AVX512F support."); | ||
| 632 | } | ||
| 633 | |||
| 634 | return TEST_SKIPPED; | ||
| 635 | } | ||
| 636 | |||
| 637 | /* ================= Test References ================== */ | ||
| 638 | |||
| 639 | /* Intrinsics test cases */ | ||
| 640 | |||
| 641 | static const SDLTest_TestCaseReference intrinsicsTest1 = { | ||
| 642 | intrinsics_selftest, "intrinsics_selftest", "Intrinsics testautomation selftest", TEST_ENABLED | ||
| 643 | }; | ||
| 644 | |||
| 645 | static const SDLTest_TestCaseReference intrinsicsTest2 = { | ||
| 646 | intrinsics_testMMX, "intrinsics_testMMX", "Tests MMX intrinsics", TEST_ENABLED | ||
| 647 | }; | ||
| 648 | |||
| 649 | static const SDLTest_TestCaseReference intrinsicsTest3 = { | ||
| 650 | intrinsics_testSSE, "intrinsics_testSSE", "Tests SSE intrinsics", TEST_ENABLED | ||
| 651 | }; | ||
| 652 | |||
| 653 | static const SDLTest_TestCaseReference intrinsicsTest4 = { | ||
| 654 | intrinsics_testSSE2, "intrinsics_testSSE2", "Tests SSE2 intrinsics", TEST_ENABLED | ||
| 655 | }; | ||
| 656 | |||
| 657 | static const SDLTest_TestCaseReference intrinsicsTest5 = { | ||
| 658 | intrinsics_testSSE3, "intrinsics_testSSE3", "Tests SSE3 intrinsics", TEST_ENABLED | ||
| 659 | }; | ||
| 660 | |||
| 661 | static const SDLTest_TestCaseReference intrinsicsTest6 = { | ||
| 662 | intrinsics_testSSE4_1, "intrinsics_testSSE4.1", "Tests SSE4.1 intrinsics", TEST_ENABLED | ||
| 663 | }; | ||
| 664 | |||
| 665 | static const SDLTest_TestCaseReference intrinsicsTest7 = { | ||
| 666 | intrinsics_testSSE4_2, "intrinsics_testSSE4.2", "Tests SSE4.2 intrinsics", TEST_ENABLED | ||
| 667 | }; | ||
| 668 | |||
| 669 | static const SDLTest_TestCaseReference intrinsicsTest8 = { | ||
| 670 | intrinsics_testAVX, "intrinsics_testAVX", "Tests AVX intrinsics", TEST_ENABLED | ||
| 671 | }; | ||
| 672 | |||
| 673 | static const SDLTest_TestCaseReference intrinsicsTest9 = { | ||
| 674 | intrinsics_testAVX2, "intrinsics_testAVX2", "Tests AVX2 intrinsics", TEST_ENABLED | ||
| 675 | }; | ||
| 676 | |||
| 677 | static const SDLTest_TestCaseReference intrinsicsTest10 = { | ||
| 678 | intrinsics_testAVX512F, "intrinsics_testAVX512F", "Tests AVX512F intrinsics", TEST_ENABLED | ||
| 679 | }; | ||
| 680 | |||
| 681 | /* Sequence of Platform test cases */ | ||
| 682 | static const SDLTest_TestCaseReference *platformTests[] = { | ||
| 683 | &intrinsicsTest1, | ||
| 684 | &intrinsicsTest2, | ||
| 685 | &intrinsicsTest3, | ||
| 686 | &intrinsicsTest4, | ||
| 687 | &intrinsicsTest5, | ||
| 688 | &intrinsicsTest6, | ||
| 689 | &intrinsicsTest7, | ||
| 690 | &intrinsicsTest8, | ||
| 691 | &intrinsicsTest9, | ||
| 692 | &intrinsicsTest10, | ||
| 693 | NULL | ||
| 694 | }; | ||
| 695 | |||
| 696 | /* Platform test suite (global) */ | ||
| 697 | SDLTest_TestSuiteReference intrinsicsTestSuite = { | ||
| 698 | "Intrinsics", | ||
| 699 | NULL, | ||
| 700 | platformTests, | ||
| 701 | NULL | ||
| 702 | }; | ||
