From 5a079a2d114f96d4847d1ee305d5b7c16eeec50e Mon Sep 17 00:00:00 2001 From: 3gg <3gg@shellblade.net> Date: Sat, 27 Dec 2025 12:03:39 -0800 Subject: Initial commit --- contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_sse.c | 460 ++++++++++++++++++++++ 1 file changed, 460 insertions(+) create mode 100644 contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_sse.c (limited to 'contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_sse.c') diff --git a/contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_sse.c b/contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_sse.c new file mode 100644 index 0000000..37fe7e4 --- /dev/null +++ b/contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_sse.c @@ -0,0 +1,460 @@ +// Copyright 2016 Adrien Descamps +// Distributed under BSD 3-Clause License +#include "SDL_internal.h" + +#ifdef SDL_HAVE_YUV +#include "yuv_rgb_internal.h" + +#ifdef SDL_SSE2_INTRINSICS + +/* SDL doesn't use these atm and compiling them adds seconds onto the build. --ryan. +#define SSE_FUNCTION_NAME yuv420_rgb565_sse +#define STD_FUNCTION_NAME yuv420_rgb565_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_rgb24_sse +#define STD_FUNCTION_NAME yuv420_rgb24_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_rgba_sse +#define STD_FUNCTION_NAME yuv420_rgba_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGBA +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_bgra_sse +#define STD_FUNCTION_NAME yuv420_bgra_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_BGRA +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_argb_sse +#define STD_FUNCTION_NAME yuv420_argb_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_ARGB +#define SSE_ALIGNED +#include 
"yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_abgr_sse +#define STD_FUNCTION_NAME yuv420_abgr_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_ABGR +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_rgb565_sse +#define STD_FUNCTION_NAME yuv422_rgb565_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_rgb24_sse +#define STD_FUNCTION_NAME yuv422_rgb24_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_rgba_sse +#define STD_FUNCTION_NAME yuv422_rgba_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGBA +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_bgra_sse +#define STD_FUNCTION_NAME yuv422_bgra_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_BGRA +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_argb_sse +#define STD_FUNCTION_NAME yuv422_argb_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_ARGB +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_abgr_sse +#define STD_FUNCTION_NAME yuv422_abgr_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_ABGR +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_rgb565_sse +#define STD_FUNCTION_NAME yuvnv12_rgb565_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_rgb24_sse +#define STD_FUNCTION_NAME yuvnv12_rgb24_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_rgba_sse +#define 
STD_FUNCTION_NAME yuvnv12_rgba_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGBA +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_bgra_sse +#define STD_FUNCTION_NAME yuvnv12_bgra_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_BGRA +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_argb_sse +#define STD_FUNCTION_NAME yuvnv12_argb_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_ARGB +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_abgr_sse +#define STD_FUNCTION_NAME yuvnv12_abgr_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_ABGR +#define SSE_ALIGNED +#include "yuv_rgb_sse_func.h" +*/ + +#define SSE_FUNCTION_NAME yuv420_rgb565_sseu +#define STD_FUNCTION_NAME yuv420_rgb565_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_rgb24_sseu +#define STD_FUNCTION_NAME yuv420_rgb24_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_rgba_sseu +#define STD_FUNCTION_NAME yuv420_rgba_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGBA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_bgra_sseu +#define STD_FUNCTION_NAME yuv420_bgra_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_BGRA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_argb_sseu +#define STD_FUNCTION_NAME yuv420_argb_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_ARGB +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_abgr_sseu +#define STD_FUNCTION_NAME yuv420_abgr_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_ABGR +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME 
yuv422_rgb565_sseu +#define STD_FUNCTION_NAME yuv422_rgb565_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_rgb24_sseu +#define STD_FUNCTION_NAME yuv422_rgb24_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_rgba_sseu +#define STD_FUNCTION_NAME yuv422_rgba_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGBA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_bgra_sseu +#define STD_FUNCTION_NAME yuv422_bgra_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_BGRA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_argb_sseu +#define STD_FUNCTION_NAME yuv422_argb_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_ARGB +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_abgr_sseu +#define STD_FUNCTION_NAME yuv422_abgr_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_ABGR +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu +#define STD_FUNCTION_NAME yuvnv12_rgb565_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu +#define STD_FUNCTION_NAME yuvnv12_rgb24_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_rgba_sseu +#define STD_FUNCTION_NAME yuvnv12_rgba_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGBA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_bgra_sseu +#define STD_FUNCTION_NAME yuvnv12_bgra_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_BGRA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_argb_sseu +#define STD_FUNCTION_NAME 
yuvnv12_argb_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_ARGB +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_abgr_sseu +#define STD_FUNCTION_NAME yuvnv12_abgr_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_ABGR +#include "yuv_rgb_sse_func.h" + + +/* SDL doesn't use these atm and compiling them adds seconds onto the build. --ryan. +#define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ +R1 = _mm_unpacklo_epi8(RGB1, RGB4); \ +R2 = _mm_unpackhi_epi8(RGB1, RGB4); \ +G1 = _mm_unpacklo_epi8(RGB2, RGB5); \ +G2 = _mm_unpackhi_epi8(RGB2, RGB5); \ +B1 = _mm_unpacklo_epi8(RGB3, RGB6); \ +B2 = _mm_unpackhi_epi8(RGB3, RGB6); + +#define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ +RGB1 = _mm_unpacklo_epi8(R1, G2); \ +RGB2 = _mm_unpackhi_epi8(R1, G2); \ +RGB3 = _mm_unpacklo_epi8(R2, B1); \ +RGB4 = _mm_unpackhi_epi8(R2, B1); \ +RGB5 = _mm_unpacklo_epi8(G1, B2); \ +RGB6 = _mm_unpackhi_epi8(G1, B2); \ + +#define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ +UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ +UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ +UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ +UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ +UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ + +#define RGB2YUV_16(R, G, B, Y, U, V) \ +Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \ + _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \ +Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \ +Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \ +Y = _mm_srai_epi16(Y, PRECISION); \ +U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \ + _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \ +U = _mm_add_epi16(U, _mm_mullo_epi16(B, 
_mm_set1_epi16(param->matrix[1][2]))); \ +U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \ +U = _mm_srai_epi16(U, PRECISION); \ +V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \ + _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \ +V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \ +V = _mm_add_epi16(V, _mm_set1_epi16(128<