diff options
| author | 3gg <3gg@shellblade.net> | 2025-12-27 12:03:39 -0800 |
|---|---|---|
| committer | 3gg <3gg@shellblade.net> | 2025-12-27 12:03:39 -0800 |
| commit | 5a079a2d114f96d4847d1ee305d5b7c16eeec50e (patch) | |
| tree | 8926ab44f168acf787d8e19608857b3af0f82758 /contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_lsx_func.h | |
Initial commit
Diffstat (limited to 'contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_lsx_func.h')
| -rw-r--r-- | contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_lsx_func.h | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_lsx_func.h b/contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_lsx_func.h new file mode 100644 index 0000000..89d582a --- /dev/null +++ b/contrib/SDL-3.2.8/src/video/yuv2rgb/yuv_rgb_lsx_func.h | |||
| @@ -0,0 +1,372 @@ | |||
| 1 | // Copyright 2016 Adrien Descamps | ||
| 2 | // Distributed under BSD 3-Clause License | ||
| 3 | |||
| 4 | #include <lsxintrin.h> | ||
| 5 | |||
// Input loaders, only provided for YUV_FORMAT_420 (any other format stops the
// build with #error).
//   READ_Y(y_ptr): loads one __m128i from y_ptr (offset 0) into `y`.
//   READ_UV:       loads one __m128i each from u_ptr and v_ptr (offset 0)
//                  into `u_temp` and `v_temp`.
// Neither macro declares its destination: `y`, `u_temp`, `v_temp`, `u_ptr` and
// `v_ptr` must already exist at the expansion site (YUV2RGB_32 declares the
// vectors; the conversion function declares the pointers).
| 6 | #if YUV_FORMAT == YUV_FORMAT_420 | ||
| 7 | |||
| 8 | #define READ_Y(y_ptr) \ | ||
| 9 | y = __lsx_vld(y_ptr, 0); \ | ||
| 10 | |||
| 11 | #define READ_UV \ | ||
| 12 | u_temp = __lsx_vld(u_ptr, 0); \ | ||
| 13 | v_temp = __lsx_vld(v_ptr, 0); \ | ||
| 14 | |||
| 15 | #else | ||
| 16 | #error READ_UV unimplemented | ||
| 17 | #endif | ||
| 18 | |||
// Interleaves four byte planes into 4-byte pixels.
// Inputs:  R1/G1/B1/A1 and R2/G2/B2/A2, two sets of four byte vectors.
// Outputs: RGB1..RGB4 are built from the "1" set, RGB5..RGB8 from the "2" set.
// Each set is first interleaved byte-wise into (B,A) and (R,G) pairs, then the
// pairs are interleaved 16 bits at a time; the low/high halves of every step
// yield the four output vectors per set.
// The parameter names are positional only: PACK_PIXEL passes different colour
// planes in these slots to obtain the RGBA/BGRA/ARGB/ABGR layouts.
// NOTE(review): the resulting byte order in memory depends on the operand
// order convention of __lsx_vilvl_b/__lsx_vilvh_b and __lsx_vilvl_h/__lsx_vilvh_h;
// confirm against the LSX intrinsics reference before reordering anything.
| 19 | #define PACK_RGBA_32(R1, R2, G1, G2, B1, B2, A1, A2, RGB1, RGB2, \ | ||
| 20 | RGB3, RGB4, RGB5, RGB6, RGB7, RGB8) \ | ||
| 21 | { \ | ||
| 22 | __m128i ab_l, ab_h, gr_l, gr_h; \ | ||
| 23 | ab_l = __lsx_vilvl_b(B1, A1); \ | ||
| 24 | ab_h = __lsx_vilvh_b(B1, A1); \ | ||
| 25 | gr_l = __lsx_vilvl_b(R1, G1); \ | ||
| 26 | gr_h = __lsx_vilvh_b(R1, G1); \ | ||
| 27 | RGB1 = __lsx_vilvl_h(gr_l, ab_l); \ | ||
| 28 | RGB2 = __lsx_vilvh_h(gr_l, ab_l); \ | ||
| 29 | RGB3 = __lsx_vilvl_h(gr_h, ab_h); \ | ||
| 30 | RGB4 = __lsx_vilvh_h(gr_h, ab_h); \ | ||
| 31 | ab_l = __lsx_vilvl_b(B2, A2); \ | ||
| 32 | ab_h = __lsx_vilvh_b(B2, A2); \ | ||
| 33 | gr_l = __lsx_vilvl_b(R2, G2); \ | ||
| 34 | gr_h = __lsx_vilvh_b(R2, G2); \ | ||
| 35 | RGB5 = __lsx_vilvl_h(gr_l, ab_l); \ | ||
| 36 | RGB6 = __lsx_vilvh_h(gr_l, ab_l); \ | ||
| 37 | RGB7 = __lsx_vilvl_h(gr_h, ab_h); \ | ||
| 38 | RGB8 = __lsx_vilvh_h(gr_h, ab_h); \ | ||
| 39 | } | ||
| 40 | |||
// PACK_RGB24_32_STEP: turns one R, one G and one B byte vector into three
// output vectors (RGB1..RGB3) of 3-byte pixels. Each output is produced by a
// byte interleave/shuffle followed by a second shuffle that merges in the
// remaining colour plane. The shuffle controls mask1..mask5 are NOT parameters;
// they must be in scope where the macro expands (the conversion function
// declares them in its RGB_FORMAT_RGB24 branch).
// PACK_RGB24_32: applies the step twice, once for (R1,G1,B1) -> RGB1..RGB3 and
// once for (R2,G2,B2) -> RGB4..RGB6.
// NOTE(review): the expansions end in a line continuation and are terminated
// by the blank line that follows each #define; keep that blank line.
| 41 | #define PACK_RGB24_32_STEP(R, G, B, RGB1, RGB2, RGB3) \ | ||
| 42 | RGB1 = __lsx_vilvl_b(G, R); \ | ||
| 43 | RGB1 = __lsx_vshuf_b(B, RGB1, mask1); \ | ||
| 44 | RGB2 = __lsx_vshuf_b(B, G, mask2); \ | ||
| 45 | RGB2 = __lsx_vshuf_b(R, RGB2, mask3); \ | ||
| 46 | RGB3 = __lsx_vshuf_b(R, B, mask4); \ | ||
| 47 | RGB3 = __lsx_vshuf_b(G, RGB3, mask5); \ | ||
| 48 | |||
| 49 | #define PACK_RGB24_32(R1, R2, G1, G2, B1, B2, RGB1, RGB2, RGB3, RGB4, RGB5, RGB6) \ | ||
| 50 | PACK_RGB24_32_STEP(R1, G1, B1, RGB1, RGB2, RGB3); \ | ||
| 51 | PACK_RGB24_32_STEP(R2, G2, B2, RGB4, RGB5, RGB6); \ | ||
| 52 | |||
// PACK_PIXEL: declares the output vectors rgb_N and fills them from the
// 8-bit colour planes produced by YUV2RGB_32. One definition is selected per
// RGB_FORMAT; an unsupported format stops the build with #error.
//
// Plane naming (from YUV2RGB_32): x_8_LH where L is the line (1 = first,
// 2 = second) and H is the half of the 32-pixel group (1 = first 16 pixels,
// 2 = last 16 pixels).
//
//   RGB24:               rgb_1..rgb_6 hold line 1, rgb_7..rgb_12 hold line 2.
//   RGBA/BGRA/ARGB/ABGR: rgb_1..rgb_8 hold line 1, rgb_9..rgb_16 hold line 2.
//     The four layouts differ only in which planes are passed in which
//     PACK_RGBA_32 slot; `a` is a constant alpha vector from __lsx_vldi(0xFF)
//     (presumably every byte set to 0xFF - confirm against the LSX docs).
//
// The declarations are deliberately not wrapped in their own scope: SAVE_LINE1
// and SAVE_LINE2 read rgb_N afterwards, so PACK_PIXEL must be expanded inside
// the same block as they are.
| 53 | #if RGB_FORMAT == RGB_FORMAT_RGB24 | ||
| 54 | |||
| 55 | #define PACK_PIXEL \ | ||
| 56 | __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6; \ | ||
| 57 | __m128i rgb_7, rgb_8, rgb_9, rgb_10, rgb_11, rgb_12; \ | ||
| 58 | PACK_RGB24_32(r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, \ | ||
| 59 | rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6) \ | ||
| 60 | PACK_RGB24_32(r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, \ | ||
| 61 | rgb_7, rgb_8, rgb_9, rgb_10, rgb_11, rgb_12) \ | ||
| 62 | |||
| 63 | #elif RGB_FORMAT == RGB_FORMAT_RGBA | ||
| 64 | |||
| 65 | #define PACK_PIXEL \ | ||
| 66 | __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ | ||
| 67 | __m128i rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ | ||
| 68 | __m128i a = __lsx_vldi(0xFF); \ | ||
| 69 | PACK_RGBA_32(r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, a, a, \ | ||
| 70 | rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ | ||
| 71 | PACK_RGBA_32(r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, a, a, \ | ||
| 72 | rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16) \ | ||
| 73 | |||
| 74 | #elif RGB_FORMAT == RGB_FORMAT_BGRA | ||
| 75 | |||
| 76 | #define PACK_PIXEL \ | ||
| 77 | __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ | ||
| 78 | __m128i rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ | ||
| 79 | __m128i a = __lsx_vldi(0xFF); \ | ||
| 80 | PACK_RGBA_32(b_8_11, b_8_12, g_8_11, g_8_12, r_8_11, r_8_12, a, a, \ | ||
| 81 | rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ | ||
| 82 | PACK_RGBA_32(b_8_21, b_8_22, g_8_21, g_8_22, r_8_21, r_8_22, a, a, \ | ||
| 83 | rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16) \ | ||
| 84 | |||
| 85 | #elif RGB_FORMAT == RGB_FORMAT_ARGB | ||
| 86 | |||
| 87 | #define PACK_PIXEL \ | ||
| 88 | __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ | ||
| 89 | __m128i rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ | ||
| 90 | __m128i a = __lsx_vldi(0xFF); \ | ||
| 91 | PACK_RGBA_32(a, a, r_8_11, r_8_12, g_8_11, g_8_12, b_8_11, b_8_12, \ | ||
| 92 | rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ | ||
| 93 | PACK_RGBA_32(a, a, r_8_21, r_8_22, g_8_21, g_8_22, b_8_21, b_8_22, \ | ||
| 94 | rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16) \ | ||
| 95 | |||
| 96 | #elif RGB_FORMAT == RGB_FORMAT_ABGR | ||
| 97 | |||
| 98 | #define PACK_PIXEL \ | ||
| 99 | __m128i rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8; \ | ||
| 100 | __m128i rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16; \ | ||
| 101 | __m128i a = __lsx_vldi(0xFF); \ | ||
| 102 | PACK_RGBA_32(a, a, b_8_11, b_8_12, g_8_11, g_8_12, r_8_11, r_8_12, \ | ||
| 103 | rgb_1, rgb_2, rgb_3, rgb_4, rgb_5, rgb_6, rgb_7, rgb_8) \ | ||
| 104 | PACK_RGBA_32(a, a, b_8_21, b_8_22, g_8_21, g_8_22, r_8_21, r_8_22, \ | ||
| 105 | rgb_9, rgb_10, rgb_11, rgb_12, rgb_13, rgb_14, rgb_15, rgb_16) \ | ||
| 106 | |||
| 107 | #else | ||
| 108 | #error PACK_PIXEL unimplemented | ||
| 109 | #endif | ||
| 110 | |||
// Stores two vectors: in0 at pdst and in1 at pdst + stride (both with an
// immediate offset of 0). `pdst` and `stride` are pasted unparenthesized, so
// pass simple expressions; every use in this file passes a pointer (optionally
// plus a constant) and the literal 16.
| 111 | #define LSX_ST_UB2(in0, in1, pdst, stride) \ | ||
| 112 | { \ | ||
| 113 | __lsx_vst(in0, pdst, 0); \ | ||
| 114 | __lsx_vst(in1, pdst + stride, 0); \ | ||
| 115 | } | ||
| 116 | |||
// SAVE_LINE1 / SAVE_LINE2: write the vectors produced by PACK_PIXEL to
// rgb_ptr1 (first line) and rgb_ptr2 (second line), 16 bytes apart.
//   RGB24:  6 vectors per line = 96 bytes  (32 pixels * 3 bytes).
//   32-bit: 8 vectors per line = 128 bytes (32 pixels * 4 bytes).
// rgb_ptr1 / rgb_ptr2 and the rgb_N vectors must be in scope at the expansion
// site. Any other RGB_FORMAT stops the build with #error.
// NOTE(review): the #if and #elif lines end in a line continuation that joins
// them with the following blank line; this is harmless but relies on that
// line staying blank.
| 117 | #if RGB_FORMAT == RGB_FORMAT_RGB24 \ | ||
| 118 | |||
| 119 | #define SAVE_LINE1 \ | ||
| 120 | LSX_ST_UB2(rgb_1, rgb_2, rgb_ptr1, 16); \ | ||
| 121 | LSX_ST_UB2(rgb_3, rgb_4, rgb_ptr1 + 32, 16); \ | ||
| 122 | LSX_ST_UB2(rgb_5, rgb_6, rgb_ptr1 + 64, 16); \ | ||
| 123 | |||
| 124 | #define SAVE_LINE2 \ | ||
| 125 | LSX_ST_UB2(rgb_7, rgb_8, rgb_ptr2, 16); \ | ||
| 126 | LSX_ST_UB2(rgb_9, rgb_10, rgb_ptr2 + 32, 16); \ | ||
| 127 | LSX_ST_UB2(rgb_11, rgb_12, rgb_ptr2 + 64, 16); \ | ||
| 128 | |||
| 129 | #elif RGB_FORMAT == RGB_FORMAT_RGBA || RGB_FORMAT == RGB_FORMAT_BGRA || \ | ||
| 130 | RGB_FORMAT == RGB_FORMAT_ARGB || RGB_FORMAT == RGB_FORMAT_ABGR \ | ||
| 131 | |||
| 132 | #define SAVE_LINE1 \ | ||
| 133 | LSX_ST_UB2(rgb_1, rgb_2, rgb_ptr1, 16); \ | ||
| 134 | LSX_ST_UB2(rgb_3, rgb_4, rgb_ptr1 + 32, 16); \ | ||
| 135 | LSX_ST_UB2(rgb_5, rgb_6, rgb_ptr1 + 64, 16); \ | ||
| 136 | LSX_ST_UB2(rgb_7, rgb_8, rgb_ptr1 + 96, 16); \ | ||
| 137 | |||
| 138 | #define SAVE_LINE2 \ | ||
| 139 | LSX_ST_UB2(rgb_9, rgb_10, rgb_ptr2, 16); \ | ||
| 140 | LSX_ST_UB2(rgb_11, rgb_12, rgb_ptr2 + 32, 16); \ | ||
| 141 | LSX_ST_UB2(rgb_13, rgb_14, rgb_ptr2 + 64, 16); \ | ||
| 142 | LSX_ST_UB2(rgb_15, rgb_16, rgb_ptr2 + 96, 16); \ | ||
| 143 | |||
| 144 | #else | ||
| 145 | #error SAVE_LINE unimplemented | ||
| 146 | #endif | ||
| 147 | |||
// Computes the chroma contribution to each colour channel from 16-bit U and V
// lanes (already bias-corrected by the caller), using the coefficient vectors
// v2r, u2g, v2g and u2b and the scratch vectors r_temp/g_temp/b_temp from the
// expansion site.
// Every product vector is then interleaved with itself, which repeats each
// lane twice: R1/G1/B1 receive the low half and R2/G2/B2 the high half. This
// matches the horizontal chroma subsampling (one U/V sample per two pixels).
| 148 | // r = v*vr, g = u*ug + v*vg, b = u*ub | ||
| 149 | #define UV2RGB_16(U, V, R1, G1, B1, R2, G2, B2) \ | ||
| 150 | r_temp = __lsx_vmul_h(V, v2r); \ | ||
| 151 | g_temp = __lsx_vmul_h(U, u2g); \ | ||
| 152 | g_temp = __lsx_vmadd_h(g_temp, V, v2g); \ | ||
| 153 | b_temp = __lsx_vmul_h(U, u2b); \ | ||
| 154 | R1 = __lsx_vilvl_h(r_temp, r_temp); \ | ||
| 155 | G1 = __lsx_vilvl_h(g_temp, g_temp); \ | ||
| 156 | B1 = __lsx_vilvl_h(b_temp, b_temp); \ | ||
| 157 | R2 = __lsx_vilvh_h(r_temp, r_temp); \ | ||
| 158 | G2 = __lsx_vilvh_h(g_temp, g_temp); \ | ||
| 159 | B2 = __lsx_vilvh_h(b_temp, b_temp); \ | ||
| 160 | |||
// Adds the scaled luma to the chroma contributions and drops the fixed-point
// fraction. Y1/Y2 are overwritten with the scaled luma; R/G/B 1 and 2 are
// updated in place. `shift` (luma offset) and `yf` (luma factor) are vectors
// from the expansion site; PRECISION is defined outside this file.
// The right shift is arithmetic (__lsx_vsrai_h), so results can be negative
// or exceed 255 until CLIP is applied.
| 161 | // Y = (Y - shift) * yf; R = (Y + R) >> PRECISION, G = (Y + G) >> PRECISION, B = (Y + B) >> PRECISION | ||
| 162 | #define ADD_Y2RGB_16(Y1, Y2, R1, G1, B1, R2, G2, B2) \ | ||
| 163 | Y1 = __lsx_vsub_h(Y1, shift); \ | ||
| 164 | Y2 = __lsx_vsub_h(Y2, shift); \ | ||
| 165 | Y1 = __lsx_vmul_h(Y1, yf); \ | ||
| 166 | Y2 = __lsx_vmul_h(Y2, yf); \ | ||
| 167 | R1 = __lsx_vadd_h(R1, Y1); \ | ||
| 168 | G1 = __lsx_vadd_h(G1, Y1); \ | ||
| 169 | B1 = __lsx_vadd_h(B1, Y1); \ | ||
| 170 | R2 = __lsx_vadd_h(R2, Y2); \ | ||
| 171 | G2 = __lsx_vadd_h(G2, Y2); \ | ||
| 172 | B2 = __lsx_vadd_h(B2, Y2); \ | ||
| 173 | R1 = __lsx_vsrai_h(R1, PRECISION); \ | ||
| 174 | G1 = __lsx_vsrai_h(G1, PRECISION); \ | ||
| 175 | B1 = __lsx_vsrai_h(B1, PRECISION); \ | ||
| 176 | R2 = __lsx_vsrai_h(R2, PRECISION); \ | ||
| 177 | G2 = __lsx_vsrai_h(G2, PRECISION); \ | ||
| 178 | B2 = __lsx_vsrai_h(B2, PRECISION); \ | ||
| 179 | |||
// Clamps six vectors of 16-bit lanes in place, in two passes:
//   1. signed max with 0, which removes negative values;
//   2. unsigned saturation with immediate 7 (presumably an upper bound of
//      2^8 - 1 = 255 - confirm against the LSX documentation).
// After CLIP the callers narrow the lanes to bytes with __lsx_vpickev_b.
| 180 | #define CLIP(in0, in1, in2, in3, in4, in5) \ | ||
| 181 | { \ | ||
| 182 | in0 = __lsx_vmaxi_h(in0, 0); \ | ||
| 183 | in1 = __lsx_vmaxi_h(in1, 0); \ | ||
| 184 | in2 = __lsx_vmaxi_h(in2, 0); \ | ||
| 185 | in3 = __lsx_vmaxi_h(in3, 0); \ | ||
| 186 | in4 = __lsx_vmaxi_h(in4, 0); \ | ||
| 187 | in5 = __lsx_vmaxi_h(in5, 0); \ | ||
| 188 | in0 = __lsx_vsat_hu(in0, 7); \ | ||
| 189 | in1 = __lsx_vsat_hu(in1, 7); \ | ||
| 190 | in2 = __lsx_vsat_hu(in2, 7); \ | ||
| 191 | in3 = __lsx_vsat_hu(in3, 7); \ | ||
| 192 | in4 = __lsx_vsat_hu(in4, 7); \ | ||
| 193 | in5 = __lsx_vsat_hu(in5, 7); \ | ||
| 194 | } | ||
| 195 | |||
// Converts one tile of 32 pixels x 2 lines from YUV to 8-bit R, G and B planes.
//
// Reads:    y_ptr1, y_ptr2, u_ptr, v_ptr, y_pixel_stride and the constant
//           vectors zero, bias, shift, yf, v2r, v2g, u2g, u2b, all of which
//           must be in scope at the expansion site.
// Declares: every working vector it uses, plus the results
//           r_8_LH / g_8_LH / b_8_LH (L = line 1 or 2, H = first or last
//           16 pixels). The declarations are left in the enclosing scope on
//           purpose so that PACK_PIXEL can consume the results.
//
// Steps, repeated for the low half and then the high half of the loaded
// U/V vectors:
//   1. widen U and V bytes to 16-bit lanes by interleaving with `zero` and
//      subtract `bias`;
//   2. UV2RGB_16 computes the chroma terms, which are saved in r_uv_* /
//      g_uv_* / b_uv_* so the second line can reuse them (both lines of a
//      tile share the same chroma samples);
//   3. for each of the two lines: load 16 luma bytes, widen them, apply
//      ADD_Y2RGB_16 and CLIP, then narrow back to bytes with __lsx_vpickev_b.
| 196 | #define YUV2RGB_32 \ | ||
| 197 | __m128i y, u_temp, v_temp; \ | ||
| 198 | __m128i r_8_11, g_8_11, b_8_11, r_8_21, g_8_21, b_8_21; \ | ||
| 199 | __m128i r_8_12, g_8_12, b_8_12, r_8_22, g_8_22, b_8_22; \ | ||
| 200 | __m128i u, v, r_temp, g_temp, b_temp; \ | ||
| 201 | __m128i r_1, g_1, b_1, r_2, g_2, b_2; \ | ||
| 202 | __m128i y_1, y_2; \ | ||
| 203 | __m128i r_uv_1, g_uv_1, b_uv_1, r_uv_2, g_uv_2, b_uv_2; \ | ||
| 204 | \ | ||
| 205 | READ_UV \ | ||
| 206 | \ | ||
| 207 | /* process first 16 pixels of first line */ \ | ||
| 208 | u = __lsx_vilvl_b(zero, u_temp); \ | ||
| 209 | v = __lsx_vilvl_b(zero, v_temp); \ | ||
| 210 | u = __lsx_vsub_h(u, bias); \ | ||
| 211 | v = __lsx_vsub_h(v, bias); \ | ||
| 212 | UV2RGB_16(u, v, r_1, g_1, b_1, r_2, g_2, b_2); \ | ||
| 213 | r_uv_1 = r_1; g_uv_1 = g_1; b_uv_1 = b_1; \ | ||
| 214 | r_uv_2 = r_2; g_uv_2 = g_2; b_uv_2 = b_2; \ | ||
| 215 | READ_Y(y_ptr1) \ | ||
| 216 | y_1 = __lsx_vilvl_b(zero, y); \ | ||
| 217 | y_2 = __lsx_vilvh_b(zero, y); \ | ||
| 218 | ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ | ||
| 219 | CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ | ||
| 220 | r_8_11 = __lsx_vpickev_b(r_2, r_1); \ | ||
| 221 | g_8_11 = __lsx_vpickev_b(g_2, g_1); \ | ||
| 222 | b_8_11 = __lsx_vpickev_b(b_2, b_1); \ | ||
| 223 | \ | ||
| 224 | /* process first 16 pixels of second line */ \ | ||
| 225 | r_1 = r_uv_1; g_1 = g_uv_1; b_1 = b_uv_1; \ | ||
| 226 | r_2 = r_uv_2; g_2 = g_uv_2; b_2 = b_uv_2; \ | ||
| 227 | \ | ||
| 228 | READ_Y(y_ptr2) \ | ||
| 229 | y_1 = __lsx_vilvl_b(zero, y); \ | ||
| 230 | y_2 = __lsx_vilvh_b(zero, y); \ | ||
| 231 | ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ | ||
| 232 | CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ | ||
| 233 | r_8_21 = __lsx_vpickev_b(r_2, r_1); \ | ||
| 234 | g_8_21 = __lsx_vpickev_b(g_2, g_1); \ | ||
| 235 | b_8_21 = __lsx_vpickev_b(b_2, b_1); \ | ||
| 236 | \ | ||
| 237 | /* process last 16 pixels of first line */ \ | ||
| 238 | u = __lsx_vilvh_b(zero, u_temp); \ | ||
| 239 | v = __lsx_vilvh_b(zero, v_temp); \ | ||
| 240 | u = __lsx_vsub_h(u, bias); \ | ||
| 241 | v = __lsx_vsub_h(v, bias); \ | ||
| 242 | UV2RGB_16(u, v, r_1, g_1, b_1, r_2, g_2, b_2); \ | ||
| 243 | r_uv_1 = r_1; g_uv_1 = g_1; b_uv_1 = b_1; \ | ||
| 244 | r_uv_2 = r_2; g_uv_2 = g_2; b_uv_2 = b_2; \ | ||
| 245 | READ_Y(y_ptr1 + 16 * y_pixel_stride) \ | ||
| 246 | y_1 = __lsx_vilvl_b(zero, y); \ | ||
| 247 | y_2 = __lsx_vilvh_b(zero, y); \ | ||
| 248 | ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ | ||
| 249 | CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ | ||
| 250 | r_8_12 = __lsx_vpickev_b(r_2, r_1); \ | ||
| 251 | g_8_12 = __lsx_vpickev_b(g_2, g_1); \ | ||
| 252 | b_8_12 = __lsx_vpickev_b(b_2, b_1); \ | ||
| 253 | \ | ||
| 254 | /* process last 16 pixels of second line */ \ | ||
| 255 | r_1 = r_uv_1; g_1 = g_uv_1; b_1 = b_uv_1; \ | ||
| 256 | r_2 = r_uv_2; g_2 = g_uv_2; b_2 = b_uv_2; \ | ||
| 257 | \ | ||
| 258 | READ_Y(y_ptr2 + 16 * y_pixel_stride) \ | ||
| 259 | y_1 = __lsx_vilvl_b(zero, y); \ | ||
| 260 | y_2 = __lsx_vilvh_b(zero, y); \ | ||
| 261 | ADD_Y2RGB_16(y_1, y_2, r_1, g_1, b_1, r_2, g_2, b_2) \ | ||
| 262 | CLIP(r_1, g_1, b_1, r_2, g_2, b_2); \ | ||
| 263 | r_8_22 = __lsx_vpickev_b(r_2, r_1); \ | ||
| 264 | g_8_22 = __lsx_vpickev_b(g_2, g_1); \ | ||
| 265 | b_8_22 = __lsx_vpickev_b(b_2, b_1); \ | ||
| 266 | \ | ||
| 267 | |||
| 268 | void LSX_FUNCTION_NAME(uint32_t width, uint32_t height, const uint8_t *Y, | ||
| 269 | const uint8_t *U, const uint8_t *V, uint32_t Y_stride, | ||
| 270 | uint32_t UV_stride, uint8_t *RGB, uint32_t RGB_stride, | ||
| 271 | YCbCrType yuv_type) | ||
| 272 | { | ||
| 273 | const YUV2RGBParam *const param = &(YUV2RGB[yuv_type]); | ||
| 274 | #if YUV_FORMAT == YUV_FORMAT_420 | ||
| 275 | const int y_pixel_stride = 1; | ||
| 276 | const int uv_pixel_stride = 1; | ||
| 277 | const int uv_x_sample_interval = 2; | ||
| 278 | const int uv_y_sample_interval = 2; | ||
| 279 | #endif | ||
| 280 | |||
| 281 | #if RGB_FORMAT == RGB_FORMAT_RGB565 | ||
| 282 | const int rgb_pixel_stride = 2; | ||
| 283 | #elif RGB_FORMAT == RGB_FORMAT_RGB24 | ||
| 284 | const int rgb_pixel_stride = 3; | ||
| 285 | __m128i mask1 = {0x0504110302100100, 0x0A14090813070612}; | ||
| 286 | __m128i mask2 = {0x1808170716061505, 0x00000000000A1909}; | ||
| 287 | __m128i mask3 = {0x0504170302160100, 0x0A1A090819070618}; | ||
| 288 | __m128i mask4 = {0x1E0D1D0C1C0B1B0A, 0x00000000000F1F0E}; | ||
| 289 | __m128i mask5 = {0x05041C03021B0100, 0x0A1F09081E07061D}; | ||
| 290 | #elif RGB_FORMAT == RGB_FORMAT_RGBA || RGB_FORMAT_BGRA || \ | ||
| 291 | RGB_FORMAT == RGB_FORMAT_ARGB || RGB_FORMAT_ABGR | ||
| 292 | const int rgb_pixel_stride = 4; | ||
| 293 | #else | ||
| 294 | #error Unknown RGB pixel size | ||
| 295 | #endif | ||
| 296 | |||
| 297 | uint32_t xpos, ypos; | ||
| 298 | __m128i v2r = __lsx_vreplgr2vr_h(param->v_r_factor); | ||
| 299 | __m128i v2g = __lsx_vreplgr2vr_h(param->v_g_factor); | ||
| 300 | __m128i u2g = __lsx_vreplgr2vr_h(param->u_g_factor); | ||
| 301 | __m128i u2b = __lsx_vreplgr2vr_h(param->u_b_factor); | ||
| 302 | __m128i bias = __lsx_vreplgr2vr_h(128); | ||
| 303 | __m128i shift = __lsx_vreplgr2vr_h(param->y_shift); | ||
| 304 | __m128i yf = __lsx_vreplgr2vr_h(param->y_factor); | ||
| 305 | __m128i zero = __lsx_vldi(0); | ||
| 306 | |||
| 307 | if (width >= 32) { | ||
| 308 | for (ypos = 0; ypos < (height - (uv_y_sample_interval - 1)); ypos += uv_y_sample_interval) { | ||
| 309 | const uint8_t *y_ptr1 = Y + ypos * Y_stride, | ||
| 310 | *y_ptr2 = Y + (ypos + 1) * Y_stride, | ||
| 311 | *u_ptr = U + (ypos/uv_y_sample_interval) * UV_stride, | ||
| 312 | *v_ptr = V + (ypos/uv_y_sample_interval) * UV_stride; | ||
| 313 | uint8_t *rgb_ptr1 = RGB + ypos * RGB_stride, | ||
| 314 | *rgb_ptr2 = RGB + (ypos + 1) * RGB_stride; | ||
| 315 | |||
| 316 | for (xpos = 0; xpos < (width - 31); xpos += 32){ | ||
| 317 | YUV2RGB_32 | ||
| 318 | { | ||
| 319 | PACK_PIXEL | ||
| 320 | SAVE_LINE1 | ||
| 321 | if (uv_y_sample_interval > 1) | ||
| 322 | { | ||
| 323 | SAVE_LINE2 | ||
| 324 | } | ||
| 325 | } | ||
| 326 | y_ptr1 += 32 * y_pixel_stride; | ||
| 327 | y_ptr2 += 32 * y_pixel_stride; | ||
| 328 | u_ptr += 32 * uv_pixel_stride/uv_x_sample_interval; | ||
| 329 | v_ptr += 32 * uv_pixel_stride/uv_x_sample_interval; | ||
| 330 | rgb_ptr1 += 32 * rgb_pixel_stride; | ||
| 331 | rgb_ptr2 += 32 * rgb_pixel_stride; | ||
| 332 | } | ||
| 333 | } | ||
| 334 | if (uv_y_sample_interval == 2 && ypos == (height - 1)) { | ||
| 335 | const uint8_t *y_ptr = Y + ypos * Y_stride, | ||
| 336 | *u_ptr = U + (ypos/uv_y_sample_interval) * UV_stride, | ||
| 337 | *v_ptr = V + (ypos/uv_y_sample_interval) * UV_stride; | ||
| 338 | uint8_t *rgb_ptr = RGB + ypos * RGB_stride; | ||
| 339 | STD_FUNCTION_NAME(width, 1, y_ptr, u_ptr, v_ptr, Y_stride, UV_stride, rgb_ptr, RGB_stride, yuv_type); | ||
| 340 | } | ||
| 341 | } | ||
| 342 | { | ||
| 343 | int converted = (width & ~31); | ||
| 344 | if (converted != width) | ||
| 345 | { | ||
| 346 | const uint8_t *y_ptr = Y + converted * y_pixel_stride, | ||
| 347 | *u_ptr = U + converted * uv_pixel_stride / uv_x_sample_interval, | ||
| 348 | *v_ptr = V + converted * uv_pixel_stride / uv_x_sample_interval; | ||
| 349 | uint8_t *rgb_ptr = RGB + converted * rgb_pixel_stride; | ||
| 350 | |||
| 351 | STD_FUNCTION_NAME(width-converted, height, y_ptr, u_ptr, v_ptr, Y_stride, UV_stride, rgb_ptr, RGB_stride, yuv_type); | ||
| 352 | } | ||
| 353 | } | ||
| 354 | } | ||
| 355 | |||
// Removes the configuration macros (LSX_FUNCTION_NAME, STD_FUNCTION_NAME,
// YUV_FORMAT, RGB_FORMAT) and the helper macros defined above, presumably so
// that this file can be included again with a different configuration.
// NOTE(review): LSX_ALIGNED is not defined anywhere in this file, and CLIP is
// defined above but not undefined here - confirm whether both are intentional.
| 356 | #undef LSX_FUNCTION_NAME | ||
| 357 | #undef STD_FUNCTION_NAME | ||
| 358 | #undef YUV_FORMAT | ||
| 359 | #undef RGB_FORMAT | ||
| 360 | #undef LSX_ALIGNED | ||
| 361 | #undef LSX_ST_UB2 | ||
| 362 | #undef UV2RGB_16 | ||
| 363 | #undef ADD_Y2RGB_16 | ||
| 364 | #undef PACK_RGB24_32_STEP | ||
| 365 | #undef PACK_RGB24_32 | ||
| 366 | #undef PACK_PIXEL | ||
| 367 | #undef PACK_RGBA_32 | ||
| 368 | #undef SAVE_LINE1 | ||
| 369 | #undef SAVE_LINE2 | ||
| 370 | #undef READ_Y | ||
| 371 | #undef READ_UV | ||
| 372 | #undef YUV2RGB_32 | ||
