3 #include "../../SDL_internal.h" 11 #define PRECISION_FACTOR (1<<PRECISION) 35 #define V(value) (int16_t)((value*PRECISION_FACTOR)+0.5) 44 { 0,
V(1.0),
V(1.402), -
V(0.3441), -
V(0.7141),
V(1.772)},
46 { 16,
V(1.1644),
V(1.596), -
V(0.3918), -
V(0.813),
V(2.0172)},
48 { 16,
V(1.1644),
V(1.7927), -
V(0.2132), -
V(0.5329),
V(2.1124)}
53 { 0, {{
V(0.299),
V(0.587),
V(0.114)}, {-
V(0.1687), -
V(0.3313),
V(0.5)}, {
V(0.5), -
V(0.4187), -
V(0.0813)}}},
55 { 16, {{
V(0.2568),
V(0.5041),
V(0.0979)}, {-
V(0.1482), -
V(0.291),
V(0.4392)}, {
V(0.4392), -
V(0.3678), -
V(0.0714)}}},
57 { 16, {{
V(0.1826),
V(0.6142),
V(0.062)}, {-
V(0.1006), -
V(0.3386),
V(0.4392)}, {
V(0.4392), -
V(0.3989), -
V(0.0403)}}}
61 #define YUV_FORMAT_420 1 62 #define YUV_FORMAT_422 2 63 #define YUV_FORMAT_NV12 3 66 #define RGB_FORMAT_RGB565 1 67 #define RGB_FORMAT_RGB24 2 68 #define RGB_FORMAT_RGBA 3 69 #define RGB_FORMAT_BGRA 4 70 #define RGB_FORMAT_ARGB 5 71 #define RGB_FORMAT_ABGR 6 78 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
79 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
80 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
81 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
82 91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
83 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
84 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
85 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
86 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
87 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
88 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
89 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
90 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
96 #define STD_FUNCTION_NAME yuv420_rgb565_std 97 #define YUV_FORMAT YUV_FORMAT_420 98 #define RGB_FORMAT RGB_FORMAT_RGB565 101 #define STD_FUNCTION_NAME yuv420_rgb24_std 102 #define YUV_FORMAT YUV_FORMAT_420 103 #define RGB_FORMAT RGB_FORMAT_RGB24 106 #define STD_FUNCTION_NAME yuv420_rgba_std 107 #define YUV_FORMAT YUV_FORMAT_420 108 #define RGB_FORMAT RGB_FORMAT_RGBA 111 #define STD_FUNCTION_NAME yuv420_bgra_std 112 #define YUV_FORMAT YUV_FORMAT_420 113 #define RGB_FORMAT RGB_FORMAT_BGRA 116 #define STD_FUNCTION_NAME yuv420_argb_std 117 #define YUV_FORMAT YUV_FORMAT_420 118 #define RGB_FORMAT RGB_FORMAT_ARGB 121 #define STD_FUNCTION_NAME yuv420_abgr_std 122 #define YUV_FORMAT YUV_FORMAT_420 123 #define RGB_FORMAT RGB_FORMAT_ABGR 126 #define STD_FUNCTION_NAME yuv422_rgb565_std 127 #define YUV_FORMAT YUV_FORMAT_422 128 #define RGB_FORMAT RGB_FORMAT_RGB565 131 #define STD_FUNCTION_NAME yuv422_rgb24_std 132 #define YUV_FORMAT YUV_FORMAT_422 133 #define RGB_FORMAT RGB_FORMAT_RGB24 136 #define STD_FUNCTION_NAME yuv422_rgba_std 137 #define YUV_FORMAT YUV_FORMAT_422 138 #define RGB_FORMAT RGB_FORMAT_RGBA 141 #define STD_FUNCTION_NAME yuv422_bgra_std 142 #define YUV_FORMAT YUV_FORMAT_422 143 #define RGB_FORMAT RGB_FORMAT_BGRA 146 #define STD_FUNCTION_NAME yuv422_argb_std 147 #define YUV_FORMAT YUV_FORMAT_422 148 #define RGB_FORMAT RGB_FORMAT_ARGB 151 #define STD_FUNCTION_NAME yuv422_abgr_std 152 #define YUV_FORMAT YUV_FORMAT_422 153 #define RGB_FORMAT RGB_FORMAT_ABGR 156 #define STD_FUNCTION_NAME yuvnv12_rgb565_std 157 #define YUV_FORMAT YUV_FORMAT_NV12 158 #define RGB_FORMAT RGB_FORMAT_RGB565 161 #define STD_FUNCTION_NAME yuvnv12_rgb24_std 162 #define YUV_FORMAT YUV_FORMAT_NV12 163 #define RGB_FORMAT RGB_FORMAT_RGB24 166 #define STD_FUNCTION_NAME yuvnv12_rgba_std 167 #define YUV_FORMAT YUV_FORMAT_NV12 168 #define RGB_FORMAT RGB_FORMAT_RGBA 171 #define STD_FUNCTION_NAME yuvnv12_bgra_std 172 #define YUV_FORMAT YUV_FORMAT_NV12 173 #define RGB_FORMAT RGB_FORMAT_BGRA 176 #define STD_FUNCTION_NAME yuvnv12_argb_std 177 #define YUV_FORMAT YUV_FORMAT_NV12 178 #define RGB_FORMAT RGB_FORMAT_ARGB 181 #define STD_FUNCTION_NAME yuvnv12_abgr_std 182 #define YUV_FORMAT YUV_FORMAT_NV12 183 #define RGB_FORMAT RGB_FORMAT_ABGR 195 for(y=0; y<(height-1); y+=2)
197 const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
198 *rgb_ptr2=RGB+(y+1)*RGB_stride;
201 *y_ptr2=Y+(y+1)*Y_stride,
202 *u_ptr=U+(y/2)*UV_stride,
203 *v_ptr=V+(y/2)*UV_stride;
205 for(x=0; x<(width-1); x+=2)
210 y_tmp = param->
matrix[0][0]*rgb_ptr1[0] + param->
matrix[0][1]*rgb_ptr1[1] + param->
matrix[0][2]*rgb_ptr1[2];
211 u_tmp = param->
matrix[1][0]*rgb_ptr1[0] + param->
matrix[1][1]*rgb_ptr1[1] + param->
matrix[1][2]*rgb_ptr1[2];
212 v_tmp = param->
matrix[2][0]*rgb_ptr1[0] + param->
matrix[2][1]*rgb_ptr1[1] + param->
matrix[2][2]*rgb_ptr1[2];
215 y_tmp = param->
matrix[0][0]*rgb_ptr1[3] + param->
matrix[0][1]*rgb_ptr1[4] + param->
matrix[0][2]*rgb_ptr1[5];
216 u_tmp += param->
matrix[1][0]*rgb_ptr1[3] + param->
matrix[1][1]*rgb_ptr1[4] + param->
matrix[1][2]*rgb_ptr1[5];
217 v_tmp += param->
matrix[2][0]*rgb_ptr1[3] + param->
matrix[2][1]*rgb_ptr1[4] + param->
matrix[2][2]*rgb_ptr1[5];
220 y_tmp = param->
matrix[0][0]*rgb_ptr2[0] + param->
matrix[0][1]*rgb_ptr2[1] + param->
matrix[0][2]*rgb_ptr2[2];
221 u_tmp += param->
matrix[1][0]*rgb_ptr2[0] + param->
matrix[1][1]*rgb_ptr2[1] + param->
matrix[1][2]*rgb_ptr2[2];
222 v_tmp += param->
matrix[2][0]*rgb_ptr2[0] + param->
matrix[2][1]*rgb_ptr2[1] + param->
matrix[2][2]*rgb_ptr2[2];
225 y_tmp = param->
matrix[0][0]*rgb_ptr2[3] + param->
matrix[0][1]*rgb_ptr2[4] + param->
matrix[0][2]*rgb_ptr2[5];
226 u_tmp += param->
matrix[1][0]*rgb_ptr2[3] + param->
matrix[1][1]*rgb_ptr2[4] + param->
matrix[1][2]*rgb_ptr2[5];
227 v_tmp += param->
matrix[2][0]*rgb_ptr2[3] + param->
matrix[2][1]*rgb_ptr2[4] + param->
matrix[2][2]*rgb_ptr2[5];
245 #define SSE_FUNCTION_NAME yuv420_rgb565_sse 246 #define STD_FUNCTION_NAME yuv420_rgb565_std 247 #define YUV_FORMAT YUV_FORMAT_420 248 #define RGB_FORMAT RGB_FORMAT_RGB565 252 #define SSE_FUNCTION_NAME yuv420_rgb565_sseu 253 #define STD_FUNCTION_NAME yuv420_rgb565_std 254 #define YUV_FORMAT YUV_FORMAT_420 255 #define RGB_FORMAT RGB_FORMAT_RGB565 258 #define SSE_FUNCTION_NAME yuv420_rgb24_sse 259 #define STD_FUNCTION_NAME yuv420_rgb24_std 260 #define YUV_FORMAT YUV_FORMAT_420 261 #define RGB_FORMAT RGB_FORMAT_RGB24 265 #define SSE_FUNCTION_NAME yuv420_rgb24_sseu 266 #define STD_FUNCTION_NAME yuv420_rgb24_std 267 #define YUV_FORMAT YUV_FORMAT_420 268 #define RGB_FORMAT RGB_FORMAT_RGB24 271 #define SSE_FUNCTION_NAME yuv420_rgba_sse 272 #define STD_FUNCTION_NAME yuv420_rgba_std 273 #define YUV_FORMAT YUV_FORMAT_420 274 #define RGB_FORMAT RGB_FORMAT_RGBA 278 #define SSE_FUNCTION_NAME yuv420_rgba_sseu 279 #define STD_FUNCTION_NAME yuv420_rgba_std 280 #define YUV_FORMAT YUV_FORMAT_420 281 #define RGB_FORMAT RGB_FORMAT_RGBA 284 #define SSE_FUNCTION_NAME yuv420_bgra_sse 285 #define STD_FUNCTION_NAME yuv420_bgra_std 286 #define YUV_FORMAT YUV_FORMAT_420 287 #define RGB_FORMAT RGB_FORMAT_BGRA 291 #define SSE_FUNCTION_NAME yuv420_bgra_sseu 292 #define STD_FUNCTION_NAME yuv420_bgra_std 293 #define YUV_FORMAT YUV_FORMAT_420 294 #define RGB_FORMAT RGB_FORMAT_BGRA 297 #define SSE_FUNCTION_NAME yuv420_argb_sse 298 #define STD_FUNCTION_NAME yuv420_argb_std 299 #define YUV_FORMAT YUV_FORMAT_420 300 #define RGB_FORMAT RGB_FORMAT_ARGB 304 #define SSE_FUNCTION_NAME yuv420_argb_sseu 305 #define STD_FUNCTION_NAME yuv420_argb_std 306 #define YUV_FORMAT YUV_FORMAT_420 307 #define RGB_FORMAT RGB_FORMAT_ARGB 310 #define SSE_FUNCTION_NAME yuv420_abgr_sse 311 #define STD_FUNCTION_NAME yuv420_abgr_std 312 #define YUV_FORMAT YUV_FORMAT_420 313 #define RGB_FORMAT RGB_FORMAT_ABGR 317 #define SSE_FUNCTION_NAME yuv420_abgr_sseu 318 #define STD_FUNCTION_NAME yuv420_abgr_std 319 #define YUV_FORMAT YUV_FORMAT_420 320 #define RGB_FORMAT RGB_FORMAT_ABGR 323 #define SSE_FUNCTION_NAME yuv422_rgb565_sse 324 #define STD_FUNCTION_NAME yuv422_rgb565_std 325 #define YUV_FORMAT YUV_FORMAT_422 326 #define RGB_FORMAT RGB_FORMAT_RGB565 330 #define SSE_FUNCTION_NAME yuv422_rgb565_sseu 331 #define STD_FUNCTION_NAME yuv422_rgb565_std 332 #define YUV_FORMAT YUV_FORMAT_422 333 #define RGB_FORMAT RGB_FORMAT_RGB565 336 #define SSE_FUNCTION_NAME yuv422_rgb24_sse 337 #define STD_FUNCTION_NAME yuv422_rgb24_std 338 #define YUV_FORMAT YUV_FORMAT_422 339 #define RGB_FORMAT RGB_FORMAT_RGB24 343 #define SSE_FUNCTION_NAME yuv422_rgb24_sseu 344 #define STD_FUNCTION_NAME yuv422_rgb24_std 345 #define YUV_FORMAT YUV_FORMAT_422 346 #define RGB_FORMAT RGB_FORMAT_RGB24 349 #define SSE_FUNCTION_NAME yuv422_rgba_sse 350 #define STD_FUNCTION_NAME yuv422_rgba_std 351 #define YUV_FORMAT YUV_FORMAT_422 352 #define RGB_FORMAT RGB_FORMAT_RGBA 356 #define SSE_FUNCTION_NAME yuv422_rgba_sseu 357 #define STD_FUNCTION_NAME yuv422_rgba_std 358 #define YUV_FORMAT YUV_FORMAT_422 359 #define RGB_FORMAT RGB_FORMAT_RGBA 362 #define SSE_FUNCTION_NAME yuv422_bgra_sse 363 #define STD_FUNCTION_NAME yuv422_bgra_std 364 #define YUV_FORMAT YUV_FORMAT_422 365 #define RGB_FORMAT RGB_FORMAT_BGRA 369 #define SSE_FUNCTION_NAME yuv422_bgra_sseu 370 #define STD_FUNCTION_NAME yuv422_bgra_std 371 #define YUV_FORMAT YUV_FORMAT_422 372 #define RGB_FORMAT RGB_FORMAT_BGRA 375 #define SSE_FUNCTION_NAME yuv422_argb_sse 376 #define STD_FUNCTION_NAME yuv422_argb_std 377 #define YUV_FORMAT YUV_FORMAT_422 378 #define RGB_FORMAT RGB_FORMAT_ARGB 382 #define SSE_FUNCTION_NAME yuv422_argb_sseu 383 #define STD_FUNCTION_NAME yuv422_argb_std 384 #define YUV_FORMAT YUV_FORMAT_422 385 #define RGB_FORMAT RGB_FORMAT_ARGB 388 #define SSE_FUNCTION_NAME yuv422_abgr_sse 389 #define STD_FUNCTION_NAME yuv422_abgr_std 390 #define YUV_FORMAT YUV_FORMAT_422 391 #define RGB_FORMAT RGB_FORMAT_ABGR 395 #define SSE_FUNCTION_NAME yuv422_abgr_sseu 396 #define STD_FUNCTION_NAME yuv422_abgr_std 397 #define YUV_FORMAT YUV_FORMAT_422 398 #define RGB_FORMAT RGB_FORMAT_ABGR 401 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sse 402 #define STD_FUNCTION_NAME yuvnv12_rgb565_std 403 #define YUV_FORMAT YUV_FORMAT_NV12 404 #define RGB_FORMAT RGB_FORMAT_RGB565 408 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu 409 #define STD_FUNCTION_NAME yuvnv12_rgb565_std 410 #define YUV_FORMAT YUV_FORMAT_NV12 411 #define RGB_FORMAT RGB_FORMAT_RGB565 414 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sse 415 #define STD_FUNCTION_NAME yuvnv12_rgb24_std 416 #define YUV_FORMAT YUV_FORMAT_NV12 417 #define RGB_FORMAT RGB_FORMAT_RGB24 421 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu 422 #define STD_FUNCTION_NAME yuvnv12_rgb24_std 423 #define YUV_FORMAT YUV_FORMAT_NV12 424 #define RGB_FORMAT RGB_FORMAT_RGB24 427 #define SSE_FUNCTION_NAME yuvnv12_rgba_sse 428 #define STD_FUNCTION_NAME yuvnv12_rgba_std 429 #define YUV_FORMAT YUV_FORMAT_NV12 430 #define RGB_FORMAT RGB_FORMAT_RGBA 434 #define SSE_FUNCTION_NAME yuvnv12_rgba_sseu 435 #define STD_FUNCTION_NAME yuvnv12_rgba_std 436 #define YUV_FORMAT YUV_FORMAT_NV12 437 #define RGB_FORMAT RGB_FORMAT_RGBA 440 #define SSE_FUNCTION_NAME yuvnv12_bgra_sse 441 #define STD_FUNCTION_NAME yuvnv12_bgra_std 442 #define YUV_FORMAT YUV_FORMAT_NV12 443 #define RGB_FORMAT RGB_FORMAT_BGRA 447 #define SSE_FUNCTION_NAME yuvnv12_bgra_sseu 448 #define STD_FUNCTION_NAME yuvnv12_bgra_std 449 #define YUV_FORMAT YUV_FORMAT_NV12 450 #define RGB_FORMAT RGB_FORMAT_BGRA 453 #define SSE_FUNCTION_NAME yuvnv12_argb_sse 454 #define STD_FUNCTION_NAME yuvnv12_argb_std 455 #define YUV_FORMAT YUV_FORMAT_NV12 456 #define RGB_FORMAT RGB_FORMAT_ARGB 460 #define SSE_FUNCTION_NAME yuvnv12_argb_sseu 461 #define STD_FUNCTION_NAME yuvnv12_argb_std 462 #define YUV_FORMAT YUV_FORMAT_NV12 463 #define RGB_FORMAT RGB_FORMAT_ARGB 466 #define SSE_FUNCTION_NAME yuvnv12_abgr_sse 467 #define STD_FUNCTION_NAME yuvnv12_abgr_std 468 #define YUV_FORMAT YUV_FORMAT_NV12 469 #define RGB_FORMAT RGB_FORMAT_ABGR 473 #define SSE_FUNCTION_NAME yuvnv12_abgr_sseu 474 #define STD_FUNCTION_NAME yuvnv12_abgr_std 475 #define YUV_FORMAT YUV_FORMAT_NV12 476 #define RGB_FORMAT RGB_FORMAT_ABGR 480 #define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 481 R1 = _mm_unpacklo_epi8(RGB1, RGB4); \ 482 R2 = _mm_unpackhi_epi8(RGB1, RGB4); \ 483 G1 = _mm_unpacklo_epi8(RGB2, RGB5); \ 484 G2 = _mm_unpackhi_epi8(RGB2, RGB5); \ 485 B1 = _mm_unpacklo_epi8(RGB3, RGB6); \ 486 B2 = _mm_unpackhi_epi8(RGB3, RGB6); 488 #define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 489 RGB1 = _mm_unpacklo_epi8(R1, G2); \ 490 RGB2 = _mm_unpackhi_epi8(R1, G2); \ 491 RGB3 = _mm_unpacklo_epi8(R2, B1); \ 492 RGB4 = _mm_unpackhi_epi8(R2, B1); \ 493 RGB5 = _mm_unpacklo_epi8(G1, B2); \ 494 RGB6 = _mm_unpackhi_epi8(G1, B2); \ 496 #define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 497 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 498 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 499 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 500 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 501 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ 503 #define RGB2YUV_16(R, G, B, Y, U, V) \ 504 Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \ 505 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \ 506 Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \ 507 Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \ 508 Y = _mm_srai_epi16(Y, PRECISION); \ 509 U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \ 510 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \ 511 U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \ 512 U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \ 513 U = _mm_srai_epi16(U, PRECISION); \ 514 V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \ 515 _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \ 516 V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \ 517 V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \ 518 V = _mm_srai_epi16(V, PRECISION); 521 __m128i r1, r2, b1, b2, g1, g2; \ 522 __m128i r_16, g_16, b_16; \ 523 __m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \ 524 __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \ 525 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \ 526 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \ 527 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \ 528 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \ 529 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \ 531 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \ 533 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \ 534 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \ 535 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \ 536 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 537 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \ 538 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \ 539 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \ 540 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 541 y = _mm_packus_epi16(y1_16, y2_16); \ 542 u1 = _mm_packus_epi16(u1_16, u2_16); \ 543 v1 = _mm_packus_epi16(v1_16, v2_16); \ 545 SAVE_SI128((__m128i*)(y_ptr1), y); \ 547 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \ 548 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \ 549 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \ 550 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 551 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \ 552 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \ 553 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \ 554 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 555 y = _mm_packus_epi16(y1_16, y2_16); \ 556 u2 = _mm_packus_epi16(u1_16, u2_16); \ 557 v2 = _mm_packus_epi16(v1_16, v2_16); \ 559 SAVE_SI128((__m128i*)(y_ptr2), y); \ 561 u1_tmp = _mm_avg_epu8(u1, u2); \ 562 v1_tmp = _mm_avg_epu8(v1, v2); \ 564 rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \ 565 rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \ 566 rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \ 567 rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \ 568 rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \ 569 rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \ 571 UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \ 573 r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \ 574 g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \ 575 b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \ 576 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 577 r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \ 578 g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \ 579 b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \ 580 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 581 y = _mm_packus_epi16(y1_16, y2_16); \ 582 u1 = _mm_packus_epi16(u1_16, u2_16); \ 583 v1 = _mm_packus_epi16(v1_16, v2_16); \ 585 SAVE_SI128((__m128i*)(y_ptr1+16), y); \ 587 r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \ 588 g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \ 589 b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \ 590 RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \ 591 r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \ 592 g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \ 593 b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \ 594 RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \ 595 y = _mm_packus_epi16(y1_16, y2_16); \ 596 u2 = _mm_packus_epi16(u1_16, u2_16); \ 597 v2 = _mm_packus_epi16(v1_16, v2_16); \ 599 SAVE_SI128((__m128i*)(y_ptr2+16), y); \ 601 u2_tmp = _mm_avg_epu8(u1, u2); \ 602 v2_tmp = _mm_avg_epu8(v1, v2); \ 604 u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \ 605 v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \ 606 u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \ 607 v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \ 608 u1 = _mm_avg_epu8(u1, u2); \ 609 v1 = _mm_avg_epu8(v1, v2); \ 610 SAVE_SI128((__m128i*)(u_ptr), u1); \ 611 SAVE_SI128((__m128i*)(v_ptr), v1); 618 #define LOAD_SI128 _mm_load_si128 619 #define SAVE_SI128 _mm_stream_si128 623 for(ypos=0; ypos<(
height-1); ypos+=2)
626 *rgb_ptr2=
RGB+(ypos+1)*RGB_stride;
629 *y_ptr2=
Y+(ypos+1)*Y_stride,
630 *u_ptr=U+(ypos/2)*UV_stride,
631 *v_ptr=
V+(ypos/2)*UV_stride;
633 for(xpos=0; xpos<(
width-31); xpos+=32)
654 #define LOAD_SI128 _mm_loadu_si128 655 #define SAVE_SI128 _mm_storeu_si128 659 for(ypos=0; ypos<(
height-1); ypos+=2)
662 *rgb_ptr2=
RGB+(ypos+1)*RGB_stride;
665 *y_ptr2=
Y+(ypos+1)*Y_stride,
666 *u_ptr=U+(ypos/2)*UV_stride,
667 *v_ptr=
V+(ypos/2)*UV_stride;
669 for(xpos=0; xpos<(
width-31); xpos+=32)
GLint GLint GLint GLint GLint x
void rgb24_yuv420_sseu(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
static const RGB2YUVParam RGB2YUV[3]
void rgb24_yuv420_std(uint32_t width, uint32_t height, const uint8_t *RGB, uint32_t RGB_stride, uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, YCbCrType yuv_type)
GLint GLint GLsizei width
void rgb24_yuv420_sse(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
static const YUV2RGBParam YUV2RGB[3]
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp if else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld, [WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld, [WK1]90:.else PF bic, WK0, base, #31 PF pld, [WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld, [WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X, [sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X, [sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld, [DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if SINGLE_SCANLINE ifc b endif else if vars_spilled word LINE_SAVED_REGS endif subs Y
GLint GLint GLint GLint GLint GLint y
static uint8_t clampU8(int32_t v)
GLint GLint GLsizei GLsizei height