SDL  2.0
yuv_rgb.c
Go to the documentation of this file.
1 // Copyright 2016 Adrien Descamps
2 // Distributed under BSD 3-Clause License
3 #include "../../SDL_internal.h"
4 
5 #include "yuv_rgb.h"
6 
7 #include "SDL_cpuinfo.h"
8 /*#include <x86intrin.h>*/
9 
10 #define PRECISION 6
11 #define PRECISION_FACTOR (1<<PRECISION)
12 
// Fixed-point coefficients of one RGB -> YUV conversion standard.
typedef struct
{
    uint8_t y_shift;      // offset added to the luma result (0 or 16 in the tables of this file)
    int16_t matrix[3][3]; // conversion matrix, each entry scaled by PRECISION_FACTOR
} RGB2YUVParam;
// |Y|   |y_shift|                        |matrix[0][0] matrix[0][1] matrix[0][2]|   |R|
// |U| = |  128  | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G|
// |V|   |  128  |                        |matrix[2][0] matrix[2][1] matrix[2][2]|   |B|
21 
// Fixed-point coefficients of one YUV -> RGB conversion standard.
// Member order matches the positional initializers of the YUV2RGB table.
typedef struct
{
    uint8_t y_shift;    // value subtracted from Y before scaling
    int16_t y_factor;   // luma gain, applied to all three channels
    int16_t v_r_factor; // contribution of (V-128) to R
    int16_t u_g_factor; // contribution of (U-128) to G
    int16_t v_g_factor; // contribution of (V-128) to G
    int16_t u_b_factor; // contribution of (U-128) to B
} YUV2RGBParam;
// |R|                        |y_factor      0       v_r_factor|   |Y-y_shift|
// |G| = 1/PRECISION_FACTOR * |y_factor  u_g_factor  v_g_factor| * |  U-128  |
// |B|                        |y_factor  u_b_factor      0     |   |  V-128  |
34 
// Convert a non-negative real coefficient to fixed point (scaled by PRECISION_FACTOR),
// rounding to the nearest integer. Negative coefficients are written as -V(x) so that
// the +0.5 rounding term is applied to the magnitude.
// The argument and the whole expansion are parenthesized so that V(a+b) and
// expressions such as x*V(y) evaluate as expected.
#define V(value) ((int16_t)(((value)*PRECISION_FACTOR)+0.5))
36 
37 // for ITU-T T.871, values can be found in section 7
38 // for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255])
39 // for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255])
40 // all values are rounded to the fourth decimal
41 
42 static const YUV2RGBParam YUV2RGB[3] = {
43  // ITU-T T.871 (JPEG)
44  {/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)},
45  // ITU-R BT.601-7
46  {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)},
47  // ITU-R BT.709-6
48  {/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)}
49 };
50 
51 static const RGB2YUVParam RGB2YUV[3] = {
52  // ITU-T T.871 (JPEG)
53  {/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}},
54  // ITU-R BT.601-7
55  {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}},
56  // ITU-R BT.709-6
57  {/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}}
58 };
59 
60 /* The various layouts of YUV data we support */
61 #define YUV_FORMAT_420 1
62 #define YUV_FORMAT_422 2
63 #define YUV_FORMAT_NV12 3
64 
65 /* The various formats of RGB pixel that we support */
66 #define RGB_FORMAT_RGB565 1
67 #define RGB_FORMAT_RGB24 2
68 #define RGB_FORMAT_RGBA 3
69 #define RGB_FORMAT_BGRA 4
70 #define RGB_FORMAT_ARGB 5
71 #define RGB_FORMAT_ABGR 6
72 
73 // divide by PRECISION_FACTOR and clamp to [0:255] interval
74 // input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range
76 {
77  static const uint8_t lut[512] =
78  {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
79  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
80  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
81  47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
82  91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
83  126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
84  159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
85  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
86  225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
87  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
88  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
89  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
90  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
91  };
92  return lut[(v+128*PRECISION_FACTOR)>>PRECISION];
93 }
94 
95 
96 #define STD_FUNCTION_NAME yuv420_rgb565_std
97 #define YUV_FORMAT YUV_FORMAT_420
98 #define RGB_FORMAT RGB_FORMAT_RGB565
99 #include "yuv_rgb_std_func.h"
100 
101 #define STD_FUNCTION_NAME yuv420_rgb24_std
102 #define YUV_FORMAT YUV_FORMAT_420
103 #define RGB_FORMAT RGB_FORMAT_RGB24
104 #include "yuv_rgb_std_func.h"
105 
106 #define STD_FUNCTION_NAME yuv420_rgba_std
107 #define YUV_FORMAT YUV_FORMAT_420
108 #define RGB_FORMAT RGB_FORMAT_RGBA
109 #include "yuv_rgb_std_func.h"
110 
111 #define STD_FUNCTION_NAME yuv420_bgra_std
112 #define YUV_FORMAT YUV_FORMAT_420
113 #define RGB_FORMAT RGB_FORMAT_BGRA
114 #include "yuv_rgb_std_func.h"
115 
116 #define STD_FUNCTION_NAME yuv420_argb_std
117 #define YUV_FORMAT YUV_FORMAT_420
118 #define RGB_FORMAT RGB_FORMAT_ARGB
119 #include "yuv_rgb_std_func.h"
120 
121 #define STD_FUNCTION_NAME yuv420_abgr_std
122 #define YUV_FORMAT YUV_FORMAT_420
123 #define RGB_FORMAT RGB_FORMAT_ABGR
124 #include "yuv_rgb_std_func.h"
125 
126 #define STD_FUNCTION_NAME yuv422_rgb565_std
127 #define YUV_FORMAT YUV_FORMAT_422
128 #define RGB_FORMAT RGB_FORMAT_RGB565
129 #include "yuv_rgb_std_func.h"
130 
131 #define STD_FUNCTION_NAME yuv422_rgb24_std
132 #define YUV_FORMAT YUV_FORMAT_422
133 #define RGB_FORMAT RGB_FORMAT_RGB24
134 #include "yuv_rgb_std_func.h"
135 
136 #define STD_FUNCTION_NAME yuv422_rgba_std
137 #define YUV_FORMAT YUV_FORMAT_422
138 #define RGB_FORMAT RGB_FORMAT_RGBA
139 #include "yuv_rgb_std_func.h"
140 
141 #define STD_FUNCTION_NAME yuv422_bgra_std
142 #define YUV_FORMAT YUV_FORMAT_422
143 #define RGB_FORMAT RGB_FORMAT_BGRA
144 #include "yuv_rgb_std_func.h"
145 
146 #define STD_FUNCTION_NAME yuv422_argb_std
147 #define YUV_FORMAT YUV_FORMAT_422
148 #define RGB_FORMAT RGB_FORMAT_ARGB
149 #include "yuv_rgb_std_func.h"
150 
151 #define STD_FUNCTION_NAME yuv422_abgr_std
152 #define YUV_FORMAT YUV_FORMAT_422
153 #define RGB_FORMAT RGB_FORMAT_ABGR
154 #include "yuv_rgb_std_func.h"
155 
156 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
157 #define YUV_FORMAT YUV_FORMAT_NV12
158 #define RGB_FORMAT RGB_FORMAT_RGB565
159 #include "yuv_rgb_std_func.h"
160 
161 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
162 #define YUV_FORMAT YUV_FORMAT_NV12
163 #define RGB_FORMAT RGB_FORMAT_RGB24
164 #include "yuv_rgb_std_func.h"
165 
166 #define STD_FUNCTION_NAME yuvnv12_rgba_std
167 #define YUV_FORMAT YUV_FORMAT_NV12
168 #define RGB_FORMAT RGB_FORMAT_RGBA
169 #include "yuv_rgb_std_func.h"
170 
171 #define STD_FUNCTION_NAME yuvnv12_bgra_std
172 #define YUV_FORMAT YUV_FORMAT_NV12
173 #define RGB_FORMAT RGB_FORMAT_BGRA
174 #include "yuv_rgb_std_func.h"
175 
176 #define STD_FUNCTION_NAME yuvnv12_argb_std
177 #define YUV_FORMAT YUV_FORMAT_NV12
178 #define RGB_FORMAT RGB_FORMAT_ARGB
179 #include "yuv_rgb_std_func.h"
180 
181 #define STD_FUNCTION_NAME yuvnv12_abgr_std
182 #define YUV_FORMAT YUV_FORMAT_NV12
183 #define RGB_FORMAT RGB_FORMAT_ABGR
184 #include "yuv_rgb_std_func.h"
185 
188  const uint8_t *RGB, uint32_t RGB_stride,
189  uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
190  YCbCrType yuv_type)
191 {
192  const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
193 
194  uint32_t x, y;
195  for(y=0; y<(height-1); y+=2)
196  {
197  const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
198  *rgb_ptr2=RGB+(y+1)*RGB_stride;
199 
200  uint8_t *y_ptr1=Y+y*Y_stride,
201  *y_ptr2=Y+(y+1)*Y_stride,
202  *u_ptr=U+(y/2)*UV_stride,
203  *v_ptr=V+(y/2)*UV_stride;
204 
205  for(x=0; x<(width-1); x+=2)
206  {
207  // compute yuv for the four pixels, u and v values are summed
208  int32_t y_tmp, u_tmp, v_tmp;
209 
210  y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2];
211  u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2];
212  v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2];
213  y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
214 
215  y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5];
216  u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5];
217  v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5];
218  y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
219 
220  y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2];
221  u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2];
222  v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2];
223  y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
224 
225  y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5];
226  u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5];
227  v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5];
228  y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
229 
230  u_ptr[0] = clampU8(u_tmp/4+(128<<PRECISION));
231  v_ptr[0] = clampU8(v_tmp/4+(128<<PRECISION));
232 
233  rgb_ptr1 += 6;
234  rgb_ptr2 += 6;
235  y_ptr1 += 2;
236  y_ptr2 += 2;
237  u_ptr += 1;
238  v_ptr += 1;
239  }
240  }
241 }
242 
243 #ifdef __SSE2__
244 
245 #define SSE_FUNCTION_NAME yuv420_rgb565_sse
246 #define STD_FUNCTION_NAME yuv420_rgb565_std
247 #define YUV_FORMAT YUV_FORMAT_420
248 #define RGB_FORMAT RGB_FORMAT_RGB565
249 #define SSE_ALIGNED
250 #include "yuv_rgb_sse_func.h"
251 
252 #define SSE_FUNCTION_NAME yuv420_rgb565_sseu
253 #define STD_FUNCTION_NAME yuv420_rgb565_std
254 #define YUV_FORMAT YUV_FORMAT_420
255 #define RGB_FORMAT RGB_FORMAT_RGB565
256 #include "yuv_rgb_sse_func.h"
257 
258 #define SSE_FUNCTION_NAME yuv420_rgb24_sse
259 #define STD_FUNCTION_NAME yuv420_rgb24_std
260 #define YUV_FORMAT YUV_FORMAT_420
261 #define RGB_FORMAT RGB_FORMAT_RGB24
262 #define SSE_ALIGNED
263 #include "yuv_rgb_sse_func.h"
264 
265 #define SSE_FUNCTION_NAME yuv420_rgb24_sseu
266 #define STD_FUNCTION_NAME yuv420_rgb24_std
267 #define YUV_FORMAT YUV_FORMAT_420
268 #define RGB_FORMAT RGB_FORMAT_RGB24
269 #include "yuv_rgb_sse_func.h"
270 
271 #define SSE_FUNCTION_NAME yuv420_rgba_sse
272 #define STD_FUNCTION_NAME yuv420_rgba_std
273 #define YUV_FORMAT YUV_FORMAT_420
274 #define RGB_FORMAT RGB_FORMAT_RGBA
275 #define SSE_ALIGNED
276 #include "yuv_rgb_sse_func.h"
277 
278 #define SSE_FUNCTION_NAME yuv420_rgba_sseu
279 #define STD_FUNCTION_NAME yuv420_rgba_std
280 #define YUV_FORMAT YUV_FORMAT_420
281 #define RGB_FORMAT RGB_FORMAT_RGBA
282 #include "yuv_rgb_sse_func.h"
283 
284 #define SSE_FUNCTION_NAME yuv420_bgra_sse
285 #define STD_FUNCTION_NAME yuv420_bgra_std
286 #define YUV_FORMAT YUV_FORMAT_420
287 #define RGB_FORMAT RGB_FORMAT_BGRA
288 #define SSE_ALIGNED
289 #include "yuv_rgb_sse_func.h"
290 
291 #define SSE_FUNCTION_NAME yuv420_bgra_sseu
292 #define STD_FUNCTION_NAME yuv420_bgra_std
293 #define YUV_FORMAT YUV_FORMAT_420
294 #define RGB_FORMAT RGB_FORMAT_BGRA
295 #include "yuv_rgb_sse_func.h"
296 
297 #define SSE_FUNCTION_NAME yuv420_argb_sse
298 #define STD_FUNCTION_NAME yuv420_argb_std
299 #define YUV_FORMAT YUV_FORMAT_420
300 #define RGB_FORMAT RGB_FORMAT_ARGB
301 #define SSE_ALIGNED
302 #include "yuv_rgb_sse_func.h"
303 
304 #define SSE_FUNCTION_NAME yuv420_argb_sseu
305 #define STD_FUNCTION_NAME yuv420_argb_std
306 #define YUV_FORMAT YUV_FORMAT_420
307 #define RGB_FORMAT RGB_FORMAT_ARGB
308 #include "yuv_rgb_sse_func.h"
309 
310 #define SSE_FUNCTION_NAME yuv420_abgr_sse
311 #define STD_FUNCTION_NAME yuv420_abgr_std
312 #define YUV_FORMAT YUV_FORMAT_420
313 #define RGB_FORMAT RGB_FORMAT_ABGR
314 #define SSE_ALIGNED
315 #include "yuv_rgb_sse_func.h"
316 
317 #define SSE_FUNCTION_NAME yuv420_abgr_sseu
318 #define STD_FUNCTION_NAME yuv420_abgr_std
319 #define YUV_FORMAT YUV_FORMAT_420
320 #define RGB_FORMAT RGB_FORMAT_ABGR
321 #include "yuv_rgb_sse_func.h"
322 
323 #define SSE_FUNCTION_NAME yuv422_rgb565_sse
324 #define STD_FUNCTION_NAME yuv422_rgb565_std
325 #define YUV_FORMAT YUV_FORMAT_422
326 #define RGB_FORMAT RGB_FORMAT_RGB565
327 #define SSE_ALIGNED
328 #include "yuv_rgb_sse_func.h"
329 
330 #define SSE_FUNCTION_NAME yuv422_rgb565_sseu
331 #define STD_FUNCTION_NAME yuv422_rgb565_std
332 #define YUV_FORMAT YUV_FORMAT_422
333 #define RGB_FORMAT RGB_FORMAT_RGB565
334 #include "yuv_rgb_sse_func.h"
335 
336 #define SSE_FUNCTION_NAME yuv422_rgb24_sse
337 #define STD_FUNCTION_NAME yuv422_rgb24_std
338 #define YUV_FORMAT YUV_FORMAT_422
339 #define RGB_FORMAT RGB_FORMAT_RGB24
340 #define SSE_ALIGNED
341 #include "yuv_rgb_sse_func.h"
342 
343 #define SSE_FUNCTION_NAME yuv422_rgb24_sseu
344 #define STD_FUNCTION_NAME yuv422_rgb24_std
345 #define YUV_FORMAT YUV_FORMAT_422
346 #define RGB_FORMAT RGB_FORMAT_RGB24
347 #include "yuv_rgb_sse_func.h"
348 
349 #define SSE_FUNCTION_NAME yuv422_rgba_sse
350 #define STD_FUNCTION_NAME yuv422_rgba_std
351 #define YUV_FORMAT YUV_FORMAT_422
352 #define RGB_FORMAT RGB_FORMAT_RGBA
353 #define SSE_ALIGNED
354 #include "yuv_rgb_sse_func.h"
355 
356 #define SSE_FUNCTION_NAME yuv422_rgba_sseu
357 #define STD_FUNCTION_NAME yuv422_rgba_std
358 #define YUV_FORMAT YUV_FORMAT_422
359 #define RGB_FORMAT RGB_FORMAT_RGBA
360 #include "yuv_rgb_sse_func.h"
361 
362 #define SSE_FUNCTION_NAME yuv422_bgra_sse
363 #define STD_FUNCTION_NAME yuv422_bgra_std
364 #define YUV_FORMAT YUV_FORMAT_422
365 #define RGB_FORMAT RGB_FORMAT_BGRA
366 #define SSE_ALIGNED
367 #include "yuv_rgb_sse_func.h"
368 
369 #define SSE_FUNCTION_NAME yuv422_bgra_sseu
370 #define STD_FUNCTION_NAME yuv422_bgra_std
371 #define YUV_FORMAT YUV_FORMAT_422
372 #define RGB_FORMAT RGB_FORMAT_BGRA
373 #include "yuv_rgb_sse_func.h"
374 
375 #define SSE_FUNCTION_NAME yuv422_argb_sse
376 #define STD_FUNCTION_NAME yuv422_argb_std
377 #define YUV_FORMAT YUV_FORMAT_422
378 #define RGB_FORMAT RGB_FORMAT_ARGB
379 #define SSE_ALIGNED
380 #include "yuv_rgb_sse_func.h"
381 
382 #define SSE_FUNCTION_NAME yuv422_argb_sseu
383 #define STD_FUNCTION_NAME yuv422_argb_std
384 #define YUV_FORMAT YUV_FORMAT_422
385 #define RGB_FORMAT RGB_FORMAT_ARGB
386 #include "yuv_rgb_sse_func.h"
387 
388 #define SSE_FUNCTION_NAME yuv422_abgr_sse
389 #define STD_FUNCTION_NAME yuv422_abgr_std
390 #define YUV_FORMAT YUV_FORMAT_422
391 #define RGB_FORMAT RGB_FORMAT_ABGR
392 #define SSE_ALIGNED
393 #include "yuv_rgb_sse_func.h"
394 
395 #define SSE_FUNCTION_NAME yuv422_abgr_sseu
396 #define STD_FUNCTION_NAME yuv422_abgr_std
397 #define YUV_FORMAT YUV_FORMAT_422
398 #define RGB_FORMAT RGB_FORMAT_ABGR
399 #include "yuv_rgb_sse_func.h"
400 
401 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sse
402 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
403 #define YUV_FORMAT YUV_FORMAT_NV12
404 #define RGB_FORMAT RGB_FORMAT_RGB565
405 #define SSE_ALIGNED
406 #include "yuv_rgb_sse_func.h"
407 
408 #define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu
409 #define STD_FUNCTION_NAME yuvnv12_rgb565_std
410 #define YUV_FORMAT YUV_FORMAT_NV12
411 #define RGB_FORMAT RGB_FORMAT_RGB565
412 #include "yuv_rgb_sse_func.h"
413 
414 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sse
415 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
416 #define YUV_FORMAT YUV_FORMAT_NV12
417 #define RGB_FORMAT RGB_FORMAT_RGB24
418 #define SSE_ALIGNED
419 #include "yuv_rgb_sse_func.h"
420 
421 #define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu
422 #define STD_FUNCTION_NAME yuvnv12_rgb24_std
423 #define YUV_FORMAT YUV_FORMAT_NV12
424 #define RGB_FORMAT RGB_FORMAT_RGB24
425 #include "yuv_rgb_sse_func.h"
426 
427 #define SSE_FUNCTION_NAME yuvnv12_rgba_sse
428 #define STD_FUNCTION_NAME yuvnv12_rgba_std
429 #define YUV_FORMAT YUV_FORMAT_NV12
430 #define RGB_FORMAT RGB_FORMAT_RGBA
431 #define SSE_ALIGNED
432 #include "yuv_rgb_sse_func.h"
433 
434 #define SSE_FUNCTION_NAME yuvnv12_rgba_sseu
435 #define STD_FUNCTION_NAME yuvnv12_rgba_std
436 #define YUV_FORMAT YUV_FORMAT_NV12
437 #define RGB_FORMAT RGB_FORMAT_RGBA
438 #include "yuv_rgb_sse_func.h"
439 
440 #define SSE_FUNCTION_NAME yuvnv12_bgra_sse
441 #define STD_FUNCTION_NAME yuvnv12_bgra_std
442 #define YUV_FORMAT YUV_FORMAT_NV12
443 #define RGB_FORMAT RGB_FORMAT_BGRA
444 #define SSE_ALIGNED
445 #include "yuv_rgb_sse_func.h"
446 
447 #define SSE_FUNCTION_NAME yuvnv12_bgra_sseu
448 #define STD_FUNCTION_NAME yuvnv12_bgra_std
449 #define YUV_FORMAT YUV_FORMAT_NV12
450 #define RGB_FORMAT RGB_FORMAT_BGRA
451 #include "yuv_rgb_sse_func.h"
452 
453 #define SSE_FUNCTION_NAME yuvnv12_argb_sse
454 #define STD_FUNCTION_NAME yuvnv12_argb_std
455 #define YUV_FORMAT YUV_FORMAT_NV12
456 #define RGB_FORMAT RGB_FORMAT_ARGB
457 #define SSE_ALIGNED
458 #include "yuv_rgb_sse_func.h"
459 
460 #define SSE_FUNCTION_NAME yuvnv12_argb_sseu
461 #define STD_FUNCTION_NAME yuvnv12_argb_std
462 #define YUV_FORMAT YUV_FORMAT_NV12
463 #define RGB_FORMAT RGB_FORMAT_ARGB
464 #include "yuv_rgb_sse_func.h"
465 
466 #define SSE_FUNCTION_NAME yuvnv12_abgr_sse
467 #define STD_FUNCTION_NAME yuvnv12_abgr_std
468 #define YUV_FORMAT YUV_FORMAT_NV12
469 #define RGB_FORMAT RGB_FORMAT_ABGR
470 #define SSE_ALIGNED
471 #include "yuv_rgb_sse_func.h"
472 
473 #define SSE_FUNCTION_NAME yuvnv12_abgr_sseu
474 #define STD_FUNCTION_NAME yuvnv12_abgr_std
475 #define YUV_FORMAT YUV_FORMAT_NV12
476 #define RGB_FORMAT RGB_FORMAT_ABGR
477 #include "yuv_rgb_sse_func.h"
478 
479 
/* One byte-interleaving pass of UNPACK_RGB24_32.
   Pairs register k with register k+3 (RGB1/RGB4, RGB2/RGB5, RGB3/RGB6) and stores
   the unpacklo / unpackhi results of each pair in R1/R2, G1/G2 and B1/B2.
   The RGBn inputs are only read; the six outputs are overwritten. */
480 #define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
481 R1 = _mm_unpacklo_epi8(RGB1, RGB4); \
482 R2 = _mm_unpackhi_epi8(RGB1, RGB4); \
483 G1 = _mm_unpacklo_epi8(RGB2, RGB5); \
484 G2 = _mm_unpackhi_epi8(RGB2, RGB5); \
485 B1 = _mm_unpacklo_epi8(RGB3, RGB6); \
486 B2 = _mm_unpackhi_epi8(RGB3, RGB6);
487 
/* Second kind of byte-interleaving pass of UNPACK_RGB24_32.
   Interleaves the pairs R1/G2, R2/B1 and G1/B2 and writes the unpacklo / unpackhi
   results back into RGB1..RGB6, which are therefore overwritten.
   The last line ends with a line continuation, so the line following the macro
   must stay empty. */
488 #define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
489 RGB1 = _mm_unpacklo_epi8(R1, G2); \
490 RGB2 = _mm_unpackhi_epi8(R1, G2); \
491 RGB3 = _mm_unpacklo_epi8(R2, B1); \
492 RGB4 = _mm_unpackhi_epi8(R2, B1); \
493 RGB5 = _mm_unpacklo_epi8(G1, B2); \
494 RGB6 = _mm_unpackhi_epi8(G1, B2); \
495 
/* Separate packed RGB24 bytes into per-channel registers by chaining five
   interleaving passes (STEP1, STEP2, STEP1, STEP2, STEP1).
   As used by RGB2YUV_32: RGB1..RGB3 hold 48 bytes of the first line and RGB4..RGB6
   48 bytes of the second line; afterwards R1/G1/B1 are consumed as the channels of
   the first line and R2/G2/B2 as those of the second line.
   RGB1..RGB6 are used as scratch and do not keep their input values.
   The last line ends with a line continuation, so the line following the macro
   must stay empty. */
496 #define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
497 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
498 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
499 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
500 UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
501 UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
502 
/* Convert eight pixels from RGB to YUV using 16-bit lanes.
   R, G, B : __m128i inputs; RGB2YUV_32 passes channel bytes zero-extended to 16 bits
   Y, U, V : __m128i outputs, each lane = (matrix row . (R,G,B) + offset) >> PRECISION,
             where the offset is y_shift<<PRECISION for Y and 128<<PRECISION for U and V
   All arithmetic is done in 16 bits (_mm_mullo_epi16 / _mm_add_epi16), and the final
   shift is arithmetic (_mm_srai_epi16).
   Requires a variable named param (pointer to the RGB2YUVParam in use) at the
   expansion site. */
503 #define RGB2YUV_16(R, G, B, Y, U, V) \
504 Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \
505  _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \
506 Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \
507 Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \
508 Y = _mm_srai_epi16(Y, PRECISION); \
509 U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \
510  _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \
511 U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \
512 U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \
513 U = _mm_srai_epi16(U, PRECISION); \
514 V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \
515  _mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \
516 V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \
517 V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \
518 V = _mm_srai_epi16(V, PRECISION);
519 
/* Convert a block of 32x2 RGB24 pixels to YUV 4:2:0.
   Reads 96 bytes from each of rgb_ptr1 and rgb_ptr2 (two consecutive image lines),
   writes 32 luma bytes to each of y_ptr1 and y_ptr2, and 16 bytes to each of
   u_ptr and v_ptr (chroma averaged vertically, then horizontally).
   The block is processed as two halves of 16 pixels per line.
   The expansion declares local variables, so it can appear only once per scope.
   Requires at the expansion site: the pointers named above, param, and the
   LOAD_SI128 / SAVE_SI128 macros selecting the load/store intrinsics.
   The pointers are not advanced here; the caller does that. */
520 #define RGB2YUV_32 \
521  __m128i r1, r2, b1, b2, g1, g2; \
522  __m128i r_16, g_16, b_16; \
523  __m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \
524  __m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \
525  rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \
526  rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \
527  rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \
528  rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \
529  rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \
530  /* unpack rgb24 data to r, g and b data in separate channels*/ \
531  UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
532  /* process pixels of first line */ \
533  r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
534  g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
535  b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
536  RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
537  r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
538  g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
539  b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
540  RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
541  y = _mm_packus_epi16(y1_16, y2_16); \
542  u1 = _mm_packus_epi16(u1_16, u2_16); \
543  v1 = _mm_packus_epi16(v1_16, v2_16); \
544  /* save Y values */ \
545  SAVE_SI128((__m128i*)(y_ptr1), y); \
546  /* process pixels of second line */ \
547  r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
548  g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
549  b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
550  RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
551  r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
552  g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
553  b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
554  RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
555  y = _mm_packus_epi16(y1_16, y2_16); \
556  u2 = _mm_packus_epi16(u1_16, u2_16); \
557  v2 = _mm_packus_epi16(v1_16, v2_16); \
558  /* save Y values */ \
559  SAVE_SI128((__m128i*)(y_ptr2), y); \
560  /* vertical subsampling of u/v values */ \
561  u1_tmp = _mm_avg_epu8(u1, u2); \
562  v1_tmp = _mm_avg_epu8(v1, v2); \
563  /* do the same again with next data */ \
564  rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \
565  rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \
566  rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \
567  rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \
568  rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \
569  rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \
570  /* unpack rgb24 data to r, g and b data in separate channels*/ \
571  UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
572  /* process pixels of first line */ \
573  r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
574  g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
575  b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
576  RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
577  r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
578  g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
579  b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
580  RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
581  y = _mm_packus_epi16(y1_16, y2_16); \
582  u1 = _mm_packus_epi16(u1_16, u2_16); \
583  v1 = _mm_packus_epi16(v1_16, v2_16); \
584  /* save Y values */ \
585  SAVE_SI128((__m128i*)(y_ptr1+16), y); \
586  /* process pixels of second line */ \
587  r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
588  g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
589  b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
590  RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
591  r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
592  g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
593  b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
594  RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
595  y = _mm_packus_epi16(y1_16, y2_16); \
596  u2 = _mm_packus_epi16(u1_16, u2_16); \
597  v2 = _mm_packus_epi16(v1_16, v2_16); \
598  /* save Y values */ \
599  SAVE_SI128((__m128i*)(y_ptr2+16), y); \
600  /* vertical subsampling of u/v values */ \
601  u2_tmp = _mm_avg_epu8(u1, u2); \
602  v2_tmp = _mm_avg_epu8(v1, v2); \
603  /* horizontal subsampling of u/v values */ \
604  u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \
605  v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \
606  u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \
607  v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \
608  u1 = _mm_avg_epu8(u1, u2); \
609  v1 = _mm_avg_epu8(v1, v2); \
610  SAVE_SI128((__m128i*)(u_ptr), u1); \
611  SAVE_SI128((__m128i*)(v_ptr), v1);
612 
614  const uint8_t *RGB, uint32_t RGB_stride,
615  uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
616  YCbCrType yuv_type)
617 {
618  #define LOAD_SI128 _mm_load_si128
619  #define SAVE_SI128 _mm_stream_si128
620  const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
621 
622  uint32_t xpos, ypos;
623  for(ypos=0; ypos<(height-1); ypos+=2)
624  {
625  const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
626  *rgb_ptr2=RGB+(ypos+1)*RGB_stride;
627 
628  uint8_t *y_ptr1=Y+ypos*Y_stride,
629  *y_ptr2=Y+(ypos+1)*Y_stride,
630  *u_ptr=U+(ypos/2)*UV_stride,
631  *v_ptr=V+(ypos/2)*UV_stride;
632 
633  for(xpos=0; xpos<(width-31); xpos+=32)
634  {
635  RGB2YUV_32
636 
637  rgb_ptr1+=96;
638  rgb_ptr2+=96;
639  y_ptr1+=32;
640  y_ptr2+=32;
641  u_ptr+=16;
642  v_ptr+=16;
643  }
644  }
645  #undef LOAD_SI128
646  #undef SAVE_SI128
647 }
648 
650  const uint8_t *RGB, uint32_t RGB_stride,
651  uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
652  YCbCrType yuv_type)
653 {
654  #define LOAD_SI128 _mm_loadu_si128
655  #define SAVE_SI128 _mm_storeu_si128
656  const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
657 
658  uint32_t xpos, ypos;
659  for(ypos=0; ypos<(height-1); ypos+=2)
660  {
661  const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
662  *rgb_ptr2=RGB+(ypos+1)*RGB_stride;
663 
664  uint8_t *y_ptr1=Y+ypos*Y_stride,
665  *y_ptr2=Y+(ypos+1)*Y_stride,
666  *u_ptr=U+(ypos/2)*UV_stride,
667  *v_ptr=V+(ypos/2)*UV_stride;
668 
669  for(xpos=0; xpos<(width-31); xpos+=32)
670  {
671  RGB2YUV_32
672 
673  rgb_ptr1+=96;
674  rgb_ptr2+=96;
675  y_ptr1+=32;
676  y_ptr2+=32;
677  u_ptr+=16;
678  v_ptr+=16;
679  }
680  }
681  #undef LOAD_SI128
682  #undef SAVE_SI128
683 }
684 
685 
686 #endif //__SSE2__
687 
GLuint GLenum matrix
Definition: edid.h:20
const GLdouble * v
Definition: SDL_opengl.h:2064
GLint GLint GLint GLint GLint x
Definition: SDL_opengl.h:1574
signed int int32_t
int16_t y_factor
Definition: yuv_rgb.c:25
void rgb24_yuv420_sseu(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
static const RGB2YUVParam RGB2YUV[3]
Definition: yuv_rgb.c:51
#define PRECISION
Definition: yuv_rgb.c:10
#define PRECISION_FACTOR
Definition: yuv_rgb.c:11
int16_t v_g_factor
Definition: yuv_rgb.c:28
signed short int16_t
void rgb24_yuv420_std(uint32_t width, uint32_t height, const uint8_t *RGB, uint32_t RGB_stride, uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, YCbCrType yuv_type)
Definition: yuv_rgb.c:186
int16_t u_b_factor
Definition: yuv_rgb.c:29
GLint GLint GLsizei width
Definition: SDL_opengl.h:1572
void rgb24_yuv420_sse(uint32_t width, uint32_t height, const uint8_t *rgb, uint32_t rgb_stride, uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, YCbCrType yuv_type)
static const YUV2RGBParam YUV2RGB[3]
Definition: yuv_rgb.c:42
set set set set set set set set set set set set set set set set set set set set *set set set macro pixldst op &r &cond WK op &r &cond WK op &r &cond WK else op &m &cond &ia op &r &cond WK else op &m &cond &ia elseif elseif else error unsupported base if elseif elseif else error unsupported unaligned pixldst unaligned endm macro pixst base base else pixldst base endif endm macro PF base if bpp PF set rept prefetch_distance PF set OFFSET endr endif endm macro preload_leading_step2 base if bpp ifc DST PF PF else if bpp lsl PF PF lsl PF PF lsl PF PF PF else PF lsl PF lsl PF lsl PF endif SIZE macro preload_middle scratch_holds_offset if bpp if else PF PF endif endif endif endm macro preload_trailing base if bpp if bpp *pix_per_block PF PF lsl PF PF PF PF PF else PF lsl PF lsl PF PF PF PF PF base if bpp if narrow_case &&bpp<=dst_w_bpp) PF bic, WK0, base, #31 PF pld, [WK0] PF add, WK1, base, X, LSL #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 90f PF pld, [WK1]90:.else PF bic, WK0, base, #31 PF pld, [WK0] PF add, WK1, base, X, lsl #bpp_shift PF sub, WK1, WK1, #1 PF bic, WK1, WK1, #31 PF cmp, WK1, WK0 PF beq, 92f91:PF add, WK0, WK0, #32 PF cmp, WK0, WK1 PF pld, [WK0] PF bne, 91b92:.endif .endif.endm.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond X, X, #8 *numbytes/dst_w_bpp .endif process_tail cond, numbytes, firstreg .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst cond, numbytes, firstreg, DST .endif.endm.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_BRANCH_OVER .ifc cond, mi bpl 100f .endif .ifc cond, cs bcc 100f .endif .ifc cond, ne beq 100f .endif conditional_process1_helper, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, 
decrementx100:.else conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx .endif.endm.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx .if(flags) &(FLAG_DST_READWRITE|FLAG_BRANCH_OVER|FLAG_PROCESS_CORRUPTS_PSR|FLAG_PROCESS_DOES_STORE) test conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx .if(flags) &FLAG_PROCESS_CORRUPTS_PSR test .endif conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx .else test process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 .if decrementx sub &cond1 X, X, #8 *numbytes1/dst_w_bpp sub &cond2 X, X, #8 *numbytes2/dst_w_bpp .endif process_tail cond1, numbytes1, firstreg1 process_tail cond2, numbytes2, firstreg2 pixst cond1, numbytes1, firstreg1, DST pixst cond2, numbytes2, firstreg2, DST .endif.endm.macro test_bits_1_0_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-1 .else movs SCRATCH, WK0, lsl #32-1 .endif.endm.macro test_bits_3_2_ptr .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 movs SCRATCH, X, lsl #32-3 .else movs SCRATCH, WK0, lsl #32-3 .endif.endm.macro leading_15bytes process_head, process_tail .set DECREMENT_X, 1 .if(flags) &FLAG_PROCESS_CORRUPTS_WK0 .set DECREMENT_X, 0 sub X, X, WK0, lsr #dst_bpp_shift str X, [sp, #LINE_SAVED_REG_COUNT *4] mov X, WK0 .endif .if dst_w_bpp==8 conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, DECREMENT_X .elseif dst_w_bpp==16 test_bits_1_0_ptr conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, DECREMENT_X .endif conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, DECREMENT_X 
.if(flags) &FLAG_PROCESS_CORRUPTS_WK0 ldr X, [sp, #LINE_SAVED_REG_COUNT *4] .endif.endm.macro test_bits_3_2_pix movs SCRATCH, X, lsl #dst_bpp_shift+32-3.endm.macro test_bits_1_0_pix .if dst_w_bpp==8 movs SCRATCH, X, lsl #dst_bpp_shift+32-1 .else movs SCRATCH, X, lsr #1 .endif.endm.macro trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 .if dst_w_bpp==16 test_bits_1_0_pix conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 .elseif dst_w_bpp==8 conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 .endif.endm.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment110:.set SUBBLOCK, 0 .rept pix_per_block *dst_w_bpp/128 process_head, 16, 0, unaligned_src, unaligned_mask, 1 .if(src_bpp > 0) &&(mask_bpp==0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle src_bpp, SRC, 1 .elseif(src_bpp==0) &&(mask_bpp > 0) &&((flags) &FLAG_PROCESS_PRESERVES_SCRATCH) preload_middle mask_bpp, MASK, 1 .else preload_middle src_bpp, SRC, 0 preload_middle mask_bpp, MASK, 0 .endif .if(dst_r_bpp > 0) &&((SUBBLOCK % 2)==0) &&(((flags) &FLAG_NO_PRELOAD_DST)==0) PF pld, [DST, #32 *prefetch_distance - dst_alignment] .endif process_tail, 16, 0 .if !((flags) &FLAG_PROCESS_DOES_STORE) pixst, 16, 0, DST .endif .set SUBBLOCK, SUBBLOCK+1 .endr subs X, X, #pix_per_block bhs 110b.endm.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask .if dst_r_bpp > tst bne process_inner_loop DST_PRELOAD_BIAS endif preload_trailing SRC preload_trailing MASK DST endif add medium_case_inner_loop_and_trailing_pixels unaligned_mask endm macro medium_case_inner_loop_and_trailing_pixels DST endif subs bhs tst beq exit_label trailing_15bytes 
unaligned_mask endm macro narrow_case_inner_loop_and_trailing_pixels unaligned_mask tst conditional_process1 trailing_15bytes unaligned_mask endm macro switch_on_alignment exit_label if bne endif if bne endif action if endif if bne endif action if endif endif endm macro end_of_line last_one if SINGLE_SCANLINE ifc b endif else if vars_spilled word LINE_SAVED_REGS endif subs Y
GLint GLint GLint GLint GLint GLint y
Definition: SDL_opengl.h:1574
static uint8_t clampU8(int32_t v)
Definition: yuv_rgb.c:75
int16_t u_g_factor
Definition: yuv_rgb.c:27
YCbCrType
Definition: yuv_rgb.h:22
uint8_t y_shift
Definition: yuv_rgb.c:15
unsigned char uint8_t
unsigned int uint32_t
int16_t matrix[3][3]
Definition: yuv_rgb.c:16
uint8_t y_shift
Definition: yuv_rgb.c:24
GLint GLint GLsizei GLsizei height
Definition: SDL_opengl.h:1572
int16_t v_r_factor
Definition: yuv_rgb.c:26
#define V(value)
Definition: yuv_rgb.c:35
GLfloat param