21 #include "../SDL_internal.h" 45 const unsigned A = info->
a;
60 if ( palmap ==
NULL ) {
61 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
63 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
90 unsigned sR, sG, sB, sA;
106 if ( palmap ==
NULL ) {
107 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
109 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
139 const unsigned A = info->
a;
146 if ( Pixel != ckey ) {
155 if ( palmap ==
NULL ) {
156 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
158 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
185 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta;
187 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe);
188 lmask = _mm_set_pi32(0x00010101, 0x00010101);
189 dsta = _mm_set_pi32(dalpha, dalpha);
196 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
197 + (s & d & 0x00010101)) | dalpha;
201 for (n >>= 1; n > 0; --
n) {
202 dst1 = *(__m64 *) dstp;
205 src1 = *(__m64 *) srcp;
208 dst2 = _mm_and_si64(dst2, hmask);
209 src2 = _mm_and_si64(src2, hmask);
210 src2 = _mm_add_pi32(src2, dst2);
211 src2 = _mm_srli_pi32(src2, 1);
213 dst1 = _mm_and_si64(dst1, src1);
214 dst1 = _mm_and_si64(dst1, lmask);
215 dst1 = _mm_add_pi32(dst1, src2);
216 dst1 = _mm_or_si64(dst1, dsta);
218 *(__m64 *) dstp = dst1;
237 if (alpha == 128 && (df->
Rmask | df->
Gmask | df->
Bmask) == 0x00FFFFFF) {
239 BlitRGBtoRGBSurfaceAlpha128MMX(info);
250 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta;
252 mm_zero = _mm_setzero_si64();
254 amult = alpha | (alpha << 8);
255 amult = amult | (amult << 16);
257 (0xff << df->
Rshift) | (0xff << df->
258 Gshift) | (0xff << df->
Bshift);
259 mm_alpha = _mm_set_pi32(0, amult & chanmask);
260 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero);
262 dsta = _mm_set_pi32(dalpha, dalpha);
268 src2 = _mm_cvtsi32_si64(*srcp);
269 src2 = _mm_unpacklo_pi8(src2, mm_zero);
271 dst1 = _mm_cvtsi32_si64(*dstp);
272 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
274 src2 = _mm_sub_pi16(src2, dst1);
275 src2 = _mm_mullo_pi16(src2, mm_alpha);
276 src2 = _mm_srli_pi16(src2, 8);
277 dst1 = _mm_add_pi8(src2, dst1);
279 dst1 = _mm_packs_pu16(dst1, mm_zero);
280 dst1 = _mm_or_si64(dst1, dsta);
281 *dstp = _mm_cvtsi64_si32(dst1);
289 for (n >>= 1; n > 0; --
n) {
291 src1 = *(__m64 *) srcp;
293 src1 = _mm_unpacklo_pi8(src1, mm_zero);
294 src2 = _mm_unpackhi_pi8(src2, mm_zero);
296 dst1 = *(__m64 *) dstp;
298 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
299 dst2 = _mm_unpackhi_pi8(dst2, mm_zero);
301 src1 = _mm_sub_pi16(src1, dst1);
302 src1 = _mm_mullo_pi16(src1, mm_alpha);
303 src1 = _mm_srli_pi16(src1, 8);
304 dst1 = _mm_add_pi8(src1, dst1);
306 src2 = _mm_sub_pi16(src2, dst2);
307 src2 = _mm_mullo_pi16(src2, mm_alpha);
308 src2 = _mm_srli_pi16(src2, 8);
309 dst2 = _mm_add_pi8(src2, dst2);
311 dst1 = _mm_packs_pu16(dst1, dst2);
312 dst1 = _mm_or_si64(dst1, dsta);
314 *(__m64 *) dstp = dst1;
339 Uint64 multmask, multmask2;
341 __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
343 mm_zero = _mm_setzero_si64();
345 multmask <<= (ashift * 2);
346 multmask2 = 0x00FF00FF00FF00FFULL;
354 }
else if (alpha == amask) {
357 src1 = _mm_cvtsi32_si64(*srcp);
358 src1 = _mm_unpacklo_pi8(src1, mm_zero);
360 dst1 = _mm_cvtsi32_si64(*dstp);
361 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
363 mm_alpha = _mm_cvtsi32_si64(alpha);
364 mm_alpha = _mm_srli_si64(mm_alpha, ashift);
365 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
366 mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
367 mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);
368 mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2);
371 src1 = _mm_mullo_pi16(src1, mm_alpha);
372 src1 = _mm_srli_pi16(src1, 8);
373 dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
374 dst1 = _mm_srli_pi16(dst1, 8);
375 dst1 = _mm_add_pi16(src1, dst1);
376 dst1 = _mm_packs_pu16(dst1, mm_zero);
378 *dstp = _mm_cvtsi64_si32(dst1);
392 #if SDL_ARM_SIMD_BLITTERS 405 BlitARGBto565PixelAlphaARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
420 BlitRGBtoRGBPixelAlphaARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
424 #if SDL_ARM_NEON_BLITTERS 437 BlitARGBto565PixelAlphaARMNEONAsm(width, height, dstp, dststride, srcp, srcstride);
452 BlitRGBtoRGBPixelAlphaARMNEONAsm(width, height, dstp, dststride, srcp, srcstride);
472 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
473 + (s & d & 0x00010101)) | 0xff000000;
507 d1 = (d1 + ((s1 - d1) * alpha >> 8))
511 d = (d + ((s -
d) * alpha >> 8)) & 0xff00;
512 *dstp = d1 | d | 0xff000000;
559 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;
562 d = (d + ((s -
d) * alpha >> 8)) & 0xff00;
563 dalpha = alpha + (dalpha * (alpha ^ 0xFF) >> 8);
564 *dstp = d1 | d | (dalpha << 24);
590 Uint64 multmask, multmask2;
592 __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
594 mm_zero = _mm_setzero_si64();
596 multmask <<= (ashift * 2);
597 multmask2 = 0x00FF00FF00FF00FFULL;
604 _m_prefetch(srcp + 16);
605 _m_prefetch(dstp + 16);
607 alpha = *srcp & amask;
610 }
else if (alpha == amask) {
613 src1 = _mm_cvtsi32_si64(*srcp);
614 src1 = _mm_unpacklo_pi8(src1, mm_zero);
616 dst1 = _mm_cvtsi32_si64(*dstp);
617 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
619 mm_alpha = _mm_cvtsi32_si64(alpha);
620 mm_alpha = _mm_srli_si64(mm_alpha, ashift);
621 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
622 mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
623 mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);
624 mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2);
628 src1 = _mm_mullo_pi16(src1, mm_alpha);
629 src1 = _mm_srli_pi16(src1, 8);
630 dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
631 dst1 = _mm_srli_pi16(dst1, 8);
632 dst1 = _mm_add_pi16(src1, dst1);
633 dst1 = _mm_packs_pu16(dst1, mm_zero);
635 *dstp = _mm_cvtsi64_si32(dst1);
652 #define BLEND16_50(d, s, mask) \ 653 ((((s & mask) + (d & mask)) >> 1) + (s & d & (~mask & 0xffff))) 656 #define BLEND2x16_50(d, s, mask) \ 657 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ 658 + (s & d & (~(mask | mask << 16)))) 691 prev_sw = ((
Uint32 *) srcp)[-1];
697 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 698 s = (prev_sw << 16) + (sw >> 16);
700 s = (prev_sw >> 16) + (sw << 16);
712 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 715 s = (
Uint16) (prev_sw >> 16);
777 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha;
779 alpha &= ~(1 + 2 + 4);
780 mm_alpha = _mm_set_pi32(0, alpha);
783 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
784 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
787 mm_alpha = _mm_slli_si64(mm_alpha, 3);
790 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0);
791 bmask = _mm_set_pi32(0x001F001F, 0x001F001F);
804 s = (s | s << 16) & 0x07e0f81f;
805 d = (d | d << 16) & 0x07e0f81f;
806 d += (s -
d) * alpha >> 5;
808 *dstp++ = (
Uint16)(d | d >> 16);
817 s = (s | s << 16) & 0x07e0f81f;
818 d = (d | d << 16) & 0x07e0f81f;
819 d += (s -
d) * alpha >> 5;
821 *dstp++ = (
Uint16)(d | d >> 16);
829 s = (s | s << 16) & 0x07e0f81f;
830 d = (d | d << 16) & 0x07e0f81f;
831 d += (s -
d) * alpha >> 5;
833 *dstp++ = (
Uint16)(d | d >> 16);
835 src1 = *(__m64*)srcp;
836 dst1 = *(__m64*)dstp;
840 src2 = _mm_srli_pi16(src2, 11);
843 dst2 = _mm_srli_pi16(dst2, 11);
846 src2 = _mm_sub_pi16(src2, dst2);
847 src2 = _mm_mullo_pi16(src2, mm_alpha);
848 src2 = _mm_srli_pi16(src2, 11);
849 dst2 = _mm_add_pi16(src2, dst2);
850 dst2 = _mm_slli_pi16(dst2, 11);
856 src2 = _mm_and_si64(src2, gmask);
859 dst2 = _mm_and_si64(dst2, gmask);
862 src2 = _mm_sub_pi16(src2, dst2);
863 src2 = _mm_mulhi_pi16(src2, mm_alpha);
864 src2 = _mm_slli_pi16(src2, 5);
865 dst2 = _mm_add_pi16(src2, dst2);
867 mm_res = _mm_or_si64(mm_res, dst2);
871 src2 = _mm_and_si64(src2, bmask);
874 dst2 = _mm_and_si64(dst2, bmask);
877 src2 = _mm_sub_pi16(src2, dst2);
878 src2 = _mm_mullo_pi16(src2, mm_alpha);
879 src2 = _mm_srli_pi16(src2, 11);
880 dst2 = _mm_add_pi16(src2, dst2);
881 dst2 = _mm_and_si64(dst2, bmask);
883 mm_res = _mm_or_si64(mm_res, dst2);
885 *(__m64*)dstp = mm_res;
914 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha;
916 alpha &= ~(1 + 2 + 4);
917 mm_alpha = _mm_set_pi32(0, alpha);
920 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
921 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
924 mm_alpha = _mm_slli_si64(mm_alpha, 3);
927 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00);
928 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0);
929 bmask = _mm_set_pi32(0x001F001F, 0x001F001F);
942 s = (s | s << 16) & 0x03e07c1f;
943 d = (d | d << 16) & 0x03e07c1f;
944 d += (s -
d) * alpha >> 5;
946 *dstp++ = (
Uint16)(d | d >> 16);
955 s = (s | s << 16) & 0x03e07c1f;
956 d = (d | d << 16) & 0x03e07c1f;
957 d += (s -
d) * alpha >> 5;
959 *dstp++ = (
Uint16)(d | d >> 16);
967 s = (s | s << 16) & 0x03e07c1f;
968 d = (d | d << 16) & 0x03e07c1f;
969 d += (s -
d) * alpha >> 5;
971 *dstp++ = (
Uint16)(d | d >> 16);
973 src1 = *(__m64*)srcp;
974 dst1 = *(__m64*)dstp;
978 src2 = _mm_and_si64(src2, rmask);
981 dst2 = _mm_and_si64(dst2, rmask);
984 src2 = _mm_sub_pi16(src2, dst2);
985 src2 = _mm_mulhi_pi16(src2, mm_alpha);
986 src2 = _mm_slli_pi16(src2, 5);
987 dst2 = _mm_add_pi16(src2, dst2);
988 dst2 = _mm_and_si64(dst2, rmask);
994 src2 = _mm_and_si64(src2, gmask);
997 dst2 = _mm_and_si64(dst2, gmask);
1000 src2 = _mm_sub_pi16(src2, dst2);
1001 src2 = _mm_mulhi_pi16(src2, mm_alpha);
1002 src2 = _mm_slli_pi16(src2, 5);
1003 dst2 = _mm_add_pi16(src2, dst2);
1005 mm_res = _mm_or_si64(mm_res, dst2);
1009 src2 = _mm_and_si64(src2, bmask);
1012 dst2 = _mm_and_si64(dst2, bmask);
1015 src2 = _mm_sub_pi16(src2, dst2);
1016 src2 = _mm_mullo_pi16(src2, mm_alpha);
1017 src2 = _mm_srli_pi16(src2, 11);
1018 dst2 = _mm_add_pi16(src2, dst2);
1019 dst2 = _mm_and_si64(dst2, bmask);
1021 mm_res = _mm_or_si64(mm_res, dst2);
1023 *(__m64*)dstp = mm_res;
1042 unsigned alpha = info->
a;
1064 s = (s | s << 16) & 0x07e0f81f;
1065 d = (d | d << 16) & 0x07e0f81f;
1066 d += (s -
d) * alpha >> 5;
1068 *dstp++ = (
Uint16)(d | d >> 16);
1081 unsigned alpha = info->
a;
1103 s = (s | s << 16) & 0x03e07c1f;
1104 d = (d | d << 16) & 0x03e07c1f;
1105 d += (s -
d) * alpha >> 5;
1107 *dstp++ = (
Uint16)(d | d >> 16);
1131 unsigned alpha = s >> 27;
1138 *dstp = (
Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f));
1145 s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800)
1147 d = (d | d << 16) & 0x07e0f81f;
1148 d += (s -
d) * alpha >> 5;
1150 *dstp = (
Uint16)(d | d >> 16);
1185 *dstp = (
Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f));
1192 s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00)
1194 d = (d | d << 16) & 0x03e07c1f;
1195 d += (s -
d) * alpha >> 5;
1197 *dstp = (
Uint16)(d | d >> 16);
1224 unsigned sR, sG, sB;
1225 unsigned dR, dG, dB, dA;
1226 const unsigned sA = info->
a;
1264 unsigned sR, sG, sB;
1265 unsigned dR, dG, dB, dA;
1266 const unsigned sA = info->
a;
1273 if(sA && Pixel != ckey) {
1304 unsigned sR, sG, sB, sA;
1305 unsigned dR, dG, dB, dA;
1346 #if SDL_ARM_NEON_BLITTERS || SDL_ARM_SIMD_BLITTERS 1350 || (sf->
Bmask == 0xff && df->
Bmask == 0x1f)))
1352 #if SDL_ARM_NEON_BLITTERS 1354 return BlitARGBto565PixelAlphaARMNEON;
1356 #if SDL_ARM_SIMD_BLITTERS 1358 return BlitARGBto565PixelAlphaARMSIMD;
1363 && sf->
Gmask == 0xff00
1365 || (sf->
Bmask == 0xff && df->
Bmask == 0x1f))) {
1366 if (df->
Gmask == 0x7e0)
1368 else if (df->
Gmask == 0x3e0)
1377 #if defined(__MMX__) || defined(__3dNOW__) 1384 return BlitRGBtoRGBPixelAlphaMMX3DNOW;
1388 return BlitRGBtoRGBPixelAlphaMMX;
1392 if (sf->
Amask == 0xff000000) {
1393 #if SDL_ARM_NEON_BLITTERS 1395 return BlitRGBtoRGBPixelAlphaARMNEON;
1397 #if SDL_ARM_SIMD_BLITTERS 1399 return BlitRGBtoRGBPixelAlphaARMSIMD;
1413 if (sf->
Amask == 0) {
1421 if (df->
Gmask == 0x7e0) {
1424 return Blit565to565SurfaceAlphaMMX;
1428 }
else if (df->
Gmask == 0x3e0) {
1431 return Blit555to555SurfaceAlphaMMX;
1447 return BlitRGBtoRGBSurfaceAlphaMMX;
1463 if (sf->
Amask == 0) {
SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info)
#define BLEND16_50(d, s, mask)
static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info)
static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info)
#define SDL_COPY_COLORKEY
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA)
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB)
GLfloat GLfloat GLfloat GLfloat h
static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
SDL_PixelFormat * src_fmt
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)
A collection of pixels used in software blitting.
#define SDL_COPY_RLE_MASK
static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info)
static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
GLfloat GLfloat GLfloat alpha
GLint GLint GLsizei width
static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info)
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)
static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char const char SDL_SCANF_FORMAT_STRING const char return SDL_ThreadFunction const char void return Uint32 return Uint32 SDL_AssertionHandler void SDL_SpinLock SDL_atomic_t int int return SDL_atomic_t return void void void return void return int return SDL_AudioSpec SDL_AudioSpec return int int return return int SDL_RWops int SDL_AudioSpec Uint8 ** d
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
#define DUFFS_LOOP4(pixel_copy_increment, width)
static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info)
#define DUFFS_LOOP(pixel_copy_increment, width)
GLubyte GLubyte GLubyte GLubyte w
static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
SDL_PixelFormat * dst_fmt
GLint GLint GLsizei GLsizei height
SDL_bool SDL_HasARMSIMD(void)
#define SDL_COPY_MODULATE_ALPHA
static void BlitNto1PixelAlpha(SDL_BlitInfo *info)
void(* SDL_BlitFunc)(SDL_BlitInfo *info)
static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info)
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)
#define DUFFS_LOOP_124(pixel_copy_increment1, pixel_copy_increment2, pixel_copy_increment4, width)
#define BLEND2x16_50(d, s, mask)