BlitNtoN BlitNtoNKey: remove non-aligned word read/store (bpp 3<->4) (Bug 4503)
MIPS and (older) ARM do not allow word reads/writes when the address isn't
4-byte aligned, so replace those word accesses with byte-wise copies.
diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c
index e2e2e45..3068cc1 100644
--- a/src/video/SDL_blit_N.c
+++ b/src/video/SDL_blit_N.c
@@ -2251,9 +2251,6 @@
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
- Uint32 *src32 = (Uint32*)src;
- Uint32 *dst32 = (Uint32*)dst;
-
/* Find the appropriate permutation */
int alpha_channel, p0, p1, p2, p3;
get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
@@ -2262,19 +2259,17 @@
/* *INDENT-OFF* */
DUFFS_LOOP(
{
- Uint8 *s8 = (Uint8 *)src32;
- Uint8 *d8 = (Uint8 *)dst32;
- d8[0] = s8[p0];
- d8[1] = s8[p1];
- d8[2] = s8[p2];
- d8[3] = s8[p3];
- d8[alpha_channel] = alpha;
- ++src32;
- ++dst32;
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
+ dst[alpha_channel] = alpha;
+ src += 4;
+ dst += 4;
}, width);
/* *INDENT-ON* */
- src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
- dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
+ src += srcskip;
+ dst += dstskip;
}
return;
}
@@ -2283,8 +2278,6 @@
if (srcbpp == 4 && dstbpp == 3 &&
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
- Uint32 *src32 = (Uint32*)src;
-
/* Find the appropriate permutation */
int p0, p1, p2, p3;
get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
@@ -2293,15 +2286,14 @@
/* *INDENT-OFF* */
DUFFS_LOOP(
{
- Uint8 *s8 = (Uint8 *)src32;
- dst[0] = s8[p0];
- dst[1] = s8[p1];
- dst[2] = s8[p2];
- ++src32;
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ src += 4;
dst += 3;
}, width);
/* *INDENT-ON* */
- src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
+ src += srcskip;
dst += dstskip;
}
return;
@@ -2311,8 +2303,6 @@
if (srcbpp == 3 && dstbpp == 4 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
- Uint32 *dst32 = (Uint32*)dst;
-
/* Find the appropriate permutation */
int alpha_channel, p0, p1, p2, p3;
get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
@@ -2321,18 +2311,17 @@
/* *INDENT-OFF* */
DUFFS_LOOP(
{
- Uint8 *d8 = (Uint8 *)dst32;
- d8[0] = src[p0];
- d8[1] = src[p1];
- d8[2] = src[p2];
- d8[3] = src[p3];
- d8[alpha_channel] = alpha;
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
+ dst[alpha_channel] = alpha;
src += 3;
- ++dst32;
+ dst += 4;
}, width);
/* *INDENT-ON* */
src += srcskip;
- dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
+ dst += dstskip;
}
return;
}
@@ -2377,9 +2366,6 @@
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
- Uint32 *src32 = (Uint32*)src;
- Uint32 *dst32 = (Uint32*)dst;
-
/* Find the appropriate permutation */
int p0, p1, p2, p3;
get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
@@ -2388,18 +2374,16 @@
/* *INDENT-OFF* */
DUFFS_LOOP(
{
- Uint8 *s8 = (Uint8 *)src32;
- Uint8 *d8 = (Uint8 *)dst32;
- d8[0] = s8[p0];
- d8[1] = s8[p1];
- d8[2] = s8[p2];
- d8[3] = s8[p3];
- ++src32;
- ++dst32;
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
+ src += 4;
+ dst += 4;
}, width);
/* *INDENT-ON* */
- src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
- dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
+ src += srcskip;
+ dst += dstskip;
}
return;
}
@@ -2589,9 +2573,6 @@
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
- Uint32 *src32 = (Uint32*)src;
- Uint32 *dst32 = (Uint32*)dst;
-
/* Find the appropriate permutation */
int alpha_channel, p0, p1, p2, p3;
get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
@@ -2600,21 +2581,21 @@
/* *INDENT-OFF* */
DUFFS_LOOP(
{
+ Uint32 *src32 = (Uint32*)src;
+
if ((*src32 & rgbmask) != ckey) {
- Uint8 *s8 = (Uint8 *)src32;
- Uint8 *d8 = (Uint8 *)dst32;
- d8[0] = s8[p0];
- d8[1] = s8[p1];
- d8[2] = s8[p2];
- d8[3] = s8[p3];
- d8[alpha_channel] = alpha;
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
+ dst[alpha_channel] = alpha;
}
- ++src32;
- ++dst32;
+ src += 4;
+ dst += 4;
}, width);
/* *INDENT-ON* */
- src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
- dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
+ src += srcskip;
+ dst += dstskip;
}
return;
}
@@ -2699,8 +2680,6 @@
if (srcbpp == 4 && dstbpp == 3 &&
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
- Uint32 *src32 = (Uint32*)src;
-
/* Find the appropriate permutation */
int p0, p1, p2, p3;
get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
@@ -2709,17 +2688,17 @@
/* *INDENT-OFF* */
DUFFS_LOOP(
{
+ Uint32 *src32 = (Uint32*)src;
if ((*src32 & rgbmask) != ckey) {
- Uint8 *s8 = (Uint8 *)src32;
- dst[0] = s8[p0];
- dst[1] = s8[p1];
- dst[2] = s8[p2];
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
}
- ++src32;
+ src += 4;
dst += 3;
}, width);
/* *INDENT-ON* */
- src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
+ src += srcskip;
dst += dstskip;
}
return;
@@ -2729,8 +2708,6 @@
if (srcbpp == 3 && dstbpp == 4 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
- Uint32 *dst32 = (Uint32*)dst;
-
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
Uint8 k0 = ckey & 0xFF;
Uint8 k1 = (ckey >> 8) & 0xFF;
@@ -2754,20 +2731,18 @@
Uint8 s2 = src[2];
if (k0 != s0 || k1 != s1 || k2 != s2) {
- Uint8 *d8 = (Uint8 *)dst32;
- d8[0] = src[p0];
- d8[1] = src[p1];
- d8[2] = src[p2];
- d8[3] = src[p3];
- d8[alpha_channel] = alpha;
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
+ dst[alpha_channel] = alpha;
}
src += 3;
- ++dst32;
+ dst += 4;
}, width);
/* *INDENT-ON* */
src += srcskip;
- dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
-
+ dst += dstskip;
}
return;
}
@@ -2853,9 +2828,6 @@
srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
- Uint32 *src32 = (Uint32*)src;
- Uint32 *dst32 = (Uint32*)dst;
-
/* Find the appropriate permutation */
int p0, p1, p2, p3;
get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
@@ -2864,20 +2836,19 @@
/* *INDENT-OFF* */
DUFFS_LOOP(
{
+ Uint32 *src32 = (Uint32*)src;
if ((*src32 & rgbmask) != ckey) {
- Uint8 *s8 = (Uint8 *)src32;
- Uint8 *d8 = (Uint8 *)dst32;
- d8[0] = s8[p0];
- d8[1] = s8[p1];
- d8[2] = s8[p2];
- d8[3] = s8[p3];
+ dst[0] = src[p0];
+ dst[1] = src[p1];
+ dst[2] = src[p2];
+ dst[3] = src[p3];
}
- ++src32;
- ++dst32;
+ src += 4;
+ dst += 4;
}, width);
/* *INDENT-ON* */
- src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
- dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
+ src += srcskip;
+ dst += dstskip;
}
return;
}
@@ -2982,31 +2953,15 @@
if (dstfmt->Amask) {
/* SET_ALPHA */
Uint32 mask = info->a << dstfmt->Ashift;
- int last_line = 0;
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ int i0 = 0, i1 = 1, i2 = 2;
#else
int i0 = srcbpp - 1 - 0;
int i1 = srcbpp - 1 - 1;
int i2 = srcbpp - 1 - 2;
#endif
-
- if (srcbpp == 3 && height) {
- height -= 1;
- last_line = 1;
- }
-
while (height--) {
/* *INDENT-OFF* */
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
- DUFFS_LOOP(
- {
- Uint32 *dst32 = (Uint32*)dst;
- Uint32 *src32 = (Uint32*)src;
- *dst32 = *src32 | mask;
- dst += 4;
- src += srcbpp;
- }, width);
-#else
DUFFS_LOOP(
{
Uint32 *dst32 = (Uint32*)dst;
@@ -3017,34 +2972,15 @@
dst += 4;
src += srcbpp;
}, width);
-#endif
/* *INDENT-ON* */
src += srcskip;
dst += dstskip;
}
-
- if (last_line) {
- while (width--) {
- Uint32 *dst32 = (Uint32*)dst;
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
- Uint8 s0 = src[0];
- Uint8 s1 = src[1];
- Uint8 s2 = src[2];
-#else
- Uint8 s0 = src[i0];
- Uint8 s1 = src[i1];
- Uint8 s2 = src[i2];
-#endif
- *dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
- dst += 4;
- src += srcbpp;
- }
- }
} else {
/* NO_ALPHA */
- int last_line = 0;
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
- int mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
+ int i0 = 0, i1 = 1, i2 = 2;
+ int j0 = 0, j1 = 1, j2 = 2;
#else
int i0 = srcbpp - 1 - 0;
int i1 = srcbpp - 1 - 1;
@@ -3052,72 +2988,23 @@
int j0 = dstbpp - 1 - 0;
int j1 = dstbpp - 1 - 1;
int j2 = dstbpp - 1 - 2;
- int shift0, shift1, shift2;
- if (dstbpp == 4) {
- shift2 = 16;
- shift1 = 8;
- shift0 = 0;
- } else { /* dstbpp 3 */
- shift2 = 24;
- shift1 = 16;
- shift0 = 8;
- }
#endif
-
- if ((dstbpp == 3 || srcbpp == 3) && height) {
- height -= 1;
- last_line = 1;
- }
-
while (height--) {
/* *INDENT-OFF* */
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
DUFFS_LOOP(
{
- Uint32 *dst32 = (Uint32*)dst;
-
- Uint32 *src32 = (Uint32*)src;
- *dst32 = *src32 & mask;
- dst += dstbpp;
- src += srcbpp;
- }, width);
-#else
- DUFFS_LOOP(
- {
- Uint32 *dst32 = (Uint32*)dst;
- Uint8 s0 = src[i0];
- Uint8 s1 = src[i1];
- Uint8 s2 = src[i2];
- *dst32 = (s0 << shift0) | (s1 << shift1) | (s2 << shift2);
- dst += dstbpp;
- src += srcbpp;
- }, width);
-#endif
- /* *INDENT-ON* */
- src += srcskip;
- dst += dstskip;
- }
-
- if (last_line) {
- while (width--) {
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
- Uint8 s0 = src[0];
- Uint8 s1 = src[1];
- Uint8 s2 = src[2];
- dst[0] = s0;
- dst[1] = s1;
- dst[2] = s2;
-#else
Uint8 s0 = src[i0];
Uint8 s1 = src[i1];
Uint8 s2 = src[i2];
dst[j0] = s0;
dst[j1] = s1;
dst[j2] = s2;
-#endif
dst += dstbpp;
src += srcbpp;
- }
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
}
}
return;
@@ -3158,8 +3045,8 @@
Uint32 alphashift = src[i3] << dstfmt->Ashift;
/* inversed, compared to Blit_3or4_to_3or4__same_rgb */
*dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
- dst += dstbpp;
- src += srcbpp;
+ dst += 4;
+ src += 4;
}, width);
/* *INDENT-ON* */
src += srcskip;
@@ -3175,7 +3062,6 @@
int i1 = srcbpp - 1 - 1;
int i2 = srcbpp - 1 - 2;
#endif
-
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
@@ -3186,7 +3072,7 @@
Uint8 s2 = src[i2];
/* inversed, compared to Blit_3or4_to_3or4__same_rgb */
*dst32 = (s0 << 16) | (s1 << 8) | (s2) | mask;
- dst += dstbpp;
+ dst += 4;
src += srcbpp;
}, width);
/* *INDENT-ON* */
@@ -3196,41 +3082,28 @@
}
} else {
/* NO_ALPHA */
- int last_line = 0;
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
int i0 = 0, i1 = 1, i2 = 2;
- int shift0 = 16, shift1 = 8, shift2 = 0;
+ int j0 = 2, j1 = 1, j2 = 0;
#else
int i0 = srcbpp - 1 - 0;
int i1 = srcbpp - 1 - 1;
int i2 = srcbpp - 1 - 2;
- int shift0, shift1, shift2;
- if (dstbpp == 4) {
- shift0 = 16;
- shift1 = 8;
- shift2 = 0;
- } else { /* dstbpp 3 */
- shift0 = 24;
- shift1 = 16;
- shift2 = 8;
- }
+ int j0 = dstbpp - 1 - 2;
+ int j1 = dstbpp - 1 - 1;
+ int j2 = dstbpp - 1 - 0;
#endif
-
- if (dstbpp == 3 && height) {
- height -= 1;
- last_line = 1;
- }
-
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
{
- Uint32 *dst32 = (Uint32*)dst;
Uint8 s0 = src[i0];
Uint8 s1 = src[i1];
Uint8 s2 = src[i2];
/* inversed, compared to Blit_3or4_to_3or4__same_rgb */
- *dst32 = (s0 << shift0) | (s1 << shift1) | (s2 << shift2);
+ dst[j0] = s0;
+ dst[j1] = s1;
+ dst[j2] = s2;
dst += dstbpp;
src += srcbpp;
}, width);
@@ -3238,30 +3111,6 @@
src += srcskip;
dst += dstskip;
}
-
- if (last_line) {
- while (width--) {
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
- Uint8 s0 = src[0];
- Uint8 s1 = src[1];
- Uint8 s2 = src[2];
- /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
- dst[0] = s2;
- dst[1] = s1;
- dst[2] = s0;
-#else
- Uint8 s0 = src[i0];
- Uint8 s1 = src[i1];
- Uint8 s2 = src[i2];
- /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
- dst[0] = s0;
- dst[1] = s1;
- dst[2] = s2;
-#endif
- dst += dstbpp;
- src += srcbpp;
- }
- }
}
return;
}