Fix invalid memory access and optimise Blit_3or4_to_3or4__*
Fix an invalid write past the last pixel of the surface.
It occurs when the surface has no padding (pitch == w * bpp) and bpp is 3,
for a Blit with no colorkey and NO_ALPHA, with the same or inverse RGB triplet order.
Optimise by using 32-bit (Uint32) accesses:
BGR24 -> ARGB8888 : faster x1.897875 (362405 -> 190953)
RGB24 -> ABGR8888 : faster x1.660416 (363304 -> 218803)
ABGR8888 -> RGB24 : faster x1.686319 (334962 -> 198635)
ARGB8888 -> BGR24 : faster x1.691868 (324524 -> 191814)
BGR24 -> RGB888 : faster x1.678459 (326811 -> 194709)
BGR888 -> RGB24 : faster x1.731772 (327724 -> 189242)
RGB24 -> BGR888 : faster x1.690989 (328916 -> 194511)
RGB888 -> BGR24 : faster x1.698333 (326175 -> 192056)
diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c
index c31cd9a..9a2f241 100644
--- a/src/video/SDL_blit_N.c
+++ b/src/video/SDL_blit_N.c
@@ -2938,15 +2938,54 @@
if (dstfmt->Amask) {
/* SET_ALPHA */
Uint32 mask = info->a << dstfmt->Ashift;
+ int last_line = 0;
+ if (srcbpp == 3 && height) {
+ height -= 1;
+ last_line = 1;
+ }
+
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
{
Uint32 *dst32 = (Uint32*)dst;
+ Uint32 *src32 = (Uint32*)src;
+ *dst32 = *src32 | mask;
+ dst += 4;
+ src += srcbpp;
+ }, width);
+ /* *INDENT-ON* */
+ src += srcskip;
+ dst += dstskip;
+ }
+
+ if (last_line) {
+ while (width--) {
+ Uint32 *dst32 = (Uint32*)dst;
Uint8 s0 = src[0];
Uint8 s1 = src[1];
Uint8 s2 = src[2];
*dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
+ dst += 4;
+ src += srcbpp;
+ }
+ }
+ } else {
+ /* NO_ALPHA */
+ int mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
+ int last_line = 0;
+ if ((dstbpp == 3 || srcbpp == 3) && height) {
+ height -= 1;
+ last_line = 1;
+ }
+
+ while (height--) {
+ /* *INDENT-OFF* */
+ DUFFS_LOOP(
+ {
+ Uint32 *dst32 = (Uint32*)dst;
+ Uint32 *src32 = (Uint32*)src;
+ *dst32 = *src32 & mask;
dst += dstbpp;
src += srcbpp;
}, width);
@@ -2954,23 +2993,18 @@
src += srcskip;
dst += dstskip;
}
- } else {
- /* NO_ALPHA */
- while (height--) {
- /* *INDENT-OFF* */
- DUFFS_LOOP(
- {
- Uint32 *dst32 = (Uint32*)dst;
+
+ if (last_line) {
+ while (width--) {
Uint8 s0 = src[0];
Uint8 s1 = src[1];
Uint8 s2 = src[2];
- *dst32 = (s0) | (s1 << 8) | (s2 << 16);
+ dst[0] = s0;
+ dst[1] = s1;
+ dst[2] = s2;
dst += dstbpp;
src += srcbpp;
- }, width);
- /* *INDENT-ON* */
- src += srcskip;
- dst += dstskip;
+ }
}
}
return;
@@ -3036,6 +3070,12 @@
}
} else {
/* NO_ALPHA */
+ int last_line = 0;
+ if (dstbpp == 3 && height) {
+ height -= 1;
+ last_line = 1;
+ }
+
while (height--) {
/* *INDENT-OFF* */
DUFFS_LOOP(
@@ -3053,6 +3093,20 @@
src += srcskip;
dst += dstskip;
}
+
+ if (last_line) {
+ while (width--) {
+ Uint8 s0 = src[0];
+ Uint8 s1 = src[1];
+ Uint8 s2 = src[2];
+ /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
+ dst[0] = s2;
+ dst[1] = s1;
+ dst[2] = s0;
+ dst += dstbpp;
+ src += srcbpp;
+ }
+ }
}
return;
}