Faster blit when using No Alpha or Set Alpha, combined with ColorKey.
Applied to the following formats:

ABGR8888 -> BGRX8888 :  faster x4   (2794295 -> 610587)
ABGR8888 -> RGB888 :  faster x4   (2835693 -> 615561)
ABGR8888 -> RGBX8888 :  faster x4   (2880475 -> 610479)

ARGB8888 -> BGR888 :  faster x4   (2802718 -> 610702)
ARGB8888 -> BGRX8888 :  faster x4   (2792481 -> 606311)
ARGB8888 -> RGBX8888 :  faster x4   (2821621 -> 624745)

BGR888 -> ARGB8888 :  faster x4   (2791705 -> 637889)
BGR888 -> BGRA8888 :  faster x4   (2793195 -> 652299)
BGR888 -> BGRX8888 :  faster x4   (2800713 -> 609326)
BGR888 -> RGB888 :  faster x4   (2812260 -> 610471)
BGR888 -> RGBA8888 :  faster x4   (2792327 -> 629288)
BGR888 -> RGBX8888 :  faster x4   (2799224 -> 607073)

BGRA8888 -> BGR888 :  faster x4   (2800520 -> 606897)
BGRA8888 -> RGB888 :  faster x4   (2825274 -> 616156)
BGRA8888 -> RGBX8888 :  faster x4   (2812530 -> 610340)

BGRX8888 -> ABGR8888 :  faster x4   (2793940 -> 628596)
BGRX8888 -> ARGB8888 :  faster x4   (2822686 -> 638899)
BGRX8888 -> BGR888 :  faster x4   (2818141 -> 613659)
BGRX8888 -> RGB888 :  faster x4   (2929017 -> 611794)
BGRX8888 -> RGBA8888 :  faster x4   (2799709 -> 629750)
BGRX8888 -> RGBX8888 :  faster x4   (2911010 -> 605640)

RGB888 -> ABGR8888 :  faster x4   (2800671 -> 631542)
RGB888 -> BGR888 :  faster x4   (2802644 -> 604461)
RGB888 -> BGRA8888 :  faster x4   (2801919 -> 628729)
RGB888 -> BGRX8888 :  faster x4   (2938244 -> 604135)
RGB888 -> RGBA8888 :  faster x4   (2912447 -> 642185)
RGB888 -> RGBX8888 :  faster x4   (2831676 -> 634293)

RGBA8888 -> BGR888 :  faster x4   (2928896 -> 614960)
RGBA8888 -> BGRX8888 :  faster x4   (2821422 -> 608146)
RGBA8888 -> RGB888 :  faster x4   (2825927 -> 617184)

RGBX8888 -> ABGR8888 :  faster x4   (2803852 -> 654129)
RGBX8888 -> ARGB8888 :  faster x4   (2923615 -> 642644)
RGBX8888 -> BGR888 :  faster x4   (2806523 -> 610447)
RGBX8888 -> BGRA8888 :  faster x4   (2813388 -> 630305)
RGBX8888 -> BGRX8888 :  faster x4   (2800052 -> 607881)
RGBX8888 -> RGB888 :  faster x4   (2807722 -> 610263)
diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c
index 2067dfb..0886bed 100644
--- a/src/video/SDL_blit_N.c
+++ b/src/video/SDL_blit_N.c
@@ -2379,6 +2379,123 @@
         }
     }
 
+    /* Any src/dst 8888, not CopyAlpha, no ARGB2101010 */
+    if (srcbpp == 4 && dstbpp == 4 &&
+        srcfmt->format != SDL_PIXELFORMAT_ARGB2101010 &&
+        dstfmt->format != SDL_PIXELFORMAT_ARGB2101010) {
+
+        Uint32 *src32 = (Uint32*)src;
+        Uint32 *dst32 = (Uint32*)dst;
+        
+        if (dstfmt->Amask) {
+            /* Find the appropriate permutation */
+            int missing = 0, r, g, b, a;
+            int Pixel = 0x03020100;
+            RGB_FROM_PIXEL(Pixel, srcfmt, r, g, b);
+            PIXEL_FROM_RGBA(Pixel, dstfmt, r, g, b, 255);
+            r = Pixel & 0xFF;
+            g = (Pixel >> 8) & 0xFF;
+            b = (Pixel >> 16) & 0xFF;
+            a = (Pixel >> 24) & 0xFF;
+
+            {
+                int val;
+                for (val = 0; val <= 3; val++) {
+                    if (r != val && g != val && b != val && a != val) {
+                        missing = val;
+                    }
+                }
+            }
+
+            if (r == 255) {
+                r = missing;
+                missing = 0;
+            } else if (g == 255) {
+                g = missing;
+                missing = 1;
+            } else if (b == 255) {
+                b = missing;
+                missing = 2;
+            } else if (a == 255) {
+                a = missing;
+                missing = 3;
+            }
+
+            while (height--) {
+                /* *INDENT-OFF* */
+                DUFFS_LOOP(
+                {
+                    if ((*src32 & rgbmask) != ckey) {
+                        Uint8 *s8 = (Uint8 *)src32;
+                        Uint8 *d8 = (Uint8 *)dst32;
+                        d8[0] = s8[r];
+                        d8[1] = s8[g];
+                        d8[2] = s8[b];
+                        d8[3] = s8[a];
+                        d8[missing] = alpha;
+                    }
+                    ++src32;
+                    ++dst32;
+                }, width);
+                /* *INDENT-ON* */
+                src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
+                dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
+            }
+        
+            return;
+        } else {
+            /* Find the appropriate permutation */
+            int missing = 0, r, g, b, a;
+            int Pixel = 0x04030201; /* +1 */
+            RGBA_FROM_PIXEL(Pixel, srcfmt, r, g, b, a);
+            missing = a;
+            PIXEL_FROM_RGB(Pixel, dstfmt, r, g, b);
+            r = Pixel & 0xFF;
+            g = (Pixel >> 8) & 0xFF;
+            b = (Pixel >> 16) & 0xFF;
+            a = (Pixel >> 24) & 0xFF;
+
+            if (r == 0) {
+                r = missing;
+                missing = 0;
+            } else if (g == 0) {
+                g = missing;
+                missing = 1;
+            } else if (b == 0) {
+                b = missing;
+                missing = 2;
+            } else if (a == 0) {
+                a = missing;
+                missing = 3;
+            }
+        
+            /* -1: undo the +1 bias of the probe pixel so r, g, b, a are 0-based byte indices */
+            r -= 1; g -= 1; b -= 1; a -= 1;
+
+            while (height--) {
+                /* *INDENT-OFF* */
+                DUFFS_LOOP(
+                {
+                    if ((*src32 & rgbmask) != ckey) {
+                        Uint8 *s8 = (Uint8 *)src32;
+                        Uint8 *d8 = (Uint8 *)dst32;
+                        d8[0] = s8[r];
+                        d8[1] = s8[g];
+                        d8[2] = s8[b];
+                        d8[3] = s8[a];
+                        d8[missing] = 0;
+                    }
+                    ++src32;
+                    ++dst32;
+                }, width);
+                /* *INDENT-ON* */
+                src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
+                dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
+            }
+            return;
+        }
+    }
+
     /* BPP 3, same rgb triplet */
     if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_RGB24) ||
         (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_BGR24)) {