Fixed bug 4484 - use SIMD aligned memory for SDL_Surface

Surfaces are allocated using SDL_SIMDAlloc()
They are marked with SDL_SIMD_ALIGNED flag to appropriatly free them with SDL_SIMDFree()
(Flag is cleared when pixels is free'd in RLE, in case user would hijack the pixels ptr)

When providing its own memory pointer (SDL_CreateRGBSurfaceFrom()) and clearing
SDL_PREALLOC  to delegate to SDL the memory free, it's the responsability of the user
to add SDL_SIMD_ALIGNED or not, whether the pointer has been allocated with SDL_malloc() or
SDL_SIMDAlloc().
diff --git a/src/video/SDL_RLEaccel.c b/src/video/SDL_RLEaccel.c
index b4cb6f6..e105091 100644
--- a/src/video/SDL_RLEaccel.c
+++ b/src/video/SDL_RLEaccel.c
@@ -89,6 +89,7 @@
 #include "SDL_sysvideo.h"
 #include "SDL_blit.h"
 #include "SDL_RLEaccel_c.h"
+#include "../cpuinfo/SDL_simd.h"
 
 #ifndef MIN
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
@@ -1220,8 +1221,9 @@
 
     /* Now that we have it encoded, release the original pixels */
     if (!(surface->flags & SDL_PREALLOC)) {
-        SDL_free(surface->pixels);
+        SDL_SIMDFree(surface->pixels);
         surface->pixels = NULL;
+        surface->flags &= ~SDL_SIMD_ALIGNED;
     }
 
     /* realloc the buffer to release unused memory */
@@ -1383,8 +1385,9 @@
 
     /* Now that we have it encoded, release the original pixels */
     if (!(surface->flags & SDL_PREALLOC)) {
-        SDL_free(surface->pixels);
+        SDL_SIMDFree(surface->pixels);
         surface->pixels = NULL;
+        surface->flags &= ~SDL_SIMD_ALIGNED;
     }
 
     /* realloc the buffer to release unused memory */
@@ -1484,10 +1487,11 @@
         uncopy_opaque = uncopy_transl = uncopy_32;
     }
 
-    surface->pixels = SDL_malloc(surface->h * surface->pitch);
+    surface->pixels = SDL_SIMDAlloc(surface->h * surface->pitch);
     if (!surface->pixels) {
         return (SDL_FALSE);
     }
+    surface->flags |= SDL_SIMD_ALIGNED;
     /* fill background with transparent pixels */
     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
 
@@ -1549,12 +1553,13 @@
                 SDL_Rect full;
 
                 /* re-create the original surface */
-                surface->pixels = SDL_malloc(surface->h * surface->pitch);
+                surface->pixels = SDL_SIMDAlloc(surface->h * surface->pitch);
                 if (!surface->pixels) {
                     /* Oh crap... */
                     surface->flags |= SDL_RLEACCEL;
                     return;
                 }
+                surface->flags |= SDL_SIMD_ALIGNED;
 
                 /* fill it with the background color */
                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
diff --git a/src/video/SDL_surface.c b/src/video/SDL_surface.c
index ec3e9b6..c0cc221 100644
--- a/src/video/SDL_surface.c
+++ b/src/video/SDL_surface.c
@@ -120,12 +120,13 @@
             return NULL;
         }
 
-        surface->pixels = SDL_malloc((size_t)size);
+        surface->pixels = SDL_SIMDAlloc((size_t)size);
         if (!surface->pixels) {
             SDL_FreeSurface(surface);
             SDL_OutOfMemory();
             return NULL;
         }
+        surface->flags |= SDL_SIMD_ALIGNED;
         /* This is important for bitmaps */
         SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
     }