Use Clang/GCC builtins for SDL byteswapping functions

__builtin_bswap32 and __builtin_bswap64 were introduced in GCC 4.3.
__builtin_bswap16 was not available on x86 until GCC 4.8 due to a bug,
so the 16-bit swap is only enabled for GCC 4.8 or newer.

__builtin_bswap32 and __builtin_bswap64 were introduced in Clang 2.6.
__builtin_bswap16 was introduced in Clang 3.2. Compilers older than
these versions keep using the existing implementations.
diff --git a/include/SDL_endian.h b/include/SDL_endian.h
index 1398e05..4534cad 100644
--- a/include/SDL_endian.h
+++ b/include/SDL_endian.h
@@ -75,7 +75,10 @@
 /**
  *  \file SDL_endian.h
  */
-#if defined(__GNUC__) && defined(__i386__) && \
+#if (defined(__clang__) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 2))) || \
+    (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))
+#define SDL_Swap16(x) __builtin_bswap16(x)
+#elif defined(__GNUC__) && defined(__i386__) && \
    !(__GNUC__ == 2 && __GNUC_MINOR__ <= 95 /* broken gcc version */)
 SDL_FORCE_INLINE Uint16
 SDL_Swap16(Uint16 x)
@@ -130,7 +133,10 @@
 }
 #endif
 
-#if defined(__GNUC__) && defined(__i386__) && \
+#if (defined(__clang__) && (__clang_major__ > 2 || (__clang_major__ == 2 && __clang_minor__ >= 6))) || \
+    (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
+#define SDL_Swap32(x) __builtin_bswap32(x)
+#elif defined(__GNUC__) && defined(__i386__) && \
    !(__GNUC__ == 2 && __GNUC_MINOR__ <= 95 /* broken gcc version */)
 SDL_FORCE_INLINE Uint32
 SDL_Swap32(Uint32 x)
@@ -197,7 +203,10 @@
 }
 #endif
 
-#if defined(__GNUC__) && defined(__i386__) && \
+#if (defined(__clang__) && (__clang_major__ > 2 || (__clang_major__ == 2 && __clang_minor__ >= 6))) || \
+    (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
+#define SDL_Swap64(x) __builtin_bswap64(x)
+#elif defined(__GNUC__) && defined(__i386__) && \
    !(__GNUC__ == 2 && __GNUC_MINOR__ <= 95 /* broken gcc version */)
 SDL_FORCE_INLINE Uint64
 SDL_Swap64(Uint64 x)