Fixed bug 3668 - Overflow of SDL_AudioCVT.filters with some downmixes

Simon Hug

There's a chance that an audio conversion from many channels to a few can use more than 9 audio filters. SDL_AudioCVT has 10 SDL_AudioFilter pointers of which one has to be the terminating NULL pointer. The SDL code has no checks for this limit. If it overflows there can be stack or heap corruption or a call to 0xa.

Attached patch adds a function that checks for this limit and throws an error if it is reached. Also adds some documentation.

Test parameters that trigger this issue:
AUDIO_U16MSB with 224 channels at 46359 Hz
                 V
AUDIO_S16MSB with 6 channels at 27463 Hz

The fuzzer program I uploaded in bug 3667 has more of them.
diff --git a/include/SDL_audio.h b/include/SDL_audio.h
index 677525a..ada6058 100644
--- a/include/SDL_audio.h
+++ b/include/SDL_audio.h
@@ -184,7 +184,17 @@
                                           SDL_AudioFormat format);
 
 /**
- *  A structure to hold a set of audio conversion filters and buffers.
+ *  \brief Upper limit of filters in SDL_AudioCVT
+ *
+ *  The maximum number of SDL_AudioFilter functions in SDL_AudioCVT is
+ *  currently limited to 9. The SDL_AudioCVT.filters array has 10 pointers,
+ *  one of which is the terminating NULL pointer.
+ */
+#define SDL_AUDIOCVT_MAX_FILTERS 9
+
+/**
+ *  \struct SDL_AudioCVT
+ *  \brief A structure to hold a set of audio conversion filters and buffers.
  *
  *  Note that various parts of the conversion pipeline can take advantage
  *  of SIMD operations (like SSE2, for example). SDL_AudioCVT doesn't require
@@ -214,7 +224,7 @@
     int len_cvt;                /**< Length of converted audio buffer */
     int len_mult;               /**< buffer must be len*len_mult big */
     double len_ratio;           /**< Given len, final size is len*len_ratio */
-    SDL_AudioFilter filters[10];        /**< Filter list */
+    SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS + 1]; /**< NULL-terminated list of filter functions */
     int filter_index;           /**< Current audio conversion function */
 } SDL_AUDIOCVT_PACKED SDL_AudioCVT;
 
diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c
index aa4501f..0e1ee63 100644
--- a/src/audio/SDL_audiocvt.c
+++ b/src/audio/SDL_audiocvt.c
@@ -514,6 +514,19 @@
     }
 }
 
+static int
+SDL_AddAudioCVTFilter(SDL_AudioCVT *cvt, const SDL_AudioFilter filter)
+{
+    if (cvt->filter_index >= SDL_AUDIOCVT_MAX_FILTERS) {
+        return SDL_SetError("Too many filters needed for conversion, exceeded maximum of %d", SDL_AUDIOCVT_MAX_FILTERS);
+    }
+    if (filter == NULL) {
+        return SDL_SetError("Audio filter pointer is NULL");
+    }
+    cvt->filters[cvt->filter_index++] = filter;
+    cvt->filters[cvt->filter_index] = NULL; /* Moving terminator */
+    return 0;
+}
 
 static int
 SDL_BuildAudioTypeCVTToFloat(SDL_AudioCVT *cvt, const SDL_AudioFormat src_fmt)
@@ -521,7 +534,9 @@
     int retval = 0;  /* 0 == no conversion necessary. */
 
     if ((SDL_AUDIO_ISBIGENDIAN(src_fmt) != 0) == (SDL_BYTEORDER == SDL_LIL_ENDIAN)) {
-        cvt->filters[cvt->filter_index++] = SDL_Convert_Byteswap;
+        if (SDL_AddAudioCVTFilter(cvt, SDL_Convert_Byteswap) < 0) {
+            return -1;
+        }
         retval = 1;  /* added a converter. */
     }
 
@@ -543,7 +558,9 @@
             return SDL_SetError("No conversion available for these formats");
         }
 
-        cvt->filters[cvt->filter_index++] = filter;
+        if (SDL_AddAudioCVTFilter(cvt, filter) < 0) {
+            return -1;
+        }
         if (src_bitsize < dst_bitsize) {
             const int mult = (dst_bitsize / src_bitsize);
             cvt->len_mult *= mult;
@@ -580,7 +597,9 @@
             return SDL_SetError("No conversion available for these formats");
         }
 
-        cvt->filters[cvt->filter_index++] = filter;
+        if (SDL_AddAudioCVTFilter(cvt, filter) < 0) {
+            return -1;
+        }
         if (src_bitsize < dst_bitsize) {
             const int mult = (dst_bitsize / src_bitsize);
             cvt->len_mult *= mult;
@@ -592,7 +611,9 @@
     }
 
     if ((SDL_AUDIO_ISBIGENDIAN(dst_fmt) != 0) == (SDL_BYTEORDER == SDL_LIL_ENDIAN)) {
-        cvt->filters[cvt->filter_index++] = SDL_Convert_Byteswap;
+        if (SDL_AddAudioCVTFilter(cvt, SDL_Convert_Byteswap) < 0) {
+            return -1;
+        }
         retval = 1;  /* added a converter. */
     }
 
@@ -665,7 +686,9 @@
     }
 
     /* Update (cvt) with filter details... */
-    cvt->filters[cvt->filter_index++] = filter;
+    if (SDL_AddAudioCVTFilter(cvt, filter) < 0) {
+        return -1;
+    }
     if (src_rate < dst_rate) {
         const double mult = ((double) dst_rate) / ((double) src_rate);
         cvt->len_mult *= (int) SDL_ceil(mult);
@@ -739,7 +762,9 @@
        format as well. */
     if ((src_channels == 2) && (dst_channels == 2) && (src_fmt == AUDIO_S16SYS) && (dst_fmt == AUDIO_S16SYS) && (src_rate != dst_rate)) {
         cvt->needed = 1;
-        cvt->filters[cvt->filter_index++] = SDL_ResampleCVT_si16_c2;
+        if (SDL_AddAudioCVTFilter(cvt, SDL_ResampleCVT_si16_c2) < 0) {
+            return -1;
+        }
         if (src_rate < dst_rate) {
             const double mult = ((double) dst_rate) / ((double) src_rate);
             cvt->len_mult *= (int) SDL_ceil(mult);
@@ -772,7 +797,9 @@
 
         /* just a byteswap needed? */
         if ((src_fmt & ~SDL_AUDIO_MASK_ENDIAN) == (dst_fmt & ~SDL_AUDIO_MASK_ENDIAN)) {
-            cvt->filters[cvt->filter_index++] = SDL_Convert_Byteswap;
+            if (SDL_AddAudioCVTFilter(cvt, SDL_Convert_Byteswap) < 0) {
+                return -1;
+            }
             cvt->needed = 1;
             return 1;
         }
@@ -786,36 +813,48 @@
     /* Channel conversion */
     if (src_channels != dst_channels) {
         if ((src_channels == 1) && (dst_channels > 1)) {
-            cvt->filters[cvt->filter_index++] = SDL_ConvertMonoToStereo;
+            if (SDL_AddAudioCVTFilter(cvt, SDL_ConvertMonoToStereo) < 0) {
+                return -1;
+            }
             cvt->len_mult *= 2;
             src_channels = 2;
             cvt->len_ratio *= 2;
         }
         if ((src_channels == 2) && (dst_channels == 6)) {
-            cvt->filters[cvt->filter_index++] = SDL_ConvertStereoTo51;
+            if (SDL_AddAudioCVTFilter(cvt, SDL_ConvertStereoTo51) < 0) {
+                return -1;
+            }
             src_channels = 6;
             cvt->len_mult *= 3;
             cvt->len_ratio *= 3;
         }
         if ((src_channels == 2) && (dst_channels == 4)) {
-            cvt->filters[cvt->filter_index++] = SDL_ConvertStereoToQuad;
+            if (SDL_AddAudioCVTFilter(cvt, SDL_ConvertStereoToQuad) < 0) {
+                return -1;
+            }
             src_channels = 4;
             cvt->len_mult *= 2;
             cvt->len_ratio *= 2;
         }
         while ((src_channels * 2) <= dst_channels) {
-            cvt->filters[cvt->filter_index++] = SDL_ConvertMonoToStereo;
+            if (SDL_AddAudioCVTFilter(cvt, SDL_ConvertMonoToStereo) < 0) {
+                return -1;
+            }
             cvt->len_mult *= 2;
             src_channels *= 2;
             cvt->len_ratio *= 2;
         }
         if ((src_channels == 6) && (dst_channels <= 2)) {
-            cvt->filters[cvt->filter_index++] = SDL_Convert51ToStereo;
+            if (SDL_AddAudioCVTFilter(cvt, SDL_Convert51ToStereo) < 0) {
+                return -1;
+            }
             src_channels = 2;
             cvt->len_ratio /= 3;
         }
         if ((src_channels == 6) && (dst_channels == 4)) {
-            cvt->filters[cvt->filter_index++] = SDL_Convert51ToQuad;
+            if (SDL_AddAudioCVTFilter(cvt, SDL_Convert51ToQuad) < 0) {
+                return -1;
+            }
             src_channels = 4;
             cvt->len_ratio /= 2;
         }
@@ -837,7 +876,9 @@
                 filter = SDL_ConvertStereoToMono;
             }
 
-            cvt->filters[cvt->filter_index++] = filter;
+            if (SDL_AddAudioCVTFilter(cvt, filter) < 0) {
+                return -1;
+            }
 
             src_channels /= 2;
             cvt->len_ratio /= 2;