Fix audio instances

Fixes issue reported here: https://2dimensions.slack.com/archives/CSFM8K3CH/p1713795412798999

Part of the problem was that audio clips never had their "end" callback invoked: miniaudio does not call the "end" callback when an end time is provided that falls before the end of the actual audio file. I modified the AudioEngine to handle this for both kinds of source. For buffered sources, it now creates a PCM buffer sized to the clipped duration. For non-buffered sources, it wraps the decoder in a custom data source that decodes only the necessary PCM frames and then reports that it has reached its end (based on the desired end frame). This is the cleanest way to make miniaudio call the callback without modifying the miniaudio source itself.

Diffs=
2b2e92ca7 Fix audio instances (#7113)

Co-authored-by: Luigi Rosso <luigi-rosso@users.noreply.github.com>
diff --git a/.rive_head b/.rive_head
index 584b93d..a569826 100644
--- a/.rive_head
+++ b/.rive_head
@@ -1 +1 @@
-89053041aea1e6c8559afc4273c4e466e9ff547a
+2b2e92ca7ef815c7c60176c14bed4bd488d44d0c
diff --git a/include/rive/audio/audio_sound.hpp b/include/rive/audio/audio_sound.hpp
index 5d34ca4..96d63f3 100644
--- a/include/rive/audio/audio_sound.hpp
+++ b/include/rive/audio/audio_sound.hpp
@@ -10,6 +10,92 @@
 {
 class AudioEngine;
 class Artboard;
+
+struct ma_end_clipped_decoder
+{
+    ma_data_source_base base;
+    ma_decoder decoder;
+    ma_uint64 frameCursor;
+    ma_uint64 endFrame;
+};
+
+static ma_result ma_end_clipped_decoder_read(ma_data_source* pDataSource,
+                                             void* pFramesOut,
+                                             ma_uint64 frameCount,
+                                             ma_uint64* pFramesRead)
+{
+    ma_end_clipped_decoder* clipped = (ma_end_clipped_decoder*)pDataSource;
+
+    ma_result result =
+        ma_decoder_read_pcm_frames(&clipped->decoder, pFramesOut, frameCount, pFramesRead);
+
+    clipped->frameCursor += *pFramesRead;
+    if (clipped->frameCursor >= clipped->endFrame)
+    {
+        ma_uint64 overflow = clipped->frameCursor - clipped->endFrame;
+        if (*pFramesRead > overflow)
+        {
+            *pFramesRead -= overflow;
+        }
+        else
+        {
+            *pFramesRead = 0;
+            return MA_AT_END;
+        }
+    }
+
+    return result;
+}
+
+static ma_result ma_end_clipped_decoder_seek(ma_data_source* pDataSource, ma_uint64 frameIndex)
+{
+    ma_end_clipped_decoder* clipped = (ma_end_clipped_decoder*)pDataSource;
+    ma_result result = ma_decoder_seek_to_pcm_frame(&clipped->decoder, frameIndex);
+    if (result != MA_SUCCESS)
+    {
+        return result;
+    }
+
+    clipped->frameCursor = frameIndex;
+    return result;
+}
+
+static ma_result ma_end_clipped_decoder_get_data_format(ma_data_source* pDataSource,
+                                                        ma_format* pFormat,
+                                                        ma_uint32* pChannels,
+                                                        ma_uint32* pSampleRate,
+                                                        ma_channel* pChannelMap,
+                                                        size_t channelMapCap)
+{
+    ma_end_clipped_decoder* clipped = (ma_end_clipped_decoder*)pDataSource;
+    return ma_decoder_get_data_format(&clipped->decoder,
+                                      pFormat,
+                                      pChannels,
+                                      pSampleRate,
+                                      pChannelMap,
+                                      channelMapCap);
+}
+
+static ma_result ma_end_clipped_decoder_get_cursor(ma_data_source* pDataSource, ma_uint64* pCursor)
+{
+    ma_end_clipped_decoder* clipped = (ma_end_clipped_decoder*)pDataSource;
+    *pCursor = clipped->frameCursor;
+    return MA_SUCCESS;
+}
+
+static ma_result ma_end_clipped_decoder_get_length(ma_data_source* pDataSource, ma_uint64* pLength)
+{
+    ma_end_clipped_decoder* clipped = (ma_end_clipped_decoder*)pDataSource;
+    return ma_decoder_get_length_in_pcm_frames(&clipped->decoder, pLength);
+}
+
+static ma_data_source_vtable g_ma_end_clipped_decoder_vtable = {
+    ma_end_clipped_decoder_read,
+    ma_end_clipped_decoder_seek,
+    ma_end_clipped_decoder_get_data_format,
+    ma_end_clipped_decoder_get_cursor,
+    ma_end_clipped_decoder_get_length};
+
 class AudioSound : public RefCnt<AudioSound>
 {
     friend class AudioEngine;
@@ -24,12 +110,12 @@
 
 private:
     AudioSound(AudioEngine* engine, rcp<AudioSource> source, Artboard* artboard);
-    ma_decoder* decoder() { return &m_decoder; }
+    ma_end_clipped_decoder* clippedDecoder() { return &m_decoder; }
     ma_audio_buffer* buffer() { return &m_buffer; }
     ma_sound* sound() { return &m_sound; }
     void dispose();
 
-    ma_decoder m_decoder;
+    ma_end_clipped_decoder m_decoder;
     ma_audio_buffer m_buffer;
     ma_sound m_sound;
     rcp<AudioSource> m_source;
diff --git a/src/audio/audio_engine.cpp b/src/audio/audio_engine.cpp
index 076733d..8f957e8 100644
--- a/src/audio/audio_engine.cpp
+++ b/src/audio/audio_engine.cpp
@@ -19,6 +19,7 @@
 #include "rive/audio/audio_source.hpp"
 
 #include <algorithm>
+#include <cmath>
 
 using namespace rive;
 
@@ -195,6 +196,12 @@
                                   uint64_t soundStartTime,
                                   Artboard* artboard)
 {
+    if (endTime != 0 && startTime >= endTime)
+    {
+        // Requested to stop sound before start.
+        return nullptr;
+    }
+
     std::unique_lock<std::mutex> lock(m_mutex);
     // We have to dispose completed sounds out of the completed callback. So we
     // do it on next play or at destruct.
@@ -208,12 +215,21 @@
     if (source->isBuffered())
     {
         rive::Span<float> samples = source->bufferedSamples();
-        ma_audio_buffer_config config =
-            ma_audio_buffer_config_init(ma_format_f32,
-                                        source->channels(),
-                                        samples.size() / source->channels(),
-                                        (const void*)samples.data(),
-                                        nullptr);
+        ma_uint64 sizeInFrames = samples.size() / source->channels();
+        if (endTime != 0)
+        {
+            float durationSeconds = (soundStartTime + endTime - startTime) / (float)sampleRate();
+            ma_uint64 clippedFrames = (ma_uint64)std::round(durationSeconds * source->sampleRate());
+            if (clippedFrames < sizeInFrames)
+            {
+                sizeInFrames = clippedFrames;
+            }
+        }
+        ma_audio_buffer_config config = ma_audio_buffer_config_init(ma_format_f32,
+                                                                    source->channels(),
+                                                                    sizeInFrames,
+                                                                    (const void*)samples.data(),
+                                                                    nullptr);
         if (ma_audio_buffer_init(&config, audioSound->buffer()) != MA_SUCCESS)
         {
             fprintf(stderr, "AudioSource::play - Failed to initialize audio buffer.\n");
@@ -230,18 +246,34 @@
     }
     else
     {
+        // We wrapped the miniaudio decoder with a custom data source "Clipped
+        // Decoder" which lets us ensure that the end callback for the sound is
+        // called when we reach the end of the clip. This won't happen when
+        // using ma_sound_set_stop_time_in_pcm_frames(audioSound->sound(),
+        // endTime); as this keeps the sound playing/ready to fade back in.
+        auto clip = audioSound->clippedDecoder();
         ma_decoder_config config = ma_decoder_config_init(ma_format_f32, channels(), sampleRate());
         auto sourceBytes = source->bytes();
         if (ma_decoder_init_memory(sourceBytes.data(),
                                    sourceBytes.size(),
                                    &config,
-                                   audioSound->decoder()) != MA_SUCCESS)
+                                   &clip->decoder) != MA_SUCCESS)
         {
             fprintf(stderr, "AudioSource::play - Failed to initialize decoder.\n");
             return nullptr;
         }
+        clip->frameCursor = 0;
+        clip->endFrame = endTime == 0 ? std::numeric_limits<uint64_t>::max()
+                                      : soundStartTime + endTime - startTime;
+        ma_data_source_config baseConfig = ma_data_source_config_init();
+        baseConfig.vtable = &g_ma_end_clipped_decoder_vtable;
+        if (ma_data_source_init(&baseConfig, &clip->base) != MA_SUCCESS)
+        {
+            return nullptr;
+        }
+
         if (ma_sound_init_from_data_source(m_engine,
-                                           &audioSound->m_decoder,
+                                           audioSound->clippedDecoder(),
                                            MA_SOUND_FLAG_NO_PITCH | MA_SOUND_FLAG_NO_SPATIALIZATION,
                                            nullptr,
                                            audioSound->sound()) != MA_SUCCESS)
@@ -261,10 +293,6 @@
     {
         ma_sound_set_start_time_in_pcm_frames(audioSound->sound(), startTime);
     }
-    if (endTime != 0)
-    {
-        ma_sound_set_stop_time_in_pcm_frames(audioSound->sound(), endTime);
-    }
 #ifdef WITH_RIVE_AUDIO_TOOLS
     if (m_levelMonitor != nullptr)
     {
diff --git a/src/audio/audio_sound.cpp b/src/audio/audio_sound.cpp
index 6bc227d..052d178 100644
--- a/src/audio/audio_sound.cpp
+++ b/src/audio/audio_sound.cpp
@@ -24,7 +24,7 @@
     }
     m_isDisposed = true;
     ma_sound_uninit(&m_sound);
-    ma_decoder_uninit(&m_decoder);
+    ma_decoder_uninit(&m_decoder.decoder);
     ma_audio_buffer_uninit(&m_buffer);
 }