Add gpu compressed decoders (#12237) 195d4fcb25
* feat(renderer): GPU compressed texture support (BC7 / ASTC / ETC2)

- Software decoders for BC, ASTC, ETC2 in the decoders library.
- KTX2 reader extended to parse BC7, ASTC LDR (all footprints), ETC2 RGBA8.
- Ktx2HwSupport + per-backend cap flags; DecodeKtx2 falls back to SW
  decode + tight RGBA mip chain when HW lacks the format.
- Native compressed upload in Vulkan, GL, Metal, WebGPU
  (D3D11/D3D12 already supported BC7); per-mip uploads, no auto-mipgen
  for compressed.
- Texture compressor premultiplies alpha before BC7 encode so output
  matches the runtime PNG path.
- Android CMakeLists exposes decoders include directory.

* Android break

* fix(android): always expose decoders/include for renderer sources

* fix(decoders): include <cstdio> for stderr in texture_decoder.cpp

* Clang format

* Update premake5_pls_renderer.lua

* Update render_context_gl_impl.cpp

* Update render_context_vulkan_impl.cpp

* Fix null renderer

* Update decode_ktx2.hpp

* Update astc_footprints.hpp

* Copyright

* Update texture_decoder.cpp

* Comments

* Comments

* Update to ETC

* Update decode_bc_texture.cpp

* Update decode_bc_texture.cpp

* More comments

* Comments

* Bug fix on block sizes

* Clang format

* Fix ASTC

* Fix android

* Update UE and Other platform

* Update render_context_vulkan_impl.cpp

* Josh Comments

Co-authored-by: John White <aliasbinman@gmail.com>
diff --git a/.rive_head b/.rive_head
index ca9f990..a9f10f6 100644
--- a/.rive_head
+++ b/.rive_head
@@ -1 +1 @@
-fe77b2ec3dd8ea1f11cb8b54f950c33ed0b87dc6
+195d4fcb25427e309307e07670afb060c6886fec
diff --git a/decoders/include/rive/decoders/astc_footprints.hpp b/decoders/include/rive/decoders/astc_footprints.hpp
new file mode 100644
index 0000000..4309e77
--- /dev/null
+++ b/decoders/include/rive/decoders/astc_footprints.hpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2026 Rive
+ */
+
+#ifndef _RIVE_ASTC_FOOTPRINTS_HPP_
+#define _RIVE_ASTC_FOOTPRINTS_HPP_
+
+#include <cstdint>
+
+namespace rive
+{
+
+// Block footprints for LDR ASTC, kept in canonical (Vulkan / KHR_ldr) spec
+// order so a position in this table converts directly to the GPU enums:
+//   VkFormat (UNORM) = VK_FORMAT_ASTC_4x4_UNORM_BLOCK (157) + 2 * idx
+//   VkFormat (SRGB)  = UNORM + 1
+//   GL enum  (UNORM) = 0x93B0 + idx
+//   GL enum  (SRGB)  = 0x93D0 + idx
+struct AstcFootprint
+{
+    uint8_t width;
+    uint8_t height;
+};
+
+constexpr AstcFootprint AstcFootprints[] = {
+    {4, 4},
+    {5, 4},
+    {5, 5},
+    {6, 5},
+    {6, 6},
+    {8, 5},
+    {8, 6},
+    {8, 8},
+    {10, 5},
+    {10, 6},
+    {10, 8},
+    {10, 10},
+    {12, 10},
+    {12, 12},
+};
+constexpr int AstcFootprintCount =
+    sizeof(AstcFootprints) / sizeof(AstcFootprints[0]);
+
+// Table position of (blockWidth, blockHeight), or -1 when the pair is not a
+// recognised LDR ASTC footprint.
+inline int astcFootprintIndex(uint8_t blockWidth, uint8_t blockHeight)
+{
+    // Footprints are unique, so scanning from the back finds the same entry;
+    // running off the front leaves the index at -1, the "not found" value.
+    int index = AstcFootprintCount - 1;
+    while (index >= 0 && (AstcFootprints[index].width != blockWidth ||
+                          AstcFootprints[index].height != blockHeight))
+    {
+        --index;
+    }
+    return index;
+}
+
+} // namespace rive
+
+#endif
diff --git a/decoders/include/rive/decoders/texture_decoder.hpp b/decoders/include/rive/decoders/texture_decoder.hpp
new file mode 100644
index 0000000..e8ca4a4
--- /dev/null
+++ b/decoders/include/rive/decoders/texture_decoder.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2026 Rive
+ */
+
+#ifndef _RIVE_TEXTURE_DECODER_HPP_
+#define _RIVE_TEXTURE_DECODER_HPP_
+
+#include "rive/decoders/bitmap_decoder.hpp"
+#include "rive/gpu_texture_format.hpp"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+// Decode one mip level of block-compressed texture data to an RGBA Bitmap.
+// `blocks` points at the start of the level's block grid; `byteCount` is its
+// size in bytes. `width` / `height` are the level's logical pixel dimensions.
+// `blockWidth` / `blockHeight` default to 1; ASTC callers pass the block
+// footprint from the format (the BC/ETC2 decoders assume 4x4 blocks).
+//
+// Returns nullptr if the format's software decoder was not compiled in or
+// decoding fails.
+//
+// Build flags that enable each family:
+//   RIVE_ASTC_DECODER  --  astc (any block size)
+//   RIVE_BC_DECODER    --  bc1 / bc3 / bc7 (bc2 is not decoded yet)
+//   RIVE_ETC_DECODER   --  etc2 RGBA8 only
+std::unique_ptr<Bitmap> decode_texture(const uint8_t* blocks,
+                                       size_t byteCount,
+                                       uint32_t width,
+                                       uint32_t height,
+                                       rive::GPUTextureFormat format,
+                                       uint32_t blockWidth = 1,
+                                       uint32_t blockHeight = 1);
+
+#endif
diff --git a/decoders/premake5_v2.lua b/decoders/premake5_v2.lua
index 4b3043d..3753a5f 100644
--- a/decoders/premake5_v2.lua
+++ b/decoders/premake5_v2.lua
@@ -26,6 +26,21 @@
     description = 'don\'t build KTX2 container parsing into the rive_decoders library',
 })
 
+newoption({
+    trigger = 'with_rive_astc_decoder',
+    description = 'build ASTC software decoder into the rive_decoders library (requires astcenc)',
+})
+
+newoption({
+    trigger = 'with_rive_bc_decoder',
+    description = 'build BCn software decoder into the rive_decoders library (requires bc7enc_rdo)',
+})
+
+newoption({
+    trigger = 'with_rive_etc_decoder',
+    description = 'build ETC2 software decoder into the rive_decoders library (requires Ericsson ETCPACK)',
+})
+
 if not _OPTIONS["no_rive_png"] then
     dofile(rive .. '/dependencies/premake5_libpng_v2.lua')
 end
@@ -40,6 +55,19 @@
     libwebp = ''
 end
 
+local dependency = require('dependency')
+
+if _OPTIONS["with_rive_astc_decoder"] then
+    astcenc = dependency.github('ARM-software/astc-encoder', '4.7.0')
+end
+
+if _OPTIONS["with_rive_bc_decoder"] then
+    bc7enc = dependency.github('richgel999/bc7enc_rdo', 'master') -- NOTE(review): 'master' looks like a floating ref (astcenc above pins a version); consider pinning a commit
+end
+
+if _OPTIONS["with_rive_etc_decoder"] then
+    etcpack = dependency.github('Ericsson/ETCPACK', 'master') -- NOTE(review): same floating-ref concern as bc7enc; consider pinning a commit
+end
 
 project('rive_decoders')
 do
@@ -110,10 +138,97 @@
         files({ 'src/decode_webp.cpp' })
     end
 
+
     filter({ 'options:not no_rive_ktx2' })
     do
         defines({ 'RIVE_KTX2' })
         files({ 'src/decode_ktx2.cpp' })
+    end
+
+    -- Always include the texture decoder dispatcher; it compiles cleanly with
+    -- no decoder flags set (all paths return nullptr with a log message).
+    filter({})
+    do
+        files({ 'src/texture_decoder.cpp' })
+    end
+
+    if _OPTIONS["with_rive_astc_decoder"] then
+        filter({ 'options:with_rive_astc_decoder' })
+        do
+            includedirs({ astcenc .. '/Source' })
+            defines({
+                'RIVE_ASTC_DECODER',
+                'ASTCENC_SSE=0',
+                'ASTCENC_POPCNT=0',
+                'ASTCENC_F16C=0',
+                'ASTCENC_AVX=0',
+                'ASTCENC_NEON=0',
+            })
+            files({
+                'src/decode_astc_texture.cpp',
+                astcenc .. '/Source/astcenc_*.cpp',
+            })
+            buildoptions({
+                '-Wno-sign-conversion',
+                '-Wno-implicit-int-float-conversion',
+                '-Wno-float-conversion',
+                '-Wno-shorten-64-to-32',
+                '-Wno-unused-variable',
+                '-Wno-unused-function',
+                '-Wno-shadow',
+                '-Wno-missing-field-initializers',
+            })
+        end
+    end
+
+    if _OPTIONS["with_rive_bc_decoder"] then
+        filter({ 'options:with_rive_bc_decoder' })
+        do
+            includedirs({ bc7enc })
+            defines({ 'RIVE_BC_DECODER' })
+            files({
+                'src/decode_bc_texture.cpp',
+                bc7enc .. '/bc7decomp.cpp',
+                bc7enc .. '/bc7decomp_ref.cpp',
+                bc7enc .. '/rgbcx.cpp',
+            })
+            buildoptions({
+                '-Wno-sign-conversion',
+                '-Wno-implicit-int-float-conversion',
+                '-Wno-float-conversion',
+                '-Wno-shorten-64-to-32',
+                '-Wno-unused-variable',
+                '-Wno-unused-function',
+                '-Wno-unused-const-variable',
+                '-Wno-shadow',
+                '-Wno-missing-field-initializers',
+            })
+        end
+    end
+
+    if _OPTIONS["with_rive_etc_decoder"] then
+        filter({ 'options:with_rive_etc_decoder' })
+        do
+            defines({ 'RIVE_ETC_DECODER' })
+            files({
+                'src/decode_etc_texture.cpp',
+                etcpack .. '/source/etcdec.cxx',
+            })
+            buildoptions({
+                '-Wno-sign-conversion',
+                '-Wno-implicit-int-float-conversion',
+                '-Wno-float-conversion',
+                '-Wno-shorten-64-to-32',
+                '-Wno-unused-variable',
+                '-Wno-unused-function',
+                '-Wno-unused-but-set-variable',
+                '-Wno-shadow',
+                '-Wno-missing-field-initializers',
+                '-Wno-old-style-cast',
+                '-Wno-parentheses',
+                '-Wno-sign-compare',
+            })
+        end
     end
 
 end
diff --git a/decoders/src/decode_astc_texture.cpp b/decoders/src/decode_astc_texture.cpp
new file mode 100644
index 0000000..380740e
--- /dev/null
+++ b/decoders/src/decode_astc_texture.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2026 Rive
+ */
+
+#include "rive/decoders/bitmap_decoder.hpp"
+#include "astcenc.h"
+
+#include <cstdio>
+#include <memory>
+#include <vector>
+
+// Software-decodes one mip level of ASTC blocks into a tightly packed RGBA8
+// Bitmap of `width` x `height` pixels. `blocks` / `byteCount` describe the
+// compressed level and `blockWidth` / `blockHeight` its ASTC footprint.
+// Returns nullptr, after logging to stderr, when any astcenc call fails.
+std::unique_ptr<Bitmap> decode_astc_texture(const uint8_t* blocks,
+                                            size_t byteCount,
+                                            uint32_t width,
+                                            uint32_t height,
+                                            uint32_t blockWidth,
+                                            uint32_t blockHeight)
+{
+    astcenc_config config;
+    astcenc_error err = astcenc_config_init(ASTCENC_PRF_LDR_SRGB,
+                                            blockWidth,
+                                            blockHeight,
+                                            1,
+                                            ASTCENC_PRE_MEDIUM,
+                                            ASTCENC_FLG_DECOMPRESS_ONLY,
+                                            &config);
+    if (err != ASTCENC_SUCCESS)
+    {
+        fprintf(stderr,
+                "DecodeAstcTexture - astcenc_config_init failed: %s\n",
+                astcenc_get_error_string(err));
+        return nullptr;
+    }
+
+    // Allocate the output before the context exists: nothing between
+    // astcenc_context_alloc and astcenc_context_free can then throw, so the
+    // context cannot leak on an allocation failure.
+    const size_t numBytes = static_cast<size_t>(width) * height * 4;
+    auto pixels = std::make_unique<uint8_t[]>(numBytes);
+
+    astcenc_context* ctx = nullptr;
+    err = astcenc_context_alloc(&config, 1, &ctx);
+    if (err != ASTCENC_SUCCESS)
+    {
+        fprintf(stderr,
+                "DecodeAstcTexture - astcenc_context_alloc failed: %s\n",
+                astcenc_get_error_string(err));
+        return nullptr;
+    }
+
+    // astcenc_image::data is an array of per-slice pointers; this 2D image
+    // has exactly one slice (dim_z == 1).
+    void* slicePtr = pixels.get();
+    astcenc_image outImage;
+    outImage.dim_x = width;
+    outImage.dim_y = height;
+    outImage.dim_z = 1;
+    outImage.data_type = ASTCENC_TYPE_U8;
+    outImage.data = &slicePtr;
+
+    const astcenc_swizzle swizzle = {ASTCENC_SWZ_R,
+                                     ASTCENC_SWZ_G,
+                                     ASTCENC_SWZ_B,
+                                     ASTCENC_SWZ_A};
+
+    err = astcenc_decompress_image(ctx,
+                                   blocks,
+                                   byteCount,
+                                   &outImage,
+                                   &swizzle,
+                                   0);
+    // The context is single-use here; release it before checking the result.
+    astcenc_context_free(ctx);
+
+    if (err != ASTCENC_SUCCESS)
+    {
+        fprintf(stderr,
+                "DecodeAstcTexture - astcenc_decompress_image failed: %s\n",
+                astcenc_get_error_string(err));
+        return nullptr;
+    }
+
+    return std::make_unique<Bitmap>(width,
+                                    height,
+                                    numBytes,
+                                    Bitmap::PixelFormat::RGBA,
+                                    std::move(pixels));
+}
diff --git a/decoders/src/decode_bc_texture.cpp b/decoders/src/decode_bc_texture.cpp
new file mode 100644
index 0000000..4ba38a4
--- /dev/null
+++ b/decoders/src/decode_bc_texture.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2026 Rive
+ */
+
+#include "rive/decoders/bitmap_decoder.hpp"
+#include "rive/gpu_texture_format.hpp"
+
+// bc7decomp provides BC7 software decompression.
+// rgbcx provides BC1/BC2/BC3 software decompression.
+#include "bc7decomp.h"
+#include "rgbcx.h"
+
+#include <algorithm>
+#include <cstdio>
+#include <cstring>
+#include <memory>
+
+// Software-decodes one mip level of BC1 / BC3 / BC7 blocks into a tightly
+// packed RGBA8 Bitmap of `width` x `height` pixels. Logs to stderr and
+// returns nullptr when `format` has no decode path here or when `byteCount`
+// is too small to hold the level's block grid.
+std::unique_ptr<Bitmap> decode_bc_texture(const uint8_t* blocks,
+                                          size_t byteCount,
+                                          uint32_t width,
+                                          uint32_t height,
+                                          rive::GPUTextureFormat format)
+{
+    // Resolve the block size once, rejecting unsupported formats before any
+    // allocation. BC1 blocks are 8 bytes; BC3 and BC7 blocks are 16.
+    size_t blockBytes = 16;
+    switch (format)
+    {
+        case rive::GPUTextureFormat::bc1:
+            blockBytes = 8;
+            break;
+        case rive::GPUTextureFormat::bc3:
+        case rive::GPUTextureFormat::bc7:
+            break;
+        default:
+            fprintf(stderr,
+                    "DecodeBcTexture - unsupported BC format %u\n",
+                    static_cast<unsigned>(format));
+            return nullptr;
+    }
+
+    // All BCn formats use 4x4 pixel blocks.
+    const uint32_t blocksX = (width + 3) / 4;
+    const uint32_t blocksY = (height + 3) / 4;
+
+    // The loop below reads blocksX * blocksY whole blocks, so a shorter
+    // input would be read out of bounds.
+    const size_t requiredBytes =
+        static_cast<size_t>(blocksX) * blocksY * blockBytes;
+    if (byteCount < requiredBytes)
+    {
+        fprintf(stderr,
+                "DecodeBcTexture - byteCount %zu < required %zu for %ux%u\n",
+                byteCount,
+                requiredBytes,
+                width,
+                height);
+        return nullptr;
+    }
+
+    // make_unique<T[]> value-initializes, so the image starts out zeroed.
+    const size_t numBytes = static_cast<size_t>(width) * height * 4;
+    auto pixels = std::make_unique<uint8_t[]>(numBytes);
+    uint32_t* dst32 = reinterpret_cast<uint32_t*>(pixels.get());
+
+    const uint8_t* src = blocks;
+    for (uint32_t by = 0; by < blocksY; by++)
+    {
+        for (uint32_t bx = 0; bx < blocksX; bx++)
+        {
+            // Decode one 4x4 block into a temporary 16-pixel RGBA buffer.
+            // uint32_t storage gives the bc7decomp/rgbcx union casts proper
+            // alignment (required on ARM) and lets the copy below move
+            // whole pixels.
+            uint32_t blockPixels[16] = {};
+            if (format == rive::GPUTextureFormat::bc7)
+            {
+                bc7decomp::unpack_bc7(
+                    src,
+                    reinterpret_cast<bc7decomp::color_rgba*>(blockPixels));
+            }
+            else if (format == rive::GPUTextureFormat::bc1)
+            {
+                rgbcx::unpack_bc1(
+                    src,
+                    reinterpret_cast<rgbcx::color32*>(blockPixels),
+                    true);
+            }
+            else
+            {
+                // Only bc3 remains; other formats were rejected above.
+                rgbcx::unpack_bc3(
+                    src,
+                    reinterpret_cast<rgbcx::color32*>(blockPixels));
+            }
+            src += blockBytes;
+
+            // Copy decoded pixels into the output image. The last block
+            // row/column may extend past the image edge, so each row copies
+            // only the pixels that fall inside it.
+            const uint32_t copyW = std::min<uint32_t>(4u, width - bx * 4);
+            const uint32_t copyH = std::min<uint32_t>(4u, height - by * 4);
+            for (uint32_t py = 0; py < copyH; py++)
+            {
+                // size_t indexing keeps the row offset from wrapping in
+                // 32 bits on very large images.
+                const size_t dstRow = static_cast<size_t>(by) * 4 + py;
+                std::memcpy(dst32 + dstRow * width + bx * 4,
+                            blockPixels + py * 4,
+                            copyW * sizeof(uint32_t));
+            }
+        }
+    }
+
+    return std::make_unique<Bitmap>(width,
+                                    height,
+                                    numBytes,
+                                    Bitmap::PixelFormat::RGBA,
+                                    std::move(pixels));
+}
diff --git a/decoders/src/decode_etc_texture.cpp b/decoders/src/decode_etc_texture.cpp
new file mode 100644
index 0000000..567f76a
--- /dev/null
+++ b/decoders/src/decode_etc_texture.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2026 Rive
+ */
+
+#include "rive/decoders/bitmap_decoder.hpp"
+#include "rive/gpu_texture_format.hpp"
+
+#include <cstdio>
+#include <cstring>
+#include <memory>
+
+// Forward declarations for Ericsson ETCPACK's etcdec.cxx (built as part of
+// rive_decoders when RIVE_ETC_DECODER is set). Each block entrypoint writes
+// one 4x4 block into an interleaved image with `channels` bytes per pixel.
+// The decoder below passes `img` for RGB (byte offsets +0/+1/+2) and
+// `img + 3` for alpha.
+extern void decompressBlockETC2c(unsigned int block_part1,
+                                 unsigned int block_part2,
+                                 unsigned char* img,
+                                 int width,
+                                 int height,
+                                 int startx,
+                                 int starty,
+                                 int channels);
+extern void decompressBlockAlphaC(unsigned char* data,
+                                  unsigned char* img,
+                                  int width,
+                                  int height,
+                                  int ix,
+                                  int iy,
+                                  int channels);
+// NOTE(review): upstream etcdec.cxx fills the modifier table read by
+// decompressBlockAlphaC in setupAlphaTable(), which returns early once the
+// table is built. Confirm against the ETCPACK revision this build fetches.
+extern void setupAlphaTable();
+
+namespace
+{
+// Assembles a big-endian 32-bit word from four consecutive bytes.
+unsigned int readBE32(const uint8_t* p)
+{
+    return (static_cast<unsigned int>(p[0]) << 24) |
+           (static_cast<unsigned int>(p[1]) << 16) |
+           (static_cast<unsigned int>(p[2]) << 8) |
+           static_cast<unsigned int>(p[3]);
+}
+} // namespace
+
+// Decodes one mip level of ETC2 RGBA8 (16 bytes/block: 8 bytes EAC alpha +
+// 8 bytes ETC2 RGB) into a tightly packed RGBA8 Bitmap. Returns nullptr for
+// any other format, for null or zero-sized input, and (after logging) when
+// `byteCount` does not match the level's block grid exactly.
+std::unique_ptr<Bitmap> decode_etc_texture(const uint8_t* blocks,
+                                           size_t byteCount,
+                                           uint32_t width,
+                                           uint32_t height,
+                                           rive::GPUTextureFormat format)
+{
+    if (format != rive::GPUTextureFormat::etc2 || blocks == nullptr ||
+        width == 0 || height == 0)
+    {
+        return nullptr;
+    }
+
+    const uint32_t paddedW = (width + 3u) & ~3u;
+    const uint32_t paddedH = (height + 3u) & ~3u;
+    const uint32_t blocksX = paddedW / 4u;
+    const uint32_t blocksY = paddedH / 4u;
+    const size_t expectedBytes = static_cast<size_t>(blocksX) * blocksY * 16u;
+    if (byteCount != expectedBytes)
+    {
+        fprintf(stderr,
+                "DecodeEtcTexture - byteCount %zu != expected %zu for %ux%u\n",
+                byteCount,
+                expectedBytes,
+                width,
+                height);
+        return nullptr;
+    }
+
+    // Build the EAC alpha modifier table before the first alpha decode. The
+    // function-local static makes the call happen once, and its
+    // initialization is thread-safe since C++11.
+    static const bool alphaTableReady = (setupAlphaTable(), true);
+    (void)alphaTableReady;
+
+    // The block decoders always write whole 4x4 blocks, so decode into a
+    // grid whose sides are rounded up to multiples of 4.
+    const size_t paddedPixels =
+        static_cast<size_t>(paddedW) * static_cast<size_t>(paddedH);
+    auto padded = std::make_unique<uint8_t[]>(paddedPixels * 4);
+
+    const uint8_t* src = blocks;
+    for (uint32_t by = 0; by < blocksY; ++by)
+    {
+        for (uint32_t bx = 0; bx < blocksX; ++bx)
+        {
+            const int startX = static_cast<int>(bx * 4u);
+            const int startY = static_cast<int>(by * 4u);
+            // Bytes 0-7 hold EAC alpha; `+ 3` targets the A byte of each
+            // interleaved RGBA pixel.
+            decompressBlockAlphaC(const_cast<uint8_t*>(src),
+                                  padded.get() + 3,
+                                  static_cast<int>(paddedW),
+                                  static_cast<int>(paddedH),
+                                  startX,
+                                  startY,
+                                  4);
+            // Bytes 8-15 hold ETC2 RGB as two big-endian 32-bit words.
+            const unsigned int p1 = readBE32(src + 8);
+            const unsigned int p2 = readBE32(src + 12);
+            decompressBlockETC2c(p1,
+                                 p2,
+                                 padded.get(),
+                                 static_cast<int>(paddedW),
+                                 static_cast<int>(paddedH),
+                                 startX,
+                                 startY,
+                                 4);
+            src += 16;
+        }
+    }
+
+    const size_t outPixels =
+        static_cast<size_t>(width) * static_cast<size_t>(height);
+    if (paddedW == width && paddedH == height)
+    {
+        // Already block-aligned: the padded grid is the final image, so skip
+        // the crop copy.
+        return std::make_unique<Bitmap>(width,
+                                        height,
+                                        outPixels * 4,
+                                        Bitmap::PixelFormat::RGBA,
+                                        std::move(padded));
+    }
+
+    // Crop padded RGBA grid down to (width, height).
+    auto pixels = std::make_unique<uint8_t[]>(outPixels * 4);
+    for (uint32_t y = 0; y < height; ++y)
+    {
+        std::memcpy(pixels.get() + static_cast<size_t>(y) * width * 4,
+                    padded.get() + static_cast<size_t>(y) * paddedW * 4,
+                    static_cast<size_t>(width) * 4);
+    }
+
+    return std::make_unique<Bitmap>(width,
+                                    height,
+                                    outPixels * 4,
+                                    Bitmap::PixelFormat::RGBA,
+                                    std::move(pixels));
+}
diff --git a/decoders/src/decode_ktx2.cpp b/decoders/src/decode_ktx2.cpp
index 9154ab6..135595e 100644
--- a/decoders/src/decode_ktx2.cpp
+++ b/decoders/src/decode_ktx2.cpp
@@ -13,6 +13,8 @@
 // Spec: https://registry.khronos.org/KTX/specs/2.0/ktxspec.v2.html
 
 #include "rive/decoders/decode_ktx2.hpp"
+#include "rive/decoders/astc_footprints.hpp"
+#include "rive/decoders/texture_decoder.hpp"
 
 #include <algorithm>
 #include <cstdio>
@@ -22,7 +24,7 @@
 {
 namespace
 {
-constexpr uint8_t kKtx2Identifier[12] = {
+constexpr uint8_t Ktx2Identifier[12] = {
     0xAB,
     0x4B,
     0x54,
@@ -40,7 +42,20 @@
 constexpr uint32_t VK_FORMAT_BC7_UNORM_BLOCK = 145;
 constexpr uint32_t VK_FORMAT_BC7_SRGB_BLOCK = 146;
 
-constexpr uint32_t kSupercompressionNone = 0;
+// We only ship ETC2 RGBA8 (151 UNORM / 152 SRGB). The RGB8 (147/148) and
+// RGB-with-1-bit-alpha (149/150) variants are valid vkFormats but the rive
+// runtime always wants a 4-channel image; encoders should produce RGBA8.
+constexpr uint32_t VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK = 151;
+constexpr uint32_t VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = 152;
+
+// ASTC LDR (VK_FORMAT_ASTC_<X>x<Y>_UNORM_BLOCK = 157,159,... and the SRGB
+// variant is the next value).
+constexpr uint32_t VK_FORMAT_ASTC_4x4_UNORM_BLOCK = 157;
+constexpr uint32_t VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184;
+
+// ASTC block footprints come from <rive/decoders/astc_footprints.hpp>.
+
+constexpr uint32_t SupercompressionNone = 0;
 
 #pragma pack(push, 1)
 struct Ktx2Header
@@ -73,46 +88,83 @@
               "KTX2 level index entry must be 24 bytes");
 
 // Defensive caps.
-constexpr uint32_t kMaxDimension = 16384;
-constexpr uint32_t kMaxLevels = 16;
+constexpr uint32_t MaxDimension = 16384;
+constexpr uint32_t MaxLevels = 16;
 
-constexpr uint32_t kBc7BlockBytes = 16;
-
-// Expected block-grid byte length for a BC7 mip level at the given logical
-// pixel dimensions. BC7 = 4x4 blocks, 16 bytes/block.
-inline uint64_t expectedBc7Bytes(uint32_t pixelWidth, uint32_t pixelHeight)
+// BC7 and ASTC LDR are 16 bytes/block. ETC2 RGB8 is 8.
+inline uint64_t expectedBlockBytes(uint32_t pixelWidth,
+                                   uint32_t pixelHeight,
+                                   uint32_t blockWidth,
+                                   uint32_t blockHeight,
+                                   uint32_t bytesPerBlock)
 {
-    const uint64_t blocksX = (pixelWidth + 3u) / 4u;
-    const uint64_t blocksY = (pixelHeight + 3u) / 4u;
-    return blocksX * blocksY * kBc7BlockBytes;
+    const uint64_t blocksX = (pixelWidth + blockWidth - 1u) / blockWidth;
+    const uint64_t blocksY = (pixelHeight + blockHeight - 1u) / blockHeight;
+    return blocksX * blocksY * bytesPerBlock;
 }
 } // namespace
 
-bool DecodeKtx2(const uint8_t* bytes, size_t byteCount, Ktx2DecodeResult& out)
+bool DecodeKtx2(const uint8_t* bytes,
+                size_t byteCount,
+                Ktx2DecodeResult& out,
+                const Ktx2HwSupport& hwSupport)
 {
-    if (byteCount < sizeof(kKtx2Identifier) + sizeof(Ktx2Header))
+    if (byteCount < sizeof(Ktx2Identifier) + sizeof(Ktx2Header))
     {
         std::fprintf(stderr, "DecodeKtx2: file too small\n");
         return false;
     }
-    if (std::memcmp(bytes, kKtx2Identifier, sizeof(kKtx2Identifier)) != 0)
+    if (std::memcmp(bytes, Ktx2Identifier, sizeof(Ktx2Identifier)) != 0)
     {
         std::fprintf(stderr, "DecodeKtx2: bad magic\n");
         return false;
     }
 
     Ktx2Header header;
-    std::memcpy(&header, bytes + sizeof(kKtx2Identifier), sizeof(header));
+    std::memcpy(&header, bytes + sizeof(Ktx2Identifier), sizeof(header));
 
-    if (header.vkFormat != VK_FORMAT_BC7_UNORM_BLOCK &&
-        header.vkFormat != VK_FORMAT_BC7_SRGB_BLOCK)
+    // Map vkFormat → (GPUTextureFormat, blockWidth, blockHeight,
+    // bytesPerBlock, srgb).
+    GPUTextureFormat outFormat;
+    uint8_t blockWidth = 4;
+    uint8_t blockHeight = 4;
+    uint32_t bytesPerBlock = 16;
+    bool srgb = false;
+    if (header.vkFormat == VK_FORMAT_BC7_UNORM_BLOCK ||
+        header.vkFormat == VK_FORMAT_BC7_SRGB_BLOCK)
+    {
+        outFormat = GPUTextureFormat::bc7;
+        srgb = (header.vkFormat == VK_FORMAT_BC7_SRGB_BLOCK);
+    }
+
+    else if (header.vkFormat == VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK ||
+             header.vkFormat == VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK)
+    {
+        outFormat = GPUTextureFormat::etc2;
+        // ETC2 RGBA8 = 8 bytes EAC alpha + 8 bytes ETC2 RGB = 16 per block.
+        bytesPerBlock = 16;
+        srgb = (header.vkFormat == VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK);
+    }
+
+    else if (header.vkFormat >= VK_FORMAT_ASTC_4x4_UNORM_BLOCK &&
+             header.vkFormat <= VK_FORMAT_ASTC_12x12_SRGB_BLOCK)
+    {
+        const uint32_t idx =
+            (header.vkFormat - VK_FORMAT_ASTC_4x4_UNORM_BLOCK) / 2u;
+        outFormat = GPUTextureFormat::astc;
+
+        blockWidth = AstcFootprints[idx].width;
+        blockHeight = AstcFootprints[idx].height;
+        srgb = (header.vkFormat % 2u) == 0u; // SRGB at UNORM+1.
+    }
+    else
     {
         std::fprintf(stderr,
-                     "DecodeKtx2: unsupported vkFormat %u (only BC7 wired)\n",
+                     "DecodeKtx2: unsupported vkFormat %u\n",
                      header.vkFormat);
         return false;
     }
-    if (header.supercompressionScheme != kSupercompressionNone)
+    if (header.supercompressionScheme != SupercompressionNone)
     {
         std::fprintf(stderr,
                      "DecodeKtx2: supercompressionScheme %u not supported\n",
@@ -128,8 +180,8 @@
             header.layerCount);
         return false;
     }
-    if (header.pixelWidth == 0 || header.pixelWidth > kMaxDimension ||
-        header.pixelHeight == 0 || header.pixelHeight > kMaxDimension)
+    if (header.pixelWidth == 0 || header.pixelWidth > MaxDimension ||
+        header.pixelHeight == 0 || header.pixelHeight > MaxDimension)
     {
         std::fprintf(stderr,
                      "DecodeKtx2: dimensions out of range (%ux%u)\n",
@@ -139,17 +191,16 @@
     }
 
     const uint32_t levelCount = header.levelCount == 0 ? 1u : header.levelCount;
-    if (levelCount > kMaxLevels)
+    if (levelCount > MaxLevels)
     {
         std::fprintf(stderr,
                      "DecodeKtx2: levelCount %u exceeds cap %u\n",
                      levelCount,
-                     kMaxLevels);
+                     MaxLevels);
         return false;
     }
 
-    const size_t levelIndexOffset =
-        sizeof(kKtx2Identifier) + sizeof(Ktx2Header);
+    const size_t levelIndexOffset = sizeof(Ktx2Identifier) + sizeof(Ktx2Header);
     const size_t levelIndexBytes =
         static_cast<size_t>(levelCount) * sizeof(Ktx2LevelIndex);
     if (byteCount < levelIndexOffset + levelIndexBytes)
@@ -169,7 +220,11 @@
         const Ktx2LevelIndex& e = entries[i];
         const uint32_t logW = std::max<uint32_t>(1u, header.pixelWidth >> i);
         const uint32_t logH = std::max<uint32_t>(1u, header.pixelHeight >> i);
-        const uint64_t expected = expectedBc7Bytes(logW, logH);
+        const uint64_t expected = expectedBlockBytes(logW,
+                                                     logH,
+                                                     blockWidth,
+                                                     blockHeight,
+                                                     bytesPerBlock);
         if (e.byteLength != expected)
         {
             std::fprintf(
@@ -192,12 +247,14 @@
     }
 
     // Concatenate level 0 .. N-1 into one contiguous buffer (largest first).
-    out.format = header.vkFormat == VK_FORMAT_BC7_SRGB_BLOCK
-                     ? GPUTextureFormat::bc7
-                     : GPUTextureFormat::bc7;
+    out.format = outFormat;
     out.pixelWidth = header.pixelWidth;
     out.pixelHeight = header.pixelHeight;
     out.levelCount = levelCount;
+    out.blockWidth = blockWidth;
+    out.blockHeight = blockHeight;
+    out.srgb = srgb;
+    out.softwareDecoded = false;
     out.blocks.resize(static_cast<size_t>(totalBytes));
     size_t writeOffset = 0;
     for (uint32_t i = 0; i < levelCount; ++i)
@@ -209,6 +266,88 @@
         writeOffset += static_cast<size_t>(e.byteLength);
     }
 
+    // HW-cap fallback: if the backend can't sample this format directly,
+    // software-decode every mip level to RGBA8. Caller uploads as rgba32.
+    bool needFallback = false;
+    switch (out.format)
+    {
+        case GPUTextureFormat::bc1:
+        case GPUTextureFormat::bc2:
+        case GPUTextureFormat::bc3:
+        case GPUTextureFormat::bc7:
+            needFallback = !hwSupport.bc;
+            break;
+        case GPUTextureFormat::astc:
+            needFallback = !hwSupport.astc;
+            break;
+        case GPUTextureFormat::etc2:
+            needFallback = !hwSupport.etc2;
+            break;
+        default:
+            break;
+    }
+    if (needFallback)
+    {
+        // Decode every level. Source layout: level 0 first, levels tight
+        // (matches how we just wrote `out.blocks`). Output layout: each
+        // level's logical width * height * 4 bytes, also tight.
+        //
+        // We allocate the decoded chain into a temp buffer first so the
+        // original block bytes remain valid for the per-level decode calls.
+        std::vector<uint8_t> decoded;
+        size_t totalRgba = 0;
+        for (uint32_t i = 0; i < levelCount; ++i)
+        {
+            const uint32_t logW = std::max<uint32_t>(1u, out.pixelWidth >> i);
+            const uint32_t logH = std::max<uint32_t>(1u, out.pixelHeight >> i);
+
+            totalRgba += static_cast<size_t>(logW) * logH * 4;
+        }
+        decoded.reserve(totalRgba);
+
+        size_t srcOffset = 0;
+        for (uint32_t i = 0; i < levelCount; ++i)
+        {
+            const uint32_t logW = std::max<uint32_t>(1u, out.pixelWidth >> i);
+            const uint32_t logH = std::max<uint32_t>(1u, out.pixelHeight >> i);
+            const size_t levelBytes =
+                static_cast<size_t>(entries[i].byteLength);
+            auto bmp = decode_texture(out.blocks.data() + srcOffset,
+                                      levelBytes,
+                                      logW,
+                                      logH,
+                                      out.format,
+                                      out.blockWidth,
+                                      out.blockHeight);
+            if (!bmp)
+            {
+                std::fprintf(stderr,
+                             "DecodeKtx2: HW lacks support for format %u "
+                             "and software decoder unavailable (level %u)\n",
+                             static_cast<unsigned>(out.format),
+                             i);
+                return false;
+            }
+            // Match the PNG runtime path: premultiplied texels.
+            // NOTE(review): if Bitmap::pixelFormat() rewrites texel values
+            // and the encoder already premultiplied before compressing, this
+            // premultiplies twice -- confirm against Bitmap and the encoder.
+            bmp->pixelFormat(Bitmap::PixelFormat::RGBAPremul);
+            decoded.insert(decoded.end(),
+                           bmp->bytes(),
+                           bmp->bytes() + bmp->numBytes());
+            srcOffset += levelBytes;
+        }
+
+        out.blocks = std::move(decoded);
+        out.format = GPUTextureFormat::rgba32;
+        out.blockWidth = 1;
+        out.blockHeight = 1;
+        out.srgb = false;
+        out.softwareDecoded = true;
+        // `out.levelCount` already matches the KTX2 level count.
+    }
+
     return true;
 }
 
diff --git a/decoders/src/texture_decoder.cpp b/decoders/src/texture_decoder.cpp
new file mode 100644
index 0000000..f6dcfe3
--- /dev/null
+++ b/decoders/src/texture_decoder.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2026 Rive
+ */
+
+#include "rive/decoders/texture_decoder.hpp"
+
+#include <cstdio>
+
+#ifdef RIVE_ASTC_DECODER
+std::unique_ptr<Bitmap> decode_astc_texture(const uint8_t* blocks,
+                                            size_t byteCount,
+                                            uint32_t width,
+                                            uint32_t height,
+                                            uint32_t blockWidth,
+                                            uint32_t blockHeight);
+#endif
+
+#ifdef RIVE_BC_DECODER
+std::unique_ptr<Bitmap> decode_bc_texture(const uint8_t* blocks,
+                                          size_t byteCount,
+                                          uint32_t width,
+                                          uint32_t height,
+                                          rive::GPUTextureFormat format);
+#endif
+
+#ifdef RIVE_ETC_DECODER
+std::unique_ptr<Bitmap> decode_etc_texture(const uint8_t* blocks,
+                                           size_t byteCount,
+                                           uint32_t width,
+                                           uint32_t height,
+                                           rive::GPUTextureFormat format);
+#endif
+
+// Dispatches compressed `blocks` to the decoder family selected by `format`.
+// Returns nullptr (after logging to stderr) when that family was not compiled
+// in or the format is unknown. Params are [[maybe_unused]] for such builds.
+std::unique_ptr<Bitmap> decode_texture([[maybe_unused]] const uint8_t* blocks,
+                                       [[maybe_unused]] size_t byteCount,
+                                       [[maybe_unused]] uint32_t width,
+                                       [[maybe_unused]] uint32_t height,
+                                       rive::GPUTextureFormat format,
+                                       [[maybe_unused]] uint32_t blockWidth,
+                                       [[maybe_unused]] uint32_t blockHeight)
+{
+    switch (format)
+    {
+        case rive::GPUTextureFormat::astc:
+#ifdef RIVE_ASTC_DECODER
+            return decode_astc_texture(blocks,
+                                       byteCount,
+                                       width,
+                                       height,
+                                       blockWidth,
+                                       blockHeight);
+#else
+            std::fprintf(stderr,
+                         "ASTC texture not supported "
+                         "(build with --with_rive_astc_decoder)\n");
+            return nullptr;
+#endif
+
+        case rive::GPUTextureFormat::bc1:
+        case rive::GPUTextureFormat::bc2:
+        case rive::GPUTextureFormat::bc3:
+        case rive::GPUTextureFormat::bc7:
+#ifdef RIVE_BC_DECODER
+            return decode_bc_texture(blocks, byteCount, width, height, format);
+#else
+            std::fprintf(stderr,
+                         "BC texture not supported "
+                         "(build with --with_rive_bc_decoder)\n");
+            return nullptr;
+#endif
+
+        case rive::GPUTextureFormat::etc2:
+#ifdef RIVE_ETC_DECODER
+            return decode_etc_texture(blocks, byteCount, width, height, format);
+#else
+            std::fprintf(stderr,
+                         "ETC texture not supported "
+                         "(build with --with_rive_etc_decoder)\n");
+            return nullptr;
+#endif
+
+        default:
+            std::fprintf(stderr,
+                         "decode_texture - unsupported format %u\n",
+                         static_cast<unsigned>(format));
+            return nullptr;
+    }
+}
diff --git a/include/rive/decoders/decode_ktx2.hpp b/include/rive/decoders/decode_ktx2.hpp
index f33440d..61b2cc8 100644
--- a/include/rive/decoders/decode_ktx2.hpp
+++ b/include/rive/decoders/decode_ktx2.hpp
@@ -14,17 +14,42 @@
 namespace rive
 {
 
+// HW support flags for the formats KTX2 may contain. When a format is not
+// supported by the backend, DecodeKtx2 will software-decode every mip level
+// to RGBA8 (if the corresponding RIVE_*_DECODER family was compiled in) and
+// store the tightly packed chain in `blocks` with `format = rgba32`.
+// Defaults assume the caller's backend natively supports every GPU
+// compressed format we recognise. Callers that have actually queried HW
+// caps should set the relevant booleans to false to opt the parser into
+// the software-decode fallback. Tests + parser-only consumers can use the
+// defaults to keep the original "pass blocks through verbatim" behavior.
+struct Ktx2HwSupport
+{
+    bool bc = true;
+    bool astc = true;
+    bool etc2 = true;
+};
+
 // Result of parsing a KTX2 container. Block data is held in a contiguous
 // owned buffer, level 0 first (largest), then level 1, … level N-1
 // (smallest). Each level's region is exactly its block-grid size in bytes
 // (no inter-level padding).
+//
+// If `softwareDecoded` is true, the GPU format in the container was not
+// supported by the caller's backend and every mip level was decoded to RGBA8.
+// In that case `format == rgba32`, `levelCount` is unchanged, `blockWidth` and
+// `blockHeight` are 1, and `blocks` holds the tightly packed RGBA8 mip chain.
 struct Ktx2DecodeResult
 {
     GPUTextureFormat format;
-    uint32_t pixelWidth;  // logical mip 0 width
-    uint32_t pixelHeight; // logical mip 0 height
-    uint32_t levelCount;  // number of mip levels stored (>=1)
+    uint32_t pixelWidth;     // logical mip 0 width
+    uint32_t pixelHeight;    // logical mip 0 height
+    uint32_t levelCount;     // number of mip levels stored (>=1)
+    uint8_t blockWidth = 4;  // compressed block footprint width (1 for rgba32)
+    uint8_t blockHeight = 4; // compressed block footprint height (1 for rgba32)
+    bool srgb = false;       // sRGB colour space (BC7_SRGB / ASTC_SRGB)
     std::vector<uint8_t> blocks;
+    bool softwareDecoded = false;
 };
 
 // Parses a KTX2 container. Returns true on success and fills `out`. Returns
@@ -35,7 +60,13 @@
 //   - cubemaps / array layers
 //   - oversized dimensions or level count
 //   - level data outside the buffer
-bool DecodeKtx2(const uint8_t* bytes, size_t byteCount, Ktx2DecodeResult& out);
+//
+// `hwSupport` is consulted after parsing to decide whether to fall back to
+// CPU decompression. Pass all-true to skip the fallback path entirely.
+bool DecodeKtx2(const uint8_t* bytes,
+                size_t byteCount,
+                Ktx2DecodeResult& out,
+                const Ktx2HwSupport& hwSupport = {});
 
 } // namespace rive
 
diff --git a/renderer/include/rive/renderer/d3d11/render_context_d3d_impl.hpp b/renderer/include/rive/renderer/d3d11/render_context_d3d_impl.hpp
index d093690..21a74c1 100644
--- a/renderer/include/rive/renderer/d3d11/render_context_d3d_impl.hpp
+++ b/renderer/include/rive/renderer/d3d11/render_context_d3d_impl.hpp
@@ -224,7 +224,11 @@
                                   uint32_t height,
                                   uint32_t mipLevelCount,
                                   GPUTextureFormat,
-                                  const uint8_t imageDataRGBAPremul[]) override;
+                                  const uint8_t imageData[],
+                                  uint8_t blockWidth = 1,
+                                  uint8_t blockHeight = 1,
+                                  bool srgb = false,
+                                  bool generateRemainingMips = false) override;
 
     std::unique_ptr<BufferRing> makeUniformBufferRing(
         size_t capacityInBytes) override;
diff --git a/renderer/include/rive/renderer/d3d12/render_context_d3d12_impl.hpp b/renderer/include/rive/renderer/d3d12/render_context_d3d12_impl.hpp
index a6816ac..4f3836e 100644
--- a/renderer/include/rive/renderer/d3d12/render_context_d3d12_impl.hpp
+++ b/renderer/include/rive/renderer/d3d12/render_context_d3d12_impl.hpp
@@ -106,7 +106,11 @@
         uint32_t height,
         uint32_t mipLevelCount,
         GPUTextureFormat format,
-        const uint8_t imageDataRGBAPremul[]) override;
+        const uint8_t imageData[],
+        uint8_t blockWidth = 1,
+        uint8_t blockHeight = 1,
+        bool srgb = false,
+        bool generateRemainingMips = false) override;
 
     rcp<Texture> adoptImageTexture(rcp<D3D12Texture> imageTexture);
 
diff --git a/renderer/include/rive/renderer/gl/gles3.hpp b/renderer/include/rive/renderer/gl/gles3.hpp
index 5d04526..af88b2e 100644
--- a/renderer/include/rive/renderer/gl/gles3.hpp
+++ b/renderer/include/rive/renderer/gl/gles3.hpp
@@ -112,6 +112,25 @@
 
 #endif // RIVE_WEBGL
 
+// KHR_texture_compression_astc_ldr is core on GLES 3.2 but ships as an
+// extension elsewhere. Some GL headers (e.g. unextended <GLES3/gl3.h>, and
+// the Windows release-clang config) define only a subset of the footprint
+// enums, so guard each symbol individually rather than via the extension
+// macro. UNORM enums are contiguous from 0x93B0 in spec order. Only the
+// footprints Rive currently uses are declared here.
+#ifndef GL_COMPRESSED_RGBA_ASTC_4x4_KHR
+#define GL_COMPRESSED_RGBA_ASTC_4x4_KHR 0x93B0
+#endif
+#ifndef GL_COMPRESSED_RGBA_ASTC_6x6_KHR
+#define GL_COMPRESSED_RGBA_ASTC_6x6_KHR 0x93B4
+#endif
+#ifndef GL_COMPRESSED_RGBA_ASTC_8x8_KHR
+#define GL_COMPRESSED_RGBA_ASTC_8x8_KHR 0x93B7
+#endif
+#ifndef GL_COMPRESSED_RGBA_ASTC_12x12_KHR
+#define GL_COMPRESSED_RGBA_ASTC_12x12_KHR 0x93BD
+#endif
+
 #if defined(RIVE_ANDROID) || defined(RIVE_WEBGL)
 // GLES 3.1 functionality is pulled in as an extension. Define these to avoid
 // compile errors, even if we won't use them.
diff --git a/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp b/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp
index 2c2082e..4fdf5d0 100644
--- a/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp
+++ b/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp
@@ -54,7 +54,11 @@
                                   uint32_t height,
                                   uint32_t mipLevelCount,
                                   GPUTextureFormat format,
-                                  const uint8_t imageDataRGBAPremul[]) override;
+                                  const uint8_t imageData[],
+                                  uint8_t blockWidth = 1,
+                                  uint8_t blockHeight = 1,
+                                  bool srgb = false,
+                                  bool generateRemainingMips = false) override;
 
     // Takes ownership of textureID and responsibility for deleting it.
     rcp<Texture> adoptImageTexture(uint32_t width,
diff --git a/renderer/include/rive/renderer/metal/render_context_metal_impl.h b/renderer/include/rive/renderer/metal/render_context_metal_impl.h
index d21c86e..b78c662 100644
--- a/renderer/include/rive/renderer/metal/render_context_metal_impl.h
+++ b/renderer/include/rive/renderer/metal/render_context_metal_impl.h
@@ -139,7 +139,11 @@
                                   uint32_t height,
                                   uint32_t mipLevelCount,
                                   GPUTextureFormat format,
-                                  const uint8_t imageDataRGBAPremul[]) override;
+                                  const uint8_t imageData[],
+                                  uint8_t blockWidth = 1,
+                                  uint8_t blockHeight = 1,
+                                  bool srgb = false,
+                                  bool generateRemainingMips = false) override;
 
 #ifdef RIVE_CANVAS
     rcp<RenderCanvas> makeRenderCanvas(uint32_t width,
diff --git a/renderer/include/rive/renderer/render_context_impl.hpp b/renderer/include/rive/renderer/render_context_impl.hpp
index 875ec1b..79bf1bc 100644
--- a/renderer/include/rive/renderer/render_context_impl.hpp
+++ b/renderer/include/rive/renderer/render_context_impl.hpp
@@ -52,12 +52,26 @@
 
     // this is called in the case of the default Bitmap class being used to
     // decode images so that it can be converted into a backend specific image.
+    // For compressed `format`s, `blockWidth`/`blockHeight` give the format's
+    // block footprint (e.g. 4x4 for BC7 and ASTC 4x4) and `srgb` selects the
+    // sRGB variant of the format. For rgba32, leave both at 1 (a 1x1 block).
+    //
+    // `mipLevelCount` is the number of stored mip levels in `imageData`,
+    // packed largest-first with no inter-level padding. When
+    // `generateRemainingMips` is true (PNG/JPEG path), only mip 0 bytes are
+    // expected in `imageData` and the backend fills the remaining levels
+    // via GPU blits. When false (KTX2 path), the caller has supplied the
+    // full chain and the backend uploads it verbatim.
     virtual rcp<Texture> makeImageTexture(
         uint32_t width,
         uint32_t height,
         uint32_t mipLevelCount,
         GPUTextureFormat format,
-        const uint8_t imageDataRGBAPremul[]) = 0;
+        const uint8_t imageData[],
+        uint8_t blockWidth = 1,
+        uint8_t blockHeight = 1,
+        bool srgb = false,
+        bool generateRemainingMips = false) = 0;
 
 #ifdef RIVE_CANVAS
     // Creates a RenderCanvas: a GPU texture usable as both a render target
diff --git a/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp b/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp
index 3abbe7f..fc239f1 100644
--- a/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp
+++ b/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp
@@ -84,7 +84,11 @@
                                   uint32_t height,
                                   uint32_t mipLevelCount,
                                   GPUTextureFormat format,
-                                  const uint8_t imageDataRGBAPremul[]) override;
+                                  const uint8_t imageData[],
+                                  uint8_t blockWidth = 1,
+                                  uint8_t blockHeight = 1,
+                                  bool srgb = false,
+                                  bool generateRemainingMips = false) override;
 
 #ifdef RIVE_CANVAS
     rcp<RenderCanvas> makeRenderCanvas(uint32_t width,
diff --git a/renderer/include/rive/renderer/vulkan/vkutil.hpp b/renderer/include/rive/renderer/vulkan/vkutil.hpp
index 8f021c6..c8a617f 100644
--- a/renderer/include/rive/renderer/vulkan/vkutil.hpp
+++ b/renderer/include/rive/renderer/vulkan/vkutil.hpp
@@ -229,10 +229,21 @@
     void* nativeHandle() const override { return (void*)vkImage(); }
 
     // Deferred mechanism for uploading image data without a command buffer.
+    //
+    // Single-region upload: one VkBufferImageCopy covering mip 0 in full.
+    // If the texture has more than one mip level, generateMipmaps() is
+    // called on apply (suitable for the PNG/JPEG path).
     void scheduleUpload(const void* imageDataRGBAPremul,
                         size_t imageDataSizeInBytes);
     void scheduleUpload(rcp<vkutil::Buffer> imageBufferRGBAPremul);
 
+    // Multi-region upload: caller hands over a staging buffer and the full
+    // list of VkBufferImageCopy regions (typically one per mip level).
+    // No automatic mipmap generation — the caller is responsible for
+    // supplying every level that exists in the texture.
+    void scheduleUpload(rcp<vkutil::Buffer> stagingBuffer,
+                        std::vector<VkBufferImageCopy> regions);
+
     void barrier(VkCommandBuffer,
                  const ImageAccess& dstAccess,
                  ImageAccessAction = ImageAccessAction::preserveContents,
@@ -315,6 +326,8 @@
     ImageAccess m_lastAccess;
 
     rcp<vkutil::Buffer> m_imageUploadBuffer;
+    // When non-empty, overrides the default single-region/auto-mip path.
+    std::vector<VkBufferImageCopy> m_imageUploadRegions;
 
     // Simple mechanism for caching and reusing a descriptor set for this
     // texture within a frame.
diff --git a/renderer/include/rive/renderer/webgpu/render_context_webgpu_impl.hpp b/renderer/include/rive/renderer/webgpu/render_context_webgpu_impl.hpp
index a294eea..091dc14 100644
--- a/renderer/include/rive/renderer/webgpu/render_context_webgpu_impl.hpp
+++ b/renderer/include/rive/renderer/webgpu/render_context_webgpu_impl.hpp
@@ -90,7 +90,11 @@
                                   uint32_t height,
                                   uint32_t mipLevelCount,
                                   GPUTextureFormat format,
-                                  const uint8_t imageDataRGBAPremul[]) override;
+                                  const uint8_t imageData[],
+                                  uint8_t blockWidth = 1,
+                                  uint8_t blockHeight = 1,
+                                  bool srgb = false,
+                                  bool generateRemainingMips = false) override;
 
 #ifdef RIVE_CANVAS
     rcp<RenderCanvas> makeRenderCanvas(uint32_t width,
diff --git a/renderer/premake5_pls_renderer.lua b/renderer/premake5_pls_renderer.lua
index 43c73dc..cd3a655 100644
--- a/renderer/premake5_pls_renderer.lua
+++ b/renderer/premake5_pls_renderer.lua
@@ -418,9 +418,16 @@
         files({ 'src/metal/metal_nop.cpp' })
     end
 
+    -- decoders/include must be on the include path unconditionally —
+    -- renderer sources reference rive/decoders/astc_footprints.hpp even on
+    -- --no-rive-decoders builds. The header is pure inline (no link dep)
+    -- so exposing it costs nothing. Reset filter so this applies
+    -- project-wide, not just under the previous `nop-obj-c` filter.
+    filter({})
+    includedirs({ '../decoders/include' })
+
     filter({ 'options:not no-rive-decoders' })
     do
-        includedirs({ '../decoders/include' })
         defines({ 'RIVE_DECODERS' })
     end
 
@@ -435,6 +442,24 @@
         defines({ 'RIVE_KTX2' })
     end
 
+    -- Mirror per-family decoder flags into the renderer so the
+    -- `#ifdef RIVE_*_DECODER` test-path branches in render_context.cpp
+    -- compile when the decoder lib was built with these flags.
+    filter({ 'options:with_rive_bc_decoder' })
+    do
+        defines({ 'RIVE_BC_DECODER' })
+    end
+
+    filter({ 'options:with_rive_astc_decoder' })
+    do
+        defines({ 'RIVE_ASTC_DECODER' })
+    end
+
+    filter({ 'options:with_rive_etc_decoder' })
+    do
+        defines({ 'RIVE_ETC_DECODER' })
+    end
+
     filter('system:windows')
     do
         architecture('x64')
diff --git a/renderer/src/d3d11/render_context_d3d_impl.cpp b/renderer/src/d3d11/render_context_d3d_impl.cpp
index c58e4d9..fb059bd 100644
--- a/renderer/src/d3d11/render_context_d3d_impl.cpp
+++ b/renderer/src/d3d11/render_context_d3d_impl.cpp
@@ -909,7 +909,8 @@
                    UINT height,
                    UINT mipLevelCount,
                    GPUTextureFormat format,
-                   const uint8_t imageDataRGBAPremul[]) :
+                   const uint8_t imageDataRGBAPremul[],
+                   bool generateRemainingMips) :
         Texture(width, height)
     {
         if (format == GPUTextureFormat::bc7)
@@ -955,29 +956,48 @@
         }
         else if (format == GPUTextureFormat::rgba32)
         {
+            // GENERATE_MIPS flag + RTV binding are only needed when the
+            // GPU is going to fill in the chain. For the KTX2-supplied
+            // chain (caller-provided mips) it's pure overhead.
+            const UINT miscFlags =
+                generateRemainingMips ? D3D11_RESOURCE_MISC_GENERATE_MIPS : 0u;
+            const UINT bindFlags =
+                generateRemainingMips
+                    ? (D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET)
+                    : D3D11_BIND_SHADER_RESOURCE;
             m_texture = renderContextImpl->makeSimple2DTexture(
                 DXGI_FORMAT_R8G8B8A8_UNORM,
                 width,
                 height,
                 mipLevelCount,
-                D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET,
-                D3D11_RESOURCE_MISC_GENERATE_MIPS);
+                bindFlags,
+                miscFlags);
 
-            // Specify the top-level image in the mipmap chain.
-            D3D11_BOX box;
-            box.left = 0;
-            box.right = width;
-            box.top = 0;
-            box.bottom = height;
-            box.front = 0;
-            box.back = 1;
-            renderContextImpl->gpuContext()->UpdateSubresource(
-                m_texture.Get(),
-                0,
-                &box,
-                imageDataRGBAPremul,
-                width * 4,
-                0);
+            const uint8_t* src = imageDataRGBAPremul;
+            const UINT levelsToUpload =
+                generateRemainingMips ? 1u : mipLevelCount;
+            UINT W = width;
+            UINT H = height;
+            for (UINT i = 0; i < levelsToUpload; ++i)
+            {
+                D3D11_BOX box;
+                box.left = 0;
+                box.right = W;
+                box.top = 0;
+                box.bottom = H;
+                box.front = 0;
+                box.back = 1;
+                renderContextImpl->gpuContext()->UpdateSubresource(
+                    m_texture.Get(),
+                    i,
+                    &box,
+                    src,
+                    W * 4,
+                    0);
+                src += static_cast<size_t>(W) * H * 4;
+                W = std::max<UINT>(1u, W >> 1);
+                H = std::max<UINT>(1u, H >> 1);
+            }
         }
         else
         {
@@ -990,8 +1010,11 @@
             NULL,
             m_srv.ReleaseAndGetAddressOf()));
 
-        if (format == GPUTextureFormat::rgba32)
+        if (format == GPUTextureFormat::rgba32 && generateRemainingMips &&
+            mipLevelCount > 1)
+        {
             renderContextImpl->gpuContext()->GenerateMips(m_srv.Get());
+        }
     }
 
     ID3D11ShaderResourceView* srv() const { return m_srv.Get(); }
@@ -1014,14 +1037,19 @@
     uint32_t height,
     uint32_t mipLevelCount,
     GPUTextureFormat format,
-    const uint8_t imageDataRGBAPremul[])
+    const uint8_t imageDataRGBAPremul[],
+    uint8_t /*blockWidth*/,
+    uint8_t /*blockHeight*/,
+    bool /*srgb*/,
+    bool generateRemainingMips)
 {
     return make_rcp<TextureD3DImpl>(this,
                                     width,
                                     height,
                                     mipLevelCount,
                                     format,
-                                    imageDataRGBAPremul);
+                                    imageDataRGBAPremul,
+                                    generateRemainingMips);
 }
 
 rcp<Texture> RenderContextD3DImpl::adoptImageTexture(
diff --git a/renderer/src/d3d12/render_context_d3d12_impl.cpp b/renderer/src/d3d12/render_context_d3d12_impl.cpp
index 2657153..d9b7905 100644
--- a/renderer/src/d3d12/render_context_d3d12_impl.cpp
+++ b/renderer/src/d3d12/render_context_d3d12_impl.cpp
@@ -15,6 +15,7 @@
 #include "shaders/d3d/root.sig"
 
 #include <sstream>
+#include <vector>
 #include <D3DCompiler.h>
 
 // this is defined here instead of root_sig becaise the gpu does not care about
@@ -115,26 +116,30 @@
     {
         DXGI_FORMAT d3dFormat = d3d_utils::convert_format(format);
 
-        m_gpuTexture = manager->make2DTexture(
-            width,
-            height,
-            mipLevel,
-            d3dFormat,
-            D3D12_RESOURCE_FLAG_NONE,
-            usesCommandList ? D3D12_RESOURCE_STATE_COMMON
-                            : D3D12_RESOURCE_STATE_COPY_DEST);
+        // Always create in COMMON. Both upload paths drive the texture
+        // through the copy command list which uses enhanced barriers, and
+        // enhanced barriers require COMMON layout (LEGACY_COPY_DEST is
+        // rejected as INCOMPATIBLE_BARRIER_LAYOUT). The copy itself
+        // promotes COMMON→COPY_DEST implicitly.
+        std::ignore = usesCommandList;
+        m_gpuTexture = manager->make2DTexture(width,
+                                              height,
+                                              mipLevel,
+                                              d3dFormat,
+                                              D3D12_RESOURCE_FLAG_NONE,
+                                              D3D12_RESOURCE_STATE_COMMON);
 
         if (format == GPUTextureFormat::bc7)
         {
-            // imageData contains already compressed data, so we can directly
-            // upload it to the GPU All mip levels are in this sequentially
+            // imageData contains pre-compressed BC7 blocks, level 0 first,
+            // levels packed tight (no inter-level padding). Copy each level
+            // into its placed-subresource slot in the upload buffer.
             auto desc = m_gpuTexture->resource()->GetDesc();
 
-            UINT numRows = 0;
-            UINT64 rowSizeInBytes = 0;
-            UINT64 totalBytes = 0;
-
             m_subresourceFootprints.resize(mipLevel);
+            std::vector<UINT> numRows(mipLevel);
+            std::vector<UINT64> rowSizeInBytes(mipLevel);
+            UINT64 totalBytes = 0;
 
             manager->device()->GetCopyableFootprints(
                 &desc,
@@ -142,8 +147,8 @@
                 mipLevel,                       // Number of mips
                 0,                              // Base offset
                 m_subresourceFootprints.data(), // One footprint per mip
-                &numRows,
-                &rowSizeInBytes,
+                numRows.data(),
+                rowSizeInBytes.data(),
                 &totalBytes);
 
             m_uploadBuffer = manager->makeUploadBuffer(
@@ -156,12 +161,20 @@
             for (UINT mip = 0; mip < mipLevel; ++mip)
             {
                 const auto& fp = m_subresourceFootprints[mip].Footprint;
-
-                UINT64 mipSize = fp.RowPitch * fp.Height;
-
-                memcpy(dst + m_subresourceFootprints[mip].Offset, src, mipSize);
-
-                src += mipSize; // advance to next mip in your BC7 blob
+                // RowPitch is padded to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT
+                // (256). Source rows are tight (rowSizeInBytes). Copy one
+                // block-row at a time so we don't overflow the upload slot
+                // and don't read past the source mip.
+                uint8_t* dstMip = dst + m_subresourceFootprints[mip].Offset;
+                const UINT64 srcRowBytes = rowSizeInBytes[mip];
+                const UINT rows = numRows[mip];
+                for (UINT row = 0; row < rows; ++row)
+                {
+                    memcpy(dstMip + row * fp.RowPitch,
+                           src + row * srcRowBytes,
+                           srcRowBytes);
+                }
+                src += srcRowBytes * rows;
             }
         }
         else if (format == GPUTextureFormat::rgba32)
@@ -769,7 +782,11 @@
     uint32_t height,
     uint32_t mipLevelCount,
     GPUTextureFormat format,
-    const uint8_t imageDataRGBAPremul[])
+    const uint8_t imageDataRGBAPremul[],
+    uint8_t /*blockWidth*/,
+    uint8_t /*blockHeight*/,
+    bool /*srgb*/,
+    bool /*generateRemainingMips*/)
 {
     return make_rcp<TextureD3D12Impl>(m_resourceManager.get(),
                                       width,
diff --git a/renderer/src/gl/render_context_gl_impl.cpp b/renderer/src/gl/render_context_gl_impl.cpp
index 7ae754c..eb17fc3 100644
--- a/renderer/src/gl/render_context_gl_impl.cpp
+++ b/renderer/src/gl/render_context_gl_impl.cpp
@@ -4,6 +4,8 @@
 
 #include "rive/renderer/gl/render_context_gl_impl.hpp"
 
+#include "rive/decoders/astc_footprints.hpp"
+
 #include "rive/renderer/gl/render_buffer_gl_impl.hpp"
 #include "rive/renderer/gl/render_target_gl.hpp"
 #include "rive/renderer/draw.hpp"
@@ -702,36 +704,116 @@
 };
 #endif // RIVE_CANVAS
 
-rcp<Texture> RenderContextGLImpl::makeImageTexture(
-    uint32_t width,
-    uint32_t height,
-    uint32_t mipLevelCount,
-    GPUTextureFormat format,
-    const uint8_t imageDataRGBAPremul[])
+rcp<Texture> RenderContextGLImpl::makeImageTexture(uint32_t width,
+                                                   uint32_t height,
+                                                   uint32_t mipLevelCount,
+                                                   GPUTextureFormat format,
+                                                   const uint8_t imageData[],
+                                                   uint8_t blockWidth,
+                                                   uint8_t blockHeight,
+                                                   [[maybe_unused]] bool srgb,
+                                                   bool generateRemainingMips)
 {
-    if (format != GPUTextureFormat::rgba32)
+    // Pick UNORM internal format. Sampler path treats texels as sRGB-
+    // encoded bytes (matching the GL_RGBA8 PNG upload).
+    GLenum sizedInternal;
+    bool isCompressed = false;
+
+    uint32_t bytesPerBlock = 16;
+    switch (format)
     {
-        assert(!"unsupported format");
-        return nullptr;
+        case GPUTextureFormat::rgba32:
+            sizedInternal = GL_RGBA8;
+            assert(blockWidth == 1 && blockHeight == 1);
+            bytesPerBlock = 4;
+            break;
+        case GPUTextureFormat::bc7:
+            sizedInternal = 0x8E8C; // GL_COMPRESSED_RGBA_BPTC_UNORM
+            isCompressed = true;
+            break;
+        case GPUTextureFormat::etc2:
+            sizedInternal = 0x9278; // GL_COMPRESSED_RGBA8_ETC2_EAC
+            isCompressed = true;
+            break;
+        case GPUTextureFormat::astc:
+        {
+
+            const int idx = rive::astcFootprintIndex(blockWidth, blockHeight);
+            if (idx < 0)
+            {
+                assert(!"unsupported ASTC block footprint");
+                return nullptr;
+            }
+
+            // KHR_texture_compression_astc_ldr lays the per-footprint enums
+            // out contiguously starting at GL_COMPRESSED_RGBA_ASTC_4x4_KHR, in
+            // the same canonical order as astcFootprintIndex().
+            sizedInternal =
+                static_cast<GLenum>(GL_COMPRESSED_RGBA_ASTC_4x4_KHR + idx);
+            isCompressed = true;
+            break;
+        }
+        default:
+            assert(!"unsupported format");
+            return nullptr;
     }
+    assert(!(generateRemainingMips && isCompressed) &&
+           "glGenerateMipmap is undefined on compressed textures");
 
     GLuint textureID;
     glGenTextures(1, &textureID);
     glActiveTexture(GL_TEXTURE0 + IMAGE_TEXTURE_IDX);
     glBindTexture(GL_TEXTURE_2D, textureID);
-    glTexStorage2D(GL_TEXTURE_2D, mipLevelCount, GL_RGBA8, width, height);
-    if (imageDataRGBAPremul != nullptr)
+    glTexStorage2D(GL_TEXTURE_2D,
+                   static_cast<GLsizei>(mipLevelCount),
+                   sizedInternal,
+                   width,
+                   height);
+    if (imageData != nullptr)
     {
-        glTexSubImage2D(GL_TEXTURE_2D,
-                        0,
-                        0,
-                        0,
-                        width,
-                        height,
-                        GL_RGBA,
-                        GL_UNSIGNED_BYTE,
-                        imageDataRGBAPremul);
-        glGenerateMipmap(GL_TEXTURE_2D);
+        // When the caller wants the GPU to auto-fill mips 1..N from mip 0
+        // (PNG path), only upload level 0 and finish via glGenerateMipmap.
+        const uint32_t levelsToUpload =
+            generateRemainingMips ? 1u : mipLevelCount;
+        size_t srcOffset = 0;
+        for (uint32_t i = 0; i < levelsToUpload; ++i)
+        {
+            const uint32_t logW = std::max<uint32_t>(1u, width >> i);
+            const uint32_t logH = std::max<uint32_t>(1u, height >> i);
+            const uint32_t blocksX = (logW + blockWidth - 1) / blockWidth;
+            const uint32_t blocksY = (logH + blockHeight - 1) / blockHeight;
+            const size_t levelBytes =
+                static_cast<size_t>(blocksX) * blocksY * bytesPerBlock;
+            if (isCompressed)
+            {
+                glCompressedTexSubImage2D(GL_TEXTURE_2D,
+                                          static_cast<GLint>(i),
+                                          0,
+                                          0,
+                                          logW,
+                                          logH,
+                                          sizedInternal,
+                                          static_cast<GLsizei>(levelBytes),
+                                          imageData + srcOffset);
+            }
+            else
+            {
+                glTexSubImage2D(GL_TEXTURE_2D,
+                                static_cast<GLint>(i),
+                                0,
+                                0,
+                                logW,
+                                logH,
+                                GL_RGBA,
+                                GL_UNSIGNED_BYTE,
+                                imageData + srcOffset);
+            }
+            srcOffset += levelBytes;
+        }
+        if (generateRemainingMips && mipLevelCount > 1)
+        {
+            glGenerateMipmap(GL_TEXTURE_2D);
+        }
     }
     return adoptImageTexture(width, height, textureID);
 }
diff --git a/renderer/src/metal/render_context_metal_impl.mm b/renderer/src/metal/render_context_metal_impl.mm
index 0905172..43bb392 100644
--- a/renderer/src/metal/render_context_metal_impl.mm
+++ b/renderer/src/metal/render_context_metal_impl.mm
@@ -4,6 +4,8 @@
 
 #include "rive/renderer/metal/render_context_metal_impl.h"
 
+#include "rive/decoders/astc_footprints.hpp"
+
 #include "background_shader_compiler.h"
 #include "rive/renderer/buffer_ring.hpp"
 #ifdef RIVE_CANVAS
@@ -829,12 +831,18 @@
                      uint32_t width,
                      uint32_t height,
                      uint32_t mipLevelCount,
-                     const uint8_t imageDataRGBAPremul[]) :
-        Texture(width, height)
+                     const uint8_t imageData[],
+                     MTLPixelFormat pixelFormat = MTLPixelFormatRGBA8Unorm,
+                     uint8_t blockWidth = 1,
+                     uint8_t blockHeight = 1,
+                     uint32_t bytesPerBlock = 4,
+                     bool generateRemainingMips = false) :
+        Texture(width, height),
+        m_mipsDirty(generateRemainingMips && mipLevelCount > 1)
     {
         // Create the texture.
         MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init];
-        desc.pixelFormat = MTLPixelFormatRGBA8Unorm;
+        desc.pixelFormat = pixelFormat;
         desc.width = width;
         desc.height = height;
         desc.mipmapLevelCount = mipLevelCount;
@@ -842,12 +850,29 @@
         desc.textureType = MTLTextureType2D;
         m_texture = [gpu newTextureWithDescriptor:desc];
 
-        // Specify the top-level image in the mipmap chain.
-        MTLRegion region = MTLRegionMake2D(0, 0, width, height);
-        [m_texture replaceRegion:region
-                     mipmapLevel:0
-                       withBytes:imageDataRGBAPremul
-                     bytesPerRow:width * 4];
+        // Upload mip 0 only when the caller asks for auto-mipgen
+        // (generateRemainingMips=true). Otherwise upload every level the
+        // texture was created with from the caller-supplied tight blob.
+        const uint32_t levelsToUpload =
+            generateRemainingMips ? 1u : mipLevelCount;
+        const uint8_t* src = imageData;
+        for (uint32_t i = 0; i < levelsToUpload; ++i)
+        {
+            const uint32_t logW = std::max<uint32_t>(1u, width >> i);
+            const uint32_t logH = std::max<uint32_t>(1u, height >> i);
+            const uint32_t blocksX = (logW + blockWidth - 1) / blockWidth;
+            const uint32_t blocksY = (logH + blockHeight - 1) / blockHeight;
+            const NSUInteger bytesPerRow =
+                static_cast<NSUInteger>(blocksX) * bytesPerBlock;
+            const size_t levelBytes =
+                static_cast<size_t>(bytesPerRow) * blocksY;
+            MTLRegion region = MTLRegionMake2D(0, 0, logW, logH);
+            [m_texture replaceRegion:region
+                         mipmapLevel:i
+                           withBytes:src
+                         bytesPerRow:bytesPerRow];
+            src += levelBytes;
+        }
     }
 
     void ensureMipmaps(id<MTLCommandBuffer> commandBuffer) const
@@ -881,16 +906,67 @@
     uint32_t height,
     uint32_t mipLevelCount,
     GPUTextureFormat format,
-    const uint8_t imageDataRGBAPremul[])
+    const uint8_t imageData[],
+    uint8_t blockWidth,
+    uint8_t blockHeight,
+    [[maybe_unused]] bool srgb,
+    bool generateRemainingMips)
 {
-    if (format != GPUTextureFormat::rgba32)
-    {
-        assert(!"unsupported format");
-        return nullptr;
-    }
+    MTLPixelFormat pixelFormat = MTLPixelFormatRGBA8Unorm;
+    uint32_t bytesPerBlock = 4;
+    bool isCompressed = false;
 
-    return make_rcp<TextureMetalImpl>(
-        m_gpu, width, height, mipLevelCount, imageDataRGBAPremul);
+    switch (format)
+    {
+        case GPUTextureFormat::rgba32:
+            assert(blockWidth == 1 && blockHeight == 1);
+            break;
+#if !TARGET_OS_IPHONE
+        case GPUTextureFormat::bc7:
+            pixelFormat = MTLPixelFormatBC7_RGBAUnorm;
+            bytesPerBlock = 16;
+            isCompressed = true;
+            break;
+#endif
+        case GPUTextureFormat::astc:
+        {
+            // MTLPixelFormatASTC_*_LDR runs 4x4=204..6x6=208 then 8x5=210:
+            // 209 is unused, so idx >= 5 (8x5 onward, Vulkan order) adds one.
+            const int idx = rive::astcFootprintIndex(blockWidth, blockHeight);
+            if (idx < 0)
+            {
+                assert(!"unsupported ASTC block footprint");
+                return nullptr;
+            }
+            pixelFormat = static_cast<MTLPixelFormat>(
+                MTLPixelFormatASTC_4x4_LDR + idx + (idx >= 5 ? 1 : 0));
+            bytesPerBlock = 16;
+            isCompressed = true;
+            break;
+        }
+        case GPUTextureFormat::etc2:
+            // ETC2 RGBA8: 8 bytes EAC alpha + 8 bytes ETC2 RGB = 16/block.
+            pixelFormat = MTLPixelFormatEAC_RGBA8;
+            bytesPerBlock = 16;
+            isCompressed = true;
+            break;
+        default:
+            assert(!"unsupported format");
+            return nullptr;
+    }
+    assert(!(generateRemainingMips && isCompressed) &&
+           "generateMipmapsForTexture is undefined on compressed formats");
+
+    return make_rcp<TextureMetalImpl>(m_gpu,
+                                      width,
+                                      height,
+                                      mipLevelCount,
+                                      imageData,
+                                      pixelFormat,
+                                      blockWidth,
+                                      blockHeight,
+                                      bytesPerBlock,
+                                      generateRemainingMips);
 }
 
 #ifdef RIVE_CANVAS
diff --git a/renderer/src/ore/gl/ore_context_gl.cpp b/renderer/src/ore/gl/ore_context_gl.cpp
index 3f63b3e..454467e 100644
--- a/renderer/src/ore/gl/ore_context_gl.cpp
+++ b/renderer/src/ore/gl/ore_context_gl.cpp
@@ -90,19 +90,12 @@
         case TextureFormat::bc7unorm:
             RIVE_UNREACHABLE();
 #endif
-#ifdef GL_COMPRESSED_RGBA_ASTC_4x4_KHR
         case TextureFormat::astc4x4:
             return GL_COMPRESSED_RGBA_ASTC_4x4_KHR;
         case TextureFormat::astc6x6:
             return GL_COMPRESSED_RGBA_ASTC_6x6_KHR;
         case TextureFormat::astc8x8:
             return GL_COMPRESSED_RGBA_ASTC_8x8_KHR;
-#else
-        case TextureFormat::astc4x4:
-        case TextureFormat::astc6x6:
-        case TextureFormat::astc8x8:
-            RIVE_UNREACHABLE();
-#endif
     }
     RIVE_UNREACHABLE();
 }
diff --git a/renderer/src/ore/gl/ore_texture_gl.cpp b/renderer/src/ore/gl/ore_texture_gl.cpp
index 3487cc2..7bc0cf9 100644
--- a/renderer/src/ore/gl/ore_texture_gl.cpp
+++ b/renderer/src/ore/gl/ore_texture_gl.cpp
@@ -71,19 +71,12 @@
             return GL_COMPRESSED_RGB8_ETC2;
         case TextureFormat::etc2rgba8:
             return GL_COMPRESSED_RGBA8_ETC2_EAC;
-#ifdef GL_COMPRESSED_RGBA_ASTC_4x4_KHR
         case TextureFormat::astc4x4:
             return GL_COMPRESSED_RGBA_ASTC_4x4_KHR;
         case TextureFormat::astc6x6:
             return GL_COMPRESSED_RGBA_ASTC_6x6_KHR;
         case TextureFormat::astc8x8:
             return GL_COMPRESSED_RGBA_ASTC_8x8_KHR;
-#else
-        case TextureFormat::astc4x4:
-        case TextureFormat::astc6x6:
-        case TextureFormat::astc8x8:
-            RIVE_UNREACHABLE();
-#endif
     }
     RIVE_UNREACHABLE();
 }
diff --git a/renderer/src/render_context.cpp b/renderer/src/render_context.cpp
index 3edd95f..7f4d14a 100644
--- a/renderer/src/render_context.cpp
+++ b/renderer/src/render_context.cpp
@@ -173,14 +173,28 @@
         encodedBytes[0] == 0xAB && encodedBytes[1] == 0x4B &&
         encodedBytes[2] == 0x54 && encodedBytes[3] == 0x58)
     {
+        const Ktx2HwSupport hwSupport = {
+            platformFeatures().supportsTextureCompressionBC,
+            platformFeatures().supportsTextureCompressionASTC,
+            platformFeatures().supportsTextureCompressionETC2,
+        };
         Ktx2DecodeResult ktx2;
-        if (DecodeKtx2(encodedBytes.data(), encodedBytes.size(), ktx2))
+        if (DecodeKtx2(encodedBytes.data(),
+                       encodedBytes.size(),
+                       ktx2,
+                       hwSupport))
         {
+            // KTX2 provides the full level chain (or just level 0). The
+            // backends never auto-generate; whatever the file ships with is
+            // exactly what gets uploaded.
             texture = m_impl->makeImageTexture(ktx2.pixelWidth,
                                                ktx2.pixelHeight,
                                                ktx2.levelCount,
                                                ktx2.format,
-                                               ktx2.blocks.data());
+                                               ktx2.blocks.data(),
+                                               ktx2.blockWidth,
+                                               ktx2.blockHeight,
+                                               ktx2.srgb);
         }
     }
 #endif
@@ -203,7 +217,11 @@
                                                height,
                                                mipLevelCount,
                                                GPUTextureFormat::rgba32,
-                                               bitmap->bytes());
+                                               bitmap->bytes(),
+                                               /*blockWidth=*/1,
+                                               /*blockHeight=*/1,
+                                               /*srgb=*/false,
+                                               /*generateRemainingMips=*/true);
         }
     }
 #endif
diff --git a/renderer/src/vulkan/render_context_vulkan_impl.cpp b/renderer/src/vulkan/render_context_vulkan_impl.cpp
index 3e6773a..0c821db 100644
--- a/renderer/src/vulkan/render_context_vulkan_impl.cpp
+++ b/renderer/src/vulkan/render_context_vulkan_impl.cpp
@@ -4,6 +4,8 @@
 
 #include "rive/renderer/vulkan/render_context_vulkan_impl.hpp"
 
+#include "rive/decoders/astc_footprints.hpp"
+
 #include "vulkan_shaders.hpp"
 #ifdef RIVE_CANVAS
 #include "rive/renderer/render_canvas.hpp"
@@ -92,22 +94,112 @@
     uint32_t height,
     uint32_t mipLevelCount,
     GPUTextureFormat format,
-    const uint8_t imageDataRGBAPremul[])
+    const uint8_t imageData[],
+    uint8_t blockWidth,
+    uint8_t blockHeight,
+    [[maybe_unused]] bool srgb,
+    bool generateRemainingMips)
 {
-    if (format != GPUTextureFormat::rgba32)
-    {
-        assert(!"unsupported format");
-        return nullptr;
-    }
+    // Sampler path treats texels as sRGB-encoded bytes (matches PNG path's
+    // GL_RGBA8 / VK_FORMAT_R8G8B8A8_UNORM upload). Don't pick the GPU sRGB
+    // view here — would auto-linearise on sample and double-darken.
+    VkFormat vkFormat;
+    uint32_t bytesPerBlock = 16;
+    [[maybe_unused]] bool isCompressed = false;
 
-    auto texture = m_vk->makeTexture2D(
+    switch (format)
+    {
+        case GPUTextureFormat::rgba32:
+            vkFormat = VK_FORMAT_R8G8B8A8_UNORM;
+            assert(blockWidth == 1 && blockHeight == 1);
+            bytesPerBlock = 4;
+            break;
+        case GPUTextureFormat::bc7:
+            vkFormat = VK_FORMAT_BC7_UNORM_BLOCK;
+            isCompressed = true;
+            break;
+        case GPUTextureFormat::etc2:
+            // ETC2 RGBA8: 8 bytes EAC alpha + 8 bytes ETC2 RGB = 16/block.
+            vkFormat = VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK;
+            isCompressed = true;
+            break;
+        case GPUTextureFormat::astc:
         {
-            .format = VK_FORMAT_R8G8B8A8_UNORM,
-            .extent = {width, height},
-            .mipLevels = mipLevelCount,
-        },
-        "RenderContext imageTexture");
-    texture->scheduleUpload(imageDataRGBAPremul, height * width * 4);
+            const int idx = rive::astcFootprintIndex(blockWidth, blockHeight);
+            if (idx < 0)
+            {
+                assert(!"unsupported ASTC block footprint");
+                return nullptr;
+            }
+
+            vkFormat =
+                static_cast<VkFormat>(VK_FORMAT_ASTC_4x4_UNORM_BLOCK + 2 * idx);
+            isCompressed = true;
+            break;
+        }
+        default:
+            assert(!"unsupported format");
+            return nullptr;
+    }
+    assert(!(generateRemainingMips && isCompressed) &&
+           "vkCmdBlitImage mipgen is undefined on compressed formats");
+
+    auto texture = m_vk->makeTexture2D({.format = vkFormat,
+                                        .extent = {width, height},
+                                        .mipLevels = mipLevelCount},
+                                       "RenderContext imageTexture");
+
+    if (imageData == nullptr)
+    {
+        // Nothing to upload: return the allocated texture as-is.
+        return texture;
+    }
+
+    if (generateRemainingMips)
+    {
+        // Upload mip 0 only; vkutil's single-region scheduleUpload calls
+        // generateMipmaps to fill the rest. Size mip 0 in whole blocks so
+        // the byte count is right for any footprint, not just 1x1.
+        const uint32_t blocksX = (width + blockWidth - 1) / blockWidth;
+        const uint32_t blocksY = (height + blockHeight - 1) / blockHeight;
+        const size_t mip0Bytes =
+            static_cast<size_t>(blocksX) * blocksY * bytesPerBlock;
+        texture->scheduleUpload(imageData, mip0Bytes);
+        return texture;
+    }
+
+    // Multi-mip: pre-compute per-level regions in the source blob. Levels are
+    // packed back to back starting with level 0, each in whole blocks.
+    std::vector<VkBufferImageCopy> regions;
+    regions.reserve(mipLevelCount);
+    size_t srcOffset = 0;
+    for (uint32_t i = 0; i < mipLevelCount; ++i)
+    {
+        const uint32_t logW = std::max<uint32_t>(1u, width >> i);
+        const uint32_t logH = std::max<uint32_t>(1u, height >> i);
+        const uint32_t blocksX = (logW + blockWidth - 1) / blockWidth;
+        const uint32_t blocksY = (logH + blockHeight - 1) / blockHeight;
+        const size_t levelBytes =
+            static_cast<size_t>(blocksX) * blocksY * bytesPerBlock;
+        // bufferRowLength/bufferImageHeight stay 0 => tightly packed rows.
+        regions.push_back({.bufferOffset = static_cast<VkDeviceSize>(srcOffset),
+                           .imageSubresource =
+                               {
+                                   .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                                   .mipLevel = i,
+                                   .layerCount = 1,
+                               },
+                           .imageExtent = {logW, logH, 1}});
+        srcOffset += levelBytes;
+    }
+
+    // Stage all levels into one buffer, then hand the region list over.
+    rcp<vkutil::Buffer> staging = m_vk->makeBuffer(
+        {.size = srcOffset, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT},
+        vkutil::Mappability::writeOnly);
+    std::memcpy(staging->contents(), imageData, srcOffset);
+    staging->flushContents();
+    texture->scheduleUpload(std::move(staging), std::move(regions));
     return texture;
 }
 
diff --git a/renderer/src/vulkan/vkutil.cpp b/renderer/src/vulkan/vkutil.cpp
index b88ab70..f960eb2 100644
--- a/renderer/src/vulkan/vkutil.cpp
+++ b/renderer/src/vulkan/vkutil.cpp
@@ -346,21 +346,20 @@
 void Texture2D::scheduleUpload(rcp<vkutil::Buffer> imageBufferRGBAPremul)
 {
     m_imageUploadBuffer = std::move(imageBufferRGBAPremul);
+    m_imageUploadRegions.clear();
+}
+
+void Texture2D::scheduleUpload(rcp<vkutil::Buffer> stagingBuffer,
+                               std::vector<VkBufferImageCopy> regions)
+{
+    m_imageUploadBuffer = std::move(stagingBuffer);
+    m_imageUploadRegions = std::move(regions);
 }
 
 void Texture2D::applyImageUploadBuffer(VkCommandBuffer commandBuffer)
 {
     assert(m_imageUploadBuffer != nullptr);
 
-    VkBufferImageCopy bufferImageCopy = {
-        .imageSubresource =
-            {
-                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .layerCount = 1,
-            },
-        .imageExtent = {width(), height(), 1},
-    };
-
     barrier(commandBuffer,
             {
                 .pipelineStages = VK_PIPELINE_STAGE_TRANSFER_BIT,
@@ -369,19 +368,53 @@
             },
             vkutil::ImageAccessAction::invalidateContents);
 
-    m_image->vk()->CmdCopyBufferToImage(commandBuffer,
-                                        *m_imageUploadBuffer,
-                                        *m_image,
-                                        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-                                        1,
-                                        &bufferImageCopy);
-
-    generateMipmaps(commandBuffer,
-                    {
-                        .pipelineStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
-                        .accessMask = VK_ACCESS_SHADER_READ_BIT,
-                        .layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
-                    });
+    if (!m_imageUploadRegions.empty())
+    {
+        // Caller-supplied per-level regions. No automatic mip generation —
+        // every level present in the texture must have a region.
+        m_image->vk()->CmdCopyBufferToImage(
+            commandBuffer,
+            *m_imageUploadBuffer,
+            *m_image,
+            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+            static_cast<uint32_t>(m_imageUploadRegions.size()),
+            m_imageUploadRegions.data());
+        // All mips already written — transition straight to shader-read.
+        barrier(commandBuffer,
+                {
+                    .pipelineStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+                    .accessMask = VK_ACCESS_SHADER_READ_BIT,
+                    .layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                });
+        m_imageUploadRegions.clear();
+    }
+    else
+    {
+        // Single-region upload (mip 0 full extent). Caller relies on
+        // generateMipmaps to fill remaining levels.
+        VkBufferImageCopy bufferImageCopy = {
+            .imageSubresource =
+                {
+                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                    .layerCount = 1,
+                },
+            .imageExtent = {width(), height(), 1},
+        };
+        m_image->vk()->CmdCopyBufferToImage(
+            commandBuffer,
+            *m_imageUploadBuffer,
+            *m_image,
+            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+            1,
+            &bufferImageCopy);
+        generateMipmaps(
+            commandBuffer,
+            {
+                .pipelineStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+                .accessMask = VK_ACCESS_SHADER_READ_BIT,
+                .layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+            });
+    }
 
     m_imageUploadBuffer = nullptr;
 }
diff --git a/renderer/src/webgpu/render_context_webgpu_impl.cpp b/renderer/src/webgpu/render_context_webgpu_impl.cpp
index 57a6031..f13a81d 100644
--- a/renderer/src/webgpu/render_context_webgpu_impl.cpp
+++ b/renderer/src/webgpu/render_context_webgpu_impl.cpp
@@ -4,6 +4,8 @@
 
 #include "rive/renderer/webgpu/render_context_webgpu_impl.hpp"
 
+#include "rive/decoders/astc_footprints.hpp"
+
 #include "rive/renderer/draw.hpp"
 #ifdef RIVE_CANVAS
 #include "rive/renderer/render_canvas.hpp"
@@ -2230,22 +2232,69 @@
     uint32_t height,
     uint32_t mipLevelCount,
     GPUTextureFormat format,
-    const uint8_t imageDataRGBAPremul[])
+    const uint8_t imageData[],
+    uint8_t blockWidth,
+    uint8_t blockHeight,
+    bool /*srgb*/,
+    bool generateRemainingMips)
 {
-    if (format != GPUTextureFormat::rgba32)
+    wgpu::TextureFormat wgpuFormat = wgpu::TextureFormat::RGBA8Unorm;
+    uint32_t bytesPerBlock = 4;
+
+    bool isCompressed = false;
+
+    switch (format)
     {
-        assert(!"unsupported format");
-        return nullptr;
+        case GPUTextureFormat::rgba32:
+            assert(blockWidth == 1 && blockHeight == 1);
+            break;
+        case GPUTextureFormat::bc7:
+            wgpuFormat = wgpu::TextureFormat::BC7RGBAUnorm;
+            bytesPerBlock = 16;
+            isCompressed = true;
+            break;
+        case GPUTextureFormat::astc:
+        {
+            // wgpu ASTC enums are sequential in spec footprint order
+            // starting at ASTC4x4Unorm. SRGB variant lives one entry later.
+            const int idx = rive::astcFootprintIndex(blockWidth, blockHeight);
+            if (idx < 0)
+            {
+                assert(!"unsupported ASTC block footprint");
+                return nullptr;
+            }
+            wgpuFormat = static_cast<wgpu::TextureFormat>(
+                static_cast<uint32_t>(wgpu::TextureFormat::ASTC4x4Unorm) +
+                2 * idx);
+
+            bytesPerBlock = 16;
+            isCompressed = true;
+            break;
+        }
+        case GPUTextureFormat::etc2:
+            // ETC2 RGBA8: 8 bytes EAC alpha + 8 bytes ETC2 RGB = 16/block.
+            wgpuFormat = wgpu::TextureFormat::ETC2RGBA8Unorm;
+            bytesPerBlock = 16;
+            isCompressed = true;
+            break;
+        default:
+            assert(!"unsupported format");
+            return nullptr;
     }
+
+    assert(!(generateRemainingMips && isCompressed) &&
+           "WebGPU mip generation is undefined on compressed formats");
+
     wgpu::TextureDescriptor textureDesc = {
         .usage =
             wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::CopyDst,
         .dimension = wgpu::TextureDimension::e2D,
         .size = {width, height},
-        .format = wgpu::TextureFormat::RGBA8Unorm,
+        .format = wgpuFormat,
+
         .mipLevelCount = mipLevelCount,
     };
-    if (mipLevelCount > 1)
+    if (generateRemainingMips && mipLevelCount > 1)
     {
 #ifdef RIVE_WAGYU
         // Wagyu generates mipmaps with copies.
@@ -2258,16 +2307,32 @@
 
     wgpu::Texture texture = m_device.CreateTexture(&textureDesc);
 
-    wgpu::TexelCopyTextureInfo dest = {.texture = texture};
-    wgpu::TexelCopyBufferLayout layout = {.bytesPerRow = width * 4};
-    wgpu::Extent3D extent = {width, height};
-    m_queue.WriteTexture(&dest,
-                         imageDataRGBAPremul,
-                         height * width * 4,
-                         &layout,
-                         &extent);
+    // Upload mip 0 only when caller wants auto-mipgen; otherwise upload all.
+    const uint32_t levelsToUpload = generateRemainingMips ? 1u : mipLevelCount;
+    size_t srcOffset = 0;
+    for (uint32_t i = 0; i < levelsToUpload; ++i)
+    {
+        const uint32_t logW = std::max<uint32_t>(1u, width >> i);
+        const uint32_t logH = std::max<uint32_t>(1u, height >> i);
+        const uint32_t blocksX = (logW + blockWidth - 1) / blockWidth;
+        const uint32_t blocksY = (logH + blockHeight - 1) / blockHeight;
+        const uint32_t bytesPerRow = blocksX * bytesPerBlock;
+        const size_t levelBytes = static_cast<size_t>(bytesPerRow) * blocksY;
 
-    if (mipLevelCount > 1)
+        // WebGPU requires block-compressed copies to cover whole blocks, so
+        // round the extent up to the level's physical (block-aligned) size.
+        wgpu::TexelCopyTextureInfo dest = {.texture = texture, .mipLevel = i};
+        wgpu::TexelCopyBufferLayout layout = {.bytesPerRow = bytesPerRow};
+        wgpu::Extent3D extent = {blocksX * blockWidth, blocksY * blockHeight};
+        m_queue.WriteTexture(&dest,
+                             imageData + srcOffset,
+                             levelBytes,
+                             &layout,
+                             &extent);
+        srcOffset += levelBytes;
+    }
+
+    if (generateRemainingMips && mipLevelCount > 1)
     {
         generateMipmaps(texture);
     }
diff --git a/tests/common/render_context_null.cpp b/tests/common/render_context_null.cpp
index 9e3ddd0..c3f64ec 100644
--- a/tests/common/render_context_null.cpp
+++ b/tests/common/render_context_null.cpp
@@ -64,7 +64,11 @@
                                                  uint32_t height,
                                                  uint32_t mipLevelCount,
                                                  GPUTextureFormat format,
-                                                 const uint8_t imageDataRGBA[])
+                                                 const uint8_t imageData[],
+                                                 uint8_t blockWidth,
+                                                 uint8_t blockHeight,
+                                                 bool srgb,
+                                                 bool generateRemainingMips)
 {
     return make_rcp<Texture>(width, height);
 }
diff --git a/tests/common/render_context_null.hpp b/tests/common/render_context_null.hpp
index 9db3f89..ca95b4c 100644
--- a/tests/common/render_context_null.hpp
+++ b/tests/common/render_context_null.hpp
@@ -38,7 +38,11 @@
         uint32_t height,
         uint32_t mipLevelCount,
         rive::GPUTextureFormat format,
-        const uint8_t imageDataRGBA[]) override;
+        const uint8_t imageData[],
+        uint8_t blockWidth = 1,
+        uint8_t blockHeight = 1,
+        bool srgb = false,
+        bool generateRemainingMips = false) override;
 
     std::unique_ptr<rive::gpu::BufferRing> makeUniformBufferRing(
         size_t capacityInBytes) override;
diff --git a/tests/unit_tests/runtime/decode_ktx2_test.cpp b/tests/unit_tests/runtime/decode_ktx2_test.cpp
index 6ab370c..29c8371 100644
--- a/tests/unit_tests/runtime/decode_ktx2_test.cpp
+++ b/tests/unit_tests/runtime/decode_ktx2_test.cpp
@@ -16,7 +16,7 @@
 //   ...mip data
 namespace
 {
-constexpr uint8_t kKtx2Identifier[12] = {
+constexpr uint8_t Ktx2Identifier[12] = {
     0xAB,
     0x4B,
     0x54,
@@ -48,8 +48,8 @@
 {
     std::vector<uint8_t> buf;
     buf.insert(buf.end(),
-               kKtx2Identifier,
-               kKtx2Identifier + sizeof(kKtx2Identifier));
+               Ktx2Identifier,
+               Ktx2Identifier + sizeof(Ktx2Identifier));
 
     appendLE<uint32_t>(buf, vkFormat);
     appendLE<uint32_t>(buf, 1); // typeSize