Add gpu compressed decoders (#12237) 195d4fcb25 * feat(renderer): GPU compressed texture support (BC7 / ASTC / ETC2) - Software decoders for BC, ASTC, ETC2 in the decoders library. - KTX2 reader extended to parse BC7, ASTC LDR (all footprints), ETC2 RGBA8. - Ktx2HwSupport + per-backend cap flags; DecodeKtx2 falls back to SW decode + tight RGBA mip chain when HW lacks the format. - Native compressed upload in Vulkan, GL, Metal, WebGPU (D3D11/D3D12 already supported BC7); per-mip uploads, no auto-mipgen for compressed. - Texture compressor premultiplies alpha before BC7 encode so output matches the runtime PNG path. - Android CMakeLists exposes decoders include directory. * Android break * fix(android): always expose decoders/include for renderer sources * fix(decoders): include <cstdio> for stderr in texture_decoder.cpp * Clang format * Update premake5_pls_renderer.lua * Update render_context_gl_impl.cpp * Update render_context_vulkan_impl.cpp * Fix null renderer * Update decode_ktx2.hpp * Update astc_footprints.hpp * Copyright * Update texture_decoder.cpp * Comments * Comments * Update to ETC * Update decode_bc_texture.cpp * Update decode_bc_texture.cpp * More comments * Comments * Bug fix on block sizes * Clang format * Fix ASTC * Fix android * Update UE and Other platform * Update render_context_vulkan_impl.cpp * Josh Comments Co-authored-by: John White <aliasbinman@gmail.com>
diff --git a/.rive_head b/.rive_head index ca9f990..a9f10f6 100644 --- a/.rive_head +++ b/.rive_head
@@ -1 +1 @@ -fe77b2ec3dd8ea1f11cb8b54f950c33ed0b87dc6 +195d4fcb25427e309307e07670afb060c6886fec
diff --git a/decoders/include/rive/decoders/astc_footprints.hpp b/decoders/include/rive/decoders/astc_footprints.hpp new file mode 100644 index 0000000..4309e77 --- /dev/null +++ b/decoders/include/rive/decoders/astc_footprints.hpp
@@ -0,0 +1,61 @@ +/* + * Copyright 2026 Rive + */ + +#ifndef _RIVE_ASTC_FOOTPRINTS_HPP_ +#define _RIVE_ASTC_FOOTPRINTS_HPP_ + +#include <cstdint> + +namespace rive +{ + +// LDR ASTC block footprints in canonical (Vulkan / KHR_ldr) spec order. The +// index into this table also indexes the corresponding GPU enums: +// VkFormat (UNORM) = VK_FORMAT_ASTC_4x4_UNORM_BLOCK (157) + 2 * idx +// VkFormat (SRGB) = UNORM + 1 +// GL enum (UNORM) = 0x93B0 + idx +// GL enum (SRGB) = 0x93D0 + idx +struct AstcFootprint +{ + uint8_t width; + uint8_t height; +}; + +constexpr AstcFootprint AstcFootprints[] = { + {4, 4}, + {5, 4}, + {5, 5}, + {6, 5}, + {6, 6}, + {8, 5}, + {8, 6}, + {8, 8}, + {10, 5}, + {10, 6}, + {10, 8}, + {10, 10}, + {12, 10}, + {12, 12}, +}; +constexpr int AstcFootprintCount = + sizeof(AstcFootprints) / sizeof(AstcFootprints[0]); + +// Returns -1 if (blockWidth, blockHeight) is not a recognised LDR ASTC +// footprint. +inline int astcFootprintIndex(uint8_t blockWidth, uint8_t blockHeight) +{ + for (int i = 0; i < AstcFootprintCount; ++i) + { + if (AstcFootprints[i].width == blockWidth && + AstcFootprints[i].height == blockHeight) + { + return i; + } + } + return -1; +} + +} // namespace rive + +#endif
diff --git a/decoders/include/rive/decoders/texture_decoder.hpp b/decoders/include/rive/decoders/texture_decoder.hpp new file mode 100644 index 0000000..e8ca4a4 --- /dev/null +++ b/decoders/include/rive/decoders/texture_decoder.hpp
@@ -0,0 +1,36 @@ +/* + * Copyright 2026 Rive + */ + +#ifndef _RIVE_TEXTURE_DECODER_HPP_ +#define _RIVE_TEXTURE_DECODER_HPP_ + +#include "rive/decoders/bitmap_decoder.hpp" +#include "rive/gpu_texture_format.hpp" + +#include <cstddef> +#include <cstdint> +#include <memory> + +// Decode one mip level of block-compressed texture data to an RGBA Bitmap. +// `blocks` points at the start of the level's block grid; `byteCount` is its +// size in bytes. `width` / `height` are the level's logical pixel dimensions. +// `blockWidth` / `blockHeight` are only consulted for ASTC, whose callers pass +// the block footprint from the format; BC/ETC ignore them (default 1). +// +// Returns nullptr if the format's software decoder was not compiled in or +// decoding fails. +// +// Build flags that enable each family: +// RIVE_ASTC_DECODER -- astc (any block size) +// RIVE_BC_DECODER -- bc1 / bc3 / bc7 (bc2 currently returns nullptr) +// RIVE_ETC_DECODER -- etc2 (RGBA8 only) +std::unique_ptr<Bitmap> decode_texture(const uint8_t* blocks, + size_t byteCount, + uint32_t width, + uint32_t height, + rive::GPUTextureFormat format, + uint32_t blockWidth = 1, + uint32_t blockHeight = 1); + +#endif
diff --git a/decoders/premake5_v2.lua b/decoders/premake5_v2.lua index 4b3043d..3753a5f 100644 --- a/decoders/premake5_v2.lua +++ b/decoders/premake5_v2.lua
@@ -26,6 +26,21 @@ description = 'don\'t build KTX2 container parsing into the rive_decoders library', }) +newoption({ + trigger = 'with_rive_astc_decoder', + description = 'build ASTC software decoder into the rive_decoders library (requires astcenc)', +}) + +newoption({ + trigger = 'with_rive_bc_decoder', + description = 'build BCn software decoder into the rive_decoders library (requires bc7enc_rdo)', +}) + +newoption({ + trigger = 'with_rive_etc_decoder', + description = 'build ETC2 software decoder into the rive_decoders library (requires Ericsson ETCPACK)', +}) + if not _OPTIONS["no_rive_png"] then dofile(rive .. '/dependencies/premake5_libpng_v2.lua') end @@ -40,6 +55,19 @@ libwebp = '' end +local dependency = require('dependency') + +if _OPTIONS["with_rive_astc_decoder"] then + astcenc = dependency.github('ARM-software/astc-encoder', '4.7.0') +end + +if _OPTIONS["with_rive_bc_decoder"] then + bc7enc = dependency.github('richgel999/bc7enc_rdo', 'master') +end + +if _OPTIONS["with_rive_etc_decoder"] then + etcpack = dependency.github('Ericsson/ETCPACK', 'master') +end project('rive_decoders') do @@ -110,10 +138,97 @@ files({ 'src/decode_webp.cpp' }) end + filter({ 'options:not no_rive_ktx2' }) do defines({ 'RIVE_KTX2' }) files({ 'src/decode_ktx2.cpp' }) + end + + -- Always include the texture decoder dispatcher; it compiles cleanly with + -- no decoder flags set (all paths return nullptr with a log message). + filter({}) + do + files({ 'src/texture_decoder.cpp' }) + end + + if _OPTIONS["with_rive_astc_decoder"] then + filter({ 'options:with_rive_astc_decoder' }) + do + includedirs({ astcenc .. '/Source' }) + defines({ + 'RIVE_ASTC_DECODER', + 'ASTCENC_SSE=0', + 'ASTCENC_POPCNT=0', + 'ASTCENC_F16C=0', + 'ASTCENC_AVX=0', + 'ASTCENC_NEON=0', + }) + files({ + 'src/decode_astc_texture.cpp', + astcenc .. 
'/Source/astcenc_*.cpp', + }) + buildoptions({ + '-Wno-sign-conversion', + '-Wno-implicit-int-float-conversion', + '-Wno-float-conversion', + '-Wno-shorten-64-to-32', + '-Wno-unused-variable', + '-Wno-unused-function', + '-Wno-shadow', + '-Wno-missing-field-initializers', + }) + end + end + + if _OPTIONS["with_rive_bc_decoder"] then + filter({ 'options:with_rive_bc_decoder' }) + do + includedirs({ bc7enc }) + defines({ 'RIVE_BC_DECODER' }) + files({ + 'src/decode_bc_texture.cpp', + bc7enc .. '/bc7decomp.cpp', + bc7enc .. '/bc7decomp_ref.cpp', + bc7enc .. '/rgbcx.cpp', + }) + buildoptions({ + '-Wno-sign-conversion', + '-Wno-implicit-int-float-conversion', + '-Wno-float-conversion', + '-Wno-shorten-64-to-32', + '-Wno-unused-variable', + '-Wno-unused-function', + '-Wno-unused-const-variable', + '-Wno-shadow', + '-Wno-missing-field-initializers', + }) + end + end + + if _OPTIONS["with_rive_etc_decoder"] then + filter({ 'options:with_rive_etc_decoder' }) + do + defines({ 'RIVE_ETC_DECODER' }) + files({ + 'src/decode_etc_texture.cpp', + etcpack .. '/source/etcdec.cxx', + }) + buildoptions({ + '-Wno-sign-conversion', + '-Wno-implicit-int-float-conversion', + '-Wno-float-conversion', + '-Wno-shorten-64-to-32', + '-Wno-unused-variable', + '-Wno-unused-function', + '-Wno-unused-but-set-variable', + '-Wno-shadow', + '-Wno-missing-field-initializers', + '-Wno-old-style-cast', + '-Wno-parentheses', + '-Wno-sign-compare', + }) + end end end
diff --git a/decoders/src/decode_astc_texture.cpp b/decoders/src/decode_astc_texture.cpp new file mode 100644 index 0000000..380740e --- /dev/null +++ b/decoders/src/decode_astc_texture.cpp
@@ -0,0 +1,83 @@ +/* + * Copyright 2026 Rive + */ + +#include "rive/decoders/bitmap_decoder.hpp" +#include "astcenc.h" + +#include <cstdio> +#include <memory> +#include <vector> + +std::unique_ptr<Bitmap> decode_astc_texture(const uint8_t* blocks, + size_t byteCount, + uint32_t width, + uint32_t height, + uint32_t blockWidth, + uint32_t blockHeight) +{ + astcenc_config config; + astcenc_error err = astcenc_config_init(ASTCENC_PRF_LDR_SRGB, + blockWidth, + blockHeight, + 1, + ASTCENC_PRE_MEDIUM, + ASTCENC_FLG_DECOMPRESS_ONLY, + &config); + if (err != ASTCENC_SUCCESS) + { + fprintf(stderr, + "DecodeAstcTexture - astcenc_config_init failed: %s\n", + astcenc_get_error_string(err)); + return nullptr; + } + + astcenc_context* ctx = nullptr; + err = astcenc_context_alloc(&config, 1, &ctx); + if (err != ASTCENC_SUCCESS) + { + fprintf(stderr, + "DecodeAstcTexture - astcenc_context_alloc failed: %s\n", + astcenc_get_error_string(err)); + return nullptr; + } + + const size_t pixelCount = static_cast<size_t>(width) * height; + auto pixels = std::make_unique<uint8_t[]>(pixelCount * 4); + + void* slicePtr = pixels.get(); + astcenc_image outImage; + outImage.dim_x = width; + outImage.dim_y = height; + outImage.dim_z = 1; + outImage.data_type = ASTCENC_TYPE_U8; + outImage.data = &slicePtr; + + const astcenc_swizzle swizzle = {ASTCENC_SWZ_R, + ASTCENC_SWZ_G, + ASTCENC_SWZ_B, + ASTCENC_SWZ_A}; + + err = astcenc_decompress_image(ctx, + blocks, + byteCount, + &outImage, + &swizzle, + 0); + astcenc_context_free(ctx); + + if (err != ASTCENC_SUCCESS) + { + fprintf(stderr, + "DecodeAstcTexture - astcenc_decompress_image failed: %s\n", + astcenc_get_error_string(err)); + return nullptr; + } + + const size_t numBytes = static_cast<size_t>(width) * height * 4; + return std::make_unique<Bitmap>(width, + height, + numBytes, + Bitmap::PixelFormat::RGBA, + std::move(pixels)); +}
diff --git a/decoders/src/decode_bc_texture.cpp b/decoders/src/decode_bc_texture.cpp new file mode 100644 index 0000000..4ba38a4 --- /dev/null +++ b/decoders/src/decode_bc_texture.cpp
@@ -0,0 +1,99 @@ +/* + * Copyright 2026 Rive + */ + +#include "rive/decoders/bitmap_decoder.hpp" +#include "rive/gpu_texture_format.hpp" + +// bc7decomp provides BC7 software decompression. +// rgbcx provides BC1/BC2/BC3 software decompression. +#include "bc7decomp.h" +#include "rgbcx.h" + +#include <algorithm> +#include <cstdio> +#include <cstring> +#include <memory> + +std::unique_ptr<Bitmap> decode_bc_texture(const uint8_t* blocks, + size_t /*byteCount*/, + uint32_t width, + uint32_t height, + rive::GPUTextureFormat format) +{ + // All BCn formats use 4x4 pixel blocks. + const uint32_t blocksX = (width + 3) / 4; + const uint32_t blocksY = (height + 3) / 4; + + const size_t pixelCount = static_cast<size_t>(width) * height; + auto pixels = std::make_unique<uint8_t[]>(pixelCount * 4); + memset(pixels.get(), 0, pixelCount * 4); + + const uint8_t* src = blocks; + + for (uint32_t by = 0; by < blocksY; by++) + { + for (uint32_t bx = 0; bx < blocksX; bx++) + { + // Decode one 4x4 block into a temporary 16-pixel RGBA buffer. + // uint32_t storage gives the bc7decomp/rgbcx union casts proper + // alignment (required on ARM) and lets the copy below move one + // pixel per assignment. + uint32_t blockPixels[16] = {}; + + switch (format) + { + case rive::GPUTextureFormat::bc7: + bc7decomp::unpack_bc7( + src, + reinterpret_cast<bc7decomp::color_rgba*>(blockPixels)); + src += 16; + break; + + case rive::GPUTextureFormat::bc1: + rgbcx::unpack_bc1( + src, + reinterpret_cast<rgbcx::color32*>(blockPixels), + true); + src += 8; + break; + + case rive::GPUTextureFormat::bc3: + rgbcx::unpack_bc3( + src, + reinterpret_cast<rgbcx::color32*>(blockPixels)); + src += 16; + break; + + default: + fprintf(stderr, + "DecodeBcTexture - unsupported BC format %u\n", + static_cast<unsigned>(format)); + return nullptr; + } + + // Copy decoded pixels into the output image. 
The last block + // row/column may extend past the image edge — clamp via a + // precomputed pixel count so each loop has a single exit. + uint32_t* dst32 = reinterpret_cast<uint32_t*>(pixels.get()); + const uint32_t copyW = std::min<uint32_t>(4u, width - bx * 4); + const uint32_t copyH = std::min<uint32_t>(4u, height - by * 4); + for (uint32_t py = 0; py < copyH; py++) + { + const uint32_t dstY = by * 4 + py; + for (uint32_t px = 0; px < copyW; px++) + { + const uint32_t dstX = bx * 4 + px; + dst32[dstY * width + dstX] = blockPixels[py * 4 + px]; + } + } + } + } + + const size_t numBytes = static_cast<size_t>(width) * height * 4; + return std::make_unique<Bitmap>(width, + height, + numBytes, + Bitmap::PixelFormat::RGBA, + std::move(pixels)); +}
diff --git a/decoders/src/decode_etc_texture.cpp b/decoders/src/decode_etc_texture.cpp new file mode 100644 index 0000000..567f76a --- /dev/null +++ b/decoders/src/decode_etc_texture.cpp
@@ -0,0 +1,118 @@ +/* + * Copyright 2026 Rive + */ + +#include "rive/decoders/bitmap_decoder.hpp" +#include "rive/gpu_texture_format.hpp" + +#include <cstdio> +#include <cstring> +#include <memory> + +// Forward declarations for Ericsson ETCPACK's etcdec.cxx (built as part of +// rive_decoders when RIVE_ETC_DECODER is set). Each entrypoint writes one 4x4 +// block into an interleaved RGBA buffer: RGB at bytes +0/+1/+2, alpha at +3. +extern void decompressBlockETC2c(unsigned int block_part1, + unsigned int block_part2, + unsigned char* img, + int width, + int height, + int startx, + int starty, + int channels); +extern void decompressBlockAlphaC(unsigned char* data, + unsigned char* img, + int width, + int height, + int ix, + int iy, + int channels); + +namespace +{ +unsigned int readBE32(const uint8_t* p) +{ + return (static_cast<unsigned int>(p[0]) << 24) | + (static_cast<unsigned int>(p[1]) << 16) | + (static_cast<unsigned int>(p[2]) << 8) | + static_cast<unsigned int>(p[3]); +} +} // namespace + +// Decodes ETC2 RGBA8 (16 bytes/block: 8 bytes EAC alpha + 8 bytes ETC2 RGB).
+std::unique_ptr<Bitmap> decode_etc_texture(const uint8_t* blocks, + size_t byteCount, + uint32_t width, + uint32_t height, + rive::GPUTextureFormat format) +{ + if (format != rive::GPUTextureFormat::etc2 || width == 0 || height == 0) + { + return nullptr; + } + + const uint32_t paddedW = (width + 3u) & ~3u; + const uint32_t paddedH = (height + 3u) & ~3u; + const uint32_t blocksX = paddedW / 4u; + const uint32_t blocksY = paddedH / 4u; + const size_t expectedBytes = static_cast<size_t>(blocksX) * blocksY * 16u; + if (byteCount != expectedBytes) + { + fprintf(stderr, + "DecodeEtcTexture - byteCount %zu != expected %zu for %ux%u\n", + byteCount, + expectedBytes, + width, + height); + return nullptr; + } + + const size_t paddedPixels = + static_cast<size_t>(paddedW) * static_cast<size_t>(paddedH); + auto padded = std::make_unique<uint8_t[]>(paddedPixels * 4); + + const uint8_t* src = blocks; + for (uint32_t by = 0; by < blocksY; ++by) + { + for (uint32_t bx = 0; bx < blocksX; ++bx) + { + const int startX = static_cast<int>(bx * 4u); + const int startY = static_cast<int>(by * 4u); + decompressBlockAlphaC(const_cast<uint8_t*>(src), + padded.get() + 3, + static_cast<int>(paddedW), + static_cast<int>(paddedH), + startX, + startY, + 4); + const unsigned int p1 = readBE32(src + 8); + const unsigned int p2 = readBE32(src + 12); + decompressBlockETC2c(p1, + p2, + padded.get(), + static_cast<int>(paddedW), + static_cast<int>(paddedH), + startX, + startY, + 4); + src += 16; + } + } + + // Crop padded RGBA grid down to (width, height). 
+ const size_t outPixels = + static_cast<size_t>(width) * static_cast<size_t>(height); + auto pixels = std::make_unique<uint8_t[]>(outPixels * 4); + for (uint32_t y = 0; y < height; ++y) + { + std::memcpy(pixels.get() + static_cast<size_t>(y) * width * 4, + padded.get() + static_cast<size_t>(y) * paddedW * 4, + static_cast<size_t>(width) * 4); + } + + return std::make_unique<Bitmap>(width, + height, + outPixels * 4, + Bitmap::PixelFormat::RGBA, + std::move(pixels)); +}
diff --git a/decoders/src/decode_ktx2.cpp b/decoders/src/decode_ktx2.cpp index 9154ab6..135595e 100644 --- a/decoders/src/decode_ktx2.cpp +++ b/decoders/src/decode_ktx2.cpp
@@ -13,6 +13,8 @@ // Spec: https://registry.khronos.org/KTX/specs/2.0/ktxspec.v2.html #include "rive/decoders/decode_ktx2.hpp" +#include "rive/decoders/astc_footprints.hpp" +#include "rive/decoders/texture_decoder.hpp" #include <algorithm> #include <cstdio> @@ -22,7 +24,7 @@ { namespace { -constexpr uint8_t kKtx2Identifier[12] = { +constexpr uint8_t Ktx2Identifier[12] = { 0xAB, 0x4B, 0x54, @@ -40,7 +42,20 @@ constexpr uint32_t VK_FORMAT_BC7_UNORM_BLOCK = 145; constexpr uint32_t VK_FORMAT_BC7_SRGB_BLOCK = 146; -constexpr uint32_t kSupercompressionNone = 0; +// We only ship ETC2 RGBA8 (151 UNORM / 152 SRGB). The RGB8 (147/148) and +// RGB-with-1-bit-alpha (149/150) variants are valid vkFormats but the rive +// runtime always wants a 4-channel image; encoders should produce RGBA8. +constexpr uint32_t VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK = 151; +constexpr uint32_t VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = 152; + +// ASTC LDR (VK_FORMAT_ASTC_<X>x<Y>_UNORM_BLOCK = 157,159,... and the SRGB +// variant is the next value). +constexpr uint32_t VK_FORMAT_ASTC_4x4_UNORM_BLOCK = 157; +constexpr uint32_t VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184; + +// ASTC block footprints come from <rive/decoders/astc_footprints.hpp>. + +constexpr uint32_t SupercompressionNone = 0; #pragma pack(push, 1) struct Ktx2Header @@ -73,46 +88,83 @@ "KTX2 level index entry must be 24 bytes"); // Defensive caps. -constexpr uint32_t kMaxDimension = 16384; -constexpr uint32_t kMaxLevels = 16; +constexpr uint32_t MaxDimension = 16384; +constexpr uint32_t MaxLevels = 16; -constexpr uint32_t kBc7BlockBytes = 16; - -// Expected block-grid byte length for a BC7 mip level at the given logical -// pixel dimensions. BC7 = 4x4 blocks, 16 bytes/block. -inline uint64_t expectedBc7Bytes(uint32_t pixelWidth, uint32_t pixelHeight) +// BC7 and ASTC LDR are 16 bytes/block. ETC2 RGB8 is 8. 
+inline uint64_t expectedBlockBytes(uint32_t pixelWidth, + uint32_t pixelHeight, + uint32_t blockWidth, + uint32_t blockHeight, + uint32_t bytesPerBlock) { - const uint64_t blocksX = (pixelWidth + 3u) / 4u; - const uint64_t blocksY = (pixelHeight + 3u) / 4u; - return blocksX * blocksY * kBc7BlockBytes; + const uint64_t blocksX = (pixelWidth + blockWidth - 1u) / blockWidth; + const uint64_t blocksY = (pixelHeight + blockHeight - 1u) / blockHeight; + return blocksX * blocksY * bytesPerBlock; } } // namespace -bool DecodeKtx2(const uint8_t* bytes, size_t byteCount, Ktx2DecodeResult& out) +bool DecodeKtx2(const uint8_t* bytes, + size_t byteCount, + Ktx2DecodeResult& out, + const Ktx2HwSupport& hwSupport) { - if (byteCount < sizeof(kKtx2Identifier) + sizeof(Ktx2Header)) + if (byteCount < sizeof(Ktx2Identifier) + sizeof(Ktx2Header)) { std::fprintf(stderr, "DecodeKtx2: file too small\n"); return false; } - if (std::memcmp(bytes, kKtx2Identifier, sizeof(kKtx2Identifier)) != 0) + if (std::memcmp(bytes, Ktx2Identifier, sizeof(Ktx2Identifier)) != 0) { std::fprintf(stderr, "DecodeKtx2: bad magic\n"); return false; } Ktx2Header header; - std::memcpy(&header, bytes + sizeof(kKtx2Identifier), sizeof(header)); + std::memcpy(&header, bytes + sizeof(Ktx2Identifier), sizeof(header)); - if (header.vkFormat != VK_FORMAT_BC7_UNORM_BLOCK && - header.vkFormat != VK_FORMAT_BC7_SRGB_BLOCK) + // Map vkFormat → (GPUTextureFormat, blockWidth, blockHeight, + // bytesPerBlock, srgb). 
+ GPUTextureFormat outFormat; + uint8_t blockWidth = 4; + uint8_t blockHeight = 4; + uint32_t bytesPerBlock = 16; + bool srgb = false; + if (header.vkFormat == VK_FORMAT_BC7_UNORM_BLOCK || + header.vkFormat == VK_FORMAT_BC7_SRGB_BLOCK) + { + outFormat = GPUTextureFormat::bc7; + srgb = (header.vkFormat == VK_FORMAT_BC7_SRGB_BLOCK); + } + + else if (header.vkFormat == VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK || + header.vkFormat == VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK) + { + outFormat = GPUTextureFormat::etc2; + // ETC2 RGBA8 = 8 bytes EAC alpha + 8 bytes ETC2 RGB = 16 per block. + bytesPerBlock = 16; + srgb = (header.vkFormat == VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK); + } + + else if (header.vkFormat >= VK_FORMAT_ASTC_4x4_UNORM_BLOCK && + header.vkFormat <= VK_FORMAT_ASTC_12x12_SRGB_BLOCK) + { + const uint32_t idx = + (header.vkFormat - VK_FORMAT_ASTC_4x4_UNORM_BLOCK) / 2u; + outFormat = GPUTextureFormat::astc; + + blockWidth = AstcFootprints[idx].width; + blockHeight = AstcFootprints[idx].height; + srgb = (header.vkFormat % 2u) == 0u; // SRGB at UNORM+1. + } + else { std::fprintf(stderr, - "DecodeKtx2: unsupported vkFormat %u (only BC7 wired)\n", + "DecodeKtx2: unsupported vkFormat %u\n", header.vkFormat); return false; } - if (header.supercompressionScheme != kSupercompressionNone) + if (header.supercompressionScheme != SupercompressionNone) { std::fprintf(stderr, "DecodeKtx2: supercompressionScheme %u not supported\n", @@ -128,8 +180,8 @@ header.layerCount); return false; } - if (header.pixelWidth == 0 || header.pixelWidth > kMaxDimension || - header.pixelHeight == 0 || header.pixelHeight > kMaxDimension) + if (header.pixelWidth == 0 || header.pixelWidth > MaxDimension || + header.pixelHeight == 0 || header.pixelHeight > MaxDimension) { std::fprintf(stderr, "DecodeKtx2: dimensions out of range (%ux%u)\n", @@ -139,17 +191,16 @@ } const uint32_t levelCount = header.levelCount == 0 ? 
1u : header.levelCount; - if (levelCount > kMaxLevels) + if (levelCount > MaxLevels) { std::fprintf(stderr, "DecodeKtx2: levelCount %u exceeds cap %u\n", levelCount, - kMaxLevels); + MaxLevels); return false; } - const size_t levelIndexOffset = - sizeof(kKtx2Identifier) + sizeof(Ktx2Header); + const size_t levelIndexOffset = sizeof(Ktx2Identifier) + sizeof(Ktx2Header); const size_t levelIndexBytes = static_cast<size_t>(levelCount) * sizeof(Ktx2LevelIndex); if (byteCount < levelIndexOffset + levelIndexBytes) @@ -169,7 +220,11 @@ const Ktx2LevelIndex& e = entries[i]; const uint32_t logW = std::max<uint32_t>(1u, header.pixelWidth >> i); const uint32_t logH = std::max<uint32_t>(1u, header.pixelHeight >> i); - const uint64_t expected = expectedBc7Bytes(logW, logH); + const uint64_t expected = expectedBlockBytes(logW, + logH, + blockWidth, + blockHeight, + bytesPerBlock); if (e.byteLength != expected) { std::fprintf( @@ -192,12 +247,14 @@ } // Concatenate level 0 .. N-1 into one contiguous buffer (largest first). - out.format = header.vkFormat == VK_FORMAT_BC7_SRGB_BLOCK - ? GPUTextureFormat::bc7 - : GPUTextureFormat::bc7; + out.format = outFormat; out.pixelWidth = header.pixelWidth; out.pixelHeight = header.pixelHeight; out.levelCount = levelCount; + out.blockWidth = blockWidth; + out.blockHeight = blockHeight; + out.srgb = srgb; + out.softwareDecoded = false; out.blocks.resize(static_cast<size_t>(totalBytes)); size_t writeOffset = 0; for (uint32_t i = 0; i < levelCount; ++i) @@ -209,6 +266,88 @@ writeOffset += static_cast<size_t>(e.byteLength); } + // HW-cap fallback: if the backend can't sample this format directly, + // software-decode every mip level to tight RGBA8. Caller uploads as rgba32.
+ bool needFallback = false; + switch (out.format) + { + case GPUTextureFormat::bc1: + case GPUTextureFormat::bc2: + case GPUTextureFormat::bc3: + case GPUTextureFormat::bc7: + needFallback = !hwSupport.bc; + break; + case GPUTextureFormat::astc: + needFallback = !hwSupport.astc; + break; + case GPUTextureFormat::etc2: + needFallback = !hwSupport.etc2; + break; + default: + break; + } + if (needFallback) + { + // Decode every level. Source layout: level 0 first, levels tight + // (matches how we just wrote `out.blocks`). Output layout: each + // level's logical width * height * 4 bytes, also tight. + // + // We allocate the decoded chain into a temp buffer first so the + // original block bytes remain valid for the per-level decode calls. + std::vector<uint8_t> decoded; + size_t totalRgba = 0; + for (uint32_t i = 0; i < levelCount; ++i) + { + + const uint32_t logW = std::max<uint32_t>(1u, out.pixelWidth >> i); + const uint32_t logH = std::max<uint32_t>(1u, out.pixelHeight >> i); + + totalRgba += static_cast<size_t>(logW) * logH * 4; + } + decoded.reserve(totalRgba); + + size_t srcOffset = 0; + for (uint32_t i = 0; i < levelCount; ++i) + { + + const uint32_t logW = std::max<uint32_t>(1u, out.pixelWidth >> i); + const uint32_t logH = std::max<uint32_t>(1u, out.pixelHeight >> i); + const size_t levelBytes = + static_cast<size_t>(entries[i].byteLength); + auto bmp = decode_texture(out.blocks.data() + srcOffset, + levelBytes, + logW, + logH, + out.format, + out.blockWidth, + out.blockHeight); + if (!bmp) + { + std::fprintf(stderr, + "DecodeKtx2: HW lacks support for format %u " + "and software decoder unavailable (level %u)\n", + static_cast<unsigned>(out.format), + i); + return false; + } + // Match the PNG runtime path: premultiplied texels. 
+ bmp->pixelFormat(Bitmap::PixelFormat::RGBAPremul); + decoded.insert(decoded.end(), + bmp->bytes(), + bmp->bytes() + bmp->numBytes()); + srcOffset += levelBytes; + } + + out.blocks = std::move(decoded); + out.format = GPUTextureFormat::rgba32; + + out.blockWidth = 1; + out.blockHeight = 1; + out.srgb = false; + out.softwareDecoded = true; + // `out.levelCount` already matches the KTX2 level count. + } + return true; }
diff --git a/decoders/src/texture_decoder.cpp b/decoders/src/texture_decoder.cpp new file mode 100644 index 0000000..f6dcfe3 --- /dev/null +++ b/decoders/src/texture_decoder.cpp
@@ -0,0 +1,91 @@ +/* + * Copyright 2026 Rive + */ + +#include "rive/decoders/texture_decoder.hpp" + +#include <cstdio> + +#ifdef RIVE_ASTC_DECODER +std::unique_ptr<Bitmap> decode_astc_texture(const uint8_t* blocks, + size_t byteCount, + uint32_t width, + uint32_t height, + uint32_t blockWidth, + uint32_t blockHeight); +#endif + +#ifdef RIVE_BC_DECODER +std::unique_ptr<Bitmap> decode_bc_texture(const uint8_t* blocks, + size_t byteCount, + uint32_t width, + uint32_t height, + rive::GPUTextureFormat format); +#endif + +#ifdef RIVE_ETC_DECODER +std::unique_ptr<Bitmap> decode_etc_texture(const uint8_t* blocks, + size_t byteCount, + uint32_t width, + uint32_t height, + rive::GPUTextureFormat format); +#endif + +// Body branches reference these params via #ifdef; mark as maybe-unused so +// no-decoder-compiled-in builds don't warn. +std::unique_ptr<Bitmap> decode_texture([[maybe_unused]] const uint8_t* blocks, + [[maybe_unused]] size_t byteCount, + [[maybe_unused]] uint32_t width, + [[maybe_unused]] uint32_t height, + rive::GPUTextureFormat format, + [[maybe_unused]] uint32_t blockWidth, + [[maybe_unused]] uint32_t blockHeight) +{ + + switch (format) + { + case rive::GPUTextureFormat::astc: +#ifdef RIVE_ASTC_DECODER + return decode_astc_texture(blocks, + byteCount, + width, + height, + blockWidth, + blockHeight); +#else + fprintf(stderr, + "ASTC texture not supported " + "(build with --with_rive_astc_decoder)\n"); + return nullptr; +#endif + + case rive::GPUTextureFormat::bc1: + case rive::GPUTextureFormat::bc2: + case rive::GPUTextureFormat::bc3: + case rive::GPUTextureFormat::bc7: +#ifdef RIVE_BC_DECODER + return decode_bc_texture(blocks, byteCount, width, height, format); +#else + fprintf(stderr, + "BC texture not supported " + "(build with --with_rive_bc_decoder)\n"); + return nullptr; +#endif + + case rive::GPUTextureFormat::etc2: +#ifdef RIVE_ETC_DECODER + return decode_etc_texture(blocks, byteCount, width, height, format); +#else + fprintf(stderr, + "ETC texture 
not supported " + "(build with --with_rive_etc_decoder)\n"); + return nullptr; +#endif + + default: + fprintf(stderr, + "decode_texture - unsupported format %u\n", + static_cast<unsigned>(format)); + return nullptr; + } +}
diff --git a/include/rive/decoders/decode_ktx2.hpp b/include/rive/decoders/decode_ktx2.hpp index f33440d..61b2cc8 100644 --- a/include/rive/decoders/decode_ktx2.hpp +++ b/include/rive/decoders/decode_ktx2.hpp
@@ -14,17 +14,42 @@ namespace rive { +// HW support flags for the formats KTX2 may contain. When a format is not +// supported by the backend, DecodeKtx2 software-decodes every mip level to +// RGBA8 (if the corresponding RIVE_*_DECODER family was compiled in) and +// stores the result in `blocks` with `format = rgba32`. +// Defaults assume the caller's backend natively supports every GPU +// compressed format we recognise. Callers that have actually queried HW +// caps should set the relevant booleans to false to opt the parser into +// the software-decode fallback. Tests + parser-only consumers can use the +// defaults to keep the original "pass blocks through verbatim" behavior. +struct Ktx2HwSupport +{ + bool bc = true; + bool astc = true; + bool etc2 = true; +}; + // Result of parsing a KTX2 container. Block data is held in a contiguous // owned buffer, level 0 first (largest), then level 1, … level N-1 // (smallest). Each level's region is exactly its block-grid size in bytes // (no inter-level padding). +// +// If `softwareDecoded` is true, the GPU format in the container was not +// supported by the caller's backend and every level was decoded to RGBA8. +// In that case `format == rgba32`, `levelCount` is unchanged, and `blocks` +// holds the tightly packed RGBA8 mip chain, level 0 first. struct Ktx2DecodeResult { GPUTextureFormat format; - uint32_t pixelWidth; // logical mip 0 width - uint32_t pixelHeight; // logical mip 0 height - uint32_t levelCount; // number of mip levels stored (>=1) + uint32_t pixelWidth; // logical mip 0 width + uint32_t pixelHeight; // logical mip 0 height + uint32_t levelCount; // number of mip levels stored (>=1) + uint8_t blockWidth = 4; // compressed block footprint width (1 for rgba32) + uint8_t blockHeight = 4; // compressed block footprint height (1 for rgba32) + bool srgb = false; // sRGB colour space (BC7 / ETC2 / ASTC SRGB) std::vector<uint8_t> blocks; + bool softwareDecoded = false; }; // Parses a KTX2 container. Returns true on success and fills `out`.
Returns @@ -35,7 +60,13 @@ // - cubemaps / array layers // - oversized dimensions or level count // - level data outside the buffer -bool DecodeKtx2(const uint8_t* bytes, size_t byteCount, Ktx2DecodeResult& out); +// +// `hwSupport` is consulted after parsing to decide whether to fall back to +// CPU decompression. Pass all-true to skip the fallback path entirely. +bool DecodeKtx2(const uint8_t* bytes, + size_t byteCount, + Ktx2DecodeResult& out, + const Ktx2HwSupport& hwSupport = {}); } // namespace rive
diff --git a/renderer/include/rive/renderer/d3d11/render_context_d3d_impl.hpp b/renderer/include/rive/renderer/d3d11/render_context_d3d_impl.hpp index d093690..21a74c1 100644 --- a/renderer/include/rive/renderer/d3d11/render_context_d3d_impl.hpp +++ b/renderer/include/rive/renderer/d3d11/render_context_d3d_impl.hpp
@@ -224,7 +224,11 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat, - const uint8_t imageDataRGBAPremul[]) override; + const uint8_t imageData[], + uint8_t blockWidth = 1, + uint8_t blockHeight = 1, + bool srgb = false, + bool generateRemainingMips = false) override; std::unique_ptr<BufferRing> makeUniformBufferRing( size_t capacityInBytes) override;
diff --git a/renderer/include/rive/renderer/d3d12/render_context_d3d12_impl.hpp b/renderer/include/rive/renderer/d3d12/render_context_d3d12_impl.hpp index a6816ac..4f3836e 100644 --- a/renderer/include/rive/renderer/d3d12/render_context_d3d12_impl.hpp +++ b/renderer/include/rive/renderer/d3d12/render_context_d3d12_impl.hpp
@@ -106,7 +106,11 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) override; + const uint8_t imageData[], + uint8_t blockWidth = 1, + uint8_t blockHeight = 1, + bool srgb = false, + bool generateRemainingMips = false) override; rcp<Texture> adoptImageTexture(rcp<D3D12Texture> imageTexture);
diff --git a/renderer/include/rive/renderer/gl/gles3.hpp b/renderer/include/rive/renderer/gl/gles3.hpp index 5d04526..af88b2e 100644 --- a/renderer/include/rive/renderer/gl/gles3.hpp +++ b/renderer/include/rive/renderer/gl/gles3.hpp
@@ -112,6 +112,25 @@ #endif // RIVE_WEBGL +// KHR_texture_compression_astc_ldr is core on GLES 3.2 but ships as an +// extension elsewhere. Some GL headers (e.g. unextended <GLES3/gl3.h>, and +// the Windows release-clang config) define only a subset of the footprint +// enums, so guard each symbol individually rather than via the extension +// macro. UNORM enums are contiguous from 0x93B0 in spec order. Only the +// footprints Rive currently uses are declared here. +#ifndef GL_COMPRESSED_RGBA_ASTC_4x4_KHR +#define GL_COMPRESSED_RGBA_ASTC_4x4_KHR 0x93B0 +#endif +#ifndef GL_COMPRESSED_RGBA_ASTC_6x6_KHR +#define GL_COMPRESSED_RGBA_ASTC_6x6_KHR 0x93B4 +#endif +#ifndef GL_COMPRESSED_RGBA_ASTC_8x8_KHR +#define GL_COMPRESSED_RGBA_ASTC_8x8_KHR 0x93B7 +#endif +#ifndef GL_COMPRESSED_RGBA_ASTC_12x12_KHR +#define GL_COMPRESSED_RGBA_ASTC_12x12_KHR 0x93BD +#endif + #if defined(RIVE_ANDROID) || defined(RIVE_WEBGL) // GLES 3.1 functionality is pulled in as an extension. Define these to avoid // compile errors, even if we won't use them.
diff --git a/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp b/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp index 2c2082e..4fdf5d0 100644 --- a/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp +++ b/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp
@@ -54,7 +54,11 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) override; + const uint8_t imageData[], + uint8_t blockWidth = 1, + uint8_t blockHeight = 1, + bool srgb = false, + bool generateRemainingMips = false) override; // Takes ownership of textureID and responsibility for deleting it. rcp<Texture> adoptImageTexture(uint32_t width,
diff --git a/renderer/include/rive/renderer/metal/render_context_metal_impl.h b/renderer/include/rive/renderer/metal/render_context_metal_impl.h index d21c86e..b78c662 100644 --- a/renderer/include/rive/renderer/metal/render_context_metal_impl.h +++ b/renderer/include/rive/renderer/metal/render_context_metal_impl.h
@@ -139,7 +139,11 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) override; + const uint8_t imageData[], + uint8_t blockWidth = 1, + uint8_t blockHeight = 1, + bool srgb = false, + bool generateRemainingMips = false) override; #ifdef RIVE_CANVAS rcp<RenderCanvas> makeRenderCanvas(uint32_t width,
diff --git a/renderer/include/rive/renderer/render_context_impl.hpp b/renderer/include/rive/renderer/render_context_impl.hpp index 875ec1b..79bf1bc 100644 --- a/renderer/include/rive/renderer/render_context_impl.hpp +++ b/renderer/include/rive/renderer/render_context_impl.hpp
@@ -52,12 +52,26 @@ // this is called in the case of the default Bitmap class being used to // decode images so that it can be converted into a backend specific image. + // For compressed `format`s, `blockWidth`/`blockHeight` give the format's + // block footprint (e.g. 4x4 for BC7 and ASTC 4x4); rgba32 callers pass + // 1x1. `srgb` marks sRGB-encoded data; the GL, Vulkan and Metal backends + // currently ignore it and upload UNORM formats. + // + // `mipLevelCount` is the number of mip levels the texture is created + // with. When `generateRemainingMips` is true (PNG/JPEG path), only mip 0 + // bytes are expected in `imageData` and the backend generates the + // remaining levels on the GPU. When false (KTX2 path), `imageData` holds + // every level packed largest-first, unpadded, and is uploaded verbatim. virtual rcp<Texture> makeImageTexture( uint32_t width, uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) = 0; + const uint8_t imageData[], + uint8_t blockWidth = 1, + uint8_t blockHeight = 1, + bool srgb = false, + bool generateRemainingMips = false) = 0; #ifdef RIVE_CANVAS // Creates a RenderCanvas: a GPU texture usable as both a render target
diff --git a/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp b/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp index 3abbe7f..fc239f1 100644 --- a/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp +++ b/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp
@@ -84,7 +84,11 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) override; + const uint8_t imageData[], + uint8_t blockWidth = 1, + uint8_t blockHeight = 1, + bool srgb = false, + bool generateRemainingMips = false) override; #ifdef RIVE_CANVAS rcp<RenderCanvas> makeRenderCanvas(uint32_t width,
diff --git a/renderer/include/rive/renderer/vulkan/vkutil.hpp b/renderer/include/rive/renderer/vulkan/vkutil.hpp index 8f021c6..c8a617f 100644 --- a/renderer/include/rive/renderer/vulkan/vkutil.hpp +++ b/renderer/include/rive/renderer/vulkan/vkutil.hpp
@@ -229,10 +229,21 @@ void* nativeHandle() const override { return (void*)vkImage(); } // Deferred mechanism for uploading image data without a command buffer. + // + // Single-region upload: one VkBufferImageCopy covering mip 0 in full. + // If the texture has more than one mip level, generateMipmaps() is + // called on apply (suitable for the PNG/JPEG path). void scheduleUpload(const void* imageDataRGBAPremul, size_t imageDataSizeInBytes); void scheduleUpload(rcp<vkutil::Buffer> imageBufferRGBAPremul); + // Multi-region upload: caller hands over a staging buffer and the full + // list of VkBufferImageCopy regions (typically one per mip level). + // No automatic mipmap generation — the caller is responsible for + // supplying every level that exists in the texture. + void scheduleUpload(rcp<vkutil::Buffer> stagingBuffer, + std::vector<VkBufferImageCopy> regions); + void barrier(VkCommandBuffer, const ImageAccess& dstAccess, ImageAccessAction = ImageAccessAction::preserveContents, @@ -315,6 +326,8 @@ ImageAccess m_lastAccess; rcp<vkutil::Buffer> m_imageUploadBuffer; + // When non-empty, overrides the default single-region/auto-mip path. + std::vector<VkBufferImageCopy> m_imageUploadRegions; // Simple mechanism for caching and reusing a descriptor set for this // texture within a frame.
diff --git a/renderer/include/rive/renderer/webgpu/render_context_webgpu_impl.hpp b/renderer/include/rive/renderer/webgpu/render_context_webgpu_impl.hpp index a294eea..091dc14 100644 --- a/renderer/include/rive/renderer/webgpu/render_context_webgpu_impl.hpp +++ b/renderer/include/rive/renderer/webgpu/render_context_webgpu_impl.hpp
@@ -90,7 +90,11 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) override; + const uint8_t imageData[], + uint8_t blockWidth = 1, + uint8_t blockHeight = 1, + bool srgb = false, + bool generateRemainingMips = false) override; #ifdef RIVE_CANVAS rcp<RenderCanvas> makeRenderCanvas(uint32_t width,
diff --git a/renderer/premake5_pls_renderer.lua b/renderer/premake5_pls_renderer.lua index 43c73dc..cd3a655 100644 --- a/renderer/premake5_pls_renderer.lua +++ b/renderer/premake5_pls_renderer.lua
@@ -418,9 +418,16 @@ files({ 'src/metal/metal_nop.cpp' }) end + -- decoders/include must be on the include path unconditionally — + -- renderer sources reference rive/decoders/astc_footprints.hpp even on + -- --no-rive-decoders builds. The header is pure inline (no link dep) + -- so exposing it costs nothing. Reset filter so this applies + -- project-wide, not just under the previous `nop-obj-c` filter. + filter({}) + includedirs({ '../decoders/include' }) + filter({ 'options:not no-rive-decoders' }) do - includedirs({ '../decoders/include' }) defines({ 'RIVE_DECODERS' }) end @@ -435,6 +442,24 @@ defines({ 'RIVE_KTX2' }) end + -- Mirror per-family decoder flags into the renderer so the + -- `#ifdef RIVE_*_DECODER` test-path branches in render_context.cpp + -- compile when the decoder lib was built with these flags. + filter({ 'options:with_rive_bc_decoder' }) + do + defines({ 'RIVE_BC_DECODER' }) + end + + filter({ 'options:with_rive_astc_decoder' }) + do + defines({ 'RIVE_ASTC_DECODER' }) + end + + filter({ 'options:with_rive_etc_decoder' }) + do + defines({ 'RIVE_ETC_DECODER' }) + end + filter('system:windows') do architecture('x64')
diff --git a/renderer/src/d3d11/render_context_d3d_impl.cpp b/renderer/src/d3d11/render_context_d3d_impl.cpp index c58e4d9..fb059bd 100644 --- a/renderer/src/d3d11/render_context_d3d_impl.cpp +++ b/renderer/src/d3d11/render_context_d3d_impl.cpp
@@ -909,7 +909,8 @@ UINT height, UINT mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) : + const uint8_t imageDataRGBAPremul[], + bool generateRemainingMips) : Texture(width, height) { if (format == GPUTextureFormat::bc7) @@ -955,29 +956,48 @@ } else if (format == GPUTextureFormat::rgba32) { + // GENERATE_MIPS flag + RTV binding are only needed when the + // GPU is going to fill in the chain. For the KTX2-supplied + // chain (caller-provided mips) it's pure overhead. + const UINT miscFlags = + generateRemainingMips ? D3D11_RESOURCE_MISC_GENERATE_MIPS : 0u; + const UINT bindFlags = + generateRemainingMips + ? (D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) + : D3D11_BIND_SHADER_RESOURCE; m_texture = renderContextImpl->makeSimple2DTexture( DXGI_FORMAT_R8G8B8A8_UNORM, width, height, mipLevelCount, - D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, - D3D11_RESOURCE_MISC_GENERATE_MIPS); + bindFlags, + miscFlags); - // Specify the top-level image in the mipmap chain. - D3D11_BOX box; - box.left = 0; - box.right = width; - box.top = 0; - box.bottom = height; - box.front = 0; - box.back = 1; - renderContextImpl->gpuContext()->UpdateSubresource( - m_texture.Get(), - 0, - &box, - imageDataRGBAPremul, - width * 4, - 0); + const uint8_t* src = imageDataRGBAPremul; + const UINT levelsToUpload = + generateRemainingMips ? 
1u : mipLevelCount; + UINT W = width; + UINT H = height; + for (UINT i = 0; i < levelsToUpload; ++i) + { + D3D11_BOX box; + box.left = 0; + box.right = W; + box.top = 0; + box.bottom = H; + box.front = 0; + box.back = 1; + renderContextImpl->gpuContext()->UpdateSubresource( + m_texture.Get(), + i, + &box, + src, + W * 4, + 0); + src += static_cast<size_t>(W) * H * 4; + W = std::max<UINT>(1u, W >> 1); + H = std::max<UINT>(1u, H >> 1); + } } else { @@ -990,8 +1010,11 @@ NULL, m_srv.ReleaseAndGetAddressOf())); - if (format == GPUTextureFormat::rgba32) + if (format == GPUTextureFormat::rgba32 && generateRemainingMips && + mipLevelCount > 1) + { renderContextImpl->gpuContext()->GenerateMips(m_srv.Get()); + } } ID3D11ShaderResourceView* srv() const { return m_srv.Get(); } @@ -1014,14 +1037,19 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) + const uint8_t imageDataRGBAPremul[], + uint8_t /*blockWidth*/, + uint8_t /*blockHeight*/, + bool /*srgb*/, + bool generateRemainingMips) { return make_rcp<TextureD3DImpl>(this, width, height, mipLevelCount, format, - imageDataRGBAPremul); + imageDataRGBAPremul, + generateRemainingMips); } rcp<Texture> RenderContextD3DImpl::adoptImageTexture(
diff --git a/renderer/src/d3d12/render_context_d3d12_impl.cpp b/renderer/src/d3d12/render_context_d3d12_impl.cpp index 2657153..d9b7905 100644 --- a/renderer/src/d3d12/render_context_d3d12_impl.cpp +++ b/renderer/src/d3d12/render_context_d3d12_impl.cpp
@@ -15,6 +15,7 @@ #include "shaders/d3d/root.sig" #include <sstream> +#include <vector> #include <D3DCompiler.h> // this is defined here instead of root_sig becaise the gpu does not care about @@ -115,26 +116,30 @@ { DXGI_FORMAT d3dFormat = d3d_utils::convert_format(format); - m_gpuTexture = manager->make2DTexture( - width, - height, - mipLevel, - d3dFormat, - D3D12_RESOURCE_FLAG_NONE, - usesCommandList ? D3D12_RESOURCE_STATE_COMMON - : D3D12_RESOURCE_STATE_COPY_DEST); + // Always create in COMMON. Both upload paths drive the texture + // through the copy command list which uses enhanced barriers, and + // enhanced barriers require COMMON layout (LEGACY_COPY_DEST is + // rejected as INCOMPATIBLE_BARRIER_LAYOUT). The copy itself + // promotes COMMON→COPY_DEST implicitly. + std::ignore = usesCommandList; + m_gpuTexture = manager->make2DTexture(width, + height, + mipLevel, + d3dFormat, + D3D12_RESOURCE_FLAG_NONE, + D3D12_RESOURCE_STATE_COMMON); if (format == GPUTextureFormat::bc7) { - // imageData contains already compressed data, so we can directly - // upload it to the GPU All mip levels are in this sequentially + // imageData contains pre-compressed BC7 blocks, level 0 first, + // levels packed tight (no inter-level padding). Copy each level + // into its placed-subresource slot in the upload buffer. 
auto desc = m_gpuTexture->resource()->GetDesc(); - UINT numRows = 0; - UINT64 rowSizeInBytes = 0; - UINT64 totalBytes = 0; - m_subresourceFootprints.resize(mipLevel); + std::vector<UINT> numRows(mipLevel); + std::vector<UINT64> rowSizeInBytes(mipLevel); + UINT64 totalBytes = 0; manager->device()->GetCopyableFootprints( &desc, @@ -142,8 +147,8 @@ mipLevel, // Number of mips 0, // Base offset m_subresourceFootprints.data(), // One footprint per mip - &numRows, - &rowSizeInBytes, + numRows.data(), + rowSizeInBytes.data(), &totalBytes); m_uploadBuffer = manager->makeUploadBuffer( @@ -156,12 +161,20 @@ for (UINT mip = 0; mip < mipLevel; ++mip) { const auto& fp = m_subresourceFootprints[mip].Footprint; - - UINT64 mipSize = fp.RowPitch * fp.Height; - - memcpy(dst + m_subresourceFootprints[mip].Offset, src, mipSize); - - src += mipSize; // advance to next mip in your BC7 blob + // RowPitch is padded to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT + // (256). Source rows are tight (rowSizeInBytes). Copy one + // block-row at a time so we don't overflow the upload slot + // and don't read past the source mip. + uint8_t* dstMip = dst + m_subresourceFootprints[mip].Offset; + const UINT64 srcRowBytes = rowSizeInBytes[mip]; + const UINT rows = numRows[mip]; + for (UINT row = 0; row < rows; ++row) + { + memcpy(dstMip + row * fp.RowPitch, + src + row * srcRowBytes, + srcRowBytes); + } + src += srcRowBytes * rows; } } else if (format == GPUTextureFormat::rgba32) @@ -769,7 +782,11 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) + const uint8_t imageDataRGBAPremul[], + uint8_t /*blockWidth*/, + uint8_t /*blockHeight*/, + bool /*srgb*/, + bool /*generateRemainingMips*/) { return make_rcp<TextureD3D12Impl>(m_resourceManager.get(), width,
diff --git a/renderer/src/gl/render_context_gl_impl.cpp b/renderer/src/gl/render_context_gl_impl.cpp index 7ae754c..eb17fc3 100644 --- a/renderer/src/gl/render_context_gl_impl.cpp +++ b/renderer/src/gl/render_context_gl_impl.cpp
@@ -4,6 +4,8 @@ #include "rive/renderer/gl/render_context_gl_impl.hpp" +#include "rive/decoders/astc_footprints.hpp" + #include "rive/renderer/gl/render_buffer_gl_impl.hpp" #include "rive/renderer/gl/render_target_gl.hpp" #include "rive/renderer/draw.hpp" @@ -702,36 +704,116 @@ }; #endif // RIVE_CANVAS -rcp<Texture> RenderContextGLImpl::makeImageTexture( - uint32_t width, - uint32_t height, - uint32_t mipLevelCount, - GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) +rcp<Texture> RenderContextGLImpl::makeImageTexture(uint32_t width, + uint32_t height, + uint32_t mipLevelCount, + GPUTextureFormat format, + const uint8_t imageData[], + uint8_t blockWidth, + uint8_t blockHeight, + [[maybe_unused]] bool srgb, + bool generateRemainingMips) { - if (format != GPUTextureFormat::rgba32) + // Pick UNORM internal format. Sampler path treats texels as sRGB- + // encoded bytes (matching the GL_RGBA8 PNG upload). + GLenum sizedInternal; + bool isCompressed = false; + + uint32_t bytesPerBlock = 16; + switch (format) { - assert(!"unsupported format"); - return nullptr; + case GPUTextureFormat::rgba32: + sizedInternal = GL_RGBA8; + assert(blockWidth == 1 && blockHeight == 1); + bytesPerBlock = 4; + break; + case GPUTextureFormat::bc7: + sizedInternal = 0x8E8C; // GL_COMPRESSED_RGBA_BPTC_UNORM + isCompressed = true; + break; + case GPUTextureFormat::etc2: + sizedInternal = 0x9278; // GL_COMPRESSED_RGBA8_ETC2_EAC + isCompressed = true; + break; + case GPUTextureFormat::astc: + { + + const int idx = rive::astcFootprintIndex(blockWidth, blockHeight); + if (idx < 0) + { + assert(!"unsupported ASTC block footprint"); + return nullptr; + } + + // KHR_texture_compression_astc_ldr lays the per-footprint enums + // out contiguously starting at GL_COMPRESSED_RGBA_ASTC_4x4_KHR, in + // the same canonical order as astcFootprintIndex(). 
+ sizedInternal = + static_cast<GLenum>(GL_COMPRESSED_RGBA_ASTC_4x4_KHR + idx); + isCompressed = true; + break; + } + default: + assert(!"unsupported format"); + return nullptr; } + assert(!(generateRemainingMips && isCompressed) && + "glGenerateMipmap is undefined on compressed textures"); GLuint textureID; glGenTextures(1, &textureID); glActiveTexture(GL_TEXTURE0 + IMAGE_TEXTURE_IDX); glBindTexture(GL_TEXTURE_2D, textureID); - glTexStorage2D(GL_TEXTURE_2D, mipLevelCount, GL_RGBA8, width, height); - if (imageDataRGBAPremul != nullptr) + glTexStorage2D(GL_TEXTURE_2D, + static_cast<GLsizei>(mipLevelCount), + sizedInternal, + width, + height); + if (imageData != nullptr) { - glTexSubImage2D(GL_TEXTURE_2D, - 0, - 0, - 0, - width, - height, - GL_RGBA, - GL_UNSIGNED_BYTE, - imageDataRGBAPremul); - glGenerateMipmap(GL_TEXTURE_2D); + // When the caller wants the GPU to auto-fill mips 1..N from mip 0 + // (PNG path), only upload level 0 and finish via glGenerateMipmap. + const uint32_t levelsToUpload = + generateRemainingMips ? 
1u : mipLevelCount; + size_t srcOffset = 0; + for (uint32_t i = 0; i < levelsToUpload; ++i) + { + const uint32_t logW = std::max<uint32_t>(1u, width >> i); + const uint32_t logH = std::max<uint32_t>(1u, height >> i); + const uint32_t blocksX = (logW + blockWidth - 1) / blockWidth; + const uint32_t blocksY = (logH + blockHeight - 1) / blockHeight; + const size_t levelBytes = + static_cast<size_t>(blocksX) * blocksY * bytesPerBlock; + if (isCompressed) + { + glCompressedTexSubImage2D(GL_TEXTURE_2D, + static_cast<GLint>(i), + 0, + 0, + logW, + logH, + sizedInternal, + static_cast<GLsizei>(levelBytes), + imageData + srcOffset); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, + static_cast<GLint>(i), + 0, + 0, + logW, + logH, + GL_RGBA, + GL_UNSIGNED_BYTE, + imageData + srcOffset); + } + srcOffset += levelBytes; + } + if (generateRemainingMips && mipLevelCount > 1) + { + glGenerateMipmap(GL_TEXTURE_2D); + } } return adoptImageTexture(width, height, textureID); }
diff --git a/renderer/src/metal/render_context_metal_impl.mm b/renderer/src/metal/render_context_metal_impl.mm index 0905172..43bb392 100644 --- a/renderer/src/metal/render_context_metal_impl.mm +++ b/renderer/src/metal/render_context_metal_impl.mm
@@ -4,6 +4,8 @@ #include "rive/renderer/metal/render_context_metal_impl.h" +#include "rive/decoders/astc_footprints.hpp" + #include "background_shader_compiler.h" #include "rive/renderer/buffer_ring.hpp" #ifdef RIVE_CANVAS @@ -829,12 +831,18 @@ uint32_t width, uint32_t height, uint32_t mipLevelCount, - const uint8_t imageDataRGBAPremul[]) : - Texture(width, height) + const uint8_t imageData[], + MTLPixelFormat pixelFormat = MTLPixelFormatRGBA8Unorm, + uint8_t blockWidth = 1, + uint8_t blockHeight = 1, + uint32_t bytesPerBlock = 4, + bool generateRemainingMips = false) : + Texture(width, height), + m_mipsDirty(generateRemainingMips && mipLevelCount > 1) { // Create the texture. MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init]; - desc.pixelFormat = MTLPixelFormatRGBA8Unorm; + desc.pixelFormat = pixelFormat; desc.width = width; desc.height = height; desc.mipmapLevelCount = mipLevelCount; @@ -842,12 +850,29 @@ desc.textureType = MTLTextureType2D; m_texture = [gpu newTextureWithDescriptor:desc]; - // Specify the top-level image in the mipmap chain. - MTLRegion region = MTLRegionMake2D(0, 0, width, height); - [m_texture replaceRegion:region - mipmapLevel:0 - withBytes:imageDataRGBAPremul - bytesPerRow:width * 4]; + // Upload mip 0 only when the caller asks for auto-mipgen + // (generateRemainingMips=true). Otherwise upload every level the + // texture was created with from the caller-supplied tight blob. + const uint32_t levelsToUpload = + generateRemainingMips ? 
1u : mipLevelCount; + const uint8_t* src = imageData; + for (uint32_t i = 0; i < levelsToUpload; ++i) + { + const uint32_t logW = std::max<uint32_t>(1u, width >> i); + const uint32_t logH = std::max<uint32_t>(1u, height >> i); + const uint32_t blocksX = (logW + blockWidth - 1) / blockWidth; + const uint32_t blocksY = (logH + blockHeight - 1) / blockHeight; + const NSUInteger bytesPerRow = + static_cast<NSUInteger>(blocksX) * bytesPerBlock; + const size_t levelBytes = + static_cast<size_t>(bytesPerRow) * blocksY; + MTLRegion region = MTLRegionMake2D(0, 0, logW, logH); + [m_texture replaceRegion:region + mipmapLevel:i + withBytes:src + bytesPerRow:bytesPerRow]; + src += levelBytes; + } } void ensureMipmaps(id<MTLCommandBuffer> commandBuffer) const @@ -881,16 +906,67 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) + const uint8_t imageData[], + uint8_t blockWidth, + uint8_t blockHeight, + [[maybe_unused]] bool srgb, + bool generateRemainingMips) { - if (format != GPUTextureFormat::rgba32) - { - assert(!"unsupported format"); - return nullptr; - } + MTLPixelFormat pixelFormat = MTLPixelFormatRGBA8Unorm; + uint32_t bytesPerBlock = 4; + bool isCompressed = false; - return make_rcp<TextureMetalImpl>( - m_gpu, width, height, mipLevelCount, imageDataRGBAPremul); + switch (format) + { + case GPUTextureFormat::rgba32: + assert(blockWidth == 1 && blockHeight == 1); + break; +#if !TARGET_OS_IPHONE + case GPUTextureFormat::bc7: + pixelFormat = MTLPixelFormatBC7_RGBAUnorm; + bytesPerBlock = 16; + isCompressed = true; + break; +#endif + case GPUTextureFormat::astc: + { + // MTLPixelFormat ASTC LDR enums are sequential in Vulkan/GL + // footprint order, starting at MTLPixelFormatASTC_4x4_LDR. 
+ const int idx = rive::astcFootprintIndex(blockWidth, blockHeight); + if (idx < 0) + { + assert(!"unsupported ASTC block footprint"); + return nullptr; + } + pixelFormat = + static_cast<MTLPixelFormat>(MTLPixelFormatASTC_4x4_LDR + idx); + bytesPerBlock = 16; + isCompressed = true; + break; + } + case GPUTextureFormat::etc2: + // ETC2 RGBA8: 8 bytes EAC alpha + 8 bytes ETC2 RGB = 16/block. + pixelFormat = MTLPixelFormatEAC_RGBA8; + bytesPerBlock = 16; + isCompressed = true; + break; + default: + assert(!"unsupported format"); + return nullptr; + } + assert(!(generateRemainingMips && isCompressed) && + "generateMipmapsForTexture is undefined on compressed formats"); + + return make_rcp<TextureMetalImpl>(m_gpu, + width, + height, + mipLevelCount, + imageData, + pixelFormat, + blockWidth, + blockHeight, + bytesPerBlock, + generateRemainingMips); } #ifdef RIVE_CANVAS
diff --git a/renderer/src/ore/gl/ore_context_gl.cpp b/renderer/src/ore/gl/ore_context_gl.cpp index 3f63b3e..454467e 100644 --- a/renderer/src/ore/gl/ore_context_gl.cpp +++ b/renderer/src/ore/gl/ore_context_gl.cpp
@@ -90,19 +90,12 @@ case TextureFormat::bc7unorm: RIVE_UNREACHABLE(); #endif -#ifdef GL_COMPRESSED_RGBA_ASTC_4x4_KHR case TextureFormat::astc4x4: return GL_COMPRESSED_RGBA_ASTC_4x4_KHR; case TextureFormat::astc6x6: return GL_COMPRESSED_RGBA_ASTC_6x6_KHR; case TextureFormat::astc8x8: return GL_COMPRESSED_RGBA_ASTC_8x8_KHR; -#else - case TextureFormat::astc4x4: - case TextureFormat::astc6x6: - case TextureFormat::astc8x8: - RIVE_UNREACHABLE(); -#endif } RIVE_UNREACHABLE(); }
diff --git a/renderer/src/ore/gl/ore_texture_gl.cpp b/renderer/src/ore/gl/ore_texture_gl.cpp index 3487cc2..7bc0cf9 100644 --- a/renderer/src/ore/gl/ore_texture_gl.cpp +++ b/renderer/src/ore/gl/ore_texture_gl.cpp
@@ -71,19 +71,12 @@ return GL_COMPRESSED_RGB8_ETC2; case TextureFormat::etc2rgba8: return GL_COMPRESSED_RGBA8_ETC2_EAC; -#ifdef GL_COMPRESSED_RGBA_ASTC_4x4_KHR case TextureFormat::astc4x4: return GL_COMPRESSED_RGBA_ASTC_4x4_KHR; case TextureFormat::astc6x6: return GL_COMPRESSED_RGBA_ASTC_6x6_KHR; case TextureFormat::astc8x8: return GL_COMPRESSED_RGBA_ASTC_8x8_KHR; -#else - case TextureFormat::astc4x4: - case TextureFormat::astc6x6: - case TextureFormat::astc8x8: - RIVE_UNREACHABLE(); -#endif } RIVE_UNREACHABLE(); }
diff --git a/renderer/src/render_context.cpp b/renderer/src/render_context.cpp index 3edd95f..7f4d14a 100644 --- a/renderer/src/render_context.cpp +++ b/renderer/src/render_context.cpp
@@ -173,14 +173,28 @@ encodedBytes[0] == 0xAB && encodedBytes[1] == 0x4B && encodedBytes[2] == 0x54 && encodedBytes[3] == 0x58) { + const Ktx2HwSupport hwSupport = { + platformFeatures().supportsTextureCompressionBC, + platformFeatures().supportsTextureCompressionASTC, + platformFeatures().supportsTextureCompressionETC2, + }; Ktx2DecodeResult ktx2; - if (DecodeKtx2(encodedBytes.data(), encodedBytes.size(), ktx2)) + if (DecodeKtx2(encodedBytes.data(), + encodedBytes.size(), + ktx2, + hwSupport)) { + // KTX2 provides the full level chain (or just level 0). + // generateRemainingMips keeps its default (false) here, so backends + // upload exactly the levels the file ships with. texture = m_impl->makeImageTexture(ktx2.pixelWidth, ktx2.pixelHeight, ktx2.levelCount, ktx2.format, - ktx2.blocks.data()); + ktx2.blocks.data(), + ktx2.blockWidth, + ktx2.blockHeight, + ktx2.srgb); } } #endif @@ -203,7 +217,11 @@ height, mipLevelCount, GPUTextureFormat::rgba32, - bitmap->bytes()); + bitmap->bytes(), + /*blockWidth=*/1, + /*blockHeight=*/1, + /*srgb=*/false, + /*generateRemainingMips=*/true); } } #endif
diff --git a/renderer/src/vulkan/render_context_vulkan_impl.cpp b/renderer/src/vulkan/render_context_vulkan_impl.cpp index 3e6773a..0c821db 100644 --- a/renderer/src/vulkan/render_context_vulkan_impl.cpp +++ b/renderer/src/vulkan/render_context_vulkan_impl.cpp
@@ -4,6 +4,8 @@ #include "rive/renderer/vulkan/render_context_vulkan_impl.hpp" +#include "rive/decoders/astc_footprints.hpp" + #include "vulkan_shaders.hpp" #ifdef RIVE_CANVAS #include "rive/renderer/render_canvas.hpp" @@ -92,22 +94,106 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) + const uint8_t imageData[], + uint8_t blockWidth, + uint8_t blockHeight, + [[maybe_unused]] bool srgb, + bool generateRemainingMips) { - if (format != GPUTextureFormat::rgba32) - { - assert(!"unsupported format"); - return nullptr; - } + // Sampler path treats texels as sRGB-encoded bytes (matches PNG path's + // GL_RGBA8 / VK_FORMAT_R8G8B8A8_UNORM upload). Don't pick the GPU sRGB + // view here — would auto-linearise on sample and double-darken. + VkFormat vkFormat; + uint32_t bytesPerBlock = 16; + [[maybe_unused]] bool isCompressed = false; - auto texture = m_vk->makeTexture2D( + switch (format) + { + case GPUTextureFormat::rgba32: + vkFormat = VK_FORMAT_R8G8B8A8_UNORM; + assert(blockWidth == 1 && blockHeight == 1); + bytesPerBlock = 4; + break; + case GPUTextureFormat::bc7: + vkFormat = VK_FORMAT_BC7_UNORM_BLOCK; + isCompressed = true; + break; + case GPUTextureFormat::etc2: + // ETC2 RGBA8: 8 bytes EAC alpha + 8 bytes ETC2 RGB = 16/block.
+ vkFormat = VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK; + isCompressed = true; + break; + case GPUTextureFormat::astc: { - .format = VK_FORMAT_R8G8B8A8_UNORM, - .extent = {width, height}, - .mipLevels = mipLevelCount, - }, - "RenderContext imageTexture"); - texture->scheduleUpload(imageDataRGBAPremul, height * width * 4); + const int idx = rive::astcFootprintIndex(blockWidth, blockHeight); + if (idx < 0) + { + assert(!"unsupported ASTC block footprint"); + return nullptr; + } + + vkFormat = + static_cast<VkFormat>(VK_FORMAT_ASTC_4x4_UNORM_BLOCK + 2 * idx); + isCompressed = true; + break; + } + default: + assert(!"unsupported format"); + return nullptr; + } + assert(!(generateRemainingMips && isCompressed) && + "vkCmdBlitImage mipgen is undefined on compressed formats"); + + auto texture = m_vk->makeTexture2D({.format = vkFormat, + .extent = {width, height}, + .mipLevels = mipLevelCount}, + "RenderContext imageTexture"); + + if (imageData == nullptr) + { + return texture; + } + + if (generateRemainingMips) + { + // Upload mip 0 only; vkutil's single-region scheduleUpload calls + // generateMipmaps to fill the rest. + const size_t mip0Bytes = + static_cast<size_t>(width) * height * bytesPerBlock; + texture->scheduleUpload(imageData, mip0Bytes); + return texture; + } + + // Multi-mip: pre-compute per-level regions in the source blob.
+ std::vector<VkBufferImageCopy> regions; + regions.reserve(mipLevelCount); + size_t srcOffset = 0; + for (uint32_t i = 0; i < mipLevelCount; ++i) + { + const uint32_t logW = std::max<uint32_t>(1u, width >> i); + const uint32_t logH = std::max<uint32_t>(1u, height >> i); + const uint32_t blocksX = (logW + blockWidth - 1) / blockWidth; + const uint32_t blocksY = (logH + blockHeight - 1) / blockHeight; + const size_t levelBytes = + static_cast<size_t>(blocksX) * blocksY * bytesPerBlock; + regions.push_back({.bufferOffset = static_cast<VkDeviceSize>(srcOffset), + .imageSubresource = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = i, + .layerCount = 1, + }, + .imageExtent = {logW, logH, 1}}); + srcOffset += levelBytes; + } + + // Stage all levels into one buffer, then hand the region list over. + rcp<vkutil::Buffer> staging = m_vk->makeBuffer( + {.size = srcOffset, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT}, + vkutil::Mappability::writeOnly); + std::memcpy(staging->contents(), imageData, srcOffset); + staging->flushContents(); + texture->scheduleUpload(std::move(staging), std::move(regions)); return texture; }
diff --git a/renderer/src/vulkan/vkutil.cpp b/renderer/src/vulkan/vkutil.cpp index b88ab70..f960eb2 100644 --- a/renderer/src/vulkan/vkutil.cpp +++ b/renderer/src/vulkan/vkutil.cpp
@@ -346,21 +346,20 @@ void Texture2D::scheduleUpload(rcp<vkutil::Buffer> imageBufferRGBAPremul) { m_imageUploadBuffer = std::move(imageBufferRGBAPremul); + m_imageUploadRegions.clear(); +} + +void Texture2D::scheduleUpload(rcp<vkutil::Buffer> stagingBuffer, + std::vector<VkBufferImageCopy> regions) +{ + m_imageUploadBuffer = std::move(stagingBuffer); + m_imageUploadRegions = std::move(regions); } void Texture2D::applyImageUploadBuffer(VkCommandBuffer commandBuffer) { assert(m_imageUploadBuffer != nullptr); - VkBufferImageCopy bufferImageCopy = { - .imageSubresource = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .layerCount = 1, - }, - .imageExtent = {width(), height(), 1}, - }; - barrier(commandBuffer, { .pipelineStages = VK_PIPELINE_STAGE_TRANSFER_BIT, @@ -369,19 +368,53 @@ }, vkutil::ImageAccessAction::invalidateContents); - m_image->vk()->CmdCopyBufferToImage(commandBuffer, - *m_imageUploadBuffer, - *m_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - 1, - &bufferImageCopy); - - generateMipmaps(commandBuffer, - { - .pipelineStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - .accessMask = VK_ACCESS_SHADER_READ_BIT, - .layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - }); + if (!m_imageUploadRegions.empty()) + { + // Caller-supplied per-level regions. No automatic mip generation — + // every level present in the texture must have a region. + m_image->vk()->CmdCopyBufferToImage( + commandBuffer, + *m_imageUploadBuffer, + *m_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + static_cast<uint32_t>(m_imageUploadRegions.size()), + m_imageUploadRegions.data()); + // All mips already written — transition straight to shader-read. + barrier(commandBuffer, + { + .pipelineStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + .accessMask = VK_ACCESS_SHADER_READ_BIT, + .layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + }); + m_imageUploadRegions.clear(); + } + else + { + // Single-region upload (mip 0 full extent). 
Caller relies on + // generateMipmaps to fill remaining levels. + VkBufferImageCopy bufferImageCopy = { + .imageSubresource = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .layerCount = 1, + }, + .imageExtent = {width(), height(), 1}, + }; + m_image->vk()->CmdCopyBufferToImage( + commandBuffer, + *m_imageUploadBuffer, + *m_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, + &bufferImageCopy); + generateMipmaps( + commandBuffer, + { + .pipelineStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + .accessMask = VK_ACCESS_SHADER_READ_BIT, + .layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + }); + } m_imageUploadBuffer = nullptr; }
diff --git a/renderer/src/webgpu/render_context_webgpu_impl.cpp b/renderer/src/webgpu/render_context_webgpu_impl.cpp index 57a6031..f13a81d 100644 --- a/renderer/src/webgpu/render_context_webgpu_impl.cpp +++ b/renderer/src/webgpu/render_context_webgpu_impl.cpp
@@ -4,6 +4,8 @@ #include "rive/renderer/webgpu/render_context_webgpu_impl.hpp" +#include "rive/decoders/astc_footprints.hpp" + #include "rive/renderer/draw.hpp" #ifdef RIVE_CANVAS #include "rive/renderer/render_canvas.hpp" @@ -2230,22 +2232,69 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBAPremul[]) + const uint8_t imageData[], + uint8_t blockWidth, + uint8_t blockHeight, + bool /*srgb*/, + bool generateRemainingMips) { - if (format != GPUTextureFormat::rgba32) + wgpu::TextureFormat wgpuFormat = wgpu::TextureFormat::RGBA8Unorm; + uint32_t bytesPerBlock = 4; + + bool isCompressed = false; + + switch (format) { - assert(!"unsupported format"); - return nullptr; + case GPUTextureFormat::rgba32: + assert(blockWidth == 1 && blockHeight == 1); + break; + case GPUTextureFormat::bc7: + wgpuFormat = wgpu::TextureFormat::BC7RGBAUnorm; + bytesPerBlock = 16; + isCompressed = true; + break; + case GPUTextureFormat::astc: + { + // wgpu ASTC enums are sequential in spec footprint order + // starting at ASTC4x4Unorm. SRGB variant lives one entry later. 
+ const int idx = rive::astcFootprintIndex(blockWidth, blockHeight); + if (idx < 0) + { + assert(!"unsupported ASTC block footprint"); + return nullptr; + } + wgpuFormat = static_cast<wgpu::TextureFormat>( + static_cast<uint32_t>(wgpu::TextureFormat::ASTC4x4Unorm) + + 2 * idx); + + bytesPerBlock = 16; + isCompressed = true; + break; + } + case GPUTextureFormat::etc2: + + wgpuFormat = wgpu::TextureFormat::ETC2RGBA8Unorm; + bytesPerBlock = 16; + isCompressed = true; + break; + default: + assert(!"unsupported format"); + return nullptr; } + + assert(!(generateRemainingMips && isCompressed) && + "WebGPU mip generation is undefined on compressed formats"); + wgpu::TextureDescriptor textureDesc = { .usage = wgpu::TextureUsage::TextureBinding | wgpu::TextureUsage::CopyDst, .dimension = wgpu::TextureDimension::e2D, .size = {width, height}, - .format = wgpu::TextureFormat::RGBA8Unorm, + .format = wgpuFormat, + .mipLevelCount = mipLevelCount, }; - if (mipLevelCount > 1) + if (generateRemainingMips && mipLevelCount > 1) { #ifdef RIVE_WAGYU // Wagyu generates mipmaps with copies. @@ -2258,16 +2307,32 @@ wgpu::Texture texture = m_device.CreateTexture(&textureDesc); - wgpu::TexelCopyTextureInfo dest = {.texture = texture}; - wgpu::TexelCopyBufferLayout layout = {.bytesPerRow = width * 4}; - wgpu::Extent3D extent = {width, height}; - m_queue.WriteTexture(&dest, - imageDataRGBAPremul, - height * width * 4, - &layout, - &extent); + // Upload mip 0 only when caller wants auto-mipgen; otherwise upload all. + const uint32_t levelsToUpload = generateRemainingMips ? 
1u : mipLevelCount; + size_t srcOffset = 0; + for (uint32_t i = 0; i < levelsToUpload; ++i) + { + const uint32_t logW = std::max<uint32_t>(1u, width >> i); + const uint32_t logH = std::max<uint32_t>(1u, height >> i); + const uint32_t blocksX = (logW + blockWidth - 1) / blockWidth; + const uint32_t blocksY = (logH + blockHeight - 1) / blockHeight; + const uint32_t bytesPerRow = blocksX * bytesPerBlock; + const size_t levelBytes = static_cast<size_t>(bytesPerRow) * blocksY; - if (mipLevelCount > 1) + wgpu::TexelCopyTextureInfo dest = {.texture = texture, .mipLevel = i}; + wgpu::TexelCopyBufferLayout layout = {.bytesPerRow = bytesPerRow}; + // Compressed copies must cover whole blocks, so use the + // block-padded (physical) mip extent rather than the logical one. + wgpu::Extent3D extent = {blocksX * blockWidth, blocksY * blockHeight}; + m_queue.WriteTexture(&dest, + imageData + srcOffset, + levelBytes, + &layout, + &extent); + srcOffset += levelBytes; + } + + if (generateRemainingMips && mipLevelCount > 1) { generateMipmaps(texture); }
diff --git a/tests/common/render_context_null.cpp b/tests/common/render_context_null.cpp index 9e3ddd0..c3f64ec 100644 --- a/tests/common/render_context_null.cpp +++ b/tests/common/render_context_null.cpp
@@ -64,7 +64,11 @@ uint32_t height, uint32_t mipLevelCount, GPUTextureFormat format, - const uint8_t imageDataRGBA[]) + const uint8_t imageData[], + uint8_t blockWidth, + uint8_t blockHeight, + bool srgb, + bool generateRemainingMips) { return make_rcp<Texture>(width, height); }
diff --git a/tests/common/render_context_null.hpp b/tests/common/render_context_null.hpp index 9db3f89..ca95b4c 100644 --- a/tests/common/render_context_null.hpp +++ b/tests/common/render_context_null.hpp
@@ -38,7 +38,11 @@ uint32_t height, uint32_t mipLevelCount, rive::GPUTextureFormat format, - const uint8_t imageDataRGBA[]) override; + const uint8_t imageData[], + uint8_t blockWidth = 1, + uint8_t blockHeight = 1, + bool srgb = false, + bool generateRemainingMips = false) override; std::unique_ptr<rive::gpu::BufferRing> makeUniformBufferRing( size_t capacityInBytes) override;
diff --git a/tests/unit_tests/runtime/decode_ktx2_test.cpp b/tests/unit_tests/runtime/decode_ktx2_test.cpp index 6ab370c..29c8371 100644 --- a/tests/unit_tests/runtime/decode_ktx2_test.cpp +++ b/tests/unit_tests/runtime/decode_ktx2_test.cpp
@@ -16,7 +16,7 @@ // ...mip data namespace { -constexpr uint8_t kKtx2Identifier[12] = { +constexpr uint8_t Ktx2Identifier[12] = { 0xAB, 0x4B, 0x54, @@ -48,8 +48,8 @@ { std::vector<uint8_t> buf; buf.insert(buf.end(), - kKtx2Identifier, - kKtx2Identifier + sizeof(kKtx2Identifier)); + Ktx2Identifier, + Ktx2Identifier + sizeof(Ktx2Identifier)); appendLE<uint32_t>(buf, vkFormat); appendLE<uint32_t>(buf, 1); // typeSize