| // basisu_astc_helpers.h |
| // Be sure to define BASISU_ASTC_HELPERS_IMPLEMENTATION somewhere before including this file to get the implementation, otherwise you only get the header. |
| #pragma once |
| #ifndef BASISU_ASTC_HELPERS_HEADER |
| #define BASISU_ASTC_HELPERS_HEADER |
| |
#include <assert.h>
#include <fenv.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
| |
| namespace astc_helpers |
| { |
// Grid texel weights always lie in [0,64].
const uint32_t MAX_WEIGHT_VALUE = 64;

// Smallest allowed dimension of a block's weight grid.
const uint32_t MIN_GRID_DIM = 2;

// Valid block dimensions, in texels.
const uint32_t MIN_BLOCK_DIM = 4;
const uint32_t MAX_BLOCK_DIM = 12;

// A block can carry at most 64 weight grid values.
const uint32_t MAX_GRID_WEIGHTS = 64;

// Table of the supported block sizes, two values per entry (presumably { width, height } - confirm against users of the table).
static const uint32_t NUM_ASTC_BLOCK_SIZES = 14;
extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2];
| |
// Color Endpoint Modes (CEM's).
// Each group of four modes (cem >> 2) uses the same number of endpoint values: 2 + 2 * (cem >> 2).
enum cems
{
	// 2 endpoint values
	CEM_LDR_LUM_DIRECT                = 0,
	CEM_LDR_LUM_BASE_PLUS_OFS         = 1,
	CEM_HDR_LUM_LARGE_RANGE           = 2,
	CEM_HDR_LUM_SMALL_RANGE           = 3,

	// 4 endpoint values
	CEM_LDR_LUM_ALPHA_DIRECT          = 4,
	CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS   = 5,
	CEM_LDR_RGB_BASE_SCALE            = 6,
	CEM_HDR_RGB_BASE_SCALE            = 7,

	// 6 endpoint values
	CEM_LDR_RGB_DIRECT                = 8,
	CEM_LDR_RGB_BASE_PLUS_OFFSET      = 9,
	CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10,
	CEM_HDR_RGB                       = 11,

	// 8 endpoint values
	CEM_LDR_RGBA_DIRECT               = 12,
	CEM_LDR_RGBA_BASE_PLUS_OFFSET     = 13,
	CEM_HDR_RGB_LDR_ALPHA             = 14,
	CEM_HDR_RGB_HDR_ALPHA             = 15
};
| |
// Every Bounded Integer Sequence Coding (BISE/ISE) range, named after its number of levels.
// The enumerator value is the ISE range index (the row index into g_ise_range_table).
// Weights may use ranges [0,11]; endpoints may use ranges [4,20].
enum bise_levels
{
	BISE_2_LEVELS   = 0,	// 1 bit
	BISE_3_LEVELS   = 1,	// 1 trit
	BISE_4_LEVELS   = 2,	// 2 bits
	BISE_5_LEVELS   = 3,	// 1 quint
	BISE_6_LEVELS   = 4,	// 1 bit + trit
	BISE_8_LEVELS   = 5,	// 3 bits
	BISE_10_LEVELS  = 6,	// 1 bit + quint
	BISE_12_LEVELS  = 7,	// 2 bits + trit
	BISE_16_LEVELS  = 8,	// 4 bits
	BISE_20_LEVELS  = 9,	// 2 bits + quint
	BISE_24_LEVELS  = 10,	// 3 bits + trit
	BISE_32_LEVELS  = 11,	// 5 bits
	BISE_40_LEVELS  = 12,	// 3 bits + quint
	BISE_48_LEVELS  = 13,	// 4 bits + trit
	BISE_64_LEVELS  = 14,	// 6 bits
	BISE_80_LEVELS  = 15,	// 4 bits + quint
	BISE_96_LEVELS  = 16,	// 5 bits + trit
	BISE_128_LEVELS = 17,	// 7 bits
	BISE_160_LEVELS = 18,	// 5 bits + quint
	BISE_192_LEVELS = 19,	// 6 bits + trit
	BISE_256_LEVELS = 20	// 8 bits
};
| |
| const uint32_t TOTAL_ISE_RANGES = 21; |
| |
| // Valid endpoint ISE ranges |
| const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4 |
| const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20 |
| const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1; |
| |
| // Valid weight ISE ranges |
| const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0 |
| const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11 |
| const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1; |
| |
| // The ISE range table. |
| extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1) |
| |
// Color Component Select (CCS) values for dual plane mode.
// The selected component (the last letter of each name) is interpolated with the 2nd weight plane.
enum ccs
{
	CCS_GBA_R = 0,	// R uses the 2nd plane
	CCS_RBA_G = 1,	// G uses the 2nd plane
	CCS_RGA_B = 2,	// B uses the 2nd plane
	CCS_RGB_A = 3	// A uses the 2nd plane
};
| |
// A physical (packed) ASTC block, stored as four 32-bit words (128 bits).
struct astc_block
{
	uint32_t m_vals[4];
};
| |
// Maximum # of partitions (subsets) in single plane mode.
const uint32_t MAX_PARTITIONS = 4;

// Maximum # of partitions (subsets) in dual plane mode.
const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3;

// Total # of partition pattern seeds (10 bits).
const uint32_t NUM_PARTITION_PATTERNS = 1024;

// Maximum # of endpoint values a block can hold.
const uint32_t MAX_ENDPOINTS = 18;
| |
| struct log_astc_block |
| { |
| bool m_error_flag; |
| |
| bool m_solid_color_flag_ldr, m_solid_color_flag_hdr; |
| uint16_t m_solid_color[4]; |
| |
| // Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr |
| uint32_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block |
| |
| bool m_dual_plane; |
| |
| uint32_t m_weight_ise_range; // 0-11 |
| uint32_t m_endpoint_ise_range; // 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking |
| |
| uint32_t m_color_component_selector; // 0-3, 0=GBA R, 1=RBA G, 2=RGA B, 3=RGB A, only used in dual plane mode |
| |
| uint32_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode) |
| uint32_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1 |
| |
| uint32_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's |
| |
| // ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc. |
| uint8_t m_weights[MAX_GRID_WEIGHTS]; |
| |
| // ISE endpoint values |
| // Endpoint order examples: |
| // 1 subset LA : LL0 LH0 AL0 AH0 |
| // 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 |
| // 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 |
| // 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1 |
| // 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1 |
| // 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1 |
| uint8_t m_endpoints[MAX_ENDPOINTS]; |
| |
| void clear() |
| { |
| memset(this, 0, sizeof(*this)); |
| } |
| }; |
| |
// Debug-only range check on the half-open interval [l, h): asserts l <= v < h and returns v unchanged.
// The (void) casts keep the parameters "used" when asserts are compiled out.
inline int bounds_check(int v, int l, int h)
{
	(void)v; (void)l; (void)h;
	assert((l <= v) && (v < h));
	return v;
}

inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h)
{
	(void)v; (void)l; (void)h;
	assert((l <= v) && (v < h));
	return v;
}
| |
// Returns bits [low, high] (inclusive) of val, shifted down so bit 'low' lands at bit 0.
// The field may be 1 to 32 bits wide.
inline uint32_t get_bits(uint32_t val, int low, int high)
{
	const int width = high - low + 1;
	assert((width >= 1) && (width <= 32));

	const uint32_t shifted = val >> low;

	// A full 32-bit field needs no mask (and 1u << 32 would be invalid).
	return (width == 32) ? shifted : (shifted & ((1u << width) - 1));
}
| |
| // Returns the number of levels in the given ISE range. |
| inline uint32_t get_ise_levels(uint32_t ise_range) |
| { |
| assert(ise_range < TOTAL_ISE_RANGES); |
| return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0]; |
| } |
| |
| inline int get_ise_sequence_bits(int count, int range) |
| { |
| // See 18.22 Data Size Determination |
| int total_bits = g_ise_range_table[range][0] * count; |
| total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5; |
| total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3; |
| return total_bits; |
| } |
| |
| inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w) |
| { |
| assert(w <= MAX_WEIGHT_VALUE); |
| return (l * (64 - w) + h * w + 32) >> 6; |
| } |
| |
// BISE encodes num_vals values from pSrc_vals using the given ISE range, starting at bit offset bit_pos.
// The encoded bits are OR'd into the first 4 dwords of pDst, so pDst should normally be cleared (or hold only non-overlapping bits) first.
| void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range); |
| |
| // Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions. |
// Returns false if the logical block can't be packed. If the only problem is that log_block.m_endpoint_ise_range doesn't match the range
// implied by the remaining bits, *pExpected_endpoint_range (when non-null) is set to the expected range; otherwise it is set to -1.
| bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr); |
| |
| // Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component. |
| void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a); |
| |
| // Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's) |
| void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah); |
| |
| // These helpers are all quite slow, but are useful for table preparation. |
| |
| // Dequantizes ISE encoded endpoint val to [0,255] |
| uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // ISE ranges 4-20 (the implementation asserts [FIRST_VALID_ENDPOINT_ISE_RANGE, LAST_VALID_ENDPOINT_ISE_RANGE]) |
| |
| // Dequantizes ISE encoded weight val to [0,64] |
| uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // ISE ranges 0-11 - NOTE(review): this said 0-10, but LAST_VALID_WEIGHT_ISE_RANGE is 11; confirm against the implementation |
| |
// NOTE(review): presumably these return the ISE symbol whose dequantized value is nearest to v - implementation not visible here, confirm.
| uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range); |
| uint32_t find_nearest_bise_weight(int v, uint32_t ise_range); |
| |
// Fills the caller supplied lookup tables for one ISE range. dequant_tables::init() passes nullptr for both rank tables when they aren't wanted.
| void create_quant_tables( |
| uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65] |
| uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels] |
| uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels] |
| uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels] |
| uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights |
| bool weight_flag); // false if block endpoints, true if weights |
| |
| // True if the CEM is LDR. |
| bool is_cem_ldr(uint32_t mode); |
// True if the CEM is HDR, i.e. whenever is_cem_ldr() returns false.
| inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); } |
| |
| // True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp). |
| bool is_valid_block_size(uint32_t w, uint32_t h); |
| |
// NOTE(review): judging by the names, these report whether any subset of the block uses an HDR / LDR CEM - implementation not visible here, confirm.
| bool block_has_any_hdr_cems(const log_astc_block& log_blk); |
| bool block_has_any_ldr_cems(const log_astc_block& log_blk); |
| |
// Returns the # of endpoint values used by the given CEM (0-15): 2, 4, 6 or 8, depending on the CEM's class (cem >> 2).
inline uint32_t get_num_cem_values(uint32_t cem)
{
	assert(cem <= 15);

	const uint32_t cem_class = cem >> 2;
	return 2 * (cem_class + 1);
}
| |
| struct dequant_table |
| { |
| basisu::vector<uint8_t> m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65] |
| basisu::vector<uint8_t> m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels] |
| basisu::vector<uint8_t> m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels] |
| basisu::vector<uint8_t> m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels] |
| |
| void init(bool weight_flag, uint32_t num_levels, bool init_rank_tabs) |
| { |
| m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256); |
| m_ISE_to_val.resize(num_levels); |
| if (init_rank_tabs) |
| { |
| m_ISE_to_rank.resize(num_levels); |
| m_rank_to_ISE.resize(num_levels); |
| } |
| } |
| }; |
| |
| struct dequant_tables |
| { |
| dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES]; |
| dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES]; |
| |
| const dequant_table& get_weight_tab(uint32_t range) const |
| { |
| assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE)); |
| return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE]; |
| } |
| |
| dequant_table& get_weight_tab(uint32_t range) |
| { |
| assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE)); |
| return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE]; |
| } |
| |
| const dequant_table& get_endpoint_tab(uint32_t range) const |
| { |
| assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE)); |
| return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE]; |
| } |
| |
| dequant_table& get_endpoint_tab(uint32_t range) |
| { |
| assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE)); |
| return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE]; |
| } |
| |
| void init(bool init_rank_tabs) |
| { |
| for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++) |
| { |
| const uint32_t num_levels = get_ise_levels(range); |
| dequant_table& tab = get_weight_tab(range); |
| |
| tab.init(true, num_levels, init_rank_tabs); |
| |
| create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, true); |
| } |
| |
| for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++) |
| { |
| const uint32_t num_levels = get_ise_levels(range); |
| dequant_table& tab = get_endpoint_tab(range); |
| |
| tab.init(false, num_levels, init_rank_tabs); |
| |
| create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), init_rank_tabs ? tab.m_ISE_to_rank.data() : nullptr, init_rank_tabs ? tab.m_rank_to_ISE.data() : nullptr, range, false); |
| } |
| } |
| }; |
| |
// Global table set. NOTE(review): its definition and init_tables() are outside this excerpt; presumably init_tables() fills g_dequant_tables - confirm.
| extern dequant_tables g_dequant_tables; |
| void init_tables(bool init_rank_tabs); |
| |
| // Procedurally returns the texel partition/subset index given the block coordinate and config. |
| int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block); |
| |
// Endpoint decoding helpers. Results are returned through the reference / output parameters.
// NOTE(review): implementations are not visible in this excerpt; see the ASTC spec for the exact definitions of these operations.
| void blue_contract( |
| int r, int g, int b, int a, |
| int& dr, int& dg, int& db, int& da); |
| |
| void bit_transfer_signed(int& a, int& b); |
| |
| void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE); |
| |
// Half float (FP16) values are carried around as raw 16-bit integers.
| typedef uint16_t half_float; |
| half_float float_to_half(float val, bool toward_zero); |
| float half_to_float(half_float hval); |
| |
// 0xff80 == 65408. NOTE(review): presumably the largest value representable in RGB 9E5 - confirm against pack_rgb9e5().
| const int MAX_RGB9E5 = 0xff80; |
| void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b); |
| uint32_t pack_rgb9e5(float r, float g, float b); |
| |
// Output pixel format selected when decoding a block.
enum decode_mode
{
	// Outputs uint8_t's. Not valid on HDR blocks.
	cDecodeModeSRGB8 = 0,

	// Outputs uint8_t's. Not valid on HDR blocks.
	cDecodeModeLDR8 = 1,

	// Outputs uint16_t's (half floats). Valid on all LDR/HDR blocks.
	cDecodeModeHDR16 = 2,

	// Outputs uint32_t's, packed as RGB 9E5 (shared exponent).
	// See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
	cDecodeModeRGB9E5 = 3
};
| |
| // Decodes logical block to output pixels. |
| // pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16) |
// NOTE(review): presumably returns false when the block can't be decoded in the requested mode (SRGB8/LDR8 are documented as not valid on HDR blocks) - confirm.
| bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode); |
| |
// Decodes num_vals BISE encoded values of the given ISE range into pVals, reading from pBits128 starting at bit offset bit_ofs.
// NOTE(review): the name pBits128 suggests a 128-bit (16 byte) source buffer - confirm against the implementation.
| void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs); |
| |
| // Unpack a physical ASTC encoded GPU texture block to a logical block description. |
| bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height); |
| |
| } // namespace astc_helpers |
| |
| #endif // BASISU_ASTC_HELPERS_HEADER |
| |
| //------------------------------------------------------------------ |
| |
| #ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION |
| |
| namespace astc_helpers |
| { |
// Smaller of a and b; returns b when neither compares less than the other.
template<typename T>
inline T my_min(T a, T b)
{
	if (a < b)
		return a;
	return b;
}

// Larger of a and b; returns b when neither compares greater than the other.
template<typename T>
inline T my_max(T a, T b)
{
	if (a > b)
		return a;
	return b;
}
| |
| const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = { |
| { 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 }, |
| { 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 }, |
| { 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 }, |
| { 12, 10 }, { 12, 12 } |
| }; |
| |
| const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] = |
| { |
| //b t q |
| //2 3 5 // rng ise_index notes |
| { 1, 0, 0 }, // 0..1 0 |
| { 0, 1, 0 }, // 0..2 1 |
| { 2, 0, 0 }, // 0..3 2 |
| { 0, 0, 1 }, // 0..4 3 |
| { 1, 1, 0 }, // 0..5 4 min endpoint ISE index |
| { 3, 0, 0 }, // 0..7 5 |
| { 1, 0, 1 }, // 0..9 6 |
| { 2, 1, 0 }, // 0..11 7 |
| { 4, 0, 0 }, // 0..15 8 |
| { 2, 0, 1 }, // 0..19 9 |
| { 3, 1, 0 }, // 0..23 10 |
| { 5, 0, 0 }, // 0..31 11 max weight ISE index |
| { 3, 0, 1 }, // 0..39 12 |
| { 4, 1, 0 }, // 0..47 13 |
| { 6, 0, 0 }, // 0..63 14 |
| { 4, 0, 1 }, // 0..79 15 |
| { 5, 1, 0 }, // 0..95 16 |
| { 7, 0, 0 }, // 0..127 17 |
| { 5, 0, 1 }, // 0..159 18 |
| { 6, 1, 0 }, // 0..191 19 |
| { 8, 0, 0 }, // 0..255 20 |
| }; |
| |
// ORs the low 'codesize' bits of 'code' (codesize <= 9; 0 is a no-op) into the byte buffer at pDst, starting at bit_offset,
// then advances bit_offset by codesize. At most two bytes are touched.
// 'code' is not masked, so it must already fit in codesize bits.
static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize)
{
	assert(codesize <= 9);
	if (!codesize)
		return;

	uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst);

	const uint32_t shift = bit_offset & 7;
	const uint32_t byte_index = bit_offset >> 3;
	const uint32_t shifted = code << shift;

	pBytes[byte_index] |= (uint8_t)shifted;

	// Spill into the next byte only if the code crosses the byte boundary.
	if ((shift + codesize) > 8)
		pBytes[byte_index + 1] |= (uint8_t)(shifted >> 8);

	bit_offset += codesize;
}
| |
// Returns bits [low, high] (inclusive) of 'bits', shifted down so bit 'low' lands at bit 0.
// Requires 0 <= low <= high <= 31.
// The mask is built with unsigned arithmetic and the full 32-bit field is special-cased, because shifting a signed 1
// by 31 overflows and shifting by 32 is undefined.
static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
{
	assert((low >= 0) && (high >= low) && (high <= 31));

	const uint32_t num_bits = (uint32_t)(high - low) + 1u;
	const uint32_t mask = (num_bits >= 32u) ? 0xFFFFFFFFu : ((1u << num_bits) - 1u);

	return (bits >> low) & mask;
}
| |
| // Writes bits to output in an endian safe way |
| static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits) |
| { |
| assert(total_bits <= 31); |
| assert(value < (1u << total_bits)); |
| |
| uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput); |
| |
| while (total_bits) |
| { |
| const uint32_t bits_to_write = my_min<int>(total_bits, 8 - (bit_pos & 7)); |
| |
| pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << (bit_pos & 7)); |
| |
| bit_pos += bits_to_write; |
| total_bits -= bits_to_write; |
| value >>= bits_to_write; |
| } |
| } |
| |
// Maps 3 packed quints (index = q0 + 5*q1 + 25*q2, as computed by astc_encode_quints()) to their 7-bit encoding.
// Laid out below as one row per q1 value, grouped by q2.
static const uint8_t g_astc_quint_encode[125] =
{
	// q2 = 0
	0, 1, 2, 3, 4,
	8, 9, 10, 11, 12,
	16, 17, 18, 19, 20,
	24, 25, 26, 27, 28,
	5, 13, 21, 29, 6,

	// q2 = 1
	32, 33, 34, 35, 36,
	40, 41, 42, 43, 44,
	48, 49, 50, 51, 52,
	56, 57, 58, 59, 60,
	37, 45, 53, 61, 14,

	// q2 = 2
	64, 65, 66, 67, 68,
	72, 73, 74, 75, 76,
	80, 81, 82, 83, 84,
	88, 89, 90, 91, 92,
	69, 77, 85, 93, 22,

	// q2 = 3
	96, 97, 98, 99, 100,
	104, 105, 106, 107, 108,
	112, 113, 114, 115, 116,
	120, 121, 122, 123, 124,
	101, 109, 117, 125, 30,

	// q2 = 4
	102, 103, 70, 71, 38,
	110, 111, 78, 79, 46,
	118, 119, 86, 87, 54,
	126, 127, 94, 95, 62,
	39, 47, 55, 63, 7 /* the last entry could also be 31 - it results in the same decode as 7 */
};
| |
| // Encodes 3 values to output, usable for any range that uses quints and bits |
| static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n) |
| { |
| // First extract the quints and the bits from the 3 input values |
| int quints = 0, bits[3]; |
| const uint32_t bit_mask = (1 << n) - 1; |
| for (int i = 0; i < 3; i++) |
| { |
| static const int s_muls[3] = { 1, 5, 25 }; |
| |
| const int t = pValues[i] >> n; |
| |
| quints += t * s_muls[i]; |
| bits[i] = pValues[i] & bit_mask; |
| } |
| |
| // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits. |
| // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding |
| |
| assert(quints < 125); |
| const int T = g_astc_quint_encode[quints]; |
| |
| // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96. |
| astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) | |
| (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3); |
| } |
| |
// Maps 5 packed trits (index = t0 + 3*t1 + 9*t2 + 27*t3 + 81*t4, as computed by astc_encode_trits()) to their 8-bit encoding.
// Laid out below as 9 entries per row (one row per t2 value); each group of 3 rows covers 27 consecutive indices.
static const uint8_t g_astc_trit_encode[243] =
{
	// indices 0-26
	0, 1, 2, 4, 5, 6, 8, 9, 10,
	16, 17, 18, 20, 21, 22, 24, 25, 26,
	3, 7, 11, 19, 23, 27, 12, 13, 14,

	// indices 27-53
	32, 33, 34, 36, 37, 38, 40, 41, 42,
	48, 49, 50, 52, 53, 54, 56, 57, 58,
	35, 39, 43, 51, 55, 59, 44, 45, 46,

	// indices 54-80
	64, 65, 66, 68, 69, 70, 72, 73, 74,
	80, 81, 82, 84, 85, 86, 88, 89, 90,
	67, 71, 75, 83, 87, 91, 76, 77, 78,

	// indices 81-107
	128, 129, 130, 132, 133, 134, 136, 137, 138,
	144, 145, 146, 148, 149, 150, 152, 153, 154,
	131, 135, 139, 147, 151, 155, 140, 141, 142,

	// indices 108-134
	160, 161, 162, 164, 165, 166, 168, 169, 170,
	176, 177, 178, 180, 181, 182, 184, 185, 186,
	163, 167, 171, 179, 183, 187, 172, 173, 174,

	// indices 135-161
	192, 193, 194, 196, 197, 198, 200, 201, 202,
	208, 209, 210, 212, 213, 214, 216, 217, 218,
	195, 199, 203, 211, 215, 219, 204, 205, 206,

	// indices 162-188
	96, 97, 98, 100, 101, 102, 104, 105, 106,
	112, 113, 114, 116, 117, 118, 120, 121, 122,
	99, 103, 107, 115, 119, 123, 108, 109, 110,

	// indices 189-215
	224, 225, 226, 228, 229, 230, 232, 233, 234,
	240, 241, 242, 244, 245, 246, 248, 249, 250,
	227, 231, 235, 243, 247, 251, 236, 237, 238,

	// indices 216-242
	28, 29, 30, 60, 61, 62, 92, 93, 94,
	156, 157, 158, 188, 189, 190, 220, 221, 222,
	31, 63, 95, 159, 191, 223, 124, 125, 126
};
| |
| // Encodes 5 values to output, usable for any range that uses trits and bits |
| static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n) |
| { |
| // First extract the trits and the bits from the 5 input values |
| int trits = 0, bits[5]; |
| const uint32_t bit_mask = (1 << n) - 1; |
| for (int i = 0; i < 5; i++) |
| { |
| static const int s_muls[5] = { 1, 3, 9, 27, 81 }; |
| |
| const int t = pValues[i] >> n; |
| |
| trits += t * s_muls[i]; |
| bits[i] = pValues[i] & bit_mask; |
| } |
| |
| // Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits. |
| // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding |
| |
| assert(trits < 243); |
| const int T = g_astc_trit_encode[trits]; |
| |
| // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94. |
| astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2); |
| |
| astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) | |
| (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6); |
| } |
| |
| // Packs values using ASTC's BISE to output buffer. |
| void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range) |
| { |
| uint32_t temp[5] = { 0 }; |
| |
| const int num_bits = g_ise_range_table[range][0]; |
| |
| int group_size = 0; |
| if (g_ise_range_table[range][1]) |
| group_size = 5; |
| else if (g_ise_range_table[range][2]) |
| group_size = 3; |
| |
| #ifndef NDEBUG |
| const uint32_t num_levels = get_ise_levels(range); |
| for (int i = 0; i < num_vals; i++) |
| { |
| assert(pSrc_vals[i] < num_levels); |
| } |
| #endif |
| |
| if (group_size) |
| { |
| // Range has trits or quints - pack each group of 5 or 3 values |
| const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3); |
| |
| for (int group_index = 0; group_index < total_groups; group_index++) |
| { |
| uint8_t vals[5] = { 0 }; |
| |
| const int limit = my_min(group_size, num_vals - group_index * group_size); |
| for (int i = 0; i < limit; i++) |
| vals[i] = pSrc_vals[group_index * group_size + i]; |
| |
| if (group_size == 5) |
| astc_encode_trits(temp, vals, bit_pos, num_bits); |
| else |
| astc_encode_quints(temp, vals, bit_pos, num_bits); |
| } |
| } |
| else |
| { |
| for (int i = 0; i < num_vals; i++) |
| astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits); |
| } |
| |
| // TODO: Could this write too many bits on incomplete blocks? |
| pDst[0] |= temp[0]; pDst[1] |= temp[1]; |
| pDst[2] |= temp[2]; pDst[3] |= temp[3]; |
| } |
| |
// Reverses the order of all 32 bits (bit 0 <-> bit 31, bit 1 <-> bit 30, etc.)
inline uint32_t rev_dword(uint32_t bits)
{
	uint32_t r = bits;

	// Swap neighbouring bits, then bit pairs, nibbles, bytes and finally the two 16-bit halves.
	r = ((r >> 1) & 0x55555555) | ((r & 0x55555555) << 1);
	r = ((r >> 2) & 0x33333333) | ((r & 0x33333333) << 2);
	r = ((r >> 4) & 0x0f0f0f0f) | ((r & 0x0f0f0f0f) << 4);
	r = ((r >> 8) & 0x00ff00ff) | ((r & 0x00ff00ff) << 8);

	return (r >> 16) | (r << 16);
}
| |
// True if value fits in an unsigned field of num_bits bits, i.e. 0 <= value < (1 << num_bits).
static inline bool is_packable(int value, int num_bits)
{
	assert((num_bits >= 1) && (num_bits < 31));

	if (value < 0)
		return false;

	return value < (1 << num_bits);
}
| |
| static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits) |
| { |
| config_bits = 0; |
| |
| const int W = log_block.m_grid_width, H = log_block.m_grid_height; |
| |
| const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision |
| const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits |
| |
| // See Tables 81-82 |
| // Compute p from weight range |
| uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0); |
| |
| // Rearrange p's bits to p0 p2 p1 |
| p = (p >> 1) + ((p & 1) << 2); |
| |
| // Try encoding each row of table 82. |
| |
| // W+4 H+2 |
| if (is_packable(W - 4, 2) && is_packable(H - 2, 2)) |
| { |
| config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3); |
| return true; |
| } |
| |
| // W+8 H+2 |
| if (is_packable(W - 8, 2) && is_packable(H - 2, 2)) |
| { |
| config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3); |
| return true; |
| } |
| |
| // W+2 H+8 |
| if (is_packable(W - 2, 2) && is_packable(H - 8, 2)) |
| { |
| config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3); |
| return true; |
| } |
| |
| // W+2 H+6 |
| if (is_packable(W - 2, 2) && is_packable(H - 6, 1)) |
| { |
| config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); |
| return true; |
| } |
| |
| // W+2 H+2 |
| if (is_packable(W - 2, 1) && is_packable(H - 2, 2)) |
| { |
| config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3); |
| return true; |
| } |
| |
| // 12 H+2 |
| if ((W == 12) && is_packable(H - 2, 2)) |
| { |
| config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2); |
| return true; |
| } |
| |
| // W+2 12 |
| if ((H == 12) && is_packable(W - 2, 2)) |
| { |
| config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2); |
| return true; |
| } |
| |
| // 6 10 |
| if ((W == 6) && (H == 10)) |
| { |
| config_bits = (Dp_P << 9) | (3 << 7) | (p << 2); |
| return true; |
| } |
| |
| // 10 6 |
| if ((W == 10) && (H == 6)) |
| { |
| config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2); |
| return true; |
| } |
| |
| // W+6 H+6 (no dual plane or high prec) |
| if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2)) |
| { |
| config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2); |
| return true; |
| } |
| |
| // Failed: unsupported weight grid dimensions or config. |
| return false; |
| } |
| |
// Packs a logical block description into a 128-bit physical block.
//
// phys_block is always zeroed first, so when false is returned it is left zeroed or only partially written.
// *pExpected_endpoint_range (if non-null) is set to -1 up front. It is only overwritten, with the endpoint ISE range
// computed from the remaining bits, when packing fails because log_block.m_endpoint_ise_range differs from it.
//
// Returns false if log_block has its error flag set, if any field fails validation (partition count, ISE ranges, CCS,
// weight grid config, weight bit count, partition id, CEM combination, dual plane partition count, endpoint count),
// or if the endpoint ISE range doesn't match. Solid color blocks are handed to pack_void_extent_ldr()/pack_void_extent_hdr().
//
// Bit layout as written here: config bits, partition count, [partition id], CEM bits and the endpoints go forwards from bit 0;
// the weights are bit reversed and go backwards from bit 127; the extra CEM bits and the CCS sit directly below the weights.
| bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range) |
| { |
| memset(&phys_block, 0, sizeof(phys_block)); |
| |
| if (pExpected_endpoint_range) |
| *pExpected_endpoint_range = -1; |
| |
| assert(!log_block.m_error_flag); |
| if (log_block.m_error_flag) |
| return false; |
| |
// Solid color blocks: nothing else in log_block is consulted.
| if (log_block.m_solid_color_flag_ldr) |
| { |
| pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]); |
| return true; |
| } |
| else if (log_block.m_solid_color_flag_hdr) |
| { |
| pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]); |
| return true; |
| } |
| |
| if ((log_block.m_num_partitions < 1) || (log_block.m_num_partitions > MAX_PARTITIONS)) |
| return false; |
| |
| // Max usable weight range is 11 |
| if (log_block.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE) |
| return false; |
| |
| // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints |
| if ((log_block.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_block.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE)) |
| return false; |
| |
| if (log_block.m_color_component_selector > 3) |
| return false; |
| |
| uint32_t config_bits = 0; |
| if (!get_config_bits(log_block, config_bits)) |
| return false; |
| |
// bit_pos is the forward write cursor; every astc_set_bits() call that is passed bit_pos advances it.
| uint32_t bit_pos = 0; |
| astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11); |
| |
| const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height); |
| const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range); |
| |
| // 18.24 Illegal Encodings |
| if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96)) |
| return false; |
| |
// Counts the bits stored directly below the weights (extra CEM bits and/or the 2-bit CCS).
| uint32_t total_extra_bits = 0; |
| |
| astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2); |
| |
| if (log_block.m_num_partitions > 1) |
| { |
| if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS) |
| return false; |
| |
| astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10); |
| |
| uint32_t highest_cem = 0, lowest_cem = UINT32_MAX; |
| for (uint32_t j = 0; j < log_block.m_num_partitions; j++) |
| { |
| highest_cem = my_max(highest_cem, log_block.m_color_endpoint_modes[j]); |
| lowest_cem = my_min(lowest_cem, log_block.m_color_endpoint_modes[j]); |
| } |
| |
| if (highest_cem > 15) |
| return false; |
| |
| // Ensure CEM range is contiguous |
// (i.e. the CEM classes, cem >> 2, of all subsets may differ by at most 1)
| if (((highest_cem >> 2) > (1 + (lowest_cem >> 2)))) |
| return false; |
| |
| // See tables 79/80 |
// All subsets share one CEM: low 2 bits are 0 and the CEM itself is stored in the next 4 bits.
| uint32_t encoded_cem = log_block.m_color_endpoint_modes[0] << 2; |
| if (lowest_cem != highest_cem) |
| { |
// Low 2 bits select the base class (stored as 1-3); each subset then gets a 1-bit class offset (C) and a 2-bit mode (M).
| encoded_cem = my_min<uint32_t>(3, 1 + (lowest_cem >> 2)); |
| |
| // See tables at 23.11 Color Endpoint Mode |
| for (uint32_t j = 0; j < log_block.m_num_partitions; j++) |
| { |
| const int M = log_block.m_color_endpoint_modes[j] & 3; |
| |
| const int C = (log_block.m_color_endpoint_modes[j] >> 2) - ((encoded_cem & 3) - 1); |
| if ((C & 1) != C) |
| return false; |
| |
| encoded_cem |= (C << (2 + j)) | (M << (2 + log_block.m_num_partitions + 2 * j)); |
| } |
| |
// encoded_cem is 2 + 3 * m_num_partitions bits in total; the part that doesn't fit in the 6 bits written forwards goes below the weights.
| total_extra_bits = 3 * log_block.m_num_partitions - 4; |
| |
| if ((total_weight_bits + total_extra_bits) > 128) |
| return false; |
| |
| uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits; |
| astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 6, total_extra_bits); |
| } |
| |
| astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6); |
| } |
| else |
| { |
| if (log_block.m_partition_id) |
| return false; |
| if (log_block.m_color_endpoint_modes[0] > 15) |
| return false; |
| |
| astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4); |
| } |
| |
| if (log_block.m_dual_plane) |
| { |
// 3 is the dual plane partition limit (same value as MAX_DUAL_PLANE_PARTITIONS).
| if (log_block.m_num_partitions > 3) |
| return false; |
| |
| total_extra_bits += 2; |
| |
// The CCS goes below the weights and below any extra CEM bits written above.
| uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits; |
| astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2); |
| } |
| |
// Whatever isn't used by the config (bit_pos bits so far), the extra bits and the weights is available for the endpoints.
| const uint32_t total_config_bits = bit_pos + total_extra_bits; |
| const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits; |
| if (num_remaining_bits < 0) |
| return false; |
| |
| uint32_t total_cem_vals = 0; |
| for (uint32_t j = 0; j < log_block.m_num_partitions; j++) |
| total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[j] >> 2); |
| |
| if (total_cem_vals > MAX_ENDPOINTS) |
| return false; |
| |
// Find the largest ISE range whose encoded endpoints still fit into the remaining bits.
| int endpoint_ise_range = -1; |
| for (int k = 20; k > 0; k--) |
| { |
| int bits = get_ise_sequence_bits(total_cem_vals, k); |
| if (bits <= num_remaining_bits) |
| { |
| endpoint_ise_range = k; |
| break; |
| } |
| } |
| |
| // See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints |
| if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE) |
| return false; |
| |
| // Ensure the caller utilized the right endpoint ISE range. |
| if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range) |
| { |
| if (pExpected_endpoint_range) |
| *pExpected_endpoint_range = endpoint_ise_range; |
| return false; |
| } |
| |
| // Pack endpoints forwards |
| encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range); |
| |
| // Pack weights backwards |
// The weights are first encoded from bit 0 of a scratch buffer, then the whole 128 bits are reversed:
// dword order is swapped (3 - i) and each dword is bit reversed.
| uint32_t weight_data[4] = { 0 }; |
| encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range); |
| |
| for (uint32_t i = 0; i < 4; i++) |
| phys_block.m_vals[i] |= rev_dword(weight_data[3 - i]); |
| |
| return true; |
| } |
| |
// Expands a num_src_bits wide value to num_dst_bits by repeating its bit pattern from the top bit downwards;
// the final, partial copy keeps only its high bits. num_src_bits must be <= num_dst_bits and src must fit in num_src_bits.
static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits)
{
	assert(num_src_bits <= num_dst_bits);
	assert((src & ((1 << num_src_bits) - 1)) == src);

	uint32_t result = 0;

	// Place copies of src at successively lower positions until the bottom of the destination is reached.
	int shift = num_dst_bits - num_src_bits;
	while (shift > -num_src_bits)
	{
		if (shift >= 0)
			result |= src << shift;
		else
			result |= src >> -shift;

		shift -= num_src_bits;
	}

	return result;
}
| |
| uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range) |
| { |
| assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE)); |
| assert(val < get_ise_levels(ise_range)); |
| |
| uint32_t u = 0; |
| |
| switch (ise_range) |
| { |
| case 5: |
| { |
| u = bit_replication_scale(val, 3, 8); |
| break; |
| } |
| case 8: |
| { |
| u = bit_replication_scale(val, 4, 8); |
| break; |
| } |
| case 11: |
| { |
| u = bit_replication_scale(val, 5, 8); |
| break; |
| } |
| case 14: |
| { |
| u = bit_replication_scale(val, 6, 8); |
| break; |
| } |
| case 17: |
| { |
| u = bit_replication_scale(val, 7, 8); |
| break; |
| } |
| case 20: |
| { |
| u = val; |
| break; |
| } |
| case 4: |
| case 6: |
| case 7: |
| case 9: |
| case 10: |
| case 12: |
| case 13: |
| case 15: |
| case 16: |
| case 18: |
| case 19: |
| { |
| const uint32_t num_bits = g_ise_range_table[ise_range][0]; |
| const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits); |
| const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints); |
| |
| // compute Table 103 row index |
| const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2; |
| |
| assert(range_index >= 0 && range_index <= 10); |
| |
| uint32_t bits = val & ((1 << num_bits) - 1); |
| uint32_t tval = val >> num_bits; |
| |
| assert(tval < (num_trits ? 3U : 5U)); |
| |
| uint32_t a = bits & 1; |
| uint32_t b = (bits >> 1) & 1; |
| uint32_t c = (bits >> 2) & 1; |
| uint32_t d = (bits >> 3) & 1; |
| uint32_t e = (bits >> 4) & 1; |
| uint32_t f = (bits >> 5) & 1; |
| |
| uint32_t A = a ? 511 : 0; |
| uint32_t B = 0; |
| |
| switch (range_index) |
| { |
| case 2: |
| { |
| // 876543210 |
| // b000b0bb0 |
| B = (b << 1) | (b << 2) | (b << 4) | (b << 8); |
| break; |
| } |
| case 3: |
| { |
| // 876543210 |
| // b0000bb00 |
| B = (b << 2) | (b << 3) | (b << 8); |
| break; |
| } |
| case 4: |
| { |
| // 876543210 |
| // cb000cbcb |
| B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8); |
| break; |
| } |
| case 5: |
| { |
| // 876543210 |
| // cb0000cbc |
| B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8); |
| break; |
| } |
| case 6: |
| { |
| // 876543210 |
| // dcb000dcb |
| B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8); |
| break; |
| } |
| case 7: |
| { |
| // 876543210 |
| // dcb0000dc |
| B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8); |
| break; |
| } |
| case 8: |
| { |
| // 876543210 |
| // edcb000ed |
| B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8); |
| break; |
| } |
| case 9: |
| { |
| // 876543210 |
| // edcb0000e |
| B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8); |
| break; |
| } |
| case 10: |
| { |
| // 876543210 |
| // fedcb000f |
| B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8); |
| break; |
| } |
| default: |
| break; |
| } |
| |
| static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 }; |
| uint32_t C = C_vals[range_index]; |
| uint32_t D = tval; |
| |
| u = D * C + B; |
| u = u ^ A; |
| u = (A & 0x80) | (u >> 2); |
| |
| break; |
| } |
| default: |
| { |
| assert(0); |
| break; |
| } |
| } |
| |
| return u; |
| } |
| |
| uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range) |
| { |
| assert(val < get_ise_levels(ise_range)); |
| |
| uint32_t u = 0; |
| switch (ise_range) |
| { |
| case 0: |
| { |
| u = val ? 63 : 0; |
| break; |
| } |
| case 1: // 0-2 |
| { |
| const uint8_t s_tab_0_2[3] = { 0, 32, 63 }; |
| u = s_tab_0_2[val]; |
| break; |
| } |
| case 2: // 0-3 |
| { |
| u = bit_replication_scale(val, 2, 6); |
| break; |
| } |
| case 3: // 0-4 |
| { |
| const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 }; |
| u = s_tab_0_4[val]; |
| break; |
| } |
| case 5: // 0-7 |
| { |
| u = bit_replication_scale(val, 3, 6); |
| break; |
| } |
| case 8: // 0-15 |
| { |
| u = bit_replication_scale(val, 4, 6); |
| break; |
| } |
| case 11: // 0-31 |
| { |
| u = bit_replication_scale(val, 5, 6); |
| break; |
| } |
| case 4: // 0-5 |
| case 6: // 0-9 |
| case 7: // 0-11 |
| case 9: // 0-19 |
| case 10: // 0-23 |
| { |
| const uint32_t num_bits = g_ise_range_table[ise_range][0]; |
| const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits); |
| const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints); |
| |
| // compute Table 103 row index |
| const int range_index = num_bits * 2 + (num_quints ? 1 : 0); |
| |
| // Extract bits and tris/quints from value |
| const uint32_t bits = val & ((1u << num_bits) - 1); |
| const uint32_t D = val >> num_bits; |
| |
| assert(D < (num_trits ? 3U : 5U)); |
| |
| // Now dequantize |
| // See Table 103. ASTC weight unquantization parameters |
| static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 }; |
| |
| const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1; |
| |
| const uint32_t A = (a == 0) ? 0 : 0x7F; |
| |
| uint32_t B = 0; |
| if (range_index == 4) |
| B = ((b << 6) | (b << 2) | (b << 0)); |
| else if (range_index == 5) |
| B = ((b << 6) | (b << 1)); |
| else if (range_index == 6) |
| B = ((c << 6) | (b << 5) | (c << 1) | (b << 0)); |
| |
| const uint32_t C = C_table[range_index - 2]; |
| |
| u = D * C + B; |
| u = u ^ A; |
| u = (A & 0x20) | (u >> 2); |
| break; |
| } |
| default: |
| assert(0); |
| break; |
| } |
| |
| if (u > 32) |
| u++; |
| |
| return u; |
| } |
| |
| // Returns the nearest ISE symbol given a [0,255] endpoint value. |
| uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range) |
| { |
| assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE); |
| |
| const uint32_t total_levels = get_ise_levels(ise_range); |
| int best_e = INT_MAX, best_index = 0; |
| for (uint32_t i = 0; i < total_levels; i++) |
| { |
| const int qv = dequant_bise_endpoint(i, ise_range); |
| int e = labs(v - qv); |
| if (e < best_e) |
| { |
| best_e = e; |
| best_index = i; |
| if (!best_e) |
| break; |
| } |
| } |
| return best_index; |
| } |
| |
| // Returns the nearest ISE weight given a [0,64] endpoint value. |
| uint32_t find_nearest_bise_weight(int v, uint32_t ise_range) |
| { |
| assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE); |
| assert(v <= (int)MAX_WEIGHT_VALUE); |
| |
| const uint32_t total_levels = get_ise_levels(ise_range); |
| int best_e = INT_MAX, best_index = 0; |
| for (uint32_t i = 0; i < total_levels; i++) |
| { |
| const int qv = dequant_bise_weight(i, ise_range); |
| int e = labs(v - qv); |
| if (e < best_e) |
| { |
| best_e = e; |
| best_index = i; |
| if (!best_e) |
| break; |
| } |
| } |
| return best_index; |
| } |
| |
| void create_quant_tables( |
| uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65] |
| uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels] |
| uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels] |
| uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels] |
| uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights |
| bool weight_flag) // false if block endpoints, true if weights |
| { |
| const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_VALUE + 1) : 256; |
| |
| for (uint32_t i = 0; i < num_dequant_vals; i++) |
| { |
| uint32_t bise_index = weight_flag ? astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range); |
| |
| if (pVal_to_ise) |
| pVal_to_ise[i] = (uint8_t)bise_index; |
| |
| if (pISE_to_val) |
| pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range); |
| } |
| |
| if (pISE_to_rank || pRank_to_ISE) |
| { |
| const uint32_t num_levels = get_ise_levels(ise_range); |
| |
| if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2]) |
| { |
| // Only bits |
| for (uint32_t i = 0; i < num_levels; i++) |
| { |
| if (pISE_to_rank) |
| pISE_to_rank[i] = (uint8_t)i; |
| |
| if (pRank_to_ISE) |
| pRank_to_ISE[i] = (uint8_t)i; |
| } |
| } |
| else |
| { |
| // Range has trits or quints |
| uint32_t vals[256]; |
| for (uint32_t i = 0; i < num_levels; i++) |
| { |
| uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range); |
| |
| // Low=ISE value |
| // High=dequantized value |
| vals[i] = (v << 16) | i; |
| } |
| |
| // Sorts by dequantized value |
| std::sort(vals, vals + num_levels); |
| |
| for (uint32_t rank = 0; rank < num_levels; rank++) |
| { |
| uint32_t ise_val = (uint8_t)vals[rank]; |
| |
| if (pISE_to_rank) |
| pISE_to_rank[ise_val] = (uint8_t)rank; |
| |
| if (pRank_to_ISE) |
| pRank_to_ISE[rank] = (uint8_t)ise_val; |
| } |
| } |
| } |
| } |
| |
| void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah) |
| { |
| uint8_t* pDst = (uint8_t*)&blk.m_vals[0]; |
| memset(pDst, 0xFF, 16); |
| |
| pDst[0] = 0b11111100; |
| pDst[1] = 0b11111101; |
| |
| pDst[8] = (uint8_t)rh; |
| pDst[9] = (uint8_t)(rh >> 8); |
| pDst[10] = (uint8_t)gh; |
| pDst[11] = (uint8_t)(gh >> 8); |
| pDst[12] = (uint8_t)bh; |
| pDst[13] = (uint8_t)(bh >> 8); |
| pDst[14] = (uint8_t)ah; |
| pDst[15] = (uint8_t)(ah >> 8); |
| } |
| |
| // rh-ah are half-floats |
| void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah) |
| { |
| uint8_t* pDst = (uint8_t*)&blk.m_vals[0]; |
| memset(pDst, 0xFF, 16); |
| |
| pDst[0] = 0b11111100; |
| |
| pDst[8] = (uint8_t)rh; |
| pDst[9] = (uint8_t)(rh >> 8); |
| pDst[10] = (uint8_t)gh; |
| pDst[11] = (uint8_t)(gh >> 8); |
| pDst[12] = (uint8_t)bh; |
| pDst[13] = (uint8_t)(bh >> 8); |
| pDst[14] = (uint8_t)ah; |
| pDst[15] = (uint8_t)(ah >> 8); |
| } |
| |
| bool is_cem_ldr(uint32_t mode) |
| { |
| switch (mode) |
| { |
| case CEM_LDR_LUM_DIRECT: |
| case CEM_LDR_LUM_BASE_PLUS_OFS: |
| case CEM_LDR_LUM_ALPHA_DIRECT: |
| case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS: |
| case CEM_LDR_RGB_BASE_SCALE: |
| case CEM_LDR_RGB_DIRECT: |
| case CEM_LDR_RGB_BASE_PLUS_OFFSET: |
| case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: |
| case CEM_LDR_RGBA_DIRECT: |
| case CEM_LDR_RGBA_BASE_PLUS_OFFSET: |
| return true; |
| default: |
| break; |
| } |
| |
| return false; |
| } |
| |
| bool is_valid_block_size(uint32_t w, uint32_t h) |
| { |
| assert((w >= MIN_BLOCK_DIM) && (w <= MAX_BLOCK_DIM)); |
| assert((h >= MIN_BLOCK_DIM) && (h <= MAX_BLOCK_DIM)); |
| |
| #define SIZECHK(x, y) if ((w == (x)) && (h == (y))) return true; |
| SIZECHK(4, 4); |
| SIZECHK(5, 4); |
| |
| SIZECHK(5, 5); |
| |
| SIZECHK(6, 5); |
| SIZECHK(6, 6); |
| |
| SIZECHK(8, 5); |
| SIZECHK(8, 6); |
| SIZECHK(10, 5); |
| SIZECHK(10, 6); |
| |
| SIZECHK(8, 8); |
| SIZECHK(10, 8); |
| SIZECHK(10, 10); |
| |
| SIZECHK(12, 10); |
| SIZECHK(12, 12); |
| #undef SIZECHK |
| |
| return false; |
| } |
| |
| bool block_has_any_hdr_cems(const log_astc_block& log_blk) |
| { |
| assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS)); |
| |
| for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) |
| if (is_cem_hdr(log_blk.m_color_endpoint_modes[i])) |
| return true; |
| |
| return false; |
| } |
| |
| bool block_has_any_ldr_cems(const log_astc_block& log_blk) |
| { |
| assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS)); |
| |
| for (uint32_t i = 0; i < log_blk.m_num_partitions; i++) |
| if (!is_cem_hdr(log_blk.m_color_endpoint_modes[i])) |
| return true; |
| |
| return false; |
| } |
| |
| dequant_tables g_dequant_tables; |
| |
| void precompute_texel_partitions_4x4(); |
| |
| void init_tables(bool init_rank_tabs) |
| { |
| g_dequant_tables.init(init_rank_tabs); |
| |
| precompute_texel_partitions_4x4(); |
| } |
| |
// One bilinear tap used when upsampling a weight grid to block resolution.
struct weighted_sample
{
	// Top-left source grid coordinate of the 2x2 neighborhood.
	uint8_t m_src_x;
	uint8_t m_src_y;

	// Bilinear weights indexed [y][x], scaled by 16 (round by adding 8).
	uint8_t m_weights[2][2];
};
| |
| static void compute_upsample_weights( |
| int block_width, int block_height, |
| int weight_grid_width, int weight_grid_height, |
| weighted_sample* pWeights) // there will be block_width * block_height bilinear samples |
| { |
| const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1); |
| const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1); |
| |
| for (int texelY = 0; texelY < block_height; texelY++) |
| { |
| for (int texelX = 0; texelX < block_width; texelX++) |
| { |
| const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6; |
| const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6; |
| const uint32_t jX = gX >> 4; |
| const uint32_t jY = gY >> 4; |
| const uint32_t fX = gX & 0xf; |
| const uint32_t fY = gY & 0xf; |
| const uint32_t w11 = (fX * fY + 8) >> 4; |
| const uint32_t w10 = fY - w11; |
| const uint32_t w01 = fX - w11; |
| const uint32_t w00 = 16 - fX - fY + w11; |
| |
| weighted_sample& s = pWeights[texelX + texelY * block_width]; |
| s.m_src_x = (uint8_t)jX; |
| s.m_src_y = (uint8_t)jY; |
| s.m_weights[0][0] = (uint8_t)w00; |
| s.m_weights[0][1] = (uint8_t)w01; |
| s.m_weights[1][0] = (uint8_t)w10; |
| s.m_weights[1][1] = (uint8_t)w11; |
| } |
| } |
| } |
| |
| // Should be dequantized [0,64] weights |
| static void upsample_weight_grid( |
| uint32_t bx, uint32_t by, // destination/to dimension |
| uint32_t wx, uint32_t wy, // source/from dimension |
| const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx] |
| uint8_t* pDst_weights) // [by][bx] |
| { |
| assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12)); |
| assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by)); |
| |
| const uint32_t total_src_weights = wx * wy; |
| const uint32_t total_dst_weights = bx * by; |
| |
| if (total_src_weights == total_dst_weights) |
| { |
| memcpy(pDst_weights, pSrc_weights, total_src_weights); |
| return; |
| } |
| |
| weighted_sample weights[12 * 12]; |
| compute_upsample_weights(bx, by, wx, wy, weights); |
| |
| const weighted_sample* pS = weights; |
| |
| for (uint32_t y = 0; y < by; y++) |
| { |
| for (uint32_t x = 0; x < bx; x++, ++pS) |
| { |
| const uint32_t w00 = pS->m_weights[0][0]; |
| const uint32_t w01 = pS->m_weights[0][1]; |
| const uint32_t w10 = pS->m_weights[1][0]; |
| const uint32_t w11 = pS->m_weights[1][1]; |
| |
| assert(w00 || w01 || w10 || w11); |
| |
| const uint32_t sx = pS->m_src_x, sy = pS->m_src_y; |
| |
| uint32_t total = 8; |
| if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00; |
| if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01; |
| if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10; |
| if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11; |
| |
| pDst_weights[x + y * bx] = (uint8_t)(total >> 4); |
| } |
| } |
| } |
| |
// 32-bit integer mixing function used to derive the per-seed random bits of
// the partition pattern generator. All arithmetic wraps modulo 2^32.
inline uint32_t hash52(uint32_t v)
{
	uint32_t h = v;

	h = h ^ (h >> 15);
	h = h - (h << 17);
	h = h + (h << 7);
	h = h + (h << 4);

	h = h ^ (h >> 5);
	h = h + (h << 16);
	h = h ^ (h >> 7);
	h = h ^ (h >> 3);

	h = h ^ (h << 6);
	h = h ^ (h >> 17);

	return h;
}
| |
| int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block) |
| { |
| assert(zIn == 0); |
| |
| const uint32_t x = small_block ? xIn << 1 : xIn; |
| const uint32_t y = small_block ? yIn << 1 : yIn; |
| const uint32_t z = small_block ? zIn << 1 : zIn; |
| const uint32_t seed = seedIn + 1024 * (num_partitions - 1); |
| const uint32_t rnum = hash52(seed); |
| |
| uint8_t seed1 = (uint8_t)(rnum & 0xf); |
| uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf); |
| uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf); |
| uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf); |
| uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf); |
| uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf); |
| uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf); |
| uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf); |
| uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf); |
| uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf); |
| uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf); |
| uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf); |
| |
| seed1 = (uint8_t)(seed1 * seed1); |
| seed2 = (uint8_t)(seed2 * seed2); |
| seed3 = (uint8_t)(seed3 * seed3); |
| seed4 = (uint8_t)(seed4 * seed4); |
| seed5 = (uint8_t)(seed5 * seed5); |
| seed6 = (uint8_t)(seed6 * seed6); |
| seed7 = (uint8_t)(seed7 * seed7); |
| seed8 = (uint8_t)(seed8 * seed8); |
| seed9 = (uint8_t)(seed9 * seed9); |
| seed10 = (uint8_t)(seed10 * seed10); |
| seed11 = (uint8_t)(seed11 * seed11); |
| seed12 = (uint8_t)(seed12 * seed12); |
| |
| const int shA = (seed & 2) != 0 ? 4 : 5; |
| const int shB = (num_partitions == 3) ? 6 : 5; |
| const int sh1 = (seed & 1) != 0 ? shA : shB; |
| const int sh2 = (seed & 1) != 0 ? shB : shA; |
| const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2; |
| |
| seed1 = (uint8_t)(seed1 >> sh1); |
| seed2 = (uint8_t)(seed2 >> sh2); |
| seed3 = (uint8_t)(seed3 >> sh1); |
| seed4 = (uint8_t)(seed4 >> sh2); |
| seed5 = (uint8_t)(seed5 >> sh1); |
| seed6 = (uint8_t)(seed6 >> sh2); |
| seed7 = (uint8_t)(seed7 >> sh1); |
| seed8 = (uint8_t)(seed8 >> sh2); |
| seed9 = (uint8_t)(seed9 >> sh3); |
| seed10 = (uint8_t)(seed10 >> sh3); |
| seed11 = (uint8_t)(seed11 >> sh3); |
| seed12 = (uint8_t)(seed12 >> sh3); |
| |
| const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14)); |
| const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10)); |
| const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0; |
| const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0; |
| |
| return (a >= b && a >= c && a >= d) ? 0 |
| : (b >= c && b >= d) ? 1 |
| : (c >= d) ? 2 |
| : 3; |
| } |
| |
| static uint32_t g_texel_partitions_4x4[1024][2]; |
| |
| void precompute_texel_partitions_4x4() |
| { |
| for (uint32_t p = 0; p < 1024; p++) |
| { |
| uint32_t v2 = 0, v3 = 0; |
| |
| for (uint32_t y = 0; y < 4; y++) |
| { |
| for (uint32_t x = 0; x < 4; x++) |
| { |
| const uint32_t shift = x * 2 + y * 8; |
| v2 |= (compute_texel_partition(p, x, y, 0, 2, true) << shift); |
| v3 |= (compute_texel_partition(p, x, y, 0, 3, true) << shift); |
| } |
| } |
| |
| g_texel_partitions_4x4[p][0] = v2; |
| g_texel_partitions_4x4[p][1] = v3; |
| } |
| } |
| |
| static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions) |
| { |
| assert(g_texel_partitions_4x4[1][0]); |
| assert(seed < 1024); |
| assert((x <= 3) && (y <= 3)); |
| assert((num_partitions >= 2) && (num_partitions <= 3)); |
| |
| const uint32_t shift = x * 2 + y * 8; |
| return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3; |
| } |
| |
// Applies the "blue contraction" transform: red and green are each averaged
// with blue (rounding down via >> 1), while blue and alpha pass through.
//   r, g, b, a     - input color
//   dr, dg, db, da - receive the transformed color
void blue_contract(
	int r, int g, int b, int a,
	int &dr, int &dg, int &db, int &da)
{
	const int red_blue_avg = (r + b) >> 1;
	const int green_blue_avg = (g + b) >> 1;

	dr = red_blue_avg;
	dg = green_blue_avg;
	db = b;
	da = a;
}
| |
// Moves the top bit (0x80) of 'a' into the top bit of 'b', then turns the
// remainder of 'a' into a signed 6-bit offset.
//   a - in: 8-bit value; out: signed value in [-32, 31]
//   b - in: 8-bit value; out: (b >> 1) with bit 7 taken from the original 'a'
inline void bit_transfer_signed(int& a, int& b)
{
	const int moved_bit = a & 0x80;

	b = (b >> 1) | moved_bit;

	// Keep bits 6..1 of the original 'a' as a 6-bit field, then sign-extend
	// from bit 5.
	a = (a >> 1) & 0x3F;
	a = (a ^ 0x20) - 0x20;
}
| |
// Clamps a to the range [l, h]. The lower bound is checked first.
static inline int clamp(int a, int l, int h)
{
	return (a < l) ? l : ((a > h) ? h : a);
}
| |
// Clamps a to the range [l, h]. The lower bound is checked first; a value
// for which both comparisons are false is returned unchanged.
static inline float clampf(float a, float l, float h)
{
	return (a < l) ? l : ((a > h) ? h : a);
}
| |
// Interprets the low num_src_bits bits of src as a two's complement number
// and returns it as an int. Bits of src above the field are ignored.
//   src          - value holding the field in its low bits
//   num_src_bits - field width in bits, in [2,31]
inline int sign_extend(int src, int num_src_bits)
{
	assert((num_src_bits >= 2) && (num_src_bits <= 31));

	// Build the masks in unsigned arithmetic so that num_src_bits == 31 does
	// not evaluate (1 << 31) on a signed int.
	const uint32_t sign_bit = 1u << (num_src_bits - 1);
	const uint32_t field_mask = (sign_bit << 1) - 1u;
	const uint32_t field = (uint32_t)src & field_mask;

	if (field & sign_bit)
	{
		// Subtract 2^num_src_bits in two steps so that no intermediate value
		// leaves the int range.
		return ((int)field - (int)sign_bit) - (int)sign_bit;
	}

	return (int)field;
}
| |
| // endpoints is [4][2] |
| void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE) |
| { |
| assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA); |
| |
| int v0 = pE[0], v1 = pE[1]; |
| |
| int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0]; |
| int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1]; |
| |
| switch (cem_index) |
| { |
| case CEM_LDR_LUM_DIRECT: |
| { |
| e0_r = v0; e1_r = v1; |
| e0_g = v0; e1_g = v1; |
| e0_b = v0; e1_b = v1; |
| e0_a = 0xFF; e1_a = 0xFF; |
| break; |
| } |
| case CEM_LDR_LUM_BASE_PLUS_OFS: |
| { |
| int l0 = (v0 >> 2) | (v1 & 0xc0); |
| int l1 = l0 + (v1 & 0x3f); |
| |
| if (l1 > 0xFF) |
| l1 = 0xFF; |
| |
| e0_r = l0; e1_r = l1; |
| e0_g = l0; e1_g = l1; |
| e0_b = l0; e1_b = l1; |
| e0_a = 0xFF; e1_a = 0xFF; |
| break; |
| } |
| case CEM_LDR_LUM_ALPHA_DIRECT: |
| { |
| int v2 = pE[2], v3 = pE[3]; |
| |
| e0_r = v0; e1_r = v1; |
| e0_g = v0; e1_g = v1; |
| e0_b = v0; e1_b = v1; |
| e0_a = v2; e1_a = v3; |
| break; |
| } |
| case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS: |
| { |
| int v2 = pE[2], v3 = pE[3]; |
| |
| bit_transfer_signed(v1, v0); |
| bit_transfer_signed(v3, v2); |
| |
| e0_r = v0; e1_r = v0 + v1; |
| e0_g = v0; e1_g = v0 + v1; |
| e0_b = v0; e1_b = v0 + v1; |
| e0_a = v2; e1_a = v2 + v3; |
| |
| for (uint32_t c = 0; c < 4; c++) |
| { |
| pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255); |
| pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255); |
| } |
| |
| break; |
| } |
| case CEM_LDR_RGB_BASE_SCALE: |
| { |
| int v2 = pE[2], v3 = pE[3]; |
| |
| e0_r = (v0 * v3) >> 8; e1_r = v0; |
| e0_g = (v1 * v3) >> 8; e1_g = v1; |
| e0_b = (v2 * v3) >> 8; e1_b = v2; |
| e0_a = 0xFF; e1_a = 0xFF; |
| |
| break; |
| } |
| case CEM_LDR_RGB_DIRECT: |
| { |
| int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; |
| |
| if ((v1 + v3 + v5) >= (v0 + v2 + v4)) |
| { |
| e0_r = v0; e1_r = v1; |
| e0_g = v2; e1_g = v3; |
| e0_b = v4; e1_b = v5; |
| e0_a = 0xFF; e1_a = 0xFF; |
| } |
| else |
| { |
| blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a); |
| blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a); |
| } |
| |
| break; |
| } |
| case CEM_LDR_RGB_BASE_PLUS_OFFSET: |
| { |
| int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; |
| |
| bit_transfer_signed(v1, v0); |
| bit_transfer_signed(v3, v2); |
| bit_transfer_signed(v5, v4); |
| |
| if ((v1 + v3 + v5) >= 0) |
| { |
| e0_r = v0; e1_r = v0 + v1; |
| e0_g = v2; e1_g = v2 + v3; |
| e0_b = v4; e1_b = v4 + v5; |
| e0_a = 0xFF; e1_a = 0xFF; |
| } |
| else |
| { |
| blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a); |
| blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a); |
| } |
| |
| for (uint32_t c = 0; c < 4; c++) |
| { |
| pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255); |
| pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255); |
| } |
| |
| break; |
| } |
| case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: |
| { |
| int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; |
| |
| e0_r = (v0 * v3) >> 8; e1_r = v0; |
| e0_g = (v1 * v3) >> 8; e1_g = v1; |
| e0_b = (v2 * v3) >> 8; e1_b = v2; |
| e0_a = v4; e1_a = v5; |
| |
| break; |
| } |
| case CEM_LDR_RGBA_DIRECT: |
| { |
| int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7]; |
| |
| if ((v1 + v3 + v5) >= (v0 + v2 + v4)) |
| { |
| e0_r = v0; e1_r = v1; |
| e0_g = v2; e1_g = v3; |
| e0_b = v4; e1_b = v5; |
| e0_a = v6; e1_a = v7; |
| } |
| else |
| { |
| blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a); |
| blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a); |
| } |
| |
| break; |
| } |
| case CEM_LDR_RGBA_BASE_PLUS_OFFSET: |
| { |
| int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7]; |
| |
| bit_transfer_signed(v1, v0); |
| bit_transfer_signed(v3, v2); |
| bit_transfer_signed(v5, v4); |
| bit_transfer_signed(v7, v6); |
| |
| if ((v1 + v3 + v5) >= 0) |
| { |
| e0_r = v0; e1_r = v0 + v1; |
| e0_g = v2; e1_g = v2 + v3; |
| e0_b = v4; e1_b = v4 + v5; |
| e0_a = v6; e1_a = v6 + v7; |
| } |
| else |
| { |
| blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a); |
| blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a); |
| } |
| |
| for (uint32_t c = 0; c < 4; c++) |
| { |
| pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255); |
| pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255); |
| } |
| |
| break; |
| } |
| case CEM_HDR_LUM_LARGE_RANGE: |
| { |
| int y0, y1; |
| if (v1 >= v0) |
| { |
| y0 = (v0 << 4); |
| y1 = (v1 << 4); |
| } |
| else |
| { |
| y0 = (v1 << 4) + 8; |
| y1 = (v0 << 4) - 8; |
| } |
| |
| e0_r = y0; e1_r = y1; |
| e0_g = y0; e1_g = y1; |
| e0_b = y0; e1_b = y1; |
| e0_a = 0x780; e1_a = 0x780; |
| |
| break; |
| } |
| case CEM_HDR_LUM_SMALL_RANGE: |
| { |
| int y0, y1, d; |
| |
| if ((v0 & 0x80) != 0) |
| { |
| y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2); |
| d = (v1 & 0x1F) << 2; |
| } |
| else |
| { |
| y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1); |
| d = (v1 & 0x0F) << 1; |
| } |
| |
| y1 = y0 + d; |
| if (y1 > 0xFFF) |
| y1 = 0xFFF; |
| |
| e0_r = y0; e1_r = y1; |
| e0_g = y0; e1_g = y1; |
| e0_b = y0; e1_b = y1; |
| e0_a = 0x780; e1_a = 0x780; |
| |
| break; |
| } |
| case CEM_HDR_RGB_BASE_SCALE: |
| { |
| int v2 = pE[2], v3 = pE[3]; |
| |
| int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4); |
| |
| int majcomp, mode; |
| if ((modeval & 0xC) != 0xC) |
| { |
| majcomp = modeval >> 2; |
| mode = modeval & 3; |
| } |
| else if (modeval != 0xF) |
| { |
| majcomp = modeval & 3; |
| mode = 4; |
| } |
| else |
| { |
| majcomp = 0; |
| mode = 5; |
| } |
| |
| int red = v0 & 0x3f; |
| int green = v1 & 0x1f; |
| int blue = v2 & 0x1f; |
| int scale = v3 & 0x1f; |
| |
| int x0 = (v1 >> 6) & 1; |
| int x1 = (v1 >> 5) & 1; |
| int x2 = (v2 >> 6) & 1; |
| int x3 = (v2 >> 5) & 1; |
| int x4 = (v3 >> 7) & 1; |
| int x5 = (v3 >> 6) & 1; |
| int x6 = (v3 >> 5) & 1; |
| |
| int ohm = 1 << mode; |
| if (ohm & 0x30) green |= x0 << 6; |
| if (ohm & 0x3A) green |= x1 << 5; |
| if (ohm & 0x30) blue |= x2 << 6; |
| if (ohm & 0x3A) blue |= x3 << 5; |
| if (ohm & 0x3D) scale |= x6 << 5; |
| if (ohm & 0x2D) scale |= x5 << 6; |
| if (ohm & 0x04) scale |= x4 << 7; |
| if (ohm & 0x3B) red |= x4 << 6; |
| if (ohm & 0x04) red |= x3 << 6; |
| if (ohm & 0x10) red |= x5 << 7; |
| if (ohm & 0x0F) red |= x2 << 7; |
| if (ohm & 0x05) red |= x1 << 8; |
| if (ohm & 0x0A) red |= x0 << 8; |
| if (ohm & 0x05) red |= x0 << 9; |
| if (ohm & 0x02) red |= x6 << 9; |
| if (ohm & 0x01) red |= x3 << 10; |
| if (ohm & 0x02) red |= x5 << 10; |
| |
| static const int s_shamts[6] = { 1,1,2,3,4,5 }; |
| |
| const int shamt = s_shamts[mode]; |
| red <<= shamt; |
| green <<= shamt; |
| blue <<= shamt; |
| scale <<= shamt; |
| |
| if (mode != 5) |
| { |
| green = red - green; |
| blue = red - blue; |
| } |
| |
| if (majcomp == 1) |
| std::swap(red, green); |
| |
| if (majcomp == 2) |
| std::swap(red, blue); |
| |
| e1_r = clamp(red, 0, 0xFFF); |
| e1_g = clamp(green, 0, 0xFFF); |
| e1_b = clamp(blue, 0, 0xFFF); |
| e1_a = 0x780; |
| |
| e0_r = clamp(red - scale, 0, 0xFFF); |
| e0_g = clamp(green - scale, 0, 0xFFF); |
| e0_b = clamp(blue - scale, 0, 0xFFF); |
| e0_a = 0x780; |
| |
| break; |
| } |
| case CEM_HDR_RGB_HDR_ALPHA: |
| case CEM_HDR_RGB_LDR_ALPHA: |
| case CEM_HDR_RGB: |
| { |
| int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5]; |
| |
| int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6); |
| |
| e0_a = 0x780; |
| e1_a = 0x780; |
| |
| if (majcomp == 3) |
| { |
| e0_r = v0 << 4; |
| e0_g = v2 << 4; |
| e0_b = (v4 & 0x7f) << 5; |
| |
| e1_r = v1 << 4; |
| e1_g = v3 << 4; |
| e1_b = (v5 & 0x7f) << 5; |
| } |
| else |
| { |
| int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5); |
| int va = v0 | ((v1 & 0x40) << 2); |
| int vb0 = v2 & 0x3f; |
| int vb1 = v3 & 0x3f; |
| int vc = v1 & 0x3f; |
| int vd0 = v4 & 0x7f; |
| int vd1 = v5 & 0x7f; |
| |
| static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 }; |
| vd0 = sign_extend(vd0, s_dbitstab[mode]); |
| vd1 = sign_extend(vd1, s_dbitstab[mode]); |
| |
| int x0 = (v2 >> 6) & 1; |
| int x1 = (v3 >> 6) & 1; |
| int x2 = (v4 >> 6) & 1; |
| int x3 = (v5 >> 6) & 1; |
| int x4 = (v4 >> 5) & 1; |
| int x5 = (v5 >> 5) & 1; |
| |
| int ohm = 1 << mode; |
| if (ohm & 0xA4) va |= x0 << 9; |
| if (ohm & 0x08) va |= x2 << 9; |
| if (ohm & 0x50) va |= x4 << 9; |
| if (ohm & 0x50) va |= x5 << 10; |
| if (ohm & 0xA0) va |= x1 << 10; |
| if (ohm & 0xC0) va |= x2 << 11; |
| if (ohm & 0x04) vc |= x1 << 6; |
| if (ohm & 0xE8) vc |= x3 << 6; |
| if (ohm & 0x20) vc |= x2 << 7; |
| if (ohm & 0x5B) vb0 |= x0 << 6; |
| if (ohm & 0x5B) vb1 |= x1 << 6; |
| if (ohm & 0x12) vb0 |= x2 << 7; |
| if (ohm & 0x12) vb1 |= x3 << 7; |
| |
| int shamt = (mode >> 1) ^ 3; |
| va = (uint32_t)va << shamt; |
| vb0 = (uint32_t)vb0 << shamt; |
| vb1 = (uint32_t)vb1 << shamt; |
| vc = (uint32_t)vc << shamt; |
| vd0 = (uint32_t)vd0 << shamt; |
| vd1 = (uint32_t)vd1 << shamt; |
| |
| e1_r = clamp(va, 0, 0xFFF); |
| e1_g = clamp(va - vb0, 0, 0xFFF); |
| e1_b = clamp(va - vb1, 0, 0xFFF); |
| |
| e0_r = clamp(va - vc, 0, 0xFFF); |
| e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF); |
| e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF); |
| |
| if (majcomp == 1) |
| { |
| std::swap(e0_r, e0_g); |
| std::swap(e1_r, e1_g); |
| } |
| else if (majcomp == 2) |
| { |
| std::swap(e0_r, e0_b); |
| std::swap(e1_r, e1_b); |
| } |
| } |
| |
| if (cem_index == CEM_HDR_RGB_LDR_ALPHA) |
| { |
| int v6 = pE[6], v7 = pE[7]; |
| |
| e0_a = v6; |
| e1_a = v7; |
| } |
| else if (cem_index == CEM_HDR_RGB_HDR_ALPHA) |
| { |
| int v6 = pE[6], v7 = pE[7]; |
| |
| // Extract mode bits |
| int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2); |
| v6 &= 0x7F; |
| v7 &= 0x7F; |
| |
| if (mode == 3) |
| { |
| e0_a = v6 << 5; |
| e1_a = v7 << 5; |
| } |
| else |
| { |
| v6 |= (v7 << (mode + 1)) & 0x780; |
| v7 &= (0x3F >> mode); |
| v7 ^= (0x20 >> mode); |
| v7 -= (0x20 >> mode); |
| v6 <<= (4 - mode); |
| v7 <<= (4 - mode); |
| |
| v7 += v6; |
| v7 = clamp(v7, 0, 0xFFF); |
| e0_a = v6; |
| e1_a = v7; |
| } |
| } |
| |
| break; |
| } |
| default: |
| { |
| assert(0); |
| for (uint32_t c = 0; c < 4; c++) |
| { |
| pEndpoints[c][0] = 0; |
| pEndpoints[c][1] = 0; |
| } |
| break; |
| } |
| } |
| } |
| |
| static inline bool is_half_inf_or_nan(half_float v) |
| { |
| return get_bits(v, 10, 14) == 31; |
| } |
| |
| // This float->half conversion matches how "F32TO16" works on Intel GPU's. |
| half_float float_to_half(float val, bool toward_zero) |
| { |
| union { float f; int32_t i; uint32_t u; } fi = { val }; |
| const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1; |
| int s = flt_s, e = 0, m = 0; |
| |
| // inf/NaN |
| if (flt_e == 0xff) |
| { |
| e = 31; |
| if (flt_m != 0) // NaN |
| m = 1; |
| } |
| // not zero or denormal |
| else if (flt_e != 0) |
| { |
| int new_exp = flt_e - 127; |
| if (new_exp > 15) |
| e = 31; |
| else if (new_exp < -14) |
| { |
| if (toward_zero) |
| m = (int)truncf((1 << 24) * fabsf(fi.f)); |
| else |
| m = lrintf((1 << 24) * fabsf(fi.f)); |
| } |
| else |
| { |
| e = new_exp + 15; |
| if (toward_zero) |
| m = (int)truncf((float)flt_m * (1.0f / (float)(1 << 13))); |
| else |
| m = lrintf((float)flt_m * (1.0f / (float)(1 << 13))); |
| } |
| } |
| |
| assert((0 <= m) && (m <= 1024)); |
| if (m == 1024) |
| { |
| e++; |
| m = 0; |
| } |
| |
| assert((s >= 0) && (s <= 1)); |
| assert((e >= 0) && (e <= 31)); |
| assert((m >= 0) && (m <= 1023)); |
| |
| half_float result = (half_float)((s << 15) | (e << 10) | m); |
| return result; |
| } |
| |
| float half_to_float(half_float hval) |
| { |
| union { float f; uint32_t u; } x = { 0 }; |
| |
| uint32_t s = ((uint32_t)hval >> 15) & 1; |
| uint32_t e = ((uint32_t)hval >> 10) & 0x1F; |
| uint32_t m = (uint32_t)hval & 0x3FF; |
| |
| if (!e) |
| { |
| if (!m) |
| { |
| // +- 0 |
| x.u = s << 31; |
| return x.f; |
| } |
| else |
| { |
| // denormalized |
| while (!(m & 0x00000400)) |
| { |
| m <<= 1; |
| --e; |
| } |
| |
| ++e; |
| m &= ~0x00000400; |
| } |
| } |
| else if (e == 31) |
| { |
| if (m == 0) |
| { |
| // +/- INF |
| x.u = (s << 31) | 0x7f800000; |
| return x.f; |
| } |
| else |
| { |
| // +/- NaN |
| x.u = (s << 31) | 0x7f800000 | (m << 13); |
| return x.f; |
| } |
| } |
| |
| e = e + (127 - 15); |
| m = m << 13; |
| |
| assert(s <= 1); |
| assert(m <= 0x7FFFFF); |
| assert(e <= 255); |
| |
| x.u = m | (e << 23) | (s << 31); |
| return x.f; |
| } |
| |
| static inline half_float qlog16_to_half(int k) |
| { |
| assert((k >= 0) && (k <= 0xFFFF)); |
| |
| int E = (k & 0xF800) >> 11; |
| int M = k & 0x7FF; |
| |
| int Mt; |
| if (M < 512) |
| Mt = 3 * M; |
| else if (M >= 1536) |
| Mt = 5 * M - 2048; |
| else |
| Mt = 4 * M - 512; |
| |
| return (half_float)((E << 10) + (Mt >> 3)); |
| } |
| |
// Constants describing the RGB9E5 shared-exponent format.
// See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
// Field widths in bits (exponent, per-channel mantissa), the exponent bias, and the largest valid biased exponent.
const int RGB9E5_EXPONENT_BITS = 5, RGB9E5_MANTISSA_BITS = 9, RGB9E5_EXP_BIAS = 15, RGB9E5_MAX_VALID_BIASED_EXP = 31;
// Largest unbiased exponent: 31 - 15 = 16.
const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS);
// Number of distinct mantissa values: 1 << 9 = 512.
const int RGB9E5_MANTISSA_VALUES = (1 << RGB9E5_MANTISSA_BITS);
// Largest mantissa value: 511.
const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1);
//const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP));
// NOTE(review): the expression below is (1/512)/32768 = 2^-24 before the cast, so as an int this
// constant evaluates to 0 - confirm whether a floating-point type was intended.
const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS));
| |
| void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b) |
| { |
| int x = packed & 511; |
| int y = (packed >> 9) & 511; |
| int z = (packed >> 18) & 511; |
| int w = (packed >> 27) & 31; |
| |
| const float scale = powf(2.0f, static_cast<float>(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS)); |
| |
| r = x * scale; |
| g = y * scale; |
| b = z * scale; |
| } |
| |
// Returns the unbiased binary exponent of x, read directly from the float's exponent field
// (bits 23..30 minus 127). For normal positive floats this equals floor(log2(x)).
// It is not correct for zero and denormal values (they yield -127), but callers take the max of
// this value with the minimum rgb9e5 exponent, which hides those problem cases.
static inline int floor_log2(float x)
{
    union
    {
        unsigned int raw;
        float value;
    } pun;

    pun.value = x;

    // Extract the biased exponent, then remove the bias.
    const int biased_exponent = (int)((pun.raw >> 23) & 0xFF);
    return biased_exponent - 127;
}
| |
// Returns the larger of two ints (b when they are equal).
static inline int maximumi(int a, int b)
{
    if (a > b)
        return a;
    return b;
}
// Returns the larger of two floats; b is returned whenever (a > b) is false.
static inline float maximumf(float a, float b)
{
    if (a > b)
        return a;
    return b;
}
| |
| uint32_t pack_rgb9e5(float r, float g, float b) |
| { |
| r = clampf(r, 0.0f, MAX_RGB9E5); |
| g = clampf(g, 0.0f, MAX_RGB9E5); |
| b = clampf(b, 0.0f, MAX_RGB9E5); |
| |
| float maxrgb = maximumf(maximumf(r, g), b); |
| int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS; |
| assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP)); |
| |
| float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS)); |
| |
| int maxm = (int)floorf((maxrgb / denom) + 0.5f); |
| if (maxm == (MAX_RGB9E5_MANTISSA + 1)) |
| { |
| denom *= 2; |
| exp_shared += 1; |
| assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP); |
| } |
| else |
| { |
| assert(maxm <= MAX_RGB9E5_MANTISSA); |
| } |
| |
| int rm = (int)floorf((r / denom) + 0.5f); |
| int gm = (int)floorf((g / denom) + 0.5f); |
| int bm = (int)floorf((b / denom) + 0.5f); |
| |
| assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA)); |
| assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA)); |
| assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA)); |
| |
| return rm | (gm << 9) | (bm << 18) | (exp_shared << 27); |
| } |
| |
// Counts the leading zero bits of x when viewed as a 17-bit value (bit 16 is the MSB).
// Returns 17 when x is zero. Bits above bit 16 are asserted to be clear and masked off.
static inline int clz17(uint32_t x)
{
    assert(x <= 0x1FFFF);
    const uint32_t value = x & 0x1FFFF;

    // Walk a single-bit probe down from bit 16 until it hits a set bit (or runs out).
    int zeros = 0;
    for (uint32_t probe = 0x10000; probe != 0; probe >>= 1)
    {
        if (value & probe)
            break;
        zeros++;
    }

    return zeros;
}
| |
| static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb) |
| { |
| int lz = clz17(Cr | Cg | Cb | 1); |
| if (Cr == 65535) { Cr = 65536; lz = 0; } |
| if (Cg == 65535) { Cg = 65536; lz = 0; } |
| if (Cb == 65535) { Cb = 65536; lz = 0; } |
| Cr <<= lz; Cg <<= lz; Cb <<= lz; |
| Cr = (Cr >> 8) & 0x1FF; |
| Cg = (Cg >> 8) & 0x1FF; |
| Cb = (Cb >> 8) & 0x1FF; |
| uint32_t exponent = 16 - lz; |
| uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr; |
| return texel; |
| } |
| |
// Packs three half-float bit patterns (passed as ints) into an RGB9E5 texel using only integer operations.
//
// Per channel: values above 0x7c00 (NaNs, and anything with the sign bit set) become 0, and
// 0x7c00 (+infinity) becomes 0x7bff (the largest finite half). The channel with the largest
// exponent field determines the shared exponent; every mantissa (with its implicit leading one
// when the exponent field is non-zero) is shifted right to line up with it and cut to 9 bits.
static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb)
{
    enum { R = 0, G = 1, B = 2, NUM_CHANS = 3 };

    int chan[NUM_CHANS] = { Cr, Cg, Cb };
    int raw_exp[NUM_CHANS]; // exponent field as stored
    int eff_exp[NUM_CHANS]; // exponent used for alignment (a zero field counts as 1)

    for (int i = 0; i < NUM_CHANS; i++)
    {
        if (chan[i] > 0x7c00)
            chan[i] = 0;
        else if (chan[i] == 0x7c00)
            chan[i] = 0x7bff;

        raw_exp[i] = (chan[i] >> 10) & 0x1F;
        eff_exp[i] = (raw_exp[i] == 0) ? 1 : raw_exp[i];
    }

    uint32_t expo;
    uint32_t shift[NUM_CHANS];

    if ((raw_exp[R] | raw_exp[G] | raw_exp[B]) == 0)
    {
        // Every exponent field is zero: the exponent and all shifts are 1 if any
        // channel has mantissa bit 9 set, otherwise 0.
        const int top_mant_bit = ((chan[R] | chan[G] | chan[B]) & 0x200) >> 9;
        expo = shift[R] = shift[G] = shift[B] = top_mant_bit;
    }
    else
    {
        // Pick the channel with the largest exponent field (ties favor R, then G).
        int lead;
        if ((raw_exp[R] >= raw_exp[G]) && (raw_exp[R] >= raw_exp[B]))
            lead = R;
        else if (raw_exp[G] >= raw_exp[B])
            lead = G;
        else
            lead = B;

        expo = eff_exp[lead] + 1;

        // The leading channel shifts by 2; the others shift further by their exponent deficit.
        for (int i = 0; i < NUM_CHANS; i++)
            shift[i] = eff_exp[lead] - eff_exp[i] + 2;
    }

    int mant[NUM_CHANS];
    for (int i = 0; i < NUM_CHANS; i++)
    {
        const int implicit_one = (raw_exp[i] == 0) ? 0 : 0x400;
        mant[i] = (((chan[i] & 0x3FF) | implicit_one) >> shift[i]) & 0x1FF;
    }

    return (expo << 27) | (mant[B] << 18) | (mant[G] << 9) | mant[R];
}
| |
| // Important: pPixels is either 32-bit/texel or 64-bit/texel. |
| bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode) |
| { |
| assert(is_valid_block_size(blk_width, blk_height)); |
| |
| assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()); |
| if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size()) |
| return false; |
| |
| const uint32_t num_blk_pixels = blk_width * blk_height; |
| |
| // Write block error color |
| if (dec_mode == cDecodeModeHDR16) |
| { |
| // NaN's |
| memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4); |
| } |
| else if (dec_mode == cDecodeModeRGB9E5) |
| { |
| const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f); |
| |
| for (uint32_t i = 0; i < num_blk_pixels; i++) |
| ((uint32_t*)pPixels)[i] = purple_9e5; |
| } |
| else |
| { |
| for (uint32_t i = 0; i < num_blk_pixels; i++) |
| ((uint32_t*)pPixels)[i] = 0xFFFF00FF; |
| } |
| |
| if (log_blk.m_error_flag) |
| { |
| // Should this return false? It's not an invalid logical block config, though. |
| return false; |
| } |
| |
| // Handle solid color blocks |
| if (log_blk.m_solid_color_flag_ldr) |
| { |
| // LDR solid block |
| if (dec_mode == cDecodeModeHDR16) |
| { |
| // Convert LDR pixels to half-float |
| half_float h[4]; |
| for (uint32_t c = 0; c < 4; c++) |
| h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true); |
| |
| for (uint32_t i = 0; i < num_blk_pixels; i++) |
| memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4); |
| } |
| else if (dec_mode == cDecodeModeRGB9E5) |
| { |
| float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f)); |
| float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f)); |
| float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f)); |
| |
| const uint32_t packed = pack_rgb9e5(r, g, b); |
| |
| for (uint32_t i = 0; i < num_blk_pixels; i++) |
| ((uint32_t*)pPixels)[i] = packed; |
| } |
| else |
| { |
| // Convert LDR pixels to 8-bits |
| for (uint32_t i = 0; i < num_blk_pixels; i++) |
| for (uint32_t c = 0; c < 4; c++) |
| ((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8); |
| } |
| |
| return true; |
| } |
| else if (log_blk.m_solid_color_flag_hdr) |
| { |
| // HDR solid block, decode mode must be half-float or RGB9E5 |
| if (dec_mode == cDecodeModeHDR16) |
| { |
| for (uint32_t i = 0; i < num_blk_pixels; i++) |
| memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4); |
| } |
| else if (dec_mode == cDecodeModeRGB9E5) |
| { |
| float r = half_to_float(log_blk.m_solid_color[0]); |
| float g = half_to_float(log_blk.m_solid_color[1]); |
| float b = half_to_float(log_blk.m_solid_color[2]); |
| |
| const uint32_t packed = pack_rgb9e5(r, g, b); |
| |
| for (uint32_t i = 0; i < num_blk_pixels; i++) |
| ((uint32_t*)pPixels)[i] = packed; |
| } |
| else |
| { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // Sanity check block's config |
| if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2)) |
| return false; |
| if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height)) |
| return false; |
| |
| if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE)) |
| return false; |
| if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk. |