// blob: 9b75b84117fc0396f322c8aa27b05b9a34e0971a [file] [log] [blame]
// basisu_uastc_enc.cpp
// Copyright (C) 2019-2020 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "basisu_uastc_enc.h"
#include "basisu_astc_decomp.h"
#include "basisu_gpu_texture.h"
#include "basisu_bc7enc.h"
#ifdef _DEBUG
// When BASISU_VALIDATE_UASTC_ENC is 1, we pack and unpack to/from UASTC and ASTC, then validate that each codec returns the exact same results. This is slower.
#define BASISU_VALIDATE_UASTC_ENC 1
#endif
#define BASISU_SUPPORT_FORCE_MODE 0
using namespace basist;
namespace basisu
{
const uint32_t MAX_ENCODE_RESULTS = 512;
#if BASISU_VALIDATE_UASTC_ENC
// Reports a failed internal consistency check on stderr.
//   condition - outcome of the check; nothing is printed when it is true.
//   line      - source line of the check (the VALIDATE macro passes __LINE__).
// The function only logs: it neither aborts nor throws.
static void validate_func(bool condition, int line)
{
    if (condition)
        return;

    // 'line' is a signed int, so it has to be formatted with %d rather than %u.
    fprintf(stderr, "basisu_uastc_enc: Internal validation failed on line %d!\n", line);
}
#define VALIDATE(c) validate_func(c, __LINE__);
#else
#define VALIDATE(c)
#endif
// Selector field widths for DXT1 and DXT5, together with the number of distinct
// selector values (1 << bits) and the matching bit mask (values - 1).
enum dxt_constants
{
    // DXT1
    cDXT1SelectorBits = 2U,
    cDXT1SelectorValues = 1U << cDXT1SelectorBits,
    cDXT1SelectorMask = cDXT1SelectorValues - 1U,

    // DXT5
    cDXT5SelectorBits = 3U,
    cDXT5SelectorValues = 1U << cDXT5SelectorBits,
    cDXT5SelectorMask = cDXT5SelectorValues - 1U,
};
struct dxt1_block
{
enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
uint8_t m_low_color[cTotalEndpointBytes];
uint8_t m_high_color[cTotalEndpointBytes];
uint8_t m_selectors[cTotalSelectorBytes];
inline void clear() { basisu::clear_obj(*this); }
inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
inline void set_low_color(uint16_t c) { m_low_color[0] = static_cast<uint8_t>(c & 0xFF); m_low_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast<uint8_t>(c & 0xFF); m_high_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits))& cDXT1SelectorMask; }
inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); }
static uint16_t pack_color(const color_rgba& color, bool scaled, uint32_t bias = 127U)
{
uint32_t r = color.r, g = color.g, b = color.b;
if (scaled)
{
r = (r * 31U + bias) / 255U;
g = (g * 63U + bias) / 255U;
b = (b * 31U + bias) / 255U;
}
return static_cast<uint16_t>(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U));
}
static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); }
};
#define UASTC_WRITE_MODE_DESCS 0
// Appends the low 'codesize' bits of 'code' to a bit stream, least significant bit first.
//   pBuf       - destination buffer. Bits are OR'd into it, so the bytes being written must
//                already be zero. No bounds checking is done here; the caller must provide
//                room for bit_offset + codesize bits.
//   bit_offset - current write position in bits; advanced by 'codesize' on return.
//   code       - value to write; must fit in 'codesize' bits unless codesize is 64 (asserted).
//   codesize   - number of bits to write; 0 writes nothing.
//   pDesc      - optional field name, only printed when UASTC_WRITE_MODE_DESCS is non-zero.
static inline void uastc_write_bits(uint8_t* pBuf, uint32_t& bit_offset, uint64_t code, uint32_t codesize, const char* pDesc)
{
    (void)pDesc;
#if UASTC_WRITE_MODE_DESCS
    if (pDesc)
        printf("%s: %u %u\n", pDesc, bit_offset, codesize);
#endif
    assert((codesize == 64) || (code < (1ULL << codesize)));

    while (codesize)
    {
        const uint32_t byte_bit_offset = bit_offset & 7U;
        const uint32_t bits_left_in_byte = 8U - byte_bit_offset;

        // Compare as unsigned values instead of routing both operands through int.
        const uint32_t bits_to_write = (codesize < bits_left_in_byte) ? codesize : bits_left_in_byte;

        // Only the low 8 bits of the shifted code belong to this byte; the narrowing is intentional.
        uint8_t& dst = pBuf[bit_offset >> 3];
        dst = static_cast<uint8_t>(dst | static_cast<uint8_t>(code << byte_bit_offset));

        code >>= bits_to_write;
        codesize -= bits_to_write;
        bit_offset += bits_to_write;
    }
}
// Serializes one encoder result into a packed UASTC block.
//   blk            - output block; receives the first sizeof(blk) bytes of the local bit buffer.
//   result         - mode, common pattern index, solid color and ASTC endpoints/weights to pack.
//   etc1_blk       - source of the ETC1 hint fields (flip, diff, intensity tables; for solid color
//                    blocks also selector (0,0) and the base color).
//   etc1_bias      - 5-bit ETC1 bias; must be 0 for modes without a bias field (asserted).
//   etc_eac_a8_blk - source of the 8-bit ETC2 hint (m_table | m_multiplier << 4), written only for modes with alpha.
//   bc1_hint0/1    - BC1 hint bits; each must be false for modes that have no such field (asserted).
// Field order, as written below: mode code, then either the solid color payload, or
// BC1 hints, ETC1 hints, optional ETC1 bias, optional ETC2 hint, optional pattern index,
// optional component selector, endpoints (trit/quint groups first, then the plain bits), weights.
void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1)
{
// Non-solid alpha modes require a non-zero ETC2 multiplier.
if ((g_uastc_mode_has_alpha[result.m_uastc_mode]) && (result.m_uastc_mode != UASTC_MODE_INDEX_SOLID_COLOR))
{
assert(etc_eac_a8_blk.m_multiplier >= 1);
}
// uastc_write_bits() ORs bits into the buffer, so it has to start out zeroed.
uint8_t buf[32];
memset(buf, 0, sizeof(buf));
uint32_t block_bit_offset = 0;
#if UASTC_WRITE_MODE_DESCS
printf("**** Mode: %u\n", result.m_uastc_mode);
#endif
// Mode code: entry [0] of the table is the code, entry [1] its length in bits.
uastc_write_bits(buf, block_bit_offset, g_uastc_mode_huff_codes[result.m_uastc_mode][0], g_uastc_mode_huff_codes[result.m_uastc_mode][1], "mode");
// Solid color blocks have their own short layout: RGBA (8 bits each) followed by ETC1 hints.
if (result.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR)
{
uastc_write_bits(buf, block_bit_offset, result.m_solid_color.r, 8, "R");
uastc_write_bits(buf, block_bit_offset, result.m_solid_color.g, 8, "G");
uastc_write_bits(buf, block_bit_offset, result.m_solid_color.b, 8, "B");
uastc_write_bits(buf, block_bit_offset, result.m_solid_color.a, 8, "A");
uastc_write_bits(buf, block_bit_offset, etc1_blk.get_diff_bit(), 1, "ETC1D");
uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(0), 3, "ETC1I");
uastc_write_bits(buf, block_bit_offset, etc1_blk.get_selector(0, 0), 2, "ETC1S");
// The ETC1 base color comes from the 5-bit or the 4-bit form depending on the diff bit;
// either way each component is written into a 5-bit field.
uint32_t r, g, b;
if (etc1_blk.get_diff_bit())
etc_block::unpack_color5(r, g, b, etc1_blk.get_base5_color(), false);
else
etc_block::unpack_color4(r, g, b, etc1_blk.get_base4_color(0), false);
uastc_write_bits(buf, block_bit_offset, r, 5, "ETC1R");
uastc_write_bits(buf, block_bit_offset, g, 5, "ETC1G");
uastc_write_bits(buf, block_bit_offset, b, 5, "ETC1B");
memcpy(&blk, buf, sizeof(blk));
return;
}
// BC1 hint bits are only present in modes that declare them.
if (g_uastc_mode_has_bc1_hint0[result.m_uastc_mode])
uastc_write_bits(buf, block_bit_offset, bc1_hint0, 1, "BC1H0");
else
{
assert(bc1_hint0 == false);
}
if (g_uastc_mode_has_bc1_hint1[result.m_uastc_mode])
uastc_write_bits(buf, block_bit_offset, bc1_hint1, 1, "BC1H1");
else
{
assert(bc1_hint1 == false);
}
// ETC1 hints: flip bit, diff bit and both 3-bit intensity table indices.
uastc_write_bits(buf, block_bit_offset, etc1_blk.get_flip_bit(), 1, "ETC1F");
uastc_write_bits(buf, block_bit_offset, etc1_blk.get_diff_bit(), 1, "ETC1D");
uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(0), 3, "ETC1I0");
uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(1), 3, "ETC1I1");
if (g_uastc_mode_has_etc1_bias[result.m_uastc_mode])
uastc_write_bits(buf, block_bit_offset, etc1_bias, 5, "ETC1BIAS");
else
{
assert(etc1_bias == 0);
}
// ETC2 hint byte: table index in the low 4 bits, multiplier in the high 4 bits.
if (g_uastc_mode_has_alpha[result.m_uastc_mode])
{
const uint32_t etc2_hints = etc_eac_a8_blk.m_table | (etc_eac_a8_blk.m_multiplier << 4);
assert(etc2_hints > 0 && etc2_hints <= 0xFF);
uastc_write_bits(buf, block_bit_offset, etc2_hints, 8, "ETC2TM");
}
// Pattern index: 5 bits for the two-subset modes, 4 bits for the three-subset mode 3.
uint32_t subsets = 1;
switch (result.m_uastc_mode)
{
case 2:
case 4:
case 7:
case 9:
case 16:
uastc_write_bits(buf, block_bit_offset, result.m_common_pattern, 5, "PAT");
subsets = 2;
break;
case 3:
uastc_write_bits(buf, block_bit_offset, result.m_common_pattern, 4, "PAT");
subsets = 3;
break;
default:
break;
}
#ifdef _DEBUG
// Debug only: look up the ASTC partition seed so the anchor/partition tables can be cross-checked below.
uint32_t part_seed = 0;
switch (result.m_uastc_mode)
{
case 2:
case 4:
case 9:
case 16:
part_seed = g_astc_bc7_common_partitions2[result.m_common_pattern].m_astc;
break;
case 3:
part_seed = g_astc_bc7_common_partitions3[result.m_common_pattern].m_astc;
break;
case 7:
part_seed = g_bc7_3_astc2_common_partitions[result.m_common_pattern].m_astc2;
break;
default:
break;
}
#endif
// Dual plane modes: 6, 11 and 13 store a 2-bit component selector; mode 17 stores none.
uint32_t total_planes = 1;
switch (result.m_uastc_mode)
{
case 6:
case 11:
case 13:
uastc_write_bits(buf, block_bit_offset, result.m_astc.m_ccs, 2, "COMPSEL");
total_planes = 2;
break;
case 17:
// CCS field is always 3 for dual plane LA.
assert(result.m_astc.m_ccs == 3);
total_planes = 2;
break;
default:
break;
}
// Work on local copies: weights and endpoints may be modified by the anchor handling below.
// With two planes the weights are interleaved (index = texel * total_planes + plane).
uint8_t weights[32];
memcpy(weights, result.m_astc.m_weights, 16 * total_planes);
uint8_t endpoints[18];
memcpy(endpoints, result.m_astc.m_endpoints, sizeof(endpoints));
const uint32_t total_comps = g_uastc_mode_comps[result.m_uastc_mode];
// Endpoint layouts: two values per component, repeated per subset.
// LLAA
// LLAA LLAA
// LLAA LLAA LLAA
// RRGGBB
// RRGGBB RRGGBB
// RRGGBB RRGGBB RRGGBB
// RRGGBBAA
// RRGGBBAA RRGGBBAA
const uint32_t weight_bits = g_uastc_mode_weight_bits[result.m_uastc_mode];
const uint8_t* pPartition_pattern;
const uint8_t* pSubset_anchor_indices = basist::get_anchor_indices(subsets, result.m_uastc_mode, result.m_common_pattern, pPartition_pattern);
// Anchor weights are written with one bit less (see the weight loop further down), so each
// anchor's MSB has to be cleared first, per plane and per subset.
for (uint32_t plane_index = 0; plane_index < total_planes; plane_index++)
{
for (uint32_t subset_index = 0; subset_index < subsets; subset_index++)
{
const uint32_t anchor_index = pSubset_anchor_indices[subset_index];
#ifdef _DEBUG
// Debug only: the anchor must be the first texel (in raster order) belonging to the subset.
if (subsets >= 2)
{
for (uint32_t i = 0; i < 16; i++)
{
const uint32_t part_index = astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true);
if (part_index == subset_index)
{
assert(anchor_index == i);
break;
}
}
}
else
{
assert(!anchor_index);
}
#endif
// Check anchor weight's MSB - if it's set then invert this subset's weights and swap the endpoints
if (weights[anchor_index * total_planes + plane_index] & (1 << (weight_bits - 1)))
{
for (uint32_t i = 0; i < 16; i++)
{
const uint32_t part_index = pPartition_pattern[i];
#ifdef _DEBUG
// Debug only: the partition table must agree with the ASTC partition function.
if (subsets >= 2)
{
assert(part_index == (uint32_t)astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true));
}
else
{
assert(!part_index);
}
#endif
if (part_index == subset_index)
weights[i * total_planes + plane_index] = ((1 << weight_bits) - 1) - weights[i * total_planes + plane_index];
}
if (total_planes == 2)
{
// Dual plane: swap only the components that belong to this plane. With two components,
// component c is on plane c; otherwise the component equal to m_ccs is on plane 1.
for (int c = 0; c < (int)total_comps; c++)
{
const uint32_t comp_plane = (total_comps == 2) ? c : ((c == result.m_astc.m_ccs) ? 1 : 0);
if (comp_plane == plane_index)
std::swap(endpoints[c * 2 + 0], endpoints[c * 2 + 1]);
}
}
else
{
// Single plane: swap every component pair of this subset.
for (uint32_t c = 0; c < total_comps; c++)
std::swap(endpoints[subset_index * total_comps * 2 + c * 2 + 0], endpoints[subset_index * total_comps * 2 + c * 2 + 1]);
}
}
} // subset_index
} // plane_index
// Endpoint values: each one is split into its low ep_bits bits and a trit or quint (the
// remaining high part). Trits are accumulated five at a time (3^5 == 243) and quints
// three at a time (5^3 == 125) into tq_values.
const uint32_t total_values = total_comps * 2 * subsets;
const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[result.m_uastc_mode];
uint32_t bit_values[18];
uint32_t tq_values[8];
uint32_t total_tq_values = 0;
uint32_t tq_accum = 0;
uint32_t tq_mul = 1;
const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0];
const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1];
const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2];
for (uint32_t i = 0; i < total_values; i++)
{
uint32_t val = endpoints[i];
uint32_t bits = val & ((1 << ep_bits) - 1);
uint32_t tq = val >> ep_bits;
bit_values[i] = bits;
if (ep_trits)
{
assert(tq < 3);
tq_accum += tq * tq_mul;
tq_mul *= 3;
if (tq_mul == 243)
{
tq_values[total_tq_values++] = tq_accum;
tq_accum = 0;
tq_mul = 1;
}
}
else if (ep_quints)
{
assert(tq < 5);
tq_accum += tq * tq_mul;
tq_mul *= 5;
if (tq_mul == 125)
{
tq_values[total_tq_values++] = tq_accum;
tq_accum = 0;
tq_mul = 1;
}
}
}
// Complete groups: 8 bits per group of five trits, 7 bits per group of three quints.
uint32_t total_endpoint_bits = 0;
for (uint32_t i = 0; i < total_tq_values; i++)
{
const uint32_t num_bits = ep_trits ? 8 : 7;
uastc_write_bits(buf, block_bit_offset, tq_values[i], num_bits, "ETQ");
total_endpoint_bits += num_bits;
}
// A trailing partial group is written with just enough bits for the values it holds.
if (tq_mul > 1)
{
uint32_t num_bits;
if (ep_trits)
{
if (tq_mul == 3)
num_bits = 2;
else if (tq_mul == 9)
num_bits = 4;
else if (tq_mul == 27)
num_bits = 5;
else //if (tq_mul == 81)
num_bits = 7;
}
else
{
if (tq_mul == 5)
num_bits = 3;
else //if (tq_mul == 25)
num_bits = 5;
}
uastc_write_bits(buf, block_bit_offset, tq_accum, num_bits, "ETQ");
total_endpoint_bits += num_bits;
}
// The plain low bits of every endpoint value follow the trit/quint data.
for (uint32_t i = 0; i < total_values; i++)
{
uastc_write_bits(buf, block_bit_offset, bit_values[i], ep_bits, "EBITS");
total_endpoint_bits += ep_bits;
}
#if UASTC_WRITE_MODE_DESCS
uint32_t weight_start = block_bit_offset;
#endif
// Weights, in storage order. A weight whose texel index (i >> plane_shift) is a subset
// anchor is written with one bit less.
uint32_t total_weight_bits = 0;
const uint32_t plane_shift = (total_planes == 2) ? 1 : 0;
for (uint32_t i = 0; i < 16 * total_planes; i++)
{
uint32_t numbits = weight_bits;
for (uint32_t s = 0; s < subsets; s++)
{
if (pSubset_anchor_indices[s] == (i >> plane_shift))
{
numbits--;
break;
}
}
uastc_write_bits(buf, block_bit_offset, weights[i], numbits, nullptr);
total_weight_bits += numbits;
}
#if UASTC_WRITE_MODE_DESCS
printf("WEIGHTS: %u %u\n", weight_start, total_weight_bits);
#endif
// The packed data must not exceed 128 bits; only sizeof(blk) bytes of the buffer are copied out.
assert(block_bit_offset <= 128);
memcpy(&blk, buf, sizeof(blk));
#if UASTC_WRITE_MODE_DESCS
printf("Total bits: %u, endpoint bits: %u, weight bits: %u\n", block_bit_offset, total_endpoint_bits, total_weight_bits);
#endif
}
// MODE 0
// 0. DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 19 (192) MODE6 RGB
// 18. DualPlane: 0, WeightRange: 11 (32), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 11 (32) MODE6 RGB
static void astc_mode0_or_18(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, const uint8_t *pForce_selectors = nullptr)
{
const uint32_t endpoint_range = (mode == 18) ? 11 : 19;
const uint32_t weight_range = (mode == 18) ? 11 : 8;
color_cell_compressor_params ccell_params;
memset(&ccell_params, 0, sizeof(ccell_params));
ccell_params.m_num_pixels = 16;
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
ccell_params.m_num_selector_weights = (mode == 18) ? 32 : 16;
ccell_params.m_pSelector_weights = (mode == 18) ? g_astc_weights5 : g_astc_weights4;
ccell_params.m_pSelector_weightsx = (mode == 18) ? (const bc7enc_vec4F*)g_astc_weights5x : (const bc7enc_vec4F*)g_astc_weights4x;
ccell_params.m_astc_endpoint_range = endpoint_range;
ccell_params.m_weights[0] = 1;
ccell_params.m_weights[1] = 1;
ccell_params.m_weights[2] = 1;
ccell_params.m_weights[3] = 1;
ccell_params.m_pForce_selectors = pForce_selectors;
color_cell_compressor_results ccell_results;
uint8_t ccell_result_selectors[16];
uint8_t ccell_result_selectors_temp[16];
memset(&ccell_results, 0, sizeof(ccell_results));
ccell_results.m_pSelectors = &ccell_result_selectors[0];
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
// ASTC
astc_block_desc astc_results;
memset(&astc_results, 0, sizeof(astc_results));
astc_results.m_dual_plane = false;
astc_results.m_weight_range = weight_range;// (mode == 18) ? 11 : 8;
astc_results.m_ccs = 0;
astc_results.m_subsets = 1;
astc_results.m_partition_seed = 0;
astc_results.m_cem = 8;
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
bool invert = false;
if (pForce_selectors == nullptr)
{
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
invert = true;
}
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
if (invert)
astc_results.m_weights[x + y * 4] = ((mode == 18) ? 31 : 15) - astc_results.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = mode;
pResults[total_results].m_common_pattern = 0;
pResults[total_results].m_astc = astc_results;
pResults[total_results].m_astc_err = part_err;
total_results++;
}
}
// MODE 1
// 1-subset, 2-bit indices, 8-bit endpoints, BC7 mode 3
// DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) MODE3 or MODE5 RGB
static void astc_mode1(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
color_cell_compressor_params ccell_params;
memset(&ccell_params, 0, sizeof(ccell_params));
ccell_params.m_num_pixels = 16;
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
ccell_params.m_num_selector_weights = 4;
ccell_params.m_pSelector_weights = g_bc7_weights2;
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
ccell_params.m_astc_endpoint_range = 20;
ccell_params.m_weights[0] = 1;
ccell_params.m_weights[1] = 1;
ccell_params.m_weights[2] = 1;
ccell_params.m_weights[3] = 1;
color_cell_compressor_results ccell_results;
uint8_t ccell_result_selectors[16];
uint8_t ccell_result_selectors_temp[16];
memset(&ccell_results, 0, sizeof(ccell_results));
ccell_results.m_pSelectors = &ccell_result_selectors[0];
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
// ASTC
astc_block_desc astc_results;
memset(&astc_results, 0, sizeof(astc_results));
astc_results.m_dual_plane = false;
astc_results.m_weight_range = 2;
astc_results.m_ccs = 0;
astc_results.m_subsets = 1;
astc_results.m_partition_seed = 0;
astc_results.m_cem = 8;
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
const uint32_t range = 20;
bool invert = false;
int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
invert = true;
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
if (invert)
astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 1;
pResults[total_results].m_common_pattern = 0;
pResults[total_results].m_astc = astc_results;
pResults[total_results].m_astc_err = part_err;
total_results++;
}
}
// Picks a two-subset common partition pattern for a block by comparing estimated errors.
// For every entry of g_astc_bc7_common_partitions2 the block's texels are split according
// to the entry's BC7 partition, each subset is passed to color_cell_compression_est_astc(),
// and the per-subset results are summed. The pattern with the smallest sum is returned;
// on ties the lowest index wins.
//   num_weights - number of entries in pWeights.
//   num_comps   - component count forwarded to the estimator.
//   pWeights    - selector weight table; its first entry must be 0 and its last 64 (asserted).
//   block       - source texels, read in raster order.
//   weights     - four values forwarded to the estimator unchanged.
// The current best sum is also handed to the estimator (presumably as an early-out
// threshold - confirm against color_cell_compression_est_astc).
static uint32_t estimate_partition2(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], const uint32_t weights[4])
{
    assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 64);

    uint64_t best_err = UINT64_MAX;
    uint32_t best_common_pattern = 0;

    for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++)
    {
        const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
        const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16];

        // Bucket the 16 texels by subset.
        color_quad_u8 subset_colors[2][16];
        uint32_t subset_total_colors[2] = { 0, 0 };
        for (uint32_t index = 0; index < 16; index++)
        {
            const uint32_t subset = pPartition[index];
            subset_colors[subset][subset_total_colors[subset]] = ((const color_quad_u8*)block)[index];
            subset_total_colors[subset]++;
        }

        // Sum the subset estimates, giving up as soon as the sum can no longer beat the best.
        uint64_t total_subset_err = 0;
        for (uint32_t subset = 0; subset < 2; subset++)
        {
            if (total_subset_err >= best_err)
                break;
            total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights);
        }

        if (total_subset_err >= best_err)
            continue;

        best_err = total_subset_err;
        best_common_pattern = common_pattern;
    }

    return best_common_pattern;
}
// MODE 2
// 2-subset, 3-bit indices, 4-bit endpoints, BC7 mode 1
// DualPlane: 0, WeightRange: 5 (8), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 8 (16) MODE1
static void astc_mode2(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
{
uint32_t first_common_pattern = 0;
uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2;
if (estimate_partition)
{
const uint32_t weights[4] = { 1, 1, 1, 1 };
first_common_pattern = estimate_partition2(8, 3, g_bc7_weights3, block, weights);
last_common_pattern = first_common_pattern + 1;
}
for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
{
const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
color_rgba part_pixels[2][16];
uint32_t part_pixel_index[4][4];
uint32_t num_part_pixels[2] = { 0, 0 };
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
part_pixel_index[y][x] = num_part_pixels[part];
part_pixels[part][num_part_pixels[part]++] = block[y][x];
}
}
color_cell_compressor_params ccell_params[2];
color_cell_compressor_results ccell_results[2];
uint8_t ccell_result_selectors[2][16];
uint8_t ccell_result_selectors_temp[2][16];
uint64_t total_part_err = 0;
for (uint32_t part = 0; part < 2; part++)
{
memset(&ccell_params[part], 0, sizeof(ccell_params[part]));
ccell_params[part].m_num_pixels = num_part_pixels[part];
ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0];
ccell_params[part].m_num_selector_weights = 8;
ccell_params[part].m_pSelector_weights = g_bc7_weights3;
ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x;
ccell_params[part].m_astc_endpoint_range = 8;
ccell_params[part].m_weights[0] = 1;
ccell_params[part].m_weights[1] = 1;
ccell_params[part].m_weights[2] = 1;
ccell_params[part].m_weights[3] = 1;
memset(&ccell_results[part], 0, sizeof(ccell_results[part]));
ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0];
ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0];
uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params);
total_part_err += part_err;
} // part
{
// ASTC
astc_block_desc astc_results;
memset(&astc_results, 0, sizeof(astc_results));
astc_results.m_dual_plane = false;
astc_results.m_weight_range = 5;
astc_results.m_ccs = 0;
astc_results.m_subsets = 2;
astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc;
astc_results.m_cem = 8;
uint32_t p0 = 0;
uint32_t p1 = 1;
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
std::swap(p0, p1);
astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0];
astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0];
astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1];
astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1];
astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2];
astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2];
const uint32_t range = 8;
bool invert[2] = { false, false };
int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
invert[0] = true;
}
astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0];
astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0];
astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1];
astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1];
astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2];
astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2];
s0 = g_astc_unquant[range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4 + 6]].m_unquant;
s1 = g_astc_unquant[range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5 + 6]].m_unquant;
if (s1 < s0)
{
std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]);
std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]);
std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]);
invert[1] = true;
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
uint32_t astc_part = bc7_part;
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
astc_part = 1 - astc_part;
if (invert[astc_part])
astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 2;
pResults[total_results].m_common_pattern = common_pattern;
pResults[total_results].m_astc = astc_results;
pResults[total_results].m_astc_err = total_part_err;
total_results++;
}
}
} // common_pattern
}
// MODE 3
// 3-subsets, 2-bit indices, [0,11] endpoints, BC7 mode 2
// DualPlane: 0, WeightRange: 2 (4), Subsets: 3, CEM: 8 (RGB Direct ), EndpointRange: 7 (12) MODE2
static void astc_mode3(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
{
uint32_t first_common_pattern = 0;
uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS3;
if (estimate_partition)
{
uint64_t best_err = UINT64_MAX;
uint32_t best_common_pattern = 0;
const uint32_t weights[4] = { 1, 1, 1, 1 };
for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS3; common_pattern++)
{
const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7;
const uint8_t* pPartition = &g_bc7_partition3[bc7_pattern * 16];
color_quad_u8 subset_colors[3][16];
uint32_t subset_total_colors[3] = { 0, 0 };
for (uint32_t index = 0; index < 16; index++)
subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];
uint64_t total_subset_err = 0;
for (uint32_t subset = 0; (subset < 3) && (total_subset_err < best_err); subset++)
total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights);
if (total_subset_err < best_err)
{
best_err = total_subset_err;
best_common_pattern = common_pattern;
}
}
first_common_pattern = best_common_pattern;
last_common_pattern = best_common_pattern + 1;
}
for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
{
const uint32_t endpoint_range = 7;
const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7;
color_rgba part_pixels[3][16];
uint32_t part_pixel_index[4][4];
uint32_t num_part_pixels[3] = { 0, 0, 0 };
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4];
part_pixel_index[y][x] = num_part_pixels[bc7_part];
part_pixels[bc7_part][num_part_pixels[bc7_part]++] = block[y][x];
}
}
color_cell_compressor_params ccell_params[3];
color_cell_compressor_results ccell_results[3];
uint8_t ccell_result_selectors[3][16];
uint8_t ccell_result_selectors_temp[3][16];
uint64_t total_part_err = 0;
for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++)
{
memset(&ccell_params[bc7_part], 0, sizeof(ccell_params[bc7_part]));
ccell_params[bc7_part].m_num_pixels = num_part_pixels[bc7_part];
ccell_params[bc7_part].m_pPixels = (color_quad_u8*)&part_pixels[bc7_part][0];
ccell_params[bc7_part].m_num_selector_weights = 4;
ccell_params[bc7_part].m_pSelector_weights = g_bc7_weights2;
ccell_params[bc7_part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
ccell_params[bc7_part].m_astc_endpoint_range = endpoint_range;
ccell_params[bc7_part].m_weights[0] = 1;
ccell_params[bc7_part].m_weights[1] = 1;
ccell_params[bc7_part].m_weights[2] = 1;
ccell_params[bc7_part].m_weights[3] = 1;
memset(&ccell_results[bc7_part], 0, sizeof(ccell_results[bc7_part]));
ccell_results[bc7_part].m_pSelectors = &ccell_result_selectors[bc7_part][0];
ccell_results[bc7_part].m_pSelectors_temp = &ccell_result_selectors_temp[bc7_part][0];
uint64_t part_err = color_cell_compression(255, &ccell_params[bc7_part], &ccell_results[bc7_part], &comp_params);
total_part_err += part_err;
} // part
{
// ASTC
astc_block_desc astc_results;
memset(&astc_results, 0, sizeof(astc_results));
astc_results.m_dual_plane = false;
astc_results.m_weight_range = 2;
astc_results.m_ccs = 0;
astc_results.m_subsets = 3;
astc_results.m_partition_seed = g_astc_bc7_common_partitions3[common_pattern].m_astc;
astc_results.m_cem = 8;
uint32_t astc_to_bc7_part[3]; // converts ASTC to BC7 partition index
const uint32_t perm = g_astc_bc7_common_partitions3[common_pattern].m_astc_to_bc7_perm;
astc_to_bc7_part[0] = g_astc_to_bc7_partition_index_perm_tables[perm][0];
astc_to_bc7_part[1] = g_astc_to_bc7_partition_index_perm_tables[perm][1];
astc_to_bc7_part[2] = g_astc_to_bc7_partition_index_perm_tables[perm][2];
bool invert_astc_part[3] = { false, false, false };
for (uint32_t astc_part = 0; astc_part < 3; astc_part++)
{
uint8_t* pEndpoints = &astc_results.m_endpoints[6 * astc_part];
pEndpoints[0] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[0];
pEndpoints[1] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[0];
pEndpoints[2] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[1];
pEndpoints[3] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[1];
pEndpoints[4] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[2];
pEndpoints[5] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[2];
int s0 = g_astc_unquant[endpoint_range][pEndpoints[0]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[2]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][pEndpoints[1]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[3]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(pEndpoints[0], pEndpoints[1]);
std::swap(pEndpoints[2], pEndpoints[3]);
std::swap(pEndpoints[4], pEndpoints[5]);
invert_astc_part[astc_part] = true;
}
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4];
astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
uint32_t astc_part = 0;
for (uint32_t i = 0; i < 3; i++)
{
if (astc_to_bc7_part[i] == bc7_part)
{
astc_part = i;
break;
}
}
if (invert_astc_part[astc_part])
astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 3;
pResults[total_results].m_common_pattern = common_pattern;
pResults[total_results].m_astc = astc_results;
pResults[total_results].m_astc_err = total_part_err;
total_results++;
}
}
} // common_pattern
}
// MODE 4
// DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 12 (40) MODE3
static void astc_mode4(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
{
//const uint32_t weight_range = 2;
const uint32_t endpoint_range = 12;
uint32_t first_common_pattern = 0;
uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2;
if (estimate_partition)
{
const uint32_t weights[4] = { 1, 1, 1, 1 };
first_common_pattern = estimate_partition2(4, 3, g_bc7_weights2, block, weights);
last_common_pattern = first_common_pattern + 1;
}
for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
{
const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
color_rgba part_pixels[2][16];
uint32_t part_pixel_index[4][4];
uint32_t num_part_pixels[2] = { 0, 0 };
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
part_pixel_index[y][x] = num_part_pixels[part];
part_pixels[part][num_part_pixels[part]++] = block[y][x];
}
}
color_cell_compressor_params ccell_params[2];
color_cell_compressor_results ccell_results[2];
uint8_t ccell_result_selectors[2][16];
uint8_t ccell_result_selectors_temp[2][16];
uint64_t total_part_err = 0;
for (uint32_t part = 0; part < 2; part++)
{
memset(&ccell_params[part], 0, sizeof(ccell_params[part]));
ccell_params[part].m_num_pixels = num_part_pixels[part];
ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0];
ccell_params[part].m_num_selector_weights = 4;
ccell_params[part].m_pSelector_weights = g_bc7_weights2;
ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
ccell_params[part].m_astc_endpoint_range = endpoint_range;
ccell_params[part].m_weights[0] = 1;
ccell_params[part].m_weights[1] = 1;
ccell_params[part].m_weights[2] = 1;
ccell_params[part].m_weights[3] = 1;
memset(&ccell_results[part], 0, sizeof(ccell_results[part]));
ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0];
ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0];
uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params);
total_part_err += part_err;
} // part
// ASTC
astc_block_desc astc_results;
memset(&astc_results, 0, sizeof(astc_results));
astc_results.m_dual_plane = false;
astc_results.m_weight_range = 2;
astc_results.m_ccs = 0;
astc_results.m_subsets = 2;
astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc;
astc_results.m_cem = 8;
uint32_t p0 = 0;
uint32_t p1 = 1;
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
std::swap(p0, p1);
astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0];
astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0];
astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1];
astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1];
astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2];
astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2];
bool invert[2] = { false, false };
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
invert[0] = true;
}
astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0];
astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0];
astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1];
astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1];
astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2];
astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2];
s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4 + 6]].m_unquant;
s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5 + 6]].m_unquant;
if (s1 < s0)
{
std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]);
std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]);
std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]);
invert[1] = true;
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
uint32_t astc_part = bc7_part;
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
astc_part = 1 - astc_part;
if (invert[astc_part])
astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 4;
pResults[total_results].m_common_pattern = common_pattern;
pResults[total_results].m_astc = astc_results;
pResults[total_results].m_astc_err = total_part_err;
total_results++;
}
} // common_pattern
}
// MODE 5
// DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) BC7 MODE 6 (or MODE 1 1-subset)
static void astc_mode5(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
const uint32_t weight_range = 5;
const uint32_t endpoint_range = 20;
color_cell_compressor_params ccell_params;
memset(&ccell_params, 0, sizeof(ccell_params));
ccell_params.m_num_pixels = 16;
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
ccell_params.m_num_selector_weights = 8;
ccell_params.m_pSelector_weights = g_bc7_weights3;
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x;
ccell_params.m_astc_endpoint_range = endpoint_range;
ccell_params.m_weights[0] = 1;
ccell_params.m_weights[1] = 1;
ccell_params.m_weights[2] = 1;
ccell_params.m_weights[3] = 1;
color_cell_compressor_results ccell_results;
uint8_t ccell_result_selectors[16];
uint8_t ccell_result_selectors_temp[16];
memset(&ccell_results, 0, sizeof(ccell_results));
ccell_results.m_pSelectors = &ccell_result_selectors[0];
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
// ASTC
astc_block_desc blk;
memset(&blk, 0, sizeof(blk));
blk.m_dual_plane = false;
blk.m_weight_range = weight_range;
blk.m_ccs = 0;
blk.m_subsets = 1;
blk.m_partition_seed = 0;
blk.m_cem = 8;
blk.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
blk.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
blk.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
blk.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
blk.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
blk.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
bool invert = false;
int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
invert = true;
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
blk.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
if (invert)
blk.m_weights[x + y * 4] = 7 - blk.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 5;
pResults[total_results].m_common_pattern = 0;
pResults[total_results].m_astc = blk;
pResults[total_results].m_astc_err = part_err;
total_results++;
}
}
// MODE 6
// DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 18 (160) BC7 MODE5
static void astc_mode6(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
for (uint32_t rot_comp = 0; rot_comp < 3; rot_comp++)
{
const uint32_t weight_range = 2;
const uint32_t endpoint_range = 18;
color_quad_u8 block_rgb[16];
color_quad_u8 block_a[16];
for (uint32_t i = 0; i < 16; i++)
{
block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
block_a[i] = block_rgb[i];
uint8_t c = block_a[i].m_c[rot_comp];
block_a[i].m_c[0] = c;
block_a[i].m_c[1] = c;
block_a[i].m_c[2] = c;
block_a[i].m_c[3] = 255;
block_rgb[i].m_c[rot_comp] = 255;
}
uint8_t ccell_result_selectors_temp[16];
color_cell_compressor_params ccell_params_rgb;
memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));
ccell_params_rgb.m_num_pixels = 16;
ccell_params_rgb.m_pPixels = block_rgb;
ccell_params_rgb.m_num_selector_weights = 4;
ccell_params_rgb.m_pSelector_weights = g_bc7_weights2;
ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
ccell_params_rgb.m_weights[0] = 1;
ccell_params_rgb.m_weights[1] = 1;
ccell_params_rgb.m_weights[2] = 1;
ccell_params_rgb.m_weights[3] = 1;
color_cell_compressor_results ccell_results_rgb;
uint8_t ccell_result_selectors_rgb[16];
memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &comp_params);
color_cell_compressor_params ccell_params_a;
memset(&ccell_params_a, 0, sizeof(ccell_params_a));
ccell_params_a.m_num_pixels = 16;
ccell_params_a.m_pPixels = block_a;
ccell_params_a.m_num_selector_weights = 4;
ccell_params_a.m_pSelector_weights = g_bc7_weights2;
ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
ccell_params_a.m_astc_endpoint_range = endpoint_range;
ccell_params_a.m_weights[0] = 1;
ccell_params_a.m_weights[1] = 1;
ccell_params_a.m_weights[2] = 1;
ccell_params_a.m_weights[3] = 1;
color_cell_compressor_results ccell_results_a;
uint8_t ccell_result_selectors_a[16];
memset(&ccell_results_a, 0, sizeof(ccell_results_a));
ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &comp_params) / 3;
uint64_t total_err = part_err_rgb + part_err_a;
// ASTC
astc_block_desc blk;
memset(&blk, 0, sizeof(blk));
blk.m_dual_plane = true;
blk.m_weight_range = weight_range;
blk.m_ccs = rot_comp;
blk.m_subsets = 1;
blk.m_partition_seed = 0;
blk.m_cem = 8;
blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
bool invert = false;
int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
invert = true;
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
uint32_t a_index = ccell_result_selectors_a[x + y * 4];
if (invert)
{
rgb_index = 3 - rgb_index;
a_index = 3 - a_index;
}
blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 6;
pResults[total_results].m_common_pattern = 0;
pResults[total_results].m_astc = blk;
pResults[total_results].m_astc_err = total_err;
total_results++;
}
} // rot_comp
}
// MODE 7 - 2 subset ASTC, 3 subset BC7
// DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 12 (40) MODE2
static void astc_mode7(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
{
uint32_t first_common_pattern = 0;
uint32_t last_common_pattern = TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS;
if (estimate_partition)
{
uint64_t best_err = UINT64_MAX;
uint32_t best_common_pattern = 0;
const uint32_t weights[4] = { 1, 1, 1, 1 };
for (uint32_t common_pattern = 0; common_pattern < TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS; common_pattern++)
{
const uint8_t* pPartition = &g_bc7_3_astc2_patterns2[common_pattern][0];
#ifdef _DEBUG
const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2;
const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73;
const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k;
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k);
assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true));
assert(astc_part == pPartition[x + y * 4]);
}
}
#endif
color_quad_u8 subset_colors[2][16];
uint32_t subset_total_colors[2] = { 0, 0 };
for (uint32_t index = 0; index < 16; index++)
subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];
uint64_t total_subset_err = 0;
for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++)
total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights);
if (total_subset_err < best_err)
{
best_err = total_subset_err;
best_common_pattern = common_pattern;
}
}
first_common_pattern = best_common_pattern;
last_common_pattern = best_common_pattern + 1;
}
//const uint32_t weight_range = 2;
const uint32_t endpoint_range = 12;
for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
{
const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2;
const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73;
const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k;
color_rgba part_pixels[2][16];
uint32_t part_pixel_index[4][4];
uint32_t num_part_pixels[2] = { 0, 0 };
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k);
#ifdef _DEBUG
assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true));
#endif
part_pixel_index[y][x] = num_part_pixels[astc_part];
part_pixels[astc_part][num_part_pixels[astc_part]++] = block[y][x];
}
}
color_cell_compressor_params ccell_params[2];
color_cell_compressor_results ccell_results[2];
uint8_t ccell_result_selectors[2][16];
uint8_t ccell_result_selectors_temp[2][16];
uint64_t total_part_err = 0;
for (uint32_t part = 0; part < 2; part++)
{
memset(&ccell_params[part], 0, sizeof(ccell_params[part]));
ccell_params[part].m_num_pixels = num_part_pixels[part];
ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0];
ccell_params[part].m_num_selector_weights = 4;
ccell_params[part].m_pSelector_weights = g_bc7_weights2;
ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
ccell_params[part].m_astc_endpoint_range = endpoint_range;
ccell_params[part].m_weights[0] = 1;
ccell_params[part].m_weights[1] = 1;
ccell_params[part].m_weights[2] = 1;
ccell_params[part].m_weights[3] = 1;
memset(&ccell_results[part], 0, sizeof(ccell_results[part]));
ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0];
ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0];
uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params);
total_part_err += part_err;
} // part
// ASTC
astc_block_desc blk;
memset(&blk, 0, sizeof(blk));
blk.m_dual_plane = false;
blk.m_weight_range = 2;
blk.m_ccs = 0;
blk.m_subsets = 2;
blk.m_partition_seed = astc_pattern;
blk.m_cem = 8;
const uint32_t p0 = 0;
const uint32_t p1 = 1;
blk.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0];
blk.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0];
blk.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1];
blk.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1];
blk.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2];
blk.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2];
bool invert[2] = { false, false };
int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
invert[0] = true;
}
blk.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0];
blk.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0];
blk.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1];
blk.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1];
blk.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2];
blk.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2];
s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4 + 6]].m_unquant;
s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5 + 6]].m_unquant;
if (s1 < s0)
{
std::swap(blk.m_endpoints[0 + 6], blk.m_endpoints[1 + 6]);
std::swap(blk.m_endpoints[2 + 6], blk.m_endpoints[3 + 6]);
std::swap(blk.m_endpoints[4 + 6], blk.m_endpoints[5 + 6]);
invert[1] = true;
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k);
blk.m_weights[x + y * 4] = ccell_result_selectors[astc_part][part_pixel_index[y][x]];
if (invert[astc_part])
blk.m_weights[x + y * 4] = 3 - blk.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 7;
pResults[total_results].m_common_pattern = common_pattern;
pResults[total_results].m_astc = blk;
pResults[total_results].m_astc_err = total_part_err;
total_results++;
}
} // common_pattern
}
// Ranks the common 2-subset ASTC/BC7 partition patterns by estimated compression error and
// writes the indices of the lowest-error patterns to pParts, best first.
//   num_weights, num_comps, pWeights, weights - forwarded to color_cell_compression_est_astc();
//                 pWeights must start at 0 and end at 64 (checked by assert)
//   block       - source texels, read as 16 consecutive color_quad_u8 values
//   pParts      - output array of max_parts entries; all are zeroed first, and slots that no
//                 pattern claims stay 0
//   max_parts   - number of entries wanted; callers are expected to pass at most 8. At most 8
//                 entries are ever ranked, so a larger value cannot overrun the local error table
//                 when asserts are compiled out.
// Patterns with equal error keep their table order because the comparison is strict.
static void estimate_partition2_list(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], uint32_t* pParts, uint32_t max_parts, const uint32_t weights[4])
{
	assert(num_weights > 0);
	assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 64);

	const uint32_t MAX_PARTS = 8;
	assert(max_parts <= MAX_PARTS);

	// Nothing to rank. Returning here also keeps the unsigned arithmetic below from wrapping.
	if (!max_parts)
		return;

	memset(pParts, 0, sizeof(pParts[0]) * max_parts);

	// Never index past part_error[], even if the assert above is compiled out.
	const uint32_t num_parts = std::min(max_parts, MAX_PARTS);

	// 0xFF fill == UINT64_MAX in every slot, i.e. "no candidate yet".
	uint64_t part_error[MAX_PARTS];
	memset(part_error, 0xFF, sizeof(part_error));

	for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++)
	{
		const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;

		const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16];

		// Bucket the texels by subset.
		color_quad_u8 subset_colors[2][16];
		uint32_t subset_total_colors[2] = { 0, 0 };
		for (uint32_t index = 0; index < 16; index++)
			subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];

		uint64_t total_subset_err = 0;
		for (uint32_t subset = 0; subset < 2; subset++)
			total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], UINT64_MAX, weights);

		// Insertion into the sorted list: find the first slot this pattern beats, shift the tail down by one.
		for (uint32_t i = 0; i < num_parts; i++)
		{
			if (total_subset_err < part_error[i])
			{
				// j > i >= 0, so j never reaches 0 and j - 1 cannot wrap.
				for (uint32_t j = num_parts - 1; j > i; --j)
				{
					pParts[j] = pParts[j - 1];
					part_error[j] = part_error[j - 1];
				}

				pParts[i] = common_pattern;
				part_error[i] = total_subset_err;

				break;
			}
		}
	}

#ifdef _DEBUG
	// The list must be sorted by ascending error. (i + 1) < n avoids the unsigned wrap of n - 1.
	for (uint32_t i = 0; (i + 1) < num_parts; i++)
	{
		assert(part_error[i] <= part_error[i + 1]);
	}
#endif
}
// 9. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 12 (RGBA Direct), EndpointRange: 8 (16) - BC7 MODE 7
// 16. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, CEM: 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE 7
static void astc_mode9_or_16(uint32_t mode, const color_rgba source_block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, uint32_t estimate_partition_list_size)
{
assert(mode == 9 || mode == 16);
const color_rgba* pBlock = &source_block[0][0];
color_rgba temp_block[16];
if (mode == 16)
{
for (uint32_t i = 0; i < 16; i++)
{
if (mode == 16)
{
assert(pBlock[i].r == pBlock[i].g);
assert(pBlock[i].r == pBlock[i].b);
}
const uint32_t l = pBlock[i].r;
const uint32_t a = pBlock[i].a;
// Use (l,0,0,a) not (l,l,l,a) so both components are treated equally.
temp_block[i].set_noclamp_rgba(l, 0, 0, a);
}
pBlock = temp_block;
}
const uint32_t weights[4] = { 1, 1, 1, 1 };
//const uint32_t weight_range = 2;
const uint32_t endpoint_range = (mode == 16) ? 20 : 8;
uint32_t first_common_pattern = 0;
uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2;
bool use_part_list = false;
const uint32_t MAX_PARTS = 8;
uint32_t parts[MAX_PARTS];
if (estimate_partition_list_size == 1)
{
first_common_pattern = estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights);
last_common_pattern = first_common_pattern + 1;
}
else if (estimate_partition_list_size > 0)
{
assert(estimate_partition_list_size <= MAX_PARTS);
estimate_partition_list_size = std::min(estimate_partition_list_size, MAX_PARTS);
estimate_partition2_list(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, parts, estimate_partition_list_size, weights);
first_common_pattern = 0;
last_common_pattern = estimate_partition_list_size;
use_part_list = true;
#ifdef _DEBUG
assert(parts[0] == estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights));
#endif
}
for (uint32_t common_pattern_iter = first_common_pattern; common_pattern_iter < last_common_pattern; common_pattern_iter++)
{
const uint32_t common_pattern = use_part_list ? parts[common_pattern_iter] : common_pattern_iter;
const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
color_rgba part_pixels[2][16];
uint32_t part_pixel_index[4][4];
uint32_t num_part_pixels[2] = { 0, 0 };
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
part_pixel_index[y][x] = num_part_pixels[part];
part_pixels[part][num_part_pixels[part]++] = pBlock[y * 4 + x];
}
}
color_cell_compressor_params ccell_params[2];
color_cell_compressor_results ccell_results[2];
uint8_t ccell_result_selectors[2][16];
uint8_t ccell_result_selectors_temp[2][16];
uint64_t total_err = 0;
for (uint32_t subset = 0; subset < 2; subset++)
{
memset(&ccell_params[subset], 0, sizeof(ccell_params[subset]));
ccell_params[subset].m_num_pixels = num_part_pixels[subset];
ccell_params[subset].m_pPixels = (color_quad_u8*)&part_pixels[subset][0];
ccell_params[subset].m_num_selector_weights = 4;
ccell_params[subset].m_pSelector_weights = g_bc7_weights2;
ccell_params[subset].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
ccell_params[subset].m_astc_endpoint_range = endpoint_range;
ccell_params[subset].m_weights[0] = weights[0];
ccell_params[subset].m_weights[1] = weights[1];
ccell_params[subset].m_weights[2] = weights[2];
ccell_params[subset].m_weights[3] = weights[3];
ccell_params[subset].m_has_alpha = true;
memset(&ccell_results[subset], 0, sizeof(ccell_results[subset]));
ccell_results[subset].m_pSelectors = &ccell_result_selectors[subset][0];
ccell_results[subset].m_pSelectors_temp = &ccell_result_selectors_temp[subset][0];
uint64_t subset_err = color_cell_compression(255, &ccell_params[subset], &ccell_results[subset], &comp_params);
if (mode == 16)
{
color_rgba colors[4];
for (uint32_t c = 0; c < 4; c++)
{
colors[0].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
colors[3].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
}
for (uint32_t i = 1; i < 4 - 1; i++)
for (uint32_t c = 0; c < 4; c++)
colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[3].m_comps[c], g_bc7_weights2[i], false);
for (uint32_t p = 0; p < ccell_params[subset].m_num_pixels; p++)
{
color_rgba orig_pix(part_pixels[subset][p]);
orig_pix.g = orig_pix.r;
orig_pix.b = orig_pix.r;
total_err += color_distance_la(orig_pix, colors[ccell_result_selectors[subset][p]]);
}
}
else
{
total_err += subset_err;
}
} // subset
// ASTC
astc_block_desc astc_results;
memset(&astc_results, 0, sizeof(astc_results));
astc_results.m_dual_plane = false;
astc_results.m_weight_range = 2;
astc_results.m_ccs = 0;
astc_results.m_subsets = 2;
astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc;
astc_results.m_cem = (mode == 16) ? 4 : 12;
uint32_t part[2] = { 0, 1 };
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
std::swap(part[0], part[1]);
bool invert[2] = { false, false };
for (uint32_t p = 0; p < 2; p++)
{
if (mode == 16)
{
astc_results.m_endpoints[p * 4 + 0] = ccell_results[part[p]].m_astc_low_endpoint.m_c[0];
astc_results.m_endpoints[p * 4 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[0];
astc_results.m_endpoints[p * 4 + 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[3];
astc_results.m_endpoints[p * 4 + 3] = ccell_results[part[p]].m_astc_high_endpoint.m_c[3];
}
else
{
for (uint32_t c = 0; c < 4; c++)
{
astc_results.m_endpoints[p * 8 + c * 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[c];
astc_results.m_endpoints[p * 8 + c * 2 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[c];
}
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 0]].m_unquant +
g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 2]].m_unquant +
g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 1]].m_unquant +
g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 3]].m_unquant +
g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 5]].m_unquant;
if (s1 < s0)
{
std::swap(astc_results.m_endpoints[p * 8 + 0], astc_results.m_endpoints[p * 8 + 1]);
std::swap(astc_results.m_endpoints[p * 8 + 2], astc_results.m_endpoints[p * 8 + 3]);
std::swap(astc_results.m_endpoints[p * 8 + 4], astc_results.m_endpoints[p * 8 + 5]);
std::swap(astc_results.m_endpoints[p * 8 + 6], astc_results.m_endpoints[p * 8 + 7]);
invert[p] = true;
}
}
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
uint32_t astc_part = bc7_part;
if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
astc_part = 1 - astc_part;
if (invert[astc_part])
astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = mode;
pResults[total_results].m_common_pattern = common_pattern;
pResults[total_results].m_astc = astc_results;
pResults[total_results].m_astc_err = total_err;
total_results++;
}
} // common_pattern
}
// MODE 10
// DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 13 (48) MODE6
static void astc_mode10(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
const uint32_t weight_range = 8;
const uint32_t endpoint_range = 13;
color_cell_compressor_params ccell_params;
memset(&ccell_params, 0, sizeof(ccell_params));
ccell_params.m_num_pixels = 16;
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
ccell_params.m_num_selector_weights = 16;
ccell_params.m_pSelector_weights = g_astc_weights4;
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x;
ccell_params.m_astc_endpoint_range = endpoint_range;
ccell_params.m_weights[0] = 1;
ccell_params.m_weights[1] = 1;
ccell_params.m_weights[2] = 1;
ccell_params.m_weights[3] = 1;
ccell_params.m_has_alpha = true;
color_cell_compressor_results ccell_results;
uint8_t ccell_result_selectors[16];
uint8_t ccell_result_selectors_temp[16];
memset(&ccell_results, 0, sizeof(ccell_results));
ccell_results.m_pSelectors = &ccell_result_selectors[0];
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
// ASTC
astc_block_desc astc_results;
memset(&astc_results, 0, sizeof(astc_results));
astc_results.m_dual_plane = false;
astc_results.m_weight_range = weight_range;
astc_results.m_ccs = 0;
astc_results.m_subsets = 1;
astc_results.m_partition_seed = 0;
astc_results.m_cem = 12;
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];
bool invert = false;
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
invert = true;
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
if (invert)
astc_results.m_weights[x + y * 4] = 15 - astc_results.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 10;
pResults[total_results].m_common_pattern = 0;
pResults[total_results].m_astc = astc_results;
pResults[total_results].m_astc_err = part_err;
total_results++;
}
}
// 11. DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct), EndpointRange: 13 (48) MODE5
// 17. DualPlane: 1, WeightRange : 2 (4), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE5
static void astc_mode11_or_17(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
assert((mode == 11) || (mode == 17));
const uint32_t weight_range = 2;
const uint32_t endpoint_range = (mode == 17) ? 20 : 13;
bc7enc_compress_block_params local_comp_params(comp_params);
local_comp_params.m_perceptual = false;
local_comp_params.m_weights[0] = 1;
local_comp_params.m_weights[1] = 1;
local_comp_params.m_weights[2] = 1;
local_comp_params.m_weights[3] = 1;
const uint32_t last_rot_comp = (mode == 17) ? 1 : 4;
for (uint32_t rot_comp = 0; rot_comp < last_rot_comp; rot_comp++)
{
color_quad_u8 block_rgb[16];
color_quad_u8 block_a[16];
for (uint32_t i = 0; i < 16; i++)
{
block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
block_a[i] = block_rgb[i];
if (mode == 17)
{
assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[1]);
assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[2]);
block_a[i].m_c[0] = block_rgb[i].m_c[3];
block_a[i].m_c[1] = block_rgb[i].m_c[3];
block_a[i].m_c[2] = block_rgb[i].m_c[3];
block_a[i].m_c[3] = 255;
block_rgb[i].m_c[1] = block_rgb[i].m_c[0];
block_rgb[i].m_c[2] = block_rgb[i].m_c[0];
block_rgb[i].m_c[3] = 255;
}
else
{
uint8_t c = block_a[i].m_c[rot_comp];
block_a[i].m_c[0] = c;
block_a[i].m_c[1] = c;
block_a[i].m_c[2] = c;
block_a[i].m_c[3] = 255;
block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3];
block_rgb[i].m_c[3] = 255;
}
}
uint8_t ccell_result_selectors_temp[16];
color_cell_compressor_params ccell_params_rgb;
memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));
ccell_params_rgb.m_num_pixels = 16;
ccell_params_rgb.m_pPixels = block_rgb;
ccell_params_rgb.m_num_selector_weights = 4;
ccell_params_rgb.m_pSelector_weights = g_bc7_weights2;
ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
ccell_params_rgb.m_weights[0] = 1;
ccell_params_rgb.m_weights[1] = 1;
ccell_params_rgb.m_weights[2] = 1;
ccell_params_rgb.m_weights[3] = 1;
color_cell_compressor_results ccell_results_rgb;
uint8_t ccell_result_selectors_rgb[16];
memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params);
color_cell_compressor_params ccell_params_a;
memset(&ccell_params_a, 0, sizeof(ccell_params_a));
ccell_params_a.m_num_pixels = 16;
ccell_params_a.m_pPixels = block_a;
ccell_params_a.m_num_selector_weights = 4;
ccell_params_a.m_pSelector_weights = g_bc7_weights2;
ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
ccell_params_a.m_astc_endpoint_range = endpoint_range;
ccell_params_a.m_weights[0] = 1;
ccell_params_a.m_weights[1] = 1;
ccell_params_a.m_weights[2] = 1;
ccell_params_a.m_weights[3] = 1;
color_cell_compressor_results ccell_results_a;
uint8_t ccell_result_selectors_a[16];
memset(&ccell_results_a, 0, sizeof(ccell_results_a));
ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3;
uint64_t total_err = (mode == 17) ? ((part_err_rgb / 3) + part_err_a) : (part_err_rgb + part_err_a);
// ASTC
astc_block_desc blk;
memset(&blk, 0, sizeof(blk));
blk.m_dual_plane = true;
blk.m_weight_range = weight_range;
blk.m_ccs = (mode == 17) ? 3 : rot_comp;
blk.m_subsets = 1;
blk.m_partition_seed = 0;
blk.m_cem = (mode == 17) ? 4 : 12;
bool invert = false;
if (mode == 17)
{
assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[1]);
assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[2]);
assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[1]);
assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[2]);
blk.m_endpoints[0] = ccell_results_rgb.m_astc_low_endpoint.m_c[0];
blk.m_endpoints[1] = ccell_results_rgb.m_astc_high_endpoint.m_c[0];
blk.m_endpoints[2] = ccell_results_a.m_astc_low_endpoint.m_c[0];
blk.m_endpoints[3] = ccell_results_a.m_astc_high_endpoint.m_c[0];
}
else
{
blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
if (rot_comp == 3)
{
blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0];
blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0];
}
else
{
blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp];
blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp];
}
int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
std::swap(blk.m_endpoints[6], blk.m_endpoints[7]);
invert = true;
}
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
uint32_t a_index = ccell_result_selectors_a[x + y * 4];
if (invert)
{
rgb_index = 3 - rgb_index;
a_index = 3 - a_index;
}
blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = mode;
pResults[total_results].m_common_pattern = 0;
pResults[total_results].m_astc = blk;
pResults[total_results].m_astc_err = total_err;
total_results++;
}
} // rot_comp
}
// MODE 12
// DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 19 (192) MODE6
static void astc_mode12(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
const uint32_t weight_range = 5;
const uint32_t endpoint_range = 19;
color_cell_compressor_params ccell_params;
memset(&ccell_params, 0, sizeof(ccell_params));
ccell_params.m_num_pixels = 16;
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
ccell_params.m_num_selector_weights = 8;
ccell_params.m_pSelector_weights = g_bc7_weights3;
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x;
ccell_params.m_astc_endpoint_range = endpoint_range;
ccell_params.m_weights[0] = 1;
ccell_params.m_weights[1] = 1;
ccell_params.m_weights[2] = 1;
ccell_params.m_weights[3] = 1;
ccell_params.m_has_alpha = true;
color_cell_compressor_results ccell_results;
uint8_t ccell_result_selectors[16];
uint8_t ccell_result_selectors_temp[16];
memset(&ccell_results, 0, sizeof(ccell_results));
ccell_results.m_pSelectors = &ccell_result_selectors[0];
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
// ASTC
astc_block_desc astc_results;
memset(&astc_results, 0, sizeof(astc_results));
astc_results.m_dual_plane = false;
astc_results.m_weight_range = weight_range;
astc_results.m_ccs = 0;
astc_results.m_subsets = 1;
astc_results.m_partition_seed = 0;
astc_results.m_cem = 12;
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];
bool invert = false;
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
invert = true;
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
if (invert)
astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 12;
pResults[total_results].m_common_pattern = 0;
pResults[total_results].m_astc = astc_results;
pResults[total_results].m_astc_err = part_err;
total_results++;
}
}
// 13. DualPlane: 1, WeightRange: 0 (2), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE5
static void astc_mode13(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
bc7enc_compress_block_params local_comp_params(comp_params);
local_comp_params.m_perceptual = false;
local_comp_params.m_weights[0] = 1;
local_comp_params.m_weights[1] = 1;
local_comp_params.m_weights[2] = 1;
local_comp_params.m_weights[3] = 1;
for (uint32_t rot_comp = 0; rot_comp < 4; rot_comp++)
{
const uint32_t weight_range = 0;
const uint32_t endpoint_range = 20;
color_quad_u8 block_rgb[16];
color_quad_u8 block_a[16];
for (uint32_t i = 0; i < 16; i++)
{
block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
block_a[i] = block_rgb[i];
uint8_t c = block_a[i].m_c[rot_comp];
block_a[i].m_c[0] = c;
block_a[i].m_c[1] = c;
block_a[i].m_c[2] = c;
block_a[i].m_c[3] = 255;
block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3];
block_rgb[i].m_c[3] = 255;
}
uint8_t ccell_result_selectors_temp[16];
color_cell_compressor_params ccell_params_rgb;
memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));
ccell_params_rgb.m_num_pixels = 16;
ccell_params_rgb.m_pPixels = block_rgb;
ccell_params_rgb.m_num_selector_weights = 2;
ccell_params_rgb.m_pSelector_weights = g_bc7_weights1;
ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x;
ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
ccell_params_rgb.m_weights[0] = 1;
ccell_params_rgb.m_weights[1] = 1;
ccell_params_rgb.m_weights[2] = 1;
ccell_params_rgb.m_weights[3] = 1;
color_cell_compressor_results ccell_results_rgb;
uint8_t ccell_result_selectors_rgb[16];
memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params);
color_cell_compressor_params ccell_params_a;
memset(&ccell_params_a, 0, sizeof(ccell_params_a));
ccell_params_a.m_num_pixels = 16;
ccell_params_a.m_pPixels = block_a;
ccell_params_a.m_num_selector_weights = 2;
ccell_params_a.m_pSelector_weights = g_bc7_weights1;
ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x;
ccell_params_a.m_astc_endpoint_range = endpoint_range;
ccell_params_a.m_weights[0] = 1;
ccell_params_a.m_weights[1] = 1;
ccell_params_a.m_weights[2] = 1;
ccell_params_a.m_weights[3] = 1;
color_cell_compressor_results ccell_results_a;
uint8_t ccell_result_selectors_a[16];
memset(&ccell_results_a, 0, sizeof(ccell_results_a));
ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3;
uint64_t total_err = part_err_rgb + part_err_a;
// ASTC
astc_block_desc blk;
memset(&blk, 0, sizeof(blk));
blk.m_dual_plane = true;
blk.m_weight_range = weight_range;
blk.m_ccs = rot_comp;
blk.m_subsets = 1;
blk.m_partition_seed = 0;
blk.m_cem = 12;
blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
if (rot_comp == 3)
{
blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0];
blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0];
}
else
{
blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp];
blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp];
}
bool invert = false;
int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
std::swap(blk.m_endpoints[6], blk.m_endpoints[7]);
invert = true;
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
uint32_t a_index = ccell_result_selectors_a[x + y * 4];
if (invert)
{
rgb_index = 1 - rgb_index;
a_index = 1 - a_index;
}
blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 13;
pResults[total_results].m_common_pattern = 0;
pResults[total_results].m_astc = blk;
pResults[total_results].m_astc_err = total_err;
total_results++;
}
} // rot_comp
}
// MODE14
// DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE6
static void astc_mode14(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
{
const uint32_t weight_range = 2;
const uint32_t endpoint_range = 20;
color_cell_compressor_params ccell_params;
memset(&ccell_params, 0, sizeof(ccell_params));
ccell_params.m_num_pixels = 16;
ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
ccell_params.m_num_selector_weights = 4;
ccell_params.m_pSelector_weights = g_bc7_weights2;
ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
ccell_params.m_astc_endpoint_range = endpoint_range;
ccell_params.m_weights[0] = 1;
ccell_params.m_weights[1] = 1;
ccell_params.m_weights[2] = 1;
ccell_params.m_weights[3] = 1;
ccell_params.m_has_alpha = true;
color_cell_compressor_results ccell_results;
uint8_t ccell_result_selectors[16];
uint8_t ccell_result_selectors_temp[16];
memset(&ccell_results, 0, sizeof(ccell_results));
ccell_results.m_pSelectors = &ccell_result_selectors[0];
ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
// ASTC
astc_block_desc astc_results;
memset(&astc_results, 0, sizeof(astc_results));
astc_results.m_dual_plane = false;
astc_results.m_weight_range = weight_range;
astc_results.m_ccs = 0;
astc_results.m_subsets = 1;
astc_results.m_partition_seed = 0;
astc_results.m_cem = 12;
astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];
bool invert = false;
int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
if (s1 < s0)
{
std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
invert = true;
}
for (uint32_t y = 0; y < 4; y++)
{
for (uint32_t x = 0; x < 4; x++)
{
astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
if (invert)
astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
}
}
assert(total_results < MAX_ENCODE_RESULTS);
if (total_results < MAX_ENCODE_RESULTS)
{
pResults[total_results].m_uastc_mode = 14;
pResults[total_results].m_common_pattern = 0;
pResults[total_results].m_astc = astc_results;
pResults[total_results].m_astc_err = part_err;
total_results++;