blob: b720e2690ed1ebe4ba313c1e9564ea960b025801 [file] [log] [blame]
// File: basisu_astc_hdr_common.cpp
#include "basisu_enc.h"
#include "basisu_gpu_texture.h"
#include "../transcoder/basisu_astc_helpers.h"
#include "../transcoder/basisu_astc_hdr_core.h"
#include "basisu_astc_hdr_common.h"
using namespace basist;
#ifndef __EMSCRIPTEN__
#define BASISU_MULTITHREADED_INIT (0)
#endif
namespace basisu
{
// Unquantized ASTC weight values for every supported ISE weight range, indexed [ise_range][slot].
// Slot 0 holds the number of weight levels in that range; slot (1 + i) holds the weight for weight index i,
// on a 0..64 scale (the users in this file multiply it by 1/64 or blend with (64 - w) / w).
// interpolate_qlog12_colors() asserts that slot (1 + i) equals astc_helpers::dequant_bise_weight(i, ise_range).
const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33] =
{
{ 2, 0, 64 }, // 0, note ise range=0 is invalid for 4x4 block sizes (<24 weight bits in the block)
{ 3, 0, 32, 64 }, // 1
{ 4, 0, 21, 43, 64 }, // 2
{ 5, 0, 16, 32, 48, 64 }, // 3
{ 6, 0, 64, 12, 52, 25, 39 }, // 4
{ 8, 0, 9, 18, 27, 37, 46, 55, 64 }, // 5
{ 10, 0, 64, 7, 57, 14, 50, 21, 43, 28, 36 }, // 6
{ 12, 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36 }, // 7
{ 16, 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 }, // 8
{ 20, 0,64,16,48,3,61,19,45,6,58,23,41,9,55,26,38,13,51,29,35}, // 9
{ 24, 0,64,8,56,16,48,24,40,2,62,11,53,19,45,27,37,5,59,13,51,22,42,30,34}, // 10
{ 32, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64}, // 11
};
//--------------------------------------------------------------------------------------------------------------------------
// Default red/green error scales installed by astc_hdr_codec_base_options::init().
// Note that eval_selectors() compares the options against the literals 2.0f / 3.0f (not these names)
// to decide whether its integer path, which hard-codes the same 2/3 factors, may be used.
const float DEF_R_ERROR_SCALE = 2.0f;
const float DEF_G_ERROR_SCALE = 3.0f;
// Resets every option touched here to its default value.
void astc_hdr_codec_base_options::init()
{
	// Error metric configuration.
	m_q_log_bias = Q_LOG_BIAS_4x4;
	m_g_err_scale = DEF_G_ERROR_SCALE;
	m_r_err_scale = DEF_R_ERROR_SCALE;

	// Search/quantization switches.
	m_ultra_quant = false;
	m_mode7_full_s_optimization = true;
	m_take_first_non_clamping_mode7_submode = false;
	m_take_first_non_clamping_mode11_submode = false;
	m_disable_weight_plane_optimization = true;

	// Disabling by default to avoid transcoding outliers (try kodim26). The quality lost is very low.
	// TODO: Could include the uber result in the output.
	m_allow_uber_mode = false;
}
//--------------------------------------------------------------------------------------------------------------------------
// max usable qlog8 value is 247, 248=inf, >=249 is nan
// max usable qlog7 value is 123, 124=inf, >=125 is nan
//const uint32_t TOTAL_USABLE_QLOG8 = 248; // 0-247 are usable, 0=0, 247=60416.0, 246=55296.0
// nearest values given a positive half float value (only)
// Both tables are indexed by the 15-bit pattern of a positive half float (0..32767) and are filled by
// init_qlog_tables(). Entries for inf/nan half values are never written by compute_half_to_qlog_table().
static uint16_t g_half_to_qlog7[32768], g_half_to_qlog8[32768];
// Range of qlog bit depths that have a lookup table; the table for `bits` lives at
// g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS].
const uint32_t HALF_TO_QLOG_TABS_MIN_BITS = 7;
const uint32_t HALF_TO_QLOG_TABS_MAX_BITS = 8;
static uint16_t* g_pHalf_to_qlog_tabs[2] =
{
g_half_to_qlog7,
g_half_to_qlog8,
};
// Compiled out (#if 0): table lookup returning the nearest qlog7/qlog8 code for a positive half float.
// `bits` selects the table (7 or 8) and `h` must be a positive half bit pattern (< 32768).
#if 0
static inline uint32_t half_to_qlog7_8(half_float h, uint32_t bits)
{
assert((bits >= HALF_TO_QLOG_TABS_MIN_BITS) && (bits <= HALF_TO_QLOG_TABS_MAX_BITS));
assert(h < 32768);
return g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS][h];
}
#endif
// Reduces a 16-bit qlog value to `desired_bits` (7..12) bits: adds a bias of (half step - 1), drops the
// low bits, and clamps the result to the largest representable code.
// TODO: Tune this
static inline uint32_t quant_qlog16(uint32_t q16, uint32_t desired_bits)
{
	assert((desired_bits >= 7) && (desired_bits <= 12));
	assert(q16 <= 65535);

	const uint32_t bits_to_drop = 16 - desired_bits;
	const uint32_t largest_code = (1U << desired_bits) - 1U;

	// Unsigned arithmetic: (q16 + half) - 1 and q16 + (half - 1) are the same value.
	const uint32_t rounding_bias = (1U << (bits_to_drop - 1U)) - 1U;
	const uint32_t rounded = (q16 + rounding_bias) >> bits_to_drop;

	return minimum<uint32_t>(rounded, largest_code);
}
// Fills pTable[h], for every positive half-float bit pattern h (0..32767) that is not inf/nan, with the
// `bits`-wide qlog code whose decoded float value is nearest to the half's value.
//   bits            - width of the qlog codes to search (asserted to be 5..12)
//   pTable          - output, indexed by half bit pattern; inf/nan slots are left untouched
//   qlog16_to_float - float value of every 16-bit qlog code (65536 entries, built by init_qlog_tables())
static void compute_half_to_qlog_table(uint32_t bits, uint16_t* pTable, const basisu::vector<float>& qlog16_to_float)
{
	assert(bits >= 5 && bits <= 12);

	const uint32_t FIRST_INVALID_QLOG16_INDEX = 63488; // first inf, rest are inf/nan's
	assert(std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX]));
	assert(std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX + 1]));
	assert(!std::isnan(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX - 1]));
	assert(!std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX - 1]));

	// A `bits`-wide code maps to the qlog16 code obtained by shifting it up to 16 bits.
	const uint32_t expand_shift = 16 - bits;
	const uint32_t num_codes = 1U << bits;

	// For all positive half-floats
	for (uint32_t half_bits = 0; half_bits < 32768; half_bits++)
	{
		const half_float hf = (half_float)half_bits;

		// Skip invalid values
		if (is_half_inf_or_nan(hf))
			continue;

		const float target = half_to_float(hf);

		uint32_t nearest_code = 0;
		float lowest_err = BIG_FLOAT_VAL;
		double last_err = BIG_FLOAT_VAL;

		// For all possible qlog's
		for (uint32_t code = 0; code < num_codes; code++)
		{
			// Stop at the first code that would decode to inf/nan
			const uint32_t q16_index = code << expand_shift;
			if (q16_index >= FIRST_INVALID_QLOG16_INDEX)
				break;

			//assert(!std::isinf(v) && !std::isnan(v)); // too costly in debug
			const float err = fabsf(qlog16_to_float[q16_index] - target);

			// Once the error starts growing, every remaining entry will have guaranteed higher error
			if (err > last_err)
				break;
			last_err = err;

			if (err < lowest_err)
			{
				lowest_err = err;
				nearest_code = code;

				// Exact match, nothing can beat it
				if (lowest_err == 0.0f)
					break;
			}
		}

		pTable[half_bits] = (uint16_t)nearest_code;
	}
}
static void init_qlog_tables()
{
basisu::vector<float> qlog16_to_float(65536);
// for all possible qlog16, compute the corresponding half float
for (uint32_t i = 0; i <= 65535; i++)
{
half_float h = astc_helpers::qlog16_to_half(i);
qlog16_to_float[i] = half_to_float(h);
}
#if BASISU_MULTITHREADED_INIT
job_pool jp(3);
for (uint32_t bits = HALF_TO_QLOG_TABS_MIN_BITS; bits <= HALF_TO_QLOG_TABS_MAX_BITS; bits++)
{
jp.add_job( [bits, &qlog16_to_float]() { compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS], qlog16_to_float); });
}
jp.wait_for_all();
#else
// for all possible half floats, find the nearest qlog5-12 float
for (uint32_t bits = HALF_TO_QLOG_TABS_MIN_BITS; bits <= HALF_TO_QLOG_TABS_MAX_BITS; bits++)
{
compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS], qlog16_to_float);
#if 0
std::vector<uint16_t> check_tab(32768);
compute_half_to_qlog_table_orig(bits, check_tab.data(), qlog16_to_float);
for (uint32_t i = 0; i < (1 << bits); i++)
{
assert(check_tab[i] == g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS][i]);
}
#endif
}
#endif // BASISU_MULTITHREADED_INIT
}
//--------------------------------------------------------------------------------------------------------------------------
// Returns the per-channel average of the first three components of num_pixels pixels.
// The fourth component of each pixel is ignored. The sum is divided by num_pixels as-is.
static vec3F calc_mean(uint32_t num_pixels, const vec4F* pPixels)
{
	vec3F sum(0.0f);

	const vec4F* const pEnd = pPixels + num_pixels;
	for (const vec4F* pCur = pPixels; pCur != pEnd; ++pCur)
	{
		for (uint32_t c = 0; c < 3; c++)
			sum[c] += (*pCur)[c];
	}

	const float count = static_cast<float>(num_pixels);
	return sum / count;
}
// Estimates the principal axis of the pixels' RGB distribution around mean_color.
// Accumulates the upper triangle of the 3x3 covariance matrix (unnormalized), then runs three power
// iterations from a fixed starting vector, rescaling by the largest component after each one.
// Returns the unit-length axis, or (0.5773502691, 0.5773502691, 0.5773502691) when the squared length of the
// iterated vector is not >= 1e-10.
static vec3F calc_rgb_pca(uint32_t num_pixels, const vec4F* pPixels, const vec3F& mean_color)
{
	// Covariance terms: [0]=rr [1]=rg [2]=rb [3]=gg [4]=gb [5]=bb
	float cov[6] = { 0, 0, 0, 0, 0, 0 };

	for (uint32_t i = 0; i < num_pixels; i++)
	{
		const vec4F& px = pPixels[i];

		const float dr = px[0] - mean_color[0];
		const float dg = px[1] - mean_color[1];
		const float db = px[2] - mean_color[2];

		cov[0] += dr * dr;
		cov[1] += dr * dg;
		cov[2] += dr * db;
		cov[3] += dg * dg;
		cov[4] += dg * db;
		cov[5] += db * db;
	}

	// Power iteration
	float vr = .9f, vg = 1.0f, vb = .7f;

	for (uint32_t pass = 0; pass < 3; pass++)
	{
		float nr = vr * cov[0] + vg * cov[1] + vb * cov[2];
		float ng = vr * cov[1] + vg * cov[3] + vb * cov[4];
		float nb = vr * cov[2] + vg * cov[4] + vb * cov[5];

		// Rescale so the largest magnitude component is 1 (skipped when the vector is ~zero)
		const float largest = maximumf(maximumf(fabsf(nr), fabsf(ng)), fabsf(nb));
		if (largest > 1e-10f)
		{
			const float inv_largest = 1.0f / largest;
			nr *= inv_largest;
			ng *= inv_largest;
			nb *= inv_largest;
		}

		vr = nr;
		vg = ng;
		vb = nb;
	}

	// Default to the grey diagonal; only overwritten when the result is long enough to normalize.
	vec3F axis(0.5773502691f);

	const float len_sq = vr * vr + vg * vg + vb * vb;
	if (len_sq >= 1e-10f)
	{
		const float inv_len = 1.0f / sqrtf(len_sq);
		vr *= inv_len;
		vg *= inv_len;
		vb *= inv_len;
		axis.set(vr, vg, vb);
	}

	return axis;
}
// Caches per-block statistics: the pixel count, the mean RGB colour (calc_mean) and the principal RGB axis
// around that mean (calc_rgb_pca).
// The parameter name suggests the pixels are expressed in Q16 space - TODO confirm against callers.
void encode_astc_block_stats::init(uint32_t num_pixels, const vec4F pBlock_pixels_q16[])
{
m_num_pixels = num_pixels;
m_mean_q16 = calc_mean(num_pixels, pBlock_pixels_q16);
// The axis is computed relative to the mean stored on the previous line.
m_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, m_mean_q16);
}
// Computes the point mean + dir * df and keeps it if it lies inside colorspace_box.
// Otherwise a ray is cast from that point back towards mean and intersected with input_box; the hit point
// is returned, or the unclipped point when the intersection test does not succeed.
//   pInside - optional; set to true when the unclipped point was inside colorspace_box, false otherwise.
static vec3F interp_color(const vec3F& mean, const vec3F& dir, float df, const aabb3F& colorspace_box, const aabb3F& input_box, bool* pInside = nullptr)
{
#if 0
	assert(mean[0] >= input_box[0][0]);
	assert(mean[1] >= input_box[0][1]);
	assert(mean[2] >= input_box[0][2]);
	assert(mean[0] <= input_box[1][0]);
	assert(mean[1] <= input_box[1][1]);
	assert(mean[2] <= input_box[1][2]);
#endif

	vec3F target(mean + dir * df);

	const bool target_is_inside = colorspace_box.contains(target);
	if (pInside)
		*pInside = target_is_inside;

	if (target_is_inside)
		return target;

	// Ray origin is the rejected point; it heads towards the mean.
	vec3F ray_start(target);
	vec3F ray_goal(mean);
	ray3F r(ray_start, (ray_goal - ray_start).normalize_in_place());

	vec3F hit;
	float t = 0.0f;
	const intersection::result res = intersection::ray_aabb(hit, t, r, input_box);

	return (res == intersection::cSuccess) ? hit : target;
}
// all in Q16 space, 0-65535
static bool compute_least_squares_endpoints_rgb(
uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box)
{
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;
for (uint32_t i = 0; i < N; i++)
{
const uint32_t sel = pSelectors[i];
z00 += pSelector_weights[sel][0];
z10 += pSelector_weights[sel][1];
z11 += pSelector_weights[sel][2];
float w = pSelector_weights[sel][3];
q00_r += w * pColors[i][0];
t_r += pColors[i][0];
q00_g += w * pColors[i][1];
t_g += pColors[i][1];
q00_b += w * pColors[i][2];
t_b += pColors[i][2];
}
q10_r = t_r - q00_r;
q10_g = t_g - q00_g;
q10_b = t_b - q00_b;
z01 = z10;
float det = z00 * z11 - z01 * z10;
if (det == 0.0f)
return false;
det = 1.0f / det;
float iz00, iz01, iz10, iz11;
iz00 = z11 * det;
iz01 = -z01 * det;
iz10 = -z10 * det;
iz11 = z00 * det;
(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);
(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
(*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);
(*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
(*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b);
(*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b);
for (uint32_t c = 0; c < 3; c++)
{
float l = (*pXl)[c], h = (*pXh)[c];
if (input_box.get_dim(c) < .0000125f)
{
l = input_box[0][c];
h = input_box[1][c];
}
(*pXl)[c] = l;
(*pXh)[c] = h;
}
vec3F mean((*pXl + *pXh) * .5f);
vec3F dir(*pXh - *pXl);
float ln = dir.length();
if (ln)
{
dir /= ln;
float ld = (*pXl - mean).dot(dir);
float hd = (*pXh - mean).dot(dir);
aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL));
bool was_inside1 = false;
vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1);
if (!was_inside1)
*pXl = l;
bool was_inside2 = false;
vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2);
if (!was_inside2)
*pXh = h;
}
pXl->clamp(0.0f, MAX_QLOG16_VAL);
pXh->clamp(0.0f, MAX_QLOG16_VAL);
return true;
}
static bool compute_least_squares_endpoints_rgb_raw_weights(
uint32_t N, const uint8_t* pRaw_weights,
vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box)
{
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;
for (uint32_t i = 0; i < N; i++)
{
const float wt = (float)pRaw_weights[i] * (1.0f / 64.0f);
assert(wt <= 1.0f);
const float w0 = wt * wt;
const float w1 = (1.0f - wt) * wt;
const float w2 = (1.0f - wt) * (1.0f - wt);
const float w3 = wt;
z00 += w0;
z10 += w1;
z11 += w2;
float w = w3;
q00_r += w * pColors[i][0];
t_r += pColors[i][0];
q00_g += w * pColors[i][1];
t_g += pColors[i][1];
q00_b += w * pColors[i][2];
t_b += pColors[i][2];
}
q10_r = t_r - q00_r;
q10_g = t_g - q00_g;
q10_b = t_b - q00_b;
z01 = z10;
float det = z00 * z11 - z01 * z10;
if (det == 0.0f)
return false;
det = 1.0f / det;
float iz00, iz01, iz10, iz11;
iz00 = z11 * det;
iz01 = -z01 * det;
iz10 = -z10 * det;
iz11 = z00 * det;
(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);
(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
(*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);
(*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
(*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b);
(*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b);
for (uint32_t c = 0; c < 3; c++)
{
float l = (*pXl)[c], h = (*pXh)[c];
if (input_box.get_dim(c) < .0000125f)
{
l = input_box[0][c];
h = input_box[1][c];
}
(*pXl)[c] = l;
(*pXh)[c] = h;
}
vec3F mean((*pXl + *pXh) * .5f);
vec3F dir(*pXh - *pXl);
float ln = dir.length();
if (ln)
{
dir /= ln;
float ld = (*pXl - mean).dot(dir);
float hd = (*pXh - mean).dot(dir);
aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL));
bool was_inside1 = false;
vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1);
if (!was_inside1)
*pXl = l;
bool was_inside2 = false;
vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2);
if (!was_inside2)
*pXh = h;
}
pXl->clamp(0.0f, MAX_QLOG16_VAL);
pXh->clamp(0.0f, MAX_QLOG16_VAL);
return true;
}
static bool compute_least_squares_endpoints_2D(
uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
vec2F* pXl, vec2F* pXh, const vec2F* pColors, const aabb2F& input_box)
{
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
for (uint32_t i = 0; i < N; i++)
{
const uint32_t sel = pSelectors[i];
z00 += pSelector_weights[sel][0];
z10 += pSelector_weights[sel][1];
z11 += pSelector_weights[sel][2];
float w = pSelector_weights[sel][3];
q00_r += w * pColors[i][0];
t_r += pColors[i][0];
q00_g += w * pColors[i][1];
t_g += pColors[i][1];
}
q10_r = t_r - q00_r;
q10_g = t_g - q00_g;
z01 = z10;
float det = z00 * z11 - z01 * z10;
if (det == 0.0f)
return false;
det = 1.0f / det;
float iz00, iz01, iz10, iz11;
iz00 = z11 * det;
iz01 = -z01 * det;
iz10 = -z10 * det;
iz11 = z00 * det;
(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);
(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
(*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);
(*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
for (uint32_t c = 0; c < 2; c++)
{
float l = (*pXl)[c], h = (*pXh)[c];
if (input_box.get_dim(c) < .0000125f)
{
l = input_box[0][c];
h = input_box[1][c];
}
(*pXl)[c] = l;
(*pXh)[c] = h;
}
pXl->clamp(0.0f, MAX_QLOG16_VAL);
pXh->clamp(0.0f, MAX_QLOG16_VAL);
return true;
}
static bool compute_least_squares_endpoints_1D(
uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
vec1F* pXl, vec1F* pXh, const vec1F* pColors, const aabb1F& input_box)
{
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
for (uint32_t i = 0; i < N; i++)
{
const uint32_t sel = pSelectors[i];
z00 += pSelector_weights[sel][0];
z10 += pSelector_weights[sel][1];
z11 += pSelector_weights[sel][2];
float w = pSelector_weights[sel][3];
q00_r += w * pColors[i][0];
t_r += pColors[i][0];
}
q10_r = t_r - q00_r;
z01 = z10;
float det = z00 * z11 - z01 * z10;
if (det == 0.0f)
return false;
det = 1.0f / det;
float iz00, iz01, iz10, iz11;
iz00 = z11 * det;
iz01 = -z01 * det;
iz10 = -z10 * det;
iz11 = z00 * det;
(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);
(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
for (uint32_t c = 0; c < 1; c++)
{
float l = (*pXl)[c], h = (*pXh)[c];
if (input_box.get_dim(c) < .0000125f)
{
l = input_box[0][c];
h = input_box[1][c];
}
(*pXl)[c] = l;
(*pXh)[c] = h;
}
pXl->clamp(0.0f, MAX_QLOG16_VAL);
pXh->clamp(0.0f, MAX_QLOG16_VAL);
return true;
}
static bool compute_weighted_least_squares_endpoints_rgb(
uint32_t N,
const uint8_t* pSelectors, const vec4F* pSelector_weights, const float* pRaw_weights, /* ti */
const float* pEmphasis_weights /* wi */,
vec3F* pXl, vec3F* pXh,
const vec4F* pColors, /* pi */
const aabb3F& input_box)
{
(void)input_box;
assert(N);
assert((pSelectors && pSelector_weights) || pRaw_weights);
assert(pEmphasis_weights);
// Pi = pixel colors
// Ti = project weights, [0,1]
// Wi = emphasis weights
float total_wi = 0.0f;
for (uint32_t i = 0; i < N; i++)
total_wi += pEmphasis_weights[i];
if (total_wi == 0.0f)
return false;
float weighted_mean_tw = 0.0f;
float weighted_mean_pw[3] = { 0.0f };
for (uint32_t i = 0; i < N; i++)
{
const float wi = pEmphasis_weights[i];
const float ti = pSelectors ? pSelector_weights[pSelectors[i]][3] : pRaw_weights[i];
const float pi_r = pColors[i][0], pi_g = pColors[i][1], pi_b = pColors[i][2];
weighted_mean_tw += wi * ti;
weighted_mean_pw[0] += wi * pi_r;
weighted_mean_pw[1] += wi * pi_g;
weighted_mean_pw[2] += wi * pi_b;
}
weighted_mean_tw /= total_wi;
weighted_mean_pw[0] /= total_wi;
weighted_mean_pw[1] /= total_wi;
weighted_mean_pw[2] /= total_wi;
float spt[3] = { 0.0f };
float stt = 0.0f;
for (uint32_t i = 0; i < N; i++)
{
const float wi = pEmphasis_weights[i];
const float ti = pSelectors ? pSelector_weights[pSelectors[i]][3] : pRaw_weights[i];
const float pi_r = pColors[i][0], pi_g = pColors[i][1], pi_b = pColors[i][2];
spt[0] += wi * (pi_r - weighted_mean_pw[0]) * (ti - weighted_mean_tw);
spt[1] += wi * (pi_g - weighted_mean_pw[1]) * (ti - weighted_mean_tw);
spt[2] += wi * (pi_b - weighted_mean_pw[2]) * (ti - weighted_mean_tw);
stt += wi * square(ti - weighted_mean_tw);
}
if (stt == 0.0f)
return false;
for (uint32_t i = 0; i < 3; i++)
{
float h = weighted_mean_pw[i] + (spt[i] / stt) * (1.0f - weighted_mean_tw);
float l = weighted_mean_pw[i] - (spt[i] / stt) * weighted_mean_tw;
(*pXh)[i] = h;
(*pXl)[i] = l;
}
pXl->clamp(0.0f, MAX_QLOG16_VAL);
pXh->clamp(0.0f, MAX_QLOG16_VAL);
return true;
}
static vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS];
static uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][astc_index] -> linear index
static uint8_t g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][linear_index] -> astc_index
static void encode_astc_hdr_init()
{
// Precomputed weight constants used during least fit determination. For each entry: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w
for (uint32_t range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; range++)
{
const uint32_t num_levels = g_ise_weight_lerps[range][0];
assert(num_levels == astc_helpers::get_ise_levels(range));
assert((num_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));
for (uint32_t i = 0; i < num_levels; i++)
{
float w = g_ise_weight_lerps[range][1 + i] * (1.0f / 64.0f);
g_astc_ls_weights_ise[range][i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w);
}
}
for (uint32_t ise_range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; ise_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; ise_range++)
{
const uint32_t num_levels = g_ise_weight_lerps[ise_range][0];
assert((num_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));
uint32_t s[MAX_SUPPORTED_WEIGHT_LEVELS];
for (uint32_t i = 0; i < num_levels; i++)
s[i] = (g_ise_weight_lerps[ise_range][1 + i] << 8) + i;
std::sort(s, s + num_levels);
for (uint32_t i = 0; i < num_levels; i++)
g_map_linear_to_astc_order[ise_range][i] = (uint8_t)(s[i] & 0xFF);
for (uint32_t i = 0; i < num_levels; i++)
g_map_astc_to_linear_order[ise_range][g_map_linear_to_astc_order[ise_range][i]] = (uint8_t)i;
}
//init_quantize_tables();
}
// Set once astc_hdr_enc_init() has completed.
bool g_astc_hdr_enc_initialized;

// One-time initialization of the ASTC HDR encoder's tables. Calls after the first one return immediately.
// The flag is a plain bool checked without any locking.
void astc_hdr_enc_init()
{
	if (!g_astc_hdr_enc_initialized)
	{
		astc_hdr_core_init();
		astc_helpers::init_tables(true);

		init_qlog_tables();
		encode_astc_hdr_init();

		g_astc_hdr_enc_initialized = true;
	}
}
void interpolate_qlog12_colors(
const int e[2][3],
half_float* pDecoded_half,
vec3F* pDecoded_float,
uint32_t n, uint32_t ise_weight_range)
{
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
for (uint32_t i = 0; i < 2; i++)
{
for (uint32_t j = 0; j < 3; j++)
{
assert(in_range(e[i][j], 0, 0xFFF));
}
}
for (uint32_t i = 0; i < n; i++)
{
const int c = g_ise_weight_lerps[ise_weight_range][1 + i];
assert(c == (int)astc_helpers::dequant_bise_weight(i, ise_weight_range));
half_float rf, gf, bf;
{
uint32_t r0 = e[0][0] << 4;
uint32_t r1 = e[1][0] << 4;
int ri = (r0 * (64 - c) + r1 * c + 32) / 64;
rf = astc_helpers::qlog16_to_half(ri);
}
{
uint32_t g0 = e[0][1] << 4;
uint32_t g1 = e[1][1] << 4;
int gi = (g0 * (64 - c) + g1 * c + 32) / 64;
gf = astc_helpers::qlog16_to_half(gi);
}
{
uint32_t b0 = e[0][2] << 4;
uint32_t b1 = e[1][2] << 4;
int bi = (b0 * (64 - c) + b1 * c + 32) / 64;
bf = astc_helpers::qlog16_to_half(bi);
}
if (pDecoded_half)
{
pDecoded_half[i * 3 + 0] = rf;
pDecoded_half[i * 3 + 1] = gf;
pDecoded_half[i * 3 + 2] = bf;
}
if (pDecoded_float)
{
pDecoded_float[i][0] = half_to_float(rf);
pDecoded_float[i][1] = half_to_float(gf);
pDecoded_float[i][2] = half_to_float(bf);
}
}
}
// decoded in ASTC order, not linear order
// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded
// Decodes mode 11 endpoints to qlog12 with decode_mode11_to_qlog12() and, on success, produces the n
// interpolated colors via interpolate_qlog12_colors().
// pDecoded_half (3 halves per color) and pDecoded_float (one vec3F per color) are each optional outputs.
bool get_astc_hdr_mode_11_block_colors(
const uint8_t* pEndpoints,
half_float* pDecoded_half,
vec3F* pDecoded_float,
uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range)
{
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
// e[0] / e[1]: the two decoded RGB endpoints
int e[2][3];
if (!decode_mode11_to_qlog12(pEndpoints, e, ise_endpoint_range))
return false;
interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range);
return true;
}
// decoded in ASTC order, not linear order
// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded
// Decodes mode 7 endpoints to qlog12 with decode_mode7_to_qlog12() and, on success, produces the n
// interpolated colors via interpolate_qlog12_colors().
// pDecoded_half (3 halves per color) and pDecoded_float (one vec3F per color) are each optional outputs.
bool get_astc_hdr_mode_7_block_colors(
const uint8_t* pEndpoints,
half_float* pDecoded_half,
vec3F* pDecoded_float,
uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range)
{
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
// e[0] / e[1]: the two decoded RGB endpoints
int e[2][3];
// The third argument of decode_mode7_to_qlog12() is an optional output that is not needed here.
if (!decode_mode7_to_qlog12(pEndpoints, e, nullptr, ise_endpoint_range))
return false;
interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range);
return true;
}
// Floating point selector search: for every pixel, picks the usable weight index whose decoded color has the
// lowest weighted squared error against the pixel, after both are mapped through q().
//   num_pixels              - pixels in the block (1..MAX_ASTC_HDR_ENC_BLOCK_PIXELS)
//   pWeights                - output, chosen weight index per pixel
//   pBlock_pixels_half      - source pixels, 3 halves (r, g, b) per pixel
//   num_weight_levels       - number of candidate colors in pDecoded_half (<= MAX_SUPPORTED_WEIGHT_LEVELS)
//   pDecoded_half           - candidate colors, 3 halves per weight index
//   coptions                - supplies the red/green error scales (blue is 1) and the q() log bias
//   usable_selector_bitmask - bit i set means weight index i may be chosen; must be non-zero
// Returns the sum of the per-pixel lowest errors. A pixel for which no candidate beats BIG_FLOAT_VAL keeps
// its previous pWeights entry and contributes BIG_FLOAT_VAL.
double eval_selectors_f(
	uint32_t num_pixels,
	uint8_t* pWeights,
	const half_float* pBlock_pixels_half,
	uint32_t num_weight_levels,
	const half_float* pDecoded_half,
	const astc_hdr_codec_base_options& coptions,
	uint32_t usable_selector_bitmask)
{
	assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
	assert(usable_selector_bitmask);
	// decoded_half_q below is a fixed-size stack array.
	assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);

	const float R_WEIGHT = coptions.m_r_err_scale;
	const float G_WEIGHT = coptions.m_g_err_scale;

	double total_error = 0;

#ifdef _DEBUG
	for (uint32_t i = 0; i < num_weight_levels; i++)
	{
		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));
		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));
		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));
	}
#endif

	// Map every candidate color through q() once, up front.
	double decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3];
	for (uint32_t i = 0; i < num_weight_levels; i++)
	{
		const half_float* p = &pDecoded_half[i * 3];

		decoded_half_q[i][0] = q(p[0], coptions.m_q_log_bias);
		decoded_half_q[i][1] = q(p[1], coptions.m_q_log_bias);
		decoded_half_q[i][2] = q(p[2], coptions.m_q_log_bias);
	}

	for (uint32_t p = 0; p < num_pixels; p++)
	{
		const half_float* pDesired_half = &pBlock_pixels_half[p * 3];

		const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias);
		const double desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias);
		const double desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias);

		double lowest_e = BIG_FLOAT_VAL;

		// this is an approximation of MSLE
		for (uint32_t i = 0; i < num_weight_levels; i++)
		{
			// Unsigned shift: i can be 31 with 32 weight levels, which a signed `1 << i` cannot represent.
			if (((1U << i) & usable_selector_bitmask) == 0)
				continue;

			// compute piecewise linear approximation of log2(a+eps)-log2(b+eps), for each component, then MSLE
			const double rd = decoded_half_q[i][0] - desired_half_r_q;
			const double gd = decoded_half_q[i][1] - desired_half_g_q;
			const double bd = decoded_half_q[i][2] - desired_half_b_q;

			const double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;

			if (e < lowest_e)
			{
				lowest_e = e;
				pWeights[p] = (uint8_t)i;
			}
		}

		total_error += lowest_e;
	} // p

	return total_error;
}
// Integer selector search: for every pixel, picks the weight index whose decoded color has the lowest error
// 2*dr^2 + 3*dg^2 + db^2 against the pixel, after both are mapped through q2().
// Falls back to eval_selectors_f() whenever the configured red/green error scales are not exactly 2 and 3,
// because the integer paths below hard-code those factors.
//   num_pixels              - pixels in the block (1..MAX_ASTC_HDR_ENC_BLOCK_PIXELS)
//   pWeights                - output, chosen weight index per pixel
//   ise_weight_range        - ISE weight range; only used by the weight plane optimization path
//   pBlock_pixels_half      - source pixels, 3 halves (r, g, b) per pixel
//   num_weight_levels       - number of candidate colors in pDecoded_half (<= MAX_SUPPORTED_WEIGHT_LEVELS)
//   pDecoded_half           - candidate colors, 3 halves per weight index
//   coptions                - error scales, q2() log bias, and m_disable_weight_plane_optimization
//   usable_selector_bitmask - bit i set means weight index i may be chosen; UINT32_MAX selects the
//                             unmasked fast paths
// Returns the summed per-pixel lowest error as a double.
double eval_selectors(
	uint32_t num_pixels,
	uint8_t* pWeights,
	uint32_t ise_weight_range,
	const half_float* pBlock_pixels_half,
	uint32_t num_weight_levels,
	const half_float* pDecoded_half,
	const astc_hdr_codec_base_options& coptions,
	uint32_t usable_selector_bitmask)
{
	if ((coptions.m_r_err_scale != 2.0f) || (coptions.m_g_err_scale != 3.0f))
	{
		return eval_selectors_f(
			num_pixels,
			pWeights,
			pBlock_pixels_half,
			num_weight_levels,
			pDecoded_half,
			coptions,
			usable_selector_bitmask);
	}

	assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
	assert(usable_selector_bitmask);
	// decoded_half_q below is a fixed-size stack array.
	assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);

	uint64_t total_error = 0;

#ifdef _DEBUG
	for (uint32_t i = 0; i < num_weight_levels; i++)
	{
		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));
		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));
		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));
	}
#endif

	// Map every candidate color through q2() once, up front.
	int64_t decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3];
	for (uint32_t i = 0; i < num_weight_levels; i++)
	{
		const half_float* p = &pDecoded_half[i * 3];

		decoded_half_q[i][0] = q2(p[0], coptions.m_q_log_bias);
		decoded_half_q[i][1] = q2(p[1], coptions.m_q_log_bias);
		decoded_half_q[i][2] = q2(p[2], coptions.m_q_log_bias);
	}

	if (usable_selector_bitmask != UINT32_MAX)
	{
		// Masked path: plain scan over the permitted weight indices.
		for (uint32_t p = 0; p < num_pixels; p++)
		{
			const half_float* pDesired_half = &pBlock_pixels_half[p * 3];

			const int64_t desired_half_r_q = q2(pDesired_half[0], coptions.m_q_log_bias);
			const int64_t desired_half_g_q = q2(pDesired_half[1], coptions.m_q_log_bias);
			const int64_t desired_half_b_q = q2(pDesired_half[2], coptions.m_q_log_bias);

			int64_t lowest_e = INT64_MAX;

			for (uint32_t i = 0; i < num_weight_levels; i++)
			{
				// Unsigned shift: i can be 31 with 32 weight levels, which a signed `1 << i` cannot represent.
				if (((1U << i) & usable_selector_bitmask) == 0)
					continue;

				int64_t rd = decoded_half_q[i][0] - desired_half_r_q;
				int64_t gd = decoded_half_q[i][1] - desired_half_g_q;
				int64_t bd = decoded_half_q[i][2] - desired_half_b_q;

				int64_t e = 2 * (rd * rd) + 3 * (gd * gd) + bd * bd;

				if (e < lowest_e)
				{
					lowest_e = e;
					pWeights[p] = (uint8_t)i;
				}
			}

			total_error += lowest_e;
		} // p
	}
	else
	{
		if ((num_weight_levels <= 4) || (coptions.m_disable_weight_plane_optimization))
		{
			// Unmasked path: all weight indices are scanned two at a time. The weight index is packed into
			// the low 5 bits of the error so a single minimum() tracks both the error and its index.
			for (uint32_t p = 0; p < num_pixels; p++)
			{
				const half_float* pDesired_half = &pBlock_pixels_half[p * 3];

				const half_float desired_r = pDesired_half[0], desired_g = pDesired_half[1], desired_b = pDesired_half[2];

				const int64_t desired_half_r_q = q2(desired_r, coptions.m_q_log_bias);
				const int64_t desired_half_g_q = q2(desired_g, coptions.m_q_log_bias);
				const int64_t desired_half_b_q = q2(desired_b, coptions.m_q_log_bias);

				int64_t lowest_e = INT64_MAX;

				uint32_t i;
				for (i = 0; (i + 1) < num_weight_levels; i += 2)
				{
					int64_t e0, e1;

					{
						int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q; // 27 bits maximum with half float inputs
						int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;
						int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;
						e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i; // max 62 bits (27*2+3+5)
					}

					{
						int64_t rd1 = decoded_half_q[i + 1][0] - desired_half_r_q;
						int64_t gd1 = decoded_half_q[i + 1][1] - desired_half_g_q;
						int64_t bd1 = decoded_half_q[i + 1][2] - desired_half_b_q;
						e1 = ((2 * (rd1 * rd1) + 3 * (gd1 * gd1) + bd1 * bd1) << 5) | (i + 1);
					}

					lowest_e = minimum(lowest_e, e0, e1);
				}

				// Odd level count: one index is left over after the pairwise loop.
				if (i != num_weight_levels)
				{
					int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q;
					int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;
					int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;
					int64_t e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i;

					lowest_e = minimum(lowest_e, e0);
				}

				// Unpack: low 5 bits are the winning index, the rest is its error.
				pWeights[p] = (uint8_t)(lowest_e & 31);

				total_error += (lowest_e >> 5);
			} // p
		}
		else
		{
			// Weight plane optimization: only reached when m_disable_weight_plane_optimization is false
			// (astc_hdr_codec_base_options::init() sets it to true). Each pixel is classified by which side of
			// the plane through the mid color, perpendicular to (high - low), it lies on, and only the
			// corresponding half of the weight indices is searched.
			// Note the half float bit patterns are converted to float as integers here, not decoded.
			const auto& weight_val_to_ise_tab = astc_helpers::g_dequant_tables.get_weight_tab(ise_weight_range).m_val_to_ise;
			const int lo_index = weight_val_to_ise_tab[0], hi_index = weight_val_to_ise_tab[64], mid_index = weight_val_to_ise_tab[32];

			const vec3F low_color((float)pDecoded_half[lo_index * 3 + 0], (float)pDecoded_half[lo_index * 3 + 1], (float)pDecoded_half[lo_index * 3 + 2]);
			const vec3F high_color((float)pDecoded_half[hi_index * 3 + 0], (float)pDecoded_half[hi_index * 3 + 1], (float)pDecoded_half[hi_index * 3 + 2]);
			const vec3F mid_color((float)pDecoded_half[mid_index * 3 + 0], (float)pDecoded_half[mid_index * 3 + 1], (float)pDecoded_half[mid_index * 3 + 2]);

			const vec3F block_dir(high_color - low_color);

			for (uint32_t p = 0; p < num_pixels; p++)
			{
				const half_float* pDesired_half = &pBlock_pixels_half[p * 3];

				const half_float desired_r = pDesired_half[0], desired_g = pDesired_half[1], desired_b = pDesired_half[2];

				const int64_t desired_half_r_q = q2(desired_r, coptions.m_q_log_bias);
				const int64_t desired_half_g_q = q2(desired_g, coptions.m_q_log_bias);
				const int64_t desired_half_b_q = q2(desired_b, coptions.m_q_log_bias);

				// Determine which side of the middle plane the point is for a modest gain
				vec3F c((float)desired_r - mid_color[0], (float)desired_g - mid_color[1], (float)desired_b - mid_color[2]);
				float d = c.dot(block_dir);

				int i = 0, high_index = (num_weight_levels / 2) + 1;
				if (d >= 0.0f)
				{
					i = num_weight_levels / 2;
					high_index = num_weight_levels;
				}

				int64_t lowest_e = INT64_MAX;

				for (; (i + 1) < high_index; i += 2)
				{
					int64_t e0, e1;

					{
						int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q; // 27 bits maximum with half float inputs
						int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;
						int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;
						e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i; // max 62 bits (27*2+3+5)
					}

					{
						int64_t rd1 = decoded_half_q[i + 1][0] - desired_half_r_q;
						int64_t gd1 = decoded_half_q[i + 1][1] - desired_half_g_q;
						int64_t bd1 = decoded_half_q[i + 1][2] - desired_half_b_q;
						e1 = ((2 * (rd1 * rd1) + 3 * (gd1 * gd1) + bd1 * bd1) << 5) | (i + 1);
					}

					lowest_e = minimum(lowest_e, e0, e1);
				}

				// One index is left over when the searched span has odd length.
				if (i != high_index)
				{
					int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q;
					int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;
					int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;
					int64_t e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i;

					lowest_e = minimum(lowest_e, e0);
				}

				// Unpack: low 5 bits are the winning index, the rest is its error.
				pWeights[p] = (uint8_t)(lowest_e & 31);

				total_error += (lowest_e >> 5);
			} // p
		}
	}

	return (double)total_error;
}
//--------------------------------------------------------------------------------------------------------------------------
// Dual plane selector search. Channel `channel_index` gets its own weight plane (pWeights1); the other two
// channels share the first plane (pWeights0). For each plane and pixel, the usable weight index with the lowest
// weighted squared error (after mapping through q()) is chosen.
//   channel_index           - channel (0..2) assigned to the second plane
//   num_pixels              - pixels in the block (1..MAX_ASTC_HDR_ENC_BLOCK_PIXELS)
//   pWeights0, pWeights1    - outputs, chosen weight index per pixel for the first / second plane
//   pBlock_pixels_half      - source pixels, 3 halves (r, g, b) per pixel
//   num_weight_levels       - number of candidate colors in pDecoded_half (<= MAX_SUPPORTED_WEIGHT_LEVELS)
//   pDecoded_half           - candidate colors, 3 halves per weight index
//   coptions                - supplies the red/green error scales (blue is 1) and the q() log bias
//   usable_selector_bitmask - bit i set means weight index i may be chosen; must be non-zero
// Returns the sum of the lowest errors of both planes over all pixels.
double eval_selectors_dual_plane(
	uint32_t channel_index,
	uint32_t num_pixels,
	uint8_t* pWeights0, uint8_t* pWeights1,
	const half_float* pBlock_pixels_half,
	uint32_t num_weight_levels,
	const half_float* pDecoded_half,
	const astc_hdr_codec_base_options& coptions,
	uint32_t usable_selector_bitmask)
{
	assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
	assert(usable_selector_bitmask);
	// channel_index indexes 3-element arrays; decoded_half_q is a fixed-size stack array.
	assert(channel_index < 3);
	assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);

	const float R_WEIGHT = coptions.m_r_err_scale;
	const float G_WEIGHT = coptions.m_g_err_scale;

	double total_error = 0;

#ifdef _DEBUG
	for (uint32_t i = 0; i < num_weight_levels; i++)
	{
		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));
		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));
		assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));
	}
#endif

	// Map every candidate color through q() once, up front.
	double decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3];
	for (uint32_t i = 0; i < num_weight_levels; i++)
	{
		const half_float* p = &pDecoded_half[i * 3];

		decoded_half_q[i][0] = q(p[0], coptions.m_q_log_bias);
		decoded_half_q[i][1] = q(p[1], coptions.m_q_log_bias);
		decoded_half_q[i][2] = q(p[2], coptions.m_q_log_bias);
	}

	const double channel_weights[3] = { R_WEIGHT, G_WEIGHT, 1.0f };

	// The two channels that are not channel_index, in cyclic order after it.
	const uint32_t first_channel = (channel_index + 1) % 3;
	const uint32_t second_channel = (channel_index + 2) % 3;

	// First plane
	const double first_channel_weight = channel_weights[first_channel];
	const double second_channel_weight = channel_weights[second_channel];

	for (uint32_t p = 0; p < num_pixels; p++)
	{
		const half_float* pDesired_half = &pBlock_pixels_half[p * 3];

		const double desired_half_x_q = q(pDesired_half[first_channel], coptions.m_q_log_bias);
		const double desired_half_y_q = q(pDesired_half[second_channel], coptions.m_q_log_bias);

		double lowest_e = BIG_FLOAT_VAL;

		// this is an approximation of MSLE
		for (uint32_t i = 0; i < num_weight_levels; i++)
		{
			// Unsigned shift: i can be 31 with 32 weight levels, which a signed `1 << i` cannot represent.
			if (((1U << i) & usable_selector_bitmask) == 0)
				continue;

			const double xd = decoded_half_q[i][first_channel] - desired_half_x_q;
			const double yd = decoded_half_q[i][second_channel] - desired_half_y_q;

			const double e = first_channel_weight * (xd * xd) + second_channel_weight * (yd * yd);

			if (e < lowest_e)
			{
				lowest_e = e;
				pWeights0[p] = (uint8_t)i;
			}
		}

		total_error += lowest_e;
	} // p

	// Second plane
	const double alt_channel_weight = channel_weights[channel_index];

	for (uint32_t p = 0; p < num_pixels; p++)
	{
		const half_float* pDesired_half = &pBlock_pixels_half[p * 3];

		const double desired_half_a_q = q(pDesired_half[channel_index], coptions.m_q_log_bias);

		double lowest_e = BIG_FLOAT_VAL;

		// this is an approximation of MSLE
		for (uint32_t i = 0; i < num_weight_levels; i++)
		{
			if (((1U << i) & usable_selector_bitmask) == 0)
				continue;

			const double ad = decoded_half_q[i][channel_index] - desired_half_a_q;

			const double e = alt_channel_weight * (ad * ad);

			if (e < lowest_e)
			{
				lowest_e = e;
				pWeights1[p] = (uint8_t)i;
			}
		}

		total_error += lowest_e;
	} // p

	return total_error;
}
//--------------------------------------------------------------------------------------------------------------------------
double compute_block_error(uint32_t num_pixels, const half_float* pOrig_block, const half_float* pPacked_block, const astc_hdr_codec_base_options& coptions)
{
const float R_WEIGHT = coptions.m_r_err_scale;
const float G_WEIGHT = coptions.m_g_err_scale;
double total_error = 0;
for (uint32_t p = 0; p < num_pixels; p++)
{
double rd = q(pOrig_block[p * 3 + 0], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 0], coptions.m_q_log_bias);
double gd = q(pOrig_block[p * 3 + 1], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 1], coptions.m_q_log_bias);
double bd = q(pOrig_block[p * 3 + 2], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 2], coptions.m_q_log_bias);
double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
total_error += e;
}
return total_error;
}
//--------------------------------------------------------------------------------------------------------------------------
// Computes the total weighted squared error of a block given its two endpoints and per-pixel raw weights.
// For every pixel the endpoints (widened from qlog12 to qlog16) are interpolated with the pixel's raw weight
// (0-64), converted to half float, and compared against the source pixel in the q() domain.
// Red/green differences are scaled by coptions.m_r_err_scale / m_g_err_scale; blue is unscaled.
double compute_block_error_from_raw_weights(
	uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3],
	const uint8_t* pRaw_weights,
	int endpoints_qlog12[2][3],
	const astc_hdr_codec_base_options& coptions)
{
	// qlog12->qlog16: shift each endpoint component up by 4 bits.
	int endpoints_q16[2][3];
	for (uint32_t ch = 0; ch < 3; ch++)
	{
		for (uint32_t e = 0; e < 2; e++)
		{
			assert(endpoints_qlog12[e][ch] <= (int)basist::MAX_QLOG12);
			endpoints_q16[e][ch] = endpoints_qlog12[e][ch] << 4;
		}
	}

	const float r_scale = coptions.m_r_err_scale;
	const float g_scale = coptions.m_g_err_scale;

	double err_sum = 0;

	for (uint32_t pix = 0; pix < num_pixels; pix++)
	{
		const half_float* pTarget = &pBlock_pixels_half[pix][0];

		const uint32_t w = pRaw_weights[pix];
		assert(w <= 64);

		double delta[3];
		for (uint32_t ch = 0; ch < 3; ch++)
		{
			const double target_q = q(pTarget[ch], coptions.m_q_log_bias);

			// Interpolate between the low and high endpoints with rounding, in unsigned arithmetic.
			const uint32_t lo = endpoints_q16[0][ch], hi = endpoints_q16[1][ch];
			const int interp = (lo * (64 - w) + hi * w + 32) / 64;

			const half_float decoded_half = astc_helpers::qlog16_to_half(interp);
			const double decoded_q = q(decoded_half, coptions.m_q_log_bias);

			delta[ch] = decoded_q - target_q;
		}

		err_sum += r_scale * (delta[0] * delta[0]) + g_scale * (delta[1] * delta[1]) + delta[2] * delta[2];
	}

	return err_sum;
}
//--------------------------------------------------------------------------------------------------------------------------
// Clamps v to [l, h] and returns the result.
// When v lies outside the range, did_clamp is set to true and max_clamp_mag is raised to the distance
// by which v overshot the violated bound (if that is larger than its current value).
// Neither output is touched when v is already in range.
static inline int compute_clamped_val(int v, int l, int h, bool& did_clamp, int& max_clamp_mag)
{
	assert(l < h);

	// Common case: nothing to do.
	if ((v >= l) && (v <= h))
		return v;

	did_clamp = true;

	if (v < l)
	{
		max_clamp_mag = basisu::maximum<int>(max_clamp_mag, l - v);
		return l;
	}

	max_clamp_mag = basisu::maximum<int>(max_clamp_mag, v - h);
	return h;
}
//--------------------------------------------------------------------------------------------------------------------------
// Bit widths of the packed mode 11 endpoint fields, indexed by submode (0-7).
// The "a" field width is not tabulated; it is computed as 9 + (submode >> 1).
// Width of each of the two "b" fields (unsigned).
const uint8_t s_b_bits[8] = { 7, 8, 6, 7, 8, 6, 7, 6 };
// Width of the "c" field (unsigned).
const uint8_t s_c_bits[8] = { 6, 6, 7, 7, 6, 7, 7, 7 };
// Width of each of the two "d" fields (signed, two's complement).
const uint8_t s_d_bits[8] = { 7, 6, 7, 6, 5, 6, 5, 6 };
// Packs two RGB endpoints into the 6 endpoint bytes of ASTC HDR mode 11, using the given submode (0-7).
// val_q[] must be already packed to qlog9-qlog12.
//
// submode                     - selects the field widths: a = 9 + (submode >> 1) bits, b/c/d from s_b_bits/s_c_bits/s_d_bits.
// pEndpoints                  - receives the 6 packed bytes. NOT written if the function early-outs.
// val_q                       - [0] = low endpoint, [1] = high endpoint, each component in [0, (1 << a_bits) - 1].
//                               Modified in place: rows and components are swapped while searching for the encoding.
// max_clamp_mag               - receives the largest amount by which any field of the selected encoding had to be clamped
//                               (or, on early out, of the rejected trial).
// early_out_if_clamped        - if true, gives up as soon as a trial clamps by more than max_clamp_mag_accept_thresh
//                               (when the largest value is tied between both endpoints, only after both orderings were tried).
// max_clamp_mag_accept_thresh - largest clamp magnitude tolerated before early-outing.
//
// Returns true if the encoding had to clamp any field (the decoded endpoints will then differ from val_q), false otherwise.
bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped, int max_clamp_mag_accept_thresh)
{
	assert(submode <= 7);

	const uint32_t a_bits = 9 + (submode >> 1);
	const uint32_t b_bits = s_b_bits[submode];
	const uint32_t c_bits = s_c_bits[submode];
	const uint32_t d_bits = s_d_bits[submode];

	const int max_a_val = (1 << a_bits) - 1;
	const int max_b_val = (1 << b_bits) - 1;
	const int max_c_val = (1 << c_bits) - 1;

	// The maximum usable value before it turns to NaN/Inf
	const int max_a_qlog = get_max_qlog(a_bits);
	BASISU_NOTE_UNUSED(max_a_qlog);

	// d is a signed (two's complement) field.
	const int min_d_val = -(1 << (d_bits - 1));
	const int max_d_val = -min_d_val - 1;
	assert((max_d_val - min_d_val + 1) == (1 << d_bits));

	int highest_q = -1, highest_val = 0, highest_comp = 0;

	for (uint32_t c = 0; c < 3; c++)
	{
		assert(val_q[0][c] <= max_a_qlog);
		assert(val_q[1][c] <= max_a_qlog);
	}

	// Locate the largest component over both endpoints: the format stores it directly ("a"),
	// and every other value as an offset below it.
	for (uint32_t v = 0; v < 2; v++)
	{
		for (uint32_t c = 0; c < 3; c++)
		{
			assert(val_q[v][c] >= 0 && val_q[v][c] <= max_a_val);

			if (val_q[v][c] > highest_q)
			{
				highest_q = val_q[v][c];
				highest_val = v;
				highest_comp = c;
			}
		}
	}

	// A tie means the same component of the other endpoint is equally large, so either endpoint order is encodable.
	const bool had_tie = (val_q[highest_val ^ 1][highest_comp] == highest_q);

	// Put the endpoint holding the largest value into row 1...
	if (highest_val != 1)
	{
		for (uint32_t c = 0; c < 3; c++)
		{
			std::swap(val_q[0][c], val_q[1][c]);
		}
	}

	// ...and the largest component into column 0.
	if (highest_comp)
	{
		std::swap(val_q[0][0], val_q[0][highest_comp]);
		std::swap(val_q[1][0], val_q[1][highest_comp]);
	}

	int orig_q[2][3];
	memcpy(orig_q, val_q, sizeof(int) * 6);

	// val[1][0] is now guaranteed to be highest
	int best_va = 0, best_vb0 = 0, best_vb1 = 0, best_vc = 0, best_vd0 = 0, best_vd1 = 0;
	int best_max_clamp_mag = 0;
	bool best_did_clamp = false;
	int best_q[2][3] = { { 0, 0, 0}, { 0, 0, 0 } };
	BASISU_NOTE_UNUSED(best_q);
	uint32_t best_dist = UINT_MAX;

	// Pass 0 tries the current endpoint order. Pass 1 (only reached on a tie that clamped) tries the swapped order.
	for (uint32_t pass = 0; pass < 2; pass++)
	{
		int trial_va = val_q[1][0];

		assert(trial_va <= max_a_val);
		assert(trial_va >= val_q[1][1]);
		assert(trial_va >= val_q[1][2]);

		assert(trial_va >= val_q[0][0]);
		assert(trial_va >= val_q[0][1]);
		assert(trial_va >= val_q[0][2]);

		bool did_clamp = false;
		int trial_max_clamp_mag = 0;

		// b0/b1: offsets of the other two high components below a. c: offset of the low major component below a.
		int trial_vb0 = compute_clamped_val(trial_va - val_q[1][1], 0, max_b_val, did_clamp, trial_max_clamp_mag);
		int trial_vb1 = compute_clamped_val(trial_va - val_q[1][2], 0, max_b_val, did_clamp, trial_max_clamp_mag);
		int trial_vc = compute_clamped_val(trial_va - val_q[0][0], 0, max_c_val, did_clamp, trial_max_clamp_mag);
		// d0/d1: signed residuals of the remaining low components relative to (a - b - c), using the already clamped b and c.
		int trial_vd0 = compute_clamped_val((trial_va - trial_vb0 - trial_vc) - val_q[0][1], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag);
		int trial_vd1 = compute_clamped_val((trial_va - trial_vb1 - trial_vc) - val_q[0][2], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag);

		if ((early_out_if_clamped) && (did_clamp) && (trial_max_clamp_mag > max_clamp_mag_accept_thresh))
		{
			// With a tie, the swapped order still deserves a try before giving up.
			if ((!had_tie) || (pass == 1))
			{
				max_clamp_mag = trial_max_clamp_mag;
				return true;
			}
		}

		if (!did_clamp)
		{
			// Make sure decoder gets the expected values
			assert(trial_va == val_q[1][0]);
			assert(trial_va - trial_vb0 == val_q[1][1]);
			assert(trial_va - trial_vb1 == val_q[1][2]);

			assert((trial_va - trial_vc) == val_q[0][0]);
			assert((trial_va - trial_vb0 - trial_vc - trial_vd0) == val_q[0][1]);
			assert((trial_va - trial_vb1 - trial_vc - trial_vd1) == val_q[0][2]);
		}

		// Reconstruct what this trial would decode to: e = high endpoint, f = low endpoint.
		const int r_e0 = clamp<int>(trial_va, 0, max_a_val);
		const int r_e1 = clamp<int>(trial_va - trial_vb0, 0, max_a_val);
		const int r_e2 = clamp<int>(trial_va - trial_vb1, 0, max_a_val);

		const int r_f0 = clamp<int>(trial_va - trial_vc, 0, max_a_val);
		const int r_f1 = clamp<int>(trial_va - trial_vb0 - trial_vc - trial_vd0, 0, max_a_val);
		const int r_f2 = clamp<int>(trial_va - trial_vb1 - trial_vc - trial_vd1, 0, max_a_val);

		assert(r_e0 <= max_a_qlog);
		assert(r_e1 <= max_a_qlog);
		assert(r_e2 <= max_a_qlog);

		assert(r_f0 <= max_a_qlog);
		assert(r_f1 <= max_a_qlog);
		assert(r_f2 <= max_a_qlog);

		// Either the trial is exact, or there is no alternative ordering to try: accept it and stop.
		if ((!did_clamp) || (!had_tie))
		{
			best_va = trial_va;
			best_vb0 = trial_vb0;
			best_vb1 = trial_vb1;
			best_vc = trial_vc;
			best_vd0 = trial_vd0;
			best_vd1 = trial_vd1;
			best_max_clamp_mag = trial_max_clamp_mag;
			best_did_clamp = did_clamp;

			best_q[1][0] = r_e0;
			best_q[1][1] = r_e1;
			best_q[1][2] = r_e2;

			best_q[0][0] = r_f0;
			best_q[0][1] = r_f1;
			best_q[0][2] = r_f2;
			break;
		}

		// we had a tie and it did clamp, try swapping L/H for a potential slight gain
		const uint32_t r_dist1 = basisu::square<int>(r_e0 - val_q[1][0]) + basisu::square<int>(r_e1 - val_q[1][1]) + basisu::square<int>(r_e2 - val_q[1][2]);
		const uint32_t r_dist0 = basisu::square<int>(r_f0 - val_q[0][0]) + basisu::square<int>(r_f1 - val_q[0][1]) + basisu::square<int>(r_f2 - val_q[0][2]);

		const uint32_t total_dist = r_dist1 + r_dist0;

		if (total_dist < best_dist)
		{
			best_dist = total_dist;

			best_va = trial_va;
			best_vb0 = trial_vb0;
			best_vb1 = trial_vb1;
			best_vc = trial_vc;
			best_vd0 = trial_vd0;
			best_vd1 = trial_vd1;
			// Keep the reported clamp magnitude in sync with the selected trial. Without this the function
			// would report did_clamp == true together with a clamp magnitude of 0, and callers comparing
			// max_clamp_mag against their threshold would accept an arbitrarily distorted encoding.
			best_max_clamp_mag = trial_max_clamp_mag;
			best_did_clamp = did_clamp;

			best_q[1][0] = r_e0;
			best_q[1][1] = r_e1;
			best_q[1][2] = r_e2;

			best_q[0][0] = r_f0;
			best_q[0][1] = r_f1;
			best_q[0][2] = r_f2;
		}

		for (uint32_t c = 0; c < 3; c++)
			std::swap(val_q[0][c], val_q[1][c]);
	}

	// pack bits now
	int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0;

	// x0-x5 are the six "extra" bit slots; which field bits they carry depends on the submode.
	int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0;
	switch (submode)
	{
	case 0:
		x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
		break;
	case 1:
		x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
		break;
	case 2:
		x0 = get_bit(best_va, 9); x1 = get_bit(best_vc, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
		break;
	case 3:
		x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 9); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
		break;
	case 4:
		x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10);
		break;
	case 5:
		x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_vc, 7); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
		break;
	case 6:
		x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10);
		break;
	case 7:
		x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
		break;
	default:
		break;
	}

	// write mode
	pack_bit(v1, 7, submode, 0);
	pack_bit(v2, 7, submode, 1);
	pack_bit(v3, 7, submode, 2);

	// highest component
	pack_bit(v4, 7, highest_comp, 0);
	pack_bit(v5, 7, highest_comp, 1);

	// write bit 8 of va
	pack_bit(v1, 6, best_va, 8);

	// extra bits
	pack_bit(v2, 6, x0);
	pack_bit(v3, 6, x1);
	pack_bit(v4, 6, x2);
	pack_bit(v5, 6, x3);
	pack_bit(v4, 5, x4);
	pack_bit(v5, 5, x5);

	// Low bits of each field go into the low bits of each byte.
	v0 = best_va & 0xFF;
	v1 |= (best_vc & 63);
	v2 |= (best_vb0 & 63);
	v3 |= (best_vb1 & 63);
	v4 |= (best_vd0 & 31);
	v5 |= (best_vd1 & 31);

	assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255));

	pEndpoints[0] = (uint8_t)v0;
	pEndpoints[1] = (uint8_t)v1;
	pEndpoints[2] = (uint8_t)v2;
	pEndpoints[3] = (uint8_t)v3;
	pEndpoints[4] = (uint8_t)v4;
	pEndpoints[5] = (uint8_t)v5;

#ifdef _DEBUG
	// Test for valid pack by unpacking
	{
		// Undo the component swap so the comparison is in the caller's channel order.
		if (highest_comp)
		{
			std::swap(best_q[0][0], best_q[0][highest_comp]);
			std::swap(best_q[1][0], best_q[1][highest_comp]);

			std::swap(orig_q[0][0], orig_q[0][highest_comp]);
			std::swap(orig_q[1][0], orig_q[1][highest_comp]);
		}

		int test_e[2][3];
		decode_mode11_to_qlog12(pEndpoints, test_e, astc_helpers::BISE_256_LEVELS);
		for (uint32_t i = 0; i < 2; i++)
		{
			for (uint32_t j = 0; j < 3; j++)
			{
				assert(best_q[i][j] == test_e[i][j] >> (12 - a_bits));

				if (!best_did_clamp)
				{
					// The endpoints may have been encoded in either order.
					assert((orig_q[i][j] == test_e[i][j] >> (12 - a_bits)) ||
						(orig_q[1 - i][j] == test_e[i][j] >> (12 - a_bits)));
				}
			}
		}
	}
#endif

	max_clamp_mag = best_max_clamp_mag;

	return best_did_clamp;
}
// Float-endpoint overload: quantizes the low/high qlog16 endpoints to the submode's "a" precision
// (9 + (submode >> 1) bits) and forwards to the integer pack_astc_mode11_submode() overload.
//
// low_q16/high_q16 - endpoint colors as qlog16 values stored in floats. They are rounded and clamped to [0, 65535]
//                    before quantization (values outside that range, or NaN, were previously converted to unsigned
//                    unclamped, which is undefined behavior).
// The remaining parameters and the return value are passed through to/from the integer overload.
bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped, int max_clamp_mag_accept_thresh)
{
	assert(submode <= 7);

	const uint32_t a_bits = 9 + (submode >> 1);
	const int max_a_val = (1 << a_bits) - 1;

	// The maximum usable value before it turns to NaN/Inf
	const int max_a_qlog = get_max_qlog(a_bits);

	int val_q[2][3];

	for (uint32_t c = 0; c < 3; c++)
	{
		// Round to nearest, then clamp to the qlog16 range [0, 65535] while still in float, so the
		// float->unsigned conversion is always well defined. The comparisons also map NaN to 0.
		const float low_r = std::round(low_q16[c]);
		const float high_r = std::round(high_q16[c]);
		const uint32_t low_i = (low_r > 0.0f) ? ((low_r < 65535.0f) ? (uint32_t)low_r : 65535u) : 0u;
		const uint32_t high_i = (high_r > 0.0f) ? ((high_r < 65535.0f) ? (uint32_t)high_r : 65535u) : 0u;

#if 0
		// This is very slightly better, but ~8% slower likely due to the table lookups.
		const half_float l = astc_helpers::qlog16_to_half(low_i);
		val_q[0][c] = half_to_qlog7_12(l, a_bits);

		const half_float h = astc_helpers::qlog16_to_half(high_i);
		val_q[1][c] = half_to_qlog7_12(h, a_bits);
#else
		// TODO: Tune quant_qlog16() for higher precision.
		val_q[0][c] = quant_qlog16(low_i, a_bits);
		val_q[1][c] = quant_qlog16(high_i, a_bits);
#endif

#if 1
		// If both endpoints quantized to the same value, move them one step apart (where there is room),
		// keeping the ordering of the unquantized inputs.
		if (val_q[0][c] == val_q[1][c])
		{
#if 0
			if (l <= h)
#else
			if (low_q16[c] < high_q16[c])
#endif
			{
				if (val_q[0][c])
					val_q[0][c]--;

				if (val_q[1][c] != max_a_val)
					val_q[1][c]++;
			}
			else
			{
				if (val_q[0][c] != max_a_val)
					val_q[0][c]++;

				if (val_q[1][c])
					val_q[1][c]--;
			}
		}
#endif

		// Never exceed the largest value that still decodes to a finite half float.
		val_q[0][c] = minimum<uint32_t>(val_q[0][c], max_a_qlog);
		val_q[1][c] = minimum<uint32_t>(val_q[1][c], max_a_qlog);
	}

	return pack_astc_mode11_submode(submode, pEndpoints, val_q, max_clamp_mag, early_out_if_clamped, max_clamp_mag_accept_thresh);
}
//--------------------------------------------------------------------------------------------------------------------------
void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16)
{
float lg = l_q16.dot(vec3F(1.0f)), hg = h_q16.dot(vec3F(1.0f));
if (lg > hg)
{
// Ensure low endpoint is generally less bright than high in direct mode.
std::swap(l_q16, h_q16);
}
for (uint32_t i = 0; i < 3; i++)
{
// TODO: This goes from QLOG16->HALF->QLOG8/7
half_float l_half = astc_helpers::qlog16_to_half(clamp((int)std::round(l_q16[i]), 0, 65535));
half_float h_half = astc_helpers::qlog16_to_half(clamp((int)std::round(h_q16[i]), 0, 65535));
int l_q, h_q;
if (i == 2)
{
l_q = g_half_to_qlog7[bounds_check((uint32_t)l_half, 0U, 32768U)];
h_q = g_half_to_qlog7[bounds_check((uint32_t)h_half, 0U, 32768U)];
l_q = minimum<uint32_t>(l_q, MAX_QLOG7);
h_q = minimum<uint32_t>(h_q, MAX_QLOG7);
}
else
{
l_q = g_half_to_qlog8[bounds_check((uint32_t)l_half, 0U, 32768U)];
h_q = g_half_to_qlog8[bounds_check((uint32_t)h_half, 0U, 32768U)];
// this quantizes R and G as 7 bits vs. 8, for grayscale.
//l_q = g_half_to_qlog7[bounds_check((uint32_t)l_half, 0U, 32768U)] << 1;
//h_q = g_half_to_qlog7[bounds_check((uint32_t)h_half, 0U, 32768U)] << 1;
l_q = minimum<uint32_t>(l_q, MAX_QLOG8);
h_q = minimum<uint32_t>(h_q, MAX_QLOG8);
}
#if 1
if (l_q == h_q)
{
const int m = (i == 2) ? MAX_QLOG7 : MAX_QLOG8;
if (l_q16[i] <= h_q16[i])
{
if (l_q)
l_q--;
if (h_q != m)
h_q++;
}
else
{
if (h_q)
h_q--;
if (l_q != m)
l_q++;
}
}
#endif
if (i == 2)
{
assert(l_q <= (int)MAX_QLOG7 && h_q <= (int)MAX_QLOG7);
l_q |= 128;
h_q |= 128;
}
else
{
assert(l_q <= (int)MAX_QLOG8 && h_q <= (int)MAX_QLOG8);
}
pEndpoints[2 * i + 0] = (uint8_t)l_q;
pEndpoints[2 * i + 1] = (uint8_t)h_q;
}
}
//--------------------------------------------------------------------------------------------------------------------------
// Packs a base color plus scale (RGB + S, given as qlog16 values in floats) into the 4 endpoint bytes of
// ASTC HDR mode 7, using the given submode (0-5).
//
// submode                     - selects the component precision and field widths (see the tables below).
// pEndpoints                  - receives the 4 packed bytes. NOT written if the function early-outs.
// rgb_q16, s_q16              - color and scale to encode; rounded and clamped to [0, MAX_QLOG16] before quantization.
// max_clamp_mag               - reset to 0, then raised to the largest amount by which a field had to be clamped.
// ise_weight_range            - ISE weight range of the block; ranges >= 4 enable an upward bias of the quantized values.
// early_out_if_clamped        - if true, returns as soon as the clamp magnitude exceeds max_clamp_mag_accept_thresh.
// max_clamp_mag_accept_thresh - largest clamp magnitude tolerated before early-outing.
//
// Returns true if any field had to be clamped (including the early out case), false otherwise.
bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range, bool early_out_if_clamped, int max_clamp_mag_accept_thresh)
{
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
assert(submode <= 5);
max_clamp_mag = 0;
// Field widths per submode: the first (major/red) component, the other two color components, and the scale.
static const uint8_t s_r_bits[6] = { 11, 11, 10, 9, 8, 7 };
static const uint8_t s_g_b_bits[6] = { 5, 6, 5, 6, 7, 7 };
static const uint8_t s_s_bits[6] = { 7, 5, 8, 7, 6, 7 };
// The precision of the components
const uint32_t prec_bits = s_r_bits[submode];
// Index 0-2 = color components, index 3 = scale.
int qlog[4], pack_bits[4];
for (uint32_t i = 0; i < 4; i++)
{
const float f = (i == 3) ? s_q16 : rgb_q16[i];
// The # of bits the component is packed into
if (i == 0)
pack_bits[i] = s_r_bits[submode];
else if (i == 3)
pack_bits[i] = s_s_bits[submode];
else
pack_bits[i] = s_g_b_bits[submode];
#if 0
// this is slightly worse
// TODO: going from qlog16 to half loses some precision. Then going from half to qlog 7-12 will have extra error.
half_float h = qlog_to_half(clamp((int)std::round(f), 0, MAX_QLOG16), 16);
qlog[i] = half_to_qlog7_12((half_float)bounds_check((uint32_t)h, 0U, 32768U), prec_bits);
#else
// All four values are quantized to the same precision (prec_bits), regardless of their packed width.
qlog[i] = quant_qlog16(clamp<int>((int)std::round(f), 0, MAX_QLOG16), prec_bits);
// Only bias if there are enough texel weights, 4=6 weights
if (ise_weight_range >= 4)
{
// Explicitly bias the high color, and the scale up, to better exploit the weights.
// The quantized range also then encompasses the complete input range.
const uint32_t max_val = (1 << prec_bits) - 1;
const uint32_t K = 3;
if (i == 3)
{
// The scale is biased by twice as much as the color.
qlog[i] = minimum<uint32_t>(qlog[i] + K * 2, max_val);
}
else
{
qlog[i] = minimum<uint32_t>(qlog[i] + K, max_val);
}
}
#endif
// Keep the color components at or below get_max_qlog(prec_bits); the scale is not limited this way.
if (i != 3)
qlog[i] = minimum<uint32_t>(qlog[i], get_max_qlog(prec_bits));
// If S=0, we lose freedom for the texel weights to add any value.
if ((i == 3) && (qlog[i] == 0))
qlog[i] = 1;
}
uint32_t maj_index = 0;
bool did_clamp = false;
// Submodes 0-4 store the largest color component first and the other two as differences from it.
// Submode 5 stores the components as-is, and is not range checked here beyond the asserts below.
if (submode != 5)
{
int largest_qlog = 0;
for (uint32_t i = 0; i < 3; i++)
{
if (qlog[i] > largest_qlog)
{
largest_qlog = qlog[i];
maj_index = i;
}
}
// Move the major component to index 0.
if (maj_index)
{
std::swap(qlog[0], qlog[maj_index]);
}
assert(qlog[0] >= qlog[1]);
assert(qlog[0] >= qlog[2]);
// Convert the two minor components into (non-negative) offsets below the major component.
qlog[1] = qlog[0] - qlog[1];
qlog[2] = qlog[0] - qlog[2];
// Clamp the two offsets and the scale to what their packed fields can hold.
for (uint32_t i = 1; i < 4; i++)
{
const int max_val = (1 << pack_bits[i]) - 1;
if (qlog[i] > max_val)
{
max_clamp_mag = maximum<int>(max_clamp_mag, qlog[i] - max_val);
qlog[i] = max_val;
did_clamp = true;
if ((early_out_if_clamped) && (max_clamp_mag > max_clamp_mag_accept_thresh))
return true;
}
}
}
for (uint32_t i = 0; i < 4; i++)
{
const int max_val = (1 << pack_bits[i]) - 1; (void)max_val;
assert(qlog[i] <= max_val);
}
// 4-bit mode value: identifies the submode and (for submodes 0-4) the major component.
int mode = 0;
// The low bits of each field go directly into the low bits of the four bytes.
int r = qlog[0] & 63; // 6-bits
int g = qlog[1] & 31; // 5-bits
int b = qlog[2] & 31; // 5-bits
int s = qlog[3] & 31; // 5-bits
// x0-x6 are the seven "extra" bit slots; which field bits they carry depends on the submode.
int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0, x6 = 0;
switch (submode)
{
case 0:
{
mode = (maj_index << 2) | 0;
assert((mode & 0xC) != 0xC);
x0 = get_bit(qlog[0], 9); // R9
x1 = get_bit(qlog[0], 8); // R8
x2 = get_bit(qlog[0], 7); // R7
x3 = get_bit(qlog[0], 10); // R10
x4 = get_bit(qlog[0], 6); // R6
x5 = get_bit(qlog[3], 6); // S6
x6 = get_bit(qlog[3], 5); // S5
break;
}
case 1:
{
mode = (maj_index << 2) | 1;
assert((mode & 0xC) != 0xC);
x0 = get_bit(qlog[0], 8); // R8
x1 = get_bit(qlog[1], 5); // G5
x2 = get_bit(qlog[0], 7); // R7
x3 = get_bit(qlog[2], 5); // B5
x4 = get_bit(qlog[0], 6); // R6
x5 = get_bit(qlog[0], 10); // R10
x6 = get_bit(qlog[0], 9); // R9
break;
}
case 2:
{
mode = (maj_index << 2) | 2;
assert((mode & 0xC) != 0xC);
x0 = get_bit(qlog[0], 9); // R9
x1 = get_bit(qlog[0], 8); // R8
x2 = get_bit(qlog[0], 7); // R7
x3 = get_bit(qlog[0], 6); // R6
x4 = get_bit(qlog[3], 7); // S7
x5 = get_bit(qlog[3], 6); // S6
x6 = get_bit(qlog[3], 5); // S5
break;
}
case 3:
{
mode = (maj_index << 2) | 3;
assert((mode & 0xC) != 0xC);
x0 = get_bit(qlog[0], 8); // R8
x1 = get_bit(qlog[1], 5); // G5
x2 = get_bit(qlog[0], 7); // R7
x3 = get_bit(qlog[2], 5); // B5
x4 = get_bit(qlog[0], 6); // R6
x5 = get_bit(qlog[3], 6); // S6
x6 = get_bit(qlog[3], 5); // S5
break;
}
case 4:
{
// Submode 4 is flagged by the top two mode bits being set; the low two bits hold the major component.
mode = maj_index | 0xC; // 0b1100
assert((mode & 0xC) == 0xC);
assert(mode != 0xF);
x0 = get_bit(qlog[1], 6); // G6
x1 = get_bit(qlog[1], 5); // G5
x2 = get_bit(qlog[2], 6); // B6
x3 = get_bit(qlog[2], 5); // B5
x4 = get_bit(qlog[0], 6); // R6
x5 = get_bit(qlog[0], 7); // R7
x6 = get_bit(qlog[3], 5); // S5
break;
}
case 5:
{
// Submode 5 uses the one remaining mode value and has no major component.
mode = 0xF;
x0 = get_bit(qlog[1], 6); // G6
x1 = get_bit(qlog[1], 5); // G5
x2 = get_bit(qlog[2], 6); // B6
x3 = get_bit(qlog[2], 5); // B5
x4 = get_bit(qlog[0], 6); // R6
x5 = get_bit(qlog[3], 6); // S6
x6 = get_bit(qlog[3], 5); // S5
break;
}
default:
{
assert(0);
break;
}
}
// Byte layout: mode bits 1,0 occupy the top of byte 0, mode bit 2 the top of byte 1, mode bit 3 the top of byte 2.
pEndpoints[0] = (uint8_t)((get_bit(mode, 1) << 7) | (get_bit(mode, 0) << 6) | r);
pEndpoints[1] = (uint8_t)((get_bit(mode, 2) << 7) | (x0 << 6) | (x1 << 5) | g);
pEndpoints[2] = (uint8_t)((get_bit(mode, 3) << 7) | (x2 << 6) | (x3 << 5) | b);
pEndpoints[3] = (uint8_t)((x4 << 7) | (x5 << 6) | (x6 << 5) | s);
#ifdef _DEBUG
// Test for valid pack by unpacking
{
// Shift that widens prec_bits-precision values to 12 bits.
const int inv_shift = 12 - prec_bits;
// Expected decode result: [1] = high endpoint, [0] = high endpoint minus the scale.
int unpacked_e[2][3];
if (submode != 5)
{
unpacked_e[1][0] = left_shift32(qlog[0], inv_shift);
unpacked_e[1][1] = clamp(left_shift32((qlog[0] - qlog[1]), inv_shift), 0, 0xFFF);
unpacked_e[1][2] = clamp(left_shift32((qlog[0] - qlog[2]), inv_shift), 0, 0xFFF);
unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF);
unpacked_e[0][1] = clamp(left_shift32(((qlog[0] - qlog[1]) - qlog[3]), inv_shift), 0, 0xFFF);
unpacked_e[0][2] = clamp(left_shift32(((qlog[0] - qlog[2]) - qlog[3]), inv_shift), 0, 0xFFF);
}
else
{
unpacked_e[1][0] = left_shift32(qlog[0], inv_shift);
unpacked_e[1][1] = left_shift32(qlog[1], inv_shift);
unpacked_e[1][2] = left_shift32(qlog[2], inv_shift);
unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF);
unpacked_e[0][1] = clamp(left_shift32((qlog[1] - qlog[3]), inv_shift), 0, 0xFFF);
unpacked_e[0][2] = clamp(left_shift32((qlog[2] - qlog[3]), inv_shift), 0, 0xFFF);
}
// Undo the major component swap so the comparison is in the caller's channel order.
if (maj_index)
{
std::swap(unpacked_e[0][0], unpacked_e[0][maj_index]);
std::swap(unpacked_e[1][0], unpacked_e[1][maj_index]);
}
int e[2][3];
decode_mode7_to_qlog12_ise20(pEndpoints, e, nullptr);
for (uint32_t i = 0; i < 3; i++)
{
assert(unpacked_e[0][i] == e[0][i]);
assert(unpacked_e[1][i] == e[1][i]);
}
}
#endif
return did_clamp;
}
//--------------------------------------------------------------------------------------------------------------------------
// Packs an already decomposed mode 11 description (fields a, b0, b1, c, d0, d1 plus submode and major component)
// into its NUM_MODE11_ENDPOINTS endpoint bytes. This is the inverse of unpack_mode11().
//
// Returns false if any field is outside the range the encoding can hold (for the submode path the limits are
// the m_max_*/m_min_* members of desc), true on success.
// pEndpoints is always zeroed first. On a false return from the direct path, bytes 0, 2 and 4 may already have
// been written.
bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints)
{
memset(pEndpoints, 0, NUM_MODE11_ENDPOINTS);
// Direct encoding: the six values are stored one per byte.
if (desc.is_direct())
{
// Bytes 0, 2 and 4 hold a, c and b0. b0 only has 7 bits; bit 7 of its byte is forced on.
if ((desc.m_a < 0) || (desc.m_c < 0) || (desc.m_b0 < 0))
return false;
if (!((desc.m_a <= 255) && (desc.m_c <= 255) && (desc.m_b0 <= 127)))
return false;
pEndpoints[0] = (uint8_t)desc.m_a;
pEndpoints[2] = (uint8_t)desc.m_c;
pEndpoints[4] = (uint8_t)desc.m_b0 | 128;
// Bytes 1, 3 and 5 hold b1, d0 and d1. d1 only has 7 bits; bit 7 of its byte is forced on.
if ((desc.m_b1 < 0) || (desc.m_d0 < 0) || (desc.m_d1 < 0))
return false;
if (!((desc.m_b1 <= 255) && (desc.m_d0 <= 255) && (desc.m_d1 <= 127)))
return false;
pEndpoints[1] = (uint8_t)desc.m_b1;
pEndpoints[3] = (uint8_t)desc.m_d0;
pEndpoints[5] = (uint8_t)desc.m_d1 | 128;
return true;
}
// Submode encoding: validate every field against the limits stored in desc before packing anything.
if (!((desc.m_a >= 0) && (desc.m_a <= desc.m_max_a_val)))
return false;
if (!(((desc.m_c >= 0) && (desc.m_c <= desc.m_max_c_val))))
return false;
if (!((desc.m_b0 >= 0) && (desc.m_b0 <= desc.m_max_b_val)))
return false;
if (!((desc.m_b1 >= 0) && (desc.m_b1 <= desc.m_max_b_val)))
return false;
// The d fields are signed.
if (!((desc.m_d0 >= desc.m_min_d_val) && (desc.m_d0 <= desc.m_max_d_val)))
return false;
if (!((desc.m_d1 >= desc.m_min_d_val) && (desc.m_d1 <= desc.m_max_d_val)))
return false;
const int va = desc.m_a, vb0 = desc.m_b0, vb1 = desc.m_b1, vc = desc.m_c, vd0 = desc.m_d0, vd1 = desc.m_d1;
int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0;
// x0-x5 are the six "extra" bit slots; which field bits they carry depends on the submode.
// A submode outside 0-7 leaves them all zero.
int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0;
switch (desc.m_submode)
{
case 0:
x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(vd0, 6); x3 = get_bit(vd1, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
break;
case 1:
x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(vb0, 7); x3 = get_bit(vb1, 7); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
break;
case 2:
x0 = get_bit(va, 9); x1 = get_bit(vc, 6); x2 = get_bit(vd0, 6); x3 = get_bit(vd1, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
break;
case 3:
x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(va, 9); x3 = get_bit(vc, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
break;
case 4:
x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(vb0, 7); x3 = get_bit(vb1, 7); x4 = get_bit(va, 9); x5 = get_bit(va, 10);
break;
case 5:
x0 = get_bit(va, 9); x1 = get_bit(va, 10); x2 = get_bit(vc, 7); x3 = get_bit(vc, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
break;
case 6:
x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6); x2 = get_bit(va, 11); x3 = get_bit(vc, 6); x4 = get_bit(va, 9); x5 = get_bit(va, 10);
break;
case 7:
x0 = get_bit(va, 9); x1 = get_bit(va, 10); x2 = get_bit(va, 11); x3 = get_bit(vc, 6); x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
break;
default:
break;
}
// write mode
pack_bit(v1, 7, desc.m_submode, 0);
pack_bit(v2, 7, desc.m_submode, 1);
pack_bit(v3, 7, desc.m_submode, 2);
// highest component
pack_bit(v4, 7, desc.m_maj_comp, 0);
pack_bit(v5, 7, desc.m_maj_comp, 1);
// write bit 8 of va
pack_bit(v1, 6, va, 8);
// extra bits
pack_bit(v2, 6, x0);
pack_bit(v3, 6, x1);
pack_bit(v4, 6, x2);
pack_bit(v5, 6, x3);
pack_bit(v4, 5, x4);
pack_bit(v5, 5, x5);
// The low bits of each field go into the low bits of each byte.
v0 = va & 0xFF;
v1 |= (vc & 63);
v2 |= (vb0 & 63);
v3 |= (vb1 & 63);
v4 |= (vd0 & 31);
v5 |= (vd1 & 31);
assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255));
pEndpoints[0] = (uint8_t)v0;
pEndpoints[1] = (uint8_t)v1;
pEndpoints[2] = (uint8_t)v2;
pEndpoints[3] = (uint8_t)v3;
pEndpoints[4] = (uint8_t)v4;
pEndpoints[5] = (uint8_t)v5;
return true;
}
// Sign extends a two's complement bit field to a full int.
// src holds the field in its low num_src_bits bits (2..31); any higher bits of src are ignored.
// Returns the field's value: negative if the field's top bit is set, otherwise the field's bits unchanged.
static inline int astc_hdr_sign_extend(int src, int num_src_bits)
{
	assert((num_src_bits >= 2) && (num_src_bits <= 31));

	// Build the masks in unsigned arithmetic: with num_src_bits == 31, (1 << 31) - 1 would overflow a signed int.
	const uint32_t bits = (uint32_t)src;
	const uint32_t field_mask = (1u << num_src_bits) - 1u;
	const uint32_t sign_bit = 1u << (num_src_bits - 1);

	if (bits & sign_bit)
		return (int)(bits | ~field_mask);

	return (int)(bits & field_mask);
}
// Unpacks 6 mode 11 endpoint bytes into their individual fields (a, b0, b1, c, d0, d1), submode and major component.
// This is the inverse of pack_mode11(mode11_log_desc&, uint8_t*).
// desc is cleared first. For the direct encoding (major component == 3) only the six raw values and m_maj_comp
// are filled in; the submode and the bit width/limit members are left as clear_obj() set them.
void unpack_mode11(const uint8_t* pEndpoints, mode11_log_desc& desc)
{
clear_obj(desc);
// The major component index lives in bit 7 of bytes 4 and 5.
pack_bit(desc.m_maj_comp, 0, pEndpoints[4], 7);
pack_bit(desc.m_maj_comp, 1, pEndpoints[5], 7);
// A major component of 3 marks the direct encoding: one value per byte, with bytes 4 and 5 holding 7 bits each.
if (desc.m_maj_comp == 3)
{
desc.m_a = pEndpoints[0];
desc.m_c = pEndpoints[2];
desc.m_b0 = pEndpoints[4] & 0x7F;
desc.m_b1 = pEndpoints[1];
desc.m_d0 = pEndpoints[3];
desc.m_d1 = pEndpoints[5] & 0x7F;
return;
}
// The 3-bit submode lives in bit 7 of bytes 1, 2 and 3.
pack_bit(desc.m_submode, 0, pEndpoints[1], 7);
pack_bit(desc.m_submode, 1, pEndpoints[2], 7);
pack_bit(desc.m_submode, 2, pEndpoints[3], 7);
// Low bits of every field, common to all submodes.
desc.m_a = pEndpoints[0]; // 8 bits
pack_bit(desc.m_a, 8, pEndpoints[1], 6);
desc.m_c = pEndpoints[1] & 63; // 6 bits
desc.m_b0 = pEndpoints[2] & 63; // 6 bits
desc.m_b1 = pEndpoints[3] & 63; // 6 bits
desc.m_d0 = pEndpoints[4] & 31; // 5 bits
desc.m_d1 = pEndpoints[5] & 31; // 5 bits
// The six "extra" bit slots; the submode decides which field bit each one carries.
const int x0 = get_bit(pEndpoints[2], 6);
const int x1 = get_bit(pEndpoints[3], 6);
const int x2 = get_bit(pEndpoints[4], 6);
const int x3 = get_bit(pEndpoints[5], 6);
const int x4 = get_bit(pEndpoints[4], 5);
const int x5 = get_bit(pEndpoints[5], 5);
switch (desc.m_submode)
{
case 0:
pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_d0, 6, x2, 0); pack_bit(desc.m_d1, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
break;
case 1:
pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_b0, 7, x2, 0); pack_bit(desc.m_b1, 7, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
break;
case 2:
pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_c, 6, x1, 0); pack_bit(desc.m_d0, 6, x2, 0); pack_bit(desc.m_d1, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
break;
case 3:
pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_a, 9, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
break;
case 4:
pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_b0, 7, x2, 0); pack_bit(desc.m_b1, 7, x3, 0); pack_bit(desc.m_a, 9, x4, 0); pack_bit(desc.m_a, 10, x5, 0);
break;
case 5:
pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_a, 10, x1, 0); pack_bit(desc.m_c, 7, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
break;
case 6:
pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_a, 11, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_a, 9, x4, 0); pack_bit(desc.m_a, 10, x5, 0);
break;
case 7:
default:
pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_a, 10, x1, 0); pack_bit(desc.m_a, 11, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
break;
}
// Record the field widths of this submode and the value limits derived from them.
desc.m_a_bits = 9 + (desc.m_submode >> 1);
desc.m_b_bits = s_b_bits[desc.m_submode];
desc.m_c_bits = s_c_bits[desc.m_submode];
desc.m_d_bits = s_d_bits[desc.m_submode];
desc.m_max_a_val = (1 << desc.m_a_bits) - 1;
desc.m_max_b_val = (1 << desc.m_b_bits) - 1;
desc.m_max_c_val = (1 << desc.m_c_bits) - 1;
// d is signed: its range is [-(2^(d_bits-1)), 2^(d_bits-1) - 1].
desc.m_min_d_val = -(1 << (desc.m_d_bits - 1));
desc.m_max_d_val = -desc.m_min_d_val - 1;
// The d fields were assembled as raw bit patterns; convert them to signed values.
desc.m_d0 = astc_hdr_sign_extend(desc.m_d0, desc.m_d_bits);
desc.m_d1 = astc_hdr_sign_extend(desc.m_d1, desc.m_d_bits);
assert((desc.m_a >= 0) && (desc.m_a <= desc.m_max_a_val));
assert((desc.m_c >= 0) && (desc.m_c <= desc.m_max_c_val));
assert((desc.m_b0 >= 0) && (desc.m_b0 <= desc.m_max_b_val));
assert((desc.m_b1 >= 0) && (desc.m_b1 <= desc.m_max_b_val));
assert((desc.m_d0 >= desc.m_min_d_val) && (desc.m_d0 <= desc.m_max_d_val));
assert((desc.m_d1 >= desc.m_min_d_val) && (desc.m_d1 <= desc.m_max_d_val));
}
//--------------------------------------------------------------------------------------------------------------------------
// Extracts the submode index and major component index from packed mode 11 endpoint bytes.
// Both are assembled from bit 7 of consecutive bytes: the submode from bytes 1-3, the major component from bytes 4-5
// (lowest bit first).
void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index)
{
	submode_index = 0;
	for (int bit = 0; bit < 3; bit++)
		pack_bit(submode_index, bit, pEndpoints[1 + bit], 7);

	maj_index = 0;
	for (int bit = 0; bit < 2; bit++)
		pack_bit(maj_index, bit, pEndpoints[4 + bit], 7);
}
//--------------------------------------------------------------------------------------------------------------------------
// Extracts the submode index (0-5) and major component index from packed mode 7 endpoint bytes.
// The 4-bit mode value is built from bits 7..6 of byte 0 (mode bits 1..0), bit 7 of byte 1 (mode bit 2)
// and bit 7 of byte 2 (mode bit 3).
void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int &maj_index)
{
	const int mode_bits =
		((pEndpoints[0] >> 6) & 3) |
		(((pEndpoints[1] >> 7) & 1) << 2) |
		(((pEndpoints[2] >> 7) & 1) << 3);

	// All four bits set: submode 5, which has no major component.
	if (mode_bits == 0xF)
	{
		maj_index = 0;
		submode_index = 5;
		return;
	}

	// Top two bits set: submode 4, with the major component in the low two bits.
	if ((mode_bits >> 2) == 3)
	{
		maj_index = mode_bits & 3;
		submode_index = 4;
		return;
	}

	// Otherwise: major component in the top two bits, submode (0-3) in the low two.
	maj_index = mode_bits >> 2;
	submode_index = mode_bits & 3;
}
//--------------------------------------------------------------------------------------------------------------------------
// TODO: Use pack_mode11() as a shared function.
bool pack_mode11(
const vec3F& low_color_q16, const vec3F& high_color_q16,
uint32_t ise_endpoint_range, uint8_t* pEndpoints,
const astc_hdr_codec_base_options& coptions,
bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used)
{
uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS];
if (direct_only)
{
first_submode = -1;
last_submode = -1;
}
assert(first_submode <= last_submode);
assert((first_submode >= -1) && (first_submode <= 7));
assert((last_submode >= -1) && (last_submode <= 7));
memset(pEndpoints, 0, NUM_MODE11_ENDPOINTS);
double best_trial_dist = BIG_FLOAT_VAL;
int best_submode = 0;
for (int submode = last_submode; submode >= first_submode; submode--)
{
bool did_clamp = false;
int max_clamp_mag = 0;
if (submode == -1)
{
// If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.
pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16);
}
else
{
const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32;
did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);
if (!ignore_clamping)
{
// If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts.
if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
continue;
}
}
uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS];
// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
// It could massively distort the endpoints, but still result in a valid encoding.
basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, orig_trial_endpoints, ise_endpoint_range, trial_endpoints);
int e[2][3];
if (!decode_mode11_to_qlog12(trial_endpoints, e, ise_endpoint_range))
continue;
vec3F e0(
(float)(e[0][0] << 4),
(float)(e[0][1] << 4),
(float)(e[0][2] << 4)
);
vec3F e1(
(float)(e[1][0] << 4),
(float)(e[1][1] << 4),
(float)(e[1][2] << 4)
);
double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);
double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);
double dist = helpers::minimum(dist0, dist1);
if (dist < best_trial_dist)
{
best_trial_dist = dist;
best_submode = submode;
memcpy(pEndpoints, trial_endpoints, NUM_MODE11_ENDPOINTS);
}
if (coptions.m_take_first_non_clamping_mode11_submode)
{
if (!did_clamp)
break;
}
} // submode
if ((coptions.m_ultra_quant) &&
(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&
(best_trial_dist != BIG_FLOAT_VAL))
{
uint8_t orig_best_trial_endpoints[NUM_MODE11_ENDPOINTS];
memcpy(orig_best_trial_endpoints, pEndpoints, NUM_MODE11_ENDPOINTS);
for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++)
{
for (int dt = 0; dt <= 1; dt++)
{
const int d = dt ? 1 : -1;
uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS];
memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE11_ENDPOINTS);
int ise = varied_endpoints[c];
int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];
rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);
ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];
varied_endpoints[c] = (uint8_t)ise;
int e[2][3];
if (!decode_mode11_to_qlog12(varied_endpoints, e, ise_endpoint_range))
continue;
vec3F e0(
(float)(e[0][0] << 4),
(float)(e[0][1] << 4),
(float)(e[0][2] << 4)
);
vec3F e1(
(float)(e[1][0] << 4),
(float)(e[1][1] << 4),
(float)(e[1][2] << 4)
);
double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);
double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);
double dist = helpers::minimum(dist0, dist1);
if (dist < best_trial_dist)
{
best_trial_dist = dist;
memcpy(pEndpoints, varied_endpoints, NUM_MODE11_ENDPOINTS);
}
} // d
} // c
} // if (coptions.m_ultra_quant)
submode_used = best_submode + 1;
return (best_trial_dist != BIG_FLOAT_VAL);
}
// Tries to encode a block with mode 11 endpoints built from the given low/high colors, keeping the
// result only if it beats the current best block error.
//
// The endpoint search mirrors pack_mode11(): submodes are tried from last_submode down to
// first_submode (-1 = direct packing), each candidate is requantized to ise_endpoint_range, decoded
// and scored against the requested colors, optionally followed by the m_ultra_quant +-1 rank
// refinement. The winning endpoints are expanded with get_astc_hdr_mode_11_block_colors() and the
// weights are chosen by eval_selectors().
//
// pEndpoints, pWeights, cur_block_error, submode_used: only written when the trial error is lower
//   than cur_block_error on entry. submode_used receives the winning submode plus one.
// constrain_ise_weight_selectors: for BISE_16_LEVELS / BISE_12_LEVELS weight ranges a reduced
//   selector bitmask is passed to eval_selectors() instead of UINT32_MAX.
// Returns true if the outputs were updated.
bool try_mode11(uint32_t num_pixels,
	uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
	const vec3F& low_color_q16, const vec3F& high_color_q16,
	const basist::half_float block_pixels_half[][3],
	uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
	bool constrain_ise_weight_selectors,
	int32_t first_submode, int32_t last_submode, bool ignore_clamping) // -1, 7
{
	assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
	assert((num_weight_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));
	assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
	assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range));

	if (direct_only)
		first_submode = last_submode = -1;

	assert(first_submode <= last_submode);
	assert((first_submode >= -1) && (first_submode <= 7));
	assert((last_submode >= -1) && (last_submode <= 7));

	// Decodes a candidate and measures its squared distance to the requested colors.
	// Returns false (leaving result_dist untouched) if the candidate can't be decoded.
	const auto measure_candidate = [&](uint8_t* pCandidate, double& result_dist) -> bool
	{
		int qlog12[2][3];
		if (!decode_mode11_to_qlog12(pCandidate, qlog12, ise_endpoint_range))
			return false;

		// Shift the decoded values left by 4 so they're comparable with the q16 inputs.
		vec3F dec0((float)(qlog12[0][0] << 4), (float)(qlog12[0][1] << 4), (float)(qlog12[0][2] << 4));
		vec3F dec1((float)(qlog12[1][0] << 4), (float)(qlog12[1][1] << 4), (float)(qlog12[1][2] << 4));

		// The decoded endpoints may come back in either order, so score both pairings.
		double straight_dist = dec0.squared_distance_d(low_color_q16) + dec1.squared_distance_d(high_color_q16);
		double swapped_dist = dec1.squared_distance_d(low_color_q16) + dec0.squared_distance_d(high_color_q16);

		result_dist = helpers::minimum(straight_dist, swapped_dist);
		return true;
	};

	uint8_t best_trial_endpoints[NUM_MODE11_ENDPOINTS];
	clear_obj(best_trial_endpoints);

	double best_trial_dist = BIG_FLOAT_VAL;
	int best_submode = 0;

	uint8_t unquant_endpoints[NUM_MODE11_ENDPOINTS];

	for (int submode = last_submode; submode >= first_submode; submode--)
	{
		bool did_clamp = false;

		if (submode == -1)
		{
			// If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.
			pack_astc_mode11_direct(unquant_endpoints, low_color_q16, high_color_q16);
		}
		else
		{
			const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32;
			int max_clamp_mag = 0;

			did_clamp = pack_astc_mode11_submode(submode, unquant_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);

			// If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts.
			if ((!ignore_clamping) && (did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
				continue;
		}

		// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
		// It could massively distort the endpoints, but still result in a valid encoding.
		uint8_t requant_endpoints[NUM_MODE11_ENDPOINTS];
		basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, unquant_endpoints, ise_endpoint_range, requant_endpoints);

		double dist = 0.0;
		if (!measure_candidate(requant_endpoints, dist))
			continue;

		if (dist < best_trial_dist)
		{
			best_trial_dist = dist;
			best_submode = submode;
			memcpy(best_trial_endpoints, requant_endpoints, NUM_MODE11_ENDPOINTS);
		}

		if ((coptions.m_take_first_non_clamping_mode11_submode) && (!did_clamp))
			break;
	} // submode

	// The refinement below can only lower best_trial_dist, so this stays valid afterwards.
	const bool found_any = (best_trial_dist != BIG_FLOAT_VAL);

	if ((coptions.m_ultra_quant) &&
		(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&
		(found_any))
	{
		const auto& endpoint_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range);
		const int max_rank = astc_helpers::get_ise_levels(ise_endpoint_range) - 1;

		// Every variation starts from the winner of the submode search, not from the running best.
		uint8_t base_endpoints[NUM_MODE11_ENDPOINTS];
		memcpy(base_endpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);

		for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++)
		{
			const int base_rank = endpoint_tab.m_ISE_to_rank[base_endpoints[c]];

			// Nudge this endpoint value one rank down, then one rank up.
			for (int delta = -1; delta <= 1; delta += 2)
			{
				uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS];
				memcpy(varied_endpoints, base_endpoints, NUM_MODE11_ENDPOINTS);

				const int varied_rank = clamp<int>(base_rank + delta, 0, max_rank);
				varied_endpoints[c] = (uint8_t)endpoint_tab.m_rank_to_ISE[varied_rank];

				double dist = 0.0;
				if (!measure_candidate(varied_endpoints, dist))
					continue;

				if (dist < best_trial_dist)
				{
					best_trial_dist = dist;
					memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE11_ENDPOINTS);
				}
			} // delta
		} // c
	} // if (coptions.m_ultra_quant)

	if (!found_any)
		return false;

	half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3];
	if (!get_astc_hdr_mode_11_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range))
		return false;

	uint32_t usable_selector_bitmask = UINT32_MAX;
	if (constrain_ise_weight_selectors)
	{
		if (ise_weight_range == astc_helpers::BISE_16_LEVELS)
			usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15);
		else if (ise_weight_range == astc_helpers::BISE_12_LEVELS)
			usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3);
	}

	uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	double trial_blk_error = eval_selectors(num_pixels, trial_weights, ise_weight_range, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask);

	if (trial_blk_error < cur_block_error)
	{
		cur_block_error = trial_blk_error;
		memcpy(pEndpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);
		memcpy(pWeights, trial_weights, num_pixels);
		submode_used = best_submode + 1;
		return true;
	}

	return false;
}
//--------------------------------------------------------------------------------------------------------------------------
// Dual plane variant of try_mode11(): finds the best mode 11 endpoint encoding for the given
// low/high colors, then lets eval_selectors_dual_plane() pick two sets of weights, with
// channel_index (0..2) forwarded to it.
//
// The endpoint search tries submodes from last_submode down to first_submode (-1 = direct packing),
// requantizes each candidate to ise_endpoint_range, decodes it and scores it against the requested
// colors, optionally followed by the m_ultra_quant +-1 rank refinement.
//
// pEndpoints, pWeights0, pWeights1, cur_block_error, submode_used: only written when the trial error
//   is lower than cur_block_error on entry. submode_used receives the winning submode plus one.
// constrain_ise_weight_selectors: for BISE_16_LEVELS / BISE_12_LEVELS weight ranges a reduced
//   selector bitmask is passed to the evaluator instead of UINT32_MAX.
// Returns true if the outputs were updated.
bool try_mode11_dual_plane(uint32_t channel_index, uint32_t num_pixels,
	uint8_t* pEndpoints, uint8_t* pWeights0, uint8_t* pWeights1, double& cur_block_error, uint32_t& submode_used,
	const vec3F& low_color_q16, const vec3F& high_color_q16,
	const basist::half_float block_pixels_half[][3],
	uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
	bool constrain_ise_weight_selectors,
	int32_t first_submode, int32_t last_submode, bool ignore_clamping) // -1, 7
{
	assert(channel_index <= 2);
	assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
	assert((num_weight_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));
	assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
	assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range));

	if (direct_only)
		first_submode = last_submode = -1;

	assert(first_submode <= last_submode);
	assert((first_submode >= -1) && (first_submode <= 7));
	assert((last_submode >= -1) && (last_submode <= 7));

	// Decodes a candidate and measures its squared distance to the requested colors.
	// Returns false (leaving result_dist untouched) if the candidate can't be decoded.
	const auto measure_candidate = [&](uint8_t* pCandidate, double& result_dist) -> bool
	{
		int qlog12[2][3];
		if (!decode_mode11_to_qlog12(pCandidate, qlog12, ise_endpoint_range))
			return false;

		// Shift the decoded values left by 4 so they're comparable with the q16 inputs.
		vec3F dec0((float)(qlog12[0][0] << 4), (float)(qlog12[0][1] << 4), (float)(qlog12[0][2] << 4));
		vec3F dec1((float)(qlog12[1][0] << 4), (float)(qlog12[1][1] << 4), (float)(qlog12[1][2] << 4));

		// The decoded endpoints may come back in either order, so score both pairings.
		double straight_dist = dec0.squared_distance_d(low_color_q16) + dec1.squared_distance_d(high_color_q16);
		double swapped_dist = dec1.squared_distance_d(low_color_q16) + dec0.squared_distance_d(high_color_q16);

		result_dist = helpers::minimum(straight_dist, swapped_dist);
		return true;
	};

	uint8_t best_trial_endpoints[NUM_MODE11_ENDPOINTS];
	clear_obj(best_trial_endpoints);

	double best_trial_dist = BIG_FLOAT_VAL;
	int best_submode = 0;

	uint8_t unquant_endpoints[NUM_MODE11_ENDPOINTS];

	for (int submode = last_submode; submode >= first_submode; submode--)
	{
		bool did_clamp = false;

		if (submode == -1)
		{
			// If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.
			pack_astc_mode11_direct(unquant_endpoints, low_color_q16, high_color_q16);
		}
		else
		{
			const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32;
			int max_clamp_mag = 0;

			did_clamp = pack_astc_mode11_submode(submode, unquant_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);

			// If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts.
			if ((!ignore_clamping) && (did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
				continue;
		}

		// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
		// It could massively distort the endpoints, but still result in a valid encoding.
		uint8_t requant_endpoints[NUM_MODE11_ENDPOINTS];
		basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, unquant_endpoints, ise_endpoint_range, requant_endpoints);

		double dist = 0.0;
		if (!measure_candidate(requant_endpoints, dist))
			continue;

		if (dist < best_trial_dist)
		{
			best_trial_dist = dist;
			best_submode = submode;
			memcpy(best_trial_endpoints, requant_endpoints, NUM_MODE11_ENDPOINTS);
		}

		if ((coptions.m_take_first_non_clamping_mode11_submode) && (!did_clamp))
			break;
	} // submode

	// The refinement below can only lower best_trial_dist, so this stays valid afterwards.
	const bool found_any = (best_trial_dist != BIG_FLOAT_VAL);

	if ((coptions.m_ultra_quant) &&
		(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&
		(found_any))
	{
		const auto& endpoint_tab = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range);
		const int max_rank = astc_helpers::get_ise_levels(ise_endpoint_range) - 1;

		// Every variation starts from the winner of the submode search, not from the running best.
		uint8_t base_endpoints[NUM_MODE11_ENDPOINTS];
		memcpy(base_endpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);

		for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++)
		{
			const int base_rank = endpoint_tab.m_ISE_to_rank[base_endpoints[c]];

			// Nudge this endpoint value one rank down, then one rank up.
			for (int delta = -1; delta <= 1; delta += 2)
			{
				uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS];
				memcpy(varied_endpoints, base_endpoints, NUM_MODE11_ENDPOINTS);

				const int varied_rank = clamp<int>(base_rank + delta, 0, max_rank);
				varied_endpoints[c] = (uint8_t)endpoint_tab.m_rank_to_ISE[varied_rank];

				double dist = 0.0;
				if (!measure_candidate(varied_endpoints, dist))
					continue;

				if (dist < best_trial_dist)
				{
					best_trial_dist = dist;
					memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE11_ENDPOINTS);
				}
			} // delta
		} // c
	} // if (coptions.m_ultra_quant)

	if (!found_any)
		return false;

	half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3];
	if (!get_astc_hdr_mode_11_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range))
		return false;

	uint32_t usable_selector_bitmask = UINT32_MAX;
	if (constrain_ise_weight_selectors)
	{
		if (ise_weight_range == astc_helpers::BISE_16_LEVELS)
			usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15);
		else if (ise_weight_range == astc_helpers::BISE_12_LEVELS)
			usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3);
	}

	uint8_t trial_weights0[MAX_ASTC_HDR_ENC_BLOCK_PIXELS], trial_weights1[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	double trial_blk_error = eval_selectors_dual_plane(channel_index, num_pixels, trial_weights0, trial_weights1, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask);

	if (trial_blk_error < cur_block_error)
	{
		cur_block_error = trial_blk_error;
		memcpy(pEndpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);
		memcpy(pWeights0, trial_weights0, num_pixels);
		memcpy(pWeights1, trial_weights1, num_pixels);
		submode_used = best_submode + 1;
		return true;
	}

	return false;
}
//--------------------------------------------------------------------------------------------------------------------------
bool pack_mode7(
const vec3F& high_color_q16, const float s_q16,
uint32_t ise_endpoint_range, uint8_t* pEndpoints,
uint32_t ise_weight_range, // only used for determining biasing during packing
const astc_hdr_codec_base_options& coptions,
int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used)
{
assert(first_submode <= last_submode);
assert((first_submode >= 0) && (first_submode <= (int)MAX_MODE7_SUBMODE_INDEX));
assert(last_submode <= (int)MAX_MODE7_SUBMODE_INDEX);
uint8_t unquant_trial_endpoints[NUM_MODE7_ENDPOINTS];
memset(pEndpoints, 0, NUM_MODE7_ENDPOINTS);
double best_trial_dist = BIG_FLOAT_VAL;
int best_trial_submode = 0;
for (int submode = first_submode; submode <= last_submode; submode++)
{
const int MAX_CLAMP_MAG_ACCEPT_THRESH = 16;
int max_clamp_mag = 0;
const bool did_clamp = pack_astc_mode7_submode(submode, unquant_trial_endpoints, high_color_q16, s_q16, max_clamp_mag, ise_weight_range, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);
if (submode < 5)
{
if (!ignore_clamping)
{
if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
continue;
}
}
uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS];
// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
// It could massively distort the endpoints, but still result in a valid encoding.
basist::astc_6x6_hdr::requantize_ise_endpoints(7, astc_helpers::BISE_256_LEVELS, unquant_trial_endpoints, ise_endpoint_range, trial_endpoints);
int e[2][3];
int decoded_s = 0;
if (!decode_mode7_to_qlog12(trial_endpoints, e, &decoded_s, ise_endpoint_range))
continue;
// e1 is always the high color
vec3F e1(
(float)(e[1][0] << 4),
(float)(e[1][1] << 4),
(float)(e[1][2] << 4)
);
decoded_s <<= 4;
double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;
if (dist < best_trial_dist)
{
best_trial_dist = dist;
best_trial_submode = submode;
memcpy(pEndpoints, trial_endpoints, NUM_MODE7_ENDPOINTS);
}
if (coptions.m_take_first_non_clamping_mode7_submode)
{
if (!did_clamp)
break;
}
} // submode
if ((coptions.m_ultra_quant) &&
(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&
(best_trial_dist != BIG_FLOAT_VAL))
{
uint8_t orig_best_trial_endpoints[NUM_MODE7_ENDPOINTS];
memcpy(orig_best_trial_endpoints, pEndpoints, NUM_MODE7_ENDPOINTS);
vec3F low_color_q16(high_color_q16 - vec3F(s_q16));
low_color_q16.clamp(0.0f, 65535.0f);
for (uint32_t c = 0; c < NUM_MODE7_ENDPOINTS; c++)
{
for (int dt = 0; dt <= 1; dt++)
{
const int d = dt ? 1 : -1;
uint8_t varied_endpoints[NUM_MODE7_ENDPOINTS];
memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE7_ENDPOINTS);
int ise = varied_endpoints[c];
int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];
rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);
ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];
varied_endpoints[c] = (uint8_t)ise;
int e[2][3];
int decoded_s = 0;
if (!decode_mode7_to_qlog12(varied_endpoints, e, &decoded_s, ise_endpoint_range))
continue;
// e1 is always the high color
vec3F e1(
(float)(e[1][0] << 4),
(float)(e[1][1] << 4),
(float)(e[1][2] << 4)
);
decoded_s <<= 4;
double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;
if (dist < best_trial_dist)
{
best_trial_dist = dist;
memcpy(pEndpoints, varied_endpoints, NUM_MODE7_ENDPOINTS);
}
} // d
} // c
}
submode_used = best_trial_submode;
return (best_trial_dist != BIG_FLOAT_VAL);
}
//--------------------------------------------------------------------------------------------------------------------------
bool try_mode7(
uint32_t num_pixels,
uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
const vec3F& high_color_q16, const float s_q16,
const half_float block_pixels_half[][3],
uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions,
uint32_t ise_endpoint_range,
int32_t first_submode, int32_t last_submode)
{
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
assert(first_submode <= last_submode);
assert((first_submode >= 0) && (first_submode <= (int)MAX_MODE7_SUBMODE_INDEX));
assert(last_submode <= (int)MAX_MODE7_SUBMODE_INDEX);
assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range));
uint8_t unquant_trial_endpoints[NUM_MODE7_ENDPOINTS];
uint8_t best_trial_endpoints[NUM_MODE7_ENDPOINTS];
clear_obj(best_trial_endpoints);
double best_trial_dist = BIG_FLOAT_VAL;
int best_trial_submode = 0;
for (int submode = first_submode; submode <= last_submode; submode++)
{
const int MAX_CLAMP_MAG_ACCEPT_THRESH = 16;
int max_clamp_mag = 0;
const bool did_clamp = pack_astc_mode7_submode(submode, unquant_trial_endpoints, high_color_q16, s_q16, max_clamp_mag, ise_weight_range, true, MAX_CLAMP_MAG_ACCEPT_THRESH);
if (submode < 5)
{
if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
continue;
}
uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS];
// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
// It could massively distort the endpoints, but still result in a valid encoding.
basist::astc_6x6_hdr::requantize_ise_endpoints(7, astc_helpers::BISE_256_LEVELS, unquant_trial_endpoints, ise_endpoint_range, trial_endpoints);
int e[2][3];
int decoded_s = 0;
if (!decode_mode7_to_qlog12(trial_endpoints, e, &decoded_s, ise_endpoint_range))
continue;
// e1 is always the high color
vec3F e1(
(float)(e[1][0] << 4),
(float)(e[1][1] << 4),
(float)(e[1][2] << 4)
);
decoded_s <<= 4;
double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;
if (dist < best_trial_dist)
{
best_trial_dist = dist;
best_trial_submode = submode;
memcpy(best_trial_endpoints, trial_endpoints, sizeof(best_trial_endpoints));
}
if (coptions.m_take_first_non_clamping_mode7_submode)
{
if (!did_clamp)
break;
}
} // submode
if ((coptions.m_ultra_quant) &&
(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&
(best_trial_dist != BIG_FLOAT_VAL))
{
uint8_t orig_best_trial_endpoints[NUM_MODE7_ENDPOINTS];
memcpy(orig_best_trial_endpoints, best_trial_endpoints, NUM_MODE7_ENDPOINTS);
vec3F low_color_q16(high_color_q16 - vec3F(s_q16));
low_color_q16.clamp(0.0f, 65535.0f);
for (uint32_t c = 0; c < NUM_MODE7_ENDPOINTS; c++)
{
for (int dt = 0; dt <= 1; dt++)
{
const int d = dt ? 1 : -1;
uint8_t varied_endpoints[NUM_MODE7_ENDPOINTS];
memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE7_ENDPOINTS);
int ise = varied_endpoints[c];
int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];
rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);
ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];
varied_endpoints[c] = (uint8_t)ise;
int e[2][3];
int decoded_s = 0;
if (!decode_mode7_to_qlog12(varied_endpoints, e, &decoded_s, ise_endpoint_range))
continue;
// e1 is always the high color
vec3F e1(
(float)(e[1][0] << 4),
(float)(e[1][1] << 4),
(float)(e[1][2] << 4)
);
decoded_s <<= 4;
double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;
if (dist < best_trial_dist)
{
best_trial_dist = dist;
memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE7_ENDPOINTS);
}
} // d
} // c
}
bool improved_flag = false;
if (best_trial_dist != BIG_FLOAT_VAL)
{
half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3];
uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
if (get_astc_hdr_mode_7_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range))
{
double trial_blk_error = eval_selectors(num_pixels, trial_weights, ise_weight_range, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions);
if (trial_blk_error < cur_block_error)
{
cur_block_error = trial_blk_error;
memcpy(pEndpoints, best_trial_endpoints, NUM_MODE7_ENDPOINTS);
memcpy(pWeights, trial_weights, num_pixels);
submode_used = best_trial_submode;
improved_flag = true;
}
}
}
return improved_flag;
}
//--------------------------------------------------------------------------------------------------------------------------
// Per-pixel emphasis weights for the weighted least squares passes in encode_astc_hdr_block_mode_11().
// Each pixel's weight is interpolated between the low, middle and high values according to where
// its projection onto the block's axis falls between the lowest and highest projections.
const float LOW_EMPHASIS_WEIGHT = 1.0f, MIDDLE_EMPHASIS_WEIGHT = 1.25f, HIGH_EMPHASIS_WEIGHT = 1.0f;
// Alternative set selected when opt_mode is cWeightedLeastSquaresHeavy.
const float LOW_EMPHASIS_WEIGHT_HEAVY = 1.0f, MIDDLE_EMPHASIS_WEIGHT_HEAVY = 4.0f, HIGH_EMPHASIS_WEIGHT_HEAVY = 1.0f;
double encode_astc_hdr_block_mode_11(
uint32_t num_pixels,
const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
uint32_t ise_weight_range,
uint32_t& best_submode,
double cur_block_error,
uint8_t* blk_endpoints, uint8_t* blk_weights,
const astc_hdr_codec_base_options& coptions,
bool direct_only,
uint32_t ise_endpoint_range,
bool uber_mode,
bool constrain_ise_weight_selectors,
int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode,
const encode_astc_block_stats* pBlock_stats)
{
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode));
assert(last_submode <= MAX_MODE11_SUBMODE_INDEX);
best_submode = 0;
const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);
assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);
vec3F block_mean_color_q16, block_axis_q16;
if (!pBlock_stats)
{
block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16);
block_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16);
}
else
{
assert(num_pixels == pBlock_stats->m_num_pixels);
block_mean_color_q16 = pBlock_stats->m_mean_q16;
block_axis_q16 = pBlock_stats->m_axis_q16;
}
aabb3F color_box_q16(cInitExpand);
float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;
vec3F low_color_q16, high_color_q16;
for (uint32_t i = 0; i < num_pixels; i++)
{
color_box_q16.expand(pBlock_pixels_q16[i]);
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
float kd = k.dot(block_axis_q16);
if (kd < l)
{
l = kd;
low_color_q16 = pBlock_pixels_q16[i];
}
if (kd > h)
{
h = kd;
high_color_q16 = pBlock_pixels_q16[i];
}
}
vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16);
for (uint32_t i = 0; i < 3; i++)
{
low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f);
high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f);
}
uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];
uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
uint32_t trial_best_submode = 0;
clear_obj(trial_blk_endpoints);
clear_obj(trial_blk_weights);
double trial_blk_error = BIG_FLOAT_VAL;
bool did_improve = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,
low_color_q16, high_color_q16,
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
first_submode, last_submode, ignore_clamping);
// If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.
if (!did_improve)
return cur_block_error;
// Did the solution improve?
if (trial_blk_error < cur_block_error)
{
cur_block_error = trial_blk_error;
memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);
memcpy(blk_weights, trial_blk_weights, num_pixels);
best_submode = trial_best_submode;
}
if (opt_mode == cNoOpt)
return cur_block_error;
// least squares on the most promising trial weight indices found
const uint32_t NUM_LS_PASSES = 3;
float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
if (opt_mode == cWeightedAverage)
{
const uint32_t NUM_OPT_PASSES = 3;
for (uint32_t pass = 0; pass < NUM_OPT_PASSES; pass++)
{
vec3F low_p(0.0f);
float total_low = 0.0f;
vec3F high_p(0.0f);
float total_high = 0.0f;
for (uint32_t i = 0; i < num_pixels; i++)
{
vec3F p(pBlock_pixels_q16[i]);
float lerp = g_ise_weight_lerps[ise_weight_range][trial_blk_weights[i] + 1] * (1.0f / 64.0f);
low_p += p * (1.0f - lerp);
total_low += (1.0f - lerp);
high_p += p * lerp;
total_high += lerp;
}
if (total_low != 0.0f)
low_p *= (1.0f / total_low);
if (total_high != 0.0f)
high_p *= (1.0f / total_high);
vec3F low, high;
bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
low_p, high_p,
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
first_submode, last_submode, ignore_clamping);
if (!was_improved)
break;
memcpy(trial_blk_weights, blk_weights, num_pixels);
}
}
else if (opt_mode == cOrdinaryLeastSquares)
{
for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++)
{
vec3F l_q16, h_q16;
if (!compute_least_squares_endpoints_rgb(num_pixels, trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16))
break;
bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
l_q16, h_q16,
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
first_submode, last_submode, ignore_clamping);
if (!was_improved)
break;
// It's improved, so let's take the new weight indices.
memcpy(trial_blk_weights, blk_weights, num_pixels);
} // pass
}
else
{
if (h == l)
{
for (uint32_t i = 0; i < num_pixels; i++)
emphasis_weights[i] = 1.0f;
}
else
{
float mid = (0.0f - l) / (h - l);
mid = clamp(mid, .01f, .99f);
float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT;
if (opt_mode == cWeightedLeastSquaresHeavy)
lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY;
for (uint32_t i = 0; i < num_pixels; i++)
{
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
float kd = k.dot(block_axis_q16);
assert((kd >= l) && (kd <= h));
float v = (kd - l) / (h - l);
if (v < mid)
v = lerp(lw, mw, v / mid);
else
v = lerp(mw, hw, (v - mid) * (1.0f - mid));
emphasis_weights[i] = v;
}
#if 0
if (num_pixels == 6 * 6)
{
const float EDGE_WEIGHT = .1f;
for (uint32_t i = 0; i < 6; i++)
{
emphasis_weights[i] += EDGE_WEIGHT;
emphasis_weights[i + 5 * 6] += EDGE_WEIGHT;
emphasis_weights[i * 6] += EDGE_WEIGHT;
emphasis_weights[5 + i * 6] += EDGE_WEIGHT;
}
}
#endif
}
for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++)
{
vec3F l_q16, h_q16;
if (!compute_weighted_least_squares_endpoints_rgb(
num_pixels,
trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr,
emphasis_weights,
&l_q16, &h_q16,
pBlock_pixels_q16,
color_box_q16))
break;
bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
l_q16, h_q16,
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
first_submode, last_submode, ignore_clamping);
if (!was_improved)
break;
// It's improved, so let's take the new weight indices.
memcpy(trial_blk_weights, blk_weights, num_pixels);
} // pass
}
if ( (uber_mode) && (ise_weight_range >= astc_helpers::BISE_3_LEVELS) &&
((opt_mode == cOrdinaryLeastSquares) || (opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy)) )
{
// Try varying the current best weight indices. This can be expanded/improved, but at potentially great cost.
uint8_t temp_astc_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
memcpy(temp_astc_weights, trial_blk_weights, num_pixels);
uint32_t min_lin_sel = 256, max_lin_sel = 0;
for (uint32_t i = 0; i < num_pixels; i++)
{
const uint32_t astc_sel = temp_astc_weights[i];
const uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
assert(lin_sel < num_weight_levels);
min_lin_sel = minimumu(min_lin_sel, lin_sel);
max_lin_sel = maximumu(max_lin_sel, lin_sel);
}
bool was_improved = false;
(void)was_improved;
{
bool weights_changed = false;
uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
for (uint32_t i = 0; i < num_pixels; i++)
{
uint32_t astc_sel = temp_astc_weights[i];
uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1)))
{
lin_sel++;
weights_changed = true;
}
trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];
}
if (weights_changed)
{
vec3F l_q16, h_q16;
bool succeeded;
if (opt_mode == cOrdinaryLeastSquares)
succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
else
succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
if (succeeded)
{
if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
l_q16, h_q16,
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
first_submode, last_submode, ignore_clamping))
{
was_improved = true;
}
}
}
}
{
bool weights_changed = false;
uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
for (uint32_t i = 0; i < num_pixels; i++)
{
uint32_t astc_sel = temp_astc_weights[i];
uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
if ((lin_sel == max_lin_sel) && (lin_sel > 0))
{
lin_sel--;
weights_changed = true;
}
trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];
}
if (weights_changed)
{
vec3F l_q16, h_q16;
bool succeeded;
if (opt_mode == cOrdinaryLeastSquares)
succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
else
succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
if (succeeded)
{
if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
l_q16, h_q16,
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
first_submode, last_submode, ignore_clamping))
{
was_improved = true;
}
}
}
}
{
bool weights_changed = false;
uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
for (uint32_t i = 0; i < num_pixels; i++)
{
uint32_t astc_sel = temp_astc_weights[i];
uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
if ((lin_sel == max_lin_sel) && (lin_sel > 0))
{
lin_sel--;
weights_changed = true;
}
else if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1)))
{
lin_sel++;
weights_changed = true;
}
trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];
}
if (weights_changed)
{
vec3F l_q16, h_q16;
bool succeeded;
if (opt_mode == cOrdinaryLeastSquares)
succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
else
succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
if (succeeded)
{
if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
l_q16, h_q16,
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
first_submode, last_submode, ignore_clamping))
{
was_improved = true;
}
}
}
}
} // uber_mode
return cur_block_error;
}
//--------------------------------------------------------------------------------------------------------------------------
// Tries to encode a block with ASTC HDR mode 11 using a weight grid (grid_x by grid_y) that may be smaller than
// the block (block_x by block_y).
//
// Each pass:
//   1. Packs the current low/high endpoint estimate with try_mode11() using 32 weight levels at block resolution.
//   2. Downsamples those weights to the grid at ise_weight_range, dequantizes them, and upsamples them back to
//      block resolution so the error is measured against the weights the upsampled grid actually produces.
//   3. Accepts the result if its error is lower than cur_block_error.
//   4. Re-solves the endpoints by (optionally emphasis-weighted) least squares against the upsampled weights,
//      repacks them with pack_mode11(), and accepts that result too if it lowers the error.
//
// Parameters:
//   block_x, block_y         Block dimensions in pixels (asserted to be 4..MAX_ASTC_HDR_BLOCK_W/H).
//   grid_x, grid_y           Weight grid dimensions (asserted to be 2..block_x / 2..block_y).
//   ise_weight_range         ISE range of the output (grid) weights.
//   ise_endpoint_range       ISE range used to quantize the endpoints.
//   num_pixels               Number of entries in pBlock_pixels_half / pBlock_pixels_q16.
//   pBlock_pixels_half       Block pixels as half floats (used for error evaluation).
//   pBlock_pixels_q16        Block pixels in the "q16" space used for the endpoint fits.
//   cur_block_error          Error of the best solution found so far by the caller.
//   first_submode/last_submode, ignore_clamping
//                            Forwarded to try_mode11(); first/last submode are also forwarded to pack_mode11().
//   opt_mode                 cWeightedLeastSquares / cWeightedLeastSquaresHeavy select the emphasis-weighted
//                            solver; any other value uses the plain least-squares solver.
//   pBlk_endpoints           [out] Receives NUM_MODE11_ENDPOINTS bytes when a better solution is found.
//   pBlk_weights             [out] Receives grid_x * grid_y ISE weights when a better solution is found.
//   best_submode             [out] Reset to 0 on entry; set to the winning submode when a better solution is found.
//   coptions                 Codec options forwarded to the helpers.
//   pBlock_stats             Optional precomputed mean/PCA axis; when null they are computed here.
//
// Returns the lowest block error known on exit: either the incoming cur_block_error (outputs untouched), or the
// error of the solution written to pBlk_endpoints / pBlk_weights.
double encode_astc_hdr_block_downsampled_mode_11(
	uint32_t block_x, uint32_t block_y, uint32_t grid_x, uint32_t grid_y,
	uint32_t ise_weight_range, uint32_t ise_endpoint_range,
	uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
	double cur_block_error,
	int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode,
	uint8_t* pBlk_endpoints, uint8_t* pBlk_weights, uint32_t& best_submode,
	const astc_hdr_codec_base_options& coptions,
	const encode_astc_block_stats* pBlock_stats)
{
	assert((block_x >= 4) && (block_y >= 4) && (block_x <= MAX_ASTC_HDR_BLOCK_W) && (block_y <= MAX_ASTC_HDR_BLOCK_H));
	assert((grid_x >= 2) && (grid_y >= 2) && (grid_x <= block_x) && (grid_y <= block_y));
	assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
	assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
	assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
	assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode));
	assert(last_submode <= MAX_MODE11_SUBMODE_INDEX);

	best_submode = 0;

	assert(astc_helpers::get_ise_levels(ise_weight_range) <= MAX_SUPPORTED_WEIGHT_LEVELS);

	const uint32_t num_weights = grid_x * grid_y;

	// Block mean and principal axis: either supplied by the caller or computed here.
	vec3F block_mean_color_q16, block_axis_q16;
	if (!pBlock_stats)
	{
		block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16);
		block_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16);
	}
	else
	{
		assert(num_pixels == pBlock_stats->m_num_pixels);
		block_mean_color_q16 = pBlock_stats->m_mean_q16;
		block_axis_q16 = pBlock_stats->m_axis_q16;
	}

	// Project every pixel onto the axis; the pixels with the extreme projections seed the endpoints.
	aabb3F color_box_q16(cInitExpand);
	float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;
	vec3F low_color_q16, high_color_q16;
	for (uint32_t i = 0; i < num_pixels; i++)
	{
		color_box_q16.expand(pBlock_pixels_q16[i]);

		vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
		float kd = k.dot(block_axis_q16);

		if (kd < l)
		{
			l = kd;
			low_color_q16 = pBlock_pixels_q16[i];
		}

		if (kd > h)
		{
			h = kd;
			high_color_q16 = pBlock_pixels_q16[i];
		}
	}

	// Pull both endpoints 1/64th of the way towards each other.
	vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16);
	for (uint32_t i = 0; i < 3; i++)
	{
		low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f);
		high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f);
	}

	// Per-pixel emphasis weights for the weighted least-squares modes. They depend only on the pixels, the
	// mean/axis and [l, h], none of which change between passes, so they are computed once up front.
	const bool use_weighted_ls = (opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy);
	float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	if (use_weighted_ls)
	{
		if (h == l)
		{
			// Degenerate projection range: weight every pixel equally.
			for (uint32_t i = 0; i < num_pixels; i++)
				emphasis_weights[i] = 1.0f;
		}
		else
		{
			// Normalized position of the block mean (projection 0) within [l, h].
			float mid = (0.0f - l) / (h - l);
			mid = clamp(mid, .01f, .99f);

			float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT;
			if (opt_mode == cWeightedLeastSquaresHeavy)
				lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY;

			for (uint32_t i = 0; i < num_pixels; i++)
			{
				vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
				float kd = k.dot(block_axis_q16);

				assert((kd >= l) && (kd <= h));

				float v = (kd - l) / (h - l);

				// NOTE(review): remapping [mid, 1] onto [0, 1] would divide by (1 - mid); this multiplies instead.
				// The same expression appears in the emphasis-weight code earlier in this file, so it is left
				// unchanged here to preserve existing (tuned) encoder output - confirm whether it is intended.
				if (v < mid)
					v = lerp(lw, mw, v / mid);
				else
					v = lerp(mw, hw, (v - mid) * (1.0f - mid));

				emphasis_weights[i] = v;
			}
		}
	}

	const uint32_t NUM_PASSES = 3;
	for (uint32_t pass = 0; pass < NUM_PASSES; pass++)
	{
		uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];
		uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // at block resolution, not grid res
		uint32_t trial_best_submode = 0;

		clear_obj(trial_blk_endpoints);
		clear_obj(trial_blk_weights);

		double trial_blk_error = BIG_FLOAT_VAL;

		// Pack the current endpoint estimate with 32 weight levels at full block resolution.
		bool could_pack = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,
			low_color_q16, high_color_q16,
			pBlock_pixels_half, 32, astc_helpers::BISE_32_LEVELS, coptions, false, ise_endpoint_range, false,
			first_submode, last_submode, ignore_clamping);

		if (!could_pack)
			break;

		// Block-res weights -> grid-res ISE weights -> raw grid weights -> raw block-res weights.
		uint8_t trial_downsampled_ise_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
		downsample_ise_weights(
			astc_helpers::BISE_32_LEVELS, ise_weight_range,
			block_x, block_y, grid_x, grid_y,
			trial_blk_weights, trial_downsampled_ise_weights);

		uint8_t trial_downsampled_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
		dequantize_astc_weights(num_weights, trial_downsampled_ise_weights, ise_weight_range, trial_downsampled_raw_weights);

		uint8_t trial_upsampled_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE
		astc_helpers::upsample_weight_grid(block_x, block_y, grid_x, grid_y, trial_downsampled_raw_weights, trial_upsampled_raw_weights);

		//------

		int trial_e[2][3];
		if (!decode_mode11_to_qlog12(trial_blk_endpoints, trial_e, ise_endpoint_range))
			return cur_block_error;

		// Measure the error using the weights the downsampled grid will really produce.
		double trial_error = compute_block_error_from_raw_weights(num_pixels, pBlock_pixels_half, trial_upsampled_raw_weights, trial_e, coptions);

		if (trial_error < cur_block_error)
		{
			cur_block_error = trial_error;
			memcpy(pBlk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);
			memcpy(pBlk_weights, trial_downsampled_ise_weights, num_weights);
			best_submode = trial_best_submode;
		}
		else if (pass)
		{
			// After the first pass, stop as soon as a repack fails to help.
			break;
		}

		// Re-solve the endpoints given the upsampled weights.
		if (use_weighted_ls)
		{
			float trial_upsampled_raw_weightsf[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
			for (uint32_t i = 0; i < num_pixels; i++)
				trial_upsampled_raw_weightsf[i] = (float)trial_upsampled_raw_weights[i] * (1.0f / 64.0f);

			// A failed solve must not be reported as an error of 0.0 (which is what "return false" from this
			// double-returning function would do); stop refining and return the best error found so far.
			if (!compute_weighted_least_squares_endpoints_rgb(num_pixels, nullptr, nullptr, trial_upsampled_raw_weightsf, emphasis_weights, &low_color_q16, &high_color_q16, pBlock_pixels_q16, color_box_q16))
				break;
		}
		else
		{
			if (!compute_least_squares_endpoints_rgb_raw_weights(num_pixels, trial_upsampled_raw_weights, &low_color_q16, &high_color_q16, pBlock_pixels_q16, color_box_q16))
				break;
		}

		// Repack the refined endpoints and re-evaluate with the same (already downsampled) weights.
		bool pack_succeeded = pack_mode11(low_color_q16, high_color_q16, ise_endpoint_range, trial_blk_endpoints, coptions, false, first_submode, last_submode, false, trial_best_submode);
		if (!pack_succeeded)
			break;

		if (!decode_mode11_to_qlog12(trial_blk_endpoints, trial_e, ise_endpoint_range))
			break;

		trial_error = compute_block_error_from_raw_weights(num_pixels, pBlock_pixels_half, trial_upsampled_raw_weights, trial_e, coptions);

		if (trial_error < cur_block_error)
		{
			cur_block_error = trial_error;
			memcpy(pBlk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);
			memcpy(pBlk_weights, trial_downsampled_ise_weights, num_weights);
			best_submode = trial_best_submode;
		}
		else
		{
			break;
		}
	} // pass

	return cur_block_error;
}
//--------------------------------------------------------------------------------------------------------------------------
// Dual-plane variant of the mode 11 block encoder.
//
// channel_index (0-2) selects the channel that gets its own weight plane (written to blk_weights1); the other two
// channels share blk_weights0. An initial endpoint estimate is packed with try_mode11_dual_plane(), then refined
// by up to three least-squares passes (a 2D fit for the shared plane, a 1D fit for the separate channel).
//
// cur_block_error is the caller's best error so far. blk_endpoints / blk_weights0 / blk_weights1 / best_submode
// are only overwritten when a lower error is found (best_submode is reset to 0 on entry). Returns the lowest
// error known on exit. uber_mode is accepted for interface compatibility but unused.
double encode_astc_hdr_block_mode_11_dual_plane(
	uint32_t num_pixels,
	const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
	uint32_t channel_index, // 0-2
	uint32_t ise_weight_range,
	uint32_t& best_submode,
	double cur_block_error,
	uint8_t* blk_endpoints, uint8_t* blk_weights0, uint8_t* blk_weights1,
	const astc_hdr_codec_base_options& coptions,
	bool direct_only,
	uint32_t ise_endpoint_range,
	bool uber_mode,
	bool constrain_ise_weight_selectors,
	int32_t first_submode, int32_t last_submode, bool ignore_clamping)
{
	(void)uber_mode;

	assert(channel_index <= 2);
	assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
	assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
	assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
	assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode));
	assert(last_submode <= MAX_MODE11_SUBMODE_INDEX);

	best_submode = 0;

	const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);
	assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);

	// Copy of the block with the separate-plane channel zeroed, so the mean/PCA only see the shared channels.
	vec4F masked_pixels_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	for (uint32_t px = 0; px < num_pixels; px++)
	{
		masked_pixels_q16[px] = pBlock_pixels_q16[px];
		masked_pixels_q16[px][channel_index] = 0.0f;
	}

	vec3F masked_mean_q16(calc_mean(num_pixels, masked_pixels_q16));
	vec3F masked_axis_q16(calc_rgb_pca(num_pixels, masked_pixels_q16, masked_mean_q16));

	// Pick the (unmasked) pixels with the extreme projections on the masked axis; track the full-color bounds.
	float min_proj = BIG_FLOAT_VAL, max_proj = -BIG_FLOAT_VAL;
	vec3F low_color_q16, high_color_q16;
	aabb3F color_box_q16(cInitExpand);

	for (uint32_t px = 0; px < num_pixels; px++)
	{
		color_box_q16.expand(pBlock_pixels_q16[px]);

		vec3F centered(vec3F(masked_pixels_q16[px]) - masked_mean_q16);
		float proj = centered.dot(masked_axis_q16);

		if (proj < min_proj)
		{
			min_proj = proj;
			low_color_q16 = pBlock_pixels_q16[px];
		}

		if (proj > max_proj)
		{
			max_proj = proj;
			high_color_q16 = pBlock_pixels_q16[px];
		}
	}

	// Compare the endpoints by the sum of their shared channels only.
	low_color_q16[channel_index] = 0.0f;
	high_color_q16[channel_index] = 0.0f;

	float low_sum = low_color_q16.dot(vec3F(1.0f)), high_sum = high_color_q16.dot(vec3F(1.0f));

	// The separate channel spans its full bounds, oriented to match the ordering of the shared channels.
	const bool low_is_not_brighter = (low_sum <= high_sum);
	if (low_is_not_brighter)
	{
		low_color_q16[channel_index] = color_box_q16.get_low()[channel_index];
		high_color_q16[channel_index] = color_box_q16.get_high()[channel_index];
	}
	else
	{
		high_color_q16[channel_index] = color_box_q16.get_low()[channel_index];
		low_color_q16[channel_index] = color_box_q16.get_high()[channel_index];
	}

	// Pull both endpoints 1/64th of the way towards each other.
	vec3F unshrunk_low_q16(low_color_q16), unshrunk_high_q16(high_color_q16);
	for (uint32_t c = 0; c < 3; c++)
	{
		low_color_q16[c] = lerp<float>(unshrunk_low_q16[c], unshrunk_high_q16[c], 1.0f / 64.0f);
		high_color_q16[c] = lerp<float>(unshrunk_low_q16[c], unshrunk_high_q16[c], 63.0f / 64.0f);
	}

	uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];
	uint8_t trial_blk_weights0[MAX_ASTC_HDR_ENC_BLOCK_PIXELS], trial_blk_weights1[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	uint32_t trial_best_submode = 0;

	clear_obj(trial_blk_endpoints);
	clear_obj(trial_blk_weights0);
	clear_obj(trial_blk_weights1);

	double trial_blk_error = BIG_FLOAT_VAL;

	// Nothing usable could be packed from the initial estimate (endpoint quantization): give up.
	if (!try_mode11_dual_plane(channel_index, num_pixels, trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_best_submode,
		low_color_q16, high_color_q16,
		pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
		first_submode, last_submode, ignore_clamping))
	{
		return cur_block_error;
	}

	// Keep the initial solution only if it beats what the caller already had.
	if (trial_blk_error < cur_block_error)
	{
		cur_block_error = trial_blk_error;
		best_submode = trial_best_submode;

		memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);
		memcpy(blk_weights0, trial_blk_weights0, num_pixels);
		memcpy(blk_weights1, trial_blk_weights1, num_pixels);
	}

	// Split the pixels into the 2-channel shared plane and the 1-channel separate plane.
	const uint32_t chan0 = (channel_index + 1) % 3;
	const uint32_t chan1 = (channel_index + 2) % 3;

	vec2F plane0_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	vec1F plane1_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];

	for (uint32_t px = 0; px < num_pixels; px++)
	{
		plane0_q16[px][0] = pBlock_pixels_q16[px][chan0];
		plane0_q16[px][1] = pBlock_pixels_q16[px][chan1];

		plane1_q16[px][0] = pBlock_pixels_q16[px][channel_index];
	}

	aabb2F plane0_bounds;
	plane0_bounds[0].set(color_box_q16.get_low()[chan0], color_box_q16.get_low()[chan1]);
	plane0_bounds[1].set(color_box_q16.get_high()[chan0], color_box_q16.get_high()[chan1]);

	aabb1F plane1_bounds;
	plane1_bounds[0].set(color_box_q16.get_low()[channel_index]);
	plane1_bounds[1].set(color_box_q16.get_high()[channel_index]);

	// Alternate between solving endpoints for the current weights and re-picking weights for those endpoints.
	const uint32_t NUM_LS_PASSES = 3;
	uint32_t pass = 0;
	while (pass < NUM_LS_PASSES)
	{
		vec2F l0_q16, h0_q16;
		if (!compute_least_squares_endpoints_2D(num_pixels, trial_blk_weights0, &g_astc_ls_weights_ise[ise_weight_range][0], &l0_q16, &h0_q16, plane0_q16, plane0_bounds))
			break;

		vec1F l1_q16, h1_q16;
		if (!compute_least_squares_endpoints_1D(num_pixels, trial_blk_weights1, &g_astc_ls_weights_ise[ise_weight_range][0], &l1_q16, &h1_q16, plane1_q16, plane1_bounds))
			break;

		// Reassemble the RGB endpoints from the two per-plane solutions.
		vec3F l_q16, h_q16;

		l_q16[chan0] = l0_q16[0];
		l_q16[chan1] = l0_q16[1];
		l_q16[channel_index] = l1_q16[0];

		h_q16[chan0] = h0_q16[0];
		h_q16[chan1] = h0_q16[1];
		h_q16[channel_index] = h1_q16[0];

		const bool was_improved = try_mode11_dual_plane(channel_index, num_pixels, blk_endpoints, blk_weights0, blk_weights1, cur_block_error, best_submode,
			l_q16, h_q16,
			pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
			first_submode, last_submode, ignore_clamping);

		if (!was_improved)
			break;

		// Improved: the next pass fits against the newly chosen weights.
		memcpy(trial_blk_weights0, blk_weights0, num_pixels);
		memcpy(trial_blk_weights1, blk_weights1, num_pixels);

		pass++;
	}

	return cur_block_error;
}
//--------------------------------------------------------------------------------------------------------------------------
// Tries to encode a block with ASTC HDR mode 7 (a high color plus a single scale "S" that is subtracted from it).
//
// Steps:
//   1. Project the pixels onto the fixed grayscale axis (0.577350259 per channel) to get an initial high color and S,
//      and pack them with try_mode7().
//   2. Try an alternate estimate built directly from the pixels with the extreme projections.
//   3. Run up to NUM_TRIALS refinement rounds that alternately re-estimate the high color (for the current weights
//      and S) and S (for the current weights and high color).
//
// cur_block_error is the caller's best error so far; blk_endpoints (NUM_MODE7_ENDPOINTS bytes), blk_weights
// (num_pixels bytes) and best_submode are only updated when a lower error is found (best_submode is reset to 0
// on entry). pBlock_stats optionally supplies a precomputed block mean. Returns the lowest error known on exit.
double encode_astc_hdr_block_mode_7(
uint32_t num_pixels,
const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
uint32_t ise_weight_range,
uint32_t& best_submode,
double cur_block_error,
uint8_t* blk_endpoints, //[4]
uint8_t* blk_weights, // [num_pixels]
const astc_hdr_codec_base_options& coptions,
uint32_t ise_endpoint_range,
int first_submode, int last_submode,
const encode_astc_block_stats* pBlock_stats)
{
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);
assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);
best_submode = 0;
// Block mean: taken from the caller's stats when provided, otherwise computed here.
vec3F block_mean_color_q16;
if (!pBlock_stats)
block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16);
else
{
assert(num_pixels == pBlock_stats->m_num_pixels);
block_mean_color_q16 = pBlock_stats->m_mean_q16;
}
// Fixed grayscale axis (all three components 0.577350259); no PCA is used here.
vec3F block_axis_q16(0.577350259f);
aabb3F color_box_q16(cInitExpand);
// l/h: smallest/largest projection of the mean-centered pixels onto the axis.
float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;
for (uint32_t i = 0; i < num_pixels; i++)
{
color_box_q16.expand(pBlock_pixels_q16[i]);
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
float kd = k.dot(block_axis_q16);
l = basisu::minimum<float>(l, kd);
h = basisu::maximum<float>(h, kd);
}
// Initial endpoints: points on the axis at the extreme projections, then clamped to [0, MAX_QLOG16_VAL].
vec3F low_color_q16(interp_color(block_mean_color_q16, block_axis_q16, l, color_box_q16, color_box_q16));
vec3F high_color_q16(interp_color(block_mean_color_q16, block_axis_q16, h, color_box_q16, color_box_q16));
low_color_q16.clamp(0.0f, MAX_QLOG16_VAL);
high_color_q16.clamp(0.0f, MAX_QLOG16_VAL);
vec3F diff(high_color_q16 - low_color_q16);
// The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0,
// i.e. it's equivalent to a vector of (1,1,1) multiplied by scale before the sub. We want to actually move along the grayscale axis, or (0.577350259, 0.577350259, 0.577350259).
float s_q16 = diff.dot(block_axis_q16) * block_axis_q16[0];
uint8_t trial_blk_endpoints[NUM_MODE7_ENDPOINTS];
uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
uint32_t trial_best_submode = 0;
clear_obj(trial_blk_endpoints);
clear_obj(trial_blk_weights);
double trial_blk_error = BIG_FLOAT_VAL;
// First attempt goes into the trial_* buffers (starting from BIG_FLOAT_VAL), not the caller's outputs.
bool did_improve = try_mode7(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,
high_color_q16, ceilf(s_q16),
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode);
// If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.
if (!did_improve)
{
return cur_block_error;
}
// Did the solution improve?
if (trial_blk_error < cur_block_error)
{
cur_block_error = trial_blk_error;
memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE7_ENDPOINTS);
memcpy(blk_weights, trial_blk_weights, num_pixels);
best_submode = trial_best_submode;
}
#if 1
{
//const float TL = 8830.0f;// (float)half_to_qlog16(float_to_half(0.00061f));
//const float TH = 41600.0f;// (float)half_to_qlog16(float_to_half(40.0f));
//float zl = minimum<float>(color_box_q16[0][0], color_box_q16[0][1], color_box_q16[0][2]);
//float zh = minimum<float>(color_box_q16[1][0], color_box_q16[1][1], color_box_q16[1][2]);
//if ((zl <= TL) && (zh >= TH))
{
// Try a simpler technique for artifact reduction
// Use the actual pixels with the extreme projections as endpoints instead of points on the axis.
l = BIG_FLOAT_VAL;
h = -BIG_FLOAT_VAL;
vec3F alt_low_color_q16(0.0f), alt_high_color_q16(0.0f);
for (uint32_t i = 0; i < num_pixels; i++)
{
// The box was already expanded with these same pixels in the first loop above.
color_box_q16.expand(pBlock_pixels_q16[i]);
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
float kd = k.dot(block_axis_q16);
if (kd < l)
{
alt_low_color_q16 = pBlock_pixels_q16[i];
l = kd;
}
if (kd > h)
{
alt_high_color_q16 = pBlock_pixels_q16[i];
h = kd;
}
}
// Only the low color is pulled (1/64th) towards the high color; the high color is used as-is.
vec3F old_alt_low_color_q16(alt_low_color_q16);
for (uint32_t i = 0; i < 3; i++)
alt_low_color_q16[i] = lerp<float>(old_alt_low_color_q16[i], alt_high_color_q16[i], 1.0f / 64.0f);
vec3F alt_diff(alt_high_color_q16 - alt_low_color_q16);
// The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0,
// i.e. it's equivalent to a vector of (1,1,1) multiplied by scale before the sub. We want to actually move along the grayscale axis, or (0.577350259, 0.577350259, 0.577350259).
float alt_s_q16 = alt_diff.dot(block_axis_q16) * block_axis_q16[0];
// This attempt writes straight into the caller's outputs and cur_block_error.
// NOTE(review): trial_blk_endpoints/trial_blk_weights are not refreshed if this call improves the solution, so the
// first refinement round below starts from the initial trial rather than from this result - confirm that is intended.
try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
alt_high_color_q16, ceilf(alt_s_q16),
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode);
}
}
#endif
const float one_over_num_pixels = 1.0f / (float)num_pixels;
const uint32_t NUM_TRIALS = 2;
for (uint32_t trial = 0; trial < NUM_TRIALS; trial++)
{
// Given a set of selectors and S, try to compute a better high color
vec3F new_high_color_q16(block_mean_color_q16);
int e[2][3];
int cur_s = 0;
if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, &cur_s, ise_endpoint_range))
break;
// Presumably rescales the decoded qlog12 scale to the 16-bit range of the *_q16 values - TODO confirm.
cur_s <<= 4;
for (uint32_t i = 0; i < num_pixels; i++)
{
uint32_t astc_sel = trial_blk_weights[i];
// Entry 0 of each g_ise_weight_lerps row holds the level count, hence the +1; values are in [0, 64].
float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f);
// Add the average of S * (1 - weight) to every channel of the mean to get the new high color.
float k = (float)cur_s * (1.0f - lerp) * one_over_num_pixels;
new_high_color_q16[0] += k;
new_high_color_q16[1] += k;
new_high_color_q16[2] += k;
}
bool improved = try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
new_high_color_q16, (float)cur_s,
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode);
if (improved)
{
memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS);
memcpy(trial_blk_weights, blk_weights, num_pixels);
}
// Given a set of selectors and a high color, try to compute a better S.
// t = average of (1 - weight) over the block.
float t = 0.0f;
for (uint32_t i = 0; i < num_pixels; i++)
{
uint32_t astc_sel = trial_blk_weights[i];
float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f);
t += (1.0f) - lerp;
}
t *= one_over_num_pixels;
//int e[2][3];
if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, nullptr, ise_endpoint_range))
break;
// Decoded high endpoint (e[1]), shifted up by 4 bits like cur_s above.
vec3F cur_h_q16((float)(e[1][0] << 4), (float)(e[1][1] << 4), (float)(e[1][2] << 4));
// Skip the S re-estimation when t is too close to zero to divide by.
if (fabs(t) > .0000125f)
{
// Per-channel S candidates: (high - mean) / t.
float s_r = (cur_h_q16[0] - block_mean_color_q16[0]) / t;
float s_g = (cur_h_q16[1] - block_mean_color_q16[1]) / t;
float s_b = (cur_h_q16[2] - block_mean_color_q16[2]) / t;
// TODO: gather statistics on these
if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
cur_h_q16, ceilf(s_r),
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))
{
improved = true;
}
// Optionally also try the green/blue candidates, their average, and 1.1x the largest (capped at 65535).
if (coptions.m_mode7_full_s_optimization)
{
if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
cur_h_q16, ceilf(s_g),
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))
{
improved = true;
}
if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
cur_h_q16, ceilf(s_b),
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))
{
improved = true;
}
if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
cur_h_q16, ceilf((s_r + s_g + s_b) / 3.0f),
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))
{
improved = true;
}
// Added this - quite strong.
if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
cur_h_q16, minimum(maximum(s_r, s_g, s_b) * 1.1f, 65535.0f),
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))
{
improved = true;
}
} // if (coptions.m_mode7_full_s_optimization)
} // if (fabs(t) > .0000125f)
// Stop when neither the high-color step nor any S candidate helped this round.
if (!improved)
break;
// Carry the current best solution into the next round.
memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS);
memcpy(trial_blk_weights, blk_weights, num_pixels);
} // trial
return cur_block_error;
}
//--------------------------------------------------------------------------------------------------------------------------
// Converts n ISE-encoded weight values (in ISE range from_ise_range) to their dequantized values by looking each
// one up in the matching m_ISE_to_val table. Elements are processed front to back, one output per input.
void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights)
{
	const auto& ise_to_raw = astc_helpers::g_dequant_tables.get_weight_tab(from_ise_range).m_ISE_to_val;

	const uint8_t* pIn = pSrc_ise_vals;
	uint8_t* pOut = pDst_raw_weights;

	for (uint32_t remaining = n; remaining != 0; --remaining)
	{
		*pOut = ise_to_raw[*pIn];
		++pIn;
		++pOut;
	}
}
//--------------------------------------------------------------------------------------------------------------------------
// For each output (2x2) sample, the weight of each input (6x6) sample.
// Layout: one row per output sample (4 rows), 36 filter weights per row (one per input sample).
// Row 0's non-zero weights sum to ~1.0, so each row appears to be a normalized filter kernel.
// NOTE(review): the positions of the non-zero entries suggest both grids are stored row-major - confirm against the code that consumes this table.
static const float g_weight_downsample_6x6_to_2x2[4][36] = {
{0.165438f, 0.132609f, 0.092681f, 0.028953f, 0.000000f, 0.000000f, 0.133716f, 0.111240f, 0.065133f, 0.022236f, 0.000000f, 0.000000f, 0.092623f, 0.063898f, 0.039120f, 0.000000f, 0.000000f, 0.000000f, 0.028168f, 0.024184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.027262f, 0.091051f, 0.132446f, 0.164791f, 0.000000f, 0.000000f, 0.026038f, 0.066511f, 0.111644f, 0.133197f, 0.000000f, 0.000000f, 0.000000f, 0.040053f, 0.064757f, 0.091196f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024265f, 0.026789f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028282f, 0.024804f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092871f, 0.066580f, 0.042024f, 0.000000f, 0.000000f, 0.000000f, 0.132115f, 0.107586f, 0.061943f, 0.025551f, 0.000000f, 0.000000f, 0.166111f, 0.132946f, 0.089043f, 0.030145f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024535f, 0.028835f, 0.000000f, 0.000000f, 0.000000f, 0.044465f, 0.063652f, 0.093251f, 0.000000f, 0.000000f, 0.025961f, 0.063339f, 0.107329f, 0.132240f, 0.000000f, 0.000000f, 0.029844f, 0.089249f, 0.132200f, 0.165099f},
};
// For each output (3x2) sample, the weight of each input (6x6) sample.
// Layout: one row per output sample (6 rows), 36 filter weights per row (one per input sample).
// NOTE(review): the positions of the non-zero entries suggest both grids are stored row-major, 3 output samples wide by 2 high - confirm against the code that consumes this table.
static const float g_weight_downsample_6x6_to_3x2[6][36] = {
{0.257933f, 0.144768f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.213754f, 0.109376f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.140969f, 0.064128f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041270f, 0.027803f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.046066f, 0.153691f, 0.153395f, 0.042845f, 0.000000f, 0.000000f, 0.038497f, 0.131674f, 0.126804f, 0.041513f, 0.000000f, 0.000000f, 0.028434f, 0.081152f, 0.075499f, 0.025372f, 0.000000f, 0.000000f, 0.000000f, 0.030067f, 0.024989f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.147088f, 0.258980f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105549f, 0.211746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066714f, 0.144015f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027755f, 0.038152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044268f, 0.030990f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.141642f, 0.069930f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207393f, 0.105354f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.255911f, 0.144511f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026658f, 0.032535f, 0.000000f, 0.000000f, 0.000000f, 0.024618f, 0.079487f, 0.080415f, 0.026311f, 0.000000f, 0.000000f, 0.038382f, 0.133569f, 0.133162f, 0.033451f, 0.000000f, 0.000000f, 0.043697f, 0.152483f, 0.154345f, 0.040885f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026401f, 0.040228f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066688f, 0.142350f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.108504f, 0.210286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.149666f, 0.255876f},
};
// For each output (4x2) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_4x2[8][36] = {
{0.318857f, 0.081413f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.262816f, 0.064811f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.175211f, 0.046152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050740f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.163830f, 0.223661f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128904f, 0.194332f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080369f, 0.121162f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041941f, 0.045801f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.230801f, 0.166220f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193495f, 0.136548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113816f, 0.085890f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043771f, 0.029459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.087528f, 0.318213f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059739f, 0.262039f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046515f, 0.175973f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.054078f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173243f, 0.055145f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.254561f, 0.059695f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319463f, 0.083816f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038171f, 0.037447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.076263f, 0.117360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134218f, 0.202503f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.163759f, 0.230278f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044607f, 0.035170f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114466f, 0.088407f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201026f, 0.127983f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.224148f, 0.164194f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052817f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043531f, 0.174390f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060164f, 0.262636f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.089340f, 0.317122f},
};
// For each output (5x2) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_5x2[10][36] = {
{0.393855f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.327491f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.216089f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062565f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.303101f, 0.078223f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.261199f, 0.068761f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.160056f, 0.054634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074026f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.202529f, 0.207447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.151013f, 0.157673f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100074f, 0.095239f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043623f, 0.042402f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.083336f, 0.309647f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061432f, 0.269582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046328f, 0.166035f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063640f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397684f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217856f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.058282f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065541f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215996f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321124f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397338f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069030f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.159434f, 0.051902f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.266327f, 0.065732f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.305627f, 0.081948f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038550f, 0.046259f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092606f, 0.100038f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.162523f, 0.163345f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199767f, 0.196912f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066709f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050841f, 0.169003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061591f, 0.265094f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.081426f, 0.305335f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063517f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316133f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027674f, 0.381781f},
};
// For each output (6x2) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_6x2[12][36] = {
{0.395563f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.328397f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214936f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061104f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.395041f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.323513f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.393200f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.317339f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.218679f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.399071f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321356f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214689f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064883f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.399159f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326009f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212426f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062406f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398973f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217446f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057071f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065386f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215039f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321113f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398462f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072234f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.211515f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397066f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.053184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.213286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.332634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400895f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207210f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.334096f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395193f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074315f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.216723f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320827f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388135f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063571f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215814f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325843f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394772f},
};
// For each output (2x3) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_2x3[6][36] = {
{0.253933f, 0.211745f, 0.142964f, 0.043509f, 0.000000f, 0.000000f, 0.146094f, 0.108119f, 0.068727f, 0.024908f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.043336f, 0.140540f, 0.208745f, 0.253069f, 0.000000f, 0.000000f, 0.031333f, 0.069242f, 0.108596f, 0.145138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044780f, 0.036916f, 0.026808f, 0.000000f, 0.000000f, 0.000000f, 0.151455f, 0.129189f, 0.076266f, 0.030885f, 0.000000f, 0.000000f, 0.151915f, 0.131628f, 0.081598f, 0.031903f, 0.000000f, 0.000000f, 0.043838f, 0.032645f, 0.030173f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028998f, 0.038454f, 0.046460f, 0.000000f, 0.000000f, 0.033717f, 0.076274f, 0.130140f, 0.153377f, 0.000000f, 0.000000f, 0.025762f, 0.077843f, 0.130195f, 0.150217f, 0.000000f, 0.000000f, 0.000000f, 0.029422f, 0.034493f, 0.044648f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.145243f, 0.107655f, 0.062280f, 0.033041f, 0.000000f, 0.000000f, 0.257369f, 0.210260f, 0.139667f, 0.044485f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037604f, 0.064104f, 0.105759f, 0.144848f, 0.000000f, 0.000000f, 0.042699f, 0.141511f, 0.207704f, 0.255772f},
};
// For each output (3x3) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_3x3[9][36] = {
{0.412913f, 0.237773f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.237370f, 0.111944f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.066531f, 0.251421f, 0.245639f, 0.065785f, 0.000000f, 0.000000f, 0.047059f, 0.143642f, 0.128760f, 0.051164f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.234587f, 0.419421f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.110765f, 0.235227f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067391f, 0.044131f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.248992f, 0.133218f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.247568f, 0.139987f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072238f, 0.046475f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.040674f, 0.048555f, 0.000000f, 0.000000f, 0.000000f, 0.049640f, 0.158199f, 0.158521f, 0.046044f, 0.000000f, 0.000000f, 0.043591f, 0.153956f, 0.155258f, 0.049378f, 0.000000f, 0.000000f, 0.000000f, 0.046674f, 0.049509f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049528f, 0.063611f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.137662f, 0.252612f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134924f, 0.246668f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.042655f, 0.072341f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.237403f, 0.114850f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.418506f, 0.229241f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049009f, 0.142093f, 0.136891f, 0.036294f, 0.000000f, 0.000000f, 0.074433f, 0.244437f, 0.251631f, 0.065212f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.121166f, 0.231108f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.236230f, 0.411495f},
};
// For each output (4x3) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_4x3[12][36] = {
{0.508292f, 0.132529f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.285382f, 0.073798f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.266624f, 0.378457f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.144380f, 0.210539f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.380292f, 0.270590f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200825f, 0.148293f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.130560f, 0.507542f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.071578f, 0.290320f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094051f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322294f, 0.082665f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316365f, 0.092271f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092353f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046081f, 0.061377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.158151f, 0.235006f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.152896f, 0.232594f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052844f, 0.061053f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061619f, 0.046867f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.227763f, 0.158202f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.222620f, 0.155545f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073398f, 0.053986f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.084098f, 0.330283f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085224f, 0.323658f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094451f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.286413f, 0.077046f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.512915f, 0.123625f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.140389f, 0.213324f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.267125f, 0.379163f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208464f, 0.139969f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.382876f, 0.268691f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080416f, 0.285653f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.131803f, 0.502128f},
};
// For each output (5x3) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_5x3[15][36] = {
{0.618662f, 0.032137f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.349200f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.497060f, 0.129255f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.281642f, 0.092043f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.333166f, 0.338337f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164333f, 0.164165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.129409f, 0.504176f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085525f, 0.280890f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.636943f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.363057f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113467f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394204f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.386741f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105588f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086925f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.317750f, 0.095763f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321008f, 0.086368f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057696f, 0.061462f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.184995f, 0.197656f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.186342f, 0.186715f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059712f, 0.065422f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.091939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079906f, 0.328876f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085955f, 0.320229f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.093096f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.099585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398489f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113144f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360655f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.639345f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.285578f, 0.088663f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.495946f, 0.129812f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177513f, 0.166195f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.329950f, 0.326342f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082692f, 0.279744f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134353f, 0.503211f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.638822f},
};
// For each output (6x3) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_6x3[18][36] = {
{0.640623f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.638697f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361303f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.640672f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359328f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.637721f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.362279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.647342f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.352658f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.638418f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111041f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395972f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387932f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105054f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101949f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401263f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101060f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098132f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388180f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402030f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111659f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.096173f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.386312f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.123650f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104357f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398062f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393265f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104316f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097666f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400772f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111166f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359466f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640534f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360569f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.639431f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.355750f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.644250f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.353865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646135f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.642273f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359539f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640461f},
};
// For each output (2x4) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_2x4[8][36] = {
{0.312206f, 0.261492f, 0.177496f, 0.055798f, 0.000000f, 0.000000f, 0.081944f, 0.062361f, 0.048703f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.054679f, 0.172805f, 0.260561f, 0.314742f, 0.000000f, 0.000000f, 0.000000f, 0.049040f, 0.065652f, 0.082520f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164115f, 0.129589f, 0.083879f, 0.029309f, 0.000000f, 0.000000f, 0.231202f, 0.198851f, 0.118719f, 0.044334f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035855f, 0.083276f, 0.127764f, 0.166965f, 0.000000f, 0.000000f, 0.045347f, 0.116503f, 0.193645f, 0.230645f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.223790f, 0.194804f, 0.115855f, 0.047371f, 0.000000f, 0.000000f, 0.164616f, 0.125798f, 0.087268f, 0.040497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044738f, 0.118365f, 0.198854f, 0.230745f, 0.000000f, 0.000000f, 0.029646f, 0.078141f, 0.131405f, 0.168106f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080206f, 0.060505f, 0.041197f, 0.000000f, 0.000000f, 0.000000f, 0.320486f, 0.265233f, 0.174992f, 0.057380f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.051057f, 0.058139f, 0.082120f, 0.000000f, 0.000000f, 0.056168f, 0.174118f, 0.260525f, 0.317873f},
};
// For each output (3x4) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_3x4[12][36] = {
{0.503381f, 0.288537f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.130806f, 0.077275f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.088808f, 0.319226f, 0.312498f, 0.086797f, 0.000000f, 0.000000f, 0.000000f, 0.092065f, 0.079421f, 0.021185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.286250f, 0.514036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072999f, 0.126714f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.261935f, 0.133191f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.376226f, 0.207118f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021529f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059585f, 0.153016f, 0.152552f, 0.043373f, 0.000000f, 0.000000f, 0.063990f, 0.231504f, 0.235283f, 0.060696f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.146403f, 0.262394f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208547f, 0.382656f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.374676f, 0.209306f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.270440f, 0.145577f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059636f, 0.233975f, 0.235944f, 0.069029f, 0.000000f, 0.000000f, 0.048950f, 0.150198f, 0.154340f, 0.047929f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200921f, 0.380881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.146928f, 0.271271f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128883f, 0.075468f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.509859f, 0.285791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095842f, 0.086878f, 0.000000f, 0.000000f, 0.000000f, 0.092942f, 0.314169f, 0.319263f, 0.090906f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079652f, 0.124852f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.289868f, 0.505628f},
};
// For each output (4x4) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_4x4[16][36] = {
{0.665277f, 0.167914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166809f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.325854f, 0.449938f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094690f, 0.129518f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.455174f, 0.326025f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.109174f, 0.109627f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166733f, 0.664155f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169112f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320619f, 0.090788f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.462066f, 0.126527f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.165890f, 0.235855f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.233931f, 0.364324f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.239319f, 0.151533f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.363629f, 0.245519f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106763f, 0.311932f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.119451f, 0.461853f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.451893f, 0.124086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326160f, 0.097861f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.239712f, 0.365585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164178f, 0.230525f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360274f, 0.237862f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.246139f, 0.155726f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.121863f, 0.457051f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097828f, 0.323258f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.163634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.667648f, 0.168718f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094870f, 0.132660f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316878f, 0.455591f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116917f, 0.098433f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.458816f, 0.325834f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.168403f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.172019f, 0.659578f},
};
// For each output (5x4) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_5x4[20][36] = {
{0.773702f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192588f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.633422f, 0.166577f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170080f, 0.029921f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.388335f, 0.403694f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100996f, 0.106975f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.161122f, 0.655288f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.183590f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.801705f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198295f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400989f, 0.025097f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.573915f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309345f, 0.085396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.478694f, 0.126565f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194664f, 0.187267f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.292735f, 0.308960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.016375f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098049f, 0.295983f, 0.000000f, 0.000000f, 0.017892f, 0.000000f, 0.111938f, 0.476138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043545f, 0.386448f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.570007f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.566407f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402307f, 0.031286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.463145f, 0.120696f, 0.000000f, 0.019497f, 0.000000f, 0.000000f, 0.311721f, 0.084942f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.296730f, 0.300781f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204639f, 0.197849f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122117f, 0.469302f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.102545f, 0.306036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.562064f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041534f, 0.396403f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190134f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.773971f, 0.035896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169927f, 0.035812f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.630284f, 0.163977f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.112667f, 0.106813f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393502f, 0.387018f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177024f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170482f, 0.652494f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192274f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033039f, 0.774687f},
};
// For each output (6x4) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_6x4[24][36] = {
{0.804254f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.804177f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195823f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.799585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.803604f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.807256f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192744f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.805135f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.410532f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.589468f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408690f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.591310f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.416225f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.583775f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.414279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.585721f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.406723f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.593277f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.597490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.584784f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.415216f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590427f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590073f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409927f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.580348f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.419652f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.588321f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411679f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.587022f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.412978f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193281f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.806719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.189163f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.810837f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195108f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.804892f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.188290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.811710f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.807086f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195292f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.804708f},
};
// For each output (2x5) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_2x5[10][36] = {
{0.387593f, 0.325123f, 0.221104f, 0.066180f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.065940f, 0.214659f, 0.326737f, 0.392664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309603f, 0.265953f, 0.168780f, 0.060600f, 0.000000f, 0.000000f, 0.084707f, 0.063017f, 0.047341f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062836f, 0.170767f, 0.261053f, 0.307978f, 0.000000f, 0.000000f, 0.000000f, 0.049286f, 0.064361f, 0.083719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195787f, 0.153943f, 0.095706f, 0.042417f, 0.000000f, 0.000000f, 0.190695f, 0.154435f, 0.097288f, 0.040258f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029471f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017536f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039307f, 0.094677f, 0.158696f, 0.199136f, 0.000000f, 0.000000f, 0.040959f, 0.093353f, 0.155294f, 0.201042f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079432f, 0.065739f, 0.044876f, 0.000000f, 0.000000f, 0.000000f, 0.309205f, 0.264700f, 0.167247f, 0.068801f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052112f, 0.064829f, 0.081363f, 0.000000f, 0.000000f, 0.064024f, 0.161136f, 0.263743f, 0.312793f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393277f, 0.324792f, 0.213188f, 0.068743f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066964f, 0.215440f, 0.323005f, 0.394591f},
};
// For each output (3x5) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_3x5[15][36] = {
{0.620557f, 0.350797f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028646f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.110170f, 0.397489f, 0.386326f, 0.106015f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357348f, 0.642652f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.503934f, 0.275289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128280f, 0.092497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.102294f, 0.316223f, 0.313576f, 0.092518f, 0.000000f, 0.000000f, 0.000000f, 0.081158f, 0.094231f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.279079f, 0.502163f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086083f, 0.132675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325483f, 0.157739f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322567f, 0.172225f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021986f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063342f, 0.192228f, 0.186950f, 0.057021f, 0.000000f, 0.000000f, 0.054779f, 0.186114f, 0.185666f, 0.073901f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.172195f, 0.331802f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.148212f, 0.322038f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025751f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.123726f, 0.081188f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.507339f, 0.287746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.093924f, 0.094021f, 0.000000f, 0.000000f, 0.000000f, 0.097070f, 0.315697f, 0.314560f, 0.084728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082560f, 0.129771f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.277014f, 0.486817f, 0.023837f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.644191f, 0.355809f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107771f, 0.387615f, 0.393454f, 0.111159f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360886f, 0.639114f},
};
// For each output (4x5) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_4x5[20][36] = {
{0.778254f, 0.190730f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031016f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.401147f, 0.570243f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028610f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.563768f, 0.394241f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041992f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196238f, 0.767548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036214f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.637514f, 0.166734f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.167634f, 0.028118f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322778f, 0.473312f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085399f, 0.118511f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471429f, 0.308185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.118025f, 0.102361f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.176592f, 0.643933f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.179475f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.391609f, 0.100882f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390531f, 0.116978f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017259f, 0.000000f, 0.201618f, 0.301555f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197600f, 0.281968f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.016735f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.293309f, 0.192842f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.268674f, 0.208109f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020330f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.118514f, 0.380746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097621f, 0.381305f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021814f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.157977f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.657533f, 0.184490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097522f, 0.128585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309864f, 0.464029f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128900f, 0.090864f, 0.000000f, 0.025393f, 0.000000f, 0.000000f, 0.464029f, 0.290814f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024593f, 0.172268f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173412f, 0.629727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.778816f, 0.191602f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036297f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394454f, 0.569249f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039685f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.561207f, 0.399108f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034683f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193744f, 0.771574f},
};
// For each output (5x5) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_5x5[25][36] = {
{1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.794727f, 0.205273f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.465125f, 0.484079f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028881f, 0.000000f, 0.000000f, 0.021914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.192446f, 0.772941f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034613f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033123f, 0.930510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036367f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800234f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199766f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.629079f, 0.165939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166390f, 0.019675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018918f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.378734f, 0.373861f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111597f, 0.135808f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177492f, 0.641195f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.181313f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028722f, 0.761781f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.209497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.475763f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471882f, 0.029551f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022804f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.382714f, 0.116167f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.383377f, 0.117742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.254151f, 0.249987f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.241972f, 0.253891f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.017950f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122722f, 0.376847f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095099f, 0.369986f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017396f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029442f, 0.472507f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471751f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026300f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190299f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.776924f, 0.032778f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.171498f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.666385f, 0.162117f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.125713f, 0.117624f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387084f, 0.369579f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028493f, 0.169318f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173770f, 0.628419f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198951f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035634f, 0.765415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.963102f, 0.036898f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030322f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.771054f, 0.198624f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021816f, 0.020944f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.481761f, 0.475479f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032816f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198418f, 0.768766f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033338f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966662f},
};
// For each output (6x5) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_6x5[30][36] = {
{0.966284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033716f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.966290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033710f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966125f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033875f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966273f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800857f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199143f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.773463f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025372f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.805735f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194265f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.788791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.211209f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.785975f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214025f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.787286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212714f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490845f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.487242f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021913f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490663f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486878f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.505452f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.494548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.495383f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.482180f, 0.000000f, 0.022437f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.022727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.496545f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.480728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486261f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486387f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027352f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196272f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.803728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210059f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.789941f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212947f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.787053f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215261f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.784739f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.209116f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.790884f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.205881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.794119f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033710f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966281f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033712f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966288f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033712f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966288f},
};
// For each output (2x6) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_2x6[12][36] = {
{0.388815f, 0.325435f, 0.220189f, 0.065562f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.064515f, 0.214042f, 0.327700f, 0.393742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398821f, 0.326200f, 0.217851f, 0.057128f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062546f, 0.216408f, 0.322269f, 0.398777f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.396575f, 0.330631f, 0.212857f, 0.059936f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070253f, 0.215326f, 0.317576f, 0.396845f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398130f, 0.324745f, 0.213572f, 0.063553f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062009f, 0.216253f, 0.324683f, 0.397055f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397646f, 0.321346f, 0.212334f, 0.068675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067073f, 0.210768f, 0.318165f, 0.403993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395756f, 0.325048f, 0.211862f, 0.067334f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065475f, 0.214113f, 0.324009f, 0.396403f},
};
// For each output (3x6) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_3x6[18][36] = {
{0.640136f, 0.359864f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.108112f, 0.399968f, 0.388087f, 0.103833f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.356122f, 0.643878f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646308f, 0.353692f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122937f, 0.390166f, 0.380558f, 0.106339f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.355015f, 0.644985f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.642874f, 0.357126f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111570f, 0.398638f, 0.387639f, 0.102153f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359134f, 0.640866f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640159f, 0.359841f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098908f, 0.393303f, 0.400421f, 0.107369f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357119f, 0.642881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640541f, 0.359459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116318f, 0.397635f, 0.395084f, 0.090964f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361948f, 0.638052f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.645448f, 0.354552f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106981f, 0.389214f, 0.395056f, 0.108749f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359592f, 0.640408f},
};
// For each output (4x6) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_4x6[24][36] = {
{0.806928f, 0.193072f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.412216f, 0.587784f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.590075f, 0.409925f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200682f, 0.799318f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.809822f, 0.190178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.423474f, 0.576526f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.580816f, 0.419184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190240f, 0.809760f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800320f, 0.199680f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408625f, 0.591375f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.583392f, 0.416608f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200372f, 0.799628f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.798914f, 0.201086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411243f, 0.588757f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.586520f, 0.413480f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.203588f, 0.796412f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.802040f, 0.197960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411175f, 0.588825f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.599873f, 0.400127f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193060f, 0.806940f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.806073f, 0.193927f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408705f, 0.591295f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.585711f, 0.414289f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197672f, 0.802328f},
};
// For each output (5x6) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_5x6[30][36] = {
{0.966289f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.794848f, 0.205152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.473272f, 0.496525f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030202f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.196955f, 0.803045f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033711f, 0.966289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966284f, 0.033716f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.795787f, 0.204213f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.500928f, 0.499072f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198603f, 0.801397f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033716f, 0.966284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966283f, 0.033717f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.788424f, 0.211576f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029276f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.484227f, 0.486497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201499f, 0.798501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033724f, 0.966276f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966283f, 0.033717f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.791336f, 0.208664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490188f, 0.509812f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204835f, 0.795165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033703f, 0.966297f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966276f, 0.033724f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.799276f, 0.200724f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.022501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.494443f, 0.483055f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.205967f, 0.794033f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033726f, 0.966274f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.965971f, 0.034029f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.798640f, 0.201360f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.502577f, 0.497423f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.203927f, 0.796073f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033706f, 0.966294f},
};
// For each output (6x6) sample, the weight of each input (6x6) sample.
static const float g_weight_downsample_6x6_to_6x6[36][36] = {
{1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f},
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f},
};
//--------------------------------------------------------------------------------------------------------------------------
// Table entry pairing a destination weight grid size with its precomputed weight downsample matrix.
// The table below is searched by get_6x6_downsample_matrix().
const struct downsample_matrix_6x6
{
// Dimensions of the destination (downsampled) weight grid this matrix produces.
uint32_t m_grid_width, m_grid_height;
// The matrix coefficients viewed as a flat float array.
// NOTE(review): presumably m_grid_width*m_grid_height rows of 36 (6x6) coefficients each, which is how
// downsample_weight_grid() walks the matrix for a 6x6 block - confirm against the table definitions.
const float* m_p;
} g_downsample_matrices_6x6[] = {
// Entries are listed with grid height as the major key (2..6) and grid width as the minor key (2..6).
{ 2, 2, (const float*)g_weight_downsample_6x6_to_2x2 },
{ 3, 2, (const float*)g_weight_downsample_6x6_to_3x2 },
{ 4, 2, (const float*)g_weight_downsample_6x6_to_4x2 },
{ 5, 2, (const float*)g_weight_downsample_6x6_to_5x2 },
{ 6, 2, (const float*)g_weight_downsample_6x6_to_6x2 },
{ 2, 3, (const float*)g_weight_downsample_6x6_to_2x3 },
{ 3, 3, (const float*)g_weight_downsample_6x6_to_3x3 },
{ 4, 3, (const float*)g_weight_downsample_6x6_to_4x3 },
{ 5, 3, (const float*)g_weight_downsample_6x6_to_5x3 },
{ 6, 3, (const float*)g_weight_downsample_6x6_to_6x3 },
{ 2, 4, (const float*)g_weight_downsample_6x6_to_2x4 },
{ 3, 4, (const float*)g_weight_downsample_6x6_to_3x4 },
{ 4, 4, (const float*)g_weight_downsample_6x6_to_4x4 },
{ 5, 4, (const float*)g_weight_downsample_6x6_to_5x4 },
{ 6, 4, (const float*)g_weight_downsample_6x6_to_6x4 },
{ 2, 5, (const float*)g_weight_downsample_6x6_to_2x5 },
{ 3, 5, (const float*)g_weight_downsample_6x6_to_3x5 },
{ 4, 5, (const float*)g_weight_downsample_6x6_to_4x5 },
{ 5, 5, (const float*)g_weight_downsample_6x6_to_5x5 },
{ 6, 5, (const float*)g_weight_downsample_6x6_to_6x5 },
{ 2, 6, (const float*)g_weight_downsample_6x6_to_2x6 },
{ 3, 6, (const float*)g_weight_downsample_6x6_to_3x6 },
{ 4, 6, (const float*)g_weight_downsample_6x6_to_4x6 },
{ 5, 6, (const float*)g_weight_downsample_6x6_to_5x6 },
{ 6, 6, (const float*)g_weight_downsample_6x6_to_6x6 }
};
//const uint32_t NUM_DOWNSAMPLE_MATRICES_6x6 = sizeof(g_downsample_matrices_6x6) / sizeof(g_downsample_matrices_6x6[0]);
//--------------------------------------------------------------------------------------------------------------------------
// Looks up the precomputed weight downsample matrix for a grid_width x grid_height weight grid
// in g_downsample_matrices_6x6.
// Returns the matrix as a flat float array. If no table entry has the requested grid dimensions,
// asserts and returns nullptr.
const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height)
{
	// TODO: Use hash or map lookup.
	const uint32_t total_entries = (uint32_t)(sizeof(g_downsample_matrices_6x6) / sizeof(g_downsample_matrices_6x6[0]));

	for (uint32_t entry_index = 0; entry_index < total_entries; entry_index++)
	{
		const auto& entry = g_downsample_matrices_6x6[entry_index];

		if (entry.m_grid_width != grid_width)
			continue;
		if (entry.m_grid_height != grid_height)
			continue;

		return entry.m_p;
	}

	// Unsupported grid size.
	assert(0);
	return nullptr;
}
// Downsamples a block-resolution weight grid to a coarser weight grid using a precomputed matrix.
//
// pMatrix_weights holds one row of bx*by coefficients per destination sample, with rows stored
// consecutively in [wy][wx] order. Each destination weight is the dot product of its row with the
// source weights, rounded (the accumulator starts at .5 and is truncated) and clamped to [0, 64].
void downsample_weight_grid(
	const float* pMatrix_weights,
	uint32_t bx, uint32_t by,		// source/from dimension (block size)
	uint32_t wx, uint32_t wy,		// dest/to dimension (grid size)
	const uint8_t* pSrc_weights,	// these are dequantized weights, NOT ISE symbols, [by][bx]
	uint8_t* pDst_weights)			// [wy][wx]
{
	const uint32_t row_len = bx * by;

	const float* pRow = pMatrix_weights;
	uint8_t* pDst = pDst_weights;

	for (uint32_t gy = 0; gy < wy; gy++)
	{
		for (uint32_t gx = 0; gx < wx; gx++, pRow += row_len)
		{
			// Starting at .5 makes the float->int truncation below round to nearest.
			float acc = 0.5f;

			for (uint32_t s = 0; s < row_len; s++)
			{
				const float coeff = pRow[s];

				// Zero coefficients contribute nothing, so skip them.
				if (coeff == 0.0f)
					continue;

				acc += coeff * (float)pSrc_weights[s];
			}

			*pDst++ = (uint8_t)clamp((int)acc, 0, 64);
		}
	}
}
//--------------------------------------------------------------------------------------------------------------------------
void downsample_ise_weights(
uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range,
uint32_t block_w, uint32_t block_h,
uint32_t grid_w, uint32_t grid_h,
const uint8_t* pSrc_weights, uint8_t* pDst_weights)
{
assert((block_w <= MAX_ASTC_HDR_BLOCK_W) && (block_h <= MAX_ASTC_HDR_BLOCK_H));
assert((grid_w >= 2) && (grid_w <= MAX_ASTC_HDR_BLOCK_W));
assert((grid_h >= 2) && (grid_h <= MAX_ASTC_HDR_BLOCK_H));
assert(dequant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE);
assert(dequant_weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE);
assert(quant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE);
assert(quant_weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE);
if ((block_w == grid_w) && (block_h == grid_h))
{
if (dequant_weight_ise_range != quant_weight_ise_range)
{
basist::astc_6x6_hdr::requantize_astc_weights(block_w * block_h, pSrc_weights, dequant_weight_ise_range, pDst_weights, quant_weight_ise_range);
}
else
{
if (pDst_weights != pSrc_weights)
memcpy(pDst_weights, pSrc_weights, block_w * block_h);
}
return;
}
uint8_t desired_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(dequant_weight_ise_range).m_ISE_to_val;
for (uint32_t by = 0; by < block_h; by++)
for (uint32_t bx = 0; bx < block_w; bx++)
desired_weights[bx + by * block_w] = dequant_tab[pSrc_weights[bx + by * block_w]];
uint8_t downsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
const float* pDownsample_matrix = get_6x6_downsample_matrix(grid_w, grid_h);
assert(pDownsample_matrix);
downsample_weight_grid(
pDownsample_matrix,
block_w, block_h, // source/from dimension (block size)
grid_w, grid_h, // dest/to dimension (grid size)
desired_weights, // these are dequantized weights, NOT ISE symbols, [by][bx]
downsampled_weights); // [wy][wx]
const auto& weight_quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(quant_weight_ise_range).m_val_to_ise;
for (uint32_t gy = 0; gy < grid_h; gy++)
for (uint32_t gx = 0; gx < grid_w; gx++)
pDst_weights[gx + gy * grid_w] = weight_quant_tab[downsampled_weights[gx + gy * grid_w]];
}
void downsample_ise_weights_dual_plane(
uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range,
uint32_t block_w, uint32_t block_h,
uint32_t grid_w, uint32_t grid_h,
const uint8_t* pSrc_weights0, const uint8_t* pSrc_weights1,
uint8_t* pDst_weights)
{
uint8_t downsampled_weights0[MAX_ASTC_HDR_BLOCK_W * MAX_ASTC_HDR_BLOCK_H], downsampled_weights1[MAX_ASTC_HDR_BLOCK_W * MAX_ASTC_HDR_BLOCK_H];
downsample_ise_weights(
dequant_weight_ise_range, quant_weight_ise_range,
block_w, block_h,
grid_w, grid_h,
pSrc_weights0, downsampled_weights0);
downsample_ise_weights(
dequant_weight_ise_range, quant_weight_ise_range,
block_w, block_h,
grid_w, grid_h,
pSrc_weights1, downsampled_weights1);
const uint32_t num_grid_samples = grid_w * grid_h;
for (uint32_t i = 0; i < num_grid_samples; i++)
{
pDst_weights[i * 2 + 0] = downsampled_weights0[i];
pDst_weights[i * 2 + 1] = downsampled_weights1[i];
}
}
// Tries to find better mode 11 endpoints for a fixed set of weights, keeping them only if they lower the error.
//
// The grid weights (ISE symbols in pWeights) are dequantized and upsampled to block resolution, then new low/high
// endpoints are solved for in q16 space according to opt_mode:
//   cNoOpt                                            - nothing is attempted (returns false)
//   cOrdinaryLeastSquares                             - least squares fit against the raw weights
//   cWeightedLeastSquares, cWeightedLeastSquaresHeavy - least squares with per-pixel emphasis weights, derived from
//                                                       each pixel's position along the block's principal axis
//   cWeightedAverage                                  - each endpoint is a weighted average of the pixels
// The solved endpoints are packed with pack_mode11(). The current and trial endpoints are then both decoded and
// scored against pBlock_pixels_half using the channel weighted squared error of the q() transformed values.
//
// pPixel_block_ofs maps each of the num_pixels pixels to its offset within the block. nullptr selects the identity mapping.
//
// Returns true only if pEndpoint_vals was overwritten with endpoints having strictly lower error. Returns false if
// the solve, pack or decode steps fail, or if the trial endpoints are not better (pEndpoint_vals is then untouched).
static bool refine_endpoints_mode11(
	uint32_t endpoint_ise_range,
	uint8_t* pEndpoint_vals, // the endpoints to optimize
	uint32_t block_w, uint32_t block_h, // block dimensions
	uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid
	uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
	const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets
	astc_hdr_codec_base_options& coptions,
	bool direct_only, int first_submode, int last_submode,
	opt_mode_t opt_mode)
{
	if (opt_mode == cNoOpt)
		return false;

	const uint32_t num_block_pixels = block_w * block_h;

	// No mapping supplied: pixel i lives at block offset i.
	uint8_t def_pixel_block_ofs[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	if (!pPixel_block_ofs)
	{
		for (uint32_t i = 0; i < num_block_pixels; i++)
			def_pixel_block_ofs[i] = (uint8_t)i;

		pPixel_block_ofs = def_pixel_block_ofs;
	}

	const uint32_t num_weights = grid_w * grid_h;

	// Dequantize the grid weights (ISE symbols -> raw weights). The table lookup is loop invariant.
	const auto& weight_dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val;

	uint8_t dequantized_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	for (uint32_t i = 0; i < num_weights; i++)
		dequantized_raw_weights[i] = weight_dequant_tab[pWeights[i]];

	uint8_t upsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE
	astc_helpers::upsample_weight_grid(block_w, block_h, grid_w, grid_h, dequantized_raw_weights, upsampled_weights);

	// Gather the per-pixel weights for the pixels being optimized, along with their q16 bounds.
	aabb3F color_box_q16(cInitExpand);

	uint8_t trial_blk_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE
	float trial_blk_raw_weightsf[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	for (uint32_t i = 0; i < num_pixels; i++)
	{
		color_box_q16.expand(pBlock_pixels_q16[i]);

		assert(pPixel_block_ofs[i] < num_block_pixels);

		trial_blk_raw_weights[i] = upsampled_weights[pPixel_block_ofs[i]];
		trial_blk_raw_weightsf[i] = (float)trial_blk_raw_weights[i] * (1.0f / 64.0f);
	}

	vec3F l_q16, h_q16;
	if (opt_mode == cOrdinaryLeastSquares)
	{
		if (!compute_least_squares_endpoints_rgb_raw_weights(num_pixels, trial_blk_raw_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16))
			return false;
	}
	else if ((opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy))
	{
		vec3F block_mean_color_q16(calc_mean(num_pixels, pBlock_pixels_q16));
		vec3F block_axis_q16(calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16));

		// Find the extent of the pixels when projected onto the principal axis (relative to the mean).
		float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;
		for (uint32_t i = 0; i < num_pixels; i++)
		{
			vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
			float kd = k.dot(block_axis_q16);

			if (kd < l)
				l = kd;
			if (kd > h)
				h = kd;
		}

		float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];

		if (h == l)
		{
			// Degenerate projection: all pixels get the same emphasis.
			for (uint32_t i = 0; i < num_pixels; i++)
				emphasis_weights[i] = 1.0f;
		}
		else
		{
			// Normalized [0,1] position of the mean (projection 0) along the axis. Clamped so both
			// mid and (1 - mid) are safe divisors below.
			float mid = (0.0f - l) / (h - l);
			mid = clamp(mid, .01f, .99f);

			float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT;
			if (opt_mode == cWeightedLeastSquaresHeavy)
			{
				lw = LOW_EMPHASIS_WEIGHT_HEAVY;
				mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY;
				hw = HIGH_EMPHASIS_WEIGHT_HEAVY;
			}

			for (uint32_t i = 0; i < num_pixels; i++)
			{
				vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
				float kd = k.dot(block_axis_q16);

				assert((kd >= l) && (kd <= h));

				float v = (kd - l) / (h - l);

				// Piecewise linear ramp: lw at the low end, mw at the mean, hw at the high end.
				// The upper segment must be normalized by dividing by its length (1 - mid), mirroring the
				// lower segment's v / mid. Multiplying instead would never let the ramp reach hw.
				if (v < mid)
					v = lerp(lw, mw, v / mid);
				else
					v = lerp(mw, hw, (v - mid) / (1.0f - mid));

				emphasis_weights[i] = v;
			}
		}

		if (!compute_weighted_least_squares_endpoints_rgb(num_pixels, nullptr, nullptr, trial_blk_raw_weightsf, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16))
			return false;
	}
	else
	{
		assert(opt_mode == cWeightedAverage);

		// Low endpoint = average of the pixels weighted by (1 - w), high endpoint = average weighted by w.
		l_q16.set(0.0f);
		float total_low = 0.0f;

		h_q16.set(0.0f);
		float total_high = 0.0f;

		for (uint32_t i = 0; i < num_pixels; i++)
		{
			vec3F p(pBlock_pixels_q16[i]);
			float w = (float)trial_blk_raw_weights[i] * (1.0f / 64.0f);

			l_q16 += p * (1.0f - w);
			total_low += (1.0f - w);

			h_q16 += p * w;
			total_high += w;
		}

		// An endpoint that no pixel contributes to can't be solved for.
		if (total_low == 0.0f)
			return false;
		l_q16 *= (1.0f / total_low);

		if (total_high == 0.0f)
			return false;
		h_q16 *= (1.0f / total_high);
	}

	uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS];

	uint32_t submode_used;
	bool pack_succeeded = pack_mode11(l_q16, h_q16, endpoint_ise_range, trial_endpoints, coptions, direct_only, first_submode, last_submode, false, submode_used);
	if (!pack_succeeded)
		return false;

	int cur_e[2][3];
	if (!decode_mode11_to_qlog12(pEndpoint_vals, cur_e, endpoint_ise_range))
		return false;

	int trial_e[2][3];
	if (!decode_mode11_to_qlog12(trial_endpoints, trial_e, endpoint_ise_range))
		return false;

	// qlog12 -> qlog16
	for (uint32_t i = 0; i < 3; i++)
	{
		cur_e[0][i] <<= 4;
		cur_e[1][i] <<= 4;

		trial_e[0][i] <<= 4;
		trial_e[1][i] <<= 4;
	}

	const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = coptions.m_g_err_scale;

	// Interpolates the qlog16 endpoints e with raw weight c ([0,64]), and returns the channel weighted squared
	// error between the q() transformed result and the (already q() transformed) desired texel.
	const auto calc_pixel_error = [&](const int (&e)[2][3], uint32_t c, const double (&desired_q)[3]) -> double
	{
		double comp_err[3];
		for (uint32_t ch = 0; ch < 3; ch++)
		{
			const uint32_t e0 = e[0][ch], e1 = e[1][ch];
			const int vi = (e0 * (64 - c) + e1 * c + 32) / 64;
			const half_float vf = astc_helpers::qlog16_to_half(vi);

			const double decoded_q = q(vf, coptions.m_q_log_bias);
			const double d = decoded_q - desired_q[ch];
			comp_err[ch] = d * d;
		}

		return R_WEIGHT * comp_err[0] + G_WEIGHT * comp_err[1] + comp_err[2];
	};

	double cur_error = 0, trial_error = 0;

	for (uint32_t p = 0; p < num_pixels; p++)
	{
		const half_float* pDesired_half = &pBlock_pixels_half[p][0];

		double desired_q[3];
		for (uint32_t ch = 0; ch < 3; ch++)
			desired_q[ch] = q(pDesired_half[ch], coptions.m_q_log_bias);

		const uint32_t c = trial_blk_raw_weights[p];
		assert(c <= 64);

		cur_error += calc_pixel_error(cur_e, c, desired_q);
		trial_error += calc_pixel_error(trial_e, c, desired_q);
	} // p

	// Only accept the trial endpoints if they're a strict improvement.
	if (trial_error < cur_error)
	{
		memcpy(pEndpoint_vals, trial_endpoints, NUM_MODE11_ENDPOINTS);
		return true;
	}

	return false;
}
// Tries to find better mode 7 endpoints for a fixed set of weights, keeping them only if they lower the error.
//
// The grid weights (ISE symbols in pWeights) are dequantized and upsampled to block resolution. A new high color
// is estimated from the block's mean color, the current scale (S) and the weights, then packed with pack_mode7().
// Given the packed high color (red channel only), a new S is derived and the endpoints are packed again. The
// current and trial endpoints are then both decoded and scored against pBlock_pixels_half using the channel
// weighted squared error of the q() transformed values.
//
// pPixel_block_ofs maps each of the num_pixels pixels to its offset within the block. nullptr selects the identity mapping.
//
// Returns true only if pEndpoint_vals was overwritten with endpoints having strictly lower error. Returns false if
// a decode or pack step fails, if the weights are degenerate, or if the trial endpoints are not better.
static bool refine_endpoints_mode7(
	uint32_t endpoint_ise_range,
	uint8_t* pEndpoint_vals, // the endpoints to optimize
	uint32_t block_w, uint32_t block_h, // block dimensions
	uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid
	uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
	const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets
	astc_hdr_codec_base_options& coptions,
	int first_submode, int last_submode)
{
	const uint32_t num_block_pixels = block_w * block_h;

	// No mapping supplied: pixel i lives at block offset i.
	uint8_t identity_block_ofs[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	if (pPixel_block_ofs == nullptr)
	{
		for (uint32_t ofs = 0; ofs < num_block_pixels; ofs++)
			identity_block_ofs[ofs] = (uint8_t)ofs;

		pPixel_block_ofs = identity_block_ofs;
	}

	// ISE symbols -> raw grid weights -> raw weights at block resolution.
	const uint32_t total_grid_weights = grid_w * grid_h;

	uint8_t grid_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
	for (uint32_t w = 0; w < total_grid_weights; w++)
		grid_raw_weights[w] = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val[pWeights[w]];

	uint8_t block_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE
	astc_helpers::upsample_weight_grid(block_w, block_h, grid_w, grid_h, grid_raw_weights, block_raw_weights);

	// Gather the weights of just the pixels being optimized.
	uint8_t pixel_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE
	for (uint32_t p = 0; p < num_pixels; p++)
	{
		assert(pPixel_block_ofs[p] < num_block_pixels);
		pixel_raw_weights[p] = block_raw_weights[pPixel_block_ofs[p]];
	}

	//--

	int cur_e[2][3];
	int cur_s = 0;
	if (!decode_mode7_to_qlog12(pEndpoint_vals, cur_e, &cur_s, endpoint_ise_range))
		return false;

	// qlog12 -> qlog16
	cur_s <<= 4;

	vec3F block_mean_color_q16(calc_mean(num_pixels, pBlock_pixels_q16));
	vec3F new_high_color_q16(block_mean_color_q16);

	const float one_over_num_pixels = 1.0f / (float)num_pixels;

	// One pass accumulates both:
	// - the new high color: the mean color plus the current S scaled by the average (1 - weight)
	// - t: the average (1 - weight), used below to compute a better S given a set of selectors and a high color
	float t = 0.0f;
	for (uint32_t p = 0; p < num_pixels; p++)
	{
		const float frac = pixel_raw_weights[p] * (1.0f / 64.0f);

		const float high_ofs = (float)cur_s * (1.0f - frac) * one_over_num_pixels;
		new_high_color_q16[0] += high_ofs;
		new_high_color_q16[1] += high_ofs;
		new_high_color_q16[2] += high_ofs;

		t += (1.0f) - frac;
	}
	t *= one_over_num_pixels;

	// t is a divisor below - bail if it's effectively zero.
	if (fabs(t) < .0000125f)
		return false;

	uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS];
	uint32_t submode_used;

	// First pack with the current S, to see what high color the packer actually produces.
	if (!pack_mode7(new_high_color_q16, (float)cur_s, endpoint_ise_range, trial_endpoints, weight_ise_range, coptions, first_submode, last_submode, false, submode_used))
		return false;

	int trial_e[2][3];
	if (!decode_mode7_to_qlog12(trial_endpoints, trial_e, nullptr, endpoint_ise_range))
		return false;

	vec3F packed_high_q16((float)(trial_e[1][0] << 4), (float)(trial_e[1][1] << 4), (float)(trial_e[1][2] << 4));

	// Only the red channel is used to derive the new S (green/blue are not consulted).
	const float s_r = (packed_high_q16[0] - block_mean_color_q16[0]) / t;
	const float new_s_q16 = ceilf(s_r);

	// Second pack, with the refined S.
	if (!pack_mode7(new_high_color_q16, new_s_q16, endpoint_ise_range, trial_endpoints, weight_ise_range, coptions, first_submode, last_submode, false, submode_used))
		return false;

	if (!decode_mode7_to_qlog12(trial_endpoints, trial_e, nullptr, endpoint_ise_range))
		return false;

	// --

	// qlog12 -> qlog16
	for (uint32_t ch = 0; ch < 3; ch++)
	{
		for (uint32_t side = 0; side < 2; side++)
		{
			cur_e[side][ch] <<= 4;
			trial_e[side][ch] <<= 4;
		}
	}

	const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = coptions.m_g_err_scale;

	// Interpolates the qlog16 endpoints e with raw weight c ([0,64]), and returns the channel weighted squared
	// error between the q() transformed result and the (already q() transformed) desired texel.
	const auto calc_pixel_error = [&](const int (&e)[2][3], uint32_t c, double desired_r_q, double desired_g_q, double desired_b_q) -> double
	{
		half_float decoded[3];
		for (uint32_t ch = 0; ch < 3; ch++)
		{
			const uint32_t e0 = e[0][ch], e1 = e[1][ch];
			const int vi = (e0 * (64 - c) + e1 * c + 32) / 64;
			decoded[ch] = astc_helpers::qlog16_to_half(vi);
		}

		const double decoded_r_q = q(decoded[0], coptions.m_q_log_bias), decoded_g_q = q(decoded[1], coptions.m_q_log_bias), decoded_b_q = q(decoded[2], coptions.m_q_log_bias);
		const double rd = decoded_r_q - desired_r_q, gd = decoded_g_q - desired_g_q, bd = decoded_b_q - desired_b_q;

		return R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
	};

	double cur_error = 0, trial_error = 0;

	for (uint32_t p = 0; p < num_pixels; p++)
	{
		const half_float* pDesired_half = &pBlock_pixels_half[p][0];

		const double desired_r_q = q(pDesired_half[0], coptions.m_q_log_bias);
		const double desired_g_q = q(pDesired_half[1], coptions.m_q_log_bias);
		const double desired_b_q = q(pDesired_half[2], coptions.m_q_log_bias);

		const uint32_t c = pixel_raw_weights[p];
		assert(c <= 64);

		cur_error += calc_pixel_error(cur_e, c, desired_r_q, desired_g_q, desired_b_q);
		trial_error += calc_pixel_error(trial_e, c, desired_r_q, desired_g_q, desired_b_q);
	} // p

	// Reject the trial endpoints unless they're a strict improvement.
	if (!(trial_error < cur_error))
		return false;

	memcpy(pEndpoint_vals, trial_endpoints, NUM_MODE7_ENDPOINTS);
	return true;
}
// Endpoint refinement entry point: dispatches on the color endpoint mode (cem).
// cem 7 is handled by refine_endpoints_mode7() (opt_mode is not used for this mode), cem 11 by
// refine_endpoints_mode11(). Both are given their full submode ranges. Any other cem returns false.
// Returns whatever the selected refiner returns (true if pEndpoint_vals was replaced with better endpoints).
bool refine_endpoints(
	uint32_t cem,
	uint32_t endpoint_ise_range,
	uint8_t* pEndpoint_vals, // the endpoints to optimize
	uint32_t block_w, uint32_t block_h, // block dimensions
	uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid
	uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
	const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets
	astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode)
{
	switch (cem)
	{
	case 7:
		return refine_endpoints_mode7(
			endpoint_ise_range,
			pEndpoint_vals,
			block_w, block_h,
			grid_w, grid_h, pWeights, weight_ise_range,
			num_pixels, pBlock_pixels_half, pBlock_pixels_q16,
			pPixel_block_ofs,
			coptions,
			FIRST_MODE7_SUBMODE_INDEX, MAX_MODE7_SUBMODE_INDEX);

	case 11:
		return refine_endpoints_mode11(
			endpoint_ise_range,
			pEndpoint_vals,
			block_w, block_h,
			grid_w, grid_h, pWeights, weight_ise_range,
			num_pixels, pBlock_pixels_half, pBlock_pixels_q16,
			pPixel_block_ofs,
			coptions,
			false, FIRST_MODE11_SUBMODE_INDEX, MAX_MODE11_SUBMODE_INDEX, opt_mode);

	default:
		break;
	}

	// Unsupported color endpoint mode.
	return false;
}
} // namespace basisu