// experimental/lowp-basic/bilerp-study.cpp - skia - Git at Google

 /*
  * Copyright 2021 Google LLC
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #include <algorithm>
 #include <cmath>
 #include <cstdio>
 #include <cstdint>

 #include "experimental/lowp-basic/QMath.h"

 // Accumulates how a candidate bilerp result compares with the golden result.
 //
 // min_diff and max_diff both start at 0, so the reported range always
 // contains zero even if every logged difference has the same sign.
 struct Stats {
     int64_t diff_8_bits = 0;  // Number of samples where candidate != golden.
     int64_t max_diff = 0;     // Largest (candidate - golden) seen so far.
     int64_t min_diff = 0;     // Smallest (candidate - golden) seen so far.
     int64_t total = 0;        // Number of samples logged.

     // Records one comparison of the value under test against the reference.
     void log(int16_t golden, int16_t candidate) {
         const int64_t diff = candidate - golden;
         max_diff = std::max(max_diff, diff);
         min_diff = std::min(min_diff, diff);
         diff_8_bits += candidate != golden;
         total++;
     }

     // Prints the mismatch count with its percentage, the difference range and
     // the number of samples.
     void print() const {
         // With nothing logged the percentage would be 0/0; report 0 instead of NaN.
         const double percent = total != 0 ? 100.0 * diff_8_bits / total : 0.0;
         // int64_t is not necessarily long long (it is long on LP64 targets), so
         // cast explicitly to match the %lld conversions.
         printf("8-bit diff: %lld - %g%%\n", static_cast<long long>(diff_8_bits), percent);
         printf("differences min: %lld max: %lld\n",
                static_cast<long long>(min_diff), static_cast<long long>(max_diff));
         printf("total: %lld\n", static_cast<long long>(total));
     }
 };

 // Single-precision reference bilerp: each corner sample is multiplied by its
 // own pair of weights and the four products are summed.
 // The result suffers from rounding problems.
 // TODO(herb): figure out rounding problems with this code.
 static float golden_bilerp(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
     const float inv_tx = 1.0f - tx;
     const float inv_ty = 1.0f - ty;
     // Keep the sum as one expression so the float evaluation order is unchanged.
     return inv_tx * inv_ty * p00
          + inv_tx * ty * p01
          + inv_ty * tx * p10
          + tx * ty * p11;
 }

 // Double-precision reference bilerp: lerp each row in x, then lerp the two
 // row results in y.
 static double golden_bilerp2(
         float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
     // Working in double avoids rounding away the lower bits.
     const auto lerp = [](double t, double a, double b) { return (1.0 - t) * a + t * b; };

     const double x = tx;
     const double y = ty;

     const double upper = lerp(x, p00, p10);
     const double lower = lerp(x, p01, p11);

     return lerp(y, upper, lower);
 }

 // Integer bilerp that keeps 16 fractional bits per axis and rounds only once
 // at the end.
 //
 // tx and ty are converted to 16.16-style weights (rounded to nearest); the
 // horizontal pass yields values scaled by 2^16, the vertical pass by 2^32.
 static int16_t full_res_bilerp(
         float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
     // Hold the weight in 64 bits. As a 32-bit value, ftx * (p10 - p00) would be
     // evaluated in int and can overflow (undefined behaviour) when the samples
     // are far apart, e.g. ftx = 49152 with a delta of 65535.
     const int64_t ftx = static_cast<int64_t>(std::floor(tx * 65536.0f + 0.5f));
     const int64_t top = ftx * (p10 - p00) + int64_t{65536} * p00;
     const int64_t bottom = ftx * (p11 - p01) + int64_t{65536} * p01;

     const int64_t fty = static_cast<int64_t>(std::floor(ty * 65536.0f + 0.5f));
     const int64_t temp = fty * (bottom - top) + top * 65536LL;

     // Add one half in the 2^32 scale, then drop the 32 fractional bits.
     const int64_t rounded = temp + (1LL << 31);
     return static_cast<int16_t>(rounded >> 32);
 }


 // 16-bit fixed-point bilerp candidate written with the I16/U16 types and the
 // constrained_add / simulate_ssse3_mm_mulhrs_epi16 helpers. Those are declared
 // outside this block (presumably in QMath.h); their exact semantics are not
 // visible here.
 //
 // tx blends p00 -> p10 and p01 -> p11; ty then blends the two row results.
 // Returns lane 0 of the final value, rounded back down from the working scale.
 static int16_t bilerp_1(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
     // Samples are scaled up by 2^7 before the fixed-point arithmetic.
     const int logPixelScale = 7;
     // Rounding bias (1 << 7) added before the final right shift.
     const int16_t half = 1 << logPixelScale;
     // tx scaled by 65536, re-centred by -32768 and rounded, so a tx in [0, 1)
     // lands in [-32768, 32767].
     I16 qtx = floor(tx * 65536.0f - 32768.0f + 0.5f);
     // Scaled difference and scaled sum of the top-row samples.
     I16 qw = (p10 - p00) << logPixelScale;
     U16 qm = (p10 + p00) << logPixelScale;
     // NOTE(review): assuming the helper models SSSE3 mulhrs (a rounded Q15
     // multiply), weight * difference + sum is about twice the lerp, and the
     // "+ 1 ... >> 1" halves it with rounding -- confirm against QMath.h.
     I16 top = (I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1);

     // Same computation for the bottom row.
     qw = (p11 - p01) << logPixelScale;
     qm = (p11 + p01) << logPixelScale;
     I16 bottom =
             (I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1);

     // ty converted the same way as tx.
     I16 qty = floor(ty * 65536.0f - 32768.0f + 0.5f);

     // Vertical pass: same difference/sum form, but without the halving step.
     qw = bottom - top;
     qm = (U16)bottom + (U16)top;
     U16 scaledAnswer = constrained_add(simulate_ssse3_mm_mulhrs_epi16(qty, qw), qm);

     // Round and shift out logPixelScale + 1 bits (presumably the 2^7 pixel scale
     // plus the factor of two left by the vertical pass -- TODO confirm).
     return (scaledAnswer[0] + half) >> (logPixelScale + 1);
 }

 // Runs `bilerp` over every combination of the sampled fractions and corner
 // values below, compares each result with the rounded double-precision golden
 // value, and returns the accumulated statistics.
 //
 // `bilerp` must be callable as bilerp(tx, ty, p00, p10, p01, p11) and return a
 // value convertible to int16_t.
 template <typename Bilerp>
 static Stats check_bilerp(Bilerp bilerp) {
     Stats stats;
     // Corner values clustered around 0, 64, 128, 192 and 255.
     auto interesting = {0, 1, 2, 3, 4, 5, 6, 7, 8, 60, 61, 62, 63, 64, 65, 66, 67, 68, 124, 125,
                         126, 127, 128, 129, 130, 131, 132, 188, 189, 190, 191, 192, 193, 194,
                         195, 196, 248, 249, 250, 251, 252, 253, 254, 255};
     // Fractions used for both axes; the last one is the largest value below 1
     // that is a multiple of 1/65536.
     auto fractions = {0.0f, 0.25f, 0.5f, 0.75f, 1.0f - 1.0f/65536.0f};
     for (float tx : fractions)
     for (float ty : fractions)
     for (int p00 : interesting)
     for (int p01 : interesting)
     for (int p10 : interesting)
     for (int p11 : interesting) {
         // Having this be double causes the proper rounding.
         double l = golden_bilerp2(tx, ty, p00, p10, p01, p11);
         int16_t golden = floor(l + 0.5);
         //l = golden_bilerp(tx, ty, p00, p10, p01, p11);
         //int16_t golden2 = floor(l + 0.5f);
         int16_t candidate = bilerp(tx, ty, p00, p10, p01, p11);
         stats.log(golden, candidate);
     }
     return stats;
 }

 // Checks each candidate bilerp against the golden implementation and prints
 // its statistics.
 int main() {
     // Each report is labelled with the name of the function actually checked.
     printf("\nUsing bilerp_1...\n");
     check_bilerp(bilerp_1).print();

     printf("\nUsing full_res_bilerp...\n");
     check_bilerp(full_res_bilerp).print();

     printf("Done.\n");
     return 0;
 }
	/*
	* Copyright 2021 Google LLC
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*/

	#include <algorithm>
	#include <cmath>
	#include <cstdio>
	#include <cstdint>

	#include "experimental/lowp-basic/QMath.h"

	// Accumulates how a candidate bilerp result compares with the golden result.
	//
	// min_diff and max_diff both start at 0, so the reported range always
	// contains zero even if every logged difference has the same sign.
	struct Stats {
	    int64_t diff_8_bits = 0;  // Number of samples where candidate != golden.
	    int64_t max_diff = 0;     // Largest (candidate - golden) seen so far.
	    int64_t min_diff = 0;     // Smallest (candidate - golden) seen so far.
	    int64_t total = 0;        // Number of samples logged.

	    // Records one comparison of the value under test against the reference.
	    void log(int16_t golden, int16_t candidate) {
	        const int64_t diff = candidate - golden;
	        max_diff = std::max(max_diff, diff);
	        min_diff = std::min(min_diff, diff);
	        diff_8_bits += candidate != golden;
	        total++;
	    }

	    // Prints the mismatch count with its percentage, the difference range and
	    // the number of samples.
	    void print() const {
	        // With nothing logged the percentage would be 0/0; report 0 instead of NaN.
	        const double percent = total != 0 ? 100.0 * diff_8_bits / total : 0.0;
	        // int64_t is not necessarily long long (it is long on LP64 targets), so
	        // cast explicitly to match the %lld conversions.
	        printf("8-bit diff: %lld - %g%%\n", static_cast<long long>(diff_8_bits), percent);
	        printf("differences min: %lld max: %lld\n",
	               static_cast<long long>(min_diff), static_cast<long long>(max_diff));
	        printf("total: %lld\n", static_cast<long long>(total));
	    }
	};

	// Single-precision reference bilerp: each corner sample is multiplied by its
	// own pair of weights and the four products are summed.
	// The result suffers from rounding problems.
	// TODO(herb): figure out rounding problems with this code.
	static float golden_bilerp(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
	    const float inv_tx = 1.0f - tx;
	    const float inv_ty = 1.0f - ty;
	    // Keep the sum as one expression so the float evaluation order is unchanged.
	    return inv_tx * inv_ty * p00
	         + inv_tx * ty * p01
	         + inv_ty * tx * p10
	         + tx * ty * p11;
	}

	// Double-precision reference bilerp: lerp each row in x, then lerp the two
	// row results in y.
	static double golden_bilerp2(
	        float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
	    // Working in double avoids rounding away the lower bits.
	    const auto lerp = [](double t, double a, double b) { return (1.0 - t) * a + t * b; };

	    const double x = tx;
	    const double y = ty;

	    const double upper = lerp(x, p00, p10);
	    const double lower = lerp(x, p01, p11);

	    return lerp(y, upper, lower);
	}

	// Integer bilerp that keeps 16 fractional bits per axis and rounds only once
	// at the end.
	//
	// tx and ty are converted to 16.16-style weights (rounded to nearest); the
	// horizontal pass yields values scaled by 2^16, the vertical pass by 2^32.
	static int16_t full_res_bilerp(
	        float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
	    // Hold the weight in 64 bits. As a 32-bit value, ftx * (p10 - p00) would be
	    // evaluated in int and can overflow (undefined behaviour) when the samples
	    // are far apart, e.g. ftx = 49152 with a delta of 65535.
	    const int64_t ftx = static_cast<int64_t>(std::floor(tx * 65536.0f + 0.5f));
	    const int64_t top = ftx * (p10 - p00) + int64_t{65536} * p00;
	    const int64_t bottom = ftx * (p11 - p01) + int64_t{65536} * p01;

	    const int64_t fty = static_cast<int64_t>(std::floor(ty * 65536.0f + 0.5f));
	    const int64_t temp = fty * (bottom - top) + top * 65536LL;

	    // Add one half in the 2^32 scale, then drop the 32 fractional bits.
	    const int64_t rounded = temp + (1LL << 31);
	    return static_cast<int16_t>(rounded >> 32);
	}


	// 16-bit fixed-point bilerp candidate written with the I16/U16 types and the
	// constrained_add / simulate_ssse3_mm_mulhrs_epi16 helpers. Those are declared
	// outside this block (presumably in QMath.h); their exact semantics are not
	// visible here.
	//
	// tx blends p00 -> p10 and p01 -> p11; ty then blends the two row results.
	// Returns lane 0 of the final value, rounded back down from the working scale.
	static int16_t bilerp_1(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
	// Samples are scaled up by 2^7 before the fixed-point arithmetic.
	const int logPixelScale = 7;
	// Rounding bias (1 << 7) added before the final right shift.
	const int16_t half = 1 << logPixelScale;
	// tx scaled by 65536, re-centred by -32768 and rounded, so a tx in [0, 1)
	// lands in [-32768, 32767].
	I16 qtx = floor(tx * 65536.0f - 32768.0f + 0.5f);
	// Scaled difference and scaled sum of the top-row samples.
	I16 qw = (p10 - p00) << logPixelScale;
	U16 qm = (p10 + p00) << logPixelScale;
	// NOTE(review): assuming the helper models SSSE3 mulhrs (a rounded Q15
	// multiply), weight * difference + sum is about twice the lerp, and the
	// "+ 1 ... >> 1" halves it with rounding -- confirm against QMath.h.
	I16 top = (I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1);

	// Same computation for the bottom row.
	qw = (p11 - p01) << logPixelScale;
	qm = (p11 + p01) << logPixelScale;
	I16 bottom =
	(I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1);

	// ty converted the same way as tx.
	I16 qty = floor(ty * 65536.0f - 32768.0f + 0.5f);

	// Vertical pass: same difference/sum form, but without the halving step.
	qw = bottom - top;
	qm = (U16)bottom + (U16)top;
	U16 scaledAnswer = constrained_add(simulate_ssse3_mm_mulhrs_epi16(qty, qw), qm);

	// Round and shift out logPixelScale + 1 bits (presumably the 2^7 pixel scale
	// plus the factor of two left by the vertical pass -- TODO confirm).
	return (scaledAnswer[0] + half) >> (logPixelScale + 1);
	}

	// Runs `bilerp` over every combination of the sampled fractions and corner
	// values below, compares each result with the rounded double-precision golden
	// value, and returns the accumulated statistics.
	//
	// `bilerp` must be callable as bilerp(tx, ty, p00, p10, p01, p11) and return a
	// value convertible to int16_t.
	template <typename Bilerp>
	static Stats check_bilerp(Bilerp bilerp) {
	    Stats stats;
	    // Corner values clustered around 0, 64, 128, 192 and 255.
	    auto interesting = {0, 1, 2, 3, 4, 5, 6, 7, 8, 60, 61, 62, 63, 64, 65, 66, 67, 68, 124, 125,
	                        126, 127, 128, 129, 130, 131, 132, 188, 189, 190, 191, 192, 193, 194,
	                        195, 196, 248, 249, 250, 251, 252, 253, 254, 255};
	    // Fractions used for both axes; the last one is the largest value below 1
	    // that is a multiple of 1/65536.
	    auto fractions = {0.0f, 0.25f, 0.5f, 0.75f, 1.0f - 1.0f/65536.0f};
	    for (float tx : fractions)
	    for (float ty : fractions)
	    for (int p00 : interesting)
	    for (int p01 : interesting)
	    for (int p10 : interesting)
	    for (int p11 : interesting) {
	        // Having this be double causes the proper rounding.
	        double l = golden_bilerp2(tx, ty, p00, p10, p01, p11);
	        int16_t golden = floor(l + 0.5);
	        //l = golden_bilerp(tx, ty, p00, p10, p01, p11);
	        //int16_t golden2 = floor(l + 0.5f);
	        int16_t candidate = bilerp(tx, ty, p00, p10, p01, p11);
	        stats.log(golden, candidate);
	    }
	    return stats;
	}

	// Checks each candidate bilerp against the golden implementation and prints
	// its statistics.
	int main() {
	    // Each report is labelled with the name of the function actually checked.
	    printf("\nUsing bilerp_1...\n");
	    check_bilerp(bilerp_1).print();

	    printf("\nUsing full_res_bilerp...\n");
	    check_bilerp(full_res_bilerp).print();

	    printf("Done.\n");
	    return 0;
	}