// webgl_videotest/dxt-to-rgb565.js - external/github.com/BinomialLLC/basis_universal - Git at Google

 /**
  * Transcodes DXT1 color blocks into RGB565 pixels.
  * This is an optimized version of dxtToRgb565Unoptimized().
  * Optimizations:
  * 1. Use integer math to compute c2 and c3 instead of floating point
  *    math.  Specifically, for four-color blocks:
  *      c2 = 5/8 * c0 + 3/8 * c1
  *      c3 = 3/8 * c0 + 5/8 * c1
  *    This is about a 40% performance improvement.  It also appears to
  *    match what hardware DXT decoders do, as the colors produced
  *    by this integer math match what hardware produces, while the
  *    floating point in dxtToRgb565Unoptimized() produce slightly
  *    different colors (for one GPU this was tested on).
  * 2. Unroll the inner loop.  Another ~10% improvement.
  * 3. Compute r0, g0, b0, r1, g1, b1 only once instead of twice.
  *    Another 10% improvement.
  * 4. Use a Uint16Array instead of a Uint8Array.  Another 10% improvement.
  *
  * Blocks whose first endpoint is not greater than the second
  * (c0 <= c1) are decoded in DXT1 three-color mode: c2 is the average
  * of c0 and c1, and c3 is black (RGB565 has no alpha channel).
  *
  * width and height do not have to be multiples of 4.  The source is
  * expected to hold ceil(width / 4) * ceil(height / 4) blocks, and the
  * texels of edge blocks that fall outside the image are discarded.
  *
  * @param {Uint16Array} src The src DXT bits as a Uint16Array
  *     (four 16-bit words per 4x4 block: c0, c1 and two selector words).
  * @param {number} src16Offset Index into src, in 16-bit words, of the
  *     first block.
  * @param {number} width Image width in pixels.
  * @param {number} height Image height in pixels.
  * @return {Uint16Array} dst width * height RGB565 pixels in row-major
  *     order.
  */
 function dxtToRgb565(src, src16Offset, width, height) {
   var c = new Uint16Array(4);
   var dst = new Uint16Array(width * height);
   var m = 0;
   var dstI = 0;
   var i = 0;
   // RGB565 layout: red in bits 15-11, green in bits 10-5, blue in bits 4-0.
   // The channels are kept in place (not shifted down) while interpolating.
   var r0 = 0, g0 = 0, b0 = 0, r1 = 0, g1 = 0, b1 = 0;
   var x0 = 0, y0 = 0, row = 0, col = 0, rows = 0, cols = 0;

   // Partial blocks on the right/bottom edge are still stored as whole
   // blocks, so round the block counts up.
   var blockWidth = Math.ceil(width / 4);
   var blockHeight = Math.ceil(height / 4);
   for (var blockY = 0; blockY < blockHeight; blockY++) {
     for (var blockX = 0; blockX < blockWidth; blockX++) {
       i = src16Offset + 4 * (blockY * blockWidth + blockX);
       c[0] = src[i];
       c[1] = src[i + 1];

       b0 = c[0] & 0x1f;
       g0 = c[0] & 0x7e0;
       r0 = c[0] & 0xf800;
       b1 = c[1] & 0x1f;
       g1 = c[1] & 0x7e0;
       r1 = c[1] & 0xf800;

       if (c[0] > c[1]) {
         // Four-color mode.
         // Interpolate between c0 and c1 to get c2 and c3.
         // Note that we approximate 1/3 as 3/8 and 2/3 as 5/8 for
         // speed.  This also appears to be what the hardware DXT
         // decoder in many GPUs does :)

         // rg FIXME: This is most likely leading to wrong results vs. a GPU

         c[2] = ((5 * b0 + 3 * b1) >> 3)
                | (((5 * g0 + 3 * g1) >> 3) & 0x7e0)
                | (((5 * r0 + 3 * r1) >> 3) & 0xf800);
         c[3] = ((5 * b1 + 3 * b0) >> 3)
                | (((5 * g1 + 3 * g0) >> 3) & 0x7e0)
                | (((5 * r1 + 3 * r0) >> 3) & 0xf800);
       } else {
         // Three-color mode: c2 is the midpoint, c3 is black.
         c[2] = ((b0 + b1) >> 1)
                | (((g0 + g1) >> 1) & 0x7e0)
                | (((r0 + r1) >> 1) & 0xf800);
         c[3] = 0;
       }

       x0 = blockX * 4;
       y0 = blockY * 4;
       if (x0 + 4 <= width && y0 + 4 <= height) {
         // Fast path: the block lies fully inside the image, so all 16
         // texels are written with the unrolled code.  Each selector word
         // holds two rows of four 2-bit indices, lowest bits first.
         m = src[i + 2];
         dstI = y0 * width + x0;
         dst[dstI] = c[m & 0x3];
         dst[dstI + 1] = c[(m >> 2) & 0x3];
         dst[dstI + 2] = c[(m >> 4) & 0x3];
         dst[dstI + 3] = c[(m >> 6) & 0x3];
         dstI += width;
         dst[dstI] = c[(m >> 8) & 0x3];
         dst[dstI + 1] = c[(m >> 10) & 0x3];
         dst[dstI + 2] = c[(m >> 12) & 0x3];
         dst[dstI + 3] = c[(m >> 14)];
         m = src[i + 3];
         dstI += width;
         dst[dstI] = c[m & 0x3];
         dst[dstI + 1] = c[(m >> 2) & 0x3];
         dst[dstI + 2] = c[(m >> 4) & 0x3];
         dst[dstI + 3] = c[(m >> 6) & 0x3];
         dstI += width;
         dst[dstI] = c[(m >> 8) & 0x3];
         dst[dstI + 1] = c[(m >> 10) & 0x3];
         dst[dstI + 2] = c[(m >> 12) & 0x3];
         dst[dstI + 3] = c[(m >> 14)];
       } else {
         // Edge block: only write the texels that are inside the image so
         // nothing spills into the next row or past the end of dst.
         rows = Math.min(4, height - y0);
         cols = Math.min(4, width - x0);
         for (row = 0; row < rows; row++) {
           // Rows 0-1 live in src[i + 2], rows 2-3 in src[i + 3]; odd rows
           // use the high byte of the word.
           m = src[i + 2 + (row >> 1)] >> ((row & 1) << 3);
           dstI = (y0 + row) * width + x0;
           for (col = 0; col < cols; col++) {
             dst[dstI + col] = c[(m >> (col << 1)) & 0x3];
           }
         }
       }
     }
   }
   return dst;
 }


 /**
  * An unoptimized version of dxtToRgb565.  Also, the floating
  * point math used to compute the colors actually results in
  * slightly different colors compared to hardware DXT decoders.
  *
  * Four-color blocks (c0 > c1) use c2 = (2 * c0 + c1) / 3 and
  * c3 = (2 * c1 + c0) / 3 per channel.  Blocks with c0 <= c1 are decoded
  * in DXT1 three-color mode: c2 is the average of c0 and c1 and c3 is
  * black (RGB565 has no alpha channel).
  *
  * width and height do not have to be multiples of 4.  The source is
  * expected to hold ceil(width / 4) * ceil(height / 4) blocks of 8 bytes
  * each, and texels of edge blocks outside the image are discarded.
  *
  * @param {Uint8Array} src The DXT bytes: per block two little-endian
  *     16-bit colors followed by four selector bytes (one per row).
  * @param {number} srcByteOffset Byte index into src of the first block.
  * @param {number} width Image width in pixels.
  * @param {number} height Image height in pixels.
  * @return {Uint16Array} dst width * height RGB565 pixels in row-major
  *     order.
  */
 function dxtToRgb565Unoptimized(src, srcByteOffset, width, height) {
   var c = new Uint16Array(4);
   var dst = new Uint16Array(width * height);

   // Partial blocks on the right/bottom edge are still stored as whole
   // blocks, so round the block counts up.
   var blockWidth = Math.ceil(width / 4);
   var blockHeight = Math.ceil(height / 4);
   for (var blockY = 0; blockY < blockHeight; blockY++) {
     for (var blockX = 0; blockX < blockWidth; blockX++) {
       var i = srcByteOffset + 8 * (blockY * blockWidth + blockX);
       c[0] = src[i] | (src[i + 1] << 8);
       c[1] = src[i + 2] | (src[i + 3] << 8);
       if (c[0] > c[1]) {
         // Four-color mode.  The bitwise operators truncate the
         // fractional results of the divisions to integers.
         c[2] = ((2 * (c[0] & 0x1f) + 1 * (c[1] & 0x1f)) / 3)
                | (((2 * (c[0] & 0x7e0) + 1 * (c[1] & 0x7e0)) / 3) & 0x7e0)
                | (((2 * (c[0] & 0xf800) + 1 * (c[1] & 0xf800)) / 3) & 0xf800);
         c[3] = ((2 * (c[1] & 0x1f) + 1 * (c[0] & 0x1f)) / 3)
                | (((2 * (c[1] & 0x7e0) + 1 * (c[0] & 0x7e0)) / 3) & 0x7e0)
                | (((2 * (c[1] & 0xf800) + 1 * (c[0] & 0xf800)) / 3) & 0xf800);
       } else {
         // Three-color mode: c2 is the midpoint, c3 is black.
         c[2] = (((c[0] & 0x1f) + (c[1] & 0x1f)) / 2)
                | ((((c[0] & 0x7e0) + (c[1] & 0x7e0)) / 2) & 0x7e0)
                | ((((c[0] & 0xf800) + (c[1] & 0xf800)) / 2) & 0xf800);
         c[3] = 0;
       }
       // Clip edge blocks so nothing is written outside the image.
       var rows = Math.min(4, height - blockY * 4);
       var cols = Math.min(4, width - blockX * 4);
       for (var row = 0; row < rows; row++) {
         // One selector byte per row: four 2-bit indices, lowest bits first.
         var m = src[i + 4 + row];
         var dstI = (blockY * 4 + row) * width + blockX * 4;
         for (var col = 0; col < cols; col++) {
           dst[dstI++] = c[(m >> (2 * col)) & 0x3];
         }
       }
     }
   }
   return dst;
 }
	/**
	 * Transcodes DXT1 color blocks into RGB565 pixels.
	 * This is an optimized version of dxtToRgb565Unoptimized().
	 * Optimizations:
	 * 1. Use integer math to compute c2 and c3 instead of floating point
	 *    math. Specifically, for four-color blocks:
	 *      c2 = 5/8 * c0 + 3/8 * c1
	 *      c3 = 3/8 * c0 + 5/8 * c1
	 *    This is about a 40% performance improvement. It also appears to
	 *    match what hardware DXT decoders do, as the colors produced
	 *    by this integer math match what hardware produces, while the
	 *    floating point in dxtToRgb565Unoptimized() produce slightly
	 *    different colors (for one GPU this was tested on).
	 * 2. Unroll the inner loop. Another ~10% improvement.
	 * 3. Compute r0, g0, b0, r1, g1, b1 only once instead of twice.
	 *    Another 10% improvement.
	 * 4. Use a Uint16Array instead of a Uint8Array. Another 10% improvement.
	 *
	 * Blocks whose first endpoint is not greater than the second
	 * (c0 <= c1) are decoded in DXT1 three-color mode: c2 is the average
	 * of c0 and c1, and c3 is black (RGB565 has no alpha channel).
	 *
	 * width and height do not have to be multiples of 4. The source is
	 * expected to hold ceil(width / 4) * ceil(height / 4) blocks, and the
	 * texels of edge blocks that fall outside the image are discarded.
	 *
	 * @param {Uint16Array} src The src DXT bits as a Uint16Array
	 *     (four 16-bit words per 4x4 block: c0, c1 and two selector words).
	 * @param {number} src16Offset Index into src, in 16-bit words, of the
	 *     first block.
	 * @param {number} width Image width in pixels.
	 * @param {number} height Image height in pixels.
	 * @return {Uint16Array} dst width * height RGB565 pixels in row-major
	 *     order.
	 */
	function dxtToRgb565(src, src16Offset, width, height) {
		var c = new Uint16Array(4);
		var dst = new Uint16Array(width * height);
		var m = 0;
		var dstI = 0;
		var i = 0;
		// RGB565 layout: red in bits 15-11, green in bits 10-5, blue in bits 4-0.
		// The channels are kept in place (not shifted down) while interpolating.
		var r0 = 0, g0 = 0, b0 = 0, r1 = 0, g1 = 0, b1 = 0;
		var x0 = 0, y0 = 0, row = 0, col = 0, rows = 0, cols = 0;

		// Partial blocks on the right/bottom edge are still stored as whole
		// blocks, so round the block counts up.
		var blockWidth = Math.ceil(width / 4);
		var blockHeight = Math.ceil(height / 4);
		for (var blockY = 0; blockY < blockHeight; blockY++) {
			for (var blockX = 0; blockX < blockWidth; blockX++) {
				i = src16Offset + 4 * (blockY * blockWidth + blockX);
				c[0] = src[i];
				c[1] = src[i + 1];

				b0 = c[0] & 0x1f;
				g0 = c[0] & 0x7e0;
				r0 = c[0] & 0xf800;
				b1 = c[1] & 0x1f;
				g1 = c[1] & 0x7e0;
				r1 = c[1] & 0xf800;

				if (c[0] > c[1]) {
					// Four-color mode.
					// Interpolate between c0 and c1 to get c2 and c3.
					// Note that we approximate 1/3 as 3/8 and 2/3 as 5/8 for
					// speed. This also appears to be what the hardware DXT
					// decoder in many GPUs does :)

					// rg FIXME: This is most likely leading to wrong results vs. a GPU

					c[2] = ((5 * b0 + 3 * b1) >> 3)
						| (((5 * g0 + 3 * g1) >> 3) & 0x7e0)
						| (((5 * r0 + 3 * r1) >> 3) & 0xf800);
					c[3] = ((5 * b1 + 3 * b0) >> 3)
						| (((5 * g1 + 3 * g0) >> 3) & 0x7e0)
						| (((5 * r1 + 3 * r0) >> 3) & 0xf800);
				} else {
					// Three-color mode: c2 is the midpoint, c3 is black.
					c[2] = ((b0 + b1) >> 1)
						| (((g0 + g1) >> 1) & 0x7e0)
						| (((r0 + r1) >> 1) & 0xf800);
					c[3] = 0;
				}

				x0 = blockX * 4;
				y0 = blockY * 4;
				if (x0 + 4 <= width && y0 + 4 <= height) {
					// Fast path: the block lies fully inside the image, so all 16
					// texels are written with the unrolled code. Each selector word
					// holds two rows of four 2-bit indices, lowest bits first.
					m = src[i + 2];
					dstI = y0 * width + x0;
					dst[dstI] = c[m & 0x3];
					dst[dstI + 1] = c[(m >> 2) & 0x3];
					dst[dstI + 2] = c[(m >> 4) & 0x3];
					dst[dstI + 3] = c[(m >> 6) & 0x3];
					dstI += width;
					dst[dstI] = c[(m >> 8) & 0x3];
					dst[dstI + 1] = c[(m >> 10) & 0x3];
					dst[dstI + 2] = c[(m >> 12) & 0x3];
					dst[dstI + 3] = c[(m >> 14)];
					m = src[i + 3];
					dstI += width;
					dst[dstI] = c[m & 0x3];
					dst[dstI + 1] = c[(m >> 2) & 0x3];
					dst[dstI + 2] = c[(m >> 4) & 0x3];
					dst[dstI + 3] = c[(m >> 6) & 0x3];
					dstI += width;
					dst[dstI] = c[(m >> 8) & 0x3];
					dst[dstI + 1] = c[(m >> 10) & 0x3];
					dst[dstI + 2] = c[(m >> 12) & 0x3];
					dst[dstI + 3] = c[(m >> 14)];
				} else {
					// Edge block: only write the texels that are inside the image so
					// nothing spills into the next row or past the end of dst.
					rows = Math.min(4, height - y0);
					cols = Math.min(4, width - x0);
					for (row = 0; row < rows; row++) {
						// Rows 0-1 live in src[i + 2], rows 2-3 in src[i + 3]; odd rows
						// use the high byte of the word.
						m = src[i + 2 + (row >> 1)] >> ((row & 1) << 3);
						dstI = (y0 + row) * width + x0;
						for (col = 0; col < cols; col++) {
							dst[dstI + col] = c[(m >> (col << 1)) & 0x3];
						}
					}
				}
			}
		}
		return dst;
	}


	/**
	 * An unoptimized version of dxtToRgb565. Also, the floating
	 * point math used to compute the colors actually results in
	 * slightly different colors compared to hardware DXT decoders.
	 *
	 * Four-color blocks (c0 > c1) use c2 = (2 * c0 + c1) / 3 and
	 * c3 = (2 * c1 + c0) / 3 per channel. Blocks with c0 <= c1 are decoded
	 * in DXT1 three-color mode: c2 is the average of c0 and c1 and c3 is
	 * black (RGB565 has no alpha channel).
	 *
	 * width and height do not have to be multiples of 4. The source is
	 * expected to hold ceil(width / 4) * ceil(height / 4) blocks of 8 bytes
	 * each, and texels of edge blocks outside the image are discarded.
	 *
	 * @param {Uint8Array} src The DXT bytes: per block two little-endian
	 *     16-bit colors followed by four selector bytes (one per row).
	 * @param {number} srcByteOffset Byte index into src of the first block.
	 * @param {number} width Image width in pixels.
	 * @param {number} height Image height in pixels.
	 * @return {Uint16Array} dst width * height RGB565 pixels in row-major
	 *     order.
	 */
	function dxtToRgb565Unoptimized(src, srcByteOffset, width, height) {
		var c = new Uint16Array(4);
		var dst = new Uint16Array(width * height);

		// Partial blocks on the right/bottom edge are still stored as whole
		// blocks, so round the block counts up.
		var blockWidth = Math.ceil(width / 4);
		var blockHeight = Math.ceil(height / 4);
		for (var blockY = 0; blockY < blockHeight; blockY++) {
			for (var blockX = 0; blockX < blockWidth; blockX++) {
				var i = srcByteOffset + 8 * (blockY * blockWidth + blockX);
				c[0] = src[i] | (src[i + 1] << 8);
				c[1] = src[i + 2] | (src[i + 3] << 8);
				if (c[0] > c[1]) {
					// Four-color mode. The bitwise operators truncate the
					// fractional results of the divisions to integers.
					c[2] = ((2 * (c[0] & 0x1f) + 1 * (c[1] & 0x1f)) / 3)
						| (((2 * (c[0] & 0x7e0) + 1 * (c[1] & 0x7e0)) / 3) & 0x7e0)
						| (((2 * (c[0] & 0xf800) + 1 * (c[1] & 0xf800)) / 3) & 0xf800);
					c[3] = ((2 * (c[1] & 0x1f) + 1 * (c[0] & 0x1f)) / 3)
						| (((2 * (c[1] & 0x7e0) + 1 * (c[0] & 0x7e0)) / 3) & 0x7e0)
						| (((2 * (c[1] & 0xf800) + 1 * (c[0] & 0xf800)) / 3) & 0xf800);
				} else {
					// Three-color mode: c2 is the midpoint, c3 is black.
					c[2] = (((c[0] & 0x1f) + (c[1] & 0x1f)) / 2)
						| ((((c[0] & 0x7e0) + (c[1] & 0x7e0)) / 2) & 0x7e0)
						| ((((c[0] & 0xf800) + (c[1] & 0xf800)) / 2) & 0xf800);
					c[3] = 0;
				}
				// Clip edge blocks so nothing is written outside the image.
				var rows = Math.min(4, height - blockY * 4);
				var cols = Math.min(4, width - blockX * 4);
				for (var row = 0; row < rows; row++) {
					// One selector byte per row: four 2-bit indices, lowest bits first.
					var m = src[i + 4 + row];
					var dstI = (blockY * 4 + row) * width + blockX * 4;
					for (var col = 0; col < cols; col++) {
						dst[dstI++] = c[(m >> (2 * col)) & 0x3];
					}
				}
			}
		}
		return dst;
	}