// blob: 8e35be4d12bc91738ef74d4e6d69b5ec6cdd99c2 [file] [log] [blame]
/**
 * Transcodes DXT into RGB565.
 * This is an optimized version of dxtToRgb565Unoptimized() below.
 * Optimizations:
 * 1. Use integer math to compute c2 and c3 instead of floating point
 *    math. Specifically:
 *      c2 = 5/8 * c0 + 3/8 * c1
 *      c3 = 3/8 * c0 + 5/8 * c1
 *    This is about a 40% performance improvement. It also appears to
 *    match what hardware DXT decoders do, as the colors produced
 *    by this integer math match what hardware produces, while the
 *    floating point in dxtToRgb565Unoptimized() produce slightly
 *    different colors (for one GPU this was tested on).
 * 2. Unroll the inner loop. Another ~10% improvement.
 * 3. Compute the channel components of c0 and c1 only once instead of
 *    twice. Another 10% improvement.
 * 4. Use a Uint16Array instead of a Uint8Array. Another 10% improvement.
 *
 * Each block occupies four 16-bit words: c0, c1, then two words of 2-bit
 * palette selectors (rows 0-1 in the first word, rows 2-3 in the second).
 * The palette is always built as four colors; c0 and c1 are never compared,
 * so any encoding mode that depends on their ordering is not handled here.
 *
 * Dimensions that are not multiples of 4 are supported: the number of
 * blocks is rounded up and pixels of edge blocks that fall outside the
 * image are dropped.
 *
 * @param {Uint16Array} src The src DXT bits as a Uint16Array.
 * @param {number} src16Offset Index into src, counted in 16-bit words
 *     (not bytes), of the first block to decode.
 * @param {number} width Image width in pixels.
 * @param {number} height Image height in pixels.
 * @return {Uint16Array} dst width * height pixels in row-major order.
 */
function dxtToRgb565(src, src16Offset, width, height) {
  var c = new Uint16Array(4);
  var dst = new Uint16Array(width * height);
  var m = 0;
  var dstI = 0;
  var i = 0;
  var row = 0, col = 0, rows = 0, cols = 0;
  // Masked (unshifted) 5-, 6- and 5-bit fields of c0 and c1.
  var lo0 = 0, mid0 = 0, hi0 = 0, lo1 = 0, mid1 = 0, hi1 = 0;
  // Round up so that a trailing partial block is still counted; this also
  // keeps the source block stride an integer.
  var blockWidth = Math.ceil(width / 4);
  var blockHeight = Math.ceil(height / 4);
  for (var blockY = 0; blockY < blockHeight; blockY++) {
    // Number of pixel rows of this block row that lie inside the image.
    rows = Math.min(4, height - blockY * 4);
    for (var blockX = 0; blockX < blockWidth; blockX++) {
      cols = Math.min(4, width - blockX * 4);
      i = src16Offset + 4 * (blockY * blockWidth + blockX);
      c[0] = src[i];
      c[1] = src[i + 1];
      lo0 = c[0] & 0x1f;
      mid0 = c[0] & 0x7e0;
      hi0 = c[0] & 0xf800;
      lo1 = c[1] & 0x1f;
      mid1 = c[1] & 0x7e0;
      hi1 = c[1] & 0xf800;
      // Interpolate between c0 and c1 to get c2 and c3.
      // Note that we approximate 1/3 as 3/8 and 2/3 as 5/8 for
      // speed. This also appears to be what the hardware DXT
      // decoder in many GPUs does :)
      // FIXME: This is most likely leading to wrong results vs. a GPU.
      // The fields are interpolated in place (unshifted); the masks drop
      // the fractional bits that land in the field below.
      c[2] = ((5 * lo0 + 3 * lo1) >> 3)
          | (((5 * mid0 + 3 * mid1) >> 3) & 0x7e0)
          | (((5 * hi0 + 3 * hi1) >> 3) & 0xf800);
      c[3] = ((5 * lo1 + 3 * lo0) >> 3)
          | (((5 * mid1 + 3 * mid0) >> 3) & 0x7e0)
          | (((5 * hi1 + 3 * hi0) >> 3) & 0xf800);
      if (cols === 4 && rows === 4) {
        // Fast path: the whole 4x4 block is inside the image (unrolled).
        // Every selector is masked, including the top one, so a signed
        // view of the same data cannot produce a negative index.
        m = src[i + 2];
        dstI = (blockY * 4) * width + blockX * 4;
        dst[dstI] = c[m & 0x3];
        dst[dstI + 1] = c[(m >> 2) & 0x3];
        dst[dstI + 2] = c[(m >> 4) & 0x3];
        dst[dstI + 3] = c[(m >> 6) & 0x3];
        dstI += width;
        dst[dstI] = c[(m >> 8) & 0x3];
        dst[dstI + 1] = c[(m >> 10) & 0x3];
        dst[dstI + 2] = c[(m >> 12) & 0x3];
        dst[dstI + 3] = c[(m >> 14) & 0x3];
        m = src[i + 3];
        dstI += width;
        dst[dstI] = c[m & 0x3];
        dst[dstI + 1] = c[(m >> 2) & 0x3];
        dst[dstI + 2] = c[(m >> 4) & 0x3];
        dst[dstI + 3] = c[(m >> 6) & 0x3];
        dstI += width;
        dst[dstI] = c[(m >> 8) & 0x3];
        dst[dstI + 1] = c[(m >> 10) & 0x3];
        dst[dstI + 2] = c[(m >> 12) & 0x3];
        dst[dstI + 3] = c[(m >> 14) & 0x3];
      } else {
        // Edge block: only write the pixels that are inside the image so
        // nothing spills into the next row or past the end of dst.
        for (row = 0; row < rows; row++) {
          // Rows 0-1 live in word i + 2, rows 2-3 in word i + 3; the odd
          // row of each pair is in the high byte.
          m = src[i + 2 + (row >> 1)] >> ((row & 1) * 8);
          dstI = (blockY * 4 + row) * width + blockX * 4;
          for (col = 0; col < cols; col++) {
            dst[dstI + col] = c[(m >> (col * 2)) & 0x3];
          }
        }
      }
    }
  }
  return dst;
}
/**
 * An unoptimized version of dxtToRgb565. Also, the floating
 * point math used to compute the colors actually results in
 * slightly different colors compared to hardware DXT decoders.
 *
 * Each block occupies 8 bytes: c0 and c1 as little-endian 16-bit values,
 * followed by four bytes of 2-bit palette selectors, one byte per row.
 * The palette is always built as four colors; c0 and c1 are never compared.
 *
 * Dimensions that are not multiples of 4 are supported: the number of
 * blocks is rounded up and pixels of edge blocks that fall outside the
 * image are dropped.
 *
 * @param {Uint8Array} src The src DXT bits as bytes.
 * @param {number} srcByteOffset Byte index in src of the first block.
 * @param {number} width Image width in pixels.
 * @param {number} height Image height in pixels.
 * @return {Uint16Array} dst width * height pixels in row-major order.
 */
function dxtToRgb565Unoptimized(src, srcByteOffset, width, height) {
  var c = new Uint16Array(4);
  var dst = new Uint16Array(width * height);
  // Round up so that a trailing partial block is still counted; this also
  // keeps the source block stride an integer.
  var blockWidth = Math.ceil(width / 4);
  var blockHeight = Math.ceil(height / 4);
  for (var blockY = 0; blockY < blockHeight; blockY++) {
    // Number of pixel rows of this block row that lie inside the image.
    var rows = Math.min(4, height - blockY * 4);
    for (var blockX = 0; blockX < blockWidth; blockX++) {
      var cols = Math.min(4, width - blockX * 4);
      var i = srcByteOffset + 8 * (blockY * blockWidth + blockX);
      c[0] = src[i] | (src[i + 1] << 8);
      c[1] = src[i + 2] | (src[i + 3] << 8);
      // c2 = 2/3 * c0 + 1/3 * c1 and c3 = 2/3 * c1 + 1/3 * c0, computed
      // per masked field. The divisions are floating point; the bitwise
      // operators then truncate each quotient to an integer.
      c[2] = (2 * (c[0] & 0x1f) + 1 * (c[1] & 0x1f)) / 3
          | (((2 * (c[0] & 0x7e0) + 1 * (c[1] & 0x7e0)) / 3) & 0x7e0)
          | (((2 * (c[0] & 0xf800) + 1 * (c[1] & 0xf800)) / 3) & 0xf800);
      c[3] = (2 * (c[1] & 0x1f) + 1 * (c[0] & 0x1f)) / 3
          | (((2 * (c[1] & 0x7e0) + 1 * (c[0] & 0x7e0)) / 3) & 0x7e0)
          | (((2 * (c[1] & 0xf800) + 1 * (c[0] & 0xf800)) / 3) & 0xf800);
      for (var row = 0; row < rows; row++) {
        // One selector byte per row; pixel `col` uses bits 2*col..2*col+1.
        var m = src[i + 4 + row];
        var dstI = (blockY * 4 + row) * width + blockX * 4;
        // Only write columns inside the image so an edge block cannot
        // spill into the next row.
        for (var col = 0; col < cols; col++) {
          dst[dstI + col] = c[(m >> (col * 2)) & 0x3];
        }
      }
    }
  }
  return dst;
}