/**
 * Transcodes DXT into RGB565.
 * This is an optimized version of dxtToRgb565Unoptimized() below.
 * Optimizations:
 * 1. Use integer math to compute c2 and c3 instead of floating point
 *    math. Specifically:
 *      c2 = 5/8 * c0 + 3/8 * c1
 *      c3 = 3/8 * c0 + 5/8 * c1
 *    This is about a 40% performance improvement. It also appears to
 *    match what hardware DXT decoders do, as the colors produced
 *    by this integer math match what hardware produces, while the
 *    floating point in dxtToRgb565Unoptimized() produces slightly
 *    different colors (for one GPU this was tested on).
 * 2. Unroll the inner loop. Another ~10% improvement.
 * 3. Compute r0, g0, b0, r1, g1, b1 only once instead of twice.
 *    Another 10% improvement.
 * 4. Use a Uint16Array instead of a Uint8Array. Another 10% improvement.
 *
 * Each block occupies four 16-bit words of src: color c0, color c1, then
 * two words of 2-bit palette selectors (one byte per texel row, lowest
 * bits first). Dimensions that are not a multiple of 4 are supported: the
 * number of blocks per axis is rounded up and the texels of a partial
 * block that fall outside the image are discarded.
 *
 * NOTE(review): the palette is always built as four colors (c0, c1 and
 * two interpolated values); a separate mode for c0 <= c1 is not handled
 * here. Confirm that callers never depend on it.
 *
 * @param {Uint16Array} src The src DXT bits as a Uint16Array.
 * @param {number} src16Offset Index of the first block in src, counted in
 *     16-bit elements (not bytes).
 * @param {number} width Image width in texels.
 * @param {number} height Image height in texels.
 * @return {Uint16Array} dst One 16-bit value per texel, row-major,
 *     width * height elements.
 */
function dxtToRgb565(src, src16Offset, width, height) {
  var c = new Uint16Array(4);
  var dst = new Uint16Array(width * height);
  var m = 0;
  var dstI = 0;
  var i = 0;
  // The three bit fields of each endpoint, kept in place (unshifted):
  // r* = low 5 bits, g* = middle 6 bits, b* = high 5 bits.
  var r0 = 0, g0 = 0, b0 = 0, r1 = 0, g1 = 0, b1 = 0;
  var row = 0;
  var col = 0;
  var rows = 0;
  var cols = 0;

  // Round up so that a trailing partial block is still decoded, and so
  // that the per-row block stride used to index src is an integer.
  var blockWidth = Math.ceil(width / 4);
  var blockHeight = Math.ceil(height / 4);
  // Blocks with indices below these limits lie entirely inside the image
  // and can take the unrolled path.
  var fullBlocksX = Math.floor(width / 4);
  var fullBlocksY = Math.floor(height / 4);

  for (var blockY = 0; blockY < blockHeight; blockY++) {
    for (var blockX = 0; blockX < blockWidth; blockX++) {
      i = src16Offset + 4 * (blockY * blockWidth + blockX);
      c[0] = src[i];
      c[1] = src[i + 1];

      r0 = c[0] & 0x1f;
      g0 = c[0] & 0x7e0;
      b0 = c[0] & 0xf800;
      r1 = c[1] & 0x1f;
      g1 = c[1] & 0x7e0;
      b1 = c[1] & 0xf800;
      // Interpolate between c0 and c1 to get c2 and c3.
      // Note that we approximate 1/3 as 3/8 and 2/3 as 5/8 for
      // speed. This also appears to be what the hardware DXT
      // decoder in many GPUs does :)

      // rg FIXME: This is most likely leading to wrong results vs. a GPU

      // Each field is interpolated in place; the masks drop the
      // fractional bits that the shift pushes into the field below.
      c[2] = ((5 * r0 + 3 * r1) >> 3)
          | (((5 * g0 + 3 * g1) >> 3) & 0x7e0)
          | (((5 * b0 + 3 * b1) >> 3) & 0xf800);
      c[3] = ((5 * r1 + 3 * r0) >> 3)
          | (((5 * g1 + 3 * g0) >> 3) & 0x7e0)
          | (((5 * b1 + 3 * b0) >> 3) & 0xf800);

      if (blockX < fullBlocksX && blockY < fullBlocksY) {
        // Fast path: the whole 4x4 block is inside the image.
        m = src[i + 2];
        dstI = (blockY * 4) * width + blockX * 4;
        dst[dstI] = c[m & 0x3];
        dst[dstI + 1] = c[(m >> 2) & 0x3];
        dst[dstI + 2] = c[(m >> 4) & 0x3];
        dst[dstI + 3] = c[(m >> 6) & 0x3];
        dstI += width;
        dst[dstI] = c[(m >> 8) & 0x3];
        dst[dstI + 1] = c[(m >> 10) & 0x3];
        dst[dstI + 2] = c[(m >> 12) & 0x3];
        dst[dstI + 3] = c[(m >> 14)];
        m = src[i + 3];
        dstI += width;
        dst[dstI] = c[m & 0x3];
        dst[dstI + 1] = c[(m >> 2) & 0x3];
        dst[dstI + 2] = c[(m >> 4) & 0x3];
        dst[dstI + 3] = c[(m >> 6) & 0x3];
        dstI += width;
        dst[dstI] = c[(m >> 8) & 0x3];
        dst[dstI + 1] = c[(m >> 10) & 0x3];
        dst[dstI + 2] = c[(m >> 12) & 0x3];
        dst[dstI + 3] = c[(m >> 14)];
      } else {
        // Edge block: write only the texels that fall inside the image
        // so nothing spills into the following destination row.
        cols = Math.min(4, width - blockX * 4);
        rows = Math.min(4, height - blockY * 4);
        for (row = 0; row < rows; row++) {
          // Rows 0-1 live in word i + 2, rows 2-3 in word i + 3; the
          // odd row of each pair is in the high byte.
          m = src[i + 2 + (row >> 1)] >> ((row & 1) * 8);
          dstI = (blockY * 4 + row) * width + blockX * 4;
          for (col = 0; col < cols; col++) {
            dst[dstI + col] = c[(m >> (2 * col)) & 0x3];
          }
        }
      }
    }
  }
  return dst;
}
| |
| |
/**
 * An unoptimized version of dxtToRgb565. Also, the floating
 * point math used to compute the colors actually results in
 * slightly different colors compared to hardware DXT decoders.
 *
 * Each block occupies 8 bytes of src: color c0 (little-endian 16 bits),
 * color c1 (little-endian 16 bits), then four bytes of 2-bit palette
 * selectors, one byte per texel row with the leftmost texel in the lowest
 * bits. Dimensions that are not a multiple of 4 are supported: the number
 * of blocks per axis is rounded up and the texels of a partial block that
 * fall outside the image are discarded.
 *
 * NOTE(review): the palette is always built as four colors (c0, c1 and
 * two interpolated values); a separate mode for c0 <= c1 is not handled
 * here. Confirm that callers never depend on it.
 *
 * @param {Uint8Array} src The src DXT bits as bytes.
 * @param {number} srcByteOffset Byte index of the first block in src.
 * @param {number} width Image width in texels.
 * @param {number} height Image height in texels.
 * @return {Uint16Array} dst One 16-bit value per texel, row-major,
 *     width * height elements.
 */
function dxtToRgb565Unoptimized(src, srcByteOffset, width, height) {
  var c = new Uint16Array(4);
  var dst = new Uint16Array(width * height);

  // Round up so that a trailing partial block is still decoded, and so
  // that the per-row block stride used to index src is an integer.
  var blockWidth = Math.ceil(width / 4);
  var blockHeight = Math.ceil(height / 4);
  for (var blockY = 0; blockY < blockHeight; blockY++) {
    for (var blockX = 0; blockX < blockWidth; blockX++) {
      var i = srcByteOffset + 8 * (blockY * blockWidth + blockX);
      c[0] = src[i] | (src[i + 1] << 8);
      c[1] = src[i + 2] | (src[i + 3] << 8);
      // c2 = 2/3 * c0 + 1/3 * c1 and c3 = 2/3 * c1 + 1/3 * c0, computed
      // per bit field. The bitwise operators truncate the fractional
      // results back to integers.
      c[2] = (2 * (c[0] & 0x1f) + 1 * (c[1] & 0x1f)) / 3
          | (((2 * (c[0] & 0x7e0) + 1 * (c[1] & 0x7e0)) / 3) & 0x7e0)
          | (((2 * (c[0] & 0xf800) + 1 * (c[1] & 0xf800)) / 3) & 0xf800);
      c[3] = (2 * (c[1] & 0x1f) + 1 * (c[0] & 0x1f)) / 3
          | (((2 * (c[1] & 0x7e0) + 1 * (c[0] & 0x7e0)) / 3) & 0x7e0)
          | (((2 * (c[1] & 0xf800) + 1 * (c[0] & 0xf800)) / 3) & 0xf800);

      // Clip edge blocks to the image so that nothing spills into the
      // following destination row or past the last row.
      var cols = Math.min(4, width - blockX * 4);
      var rows = Math.min(4, height - blockY * 4);
      for (var row = 0; row < rows; row++) {
        var m = src[i + 4 + row];
        var dstI = (blockY * 4 + row) * width + blockX * 4;
        for (var col = 0; col < cols; col++) {
          dst[dstI + col] = c[(m >> (2 * col)) & 0x3];
        }
      }
    }
  }
  return dst;
}
| |