| /* |
| @author herumi |
| |
| JPEG quantize sample |
| This program generates a quantization routine by using fast division algorithm in run-time. |
| |
| time(sec) |
| quality 1(high) 10 50 100(low) |
| VC2005 8.0 8.0 8.0 8.0 |
| Xbyak 1.6 0.8 0.5 0.5 |
| |
| |
| ; generated code at q = 1 |
| push esi |
| push edi |
| mov edi,dword ptr [esp+0Ch] |
| mov esi,dword ptr [esp+10h] |
| mov eax,dword ptr [esi] |
| shr eax,4 |
| mov dword ptr [edi],eax |
| mov eax,dword ptr [esi+4] |
| mov edx,0BA2E8BA3h |
| mul eax,edx |
| shr edx,3 |
| ... |
| |
| ; generated code at q = 100 |
| push esi |
| push edi |
| mov edi,dword ptr [esp+0Ch] |
| mov esi,dword ptr [esp+10h] |
| mov eax,dword ptr [esi] |
| mov dword ptr [edi],eax |
| mov eax,dword ptr [esi+4] |
| mov dword ptr [edi+4],eax |
| mov eax,dword ptr [esi+8] |
| mov dword ptr [edi+8],eax |
| mov eax,dword ptr [esi+0Ch] |
| ... |
| |
| */ |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <time.h> |
| #define XBYAK_NO_OP_NAMES |
| #include "xbyak/xbyak.h" |
| #ifdef _MSC_VER |
| #pragma warning(disable : 4996) // scanf |
| #endif |
| |
| const int N = 64; |
| |
| class Quantize : public Xbyak::CodeGenerator { |
| static int ilog2(int x) |
| { |
| int shift = 0; |
| while ((1 << shift) <= x) shift++; |
| return shift - 1; |
| } |
| public: |
| /* |
| input : esi |
| output : eax = [esi+offset] / dividend |
| destroy : edx |
| */ |
| void udiv(uint32_t dividend, int offset) |
| { |
| mov(eax, ptr[esi + offset]); |
| |
| /* dividend = odd x 2^exponent */ |
| int exponent = 0, odd = dividend; |
| while ((odd & 1) == 0) { |
| odd >>= 1; exponent++; |
| } |
| |
| if (odd == 1) { // trivial case |
| if (exponent) { |
| shr(eax, exponent); |
| } |
| return; |
| } |
| |
| uint64_t mLow, mHigh; |
| int len = ilog2(odd) + 1; |
| { |
| uint64_t roundUp = uint64_t(1) << (32 + len); |
| uint64_t k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd)); |
| mLow = roundUp / odd; |
| mHigh = (roundUp + k) / odd; |
| } |
| |
| while (((mLow >> 1) < (mHigh >> 1)) && (len > 0)) { |
| mLow >>= 1; mHigh >>= 1; len--; |
| } |
| |
| uint64_t m; int a; |
| if ((mHigh >> 32) == 0) { |
| m = mHigh; a = 0; |
| } else { |
| len = ilog2(odd); |
| uint64_t roundDown = uint64_t(1) << (32 + len); |
| mLow = roundDown / odd; |
| int r = (int)(roundDown % odd); |
| m = (r <= (odd >> 1)) ? mLow : mLow + 1; |
| a = 1; |
| } |
| while ((m & 1) == 0) { |
| m >>= 1; len--; |
| } |
| len += exponent; |
| |
| mov(edx, int(m)); |
| mul(edx); |
| if (a) { |
| add(eax, int(m)); |
| adc(edx, 0); |
| } |
| if (len) { |
| shr(edx, len); |
| } |
| mov(eax, edx); |
| } |
| /* |
| quantize(uint32_t dest[64], const uint32_t src[64]); |
| */ |
| Quantize(const uint32_t qTbl[64]) |
| { |
| push(esi); |
| push(edi); |
| const int P_ = 4 * 2; |
| mov(edi, ptr [esp+P_+4]); // dest |
| mov(esi, ptr [esp+P_+8]); // src |
| for (int i = 0; i < N; i++) { |
| udiv(qTbl[i], i * 4); |
| mov(ptr[edi+i*4], eax); |
| } |
| pop(edi); |
| pop(esi); |
| ret(); |
| } |
| }; |
| |
| void quantize(uint32_t dest[64], const uint32_t src[64], const uint32_t qTbl[64]) |
| { |
| for (int i = 0; i < N; i++) { |
| dest[i] = src[i] / qTbl[i]; |
| } |
| } |
| |
| #ifdef XBYAK64 |
| int main() |
| { |
| puts("not implemented for 64bit"); |
| return 1; |
| } |
| #else |
| int main(int argc, char *argv[]) |
| { |
| int q; |
| if (argc > 1) { |
| q = atoi(argv[1]); |
| } else { |
| printf("input quantize="); |
| if (scanf("%d", &q) != 1) { |
| fprintf(stderr, "bad number\n"); |
| return 1; |
| } |
| } |
| printf("q=%d\n", q); |
| uint32_t qTbl[] = { |
| 16, 11, 10, 16, 24, 40, 51, 61, |
| 12, 12, 14, 19, 26, 58, 60, 55, |
| 14, 13, 16, 24, 40, 57, 69, 56, |
| 14, 17, 22, 29, 51, 87, 80, 62, |
| 18, 22, 37, 56, 68, 109, 103, 77, |
| 24, 35, 55, 64, 81, 104, 113, 92, |
| 49, 64, 78, 87, 103, 121, 120, 101, |
| 72, 92, 95, 98, 112, 100, 103, 99 |
| }; |
| |
| for (int i = 0; i < N; i++) { |
| qTbl[i] /= q; |
| if (qTbl[i] == 0) qTbl[i] = 1; |
| } |
| |
| try { |
| uint32_t src[N]; |
| uint32_t dest[N]; |
| uint32_t dest2[N]; |
| for (int i = 0; i < N; i++) { |
| src[i] = rand() % 2048; |
| } |
| |
| Quantize jit(qTbl); |
| //printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode()); |
| void (*quantize2)(uint32_t*, const uint32_t*, const uint32_t *) = jit.getCode<void (*)(uint32_t*, const uint32_t*, const uint32_t *)>(); |
| |
| quantize(dest, src, qTbl); |
| quantize2(dest2, src, qTbl); |
| for (int i = 0; i < N; i++) { |
| if (dest[i] != dest2[i]) { |
| printf("err[%d] %u %u\n", i, dest[i], dest2[i]); |
| } |
| } |
| |
| const int count = 10000000; |
| int begin; |
| |
| begin = clock(); |
| for (int i = 0; i < count; i++) { |
| quantize(dest, src, qTbl); |
| } |
| printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC)); |
| |
| begin = clock(); |
| for (int i = 0; i < count; i++) { |
| quantize2(dest, src, qTbl); |
| } |
| printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC)); |
| } catch (std::exception& e) { |
| printf("ERR:%s\n", e.what()); |
| } catch (...) { |
| printf("unknown error\n"); |
| } |
| } |
| #endif |