|  | /* | 
|  | * Copyright 2006 The Android Open Source Project | 
|  | * | 
|  | * Use of this source code is governed by a BSD-style license that can be | 
|  | * found in the LICENSE file. | 
|  | */ | 
|  |  | 
|  |  | 
|  | #include "SkUtils.h" | 
|  |  | 
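/*  UTF-8 lead-byte patterns and the total length of the sequence each one starts:

        0xxxxxxx    1 total
        10xxxxxx    // never a leading byte
        110xxxxx    2 total
        1110xxxx    3 total
        11110xxx    4 total

    SkUTF8_LeadByteToCount() packs (total - 1) for every value of the lead
    byte's high nibble into one 32-bit word, two bits per nibble, with nibble
    0xF stored in the top two bits:

        11 10 01 01 xx xx xx xx 0...    (nibbles F, E, D, C, B..8, 7..0)
        0xE5XX0000
        0xE5 << 24

    The "xx" fields belong to continuation bytes (never valid lead bytes), so
    they are simply left as zero in the final constant.
*/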
|  |  | 
#ifdef SK_DEBUG
// Debug-only sanity check that 'c' is a byte that may begin a UTF-8 sequence.
static void assert_utf8_leadingbyte(unsigned c) {
    SkASSERT(c <= 0xF7);            // anything larger would announce more than 4 bytes
    SkASSERT((c & 0xC0) != 0x80);   // 10xxxxxx is a continuation byte, not a lead byte
}

/**
 *  Returns the total number of bytes (1..4) in the UTF-8 sequence whose lead
 *  byte is 'c'.
 *
 *  The answer comes from a 2-bits-per-entry lookup table indexed by the high
 *  nibble of 'c'. Each entry holds (byte count - 1):
 *      nibble 0xF -> 3, 0xE -> 2, 0xD/0xC -> 1, everything else -> 0.
 *
 *  @param c  lead byte, expected in 0x00..0xF7 and not a continuation byte
 *            (checked by assertion only).
 */
int SkUTF8_LeadByteToCount(unsigned c) {
    assert_utf8_leadingbyte(c);

    // Build the table as an unsigned value: 0xE5 << 24 does not fit in a
    // 32-bit signed int, and right-shifting the resulting negative value
    // would depend on the implementation's choice of shift.
    const uint32_t kCountMinusOneTable = (uint32_t)0xE5 << 24;
    const unsigned shift = (c >> 4) << 1;   // two bits per high-nibble value

    return (int)((kCountMinusOneTable >> shift) & 3) + 1;
}
#else
// Without SK_DEBUG the lead-byte check expands to nothing.
// NOTE(review): SkUTF8_LeadByteToCount is not defined in this file for that
// configuration; presumably SkUtils.h supplies it -- confirm.
#define assert_utf8_leadingbyte(c)
#endif
|  |  | 
|  | int SkUTF8_CountUnichars(const char utf8[]) { | 
|  | SkASSERT(utf8); | 
|  |  | 
|  | int count = 0; | 
|  |  | 
|  | for (;;) { | 
|  | int c = *(const uint8_t*)utf8; | 
|  | if (c == 0) { | 
|  | break; | 
|  | } | 
|  | utf8 += SkUTF8_LeadByteToCount(c); | 
|  | count += 1; | 
|  | } | 
|  | return count; | 
|  | } | 
|  |  | 
|  | int SkUTF8_CountUnichars(const char utf8[], size_t byteLength) { | 
|  | SkASSERT(utf8 || 0 == byteLength); | 
|  |  | 
|  | int         count = 0; | 
|  | const char* stop = utf8 + byteLength; | 
|  |  | 
|  | while (utf8 < stop) { | 
|  | utf8 += SkUTF8_LeadByteToCount(*(const uint8_t*)utf8); | 
|  | count += 1; | 
|  | } | 
|  | return count; | 
|  | } | 
|  |  | 
|  | SkUnichar SkUTF8_ToUnichar(const char utf8[]) { | 
|  | SkASSERT(utf8); | 
|  |  | 
|  | const uint8_t*  p = (const uint8_t*)utf8; | 
|  | int             c = *p; | 
|  | int             hic = c << 24; | 
|  |  | 
|  | assert_utf8_leadingbyte(c); | 
|  |  | 
|  | if (hic < 0) { | 
|  | uint32_t mask = (uint32_t)~0x3F; | 
|  | hic = SkLeftShift(hic, 1); | 
|  | do { | 
|  | c = (c << 6) | (*++p & 0x3F); | 
|  | mask <<= 5; | 
|  | } while ((hic = SkLeftShift(hic, 1)) < 0); | 
|  | c &= ~mask; | 
|  | } | 
|  | return c; | 
|  | } | 
|  |  | 
|  | SkUnichar SkUTF8_NextUnichar(const char** ptr) { | 
|  | SkASSERT(ptr && *ptr); | 
|  |  | 
|  | const uint8_t*  p = (const uint8_t*)*ptr; | 
|  | int             c = *p; | 
|  | int             hic = c << 24; | 
|  |  | 
|  | assert_utf8_leadingbyte(c); | 
|  |  | 
|  | if (hic < 0) { | 
|  | uint32_t mask = (uint32_t)~0x3F; | 
|  | hic = SkLeftShift(hic, 1); | 
|  | do { | 
|  | c = (c << 6) | (*++p & 0x3F); | 
|  | mask <<= 5; | 
|  | } while ((hic = SkLeftShift(hic, 1)) < 0); | 
|  | c &= ~mask; | 
|  | } | 
|  | *ptr = (char*)p + 1; | 
|  | return c; | 
|  | } | 
|  |  | 
|  | SkUnichar SkUTF8_PrevUnichar(const char** ptr) { | 
|  | SkASSERT(ptr && *ptr); | 
|  |  | 
|  | const char* p = *ptr; | 
|  |  | 
|  | if (*--p & 0x80) { | 
|  | while (*--p & 0x40) { | 
|  | ; | 
|  | } | 
|  | } | 
|  |  | 
|  | *ptr = (char*)p; | 
|  | return SkUTF8_NextUnichar(&p); | 
|  | } | 
|  |  | 
|  | size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) { | 
|  | if ((uint32_t)uni > 0x10FFFF) { | 
|  | SkDEBUGFAIL("bad unichar"); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | if (uni <= 127) { | 
|  | if (utf8) { | 
|  | *utf8 = (char)uni; | 
|  | } | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | char    tmp[4]; | 
|  | char*   p = tmp; | 
|  | size_t  count = 1; | 
|  |  | 
|  | SkDEBUGCODE(SkUnichar orig = uni;) | 
|  |  | 
|  | while (uni > 0x7F >> count) { | 
|  | *p++ = (char)(0x80 | (uni & 0x3F)); | 
|  | uni >>= 6; | 
|  | count += 1; | 
|  | } | 
|  |  | 
|  | if (utf8) { | 
|  | p = tmp; | 
|  | utf8 += count; | 
|  | while (p < tmp + count - 1) { | 
|  | *--utf8 = *p++; | 
|  | } | 
|  | *--utf8 = (char)(~(0xFF >> count) | uni); | 
|  | } | 
|  |  | 
|  | SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8)); | 
|  | return count; | 
|  | } | 
|  |  | 
|  | /////////////////////////////////////////////////////////////////////////////// | 
|  |  | 
|  | int SkUTF16_CountUnichars(const uint16_t src[]) { | 
|  | SkASSERT(src); | 
|  |  | 
|  | int count = 0; | 
|  | unsigned c; | 
|  | while ((c = *src++) != 0) { | 
|  | SkASSERT(!SkUTF16_IsLowSurrogate(c)); | 
|  | if (SkUTF16_IsHighSurrogate(c)) { | 
|  | c = *src++; | 
|  | SkASSERT(SkUTF16_IsLowSurrogate(c)); | 
|  | } | 
|  | count += 1; | 
|  | } | 
|  | return count; | 
|  | } | 
|  |  | 
|  | int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) { | 
|  | SkASSERT(src); | 
|  |  | 
|  | const uint16_t* stop = src + numberOf16BitValues; | 
|  | int count = 0; | 
|  | while (src < stop) { | 
|  | unsigned c = *src++; | 
|  | SkASSERT(!SkUTF16_IsLowSurrogate(c)); | 
|  | if (SkUTF16_IsHighSurrogate(c)) { | 
|  | SkASSERT(src < stop); | 
|  | c = *src++; | 
|  | SkASSERT(SkUTF16_IsLowSurrogate(c)); | 
|  | } | 
|  | count += 1; | 
|  | } | 
|  | return count; | 
|  | } | 
|  |  | 
|  | SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) { | 
|  | SkASSERT(srcPtr && *srcPtr); | 
|  |  | 
|  | const uint16_t* src = *srcPtr; | 
|  | SkUnichar       c = *src++; | 
|  |  | 
|  | SkASSERT(!SkUTF16_IsLowSurrogate(c)); | 
|  | if (SkUTF16_IsHighSurrogate(c)) { | 
|  | unsigned c2 = *src++; | 
|  | SkASSERT(SkUTF16_IsLowSurrogate(c2)); | 
|  |  | 
|  | // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000 | 
|  | // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF) | 
|  | c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00); | 
|  | } | 
|  | *srcPtr = src; | 
|  | return c; | 
|  | } | 
|  |  | 
|  | SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) { | 
|  | SkASSERT(srcPtr && *srcPtr); | 
|  |  | 
|  | const uint16_t* src = *srcPtr; | 
|  | SkUnichar       c = *--src; | 
|  |  | 
|  | SkASSERT(!SkUTF16_IsHighSurrogate(c)); | 
|  | if (SkUTF16_IsLowSurrogate(c)) { | 
|  | unsigned c2 = *--src; | 
|  | SkASSERT(SkUTF16_IsHighSurrogate(c2)); | 
|  | c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00); | 
|  | } | 
|  | *srcPtr = src; | 
|  | return c; | 
|  | } | 
|  |  | 
|  | size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) { | 
|  | SkASSERT((unsigned)uni <= 0x10FFFF); | 
|  |  | 
|  | int extra = (uni > 0xFFFF); | 
|  |  | 
|  | if (dst) { | 
|  | if (extra) { | 
|  | // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10)); | 
|  | // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64)); | 
|  | dst[0] = SkToU16((0xD800 - 64) + (uni >> 10)); | 
|  | dst[1] = SkToU16(0xDC00 | (uni & 0x3FF)); | 
|  |  | 
|  | SkASSERT(SkUTF16_IsHighSurrogate(dst[0])); | 
|  | SkASSERT(SkUTF16_IsLowSurrogate(dst[1])); | 
|  | } else { | 
|  | dst[0] = SkToU16(uni); | 
|  | SkASSERT(!SkUTF16_IsHighSurrogate(dst[0])); | 
|  | SkASSERT(!SkUTF16_IsLowSurrogate(dst[0])); | 
|  | } | 
|  | } | 
|  | return 1 + extra; | 
|  | } | 
|  |  | 
|  | size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues, | 
|  | char utf8[]) { | 
|  | SkASSERT(numberOf16BitValues >= 0); | 
|  | if (numberOf16BitValues <= 0) { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | SkASSERT(utf16 != nullptr); | 
|  |  | 
|  | const uint16_t* stop = utf16 + numberOf16BitValues; | 
|  | size_t          size = 0; | 
|  |  | 
|  | if (utf8 == nullptr) {    // just count | 
|  | while (utf16 < stop) { | 
|  | size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr); | 
|  | } | 
|  | } else { | 
|  | char* start = utf8; | 
|  | while (utf16 < stop) { | 
|  | utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8); | 
|  | } | 
|  | size = utf8 - start; | 
|  | } | 
|  | return size; | 
|  | } |