blob: 03476b4c7ed97a0a7f9748081e1af858c456d0ff [file] [log] [blame]
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/17/99 aliu Creation.
**********************************************************************
*/
#include "unicode/unitohex.h"
#include "unicode/rep.h"
#include "unicode/unifilt.h"
/**
 * ID for this transliterator.
 * Every non-copy constructor in this file passes this string to the
 * Transliterator base-class constructor.
 */
const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";
/**
 * Lookup table mapping a 4-bit value to its hex digit character.
 * Entries 0..15 hold the lowercase digits (0-9, a-f) and entries 16..31
 * hold the uppercase digits (0-9, A-F); handleTransliterate() selects the
 * uppercase half by OR-ing 16 into the index.
 */
const UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
// Use Unicode hex values for EBCDIC compatibility
// (numeric code points are written out instead of character literals)
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567
0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, // 89abcdef
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567
0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, // 89ABCDEF
};
/**
 * Constructs a transliterator from an escape pattern.
 *
 * @param thePattern    pattern describing the prefix, the digit count and
 *                      the suffix; it is parsed by applyPattern().
 * @param isUppercase   TRUE to output uppercase hex digits, FALSE for
 *                      lowercase.
 * @param adoptedFilter filter handed to the Transliterator base class;
 *                      may be 0.
 * @param status        in/out error code. If it already indicates a
 *                      failure on entry the pattern is not parsed.
 *                      applyPattern() sets it to U_ILLEGAL_ARGUMENT_ERROR
 *                      when the pattern is malformed.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                const UnicodeString& thePattern,
                                UBool isUppercase,
                                UnicodeFilter* adoptedFilter,
                                UErrorCode& status) :
    Transliterator(_ID, adoptedFilter),
    // Give minDigits a defined value up front: the early return below
    // skips applyPattern(), which is the only other place that sets it.
    minDigits(0),
    uppercase(isUppercase) {
    if (U_FAILURE(status)) {
        return;
    }
    applyPattern(thePattern, status);
}
/**
 * Constructs an unfiltered transliterator from an escape pattern; the
 * output uses uppercase hex digits.
 *
 * @param thePattern pattern describing the prefix, the digit count and
 *                   the suffix; it is parsed by applyPattern().
 * @param status     in/out error code. If it already indicates a failure
 *                   on entry the pattern is not parsed. applyPattern()
 *                   sets it to U_ILLEGAL_ARGUMENT_ERROR when the pattern
 *                   is malformed.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                const UnicodeString& thePattern,
                                UErrorCode& status) :
    Transliterator(_ID, 0),
    // Give minDigits a defined value up front: the early return below
    // skips applyPattern(), which is the only other place that sets it.
    minDigits(0),
    uppercase(TRUE) {
    if (U_FAILURE(status)) {
        return;
    }
    applyPattern(thePattern, status);
}
/**
 * Constructs a transliterator with the default prefix "\u"
 * that outputs four uppercase hex digits.
 *
 * The members are initialized directly instead of going through
 * applyPattern(); the values below are the ones applyPattern() would
 * derive from the stored pattern text.
 *
 * @param adoptedFilter filter handed to the Transliterator base class;
 *                      may be 0.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
UnicodeFilter* adoptedFilter) :
Transliterator(_ID, adoptedFilter),
// Pattern text is \\u0000 : an escaped backslash, a literal 'u', and
// four required-digit placeholders.
pattern("\\\\u0000", ""),
// Prefix text is the two characters \u .
prefix("\\u", 2, ""),
suffix(),
minDigits(4),
uppercase(TRUE) {
}
/**
 * Copy constructor.
 * Copy-constructs the Transliterator base from other and copies the
 * pattern, prefix, suffix, minimum digit count and uppercase flag.
 *
 * @param other the transliterator to copy.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
const UnicodeToHexTransliterator& other) :
Transliterator(other),
pattern(other.pattern),
prefix(other.prefix),
suffix(other.suffix),
minDigits(other.minDigits),
uppercase(other.uppercase) {
}
/**
 * Assignment operator.
 * Assigns the Transliterator base-class part first and then copies each
 * field of this class from other.
 *
 * @param other the transliterator whose state is copied.
 * @return a reference to this object.
 */
UnicodeToHexTransliterator&
UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other) {
    Transliterator::operator=(other);

    // The field copies are independent of one another.
    uppercase = other.uppercase;
    minDigits = other.minDigits;
    suffix    = other.suffix;
    prefix    = other.prefix;
    pattern   = other.pattern;

    return *this;
}
/**
 * Returns a newly allocated copy of this transliterator, created with
 * the copy constructor.
 */
Transliterator*
UnicodeToHexTransliterator::clone(void) const {
    UnicodeToHexTransliterator* duplicate =
        new UnicodeToHexTransliterator(*this);
    return duplicate;
}
void UnicodeToHexTransliterator::applyPattern(const UnicodeString& thePattern,
UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
// POSSIBILE FUTURE MODIFICATION
// Parse thePattern, and if this succeeds, set pattern to thePattern.
// If it fails, call applyPattern(pattern) to restore the original
// conditions.
pattern = thePattern;
prefix.truncate(0);
suffix.truncate(0);
minDigits = 0;
int32_t maxDigits = 0;
/* The mode specifies where we are in each spec.
* mode 0 = in prefix
* mode 1 = in optional digits (#)
* mode 2 = in required digits (0)
* mode 3 = in suffix
*/
int32_t mode = 0;
for (int32_t i=0; i<pattern.length(); ++i) {
UChar c = pattern.charAt(i);
UBool isLiteral = FALSE;
if (c == BACKSLASH) {
if ((i+1)<pattern.length()) {
isLiteral = TRUE;
c = pattern.charAt(++i);
} else {
// Trailing '\\'
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
}
if (!isLiteral) {
switch (c) {
case POUND:
// Seeing a '#' moves us from mode 0 (prefix) to mode 1
// (optional digits).
if (mode == 0) {
++mode;
} else if (mode != 1) {
// Unquoted '#'
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
++maxDigits;
break;
case ZERO:
// Seeing a '0' moves us to mode 2 (required digits)
if (mode < 2) {
mode = 2;
} else if (mode != 2) {
// Unquoted '0'
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
++minDigits;
++maxDigits;
break;
default:
isLiteral = TRUE;
break;
}
}
if (isLiteral) {
if (mode == 0) {
prefix.append(c);
} else {
// Any literal outside the prefix moves us into mode 3
// (suffix)
mode = 3;
suffix.append(c);
}
}
}
if (minDigits < 1 || maxDigits > 4) {
// Invalid min/max digit count
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
}
/**
 * Returns a reference to the pattern text stored in this object.
 */
const UnicodeString& UnicodeToHexTransliterator::toPattern(void) const {
    return this->pattern;
}
/**
 * Reports the current hex digit case.
 * @return true if this transliterator outputs uppercase hex digits.
 */
UBool UnicodeToHexTransliterator::isUppercase(void) const {
    return this->uppercase;
}
/**
 * Selects whether this transliterator outputs uppercase hex digits.
 *
 * <p>Callers must take care if a transliterator is in use by
 * multiple threads. The uppercase mode should not be changed by
 * one thread while another thread may be transliterating.
 * @param outputUppercase if true, then this transliterator
 * outputs uppercase hex digits.
 */
void UnicodeToHexTransliterator::setUppercase(UBool outputUppercase) {
    this->uppercase = outputUppercase;
}
/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Replaces each character between offsets.start and offsets.limit with
 * prefix + hex digits + suffix. For example, '@' -> "U+0040", assuming
 * the prefix is "U+". Characters rejected by the filter, if there is
 * one, are left unchanged.
 *
 * @param text    the text to modify in place.
 * @param offsets on return, start and limit are both moved to the end of
 *                the processed range and contextLimit is shifted by the
 *                net change in text length.
 */
void UnicodeToHexTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                     UBool /*isIncremental*/) const {
    const UnicodeFilter* filter = getFilter();

    // HEX_DIGITS keeps the lowercase digits at 0..15 and the uppercase
    // digits at 16..31.
    const int32_t caseOffset = uppercase ? 16 : 0;

    int32_t pos = offsets.start;
    int32_t end = offsets.limit;
    UnicodeString escape;

    while (pos < end) {
        const UChar ch = text.charAt(pos);

        if (filter == 0 || filter->contains(ch)) {
            escape = prefix;

            // Walk the four nibbles from most to least significant.
            // Leading zeros are dropped unless minDigits requires them.
            UBool started = FALSE;
            for (int32_t nibble = 3; nibble >= 0; --nibble) {
                const int32_t digit = (ch >> (nibble * 4)) & 0xF;
                if (started || digit != 0 || minDigits > nibble) {
                    escape.append(HEX_DIGITS[caseOffset + digit]);
                    started = TRUE;
                }
            }
            escape.append(suffix);

            text.handleReplaceBetween(pos, pos + 1, escape);

            // One character became escapeLength characters: step over
            // all of them and grow the limit by the difference.
            const int32_t escapeLength = escape.length();
            pos += escapeLength;
            end += escapeLength - 1;
        } else {
            // Filtered out: leave the character as it is
            ++pos;
        }
    }

    offsets.contextLimit += end - offsets.limit;
    offsets.limit = end;
    offsets.start = pos;
}