blob: dbc3f5c5f2c58f1ae164589e51926ffa15500ff4 [file] [log] [blame]
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/17/99 aliu Creation.
**********************************************************************
*/
#include "unicode/unitohex.h"
#include "unicode/rep.h"
#include "unicode/unifilt.h"
/**
 * ID for this transliterator. Both non-copy constructors in this file
 * pass it to the Transliterator base constructor.
 */
const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";
/**
 * Prefix used by the constructor that takes only a filter: a backslash
 * followed by the letter 'u'.
 */
const char* UnicodeToHexTransliterator::DEFAULT_PREFIX = "\\u";
/**
 * Constructs a transliterator with an explicit prefix and digit case.
 * @param hexPrefix the string stored as the prefix; toHex() emits it
 * in front of the four hex digits of every escape.
 * @param isUppercase if true, hex digits are produced in uppercase;
 * otherwise they are produced in lowercase.
 * @param adoptedFilter filter forwarded, together with the ID, to the
 * Transliterator base constructor. NOTE(review): the name suggests the
 * base class takes ownership -- confirm against Transliterator.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
        const UnicodeString& hexPrefix,
        bool_t isUppercase,
        UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
    , prefix(hexPrefix)
    , uppercase(isUppercase)
{
    // Nothing else to set up; all state lives in the initializer list.
}
/**
 * Constructs a transliterator that uses DEFAULT_PREFIX as its prefix
 * and outputs uppercase hex digits.
 * @param adoptedFilter filter forwarded, together with the ID, to the
 * Transliterator base constructor. NOTE(review): the name suggests the
 * base class takes ownership -- confirm against Transliterator.
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
        UnicodeFilter* adoptedFilter)
    : Transliterator(_ID, adoptedFilter)
    , prefix(DEFAULT_PREFIX)
    , uppercase(TRUE)
{
    // Nothing else to set up; all state lives in the initializer list.
}
/**
 * Copy constructor. Copy-constructs the Transliterator base from
 * <code>other</code> and duplicates its prefix and uppercase setting.
 * @param other the transliterator to copy
 */
UnicodeToHexTransliterator::UnicodeToHexTransliterator(
        const UnicodeToHexTransliterator& other)
    : Transliterator(other)
    , prefix(other.prefix)
    , uppercase(other.uppercase)
{
}
/**
 * Assignment operator. Assigns the Transliterator base part first, then
 * copies the prefix and the uppercase setting from <code>other</code>.
 * @param other the transliterator to assign from
 * @return a reference to this object
 */
UnicodeToHexTransliterator&
UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other) {
    // Base-class state first, then the members declared by this class.
    this->Transliterator::operator=(other);
    this->prefix = other.prefix;
    this->uppercase = other.uppercase;
    return *this;
}
/**
 * Creates a copy of this transliterator with the copy constructor.
 * @return a newly allocated UnicodeToHexTransliterator, returned as a
 * Transliterator pointer
 */
Transliterator*
UnicodeToHexTransliterator::clone(void) const {
    UnicodeToHexTransliterator* duplicate = new UnicodeToHexTransliterator(*this);
    return duplicate;
}
/**
 * Returns the string that is placed in front of the four hex digits.
 * @return a reference to the stored prefix string
 */
const UnicodeString& UnicodeToHexTransliterator::getPrefix(void) const {
    return this->prefix;
}
/**
* Sets the string that precedes the four hex digits.
*
* <p>Callers must take care if a transliterator is in use by
* multiple threads. The prefix should not be changed by one
* thread while another thread may be transliterating.
* @param prefix prefix string
*/
void UnicodeToHexTransliterator::setPrefix(const UnicodeString& hexPrefix) {
prefix = hexPrefix;
}
/**
 * Reports the current digit case.
 * @return true if this transliterator outputs uppercase hex digits
 */
bool_t UnicodeToHexTransliterator::isUppercase(void) const {
    return this->uppercase;
}
/**
 * Selects whether this transliterator outputs uppercase hex digits.
 *
 * <p>Callers must take care if a transliterator is shared between
 * threads: one thread should not change the uppercase mode while
 * another thread may be transliterating.
 * @param outputUppercase if true, subsequent output uses uppercase
 * hex digits; otherwise lowercase
 */
void UnicodeToHexTransliterator::setUppercase(bool_t outputUppercase) {
    this->uppercase = outputUppercase;
}
/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Walks the text from offsets.cursor up to offsets.limit and replaces
 * every character accepted by the filter (or every character when no
 * filter is set) with its hexadecimal escape: the prefix followed by
 * four hex digits. For example, '@' -> "U+0040", assuming the prefix
 * is "U+". Characters rejected by the filter are left in place.
 *
 * @param text the text to modify in place
 * @param offsets on entry, cursor and limit bound the range to process;
 * on exit, both are updated to account for the inserted text
 * @param isIncremental not used by this implementation
 */
void UnicodeToHexTransliterator::handleTransliterate(Replaceable& text, Position& offsets,
                                                     bool_t isIncremental) const {
    int32_t pos = offsets.cursor;
    int32_t end = offsets.limit;
    const UnicodeFilter* const activeFilter = getFilter();
    UnicodeString escape;

    while (pos < end) {
        const UChar ch = text.charAt(pos);

        if (activeFilter == 0 || activeFilter->contains(ch)) {
            // Swap the single character for its escape sequence.
            toHex(escape, ch);
            text.handleReplaceBetween(pos, pos + 1, escape);

            // Step over the whole escape; the range grew by its length
            // minus the one character that was replaced.
            const int32_t escapeLength = escape.length();
            pos += escapeLength;
            end += escapeLength - 1;
        } else {
            // Filtered out: leave the character untouched.
            ++pos;
        }
    }

    offsets.limit = end;
    offsets.cursor = pos;
}
/**
 * Hex digit lookup table. Entries 0-15 are the lowercase digits
 * '0'-'9', 'a'-'f'; entries 16-31 are the uppercase digits
 * '0'-'9', 'A'-'F'. itoh() selects the upper half by OR-ing 16 into
 * the index.
 */
UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
    // Written as Unicode code point values rather than character
    // literals: a narrow character literal takes its value from the
    // compiler's execution character set, so on a non-ASCII (e.g.
    // EBCDIC) build '0' would not be U+0030 and the table would hold
    // the wrong UChar values.
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 0-7
    0x0038, 0x0039, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, // 8-9, a-f
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 0-7
    0x0038, 0x0039, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046  // 8-9, A-F
};
/**
 * Converts the least significant four bits of an integer to a hex digit.
 * All higher bits of the argument are ignored.
 * @param i the integer whose low nibble is converted
 * @return the matching entry of HEX_DIGITS, taken from the uppercase
 * half of the table when uppercase output is enabled and from the
 * lowercase half otherwise
 */
UChar UnicodeToHexTransliterator::itoh(int32_t i) const {
    const int32_t nibble = i & 0xF;
    // The uppercase digits start 16 entries into the table.
    const int32_t tableBase = uppercase ? 16 : 0;
    return HEX_DIGITS[tableBase + nibble];
}
/**
 * Forms the escape sequence for one character: the prefix followed by
 * four hex digits, most significant nibble first.
 * @param result receives the escape sequence; any previous contents
 * are overwritten
 * @param c the character to escape
 * @return a reference to <code>result</code>
 */
UnicodeString& UnicodeToHexTransliterator::toHex(UnicodeString& result,
                                                 UChar c) const {
    result = prefix;
    // Emit the nibbles at bit offsets 12, 8, 4 and 0; itoh() masks off
    // everything above the low four bits of its argument.
    for (int32_t shift = 12; shift >= 0; shift -= 4) {
        result.append(itoh(c >> shift));
    }
    return result;
}