source/i18n/unitohex.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 **********************************************************************
 *   Copyright (C) 1999, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   11/17/99    aliu        Creation.
 **********************************************************************
 */
 #include "unicode/unitohex.h"
 #include "unicode/rep.h"
 #include "unicode/unifilt.h"

 /**
  * ID for this transliterator.  The non-copy constructors in this file
  * pass it to the Transliterator base-class constructor.
  */
 const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";

 /**
  * Lookup table mapping a 4-bit value to its hex digit character.
  * Entries 0..15 are the lowercase digits and entries 16..31 are the
  * uppercase digits, so OR-ing a digit value with 16 selects the
  * uppercase form (this is how handleTransliterate() indexes the table).
  */
 const UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
     // Use Unicode hex values for EBCDIC compatibility
     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567
     0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, // 89abcdef
     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567
     0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, // 89ABCDEF
 };

 /**
  * Constructs a transliterator from a pattern.
  *
  * @param thePattern    the pattern handed to applyPattern()
  * @param isUppercase   if true, this transliterator outputs uppercase
  *                      hex digits
  * @param adoptedFilter the filter forwarded to the Transliterator
  *                      base-class constructor; may be 0
  * @param status        in/out error code.  If it already indicates a
  *                      failure on entry, the pattern is not applied.
  *                      Otherwise applyPattern() sets it to
  *                      U_ILLEGAL_ARGUMENT_ERROR for a malformed pattern.
  */
 UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                 const UnicodeString& thePattern,
                                 UBool isUppercase,
                                 UnicodeFilter* adoptedFilter,
                                 UErrorCode& status) :
     Transliterator(_ID, adoptedFilter),
     // Give minDigits a defined value up front: the early return below
     // skips applyPattern(), which is the only other place it is set.
     minDigits(0),
     uppercase(isUppercase) {

     if (U_FAILURE(status)) {
         return;
     }
     applyPattern(thePattern, status);
 }

 /**
  * Constructs a transliterator from a pattern, with no filter and with
  * uppercase hex output.
  *
  * @param thePattern the pattern handed to applyPattern()
  * @param status     in/out error code.  If it already indicates a
  *                   failure on entry, the pattern is not applied.
  *                   Otherwise applyPattern() sets it to
  *                   U_ILLEGAL_ARGUMENT_ERROR for a malformed pattern.
  */
 UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                 const UnicodeString& thePattern,
                                 UErrorCode& status) :
     Transliterator(_ID, 0),
     // Give minDigits a defined value up front: the early return below
     // skips applyPattern(), which is the only other place it is set.
     minDigits(0),
     uppercase(TRUE) {

     if (U_FAILURE(status)) {
         return;
     }
     applyPattern(thePattern, status);
 }

 /**
  * Constructs a transliterator with the default prefix "&#092;u"
  * that outputs four uppercase hex digits.
  *
  * <p>The members are set directly instead of going through
  * applyPattern().  The stored pattern text is backslash, backslash,
  * 'u', then four '0' characters; applyPattern() would read that as an
  * escaped backslash plus a literal 'u' for the prefix, followed by four
  * required digits, which matches the prefix and minDigits set here.
  *
  * @param adoptedFilter the filter forwarded to the Transliterator
  *                      base-class constructor
  */
 UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                 UnicodeFilter* adoptedFilter) :
     Transliterator(_ID, adoptedFilter),
     // NOTE(review): the trailing "" argument is presumably a codepage
     // selector for the char* constructors -- confirm against UnicodeString.
     pattern("\\\\u0000", ""),
     prefix("\\u", 2, ""),
     suffix(),
     minDigits(4),
     uppercase(TRUE) {
 }

 /**
  * Copy constructor.  Copy-constructs the Transliterator base from
  * <code>other</code> and copies each field of this class: the pattern
  * text, the prefix and suffix, the minimum digit count and the
  * uppercase flag.
  *
  * @param other the transliterator to copy
  */
 UnicodeToHexTransliterator::UnicodeToHexTransliterator(
                                 const UnicodeToHexTransliterator& other) :
     Transliterator(other),
     pattern(other.pattern),
     prefix(other.prefix),
     suffix(other.suffix),
     minDigits(other.minDigits),
     uppercase(other.uppercase) {
 }

 /**
  * Assignment operator.  Assigns the Transliterator base-class state and
  * then every field of this class from <code>other</code>.
  *
  * @param other the transliterator to copy from
  * @return a reference to this object
  */
 UnicodeToHexTransliterator&
 UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other) {
     // Base-class portion first.
     Transliterator::operator=(other);

     // Scalar formatting settings.
     uppercase = other.uppercase;
     minDigits = other.minDigits;

     // Pattern text and the literal pieces parsed out of it.
     suffix = other.suffix;
     prefix = other.prefix;
     pattern = other.pattern;

     return *this;
 }

 /**
  * Returns a copy of this object, allocated with <code>new</code> and
  * built with the copy constructor.
  */
 Transliterator*
 UnicodeToHexTransliterator::clone(void) const {
     UnicodeToHexTransliterator* duplicate =
         new UnicodeToHexTransliterator(*this);
     return duplicate;
 }

 void UnicodeToHexTransliterator::applyPattern(const UnicodeString& thePattern,
                                               UErrorCode& status) {
     if (U_FAILURE(status)) {
         return;
     }

     // POSSIBILE FUTURE MODIFICATION
     // Parse thePattern, and if this succeeds, set pattern to thePattern.
     // If it fails, call applyPattern(pattern) to restore the original
     // conditions.

     pattern = thePattern;
     prefix.truncate(0);
     suffix.truncate(0);
     minDigits = 0;
     int32_t maxDigits = 0;

     /* The mode specifies where we are in each spec.
      * mode 0 = in prefix
      * mode 1 = in optional digits (#)
      * mode 2 = in required digits (0)
      * mode 3 = in suffix
      */
     int32_t mode = 0;

     for (int32_t i=0; i<pattern.length(); ++i) {
         UChar c = pattern.charAt(i);
         UBool isLiteral = FALSE;
         if (c == BACKSLASH) {
             if ((i+1)<pattern.length()) {
                 isLiteral = TRUE;
                 c = pattern.charAt(++i);
             } else {
                 // Trailing '\\'
                 status = U_ILLEGAL_ARGUMENT_ERROR;
                 return;
             }
         }

         if (!isLiteral) {
             switch (c) {
             case POUND:
                 // Seeing a '#' moves us from mode 0 (prefix) to mode 1
                 // (optional digits).
                 if (mode == 0) {
                     ++mode;
                 } else if (mode != 1) {
                     // Unquoted '#'
                     status = U_ILLEGAL_ARGUMENT_ERROR;
                     return;
                 }
                 ++maxDigits;
                 break;
             case ZERO:
                 // Seeing a '0' moves us to mode 2 (required digits)
                 if (mode < 2) {
                     mode = 2;
                 } else if (mode != 2) {
                     // Unquoted '0'
                     status = U_ILLEGAL_ARGUMENT_ERROR;
                     return;
                 }
                 ++minDigits;
                 ++maxDigits;
                 break;
             default:
                 isLiteral = TRUE;
                 break;
             }
         }

         if (isLiteral) {
             if (mode == 0) {
                 prefix.append(c);
             } else {
                 // Any literal outside the prefix moves us into mode 3
                 // (suffix)
                 mode = 3;
                 suffix.append(c);
             }
         }
     }

     if (minDigits < 1 || maxDigits > 4) {
         // Invalid min/max digit count
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
 }

 /**
  * Returns the pattern text held by this transliterator, as a reference
  * to this object's own <code>pattern</code> member rather than a copy.
  */
 const UnicodeString& UnicodeToHexTransliterator::toPattern(void) const {
     return pattern;
 }

 /**
  * Returns true if this transliterator outputs uppercase hex digits.
  * @return the current value of the uppercase flag
  */
 UBool UnicodeToHexTransliterator::isUppercase(void) const {
     return uppercase;
 }

 /**
  * Sets whether this transliterator outputs uppercase hex digits.
  *
  * <p>Callers must take care if a transliterator is in use by
  * multiple threads.  The uppercase mode should not be changed by
  * one thread while another thread may be transliterating.
  * @param outputUppercase if true, then this transliterator
  * outputs uppercase hex digits.
  */
 void UnicodeToHexTransliterator::setUppercase(UBool outputUppercase) {
     // Plain store with no locking; see the threading note above.
     uppercase = outputUppercase;
 }

 /**
  * Implements {@link Transliterator#handleTransliterate}.
  *
  * <p>Replaces each code unit in [offsets.start, offsets.limit) that the
  * filter accepts (or every code unit when there is no filter) with
  * prefix + hex digits + suffix.  For example, '@' -> "U+0040",
  * assuming the prefix is "U+".  At least minDigits digits are written;
  * leading zero digits beyond that are dropped.
  *
  * @param text    the text to modify in place
  * @param offsets on return, limit and contextLimit have grown by the
  *                number of code units added, and start is the position
  *                where processing stopped
  */
 void UnicodeToHexTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                      UBool /*isIncremental*/) const {
     const UnicodeFilter* filt = getFilter();

     // HEX_DIGITS keeps its uppercase digits 16 entries after the
     // lowercase ones, so OR-ing in this bit selects the case.
     const int32_t caseBit = uppercase ? 16 : 0;

     int32_t pos = offsets.start;
     int32_t end = offsets.limit;
     UnicodeString escape;

     while (pos < end) {
         const UChar ch = text.charAt(pos);

         if (filt != 0 && !filt->contains(ch)) {
             // Rejected by the filter: leave this code unit untouched.
             ++pos;
             continue;
         }

         escape = prefix;

         // Walk the four nibbles from most to least significant.  Once
         // one digit has been written, all remaining digits are written.
         UBool emitting = FALSE;
         for (int32_t shift = 12; shift >= 0; shift -= 4) {
             const int32_t nibble = (ch >> shift) & 0xF;
             const int32_t digitIndex = shift >> 2;   // 3, 2, 1, 0
             if (emitting || nibble != 0 || minDigits > digitIndex) {
                 escape.append(HEX_DIGITS[nibble | caseBit]);
                 emitting = TRUE;
             }
         }

         escape.append(suffix);

         text.handleReplaceBetween(pos, pos + 1, escape);

         // One code unit was replaced by escape.length() code units:
         // step past the inserted text and extend the limit by the
         // net growth.
         const int32_t inserted = escape.length();
         pos += inserted;
         end += inserted - 1;
     }

     const int32_t growth = end - offsets.limit;
     offsets.contextLimit += growth;
     offsets.limit = end;
     offsets.start = pos;
 }
	/*
	**********************************************************************
	* Copyright (C) 1999, International Business Machines
	* Corporation and others. All Rights Reserved.
	**********************************************************************
	* Date Name Description
	* 11/17/99 aliu Creation.
	**********************************************************************
	*/
	#include "unicode/unitohex.h"
	#include "unicode/rep.h"
	#include "unicode/unifilt.h"

	/**
	* ID for this transliterator.  The non-copy constructors in this file
	* pass it to the Transliterator base-class constructor.
	*/
	const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";

	/**
	* Lookup table mapping a 4-bit value to its hex digit character.
	* Entries 0..15 are the lowercase digits and entries 16..31 are the
	* uppercase digits, so OR-ing a digit value with 16 selects the
	* uppercase form.
	*/
	const UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
	// Use Unicode hex values for EBCDIC compatibility
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567
	0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, // 89abcdef
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 01234567
	0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, // 89ABCDEF
	};

	/**
	 * Constructs a transliterator from a pattern.
	 *
	 * @param thePattern    the pattern handed to applyPattern()
	 * @param isUppercase   if true, this transliterator outputs uppercase
	 *                      hex digits
	 * @param adoptedFilter the filter forwarded to the Transliterator
	 *                      base-class constructor; may be 0
	 * @param status        in/out error code.  If it already indicates a
	 *                      failure on entry, the pattern is not applied.
	 *                      Otherwise applyPattern() sets it to
	 *                      U_ILLEGAL_ARGUMENT_ERROR for a malformed pattern.
	 */
	UnicodeToHexTransliterator::UnicodeToHexTransliterator(
	                                const UnicodeString& thePattern,
	                                UBool isUppercase,
	                                UnicodeFilter* adoptedFilter,
	                                UErrorCode& status) :
	    Transliterator(_ID, adoptedFilter),
	    // Give minDigits a defined value up front: the early return below
	    // skips applyPattern(), which is the only other place it is set.
	    minDigits(0),
	    uppercase(isUppercase) {

	    if (U_FAILURE(status)) {
	        return;
	    }
	    applyPattern(thePattern, status);
	}

	/**
	 * Constructs a transliterator from a pattern, with no filter and with
	 * uppercase hex output.
	 *
	 * @param thePattern the pattern handed to applyPattern()
	 * @param status     in/out error code.  If it already indicates a
	 *                   failure on entry, the pattern is not applied.
	 *                   Otherwise applyPattern() sets it to
	 *                   U_ILLEGAL_ARGUMENT_ERROR for a malformed pattern.
	 */
	UnicodeToHexTransliterator::UnicodeToHexTransliterator(
	                                const UnicodeString& thePattern,
	                                UErrorCode& status) :
	    Transliterator(_ID, 0),
	    // Give minDigits a defined value up front: the early return below
	    // skips applyPattern(), which is the only other place it is set.
	    minDigits(0),
	    uppercase(TRUE) {

	    if (U_FAILURE(status)) {
	        return;
	    }
	    applyPattern(thePattern, status);
	}

	/**
	* Constructs a transliterator with the default prefix (a backslash
	* followed by 'u') that outputs four uppercase hex digits.
	*
	* <p>The members are set directly instead of going through
	* applyPattern().  The stored pattern text is backslash, backslash,
	* 'u', then four '0' characters; applyPattern() would read that as an
	* escaped backslash plus a literal 'u' for the prefix, followed by four
	* required digits, which matches the prefix and minDigits set here.
	*
	* @param adoptedFilter the filter forwarded to the Transliterator
	*                      base-class constructor
	*/
	UnicodeToHexTransliterator::UnicodeToHexTransliterator(
	UnicodeFilter* adoptedFilter) :
	Transliterator(_ID, adoptedFilter),
	// NOTE(review): the trailing "" argument is presumably a codepage
	// selector for the char* constructors -- confirm against UnicodeString.
	pattern("\\\\u0000", ""),
	prefix("\\u", 2, ""),
	suffix(),
	minDigits(4),
	uppercase(TRUE) {
	}

	/**
	* Copy constructor.  Copy-constructs the Transliterator base from
	* <code>other</code> and copies each field of this class: the pattern
	* text, the prefix and suffix, the minimum digit count and the
	* uppercase flag.
	*
	* @param other the transliterator to copy
	*/
	UnicodeToHexTransliterator::UnicodeToHexTransliterator(
	const UnicodeToHexTransliterator& other) :
	Transliterator(other),
	pattern(other.pattern),
	prefix(other.prefix),
	suffix(other.suffix),
	minDigits(other.minDigits),
	uppercase(other.uppercase) {
	}

	/**
	 * Assignment operator.  Assigns the Transliterator base-class state and
	 * then every field of this class from <code>other</code>.
	 *
	 * @param other the transliterator to copy from
	 * @return a reference to this object
	 */
	UnicodeToHexTransliterator&
	UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other) {
	    // Base-class portion first.
	    Transliterator::operator=(other);

	    // Scalar formatting settings.
	    uppercase = other.uppercase;
	    minDigits = other.minDigits;

	    // Pattern text and the literal pieces parsed out of it.
	    suffix = other.suffix;
	    prefix = other.prefix;
	    pattern = other.pattern;

	    return *this;
	}

	/**
	 * Returns a copy of this object, allocated with <code>new</code> and
	 * built with the copy constructor.
	 */
	Transliterator*
	UnicodeToHexTransliterator::clone(void) const {
	    UnicodeToHexTransliterator* duplicate =
	        new UnicodeToHexTransliterator(*this);
	    return duplicate;
	}

	void UnicodeToHexTransliterator::applyPattern(const UnicodeString& thePattern,
	UErrorCode& status) {
	if (U_FAILURE(status)) {
	return;
	}

	// POSSIBILE FUTURE MODIFICATION
	// Parse thePattern, and if this succeeds, set pattern to thePattern.
	// If it fails, call applyPattern(pattern) to restore the original
	// conditions.

	pattern = thePattern;
	prefix.truncate(0);
	suffix.truncate(0);
	minDigits = 0;
	int32_t maxDigits = 0;

	/* The mode specifies where we are in each spec.
	* mode 0 = in prefix
	* mode 1 = in optional digits (#)
	* mode 2 = in required digits (0)
	* mode 3 = in suffix
	*/
	int32_t mode = 0;

	for (int32_t i=0; i<pattern.length(); ++i) {
	UChar c = pattern.charAt(i);
	UBool isLiteral = FALSE;
	if (c == BACKSLASH) {
	if ((i+1)<pattern.length()) {
	isLiteral = TRUE;
	c = pattern.charAt(++i);
	} else {
	// Trailing '\\'
	status = U_ILLEGAL_ARGUMENT_ERROR;
	return;
	}
	}

	if (!isLiteral) {
	switch (c) {
	case POUND:
	// Seeing a '#' moves us from mode 0 (prefix) to mode 1
	// (optional digits).
	if (mode == 0) {
	++mode;
	} else if (mode != 1) {
	// Unquoted '#'
	status = U_ILLEGAL_ARGUMENT_ERROR;
	return;
	}
	++maxDigits;
	break;
	case ZERO:
	// Seeing a '0' moves us to mode 2 (required digits)
	if (mode < 2) {
	mode = 2;
	} else if (mode != 2) {
	// Unquoted '0'
	status = U_ILLEGAL_ARGUMENT_ERROR;
	return;
	}
	++minDigits;
	++maxDigits;
	break;
	default:
	isLiteral = TRUE;
	break;
	}
	}

	if (isLiteral) {
	if (mode == 0) {
	prefix.append(c);
	} else {
	// Any literal outside the prefix moves us into mode 3
	// (suffix)
	mode = 3;
	suffix.append(c);
	}
	}
	}

	if (minDigits < 1 \|\| maxDigits > 4) {
	// Invalid min/max digit count
	status = U_ILLEGAL_ARGUMENT_ERROR;
	return;
	}
	}

	/**
	* Returns the pattern text held by this transliterator, as a reference
	* to this object's own <code>pattern</code> member rather than a copy.
	*/
	const UnicodeString& UnicodeToHexTransliterator::toPattern(void) const {
	return pattern;
	}

	/**
	* Returns true if this transliterator outputs uppercase hex digits.
	* @return the current value of the uppercase flag
	*/
	UBool UnicodeToHexTransliterator::isUppercase(void) const {
	return uppercase;
	}

	/**
	* Sets whether this transliterator outputs uppercase hex digits.
	*
	* <p>Callers must take care if a transliterator is in use by
	* multiple threads. The uppercase mode should not be changed by
	* one thread while another thread may be transliterating.
	* @param outputUppercase if true, then this transliterator
	* outputs uppercase hex digits.
	*/
	void UnicodeToHexTransliterator::setUppercase(UBool outputUppercase) {
	// Plain store with no locking; see the threading note above.
	uppercase = outputUppercase;
	}

	/**
	 * Implements {@link Transliterator#handleTransliterate}.
	 *
	 * <p>Replaces each code unit in [offsets.start, offsets.limit) that the
	 * filter accepts (or every code unit when there is no filter) with
	 * prefix + hex digits + suffix.  For example, '@' -> "U+0040",
	 * assuming the prefix is "U+".  At least minDigits digits are written;
	 * leading zero digits beyond that are dropped.
	 *
	 * @param text    the text to modify in place
	 * @param offsets on return, limit and contextLimit have grown by the
	 *                number of code units added, and start is the position
	 *                where processing stopped
	 */
	void UnicodeToHexTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
	                                                     UBool /*isIncremental*/) const {
	    const UnicodeFilter* filt = getFilter();

	    // HEX_DIGITS keeps its uppercase digits 16 entries after the
	    // lowercase ones, so OR-ing in this bit selects the case.
	    const int32_t caseBit = uppercase ? 16 : 0;

	    int32_t pos = offsets.start;
	    int32_t end = offsets.limit;
	    UnicodeString escape;

	    while (pos < end) {
	        const UChar ch = text.charAt(pos);

	        if (filt != 0 && !filt->contains(ch)) {
	            // Rejected by the filter: leave this code unit untouched.
	            ++pos;
	            continue;
	        }

	        escape = prefix;

	        // Walk the four nibbles from most to least significant.  Once
	        // one digit has been written, all remaining digits are written.
	        UBool emitting = FALSE;
	        for (int32_t shift = 12; shift >= 0; shift -= 4) {
	            const int32_t nibble = (ch >> shift) & 0xF;
	            const int32_t digitIndex = shift >> 2;   // 3, 2, 1, 0
	            if (emitting || nibble != 0 || minDigits > digitIndex) {
	                escape.append(HEX_DIGITS[nibble | caseBit]);
	                emitting = TRUE;
	            }
	        }

	        escape.append(suffix);

	        text.handleReplaceBetween(pos, pos + 1, escape);

	        // One code unit was replaced by escape.length() code units:
	        // step past the inserted text and extend the limit by the
	        // net growth.
	        const int32_t inserted = escape.length();
	        pos += inserted;
	        end += inserted - 1;
	    }

	    const int32_t growth = end - offsets.limit;
	    offsets.contextLimit += growth;
	    offsets.limit = end;
	    offsets.start = pos;
	}