main/classes/translit/src/com/ibm/icu/text/NameUnicodeTransliterator.java - external/github.com/unicode-org/icu - Git at Google

 /*
  * Copyright (C) 1996-2011, International Business Machines Corporation and
  * others. All Rights Reserved.
  */
 package com.ibm.icu.text;
 import com.ibm.icu.impl.PatternProps;
 import com.ibm.icu.impl.UCharacterName;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.lang.UCharacter;

 /**
  * A transliterator that performs name to character mapping.
  * @author Alan Liu
  */
 class NameUnicodeTransliterator extends Transliterator {

     /** ID under which this transliterator is registered. */
     static final String _ID = "Name-Any";

     /** Pattern handed to Utility.parsePattern to recognize the opening delimiter. */
     static final String OPEN_PAT    = "\\N~{~";
     static final char   OPEN_DELIM  = '\\'; // first char of OPEN_PAT
     static final char   CLOSE_DELIM = '}';
     static final char   SPACE       = ' ';


     /**
      * System registration hook.  Registers a factory under {@link #_ID}
      * that creates unfiltered instances of this class.
      */
     static void register() {
         Transliterator.registerFactory(_ID, new Transliterator.Factory() {
             public Transliterator getInstance(String ID) {
                 return new NameUnicodeTransliterator(null);
             }
         });
     }

     /**
      * Constructs a transliterator.
      * @param filter the filter passed through to the superclass
      * constructor; the registered factory passes null
      */
     public NameUnicodeTransliterator(UnicodeFilter filter) {
         super(_ID, filter);
     }

     /**
      * Implements {@link Transliterator#handleTransliterate}.
      *
      * <p>Scans the text between <code>offsets.start</code> and
      * <code>offsets.limit</code> for escapes that open with
      * {@link #OPEN_PAT} and close with {@link #CLOSE_DELIM}.  If the
      * collected name resolves through
      * {@link UCharacter#getCharFromExtendedName}, the whole escape is
      * replaced by that code point; otherwise the text is left as-is.
      *
      * @param text the text, edited in place
      * @param offsets the region to process; on return <code>start</code>,
      * <code>limit</code> and <code>contextLimit</code> have been updated
      * @param isIncremental if true and an open delimiter candidate
      * position is still recorded, <code>offsets.start</code> is set to
      * that position instead of the final cursor
      */
     protected void handleTransliterate(Replaceable text,
                                        Position offsets, boolean isIncremental) {

         int maxLen = UCharacterName.INSTANCE.getMaxCharNameLength() + 1; // allow for temporary trailing space

         StringBuffer name = new StringBuffer(maxLen);

         // Get the legal character set
         UnicodeSet legal = new UnicodeSet();
         UCharacterName.INSTANCE.getCharNameCharacters(legal);

         int cursor = offsets.start;
         int limit = offsets.limit;

         // Modes:
         // 0 - looking for open delimiter
         // 1 - after open delimiter
         int mode = 0;
         int openPos = -1; // open delim candidate pos

         int c;
         while (cursor < limit) {
             c = text.char32At(cursor);

             switch (mode) {
             case 0: // looking for open delimiter
                 if (c == OPEN_DELIM) { // quick check first
                     openPos = cursor;
                     int i = Utility.parsePattern(OPEN_PAT, text, cursor, limit);
                     // Only enter mode 1 if the pattern matched and there
                     // is still text after it.
                     if (i >= 0 && i < limit) {
                         mode = 1;
                         name.setLength(0);
                         cursor = i;
                         continue; // *** reprocess char32At(cursor)
                     }
                 }
                 break;

             case 1: // after open delimiter
                 // Collect legal name characters.  Whitespace runs are
                 // collapsed to a single space.  The close delimiter
                 // triggers the name lookup.  Any other character, or a
                 // name that grows too long, abandons the candidate and
                 // returns to mode 0.

                 // Convert \s+ => SPACE.  This assumes there are no
                 // runs of >1 space characters in names.
                 if (PatternProps.isWhiteSpace(c)) {
                     // Ignore leading whitespace
                     if (name.length() > 0 &&
                         name.charAt(name.length()-1) != SPACE) {
                         name.append(SPACE);
                         // If we are too long then abort.  maxLen includes
                         // temporary trailing space, so use '>'.
                         if (name.length() > maxLen) {
                             mode = 0;
                         }
                     }
                     break;
                 }

                 if (c == CLOSE_DELIM) {

                     int len = name.length();

                     // Delete trailing space, if any
                     if (len > 0 &&
                         name.charAt(len-1) == SPACE) {
                         name.setLength(--len);
                     }

                     c = UCharacter.getCharFromExtendedName(name.toString());
                     if (c != -1) {
                         // Lookup succeeded

                         // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                         cursor++; // advance over CLOSE_DELIM

                         String str = UTF16.valueOf(c);
                         text.replace(openPos, cursor, str);

                         // Adjust indices for the change in the length of
                         // the string.  Do not assume that str.length() ==
                         // 1, in case of surrogates.
                         int delta = cursor - openPos - str.length();
                         cursor -= delta;
                         limit -= delta;
                         // assert(cursor == openPos + str.length());
                     }
                     // If the lookup failed, we leave things as-is and
                     // still switch to mode 0 and continue.
                     mode = 0;
                     openPos = -1; // close off candidate
                     continue; // *** reprocess char32At(cursor)
                 }

                 if (legal.contains(c)) {
                     UTF16.append(name, c);
                     // If we go past the longest possible name then abort.
                     // maxLen includes temporary trailing space, so use '>='.
                     if (name.length() >= maxLen) {
                         mode = 0;
                     }
                 }

                 // Invalid character
                 else {
                     // Reprocess this same code point in mode 0.  Do not
                     // move the cursor: backing up by one code unit and
                     // then advancing by getCharCount(c) would land in the
                     // middle of a surrogate pair when c is supplementary.
                     mode = 0;
                     continue; // *** reprocess char32At(cursor)
                 }

                 break;
             }

             cursor += UTF16.getCharCount(c);
         }

         offsets.contextLimit += limit - offsets.limit;
         offsets.limit = limit;
         // In incremental mode, only advance the cursor up to the last
         // open delimiter candidate.
         offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
     }

     /* (non-Javadoc)
      * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
      */
     @Override
     public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
         UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
         if (!myFilter.containsAll(UnicodeNameTransliterator.OPEN_DELIM) || !myFilter.contains(CLOSE_DELIM)) {
             return; // we have to contain both prefix and suffix
         }
         // NOTE(review): only 'A'..'F' of the uppercase letters are added
         // below; presumably names can contain any of 'A'..'Z' -- confirm
         // whether this range is intentionally narrow.
         UnicodeSet items = new UnicodeSet()
         .addAll('0', '9')
         .addAll('A', 'F')
         .addAll('a', 'z') // for controls
         .add('<').add('>') // for controls
         .add('(').add(')') // for controls
         .add('-')
         .add(' ')
         .addAll(UnicodeNameTransliterator.OPEN_DELIM)
         .add(CLOSE_DELIM);
         // Keep only the characters the filter lets through.
         items.retainAll(myFilter);
         if (items.size() > 0) {
             sourceSet.addAll(items);
             // could produce any character
             targetSet.addAll(0, 0x10FFFF);
         }
     }
 }
	/*
	* Copyright (C) 1996-2011, International Business Machines Corporation and
	* others. All Rights Reserved.
	*/
	package com.ibm.icu.text;
	import com.ibm.icu.impl.PatternProps;
	import com.ibm.icu.impl.UCharacterName;
	import com.ibm.icu.impl.Utility;
	import com.ibm.icu.lang.UCharacter;

	/**
	* A transliterator that performs name to character mapping.
	* @author Alan Liu
	*/
	class NameUnicodeTransliterator extends Transliterator {

	static final String _ID = "Name-Any";

	static final String OPEN_PAT = "\\N~{~";
	static final char OPEN_DELIM = '\\'; // first char of OPEN_PAT
	static final char CLOSE_DELIM = '}';
	static final char SPACE = ' ';


	/**
	* System registration hook.
	*/
	static void register() {
	Transliterator.registerFactory(_ID, new Transliterator.Factory() {
	public Transliterator getInstance(String ID) {
	return new NameUnicodeTransliterator(null);
	}
	});
	}

	/**
	* Constructs a transliterator.
	*/
	public NameUnicodeTransliterator(UnicodeFilter filter) {
	super(_ID, filter);
	}

	/**
	* Implements {@link Transliterator#handleTransliterate}.
	*/
	protected void handleTransliterate(Replaceable text,
	Position offsets, boolean isIncremental) {

	int maxLen = UCharacterName.INSTANCE.getMaxCharNameLength() + 1; // allow for temporary trailing space

	StringBuffer name = new StringBuffer(maxLen);

	// Get the legal character set
	UnicodeSet legal = new UnicodeSet();
	UCharacterName.INSTANCE.getCharNameCharacters(legal);

	int cursor = offsets.start;
	int limit = offsets.limit;

	// Modes:
	// 0 - looking for open delimiter
	// 1 - after open delimiter
	int mode = 0;
	int openPos = -1; // open delim candidate pos

	int c;
	while (cursor < limit) {
	c = text.char32At(cursor);

	switch (mode) {
	case 0: // looking for open delimiter
	if (c == OPEN_DELIM) { // quick check first
	openPos = cursor;
	int i = Utility.parsePattern(OPEN_PAT, text, cursor, limit);
	if (i >= 0 && i < limit) {
	mode = 1;
	name.setLength(0);
	cursor = i;
	continue; // *** reprocess char32At(cursor)
	}
	}
	break;

	case 1: // after open delimiter
	// Look for legal chars. If \s+ is found, convert it
	// to a single space. If closeDelimiter is found, exit
	// the loop. If any other character is found, exit the
	// loop. If the limit is reached, exit the loop.

	// Convert \s+ => SPACE. This assumes there are no
	// runs of >1 space characters in names.
	if (PatternProps.isWhiteSpace(c)) {
	// Ignore leading whitespace
	if (name.length() > 0 &&
	name.charAt(name.length()-1) != SPACE) {
	name.append(SPACE);
	// If we are too long then abort. maxLen includes
	// temporary trailing space, so use '>'.
	if (name.length() > maxLen) {
	mode = 0;
	}
	}
	break;
	}

	if (c == CLOSE_DELIM) {

	int len = name.length();

	// Delete trailing space, if any
	if (len > 0 &&
	name.charAt(len-1) == SPACE) {
	name.setLength(--len);
	}

	c = UCharacter.getCharFromExtendedName(name.toString());
	if (c != -1) {
	// Lookup succeeded

	// assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
	cursor++; // advance over CLOSE_DELIM

	String str = UTF16.valueOf(c);
	text.replace(openPos, cursor, str);

	// Adjust indices for the change in the length of
	// the string. Do not assume that str.length() ==
	// 1, in case of surrogates.
	int delta = cursor - openPos - str.length();
	cursor -= delta;
	limit -= delta;
	// assert(cursor == openPos + str.length());
	}
	// If the lookup failed, we leave things as-is and
	// still switch to mode 0 and continue.
	mode = 0;
	openPos = -1; // close off candidate
	continue; // *** reprocess char32At(cursor)
	}

	if (legal.contains(c)) {
	UTF16.append(name, c);
	// If we go past the longest possible name then abort.
	// maxLen includes temporary trailing space, so use '>='.
	if (name.length() >= maxLen) {
	mode = 0;
	}
	}

	// Invalid character
	else {
	--cursor; // Backup and reprocess this character
	mode = 0;
	}

	break;
	}

	cursor += UTF16.getCharCount(c);
	}

	offsets.contextLimit += limit - offsets.limit;
	offsets.limit = limit;
	// In incremental mode, only advance the cursor up to the last
	// open delimiter candidate.
	offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
	}

	/* (non-Javadoc)
	* @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
	*/
	@Override
	public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
	UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
	if (!myFilter.containsAll(UnicodeNameTransliterator.OPEN_DELIM) \|\| !myFilter.contains(CLOSE_DELIM)) {
	return; // we have to contain both prefix and suffix
	}
	UnicodeSet items = new UnicodeSet()
	.addAll('0', '9')
	.addAll('A', 'F')
	.addAll('a', 'z') // for controls
	.add('<').add('>') // for controls
	.add('(').add(')') // for controls
	.add('-')
	.add(' ')
	.addAll(UnicodeNameTransliterator.OPEN_DELIM)
	.add(CLOSE_DELIM);
	items.retainAll(myFilter);
	if (items.size() > 0) {
	sourceSet.addAll(items);
	// could produce any character
	targetSet.addAll(0, 0x10FFFF);
	}
	}
	}