main/classes/translit/src/com/ibm/icu/text/StringReplacer.java - external/github.com/unicode-org/icu - Git at Google

 /*
 **********************************************************************
 *   Copyright (c) 2002-2007, International Business Machines Corporation
 *   and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   01/14/2002  aliu        Creation.
 **********************************************************************
 */

 package com.ibm.icu.text;
 import com.ibm.icu.impl.Utility;

 /**
  * A replacer that produces static text as its output.  The text may
  * contain transliterator stand-in characters that represent nested
  * UnicodeReplacer objects, making it possible to encode a tree of
  * replacers in a StringReplacer.  A StringReplacer that contains such
  * stand-ins is called a <em>complex</em> StringReplacer.  A complex
  * StringReplacer has a slower processing loop than a non-complex one.
  * @author Alan Liu
  */
 class StringReplacer implements UnicodeReplacer {

     /**
      * Output text, possibly containing stand-in characters that
      * represent nested UnicodeReplacers.
      */
     private String output;

     /**
      * Cursor position.  Value is ignored if hasCursor is false.
      */
     private int cursorPos;

     /**
      * True if this object outputs a cursor position.
      */
     private boolean hasCursor;

     /**
      * A complex object contains nested replacers and requires more
      * complex processing.  StringReplacers are initially assumed to
      * be complex.  If no nested replacers are seen during processing,
      * then isComplex is set to false, and future replacements are
      * short circuited for better performance.
      */
     private boolean isComplex;

     /**
      * Object that translates stand-in characters in 'output' to
      * UnicodeReplacer objects.
      */
     private final RuleBasedTransliterator.Data data;

     /**
      * Creates a StringReplacer that emits the given output text and
      * reports a cursor position.
      * @param theOutput replacement text written over the input range
      * by replace().  Stand-in characters in it denote nested
      * replacers.
      * @param theCursorPos cursor position that replace() will report
      * @param theData transliterator context used to resolve stand-in
      * characters to UnicodeReplacer objects
      */
     public StringReplacer(String theOutput,
                           int theCursorPos,
                           RuleBasedTransliterator.Data theData) {
         this.data = theData;
         this.output = theOutput;
         this.cursorPos = theCursorPos;
         this.hasCursor = true;
         // Start out assuming nested replacers are present; replace()
         // clears the flag when it finds none.
         this.isComplex = true;
     }

     /**
      * Creates a StringReplacer that emits the given output text and
      * leaves the cursor untouched.
      * @param theOutput replacement text written over the input range
      * by replace().  Stand-in characters in it denote nested
      * replacers.
      * @param theData transliterator context used to resolve stand-in
      * characters to UnicodeReplacer objects
      */
     public StringReplacer(String theOutput,
                           RuleBasedTransliterator.Data theData) {
         this.data = theData;
         this.output = theOutput;
         // No cursor: cursorPos is a placeholder and is never reported.
         this.hasCursor = false;
         this.cursorPos = 0;
         // Start out assuming nested replacers are present; replace()
         // clears the flag when it finds none.
         this.isComplex = true;
     }

 //=    public static UnicodeReplacer valueOf(String output,
 //=                                          int cursorPos,
 //=                                          RuleBasedTransliterator.Data data) {
 //=        if (output.length() == 1) {
 //=            char c = output.charAt(0);
 //=            UnicodeReplacer r = data.lookupReplacer(c);
 //=            if (r != null) {
 //=                return r;
 //=            }
 //=        }
 //=        return new StringReplacer(output, cursorPos, data);
 //=    }

     /**
      * UnicodeReplacer API.  Replaces the range start..limit of
      * <code>text</code> with this object's output.  Stand-in
      * characters in the output are expanded by delegating to the
      * nested replacer returned by data.lookupReplacer().
      * @param text the text to be modified in place
      * @param start start of the range to be replaced
      * @param limit limit of the range to be replaced
      * @param cursor if this object has a cursor, cursor[0] receives
      * the new cursor position.  The same array is also handed to any
      * nested replacers.
      * @return the length of the text that replaced the range
      */
     public int replace(Replaceable text,
                        int start,
                        int limit,
                        int[] cursor) {
         int outLen;
         // Cursor position relative to start; only meaningful when
         // cursorPos lies within the output string.
         int newStart = 0;

         // NOTE: It should be possible to _always_ run the complex
         // processing code; just slower.  If not, then there is a bug
         // in the complex processing code.

         // Simple (no nested replacers) Processing Code :
         if (!isComplex) {
             text.replace(start, limit, output);
             outLen = output.length();

             // Setup default cursor position (for cursorPos within output)
             newStart = cursorPos;
         }

         // Complex (nested replacers) Processing Code :
         else {
             /* When there are segments to be copied, use the Replaceable.copy()
              * API in order to retain out-of-band data.  Copy everything to the
              * end of the string, then copy them back over the key.  This preserves
              * the integrity of indices into the key and surrounding context while
              * generating the output text.
              */
             StringBuffer buf = new StringBuffer();
             int oOutput; // offset into 'output'
             // Cleared here and set back to true below if any output
             // character resolves to a nested replacer.
             isComplex = false;

             // The temporary buffer starts at tempStart, and extends
             // to destLimit + tempExtra.  The start of the buffer has a single
             // character from before the key.  This provides style
             // data when additional characters are filled into the
             // temporary buffer.  If there is nothing to the left, use
             // the non-character U+FFFF, which Replaceable subclasses
             // should treat specially as a "no-style character."
             // destStart points to the point after the style context
             // character, so it is tempStart+1 or tempStart+2.
             int tempStart = text.length(); // start of temp buffer
             int destStart = tempStart; // copy new text to here
             if (start > 0) {
                 int len = UTF16.getCharCount(text.char32At(start-1));
                 text.copy(start-len, start, tempStart);
                 destStart += len;
             } else {
                 text.replace(tempStart, tempStart, "\uFFFF");
                 destStart++;
             }
             int destLimit = destStart;
             int tempExtra = 0; // temp chars after destLimit

             for (oOutput=0; oOutput<output.length(); ) {
                 if (oOutput == cursorPos) {
                     // Record the position of the cursor
                     newStart = buf.length() + destLimit - destStart; // relative to start
                     // the buf.length() was inserted for bug 5789
                     // the problem is that if we are accumulating into a buffer (when r == null below)
                     // then the actual length of the text at that point needs to add the buf length.
                     // there was an alternative suggested in #5789, but that looks like it won't work
                     // if we have accumulated some stuff in the dest part AND have a non-zero buffer.
                 }
                 int c = UTF16.charAt(output, oOutput);

                 // When we are at the last position copy the right style
                 // context character into the temporary buffer.  We don't
                 // do this before because it will provide an incorrect
                 // right context for previous replace() operations.
                 int nextIndex = oOutput + UTF16.getCharCount(c);
                 if (nextIndex == output.length()) {
                     tempExtra = UTF16.getCharCount(text.char32At(limit));
                     text.copy(limit, limit+tempExtra, destLimit);
                 }

                 UnicodeReplacer r = data.lookupReplacer(c);
                 if (r == null) {
                     // Accumulate straight (non-segment) text.
                     UTF16.append(buf, c);
                 } else {
                     isComplex = true;

                     // Insert any accumulated straight text.
                     if (buf.length() > 0) {
                         text.replace(destLimit, destLimit, buf.toString());
                         destLimit += buf.length();
                         buf.setLength(0);
                     }

                     // Delegate output generation to replacer object
                     // (empty range at destLimit, so the nested
                     // replacer only inserts text there)
                     int len = r.replace(text, destLimit, destLimit, cursor);
                     destLimit += len;
                 }
                 oOutput = nextIndex;
             }
             // Insert any accumulated straight text.
             if (buf.length() > 0) {
                 text.replace(destLimit, destLimit, buf.toString());
                 destLimit += buf.length();
             }
             // Cursor exactly at the end of the output: the loop above
             // never sees this offset, so handle it here.
             if (oOutput == cursorPos) {
                 // Record the position of the cursor
                 newStart = destLimit - destStart; // relative to start
             }

             outLen = destLimit - destStart;

             // Copy new text to start, and delete it
             text.copy(destStart, destLimit, start);
             // The temp buffer indices are offset by outLen here to
             // account for the text just copied in at start.
             text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

             // Delete the old text (the key)
             text.replace(start + outLen, limit + outLen, "");
         }

         if (hasCursor) {
             // Adjust the cursor for positions outside the key.  These
             // refer to code points rather than code units.  If cursorPos
             // is within the output string, then use newStart, which has
             // already been set above.
             if (cursorPos < 0) {
                 newStart = start;
                 int n = cursorPos;
                 // Outside the output string, cursorPos counts code points
                 while (n < 0 && newStart > 0) {
                     newStart -= UTF16.getCharCount(text.char32At(newStart-1));
                     ++n;
                 }
                 // n is still negative if the start of the text was
                 // reached first; the remainder is applied as-is.
                 newStart += n;
             } else if (cursorPos > output.length()) {
                 newStart = start + outLen;
                 int n = cursorPos - output.length();
                 // Outside the output string, cursorPos counts code points
                 while (n > 0 && newStart < text.length()) {
                     newStart += UTF16.getCharCount(text.char32At(newStart));
                     --n;
                 }
                 // n is still positive if the end of the text was
                 // reached first; the remainder is applied as-is.
                 newStart += n;
             } else {
                 // Cursor is within output string.  It has been set up above
                 // to be relative to start.
                 newStart += start;
             }

             cursor[0] = newStart;
         }

         return outLen;
     }

     /**
      * UnicodeReplacer API.  Builds a rule-source pattern for this
      * replacer: the output text, with each nested replacer rendered
      * through its own toReplacerPattern() and, when this object has a
      * cursor, '@' and '|' markers describing the cursor position.
      * @param escapeUnprintable passed through to Utility.appendToRule()
      * and to nested replacers
      * @return the pattern string
      */
     public String toReplacerPattern(boolean escapeUnprintable) {
         StringBuffer rule = new StringBuffer();
         StringBuffer quoteBuf = new StringBuffer();

         int cursor = cursorPos;

         // Handle a cursor preceding the output: emit one '@' per
         // position before the start of the output.  The increment is
         // kept in the loop header rather than in a post-incremented
         // condition (which increments once more on the failing test),
         // so that cursor finishes at exactly 0 and the '|' is emitted
         // in front of the first output character by the loop below.
         if (hasCursor && cursor < 0) {
             for (; cursor < 0; ++cursor) {
                 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
             }
             // Fall through and append '|' below
             // NOTE(review): when output is empty the loop below never
             // runs, so no '|' is emitted in that case -- confirm that
             // this is acceptable.
         }

         for (int i=0; i<output.length(); ++i) {
             if (hasCursor && i == cursor) {
                 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
             }
             char c = output.charAt(i); // Ok to use 16-bits here

             UnicodeReplacer r = data.lookupReplacer(c);
             if (r == null) {
                 Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
             } else {
                 // Nested replacer: splice in its own pattern, padded
                 // with a space on each side.
                 StringBuffer buf = new StringBuffer(" ");
                 buf.append(r.toReplacerPattern(escapeUnprintable));
                 buf.append(' ');
                 Utility.appendToRule(rule, buf.toString(),
                                      true, escapeUnprintable, quoteBuf);
             }
         }

         // Handle a cursor after the output.  Use > rather than >= because
         // if cursor == output.length() it is at the end of the output,
         // which is the default position, so we need not emit it.
         if (hasCursor && cursor > output.length()) {
             cursor -= output.length();
             while (cursor-- > 0) {
                 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
             }
             Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
         }
         // Flush quoteBuf out to result
         Utility.appendToRule(rule, -1,
                              true, escapeUnprintable, quoteBuf);

         return rule.toString();
     }

     /**
      * Adds every character this object may output to the given set.
      * Plain output characters are added directly; stand-in characters
      * are expanded by asking the nested replacer to add its own
      * replacement set.
      * @param toUnionTo the set into which to union the output characters
      */
     public void addReplacementSetTo(UnicodeSet toUnionTo) {
         int index = 0;
         while (index < output.length()) {
             // Step by code point, not by code unit.
             int codePoint = UTF16.charAt(output, index);
             index += UTF16.getCharCount(codePoint);

             UnicodeReplacer nested = data.lookupReplacer(codePoint);
             if (nested != null) {
                 nested.addReplacementSetTo(toUnionTo);
             } else {
                 toUnionTo.add(codePoint);
             }
         }
     }
 }

 //eof
	/*
	**********************************************************************
	* Copyright (c) 2002-2007, International Business Machines Corporation
	* and others. All Rights Reserved.
	**********************************************************************
	* Date Name Description
	* 01/14/2002 aliu Creation.
	**********************************************************************
	*/

	package com.ibm.icu.text;
	import com.ibm.icu.impl.Utility;

	/**
	* A replacer that produces static text as its output. The text may
	* contain transliterator stand-in characters that represent nested
	* UnicodeReplacer objects, making it possible to encode a tree of
	* replacers in a StringReplacer. A StringReplacer that contains such
	* stand-ins is called a <em>complex</em> StringReplacer. A complex
	* StringReplacer has a slower processing loop than a non-complex one.
	* @author Alan Liu
	*/
	class StringReplacer implements UnicodeReplacer {

	/**
	* Output text, possibly containing stand-in characters that
	* represent nested UnicodeReplacers.
	*/
	private String output;

	/**
	* Cursor position. Value is ignored if hasCursor is false.
	*/
	private int cursorPos;

	/**
	* True if this object outputs a cursor position.
	*/
	private boolean hasCursor;

	/**
	* A complex object contains nested replacers and requires more
	* complex processing. StringReplacers are initially assumed to
	* be complex. If no nested replacers are seen during processing,
	* then isComplex is set to false, and future replacements are
	* short circuited for better performance.
	*/
	private boolean isComplex;

	/**
	* Object that translates stand-in characters in 'output' to
	* UnicodeReplacer objects.
	*/
	private final RuleBasedTransliterator.Data data;

	/**
	 * Creates a StringReplacer that emits the given output text and
	 * reports a cursor position.
	 * @param theOutput replacement text written over the input range
	 * by replace(). Stand-in characters in it denote nested
	 * replacers.
	 * @param theCursorPos cursor position that replace() will report
	 * @param theData transliterator context used to resolve stand-in
	 * characters to UnicodeReplacer objects
	 */
	public StringReplacer(String theOutput,
			int theCursorPos,
			RuleBasedTransliterator.Data theData) {
		this.data = theData;
		this.output = theOutput;
		this.cursorPos = theCursorPos;
		this.hasCursor = true;
		// Start out assuming nested replacers are present; replace()
		// clears the flag when it finds none.
		this.isComplex = true;
	}

	/**
	 * Creates a StringReplacer that emits the given output text and
	 * leaves the cursor untouched.
	 * @param theOutput replacement text written over the input range
	 * by replace(). Stand-in characters in it denote nested
	 * replacers.
	 * @param theData transliterator context used to resolve stand-in
	 * characters to UnicodeReplacer objects
	 */
	public StringReplacer(String theOutput,
			RuleBasedTransliterator.Data theData) {
		this.data = theData;
		this.output = theOutput;
		// No cursor: cursorPos is a placeholder and is never reported.
		this.hasCursor = false;
		this.cursorPos = 0;
		// Start out assuming nested replacers are present; replace()
		// clears the flag when it finds none.
		this.isComplex = true;
	}

	//= public static UnicodeReplacer valueOf(String output,
	//= int cursorPos,
	//= RuleBasedTransliterator.Data data) {
	//= if (output.length() == 1) {
	//= char c = output.charAt(0);
	//= UnicodeReplacer r = data.lookupReplacer(c);
	//= if (r != null) {
	//= return r;
	//= }
	//= }
	//= return new StringReplacer(output, cursorPos, data);
	//= }

	/**
	* UnicodeReplacer API. Replaces the range start..limit of
	* <code>text</code> with this object's output. Stand-in
	* characters in the output are expanded by delegating to the
	* nested replacer returned by data.lookupReplacer().
	* @param text the text to be modified in place
	* @param start start of the range to be replaced
	* @param limit limit of the range to be replaced
	* @param cursor if this object has a cursor, cursor[0] receives
	* the new cursor position. The same array is also handed to any
	* nested replacers.
	* @return the length of the text that replaced the range
	*/
	public int replace(Replaceable text,
	int start,
	int limit,
	int[] cursor) {
	int outLen;
	// Cursor position relative to start; only meaningful when
	// cursorPos lies within the output string.
	int newStart = 0;

	// NOTE: It should be possible to _always_ run the complex
	// processing code; just slower. If not, then there is a bug
	// in the complex processing code.

	// Simple (no nested replacers) Processing Code :
	if (!isComplex) {
	text.replace(start, limit, output);
	outLen = output.length();

	// Setup default cursor position (for cursorPos within output)
	newStart = cursorPos;
	}

	// Complex (nested replacers) Processing Code :
	else {
	/* When there are segments to be copied, use the Replaceable.copy()
	* API in order to retain out-of-band data. Copy everything to the
	* end of the string, then copy them back over the key. This preserves
	* the integrity of indices into the key and surrounding context while
	* generating the output text.
	*/
	StringBuffer buf = new StringBuffer();
	int oOutput; // offset into 'output'
	// Cleared here and set back to true below if any output
	// character resolves to a nested replacer.
	isComplex = false;

	// The temporary buffer starts at tempStart, and extends
	// to destLimit + tempExtra. The start of the buffer has a single
	// character from before the key. This provides style
	// data when additional characters are filled into the
	// temporary buffer. If there is nothing to the left, use
	// the non-character U+FFFF, which Replaceable subclasses
	// should treat specially as a "no-style character."
	// destStart points to the point after the style context
	// character, so it is tempStart+1 or tempStart+2.
	int tempStart = text.length(); // start of temp buffer
	int destStart = tempStart; // copy new text to here
	if (start > 0) {
	int len = UTF16.getCharCount(text.char32At(start-1));
	text.copy(start-len, start, tempStart);
	destStart += len;
	} else {
	text.replace(tempStart, tempStart, "\uFFFF");
	destStart++;
	}
	int destLimit = destStart;
	int tempExtra = 0; // temp chars after destLimit

	for (oOutput=0; oOutput<output.length(); ) {
	if (oOutput == cursorPos) {
	// Record the position of the cursor
	newStart = buf.length() + destLimit - destStart; // relative to start
	// the buf.length() was inserted for bug 5789
	// the problem is that if we are accumulating into a buffer (when r == null below)
	// then the actual length of the text at that point needs to add the buf length.
	// there was an alternative suggested in #5789, but that looks like it won't work
	// if we have accumulated some stuff in the dest part AND have a non-zero buffer.
	}
	int c = UTF16.charAt(output, oOutput);

	// When we are at the last position copy the right style
	// context character into the temporary buffer. We don't
	// do this before because it will provide an incorrect
	// right context for previous replace() operations.
	int nextIndex = oOutput + UTF16.getCharCount(c);
	if (nextIndex == output.length()) {
	tempExtra = UTF16.getCharCount(text.char32At(limit));
	text.copy(limit, limit+tempExtra, destLimit);
	}

	UnicodeReplacer r = data.lookupReplacer(c);
	if (r == null) {
	// Accumulate straight (non-segment) text.
	UTF16.append(buf, c);
	} else {
	isComplex = true;

	// Insert any accumulated straight text.
	if (buf.length() > 0) {
	text.replace(destLimit, destLimit, buf.toString());
	destLimit += buf.length();
	buf.setLength(0);
	}

	// Delegate output generation to replacer object
	// (empty range at destLimit, so the nested
	// replacer only inserts text there)
	int len = r.replace(text, destLimit, destLimit, cursor);
	destLimit += len;
	}
	oOutput = nextIndex;
	}
	// Insert any accumulated straight text.
	if (buf.length() > 0) {
	text.replace(destLimit, destLimit, buf.toString());
	destLimit += buf.length();
	}
	// Cursor exactly at the end of the output: the loop above
	// never sees this offset, so handle it here.
	if (oOutput == cursorPos) {
	// Record the position of the cursor
	newStart = destLimit - destStart; // relative to start
	}

	outLen = destLimit - destStart;

	// Copy new text to start, and delete it
	text.copy(destStart, destLimit, start);
	// The temp buffer indices are offset by outLen here to
	// account for the text just copied in at start.
	text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

	// Delete the old text (the key)
	text.replace(start + outLen, limit + outLen, "");
	}

	if (hasCursor) {
	// Adjust the cursor for positions outside the key. These
	// refer to code points rather than code units. If cursorPos
	// is within the output string, then use newStart, which has
	// already been set above.
	if (cursorPos < 0) {
	newStart = start;
	int n = cursorPos;
	// Outside the output string, cursorPos counts code points
	while (n < 0 && newStart > 0) {
	newStart -= UTF16.getCharCount(text.char32At(newStart-1));
	++n;
	}
	// n is still negative if the start of the text was
	// reached first; the remainder is applied as-is.
	newStart += n;
	} else if (cursorPos > output.length()) {
	newStart = start + outLen;
	int n = cursorPos - output.length();
	// Outside the output string, cursorPos counts code points
	while (n > 0 && newStart < text.length()) {
	newStart += UTF16.getCharCount(text.char32At(newStart));
	--n;
	}
	// n is still positive if the end of the text was
	// reached first; the remainder is applied as-is.
	newStart += n;
	} else {
	// Cursor is within output string. It has been set up above
	// to be relative to start.
	newStart += start;
	}

	cursor[0] = newStart;
	}

	return outLen;
	}

	/**
	* UnicodeReplacer API
	*/
	public String toReplacerPattern(boolean escapeUnprintable) {
	StringBuffer rule = new StringBuffer();
	StringBuffer quoteBuf = new StringBuffer();

	int cursor = cursorPos;

	// Handle a cursor preceding the output
	if (hasCursor && cursor < 0) {
	while (cursor++ < 0) {
	Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
	}
	// Fall through and append '\|' below
	}

	for (int i=0; i<output.length(); ++i) {
	if (hasCursor && i == cursor) {
	Utility.appendToRule(rule, '\|', true, escapeUnprintable, quoteBuf);
	}
	char c = output.charAt(i); // Ok to use 16-bits here

	UnicodeReplacer r = data.lookupReplacer(c);
	if (r == null) {
	Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
	} else {
	StringBuffer buf = new StringBuffer(" ");
	buf.append(r.toReplacerPattern(escapeUnprintable));
	buf.append(' ');
	Utility.appendToRule(rule, buf.toString(),
	true, escapeUnprintable, quoteBuf);
	}
	}

	// Handle a cursor after the output. Use > rather than >= because
	// if cursor == output.length() it is at the end of the output,
	// which is the default position, so we need not emit it.
	if (hasCursor && cursor > output.length()) {
	cursor -= output.length();
	while (cursor-- > 0) {
	Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
	}
	Utility.appendToRule(rule, '\|', true, escapeUnprintable, quoteBuf);
	}
	// Flush quoteBuf out to result
	Utility.appendToRule(rule, -1,
	true, escapeUnprintable, quoteBuf);

	return rule.toString();
	}

	/**
	 * Adds every character this object may output to the given set.
	 * Plain output characters are added directly; stand-in characters
	 * are expanded by asking the nested replacer to add its own
	 * replacement set.
	 * @param toUnionTo the set into which to union the output characters
	 */
	public void addReplacementSetTo(UnicodeSet toUnionTo) {
		int index = 0;
		while (index < output.length()) {
			// Step by code point, not by code unit.
			int codePoint = UTF16.charAt(output, index);
			index += UTF16.getCharCount(codePoint);

			UnicodeReplacer nested = data.lookupReplacer(codePoint);
			if (nested != null) {
				nested.addReplacementSetTo(toUnionTo);
			} else {
				toUnionTo.add(codePoint);
			}
		}
	}
	}

	//eof