blob: 57016318507a886d03a7c300d3fe16c16705775d [file] [log] [blame]
/*
**********************************************************************
* Copyright (c) 2002-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 01/14/2002 aliu Creation.
**********************************************************************
*/
package com.ibm.icu.text;
import com.ibm.icu.impl.Utility;
/**
* A replacer that produces static text as its output. The text may
* contain transliterator stand-in characters that represent nested
* UnicodeReplacer objects, making it possible to encode a tree of
* replacers in a StringReplacer. A StringReplacer that contains such
* stand-ins is called a <em>complex</em> StringReplacer. A complex
* StringReplacer has a slower processing loop than a non-complex one.
* @author Alan Liu
*/
class StringReplacer implements UnicodeReplacer {

    /**
     * Output text, possibly containing stand-in characters that
     * represent nested UnicodeReplacers.
     */
    private final String output;

    /**
     * Cursor position.  Value is ignored if hasCursor is false.
     * Values in the range 0..output.length() are offsets into
     * 'output'; values outside that range are handled separately by
     * replace() as positions before or after the replaced text.
     */
    private final int cursorPos;

    /**
     * True if this object outputs a cursor position.
     */
    private final boolean hasCursor;

    /**
     * A complex object contains nested replacers and requires more
     * complex processing.  StringReplacers are initially assumed to
     * be complex.  If no nested replacers are seen during processing,
     * then isComplex is set to false, and future replacements are
     * short circuited for better performance.
     */
    private boolean isComplex;

    /**
     * Object that translates stand-in characters in 'output' to
     * UnicodeReplacer objects.
     */
    private final RuleBasedTransliterator.Data data;

    /**
     * Construct a StringReplacer that emits the given output
     * text and sets the cursor to the given position.
     * @param theOutput text that will replace input text when the
     * replace() method is called.  May contain stand-in characters
     * that represent nested replacers.
     * @param theCursorPos cursor position that will be returned by
     * the replace() method
     * @param theData transliterator context object that translates
     * stand-in characters to UnicodeReplacer objects
     */
    public StringReplacer(String theOutput,
                          int theCursorPos,
                          RuleBasedTransliterator.Data theData) {
        output = theOutput;
        cursorPos = theCursorPos;
        hasCursor = true;
        data = theData;
        isComplex = true;
    }

    /**
     * Construct a StringReplacer that emits the given output
     * text and does not modify the cursor.
     * @param theOutput text that will replace input text when the
     * replace() method is called.  May contain stand-in characters
     * that represent nested replacers.
     * @param theData transliterator context object that translates
     * stand-in characters to UnicodeReplacer objects
     */
    public StringReplacer(String theOutput,
                          RuleBasedTransliterator.Data theData) {
        output = theOutput;
        cursorPos = 0;
        hasCursor = false;
        data = theData;
        isComplex = true;
    }

//=    public static UnicodeReplacer valueOf(String output,
//=                                          int cursorPos,
//=                                          RuleBasedTransliterator.Data data) {
//=        if (output.length() == 1) {
//=            char c = output.charAt(0);
//=            UnicodeReplacer r = data.lookupReplacer(c);
//=            if (r != null) {
//=                return r;
//=            }
//=        }
//=        return new StringReplacer(output, cursorPos, data);
//=    }

    /**
     * UnicodeReplacer API.  Replaces text[start, limit) with this
     * object's output, expanding any nested replacers found in
     * 'output', and optionally reports a new cursor position.
     * @param text the text to be modified in place
     * @param start start offset (inclusive) of the range to replace
     * @param limit limit offset (exclusive) of the range to replace
     * @param cursor one-element array; if this object has a cursor,
     * cursor[0] is set to the new cursor offset within text.  It is
     * also handed on to nested replacers.
     * @return the length, in code units, of the text that replaced
     * text[start, limit)
     */
    public int replace(Replaceable text,
                       int start,
                       int limit,
                       int[] cursor) {
        int outLen;
        int newStart = 0;

        // NOTE: It should be possible to _always_ run the complex
        // processing code; just slower.  If not, then there is a bug
        // in the complex processing code.

        // Simple (no nested replacers) Processing Code :
        if (!isComplex) {
            text.replace(start, limit, output);
            outLen = output.length();

            // Setup default cursor position (for cursorPos within output)
            newStart = cursorPos;
        }

        // Complex (nested replacers) Processing Code :
        else {
            /* When there are segments to be copied, use the Replaceable.copy()
             * API in order to retain out-of-band data.  Copy everything to the
             * end of the string, then copy them back over the key.  This preserves
             * the integrity of indices into the key and surrounding context while
             * generating the output text.
             */
            StringBuffer buf = new StringBuffer();
            int oOutput; // offset into 'output'

            // Whether a nested replacer was encountered during this scan.
            // The result is committed to isComplex only after the whole
            // replacement has succeeded; clearing the field up front would
            // leave this object wrongly marked "simple" if an exception
            // escaped part-way through, and later calls would then emit
            // stand-in characters verbatim.
            boolean sawNestedReplacer = false;

            // The temporary buffer starts at tempStart, and extends
            // to destLimit + tempExtra.  The start of the buffer has a single
            // character from before the key.  This provides style
            // data when additional characters are filled into the
            // temporary buffer.  If there is nothing to the left, use
            // the non-character U+FFFF, which Replaceable subclasses
            // should treat specially as a "no-style character."
            // destStart points to the point after the style context
            // character, so it is tempStart+1 or tempStart+2.
            int tempStart = text.length(); // start of temp buffer
            int destStart = tempStart; // copy new text to here
            if (start > 0) {
                int len = UTF16.getCharCount(text.char32At(start - 1));
                text.copy(start - len, start, tempStart);
                destStart += len;
            } else {
                text.replace(tempStart, tempStart, "\uFFFF");
                destStart++;
            }
            int destLimit = destStart;
            int tempExtra = 0; // temp chars after destLimit

            for (oOutput = 0; oOutput < output.length(); ) {
                if (oOutput == cursorPos) {
                    // Record the position of the cursor
                    newStart = buf.length() + destLimit - destStart; // relative to start
                    // the buf.length() was inserted for bug 5789
                    // the problem is that if we are accumulating into a buffer (when r == null below)
                    // then the actual length of the text at that point needs to add the buf length.
                    // there was an alternative suggested in #5789, but that looks like it won't work
                    // if we have accumulated some stuff in the dest part AND have a non-zero buffer.
                }
                int c = UTF16.charAt(output, oOutput);

                // When we are at the last position copy the right style
                // context character into the temporary buffer.  We don't
                // do this before because it will provide an incorrect
                // right context for previous replace() operations.
                int nextIndex = oOutput + UTF16.getCharCount(c);
                if (nextIndex == output.length()) {
                    tempExtra = UTF16.getCharCount(text.char32At(limit));
                    text.copy(limit, limit + tempExtra, destLimit);
                }

                UnicodeReplacer r = data.lookupReplacer(c);
                if (r == null) {
                    // Accumulate straight (non-segment) text.
                    UTF16.append(buf, c);
                } else {
                    sawNestedReplacer = true;

                    // Insert any accumulated straight text.
                    if (buf.length() > 0) {
                        text.replace(destLimit, destLimit, buf.toString());
                        destLimit += buf.length();
                        buf.setLength(0);
                    }

                    // Delegate output generation to replacer object
                    int len = r.replace(text, destLimit, destLimit, cursor);
                    destLimit += len;
                }
                oOutput = nextIndex;
            }

            // Insert any accumulated straight text.
            if (buf.length() > 0) {
                text.replace(destLimit, destLimit, buf.toString());
                destLimit += buf.length();
            }
            if (oOutput == cursorPos) {
                // Record the position of the cursor
                newStart = destLimit - destStart; // relative to start
            }

            outLen = destLimit - destStart;

            // Copy new text to start, and delete it
            text.copy(destStart, destLimit, start);
            text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

            // Delete the old text (the key)
            text.replace(start + outLen, limit + outLen, "");

            // The scan completed: remember whether the slow path is
            // actually needed for subsequent calls.
            isComplex = sawNestedReplacer;
        }

        if (hasCursor) {
            // Adjust the cursor for positions outside the key.  These
            // refer to code points rather than code units.  If cursorPos
            // is within the output string, then use newStart, which has
            // already been set above.
            if (cursorPos < 0) {
                newStart = start;
                int n = cursorPos;
                // Outside the output string, cursorPos counts code points
                while (n < 0 && newStart > 0) {
                    newStart -= UTF16.getCharCount(text.char32At(newStart - 1));
                    ++n;
                }
                // Any count left over after hitting the start of text is
                // applied as-is (may yield a negative offset).
                newStart += n;
            } else if (cursorPos > output.length()) {
                newStart = start + outLen;
                int n = cursorPos - output.length();
                // Outside the output string, cursorPos counts code points
                while (n > 0 && newStart < text.length()) {
                    newStart += UTF16.getCharCount(text.char32At(newStart));
                    --n;
                }
                // Any count left over after hitting the end of text is
                // applied as-is (may yield an offset past the end).
                newStart += n;
            } else {
                // Cursor is within output string.  It has been set up above
                // to be relative to start.
                newStart += start;
            }

            cursor[0] = newStart;
        }

        return outLen;
    }

    /**
     * UnicodeReplacer API.  Builds the rule-pattern text for this
     * replacer: the output characters, the patterns of any nested
     * replacers (each surrounded by spaces), and, if this object has
     * a cursor, a '|' marker with '@' placeholders for a cursor that
     * lies outside the output.
     * @param escapeUnprintable passed through to Utility.appendToRule
     * and to nested replacers
     * @return the pattern string
     */
    public String toReplacerPattern(boolean escapeUnprintable) {
        StringBuffer rule = new StringBuffer();
        StringBuffer quoteBuf = new StringBuffer();

        int cursor = cursorPos;

        // Handle a cursor preceding the output.  Emit the '|' marker
        // followed by one '@' per position before the start.  'cursor'
        // is deliberately left negative so that neither the in-loop
        // check nor the after-output check below emits a second '|'.
        // (Counting with "while (cursor++ < 0)" leaves cursor at 1,
        // which misplaces or drops the marker.)
        // NOTE(review): the '|'-then-'@' ordering follows the rule syntax
        // "| @@@ xyz" as understood from the ICU rule parser; confirm
        // against TransliteratorParser.
        if (hasCursor && cursor < 0) {
            Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
            for (int n = cursor; n < 0; ++n) {
                Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
            }
        }

        for (int i = 0; i < output.length(); ++i) {
            if (hasCursor && i == cursor) {
                Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
            }
            char c = output.charAt(i); // Ok to use 16-bits here

            UnicodeReplacer r = data.lookupReplacer(c);
            if (r == null) {
                Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
            } else {
                StringBuffer buf = new StringBuffer(" ");
                buf.append(r.toReplacerPattern(escapeUnprintable));
                buf.append(' ');
                Utility.appendToRule(rule, buf.toString(),
                                     true, escapeUnprintable, quoteBuf);
            }
        }

        // Handle a cursor after the output.  Use > rather than >= because
        // if cursor == output.length() it is at the end of the output,
        // which is the default position, so we need not emit it.
        if (hasCursor && cursor > output.length()) {
            cursor -= output.length();
            while (cursor-- > 0) {
                Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
            }
            Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
        }
        // Flush quoteBuf out to result
        Utility.appendToRule(rule, -1,
                             true, escapeUnprintable, quoteBuf);

        return rule.toString();
    }

    /**
     * Union the set of all characters that may be output by this object
     * into the given set.  Stand-in characters are not added themselves;
     * the nested replacer they map to is asked to add its own set.
     * @param toUnionTo the set into which to union the output characters
     */
    public void addReplacementSetTo(UnicodeSet toUnionTo) {
        int ch;
        for (int i = 0; i < output.length(); i += UTF16.getCharCount(ch)) {
            ch = UTF16.charAt(output, i);
            UnicodeReplacer r = data.lookupReplacer(ch);
            if (r == null) {
                toUnionTo.add(ch);
            } else {
                r.addReplacementSetTo(toUnionTo);
            }
        }
    }
}
//eof