blob: ff51a382e5d0a3e35372583072a258564652f8be [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2016, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.text;
import java.text.CharacterIterator;
import com.ibm.icu.impl.CharacterIteration;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
/**
 * Break engine that claims characters already recorded through
 * {@link #handleChar(int)}. It never reports any break positions itself;
 * {@link #findBreaks} only advances the text iterator past the run of
 * characters it handles.
 *
 * <p>Note on concurrency: a single instance of UnhandledBreakEngine is shared across all
 * RuleBasedBreakIterators in a process. They may make arbitrary concurrent calls.
 * If handleChar() is updating the set of handled characters at the same time
 * findBreaks() or handles() is referencing it, the referencing functions must see
 * a consistent set. It doesn't matter whether they see it before or after the update,
 * but they must not see an inconsistent, changing set.
 *
 * <p>To do this, an update is made by building a new set from the old one, freezing it,
 * and then replacing the old set with the new one through a volatile write. Every set
 * that is ever made visible is frozen, so it cannot change after publication.
 */
final class UnhandledBreakEngine implements LanguageBreakEngine {

    // TODO: it's odd that findBreaks() can produce different results, depending
    //       on which scripts have been previously seen by handleChar(). (This is not a
    //       threading specific issue). Possibly stop on script boundaries?

    /**
     * The characters this engine currently claims. The reference is replaced
     * wholesale by handleChar(); the referenced set is always frozen.
     */
    volatile UnicodeSet fHandled = new UnicodeSet().freeze();

    public UnhandledBreakEngine() {
    }

    /**
     * Tests whether this engine claims the given character.
     *
     * @param c the code point to test
     * @return true if c is in the current set of handled characters
     */
    @Override
    public boolean handles(int c) {
        return fHandled.contains(c);
    }

    /**
     * Advances {@code text} from its current position past the run of handled
     * characters, stopping at {@code endPos} or at the first character that is
     * not in the handled set, whichever comes first.
     *
     * @param text        iterator over the text; its index is moved forward
     * @param startPos    not used by this engine; scanning starts at the
     *                    iterator's current index
     * @param endPos      index at which scanning stops
     * @param foundBreaks not used by this engine; no breaks are ever added
     * @return always 0, the number of breaks found
     */
    @Override
    public int findBreaks(CharacterIterator text, int startPos, int endPos,
            DictionaryBreakEngine.DequeI foundBreaks) {
        // Read the volatile field once so the whole scan uses one consistent set,
        // even if handleChar() publishes a replacement in the meantime.
        UnicodeSet uniset = fHandled;
        int c = CharacterIteration.current32(text);
        // The index test comes first so that contains() is not consulted once
        // endPos has been reached.
        while (text.getIndex() < endPos && uniset.contains(c)) {
            CharacterIteration.next32(text);
            c = CharacterIteration.current32(text);
        }
        return 0;
    }

    /**
     * Update the set of handled characters to include
     * all that have the same script as c.
     * May be called concurrently with handles() or findBreaks().
     * Must not be called concurrently with itself.
     *
     * @param c the code point whose script should be added
     */
    public void handleChar(int c) {
        UnicodeSet originalSet = fHandled;
        if (!originalSet.contains(c)) {
            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
            UnicodeSet newSet = new UnicodeSet();
            newSet.applyIntPropertyValue(UProperty.SCRIPT, script);
            newSet.addAll(originalSet);
            // Freeze before publishing: readers on other threads must only ever
            // see a set that can no longer be modified.
            newSet.freeze();
            fHandled = newSet;
        }
    }
}