| /* |
| ******************************************************************************* |
| * Copyright (C) 2009-2014, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| */ |
| |
| package com.ibm.icu.impl.text; |
| |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| import com.ibm.icu.impl.ICUDebug; |
| import com.ibm.icu.text.CollationElementIterator; |
| import com.ibm.icu.text.Collator; |
| import com.ibm.icu.text.RbnfLenientScanner; |
| import com.ibm.icu.text.RbnfLenientScannerProvider; |
| import com.ibm.icu.text.RuleBasedCollator; |
| import com.ibm.icu.util.ULocale; |
| |
| /** |
| * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat |
| * implementation behind setLenientParseMode, which is based on Collator. |
| * @internal |
| * @deprecated This API is ICU internal only. |
| */ |
| @Deprecated |
| public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider { |
| private static final boolean DEBUG = ICUDebug.enabled("rbnf"); |
| private Map<String, RbnfLenientScanner> cache; |
| |
| /** |
| * @internal |
| * @deprecated This API is ICU internal only. |
| */ |
| @Deprecated |
| public RbnfScannerProviderImpl() { |
| cache = new HashMap<String, RbnfLenientScanner>(); |
| } |
| |
| /** |
| * Returns a collation-based scanner. |
| * |
| * Only primary differences are treated as significant. This means that case |
| * differences, accent differences, alternate spellings of the same letter |
| * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in |
| * matching the text. In many cases, numerals will be accepted in place of words |
| * or phrases as well. |
| * |
| * For example, all of the following will correctly parse as 255 in English in |
| * lenient-parse mode: |
| * <br>"two hundred fifty-five" |
| * <br>"two hundred fifty five" |
| * <br>"TWO HUNDRED FIFTY-FIVE" |
| * <br>"twohundredfiftyfive" |
| * <br>"2 hundred fifty-5" |
| * |
| * The Collator used is determined by the locale that was |
| * passed to this object on construction. The description passed to this object |
| * on construction may supply additional collation rules that are appended to the |
| * end of the default collator for the locale, enabling additional equivalences |
| * (such as adding more ignorable characters or permitting spelled-out version of |
| * symbols; see the demo program for examples). |
| * |
| * It's important to emphasize that even strict parsing is relatively lenient: it |
| * will accept some text that it won't produce as output. In English, for example, |
| * it will correctly parse "two hundred zero" and "fifteen hundred". |
| * |
| * @internal |
| * @deprecated This API is ICU internal only. |
| */ |
| @Deprecated |
| public RbnfLenientScanner get(ULocale locale, String extras) { |
| RbnfLenientScanner result = null; |
| String key = locale.toString() + "/" + extras; |
| synchronized(cache) { |
| result = cache.get(key); |
| if (result != null) { |
| return result; |
| } |
| } |
| result = createScanner(locale, extras); |
| synchronized(cache) { |
| cache.put(key, result); |
| } |
| return result; |
| } |
| |
| /** |
| * @internal |
| * @deprecated This API is ICU internal only. |
| */ |
| @Deprecated |
| protected RbnfLenientScanner createScanner(ULocale locale, String extras) { |
| RuleBasedCollator collator = null; |
| try { |
| // create a default collator based on the locale, |
| // then pull out that collator's rules, append any additional |
| // rules specified in the description, and create a _new_ |
| // collator based on the combination of those rules |
| collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale()); |
| if (extras != null) { |
| String rules = collator.getRules() + extras; |
| collator = new RuleBasedCollator(rules); |
| } |
| collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); |
| } |
| catch (Exception e) { |
| // If we get here, it means we have a malformed set of |
| // collation rules, which hopefully won't happen |
| ///CLOVER:OFF |
| if (DEBUG){ // debug hook |
| e.printStackTrace(); System.out.println("++++"); |
| } |
| collator = null; |
| ///CLOVER:ON |
| } |
| |
| return new RbnfLenientScannerImpl(collator); |
| } |
| |
| private static class RbnfLenientScannerImpl implements RbnfLenientScanner { |
| private final RuleBasedCollator collator; |
| |
| private RbnfLenientScannerImpl(RuleBasedCollator rbc) { |
| this.collator = rbc; |
| } |
| |
| public boolean allIgnorable(String s) { |
| CollationElementIterator iter = collator.getCollationElementIterator(s); |
| |
| int o = iter.next(); |
| while (o != CollationElementIterator.NULLORDER |
| && CollationElementIterator.primaryOrder(o) == 0) { |
| o = iter.next(); |
| } |
| return o == CollationElementIterator.NULLORDER; |
| } |
| |
| public int[] findText(String str, String key, int startingAt) { |
| int p = startingAt; |
| int keyLen = 0; |
| |
| // basically just isolate smaller and smaller substrings of |
| // the target string (each running to the end of the string, |
| // and with the first one running from startingAt to the end) |
| // and then use prefixLength() to see if the search key is at |
| // the beginning of each substring. This is excruciatingly |
| // slow, but it will locate the key and tell use how long the |
| // matching text was. |
| while (p < str.length() && keyLen == 0) { |
| keyLen = prefixLength(str.substring(p), key); |
| if (keyLen != 0) { |
| return new int[] { p, keyLen }; |
| } |
| ++p; |
| } |
| // if we make it to here, we didn't find it. Return -1 for the |
| // location. The length should be ignored, but set it to 0, |
| // which should be "safe" |
| return new int[] { -1, 0 }; |
| } |
| |
| ///CLOVER:OFF |
| // The following method contains the same signature as findText |
| // and has never been used by anything once. |
| @SuppressWarnings("unused") |
| public int[] findText2(String str, String key, int startingAt) { |
| |
| CollationElementIterator strIter = collator.getCollationElementIterator(str); |
| CollationElementIterator keyIter = collator.getCollationElementIterator(key); |
| |
| int keyStart = -1; |
| |
| strIter.setOffset(startingAt); |
| |
| int oStr = strIter.next(); |
| int oKey = keyIter.next(); |
| while (oKey != CollationElementIterator.NULLORDER) { |
| while (oStr != CollationElementIterator.NULLORDER && |
| CollationElementIterator.primaryOrder(oStr) == 0) |
| oStr = strIter.next(); |
| |
| while (oKey != CollationElementIterator.NULLORDER && |
| CollationElementIterator.primaryOrder(oKey) == 0) |
| oKey = keyIter.next(); |
| |
| if (oStr == CollationElementIterator.NULLORDER) { |
| return new int[] { -1, 0 }; |
| } |
| |
| if (oKey == CollationElementIterator.NULLORDER) { |
| break; |
| } |
| |
| if (CollationElementIterator.primaryOrder(oStr) == |
| CollationElementIterator.primaryOrder(oKey)) { |
| keyStart = strIter.getOffset(); |
| oStr = strIter.next(); |
| oKey = keyIter.next(); |
| } else { |
| if (keyStart != -1) { |
| keyStart = -1; |
| keyIter.reset(); |
| } else { |
| oStr = strIter.next(); |
| } |
| } |
| } |
| |
| if (oKey == CollationElementIterator.NULLORDER) { |
| return new int[] { keyStart, strIter.getOffset() - keyStart }; |
| } |
| |
| return new int[] { -1, 0 }; |
| } |
| ///CLOVER:ON |
| |
| public int prefixLength(String str, String prefix) { |
| // Create two collation element iterators, one over the target string |
| // and another over the prefix. |
| // |
| // Previous code was matching "fifty-" against " fifty" and leaving |
| // the number " fifty-7" to parse as 43 (50 - 7). |
| // Also it seems that if we consume the entire prefix, that's ok even |
| // if we've consumed the entire string, so I switched the logic to |
| // reflect this. |
| |
| CollationElementIterator strIter = collator.getCollationElementIterator(str); |
| CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix); |
| |
| // match collation elements between the strings |
| int oStr = strIter.next(); |
| int oPrefix = prefixIter.next(); |
| |
| while (oPrefix != CollationElementIterator.NULLORDER) { |
| // skip over ignorable characters in the target string |
| while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr != |
| CollationElementIterator.NULLORDER) { |
| oStr = strIter.next(); |
| } |
| |
| // skip over ignorable characters in the prefix |
| while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix != |
| CollationElementIterator.NULLORDER) { |
| oPrefix = prefixIter.next(); |
| } |
| |
| // if skipping over ignorables brought to the end of |
| // the prefix, we DID match: drop out of the loop |
| if (oPrefix == CollationElementIterator.NULLORDER) { |
| break; |
| } |
| |
| // if skipping over ignorables brought us to the end |
| // of the target string, we didn't match and return 0 |
| if (oStr == CollationElementIterator.NULLORDER) { |
| return 0; |
| } |
| |
| // match collation elements from the two strings |
| // (considering only primary differences). If we |
| // get a mismatch, dump out and return 0 |
| if (CollationElementIterator.primaryOrder(oStr) != |
| CollationElementIterator.primaryOrder(oPrefix)) { |
| return 0; |
| } |
| |
| // otherwise, advance to the next character in each string |
| // and loop (we drop out of the loop when we exhaust |
| // collation elements in the prefix) |
| |
| oStr = strIter.next(); |
| oPrefix = prefixIter.next(); |
| } |
| |
| int result = strIter.getOffset(); |
| if (oStr != CollationElementIterator.NULLORDER) { |
| --result; |
| } |
| return result; |
| } |
| } |
| } |