main/classes/collate/src/com/ibm/icu/impl/text/RbnfScannerProviderImpl.java - external/github.com/unicode-org/icu - Git at Google

 /*
 *******************************************************************************
 * Copyright (C) 2009-2014, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

 package com.ibm.icu.impl.text;

 import java.util.HashMap;
 import java.util.Map;

 import com.ibm.icu.impl.ICUDebug;
 import com.ibm.icu.text.CollationElementIterator;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.RbnfLenientScanner;
 import com.ibm.icu.text.RbnfLenientScannerProvider;
 import com.ibm.icu.text.RuleBasedCollator;
 import com.ibm.icu.util.ULocale;

 /**
  * Supplies {@link RbnfLenientScanner}s that reproduce the old
  * RuleBasedNumberFormat implementation behind setLenientParseMode, which is
  * based on Collator.  Created scanners are cached per locale and extra rules.
  * @internal
  * @deprecated This API is ICU internal only.
  */
 @Deprecated
 public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
     private static final boolean DEBUG = ICUDebug.enabled("rbnf");

     // Scanners created so far.  The map doubles as the monitor guarding its
     // own access, so the reference is final: a lock object must never change.
     private final Map<String, RbnfLenientScanner> cache;

     /**
      * Creates a provider with an empty scanner cache.
      * @internal
      * @deprecated This API is ICU internal only.
      */
     @Deprecated
     public RbnfScannerProviderImpl() {
         cache = new HashMap<String, RbnfLenientScanner>();
     }

     /**
      * Returns a collation-based scanner for the given locale and extra rules,
      * creating and caching it on first request.
      *
      * Only primary differences are treated as significant.  This means that case
      * differences, accent differences, alternate spellings of the same letter
      * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
      * matching the text.  In many cases, numerals will be accepted in place of words
      * or phrases as well.
      *
      * For example, all of the following will correctly parse as 255 in English in
      * lenient-parse mode:
      * <br>"two hundred fifty-five"
      * <br>"two hundred fifty five"
      * <br>"TWO HUNDRED FIFTY-FIVE"
      * <br>"twohundredfiftyfive"
      * <br>"2 hundred fifty-5"
      *
      * The Collator used is determined by <code>locale</code>.  If
      * <code>extras</code> is non-null, it supplies additional collation rules that
      * are appended to the rules of the default collator for the locale, enabling
      * additional equivalences (such as adding more ignorable characters or
      * permitting spelled-out version of symbols; see the demo program for examples).
      *
      * It's important to emphasize that even strict parsing is relatively lenient: it
      * will accept some text that it won't produce as output.  In English, for example,
      * it will correctly parse "two hundred zero" and "fifteen hundred".
      *
      * @param locale the locale whose collator drives the matching
      * @param extras additional collation rules to append, or null for none
      * @return the scanner cached for this combination, or a newly created one
      * @internal
      * @deprecated This API is ICU internal only.
      */
     @Deprecated
     public RbnfLenientScanner get(ULocale locale, String extras) {
         // Null extras get a key without the separator so that they cannot
         // share a cache slot with the literal rule string "null".
         String localeName = locale.toString();
         String key = (extras == null) ? localeName : localeName + "/" + extras;

         synchronized (cache) {
             RbnfLenientScanner cached = cache.get(key);
             if (cached != null) {
                 return cached;
             }
         }

         // Build outside the lock: createScanner is overridable and should not
         // run while the cache monitor is held.
         RbnfLenientScanner created = createScanner(locale, extras);

         synchronized (cache) {
             // Another thread may have cached a scanner for this key in the
             // meantime; prefer it so every caller sees the same instance.
             RbnfLenientScanner winner = cache.get(key);
             if (winner != null) {
                 return winner;
             }
             cache.put(key, created);
         }
         return created;
     }

     /**
      * Builds the scanner for one locale/extra-rules combination.
      *
      * If anything fails while the collator is being set up, the exception is
      * swallowed (its stack trace is printed only when "rbnf" debugging is
      * enabled) and the returned scanner wraps a null collator; such a scanner
      * throws NullPointerException from every one of its methods.
      *
      * @param locale the locale whose default collator is the starting point
      * @param extras additional collation rules to append, or null for none
      * @return a new scanner; never null
      * @internal
      * @deprecated This API is ICU internal only.
      */
     @Deprecated
     protected RbnfLenientScanner createScanner(ULocale locale, String extras) {
         RuleBasedCollator collator = null;
         try {
             // create a default collator based on the locale,
             // then pull out that collator's rules, append any additional
             // rules specified in the description, and create a _new_
             // collator based on the combination of those rules
             collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale());
             if (extras != null) {
                 String rules = collator.getRules() + extras;
                 collator = new RuleBasedCollator(rules);
             }
             collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
         }
         catch (Exception e) {
             // If we get here, it means we have a malformed set of
             // collation rules, which hopefully won't happen
             ///CLOVER:OFF
             if (DEBUG){ // debug hook
                 e.printStackTrace(); System.out.println("++++");
             }
             collator = null;
             ///CLOVER:ON
         }

         return new RbnfLenientScannerImpl(collator);
     }

     /**
      * Scanner that compares text by the primary orders of its collation
      * elements, skipping elements whose primary order is zero.
      */
     private static class RbnfLenientScannerImpl implements RbnfLenientScanner {
         private final RuleBasedCollator collator;

         private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
             this.collator = rbc;
         }

         /**
          * Returns true if every collation element of <code>s</code> has a
          * primary order of zero; a string that yields no collation elements
          * at all also counts as ignorable.
          */
         public boolean allIgnorable(String s) {
             CollationElementIterator iter = collator.getCollationElementIterator(s);

             int o = iter.next();
             while (o != CollationElementIterator.NULLORDER
                    && CollationElementIterator.primaryOrder(o) == 0) {
                 o = iter.next();
             }
             return o == CollationElementIterator.NULLORDER;
         }

         /**
          * Searches <code>str</code>, beginning at index <code>startingAt</code>,
          * for text that matches <code>key</code>.
          *
          * @return a two-element array holding the index of the match and the
          *         length of the matched text, or { -1, 0 } if there is no match
          */
         public int[] findText(String str, String key, int startingAt) {
             // basically just isolate smaller and smaller substrings of
             // the target string (each running to the end of the string,
             // and with the first one running from startingAt to the end)
             // and then use prefixLength() to see if the search key is at
             // the beginning of each substring.  This is excruciatingly
             // slow, but it will locate the key and tell us how long the
             // matching text was.
             for (int p = startingAt; p < str.length(); ++p) {
                 int keyLen = prefixLength(str.substring(p), key);
                 if (keyLen != 0) {
                     return new int[] { p, keyLen };
                 }
             }
             // if we make it to here, we didn't find it.  Return -1 for the
             // location.  The length should be ignored, but set it to 0,
             // which should be "safe"
             return new int[] { -1, 0 };
         }

         ///CLOVER:OFF
         // The following method contains the same signature as findText
         //  and has never been used by anything once.
         // NOTE(review): after keyIter.reset() below, the local oKey still
         // holds the element that was read before the reset.  That looks
         // unintended, but since the method is unused its logic is left as is.
         @SuppressWarnings("unused")
         public int[] findText2(String str, String key, int startingAt) {

             CollationElementIterator strIter = collator.getCollationElementIterator(str);
             CollationElementIterator keyIter = collator.getCollationElementIterator(key);

             int keyStart = -1;

             strIter.setOffset(startingAt);

             int oStr = strIter.next();
             int oKey = keyIter.next();
             while (oKey != CollationElementIterator.NULLORDER) {
                 while (oStr != CollationElementIterator.NULLORDER &&
                        CollationElementIterator.primaryOrder(oStr) == 0) {
                     oStr = strIter.next();
                 }

                 while (oKey != CollationElementIterator.NULLORDER &&
                        CollationElementIterator.primaryOrder(oKey) == 0) {
                     oKey = keyIter.next();
                 }

                 if (oStr == CollationElementIterator.NULLORDER) {
                     return new int[] { -1, 0 };
                 }

                 if (oKey == CollationElementIterator.NULLORDER) {
                     break;
                 }

                 if (CollationElementIterator.primaryOrder(oStr) ==
                     CollationElementIterator.primaryOrder(oKey)) {
                     keyStart = strIter.getOffset();
                     oStr = strIter.next();
                     oKey = keyIter.next();
                 } else {
                     if (keyStart != -1) {
                         keyStart = -1;
                         keyIter.reset();
                     } else {
                         oStr = strIter.next();
                     }
                 }
             }

             if (oKey == CollationElementIterator.NULLORDER) {
                 return new int[] { keyStart, strIter.getOffset() - keyStart };
             }

             return new int[] { -1, 0 };
         }
         ///CLOVER:ON

         /**
          * Checks whether <code>str</code> begins with text matching
          * <code>prefix</code>, comparing primary orders only and skipping
          * elements whose primary order is zero on both sides.
          *
          * @return the offset in <code>str</code> just past the matched text,
          *         or 0 if <code>str</code> does not start with the prefix
          */
         public int prefixLength(String str, String prefix) {
             // Create two collation element iterators, one over the target string
             // and another over the prefix.
             //
             // Previous code was matching "fifty-" against " fifty" and leaving
             // the number " fifty-7" to parse as 43 (50 - 7).
             // Also it seems that if we consume the entire prefix, that's ok even
             // if we've consumed the entire string, so I switched the logic to
             // reflect this.

             CollationElementIterator strIter = collator.getCollationElementIterator(str);
             CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);

             // match collation elements between the strings
             int oStr = strIter.next();
             int oPrefix = prefixIter.next();

             while (oPrefix != CollationElementIterator.NULLORDER) {
                 // skip over ignorable characters in the target string
                 while (oStr != CollationElementIterator.NULLORDER
                        && CollationElementIterator.primaryOrder(oStr) == 0) {
                     oStr = strIter.next();
                 }

                 // skip over ignorable characters in the prefix
                 while (oPrefix != CollationElementIterator.NULLORDER
                        && CollationElementIterator.primaryOrder(oPrefix) == 0) {
                     oPrefix = prefixIter.next();
                 }

                 // if skipping over ignorables brought to the end of
                 // the prefix, we DID match: drop out of the loop
                 if (oPrefix == CollationElementIterator.NULLORDER) {
                     break;
                 }

                 // if skipping over ignorables brought us to the end
                 // of the target string, we didn't match and return 0
                 if (oStr == CollationElementIterator.NULLORDER) {
                     return 0;
                 }

                 // match collation elements from the two strings
                 // (considering only primary differences).  If we
                 // get a mismatch, dump out and return 0
                 if (CollationElementIterator.primaryOrder(oStr) !=
                     CollationElementIterator.primaryOrder(oPrefix)) {
                     return 0;
                 }

                 // otherwise, advance to the next character in each string
                 // and loop (we drop out of the loop when we exhaust
                 // collation elements in the prefix)

                 oStr = strIter.next();
                 oPrefix = prefixIter.next();
             }

             // oStr, when not NULLORDER, is a look-ahead element that was read
             // but not matched, so step back to leave it out of the length.
             // NOTE(review): this assumes the look-ahead element accounts for
             // exactly one position of offset -- confirm for contractions and
             // supplementary characters.
             int result = strIter.getOffset();
             if (oStr != CollationElementIterator.NULLORDER) {
                 --result;
             }
             return result;
         }
     }
 }
	/*
	*******************************************************************************
	* Copyright (C) 2009-2014, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	*******************************************************************************
	*/

	package com.ibm.icu.impl.text;

	import java.util.HashMap;
	import java.util.Map;

	import com.ibm.icu.impl.ICUDebug;
	import com.ibm.icu.text.CollationElementIterator;
	import com.ibm.icu.text.Collator;
	import com.ibm.icu.text.RbnfLenientScanner;
	import com.ibm.icu.text.RbnfLenientScannerProvider;
	import com.ibm.icu.text.RuleBasedCollator;
	import com.ibm.icu.util.ULocale;

	/**
	 * Supplies {@link RbnfLenientScanner}s that reproduce the old
	 * RuleBasedNumberFormat implementation behind setLenientParseMode, which is
	 * based on Collator.  Created scanners are cached per locale and extra rules.
	 * @internal
	 * @deprecated This API is ICU internal only.
	 */
	@Deprecated
	public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
	    private static final boolean DEBUG = ICUDebug.enabled("rbnf");

	    // Scanners created so far.  The map doubles as the monitor guarding its
	    // own access, so the reference is final: a lock object must never change.
	    private final Map<String, RbnfLenientScanner> cache;

	    /**
	     * Creates a provider with an empty scanner cache.
	     * @internal
	     * @deprecated This API is ICU internal only.
	     */
	    @Deprecated
	    public RbnfScannerProviderImpl() {
	        cache = new HashMap<String, RbnfLenientScanner>();
	    }

	    /**
	     * Returns a collation-based scanner for the given locale and extra rules,
	     * creating and caching it on first request.
	     *
	     * Only primary differences are treated as significant. This means that case
	     * differences, accent differences, alternate spellings of the same letter
	     * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
	     * matching the text. In many cases, numerals will be accepted in place of words
	     * or phrases as well.
	     *
	     * For example, all of the following will correctly parse as 255 in English in
	     * lenient-parse mode:
	     * <br>"two hundred fifty-five"
	     * <br>"two hundred fifty five"
	     * <br>"TWO HUNDRED FIFTY-FIVE"
	     * <br>"twohundredfiftyfive"
	     * <br>"2 hundred fifty-5"
	     *
	     * The Collator used is determined by <code>locale</code>. If
	     * <code>extras</code> is non-null, it supplies additional collation rules that
	     * are appended to the rules of the default collator for the locale, enabling
	     * additional equivalences (such as adding more ignorable characters or
	     * permitting spelled-out version of symbols; see the demo program for examples).
	     *
	     * It's important to emphasize that even strict parsing is relatively lenient: it
	     * will accept some text that it won't produce as output. In English, for example,
	     * it will correctly parse "two hundred zero" and "fifteen hundred".
	     *
	     * @param locale the locale whose collator drives the matching
	     * @param extras additional collation rules to append, or null for none
	     * @return the scanner cached for this combination, or a newly created one
	     * @internal
	     * @deprecated This API is ICU internal only.
	     */
	    @Deprecated
	    public RbnfLenientScanner get(ULocale locale, String extras) {
	        // Null extras get a key without the separator so that they cannot
	        // share a cache slot with the literal rule string "null".
	        String localeName = locale.toString();
	        String key = (extras == null) ? localeName : localeName + "/" + extras;

	        synchronized (cache) {
	            RbnfLenientScanner cached = cache.get(key);
	            if (cached != null) {
	                return cached;
	            }
	        }

	        // Build outside the lock: createScanner is overridable and should not
	        // run while the cache monitor is held.
	        RbnfLenientScanner created = createScanner(locale, extras);

	        synchronized (cache) {
	            // Another thread may have cached a scanner for this key in the
	            // meantime; prefer it so every caller sees the same instance.
	            RbnfLenientScanner winner = cache.get(key);
	            if (winner != null) {
	                return winner;
	            }
	            cache.put(key, created);
	        }
	        return created;
	    }

	    /**
	     * Builds the scanner for one locale/extra-rules combination.
	     *
	     * If anything fails while the collator is being set up, the exception is
	     * swallowed (its stack trace is printed only when "rbnf" debugging is
	     * enabled) and the returned scanner wraps a null collator; such a scanner
	     * throws NullPointerException from every one of its methods.
	     *
	     * @param locale the locale whose default collator is the starting point
	     * @param extras additional collation rules to append, or null for none
	     * @return a new scanner; never null
	     * @internal
	     * @deprecated This API is ICU internal only.
	     */
	    @Deprecated
	    protected RbnfLenientScanner createScanner(ULocale locale, String extras) {
	        RuleBasedCollator collator = null;
	        try {
	            // create a default collator based on the locale,
	            // then pull out that collator's rules, append any additional
	            // rules specified in the description, and create a _new_
	            // collator based on the combination of those rules
	            collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale());
	            if (extras != null) {
	                String rules = collator.getRules() + extras;
	                collator = new RuleBasedCollator(rules);
	            }
	            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
	        }
	        catch (Exception e) {
	            // If we get here, it means we have a malformed set of
	            // collation rules, which hopefully won't happen
	            ///CLOVER:OFF
	            if (DEBUG){ // debug hook
	                e.printStackTrace(); System.out.println("++++");
	            }
	            collator = null;
	            ///CLOVER:ON
	        }

	        return new RbnfLenientScannerImpl(collator);
	    }

	    /**
	     * Scanner that compares text by the primary orders of its collation
	     * elements, skipping elements whose primary order is zero.
	     */
	    private static class RbnfLenientScannerImpl implements RbnfLenientScanner {
	        private final RuleBasedCollator collator;

	        private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
	            this.collator = rbc;
	        }

	        /**
	         * Returns true if every collation element of <code>s</code> has a
	         * primary order of zero; a string that yields no collation elements
	         * at all also counts as ignorable.
	         */
	        public boolean allIgnorable(String s) {
	            CollationElementIterator iter = collator.getCollationElementIterator(s);

	            int o = iter.next();
	            while (o != CollationElementIterator.NULLORDER
	                   && CollationElementIterator.primaryOrder(o) == 0) {
	                o = iter.next();
	            }
	            return o == CollationElementIterator.NULLORDER;
	        }

	        /**
	         * Searches <code>str</code>, beginning at index <code>startingAt</code>,
	         * for text that matches <code>key</code>.
	         *
	         * @return a two-element array holding the index of the match and the
	         *         length of the matched text, or { -1, 0 } if there is no match
	         */
	        public int[] findText(String str, String key, int startingAt) {
	            // basically just isolate smaller and smaller substrings of
	            // the target string (each running to the end of the string,
	            // and with the first one running from startingAt to the end)
	            // and then use prefixLength() to see if the search key is at
	            // the beginning of each substring. This is excruciatingly
	            // slow, but it will locate the key and tell us how long the
	            // matching text was.
	            for (int p = startingAt; p < str.length(); ++p) {
	                int keyLen = prefixLength(str.substring(p), key);
	                if (keyLen != 0) {
	                    return new int[] { p, keyLen };
	                }
	            }
	            // if we make it to here, we didn't find it. Return -1 for the
	            // location. The length should be ignored, but set it to 0,
	            // which should be "safe"
	            return new int[] { -1, 0 };
	        }

	        ///CLOVER:OFF
	        // The following method contains the same signature as findText
	        // and has never been used by anything once.
	        // NOTE(review): after keyIter.reset() below, the local oKey still
	        // holds the element that was read before the reset.  That looks
	        // unintended, but since the method is unused its logic is left as is.
	        @SuppressWarnings("unused")
	        public int[] findText2(String str, String key, int startingAt) {

	            CollationElementIterator strIter = collator.getCollationElementIterator(str);
	            CollationElementIterator keyIter = collator.getCollationElementIterator(key);

	            int keyStart = -1;

	            strIter.setOffset(startingAt);

	            int oStr = strIter.next();
	            int oKey = keyIter.next();
	            while (oKey != CollationElementIterator.NULLORDER) {
	                while (oStr != CollationElementIterator.NULLORDER &&
	                       CollationElementIterator.primaryOrder(oStr) == 0) {
	                    oStr = strIter.next();
	                }

	                while (oKey != CollationElementIterator.NULLORDER &&
	                       CollationElementIterator.primaryOrder(oKey) == 0) {
	                    oKey = keyIter.next();
	                }

	                if (oStr == CollationElementIterator.NULLORDER) {
	                    return new int[] { -1, 0 };
	                }

	                if (oKey == CollationElementIterator.NULLORDER) {
	                    break;
	                }

	                if (CollationElementIterator.primaryOrder(oStr) ==
	                    CollationElementIterator.primaryOrder(oKey)) {
	                    keyStart = strIter.getOffset();
	                    oStr = strIter.next();
	                    oKey = keyIter.next();
	                } else {
	                    if (keyStart != -1) {
	                        keyStart = -1;
	                        keyIter.reset();
	                    } else {
	                        oStr = strIter.next();
	                    }
	                }
	            }

	            if (oKey == CollationElementIterator.NULLORDER) {
	                return new int[] { keyStart, strIter.getOffset() - keyStart };
	            }

	            return new int[] { -1, 0 };
	        }
	        ///CLOVER:ON

	        /**
	         * Checks whether <code>str</code> begins with text matching
	         * <code>prefix</code>, comparing primary orders only and skipping
	         * elements whose primary order is zero on both sides.
	         *
	         * @return the offset in <code>str</code> just past the matched text,
	         *         or 0 if <code>str</code> does not start with the prefix
	         */
	        public int prefixLength(String str, String prefix) {
	            // Create two collation element iterators, one over the target string
	            // and another over the prefix.
	            //
	            // Previous code was matching "fifty-" against " fifty" and leaving
	            // the number " fifty-7" to parse as 43 (50 - 7).
	            // Also it seems that if we consume the entire prefix, that's ok even
	            // if we've consumed the entire string, so I switched the logic to
	            // reflect this.

	            CollationElementIterator strIter = collator.getCollationElementIterator(str);
	            CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);

	            // match collation elements between the strings
	            int oStr = strIter.next();
	            int oPrefix = prefixIter.next();

	            while (oPrefix != CollationElementIterator.NULLORDER) {
	                // skip over ignorable characters in the target string
	                while (oStr != CollationElementIterator.NULLORDER
	                       && CollationElementIterator.primaryOrder(oStr) == 0) {
	                    oStr = strIter.next();
	                }

	                // skip over ignorable characters in the prefix
	                while (oPrefix != CollationElementIterator.NULLORDER
	                       && CollationElementIterator.primaryOrder(oPrefix) == 0) {
	                    oPrefix = prefixIter.next();
	                }

	                // if skipping over ignorables brought to the end of
	                // the prefix, we DID match: drop out of the loop
	                if (oPrefix == CollationElementIterator.NULLORDER) {
	                    break;
	                }

	                // if skipping over ignorables brought us to the end
	                // of the target string, we didn't match and return 0
	                if (oStr == CollationElementIterator.NULLORDER) {
	                    return 0;
	                }

	                // match collation elements from the two strings
	                // (considering only primary differences). If we
	                // get a mismatch, dump out and return 0
	                if (CollationElementIterator.primaryOrder(oStr) !=
	                    CollationElementIterator.primaryOrder(oPrefix)) {
	                    return 0;
	                }

	                // otherwise, advance to the next character in each string
	                // and loop (we drop out of the loop when we exhaust
	                // collation elements in the prefix)

	                oStr = strIter.next();
	                oPrefix = prefixIter.next();
	            }

	            // oStr, when not NULLORDER, is a look-ahead element that was read
	            // but not matched, so step back to leave it out of the length.
	            // NOTE(review): this assumes the look-ahead element accounts for
	            // exactly one position of offset -- confirm for contractions and
	            // supplementary characters.
	            int result = strIter.getOffset();
	            if (oStr != CollationElementIterator.NULLORDER) {
	                --result;
	            }
	            return result;
	        }
	    }
	}