blob: c766a482ca66f5300c1fa83454ab17cd309790eb [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/BreakIteratorRules_th.java,v $
* $Date: 2000/03/10 04:07:26 $
* $Revision: 1.2 $
*
*****************************************************************************************
*/
package com.ibm.text.resources;
import java.util.ListResourceBundle;
import java.util.MissingResourceException;
import java.net.URL;
/**
 * Resource bundle supplying the Thai ("th") break-iterator configuration:
 * the iterator class names, the word-break and line-break rule descriptions,
 * and the location of the dictionary used for both.
 *
 * If the dictionary resource cannot be located, the bundle contributes
 * nothing at all (an empty contents table).
 */
public class BreakIteratorRules_th extends ListResourceBundle {

    /** Resource name handed to getClass().getResource() to find the dictionary. */
    private static final String DICTIONARY_RESOURCE = "thai_dict";

    /**
     * Rule description stored under the "WordBreakRules" key.
     * The entries of the form name=expression; appear to be definitions that
     * the later entries refer to as {name}; the remaining entries are the
     * rule expressions themselves.
     */
    private static final String WORD_BREAK_RULES =
        // Original author's note on the $dictionary definition:
        // this rule breaks the iterator with mixed Thai and English
        "$dictionary=[\u0e01-\u0e2e\u0e30-\u0e3a\u0e40-\u0e44\u0e47-\u0e4e];"
        + "$ignore=[[[:Mn:][:Me:][:Cf:]]-{$dictionary}];"

        // named character sets
        + "paiyannoi=[\u0e2f];"
        + "maiyamok=[\u0e46];"
        + "danda=[\u0964\u0965];"
        + "kanji=[\u3005\u4e00-\u9fa5\uf900-\ufa2d];"
        + "kata=[\u30a1-\u30fa];"
        + "hira=[\u3041-\u3094];"
        + "cjk-diacrit=[\u3099-\u309c];"
        + "let=[[[:L:][:Mc:]]-[{kanji}{kata}{hira}{cjk-diacrit}{$dictionary}]];"
        + "dgt=[:N:];"
        + "mid-word=[[:Pd:]\u00ad\u2027\\\"\\\'\\.];"
        + "mid-num=[\\\"\\\'\\,\u066b\\.];"
        + "pre-num=[[[:Sc:]-[\u00a2]]\\#\\.];"
        + "post-num=[\\%\\&\u00a2\u066a\u2030\u2031];"
        + "ls=[\n\u000c\u2028\u2029];"
        + "ws=[[:Zs:]\t];"

        // named sub-expressions built from the sets above
        + "word=(({let}+({mid-word}{let}+)*){danda}?);"
        + "number=({dgt}+({mid-num}{dgt}+)*);"
        + "thai-etc={paiyannoi}\u0e25{paiyannoi};"

        // rule expressions
        + ".;"
        + "{word}?({number}{word})*({number}{post-num}?)?;"
        + "{pre-num}({number}{word})*({number}{post-num}?)?;"
        + "{$dictionary}+({paiyannoi}{maiyamok}?)?;"
        + "{$dictionary}+{paiyannoi}/([^\u0e25{$ignore}]"
        + "|\u0e25[^{paiyannoi}{$ignore}]);"
        + "{thai-etc};"
        + "{ws}*\r?{ls}?;"
        + "[{kata}{cjk-diacrit}]*;"
        + "[{hira}{cjk-diacrit}]*;"
        + "{kanji}*;";

    /**
     * Rule description stored under the "LineBreakRules" key.
     * Laid out like the word-break description: name=expression; entries
     * first, followed by the rule expressions that use them.
     */
    private static final String LINE_BREAK_RULES =
        // Original author's note on the $dictionary definition:
        // this rule breaks the iterator with mixed Thai and English
        "$dictionary=[\u0e01-\u0e2e\u0e30-\u0e3a\u0e40-\u0e44\u0e47-\u0e4e];"
        + "$ignore=[[[:Mn:][:Me:][:Cf:]]-[{$dictionary}]];"

        // named character sets
        + "danda=[\u0964\u0965];"
        + "break=[\u0003\t\n\f\u2028\u2029];"
        + "nbsp=[\u00a0\u2007\u2011\ufeff];"
        + "space=[[[:Zs:][:Cc:]]-[{nbsp}{break}\r]];"
        + "dash=[[[:Pd:]\u00ad]-{nbsp}];"
        + "paiyannoi=[\u0e2f];"
        + "maiyamok=[\u0e46];"
        + "thai-etc=({paiyannoi}\u0e25{paiyannoi});"
        + "pre-word=[[[:Sc:]-[\u00a2]][:Ps:]\\\"];"
        // the post-word set is one definition split over five literals
        + "post-word=[[:Pe:]\\!\\%\\.\\,\\:\\;\\?\\\"\u00a2\u00b0\u066a\u2030-\u2034\u2103"
        + "\u2105\u2109\u3001\u3002\u3005\u3041\u3043\u3045\u3047\u3049\u3063"
        + "\u3083\u3085\u3087\u308e\u3099-\u309e\u30a1\u30a3\u30a5\u30a7\u30a9"
        + "\u30c3\u30e3\u30e5\u30e7\u30ee\u30f5\u30f6\u30fc-\u30fe\uff01\uff0e"
        + "\uff1f{maiyamok}];"
        + "kanji=[[\u4e00-\u9fa5\uf900-\ufa2d\u3041-\u3094\u30a1-\u30fa]-[{post-word}{$ignore}]];"
        + "digit=[[:Nd:][:No:]];"
        + "mid-num=[\\.\\,];"
        // the char set is one definition split over two literals
        + "char=[^{break}{space}{dash}{kanji}{nbsp}{$ignore}{pre-word}{post-word}"
        + "{mid-num}\r{danda}{$dictionary}{paiyannoi}{maiyamok}];"

        // named sub-expressions built from the sets above
        + "number=([{pre-word}{dash}]*{digit}+({mid-num}{digit}+)*);"
        + "word-core=({char}*|{kanji}|{number}|{$dictionary}+|{thai-etc});"
        + "word-suffix=(({dash}+|{post-word}*){space}*);"
        + "word=({pre-word}*{word-core}{word-suffix});"

        // rule expressions
        + "{word}({nbsp}+{word})*(\r?{break}?|{paiyannoi}\r{break}|{paiyannoi}{break})?;"
        + "{word}({nbsp}+{word})*{paiyannoi}/([^[\u0e25{$ignore}]]|"
        + "\u0e25[^{paiyannoi}{$ignore}]);";

    /**
     * Builds the key/value table for this bundle.
     *
     * @return a five-entry table (iterator class names, word-break rules,
     *         line-break rules, and the dictionary URL under both the word
     *         and line dictionary keys) when the dictionary resource is
     *         found; otherwise an empty table
     */
    public Object[][] getContents() {
        final URL dictionaryLocation = getClass().getResource(DICTIONARY_RESOURCE);

        if (dictionaryLocation != null) {
            // Class names to instantiate for each kind of break iterator.
            // Word and line breaking use DictionaryBasedBreakIterator; the
            // other two use RuleBasedBreakIterator.
            final String[] iteratorClasses = new String[] {
                "RuleBasedBreakIterator",       // character-break iterator class
                "DictionaryBasedBreakIterator", // word-break iterator class
                "DictionaryBasedBreakIterator", // line-break iterator class
                "RuleBasedBreakIterator"        // sentence-break iterator class
            };

            return new Object[][] {
                { "BreakIteratorClasses", iteratorClasses },
                { "WordBreakRules", WORD_BREAK_RULES },
                { "LineBreakRules", LINE_BREAK_RULES },
                // the same dictionary serves both word and line breaking
                { "WordBreakDictionary", dictionaryLocation },
                { "LineBreakDictionary", dictionaryLocation }
            };
        }

        // Without the dictionary this bundle has nothing useful to
        // contribute, so hand back an empty table.
        return new Object[0][0];
    }
}