ICU-13637 Break Iterator Rule Updates for Indic Grapheme Clusters.
diff --git a/icu4c/source/data/BUILDRULES.py b/icu4c/source/data/BUILDRULES.py
index c6e584b..08091ee 100644
--- a/icu4c/source/data/BUILDRULES.py
+++ b/icu4c/source/data/BUILDRULES.py
@@ -22,6 +22,7 @@
exit(1)
requests += generate_cnvalias(config, glob, common_vars)
+ requests += generate_ulayout(config, glob, common_vars)
requests += generate_confusables(config, glob, common_vars)
requests += generate_conversion_mappings(config, glob, common_vars)
requests += generate_brkitr_brk(config, glob, common_vars)
@@ -31,7 +32,6 @@
requests += generate_coll_ucadata(config, glob, common_vars)
requests += generate_full_unicore_data(config, glob, common_vars)
requests += generate_unames(config, glob, common_vars)
- requests += generate_ulayout(config, glob, common_vars)
requests += generate_misc(config, glob, common_vars)
requests += generate_curr_supplemental(config, glob, common_vars)
requests += generate_translit(config, glob, common_vars)
@@ -189,7 +189,7 @@
RepeatedExecutionRequest(
name = "brkitr_brk",
category = "brkitr_rules",
- dep_targets = [DepTarget("cnvalias")],
+ dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout")],
input_files = input_files,
output_files = output_files,
tool = IcuTool("genbrk"),
diff --git a/icu4c/source/data/brkitr/rules/char.txt b/icu4c/source/data/brkitr/rules/char.txt
index 973207a..555f1a5 100644
--- a/icu4c/source/data/brkitr/rules/char.txt
+++ b/icu4c/source/data/brkitr/rules/char.txt
@@ -25,6 +25,13 @@
$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
#
+# Character classes for the Indic grapheme cluster rule (GB 9.3).
+# Taken from cldr/common/properties/segments/; see issue CLDR-10994.
+#
+$Virama = [\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}&\p{Indic_Syllabic_Category=Virama}];
+$LinkingConsonant = [\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}&\p{Indic_Syllabic_Category=Consonant}];
+$ExtCccZwj = [[\p{gcb=Extend}-\p{ccc=0}] \p{gcb=ZWJ}];
+
# Korean Syllable Definitions
#
$L = [\p{Grapheme_Cluster_Break = L}];
@@ -57,6 +64,9 @@
# GB 9b
$Prepend [^$Control $CR $LF];
+# GB 9.3, from CLDR-10994
+$LinkingConsonant $ExtCccZwj* $Virama $ExtCccZwj* $LinkingConsonant;
+
# GB 11 Do not break within emoji modifier sequences or emoji zwj sequences.
$Extended_Pict $Extend* $ZWJ $Extended_Pict;
diff --git a/icu4c/source/test/intltest/rbbimonkeytest.cpp b/icu4c/source/test/intltest/rbbimonkeytest.cpp
index 16fe3bb..70f989e 100644
--- a/icu4c/source/test/intltest/rbbimonkeytest.cpp
+++ b/icu4c/source/test/intltest/rbbimonkeytest.cpp
@@ -135,13 +135,13 @@
printf("epandedDef: %s\n", CStr(expandedDef)());
}
- UnicodeSet *s = new UnicodeSet(expandedDef, USET_IGNORE_SPACE, NULL, status);
+ LocalPointer<UnicodeSet> s(new UnicodeSet(expandedDef, USET_IGNORE_SPACE, NULL, status), status);
if (U_FAILURE(status)) {
- IntlTest::gTest->errln("%s:%d: error %s creating UnicodeSet %s", __FILE__, __LINE__,
- u_errorName(status), CStr(name)());
- return NULL;
+ IntlTest::gTest->errln("%s:%d: error %s creating UnicodeSet %s\n Expanded set definition: %s",
+ __FILE__, __LINE__, u_errorName(status), CStr(name)(), CStr(expandedDef)());
+ return nullptr;
}
- CharClass *cclass = new CharClass(name, definition, expandedDef, s);
+ CharClass *cclass = new CharClass(name, definition, expandedDef, s.orphan());
CharClass *previousClass = static_cast<CharClass *>(uhash_put(fCharClasses.getAlias(),
new UnicodeString(name), // Key, owned by hash table.
cclass, // Value, owned by hash table.
diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp
index 0f81a9e..ee03fb7 100644
--- a/icu4c/source/test/intltest/rbbitst.cpp
+++ b/icu4c/source/test/intltest/rbbitst.cpp
@@ -1611,6 +1611,9 @@
UnicodeSet *fLVTSet;
UnicodeSet *fHangulSet;
UnicodeSet *fExtendedPictSet;
+ UnicodeSet *fViramaSet;
+ UnicodeSet *fLinkingConsonantSet;
+ UnicodeSet *fExtCccZwjSet;
UnicodeSet *fAnySet;
const UnicodeString *fText;
@@ -1643,6 +1646,11 @@
fHangulSet->addAll(*fLVTSet);
fExtendedPictSet = new UnicodeSet(u"[:Extended_Pictographic:]", status);
+ fViramaSet = new UnicodeSet(u"[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&"
+ "\\p{Indic_Syllabic_Category=Virama}]", status);
+ fLinkingConsonantSet = new UnicodeSet(u"[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&"
+ "\\p{Indic_Syllabic_Category=Consonant}]", status);
+ fExtCccZwjSet = new UnicodeSet(u"[[\\p{gcb=Extend}-\\p{ccc=0}] \\p{gcb=ZWJ}]", status);
fAnySet = new UnicodeSet(0, 0x10ffff);
fSets = new UVector(status);
@@ -1658,6 +1666,9 @@
fSets->addElement(fAnySet, status);
fSets->addElement(fZWJSet, status);
fSets->addElement(fExtendedPictSet, status);
+ fSets->addElement(fViramaSet, status);
+ fSets->addElement(fLinkingConsonantSet, status);
+ fSets->addElement(fExtCccZwjSet, status);
if (U_FAILURE(status)) {
deferredStatus = status;
}
@@ -1777,6 +1788,22 @@
continue;
}
+ // Rule (GB9.3) LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant
+ // Note: Viramas are also included in the ExtCccZwj class, so the backward scan below passes over them and records whether one was seen.
+ if (fLinkingConsonantSet->contains(c2)) {
+ int pi = p1;
+ bool sawVirama = false;
+ while (pi > 0 && fExtCccZwjSet->contains(fText->char32At(pi))) {
+ if (fViramaSet->contains(fText->char32At(pi))) {
+ sawVirama = true;
+ }
+ pi = fText->moveIndex32(pi, -1);
+ }
+ if (sawVirama && fLinkingConsonantSet->contains(fText->char32At(pi))) {
+ continue;
+ }
+ }
+
// Rule (GB11) Extended_Pictographic Extend * ZWJ x Extended_Pictographic
if (fExtendedPictSet->contains(cBase) && fZWJSet->contains(c1) && fExtendedPictSet->contains(c2)) {
continue;
@@ -1827,7 +1854,9 @@
delete fAnySet;
delete fZWJSet;
delete fExtendedPictSet;
-}
+ delete fViramaSet;
+ delete fLinkingConsonantSet;
+ delete fExtCccZwjSet;}
//------------------------------------------------------------------------------------------
//
diff --git a/icu4c/source/test/testdata/GraphemeBreakTest.txt b/icu4c/source/test/testdata/GraphemeBreakTest.txt
index fb4fec9..6f8ca3a 100644
--- a/icu4c/source/test/testdata/GraphemeBreakTest.txt
+++ b/icu4c/source/test/testdata/GraphemeBreakTest.txt
@@ -1,5 +1,5 @@
-# GraphemeBreakTest-12.1.0.txt
-# Date: 2019-03-10, 10:53:12 GMT
+# GraphemeBreakTest-12.0.0.txt
+# Date: 2019-02-21, 07:57:26 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@@ -48,10 +48,14 @@
÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0020 ÷ 0915 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0915 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 094D ÷ # ÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 0308 × 094D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -82,10 +86,14 @@
÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000D ÷ 0915 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 094D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0308 × 094D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
@@ -116,10 +124,14 @@
÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000A ÷ 0915 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 094D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0308 × 094D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
@@ -150,10 +162,14 @@
÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0001 ÷ 0915 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 094D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0308 × 094D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
@@ -184,10 +200,14 @@
÷ 034F × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 034F ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 034F × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 034F ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 034F × 0308 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 034F ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 034F × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 034F × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 034F × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 034F × 094D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 034F × 0308 × 094D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 034F × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 034F × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 034F ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -218,10 +238,14 @@
÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1F1E6 ÷ 0915 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0915 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 094D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0308 × 094D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -252,10 +276,14 @@
÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0600 × 0915 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0915 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0600 × 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3]
÷ 0600 × 0308 ÷ 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0600 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 094D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0308 × 094D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3]
@@ -286,10 +314,14 @@
÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -320,10 +352,14 @@
÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1100 ÷ 0915 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 1100 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1100 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 094D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0308 × 094D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -354,10 +390,14 @@
÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1160 ÷ 0915 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 1160 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1160 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 094D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0308 × 094D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -388,10 +428,14 @@
÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 11A8 ÷ 0915 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 11A8 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 11A8 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 094D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0308 × 094D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -422,10 +466,14 @@
÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC00 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ AC00 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC00 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0308 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -456,14 +504,56 @@
÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC01 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ AC01 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC01 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0308 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0915 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0915 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0915 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0915 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0915 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0915 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0915 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0915 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0915 × 034F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0915 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0915 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0915 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0915 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0915 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0915 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0915 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0915 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0915 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0915 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0915 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0915 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0915 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0915 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0915 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0915 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0915 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0915 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0915 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0915 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0915 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0915 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0915 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 0915 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 0915 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0915 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0915 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0915 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@@ -490,10 +580,14 @@
÷ 231A × 0308 ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 231A ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 231A × 0308 ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 231A ÷ 0915 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 231A × 0308 ÷ 0915 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 231A × 094D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 231A × 0308 × 094D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 231A ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -524,14 +618,56 @@
÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0300 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 094D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 0308 × 094D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 094D ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 094D × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 094D ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 094D × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 094D ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 094D × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 094D ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 094D × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 094D × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 094D × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 094D ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 094D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 094D ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 094D × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 094D × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 094D × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 094D ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 094D × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 094D ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 094D × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 094D ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 094D × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 094D ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 094D × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 094D ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 094D × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 094D ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 094D × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 094D ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 094D × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 094D × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 094D × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 094D × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 094D × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 094D × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 094D × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 094D ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 094D × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@@ -558,10 +694,14 @@
÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200D ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 200D × 0308 ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 200D ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 200D × 094D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 200D × 0308 × 094D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -592,10 +732,14 @@
÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0378 ÷ 0915 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0915 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
÷ 0378 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0378 × 0308 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 094D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 0308 × 094D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [0.3]
÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
@@ -616,6 +760,15 @@
÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0915 ÷ 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 094D × 200D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 093C × 200D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 093C × 094D × 200D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 094D × 0924 × 094D × 092F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) × [9.3] DEVANAGARI LETTER YA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 094D ÷ 0061 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER A (Other) ÷ [0.3]
+÷ 0061 × 094D ÷ 0924 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 003F × 094D ÷ 0924 ÷ # ÷ [0.2] QUESTION MARK (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_Virama_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
@@ -625,6 +778,6 @@
÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
#
-# Lines: 602
+# Lines: 755
#
# EOF
diff --git a/icu4c/source/test/testdata/break_rules/grapheme.txt b/icu4c/source/test/testdata/break_rules/grapheme.txt
index 27498c1..c57a898 100644
--- a/icu4c/source/test/testdata/break_rules/grapheme.txt
+++ b/icu4c/source/test/testdata/break_rules/grapheme.txt
@@ -37,6 +37,13 @@
Extended_Pict = [:ExtPict:];
+# Indic Sequences
+Virama_ = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Virama}]];
+
+LinkingConsonant = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Consonant}]];
+
+ExtCccZwj = [[Extend-[\p{ccc=0}]] ZWJ];
+
GB3: CR LF;
GB4: (Control | CR | LF) ÷;
GB5: . ÷ (Control | CR | LF);
@@ -46,6 +53,7 @@
GB8: (LVT | T) T;
GB11: Extended_Pict Extend* ZWJ Extended_Pict;
+GB9c: LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant;
GB9: . (Extend | ZWJ);
GB9a: . SpacingMark;
diff --git a/icu4c/source/test/testdata/break_rules/readme.txt b/icu4c/source/test/testdata/break_rules/readme.txt
deleted file mode 100644
index 52d54a0..0000000
--- a/icu4c/source/test/testdata/break_rules/readme.txt
+++ /dev/null
@@ -1,76 +0,0 @@
-file: testdata/break_rules/readme.txt
-Copyright (C) 2016 and later: Unicode, Inc. and others.
-License & terms of use: http://www.unicode.org/copyright.html#License
-
-Copyright (c) 2015-2016, International Business Machines Corporation and others. All Rights Reserved.
-
-This directory contains the break iterator reference rule files used by intltest rbbi/RBBIMonkeyTest/testMonkey.
-The rules in this directory track the boundary rules from Unicode UAX 14 and 29. They are interpreted
-to provide an expected set of boundary positions to compare with the results from ICU break iteration.
-
-ICU4J also includes copies of the test reference rules, located in the directory
-main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/
-The copies should be kept synchronized; there should be no differences.
-
-Each set of reference break rules lives in a separate file.
-The list of rule files to run by default is hard coded into the test code, in rbbimonkeytest.cpp.
-
-Each test file includes
- - The type of ICU break iterator to create (word, line, sentence, etc.)
- - The locale to use
- - Character Class definitions
- - Rule definitions
-
-To Do
- - Extend the syntax to support rule tailoring.
-
-
-Character Class Definition:
- name = set_regular_expression;
-
-Rule Definition:
- rule_regular_expression;
-
-name:
- [A-Za-z_][A-Za-z0-9_]*
-
-set_regular_expression:
- The intersection of an ICU regular expression [set] expression and a UnicodeSet pattern.
- (They are mostly the same)
- May include previously defined set names, which are logically expanded in-place.
-
-rule_regular_expression:
- An ICU Regular Expression.
- May include set names, which are logically expanded in-place.
- May include a '÷', which defines a boundary position.
-
-Application of the rules:
- Matching begins at the start of text, or after a previously identified boundary.
- The pseudo-code below finds the next boundary.
-
- while position < end of text
- for each rule
- if the text at position matches this rule
- if the rule has a '÷'
- Boundary is found.
- return the position of the '÷' within the match.
- else
- position = last character of the rule match.
- break from the inner rule loop, continue the outer loop.
-
- This differs from the Unicode UAX algorithm in that each position in the text is
- not tested separately. Instead, when a rule match is found, rule application restarts with the last
- character of the preceding rule match. ICU's break rules also operate this way.
-
- Expressing rules this way simplifies UAX rules that have leading or trailing context; it
- is no longer necessary to write expressions that match the context starting from
- any position within it.
-
- This rule form differs from ICU rules in that the rules are applied sequentially, as they
- are with the Unicode UAX rules. With the main ICU break rules, all are applied in parallel.
-
-Word Dictionaries
- The monkey test does not test dictionary based breaking. The set named 'dictionary' is special,
- as it is in the main ICU rules. For the monkey test, no characters from the dictionary set are
- included in the randomly-generated test data.
-
diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt
index a19419c..d0533c0 100644
--- a/icu4c/source/test/testdata/rbbitst.txt
+++ b/icu4c/source/test/testdata/rbbitst.txt
@@ -164,6 +164,498 @@
#
#<data>•\u0e40\u0e01•\u0e44\u0301\u0e23\u0302\u0303•\u0e40•\u0e40\u0e02•\u0e02• •</data>
+#
+# ICU-13637 and CLDR-10994 - Indic Grapheme Cluster Boundary changes to support aksaras
+# New rule: LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant
+# Sample Chars: LinkingConsonant: \u0915
+# Virama: \u094d [also Extend]
+# ExtCccZwj: \u0308
+# Extend but not ExtCccZwj: \u093A
+<char>
+<data>•\u0915\u094d\u0915•</data>
+<data>•\u0915\u0308\u0308\u094d\u0308\u0308\u0915•</data>
+<data>•\u0915\u0308\u0308\u094d\u0308\u0308•\u0041•</data>
+<data>•\u0915\u0308\u0308\u094d\u093A\u093A•\u0915•</data>
+
+#
+# From cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Bengali.txt
+#
+
+# ব্যক্তিত্বের ;
+<data>•ব্য•ক্তি•ত্বে•র•</data>
+# আত্মবিশ্বাস ;
+<data>•আ•ত্ম•বি•শ্বা•স•</data>
+# ব্যাক্টেরিয়া ;
+<data>•ব্যা•ক্টে•রি•য়া•</data>
+# সমস্যার ;
+<data>•স•ম•স্যা•র•</data>
+# মিশ্রণ ;
+<data>•মি•শ্র•ণ•</data>
+# দুর্গন্ধ ;
+<data>•দু•র্গ•ন্ধ•</data>
+# পরীক্ষার ;
+<data>•প•রী•ক্ষা•র•</data>
+# কোলেস্টেরল ;
+<data>•কো•লে•স্টে•র•ল•</data>
+# ব্যায়ামকে ;
+<data>•ব্যা•য়া•ম•কে•</data>
+# সপ্তাহে ;
+<data>•স•প্তা•হে•</data>
+# পরীক্ষার ;
+<data>•প•রী•ক্ষা•র•</data>
+# চর্বিজাতীয় ;
+<data>•চ•র্বি•জা•তী•য়•</data>
+# নিয়ণ্ত্রণ ;
+<data>•নি•য়•ণ্ত্র•ণ•</data>
+# অবশ্যই ;
+<data>•অ•ব•শ্য•ই•</data>
+# নয়াদিল্লির ;
+<data>•ন•য়া•দি•ল্লি•র•</data>
+# সমীক্ষাটা ;
+<data>•স•মী•ক্ষা•টা•</data>
+# #ভূমিকম্পের ;
+# <data>•ভূ•মি•ক•ম্পের•</data> # line 17 in TestSegmenter-Bengali.txt
+# কেন্দ্রীয় ;
+<data>•কে•ন্দ্রী•য়•</data>
+# উস্কানিই ;
+<data>•উ•স্কা•নি•ই•</data>
+# সমীক্ষকরা ;
+<data>•স•মী•ক্ষ•ক•রা•</data>
+# মুহূর্তে ;
+<data>•মু•হূ•র্তে•</data>
+# সম্পর্কে ;
+<data>•স•ম্প•র্কে•</data>
+# পৌষসংক্রান্তির ;
+<data>•পৌ•ষ•সং•ক্রা•ন্তি•র•</data>
+# মুখ্যমন্ত্রী ;
+<data>•মু•খ্য•ম•ন্ত্রী•</data>
+
+#
+# from cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Devanagari.txt
+#
+
+# संदिग्ध ;
+<data>•सं•दि•ग्ध•</data>
+# सुरक्षा ;
+<data>•सु•र•क्षा•</data>
+# टक्कर ;
+<data>•ट•क्क•र•</data>
+# सत्र ;
+<data>•स•त्र•</data>
+# दक्षिण ;
+<data>•द•क्षि•ण•</data>
+# मिश्रणाने ;
+<data>•मि•श्र•णा•ने•</data>
+# दुर्घटनाग्रस्त ;
+<data>•दु•र्घ•ट•ना•ग्र•स्त•</data>
+# मुहूर्त ;
+<data>•मु•हू•र्त•</data>
+# शर्करायुक्त ;
+<data>•श•र्क•रा•यु•क्त•</data>
+# अंतरराष्ट्रीय ;
+<data>•अं•त•र•रा•ष्ट्री•य•</data>
+# राष्ट्रपति ;
+<data>•रा•ष्ट्र•प•ति•</data>
+# फ्रांस ;
+<data>•फ्रां•स•</data>
+# ट्रैक्टर ;
+<data>•ट्रै•क्ट•र•</data>
+# सिट्रोनेलाचे ;
+<data>•सि•ट्रो•ने•ला•चे•</data>
+# टुक्रालाई ;
+<data>•टु•क्रा•ला•ई•</data>
+# इकट्ठा ;
+<data>•इ•क•ट्ठा•</data>
+# शास्त्र ;
+<data>•शा•स्त्र•</data>
+# स्त्री ;
+<data>•स्त्री•</data>
+# लक्ष्य ;
+<data>•ल•क्ष्य•</data>
+# तीक्ष्ण ;
+<data>•ती•क्ष्ण•</data>
+# Words ;
+<data>•W•o•r•d•s•</data>
+# त्रिवेदी ;
+<data>•त्रि•वे•दी•</data>
+# कृत्रिम ;
+<data>•कृ•त्रि•म•</data>
+# मात्रामा ;
+<data>•मा•त्रा•मा•</data>
+# सिद्धार्थनगर ;
+<data>•सि•द्धा•र्थ•न•ग•र•</data>
+# श्रद्धालुओं ;
+<data>•श्र•द्धा•लु•ओं•</data>
+# वृद्धिसँग ;
+<data>•वृ•द्धि•सँ•ग•</data>
+# अंतःज्ञानी ;
+<data>•अं•तः•ज्ञा•नी••</data>
+# गन्नदी॑धिम ;
+<data>•ग•न्न•दी॑•धि•म•</data>
+# प्प्रप॑द्ये॒ ;
+<data>•प्प्र•प॑•द्ये॒•</data>
+# मनस्तापः ;
+<data>•म•न•स्ता•पः•</data>
+# हविष्करोमि ;
+<data>•ह•वि•ष्क•रो•मि•</data>
+# अहर्पतिः ;
+<data>•अ•ह•र्प•तिः•</data>
+# गच्छति ;
+<data>•ग•च्छ•ति•</data>
+# अयम् ;
+<data>•अ•य•म्•</data>
+# शिवश्चोदति ;
+<data>•शि•व•श्चो•द•ति•</data>
+# मनष्टालयति ;
+<data>•म•न•ष्टा•ल•य•ति•</data>
+# अश्वष्ठक्कस्य ;
+<data>•अ•श्व•ष्ठ•क्क•स्य•</data>
+# दुष्पुत्रः ;
+<data>•दु•ष्पु•त्रः•</data>
+# द्विःपक्वम् ;
+<data>•द्विः•प•क्व•म्•</data>
+# द्विष्कामः ;
+<data>•द्वि•ष्का•मः•</data>
+# भर्तुर्भोगः ;
+<data>•भ•र्तु•र्भो•गः•</data>
+# शॆत्युल ;
+<data>•शॆ•त्यु•ल••</data>
+# महारॆन्य ;
+<data>•म•हा•रॆ•न्य•</data>
+# सॆक्युल ;
+<data>•सॆ•क्यु•ल•</data>
+# ल्यॊदुर ;
+<data>•ल्यॊ•दु•र•</data>
+# फयॊक ;
+<data>•फ•यॊ•क•</data>
+# मॊहन्युव ;
+<data>•मॊ•ह•न्यु•व•</data>
+# अन्यर ;
+<data>•अ•न्य•र•</data>
+# ख्वजि ;
+<data>•ख्व•जि•</data>
+# खॅरिन्य ;
+<data>•खॅ•रि•न्य•</data>
+# उच्छ्वास ;
+<data>•उ•च्छ्वा•स•</data>
+# व्यक्तिमत्व ;
+<data>•व्य•क्ति•म•त्व•</data>
+# दातांच्यामध्ये ;
+<data>•दा•तां•च्या•म•ध्ये•</data>
+# दुर्गंधी ;
+<data>•दु•र्गं•धी•</data>
+# दुर्गंधीपासूनसुद्धा ;
+<data>•दु•र्गं•धी•पा•सू•न•सु•द्धा•</data>
+# नित्यकर्मामध्ये ;
+<data>•नि•त्य•क•र्मा•म•ध्ये•</data>
+# आजारांपासूनसुद्धा ;
+<data>•आ•जा•रां•पा•सू•न•सु•द्धा•</data>
+# भाज्यांमध्ये ;
+<data>•भा•ज्यां•म•ध्ये•</data>
+# उच्छ्वासाच्या ;
+<data>•उ•च्छ्वा•सा•च्या•</data>
+# सुकिल्लीं ;
+<data>•सु•कि•ल्लीं•</data>
+# स्लिपां ;
+<data>•स्लि•पां•</data>
+# मिसळिल्ल्यान ;
+<data>•मि•स•ळि•ल्ल्या•न•</data>
+# रोंप्यांची ;
+<data>•रों•प्यां•ची•</data>
+# वर्सांतल्यान ;
+<data>•व•र्सां•त•ल्या•न•</data>
+# रोंप्याच्या ;
+<data>•रों•प्या•च्या•</data>
+# नाशिल्ल्यान ;
+<data>•ना•शि•ल्ल्या•न•</data>
+# जिल्ल्याच्या ;
+<data>•जि•ल्ल्या•च्या•</data>
+# कुरुक्षेत्रांतल्या ;
+<data>•कु•रु•क्षे•त्रां•त•ल्या•</data>
+# भाज्ज्यांची ;
+<data>•भा•ज्ज्यां•ची•</data>
+# सिट्रोनेलाका ;
+<data>•सि•ट्रो•ने•ला•का•</data>
+# गरिनुपर्छ ;
+<data>•ग•रि•नु•प•र्छ•</data>
+# सामान्यतः ;
+<data>•सा•मा•न्य•तः•</data>
+# वृद्धिसँग ;
+<data>•वृ•द्धि•सँ•ग•</data>
+# रिपोर्टनि ;
+<data>•रि•पो•र्ट•नि•</data>
+# टोस्टर्ज़ ;
+<data>•टो•स्ट•र्ज़•</data>
+# वक्तव्य ;
+<data>•व•क्त•व्य•</data>
+# प्रक्रिया ;
+<data>•प्र•क्रि•या•</data>
+# निर्दिष्ट ;
+<data>•नि•र्दि•ष्ट•</data>
+# अस्वीकृत ;
+<data>•अ•स्वी•कृ•त•</data>
+# प्रयोक्ता ;
+<data>•प्र•यो•क्ता•</data>
+# प्रकार्यक ;
+<data>•प्र•का•र्य•क•</data>
+# ट्रेक्टरु ;
+<data>•ट्रे•क्ट•रु•</data>
+# स्थानधारक ;
+<data>•स्था•न•धा•र•क•</data>
+# प्रकार्यक ;
+<data>•प्र•का•र्य•क•</data>
+# अनुच्छेदसँ ;
+<data>•अ•नु•च्छे•द•सँ•</data>
+# गर्मीपदु ;
+<data>•ग•र्मी•प•दु•</data>
+# शास्त्रु ;
+<data>•शा•स्त्रु•</data>
+# इन्द्री ;
+<data>•इ•न्द्री•</data>
+# श्रधालू ;
+<data>•श्र•धा•लू•</data>
+# आस्तिकु ;
+<data>•आ•स्ति•कु•</data>
+# सकार्थो ;
+<data>•स•का•र्थो•</data>
+# सन्ॿंधु ;
+<data>•स•न्ॿं•धु•</data>
+# मनुक्खो ;
+<data>•म•नु•क्खो•</data>
+# हानिफ्राय ;
+<data>•हा•नि•फ्रा•य•</data>
+# दैथाइहरग्रा ;
+<data>•दै•था•इ•ह•र•ग्रा•</data>
+# बोसोरब्रै ;
+<data>•बो•सो•र•ब्रै•</data>
+# रांखान्थियारि ;
+<data>•रां•खा•न्थि•या•रि•</data>
+# खान्थियाव ;
+<data>•खा•न्थि•या•व•</data>
+# स्लिप्स ;
+<data>•स्लि•प्स•</data>
+
+#
+# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Gujarati.txt
+#
+
+# અંગ્રેજી ;
+<data>•અં•ગ્રે•જી•</data>
+# શબ્દકોશ ;
+<data>•શ•બ્દ•કો•શ•</data>
+# બાપ્તિસ્મા ;
+<data>•બા•પ્તિ•સ્મા•</data>
+# મિસ્ત્રી ;
+<data>•મિ•સ્ત્રી•</data>
+# સિક્કા ;
+<data>•સિ•ક્કા•</data>
+#6 ; એકત્રીસમું ; એ÷કત્રી÷સ÷મું
+
+# સ્વસ્થાને ;
+<data>•સ્વ•સ્થા•ને•</data>
+# પ્રશ્નાર્થ ;
+<data>•પ્ર•શ્ના•ર્થ•</data>
+# વર્તમાનકૃદંત ;
+<data>•વ•ર્ત•મા•ન•કૃ•દં•ત•</data>
+# વાક્યની ;
+<data>•વા•ક્ય•ની•</data>
+# સાર્વજનિક ;
+<data>•સા•ર્વ•જ•નિ•ક•</data>
+# સમાપ્તિ ;
+<data>•સ•મા•પ્તિ•</data>
+# પધાર્યા ;
+<data>•પ•ધા•ર્યા•</data>
+# વ્યક્તિત્વને ;
+<data>•વ્ય•ક્તિ•ત્વ•ને•</data>
+# આત્મવિશ્વાસ ;
+<data>•આ•ત્મ•વિ•શ્વા•સ•</data>
+# વ્યાયામથી ;
+<data>•વ્યા•યા•મ•થી•</data>
+# યુક્ત ;
+<data>•યુ•ક્ત•</data>
+# #18 ; પુરુષોત્તમ ;
+<data>•પ•રુ•ષો•ત્ત•મ•</data>
+# કેન્દ્રીય ;
+<data>•કે•ન્દ્રી•ય•</data>
+# ક્ષત્રિય ;
+<data>•ક્ષ•ત્રિ•ય•</data>
+# ફોર્મ્યુલા ;
+<data>•ફો•ર્મ્યુ•લા•</data>
+# કેન્દ્રમાં ;
+<data>•કે•ન્દ્ર•માં•</data>
+# સ્પ્રિંગ ;
+<data>•સ્પ્રિં•ગ•</data>
+# પ્રારંભ ;
+<data>•પ્રા•રં•ભ•</data>
+# વિદ્યાર્થીઓ ;
+<data>•વિ•દ્યા•ર્થી•ઓ•</data>
+# સ્વાર્થની ;
+<data>•સ્વા•ર્થ•ની•</data>
+# લોન્ગયરબ્યેન ;
+<data>•લો•ન્ગ•ય•ર•બ્યે•ન•</data>
+# સ્થાનિક ;
+<data>•સ્થા•નિ•ક•</data>
+# બિલ્ડિંગની ;
+<data>•બિ•લ્ડિં•ગ•ની•</data>
+# ઉત્પાદક ;
+<data>•ઉ•ત્પા•દ•ક•</data>
+# ઝૂકાવ્યું ;
+<data>•ઝૂ•કા•વ્યું•</data>
+# પ્રપૌત્ર ;
+<data>•પ્ર•પૌ•ત્ર•</data>
+
+
+#
+# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Malayalam.txt
+#
+
+# സ്ഥാനമൊഴിയുക ;
+<data>•സ്ഥാ•ന•മൊ•ഴി•യു•ക•</data>
+# വ്യക്തി ;
+<data>•വ്യ•ക്തി•</data>
+# കൗമാരക്കാരി ;
+<data>•കൗ•മാ•ര•ക്കാ•രി•</data>
+# കല്യാണം ;
+<data>•ക•ല്യാ•ണം•</data>
+# റദ്ദാക്കിയ ;
+<data>•റ•ദ്ദാ•ക്കി•യ•</data>
+# വ്യാപിക്കുക ;
+<data>•വ്യാ•പി•ക്കു•ക•</data>
+# സ്തുതി ;
+<data>•സ്തു•തി•</data>
+# ഭക്ഷ്യസുരക്ഷ ;
+<data>•ഭ•ക്ഷ്യ•സു•ര•ക്ഷ•</data>
+# പൂഴ്ത്തിവെക്കുക ;
+<data>•പൂ•ഴ്ത്തി•വെ•ക്കു•ക•</data>
+# നിശ്ചയിച്ച ;
+<data>•നി•ശ്ച•യി•ച്ച•</data>
+# പ്രശ്നം ;
+<data>•പ്ര•ശ്നം•</data>
+# സംസ്ഥാനം ;
+<data>•സം•സ്ഥാ•നം•</data>
+# പ്രോത്സാഹം ;
+<data>•പ്രോ•ത്സാ•ഹം•</data>
+# ഉദ്യോഗസ്ഥ ;
+<data>•ഉ•ദ്യോ•ഗ•സ്ഥ•</data>
+# സ്ഥാപനം ;
+<data>•സ്ഥാ•പ•നം•</data>
+# അത്ഭുതം ;
+<data>•അ•ത്ഭു•തം•</data>
+# പ്രഖ്യാപനം ;
+<data>•പ്ര•ഖ്യാ•പ•നം•</data>
+
+#
+# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Odia.txt
+#
+
+# ଅସ୍ବରାନ୍ତ ;
+<data>•ଅ•ସ୍ବ•ରା•ନ୍ତ•</data>
+# ଅକର୍ମଣ୍ୟତା ;
+<data>•ଅ•କ•ର୍ମ•ଣ୍ୟ•ତା•</data>
+# ଅକର୍ମା ;
+<data>•ଅ•କ•ର୍ମା•</data>
+# ଆକର୍ଣ୍ଣ ;
+<data>•ଆ•କ•ର୍ଣ୍ଣ•</data>
+# ଆକସ୍ମିକୀ ;
+<data>•ଆ•କ•ସ୍ମି•କୀ•</data>
+# ଇଞ୍ଚମନୌତୀ ;
+<data>•ଇ•ଞ୍ଚ•ମ•ନୌ•ତୀ•</data>
+# ଅଗତ୍ୟା ;
+<data>•ଅ•ଗ•ତ୍ୟା•</data>
+# ଇନ୍ଦ୍ରଧ୍ବଜ ;
+<data>•ଇ•ନ୍ଦ୍ର•ଧ୍ବ•ଜ•</data>
+# ଊରୁତ୍ରାଣ ;
+<data>•ଊ•ରୁ•ତ୍ରା•ଣ•</data>
+# ଐଶ୍ବର୍ଯ୍ୟ ;
+<data>•ଐ•ଶ୍ବ•ର୍ଯ୍ୟ•</data>
+# ଅଗତ୍ୟା ;
+<data>•ଅ•ଗ•ତ୍ୟା•</data>
+# ଔପନ୍ୟାସିକ ;
+<data>•ଔ•ପ•ନ୍ୟା•ସି•କ•</data>
+# ଔଷ୍ଠ୍ୟ ;
+<data>•ଔ•ଷ୍ଠ୍ୟ•</data>
+# ଯଜୁର୍ବେଦୀ ;
+<data>•ଯ•ଜୁ•ର୍ବେ•ଦୀ•</data>
+# ପକ୍ଷ୍ମ ;
+<data>•ପ•କ୍ଷ୍ମ•</data>
+# ପଞ୍ଚଭୌତିକ ;
+<data>•ପ•ଞ୍ଚ•ଭୌ•ତି•କ•</data>
+# ତନ୍ତ୍ରିକାତନ୍ତ୍ର ;
+<data>•ତ•ନ୍ତ୍ରି•କା•ତ•ନ୍ତ୍ର•</data>
+# ସ୍ନାୟୁତନ୍ତ୍ର ;
+<data>•ସ୍ନା•ୟୁ•ତ•ନ୍ତ୍ର•</data>
+# ତପ୍ତକୁଣ୍ଡ ;
+<data>•ତ•ପ୍ତ•କୁ•ଣ୍ଡ•</data>
+# ଚଣ୍ଡୋଦରୀ ;
+<data>•ଚ•ଣ୍ଡୋ•ଦ•ରୀ•</data>
+# ଝଙ୍କାର ;
+<data>•ଝ•ଙ୍କା•ର•</data>
+# କଙ୍କପୃଷ୍ଠୀ ;
+<data>•କ•ଙ୍କ•ପୃ•ଷ୍ଠୀ•</data>
+# ଖଣ୍ଡନୀୟ ;
+<data>•ଖ•ଣ୍ଡ•ନୀ•ୟ•</data>
+# ଖମ୍ଭାବତୀ ;
+<data>•ଖ•ମ୍ଭା•ବ•ତୀ•</data>
+# ଘାଣେନ୍ଦ୍ରିୟ ;
+<data>•ଘା•ଣେ•ନ୍ଦ୍ରି•ୟ•</data>
+# ଘୁଞ୍ଚାଇବା ;
+<data>•ଘୁ•ଞ୍ଚା•ଇ•ବା•</data>
+
+#
+# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Telugu.txt
+#
+
+# అదృశ్యం ;
+<data>•అ•దృ•శ్యం•</data>
+# ఉద్యోగాలు ;
+<data>•ఉ•ద్యో•గా•లు•</data>
+# ఉన్నాయన్న ;
+<data>•ఉ•న్నా•య•న్న•</data>
+# కార్యదర్శి ;
+<data>•కా•ర్య•ద•ర్శి•</data>
+# సామర్థ్యం ;
+<data>•సా•మ•ర్థ్యం•</data>
+# అభిప్రాయం ;
+<data>•అ•భి•ప్రా•యం•</data>
+# రాష్ట్రస్థాయి ;
+<data>•రా•ష్ట్ర•స్థా•యి•</data>
+# నిర్లక్ష్యం ;
+<data>•ని•ర్ల•క్ష్యం•</data>
+# వ్యాజ్యాలన్నీ ;
+<data>•వ్యా•జ్యా•ల•న్నీ•</data>
+# న్యాయవ్యవస్థ ;
+<data>•న్యా•య•వ్య•వ•స్థ•</data>
+# వ్యాఖ్యలు ;
+<data>•వ్యా•ఖ్య•లు•</data>
+# నేతృత్వం ;
+<data>•నే•తృ•త్వం•</data>
+# ఉద్రిక్తత ;
+<data>•ఉ•ద్రి•క్త•త•</data>
+# వ్యాఖ్యలు ;
+<data>•వ్యా•ఖ్య•లు•</data>
+# అత్యున్నత ;
+<data>•అ•త్యు•న్న•త•</data>
+# మనస్పర్ధలు ;
+<data>•మ•న•స్ప•ర్ధ•లు•</data>
+# కార్యక్రమం ;
+<data>•కా•ర్య•క్ర•మం•</data>
+# గుప్పిస్తున్నారు ;
+<data>•గు•ప్పి•స్తు•న్నా•రు•</data>
+# నటిస్తున్నారు ;
+<data>•న•టి•స్తు•న్నా•రు•</data>
+# ద్వితీయార్ధం ;
+<data>•ద్వి•తీ•యా•ర్ధం•</data>
+# జీర్ణవ్యవస్థ ;
+<data>•జీ•ర్ణ•వ్య•వ•స్థ•</data>
+# ఉత్సాహం ;
+<data>•ఉ•త్సా•హం•</data>
+# హృద్రోగాలు ;
+<data>•హృ•ద్రో•గా•లు•</data>
+# పాల్గొనాల్సింది ;
+<data>•పా•ల్గొ•నా•ల్సిం•ది•</data>
+# మార్గదర్శకాలు ;
+<data>•మా•ర్గ•ద•ర్శ•కా•లు•</data>
+
########################################################################################
#
diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar
index e878e89..d1d89d2 100644
--- a/icu4j/main/shared/data/icudata.jar
+++ b/icu4j/main/shared/data/icudata.jar
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:daf3da9b26c03ccb859820cecb59e45827db628cc63730995287f0eecb648b1c
-size 12842333
+oid sha256:5c8773434e9708bca02ad11319c35e01f29f62748851a38ae89de1334c279cca
+size 12842785
diff --git a/icu4j/main/shared/data/icutzdata.jar b/icu4j/main/shared/data/icutzdata.jar
index 6cfc658..6788ecc 100644
--- a/icu4j/main/shared/data/icutzdata.jar
+++ b/icu4j/main/shared/data/icutzdata.jar
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:e20cb098cebe9ec0c5ee25c3b28d1918fd4147525e8f893a9dcaa763b962409b
-size 94073
+oid sha256:744f6e6d4a252e51b13ac3c0b9a580e21ba469cd2959b2561e3022636ecf126c
+size 94060
diff --git a/icu4j/main/shared/data/testdata.jar b/icu4j/main/shared/data/testdata.jar
index 23dcfaf..55aaef1 100644
--- a/icu4j/main/shared/data/testdata.jar
+++ b/icu4j/main/shared/data/testdata.jar
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:3a09da92d612c34c7f468f073b356c7323e14f0fb53b0eb34483beed0a296ac4
-size 723338
+oid sha256:bcb07f3738f1e8c216ac47e9b091a0946e107e3f74251e6be3a21adba8dd71c4
+size 723370
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
index ba33b0a..dab7ce0 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
@@ -93,6 +93,9 @@
UnicodeSet fHangulSet;
UnicodeSet fZWJSet;
UnicodeSet fExtendedPictSet;
+ UnicodeSet fViramaSet;
+ UnicodeSet fLinkingConsonantSet;
+ UnicodeSet fExtCccZwjSet;
UnicodeSet fAnySet;
@@ -122,6 +125,11 @@
fHangulSet.addAll(fLVTSet);
fExtendedPictSet = new UnicodeSet("[:Extended_Pictographic:]");
+ fViramaSet = new UnicodeSet("[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&"
+ + "\\p{Indic_Syllabic_Category=Virama}]");
+ fLinkingConsonantSet = new UnicodeSet("[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}&"
+ + "\\p{Indic_Syllabic_Category=Consonant}]");
+ fExtCccZwjSet = new UnicodeSet("[[\\p{gcb=Extend}-\\p{ccc=0}] \\p{gcb=ZWJ}]");
fAnySet = new UnicodeSet("[\\u0000-\\U0010ffff]");
@@ -138,6 +146,9 @@
fSets.add(fAnySet);
fSets.add(fZWJSet);
fSets.add(fExtendedPictSet);
+ fSets.add(fViramaSet);
+ fSets.add(fLinkingConsonantSet);
+ fSets.add(fExtCccZwjSet);
}
@@ -253,6 +264,22 @@
continue;
}
+ // Rule (GB9.3; named GB9c in break_rules/grapheme.txt) LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant
+ // Note: Viramas are also included in the ExtCccZwj class.
+ if (fLinkingConsonantSet.contains(c2)) {
+ int pi = p1;
+ boolean sawVirama = false;
+ while (pi > 0 && fExtCccZwjSet.contains(fText.codePointAt(pi))) {
+ if (fViramaSet.contains(fText.codePointAt(pi))) {
+ sawVirama = true;
+ }
+ pi = fText.offsetByCodePoints(pi, -1);
+ }
+ if (sawVirama && fLinkingConsonantSet.contains(fText.codePointAt(pi))) {
+ continue;
+ }
+ }
+
// Rule (GB11) Extended_Pictographic ZWJ x Extended_Pictographic
if (fExtendedPictSet.contains(cBase) && fZWJSet.contains(c1) && fExtendedPictSet.contains(c2) ) {
continue;
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/grapheme.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/grapheme.txt
index 27498c1..c57a898 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/grapheme.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/grapheme.txt
@@ -37,6 +37,13 @@
Extended_Pict = [:ExtPict:];
+# Indic Sequences
+Virama_ = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Virama}]];
+
+LinkingConsonant = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Consonant}]];
+
+ExtCccZwj = [[Extend-[\p{ccc=0}]] ZWJ];
+
GB3: CR LF;
GB4: (Control | CR | LF) ÷;
GB5: . ÷ (Control | CR | LF);
@@ -46,6 +53,7 @@
GB8: (LVT | T) T;
GB11: Extended_Pict Extend* ZWJ Extended_Pict;
+GB9c: LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant;
GB9: . (Extend | ZWJ);
GB9a: . SpacingMark;
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/rbbitst.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/rbbitst.txt
index a19419c..d0533c0 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/rbbitst.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/rbbitst.txt
@@ -164,6 +164,498 @@
#
#<data>•\u0e40\u0e01•\u0e44\u0301\u0e23\u0302\u0303•\u0e40•\u0e40\u0e02•\u0e02• •</data>
+#
+# ICU-13637 and CLDR-10994 - Indic Grapheme Cluster Boundary changes to support aksaras
+# New rule: LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant
+# Sample Chars: LinkingConsonant: \u0915
+# Virama: \u094d [also Extend]
+# ExtCccZwj: \u0308
+# Extend but not ExtCccZwj: \u093A
+<char>
+<data>•\u0915\u094d\u0915•</data>
+<data>•\u0915\u0308\u0308\u094d\u0308\u0308\u0915•</data>
+<data>•\u0915\u0308\u0308\u094d\u0308\u0308•\u0041•</data>
+<data>•\u0915\u0308\u0308\u094d\u093A\u093A•\u0915•</data>
+
+#
+# From cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Bengali.txt
+#
+
+# ব্যক্তিত্বের ;
+<data>•ব্য•ক্তি•ত্বে•র•</data>
+# আত্মবিশ্বাস ;
+<data>•আ•ত্ম•বি•শ্বা•স•</data>
+# ব্যাক্টেরিয়া ;
+<data>•ব্যা•ক্টে•রি•য়া•</data>
+# সমস্যার ;
+<data>•স•ম•স্যা•র•</data>
+# মিশ্রণ ;
+<data>•মি•শ্র•ণ•</data>
+# দুর্গন্ধ ;
+<data>•দু•র্গ•ন্ধ•</data>
+# পরীক্ষার ;
+<data>•প•রী•ক্ষা•র•</data>
+# কোলেস্টেরল ;
+<data>•কো•লে•স্টে•র•ল•</data>
+# ব্যায়ামকে ;
+<data>•ব্যা•য়া•ম•কে•</data>
+# সপ্তাহে ;
+<data>•স•প্তা•হে•</data>
+# পরীক্ষার ;
+<data>•প•রী•ক্ষা•র•</data>
+# চর্বিজাতীয় ;
+<data>•চ•র্বি•জা•তী•য়•</data>
+# নিয়ণ্ত্রণ ;
+<data>•নি•য়•ণ্ত্র•ণ•</data>
+# অবশ্যই ;
+<data>•অ•ব•শ্য•ই•</data>
+# নয়াদিল্লির ;
+<data>•ন•য়া•দি•ল্লি•র•</data>
+# সমীক্ষাটা ;
+<data>•স•মী•ক্ষা•টা•</data>
+# #ভূমিকম্পের ;
+# <data>•ভূ•মি•ক•ম্পের•</data> # line 17 in TestSegmenter-Bengali.txt
+# কেন্দ্রীয় ;
+<data>•কে•ন্দ্রী•য়•</data>
+# উস্কানিই ;
+<data>•উ•স্কা•নি•ই•</data>
+# সমীক্ষকরা ;
+<data>•স•মী•ক্ষ•ক•রা•</data>
+# মুহূর্তে ;
+<data>•মু•হূ•র্তে•</data>
+# সম্পর্কে ;
+<data>•স•ম্প•র্কে•</data>
+# পৌষসংক্রান্তির ;
+<data>•পৌ•ষ•সং•ক্রা•ন্তি•র•</data>
+# মুখ্যমন্ত্রী ;
+<data>•মু•খ্য•ম•ন্ত্রী•</data>
+
+#
+# from cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Devanagari.txt
+#
+
+# संदिग्ध ;
+<data>•सं•दि•ग्ध•</data>
+# सुरक्षा ;
+<data>•सु•र•क्षा•</data>
+# टक्कर ;
+<data>•ट•क्क•र•</data>
+# सत्र ;
+<data>•स•त्र•</data>
+# दक्षिण ;
+<data>•द•क्षि•ण•</data>
+# मिश्रणाने ;
+<data>•मि•श्र•णा•ने•</data>
+# दुर्घटनाग्रस्त ;
+<data>•दु•र्घ•ट•ना•ग्र•स्त•</data>
+# मुहूर्त ;
+<data>•मु•हू•र्त•</data>
+# शर्करायुक्त ;
+<data>•श•र्क•रा•यु•क्त•</data>
+# अंतरराष्ट्रीय ;
+<data>•अं•त•र•रा•ष्ट्री•य•</data>
+# राष्ट्रपति ;
+<data>•रा•ष्ट्र•प•ति•</data>
+# फ्रांस ;
+<data>•फ्रां•स•</data>
+# ट्रैक्टर ;
+<data>•ट्रै•क्ट•र•</data>
+# सिट्रोनेलाचे ;
+<data>•सि•ट्रो•ने•ला•चे•</data>
+# टुक्रालाई ;
+<data>•टु•क्रा•ला•ई•</data>
+# इकट्ठा ;
+<data>•इ•क•ट्ठा•</data>
+# शास्त्र ;
+<data>•शा•स्त्र•</data>
+# स्त्री ;
+<data>•स्त्री•</data>
+# लक्ष्य ;
+<data>•ल•क्ष्य•</data>
+# तीक्ष्ण ;
+<data>•ती•क्ष्ण•</data>
+# Words ;
+<data>•W•o•r•d•s•</data>
+# त्रिवेदी ;
+<data>•त्रि•वे•दी•</data>
+# कृत्रिम ;
+<data>•कृ•त्रि•म•</data>
+# मात्रामा ;
+<data>•मा•त्रा•मा•</data>
+# सिद्धार्थनगर ;
+<data>•सि•द्धा•र्थ•न•ग•र•</data>
+# श्रद्धालुओं ;
+<data>•श्र•द्धा•लु•ओं•</data>
+# वृद्धिसँग ;
+<data>•वृ•द्धि•सँ•ग•</data>
+# अंतःज्ञानी ;
+<data>•अं•तः•ज्ञा•नी••</data>
+# गन्नदी॑धिम ;
+<data>•ग•न्न•दी॑•धि•म•</data>
+# प्प्रप॑द्ये॒ ;
+<data>•प्प्र•प॑•द्ये॒•</data>
+# मनस्तापः ;
+<data>•म•न•स्ता•पः•</data>
+# हविष्करोमि ;
+<data>•ह•वि•ष्क•रो•मि•</data>
+# अहर्पतिः ;
+<data>•अ•ह•र्प•तिः•</data>
+# गच्छति ;
+<data>•ग•च्छ•ति•</data>
+# अयम् ;
+<data>•अ•य•म्•</data>
+# शिवश्चोदति ;
+<data>•शि•व•श्चो•द•ति•</data>
+# मनष्टालयति ;
+<data>•म•न•ष्टा•ल•य•ति•</data>
+# अश्वष्ठक्कस्य ;
+<data>•अ•श्व•ष्ठ•क्क•स्य•</data>
+# दुष्पुत्रः ;
+<data>•दु•ष्पु•त्रः•</data>
+# द्विःपक्वम् ;
+<data>•द्विः•प•क्व•म्•</data>
+# द्विष्कामः ;
+<data>•द्वि•ष्का•मः•</data>
+# भर्तुर्भोगः ;
+<data>•भ•र्तु•र्भो•गः•</data>
+# शॆत्युल ;
+<data>•शॆ•त्यु•ल••</data>
+# महारॆन्य ;
+<data>•म•हा•रॆ•न्य•</data>
+# सॆक्युल ;
+<data>•सॆ•क्यु•ल•</data>
+# ल्यॊदुर ;
+<data>•ल्यॊ•दु•र•</data>
+# फयॊक ;
+<data>•फ•यॊ•क•</data>
+# मॊहन्युव ;
+<data>•मॊ•ह•न्यु•व•</data>
+# अन्यर ;
+<data>•अ•न्य•र•</data>
+# ख्वजि ;
+<data>•ख्व•जि•</data>
+# खॅरिन्य ;
+<data>•खॅ•रि•न्य•</data>
+# उच्छ्वास ;
+<data>•उ•च्छ्वा•स•</data>
+# व्यक्तिमत्व ;
+<data>•व्य•क्ति•म•त्व•</data>
+# दातांच्यामध्ये ;
+<data>•दा•तां•च्या•म•ध्ये•</data>
+# दुर्गंधी ;
+<data>•दु•र्गं•धी•</data>
+# दुर्गंधीपासूनसुद्धा ;
+<data>•दु•र्गं•धी•पा•सू•न•सु•द्धा•</data>
+# नित्यकर्मामध्ये ;
+<data>•नि•त्य•क•र्मा•म•ध्ये•</data>
+# आजारांपासूनसुद्धा ;
+<data>•आ•जा•रां•पा•सू•न•सु•द्धा•</data>
+# भाज्यांमध्ये ;
+<data>•भा•ज्यां•म•ध्ये•</data>
+# उच्छ्वासाच्या ;
+<data>•उ•च्छ्वा•सा•च्या•</data>
+# सुकिल्लीं ;
+<data>•सु•कि•ल्लीं•</data>
+# स्लिपां ;
+<data>•स्लि•पां•</data>
+# मिसळिल्ल्यान ;
+<data>•मि•स•ळि•ल्ल्या•न•</data>
+# रोंप्यांची ;
+<data>•रों•प्यां•ची•</data>
+# वर्सांतल्यान ;
+<data>•व•र्सां•त•ल्या•न•</data>
+# रोंप्याच्या ;
+<data>•रों•प्या•च्या•</data>
+# नाशिल्ल्यान ;
+<data>•ना•शि•ल्ल्या•न•</data>
+# जिल्ल्याच्या ;
+<data>•जि•ल्ल्या•च्या•</data>
+# कुरुक्षेत्रांतल्या ;
+<data>•कु•रु•क्षे•त्रां•त•ल्या•</data>
+# भाज्ज्यांची ;
+<data>•भा•ज्ज्यां•ची•</data>
+# सिट्रोनेलाका ;
+<data>•सि•ट्रो•ने•ला•का•</data>
+# गरिनुपर्छ ;
+<data>•ग•रि•नु•प•र्छ•</data>
+# सामान्यतः ;
+<data>•सा•मा•न्य•तः•</data>
+# वृद्धिसँग ;
+<data>•वृ•द्धि•सँ•ग•</data>
+# रिपोर्टनि ;
+<data>•रि•पो•र्ट•नि•</data>
+# टोस्टर्ज़ ;
+<data>•टो•स्ट•र्ज़•</data>
+# वक्तव्य ;
+<data>•व•क्त•व्य•</data>
+# प्रक्रिया ;
+<data>•प्र•क्रि•या•</data>
+# निर्दिष्ट ;
+<data>•नि•र्दि•ष्ट•</data>
+# अस्वीकृत ;
+<data>•अ•स्वी•कृ•त•</data>
+# प्रयोक्ता ;
+<data>•प्र•यो•क्ता•</data>
+# प्रकार्यक ;
+<data>•प्र•का•र्य•क•</data>
+# ट्रेक्टरु ;
+<data>•ट्रे•क्ट•रु•</data>
+# स्थानधारक ;
+<data>•स्था•न•धा•र•क•</data>
+# प्रकार्यक ;
+<data>•प्र•का•र्य•क•</data>
+# अनुच्छेदसँ ;
+<data>•अ•नु•च्छे•द•सँ•</data>
+# गर्मीपदु ;
+<data>•ग•र्मी•प•दु•</data>
+# शास्त्रु ;
+<data>•शा•स्त्रु•</data>
+# इन्द्री ;
+<data>•इ•न्द्री•</data>
+# श्रधालू ;
+<data>•श्र•धा•लू•</data>
+# आस्तिकु ;
+<data>•आ•स्ति•कु•</data>
+# सकार्थो ;
+<data>•स•का•र्थो•</data>
+# सन्ॿंधु ;
+<data>•स•न्ॿं•धु•</data>
+# मनुक्खो ;
+<data>•म•नु•क्खो•</data>
+# हानिफ्राय ;
+<data>•हा•नि•फ्रा•य•</data>
+# दैथाइहरग्रा ;
+<data>•दै•था•इ•ह•र•ग्रा•</data>
+# बोसोरब्रै ;
+<data>•बो•सो•र•ब्रै•</data>
+# रांखान्थियारि ;
+<data>•रां•खा•न्थि•या•रि•</data>
+# खान्थियाव ;
+<data>•खा•न्थि•या•व•</data>
+# स्लिप्स ;
+<data>•स्लि•प्स•</data>
+
+#
+# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Gujarati.txt
+#
+
+# અંગ્રેજી ;
+<data>•અં•ગ્રે•જી•</data>
+# શબ્દકોશ ;
+<data>•શ•બ્દ•કો•શ•</data>
+# બાપ્તિસ્મા ;
+<data>•બા•પ્તિ•સ્મા•</data>
+# મિસ્ત્રી ;
+<data>•મિ•સ્ત્રી•</data>
+# સિક્કા ;
+<data>•સિ•ક્કા•</data>
+#6 ; એકત્રીસમું ; એ÷કત્રી÷સ÷મું
+
+# સ્વસ્થાને ;
+<data>•સ્વ•સ્થા•ને•</data>
+# પ્રશ્નાર્થ ;
+<data>•પ્ર•શ્ના•ર્થ•</data>
+# વર્તમાનકૃદંત ;
+<data>•વ•ર્ત•મા•ન•કૃ•દં•ત•</data>
+# વાક્યની ;
+<data>•વા•ક્ય•ની•</data>
+# સાર્વજનિક ;
+<data>•સા•ર્વ•જ•નિ•ક•</data>
+# સમાપ્તિ ;
+<data>•સ•મા•પ્તિ•</data>
+# પધાર્યા ;
+<data>•પ•ધા•ર્યા•</data>
+# વ્યક્તિત્વને ;
+<data>•વ્ય•ક્તિ•ત્વ•ને•</data>
+# આત્મવિશ્વાસ ;
+<data>•આ•ત્મ•વિ•શ્વા•સ•</data>
+# વ્યાયામથી ;
+<data>•વ્યા•યા•મ•થી•</data>
+# યુક્ત ;
+<data>•યુ•ક્ત•</data>
+# #18 ; પુરુષોત્તમ ;
+<data>•પ•રુ•ષો•ત્ત•મ•</data>
+# કેન્દ્રીય ;
+<data>•કે•ન્દ્રી•ય•</data>
+# ક્ષત્રિય ;
+<data>•ક્ષ•ત્રિ•ય•</data>
+# ફોર્મ્યુલા ;
+<data>•ફો•ર્મ્યુ•લા•</data>
+# કેન્દ્રમાં ;
+<data>•કે•ન્દ્ર•માં•</data>
+# સ્પ્રિંગ ;
+<data>•સ્પ્રિં•ગ•</data>
+# પ્રારંભ ;
+<data>•પ્રા•રં•ભ•</data>
+# વિદ્યાર્થીઓ ;
+<data>•વિ•દ્યા•ર્થી•ઓ•</data>
+# સ્વાર્થની ;
+<data>•સ્વા•ર્થ•ની•</data>
+# લોન્ગયરબ્યેન ;
+<data>•લો•ન્ગ•ય•ર•બ્યે•ન•</data>
+# સ્થાનિક ;
+<data>•સ્થા•નિ•ક•</data>
+# બિલ્ડિંગની ;
+<data>•બિ•લ્ડિં•ગ•ની•</data>
+# ઉત્પાદક ;
+<data>•ઉ•ત્પા•દ•ક•</data>
+# ઝૂકાવ્યું ;
+<data>•ઝૂ•કા•વ્યું•</data>
+# પ્રપૌત્ર ;
+<data>•પ્ર•પૌ•ત્ર•</data>
+
+
+#
+# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Malayalam.txt
+#
+
+# സ്ഥാനമൊഴിയുക ;
+<data>•സ്ഥാ•ന•മൊ•ഴി•യു•ക•</data>
+# വ്യക്തി ;
+<data>•വ്യ•ക്തി•</data>
+# കൗമാരക്കാരി ;
+<data>•കൗ•മാ•ര•ക്കാ•രി•</data>
+# കല്യാണം ;
+<data>•ക•ല്യാ•ണം•</data>
+# റദ്ദാക്കിയ ;
+<data>•റ•ദ്ദാ•ക്കി•യ•</data>
+# വ്യാപിക്കുക ;
+<data>•വ്യാ•പി•ക്കു•ക•</data>
+# സ്തുതി ;
+<data>•സ്തു•തി•</data>
+# ഭക്ഷ്യസുരക്ഷ ;
+<data>•ഭ•ക്ഷ്യ•സു•ര•ക്ഷ•</data>
+# പൂഴ്ത്തിവെക്കുക ;
+<data>•പൂ•ഴ്ത്തി•വെ•ക്കു•ക•</data>
+# നിശ്ചയിച്ച ;
+<data>•നി•ശ്ച•യി•ച്ച•</data>
+# പ്രശ്നം ;
+<data>•പ്ര•ശ്നം•</data>
+# സംസ്ഥാനം ;
+<data>•സം•സ്ഥാ•നം•</data>
+# പ്രോത്സാഹം ;
+<data>•പ്രോ•ത്സാ•ഹം•</data>
+# ഉദ്യോഗസ്ഥ ;
+<data>•ഉ•ദ്യോ•ഗ•സ്ഥ•</data>
+# സ്ഥാപനം ;
+<data>•സ്ഥാ•പ•നം•</data>
+# അത്ഭുതം ;
+<data>•അ•ത്ഭു•തം•</data>
+# പ്രഖ്യാപനം ;
+<data>•പ്ര•ഖ്യാ•പ•നം•</data>
+
+#
+# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Odia.txt
+#
+
+# ଅସ୍ବରାନ୍ତ ;
+<data>•ଅ•ସ୍ବ•ରା•ନ୍ତ•</data>
+# ଅକର୍ମଣ୍ୟତା ;
+<data>•ଅ•କ•ର୍ମ•ଣ୍ୟ•ତା•</data>
+# ଅକର୍ମା ;
+<data>•ଅ•କ•ର୍ମା•</data>
+# ଆକର୍ଣ୍ଣ ;
+<data>•ଆ•କ•ର୍ଣ୍ଣ•</data>
+# ଆକସ୍ମିକୀ ;
+<data>•ଆ•କ•ସ୍ମି•କୀ•</data>
+# ଇଞ୍ଚମନୌତୀ ;
+<data>•ଇ•ଞ୍ଚ•ମ•ନୌ•ତୀ•</data>
+# ଅଗତ୍ୟା ;
+<data>•ଅ•ଗ•ତ୍ୟା•</data>
+# ଇନ୍ଦ୍ରଧ୍ବଜ ;
+<data>•ଇ•ନ୍ଦ୍ର•ଧ୍ବ•ଜ•</data>
+# ଊରୁତ୍ରାଣ ;
+<data>•ଊ•ରୁ•ତ୍ରା•ଣ•</data>
+# ଐଶ୍ବର୍ଯ୍ୟ ;
+<data>•ଐ•ଶ୍ବ•ର୍ଯ୍ୟ•</data>
+# ଅଗତ୍ୟା ;
+<data>•ଅ•ଗ•ତ୍ୟା•</data>
+# ଔପନ୍ୟାସିକ ;
+<data>•ଔ•ପ•ନ୍ୟା•ସି•କ•</data>
+# ଔଷ୍ଠ୍ୟ ;
+<data>•ଔ•ଷ୍ଠ୍ୟ•</data>
+# ଯଜୁର୍ବେଦୀ ;
+<data>•ଯ•ଜୁ•ର୍ବେ•ଦୀ•</data>
+# ପକ୍ଷ୍ମ ;
+<data>•ପ•କ୍ଷ୍ମ•</data>
+# ପଞ୍ଚଭୌତିକ ;
+<data>•ପ•ଞ୍ଚ•ଭୌ•ତି•କ•</data>
+# ତନ୍ତ୍ରିକାତନ୍ତ୍ର ;
+<data>•ତ•ନ୍ତ୍ରି•କା•ତ•ନ୍ତ୍ର•</data>
+# ସ୍ନାୟୁତନ୍ତ୍ର ;
+<data>•ସ୍ନା•ୟୁ•ତ•ନ୍ତ୍ର•</data>
+# ତପ୍ତକୁଣ୍ଡ ;
+<data>•ତ•ପ୍ତ•କୁ•ଣ୍ଡ•</data>
+# ଚଣ୍ଡୋଦରୀ ;
+<data>•ଚ•ଣ୍ଡୋ•ଦ•ରୀ•</data>
+# ଝଙ୍କାର ;
+<data>•ଝ•ଙ୍କା•ର•</data>
+# କଙ୍କପୃଷ୍ଠୀ ;
+<data>•କ•ଙ୍କ•ପୃ•ଷ୍ଠୀ•</data>
+# ଖଣ୍ଡନୀୟ ;
+<data>•ଖ•ଣ୍ଡ•ନୀ•ୟ•</data>
+# ଖମ୍ଭାବତୀ ;
+<data>•ଖ•ମ୍ଭା•ବ•ତୀ•</data>
+# ଘାଣେନ୍ଦ୍ରିୟ ;
+<data>•ଘା•ଣେ•ନ୍ଦ୍ରି•ୟ•</data>
+# ଘୁଞ୍ଚାଇବା ;
+<data>•ଘୁ•ଞ୍ଚା•ଇ•ବା•</data>
+
+#
+# cldr/common/testData/segmentation/graphemeCluster/TestSegmenter-Telugu.txt
+#
+
+# అదృశ్యం ;
+<data>•అ•దృ•శ్యం•</data>
+# ఉద్యోగాలు ;
+<data>•ఉ•ద్యో•గా•లు•</data>
+# ఉన్నాయన్న ;
+<data>•ఉ•న్నా•య•న్న•</data>
+# కార్యదర్శి ;
+<data>•కా•ర్య•ద•ర్శి•</data>
+# సామర్థ్యం ;
+<data>•సా•మ•ర్థ్యం•</data>
+# అభిప్రాయం ;
+<data>•అ•భి•ప్రా•యం•</data>
+# రాష్ట్రస్థాయి ;
+<data>•రా•ష్ట్ర•స్థా•యి•</data>
+# నిర్లక్ష్యం ;
+<data>•ని•ర్ల•క్ష్యం•</data>
+# వ్యాజ్యాలన్నీ ;
+<data>•వ్యా•జ్యా•ల•న్నీ•</data>
+# న్యాయవ్యవస్థ ;
+<data>•న్యా•య•వ్య•వ•స్థ•</data>
+# వ్యాఖ్యలు ;
+<data>•వ్యా•ఖ్య•లు•</data>
+# నేతృత్వం ;
+<data>•నే•తృ•త్వం•</data>
+# ఉద్రిక్తత ;
+<data>•ఉ•ద్రి•క్త•త•</data>
+# వ్యాఖ్యలు ;
+<data>•వ్యా•ఖ్య•లు•</data>
+# అత్యున్నత ;
+<data>•అ•త్యు•న్న•త•</data>
+# మనస్పర్ధలు ;
+<data>•మ•న•స్ప•ర్ధ•లు•</data>
+# కార్యక్రమం ;
+<data>•కా•ర్య•క్ర•మం•</data>
+# గుప్పిస్తున్నారు ;
+<data>•గు•ప్పి•స్తు•న్నా•రు•</data>
+# నటిస్తున్నారు ;
+<data>•న•టి•స్తు•న్నా•రు•</data>
+# ద్వితీయార్ధం ;
+<data>•ద్వి•తీ•యా•ర్ధం•</data>
+# జీర్ణవ్యవస్థ ;
+<data>•జీ•ర్ణ•వ్య•వ•స్థ•</data>
+# ఉత్సాహం ;
+<data>•ఉ•త్సా•హం•</data>
+# హృద్రోగాలు ;
+<data>•హృ•ద్రో•గా•లు•</data>
+# పాల్గొనాల్సింది ;
+<data>•పా•ల్గొ•నా•ల్సిం•ది•</data>
+# మార్గదర్శకాలు ;
+<data>•మా•ర్గ•ద•ర్శ•కా•లు•</data>
+
########################################################################################
#