| # |
| # Copyright (C) 2007, International Business Machines Corporation and others. |
| # All Rights Reserved. |
| # |
| # file: xgc.txt |
| # |
| # ICU Extended Grapheme Cluster Boundary Rules. |
| # See Unicode Standard Annex #29. |
| # These rules are based on the CLDR 1.5 Text Segmentation definition for |
| # Extended Grapheme Cluster. |
| # |
| |
| # |
| # Character Class Definitions. |
| # |
| # Line-terminator and control classes, each taken directly from the |
| # corresponding Grapheme_Cluster_Break property value. The cluster rules |
| # further down exclude all three from ordinary clusters. |
| $CR = [\p{Grapheme_Cluster_Break = CR}]; |
| $LF = [\p{Grapheme_Cluster_Break = LF}]; |
| $Control = [\p{Grapheme_Cluster_Break = Control}]; |
| |
| # Add the Japanese half-width voicing marks (U+FF9E, U+FF9F) to $Extend. |
| $VoiceMarks = [\uff9e\uff9f]; |
| |
| # $Extend is the set of characters that the rules allow to trail after a |
| # cluster base. It is deliberately wider than the plain |
| # Grapheme_Cluster_Break = Extend property value: it also takes in |
| # General_Category Mc, the $VoiceMarks defined above, and four Thai |
| # characters selected by name (SARA A, SARA AA, SARA AM, LAKKHANGYAO). |
| $Extend = [\p{Grapheme_Cluster_Break = Extend} |
| \p{gc=Mc} |
| $VoiceMarks |
| \p{name=THAI CHARACTER SARA A} |
| \p{name=THAI CHARACTER SARA AA} |
| \p{name=THAI CHARACTER SARA AM} |
| \p{name=THAI CHARACTER LAKKHANGYAO}]; |
| |
| # |
| # Thai Clusters |
| # |
| # $Prepend (U+0E40..U+0E44) is the set of characters that may open a Thai |
| # cluster; $PrependBase (U+0E01..U+0E2E) is the base such a character |
| # joins to. Both are used only by the Prepend rules in the forward and |
| # reverse sections. |
| # NOTE(review): the ranges look like the Thai leading vowels and the Thai |
| # consonants respectively - confirm against the Thai code chart. |
| $Prepend = [\u0E40-\u0E44]; |
| $PrependBase = [\u0E01-\u0E2E]; |
| |
| |
| # |
| # Korean Syllable Definitions |
| # |
| # Hangul building blocks, one class per Grapheme_Cluster_Break value. |
| $L = [\p{Grapheme_Cluster_Break = L}]; |
| $V = [\p{Grapheme_Cluster_Break = V}]; |
| $T = [\p{Grapheme_Cluster_Break = T}]; |
| |
| $LV = [\p{Grapheme_Cluster_Break = LV}]; |
| $LVT = [\p{Grapheme_Cluster_Break = LVT}]; |
| |
| # A Hangul syllable, read left to right, is one of: |
| #   - a run of one or more $L on its own; |
| #   - zero or more $L, then a core, then zero or more $T, where the core |
| #     is an optional $LV followed by one or more $V, or a single $LV, or a |
| #     single $LVT; |
| #   - a run of one or more $T on its own. |
| $HangulSyllable = $L+ | ($L* ($LV? $V+ | $LV | $LVT) $T*) | $T+; |
| |
| ## ------------------------------------------------- |
| |
| # Forward rules: each rule describes one complete cluster, in text order. |
| !!forward; |
| |
| # CR followed by LF is a single cluster. |
| $CR $LF; |
| # Any single character that is not $Control, $CR or $LF, or a whole Hangul |
| # syllable, together with every $Extend character that follows it. |
| ([^$Control $CR $LF] | $HangulSyllable) $Extend*; |
| # Thai: a $Prepend character (with optional $Extend) stays attached to the |
| # $PrependBase that follows it, plus that base's own trailing $Extend. |
| $Prepend $Extend* $PrependBase $Extend*; |
| |
| ## ------------------------------------------------- |
| |
| # Reverse rules: the same clusters as the forward section, with each |
| # sequence written back to front (last character of the cluster first). |
| !!reverse; |
| |
| # $HangulSyllable with its elements in reverse order: trailing $T first, |
| # then the core, then the leading $L. |
| $BackHangulSyllable = $L+ | ($T* ($V+$LV? | $LV | $LVT) $L*) | $T+; |
| # One cluster read backwards: either LF then CR (a CR LF pair), or any |
| # $Extend run followed by its base character or reversed Hangul syllable. |
| $BackOneCluster = ($LF $CR) | ($Extend* ([^$Control $CR $LF] | $BackHangulSyllable)); |
| $BackOneCluster; |
| # Thai Prepend cluster read backwards; mirrors the forward Prepend rule. |
| $Extend* $PrependBase $Extend* $Prepend; |
| |
| ## ------------------------------------------------- |
| |
| # NOTE(review): safe-reverse rules are presumably what the iterator uses to |
| # back up from an arbitrary offset to a point where normal iteration can |
| # resume - confirm against the ICU break-rules documentation. |
| !!safe_reverse; |
| |
| # rule 6, 7, 8 |
| # (presumably the Hangul rules GB6-GB8 of UAX #29 - confirm) |
| # One or more $V followed by $L, in the order this section matches. |
| $V+ $L; |
| # One or more $Extend followed by $Prepend, in the same matching order, so |
| # that a Thai Prepend cluster is not entered in the middle. |
| $Extend+ $Prepend; |
| |
| ## ------------------------------------------------- |
| |
| # NOTE(review): safe-forward rules are presumably the forward-direction |
| # counterpart of the safe-reverse rules, used to reach a point where normal |
| # iteration can resume - confirm against the ICU break-rules documentation. |
| !!safe_forward; |
| |
| # rule 6, 7, 8 |
| # (presumably the Hangul rules GB6-GB8 of UAX #29 - confirm) |
| # One or more $V followed by $T. |
| $V+ $T; |