| # |
| # Copyright (C) 2002-2006, International Business Machines Corporation and others. |
| # All Rights Reserved. |
| # |
| # file: char.txt |
| # |
| # ICU Character Break Rules, also known as Grapheme Cluster Boundaries |
| # See Unicode Standard Annex #29. |
| # These rules are based on TR29 Version 5.0.0 |
| # Includes post-5.0 change to treat Japanese half width voicing marks |
| # as Grapheme Extend. |
| # |
| |
| # |
| # Character Class Definitions. |
| # |
| # Each variable below names the set of characters that carry the given |
| # value of the Unicode Grapheme_Cluster_Break property. |
| $CR = [\p{Grapheme_Cluster_Break = CR}]; |
| $LF = [\p{Grapheme_Cluster_Break = LF}]; |
| $Control = [\p{Grapheme_Cluster_Break = Control}]; |
| |
| # add Japanese Half Width voicing marks to $Extend |
| # $VoiceMarks holds exactly two code points, U+FF9E and U+FF9F. $Extend is |
| # the union of the property-derived Extend set and $VoiceMarks, so every |
| # rule that uses $Extend also applies to those two characters. |
| $VoiceMarks = [\uff9e\uff9f]; |
| $Extend = [\p{Grapheme_Cluster_Break = Extend} $VoiceMarks]; |
| |
| # |
| # Korean Syllable Definitions |
| # |
| # Sets for the five Hangul-related Grapheme_Cluster_Break values. In |
| # UAX #29 terms: L = leading consonant jamo, V = vowel jamo, T = trailing |
| # consonant jamo. |
| $L = [\p{Grapheme_Cluster_Break = L}]; |
| $V = [\p{Grapheme_Cluster_Break = V}]; |
| $T = [\p{Grapheme_Cluster_Break = T}]; |
| |
| # In UAX #29 terms: precomposed syllables of the form LV and LVT. |
| $LV = [\p{Grapheme_Cluster_Break = LV}]; |
| $LVT = [\p{Grapheme_Cluster_Break = LVT}]; |
| |
| # A Hangul syllable sequence, in forward text order, is one of: |
| #   1. one or more L; |
| #   2. any number of L, then a core, then any number of T, where the core |
| #      is an optional LV followed by one or more V, or a single LV, or a |
| #      single LVT; |
| #   3. one or more T. |
| $HangulSyllable = $L+ | ($L* ($LV? $V+ | $LV | $LVT) $T*) | $T+; |
| |
| ## ------------------------------------------------- |
| |
| # Start of the forward rule section (rules for iterating forward). |
| !!forward; |
| |
| # A CR immediately followed by an LF is kept together as one cluster. |
| $CR $LF; |
| # Every other cluster is one base followed by any number of $Extend |
| # characters. The base is either a single character that is not in |
| # $Control, $CR or $LF, or a whole $HangulSyllable sequence. The negated |
| # set does not exclude $Extend, so an $Extend character can itself be the |
| # base. |
| # NOTE(review): no rule here matches a lone Control, CR or LF; presumably |
| # the break engine falls back to a one-character cluster for them -- confirm. |
| ([^$Control $CR $LF] | $HangulSyllable) $Extend*; |
| |
| ## ------------------------------------------------- |
| |
| # Start of the reverse rule section (rules for iterating backward). |
| # The patterns are written in the order the characters are met when |
| # moving backward, so each is the forward pattern with its parts reversed. |
| !!reverse; |
| |
| # Mirror of the forward Hangul syllable pattern: one or more L; or any |
| # number of T, then a core (one or more V followed by an optional LV, or a |
| # single LV, or a single LVT), then any number of L; or one or more T. |
| $BackHangulSyllable = $L+ | ($T* ($V+$LV? | $LV | $LVT) $L*) | $T+; |
| # One cluster seen backward: either LF then CR (a CR LF pair reversed), or |
| # any number of $Extend characters followed by the base, which is a single |
| # character outside $Control, $CR and $LF, or a reversed Hangul syllable. |
| $BackOneCluster = ($LF $CR) | ($Extend* ([^$Control $CR $LF] | $BackHangulSyllable)); |
| # The only rule in this section: match one cluster backward. |
| $BackOneCluster; |
| |
| ## ------------------------------------------------- |
| |
| # Start of the safe-reverse rule section. |
| # NOTE(review): presumably these rules are used to back up from an |
| # arbitrary position to one from which forward iteration is reliable -- |
| # confirm against the ICU break rule documentation. |
| !!safe_reverse; |
| |
| # rule 6, 7, 8 |
| # Presumably refers to the Hangul syllable rules (6, 7 and 8) of the |
| # UAX #29 grapheme cluster boundary rules -- confirm. Moving backward, the |
| # rule matches one or more V followed by a single L. |
| $V+ $L; |
| |
| ## ------------------------------------------------- |
| |
| # Start of the safe-forward rule section. |
| # NOTE(review): presumably the forward counterpart of the safe-reverse |
| # rules, used to move ahead to a position from which reverse iteration is |
| # reliable -- confirm against the ICU break rule documentation. |
| !!safe_forward; |
| |
| # rule 6, 7, 8 |
| # Presumably refers to the Hangul syllable rules (6, 7 and 8) of the |
| # UAX #29 grapheme cluster boundary rules -- confirm. Moving forward, the |
| # rule matches one or more V followed by a single T. |
| $V+ $T; |