# blob: f930afaa8e1a4b0260b8fb19c30b67dd9beaf79d [file] [log] [blame]
#
# Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.
# file: grapheme.txt
#
# Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
#
#
# Note: Rule syntax and the monkey test itself are still a work in progress.
# They are expected to change with review and the addition of support for rule tailoring.
# Test settings: the kind of boundary these reference rules describe, and a locale.
# NOTE(review): presumably the locale selects the break iterator under test - confirm.
type = grapheme; # one of grapheme | word | line | sentence
locale = en;
# Character classes referenced by the break rules in this file. Each name is bound
# to the set of characters having the given Grapheme_Cluster_Break property value.
# NOTE(review): Control and Extend are written with doubled brackets, unlike the
# other sets; presumably equivalent to the single-bracket form - confirm.
CR = [\p{Grapheme_Cluster_Break = CR}];
LF = [\p{Grapheme_Cluster_Break = LF}];
Control = [[\p{Grapheme_Cluster_Break = Control}]];
Extend = [[\p{Grapheme_Cluster_Break = Extend}]];
ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}];
Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
#
# Korean Syllable Definitions
#
# One set per Grapheme_Cluster_Break value L, V, T, LV and LVT.
# These sets are combined by rules GB6, GB7 and GB8.
L = [\p{Grapheme_Cluster_Break = L}];
V = [\p{Grapheme_Cluster_Break = V}];
T = [\p{Grapheme_Cluster_Break = T}];
LV = [\p{Grapheme_Cluster_Break = LV}];
LVT = [\p{Grapheme_Cluster_Break = LVT}];
# Emoji definitions
# The property values are given by their short names (EB, EM, GAZ, EBG).
# NOTE(review): presumably the short aliases of E_Base, E_Modifier, Glue_After_Zwj
# and E_Base_GAZ; confirm they exist in the Unicode version the test runs against.
# These sets are used by rules GB10 and GB11.
E_Base = [\p{Grapheme_Cluster_Break = EB}];
E_Modifier = [\p{Grapheme_Cluster_Break = EM}];
GAZ = [\p{Grapheme_Cluster_Break = GAZ}];
E_Base_GAZ = [\p{Grapheme_Cluster_Break = EBG}];
# Break rules, written as "name: pattern;".
# Reading assumed in the notes below (TODO confirm against the monkey test's rule parser):
#   - the GBn names follow the grapheme cluster boundary rule numbers of UAX #29;
#   - "." stands for any character and "÷" marks a position where a break occurs;
#   - a rule without "÷" keeps its whole match free of breaks.
# NOTE(review): the rules are not in numeric order (GB10 precedes GB9, GB11 follows
# GB9b). Presumably rules are tried in file order, so do not reorder without checking.
#
# CR followed by LF.
GB3: CR LF;
# Break after Control, CR or LF.
GB4: (Control | CR | LF) ÷;
# Break between any character and a following Control, CR or LF.
GB5: . ÷ (Control | CR | LF);
# Korean syllable sequences.
GB6: L (L | V | LV | LVT);
GB7: (LV | V) (V | T);
GB8: (LVT | T) T;
# Emoji base, any number of Extend characters, then an emoji modifier.
GB10: (E_Base | E_Base_GAZ) Extend* E_Modifier;
# Any character followed by Extend or ZWJ.
GB9: . (Extend | ZWJ);
# Any character followed by SpacingMark.
GB9a: . SpacingMark;
# Prepend followed by any character.
GB9b: Prepend .;
# ZWJ followed by GAZ or E_Base_GAZ.
GB11: ZWJ (GAZ | E_Base_GAZ);
# Regional Indicators, split into pairs.
# Note that a pair of RIs that is not followed by a third RI will fall into
# the normal rules for Extend, etc.
#
# Two Regional_Indicators, then a break ("÷") before a third Regional_Indicator.
GB12: Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
# Two Regional_Indicators with no break marked between them.
GB13: Regional_Indicator Regional_Indicator;
# Last rule in the file: any character followed by a break.
# NOTE(review): presumably the fallback when no earlier rule matches - confirm.
GB999: . ÷;