70 lines
2.4 KiB
Plaintext
70 lines
2.4 KiB
Plaintext
#
|
|
# Copyright (C) 2016 and later: Unicode, Inc. and others.
|
|
# License & terms of use: http://www.unicode.org/copyright.html
|
|
# Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.
|
|
|
|
# file: grapheme.txt
|
|
#
|
|
# Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
|
|
#
|
|
#
|
|
# Note: Rule syntax and the monkey test itself are still a work in progress.
|
|
# They are expected to change with review and the addition of support for rule tailoring.
|
|
|
|
# Header settings: declare which kind of boundary these reference rules describe.
type = grapheme; # one of grapheme | word | line | sentence
|
|
# NOTE(review): presumably the locale of the break iterator the rules are checked against - confirm.
locale = en;
|
|
|
|
# Line-ending classes: each name is bound to the set of characters whose
# Grapheme_Cluster_Break property has the given value. The GB3-GB5 rules refer to these names.
CR = [\p{Grapheme_Cluster_Break = CR}];
|
|
LF = [\p{Grapheme_Cluster_Break = LF}];
|
|
|
|
# Remaining Grapheme_Cluster_Break classes, one named set per property value.
# NOTE(review): Control and Extend carry an extra pair of brackets; as written the nesting
# looks redundant (a set containing only one set) - confirm before simplifying.
Control = [[\p{Grapheme_Cluster_Break = Control}]];
|
|
Extend = [[\p{Grapheme_Cluster_Break = Extend}]];
|
|
ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}];
|
|
Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
|
|
Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
|
|
SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
|
|
|
|
#
|
|
# Korean Syllable Definitions
|
|
#
|
|
# One named set per Hangul-related Grapheme_Cluster_Break value; rules GB6-GB8 combine them.
# NOTE(review): presumably L/V/T are leading-consonant, vowel and trailing-consonant jamo,
# and LV/LVT are precomposed syllables - confirm against UAX #29.
L = [\p{Grapheme_Cluster_Break = L}];
|
|
V = [\p{Grapheme_Cluster_Break = V}];
|
|
T = [\p{Grapheme_Cluster_Break = T}];
|
|
LV = [\p{Grapheme_Cluster_Break = LV}];
|
|
LVT = [\p{Grapheme_Cluster_Break = LVT}];
|
|
|
|
# Emoji definitions
|
|
|
|
# Set used by rule GB11 for the characters on either side of a ZWJ.
# NOTE(review): ExtPict is presumably the short alias of the Extended_Pictographic property,
# written here in POSIX-style [:...:] form rather than \p{...} - confirm.
Extended_Pict = [:ExtPict:];
|
|
|
|
# Indic Sequences
|
|
# Virama_ and LinkingConsonant each restrict one Indic_Syllabic_Category value to the same
# six scripts (Gujr, Telu, Mlym, Orya, Beng, Deva); rule GB9c is built from them.
# The script list is repeated verbatim in both definitions, so keep the two in sync.
# NOTE(review): `&` is assumed to be set intersection, as in UnicodeSet patterns - confirm.
# NOTE(review): \p{Gujr} omits the "sc=" prefix the other five scripts use; presumably it
# is equivalent to \p{sc=Gujr} - confirm.
Virama_ = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Virama}]];
|
|
|
|
LinkingConsonant = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Consonant}]];
|
|
|
|
# Filler class used by rule GB9c: the Extend set minus characters with ccc=0, plus ZWJ.
# NOTE(review): assumes `-` is set difference and adjacency is union, as in UnicodeSet
# patterns, with Extend and ZWJ resolving to the names defined earlier in this file - confirm.
ExtCccZwj = [[Extend-[\p{ccc=0}]] ZWJ];
|
|
|
|
# Rules for line endings and controls. Rule names appear to mirror the UAX #29 rule numbers.
# NOTE(review): `÷` is read here as "break at this position" and a rule without it as
# "keep the matched sequence together" - confirm against the monkey test's rule parser.
# GB3: a CR immediately followed by LF.
GB3: CR LF;
|
|
# GB4: break after any Control, CR or LF.
# NOTE(review): GB3 is listed first, presumably so a CR LF pair is matched before GB4 can split it - confirm.
GB4: (Control | CR | LF) ÷;
|
|
# GB5: break between any character and a following Control, CR or LF.
GB5: . ÷ (Control | CR | LF);
|
|
|
|
# Hangul syllable sequences, written without `÷`.
# NOTE(review): presumably meaning "no break inside the sequence" - confirm.
# GB6: L followed by L, V, LV or LVT.
GB6: L (L | V | LV | LVT);
|
|
# GB7: LV or V followed by V or T.
GB7: (LV | V) (V | T);
|
|
# GB8: LVT or T followed by T.
GB8: (LVT | T) T;
|
|
|
|
# NOTE(review): GB11 and GB9c are listed ahead of GB9, out of numeric order - presumably
# because rule order matters and GB9 would otherwise match their Extend/ZWJ first.
# Confirm against the monkey test's matcher before reordering.
# GB11: Extended_Pict, any number of Extend, a ZWJ, then another Extended_Pict.
GB11: Extended_Pict Extend* ZWJ Extended_Pict;
|
|
# GB9c: two LinkingConsonants joined by a Virama_, with any number of ExtCccZwj
# characters allowed on either side of the Virama_.
GB9c: LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant;
|
|
# GB9: any character followed by Extend or ZWJ.
GB9: . (Extend | ZWJ);
|
|
|
|
# GB9a: any character followed by a SpacingMark.
GB9a: . SpacingMark;
|
|
# GB9b: a Prepend character followed by any character.
GB9b: Prepend .;
|
|
|
|
# Regional Indicators, split into pairs.
|
|
# Note that a pair of RIs that is not followed by a third RI will fall into
|
|
# the normal rules for Extend, etc.
|
|
#
|
|
# GB12: two Regional_Indicators with a break before a third one that follows.
GB12: Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
|
|
# GB13: a pair of Regional_Indicators with no third one required after it.
GB13: Regional_Indicator Regional_Indicator;
|
|
|
|
# GB999: break after any character.
# NOTE(review): presumably the catch-all that applies only where no earlier rule matched - confirm.
GB999: . ÷;
|