diff options
Diffstat (limited to 'i18npool/source/breakiterator/data/char_in.txt')
-rw-r--r-- | i18npool/source/breakiterator/data/char_in.txt | 108 |
1 files changed, 0 insertions, 108 deletions
diff --git a/i18npool/source/breakiterator/data/char_in.txt b/i18npool/source/breakiterator/data/char_in.txt
deleted file mode 100644
index 5e1ed67596..0000000000
--- a/i18npool/source/breakiterator/data/char_in.txt
+++ /dev/null
@@ -1,108 +0,0 @@
-#
-# Copyright (C) 2002-2009, International Business Machines Corporation and others.
-# All Rights Reserved.
-#
-# file: char.txt
-#
-# ICU Character Break Rules, also known as Grapheme Cluster Boundaries
-# See Unicode Standard Annex #29.
-# These rules are based on TR29 Revision 13, for Unicode Version 5.1
-#
-
-#
-# Character Class Definitions.
-#
-$CR = [\p{Grapheme_Cluster_Break = CR}];
-$LF = [\p{Grapheme_Cluster_Break = LF}];
-$Control = [\p{Grapheme_Cluster_Break = Control}];
-$Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
-$Extend = [\p{Grapheme_Cluster_Break = Extend}];
-$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
-$BengaliLetter = [\u0985-\u09B9 \u09CE \u09DC-\u09E1 \u09F0-\u09F1];
-$BengaliSignVirama = \u09CD;
-$GujaratiLetter = [\u0A85-\u0A8C \u0A8F-\u0A90 \u0A93-\u0AB9 \u0AE0-\u0AE1];
-$GujaratiSignVirama = \u0ACD;
-$DevanagariLetter = [\u0904-\u0939 \u0958-\u0961 \u0972-\u097F];
-$DevanagariSignVirama = \u094D;
-$KannadaLetter = [\u0C85-\u0CB9 \u0CDE-\u0CE1];
-$KannadaSignVirama = \u0CCD;
-$MalayalamLetter = [\u0D05-\u0D39 \u0D60-\u0D61 \u0D7A-\u0D7F];
-$MalayalamSignVirama = \u0D4D;
-$OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
-$OriyaSignVirama = \u0B4D;
-$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
-$GurmukhiSignVirama = \u0A4D;
-$TamilLetter = [\u0B85-\u0BB9];
-$TamilSignVirama = \u0BCD;
-$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
-$TeluguSignVirama = \u0C4D;
-
-#
-# Korean Syllable Definitions
-#
-$L = [\p{Grapheme_Cluster_Break = L}];
-$V = [\p{Grapheme_Cluster_Break = V}];
-$T = [\p{Grapheme_Cluster_Break = T}];
-
-$LV = [\p{Grapheme_Cluster_Break = LV}];
-$LVT = [\p{Grapheme_Cluster_Break = LVT}];
-
-
-## -------------------------------------------------
-!!chain;
-
-!!forward;
-
-$CR $LF;
-
-$BengaliLetter ($BengaliSignVirama $BengaliLetter?)+;
-$GujaratiLetter ($GujaratiSignVirama $GujaratiLetter?)+;
-$DevanagariLetter ($DevanagariSignVirama $DevanagariLetter?)+;
-$KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
-$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
-$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
-$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
-$TamilLetter ($TamilSignVirama $TamilLetter?)+;
-$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
-
-$L ($L | $V | $LV | $LVT);
-($LV | $V) ($V | $T);
-($LVT | $T) $T;
-
-[^$Control $CR $LF] $Extend;
-
-[^$Control $CR $LF] $SpacingMark;
-$Prepend [^$Control $CR $LF];
-
-
-## -------------------------------------------------
-
-!!reverse;
-$LF $CR;
-($BengaliLetter? $BengaliSignVirama)+ $BengaliLetter;
-($GujaratiLetter? $GujaratiSignVirama)+ $GujaratiLetter;
-($DevanagariLetter? $DevanagariSignVirama)+ $DevanagariLetter;
-($KannadaLetter? $KannadaSignVirama)+ $KannadaLetter;
-($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
-($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
-($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
-($TamilLetter? $TamilSignVirama)+ $TamilLetter;
-($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
-($L | $V | $LV | $LVT) $L;
-($V | $T) ($LV | $V);
-$T ($LVT | $T);
-
-$Extend [^$Control $CR $LF];
-$SpacingMark [^$Control $CR $LF];
-[^$Control $CR $LF] $Prepend;
-
-
-## -------------------------------------------------
-
-!!safe_reverse;
-
-
-## -------------------------------------------------
-
-!!safe_forward;
-