summaryrefslogtreecommitdiff
path: root/include/i18nutil
diff options
context:
space:
mode:
author Stephan Bergmann <sbergman@redhat.com> 2019-06-12 11:22:29 +0200
committer Stephan Bergmann <sbergman@redhat.com> 2019-06-12 16:27:57 +0200
commit c1399e497191f295b9c3db95d126ff6a4fa5891d (patch)
tree 09a9ca2c668d67b10959ca588a43c1fb24e71eb7 /include/i18nutil
parent 47dbbe214641b9a28871d0c82f71b2afb9c5943c (diff)
Move isIVSSelector, isCJKIVSCharacter to i18nutil/unicode.hxx
6a7db071c75609093fc3a9cbc297b8069726a33e "tdf#125497 allow backspace to remove CJK IVS" had moved these functions from sw/source/uibase/wrtsh/delete.cxx to rtl/character.hxx, but the latter appears to be a less than ideal home for them: For one, it is part of the stable URE interface, which makes it harder to maintain (e.g., later versions of Unicode have added CJK Extension C--F code blocks, which the current implementation of isCJKIVSCharacter does not reflect). And for another, besides details of legacy/ubiquitous ASCII, it only deals with the "hard" structure of Unicode (isUnicodeCodePoint, isSurrogate, etc.), not with any specific code blocks or character classifications (which can change over time). Internal i18nutil/unicode.hxx appears to be better suited.

Change-Id: I88b3e4e2488411f988c1a20f79b8a58626d93dce
Reviewed-on: https://gerrit.libreoffice.org/73873
Tested-by: Jenkins
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
Diffstat (limited to 'include/i18nutil')
-rw-r--r-- include/i18nutil/unicode.hxx 25
1 files changed, 25 insertions, 0 deletions
diff --git a/include/i18nutil/unicode.hxx b/include/i18nutil/unicode.hxx
index b3563e529a38..c0b20a33cba8 100644
--- a/include/i18nutil/unicode.hxx
+++ b/include/i18nutil/unicode.hxx
@@ -47,6 +47,31 @@ public:
static bool isSpace( const sal_Unicode ch);
static bool isWhiteSpace( const sal_Unicode ch);
+ /** Check for Unicode variation sequence selectors
+
+ @param nCode A Unicode code point.
+
+ @return True if nCode is a Unicode variation selector (U+FE00..U+FE0F or U+E0100..U+E01EF).
+ */
+ static bool isIVSSelector(sal_uInt32 nCode)
+ {
+ return (nCode >= 0xFE00 && nCode <= 0xFE0F) // Variation Selectors block
+ || (nCode >= 0xE0100 && nCode <= 0xE01EF);// Variation Selectors Supplement block
+ }
+
+ /** Check for base characters of a CJK ideographic variation sequence (IVS)
+
+ @param nCode A Unicode code point.
+
+ @return True if nCode is in CJK Unified Ideographs, Extension A, or Extension B; no other blocks are tested.
+ */
+ static bool isCJKIVSCharacter(sal_uInt32 nCode)
+ {
+ return (nCode >= 0x4E00 && nCode <= 0x9FFF) // CJK Unified Ideographs
+ || (nCode >= 0x3400 && nCode <= 0x4DBF) // CJK Unified Ideographs Extension A
+ || (nCode >= 0x20000 && nCode <= 0x2A6DF); // CJK Unified Ideographs Extension B
+ }
+
//Map an ISO 15924 script code to Latin/Asian/Complex/Weak
static sal_Int16 getScriptClassFromUScriptCode(UScriptCode eScript);