summaryrefslogtreecommitdiff
path: root/i18npool/inc
diff options
context:
space:
mode:
authorKhaled Hosny <khaledhosny@eglug.org>2017-01-08 01:02:20 +0200
committerEike Rathke <erack@redhat.com>2017-01-17 19:14:27 +0000
commit18bc169b4727744f35227532078cbf7c9558bc9a (patch)
treebd6c4664c111dcbd4ae7ee024b60985358e933cb /i18npool/inc
parent767695be0e6cce551ae661ba741694d48695def1 (diff)
tdf#105170: Ignore diacritics on precomposed chars
When ignoring diacritics, the old code just ignored combining marks, which works when using decomposed forms (NFD) but does not work for precomposed forms (NFC). Instead, we now decompose, strip marks, then recompose, and use a nice icu::Transliterator that does the hard work for us. As a bonus, we should now handle surrogate pairs fine (most of the time). The new code (in ignoreDiacritics_CTL::folding()) might not be as efficient as the old code that used transliteration_Ignore::folding(), but it is less ugly and easier to reason about, or so I hope. Change-Id: If48c8be30527580cdd68f20b40a6533c5f258d83 Reviewed-on: https://gerrit.libreoffice.org/32826 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: Eike Rathke <erack@redhat.com> (cherry picked from commit 278eabab2b5bdc95a51d501fcdb46c216ded3baa) Reviewed-on: https://gerrit.libreoffice.org/33214
Diffstat (limited to 'i18npool/inc')
-rw-r--r--i18npool/inc/transliteration_Ignore.hxx19
1 files changed, 18 insertions, 1 deletions
diff --git a/i18npool/inc/transliteration_Ignore.hxx b/i18npool/inc/transliteration_Ignore.hxx
index e6573b6adfaa..3b67d64120e7 100644
--- a/i18npool/inc/transliteration_Ignore.hxx
+++ b/i18npool/inc/transliteration_Ignore.hxx
@@ -21,6 +21,7 @@
#include <transliteration_commonclass.hxx>
#include <i18nutil/oneToOneMapping.hxx>
+#include <unicode/translit.h>
typedef sal_Unicode (*TransFunc)(const sal_Unicode);
@@ -91,9 +92,25 @@ TRANSLITERATION_IGNORE(Space_ja_JP)
TRANSLITERATION_IGNORE(TraditionalKana_ja_JP)
TRANSLITERATION_IGNORE(TraditionalKanji_ja_JP)
TRANSLITERATION_IGNORE(ZiZu_ja_JP)
-TRANSLITERATION_IGNORE(Diacritics_CTL)
TRANSLITERATION_IGNORE(Kashida_CTL)
+class ignoreDiacritics_CTL : public transliteration_Ignore // Hand-written declaration replacing the former TRANSLITERATION_IGNORE(Diacritics_CTL) macro expansion.
+{
+ icu::Transliterator* m_transliterator; // ICU transliterator held by this object. NOTE(review): no destructor is declared here, so ownership/cleanup is not visible in this header - confirm in the implementation file.
+
+public:
+ ignoreDiacritics_CTL(); // Defined elsewhere; presumably creates m_transliterator - verify against the implementation.
+
+ OUString SAL_CALL
+ folding(const OUString& rInStr, sal_Int32 nStartPos, sal_Int32 nCount, css::uno::Sequence<sal_Int32>& rOffset) // Overrides the base-class folding(). Per the commit message, the intent is: decompose, strip marks, recompose (so precomposed/NFC input is handled too).
+ throw(css::uno::RuntimeException, std::exception) override;
+
+ sal_Unicode SAL_CALL
+ transliterateChar2Char(sal_Unicode nInChar) // Single-character override; the exception specification below allows MultipleCharsOutputException.
+ throw(css::uno::RuntimeException,
+ css::i18n::MultipleCharsOutputException, std::exception) override;
+};
+
#undef TRANSLITERATION_IGNORE
#define TRANSLITERATION_IGNORE( name ) \