10 files changed, 463 insertions, 326 deletions
diff --git a/i18npool/source/breakiterator/breakiteratorImpl.cxx b/i18npool/source/breakiterator/breakiteratorImpl.cxx
index 19b175d4bd83..3cc974870c3d 100644
--- a/i18npool/source/breakiterator/breakiteratorImpl.cxx
+++ b/i18npool/source/breakiterator/breakiteratorImpl.cxx
@@ -1,3 +1,4 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /*************************************************************************
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -132,7 +133,7 @@ Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 n
 }
 
 static inline sal_Bool SAL_CALL isCJK( const Locale& rLocale ) {
-        return rLocale.Language.equalsAscii("zh") || rLocale.Language.equalsAscii("ja") || rLocale.Language.equalsAscii("ko");
+        return rLocale.Language.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("zh")) || rLocale.Language.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("ja")) || rLocale.Language.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("ko"));
 }
 
 Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos,
@@ -442,38 +443,49 @@ sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
         return 0;
 }
 
-typedef struct {
-    UBlockCode from;
-    UBlockCode to;
-    sal_Int16 script;
-} UBlock2Script;
-
-// for a list of the UBLOCK_... values see:
-// http://icu-project.org/apiref/icu4c/uchar_8h.html
-// where enum UBlockCode is defined.
-// See also http://www.unicode.org/charts/ for general reference
-static UBlock2Script scriptList[] = {
-    {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK},
-    {UBLOCK_BASIC_LATIN, UBLOCK_ARMENIAN, ScriptType::LATIN},
-    {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX},
-    {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN},
-    {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN},
-    {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX},
-    {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN},
-    {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX},
-    {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN},
-    {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN},
-    {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN},
-    {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX},
-    {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN},
-    {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX},
-    {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN},
-    {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN},
-    {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN},
-    {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN}
-};
-
-#define scriptListCount sizeof (scriptList) / sizeof (UBlock2Script)
+static sal_Int16 scriptTypes[] = {
+    ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX,
+    ScriptType::ASIAN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN,
+// 15
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, ScriptType::COMPLEX,
+    ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
+// 30
+    ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::LATIN, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+// 45
+    ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
+    ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+// 60
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN,
+// 75
+    ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+// 90
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX,
+// 105
+    ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN,
+// 120
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+// 135
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
+    ScriptType::COMPLEX,
+    ScriptType::WEAK};
+
+#define scriptListCount SAL_N_ELEMENTS(scriptTypes)
 
 sal_Int16  BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
 {
@@ -483,27 +495,13 @@ sal_Int16  BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
         if (currentChar != lastChar) {
             lastChar = currentChar;
 
-            //JP 21.9.2001: handle specific characters - always as weak
-            //                  definition of 1 - this breaks a word
-            //                  2 - this can be inside a word
-            //                  0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
-            if( 1 == currentChar || 2 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
+            int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT);
+            if (script < 0)
                 nRet = ScriptType::WEAK;
-            // workaround for Coptic
-            else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)
-                nRet = ScriptType::LATIN;
-            // work-around for ligatures (see http://www.unicode.org/charts/PDF/UFB00.pdf)
-            else if ((0xFB00 <= currentChar && currentChar <= 0xFB06) ||
-                     (0xFB13 <= currentChar && currentChar <= 0xFB17))
-                nRet = ScriptType::LATIN;
-            else {
-                UBlockCode block=ublock_getCode(currentChar);
-                sal_uInt16 i;
-                for ( i = 0; i < scriptListCount; i++) {
-                    if (block <= scriptList[i].to) break;
-                }
-                nRet=(i < scriptListCount && block >= scriptList[i].from) ? scriptList[i].script : ScriptType::WEAK;
-            }
+            else if (static_cast<size_t>(script) >= SAL_N_ELEMENTS(scriptTypes))
+                nRet = ScriptType::COMPLEX;         // anything new is going to be pretty wild
+            else
+                nRet = scriptTypes[script];
         }
         return nRet;
 }
@@ -524,7 +522,7 @@ sal_Bool SAL_CALL BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUS
         }
 
         Reference < uno::XInterface > xI = xMSF->createInstance(
-            OUString::createFromAscii("com.sun.star.i18n.BreakIterator_") + aLocaleName);
+            OUString(RTL_CONSTASCII_USTRINGPARAM("com.sun.star.i18n.BreakIterator_")) + aLocaleName);
 
         if ( xI.is() ) {
             xI->queryInterface( getCppuType((const Reference< XBreakIterator>*)0) ) >>= xBI;
@@ -575,7 +573,7 @@ BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale) throw (
                     // load service with name <base>_<lang>
                     createLocaleSpecificBreakIterator(rLocale.Language)) ||
                     // load default service with name <base>_Unicode
-                    createLocaleSpecificBreakIterator(OUString::createFromAscii("Unicode"))) {
+                    createLocaleSpecificBreakIterator(OUString(RTL_CONSTASCII_USTRINGPARAM("Unicode")))) {
                 lookupTable.push_back( new lookupTableItem(aLocale, xBI) );
                 return xBI;
             }
@@ -607,3 +605,4 @@ BreakIteratorImpl::getSupportedServiceNames(void) throw( RuntimeException )
 
 } } } }
 
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/breakiterator/breakiterator_cjk.cxx b/i18npool/source/breakiterator/breakiterator_cjk.cxx
index 16d7d1337538..ce7170c2a411 100644
--- a/i18npool/source/breakiterator/breakiterator_cjk.cxx
+++ b/i18npool/source/breakiterator/breakiterator_cjk.cxx
@@ -1,3 +1,4 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /*************************************************************************
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -184,3 +185,5 @@ BreakIterator_ko::~BreakIterator_ko()
 }
 
 } } } }
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/breakiterator/breakiterator_ctl.cxx b/i18npool/source/breakiterator/breakiterator_ctl.cxx
index cc174084198e..6d6b3f9b9cd7 100644
--- a/i18npool/source/breakiterator/breakiterator_ctl.cxx
+++ b/i18npool/source/breakiterator/breakiterator_ctl.cxx
@@ -1,3 +1,4 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /*************************************************************************
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -143,3 +144,5 @@ LineBreakResults SAL_CALL BreakIterator_CTL::getLineBreak(
 }
 
 } } } }
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/breakiterator/breakiterator_th.cxx b/i18npool/source/breakiterator/breakiterator_th.cxx
index cbbcd510379b..ad3c619d0979 100644
--- a/i18npool/source/breakiterator/breakiterator_th.cxx
+++ b/i18npool/source/breakiterator/breakiterator_th.cxx
@@ -1,3 +1,4 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /*************************************************************************
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -155,3 +156,5 @@ void SAL_CALL BreakIterator_th::makeIndex(const OUString& Text, sal_Int32 nStart
 }
 
 } } } }
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index d7242d180d85..ad934db2db11 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -1,3 +1,4 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /*************************************************************************
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -443,3 +444,5 @@ BreakIterator_Unicode::getSupportedServiceNames(void) throw( uno::RuntimeExcepti
 }
 
 } } } }
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/breakiterator/data/char.txt b/i18npool/source/breakiterator/data/char.txt
new file mode 100644
index 000000000000..8e49a565ed8c
--- /dev/null
+++ b/i18npool/source/breakiterator/data/char.txt
@@ -0,0 +1,118 @@
+#
+#   Copyright (C) 2002-2009, International Business Machines Corporation and others.
+#       All Rights Reserved.
+#
+#   file:  char.txt 
+#
+#   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
+#      See Unicode Standard Annex #29.
+#      These rules are based on TR29 Revision 13, for Unicode Version 5.1
+#   Modifications to SpacingMark and Prepend by M. Hosken.
+#
+
+#
+#  Character Class Definitions.
+#
+$CR          = [\p{Grapheme_Cluster_Break = CR}];
+$LF          = [\p{Grapheme_Cluster_Break = LF}];
+$Control     = [\p{Grapheme_Cluster_Break = Control}];
+$Prepend     = [\p{Grapheme_Cluster_Break = Prepend}];
+$Extend      = [\p{Grapheme_Cluster_Break = Extend}];
+$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
+# True Indic wants to move by syllables. Break up SpacingMark. This is based on Unicode 6.0 data.
+# In effect it is [\p{Grapheme_Cluster_Break = SpacingMark} - \u0E30 \u0E32 \u0E45 \u0EB0 \u0EB2 \u102B \u102C \u1038
+#       \u1062-\u1064 \u1067-\u106D \u1083 \u1087-\u108C \u108F \u109A-\u109C \u19B0-\u19B4 \u19B8-\u19C0 \u19C8 \u19C9
+#       \u1A61 \u1A63 \u1A64 \u1BE7 \u1BEA-\u1BEC \u1BEE \u1BF2 \u1BF3 \uAA7B]
+$IndicSpacing = [\u0903 \u093B \u093E-\u0940 \u0949-\u094C \u094E \u094F \u0982 \u0983 \u09BF \u09C0 \u09C7 \u09C8 \u09CB \u09CC \u0A03 \u0A3E-\u0A40 \u0A83 \u0ABE-\u0AC0 \u0AC9 \u0ACB \u0ACC \u0B02 \u0B03 \u0B40 \u0B47 \u0B48 \u0B4B-\u0B4C \u0BBF \u0BC1 \u0BC2 \u0BC6-\u0BC8 \u0BCA-\u0BCC \u0C01-\u0C03 \u0C41-\u0C44 \u0C82 \u0C83 \u0CBE \u0CC0 \u0CC1 \u0CC3 \u0CC4 \u0CC7 \u0CC8 \u0CCA \u0CCB \u0D02 \u0D03 \u0D3F \u0D40 \u0D46-\u0D48 \u0D4A-\u0D4C \u0D82 \u0D83 \u0DD0 \u0DD1 \u0DD8-\u0DDE \u0F3E \u0F3F \u0F7F \u1923-\u1926 \u1929-\u192B \u1930 \u1931 \u1933-\u1938 \u1A19-\u1A1B \u1B04 \u1B35 \u1B3B \u1B3D-\u1B41 \u1B43 \u1B44 \u1B82 \u1BA1 \u1BA6 \u1BA7 \u1BAA \u1C24-\u1C2B \u1C34 \u1C35 \u1CE1 \u1CF2 \uA880 \uA881 \uA8B4-\uA8C3 \uA952 \uA953 \uA983 \uA9B4 \uA9B5 \uA9BA \uA9BB \uA9BD-\uA9C0 \uAA2F \uAA30 \uAA33 \uAA34 \uABE3 \uABE4 \uABE6 \uABE7 \uABE9 \uABEA \uABEC \U00011000 \U00011002 \U00011082 \U000110B0-\U000110B2 \U000110B7 \U000110B8 \U0001D166 \U0001D16D];
+# SEAsian (Thai, Lao, Burmese, Tai Lue, Tai Tham, Batak) are cluster based not syllable based
+$SEASpacing = [\u0E33 \u0EB3 \u1031 \u103B \u103C \u1056 \u1057 \u1084 \u17B6 \u17BE-\u17C5 \u17C7 \u17C8 \u19B5-\u19B7 \u19BA \u1A55 \u1A57 \u1A6D-\u1A72 \uA823 \uA824 \uA827 \uAA4D];
+$BengaliLetter = [\u0985-\u09B9 \u09CE \u09DC-\u09E1 \u09F0-\u09F1];
+$BengaliSignVirama = \u09CD;
+$GujaratiLetter = [\u0A85-\u0A8C \u0A8F-\u0A90 \u0A93-\u0AB9 \u0AE0-\u0AE1];
+$GujaratiSignVirama = \u0ACD;
+$DevanagariLetter = [\u0904-\u0939 \u0958-\u0961 \u0972-\u097F];
+$DevanagariSignVirama = \u094D;
+$KannadaLetter = [\u0C85-\u0CB9 \u0CDE-\u0CE1];
+$KannadaSignVirama = \u0CCD;
+$MalayalamLetter = [\u0D05-\u0D39 \u0D60-\u0D61 \u0D7A-\u0D7F];
+$MalayalamSignVirama = \u0D4D;
+$OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
+$OriyaSignVirama = \u0B4D;
+$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
+$GurmukhiSignVirama = \u0A4D;
+$TamilLetter = [\u0B85-\u0BB9];
+$TamilSignVirama = \u0BCD;
+$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
+$TeluguSignVirama = \u0C4D;
+
+#
+# Korean Syllable Definitions
+#
+$L       = [\p{Grapheme_Cluster_Break = L}];
+$V       = [\p{Grapheme_Cluster_Break = V}];
+$T       = [\p{Grapheme_Cluster_Break = T}];
+
+$LV      = [\p{Grapheme_Cluster_Break = LV}];
+$LVT     = [\p{Grapheme_Cluster_Break = LVT}];
+
+
+## -------------------------------------------------
+!!chain;
+
+!!forward;
+
+$CR $LF;
+
+$BengaliLetter ($BengaliSignVirama $BengaliLetter?)+;
+$GujaratiLetter ($GujaratiSignVirama $GujaratiLetter?)+;
+$DevanagariLetter ($DevanagariSignVirama $DevanagariLetter?)+;
+$KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
+$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
+$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
+$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
+$TamilLetter ($TamilSignVirama $TamilLetter?)+;
+$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
+
+$L ($L | $V | $LV | $LVT);
+($LV | $V) ($V | $T);
+($LVT | $T) $T;
+
+[^$Control $CR $LF] $Extend;
+
+[^$Control $CR $LF] ($IndicSpacing | $SEASpacing);
+#[^$Control $CR $LF] $SpacingMark;
+# $Prepend [^$Control $CR $LF];
+
+
+## -------------------------------------------------
+
+!!reverse;
+$LF $CR;
+($BengaliLetter? $BengaliSignVirama)+ $BengaliLetter;
+($GujaratiLetter? $GujaratiSignVirama)+ $GujaratiLetter;
+($DevanagariLetter? $DevanagariSignVirama)+ $DevanagariLetter;
+($KannadaLetter? $KannadaSignVirama)+ $KannadaLetter;
+($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
+($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
+($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
+($TamilLetter? $TamilSignVirama)+ $TamilLetter;
+($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
+($L | $V | $LV | $LVT) $L;
+($V | $T) ($LV | $V);
+$T ($LVT | $T);
+
+$Extend      [^$Control $CR $LF];
+($IndicSpacing | $SEASpacing) [^$Control $CR $LF];
+#$SpacingMark [^$Control $CR $LF];
+# [^$Control $CR $LF] $Prepend;
+
+
+## -------------------------------------------------
+
+!!safe_reverse;
+
+
+## -------------------------------------------------
+
+!!safe_forward;
+
diff --git a/i18npool/source/breakiterator/data/makefile.mk b/i18npool/source/breakiterator/data/makefile.mk
index cb37c5132f67..81bbbbd280e1 100644
--- a/i18npool/source/breakiterator/data/makefile.mk
+++ b/i18npool/source/breakiterator/data/makefile.mk
@@ -24,7 +24,7 @@
 # for a copy of the LGPLv3 License.
 #
 #************************************************************************
-PRJ=..$/..$/..
+PRJ=../../..
 
 PRJNAME=i18npool
 TARGET=dict
@@ -46,13 +46,13 @@ SHL1TARGET=dict_ja
 SHL1IMPLIB=i$(SHL1TARGET)
 
 SHL1VERSIONMAP=$(TARGET).map
-SHL1DEF=$(MISC)$/$(SHL1TARGET).def
+SHL1DEF=$(MISC)/$(SHL1TARGET).def
 DEF1NAME=$(SHL1TARGET)
 
 SHL1OBJS= \
-    $(SLO)$/dict_ja.obj
+    $(SLO)/dict_ja.obj
 
-LIB1TARGET=	$(SLB)$/$(SHL1TARGET).lib
+LIB1TARGET=	$(SLB)/$(SHL1TARGET).lib
 LIB1OBJFILES=$(SHL1OBJS)
 
 # Chinese dictionary
@@ -60,13 +60,13 @@ SHL2TARGET=dict_zh
 SHL2IMPLIB=i$(SHL2TARGET)
 
 SHL2VERSIONMAP=$(TARGET).map
-SHL2DEF=$(MISC)$/$(SHL2TARGET).def
+SHL2DEF=$(MISC)/$(SHL2TARGET).def
 DEF2NAME=$(SHL2TARGET)
 
 SHL2OBJS= \
-    $(SLO)$/dict_zh.obj
+    $(SLO)/dict_zh.obj
 
-LIB2TARGET=	$(SLB)$/$(SHL2TARGET).lib
+LIB2TARGET=	$(SLB)/$(SHL2TARGET).lib
 LIB2OBJFILES=$(SHL2OBJS)
 
 DEPOBJFILES= \
@@ -76,9 +76,5 @@ DEPOBJFILES= \
 # --- Targets ------------------------------------------------------
 .INCLUDE :  target.mk
 
-$(MISC)$/dict_%.cxx : %.dic
-    $(AUGMENT_LIBRARY_PATH) $(BIN)$/gendict $< $@
-
-# ugly - is this dependency really required here?
-$(foreach,i,$(shell @$(FIND) . -name "*.dic") $(MISC)$/dict_$(i:b).cxx) : $(BIN)$/gendict$(EXECPOST)
-
+$(MISC)/dict_%.cxx : %.dic
+	$(AUGMENT_LIBRARY_PATH) $(OUT_FOR_BUILD)/bin/gendict $< $@
diff --git a/i18npool/source/breakiterator/gendict.cxx b/i18npool/source/breakiterator/gendict.cxx
index fe2758602ee4..ab181be73836 100644
--- a/i18npool/source/breakiterator/gendict.cxx
+++ b/i18npool/source/breakiterator/gendict.cxx
@@ -1,3 +1,4 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /*************************************************************************
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -35,171 +36,213 @@
 #include <sal/types.h>
 #include <rtl/strbuf.hxx>
 #include <rtl/ustring.hxx>
+#include <osl/diagnose.h>
+#include <vector>
+using std::vector;
 
 using namespace ::rtl;
 
-/* Main Procedure */
+/* Utility gendict:
 
-SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
-{
-    FILE *sfp, *cfp;
+   "BreakIterator_CJK provides input string caching and dictionary searching for
+   longest matching. You can provide a sorted dictionary (the encoding must be
+   UTF-8) by creating the following file:
+            i18npool/source/breakiterator/data/<language>.dict.
 
-    if (argc < 3) exit(-1);
+   The utility gendict will convert the file to C code, which will be compiled
+   into a shared library for dynamic loading.
 
-    sfp = fopen(argv[1], "rb"); // open the source file for read;
-    if (sfp == NULL)
-    {
-        printf("Open the dictionary source file failed.");
-        return -1;
-    }
+   All dictionary searching and loading is performed in the xdictionary class.
+   The only thing you need to do is to derive your class from BreakIterator_CJK
+   and create an instance of the xdictionary with the language name and
+   pass it to the parent class." (from http://wiki.services.openoffice.org/wiki/
+   /Documentation/DevGuide/OfficeDev/Implementing_a_New_Locale - 27/01/2011)
+*/
 
-    // create the C source file to write
-    cfp = fopen(argv[2], "wb");
-    if (cfp == NULL) {
-        fclose(sfp);
-        printf("Can't create the C source file.");
-        return -1;
-    }
+// The C/C++ standards guarantee that static variables are automatically initialized to 0
+static sal_uInt8 exists[0x2000];
+static sal_uInt32 charArray[0x10000];
 
-    fprintf(cfp, "/*\n");
-    fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
-    fprintf(cfp, " * All Rights Reserved.\n");
-    fprintf(cfp, " */\n\n");
-    fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
-    fprintf(cfp, "#include <sal/types.h>\n\n");
-    fprintf(cfp, "extern \"C\" {\n");
-
-    sal_Int32 count, i, j;
-    sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000];
-    sal_Bool exist[0x10000];
-    for (i = 0; i < 0x10000; i++) {
-        exist[i] = sal_False;
-        charArray[i] = 0;
-    }
+static inline void set_exists(sal_uInt32 index)
+{
+   exists[index>>3] |= 1 << (index & 0x07);
+}
+
+static inline void printIncludes(FILE* source_fp)
+{
+    fputs("/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n", source_fp);
+    fputs("#include <sal/types.h>\n\n", source_fp);
+}
 
+static inline void printFunctions(FILE* source_fp)
+{
+    fputs ("\tconst sal_uInt8* getExistMark() { return existMark; }\n", source_fp);
+    fputs ("\tconst sal_Int16* getIndex1() { return index1; }\n", source_fp);
+    fputs ("\tconst sal_Int32* getIndex2() { return index2; }\n", source_fp);
+    fputs ("\tconst sal_Int32* getLenArray() { return lenArray; }\n", source_fp);
+    fputs ("\tconst sal_Unicode* getDataArea() { return dataArea; }\n", source_fp);
+}
+
+static inline void printDataArea(FILE *dictionary_fp, FILE *source_fp, vector<sal_uInt32>& lenArray)
+{
     // generate main dict. data array
-    fprintf(cfp, "static const sal_Unicode dataArea[] = {");
+    fputs("static const sal_Unicode dataArea[] = {\n\t", source_fp);
     sal_Char str[1024];
+    sal_uInt32 lenArrayCurr = 0;
     sal_Unicode current = 0;
-    count = 0;
-    while (fgets(str, 1024, sfp)) {
+
+    while (fgets(str, 1024, dictionary_fp)) {
         // input file is in UTF-8 encoding
         // don't convert last new line character to Ostr.
         OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
         const sal_Unicode *u = Ostr.getStr();
 
-        sal_Int32 len = Ostr.getLength();
+        const sal_Int32 len = Ostr.getLength();
 
-        i=0;
+        sal_Int32 i=0;
         Ostr.iterateCodePoints(&i, 1);
-        if (len == i) continue; // skip one character word
-
-        if (*u != current) {
-        if (*u < current)
-        printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current,
-                    sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount));
-        current = *u;
-        charArray[current] = lenArrayCount;
+        if (len == i)
+            continue;   // skip one character word
+
+        if (u[0] != current) {
+            OSL_ENSURE( (u[0] > current), "Dictionary file should be sorted");
+            current = u[0];
+            charArray[current] = lenArray.size();
         }
 
-        if (lenArrayLen <= lenArrayCount+1)
-        lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32));
-        lenArray[lenArrayCount++] = lenArrayCurr;
-
-        exist[u[0]] = sal_True;
-        for (i = 1; i < len; i++) {     // start from second character,
-        exist[u[i]] = sal_True;     // since the first character is captured in charArray.
-        lenArrayCurr++;
-        if ((count++) % 0x10 == 0)
-            fprintf(cfp, "\n\t");
-        fprintf(cfp, "0x%04x, ", u[i]);
+        lenArray.push_back(lenArrayCurr);
+
+        set_exists(u[0]);
+        // first character is stored in charArray, so start from second
+        for (i = 1; i < len; i++, lenArrayCurr++) {
+            set_exists(u[i]);
+            fprintf(source_fp, "0x%04x, ", u[i]);
+            if ((lenArrayCurr & 0x0f) == 0x0f)
+                fputs("\n\t", source_fp);
         }
     }
-    lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer
-    charArray[current+1] = lenArrayCount;
-    fprintf(cfp, "\n};\n");
-
-    // generate lenArray
-    fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t");
-    count = 1;
-    fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
-    for (i = 0; i < lenArrayCount; i++) {
-        fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i]));
-        if (count == 0xf) {
-        count = 0;
-        fprintf(cfp, "\n\t");
-        } else count++;
-    }
-    fprintf(cfp, "\n};\n");
+    lenArray.push_back( lenArrayCurr ); // store last ending pointer
+    charArray[current+1] = lenArray.size();
+    fputs("\n};\n", source_fp);
+}
 
-    free(lenArray);
+static inline void printLenArray(FILE* source_fp, const vector<sal_uInt32>& lenArray)
+{
+    fprintf(source_fp, "static const sal_Int32 lenArray[] = {\n\t");
+    fprintf(source_fp, "0x%x, ", 0); // insert one slot for skipping 0 in index2 array.
+    for (size_t k = 0; k < lenArray.size(); k++)
+    {
+        if( !(k & 0xf) )
+            fputs("\n\t", source_fp);
 
-    // generate index1 array
-    fprintf (cfp, "static const sal_Int16 index1[] = {\n\t");
-    sal_Int16 set[0x100];
-    count = 0;
-    for (i = 0; i < 0x100; i++) {
-        for (j = 0; j < 0x100; j++)
-        if (charArray[(i*0x100) + j] != 0)
-            break;
-
-        fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
-        if ((i+1) % 0x10 == 0)
-        fprintf (cfp, "\n\t");
+        fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(lenArray[k]));
+    }
+    fputs("\n};\n", source_fp );
+}
+
+/* FIXME?: what happens if in every range i there is at least one charArray != 0
+       => this will make index1[] = {0x00, 0x01, 0x02,... 0xfe, 0xff }
+       => then in index2, the last range will be ignored incorrectly */
+static inline void printIndex1(FILE *source_fp, sal_Int16 *set)
+{
+    fprintf (source_fp, "static const sal_Int16 index1[] = {\n\t");
+    sal_Int16 count = 0;
+    for (sal_Int32 i = 0; i < 0x100; i++) {
+        sal_Int32 j = 0;
+        while( j < 0x100 && charArray[(i<<8) + j] == 0)
+            j++;
+
+        fprintf(source_fp, "0x%02x, ", set[i] = (j < 0x100 ? count++ : 0xff));
+        if ((i & 0x0f) == 0x0f)
+            fputs ("\n\t", source_fp);
     }
-    fprintf (cfp, "};\n");
+    fputs("};\n", source_fp);
+}
 
-    // generate index2 array
-    fprintf (cfp, "static const sal_Int32 index2[] = {\n\t");
+static inline void printIndex2(FILE *source_fp, sal_Int16 *set)
+{
+    fputs ("static const sal_Int32 index2[] = {\n\t", source_fp);
     sal_Int32 prev = 0;
-    for (i = 0; i < 0x100; i++) {
+    for (sal_Int32 i = 0; i < 0x100; i++) {
         if (set[i] != 0xff) {
-        for (j = 0; j < 0x100; j++) {
-            sal_Int32 k = (i*0x100) + j;
-            if (prev != 0 && charArray[k] == 0) {
-            for (k++; k < 0x10000; k++)
-                if (charArray[k] != 0)
-                break;
+            for (sal_Int32 j = 0; j < 0x100; j++) {
+                sal_Int32 k = (i<<8) + j;
+                if (prev != 0 )
+                    while( k < 0x10000 && charArray[k] == 0 )   // bounds check first: never read charArray[0x10000]
+                        k++;
+
+                prev = charArray[(i<<8) + j];
+                fprintf(source_fp, "0x%lx, ", static_cast<long unsigned int>(k < 0x10000 ? charArray[k] + 1 : 0));
+                if ((j & 0x0f) == 0x0f)
+                    fputs ("\n\t", source_fp);
             }
-            prev = charArray[(i*0x100) + j];
-            fprintf(
-                cfp, "0x%lx, ",
-                sal::static_int_cast< unsigned long >(
-                    k < 0x10000 ? charArray[k] + 1 : 0));
-            if ((j+1) % 0x10 == 0)
-            fprintf (cfp, "\n\t");
-        }
-        fprintf (cfp, "\n\t");
+            fputs ("\n\t", source_fp);
         }
     }
-    fprintf (cfp, "\n};\n");
-
-    // generate existMark array
-    count = 0;
-    fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t");
-    for (i = 0; i < 0x1FFF; i++) {
-        sal_uInt8 bit = 0;
-        for (j = 0; j < 8; j++)
-        if (exist[i * 8 + j])
-            bit |= 1 << j;
-        fprintf(cfp, "0x%02x, ", bit);
-        if (count == 0xf) {
-        count = 0;
-        fprintf(cfp, "\n\t");
-        } else count++;
+    fputs ("\n};\n", source_fp);
+}
+
+/* Generates a bitmask for the existence of sal_Unicode values in dictionary;
+   it packs 8 sal_Bool values in 1 sal_uInt8 */
+static inline void printExistsMask(FILE *source_fp)
+{
+    fprintf (source_fp, "static const sal_uInt8 existMark[] = {\n\t");
+    for (unsigned int i = 0; i < 0x2000; i++)
+    {
+        fprintf(source_fp, "0x%02x, ", exists[i]);
+        if ( (i & 0xf) == 0xf )
+            fputs("\n\t", source_fp);
     }
-    fprintf (cfp, "\n};\n");
+    fputs("\n};\n", source_fp);
+}
 
-    // create function to return arrays
-    fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
-    fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
-    fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n");
-    fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n");
-    fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
-    fprintf (cfp, "}\n");
+SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
+{
+    FILE *dictionary_fp, *source_fp;
 
-    fclose(sfp);
-    fclose(cfp);
+    if (argc < 2 || argc > 3)   // 1 argument: generated source goes to stdout; 2 arguments: it goes to the named file
+    {
+        fputs("usage: gendict dictionary_file_name [source_file_name]\n", stderr);
+        exit(-1);
+    }
+
+    dictionary_fp = fopen(argv[1], "rb");   // open the source file for read;
+    if (dictionary_fp == NULL)
+    {
+        printf("Open the dictionary source file failed.");
+        return -1;
+    }
+
+    if(argc == 2)
+        source_fp = stdout;
+    else
+    {
+        // create the C source file to write
+        source_fp = fopen(argv[2], "wb");
+        if (source_fp == NULL) {
+            fclose(dictionary_fp);
+            printf("Can't create the C source file.");
+            return -1;
+        }
+    }
+
+    vector<sal_uInt32> lenArray;   // stores the word boundaries in DataArea
+    sal_Int16 set[0x100];
+
+    printIncludes(source_fp);
+    fputs("extern \"C\" {\n", source_fp);
+        printDataArea(dictionary_fp, source_fp, lenArray);
+        printLenArray(source_fp, lenArray);
+        printIndex1(source_fp, set);
+        printIndex2(source_fp, set);
+        printExistsMask(source_fp);
+        printFunctions(source_fp);
+    fputs("}\n", source_fp);
+
+    fclose(dictionary_fp);
+    fclose(source_fp);
 
     return 0;
-}   // End of main
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/breakiterator/makefile.mk b/i18npool/source/breakiterator/makefile.mk
index fc6561c3e0aa..434fdc9f5d88 100644
--- a/i18npool/source/breakiterator/makefile.mk
+++ b/i18npool/source/breakiterator/makefile.mk
@@ -24,7 +24,7 @@
 # for a copy of the LGPLv3 License.
 #
 #************************************************************************/
-PRJ=..$/..
+PRJ=../..
 PRJNAME=i18npool
 TARGET=breakiterator
 
@@ -41,28 +41,28 @@ ENABLE_EXCEPTIONS=TRUE
 MY_BRK_TXTFILES:=$(shell @ls data/*.txt)
 
 # insert "OpenOffice" as icu package name in front of the  name of each rule file for searching on application provided data
-MY_BRK_BRKFILES:=$(subst,data/,$(MISC)$/ $(MY_BRK_TXTFILES:s/.txt/.brk/))
+MY_BRK_BRKFILES:=$(subst,data/,$(MISC)/ $(MY_BRK_TXTFILES:s/.txt/.brk/))
 
 # OpenOffice_dat.c is a generated file from the rule file list by gencmn
 MY_MISC_CXXFILES := \
-        $(MISC)$/OpenOffice_dat.c \
+        $(MISC)/OpenOffice_dat.c \
         $(MY_BRK_BRKFILES:s/.brk/_brk.c/)
 
 SLOFILES=   \
-        $(SLO)$/breakiteratorImpl.obj \
-        $(SLO)$/breakiterator_cjk.obj \
-        $(SLO)$/breakiterator_ctl.obj \
-        $(SLO)$/breakiterator_th.obj \
-        $(SLO)$/breakiterator_unicode.obj \
-        $(SLO)$/xdictionary.obj \
-        $(subst,$(MISC)$/,$(SLO)$/ $(MY_MISC_CXXFILES:s/.c/.obj/))
+        $(SLO)/breakiteratorImpl.obj \
+        $(SLO)/breakiterator_cjk.obj \
+        $(SLO)/breakiterator_ctl.obj \
+        $(SLO)/breakiterator_th.obj \
+        $(SLO)/breakiterator_unicode.obj \
+        $(SLO)/xdictionary.obj \
+        $(subst,$(MISC)/,$(SLO)/ $(MY_MISC_CXXFILES:s/.c/.obj/))
 
-OBJFILES   = $(OBJ)$/gendict.obj
+OBJFILES   = $(OBJ)/gendict.obj
 
 APP1TARGET = gendict
 APP1RPATH = NONE
 
-DEPOBJFILES   = $(OBJ)$/gendict.obj 
+DEPOBJFILES   = $(OBJ)/gendict.obj
 APP1OBJS   = $(DEPOBJFILES)
 
 APP1STDLIBS = $(SALLIB)
@@ -74,37 +74,36 @@ GENCMN:=$(SYSTEM_GENCMN)
 GENBRK:=$(SYSTEM_GENBRK)
 GENCCODE:=$(SYSTEM_GENCCODE)
 .ELSE
-GENCMN:=$(AUGMENT_LIBRARY_PATH) $(SOLARBINDIR)$/gencmn
-GENBRK:=$(AUGMENT_LIBRARY_PATH) $(SOLARBINDIR)$/genbrk
-GENCCODE:=$(AUGMENT_LIBRARY_PATH) $(SOLARBINDIR)$/genccode
+GENCMN:=$(AUGMENT_LIBRARY_PATH) $(SOLARBINDIR)/gencmn
+GENBRK:=$(AUGMENT_LIBRARY_PATH) $(SOLARBINDIR)/genbrk
+GENCCODE:=$(AUGMENT_LIBRARY_PATH) $(SOLARBINDIR)/genccode
 .ENDIF
 
 .INCLUDE .IGNORE :  icuversion.mk
 
-$(MISC)$/%.txt : data/%.txt
+.INCLUDE :	target.mk
+
+$(MISC)/%.txt : data/%.txt
 # fdo#31271 ")" reclassified in more recent ICU/Unicode Standards
 .IF "$(ICU_MAJOR)" >= "5" || ("$(ICU_MAJOR)" == "4" && "$(ICU_MINOR)" >= "4")
-    $(SED) "s#\[:LineBreak =  Close_Punctuation:\]#\[\[:LineBreak =  Close_Punctuation:\] \[:LineBreak = Close_Parenthesis:\]\]#" $< > $@
+	$(SED) "s#\[:LineBreak =  Close_Punctuation:\]#\[\[:LineBreak =  Close_Punctuation:\] \[:LineBreak = Close_Parenthesis:\]\]#" $< > $@
 .ELSE
-    $(COPY) $< $@
+	$(COPY) $< $@
 .ENDIF
 
-$(MISC)$/%.brk : $(MISC)/%.txt
-    $(WRAPCMD) $(GENBRK) -r $< -o $(MISC)$/$*.brk
+$(MISC)/%.brk : $(MISC)/%.txt
+	$(GENBRK) -r $< -o $(MISC)/$*.brk
 
-$(MISC)$/%_brk.c : $(MISC)$/%.brk
-    $(WRAPCMD) $(GENCCODE) -n OpenOffice -d $(MISC)$ $(MISC)$/$*.brk
+$(MISC)/%_brk.c : $(MISC)/%.brk
+	$(GENCCODE) -n OpenOffice -d $(MISC)$ $(MISC)/$*.brk
 
 # 'gencmn', 'genbrk' and 'genccode' are tools generated and delivered by icu project to process icu breakiterator rules.
 # The output of gencmn generates warnings under Windows. We want to minimize the patches to external tools,
-# so the output (OpenOffice_icu_dat.c) is changed here to include a pragma to disable the warnings.
+# so the output (OpenOffice_dat.c) is changed here to include a pragma to disable the warnings.
 # Output of gencmn is redirected to OpenOffice_icu_tmp.c with the -t switch.
-$(MISC)$/OpenOffice_%.c : $(MY_BRK_BRKFILES:s/.brk/_brk.c/)
-    $(WRAPCMD) $(GENCMN) -n OpenOffice -t tmp -S -d $(MISC) O $(mktmp $(subst,$(MISC)$/, $(MY_BRK_BRKFILES:t"\n")))
-    echo $(USQ)#ifdef _MSC_VER$(USQ) > $@
-    echo $(USQ)#pragma warning( disable : 4229 4668 )$(USQ) >> $@
-    echo $(USQ)#endif$(USQ) >> $@
-    $(TYPE) $(@:s/_dat/_tmp/) >> $@
-
-.INCLUDE :	target.mk
-
+$(MISC)/OpenOffice_dat.c : $(MY_BRK_BRKFILES:s/.brk/_brk.c/)
+	$(GENCMN) -n OpenOffice -t tmp -S -d $(MISC) O $(mktmp $(subst,$(MISC)/, $(MY_BRK_BRKFILES:t"\n")))
+	echo $(USQ)#ifdef _MSC_VER$(USQ) > $@
+	echo $(USQ)#pragma warning( disable : 4229 4668 )$(USQ) >> $@
+	echo $(USQ)#endif$(USQ) >> $@
+	$(TYPE) $(@:s/_dat/_tmp/) >> $@
diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx
index aba69b5e9a21..0bff3d0b174c 100644
--- a/i18npool/source/breakiterator/xdictionary.cxx
+++ b/i18npool/source/breakiterator/xdictionary.cxx
@@ -1,3 +1,4 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 /*************************************************************************
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -45,7 +46,8 @@
 // Construction/Destruction
 //////////////////////////////////////////////////////////////////////
 
-using namespace rtl;
+using ::rtl::OUString;
+using ::rtl::OUStringBuffer;
 
 namespace com { namespace sun { namespace star { namespace i18n {
 
@@ -60,12 +62,6 @@ xdictionary::xdictionary(const sal_Char *lang) :
     hModule( NULL ),
     boundary(),
     japaneseWordBreak( sal_False )
-#if USE_CELL_BOUNDARY_CODE
-    // For CTL breakiterator, where the word boundary should not be inside cell.
-    ,
-    useCellBoundary( sal_False ),
-    cellBoundary( NULL )
-#endif
 {
     index1 = 0;
 #ifdef SAL_DLLPREFIX
@@ -78,15 +74,15 @@ xdictionary::xdictionary(const sal_Char *lang) :
         hModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT );
         if( hModule ) {
             sal_IntPtr (*func)();
-            func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getExistMark").pData );
+            func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString(RTL_CONSTASCII_USTRINGPARAM("getExistMark")).pData );
             existMark = (sal_uInt8*) (*func)();
-            func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getIndex1").pData );
+            func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString(RTL_CONSTASCII_USTRINGPARAM("getIndex1")).pData );
             index1 = (sal_Int16*) (*func)();
-            func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getIndex2").pData );
+            func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString(RTL_CONSTASCII_USTRINGPARAM("getIndex2")).pData );
             index2 = (sal_Int32*) (*func)();
-            func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getLenArray").pData );
+            func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString(RTL_CONSTASCII_USTRINGPARAM("getLenArray")).pData );
             lenArray = (sal_Int32*) (*func)();
-            func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString::createFromAscii("getDataArea").pData );
+            func = (sal_IntPtr(*)()) osl_getFunctionSymbol( hModule, OUString(RTL_CONSTASCII_USTRINGPARAM("getDataArea")).pData );
             dataArea = (sal_Unicode*) (*func)();
         }
         else
@@ -101,10 +97,6 @@ xdictionary::xdictionary(const sal_Char *lang) :
         for (sal_Int32 i = 0; i < CACHE_MAX; i++)
             cache[i].size = 0;
 
-#if USE_CELL_BOUNDARY_CODE
-        useCellBoundary = sal_False;
-        cellBoundary = NULL;
-#endif
         japaneseWordBreak = sal_False;
 }
 
@@ -112,8 +104,8 @@ xdictionary::~xdictionary() {
         osl_unloadModule(hModule);
         for (sal_Int32 i = 0; i < CACHE_MAX; i++) {
             if (cache[i].size > 0) {
-                delete cache[i].contents;
-                delete cache[i].wordboundary;
+                delete [] cache[i].contents;
+                delete [] cache[i].wordboundary;
             }
         }
 }
@@ -241,86 +233,70 @@ static sal_Int16 JapaneseCharType(sal_Unicode c)
 
 WordBreakCache& xdictionary::getCache(const sal_Unicode *text, Boundary& wordBoundary)
 {
+    WordBreakCache& rCache = cache[text[0] & 0x1f];   // slot selected by the low 5 bits of text[0]
 
-        WordBreakCache& aCache = cache[text[0] & 0x1f];
-
-        if (aCache.size != 0 && aCache.equals(text, wordBoundary))
-            return aCache;
+    if (rCache.size != 0 && rCache.equals(text, wordBoundary))
+        return rCache;
 
-        sal_Int32 len = wordBoundary.endPos - wordBoundary.startPos;
+    sal_Int32 len = wordBoundary.endPos - wordBoundary.startPos;
 
-        if (aCache.size == 0 || len > aCache.size) {
-            if (aCache.size != 0) {
-                delete aCache.contents;
-                delete aCache.wordboundary;
-                aCache.size = len;
-            }
-            else
-                aCache.size = len > DEFAULT_SIZE ? len : DEFAULT_SIZE;
-            aCache.contents = new sal_Unicode[aCache.size + 1];
-            aCache.wordboundary = new sal_Int32[aCache.size + 2];
+    if (rCache.size == 0 || len > rCache.size) {
+        if (rCache.size != 0) {
+            delete [] rCache.contents;       // allocated with new[] below, so must be released with delete []
+            delete [] rCache.wordboundary;   // allocated with new[] below, so must be released with delete []
+            rCache.size = len;
         }
-        aCache.length  = len;
-        memcpy(aCache.contents, text + wordBoundary.startPos, len * sizeof(sal_Unicode));
-        *(aCache.contents + len) = 0x0000;
-        // reset the wordboundary in cache
-        memset(aCache.wordboundary, '\0', sizeof(sal_Int32)*(len + 2));
-
-        sal_Int32 i = 0;        // loop variable
-        while (aCache.wordboundary[i] < aCache.length) {
-            len = 0;
-            // look the continuous white space as one word and cashe it
-            while (u_isWhitespace((sal_uInt32)text[wordBoundary.startPos + aCache.wordboundary[i] + len]))
-                len ++;
-
-            if (len == 0) {
-                const sal_Unicode *str = text + wordBoundary.startPos + aCache.wordboundary[i];
-                sal_Int32 slen = aCache.length - aCache.wordboundary[i];
-                sal_Int16 type = 0, count = 0;
-                for (;len == 0 && slen > 0; str++, slen--) {
-                    len = getLongestMatch(str, slen);
-                    if (len == 0) {
-                        if (!japaneseWordBreak) {
-                            len = 1;
-                        } else {
-                            if (count == 0)
-                                type = JapaneseCharType(*str);
-                            else if (type != JapaneseCharType(*str))
-                                break;
-                            count++;
-                        }
-                    }
-                }
-                if (count) {
-                    aCache.wordboundary[i+1] = aCache.wordboundary[i] + count;
-                    i++;
-
-#if USE_CELL_BOUNDARY_CODE
-                    if (useCellBoundary) {
-                        sal_Int32 cBoundary = cellBoundary[aCache.wordboundary[i] + wordBoundary.startPos - 1];
-                        if (cBoundary > 0)
-                            aCache.wordboundary[i] = cBoundary - wordBoundary.startPos;
+        else
+            rCache.size = len > DEFAULT_SIZE ? len : DEFAULT_SIZE;
+        rCache.contents = new sal_Unicode[rCache.size + 1];
+        rCache.wordboundary = new sal_Int32[rCache.size + 2];
+    }
+    rCache.length  = len;
+    memcpy(rCache.contents, text + wordBoundary.startPos, len * sizeof(sal_Unicode));
+    *(rCache.contents + len) = 0x0000;
+    // reset the wordboundary in cache
+    memset(rCache.wordboundary, '\0', sizeof(sal_Int32)*(len + 2));
+
+    sal_Int32 i = 0;        // loop variable
+    while (rCache.wordboundary[i] < rCache.length) {
+        len = 0;
+        // treat a run of continuous white space as one word and cache it
+        while (u_isWhitespace((sal_uInt32)text[wordBoundary.startPos + rCache.wordboundary[i] + len]))
+            len ++;
+
+        if (len == 0) {
+            const sal_Unicode *str = text + wordBoundary.startPos + rCache.wordboundary[i];
+            sal_Int32 slen = rCache.length - rCache.wordboundary[i];
+            sal_Int16 type = 0, count = 0;
+            for (;len == 0 && slen > 0; str++, slen--) {
+                len = getLongestMatch(str, slen);
+                if (len == 0) {
+                    if (!japaneseWordBreak) {
+                        len = 1;
+                    } else {
+                        if (count == 0)
+                            type = JapaneseCharType(*str);
+                        else if (type != JapaneseCharType(*str))
+                            break;
+                        count++;
                     }
-#endif
                 }
             }
-
-            if (len) {
-                aCache.wordboundary[i+1] = aCache.wordboundary[i] + len;
+            if (count)
+            {
+                rCache.wordboundary[i+1] = rCache.wordboundary[i] + count;
                 i++;
-
-#if USE_CELL_BOUNDARY_CODE
-                if (useCellBoundary) {
-                    sal_Int32 cBoundary = cellBoundary[aCache.wordboundary[i] + wordBoundary.startPos - 1];
-                    if (cBoundary > 0)
-                        aCache.wordboundary[i] = cBoundary - wordBoundary.startPos;
-                }
-#endif
             }
         }
-        aCache.wordboundary[i + 1] = aCache.length + 1;
 
-        return aCache;
+        if (len) {
+            rCache.wordboundary[i+1] = rCache.wordboundary[i] + len;
+            i++;
+        }
+    }
+    rCache.wordboundary[i + 1] = rCache.length + 1;
+
+    return rCache;
 }
 
 Boundary xdictionary::previousWord(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType)
@@ -391,12 +367,6 @@ Boundary xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, s
         return boundary;
 }
 
-#if USE_CELL_BOUNDARY_CODE
-void xdictionary::setCellBoundary(sal_Int32* cellArray)
-{
-        useCellBoundary = sal_True;
-        cellBoundary = cellArray;
-}
-#endif
-
 } } } }
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */