summaryrefslogtreecommitdiff
path: root/i18npool/source/characterclassification/cclass_unicode.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'i18npool/source/characterclassification/cclass_unicode.cxx')
-rw-r--r--i18npool/source/characterclassification/cclass_unicode.cxx288
1 files changed, 288 insertions, 0 deletions
diff --git a/i18npool/source/characterclassification/cclass_unicode.cxx b/i18npool/source/characterclassification/cclass_unicode.cxx
new file mode 100644
index 000000000000..045512e61258
--- /dev/null
+++ b/i18npool/source/characterclassification/cclass_unicode.cxx
@@ -0,0 +1,288 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+// MARKER(update_precomp.py): autogen include statement, do not remove
+#include "precompiled_i18npool.hxx"
+
+#include <cclass_unicode.hxx>
+#include <com/sun/star/i18n/UnicodeScript.hpp>
+#include <com/sun/star/i18n/UnicodeType.hpp>
+#include <com/sun/star/i18n/KCharacterType.hpp>
+#include <unicode/uchar.h>
+#include <i18nutil/x_rtl_ustring.h>
+#include <breakiteratorImpl.hxx>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::rtl;
+
+namespace com { namespace sun { namespace star { namespace i18n {
+// ----------------------------------------------------
+// class cclass_Unicode
+// ----------------------------------------------------;
+
+cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ),
+ pTable( NULL ),
+ pStart( NULL ),
+ pCont( NULL ),
+ nStartTypes( 0 ),
+ nContTypes( 0 ),
+ eState( ssGetChar ),
+ cGroupSep( ',' ),
+ cDecimalSep( '.' )
+{
+ trans = new Transliteration_casemapping();
+ cClass = "com.sun.star.i18n.CharacterClassification_Unicode";
+}
+
+cclass_Unicode::~cclass_Unicode() {
+ destroyParserTable();
+ delete trans;
+}
+
+
+OUString SAL_CALL
+cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
+ sal_Int32 len = Text.getLength();
+ if (nPos >= len)
+ return OUString();
+ if (nCount + nPos > len)
+ nCount = len - nPos;
+
+ trans->setMappingType(MappingTypeToUpper, rLocale);
+ return trans->transliterateString2String(Text, nPos, nCount);
+}
+
+OUString SAL_CALL
+cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
+ sal_Int32 len = Text.getLength();
+ if (nPos >= len)
+ return OUString();
+ if (nCount + nPos > len)
+ nCount = len - nPos;
+
+ trans->setMappingType(MappingTypeToLower, rLocale);
+ return trans->transliterateString2String(Text, nPos, nCount);
+}
+
+OUString SAL_CALL
+cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
+ sal_Int32 len = Text.getLength();
+ if (nPos >= len)
+ return OUString();
+ if (nCount + nPos > len)
+ nCount = len - nPos;
+
+ trans->setMappingType(MappingTypeToTitle, rLocale);
+ rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount, 1 );
+ sal_Unicode* out = pStr->buffer;
+ BreakIteratorImpl brk(xMSF);
+ Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
+ WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
+ for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
+ if (i >= bdy.endPos)
+ bdy = brk.nextWord(Text, bdy.endPos, rLocale,
+ WordType::ANYWORD_IGNOREWHITESPACES);
+ *out = (i == bdy.startPos) ?
+ trans->transliterateChar2Char(Text[i]) : Text[i];
+ }
+ *out = 0;
+ return OUString( pStr, SAL_NO_ACQUIRE );
+}
+
+sal_Int16 SAL_CALL
+cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
+ if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
+ return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
+}
+
+sal_Int16 SAL_CALL
+cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
+ if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
+ return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
+}
+
+
+sal_Int16 SAL_CALL
+cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
+ if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
+ // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
+ // while OO.o enum UnicideScript starts from 0.
+ // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
+ return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
+}
+
+
+sal_Int32 SAL_CALL
+cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
+ using namespace ::com::sun::star::i18n::KCharacterType;
+
+ sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
+ if (increment > 0) ch = Text.iterateCodePoints(nPos, 0);
+ switch ( u_charType(ch) ) {
+ // Upper
+ case U_UPPERCASE_LETTER :
+ return UPPER|LETTER|PRINTABLE|BASE_FORM;
+
+ // Lower
+ case U_LOWERCASE_LETTER :
+ return LOWER|LETTER|PRINTABLE|BASE_FORM;
+
+ // Title
+ case U_TITLECASE_LETTER :
+ return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
+
+ // Letter
+ case U_MODIFIER_LETTER :
+ case U_OTHER_LETTER :
+ return LETTER|PRINTABLE|BASE_FORM;
+
+ // Digit
+ case U_DECIMAL_DIGIT_NUMBER:
+ case U_LETTER_NUMBER:
+ case U_OTHER_NUMBER:
+ return DIGIT|PRINTABLE|BASE_FORM;
+
+ // Base
+ case U_NON_SPACING_MARK:
+ case U_ENCLOSING_MARK:
+ case U_COMBINING_SPACING_MARK:
+ return BASE_FORM|PRINTABLE;
+
+ // Print
+ case U_SPACE_SEPARATOR:
+
+ case U_DASH_PUNCTUATION:
+ case U_INITIAL_PUNCTUATION:
+ case U_FINAL_PUNCTUATION:
+ case U_CONNECTOR_PUNCTUATION:
+ case U_OTHER_PUNCTUATION:
+
+ case U_MATH_SYMBOL:
+ case U_CURRENCY_SYMBOL:
+ case U_MODIFIER_SYMBOL:
+ case U_OTHER_SYMBOL:
+ return PRINTABLE;
+
+ // Control
+ case U_CONTROL_CHAR:
+ case U_FORMAT_CHAR:
+ return CONTROL;
+
+ case U_LINE_SEPARATOR:
+ case U_PARAGRAPH_SEPARATOR:
+ return CONTROL|PRINTABLE;
+
+ // for all others
+ default:
+ return U_GENERAL_OTHER_TYPES;
+ }
+}
+
+sal_Int32 SAL_CALL
+cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) {
+ if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
+ return getCharType(Text, &nPos, 0);
+
+}
+
+sal_Int32 SAL_CALL
+cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) {
+ if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
+
+ sal_Int32 result = getCharType(Text, &nPos, 0);
+ for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++)
+ result |= getCharType(Text, &nPos, 1);
+ return result;
+}
+
+ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
+ const OUString& Text,
+ sal_Int32 nPos,
+ const Locale& rLocale,
+ sal_Int32 startCharTokenType,
+ const OUString& userDefinedCharactersStart,
+ sal_Int32 contCharTokenType,
+ const OUString& userDefinedCharactersCont )
+ throw(RuntimeException)
+{
+ ParseResult r;
+ if ( Text.getLength() <= nPos )
+ return r;
+
+ setupParserTable( rLocale,
+ startCharTokenType, userDefinedCharactersStart,
+ contCharTokenType, userDefinedCharactersCont );
+ parseText( r, Text, nPos );
+
+ return r;
+}
+
+
+ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
+ sal_Int32 nTokenType,
+ const OUString& Text,
+ sal_Int32 nPos,
+ const Locale& rLocale,
+ sal_Int32 startCharTokenType,
+ const OUString& userDefinedCharactersStart,
+ sal_Int32 contCharTokenType,
+ const OUString& userDefinedCharactersCont )
+ throw(RuntimeException)
+{
+ ParseResult r;
+ if ( Text.getLength() <= nPos )
+ return r;
+
+ setupParserTable( rLocale,
+ startCharTokenType, userDefinedCharactersStart,
+ contCharTokenType, userDefinedCharactersCont );
+ parseText( r, Text, nPos, nTokenType );
+
+ return r;
+}
+
+OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException )
+{
+ return OUString::createFromAscii(cClass);
+}
+
+
+sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException )
+{
+ return !rServiceName.compareToAscii(cClass);
+}
+
+Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException )
+{
+ Sequence< OUString > aRet(1);
+ aRet[0] = OUString::createFromAscii(cClass);
+ return aRet;
+}
+
+} } } }
+