diff options
Diffstat (limited to 'comphelper/source/misc/string.cxx')
-rw-r--r-- | comphelper/source/misc/string.cxx | 196 |
1 files changed, 192 insertions, 4 deletions
diff --git a/comphelper/source/misc/string.cxx b/comphelper/source/misc/string.cxx index 951baf0be8d2..18ad6ad4fe9f 100644 --- a/comphelper/source/misc/string.cxx +++ b/comphelper/source/misc/string.cxx @@ -1,3 +1,4 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /************************************************************************* * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -41,6 +42,8 @@ #include <comphelper/stlunosequence.hxx> #include <comphelper/stl_types.hxx> +#include <com/sun/star/i18n/CharType.hpp> + namespace comphelper { namespace string { @@ -91,20 +94,119 @@ rtl::OUString searchAndReplaceAsciiL( return _source; } +sal_uInt32 decimalStringToNumber( + ::rtl::OUString const & str ) +{ + sal_uInt32 result = 0; + for( sal_Int32 i = 0 ; i < str.getLength() ; ) + { + sal_uInt32 c = str.iterateCodePoints(&i); + sal_uInt32 value = 0; + if( c <= 0x0039) // ASCII decimal digits, most common + value = c - 0x0030; + else if( c >= 0x1D7F6 ) // mathematical monospace digits + value = c - 0x1D7F6; + else if( c >= 0x1D7EC ) // mathematical sans-serif bold digits + value = c - 0x1D7EC; + else if( c >= 0x1D7E2 ) // mathematical sans-serif digits + value = c - 0x1D7E2; + else if( c >= 0x1D7D8 ) // mathematical double-struck digits + value = c - 0x1D7D8; + else if( c >= 0x1D7CE ) // mathematical bold digits + value = c - 0x1D7CE; + else if( c >= 0x11066 ) // brahmi digits + value = c - 0x11066; + else if( c >= 0x104A0 ) // osmanya digits + value = c - 0x104A0; + else if( c >= 0xFF10 ) // fullwidth digits + value = c - 0xFF10; + else if( c >= 0xABF0 ) // meetei mayek digits + value = c - 0xABF0; + else if( c >= 0xAA50 ) // cham digits + value = c - 0xAA50; + else if( c >= 0xA9D0 ) // javanese digits + value = c - 0xA9D0; + else if( c >= 0xA900 ) // kayah li digits + value = c - 0xA900; + else if( c >= 0xA8D0 ) // saurashtra digits + value = c - 0xA8D0; + else if( c >= 0xA620 ) // vai digits + value = c - 0xA620; + else if( c >= 0x1C50 ) // ol chiki digits + value = c - 0x1C50; + else if( c >= 0x1C40 ) // lepcha digits + value = c - 0x1C40; + else if( c >= 0x1BB0 ) // sundanese digits + value = c - 0x1BB0; + else if( c >= 0x1B50 ) // balinese digits + value = c - 0x1B50; + else if( c >= 0x1A90 ) // tai tham tham digits + value = c - 0x1A90; + else if( c >= 0x1A80 ) // tai tham hora digits + value = c - 0x1A80; + else if( c >= 0x19D0 ) // new tai lue digits + value = c - 0x19D0; + else if( c >= 0x1946 ) // limbu digits + value = c - 0x1946; + else if( c >= 0x1810 ) // mongolian digits + value = c - 0x1810; + else if( c >= 0x17E0 ) // khmer digits + value = c - 0x17E0; + else if( c >= 0x1090 ) // myanmar shan digits + value = c - 0x1090; + else if( c >= 0x1040 ) // myanmar digits + value = c - 0x1040; + else if( c >= 0x0F20 ) // tibetan digits + value = c - 0x0F20; + else if( c >= 0x0ED0 ) // lao digits + value = c - 0x0ED0; + else if( c >= 0x0E50 ) // thai digits + value = c - 0x0E50; + else if( c >= 0x0D66 ) // malayalam digits + value = c - 0x0D66; + else if( c >= 0x0CE6 ) // kannada digits + value = c - 0x0CE6; + else if( c >= 0x0C66 ) // telugu digits + value = c - 0x0C66; + else if( c >= 0x0BE6 ) // tamil digits + value = c - 0x0BE6; + else if( c >= 0x0B66 ) // oriya digits + value = c - 0x0B66; + else if( c >= 0x0AE6 ) // gujarati digits + value = c - 0x0AE6; + else if( c >= 0x0A66 ) // gurmukhi digits + value = c - 0x0A66; + else if( c >= 0x09E6 ) // bengali digits + value = c - 0x09E6; + else if( c >= 0x0966 ) // devanagari digit + value = c - 0x0966; + else if( c >= 0x07C0 ) // nko digits + value = c - 0x07C0; + else if( c >= 0x06F0 ) // extended arabic-indic digits + value = c - 0x06F0; + else if( c >= 0x0660 ) // arabic-indic digits + value = c - 0x0660; + result = result * 10 + value; + } + return result; +} + +using namespace ::com::sun::star; + // convert between sequence of string and comma separated string ::rtl::OUString convertCommaSeparated( - ::com::sun::star::uno::Sequence< ::rtl::OUString > const& i_rSeq) + uno::Sequence< ::rtl::OUString > const& i_rSeq) { ::rtl::OUStringBuffer buf; ::comphelper::intersperse( ::comphelper::stl_begin(i_rSeq), ::comphelper::stl_end(i_rSeq), ::comphelper::OUStringBufferAppender(buf), - ::rtl::OUString::createFromAscii(", ")); + ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM( ", " ))); return buf.makeStringAndClear(); } -::com::sun::star::uno::Sequence< ::rtl::OUString > +uno::Sequence< ::rtl::OUString > convertCommaSeparated( ::rtl::OUString const& i_rString ) { std::vector< ::rtl::OUString > vec; @@ -117,9 +219,95 @@ rtl::OUString searchAndReplaceAsciiL( vec.push_back(kw); } } while (idx >= 0); - ::com::sun::star::uno::Sequence< ::rtl::OUString > kws(vec.size()); + uno::Sequence< ::rtl::OUString > kws(vec.size()); std::copy(vec.begin(), vec.end(), stl_begin(kws)); return kws; } + +sal_Int32 compareNatural( const ::rtl::OUString & rLHS, const ::rtl::OUString & rRHS, + const uno::Reference< i18n::XCollator > &rCollator, + const uno::Reference< i18n::XBreakIterator > &rBI, + const lang::Locale &rLocale ) +{ + sal_Int32 nRet = 0; + + sal_Int32 nLHSLastNonDigitPos = 0; + sal_Int32 nRHSLastNonDigitPos = 0; + sal_Int32 nLHSFirstDigitPos = 0; + sal_Int32 nRHSFirstDigitPos = 0; + + while (nLHSFirstDigitPos < rLHS.getLength() || nRHSFirstDigitPos < rRHS.getLength()) + { + sal_Int32 nLHSChunkLen; + sal_Int32 nRHSChunkLen; + + //Compare non digit block as normal strings + nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos, + rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos, + rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + if (nLHSFirstDigitPos == -1) + nLHSFirstDigitPos = rLHS.getLength(); + if (nRHSFirstDigitPos == -1) + nRHSFirstDigitPos = rRHS.getLength(); + nLHSChunkLen = nLHSFirstDigitPos - nLHSLastNonDigitPos; + nRHSChunkLen = nRHSFirstDigitPos - nRHSLastNonDigitPos; + + nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos, + nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen); + if (nRet != 0) + break; + + //Compare digit block as one number vs another + nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, + rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, + rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + if (nLHSLastNonDigitPos == -1) + nLHSLastNonDigitPos = rLHS.getLength(); + if (nRHSLastNonDigitPos == -1) + nRHSLastNonDigitPos = rRHS.getLength(); + nLHSChunkLen = nLHSLastNonDigitPos - nLHSFirstDigitPos; + nRHSChunkLen = nRHSLastNonDigitPos - nRHSFirstDigitPos; + + //To-Do: Possibly scale down those unicode codepoints that relate to + //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in + //vcl + + sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.copy(nLHSFirstDigitPos, nLHSChunkLen)); + sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.copy(nRHSFirstDigitPos, nRHSChunkLen)); + + nRet = nLHS-nRHS; + if (nRet != 0) + break; + } + + //Squeeze these down to -1, 0, 1 in case it gets casted to a StringCompare + if (nRet > 0) + nRet = 1; + else if (nRet < 0) + nRet = -1; + + return nRet; +} + +NaturalStringSorter::NaturalStringSorter( + const uno::Reference< uno::XComponentContext > &rContext, + const lang::Locale &rLocale) : m_aLocale(rLocale) +{ + uno::Reference< lang::XMultiComponentFactory > xFactory(rContext->getServiceManager(), + uno::UNO_SET_THROW); + + m_xCollator = uno::Reference< i18n::XCollator >(xFactory->createInstanceWithContext( + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("com.sun.star.i18n.Collator")), rContext), + uno::UNO_QUERY_THROW); + m_xCollator->loadDefaultCollator(m_aLocale, 0); + m_xBI = uno::Reference< i18n::XBreakIterator >(xFactory->createInstanceWithContext( + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("com.sun.star.i18n.BreakIterator")), rContext), + uno::UNO_QUERY_THROW); +} + } } + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |