diff options
Diffstat (limited to 'comphelper/source/misc/string.cxx')
-rw-r--r-- | comphelper/source/misc/string.cxx | 309 |
1 files changed, 244 insertions, 65 deletions
diff --git a/comphelper/source/misc/string.cxx b/comphelper/source/misc/string.cxx index a3ee9bc58521..446e500e0250 100644 --- a/comphelper/source/misc/string.cxx +++ b/comphelper/source/misc/string.cxx @@ -19,16 +19,21 @@ #include <sal/config.h> +#include <cassert> #include <cstddef> #include <string_view> +#include <utility> #include <vector> #include <algorithm> +#include <o3tl/safeint.hxx> +#include <o3tl/string_view.hxx> #include <rtl/character.hxx> #include <rtl/ustring.hxx> #include <rtl/ustrbuf.hxx> #include <rtl/string.hxx> #include <rtl/strbuf.hxx> +#include <sal/log.hxx> #include <sal/types.h> #include <comphelper/string.hxx> @@ -61,16 +66,43 @@ namespace return rIn.substr(i); } + template <typename T, typename C> T tmpl_stripStartString(const T &rIn, + const C cRemove) + { + if (rIn.isEmpty()) + return rIn; + + sal_Int32 i = 0; + + while (i < rIn.getLength()) + { + if (rIn[i] != cRemove) + break; + ++i; + } + + return rIn.copy(i); + } +} + +OString stripStart(const OString& rIn, char c) +{ + return tmpl_stripStartString<OString, char>(rIn, c); } -OString stripStart(std::string_view rIn, char c) +std::string_view stripStart(std::string_view rIn, char c) { - return OString(tmpl_stripStart<std::string_view, char>(rIn, c)); + return tmpl_stripStart<std::string_view, char>(rIn, c); } -OUString stripStart(std::u16string_view rIn, sal_Unicode c) +OUString stripStart(const OUString& rIn, sal_Unicode c) { - return OUString(tmpl_stripStart<std::u16string_view, sal_Unicode>(rIn, c)); + return tmpl_stripStartString<OUString, sal_Unicode>(rIn, c); +} + +std::u16string_view stripStart(std::u16string_view rIn, sal_Unicode c) +{ + return tmpl_stripStart<std::u16string_view, sal_Unicode>(rIn, c); } namespace @@ -92,28 +124,114 @@ namespace return rIn.substr(0, i); } + template <typename T, typename C> T tmpl_stripEndString(const T &rIn, + const C cRemove) + { + if (rIn.isEmpty()) + return rIn; + + sal_Int32 i = rIn.getLength(); + + while (i > 0) + { + if (rIn[i-1] != cRemove) + break; + --i; + } + + return rIn.copy(0, i); + } } -OString stripEnd(std::string_view rIn, char c) +OString stripEnd(const OString& rIn, char c) { - return OString(tmpl_stripEnd<std::string_view, char>(rIn, c)); + return tmpl_stripEndString<OString, char>(rIn, c); } -OUString stripEnd(std::u16string_view rIn, sal_Unicode c) +std::string_view stripEnd(std::string_view rIn, char c) { - return OUString(tmpl_stripEnd<std::u16string_view, sal_Unicode>(rIn, c)); + return tmpl_stripEnd<std::string_view, char>(rIn, c); } -OString strip(std::string_view rIn, char c) +OUString stripEnd(const OUString& rIn, sal_Unicode c) { - auto x = tmpl_stripStart<std::string_view, char>(rIn, c); - return stripEnd(x, c); + return tmpl_stripEndString<OUString, sal_Unicode>(rIn, c); } -OUString strip(std::u16string_view rIn, sal_Unicode c) +std::u16string_view stripEnd(std::u16string_view rIn, sal_Unicode c) { - auto x = tmpl_stripStart<std::u16string_view, sal_Unicode>(rIn, c); - return stripEnd(x, c); + return tmpl_stripEnd<std::u16string_view, sal_Unicode>(rIn, c); +} + +namespace +{ + template <typename T, typename C> T tmpl_strip(const T &rIn, + const C cRemove) + { + if (rIn.empty()) + return rIn; + + typename T::size_type end = rIn.size(); + while (end > 0) + { + if (rIn[end-1] != cRemove) + break; + --end; + } + + typename T::size_type start = 0; + while (start < end) + { + if (rIn[start] != cRemove) + break; + ++start; + } + + return rIn.substr(start, end - start); + } + template <typename T, typename C> T tmpl_stripString(const T &rIn, + const C cRemove) + { + if (rIn.isEmpty()) + return rIn; + + sal_Int32 end = rIn.getLength(); + while (end > 0) + { + if (rIn[end-1] != cRemove) + break; + --end; + } + sal_Int32 start = 0; + while (start < end) + { + if (rIn[start] != cRemove) + break; + ++start; + } + + return rIn.copy(start, end - start); + } +} + +OString strip(const OString& rIn, char c) +{ + return tmpl_stripString<OString, char>(rIn, c); +} + +std::string_view strip(std::string_view rIn, char c) +{ + return tmpl_strip<std::string_view, char>(rIn, c); +} + +OUString strip(const OUString& rIn, sal_Unicode c) +{ + return tmpl_stripString<OUString, sal_Unicode>(rIn, c); +} + +std::u16string_view strip(std::u16string_view rIn, sal_Unicode c) +{ + return tmpl_strip<std::u16string_view, sal_Unicode>(rIn, c); } namespace @@ -145,13 +263,12 @@ sal_Int32 getTokenCount(std::u16string_view rIn, sal_Unicode cTok) return tmpl_getTokenCount<std::u16string_view, sal_Unicode>(rIn, cTok); } -static sal_uInt32 decimalStringToNumber( - OUString const & str, sal_Int32 nStart, sal_Int32 nLength ) +sal_uInt32 decimalStringToNumber(std::u16string_view str) { sal_uInt32 result = 0; - for( sal_Int32 i = nStart; i < nStart + nLength; ) + for( std::size_t i = 0; i < str.size(); ) { - sal_uInt32 c = str.iterateCodePoints(&i); + sal_uInt32 c = o3tl::iterateCodePoints(str, &i); sal_uInt32 value = 0; if( c <= 0x0039) // ASCII decimal digits, most common value = c - 0x0030; @@ -242,12 +359,6 @@ static sal_uInt32 decimalStringToNumber( return result; } -sal_uInt32 decimalStringToNumber( - OUString const & str ) -{ - return decimalStringToNumber(str, 0, str.getLength()); -} - using namespace ::com::sun::star; // convert between sequence of string and comma separated string @@ -262,27 +373,26 @@ OUString convertCommaSeparated( } std::vector<OUString> - split(const OUString& rStr, sal_Unicode cSeparator) + split(std::u16string_view rStr, sal_Unicode cSeparator) { std::vector< OUString > vec; - sal_Int32 idx = 0; + std::size_t idx = 0; do { - OUString kw = - rStr.getToken(0, cSeparator, idx); - kw = kw.trim(); - if (!kw.isEmpty()) + std::u16string_view kw = o3tl::getToken(rStr, cSeparator, idx); + kw = o3tl::trim(kw); + if (!kw.empty()) { - vec.push_back(kw); + vec.push_back(OUString(kw)); } - } while (idx >= 0); + } while (idx != std::u16string_view::npos); return vec; } uno::Sequence< OUString > - convertCommaSeparated( OUString const& i_rString ) + convertCommaSeparated( std::u16string_view i_rString ) { std::vector< OUString > vec = split(i_rString, ','); return comphelper::containerToSequence(vec); @@ -312,33 +422,50 @@ sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS, sal_Int32 nLHSFirstDigitPos = 0; sal_Int32 nRHSFirstDigitPos = 0; + // Check if the string starts with a digit + sal_Int32 nStartsDigitLHS = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + sal_Int32 nStartsDigitRHS = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + + if (nStartsDigitLHS > 0 && nStartsDigitRHS > 0) + { + sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.subView(0, nStartsDigitLHS)); + sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.subView(0, nStartsDigitRHS)); + + if (nLHS != nRHS) + return nLHS < nRHS ? -1 : 1; + nLHSLastNonDigitPos = nStartsDigitLHS; + nRHSLastNonDigitPos = nStartsDigitRHS; + } + else if (nStartsDigitLHS > 0) + return -1; + else if (nStartsDigitRHS > 0) + return 1; + while (nLHSFirstDigitPos < rLHS.getLength() || nRHSFirstDigitPos < rRHS.getLength()) { sal_Int32 nLHSChunkLen; sal_Int32 nRHSChunkLen; //Compare non digit block as normal strings - nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos, - rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); - nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos, - rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + if (nLHSFirstDigitPos == -1) nLHSFirstDigitPos = rLHS.getLength(); + if (nRHSFirstDigitPos == -1) nRHSFirstDigitPos = rRHS.getLength(); + nLHSChunkLen = nLHSFirstDigitPos - nLHSLastNonDigitPos; nRHSChunkLen = nRHSFirstDigitPos - nRHSLastNonDigitPos; - nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos, - nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen); + nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos, nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen); if (nRet != 0) break; //Compare digit block as one number vs another - nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, - rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); - nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, - rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); + nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER); if (nLHSLastNonDigitPos == -1) nLHSLastNonDigitPos = rLHS.getLength(); if (nRHSLastNonDigitPos == -1) @@ -350,8 +477,8 @@ sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS, //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in //vcl - sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS, nLHSFirstDigitPos, nLHSChunkLen); - sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS, nRHSFirstDigitPos, nRHSChunkLen); + sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.subView(nLHSFirstDigitPos, nLHSChunkLen)); + sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.subView(nRHSFirstDigitPos, nRHSChunkLen)); if (nLHS != nRHS) { @@ -365,7 +492,7 @@ sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS, NaturalStringSorter::NaturalStringSorter( const uno::Reference< uno::XComponentContext > &rContext, - const lang::Locale &rLocale) : m_aLocale(rLocale) + lang::Locale aLocale) : m_aLocale(std::move(aLocale)) { m_xCollator = i18n::Collator::create( rContext ); m_xCollator->loadDefaultCollator(m_aLocale, 0); @@ -386,29 +513,25 @@ bool isdigitAsciiString(std::u16string_view rString) [](sal_Unicode c){ return rtl::isAsciiDigit(c); }); } -namespace -{ - template <typename T, typename I, typename O> T tmpl_reverseString(I rIn) - { - if (rIn.empty()) - return T(); - - typename I::size_type i = rIn.size(); - O sBuf(static_cast<sal_Int32>(i)); - while (i) - sBuf.append(rIn[--i]); - return sBuf.makeStringAndClear(); - } -} - OUString reverseString(std::u16string_view rStr) { - return tmpl_reverseString<OUString, std::u16string_view, OUStringBuffer>(rStr); + if (rStr.empty()) + return OUString(); + + std::size_t i = rStr.size(); + OUStringBuffer sBuf(static_cast<sal_Int32>(i)); + while (i) + sBuf.append(rStr[--i]); + return sBuf.makeStringAndClear(); } -OString reverseString(std::string_view rStr) -{ - return tmpl_reverseString<OString, std::string_view, OStringBuffer>(rStr); +OUString reverseCodePoints(std::u16string_view str) { + auto const len = str.size(); + OUStringBuffer buf(len); + for (sal_Int32 i = len; i != 0;) { + buf.appendUtf32(o3tl::iterateCodePoints(str, &i, -1)); + } + return buf.makeStringAndClear(); } sal_Int32 indexOfAny(std::u16string_view rIn, @@ -494,6 +617,62 @@ OUString setToken(const OUString& rIn, sal_Int32 nToken, sal_Unicode cTok, return rIn; } +/** Similar to OUString::replaceAt, but for an OUStringBuffer. + + Replace n = count characters + from position index in this string with newStr. + */ +void replaceAt(OUStringBuffer& rIn, sal_Int32 nIndex, sal_Int32 nCount, std::u16string_view newStr ) +{ + assert(nIndex >= 0 && nIndex <= rIn.getLength()); + assert(nCount >= 0); + assert(nCount <= rIn.getLength() - nIndex); + + /* Append? */ + const sal_Int32 nOldLength = rIn.getLength(); + if ( nIndex == nOldLength ) + { + rIn.append(newStr); + return; + } + + sal_Int32 nNewLength = nOldLength + newStr.size() - nCount; + if (newStr.size() > o3tl::make_unsigned(nCount)) + rIn.ensureCapacity(nOldLength + newStr.size() - nCount); + + sal_Unicode* pStr = const_cast<sal_Unicode*>(rIn.getStr()); + memmove(pStr + nIndex + newStr.size(), pStr + nIndex + nCount, nOldLength - nIndex + nCount); + memcpy(pStr + nIndex, newStr.data(), newStr.size()); + + rIn.setLength(nNewLength); +} + +OUString sanitizeStringSurrogates(const OUString& rString) +{ + sal_Int32 i=0; + while (i < rString.getLength()) + { + sal_Unicode c = rString[i]; + if (rtl::isHighSurrogate(c)) + { + if (i+1 == rString.getLength() + || !rtl::isLowSurrogate(rString[i+1])) + { + SAL_WARN("comphelper", "Surrogate error: high without low"); + return rString.copy(0, i); + } + ++i; //skip correct low + } + if (rtl::isLowSurrogate(c)) //bare low without preceding high + { + SAL_WARN("comphelper", "Surrogate error: low without high"); + return rString.copy(0, i); + } + ++i; + } + return rString; +} + } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |