summary | refs | log | tree | commit | diff
path: root/comphelper/source/misc/string.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'comphelper/source/misc/string.cxx')
-rw-r--r-- comphelper/source/misc/string.cxx | 309
1 files changed, 244 insertions, 65 deletions
diff --git a/comphelper/source/misc/string.cxx b/comphelper/source/misc/string.cxx
index a3ee9bc58521..446e500e0250 100644
--- a/comphelper/source/misc/string.cxx
+++ b/comphelper/source/misc/string.cxx
@@ -19,16 +19,21 @@
#include <sal/config.h>
+#include <cassert>
#include <cstddef>
#include <string_view>
+#include <utility>
#include <vector>
#include <algorithm>
+#include <o3tl/safeint.hxx>
+#include <o3tl/string_view.hxx>
#include <rtl/character.hxx>
#include <rtl/ustring.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/string.hxx>
#include <rtl/strbuf.hxx>
+#include <sal/log.hxx>
#include <sal/types.h>
#include <comphelper/string.hxx>
@@ -61,16 +66,43 @@ namespace
return rIn.substr(i);
}
+ template <typename T, typename C> T tmpl_stripStartString(const T &rIn,
+ const C cRemove)
+ {
+ if (rIn.isEmpty())
+ return rIn;
+
+ sal_Int32 i = 0;
+
+ while (i < rIn.getLength())
+ {
+ if (rIn[i] != cRemove)
+ break;
+ ++i;
+ }
+
+ return rIn.copy(i);
+ }
+}
+
+OString stripStart(const OString& rIn, char c)
+{
+ return tmpl_stripStartString<OString, char>(rIn, c);
}
-OString stripStart(std::string_view rIn, char c)
+std::string_view stripStart(std::string_view rIn, char c)
{
- return OString(tmpl_stripStart<std::string_view, char>(rIn, c));
+ return tmpl_stripStart<std::string_view, char>(rIn, c);
}
-OUString stripStart(std::u16string_view rIn, sal_Unicode c)
+OUString stripStart(const OUString& rIn, sal_Unicode c)
{
- return OUString(tmpl_stripStart<std::u16string_view, sal_Unicode>(rIn, c));
+ return tmpl_stripStartString<OUString, sal_Unicode>(rIn, c);
+}
+
+std::u16string_view stripStart(std::u16string_view rIn, sal_Unicode c)
+{
+ return tmpl_stripStart<std::u16string_view, sal_Unicode>(rIn, c);
}
namespace
@@ -92,28 +124,114 @@ namespace
return rIn.substr(0, i);
}
+ template <typename T, typename C> T tmpl_stripEndString(const T &rIn,
+ const C cRemove)
+ {
+ if (rIn.isEmpty())
+ return rIn;
+
+ sal_Int32 i = rIn.getLength();
+
+ while (i > 0)
+ {
+ if (rIn[i-1] != cRemove)
+ break;
+ --i;
+ }
+
+ return rIn.copy(0, i);
+ }
}
-OString stripEnd(std::string_view rIn, char c)
+OString stripEnd(const OString& rIn, char c)
{
- return OString(tmpl_stripEnd<std::string_view, char>(rIn, c));
+ return tmpl_stripEndString<OString, char>(rIn, c);
}
-OUString stripEnd(std::u16string_view rIn, sal_Unicode c)
+std::string_view stripEnd(std::string_view rIn, char c)
{
- return OUString(tmpl_stripEnd<std::u16string_view, sal_Unicode>(rIn, c));
+ return tmpl_stripEnd<std::string_view, char>(rIn, c);
}
-OString strip(std::string_view rIn, char c)
+OUString stripEnd(const OUString& rIn, sal_Unicode c)
{
- auto x = tmpl_stripStart<std::string_view, char>(rIn, c);
- return stripEnd(x, c);
+ return tmpl_stripEndString<OUString, sal_Unicode>(rIn, c);
}
-OUString strip(std::u16string_view rIn, sal_Unicode c)
+std::u16string_view stripEnd(std::u16string_view rIn, sal_Unicode c)
{
- auto x = tmpl_stripStart<std::u16string_view, sal_Unicode>(rIn, c);
- return stripEnd(x, c);
+ return tmpl_stripEnd<std::u16string_view, sal_Unicode>(rIn, c);
+}
+
+namespace
+{
+    // Strips cRemove from both ends of a view-like type (empty(), size(),
+    // operator[], substr()). Used below for std::string_view and
+    // std::u16string_view.
+    template <typename T, typename C> T tmpl_strip(const T &rIn,
+                                                   const C cRemove)
+    {
+        if (rIn.empty())
+            return rIn;
+
+        // Trim from the back first: nStop is one past the last kept character.
+        typename T::size_type nStop = rIn.size();
+        while (nStop > 0 && rIn[nStop - 1] == cRemove)
+            --nStop;
+
+        // Then trim from the front, never running past nStop.
+        typename T::size_type nBegin = 0;
+        while (nBegin < nStop && rIn[nBegin] == cRemove)
+            ++nBegin;
+
+        return rIn.substr(nBegin, nStop - nBegin);
+    }
+
+    // Same as tmpl_strip, but for string classes with the isEmpty() /
+    // getLength() / copy() interface. Used below for OString and OUString.
+    template <typename T, typename C> T tmpl_stripString(const T &rIn,
+                                                         const C cRemove)
+    {
+        if (rIn.isEmpty())
+            return rIn;
+
+        // Trim from the back first: nStop is one past the last kept character.
+        sal_Int32 nStop = rIn.getLength();
+        while (nStop > 0 && rIn[nStop - 1] == cRemove)
+            --nStop;
+
+        // Then trim from the front, never running past nStop.
+        sal_Int32 nBegin = 0;
+        while (nBegin < nStop && rIn[nBegin] == cRemove)
+            ++nBegin;
+
+        return rIn.copy(nBegin, nStop - nBegin);
+    }
+}
+
+OString strip(const OString& rIn, char c)
+{
+ return tmpl_stripString<OString, char>(rIn, c);
+}
+
+std::string_view strip(std::string_view rIn, char c)
+{
+ return tmpl_strip<std::string_view, char>(rIn, c);
+}
+
+OUString strip(const OUString& rIn, sal_Unicode c)
+{
+ return tmpl_stripString<OUString, sal_Unicode>(rIn, c);
+}
+
+std::u16string_view strip(std::u16string_view rIn, sal_Unicode c)
+{
+ return tmpl_strip<std::u16string_view, sal_Unicode>(rIn, c);
}
namespace
@@ -145,13 +263,12 @@ sal_Int32 getTokenCount(std::u16string_view rIn, sal_Unicode cTok)
return tmpl_getTokenCount<std::u16string_view, sal_Unicode>(rIn, cTok);
}
-static sal_uInt32 decimalStringToNumber(
- OUString const & str, sal_Int32 nStart, sal_Int32 nLength )
+sal_uInt32 decimalStringToNumber(std::u16string_view str)
{
sal_uInt32 result = 0;
- for( sal_Int32 i = nStart; i < nStart + nLength; )
+ for( std::size_t i = 0; i < str.size(); )
{
- sal_uInt32 c = str.iterateCodePoints(&i);
+ sal_uInt32 c = o3tl::iterateCodePoints(str, &i);
sal_uInt32 value = 0;
if( c <= 0x0039) // ASCII decimal digits, most common
value = c - 0x0030;
@@ -242,12 +359,6 @@ static sal_uInt32 decimalStringToNumber(
return result;
}
-sal_uInt32 decimalStringToNumber(
- OUString const & str )
-{
- return decimalStringToNumber(str, 0, str.getLength());
-}
-
using namespace ::com::sun::star;
// convert between sequence of string and comma separated string
@@ -262,27 +373,26 @@ OUString convertCommaSeparated(
}
std::vector<OUString>
- split(const OUString& rStr, sal_Unicode cSeparator)
+ split(std::u16string_view rStr, sal_Unicode cSeparator)
{
std::vector< OUString > vec;
- sal_Int32 idx = 0;
+ std::size_t idx = 0;
do
{
- OUString kw =
- rStr.getToken(0, cSeparator, idx);
- kw = kw.trim();
- if (!kw.isEmpty())
+ std::u16string_view kw = o3tl::getToken(rStr, cSeparator, idx);
+ kw = o3tl::trim(kw);
+ if (!kw.empty())
{
- vec.push_back(kw);
+ vec.push_back(OUString(kw));
}
- } while (idx >= 0);
+ } while (idx != std::u16string_view::npos);
return vec;
}
uno::Sequence< OUString >
- convertCommaSeparated( OUString const& i_rString )
+ convertCommaSeparated( std::u16string_view i_rString )
{
std::vector< OUString > vec = split(i_rString, ',');
return comphelper::containerToSequence(vec);
@@ -312,33 +422,50 @@ sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS,
sal_Int32 nLHSFirstDigitPos = 0;
sal_Int32 nRHSFirstDigitPos = 0;
+ // Check if the string starts with a digit
+ sal_Int32 nStartsDigitLHS = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
+ sal_Int32 nStartsDigitRHS = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
+
+ if (nStartsDigitLHS > 0 && nStartsDigitRHS > 0)
+ {
+ sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.subView(0, nStartsDigitLHS));
+ sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.subView(0, nStartsDigitRHS));
+
+ if (nLHS != nRHS)
+ return nLHS < nRHS ? -1 : 1;
+ nLHSLastNonDigitPos = nStartsDigitLHS;
+ nRHSLastNonDigitPos = nStartsDigitRHS;
+ }
+ else if (nStartsDigitLHS > 0)
+ return -1;
+ else if (nStartsDigitRHS > 0)
+ return 1;
+
while (nLHSFirstDigitPos < rLHS.getLength() || nRHSFirstDigitPos < rRHS.getLength())
{
sal_Int32 nLHSChunkLen;
sal_Int32 nRHSChunkLen;
//Compare non digit block as normal strings
- nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos,
- rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
- nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos,
- rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
+ nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
+ nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
+
if (nLHSFirstDigitPos == -1)
nLHSFirstDigitPos = rLHS.getLength();
+
if (nRHSFirstDigitPos == -1)
nRHSFirstDigitPos = rRHS.getLength();
+
nLHSChunkLen = nLHSFirstDigitPos - nLHSLastNonDigitPos;
nRHSChunkLen = nRHSFirstDigitPos - nRHSLastNonDigitPos;
- nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos,
- nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen);
+ nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos, nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen);
if (nRet != 0)
break;
//Compare digit block as one number vs another
- nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos,
- rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
- nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos,
- rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
+ nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
+ nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
if (nLHSLastNonDigitPos == -1)
nLHSLastNonDigitPos = rLHS.getLength();
if (nRHSLastNonDigitPos == -1)
@@ -350,8 +477,8 @@ sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS,
//numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in
//vcl
- sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS, nLHSFirstDigitPos, nLHSChunkLen);
- sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS, nRHSFirstDigitPos, nRHSChunkLen);
+ sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.subView(nLHSFirstDigitPos, nLHSChunkLen));
+ sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.subView(nRHSFirstDigitPos, nRHSChunkLen));
if (nLHS != nRHS)
{
@@ -365,7 +492,7 @@ sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS,
NaturalStringSorter::NaturalStringSorter(
const uno::Reference< uno::XComponentContext > &rContext,
- const lang::Locale &rLocale) : m_aLocale(rLocale)
+ lang::Locale aLocale) : m_aLocale(std::move(aLocale))
{
m_xCollator = i18n::Collator::create( rContext );
m_xCollator->loadDefaultCollator(m_aLocale, 0);
@@ -386,29 +513,25 @@ bool isdigitAsciiString(std::u16string_view rString)
[](sal_Unicode c){ return rtl::isAsciiDigit(c); });
}
-namespace
-{
- template <typename T, typename I, typename O> T tmpl_reverseString(I rIn)
- {
- if (rIn.empty())
- return T();
-
- typename I::size_type i = rIn.size();
- O sBuf(static_cast<sal_Int32>(i));
- while (i)
- sBuf.append(rIn[--i]);
- return sBuf.makeStringAndClear();
- }
-}
-
OUString reverseString(std::u16string_view rStr)
{
- return tmpl_reverseString<OUString, std::u16string_view, OUStringBuffer>(rStr);
+ if (rStr.empty())
+ return OUString();
+
+ std::size_t i = rStr.size();
+ OUStringBuffer sBuf(static_cast<sal_Int32>(i));
+ while (i)
+ sBuf.append(rStr[--i]);
+ return sBuf.makeStringAndClear();
}
-OString reverseString(std::string_view rStr)
-{
- return tmpl_reverseString<OString, std::string_view, OStringBuffer>(rStr);
+OUString reverseCodePoints(std::u16string_view str) {
+ auto const len = str.size();
+ OUStringBuffer buf(len);
+ for (sal_Int32 i = len; i != 0;) {
+ buf.appendUtf32(o3tl::iterateCodePoints(str, &i, -1));
+ }
+ return buf.makeStringAndClear();
}
sal_Int32 indexOfAny(std::u16string_view rIn,
@@ -494,6 +617,62 @@ OUString setToken(const OUString& rIn, sal_Int32 nToken, sal_Unicode cTok,
return rIn;
}
+/** Similar to OUString::replaceAt, but operating in place on an OUStringBuffer.
+
+    Replaces the nCount UTF-16 code units that start at position nIndex in
+    rIn with newStr. newStr may be shorter or longer than nCount; the rest of
+    the buffer is shifted accordingly.
+
+    @param rIn     buffer that is modified in place
+    @param nIndex  start of the replaced range; asserted to satisfy
+                   0 <= nIndex <= rIn.getLength()
+    @param nCount  number of code units to replace; asserted to satisfy
+                   0 <= nCount <= rIn.getLength() - nIndex
+    @param newStr  replacement text
+ */
+void replaceAt(OUStringBuffer& rIn, sal_Int32 nIndex, sal_Int32 nCount, std::u16string_view newStr )
+{
+    assert(nIndex >= 0 && nIndex <= rIn.getLength());
+    assert(nCount >= 0);
+    assert(nCount <= rIn.getLength() - nIndex);
+
+    /* Append? */
+    const sal_Int32 nOldLength = rIn.getLength();
+    if ( nIndex == nOldLength )
+    {
+        rIn.append(newStr);
+        return;
+    }
+
+    const sal_Int32 nNewLength = nOldLength + newStr.size() - nCount;
+    if (newStr.size() > o3tl::make_unsigned(nCount))
+        rIn.ensureCapacity(nNewLength);
+
+    // Fetch the raw pointer only after the capacity request, as the original
+    // code does (presumably ensureCapacity can move the storage).
+    sal_Unicode* pStr = const_cast<sal_Unicode*>(rIn.getStr());
+
+    // Code units that follow the replaced range and must be preserved.
+    const std::size_t nTailLen = static_cast<std::size_t>(nOldLength - nIndex - nCount);
+
+    // memmove/memcpy take byte counts, so scale by the size of a code unit.
+    // Source and destination of the tail may overlap, hence memmove.
+    memmove(pStr + nIndex + newStr.size(), pStr + nIndex + nCount,
+            nTailLen * sizeof(sal_Unicode));
+    // An empty view may carry a null data() pointer; skip the copy then.
+    if (!newStr.empty())
+        memcpy(pStr + nIndex, newStr.data(), newStr.size() * sizeof(sal_Unicode));
+
+    rIn.setLength(nNewLength);
+}
+
+/** Truncates rString at the first malformed UTF-16 surrogate.
+
+    Scans the string code unit by code unit. A high surrogate that is not
+    immediately followed by a low surrogate, or a low surrogate that does not
+    follow a high one, logs a warning and returns the part of the string in
+    front of it. A string without such errors is returned unchanged.
+ */
+OUString sanitizeStringSurrogates(const OUString& rString)
+{
+    const sal_Int32 nLen = rString.getLength();
+    for (sal_Int32 nPos = 0; nPos < nLen; ++nPos)
+    {
+        const sal_Unicode cUnit = rString[nPos];
+        if (rtl::isHighSurrogate(cUnit))
+        {
+            const bool bLowFollows
+                = nPos + 1 != nLen && rtl::isLowSurrogate(rString[nPos + 1]);
+            if (!bLowFollows)
+            {
+                SAL_WARN("comphelper", "Surrogate error: high without low");
+                return rString.copy(0, nPos);
+            }
+            ++nPos; //skip correct low
+        }
+        else if (rtl::isLowSurrogate(cUnit)) //bare low without preceding high
+        {
+            SAL_WARN("comphelper", "Surrogate error: low without high");
+            return rString.copy(0, nPos);
+        }
+    }
+    return rString;
+}
+
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */