summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Stephan Bergmann <sbergman@redhat.com> 2017-09-13 13:35:49 +0200
committer Stephan Bergmann <sbergman@redhat.com> 2017-09-13 13:35:49 +0200
commit 3f0fba004badec01e536ca0fe2889e7e0bb93d7a (patch)
tree cdf682e89f56c29fd6a8cd650bdb8bba42eca555
parent 2d22b39fb7ddbb02f246e2943b228931e5d583e1 (diff)
New rtl::isUnicodeScalarValue, rtl::isSurrogate
There are apparently various places that want to check for a Unicode scalar value rather than for a Unicode code point. Changed those uses of rtl::isUnicodeCodePoint where that was obvious. (For changing svtools/source/svrtf/svparser.cxx see 8e0fb74dc01927b60d8b868548ef8fe1d7a80ce3 "Revert 'svtools: HTML import: don't put lone surrogates in OUString'".) Other uses of rtl::isUnicodeCodePoint might also want to use rtl::isUnicodeScalarValue instead.

As a side effect, this change also introduces rtl::isSurrogate, which is useful in a few places as well.

Change-Id: I9245f4f98b83877145a4d392f0ddb7c5d824a535
-rw-r--r-- configmgr/source/access.cxx 4
-rw-r--r-- include/rtl/character.hxx 27
-rw-r--r-- sal/rtl/uri.cxx 5
-rw-r--r-- sal/rtl/ustrbuf.cxx 2
-rw-r--r-- sal/textenc/tcvtutf8.cxx 5
-rw-r--r-- sax/source/expatwrap/saxwriter.cxx 4
-rw-r--r-- stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx 4
-rw-r--r-- svtools/source/svrtf/svparser.cxx 2
-rw-r--r-- tools/source/fsys/urlobj.cxx 6
9 files changed, 40 insertions, 19 deletions
diff --git a/configmgr/source/access.cxx b/configmgr/source/access.cxx
index 6eb692cc5430..0d19af353b49 100644
--- a/configmgr/source/access.cxx
+++ b/configmgr/source/access.cxx
@@ -113,8 +113,8 @@ bool isValidName(OUString const & name, bool setMember) {
for (sal_Int32 i = 0; i != name.getLength();) {
sal_uInt32 c = name.iterateCodePoints(&i);
if ((c < 0x20 && !(c == 0x09 || c == 0x0A || c == 0x0D))
- || rtl::isHighSurrogate(c) || rtl::isLowSurrogate(c) || c == 0xFFFE
- || c == 0xFFFF || (!setMember && c == '/'))
+ || rtl::isSurrogate(c) || c == 0xFFFE || c == 0xFFFF
+ || (!setMember && c == '/'))
{
return false;
}
diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx
index b83121a4a6d4..ee26f4ae1f9a 100644
--- a/include/rtl/character.hxx
+++ b/include/rtl/character.hxx
@@ -335,6 +335,20 @@ sal_uInt32 const surrogatesLowLast = 0xDFFF;
}
/// @endcond
+/** Check for surrogate.
+
+ @param code A Unicode code point.
+
+ @return True if code is a surrogate code point (0xD800--0xDFFF).
+
+ @since LibreOffice 6.0
+*/
+inline bool isSurrogate(sal_uInt32 code) {
+ assert(isUnicodeCodePoint(code));
+ return code >= detail::surrogatesHighFirst
+ && code <= detail::surrogatesLowLast;
+}
+
/** Check for high surrogate.
@param code A Unicode code point.
@@ -433,6 +447,19 @@ inline std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode * output) {
}
}
+/** Check for Unicode scalar value.
+
+ @param code An integer.
+
+ @return True if code is a Unicode scalar value.
+
+ @since LibreOffice 6.0
+*/
+inline bool isUnicodeScalarValue(sal_uInt32 code)
+{
+ return isUnicodeCodePoint(code) && !isSurrogate(code);
+}
+
}
#endif
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx
index 257a0a27abac..57a7102a38eb 100644
--- a/sal/rtl/uri.cxx
+++ b/sal/rtl/uri.cxx
@@ -133,9 +133,8 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
p += 3;
nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
}
- if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded)
- && nEncoded >= nMin && !rtl::isHighSurrogate(nEncoded)
- && !rtl::isLowSurrogate(nEncoded))
+ if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
+ && nEncoded >= nMin)
{
*pBegin = p;
*pType = EscapeChar;
diff --git a/sal/rtl/ustrbuf.cxx b/sal/rtl/ustrbuf.cxx
index cef31a5da2ea..508c8d7a9c7c 100644
--- a/sal/rtl/ustrbuf.cxx
+++ b/sal/rtl/ustrbuf.cxx
@@ -168,7 +168,7 @@ void rtl_uStringbuffer_insertUtf32(
{
sal_Unicode buf[2];
sal_Int32 len;
- OSL_ASSERT(rtl::isUnicodeCodePoint(c) && !(c >= 0xD800 && c <= 0xDFFF));
+ OSL_ASSERT(rtl::isUnicodeScalarValue(c));
if (c <= 0xFFFF) {
buf[0] = (sal_Unicode) c;
len = 1;
diff --git a/sal/textenc/tcvtutf8.cxx b/sal/textenc/tcvtutf8.cxx
index d9c3c134a83a..f5ed9dfb995c 100644
--- a/sal/textenc/tcvtutf8.cxx
+++ b/sal/textenc/tcvtutf8.cxx
@@ -170,10 +170,7 @@ sal_Size ImplConvertUtf8ToUnicode(
}
break;
case 3:
- if (nUtf32 < 0x800
- || (!bJavaUtf8
- && (rtl::isHighSurrogate(nUtf32)
- || rtl::isLowSurrogate(nUtf32))))
+ if (nUtf32 < 0x800 || (!bJavaUtf8 && rtl::isSurrogate(nUtf32)))
{
goto bad_input;
}
diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx
index 71d3fba62547..8303afa145d7 100644
--- a/sax/source/expatwrap/saxwriter.cxx
+++ b/sax/source/expatwrap/saxwriter.cxx
@@ -406,7 +406,7 @@ inline bool SaxWriterHelper::convertToXML( const sal_Unicode * pStr,
OSL_ENSURE( nSurrogate != 0, "lone 2nd Unicode surrogate" );
nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
- if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 )
+ if( rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000 )
{
sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
@@ -851,7 +851,7 @@ inline sal_Int32 calcXMLByteLength( const OUString& rStr,
{
// 2. surrogate: write as UTF-8 (if range is OK
nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
- if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 )
+ if( rtl::isUnicodeScalarValue(nSurrogate) && nSurrogate >= 0x00010000 )
nOutputLength += 4;
nSurrogate = 0;
}
diff --git a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
index c57670e27f20..6df3cd3e33ca 100644
--- a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
+++ b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
@@ -110,8 +110,8 @@ OUString parsePart(
}
encoded |= (n & 0x3F) << shift;
}
- if (!utf8 || !rtl::isUnicodeCodePoint(encoded) || encoded < min
- || (encoded >= 0xD800 && encoded <= 0xDFFF))
+ if (!utf8 || !rtl::isUnicodeScalarValue(encoded)
+ || encoded < min)
{
break;
}
diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx
index 541aa5276c2d..2d1be0e3e405 100644
--- a/svtools/source/svrtf/svparser.cxx
+++ b/svtools/source/svrtf/svparser.cxx
@@ -423,7 +423,7 @@ sal_uInt32 SvParser<T>::GetNextChar()
while( 0 == nChars && !bErr );
}
- if ( ! rtl::isUnicodeCodePoint( c ) )
+ if ( ! rtl::isUnicodeScalarValue( c ) )
c = '?' ;
if( bErr )
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx
index eeadb7df38d2..ccebbd3f4e2a 100644
--- a/tools/source/fsys/urlobj.cxx
+++ b/tools/source/fsys/urlobj.cxx
@@ -4745,10 +4745,8 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
break;
nShift -= 6;
}
- if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded)
- && nEncoded >= nMin
- && !rtl::isHighSurrogate(nEncoded)
- && !rtl::isLowSurrogate(nEncoded))
+ if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
+ && nEncoded >= nMin)
{
rBegin = p;
nUTF32 = nEncoded;