summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--i18nutil/source/utility/unicode.cxx4
-rw-r--r--include/rtl/character.hxx60
-rw-r--r--sal/rtl/uri.cxx8
-rw-r--r--sal/rtl/ustrbuf.cxx4
-rw-r--r--sal/rtl/ustring.cxx2
-rw-r--r--sal/textenc/tcvtutf8.cxx2
-rw-r--r--sal/textenc/unichars.hxx9
-rw-r--r--sax/source/expatwrap/saxwriter.cxx5
-rw-r--r--sc/source/core/tool/interpr1.cxx2
-rw-r--r--stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx6
-rw-r--r--svtools/source/svhtml/parhtml.cxx2
-rw-r--r--svtools/source/svrtf/svparser.cxx2
-rw-r--r--tools/source/fsys/urlobj.cxx4
-rw-r--r--tools/source/inet/inetmime.cxx6
-rw-r--r--xmlreader/source/xmlreader.cxx9
15 files changed, 65 insertions, 60 deletions
diff --git a/i18nutil/source/utility/unicode.cxx b/i18nutil/source/utility/unicode.cxx
index a7d3d4690f1c..6507479807fe 100644
--- a/i18nutil/source/utility/unicode.cxx
+++ b/i18nutil/source/utility/unicode.cxx
@@ -1190,14 +1190,14 @@ OUString ToggleUnicodeCodepoint::StringToReplace()
{
nUnicode = sIn.copy(0, nUPlus).toString().toUInt32(16);
//prevent creating control characters or invalid Unicode values
- if( nUnicode < 0x20 || nUnicode > 0x10ffff )
+ if( !rtl::isUnicodeCodePoint(nUnicode) || nUnicode < 0x20 )
maInput = sIn.copy(nUPlus);
sIn = sIn.copy(nUPlus+2);
nUPlus = sIn.indexOf("U+");
}
nUnicode = sIn.toString().toUInt32(16);
- if( nUnicode < 0x20 || nUnicode > 0x10ffff )
+ if( !rtl::isUnicodeCodePoint(nUnicode) || nUnicode < 0x20 )
maInput.truncate().append( sIn[sIn.getLength()-1] );
return maInput.toString();
}
diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx
index 49f6803821de..ba3088efdeda 100644
--- a/include/rtl/character.hxx
+++ b/include/rtl/character.hxx
@@ -29,6 +29,19 @@
namespace rtl
{
+/** Check for Unicode code point.
+
+ @param code An integer.
+
+ @return True if code is a Unicode code point.
+
+ @since LibreOffice 5.2
+*/
+inline bool isUnicodeCodePoint(sal_uInt32 code)
+{
+ return code <= 0x10FFFF;
+}
+
/** Check for ASCII character.
@param code A Unicode code point.
@@ -39,7 +52,7 @@ namespace rtl
*/
inline bool isAscii(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return code <= 0x7F;
}
@@ -54,7 +67,7 @@ inline bool isAscii(sal_uInt32 code)
*/
inline bool isAsciiLowerCase(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return code >= 'a' && code <= 'z';
}
@@ -69,7 +82,7 @@ inline bool isAsciiLowerCase(sal_uInt32 code)
*/
inline bool isAsciiUpperCase(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return code >= 'A' && code <= 'Z';
}
@@ -84,7 +97,7 @@ inline bool isAsciiUpperCase(sal_uInt32 code)
*/
inline bool isAsciiAlpha(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return isAsciiLowerCase(code) || isAsciiUpperCase(code);
}
@@ -99,7 +112,7 @@ inline bool isAsciiAlpha(sal_uInt32 code)
*/
inline bool isAsciiDigit(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return code >= '0' && code <= '9';
}
@@ -114,7 +127,7 @@ inline bool isAsciiDigit(sal_uInt32 code)
*/
inline bool isAsciiAlphanumeric(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return isAsciiDigit(code) || isAsciiAlpha(code);
}
@@ -129,7 +142,7 @@ inline bool isAsciiAlphanumeric(sal_uInt32 code)
*/
inline bool isAsciiCanonicHexDigit(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return isAsciiDigit(code) || (code >= 'A' && code <= 'F');
}
@@ -144,7 +157,7 @@ inline bool isAsciiCanonicHexDigit(sal_uInt32 code)
*/
inline bool isAsciiHexDigit(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f');
}
@@ -158,7 +171,7 @@ inline bool isAsciiHexDigit(sal_uInt32 code)
*/
inline bool isAsciiOctalDigit(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return code >= '0' && code <= '7';
}
@@ -173,7 +186,7 @@ inline bool isAsciiOctalDigit(sal_uInt32 code)
*/
inline sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return isAsciiLowerCase(code) ? code - 32 : code;
}
@@ -187,7 +200,7 @@ inline sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
*/
inline sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
{
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return isAsciiUpperCase(code) ? code + 32 : code;
}
@@ -205,8 +218,8 @@ inline sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
*/
inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
{
- assert(code1 <= 0x10FFFF);
- assert(code2 <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code1));
+ assert(isUnicodeCodePoint(code2));
return static_cast<sal_Int32>(toAsciiLowerCase(code1))
- static_cast<sal_Int32>(toAsciiLowerCase(code2));
}
@@ -222,19 +235,6 @@ sal_uInt32 const surrogatesLowLast = 0xDFFF;
}
/// @endcond
-/** Check if a codepoint is accessible via utf16 per RFC3629
-
- @param code A non-BMP Unicode code point.
-
- @return True if the code is a valid codepoint.
-
- @since LibreOffice 5.2
-*/
-inline bool isValidCodePoint( sal_uInt32 code)
-{
- return code <= 0x10FFFF;
-}
-
/** Check for high surrogate.
@param code A Unicode code point.
@@ -244,7 +244,7 @@ inline bool isValidCodePoint( sal_uInt32 code)
@since LibreOffice 5.0
*/
inline bool isHighSurrogate(sal_uInt32 code) {
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return code >= detail::surrogatesHighFirst
&& code <= detail::surrogatesHighLast;
}
@@ -258,7 +258,7 @@ inline bool isHighSurrogate(sal_uInt32 code) {
@since LibreOffice 5.0
*/
inline bool isLowSurrogate(sal_uInt32 code) {
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
return code >= detail::surrogatesLowFirst
&& code <= detail::surrogatesLowLast;
}
@@ -272,7 +272,7 @@ inline bool isLowSurrogate(sal_uInt32 code) {
@since LibreOffice 5.0
*/
inline sal_Unicode getHighSurrogate(sal_uInt32 code) {
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
assert(code >= 0x10000);
return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
}
@@ -286,7 +286,7 @@ inline sal_Unicode getHighSurrogate(sal_uInt32 code) {
@since LibreOffice 5.0
*/
inline sal_Unicode getLowSurrogate(sal_uInt32 code) {
- assert(code <= 0x10FFFF);
+ assert(isUnicodeCodePoint(code));
assert(code >= 0x10000);
return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
}
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx
index b028b3cf6664..0f3d6df52194 100644
--- a/sal/rtl/uri.cxx
+++ b/sal/rtl/uri.cxx
@@ -132,8 +132,8 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
p += 3;
nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
}
- if (bUTF8 && nEncoded >= nMin && nEncoded <= 0x10FFFF
- && !rtl::isHighSurrogate(nEncoded)
+ if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded)
+ && nEncoded >= nMin && !rtl::isHighSurrogate(nEncoded)
&& !rtl::isLowSurrogate(nEncoded))
{
*pBegin = p;
@@ -213,7 +213,7 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
{
- assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
+ assert(rtl::isUnicodeCodePoint(nUtf32));
if (nUtf32 <= 0xFFFF) {
writeUnicode(
pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
@@ -245,7 +245,7 @@ void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
{
- assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
+ assert(rtl::isUnicodeCodePoint(nUtf32));
if (eCharset == RTL_TEXTENCODING_UTF8) {
if (nUtf32 < 0x80)
writeEscapeOctet(pBuffer, pCapacity, nUtf32);
diff --git a/sal/rtl/ustrbuf.cxx b/sal/rtl/ustrbuf.cxx
index 89b897f21573..b73318b6e5f4 100644
--- a/sal/rtl/ustrbuf.cxx
+++ b/sal/rtl/ustrbuf.cxx
@@ -21,7 +21,7 @@
#include <osl/interlck.h>
#include <osl/diagnose.h>
-
+#include <rtl/character.hxx>
#include <rtl/ustrbuf.hxx>
#include <strimp.hxx>
@@ -169,7 +169,7 @@ void rtl_uStringbuffer_insertUtf32(
{
sal_Unicode buf[2];
sal_Int32 len;
- OSL_ASSERT(c <= 0x10FFFF && !(c >= 0xD800 && c <= 0xDFFF));
+ OSL_ASSERT(rtl::isUnicodeCodePoint(c) && !(c >= 0xD800 && c <= 0xDFFF));
if (c <= 0xFFFF) {
buf[0] = (sal_Unicode) c;
len = 1;
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index 16e9b87b6288..db07cabb396b 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -578,7 +578,7 @@ void SAL_CALL rtl_uString_newFromCodePoints(
}
n = codePointCount;
for (i = 0; i < codePointCount; ++i) {
- OSL_ASSERT(codePoints[i] <= 0x10FFFF);
+ OSL_ASSERT(rtl::isUnicodeCodePoint(codePoints[i]));
if (codePoints[i] >= 0x10000) {
++n;
}
diff --git a/sal/textenc/tcvtutf8.cxx b/sal/textenc/tcvtutf8.cxx
index 1f0b2bfc96ef..f9c9879c54a3 100644
--- a/sal/textenc/tcvtutf8.cxx
+++ b/sal/textenc/tcvtutf8.cxx
@@ -163,7 +163,7 @@ sal_Size ImplConvertUtf8ToUnicode(
*pDestBufPtr++ = (sal_Unicode) nUtf32;
else
goto no_output;
- else if (nUtf32 <= 0x10FFFF)
+ else if (rtl::isUnicodeCodePoint(nUtf32))
if (pDestBufEnd - pDestBufPtr >= 2)
{
*pDestBufPtr++ = (sal_Unicode) ImplGetHighSurrogate(nUtf32);
diff --git a/sal/textenc/unichars.hxx b/sal/textenc/unichars.hxx
index 09652b98ce57..0bcd6f710518 100644
--- a/sal/textenc/unichars.hxx
+++ b/sal/textenc/unichars.hxx
@@ -20,9 +20,12 @@
#ifndef INCLUDED_SAL_TEXTENC_UNICHARS_HXX
#define INCLUDED_SAL_TEXTENC_UNICHARS_HXX
-#include "sal/config.h"
+#include <sal/config.h>
+
#include <cassert>
-#include "sal/types.h"
+
+#include <rtl/character.hxx>
+#include <sal/types.h>
#define RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER 0xFFFD
@@ -30,7 +33,7 @@ inline bool ImplIsNoncharacter(sal_uInt32 nUtf32)
{
return (nUtf32 >= 0xFDD0 && nUtf32 <= 0xFDEF)
|| (nUtf32 & 0xFFFF) >= 0xFFFE
- || nUtf32 > 0x10FFFF;
+ || !rtl::isUnicodeCodePoint(nUtf32);
}
// All code points that are noncharacters, as of Unicode 3.1.1.
diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx
index 09a78557c8cb..db75efe44d33 100644
--- a/sax/source/expatwrap/saxwriter.cxx
+++ b/sax/source/expatwrap/saxwriter.cxx
@@ -39,6 +39,7 @@
#include <cppuhelper/supportsservice.hxx>
#include <osl/diagnose.h>
+#include <rtl/character.hxx>
#include <rtl/ref.hxx>
#include <rtl/ustrbuf.hxx>
@@ -388,7 +389,7 @@ inline bool SaxWriterHelper::convertToXML( const sal_Unicode * pStr,
OSL_ENSURE( nSurrogate != 0, "lone 2nd Unicode surrogate" );
nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
- if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF )
+ if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 )
{
sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
@@ -831,7 +832,7 @@ inline sal_Int32 calcXMLByteLength( const sal_Unicode *pStr, sal_Int32 nStrLen,
{
// 2. surrogate: write as UTF-8 (if range is OK
nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
- if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF )
+ if( rtl::isUnicodeCodePoint(nSurrogate) && nSurrogate >= 0x00010000 )
nOutputLength += 4;
nSurrogate = 0;
}
diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx
index 266b0d776741..d43f5f5b1a26 100644
--- a/sc/source/core/tool/interpr1.cxx
+++ b/sc/source/core/tool/interpr1.cxx
@@ -3324,7 +3324,7 @@ void ScInterpreter::ScUnichar()
if ( MustHaveParamCount( GetByte(), 1 ) )
{
double dVal = ::rtl::math::approxFloor( GetDouble() );
- if ((dVal < 0x000000) || (dVal > 0x10FFFF))
+ if (dVal < 0 || !rtl::isUnicodeCodePoint(dVal))
PushIllegalArgument();
else
{
diff --git a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
index b2f6c6da2b3a..eaf874143a5e 100644
--- a/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
+++ b/stoc/source/uriproc/UriSchemeParser_vndDOTsunDOTstarDOTscript.cxx
@@ -34,6 +34,7 @@
#include <cppuhelper/supportsservice.hxx>
#include <cppuhelper/weak.hxx>
#include <osl/mutex.hxx>
+#include <rtl/character.hxx>
#include <rtl/uri.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.hxx>
@@ -110,9 +111,8 @@ OUString parsePart(
}
encoded |= (n & 0x3F) << shift;
}
- if (!utf8 || encoded < min
- || (encoded >= 0xD800 && encoded <= 0xDFFF)
- || encoded > 0x10FFFF)
+ if (!utf8 || !rtl::isUnicodeCodePoint(encoded) || encoded < min
+ || (encoded >= 0xD800 && encoded <= 0xDFFF))
{
break;
}
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index a8eff6d0158f..d1a081bbabf8 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -502,7 +502,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
else
nNextCh = 0U;
- if ( ! rtl::isValidCodePoint( cChar ) )
+ if ( ! rtl::isUnicodeCodePoint( cChar ) )
cChar = '?';
}
else if( HTML_ISALPHA( nNextCh ) )
diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx
index b862e66766ca..33504923a684 100644
--- a/svtools/source/svrtf/svparser.cxx
+++ b/svtools/source/svrtf/svparser.cxx
@@ -394,7 +394,7 @@ sal_uInt32 SvParser::GetNextChar()
while( 0 == nChars && !bErr );
}
- if ( ! rtl::isValidCodePoint( c ) )
+ if ( ! rtl::isUnicodeCodePoint( c ) )
c = (sal_uInt32) '?' ;
if( bErr )
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx
index 6ebb517540ae..03e550e96421 100644
--- a/tools/source/fsys/urlobj.cxx
+++ b/tools/source/fsys/urlobj.cxx
@@ -4744,8 +4744,8 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
break;
nShift -= 6;
}
- if (bUTF8 && nEncoded >= nMin
- && nEncoded <= 0x10FFFF
+ if (bUTF8 && rtl::isUnicodeCodePoint(nEncoded)
+ && nEncoded >= nMin
&& !rtl::isHighSurrogate(nEncoded)
&& !rtl::isLowSurrogate(nEncoded))
{
diff --git a/tools/source/inet/inetmime.cxx b/tools/source/inet/inetmime.cxx
index 88b9f99d2306..d0b638a66a99 100644
--- a/tools/source/inet/inetmime.cxx
+++ b/tools/source/inet/inetmime.cxx
@@ -270,7 +270,7 @@ sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
sal_uInt32 nUTF32)
{
- DBG_ASSERT(nUTF32 <= 0x10FFFF, "putUTF32Character(): Bad char");
+ DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");
if (nUTF32 < 0x10000)
*pBuffer++ = sal_Unicode(nUTF32);
else
@@ -375,7 +375,7 @@ bool translateUTF8Char(const sal_Char *& rBegin,
else
return false;
- if (nUCS4 < nMin || nUCS4 > 0x10FFFF)
+ if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
return false;
if (eEncoding >= RTL_TEXTENCODING_UCS4)
@@ -1279,7 +1279,7 @@ void INetMIMEEncodedWordOutputSink::finish(bool bWriteTrailer)
if (bEscape)
{
DBG_ASSERT(
- nUTF32 < 0x10FFFF,
+ rtl::isUnicodeCodePoint(nUTF32),
"INetMIMEEncodedWordOutputSink::finish():"
" Bad char");
if (nUTF32 < 0x80)
diff --git a/xmlreader/source/xmlreader.cxx b/xmlreader/source/xmlreader.cxx
index 011a09485d2c..25b56847f692 100644
--- a/xmlreader/source/xmlreader.cxx
+++ b/xmlreader/source/xmlreader.cxx
@@ -28,6 +28,7 @@
#include <com/sun/star/uno/RuntimeException.hpp>
#include <com/sun/star/uno/XInterface.hpp>
#include <osl/file.h>
+#include <rtl/character.hxx>
#include <rtl/string.h>
#include <rtl/ustring.hxx>
#include <sal/log.hxx>
@@ -399,7 +400,7 @@ char const * XmlReader::handleReference(char const * position, char const * end)
++position;
if (*position == '#') {
++position;
- sal_Int32 val = 0;
+ sal_uInt32 val = 0;
char const * p;
if (*position == 'x') {
++position;
@@ -415,7 +416,7 @@ char const * XmlReader::handleReference(char const * position, char const * end)
} else {
break;
}
- if (val > 0x10FFFF) { // avoid overflow
+ if (!rtl::isUnicodeCodePoint(val)) { // avoid overflow
throw css::uno::RuntimeException(
"'&#x...' too large in " + fileUrl_ );
}
@@ -429,7 +430,7 @@ char const * XmlReader::handleReference(char const * position, char const * end)
} else {
break;
}
- if (val > 0x10FFFF) { // avoid overflow
+ if (!rtl::isUnicodeCodePoint(val)) { // avoid overflow
throw css::uno::RuntimeException(
"'&#...' too large in " + fileUrl_ );
}
@@ -439,7 +440,7 @@ char const * XmlReader::handleReference(char const * position, char const * end)
throw css::uno::RuntimeException(
"'&#...' missing ';' in " + fileUrl_ );
}
- assert(val >= 0 && val <= 0x10FFFF);
+ assert(rtl::isUnicodeCodePoint(val));
if ((val < 0x20 && val != 0x9 && val != 0xA && val != 0xD) ||
(val >= 0xD800 && val <= 0xDFFF) || val == 0xFFFE || val == 0xFFFF)
{