From 738aa2cb2352613981d39678404f1b32197f0ad2 Mon Sep 17 00:00:00 2001 From: Stephan Bergmann Date: Tue, 14 Aug 2018 13:34:13 +0200 Subject: cclass_Unicode::StrChr needs to support non-BMP chars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ...as seen with Clang's new -fsanitize=implicit-cast during CppunitTest_starmath_qa_cppunit: > i18npool/source/characterclassification/cclass_unicode_parser.cxx:565:46: runtime error: implicit conversion from type 'sal_uInt32' (aka 'unsigned int') of value 119886 (32-bit, unsigned) to type 'sal_Unicode' (aka 'char16_t') changed the value to 54350 (16-bit, unsigned) > #0 in i18npool::cclass_Unicode::getFlags(unsigned int) at i18npool/source/characterclassification/cclass_unicode_parser.cxx:565:46 (instdir/program/libi18npoollo.so +0x3ae807) > #1 in i18npool::cclass_Unicode::parseText(com::sun::star::i18n::ParseResult&, rtl::OUString const&, int, int) at i18npool/source/characterclassification/cclass_unicode_parser.cxx:712:29 (instdir/program/libi18npoollo.so +0x3b04c3) > #2 in i18npool::cclass_Unicode::parsePredefinedToken(int, rtl::OUString const&, int, com::sun::star::lang::Locale const&, int, rtl::OUString const&, int, rtl::OUString const&) at i18npool/source/characterclassification/cclass_unicode.cxx:275:5 (instdir/program/libi18npoollo.so +0x3a17ea) > #3 in non-virtual thunk to i18npool::cclass_Unicode::parsePredefinedToken(int, rtl::OUString const&, int, com::sun::star::lang::Locale const&, int, rtl::OUString const&, int, rtl::OUString const&) at i18npool/source/characterclassification/cclass_unicode.cxx (instdir/program/libi18npoollo.so +0x3a18dc) > #4 in i18npool::CharacterClassificationImpl::parsePredefinedToken(int, rtl::OUString const&, int, com::sun::star::lang::Locale const&, int, rtl::OUString const&, int, rtl::OUString const&) at i18npool/source/characterclassification/characterclassificationImpl.cxx:118:63 (instdir/program/libi18npoollo.so +0x3c48ba) > #5 in non-virtual 
thunk to i18npool::CharacterClassificationImpl::parsePredefinedToken(int, rtl::OUString const&, int, com::sun::star::lang::Locale const&, int, rtl::OUString const&, int, rtl::OUString const&) at i18npool/source/characterclassification/characterclassificationImpl.cxx (instdir/program/libi18npoollo.so +0x3c497c) > #6 in CharClass::parsePredefinedToken(int, rtl::OUString const&, int, int, rtl::OUString const&, int, rtl::OUString const&) const at unotools/source/i18n/charclass.cxx:443:25 (instdir/program/libutllo.so +0x904d17) > #7 in SmParser::NextToken() at starmath/source/parse.cxx:391:25 (workdir/LinkTarget/CppunitTest/libtest_starmath_qa_cppunit.so +0xa4a3e7) > #8 in SmParser::DoTerm(bool) at starmath/source/parse.cxx:1337:13 (workdir/LinkTarget/CppunitTest/libtest_starmath_qa_cppunit.so +0xa5951b) > #9 in SmParser::DoPower() at starmath/source/parse.cxx:1285:35 (workdir/LinkTarget/CppunitTest/libtest_starmath_qa_cppunit.so +0xa57d46) > #10 in SmParser::DoProduct() at starmath/source/parse.cxx:1105:19 (workdir/LinkTarget/CppunitTest/libtest_starmath_qa_cppunit.so +0xa5685a) > #11 in SmParser::DoSum() at starmath/source/parse.cxx:1087:19 (workdir/LinkTarget/CppunitTest/libtest_starmath_qa_cppunit.so +0xa55ebc) > #12 in SmParser::DoRelation() at starmath/source/parse.cxx:1069:19 (workdir/LinkTarget/CppunitTest/libtest_starmath_qa_cppunit.so +0xa558dc) > #13 in SmParser::DoExpression(bool) at starmath/source/parse.cxx:1043:29 (workdir/LinkTarget/CppunitTest/libtest_starmath_qa_cppunit.so +0xa54ff5) > #14 in SmParser::ParseExpression(rtl::OUString const&) at starmath/source/parse.cxx:2366:12 (workdir/LinkTarget/CppunitTest/libtest_starmath_qa_cppunit.so +0xa671dc) > #15 in (anonymous namespace)::Test::ParseAndCompare(char const*, char const*, char const*) at starmath/qa/cppunit/test_nodetotextvisitors.cxx:485:30 (workdir/LinkTarget/CppunitTest/libtest_starmath_qa_cppunit.so +0x5f7ea6) > #16 in (anonymous namespace)::Test::testMiscEquivalent() at 
starmath/qa/cppunit/test_nodetotextvisitors.cxx:637:5 (workdir/LinkTarget/CppunitTest/libtest_starmath_qa_cppunit.so +0x5f2dc8) Change-Id: Iaf62efd60bd6132e005ab69ce385bbf5c2db5d19 Reviewed-on: https://gerrit.libreoffice.org/58979 Tested-by: Jenkins Reviewed-by: Stephan Bergmann (cherry picked from commit 9b4013ef522fe1faebf76f7f9c624e7e2a90c8c9) Reviewed-on: https://gerrit.libreoffice.org/58990 Reviewed-by: Caolán McNamara Tested-by: Caolán McNamara --- i18npool/inc/cclass_unicode.hxx | 4 ++-- .../source/characterclassification/cclass_unicode_parser.cxx | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'i18npool') diff --git a/i18npool/inc/cclass_unicode.hxx b/i18npool/inc/cclass_unicode.hxx index d962a3216585..cd77cbea275a 100644 --- a/i18npool/inc/cclass_unicode.hxx +++ b/i18npool/inc/cclass_unicode.hxx @@ -120,7 +120,7 @@ private: static const sal_Int32 pParseTokensType[]; /// If and where c occurs in pStr - static const sal_Unicode* StrChr( const sal_Unicode* pStr, sal_Unicode c ); + static const sal_Unicode* StrChr( const sal_Unicode* pStr, sal_uInt32 c ); css::uno::Reference < css::uno::XComponentContext > m_xContext; @@ -151,7 +151,7 @@ private: ParserFlags getFlagsExtended(sal_uInt32 c); /// Access parser table flags for user defined start characters. - ParserFlags getStartCharsFlags( sal_Unicode c ); + ParserFlags getStartCharsFlags( sal_uInt32 c ); /// Access parser table flags for user defined continuation characters. 
ParserFlags getContCharsFlags( sal_Unicode c ); diff --git a/i18npool/source/characterclassification/cclass_unicode_parser.cxx b/i18npool/source/characterclassification/cclass_unicode_parser.cxx index a5cb1b680984..b767f09cb311 100644 --- a/i18npool/source/characterclassification/cclass_unicode_parser.cxx +++ b/i18npool/source/characterclassification/cclass_unicode_parser.cxx @@ -20,6 +20,7 @@ #include #include +#include <rtl/character.hxx> #include #include #include @@ -317,13 +318,15 @@ const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] = // static -const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c ) +const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_uInt32 c ) { if ( !pStr ) return nullptr; + sal_Unicode cs[2]; + auto const n = rtl::splitSurrogates(c, cs); while ( *pStr ) { - if ( *pStr == c ) + if ( *pStr == cs[0] && (n == 1 || pStr[1] == cs[1]) ) return pStr; pStr++; } @@ -659,7 +662,7 @@ ParserFlags cclass_Unicode::getFlagsExtended(sal_uInt32 const c) } -ParserFlags cclass_Unicode::getStartCharsFlags( sal_Unicode c ) +ParserFlags cclass_Unicode::getStartCharsFlags( sal_uInt32 c ) { if ( pStart ) { -- cgit v1.2.3