summary | refs | log | tree | commit | diff
path: root/external
diff options
context:
space:
mode:
author Eike Rathke <erack@redhat.com> 2020-05-11 19:45:42 +0200
committer Eike Rathke <erack@redhat.com> 2020-05-11 21:19:11 +0200
commit 8a31ac7264d7a11146d4a29034e97b564164f635 (patch)
tree f8c4f6b68d9138c4188b7dc3cae5c7fb01c856b7 /external
parent 9c5ffdbdd60385a3d4618f5e36034f550d9b15c9 (diff)
Upgrade to internal ICU 67
Change-Id: I9b8d5cb6d6f4610f2b20c0e0f49eb674d55ce3b8
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/94009
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
Diffstat (limited to 'external')
-rw-r--r-- external/icu/UnpackedTarball_icu.mk 4
-rw-r--r-- external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2 118
-rw-r--r-- external/icu/char8_t.patch 19
-rw-r--r-- external/icu/icu4c-khmerbreakengine.patch.1 269
4 files changed, 25 insertions, 385 deletions
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index 96dcd45c30a4..72fae09b1625 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -34,13 +34,11 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
external/icu/icu4c-rtti.patch.1 \
external/icu/icu4c-clang-cl.patch.1 \
$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.patch.1) \
- external/icu/icu4c-khmerbreakengine.patch.1 \
external/icu/gcc9.patch \
- external/icu/char8_t.patch \
external/icu/c++20-comparison.patch \
external/icu/ubsan.patch \
external/icu/Wdeprecated-copy-dtor.patch \
- external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2 \
+ external/icu/icu4c-khmerbreakengine.patch.1 \
))
$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
diff --git a/external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2 b/external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2
deleted file mode 100644
index 07b3db6774be..000000000000
--- a/external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2
+++ /dev/null
@@ -1,118 +0,0 @@
-From b7d08bc04a4296982fcef8b6b8a354a9e4e7afca Mon Sep 17 00:00:00 2001
-From: Frank Tang <ftang@chromium.org>
-Date: Sat, 1 Feb 2020 02:39:04 +0000
-Subject: [PATCH] ICU-20958 Prevent SEGV_MAPERR in append
-
-See #971
----
- icu4c/source/common/unistr.cpp | 6 ++-
- icu4c/source/test/intltest/ustrtest.cpp | 62 +++++++++++++++++++++++++
- icu4c/source/test/intltest/ustrtest.h | 1 +
- 3 files changed, 68 insertions(+), 1 deletion(-)
-
-diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp
-index 901bb3358ba..077b4d6ef20 100644
---- a/icu4c/source/common/unistr.cpp
-+++ b/icu4c/source/common/unistr.cpp
-@@ -1563,7 +1563,11 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng
- }
-
- int32_t oldLength = length();
-- int32_t newLength = oldLength + srcLength;
-+ int32_t newLength;
-+ if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
-+ setToBogus();
-+ return *this;
-+ }
-
- // Check for append onto ourself
- const UChar* oldArray = getArrayStart();
-diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp
-index b6515ea813c..ad38bdf53a3 100644
---- a/icu4c/source/test/intltest/ustrtest.cpp
-+++ b/icu4c/source/test/intltest/ustrtest.cpp
-@@ -67,6 +67,7 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &
- TESTCASE_AUTO(TestWCharPointers);
- TESTCASE_AUTO(TestNullPointers);
- TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
-+ TESTCASE_AUTO(TestLargeAppend);
- TESTCASE_AUTO_END;
- }
-
-@@ -2310,3 +2311,64 @@ void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
- str.insert(2, sub);
- assertEquals("", u"abbcdcde", str);
- }
-+
-+void UnicodeStringTest::TestLargeAppend() {
-+ if(quick) return;
-+
-+ IcuTestErrorCode status(*this, "TestLargeAppend");
-+ // Make a large UnicodeString
-+ int32_t len = 0xAFFFFFF;
-+ UnicodeString str;
-+ char16_t *buf = str.getBuffer(len);
-+ // A fast way to set buffer to valid Unicode.
-+ // 4E4E is a valid unicode character
-+ uprv_memset(buf, 0x4e, len * 2);
-+ str.releaseBuffer(len);
-+ UnicodeString dest;
-+ // Append it 16 times
-+ // 0xAFFFFFF times 16 is 0xA4FFFFF1,
-+ // which is greater than INT32_MAX, which is 0x7FFFFFFF.
-+ int64_t total = 0;
-+ for (int32_t i = 0; i < 16; i++) {
-+ dest.append(str);
-+ total += len;
-+ if (total <= INT32_MAX) {
-+ assertFalse("dest is not bogus", dest.isBogus());
-+ } else {
-+ assertTrue("dest should be bogus", dest.isBogus());
-+ }
-+ }
-+ dest.remove();
-+ total = 0;
-+ for (int32_t i = 0; i < 16; i++) {
-+ dest.append(str);
-+ total += len;
-+ if (total + len <= INT32_MAX) {
-+ assertFalse("dest is not bogus", dest.isBogus());
-+ } else if (total <= INT32_MAX) {
-+ // Check that a string of exactly the maximum size works
-+ UnicodeString str2;
-+ int32_t remain = INT32_MAX - total;
-+ char16_t *buf2 = str2.getBuffer(remain);
-+ if (buf2 == nullptr) {
-+ // if somehow memory allocation fail, return the test
-+ return;
-+ }
-+ uprv_memset(buf2, 0x4e, remain * 2);
-+ str2.releaseBuffer(remain);
-+ dest.append(str2);
-+ total += remain;
-+ assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
-+ assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
-+ assertFalse("dest is not bogus", dest.isBogus());
-+
-+ // Check that a string size+1 goes bogus
-+ str2.truncate(1);
-+ dest.append(str2);
-+ total++;
-+ assertTrue("dest should be bogus", dest.isBogus());
-+ } else {
-+ assertTrue("dest should be bogus", dest.isBogus());
-+ }
-+ }
-+}
-diff --git a/icu4c/source/test/intltest/ustrtest.h b/icu4c/source/test/intltest/ustrtest.h
-index 218befdcc68..4a356a92c7a 100644
---- a/icu4c/source/test/intltest/ustrtest.h
-+++ b/icu4c/source/test/intltest/ustrtest.h
-@@ -97,6 +97,7 @@ class UnicodeStringTest: public IntlTest {
- void TestWCharPointers();
- void TestNullPointers();
- void TestUnicodeStringInsertAppendToSelf();
-+ void TestLargeAppend();
- };
-
- #endif
diff --git a/external/icu/char8_t.patch b/external/icu/char8_t.patch
deleted file mode 100644
index d13b29634bc5..000000000000
--- a/external/icu/char8_t.patch
+++ /dev/null
@@ -1,19 +0,0 @@
---- source/common/ucasemap.cpp
-+++ source/common/ucasemap.cpp
-@@ -687,13 +687,13 @@
- if (change) {
- ByteSinkUtil::appendTwoBytes(upper, sink);
- if ((data & HAS_EITHER_DIALYTIKA) != 0) {
-- sink.Append(u8"\u0308", 2); // restore or add a dialytika
-+ sink.Append(reinterpret_cast<char const *>(u8"\u0308"), 2); // restore or add a dialytika
- }
- if (addTonos) {
-- sink.Append(u8"\u0301", 2);
-+ sink.Append(reinterpret_cast<char const *>(u8"\u0301"), 2);
- }
- while (numYpogegrammeni > 0) {
-- sink.Append(u8"\u0399", 2);
-+ sink.Append(reinterpret_cast<char const *>(u8"\u0399"), 2);
- --numYpogegrammeni;
- }
- }
diff --git a/external/icu/icu4c-khmerbreakengine.patch.1 b/external/icu/icu4c-khmerbreakengine.patch.1
index 7992da6fc18f..272d0b8ab204 100644
--- a/external/icu/icu4c-khmerbreakengine.patch.1
+++ b/external/icu/icu4c-khmerbreakengine.patch.1
@@ -1,7 +1,7 @@
diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
---- icu.org/source/common/dictbe.cpp 2018-10-02 00:39:56.000000000 +0200
-+++ icu/source/common/dictbe.cpp 2018-10-20 00:14:46.462039038 +0200
-@@ -29,7 +29,19 @@
+--- icu.org/source/common/dictbe.cpp 2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictbe.cpp 2020-05-11 18:55:07.702282061 +0200
+@@ -32,7 +32,19 @@
******************************************************************
*/
@@ -22,7 +22,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
}
DictionaryBreakEngine::~DictionaryBreakEngine() {
-@@ -76,6 +88,169 @@
+@@ -79,6 +91,169 @@
fSet.compact();
}
@@ -192,7 +192,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
/*
******************************************************************
* PossibleWord
-@@ -282,7 +282,7 @@
+@@ -108,7 +283,7 @@
~PossibleWord() {}
// Fill the list of candidates if needed, select the longest, and return the number found
@@ -201,7 +201,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
// Select the currently marked candidate, point after it in the text, and invalidate self
int32_t acceptMarked( UText *text );
-@@ -303,12 +303,12 @@
+@@ -129,12 +304,12 @@
};
@@ -216,7 +216,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
// Dictionary leaves text after longest prefix, not longest word. Back up.
if (count <= 0) {
utext_setNativeIndex(text, start);
-@@ -803,51 +978,28 @@
+@@ -815,53 +990,30 @@
* KhmerBreakEngine
*/
@@ -241,6 +241,8 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
+ : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)),
fDictionary(adoptDictionary)
{
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
- fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
+
+ clusterLimit = 3;
@@ -277,10 +279,10 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
+ fIgnoreSet.compact();
+ fBaseSet.compact();
+ fPuncSet.compact();
+ UTRACE_EXIT_STATUS(status);
}
- KhmerBreakEngine::~KhmerBreakEngine() {
-@@ -859,180 +1011,204 @@
+@@ -874,180 +1026,204 @@
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks ) const {
@@ -637,8 +639,8 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
#if !UCONFIG_NO_NORMALIZATION
diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
---- icu.org/source/common/dictbe.h 2018-09-29 02:34:41.000000000 +0200
-+++ icu/source/common/dictbe.h 2018-10-19 14:21:00.339942804 +0200
+--- icu.org/source/common/dictbe.h 2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictbe.h 2020-05-11 19:08:24.754634732 +0200
@@ -34,7 +34,8 @@
* threads without synchronization.</p>
*/
@@ -735,206 +737,15 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
* <p>Virtual destructor.</p>
*/
virtual ~DictionaryBreakEngine();
-@@ -68,7 +139,7 @@
- * <p>Find any breaks within a run in the supplied text.</p>
- *
- * @param text A UText representing the text. The iterator is left at
-- * the end of the run of characters which the engine is capable of handling
-+ * the end of the run of characters which the engine is capable of handling
- * that starts from the first character in the range.
- * @param startPos The start of the run within the supplied text.
- * @param endPos The end of the run within the supplied text.
-@@ -218,118 +289,120 @@
-
- };
-
--/*******************************************************************
-- * BurmeseBreakEngine
-- */
--
--/**
-- * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
-- * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
-- *
-- * <p>After it is constructed a BurmeseBreakEngine may be shared between
-- * threads without synchronization.</p>
-- */
--class BurmeseBreakEngine : public DictionaryBreakEngine {
-- private:
-- /**
-- * The set of characters handled by this engine
-- * @internal
-- */
--
-- UnicodeSet fBurmeseWordSet;
+@@ -293,11 +364,13 @@
+ */
+
+ UnicodeSet fKhmerWordSet;
- UnicodeSet fEndWordSet;
- UnicodeSet fBeginWordSet;
- UnicodeSet fMarkSet;
- DictionaryMatcher *fDictionary;
-
-- public:
--
-- /**
-- * <p>Default constructor.</p>
-- *
-- * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
-- * engine is deleted.
-- */
-- BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
--
-- /**
-- * <p>Virtual destructor.</p>
-- */
-- virtual ~BurmeseBreakEngine();
--
-- protected:
-- /**
-- * <p>Divide up a range of known dictionary characters.</p>
-- *
-- * @param text A UText representing the text
-- * @param rangeStart The start of the range of dictionary characters
-- * @param rangeEnd The end of the range of dictionary characters
-- * @param foundBreaks Output of C array of int32_t break positions, or 0
-- * @return The number of breaks found
-- */
-- virtual int32_t divideUpDictionaryRange( UText *text,
-- int32_t rangeStart,
-- int32_t rangeEnd,
-- UVector32 &foundBreaks ) const;
--
--};
--
--/*******************************************************************
-- * KhmerBreakEngine
-- */
--
--/**
-- * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
-- * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
-- *
-- * <p>After it is constructed a KhmerBreakEngine may be shared between
-- * threads without synchronization.</p>
-- */
--class KhmerBreakEngine : public DictionaryBreakEngine {
-- private:
-- /**
-- * The set of characters handled by this engine
-- * @internal
-- */
--
-- UnicodeSet fKhmerWordSet;
-- UnicodeSet fEndWordSet;
-- UnicodeSet fBeginWordSet;
-- UnicodeSet fMarkSet;
-- DictionaryMatcher *fDictionary;
--
-- public:
--
-- /**
-- * <p>Default constructor.</p>
-- *
-- * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
-- * engine is deleted.
-- */
-- KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
--
-- /**
-- * <p>Virtual destructor.</p>
-- */
-- virtual ~KhmerBreakEngine();
--
-- protected:
-- /**
-- * <p>Divide up a range of known dictionary characters.</p>
-- *
-- * @param text A UText representing the text
-- * @param rangeStart The start of the range of dictionary characters
-- * @param rangeEnd The end of the range of dictionary characters
-- * @param foundBreaks Output of C array of int32_t break positions, or 0
-- * @return The number of breaks found
-- */
-- virtual int32_t divideUpDictionaryRange( UText *text,
-- int32_t rangeStart,
-- int32_t rangeEnd,
-- UVector32 &foundBreaks ) const;
--
--};
--
-+/*******************************************************************
-+ * BurmeseBreakEngine
-+ */
-+
-+/**
-+ * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
-+ * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
-+ *
-+ * <p>After it is constructed a BurmeseBreakEngine may be shared between
-+ * threads without synchronization.</p>
-+ */
-+class BurmeseBreakEngine : public DictionaryBreakEngine {
-+ private:
-+ /**
-+ * The set of characters handled by this engine
-+ * @internal
-+ */
-+
-+ UnicodeSet fBurmeseWordSet;
-+ UnicodeSet fEndWordSet;
-+ UnicodeSet fBeginWordSet;
-+ UnicodeSet fMarkSet;
-+ DictionaryMatcher *fDictionary;
-+
-+ public:
-+
-+ /**
-+ * <p>Default constructor.</p>
-+ *
-+ * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
-+ * engine is deleted.
-+ */
-+ BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
-+
-+ /**
-+ * <p>Virtual destructor.</p>
-+ */
-+ virtual ~BurmeseBreakEngine();
-+
-+ protected:
-+ /**
-+ * <p>Divide up a range of known dictionary characters.</p>
-+ *
-+ * @param text A UText representing the text
-+ * @param rangeStart The start of the range of dictionary characters
-+ * @param rangeEnd The end of the range of dictionary characters
-+ * @param foundBreaks Output of C array of int32_t break positions, or 0
-+ * @return The number of breaks found
-+ */
-+ virtual int32_t divideUpDictionaryRange( UText *text,
-+ int32_t rangeStart,
-+ int32_t rangeEnd,
-+ UVector32 &foundBreaks ) const;
-+
-+};
-+
-+/*******************************************************************
-+ * KhmerBreakEngine
-+ */
-+
-+/**
-+ * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
-+ * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
-+ *
-+ * <p>After it is constructed a KhmerBreakEngine may be shared between
-+ * threads without synchronization.</p>
-+ */
-+class KhmerBreakEngine : public DictionaryBreakEngine {
-+ private:
-+ /**
-+ * The set of characters handled by this engine
-+ * @internal
-+ */
-+
-+ UnicodeSet fKhmerWordSet;
+ UnicodeSet fBeginWordSet;
+ UnicodeSet fPuncSet;
+ DictionaryMatcher *fDictionary;
@@ -942,44 +753,12 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
+ const uint32_t BADSNLP = 256 * 20;
+ const uint32_t kuint32max = 0x7FFFFFFF;
+
-+ public:
-+
-+ /**
-+ * <p>Default constructor.</p>
-+ *
-+ * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
-+ * engine is deleted.
-+ */
-+ KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
-+
-+ /**
-+ * <p>Virtual destructor.</p>
-+ */
-+ virtual ~KhmerBreakEngine();
-+
-+ protected:
-+ /**
-+ * <p>Divide up a range of known dictionary characters.</p>
-+ *
-+ * @param text A UText representing the text
-+ * @param rangeStart The start of the range of dictionary characters
-+ * @param rangeEnd The end of the range of dictionary characters
-+ * @param foundBreaks Output of C array of int32_t break positions, or 0
-+ * @return The number of breaks found
-+ */
-+ virtual int32_t divideUpDictionaryRange( UText *text,
-+ int32_t rangeStart,
-+ int32_t rangeEnd,
-+ UVector32 &foundBreaks ) const;
-+
-+};
-+
- #if !UCONFIG_NO_NORMALIZATION
-
- /*******************************************************************
+ public:
+
+ /**
diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionarydata.cpp
---- icu.org/source/common/dictionarydata.cpp 2018-09-29 02:34:41.000000000 +0200
-+++ icu/source/common/dictionarydata.cpp 2018-10-19 14:21:00.339942804 +0200
+--- icu.org/source/common/dictionarydata.cpp 2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictionarydata.cpp 2020-05-11 18:50:43.703113749 +0200
@@ -44,7 +44,7 @@
int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
@@ -1027,8 +806,8 @@ diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionaryda
if (values != NULL) {
values[wordCount] = bt.getValue();
diff -ur icu.org/source/common/dictionarydata.h icu/source/common/dictionarydata.h
---- icu.org/source/common/dictionarydata.h 2018-09-29 02:34:41.000000000 +0200
-+++ icu/source/common/dictionarydata.h 2018-10-19 14:21:00.339942804 +0200
+--- icu.org/source/common/dictionarydata.h 2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictionarydata.h 2020-05-11 18:50:43.704113746 +0200
@@ -21,6 +21,7 @@
#include "unicode/utext.h"
#include "unicode/udata.h"