Upgrade to internal ICU 67

Change-Id: I9b8d5cb6d6f4610f2b20c0e0f49eb674d55ce3b8
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/94009
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
author: Eike Rathke <erack@redhat.com> 2020-05-11 19:45:42 +0200
committer: Eike Rathke <erack@redhat.com> 2020-05-11 21:19:11 +0200
commit: 8a31ac7264d7a11146d4a29034e97b564164f635 (patch)
tree: f8c4f6b68d9138c4188b7dc3cae5c7fb01c856b7 /external
parent: 9c5ffdbdd60385a3d4618f5e36034f550d9b15c9 (diff)
4 files changed, 25 insertions, 385 deletions
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index 96dcd45c30a4..72fae09b1625 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -34,13 +34,11 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
 	external/icu/icu4c-rtti.patch.1 \
 	external/icu/icu4c-clang-cl.patch.1 \
 	$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.patch.1) \
-	external/icu/icu4c-khmerbreakengine.patch.1 \
 	external/icu/gcc9.patch \
-	external/icu/char8_t.patch \
 	external/icu/c++20-comparison.patch \
 	external/icu/ubsan.patch \
 	external/icu/Wdeprecated-copy-dtor.patch \
-	external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2 \
+	external/icu/icu4c-khmerbreakengine.patch.1 \
 ))
 
 $(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
diff --git a/external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2 b/external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2
deleted file mode 100644
index 07b3db6774be..000000000000
--- a/external/icu/b7d08bc04a4296982fcef8b6b8a354a9e4e7afca.patch.2
+++ /dev/null
@@ -1,118 +0,0 @@
-From b7d08bc04a4296982fcef8b6b8a354a9e4e7afca Mon Sep 17 00:00:00 2001
-From: Frank Tang <ftang@chromium.org>
-Date: Sat, 1 Feb 2020 02:39:04 +0000
-Subject: [PATCH] ICU-20958 Prevent SEGV_MAPERR in append
-
-See #971
----
- icu4c/source/common/unistr.cpp          |  6 ++-
- icu4c/source/test/intltest/ustrtest.cpp | 62 +++++++++++++++++++++++++
- icu4c/source/test/intltest/ustrtest.h   |  1 +
- 3 files changed, 68 insertions(+), 1 deletion(-)
-
-diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp
-index 901bb3358ba..077b4d6ef20 100644
---- a/icu4c/source/common/unistr.cpp
-+++ b/icu4c/source/common/unistr.cpp
-@@ -1563,7 +1563,11 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng
-   }
- 
-   int32_t oldLength = length();
--  int32_t newLength = oldLength + srcLength;
-+  int32_t newLength;
-+  if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
-+    setToBogus();
-+    return *this;
-+  }
- 
-   // Check for append onto ourself
-   const UChar* oldArray = getArrayStart();
-diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp
-index b6515ea813c..ad38bdf53a3 100644
---- a/icu4c/source/test/intltest/ustrtest.cpp
-+++ b/icu4c/source/test/intltest/ustrtest.cpp
-@@ -67,6 +67,7 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &
-     TESTCASE_AUTO(TestWCharPointers);
-     TESTCASE_AUTO(TestNullPointers);
-     TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
-+    TESTCASE_AUTO(TestLargeAppend);
-     TESTCASE_AUTO_END;
- }
- 
-@@ -2310,3 +2311,64 @@ void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
-     str.insert(2, sub);
-     assertEquals("", u"abbcdcde", str);
- }
-+
-+void UnicodeStringTest::TestLargeAppend() {
-+    if(quick) return;
-+
-+    IcuTestErrorCode status(*this, "TestLargeAppend");
-+    // Make a large UnicodeString
-+    int32_t len = 0xAFFFFFF;
-+    UnicodeString str;
-+    char16_t *buf = str.getBuffer(len);
-+    // A fast way to set buffer to valid Unicode.
-+    // 4E4E is a valid unicode character
-+    uprv_memset(buf, 0x4e, len * 2);
-+    str.releaseBuffer(len);
-+    UnicodeString dest;
-+    // Append it 16 times
-+    // 0xAFFFFFF times 16 is 0xA4FFFFF1,
-+    // which is greater than INT32_MAX, which is 0x7FFFFFFF.
-+    int64_t total = 0;
-+    for (int32_t i = 0; i < 16; i++) {
-+        dest.append(str);
-+        total += len;
-+        if (total <= INT32_MAX) {
-+            assertFalse("dest is not bogus", dest.isBogus());
-+        } else {
-+            assertTrue("dest should be bogus", dest.isBogus());
-+        }
-+    }
-+    dest.remove();
-+    total = 0;
-+    for (int32_t i = 0; i < 16; i++) {
-+        dest.append(str);
-+        total += len;
-+        if (total + len <= INT32_MAX) {
-+            assertFalse("dest is not bogus", dest.isBogus());
-+        } else if (total <= INT32_MAX) {
-+            // Check that a string of exactly the maximum size works
-+            UnicodeString str2;
-+            int32_t remain = INT32_MAX - total;
-+            char16_t *buf2 = str2.getBuffer(remain);
-+            if (buf2 == nullptr) {
-+                // if somehow memory allocation fail, return the test
-+                return;
-+            }
-+            uprv_memset(buf2, 0x4e, remain * 2);
-+            str2.releaseBuffer(remain);
-+            dest.append(str2);
-+            total += remain;
-+            assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
-+            assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
-+            assertFalse("dest is not bogus", dest.isBogus());
-+
-+            // Check that a string size+1 goes bogus
-+            str2.truncate(1);
-+            dest.append(str2);
-+            total++;
-+            assertTrue("dest should be bogus", dest.isBogus());
-+        } else {
-+            assertTrue("dest should be bogus", dest.isBogus());
-+        }
-+    }
-+}
-diff --git a/icu4c/source/test/intltest/ustrtest.h b/icu4c/source/test/intltest/ustrtest.h
-index 218befdcc68..4a356a92c7a 100644
---- a/icu4c/source/test/intltest/ustrtest.h
-+++ b/icu4c/source/test/intltest/ustrtest.h
-@@ -97,6 +97,7 @@ class UnicodeStringTest: public IntlTest {
-     void TestWCharPointers();
-     void TestNullPointers();
-     void TestUnicodeStringInsertAppendToSelf();
-+    void TestLargeAppend();
- };
- 
- #endif
diff --git a/external/icu/char8_t.patch b/external/icu/char8_t.patch
deleted file mode 100644
index d13b29634bc5..000000000000
--- a/external/icu/char8_t.patch
+++ /dev/null
@@ -1,19 +0,0 @@
---- source/common/ucasemap.cpp
-+++ source/common/ucasemap.cpp
-@@ -687,13 +687,13 @@
-             if (change) {
-                 ByteSinkUtil::appendTwoBytes(upper, sink);
-                 if ((data & HAS_EITHER_DIALYTIKA) != 0) {
--                    sink.Append(u8"\u0308", 2);  // restore or add a dialytika
-+                    sink.Append(reinterpret_cast<char const *>(u8"\u0308"), 2);  // restore or add a dialytika
-                 }
-                 if (addTonos) {
--                    sink.Append(u8"\u0301", 2);
-+                    sink.Append(reinterpret_cast<char const *>(u8"\u0301"), 2);
-                 }
-                 while (numYpogegrammeni > 0) {
--                    sink.Append(u8"\u0399", 2);
-+                    sink.Append(reinterpret_cast<char const *>(u8"\u0399"), 2);
-                     --numYpogegrammeni;
-                 }
-             }
diff --git a/external/icu/icu4c-khmerbreakengine.patch.1 b/external/icu/icu4c-khmerbreakengine.patch.1
index 7992da6fc18f..272d0b8ab204 100644
--- a/external/icu/icu4c-khmerbreakengine.patch.1
+++ b/external/icu/icu4c-khmerbreakengine.patch.1
@@ -1,7 +1,7 @@
 diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
---- icu.org/source/common/dictbe.cpp	2018-10-02 00:39:56.000000000 +0200
-+++ icu/source/common/dictbe.cpp	2018-10-20 00:14:46.462039038 +0200
-@@ -29,7 +29,19 @@
+--- icu.org/source/common/dictbe.cpp	2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictbe.cpp	2020-05-11 18:55:07.702282061 +0200
+@@ -32,7 +32,19 @@
   ******************************************************************
   */
  
@@ -22,7 +22,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
  }
  
  DictionaryBreakEngine::~DictionaryBreakEngine() {
-@@ -76,6 +88,169 @@
+@@ -79,6 +91,169 @@
      fSet.compact();
  }
  
@@ -192,7 +192,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
  /*
   ******************************************************************
   * PossibleWord
-@@ -282,7 +282,7 @@
+@@ -108,7 +283,7 @@
      ~PossibleWord() {}
    
      // Fill the list of candidates if needed, select the longest, and return the number found
@@ -201,7 +201,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
    
      // Select the currently marked candidate, point after it in the text, and invalidate self
      int32_t   acceptMarked( UText *text );
-@@ -303,12 +303,12 @@
+@@ -129,12 +304,12 @@
  };
  
  
@@ -216,7 +216,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
          // Dictionary leaves text after longest prefix, not longest word. Back up.
          if (count <= 0) {
              utext_setNativeIndex(text, start);
-@@ -803,51 +978,28 @@
+@@ -815,53 +990,30 @@
   * KhmerBreakEngine
   */
  
@@ -241,6 +241,8 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 +    : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)),
        fDictionary(adoptDictionary)
  {
+     UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+     UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
 -    fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
 +
 +    clusterLimit = 3;
@@ -277,10 +279,10 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
 +    fIgnoreSet.compact();
 +    fBaseSet.compact();
 +    fPuncSet.compact();
+     UTRACE_EXIT_STATUS(status);
  }
  
- KhmerBreakEngine::~KhmerBreakEngine() {
-@@ -859,180 +1011,204 @@
+@@ -874,180 +1026,204 @@
                                                  int32_t rangeStart,
                                                  int32_t rangeEnd,
                                                  UVector32 &foundBreaks ) const {
@@ -637,8 +639,8 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
  
  #if !UCONFIG_NO_NORMALIZATION
 diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
---- icu.org/source/common/dictbe.h	2018-09-29 02:34:41.000000000 +0200
-+++ icu/source/common/dictbe.h	2018-10-19 14:21:00.339942804 +0200
+--- icu.org/source/common/dictbe.h	2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictbe.h	2020-05-11 19:08:24.754634732 +0200
 @@ -34,7 +34,8 @@
   * threads without synchronization.</p>
   */
@@ -735,206 +737,15 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
     * <p>Virtual destructor.</p>
     */
    virtual ~DictionaryBreakEngine();
-@@ -68,7 +139,7 @@
-    * <p>Find any breaks within a run in the supplied text.</p>
-    *
-    * @param text A UText representing the text. The iterator is left at
--   * the end of the run of characters which the engine is capable of handling 
-+   * the end of the run of characters which the engine is capable of handling
-    * that starts from the first character in the range.
-    * @param startPos The start of the run within the supplied text.
-    * @param endPos The end of the run within the supplied text.
-@@ -218,118 +289,120 @@
- 
- };
- 
--/******************************************************************* 
-- * BurmeseBreakEngine 
-- */ 
-- 
--/** 
-- * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a 
-- * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p> 
-- * 
-- * <p>After it is constructed a BurmeseBreakEngine may be shared between 
-- * threads without synchronization.</p> 
-- */ 
--class BurmeseBreakEngine : public DictionaryBreakEngine { 
-- private: 
--    /** 
--     * The set of characters handled by this engine 
--     * @internal 
--     */ 
-- 
--  UnicodeSet                fBurmeseWordSet; 
+@@ -293,11 +364,13 @@
+      */ 
+  
+   UnicodeSet                fKhmerWordSet; 
 -  UnicodeSet                fEndWordSet; 
 -  UnicodeSet                fBeginWordSet; 
 -  UnicodeSet                fMarkSet; 
 -  DictionaryMatcher  *fDictionary; 
 - 
-- public: 
-- 
--  /** 
--   * <p>Default constructor.</p> 
--   * 
--   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the 
--   * engine is deleted. 
--   */ 
--  BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); 
-- 
--  /** 
--   * <p>Virtual destructor.</p> 
--   */ 
--  virtual ~BurmeseBreakEngine(); 
-- 
-- protected: 
-- /** 
--  * <p>Divide up a range of known dictionary characters.</p> 
--  * 
--  * @param text A UText representing the text 
--  * @param rangeStart The start of the range of dictionary characters 
--  * @param rangeEnd The end of the range of dictionary characters 
--  * @param foundBreaks Output of C array of int32_t break positions, or 0 
--  * @return The number of breaks found 
--  */ 
--  virtual int32_t divideUpDictionaryRange( UText *text, 
--                                           int32_t rangeStart, 
--                                           int32_t rangeEnd, 
--                                           UVector32 &foundBreaks ) const; 
-- 
--}; 
-- 
--/******************************************************************* 
-- * KhmerBreakEngine 
-- */ 
-- 
--/** 
-- * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a 
-- * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> 
-- * 
-- * <p>After it is constructed a KhmerBreakEngine may be shared between 
-- * threads without synchronization.</p> 
-- */ 
--class KhmerBreakEngine : public DictionaryBreakEngine { 
-- private: 
--    /** 
--     * The set of characters handled by this engine 
--     * @internal 
--     */ 
-- 
--  UnicodeSet                fKhmerWordSet; 
--  UnicodeSet                fEndWordSet; 
--  UnicodeSet                fBeginWordSet; 
--  UnicodeSet                fMarkSet; 
--  DictionaryMatcher  *fDictionary; 
-- 
-- public: 
-- 
--  /** 
--   * <p>Default constructor.</p> 
--   * 
--   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the 
--   * engine is deleted. 
--   */ 
--  KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); 
-- 
--  /** 
--   * <p>Virtual destructor.</p> 
--   */ 
--  virtual ~KhmerBreakEngine(); 
-- 
-- protected: 
-- /** 
--  * <p>Divide up a range of known dictionary characters.</p> 
--  * 
--  * @param text A UText representing the text 
--  * @param rangeStart The start of the range of dictionary characters 
--  * @param rangeEnd The end of the range of dictionary characters 
--  * @param foundBreaks Output of C array of int32_t break positions, or 0 
--  * @return The number of breaks found 
--  */ 
--  virtual int32_t divideUpDictionaryRange( UText *text, 
--                                           int32_t rangeStart, 
--                                           int32_t rangeEnd, 
--                                           UVector32 &foundBreaks ) const; 
-- 
--}; 
-- 
-+/*******************************************************************
-+ * BurmeseBreakEngine
-+ */
-+
-+/**
-+ * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
-+ * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
-+ *
-+ * <p>After it is constructed a BurmeseBreakEngine may be shared between
-+ * threads without synchronization.</p>
-+ */
-+class BurmeseBreakEngine : public DictionaryBreakEngine {
-+ private:
-+    /**
-+     * The set of characters handled by this engine
-+     * @internal
-+     */
-+
-+  UnicodeSet                fBurmeseWordSet;
-+  UnicodeSet                fEndWordSet;
-+  UnicodeSet                fBeginWordSet;
-+  UnicodeSet                fMarkSet;
-+  DictionaryMatcher  *fDictionary;
-+
-+ public:
-+
-+  /**
-+   * <p>Default constructor.</p>
-+   *
-+   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
-+   * engine is deleted.
-+   */
-+  BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
-+
-+  /**
-+   * <p>Virtual destructor.</p>
-+   */
-+  virtual ~BurmeseBreakEngine();
-+
-+ protected:
-+ /**
-+  * <p>Divide up a range of known dictionary characters.</p>
-+  *
-+  * @param text A UText representing the text
-+  * @param rangeStart The start of the range of dictionary characters
-+  * @param rangeEnd The end of the range of dictionary characters
-+  * @param foundBreaks Output of C array of int32_t break positions, or 0
-+  * @return The number of breaks found
-+  */
-+  virtual int32_t divideUpDictionaryRange( UText *text,
-+                                           int32_t rangeStart,
-+                                           int32_t rangeEnd,
-+                                           UVector32 &foundBreaks ) const;
-+
-+};
-+
-+/*******************************************************************
-+ * KhmerBreakEngine
-+ */
-+
-+/**
-+ * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
-+ * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
-+ *
-+ * <p>After it is constructed a KhmerBreakEngine may be shared between
-+ * threads without synchronization.</p>
-+ */
-+class KhmerBreakEngine : public DictionaryBreakEngine {
-+ private:
-+    /**
-+     * The set of characters handled by this engine
-+     * @internal
-+     */
-+
-+  UnicodeSet                fKhmerWordSet;
 +  UnicodeSet                fBeginWordSet;
 +  UnicodeSet                fPuncSet;
 +  DictionaryMatcher        *fDictionary;
@@ -942,44 +753,12 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
 +  const uint32_t BADSNLP = 256 * 20;
 +  const uint32_t kuint32max = 0x7FFFFFFF;
 +
-+ public:
-+
-+  /**
-+   * <p>Default constructor.</p>
-+   *
-+   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
-+   * engine is deleted.
-+   */
-+  KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
-+
-+  /**
-+   * <p>Virtual destructor.</p>
-+   */
-+  virtual ~KhmerBreakEngine();
-+
-+ protected:
-+ /**
-+  * <p>Divide up a range of known dictionary characters.</p>
-+  *
-+  * @param text A UText representing the text
-+  * @param rangeStart The start of the range of dictionary characters
-+  * @param rangeEnd The end of the range of dictionary characters
-+  * @param foundBreaks Output of C array of int32_t break positions, or 0
-+  * @return The number of breaks found
-+  */
-+  virtual int32_t divideUpDictionaryRange( UText *text,
-+                                           int32_t rangeStart,
-+                                           int32_t rangeEnd,
-+                                           UVector32 &foundBreaks ) const;
-+
-+};
-+
- #if !UCONFIG_NO_NORMALIZATION
- 
- /*******************************************************************
+  public: 
+  
+   /** 
 diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionarydata.cpp
---- icu.org/source/common/dictionarydata.cpp	2018-09-29 02:34:41.000000000 +0200
-+++ icu/source/common/dictionarydata.cpp	2018-10-19 14:21:00.339942804 +0200
+--- icu.org/source/common/dictionarydata.cpp	2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictionarydata.cpp	2020-05-11 18:50:43.703113749 +0200
 @@ -44,7 +44,7 @@
  
  int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
@@ -1027,8 +806,8 @@ diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionaryda
                  if (values != NULL) {
                      values[wordCount] = bt.getValue();
 diff -ur icu.org/source/common/dictionarydata.h icu/source/common/dictionarydata.h
---- icu.org/source/common/dictionarydata.h	2018-09-29 02:34:41.000000000 +0200
-+++ icu/source/common/dictionarydata.h	2018-10-19 14:21:00.339942804 +0200
+--- icu.org/source/common/dictionarydata.h	2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictionarydata.h	2020-05-11 18:50:43.704113746 +0200
 @@ -21,6 +21,7 @@
  #include "unicode/utext.h"
  #include "unicode/udata.h"
author	Eike Rathke <erack@redhat.com>	2020-05-11 19:45:42 +0200
committer	Eike Rathke <erack@redhat.com>	2020-05-11 21:19:11 +0200
commit	8a31ac7264d7a11146d4a29034e97b564164f635 (patch)
tree	f8c4f6b68d9138c4188b7dc3cae5c7fb01c856b7 /external
parent	9c5ffdbdd60385a3d4618f5e36034f550d9b15c9 (diff)