diff options
author | Eike Rathke <erack@redhat.com> | 2017-04-20 22:06:23 +0200 |
---|---|---|
committer | Andras Timar <andras.timar@collabora.com> | 2017-04-24 11:02:08 +0200 |
commit | d6ca887de38b3989ec1759f69ebcb390af5b6c1d (patch) | |
tree | d074637ab1c9711c2be120ab31ba56ca425acbc6 | |
parent | b799279088353b38117171ea212c73af6ccae648 (diff) |
add ICU changeset-39671 fix for CVE-2017-7867 CVE-2017-7868
http://bugs.icu-project.org/trac/changeset/39671
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=213
https://bugzilla.redhat.com/show_bug.cgi?id=1444101
Reviewed-on: https://gerrit.libreoffice.org/36754
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins <ci@libreoffice.org>
(cherry picked from commit c7de8233d15ed0c90fef6c49a54d60cf10119f58)
Backported to older MSVC by using the UGLY_SIZEOF_MAPTOUCHARS macro instead
of sizeof(UTF8Buf::mapToUChars), which older MSVC rejects with error C2070 (illegal sizeof operand).
Reviewed-on: https://gerrit.libreoffice.org/36776
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Michael Stahl <mstahl@redhat.com>
(cherry picked from commit 91f5d002884cae1a60768e9caa9d182f41fb7be6)
(cherry picked from commit 3cdac6bb2defce45342dff04400c7a37bb8a2453)
Change-Id: I4e776ad4fe63c77057b0c823f8672a2b6703346f
-rw-r--r-- | external/icu/UnpackedTarball_icu.mk | 1 | ||||
-rw-r--r-- | external/icu/icu4c-changeset-39671.patch.1 | 208 |
2 files changed, 209 insertions, 0 deletions
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk index 09342893fc8c..f3d796dbf138 100644 --- a/external/icu/UnpackedTarball_icu.mk +++ b/external/icu/UnpackedTarball_icu.mk @@ -29,6 +29,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\ external/icu/icu.changeset_36727.patch.1 \ external/icu/icu.changeset_36801.patch.1 \ $(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.diff) \ + external/icu/icu4c-changeset-39671.patch.1 \ )) # vim: set noet sw=4 ts=4: diff --git a/external/icu/icu4c-changeset-39671.patch.1 b/external/icu/icu4c-changeset-39671.patch.1 new file mode 100644 index 000000000000..abcf6ded85ac --- /dev/null +++ b/external/icu/icu4c-changeset-39671.patch.1 @@ -0,0 +1,208 @@ +diff -ur icu.org/source/common/utext.cpp icu/source/common/utext.cpp +--- icu.org/source/common/utext.cpp 2017-04-24 10:40:07.408383999 +0200 ++++ icu/source/common/utext.cpp 2017-04-24 10:47:27.868533934 +0200 +@@ -845,9 +845,15 @@ + //------------------------------------------------------------------------------ + + // Chunk size. +-// Must be less than 85, because of byte mapping from UChar indexes to native indexes. +-// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes +-// to two UChars.) ++// Must be less than 42 (256/6), because of byte mapping from UChar indexes to native indexes. ++// Worst case there are six UTF-8 bytes per UChar. ++// obsolete 6 byte form fd + 5 trails maps to fffd ++// obsolete 5 byte form fc + 4 trails maps to fffd ++// non-shortest 4 byte forms maps to fffd ++// normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit ++// mapToUChars array size must allow for the worst case, 6. ++// This could be brought down to 4, by treating fd and fc as pure illegal, ++// rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros. 
+ // + enum { UTF8_TEXT_CHUNK_SIZE=32 }; + +@@ -864,6 +870,14 @@ + // the last character added being a supplementary, and thus requiring a surrogate + // pair. Doing this is simpler than checking for the edge case. + // ++// erAck: older MSVC used on libreoffice-5-3 and 5-2 bails out with ++// error C2070: 'unknown': illegal sizeof operand ++// for sizeof(UTF8Buf::mapToUChars) ++// so have an ugly workaround: ++// First define a macro of the original size expression, so a follow-up patch ++// on the original code would fail.. ++#define UGLY_MAPTOUCHARS_SIZE (UTF8_TEXT_CHUNK_SIZE*6+6) ++#define UGLY_SIZEOF_MAPTOUCHARS (sizeof(uint8_t)*(UGLY_MAPTOUCHARS_SIZE)) + + struct UTF8Buf { + int32_t bufNativeStart; // Native index of first char in UChar buf +@@ -887,7 +901,7 @@ + // Requires two extra slots, + // one for a supplementary starting in the last normal position, + // and one for an entry for the buffer limit position. +- uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to ++ uint8_t mapToUChars[UGLY_MAPTOUCHARS_SIZE]; // Map native offset from bufNativeStart to + // correspoding offset in filled part of buf. + int32_t align; + }; +@@ -1030,6 +1044,7 @@ + // Requested index is in this buffer. + u8b = (UTF8Buf *)ut->p; // the current buffer + mapIndex = ix - u8b->toUCharsMapStart; ++ U_ASSERT(mapIndex < (int32_t)UGLY_SIZEOF_MAPTOUCHARS); + ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; + return TRUE; + +@@ -1296,6 +1311,10 @@ + // Can only do this if the incoming index is somewhere in the interior of the string. + // If index is at the end, there is no character there to look at. + if (ix != ut->b) { ++ // Note: this function will only move the index back if it is on a trail byte ++ // and there is a preceding lead byte and the sequence from the lead ++ // through this trail could be part of a valid UTF-8 sequence ++ // Otherwise the index remains unchanged. 
+ U8_SET_CP_START(s8, 0, ix); + } + +@@ -1309,7 +1328,10 @@ + UChar *buf = u8b->buf; + uint8_t *mapToNative = u8b->mapToNative; + uint8_t *mapToUChars = u8b->mapToUChars; +- int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1); ++ int32_t toUCharsMapStart = ix - UGLY_SIZEOF_MAPTOUCHARS + 1; ++ // Note that toUCharsMapStart can be negative. Happens when the remaining ++ // text from current position to the beginning is less than the buffer size. ++ // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry. + int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region + // at end of buffer to leave room + // for a surrogate pair at the +@@ -1336,6 +1358,7 @@ + if (c<0x80) { + // Special case ASCII range for speed. + buf[destIx] = (UChar)c; ++ U_ASSERT(toUCharsMapStart <= srcIx); + mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; + mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); + } else { +@@ -1365,6 +1388,7 @@ + do { + mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx; + } while (sIx >= srcIx); ++ U_ASSERT(toUCharsMapStart <= (srcIx+1)); + + // Set native indexing limit to be the current position. 
+ // We are processing a non-ascii, non-native-indexing char now; +@@ -1539,6 +1563,7 @@ + U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit); + U_ASSERT(index<=ut->chunkNativeLimit); + int32_t mapIndex = index - u8b->toUCharsMapStart; ++ U_ASSERT(mapIndex < (int32_t)UGLY_SIZEOF_MAPTOUCHARS); + int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; + U_ASSERT(offset>=0 && offset<=ut->chunkLength); + return offset; +diff -ur icu.org/source/test/intltest/utxttest.cpp icu/source/test/intltest/utxttest.cpp +--- icu.org/source/test/intltest/utxttest.cpp 2017-04-24 10:40:07.396383558 +0200 ++++ icu/source/test/intltest/utxttest.cpp 2017-04-24 10:49:30.685028467 +0200 +@@ -61,6 +61,8 @@ + if (exec) Ticket10562(); break; + case 6: name = "Ticket10983"; + if (exec) Ticket10983(); break; ++ case 7: name = "Ticket12888"; ++ if (exec) Ticket12888(); break; + default: name = ""; break; + } + } +@@ -1501,3 +1503,64 @@ + + utext_close(ut); + } ++ ++// Ticket 12888: bad handling of illegal utf-8 containing many instances of the archaic, now illegal, ++// six byte utf-8 forms. Original implementation had an assumption that ++// there would be at most three utf-8 bytes per UTF-16 code unit. ++// The five and six byte sequences map to a single replacement character. 
++ ++void UTextTest::Ticket12888() { ++ const char *badString = ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ 
"\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80" ++ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"; ++ ++ UErrorCode status = U_ZERO_ERROR; ++ LocalUTextPointer ut(utext_openUTF8(NULL, badString, -1, &status)); ++ TEST_SUCCESS(status); ++ for (;;) { ++ UChar32 c = utext_next32(ut.getAlias()); ++ if (c == U_SENTINEL) { ++ break; ++ } ++ } ++ int32_t endIdx = utext_getNativeIndex(ut.getAlias()); ++ if (endIdx != (int32_t)strlen(badString)) { ++ errln("%s:%d expected=%d, actual=%d", __FILE__, __LINE__, strlen(badString), endIdx); ++ return; ++ } ++ ++ for (int32_t prevIndex = endIdx; prevIndex>0;) { ++ UChar32 c = utext_previous32(ut.getAlias()); ++ int32_t currentIndex = utext_getNativeIndex(ut.getAlias()); ++ if (c != 0xfffd) { ++ errln("%s:%d (expected, actual, index) = (%d, %d, %d)\n", ++ __FILE__, __LINE__, 0xfffd, c, currentIndex); ++ break; ++ } ++ if (currentIndex != prevIndex - 6) { ++ errln("%s:%d: wrong index. Expected, actual = %d, %d", ++ __FILE__, __LINE__, prevIndex - 6, currentIndex); ++ break; ++ } ++ prevIndex = currentIndex; ++ } ++} ++ +diff -ur icu.org/source/test/intltest/utxttest.h icu/source/test/intltest/utxttest.h +--- icu.org/source/test/intltest/utxttest.h 2017-04-24 10:40:07.396383558 +0200 ++++ icu/source/test/intltest/utxttest.h 2017-04-24 10:50:09.738457001 +0200 +@@ -1,5 +1,5 @@ + /******************************************************************** +- * COPYRIGHT: ++ * COPYRIGHT: + * Copyright (c) 2005-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ********************************************************************/ +@@ -35,6 +35,7 @@ + void Ticket6847(); + void Ticket10562(); + void Ticket10983(); ++ void Ticket12888(); + + private: + struct m { // Map between native indices & code points. 
+@@ -49,9 +50,9 @@ + void TestCopyMove(const UnicodeString &us, UText *ut, UBool move, + int32_t nativeStart, int32_t nativeLimit, int32_t nativeDest, + int32_t u16Start, int32_t u16Limit, int32_t u16Dest); +- void TestReplace(const UnicodeString &us, // reference UnicodeString in which to do the replace ++ void TestReplace(const UnicodeString &us, // reference UnicodeString in which to do the replace + UText *ut, // UnicodeText object under test. +- int32_t nativeStart, // Range to be replaced, in UText native units. ++ int32_t nativeStart, // Range to be replaced, in UText native units. + int32_t nativeLimit, + int32_t u16Start, // Range to be replaced, in UTF-16 units + int32_t u16Limit, // for use in the reference UnicodeString. |