diff options
author | Hubert Figuière <hub@figuiere.net> | 2013-06-29 22:31:09 -0400 |
---|---|---|
committer | Hubert Figuière <hub@figuiere.net> | 2013-06-29 22:31:09 -0400 |
commit | a36182b4304c2f0d4c27091fcf26c36ea648d9f2 (patch) | |
tree | 98a365bd6a7bd95270c093021b815c1fc09b0f9a /samples/source | |
parent | 71d488b0d4a91ef83e63b1a01e199f29c0412821 (diff) | |
parent | 4652015fe779e12fb06ff8fa56bf70e373cd3894 (diff) |
Update to XMP SDK CC-2013.06.
Merge branch 'adobe-sdk' into cc-2013.06-integration
Conflicts:
XMPCore/source/XMPMeta.cpp
XMPCore/source/XMPMeta.hpp
XMPFiles/source/FormatSupport/TIFF_FileWriter.cpp
XMPFiles/source/PluginHandler/XMPAtoms.h
public/include/XMP_Const.h
samples/source/DumpMainXMP.cpp
samples/source/DumpScannedXMP.cpp
samples/source/XMPCoreCoverage.cpp
samples/source/XMPFilesCoverage.cpp
samples/source/common/LargeFileAccess.cpp
samples/source/common/globals.h
source/EndianUtils.hpp
Diffstat (limited to 'samples/source')
-rw-r--r-- | samples/source/CustomSchema.cpp | 3 | ||||
-rw-r--r-- | samples/source/DumpMainXMP.cpp | 11 | ||||
-rw-r--r-- | samples/source/DumpScannedXMP.cpp | 19 | ||||
-rw-r--r-- | samples/source/ModifyingXMP.cpp | 3 | ||||
-rw-r--r-- | samples/source/ReadingXMP.cpp | 3 | ||||
-rw-r--r-- | samples/source/UnicodeCorrectness.cpp | 2813 | ||||
-rw-r--r-- | samples/source/UnicodeParseSerialize.cpp | 510 | ||||
-rw-r--r-- | samples/source/UnicodePerformance.cpp | 308 | ||||
-rw-r--r-- | samples/source/XMPCoreCoverage.cpp | 11 | ||||
-rw-r--r-- | samples/source/XMPFilesCoverage.cpp | 8 | ||||
-rw-r--r-- | samples/source/XMPIterations.cpp | 241 | ||||
-rw-r--r-- | samples/source/common/DumpFile.cpp | 177 | ||||
-rw-r--r-- | samples/source/common/DumpFile.h | 2 | ||||
-rw-r--r-- | samples/source/common/LargeFileAccess.cpp | 5 | ||||
-rw-r--r-- | samples/source/common/TagTree.cpp | 18 | ||||
-rw-r--r-- | samples/source/common/globals.h | 6 | ||||
-rw-r--r-- | samples/source/dumpfile/main.cpp | 2 | ||||
-rw-r--r-- | samples/source/xmpcommand/Actions.cpp | 6 | ||||
-rw-r--r-- | samples/source/xmpcommand/XMPCommand.cpp | 4 |
19 files changed, 3992 insertions, 158 deletions
diff --git a/samples/source/CustomSchema.cpp b/samples/source/CustomSchema.cpp index 270b6a8..1cf0566 100644 --- a/samples/source/CustomSchema.cpp +++ b/samples/source/CustomSchema.cpp @@ -13,7 +13,10 @@ * and modify properties with complex paths using the path composition utilities from the XMP API */ +#include <cstdio> +#include <vector> #include <string> +#include <cstring> // Must be defined to instantiate template classes #define TXMP_STRING_TYPE std::string diff --git a/samples/source/DumpMainXMP.cpp b/samples/source/DumpMainXMP.cpp index 44ddedd..313665b 100644 --- a/samples/source/DumpMainXMP.cpp +++ b/samples/source/DumpMainXMP.cpp @@ -11,14 +11,15 @@ * it to a human-readable log file. This is preferred over "dumb" packet scanning. */ +#include <cstdio> +#include <vector> #include <string> -#include <time.h> +#include <cstring> +#include <ctime> -#include <stdio.h> -#include <stdlib.h> +#include <cstdlib> #include <stdexcept> -#include <errno.h> -#include <cstring> +#include <cerrno> #if XMP_WinBuild #pragma warning ( disable : 4127 ) // conditional expression is constant diff --git a/samples/source/DumpScannedXMP.cpp b/samples/source/DumpScannedXMP.cpp index d398334..1acbc82 100644 --- a/samples/source/DumpScannedXMP.cpp +++ b/samples/source/DumpScannedXMP.cpp @@ -11,14 +11,15 @@ * serializes the XMP and writes it to log file. */ +#include <cstdio> +#include <vector> #include <string> -#include <time.h> +#include <cstring> +#include <ctime> -#include <stdio.h> -#include <stdlib.h> +#include <cstdlib> #include <stdexcept> -#include <errno.h> -#include <cstring> +#include <cerrno> #if XMP_WinBuild #pragma warning ( disable : 4127 ) // conditional expression is constant @@ -62,10 +63,10 @@ ProcessPacket ( const char * fileName, char title [1000]; - sprintf ( title, "// Dumping raw input for \"%s\" (%d..%d)", fileName, offset, (offset + length - 1) ); + sprintf ( title, "// Dumping raw input for \"%s\" (%lu..%lu)", fileName, offset, (offset + length - 1) ); printf ( "// " ); for ( size_t i = 3; i < strlen(title); ++i ) printf ( "=" ); - printf ( "\n\n%s\n\n%.*s\n\n", title, length, xmlString.c_str() ); + printf ( "\n\n%s\n\n%.*s\n\n", title, (int)length, xmlString.c_str() ); fflush ( stdout ); SXMPMeta xmpObj; @@ -81,11 +82,11 @@ ProcessPacket ( const char * fileName, string xmpString; xmpObj.SerializeToBuffer ( &xmpString, kXMP_OmitPacketWrapper ); - printf ( "\nPretty serialization, %d bytes :\n\n%s\n", xmpString.size(), xmpString.c_str() ); + printf ( "\nPretty serialization, %lu bytes :\n\n%s\n", xmpString.size(), xmpString.c_str() ); fflush ( stdout ); xmpObj.SerializeToBuffer ( &xmpString, (kXMP_OmitPacketWrapper | kXMP_UseCompactFormat) ); - printf ( "Compact serialization, %d bytes :\n\n%s\n", xmpString.size(), xmpString.c_str() ); + printf ( "Compact serialization, %lu bytes :\n\n%s\n", xmpString.size(), xmpString.c_str() ); fflush ( stdout ); } // ProcessPacket diff --git a/samples/source/ModifyingXMP.cpp b/samples/source/ModifyingXMP.cpp index e899845..856bb80 100644 --- a/samples/source/ModifyingXMP.cpp +++ b/samples/source/ModifyingXMP.cpp @@ -11,7 +11,10 @@ * Demonstrates how to open a file for update, and modifying the contained XMP before writing it back to the file. */ +#include <cstdio> +#include <vector> #include <string> +#include <cstring> // Must be defined to instantiate template classes #define TXMP_STRING_TYPE std::string diff --git a/samples/source/ReadingXMP.cpp b/samples/source/ReadingXMP.cpp index 1f4daa8..bfcf56c 100644 --- a/samples/source/ReadingXMP.cpp +++ b/samples/source/ReadingXMP.cpp @@ -12,7 +12,10 @@ * and examining it through the XMP object. */ +#include <cstdio> +#include <vector> #include <string> +#include <cstring> // Must be defined to instantiate template classes #define TXMP_STRING_TYPE std::string diff --git a/samples/source/UnicodeCorrectness.cpp b/samples/source/UnicodeCorrectness.cpp new file mode 100644 index 0000000..56f121a --- /dev/null +++ b/samples/source/UnicodeCorrectness.cpp @@ -0,0 +1,2813 @@ +// ================================================================================================= + +#include <cstdio> +#include <vector> +#include <string> +#include <cstring> +#include <ctime> + +#include <cstdlib> +#include <cerrno> +#include <stdexcept> + +using namespace std; + +#if WIN_ENV + #pragma warning ( disable : 4701 ) // local variable may be used without having been initialized +#endif + +// ================================================================================================= + +#include "public/include/XMP_Environment.h" +#include "public/include/XMP_Const.h" + +#include "source/EndianUtils.hpp" +#include "source/UnicodeConversions.hpp" +#include "source/UnicodeConversions.cpp" + +// ================================================================================================= + +#define kCodePointCount 0x110000 + +UTF8Unit sU8 [kCodePointCount*4 + 8]; +UTF16Unit sU16 [kCodePointCount*2 + 4]; +UTF32Unit sU32 [kCodePointCount + 2]; + +// ================================================================================================= + +static UTF16Unit NativeUTF16BE ( UTF16Unit value ) +{ + if ( ! kBigEndianHost ) SwapUTF16 ( &value, &value, 1 ); + return value; +} + +static UTF16Unit NativeUTF16LE ( UTF16Unit value ) +{ + if ( kBigEndianHost ) SwapUTF16 ( &value, &value, 1 ); + return value; +} + +static UTF32Unit NativeUTF32BE ( UTF32Unit value ) +{ + if ( ! kBigEndianHost ) SwapUTF32 ( &value, &value, 1 ); + return value; +} + +static UTF32Unit NativeUTF32LE ( UTF32Unit value ) +{ + if ( kBigEndianHost ) SwapUTF32 ( &value, &value, 1 ); + return value; +} + +// ================================================================================================= + +static void Bad_CodePoint_to_UTF8 ( FILE * log, UTF32Unit cp ) +{ + UTF8Unit u8[8]; + size_t len; + + try { + CodePoint_to_UTF8 ( cp, u8, sizeof(u8), &len ); + fprintf ( log, " *** CodePoint_to_UTF8 failure, no exception for 0x%X\n", cp ); + } catch ( ... ) { + // Do nothing, the exception is expected. + } + +} + +// ================================================================================================= + +static void Bad_CodePoint_to_UTF16BE ( FILE * log, UTF32Unit cp ) +{ + UTF16Unit u16[4]; + size_t len; + + try { + CodePoint_to_UTF16BE ( cp, u16, sizeof(u16), &len ); + fprintf ( log, " *** CodePoint_to_UTF16BE failure, no exception for 0x%X\n", cp ); + } catch ( ... ) { + // Do nothing, the exception is expected. + } + +} + +// ================================================================================================= + +static void Bad_CodePoint_to_UTF16LE ( FILE * log, UTF32Unit cp ) +{ + UTF16Unit u16[4]; + size_t len; + + try { + CodePoint_to_UTF16LE ( cp, u16, sizeof(u16), &len ); + fprintf ( log, " *** CodePoint_to_UTF16LE failure, no exception for 0x%X\n", cp ); + } catch ( ... ) { + // Do nothing, the exception is expected. + } + +} + +// ================================================================================================= + +static void Bad_CodePoint_from_UTF8 ( FILE * log, const char * inU8, const char * message ) +{ + UTF32Unit cp; + size_t len; + + try { + CodePoint_from_UTF8 ( (UTF8Unit*)inU8, strlen(inU8), &cp, &len ); + fprintf ( log, " *** CodePoint_from_UTF8 failure, no exception for %s\n", message ); + } catch ( ... ) { + // Do nothing, the exception is expected. + } + +} + +// ================================================================================================= + +static void Bad_CodePoint_from_UTF16BE ( FILE * log, const UTF16Unit * inU16, const size_t inLen, const char * message ) +{ + UTF32Unit cp; + size_t outLen; + + try { + CodePoint_from_UTF16BE ( inU16, inLen, &cp, &outLen ); + fprintf ( log, " *** CodePoint_from_UTF16BE failure, no exception for %s\n", message ); + } catch ( ... ) { + // Do nothing, the exception is expected. + } + +} + +// ================================================================================================= + +static void Bad_CodePoint_from_UTF16LE ( FILE * log, const UTF16Unit * inU16, const size_t inLen, const char * message ) +{ + UTF32Unit cp; + size_t outLen; + + try { + CodePoint_from_UTF16LE ( inU16, inLen, &cp, &outLen ); + fprintf ( log, " *** CodePoint_from_UTF16LE failure, no exception for %s\n", message ); + } catch ( ... ) { + // Do nothing, the exception is expected. + } + +} + +// ================================================================================================= + +static void Test_SwappingPrimitives ( FILE * log ) +{ + UTF16Unit u16[8]; + UTF32Unit u32[8]; + UTF32Unit i; + + fprintf ( log, "\nTesting byte swapping primitives\n" ); + + u16[0] = 0x1122; + if ( UTF16InSwap(&u16[0]) == 0x2211 ) printf ( " UTF16InSwap OK\n" ); + + u32[0] = 0x11223344; + if ( UTF32InSwap(&u32[0]) == 0x44332211 ) printf ( " UTF32InSwap OK\n" ); + + UTF16OutSwap ( &u16[0], 0x1122 ); + if ( u16[0] == 0x2211 ) printf ( " UTF16OutSwap OK\n" ); + + UTF32OutSwap ( &u32[0], 0x11223344 ); + if ( u32[0] == 0x44332211 ) printf ( " UTF32OutSwap OK\n" ); + + for ( i = 0; i < 8; ++i ) u16[i] = 0x1100 | UTF16Unit(i); + SwapUTF16 ( u16, u16, 8 ); + for ( i = 0; i < 8; ++i ) { + if ( u16[i] != ((UTF16Unit(i) << 8) | 0x11) ) break; + } + if ( i == 8 ) printf ( " SwapUTF16 OK\n" ); + + for ( i = 0; i < 8; ++i ) u32[i] = 0x11223300 | i; + SwapUTF32 ( u32, u32, 8 ); + for ( i = 0; i < 8; ++i ) { + if ( u32[i] != ((i << 24) | 0x00332211) ) break; + } + if ( i == 8 ) printf ( " SwapUTF32 OK\n" ); + +} // Test_SwappingPrimitives + +// ================================================================================================= + +static void Test_CodePoint_to_UTF8 ( FILE * log ) +{ + size_t len, lenx; + UTF32Unit cp, cp0, cpx; + UTF8Unit u8[8]; + + // ------------------------------------- + // Test CodePoint_to_UTF8 on good input. + + fprintf ( log, "\nTesting CodePoint_to_UTF8 on good input\n" ); + + // Test ASCII, 00..7F. + cp0 = 0; + for ( cp = cp0; cp < 0x80; ++cp ) { + CodePoint_to_UTF8 ( cp, u8, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 1, &len ); + CodePoint_from_UTF8 ( u8, len, &cpx, &lenx ); + if ( (len != 1) || (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 2, &len ); + CodePoint_from_UTF8 ( u8, len, &cpx, &lenx ); + if ( (len != 1) || (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF8 done for %.4X..%.4X\n", cp0, cpx ); + + // Test 2 byte values, 0080..07FF : 110x xxxx 10xx xxxx + cp0 = cpx+1; + for ( cp = cp0; cp < 0x800; ++cp ) { + CodePoint_to_UTF8 ( cp, u8, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 1, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 2, &len ); + CodePoint_from_UTF8 ( u8, len, &cpx, &lenx ); + if ( (len != 2) || (cp != cpx) || (lenx != 2) ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 3, &len ); + CodePoint_from_UTF8 ( u8, len, &cpx, &lenx ); + if ( (len != 2) || (cp != cpx) || (lenx != 2) ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF8 done for %.4X..%.4X\n", cp0, cpx ); + + // Test 3 byte values, 0800..D7FF : 1110 xxxx 10xx xxxx 10xx xxxx + cp0 = cpx+1; + for ( cp = cp0; cp < 0xD800; ++cp ) { + CodePoint_to_UTF8 ( cp, u8, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 1, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 2, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 3, &len ); + CodePoint_from_UTF8 ( u8, len, &cpx, &lenx ); + if ( (len != 3) || (cp != cpx) || (lenx != 3) ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 4, &len ); + CodePoint_from_UTF8 ( u8, len, &cpx, &lenx ); + if ( (len != 3) || (cp != cpx) || (lenx != 3) ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF8 done for %.4X..%.4X\n", cp0, cpx ); + + // Test 3 byte values, E000..FFFF : 1110 xxxx 10xx xxxx 10xx xxxx + cp0 = 0xE000; + for ( cp = cp0; cp < 0x10000; ++cp ) { + CodePoint_to_UTF8 ( cp, u8, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 1, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 2, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 3, &len ); + CodePoint_from_UTF8 ( u8, len, &cpx, &lenx ); + if ( (len != 3) || (cp != cpx) || (lenx != 3) ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 4, &len ); + CodePoint_from_UTF8 ( u8, len, &cpx, &lenx ); + if ( (len != 3) || (cp != cpx) || (lenx != 3) ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF8 done for %.4X..%.4X\n", cp0, cpx ); + + // Test 4 byte values, 10000..10FFFF : 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx + cp0 = cpx+1; + for ( cp = cp0; cp < 0x110000; ++cp ) { + CodePoint_to_UTF8 ( cp, u8, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 1, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 2, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 3, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 4, &len ); + CodePoint_from_UTF8 ( u8, len, &cpx, &lenx ); + if ( (len != 4) || (cp != cpx) || (lenx != 4) ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + CodePoint_to_UTF8 ( cp, u8, 5, &len ); + CodePoint_from_UTF8 ( u8, len, &cpx, &lenx ); + if ( (len != 4) || (cp != cpx) || (lenx != 4) ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF8 done for %.4X..%.4X\n", cp0, cpx ); + + // -------------------------------------- + // Test CodePoint_to_UTF8 with bad input. + + fprintf ( log, "\nTesting CodePoint_to_UTF8 with bad input\n" ); + + Bad_CodePoint_to_UTF8 ( log, 0x110000 ); // Code points beyond the defined range. + Bad_CodePoint_to_UTF8 ( log, 0x123456 ); + Bad_CodePoint_to_UTF8 ( log, 0xFFFFFFFF ); + Bad_CodePoint_to_UTF8 ( log, 0xD800 ); // Surrogate code points. + Bad_CodePoint_to_UTF8 ( log, 0xDC00 ); + Bad_CodePoint_to_UTF8 ( log, 0xDFFF ); + + fprintf ( log, " CodePoint_to_UTF8 done with bad input\n" ); + +} // Test_CodePoint_to_UTF8 + +// ================================================================================================= + +static void Test_CodePoint_from_UTF8 ( FILE * log ) +{ + UTF32Unit i, j, k, l; + size_t len; + UTF32Unit cp, cp0, cpx; + UTF8Unit u8[5]; + + // --------------------------------------- + // Test CodePoint_from_UTF8 on good input. + + fprintf ( log, "\nTesting CodePoint_from_UTF8 on good input\n" ); + + // Test ASCII, 00..7F. + cp0 = 0; + for ( i = 0; i < 0x80; ++i ) { + u8[0] = UTF8Unit(i); u8[1] = 0xFF; cpx = i; + CodePoint_from_UTF8 ( u8, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, "CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 1, &cp, &len ); + if ( (cp != cpx) || (len != 1) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 2, &cp, &len ); + if ( (cp != cpx) || (len != 1) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + } + fprintf ( log, " CodePoint_from_UTF8 done for %.4X..%.4X\n", cp0, cpx ); + + // Test 2 byte values, 0080..07FF : 110x xxxx 10xx xxxx + cp0 = cpx+1; + for ( i = 0; i < 0x20; ++i ) { + for ( j = 0; j < 0x40; ++j ) { + cpx = (i<<6) + j; if ( cpx < cp0 ) continue; + u8[0] = 0xC0+UTF8Unit(i); u8[1] = 0x80+UTF8Unit(j); u8[2] = 0xFF; + CodePoint_from_UTF8 ( u8, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 1, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 2, &cp, &len ); + if ( (cp != cpx) || (len != 2) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 3, &cp, &len ); + if ( (cp != cpx) || (len != 2) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + } + } + fprintf ( log, " CodePoint_from_UTF8 done for %.4X..%.4X\n", cp0, cpx ); + + // Test 3 byte values, 0800..D7FF : 1110 xxxx 10xx xxxx 10xx xxxx + cp0 = cpx+1; + for ( i = 0; i < 0x10; ++i ) { + for ( j = 0; j < 0x40; ++j ) { + for ( k = 0; k < 0x40; ++k ) { + cpx = (i<<12) + (j<<6) + k; if ( cpx < cp0 ) continue; + u8[0] = 0xE0+UTF8Unit(i); u8[1] = 0x80+UTF8Unit(j); u8[2] = 0x80+UTF8Unit(k); u8[3] = 0xFF; + CodePoint_from_UTF8 ( u8, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 1, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 2, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 3, &cp, &len ); + if ( (cp != cpx) || (len != 3) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 4, &cp, &len ); + if ( (cp != cpx) || (len != 3) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + } + if ( cpx == 0xD7FF ) break; + } + if ( cpx == 0xD7FF ) break; + } + fprintf ( log, " CodePoint_from_UTF8 done for %.4X..%.4X\n", cp0, cpx ); + + // Test 3 byte values, E000..FFFF : 1110 xxxx 10xx xxxx 10xx xxxx + cp0 = 0xE000; + for ( i = 0; i < 0x10; ++i ) { + for ( j = 0; j < 0x40; ++j ) { + for ( k = 0; k < 0x40; ++k ) { + cpx = (i<<12) + (j<<6) + k; if ( cpx < cp0 ) continue; + u8[0] = 0xE0+UTF8Unit(i); u8[1] = 0x80+UTF8Unit(j); u8[2] = 0x80+UTF8Unit(k); u8[3] = 0xFF; + CodePoint_from_UTF8 ( u8, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 1, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 2, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 3, &cp, &len ); + if ( (cp != cpx) || (len != 3) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 4, &cp, &len ); + if ( (cp != cpx) || (len != 3) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + } + } + } + fprintf ( log, " CodePoint_from_UTF8 done for %.4X..%.4X\n", cp0, cpx ); + + // Test 4 byte values, 10000..10FFFF : 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx + cp0 = cpx+1; + for ( i = 0; i < 0x7; ++i ) { + for ( j = 0; j < 0x40; ++j ) { + for ( k = 0; k < 0x40; ++k ) { + for ( l = 0; l < 0x40; ++l ) { + cpx = (i<<18) + (j<<12) + (k<<6) + l; if ( cpx < cp0 ) continue; + u8[0] = 0xF0+UTF8Unit(i); u8[1] = 0x80+UTF8Unit(j); u8[2] = 0x80+UTF8Unit(k); u8[3] = 0x80+UTF8Unit(l); u8[4] = 0xFF; + CodePoint_from_UTF8 ( u8, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 1, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 2, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 3, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 4, &cp, &len ); + if ( (cp != cpx) || (len != 4) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + CodePoint_from_UTF8 ( u8, 5, &cp, &len ); + if ( (cp != cpx) || (len != 4) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", cpx ); + } + if ( cpx == 0x10FFFF ) break; + } + if ( cpx == 0x10FFFF ) break; + } + if ( cpx == 0x10FFFF ) break; + } + fprintf ( log, " CodePoint_from_UTF8 done for %.4X..%.4X\n", cp0, cpx ); + + // ---------------------------------------- + // Test CodePoint_from_UTF8 with bad input. + + fprintf ( log, "\nTesting CodePoint_from_UTF8 with bad input\n" ); + + Bad_CodePoint_from_UTF8 ( log, "\x88\x20", "bad leading byte count" ); // One byte "sequence". + Bad_CodePoint_from_UTF8 ( log, "\xF9\x90\x80\x80\x80\x20", "bad leading byte count" ); // Five byte sequence. + Bad_CodePoint_from_UTF8 ( log, "\xFE\x90\x80\x80\x80\x80\x80\x20", "bad leading byte count" ); // Seven byte sequence. + Bad_CodePoint_from_UTF8 ( log, "\xFF\x90\x80\x80\x80\x80\x80\x80\x20", "bad leading byte count" ); // Eight byte sequence. + + Bad_CodePoint_from_UTF8 ( log, "\xF1\x80\x01\x80\x20", "bad following high bits" ); // 00xx xxxx + Bad_CodePoint_from_UTF8 ( log, "\xF1\x80\x40\x80\x20", "bad following high bits" ); // 01xx xxxx + Bad_CodePoint_from_UTF8 ( log, "\xF1\x80\xC0\x80\x20", "bad following high bits" ); // 11xx xxxx + + Bad_CodePoint_from_UTF8 ( log, "\xF4\x90\x80\x80\x20", "out of range code point" ); // U+110000 + Bad_CodePoint_from_UTF8 ( log, "\xF7\xBF\xBF\xBF\x20", "out of range code point" ); // U+1FFFFF + + Bad_CodePoint_from_UTF8 ( log, "\xED\xA0\x80\x20", "surrogate code point" ); // U+D800 + Bad_CodePoint_from_UTF8 ( log, "\xED\xB0\x80\x20", "surrogate code point" ); // U+DC00 + Bad_CodePoint_from_UTF8 ( log, "\xED\xBF\xBF\x20", "surrogate code point" ); // U+DFFF + + fprintf ( log, " CodePoint_from_UTF8 done with bad input\n" ); + +} // Test_CodePoint_from_UTF8 + +// ================================================================================================= + +static void Test_CodePoint_to_UTF16 ( FILE * log ) +{ + size_t len, lenx; + UTF32Unit cp, cp0, cpx; + UTF16Unit u16[3]; + + // ---------------------------------------- + // Test CodePoint_to_UTF16BE on good input. + + fprintf ( log, "\nTesting CodePoint_to_UTF16BE on good input\n" ); + + // Some explicit sanity tests, in case the code and exhaustive tests have inverse bugs. + if ( kBigEndianHost ) { + CodePoint_to_UTF16BE ( 0x1234, u16, 1, &len ); + if ( (len != 1) || (u16[0] != 0x1234) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+1234\n" ); + CodePoint_to_UTF16BE ( 0xFEDC, u16, 1, &len ); + if ( (len != 1) || (u16[0] != 0xFEDC) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+FEDC\n" ); + CodePoint_to_UTF16BE ( 0x14834, u16, 2, &len ); + if ( (len != 2) || (u16[0] != 0xD812) || (u16[1] != 0xDC34) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+14834\n" ); + } else { + CodePoint_to_UTF16BE ( 0x1234, u16, 1, &len ); + if ( (len != 1) || (u16[0] != 0x3412) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+1234\n" ); + CodePoint_to_UTF16BE ( 0xFEDC, u16, 1, &len ); + if ( (len != 1) || (u16[0] != 0xDCFE) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+FEDC\n" ); + CodePoint_to_UTF16BE ( 0x14834, u16, 2, &len ); + if ( (len != 2) || (u16[0] != 0x12D8) || (u16[1] != 0x34DC) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+14834\n" ); + } + fprintf ( log, " CodePoint_to_UTF16BE sanity tests done\n" ); + + // Test the low part of the BMP, 0000..D7FF. + cp0 = 0; + for ( cp = cp0; cp < 0xD800; ++cp ) { + CodePoint_to_UTF16BE ( cp, u16, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16BE ( cp, u16, 1, &len ); + if ( (len != 1) || (NativeUTF16BE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16BE ( cp, u16, 2, &len ); + if ( (len != 1) || (NativeUTF16BE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF16BE done for %.4X..%.4X\n", cp0, cpx ); + + // Test the high part of the BMP, E000..FFFF. + cp0 = 0xE000; + for ( cp = cp0; cp < 0x10000; ++cp ) { + CodePoint_to_UTF16BE ( cp, u16, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16BE ( cp, u16, 1, &len ); + if ( (len != 1) || (NativeUTF16BE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16BE ( cp, u16, 2, &len ); + if ( (len != 1) || (NativeUTF16BE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF16BE done for %.4X..%.4X\n", cp0, cpx ); + + // Test beyond the BMP, 10000..10FFFF. + cp0 = 0x10000; + for ( cp = cp0; cp < 0x110000; ++cp ) { + CodePoint_to_UTF16BE ( cp, u16, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16BE ( cp, u16, 1, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16BE ( cp, u16, 2, &len ); + if ( (len != 2) || + (NativeUTF16BE(u16[0]) != (0xD800 | ((cp-0x10000) >> 10))) || + (NativeUTF16BE(u16[1]) != (0xDC00 | ((cp-0x10000) & 0x3FF))) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 2) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16BE ( cp, u16, 3, &len ); + if ( (len != 2) || + (NativeUTF16BE(u16[0]) != (0xD800 | ((cp-0x10000) >> 10))) || + (NativeUTF16BE(u16[1]) != (0xDC00 | ((cp-0x10000) & 0x3FF))) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 2) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF16BE done for %.4X..%.4X\n", cp0, cpx ); + + // ---------------------------------------- + // Test CodePoint_to_UTF16LE on good input. + + fprintf ( log, "\nTesting CodePoint_to_UTF16LE on good input\n" ); + + // Some explicit sanity tests, in case the code and exhaustive tests have inverse bugs. + if ( kBigEndianHost ) { + CodePoint_to_UTF16LE ( 0x1234, u16, 1, &len ); + if ( (len != 1) || (u16[0] != 0x3412) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+1234\n" ); + CodePoint_to_UTF16LE ( 0xFEDC, u16, 1, &len ); + if ( (len != 1) || (u16[0] != 0xDCFE) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+FEDC\n" ); + CodePoint_to_UTF16LE ( 0x14834, u16, 2, &len ); + if ( (len != 2) || (u16[0] != 0x12D8) || (u16[1] != 0x34DC) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+14834\n" ); + } else { + CodePoint_to_UTF16LE ( 0x1234, u16, 1, &len ); + if ( (len != 1) || (u16[0] != 0x1234) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+1234\n" ); + CodePoint_to_UTF16LE ( 0xFEDC, u16, 1, &len ); + if ( (len != 1) || (u16[0] != 0xFEDC) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+FEDC\n" ); + CodePoint_to_UTF16LE ( 0x14834, u16, 2, &len ); + if ( (len != 2) || (u16[0] != 0xD812) || (u16[1] != 0xDC34) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+14834\n" ); + } + fprintf ( log, " CodePoint_to_UTF16LE sanity tests done\n" ); + + // Test the low part of the BMP, 0000..D7FF. + cp0 = 0; + for ( cp = cp0; cp < 0xD800; ++cp ) { + CodePoint_to_UTF16LE ( cp, u16, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16LE ( cp, u16, 1, &len ); + if ( (len != 1) || (NativeUTF16LE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16LE ( cp, u16, 2, &len ); + if ( (len != 1) || (NativeUTF16LE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF16LE done for %.4X..%.4X\n", cp0, cpx ); + + // Test the high part of the BMP, E000..FFFF. + cp0 = 0xE000; + for ( cp = cp0; cp < 0x10000; ++cp ) { + CodePoint_to_UTF16LE ( cp, u16, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16LE ( cp, u16, 1, &len ); + if ( (len != 1) || (NativeUTF16LE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16LE ( cp, u16, 2, &len ); + if ( (len != 1) || (NativeUTF16LE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF16LE done for %.4X..%.4X\n", cp0, cpx ); + + // Test beyond the BMP, 10000..10FFFF. + cp0 = 0x10000; + for ( cp = cp0; cp < 0x110000; ++cp ) { + CodePoint_to_UTF16LE ( cp, u16, 0, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16LE ( cp, u16, 1, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16LE ( cp, u16, 2, &len ); + if ( (len != 2) || + (NativeUTF16LE(u16[0]) != (0xD800 | ((cp-0x10000) >> 10))) || + (NativeUTF16LE(u16[1]) != (0xDC00 | ((cp-0x10000) & 0x3FF))) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 2) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_to_UTF16LE ( cp, u16, 3, &len ); + if ( (len != 2) || + (NativeUTF16LE(u16[0]) != (0xD800 | ((cp-0x10000) >> 10))) || + (NativeUTF16LE(u16[1]) != (0xDC00 | ((cp-0x10000) & 0x3FF))) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx ); + if ( (cp != cpx) || (lenx != 2) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp ); + } + fprintf ( log, " CodePoint_to_UTF16LE done for %.4X..%.4X\n", cp0, cpx ); + + // --------------------------------------- + // Test CodePoint_to_UTF16 with bad input. + + fprintf ( log, "\nTesting CodePoint_to_UTF16 with bad input\n" ); + + Bad_CodePoint_to_UTF16BE ( log, 0x110000 ); // Code points beyond the defined range. + Bad_CodePoint_to_UTF16BE ( log, 0x123456 ); + Bad_CodePoint_to_UTF16BE ( log, 0xFFFFFFFF ); + Bad_CodePoint_to_UTF16BE ( log, 0xD800 ); // Surrogate code points. + Bad_CodePoint_to_UTF16BE ( log, 0xDC00 ); + Bad_CodePoint_to_UTF16BE ( log, 0xDFFF ); + + fprintf ( log, " CodePoint_to_UTF16BE done with bad input\n" ); + + Bad_CodePoint_to_UTF16LE ( log, 0x110000 ); // Code points beyond the defined range. + Bad_CodePoint_to_UTF16LE ( log, 0x123456 ); + Bad_CodePoint_to_UTF16LE ( log, 0xFFFFFFFF ); + Bad_CodePoint_to_UTF16LE ( log, 0xD800 ); // Surrogate code points. + Bad_CodePoint_to_UTF16LE ( log, 0xDC00 ); + Bad_CodePoint_to_UTF16LE ( log, 0xDFFF ); + + fprintf ( log, " CodePoint_to_UTF16LE done with bad input\n" ); + +} // Test_CodePoint_to_UTF16 + +// ================================================================================================= + +static void Test_CodePoint_from_UTF16 ( FILE * log ) +{ + UTF32Unit i, j; + size_t len; + UTF32Unit cp, cp0, cpx; + UTF16Unit u16[3]; + + // ------------------------------------------ + // Test CodePoint_from_UTF16BE on good input. + + fprintf ( log, "\nTesting CodePoint_from_UTF16BE on good input\n" ); + + // Some explicit sanity tests, in case the code and exhaustive tests have inverse bugs. + if ( kBigEndianHost ) { + u16[0] = 0x1234; + CodePoint_from_UTF16BE ( u16, 1, &cp, &len ); + if ( (len != 1) || (cp != 0x1234) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+1234\n" ); + u16[0] = 0xFEDC; + CodePoint_from_UTF16BE ( u16, 1, &cp, &len ); + if ( (len != 1) || (cp != 0xFEDC) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+FEDC\n" ); + u16[0] = 0xD812; u16[1] = 0xDC34; + CodePoint_from_UTF16BE ( u16, 2, &cp, &len ); + if ( (len != 2) || (cp != 0x14834) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+14834\n" ); + } else { + u16[0] = 0x3412; + CodePoint_from_UTF16BE ( u16, 1, &cp, &len ); + if ( (len != 1) || (cp != 0x1234) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+1234\n" ); + u16[0] = 0xDCFE; + CodePoint_from_UTF16BE ( u16, 1, &cp, &len ); + if ( (len != 1) || (cp != 0xFEDC) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+FEDC\n" ); + u16[0] = 0x12D8; u16[1] = 0x34DC; + CodePoint_from_UTF16BE ( u16, 2, &cp, &len ); + if ( (len != 2) || (cp != 0x14834) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+14834\n" ); + } + fprintf ( log, " CodePoint_from_UTF16BE sanity tests done\n" ); + + // Test the low part of the BMP, 0000..D7FF. + cp0 = 0; + for ( i = 0; i < 0xD800; ++i ) { + u16[0] = NativeUTF16BE(UTF16Unit(i)); u16[1] = 0xFFFF; cpx = i; + CodePoint_from_UTF16BE ( u16, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16BE ( u16, 1, &cp, &len ); + if ( (cp != cpx) || (len != 1) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16BE ( u16, 2, &cp, &len ); + if ( (cp != cpx) || (len != 1) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", cpx ); + } + fprintf ( log, " CodePoint_from_UTF16BE done for %.4X..%.4X\n", cp0, cpx ); + + // Test the high part of the BMP, E000..FFFF. + cp0 = 0xE000; + for ( i = cp0; i < 0x10000; ++i ) { + u16[0] = NativeUTF16BE(UTF16Unit(i)); u16[1] = 0xFFFF; cpx = i; + CodePoint_from_UTF16BE ( u16, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16BE ( u16, 1, &cp, &len ); + if ( (cp != cpx) || (len != 1) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16BE ( u16, 2, &cp, &len ); + if ( (cp != cpx) || (len != 1) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", cpx ); + } + fprintf ( log, " CodePoint_from_UTF16BE done for %.4X..%.4X\n", cp0, cpx ); + + // Test beyond the BMP, 10000..10FFFF. + cp0 = 0x10000; + for ( i = 0; i < 0x400; ++i ) { + for ( j = 0; j < 0x400; ++j ) { + cpx = (i<<10) + j + cp0; + u16[0] = NativeUTF16BE(0xD800+UTF16Unit(i)); u16[1] = NativeUTF16BE(0xDC00+UTF16Unit(j)); u16[2] = 0xFFFF; + CodePoint_from_UTF16BE ( u16, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16BE ( u16, 1, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16BE ( u16, 2, &cp, &len ); + if ( (cp != cpx) || (len != 2) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16BE ( u16, 3, &cp, &len ); + if ( (cp != cpx) || (len != 2) ) fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", cpx ); + } + } + fprintf ( log, " CodePoint_from_UTF16BE done for %.4X..%.4X\n", cp0, cpx ); + + // ------------------------------------------ + // Test CodePoint_from_UTF16LE on good input. + + fprintf ( log, "\nTesting CodePoint_from_UTF16LE on good input\n" ); + + // Some explicit sanity tests, in case the code and exhaustive tests have inverse bugs. + if ( kBigEndianHost ) { + u16[0] = 0x3412; + CodePoint_from_UTF16LE ( u16, 1, &cp, &len ); + if ( (len != 1) || (cp != 0x1234) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+1234\n" ); + u16[0] = 0xDCFE; + CodePoint_from_UTF16LE ( u16, 1, &cp, &len ); + if ( (len != 1) || (cp != 0xFEDC) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+FEDC\n" ); + u16[0] = 0x12D8; u16[1] = 0x34DC; + CodePoint_from_UTF16LE ( u16, 2, &cp, &len ); + if ( (len != 2) || (cp != 0x14834) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+14834\n" ); + } else { + u16[0] = 0x1234; + CodePoint_from_UTF16LE ( u16, 1, &cp, &len ); + if ( (len != 1) || (cp != 0x1234) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+1234\n" ); + u16[0] = 0xFEDC; + CodePoint_from_UTF16LE ( u16, 1, &cp, &len ); + if ( (len != 1) || (cp != 0xFEDC) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+FEDC\n" ); + u16[0] = 0xD812; u16[1] = 0xDC34; + CodePoint_from_UTF16LE ( u16, 2, &cp, &len ); + if ( (len != 2) || (cp != 0x14834) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+14834\n" ); + } + fprintf ( log, " CodePoint_from_UTF16LE sanity tests done\n" ); + + // Test the low part of the BMP, 0000..D7FF. + cp0 = 0; + for ( i = 0; i < 0xD800; ++i ) { + u16[0] = NativeUTF16LE(UTF16Unit(i)); u16[1] = 0xFFFF; cpx = i; + CodePoint_from_UTF16LE ( u16, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16LE ( u16, 1, &cp, &len ); + if ( (cp != cpx) || (len != 1) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16LE ( u16, 2, &cp, &len ); + if ( (cp != cpx) || (len != 1) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", cpx ); + } + fprintf ( log, " CodePoint_from_UTF16LE done for %.4X..%.4X\n", cp0, cpx ); + + // Test the high part of the BMP, E000..FFFF. + cp0 = 0xE000; + for ( i = cp0; i < 0x10000; ++i ) { + u16[0] = NativeUTF16LE(UTF16Unit(i)); u16[1] = 0xFFFF; cpx = i; + CodePoint_from_UTF16LE ( u16, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16LE ( u16, 1, &cp, &len ); + if ( (cp != cpx) || (len != 1) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16LE ( u16, 2, &cp, &len ); + if ( (cp != cpx) || (len != 1) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", cpx ); + } + fprintf ( log, " CodePoint_from_UTF16LE done for %.4X..%.4X\n", cp0, cpx ); + + // Test beyond the BMP, 10000..10FFFF. + cp0 = 0x10000; + for ( i = 0; i < 0x400; ++i ) { + for ( j = 0; j < 0x400; ++j ) { + cpx = (i<<10) + j + cp0; + u16[0] = NativeUTF16LE(0xD800+UTF16Unit(i)); u16[1] = NativeUTF16LE(0xDC00+UTF16Unit(j)); u16[2] = 0xFFFF; + CodePoint_from_UTF16LE ( u16, 0, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16LE ( u16, 1, &cp, &len ); + if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16LE ( u16, 2, &cp, &len ); + if ( (cp != cpx) || (len != 2) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", cpx ); + CodePoint_from_UTF16LE ( u16, 3, &cp, &len ); + if ( (cp != cpx) || (len != 2) ) fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", cpx ); + } + } + fprintf ( log, " CodePoint_from_UTF16LE done for %.4X..%.4X\n", cp0, cpx ); + + // --------------------------------------------------------------- + // Test CodePoint_from_UTF16 with bad input. U+12345 is D808 DF45. + + fprintf ( log, "\nTesting CodePoint_from_UTF16 with bad input\n" ); + + memcpy ( sU16, "\xD8\x08\x00\x20\x00\x00", 6 ); // ! HPPA (maybe others) won't tolerate misaligned loads. + Bad_CodePoint_from_UTF16BE ( log, sU16, 3, "missing low surrogate" ); + memcpy ( sU16, "\xDF\x45\x00\x20\x00\x00", 6 ); + Bad_CodePoint_from_UTF16BE ( log, sU16, 3, "leading low surrogate" ); + memcpy ( sU16, "\xD8\x08\xD8\x08\x00\x20\x00\x00", 8 ); + Bad_CodePoint_from_UTF16BE ( log, sU16, 4, "double high surrogate" ); + + fprintf ( log, " CodePoint_from_UTF16BE done with bad input\n" ); + + memcpy ( sU16, "\x08\xD8\x20\x00\x00\x00", 6 ); + Bad_CodePoint_from_UTF16LE ( log, sU16, 3, "missing low surrogate" ); + memcpy ( sU16, "\x45\xDF\x20\x00\x00\x00", 6 ); + Bad_CodePoint_from_UTF16LE ( log, sU16, 3, "leading low surrogate" ); + memcpy ( sU16, "\x08\xD8\x08\xD8\x20\x00\x00\x00", 8 ); + Bad_CodePoint_from_UTF16LE ( log, sU16, 4, "double high surrogate" ); + + fprintf ( log, " CodePoint_from_UTF16LE done with bad input\n" ); + +} // Test_CodePoint_from_UTF16 + +// ================================================================================================= + +static void Test_UTF8_to_UTF16 ( FILE * log ) +{ + size_t i; + size_t len8, len16, len8x, len16x; + UTF32Unit cp, cpx, cpLo, cpHi; + + // --------------------------------------------------------------------------------------- + // Test UTF8_to_UTF16BE on good input. The CodePoint to/from functions are already tested, + // use them to verify the results here. + + fprintf ( log, "\nTesting UTF8_to_UTF16BE on good input\n" ); + + // Test ASCII. + + cpLo = 0; cpHi = 0x80; len8 = len16 = 0x80; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU8[i] = UTF8Unit(cp); + sU8[len8] = 0xFF; + + UTF8_to_UTF16BE ( sU8, len8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16BE length failure, %d -> %d\n", len8x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF8_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test non-ASCII inside the BMP, below the surrogates. + + cpLo = 0x80; cpHi = 0xD800; len16 = cpHi-cpLo; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 != (2*(0x800-cpLo) + 3*(cpHi-0x800)) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF16BE ( sU8, len8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16BE length failure, %d -> %d\n", len8x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF8_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len16 = cpHi-cpLo; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 != 3*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF16BE ( sU8, len8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16BE length failure, %d -> %d\n", len8x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF8_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len16 = (cpHi-cpLo)*2; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 != 4*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF16BE ( sU8, len8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16BE length failure, %d -> %d\n", len8x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF8_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating ASCII, non-ASCII BMP, beyond BMP. + + len16 = 0x80*(1+1+1+2); + for ( i = 0, len8 = 0; i < 0x80; ++i ) { + CodePoint_to_UTF8 ( i, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x100, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x1000, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x10000, &sU8[len8], 8, &len8x ); + len8 += len8x; + } + if ( len8 != 0x80*(1+2+3+4) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF16BE ( sU8, len8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16BE length failure, %d -> %d\n", len8x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, len16 = 0; i < 0x80; ++i ) { + CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", i ); + len16 += len16x; + CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != i+0x100) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", i+0x100 ); + len16 += len16x; + CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != i+0x1000) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", i+0x1000 ); + len16 += len16x; + CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", i+0x10000 ); + len16 += len16x; + } + if ( len16 != 0x80*(1+1+1+2) ) fprintf ( log, " *** UTF8_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF8_to_UTF16BE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len8 = 0x80*(1+2+3+4); len16 = 0x80*(1+1+1+2); + + UTF8_to_UTF16BE ( sU8, 0, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF8_to_UTF16BE empty input failure, %d -> %d\n", len8x, len16x ); + UTF8_to_UTF16BE ( sU8, len8, sU16, 0, &len8x, &len16x ); + if ( (len8x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF8_to_UTF16BE empty output failure, %d -> %d\n", len8x, len16x ); + UTF8_to_UTF16BE ( sU8, 8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8x != 6) || (len16x != 3) ) fprintf ( log, " *** UTF8_to_UTF16BE partial input failure, %d -> %d\n", len8x, len16x ); + UTF8_to_UTF16BE ( sU8, len8, sU16, 4, &len8x, &len16x ); + if ( (len8x != 6) || (len16x != 3) ) fprintf ( log, " *** UTF8_to_UTF16BE partial output failure, %d -> %d\n", len8x, len16x ); + + fprintf ( log, " UTF8_to_UTF16BE done for empty buffers and buffers ending in mid character\n" ); + + // ----------------------------------- + // Test UTF8_to_UTF16LE on good input. + + fprintf ( log, "\nTesting UTF8_to_UTF16LE on good input\n" ); + + // Test ASCII. + + cpLo = 0; cpHi = 0x80; len8 = len16 = 0x80; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU8[i] = UTF8Unit(cp); + sU8[len8] = 0xFF; + + UTF8_to_UTF16LE ( sU8, len8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16LE length failure, %d -> %d\n", len8x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF8_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test non-ASCII inside the BMP, below the surrogates. + + cpLo = 0x80; cpHi = 0xD800; len16 = cpHi-cpLo; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 != (2*(0x800-cpLo) + 3*(cpHi-0x800)) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF16LE ( sU8, len8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16LE length failure, %d -> %d\n", len8x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF8_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len16 = cpHi-cpLo; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 != 3*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF16LE ( sU8, len8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16LE length failure, %d -> %d\n", len8x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF8_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len16 = (cpHi-cpLo)*2; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 != 4*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF16LE ( sU8, len8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16LE length failure, %d -> %d\n", len8x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF8_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating ASCII, non-ASCII BMP, beyond BMP. + + len16 = 0x80*(1+1+1+2); + for ( i = 0, len8 = 0; i < 0x80; ++i ) { + CodePoint_to_UTF8 ( i, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x100, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x1000, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x10000, &sU8[len8], 8, &len8x ); + len8 += len8x; + } + if ( len8 != 0x80*(1+2+3+4) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF16LE ( sU8, len8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16LE length failure, %d -> %d\n", len8x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, len16 = 0; i < 0x80; ++i ) { + CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", i ); + len16 += len16x; + CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != i+0x100) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", i+0x100 ); + len16 += len16x; + CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != i+0x1000) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", i+0x1000 ); + len16 += len16x; + CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", i+0x10000 ); + len16 += len16x; + } + if ( len16 != 0x80*(1+1+1+2) ) fprintf ( log, " *** UTF8_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF8_to_UTF16LE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len8 = 0x80*(1+2+3+4); len16 = 0x80*(1+1+1+2); + + UTF8_to_UTF16LE ( sU8, 0, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF8_to_UTF16LE empty input failure, %d -> %d\n", len8x, len16x ); + UTF8_to_UTF16LE ( sU8, len8, sU16, 0, &len8x, &len16x ); + if ( (len8x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF8_to_UTF16LE empty output failure, %d -> %d\n", len8x, len16x ); + UTF8_to_UTF16LE ( sU8, 8, sU16, sizeof(sU16), &len8x, &len16x ); + if ( (len8x != 6) || (len16x != 3) ) fprintf ( log, " *** UTF8_to_UTF16LE partial input failure, %d -> %d\n", len8x, len16x ); + UTF8_to_UTF16LE ( sU8, len8, sU16, 4, &len8x, &len16x ); + if ( (len8x != 6) || (len16x != 3) ) fprintf ( log, " *** UTF8_to_UTF16LE partial output failure, %d -> %d\n", len8x, len16x ); + + fprintf ( log, " UTF8_to_UTF16LE done for empty buffers and buffers ending in mid character\n" ); + +} // Test_UTF8_to_UTF16 + +// ================================================================================================= + +static void Test_UTF8_to_UTF32 ( FILE * log ) +{ + size_t i; + size_t len8, len32, len8x, len32x; + UTF32Unit cp, cpLo, cpHi; + + // --------------------------------------------------------------------------------------- + // Test UTF8_to_UTF32BE on good input. The CodePoint to/from functions are already tested, + // use them to verify the results here. + + fprintf ( log, "\nTesting UTF8_to_UTF32BE on good input\n" ); + + // Test ASCII. + + cpLo = 0; cpHi = 0x80; len8 = len32 = 0x80; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU8[i] = UTF8Unit(cp); + sU8[len8] = 0xFF; + + UTF8_to_UTF32BE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32BE length failure, %d -> %d\n", len8x, len32x ); + + sU32[len32x] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF8_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test non-ASCII inside the BMP, below the surrogates. + + cpLo = 0x80; cpHi = 0xD800; len32 = cpHi-cpLo; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 != (2*(0x800-cpLo) + 3*(cpHi-0x800)) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF32BE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32BE length failure, %d -> %d\n", len8x, len32x ); + + sU32[len32x] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF8_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len32 = cpHi-cpLo; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 !=3*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF32BE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32BE length failure, %d -> %d\n", len8x, len32x ); + + sU32[len32x] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF8_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 !=4*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF32BE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32BE length failure, %d -> %d\n", len8x, len32x ); + + sU32[len32x] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF8_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating ASCII, non-ASCII BMP, beyond BMP. + + len32 = 0x80*(1+1+1+1); + for ( i = 0, len8 = 0; i < 0x80; ++i ) { + CodePoint_to_UTF8 ( i, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x100, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x1000, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x10000, &sU8[len8], 8, &len8x ); + len8 += len8x; + } + if ( len8 != 0x80*(1+2+3+4) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF32BE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32BE length failure, %d -> %d\n", len8x, len32x ); + + sU32[len32x] = 0xFFFFFFFF; + for ( i = 0, len32 = 0; i < 0x80; ++i ) { + if ( sU32[len32] != NativeUTF32BE(i) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", i ); + ++len32; + if ( sU32[len32] != NativeUTF32BE(i+0x100) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", i+0x100 ); + ++len32; + if ( sU32[len32] != NativeUTF32BE(i+0x1000) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", i+0x1000 ); + ++len32; + if ( sU32[len32] != NativeUTF32BE(i+0x10000) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", i+0x10000 ); + ++len32; + } + + fprintf ( log, " UTF8_to_UTF32BE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len8 = 0x80*(1+2+3+4); len32 = 0x80*(1+1+1+1); + + UTF8_to_UTF32BE ( sU8, 0, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF8_to_UTF32BE empty input failure, %d -> %d\n", len8x, len32x ); + UTF8_to_UTF32BE ( sU8, len8, sU32, 0, &len8x, &len32x ); + if ( (len8x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF8_to_UTF32BE empty output failure, %d -> %d\n", len8x, len32x ); + UTF8_to_UTF32BE ( sU8, 8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8x != 6) || (len32x != 3) ) fprintf ( log, " *** UTF8_to_UTF32BE partial input failure, %d -> %d\n", len8x, len32x ); + + fprintf ( log, " UTF8_to_UTF32BE done for empty buffers and buffers ending in mid character\n" ); + + // ----------------------------------- + // Test UTF8_to_UTF32LE on good input. + + fprintf ( log, "\nTesting UTF8_to_UTF32LE on good input\n" ); + + // Test ASCII. + + cpLo = 0; cpHi = 0x80; len8 = len32 = 0x80; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU8[i] = UTF8Unit(cp); + sU8[len8] = 0xFF; + + UTF8_to_UTF32LE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32LE length failure, %d -> %d\n", len8x, len32x ); + + sU32[len32x] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF8_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test non-ASCII inside the BMP, below the surrogates. + + cpLo = 0x80; cpHi = 0xD800; len32 = cpHi-cpLo; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 != (2*(0x800-cpLo) + 3*(cpHi-0x800)) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF32LE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32LE length failure, %d -> %d\n", len8x, len32x ); + + sU32[len32x] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF8_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len32 = cpHi-cpLo; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 !=3*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF32LE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32LE length failure, %d -> %d\n", len8x, len32x ); + + sU32[len32x] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF8_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; + for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x ); + if ( len8 !=4*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF32LE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32LE length failure, %d -> %d\n", len8x, len32x ); + + sU32[len32x] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF8_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating ASCII, non-ASCII BMP, beyond BMP. + + len32 = 0x80*(1+1+1+1); + for ( i = 0, len8 = 0; i < 0x80; ++i ) { + CodePoint_to_UTF8 ( i, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x100, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x1000, &sU8[len8], 8, &len8x ); + len8 += len8x; + CodePoint_to_UTF8 ( i+0x10000, &sU8[len8], 8, &len8x ); + len8 += len8x; + } + if ( len8 != 0x80*(1+2+3+4) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 ); + sU8[len8] = 0xFF; + + UTF8_to_UTF32LE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32LE length failure, %d -> %d\n", len8x, len32x ); + + sU32[len32x] = 0xFFFFFFFF; + for ( i = 0, len32 = 0; i < 0x80; ++i ) { + if ( sU32[len32] != NativeUTF32LE(i) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", i ); + ++len32; + if ( sU32[len32] != NativeUTF32LE(i+0x100) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", i+0x100 ); + ++len32; + if ( sU32[len32] != NativeUTF32LE(i+0x1000) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", i+0x1000 ); + ++len32; + if ( sU32[len32] != NativeUTF32LE(i+0x10000) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", i+0x10000 ); + ++len32; + } + + fprintf ( log, " UTF8_to_UTF32LE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len8 = 0x80*(1+2+3+4); len32 = 0x80*(1+1+1+1); + + UTF8_to_UTF32LE ( sU8, 0, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF8_to_UTF32LE empty input failure, %d -> %d\n", len8x, len32x ); + UTF8_to_UTF32LE ( sU8, len8, sU32, 0, &len8x, &len32x ); + if ( (len8x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF8_to_UTF32LE empty output failure, %d -> %d\n", len8x, len32x ); + UTF8_to_UTF32LE ( sU8, 8, sU32, sizeof(sU32), &len8x, &len32x ); + if ( (len8x != 6) || (len32x != 3) ) fprintf ( log, " *** UTF8_to_UTF32LE partial input failure, %d -> %d\n", len8x, len32x ); + + fprintf ( log, " UTF8_to_UTF32LE done for empty buffers and buffers ending in mid character\n" ); + +} // Test_UTF8_to_UTF32 + +// ================================================================================================= + +static void Test_UTF16_to_UTF8 ( FILE * log ) +{ + size_t i; + size_t len16, len8, len16x, len8x; + UTF32Unit cp, cpx, cpLo, cpHi; + + // --------------------------------------------------------------------------------------- + // Test UTF16BE_to_UTF8 on good input. The CodePoint to/from functions are already tested, + // use them to verify the results here. + + fprintf ( log, "\nTesting UTF16BE_to_UTF8 on good input\n" ); + + // Test ASCII. + + cpLo = 0; cpHi = 0x80; len16 = len8 = 0x80; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16BE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF16BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16BE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test 2 byte non-ASCII inside the BMP. + + cpLo = 0x80; cpHi = 0x800; len16 = cpHi-cpLo; len8 = 2*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16BE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF16BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16BE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test 3 byte non-ASCII inside the BMP, below the surrogates. + + cpLo = 0x800; cpHi = 0xD800; len16 = cpHi-cpLo; len8 = 3*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16BE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != cp) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF16BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16BE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len16 = cpHi-cpLo; len8 = 3*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16BE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != cp) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF16BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16BE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len8 = (cpHi-cpLo)*4; + for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16BE ( cp, &sU16[len16], 4, &len16x ); + if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16BE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 4) || (cpx != cp) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF16BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16BE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating ASCII, non-ASCII BMP, beyond BMP. + + len8 = 0x80*(1+2+3+4); + for ( i = 0, len16 = 0; i < 0x80; ++i ) { + CodePoint_to_UTF16BE ( i, &sU16[len16], 4, &len16x ); + len16 += len16x; + CodePoint_to_UTF16BE ( i+0x100, &sU16[len16], 4, &len16x ); + len16 += len16x; + CodePoint_to_UTF16BE ( i+0x1000, &sU16[len16], 4, &len16x ); + len16 += len16x; + CodePoint_to_UTF16BE ( i+0x10000, &sU16[len16], 4, &len16x ); + len16 += len16x; + } + if ( len16 != 0x80*(1+1+1+2) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16BE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, len8 = 0; i < 0x80; ++i ) { + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 1) || (cpx != i) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", i ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 2) || (cpx != i+0x100) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", i+0x100 ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != i+0x1000) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", i+0x1000 ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 4) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", i+0x10000 ); + len8 += len8x; + } + if ( len8 != 0x80*(1+2+3+4) ) fprintf ( log, " *** UTF16BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16BE_to_UTF8 done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len16 = 0x80*(1+1+1+2); len8 = 0x80*(1+2+3+4); + + UTF16BE_to_UTF8 ( sU16, 0, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF8 empty input failure, %d -> %d\n", len16x, len8x ); + UTF16BE_to_UTF8 ( sU16, len16, sU8, 0, &len16x, &len8x ); + if ( (len16x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF8 empty output failure, %d -> %d\n", len16x, len8x ); + UTF16BE_to_UTF8 ( sU16, 4, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF16BE_to_UTF8 partial input failure, %d -> %d\n", len16x, len8x ); + UTF16BE_to_UTF8 ( sU16, len16, sU8, 8, &len16x, &len8x ); + if ( (len16x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF16BE_to_UTF8 partial output failure, %d -> %d\n", len16x, len8x ); + + fprintf ( log, " UTF16BE_to_UTF8 done for empty buffers and buffers ending in mid character\n" ); + + // ----------------------------------- + // Test UTF16LE_to_UTF8 on good input. + + fprintf ( log, "\nTesting UTF16LE_to_UTF8 on good input\n" ); + + // Test ASCII. + + cpLo = 0; cpHi = 0x80; len16 = len8 = 0x80; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16LE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF16LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16LE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test 2 byte non-ASCII inside the BMP. + + cpLo = 0x80; cpHi = 0x800; len16 = cpHi-cpLo; len8 = 2*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16LE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF16LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16LE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test 3 byte non-ASCII inside the BMP, below the surrogates. + + cpLo = 0x800; cpHi = 0xD800; len16 = cpHi-cpLo; len8 = 3*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16LE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != cp) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF16LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16LE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len16 = cpHi-cpLo; len8 = 3*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16LE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != cp) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF16LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16LE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len8 = (cpHi-cpLo)*4; + for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16LE ( cp, &sU16[len16], 4, &len16x ); + if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16LE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 4) || (cpx != cp) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF16LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16LE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating ASCII, non-ASCII BMP, beyond BMP. + + len8 = 0x80*(1+2+3+4); + for ( i = 0, len16 = 0; i < 0x80; ++i ) { + CodePoint_to_UTF16LE ( i, &sU16[len16], 4, &len16x ); + len16 += len16x; + CodePoint_to_UTF16LE ( i+0x100, &sU16[len16], 4, &len16x ); + len16 += len16x; + CodePoint_to_UTF16LE ( i+0x1000, &sU16[len16], 4, &len16x ); + len16 += len16x; + CodePoint_to_UTF16LE ( i+0x10000, &sU16[len16], 4, &len16x ); + len16 += len16x; + } + if ( len16 != 0x80*(1+1+1+2) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16LE_to_UTF8 length failure, %d -> %d\n", len16x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, len8 = 0; i < 0x80; ++i ) { + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 1) || (cpx != i) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", i ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 2) || (cpx != i+0x100) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", i+0x100 ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != i+0x1000) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", i+0x1000 ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 4) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", i+0x10000 ); + len8 += len8x; + } + if ( len8 != 0x80*(1+2+3+4) ) fprintf ( log, " *** UTF16LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF16LE_to_UTF8 done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len16 = 0x80*(1+1+1+2); len8 = 0x80*(1+2+3+4); + + UTF16LE_to_UTF8 ( sU16, 0, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF8 empty input failure, %d -> %d\n", len16x, len8x ); + UTF16LE_to_UTF8 ( sU16, len16, sU8, 0, &len16x, &len8x ); + if ( (len16x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF8 empty output failure, %d -> %d\n", len16x, len8x ); + UTF16LE_to_UTF8 ( sU16, 4, sU8, sizeof(sU8), &len16x, &len8x ); + if ( (len16x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF16LE_to_UTF8 partial input failure, %d -> %d\n", len16x, len8x ); + UTF16LE_to_UTF8 ( sU16, len16, sU8, 8, &len16x, &len8x ); + if ( (len16x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF16LE_to_UTF8 partial output failure, %d -> %d\n", len16x, len8x ); + + fprintf ( log, " UTF16LE_to_UTF8 done for empty buffers and buffers ending in mid character\n" ); + +} // Test_UTF16_to_UTF8 + +// ================================================================================================= + +static void Test_UTF32_to_UTF8 ( FILE * log ) +{ + size_t i; + size_t len32, len8, len32x, len8x; + UTF32Unit cp, cpx, cpLo, cpHi; + + // ----------------------------------- + // Test UTF32BE_to_UTF8 on good input. + + fprintf ( log, "\nTesting UTF32BE_to_UTF8 on good input\n" ); + + // Test ASCII. + + cpLo = 0; cpHi = 0x80; len32 = len8 = 0x80; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32BE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF32BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32BE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test 2 byte non-ASCII inside the BMP. + + cpLo = 0x80; cpHi = 0x800; len32 = cpHi-cpLo; len8 = 2*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32BE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF32BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32BE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test 3 byte non-ASCII inside the BMP, below the surrogates. + + cpLo = 0x800; cpHi = 0xD800; len32 = cpHi-cpLo; len8 = 3*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32BE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF32BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32BE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len32 = cpHi-cpLo; len8 = 3*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32BE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF32BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32BE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; len8 = (cpHi-cpLo)*4; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32BE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 4) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF32BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32BE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating ASCII, non-ASCII BMP, beyond BMP. + + len8 = 0x80*(1+2+3+4); + for ( i = 0, len32 = 0; i < 0x80; ++i ) { + sU32[len32] = NativeUTF32BE(i); + ++len32; + sU32[len32] = NativeUTF32BE(i+0x100); + ++len32; + sU32[len32] = NativeUTF32BE(i+0x1000); + ++len32; + sU32[len32] = NativeUTF32BE(i+0x10000); + ++len32; + } + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32BE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, len8 = 0; i < 0x80; ++i ) { + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 1) || (cpx != i) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", i ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 2) || (cpx != i+0x100) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", i+0x100 ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != i+0x1000) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", i+0x1000 ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 4) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", i+0x10000 ); + len8 += len8x; + } + if ( len8 != 0x80*(1+2+3+4) ) fprintf ( log, " *** UTF32BE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32BE_to_UTF8 done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len32 = 0x80*(1+1+1+2); len8 = 0x80*(1+2+3+4); + + UTF32BE_to_UTF8 ( sU32, 0, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF8 empty input failure, %d -> %d\n", len32x, len8x ); + UTF32BE_to_UTF8 ( sU32, len32, sU8, 0, &len32x, &len8x ); + if ( (len32x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF8 empty output failure, %d -> %d\n", len32x, len8x ); + UTF32BE_to_UTF8 ( sU32, len32, sU8, 8, &len32x, &len8x ); + if ( (len32x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF32BE_to_UTF8 partial output failure, %d -> %d\n", len32x, len8x ); + + fprintf ( log, " UTF32BE_to_UTF8 done for empty buffers and buffers ending in mid character\n" ); + + // ----------------------------------- + // Test UTF32LE_to_UTF8 on good input. + + fprintf ( log, "\nTesting UTF32LE_to_UTF8 on good input\n" ); + + // Test ASCII. + + cpLo = 0; cpHi = 0x80; len32 = len8 = 0x80; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32LE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF32LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32LE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test 2 byte non-ASCII inside the BMP. + + cpLo = 0x80; cpHi = 0x800; len32 = cpHi-cpLo; len8 = 2*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32LE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF32LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32LE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test 3 byte non-ASCII inside the BMP, below the surrogates. + + cpLo = 0x800; cpHi = 0xD800; len32 = cpHi-cpLo; len8 = 3*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32LE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF32LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32LE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len32 = cpHi-cpLo; len8 = 3*(cpHi-cpLo); + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32LE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF32LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32LE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; len8 = (cpHi-cpLo)*4; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32LE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) { + CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x ); + if ( (len8x != 4) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", cp ); + } + if ( i != len8 ) fprintf ( log, " *** UTF32LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32LE_to_UTF8 done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating ASCII, non-ASCII BMP, beyond BMP. + + len8 = 0x80*(1+2+3+4); + for ( i = 0, len32 = 0; i < 0x80; ++i ) { + sU32[len32] = NativeUTF32LE(i); + ++len32; + sU32[len32] = NativeUTF32LE(i+0x100); + ++len32; + sU32[len32] = NativeUTF32LE(i+0x1000); + ++len32; + sU32[len32] = NativeUTF32LE(i+0x10000); + ++len32; + } + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32LE_to_UTF8 length failure, %d -> %d\n", len32x, len8x ); + + sU8[len8] = 0xFF; + for ( i = 0, len8 = 0; i < 0x80; ++i ) { + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 1) || (cpx != i) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", i ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 2) || (cpx != i+0x100) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", i+0x100 ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 3) || (cpx != i+0x1000) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", i+0x1000 ); + len8 += len8x; + CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x ); + if ( (len8x != 4) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", i+0x10000 ); + len8 += len8x; + } + if ( len8 != 0x80*(1+2+3+4) ) fprintf ( log, " *** UTF32LE_to_UTF8 consume failure, %d != %d\n", i, len8 ); + + fprintf ( log, " UTF32LE_to_UTF8 done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len32 = 0x80*(1+1+1+2); len8 = 0x80*(1+2+3+4); + + UTF32LE_to_UTF8 ( sU32, 0, sU8, sizeof(sU8), &len32x, &len8x ); + if ( (len32x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF8 empty input failure, %d -> %d\n", len32x, len8x ); + UTF32LE_to_UTF8 ( sU32, len32, sU8, 0, &len32x, &len8x ); + if ( (len32x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF8 empty output failure, %d -> %d\n", len32x, len8x ); + UTF32LE_to_UTF8 ( sU32, len32, sU8, 8, &len32x, &len8x ); + if ( (len32x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF32LE_to_UTF8 partial output failure, %d -> %d\n", len32x, len8x ); + + fprintf ( log, " UTF32LE_to_UTF8 done for empty buffers and buffers ending in mid character\n" ); + +} // Test_UTF32_to_UTF8 + +// ================================================================================================= + +static void Test_UTF16_to_UTF32 ( FILE * log ) +{ + size_t i; + size_t len16, len32, len16x, len32x; + UTF32Unit cp, cpLo, cpHi; + + // -------------------------------------- + // Test UTF16BE_to_UTF32BE on good input. + + fprintf ( log, "\nTesting UTF16BE_to_UTF32BE on good input\n" ); + + // Test inside the BMP, below the surrogates. + + cpLo = 0; cpHi = 0xD800; len16 = len32 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32BE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32BE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16BE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len16 = len32 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32BE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32BE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16BE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; + for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16BE ( cp, &sU16[len16], 4, &len16x ); + if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32BE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32BE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16BE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating BMP, beyond BMP. + + len16 = 0x8000*(1+2); len32 = 0x8000*(1+1); + for ( i = 0, len16 = 0; i < 0x8000; ++i ) { + CodePoint_to_UTF16BE ( i, &sU16[len16], 8, &len16x ); + len16 += len16x; + CodePoint_to_UTF16BE ( i+0x10000, &sU16[len16], 8, &len16x ); + len16 += len16x; + } + if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32BE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, len32 = 0; i < 0x8000; ++i ) { + if ( sU32[len32] != NativeUTF32BE(i) ) fprintf ( log, " *** UTF16BE_to_UTF32BE failure for U+%.4X\n", i ); + ++len32; + if ( sU32[len32] != NativeUTF32BE(i+0x10000) ) fprintf ( log, " *** UTF16BE_to_UTF32BE failure for U+%.4X\n", i+0x10000 ); + ++len32; + } + + fprintf ( log, " UTF16BE_to_UTF32BE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len16 = 0x8000*(1+2); len32 = 0x8000*(1+1); + + UTF16BE_to_UTF32BE ( sU16, 0, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF32BE empty input failure, %d -> %d\n", len16x, len32x ); + UTF16BE_to_UTF32BE ( sU16, len16, sU32, 0, &len16x, &len32x ); + if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF32BE empty output failure, %d -> %d\n", len16x, len32x ); + UTF16BE_to_UTF32BE ( sU16, 5, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16x != 4) || (len32x != 3) ) fprintf ( log, " *** UTF16BE_to_UTF32BE partial input failure, %d -> %d\n", len16x, len32x ); + + fprintf ( log, " UTF16BE_to_UTF32BE done for empty buffers and buffers ending in mid character\n" ); + + // -------------------------------------- + // Test UTF16LE_to_UTF32LE on good input. + + fprintf ( log, "\nTesting UTF16LE_to_UTF32LE on good input\n" ); + + // Test inside the BMP, below the surrogates. + + cpLo = 0; cpHi = 0xD800; len16 = len32 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32LE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32LE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16LE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len16 = len32 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32LE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32LE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16LE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; + for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16LE ( cp, &sU16[len16], 4, &len16x ); + if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32LE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32LE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16LE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating BMP, beyond BMP. + + len16 = 0x8000*(1+2); len32 = 0x8000*(1+1); + for ( i = 0, len16 = 0; i < 0x8000; ++i ) { + CodePoint_to_UTF16LE ( i, &sU16[len16], 8, &len16x ); + len16 += len16x; + CodePoint_to_UTF16LE ( i+0x10000, &sU16[len16], 8, &len16x ); + len16 += len16x; + } + if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32LE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, len32 = 0; i < 0x8000; ++i ) { + if ( sU32[len32] != NativeUTF32LE(i) ) fprintf ( log, " *** UTF16LE_to_UTF32LE failure for U+%.4X\n", i ); + ++len32; + if ( sU32[len32] != NativeUTF32LE(i+0x10000) ) fprintf ( log, " *** UTF16LE_to_UTF32LE failure for U+%.4X\n", i+0x10000 ); + ++len32; + } + + fprintf ( log, " UTF16LE_to_UTF32LE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len16 = 0x8000*(1+2); len32 = 0x8000*(1+1); + + UTF16LE_to_UTF32LE ( sU16, 0, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF32LE empty input failure, %d -> %d\n", len16x, len32x ); + UTF16LE_to_UTF32LE ( sU16, len16, sU32, 0, &len16x, &len32x ); + if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF32LE empty output failure, %d -> %d\n", len16x, len32x ); + UTF16LE_to_UTF32LE ( sU16, 5, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16x != 4) || (len32x != 3) ) fprintf ( log, " *** UTF16LE_to_UTF32LE partial input failure, %d -> %d\n", len16x, len32x ); + + fprintf ( log, " UTF16LE_to_UTF32LE done for empty buffers and buffers ending in mid character\n" ); + + // -------------------------------------- + // Test UTF16BE_to_UTF32LE on good input. + + fprintf ( log, "\nTesting UTF16BE_to_UTF32LE on good input\n" ); + + // Test inside the BMP, below the surrogates. + + cpLo = 0; cpHi = 0xD800; len16 = len32 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32LE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32LE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16BE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len16 = len32 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32LE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32LE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16BE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; + for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16BE ( cp, &sU16[len16], 4, &len16x ); + if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32LE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32LE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16BE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating BMP, beyond BMP. + + len16 = 0x8000*(1+2); len32 = 0x8000*(1+1); + for ( i = 0, len16 = 0; i < 0x8000; ++i ) { + CodePoint_to_UTF16BE ( i, &sU16[len16], 8, &len16x ); + len16 += len16x; + CodePoint_to_UTF16BE ( i+0x10000, &sU16[len16], 8, &len16x ); + len16 += len16x; + } + if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + + UTF16BE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32LE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, len32 = 0; i < 0x8000; ++i ) { + if ( sU32[len32] != NativeUTF32LE(i) ) fprintf ( log, " *** UTF16BE_to_UTF32LE failure for U+%.4X\n", i ); + ++len32; + if ( sU32[len32] != NativeUTF32LE(i+0x10000) ) fprintf ( log, " *** UTF16BE_to_UTF32LE failure for U+%.4X\n", i+0x10000 ); + ++len32; + } + + fprintf ( log, " UTF16BE_to_UTF32LE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len16 = 0x8000*(1+2); len32 = 0x8000*(1+1); + + UTF16BE_to_UTF32LE ( sU16, 0, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF32LE empty input failure, %d -> %d\n", len16x, len32x ); + UTF16BE_to_UTF32LE ( sU16, len16, sU32, 0, &len16x, &len32x ); + if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF32LE empty output failure, %d -> %d\n", len16x, len32x ); + UTF16BE_to_UTF32LE ( sU16, 5, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16x != 4) || (len32x != 3) ) fprintf ( log, " *** UTF16BE_to_UTF32LE partial input failure, %d -> %d\n", len16x, len32x ); + + fprintf ( log, " UTF16BE_to_UTF32LE done for empty buffers and buffers ending in mid character\n" ); + + // -------------------------------------- + // Test UTF16LE_to_UTF32BE on good input. + + fprintf ( log, "\nTesting UTF16LE_to_UTF32BE on good input\n" ); + + // Test inside the BMP, below the surrogates. + + cpLo = 0; cpHi = 0xD800; len16 = len32 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32BE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32BE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16LE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len16 = len32 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp)); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32BE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32BE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16LE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; + for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16LE ( cp, &sU16[len16], 4, &len16x ); + if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32BE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) { + if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32BE failure for U+%.4X\n", cp ); + } + + fprintf ( log, " UTF16LE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating BMP, beyond BMP. + + len16 = 0x8000*(1+2); len32 = 0x8000*(1+1); + for ( i = 0, len16 = 0; i < 0x8000; ++i ) { + CodePoint_to_UTF16LE ( i, &sU16[len16], 8, &len16x ); + len16 += len16x; + CodePoint_to_UTF16LE ( i+0x10000, &sU16[len16], 8, &len16x ); + len16 += len16x; + } + if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %d\n", len16 ); + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + + UTF16LE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32BE length failure, %d -> %d\n", len16x, len32x ); + + sU32[len32] = 0xFFFFFFFF; + for ( i = 0, len32 = 0; i < 0x8000; ++i ) { + if ( sU32[len32] != NativeUTF32BE(i) ) fprintf ( log, " *** UTF16LE_to_UTF32BE failure for U+%.4X\n", i ); + ++len32; + if ( sU32[len32] != NativeUTF32BE(i+0x10000) ) fprintf ( log, " *** UTF16LE_to_UTF32BE failure for U+%.4X\n", i+0x10000 ); + ++len32; + } + + fprintf ( log, " UTF16LE_to_UTF32BE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len16 = 0x8000*(1+2); len32 = 0x8000*(1+1); + + UTF16LE_to_UTF32BE ( sU16, 0, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF32BE empty input failure, %d -> %d\n", len16x, len32x ); + UTF16LE_to_UTF32BE ( sU16, len16, sU32, 0, &len16x, &len32x ); + if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF32BE empty output failure, %d -> %d\n", len16x, len32x ); + UTF16LE_to_UTF32BE ( sU16, 5, sU32, sizeof(sU32), &len16x, &len32x ); + if ( (len16x != 4) || (len32x != 3) ) fprintf ( log, " *** UTF16LE_to_UTF32BE partial input failure, %d -> %d\n", len16x, len32x ); + + fprintf ( log, " UTF16LE_to_UTF32BE done for empty buffers and buffers ending in mid character\n" ); + +} // Test_UTF16_to_UTF32 + +// ================================================================================================= + +static void Test_UTF32_to_UTF16 ( FILE * log ) +{ + size_t i; + size_t len32, len16, len32x, len16x; + UTF32Unit cp, cpx, cpLo, cpHi; + + // -------------------------------------- + // Test UTF32BE_to_UTF16BE on good input. + + fprintf ( log, "\nTesting UTF32BE_to_UTF16BE on good input\n" ); + + // Test inside the BMP, below the surrogates. + + cpLo = 0; cpHi = 0xD800; len32 = len16 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16BE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32BE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len32 = len16 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16BE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32BE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; len16 = (cpHi-cpLo)*2; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16BE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32BE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating BMP, beyond BMP. + + len32 = 0x8000*(1+1); len16 = 0x8000*(1+2); + for ( i = 0, len32 = 0; i < 0x8000; ++i ) { + sU32[len32] = NativeUTF32BE(i); + ++len32; + sU32[len32] = NativeUTF32BE(i+0x10000); + ++len32; + } + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, len16 = 0; i < 0x8000; ++i ) { + CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF32BE_to_UTF16BE failure for U+%.4X\n", i ); + len16 += len16x; + CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF32BE_to_UTF16BE failure for U+%.4X\n", i ); + len16 += len16x; + } + if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** UTF32BE_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32BE_to_UTF16BE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len32 = 0x8000*(1+1); len16 = 0x8000*(1+2); + + UTF32BE_to_UTF16BE ( sU32, 0, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF16BE empty input failure, %d -> %d\n", len32x, len16x ); + UTF32BE_to_UTF16BE ( sU32, len32, sU16, 0, &len32x, &len16x ); + if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF16BE empty output failure, %d -> %d\n", len32x, len16x ); + UTF32BE_to_UTF16BE ( sU32, len32, sU16, 5, &len32x, &len16x ); + if ( (len32x != 3) || (len16x != 4) ) fprintf ( log, " *** UTF32BE_to_UTF16BE partial output failure, %d -> %d\n", len32x, len16x ); + + fprintf ( log, " UTF32BE_to_UTF16BE done for empty buffers and buffers ending in mid character\n" ); + +// ================================================================================================= + + // -------------------------------------- + // Test UTF32LE_to_UTF16LE on good input. + + fprintf ( log, "\nTesting UTF32LE_to_UTF16LE on good input\n" ); + + // Test inside the BMP, below the surrogates. + + cpLo = 0; cpHi = 0xD800; len32 = len16 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16LE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32LE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len32 = len16 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16LE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32LE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; len16 = (cpHi-cpLo)*2; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16LE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32LE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating BMP, beyond BMP. + + len32 = 0x8000*(1+1); len16 = 0x8000*(1+2); + for ( i = 0, len32 = 0; i < 0x8000; ++i ) { + sU32[len32] = NativeUTF32LE(i); + ++len32; + sU32[len32] = NativeUTF32LE(i+0x10000); + ++len32; + } + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, len16 = 0; i < 0x8000; ++i ) { + CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF32LE_to_UTF16LE failure for U+%.4X\n", i ); + len16 += len16x; + CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF32LE_to_UTF16LE failure for U+%.4X\n", i ); + len16 += len16x; + } + if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** UTF32LE_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32LE_to_UTF16LE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len32 = 0x8000*(1+1); len16 = 0x8000*(1+2); + + UTF32LE_to_UTF16LE ( sU32, 0, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF16LE empty input failure, %d -> %d\n", len32x, len16x ); + UTF32LE_to_UTF16LE ( sU32, len32, sU16, 0, &len32x, &len16x ); + if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF16LE empty output failure, %d -> %d\n", len32x, len16x ); + UTF32LE_to_UTF16LE ( sU32, len32, sU16, 5, &len32x, &len16x ); + if ( (len32x != 3) || (len16x != 4) ) fprintf ( log, " *** UTF32LE_to_UTF16LE partial output failure, %d -> %d\n", len32x, len16x ); + + fprintf ( log, " UTF32LE_to_UTF16LE done for empty buffers and buffers ending in mid character\n" ); + +// ================================================================================================= + + // -------------------------------------- + // Test UTF32BE_to_UTF16LE on good input. + + fprintf ( log, "\nTesting UTF32BE_to_UTF16LE on good input\n" ); + + // Test inside the BMP, below the surrogates. + + cpLo = 0; cpHi = 0xD800; len32 = len16 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16LE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32BE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len32 = len16 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16LE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32BE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; len16 = (cpHi-cpLo)*2; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16LE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32BE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating BMP, beyond BMP. + + len32 = 0x8000*(1+1); len16 = 0x8000*(1+2); + for ( i = 0, len32 = 0; i < 0x8000; ++i ) { + sU32[len32] = NativeUTF32BE(i); + ++len32; + sU32[len32] = NativeUTF32BE(i+0x10000); + ++len32; + } + sU32[len32] = 0xFFFFFFFF; + + UTF32BE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate. + for ( i = 0, len16 = 0; i < 0x8000; ++i ) { + CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF32BE_to_UTF16LE failure for U+%.4X\n", i ); + len16 += len16x; + CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF32BE_to_UTF16LE failure for U+%.4X\n", i ); + len16 += len16x; + } + if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** UTF32BE_to_UTF16LE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32BE_to_UTF16LE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len32 = 0x8000*(1+1); len16 = 0x8000*(1+2); + + UTF32BE_to_UTF16LE ( sU32, 0, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF16LE empty input failure, %d -> %d\n", len32x, len16x ); + UTF32BE_to_UTF16LE ( sU32, len32, sU16, 0, &len32x, &len16x ); + if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF16LE empty output failure, %d -> %d\n", len32x, len16x ); + UTF32BE_to_UTF16LE ( sU32, len32, sU16, 5, &len32x, &len16x ); + if ( (len32x != 3) || (len16x != 4) ) fprintf ( log, " *** UTF32BE_to_UTF16LE partial output failure, %d -> %d\n", len32x, len16x ); + + fprintf ( log, " UTF32BE_to_UTF16LE done for empty buffers and buffers ending in mid character\n" ); + +// ================================================================================================= + + // -------------------------------------- + // Test UTF32LE_to_UTF16BE on good input. + + fprintf ( log, "\nTesting UTF32LE_to_UTF16BE on good input\n" ); + + // Test inside the BMP, below the surrogates. + + cpLo = 0; cpHi = 0xD800; len32 = len16 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16BE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32LE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test inside the BMP, above the surrogates. + + cpLo = 0xE000; cpHi = 0x10000; len32 = len16 = cpHi-cpLo; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16BE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32LE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test outside the BMP. + + cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; len16 = (cpHi-cpLo)*2; + for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp); + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) { + CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16BE failure for U+%.4X\n", cp ); + } + if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32LE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 ); + + // Test alternating BMP, beyond BMP. + + len32 = 0x8000*(1+1); len16 = 0x8000*(1+2); + for ( i = 0, len32 = 0; i < 0x8000; ++i ) { + sU32[len32] = NativeUTF32LE(i); + ++len32; + sU32[len32] = NativeUTF32LE(i+0x10000); + ++len32; + } + sU32[len32] = 0xFFFFFFFF; + + UTF32LE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x ); + + sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate. + for ( i = 0, len16 = 0; i < 0x8000; ++i ) { + CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF32LE_to_UTF16BE failure for U+%.4X\n", i ); + len16 += len16x; + CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x ); + if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF32LE_to_UTF16BE failure for U+%.4X\n", i ); + len16 += len16x; + } + if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** UTF32LE_to_UTF16BE consume failure, %d != %d\n", i, len16 ); + + fprintf ( log, " UTF32LE_to_UTF16BE done for mixed values\n" ); + + // Test empty buffers and buffers ending in mid character. + + len32 = 0x8000*(1+1); len16 = 0x8000*(1+2); + + UTF32LE_to_UTF16BE ( sU32, 0, sU16, sizeof(sU16), &len32x, &len16x ); + if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF16BE empty input failure, %d -> %d\n", len32x, len16x ); + UTF32LE_to_UTF16BE ( sU32, len32, sU16, 0, &len32x, &len16x ); + if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF16BE empty output failure, %d -> %d\n", len32x, len16x ); + UTF32LE_to_UTF16BE ( sU32, len32, sU16, 5, &len32x, &len16x ); + if ( (len32x != 3) || (len16x != 4) ) fprintf ( log, " *** UTF32LE_to_UTF16BE partial output failure, %d -> %d\n", len32x, len16x ); + + fprintf ( log, " UTF32LE_to_UTF16BE done for empty buffers and buffers ending in mid character\n" ); + +} // Test_UTF32_to_UTF16 + +// ================================================================================================= + +static void DoTest ( FILE * log ) +{ + InitializeUnicodeConversions(); + + Test_SwappingPrimitives ( log ); + + Test_CodePoint_to_UTF8 ( log ); + Test_CodePoint_from_UTF8 ( log ); + + Test_CodePoint_to_UTF16 ( log ); + Test_CodePoint_from_UTF16 ( log ); + + Test_UTF8_to_UTF16 ( log ); + Test_UTF8_to_UTF32 ( log ); + + Test_UTF16_to_UTF8 ( log ); + Test_UTF32_to_UTF8 ( log ); + + Test_UTF16_to_UTF32 ( log ); + Test_UTF32_to_UTF16 ( log ); + +} // DoTest + +// ================================================================================================= + +extern "C" int main ( void ) +{ + char buffer [1000]; + + #if !XMP_AutomatedTestBuild + FILE * log = stdout; + #else + FILE * log = fopen ( "TestUnicode.out", "wb" ); + #endif + + time_t now; + time ( &now ); + sprintf ( buffer, "// Starting test for Unicode conversion correctness, %s", ctime ( &now ) ); + + fprintf ( log, "// " ); + for ( size_t i = 4; i < strlen(buffer); ++i ) fprintf ( log, "=" ); + fprintf ( log, "\n%s", buffer ); + fprintf ( log, "// Native %s endian\n", (kBigEndianHost ? "big" : "little") ); + + try { + + DoTest ( log ); + + } catch ( ... ) { + + fprintf ( log, "\n## Caught unexpected exception\n" ); + return -1; + + } + + time ( &now ); + sprintf ( buffer, "// Finished test for Unicode conversion correctness, %s", ctime ( &now ) ); + + fprintf ( log, "\n// " ); + for ( size_t i = 4; i < strlen(buffer); ++i ) fprintf ( log, "=" ); + fprintf ( log, "\n%s\n", buffer ); + + fclose ( log ); + return 0; + +} diff --git a/samples/source/UnicodeParseSerialize.cpp b/samples/source/UnicodeParseSerialize.cpp new file mode 100644 index 0000000..c3c9865 --- /dev/null +++ b/samples/source/UnicodeParseSerialize.cpp @@ -0,0 +1,510 @@ +// ================================================================================================= +// +// A thorough test for UTF-16 and UTF-32 serialization and parsing. It assumes the basic Unicode +// conversion functions are working - they have their own exhaustive test. +// +// ================================================================================================= + +#include <cstdio> +#include <vector> +#include <string> +#include <cstring> +#include <iostream> +#include <iomanip> +#include <fstream> +#include <ctime> + +#include <cstdlib> +#include <cerrno> +#include <stdexcept> +#include <cassert> + +#define TXMP_STRING_TYPE std::string +#include "XMP.hpp" +#include "XMP.incl_cpp" + +#include "source/EndianUtils.hpp" +#include "source/UnicodeConversions.hpp" +#include "source/UnicodeConversions.cpp" + +using namespace std; + +#if WIN_ENV + #pragma warning ( disable : 4701 ) // local variable may be used without having been initialized +#endif + +// ================================================================================================= + +#define IncludeUTF32 0 // *** UTF-32 parsing isn't working at the moment, Expat seems to not handle it. + +#define kCodePointCount 0x110000 + +UTF8Unit sU8 [kCodePointCount*4 + 8]; +UTF16Unit sU16 [kCodePointCount*2 + 4]; +UTF32Unit sU32 [kCodePointCount + 2]; + +static FILE * sLogFile; + +static const char * kNS1 = "ns:test1/"; + +static const char * kSimpleRDF = + "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>" + " <rdf:Description rdf:about='Test:kSimpleRDF/' xmlns:ns1='ns:test1/' xmlns:ns2='ns:test2/'>" + "" + " <ns1:SimpleProp>Simple value</ns1:SimpleProp>" + "" + " <ns1:ArrayProp>" + " <rdf:Bag>" + " <rdf:li>Item1 value</rdf:li>" + " <rdf:li>Item2 value</rdf:li>" + " </rdf:Bag>" + " </ns1:ArrayProp>" + "" + " <ns1:StructProp rdf:parseType='Resource'>" + " <ns2:Field1>Field1 value</ns2:Field1>" + " <ns2:Field2>Field2 value</ns2:Field2>" + " </ns1:StructProp>" + "" + " <ns1:QualProp rdf:parseType='Resource'>" + " <rdf:value>Prop value</rdf:value>" + " <ns2:Qual>Qual value</ns2:Qual>" + " </ns1:QualProp>" + "" + " <ns1:AltTextProp>" + " <rdf:Alt>" + " <rdf:li xml:lang='x-one'>x-one value</rdf:li>" + " <rdf:li xml:lang='x-two'>x-two value</rdf:li>" + " </rdf:Alt>" + " </ns1:AltTextProp>" + "" + " <ns1:ArrayOfStructProp>" + " <rdf:Bag>" + " <rdf:li rdf:parseType='Resource'>" + " <ns2:Field1>Item-1</ns2:Field1>" + " <ns2:Field2>Field 1.2 value</ns2:Field2>" + " </rdf:li>" + " <rdf:li rdf:parseType='Resource'>" + " <ns2:Field1>Item-2</ns2:Field1>" + " <ns2:Field2>Field 2.2 value</ns2:Field2>" + " </rdf:li>" + " </rdf:Bag>" + " </ns1:ArrayOfStructProp>" + "" + " </rdf:Description>" + "</rdf:RDF>"; + +// ================================================================================================= + +static XMP_Status DumpToString ( void * refCon, XMP_StringPtr outStr, XMP_StringLen outLen ) +{ + std::string * dumpString = static_cast < std::string * > ( refCon ); + dumpString->append ( outStr, outLen ); + return 0; +} + +// ================================================================================================= + +static XMP_Status DumpToFile ( void * refCon, XMP_StringPtr outStr, XMP_StringLen outLen ) +{ + FILE * outFile = static_cast < FILE * > ( refCon ); + fwrite ( outStr, 1, outLen, outFile ); + return 0; +} + +// ================================================================================================= + +static void PrintXMPErrorInfo ( const XMP_Error & excep, const char * title ) +{ + XMP_Int32 id = excep.GetID(); + const char * message = excep.GetErrMsg(); + fprintf ( sLogFile, "%s\n", title ); + fprintf ( sLogFile, " #%d : %s\n", id, message ); +} + +// ================================================================================================= + +static void FullUnicodeParse ( FILE * log, const char * encoding, size_t bufferSize, + const std::string & packet, const std::string & fullUnicode ) +{ + if ( bufferSize > sizeof(sU32) ) { + fprintf ( log, "#ERROR: FullUnicodeParse buffer overrun for %s, %d byte buffers\n", encoding, bufferSize ); + return; + } + + SXMPMeta meta; + try { + memset ( sU32, -1, sizeof(sU32) ); + for ( size_t i = 0; i < packet.size(); i += bufferSize ) { + size_t count = bufferSize; + if ( count > (packet.size() - i) ) count = packet.size() - i; + memcpy ( sU32, &packet[i], count ); + meta.ParseFromBuffer ( XMP_StringPtr(sU32), count, kXMP_ParseMoreBuffers ); + } + meta.ParseFromBuffer ( XMP_StringPtr(sU32), 0 ); + } catch ( XMP_Error& excep ) { + char message [200]; + sprintf ( message, "#ERROR: Full Unicode parsing error for %s, %d byte buffers", encoding, bufferSize ); + PrintXMPErrorInfo ( excep, message ); + return; + } + + std::string value; + bool found = meta.GetProperty ( kNS1, "FullUnicode", &value, 0 ); + if ( (! found) || (value != fullUnicode) ) fprintf ( log, "#ERROR: Failed to get full Unicode value for %s, %d byte buffers\n", encoding, bufferSize ); + +} // FullUnicodeParse + +// ================================================================================================= + +static void DoTest ( FILE * log ) +{ + SXMPMeta meta; + size_t u8Count, u32Count; + SXMPMeta meta8, meta16b, meta16l, meta32b, meta32l; + std::string u8Packet, u16bPacket, u16lPacket, u32bPacket, u32lPacket; + + InitializeUnicodeConversions(); + + // --------------------------------------------------------------------------------------------- + + fprintf ( log, "// ------------------------------------------------\n" ); + fprintf ( log, "// Test basic serialization and parsing using ASCII\n\n" ); + + // ---------------------------------------------------- + // Create basic ASCII packets in each of the encodings. + + meta.ParseFromBuffer ( kSimpleRDF, kXMP_UseNullTermination ); + + meta.SerializeToBuffer ( &u8Packet, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF8) ); + meta.SerializeToBuffer ( &u16bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Big) ); + meta.SerializeToBuffer ( &u16lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Little) ); + meta.SerializeToBuffer ( &u32bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Big) ); + meta.SerializeToBuffer ( &u32lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Little) ); + + #if 0 + FILE* dump; + dump = fopen ( "u8Packet.txt", "w" ); + fwrite ( u8Packet.c_str(), 1, u8Packet.size(), dump ); + fclose ( dump ); + dump = fopen ( "u16bPacket.txt", "w" ); + fwrite ( u16bPacket.c_str(), 1, u16bPacket.size(), dump ); + fclose ( dump ); + dump = fopen ( "u16lPacket.txt", "w" ); + fwrite ( u16lPacket.c_str(), 1, u16lPacket.size(), dump ); + fclose ( dump ); + dump = fopen ( "u32bPacket.txt", "w" ); + fwrite ( u32bPacket.c_str(), 1, u32bPacket.size(), dump ); + fclose ( dump ); + dump = fopen ( "u32lPacket.txt", "w" ); + fwrite ( u32lPacket.c_str(), 1, u32lPacket.size(), dump ); + fclose ( dump ); + #endif + + // Verify the character form. The conversion functions are tested separately. + + const char * ptr; + + ptr = u8Packet.c_str(); + fprintf ( log, "UTF-8 : %d : %.2X %.2X \"%.10s...\"\n", u8Packet.size(), *ptr, *(ptr+1), ptr ); + + ptr = u16bPacket.c_str(); + fprintf ( log, "UTF-16BE : %d : %.2X %.2X %.2X\n", u16bPacket.size(), *ptr, *(ptr+1), *(ptr+2) ); + ptr = u16lPacket.c_str(); + fprintf ( log, "UTF-16LE : %d : %.2X %.2X %.2X\n", u16lPacket.size(), *ptr, *(ptr+1), *(ptr+2) ); + + ptr = u32bPacket.c_str(); + fprintf ( log, "UTF-32BE : %d : %.2X %.2X %.2X %.2X %.2X\n", u32bPacket.size(), *ptr, *(ptr+1), *(ptr+2), *(ptr+3), *(ptr+4) ); + ptr = u32lPacket.c_str(); + fprintf ( log, "UTF-32LE : %d : %.2X %.2X %.2X %.2X %.2X\n", u32lPacket.size(), *ptr, *(ptr+1), *(ptr+2), *(ptr+3), *(ptr+4) ); + + fprintf ( log, "\nBasic serialization tests done\n" ); + + // ------------------------------------------------- + // Verify round trip reparsing of the basic packets. + + std::string origDump, rtDump; + + meta.DumpObject ( DumpToString, &origDump ); + fprintf ( log, "Original dump\n%s\n", origDump.c_str() ); + + try { + meta8.ParseFromBuffer ( u8Packet.c_str(), u8Packet.size() ); + meta16b.ParseFromBuffer ( u16bPacket.c_str(), u16bPacket.size() ); + meta16l.ParseFromBuffer ( u16lPacket.c_str(), u16lPacket.size() ); + meta32b.ParseFromBuffer ( u32bPacket.c_str(), u32bPacket.size() ); + meta32l.ParseFromBuffer ( u32lPacket.c_str(), u32lPacket.size() ); + } catch ( XMP_Error& excep ) { + PrintXMPErrorInfo ( excep, "## Caught reparsing exception" ); + fprintf ( log, "\n" ); + } + + #if 0 + fprintf ( log, "After UTF-8 roundtrip\n" ); + meta8.DumpObject ( DumpToFile, log ); + fprintf ( log, "\nAfter UTF-16 BE roundtrip\n" ); + meta16b.DumpObject ( DumpToFile, log ); + fprintf ( log, "\nAfter UTF-16 LE roundtrip\n" ); + meta16l.DumpObject ( DumpToFile, log ); + fprintf ( log, "\nAfter UTF-32 BE roundtrip\n" ); + meta32b.DumpObject ( DumpToFile, log ); + fprintf ( log, "\nAfter UTF-32 LE roundtrip\n" ); + meta32l.DumpObject ( DumpToFile, log ); + #endif + + rtDump.clear(); + meta8.DumpObject ( DumpToString, &rtDump ); + if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-8\n%s\n", rtDump.c_str() ); + + rtDump.clear(); + meta16b.DumpObject ( DumpToString, &rtDump ); + if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-16BE\n%s\n", rtDump.c_str() ); + + rtDump.clear(); + meta16l.DumpObject ( DumpToString, &rtDump ); + if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-16LE\n%s\n", rtDump.c_str() ); + + #if IncludeUTF32 + + rtDump.clear(); + meta32b.DumpObject ( DumpToString, &rtDump ); + if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-32BE\n%s\n", rtDump.c_str() ); + + rtDump.clear(); + meta32l.DumpObject ( DumpToString, &rtDump ); + if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-32LE\n%s\n", rtDump.c_str() ); + + #endif + + fprintf ( log, "Basic round-trip parsing tests done\n\n" ); + + // --------------------------------------------------------------------------------------------- + + fprintf ( log, "// --------------------------------------------------\n" ); + fprintf ( log, "// Test parse buffering logic using full Unicode data\n\n" ); + + // -------------------------------------------------------------------------------------------- + // Construct the packets to parse in all encodings. There is just one property with a value + // containing all of the Unicode representations. This isn't all of the Unicode characters, but + // is more than enough to establish correctness of the buffering logic. It is almost everything + // in the BMP, plus the range U+100000..U+10FFFF beyond the BMP. Doing all Unicode characters + // takes far to long to execute and does not provide additional confidence. Skip ASCII controls, + // they are not allowed in XML and get changed to spaces by SetProperty. Skip U+FFFE and U+FFFF, + // the expat parser rejects them. + + #define kTab 0x09 + #define kLF 0x0A + #define kCR 0x0D + + size_t i; + UTF32Unit cp; + sU32[0] = kTab; sU32[1] = kLF; sU32[2] = kCR; + for ( i = 3, cp = 0x20; cp < 0x7F; ++i, ++cp ) sU32[i] = cp; + for ( cp = 0x80; cp < 0xD800; ++i, ++cp ) sU32[i] = cp; + for ( cp = 0xE000; cp < 0xFFFE; ++i, ++cp ) sU32[i] = cp; + for ( cp = 0x100000; cp < 0x110000; ++i, ++cp ) sU32[i] = cp; + u32Count = i; + assert ( u32Count == (3 + (0x7F-0x20) + (0xD800-0x80) + (0xFFFE - 0xE000) + (0x110000-0x100000)) ); + + if ( kBigEndianHost ) { + UTF32BE_to_UTF8 ( sU32, u32Count, sU8, sizeof(sU8), &i, &u8Count ); + } else { + UTF32LE_to_UTF8 ( sU32, u32Count, sU8, sizeof(sU8), &i, &u8Count ); + } + if ( i != u32Count ) fprintf ( log, "#ERROR: Failed to convert full UTF-32 buffer\n" ); + assert ( u8Count == (3 + (0x7F-0x20) + 2*(0x800-0x80) + 3*(0xD800-0x800) + 3*(0xFFFE - 0xE000) + 4*(0x110000-0x100000)) ); + sU8[u8Count] = 0; + + std::string fullUnicode; + SXMPUtils::RemoveProperties ( &meta, "", "", kXMPUI_DoAllProperties ); + meta.SetProperty ( kNS1, "FullUnicode", XMP_StringPtr(sU8) ); + meta.GetProperty ( kNS1, "FullUnicode", &fullUnicode, 0 ); + if ( (fullUnicode.size() != u8Count) || (fullUnicode != XMP_StringPtr(sU8)) ) { + fprintf ( log, "#ERROR: Failed to set full UTF-8 value\n" ); + if ( (fullUnicode.size() != u8Count) ) { + fprintf ( log, " Size mismatch, want %d, got %d\n", u8Count, fullUnicode.size() ); + } else { + for ( size_t b = 0; b < u8Count; ++b ) { + if ( fullUnicode[b] != sU8[b] ) fprintf ( log, " Byte mismatch at %d\n", b ); + } + } + } + + u8Packet.clear(); + u16bPacket.clear(); + u16lPacket.clear(); + u32bPacket.clear(); + u32lPacket.clear(); + + meta.SerializeToBuffer ( &u8Packet, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF8) ); + meta.SerializeToBuffer ( &u16bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Big) ); + meta.SerializeToBuffer ( &u16lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Little) ); + #if IncludeUTF32 + meta.SerializeToBuffer ( &u32bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Big) ); + meta.SerializeToBuffer ( &u32lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Little) ); + #endif + + // --------------------------------------------------------------------- + // Parse the whole packet as a sanity check, then at a variety of sizes. + + FullUnicodeParse ( log, "UTF-8", u8Packet.size(), u8Packet, fullUnicode ); + FullUnicodeParse ( log, "UTF-16BE", u16bPacket.size(), u16bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-16LE", u16lPacket.size(), u16lPacket, fullUnicode ); + #if IncludeUTF32 + FullUnicodeParse ( log, "UTF-32BE", u32bPacket.size(), u32bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-32LE", u32lPacket.size(), u32lPacket, fullUnicode ); + #endif + fprintf ( log, "Full packet, no BOM, buffered parsing tests done\n" ); + +#if 0 // Skip the partial buffer tests, there seem to be problems, but no client uses partial buffers. + + for ( i = 1; i <= 3; ++i ) { + FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode ); + FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode ); + #if IncludeUTF32 + FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode ); + #endif + fprintf ( log, "%d byte buffers, no BOM, buffered parsing tests done\n", i ); + } + + for ( i = 4; i <= 16; i *= 2 ) { + FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode ); + FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode ); + #if IncludeUTF32 + FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode ); + #endif + fprintf ( log, "%d byte buffers, no BOM, buffered parsing tests done\n", i ); + } + +#endif + + fprintf ( log, "\n" ); + + // ----------------------------------------------------------------------- + // Redo the buffered parsing tests, now with a leading BOM in the packets. + + u8Packet.insert ( 0, "\xEF\xBB\xBF", 3 ); + + UTF32Unit NatBOM = 0x0000FEFF; + UTF32Unit SwapBOM = 0xFFFE0000; + + if ( kBigEndianHost ) { + u16bPacket.insert ( 0, XMP_StringPtr(&NatBOM)+2, 2 ); + u16lPacket.insert ( 0, XMP_StringPtr(&SwapBOM), 2 ); + u32bPacket.insert ( 0, XMP_StringPtr(&NatBOM), 4 ); + u32lPacket.insert ( 0, XMP_StringPtr(&SwapBOM), 4 ); + } else { + u16lPacket.insert ( 0, XMP_StringPtr(&NatBOM), 2 ); + u16bPacket.insert ( 0, XMP_StringPtr(&SwapBOM)+2, 2 ); + u32lPacket.insert ( 0, XMP_StringPtr(&NatBOM), 4 ); + u32bPacket.insert ( 0, XMP_StringPtr(&SwapBOM), 4 ); + } + + FullUnicodeParse ( log, "UTF-8", u8Packet.size(), u8Packet, fullUnicode ); + FullUnicodeParse ( log, "UTF-16BE", u16bPacket.size(), u16bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-16LE", u16lPacket.size(), u16lPacket, fullUnicode ); + #if IncludeUTF32 + FullUnicodeParse ( log, "UTF-32BE", u32bPacket.size(), u32bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-32LE", u32lPacket.size(), u32lPacket, fullUnicode ); + #endif + fprintf ( log, "Full packet, leading BOM, buffered parsing tests done\n" ); + +#if 0 // Skip the partial buffer tests, there seem to be problems, but no client uses partial buffers. + + for ( i = 1; i <= 3; ++i ) { + FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode ); + FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode ); + #if IncludeUTF32 + FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode ); + #endif + fprintf ( log, "%d byte buffers, leading BOM, buffered parsing tests done\n", i ); + } + + for ( i = 4; i <= 16; i *= 2 ) { + FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode ); + FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode ); + #if IncludeUTF32 + FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode ); + FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode ); + #endif + fprintf ( log, "%d byte buffers, leading BOM, buffered parsing tests done\n", i ); + } + +#endif + + fprintf ( log, "\n" ); + +} // DoTest + +// ================================================================================================= + +extern "C" int main ( void ) +{ + int result = 0; + char buffer [1000]; + + sLogFile = stdout; + + time_t now; + time ( &now ); + sprintf ( buffer, "// Starting test for UTF-16 and UTF-32 serialization and parsing, %s", ctime ( &now ) ); + + fprintf ( sLogFile, "// " ); + for ( int i = 4; i < strlen(buffer); ++i ) fprintf ( sLogFile, "=" ); + fprintf ( sLogFile, "\n%s", buffer ); + + fprintf ( sLogFile, "// =====================================================================================\n" ); + fprintf ( sLogFile, "// A thorough test for UTF-16 and UTF-32 serialization and parsing. It assumes the basic\n" ); + fprintf ( sLogFile, "// Unicode conversion functions are working - they have their own exhaustive test.\n\n" ); + + #if ! IncludeUTF32 + fprintf ( sLogFile, "// ** Skipping UTF-32 tests, Expat seems to not handle it.\n\n" ); + #endif + + #if 0 + if ( sLogFile == stdout ) { + // Use this to be able to move the app window away from debugger windows. + fprintf ( sLogFile, "Move window, type return to continue" ); + fread ( buffer, 1, 1, stdin ); + } + #endif + + try { + + if ( ! SXMPMeta::Initialize() ) { + fprintf ( sLogFile, "\n## SXMPMeta::Initialize failed!\n" ); + return -1; + } + + DoTest ( sLogFile ); + + SXMPMeta::Terminate(); + + } catch ( XMP_Error& excep ) { + + PrintXMPErrorInfo ( excep, "\n## Unhandled XMP_Error exception" ); + + } catch ( ... ) { + + fprintf ( sLogFile, "\n## Unexpected exception\n" ); + return -1; + + } + + time ( &now ); + sprintf ( buffer, "// Finished test for UTF-16 and UTF-32 serialization and parsing, %s", ctime ( &now ) ); + + fprintf ( sLogFile, "// " ); + for ( int i = 4; i < strlen(buffer); ++i ) fprintf ( sLogFile, "=" ); + fprintf ( sLogFile, "\n%s\n", buffer ); + + fclose ( sLogFile ); + return 0; + +} diff --git a/samples/source/UnicodePerformance.cpp b/samples/source/UnicodePerformance.cpp new file mode 100644 index 0000000..d11185c --- /dev/null +++ b/samples/source/UnicodePerformance.cpp @@ -0,0 +1,308 @@ +// ================================================================================================= + +#include <cstdio> +#include <vector> +#include <string> +#include <cstring> +#include <ctime> + +#include <cstdio> +#include <cstdlib> +#include <cerrno> +#include <stdexcept> + +using namespace std; + +#if WIN_ENV + #pragma warning ( disable : 4701 ) // local variable may be used without having been initialized +#endif + +// ================================================================================================= + +#include "public/include/XMP_Environment.h" +#include "public/include/XMP_Const.h" + +#include "source/EndianUtils.hpp" +#include "source/UnicodeConversions.hpp" +#include "source/UnicodeConversions.cpp" + +#define TestUnicodeConsortium 0 + +#if TestUnicodeConsortium + #include "ConvertUTF.c" // The Unicode Consortium implementations. +#endif + +// ================================================================================================= + +#define kCodePointCount 0x110000 + +UTF8Unit sU8 [kCodePointCount*4 + 8]; +UTF16Unit sU16 [kCodePointCount*2 + 4]; +UTF32Unit sU32 [kCodePointCount + 2]; + +// ================================================================================================= + +static UTF8_to_UTF16_Proc OurUTF8_to_UTF16; // ! Don't use static initialization, VS.Net strips it! +static UTF8_to_UTF32_Proc OurUTF8_to_UTF32; +static UTF16_to_UTF8_Proc OurUTF16_to_UTF8; +static UTF16_to_UTF32_Proc OurUTF16_to_UTF32; +static UTF32_to_UTF8_Proc OurUTF32_to_UTF8; +static UTF32_to_UTF16_Proc OurUTF32_to_UTF16; + +// ================================================================================================= + +static void ReportPerformance ( FILE * log, const char * content, const size_t u32Count, const size_t u16Count, const size_t u8Count ) +{ + size_t inCount, outCount; + UTF32Unit * u32Ptr; + UTF16Unit * u16Ptr; + UTF8Unit * u8Ptr; + + size_t i; + const size_t cycles = 100; + clock_t start, end; + double elapsed; + + // -------------------------------------------------- + fprintf ( log, "\n Adobe code over %s\n", content ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) OurUTF32_to_UTF8 ( sU32, u32Count, sU8, sizeof(sU8), &inCount, &outCount ); + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF32_to_UTF8 : %.3f seconds\n", elapsed ); + if ( (inCount != u32Count) || (outCount != u8Count) ) fprintf ( log, " *** Our UTF32_to_UTF8 count error, %d -> %d\n", inCount, outCount ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) OurUTF32_to_UTF16 ( sU32, u32Count, sU16, sizeof(sU16), &inCount, &outCount ); + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF32_to_UTF16 : %.3f seconds\n", elapsed ); + if ( (inCount != u32Count) || (outCount != u16Count) ) fprintf ( log, " *** Our UTF32_to_UTF16 count error, %d -> %d\n", inCount, outCount ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) OurUTF16_to_UTF8 ( sU16, u16Count, sU8, sizeof(sU8), &inCount, &outCount ); + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF16_to_UTF8 : %.3f seconds\n", elapsed ); + if ( (inCount != u16Count) || (outCount != u8Count) ) fprintf ( log, " *** Our UTF16_to_UTF8 count error, %d -> %d\n", inCount, outCount ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) OurUTF16_to_UTF32 ( sU16, u16Count, sU32, sizeof(sU32), &inCount, &outCount ); + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF16_to_UTF32 : %.3f seconds\n", elapsed ); + if ( (inCount != u16Count) || (outCount != u32Count) ) fprintf ( log, " *** Our UTF16_to_UTF32 count error, %d -> %d\n", inCount, outCount ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) OurUTF8_to_UTF16 ( sU8, u8Count, sU16, sizeof(sU16), &inCount, &outCount ); + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF8_to_UTF16 : %.3f seconds\n", elapsed ); + if ( (inCount != u8Count) || (outCount != u16Count) ) fprintf ( log, " *** Our UTF8_to_UTF16 count error, %d -> %d\n", inCount, outCount ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) OurUTF8_to_UTF32 ( sU8, u8Count, sU32, sizeof(sU32), &inCount, &outCount ); + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF8_to_UTF32 : %.3f seconds\n", elapsed ); + if ( (inCount != u8Count) || (outCount != u32Count) ) fprintf ( log, " *** Our UTF8_to_UTF32 count error, %d -> %d\n", inCount, outCount ); + + #if TestUnicodeConsortium + + // --------------------------------------------------------------- + fprintf ( log, "\n Unicode Consortium code over %s\n", content ); + + ConversionResult ucStatus; + + start = clock(); + for ( i = 0; i < cycles; ++i ) { + u32Ptr = sU32; u8Ptr = sU8; + ucStatus = ConvertUTF32toUTF8 ( (const UTF32**)(&u32Ptr), (const UTF32*)(sU32+u32Count), &u8Ptr, sU8+sizeof(sU8), strictConversion ); + } + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF32_to_UTF8 : %.3f seconds\n", elapsed ); + inCount = u32Ptr - sU32; outCount = u8Ptr - sU8; + if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF32toUTF8 status error, %d\n", ucStatus ); + if ( (inCount != u32Count) || (outCount != u8Count) ) fprintf ( log, " *** UC ConvertUTF32toUTF8 count error, %d, %d -> %d\n", inCount, outCount ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) { + u32Ptr = sU32; u16Ptr = sU16; + ucStatus = ConvertUTF32toUTF16 ( (const UTF32**)(&u32Ptr), (const UTF32*)(sU32+u32Count), &u16Ptr, sU16+sizeof(sU16), strictConversion ); + } + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF32_to_UTF16 : %.3f seconds\n", elapsed ); + inCount = u32Ptr - sU32; outCount = u16Ptr - sU16; + if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF32toUTF16 status error, %d\n", ucStatus ); + if ( (inCount != u32Count) || (outCount != u16Count) ) fprintf ( log, " *** UC ConvertUTF32toUTF16 count error, %d, %d -> %d\n", inCount, outCount ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) { + u16Ptr = sU16; u8Ptr = sU8; + ucStatus = ConvertUTF16toUTF8 ( (const UTF16**)(&u16Ptr), (const UTF16*)(sU16+u16Count), &u8Ptr, sU8+sizeof(sU8), strictConversion ); + } + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF16_to_UTF8 : %.3f seconds\n", elapsed ); + inCount = u16Ptr - sU16; outCount = u8Ptr - sU8; + if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF16toUTF8 status error, %d\n", ucStatus ); + if ( (inCount != u16Count) || (outCount != u8Count) ) fprintf ( log, " *** UC ConvertUTF16toUTF8 count error, %d, %d -> %d\n", inCount, outCount ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) { + u16Ptr = sU16; u32Ptr = sU32; + ucStatus = ConvertUTF16toUTF32 ( (const UTF16**)(&u16Ptr), (const UTF16*)(sU16+u16Count), &u32Ptr, sU32+sizeof(sU32), strictConversion ); + } + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF16_to_UTF32 : %.3f seconds\n", elapsed ); + inCount = u16Ptr - sU16; outCount = u32Ptr - sU32; + if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF16toUTF32 status error, %d\n", ucStatus ); + if ( (inCount != u16Count) || (outCount != u32Count) ) fprintf ( log, " *** UC ConvertUTF16toUTF32 count error, %d, %d -> %d\n", inCount, outCount ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) { + u8Ptr = sU8; u16Ptr = sU16; + ucStatus = ConvertUTF8toUTF16 ( (const UTF8**)(&u8Ptr), (const UTF8*)(sU8+u8Count), &u16Ptr, sU16+sizeof(sU16), strictConversion ); + } + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF8_to_UTF16 : %.3f seconds\n", elapsed ); + inCount = u8Ptr - sU8; outCount = u16Ptr - sU16; + if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF8toUTF16 status error, %d\n", ucStatus ); + if ( (inCount != u8Count) || (outCount != u16Count) ) fprintf ( log, " *** UC ConvertUTF8toUTF16 count error, %d, %d -> %d\n", inCount, outCount ); + + start = clock(); + for ( i = 0; i < cycles; ++i ) { + u8Ptr = sU8; u32Ptr = sU32; + ucStatus = ConvertUTF8toUTF32 ( (const UTF8**)(&u8Ptr), (const UTF8*)(sU8+u8Count), &u32Ptr, sU32+sizeof(sU32), strictConversion ); + } + end = clock(); + elapsed = double(end-start) / CLOCKS_PER_SEC; + + fprintf ( log, " UTF8_to_UTF32 : %.3f seconds\n", elapsed ); + inCount = u8Ptr - sU8; outCount = u32Ptr - sU32; + if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF8toUTF32 status error, %d\n", ucStatus ); + if ( (inCount != u8Count) || (outCount != u32Count) ) fprintf ( log, " *** UC ConvertUTF8toUTF32 count error, %d, %d -> %d\n", inCount, outCount ); + + #endif + +} // ReportPerformance + +// ================================================================================================= + +static void ComparePerformance ( FILE * log ) +{ + size_t i, u32Count, u16Count, u8Count; + UTF32Unit cp; + + if ( kBigEndianHost ) { + OurUTF8_to_UTF16 = UTF8_to_UTF16BE; + OurUTF8_to_UTF32 = UTF8_to_UTF32BE; + OurUTF16_to_UTF8 = UTF16BE_to_UTF8; + OurUTF16_to_UTF32 = UTF16BE_to_UTF32BE; + OurUTF32_to_UTF8 = UTF32BE_to_UTF8; + OurUTF32_to_UTF16 = UTF32BE_to_UTF16BE; + } else { + OurUTF8_to_UTF16 = UTF8_to_UTF16LE; + OurUTF8_to_UTF32 = UTF8_to_UTF32LE; + OurUTF16_to_UTF8 = UTF16LE_to_UTF8; + OurUTF16_to_UTF32 = UTF16LE_to_UTF32LE; + OurUTF32_to_UTF8 = UTF32LE_to_UTF8; + OurUTF32_to_UTF16 = UTF32LE_to_UTF16LE; + } + + for ( i = 0, cp = 0; cp < 0xD800; ++i, ++cp ) sU32[i] = cp; // Measure using the full Unicode set. + for ( cp = 0xE000; cp < 0x110000; ++i, ++cp ) sU32[i] = cp; + u32Count = 0xD800 + (0x110000 - 0xE000); + u16Count = 0xD800 + (0x10000 - 0xE000) + (0x110000 - 0x10000)*2; + u8Count = 0x80 + (0x800 - 0x80)*2 + (0xD800 - 0x800)*3 + (0x10000 - 0xE000)*3 + (0x110000 - 0x10000)*4; + ReportPerformance ( log, "full Unicode set", u32Count, u16Count, u8Count ); + + for ( i = 0; i < 0x110000; ++i ) sU32[i] = i & 0x7F; // Measure using just ASCII. + u32Count = 0x110000; + u16Count = 0x110000; + u8Count = 0x110000; + ReportPerformance ( log, "just ASCII", u32Count, u16Count, u8Count ); + + for ( i = 0; i < 0x110000; ++i ) sU32[i] = 0x4000 + (i & 0x7FFF); // Measure using just non-ASCII inside the BMP. + u32Count = 0x110000; + u16Count = 0x110000; + u8Count = 0x110000*3; + ReportPerformance ( log, "just non-ASCII inside the BMP", u32Count, u16Count, u8Count ); + + for ( i = 0; i < 0x110000; ++i ) sU32[i] = 0x40000 + (i & 0xFFFF); // Measure using just outside the BMP. + u32Count = 0x110000; + u16Count = 0x110000*2; + u8Count = 0x110000*4; + ReportPerformance ( log, "just outside the BMP", u32Count, u16Count, u8Count ); + +} // ComparePerformance + +// ================================================================================================= + +static void DoTest ( FILE * log ) +{ + + InitializeUnicodeConversions(); + ComparePerformance ( log ); + +} // DoTest + +// ================================================================================================= + +extern "C" int main ( void ) +{ + char buffer [1000]; + + #if !XMP_AutomatedTestBuild + FILE * log = stdout; + #else + FILE * log = fopen ( "TestUnicode.out", "wb" ); + #endif + + time_t now; + time ( &now ); + sprintf ( buffer, "// Starting test for Unicode conversion performance, %s", ctime ( &now ) ); + + fprintf ( log, "// " ); + for ( size_t i = 4; i < strlen(buffer); ++i ) fprintf ( log, "=" ); + fprintf ( log, "\n%s", buffer ); + fprintf ( log, "// Native %s endian\n", (kBigEndianHost ? "big" : "little") ); + + try { + + DoTest ( log ); + + } catch ( ... ) { + + fprintf ( log, "\n## Caught unexpected exception\n" ); + return -1; + + } + + time ( &now ); + sprintf ( buffer, "// Finished test for Unicode conversion performance, %s", ctime ( &now ) ); + + fprintf ( log, "\n// " ); + for ( size_t i = 4; i < strlen(buffer); ++i ) fprintf ( log, "=" ); + fprintf ( log, "\n%s\n", buffer ); + + fclose ( log ); + return 0; + +} diff --git a/samples/source/XMPCoreCoverage.cpp b/samples/source/XMPCoreCoverage.cpp index 669600e..fd1e65a 100644 --- a/samples/source/XMPCoreCoverage.cpp +++ b/samples/source/XMPCoreCoverage.cpp @@ -10,16 +10,17 @@ * Demonstrates syntax and usage by exercising most of the API functions of XMPCore Toolkit SDK component, * using a sample XMP Packet that contains all of the different property and attribute types. */ +#include <cstdio> +#include <vector> #include <string> +#include <cstring> #include <iostream> #include <iomanip> #include <fstream> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <time.h> +#include <cstdlib> +#include <cerrno> +#include <ctime> #define TXMP_STRING_TYPE std::string diff --git a/samples/source/XMPFilesCoverage.cpp b/samples/source/XMPFilesCoverage.cpp index 3742c40..326b19a 100644 --- a/samples/source/XMPFilesCoverage.cpp +++ b/samples/source/XMPFilesCoverage.cpp @@ -11,13 +11,13 @@ * using a sample XMP Packet that contains all of the different property and attribute types. */ +#include <cstdio> #include <vector> #include <string> +#include <cstring> #include <stdexcept> -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <time.h> +#include <cerrno> +#include <ctime> #define TXMP_STRING_TYPE std::string #define XMP_INCLUDE_XMPFILES 1 diff --git a/samples/source/XMPIterations.cpp b/samples/source/XMPIterations.cpp index b24c7cd..f93b7d7 100644 --- a/samples/source/XMPIterations.cpp +++ b/samples/source/XMPIterations.cpp @@ -7,10 +7,13 @@ // ================================================================================================= /** -* Demonstrates how to use the iteration utility in the XMPCore component to walk through property trees. -*/ + * Demonstrates how to use the iteration utility in the XMPCore component to walk through property trees. + */ +#include <cstdio> +#include <vector> #include <string> +#include <cstring> // Must be defined to instantiate template classes #define TXMP_STRING_TYPE std::string @@ -30,124 +33,128 @@ using namespace std; // Provide some custom XMP static const char * rdf = - "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>" - " <rdf:Description rdf:about='' xmlns:xmpTest='http://ns.adobe.com/xmpTest/'>" - "" - " <xmpTest:MySimpleProp rdf:parseType='Resource'>" - " <rdf:value>A Value</rdf:value>" - " <xmpTest:MyQual>Qual Value</xmpTest:MyQual>" - " </xmpTest:MySimpleProp>" - "" - " <xmpTest:MyTopStruct rdf:parseType='Resource'>" - " <xmpTest:MySecondStruct rdf:parseType='Resource'>" - " <xmpTest:MyThirdStruct rdf:parseType='Resource'>" - " <xmpTest:MyThirdStructField>Field Value 3</xmpTest:MyThirdStructField>" - " </xmpTest:MyThirdStruct>" - " <xmpTest:MySecondStructField>Field Value 2</xmpTest:MySecondStructField>" - " </xmpTest:MySecondStruct>" - " <xmpTest:MyTopStructField>Field Value 1</xmpTest:MyTopStructField>" - " </xmpTest:MyTopStruct>" - - " <xmpTest:MyArrayWithNestedArray>" - " <rdf:Bag>" - " <rdf:li>" - " <rdf:Seq>" - " <rdf:li>Item 1</rdf:li>" - " <rdf:li>Item 2</rdf:li>" - " </rdf:Seq>" - " </rdf:li>" - " </rdf:Bag>" - " </xmpTest:MyArrayWithNestedArray>" - - " <xmpTest:MyArrayWithStructures>" - " <rdf:Seq>" - " <rdf:li rdf:parseType='Resource'>" - " <rdf:value>Field Value 1</rdf:value>" - " <xmpTest:FirstQual>Qual Value 1</xmpTest:FirstQual>" - " <xmpTest:SecondQual>Qual Value 2</xmpTest:SecondQual>" - " </rdf:li>" - " <rdf:li rdf:parseType='Resource'>" - " <rdf:value>Field Value 2</rdf:value>" - " <xmpTest:FirstQual>Qual Value 3</xmpTest:FirstQual>" - " <xmpTest:SecondQual>Qual Value 4</xmpTest:SecondQual>" - " </rdf:li>" - " </rdf:Seq>" - " </xmpTest:MyArrayWithStructures>" - "" - " <xmpTest:MyStructureWithArray rdf:parseType='Resource'>" - " <xmpTest:NestedArray>" - " <rdf:Bag>" - " <rdf:li>Item 3</rdf:li>" - " <rdf:li>Item 4</rdf:li>" - " <rdf:li>Item 5</rdf:li>" - " <rdf:li>Item 6</rdf:li>" - " </rdf:Bag>" - " </xmpTest:NestedArray>" - " <xmpTest:NestedArray2>" - " <rdf:Bag>" - " <rdf:li>Item 66</rdf:li>" - " <rdf:li>Item 46</rdf:li>" - " <rdf:li>Item 56</rdf:li>" - " <rdf:li>Item 66</rdf:li>" - " </rdf:Bag>" - " </xmpTest:NestedArray2>" - " </xmpTest:MyStructureWithArray>" - "" - " </rdf:Description>" - "</rdf:RDF>"; +"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>" +" <rdf:Description rdf:about='' xmlns:xmpTest='http://ns.adobe.com/xmpTest/'>" +"" +" <xmpTest:MySimpleProp rdf:parseType='Resource'>" +" <rdf:value>A Value</rdf:value>" +" <xmpTest:MyQual>Qual Value</xmpTest:MyQual>" +" </xmpTest:MySimpleProp>" +"" +" <xmpTest:MyTopStruct rdf:parseType='Resource'>" +" <xmpTest:MySecondStruct rdf:parseType='Resource'>" +" <xmpTest:MyThirdStruct rdf:parseType='Resource'>" +" <xmpTest:MyThirdStructField>Field Value 3</xmpTest:MyThirdStructField>" +" </xmpTest:MyThirdStruct>" +" <xmpTest:MySecondStructField>Field Value 2</xmpTest:MySecondStructField>" +" </xmpTest:MySecondStruct>" +" <xmpTest:MyTopStructField>Field Value 1</xmpTest:MyTopStructField>" +" </xmpTest:MyTopStruct>" + +" <xmpTest:MyArrayWithNestedArray>" +" <rdf:Bag>" +" <rdf:li>" +" <rdf:Seq>" +" <rdf:li>Item 1</rdf:li>" +" <rdf:li>Item 2</rdf:li>" +" </rdf:Seq>" +" </rdf:li>" +" </rdf:Bag>" +" </xmpTest:MyArrayWithNestedArray>" + +" <xmpTest:MyArrayWithStructures>" +" <rdf:Seq>" +" <rdf:li rdf:parseType='Resource'>" +" <rdf:value>Field Value 1</rdf:value>" +" <xmpTest:FirstQual>Qual Value 1</xmpTest:FirstQual>" +" <xmpTest:SecondQual>Qual Value 2</xmpTest:SecondQual>" +" </rdf:li>" +" <rdf:li rdf:parseType='Resource'>" +" <rdf:value>Field Value 2</rdf:value>" +" <xmpTest:FirstQual>Qual Value 3</xmpTest:FirstQual>" +" <xmpTest:SecondQual>Qual Value 4</xmpTest:SecondQual>" +" </rdf:li>" +" </rdf:Seq>" +" </xmpTest:MyArrayWithStructures>" +"" +" <xmpTest:MyStructureWithArray rdf:parseType='Resource'>" +" <xmpTest:NestedArray>" +" <rdf:Bag>" +" <rdf:li>Item 3</rdf:li>" +" <rdf:li>Item 4</rdf:li>" +" <rdf:li>Item 5</rdf:li>" +" <rdf:li>Item 6</rdf:li>" +" </rdf:Bag>" +" </xmpTest:NestedArray>" +" <xmpTest:NestedArray2>" +" <rdf:Bag>" +" <rdf:li>Item 66</rdf:li>" +" <rdf:li>Item 46</rdf:li>" +" <rdf:li>Item 56</rdf:li>" +" <rdf:li>Item 66</rdf:li>" +" </rdf:Bag>" +" </xmpTest:NestedArray2>" +" </xmpTest:MyStructureWithArray>" +"" +" </rdf:Description>" +"</rdf:RDF>"; // The namespace to be used. This will be automatically registered // when the RDF is parsed. const XMP_StringPtr kXMP_NS_SDK = "http://ns.adobe.com/xmpTest/"; /** -* Reads some metadata from a file and appends some custom XMP to it. Then does several -* iterations, using various iterators. Each iteration is displayed in the console window. -*/ + * Reads some metadata from a file and appends some custom XMP to it. Then does several + * iterations, using various iterators. Each iteration is displayed in the console window. + */ int main() { if(SXMPMeta::Initialize()) { XMP_OptionBits options = 0; - #if UNIX_ENV - options |= kXMPFiles_ServerMode; - #endif +#if UNIX_ENV + options |= kXMPFiles_ServerMode; +#endif if ( SXMPFiles::Initialize ( options ) ) { bool ok; SXMPFiles myFile; - + XMP_OptionBits opts = kXMPFiles_OpenForRead | kXMPFiles_OpenUseSmartHandler; - ok = myFile.OpenFile("../../../testfiles/Image1.jpg", kXMP_UnknownFile, opts); +#if MAC_ENV + ok = myFile.OpenFile("../../../../testfiles/Image1.jpg", kXMP_UnknownFile, opts); +#else + ok = myFile.OpenFile("../../../testfiles/Image1.jpg", kXMP_UnknownFile, opts); +#endif if(ok) { SXMPMeta xmp; myFile.GetXMP(&xmp); - + // Add some custom metadata to the XMP object SXMPMeta custXMP(rdf, (XMP_StringLen) strlen(rdf)); SXMPUtils::ApplyTemplate(&xmp, custXMP, kXMPTemplate_AddNewProperties); - + // Store any details from the iter.Next() call string schemaNS, propPath, propVal; - + // Only visit the immediate children that are leaf properties of the Dublin Core schema SXMPIterator dcLeafIter(xmp, kXMP_NS_DC, (kXMP_IterJustChildren | kXMP_IterJustLeafNodes)); while(dcLeafIter.Next(&schemaNS, &propPath, &propVal)) { cout << schemaNS << " " << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Visit one property from the XMP Basic schema SXMPIterator xmpKeywordsIter(xmp, kXMP_NS_XMP, "Keywords", kXMP_IterJustLeafNodes); while(xmpKeywordsIter.Next(&schemaNS, &propPath, &propVal)) { cout << schemaNS << " " << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Visit the Dublin Core schema, omit any quailifiers and only // show the leaf properties SXMPIterator dcIter(xmp, kXMP_NS_DC, (kXMP_IterOmitQualifiers | kXMP_IterJustLeafNodes)); @@ -155,9 +162,9 @@ int main() { cout << schemaNS << " " << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Visit the Dublin Core schema, omit any quailifiers, // show the leaf properties but only return the leaf name and not the full path SXMPIterator dcIter2(xmp, kXMP_NS_DC, (kXMP_IterOmitQualifiers | kXMP_IterJustLeafNodes | kXMP_IterJustLeafName)); @@ -165,9 +172,9 @@ int main() { cout << schemaNS << " " << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Iterate over a single namespace. Show all properties within // the Photoshop schema SXMPIterator exifIter(xmp, kXMP_NS_Photoshop); @@ -175,9 +182,9 @@ int main() { cout << schemaNS << " " << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Just visit the leaf nodes of EXIF properties. That is just // properties that may have values. SXMPIterator exifLeafIter(xmp, kXMP_NS_EXIF, kXMP_IterJustLeafNodes); @@ -185,9 +192,9 @@ int main() { cout << schemaNS << " " << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Iterate over all properties but skip the EXIF schema and skip the custom schema // and continue visiting nodes SXMPIterator skipExifIter (xmp); @@ -202,9 +209,9 @@ int main() cout << schemaNS << " " << propPath << " = " << propVal << endl; } } - + cout << "----------------------------------" << endl; - + // Iterate over all properties but skip the EXIF schema // and any remaining siblings of the current node. SXMPIterator stopAfterExifIter ( xmp ); @@ -219,13 +226,13 @@ int main() cout << schemaNS << " " << propPath << " = " << propVal << endl; } } - + cout << "----------------------------------" << endl; - + ////////////////////////////////////////////////////////////////////////////////////// - + // Iterate over the custom XMP - + // Visit the immediate children of this node. // No qualifiers are visisted as they are below the property being visisted. SXMPIterator justChildrenIter(xmp, kXMP_NS_SDK, kXMP_IterJustChildren); @@ -233,9 +240,9 @@ int main() { cout << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Visit the immediate children of this node but only those that may have values. // No qualifiers are visisted as they are below the property being visisted. SXMPIterator justChildrenAndLeafIter(xmp, kXMP_NS_SDK, (kXMP_IterJustChildren | kXMP_IterJustLeafNodes)); @@ -243,18 +250,18 @@ int main() { cout << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Visit the leaf nodes of TopStructProperty SXMPIterator myTopStructIter(xmp, kXMP_NS_SDK, "MyTopStruct", kXMP_IterJustLeafNodes); while(myTopStructIter.Next(&schemaNS, &propPath, &propVal)) { cout << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Visit the leaf nodes of the TopStructProperty but only return the names for // the leaf components and not the full path SXMPIterator xmyTopStructIterShortNames(xmp, kXMP_NS_SDK, "MyTopStruct", (kXMP_IterJustLeafNodes | kXMP_IterJustLeafName)); @@ -262,27 +269,27 @@ int main() { cout << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Visit a property and all of the qualifiers SXMPIterator iterArrayProp (xmp, kXMP_NS_SDK, "ArrayWithStructures", kXMP_IterJustLeafNodes ); while(iterArrayProp.Next(&schemaNS, &propPath, &propVal)) { cout << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Visit a property and omit all of the qualifiers SXMPIterator iterArrayPropNoQual (xmp, kXMP_NS_SDK, "ArrayWithStructures", (kXMP_IterJustLeafNodes | kXMP_IterOmitQualifiers)); while(iterArrayPropNoQual.Next(&schemaNS, &propPath, &propVal)) { cout << propPath << " = " << propVal << endl; } - + cout << "----------------------------------" << endl; - + // Skip a subtree and continue onwards. Once 'Item 4' is found then the we can skip all of the // siblings of the current node. If the the current node were a top level node the iteration // would be complete as all siblings would be skipped. However, when 'Item 4' is found the current @@ -300,22 +307,22 @@ int main() cout << schemaNS << " " << propPath << " = " << propVal << endl; } } - + /* - // Visit all properties and qualifiers - SXMPIterator allPropsIter(xmp); - while(allPropsIter.Next(&schemaNS, &propPath, &propVal)) - { - cout << schemaNS << " " << propPath << " = " << propVal << endl; - } - */ + // Visit all properties and qualifiers + SXMPIterator allPropsIter(xmp); + while(allPropsIter.Next(&schemaNS, &propPath, &propVal)) + { + cout << schemaNS << " " << propPath << " = " << propVal << endl; + } + */ } } } SXMPFiles::Terminate(); SXMPMeta::Terminate(); - + return 0; } diff --git a/samples/source/common/DumpFile.cpp b/samples/source/common/DumpFile.cpp index ca5446d..c651aca 100644 --- a/samples/source/common/DumpFile.cpp +++ b/samples/source/common/DumpFile.cpp @@ -938,6 +938,9 @@ static const XMP_Int64 kASFMinSize = 16; // ! Not really accurate, but covers th static const XMP_Int64 kRIFFMinSize = 12; +static const XMP_Int64 kPostScriptMinSize = 49; + + static const XMP_Int64 kInDesignMinSize = 2 * kINDD_PageSize; // Two master pages. static const XMP_Int64 kISOMediaMinSize = 16; // At least a minimal file type box. @@ -945,6 +948,7 @@ static const XMP_Uns8 kISOMediaFTyp[] = { 0x66, 0x74, 0x79, 0x70 }; // "ftyp" static const XMP_Uns32 kISOTag_ftyp = 0x66747970UL; static const XMP_Uns32 kISOBrand_mp41 = 0x6D703431UL; static const XMP_Uns32 kISOBrand_mp42 = 0x6D703432UL; +static const XMP_Uns32 kISOBrand_avc1 = 0x61766331UL; static const XMP_Uns32 kISOBrand_f4v = 0x66347620UL; static const XMP_Uns32 kQTTag_XMP_ = 0x584D505FUL; @@ -953,6 +957,8 @@ static const XMP_Int64 kSWFMinSize = (8+2+4 + 2); // Header with minimal rectang static const XMP_Int64 kFLVMinSize = 9; // Header with zero length data. +static const XMP_Uns8 kPostScriptStart[] = { 0xC5, 0xD0, 0xD3, 0xC6 }; + static XMP_FileFormat CheckFileFormat ( const char * filePath, XMP_Uns8 * fileContent, XMP_Int64 fileSize ) { @@ -998,6 +1004,10 @@ CheckFileFormat ( const char * filePath, XMP_Uns8 * fileContent, XMP_Int64 fileS if ( CheckBytes ( fileContent+8, "AIFC", 4 ) ) return kXMP_AIFFFile; } + if ( (fileSize >= kPostScriptMinSize) && CheckBytes (fileContent, kPostScriptStart, 4) ) { + return kXMP_PostScriptFile; + } + if ( (fileSize >= kInDesignMinSize) && CheckBytes ( fileContent, kInDesign_MasterPageGUID, kInDesignGUIDSize ) ) { return kXMP_InDesignFile; } @@ -1025,7 +1035,18 @@ CheckFileFormat ( const char * filePath, XMP_Uns8 * fileContent, XMP_Int64 fileS for ( ; compatPtr < compatEnd; compatPtr += 4 ) { XMP_Uns32 compatBrand = GetUns32BE (compatPtr); - if ( (compatBrand == kISOBrand_mp41) || (compatBrand == kISOBrand_mp42) ) return kXMP_MPEG4File; + switch ( compatBrand ) { + case kISOBrand_mp41: + case kISOBrand_mp42: + case kISOBrand_avc1: + return kXMP_MPEG4File; + break; + + default: + break; + + } + } } @@ -1856,8 +1877,13 @@ digestInternationalTextSequence ( LFA_FileRef file, std::string isoPath, XMP_Int tree->digest16u(file,isoPath+"language code",true,true); (*remainingSize) -= 4; if ( (*remainingSize) != miniBoxStringSize ) + { tree->addComment("WARNING: boxSize and miniBoxSize differ!"); - tree->digestString( file, isoPath+"value", miniBoxStringSize, false ); + } + else + { + tree->digestString( file, isoPath+"value", miniBoxStringSize, false ); + } } /** @@ -1918,7 +1944,9 @@ DumpISOBoxes ( LFA_FileRef file, XMP_Uns32 maxBoxLen, std::string _isoPath ) break; } - std::string boxString( fromArgs( "%.4s" , &boxType ) ); + XMP_Uns32 tempBoxType = GetUns32LE(&boxType); + std::string boxString( fromArgs( "%.4s" , &tempBoxType) ); + // substitute mac-copyright signs with an easier-to-handle "(c)" if ( boxString.at(0) == 0xA9 ) boxString = std::string("(c)") + boxString.substr(1); @@ -1983,6 +2011,10 @@ DumpISOBoxes ( LFA_FileRef file, XMP_Uns32 maxBoxLen, std::string _isoPath ) XMP_Uns32 majorBrand = LFA_ReadUns32_LE( file ); XMP_Uns32 minorVersion = LFA_ReadUns32_LE( file ); + //data has been read in LE make it in BE + majorBrand = GetUns32LE(&majorBrand); + minorVersion = GetUns32LE(&minorVersion); + //Log::info( fromArgs( "major Brand: '%.4s' (0x%.8X)" , &majorBrand, MakeUns32BE(majorBrand) )); //Log::info( fromArgs( "minor Version: 0x%.8X" , MakeUns32BE(minorVersion) ) ); tree->setKeyValue( isoPath + "majorBrand", @@ -2223,9 +2255,7 @@ DumpISOBoxes ( LFA_FileRef file, XMP_Uns32 maxBoxLen, std::string _isoPath ) // (c)-style quicktime boxes and boxes of no interest: default: - if ( (boxType & 0xA9) == 0xA9) // (c)something - { - if ( 0 == isoPath.compare( 0 , 20, "moov/udta/meta/ilst/")) + if ( 0 == isoPath.compare( 0 , 20, "moov/udta/meta/ilst/")) { // => iTunes metadata (hunt for data childs) // a container box, hunt for 'data' atom by recursion: bool ok; @@ -2236,16 +2266,12 @@ DumpISOBoxes ( LFA_FileRef file, XMP_Uns32 maxBoxLen, std::string _isoPath ) } else if ( 0 == isoPath.compare( 0 , 10, "moov/udta/" )) { // => Quicktime metadata "international text sequence" ( size, language code, value ) - digestInternationalTextSequence( file, isoPath, &remainingSize ); + digestInternationalTextSequence( file, isoPath, &remainingSize ); } else { tree->addComment("WARNING: unknown flavor of (c)*** boxes, neither QT nor iTunes"); } break; - } - //boxes of no interest: - - break; } bool ok; @@ -3492,6 +3518,128 @@ DumpPNGChunk ( LFA_FileRef file, XMP_Uns32 pngLen, XMP_Uns32 chunkOffset ) // ================================================================================================= static void +DumpPS ( LFA_FileRef file, XMP_Uns32 fileLen ) +{ + XMP_Int32 psOffset; + size_t psLength; + + LFA_Seek ( file, 4, SEEK_SET ); // skip fileheader bytes + LFA_Read ( file, &psOffset, 4, true ); + LFA_Read ( file, &psLength, 4, true ); + + tree->addComment(" psOffset: %d, psLength: %d", psOffset, psLength); + + // jump to psOffset + Skip(file, (psOffset - 12)); + + // get the header (everything till first % + + XMP_Int64 offset = LFA_Tell(file); + std::string key, value; + char byte = LFA_GetChar(file); + bool eof = false; + while ( !eof ) + { + key.clear(); + key += byte; // add the first % + byte = LFA_GetChar(file); + + while (byte != ' ' && byte != '\r') // get everthing until next space or LF + { + key += byte; + byte = LFA_GetChar(file); + + } + + //if (CheckBytes( key.c_str(), "%%EOF", 5)) + if (key == "%%EOF") + { + eof = true; + } + else + { + byte = LFA_GetChar(file); + value.clear(); + while (byte != '%') // get everthing until next % + { + value += byte; + byte = LFA_GetChar(file); + } + } + tree->pushNode(key); + tree->addOffset( file ); + + //for now only store value for header + if ( key =="%!PS-Adobe-3.0" ) + { + tree->changeValue(value); + } + + tree->addComment("offset: %d", offset ); + tree->addComment("size: 0x%llX", LFA_Tell(file)-offset ); + tree->popNode(); + + offset = LFA_Tell(file); + } + // Now just get everything else and store all keys that start with % + + + // get the key + // start of the PostScript DSC header comment + + /*XMP_Uns8 buffer [11]; + LFA_Read ( file, &buffer, sizeof(buffer), true ); + + if (!CheckBytes( buffer, "%!PS-Adobe-", 11)) + { + tree->comment ( "** Invalid PS, unknown PS file tag." ); + return; + } + + // Check the PostScript DSC major version number. + XMP_Uns8 byte; + LFA_Read ( file, &byte, sizeof(byte), true ); + + psMajorVer = 0; + while ( IsNumeric( byte ) ) + { + psMajorVer = (psMajorVer * 10) + (byte - '0'); + if ( psMajorVer > 1000 ) { + tree->comment ( "** Invalid PS, Overflow." ); + return; + }; // Overflow. + LFA_Read ( file, &byte, sizeof(byte), true ); + } + if ( psMajorVer < 3 ){ + tree->comment ( "** Invalid PS, The version must be at least 3.0." ); + return; + }; // The version must be at least 3.0. + + if ( byte != '.' ){ + tree->comment ( "** Invalid PS, No minor number" ); + return; + }; // No minor number. + LFA_Read ( file, &byte, sizeof(byte), true ); + + // Check the PostScript DSC minor version number. + + psMinorVer = 0; + while ( IsNumeric( byte ) ) + { + psMinorVer = (psMinorVer * 10) + (byte - '0'); + if ( psMinorVer > 1000 ) { + tree->comment ( "** Invalid PS, Overflow." ); + return; + }; // Overflow. + LFA_Read ( file, &byte, sizeof(byte), true ); + } + + tree->addComment(" psMajor Version: %d, psMinor Version: %d", psMajorVer, psMinorVer);*/ +} + +// ================================================================================================= + +static void DumpPNG ( LFA_FileRef file, XMP_Uns32 pngLen ) { // A PNG file contains an 8 byte signature followed by a sequence of chunks. @@ -5000,6 +5148,13 @@ void DumpFile::Scan (std::string filename, TagTree &tagTree, bool resetTree) tagTree.comment ( "** Recognized MPEG-2 file type, but this is a pure sidecar solution. No legacy dump available at this time." ); + } else if ( format == kXMP_PostScriptFile ) { + + tagTree.pushNode ( "Dumping PostScript file" ); + tagTree.addComment ( "size %lld (0x%llx)", fileLen, fileLen ); + DumpPS ( fileRef, fileLen ); + tagTree.popNode(); + } else if ( format == kXMP_UnknownFile ) { tagTree.pushNode ( "Unknown format. packet scanning, size %d (0x%X)", fileLen, fileLen ); diff --git a/samples/source/common/DumpFile.h b/samples/source/common/DumpFile.h index 96524ca..3324dc5 100644 --- a/samples/source/common/DumpFile.h +++ b/samples/source/common/DumpFile.h @@ -11,6 +11,8 @@ #define XMPQE_DUMPFILE_H #include "samples/source/common/TagTree.h" +#define IsNumeric( ch ) (ch >='0' && ch<='9' ) + class DumpFile { public: static void Scan( std::string filename, TagTree &tagTree, bool resetTree = true ); diff --git a/samples/source/common/LargeFileAccess.cpp b/samples/source/common/LargeFileAccess.cpp index 411152b..1d5c725 100644 --- a/samples/source/common/LargeFileAccess.cpp +++ b/samples/source/common/LargeFileAccess.cpp @@ -523,7 +523,7 @@ void LFA_Throw ( const char* msg, int id ) // LFA implementations for POSIX // ============================= -#if XMP_UNIXBuild +#if XMP_UNIXBuild || XMP_iOSBuild // --------------------------------------------------------------------------------------------- @@ -842,7 +842,8 @@ bool LFA_isEof( LFA_FileRef file ) return filesize == filepos; #endif - #if XMP_UNIXBuild + + #if XMP_UNIXBuild || XMP_iOSBuild int descr = (int)file; struct stat info; diff --git a/samples/source/common/TagTree.cpp b/samples/source/common/TagTree.cpp index de5d3b4..713998a 100644 --- a/samples/source/common/TagTree.cpp +++ b/samples/source/common/TagTree.cpp @@ -288,10 +288,16 @@ void TagTree::digest64u(XMP_Uns64 expected, LFA_FileRef file,const std::string k { XMP_Uns64 tmp=digest64u( file,"",BigEndian, hexDisplay ); if (expected != tmp ) + { if (hexDisplay) + { throw DumpFileException("'%s' was 0x%.16X, expected: 0x%.16X",key.c_str(),tmp,expected); + } else + { throw DumpFileException("'%s' was %d, expected: %d",key.c_str(),tmp,expected); + } + } } void TagTree::digest32s(XMP_Int32 expected, LFA_FileRef file,const std::string key /*=""*/, bool BigEndian /*=false*/ ) @@ -305,10 +311,16 @@ void TagTree::digest32u(XMP_Uns32 expected, LFA_FileRef file,const std::string k { XMP_Uns32 tmp=digest32u( file,"",BigEndian, hexDisplay ); if (expected != tmp ) + { if (hexDisplay) + { throw DumpFileException("'%s' was 0x%.8X, expected: 0x%.8X",key.c_str(),tmp,expected); + } else + { throw DumpFileException("'%s' was %d, expected: %d",key.c_str(),tmp,expected); + } + } } void TagTree::digest16s(XMP_Int16 expected, LFA_FileRef file,const std::string key /*=""*/, bool BigEndian /*=false*/ ) @@ -322,10 +334,16 @@ void TagTree::digest16u(XMP_Uns16 expected, LFA_FileRef file,const std::string k { XMP_Uns16 tmp=digest16u( file,key,BigEndian, hexDisplay ); if (expected != tmp ) + { if (hexDisplay) + { throw DumpFileException("'%s' was 0x%.4X, expected: 0x%.4X",key.c_str(),tmp,expected); + } else + { throw DumpFileException("'%s' was %d, expected: %d",key.c_str(),tmp,expected); + } + } } ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/samples/source/common/globals.h b/samples/source/common/globals.h index f056b60..30be27d 100644 --- a/samples/source/common/globals.h +++ b/samples/source/common/globals.h @@ -17,13 +17,13 @@ #include <cstdio> //sanity check platform/endianess - #if !defined(WIN_ENV) && !defined(MAC_ENV) && !defined(UNIX_ENV) - #error "XMP environment error - must define one of MAC_ENV, WIN_ENV, or UNIX_ENV" + #if !defined(WIN_ENV) && !defined(MAC_ENV) && !defined(UNIX_ENV) && !defined(IOS_ENV) + #error "XMP environment error - must define one of MAC_ENV, WIN_ENV, UNIX_ENV or IOS_ENV" #endif #ifdef WIN_ENV #define XMPQE_LITTLE_ENDIAN 1 - #elif defined(MAC_ENV) + #elif (defined(MAC_ENV) || defined(IOS_ENV)) #if __BIG_ENDIAN__ #define XMPQE_BIG_ENDIAN 1 #elif __LITTLE_ENDIAN__ diff --git a/samples/source/dumpfile/main.cpp b/samples/source/dumpfile/main.cpp index eb2f663..bd11b05 100644 --- a/samples/source/dumpfile/main.cpp +++ b/samples/source/dumpfile/main.cpp @@ -31,6 +31,8 @@ const int DUMPFILEVERSION=2; #include <stdexcept> #include <iostream> #include <string> +#include <cstring> +#include <cstdio> #include <vector> #include <sstream> diff --git a/samples/source/xmpcommand/Actions.cpp b/samples/source/xmpcommand/Actions.cpp index ad7028c..c738930 100644 --- a/samples/source/xmpcommand/Actions.cpp +++ b/samples/source/xmpcommand/Actions.cpp @@ -13,7 +13,11 @@ const char * XMP_EXE_VERSION= "4.4"; #include <stdexcept> -#include <stdarg.h> +#include <cstdarg> +#include <cstdio> +#include <vector> +#include <string> +#include <cstring> //XMP related #define TXMP_STRING_TYPE std::string diff --git a/samples/source/xmpcommand/XMPCommand.cpp b/samples/source/xmpcommand/XMPCommand.cpp index f6fc574..670fc1b 100644 --- a/samples/source/xmpcommand/XMPCommand.cpp +++ b/samples/source/xmpcommand/XMPCommand.cpp @@ -16,8 +16,10 @@ #include <stdexcept> #include <iostream> -#include <string> +#include <cstdio> #include <vector> +#include <string> +#include <cstring> #include <sstream> #include "samples/source/common/globals.h" |