summaryrefslogtreecommitdiff
path: root/samples/source
diff options
context:
space:
mode:
authorHubert Figuière <hub@figuiere.net>2013-06-29 22:31:09 -0400
committerHubert Figuière <hub@figuiere.net>2013-06-29 22:31:09 -0400
commita36182b4304c2f0d4c27091fcf26c36ea648d9f2 (patch)
tree98a365bd6a7bd95270c093021b815c1fc09b0f9a /samples/source
parent71d488b0d4a91ef83e63b1a01e199f29c0412821 (diff)
parent4652015fe779e12fb06ff8fa56bf70e373cd3894 (diff)
Update to XMP SDK CC-2013.06.
Merge branch 'adobe-sdk' into cc-2013.06-integration Conflicts: XMPCore/source/XMPMeta.cpp XMPCore/source/XMPMeta.hpp XMPFiles/source/FormatSupport/TIFF_FileWriter.cpp XMPFiles/source/PluginHandler/XMPAtoms.h public/include/XMP_Const.h samples/source/DumpMainXMP.cpp samples/source/DumpScannedXMP.cpp samples/source/XMPCoreCoverage.cpp samples/source/XMPFilesCoverage.cpp samples/source/common/LargeFileAccess.cpp samples/source/common/globals.h source/EndianUtils.hpp
Diffstat (limited to 'samples/source')
-rw-r--r--samples/source/CustomSchema.cpp3
-rw-r--r--samples/source/DumpMainXMP.cpp11
-rw-r--r--samples/source/DumpScannedXMP.cpp19
-rw-r--r--samples/source/ModifyingXMP.cpp3
-rw-r--r--samples/source/ReadingXMP.cpp3
-rw-r--r--samples/source/UnicodeCorrectness.cpp2813
-rw-r--r--samples/source/UnicodeParseSerialize.cpp510
-rw-r--r--samples/source/UnicodePerformance.cpp308
-rw-r--r--samples/source/XMPCoreCoverage.cpp11
-rw-r--r--samples/source/XMPFilesCoverage.cpp8
-rw-r--r--samples/source/XMPIterations.cpp241
-rw-r--r--samples/source/common/DumpFile.cpp177
-rw-r--r--samples/source/common/DumpFile.h2
-rw-r--r--samples/source/common/LargeFileAccess.cpp5
-rw-r--r--samples/source/common/TagTree.cpp18
-rw-r--r--samples/source/common/globals.h6
-rw-r--r--samples/source/dumpfile/main.cpp2
-rw-r--r--samples/source/xmpcommand/Actions.cpp6
-rw-r--r--samples/source/xmpcommand/XMPCommand.cpp4
19 files changed, 3992 insertions, 158 deletions
diff --git a/samples/source/CustomSchema.cpp b/samples/source/CustomSchema.cpp
index 270b6a8..1cf0566 100644
--- a/samples/source/CustomSchema.cpp
+++ b/samples/source/CustomSchema.cpp
@@ -13,7 +13,10 @@
* and modify properties with complex paths using the path composition utilities from the XMP API
*/
+#include <cstdio>
+#include <vector>
#include <string>
+#include <cstring>
// Must be defined to instantiate template classes
#define TXMP_STRING_TYPE std::string
diff --git a/samples/source/DumpMainXMP.cpp b/samples/source/DumpMainXMP.cpp
index 44ddedd..313665b 100644
--- a/samples/source/DumpMainXMP.cpp
+++ b/samples/source/DumpMainXMP.cpp
@@ -11,14 +11,15 @@
* it to a human-readable log file. This is preferred over "dumb" packet scanning.
*/
+#include <cstdio>
+#include <vector>
#include <string>
-#include <time.h>
+#include <cstring>
+#include <ctime>
-#include <stdio.h>
-#include <stdlib.h>
+#include <cstdlib>
#include <stdexcept>
-#include <errno.h>
-#include <cstring>
+#include <cerrno>
#if XMP_WinBuild
#pragma warning ( disable : 4127 ) // conditional expression is constant
diff --git a/samples/source/DumpScannedXMP.cpp b/samples/source/DumpScannedXMP.cpp
index d398334..1acbc82 100644
--- a/samples/source/DumpScannedXMP.cpp
+++ b/samples/source/DumpScannedXMP.cpp
@@ -11,14 +11,15 @@
* serializes the XMP and writes it to log file.
*/
+#include <cstdio>
+#include <vector>
#include <string>
-#include <time.h>
+#include <cstring>
+#include <ctime>
-#include <stdio.h>
-#include <stdlib.h>
+#include <cstdlib>
#include <stdexcept>
-#include <errno.h>
-#include <cstring>
+#include <cerrno>
#if XMP_WinBuild
#pragma warning ( disable : 4127 ) // conditional expression is constant
@@ -62,10 +63,10 @@ ProcessPacket ( const char * fileName,
char title [1000];
- sprintf ( title, "// Dumping raw input for \"%s\" (%d..%d)", fileName, offset, (offset + length - 1) );
+ sprintf ( title, "// Dumping raw input for \"%s\" (%lu..%lu)", fileName, offset, (offset + length - 1) );
printf ( "// " );
for ( size_t i = 3; i < strlen(title); ++i ) printf ( "=" );
- printf ( "\n\n%s\n\n%.*s\n\n", title, length, xmlString.c_str() );
+ printf ( "\n\n%s\n\n%.*s\n\n", title, (int)length, xmlString.c_str() );
fflush ( stdout );
SXMPMeta xmpObj;
@@ -81,11 +82,11 @@ ProcessPacket ( const char * fileName,
string xmpString;
xmpObj.SerializeToBuffer ( &xmpString, kXMP_OmitPacketWrapper );
- printf ( "\nPretty serialization, %d bytes :\n\n%s\n", xmpString.size(), xmpString.c_str() );
+ printf ( "\nPretty serialization, %lu bytes :\n\n%s\n", xmpString.size(), xmpString.c_str() );
fflush ( stdout );
xmpObj.SerializeToBuffer ( &xmpString, (kXMP_OmitPacketWrapper | kXMP_UseCompactFormat) );
- printf ( "Compact serialization, %d bytes :\n\n%s\n", xmpString.size(), xmpString.c_str() );
+ printf ( "Compact serialization, %lu bytes :\n\n%s\n", xmpString.size(), xmpString.c_str() );
fflush ( stdout );
} // ProcessPacket
diff --git a/samples/source/ModifyingXMP.cpp b/samples/source/ModifyingXMP.cpp
index e899845..856bb80 100644
--- a/samples/source/ModifyingXMP.cpp
+++ b/samples/source/ModifyingXMP.cpp
@@ -11,7 +11,10 @@
* Demonstrates how to open a file for update, and modifying the contained XMP before writing it back to the file.
*/
+#include <cstdio>
+#include <vector>
#include <string>
+#include <cstring>
// Must be defined to instantiate template classes
#define TXMP_STRING_TYPE std::string
diff --git a/samples/source/ReadingXMP.cpp b/samples/source/ReadingXMP.cpp
index 1f4daa8..bfcf56c 100644
--- a/samples/source/ReadingXMP.cpp
+++ b/samples/source/ReadingXMP.cpp
@@ -12,7 +12,10 @@
* and examining it through the XMP object.
*/
+#include <cstdio>
+#include <vector>
#include <string>
+#include <cstring>
// Must be defined to instantiate template classes
#define TXMP_STRING_TYPE std::string
diff --git a/samples/source/UnicodeCorrectness.cpp b/samples/source/UnicodeCorrectness.cpp
new file mode 100644
index 0000000..56f121a
--- /dev/null
+++ b/samples/source/UnicodeCorrectness.cpp
@@ -0,0 +1,2813 @@
+// =================================================================================================
+
+#include <cstdio>
+#include <vector>
+#include <string>
+#include <cstring>
+#include <ctime>
+
+#include <cstdlib>
+#include <cerrno>
+#include <stdexcept>
+
+using namespace std;
+
+#if WIN_ENV
+ #pragma warning ( disable : 4701 ) // local variable may be used without having been initialized
+#endif
+
+// =================================================================================================
+
+#include "public/include/XMP_Environment.h"
+#include "public/include/XMP_Const.h"
+
+#include "source/EndianUtils.hpp"
+#include "source/UnicodeConversions.hpp"
+#include "source/UnicodeConversions.cpp"
+
+// =================================================================================================
+
+#define kCodePointCount 0x110000 // Number of Unicode code points, U+0000..U+10FFFF inclusive.
+
+UTF8Unit sU8 [kCodePointCount*4 + 8]; // Room for 4 UTF-8 units per code point plus slack. NOTE(review): presumably a scratch buffer for whole-range conversions; its use is outside this view.
+UTF16Unit sU16 [kCodePointCount*2 + 4]; // Room for 2 UTF-16 units (a surrogate pair) per code point plus slack.
+UTF32Unit sU32 [kCodePointCount + 2]; // Room for 1 UTF-32 unit per code point plus slack.
+
+// =================================================================================================
+
+// Returns the given UTF-16 unit byte swapped when the host is little endian, unchanged otherwise.
+// The tests use this to compare a unit stored in big endian order against a native value.
+static UTF16Unit NativeUTF16BE ( UTF16Unit value )
+{
+    UTF16Unit unit = value;
+    if ( kBigEndianHost ) return unit;    // Big endian storage already matches the host order.
+    SwapUTF16 ( &unit, &unit, 1 );
+    return unit;
+}
+
+// Returns the given UTF-16 unit byte swapped when the host is big endian, unchanged otherwise.
+// The tests use this to compare a unit stored in little endian order against a native value.
+static UTF16Unit NativeUTF16LE ( UTF16Unit value )
+{
+    UTF16Unit unit = value;
+    if ( ! kBigEndianHost ) return unit;    // Little endian storage already matches the host order.
+    SwapUTF16 ( &unit, &unit, 1 );
+    return unit;
+}
+
+// Returns the given UTF-32 unit byte swapped when the host is little endian, unchanged otherwise.
+static UTF32Unit NativeUTF32BE ( UTF32Unit value )
+{
+    UTF32Unit unit = value;
+    if ( kBigEndianHost ) return unit;    // Big endian storage already matches the host order.
+    SwapUTF32 ( &unit, &unit, 1 );
+    return unit;
+}
+
+// Returns the given UTF-32 unit byte swapped when the host is big endian, unchanged otherwise.
+static UTF32Unit NativeUTF32LE ( UTF32Unit value )
+{
+    UTF32Unit unit = value;
+    if ( ! kBigEndianHost ) return unit;    // Little endian storage already matches the host order.
+    SwapUTF32 ( &unit, &unit, 1 );
+    return unit;
+}
+
+// =================================================================================================
+
+// Negative test: CodePoint_to_UTF8 is expected to throw for the given code point. A line is written
+// to the log only when the call returns normally, i.e. when the bad input was not rejected.
+static void Bad_CodePoint_to_UTF8 ( FILE * log, UTF32Unit cp )
+{
+    bool rejected = false;
+    UTF8Unit scratch[8];
+    size_t written;
+
+    try {
+        CodePoint_to_UTF8 ( cp, scratch, sizeof(scratch), &written );
+    } catch ( ... ) {
+        rejected = true;    // The expected outcome.
+    }
+
+    if ( ! rejected ) fprintf ( log, " *** CodePoint_to_UTF8 failure, no exception for 0x%X\n", cp );
+
+}
+
+// =================================================================================================
+
+// Negative test: CodePoint_to_UTF16BE is expected to throw for the given code point. A line is
+// written to the log only when the call returns normally, i.e. when the bad input was not rejected.
+static void Bad_CodePoint_to_UTF16BE ( FILE * log, UTF32Unit cp )
+{
+    UTF16Unit u16[4];
+    size_t len;
+
+    try {
+        // The capacity is a count of UTF-16 units, not bytes, so divide by the unit size.
+        CodePoint_to_UTF16BE ( cp, u16, sizeof(u16)/sizeof(u16[0]), &len );
+        fprintf ( log, " *** CodePoint_to_UTF16BE failure, no exception for 0x%X\n", cp );
+    } catch ( ... ) {
+        // Do nothing, the exception is expected.
+    }
+
+}
+
+// =================================================================================================
+
+// Negative test: CodePoint_to_UTF16LE is expected to throw for the given code point. A line is
+// written to the log only when the call returns normally, i.e. when the bad input was not rejected.
+static void Bad_CodePoint_to_UTF16LE ( FILE * log, UTF32Unit cp )
+{
+    UTF16Unit u16[4];
+    size_t len;
+
+    try {
+        // The capacity is a count of UTF-16 units, not bytes, so divide by the unit size.
+        CodePoint_to_UTF16LE ( cp, u16, sizeof(u16)/sizeof(u16[0]), &len );
+        fprintf ( log, " *** CodePoint_to_UTF16LE failure, no exception for 0x%X\n", cp );
+    } catch ( ... ) {
+        // Do nothing, the exception is expected.
+    }
+
+}
+
+// =================================================================================================
+
+// Negative test: CodePoint_from_UTF8 is expected to throw for the given NUL terminated byte
+// sequence. The message describes the kind of bad input and is logged only when no exception occurs.
+static void Bad_CodePoint_from_UTF8 ( FILE * log, const char * inU8, const char * message )
+{
+    bool rejected = false;
+    UTF32Unit decoded;
+    size_t consumed;
+
+    try {
+        CodePoint_from_UTF8 ( (UTF8Unit*)inU8, strlen(inU8), &decoded, &consumed );
+    } catch ( ... ) {
+        rejected = true;    // The expected outcome.
+    }
+
+    if ( ! rejected ) fprintf ( log, " *** CodePoint_from_UTF8 failure, no exception for %s\n", message );
+
+}
+
+// =================================================================================================
+
+// Negative test: CodePoint_from_UTF16BE is expected to throw for the given inLen UTF-16 units. The
+// message describes the kind of bad input and is logged only when no exception occurs.
+static void Bad_CodePoint_from_UTF16BE ( FILE * log, const UTF16Unit * inU16, const size_t inLen, const char * message )
+{
+    bool rejected = false;
+    UTF32Unit decoded;
+    size_t consumed;
+
+    try {
+        CodePoint_from_UTF16BE ( inU16, inLen, &decoded, &consumed );
+    } catch ( ... ) {
+        rejected = true;    // The expected outcome.
+    }
+
+    if ( ! rejected ) fprintf ( log, " *** CodePoint_from_UTF16BE failure, no exception for %s\n", message );
+
+}
+
+// =================================================================================================
+
+// Negative test: CodePoint_from_UTF16LE is expected to throw for the given inLen UTF-16 units. The
+// message describes the kind of bad input and is logged only when no exception occurs.
+static void Bad_CodePoint_from_UTF16LE ( FILE * log, const UTF16Unit * inU16, const size_t inLen, const char * message )
+{
+    bool rejected = false;
+    UTF32Unit decoded;
+    size_t consumed;
+
+    try {
+        CodePoint_from_UTF16LE ( inU16, inLen, &decoded, &consumed );
+    } catch ( ... ) {
+        rejected = true;    // The expected outcome.
+    }
+
+    if ( ! rejected ) fprintf ( log, " *** CodePoint_from_UTF16LE failure, no exception for %s\n", message );
+
+}
+
+// =================================================================================================
+
+// Writes the outcome of one byte swapping check to the log. Failures use the same " *** " marker
+// as the other tests in this file so that they can be found by searching the log.
+static void LogSwapCheck ( FILE * log, const char * name, bool passed )
+{
+    if ( passed ) {
+        fprintf ( log, " %s OK\n", name );
+    } else {
+        fprintf ( log, " *** %s failure\n", name );
+    }
+}
+
+// Exercises the single unit swappers (UTF16InSwap, UTF32InSwap, UTF16OutSwap, UTF32OutSwap) and the
+// array swappers (SwapUTF16, SwapUTF32) with fixed patterns. All results go to the given log.
+static void Test_SwappingPrimitives ( FILE * log )
+{
+    UTF16Unit u16[8];
+    UTF32Unit u32[8];
+    UTF32Unit i;
+
+    fprintf ( log, "\nTesting byte swapping primitives\n" );
+
+    u16[0] = 0x1122;
+    LogSwapCheck ( log, "UTF16InSwap", (UTF16InSwap(&u16[0]) == 0x2211) );
+
+    u32[0] = 0x11223344;
+    LogSwapCheck ( log, "UTF32InSwap", (UTF32InSwap(&u32[0]) == 0x44332211) );
+
+    UTF16OutSwap ( &u16[0], 0x1122 );
+    LogSwapCheck ( log, "UTF16OutSwap", (u16[0] == 0x2211) );
+
+    UTF32OutSwap ( &u32[0], 0x11223344 );
+    LogSwapCheck ( log, "UTF32OutSwap", (u32[0] == 0x44332211) );
+
+    // In-place array swap: each unit gets a distinct low byte so misplaced units are detected.
+    for ( i = 0; i < 8; ++i ) u16[i] = 0x1100 | UTF16Unit(i);
+    SwapUTF16 ( u16, u16, 8 );
+    for ( i = 0; i < 8; ++i ) {
+        if ( u16[i] != ((UTF16Unit(i) << 8) | 0x11) ) break;
+    }
+    LogSwapCheck ( log, "SwapUTF16", (i == 8) );    // i reaches 8 only if every unit matched.
+
+    for ( i = 0; i < 8; ++i ) u32[i] = 0x11223300 | i;
+    SwapUTF32 ( u32, u32, 8 );
+    for ( i = 0; i < 8; ++i ) {
+        if ( u32[i] != ((i << 24) | 0x00332211) ) break;
+    }
+    LogSwapCheck ( log, "SwapUTF32", (i == 8) );
+
+} // Test_SwappingPrimitives
+
+// =================================================================================================
+
+// Checks CodePoint_to_UTF8 for every code point in [first, limit), all of which are expected to
+// encode as exactly unitCount UTF-8 units. For each code point:
+//  - every buffer capacity below unitCount must report zero units written;
+//  - a capacity of unitCount, and of unitCount+1, must write unitCount units that decode back to
+//    the same code point via CodePoint_from_UTF8.
+// The caller supplies the scratch buffer. *decoded receives each decode result, so on return it
+// holds the code point decoded last.
+static void RoundTrip_CodePoint_to_UTF8 ( FILE * log, UTF32Unit first, UTF32Unit limit, size_t unitCount,
+                                          UTF8Unit * buffer, UTF32Unit * decoded )
+{
+    size_t written, consumed;
+
+    for ( UTF32Unit cp = first; cp < limit; ++cp ) {
+
+        for ( size_t room = 0; room < unitCount; ++room ) {
+            CodePoint_to_UTF8 ( cp, buffer, room, &written );
+            if ( written != 0 ) fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp );
+        }
+
+        for ( size_t room = unitCount; room <= unitCount+1; ++room ) {
+            CodePoint_to_UTF8 ( cp, buffer, room, &written );
+            CodePoint_from_UTF8 ( buffer, written, decoded, &consumed );
+            if ( (written != unitCount) || (cp != *decoded) || (consumed != unitCount) ) {
+                fprintf ( log, " *** CodePoint_to_UTF8 failure for U+%.4X\n", cp );
+            }
+        }
+
+    }
+
+}
+
+// Exhaustively tests CodePoint_to_UTF8 over all valid code points, range by range, then verifies
+// that out of range and surrogate code points are rejected with an exception.
+static void Test_CodePoint_to_UTF8 ( FILE * log )
+{
+    UTF32Unit rangeStart, lastDecoded;
+    UTF8Unit u8[8];
+
+    // -------------------------------------
+    // Test CodePoint_to_UTF8 on good input.
+
+    fprintf ( log, "\nTesting CodePoint_to_UTF8 on good input\n" );
+
+    // ASCII, 00..7F, one unit.
+    rangeStart = 0;
+    RoundTrip_CodePoint_to_UTF8 ( log, rangeStart, 0x80, 1, u8, &lastDecoded );
+    fprintf ( log, " CodePoint_to_UTF8 done for %.4X..%.4X\n", rangeStart, lastDecoded );
+
+    // 0080..07FF, two units : 110x xxxx 10xx xxxx
+    // The start of this range continues from the code point decoded last in the previous one.
+    rangeStart = lastDecoded+1;
+    RoundTrip_CodePoint_to_UTF8 ( log, rangeStart, 0x800, 2, u8, &lastDecoded );
+    fprintf ( log, " CodePoint_to_UTF8 done for %.4X..%.4X\n", rangeStart, lastDecoded );
+
+    // 0800..D7FF, three units : 1110 xxxx 10xx xxxx 10xx xxxx
+    rangeStart = lastDecoded+1;
+    RoundTrip_CodePoint_to_UTF8 ( log, rangeStart, 0xD800, 3, u8, &lastDecoded );
+    fprintf ( log, " CodePoint_to_UTF8 done for %.4X..%.4X\n", rangeStart, lastDecoded );
+
+    // E000..FFFF, three units : 1110 xxxx 10xx xxxx 10xx xxxx
+    // This range starts at a fixed value in order to skip the surrogates, D800..DFFF.
+    rangeStart = 0xE000;
+    RoundTrip_CodePoint_to_UTF8 ( log, rangeStart, 0x10000, 3, u8, &lastDecoded );
+    fprintf ( log, " CodePoint_to_UTF8 done for %.4X..%.4X\n", rangeStart, lastDecoded );
+
+    // 10000..10FFFF, four units : 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
+    rangeStart = lastDecoded+1;
+    RoundTrip_CodePoint_to_UTF8 ( log, rangeStart, 0x110000, 4, u8, &lastDecoded );
+    fprintf ( log, " CodePoint_to_UTF8 done for %.4X..%.4X\n", rangeStart, lastDecoded );
+
+    // --------------------------------------
+    // Test CodePoint_to_UTF8 with bad input.
+
+    fprintf ( log, "\nTesting CodePoint_to_UTF8 with bad input\n" );
+
+    // Code points beyond the defined range.
+    Bad_CodePoint_to_UTF8 ( log, 0x110000 );
+    Bad_CodePoint_to_UTF8 ( log, 0x123456 );
+    Bad_CodePoint_to_UTF8 ( log, 0xFFFFFFFF );
+
+    // Surrogate code points.
+    Bad_CodePoint_to_UTF8 ( log, 0xD800 );
+    Bad_CodePoint_to_UTF8 ( log, 0xDC00 );
+    Bad_CodePoint_to_UTF8 ( log, 0xDFFF );
+
+    fprintf ( log, " CodePoint_to_UTF8 done with bad input\n" );
+
+} // Test_CodePoint_to_UTF8
+
+// =================================================================================================
+
+// Checks CodePoint_from_UTF8 on one well formed sequence of unitCount UTF-8 units at u8, which the
+// caller has followed with a 0xFF sentinel unit. The sequence encodes the code point "expected".
+//  - Every input length below unitCount must report zero units consumed.
+//  - An input length of unitCount, and of unitCount+1 (which exposes the sentinel), must decode
+//    "expected" and consume exactly unitCount units.
+// Every failure is logged with the " *** " marker so that a search of the log finds all of them.
+static void Check_CodePoint_from_UTF8_Lengths ( FILE * log, UTF8Unit * u8, size_t unitCount, UTF32Unit expected )
+{
+    UTF32Unit cp;
+    size_t len;
+
+    for ( size_t avail = 0; avail < unitCount; ++avail ) {
+        CodePoint_from_UTF8 ( u8, avail, &cp, &len );
+        if ( len != 0 ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", expected );
+    }
+
+    for ( size_t avail = unitCount; avail <= unitCount+1; ++avail ) {
+        CodePoint_from_UTF8 ( u8, avail, &cp, &len );
+        if ( (cp != expected) || (len != unitCount) ) fprintf ( log, " *** CodePoint_from_UTF8 failure for U+%.4X\n", expected );
+    }
+
+}
+
+// Exhaustively tests CodePoint_from_UTF8 over all valid code points by building each UTF-8 sequence
+// directly from its bit fields, then verifies that malformed, out of range, and surrogate sequences
+// are rejected with an exception.
+static void Test_CodePoint_from_UTF8 ( FILE * log )
+{
+    UTF32Unit i, j, k, l;
+    UTF32Unit cp0, cpx;
+    UTF8Unit u8[5];    // At most 4 units of data plus the 0xFF sentinel.
+
+    // ---------------------------------------
+    // Test CodePoint_from_UTF8 on good input.
+
+    fprintf ( log, "\nTesting CodePoint_from_UTF8 on good input\n" );
+
+    // Test ASCII, 00..7F.
+    cp0 = 0;
+    for ( i = 0; i < 0x80; ++i ) {
+        u8[0] = UTF8Unit(i); u8[1] = 0xFF; cpx = i;
+        Check_CodePoint_from_UTF8_Lengths ( log, u8, 1, cpx );
+    }
+    fprintf ( log, " CodePoint_from_UTF8 done for %.4X..%.4X\n", cp0, cpx );
+
+    // Test 2 byte values, 0080..07FF : 110x xxxx 10xx xxxx
+    // Combinations below cp0 would be overlong forms of the previous range, so they are skipped.
+    cp0 = cpx+1;
+    for ( i = 0; i < 0x20; ++i ) {
+        for ( j = 0; j < 0x40; ++j ) {
+            cpx = (i<<6) + j; if ( cpx < cp0 ) continue;
+            u8[0] = 0xC0+UTF8Unit(i); u8[1] = 0x80+UTF8Unit(j); u8[2] = 0xFF;
+            Check_CodePoint_from_UTF8_Lengths ( log, u8, 2, cpx );
+        }
+    }
+    fprintf ( log, " CodePoint_from_UTF8 done for %.4X..%.4X\n", cp0, cpx );
+
+    // Test 3 byte values, 0800..D7FF : 1110 xxxx 10xx xxxx 10xx xxxx
+    // The loops stop at D7FF, just before the surrogates.
+    cp0 = cpx+1;
+    for ( i = 0; i < 0x10; ++i ) {
+        for ( j = 0; j < 0x40; ++j ) {
+            for ( k = 0; k < 0x40; ++k ) {
+                cpx = (i<<12) + (j<<6) + k; if ( cpx < cp0 ) continue;
+                u8[0] = 0xE0+UTF8Unit(i); u8[1] = 0x80+UTF8Unit(j); u8[2] = 0x80+UTF8Unit(k); u8[3] = 0xFF;
+                Check_CodePoint_from_UTF8_Lengths ( log, u8, 3, cpx );
+            }
+            if ( cpx == 0xD7FF ) break;
+        }
+        if ( cpx == 0xD7FF ) break;
+    }
+    fprintf ( log, " CodePoint_from_UTF8 done for %.4X..%.4X\n", cp0, cpx );
+
+    // Test 3 byte values, E000..FFFF : 1110 xxxx 10xx xxxx 10xx xxxx
+    cp0 = 0xE000;
+    for ( i = 0; i < 0x10; ++i ) {
+        for ( j = 0; j < 0x40; ++j ) {
+            for ( k = 0; k < 0x40; ++k ) {
+                cpx = (i<<12) + (j<<6) + k; if ( cpx < cp0 ) continue;
+                u8[0] = 0xE0+UTF8Unit(i); u8[1] = 0x80+UTF8Unit(j); u8[2] = 0x80+UTF8Unit(k); u8[3] = 0xFF;
+                Check_CodePoint_from_UTF8_Lengths ( log, u8, 3, cpx );
+            }
+        }
+    }
+    fprintf ( log, " CodePoint_from_UTF8 done for %.4X..%.4X\n", cp0, cpx );
+
+    // Test 4 byte values, 10000..10FFFF : 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
+    // The loops stop at 10FFFF, the last defined code point.
+    cp0 = cpx+1;
+    for ( i = 0; i < 0x7; ++i ) {
+        for ( j = 0; j < 0x40; ++j ) {
+            for ( k = 0; k < 0x40; ++k ) {
+                for ( l = 0; l < 0x40; ++l ) {
+                    cpx = (i<<18) + (j<<12) + (k<<6) + l; if ( cpx < cp0 ) continue;
+                    u8[0] = 0xF0+UTF8Unit(i); u8[1] = 0x80+UTF8Unit(j); u8[2] = 0x80+UTF8Unit(k); u8[3] = 0x80+UTF8Unit(l); u8[4] = 0xFF;
+                    Check_CodePoint_from_UTF8_Lengths ( log, u8, 4, cpx );
+                }
+                if ( cpx == 0x10FFFF ) break;
+            }
+            if ( cpx == 0x10FFFF ) break;
+        }
+        if ( cpx == 0x10FFFF ) break;
+    }
+    fprintf ( log, " CodePoint_from_UTF8 done for %.4X..%.4X\n", cp0, cpx );
+
+    // ----------------------------------------
+    // Test CodePoint_from_UTF8 with bad input.
+
+    fprintf ( log, "\nTesting CodePoint_from_UTF8 with bad input\n" );
+
+    Bad_CodePoint_from_UTF8 ( log, "\x88\x20", "bad leading byte count" ); // One byte "sequence".
+    Bad_CodePoint_from_UTF8 ( log, "\xF9\x90\x80\x80\x80\x20", "bad leading byte count" ); // Five byte sequence.
+    Bad_CodePoint_from_UTF8 ( log, "\xFE\x90\x80\x80\x80\x80\x80\x20", "bad leading byte count" ); // Seven byte sequence.
+    Bad_CodePoint_from_UTF8 ( log, "\xFF\x90\x80\x80\x80\x80\x80\x80\x20", "bad leading byte count" ); // Eight byte sequence.
+
+    Bad_CodePoint_from_UTF8 ( log, "\xF1\x80\x01\x80\x20", "bad following high bits" ); // 00xx xxxx
+    Bad_CodePoint_from_UTF8 ( log, "\xF1\x80\x40\x80\x20", "bad following high bits" ); // 01xx xxxx
+    Bad_CodePoint_from_UTF8 ( log, "\xF1\x80\xC0\x80\x20", "bad following high bits" ); // 11xx xxxx
+
+    Bad_CodePoint_from_UTF8 ( log, "\xF4\x90\x80\x80\x20", "out of range code point" ); // U+110000
+    Bad_CodePoint_from_UTF8 ( log, "\xF7\xBF\xBF\xBF\x20", "out of range code point" ); // U+1FFFFF
+
+    Bad_CodePoint_from_UTF8 ( log, "\xED\xA0\x80\x20", "surrogate code point" ); // U+D800
+    Bad_CodePoint_from_UTF8 ( log, "\xED\xB0\x80\x20", "surrogate code point" ); // U+DC00
+    Bad_CodePoint_from_UTF8 ( log, "\xED\xBF\xBF\x20", "surrogate code point" ); // U+DFFF
+
+    fprintf ( log, " CodePoint_from_UTF8 done with bad input\n" );
+
+} // Test_CodePoint_from_UTF8
+
+// =================================================================================================
+
+static void Test_CodePoint_to_UTF16 ( FILE * log )
+{
+ size_t len, lenx;
+ UTF32Unit cp, cp0, cpx;
+ UTF16Unit u16[3];
+
+ // ----------------------------------------
+ // Test CodePoint_to_UTF16BE on good input.
+
+ fprintf ( log, "\nTesting CodePoint_to_UTF16BE on good input\n" );
+
+ // Some explicit sanity tests, in case the code and exhaustive tests have inverse bugs.
+ if ( kBigEndianHost ) {
+ CodePoint_to_UTF16BE ( 0x1234, u16, 1, &len );
+ if ( (len != 1) || (u16[0] != 0x1234) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+1234\n" );
+ CodePoint_to_UTF16BE ( 0xFEDC, u16, 1, &len );
+ if ( (len != 1) || (u16[0] != 0xFEDC) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+FEDC\n" );
+ CodePoint_to_UTF16BE ( 0x14834, u16, 2, &len );
+ if ( (len != 2) || (u16[0] != 0xD812) || (u16[1] != 0xDC34) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+14834\n" );
+ } else {
+ CodePoint_to_UTF16BE ( 0x1234, u16, 1, &len );
+ if ( (len != 1) || (u16[0] != 0x3412) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+1234\n" );
+ CodePoint_to_UTF16BE ( 0xFEDC, u16, 1, &len );
+ if ( (len != 1) || (u16[0] != 0xDCFE) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+FEDC\n" );
+ CodePoint_to_UTF16BE ( 0x14834, u16, 2, &len );
+ if ( (len != 2) || (u16[0] != 0x12D8) || (u16[1] != 0x34DC) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+14834\n" );
+ }
+ fprintf ( log, " CodePoint_to_UTF16BE sanity tests done\n" );
+
+ // Test the low part of the BMP, 0000..D7FF.
+ cp0 = 0;
+ for ( cp = cp0; cp < 0xD800; ++cp ) {
+ CodePoint_to_UTF16BE ( cp, u16, 0, &len );
+ if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16BE ( cp, u16, 1, &len );
+ if ( (len != 1) || (NativeUTF16BE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16BE ( cp, u16, 2, &len );
+ if ( (len != 1) || (NativeUTF16BE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ }
+ fprintf ( log, " CodePoint_to_UTF16BE done for %.4X..%.4X\n", cp0, cpx );
+
+ // Test the high part of the BMP, E000..FFFF.
+ cp0 = 0xE000;
+ for ( cp = cp0; cp < 0x10000; ++cp ) {
+ CodePoint_to_UTF16BE ( cp, u16, 0, &len );
+ if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16BE ( cp, u16, 1, &len );
+ if ( (len != 1) || (NativeUTF16BE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16BE ( cp, u16, 2, &len );
+ if ( (len != 1) || (NativeUTF16BE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ }
+ fprintf ( log, " CodePoint_to_UTF16BE done for %.4X..%.4X\n", cp0, cpx );
+
+ // Test beyond the BMP, 10000..10FFFF.
+ cp0 = 0x10000;
+ for ( cp = cp0; cp < 0x110000; ++cp ) {
+ CodePoint_to_UTF16BE ( cp, u16, 0, &len );
+ if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16BE ( cp, u16, 1, &len );
+ if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16BE ( cp, u16, 2, &len );
+ if ( (len != 2) ||
+ (NativeUTF16BE(u16[0]) != (0xD800 | ((cp-0x10000) >> 10))) ||
+ (NativeUTF16BE(u16[1]) != (0xDC00 | ((cp-0x10000) & 0x3FF))) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 2) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16BE ( cp, u16, 3, &len );
+ if ( (len != 2) ||
+ (NativeUTF16BE(u16[0]) != (0xD800 | ((cp-0x10000) >> 10))) ||
+ (NativeUTF16BE(u16[1]) != (0xDC00 | ((cp-0x10000) & 0x3FF))) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16BE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 2) ) fprintf ( log, " *** CodePoint_to_UTF16BE failure for U+%.4X\n", cp );
+ }
+ fprintf ( log, " CodePoint_to_UTF16BE done for %.4X..%.4X\n", cp0, cpx );
+
+ // ----------------------------------------
+ // Test CodePoint_to_UTF16LE on good input.
+
+ fprintf ( log, "\nTesting CodePoint_to_UTF16LE on good input\n" );
+
+ // Some explicit sanity tests, in case the code and exhaustive tests have inverse bugs.
+ if ( kBigEndianHost ) {
+ CodePoint_to_UTF16LE ( 0x1234, u16, 1, &len );
+ if ( (len != 1) || (u16[0] != 0x3412) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+1234\n" );
+ CodePoint_to_UTF16LE ( 0xFEDC, u16, 1, &len );
+ if ( (len != 1) || (u16[0] != 0xDCFE) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+FEDC\n" );
+ CodePoint_to_UTF16LE ( 0x14834, u16, 2, &len );
+ if ( (len != 2) || (u16[0] != 0x12D8) || (u16[1] != 0x34DC) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+14834\n" );
+ } else {
+ CodePoint_to_UTF16LE ( 0x1234, u16, 1, &len );
+ if ( (len != 1) || (u16[0] != 0x1234) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+1234\n" );
+ CodePoint_to_UTF16LE ( 0xFEDC, u16, 1, &len );
+ if ( (len != 1) || (u16[0] != 0xFEDC) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+FEDC\n" );
+ CodePoint_to_UTF16LE ( 0x14834, u16, 2, &len );
+ if ( (len != 2) || (u16[0] != 0xD812) || (u16[1] != 0xDC34) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+14834\n" );
+ }
+ fprintf ( log, " CodePoint_to_UTF16LE sanity tests done\n" );
+
+ // Test the low part of the BMP, 0000..D7FF.
+ cp0 = 0;
+ for ( cp = cp0; cp < 0xD800; ++cp ) {
+ CodePoint_to_UTF16LE ( cp, u16, 0, &len );
+ if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16LE ( cp, u16, 1, &len );
+ if ( (len != 1) || (NativeUTF16LE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16LE ( cp, u16, 2, &len );
+ if ( (len != 1) || (NativeUTF16LE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ }
+ fprintf ( log, " CodePoint_to_UTF16LE done for %.4X..%.4X\n", cp0, cpx );
+
+ // Test the high part of the BMP, E000..FFFF.
+ cp0 = 0xE000;
+ for ( cp = cp0; cp < 0x10000; ++cp ) {
+ CodePoint_to_UTF16LE ( cp, u16, 0, &len );
+ if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16LE ( cp, u16, 1, &len );
+ if ( (len != 1) || (NativeUTF16LE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16LE ( cp, u16, 2, &len );
+ if ( (len != 1) || (NativeUTF16LE(u16[0]) != cp) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 1) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ }
+ fprintf ( log, " CodePoint_to_UTF16LE done for %.4X..%.4X\n", cp0, cpx );
+
+ // Test beyond the BMP, 10000..10FFFF.
+ cp0 = 0x10000;
+ for ( cp = cp0; cp < 0x110000; ++cp ) {
+ CodePoint_to_UTF16LE ( cp, u16, 0, &len );
+ if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16LE ( cp, u16, 1, &len );
+ if ( len != 0 ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16LE ( cp, u16, 2, &len );
+ if ( (len != 2) ||
+ (NativeUTF16LE(u16[0]) != (0xD800 | ((cp-0x10000) >> 10))) ||
+ (NativeUTF16LE(u16[1]) != (0xDC00 | ((cp-0x10000) & 0x3FF))) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 2) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_to_UTF16LE ( cp, u16, 3, &len );
+ if ( (len != 2) ||
+ (NativeUTF16LE(u16[0]) != (0xD800 | ((cp-0x10000) >> 10))) ||
+ (NativeUTF16LE(u16[1]) != (0xDC00 | ((cp-0x10000) & 0x3FF))) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ CodePoint_from_UTF16LE ( u16, len, &cpx, &lenx );
+ if ( (cp != cpx) || (lenx != 2) ) fprintf ( log, " *** CodePoint_to_UTF16LE failure for U+%.4X\n", cp );
+ }
+ fprintf ( log, " CodePoint_to_UTF16LE done for %.4X..%.4X\n", cp0, cpx );
+
+ // ---------------------------------------
+ // Test CodePoint_to_UTF16 with bad input.
+
+ fprintf ( log, "\nTesting CodePoint_to_UTF16 with bad input\n" );
+
+ Bad_CodePoint_to_UTF16BE ( log, 0x110000 ); // Code points beyond the defined range.
+ Bad_CodePoint_to_UTF16BE ( log, 0x123456 );
+ Bad_CodePoint_to_UTF16BE ( log, 0xFFFFFFFF );
+ Bad_CodePoint_to_UTF16BE ( log, 0xD800 ); // Surrogate code points.
+ Bad_CodePoint_to_UTF16BE ( log, 0xDC00 );
+ Bad_CodePoint_to_UTF16BE ( log, 0xDFFF );
+
+ fprintf ( log, " CodePoint_to_UTF16BE done with bad input\n" );
+
+ Bad_CodePoint_to_UTF16LE ( log, 0x110000 ); // Code points beyond the defined range.
+ Bad_CodePoint_to_UTF16LE ( log, 0x123456 );
+ Bad_CodePoint_to_UTF16LE ( log, 0xFFFFFFFF );
+ Bad_CodePoint_to_UTF16LE ( log, 0xD800 ); // Surrogate code points.
+ Bad_CodePoint_to_UTF16LE ( log, 0xDC00 );
+ Bad_CodePoint_to_UTF16LE ( log, 0xDFFF );
+
+ fprintf ( log, " CodePoint_to_UTF16LE done with bad input\n" );
+
+} // Test_CodePoint_to_UTF16
+
+// =================================================================================================
+
+// Exercises CodePoint_from_UTF16BE and CodePoint_from_UTF16LE. Each decoder gets a few hand-computed
+// spot checks, then a sweep over every non-surrogate code point (0000..D7FF, E000..FFFF and
+// 10000..10FFFF) with several input lengths, and finally some malformed surrogate sequences.
+// Every mismatch is written to the log as a "***" line; nothing is returned.
+static void Test_CodePoint_from_UTF16 ( FILE * log )
+{
+    UTF32Unit lead, trail;      // Loop counters: a BMP value, or offsets into the two surrogate ranges.
+    size_t    consumed;         // Number of UTF-16 units the decoder reports as read.
+    UTF32Unit decoded, rangeStart, expected;
+    UTF16Unit units[3];
+
+    // ------------------------------------------
+    // Test CodePoint_from_UTF16BE on good input.
+
+    fprintf ( log, "\nTesting CodePoint_from_UTF16BE on good input\n" );
+
+    // Hand-computed spot checks, in case the decoder and the exhaustive loops have inverse bugs.
+    // The literal unit values are byte swapped when the host is not big endian.
+    units[0] = UTF16Unit ( kBigEndianHost ? 0x1234 : 0x3412 );
+    CodePoint_from_UTF16BE ( units, 1, &decoded, &consumed );
+    if ( ! ((consumed == 1) && (decoded == 0x1234)) ) {
+        fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+1234\n" );
+    }
+
+    units[0] = UTF16Unit ( kBigEndianHost ? 0xFEDC : 0xDCFE );
+    CodePoint_from_UTF16BE ( units, 1, &decoded, &consumed );
+    if ( ! ((consumed == 1) && (decoded == 0xFEDC)) ) {
+        fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+FEDC\n" );
+    }
+
+    units[0] = UTF16Unit ( kBigEndianHost ? 0xD812 : 0x12D8 );
+    units[1] = UTF16Unit ( kBigEndianHost ? 0xDC34 : 0x34DC );
+    CodePoint_from_UTF16BE ( units, 2, &decoded, &consumed );
+    if ( ! ((consumed == 2) && (decoded == 0x14834)) ) {
+        fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+14834\n" );
+    }
+
+    fprintf ( log, " CodePoint_from_UTF16BE sanity tests done\n" );
+
+    // The low part of the BMP, 0000..D7FF. Lengths 0, 1 and 2 are tried for each value.
+    rangeStart = 0;
+    for ( lead = rangeStart; lead < 0xD800; ++lead ) {
+
+        expected = lead;
+        units[0] = NativeUTF16BE(UTF16Unit(lead));
+        units[1] = 0xFFFF;  // Trailing unit that must not be consumed.
+
+        CodePoint_from_UTF16BE ( units, 0, &decoded, &consumed );
+        if ( consumed != 0 ) {
+            fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", expected );
+        }
+
+        CodePoint_from_UTF16BE ( units, 1, &decoded, &consumed );
+        if ( ! ((decoded == expected) && (consumed == 1)) ) {
+            fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", expected );
+        }
+
+        CodePoint_from_UTF16BE ( units, 2, &decoded, &consumed );
+        if ( ! ((decoded == expected) && (consumed == 1)) ) {
+            fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", expected );
+        }
+
+    }
+    fprintf ( log, " CodePoint_from_UTF16BE done for %.4X..%.4X\n", rangeStart, expected );
+
+    // The high part of the BMP, E000..FFFF.
+    rangeStart = 0xE000;
+    for ( lead = rangeStart; lead < 0x10000; ++lead ) {
+
+        expected = lead;
+        units[0] = NativeUTF16BE(UTF16Unit(lead));
+        units[1] = 0xFFFF;
+
+        CodePoint_from_UTF16BE ( units, 0, &decoded, &consumed );
+        if ( consumed != 0 ) {
+            fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", expected );
+        }
+
+        CodePoint_from_UTF16BE ( units, 1, &decoded, &consumed );
+        if ( ! ((decoded == expected) && (consumed == 1)) ) {
+            fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", expected );
+        }
+
+        CodePoint_from_UTF16BE ( units, 2, &decoded, &consumed );
+        if ( ! ((decoded == expected) && (consumed == 1)) ) {
+            fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", expected );
+        }
+
+    }
+    fprintf ( log, " CodePoint_from_UTF16BE done for %.4X..%.4X\n", rangeStart, expected );
+
+    // Beyond the BMP, 10000..10FFFF: every high/low surrogate combination. Lengths 0 and 1 must
+    // consume nothing, lengths 2 and 3 must consume exactly the pair.
+    rangeStart = 0x10000;
+    for ( lead = 0; lead < 0x400; ++lead ) {
+        for ( trail = 0; trail < 0x400; ++trail ) {
+
+            expected = (lead<<10) + trail + rangeStart;
+            units[0] = NativeUTF16BE(0xD800+UTF16Unit(lead));
+            units[1] = NativeUTF16BE(0xDC00+UTF16Unit(trail));
+            units[2] = 0xFFFF;
+
+            CodePoint_from_UTF16BE ( units, 0, &decoded, &consumed );
+            if ( consumed != 0 ) {
+                fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", expected );
+            }
+
+            CodePoint_from_UTF16BE ( units, 1, &decoded, &consumed );
+            if ( consumed != 0 ) {
+                fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", expected );
+            }
+
+            CodePoint_from_UTF16BE ( units, 2, &decoded, &consumed );
+            if ( ! ((decoded == expected) && (consumed == 2)) ) {
+                fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", expected );
+            }
+
+            CodePoint_from_UTF16BE ( units, 3, &decoded, &consumed );
+            if ( ! ((decoded == expected) && (consumed == 2)) ) {
+                fprintf ( log, " *** CodePoint_from_UTF16BE failure for U+%.4X\n", expected );
+            }
+
+        }
+    }
+    fprintf ( log, " CodePoint_from_UTF16BE done for %.4X..%.4X\n", rangeStart, expected );
+
+    // ------------------------------------------
+    // Test CodePoint_from_UTF16LE on good input.
+
+    fprintf ( log, "\nTesting CodePoint_from_UTF16LE on good input\n" );
+
+    // Same spot checks as above; here the literals are byte swapped when the host is big endian.
+    units[0] = UTF16Unit ( kBigEndianHost ? 0x3412 : 0x1234 );
+    CodePoint_from_UTF16LE ( units, 1, &decoded, &consumed );
+    if ( ! ((consumed == 1) && (decoded == 0x1234)) ) {
+        fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+1234\n" );
+    }
+
+    units[0] = UTF16Unit ( kBigEndianHost ? 0xDCFE : 0xFEDC );
+    CodePoint_from_UTF16LE ( units, 1, &decoded, &consumed );
+    if ( ! ((consumed == 1) && (decoded == 0xFEDC)) ) {
+        fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+FEDC\n" );
+    }
+
+    units[0] = UTF16Unit ( kBigEndianHost ? 0x12D8 : 0xD812 );
+    units[1] = UTF16Unit ( kBigEndianHost ? 0x34DC : 0xDC34 );
+    CodePoint_from_UTF16LE ( units, 2, &decoded, &consumed );
+    if ( ! ((consumed == 2) && (decoded == 0x14834)) ) {
+        fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+14834\n" );
+    }
+
+    fprintf ( log, " CodePoint_from_UTF16LE sanity tests done\n" );
+
+    // The low part of the BMP, 0000..D7FF.
+    rangeStart = 0;
+    for ( lead = rangeStart; lead < 0xD800; ++lead ) {
+
+        expected = lead;
+        units[0] = NativeUTF16LE(UTF16Unit(lead));
+        units[1] = 0xFFFF;  // Trailing unit that must not be consumed.
+
+        CodePoint_from_UTF16LE ( units, 0, &decoded, &consumed );
+        if ( consumed != 0 ) {
+            fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", expected );
+        }
+
+        CodePoint_from_UTF16LE ( units, 1, &decoded, &consumed );
+        if ( ! ((decoded == expected) && (consumed == 1)) ) {
+            fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", expected );
+        }
+
+        CodePoint_from_UTF16LE ( units, 2, &decoded, &consumed );
+        if ( ! ((decoded == expected) && (consumed == 1)) ) {
+            fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", expected );
+        }
+
+    }
+    fprintf ( log, " CodePoint_from_UTF16LE done for %.4X..%.4X\n", rangeStart, expected );
+
+    // The high part of the BMP, E000..FFFF.
+    rangeStart = 0xE000;
+    for ( lead = rangeStart; lead < 0x10000; ++lead ) {
+
+        expected = lead;
+        units[0] = NativeUTF16LE(UTF16Unit(lead));
+        units[1] = 0xFFFF;
+
+        CodePoint_from_UTF16LE ( units, 0, &decoded, &consumed );
+        if ( consumed != 0 ) {
+            fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", expected );
+        }
+
+        CodePoint_from_UTF16LE ( units, 1, &decoded, &consumed );
+        if ( ! ((decoded == expected) && (consumed == 1)) ) {
+            fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", expected );
+        }
+
+        CodePoint_from_UTF16LE ( units, 2, &decoded, &consumed );
+        if ( ! ((decoded == expected) && (consumed == 1)) ) {
+            fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", expected );
+        }
+
+    }
+    fprintf ( log, " CodePoint_from_UTF16LE done for %.4X..%.4X\n", rangeStart, expected );
+
+    // Beyond the BMP, 10000..10FFFF: every high/low surrogate combination.
+    rangeStart = 0x10000;
+    for ( lead = 0; lead < 0x400; ++lead ) {
+        for ( trail = 0; trail < 0x400; ++trail ) {
+
+            expected = (lead<<10) + trail + rangeStart;
+            units[0] = NativeUTF16LE(0xD800+UTF16Unit(lead));
+            units[1] = NativeUTF16LE(0xDC00+UTF16Unit(trail));
+            units[2] = 0xFFFF;
+
+            CodePoint_from_UTF16LE ( units, 0, &decoded, &consumed );
+            if ( consumed != 0 ) {
+                fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", expected );
+            }
+
+            CodePoint_from_UTF16LE ( units, 1, &decoded, &consumed );
+            if ( consumed != 0 ) {
+                fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", expected );
+            }
+
+            CodePoint_from_UTF16LE ( units, 2, &decoded, &consumed );
+            if ( ! ((decoded == expected) && (consumed == 2)) ) {
+                fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", expected );
+            }
+
+            CodePoint_from_UTF16LE ( units, 3, &decoded, &consumed );
+            if ( ! ((decoded == expected) && (consumed == 2)) ) {
+                fprintf ( log, " *** CodePoint_from_UTF16LE failure for U+%.4X\n", expected );
+            }
+
+        }
+    }
+    fprintf ( log, " CodePoint_from_UTF16LE done for %.4X..%.4X\n", rangeStart, expected );
+
+    // ---------------------------------------------------------------
+    // Test CodePoint_from_UTF16 with bad input. U+12345 is D808 DF45.
+
+    fprintf ( log, "\nTesting CodePoint_from_UTF16 with bad input\n" );
+
+    // The byte patterns are copied into the shared sU16 buffer rather than read in place.
+    // ! HPPA (maybe others) won't tolerate misaligned loads.
+
+    memcpy ( sU16, "\xD8\x08\x00\x20\x00\x00", 6 );
+    Bad_CodePoint_from_UTF16BE ( log, sU16, 3, "missing low surrogate" );
+
+    memcpy ( sU16, "\xDF\x45\x00\x20\x00\x00", 6 );
+    Bad_CodePoint_from_UTF16BE ( log, sU16, 3, "leading low surrogate" );
+
+    memcpy ( sU16, "\xD8\x08\xD8\x08\x00\x20\x00\x00", 8 );
+    Bad_CodePoint_from_UTF16BE ( log, sU16, 4, "double high surrogate" );
+
+    fprintf ( log, " CodePoint_from_UTF16BE done with bad input\n" );
+
+    // The same three malformed sequences, with the bytes of each unit in little endian order.
+
+    memcpy ( sU16, "\x08\xD8\x20\x00\x00\x00", 6 );
+    Bad_CodePoint_from_UTF16LE ( log, sU16, 3, "missing low surrogate" );
+
+    memcpy ( sU16, "\x45\xDF\x20\x00\x00\x00", 6 );
+    Bad_CodePoint_from_UTF16LE ( log, sU16, 3, "leading low surrogate" );
+
+    memcpy ( sU16, "\x08\xD8\x08\xD8\x20\x00\x00\x00", 8 );
+    Bad_CodePoint_from_UTF16LE ( log, sU16, 4, "double high surrogate" );
+
+    fprintf ( log, " CodePoint_from_UTF16LE done with bad input\n" );
+
+} // Test_CodePoint_from_UTF16
+
+// =================================================================================================
+
+// Verifies UTF8_to_UTF16BE and UTF8_to_UTF16LE on well-formed input. For each byte order it
+// converts ASCII, the two non-surrogate BMP ranges, everything beyond the BMP, and an interleaved
+// mix; it then checks empty buffers and buffers that end in the middle of a character. The UTF-8
+// input is built in the shared sU8 buffer (via CodePoint_to_UTF8 for non-ASCII) and the UTF-16
+// output in sU16 is read back with CodePoint_from_UTF16BE/LE. Every mismatch is written to the log
+// as a "***" line; nothing is returned.
+static void Test_UTF8_to_UTF16 ( FILE * log )
+{
+    // size_t values must not be passed to "%d" or "%.4X" directly, the widths differ on LP64
+    // hosts. They are converted to these types and printed with "%lu" and "%.4X" instead.
+    typedef unsigned long PrintLen;
+    typedef unsigned int  PrintHex;
+
+    // The converters take the output capacity in UTF-16 units (the partial output test below
+    // passes 4 and expects 3 units written), so pass the element count, not the byte size.
+    const size_t u16Capacity = sizeof(sU16) / sizeof(sU16[0]);
+
+    // Sizes of the interleaved test data: 0x80 groups of a 1, 2, 3 and 4 byte UTF-8 character,
+    // which become 1, 1, 1 and 2 UTF-16 units.
+    const size_t mixedLen8  = 0x80*(1+2+3+4);
+    const size_t mixedLen16 = 0x80*(1+1+1+2);
+
+    size_t i;
+    size_t len8, len16, len8x, len16x;
+    UTF32Unit cp, cpx, cpLo, cpHi;
+
+    // ---------------------------------------------------------------------------------------
+    // Test UTF8_to_UTF16BE on good input. The CodePoint to/from functions are already tested,
+    // use them to verify the results here.
+
+    fprintf ( log, "\nTesting UTF8_to_UTF16BE on good input\n" );
+
+    // Test ASCII.
+
+    cpLo = 0; cpHi = 0x80; len8 = len16 = 0x80;
+    for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU8[i] = UTF8Unit(cp);
+    sU8[len8] = 0xFF;   // Guard byte just past the input.
+
+    UTF8_to_UTF16BE ( sU8, len8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16BE length failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+    for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+        CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", cp );
+    }
+    if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16BE consume failure, %lu != %lu\n", PrintLen(i), PrintLen(len16) );
+
+    fprintf ( log, " UTF8_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+    // Test non-ASCII inside the BMP, below the surrogates.
+
+    cpLo = 0x80; cpHi = 0xD800; len16 = cpHi-cpLo;
+    for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+    if ( len8 != (2*(0x800-cpLo) + 3*(cpHi-0x800)) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %lu\n", PrintLen(len8) );
+    sU8[len8] = 0xFF;
+
+    UTF8_to_UTF16BE ( sU8, len8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16BE length failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+    for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+        CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", cp );
+    }
+    if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16BE consume failure, %lu != %lu\n", PrintLen(i), PrintLen(len16) );
+
+    fprintf ( log, " UTF8_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+    // Test inside the BMP, above the surrogates.
+
+    cpLo = 0xE000; cpHi = 0x10000; len16 = cpHi-cpLo;
+    for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+    if ( len8 != 3*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %lu\n", PrintLen(len8) );
+    sU8[len8] = 0xFF;
+
+    UTF8_to_UTF16BE ( sU8, len8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16BE length failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+    for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+        CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", cp );
+    }
+    if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16BE consume failure, %lu != %lu\n", PrintLen(i), PrintLen(len16) );
+
+    fprintf ( log, " UTF8_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+    // Test outside the BMP. Each code point is 4 UTF-8 bytes and a 2 unit surrogate pair.
+
+    cpLo = 0x10000; cpHi = 0x110000; len16 = (cpHi-cpLo)*2;
+    for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+    if ( len8 != 4*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %lu\n", PrintLen(len8) );
+    sU8[len8] = 0xFF;
+
+    UTF8_to_UTF16BE ( sU8, len8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16BE length failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+    for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+        CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x );
+        if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", cp );
+    }
+    if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16BE consume failure, %lu != %lu\n", PrintLen(i), PrintLen(len16) );
+
+    fprintf ( log, " UTF8_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+    // Test alternating ASCII, non-ASCII BMP, beyond BMP.
+
+    len16 = mixedLen16;
+    for ( i = 0, len8 = 0; i < 0x80; ++i ) {
+        CodePoint_to_UTF8 ( i, &sU8[len8], 8, &len8x );
+        len8 += len8x;
+        CodePoint_to_UTF8 ( i+0x100, &sU8[len8], 8, &len8x );
+        len8 += len8x;
+        CodePoint_to_UTF8 ( i+0x1000, &sU8[len8], 8, &len8x );
+        len8 += len8x;
+        CodePoint_to_UTF8 ( i+0x10000, &sU8[len8], 8, &len8x );
+        len8 += len8x;
+    }
+    if ( len8 != mixedLen8 ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %lu\n", PrintLen(len8) );
+    sU8[len8] = 0xFF;
+
+    UTF8_to_UTF16BE ( sU8, len8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16BE length failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+    for ( i = 0, len16 = 0; i < 0x80; ++i ) {
+        CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", PrintHex(i) );
+        len16 += len16x;
+        CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != i+0x100) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", PrintHex(i+0x100) );
+        len16 += len16x;
+        CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != i+0x1000) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", PrintHex(i+0x1000) );
+        len16 += len16x;
+        CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x );
+        if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF8_to_UTF16BE failure for U+%.4X\n", PrintHex(i+0x10000) );
+        len16 += len16x;
+    }
+    // Report the units actually consumed against the expected total, which is what is compared.
+    if ( len16 != mixedLen16 ) fprintf ( log, " *** UTF8_to_UTF16BE consume failure, %lu != %lu\n", PrintLen(len16), PrintLen(mixedLen16) );
+
+    fprintf ( log, " UTF8_to_UTF16BE done for mixed values\n" );
+
+    // Test empty buffers and buffers ending in mid character. sU8 still holds the mixed data, so
+    // the first 8 bytes are a 1, 2 and 3 byte character plus half of a 4 byte one, and 4 output
+    // units hold three BMP characters but not the surrogate pair that follows.
+
+    len8 = mixedLen8;
+
+    UTF8_to_UTF16BE ( sU8, 0, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF8_to_UTF16BE empty input failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+    UTF8_to_UTF16BE ( sU8, len8, sU16, 0, &len8x, &len16x );
+    if ( (len8x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF8_to_UTF16BE empty output failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+    UTF8_to_UTF16BE ( sU8, 8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8x != 6) || (len16x != 3) ) fprintf ( log, " *** UTF8_to_UTF16BE partial input failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+    UTF8_to_UTF16BE ( sU8, len8, sU16, 4, &len8x, &len16x );
+    if ( (len8x != 6) || (len16x != 3) ) fprintf ( log, " *** UTF8_to_UTF16BE partial output failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    fprintf ( log, " UTF8_to_UTF16BE done for empty buffers and buffers ending in mid character\n" );
+
+    // -----------------------------------
+    // Test UTF8_to_UTF16LE on good input.
+
+    fprintf ( log, "\nTesting UTF8_to_UTF16LE on good input\n" );
+
+    // Test ASCII.
+
+    cpLo = 0; cpHi = 0x80; len8 = len16 = 0x80;
+    for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU8[i] = UTF8Unit(cp);
+    sU8[len8] = 0xFF;   // Guard byte just past the input.
+
+    UTF8_to_UTF16LE ( sU8, len8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16LE length failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+    for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+        CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", cp );
+    }
+    if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16LE consume failure, %lu != %lu\n", PrintLen(i), PrintLen(len16) );
+
+    fprintf ( log, " UTF8_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+    // Test non-ASCII inside the BMP, below the surrogates.
+
+    cpLo = 0x80; cpHi = 0xD800; len16 = cpHi-cpLo;
+    for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+    if ( len8 != (2*(0x800-cpLo) + 3*(cpHi-0x800)) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %lu\n", PrintLen(len8) );
+    sU8[len8] = 0xFF;
+
+    UTF8_to_UTF16LE ( sU8, len8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16LE length failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+    for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+        CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", cp );
+    }
+    if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16LE consume failure, %lu != %lu\n", PrintLen(i), PrintLen(len16) );
+
+    fprintf ( log, " UTF8_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+    // Test inside the BMP, above the surrogates.
+
+    cpLo = 0xE000; cpHi = 0x10000; len16 = cpHi-cpLo;
+    for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+    if ( len8 != 3*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %lu\n", PrintLen(len8) );
+    sU8[len8] = 0xFF;
+
+    UTF8_to_UTF16LE ( sU8, len8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16LE length failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+    for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+        CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", cp );
+    }
+    if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16LE consume failure, %lu != %lu\n", PrintLen(i), PrintLen(len16) );
+
+    fprintf ( log, " UTF8_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+    // Test outside the BMP. Each code point is 4 UTF-8 bytes and a 2 unit surrogate pair.
+
+    cpLo = 0x10000; cpHi = 0x110000; len16 = (cpHi-cpLo)*2;
+    for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+    if ( len8 != 4*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %lu\n", PrintLen(len8) );
+    sU8[len8] = 0xFF;
+
+    UTF8_to_UTF16LE ( sU8, len8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16LE length failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+    for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+        CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x );
+        if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", cp );
+    }
+    if ( i != len16 ) fprintf ( log, " *** UTF8_to_UTF16LE consume failure, %lu != %lu\n", PrintLen(i), PrintLen(len16) );
+
+    fprintf ( log, " UTF8_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+    // Test alternating ASCII, non-ASCII BMP, beyond BMP.
+
+    len16 = mixedLen16;
+    for ( i = 0, len8 = 0; i < 0x80; ++i ) {
+        CodePoint_to_UTF8 ( i, &sU8[len8], 8, &len8x );
+        len8 += len8x;
+        CodePoint_to_UTF8 ( i+0x100, &sU8[len8], 8, &len8x );
+        len8 += len8x;
+        CodePoint_to_UTF8 ( i+0x1000, &sU8[len8], 8, &len8x );
+        len8 += len8x;
+        CodePoint_to_UTF8 ( i+0x10000, &sU8[len8], 8, &len8x );
+        len8 += len8x;
+    }
+    if ( len8 != mixedLen8 ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %lu\n", PrintLen(len8) );
+    sU8[len8] = 0xFF;
+
+    UTF8_to_UTF16LE ( sU8, len8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8 != len8x) || (len16 != len16x) ) fprintf ( log, " *** UTF8_to_UTF16LE length failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+    for ( i = 0, len16 = 0; i < 0x80; ++i ) {
+        CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", PrintHex(i) );
+        len16 += len16x;
+        CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != i+0x100) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", PrintHex(i+0x100) );
+        len16 += len16x;
+        CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x );
+        if ( (len16x != 1) || (cpx != i+0x1000) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", PrintHex(i+0x1000) );
+        len16 += len16x;
+        CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x );
+        if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF8_to_UTF16LE failure for U+%.4X\n", PrintHex(i+0x10000) );
+        len16 += len16x;
+    }
+    // Report the units actually consumed against the expected total, which is what is compared.
+    if ( len16 != mixedLen16 ) fprintf ( log, " *** UTF8_to_UTF16LE consume failure, %lu != %lu\n", PrintLen(len16), PrintLen(mixedLen16) );
+
+    fprintf ( log, " UTF8_to_UTF16LE done for mixed values\n" );
+
+    // Test empty buffers and buffers ending in mid character. sU8 still holds the mixed data, so
+    // the first 8 bytes are a 1, 2 and 3 byte character plus half of a 4 byte one, and 4 output
+    // units hold three BMP characters but not the surrogate pair that follows.
+
+    len8 = mixedLen8;
+
+    UTF8_to_UTF16LE ( sU8, 0, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF8_to_UTF16LE empty input failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+    UTF8_to_UTF16LE ( sU8, len8, sU16, 0, &len8x, &len16x );
+    if ( (len8x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF8_to_UTF16LE empty output failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+    UTF8_to_UTF16LE ( sU8, 8, sU16, u16Capacity, &len8x, &len16x );
+    if ( (len8x != 6) || (len16x != 3) ) fprintf ( log, " *** UTF8_to_UTF16LE partial input failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+    UTF8_to_UTF16LE ( sU8, len8, sU16, 4, &len8x, &len16x );
+    if ( (len8x != 6) || (len16x != 3) ) fprintf ( log, " *** UTF8_to_UTF16LE partial output failure, %lu -> %lu\n", PrintLen(len8x), PrintLen(len16x) );
+
+    fprintf ( log, " UTF8_to_UTF16LE done for empty buffers and buffers ending in mid character\n" );
+
+} // Test_UTF8_to_UTF16
+
+// =================================================================================================
+
+static void Test_UTF8_to_UTF32 ( FILE * log )
+{
+ size_t i;
+ size_t len8, len32, len8x, len32x;
+ UTF32Unit cp, cpLo, cpHi;
+
+ // ---------------------------------------------------------------------------------------
+ // Test UTF8_to_UTF32BE on good input. The CodePoint to/from functions are already tested,
+ // use them to verify the results here.
+
+ fprintf ( log, "\nTesting UTF8_to_UTF32BE on good input\n" );
+
+ // Test ASCII.
+
+ cpLo = 0; cpHi = 0x80; len8 = len32 = 0x80;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU8[i] = UTF8Unit(cp);
+ sU8[len8] = 0xFF;
+
+ UTF8_to_UTF32BE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32BE length failure, %d -> %d\n", len8x, len32x );
+
+ sU32[len32x] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF8_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test non-ASCII inside the BMP, below the surrogates.
+
+ cpLo = 0x80; cpHi = 0xD800; len32 = cpHi-cpLo;
+ for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+ if ( len8 != (2*(0x800-cpLo) + 3*(cpHi-0x800)) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 );
+ sU8[len8] = 0xFF;
+
+ UTF8_to_UTF32BE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32BE length failure, %d -> %d\n", len8x, len32x );
+
+ sU32[len32x] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF8_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test inside the BMP, above the surrogates.
+
+ cpLo = 0xE000; cpHi = 0x10000; len32 = cpHi-cpLo;
+ for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+ if ( len8 !=3*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 );
+ sU8[len8] = 0xFF;
+
+ UTF8_to_UTF32BE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32BE length failure, %d -> %d\n", len8x, len32x );
+
+ sU32[len32x] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF8_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test outside the BMP.
+
+ cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo;
+ for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+ if ( len8 !=4*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 );
+ sU8[len8] = 0xFF;
+
+ UTF8_to_UTF32BE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32BE length failure, %d -> %d\n", len8x, len32x );
+
+ sU32[len32x] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF8_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test alternating ASCII, non-ASCII BMP, beyond BMP.
+
+ len32 = 0x80*(1+1+1+1);
+ for ( i = 0, len8 = 0; i < 0x80; ++i ) {
+ CodePoint_to_UTF8 ( i, &sU8[len8], 8, &len8x );
+ len8 += len8x;
+ CodePoint_to_UTF8 ( i+0x100, &sU8[len8], 8, &len8x );
+ len8 += len8x;
+ CodePoint_to_UTF8 ( i+0x1000, &sU8[len8], 8, &len8x );
+ len8 += len8x;
+ CodePoint_to_UTF8 ( i+0x10000, &sU8[len8], 8, &len8x );
+ len8 += len8x;
+ }
+ if ( len8 != 0x80*(1+2+3+4) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 );
+ sU8[len8] = 0xFF;
+
+ UTF8_to_UTF32BE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32BE length failure, %d -> %d\n", len8x, len32x );
+
+ sU32[len32x] = 0xFFFFFFFF;
+ for ( i = 0, len32 = 0; i < 0x80; ++i ) {
+ if ( sU32[len32] != NativeUTF32BE(i) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", i );
+ ++len32;
+ if ( sU32[len32] != NativeUTF32BE(i+0x100) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", i+0x100 );
+ ++len32;
+ if ( sU32[len32] != NativeUTF32BE(i+0x1000) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", i+0x1000 );
+ ++len32;
+ if ( sU32[len32] != NativeUTF32BE(i+0x10000) ) fprintf ( log, " *** UTF8_to_UTF32BE failure for U+%.4X\n", i+0x10000 );
+ ++len32;
+ }
+
+ fprintf ( log, " UTF8_to_UTF32BE done for mixed values\n" );
+
+ // Test empty buffers and buffers ending in mid character.
+
+ len8 = 0x80*(1+2+3+4); len32 = 0x80*(1+1+1+1);
+
+ UTF8_to_UTF32BE ( sU8, 0, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF8_to_UTF32BE empty input failure, %d -> %d\n", len8x, len32x );
+ UTF8_to_UTF32BE ( sU8, len8, sU32, 0, &len8x, &len32x );
+ if ( (len8x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF8_to_UTF32BE empty output failure, %d -> %d\n", len8x, len32x );
+ UTF8_to_UTF32BE ( sU8, 8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8x != 6) || (len32x != 3) ) fprintf ( log, " *** UTF8_to_UTF32BE partial input failure, %d -> %d\n", len8x, len32x );
+
+ fprintf ( log, " UTF8_to_UTF32BE done for empty buffers and buffers ending in mid character\n" );
+
+ // -----------------------------------
+ // Test UTF8_to_UTF32LE on good input.
+
+ fprintf ( log, "\nTesting UTF8_to_UTF32LE on good input\n" );
+
+ // Test ASCII.
+
+ cpLo = 0; cpHi = 0x80; len8 = len32 = 0x80;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU8[i] = UTF8Unit(cp);
+ sU8[len8] = 0xFF;
+
+ UTF8_to_UTF32LE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32LE length failure, %d -> %d\n", len8x, len32x );
+
+ sU32[len32x] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF8_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test non-ASCII inside the BMP, below the surrogates.
+
+ cpLo = 0x80; cpHi = 0xD800; len32 = cpHi-cpLo;
+ for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+ if ( len8 != (2*(0x800-cpLo) + 3*(cpHi-0x800)) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 );
+ sU8[len8] = 0xFF;
+
+ UTF8_to_UTF32LE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32LE length failure, %d -> %d\n", len8x, len32x );
+
+ sU32[len32x] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF8_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test inside the BMP, above the surrogates.
+
+ cpLo = 0xE000; cpHi = 0x10000; len32 = cpHi-cpLo;
+ for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+ if ( len8 !=3*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 );
+ sU8[len8] = 0xFF;
+
+ UTF8_to_UTF32LE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32LE length failure, %d -> %d\n", len8x, len32x );
+
+ sU32[len32x] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF8_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test outside the BMP.
+
+ cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo;
+ for ( cp = cpLo, len8 = 0; cp < cpHi; ++cp, len8 += len8x ) CodePoint_to_UTF8 ( cp, &sU8[len8], 8, &len8x );
+ if ( len8 !=4*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 );
+ sU8[len8] = 0xFF;
+
+ UTF8_to_UTF32LE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32LE length failure, %d -> %d\n", len8x, len32x );
+
+ sU32[len32x] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF8_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test alternating ASCII, non-ASCII BMP, beyond BMP.
+
+ len32 = 0x80*(1+1+1+1);
+ for ( i = 0, len8 = 0; i < 0x80; ++i ) {
+ CodePoint_to_UTF8 ( i, &sU8[len8], 8, &len8x );
+ len8 += len8x;
+ CodePoint_to_UTF8 ( i+0x100, &sU8[len8], 8, &len8x );
+ len8 += len8x;
+ CodePoint_to_UTF8 ( i+0x1000, &sU8[len8], 8, &len8x );
+ len8 += len8x;
+ CodePoint_to_UTF8 ( i+0x10000, &sU8[len8], 8, &len8x );
+ len8 += len8x;
+ }
+ if ( len8 != 0x80*(1+2+3+4) ) fprintf ( log, " *** CodePoint_to_UTF8 length failure, %d\n", len8 );
+ sU8[len8] = 0xFF;
+
+ UTF8_to_UTF32LE ( sU8, len8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8 != len8x) || (len32 != len32x) ) fprintf ( log, " *** UTF8_to_UTF32LE length failure, %d -> %d\n", len8x, len32x );
+
+ sU32[len32x] = 0xFFFFFFFF;
+ for ( i = 0, len32 = 0; i < 0x80; ++i ) {
+ if ( sU32[len32] != NativeUTF32LE(i) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", i );
+ ++len32;
+ if ( sU32[len32] != NativeUTF32LE(i+0x100) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", i+0x100 );
+ ++len32;
+ if ( sU32[len32] != NativeUTF32LE(i+0x1000) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", i+0x1000 );
+ ++len32;
+ if ( sU32[len32] != NativeUTF32LE(i+0x10000) ) fprintf ( log, " *** UTF8_to_UTF32LE failure for U+%.4X\n", i+0x10000 );
+ ++len32;
+ }
+
+ fprintf ( log, " UTF8_to_UTF32LE done for mixed values\n" );
+
+ // Test empty buffers and buffers ending in mid character.
+
+ len8 = 0x80*(1+2+3+4); len32 = 0x80*(1+1+1+1);
+
+ UTF8_to_UTF32LE ( sU8, 0, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF8_to_UTF32LE empty input failure, %d -> %d\n", len8x, len32x );
+ UTF8_to_UTF32LE ( sU8, len8, sU32, 0, &len8x, &len32x );
+ if ( (len8x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF8_to_UTF32LE empty output failure, %d -> %d\n", len8x, len32x );
+ UTF8_to_UTF32LE ( sU8, 8, sU32, sizeof(sU32), &len8x, &len32x );
+ if ( (len8x != 6) || (len32x != 3) ) fprintf ( log, " *** UTF8_to_UTF32LE partial input failure, %d -> %d\n", len8x, len32x );
+
+ fprintf ( log, " UTF8_to_UTF32LE done for empty buffers and buffers ending in mid character\n" );
+
+} // Test_UTF8_to_UTF32
+
+// =================================================================================================
+
+// Checks UTF16BE_to_UTF8 and UTF16LE_to_UTF8 on well formed input. Each converter is run over
+// every non-surrogate BMP code point, over all code points outside the BMP, over a repeating mix
+// of 1, 2, 3, and 4 byte characters, and finally with empty or too-small buffers. Progress lines
+// and " *** " failure lines are written to log; nothing is returned. The sU16 and sU8 buffers
+// (declared outside this function) are overwritten.
+static void Test_UTF16_to_UTF8 ( FILE * log )
+{
+    // Types used only to pass values through fprintf's varargs. size_t counts are printed with
+    // "%lu" and code points with "%X"; passing a raw size_t for "%d" or "%X" is a type mismatch.
+    typedef unsigned long LogLen;
+    typedef unsigned int  LogCP;
+
+    // Code point ranges [first,limit) whose members each occupy one UTF-16 unit, along with the
+    // number of UTF-8 bytes every member of the range must convert to.
+    static const struct { UTF32Unit first, limit; size_t utf8Size; } kSingleUnitRanges[] = {
+        { 0x0000, 0x0080,  1 },    // ASCII.
+        { 0x0080, 0x0800,  2 },    // 2 byte non-ASCII inside the BMP.
+        { 0x0800, 0xD800,  3 },    // 3 byte non-ASCII inside the BMP, below the surrogates.
+        { 0xE000, 0x10000, 3 }     // Inside the BMP, above the surrogates.
+    };
+    const size_t kSingleUnitCount = sizeof(kSingleUnitRanges) / sizeof(kSingleUnitRanges[0]);
+
+    // The mixed test repeats one character from each of these groups. The entries are ordered so
+    // that entry k needs k+1 bytes of UTF-8; only the last one needs 2 units of UTF-16.
+    static const UTF32Unit kMixedOffsets[] = { 0, 0x100, 0x1000, 0x10000 };
+    const size_t kMixedCount = sizeof(kMixedOffsets) / sizeof(kMixedOffsets[0]);
+    const size_t kMixedLen16 = 0x80*(1+1+1+2);
+    const size_t kMixedLen8  = 0x80*(1+2+3+4);
+
+    size_t i, k, r;
+    size_t len16, len8, len16x, len8x;
+    UTF32Unit cp, cpx, cpLo, cpHi;
+
+    // ---------------------------------------------------------------------------------------
+    // Test UTF16BE_to_UTF8 on good input. The CodePoint to/from functions are already tested,
+    // use them to verify the results here.
+
+    fprintf ( log, "\nTesting UTF16BE_to_UTF8 on good input\n" );
+
+    // Test each range of single unit characters in turn.
+
+    for ( r = 0; r < kSingleUnitCount; ++r ) {
+
+        cpLo = kSingleUnitRanges[r].first; cpHi = kSingleUnitRanges[r].limit;
+        len16 = cpHi-cpLo; len8 = kSingleUnitRanges[r].utf8Size*len16;
+        for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp));
+        sU16[len16] = NativeUTF16BE(0xDC00);    // Isolated low surrogate, just past the stated input length.
+
+        UTF16BE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x );
+        if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16BE_to_UTF8 length failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+
+        sU8[len8] = 0xFF;
+        for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) {
+            CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x );
+            if ( (len8x != kSingleUnitRanges[r].utf8Size) || (cpx != cp) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", LogCP(cp) );
+        }
+        if ( i != len8 ) fprintf ( log, " *** UTF16BE_to_UTF8 consume failure, %lu != %lu\n", LogLen(i), LogLen(len8) );
+
+        fprintf ( log, " UTF16BE_to_UTF8 done for %.4X..%.4X\n", LogCP(cpLo), LogCP(cpHi-1) );
+
+    }
+
+    // Test outside the BMP. The input is built with CodePoint_to_UTF16BE, 2 units per character.
+
+    cpLo = 0x10000; cpHi = 0x110000; len8 = (cpHi-cpLo)*4;
+    for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16BE ( cp, &sU16[len16], 4, &len16x );
+    if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %lu\n", LogLen(len16) );
+    sU16[len16] = NativeUTF16BE(0xDC00);    // Isolated low surrogate.
+
+    UTF16BE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x );
+    if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16BE_to_UTF8 length failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+
+    sU8[len8] = 0xFF;
+    for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) {
+        CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x );
+        if ( (len8x != 4) || (cpx != cp) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", LogCP(cp) );
+    }
+    if ( i != len8 ) fprintf ( log, " *** UTF16BE_to_UTF8 consume failure, %lu != %lu\n", LogLen(i), LogLen(len8) );
+
+    fprintf ( log, " UTF16BE_to_UTF8 done for %.4X..%.4X\n", LogCP(cpLo), LogCP(cpHi-1) );
+
+    // Test alternating ASCII, non-ASCII BMP, beyond BMP.
+
+    len8 = kMixedLen8;
+    for ( i = 0, len16 = 0; i < 0x80; ++i ) {
+        for ( k = 0; k < kMixedCount; ++k ) {
+            cp = UTF32Unit(i) + kMixedOffsets[k];
+            CodePoint_to_UTF16BE ( cp, &sU16[len16], 4, &len16x );
+            len16 += len16x;
+        }
+    }
+    if ( len16 != kMixedLen16 ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %lu\n", LogLen(len16) );
+    sU16[len16] = NativeUTF16BE(0xDC00);    // Isolated low surrogate.
+
+    UTF16BE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x );
+    if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16BE_to_UTF8 length failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+
+    sU8[len8] = 0xFF;
+    for ( i = 0, len8 = 0; i < 0x80; ++i ) {
+        for ( k = 0; k < kMixedCount; ++k ) {
+            cp = UTF32Unit(i) + kMixedOffsets[k];
+            CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x );
+            if ( (len8x != k+1) || (cpx != cp) ) fprintf ( log, " *** UTF16BE_to_UTF8 failure for U+%.4X\n", LogCP(cp) );
+            len8 += len8x;
+        }
+    }
+    // Report the bytes actually consumed against the expected total, not the loop counter.
+    if ( len8 != kMixedLen8 ) fprintf ( log, " *** UTF16BE_to_UTF8 consume failure, %lu != %lu\n", LogLen(len8), LogLen(kMixedLen8) );
+
+    fprintf ( log, " UTF16BE_to_UTF8 done for mixed values\n" );
+
+    // Test empty buffers and buffers ending in mid character. This reuses the mixed input that
+    // is still in sU16: 3 single unit characters (1+2+3 bytes of UTF-8), then a surrogate pair.
+
+    len16 = kMixedLen16; len8 = kMixedLen8;
+
+    UTF16BE_to_UTF8 ( sU16, 0, sU8, sizeof(sU8), &len16x, &len8x );
+    if ( (len16x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF8 empty input failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+    UTF16BE_to_UTF8 ( sU16, len16, sU8, 0, &len16x, &len8x );
+    if ( (len16x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF8 empty output failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+    UTF16BE_to_UTF8 ( sU16, 4, sU8, sizeof(sU8), &len16x, &len8x );    // Input stops between the surrogates.
+    if ( (len16x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF16BE_to_UTF8 partial input failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+    UTF16BE_to_UTF8 ( sU16, len16, sU8, 8, &len16x, &len8x );    // Output has no room for the 4 byte character.
+    if ( (len16x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF16BE_to_UTF8 partial output failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+
+    fprintf ( log, " UTF16BE_to_UTF8 done for empty buffers and buffers ending in mid character\n" );
+
+    // -----------------------------------
+    // Test UTF16LE_to_UTF8 on good input.
+
+    fprintf ( log, "\nTesting UTF16LE_to_UTF8 on good input\n" );
+
+    // Test each range of single unit characters in turn.
+
+    for ( r = 0; r < kSingleUnitCount; ++r ) {
+
+        cpLo = kSingleUnitRanges[r].first; cpHi = kSingleUnitRanges[r].limit;
+        len16 = cpHi-cpLo; len8 = kSingleUnitRanges[r].utf8Size*len16;
+        for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp));
+        sU16[len16] = NativeUTF16LE(0xDC00);    // Isolated low surrogate, just past the stated input length.
+
+        UTF16LE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x );
+        if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16LE_to_UTF8 length failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+
+        sU8[len8] = 0xFF;
+        for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) {
+            CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x );
+            if ( (len8x != kSingleUnitRanges[r].utf8Size) || (cpx != cp) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", LogCP(cp) );
+        }
+        if ( i != len8 ) fprintf ( log, " *** UTF16LE_to_UTF8 consume failure, %lu != %lu\n", LogLen(i), LogLen(len8) );
+
+        fprintf ( log, " UTF16LE_to_UTF8 done for %.4X..%.4X\n", LogCP(cpLo), LogCP(cpHi-1) );
+
+    }
+
+    // Test outside the BMP. The input is built with CodePoint_to_UTF16LE, 2 units per character.
+
+    cpLo = 0x10000; cpHi = 0x110000; len8 = (cpHi-cpLo)*4;
+    for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16LE ( cp, &sU16[len16], 4, &len16x );
+    if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %lu\n", LogLen(len16) );
+    sU16[len16] = NativeUTF16LE(0xDC00);    // Isolated low surrogate.
+
+    UTF16LE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x );
+    if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16LE_to_UTF8 length failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+
+    sU8[len8] = 0xFF;
+    for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) {
+        CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x );
+        if ( (len8x != 4) || (cpx != cp) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", LogCP(cp) );
+    }
+    if ( i != len8 ) fprintf ( log, " *** UTF16LE_to_UTF8 consume failure, %lu != %lu\n", LogLen(i), LogLen(len8) );
+
+    fprintf ( log, " UTF16LE_to_UTF8 done for %.4X..%.4X\n", LogCP(cpLo), LogCP(cpHi-1) );
+
+    // Test alternating ASCII, non-ASCII BMP, beyond BMP.
+
+    len8 = kMixedLen8;
+    for ( i = 0, len16 = 0; i < 0x80; ++i ) {
+        for ( k = 0; k < kMixedCount; ++k ) {
+            cp = UTF32Unit(i) + kMixedOffsets[k];
+            CodePoint_to_UTF16LE ( cp, &sU16[len16], 4, &len16x );
+            len16 += len16x;
+        }
+    }
+    if ( len16 != kMixedLen16 ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %lu\n", LogLen(len16) );
+    sU16[len16] = NativeUTF16LE(0xDC00);    // Isolated low surrogate.
+
+    UTF16LE_to_UTF8 ( sU16, len16, sU8, sizeof(sU8), &len16x, &len8x );
+    if ( (len16 != len16x) || (len8 != len8x) ) fprintf ( log, " *** UTF16LE_to_UTF8 length failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+
+    sU8[len8] = 0xFF;
+    for ( i = 0, len8 = 0; i < 0x80; ++i ) {
+        for ( k = 0; k < kMixedCount; ++k ) {
+            cp = UTF32Unit(i) + kMixedOffsets[k];
+            CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x );
+            if ( (len8x != k+1) || (cpx != cp) ) fprintf ( log, " *** UTF16LE_to_UTF8 failure for U+%.4X\n", LogCP(cp) );
+            len8 += len8x;
+        }
+    }
+    // Report the bytes actually consumed against the expected total, not the loop counter.
+    if ( len8 != kMixedLen8 ) fprintf ( log, " *** UTF16LE_to_UTF8 consume failure, %lu != %lu\n", LogLen(len8), LogLen(kMixedLen8) );
+
+    fprintf ( log, " UTF16LE_to_UTF8 done for mixed values\n" );
+
+    // Test empty buffers and buffers ending in mid character. This reuses the mixed input that
+    // is still in sU16: 3 single unit characters (1+2+3 bytes of UTF-8), then a surrogate pair.
+
+    len16 = kMixedLen16; len8 = kMixedLen8;
+
+    UTF16LE_to_UTF8 ( sU16, 0, sU8, sizeof(sU8), &len16x, &len8x );
+    if ( (len16x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF8 empty input failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+    UTF16LE_to_UTF8 ( sU16, len16, sU8, 0, &len16x, &len8x );
+    if ( (len16x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF8 empty output failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+    UTF16LE_to_UTF8 ( sU16, 4, sU8, sizeof(sU8), &len16x, &len8x );    // Input stops between the surrogates.
+    if ( (len16x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF16LE_to_UTF8 partial input failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+    UTF16LE_to_UTF8 ( sU16, len16, sU8, 8, &len16x, &len8x );    // Output has no room for the 4 byte character.
+    if ( (len16x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF16LE_to_UTF8 partial output failure, %lu -> %lu\n", LogLen(len16x), LogLen(len8x) );
+
+    fprintf ( log, " UTF16LE_to_UTF8 done for empty buffers and buffers ending in mid character\n" );
+
+} // Test_UTF16_to_UTF8
+
+// =================================================================================================
+
+// Checks UTF32BE_to_UTF8 and UTF32LE_to_UTF8 on well formed input. Each converter is run over
+// every non-surrogate code point from U+0000 through U+10FFFF, over a repeating mix of 1, 2, 3,
+// and 4 byte characters, and finally with empty or too-small buffers. Progress lines and " *** "
+// failure lines are written to log; nothing is returned. The sU32 and sU8 buffers (declared
+// outside this function) are overwritten.
+static void Test_UTF32_to_UTF8 ( FILE * log )
+{
+    // Types used only to pass values through fprintf's varargs. size_t counts are printed with
+    // "%lu" and code points with "%X"; passing a raw size_t for "%d" or "%X" is a type mismatch.
+    typedef unsigned long LogLen;
+    typedef unsigned int  LogCP;
+
+    // Code point ranges [first,limit) along with the number of UTF-8 bytes that every member of
+    // the range must convert to. Each member is exactly one UTF-32 unit.
+    static const struct { UTF32Unit first, limit; size_t utf8Size; } kRanges[] = {
+        { 0x0000,  0x0080,   1 },    // ASCII.
+        { 0x0080,  0x0800,   2 },    // 2 byte non-ASCII inside the BMP.
+        { 0x0800,  0xD800,   3 },    // 3 byte non-ASCII inside the BMP, below the surrogates.
+        { 0xE000,  0x10000,  3 },    // Inside the BMP, above the surrogates.
+        { 0x10000, 0x110000, 4 }     // Outside the BMP.
+    };
+    const size_t kRangeCount = sizeof(kRanges) / sizeof(kRanges[0]);
+
+    // The mixed test repeats one character from each of these groups. The entries are ordered so
+    // that entry k needs k+1 bytes of UTF-8; every entry is one unit of UTF-32.
+    static const UTF32Unit kMixedOffsets[] = { 0, 0x100, 0x1000, 0x10000 };
+    const size_t kMixedCount = sizeof(kMixedOffsets) / sizeof(kMixedOffsets[0]);
+    const size_t kMixedLen32 = 0x80*(1+1+1+1);
+    const size_t kMixedLen8  = 0x80*(1+2+3+4);
+
+    size_t i, k, r;
+    size_t len32, len8, len32x, len8x;
+    UTF32Unit cp, cpx, cpLo, cpHi;
+
+    // -----------------------------------
+    // Test UTF32BE_to_UTF8 on good input.
+
+    fprintf ( log, "\nTesting UTF32BE_to_UTF8 on good input\n" );
+
+    // Test each range of code points in turn.
+
+    for ( r = 0; r < kRangeCount; ++r ) {
+
+        cpLo = kRanges[r].first; cpHi = kRanges[r].limit;
+        len32 = cpHi-cpLo; len8 = kRanges[r].utf8Size*len32;
+        for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp);
+        sU32[len32] = 0xFFFFFFFF;    // Not a code point, just past the stated input length.
+
+        UTF32BE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x );
+        if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32BE_to_UTF8 length failure, %lu -> %lu\n", LogLen(len32x), LogLen(len8x) );
+
+        sU8[len8] = 0xFF;
+        for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) {
+            CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x );
+            if ( (len8x != kRanges[r].utf8Size) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", LogCP(cp) );
+        }
+        if ( i != len8 ) fprintf ( log, " *** UTF32BE_to_UTF8 consume failure, %lu != %lu\n", LogLen(i), LogLen(len8) );
+
+        fprintf ( log, " UTF32BE_to_UTF8 done for %.4X..%.4X\n", LogCP(cpLo), LogCP(cpHi-1) );
+
+    }
+
+    // Test alternating ASCII, non-ASCII BMP, beyond BMP.
+
+    len8 = kMixedLen8;
+    for ( i = 0, len32 = 0; i < 0x80; ++i ) {
+        for ( k = 0; k < kMixedCount; ++k ) {
+            cp = UTF32Unit(i) + kMixedOffsets[k];
+            sU32[len32] = NativeUTF32BE(cp);
+            ++len32;
+        }
+    }
+    sU32[len32] = 0xFFFFFFFF;
+
+    UTF32BE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x );
+    if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32BE_to_UTF8 length failure, %lu -> %lu\n", LogLen(len32x), LogLen(len8x) );
+
+    sU8[len8] = 0xFF;
+    for ( i = 0, len8 = 0; i < 0x80; ++i ) {
+        for ( k = 0; k < kMixedCount; ++k ) {
+            cp = UTF32Unit(i) + kMixedOffsets[k];
+            CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x );
+            if ( (len8x != k+1) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF8 failure for U+%.4X\n", LogCP(cp) );
+            len8 += len8x;
+        }
+    }
+    // Report the bytes actually consumed against the expected total, not the loop counter.
+    if ( len8 != kMixedLen8 ) fprintf ( log, " *** UTF32BE_to_UTF8 consume failure, %lu != %lu\n", LogLen(len8), LogLen(kMixedLen8) );
+
+    fprintf ( log, " UTF32BE_to_UTF8 done for mixed values\n" );
+
+    // Test empty buffers and output buffers ending in mid character. This reuses the mixed input
+    // that is still in sU32, so the input length is the mixed UTF-32 length (4 units per group);
+    // a longer length would reach past the 0xFFFFFFFF terminator.
+
+    len32 = kMixedLen32; len8 = kMixedLen8;
+
+    UTF32BE_to_UTF8 ( sU32, 0, sU8, sizeof(sU8), &len32x, &len8x );
+    if ( (len32x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF8 empty input failure, %lu -> %lu\n", LogLen(len32x), LogLen(len8x) );
+    UTF32BE_to_UTF8 ( sU32, len32, sU8, 0, &len32x, &len8x );
+    if ( (len32x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF8 empty output failure, %lu -> %lu\n", LogLen(len32x), LogLen(len8x) );
+    UTF32BE_to_UTF8 ( sU32, len32, sU8, 8, &len32x, &len8x );    // Room for 1+2+3 bytes, not for the 4 byte character.
+    if ( (len32x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF32BE_to_UTF8 partial output failure, %lu -> %lu\n", LogLen(len32x), LogLen(len8x) );
+
+    fprintf ( log, " UTF32BE_to_UTF8 done for empty buffers and buffers ending in mid character\n" );
+
+    // -----------------------------------
+    // Test UTF32LE_to_UTF8 on good input.
+
+    fprintf ( log, "\nTesting UTF32LE_to_UTF8 on good input\n" );
+
+    // Test each range of code points in turn.
+
+    for ( r = 0; r < kRangeCount; ++r ) {
+
+        cpLo = kRanges[r].first; cpHi = kRanges[r].limit;
+        len32 = cpHi-cpLo; len8 = kRanges[r].utf8Size*len32;
+        for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp);
+        sU32[len32] = 0xFFFFFFFF;    // Not a code point, just past the stated input length.
+
+        UTF32LE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x );
+        if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32LE_to_UTF8 length failure, %lu -> %lu\n", LogLen(len32x), LogLen(len8x) );
+
+        sU8[len8] = 0xFF;
+        for ( i = 0, cp = cpLo; cp < cpHi; i += len8x, ++cp ) {
+            CodePoint_from_UTF8 ( &sU8[i], 8, &cpx, &len8x );
+            if ( (len8x != kRanges[r].utf8Size) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", LogCP(cp) );
+        }
+        if ( i != len8 ) fprintf ( log, " *** UTF32LE_to_UTF8 consume failure, %lu != %lu\n", LogLen(i), LogLen(len8) );
+
+        fprintf ( log, " UTF32LE_to_UTF8 done for %.4X..%.4X\n", LogCP(cpLo), LogCP(cpHi-1) );
+
+    }
+
+    // Test alternating ASCII, non-ASCII BMP, beyond BMP.
+
+    len8 = kMixedLen8;
+    for ( i = 0, len32 = 0; i < 0x80; ++i ) {
+        for ( k = 0; k < kMixedCount; ++k ) {
+            cp = UTF32Unit(i) + kMixedOffsets[k];
+            sU32[len32] = NativeUTF32LE(cp);
+            ++len32;
+        }
+    }
+    sU32[len32] = 0xFFFFFFFF;
+
+    UTF32LE_to_UTF8 ( sU32, len32, sU8, sizeof(sU8), &len32x, &len8x );
+    if ( (len32 != len32x) || (len8 != len8x) ) fprintf ( log, " *** UTF32LE_to_UTF8 length failure, %lu -> %lu\n", LogLen(len32x), LogLen(len8x) );
+
+    sU8[len8] = 0xFF;
+    for ( i = 0, len8 = 0; i < 0x80; ++i ) {
+        for ( k = 0; k < kMixedCount; ++k ) {
+            cp = UTF32Unit(i) + kMixedOffsets[k];
+            CodePoint_from_UTF8 ( &sU8[len8], 8, &cpx, &len8x );
+            if ( (len8x != k+1) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF8 failure for U+%.4X\n", LogCP(cp) );
+            len8 += len8x;
+        }
+    }
+    // Report the bytes actually consumed against the expected total, not the loop counter.
+    if ( len8 != kMixedLen8 ) fprintf ( log, " *** UTF32LE_to_UTF8 consume failure, %lu != %lu\n", LogLen(len8), LogLen(kMixedLen8) );
+
+    fprintf ( log, " UTF32LE_to_UTF8 done for mixed values\n" );
+
+    // Test empty buffers and output buffers ending in mid character. This reuses the mixed input
+    // that is still in sU32, so the input length is the mixed UTF-32 length (4 units per group);
+    // a longer length would reach past the 0xFFFFFFFF terminator.
+
+    len32 = kMixedLen32; len8 = kMixedLen8;
+
+    UTF32LE_to_UTF8 ( sU32, 0, sU8, sizeof(sU8), &len32x, &len8x );
+    if ( (len32x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF8 empty input failure, %lu -> %lu\n", LogLen(len32x), LogLen(len8x) );
+    UTF32LE_to_UTF8 ( sU32, len32, sU8, 0, &len32x, &len8x );
+    if ( (len32x != 0) || (len8x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF8 empty output failure, %lu -> %lu\n", LogLen(len32x), LogLen(len8x) );
+    UTF32LE_to_UTF8 ( sU32, len32, sU8, 8, &len32x, &len8x );    // Room for 1+2+3 bytes, not for the 4 byte character.
+    if ( (len32x != 3) || (len8x != 6) ) fprintf ( log, " *** UTF32LE_to_UTF8 partial output failure, %lu -> %lu\n", LogLen(len32x), LogLen(len8x) );
+
+    fprintf ( log, " UTF32LE_to_UTF8 done for empty buffers and buffers ending in mid character\n" );
+
+} // Test_UTF32_to_UTF8
+
+// =================================================================================================
+
+// Exercise the four UTF-16 to UTF-32 converters (BE->BE, LE->LE, BE->LE, LE->BE) on well-formed
+// input. Every mismatch is reported to log as a line starting with " *** "; nothing is returned.
+// Each converter goes through the same sequence: the BMP below the surrogates, the BMP above the
+// surrogates, everything outside the BMP, alternating BMP and non-BMP values, and finally empty
+// buffers plus an input that stops in the middle of a surrogate pair.
+//
+// The input is built in sU16 and converted into sU32; both buffers are declared outside this
+// function. The size_t lengths and indices are cast to int/unsigned before being handed to
+// fprintf, because the %d and %.4X conversions used in the messages require those types.
+//
+// NOTE(review): the output capacity is passed as sizeof(sU32), which is a byte count - confirm
+// that this is what the converters expect for their UTF-32 length parameter.
+static void Test_UTF16_to_UTF32 ( FILE * log )
+{
+ size_t i;
+ size_t len16, len32, len16x, len32x;
+ UTF32Unit cp, cpLo, cpHi;
+
+ // --------------------------------------
+ // Test UTF16BE_to_UTF32BE on good input.
+
+ fprintf ( log, "\nTesting UTF16BE_to_UTF32BE on good input\n" );
+
+ // Test inside the BMP, below the surrogates.
+
+ cpLo = 0; cpHi = 0xD800; len16 = len32 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp));
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+
+ UTF16BE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32BE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32BE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16BE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test inside the BMP, above the surrogates.
+
+ cpLo = 0xE000; cpHi = 0x10000; len16 = len32 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp));
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+
+ UTF16BE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32BE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32BE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16BE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test outside the BMP.
+
+ cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo;
+ for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16BE ( cp, &sU16[len16], 4, &len16x );
+ if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %d\n", int(len16) );
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+
+ UTF16BE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32BE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32BE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16BE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test alternating BMP, beyond BMP.
+
+ len16 = 0x8000*(1+2); len32 = 0x8000*(1+1);
+ for ( i = 0, len16 = 0; i < 0x8000; ++i ) {
+ CodePoint_to_UTF16BE ( i, &sU16[len16], 8, &len16x );
+ len16 += len16x;
+ CodePoint_to_UTF16BE ( i+0x10000, &sU16[len16], 8, &len16x );
+ len16 += len16x;
+ }
+ if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %d\n", int(len16) );
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+
+ UTF16BE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32BE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, len32 = 0; i < 0x8000; ++i ) {
+ if ( sU32[len32] != NativeUTF32BE(i) ) fprintf ( log, " *** UTF16BE_to_UTF32BE failure for U+%.4X\n", unsigned(i) );
+ ++len32;
+ if ( sU32[len32] != NativeUTF32BE(i+0x10000) ) fprintf ( log, " *** UTF16BE_to_UTF32BE failure for U+%.4X\n", unsigned(i+0x10000) );
+ ++len32;
+ }
+
+ fprintf ( log, " UTF16BE_to_UTF32BE done for mixed values\n" );
+
+ // Test empty buffers and buffers ending in mid character.
+
+ len16 = 0x8000*(1+2); len32 = 0x8000*(1+1);
+
+ UTF16BE_to_UTF32BE ( sU16, 0, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF32BE empty input failure, %d -> %d\n", int(len16x), int(len32x) );
+ UTF16BE_to_UTF32BE ( sU16, len16, sU32, 0, &len16x, &len32x );
+ if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF32BE empty output failure, %d -> %d\n", int(len16x), int(len32x) );
+ // The first 5 units are U+0000, the pair for U+10000, U+0001, and half of the pair for U+10001.
+ UTF16BE_to_UTF32BE ( sU16, 5, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16x != 4) || (len32x != 3) ) fprintf ( log, " *** UTF16BE_to_UTF32BE partial input failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ fprintf ( log, " UTF16BE_to_UTF32BE done for empty buffers and buffers ending in mid character\n" );
+
+ // --------------------------------------
+ // Test UTF16LE_to_UTF32LE on good input.
+
+ fprintf ( log, "\nTesting UTF16LE_to_UTF32LE on good input\n" );
+
+ // Test inside the BMP, below the surrogates.
+
+ cpLo = 0; cpHi = 0xD800; len16 = len32 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp));
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+
+ UTF16LE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32LE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32LE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16LE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test inside the BMP, above the surrogates.
+
+ cpLo = 0xE000; cpHi = 0x10000; len16 = len32 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp));
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+
+ UTF16LE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32LE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32LE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16LE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test outside the BMP.
+
+ cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo;
+ for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16LE ( cp, &sU16[len16], 4, &len16x );
+ if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %d\n", int(len16) );
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+
+ UTF16LE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32LE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32LE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16LE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test alternating BMP, beyond BMP.
+
+ len16 = 0x8000*(1+2); len32 = 0x8000*(1+1);
+ for ( i = 0, len16 = 0; i < 0x8000; ++i ) {
+ CodePoint_to_UTF16LE ( i, &sU16[len16], 8, &len16x );
+ len16 += len16x;
+ CodePoint_to_UTF16LE ( i+0x10000, &sU16[len16], 8, &len16x );
+ len16 += len16x;
+ }
+ if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %d\n", int(len16) );
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+
+ UTF16LE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32LE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, len32 = 0; i < 0x8000; ++i ) {
+ if ( sU32[len32] != NativeUTF32LE(i) ) fprintf ( log, " *** UTF16LE_to_UTF32LE failure for U+%.4X\n", unsigned(i) );
+ ++len32;
+ if ( sU32[len32] != NativeUTF32LE(i+0x10000) ) fprintf ( log, " *** UTF16LE_to_UTF32LE failure for U+%.4X\n", unsigned(i+0x10000) );
+ ++len32;
+ }
+
+ fprintf ( log, " UTF16LE_to_UTF32LE done for mixed values\n" );
+
+ // Test empty buffers and buffers ending in mid character.
+
+ len16 = 0x8000*(1+2); len32 = 0x8000*(1+1);
+
+ UTF16LE_to_UTF32LE ( sU16, 0, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF32LE empty input failure, %d -> %d\n", int(len16x), int(len32x) );
+ UTF16LE_to_UTF32LE ( sU16, len16, sU32, 0, &len16x, &len32x );
+ if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF32LE empty output failure, %d -> %d\n", int(len16x), int(len32x) );
+ // The first 5 units are U+0000, the pair for U+10000, U+0001, and half of the pair for U+10001.
+ UTF16LE_to_UTF32LE ( sU16, 5, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16x != 4) || (len32x != 3) ) fprintf ( log, " *** UTF16LE_to_UTF32LE partial input failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ fprintf ( log, " UTF16LE_to_UTF32LE done for empty buffers and buffers ending in mid character\n" );
+
+ // --------------------------------------
+ // Test UTF16BE_to_UTF32LE on good input.
+
+ fprintf ( log, "\nTesting UTF16BE_to_UTF32LE on good input\n" );
+
+ // Test inside the BMP, below the surrogates.
+
+ cpLo = 0; cpHi = 0xD800; len16 = len32 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp));
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+
+ UTF16BE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32LE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32LE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16BE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test inside the BMP, above the surrogates.
+
+ cpLo = 0xE000; cpHi = 0x10000; len16 = len32 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16BE(UTF16Unit(cp));
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+
+ UTF16BE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32LE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32LE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16BE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test outside the BMP.
+
+ cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo;
+ for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16BE ( cp, &sU16[len16], 4, &len16x );
+ if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %d\n", int(len16) );
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+
+ UTF16BE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32LE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32LE(cp) ) fprintf ( log, " *** UTF16BE_to_UTF32LE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16BE_to_UTF32LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test alternating BMP, beyond BMP.
+
+ len16 = 0x8000*(1+2); len32 = 0x8000*(1+1);
+ for ( i = 0, len16 = 0; i < 0x8000; ++i ) {
+ CodePoint_to_UTF16BE ( i, &sU16[len16], 8, &len16x );
+ len16 += len16x;
+ CodePoint_to_UTF16BE ( i+0x10000, &sU16[len16], 8, &len16x );
+ len16 += len16x;
+ }
+ if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** CodePoint_to_UTF16BE length failure, %d\n", int(len16) );
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+
+ UTF16BE_to_UTF32LE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16BE_to_UTF32LE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, len32 = 0; i < 0x8000; ++i ) {
+ if ( sU32[len32] != NativeUTF32LE(i) ) fprintf ( log, " *** UTF16BE_to_UTF32LE failure for U+%.4X\n", unsigned(i) );
+ ++len32;
+ if ( sU32[len32] != NativeUTF32LE(i+0x10000) ) fprintf ( log, " *** UTF16BE_to_UTF32LE failure for U+%.4X\n", unsigned(i+0x10000) );
+ ++len32;
+ }
+
+ fprintf ( log, " UTF16BE_to_UTF32LE done for mixed values\n" );
+
+ // Test empty buffers and buffers ending in mid character.
+
+ len16 = 0x8000*(1+2); len32 = 0x8000*(1+1);
+
+ UTF16BE_to_UTF32LE ( sU16, 0, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF32LE empty input failure, %d -> %d\n", int(len16x), int(len32x) );
+ UTF16BE_to_UTF32LE ( sU16, len16, sU32, 0, &len16x, &len32x );
+ if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16BE_to_UTF32LE empty output failure, %d -> %d\n", int(len16x), int(len32x) );
+ // The first 5 units are U+0000, the pair for U+10000, U+0001, and half of the pair for U+10001.
+ UTF16BE_to_UTF32LE ( sU16, 5, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16x != 4) || (len32x != 3) ) fprintf ( log, " *** UTF16BE_to_UTF32LE partial input failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ fprintf ( log, " UTF16BE_to_UTF32LE done for empty buffers and buffers ending in mid character\n" );
+
+ // --------------------------------------
+ // Test UTF16LE_to_UTF32BE on good input.
+
+ fprintf ( log, "\nTesting UTF16LE_to_UTF32BE on good input\n" );
+
+ // Test inside the BMP, below the surrogates.
+
+ cpLo = 0; cpHi = 0xD800; len16 = len32 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp));
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+
+ UTF16LE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32BE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32BE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16LE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test inside the BMP, above the surrogates.
+
+ cpLo = 0xE000; cpHi = 0x10000; len16 = len32 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU16[i] = NativeUTF16LE(UTF16Unit(cp));
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+
+ UTF16LE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32BE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32BE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16LE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test outside the BMP.
+
+ cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo;
+ for ( cp = cpLo, len16 = 0; cp < cpHi; ++cp, len16 += len16x ) CodePoint_to_UTF16LE ( cp, &sU16[len16], 4, &len16x );
+ if ( len16 != 2*(cpHi-cpLo) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %d\n", int(len16) );
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+
+ UTF16LE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32BE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) {
+ if ( sU32[i] != NativeUTF32BE(cp) ) fprintf ( log, " *** UTF16LE_to_UTF32BE failure for U+%.4X\n", cp );
+ }
+
+ fprintf ( log, " UTF16LE_to_UTF32BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test alternating BMP, beyond BMP.
+
+ len16 = 0x8000*(1+2); len32 = 0x8000*(1+1);
+ for ( i = 0, len16 = 0; i < 0x8000; ++i ) {
+ CodePoint_to_UTF16LE ( i, &sU16[len16], 8, &len16x );
+ len16 += len16x;
+ CodePoint_to_UTF16LE ( i+0x10000, &sU16[len16], 8, &len16x );
+ len16 += len16x;
+ }
+ if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** CodePoint_to_UTF16LE length failure, %d\n", int(len16) );
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+
+ UTF16LE_to_UTF32BE ( sU16, len16, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16 != len16x) || (len32 != len32x) ) fprintf ( log, " *** UTF16LE_to_UTF32BE length failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ sU32[len32] = 0xFFFFFFFF;
+ for ( i = 0, len32 = 0; i < 0x8000; ++i ) {
+ if ( sU32[len32] != NativeUTF32BE(i) ) fprintf ( log, " *** UTF16LE_to_UTF32BE failure for U+%.4X\n", unsigned(i) );
+ ++len32;
+ if ( sU32[len32] != NativeUTF32BE(i+0x10000) ) fprintf ( log, " *** UTF16LE_to_UTF32BE failure for U+%.4X\n", unsigned(i+0x10000) );
+ ++len32;
+ }
+
+ fprintf ( log, " UTF16LE_to_UTF32BE done for mixed values\n" );
+
+ // Test empty buffers and buffers ending in mid character.
+
+ len16 = 0x8000*(1+2); len32 = 0x8000*(1+1);
+
+ UTF16LE_to_UTF32BE ( sU16, 0, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF32BE empty input failure, %d -> %d\n", int(len16x), int(len32x) );
+ UTF16LE_to_UTF32BE ( sU16, len16, sU32, 0, &len16x, &len32x );
+ if ( (len16x != 0) || (len32x != 0) ) fprintf ( log, " *** UTF16LE_to_UTF32BE empty output failure, %d -> %d\n", int(len16x), int(len32x) );
+ // The first 5 units are U+0000, the pair for U+10000, U+0001, and half of the pair for U+10001.
+ UTF16LE_to_UTF32BE ( sU16, 5, sU32, sizeof(sU32), &len16x, &len32x );
+ if ( (len16x != 4) || (len32x != 3) ) fprintf ( log, " *** UTF16LE_to_UTF32BE partial input failure, %d -> %d\n", int(len16x), int(len32x) );
+
+ fprintf ( log, " UTF16LE_to_UTF32BE done for empty buffers and buffers ending in mid character\n" );
+
+} // Test_UTF16_to_UTF32
+
+// =================================================================================================
+
+static void Test_UTF32_to_UTF16 ( FILE * log )
+{
+ size_t i;
+ size_t len32, len16, len32x, len16x;
+ UTF32Unit cp, cpx, cpLo, cpHi;
+
+ // --------------------------------------
+ // Test UTF32BE_to_UTF16BE on good input.
+
+ fprintf ( log, "\nTesting UTF32BE_to_UTF16BE on good input\n" );
+
+ // Test inside the BMP, below the surrogates.
+
+ cpLo = 0; cpHi = 0xD800; len32 = len16 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32BE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16BE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16BE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32BE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test inside the BMP, above the surrogates.
+
+ cpLo = 0xE000; cpHi = 0x10000; len32 = len16 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32BE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16BE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16BE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32BE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test outside the BMP.
+
+ cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; len16 = (cpHi-cpLo)*2;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32BE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16BE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16BE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32BE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test alternating BMP, beyond BMP.
+
+ len32 = 0x8000*(1+1); len16 = 0x8000*(1+2);
+ for ( i = 0, len32 = 0; i < 0x8000; ++i ) {
+ sU32[len32] = NativeUTF32BE(i);
+ ++len32;
+ sU32[len32] = NativeUTF32BE(i+0x10000);
+ ++len32;
+ }
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32BE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, len16 = 0; i < 0x8000; ++i ) {
+ CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF32BE_to_UTF16BE failure for U+%.4X\n", i );
+ len16 += len16x;
+ CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x );
+ if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF32BE_to_UTF16BE failure for U+%.4X\n", i );
+ len16 += len16x;
+ }
+ if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** UTF32BE_to_UTF16BE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32BE_to_UTF16BE done for mixed values\n" );
+
+ // Test empty buffers and buffers ending in mid character.
+
+ len32 = 0x8000*(1+1); len16 = 0x8000*(1+2);
+
+ UTF32BE_to_UTF16BE ( sU32, 0, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF16BE empty input failure, %d -> %d\n", len32x, len16x );
+ UTF32BE_to_UTF16BE ( sU32, len32, sU16, 0, &len32x, &len16x );
+ if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF16BE empty output failure, %d -> %d\n", len32x, len16x );
+ UTF32BE_to_UTF16BE ( sU32, len32, sU16, 5, &len32x, &len16x );
+ if ( (len32x != 3) || (len16x != 4) ) fprintf ( log, " *** UTF32BE_to_UTF16BE partial output failure, %d -> %d\n", len32x, len16x );
+
+ fprintf ( log, " UTF32BE_to_UTF16BE done for empty buffers and buffers ending in mid character\n" );
+
+// =================================================================================================
+
+ // --------------------------------------
+ // Test UTF32LE_to_UTF16LE on good input.
+
+ fprintf ( log, "\nTesting UTF32LE_to_UTF16LE on good input\n" );
+
+ // Test inside the BMP, below the surrogates.
+
+ cpLo = 0; cpHi = 0xD800; len32 = len16 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32LE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16LE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16LE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32LE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test inside the BMP, above the surrogates.
+
+ cpLo = 0xE000; cpHi = 0x10000; len32 = len16 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32LE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16LE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16LE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32LE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test outside the BMP.
+
+ cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; len16 = (cpHi-cpLo)*2;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32LE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16LE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16LE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32LE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test alternating BMP, beyond BMP.
+
+ len32 = 0x8000*(1+1); len16 = 0x8000*(1+2);
+ for ( i = 0, len32 = 0; i < 0x8000; ++i ) {
+ sU32[len32] = NativeUTF32LE(i);
+ ++len32;
+ sU32[len32] = NativeUTF32LE(i+0x10000);
+ ++len32;
+ }
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32LE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, len16 = 0; i < 0x8000; ++i ) {
+ CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF32LE_to_UTF16LE failure for U+%.4X\n", i );
+ len16 += len16x;
+ CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x );
+ if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF32LE_to_UTF16LE failure for U+%.4X\n", i );
+ len16 += len16x;
+ }
+ if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** UTF32LE_to_UTF16LE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32LE_to_UTF16LE done for mixed values\n" );
+
+ // Test empty buffers and buffers ending in mid character.
+
+ len32 = 0x8000*(1+1); len16 = 0x8000*(1+2);
+
+ UTF32LE_to_UTF16LE ( sU32, 0, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF16LE empty input failure, %d -> %d\n", len32x, len16x );
+ UTF32LE_to_UTF16LE ( sU32, len32, sU16, 0, &len32x, &len16x );
+ if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF16LE empty output failure, %d -> %d\n", len32x, len16x );
+ UTF32LE_to_UTF16LE ( sU32, len32, sU16, 5, &len32x, &len16x );
+ if ( (len32x != 3) || (len16x != 4) ) fprintf ( log, " *** UTF32LE_to_UTF16LE partial output failure, %d -> %d\n", len32x, len16x );
+
+ fprintf ( log, " UTF32LE_to_UTF16LE done for empty buffers and buffers ending in mid character\n" );
+
+// =================================================================================================
+
+ // --------------------------------------
+ // Test UTF32BE_to_UTF16LE on good input.
+
+ fprintf ( log, "\nTesting UTF32BE_to_UTF16LE on good input\n" );
+
+ // Test inside the BMP, below the surrogates.
+
+ cpLo = 0; cpHi = 0xD800; len32 = len16 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32BE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16LE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16LE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32BE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test inside the BMP, above the surrogates.
+
+ cpLo = 0xE000; cpHi = 0x10000; len32 = len16 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32BE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16LE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16LE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32BE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test outside the BMP.
+
+ cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; len16 = (cpHi-cpLo)*2;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32BE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32BE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16LE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF32BE_to_UTF16LE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32BE_to_UTF16LE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32BE_to_UTF16LE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test alternating BMP, beyond BMP.
+
+ len32 = 0x8000*(1+1); len16 = 0x8000*(1+2);
+ for ( i = 0, len32 = 0; i < 0x8000; ++i ) {
+ sU32[len32] = NativeUTF32BE(i);
+ ++len32;
+ sU32[len32] = NativeUTF32BE(i+0x10000);
+ ++len32;
+ }
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32BE_to_UTF16LE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32BE_to_UTF16LE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16LE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, len16 = 0; i < 0x8000; ++i ) {
+ CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF32BE_to_UTF16LE failure for U+%.4X\n", i );
+ len16 += len16x;
+ CodePoint_from_UTF16LE ( &sU16[len16], 4, &cpx, &len16x );
+ if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF32BE_to_UTF16LE failure for U+%.4X\n", i );
+ len16 += len16x;
+ }
+ if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** UTF32BE_to_UTF16LE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32BE_to_UTF16LE done for mixed values\n" );
+
+ // Test empty buffers and buffers ending in mid character.
+
+ len32 = 0x8000*(1+1); len16 = 0x8000*(1+2);
+
+ UTF32BE_to_UTF16LE ( sU32, 0, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF16LE empty input failure, %d -> %d\n", len32x, len16x );
+ UTF32BE_to_UTF16LE ( sU32, len32, sU16, 0, &len32x, &len16x );
+ if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32BE_to_UTF16LE empty output failure, %d -> %d\n", len32x, len16x );
+ UTF32BE_to_UTF16LE ( sU32, len32, sU16, 5, &len32x, &len16x );
+ if ( (len32x != 3) || (len16x != 4) ) fprintf ( log, " *** UTF32BE_to_UTF16LE partial output failure, %d -> %d\n", len32x, len16x );
+
+ fprintf ( log, " UTF32BE_to_UTF16LE done for empty buffers and buffers ending in mid character\n" );
+
+// =================================================================================================
+
+ // --------------------------------------
+ // Test UTF32LE_to_UTF16BE on good input.
+
+ fprintf ( log, "\nTesting UTF32LE_to_UTF16BE on good input\n" );
+
+ // Test inside the BMP, below the surrogates.
+
+ cpLo = 0; cpHi = 0xD800; len32 = len16 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32LE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16BE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16BE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32LE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test inside the BMP, above the surrogates.
+
+ cpLo = 0xE000; cpHi = 0x10000; len32 = len16 = cpHi-cpLo;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32LE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16BE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16BE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32LE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test outside the BMP.
+
+ cpLo = 0x10000; cpHi = 0x110000; len32 = cpHi-cpLo; len16 = (cpHi-cpLo)*2;
+ for ( i = 0, cp = cpLo; cp < cpHi; ++i, ++cp ) sU32[i] = NativeUTF32LE(cp);
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32LE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, cp = cpLo; cp < cpHi; i += len16x, ++cp ) {
+ CodePoint_from_UTF16BE ( &sU16[i], 4, &cpx, &len16x );
+ if ( (len16x != 2) || (cpx != cp) ) fprintf ( log, " *** UTF32LE_to_UTF16BE failure for U+%.4X\n", cp );
+ }
+ if ( i != len16 ) fprintf ( log, " *** UTF32LE_to_UTF16BE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32LE_to_UTF16BE done for %.4X..%.4X\n", cpLo, cpHi-1 );
+
+ // Test alternating BMP, beyond BMP.
+
+ len32 = 0x8000*(1+1); len16 = 0x8000*(1+2);
+ for ( i = 0, len32 = 0; i < 0x8000; ++i ) {
+ sU32[len32] = NativeUTF32LE(i);
+ ++len32;
+ sU32[len32] = NativeUTF32LE(i+0x10000);
+ ++len32;
+ }
+ sU32[len32] = 0xFFFFFFFF;
+
+ UTF32LE_to_UTF16BE ( sU32, len32, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32 != len32x) || (len16 != len16x) ) fprintf ( log, " *** UTF32LE_to_UTF16BE length failure, %d -> %d\n", len32x, len16x );
+
+ sU16[len16] = NativeUTF16BE(0xDC00); // Isolated low surrogate.
+ for ( i = 0, len16 = 0; i < 0x8000; ++i ) {
+ CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x );
+ if ( (len16x != 1) || (cpx != i) ) fprintf ( log, " *** UTF32LE_to_UTF16BE failure for U+%.4X\n", i );
+ len16 += len16x;
+ CodePoint_from_UTF16BE ( &sU16[len16], 4, &cpx, &len16x );
+ if ( (len16x != 2) || (cpx != i+0x10000) ) fprintf ( log, " *** UTF32LE_to_UTF16BE failure for U+%.4X\n", i );
+ len16 += len16x;
+ }
+ if ( len16 != 0x8000*(1+2) ) fprintf ( log, " *** UTF32LE_to_UTF16BE consume failure, %d != %d\n", i, len16 );
+
+ fprintf ( log, " UTF32LE_to_UTF16BE done for mixed values\n" );
+
+ // Test empty buffers and buffers ending in mid character.
+
+ len32 = 0x8000*(1+1); len16 = 0x8000*(1+2);
+
+ UTF32LE_to_UTF16BE ( sU32, 0, sU16, sizeof(sU16), &len32x, &len16x );
+ if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF16BE empty input failure, %d -> %d\n", len32x, len16x );
+ UTF32LE_to_UTF16BE ( sU32, len32, sU16, 0, &len32x, &len16x );
+ if ( (len32x != 0) || (len16x != 0) ) fprintf ( log, " *** UTF32LE_to_UTF16BE empty output failure, %d -> %d\n", len32x, len16x );
+ UTF32LE_to_UTF16BE ( sU32, len32, sU16, 5, &len32x, &len16x );
+ if ( (len32x != 3) || (len16x != 4) ) fprintf ( log, " *** UTF32LE_to_UTF16BE partial output failure, %d -> %d\n", len32x, len16x );
+
+ fprintf ( log, " UTF32LE_to_UTF16BE done for empty buffers and buffers ending in mid character\n" );
+
+} // Test_UTF32_to_UTF16
+
+// =================================================================================================
+
+// Runs every conversion test stage in a fixed order, handing each one the log file. The conversion
+// tables are initialized once before the first stage runs.
+static void DoTest ( FILE * log )
+{
+	typedef void (*TestStageProc) ( FILE * stageLog );
+
+	// Kept as a plain local table; the stages run exactly in the order listed here.
+	const TestStageProc stages[] = {
+		Test_SwappingPrimitives,
+		Test_CodePoint_to_UTF8,
+		Test_CodePoint_from_UTF8,
+		Test_CodePoint_to_UTF16,
+		Test_CodePoint_from_UTF16,
+		Test_UTF8_to_UTF16,
+		Test_UTF8_to_UTF32,
+		Test_UTF16_to_UTF8,
+		Test_UTF32_to_UTF8,
+		Test_UTF16_to_UTF32,
+		Test_UTF32_to_UTF16
+	};
+	const size_t stageCount = sizeof(stages) / sizeof(stages[0]);
+
+	InitializeUnicodeConversions();
+
+	for ( size_t stage = 0; stage < stageCount; ++stage ) {
+		(*stages[stage]) ( log );
+	}
+
+} // DoTest
+
+// =================================================================================================
+
+// Entry point for the Unicode conversion correctness test. Writes a dated start banner to the log,
+// runs DoTest, then writes a dated finish banner. Returns 0 on success, or -1 if the log file can't
+// be opened or DoTest lets an exception escape. The log is stdout unless XMP_AutomatedTestBuild is
+// set, in which case it is the file TestUnicode.out.
+extern "C" int main ( void )
+{
+	char buffer [1000];	// Holds one banner line: fixed text plus the short string from ctime.
+
+	#if !XMP_AutomatedTestBuild
+		FILE * log = stdout;
+	#else
+		FILE * log = fopen ( "TestUnicode.out", "wb" );
+		if ( log == 0 ) {
+			// Nothing can be logged without the file, report on stderr instead of crashing in fprintf.
+			fprintf ( stderr, "## Unable to open TestUnicode.out for writing\n" );
+			return -1;
+		}
+	#endif
+
+	int result = 0;
+	size_t bannerLen;
+
+	time_t now;
+	time ( &now );
+	sprintf ( buffer, "// Starting test for Unicode conversion correctness, %s", ctime ( &now ) );
+
+	// The rule of '=' is drawn as wide as the banner text; the ctime string supplies the newline.
+	bannerLen = strlen ( buffer );
+	fprintf ( log, "// " );
+	for ( size_t i = 4; i < bannerLen; ++i ) fprintf ( log, "=" );
+	fprintf ( log, "\n%s", buffer );
+	fprintf ( log, "// Native %s endian\n", (kBigEndianHost ? "big" : "little") );
+
+	try {
+
+		DoTest ( log );
+
+	} catch ( ... ) {
+
+		fprintf ( log, "\n## Caught unexpected exception\n" );
+		result = -1;
+
+	}
+
+	if ( result == 0 ) {
+
+		// The finish banner is only written for a run that completed.
+		time ( &now );
+		sprintf ( buffer, "// Finished test for Unicode conversion correctness, %s", ctime ( &now ) );
+
+		bannerLen = strlen ( buffer );
+		fprintf ( log, "\n// " );
+		for ( size_t i = 4; i < bannerLen; ++i ) fprintf ( log, "=" );
+		fprintf ( log, "\n%s\n", buffer );
+
+	}
+
+	// Release the log on every exit path, but only flush stdout since this code did not open it.
+	if ( log == stdout ) {
+		fflush ( log );
+	} else {
+		fclose ( log );
+	}
+
+	return result;
+
+}
diff --git a/samples/source/UnicodeParseSerialize.cpp b/samples/source/UnicodeParseSerialize.cpp
new file mode 100644
index 0000000..c3c9865
--- /dev/null
+++ b/samples/source/UnicodeParseSerialize.cpp
@@ -0,0 +1,510 @@
+// =================================================================================================
+//
+// A thorough test for UTF-16 and UTF-32 serialization and parsing. It assumes the basic Unicode
+// conversion functions are working - they have their own exhaustive test.
+//
+// =================================================================================================
+
+#include <cstdio>
+#include <vector>
+#include <string>
+#include <cstring>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <ctime>
+
+#include <cstdlib>
+#include <cerrno>
+#include <stdexcept>
+#include <cassert>
+
+#define TXMP_STRING_TYPE std::string
+#include "XMP.hpp"
+#include "XMP.incl_cpp"
+
+#include "source/EndianUtils.hpp"
+#include "source/UnicodeConversions.hpp"
+#include "source/UnicodeConversions.cpp"
+
+using namespace std;
+
+#if WIN_ENV
+ #pragma warning ( disable : 4701 ) // local variable may be used without having been initialized
+#endif
+
+// =================================================================================================
+
+#define IncludeUTF32 0 // *** UTF-32 parsing isn't working at the moment, Expat seems to not handle it.
+
+#define kCodePointCount 0x110000 // One more than U+10FFFF, the highest code point used by the tests below.
+
+UTF8Unit sU8 [kCodePointCount*4 + 8]; // 4 units per code point plus 8 spare; DoTest fills it with the UTF-8 test value.
+UTF16Unit sU16 [kCodePointCount*2 + 4]; // 2 units per code point plus 4 spare.
+UTF32Unit sU32 [kCodePointCount + 2]; // 1 unit per code point plus 2 spare; FullUnicodeParse also reuses it as a raw byte buffer.
+
+static FILE * sLogFile; // Log stream, assigned in main and written by PrintXMPErrorInfo.
+
+static const char * kNS1 = "ns:test1/"; // Namespace URI for the test properties, same URI as xmlns:ns1 in kSimpleRDF.
+
+static const char * kSimpleRDF = // ASCII-only RDF parsed by DoTest: a simple, array, struct, qualified, alt-text and array-of-struct property.
+ "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>"
+ " <rdf:Description rdf:about='Test:kSimpleRDF/' xmlns:ns1='ns:test1/' xmlns:ns2='ns:test2/'>"
+ ""
+ " <ns1:SimpleProp>Simple value</ns1:SimpleProp>"
+ ""
+ " <ns1:ArrayProp>"
+ " <rdf:Bag>"
+ " <rdf:li>Item1 value</rdf:li>"
+ " <rdf:li>Item2 value</rdf:li>"
+ " </rdf:Bag>"
+ " </ns1:ArrayProp>"
+ ""
+ " <ns1:StructProp rdf:parseType='Resource'>"
+ " <ns2:Field1>Field1 value</ns2:Field1>"
+ " <ns2:Field2>Field2 value</ns2:Field2>"
+ " </ns1:StructProp>"
+ ""
+ " <ns1:QualProp rdf:parseType='Resource'>"
+ " <rdf:value>Prop value</rdf:value>"
+ " <ns2:Qual>Qual value</ns2:Qual>"
+ " </ns1:QualProp>"
+ ""
+ " <ns1:AltTextProp>"
+ " <rdf:Alt>"
+ " <rdf:li xml:lang='x-one'>x-one value</rdf:li>"
+ " <rdf:li xml:lang='x-two'>x-two value</rdf:li>"
+ " </rdf:Alt>"
+ " </ns1:AltTextProp>"
+ ""
+ " <ns1:ArrayOfStructProp>"
+ " <rdf:Bag>"
+ " <rdf:li rdf:parseType='Resource'>"
+ " <ns2:Field1>Item-1</ns2:Field1>"
+ " <ns2:Field2>Field 1.2 value</ns2:Field2>"
+ " </rdf:li>"
+ " <rdf:li rdf:parseType='Resource'>"
+ " <ns2:Field1>Item-2</ns2:Field1>"
+ " <ns2:Field2>Field 2.2 value</ns2:Field2>"
+ " </rdf:li>"
+ " </rdf:Bag>"
+ " </ns1:ArrayOfStructProp>"
+ ""
+ " </rdf:Description>"
+ "</rdf:RDF>";
+
+// =================================================================================================
+
+// Text output callback that accumulates into a std::string. refCon must point to the std::string
+// to extend; the outLen bytes at outStr are added to its end. Always returns 0.
+static XMP_Status DumpToString ( void * refCon, XMP_StringPtr outStr, XMP_StringLen outLen )
+{
+	std::string & sink = *static_cast < std::string * > ( refCon );
+	sink.append ( outStr, outStr + outLen );
+	return 0;
+}
+
+// =================================================================================================
+
+// Text output callback that writes straight to a stdio stream. refCon must be the FILE* to write
+// to; the outLen bytes at outStr are written as-is. The fwrite result is not examined, the
+// function always returns 0.
+static XMP_Status DumpToFile ( void * refCon, XMP_StringPtr outStr, XMP_StringLen outLen )
+{
+	FILE * const sink = static_cast < FILE * > ( refCon );
+	(void) fwrite ( outStr, 1, outLen, sink );
+	return 0;
+}
+
+// =================================================================================================
+
+// Writes a two line report for an XMP_Error to sLogFile: the caller's title line, then the numeric
+// error ID and the error message.
+static void PrintXMPErrorInfo ( const XMP_Error & excep, const char * title )
+{
+	const XMP_Int32 errID = excep.GetID();
+	const char * const errText = excep.GetErrMsg();
+
+	FILE * const out = sLogFile;
+	fprintf ( out, "%s\n", title );
+	fprintf ( out, " #%d : %s\n", errID, errText );
+}
+
+// =================================================================================================
+
+// Parses a serialized packet in pieces of at most bufferSize bytes and checks that the resulting
+// ns1:FullUnicode property equals the expected value.
+//
+//   log          Stream for the error lines written directly by this function.
+//   encoding     Label for the packet's encoding, used only in messages.
+//   bufferSize   Largest number of bytes handed to one ParseFromBuffer call. Must be nonzero for a
+//                non-empty packet and no larger than sizeof(sU32).
+//   packet       The serialized XMP to parse.
+//   fullUnicode  The UTF-8 value the parsed FullUnicode property must have.
+//
+// Each piece is copied into the shared sU32 buffer before being parsed. Parsing exceptions are
+// reported through PrintXMPErrorInfo, which writes to sLogFile rather than to log.
+static void FullUnicodeParse ( FILE * log, const char * encoding, size_t bufferSize,
+                               const std::string & packet, const std::string & fullUnicode )
+{
+	// size_t has no portable printf conversion in older C++ libraries, so print it as unsigned long.
+	const unsigned long printableSize = static_cast<unsigned long> ( bufferSize );
+
+	if ( bufferSize > sizeof(sU32) ) {
+		fprintf ( log, "#ERROR: FullUnicodeParse buffer overrun for %s, %lu byte buffers\n", encoding, printableSize );
+		return;
+	}
+
+	if ( (bufferSize == 0) && (! packet.empty()) ) {
+		// A zero step would never advance through the packet, the loop below would not terminate.
+		fprintf ( log, "#ERROR: FullUnicodeParse zero size buffer for %s\n", encoding );
+		return;
+	}
+
+	SXMPMeta meta;
+	try {
+		memset ( sU32, -1, sizeof(sU32) );	// Fill with 0xFF so stale bytes beyond each piece are recognizable.
+		for ( size_t i = 0; i < packet.size(); i += bufferSize ) {
+			size_t count = bufferSize;
+			if ( count > (packet.size() - i) ) count = packet.size() - i;	// The last piece may be short.
+			memcpy ( sU32, &packet[i], count );
+			meta.ParseFromBuffer ( XMP_StringPtr(sU32), count, kXMP_ParseMoreBuffers );
+		}
+		meta.ParseFromBuffer ( XMP_StringPtr(sU32), 0 );	// Zero length call without kXMP_ParseMoreBuffers ends the parse.
+	} catch ( XMP_Error& excep ) {
+		char message [200];
+		sprintf ( message, "#ERROR: Full Unicode parsing error for %s, %lu byte buffers", encoding, printableSize );
+		PrintXMPErrorInfo ( excep, message );
+		return;
+	}
+
+	std::string value;
+	bool found = meta.GetProperty ( kNS1, "FullUnicode", &value, 0 );
+	if ( (! found) || (value != fullUnicode) ) {
+		fprintf ( log, "#ERROR: Failed to get full Unicode value for %s, %lu byte buffers\n", encoding, printableSize );
+	}
+
+} // FullUnicodeParse
+
+// =================================================================================================
+
+// Runs the serialization and parsing tests, writing all results to log.
+//
+// Part 1 parses kSimpleRDF, serializes it in UTF-8, UTF-16 BE/LE and UTF-32 BE/LE, prints the size
+// and leading bytes of each packet, reparses the packets and compares object dumps against the
+// original. Part 2 builds one property whose value holds a large set of Unicode characters,
+// serializes it, and parses the packets through FullUnicodeParse, first without and then with a
+// leading BOM. The UTF-32 comparisons and full Unicode UTF-32 cases are compiled only when
+// IncludeUTF32 is nonzero.
+static void DoTest ( FILE * log )
+{
+	SXMPMeta meta;
+	size_t u8Count, u32Count;
+	SXMPMeta meta8, meta16b, meta16l, meta32b, meta32l;
+	std::string u8Packet, u16bPacket, u16lPacket, u32bPacket, u32lPacket;
+
+	InitializeUnicodeConversions();
+
+	// ---------------------------------------------------------------------------------------------
+
+	fprintf ( log, "// ------------------------------------------------\n" );
+	fprintf ( log, "// Test basic serialization and parsing using ASCII\n\n" );
+
+	// ----------------------------------------------------
+	// Create basic ASCII packets in each of the encodings.
+
+	meta.ParseFromBuffer ( kSimpleRDF, kXMP_UseNullTermination );
+
+	meta.SerializeToBuffer ( &u8Packet, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF8) );
+	meta.SerializeToBuffer ( &u16bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Big) );
+	meta.SerializeToBuffer ( &u16lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Little) );
+	meta.SerializeToBuffer ( &u32bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Big) );
+	meta.SerializeToBuffer ( &u32lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Little) );
+
+	#if 0
+		FILE* dump;
+		dump = fopen ( "u8Packet.txt", "w" );
+		fwrite ( u8Packet.c_str(), 1, u8Packet.size(), dump );
+		fclose ( dump );
+		dump = fopen ( "u16bPacket.txt", "w" );
+		fwrite ( u16bPacket.c_str(), 1, u16bPacket.size(), dump );
+		fclose ( dump );
+		dump = fopen ( "u16lPacket.txt", "w" );
+		fwrite ( u16lPacket.c_str(), 1, u16lPacket.size(), dump );
+		fclose ( dump );
+		dump = fopen ( "u32bPacket.txt", "w" );
+		fwrite ( u32bPacket.c_str(), 1, u32bPacket.size(), dump );
+		fclose ( dump );
+		dump = fopen ( "u32lPacket.txt", "w" );
+		fwrite ( u32lPacket.c_str(), 1, u32lPacket.size(), dump );
+		fclose ( dump );
+	#endif
+
+	// Verify the character form. The conversion functions are tested separately.
+	// The bytes are read through an unsigned char pointer so that %X never sees a sign extended
+	// value, and sizes are printed as unsigned long because size_t does not match %d.
+
+	const unsigned char * ptr;
+
+	ptr = reinterpret_cast<const unsigned char *> ( u8Packet.c_str() );
+	fprintf ( log, "UTF-8 : %lu : %.2X %.2X \"%.10s...\"\n",
+			  static_cast<unsigned long> ( u8Packet.size() ), *ptr, *(ptr+1), u8Packet.c_str() );
+
+	ptr = reinterpret_cast<const unsigned char *> ( u16bPacket.c_str() );
+	fprintf ( log, "UTF-16BE : %lu : %.2X %.2X %.2X\n",
+			  static_cast<unsigned long> ( u16bPacket.size() ), *ptr, *(ptr+1), *(ptr+2) );
+	ptr = reinterpret_cast<const unsigned char *> ( u16lPacket.c_str() );
+	fprintf ( log, "UTF-16LE : %lu : %.2X %.2X %.2X\n",
+			  static_cast<unsigned long> ( u16lPacket.size() ), *ptr, *(ptr+1), *(ptr+2) );
+
+	ptr = reinterpret_cast<const unsigned char *> ( u32bPacket.c_str() );
+	fprintf ( log, "UTF-32BE : %lu : %.2X %.2X %.2X %.2X %.2X\n",
+			  static_cast<unsigned long> ( u32bPacket.size() ), *ptr, *(ptr+1), *(ptr+2), *(ptr+3), *(ptr+4) );
+	ptr = reinterpret_cast<const unsigned char *> ( u32lPacket.c_str() );
+	fprintf ( log, "UTF-32LE : %lu : %.2X %.2X %.2X %.2X %.2X\n",
+			  static_cast<unsigned long> ( u32lPacket.size() ), *ptr, *(ptr+1), *(ptr+2), *(ptr+3), *(ptr+4) );
+
+	fprintf ( log, "\nBasic serialization tests done\n" );
+
+	// -------------------------------------------------
+	// Verify round trip reparsing of the basic packets.
+
+	std::string origDump, rtDump;
+
+	meta.DumpObject ( DumpToString, &origDump );
+	fprintf ( log, "Original dump\n%s\n", origDump.c_str() );
+
+	// NOTE(review): the UTF-32 packets are reparsed here even when IncludeUTF32 is 0; only the
+	// comparisons further down are conditional. An exception from any parse skips the later ones.
+	try {
+		meta8.ParseFromBuffer ( u8Packet.c_str(), u8Packet.size() );
+		meta16b.ParseFromBuffer ( u16bPacket.c_str(), u16bPacket.size() );
+		meta16l.ParseFromBuffer ( u16lPacket.c_str(), u16lPacket.size() );
+		meta32b.ParseFromBuffer ( u32bPacket.c_str(), u32bPacket.size() );
+		meta32l.ParseFromBuffer ( u32lPacket.c_str(), u32lPacket.size() );
+	} catch ( XMP_Error& excep ) {
+		PrintXMPErrorInfo ( excep, "## Caught reparsing exception" );
+		fprintf ( log, "\n" );
+	}
+
+	#if 0
+		fprintf ( log, "After UTF-8 roundtrip\n" );
+		meta8.DumpObject ( DumpToFile, log );
+		fprintf ( log, "\nAfter UTF-16 BE roundtrip\n" );
+		meta16b.DumpObject ( DumpToFile, log );
+		fprintf ( log, "\nAfter UTF-16 LE roundtrip\n" );
+		meta16l.DumpObject ( DumpToFile, log );
+		fprintf ( log, "\nAfter UTF-32 BE roundtrip\n" );
+		meta32b.DumpObject ( DumpToFile, log );
+		fprintf ( log, "\nAfter UTF-32 LE roundtrip\n" );
+		meta32l.DumpObject ( DumpToFile, log );
+	#endif
+
+	rtDump.clear();
+	meta8.DumpObject ( DumpToString, &rtDump );
+	if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-8\n%s\n", rtDump.c_str() );
+
+	rtDump.clear();
+	meta16b.DumpObject ( DumpToString, &rtDump );
+	if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-16BE\n%s\n", rtDump.c_str() );
+
+	rtDump.clear();
+	meta16l.DumpObject ( DumpToString, &rtDump );
+	if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-16LE\n%s\n", rtDump.c_str() );
+
+	#if IncludeUTF32
+
+		rtDump.clear();
+		meta32b.DumpObject ( DumpToString, &rtDump );
+		if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-32BE\n%s\n", rtDump.c_str() );
+
+		rtDump.clear();
+		meta32l.DumpObject ( DumpToString, &rtDump );
+		if ( rtDump != origDump ) fprintf ( log, "#ERROR: Roundtrip failure for UTF-32LE\n%s\n", rtDump.c_str() );
+
+	#endif
+
+	fprintf ( log, "Basic round-trip parsing tests done\n\n" );
+
+	// ---------------------------------------------------------------------------------------------
+
+	fprintf ( log, "// --------------------------------------------------\n" );
+	fprintf ( log, "// Test parse buffering logic using full Unicode data\n\n" );
+
+	// --------------------------------------------------------------------------------------------
+	// Construct the packets to parse in all encodings. There is just one property with a value
+	// containing all of the Unicode representations. This isn't all of the Unicode characters, but
+	// is more than enough to establish correctness of the buffering logic. It is almost everything
+	// in the BMP, plus the range U+100000..U+10FFFF beyond the BMP. Doing all Unicode characters
+	// takes far too long to execute and does not provide additional confidence. Skip ASCII controls,
+	// they are not allowed in XML and get changed to spaces by SetProperty. Skip U+FFFE and U+FFFF,
+	// the expat parser rejects them.
+
+	#define kTab 0x09
+	#define kLF 0x0A
+	#define kCR 0x0D
+
+	size_t i;
+	UTF32Unit cp;
+	sU32[0] = kTab; sU32[1] = kLF; sU32[2] = kCR;
+	for ( i = 3, cp = 0x20; cp < 0x7F; ++i, ++cp ) sU32[i] = cp;
+	for ( cp = 0x80; cp < 0xD800; ++i, ++cp ) sU32[i] = cp;
+	for ( cp = 0xE000; cp < 0xFFFE; ++i, ++cp ) sU32[i] = cp;
+	for ( cp = 0x100000; cp < 0x110000; ++i, ++cp ) sU32[i] = cp;
+	u32Count = i;
+	assert ( u32Count == (3 + (0x7F-0x20) + (0xD800-0x80) + (0xFFFE - 0xE000) + (0x110000-0x100000)) );
+
+	// sU32 holds native values, so pick the converter that matches the host byte order.
+	if ( kBigEndianHost ) {
+		UTF32BE_to_UTF8 ( sU32, u32Count, sU8, sizeof(sU8), &i, &u8Count );
+	} else {
+		UTF32LE_to_UTF8 ( sU32, u32Count, sU8, sizeof(sU8), &i, &u8Count );
+	}
+	if ( i != u32Count ) fprintf ( log, "#ERROR: Failed to convert full UTF-32 buffer\n" );
+	assert ( u8Count == (3 + (0x7F-0x20) + 2*(0x800-0x80) + 3*(0xD800-0x800) + 3*(0xFFFE - 0xE000) + 4*(0x110000-0x100000)) );
+	sU8[u8Count] = 0;
+
+	std::string fullUnicode;
+	SXMPUtils::RemoveProperties ( &meta, "", "", kXMPUI_DoAllProperties );
+	meta.SetProperty ( kNS1, "FullUnicode", XMP_StringPtr(sU8) );
+	meta.GetProperty ( kNS1, "FullUnicode", &fullUnicode, 0 );
+	if ( (fullUnicode.size() != u8Count) || (fullUnicode != XMP_StringPtr(sU8)) ) {
+		fprintf ( log, "#ERROR: Failed to set full UTF-8 value\n" );
+		if ( (fullUnicode.size() != u8Count) ) {
+			fprintf ( log, " Size mismatch, want %lu, got %lu\n",
+					  static_cast<unsigned long> ( u8Count ), static_cast<unsigned long> ( fullUnicode.size() ) );
+		} else {
+			for ( size_t b = 0; b < u8Count; ++b ) {
+				// Compare as UTF8Unit on both sides, a plain char may be signed and would never
+				// equal an unsigned unit for bytes of 0x80 and above.
+				if ( static_cast<UTF8Unit> ( fullUnicode[b] ) != sU8[b] ) {
+					fprintf ( log, " Byte mismatch at %lu\n", static_cast<unsigned long> ( b ) );
+				}
+			}
+		}
+	}
+
+	u8Packet.clear();
+	u16bPacket.clear();
+	u16lPacket.clear();
+	u32bPacket.clear();
+	u32lPacket.clear();
+
+	meta.SerializeToBuffer ( &u8Packet, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF8) );
+	meta.SerializeToBuffer ( &u16bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Big) );
+	meta.SerializeToBuffer ( &u16lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF16Little) );
+	#if IncludeUTF32
+		meta.SerializeToBuffer ( &u32bPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Big) );
+		meta.SerializeToBuffer ( &u32lPacket, (kXMP_OmitPacketWrapper | kXMP_EncodeUTF32Little) );
+	#endif
+
+	// ---------------------------------------------------------------------
+	// Parse the whole packet as a sanity check, then at a variety of sizes.
+
+	FullUnicodeParse ( log, "UTF-8", u8Packet.size(), u8Packet, fullUnicode );
+	FullUnicodeParse ( log, "UTF-16BE", u16bPacket.size(), u16bPacket, fullUnicode );
+	FullUnicodeParse ( log, "UTF-16LE", u16lPacket.size(), u16lPacket, fullUnicode );
+	#if IncludeUTF32
+		FullUnicodeParse ( log, "UTF-32BE", u32bPacket.size(), u32bPacket, fullUnicode );
+		FullUnicodeParse ( log, "UTF-32LE", u32lPacket.size(), u32lPacket, fullUnicode );
+	#endif
+	fprintf ( log, "Full packet, no BOM, buffered parsing tests done\n" );
+
+#if 0	// Skip the partial buffer tests, there seem to be problems, but no client uses partial buffers.
+
+	for ( i = 1; i <= 3; ++i ) {
+		FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode );
+		FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode );
+		FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode );
+		#if IncludeUTF32
+			FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode );
+			FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode );
+		#endif
+		fprintf ( log, "%lu byte buffers, no BOM, buffered parsing tests done\n", static_cast<unsigned long> ( i ) );
+	}
+
+	for ( i = 4; i <= 16; i *= 2 ) {
+		FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode );
+		FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode );
+		FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode );
+		#if IncludeUTF32
+			FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode );
+			FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode );
+		#endif
+		fprintf ( log, "%lu byte buffers, no BOM, buffered parsing tests done\n", static_cast<unsigned long> ( i ) );
+	}
+
+#endif
+
+	fprintf ( log, "\n" );
+
+	// -----------------------------------------------------------------------
+	// Redo the buffered parsing tests, now with a leading BOM in the packets.
+
+	u8Packet.insert ( 0, "\xEF\xBB\xBF", 3 );
+
+	// The BOM bytes are taken from the in-memory form of these two values, so which value and which
+	// half supplies a given BOM depends on the host byte order.
+	UTF32Unit NatBOM = 0x0000FEFF;
+	UTF32Unit SwapBOM = 0xFFFE0000;
+
+	if ( kBigEndianHost ) {
+		u16bPacket.insert ( 0, XMP_StringPtr(&NatBOM)+2, 2 );
+		u16lPacket.insert ( 0, XMP_StringPtr(&SwapBOM), 2 );
+		u32bPacket.insert ( 0, XMP_StringPtr(&NatBOM), 4 );
+		u32lPacket.insert ( 0, XMP_StringPtr(&SwapBOM), 4 );
+	} else {
+		u16lPacket.insert ( 0, XMP_StringPtr(&NatBOM), 2 );
+		u16bPacket.insert ( 0, XMP_StringPtr(&SwapBOM)+2, 2 );
+		u32lPacket.insert ( 0, XMP_StringPtr(&NatBOM), 4 );
+		u32bPacket.insert ( 0, XMP_StringPtr(&SwapBOM), 4 );
+	}
+
+	FullUnicodeParse ( log, "UTF-8", u8Packet.size(), u8Packet, fullUnicode );
+	FullUnicodeParse ( log, "UTF-16BE", u16bPacket.size(), u16bPacket, fullUnicode );
+	FullUnicodeParse ( log, "UTF-16LE", u16lPacket.size(), u16lPacket, fullUnicode );
+	#if IncludeUTF32
+		FullUnicodeParse ( log, "UTF-32BE", u32bPacket.size(), u32bPacket, fullUnicode );
+		FullUnicodeParse ( log, "UTF-32LE", u32lPacket.size(), u32lPacket, fullUnicode );
+	#endif
+	fprintf ( log, "Full packet, leading BOM, buffered parsing tests done\n" );
+
+#if 0	// Skip the partial buffer tests, there seem to be problems, but no client uses partial buffers.
+
+	for ( i = 1; i <= 3; ++i ) {
+		FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode );
+		FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode );
+		FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode );
+		#if IncludeUTF32
+			FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode );
+			FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode );
+		#endif
+		fprintf ( log, "%lu byte buffers, leading BOM, buffered parsing tests done\n", static_cast<unsigned long> ( i ) );
+	}
+
+	for ( i = 4; i <= 16; i *= 2 ) {
+		FullUnicodeParse ( log, "UTF-8", i, u8Packet, fullUnicode );
+		FullUnicodeParse ( log, "UTF-16BE", i, u16bPacket, fullUnicode );
+		FullUnicodeParse ( log, "UTF-16LE", i, u16lPacket, fullUnicode );
+		#if IncludeUTF32
+			FullUnicodeParse ( log, "UTF-32BE", i, u32bPacket, fullUnicode );
+			FullUnicodeParse ( log, "UTF-32LE", i, u32lPacket, fullUnicode );
+		#endif
+		fprintf ( log, "%lu byte buffers, leading BOM, buffered parsing tests done\n", static_cast<unsigned long> ( i ) );
+	}
+
+#endif
+
+	fprintf ( log, "\n" );
+
+} // DoTest
+
+// =================================================================================================
+
+// Entry point for the UTF-16 and UTF-32 serialization and parsing test. Logs to stdout: a dated
+// start banner, a short description, the DoTest output, then a dated finish banner. Returns 0 when
+// the run completes without an exception, and -1 if SXMPMeta::Initialize fails or any exception
+// reaches this function.
+extern "C" int main ( void )
+{
+	int result = 0;
+	char buffer [1000];	// Holds one banner line: fixed text plus the short string from ctime.
+	size_t bannerLen;
+
+	sLogFile = stdout;
+
+	time_t now;
+	time ( &now );
+	sprintf ( buffer, "// Starting test for UTF-16 and UTF-32 serialization and parsing, %s", ctime ( &now ) );
+
+	// The rule of '=' is drawn as wide as the banner text; the ctime string supplies the newline.
+	bannerLen = strlen ( buffer );
+	fprintf ( sLogFile, "// " );
+	for ( size_t i = 4; i < bannerLen; ++i ) fprintf ( sLogFile, "=" );
+	fprintf ( sLogFile, "\n%s", buffer );
+
+	fprintf ( sLogFile, "// =====================================================================================\n" );
+	fprintf ( sLogFile, "// A thorough test for UTF-16 and UTF-32 serialization and parsing. It assumes the basic\n" );
+	fprintf ( sLogFile, "// Unicode conversion functions are working - they have their own exhaustive test.\n\n" );
+
+	#if ! IncludeUTF32
+		fprintf ( sLogFile, "// ** Skipping UTF-32 tests, Expat seems to not handle it.\n\n" );
+	#endif
+
+	#if 0
+		if ( sLogFile == stdout ) {
+			// Use this to be able to move the app window away from debugger windows.
+			fprintf ( sLogFile, "Move window, type return to continue" );
+			fread ( buffer, 1, 1, stdin );
+		}
+	#endif
+
+	try {
+
+		if ( ! SXMPMeta::Initialize() ) {
+			fprintf ( sLogFile, "\n## SXMPMeta::Initialize failed!\n" );
+			return -1;
+		}
+
+		DoTest ( sLogFile );
+
+		SXMPMeta::Terminate();
+
+	} catch ( XMP_Error& excep ) {
+
+		// Still write the finish banner, but make the failure visible in the exit status.
+		PrintXMPErrorInfo ( excep, "\n## Unhandled XMP_Error exception" );
+		result = -1;
+
+	} catch ( ... ) {
+
+		fprintf ( sLogFile, "\n## Unexpected exception\n" );
+		return -1;
+
+	}
+
+	time ( &now );
+	sprintf ( buffer, "// Finished test for UTF-16 and UTF-32 serialization and parsing, %s", ctime ( &now ) );
+
+	bannerLen = strlen ( buffer );
+	fprintf ( sLogFile, "// " );
+	for ( size_t i = 4; i < bannerLen; ++i ) fprintf ( sLogFile, "=" );
+	fprintf ( sLogFile, "\n%s\n", buffer );
+
+	// Only flush stdout, this code did not open it; close anything else.
+	if ( sLogFile == stdout ) {
+		fflush ( sLogFile );
+	} else {
+		fclose ( sLogFile );
+	}
+
+	return result;
+
+}
diff --git a/samples/source/UnicodePerformance.cpp b/samples/source/UnicodePerformance.cpp
new file mode 100644
index 0000000..d11185c
--- /dev/null
+++ b/samples/source/UnicodePerformance.cpp
@@ -0,0 +1,308 @@
+// =================================================================================================
+
+#include <cstdio>
+#include <vector>
+#include <string>
+#include <cstring>
+#include <ctime>
+
+#include <cstdio>
+#include <cstdlib>
+#include <cerrno>
+#include <stdexcept>
+
+using namespace std;
+
+#if WIN_ENV
+ #pragma warning ( disable : 4701 ) // local variable may be used without having been initialized
+#endif
+
+// =================================================================================================
+
+#include "public/include/XMP_Environment.h"
+#include "public/include/XMP_Const.h"
+
+#include "source/EndianUtils.hpp"
+#include "source/UnicodeConversions.hpp"
+#include "source/UnicodeConversions.cpp"
+
+#define TestUnicodeConsortium 0
+
+#if TestUnicodeConsortium
+ #include "ConvertUTF.c" // The Unicode Consortium implementations.
+#endif
+
+// =================================================================================================
+
+// Number of entries used to size the shared conversion buffers; ComparePerformance fills up to
+// 0x110000 UTF-32 units per data set.
+#define kCodePointCount 0x110000
+
+// Shared work buffers for the timing runs, one per encoding form. They allow 4 UTF-8 units and
+// 2 UTF-16 units per UTF-32 unit, plus a few spare units each.
+// NOTE(review): the reason for the spare units (+8, +4, +2) is not visible here - confirm.
+UTF8Unit sU8 [kCodePointCount*4 + 8];
+UTF16Unit sU16 [kCodePointCount*2 + 4];
+UTF32Unit sU32 [kCodePointCount + 2];
+
+// =================================================================================================
+
+// Conversion routines being measured. They are left unset here and assigned at run time by
+// ComparePerformance, which picks the big or little endian variants from kBigEndianHost.
+static UTF8_to_UTF16_Proc OurUTF8_to_UTF16; // ! Don't use static initialization, VS.Net strips it!
+static UTF8_to_UTF32_Proc OurUTF8_to_UTF32;
+static UTF16_to_UTF8_Proc OurUTF16_to_UTF8;
+static UTF16_to_UTF32_Proc OurUTF16_to_UTF32;
+static UTF32_to_UTF8_Proc OurUTF32_to_UTF8;
+static UTF32_to_UTF16_Proc OurUTF32_to_UTF16;
+
+// =================================================================================================
+
+// Times the six host-endian conversions bound to the OurUTF* pointers over the shared
+// sU32 / sU16 / sU8 buffers and logs the elapsed processor time of each.
+//
+// log      - stream that receives the report.
+// content  - description of the data set, printed in the section heading.
+// u32Count - number of UTF-32 units of test data in sU32.
+// u16Count - number of UTF-16 units the same data needs.
+// u8Count  - number of UTF-8 units the same data needs.
+//
+// Only sU32 has to be filled on entry: the UTF-32 sourced conversions run first and populate sU8
+// and sU16, which then feed the remaining conversions, so the order of the sections matters. Each
+// conversion runs "cycles" times; the unit counts left by the last run are compared with the
+// expected counts and a "***" line is logged on a mismatch.
+
+static void ReportPerformance ( FILE * log, const char * content, const size_t u32Count, const size_t u16Count, const size_t u8Count )
+{
+    // Output capacities in code units. The lengths these routines take and report are unit counts
+    // (they are compared with u8Count / u16Count / u32Count below), so a bare sizeof would
+    // overstate the room available in sU16 and sU32.
+    const size_t u8Capacity = sizeof(sU8) / sizeof(sU8[0]);
+    const size_t u16Capacity = sizeof(sU16) / sizeof(sU16[0]);
+    const size_t u32Capacity = sizeof(sU32) / sizeof(sU32[0]);
+
+    const size_t cycles = 100;
+    size_t inCount = 0, outCount = 0;
+    size_t i;
+    clock_t start, end;
+    double elapsed;
+
+    // The counts are printed as unsigned long: "%d" does not match size_t, and the values here
+    // stay far below 2^32.
+
+    // --------------------------------------------------
+    fprintf ( log, "\n Adobe code over %s\n", content );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) OurUTF32_to_UTF8 ( sU32, u32Count, sU8, u8Capacity, &inCount, &outCount );
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF32_to_UTF8 : %.3f seconds\n", elapsed );
+    if ( (inCount != u32Count) || (outCount != u8Count) ) fprintf ( log, " *** Our UTF32_to_UTF8 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) OurUTF32_to_UTF16 ( sU32, u32Count, sU16, u16Capacity, &inCount, &outCount );
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF32_to_UTF16 : %.3f seconds\n", elapsed );
+    if ( (inCount != u32Count) || (outCount != u16Count) ) fprintf ( log, " *** Our UTF32_to_UTF16 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) OurUTF16_to_UTF8 ( sU16, u16Count, sU8, u8Capacity, &inCount, &outCount );
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF16_to_UTF8 : %.3f seconds\n", elapsed );
+    if ( (inCount != u16Count) || (outCount != u8Count) ) fprintf ( log, " *** Our UTF16_to_UTF8 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) OurUTF16_to_UTF32 ( sU16, u16Count, sU32, u32Capacity, &inCount, &outCount );
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF16_to_UTF32 : %.3f seconds\n", elapsed );
+    if ( (inCount != u16Count) || (outCount != u32Count) ) fprintf ( log, " *** Our UTF16_to_UTF32 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) OurUTF8_to_UTF16 ( sU8, u8Count, sU16, u16Capacity, &inCount, &outCount );
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF8_to_UTF16 : %.3f seconds\n", elapsed );
+    if ( (inCount != u8Count) || (outCount != u16Count) ) fprintf ( log, " *** Our UTF8_to_UTF16 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) OurUTF8_to_UTF32 ( sU8, u8Count, sU32, u32Capacity, &inCount, &outCount );
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF8_to_UTF32 : %.3f seconds\n", elapsed );
+    if ( (inCount != u8Count) || (outCount != u32Count) ) fprintf ( log, " *** Our UTF8_to_UTF32 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    #if TestUnicodeConsortium
+
+    // ---------------------------------------------------------------
+    fprintf ( log, "\n Unicode Consortium code over %s\n", content );
+
+    // The Unicode Consortium routines advance the source and target pointers they are given and
+    // take an end pointer, so the end of each target is the buffer start plus its unit capacity.
+    UTF32Unit * u32Ptr;
+    UTF16Unit * u16Ptr;
+    UTF8Unit * u8Ptr;
+    ConversionResult ucStatus;
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) {
+        u32Ptr = sU32; u8Ptr = sU8;
+        ucStatus = ConvertUTF32toUTF8 ( (const UTF32**)(&u32Ptr), (const UTF32*)(sU32+u32Count), &u8Ptr, sU8+u8Capacity, strictConversion );
+    }
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF32_to_UTF8 : %.3f seconds\n", elapsed );
+    inCount = u32Ptr - sU32; outCount = u8Ptr - sU8;
+    if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF32toUTF8 status error, %d\n", (int)ucStatus );
+    if ( (inCount != u32Count) || (outCount != u8Count) ) fprintf ( log, " *** UC ConvertUTF32toUTF8 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) {
+        u32Ptr = sU32; u16Ptr = sU16;
+        ucStatus = ConvertUTF32toUTF16 ( (const UTF32**)(&u32Ptr), (const UTF32*)(sU32+u32Count), &u16Ptr, sU16+u16Capacity, strictConversion );
+    }
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF32_to_UTF16 : %.3f seconds\n", elapsed );
+    inCount = u32Ptr - sU32; outCount = u16Ptr - sU16;
+    if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF32toUTF16 status error, %d\n", (int)ucStatus );
+    if ( (inCount != u32Count) || (outCount != u16Count) ) fprintf ( log, " *** UC ConvertUTF32toUTF16 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) {
+        u16Ptr = sU16; u8Ptr = sU8;
+        ucStatus = ConvertUTF16toUTF8 ( (const UTF16**)(&u16Ptr), (const UTF16*)(sU16+u16Count), &u8Ptr, sU8+u8Capacity, strictConversion );
+    }
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF16_to_UTF8 : %.3f seconds\n", elapsed );
+    inCount = u16Ptr - sU16; outCount = u8Ptr - sU8;
+    if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF16toUTF8 status error, %d\n", (int)ucStatus );
+    if ( (inCount != u16Count) || (outCount != u8Count) ) fprintf ( log, " *** UC ConvertUTF16toUTF8 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) {
+        u16Ptr = sU16; u32Ptr = sU32;
+        ucStatus = ConvertUTF16toUTF32 ( (const UTF16**)(&u16Ptr), (const UTF16*)(sU16+u16Count), &u32Ptr, sU32+u32Capacity, strictConversion );
+    }
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF16_to_UTF32 : %.3f seconds\n", elapsed );
+    inCount = u16Ptr - sU16; outCount = u32Ptr - sU32;
+    if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF16toUTF32 status error, %d\n", (int)ucStatus );
+    if ( (inCount != u16Count) || (outCount != u32Count) ) fprintf ( log, " *** UC ConvertUTF16toUTF32 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) {
+        u8Ptr = sU8; u16Ptr = sU16;
+        ucStatus = ConvertUTF8toUTF16 ( (const UTF8**)(&u8Ptr), (const UTF8*)(sU8+u8Count), &u16Ptr, sU16+u16Capacity, strictConversion );
+    }
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF8_to_UTF16 : %.3f seconds\n", elapsed );
+    inCount = u8Ptr - sU8; outCount = u16Ptr - sU16;
+    if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF8toUTF16 status error, %d\n", (int)ucStatus );
+    if ( (inCount != u8Count) || (outCount != u16Count) ) fprintf ( log, " *** UC ConvertUTF8toUTF16 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    start = clock();
+    for ( i = 0; i < cycles; ++i ) {
+        u8Ptr = sU8; u32Ptr = sU32;
+        ucStatus = ConvertUTF8toUTF32 ( (const UTF8**)(&u8Ptr), (const UTF8*)(sU8+u8Count), &u32Ptr, sU32+u32Capacity, strictConversion );
+    }
+    end = clock();
+    elapsed = double(end-start) / CLOCKS_PER_SEC;
+
+    fprintf ( log, " UTF8_to_UTF32 : %.3f seconds\n", elapsed );
+    inCount = u8Ptr - sU8; outCount = u32Ptr - sU32;
+    if ( ucStatus != conversionOK ) fprintf ( log, " *** UC ConvertUTF8toUTF32 status error, %d\n", (int)ucStatus );
+    if ( (inCount != u8Count) || (outCount != u32Count) ) fprintf ( log, " *** UC ConvertUTF8toUTF32 count error, %lu -> %lu\n", (unsigned long)inCount, (unsigned long)outCount );
+
+    #endif
+
+} // ReportPerformance
+
+// =================================================================================================
+
+// Binds the OurUTF* pointers to the conversion routines matching the host byte order, then runs
+// ReportPerformance over four data sets written into sU32: every code point outside the surrogate
+// range, ASCII only, BMP values from 0x4000 to 0xBFFF, and values from 0x40000 to 0x4FFFF.
+
+static void ComparePerformance ( FILE * log )
+{
+    if ( ! kBigEndianHost ) {
+        OurUTF8_to_UTF16 = UTF8_to_UTF16LE;
+        OurUTF8_to_UTF32 = UTF8_to_UTF32LE;
+        OurUTF16_to_UTF8 = UTF16LE_to_UTF8;
+        OurUTF16_to_UTF32 = UTF16LE_to_UTF32LE;
+        OurUTF32_to_UTF8 = UTF32LE_to_UTF8;
+        OurUTF32_to_UTF16 = UTF32LE_to_UTF16LE;
+    } else {
+        OurUTF8_to_UTF16 = UTF8_to_UTF16BE;
+        OurUTF8_to_UTF32 = UTF8_to_UTF32BE;
+        OurUTF16_to_UTF8 = UTF16BE_to_UTF8;
+        OurUTF16_to_UTF32 = UTF16BE_to_UTF32BE;
+        OurUTF32_to_UTF8 = UTF32BE_to_UTF8;
+        OurUTF32_to_UTF16 = UTF32BE_to_UTF16BE;
+    }
+
+    size_t slot;
+    UTF32Unit ch;
+
+    // Measure using the full Unicode set: 0..0xD7FF then 0xE000..0x10FFFF, skipping the surrogates.
+    slot = 0;
+    for ( ch = 0; ch < 0xD800; ++ch, ++slot ) sU32[slot] = ch;
+    for ( ch = 0xE000; ch < 0x110000; ++ch, ++slot ) sU32[slot] = ch;
+    const size_t fullU32 = 0xD800 + (0x110000 - 0xE000);
+    const size_t fullU16 = 0xD800 + (0x10000 - 0xE000) + (0x110000 - 0x10000)*2;
+    const size_t fullU8 = 0x80 + (0x800 - 0x80)*2 + (0xD800 - 0x800)*3 + (0x10000 - 0xE000)*3 + (0x110000 - 0x10000)*4;
+    ReportPerformance ( log, "full Unicode set", fullU32, fullU16, fullU8 );
+
+    // The remaining data sets all hold this many UTF-32 units.
+    const size_t unitCount = 0x110000;
+
+    // Measure using just ASCII: one unit per character in every encoding form.
+    for ( slot = 0; slot < unitCount; ++slot ) sU32[slot] = slot & 0x7F;
+    ReportPerformance ( log, "just ASCII", unitCount, unitCount, unitCount );
+
+    // Measure using just non-ASCII inside the BMP: one UTF-16 unit, three UTF-8 units each.
+    for ( slot = 0; slot < unitCount; ++slot ) sU32[slot] = 0x4000 + (slot & 0x7FFF);
+    ReportPerformance ( log, "just non-ASCII inside the BMP", unitCount, unitCount, unitCount*3 );
+
+    // Measure using just outside the BMP: two UTF-16 units, four UTF-8 units each.
+    for ( slot = 0; slot < unitCount; ++slot ) sU32[slot] = 0x40000 + (slot & 0xFFFF);
+    ReportPerformance ( log, "just outside the BMP", unitCount, unitCount*2, unitCount*4 );
+
+} // ComparePerformance
+
+// =================================================================================================
+
+// Runs the whole test: calls InitializeUnicodeConversions, then the timing comparison, which
+// writes its report to "log".
+static void DoTest ( FILE * log )
+{
+    InitializeUnicodeConversions();
+
+    ComparePerformance ( log );
+} // DoTest
+
+// =================================================================================================
+
+// Entry point of the Unicode conversion performance test. Logs to stdout, or to "TestUnicode.out"
+// in an automated test build, writes a start banner, runs DoTest and writes a finish banner.
+// Returns 0 on success and -1 when the log cannot be opened or DoTest throws.
+extern "C" int main ( void )
+{
+    char buffer [1000];
+
+    #if !XMP_AutomatedTestBuild
+        FILE * log = stdout;
+    #else
+        FILE * log = fopen ( "TestUnicode.out", "wb" );
+        if ( log == 0 ) {
+            // Without this check every fprintf below would be handed a null stream.
+            fprintf ( stderr, "## Unable to open TestUnicode.out for writing\n" );
+            return -1;
+        }
+    #endif
+
+    time_t now;
+    time ( &now );
+    sprintf ( buffer, "// Starting test for Unicode conversion performance, %s", ctime ( &now ) );
+
+    // Rule of '=' as wide as the banner text. Starting at 4 discounts the "// " already written
+    // and the newline that ctime appends to its result.
+    const size_t startLen = strlen ( buffer );
+    fprintf ( log, "// " );
+    for ( size_t i = 4; i < startLen; ++i ) fprintf ( log, "=" );
+    fprintf ( log, "\n%s", buffer );
+    fprintf ( log, "// Native %s endian\n", (kBigEndianHost ? "big" : "little") );
+
+    try {
+
+        DoTest ( log );
+
+    } catch ( ... ) {
+
+        fprintf ( log, "\n## Caught unexpected exception\n" );
+        fclose ( log ); // Flush and release the log on the failure path too.
+        return -1;
+
+    }
+
+    time ( &now );
+    sprintf ( buffer, "// Finished test for Unicode conversion performance, %s", ctime ( &now ) );
+
+    const size_t finishLen = strlen ( buffer );
+    fprintf ( log, "\n// " );
+    for ( size_t i = 4; i < finishLen; ++i ) fprintf ( log, "=" );
+    fprintf ( log, "\n%s\n", buffer );
+
+    fclose ( log );
+    return 0;
+
+}
diff --git a/samples/source/XMPCoreCoverage.cpp b/samples/source/XMPCoreCoverage.cpp
index 669600e..fd1e65a 100644
--- a/samples/source/XMPCoreCoverage.cpp
+++ b/samples/source/XMPCoreCoverage.cpp
@@ -10,16 +10,17 @@
* Demonstrates syntax and usage by exercising most of the API functions of XMPCore Toolkit SDK component,
* using a sample XMP Packet that contains all of the different property and attribute types.
*/
+#include <cstdio>
+#include <vector>
#include <string>
+#include <cstring>
#include <iostream>
#include <iomanip>
#include <fstream>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <time.h>
+#include <cstdlib>
+#include <cerrno>
+#include <ctime>
#define TXMP_STRING_TYPE std::string
diff --git a/samples/source/XMPFilesCoverage.cpp b/samples/source/XMPFilesCoverage.cpp
index 3742c40..326b19a 100644
--- a/samples/source/XMPFilesCoverage.cpp
+++ b/samples/source/XMPFilesCoverage.cpp
@@ -11,13 +11,13 @@
* using a sample XMP Packet that contains all of the different property and attribute types.
*/
+#include <cstdio>
#include <vector>
#include <string>
+#include <cstring>
#include <stdexcept>
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include <time.h>
+#include <cerrno>
+#include <ctime>
#define TXMP_STRING_TYPE std::string
#define XMP_INCLUDE_XMPFILES 1
diff --git a/samples/source/XMPIterations.cpp b/samples/source/XMPIterations.cpp
index b24c7cd..f93b7d7 100644
--- a/samples/source/XMPIterations.cpp
+++ b/samples/source/XMPIterations.cpp
@@ -7,10 +7,13 @@
// =================================================================================================
/**
-* Demonstrates how to use the iteration utility in the XMPCore component to walk through property trees.
-*/
+ * Demonstrates how to use the iteration utility in the XMPCore component to walk through property trees.
+ */
+#include <cstdio>
+#include <vector>
#include <string>
+#include <cstring>
// Must be defined to instantiate template classes
#define TXMP_STRING_TYPE std::string
@@ -30,124 +33,128 @@ using namespace std;
// Provide some custom XMP
static const char * rdf =
- "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>"
- " <rdf:Description rdf:about='' xmlns:xmpTest='http://ns.adobe.com/xmpTest/'>"
- ""
- " <xmpTest:MySimpleProp rdf:parseType='Resource'>"
- " <rdf:value>A Value</rdf:value>"
- " <xmpTest:MyQual>Qual Value</xmpTest:MyQual>"
- " </xmpTest:MySimpleProp>"
- ""
- " <xmpTest:MyTopStruct rdf:parseType='Resource'>"
- " <xmpTest:MySecondStruct rdf:parseType='Resource'>"
- " <xmpTest:MyThirdStruct rdf:parseType='Resource'>"
- " <xmpTest:MyThirdStructField>Field Value 3</xmpTest:MyThirdStructField>"
- " </xmpTest:MyThirdStruct>"
- " <xmpTest:MySecondStructField>Field Value 2</xmpTest:MySecondStructField>"
- " </xmpTest:MySecondStruct>"
- " <xmpTest:MyTopStructField>Field Value 1</xmpTest:MyTopStructField>"
- " </xmpTest:MyTopStruct>"
-
- " <xmpTest:MyArrayWithNestedArray>"
- " <rdf:Bag>"
- " <rdf:li>"
- " <rdf:Seq>"
- " <rdf:li>Item 1</rdf:li>"
- " <rdf:li>Item 2</rdf:li>"
- " </rdf:Seq>"
- " </rdf:li>"
- " </rdf:Bag>"
- " </xmpTest:MyArrayWithNestedArray>"
-
- " <xmpTest:MyArrayWithStructures>"
- " <rdf:Seq>"
- " <rdf:li rdf:parseType='Resource'>"
- " <rdf:value>Field Value 1</rdf:value>"
- " <xmpTest:FirstQual>Qual Value 1</xmpTest:FirstQual>"
- " <xmpTest:SecondQual>Qual Value 2</xmpTest:SecondQual>"
- " </rdf:li>"
- " <rdf:li rdf:parseType='Resource'>"
- " <rdf:value>Field Value 2</rdf:value>"
- " <xmpTest:FirstQual>Qual Value 3</xmpTest:FirstQual>"
- " <xmpTest:SecondQual>Qual Value 4</xmpTest:SecondQual>"
- " </rdf:li>"
- " </rdf:Seq>"
- " </xmpTest:MyArrayWithStructures>"
- ""
- " <xmpTest:MyStructureWithArray rdf:parseType='Resource'>"
- " <xmpTest:NestedArray>"
- " <rdf:Bag>"
- " <rdf:li>Item 3</rdf:li>"
- " <rdf:li>Item 4</rdf:li>"
- " <rdf:li>Item 5</rdf:li>"
- " <rdf:li>Item 6</rdf:li>"
- " </rdf:Bag>"
- " </xmpTest:NestedArray>"
- " <xmpTest:NestedArray2>"
- " <rdf:Bag>"
- " <rdf:li>Item 66</rdf:li>"
- " <rdf:li>Item 46</rdf:li>"
- " <rdf:li>Item 56</rdf:li>"
- " <rdf:li>Item 66</rdf:li>"
- " </rdf:Bag>"
- " </xmpTest:NestedArray2>"
- " </xmpTest:MyStructureWithArray>"
- ""
- " </rdf:Description>"
- "</rdf:RDF>";
+"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>"
+" <rdf:Description rdf:about='' xmlns:xmpTest='http://ns.adobe.com/xmpTest/'>"
+""
+" <xmpTest:MySimpleProp rdf:parseType='Resource'>"
+" <rdf:value>A Value</rdf:value>"
+" <xmpTest:MyQual>Qual Value</xmpTest:MyQual>"
+" </xmpTest:MySimpleProp>"
+""
+" <xmpTest:MyTopStruct rdf:parseType='Resource'>"
+" <xmpTest:MySecondStruct rdf:parseType='Resource'>"
+" <xmpTest:MyThirdStruct rdf:parseType='Resource'>"
+" <xmpTest:MyThirdStructField>Field Value 3</xmpTest:MyThirdStructField>"
+" </xmpTest:MyThirdStruct>"
+" <xmpTest:MySecondStructField>Field Value 2</xmpTest:MySecondStructField>"
+" </xmpTest:MySecondStruct>"
+" <xmpTest:MyTopStructField>Field Value 1</xmpTest:MyTopStructField>"
+" </xmpTest:MyTopStruct>"
+
+" <xmpTest:MyArrayWithNestedArray>"
+" <rdf:Bag>"
+" <rdf:li>"
+" <rdf:Seq>"
+" <rdf:li>Item 1</rdf:li>"
+" <rdf:li>Item 2</rdf:li>"
+" </rdf:Seq>"
+" </rdf:li>"
+" </rdf:Bag>"
+" </xmpTest:MyArrayWithNestedArray>"
+
+" <xmpTest:MyArrayWithStructures>"
+" <rdf:Seq>"
+" <rdf:li rdf:parseType='Resource'>"
+" <rdf:value>Field Value 1</rdf:value>"
+" <xmpTest:FirstQual>Qual Value 1</xmpTest:FirstQual>"
+" <xmpTest:SecondQual>Qual Value 2</xmpTest:SecondQual>"
+" </rdf:li>"
+" <rdf:li rdf:parseType='Resource'>"
+" <rdf:value>Field Value 2</rdf:value>"
+" <xmpTest:FirstQual>Qual Value 3</xmpTest:FirstQual>"
+" <xmpTest:SecondQual>Qual Value 4</xmpTest:SecondQual>"
+" </rdf:li>"
+" </rdf:Seq>"
+" </xmpTest:MyArrayWithStructures>"
+""
+" <xmpTest:MyStructureWithArray rdf:parseType='Resource'>"
+" <xmpTest:NestedArray>"
+" <rdf:Bag>"
+" <rdf:li>Item 3</rdf:li>"
+" <rdf:li>Item 4</rdf:li>"
+" <rdf:li>Item 5</rdf:li>"
+" <rdf:li>Item 6</rdf:li>"
+" </rdf:Bag>"
+" </xmpTest:NestedArray>"
+" <xmpTest:NestedArray2>"
+" <rdf:Bag>"
+" <rdf:li>Item 66</rdf:li>"
+" <rdf:li>Item 46</rdf:li>"
+" <rdf:li>Item 56</rdf:li>"
+" <rdf:li>Item 66</rdf:li>"
+" </rdf:Bag>"
+" </xmpTest:NestedArray2>"
+" </xmpTest:MyStructureWithArray>"
+""
+" </rdf:Description>"
+"</rdf:RDF>";
// The namespace to be used. This will be automatically registered
// when the RDF is parsed.
const XMP_StringPtr kXMP_NS_SDK = "http://ns.adobe.com/xmpTest/";
/**
-* Reads some metadata from a file and appends some custom XMP to it. Then does several
-* iterations, using various iterators. Each iteration is displayed in the console window.
-*/
+ * Reads some metadata from a file and appends some custom XMP to it. Then does several
+ * iterations, using various iterators. Each iteration is displayed in the console window.
+ */
int main()
{
if(SXMPMeta::Initialize())
{
XMP_OptionBits options = 0;
- #if UNIX_ENV
- options |= kXMPFiles_ServerMode;
- #endif
+#if UNIX_ENV
+ options |= kXMPFiles_ServerMode;
+#endif
if ( SXMPFiles::Initialize ( options ) ) {
bool ok;
SXMPFiles myFile;
-
+
XMP_OptionBits opts = kXMPFiles_OpenForRead | kXMPFiles_OpenUseSmartHandler;
- ok = myFile.OpenFile("../../../testfiles/Image1.jpg", kXMP_UnknownFile, opts);
+#if MAC_ENV
+ ok = myFile.OpenFile("../../../../testfiles/Image1.jpg", kXMP_UnknownFile, opts);
+#else
+ ok = myFile.OpenFile("../../../testfiles/Image1.jpg", kXMP_UnknownFile, opts);
+#endif
if(ok)
{
SXMPMeta xmp;
myFile.GetXMP(&xmp);
-
+
// Add some custom metadata to the XMP object
SXMPMeta custXMP(rdf, (XMP_StringLen) strlen(rdf));
SXMPUtils::ApplyTemplate(&xmp, custXMP, kXMPTemplate_AddNewProperties);
-
+
// Store any details from the iter.Next() call
string schemaNS, propPath, propVal;
-
+
// Only visit the immediate children that are leaf properties of the Dublin Core schema
SXMPIterator dcLeafIter(xmp, kXMP_NS_DC, (kXMP_IterJustChildren | kXMP_IterJustLeafNodes));
while(dcLeafIter.Next(&schemaNS, &propPath, &propVal))
{
cout << schemaNS << " " << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Visit one property from the XMP Basic schema
SXMPIterator xmpKeywordsIter(xmp, kXMP_NS_XMP, "Keywords", kXMP_IterJustLeafNodes);
while(xmpKeywordsIter.Next(&schemaNS, &propPath, &propVal))
{
cout << schemaNS << " " << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Visit the Dublin Core schema, omit any quailifiers and only
// show the leaf properties
SXMPIterator dcIter(xmp, kXMP_NS_DC, (kXMP_IterOmitQualifiers | kXMP_IterJustLeafNodes));
@@ -155,9 +162,9 @@ int main()
{
cout << schemaNS << " " << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Visit the Dublin Core schema, omit any quailifiers,
// show the leaf properties but only return the leaf name and not the full path
SXMPIterator dcIter2(xmp, kXMP_NS_DC, (kXMP_IterOmitQualifiers | kXMP_IterJustLeafNodes | kXMP_IterJustLeafName));
@@ -165,9 +172,9 @@ int main()
{
cout << schemaNS << " " << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Iterate over a single namespace. Show all properties within
// the Photoshop schema
SXMPIterator exifIter(xmp, kXMP_NS_Photoshop);
@@ -175,9 +182,9 @@ int main()
{
cout << schemaNS << " " << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Just visit the leaf nodes of EXIF properties. That is just
// properties that may have values.
SXMPIterator exifLeafIter(xmp, kXMP_NS_EXIF, kXMP_IterJustLeafNodes);
@@ -185,9 +192,9 @@ int main()
{
cout << schemaNS << " " << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Iterate over all properties but skip the EXIF schema and skip the custom schema
// and continue visiting nodes
SXMPIterator skipExifIter (xmp);
@@ -202,9 +209,9 @@ int main()
cout << schemaNS << " " << propPath << " = " << propVal << endl;
}
}
-
+
cout << "----------------------------------" << endl;
-
+
// Iterate over all properties but skip the EXIF schema
// and any remaining siblings of the current node.
SXMPIterator stopAfterExifIter ( xmp );
@@ -219,13 +226,13 @@ int main()
cout << schemaNS << " " << propPath << " = " << propVal << endl;
}
}
-
+
cout << "----------------------------------" << endl;
-
+
//////////////////////////////////////////////////////////////////////////////////////
-
+
// Iterate over the custom XMP
-
+
// Visit the immediate children of this node.
// No qualifiers are visisted as they are below the property being visisted.
SXMPIterator justChildrenIter(xmp, kXMP_NS_SDK, kXMP_IterJustChildren);
@@ -233,9 +240,9 @@ int main()
{
cout << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Visit the immediate children of this node but only those that may have values.
// No qualifiers are visisted as they are below the property being visisted.
SXMPIterator justChildrenAndLeafIter(xmp, kXMP_NS_SDK, (kXMP_IterJustChildren | kXMP_IterJustLeafNodes));
@@ -243,18 +250,18 @@ int main()
{
cout << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Visit the leaf nodes of TopStructProperty
SXMPIterator myTopStructIter(xmp, kXMP_NS_SDK, "MyTopStruct", kXMP_IterJustLeafNodes);
while(myTopStructIter.Next(&schemaNS, &propPath, &propVal))
{
cout << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Visit the leaf nodes of the TopStructProperty but only return the names for
// the leaf components and not the full path
SXMPIterator xmyTopStructIterShortNames(xmp, kXMP_NS_SDK, "MyTopStruct", (kXMP_IterJustLeafNodes | kXMP_IterJustLeafName));
@@ -262,27 +269,27 @@ int main()
{
cout << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Visit a property and all of the qualifiers
SXMPIterator iterArrayProp (xmp, kXMP_NS_SDK, "ArrayWithStructures", kXMP_IterJustLeafNodes );
while(iterArrayProp.Next(&schemaNS, &propPath, &propVal))
{
cout << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Visit a property and omit all of the qualifiers
SXMPIterator iterArrayPropNoQual (xmp, kXMP_NS_SDK, "ArrayWithStructures", (kXMP_IterJustLeafNodes | kXMP_IterOmitQualifiers));
while(iterArrayPropNoQual.Next(&schemaNS, &propPath, &propVal))
{
cout << propPath << " = " << propVal << endl;
}
-
+
cout << "----------------------------------" << endl;
-
+
// Skip a subtree and continue onwards. Once 'Item 4' is found then the we can skip all of the
// siblings of the current node. If the the current node were a top level node the iteration
// would be complete as all siblings would be skipped. However, when 'Item 4' is found the current
@@ -300,22 +307,22 @@ int main()
cout << schemaNS << " " << propPath << " = " << propVal << endl;
}
}
-
+
/*
- // Visit all properties and qualifiers
- SXMPIterator allPropsIter(xmp);
- while(allPropsIter.Next(&schemaNS, &propPath, &propVal))
- {
- cout << schemaNS << " " << propPath << " = " << propVal << endl;
- }
- */
+ // Visit all properties and qualifiers
+ SXMPIterator allPropsIter(xmp);
+ while(allPropsIter.Next(&schemaNS, &propPath, &propVal))
+ {
+ cout << schemaNS << " " << propPath << " = " << propVal << endl;
+ }
+ */
}
}
}
SXMPFiles::Terminate();
SXMPMeta::Terminate();
-
+
return 0;
}
diff --git a/samples/source/common/DumpFile.cpp b/samples/source/common/DumpFile.cpp
index ca5446d..c651aca 100644
--- a/samples/source/common/DumpFile.cpp
+++ b/samples/source/common/DumpFile.cpp
@@ -938,6 +938,9 @@ static const XMP_Int64 kASFMinSize = 16; // ! Not really accurate, but covers th
static const XMP_Int64 kRIFFMinSize = 12;
+static const XMP_Int64 kPostScriptMinSize = 49;
+
+
static const XMP_Int64 kInDesignMinSize = 2 * kINDD_PageSize; // Two master pages.
static const XMP_Int64 kISOMediaMinSize = 16; // At least a minimal file type box.
@@ -945,6 +948,7 @@ static const XMP_Uns8 kISOMediaFTyp[] = { 0x66, 0x74, 0x79, 0x70 }; // "ftyp"
static const XMP_Uns32 kISOTag_ftyp = 0x66747970UL;
static const XMP_Uns32 kISOBrand_mp41 = 0x6D703431UL;
static const XMP_Uns32 kISOBrand_mp42 = 0x6D703432UL;
+static const XMP_Uns32 kISOBrand_avc1 = 0x61766331UL;
static const XMP_Uns32 kISOBrand_f4v = 0x66347620UL;
static const XMP_Uns32 kQTTag_XMP_ = 0x584D505FUL;
@@ -953,6 +957,8 @@ static const XMP_Int64 kSWFMinSize = (8+2+4 + 2); // Header with minimal rectang
static const XMP_Int64 kFLVMinSize = 9; // Header with zero length data.
+static const XMP_Uns8 kPostScriptStart[] = { 0xC5, 0xD0, 0xD3, 0xC6 };
+
static XMP_FileFormat
CheckFileFormat ( const char * filePath, XMP_Uns8 * fileContent, XMP_Int64 fileSize )
{
@@ -998,6 +1004,10 @@ CheckFileFormat ( const char * filePath, XMP_Uns8 * fileContent, XMP_Int64 fileS
if ( CheckBytes ( fileContent+8, "AIFC", 4 ) ) return kXMP_AIFFFile;
}
+ if ( (fileSize >= kPostScriptMinSize) && CheckBytes (fileContent, kPostScriptStart, 4) ) {
+ return kXMP_PostScriptFile;
+ }
+
if ( (fileSize >= kInDesignMinSize) && CheckBytes ( fileContent, kInDesign_MasterPageGUID, kInDesignGUIDSize ) ) {
return kXMP_InDesignFile;
}
@@ -1025,7 +1035,18 @@ CheckFileFormat ( const char * filePath, XMP_Uns8 * fileContent, XMP_Int64 fileS
for ( ; compatPtr < compatEnd; compatPtr += 4 ) {
XMP_Uns32 compatBrand = GetUns32BE (compatPtr);
- if ( (compatBrand == kISOBrand_mp41) || (compatBrand == kISOBrand_mp42) ) return kXMP_MPEG4File;
+ switch ( compatBrand ) {
+ case kISOBrand_mp41:
+ case kISOBrand_mp42:
+ case kISOBrand_avc1:
+ return kXMP_MPEG4File;
+ break;
+
+ default:
+ break;
+
+ }
+
}
}
@@ -1856,8 +1877,13 @@ digestInternationalTextSequence ( LFA_FileRef file, std::string isoPath, XMP_Int
tree->digest16u(file,isoPath+"language code",true,true);
(*remainingSize) -= 4;
if ( (*remainingSize) != miniBoxStringSize )
+ {
tree->addComment("WARNING: boxSize and miniBoxSize differ!");
- tree->digestString( file, isoPath+"value", miniBoxStringSize, false );
+ }
+ else
+ {
+ tree->digestString( file, isoPath+"value", miniBoxStringSize, false );
+ }
}
/**
@@ -1918,7 +1944,9 @@ DumpISOBoxes ( LFA_FileRef file, XMP_Uns32 maxBoxLen, std::string _isoPath )
break;
}
- std::string boxString( fromArgs( "%.4s" , &boxType ) );
+ XMP_Uns32 tempBoxType = GetUns32LE(&boxType);
+ std::string boxString( fromArgs( "%.4s" , &tempBoxType) );
+
// substitute mac-copyright signs with an easier-to-handle "(c)"
if ( boxString.at(0) == 0xA9 )
boxString = std::string("(c)") + boxString.substr(1);
@@ -1983,6 +2011,10 @@ DumpISOBoxes ( LFA_FileRef file, XMP_Uns32 maxBoxLen, std::string _isoPath )
XMP_Uns32 majorBrand = LFA_ReadUns32_LE( file );
XMP_Uns32 minorVersion = LFA_ReadUns32_LE( file );
+ //data has been read in LE make it in BE
+ majorBrand = GetUns32LE(&majorBrand);
+ minorVersion = GetUns32LE(&minorVersion);
+
//Log::info( fromArgs( "major Brand: '%.4s' (0x%.8X)" , &majorBrand, MakeUns32BE(majorBrand) ));
//Log::info( fromArgs( "minor Version: 0x%.8X" , MakeUns32BE(minorVersion) ) );
tree->setKeyValue( isoPath + "majorBrand",
@@ -2223,9 +2255,7 @@ DumpISOBoxes ( LFA_FileRef file, XMP_Uns32 maxBoxLen, std::string _isoPath )
// (c)-style quicktime boxes and boxes of no interest:
default:
- if ( (boxType & 0xA9) == 0xA9) // (c)something
- {
- if ( 0 == isoPath.compare( 0 , 20, "moov/udta/meta/ilst/"))
+ if ( 0 == isoPath.compare( 0 , 20, "moov/udta/meta/ilst/"))
{ // => iTunes metadata (hunt for data childs)
// a container box, hunt for 'data' atom by recursion:
bool ok;
@@ -2236,16 +2266,12 @@ DumpISOBoxes ( LFA_FileRef file, XMP_Uns32 maxBoxLen, std::string _isoPath )
}
else if ( 0 == isoPath.compare( 0 , 10, "moov/udta/" ))
{ // => Quicktime metadata "international text sequence" ( size, language code, value )
- digestInternationalTextSequence( file, isoPath, &remainingSize );
+ digestInternationalTextSequence( file, isoPath, &remainingSize );
} else
{
tree->addComment("WARNING: unknown flavor of (c)*** boxes, neither QT nor iTunes");
}
break;
- }
- //boxes of no interest:
-
- break;
}
bool ok;
@@ -3492,6 +3518,128 @@ DumpPNGChunk ( LFA_FileRef file, XMP_Uns32 pngLen, XMP_Uns32 chunkOffset )
// =================================================================================================
+// Dumps the structure of a PostScript file that starts with the 4 byte binary header tag.
+// Reads the 4 byte offset and 4 byte length that follow the tag, moves to that offset, then walks
+// the text: each token up to the next space or CR becomes a tree node annotated with its offset
+// and size, and the walk stops at "%%EOF". Only the "%!PS-Adobe-3.0" node stores its value.
+// The fileLen parameter is currently unused.
+// NOTE(review): the two header fields are read as raw host-order bytes, and the scan is not
+// bounded by fileLen or psLength - confirm how LFA_GetChar behaves at end of file.
static void
+DumpPS ( LFA_FileRef file, XMP_Uns32 fileLen )
+{
+    XMP_Int32 psOffset;
+    XMP_Uns32 psLength = 0; // Exactly 4 bytes are read into this, so it has to be a 4 byte type.
+
+    LFA_Seek ( file, 4, SEEK_SET ); // skip fileheader bytes
+    LFA_Read ( file, &psOffset, 4, true );
+    LFA_Read ( file, &psLength, 4, true );
+
+    tree->addComment(" psOffset: %d, psLength: %u", psOffset, psLength);
+
+    // jump to psOffset (12 bytes have been consumed so far: the tag, the offset and the length)
+    Skip(file, (psOffset - 12));
+
+    // get the header (everything till first %)
+
+    XMP_Int64 offset = LFA_Tell(file);
+    std::string key, value;
+    char byte = LFA_GetChar(file);
+    bool eof = false;
+    while ( !eof )
+    {
+        key.clear();
+        key += byte; // add the first %
+        byte = LFA_GetChar(file);
+
+        while (byte != ' ' && byte != '\r') // get everything until next space or CR
+        {
+            key += byte;
+            byte = LFA_GetChar(file);
+        }
+
+        //if (CheckBytes( key.c_str(), "%%EOF", 5))
+        if (key == "%%EOF")
+        {
+            eof = true;
+        }
+        else
+        {
+            byte = LFA_GetChar(file);
+            value.clear();
+            while (byte != '%') // get everything until next %, which then starts the next key
+            {
+                value += byte;
+                byte = LFA_GetChar(file);
+            }
+        }
+        tree->pushNode(key);
+        tree->addOffset( file );
+
+        //for now only store value for header
+        if ( key =="%!PS-Adobe-3.0" )
+        {
+            tree->changeValue(value);
+        }
+
+        // offset is 64 bit, so it needs a 64 bit conversion like the size below.
+        tree->addComment("offset: %lld", offset );
+        tree->addComment("size: 0x%llX", LFA_Tell(file)-offset );
+        tree->popNode();
+
+        offset = LFA_Tell(file);
+    }
+    // Now just get everything else and store all keys that start with %
+
+
+    // get the key
+    // start of the PostScript DSC header comment
+
+    /*XMP_Uns8 buffer [11];
+    LFA_Read ( file, &buffer, sizeof(buffer), true );
+
+    if (!CheckBytes( buffer, "%!PS-Adobe-", 11))
+    {
+        tree->comment ( "** Invalid PS, unknown PS file tag." );
+        return;
+    }
+
+    // Check the PostScript DSC major version number.
+    XMP_Uns8 byte;
+    LFA_Read ( file, &byte, sizeof(byte), true );
+
+    psMajorVer = 0;
+    while ( IsNumeric( byte ) )
+    {
+        psMajorVer = (psMajorVer * 10) + (byte - '0');
+        if ( psMajorVer > 1000 ) {
+            tree->comment ( "** Invalid PS, Overflow." );
+            return;
+        }; // Overflow.
+        LFA_Read ( file, &byte, sizeof(byte), true );
+    }
+    if ( psMajorVer < 3 ){
+        tree->comment ( "** Invalid PS, The version must be at least 3.0." );
+        return;
+    }; // The version must be at least 3.0.
+
+    if ( byte != '.' ){
+        tree->comment ( "** Invalid PS, No minor number" );
+        return;
+    }; // No minor number.
+    LFA_Read ( file, &byte, sizeof(byte), true );
+
+    // Check the PostScript DSC minor version number.
+
+    psMinorVer = 0;
+    while ( IsNumeric( byte ) )
+    {
+        psMinorVer = (psMinorVer * 10) + (byte - '0');
+        if ( psMinorVer > 1000 ) {
+            tree->comment ( "** Invalid PS, Overflow." );
+            return;
+        }; // Overflow.
+        LFA_Read ( file, &byte, sizeof(byte), true );
+    }
+
+    tree->addComment(" psMajor Version: %d, psMinor Version: %d", psMajorVer, psMinorVer);*/
+}
+
+// =================================================================================================
+
+static void
DumpPNG ( LFA_FileRef file, XMP_Uns32 pngLen )
{
// A PNG file contains an 8 byte signature followed by a sequence of chunks.
@@ -5000,6 +5148,13 @@ void DumpFile::Scan (std::string filename, TagTree &tagTree, bool resetTree)
tagTree.comment ( "** Recognized MPEG-2 file type, but this is a pure sidecar solution. No legacy dump available at this time." );
+ } else if ( format == kXMP_PostScriptFile ) {
+
+ tagTree.pushNode ( "Dumping PostScript file" );
+ tagTree.addComment ( "size %lld (0x%llx)", fileLen, fileLen );
+ DumpPS ( fileRef, fileLen );
+ tagTree.popNode();
+
} else if ( format == kXMP_UnknownFile ) {
tagTree.pushNode ( "Unknown format. packet scanning, size %d (0x%X)", fileLen, fileLen );
diff --git a/samples/source/common/DumpFile.h b/samples/source/common/DumpFile.h
index 96524ca..3324dc5 100644
--- a/samples/source/common/DumpFile.h
+++ b/samples/source/common/DumpFile.h
@@ -11,6 +11,8 @@
#define XMPQE_DUMPFILE_H
#include "samples/source/common/TagTree.h"
+#define IsNumeric( ch ) ( (ch) >= '0' && (ch) <= '9' ) // true for ASCII '0'..'9'; argument is parenthesized so compound expressions group correctly (note: ch is evaluated twice)
+
class DumpFile {
public:
static void Scan( std::string filename, TagTree &tagTree, bool resetTree = true );
diff --git a/samples/source/common/LargeFileAccess.cpp b/samples/source/common/LargeFileAccess.cpp
index 411152b..1d5c725 100644
--- a/samples/source/common/LargeFileAccess.cpp
+++ b/samples/source/common/LargeFileAccess.cpp
@@ -523,7 +523,7 @@ void LFA_Throw ( const char* msg, int id )
// LFA implementations for POSIX
// =============================
-#if XMP_UNIXBuild
+#if XMP_UNIXBuild || XMP_iOSBuild
// ---------------------------------------------------------------------------------------------
@@ -842,7 +842,8 @@ bool LFA_isEof( LFA_FileRef file )
return filesize == filepos;
#endif
- #if XMP_UNIXBuild
+
+ #if XMP_UNIXBuild || XMP_iOSBuild
int descr = (int)file;
struct stat info;
diff --git a/samples/source/common/TagTree.cpp b/samples/source/common/TagTree.cpp
index de5d3b4..713998a 100644
--- a/samples/source/common/TagTree.cpp
+++ b/samples/source/common/TagTree.cpp
@@ -288,10 +288,16 @@ void TagTree::digest64u(XMP_Uns64 expected, LFA_FileRef file,const std::string k
{
XMP_Uns64 tmp=digest64u( file,"",BigEndian, hexDisplay );
if (expected != tmp )
+ {
if (hexDisplay)
+ {
throw DumpFileException("'%s' was 0x%.16X, expected: 0x%.16X",key.c_str(),tmp,expected);
+ }
else
+ {
throw DumpFileException("'%s' was %d, expected: %d",key.c_str(),tmp,expected);
+ }
+ }
}
void TagTree::digest32s(XMP_Int32 expected, LFA_FileRef file,const std::string key /*=""*/, bool BigEndian /*=false*/ )
@@ -305,10 +311,16 @@ void TagTree::digest32u(XMP_Uns32 expected, LFA_FileRef file,const std::string k
{
XMP_Uns32 tmp=digest32u( file,"",BigEndian, hexDisplay );
if (expected != tmp )
+ {
if (hexDisplay)
+ {
throw DumpFileException("'%s' was 0x%.8X, expected: 0x%.8X",key.c_str(),tmp,expected);
+ }
else
+ {
throw DumpFileException("'%s' was %d, expected: %d",key.c_str(),tmp,expected);
+ }
+ }
}
void TagTree::digest16s(XMP_Int16 expected, LFA_FileRef file,const std::string key /*=""*/, bool BigEndian /*=false*/ )
@@ -322,10 +334,16 @@ void TagTree::digest16u(XMP_Uns16 expected, LFA_FileRef file,const std::string k
{
XMP_Uns16 tmp=digest16u( file,key,BigEndian, hexDisplay );
if (expected != tmp )
+ {
if (hexDisplay)
+ {
throw DumpFileException("'%s' was 0x%.4X, expected: 0x%.4X",key.c_str(),tmp,expected);
+ }
else
+ {
throw DumpFileException("'%s' was %d, expected: %d",key.c_str(),tmp,expected);
+ }
+ }
}
//////////////////////////////////////////////////////////////////////////////////////////
diff --git a/samples/source/common/globals.h b/samples/source/common/globals.h
index f056b60..30be27d 100644
--- a/samples/source/common/globals.h
+++ b/samples/source/common/globals.h
@@ -17,13 +17,13 @@
#include <cstdio>
//sanity check platform/endianess
- #if !defined(WIN_ENV) && !defined(MAC_ENV) && !defined(UNIX_ENV)
- #error "XMP environment error - must define one of MAC_ENV, WIN_ENV, or UNIX_ENV"
+ #if !defined(WIN_ENV) && !defined(MAC_ENV) && !defined(UNIX_ENV) && !defined(IOS_ENV)
+ #error "XMP environment error - must define one of MAC_ENV, WIN_ENV, UNIX_ENV or IOS_ENV"
#endif
#ifdef WIN_ENV
#define XMPQE_LITTLE_ENDIAN 1
- #elif defined(MAC_ENV)
+ #elif (defined(MAC_ENV) || defined(IOS_ENV))
#if __BIG_ENDIAN__
#define XMPQE_BIG_ENDIAN 1
#elif __LITTLE_ENDIAN__
diff --git a/samples/source/dumpfile/main.cpp b/samples/source/dumpfile/main.cpp
index eb2f663..bd11b05 100644
--- a/samples/source/dumpfile/main.cpp
+++ b/samples/source/dumpfile/main.cpp
@@ -31,6 +31,8 @@ const int DUMPFILEVERSION=2;
#include <stdexcept>
#include <iostream>
#include <string>
+#include <cstring>
+#include <cstdio>
#include <vector>
#include <sstream>
diff --git a/samples/source/xmpcommand/Actions.cpp b/samples/source/xmpcommand/Actions.cpp
index ad7028c..c738930 100644
--- a/samples/source/xmpcommand/Actions.cpp
+++ b/samples/source/xmpcommand/Actions.cpp
@@ -13,7 +13,11 @@
const char * XMP_EXE_VERSION= "4.4";
#include <stdexcept>
-#include <stdarg.h>
+#include <cstdarg>
+#include <cstdio>
+#include <vector>
+#include <string>
+#include <cstring>
//XMP related
#define TXMP_STRING_TYPE std::string
diff --git a/samples/source/xmpcommand/XMPCommand.cpp b/samples/source/xmpcommand/XMPCommand.cpp
index f6fc574..670fc1b 100644
--- a/samples/source/xmpcommand/XMPCommand.cpp
+++ b/samples/source/xmpcommand/XMPCommand.cpp
@@ -16,8 +16,10 @@
#include <stdexcept>
#include <iostream>
-#include <string>
+#include <cstdio>
#include <vector>
+#include <string>
+#include <cstring>
#include <sstream>
#include "samples/source/common/globals.h"