summaryrefslogtreecommitdiff
path: root/source/XMPFiles/FormatSupport/Reconcile_Impl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/XMPFiles/FormatSupport/Reconcile_Impl.cpp')
-rw-r--r--source/XMPFiles/FormatSupport/Reconcile_Impl.cpp395
1 files changed, 395 insertions, 0 deletions
diff --git a/source/XMPFiles/FormatSupport/Reconcile_Impl.cpp b/source/XMPFiles/FormatSupport/Reconcile_Impl.cpp
new file mode 100644
index 0000000..99339d2
--- /dev/null
+++ b/source/XMPFiles/FormatSupport/Reconcile_Impl.cpp
@@ -0,0 +1,395 @@
+// =================================================================================================
+// ADOBE SYSTEMS INCORPORATED
+// Copyright 2006-2007 Adobe Systems Incorporated
+// All Rights Reserved
+//
+// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms
+// of the Adobe license agreement accompanying it.
+// =================================================================================================
+
+#include "XMP_Environment.h" // ! This must be the first include.
+
+#include "Reconcile_Impl.hpp"
+
+#include "UnicodeConversions.hpp"
+
+#if XMP_WinBuild
+#elif XMP_MacBuild
+ #include "UnicodeConverter.h"
+#endif
+
+// =================================================================================================
+/// \file Reconcile_Impl.cpp
+/// \brief Implementation utilities for the legacy metadata reconciliation support.
+///
+// =================================================================================================
+
+// =================================================================================================
+// IsASCII
+// =======
+//
+// Report whether every byte of a buffer is 7 bit ASCII, that is below 0x80. Nul bytes pass like any
+// other low value, and an empty buffer (strLen of 0) is reported as ASCII.
+//
+// strPtr - Start of the bytes to examine.
+// strLen - Number of bytes to examine.
+
+static inline bool IsASCII ( const void * strPtr, size_t strLen )
+{
+    const XMP_Uns8 * bytePos = (const XMP_Uns8*)strPtr;
+    const XMP_Uns8 * const byteEnd = bytePos + strLen;
+
+    while ( bytePos != byteEnd ) {
+        if ( *bytePos >= 0x80 ) return false; // High order bit is set, not 7 bit ASCII.
+        ++bytePos;
+    }
+
+    return true;
+
+} // IsASCII
+
+// =================================================================================================
+// ReconcileUtils::IsUTF8
+// ======================
+//
+// See if a buffer is structurally well formed UTF-8: each byte is either ASCII or belongs to a 2 to
+// 4 byte sequence made of a lead byte followed by the right number of continuation bytes. Allow nul
+// bytes, they can appear inside of multi-part Exif strings. An empty buffer is reported as UTF-8.
+//
+// ! This is a structural check only. Overlong forms, encoded surrogates, and lead bytes that would
+// ! yield values above U+10FFFF are not rejected.
+//
+// We don't use CodePoint_from_UTF8_Multi in UnicodeConversions because it throws an exception for
+// non-Unicode and we don't need to actually compute the code points.
+//
+// utf8Ptr - Start of the bytes to examine.
+// utf8Len - Number of bytes to examine.
+// Returns true if the whole buffer passes, false at the first malformed or truncated sequence.
+
+bool ReconcileUtils::IsUTF8 ( const void * utf8Ptr, size_t utf8Len )
+{
+    const XMP_Uns8 * utf8Pos = (const XMP_Uns8*)utf8Ptr;
+    const XMP_Uns8 * utf8End = utf8Pos + utf8Len;
+
+    while ( utf8Pos < utf8End ) {
+
+        if ( *utf8Pos < 0x80 ) {
+
+            ++utf8Pos; // ASCII is UTF-8, tolerate nuls.
+
+        } else {
+
+            // -------------------------------------------------------------------------------------
+            // We've got a multibyte UTF-8 character. The first byte has the number of bytes as the
+            // number of high order 1 bits. The remaining bytes must have 1 and 0 as the top 2 bits.
+
+            #if 0 // *** This might be a more efficient way to count the bytes.
+                static XMP_Uns8 kByteCounts[16] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
+                size_t bytesNeeded = kByteCounts [ *utf8Pos >> 4 ];
+                if ( (bytesNeeded < 2) || ((bytesNeeded == 4) && ((*utf8Pos & 0x08) != 0)) ) return false;
+                if ( (utf8Pos + bytesNeeded) > utf8End ) return false;
+            #endif
+
+            size_t bytesNeeded = 0; // Count the high order 1 bits in the first byte.
+            for ( XMP_Uns8 temp = *utf8Pos; temp > 0x7F; temp = temp << 1 ) ++bytesNeeded;
+            // *** Consider CPU-specific assembly inline, e.g. cntlzw on PowerPC.
+
+            // Compare against the number of bytes left instead of computing utf8Pos+bytesNeeded.
+            // That sum could point more than one element past the end of the buffer, which is not
+            // a valid pointer to form.
+            const size_t bytesLeft = (size_t)(utf8End - utf8Pos);
+            if ( (bytesNeeded < 2) || (bytesNeeded > 4) || (bytesNeeded > bytesLeft) ) return false;
+
+            // Skip the lead byte, then insist that each following byte looks like 10xxxxxx.
+            for ( --bytesNeeded, ++utf8Pos; bytesNeeded > 0; --bytesNeeded, ++utf8Pos ) {
+                if ( (*utf8Pos >> 6) != 2 ) return false;
+            }
+
+        }
+
+    }
+
+    return true;
+
+} // ReconcileUtils::IsUTF8
+
+// =================================================================================================
+// UTF8ToHostEncoding
+// ==================
+
+#if XMP_WinBuild
+
+ // Convert UTF-8 text to a Windows code page. The input is first turned into native UTF-16, which
+ // is what WideCharToMultiByte consumes. The flags and default character arguments are passed as
+ // 0, so the API's default handling applies to characters the code page cannot represent.
+ //
+ // codePage - The Windows code page to convert to, e.g. CP_ACP or 1252.
+ // utf8Ptr, utf8Len - The UTF-8 input and its length in bytes.
+ // host - Receives the converted text. It is left empty if there is nothing to convert or if
+ //        WideCharToMultiByte reports a failure.
+ static void UTF8ToWinEncoding ( UINT codePage,
+                                 const XMP_Uns8 * utf8Ptr, size_t utf8Len, std::string * host )
+ {
+
+     std::string utf16; // WideCharToMultiByte wants native UTF-16.
+     ToUTF16Native ( (UTF8Unit*)utf8Ptr, utf8Len, &utf16 );
+
+     LPCWSTR utf16Ptr = (LPCWSTR) utf16.c_str();
+     const int utf16Len = (int) (utf16.size() / 2); // The API takes int counts of UTF-16 units.
+
+     host->erase();
+     if ( utf16Len == 0 ) return; // Nothing to convert, and the API rejects a zero length input.
+
+     // The first call only measures, the second call does the actual conversion.
+     const int hostLen = WideCharToMultiByte ( codePage, 0, utf16Ptr, utf16Len, 0, 0, 0, 0 );
+     if ( hostLen <= 0 ) return; // The conversion failed, leave the output empty.
+
+     host->assign ( (size_t)hostLen, ' ' ); // Allocate space for the results.
+
+     // Write through the non-const element reference; the pointer from data() is const and must
+     // not be written through.
+     (void) WideCharToMultiByte ( codePage, 0, utf16Ptr, utf16Len, &(*host)[0], hostLen, 0, 0 );
+     XMP_Assert ( (size_t)hostLen == host->size() );
+
+ } // UTF8ToWinEncoding
+
+#elif XMP_MacBuild
+
+ // Convert UTF-8 text to a Mac text encoding with the Unicode Converter, working through a fixed
+ // size stack buffer one piece at a time. The output is appended to *host; the callers in this
+ // file erase *host before calling.
+ //
+ // destEncoding - The target encoding. Only its base, from GetTextEncodingBase, is used.
+ // utf8Ptr, utf8Len - The UTF-8 input and its length in bytes.
+ // host - Receives the converted bytes.
+ //
+ // Raises kXMPErr_ExternalFailure through XMP_Throw if the converter cannot be created. Errors
+ // from the conversion calls are ignored, the loop simply stops when no more input is consumed.
+ static void UTF8ToMacEncoding ( TextEncoding & destEncoding,
+                                 const XMP_Uns8 * utf8Ptr, size_t utf8Len, std::string * host )
+ {
+     OSStatus err;
+
+     UnicodeMapping mappingInfo;
+     mappingInfo.mappingVersion  = kUnicodeUseLatestMapping;
+     mappingInfo.otherEncoding   = GetTextEncodingBase ( destEncoding );
+     mappingInfo.unicodeEncoding = CreateTextEncoding ( kTextEncodingUnicodeDefault,
+                                                        kUnicodeNoSubset, kUnicodeUTF8Format );
+
+     UnicodeToTextInfo converterInfo;
+     err = CreateUnicodeToTextInfo ( &mappingInfo, &converterInfo );
+     if ( err != noErr ) XMP_Throw ( "CreateUnicodeToTextInfo failed", kXMPErr_ExternalFailure );
+
+     try { // ! Need to call DisposeUnicodeToTextInfo before exiting.
+
+         OptionBits convFlags = kUnicodeUseFallbacksMask |
+                                kUnicodeLooseMappingsMask | kUnicodeDefaultDirectionMask;
+         ByteCount bytesRead = 0, bytesWritten = 0;
+
+         enum { kBufferLen = 1000 }; // Ought to be enough in practice, without using too much stack.
+         char buffer [kBufferLen];
+
+         host->reserve ( utf8Len ); // As good a guess as any.
+
+         while ( utf8Len > 0 ) {
+             // Zero the counts before each call. Errors are ignored below, so if a call fails
+             // without filling them in, the progress check must see 0 and not a stale or
+             // indeterminate value.
+             bytesRead = 0;
+             bytesWritten = 0;
+             // Ignore all errors from ConvertFromUnicodeToText. It returns info like "output
+             // buffer full" or "use substitution" as errors.
+             err = ConvertFromUnicodeToText ( converterInfo, utf8Len, (UniChar*)utf8Ptr, convFlags,
+                                              0, 0, 0, 0, kBufferLen, &bytesRead, &bytesWritten, buffer );
+             if ( bytesRead == 0 ) break; // Make sure forward progress happens.
+             host->append ( &buffer[0], bytesWritten );
+             utf8Ptr += bytesRead;
+             utf8Len -= bytesRead;
+         }
+
+         DisposeUnicodeToTextInfo ( &converterInfo );
+
+     } catch ( ... ) {
+
+         DisposeUnicodeToTextInfo ( &converterInfo );
+         throw;
+
+     }
+
+ } // UTF8ToMacEncoding
+
+#elif XMP_UNIXBuild
+
+ #error "UTF8ToHostEncoding is not implemented for UNIX"
+ // *** A nice definition of Windows 1252 is at http://www.microsoft.com/globaldev/reference/sbcs/1252.mspx
+ // *** We should code our own conversions for this, and use it for UNIX - unless better POSIX routines exist.
+
+#endif
+
+// =================================================================================================
+// ReconcileUtils::UTF8ToLocal
+// ===========================
+//
+// Convert UTF-8 text to the host's local encoding: the CP_ACP code page on Windows, the encoding
+// of the system script on the Mac. Pure 7 bit ASCII input is copied as-is without involving the
+// platform converters.
+//
+// _utf8Ptr, utf8Len - The UTF-8 input and its length in bytes.
+// local - Receives the result, any previous content is discarded.
+
+void ReconcileUtils::UTF8ToLocal ( const void * _utf8Ptr, size_t utf8Len, std::string * local )
+{
+    const XMP_Uns8 * const srcBytes = static_cast<const XMP_Uns8*> ( _utf8Ptr );
+
+    local->erase();
+
+    if ( ! IsASCII ( srcBytes, utf8Len ) ) {
+
+        #if XMP_WinBuild
+
+            UTF8ToWinEncoding ( CP_ACP, srcBytes, utf8Len, local );
+
+        #elif XMP_MacBuild
+
+            // Find the text encoding that goes with the system script, then convert to that.
+            TextEncoding systemEncoding;
+            const OSStatus status = UpgradeScriptInfoToTextEncoding ( smSystemScript,
+                                        kTextLanguageDontCare, kTextRegionDontCare, 0, &systemEncoding );
+            if ( status != noErr ) XMP_Throw ( "UpgradeScriptInfoToTextEncoding failed", kXMPErr_ExternalFailure );
+
+            UTF8ToMacEncoding ( systemEncoding, srcBytes, utf8Len, local );
+
+        #elif XMP_UNIXBuild
+
+            #error "UTF8ToLocal is not implemented for UNIX"
+
+        #endif
+
+    } else {
+
+        // ASCII bytes are the same in UTF-8 and in the target encoding, just copy them.
+        local->assign ( static_cast<const char*> ( _utf8Ptr ), utf8Len );
+
+    }
+
+} // ReconcileUtils::UTF8ToLocal
+
+// =================================================================================================
+// ReconcileUtils::UTF8ToLatin1
+// ============================
+//
+// Convert UTF-8 text to "Latin 1", which here actually means the Windows code page 1252 superset
+// of 8859-1. Pure 7 bit ASCII input is copied as-is without involving the platform converters.
+//
+// _utf8Ptr, utf8Len - The UTF-8 input and its length in bytes.
+// latin1 - Receives the result, any previous content is discarded.
+
+void ReconcileUtils::UTF8ToLatin1 ( const void * _utf8Ptr, size_t utf8Len, std::string * latin1 )
+{
+    const XMP_Uns8 * const srcBytes = static_cast<const XMP_Uns8*> ( _utf8Ptr );
+
+    latin1->erase();
+
+    if ( ! IsASCII ( srcBytes, utf8Len ) ) {
+
+        #if XMP_WinBuild
+
+            UTF8ToWinEncoding ( 1252, srcBytes, utf8Len, latin1 );
+
+        #elif XMP_MacBuild
+
+            TextEncoding cp1252Encoding = CreateTextEncoding ( kTextEncodingWindowsLatin1,
+                                              kTextEncodingDefaultVariant, kTextEncodingDefaultFormat );
+
+            UTF8ToMacEncoding ( cp1252Encoding, srcBytes, utf8Len, latin1 );
+
+        #elif XMP_UNIXBuild
+
+            #error "UTF8ToLatin1 is not implemented for UNIX"
+
+        #endif
+
+    } else {
+
+        // ASCII bytes are the same in UTF-8 and in the target encoding, just copy them.
+        latin1->assign ( static_cast<const char*> ( _utf8Ptr ), utf8Len );
+
+    }
+
+} // ReconcileUtils::UTF8ToLatin1
+
+// =================================================================================================
+// HostEncodingToUTF8
+// ==================
+
+#if XMP_WinBuild
+
+ // Convert text in a Windows code page to UTF-8. MultiByteToWideChar produces native UTF-16,
+ // which FromUTF16Native then turns into UTF-8.
+ //
+ // codePage - The Windows code page of the input, e.g. CP_ACP or 1252.
+ // hostPtr, hostLen - The input text and its length in bytes.
+ // utf8 - Receives the UTF-8 result. It is left empty if MultiByteToWideChar reports no output,
+ //        which covers both an empty input and a failed conversion.
+ static void WinEncodingToUTF8 ( UINT codePage,
+                                 const XMP_Uns8 * hostPtr, size_t hostLen, std::string * utf8 )
+ {
+
+     utf8->erase();
+
+     // The first call only measures, the second call does the actual conversion.
+     const int utf16Len = MultiByteToWideChar ( codePage, 0, (LPCSTR)hostPtr, (int)hostLen, 0, 0 );
+     if ( utf16Len <= 0 ) return; // Don't take &utf16[0] of an empty vector below.
+
+     // The size_t cast keeps this on the count/value constructor.
+     std::vector<UTF16Unit> utf16 ( (size_t)utf16Len, 0 ); // MultiByteToWideChar returns native UTF-16.
+
+     (void) MultiByteToWideChar ( codePage, 0, (LPCSTR)hostPtr, (int)hostLen, (LPWSTR)&utf16[0], utf16Len );
+     FromUTF16Native ( &utf16[0], (size_t)utf16Len, utf8 );
+
+ } // WinEncodingToUTF8
+
+#elif XMP_MacBuild
+
+ // Convert text in a Mac text encoding to UTF-8 with the Unicode Converter, working through a
+ // fixed size stack buffer one piece at a time. The output is appended to *utf8; the callers in
+ // this file erase *utf8 before calling.
+ //
+ // srcEncoding - The encoding of the input. Only its base, from GetTextEncodingBase, is used.
+ // hostPtr, hostLen - The input text and its length in bytes.
+ // utf8 - Receives the UTF-8 bytes.
+ //
+ // Raises kXMPErr_ExternalFailure through XMP_Throw if the converter cannot be created. Errors
+ // from the conversion calls are ignored, the loop simply stops when no more input is consumed.
+ static void MacEncodingToUTF8 ( TextEncoding & srcEncoding,
+                                 const XMP_Uns8 * hostPtr, size_t hostLen, std::string * utf8 )
+ {
+     OSStatus err;
+
+     UnicodeMapping mappingInfo;
+     mappingInfo.mappingVersion  = kUnicodeUseLatestMapping;
+     mappingInfo.otherEncoding   = GetTextEncodingBase ( srcEncoding );
+     mappingInfo.unicodeEncoding = CreateTextEncoding ( kTextEncodingUnicodeDefault,
+                                                        kUnicodeNoSubset, kUnicodeUTF8Format );
+
+     TextToUnicodeInfo converterInfo;
+     err = CreateTextToUnicodeInfo ( &mappingInfo, &converterInfo );
+     if ( err != noErr ) XMP_Throw ( "CreateTextToUnicodeInfo failed", kXMPErr_ExternalFailure );
+
+     try { // ! Need to call DisposeTextToUnicodeInfo before exiting.
+
+         ByteCount bytesRead = 0, bytesWritten = 0;
+
+         enum { kBufferLen = 1000 }; // Ought to be enough in practice, without using too much stack.
+         char buffer [kBufferLen];
+
+         utf8->reserve ( hostLen ); // As good a guess as any.
+
+         while ( hostLen > 0 ) {
+             // Zero the counts before each call. Errors are ignored below, so if a call fails
+             // without filling them in, the progress check must see 0 and not a stale or
+             // indeterminate value.
+             bytesRead = 0;
+             bytesWritten = 0;
+             // Ignore all errors from ConvertFromTextToUnicode. It returns info like "output
+             // buffer full" or "use substitution" as errors.
+             err = ConvertFromTextToUnicode ( converterInfo, hostLen, hostPtr, kNilOptions,
+                                              0, 0, 0, 0, kBufferLen, &bytesRead, &bytesWritten, (UniChar*)buffer );
+             if ( bytesRead == 0 ) break; // Make sure forward progress happens.
+             utf8->append ( &buffer[0], bytesWritten );
+             hostPtr += bytesRead;
+             hostLen -= bytesRead;
+         }
+
+         DisposeTextToUnicodeInfo ( &converterInfo );
+
+     } catch ( ... ) {
+
+         DisposeTextToUnicodeInfo ( &converterInfo );
+         throw;
+
+     }
+
+ } // MacEncodingToUTF8
+
+#elif XMP_UNIXBuild
+
+ #error "HostEncodingToUTF8 is not implemented for UNIX"
+
+#endif
+
+// =================================================================================================
+// ReconcileUtils::LocalToUTF8
+// ===========================
+//
+// Convert text in the host's local encoding to UTF-8. The local encoding is the CP_ACP code page
+// on Windows and the encoding of the system script on the Mac. Pure 7 bit ASCII input is copied
+// as-is without involving the platform converters.
+//
+// _localPtr, localLen - The input text and its length in bytes.
+// utf8 - Receives the result, any previous content is discarded.
+
+void ReconcileUtils::LocalToUTF8 ( const void * _localPtr, size_t localLen, std::string * utf8 )
+{
+    const XMP_Uns8 * const srcBytes = static_cast<const XMP_Uns8*> ( _localPtr );
+
+    utf8->erase();
+
+    if ( ! IsASCII ( srcBytes, localLen ) ) {
+
+        #if XMP_WinBuild
+
+            WinEncodingToUTF8 ( CP_ACP, srcBytes, localLen, utf8 );
+
+        #elif XMP_MacBuild
+
+            // Find the text encoding that goes with the system script, then convert from that.
+            TextEncoding systemEncoding;
+            const OSStatus status = UpgradeScriptInfoToTextEncoding ( smSystemScript,
+                                        kTextLanguageDontCare, kTextRegionDontCare, 0, &systemEncoding );
+            if ( status != noErr ) XMP_Throw ( "UpgradeScriptInfoToTextEncoding failed", kXMPErr_ExternalFailure );
+
+            MacEncodingToUTF8 ( systemEncoding, srcBytes, localLen, utf8 );
+
+        #elif XMP_UNIXBuild
+
+            #error "LocalToUTF8 is not implemented for UNIX"
+
+        #endif
+
+    } else {
+
+        // ASCII bytes are the same in the source encoding and in UTF-8, just copy them.
+        utf8->assign ( static_cast<const char*> ( _localPtr ), localLen );
+
+    }
+
+} // ReconcileUtils::LocalToUTF8
+
+// =================================================================================================
+// ReconcileUtils::Latin1ToUTF8
+// ============================
+//
+// Convert "Latin 1" text to UTF-8, where the input is actually taken to be in the Windows code
+// page 1252 superset of 8859-1. Pure 7 bit ASCII input is copied as-is without involving the
+// platform converters.
+//
+// _latin1Ptr, latin1Len - The input text and its length in bytes.
+// utf8 - Receives the result, any previous content is discarded.
+
+void ReconcileUtils::Latin1ToUTF8 ( const void * _latin1Ptr, size_t latin1Len, std::string * utf8 )
+{
+    const XMP_Uns8 * const srcBytes = static_cast<const XMP_Uns8*> ( _latin1Ptr );
+
+    utf8->erase();
+
+    if ( ! IsASCII ( srcBytes, latin1Len ) ) {
+
+        #if XMP_WinBuild
+
+            WinEncodingToUTF8 ( 1252, srcBytes, latin1Len, utf8 );
+
+        #elif XMP_MacBuild
+
+            TextEncoding cp1252Encoding = CreateTextEncoding ( kTextEncodingWindowsLatin1,
+                                              kTextEncodingDefaultVariant, kTextEncodingDefaultFormat );
+
+            MacEncodingToUTF8 ( cp1252Encoding, srcBytes, latin1Len, utf8 );
+
+        #elif XMP_UNIXBuild
+
+            #error "Latin1ToUTF8 is not implemented for UNIX"
+
+        #endif
+
+    } else {
+
+        // ASCII bytes are the same in the source encoding and in UTF-8, just copy them.
+        utf8->assign ( static_cast<const char*> ( _latin1Ptr ), latin1Len );
+
+    }
+
+} // ReconcileUtils::Latin1ToUTF8