summaryrefslogtreecommitdiff
path: root/source/XMPFiles/FormatSupport/Reconcile_Impl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/XMPFiles/FormatSupport/Reconcile_Impl.cpp')
-rw-r--r--source/XMPFiles/FormatSupport/Reconcile_Impl.cpp223
1 files changed, 105 insertions, 118 deletions
diff --git a/source/XMPFiles/FormatSupport/Reconcile_Impl.cpp b/source/XMPFiles/FormatSupport/Reconcile_Impl.cpp
index 1f06083..7d27769 100644
--- a/source/XMPFiles/FormatSupport/Reconcile_Impl.cpp
+++ b/source/XMPFiles/FormatSupport/Reconcile_Impl.cpp
@@ -1,6 +1,6 @@
// =================================================================================================
// ADOBE SYSTEMS INCORPORATED
-// Copyright 2006-2007 Adobe Systems Incorporated
+// Copyright 2006 Adobe Systems Incorporated
// All Rights Reserved
//
// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms
@@ -20,26 +20,26 @@
// =================================================================================================
/// \file Reconcile_Impl.cpp
-/// \brief Implementation utilities for the legacy metadata reconciliation support.
+/// \brief Implementation utilities for the photo metadata reconciliation support.
///
// =================================================================================================
// =================================================================================================
-// IsASCII
-// =======
+// ReconcileUtils::IsASCII
+// =======================
//
// See if a string is 7 bit ASCII.
-static inline bool IsASCII ( const void * strPtr, size_t strLen )
+bool ReconcileUtils::IsASCII ( const void * textPtr, size_t textLen )
{
- for ( const XMP_Uns8 * strPos = (XMP_Uns8*)strPtr; strLen > 0; --strLen, ++strPos ) {
- if ( *strPos >= 0x80 ) return false;
+ for ( const XMP_Uns8 * textPos = (XMP_Uns8*)textPtr; textLen > 0; --textLen, ++textPos ) {
+ if ( *textPos >= 0x80 ) return false;
}
return true;
-} // IsASCII
+} // ReconcileUtils::IsASCII
// =================================================================================================
// ReconcileUtils::IsUTF8
@@ -49,16 +49,16 @@ static inline bool IsASCII ( const void * strPtr, size_t strLen )
// strings. We don't use CodePoint_from_UTF8_Multi in UnicodeConversions because it throws an
// exception for non-Unicode and we don't need to actually compute the code points.
-bool ReconcileUtils::IsUTF8 ( const void * utf8Ptr, size_t utf8Len )
+bool ReconcileUtils::IsUTF8 ( const void * textPtr, size_t textLen )
{
- const XMP_Uns8 * utf8Pos = (XMP_Uns8*)utf8Ptr;
- const XMP_Uns8 * utf8End = utf8Pos + utf8Len;
+ const XMP_Uns8 * textPos = (XMP_Uns8*)textPtr;
+ const XMP_Uns8 * textEnd = textPos + textLen;
- while ( utf8Pos < utf8End ) {
+ while ( textPos < textEnd ) {
- if ( *utf8Pos < 0x80 ) {
+ if ( *textPos < 0x80 ) {
- ++utf8Pos; // ASCII is UTF-8, tolerate nuls.
+ ++textPos; // ASCII is UTF-8, tolerate nuls.
} else {
@@ -68,26 +68,26 @@ bool ReconcileUtils::IsUTF8 ( const void * utf8Ptr, size_t utf8Len )
#if 0 // *** This might be a more effcient way to count the bytes.
static XMP_Uns8 kByteCounts[16] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
- size_t bytesNeeded = kByteCounts [ *utf8Pos >> 4 ];
- if ( (bytesNeeded < 2) || ((bytesNeeded == 4) && ((*utf8Pos & 0x08) != 0)) ) return false;
- if ( (utf8Pos + bytesNeeded) > utf8End ) return false;
+ size_t bytesNeeded = kByteCounts [ *textPos >> 4 ];
+ if ( (bytesNeeded < 2) || ((bytesNeeded == 4) && ((*textPos & 0x08) != 0)) ) return false;
+ if ( (textPos + bytesNeeded) > textEnd ) return false;
#endif
size_t bytesNeeded = 0; // Count the high order 1 bits in the first byte.
- for ( XMP_Uns8 temp = *utf8Pos; temp > 0x7F; temp = temp << 1 ) ++bytesNeeded;
+ for ( XMP_Uns8 temp = *textPos; temp > 0x7F; temp = temp << 1 ) ++bytesNeeded;
// *** Consider CPU-specific assembly inline, e.g. cntlzw on PowerPC.
- if ( (bytesNeeded < 2) || (bytesNeeded > 4) || ((utf8Pos+bytesNeeded) > utf8End) ) return false;
+ if ( (bytesNeeded < 2) || (bytesNeeded > 4) || ((textPos+bytesNeeded) > textEnd) ) return false;
- for ( --bytesNeeded, ++utf8Pos; bytesNeeded > 0; --bytesNeeded, ++utf8Pos ) {
- if ( (*utf8Pos >> 6) != 2 ) return false;
+ for ( --bytesNeeded, ++textPos; bytesNeeded > 0; --bytesNeeded, ++textPos ) {
+ if ( (*textPos >> 6) != 2 ) return false;
}
}
}
- return true;
+ return true; // ! Returns true for empty strings.
} // ReconcileUtils::IsUTF8
@@ -97,8 +97,7 @@ bool ReconcileUtils::IsUTF8 ( const void * utf8Ptr, size_t utf8Len )
#if XMP_WinBuild
- static void UTF8ToWinEncoding ( UINT codePage,
- const XMP_Uns8 * utf8Ptr, size_t utf8Len, std::string * host )
+ void ReconcileUtils::UTF8ToWinEncoding ( UINT codePage, const XMP_Uns8 * utf8Ptr, size_t utf8Len, std::string * host )
{
std::string utf16; // WideCharToMultiByte wants native UTF-16.
@@ -117,11 +116,15 @@ bool ReconcileUtils::IsUTF8 ( const void * utf8Ptr, size_t utf8Len )
#elif XMP_MacBuild
- static void UTF8ToMacEncoding ( TextEncoding & destEncoding,
- const XMP_Uns8 * utf8Ptr, size_t utf8Len, std::string * host )
+ void ReconcileUtils::UTF8ToMacEncoding ( XMP_Uns16 macScript, XMP_Uns16 macLang, const XMP_Uns8 * utf8Ptr, size_t utf8Len, std::string * host )
{
OSStatus err;
+ TextEncoding destEncoding;
+ if ( macLang == langUnspecified ) macLang = kTextLanguageDontCare;
+ err = UpgradeScriptInfoToTextEncoding ( macScript, macLang, kTextRegionDontCare, 0, &destEncoding );
+ if ( err != noErr ) XMP_Throw ( "UpgradeScriptInfoToTextEncoding failed", kXMPErr_ExternalFailure );
+
UnicodeMapping mappingInfo;
mappingInfo.mappingVersion = kUnicodeUseLatestMapping;
mappingInfo.otherEncoding = GetTextEncodingBase ( destEncoding );
@@ -167,8 +170,7 @@ bool ReconcileUtils::IsUTF8 ( const void * utf8Ptr, size_t utf8Len )
#elif XMP_UNIXBuild
- // ! Does not exist, must not be called, for Generic UNIX builds. It is not clear at this time
- // ! what notion of local encoding should be used for generic UNIX, especially in a server product.
+ // ! Does not exist, must not be called, for Generic UNIX builds.
#endif
@@ -176,17 +178,13 @@ bool ReconcileUtils::IsUTF8 ( const void * utf8Ptr, size_t utf8Len )
// ReconcileUtils::UTF8ToLocal
// ===========================
-#if ! XMP_UNIXBuild
-// ! Does not exist, must not be called, for Generic UNIX builds. It is not clear at this time
-// ! what notion of local encoding should be used for generic UNIX, especially in a server product.
-
void ReconcileUtils::UTF8ToLocal ( const void * _utf8Ptr, size_t utf8Len, std::string * local )
{
const XMP_Uns8* utf8Ptr = (XMP_Uns8*)_utf8Ptr;
local->erase();
- if ( IsASCII ( utf8Ptr, utf8Len ) ) {
+ if ( ReconcileUtils::IsASCII ( utf8Ptr, utf8Len ) ) {
local->assign ( (const char *)utf8Ptr, utf8Len );
return;
}
@@ -197,76 +195,87 @@ void ReconcileUtils::UTF8ToLocal ( const void * _utf8Ptr, size_t utf8Len, std::s
#elif XMP_MacBuild
- OSStatus err;
-
- TextEncoding localEncoding;
- err = UpgradeScriptInfoToTextEncoding ( smSystemScript,
- kTextLanguageDontCare, kTextRegionDontCare, 0, &localEncoding );
- if ( err != noErr ) XMP_Throw ( "UpgradeScriptInfoToTextEncoding failed", kXMPErr_ExternalFailure );
-
- UTF8ToMacEncoding ( localEncoding, utf8Ptr, utf8Len, local );
+ UTF8ToMacEncoding ( smSystemScript, kTextLanguageDontCare, utf8Ptr, utf8Len, local );
#elif XMP_UNIXBuild
- #error "No generic UNIX implementation"
+ XMP_Throw ( "Generic UNIX does not have conversions between local and Unicode", kXMPErr_Unavailable );
#endif
} // ReconcileUtils::UTF8ToLocal
-#endif
-
// =================================================================================================
// ReconcileUtils::UTF8ToLatin1
// ============================
-//
-// Actually to the Windows code page 1252 superset of 8859-1.
-
-#if ! XMP_UNIXBuild
-// ! Does not exist, must not be called, for Generic UNIX builds. At some point we could consider
-// ! creating our own private implementation. So far only needed for the ID3 legacy in MP3 files.
void ReconcileUtils::UTF8ToLatin1 ( const void * _utf8Ptr, size_t utf8Len, std::string * latin1 )
{
const XMP_Uns8* utf8Ptr = (XMP_Uns8*)_utf8Ptr;
+ const XMP_Uns8* utf8End = utf8Ptr + utf8Len;
latin1->erase();
+ latin1->reserve ( utf8Len ); // As good a guess as any, at least enough, exact for ASCII.
- if ( IsASCII ( utf8Ptr, utf8Len ) ) {
- latin1->assign ( (const char *)utf8Ptr, utf8Len );
- return;
- }
-
- #if XMP_WinBuild
-
- UTF8ToWinEncoding ( 1252, utf8Ptr, utf8Len, latin1 );
+ bool inBadRun = false;
- #elif XMP_MacBuild
-
- TextEncoding latin1Encoding;
- latin1Encoding = CreateTextEncoding ( kTextEncodingWindowsLatin1,
- kTextEncodingDefaultVariant, kTextEncodingDefaultFormat );
-
- UTF8ToMacEncoding ( latin1Encoding, utf8Ptr, utf8Len, latin1 );
+ while ( utf8Ptr < utf8End ) {
- #elif XMP_UNIXBuild
+ if ( *utf8Ptr <= 0x7F ) {
+
+ (*latin1) += (char)*utf8Ptr; // Have an ASCII character.
+ inBadRun = false;
+ ++utf8Ptr;
+
+ } else if ( utf8Ptr == (utf8End - 1) ) {
+
+ inBadRun = false;
+ ++utf8Ptr; // Ignore a bad end to the UTF-8.
+
+ } else {
+
+ XMP_Assert ( (utf8End - utf8Ptr) >= 2 );
+ XMP_Uns16 ch16 = GetUns16BE ( utf8Ptr ); // A Latin-1 80..FF is 2 UTF-8 bytes.
+
+ if ( (0xC280 <= ch16) && (ch16 <= 0xC2BF) ) {
+
+ (*latin1) += (char)(ch16 & 0xFF); // UTF-8 C280..C2BF are Latin-1 80..BF.
+ inBadRun = false;
+ utf8Ptr += 2;
+
+ } else if ( (0xC380 <= ch16) && (ch16 <= 0xC3BF) ) {
+
+ (*latin1) += (char)((ch16 & 0xFF) + 0x40); // UTF-8 C380..C3BF are Latin-1 C0..FF.
+ inBadRun = false;
+ utf8Ptr += 2;
+
+ } else {
+
+ if ( ! inBadRun ) {
+ inBadRun = true;
+ (*latin1) += "(?)"; // Mark the run of out of scope UTF-8.
+ }
+
+ ++utf8Ptr; // Skip the presumably well-formed UTF-8 character.
+ while ( (utf8Ptr < utf8End) && ((*utf8Ptr & 0xC0) == 0x80) ) ++utf8Ptr;
+
+ }
+
+ }
- #error "No generic UNIX implementation"
+ }
- #endif
+ XMP_Assert ( utf8Ptr == utf8End );
} // ReconcileUtils::UTF8ToLatin1
-#endif
-
// =================================================================================================
// HostEncodingToUTF8
// ==================
#if XMP_WinBuild
- static void WinEncodingToUTF8 ( UINT codePage,
- const XMP_Uns8 * hostPtr, size_t hostLen, std::string * utf8 )
+ void ReconcileUtils::WinEncodingToUTF8 ( UINT codePage, const XMP_Uns8 * hostPtr, size_t hostLen, std::string * utf8 )
{
int utf16Len = MultiByteToWideChar ( codePage, 0, (LPCSTR)hostPtr, (int)hostLen, 0, 0 );
@@ -279,11 +288,15 @@ void ReconcileUtils::UTF8ToLatin1 ( const void * _utf8Ptr, size_t utf8Len, std::
#elif XMP_MacBuild
- static void MacEncodingToUTF8 ( TextEncoding & srcEncoding,
- const XMP_Uns8 * hostPtr, size_t hostLen, std::string * utf8 )
+ void ReconcileUtils::MacEncodingToUTF8 ( XMP_Uns16 macScript, XMP_Uns16 macLang, const XMP_Uns8 * hostPtr, size_t hostLen, std::string * utf8 )
{
OSStatus err;
+ TextEncoding srcEncoding;
+ if ( macLang == langUnspecified ) macLang = kTextLanguageDontCare;
+ err = UpgradeScriptInfoToTextEncoding ( macScript, macLang, kTextRegionDontCare, 0, &srcEncoding );
+ if ( err != noErr ) XMP_Throw ( "UpgradeScriptInfoToTextEncoding failed", kXMPErr_ExternalFailure );
+
UnicodeMapping mappingInfo;
mappingInfo.mappingVersion = kUnicodeUseLatestMapping;
mappingInfo.otherEncoding = GetTextEncodingBase ( srcEncoding );
@@ -327,8 +340,7 @@ void ReconcileUtils::UTF8ToLatin1 ( const void * _utf8Ptr, size_t utf8Len, std::
#elif XMP_UNIXBuild
- // ! Does not exist, must not be called, for Generic UNIX builds. It is not clear at this time
- // ! what notion of local encoding should be used for generic UNIX, especially in a server product.
+ // ! Does not exist, must not be called, for Generic UNIX builds.
#endif
@@ -336,17 +348,13 @@ void ReconcileUtils::UTF8ToLatin1 ( const void * _utf8Ptr, size_t utf8Len, std::
// ReconcileUtils::LocalToUTF8
// ===========================
-#if ! XMP_UNIXBuild
-// ! Does not exist, must not be called, for Generic UNIX builds. It is not clear at this time
-// ! what notion of local encoding should be used for generic UNIX, especially in a server product.
-
void ReconcileUtils::LocalToUTF8 ( const void * _localPtr, size_t localLen, std::string * utf8 )
{
const XMP_Uns8* localPtr = (XMP_Uns8*)_localPtr;
utf8->erase();
- if ( IsASCII ( localPtr, localLen ) ) {
+ if ( ReconcileUtils::IsASCII ( localPtr, localLen ) ) {
utf8->assign ( (const char *)localPtr, localLen );
return;
}
@@ -357,63 +365,42 @@ void ReconcileUtils::LocalToUTF8 ( const void * _localPtr, size_t localLen, std:
#elif XMP_MacBuild
- OSStatus err;
-
- TextEncoding localEncoding;
- err = UpgradeScriptInfoToTextEncoding ( smSystemScript, kTextLanguageDontCare, kTextRegionDontCare, 0, &localEncoding );
- if ( err != noErr ) XMP_Throw ( "UpgradeScriptInfoToTextEncoding failed", kXMPErr_ExternalFailure );
-
- MacEncodingToUTF8 ( localEncoding, localPtr, localLen, utf8 );
+ MacEncodingToUTF8 ( smSystemScript, kTextLanguageDontCare, localPtr, localLen, utf8 );
#elif XMP_UNIXBuild
- #error "No generic UNIX implementation"
+ XMP_Throw ( "Generic UNIX does not have conversions between local and Unicode", kXMPErr_Unavailable );
#endif
} // ReconcileUtils::LocalToUTF8
-#endif
-
// =================================================================================================
// ReconcileUtils::Latin1ToUTF8
// ============================
-//
-// Actually from the Windows code page 1252 superset of 8859-1.
-
-#if ! XMP_UNIXBuild
-// ! Does not exist, must not be called, for Generic UNIX builds. At some point we could consider
-// ! creating our own private implementation. So far only needed for the ID3 legacy in MP3 files.
void ReconcileUtils::Latin1ToUTF8 ( const void * _latin1Ptr, size_t latin1Len, std::string * utf8 )
{
const XMP_Uns8* latin1Ptr = (XMP_Uns8*)_latin1Ptr;
+ const XMP_Uns8* latin1End = latin1Ptr + latin1Len;
utf8->erase();
+ utf8->reserve ( latin1Len ); // As good a guess as any, exact for ASCII.
- if ( IsASCII ( latin1Ptr, latin1Len ) ) {
- utf8->assign ( (const char *)latin1Ptr, latin1Len );
- return;
- }
-
- #if XMP_WinBuild
+ for ( ; latin1Ptr < latin1End; ++latin1Ptr ) {
- WinEncodingToUTF8 ( 1252, latin1Ptr, latin1Len, utf8 );
+ XMP_Uns8 ch8 = *latin1Ptr;
- #elif XMP_MacBuild
-
- TextEncoding latin1Encoding;
- latin1Encoding = CreateTextEncoding ( kTextEncodingWindowsLatin1,
- kTextEncodingDefaultVariant, kTextEncodingDefaultFormat );
-
- MacEncodingToUTF8 ( latin1Encoding, latin1Ptr, latin1Len, utf8 );
-
- #elif XMP_UNIXBuild
+ if ( ch8 <= 0x7F ) {
+ (*utf8) += (char)ch8; // Have an ASCII character.
+ } else if ( ch8 <= 0xBF ) {
+ (*utf8) += 0xC2; // Latin-1 80..BF are UTF-8 C280..C2BF.
+ (*utf8) += (char)ch8;
+ } else {
+ (*utf8) += 0xC3; // Latin-1 C0..FF are UTF-8 C380..C3BF.
+ (*utf8) += (char)(ch8 - 0x40);
+ }
- #error "No generic UNIX implementation"
+ }
- #endif
-
} // ReconcileUtils::Latin1ToUTF8
-
-#endif