upgrade to XMP-SDK 4.4.2

author: Hubert Figuiere <hub@figuiere.net> 2008-11-17 23:42:00 -0500
committer: Hubert Figuiere <hub@figuiere.net> 2008-11-17 23:42:00 -0500
commit: 88af812fde414aca8f9add90bc800ea3d8e9a281 (patch)
tree: 0403dd1897c0b287d4d710dd422827683c59dfcb /samples/source/common/QEScanner.cpp
parent: 9d7d7c3caac05db240692ad7e9196fcb7f5a1ce5 (diff)
1 files changed, 1469 insertions, 0 deletions
diff --git a/samples/source/common/QEScanner.cpp b/samples/source/common/QEScanner.cpp
new file mode 100644
index 0000000..5553495
--- /dev/null
+++ b/samples/source/common/QEScanner.cpp
@@ -0,0 +1,1469 @@
+// =================================================================================================
+// Copyright 2002-2006 Adobe Systems Incorporated
+// All Rights Reserved.
+//
+// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
+// of the Adobe license agreement accompanying it.
+//
+// Adobe patent application tracking #P435, entitled 'Unique markers to simplify embedding data of
+// one format in a file with a different format', inventors: Sean Parent, Greg Gilley.
+// =================================================================================================
+
+#if WIN_ENV
+	#pragma warning ( disable : 4786 )	// The VC++ debugger can't handle long symbol names.
+	#pragma warning ( disable : 4127 )	// conditional expression is constant
+#endif
+
+
+#include "QEScanner.hpp"
+
+#include <cassert>
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <string>
+
+
+#ifndef UseStringPushBack	// VC++ 6.x does not provide push_back for strings!
+	#define UseStringPushBack	0
+#endif
+
+
+using namespace std;
+
+
+// *** Consider Boyer-Moore style search for "<?xpacket begin=".  It isn't an obvious win, the
+// *** additional code might be slower than scanning every character.  Especially if we will
+// *** read every cache line anyway.
+
+
+// =================================================================================================
+// =================================================================================================
+// class PacketMachine
+// ===================
+//
+// This is the packet recognizer state machine.  The top of the machine is FindNextPacket, this
+// calls the specific state components and handles transitions.  The states are described by an
+// array of RecognizerInfo records, indexed by the RecognizerKind enumeration.  Each RecognizerInfo
+// record has a function that does that state's work, the success and failure transition states,
+// and a string literal that is passed to the state function.  The literal lets a common MatchChar
+// or MatchString function be used in several places.
+//
+// The state functions are responsible for consuming input to recognize their particular state.
+// This includes intervening nulls for 16 and 32 bit character forms.  For simplicity, things
+// are treated as essentially little endian and the nulls are not actually checked.  The opening
+// '<' is found with a byte-by-byte search, then the number of bytes per character is determined
+// by counting the following nulls.  From then on, consuming a character means incrementing the
+// buffer pointer by the number of bytes per character.  Thus the buffer pointer only points to
+// the "real" bytes.  This also means that the pointer can go off the end of the buffer by a
+// variable amount.  The amount of overrun is saved so that the pointer can be positioned at the
+// right byte to start the next buffer.
+//
+// The state functions return a TriState value, eTriYes means the pattern was found, eTriNo means
+// the pattern was definitely not found, eTriMaybe means that the end of the buffer was reached
+// while working through the pattern.
+//
+// When eTriYes is returned, the fBufferPtr data member is left pointing to the "real" byte
+// following the last actual byte.  Which might not be addressable memory!  This also means that
+// a state function can be entered with nothing available in the buffer.  When eTriNo is returned,
+// the fBufferPtr data member is left pointing to the byte that caused the failure.  The state 
+// machine starts over from the failure byte.
+//
+// The state functions must preserve their internal micro-state before returning eTriMaybe, and
+// resume processing when called with the next buffer.  The fPosition data member is used to denote
+// how many actual characters have been consumed.  The fNullCount data member is used to denote how
+// many nulls are left before the next actual character.
+
+
+// =================================================================================================
+// PacketMachine
+// =============
+
+// Construct a recognizer positioned at the start of the given buffer.  bufferOffset is added to
+// in-buffer positions when packet offsets are recorded, bufferOrigin and bufferLength delimit the
+// bytes to scan.  The machine starts in the lead-in state with 8 bit characters assumed.
+
+QEScanner::PacketMachine::PacketMachine ( SInt64 bufferOffset, const void * bufferOrigin, SInt64 bufferLength ) :
+
+	// Public members
+	fPacketStart ( 0 ),
+	fPacketLength ( 0 ),
+	fBytesAttr ( -1 ),
+	fCharForm ( eChar8Bit ),
+	fAccess ( ' ' ),
+	fBogusPacket ( false ),
+	
+	// Private members.  The buffer pointers are all derived from the bufferOrigin parameter rather
+	// than from the fBufferOrigin member, so their values do not depend on the order in which the
+	// members happen to be declared (members are initialized in declaration order).
+	fBufferOffset ( bufferOffset ),
+	fBufferOrigin ( (const char *) bufferOrigin ),
+	fBufferPtr ( (const char *) bufferOrigin ),
+	fBufferLimit ( ((const char *) bufferOrigin) + bufferLength ),
+	fRecognizer ( eLeadInRecognizer ),
+	fPosition ( 0 ),
+	fBytesPerChar ( 1 ),
+	fBufferOverrun ( 0 ),
+	fQuoteChar ( ' ' )
+
+{
+	/*
+	REVIEW NOTES : Should the buffer stuff be in a class?
+	*/
+	
+	// A machine must always be given a real, non-empty buffer.
+	assert ( bufferOrigin != NULL );
+	assert ( bufferLength != 0 );
+	
+}	// PacketMachine
+
+
+// =================================================================================================
+// ~PacketMachine
+// ==============
+
+QEScanner::PacketMachine::~PacketMachine ()
+{
+	// Intentionally empty, the destructor body has no work of its own to do.
+}	// ~PacketMachine
+
+
+// =================================================================================================
+// AssociateBuffer
+// ===============
+
+// Attach the next input buffer to the machine.  Scanning resumes fBufferOverrun bytes past the
+// new origin, which accounts for a multi-byte character step that ran off the previous buffer.
+
+void
+QEScanner::PacketMachine::AssociateBuffer ( SInt64 bufferOffset, const void * bufferOrigin, SInt64 bufferLength )
+{
+	const char * const newOrigin = (const char *) bufferOrigin;
+
+	fBufferOffset	= bufferOffset;
+	fBufferOrigin	= newOrigin;
+	fBufferLimit	= newOrigin + bufferLength;
+	fBufferPtr		= newOrigin + fBufferOverrun;
+
+}	// AssociateBuffer
+
+
+// =================================================================================================
+// ResetMachine
+// ============
+
+// Return the machine to its initial recognizer state and discard everything captured so far.
+// The buffer association (offset, origin, pointer, limit) is left untouched.
+
+void
+QEScanner::PacketMachine::ResetMachine ()
+{
+
+	// Recognizer state.
+	fRecognizer = eLeadInRecognizer;
+	fPosition = 0;
+	fBufferOverrun = 0;
+
+	// Character form, back to the 8 bit assumption.
+	fCharForm = eChar8Bit;
+	fBytesPerChar = 1;
+
+	// Per-packet results.
+	fBogusPacket = false;
+	fBytesAttr = -1;
+	fAccess = ' ';
+
+	// Captured attribute text.
+	fAttrName.erase();
+	fAttrValue.erase();
+	fEncodingAttr.erase();
+
+}	// ResetMachine
+
+
+// =================================================================================================
+// FindLessThan
+// ============
+
+// Scan forward for a '<'.  The literal selects the mode: "H" looks for the '<' of the packet
+// header, anything else looks for the '<' of the trailer.  On success the buffer pointer is left
+// just past the '<' and eTriYes is returned.  If the buffer runs out, the header search returns
+// eTriNo (no packet in this part of the input) while the trailer search returns eTriMaybe so
+// that the search continues in the next buffer.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::FindLessThan ( PacketMachine * ths, const char * which )
+{
+	const bool forHeader = (*which == 'H');
+
+	if ( forHeader ) {
+		// We might have just failed from a bogus 16 or 32 bit case, so start over as 8 bit.
+		// This also makes the header search look at every byte, nulls are not skipped.
+		ths->fCharForm = eChar8Bit;
+		ths->fBytesPerChar = 1;
+	}
+
+	// One byte for the header search, one character of the packet body for the trailer search.
+	const int stride = ths->fBytesPerChar;
+
+	for ( ; ths->fBufferPtr < ths->fBufferLimit; ths->fBufferPtr += stride ) {
+		if ( *ths->fBufferPtr == '<' ) {
+			ths->fBufferPtr += stride;	// Consume the '<' itself.
+			return eTriYes;
+		}
+	}
+
+	return ( forHeader ? eTriNo : eTriMaybe );
+
+}	// FindLessThan
+
+
+// =================================================================================================
+// MatchString
+// ===========
+
+// Match the remainder of a string literal, resuming at character fPosition of the literal.
+// Returns eTriYes when the whole literal has been matched, eTriNo at the first mismatch (the
+// buffer pointer is left on the offending byte), and eTriMaybe when the buffer ends first.  In the
+// eTriMaybe case fPosition is advanced so the match resumes correctly in the next buffer.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::MatchString ( PacketMachine * ths, const char * literal )
+{
+	const int		stride		= ths->fBytesPerChar;
+	const char *	expected	= literal + ths->fPosition;
+	const size_t	remaining	= strlen ( literal ) - ths->fPosition;
+
+	for ( size_t matched = 0; matched != remaining; ++matched ) {
+
+		if ( ths->fBufferPtr >= ths->fBufferLimit ) {
+			ths->fPosition += matched;	// Remember how far we got for the next buffer.
+			return eTriMaybe;
+		}
+
+		if ( expected[matched] != *ths->fBufferPtr ) return eTriNo;
+		ths->fBufferPtr += stride;
+
+	}
+
+	return eTriYes;
+
+}	// MatchString
+
+
+// =================================================================================================
+// MatchChar
+// =========
+
+// Match a single character, the first character of the literal.  Only *literal is examined, so
+// the literal does not need to be a terminated string.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::MatchChar ( PacketMachine * ths, const char * literal )
+{
+
+	if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe;	// Nothing available yet.
+
+	if ( *ths->fBufferPtr != *literal ) return eTriNo;
+
+	ths->fBufferPtr += ths->fBytesPerChar;	// Consume the matched character.
+	return eTriYes;
+
+}	// MatchChar
+
+
+// =================================================================================================
+// MatchOpenQuote
+// ==============
+
+// Match an opening single or double quote and remember which one it was in fQuoteChar, so that
+// the matching close quote can be recognized later.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::MatchOpenQuote ( PacketMachine * ths, const char * /* unused */ )
+{
+
+	if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe;
+
+	const char	candidate	= *ths->fBufferPtr;
+	const bool	isQuote		= (candidate == '\'') || (candidate == '"');
+	if ( ! isQuote ) return eTriNo;
+
+	ths->fQuoteChar = candidate;
+	ths->fBufferPtr += ths->fBytesPerChar;
+	return eTriYes;
+
+}	// MatchOpenQuote
+
+
+// =================================================================================================
+// MatchCloseQuote
+// ===============
+
+// Match the closing quote, which must be the same character that opened the quoted value.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::MatchCloseQuote ( PacketMachine * ths, const char * /* unused */ )
+{
+	// MatchChar only looks at the first character it is given, so pointing at the single saved
+	// quote character is sufficient.
+	const char * const expectedQuote = &ths->fQuoteChar;
+
+	return MatchChar ( ths, expectedQuote );
+
+}	// MatchCloseQuote
+
+
+// =================================================================================================
+// CaptureAttrName
+// ===============
+
+// True if ch may start an attribute name: an ASCII letter, '_', or ':'.
+
+static bool
+IsAttrNameStartChar ( char ch )
+{
+	return ( ('a' <= ch) && (ch <= 'z') ) ||
+		   ( ('A' <= ch) && (ch <= 'Z') ) ||
+		   (ch == '_') || (ch == ':');
+}
+
+// True if ch may continue an attribute name: a start character, an ASCII digit, '-', or '.'.
+
+static bool
+IsAttrNameChar ( char ch )
+{
+	return IsAttrNameStartChar ( ch ) ||
+		   ( ('0' <= ch) && (ch <= '9') ) ||
+		   (ch == '-') || (ch == '.');
+}
+
+// Capture an attribute name into fAttrName.  fPosition is zero when a new name is being started,
+// and holds the number of characters captured so far when a name is being resumed in a new buffer.
+// Returns eTriNo if the first character cannot start a name, eTriYes when a character that is not
+// part of the name is reached (it is not consumed), and eTriMaybe when the buffer ends first.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::CaptureAttrName ( PacketMachine * ths, const char * /* unused */ )
+{
+	const int	bytesPerChar	= ths->fBytesPerChar;
+	char		currChar;
+
+	if ( ths->fPosition == 0 ) {	// Get the first character in the name.
+		
+		if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe;
+
+		// Always validate the first character.  fAttrName can still hold a stale name from an
+		// earlier attribute whose value failed to parse, so its contents must not be used to
+		// decide whether this check is needed.
+		currChar = *ths->fBufferPtr;
+		if ( ! IsAttrNameStartChar ( currChar ) ) return eTriNo;
+
+		ths->fAttrName.erase ( ths->fAttrName.begin(), ths->fAttrName.end() );
+		#if UseStringPushBack
+			ths->fAttrName.push_back ( currChar );
+		#else
+			ths->fAttrName.insert ( ths->fAttrName.end(), currChar );
+		#endif
+		ths->fBufferPtr += bytesPerChar;
+	
+	}
+	
+	while ( ths->fBufferPtr < ths->fBufferLimit ) {	// Get the remainder of the name.
+
+		currChar = *ths->fBufferPtr;
+		if ( ! IsAttrNameChar ( currChar ) ) break;
+
+		#if UseStringPushBack
+			ths->fAttrName.push_back ( currChar );
+		#else
+			ths->fAttrName.insert ( ths->fAttrName.end(), currChar );
+		#endif
+		ths->fBufferPtr += bytesPerChar;
+
+	}
+	
+	if ( ths->fBufferPtr < ths->fBufferLimit ) return eTriYes;
+	ths->fPosition = ths->fAttrName.size();	// The name might span into the next buffer.
+	return eTriMaybe;
+
+}	// CaptureAttrName
+
+
+// =================================================================================================
+// CaptureAttrValue
+// ================
+//
+// Recognize the equal sign and the quoted string value, capture the value along the way.
+
+// The work is done in three stages tracked by fPosition: 0 expects the '=', 1 expects the opening
+// quote, 2 captures characters into fAttrValue up to the closing quote.  A stage that completes
+// runs straight on into the next one, and an eTriMaybe return resumes at the recorded stage.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::CaptureAttrValue ( PacketMachine * ths, const char * /* unused */ )
+{
+	const int stride = ths->fBytesPerChar;
+
+	if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe;
+
+	if ( ths->fPosition == 0 ) {	// The name should have ended at the '=', nulls already skipped.
+		if ( *ths->fBufferPtr != '=' ) return eTriNo;
+		ths->fBufferPtr += stride;
+		ths->fPosition = 1;
+	}
+
+	if ( ths->fPosition == 1 ) {	// Look for the open quote, MatchOpenQuote checks the buffer limit.
+		const TriState quoteStatus = MatchOpenQuote ( ths, NULL );
+		if ( quoteStatus != eTriYes ) return quoteStatus;
+		ths->fPosition = 2;
+	}
+
+	// Look for the close quote, capturing the value along the way.
+
+	assert ( ths->fPosition == 2 );
+
+	const char closingQuote = ths->fQuoteChar;
+
+	while ( ths->fBufferPtr < ths->fBufferLimit ) {
+
+		const char valueChar = *ths->fBufferPtr;
+		ths->fBufferPtr += stride;	// Consumed whether it is the quote or part of the value.
+
+		if ( valueChar == closingQuote ) return eTriYes;
+
+		#if UseStringPushBack
+			ths->fAttrValue.push_back ( valueChar );
+		#else
+			ths->fAttrValue.insert ( ths->fAttrValue.end(), valueChar );
+		#endif
+
+	}
+
+	return eTriMaybe;	// The value continues in the next buffer.
+
+}	// CaptureAttrValue
+
+
+// =================================================================================================
+// RecordStart
+// ===========
+//
+// Note that this routine looks at bytes, not logical characters.  It has to figure out how many
+// bytes per character there are so that the other recognizers can skip intervening nulls.
+
+// On the first call the packet start offset is recorded as the byte just before the current
+// buffer pointer, fPacketLength is cleared, and then the null bytes that follow are counted one
+// byte at a time:
+//	no null		- 8 bit characters, return eTriYes without consuming anything.
+//	one null	- 16 bit characters (big endian assumed for now), return eTriYes.
+//	two nulls	- not a valid form, return eTriNo.
+//	three nulls	- 32 bit characters (big endian assumed for now), return eTriYes.
+// eTriMaybe is returned whenever the buffer runs out, fPosition holds the number of nulls seen
+// plus one so that the count resumes with the next buffer.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::RecordStart ( PacketMachine * ths, const char * /* unused */ )
+{
+
+	while ( true ) {
+
+		if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe;
+		
+		const char currByte = *ths->fBufferPtr;
+	
+		switch ( ths->fPosition ) {
+		
+			case 0 :	// Record the packet start, the byte just before the current one.
+				assert ( ths->fCharForm == eChar8Bit );
+				assert ( ths->fBytesPerChar == 1 );
+				ths->fPacketStart = ths->fBufferOffset + ((ths->fBufferPtr - 1) - ths->fBufferOrigin);
+				ths->fPacketLength = 0;
+				ths->fPosition = 1;
+				// ! OK to fall through here, we didn't consume a byte in this step.
+
+			case 1 :	// Look for the first null byte.
+				if ( currByte != 0 ) return eTriYes;	// No nulls found.
+				ths->fCharForm = eChar16BitBig;			// Assume 16 bit big endian for now.
+				ths->fBytesPerChar = 2;
+				ths->fBufferPtr++;
+				ths->fPosition = 2;
+				break;	// ! Don't fall through, have to check for the end of the buffer between each byte.
+
+			case 2 :	// One null was found, look for a second.
+				if ( currByte != 0 ) return eTriYes;	// Just one null found.
+				ths->fBufferPtr++;
+				ths->fPosition = 3;
+				break;
+
+			case 3 :	// Two nulls were found, look for a third.
+				if ( currByte != 0 ) return eTriNo;	// Just two nulls is not valid.
+				ths->fCharForm = eChar32BitBig;		// Assume 32 bit big endian for now.
+				ths->fBytesPerChar = 4;
+				ths->fBufferPtr++;
+				return eTriYes;
+				break;	// Not reached, kept for symmetry with the other cases.
+
+		}
+	
+	}
+	
+}	// RecordStart
+
+
+// =================================================================================================
+// RecognizeBOM
+// ============
+//
+// Recognizing the byte order marker is a surprisingly messy thing to do.  It can't be done by the
+// normal string matcher, there are no intervening nulls.  There are 4 transitions after the opening
+// quote, the closing quote or one of the three encodings.  For the actual BOM there are then 1 or 2
+// following bytes that depend on which of the encodings we're in.  Not to mention that the buffer
+// might end at any point.
+//
+// The intervening null count done earlier determined 8, 16, or 32 bits per character, but not the
+// big or little endian nature for the 16/32 bit cases.  The BOM must be present for the 16 and 32
+// bit cases in order to determine the endian mode.  There are six possible byte sequences for the
+// quoted BOM string, ignoring the differences for quoting with ''' versus '"'.
+//
+// Keep in mind that for the 16 and 32 bit cases there will be nulls for the quote.  In the table
+// below the symbol <quote> means just the one byte containing the ''' or '"'.  The nulls for the
+// quote character are explicitly shown.
+//
+//	<quote> <quote>					- 1: No BOM, this must be an 8 bit case.
+//	<quote> \xEF \xBB \xBF <quote>	- 1.12-13: The 8 bit form.
+//
+//	<quote> \xFE \xFF \x00 <quote>	- 1.22-23: The 16 bit, big endian form
+//	<quote> \x00 \xFF \xFE <quote>	- 1.32-33: The 16 bit, little endian form.
+//
+//	<quote> \x00 \x00 \xFE \xFF \x00 \x00 \x00 <quote>	- 1.32.43-45.56-57: The 32 bit, big endian form.
+//	<quote> \x00 \x00 \x00 \xFF \xFE \x00 \x00 <quote>	- 1.32.43.54-57: The 32 bit, little endian form.
+
+// Byte values that make up the byte order marker in the various encodings.
+
+enum {
+
+	// The three bytes of the 8 bit form.
+	eBOM_8_1		= 0xEF,
+	eBOM_8_2		= 0xBB,
+	eBOM_8_3		= 0xBF,
+
+	// The two significant bytes of the big endian forms.
+	eBOM_Big_1		= 0xFE,
+	eBOM_Big_2		= 0xFF,
+
+	// The little endian forms use the same two bytes in the opposite order.
+	eBOM_Little_1	= 0xFF,
+	eBOM_Little_2	= 0xFE
+
+};
+
+// Recognize the quoted byte order marker, one byte at a time.  fPosition is the micro-state and
+// is numbered by path: 0 is the opening quote, 1 is the byte after it, 12-13 are the 8 bit form,
+// 22-23 the 16 bit big endian form, 32-33 the 16 bit little endian form, 43-45 the 32 bit big
+// endian form, 54-55 the 32 bit little endian form, 56-57 the trailing nulls shared by both 32
+// bit forms, and 99 the closing quote.  Returns eTriYes once the closing quote has been consumed,
+// eTriNo on any byte that does not fit the current path or the previously detected character
+// size, and eTriMaybe when the buffer ends (fPosition is already up to date for resuming).
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::RecognizeBOM ( PacketMachine * ths, const char * /* unused */ )
+{
+	const int	bytesPerChar	= ths->fBytesPerChar;
+
+	while ( true ) {	// Handle one character at a time, the micro-state (fPosition) changes for each.
+	
+		if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe;
+
+		const unsigned char currChar = *ths->fBufferPtr;	// ! The BOM bytes look like integers bigger than 127.
+		
+		switch ( ths->fPosition ) {
+		
+			case  0 :	// Look for the opening quote.
+				if ( (currChar != '\'') && (currChar != '"') ) return eTriNo;
+				ths->fQuoteChar = currChar;
+				ths->fBufferPtr++;	// Step by one byte, not one character, the BOM bytes follow directly.
+				ths->fPosition = 1;
+				break;	// ! Don't fall through, have to check for the end of the buffer between each byte.
+			
+			case 1 :	// Look at the byte immediately following the opening quote.
+				if ( currChar == ths->fQuoteChar ) {	// Closing quote, no BOM character, must be 8 bit.
+					if ( ths->fCharForm != eChar8Bit ) return eTriNo;
+					ths->fBufferPtr += bytesPerChar;	// Skip the nulls after the closing quote.
+					return eTriYes;
+				} else if ( currChar == eBOM_8_1 ) {	// Start of the 8 bit form.
+					if ( ths->fCharForm != eChar8Bit ) return eTriNo;
+					ths->fBufferPtr++;
+					ths->fPosition = 12;
+				} else if ( currChar == eBOM_Big_1 ) {	// Start of the 16 bit big endian form.
+					if ( ths->fCharForm != eChar16BitBig ) return eTriNo;
+					ths->fBufferPtr++;
+					ths->fPosition = 22;
+				} else if ( currChar == 0 ) {	// Start of the 16 bit little endian or either 32 bit form.
+					if ( ths->fCharForm == eChar8Bit ) return eTriNo;
+					ths->fBufferPtr++;
+					ths->fPosition = 32;
+				} else {
+					return eTriNo;
+				}
+				break;
+			
+			case 12 :	// Look for the second byte of the 8 bit form.
+				if ( currChar != eBOM_8_2 ) return eTriNo;
+				ths->fPosition = 13;
+				ths->fBufferPtr++;
+				break;
+			
+			case 13 :	// Look for the third byte of the 8 bit form.
+				if ( currChar != eBOM_8_3 ) return eTriNo;
+				ths->fPosition = 99;
+				ths->fBufferPtr++;
+				break;
+			
+			case 22 :	// Look for the second byte of the 16 bit big endian form.
+				if ( currChar != eBOM_Big_2 ) return eTriNo;
+				ths->fPosition = 23;
+				ths->fBufferPtr++;
+				break;
+			
+			case 23 :	// Look for the null before the closing quote of the 16 bit big endian form.
+				if ( currChar != 0 ) return eTriNo;
+				ths->fBufferPtr++;
+				ths->fPosition = 99;
+				break;
+			
+			case 32 :	// Look at the second byte of the 16 bit little endian or either 32 bit form.
+				if ( currChar == eBOM_Little_1 ) {
+					ths->fPosition = 33;
+				} else if ( currChar == 0 ) {
+					ths->fPosition = 43;
+				} else {
+					return eTriNo;
+				}
+				ths->fBufferPtr++;
+				break;
+			
+			case 33 :	// Look for the third byte of the 16 bit little endian form.
+				if ( ths->fCharForm != eChar16BitBig ) return eTriNo;	// Null count before assumed big endian.
+				if ( currChar != eBOM_Little_2 ) return eTriNo;
+				ths->fCharForm = eChar16BitLittle;	// The BOM settles the endian question.
+				ths->fPosition = 99;
+				ths->fBufferPtr++;
+				break;
+			
+			case 43 :	// Look at the third byte of either 32 bit form.
+				if ( ths->fCharForm != eChar32BitBig ) return eTriNo;	// Null count before assumed big endian.
+				if ( currChar == eBOM_Big_1 ) {
+					ths->fPosition = 44;
+				} else if ( currChar == 0 ) {
+					ths->fPosition = 54;
+				} else {
+					return eTriNo;
+				}
+				ths->fBufferPtr++;
+				break;
+			
+			case 44 :	// Look for the fourth byte of the 32 bit big endian form.
+				if ( currChar != eBOM_Big_2 ) return eTriNo;
+				ths->fPosition = 45;
+				ths->fBufferPtr++;
+				break;
+			
+			case 45 :	// Look for the first null before the closing quote of the 32 bit big endian form.
+				if ( currChar != 0 ) return eTriNo;
+				ths->fPosition = 56;
+				ths->fBufferPtr++;
+				break;
+			
+			case 54 :	// Look for the fourth byte of the 32 bit little endian form.
+				// Note that the character form is switched before the byte is verified.
+				ths->fCharForm = eChar32BitLittle;
+				if ( currChar != eBOM_Little_1 ) return eTriNo;
+				ths->fPosition = 55;
+				ths->fBufferPtr++;
+				break;
+			
+			case 55 :	// Look for the fifth byte of the 32 bit little endian form.
+				if ( currChar != eBOM_Little_2 ) return eTriNo;
+				ths->fPosition = 56;
+				ths->fBufferPtr++;
+				break;
+			
+			case 56 :	// Look for the next to last null before the closing quote of the 32 bit forms.
+				if ( currChar != 0 ) return eTriNo;
+				ths->fPosition = 57;
+				ths->fBufferPtr++;
+				break;
+			
+			case 57 :	// Look for the last null before the closing quote of the 32 bit forms.
+				if ( currChar != 0 ) return eTriNo;
+				ths->fPosition = 99;
+				ths->fBufferPtr++;
+				break;
+			
+			default :	// Look for the closing quote.
+				assert ( ths->fPosition == 99 );
+				if ( currChar != ths->fQuoteChar ) return eTriNo;
+				ths->fBufferPtr += bytesPerChar;	// Skip the nulls after the closing quote.
+				return eTriYes;
+				break;	// Not reached.
+
+		}
+	
+	}
+
+}	// RecognizeBOM
+
+
+// =================================================================================================
+// RecordHeadAttr
+// ==============
+
+// Record a just-captured header attribute.  The "encoding" value is saved in fEncodingAttr, the
+// "bytes" value is parsed as an unsigned decimal number into fBytesAttr, any other attribute is
+// ignored.  A "bytes" value that is not purely decimal digits, that is too large to represent, or
+// that is not a multiple of the character size marks the packet as bogus.  The captured name and
+// value are cleared afterwards.  Always returns eTriYes.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::RecordHeadAttr ( PacketMachine * ths, const char * /* unused */ )
+{
+	
+	if ( ths->fAttrName == "encoding" ) {
+	
+		// NOTE(review): a packet that repeats the attribute trips this assert in debug builds.
+		assert ( ths->fEncodingAttr.empty() );
+		ths->fEncodingAttr = ths->fAttrValue;
+	
+	} else if ( ths->fAttrName == "bytes" ) {
+			
+		long			value	= 0;
+		const size_t	count	= ths->fAttrValue.size();
+		
+		assert ( ths->fBytesAttr == -1 );
+		
+		if ( count > 0 ) {	// Allow bytes='' to be the same as no bytes attribute.
+
+			for ( size_t i = 0; i < count; i++ ) {
+
+				const char	currChar	= ths->fAttrValue[i];
+				const bool	isDigit		= ('0' <= currChar) && (currChar <= '9');
+				const long	digit		= currChar - '0';
+
+				// The value comes straight from the file, so reject anything that is not a digit
+				// or that would overflow the accumulator (signed overflow is undefined behavior).
+				// The overflow test is only evaluated for real digits, where digit is 0..9.
+				if ( (! isDigit) || (value > ((LONG_MAX - digit) / 10)) ) {
+					ths->fBogusPacket = true;
+					value = -1;
+					break;
+				}
+
+				value = (value * 10) + digit;
+
+			}
+
+			// NOTE(review): this assumes fBytesAttr can hold any non-negative long - confirm
+			// against its declaration.
+			ths->fBytesAttr = value;
+			
+			// The byte count has to be a whole number of characters.
+			if ( CharFormIs16Bit ( ths->fCharForm ) ) {
+				if ( (ths->fBytesAttr & 1) != 0 ) ths->fBogusPacket = true;
+			} else if ( CharFormIs32Bit ( ths->fCharForm ) ) {
+				if ( (ths->fBytesAttr & 3) != 0 ) ths->fBogusPacket = true;
+			}
+
+		}
+
+	}
+	
+	ths->fAttrName.erase ( ths->fAttrName.begin(), ths->fAttrName.end() );
+	ths->fAttrValue.erase ( ths->fAttrValue.begin(), ths->fAttrValue.end() );
+
+	return eTriYes;
+
+}	// RecordHeadAttr
+
+
+// =================================================================================================
+// CaptureAccess
+// =============
+
+// Recognize the quoted access value of the trailer: an opening quote, a single 'r' or 'w' that is
+// saved in fAccess, and the matching closing quote.  fPosition counts the characters consumed so
+// far, so an eTriMaybe return resumes at the right character in the next buffer.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::CaptureAccess ( PacketMachine * ths, const char * /* unused */ )
+{
+	const int stride = ths->fBytesPerChar;
+
+	for ( ;; ) {
+
+		// The end of the buffer has to be checked before every character.
+		if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe;
+
+		const char ch = *ths->fBufferPtr;
+
+		if ( ths->fPosition == 0 ) {	// The opening quote.
+
+			const bool isQuote = (ch == '\'') || (ch == '"');
+			if ( ! isQuote ) return eTriNo;
+			ths->fQuoteChar = ch;
+			ths->fPosition = 1;
+
+		} else if ( ths->fPosition == 1 ) {	// The 'r' or 'w'.
+
+			const bool isAccess = (ch == 'r') || (ch == 'w');
+			if ( ! isAccess ) return eTriNo;
+			ths->fAccess = ch;
+			ths->fPosition = 2;
+
+		} else {	// The closing quote.
+
+			assert ( ths->fPosition == 2 );
+			if ( ch != ths->fQuoteChar ) return eTriNo;
+			ths->fBufferPtr += stride;
+			return eTriYes;
+
+		}
+
+		ths->fBufferPtr += stride;	// Consume the character just accepted.
+
+	}
+
+}	// CaptureAccess
+
+
+// =================================================================================================
+// RecordTailAttr
+// ==============
+
+// Discard a just-captured trailer attribute.  Always returns eTriYes.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::RecordTailAttr ( PacketMachine * ths, const char * /* unused */ )
+{
+
+	// No "general" attributes are known for the packet trailer, so the captured name and value
+	// are simply thrown away to be ready for the next attribute.
+
+	ths->fAttrValue.erase();
+	ths->fAttrName.erase();
+
+	return eTriYes;
+
+}	// RecordTailAttr
+
+
+// =================================================================================================
+// CheckPacketEnd
+// ==============
+//
+// Check for trailing padding and record the packet length.  We have trailing padding if the bytes
+// attribute is present and has a value greater than the current length.
+
+// On the first call (fPosition is zero) the number of padding bytes still expected is computed
+// from the bytes attribute and kept in fPosition.  The padding is then consumed one character at
+// a time, returning eTriMaybe if the buffer ends first.  The packet is marked bogus if the bytes
+// attribute is too small, if the padding is not a whole number of characters, or if the padding
+// contains something other than whitespace.  The packet length is recorded on the way out.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::CheckPacketEnd ( PacketMachine * ths, const char * /* unused */ )
+{
+	const int stride = ths->fBytesPerChar;
+
+	if ( ths->fPosition == 0 ) {	// First call, decide if there is trailing padding.
+
+		const SInt64	lengthSoFar	= (ths->fBufferOffset + (ths->fBufferPtr - ths->fBufferOrigin)) - ths->fPacketStart;
+		const bool		haveBytes	= (ths->fBytesAttr != -1);
+
+		if ( haveBytes && (ths->fBytesAttr < lengthSoFar) ) {
+
+			ths->fBogusPacket = true;	// The bytes attribute value is too small.
+
+		} else if ( haveBytes && (ths->fBytesAttr != lengthSoFar) ) {
+
+			ths->fPosition = (signed long)(ths->fBytesAttr - lengthSoFar);
+			if ( (ths->fPosition % ths->fBytesPerChar) != 0 ) {
+				// The padding is not a multiple of the character size, round it down.
+				ths->fBogusPacket = true;
+				ths->fPosition -= (ths->fPosition % ths->fBytesPerChar);
+			}
+
+		}
+
+	}
+
+	while ( ths->fPosition > 0 ) {
+
+		if ( ths->fBufferPtr >= ths->fBufferLimit ) return eTriMaybe;
+
+		const char padChar = *ths->fBufferPtr;
+		const bool isWhitespace = (padChar == ' ') || (padChar == '\t') || (padChar == '\n') || (padChar == '\r');
+
+		if ( ! isWhitespace ) {
+			ths->fBogusPacket = true;	// The padding is not whitespace.
+			break;						// Stop the packet here.
+		}
+
+		ths->fBufferPtr += stride;
+		ths->fPosition -= stride;
+
+	}
+
+	ths->fPacketLength = (ths->fBufferOffset + (ths->fBufferPtr - ths->fBufferOrigin)) - ths->fPacketStart;
+	return eTriYes;
+
+}	// CheckPacketEnd
+
+
+// =================================================================================================
+// CheckFinalNulls
+// ===============
+//
+// Do some special case processing for little endian characters.  We have to make sure the presumed
+// nulls after the last character actually exist, i.e. that the stream does not end too soon.  Note
+// that the prior character scanning has moved the buffer pointer to the address following the last
+// byte of the last character.  I.e. we're already past the presumed nulls, so we can't check their
+// content.  All we can do is verify that the stream does not end too soon.
+//
+// Doing this check is simple yet subtle.  If we're still in the current buffer then the trailing
+// bytes obviously exist.  If we're exactly at the end of the buffer then the bytes also exist.
+// The only question is when we're actually past this buffer, partly into the next buffer.  This is
+// when "ths->fBufferPtr > ths->fBufferLimit" on entry.  For that case we have to wait until we've
+// actually seen enough extra bytes of input.
+//
+// Since the normal buffer processing is already adjusting for this partial character overrun, all
+// that needs to be done here is wait until "ths->fBufferPtr <= ths->fBufferLimit" on entry.  In
+// other words, if we're presently too far, ths->fBufferPtr will be adjusted by the amount of the
+// overflow the next time QEScanner::Scan is called.  This might still be too far, so just keep
+// waiting for enough data to pass by.
+//
+// Note that there is a corresponding special case for big endian characters, we must decrement the
+// starting offset by the number of leading nulls.  But we don't do that here, we leave it to the
+// outer code.  This is because the leading nulls might have been at the exact end of a previous
+// buffer, in which case we have to also decrement the length of that raw data snip.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::CheckFinalNulls ( PacketMachine * ths, const char * /* unused */ )
+{
+	// Only the 16 and 32 bit little endian forms have presumed nulls after the last character.
+	const bool littleEndianWide = (ths->fCharForm != eChar8Bit) && CharFormIsLittleEndian ( ths->fCharForm );
+
+	// Being past the end of the buffer means those bytes have not actually been seen yet.
+	const bool beyondBuffer = (ths->fBufferPtr > ths->fBufferLimit);
+
+	return ( (littleEndianWide && beyondBuffer) ? eTriMaybe : eTriYes );
+
+}	// CheckFinalNulls
+
+
+// =================================================================================================
+// SetNextRecognizer
+// =================
+
+// Move the machine to another recognizer state.  The micro-state is cleared so that the new
+// recognizer starts from its beginning.
+
+void
+QEScanner::PacketMachine::SetNextRecognizer ( RecognizerKind nextRecognizer )
+{
+	fPosition = 0;
+	fRecognizer = nextRecognizer;
+}	// SetNextRecognizer
+
+
+// =================================================================================================
+// FindNextPacket
+// ==============
+
+// *** When we start validating intervening nulls for 2 and 4 bytes characters, throw an exception
+// *** for errors.  Don't return eTriNo, that might skip at an optional point.
+
+QEScanner::PacketMachine::TriState
+QEScanner::PacketMachine::FindNextPacket ()
+{
+	// Run the table driven recognizer state machine from the current recognizer.  Each table entry
+	// names the proc to call, the recognizer to move to when the proc answers eTriYes, the one to
+	// move to when it answers eTriNo, and an optional literal handed to the proc.
+	//
+	// Returns eTriNo once the failure recognizer is reached, eTriYes once the success recognizer
+	// is reached, and eTriMaybe as soon as any proc answers eTriMaybe.  In the eTriMaybe case the
+	// current recognizer is left untouched so that the scan can be resumed later.
+
+	#define kPacketHead		"?xpacket begin="
+	#define kPacketID		"W5M0MpCehiHzreSzNTczkc9d"
+	#define kPacketTail		"?xpacket end="
+
+	// ! The rows are positional, they are indexed directly by the recognizer value.
+	static const RecognizerInfo	recognizerTable [eRecognizerCount]	= {		// ! Would be safer to assign these explicitly.
+
+		// proc				successNext					failureNext					literal
+
+		{ NULL,				eFailureRecognizer,			eFailureRecognizer,			NULL},			// eFailureRecognizer
+		{ NULL,				eSuccessRecognizer,			eSuccessRecognizer,			NULL},			// eSuccessRecognizer
+
+		{ FindLessThan,		eHeadStartRecorder,			eFailureRecognizer,			"H" },			// eLeadInRecognizer
+		{ RecordStart,	 	eHeadStartRecognizer,		eLeadInRecognizer,			NULL },			// eHeadStartRecorder
+		{ MatchString, 		eBOMRecognizer,				eLeadInRecognizer,			kPacketHead },	// eHeadStartRecognizer
+
+		{ RecognizeBOM, 	eIDTagRecognizer,			eLeadInRecognizer,			NULL },			// eBOMRecognizer
+
+		{ MatchString, 		eIDOpenRecognizer,			eLeadInRecognizer,			" id=" },		// eIDTagRecognizer
+		{ MatchOpenQuote,	eIDValueRecognizer,			eLeadInRecognizer,			NULL },			// eIDOpenRecognizer
+		{ MatchString, 		eIDCloseRecognizer,			eLeadInRecognizer,			kPacketID },	// eIDValueRecognizer
+		{ MatchCloseQuote,	eAttrSpaceRecognizer_1,		eLeadInRecognizer,			NULL },			// eIDCloseRecognizer
+
+		{ MatchChar, 		eAttrNameRecognizer_1,		eHeadEndRecognizer,			" " },			// eAttrSpaceRecognizer_1
+		{ CaptureAttrName,	eAttrValueRecognizer_1,		eLeadInRecognizer,			NULL },			// eAttrNameRecognizer_1
+		{ CaptureAttrValue,	eAttrValueRecorder_1,		eLeadInRecognizer,			NULL },			// eAttrValueRecognizer_1
+		{ RecordHeadAttr,	eAttrSpaceRecognizer_1,		eLeadInRecognizer,			NULL },			// eAttrValueRecorder_1
+
+		{ MatchString, 		eBodyRecognizer,			eLeadInRecognizer,			"?>" },			// eHeadEndRecognizer
+
+		{ FindLessThan,		eTailStartRecognizer,		eBodyRecognizer,			"T"},			// eBodyRecognizer
+
+		{ MatchString, 		eAccessValueRecognizer,		eBodyRecognizer,			kPacketTail },	// eTailStartRecognizer
+		{ CaptureAccess,	eAttrSpaceRecognizer_2,		eBodyRecognizer,			NULL },			// eAccessValueRecognizer
+
+		{ MatchChar, 		eAttrNameRecognizer_2,		eTailEndRecognizer,			" " },			// eAttrSpaceRecognizer_2
+		{ CaptureAttrName,	eAttrValueRecognizer_2,		eBodyRecognizer,			NULL },			// eAttrNameRecognizer_2
+		{ CaptureAttrValue,	eAttrValueRecorder_2,		eBodyRecognizer,			NULL },			// eAttrValueRecognizer_2
+		{ RecordTailAttr,	eAttrSpaceRecognizer_2,		eBodyRecognizer,			NULL },			// eAttrValueRecorder_2
+
+		{ MatchString, 		ePacketEndRecognizer,		eBodyRecognizer,			"?>" },			// eTailEndRecognizer
+		{ CheckPacketEnd,	eCloseOutRecognizer,		eBodyRecognizer,			"" },			// ePacketEndRecognizer
+		{ CheckFinalNulls,	eSuccessRecognizer,			eBodyRecognizer,			"" }			// eCloseOutRecognizer
+
+	};
+
+	for ( ; ; ) {
+
+		// The two terminal recognizers have no proc, they just end the search.
+		if ( fRecognizer == eFailureRecognizer ) return eTriNo;
+		if ( fRecognizer == eSuccessRecognizer ) return eTriYes;
+
+		// Everything else is a normal state, dispatch through the table.
+		const RecognizerInfo &	entry	= recognizerTable [fRecognizer];
+		const TriState			outcome	= entry.proc ( this, entry.literal );
+
+		if ( outcome == eTriMaybe ) {
+			// Remember how far past the buffer limit we got.  Keep this recognizer intact, to be
+			// resumed later.
+			fBufferOverrun = (unsigned char)(fBufferPtr - fBufferLimit);
+			return eTriMaybe;
+		}
+
+		if ( outcome == eTriYes ) {
+			SetNextRecognizer ( entry.successNext );
+		} else if ( outcome == eTriNo ) {
+			SetNextRecognizer ( entry.failureNext );
+		}
+
+	}
+
+}	// FindNextPacket
+
+
+// =================================================================================================
+// =================================================================================================
+// class InternalSnip
+// ==================
+
+
+// =================================================================================================
+// InternalSnip
+// ============
+
+QEScanner::InternalSnip::InternalSnip ( SInt64 offset, SInt64 length )
+{
+	// Record the stream range covered by this snip.  All other members keep whatever their own
+	// default construction gives them.
+
+	fInfo.fLength = length;
+	fInfo.fOffset = offset;
+
+}	// InternalSnip
+
+
+// =================================================================================================
+// InternalSnip
+// ============
+
+QEScanner::InternalSnip::InternalSnip ( const InternalSnip & rhs ) :
+	fInfo ( rhs.fInfo ),
+	fMachine ( NULL )
+{
+	// The copy takes the snip info but never a packet machine, the new snip always starts with
+	// none.  In debug builds, verify that the source really is a plain snip.
+
+	// Don't copy a snip with a machine.
+	assert ( NULL == rhs.fMachine.get() );
+
+	// Don't copy a snip with an encoding, i.e. one with a non-null and non-empty encoding string.
+	assert ( ! ( (rhs.fInfo.fEncodingAttr != 0) && (*rhs.fInfo.fEncodingAttr != 0) ) );
+
+}	// InternalSnip
+
+
+// =================================================================================================
+// ~InternalSnip
+// =============
+
+QEScanner::InternalSnip::~InternalSnip ()
+{
+	// Nothing to do explicitly, the members clean up after themselves.
+}	// ~InternalSnip
+
+
+
+// =================================================================================================
+// =================================================================================================
+// class QEScanner
+// ================
+
+
+// =================================================================================================
+// DumpSnipList
+// ============
+
+// Printable names for the snip states, indexed by the snip's fState value.
+static const char *	snipStateName [6] = { "not-seen", "pending", "raw-data", "good-packet", "partial", "bad-packet" };
+
+// Debugging aid: write the whole snip list to cout.  The output starts with a header line holding
+// the title and the number of snips, then has one line per snip giving the address of its info,
+// the state name, the first and last offsets, and the length.
+
+void
+QEScanner::DumpSnipList ( const char * title )
+{
+#if 1
+	cout << endl << title << " snip list: " << fInternalSnips.size() << endl;
+
+	const InternalSnipIterator	stopPos	= fInternalSnips.end();
+
+	for ( InternalSnipIterator pos = fInternalSnips.begin(); pos != stopPos; ++pos ) {
+		SnipInfo &	info	= pos->fInfo;
+		cout << '\t' << (&info) << ' ' << snipStateName[info.fState] << ' ';
+		cout << info.fOffset << ".." << (info.fOffset + info.fLength - 1);
+		cout << ' ' << info.fLength << ' ' << endl;
+	}
+#endif
+}	// DumpSnipList
+
+
+// =================================================================================================
+// PrevSnip and NextSnip
+// =====================
+
+QEScanner::InternalSnipIterator
+QEScanner::PrevSnip ( InternalSnipIterator snipPos )
+{
+	// Return the position just before snipPos.  The parameter is a by-value copy, so stepping it
+	// leaves the caller's iterator alone.
+
+	--snipPos;
+	return snipPos;
+
+}	// PrevSnip
+
+QEScanner::InternalSnipIterator
+QEScanner::NextSnip ( InternalSnipIterator snipPos )
+{
+	// Return the position just after snipPos.  The parameter is a by-value copy, so stepping it
+	// leaves the caller's iterator alone.
+
+	++snipPos;
+	return snipPos;
+
+}	// NextSnip
+
+
+// =================================================================================================
+// QEScanner
+// ==========
+//
+// Initialize the scanner object with one "not seen" snip covering the whole stream.
+
+QEScanner::QEScanner ( SInt64 streamLength ) :
+
+	fStreamLength ( streamLength )
+
+{
+	// Be nice for empty files: a non-positive stream length leaves the snip list empty.
+	if ( streamLength <= 0 ) return;
+
+	// Otherwise start with a single snip spanning the entire stream.
+	InternalSnip	wholeStream ( 0, streamLength );
+	fInternalSnips.push_front ( wholeStream );
+	// DumpSnipList ( "New QEScanner" );
+
+}	// QEScanner
+
+
+// =================================================================================================
+// ~QEScanner
+// ===========
+
+QEScanner::~QEScanner()
+{
+	// Nothing to do explicitly, the members clean up after themselves.
+}	// ~QEScanner
+
+
+// =================================================================================================
+// GetSnipCount
+// ============
+
+size_t
+QEScanner::GetSnipCount ()
+{
+	// The number of snips currently in the internal list.
+
+	const size_t	snipCount	= fInternalSnips.size();
+	return snipCount;
+
+}	// GetSnipCount
+
+
+// =================================================================================================
+// StreamAllScanned
+// ================
+
+bool
+QEScanner::StreamAllScanned ()
+{
+	// True when no snip in the list is still in the not-seen state.  An empty list counts as
+	// fully scanned.
+
+	bool					everythingSeen	= true;
+	InternalSnipIterator	pos				= fInternalSnips.begin();
+
+	while ( everythingSeen && (pos != fInternalSnips.end()) ) {
+		everythingSeen = (pos->fInfo.fState != eNotSeenSnip);
+		++pos;
+	}
+
+	return everythingSeen;
+
+}	// StreamAllScanned
+
+
+// =================================================================================================
+// SplitInternalSnip
+// =================
+//
+// Split the given snip into up to 3 pieces.  The new pieces are inserted before and after this one
+// in the snip list.  The relOffset is the first byte to be kept, it is relative to this snip.  If
+// the preceding or following snips have the same state as this one, just shift the boundaries.
+// I.e. move the contents from one snip to the other, don't create a new snip.
+
+// *** To be thread safe we ought to lock the entire list during manipulation.  Let data scanning
+// *** happen in parallel, serialize all mucking with the list.
+
+void
+QEScanner::SplitInternalSnip ( InternalSnipIterator snipPos, SInt64 relOffset, SInt64 newLength )
+{
+	// On return the snip at snipPos covers exactly newLength bytes starting relOffset bytes into
+	// its original range.  The bytes cut off either end go to the neighboring snip if that has
+	// the same state, otherwise to a newly inserted snip that copies this snip's state and
+	// out-of-order flag.
+
+	SnipInfo &	midInfo	= snipPos->fInfo;
+
+	assert ( (relOffset + newLength) > relOffset );	// Check for overflow.
+	assert ( (relOffset + newLength) <= midInfo.fLength );
+
+	// --------------------------------------------
+	// Head: give away the first relOffset bytes.
+
+	if ( relOffset > 0 ) {
+
+		bool					growPrev	= false;
+		InternalSnipIterator	before		= snipPos;
+
+		if ( snipPos != fInternalSnips.begin() ) {
+			--before;
+			growPrev = (before->fInfo.fState == midInfo.fState);
+		}
+
+		if ( growPrev ) {
+			before->fInfo.fLength += relOffset;	// The previous snip just gets longer.
+		} else {
+			InternalSnip	leading ( midInfo.fOffset, relOffset );
+			leading.fInfo.fState = midInfo.fState;
+			leading.fInfo.fOutOfOrder = midInfo.fOutOfOrder;
+			fInternalSnips.insert ( snipPos, leading );	// Goes in front of the middle piece.
+		}
+
+		midInfo.fOffset += relOffset;	// What remains of this snip starts later and is shorter.
+		midInfo.fLength -= relOffset;
+
+	}
+
+	// ---------------------------------------------------
+	// Tail: give away whatever extends past newLength.
+
+	if ( newLength >= midInfo.fLength ) return;	// No excess at the high end.
+
+	const SInt64			surplus	= midInfo.fLength - newLength;
+	InternalSnipIterator	after	= snipPos;
+	++after;
+
+	const bool	growNext	= (after != fInternalSnips.end()) && (after->fInfo.fState == midInfo.fState);
+
+	if ( growNext ) {
+		after->fInfo.fOffset -= surplus;	// The next snip now starts earlier and is longer.
+		after->fInfo.fLength += surplus;
+	} else {
+		InternalSnip	trailing ( (midInfo.fOffset + newLength), surplus );
+		trailing.fInfo.fState = midInfo.fState;
+		trailing.fInfo.fOutOfOrder = midInfo.fOutOfOrder;
+		fInternalSnips.insert ( after, trailing );	// Goes right behind the middle piece.
+	}
+
+	midInfo.fLength = newLength;
+
+}	// SplitInternalSnip
+
+
+// =================================================================================================
+// MergeInternalSnips
+// ==================
+
+QEScanner::InternalSnipIterator
+QEScanner::MergeInternalSnips ( InternalSnipIterator firstPos, InternalSnipIterator secondPos )
+{
+	// Fold the second snip into the first: the first snip grows by the second's length and the
+	// second is removed from the list.  Only the length of the first snip is touched.  Returns
+	// the position of the surviving (first) snip.
+
+	const SInt64	absorbed	= secondPos->fInfo.fLength;
+
+	fInternalSnips.erase ( secondPos );
+	firstPos->fInfo.fLength += absorbed;
+
+	return firstPos;
+
+}	// MergeInternalSnips
+
+
+// =================================================================================================
+// Scan
+// ====
+
+// Scan one buffer of stream data for packets and update the snip list accordingly.
+//
+//   bufferOrigin - Address of the first byte of the buffer, must not be null.
+//   bufferOffset - Offset within the stream of the first byte of the buffer.
+//   bufferLength - Number of bytes in the buffer.  A zero length buffer is silently ignored.
+//
+// Throws ScanError if the origin is null, if the offset or length is negative or outside of the
+// stream, or if the buffer does not lie entirely within a single not-seen snip.
+
+void
+QEScanner::Scan ( const void * bufferOrigin, SInt64 bufferOffset, SInt64 bufferLength )
+{
+	SInt64	relOffset;
+
+	#if 0
+		cout << "Scan: @ " << bufferOrigin << ", " << bufferOffset << ", " << bufferLength << endl;
+	#endif
+
+	if ( bufferLength == 0 ) return;
+
+	// ----------------------------------------------------------------
+	// These comparisons are carefully done to avoid overflow problems.  The offset and length are
+	// signed, so negative values have to be rejected explicitly.  They would otherwise slip past
+	// the upper bound checks and corrupt the snip list.
+
+	if ( (bufferOffset < 0) ||
+		 (bufferLength < 0) ||
+		 (bufferOffset >= fStreamLength) ||
+		 (bufferLength > (fStreamLength - bufferOffset)) ||
+		 (bufferOrigin == 0) ) {
+		throw ScanError ( "Bad origin, offset, or length" );
+	}
+
+	// ----------------------------------------------------------------------------------------------
+	// This buffer must be within a not-seen snip.  Find it and split it.  The first snip whose
+	// end is beyond the buffer must be the enclosing one.
+
+	// *** It would be friendly for rescans for out of order problems to accept any buffer position.
+
+	const SInt64				endOffset	= bufferOffset + bufferLength - 1;
+	const InternalSnipIterator	listEnd		= fInternalSnips.end();
+	InternalSnipIterator		snipPos		= fInternalSnips.begin();
+
+	// The snips should cover the whole stream, so the search should always succeed.  Stop at the
+	// end of the list anyway instead of running off of it.
+	while ( (snipPos != listEnd) && (endOffset > (snipPos->fInfo.fOffset + snipPos->fInfo.fLength - 1)) ) ++ snipPos;
+	if ( snipPos == listEnd ) throw ScanError ( "Not within existing snip" );
+	if ( snipPos->fInfo.fState != eNotSeenSnip ) throw ScanError ( "Already seen" );
+
+	relOffset = bufferOffset - snipPos->fInfo.fOffset;
+	if ( (relOffset + bufferLength) > snipPos->fInfo.fLength ) throw ScanError ( "Not within existing snip" );
+
+	SplitInternalSnip ( snipPos, relOffset, bufferLength );		// *** If sequential & prev is partial, just tack on,
+
+	// -------------------------------------------------------
+	// Merge this snip with the preceding snip if appropriate.
+
+	// *** When out of order I/O is supported we have to do something about buffers whose predecessor is not seen.
+
+	if ( snipPos->fInfo.fOffset > 0 ) {
+		InternalSnipIterator prevPos = PrevSnip ( snipPos );
+		if ( prevPos->fInfo.fState == ePartialPacketSnip ) snipPos = MergeInternalSnips ( prevPos, snipPos );
+	}
+
+	// ----------------------------------
+	// Look for packets within this snip.
+
+	snipPos->fInfo.fState = ePendingSnip;
+	PacketMachine* thisMachine = snipPos->fMachine.get();
+	// DumpSnipList ( "Before scan" );
+
+	if ( thisMachine != 0 ) {
+		// Resuming a partial packet, hand the existing machine the new buffer.
+		thisMachine->AssociateBuffer ( bufferOffset, bufferOrigin, bufferLength );
+	} else {
+		// *** snipPos->fMachine.reset ( new PacketMachine ( bufferOffset, bufferOrigin, bufferLength ) );		VC++ lacks reset
+		#if 0
+			snipPos->fMachine = auto_ptr<PacketMachine> ( new PacketMachine ( bufferOffset, bufferOrigin, bufferLength ) );
+		#else
+			{
+				// Some versions of gcc complain about the assignment operator above.  This avoids the gcc bug.
+				PacketMachine *	pm	= new PacketMachine ( bufferOffset, bufferOrigin, bufferLength );
+				auto_ptr<PacketMachine>	ap ( pm );
+				snipPos->fMachine = ap;
+			}
+		#endif
+		thisMachine = snipPos->fMachine.get();
+	}
+
+	bool	bufferDone	= false;
+	while ( ! bufferDone ) {
+
+		PacketMachine::TriState	foundPacket = thisMachine->FindNextPacket();
+
+		if ( foundPacket == PacketMachine::eTriNo ) {
+
+			// -----------------------------------------------------------------------
+			// No packet, mark the snip as raw data and get rid of the packet machine.
+			// We're done with this buffer.
+
+			snipPos->fInfo.fState = eRawInputSnip;
+			#if 0
+				snipPos->fMachine = auto_ptr<PacketMachine>();	// *** snipPos->fMachine.reset();	VC++ lacks reset
+			#else
+				{
+					// Some versions of gcc complain about the assignment operator above.  This avoids the gcc bug.
+					auto_ptr<PacketMachine>	ap ( 0 );
+					snipPos->fMachine = ap;
+				}
+			#endif
+			bufferDone = true;
+
+		} else {
+
+			// ---------------------------------------------------------------------------------------------
+			// Either a full or partial packet.  First trim any excess off of the front as a raw input snip.
+			// If this is a partial packet mark the snip and keep the packet machine to be resumed later.
+			// We're done with this buffer, the partial packet by definition extends to the end.  If this is
+			// a complete packet first extract the additional information from the packet machine.  If there
+			// is leftover data split the snip and transfer the packet machine to the new trailing snip.
+
+			if ( thisMachine->fPacketStart > snipPos->fInfo.fOffset ) {
+
+				// There is data at the front of the current snip that must be trimmed.
+				SnipState	savedState	= snipPos->fInfo.fState;
+				snipPos->fInfo.fState = eRawInputSnip;	// ! So it gets propagated to the trimmed front part.
+				relOffset = thisMachine->fPacketStart - snipPos->fInfo.fOffset;
+				SplitInternalSnip ( snipPos, relOffset, (snipPos->fInfo.fLength - relOffset) );
+				snipPos->fInfo.fState = savedState;
+
+			}
+
+			if ( foundPacket == PacketMachine::eTriMaybe ) {
+
+				// We have only found a partial packet.
+				snipPos->fInfo.fState = ePartialPacketSnip;
+				bufferDone = true;
+
+			} else {
+
+				// We have found a complete packet. Extract all the info for it and split any trailing data.
+
+				InternalSnipIterator	packetSnip	= snipPos;
+				SnipState				packetState	= eValidPacketSnip;
+
+				if ( thisMachine->fBogusPacket ) packetState = eBadPacketSnip;
+
+				packetSnip->fInfo.fAccess = thisMachine->fAccess;
+				packetSnip->fInfo.fCharForm = thisMachine->fCharForm;
+				packetSnip->fInfo.fBytesAttr = thisMachine->fBytesAttr;
+				packetSnip->fInfo.fEncodingAttr = thisMachine->fEncodingAttr.c_str();
+				thisMachine->fEncodingAttr.erase ( thisMachine->fEncodingAttr.begin(), thisMachine->fEncodingAttr.end() );
+
+				if ( (thisMachine->fCharForm != eChar8Bit) && CharFormIsBigEndian ( thisMachine->fCharForm ) ) {
+
+					// ------------------------------------------------------------------------------
+					// Handle a special case for big endian characters.  The packet machine works as
+					// though things were little endian.  The packet starting offset points to the
+					// byte containing the opening '<', and the length includes presumed nulls that
+					// follow the last "real" byte.  If the characters are big endian we now have to
+					// decrement the starting offset of the packet, and also decrement the length of
+					// the previous snip.
+					//
+					// Note that we can't do this before the head trimming above in general.  The
+					// nulls might have been exactly at the end of a buffer and already in the
+					// previous snip.  We are doing this before trimming the tail from the raw snip
+					// containing the packet.  We adjust the raw snip's size because it ends with
+					// the input buffer.  We don't adjust the packet's size, it is already correct.
+					//
+					// The raw snip (the one before the packet) might entirely disappear.  A simple
+					// example of this is when the packet is at the start of the file.
+
+					assert ( packetSnip != fInternalSnips.begin() );	// Leading nulls were trimmed!
+
+					if ( packetSnip != fInternalSnips.begin() ) {	// ... but let's program defensively.
+
+						InternalSnipIterator prevSnip  = PrevSnip ( packetSnip );
+						const unsigned int nullsToAdd = ( CharFormIs16Bit ( thisMachine->fCharForm ) ? 1 : 3 );
+
+						assert ( nullsToAdd <= prevSnip->fInfo.fLength );
+						prevSnip->fInfo.fLength -= nullsToAdd;
+						if ( prevSnip->fInfo.fLength == 0 ) (void) fInternalSnips.erase ( prevSnip );
+
+						packetSnip->fInfo.fOffset	-= nullsToAdd;
+						packetSnip->fInfo.fLength	+= nullsToAdd;
+						thisMachine->fPacketStart	-= nullsToAdd;
+
+					}
+
+				}
+
+				if ( thisMachine->fPacketLength == snipPos->fInfo.fLength ) {
+
+					// This packet ends exactly at the end of the current snip.
+					#if 0
+						snipPos->fMachine = auto_ptr<PacketMachine>();	// *** snipPos->fMachine.reset();	VC++ lacks reset
+					#else
+						{
+							// Some versions of gcc complain about the assignment operator above.  This avoids the gcc bug.
+							auto_ptr<PacketMachine>	ap ( 0 );
+							snipPos->fMachine = ap;
+						}
+					#endif
+					bufferDone = true;
+
+				} else {
+
+					// There is trailing data to split from the just found packet.
+					SplitInternalSnip ( snipPos, 0, thisMachine->fPacketLength );
+
+					InternalSnipIterator	tailPos	= NextSnip ( snipPos );
+
+					// The raw thisMachine pointer stays usable, only the owning snip changes.
+					tailPos->fMachine = snipPos->fMachine;	// auto_ptr assignment - taking ownership
+					thisMachine->ResetMachine ();
+
+					snipPos = tailPos;
+
+				}
+
+				packetSnip->fInfo.fState = packetState;	// Do this last to avoid messing up the tail split.
+				// DumpSnipList ( "Found a packet" );
+
+
+			}
+
+		}
+
+	}
+
+	// -------------------------------------------------------
+	// Merge this snip with the preceding snip if appropriate.
+
+	// *** When out of order I/O is supported we have to check the following snip too.
+
+	if ( (snipPos->fInfo.fOffset > 0) && (snipPos->fInfo.fState == eRawInputSnip) ) {
+		InternalSnipIterator prevPos = PrevSnip ( snipPos );
+		if ( prevPos->fInfo.fState == eRawInputSnip ) snipPos = MergeInternalSnips ( prevPos, snipPos );
+	}
+
+	// DumpSnipList ( "After scan" );
+
+}	// Scan
+
+
+// =================================================================================================
+// Report
+// ======
+
+void
+QEScanner::Report ( SnipInfoVector& snips )
+{
+	// Replace the contents of snips with one SnipInfo per internal snip, in list order.
+
+	// DumpSnipList ( "Report" );
+
+	snips.erase ( snips.begin(), snips.end() );		// ! Should use snips.clear, but VC++ doesn't have it.
+	snips.reserve ( fInternalSnips.size() );
+
+	const InternalSnipIterator	lastPos	= fInternalSnips.end();
+
+	for ( InternalSnipIterator pos = fInternalSnips.begin(); pos != lastPos; ++pos ) {
+		// Append using the 3 argument constructor, then assign the whole info over it.
+		snips.push_back ( SnipInfo ( pos->fInfo.fState, pos->fInfo.fOffset, pos->fInfo.fLength ) );
+		snips.back() = pos->fInfo;	// Pick up all of the fields.
+	}
+
+}	// Report
author	Hubert Figuiere <hub@figuiere.net>	2008-11-17 23:42:00 -0500
committer	Hubert Figuiere <hub@figuiere.net>	2008-11-17 23:42:00 -0500
commit	88af812fde414aca8f9add90bc800ea3d8e9a281 (patch)
tree	0403dd1897c0b287d4d710dd422827683c59dfcb /samples/source/common/QEScanner.cpp
parent	9d7d7c3caac05db240692ad7e9196fcb7f5a1ce5 (diff)