summaryrefslogtreecommitdiff
path: root/samples/source/common/QEScanner.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'samples/source/common/QEScanner.hpp')
-rw-r--r--samples/source/common/QEScanner.hpp331
1 files changed, 331 insertions, 0 deletions
diff --git a/samples/source/common/QEScanner.hpp b/samples/source/common/QEScanner.hpp
new file mode 100644
index 0000000..0c24dce
--- /dev/null
+++ b/samples/source/common/QEScanner.hpp
@@ -0,0 +1,331 @@
+#ifndef __QEScanner_hpp__
+#define __QEScanner_hpp__
+
+// =================================================================================================
+// Copyright 2002-2005 Adobe Systems Incorporated
+// All Rights Reserved.
+//
+// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms
+// of the Adobe license agreement accompanying it.
+//
+// Adobe patent application tracking #P435, entitled 'Unique markers to simplify embedding data of
+// one format in a file with a different format', inventors: Sean Parent, Greg Gilley.
+// =================================================================================================
+
+#include <list>
+#include <vector>
+#include <string>
+#include <memory>
+#include <stdexcept>
+
+// =================================================================================================
+// The QEScanner class is used to scan a stream of input for XMP packets. A scanner object is
+// constructed then fed the input through a series of calls to Scan. Report may be called at any
+// time to get the current knowledge of the input.
+//
+// A packet starts when a valid header is found and ends when a valid trailer is found. If the
+// header contains a "bytes" attribute, additional whitespace must follow.
+//
+// *** RESTRICTIONS: The current implementation of the scanner has the following restrictions:
+// - The input must be presented in order.
+// - Not fully thread safe, don't make concurrent calls to the same QEScanner object.
+// =================================================================================================
+
+class QEScanner { // Scans an input stream for XMP packets, tracking what is known as a list of "snips".
+public:
+
+ // =============================================================================================
+ // The entire input stream is represented as a series of snips. Each snip defines one portion
+ // of the input stream that either has not been seen, has been seen and contains no packets, is
+ // exactly one packet, or contains the start of an unfinished packet. Adjacent snips with the
+ // same state are merged, so the number of snips is always minimal.
+ //
+ // A newly constructed QEScanner object has one snip covering the whole input with a state
+ // of "not seen". A block of input that contains a full XMP packet is split into 3 parts: a
+ // (possibly empty) raw input snip, the packet, and another (possibly empty) raw input snip. A
+ // block of input that contains the start of an XMP packet is split into two snips, a (possibly
+ // empty) raw input snip and the packet start; the following snip must be a "not seen" snip.
+ //
+ // It is possible to have ill-formed packets. These have a syntactically valid header and
+ // trailer, but some semantic error. For example, if the "bytes" attribute length does not span
+ // to the end of the trailer, or if the following packet begins within trailing padding.
+
+ typedef unsigned char UInt8; // Storage type for SnipState and CharacterForm.
+ typedef unsigned long UInt32; // NOTE(review): unsigned long is 64 bits wide on LP64 platforms, not 32.
+ typedef long long SInt64; // Used for stream offsets and lengths throughout this class.
+
+ enum { // The possible values of a SnipState.
+ eNotSeenSnip, // This snip has not been seen yet.
+ ePendingSnip, // This snip is an input buffer being processed.
+ eRawInputSnip, // This snip is raw input, it doesn't contain any part of an XMP packet.
+ eValidPacketSnip, // This snip is a complete, valid XMP packet.
+ ePartialPacketSnip, // This snip contains the start of a possible XMP packet.
+ eBadPacketSnip // This snip contains a complete, but semantically incorrect XMP packet.
+ };
+ typedef UInt8 SnipState; // Holds one of the e...Snip values above.
+
+ enum { // The values allow easy testing for 16/32 bit and big/little endian.
+ eChar8Bit = 0,
+ eChar16BitBig = 2,
+ eChar16BitLittle = 3,
+ eChar32BitBig = 4,
+ eChar32BitLittle = 5
+ };
+ typedef UInt8 CharacterForm; // Holds one of the eChar... values above.
+
+ enum {
+ eChar16BitMask = 2, // These constants shouldn't be used directly, they are mainly
+ eChar32BitMask = 4, // for the CharFormIsXyz macros below.
+ eCharLittleEndianMask = 1
+ };
+ // NOTE: As preprocessor macros these names are not scoped to the class. eChar8Bit (0) tests as big endian.
+ #define CharFormIs16Bit(f) ( ((int)(f) & QEScanner::eChar16BitMask) != 0 )
+ #define CharFormIs32Bit(f) ( ((int)(f) & QEScanner::eChar32BitMask) != 0 )
+
+ #define CharFormIsBigEndian(f) ( ((int)(f) & QEScanner::eCharLittleEndianMask) == 0 )
+ #define CharFormIsLittleEndian(f) ( ((int)(f) & QEScanner::eCharLittleEndianMask) != 0 )
+
+ struct SnipInfo { // The public description of one snip, as delivered in a SnipInfoVector by Report.
+
+ SInt64 fOffset; // The byte offset of this snip within the input stream.
+ SInt64 fLength; // The length in bytes of this snip.
+ SnipState fState; // The state of this snip.
+ bool fOutOfOrder; // If true, this snip was seen before the one in front of it.
+ char fAccess; // The read-only/read-write access from the end attribute.
+ CharacterForm fCharForm; // How the packet is divided into characters.
+ const char * fEncodingAttr; // The value of the encoding attribute, if any, with nulls removed. Raw pointer, never freed by SnipInfo.
+ SInt64 fBytesAttr; // The value of the bytes attribute, -1 if not present.
+
+ SnipInfo() : // Default: a zero length "not seen" snip at offset 0 with no packet attributes.
+ fOffset ( 0 ),
+ fLength ( 0 ),
+ fState ( eNotSeenSnip ),
+ fOutOfOrder ( false ),
+ fAccess ( ' ' ),
+ fCharForm ( eChar8Bit ),
+ fEncodingAttr ( "" ),
+ fBytesAttr( -1 )
+ { }
+
+ SnipInfo ( SnipState state, SInt64 offset, SInt64 length ) : // The other fields get the same defaults as above.
+ fOffset ( offset ),
+ fLength ( length ),
+ fState ( state ),
+ fOutOfOrder ( false ),
+ fAccess ( ' ' ),
+ fCharForm ( eChar8Bit ),
+ fEncodingAttr ( "" ),
+ fBytesAttr( -1 )
+ { }
+
+ };
+
+ typedef std::vector<SnipInfo> SnipInfoVector; // The output type filled in by Report.
+
+ QEScanner ( SInt64 streamLength );
+ // Constructs a new QEScanner object for a stream with the given length.
+
+ ~QEScanner();
+
+ size_t GetSnipCount();
+ // Returns the number of snips that the stream has been divided into.
+
+ bool StreamAllScanned();
+ // Returns true if all of the stream has been seen.
+
+ void Scan ( const void * bufferOrigin, SInt64 bufferOffset, SInt64 bufferLength );
+ // Scans the given part of the input, incorporating it in to the known snips.
+ // The bufferOffset is the offset of this block of input relative to the entire stream.
+ // The bufferLength is the length in bytes of this block of input.
+
+ void Report ( SnipInfoVector & snips );
+ // Produces a report of what is known about the input stream.
+
+ class ScanError : public std::logic_error { // Exception type for the scanner; where it is thrown is not visible in this header.
+ public:
+ ScanError() throw() : std::logic_error ( "" ) {}
+ explicit ScanError ( const char * message ) throw() : std::logic_error ( message ) {}
+ virtual ~ScanError() throw() {}
+ };
+
+private: // QEScanner
+
+ class PacketMachine; // Forward declaration, needed by InternalSnip below.
+
+ class InternalSnip { // One element of fInternalSnips: the public info plus the state machine for active snips.
+ public:
+
+ SnipInfo fInfo; // The public info about this snip.
+ std::auto_ptr<PacketMachine> fMachine; // The state machine for "active" snips. NOTE(review): std::auto_ptr was removed in C++17.
+
+ InternalSnip() {}; // Let everything default.
+ InternalSnip ( SInt64 offset, SInt64 length );
+ InternalSnip ( const InternalSnip & ); // NOTE(review): how fMachine is treated on copy is decided in the implementation file.
+ ~InternalSnip();
+
+ }; // InternalSnip
+
+ typedef std::list<InternalSnip> InternalSnipList;
+ typedef InternalSnipList::iterator InternalSnipIterator;
+
+ class PacketMachine { // Recognizer state for locating a packet within an associated data buffer.
+ public:
+
+ SInt64 fPacketStart; // Byte offset relative to the entire stream.
+ SInt64 fPacketLength; // Length in bytes to the end of the trailer processing instruction.
+ SInt64 fBytesAttr; // The value of the bytes attribute, -1 if not present.
+ std::string fEncodingAttr; // The value of the encoding attribute, if any, with nulls removed.
+ CharacterForm fCharForm; // How the packet is divided into characters.
+ char fAccess; // The read-only/read-write access from the end attribute.
+ bool fBogusPacket; // True if the packet has an error such as a bad "bytes" attribute value.
+
+ void ResetMachine();
+
+ enum TriState { // Three-valued result returned by FindNextPacket and by the recognizer procedures.
+ eTriNo,
+ eTriMaybe,
+ eTriYes
+ };
+
+ TriState FindNextPacket();
+
+ void AssociateBuffer ( SInt64 bufferOffset, const void * bufferOrigin, SInt64 bufferLength ); // Note: the offset comes first here, unlike QEScanner::Scan.
+
+ PacketMachine ( SInt64 bufferOffset, const void * bufferOrigin, SInt64 bufferLength );
+ ~PacketMachine();
+
+ private: // PacketMachine
+
+ PacketMachine() {}; // ! Hide the default constructor.
+
+ enum RecognizerKind { // Identifies the recognizer that is active, see fRecognizer.
+
+ eFailureRecognizer, // Not really recognizers, special states to end one buffer's processing.
+ eSuccessRecognizer,
+
+ eLeadInRecognizer, // Anything up to the next '<'.
+ eHeadStartRecorder, // Save the starting offset, count intervening nulls.
+ eHeadStartRecognizer, // The literal string "?xpacket begin=".
+
+ eBOMRecognizer, // Recognize and record the quoted byte order marker.
+
+ eIDTagRecognizer, // The literal string " id=".
+ eIDOpenRecognizer, // The opening quote for the ID.
+ eIDValueRecognizer, // The literal string "W5M0MpCehiHzreSzNTczkc9d".
+ eIDCloseRecognizer, // The closing quote for the ID.
+
+ eAttrSpaceRecognizer_1, // The space before an attribute.
+ eAttrNameRecognizer_1, // The name of an attribute.
+ eAttrValueRecognizer_1, // The equal sign and quoted string value for an attribute.
+ eAttrValueRecorder_1, // Record the value of an attribute.
+
+ eHeadEndRecognizer, // The string literal "?>".
+
+ eBodyRecognizer, // The packet body, anything up to the next '<'.
+
+ eTailStartRecognizer, // The string literal "?xpacket end=".
+ eAccessValueRecognizer, // Recognize and record the quoted r/w access mode.
+
+ eAttrSpaceRecognizer_2, // The space before an attribute.
+ eAttrNameRecognizer_2, // The name of an attribute.
+ eAttrValueRecognizer_2, // The equal sign and quoted string value for an attribute.
+ eAttrValueRecorder_2, // Record the value of an attribute.
+
+ eTailEndRecognizer, // The string literal "?>".
+ ePacketEndRecognizer, // Look for trailing padding, check and record the packet size.
+ eCloseOutRecognizer, // Look for final nulls for little endian multibyte characters.
+
+ eRecognizerCount
+
+ };
+
+ SInt64 fBufferOffset; // The offset of the data buffer within the input stream.
+ const char * fBufferOrigin; // The starting address of the data buffer for this snip.
+ const char * fBufferPtr; // The current position in the data buffer.
+ const char * fBufferLimit; // The address one past the last byte in the data buffer.
+
+ RecognizerKind fRecognizer; // Which recognizer is currently active.
+ /*signed long*/ size_t fPosition; // The internal position within a string literal, etc.
+ unsigned char fBytesPerChar; // The number of bytes per logical character, 1, 2, or 4.
+ unsigned char fBufferOverrun; // Non-zero if suspended while skipping intervening nulls.
+ char fQuoteChar; // The kind of quote seen at the start of a quoted value.
+ std::string fAttrName; // The name for an arbitrary attribute (other than "begin" and "id").
+ std::string fAttrValue; // The value for an arbitrary attribute (other than "begin" and "id").
+
+ void SetNextRecognizer ( RecognizerKind nextRecognizer );
+
+ typedef TriState (* RecognizerProc) ( PacketMachine *, const char * ); // The signature shared by all of the static procedures below.
+
+ static TriState
+ FindLessThan ( PacketMachine * ths, const char * which );
+
+ static TriState
+ MatchString ( PacketMachine * ths, const char * literal );
+
+ static TriState
+ MatchChar ( PacketMachine * ths, const char * literal );
+
+ static TriState
+ MatchOpenQuote ( PacketMachine * ths, const char * /* unused */ );
+
+ static TriState
+ MatchCloseQuote ( PacketMachine * ths, const char * /* unused */ );
+
+ static TriState
+ CaptureAttrName ( PacketMachine * ths, const char * /* unused */ );
+
+ static TriState
+ CaptureAttrValue ( PacketMachine * ths, const char * /* unused */ );
+
+ static TriState
+ RecordStart ( PacketMachine * ths, const char * /* unused */ );
+
+ static TriState
+ RecognizeBOM ( PacketMachine * ths, const char * /* unused */ );
+
+ static TriState
+ RecordHeadAttr ( PacketMachine * ths, const char * /* unused */ );
+
+ static TriState
+ CaptureAccess ( PacketMachine * ths, const char * /* unused */ );
+
+ static TriState
+ RecordTailAttr ( PacketMachine * ths, const char * /* unused */ );
+
+ static TriState
+ CheckPacketEnd ( PacketMachine * ths, const char * /* unused */ );
+
+ static TriState
+ CheckFinalNulls ( PacketMachine * ths, const char * /* unused */ );
+
+ struct RecognizerInfo { // Groups a recognizer procedure with its follow-on recognizers and a literal.
+ RecognizerProc proc;
+ RecognizerKind successNext;
+ RecognizerKind failureNext;
+ const char * literal; // Presumably the const char * argument handed to proc - confirm in the implementation.
+ };
+
+ }; // PacketMachine
+
+ SInt64 fStreamLength; // Presumably the streamLength given to the constructor - confirm in the implementation.
+ InternalSnipList fInternalSnips; // The snips that describe the input stream.
+
+ void
+ SplitInternalSnip ( InternalSnipIterator snipPos, SInt64 relOffset, SInt64 newLength );
+
+ InternalSnipIterator
+ MergeInternalSnips ( InternalSnipIterator firstPos, InternalSnipIterator secondPos );
+
+ void
+ DumpSnipList ( const char * title );
+
+ InternalSnipIterator
+ PrevSnip ( InternalSnipIterator snipPos );
+
+ InternalSnipIterator
+ NextSnip ( InternalSnipIterator snipPos );
+
+}; // QEScanner
+
+
+#endif // __QEScanner_hpp__