diff options
-rw-r--r-- | poppler/Catalog.cc | 36 | ||||
-rw-r--r-- | poppler/Catalog.h | 5 | ||||
-rw-r--r-- | poppler/Makefile.am | 4 | ||||
-rw-r--r-- | poppler/PDFDoc.h | 3 | ||||
-rw-r--r-- | poppler/StructElement.cc | 322 | ||||
-rw-r--r-- | poppler/StructElement.h | 167 | ||||
-rw-r--r-- | poppler/StructTreeRoot.cc | 174 | ||||
-rw-r--r-- | poppler/StructTreeRoot.h | 83 |
8 files changed, 776 insertions, 18 deletions
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc index 2e2511e2..a06ae5f5 100644 --- a/poppler/Catalog.cc +++ b/poppler/Catalog.cc @@ -58,6 +58,7 @@ #include "OptionalContent.h" #include "ViewerPreferences.h" #include "FileSpec.h" +#include "StructTreeRoot.h" #if MULTITHREADED # define catalogLocker() MutexLocker locker(&mutex) @@ -93,6 +94,7 @@ Catalog::Catalog(PDFDoc *docA) { embeddedFileNameTree = NULL; jsNameTree = NULL; viewerPrefs = NULL; + structTreeRoot = NULL; pagesList = NULL; pagesRefList = NULL; @@ -181,8 +183,8 @@ Catalog::~Catalog() { delete form; delete optContent; delete viewerPrefs; + delete structTreeRoot; metadata.free(); - structTreeRoot.free(); outline.free(); acroForm.free(); viewerPreferences.free(); @@ -844,24 +846,28 @@ PageLabelInfo *Catalog::getPageLabelInfo() return pageLabelInfo; } -Object *Catalog::getStructTreeRoot() +StructTreeRoot *Catalog::getStructTreeRoot() { catalogLocker(); - if (structTreeRoot.isNone()) - { - Object catDict; + if (!structTreeRoot) { + Object catalog; + Object root; + + xref->getCatalog(&catalog); + if (!catalog.isDict()) { + error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catalog.getTypeName()); + catalog.free(); + return NULL; + } - xref->getCatalog(&catDict); - if (catDict.isDict()) { - catDict.dictLookup("StructTreeRoot", &structTreeRoot); - } else { - error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName()); - structTreeRoot.initNull(); - } - catDict.free(); - } + if (catalog.dictLookup("StructTreeRoot", &root)->isDict("StructTreeRoot")) { + structTreeRoot = new StructTreeRoot(doc, root.getDict()); + } - return &structTreeRoot; + root.free(); + catalog.free(); + } + return structTreeRoot; } Guint Catalog::getMarkInfo() diff --git a/poppler/Catalog.h b/poppler/Catalog.h index 1a445f56..40c783c7 100644 --- a/poppler/Catalog.h +++ b/poppler/Catalog.h @@ -56,6 +56,7 @@ class Form; class OCGs; class ViewerPreferences; class FileSpec; +class StructTreeRoot; //------------------------------------------------------------------------ // NameTree @@ -126,7 +127,7 @@ public: GooString *readMetadata(); // Return the structure tree root object. - Object *getStructTreeRoot(); + StructTreeRoot *getStructTreeRoot(); // Return values from the MarkInfo dictionary as flags in a bitfield. enum MarkInfoFlags { @@ -241,7 +242,7 @@ private: NameTree *jsNameTree; // Java Script name-tree GooString *baseURI; // base URI for URI-type links Object metadata; // metadata stream - Object structTreeRoot; // structure tree root dictionary + StructTreeRoot *structTreeRoot; // structure tree root Guint markInfo; // Flags from MarkInfo dictionary Object outline; // outline dictionary Object acroForm; // AcroForm dictionary diff --git a/poppler/Makefile.am b/poppler/Makefile.am index aa7c9242..9f90c9d2 100644 --- a/poppler/Makefile.am +++ b/poppler/Makefile.am @@ -216,6 +216,8 @@ poppler_include_HEADERS = \ StdinPDFDocBuilder.h \ Stream-CCITT.h \ Stream.h \ + StructElement.h \ + StructTreeRoot.h \ UnicodeMap.h \ UnicodeMapTables.h \ UnicodeTypeTable.h \ @@ -294,6 +296,8 @@ libpoppler_la_SOURCES = \ StdinCachedFile.cc \ StdinPDFDocBuilder.cc \ Stream.cc \ + StructTreeRoot.cc \ + StructElement.cc \ strtok_r.cpp \ UnicodeMap.cc \ UnicodeTypeTable.cc \ diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h index da9bf5b3..48189bce 100644 --- a/poppler/PDFDoc.h +++ b/poppler/PDFDoc.h @@ -60,6 +60,7 @@ class Outline; class Linearization; class SecurityHandler; class Hints; +class StructTreeRoot; enum PDFWriteMode { writeStandard, @@ -139,7 +140,7 @@ public: GooString *readMetadata() { return catalog->readMetadata(); } // Return the structure tree root object. - Object *getStructTreeRoot() { return catalog->getStructTreeRoot(); } + StructTreeRoot *getStructTreeRoot() { return catalog->getStructTreeRoot(); } // Get page. Page *getPage(int page); diff --git a/poppler/StructElement.cc b/poppler/StructElement.cc new file mode 100644 index 00000000..e403457b --- /dev/null +++ b/poppler/StructElement.cc @@ -0,0 +1,322 @@ +//======================================================================== +// +// StructElement.cc +// +// This file is licensed under the GPLv2 or later +// +// Copyright 2013 Igalia S.L. +// +//======================================================================== + +#ifdef USE_GCC_PRAGMAS +#pragma interface +#endif + +#include "StructElement.h" +#include "StructTreeRoot.h" +#include "PDFDoc.h" +#include "Dict.h" + +#include <assert.h> + +class GfxState; + + +static const char *typeToName(StructElement::Type type) +{ + if (type == StructElement::MCID) + return "MarkedContent"; + if (type == StructElement::OBJR) + return "ObjectReference"; + + return "Unknown"; +} + + +//------------------------------------------------------------------------ +// StructElement +//------------------------------------------------------------------------ + +StructElement::StructData::StructData(): + altText(0), + actualText(0), + id(0), + title(0), + expandedAbbr(0), + language(0), + revision(0) +{ +} + +StructElement::StructData::~StructData() +{ + delete altText; + delete actualText; + delete id; + delete title; + delete language; + parentRef.free(); + for (ElemPtrArray::iterator i = elements.begin(); i != elements.end(); ++i) delete *i; +} + + +StructElement::StructElement(Dict *element, + StructTreeRoot *treeRootA, + StructElement *parentA, + std::set<int> &seen): + type(Unknown), + treeRoot(treeRootA), + parent(parentA), + s(new StructData()) +{ + assert(treeRoot); + assert(element); + + parse(element); + parseChildren(element, seen); +} + +StructElement::StructElement(int mcid, StructTreeRoot *treeRootA, StructElement *parentA): + type(MCID), + treeRoot(treeRootA), + parent(parentA), + c(new ContentData(mcid)) +{ + assert(treeRoot); + assert(parent); +} + +StructElement::StructElement(const Ref& ref, StructTreeRoot *treeRootA, StructElement *parentA): + type(OBJR), + treeRoot(treeRootA), + parent(parentA), + c(new ContentData(ref)) +{ + assert(treeRoot); + assert(parent); +} + +StructElement::~StructElement() +{ + if (isContent()) + delete c; + else + delete s; + pageRef.free(); +} + +GBool StructElement::hasPageRef() const +{ + return pageRef.isRef() || (parent && parent->hasPageRef()); +} + +bool StructElement::getPageRef(Ref& ref) const +{ + if (pageRef.isRef()) { + ref = pageRef.getRef(); + return gTrue; + } + + if (parent) + return parent->getPageRef(ref); + + return gFalse; +} + +const char* StructElement::getTypeName() const +{ + return typeToName(type); +} + +static StructElement::Type roleMapResolve(Dict *roleMap, const char *name, const char *curName, Object *resolved) +{ + // TODO Replace this dummy implementation + return StructElement::Unknown; +} + +void StructElement::parse(Dict *element) +{ + Object obj; + + // Type is optional, but if present must be StructElem + if (!element->lookup("Type", &obj)->isNull() && !obj.isName("StructElem")) { + error(errSyntaxError, -1, "Type of StructElem object is wrong"); + obj.free(); + return; + } + obj.free(); + + // Parent object reference (required). + if (!element->lookupNF("P", &s->parentRef)->isRef()) { + error(errSyntaxError, -1, "P object is wrong type ({0:s})", obj.getTypeName()); + return; + } + + // Check whether the S-type is valid for the top level + // element and create a node of the appropriate type. + if (!element->lookup("S", &obj)->isName()) { + error(errSyntaxError, -1, "S object is wrong type ({0:s})", obj.getTypeName()); + obj.free(); + return; + } + + // Type name may not be standard, resolve through RoleMap first. + if (treeRoot->getRoleMap()) { + Object resolvedName; + type = roleMapResolve(treeRoot->getRoleMap(), obj.getName(), NULL, &resolvedName); + } + + obj.free(); + + // Object ID (optional), to be looked at the IDTree in the tree root. + if (element->lookup("ID", &obj)->isString()) { + s->id = obj.takeString(); + } + obj.free(); + + // Page reference (optional) in which at least one of the child items + // is to be rendered in. Note: each element stores only the /Pg value + // contained by it, and StructElement::getPageRef() may look in parent + // elements to find the page where an element belongs. + element->lookupNF("Pg", &pageRef); + + // Revision number (optional). + if (element->lookup("R", &obj)->isInt()) { + s->revision = obj.getInt(); + } + obj.free(); + + // Element title (optional). + if (element->lookup("T", &obj)->isString()) { + s->title = obj.takeString(); + } + obj.free(); + + // Language (optional). + if (element->lookup("Lang", &obj)->isString()) { + s->language = obj.takeString(); + } + obj.free(); + + // Alternative text (optional). + if (element->lookup("Alt", &obj)->isString()) { + s->altText = obj.takeString(); + } + obj.free(); + + // Expanded form of an abbreviation (optional). + if (element->lookup("E", &obj)->isString()) { + s->expandedAbbr = obj.takeString(); + } + obj.free(); + + // Actual text (optional). + if (element->lookup("ActualText", &obj)->isString()) { + s->actualText = obj.takeString(); + } + obj.free(); + + // TODO: Attributes directly attached to the element (optional). + // TODO: Attributes referenced indirectly through the ClassMap (optional). +} + +StructElement *StructElement::parseChild(Object *ref, + Object *childObj, + std::set<int> &seen) +{ + assert(childObj); + assert(ref); + + StructElement *child = NULL; + + if (childObj->isInt()) { + child = new StructElement(childObj->getInt(), treeRoot, this); + } else if (childObj->isDict("MCR")) { + /* + * TODO: The optional Stm/StwOwn attributes are not handled, so all the + * page will be always scanned when calling StructElement::getText(). + */ + Object mcidObj; + Object pageRefObj; + + if (!childObj->dictLookup("MCID", &mcidObj)->isInt()) { + error(errSyntaxError, -1, "MCID object is wrong type ({0:s})", mcidObj.getTypeName()); + mcidObj.free(); + return NULL; + } + + child = new StructElement(mcidObj.getInt(), treeRoot, this); + mcidObj.free(); + + if (childObj->dictLookupNF("Pg", &pageRefObj)->isRef()) { + child->pageRef = pageRefObj; + } else { + pageRefObj.free(); + } + } else if (childObj->isDict("OBJR")) { + Object refObj; + + if (childObj->dictLookupNF("Obj", &refObj)->isRef()) { + Object pageRefObj; + + child = new StructElement(refObj.getRef(), treeRoot, this); + + if (childObj->dictLookupNF("Pg", &pageRefObj)->isRef()) { + child->pageRef = pageRefObj; + } else { + pageRefObj.free(); + } + } else { + error(errSyntaxError, -1, "Obj object is wrong type ({0:s})", refObj.getTypeName()); + } + refObj.free(); + } else if (childObj->isDict()) { + if (!ref->isRef()) { + error(errSyntaxError, -1, + "Structure element dictionary is not an indirect reference ({0:s})", + ref->getTypeName()); + } else if (seen.find(ref->getRefNum()) == seen.end()) { + seen.insert(ref->getRefNum()); + child = new StructElement(childObj->getDict(), treeRoot, this, seen); + } else { + error(errSyntaxWarning, -1, + "Loop detected in structure tree, skipping subtree at object {0:i}:{0:i}", + ref->getRefNum(), ref->getRefGen()); + } + } else { + error(errSyntaxWarning, -1, "K has a child of wrong type ({0:s})", childObj->getTypeName()); + } + + if (child) { + if (child->isOk()) { + appendElement(child); + if (ref->isRef()) + treeRoot->parentTreeAdd(ref->getRef(), child); + } else { + delete child; + child = NULL; + } + } + + return child; +} + +void StructElement::parseChildren(Dict *element, std::set<int> &seen) +{ + Object kids; + + if (element->lookup("K", &kids)->isArray()) { + for (int i = 0; i < kids.arrayGetLength(); i++) { + Object obj, ref; + parseChild(kids.arrayGetNF(i, &ref), kids.arrayGet(i, &obj), seen); + obj.free(); + ref.free(); + } + } else if (kids.isDict() || kids.isInt()) { + Object ref; + parseChild(element->lookupNF("K", &ref), &kids, seen); + ref.free(); + } + + kids.free(); +} diff --git a/poppler/StructElement.h b/poppler/StructElement.h new file mode 100644 index 00000000..d1997c9c --- /dev/null +++ b/poppler/StructElement.h @@ -0,0 +1,167 @@ +//======================================================================== +// +// StructElement.h +// +// This file is licensed under the GPLv2 or later +// +// Copyright 2013 Igalia S.L. +// +//======================================================================== + +#ifndef STRUCTELEMENT_H +#define STRUCTELEMENT_H + +#ifdef USE_GCC_PRAGMAS +#pragma interface +#endif + +#include "goo/gtypes.h" +#include "goo/GooString.h" +#include "Object.h" +#include <vector> +#include <set> + +class GooString; +class Dict; +class StructTreeRoot; + + +class StructElement { +public: + enum Type { + Unknown = 0, + MCID, // MCID reference, used internally + OBJR, // Object reference, used internally + + Document, Part, Art, Sect, Div, // Structural elements + + Span, Quote, Note, Reference, BibEntry, // Inline elements + Code, Link, Annot, + BlockQuote, Caption, NonStruct, + TOC, TOCI, Index, Private, + + P, H, H1, H2, H3, H4, H5, H6, // Paragraph-like + + L, LI, Lbl, // List elements + + Table, TR, TH, TD, THead, TFoot, TBody, // Table elements + + Ruby, RB, RT, RP, // Ruby text elements + Warichu, WT, WP, + + Figure, Formula, Form, // Illustration-like elements + }; + + static const Ref InvalidRef; + + const char *getTypeName() const; + Type getType() const { return type; } + GBool isOk() const { return type != Unknown; } + + inline GBool isContent() const { return (type == MCID) || isObjectRef(); } + inline GBool isObjectRef() const { return (type == OBJR && c->ref.num != -1 && c->ref.gen != -1); } + + int getMCID() const { return c->mcid; } + Ref getObjectRef() const { return c->ref; } + Ref getParentRef() { return isContent() ? parent->getParentRef() : s->parentRef.getRef(); } + GBool hasPageRef() const; + GBool getPageRef(Ref& ref) const; + StructTreeRoot *getStructTreeRoot() { return treeRoot; } + + // Optional element identifier. + const GooString *getID() const { return isContent() ? NULL : s->id; } + GooString *getID() { return isContent() ? NULL : s->id; } + + // Optional ISO language name, e.g. en_US + GooString *getLanguage() { + if (!isContent() && s->language) return s->language; + return parent ? parent->getLanguage() : NULL; + } + const GooString *getLanguage() const { + if (!isContent() && s->language) return s->language; + return parent ? parent->getLanguage() : NULL; + } + + // Optional revision number, defaults to zero. + Guint getRevision() const { return isContent() ? 0 : s->revision; } + void setRevision(Guint revision) { if (isContent()) s->revision = revision; } + + // Optional element title, in human-readable form. + const GooString *getTitle() const { return isContent() ? NULL : s->title; } + GooString *getTitle() { return isContent() ? NULL : s->title; } + + // Optional element expanded abbreviation text. + const GooString *getExpandedAbbr() const { return isContent() ? NULL : s->expandedAbbr; } + GooString *getExpandedAbbr() { return isContent() ? NULL : s->expandedAbbr; } + + unsigned getNumElements() const { return isContent() ? 0 : s->elements.size(); } + const StructElement *getElement(int i) const { return isContent() ? NULL : s->elements.at(i); } + StructElement *getElement(int i) { return isContent() ? NULL : s->elements.at(i); } + + void appendElement(StructElement *element) { + if (!isContent() && element && element->isOk()) { + s->elements.push_back(element); + } + } + + const GooString *getAltText() const { return isContent() ? NULL : s->altText; } + GooString *getAltText() { return isContent() ? NULL : s->altText; } + + const GooString *getActualText() const { return isContent() ? NULL : s->actualText; } + GooString *getActualText() { return isContent() ? NULL : s->actualText; } + + ~StructElement(); + +private: + typedef std::vector<StructElement*> ElemPtrArray; + + struct StructData { + Object parentRef; + GooString *altText; + GooString *actualText; + GooString *id; + GooString *title; + GooString *expandedAbbr; + GooString *language; + Guint revision; + ElemPtrArray elements; + + StructData(); + ~StructData(); + }; + + // Data in content elements (MCID, MCR) + struct ContentData { + union { + int mcid; + Ref ref; + }; + + ContentData(int mcidA): mcid(mcidA) {} + ContentData(const Ref& r) { ref.num = r.num; ref.gen = r.gen; } + }; + + // Common data + Type type; + StructTreeRoot *treeRoot; + StructElement *parent; + mutable Object pageRef; + + union { + StructData *s; + ContentData *c; + }; + + StructElement(Dict *elementDict, StructTreeRoot *treeRootA, StructElement *parentA, std::set<int> &seen); + StructElement(int mcid, StructTreeRoot *treeRootA, StructElement *parentA); + StructElement(const Ref &ref, StructTreeRoot *treeRootA, StructElement *parentA); + + void parse(Dict* elementDict); + StructElement* parseChild(Object *ref, Object* childObj, std::set<int> &seen); + void parseChildren(Dict* element, std::set<int> &seen); + + friend class StructTreeRoot; +}; + +#endif + diff --git a/poppler/StructTreeRoot.cc b/poppler/StructTreeRoot.cc new file mode 100644 index 00000000..59f017ee --- /dev/null +++ b/poppler/StructTreeRoot.cc @@ -0,0 +1,174 @@ +//======================================================================== +// +// StructTreeRoot.cc +// +// This file is licensed under the GPLv2 or later +// +// Copyright 2013 Igalia S.L. +// +//======================================================================== + +#ifdef USE_GCC_PRAGMAS +#pragma interface +#endif + +#include "goo/GooString.h" +#include "StructTreeRoot.h" +#include "StructElement.h" +#include "PDFDoc.h" +#include "Object.h" +#include "Dict.h" +#include <set> +#include <assert.h> + + +StructTreeRoot::StructTreeRoot(PDFDoc *docA, Dict *structTreeRootDict): + doc(docA) +{ + assert(doc); + assert(structTreeRootDict); + parse(structTreeRootDict); +} + +StructTreeRoot::~StructTreeRoot() +{ + for (ElemPtrArray::iterator i = elements.begin(); i != elements.end(); ++i) + delete *i; + classMap.free(); + roleMap.free(); +} + +void StructTreeRoot::parse(Dict *root) +{ + // The RoleMap/ClassMap dictionaries are needed by all the parsing + // functions, which will resolve the custom names to canonical + // standard names. + root->lookup("RoleMap", &roleMap); + root->lookup("ClassMap", &classMap); + + // ParentTree (optional). If present, it must be a number tree, + // otherwise it is not possible to map stream objects to their + // corresponsing structure element. Here only the references are + // loaded into the array, the pointers to the StructElements will + // be filled-in later when parsing them. + Object obj; + if (root->lookup("ParentTree", &obj)->isDict()) { + Object nums; + if (obj.dictLookup("Nums", &nums)->isArray()) { + if (nums.arrayGetLength() % 2 == 0) { + parentTree.resize(nums.arrayGetLength() / 2); + // Index numbers in even positions, references in odd ones + for (int i = 0; i < nums.arrayGetLength(); i += 2) { + Object index, value; + + if (!nums.arrayGet(i, &index)->isInt()) { + error(errSyntaxError, -1, "Nums item at position {0:d} is wrong type ({1:s})", i, index.getTypeName()); + index.free(); + continue; + } + if (index.getInt() < 0) { + error(errSyntaxError, -1, "Nums item at position {0:d} is invalid value ({1:d})", i, index.getInt()); + index.free(); + continue; + } + + const unsigned idx = index.getInt(); + if (nums.arrayGetNF(i + 1, &value)->isRef()) { + parentTree[idx].resize(1); + parentTree[idx][0].ref = value.getRef(); + } else if (nums.arrayGet(i + 1, &value)->isArray()) { + parentTree[idx].resize(value.arrayGetLength()); + for (int j = 0; j < value.arrayGetLength(); j++) { + Object itemvalue; + if (value.arrayGetNF(j, &itemvalue)->isRef()) + parentTree[idx][j].ref = itemvalue.getRef(); + else + error(errSyntaxError, -1, "Nums array item at position {0:d}/{1:d} is invalid type ({2:s})", i, j, itemvalue.getTypeName()); + itemvalue.free(); + } + } else { + error(errSyntaxError, -1, "Nums item at position {0:d} is wrong type ({1:s})", i + 1, value.getTypeName()); + } + + value.free(); + index.free(); + } + } else { + error(errSyntaxError, -1, "Nums array length is not a even ({0:i})", nums.arrayGetLength()); + } + } else { + error(errSyntaxError, -1, "Nums object is wrong type ({0:s})", nums.getTypeName()); + } + nums.free(); + } + obj.free(); + + std::set<int> seenElements; + + // Parse the children StructElements + const GBool marked = doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked; + Object kids; + if (root->lookup("K", &kids)->isArray()) { + if (marked && kids.arrayGetLength() > 1) { + error(errSyntaxWarning, -1, "K in StructTreeRoot has more than one children in a tagged PDF"); + } + for (int i = 0; i < kids.arrayGetLength(); i++) { + Object obj, ref; + kids.arrayGetNF(i, &ref); + if (ref.isRef()) { + seenElements.insert(ref.getRefNum()); + } + if (kids.arrayGet(i, &obj)->isDict()) { + StructElement *child = new StructElement(obj.getDict(), this, NULL, seenElements); + if (child->isOk()) { + if (marked && !(child->getType() == StructElement::Document || + child->getType() == StructElement::Part || + child->getType() == StructElement::Art || + child->getType() == StructElement::Div)) { + error(errSyntaxWarning, -1, "StructTreeRoot element of tagged PDF is wrong type ({0:s})", child->getTypeName()); + } + appendElement(child); + if (ref.isRef()) { + parentTreeAdd(ref.getRef(), child); + } + } else { + error(errSyntaxWarning, -1, "StructTreeRoot element could not be parsed"); + delete child; + } + } else { + error(errSyntaxWarning, -1, "K has a child of wrong type ({0:s})", obj.getTypeName()); + } + obj.free(); + ref.free(); + } + } else if (kids.isDict()) { + if (marked) { + error(errSyntaxWarning, -1, "K has a child of wrong type for a tagged PDF ({0:s})", kids.getTypeName()); + } + StructElement *child = new StructElement(kids.getDict(), this, NULL, seenElements); + if (child->isOk()) { + appendElement(child); + Object ref; + if (root->lookupNF("K", &ref)->isRef()) + parentTreeAdd(ref.getRef(), child); + ref.free(); + } else { + error(errSyntaxWarning, -1, "StructTreeRoot element could not be parsed"); + delete child; + } + } else if (!kids.isNull()) { + error(errSyntaxWarning, -1, "K in StructTreeRoot is wrong type ({0:s})", kids.getTypeName()); + } + + kids.free(); +} + +void StructTreeRoot::parentTreeAdd(const Ref &objectRef, StructElement *element) +{ + for (std::vector< std::vector<Parent> >::iterator i = parentTree.begin(); i != parentTree.end(); ++i) { + for (std::vector<Parent>::iterator j = i->begin(); j != i->end(); ++j) { + if (j->ref.num == objectRef.num && j->ref.gen == objectRef.gen) + j->element = element; + } + } +} diff --git a/poppler/StructTreeRoot.h b/poppler/StructTreeRoot.h new file mode 100644 index 00000000..9928e2f6 --- /dev/null +++ b/poppler/StructTreeRoot.h @@ -0,0 +1,83 @@ +//======================================================================== +// +// StructTreeRoot.h +// +// This file is licensed under the GPLv2 or later +// +// Copyright 2013 Igalia S.L. +// +//======================================================================== + +#ifndef STRUCTTREEROOT_H +#define STRUCTTREEROOT_H + +#ifdef USE_GCC_PRAGMAS +#pragma interface +#endif + +#include "goo/gtypes.h" +#include "Object.h" +#include "StructElement.h" +#include <vector> + +class Dict; +class PDFDoc; + + +class StructTreeRoot +{ +public: + StructTreeRoot(PDFDoc *docA, Dict *rootDict); + ~StructTreeRoot(); + + PDFDoc *getDoc() { return doc; } + Dict *getRoleMap() { return roleMap.isDict() ? roleMap.getDict() : NULL; } + Dict *getClassMap() { return classMap.isDict() ? classMap.getDict() : NULL; } + unsigned getNumElements() const { return elements.size(); } + const StructElement *getElement(int i) const { return elements.at(i); } + StructElement *getElement(int i) { return elements.at(i); } + + void appendElement(StructElement *element) { + if (element && element->isOk()) { + elements.push_back(element); + } + } + + const StructElement *findParentElement(unsigned index) const { + if (index < parentTree.size() && parentTree[index].size() == 1) { + return parentTree[index][0].element; + } + return NULL; + } + +private: + typedef std::vector<StructElement*> ElemPtrArray; + + // Structure for items in /ParentTree, it keeps a mapping of + // object references and pointers to StructElement objects. + struct Parent { + Ref ref; + StructElement *element; + + Parent(): element(NULL) { ref.num = ref.gen = -1; } + Parent(const Parent &p): element(p.element) { + ref.num = p.ref.num; + ref.gen = p.ref.gen; + } + ~Parent() {} + }; + + PDFDoc *doc; + Object roleMap; + Object classMap; + ElemPtrArray elements; + std::vector< std::vector<Parent> > parentTree; + + void parse(Dict *rootDict); + void parentTreeAdd(const Ref &objectRef, StructElement *element); + + friend class StructElement; +}; + +#endif + |