summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--poppler/Catalog.cc36
-rw-r--r--poppler/Catalog.h5
-rw-r--r--poppler/Makefile.am4
-rw-r--r--poppler/PDFDoc.h3
-rw-r--r--poppler/StructElement.cc322
-rw-r--r--poppler/StructElement.h167
-rw-r--r--poppler/StructTreeRoot.cc174
-rw-r--r--poppler/StructTreeRoot.h83
8 files changed, 776 insertions, 18 deletions
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 2e2511e2..a06ae5f5 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -58,6 +58,7 @@
#include "OptionalContent.h"
#include "ViewerPreferences.h"
#include "FileSpec.h"
+#include "StructTreeRoot.h"
#if MULTITHREADED
# define catalogLocker() MutexLocker locker(&mutex)
@@ -93,6 +94,7 @@ Catalog::Catalog(PDFDoc *docA) {
embeddedFileNameTree = NULL;
jsNameTree = NULL;
viewerPrefs = NULL;
+ structTreeRoot = NULL;
pagesList = NULL;
pagesRefList = NULL;
@@ -181,8 +183,8 @@ Catalog::~Catalog() {
delete form;
delete optContent;
delete viewerPrefs;
+ delete structTreeRoot;
metadata.free();
- structTreeRoot.free();
outline.free();
acroForm.free();
viewerPreferences.free();
@@ -844,24 +846,28 @@ PageLabelInfo *Catalog::getPageLabelInfo()
return pageLabelInfo;
}
-Object *Catalog::getStructTreeRoot()
+StructTreeRoot *Catalog::getStructTreeRoot()
{
catalogLocker();
- if (structTreeRoot.isNone())
- {
- Object catDict;
+ if (!structTreeRoot) {
+ Object catalog;
+ Object root;
+
+ xref->getCatalog(&catalog);
+ if (!catalog.isDict()) {
+ error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catalog.getTypeName());
+ catalog.free();
+ return NULL;
+ }
- xref->getCatalog(&catDict);
- if (catDict.isDict()) {
- catDict.dictLookup("StructTreeRoot", &structTreeRoot);
- } else {
- error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
- structTreeRoot.initNull();
- }
- catDict.free();
- }
+ if (catalog.dictLookup("StructTreeRoot", &root)->isDict("StructTreeRoot")) {
+ structTreeRoot = new StructTreeRoot(doc, root.getDict());
+ }
- return &structTreeRoot;
+ root.free();
+ catalog.free();
+ }
+ return structTreeRoot;
}
Guint Catalog::getMarkInfo()
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 1a445f56..40c783c7 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -56,6 +56,7 @@ class Form;
class OCGs;
class ViewerPreferences;
class FileSpec;
+class StructTreeRoot;
//------------------------------------------------------------------------
// NameTree
@@ -126,7 +127,7 @@ public:
GooString *readMetadata();
// Return the structure tree root object.
- Object *getStructTreeRoot();
+ StructTreeRoot *getStructTreeRoot();
// Return values from the MarkInfo dictionary as flags in a bitfield.
enum MarkInfoFlags {
@@ -241,7 +242,7 @@ private:
NameTree *jsNameTree; // Java Script name-tree
GooString *baseURI; // base URI for URI-type links
Object metadata; // metadata stream
- Object structTreeRoot; // structure tree root dictionary
+ StructTreeRoot *structTreeRoot; // structure tree root
Guint markInfo; // Flags from MarkInfo dictionary
Object outline; // outline dictionary
Object acroForm; // AcroForm dictionary
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index aa7c9242..9f90c9d2 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -216,6 +216,8 @@ poppler_include_HEADERS = \
StdinPDFDocBuilder.h \
Stream-CCITT.h \
Stream.h \
+ StructElement.h \
+ StructTreeRoot.h \
UnicodeMap.h \
UnicodeMapTables.h \
UnicodeTypeTable.h \
@@ -294,6 +296,8 @@ libpoppler_la_SOURCES = \
StdinCachedFile.cc \
StdinPDFDocBuilder.cc \
Stream.cc \
+ StructTreeRoot.cc \
+ StructElement.cc \
strtok_r.cpp \
UnicodeMap.cc \
UnicodeTypeTable.cc \
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index da9bf5b3..48189bce 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -60,6 +60,7 @@ class Outline;
class Linearization;
class SecurityHandler;
class Hints;
+class StructTreeRoot;
enum PDFWriteMode {
writeStandard,
@@ -139,7 +140,7 @@ public:
GooString *readMetadata() { return catalog->readMetadata(); }
// Return the structure tree root object.
- Object *getStructTreeRoot() { return catalog->getStructTreeRoot(); }
+ StructTreeRoot *getStructTreeRoot() { return catalog->getStructTreeRoot(); }
// Get page.
Page *getPage(int page);
diff --git a/poppler/StructElement.cc b/poppler/StructElement.cc
new file mode 100644
index 00000000..e403457b
--- /dev/null
+++ b/poppler/StructElement.cc
@@ -0,0 +1,322 @@
+//========================================================================
+//
+// StructElement.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include "StructElement.h"
+#include "StructTreeRoot.h"
+#include "PDFDoc.h"
+#include "Dict.h"
+
+#include <assert.h>
+
+class GfxState;
+
+
+// Translate an element type into a printable name. Only the two internal
+// content-item types have a dedicated name; everything else is reported
+// as "Unknown".
+static const char *typeToName(StructElement::Type type)
+{
+  switch (type) {
+    case StructElement::MCID:
+      return "MarkedContent";
+    case StructElement::OBJR:
+      return "ObjectReference";
+    default:
+      return "Unknown";
+  }
+}
+
+
+//------------------------------------------------------------------------
+// StructElement
+//------------------------------------------------------------------------
+
+// Start with every optional string unset and a revision of zero.
+StructElement::StructData::StructData():
+  altText(NULL),
+  actualText(NULL),
+  id(NULL),
+  title(NULL),
+  expandedAbbr(NULL),
+  language(NULL),
+  revision(0)
+{
+}
+
+// Release everything the structure data owns: the optional strings, the
+// parent reference object and all child elements.
+StructElement::StructData::~StructData()
+{
+  delete altText;
+  delete actualText;
+  delete id;
+  delete title;
+  // expandedAbbr is taken with takeString() in StructElement::parse(), so
+  // it is owned here like the other strings and must be released too.
+  delete expandedAbbr;
+  delete language;
+  parentRef.free();
+  for (ElemPtrArray::iterator i = elements.begin(); i != elements.end(); ++i)
+    delete *i;
+}
+
+
+// Build a structure element from its dictionary. The type starts as
+// Unknown and a fresh StructData is allocated; parse() then reads the
+// element's own entries and parseChildren() descends into /K. "seen" is
+// handed on to parseChildren(), where it is used to detect loops.
+StructElement::StructElement(Dict *element,
+                             StructTreeRoot *treeRootA,
+                             StructElement *parentA,
+                             std::set<int> &seen):
+  type(Unknown),
+  treeRoot(treeRootA),
+  parent(parentA),
+  s(new StructData())
+{
+  assert(treeRoot);
+  assert(element);
+
+  parse(element);
+  parseChildren(element, seen);
+}
+
+// Build a content item of type MCID that stores the given marked-content
+// identifier. A parent element is required.
+StructElement::StructElement(int mcid, StructTreeRoot *treeRootA, StructElement *parentA):
+  type(MCID),
+  treeRoot(treeRootA),
+  parent(parentA),
+  c(new ContentData(mcid))
+{
+  assert(treeRoot);
+  assert(parent);
+}
+
+// Build a content item of type OBJR that stores a reference to another
+// object. A parent element is required.
+StructElement::StructElement(const Ref& ref, StructTreeRoot *treeRootA, StructElement *parentA):
+  type(OBJR),
+  treeRoot(treeRootA),
+  parent(parentA),
+  c(new ContentData(ref))
+{
+  assert(treeRoot);
+  assert(parent);
+}
+
+// Release the payload that matches the element type, then the page
+// reference object.
+StructElement::~StructElement()
+{
+  // Decide on the type tag alone: the MCID and OBJR constructors always
+  // allocate a ContentData. isContent() additionally inspects the stored
+  // reference and reports false for an OBJR whose Ref has num or gen
+  // equal to -1, which would make this delete a ContentData through the
+  // StructData pointer.
+  if (type == MCID || type == OBJR)
+    delete c;
+  else
+    delete s;
+  pageRef.free();
+}
+
+// Tell whether this element, or any of its ancestors, carries a page
+// reference.
+GBool StructElement::hasPageRef() const
+{
+  for (const StructElement *elem = this; elem; elem = elem->parent) {
+    if (elem->pageRef.isRef())
+      return gTrue;
+  }
+  return gFalse;
+}
+
+// Retrieve the page reference that applies to this element. The
+// element's own /Pg value is used when it is a reference; otherwise the
+// lookup continues in the parent chain.
+//
+// ref: receives the page reference when one is found.
+// Returns gTrue when "ref" was filled in, gFalse otherwise.
+//
+// The return type is spelled GBool to match the declaration in
+// StructElement.h and the gTrue/gFalse values returned below.
+GBool StructElement::getPageRef(Ref& ref) const
+{
+  if (pageRef.isRef()) {
+    ref = pageRef.getRef();
+    return gTrue;
+  }
+
+  if (parent)
+    return parent->getPageRef(ref);
+
+  return gFalse;
+}
+
+// Printable name for the element type, as produced by typeToName().
+const char* StructElement::getTypeName() const
+{
+  return typeToName(type);
+}
+
+// Placeholder for resolving a structure type name through the RoleMap.
+// The current body ignores all of its arguments, leaves "resolved"
+// untouched and always reports StructElement::Unknown.
+static StructElement::Type roleMapResolve(Dict *roleMap, const char *name, const char *curName, Object *resolved)
+{
+  // TODO Replace this dummy implementation
+  return StructElement::Unknown;
+}
+
+// Read the entries stored directly in a structure element dictionary
+// into this element. When /Type, /P or /S is malformed an error is
+// reported and the function returns early without touching the
+// remaining fields or the element type.
+void StructElement::parse(Dict *element)
+{
+  Object obj;
+
+  // Type is optional, but if present must be StructElem
+  if (!element->lookup("Type", &obj)->isNull() && !obj.isName("StructElem")) {
+    error(errSyntaxError, -1, "Type of StructElem object is wrong");
+    obj.free();
+    return;
+  }
+  obj.free();
+
+  // Parent object reference (required).
+  if (!element->lookupNF("P", &s->parentRef)->isRef()) {
+    // Report the type of the object that was actually looked up; "obj"
+    // has already been freed above and no longer describes anything.
+    error(errSyntaxError, -1, "P object is wrong type ({0:s})", s->parentRef.getTypeName());
+    return;
+  }
+
+  // Check whether the S-type is valid for the top level
+  // element and create a node of the appropriate type.
+  if (!element->lookup("S", &obj)->isName()) {
+    error(errSyntaxError, -1, "S object is wrong type ({0:s})", obj.getTypeName());
+    obj.free();
+    return;
+  }
+
+  // Type name may not be standard, resolve through RoleMap first.
+  if (treeRoot->getRoleMap()) {
+    Object resolvedName;
+    type = roleMapResolve(treeRoot->getRoleMap(), obj.getName(), NULL, &resolvedName);
+  }
+
+  obj.free();
+
+  // Object ID (optional), to be looked at the IDTree in the tree root.
+  if (element->lookup("ID", &obj)->isString()) {
+    s->id = obj.takeString();
+  }
+  obj.free();
+
+  // Page reference (optional) in which at least one of the child items
+  // is to be rendered in. Note: each element stores only the /Pg value
+  // contained by it, and StructElement::getPageRef() may look in parent
+  // elements to find the page where an element belongs.
+  element->lookupNF("Pg", &pageRef);
+
+  // Revision number (optional).
+  if (element->lookup("R", &obj)->isInt()) {
+    s->revision = obj.getInt();
+  }
+  obj.free();
+
+  // Element title (optional).
+  if (element->lookup("T", &obj)->isString()) {
+    s->title = obj.takeString();
+  }
+  obj.free();
+
+  // Language (optional).
+  if (element->lookup("Lang", &obj)->isString()) {
+    s->language = obj.takeString();
+  }
+  obj.free();
+
+  // Alternative text (optional).
+  if (element->lookup("Alt", &obj)->isString()) {
+    s->altText = obj.takeString();
+  }
+  obj.free();
+
+  // Expanded form of an abbreviation (optional).
+  if (element->lookup("E", &obj)->isString()) {
+    s->expandedAbbr = obj.takeString();
+  }
+  obj.free();
+
+  // Actual text (optional).
+  if (element->lookup("ActualText", &obj)->isString()) {
+    s->actualText = obj.takeString();
+  }
+  obj.free();
+
+  // TODO: Attributes directly attached to the element (optional).
+  // TODO: Attributes referenced indirectly through the ClassMap (optional).
+}
+
+// Create the child described by one item of /K and attach it to this
+// element.
+//
+// ref:      the item as stored (not dereferenced); used to tell whether
+//           the child was an indirect reference and to detect loops.
+// childObj: the same item after dereferencing. An integer or an MCR
+//           dictionary yields an MCID child, an OBJR dictionary yields
+//           an object-reference child, any other dictionary is parsed
+//           as a nested structure element.
+// seen:     object numbers of structure elements already visited.
+//
+// Returns the new child, or NULL when the item was rejected.
+StructElement *StructElement::parseChild(Object *ref,
+                                         Object *childObj,
+                                         std::set<int> &seen)
+{
+  assert(childObj);
+  assert(ref);
+
+  StructElement *child = NULL;
+
+  if (childObj->isInt()) {
+    child = new StructElement(childObj->getInt(), treeRoot, this);
+  } else if (childObj->isDict("MCR")) {
+    /*
+     * TODO: The optional Stm/StmOwn attributes are not handled, so all the
+     *      page will be always scanned when calling StructElement::getText().
+     */
+    Object mcidObj;
+    Object pageRefObj;
+
+    if (!childObj->dictLookup("MCID", &mcidObj)->isInt()) {
+      error(errSyntaxError, -1, "MCID object is wrong type ({0:s})", mcidObj.getTypeName());
+      mcidObj.free();
+      return NULL;
+    }
+
+    child = new StructElement(mcidObj.getInt(), treeRoot, this);
+    mcidObj.free();
+
+    if (childObj->dictLookupNF("Pg", &pageRefObj)->isRef()) {
+      child->pageRef = pageRefObj;
+    } else {
+      pageRefObj.free();
+    }
+  } else if (childObj->isDict("OBJR")) {
+    Object refObj;
+
+    if (childObj->dictLookupNF("Obj", &refObj)->isRef()) {
+      Object pageRefObj;
+
+      child = new StructElement(refObj.getRef(), treeRoot, this);
+
+      if (childObj->dictLookupNF("Pg", &pageRefObj)->isRef()) {
+        child->pageRef = pageRefObj;
+      } else {
+        pageRefObj.free();
+      }
+    } else {
+      error(errSyntaxError, -1, "Obj object is wrong type ({0:s})", refObj.getTypeName());
+    }
+    refObj.free();
+  } else if (childObj->isDict()) {
+    if (!ref->isRef()) {
+      error(errSyntaxError, -1,
+            "Structure element dictionary is not an indirect reference ({0:s})",
+            ref->getTypeName());
+    } else if (seen.find(ref->getRefNum()) == seen.end()) {
+      seen.insert(ref->getRefNum());
+      child = new StructElement(childObj->getDict(), treeRoot, this, seen);
+    } else {
+      // Argument 0 is the object number, argument 1 the generation; both
+      // are integers and use the "d" format type.
+      error(errSyntaxWarning, -1,
+            "Loop detected in structure tree, skipping subtree at object {0:d}:{1:d}",
+            ref->getRefNum(), ref->getRefGen());
+    }
+  } else {
+    error(errSyntaxWarning, -1, "K has a child of wrong type ({0:s})", childObj->getTypeName());
+  }
+
+  if (child) {
+    if (child->isOk()) {
+      appendElement(child);
+      // Let the tree root fill in its ParentTree entries for this object.
+      if (ref->isRef())
+        treeRoot->parentTreeAdd(ref->getRef(), child);
+    } else {
+      delete child;
+      child = NULL;
+    }
+  }
+
+  return child;
+}
+
+// Walk the /K entry of a structure element dictionary and hand every
+// item to parseChild(). /K may be an array of items or a single
+// dictionary or integer; anything else is silently ignored.
+void StructElement::parseChildren(Dict *element, std::set<int> &seen)
+{
+  Object kids;
+  element->lookup("K", &kids);
+
+  if (kids.isArray()) {
+    for (int idx = 0; idx < kids.arrayGetLength(); ++idx) {
+      // Fetch each item twice: once as stored and once dereferenced.
+      Object childRef, childObj;
+      kids.arrayGetNF(idx, &childRef);
+      kids.arrayGet(idx, &childObj);
+      parseChild(&childRef, &childObj, seen);
+      childObj.free();
+      childRef.free();
+    }
+  } else if (kids.isDict() || kids.isInt()) {
+    Object childRef;
+    element->lookupNF("K", &childRef);
+    parseChild(&childRef, &kids, seen);
+    childRef.free();
+  }
+
+  kids.free();
+}
diff --git a/poppler/StructElement.h b/poppler/StructElement.h
new file mode 100644
index 00000000..d1997c9c
--- /dev/null
+++ b/poppler/StructElement.h
@@ -0,0 +1,167 @@
+//========================================================================
+//
+// StructElement.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+#ifndef STRUCTELEMENT_H
+#define STRUCTELEMENT_H
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include "goo/gtypes.h"
+#include "goo/GooString.h"
+#include "Object.h"
+#include <vector>
+#include <set>
+
+class GooString;
+class Dict;
+class StructTreeRoot;
+
+
+// A node of the logical structure tree. A node is either a structure
+// element proper (payload in StructData) or a content item of type MCID
+// or OBJR (payload in ContentData); the "type" tag selects which member
+// of the payload union is valid.
+class StructElement {
+public:
+  enum Type {
+    Unknown = 0,
+    MCID,                                   // MCID reference, used internally
+    OBJR,                                   // Object reference, used internally
+
+    Document, Part, Art, Sect, Div,         // Structural elements
+
+    Span, Quote, Note, Reference, BibEntry, // Inline elements
+    Code, Link, Annot,
+    BlockQuote, Caption, NonStruct,
+    TOC, TOCI, Index, Private,
+
+    P, H, H1, H2, H3, H4, H5, H6,           // Paragraph-like
+
+    L, LI, Lbl,                             // List elements
+
+    Table, TR, TH, TD, THead, TFoot, TBody, // Table elements
+
+    Ruby, RB, RT, RP,                       // Ruby text elements
+    Warichu, WT, WP,
+
+    Figure, Formula, Form,                  // Illustration-like elements
+  };
+
+  static const Ref InvalidRef;
+
+  const char *getTypeName() const;
+  Type getType() const { return type; }
+  GBool isOk() const { return type != Unknown; }
+
+  // True for every element whose payload is a ContentData. This depends
+  // on the type tag only, so that an OBJR element holding an invalid
+  // reference is never mistaken for a structure element by the accessors
+  // below, which would otherwise read it through the StructData pointer.
+  inline GBool isContent() const { return (type == MCID) || (type == OBJR); }
+  // True for an OBJR element whose stored reference is valid.
+  inline GBool isObjectRef() const { return (type == OBJR && c->ref.num != -1 && c->ref.gen != -1); }
+
+  int getMCID() const { return c->mcid; }
+  Ref getObjectRef() const { return c->ref; }
+  Ref getParentRef() { return isContent() ? parent->getParentRef() : s->parentRef.getRef(); }
+  GBool hasPageRef() const;
+  GBool getPageRef(Ref& ref) const;
+  StructTreeRoot *getStructTreeRoot() { return treeRoot; }
+
+  // Optional element identifier.
+  const GooString *getID() const { return isContent() ? NULL : s->id; }
+  GooString *getID() { return isContent() ? NULL : s->id; }
+
+  // Optional ISO language name, e.g. en_US
+  GooString *getLanguage() {
+    if (!isContent() && s->language) return s->language;
+    return parent ? parent->getLanguage() : NULL;
+  }
+  const GooString *getLanguage() const {
+    if (!isContent() && s->language) return s->language;
+    return parent ? parent->getLanguage() : NULL;
+  }
+
+  // Optional revision number, defaults to zero.
+  Guint getRevision() const { return isContent() ? 0 : s->revision; }
+  // Only structure elements store a revision; content items have no
+  // StructData, so the request is ignored for them.
+  void setRevision(Guint revision) { if (!isContent()) s->revision = revision; }
+
+  // Optional element title, in human-readable form.
+  const GooString *getTitle() const { return isContent() ? NULL : s->title; }
+  GooString *getTitle() { return isContent() ? NULL : s->title; }
+
+  // Optional element expanded abbreviation text.
+  const GooString *getExpandedAbbr() const { return isContent() ? NULL : s->expandedAbbr; }
+  GooString *getExpandedAbbr() { return isContent() ? NULL : s->expandedAbbr; }
+
+  // Child elements; content items never have children.
+  unsigned getNumElements() const { return isContent() ? 0 : s->elements.size(); }
+  const StructElement *getElement(int i) const { return isContent() ? NULL : s->elements.at(i); }
+  StructElement *getElement(int i) { return isContent() ? NULL : s->elements.at(i); }
+
+  // Adds a child; the call is ignored for content items and for children
+  // that are NULL or not isOk().
+  void appendElement(StructElement *element) {
+    if (!isContent() && element && element->isOk()) {
+      s->elements.push_back(element);
+    }
+  }
+
+  const GooString *getAltText() const { return isContent() ? NULL : s->altText; }
+  GooString *getAltText() { return isContent() ? NULL : s->altText; }
+
+  const GooString *getActualText() const { return isContent() ? NULL : s->actualText; }
+  GooString *getActualText() { return isContent() ? NULL : s->actualText; }
+
+  ~StructElement();
+
+private:
+  typedef std::vector<StructElement*> ElemPtrArray;
+
+  // Data in structure elements proper
+  struct StructData {
+    Object       parentRef;
+    GooString   *altText;
+    GooString   *actualText;
+    GooString   *id;
+    GooString   *title;
+    GooString   *expandedAbbr;
+    GooString   *language;
+    Guint        revision;
+    ElemPtrArray elements;
+
+    StructData();
+    ~StructData();
+  };
+
+  // Data in content elements (MCID, MCR)
+  struct ContentData {
+    union {
+      int mcid;
+      Ref ref;
+    };
+
+    ContentData(int mcidA): mcid(mcidA) {}
+    ContentData(const Ref& r) { ref.num = r.num; ref.gen = r.gen; }
+  };
+
+  // Common data
+  Type type;
+  StructTreeRoot *treeRoot;
+  StructElement *parent;
+  mutable Object pageRef;
+
+  // Payload; which member is valid follows from "type".
+  union {
+    StructData  *s;
+    ContentData *c;
+  };
+
+  StructElement(Dict *elementDict, StructTreeRoot *treeRootA, StructElement *parentA, std::set<int> &seen);
+  StructElement(int mcid, StructTreeRoot *treeRootA, StructElement *parentA);
+  StructElement(const Ref &ref, StructTreeRoot *treeRootA, StructElement *parentA);
+
+  void parse(Dict* elementDict);
+  StructElement* parseChild(Object *ref, Object* childObj, std::set<int> &seen);
+  void parseChildren(Dict* element, std::set<int> &seen);
+
+  friend class StructTreeRoot;
+};
+
+#endif
+
diff --git a/poppler/StructTreeRoot.cc b/poppler/StructTreeRoot.cc
new file mode 100644
index 00000000..59f017ee
--- /dev/null
+++ b/poppler/StructTreeRoot.cc
@@ -0,0 +1,174 @@
+//========================================================================
+//
+// StructTreeRoot.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include "goo/GooString.h"
+#include "StructTreeRoot.h"
+#include "StructElement.h"
+#include "PDFDoc.h"
+#include "Object.h"
+#include "Dict.h"
+#include <set>
+#include <assert.h>
+
+
+// Build the structure tree for a document from its StructTreeRoot
+// dictionary. Both arguments are required; the whole tree is parsed
+// here, in the constructor.
+StructTreeRoot::StructTreeRoot(PDFDoc *docA, Dict *structTreeRootDict):
+  doc(docA)
+{
+  assert(doc);
+  assert(structTreeRootDict);
+  parse(structTreeRootDict);
+}
+
+// Destroy the top-level elements and release the RoleMap and ClassMap
+// objects.
+StructTreeRoot::~StructTreeRoot()
+{
+  for (ElemPtrArray::size_type n = 0; n < elements.size(); ++n) {
+    delete elements[n];
+  }
+  classMap.free();
+  roleMap.free();
+}
+
+// Load the structure tree from the StructTreeRoot dictionary: keep the
+// RoleMap and ClassMap objects, record the references found in the
+// /Nums array of /ParentTree, and parse the structure elements listed
+// under /K. Malformed entries are reported and skipped.
+void StructTreeRoot::parse(Dict *root)
+{
+  // The RoleMap/ClassMap dictionaries are needed by all the parsing
+  // functions, which will resolve the custom names to canonical
+  // standard names.
+  root->lookup("RoleMap", &roleMap);
+  root->lookup("ClassMap", &classMap);
+
+  // ParentTree (optional). If present, it must be a number tree,
+  // otherwise it is not possible to map stream objects to their
+  // corresponding structure element. Here only the references are
+  // loaded into the array, the pointers to the StructElements will
+  // be filled-in later when parsing them.
+  Object obj;
+  if (root->lookup("ParentTree", &obj)->isDict()) {
+    Object nums;
+    if (obj.dictLookup("Nums", &nums)->isArray()) {
+      if (nums.arrayGetLength() % 2 == 0) {
+        parentTree.resize(nums.arrayGetLength() / 2);
+        // Index numbers in even positions, references in odd ones
+        for (int i = 0; i < nums.arrayGetLength(); i += 2) {
+          Object index, value;
+
+          if (!nums.arrayGet(i, &index)->isInt()) {
+            error(errSyntaxError, -1, "Nums item at position {0:d} is wrong type ({1:s})", i, index.getTypeName());
+            index.free();
+            continue;
+          }
+          if (index.getInt() < 0) {
+            error(errSyntaxError, -1, "Nums item at position {0:d} is invalid value ({1:d})", i, index.getInt());
+            index.free();
+            continue;
+          }
+
+          const unsigned idx = index.getInt();
+          // The key comes straight from the file and parentTree holds only
+          // one slot per key/value pair, so a key at or beyond that size
+          // must not be used as an index.
+          if (idx >= parentTree.size()) {
+            error(errSyntaxError, -1, "Nums item at position {0:d} is out of range ({1:d})", i, index.getInt());
+            index.free();
+            continue;
+          }
+
+          if (nums.arrayGetNF(i + 1, &value)->isRef()) {
+            parentTree[idx].resize(1);
+            parentTree[idx][0].ref = value.getRef();
+          } else {
+            // Drop the non-dereferenced copy before fetching the value
+            // again, otherwise it would be overwritten without being freed.
+            value.free();
+            if (nums.arrayGet(i + 1, &value)->isArray()) {
+              parentTree[idx].resize(value.arrayGetLength());
+              for (int j = 0; j < value.arrayGetLength(); j++) {
+                Object itemvalue;
+                if (value.arrayGetNF(j, &itemvalue)->isRef())
+                  parentTree[idx][j].ref = itemvalue.getRef();
+                else
+                  error(errSyntaxError, -1, "Nums array item at position {0:d}/{1:d} is invalid type ({2:s})", i, j, itemvalue.getTypeName());
+                itemvalue.free();
+              }
+            } else {
+              error(errSyntaxError, -1, "Nums item at position {0:d} is wrong type ({1:s})", i + 1, value.getTypeName());
+            }
+          }
+
+          value.free();
+          index.free();
+        }
+      } else {
+        error(errSyntaxError, -1, "Nums array length is not a even ({0:d})", nums.arrayGetLength());
+      }
+    } else {
+      error(errSyntaxError, -1, "Nums object is wrong type ({0:s})", nums.getTypeName());
+    }
+    nums.free();
+  }
+  obj.free();
+
+  // Object numbers of the structure elements visited so far, shared with
+  // the StructElement parser for loop detection.
+  std::set<int> seenElements;
+
+  // Parse the children StructElements
+  const GBool marked = doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked;
+  Object kids;
+  if (root->lookup("K", &kids)->isArray()) {
+    if (marked && kids.arrayGetLength() > 1) {
+      error(errSyntaxWarning, -1, "K in StructTreeRoot has more than one children in a tagged PDF");
+    }
+    for (int i = 0; i < kids.arrayGetLength(); i++) {
+      Object obj, ref;
+      kids.arrayGetNF(i, &ref);
+      if (ref.isRef()) {
+        seenElements.insert(ref.getRefNum());
+      }
+      if (kids.arrayGet(i, &obj)->isDict()) {
+        StructElement *child = new StructElement(obj.getDict(), this, NULL, seenElements);
+        if (child->isOk()) {
+          if (marked && !(child->getType() == StructElement::Document ||
+                          child->getType() == StructElement::Part ||
+                          child->getType() == StructElement::Art ||
+                          child->getType() == StructElement::Div)) {
+            error(errSyntaxWarning, -1, "StructTreeRoot element of tagged PDF is wrong type ({0:s})", child->getTypeName());
+          }
+          appendElement(child);
+          if (ref.isRef()) {
+            parentTreeAdd(ref.getRef(), child);
+          }
+        } else {
+          error(errSyntaxWarning, -1, "StructTreeRoot element could not be parsed");
+          delete child;
+        }
+      } else {
+        error(errSyntaxWarning, -1, "K has a child of wrong type ({0:s})", obj.getTypeName());
+      }
+      obj.free();
+      ref.free();
+    }
+  } else if (kids.isDict()) {
+    if (marked) {
+      error(errSyntaxWarning, -1, "K has a child of wrong type for a tagged PDF ({0:s})", kids.getTypeName());
+    }
+    // As in the array case, record the child's object number before
+    // descending so that a descendant pointing back at it is detected.
+    Object ref;
+    if (root->lookupNF("K", &ref)->isRef()) {
+      seenElements.insert(ref.getRefNum());
+    }
+    StructElement *child = new StructElement(kids.getDict(), this, NULL, seenElements);
+    if (child->isOk()) {
+      appendElement(child);
+      if (ref.isRef())
+        parentTreeAdd(ref.getRef(), child);
+    } else {
+      error(errSyntaxWarning, -1, "StructTreeRoot element could not be parsed");
+      delete child;
+    }
+    ref.free();
+  } else if (!kids.isNull()) {
+    error(errSyntaxWarning, -1, "K in StructTreeRoot is wrong type ({0:s})", kids.getTypeName());
+  }
+
+  kids.free();
+}
+
+// Attach "element" to every ParentTree entry whose stored reference
+// matches objectRef (same object number and generation).
+void StructTreeRoot::parentTreeAdd(const Ref &objectRef, StructElement *element)
+{
+  typedef std::vector<Parent> ParentRow;
+
+  for (std::vector<ParentRow>::size_type r = 0; r < parentTree.size(); ++r) {
+    ParentRow &row = parentTree[r];
+    for (ParentRow::size_type k = 0; k < row.size(); ++k) {
+      Parent &entry = row[k];
+      if (entry.ref.num != objectRef.num || entry.ref.gen != objectRef.gen)
+        continue;
+      entry.element = element;
+    }
+  }
+}
diff --git a/poppler/StructTreeRoot.h b/poppler/StructTreeRoot.h
new file mode 100644
index 00000000..9928e2f6
--- /dev/null
+++ b/poppler/StructTreeRoot.h
@@ -0,0 +1,83 @@
+//========================================================================
+//
+// StructTreeRoot.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+#ifndef STRUCTTREEROOT_H
+#define STRUCTTREEROOT_H
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include "goo/gtypes.h"
+#include "Object.h"
+#include "StructElement.h"
+#include <vector>
+
+class Dict;
+class PDFDoc;
+
+
+// Root of the logical structure tree. It keeps the RoleMap and ClassMap
+// objects, the top-level structure elements, and a table built from
+// /ParentTree that pairs object references with StructElement pointers.
+class StructTreeRoot
+{
+public:
+  StructTreeRoot(PDFDoc *docA, Dict *rootDict);
+  ~StructTreeRoot();
+
+  PDFDoc *getDoc() { return doc; }
+
+  // Both maps are optional; NULL is returned when the stored object is
+  // not a dictionary.
+  Dict *getRoleMap() {
+    if (!roleMap.isDict()) {
+      return NULL;
+    }
+    return roleMap.getDict();
+  }
+  Dict *getClassMap() {
+    if (!classMap.isDict()) {
+      return NULL;
+    }
+    return classMap.getDict();
+  }
+
+  // Top-level structure elements.
+  unsigned getNumElements() const { return elements.size(); }
+  const StructElement *getElement(int i) const { return elements.at(i); }
+  StructElement *getElement(int i) { return elements.at(i); }
+
+  // Adds a top-level element; NULL and elements that are not isOk() are
+  // ignored.
+  void appendElement(StructElement *element) {
+    if (!element || !element->isOk()) {
+      return;
+    }
+    elements.push_back(element);
+  }
+
+  // Element recorded for the given ParentTree index, or NULL when the
+  // index is out of range or the entry does not hold exactly one item.
+  const StructElement *findParentElement(unsigned index) const {
+    if (index >= parentTree.size() || parentTree[index].size() != 1) {
+      return NULL;
+    }
+    return parentTree[index][0].element;
+  }
+
+private:
+  typedef std::vector<StructElement*> ElemPtrArray;
+
+  // Structure for items in /ParentTree, it keeps a mapping of
+  // object references and pointers to StructElement objects.
+  struct Parent {
+    Ref            ref;
+    StructElement *element;
+
+    Parent(): element(NULL) {
+      ref.num = -1;
+      ref.gen = -1;
+    }
+    Parent(const Parent &p): element(p.element) {
+      ref.num = p.ref.num;
+      ref.gen = p.ref.gen;
+    }
+    ~Parent() {}
+  };
+
+  PDFDoc *doc;
+  Object roleMap;
+  Object classMap;
+  ElemPtrArray elements;
+  std::vector< std::vector<Parent> > parentTree;
+
+  void parse(Dict *rootDict);
+  void parentTreeAdd(const Ref &objectRef, StructElement *element);
+
+  friend class StructElement;
+};
+
+#endif
+