summaryrefslogtreecommitdiff
path: root/poppler/StructTreeRoot.h
diff options
context:
space:
mode:
author Adrian Perez de Castro <aperez@igalia.com> 2013-06-17 17:00:27 +0300
committer Carlos Garcia Campos <carlosgc@gnome.org> 2013-10-02 12:34:00 +0200
commit e04cabd878a0fd84faa5178f423fd828d010b664 (patch)
tree 04d6cbd9439d2bbfbc8a7e3ed58b158d5d88dc4b /poppler/StructTreeRoot.h
parent 45e0fe56985f34e695c99a2f6ec1ffe14e239b9e (diff)
Tagged-PDF: Implement parsing of StructTreeRoot
Implement parsing of the StructTreeRoot entry of the Catalog. Also, the Catalog::getStructTreeRoot() and PDFDoc::getStructTreeRoot() methods are modified to return an instance of StructTreeRoot instead of an Object.

All elements of the StructTreeRoot are parsed except for:
- IDTree: a lookup tree for locating items by their ID. It would be of little use because the whole structure tree is kept in memory, which should be fast enough to traverse.
- ParentTreeNextKey: needed only when the ParentTree object is to be modified. For the moment the implementation only deals with reading, so this has been deliberately left out.

StructElem tree nodes from the document structure tree are parsed as StructElement instances. Attributes and extraction of content from elements are not yet handled.

https://bugs.freedesktop.org/show_bug.cgi?id=64815
Diffstat (limited to 'poppler/StructTreeRoot.h')
-rw-r--r--poppler/StructTreeRoot.h83
1 files changed, 83 insertions, 0 deletions
diff --git a/poppler/StructTreeRoot.h b/poppler/StructTreeRoot.h
new file mode 100644
index 00000000..9928e2f6
--- /dev/null
+++ b/poppler/StructTreeRoot.h
@@ -0,0 +1,83 @@
+//========================================================================
+//
+// StructTreeRoot.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+#ifndef STRUCTTREEROOT_H
+#define STRUCTTREEROOT_H
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include "goo/gtypes.h"
+#include "Object.h"
+#include "StructElement.h"
+#include <vector>
+
+class Dict;
+class PDFDoc;
+
+
+// In-memory representation of the StructTreeRoot entry of a tagged PDF.
+//
+// Keeps the RoleMap and ClassMap objects, the list of top-level
+// StructElement nodes, and a table built from /ParentTree that maps an
+// index to the structure element(s) registered for it.
+//
+// NOTE(review): the constructor, destructor, parse() and parentTreeAdd()
+// are defined outside this header; presumably the destructor releases the
+// stored StructElement objects -- confirm against the implementation file.
+class StructTreeRoot
+{
+public:
+  // docA is the document returned later by getDoc(); rootDict is the
+  // dictionary describing the structure tree root (presumably handed to
+  // parse() -- the constructor body is not visible here).
+  StructTreeRoot(PDFDoc *docA, Dict *rootDict);
+  ~StructTreeRoot();
+
+  // Document this structure tree was created for.
+  PDFDoc *getDoc() { return doc; }
+
+  // RoleMap dictionary, or NULL when the stored object is not a dictionary.
+  Dict *getRoleMap() { return roleMap.isDict() ? roleMap.getDict() : NULL; }
+
+  // ClassMap dictionary, or NULL when the stored object is not a dictionary.
+  Dict *getClassMap() { return classMap.isDict() ? classMap.getDict() : NULL; }
+
+  // Number of top-level elements stored in this root.
+  unsigned getNumElements() const { return elements.size(); }
+
+  // Top-level element at position i. Access goes through std::vector::at(),
+  // so an index outside [0, getNumElements()) -- including a negative one,
+  // which converts to a very large unsigned value -- throws
+  // std::out_of_range instead of reading out of bounds.
+  const StructElement *getElement(int i) const { return elements.at(i); }
+  StructElement *getElement(int i) { return elements.at(i); }
+
+  // Appends a top-level element. NULL pointers and elements whose isOk()
+  // check fails are silently ignored and are NOT stored.
+  void appendElement(StructElement *element) {
+    if (element && element->isOk()) {
+      elements.push_back(element);
+    }
+  }
+
+  // Looks up the element registered in the parent tree under `index`.
+  // Returns it only when the index is in range and exactly one entry is
+  // recorded for that index; returns NULL in every other case (index out
+  // of range, no entries, or several entries).
+  const StructElement *findParentElement(unsigned index) const {
+    if (index < parentTree.size() && parentTree[index].size() == 1) {
+      return parentTree[index][0].element;
+    }
+    return NULL;
+  }
+
+private:
+  // Not copyable. The class declares a destructor and stores raw
+  // StructElement pointers, so an implicit member-wise copy would leave two
+  // roots referring to the same elements. Declared but intentionally never
+  // defined (pre-C++11 idiom): accidental copies fail at compile or link
+  // time.
+  StructTreeRoot(const StructTreeRoot &);
+  StructTreeRoot &operator=(const StructTreeRoot &);
+
+  typedef std::vector<StructElement*> ElemPtrArray;
+
+  // Structure for items in /ParentTree, it keeps a mapping of
+  // object references and pointers to StructElement objects.
+  //
+  // A default-constructed Parent has no element (NULL) and a reference
+  // whose num and gen are both -1. Copying, assignment and destruction use
+  // the compiler-generated member-wise versions; no hand-written ones are
+  // needed because the struct only holds a Ref and a non-owning pointer.
+  struct Parent {
+    Ref ref;
+    StructElement *element;
+
+    Parent(): element(NULL) { ref.num = ref.gen = -1; }
+  };
+
+  PDFDoc *doc;           // document passed to the constructor
+  Object roleMap;        // exposed through getRoleMap()
+  Object classMap;       // exposed through getClassMap()
+  ElemPtrArray elements; // top-level structure elements
+  // Outer index is the value passed to findParentElement(); each slot
+  // holds the Parent entries recorded for that index.
+  std::vector< std::vector<Parent> > parentTree;
+
+  // Fills the members from the root dictionary (defined elsewhere).
+  void parse(Dict *rootDict);
+  // Records `element` for the object reference `objectRef` in parentTree
+  // (defined elsewhere).
+  void parentTreeAdd(const Ref &objectRef, StructElement *element);
+
+  // StructElement accesses private members of this class, for instance
+  // parentTreeAdd().
+  friend class StructElement;
+};
+
+#endif
+