diff options
author | Adrian Perez de Castro <aperez@igalia.com> | 2013-06-17 17:00:27 +0300 |
---|---|---|
committer | Carlos Garcia Campos <carlosgc@gnome.org> | 2013-10-02 12:34:00 +0200 |
commit | e04cabd878a0fd84faa5178f423fd828d010b664 (patch) | |
tree | 04d6cbd9439d2bbfbc8a7e3ed58b158d5d88dc4b /poppler/StructTreeRoot.h | |
parent | 45e0fe56985f34e695c99a2f6ec1ffe14e239b9e (diff) |
Tagged-PDF: Implement parsing of StructTreeRoot
Implement parsing of the StructTreeRoot entry of the Catalog. Also, the
Catalog::getStructTreeRoot() and PDFDoc::getStructTreeRoot() methods are
modified to return an instance of StructTreeRoot instead of an Object.
All elements from the StructTreeRoot are parsed except for:
- IDTree: a lookup tree used to locate items by their ID. It would be of
little use here because the whole structure tree is kept in memory,
which should be fast enough to traverse.
- ParentTreeNextKey: this is only needed when the ParentTree object is
to be modified. For the moment the implementation only deals with
reading, so this has been deliberately left out.
StructElem tree nodes from the document structure tree are parsed as
StructElement instances. Attributes and the extraction of content from
elements are not yet handled.
https://bugs.freedesktop.org/show_bug.cgi?id=64815
Diffstat (limited to 'poppler/StructTreeRoot.h')
-rw-r--r-- | poppler/StructTreeRoot.h | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/poppler/StructTreeRoot.h b/poppler/StructTreeRoot.h
new file mode 100644
index 00000000..9928e2f6
--- /dev/null
+++ b/poppler/StructTreeRoot.h
@@ -0,0 +1,83 @@
+//========================================================================
+//
+// StructTreeRoot.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+#ifndef STRUCTTREEROOT_H
+#define STRUCTTREEROOT_H
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include "goo/gtypes.h"
+#include "Object.h"
+#include "StructElement.h"
+#include <vector>
+
+class Dict;
+class PDFDoc;
+
+
+// Holds the data kept for a document's structure tree root: the role
+// map, the class map, a list of StructElement pointers and the parent
+// tree table.
+//
+// NOTE(review): the class stores raw StructElement pointers and declares
+// a destructor, but copying is not disabled. The destructor body is not
+// visible in this header -- confirm that instances are never copied.
+class StructTreeRoot
+{
+public:
+  // Both are defined out of line. Presumably docA is the owning document
+  // and rootDict the StructTreeRoot dictionary -- see the implementation.
+  StructTreeRoot(PDFDoc *docA, Dict *rootDict);
+  ~StructTreeRoot();
+
+  // Document pointer stored in this object.
+  PDFDoc *getDoc() { return doc; }
+
+  // Dictionary held in roleMap, or NULL when roleMap is not a dictionary.
+  Dict *getRoleMap() {
+    if (!roleMap.isDict()) {
+      return NULL;
+    }
+    return roleMap.getDict();
+  }
+
+  // Dictionary held in classMap, or NULL when classMap is not a
+  // dictionary.
+  Dict *getClassMap() {
+    if (!classMap.isDict()) {
+      return NULL;
+    }
+    return classMap.getDict();
+  }
+
+  // Number of entries currently stored in the element list.
+  unsigned getNumElements() const { return elements.size(); }
+
+  // Element at position i. Access goes through std::vector::at(), so the
+  // index is bounds-checked by the container.
+  const StructElement *getElement(int i) const { return elements.at(i); }
+  StructElement *getElement(int i) { return elements.at(i); }
+
+  // Stores element at the end of the element list. A NULL pointer, or an
+  // element whose isOk() is false, is ignored.
+  // NOTE(review): what happens to a rejected element is not visible
+  // here -- confirm who releases it.
+  void appendElement(StructElement *element) {
+    if (!element || !element->isOk()) {
+      return;
+    }
+    elements.push_back(element);
+  }
+
+  // Looks up row `index` of the parent tree table. The element pointer of
+  // that row is returned only when the row holds exactly one entry; an
+  // out-of-range index, an empty row or a row with several entries all
+  // yield NULL.
+  const StructElement *findParentElement(unsigned index) const {
+    if (index >= parentTree.size()) {
+      return NULL;
+    }
+    const std::vector<Parent> &row = parentTree[index];
+    if (row.size() != 1) {
+      return NULL;
+    }
+    return row[0].element;
+  }
+
+private:
+  typedef std::vector<StructElement*> ElemPtrArray;
+
+  // Entry type for the /ParentTree table: pairs an object reference with
+  // a pointer to a StructElement.
+  struct Parent {
+    Ref ref;
+    StructElement *element;
+
+    // A fresh entry has no element and carries -1 as both the object and
+    // the generation number.
+    Parent() : element(NULL) {
+      ref.gen = -1;
+      ref.num = -1;
+    }
+
+    // Copies the element pointer and both numbers of the reference.
+    Parent(const Parent &other) : element(other.element) {
+      ref.gen = other.ref.gen;
+      ref.num = other.ref.num;
+    }
+
+    ~Parent() {}
+  };
+
+  PDFDoc *doc;
+  Object roleMap;
+  Object classMap;
+  ElemPtrArray elements;
+  // One row of Parent entries per index; read by findParentElement().
+  std::vector< std::vector<Parent> > parentTree;
+
+  // Declared here, defined out of line.
+  void parse(Dict *rootDict);
+  void parentTreeAdd(const Ref &objectRef, StructElement *element);
+
+  // StructElement is granted access to the private members above.
+  friend class StructElement;
+};
+
+#endif
+