summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSune Vuorela <sune@vuorela.dk>2023-05-25 13:21:08 +0200
committerAlbert Astals Cid <tsdgeos@yahoo.es>2023-06-04 23:03:00 +0000
commitc002bee1f2010d13c85a46e6816920ecdccb3015 (patch)
tree14528a0c3921252821bd0f378e84a3a4fed29a31
parent2dcbf58328e5acaafaa73dc2e601e5737c685789 (diff)
Store embedded fonts' widths table more efficiently
Some non-scientific experiments: taking a single 8k PDF document and using Okular to add a simple typewriter annotation with the content "aoeu" in approximately the same place, the file grows to 894k without this patch. With this patch, it grows to 649k. For comparison, if one doesn't embed the widths content at all, the same process grows the file to 638k. While these aren't groundbreaking gains, it is still some improvement.
-rw-r--r--poppler/CIDFontsWidthsBuilder.h190
-rw-r--r--poppler/Form.cc36
-rw-r--r--qt5/tests/CMakeLists.txt1
-rw-r--r--qt5/tests/check_cidfontswidthsbuilder.cpp100
-rw-r--r--qt6/tests/CMakeLists.txt1
-rw-r--r--qt6/tests/check_cidfontswidthsbuilder.cpp100
6 files changed, 423 insertions, 5 deletions
diff --git a/poppler/CIDFontsWidthsBuilder.h b/poppler/CIDFontsWidthsBuilder.h
new file mode 100644
index 00000000..164016d0
--- /dev/null
+++ b/poppler/CIDFontsWidthsBuilder.h
@@ -0,0 +1,190 @@
+//========================================================================
+//
+// CIDFontsWidthsBuilder.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2023 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
+//========================================================================
+
+#ifndef CIDFontsWidthsBuilder_H
+#define CIDFontsWidthsBuilder_H
+
+#include <optional>
+#include <vector>
+#include <variant>
+#include <algorithm>
+#include <cassert>
+
+/** Class to help build the widths array as defined in
+ pdf standard 9.7.4.3 Glyph Metrics in CIDFonts in
+ ISO 32000-2:2020
+
+ The way to use this is to create a builder, then add all the widths
+ and their attached code in order using \ref addWidth and finally call \ref takeSegments
+
+ The resulting value is a list of segments of either \ref ListSegment or
+ \ref RangeSegment
+ */
+class CIDFontsWidthsBuilder
+{
+public:
+ /// Segment that should be encoded as a first index and a list of n numbers specifying the next n widths
+ class ListSegment
+ {
+ public:
+ int first;
+ std::vector<int> widths;
+ };
+ /// Segment that should be encoded as 3 integers, first, last (included) and the width for that group.
+ class RangeSegment
+ {
+ public:
+ int first;
+ int last;
+ int width;
+ };
+ using Segment = std::variant<RangeSegment, ListSegment>;
+
+ /**
+ * Adds a width for a given index.
+ *
+ * Must be called with ever increasing indices until \ref takeSegments
+ * has been called
+ */
+ void addWidth(int index, int width)
+ {
+ if (m_currentSegment.m_lastIndex.has_value() && index <= m_currentSegment.m_lastIndex) {
+ assert(false); // this is likely an error originating from the user of this code: this function got called twice with the same or a decreasing index.
+ return;
+ }
+ while (!m_currentSegment.accept(index, width)) {
+ segmentDone();
+ }
+ }
+
+ /**
+ * \return the resulting segments and resets this font builder
+ */
+ [[nodiscard]] std::vector<Segment> takeSegments()
+ {
+ finish();
+ auto rv = std::move(m_segments);
+ m_segments = {};
+ return rv;
+ }
+
+private:
+ void finish()
+ {
+ while (m_currentSegment.m_values.size()) {
+ segmentDone();
+ }
+ m_currentSegment = {};
+ }
+ class SegmentBuilder
+ {
+ // How many consecutive elements at the end of m_values are equal to value
+ int uniqueElementsFromEnd(int value)
+ {
+ auto lastDifferent = std::find_if(m_values.rbegin(), m_values.rend(), [value](auto &&element) { return element != value; });
+ return std::distance(m_values.rbegin(), lastDifferent);
+ }
+
+ public:
+ /** Tries to add an index/width combo.
+ * If a value is not accepted, caller should
+ * build a segment and repeat the accept call.
+ *
+ * \return if accepted or not
+ */
+ bool accept(int index, int value)
+ {
+ if (m_lastIndex.has_value() && m_lastIndex != index - 1) {
+ // we have gaps. That's okay. We just need to ensure to finish the segment
+ return false;
+ }
+ if (!m_firstIndex) {
+ m_firstIndex = index;
+ }
+ if (m_values.size() < 4) {
+ m_values.push_back(value);
+ if (m_values.front() != value) {
+ differentValues = true;
+ }
+ m_lastIndex = index;
+ return true;
+ }
+ if (!differentValues) {
+ if (m_values.back() == value) {
+ m_values.push_back(value);
+ m_lastIndex = index;
+ return true;
+ } else {
+ // We need to end a range segment
+ // to start a new segment with different value
+ return false;
+ }
+ } else {
+ if (uniqueElementsFromEnd(value) >= 3) {
+ // The last 3 (or more) stored values all equal the
+ // incoming value, so we should finish the previous
+ // list segment and then start a range segment
+ return false;
+ } else {
+ m_values.push_back(value);
+ m_lastIndex = index;
+ return true;
+ }
+ }
+ }
+ /**
+ * Builds the segment of the values so far.
+ */
+ Segment build()
+ {
+ if (differentValues || m_values.size() < 4) {
+ std::vector<int> savedValues;
+ if (m_values.size() >= 4) {
+ auto lastDifferent = std::find_if(m_values.rbegin(), m_values.rend(), [value = m_values.back()](auto &&element) { return element != value; });
+ if (std::distance(m_values.rbegin(), lastDifferent) >= 3) {
+ savedValues.push_back(m_values.back());
+ m_values.pop_back();
+ while (m_values.size() && m_values.back() == savedValues.back()) {
+ savedValues.push_back(m_values.back());
+ m_values.pop_back();
+ }
+ }
+ }
+
+ ListSegment segment { m_firstIndex.value(), std::move(m_values) };
+ if (!savedValues.empty()) {
+ m_firstIndex = m_lastIndex.value() - savedValues.size() + 1;
+ } else {
+ m_firstIndex = {};
+ m_lastIndex = {};
+ }
+ m_values = std::move(savedValues);
+ differentValues = false;
+ return segment;
+ } else {
+ auto segment = RangeSegment { m_firstIndex.value(), m_lastIndex.value(), m_values.back() };
+ m_values.clear();
+ m_firstIndex = {};
+ m_lastIndex = {};
+ differentValues = false;
+ return segment;
+ }
+ }
+ std::vector<int> m_values;
+ std::optional<int> m_lastIndex;
+ std::optional<int> m_firstIndex;
+ bool differentValues = false;
+ };
+ std::vector<Segment> m_segments;
+ SegmentBuilder m_currentSegment;
+
+ void segmentDone() { m_segments.push_back(m_currentSegment.build()); }
+};
+
+#endif // CIDFontsWidthsBuilder_H
diff --git a/poppler/Form.cc b/poppler/Form.cc
index 110339e2..72898976 100644
--- a/poppler/Form.cc
+++ b/poppler/Form.cc
@@ -73,6 +73,7 @@
#include "Link.h"
#include "Lexer.h"
#include "Parser.h"
+#include "CIDFontsWidthsBuilder.h"
#include "fofi/FoFiTrueType.h"
#include "fofi/FoFiIdentifier.h"
@@ -80,6 +81,11 @@
#include <ft2build.h>
#include FT_FREETYPE_H
+// helper for using std::visit to get a dependent false for static_asserts
+// to help get compile errors if one ever extends variants
+template<class>
+inline constexpr bool always_false_v = false;
+
// return a newly allocated char* containing an UTF16BE string of size length
char *pdfDocEncodingToUTF16(const std::string &orig, int *length)
{
@@ -2895,18 +2901,38 @@ Form::AddFontResult Form::addFontToDefaultResources(const std::string &filepath,
return {};
}
- Array *widthsInner = new Array(xref);
+ CIDFontsWidthsBuilder fontsWidths;
+
for (int code = 0; code <= basicMultilingualMaxCode; ++code) {
const int glyph = fft->mapCodeToGID(unicodeBMPCMap, code);
if (FT_Load_Glyph(face, glyph, FT_LOAD_DEFAULT | FT_LOAD_NO_HINTING)) {
- widthsInner->add(Object(0));
+ fontsWidths.addWidth(code, 0);
} else {
- widthsInner->add(Object(static_cast<int>(face->glyph->metrics.horiAdvance)));
+ fontsWidths.addWidth(code, static_cast<int>(face->glyph->metrics.horiAdvance));
}
}
Array *widths = new Array(xref);
- widths->add(Object(0));
- widths->add(Object(widthsInner));
+ for (const auto &segment : fontsWidths.takeSegments()) {
+ std::visit(
+ [&widths, &xref](auto &&s) {
+ using T = std::decay_t<decltype(s)>;
+ if constexpr (std::is_same_v<T, CIDFontsWidthsBuilder::ListSegment>) {
+ widths->add(Object(s.first));
+ auto widthsInner = std::make_unique<Array>(xref);
+ for (const auto &w : s.widths) {
+ widthsInner->add(Object(w));
+ }
+ widths->add(Object(widthsInner.release()));
+ } else if constexpr (std::is_same_v<T, CIDFontsWidthsBuilder::RangeSegment>) {
+ widths->add(Object(s.first));
+ widths->add(Object(s.last));
+ widths->add(Object(s.width));
+ } else {
+ static_assert(always_false_v<T>, "non-exhaustive visitor");
+ }
+ },
+ segment);
+ }
descendantFont->set("W", Object(widths));
char *dataPtr = static_cast<char *>(gmalloc(2 * (basicMultilingualMaxCode + 1)));
diff --git a/qt5/tests/CMakeLists.txt b/qt5/tests/CMakeLists.txt
index 297d9560..0b1931ba 100644
--- a/qt5/tests/CMakeLists.txt
+++ b/qt5/tests/CMakeLists.txt
@@ -72,6 +72,7 @@ qt5_add_qtest(check_qt5_utf_conversion check_utf_conversion.cpp)
qt5_add_qtest(check_qt5_outline check_outline.cpp)
qt5_add_qtest(check_qt5_signature_basics check_signature_basics.cpp)
qt5_add_qtest(check_qt5_distinguished_name_parser check_distinguished_name_parser.cpp)
+qt5_add_qtest(check_qt5_cidfontswidthsbuilder check_cidfontswidthsbuilder.cpp)
if (NOT WIN32)
qt5_add_qtest(check_qt5_pagelabelinfo check_pagelabelinfo.cpp)
qt5_add_qtest(check_qt5_strings check_strings.cpp)
diff --git a/qt5/tests/check_cidfontswidthsbuilder.cpp b/qt5/tests/check_cidfontswidthsbuilder.cpp
new file mode 100644
index 00000000..94c6d52a
--- /dev/null
+++ b/qt5/tests/check_cidfontswidthsbuilder.cpp
@@ -0,0 +1,100 @@
+//========================================================================
+//
+// check_cidfontswidthsbuilder.cpp
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2023 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
+//========================================================================
+
+#include "CIDFontsWidthsBuilder.h"
+
+#include <QtTest/QtTest>
+
+class TestCIDFontsWidthsBuilder : public QObject
+{
+ Q_OBJECT
+public:
+ using QObject::QObject;
+private Q_SLOTS:
+ void testEmpty();
+ void testSingle();
+ void testSimpleSequence();
+};
+
+void TestCIDFontsWidthsBuilder::testEmpty()
+{
+ CIDFontsWidthsBuilder b;
+ auto segments = b.takeSegments();
+ QCOMPARE(segments.size(), 0);
+}
+
+static bool compare(const CIDFontsWidthsBuilder::Segment &segment1, const CIDFontsWidthsBuilder::Segment &segment2)
+{
+ return std::visit(
+ [](const auto &s1, const auto &s2) {
+ using T1 = std::decay_t<decltype(s1)>;
+ using T2 = std::decay_t<decltype(s2)>;
+ if constexpr (!std::is_same_v<T1, T2>) {
+ return false;
+ } else if constexpr (std::is_same_v<T1, CIDFontsWidthsBuilder::ListSegment>) {
+ return s1.first == s2.first && s1.widths == s2.widths;
+ } else if constexpr (std::is_same_v<T1, CIDFontsWidthsBuilder::RangeSegment>) {
+ return s1.first == s2.first && s1.last == s2.last && s1.width == s2.width;
+ } else {
+ return false;
+ }
+ },
+ segment1, segment2);
+}
+
+void TestCIDFontsWidthsBuilder::testSingle()
+{
+ CIDFontsWidthsBuilder b;
+ b.addWidth(0, 10);
+ auto segments = b.takeSegments();
+ QCOMPARE(segments.size(), 1);
+ auto segment0 = CIDFontsWidthsBuilder::ListSegment { 0, { 10 } };
+ QVERIFY(compare(segments[0], segment0));
+}
+
+void TestCIDFontsWidthsBuilder::testSimpleSequence()
+{
+ CIDFontsWidthsBuilder b;
+ for (int i = 0; i < 2; i++) { // repeat to verify that takeSegments resets
+ b.addWidth(0, 10);
+ b.addWidth(1, 10);
+ b.addWidth(2, 10);
+ b.addWidth(3, 10);
+ b.addWidth(4, 10);
+ b.addWidth(5, 20);
+ b.addWidth(6, 21);
+ b.addWidth(7, 21);
+ b.addWidth(8, 20);
+ b.addWidth(9, 10);
+ b.addWidth(10, 10);
+ b.addWidth(11, 10);
+ b.addWidth(12, 10);
+ b.addWidth(13, 10);
+ b.addWidth(14, 20);
+ b.addWidth(15, 21);
+ b.addWidth(16, 21);
+ b.addWidth(17, 20);
+ b.addWidth(19, 20);
+ auto segments = b.takeSegments();
+ QCOMPARE(segments.size(), 5);
+ auto segment0 = CIDFontsWidthsBuilder::RangeSegment { 0, 4, 10 };
+ QVERIFY(compare(segments[0], segment0));
+ auto segment1 = CIDFontsWidthsBuilder::ListSegment { 5, { 20, 21, 21, 20 } };
+ QVERIFY(compare(segments[1], segment1));
+ auto segment2 = CIDFontsWidthsBuilder::RangeSegment { 9, 13, 10 };
+ QVERIFY(compare(segments[2], segment2));
+ auto segment3 = CIDFontsWidthsBuilder::ListSegment { 14, { 20, 21, 21, 20 } };
+ QVERIFY(compare(segments[3], segment3));
+ auto segment4 = CIDFontsWidthsBuilder::ListSegment { 19, { 20 } };
+ QVERIFY(compare(segments[4], segment4));
+ }
+}
+
+QTEST_GUILESS_MAIN(TestCIDFontsWidthsBuilder);
+#include "check_cidfontswidthsbuilder.moc"
diff --git a/qt6/tests/CMakeLists.txt b/qt6/tests/CMakeLists.txt
index 3bb09d9f..da18b15d 100644
--- a/qt6/tests/CMakeLists.txt
+++ b/qt6/tests/CMakeLists.txt
@@ -64,6 +64,7 @@ qt6_add_qtest(check_qt6_utf_conversion check_utf_conversion.cpp)
qt6_add_qtest(check_qt6_outline check_outline.cpp)
qt6_add_qtest(check_qt6_signature_basics check_signature_basics.cpp)
qt6_add_qtest(check_qt6_distinguished_name_parser check_distinguished_name_parser.cpp)
+qt6_add_qtest(check_qt6_cidfontswidthsbuilder check_cidfontswidthsbuilder.cpp)
if (NOT WIN32)
qt6_add_qtest(check_qt6_pagelabelinfo check_pagelabelinfo.cpp)
qt6_add_qtest(check_qt6_strings check_strings.cpp)
diff --git a/qt6/tests/check_cidfontswidthsbuilder.cpp b/qt6/tests/check_cidfontswidthsbuilder.cpp
new file mode 100644
index 00000000..94c6d52a
--- /dev/null
+++ b/qt6/tests/check_cidfontswidthsbuilder.cpp
@@ -0,0 +1,100 @@
+//========================================================================
+//
+// check_cidfontswidthsbuilder.cpp
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2023 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
+//========================================================================
+
+#include "CIDFontsWidthsBuilder.h"
+
+#include <QtTest/QtTest>
+
+class TestCIDFontsWidthsBuilder : public QObject
+{
+ Q_OBJECT
+public:
+ using QObject::QObject;
+private Q_SLOTS:
+ void testEmpty();
+ void testSingle();
+ void testSimpleSequence();
+};
+
+void TestCIDFontsWidthsBuilder::testEmpty()
+{
+ CIDFontsWidthsBuilder b;
+ auto segments = b.takeSegments();
+ QCOMPARE(segments.size(), 0);
+}
+
+static bool compare(const CIDFontsWidthsBuilder::Segment &segment1, const CIDFontsWidthsBuilder::Segment &segment2)
+{
+ return std::visit(
+ [](const auto &s1, const auto &s2) {
+ using T1 = std::decay_t<decltype(s1)>;
+ using T2 = std::decay_t<decltype(s2)>;
+ if constexpr (!std::is_same_v<T1, T2>) {
+ return false;
+ } else if constexpr (std::is_same_v<T1, CIDFontsWidthsBuilder::ListSegment>) {
+ return s1.first == s2.first && s1.widths == s2.widths;
+ } else if constexpr (std::is_same_v<T1, CIDFontsWidthsBuilder::RangeSegment>) {
+ return s1.first == s2.first && s1.last == s2.last && s1.width == s2.width;
+ } else {
+ return false;
+ }
+ },
+ segment1, segment2);
+}
+
+void TestCIDFontsWidthsBuilder::testSingle()
+{
+ CIDFontsWidthsBuilder b;
+ b.addWidth(0, 10);
+ auto segments = b.takeSegments();
+ QCOMPARE(segments.size(), 1);
+ auto segment0 = CIDFontsWidthsBuilder::ListSegment { 0, { 10 } };
+ QVERIFY(compare(segments[0], segment0));
+}
+
+void TestCIDFontsWidthsBuilder::testSimpleSequence()
+{
+ CIDFontsWidthsBuilder b;
+ for (int i = 0; i < 2; i++) { // repeat to verify that takeSegments resets
+ b.addWidth(0, 10);
+ b.addWidth(1, 10);
+ b.addWidth(2, 10);
+ b.addWidth(3, 10);
+ b.addWidth(4, 10);
+ b.addWidth(5, 20);
+ b.addWidth(6, 21);
+ b.addWidth(7, 21);
+ b.addWidth(8, 20);
+ b.addWidth(9, 10);
+ b.addWidth(10, 10);
+ b.addWidth(11, 10);
+ b.addWidth(12, 10);
+ b.addWidth(13, 10);
+ b.addWidth(14, 20);
+ b.addWidth(15, 21);
+ b.addWidth(16, 21);
+ b.addWidth(17, 20);
+ b.addWidth(19, 20);
+ auto segments = b.takeSegments();
+ QCOMPARE(segments.size(), 5);
+ auto segment0 = CIDFontsWidthsBuilder::RangeSegment { 0, 4, 10 };
+ QVERIFY(compare(segments[0], segment0));
+ auto segment1 = CIDFontsWidthsBuilder::ListSegment { 5, { 20, 21, 21, 20 } };
+ QVERIFY(compare(segments[1], segment1));
+ auto segment2 = CIDFontsWidthsBuilder::RangeSegment { 9, 13, 10 };
+ QVERIFY(compare(segments[2], segment2));
+ auto segment3 = CIDFontsWidthsBuilder::ListSegment { 14, { 20, 21, 21, 20 } };
+ QVERIFY(compare(segments[3], segment3));
+ auto segment4 = CIDFontsWidthsBuilder::ListSegment { 19, { 20 } };
+ QVERIFY(compare(segments[4], segment4));
+ }
+}
+
+QTEST_GUILESS_MAIN(TestCIDFontsWidthsBuilder);
+#include "check_cidfontswidthsbuilder.moc"