Rewrite FoFiType1::parse to be more flexible

The previous code needed Encoding to be at the start of its own line. The file in issue #1422 doesn't have that, and most of the other renderers show it correctly, so we should do the same. The code should also be easier to understand now, and is probably faster since we're no longer copying strings around like before.
author: Albert Astals Cid <aacid@kde.org> 2023-09-28 00:27:54 +0200
committer: Albert Astals Cid <aacid@kde.org> 2023-11-10 00:15:33 +0100
commit: 84b359b82ee5b10ae714a0fbf3fda53f73e88722 (patch)
tree: 2378146f67285e440b8d8867c0e984885f5f3c5b
parent: 0c53de52deb1328d4d45ab3dfc742acb6386a369 (diff)
3 files changed, 120 insertions, 196 deletions
diff --git a/fofi/FoFiType1.cc b/fofi/FoFiType1.cc
index 6d972232..eaa74143 100644
--- a/fofi/FoFiType1.cc
+++ b/fofi/FoFiType1.cc
@@ -13,7 +13,7 @@
 // All changes made under the Poppler project to this file are licensed
 // under GPL version 2 or later
 //
-// Copyright (C) 2005, 2008, 2010, 2018, 2021, 2022 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2005, 2008, 2010, 2018, 2021-2023 Albert Astals Cid <aacid@kde.org>
 // Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com>
 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
 // Copyright (C) 2014 Carlos Garcia Campos <carlosgc@gnome.org>
@@ -28,6 +28,9 @@
 
 #include <config.h>
 
+#include <charconv>
+#include <optional>
+
 #include <cstdlib>
 #include <cstring>
 #include <climits>
@@ -49,34 +52,22 @@ FoFiType1 *FoFiType1::make(const unsigned char *fileA, int lenA)
 
 FoFiType1::FoFiType1(const unsigned char *fileA, int lenA, bool freeFileDataA) : FoFiBase(fileA, lenA, freeFileDataA)
 {
-    name = nullptr;
     encoding = nullptr;
-    fontMatrix[0] = 0.001;
-    fontMatrix[1] = 0;
-    fontMatrix[2] = 0;
-    fontMatrix[3] = 0.001;
-    fontMatrix[4] = 0;
-    fontMatrix[5] = 0;
     parsed = false;
     undoPFB();
 }
 
 FoFiType1::~FoFiType1()
 {
-    int i;
-
-    if (name) {
-        gfree(name);
-    }
     if (encoding && encoding != fofiType1StandardEncoding) {
-        for (i = 0; i < 256; ++i) {
+        for (int i = 0; i < 256; ++i) {
             gfree(encoding[i]);
         }
         gfree(encoding);
     }
 }
 
-const char *FoFiType1::getName()
+std::string FoFiType1::getName()
 {
     if (!parsed) {
         parse();
@@ -92,18 +83,6 @@ char **FoFiType1::getEncoding()
     return encoding;
 }
 
-void FoFiType1::getFontMatrix(double *mat)
-{
-    int i;
-
-    if (!parsed) {
-        parse();
-    }
-    for (i = 0; i < 6; ++i) {
-        mat[i] = fontMatrix[i];
-    }
-}
-
 void FoFiType1::writeEncoded(const char **newEncoding, FoFiOutputFunc outputFunc, void *outputStream) const
 {
     char buf[512];
@@ -197,178 +176,126 @@ char *FoFiType1::getNextLine(char *line) const
     return line;
 }
 
+static const char tokenSeparators[] = " \t\n\r";
+
+class FoFiType1Tokenizer
+{
+public:
+    explicit FoFiType1Tokenizer(std::string_view &&stringViewA) : stringView(stringViewA) { }
+
+    std::optional<std::string_view> getToken()
+    {
+        const auto length = stringView.length();
+        if (currentPos >= length) {
+            return {};
+        }
+
+        std::string_view::size_type pos = stringView.find_first_of(tokenSeparators, currentPos);
+        while (pos == currentPos) {
+            // skip multiple contiguous separators
+            ++currentPos;
+            pos = stringView.find_first_of(tokenSeparators, currentPos);
+        }
+        if (pos == std::string_view::npos) {
+            std::string_view token = stringView.substr(currentPos, length - currentPos);
+            currentPos = length;
+            return token;
+        }
+
+        std::string_view token = stringView.substr(currentPos, pos - currentPos);
+
+        currentPos = pos + 1;
+
+        return token;
+    }
+
+private:
+    std::string_view::size_type currentPos = 0;
+    const std::string_view stringView;
+};
+
 void FoFiType1::parse()
 {
-    char *line, *line1, *firstLine, *p, *p2;
-    char buf[256];
-    char c;
-    int n, code, base, i, j;
-    char *tokptr;
-    bool gotMatrix, continueLine;
-
-    gotMatrix = false;
-    for (i = 1, line = (char *)file; i <= 100 && line && (!name || !encoding || !gotMatrix); ++i) {
-
-        // get font name
-        if (!name && (line + 9 <= (char *)file + len) && !strncmp(line, "/FontName", 9)) {
-            const auto availableFile = (char *)file + len - line;
-            const int lineLen = static_cast<int>(availableFile < 255 ? availableFile : 255);
-            strncpy(buf, line, lineLen);
-            buf[lineLen] = '\0';
-            if ((p = strchr(buf + 9, '/')) && (p = strtok_r(p + 1, " \t\n\r", &tokptr))) {
-                name = copyString(p);
+    FoFiType1Tokenizer tokenizer(std::string_view(reinterpret_cast<const char *>(file), len));
+    while (name.empty() || !encoding) {
+        const std::optional<std::string_view> token = tokenizer.getToken();
+
+        if (!token) {
+            break;
+        }
+
+        if (name.empty() && token == "/FontName") {
+            const std::optional<std::string_view> fontNameToken = tokenizer.getToken();
+            if (!fontNameToken) {
+                break;
             }
-            line = getNextLine(line);
-
-            // get encoding
-        } else if (!encoding && (line + 9 <= (char *)file + len) && !strncmp(line, "/Encoding", 9)) {
-            line = line + 9;
-            const auto availableFile = (char *)file + len - line;
-            const int lineLen = static_cast<int>(availableFile < 255 ? availableFile : 255);
-            strncpy(buf, line, lineLen);
-            buf[lineLen] = '\0';
-            p = strtok_r(buf, " \t\n\r", &tokptr);
-            if (p && (p + 3 <= (char *)buf + lineLen) && !strncmp(p, "256", 3)) {
-                p = strtok_r(nullptr, " \t\n\r", &tokptr);
-                if (p && (p + 5 <= (char *)buf + lineLen) && !strncmp(p, "array", 5)) {
-                    encoding = (char **)gmallocn(256, sizeof(char *));
-                    for (j = 0; j < 256; ++j) {
-                        encoding[j] = nullptr;
+
+            // Skip the /
+            name = fontNameToken->substr(1);
+
+        } else if (!encoding && token == "/Encoding") {
+            const std::optional<std::string_view> token2 = tokenizer.getToken();
+            if (!token2) {
+                break;
+            }
+
+            const std::optional<std::string_view> token3 = tokenizer.getToken();
+            if (!token3) {
+                break;
+            }
+
+            if (token2 == "StandardEncoding" && token3 == "def") {
+                encoding = (char **)fofiType1StandardEncoding;
+            } else if (token2 == "256" && token3 == "array") {
+                encoding = (char **)gmallocn(256, sizeof(char *));
+                for (int j = 0; j < 256; ++j) {
+                    encoding[j] = nullptr;
+                }
+
+                while (true) {
+                    const std::optional<std::string_view> encodingToken = tokenizer.getToken();
+                    if (!encodingToken) {
+                        break;
                     }
-                    continueLine = false;
-                    for (j = 0, line = getNextLine(line); j < 1200 && line && (line1 = getNextLine(line)); ++j, line = line1) {
-                        if ((n = (int)(line1 - line)) > 255) {
-                            error(errSyntaxWarning, -1, "FoFiType1::parse a line has more than 255 characters, we don't support this");
-                            n = 255;
-                        }
-                        if (continueLine) {
-                            continueLine = false;
-                            if ((line1 - firstLine) + 1 > (int)sizeof(buf)) {
-                                break;
-                            }
-                            p = firstLine;
-                            p2 = buf;
-                            while (p < line1) {
-                                if (*p == '\n' || *p == '\r') {
-                                    *p2++ = ' ';
-                                    p++;
-                                } else {
-                                    *p2++ = *p++;
-                                }
-                            }
-                            *p2 = '\0';
-                        } else {
-                            firstLine = line;
-                            strncpy(buf, line, n);
-                            buf[n] = '\0';
-                        }
-                        for (p = buf; *p == ' ' || *p == '\t'; ++p) {
-                            ;
+
+                    if (encodingToken == "dup") {
+                        std::optional<std::string_view> codeToken = tokenizer.getToken();
+                        if (!codeToken) {
+                            break;
                         }
-                        if (!strncmp(p, "dup", 3)) {
-                            while (true) {
-                                p += 3;
-                                for (; *p == ' ' || *p == '\t'; ++p) {
-                                    ;
-                                }
-                                code = 0;
-                                if (*p == '8' && p[1] == '#') {
-                                    base = 8;
-                                    p += 2;
-                                } else if (*p >= '0' && *p <= '9') {
-                                    base = 10;
-                                } else if (*p == '\n' || *p == '\r') {
-                                    continueLine = true;
-                                    break;
-                                } else {
-                                    break;
-                                }
-                                for (; *p >= '0' && *p < '0' + base && code < INT_MAX / (base + (*p - '0')); ++p) {
-                                    code = code * base + (*p - '0');
-                                }
-                                for (; *p == ' ' || *p == '\t'; ++p) {
-                                    ;
-                                }
-                                if (*p == '\n' || *p == '\r' || *p == '\0') {
-                                    continueLine = true;
-                                    break;
-                                } else if (*p != '/') {
-                                    break;
-                                }
-                                ++p;
-                                for (p2 = p; *p2 && *p2 != ' ' && *p2 != '\t'; ++p2) {
-                                    ;
-                                }
-                                if (code >= 0 && code < 256) {
-                                    c = *p2;
-                                    *p2 = '\0';
-                                    gfree(encoding[code]);
-                                    encoding[code] = copyString(p);
-                                    *p2 = c;
-                                }
-                                for (p = p2; *p == ' ' || *p == '\t'; ++p) {
-                                    ;
-                                }
-                                if (*p == '\n' || *p == '\r') {
-                                    continueLine = true;
-                                    break;
-                                }
-                                if (strncmp(p, "put", 3)) {
-                                    break;
-                                }
-                                for (p += 3; *p == ' ' || *p == '\t'; ++p) {
-                                    ;
-                                }
-                                if (strncmp(p, "dup", 3)) {
-                                    break;
-                                }
-                            }
+
+                        std::optional<std::string_view> nameToken;
+                        // Sometimes font data has code and name together without spacing i.e. 33/exclam
+                        // if that happens don't call getToken again and just split codeToken in 2
+                        const auto slashPositionInCodeToken = codeToken->find('/');
+                        if (slashPositionInCodeToken != std::string_view::npos) {
+                            nameToken = codeToken->substr(slashPositionInCodeToken, codeToken->length() - slashPositionInCodeToken);
+                            codeToken = codeToken->substr(0, slashPositionInCodeToken);
                         } else {
-                            if (strtok_r(buf, " \t", &tokptr) && (p = strtok_r(nullptr, " \t\n\r", &tokptr)) && !strcmp(p, "def")) {
-                                break;
-                            }
+                            nameToken = tokenizer.getToken();
                         }
 
-                        bool allEncodingSet = true;
-                        for (int k = 0; allEncodingSet && k < 256; ++k) {
-                            allEncodingSet = encoding[k] != nullptr;
-                        }
-                        if (allEncodingSet) {
+                        if (!nameToken) {
                             break;
                         }
-                    }
-                    //~ check for getinterval/putinterval junk
-                }
-            } else if (p && (p + 16 <= (char *)buf + lineLen) && !strncmp(p, "StandardEncoding", 16)) {
-                p = strtok_r(nullptr, " \t\n\r", &tokptr);
-                if (p && (p + 3 <= (char *)buf + lineLen) && !strncmp(p, "def", 3)) {
-                    encoding = (char **)fofiType1StandardEncoding;
-                }
-            } else {
-                line = getNextLine(line);
-            }
-        } else if (!gotMatrix && (line + 11 <= (char *)file + len) && !strncmp(line, "/FontMatrix", 11)) {
-            const auto availableFile = (char *)file + len - (line + 11);
-            const int bufLen = static_cast<int>(availableFile < 255 ? availableFile : 255);
-            strncpy(buf, line + 11, bufLen);
-            buf[bufLen] = '\0';
-            if ((p = strchr(buf, '['))) {
-                ++p;
-                if ((p2 = strchr(p, ']'))) {
-                    *p2 = '\0';
-                    for (j = 0; j < 6; ++j) {
-                        if ((p = strtok_r(j == 0 ? p : nullptr, " \t\n\r", &tokptr))) {
-                            fontMatrix[j] = atof(p);
+
+                        int code = 0;
+                        if (codeToken->length() > 2 && codeToken->at(0) == '8' && codeToken->at(1) == '#') {
+                            std::from_chars(codeToken->data() + 2, codeToken->data() + codeToken->length(), code, 8);
                         } else {
-                            break;
+                            std::from_chars(codeToken->data(), codeToken->data() + codeToken->length(), code);
+                        }
+
+                        if (code >= 0 && code < 256 && nameToken->length() > 1) {
+                            gfree(encoding[code]);
+                            encoding[code] = copyString(nameToken->data() + 1, nameToken->length() - 1);
                         }
+
+                    } else if (encodingToken == "def") {
+                        break;
                     }
                 }
             }
-            gotMatrix = true;
-
-        } else {
-            line = getNextLine(line);
         }
     }
 
diff --git a/fofi/FoFiType1.h b/fofi/FoFiType1.h
index 1bb64e4e..b3cc91e8 100644
--- a/fofi/FoFiType1.h
+++ b/fofi/FoFiType1.h
@@ -13,7 +13,7 @@
 // All changes made under the Poppler project to this file are licensed
 // under GPL version 2 or later
 //
-// Copyright (C) 2018, 2022 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2018, 2022, 2023 Albert Astals Cid <aacid@kde.org>
 // Copyright (C) 2022 Oliver Sander <oliver.sander@tu-dresden.de>
 //
 // To see a description of the changes please see the Changelog file that
@@ -26,6 +26,8 @@
 
 #include "FoFiBase.h"
 
+#include <string>
+
 //------------------------------------------------------------------------
 // FoFiType1
 //------------------------------------------------------------------------
@@ -39,15 +41,12 @@ public:
     ~FoFiType1() override;
 
     // Return the font name.
-    const char *getName();
+    std::string getName();
 
     // Return the encoding, as an array of 256 names (any of which may
     // be NULL).
     char **getEncoding();
 
-    // Return the font matrix as an array of six numbers.
-    void getFontMatrix(double *mat);
-
     // Write a version of the Type 1 font file with a new encoding.
     void writeEncoded(const char **newEncoding, FoFiOutputFunc outputFunc, void *outputStream) const;
 
@@ -58,9 +57,8 @@ private:
     void parse();
     void undoPFB();
 
-    char *name;
+    std::string name;
     char **encoding;
-    double fontMatrix[6];
     bool parsed;
 };
 
diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc
index a75eae4c..21e8693d 100644
--- a/poppler/GfxFont.cc
+++ b/poppler/GfxFont.cc
@@ -1140,11 +1140,10 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, std::optional<st
         const std::optional<std::vector<unsigned char>> buf = readEmbFontFile(xref);
         if (buf) {
             if ((ffT1 = FoFiType1::make(buf->data(), buf->size()))) {
-                if (ffT1->getName()) {
-                    if (embFontName) {
-                        delete embFontName;
-                    }
-                    embFontName = new GooString(ffT1->getName());
+                const std::string fontName = ffT1->getName();
+                if (!fontName.empty()) {
+                    delete embFontName;
+                    embFontName = new GooString(fontName);
                 }
                 if (!baseEnc) {
                     baseEnc = (const char **)ffT1->getEncoding();
author	Albert Astals Cid <aacid@kde.org>	2023-09-28 00:27:54 +0200
committer	Albert Astals Cid <aacid@kde.org>	2023-11-10 00:15:33 +0100
commit	84b359b82ee5b10ae714a0fbf3fda53f73e88722 (patch)
tree	2378146f67285e440b8d8867c0e984885f5f3c5b
parent	0c53de52deb1328d4d45ab3dfc742acb6386a369 (diff)