diff options
author | Albert Astals Cid <aacid@kde.org> | 2023-09-28 00:27:54 +0200 |
---|---|---|
committer | Albert Astals Cid <aacid@kde.org> | 2023-11-10 00:15:33 +0100 |
commit | 84b359b82ee5b10ae714a0fbf3fda53f73e88722 (patch) | |
tree | 2378146f67285e440b8d8867c0e984885f5f3c5b | |
parent | 0c53de52deb1328d4d45ab3dfc742acb6386a369 (diff) |
Previous code needed Encoding to be at the start of it's own line
The file at issue #1422 doesn't have that and most of the other
renderers show it correctly so we should do the same.
Also the code should be easier to understand now, and probably
faster since we're not copying string around like before
-rw-r--r-- | fofi/FoFiType1.cc | 295 | ||||
-rw-r--r-- | fofi/FoFiType1.h | 12 | ||||
-rw-r--r-- | poppler/GfxFont.cc | 9 |
3 files changed, 120 insertions, 196 deletions
diff --git a/fofi/FoFiType1.cc b/fofi/FoFiType1.cc index 6d972232..eaa74143 100644 --- a/fofi/FoFiType1.cc +++ b/fofi/FoFiType1.cc @@ -13,7 +13,7 @@ // All changes made under the Poppler project to this file are licensed // under GPL version 2 or later // -// Copyright (C) 2005, 2008, 2010, 2018, 2021, 2022 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2005, 2008, 2010, 2018, 2021-2023 Albert Astals Cid <aacid@kde.org> // Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com> // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net> // Copyright (C) 2014 Carlos Garcia Campos <carlosgc@gnome.org> @@ -28,6 +28,9 @@ #include <config.h> +#include <charconv> +#include <optional> + #include <cstdlib> #include <cstring> #include <climits> @@ -49,34 +52,22 @@ FoFiType1 *FoFiType1::make(const unsigned char *fileA, int lenA) FoFiType1::FoFiType1(const unsigned char *fileA, int lenA, bool freeFileDataA) : FoFiBase(fileA, lenA, freeFileDataA) { - name = nullptr; encoding = nullptr; - fontMatrix[0] = 0.001; - fontMatrix[1] = 0; - fontMatrix[2] = 0; - fontMatrix[3] = 0.001; - fontMatrix[4] = 0; - fontMatrix[5] = 0; parsed = false; undoPFB(); } FoFiType1::~FoFiType1() { - int i; - - if (name) { - gfree(name); - } if (encoding && encoding != fofiType1StandardEncoding) { - for (i = 0; i < 256; ++i) { + for (int i = 0; i < 256; ++i) { gfree(encoding[i]); } gfree(encoding); } } -const char *FoFiType1::getName() +std::string FoFiType1::getName() { if (!parsed) { parse(); @@ -92,18 +83,6 @@ char **FoFiType1::getEncoding() return encoding; } -void FoFiType1::getFontMatrix(double *mat) -{ - int i; - - if (!parsed) { - parse(); - } - for (i = 0; i < 6; ++i) { - mat[i] = fontMatrix[i]; - } -} - void FoFiType1::writeEncoded(const char **newEncoding, FoFiOutputFunc outputFunc, void *outputStream) const { char buf[512]; @@ -197,178 +176,126 @@ char *FoFiType1::getNextLine(char *line) const return line; } +static const char tokenSeparators[] = " \t\n\r"; + +class FoFiType1Tokenizer +{ +public: + explicit FoFiType1Tokenizer(std::string_view &&stringViewA) : stringView(stringViewA) { } + + std::optional<std::string_view> getToken() + { + const auto length = stringView.length(); + if (currentPos >= length) { + return {}; + } + + std::string_view::size_type pos = stringView.find_first_of(tokenSeparators, currentPos); + while (pos == currentPos) { + // skip multiple contiguous separators + ++currentPos; + pos = stringView.find_first_of(tokenSeparators, currentPos); + } + if (pos == std::string_view::npos) { + std::string_view token = stringView.substr(currentPos, length - currentPos); + currentPos = length; + return token; + } + + std::string_view token = stringView.substr(currentPos, pos - currentPos); + + currentPos = pos + 1; + + return token; + } + +private: + std::string_view::size_type currentPos = 0; + const std::string_view stringView; +}; + void FoFiType1::parse() { - char *line, *line1, *firstLine, *p, *p2; - char buf[256]; - char c; - int n, code, base, i, j; - char *tokptr; - bool gotMatrix, continueLine; - - gotMatrix = false; - for (i = 1, line = (char *)file; i <= 100 && line && (!name || !encoding || !gotMatrix); ++i) { - - // get font name - if (!name && (line + 9 <= (char *)file + len) && !strncmp(line, "/FontName", 9)) { - const auto availableFile = (char *)file + len - line; - const int lineLen = static_cast<int>(availableFile < 255 ? availableFile : 255); - strncpy(buf, line, lineLen); - buf[lineLen] = '\0'; - if ((p = strchr(buf + 9, '/')) && (p = strtok_r(p + 1, " \t\n\r", &tokptr))) { - name = copyString(p); + FoFiType1Tokenizer tokenizer(std::string_view(reinterpret_cast<const char *>(file), len)); + while (name.empty() || !encoding) { + const std::optional<std::string_view> token = tokenizer.getToken(); + + if (!token) { + break; + } + + if (name.empty() && token == "/FontName") { + const std::optional<std::string_view> fontNameToken = tokenizer.getToken(); + if (!fontNameToken) { + break; } - line = getNextLine(line); - - // get encoding - } else if (!encoding && (line + 9 <= (char *)file + len) && !strncmp(line, "/Encoding", 9)) { - line = line + 9; - const auto availableFile = (char *)file + len - line; - const int lineLen = static_cast<int>(availableFile < 255 ? availableFile : 255); - strncpy(buf, line, lineLen); - buf[lineLen] = '\0'; - p = strtok_r(buf, " \t\n\r", &tokptr); - if (p && (p + 3 <= (char *)buf + lineLen) && !strncmp(p, "256", 3)) { - p = strtok_r(nullptr, " \t\n\r", &tokptr); - if (p && (p + 5 <= (char *)buf + lineLen) && !strncmp(p, "array", 5)) { - encoding = (char **)gmallocn(256, sizeof(char *)); - for (j = 0; j < 256; ++j) { - encoding[j] = nullptr; + + // Skip the / + name = fontNameToken->substr(1); + + } else if (!encoding && token == "/Encoding") { + const std::optional<std::string_view> token2 = tokenizer.getToken(); + if (!token2) { + break; + } + + const std::optional<std::string_view> token3 = tokenizer.getToken(); + if (!token3) { + break; + } + + if (token2 == "StandardEncoding" && token3 == "def") { + encoding = (char **)fofiType1StandardEncoding; + } else if (token2 == "256" && token3 == "array") { + encoding = (char **)gmallocn(256, sizeof(char *)); + for (int j = 0; j < 256; ++j) { + encoding[j] = nullptr; + } + + while (true) { + const std::optional<std::string_view> encodingToken = tokenizer.getToken(); + if (!encodingToken) { + break; } - continueLine = false; - for (j = 0, line = getNextLine(line); j < 1200 && line && (line1 = getNextLine(line)); ++j, line = line1) { - if ((n = (int)(line1 - line)) > 255) { - error(errSyntaxWarning, -1, "FoFiType1::parse a line has more than 255 characters, we don't support this"); - n = 255; - } - if (continueLine) { - continueLine = false; - if ((line1 - firstLine) + 1 > (int)sizeof(buf)) { - break; - } - p = firstLine; - p2 = buf; - while (p < line1) { - if (*p == '\n' || *p == '\r') { - *p2++ = ' '; - p++; - } else { - *p2++ = *p++; - } - } - *p2 = '\0'; - } else { - firstLine = line; - strncpy(buf, line, n); - buf[n] = '\0'; - } - for (p = buf; *p == ' ' || *p == '\t'; ++p) { - ; + + if (encodingToken == "dup") { + std::optional<std::string_view> codeToken = tokenizer.getToken(); + if (!codeToken) { + break; } - if (!strncmp(p, "dup", 3)) { - while (true) { - p += 3; - for (; *p == ' ' || *p == '\t'; ++p) { - ; - } - code = 0; - if (*p == '8' && p[1] == '#') { - base = 8; - p += 2; - } else if (*p >= '0' && *p <= '9') { - base = 10; - } else if (*p == '\n' || *p == '\r') { - continueLine = true; - break; - } else { - break; - } - for (; *p >= '0' && *p < '0' + base && code < INT_MAX / (base + (*p - '0')); ++p) { - code = code * base + (*p - '0'); - } - for (; *p == ' ' || *p == '\t'; ++p) { - ; - } - if (*p == '\n' || *p == '\r' || *p == '\0') { - continueLine = true; - break; - } else if (*p != '/') { - break; - } - ++p; - for (p2 = p; *p2 && *p2 != ' ' && *p2 != '\t'; ++p2) { - ; - } - if (code >= 0 && code < 256) { - c = *p2; - *p2 = '\0'; - gfree(encoding[code]); - encoding[code] = copyString(p); - *p2 = c; - } - for (p = p2; *p == ' ' || *p == '\t'; ++p) { - ; - } - if (*p == '\n' || *p == '\r') { - continueLine = true; - break; - } - if (strncmp(p, "put", 3)) { - break; - } - for (p += 3; *p == ' ' || *p == '\t'; ++p) { - ; - } - if (strncmp(p, "dup", 3)) { - break; - } - } + + std::optional<std::string_view> nameToken; + // Sometimes font data has code and name together without spacing i.e. 33/exclam + // if that happens don't call getToken again and just split codeToken in 2 + const auto slashPositionInCodeToken = codeToken->find('/'); + if (slashPositionInCodeToken != std::string_view::npos) { + nameToken = codeToken->substr(slashPositionInCodeToken, codeToken->length() - slashPositionInCodeToken); + codeToken = codeToken->substr(0, slashPositionInCodeToken); } else { - if (strtok_r(buf, " \t", &tokptr) && (p = strtok_r(nullptr, " \t\n\r", &tokptr)) && !strcmp(p, "def")) { - break; - } + nameToken = tokenizer.getToken(); } - bool allEncodingSet = true; - for (int k = 0; allEncodingSet && k < 256; ++k) { - allEncodingSet = encoding[k] != nullptr; - } - if (allEncodingSet) { + if (!nameToken) { break; } - } - //~ check for getinterval/putinterval junk - } - } else if (p && (p + 16 <= (char *)buf + lineLen) && !strncmp(p, "StandardEncoding", 16)) { - p = strtok_r(nullptr, " \t\n\r", &tokptr); - if (p && (p + 3 <= (char *)buf + lineLen) && !strncmp(p, "def", 3)) { - encoding = (char **)fofiType1StandardEncoding; - } - } else { - line = getNextLine(line); - } - } else if (!gotMatrix && (line + 11 <= (char *)file + len) && !strncmp(line, "/FontMatrix", 11)) { - const auto availableFile = (char *)file + len - (line + 11); - const int bufLen = static_cast<int>(availableFile < 255 ? availableFile : 255); - strncpy(buf, line + 11, bufLen); - buf[bufLen] = '\0'; - if ((p = strchr(buf, '['))) { - ++p; - if ((p2 = strchr(p, ']'))) { - *p2 = '\0'; - for (j = 0; j < 6; ++j) { - if ((p = strtok_r(j == 0 ? p : nullptr, " \t\n\r", &tokptr))) { - fontMatrix[j] = atof(p); + + int code = 0; + if (codeToken->length() > 2 && codeToken->at(0) == '8' && codeToken->at(1) == '#') { + std::from_chars(codeToken->data() + 2, codeToken->data() + codeToken->length(), code, 8); } else { - break; + std::from_chars(codeToken->data(), codeToken->data() + codeToken->length(), code); + } + + if (code >= 0 && code < 256 && nameToken->length() > 1) { + gfree(encoding[code]); + encoding[code] = copyString(nameToken->data() + 1, nameToken->length() - 1); } + + } else if (encodingToken == "def") { + break; } } } - gotMatrix = true; - - } else { - line = getNextLine(line); } } diff --git a/fofi/FoFiType1.h b/fofi/FoFiType1.h index 1bb64e4e..b3cc91e8 100644 --- a/fofi/FoFiType1.h +++ b/fofi/FoFiType1.h @@ -13,7 +13,7 @@ // All changes made under the Poppler project to this file are licensed // under GPL version 2 or later // -// Copyright (C) 2018, 2022 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2018, 2022, 2023 Albert Astals Cid <aacid@kde.org> // Copyright (C) 2022 Oliver Sander <oliver.sander@tu-dresden.de> // // To see a description of the changes please see the Changelog file that @@ -26,6 +26,8 @@ #include "FoFiBase.h" +#include <string> + //------------------------------------------------------------------------ // FoFiType1 //------------------------------------------------------------------------ @@ -39,15 +41,12 @@ public: ~FoFiType1() override; // Return the font name. - const char *getName(); + std::string getName(); // Return the encoding, as an array of 256 names (any of which may // be NULL). char **getEncoding(); - // Return the font matrix as an array of six numbers. - void getFontMatrix(double *mat); - // Write a version of the Type 1 font file with a new encoding. void writeEncoded(const char **newEncoding, FoFiOutputFunc outputFunc, void *outputStream) const; @@ -58,9 +57,8 @@ private: void parse(); void undoPFB(); - char *name; + std::string name; char **encoding; - double fontMatrix[6]; bool parsed; }; diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc index a75eae4c..21e8693d 100644 --- a/poppler/GfxFont.cc +++ b/poppler/GfxFont.cc @@ -1140,11 +1140,10 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, std::optional<st const std::optional<std::vector<unsigned char>> buf = readEmbFontFile(xref); if (buf) { if ((ffT1 = FoFiType1::make(buf->data(), buf->size()))) { - if (ffT1->getName()) { - if (embFontName) { - delete embFontName; - } - embFontName = new GooString(ffT1->getName()); + const std::string fontName = ffT1->getName(); + if (!fontName.empty()) { + delete embFontName; + embFontName = new GooString(fontName); } if (!baseEnc) { baseEnc = (const char **)ffT1->getEncoding(); |