summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Miklos Vajna <vmiklos@collabora.com> 2021-05-12 10:51:09 +0200
committer Andras Timar <andras.timar@collabora.com> 2021-05-27 11:09:27 +0200
commit 5f00200b8d6cb095156b5ea963a8cad3bbd918a6 (patch)
tree eb850e3e948d81e4bb5b9aed38cee8fd877438c6
parent 8def73a66021cccb241bf4916ef051932bd5730c (diff)
vcl PDF tokenizer: fix EOF position when \r is not followed by \n
Otherwise this would break partial tokenize when we only read a trailer in the middle of the file: m_aEOFs.back() is one byte larger than rStream.Tell(), so we read past the end of the trailer, resulting in a tokenize failure. What's special about the bugdoc: - it has 2 xrefs, the first is incomplete, and refers to a second which is later in the file - the object length is an indirect object, triggering an xref lookup - the first EOF is followed by a \r, which is then not followed by a \n This results in reading past the end of the first trailer and then triggering a lookup failure. FWIW, pdfium does the same in <https://pdfium.googlesource.com/pdfium/+/59d107323f6727bbd5f8a4d0843081790638a1dd/core/fpdfapi/parser/cpdf_syntax_parser.cpp#446>, we're now in sync with it. (cherry picked from commit 6b1d5bafdc722d07d3dc4980764275a6caa707ba) Conflicts: vcl/qa/cppunit/filter/ipdf/ipdf.cxx Change-Id: Ia556a25e333b5e4f1418d92a98d74358862120e2 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/115537 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com> Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
-rw-r--r--vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf69
-rw-r--r--vcl/qa/cppunit/filter/ipdf/ipdf.cxx19
-rw-r--r--vcl/source/filter/ipdf/pdfdocument.cxx7
3 files changed, 94 insertions, 1 deletions
diff --git a/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf b/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf
new file mode 100644
index 000000000000..6f1ad86f5c99
--- /dev/null
+++ b/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf
@@ -0,0 +1,69 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+ /Type /Catalog
+ /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+ /Type /Pages
+ /MediaBox [0 0 200 300]
+ /Count 1
+ /Kids [3 0 R]
+>>
+endobj
+3 0 obj <<
+ /Type /Page
+ /Parent 2 0 R
+ /Contents 4 0 R
+>>
+endobj
+4 0 obj <<
+ /Length 4
+>>
+stream
+q
+Q
+endstream
+endobj
+xref
+0 5
+0000000000 65535 f
+0000000015 00000 n
+0000000068 00000 n
+0000000157 00000 n
+0000000226 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 5
+ /Prev 541
+>>
+startxref
+280
+%%EOF %%TEST
+4 0 obj <<
+ /Length 5 0 R
+>>
+stream
+q
+Q
+endstream
+endobj
+5 0 obj
+4
+endobj
+xref
+0 6
+0000000000 65535 f
+0000000015 00000 n
+0000000068 00000 n
+0000000157 00000 n
+0000000466 00000 n
+0000000524 00000 n
+trailer <<
+ /Root 1 0 R
+ /Size 6
+>>
+startxref
+280
+%%EOF
diff --git a/vcl/qa/cppunit/filter/ipdf/ipdf.cxx b/vcl/qa/cppunit/filter/ipdf/ipdf.cxx
index 5055e36a922e..3307db5c9743 100644
--- a/vcl/qa/cppunit/filter/ipdf/ipdf.cxx
+++ b/vcl/qa/cppunit/filter/ipdf/ipdf.cxx
@@ -168,6 +168,25 @@ CPPUNIT_TEST_FIXTURE(VclFilterIpdfTest, testDictArrayDict)
CPPUNIT_ASSERT(pKey);
}
+CPPUNIT_TEST_FIXTURE(VclFilterIpdfTest, testCommentEnd)
+{
+ // Load the test document:
+ // - it has two xrefs
+ // - second xref has an updated page content object with an indirect length
+ // - last startxref refers to the first xref
+ // - first xref has a /Prev to the second xref
+ // - first xref is terminated by a \r, which is not followed by a newline
+ // this means that if reading doesn't stop at the end of the first xref, then we'll try to look
+ // up the offset of the length object, which we don't yet have
+ OUString aSourceURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "comment-end.pdf";
+ SvFileStream aFile(aSourceURL, StreamMode::READ);
+ vcl::filter::PDFDocument aDocument;
+
+ // Without the accompanying fix in place, this test would have failed, because Tokenize() didn't
+ // stop at the end of the first xref.
+ CPPUNIT_ASSERT(aDocument.Read(aFile));
+}
+
CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 64cf9dc4ef90..8715000f1627 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -2145,9 +2145,14 @@ bool PDFCommentElement::Read(SvStream& rStream)
sal_uInt64 nPos = rStream.Tell();
if (ch == '\r')
{
+ rStream.ReadChar(ch);
+ rStream.SeekRel(-1);
// If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
// behavior.
- nPos += 1;
+ if (ch == '\n')
+ {
+ nPos += 1;
+ }
}
m_rDoc.PushBackEOF(nPos);
}