From 1e0ee8141207a425b56592c136ac5e94fc821173 Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Wed, 12 May 2021 10:51:09 +0200 Subject: [PATCH] vcl PDF tokenizer: fix EOF position when \r is not followed by \n MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Otherwise this would break partial tokenize when we only read a trailer in the middle of the file: m_aEOFs.back() is one byte larger than rStream.Tell(), so we read past the end of the trailer, resulting in a tokenize failure. What's special about the bugdoc: - it has 2 xrefs, the first is incomplete, and refers to a second which is later in the file - the object length is an indirect object, triggering an xref lookup - the first EOF is followed by a \r, which is not followed by a \n This results in reading past the end of the first trailer and then triggering a lookup failure. FWIW, pdfium does the same, so we're now in sync with it. (cherry picked from commit 6b1d5bafdc722d07d3dc4980764275a6caa707ba) Conflicts: vcl/qa/cppunit/filter/ipdf/ipdf.cxx Change-Id: Ia556a25e333b5e4f1418d92a98d74358862120e2 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/115537 Tested-by: Jenkins CollaboraOffice Reviewed-by: Tomaž Vajngerl --- vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf | 69 +++++++++++++++++++++++++ vcl/qa/cppunit/filter/ipdf/ipdf.cxx | 19 +++++++ vcl/source/filter/ipdf/pdfdocument.cxx | 7 ++- 3 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf diff --git a/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf b/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf new file mode 100644 index 000000000000..6f1ad86f5c99 --- /dev/null +++ b/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf @@ -0,0 +1,69 @@ +%PDF-1.7 +% ò¤ô +1 0 obj << + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj << + /Type /Pages + /MediaBox [0 0 200 300] + /Count 1 + /Kids [3 0 R] +>> +endobj +3 0 obj << + /Type /Page + /Parent 2 0 R + /Contents 4 0 R 
+>> +endobj +4 0 obj << + /Length 4 +>> +stream +q +Q +endstream +endobj +xref +0 5 +0000000000 65535 f +0000000015 00000 n +0000000068 00000 n +0000000157 00000 n +0000000226 00000 n +trailer << + /Root 1 0 R + /Size 5 + /Prev 541 +>> +startxref +280 +%%EOF %%TEST +4 0 obj << + /Length 5 0 R +>> +stream +q +Q +endstream +endobj +5 0 obj +4 +endobj +xref +0 6 +0000000000 65535 f +0000000015 00000 n +0000000068 00000 n +0000000157 00000 n +0000000466 00000 n +0000000524 00000 n +trailer << + /Root 1 0 R + /Size 6 +>> +startxref +280 +%%EOF diff --git a/vcl/qa/cppunit/filter/ipdf/ipdf.cxx b/vcl/qa/cppunit/filter/ipdf/ipdf.cxx index 5055e36a922e..3307db5c9743 100644 --- a/vcl/qa/cppunit/filter/ipdf/ipdf.cxx +++ b/vcl/qa/cppunit/filter/ipdf/ipdf.cxx @@ -168,6 +168,25 @@ CPPUNIT_TEST_FIXTURE(VclFilterIpdfTest, testDictArrayDict) CPPUNIT_ASSERT(pKey); } +CPPUNIT_TEST_FIXTURE(VclFilterIpdfTest, testCommentEnd) +{ + // Load the test document: + // - it has two xrefs + // - second xref has an updated page content object with an indirect length + // - last startxref refers to the first xref + // - first xref has a /Prev to the second xref + // - first xref is terminated by a \r, which is not followed by a newline + // this means that if reading doesn't stop at the end of the first xref, then we'll try to look + // up the offset of the length object, which we don't yet have + OUString aSourceURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "comment-end.pdf"; + SvFileStream aFile(aSourceURL, StreamMode::READ); + vcl::filter::PDFDocument aDocument; + + // Without the accompanying fix in place, this test would have failed, because Tokenize() didn't + // stop at the end of the first xref. 
+ CPPUNIT_ASSERT(aDocument.Read(aFile)); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx index 64cf9dc4ef90..8715000f1627 100644 --- a/vcl/source/filter/ipdf/pdfdocument.cxx +++ b/vcl/source/filter/ipdf/pdfdocument.cxx @@ -2145,9 +2145,14 @@ bool PDFCommentElement::Read(SvStream& rStream) sal_uInt64 nPos = rStream.Tell(); if (ch == '\r') { + rStream.ReadChar(ch); + rStream.SeekRel(-1); // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat // behavior. - nPos += 1; + if (ch == '\n') + { + nPos += 1; + } } m_rDoc.PushBackEOF(nPos); } -- 2.11.4.GIT