loplugin:constmethod in vcl
[LibreOffice.git] / vcl / source / filter / ipdf / pdfdocument.cxx
blob1aec86c9ee2ffdfae1d9690b624a5668cb0c95fb
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <vcl/filter/pdfdocument.hxx>
12 #include <map>
13 #include <memory>
14 #include <vector>
16 #include <com/sun/star/uno/Sequence.hxx>
17 #include <com/sun/star/security/XCertificate.hpp>
19 #include <comphelper/scopeguard.hxx>
20 #include <comphelper/string.hxx>
21 #include <rtl/character.hxx>
22 #include <rtl/strbuf.hxx>
23 #include <rtl/string.hxx>
24 #include <sal/log.hxx>
25 #include <sal/types.h>
26 #include <svl/cryptosign.hxx>
27 #include <tools/zcodec.hxx>
28 #include <vcl/pdfwriter.hxx>
30 using namespace com::sun::star;
32 namespace vcl
34 namespace filter
/// Number of characters reserved for the hex-encoded /Contents value of a
/// signature object; the placeholder is filled with '0' and later overwritten
/// by the real signature, which has to fit into it.
constexpr int MAX_SIGNATURE_CONTENT_LENGTH = 50000;
38 class PDFTrailerElement;
40 /// A one-liner comment.
41 class PDFCommentElement : public PDFElement
43 PDFDocument& m_rDoc;
44 OString m_aComment;
46 public:
47 explicit PDFCommentElement(PDFDocument& rDoc);
48 bool Read(SvStream& rStream) override;
51 class PDFReferenceElement;
53 /// End of a dictionary: '>>'.
54 class PDFEndDictionaryElement : public PDFElement
56 /// Offset before the '>>' token.
57 sal_uInt64 m_nLocation = 0;
59 public:
60 PDFEndDictionaryElement();
61 bool Read(SvStream& rStream) override;
62 sal_uInt64 GetLocation() const;
65 /// End of a stream: 'endstream' keyword.
66 class PDFEndStreamElement : public PDFElement
68 public:
69 bool Read(SvStream& rStream) override;
72 /// End of an object: 'endobj' keyword.
73 class PDFEndObjectElement : public PDFElement
75 public:
76 bool Read(SvStream& rStream) override;
79 /// End of an array: ']'.
80 class PDFEndArrayElement : public PDFElement
82 /// Location before the ']' token.
83 sal_uInt64 m_nOffset = 0;
85 public:
86 PDFEndArrayElement();
87 bool Read(SvStream& rStream) override;
88 sal_uInt64 GetOffset() const;
91 /// Boolean object: a 'true' or a 'false'.
92 class PDFBooleanElement : public PDFElement
94 public:
95 explicit PDFBooleanElement(bool bValue);
96 bool Read(SvStream& rStream) override;
99 /// Null object: the 'null' singleton.
100 class PDFNullElement : public PDFElement
102 public:
103 bool Read(SvStream& rStream) override;
106 /// The trailer singleton is at the end of the doc.
107 class PDFTrailerElement : public PDFElement
109 PDFDocument& m_rDoc;
110 std::map<OString, PDFElement*> m_aDictionary;
111 /// Location of the end of the trailer token.
112 sal_uInt64 m_nOffset = 0;
114 public:
115 explicit PDFTrailerElement(PDFDocument& rDoc);
116 bool Read(SvStream& rStream) override;
117 PDFElement* Lookup(const OString& rDictionaryKey);
118 sal_uInt64 GetLocation() const;
// Default constructor of XRefEntry: defaulted here, outside the class body.
121 XRefEntry::XRefEntry() = default;
// Default constructor of PDFDocument: defaulted here, outside the class body.
123 PDFDocument::PDFDocument() = default;
125 bool PDFDocument::RemoveSignature(size_t nPosition)
127 std::vector<PDFObjectElement*> aSignatures = GetSignatureWidgets();
128 if (nPosition >= aSignatures.size())
130 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: invalid nPosition");
131 return false;
134 if (aSignatures.size() != m_aEOFs.size() - 1)
136 SAL_WARN("vcl.filter", "PDFDocument::RemoveSignature: no 1:1 mapping between signatures "
137 "and incremental updates");
138 return false;
141 // The EOF offset is the end of the original file, without the signature at
142 // nPosition.
143 m_aEditBuffer.Seek(m_aEOFs[nPosition]);
144 // Drop all bytes after the current position.
145 m_aEditBuffer.SetStreamSize(m_aEditBuffer.Tell() + 1);
147 return m_aEditBuffer.good();
150 sal_uInt32 PDFDocument::GetNextSignature()
152 sal_uInt32 nRet = 0;
153 for (const auto& pSignature : GetSignatureWidgets())
155 auto pT = dynamic_cast<PDFLiteralStringElement*>(pSignature->Lookup("T"));
156 if (!pT)
157 continue;
159 const OString& rValue = pT->GetValue();
160 const OString aPrefix = "Signature";
161 if (!rValue.startsWith(aPrefix))
162 continue;
164 nRet = std::max(nRet, rValue.copy(aPrefix.getLength()).toUInt32());
167 return nRet + 1;
170 sal_Int32 PDFDocument::WriteSignatureObject(const OUString& rDescription, bool bAdES,
171 sal_uInt64& rLastByteRangeOffset,
172 sal_Int64& rContentOffset)
174 // Write signature object.
175 sal_Int32 nSignatureId = m_aXRef.size();
176 XRefEntry aSignatureEntry;
177 aSignatureEntry.SetOffset(m_aEditBuffer.Tell());
178 aSignatureEntry.SetDirty(true);
179 m_aXRef[nSignatureId] = aSignatureEntry;
180 OStringBuffer aSigBuffer;
181 aSigBuffer.append(nSignatureId);
182 aSigBuffer.append(" 0 obj\n");
183 aSigBuffer.append("<</Contents <");
184 rContentOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
185 // Reserve space for the PKCS#7 object.
186 OStringBuffer aContentFiller(MAX_SIGNATURE_CONTENT_LENGTH);
187 comphelper::string::padToLength(aContentFiller, MAX_SIGNATURE_CONTENT_LENGTH, '0');
188 aSigBuffer.append(aContentFiller.makeStringAndClear());
189 aSigBuffer.append(">\n/Type/Sig/SubFilter");
190 if (bAdES)
191 aSigBuffer.append("/ETSI.CAdES.detached");
192 else
193 aSigBuffer.append("/adbe.pkcs7.detached");
195 // Time of signing.
196 aSigBuffer.append(" /M (");
197 aSigBuffer.append(vcl::PDFWriter::GetDateTime());
198 aSigBuffer.append(")");
200 // Byte range: we can write offset1-length1 and offset2 right now, will
201 // write length2 later.
202 aSigBuffer.append(" /ByteRange [ 0 ");
203 // -1 and +1 is the leading "<" and the trailing ">" around the hex string.
204 aSigBuffer.append(rContentOffset - 1);
205 aSigBuffer.append(" ");
206 aSigBuffer.append(rContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
207 aSigBuffer.append(" ");
208 rLastByteRangeOffset = aSignatureEntry.GetOffset() + aSigBuffer.getLength();
209 // We don't know how many bytes we need for the last ByteRange value, this
210 // should be enough.
211 OStringBuffer aByteRangeFiller;
212 comphelper::string::padToLength(aByteRangeFiller, 100, ' ');
213 aSigBuffer.append(aByteRangeFiller.makeStringAndClear());
214 // Finish the Sig obj.
215 aSigBuffer.append(" /Filter/Adobe.PPKMS");
217 if (!rDescription.isEmpty())
219 aSigBuffer.append("/Reason<");
220 vcl::PDFWriter::AppendUnicodeTextString(rDescription, aSigBuffer);
221 aSigBuffer.append(">");
224 aSigBuffer.append(" >>\nendobj\n\n");
225 m_aEditBuffer.WriteOString(aSigBuffer.toString());
227 return nSignatureId;
230 sal_Int32 PDFDocument::WriteAppearanceObject()
232 // Write appearance object.
233 sal_Int32 nAppearanceId = m_aXRef.size();
234 XRefEntry aAppearanceEntry;
235 aAppearanceEntry.SetOffset(m_aEditBuffer.Tell());
236 aAppearanceEntry.SetDirty(true);
237 m_aXRef[nAppearanceId] = aAppearanceEntry;
238 m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
239 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
240 m_aEditBuffer.WriteCharPtr("<</Type/XObject\n/Subtype/Form\n");
241 m_aEditBuffer.WriteCharPtr("/BBox[0 0 0 0]\n/Length 0\n>>\n");
242 m_aEditBuffer.WriteCharPtr("stream\n\nendstream\nendobj\n\n");
244 return nAppearanceId;
247 sal_Int32 PDFDocument::WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
248 sal_Int32 nAppearanceId)
250 // Decide what identifier to use for the new signature.
251 sal_uInt32 nNextSignature = GetNextSignature();
253 // Write the Annot object, references nSignatureId and nAppearanceId.
254 sal_Int32 nAnnotId = m_aXRef.size();
255 XRefEntry aAnnotEntry;
256 aAnnotEntry.SetOffset(m_aEditBuffer.Tell());
257 aAnnotEntry.SetDirty(true);
258 m_aXRef[nAnnotId] = aAnnotEntry;
259 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
260 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
261 m_aEditBuffer.WriteCharPtr("<</Type/Annot/Subtype/Widget/F 132\n");
262 m_aEditBuffer.WriteCharPtr("/Rect[0 0 0 0]\n");
263 m_aEditBuffer.WriteCharPtr("/FT/Sig\n");
264 m_aEditBuffer.WriteCharPtr("/P ");
265 m_aEditBuffer.WriteUInt32AsString(rFirstPage.GetObjectValue());
266 m_aEditBuffer.WriteCharPtr(" 0 R\n");
267 m_aEditBuffer.WriteCharPtr("/T(Signature");
268 m_aEditBuffer.WriteUInt32AsString(nNextSignature);
269 m_aEditBuffer.WriteCharPtr(")\n");
270 m_aEditBuffer.WriteCharPtr("/V ");
271 m_aEditBuffer.WriteUInt32AsString(nSignatureId);
272 m_aEditBuffer.WriteCharPtr(" 0 R\n");
273 m_aEditBuffer.WriteCharPtr("/DV ");
274 m_aEditBuffer.WriteUInt32AsString(nSignatureId);
275 m_aEditBuffer.WriteCharPtr(" 0 R\n");
276 m_aEditBuffer.WriteCharPtr("/AP<<\n/N ");
277 m_aEditBuffer.WriteUInt32AsString(nAppearanceId);
278 m_aEditBuffer.WriteCharPtr(" 0 R\n>>\n");
279 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
281 return nAnnotId;
284 bool PDFDocument::WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId)
286 PDFElement* pAnnots = rFirstPage.Lookup("Annots");
287 auto pAnnotsReference = dynamic_cast<PDFReferenceElement*>(pAnnots);
288 if (pAnnotsReference)
290 // Write the updated Annots key of the Page object.
291 PDFObjectElement* pAnnotsObject = pAnnotsReference->LookupObject();
292 if (!pAnnotsObject)
294 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid Annots reference");
295 return false;
298 sal_uInt32 nAnnotsId = pAnnotsObject->GetObjectValue();
299 m_aXRef[nAnnotsId].SetType(XRefEntryType::NOT_COMPRESSED);
300 m_aXRef[nAnnotsId].SetOffset(m_aEditBuffer.Tell());
301 m_aXRef[nAnnotsId].SetDirty(true);
302 m_aEditBuffer.WriteUInt32AsString(nAnnotsId);
303 m_aEditBuffer.WriteCharPtr(" 0 obj\n[");
305 // Write existing references.
306 PDFArrayElement* pArray = pAnnotsObject->GetArray();
307 if (!pArray)
309 SAL_WARN("vcl.filter", "PDFDocument::Sign: Page Annots is a reference to a non-array");
310 return false;
313 for (size_t i = 0; i < pArray->GetElements().size(); ++i)
315 auto pReference = dynamic_cast<PDFReferenceElement*>(pArray->GetElements()[i]);
316 if (!pReference)
317 continue;
319 if (i)
320 m_aEditBuffer.WriteCharPtr(" ");
321 m_aEditBuffer.WriteUInt32AsString(pReference->GetObjectValue());
322 m_aEditBuffer.WriteCharPtr(" 0 R");
324 // Write our reference.
325 m_aEditBuffer.WriteCharPtr(" ");
326 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
327 m_aEditBuffer.WriteCharPtr(" 0 R");
329 m_aEditBuffer.WriteCharPtr("]\nendobj\n\n");
331 else
333 // Write the updated first page object, references nAnnotId.
334 sal_uInt32 nFirstPageId = rFirstPage.GetObjectValue();
335 if (nFirstPageId >= m_aXRef.size())
337 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid first page obj id");
338 return false;
340 m_aXRef[nFirstPageId].SetOffset(m_aEditBuffer.Tell());
341 m_aXRef[nFirstPageId].SetDirty(true);
342 m_aEditBuffer.WriteUInt32AsString(nFirstPageId);
343 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
344 m_aEditBuffer.WriteCharPtr("<<");
345 auto pAnnotsArray = dynamic_cast<PDFArrayElement*>(pAnnots);
346 if (!pAnnotsArray)
348 // No Annots key, just write the key with a single reference.
349 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
350 + rFirstPage.GetDictionaryOffset(),
351 rFirstPage.GetDictionaryLength());
352 m_aEditBuffer.WriteCharPtr("/Annots[");
353 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
354 m_aEditBuffer.WriteCharPtr(" 0 R]");
356 else
358 // Annots key is already there, insert our reference at the end.
359 PDFDictionaryElement* pDictionary = rFirstPage.GetDictionary();
361 // Offset right before the end of the Annots array.
362 sal_uInt64 nAnnotsEndOffset = pDictionary->GetKeyOffset("Annots")
363 + pDictionary->GetKeyValueLength("Annots") - 1;
364 // Length of beginning of the dictionary -> Annots end.
365 sal_uInt64 nAnnotsBeforeEndLength = nAnnotsEndOffset - rFirstPage.GetDictionaryOffset();
366 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
367 + rFirstPage.GetDictionaryOffset(),
368 nAnnotsBeforeEndLength);
369 m_aEditBuffer.WriteCharPtr(" ");
370 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
371 m_aEditBuffer.WriteCharPtr(" 0 R");
372 // Length of Annots end -> end of the dictionary.
373 sal_uInt64 nAnnotsAfterEndLength = rFirstPage.GetDictionaryOffset()
374 + rFirstPage.GetDictionaryLength()
375 - nAnnotsEndOffset;
376 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
377 + nAnnotsEndOffset,
378 nAnnotsAfterEndLength);
380 m_aEditBuffer.WriteCharPtr(">>");
381 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
384 return true;
387 bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot)
389 if (m_pXRefStream)
390 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
391 else
393 if (!m_pTrailer)
395 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no trailer");
396 return false;
398 pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
400 if (!pRoot)
402 SAL_WARN("vcl.filter", "PDFDocument::Sign: trailer has no root reference");
403 return false;
405 PDFObjectElement* pCatalog = pRoot->LookupObject();
406 if (!pCatalog)
408 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog reference");
409 return false;
411 sal_uInt32 nCatalogId = pCatalog->GetObjectValue();
412 if (nCatalogId >= m_aXRef.size())
414 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid catalog obj id");
415 return false;
417 PDFElement* pAcroForm = pCatalog->Lookup("AcroForm");
418 auto pAcroFormReference = dynamic_cast<PDFReferenceElement*>(pAcroForm);
419 if (pAcroFormReference)
421 // Write the updated AcroForm key of the Catalog object.
422 PDFObjectElement* pAcroFormObject = pAcroFormReference->LookupObject();
423 if (!pAcroFormObject)
425 SAL_WARN("vcl.filter", "PDFDocument::Sign: invalid AcroForm reference");
426 return false;
429 sal_uInt32 nAcroFormId = pAcroFormObject->GetObjectValue();
430 m_aXRef[nAcroFormId].SetType(XRefEntryType::NOT_COMPRESSED);
431 m_aXRef[nAcroFormId].SetOffset(m_aEditBuffer.Tell());
432 m_aXRef[nAcroFormId].SetDirty(true);
433 m_aEditBuffer.WriteUInt32AsString(nAcroFormId);
434 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
436 // If this is nullptr, then the AcroForm object is not in an object stream.
437 SvMemoryStream* pStreamBuffer = pAcroFormObject->GetStreamBuffer();
439 if (!pAcroFormObject->Lookup("Fields"))
441 SAL_WARN("vcl.filter",
442 "PDFDocument::Sign: AcroForm object without required Fields key");
443 return false;
446 PDFDictionaryElement* pAcroFormDictionary = pAcroFormObject->GetDictionary();
447 if (!pAcroFormDictionary)
449 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm object has no dictionary");
450 return false;
453 // Offset right before the end of the Fields array.
454 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
455 + pAcroFormDictionary->GetKeyValueLength("Fields")
456 - strlen("]");
457 // Length of beginning of the object dictionary -> Fields end.
458 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
459 if (pStreamBuffer)
460 m_aEditBuffer.WriteBytes(pStreamBuffer->GetData(), nFieldsBeforeEndLength);
461 else
463 nFieldsBeforeEndLength -= pAcroFormObject->GetDictionaryOffset();
464 m_aEditBuffer.WriteCharPtr("<<");
465 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
466 + pAcroFormObject->GetDictionaryOffset(),
467 nFieldsBeforeEndLength);
470 // Append our reference at the end of the Fields array.
471 m_aEditBuffer.WriteCharPtr(" ");
472 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
473 m_aEditBuffer.WriteCharPtr(" 0 R");
475 // Length of Fields end -> end of the object dictionary.
476 if (pStreamBuffer)
478 sal_uInt64 nFieldsAfterEndLength = pStreamBuffer->GetSize() - nFieldsEndOffset;
479 m_aEditBuffer.WriteBytes(static_cast<const char*>(pStreamBuffer->GetData())
480 + nFieldsEndOffset,
481 nFieldsAfterEndLength);
483 else
485 sal_uInt64 nFieldsAfterEndLength = pAcroFormObject->GetDictionaryOffset()
486 + pAcroFormObject->GetDictionaryLength()
487 - nFieldsEndOffset;
488 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
489 + nFieldsEndOffset,
490 nFieldsAfterEndLength);
491 m_aEditBuffer.WriteCharPtr(">>");
494 m_aEditBuffer.WriteCharPtr("\nendobj\n\n");
496 else
498 // Write the updated Catalog object, references nAnnotId.
499 auto pAcroFormDictionary = dynamic_cast<PDFDictionaryElement*>(pAcroForm);
500 m_aXRef[nCatalogId].SetOffset(m_aEditBuffer.Tell());
501 m_aXRef[nCatalogId].SetDirty(true);
502 m_aEditBuffer.WriteUInt32AsString(nCatalogId);
503 m_aEditBuffer.WriteCharPtr(" 0 obj\n");
504 m_aEditBuffer.WriteCharPtr("<<");
505 if (!pAcroFormDictionary)
507 // No AcroForm key, assume no signatures.
508 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
509 + pCatalog->GetDictionaryOffset(),
510 pCatalog->GetDictionaryLength());
511 m_aEditBuffer.WriteCharPtr("/AcroForm<</Fields[\n");
512 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
513 m_aEditBuffer.WriteCharPtr(" 0 R\n]/SigFlags 3>>\n");
515 else
517 // AcroForm key is already there, insert our reference at the Fields end.
518 auto it = pAcroFormDictionary->GetItems().find("Fields");
519 if (it == pAcroFormDictionary->GetItems().end())
521 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm without required Fields key");
522 return false;
525 auto pFields = dynamic_cast<PDFArrayElement*>(it->second);
526 if (!pFields)
528 SAL_WARN("vcl.filter", "PDFDocument::Sign: AcroForm Fields is not an array");
529 return false;
532 // Offset right before the end of the Fields array.
533 sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
534 + pAcroFormDictionary->GetKeyValueLength("Fields") - 1;
535 // Length of beginning of the Catalog dictionary -> Fields end.
536 sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset - pCatalog->GetDictionaryOffset();
537 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
538 + pCatalog->GetDictionaryOffset(),
539 nFieldsBeforeEndLength);
540 m_aEditBuffer.WriteCharPtr(" ");
541 m_aEditBuffer.WriteUInt32AsString(nAnnotId);
542 m_aEditBuffer.WriteCharPtr(" 0 R");
543 // Length of Fields end -> end of the Catalog dictionary.
544 sal_uInt64 nFieldsAfterEndLength = pCatalog->GetDictionaryOffset()
545 + pCatalog->GetDictionaryLength() - nFieldsEndOffset;
546 m_aEditBuffer.WriteBytes(static_cast<const char*>(m_aEditBuffer.GetData())
547 + nFieldsEndOffset,
548 nFieldsAfterEndLength);
550 m_aEditBuffer.WriteCharPtr(">>\nendobj\n\n");
553 return true;
556 void PDFDocument::WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot)
558 if (m_pXRefStream)
560 // Write the xref stream.
561 // This is a bit meta: the xref stream stores its own offset.
562 sal_Int32 nXRefStreamId = m_aXRef.size();
563 XRefEntry aXRefStreamEntry;
564 aXRefStreamEntry.SetOffset(nXRefOffset);
565 aXRefStreamEntry.SetDirty(true);
566 m_aXRef[nXRefStreamId] = aXRefStreamEntry;
568 // Write stream data.
569 SvMemoryStream aXRefStream;
570 const size_t nOffsetLen = 3;
571 // 3 additional bytes: predictor, the first and the third field.
572 const size_t nLineLength = nOffsetLen + 3;
573 // This is the line as it appears before tweaking according to the predictor.
574 std::vector<unsigned char> aOrigLine(nLineLength);
575 // This is the previous line.
576 std::vector<unsigned char> aPrevLine(nLineLength);
577 // This is the line as written to the stream.
578 std::vector<unsigned char> aFilteredLine(nLineLength);
579 for (const auto& rXRef : m_aXRef)
581 const XRefEntry& rEntry = rXRef.second;
583 if (!rEntry.GetDirty())
584 continue;
586 // Predictor.
587 size_t nPos = 0;
588 // PNG prediction: up (on all rows).
589 aOrigLine[nPos++] = 2;
591 // First field.
592 unsigned char nType = 0;
593 switch (rEntry.GetType())
595 case XRefEntryType::FREE:
596 nType = 0;
597 break;
598 case XRefEntryType::NOT_COMPRESSED:
599 nType = 1;
600 break;
601 case XRefEntryType::COMPRESSED:
602 nType = 2;
603 break;
605 aOrigLine[nPos++] = nType;
607 // Second field.
608 for (size_t i = 0; i < nOffsetLen; ++i)
610 size_t nByte = nOffsetLen - i - 1;
611 // Fields requiring more than one byte are stored with the
612 // high-order byte first.
613 unsigned char nCh = (rEntry.GetOffset() & (0xff << (nByte * 8))) >> (nByte * 8);
614 aOrigLine[nPos++] = nCh;
617 // Third field.
618 aOrigLine[nPos++] = 0;
620 // Now apply the predictor.
621 aFilteredLine[0] = aOrigLine[0];
622 for (size_t i = 1; i < nLineLength; ++i)
624 // Count the delta vs the previous line.
625 aFilteredLine[i] = aOrigLine[i] - aPrevLine[i];
626 // Remember the new reference.
627 aPrevLine[i] = aOrigLine[i];
630 aXRefStream.WriteBytes(aFilteredLine.data(), aFilteredLine.size());
633 m_aEditBuffer.WriteUInt32AsString(nXRefStreamId);
634 m_aEditBuffer.WriteCharPtr(
635 " 0 obj\n<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode");
637 // ID.
638 auto pID = dynamic_cast<PDFArrayElement*>(m_pXRefStream->Lookup("ID"));
639 if (pID)
641 const std::vector<PDFElement*>& rElements = pID->GetElements();
642 m_aEditBuffer.WriteCharPtr("/ID [ <");
643 for (size_t i = 0; i < rElements.size(); ++i)
645 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
646 if (!pIDString)
647 continue;
649 m_aEditBuffer.WriteOString(pIDString->GetValue());
650 if ((i + 1) < rElements.size())
651 m_aEditBuffer.WriteCharPtr("> <");
653 m_aEditBuffer.WriteCharPtr("> ] ");
656 // Index.
657 m_aEditBuffer.WriteCharPtr("/Index [ ");
658 for (const auto& rXRef : m_aXRef)
660 if (!rXRef.second.GetDirty())
661 continue;
663 m_aEditBuffer.WriteUInt32AsString(rXRef.first);
664 m_aEditBuffer.WriteCharPtr(" 1 ");
666 m_aEditBuffer.WriteCharPtr("] ");
668 // Info.
669 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Info"));
670 if (pInfo)
672 m_aEditBuffer.WriteCharPtr("/Info ");
673 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
674 m_aEditBuffer.WriteCharPtr(" ");
675 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
676 m_aEditBuffer.WriteCharPtr(" R ");
679 // Length.
680 m_aEditBuffer.WriteCharPtr("/Length ");
682 ZCodec aZCodec;
683 aZCodec.BeginCompression();
684 aXRefStream.Seek(0);
685 SvMemoryStream aStream;
686 aZCodec.Compress(aXRefStream, aStream);
687 aZCodec.EndCompression();
688 aXRefStream.Seek(0);
689 aXRefStream.SetStreamSize(0);
690 aStream.Seek(0);
691 aXRefStream.WriteStream(aStream);
693 m_aEditBuffer.WriteUInt32AsString(aXRefStream.GetSize());
695 if (!m_aStartXRefs.empty())
697 // Write location of the previous cross-reference section.
698 m_aEditBuffer.WriteCharPtr("/Prev ");
699 m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
702 // Root.
703 m_aEditBuffer.WriteCharPtr("/Root ");
704 m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
705 m_aEditBuffer.WriteCharPtr(" ");
706 m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
707 m_aEditBuffer.WriteCharPtr(" R ");
709 // Size.
710 m_aEditBuffer.WriteCharPtr("/Size ");
711 m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
713 m_aEditBuffer.WriteCharPtr("/Type/XRef/W[1 3 1]>>\nstream\n");
714 aXRefStream.Seek(0);
715 m_aEditBuffer.WriteStream(aXRefStream);
716 m_aEditBuffer.WriteCharPtr("\nendstream\nendobj\n\n");
718 else
720 // Write the xref table.
721 m_aEditBuffer.WriteCharPtr("xref\n");
722 for (const auto& rXRef : m_aXRef)
724 size_t nObject = rXRef.first;
725 size_t nOffset = rXRef.second.GetOffset();
726 if (!rXRef.second.GetDirty())
727 continue;
729 m_aEditBuffer.WriteUInt32AsString(nObject);
730 m_aEditBuffer.WriteCharPtr(" 1\n");
731 OStringBuffer aBuffer;
732 aBuffer.append(static_cast<sal_Int32>(nOffset));
733 while (aBuffer.getLength() < 10)
734 aBuffer.insert(0, "0");
735 if (nObject == 0)
736 aBuffer.append(" 65535 f \n");
737 else
738 aBuffer.append(" 00000 n \n");
739 m_aEditBuffer.WriteOString(aBuffer.toString());
742 // Write the trailer.
743 m_aEditBuffer.WriteCharPtr("trailer\n<</Size ");
744 m_aEditBuffer.WriteUInt32AsString(m_aXRef.size());
745 m_aEditBuffer.WriteCharPtr("/Root ");
746 m_aEditBuffer.WriteUInt32AsString(pRoot->GetObjectValue());
747 m_aEditBuffer.WriteCharPtr(" ");
748 m_aEditBuffer.WriteUInt32AsString(pRoot->GetGenerationValue());
749 m_aEditBuffer.WriteCharPtr(" R\n");
750 auto pInfo = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Info"));
751 if (pInfo)
753 m_aEditBuffer.WriteCharPtr("/Info ");
754 m_aEditBuffer.WriteUInt32AsString(pInfo->GetObjectValue());
755 m_aEditBuffer.WriteCharPtr(" ");
756 m_aEditBuffer.WriteUInt32AsString(pInfo->GetGenerationValue());
757 m_aEditBuffer.WriteCharPtr(" R\n");
759 auto pID = dynamic_cast<PDFArrayElement*>(m_pTrailer->Lookup("ID"));
760 if (pID)
762 const std::vector<PDFElement*>& rElements = pID->GetElements();
763 m_aEditBuffer.WriteCharPtr("/ID [ <");
764 for (size_t i = 0; i < rElements.size(); ++i)
766 auto pIDString = dynamic_cast<PDFHexStringElement*>(rElements[i]);
767 if (!pIDString)
768 continue;
770 m_aEditBuffer.WriteOString(pIDString->GetValue());
771 if ((i + 1) < rElements.size())
772 m_aEditBuffer.WriteCharPtr(">\n<");
774 m_aEditBuffer.WriteCharPtr("> ]\n");
777 if (!m_aStartXRefs.empty())
779 // Write location of the previous cross-reference section.
780 m_aEditBuffer.WriteCharPtr("/Prev ");
781 m_aEditBuffer.WriteUInt32AsString(m_aStartXRefs.back());
784 m_aEditBuffer.WriteCharPtr(">>\n");
788 bool PDFDocument::Sign(const uno::Reference<security::XCertificate>& xCertificate,
789 const OUString& rDescription, bool bAdES)
791 m_aEditBuffer.Seek(STREAM_SEEK_TO_END);
792 m_aEditBuffer.WriteCharPtr("\n");
794 sal_uInt64 nSignatureLastByteRangeOffset = 0;
795 sal_Int64 nSignatureContentOffset = 0;
796 sal_Int32 nSignatureId = WriteSignatureObject(
797 rDescription, bAdES, nSignatureLastByteRangeOffset, nSignatureContentOffset);
799 sal_Int32 nAppearanceId = WriteAppearanceObject();
801 std::vector<PDFObjectElement*> aPages = GetPages();
802 if (aPages.empty() || !aPages[0])
804 SAL_WARN("vcl.filter", "PDFDocument::Sign: found no pages");
805 return false;
808 PDFObjectElement& rFirstPage = *aPages[0];
809 sal_Int32 nAnnotId = WriteAnnotObject(rFirstPage, nSignatureId, nAppearanceId);
811 if (!WritePageObject(rFirstPage, nAnnotId))
813 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Page object");
814 return false;
817 PDFReferenceElement* pRoot = nullptr;
818 if (!WriteCatalogObject(nAnnotId, pRoot))
820 SAL_WARN("vcl.filter", "PDFDocument::Sign: failed to write the updated Catalog object");
821 return false;
824 sal_uInt64 nXRefOffset = m_aEditBuffer.Tell();
825 WriteXRef(nXRefOffset, pRoot);
827 // Write startxref.
828 m_aEditBuffer.WriteCharPtr("startxref\n");
829 m_aEditBuffer.WriteUInt32AsString(nXRefOffset);
830 m_aEditBuffer.WriteCharPtr("\n%%EOF\n");
832 // Finalize the signature, now that we know the total file size.
833 // Calculate the length of the last byte range.
834 sal_uInt64 nFileEnd = m_aEditBuffer.Tell();
835 sal_Int64 nLastByteRangeLength
836 = nFileEnd - (nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
837 // Write the length to the buffer.
838 m_aEditBuffer.Seek(nSignatureLastByteRangeOffset);
839 OStringBuffer aByteRangeBuffer;
840 aByteRangeBuffer.append(nLastByteRangeLength);
841 aByteRangeBuffer.append(" ]");
842 m_aEditBuffer.WriteOString(aByteRangeBuffer.toString());
844 // Create the PKCS#7 object.
845 css::uno::Sequence<sal_Int8> aDerEncoded = xCertificate->getEncoded();
846 if (!aDerEncoded.hasElements())
848 SAL_WARN("vcl.filter", "PDFDocument::Sign: empty certificate");
849 return false;
852 m_aEditBuffer.Seek(0);
853 sal_uInt64 nBufferSize1 = nSignatureContentOffset - 1;
854 std::unique_ptr<char[]> aBuffer1(new char[nBufferSize1]);
855 m_aEditBuffer.ReadBytes(aBuffer1.get(), nBufferSize1);
857 m_aEditBuffer.Seek(nSignatureContentOffset + MAX_SIGNATURE_CONTENT_LENGTH + 1);
858 sal_uInt64 nBufferSize2 = nLastByteRangeLength;
859 std::unique_ptr<char[]> aBuffer2(new char[nBufferSize2]);
860 m_aEditBuffer.ReadBytes(aBuffer2.get(), nBufferSize2);
862 OStringBuffer aCMSHexBuffer;
863 svl::crypto::Signing aSigning(xCertificate);
864 aSigning.AddDataRange(aBuffer1.get(), nBufferSize1);
865 aSigning.AddDataRange(aBuffer2.get(), nBufferSize2);
866 if (!aSigning.Sign(aCMSHexBuffer))
868 SAL_WARN("vcl.filter", "PDFDocument::Sign: PDFWriter::Sign() failed");
869 return false;
872 assert(aCMSHexBuffer.getLength() <= MAX_SIGNATURE_CONTENT_LENGTH);
874 m_aEditBuffer.Seek(nSignatureContentOffset);
875 m_aEditBuffer.WriteOString(aCMSHexBuffer.toString());
877 return true;
880 bool PDFDocument::Write(SvStream& rStream)
882 m_aEditBuffer.Seek(0);
883 rStream.WriteStream(m_aEditBuffer);
884 return rStream.good();
887 bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
888 std::vector<std::unique_ptr<PDFElement>>& rElements,
889 PDFObjectElement* pObjectElement)
891 // Last seen object token.
892 PDFObjectElement* pObject = pObjectElement;
893 PDFNameElement* pObjectKey = nullptr;
894 PDFObjectElement* pObjectStream = nullptr;
895 bool bInXRef = false;
896 // The next number will be an xref offset.
897 bool bInStartXRef = false;
898 // Dictionary depth, so we know when we're outside any dictionaries.
899 int nDictionaryDepth = 0;
900 // Array depth, only the offset/length of the toplevel array is tracked.
901 int nArrayDepth = 0;
902 // Last seen array token that's outside any dictionaries.
903 PDFArrayElement* pArray = nullptr;
904 // If we're inside an obj/endobj pair.
905 bool bInObject = false;
906 while (true)
908 char ch;
909 rStream.ReadChar(ch);
910 if (rStream.eof())
911 break;
913 switch (ch)
915 case '%':
917 auto pComment = new PDFCommentElement(*this);
918 rElements.push_back(std::unique_ptr<PDFElement>(pComment));
919 rStream.SeekRel(-1);
920 if (!rElements.back()->Read(rStream))
922 SAL_WARN("vcl.filter",
923 "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
924 return false;
926 if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty()
927 && m_aEOFs.back() == rStream.Tell())
929 // Found EOF and partial parsing requested, we're done.
930 return true;
932 break;
934 case '<':
936 // Dictionary or hex string.
937 rStream.ReadChar(ch);
938 rStream.SeekRel(-2);
939 if (ch == '<')
941 rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
942 ++nDictionaryDepth;
944 else
945 rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
946 if (!rElements.back()->Read(rStream))
948 SAL_WARN("vcl.filter",
949 "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
950 return false;
952 break;
954 case '>':
956 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
957 --nDictionaryDepth;
958 rStream.SeekRel(-1);
959 if (!rElements.back()->Read(rStream))
961 SAL_WARN("vcl.filter",
962 "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
963 return false;
965 break;
967 case '[':
969 auto pArr = new PDFArrayElement(pObject);
970 rElements.push_back(std::unique_ptr<PDFElement>(pArr));
971 if (nDictionaryDepth == 0 && nArrayDepth == 0)
973 // The array is attached directly, inform the object.
974 pArray = pArr;
975 if (pObject)
977 pObject->SetArray(pArray);
978 pObject->SetArrayOffset(rStream.Tell());
981 ++nArrayDepth;
982 rStream.SeekRel(-1);
983 if (!rElements.back()->Read(rStream))
985 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
986 return false;
988 break;
990 case ']':
992 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
993 --nArrayDepth;
994 if (nArrayDepth == 0)
995 pArray = nullptr;
996 rStream.SeekRel(-1);
997 if (nDictionaryDepth == 0 && nArrayDepth == 0)
999 if (pObject)
1001 pObject->SetArrayLength(rStream.Tell() - pObject->GetArrayOffset());
1004 if (!rElements.back()->Read(rStream))
1006 SAL_WARN("vcl.filter",
1007 "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
1008 return false;
1010 break;
1012 case '/':
1014 auto pNameElement = new PDFNameElement();
1015 rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
1016 rStream.SeekRel(-1);
1017 if (!pNameElement->Read(rStream))
1019 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
1020 return false;
1022 if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
1023 && pNameElement->GetValue() == "ObjStm")
1024 pObjectStream = pObject;
1025 else
1026 pObjectKey = pNameElement;
1027 break;
1029 case '(':
1031 rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement));
1032 rStream.SeekRel(-1);
1033 if (!rElements.back()->Read(rStream))
1035 SAL_WARN("vcl.filter",
1036 "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
1037 return false;
1039 break;
1041 default:
1043 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)) || ch == '-')
1045 // Numbering object: an integer or a real.
1046 auto pNumberElement = new PDFNumberElement();
1047 rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
1048 rStream.SeekRel(-1);
1049 if (!pNumberElement->Read(rStream))
1051 SAL_WARN("vcl.filter",
1052 "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
1053 return false;
1055 if (bInStartXRef)
1057 bInStartXRef = false;
1058 m_aStartXRefs.push_back(pNumberElement->GetValue());
1060 auto it = m_aOffsetObjects.find(pNumberElement->GetValue());
1061 if (it != m_aOffsetObjects.end())
1062 m_pXRefStream = it->second;
1064 else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
1065 // Number element inside an object, but outside a
1066 // dictionary / array: remember it.
1067 pObject->SetNumberElement(pNumberElement);
1069 else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1071 // Possible keyword, like "obj".
1072 rStream.SeekRel(-1);
1073 OString aKeyword = ReadKeyword(rStream);
1075 bool bObj = aKeyword == "obj";
1076 if (bObj || aKeyword == "R")
1078 size_t nElements = rElements.size();
1079 if (nElements < 2)
1081 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: expected at least two "
1082 "tokens before 'obj' or 'R' keyword");
1083 return false;
1086 auto pObjectNumber
1087 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 2].get());
1088 auto pGenerationNumber
1089 = dynamic_cast<PDFNumberElement*>(rElements[nElements - 1].get());
1090 if (!pObjectNumber || !pGenerationNumber)
1092 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: missing object or "
1093 "generation number before 'obj' or 'R' keyword");
1094 return false;
1097 if (bObj)
1099 pObject = new PDFObjectElement(*this, pObjectNumber->GetValue(),
1100 pGenerationNumber->GetValue());
1101 rElements.push_back(std::unique_ptr<PDFElement>(pObject));
1102 m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
1103 m_aIDObjects[pObjectNumber->GetValue()] = pObject;
1104 bInObject = true;
1106 else
1108 auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
1109 *pGenerationNumber);
1110 rElements.push_back(std::unique_ptr<PDFElement>(pReference));
1111 if (pArray)
1112 // Reference is part of a direct (non-dictionary) array, inform the array.
1113 pArray->PushBack(rElements.back().get());
1114 if (bInObject && nDictionaryDepth > 0 && pObject)
1115 // Inform the object about a new in-dictionary reference.
1116 pObject->AddDictionaryReference(pReference);
1118 if (!rElements.back()->Read(rStream))
1120 SAL_WARN("vcl.filter",
1121 "PDFDocument::Tokenize: PDFElement::Read() failed");
1122 return false;
1125 else if (aKeyword == "stream")
1127 // Look up the length of the stream from the parent object's dictionary.
1128 size_t nLength = 0;
1129 for (size_t nElement = 0; nElement < rElements.size(); ++nElement)
1131 // Iterate in reverse order.
1132 size_t nIndex = rElements.size() - nElement - 1;
1133 PDFElement* pElement = rElements[nIndex].get();
1134 auto pObj = dynamic_cast<PDFObjectElement*>(pElement);
1135 if (!pObj)
1136 continue;
1138 PDFElement* pLookup = pObj->Lookup("Length");
1139 auto pReference = dynamic_cast<PDFReferenceElement*>(pLookup);
1140 if (pReference)
1142 // Length is provided as a reference.
1143 nLength = pReference->LookupNumber(rStream);
1144 break;
1147 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1148 if (pNumber)
1150 // Length is provided directly.
1151 nLength = pNumber->GetValue();
1152 break;
1155 SAL_WARN(
1156 "vcl.filter",
1157 "PDFDocument::Tokenize: found no Length key for stream keyword");
1158 return false;
1161 PDFDocument::SkipLineBreaks(rStream);
1162 auto pStreamElement = new PDFStreamElement(nLength);
1163 if (pObject)
1164 pObject->SetStream(pStreamElement);
1165 rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
1166 if (!rElements.back()->Read(rStream))
1168 SAL_WARN("vcl.filter",
1169 "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
1170 return false;
1173 else if (aKeyword == "endstream")
1175 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement));
1176 if (!rElements.back()->Read(rStream))
1178 SAL_WARN("vcl.filter",
1179 "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
1180 return false;
1183 else if (aKeyword == "endobj")
1185 rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement));
1186 if (!rElements.back()->Read(rStream))
1188 SAL_WARN("vcl.filter",
1189 "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
1190 return false;
1192 if (eMode == TokenizeMode::END_OF_OBJECT)
1194 // Found endobj and only object parsing was requested, we're done.
1195 return true;
1198 if (pObjectStream)
1200 // We're at the end of an object stream, parse the stored objects.
1201 pObjectStream->ParseStoredObjects();
1202 pObjectStream = nullptr;
1203 pObjectKey = nullptr;
1205 bInObject = false;
1207 else if (aKeyword == "true" || aKeyword == "false")
1208 rElements.push_back(std::unique_ptr<PDFElement>(
1209 new PDFBooleanElement(aKeyword.toBoolean())));
1210 else if (aKeyword == "null")
1211 rElements.push_back(std::unique_ptr<PDFElement>(new PDFNullElement));
1212 else if (aKeyword == "xref")
1213 // Allow 'f' and 'n' keywords.
1214 bInXRef = true;
1215 else if (bInXRef && (aKeyword == "f" || aKeyword == "n"))
1218 else if (aKeyword == "trailer")
1220 auto pTrailer = new PDFTrailerElement(*this);
1222 // Make it possible to find this trailer later by offset.
1223 pTrailer->Read(rStream);
1224 m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
1226 // When reading till the first EOF token only, remember
1227 // just the first trailer token.
1228 if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
1229 m_pTrailer = pTrailer;
1230 rElements.push_back(std::unique_ptr<PDFElement>(pTrailer));
1232 else if (aKeyword == "startxref")
1234 bInStartXRef = true;
1236 else
1238 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected '"
1239 << aKeyword << "' keyword at byte position "
1240 << rStream.Tell());
1241 return false;
1244 else
1246 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1248 SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: "
1249 << ch << " at byte position " << rStream.Tell());
1250 return false;
1253 break;
1258 return true;
1261 void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
1263 m_aIDObjects[nID] = pObject;
1266 bool PDFDocument::Read(SvStream& rStream)
1268 // Check file magic.
1269 std::vector<sal_Int8> aHeader(5);
1270 rStream.Seek(0);
1271 rStream.ReadBytes(aHeader.data(), aHeader.size());
1272 if (aHeader[0] != '%' || aHeader[1] != 'P' || aHeader[2] != 'D' || aHeader[3] != 'F'
1273 || aHeader[4] != '-')
1275 SAL_WARN("vcl.filter", "PDFDocument::Read: header mismatch");
1276 return false;
1279 // Allow later editing of the contents in-memory.
1280 rStream.Seek(0);
1281 m_aEditBuffer.WriteStream(rStream);
1283 // Look up the offset of the xref table.
1284 size_t nStartXRef = FindStartXRef(rStream);
1285 SAL_INFO("vcl.filter", "PDFDocument::Read: nStartXRef is " << nStartXRef);
1286 if (nStartXRef == 0)
1288 SAL_WARN("vcl.filter", "PDFDocument::Read: found no xref start offset");
1289 return false;
1291 while (true)
1293 rStream.Seek(nStartXRef);
1294 OString aKeyword = ReadKeyword(rStream);
1295 if (aKeyword.isEmpty())
1296 ReadXRefStream(rStream);
1298 else
1300 if (aKeyword != "xref")
1302 SAL_WARN("vcl.filter", "PDFDocument::Read: xref is not the first keyword");
1303 return false;
1305 ReadXRef(rStream);
1306 if (!Tokenize(rStream, TokenizeMode::EOF_TOKEN, m_aElements, nullptr))
1308 SAL_WARN("vcl.filter", "PDFDocument::Read: failed to tokenizer trailer after xref");
1309 return false;
1313 PDFNumberElement* pPrev = nullptr;
1314 if (m_pTrailer)
1316 pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
1318 // Remember the offset of this trailer in the correct order. It's
1319 // possible that newer trailers don't have a larger offset.
1320 m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
1322 else if (m_pXRefStream)
1323 pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
1324 if (pPrev)
1325 nStartXRef = pPrev->GetValue();
1327 // Reset state, except the edit buffer.
1328 m_aElements.clear();
1329 m_aOffsetObjects.clear();
1330 m_aIDObjects.clear();
1331 m_aStartXRefs.clear();
1332 m_aEOFs.clear();
1333 m_pTrailer = nullptr;
1334 m_pXRefStream = nullptr;
1335 if (!pPrev)
1336 break;
1339 // Then we can tokenize the stream.
1340 rStream.Seek(0);
1341 return Tokenize(rStream, TokenizeMode::END_OF_STREAM, m_aElements, nullptr);
1344 OString PDFDocument::ReadKeyword(SvStream& rStream)
1346 OStringBuffer aBuf;
1347 char ch;
1348 rStream.ReadChar(ch);
1349 if (rStream.eof())
1350 return OString();
1351 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
1353 aBuf.append(ch);
1354 rStream.ReadChar(ch);
1355 if (rStream.eof())
1356 return aBuf.toString();
1358 rStream.SeekRel(-1);
1359 return aBuf.toString();
1362 size_t PDFDocument::FindStartXRef(SvStream& rStream)
1364 // Find the "startxref" token, somewhere near the end of the document.
1365 std::vector<char> aBuf(1024);
1366 rStream.Seek(STREAM_SEEK_TO_END);
1367 if (rStream.Tell() > aBuf.size())
1368 rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
1369 else
1370 // The document is really short, then just read it from the start.
1371 rStream.Seek(0);
1372 size_t nBeforePeek = rStream.Tell();
1373 size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
1374 rStream.Seek(nBeforePeek);
1375 if (nSize != aBuf.size())
1376 aBuf.resize(nSize);
1377 OString aPrefix("startxref");
1378 // Find the last startxref at the end of the document.
1379 auto itLastValid = aBuf.end();
1380 auto it = aBuf.begin();
1381 while (true)
1383 it = std::search(it, aBuf.end(), aPrefix.getStr(), aPrefix.getStr() + aPrefix.getLength());
1384 if (it == aBuf.end())
1385 break;
1387 itLastValid = it;
1388 ++it;
1390 if (itLastValid == aBuf.end())
1392 SAL_WARN("vcl.filter", "PDFDocument::FindStartXRef: found no startxref");
1393 return 0;
1396 rStream.SeekRel(itLastValid - aBuf.begin() + aPrefix.getLength());
1397 if (rStream.eof())
1399 SAL_WARN("vcl.filter",
1400 "PDFDocument::FindStartXRef: unexpected end of stream after startxref");
1401 return 0;
1404 PDFDocument::SkipWhitespace(rStream);
1405 PDFNumberElement aNumber;
1406 if (!aNumber.Read(rStream))
1407 return 0;
1408 return aNumber.GetValue();
1411 void PDFDocument::ReadXRefStream(SvStream& rStream)
1413 // Look up the stream length in the object dictionary.
1414 if (!Tokenize(rStream, TokenizeMode::END_OF_OBJECT, m_aElements, nullptr))
1416 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: failed to read object");
1417 return;
1420 if (m_aElements.empty())
1422 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no tokens found");
1423 return;
1426 PDFObjectElement* pObject = nullptr;
1427 for (const auto& pElement : m_aElements)
1429 if (auto pObj = dynamic_cast<PDFObjectElement*>(pElement.get()))
1431 pObject = pObj;
1432 break;
1435 if (!pObject)
1437 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no object token found");
1438 return;
1441 // So that the Prev key can be looked up later.
1442 m_pXRefStream = pObject;
1444 PDFElement* pLookup = pObject->Lookup("Length");
1445 auto pNumber = dynamic_cast<PDFNumberElement*>(pLookup);
1446 if (!pNumber)
1448 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: stream length is not provided");
1449 return;
1451 sal_uInt64 nLength = pNumber->GetValue();
1453 // Look up the stream offset.
1454 PDFStreamElement* pStream = nullptr;
1455 for (const auto& pElement : m_aElements)
1457 if (auto pS = dynamic_cast<PDFStreamElement*>(pElement.get()))
1459 pStream = pS;
1460 break;
1463 if (!pStream)
1465 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no stream token found");
1466 return;
1469 // Read and decompress it.
1470 rStream.Seek(pStream->GetOffset());
1471 std::vector<char> aBuf(nLength);
1472 rStream.ReadBytes(aBuf.data(), aBuf.size());
1474 auto pFilter = dynamic_cast<PDFNameElement*>(pObject->Lookup("Filter"));
1475 if (!pFilter)
1477 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: no Filter found");
1478 return;
1481 if (pFilter->GetValue() != "FlateDecode")
1483 SAL_WARN("vcl.filter",
1484 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
1485 return;
1488 int nColumns = 1;
1489 int nPredictor = 1;
1490 if (auto pDecodeParams = dynamic_cast<PDFDictionaryElement*>(pObject->Lookup("DecodeParms")))
1492 const std::map<OString, PDFElement*>& rItems = pDecodeParams->GetItems();
1493 auto it = rItems.find("Columns");
1494 if (it != rItems.end())
1495 if (auto pColumns = dynamic_cast<PDFNumberElement*>(it->second))
1496 nColumns = pColumns->GetValue();
1497 it = rItems.find("Predictor");
1498 if (it != rItems.end())
1499 if (auto pPredictor = dynamic_cast<PDFNumberElement*>(it->second))
1500 nPredictor = pPredictor->GetValue();
1503 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
1504 SvMemoryStream aStream;
1505 ZCodec aZCodec;
1506 aZCodec.BeginCompression();
1507 aZCodec.Decompress(aSource, aStream);
1508 if (!aZCodec.EndCompression())
1510 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: decompression failed");
1511 return;
1514 // Look up the first and the last entry we need to read.
1515 auto pIndex = dynamic_cast<PDFArrayElement*>(pObject->Lookup("Index"));
1516 std::vector<size_t> aFirstObjects;
1517 std::vector<size_t> aNumberOfObjects;
1518 if (!pIndex)
1520 auto pSize = dynamic_cast<PDFNumberElement*>(pObject->Lookup("Size"));
1521 if (pSize)
1523 aFirstObjects.push_back(0);
1524 aNumberOfObjects.push_back(pSize->GetValue());
1526 else
1528 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: Index and Size not found");
1529 return;
1532 else
1534 const std::vector<PDFElement*>& rIndexElements = pIndex->GetElements();
1535 size_t nFirstObject = 0;
1536 for (size_t i = 0; i < rIndexElements.size(); ++i)
1538 if (i % 2 == 0)
1540 auto pFirstObject = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1541 if (!pFirstObject)
1543 SAL_WARN("vcl.filter",
1544 "PDFDocument::ReadXRefStream: Index has no first object");
1545 return;
1547 nFirstObject = pFirstObject->GetValue();
1548 continue;
1551 auto pNumberOfObjects = dynamic_cast<PDFNumberElement*>(rIndexElements[i]);
1552 if (!pNumberOfObjects)
1554 SAL_WARN("vcl.filter",
1555 "PDFDocument::ReadXRefStream: Index has no number of objects");
1556 return;
1558 aFirstObjects.push_back(nFirstObject);
1559 aNumberOfObjects.push_back(pNumberOfObjects->GetValue());
1563 // Look up the format of a single entry.
1564 const int nWSize = 3;
1565 auto pW = dynamic_cast<PDFArrayElement*>(pObject->Lookup("W"));
1566 if (!pW || pW->GetElements().size() < nWSize)
1568 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W not found or has < 3 elements");
1569 return;
1571 int aW[nWSize];
1572 // First character is the (kind of) repeated predictor.
1573 int nLineLength = 1;
1574 for (size_t i = 0; i < nWSize; ++i)
1576 auto pI = dynamic_cast<PDFNumberElement*>(pW->GetElements()[i]);
1577 if (!pI)
1579 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: W contains non-number");
1580 return;
1582 aW[i] = pI->GetValue();
1583 nLineLength += aW[i];
1586 if (nPredictor > 1 && nLineLength - 1 != nColumns)
1588 SAL_WARN("vcl.filter",
1589 "PDFDocument::ReadXRefStream: /DecodeParms/Columns is inconsistent with /W");
1590 return;
1593 aStream.Seek(0);
1594 for (size_t nSubSection = 0; nSubSection < aFirstObjects.size(); ++nSubSection)
1596 size_t nFirstObject = aFirstObjects[nSubSection];
1597 size_t nNumberOfObjects = aNumberOfObjects[nSubSection];
1599 // This is the line as read from the stream.
1600 std::vector<unsigned char> aOrigLine(nLineLength);
1601 // This is the line as it appears after tweaking according to nPredictor.
1602 std::vector<unsigned char> aFilteredLine(nLineLength);
1603 for (size_t nEntry = 0; nEntry < nNumberOfObjects; ++nEntry)
1605 size_t nIndex = nFirstObject + nEntry;
1607 aStream.ReadBytes(aOrigLine.data(), aOrigLine.size());
1608 if (nPredictor > 1 && aOrigLine[0] + 10 != nPredictor)
1610 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: in-stream predictor is "
1611 "inconsistent with /DecodeParms/Predictor for object #"
1612 << nIndex);
1613 return;
1616 for (int i = 0; i < nLineLength; ++i)
1618 switch (nPredictor)
1620 case 1:
1621 // No prediction.
1622 break;
1623 case 12:
1624 // PNG prediction: up (on all rows).
1625 aFilteredLine[i] = aFilteredLine[i] + aOrigLine[i];
1626 break;
1627 default:
1628 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: unexpected predictor: "
1629 << nPredictor);
1630 return;
1631 break;
1635 // First character is already handled above.
1636 int nPos = 1;
1637 size_t nType = 0;
1638 // Start of the current field in the stream data.
1639 int nOffset = nPos;
1640 for (; nPos < nOffset + aW[0]; ++nPos)
1642 unsigned char nCh = aFilteredLine[nPos];
1643 nType = (nType << 8) + nCh;
1646 // Start of the object in the file stream.
1647 size_t nStreamOffset = 0;
1648 nOffset = nPos;
1649 for (; nPos < nOffset + aW[1]; ++nPos)
1651 unsigned char nCh = aFilteredLine[nPos];
1652 nStreamOffset = (nStreamOffset << 8) + nCh;
1655 // Generation number of the object.
1656 size_t nGenerationNumber = 0;
1657 nOffset = nPos;
1658 for (; nPos < nOffset + aW[2]; ++nPos)
1660 unsigned char nCh = aFilteredLine[nPos];
1661 nGenerationNumber = (nGenerationNumber << 8) + nCh;
1664 // Ignore invalid nType.
1665 if (nType <= 2)
1667 if (m_aXRef.find(nIndex) == m_aXRef.end())
1669 XRefEntry aEntry;
1670 switch (nType)
1672 case 0:
1673 aEntry.SetType(XRefEntryType::FREE);
1674 break;
1675 case 1:
1676 aEntry.SetType(XRefEntryType::NOT_COMPRESSED);
1677 break;
1678 case 2:
1679 aEntry.SetType(XRefEntryType::COMPRESSED);
1680 break;
1682 aEntry.SetOffset(nStreamOffset);
1683 m_aXRef[nIndex] = aEntry;
1690 void PDFDocument::ReadXRef(SvStream& rStream)
1692 PDFDocument::SkipWhitespace(rStream);
1694 while (true)
1696 PDFNumberElement aFirstObject;
1697 if (!aFirstObject.Read(rStream))
1699 // Next token is not a number, it'll be the trailer.
1700 return;
1703 if (aFirstObject.GetValue() < 0)
1705 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected first object number >= 0");
1706 return;
1709 PDFDocument::SkipWhitespace(rStream);
1710 PDFNumberElement aNumberOfEntries;
1711 if (!aNumberOfEntries.Read(rStream))
1713 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read number of entries");
1714 return;
1717 if (aNumberOfEntries.GetValue() < 0)
1719 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: expected zero or more entries");
1720 return;
1723 size_t nSize = aNumberOfEntries.GetValue();
1724 for (size_t nEntry = 0; nEntry < nSize; ++nEntry)
1726 size_t nIndex = aFirstObject.GetValue() + nEntry;
1727 PDFDocument::SkipWhitespace(rStream);
1728 PDFNumberElement aOffset;
1729 if (!aOffset.Read(rStream))
1731 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read offset");
1732 return;
1735 PDFDocument::SkipWhitespace(rStream);
1736 PDFNumberElement aGenerationNumber;
1737 if (!aGenerationNumber.Read(rStream))
1739 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: failed to read generation number");
1740 return;
1743 PDFDocument::SkipWhitespace(rStream);
1744 OString aKeyword = ReadKeyword(rStream);
1745 if (aKeyword != "f" && aKeyword != "n")
1747 SAL_WARN("vcl.filter", "PDFDocument::ReadXRef: unexpected keyword");
1748 return;
1750 // xrefs are read in reverse order, so never update an existing
1751 // offset with an older one.
1752 if (m_aXRef.find(nIndex) == m_aXRef.end())
1754 XRefEntry aEntry;
1755 aEntry.SetOffset(aOffset.GetValue());
1756 // Initially only the first entry is dirty.
1757 if (nIndex == 0)
1758 aEntry.SetDirty(true);
1759 m_aXRef[nIndex] = aEntry;
1761 PDFDocument::SkipWhitespace(rStream);
1766 void PDFDocument::SkipWhitespace(SvStream& rStream)
1768 char ch = 0;
1770 while (true)
1772 rStream.ReadChar(ch);
1773 if (rStream.eof())
1774 break;
1776 if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
1778 rStream.SeekRel(-1);
1779 return;
1784 void PDFDocument::SkipLineBreaks(SvStream& rStream)
1786 char ch = 0;
1788 while (true)
1790 rStream.ReadChar(ch);
1791 if (rStream.eof())
1792 break;
1794 if (ch != '\n' && ch != '\r')
1796 rStream.SeekRel(-1);
1797 return;
1802 size_t PDFDocument::GetObjectOffset(size_t nIndex) const
1804 auto it = m_aXRef.find(nIndex);
1805 if (it == m_aXRef.end() || it->second.GetType() == XRefEntryType::COMPRESSED)
1807 SAL_WARN("vcl.filter", "PDFDocument::GetObjectOffset: wanted to look up index #"
1808 << nIndex << ", but failed");
1809 return 0;
1812 return it->second.GetOffset();
1815 const std::vector<std::unique_ptr<PDFElement>>& PDFDocument::GetElements() const
1817 return m_aElements;
1820 /// Visits the page tree recursively, looking for page objects.
1821 static void visitPages(PDFObjectElement* pPages, std::vector<PDFObjectElement*>& rRet)
1823 auto pKids = dynamic_cast<PDFArrayElement*>(pPages->Lookup("Kids"));
1824 if (!pKids)
1826 SAL_WARN("vcl.filter", "visitPages: pages has no kids");
1827 return;
1830 pPages->setVisiting(true);
1832 for (const auto& pKid : pKids->GetElements())
1834 auto pReference = dynamic_cast<PDFReferenceElement*>(pKid);
1835 if (!pReference)
1836 continue;
1838 PDFObjectElement* pKidObject = pReference->LookupObject();
1839 if (!pKidObject)
1840 continue;
1842 // detect if visiting reenters itself
1843 if (pKidObject->alreadyVisiting())
1845 SAL_WARN("vcl.filter", "visitPages: loop in hierarchy");
1846 continue;
1849 auto pName = dynamic_cast<PDFNameElement*>(pKidObject->Lookup("Type"));
1850 if (pName && pName->GetValue() == "Pages")
1851 // Pages inside pages: recurse.
1852 visitPages(pKidObject, rRet);
1853 else
1854 // Found an actual page.
1855 rRet.push_back(pKidObject);
1858 pPages->setVisiting(false);
1861 std::vector<PDFObjectElement*> PDFDocument::GetPages()
1863 std::vector<PDFObjectElement*> aRet;
1865 PDFReferenceElement* pRoot = nullptr;
1867 PDFTrailerElement* pTrailer = nullptr;
1868 if (!m_aTrailerOffsets.empty())
1870 // Get access to the latest trailer, and work with the keys of that
1871 // one.
1872 auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
1873 if (it != m_aOffsetTrailers.end())
1874 pTrailer = it->second;
1877 if (pTrailer)
1878 pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
1879 else if (m_pXRefStream)
1880 pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
1882 if (!pRoot)
1884 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no Root key");
1885 return aRet;
1888 PDFObjectElement* pCatalog = pRoot->LookupObject();
1889 if (!pCatalog)
1891 SAL_WARN("vcl.filter", "PDFDocument::GetPages: trailer has no catalog");
1892 return aRet;
1895 PDFObjectElement* pPages = pCatalog->LookupObject("Pages");
1896 if (!pPages)
1898 SAL_WARN("vcl.filter", "PDFDocument::GetPages: catalog (obj " << pCatalog->GetObjectValue()
1899 << ") has no pages");
1900 return aRet;
1903 visitPages(pPages, aRet);
1905 return aRet;
1908 void PDFDocument::PushBackEOF(size_t nOffset) { m_aEOFs.push_back(nOffset); }
1910 std::vector<PDFObjectElement*> PDFDocument::GetSignatureWidgets()
1912 std::vector<PDFObjectElement*> aRet;
1914 std::vector<PDFObjectElement*> aPages = GetPages();
1916 for (const auto& pPage : aPages)
1918 if (!pPage)
1919 continue;
1921 PDFElement* pAnnotsElement = pPage->Lookup("Annots");
1922 auto pAnnots = dynamic_cast<PDFArrayElement*>(pAnnotsElement);
1923 if (!pAnnots)
1925 // Annots is not an array, see if it's a reference to an object
1926 // with a direct array.
1927 auto pAnnotsRef = dynamic_cast<PDFReferenceElement*>(pAnnotsElement);
1928 if (pAnnotsRef)
1930 if (PDFObjectElement* pAnnotsObject = pAnnotsRef->LookupObject())
1932 pAnnots = pAnnotsObject->GetArray();
1937 if (!pAnnots)
1938 continue;
1940 for (const auto& pAnnot : pAnnots->GetElements())
1942 auto pReference = dynamic_cast<PDFReferenceElement*>(pAnnot);
1943 if (!pReference)
1944 continue;
1946 PDFObjectElement* pAnnotObject = pReference->LookupObject();
1947 if (!pAnnotObject)
1948 continue;
1950 auto pFT = dynamic_cast<PDFNameElement*>(pAnnotObject->Lookup("FT"));
1951 if (!pFT || pFT->GetValue() != "Sig")
1952 continue;
1954 aRet.push_back(pAnnotObject);
1958 return aRet;
1961 std::vector<unsigned char> PDFDocument::DecodeHexString(PDFHexStringElement const* pElement)
1963 return svl::crypto::DecodeHexString(pElement->GetValue());
1966 PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)
1967 : m_rDoc(rDoc)
1971 bool PDFCommentElement::Read(SvStream& rStream)
1973 // Read from (including) the % char till (excluding) the end of the line/stream.
1974 OStringBuffer aBuf;
1975 char ch;
1976 rStream.ReadChar(ch);
1977 while (true)
1979 if (ch == '\n' || ch == '\r' || rStream.eof())
1981 m_aComment = aBuf.makeStringAndClear();
1983 if (m_aComment.startsWith("%%EOF"))
1984 m_rDoc.PushBackEOF(rStream.Tell());
1986 SAL_INFO("vcl.filter", "PDFCommentElement::Read: m_aComment is '" << m_aComment << "'");
1987 return true;
1989 aBuf.append(ch);
1990 rStream.ReadChar(ch);
1993 return false;
1996 PDFNumberElement::PDFNumberElement() = default;
1998 bool PDFNumberElement::Read(SvStream& rStream)
2000 OStringBuffer aBuf;
2001 m_nOffset = rStream.Tell();
2002 char ch;
2003 rStream.ReadChar(ch);
2004 if (rStream.eof())
2006 return false;
2008 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2010 rStream.SeekRel(-1);
2011 return false;
2013 while (!rStream.eof())
2015 if (!rtl::isAsciiDigit(static_cast<unsigned char>(ch)) && ch != '-' && ch != '.')
2017 rStream.SeekRel(-1);
2018 m_nLength = rStream.Tell() - m_nOffset;
2019 m_fValue = aBuf.makeStringAndClear().toDouble();
2020 SAL_INFO("vcl.filter", "PDFNumberElement::Read: m_fValue is '" << m_fValue << "'");
2021 return true;
2023 aBuf.append(ch);
2024 rStream.ReadChar(ch);
2027 return false;
2030 sal_uInt64 PDFNumberElement::GetLocation() const { return m_nOffset; }
2032 sal_uInt64 PDFNumberElement::GetLength() const { return m_nLength; }
2034 PDFBooleanElement::PDFBooleanElement(bool /*bValue*/) {}
2036 bool PDFBooleanElement::Read(SvStream& /*rStream*/) { return true; }
2038 bool PDFNullElement::Read(SvStream& /*rStream*/) { return true; }
2040 bool PDFHexStringElement::Read(SvStream& rStream)
2042 char ch;
2043 rStream.ReadChar(ch);
2044 if (ch != '<')
2046 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '<' as first character");
2047 return false;
2049 rStream.ReadChar(ch);
2051 OStringBuffer aBuf;
2052 while (!rStream.eof())
2054 if (ch == '>')
2056 m_aValue = aBuf.makeStringAndClear();
2057 SAL_INFO("vcl.filter",
2058 "PDFHexStringElement::Read: m_aValue length is " << m_aValue.getLength());
2059 return true;
2061 aBuf.append(ch);
2062 rStream.ReadChar(ch);
2065 return false;
2068 const OString& PDFHexStringElement::GetValue() const { return m_aValue; }
2070 bool PDFLiteralStringElement::Read(SvStream& rStream)
2072 char nPrevCh = 0;
2073 char ch = 0;
2074 rStream.ReadChar(ch);
2075 if (ch != '(')
2077 SAL_INFO("vcl.filter", "PDFHexStringElement::Read: expected '(' as first character");
2078 return false;
2080 nPrevCh = ch;
2081 rStream.ReadChar(ch);
2083 // Start with 1 nesting level as we read a '(' above already.
2084 int nDepth = 1;
2085 OStringBuffer aBuf;
2086 while (!rStream.eof())
2088 if (ch == '(' && nPrevCh != '\\')
2089 ++nDepth;
2091 if (ch == ')' && nPrevCh != '\\')
2092 --nDepth;
2094 if (nDepth == 0)
2096 // ')' of the outermost '(' is reached.
2097 m_aValue = aBuf.makeStringAndClear();
2098 SAL_INFO("vcl.filter",
2099 "PDFLiteralStringElement::Read: m_aValue is '" << m_aValue << "'");
2100 return true;
2102 aBuf.append(ch);
2103 nPrevCh = ch;
2104 rStream.ReadChar(ch);
2107 return false;
2110 const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
2112 PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc)
2113 : m_rDoc(rDoc)
2117 bool PDFTrailerElement::Read(SvStream& rStream)
2119 m_nOffset = rStream.Tell();
2120 return true;
2123 PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
2125 if (m_aDictionary.empty())
2126 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2128 return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2131 sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
2133 double PDFNumberElement::GetValue() const { return m_fValue; }
2135 PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue)
2136 : m_rDoc(rDoc)
2137 , m_fObjectValue(fObjectValue)
2138 , m_fGenerationValue(fGenerationValue)
2139 , m_pNumberElement(nullptr)
2140 , m_nDictionaryOffset(0)
2141 , m_nDictionaryLength(0)
2142 , m_pDictionaryElement(nullptr)
2143 , m_nArrayOffset(0)
2144 , m_nArrayLength(0)
2145 , m_pArrayElement(nullptr)
2146 , m_pStreamElement(nullptr)
2150 bool PDFObjectElement::Read(SvStream& /*rStream*/)
2152 SAL_INFO("vcl.filter",
2153 "PDFObjectElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " obj");
2154 return true;
2157 PDFDictionaryElement::PDFDictionaryElement() = default;
2159 size_t PDFDictionaryElement::Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements,
2160 PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary)
2162 // The index of last parsed element, in case of nested dictionaries.
2163 size_t nRet = 0;
2165 if (!rDictionary.empty())
2166 return nRet;
2168 pThis->setParsing(true);
2170 auto pThisObject = dynamic_cast<PDFObjectElement*>(pThis);
2171 // This is set to non-nullptr here for nested dictionaries only.
2172 auto pThisDictionary = dynamic_cast<PDFDictionaryElement*>(pThis);
2174 // Find out where the dictionary for this object starts.
2175 size_t nIndex = 0;
2176 for (size_t i = 0; i < rElements.size(); ++i)
2178 if (rElements[i].get() == pThis)
2180 nIndex = i;
2181 break;
2185 OString aName;
2186 sal_uInt64 nNameOffset = 0;
2187 std::vector<PDFNumberElement*> aNumbers;
2188 // The array value we're in -- if any.
2189 PDFArrayElement* pArray = nullptr;
2190 sal_uInt64 nDictionaryOffset = 0;
2191 int nDictionaryDepth = 0;
2192 // Toplevel dictionary found (not inside an array).
2193 bool bDictionaryFound = false;
2194 // Toplevel array found (not inside a dictionary).
2195 bool bArrayFound = false;
2196 for (size_t i = nIndex; i < rElements.size(); ++i)
2198 // Dictionary tokens can be nested, track enter/leave.
2199 if (auto pDictionary = dynamic_cast<PDFDictionaryElement*>(rElements[i].get()))
2201 bDictionaryFound = true;
2202 if (++nDictionaryDepth == 1)
2204 // First dictionary start, track start offset.
2205 nDictionaryOffset = pDictionary->m_nLocation;
2206 if (pThisObject)
2208 if (!bArrayFound)
2209 // Then the toplevel dictionary of the object.
2210 pThisObject->SetDictionary(pDictionary);
2211 pThisDictionary = pDictionary;
2212 pThisObject->SetDictionaryOffset(nDictionaryOffset);
2215 else if (!pDictionary->alreadyParsing())
2217 // Nested dictionary.
2218 const size_t nexti
2219 = PDFDictionaryElement::Parse(rElements, pDictionary, pDictionary->m_aItems);
2220 if (nexti >= i) // ensure we go forwards and not endlessly loop
2222 i = nexti;
2223 rDictionary[aName] = pDictionary;
2224 aName.clear();
2229 if (auto pEndDictionary = dynamic_cast<PDFEndDictionaryElement*>(rElements[i].get()))
2231 if (--nDictionaryDepth == 0)
2233 // Last dictionary end, track length and stop parsing.
2234 if (pThisObject)
2235 pThisObject->SetDictionaryLength(pEndDictionary->GetLocation()
2236 - nDictionaryOffset);
2237 nRet = i;
2238 break;
2242 auto pName = dynamic_cast<PDFNameElement*>(rElements[i].get());
2243 if (pName)
2245 if (!aNumbers.empty())
2247 PDFNumberElement* pNumber = aNumbers.back();
2248 rDictionary[aName] = pNumber;
2249 if (pThisDictionary)
2251 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2252 pThisDictionary->SetKeyValueLength(
2253 aName, pNumber->GetLocation() + pNumber->GetLength() - nNameOffset);
2255 aName.clear();
2256 aNumbers.clear();
2259 if (aName.isEmpty())
2261 // Remember key.
2262 aName = pName->GetValue();
2263 nNameOffset = pName->GetLocation();
2265 else
2267 if (pArray)
2269 if (bDictionaryFound)
2270 // Array inside dictionary.
2271 pArray->PushBack(pName);
2273 else
2275 // Name-name key-value.
2276 rDictionary[aName] = pName;
2277 if (pThisDictionary)
2279 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2280 pThisDictionary->SetKeyValueLength(aName, pName->GetLocation()
2281 + PDFNameElement::GetLength()
2282 - nNameOffset);
2284 aName.clear();
2287 continue;
2290 auto pArr = dynamic_cast<PDFArrayElement*>(rElements[i].get());
2291 if (pArr)
2293 bArrayFound = true;
2294 pArray = pArr;
2295 continue;
2298 auto pEndArr = dynamic_cast<PDFEndArrayElement*>(rElements[i].get());
2299 if (pArray && pEndArr)
2301 for (auto& pNumber : aNumbers)
2302 pArray->PushBack(pNumber);
2303 aNumbers.clear();
2304 rDictionary[aName] = pArray;
2305 if (pThisDictionary)
2307 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2308 // Include the ending ']' in the length of the key - (array)value pair length.
2309 pThisDictionary->SetKeyValueLength(aName, pEndArr->GetOffset() - nNameOffset + 1);
2311 aName.clear();
2312 pArray = nullptr;
2313 continue;
2316 auto pReference = dynamic_cast<PDFReferenceElement*>(rElements[i].get());
2317 if (pReference)
2319 if (!pArray)
2321 rDictionary[aName] = pReference;
2322 if (pThisDictionary)
2324 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2325 pThisDictionary->SetKeyValueLength(aName,
2326 pReference->GetOffset() - nNameOffset);
2328 aName.clear();
2330 else
2332 if (bDictionaryFound)
2333 // Array inside dictionary.
2334 pArray->PushBack(pReference);
2336 aNumbers.clear();
2337 continue;
2340 auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(rElements[i].get());
2341 if (pLiteralString)
2343 rDictionary[aName] = pLiteralString;
2344 if (pThisDictionary)
2345 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2346 aName.clear();
2347 continue;
2350 auto pBoolean = dynamic_cast<PDFBooleanElement*>(rElements[i].get());
2351 if (pBoolean)
2353 rDictionary[aName] = pBoolean;
2354 if (pThisDictionary)
2355 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2356 aName.clear();
2357 continue;
2360 auto pHexString = dynamic_cast<PDFHexStringElement*>(rElements[i].get());
2361 if (pHexString)
2363 if (!pArray)
2365 rDictionary[aName] = pHexString;
2366 if (pThisDictionary)
2367 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2368 aName.clear();
2370 else
2372 pArray->PushBack(pHexString);
2374 continue;
2377 if (dynamic_cast<PDFEndObjectElement*>(rElements[i].get()))
2378 break;
2380 // Just remember this, so that in case it's not a reference parameter,
2381 // we can handle it later.
2382 auto pNumber = dynamic_cast<PDFNumberElement*>(rElements[i].get());
2383 if (pNumber)
2384 aNumbers.push_back(pNumber);
2387 if (!aNumbers.empty())
2389 rDictionary[aName] = aNumbers.back();
2390 if (pThisDictionary)
2391 pThisDictionary->SetKeyOffset(aName, nNameOffset);
2392 aName.clear();
2393 aNumbers.clear();
2396 pThis->setParsing(false);
2398 return nRet;
2401 PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
2402 const OString& rKey)
2404 auto it = rDictionary.find(rKey);
2405 if (it == rDictionary.end())
2406 return nullptr;
2408 return it->second;
2411 PDFObjectElement* PDFDictionaryElement::LookupObject(const OString& rDictionaryKey)
2413 auto pKey = dynamic_cast<PDFReferenceElement*>(
2414 PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey));
2415 if (!pKey)
2417 SAL_WARN("vcl.filter",
2418 "PDFDictionaryElement::LookupObject: no such key with reference value: "
2419 << rDictionaryKey);
2420 return nullptr;
2423 return pKey->LookupObject();
2426 PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
2428 return PDFDictionaryElement::Lookup(m_aItems, rDictionaryKey);
2431 PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
2433 if (m_aDictionary.empty())
2435 if (!m_aElements.empty())
2436 // This is a stored object in an object stream.
2437 PDFDictionaryElement::Parse(m_aElements, this, m_aDictionary);
2438 else
2439 // Normal object: elements are stored as members of the document itself.
2440 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2443 return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
2446 PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
2448 auto pKey = dynamic_cast<PDFReferenceElement*>(Lookup(rDictionaryKey));
2449 if (!pKey)
2451 SAL_WARN("vcl.filter", "PDFObjectElement::LookupObject: no such key with reference value: "
2452 << rDictionaryKey);
2453 return nullptr;
2456 return pKey->LookupObject();
2459 double PDFObjectElement::GetObjectValue() const { return m_fObjectValue; }
2461 void PDFObjectElement::SetDictionaryOffset(sal_uInt64 nDictionaryOffset)
2463 m_nDictionaryOffset = nDictionaryOffset;
2466 sal_uInt64 PDFObjectElement::GetDictionaryOffset()
2468 if (m_aDictionary.empty())
2469 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2471 return m_nDictionaryOffset;
2474 void PDFObjectElement::SetArrayOffset(sal_uInt64 nArrayOffset) { m_nArrayOffset = nArrayOffset; }
2476 sal_uInt64 PDFObjectElement::GetArrayOffset() const { return m_nArrayOffset; }
2478 void PDFDictionaryElement::SetKeyOffset(const OString& rKey, sal_uInt64 nOffset)
2480 m_aDictionaryKeyOffset[rKey] = nOffset;
2483 void PDFDictionaryElement::SetKeyValueLength(const OString& rKey, sal_uInt64 nLength)
2485 m_aDictionaryKeyValueLength[rKey] = nLength;
2488 sal_uInt64 PDFDictionaryElement::GetKeyOffset(const OString& rKey) const
2490 auto it = m_aDictionaryKeyOffset.find(rKey);
2491 if (it == m_aDictionaryKeyOffset.end())
2492 return 0;
2494 return it->second;
2497 sal_uInt64 PDFDictionaryElement::GetKeyValueLength(const OString& rKey) const
2499 auto it = m_aDictionaryKeyValueLength.find(rKey);
2500 if (it == m_aDictionaryKeyValueLength.end())
2501 return 0;
2503 return it->second;
2506 const std::map<OString, PDFElement*>& PDFDictionaryElement::GetItems() const { return m_aItems; }
2508 void PDFObjectElement::SetDictionaryLength(sal_uInt64 nDictionaryLength)
2510 m_nDictionaryLength = nDictionaryLength;
2513 sal_uInt64 PDFObjectElement::GetDictionaryLength()
2515 if (m_aDictionary.empty())
2516 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2518 return m_nDictionaryLength;
2521 void PDFObjectElement::SetArrayLength(sal_uInt64 nArrayLength) { m_nArrayLength = nArrayLength; }
2523 sal_uInt64 PDFObjectElement::GetArrayLength() const { return m_nArrayLength; }
2525 PDFDictionaryElement* PDFObjectElement::GetDictionary()
2527 if (m_aDictionary.empty())
2528 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2529 return m_pDictionaryElement;
2532 void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement)
2534 m_pDictionaryElement = pDictionaryElement;
2537 void PDFObjectElement::SetNumberElement(PDFNumberElement* pNumberElement)
2539 m_pNumberElement = pNumberElement;
2542 PDFNumberElement* PDFObjectElement::GetNumberElement() const { return m_pNumberElement; }
2544 const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
2546 return m_aDictionaryReferences;
2549 void PDFObjectElement::AddDictionaryReference(PDFReferenceElement* pReference)
2551 m_aDictionaryReferences.push_back(pReference);
2554 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
2556 if (m_aDictionary.empty())
2557 PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
2559 return m_aDictionary;
2562 void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
2564 void PDFObjectElement::SetStream(PDFStreamElement* pStreamElement)
2566 m_pStreamElement = pStreamElement;
2569 PDFStreamElement* PDFObjectElement::GetStream() const { return m_pStreamElement; }
2571 PDFArrayElement* PDFObjectElement::GetArray() const { return m_pArrayElement; }
2573 void PDFObjectElement::ParseStoredObjects()
2575 if (!m_pStreamElement)
2577 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no stream");
2578 return;
2581 auto pType = dynamic_cast<PDFNameElement*>(Lookup("Type"));
2582 if (!pType || pType->GetValue() != "ObjStm")
2584 if (!pType)
2585 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing unexpected type");
2586 else
2587 SAL_WARN("vcl.filter",
2588 "PDFDocument::ReadXRefStream: unexpected type: " << pType->GetValue());
2589 return;
2592 auto pFilter = dynamic_cast<PDFNameElement*>(Lookup("Filter"));
2593 if (!pFilter || pFilter->GetValue() != "FlateDecode")
2595 if (!pFilter)
2596 SAL_WARN("vcl.filter", "PDFDocument::ReadXRefStream: missing filter");
2597 else
2598 SAL_WARN("vcl.filter",
2599 "PDFDocument::ReadXRefStream: unexpected filter: " << pFilter->GetValue());
2600 return;
2603 auto pFirst = dynamic_cast<PDFNumberElement*>(Lookup("First"));
2604 if (!pFirst)
2606 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no First");
2607 return;
2610 auto pN = dynamic_cast<PDFNumberElement*>(Lookup("N"));
2611 if (!pN)
2613 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no N");
2614 return;
2616 size_t nN = pN->GetValue();
2618 auto pLength = dynamic_cast<PDFNumberElement*>(Lookup("Length"));
2619 if (!pLength)
2621 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: no length");
2622 return;
2624 size_t nLength = pLength->GetValue();
2626 // Read and decompress it.
2627 SvMemoryStream& rEditBuffer = m_rDoc.GetEditBuffer();
2628 rEditBuffer.Seek(m_pStreamElement->GetOffset());
2629 std::vector<char> aBuf(nLength);
2630 rEditBuffer.ReadBytes(aBuf.data(), aBuf.size());
2631 SvMemoryStream aSource(aBuf.data(), aBuf.size(), StreamMode::READ);
2632 SvMemoryStream aStream;
2633 ZCodec aZCodec;
2634 aZCodec.BeginCompression();
2635 aZCodec.Decompress(aSource, aStream);
2636 if (!aZCodec.EndCompression())
2638 SAL_WARN("vcl.filter", "PDFObjectElement::ParseStoredObjects: decompression failed");
2639 return;
2642 nLength = aStream.TellEnd();
2643 aStream.Seek(0);
2644 std::vector<size_t> aObjNums;
2645 std::vector<size_t> aOffsets;
2646 std::vector<size_t> aLengths;
2647 // First iterate over and find out the lengths.
2648 for (size_t nObject = 0; nObject < nN; ++nObject)
2650 PDFNumberElement aObjNum;
2651 if (!aObjNum.Read(aStream))
2653 SAL_WARN("vcl.filter",
2654 "PDFObjectElement::ParseStoredObjects: failed to read object number");
2655 return;
2657 aObjNums.push_back(aObjNum.GetValue());
2659 PDFDocument::SkipWhitespace(aStream);
2661 PDFNumberElement aByteOffset;
2662 if (!aByteOffset.Read(aStream))
2664 SAL_WARN("vcl.filter",
2665 "PDFObjectElement::ParseStoredObjects: failed to read byte offset");
2666 return;
2668 aOffsets.push_back(pFirst->GetValue() + aByteOffset.GetValue());
2670 if (aOffsets.size() > 1)
2671 aLengths.push_back(aOffsets.back() - aOffsets[aOffsets.size() - 2]);
2672 if (nObject + 1 == nN)
2673 aLengths.push_back(nLength - aOffsets.back());
2675 PDFDocument::SkipWhitespace(aStream);
2678 // Now create streams with the proper length and tokenize the data.
2679 for (size_t nObject = 0; nObject < nN; ++nObject)
2681 size_t nObjNum = aObjNums[nObject];
2682 size_t nOffset = aOffsets[nObject];
2683 size_t nLen = aLengths[nObject];
2685 aStream.Seek(nOffset);
2686 m_aStoredElements.push_back(std::make_unique<PDFObjectElement>(m_rDoc, nObjNum, 0));
2687 PDFObjectElement* pStored = m_aStoredElements.back().get();
2689 aBuf.clear();
2690 aBuf.resize(nLen);
2691 aStream.ReadBytes(aBuf.data(), aBuf.size());
2692 SvMemoryStream aStoredStream(aBuf.data(), aBuf.size(), StreamMode::READ);
2694 m_rDoc.Tokenize(aStoredStream, TokenizeMode::STORED_OBJECT, pStored->GetStoredElements(),
2695 pStored);
2696 // This is how references know the object is stored inside this object stream.
2697 m_rDoc.SetIDObject(nObjNum, pStored);
2699 // Store the stream of the object in the object stream for later use.
2700 std::unique_ptr<SvMemoryStream> pStreamBuffer(new SvMemoryStream());
2701 aStoredStream.Seek(0);
2702 pStreamBuffer->WriteStream(aStoredStream);
2703 pStored->SetStreamBuffer(pStreamBuffer);
2707 std::vector<std::unique_ptr<PDFElement>>& PDFObjectElement::GetStoredElements()
2709 return m_aElements;
2712 SvMemoryStream* PDFObjectElement::GetStreamBuffer() const { return m_pStreamBuffer.get(); }
2714 void PDFObjectElement::SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer)
2716 m_pStreamBuffer = std::move(pStreamBuffer);
2719 PDFDocument& PDFObjectElement::GetDocument() { return m_rDoc; }
2721 PDFReferenceElement::PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject,
2722 PDFNumberElement const& rGeneration)
2723 : m_rDoc(rDoc)
2724 , m_fObjectValue(rObject.GetValue())
2725 , m_fGenerationValue(rGeneration.GetValue())
2726 , m_rObject(rObject)
2730 PDFNumberElement& PDFReferenceElement::GetObjectElement() const { return m_rObject; }
2732 bool PDFReferenceElement::Read(SvStream& rStream)
2734 SAL_INFO("vcl.filter",
2735 "PDFReferenceElement::Read: " << m_fObjectValue << " " << m_fGenerationValue << " R");
2736 m_nOffset = rStream.Tell();
2737 return true;
2740 sal_uInt64 PDFReferenceElement::GetOffset() const { return m_nOffset; }
2742 double PDFReferenceElement::LookupNumber(SvStream& rStream) const
2744 size_t nOffset = m_rDoc.GetObjectOffset(m_fObjectValue);
2745 if (nOffset == 0)
2747 SAL_WARN("vcl.filter", "PDFReferenceElement::LookupNumber: found no offset for object #"
2748 << m_fObjectValue);
2749 return 0;
2752 sal_uInt64 nOrigPos = rStream.Tell();
2753 comphelper::ScopeGuard g([&]() { rStream.Seek(nOrigPos); });
2755 rStream.Seek(nOffset);
2757 PDFDocument::SkipWhitespace(rStream);
2758 PDFNumberElement aNumber;
2759 bool bRet = aNumber.Read(rStream);
2760 if (!bRet || aNumber.GetValue() != m_fObjectValue)
2762 SAL_WARN("vcl.filter",
2763 "PDFReferenceElement::LookupNumber: offset points to not matching object");
2764 return 0;
2769 PDFDocument::SkipWhitespace(rStream);
2770 PDFNumberElement aNumber;
2771 bool bRet = aNumber.Read(rStream);
2772 if (!bRet || aNumber.GetValue() != m_fGenerationValue)
2774 SAL_WARN("vcl.filter",
2775 "PDFReferenceElement::LookupNumber: offset points to not matching generation");
2776 return 0;
2781 PDFDocument::SkipWhitespace(rStream);
2782 OString aKeyword = PDFDocument::ReadKeyword(rStream);
2783 if (aKeyword != "obj")
2785 SAL_WARN("vcl.filter",
2786 "PDFReferenceElement::LookupNumber: offset doesn't point to an obj keyword");
2787 return 0;
2791 PDFDocument::SkipWhitespace(rStream);
2792 PDFNumberElement aNumber;
2793 if (!aNumber.Read(rStream))
2795 SAL_WARN("vcl.filter",
2796 "PDFReferenceElement::LookupNumber: failed to read referenced number");
2797 return 0;
2800 return aNumber.GetValue();
2803 PDFObjectElement* PDFReferenceElement::LookupObject()
2805 return m_rDoc.LookupObject(m_fObjectValue);
2808 PDFObjectElement* PDFDocument::LookupObject(size_t nObjectNumber)
2810 auto itIDObjects = m_aIDObjects.find(nObjectNumber);
2812 if (itIDObjects != m_aIDObjects.end())
2813 return itIDObjects->second;
2815 SAL_WARN("vcl.filter", "PDFDocument::LookupObject: can't find obj " << nObjectNumber);
2816 return nullptr;
2819 SvMemoryStream& PDFDocument::GetEditBuffer() { return m_aEditBuffer; }
2821 int PDFReferenceElement::GetObjectValue() const { return m_fObjectValue; }
2823 int PDFReferenceElement::GetGenerationValue() const { return m_fGenerationValue; }
2825 bool PDFDictionaryElement::Read(SvStream& rStream)
2827 char ch;
2828 rStream.ReadChar(ch);
2829 if (ch != '<')
2831 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2832 return false;
2835 if (rStream.eof())
2837 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected end of file");
2838 return false;
2841 rStream.ReadChar(ch);
2842 if (ch != '<')
2844 SAL_WARN("vcl.filter", "PDFDictionaryElement::Read: unexpected character: " << ch);
2845 return false;
2848 m_nLocation = rStream.Tell();
2850 SAL_INFO("vcl.filter", "PDFDictionaryElement::Read: '<<'");
2852 return true;
2855 PDFEndDictionaryElement::PDFEndDictionaryElement() = default;
2857 sal_uInt64 PDFEndDictionaryElement::GetLocation() const { return m_nLocation; }
2859 bool PDFEndDictionaryElement::Read(SvStream& rStream)
2861 m_nLocation = rStream.Tell();
2862 char ch;
2863 rStream.ReadChar(ch);
2864 if (ch != '>')
2866 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2867 return false;
2870 if (rStream.eof())
2872 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected end of file");
2873 return false;
2876 rStream.ReadChar(ch);
2877 if (ch != '>')
2879 SAL_WARN("vcl.filter", "PDFEndDictionaryElement::Read: unexpected character: " << ch);
2880 return false;
2883 SAL_INFO("vcl.filter", "PDFEndDictionaryElement::Read: '>>'");
2885 return true;
2888 PDFNameElement::PDFNameElement() = default;
2890 bool PDFNameElement::Read(SvStream& rStream)
2892 char ch;
2893 rStream.ReadChar(ch);
2894 if (ch != '/')
2896 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected character: " << ch);
2897 return false;
2899 m_nLocation = rStream.Tell();
2901 if (rStream.eof())
2903 SAL_WARN("vcl.filter", "PDFNameElement::Read: unexpected end of file");
2904 return false;
2907 // Read till the first white-space.
2908 OStringBuffer aBuf;
2909 rStream.ReadChar(ch);
2910 while (!rStream.eof())
2912 if (rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)) || ch == '/' || ch == '['
2913 || ch == ']' || ch == '<' || ch == '>' || ch == '(')
2915 rStream.SeekRel(-1);
2916 m_aValue = aBuf.makeStringAndClear();
2917 SAL_INFO("vcl.filter", "PDFNameElement::Read: m_aValue is '" << m_aValue << "'");
2918 return true;
2920 aBuf.append(ch);
2921 rStream.ReadChar(ch);
2924 return false;
2927 const OString& PDFNameElement::GetValue() const { return m_aValue; }
2929 sal_uInt64 PDFNameElement::GetLocation() const { return m_nLocation; }
2931 PDFStreamElement::PDFStreamElement(size_t nLength)
2932 : m_nLength(nLength)
2933 , m_nOffset(0)
2937 bool PDFStreamElement::Read(SvStream& rStream)
2939 SAL_INFO("vcl.filter", "PDFStreamElement::Read: length is " << m_nLength);
2940 m_nOffset = rStream.Tell();
2941 std::vector<unsigned char> aBytes(m_nLength);
2942 rStream.ReadBytes(aBytes.data(), aBytes.size());
2943 m_aMemory.WriteBytes(aBytes.data(), aBytes.size());
2945 return rStream.good();
2948 SvMemoryStream& PDFStreamElement::GetMemory() { return m_aMemory; }
2950 sal_uInt64 PDFStreamElement::GetOffset() const { return m_nOffset; }
2952 bool PDFEndStreamElement::Read(SvStream& /*rStream*/) { return true; }
2954 bool PDFEndObjectElement::Read(SvStream& /*rStream*/) { return true; }
2956 PDFArrayElement::PDFArrayElement(PDFObjectElement* pObject)
2957 : m_pObject(pObject)
2961 bool PDFArrayElement::Read(SvStream& rStream)
2963 char ch;
2964 rStream.ReadChar(ch);
2965 if (ch != '[')
2967 SAL_WARN("vcl.filter", "PDFArrayElement::Read: unexpected character: " << ch);
2968 return false;
2971 SAL_INFO("vcl.filter", "PDFArrayElement::Read: '['");
2973 return true;
2976 void PDFArrayElement::PushBack(PDFElement* pElement)
2978 if (m_pObject)
2979 SAL_INFO("vcl.filter",
2980 "PDFArrayElement::PushBack: object is " << m_pObject->GetObjectValue());
2981 m_aElements.push_back(pElement);
2984 const std::vector<PDFElement*>& PDFArrayElement::GetElements() const { return m_aElements; }
2986 PDFEndArrayElement::PDFEndArrayElement() = default;
2988 bool PDFEndArrayElement::Read(SvStream& rStream)
2990 m_nOffset = rStream.Tell();
2991 char ch;
2992 rStream.ReadChar(ch);
2993 if (ch != ']')
2995 SAL_WARN("vcl.filter", "PDFEndArrayElement::Read: unexpected character: " << ch);
2996 return false;
2999 SAL_INFO("vcl.filter", "PDFEndArrayElement::Read: ']'");
3001 return true;
3004 sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
3006 } // namespace filter
3007 } // namespace vcl
3009 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */