1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 #ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
12 #define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
18 #include <tools/stream.hxx>
20 #include <vcl/dllapi.h>
/// Forward declaration of the Reference class template.
/// NOTE(review): presumably the css::uno::Reference that PDFDocument::Sign() takes below; the
/// enclosing namespace is not visible in this listing -- confirm.
44 template <class interface_type
> class Reference
;
/// Forward declarations of element classes that the declarations below use by
/// pointer or reference before (or, for PDFTrailerElement, without) a definition
/// appearing in this listing.
54 class PDFTrailerElement
;
55 class PDFReferenceElement
;
57 class PDFDictionaryElement
;
58 class PDFArrayElement
;
59 class PDFStreamElement
;
60 class PDFNumberElement
;
62 /// A byte range in a PDF file.
///
/// Abstract base class: Read() is pure virtual, and the element classes declared
/// below derive publicly from it.
63 class VCL_DLLPUBLIC PDFElement
/// Flag written by setVisiting() and reported by alreadyVisiting(); false by default.
/// NOTE(review): looks like a guard against re-entering an element while walking
/// the element graph -- confirm with the callers.
65 bool m_bVisiting
= false;
/// Flag written by setParsing() and reported by alreadyParsing(); false by default.
/// NOTE(review): presumably a guard against recursive parsing -- confirm with the callers.
66 bool m_bParsing
= false;
/// Default construction leaves both flags at false.
69 PDFElement() = default;
/// Reads this element from rStream; every concrete element class has to implement it.
/// NOTE(review): the bool result presumably signals success -- confirm against the implementations.
70 virtual bool Read(SvStream
& rStream
) = 0;
/// Virtual, so that elements can be destroyed through a PDFElement pointer
/// (they are held in std::unique_ptr<PDFElement> containers below).
71 virtual ~PDFElement() = default;
/// Stores bVisiting in the visiting flag.
72 void setVisiting(bool bVisiting
) { m_bVisiting
= bVisiting
; }
/// Returns the current value of the visiting flag.
73 bool alreadyVisiting() const { return m_bVisiting
; }
/// Stores bParsing in the parsing flag.
74 void setParsing(bool bParsing
) { m_bParsing
= bParsing
; }
/// Returns the current value of the parsing flag.
75 bool alreadyParsing() const { return m_bParsing
; }
78 /// Indirect object: something with a unique ID.
79 class VCL_DLLPUBLIC PDFObjectElement final
: public PDFElement
81 /// The document owning this element.
/// NOTE(review): no member declaration follows this comment in this listing; the
/// constructor's rDoc parameter and GetDocument() suggest a PDFDocument reference
/// member -- confirm.

/// Object value of this indirect object, reported by GetObjectValue().
/// NOTE(review): presumably initialized from the constructor's fObjectValue -- confirm.
83 double m_fObjectValue
;
/// Generation value of this indirect object; no getter is declared for it here.
/// NOTE(review): presumably initialized from the constructor's fGenerationValue -- confirm.
84 double m_fGenerationValue
;
/// Key-value items of the object dictionary.
/// NOTE(review): presumably the map returned by GetDictionaryItems() -- confirm.
85 std::map
<OString
, PDFElement
*> m_aDictionary
;
86 /// If set, the object contains this number element (outside any dictionary/array).
87 PDFNumberElement
* m_pNumberElement
;
88 /// Position after the '<<' token.
89 sal_uInt64 m_nDictionaryOffset
;
90 /// Length of the dictionary buffer till (before) the '>>' token.
91 sal_uInt64 m_nDictionaryLength
;
/// The dictionary element of this object, see SetDictionary() / GetDictionary().
/// NOTE(review): presumably null when the object has no direct dictionary -- confirm.
92 PDFDictionaryElement
* m_pDictionaryElement
;
93 /// Position after the '[' token, if m_pArrayElement is set.
94 sal_uInt64 m_nArrayOffset
;
95 /// Length of the array buffer till (before) the ']' token.
96 sal_uInt64 m_nArrayLength
;
97 /// The contained direct array, if any.
98 PDFArrayElement
* m_pArrayElement
;
99 /// The stream of this object, used when this is an object stream.
100 PDFStreamElement
* m_pStreamElement
;
101 /// Objects of an object stream.
102 std::vector
<std::unique_ptr
<PDFObjectElement
>> m_aStoredElements
;
103 /// Elements of an object in an object stream.
104 std::vector
<std::unique_ptr
<PDFElement
>> m_aElements
;
105 /// Uncompressed buffer of an object in an object stream.
106 std::unique_ptr
<SvMemoryStream
> m_pStreamBuffer
;
107 /// List of all reference elements inside this object's dictionary and
108 /// nested dictionaries.
109 std::vector
<PDFReferenceElement
*> m_aDictionaryReferences
;
/// Creates an object element for the document rDoc with the given object and
/// generation values.
112 PDFObjectElement(PDFDocument
& rDoc
, double fObjectValue
, double fGenerationValue
);
/// PDFElement::Read() implementation for indirect objects.
113 bool Read(SvStream
& rStream
) override
;
/// Looks up the element stored under rDictionaryKey.
/// NOTE(review): presumably searches this object's dictionary and returns nullptr
/// when the key is missing -- confirm.
114 PDFElement
* Lookup(const OString
& rDictionaryKey
);
/// Like Lookup(), but the result is an object element.
/// NOTE(review): presumably resolves a reference stored under the key -- confirm.
115 PDFObjectElement
* LookupObject(const OString
& rDictionaryKey
);
/// Returns the object value of this indirect object.
116 double GetObjectValue() const;
/// Sets the position after the '<<' token.
117 void SetDictionaryOffset(sal_uInt64 nDictionaryOffset
);
/// Returns the position after the '<<' token. Not const, unlike GetArrayOffset().
/// NOTE(review): presumably because it may have to parse first -- confirm.
118 sal_uInt64
GetDictionaryOffset();
/// Sets the length of the dictionary buffer till (before) the '>>' token.
119 void SetDictionaryLength(sal_uInt64 nDictionaryLength
);
/// Returns the length of the dictionary buffer. Not const, unlike GetArrayLength().
120 sal_uInt64
GetDictionaryLength();
/// Returns the dictionary element of this object.
121 PDFDictionaryElement
* GetDictionary();
/// Sets the dictionary element of this object; the pointer is not owning by type.
122 void SetDictionary(PDFDictionaryElement
* pDictionaryElement
);
/// Sets the number element contained in this object (outside any dictionary/array).
123 void SetNumberElement(PDFNumberElement
* pNumberElement
);
/// Returns the number element contained in this object, if one was set.
124 PDFNumberElement
* GetNumberElement() const;
125 /// Get access to the parsed key-value items from the object dictionary.
126 const std::map
<OString
, PDFElement
*>& GetDictionaryItems();
/// Returns the reference elements collected via AddDictionaryReference().
127 const std::vector
<PDFReferenceElement
*>& GetDictionaryReferences() const;
/// Registers pReference as a reference element found inside this object's
/// dictionary or its nested dictionaries.
128 void AddDictionaryReference(PDFReferenceElement
* pReference
);
/// Sets the contained direct array.
129 void SetArray(PDFArrayElement
* pArrayElement
);
/// Sets the stream of this object.
130 void SetStream(PDFStreamElement
* pStreamElement
);
131 /// Access to the stream of the object, if it has any.
132 PDFStreamElement
* GetStream() const;
/// Sets the position after the '[' token.
133 void SetArrayOffset(sal_uInt64 nArrayOffset
);
/// Returns the position after the '[' token.
134 sal_uInt64
GetArrayOffset() const;
/// Sets the length of the array buffer till (before) the ']' token.
135 void SetArrayLength(sal_uInt64 nArrayLength
);
/// Returns the length of the array buffer till (before) the ']' token.
136 sal_uInt64
GetArrayLength() const;
/// Returns the contained direct array, if any.
137 PDFArrayElement
* GetArray() const;
138 /// Parse objects stored in this object stream.
139 void ParseStoredObjects();
/// Returns a mutable vector of owned PDFElement instances.
/// NOTE(review): despite the name, the return type matches m_aElements (PDFElement),
/// not m_aStoredElements (PDFObjectElement) -- confirm which one is returned.
140 std::vector
<std::unique_ptr
<PDFElement
>>& GetStoredElements();
/// Returns the uncompressed buffer of an object in an object stream, without
/// transferring ownership.
141 SvMemoryStream
* GetStreamBuffer() const;
/// Sets the uncompressed buffer of this object.
/// NOTE(review): takes the unique_ptr by non-const reference, so it presumably
/// moves the buffer out of the caller's pointer -- confirm.
142 void SetStreamBuffer(std::unique_ptr
<SvMemoryStream
>& pStreamBuffer
);
/// Returns the document owning this element.
143 PDFDocument
& GetDocument();
146 /// Array object: a list.
147 class VCL_DLLPUBLIC PDFArrayElement
: public PDFElement
/// The items of the array, as non-owning pointers (see PushBack() / GetElements()).
149 std::vector
<PDFElement
*> m_aElements
;
150 /// The object that contains this array.
151 PDFObjectElement
* const m_pObject
;
/// Creates an array that belongs to pObject; the pointer itself cannot be
/// re-seated afterwards (const member).
/// NOTE(review): not marked explicit although it takes a single argument.
154 PDFArrayElement(PDFObjectElement
* pObject
);
/// PDFElement::Read() implementation for arrays.
155 bool Read(SvStream
& rStream
) override
;
/// Adds pElement to the array.
/// NOTE(review): presumably appended at the end, as the name suggests -- confirm.
156 void PushBack(PDFElement
* pElement
);
/// Read-only access to the items of the array.
157 const std::vector
<PDFElement
*>& GetElements() const;
160 /// Reference object: refers to an indirect object by object and generation value.
161 class VCL_DLLPUBLIC PDFReferenceElement
: public PDFElement
/// Generation value of the reference, reported by GetGenerationValue().
/// NOTE(review): the 'f' prefix suggests a floating-point type, but the member is an int.
165 int m_fGenerationValue
;
166 /// Location after the 'R' token.
167 sal_uInt64 m_nOffset
= 0;
168 /// The element providing the object number.
169 PDFNumberElement
& m_rObject
;
/// Creates a reference inside rDoc from the number elements that provide the
/// object number (rObject) and the generation number (rGeneration).
172 PDFReferenceElement(PDFDocument
& rDoc
, PDFNumberElement
& rObject
,
173 PDFNumberElement
const& rGeneration
);
/// PDFElement::Read() implementation for references.
174 bool Read(SvStream
& rStream
) override
;
175 /// Assuming the reference points to a number object, return its value.
176 double LookupNumber(SvStream
& rStream
) const;
177 /// Lookup referenced object, without assuming anything about its contents.
178 PDFObjectElement
* LookupObject();
/// Returns the object value of the reference.
179 int GetObjectValue() const;
/// Returns the generation value of the reference.
180 int GetGenerationValue() const;
/// Returns the location after the 'R' token.
181 sal_uInt64
GetOffset() const;
/// Returns the element providing the object number.
182 PDFNumberElement
& GetObjectElement() const;
185 /// Stream object: a byte array with a known length.
186 class VCL_DLLPUBLIC PDFStreamElement
: public PDFElement
/// Length of the byte array; const, so it is fixed at construction.
188 size_t const m_nLength
;
/// Offset reported by GetOffset().
/// NOTE(review): presumably the stream position where the byte array starts -- confirm.
189 sal_uInt64 m_nOffset
;
190 /// The byte array itself.
191 SvMemoryStream m_aMemory
;
/// Creates a stream element for a byte array of nLength bytes.
194 explicit PDFStreamElement(size_t nLength
);
/// PDFElement::Read() implementation for streams.
195 bool Read(SvStream
& rStream
) override
;
/// Returns the offset of this stream element.
196 sal_uInt64
GetOffset() const;
/// Mutable access to the byte array itself.
197 SvMemoryStream
& GetMemory();
200 /// Name object: a key string.
201 class VCL_DLLPUBLIC PDFNameElement final
: public PDFElement
204 /// Offset after the '/' token.
205 sal_uInt64 m_nLocation
= 0;
/// PDFElement::Read() implementation for names.
209 bool Read(SvStream
& rStream
) override
;
/// Returns the key string of this name.
/// NOTE(review): the member backing this getter is not visible in this listing.
210 const OString
& GetValue() const;
/// Returns the offset after the '/' token.
211 sal_uInt64
GetLocation() const;
/// Always returns 0; static, so it does not depend on any instance state.
212 static sal_uInt64
GetLength() { return 0; }
215 /// Dictionary object: a set of key-value pairs.
216 class VCL_DLLPUBLIC PDFDictionaryElement
: public PDFElement
218 /// Key-value pairs when the dictionary is a nested value.
219 std::map
<OString
, PDFElement
*> m_aItems
;
220 /// Offset after the '<<' token.
221 sal_uInt64 m_nLocation
= 0;
222 /// Position after the '/' token.
223 std::map
<OString
, sal_uInt64
> m_aDictionaryKeyOffset
;
224 /// Length of the dictionary key and value, till (before) the next token.
225 std::map
<OString
, sal_uInt64
> m_aDictionaryKeyValueLength
;
/// Creates a dictionary element; m_nLocation starts at 0 via its in-class initializer.
228 PDFDictionaryElement();
/// PDFElement::Read() implementation for dictionaries.
229 bool Read(SvStream
& rStream
) override
;
/// Fills rDictionary with key-value items taken from rElements; pThis is the
/// element whose dictionary is being parsed.
/// NOTE(review): the meaning of the size_t result is not visible here -- confirm
/// against the implementation.
231 static size_t Parse(const std::vector
<std::unique_ptr
<PDFElement
>>& rElements
,
232 PDFElement
* pThis
, std::map
<OString
, PDFElement
*>& rDictionary
);
/// Looks up rKey in rDictionary.
/// NOTE(review): presumably returns nullptr when the key is missing -- confirm.
233 static PDFElement
* Lookup(const std::map
<OString
, PDFElement
*>& rDictionary
,
234 const OString
& rKey
);
/// Records nOffset (position after the '/' token) for rKey.
235 void SetKeyOffset(const OString
& rKey
, sal_uInt64 nOffset
);
/// Returns the offset recorded for rKey.
/// NOTE(review): behavior for an unknown key is not visible here -- confirm.
236 sal_uInt64
GetKeyOffset(const OString
& rKey
) const;
/// Records nLength (length of key and value, till the next token) for rKey.
237 void SetKeyValueLength(const OString
& rKey
, sal_uInt64 nLength
);
/// Returns the key-value length recorded for rKey.
/// NOTE(review): behavior for an unknown key is not visible here -- confirm.
238 sal_uInt64
GetKeyValueLength(const OString
& rKey
) const;
/// Read-only access to the key-value pairs of this dictionary.
239 const std::map
<OString
, PDFElement
*>& GetItems() const;
240 /// Looks up an object which is only referenced in this dictionary.
241 PDFObjectElement
* LookupObject(const OString
& rDictionaryKey
);
242 /// Looks up an element which is contained in this dictionary.
243 PDFElement
* LookupElement(const OString
& rDictionaryKey
);
/// Selects how far tokenizing goes; passed to PDFDocument::Tokenize() as eMode.
/// NOTE(review): the enumerators that the comments below describe are not visible
/// in this listing.
246 enum class TokenizeMode
250 /// Till the first %%EOF token.
252 /// Till the end of the current object.
254 /// Same as END_OF_OBJECT, but for object streams (no endobj keyword).
258 /// The type column of an entry in a cross-reference stream.
/// NOTE(review): the enumerators that the comments below describe are not visible
/// in this listing; XRefEntry uses XRefEntryType::NOT_COMPRESSED as its default.
259 enum class XRefEntryType
261 /// xref "f" or xref stream "0".
263 /// xref "n" or xref stream "1".
269 /// An entry in a cross-reference stream.
/// Type of the entry; NOT_COMPRESSED unless changed through SetType().
272 XRefEntryType m_eType
= XRefEntryType::NOT_COMPRESSED
;
274 * Non-compressed: The byte offset of the object, starting from the
275 * beginning of the file.
276 * Compressed: The object number of the object stream in which this object is
279 sal_uInt64 m_nOffset
= 0;
280 /// Has this entry been changed as part of an incremental update?
281 bool m_bDirty
= false;
/// Sets the type of the entry.
286 void SetType(XRefEntryType eType
) { m_eType
= eType
; }
/// Returns the type of the entry.
288 XRefEntryType
GetType() const { return m_eType
; }
/// Sets the offset of the entry; its meaning depends on the entry type.
290 void SetOffset(sal_uInt64 nOffset
) { m_nOffset
= nOffset
; }
/// Returns the offset of the entry; its meaning depends on the entry type.
292 sal_uInt64
GetOffset() const { return m_nOffset
; }
/// Marks the entry as changed (true) or unchanged (false).
294 void SetDirty(bool bDirty
) { m_bDirty
= bDirty
; }
/// Returns whether the entry is marked as changed.
296 bool GetDirty() const { return m_bDirty
; }
299 /// Hex string: in <AABB> form.
300 class VCL_DLLPUBLIC PDFHexStringElement final
: public PDFElement
/// PDFElement::Read() implementation for hex strings.
305 bool Read(SvStream
& rStream
) override
;
/// Returns the string value of this element.
/// NOTE(review): presumably the still hex-encoded text (PDFDocument::DecodeHexString()
/// exists to decode it) -- confirm; the backing member is not visible in this listing.
306 const OString
& GetValue() const;
309 /// Literal string: in (asdf) form.
310 class VCL_DLLPUBLIC PDFLiteralStringElement final
: public PDFElement
/// PDFElement::Read() implementation for literal strings.
315 bool Read(SvStream
& rStream
) override
;
/// Returns the string value of this element.
/// NOTE(review): whether the surrounding parentheses are included is not visible
/// here -- confirm; the backing member is not visible in this listing.
316 const OString
& GetValue() const;
319 /// Number object: an integer or a real.
320 class VCL_DLLPUBLIC PDFNumberElement
: public PDFElement
322 /// Input file start location.
323 sal_uInt64 m_nOffset
= 0;
324 /// Input file token length.
325 sal_uInt64 m_nLength
= 0;
/// PDFElement::Read() implementation for numbers.
330 bool Read(SvStream
& rStream
) override
;
/// Returns the number as a double, for integers and reals alike.
331 double GetValue() const;
/// Returns the start location of the number in the input file.
/// NOTE(review): presumably backed by m_nOffset -- confirm.
332 sal_uInt64
GetLocation() const;
/// Returns the length of the number token in the input file.
333 sal_uInt64
GetLength() const;
337 * In-memory representation of an on-disk PDF document.
339 * The PDF element list is not meant to be saved back to disk, but some
340 * elements remember their source offset / length, and based on that it's
341 * possible to modify the input file.
343 class VCL_DLLPUBLIC PDFDocument
345 /// This vector owns all elements.
346 std::vector
<std::unique_ptr
<PDFElement
>> m_aElements
;
347 /// Object ID <-> object offset map.
348 std::map
<size_t, XRefEntry
> m_aXRef
;
349 /// Object offset <-> Object pointer map.
350 std::map
<size_t, PDFObjectElement
*> m_aOffsetObjects
;
351 /// Object ID <-> Object pointer map.
352 std::map
<size_t, PDFObjectElement
*> m_aIDObjects
;
353 /// List of xref offsets we know.
354 std::vector
<size_t> m_aStartXRefs
;
355 /// Offsets of trailers, from latest to oldest.
356 std::vector
<size_t> m_aTrailerOffsets
;
357 /// Trailer offset <-> Trailer pointer map.
358 std::map
<size_t, PDFTrailerElement
*> m_aOffsetTrailers
;
359 /// List of EOF offsets we know.
360 std::vector
<size_t> m_aEOFs
;
/// Non-owning pointer to a trailer element; nullptr until one is set.
/// NOTE(review): presumably the trailer currently in effect -- confirm.
361 PDFTrailerElement
* m_pTrailer
= nullptr;
362 /// When m_pTrailer is nullptr, this can still have a dictionary.
363 PDFObjectElement
* m_pXRefStream
= nullptr;
364 /// All editing takes place in this buffer, if it happens.
365 SvMemoryStream m_aEditBuffer
;
367 /// Suggest a minimal, yet free signature ID to use for the next signature.
368 sal_uInt32
GetNextSignature();
369 /// Write the signature object as part of signing.
/// rLastByteRangeOffset and rContentOffset are output parameters.
/// NOTE(review): the returned sal_Int32 is presumably the ID of the written
/// object (WriteAnnotObject() takes an nSignatureId) -- confirm.
370 sal_Int32
WriteSignatureObject(const OUString
& rDescription
, bool bAdES
,
371 sal_uInt64
& rLastByteRangeOffset
, sal_Int64
& rContentOffset
);
372 /// Write the appearance object as part of signing.
373 sal_Int32
WriteAppearanceObject();
374 /// Write the annot object as part of signing.
375 sal_Int32
WriteAnnotObject(PDFObjectElement
const& rFirstPage
, sal_Int32 nSignatureId
,
376 sal_Int32 nAppearanceId
);
377 /// Write the updated Page object as part of signing.
378 bool WritePageObject(PDFObjectElement
& rFirstPage
, sal_Int32 nAnnotId
);
379 /// Write the updated Catalog object as part of signing.
/// pRoot is passed by reference, so the function can set it for the caller.
380 bool WriteCatalogObject(sal_Int32 nAnnotId
, PDFReferenceElement
*& pRoot
);
381 /// Write the updated cross-references as part of signing.
382 void WriteXRef(sal_uInt64 nXRefOffset
, PDFReferenceElement
const* pRoot
);
/// Documents are not copy-assignable.
386 PDFDocument
& operator=(const PDFDocument
&) = delete;
/// Documents are not copy-constructible.
387 PDFDocument(const PDFDocument
&) = delete;
388 /// @name Low-level functions, to be used by PDFElement subclasses.
390 /// Decode a hex dump.
391 static std::vector
<unsigned char> DecodeHexString(PDFHexStringElement
const* pElement
);
/// Reads a keyword from rStream and returns it.
/// NOTE(review): what terminates a keyword is not visible here -- confirm.
392 static OString
ReadKeyword(SvStream
& rStream
);
/// Searches rStream for the start of the cross-reference data and returns its offset.
/// NOTE(review): the value returned when nothing is found is not visible here -- confirm.
393 static size_t FindStartXRef(SvStream
& rStream
);
/// Reads a cross-reference table from rStream.
394 void ReadXRef(SvStream
& rStream
);
/// Reads a cross-reference stream from rStream.
395 void ReadXRefStream(SvStream
& rStream
);
/// Skips whitespace in rStream.
396 static void SkipWhitespace(SvStream
& rStream
);
397 /// Instead of all whitespace, just skip CR and NL characters.
398 static void SkipLineBreaks(SvStream
& rStream
);
/// Returns the offset of the object identified by nIndex.
/// NOTE(review): presumably looked up in m_aXRef; behavior for an unknown index
/// is not visible here -- confirm.
399 size_t GetObjectOffset(size_t nIndex
) const;
/// Read-only access to the vector that owns all elements.
400 const std::vector
<std::unique_ptr
<PDFElement
>>& GetElements() const;
/// Returns the page objects of the document, as non-owning pointers.
401 std::vector
<PDFObjectElement
*> GetPages();
402 /// Remember the end location of an EOF token.
403 void PushBackEOF(size_t nOffset
);
404 /// Look up object based on object number, possibly by parsing object streams.
405 PDFObjectElement
* LookupObject(size_t nObjectNumber
);
406 /// Access to the input document, even after the input stream is gone.
407 SvMemoryStream
& GetEditBuffer();
408 /// Tokenize elements from current offset.
/// Parsed elements are added to rElements; eMode selects where tokenizing stops.
/// NOTE(review): pObjectElement is presumably the object being tokenized when
/// called for an object (stream), and may be nullptr otherwise -- confirm.
409 bool Tokenize(SvStream
& rStream
, TokenizeMode eMode
,
410 std::vector
<std::unique_ptr
<PDFElement
>>& rElements
,
411 PDFObjectElement
* pObjectElement
);
412 /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID.
413 void SetIDObject(size_t nID
, PDFObjectElement
* pObject
);
416 /// @name High-level functions, to be used by others.
418 /// Read elements from the start of the stream till its end.
419 bool Read(SvStream
& rStream
);
420 /// Sign the read document with xCertificate in the edit buffer.
421 bool Sign(const css::uno::Reference
<css::security::XCertificate
>& xCertificate
,
422 const OUString
& rDescription
, bool bAdES
);
423 /// Serializes the contents of the edit buffer.
424 bool Write(SvStream
& rStream
);
425 /// Get a list of signatures embedded into this document.
426 std::vector
<PDFObjectElement
*> GetSignatureWidgets();
427 /// Remove the nth signature from read document in the edit buffer.
428 bool RemoveSignature(size_t nPosition
);
433 } // namespace xmlsecurity
435 #endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
437 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */