loplugin:constmethod in vcl
[LibreOffice.git] / include / vcl / filter / pdfdocument.hxx
blob023882b8e8a2e7b5a75043f526dce95059ac75a4
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 */
11 #ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
12 #define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
14 #include <memory>
15 #include <map>
16 #include <vector>
18 #include <tools/stream.hxx>
20 #include <vcl/dllapi.h>
22 namespace com
24 namespace sun
26 namespace star
28 namespace security
30 class XCertificate;
36 namespace com
38 namespace sun
40 namespace star
42 namespace uno
44 template <class interface_type> class Reference;
50 namespace vcl
52 namespace filter
54 class PDFTrailerElement;
55 class PDFReferenceElement;
56 class PDFDocument;
57 class PDFDictionaryElement;
58 class PDFArrayElement;
59 class PDFStreamElement;
60 class PDFNumberElement;
62 /// A byte range in a PDF file.
/// Abstract base class: Read() is pure virtual and is overridden by the
/// element classes declared in this header.
63 class VCL_DLLPUBLIC PDFElement
/// Flag behind setVisiting() / alreadyVisiting().
/// NOTE(review): presumably guards against visiting the same element
/// recursively -- confirm in the implementation.
65 bool m_bVisiting = false;
/// Flag behind setParsing() / alreadyParsing().
/// NOTE(review): presumably guards against parsing the same element
/// recursively -- confirm in the implementation.
66 bool m_bParsing = false;
68 public:
69 PDFElement() = default;
/// Reads this element from rStream.
/// NOTE(review): the bool result presumably signals success -- confirm.
70 virtual bool Read(SvStream& rStream) = 0;
/// Virtual, so elements can be destroyed through a PDFElement pointer
/// (PDFDocument stores them as std::unique_ptr<PDFElement>).
71 virtual ~PDFElement() = default;
/// Stores bVisiting in the visiting flag.
72 void setVisiting(bool bVisiting) { m_bVisiting = bVisiting; }
/// Returns the visiting flag.
73 bool alreadyVisiting() const { return m_bVisiting; }
/// Stores bParsing in the parsing flag.
74 void setParsing(bool bParsing) { m_bParsing = bParsing; }
/// Returns the parsing flag.
75 bool alreadyParsing() const { return m_bParsing; }
78 /// Indirect object: something with a unique ID.
79 class VCL_DLLPUBLIC PDFObjectElement : public PDFElement
81 /// The document owning this element.
82 PDFDocument& m_rDoc;
/// NOTE(review): presumably the object number given to the constructor as
/// fObjectValue and returned by GetObjectValue() -- confirm.
83 double m_fObjectValue;
/// NOTE(review): presumably the generation number given to the constructor
/// as fGenerationValue -- confirm.
84 double m_fGenerationValue;
/// NOTE(review): presumably the parsed key-value items exposed by
/// GetDictionaryItems() (same type) -- confirm.
85 std::map<OString, PDFElement*> m_aDictionary;
86 /// If set, the object contains this number element (outside any dictionary/array).
87 PDFNumberElement* m_pNumberElement;
88 /// Position after the '<<' token.
89 sal_uInt64 m_nDictionaryOffset;
90 /// Length of the dictionary buffer till (before) the '>>' token.
91 sal_uInt64 m_nDictionaryLength;
/// Non-owning pointer to a dictionary element, see SetDictionary() / GetDictionary().
/// NOTE(review): presumably the contained direct dictionary, if any -- confirm.
92 PDFDictionaryElement* m_pDictionaryElement;
93 /// Position after the '[' token, if m_pArrayElement is set.
94 sal_uInt64 m_nArrayOffset;
95 /// Length of the array buffer till (before) the ']' token.
96 sal_uInt64 m_nArrayLength;
97 /// The contained direct array, if any.
98 PDFArrayElement* m_pArrayElement;
99 /// The stream of this object, used when this is an object stream.
100 PDFStreamElement* m_pStreamElement;
101 /// Objects of an object stream.
102 std::vector<std::unique_ptr<PDFObjectElement>> m_aStoredElements;
103 /// Elements of an object in an object stream.
104 std::vector<std::unique_ptr<PDFElement>> m_aElements;
105 /// Uncompressed buffer of an object in an object stream.
106 std::unique_ptr<SvMemoryStream> m_pStreamBuffer;
107 /// List of all reference elements inside this object's dictionary and
108 /// nested dictionaries.
109 std::vector<PDFReferenceElement*> m_aDictionaryReferences;
111 public:
/// Creates an object element belonging to rDoc with the given object and
/// generation values.
112 PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
113 bool Read(SvStream& rStream) override;
/// Looks up the element stored under rDictionaryKey.
/// NOTE(review): non-const, unlike most getters below; result on a missing
/// key is presumably nullptr -- confirm in the implementation.
114 PDFElement* Lookup(const OString& rDictionaryKey);
/// Like Lookup(), but returns an object element.
/// NOTE(review): presumably resolves a reference stored under the key -- confirm.
115 PDFObjectElement* LookupObject(const OString& rDictionaryKey);
116 double GetObjectValue() const;
117 void SetDictionaryOffset(sal_uInt64 nDictionaryOffset);
/// NOTE(review): the dictionary getters are non-const while the array
/// getters are const; the reason is not visible in this header.
118 sal_uInt64 GetDictionaryOffset();
119 void SetDictionaryLength(sal_uInt64 nDictionaryLength);
120 sal_uInt64 GetDictionaryLength();
121 PDFDictionaryElement* GetDictionary();
122 void SetDictionary(PDFDictionaryElement* pDictionaryElement);
123 void SetNumberElement(PDFNumberElement* pNumberElement);
124 PDFNumberElement* GetNumberElement() const;
125 /// Get access to the parsed key-value items from the object dictionary.
126 const std::map<OString, PDFElement*>& GetDictionaryItems();
/// Read-only access to the collected dictionary references.
127 const std::vector<PDFReferenceElement*>& GetDictionaryReferences() const;
/// Registers pReference; the pointer is stored non-owning (raw pointer container).
128 void AddDictionaryReference(PDFReferenceElement* pReference);
129 void SetArray(PDFArrayElement* pArrayElement);
130 void SetStream(PDFStreamElement* pStreamElement);
131 /// Access to the stream of the object, if it has any.
132 PDFStreamElement* GetStream() const;
133 void SetArrayOffset(sal_uInt64 nArrayOffset);
134 sal_uInt64 GetArrayOffset() const;
135 void SetArrayLength(sal_uInt64 nArrayLength);
136 sal_uInt64 GetArrayLength() const;
137 PDFArrayElement* GetArray() const;
138 /// Parse objects stored in this object stream.
139 void ParseStoredObjects();
/// NOTE(review): the return type matches m_aElements, not m_aStoredElements
/// (which holds PDFObjectElement) -- confirm which member is returned.
140 std::vector<std::unique_ptr<PDFElement>>& GetStoredElements();
/// Non-owning access to the stream buffer; may be null when none is set.
141 SvMemoryStream* GetStreamBuffer() const;
/// NOTE(review): takes the unique_ptr by non-const reference; whether
/// ownership is moved out of pStreamBuffer is not visible here -- confirm.
142 void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer);
/// Returns the document owning this element.
143 PDFDocument& GetDocument();
146 /// Array object: a list.
147 class VCL_DLLPUBLIC PDFArrayElement : public PDFElement
/// The items of the array, as non-owning pointers.
149 std::vector<PDFElement*> m_aElements;
150 /// The object that contains this array.
151 PDFObjectElement* const m_pObject;
153 public:
/// pObject is the object that contains this array.
/// NOTE(review): whether nullptr is accepted is not visible here.
154 PDFArrayElement(PDFObjectElement* pObject);
155 bool Read(SvStream& rStream) override;
/// Appends pElement to the array; the pointer is stored, not owned.
156 void PushBack(PDFElement* pElement);
/// Read-only access to the items of the array.
157 const std::vector<PDFElement*>& GetElements() const;
160 /// Reference object: refers to an indirect object by its ID.
161 class VCL_DLLPUBLIC PDFReferenceElement : public PDFElement
/// The document used to resolve this reference.
/// NOTE(review): presumably what LookupObject() consults -- confirm.
163 PDFDocument& m_rDoc;
/// Object value of the referenced object, see GetObjectValue().
/// Declared as int here, while PDFObjectElement stores its value as double.
164 int m_fObjectValue;
/// Generation value of the referenced object, see GetGenerationValue().
165 int m_fGenerationValue;
166 /// Location after the 'R' token.
167 sal_uInt64 m_nOffset = 0;
168 /// The element providing the object number.
169 PDFNumberElement& m_rObject;
171 public:
/// rObject and rGeneration are the number elements of the reference.
/// NOTE(review): presumably they supply the object and generation values
/// stored above -- confirm in the implementation.
172 PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject,
173 PDFNumberElement const& rGeneration);
174 bool Read(SvStream& rStream) override;
175 /// Assuming the reference points to a number object, return its value.
176 double LookupNumber(SvStream& rStream) const;
177 /// Lookup referenced object, without assuming anything about its contents.
178 PDFObjectElement* LookupObject();
179 int GetObjectValue() const;
180 int GetGenerationValue() const;
/// Returns the location after the 'R' token.
181 sal_uInt64 GetOffset() const;
/// Returns the element providing the object number.
182 PDFNumberElement& GetObjectElement() const;
185 /// Stream object: a byte array with a known length.
186 class VCL_DLLPUBLIC PDFStreamElement : public PDFElement
/// Length of the byte array, fixed at construction time.
188 size_t const m_nLength;
/// NOTE(review): presumably the stream position where the byte array
/// starts -- confirm in the implementation.
189 sal_uInt64 m_nOffset;
190 /// The byte array itself.
191 SvMemoryStream m_aMemory;
193 public:
/// nLength is the known length of the byte array.
194 explicit PDFStreamElement(size_t nLength);
195 bool Read(SvStream& rStream) override;
196 sal_uInt64 GetOffset() const;
/// Mutable access to the byte array.
197 SvMemoryStream& GetMemory();
200 /// Name object: a key string.
201 class VCL_DLLPUBLIC PDFNameElement : public PDFElement
/// The name itself, see GetValue().
/// NOTE(review): presumably stored without the leading '/' -- confirm.
203 OString m_aValue;
204 /// Offset after the '/' token.
205 sal_uInt64 m_nLocation = 0;
207 public:
208 PDFNameElement();
209 bool Read(SvStream& rStream) override;
210 const OString& GetValue() const;
/// Returns the offset after the '/' token.
211 sal_uInt64 GetLocation() const;
/// Always returns 0.
/// NOTE(review): presumably kept so names offer the same GetLocation() /
/// GetLength() pair as PDFNumberElement -- confirm with the callers.
212 static sal_uInt64 GetLength() { return 0; }
215 /// Dictionary object: a set of key-value pairs.
216 class VCL_DLLPUBLIC PDFDictionaryElement : public PDFElement
218 /// Key-value pairs when the dictionary is a nested value.
219 std::map<OString, PDFElement*> m_aItems;
220 /// Offset after the '<<' token.
221 sal_uInt64 m_nLocation = 0;
222 /// Position after the '/' token.
223 std::map<OString, sal_uInt64> m_aDictionaryKeyOffset;
224 /// Length of the dictionary key and value, till (before) the next token.
225 std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength;
227 public:
228 PDFDictionaryElement();
229 bool Read(SvStream& rStream) override;
/// Fills rDictionary from the flat element list rElements.
/// NOTE(review): the meaning of pThis and of the size_t result is not
/// visible in this header -- confirm in the implementation.
231 static size_t Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements,
232 PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary);
/// Looks up rKey in rDictionary.
/// NOTE(review): presumably returns nullptr when the key is missing -- confirm.
233 static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary,
234 const OString& rKey);
/// Records the position after the '/' token for rKey.
235 void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset);
/// NOTE(review): the result for an unknown key is not visible here.
236 sal_uInt64 GetKeyOffset(const OString& rKey) const;
/// Records the length of the key and value for rKey.
237 void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength);
/// NOTE(review): the result for an unknown key is not visible here.
238 sal_uInt64 GetKeyValueLength(const OString& rKey) const;
/// Read-only access to the key-value pairs.
239 const std::map<OString, PDFElement*>& GetItems() const;
240 /// Looks up an object which is only referenced in this dictionary.
241 PDFObjectElement* LookupObject(const OString& rDictionaryKey);
242 /// Looks up an element which is contained in this dictionary.
243 PDFElement* LookupElement(const OString& rDictionaryKey);
/// Selects how far tokenizing proceeds; passed as eMode to
/// PDFDocument::Tokenize().
246 enum class TokenizeMode
248 /// Full file.
249 END_OF_STREAM,
250 /// Till the first %%EOF token.
251 EOF_TOKEN,
252 /// Till the end of the current object.
253 END_OF_OBJECT,
254 /// Same as END_OF_OBJECT, but for object streams (no endobj keyword).
255 STORED_OBJECT
258 /// The type column of an entry in a cross-reference stream.
/// Stored per entry in XRefEntry (see XRefEntry::GetType()).
259 enum class XRefEntryType
261 /// xref "f" or xref stream "0".
262 FREE,
263 /// xref "n" or xref stream "1".
264 NOT_COMPRESSED,
265 /// xref stream "2".
266 COMPRESSED
269 /// An entry in a cross-reference stream.
270 class XRefEntry
/// Kind of the entry; defaults to NOT_COMPRESSED.
272 XRefEntryType m_eType = XRefEntryType::NOT_COMPRESSED;
274 * Non-compressed: The byte offset of the object, starting from the
275 * beginning of the file.
276 * Compressed: The object number of the object stream in which this object is
277 * stored.
279 sal_uInt64 m_nOffset = 0;
280 /// Has this entry been changed as part of an incremental update?
281 bool m_bDirty = false;
283 public:
284 XRefEntry();
/// Sets the kind of the entry.
286 void SetType(XRefEntryType eType) { m_eType = eType; }
/// Returns the kind of the entry.
288 XRefEntryType GetType() const { return m_eType; }
/// Sets the offset; its meaning depends on the entry type.
290 void SetOffset(sal_uInt64 nOffset) { m_nOffset = nOffset; }
/// Returns the offset; its meaning depends on the entry type.
292 sal_uInt64 GetOffset() const { return m_nOffset; }
/// Sets the dirty flag.
294 void SetDirty(bool bDirty) { m_bDirty = bDirty; }
/// Returns the dirty flag.
296 bool GetDirty() const { return m_bDirty; }
299 /// Hex string: in <AABB> form.
300 class VCL_DLLPUBLIC PDFHexStringElement : public PDFElement
/// The string content, see GetValue().
/// NOTE(review): PDFDocument::DecodeHexString() takes a PDFHexStringElement,
/// which suggests this holds the undecoded hex digits -- confirm.
302 OString m_aValue;
304 public:
305 bool Read(SvStream& rStream) override;
/// Read-only access to the string content.
306 const OString& GetValue() const;
309 /// Literal string: in (asdf) form.
310 class VCL_DLLPUBLIC PDFLiteralStringElement : public PDFElement
/// The string content, see GetValue().
/// NOTE(review): presumably without the enclosing parentheses -- confirm.
312 OString m_aValue;
314 public:
315 bool Read(SvStream& rStream) override;
/// Read-only access to the string content.
316 const OString& GetValue() const;
319 /// Number object: an integer or a real.
320 class VCL_DLLPUBLIC PDFNumberElement : public PDFElement
322 /// Input file start location.
323 sal_uInt64 m_nOffset = 0;
324 /// Input file token length.
325 sal_uInt64 m_nLength = 0;
/// The numeric value; integers and reals are both stored as double.
326 double m_fValue = 0;
328 public:
329 PDFNumberElement();
330 bool Read(SvStream& rStream) override;
/// Returns the numeric value.
331 double GetValue() const;
/// NOTE(review): presumably returns the input file start location
/// (m_nOffset) -- confirm in the implementation.
332 sal_uInt64 GetLocation() const;
/// NOTE(review): presumably returns the input file token length
/// (m_nLength) -- confirm in the implementation.
333 sal_uInt64 GetLength() const;
337 * In-memory representation of an on-disk PDF document.
339 * The PDF element list is not meant to be saved back to disk, but some
340 * elements remember their source offset / length, and based on that it's
341 * possible to modify the input file.
// The class is non-copyable: copy construction and copy assignment are deleted below.
343 class VCL_DLLPUBLIC PDFDocument
345 /// This vector owns all elements.
346 std::vector<std::unique_ptr<PDFElement>> m_aElements;
347 /// Object ID <-> cross-reference entry (type, offset, dirty flag) map.
348 std::map<size_t, XRefEntry> m_aXRef;
349 /// Object offset <-> Object pointer map.
350 std::map<size_t, PDFObjectElement*> m_aOffsetObjects;
351 /// Object ID <-> Object pointer map.
352 std::map<size_t, PDFObjectElement*> m_aIDObjects;
353 /// List of xref offsets we know.
354 std::vector<size_t> m_aStartXRefs;
355 /// Offsets of trailers, from latest to oldest.
356 std::vector<size_t> m_aTrailerOffsets;
357 /// Trailer offset <-> Trailer pointer map.
358 std::map<size_t, PDFTrailerElement*> m_aOffsetTrailers;
359 /// List of EOF offsets we know.
360 std::vector<size_t> m_aEOFs;
/// Non-owning pointer to a trailer element; nullptr until one is set.
361 PDFTrailerElement* m_pTrailer = nullptr;
362 /// When m_pTrailer is nullptr, this can still have a dictionary.
363 PDFObjectElement* m_pXRefStream = nullptr;
364 /// All editing takes place in this buffer, if it happens.
365 SvMemoryStream m_aEditBuffer;
367 /// Suggest a minimal, yet free signature ID to use for the next signature.
368 sal_uInt32 GetNextSignature();
369 /// Write the signature object as part of signing.
/// rLastByteRangeOffset and rContentOffset are output parameters
/// (non-const references).
/// NOTE(review): the sal_Int32 result is presumably the ID of the written
/// object -- confirm in the implementation.
370 sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES,
371 sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset);
372 /// Write the appearance object as part of signing.
373 sal_Int32 WriteAppearanceObject();
374 /// Write the annot object as part of signing.
375 sal_Int32 WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
376 sal_Int32 nAppearanceId);
377 /// Write the updated Page object as part of signing.
378 bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId);
379 /// Write the updated Catalog object as part of signing.
/// pRoot is passed by reference to a pointer, so the callee can set it.
380 bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot);
381 /// Write the updated cross-references as part of signing.
382 void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot);
384 public:
385 PDFDocument();
386 PDFDocument& operator=(const PDFDocument&) = delete;
387 PDFDocument(const PDFDocument&) = delete;
388 /// @name Low-level functions, to be used by PDFElement subclasses.
389 //@{
390 /// Decode a hex dump.
391 static std::vector<unsigned char> DecodeHexString(PDFHexStringElement const* pElement);
/// Reads a keyword from rStream and returns it.
/// NOTE(review): where the keyword ends is not visible in this header.
392 static OString ReadKeyword(SvStream& rStream);
/// NOTE(review): presumably searches rStream for the startxref value and
/// returns it; the result when nothing is found is not visible here.
393 static size_t FindStartXRef(SvStream& rStream);
/// NOTE(review): presumably parses a cross-reference table from rStream
/// into the xref map -- confirm.
394 void ReadXRef(SvStream& rStream);
/// NOTE(review): presumably the cross-reference stream counterpart of
/// ReadXRef() -- confirm.
395 void ReadXRefStream(SvStream& rStream);
/// NOTE(review): presumably advances rStream past whitespace characters;
/// compare SkipLineBreaks() below -- confirm.
396 static void SkipWhitespace(SvStream& rStream);
397 /// Instead of all whitespace, just skip CR and NL characters.
398 static void SkipLineBreaks(SvStream& rStream);
/// NOTE(review): presumably returns the offset recorded in the xref map for
/// object nIndex; the result for an unknown index is not visible here.
399 size_t GetObjectOffset(size_t nIndex) const;
/// Read-only access to the element list.
400 const std::vector<std::unique_ptr<PDFElement>>& GetElements() const;
/// Returns the page objects of the document, as non-owning pointers.
/// NOTE(review): ordering and error behaviour are not visible here.
401 std::vector<PDFObjectElement*> GetPages();
402 /// Remember the end location of an EOF token.
403 void PushBackEOF(size_t nOffset);
404 /// Look up object based on object number, possibly by parsing object streams.
405 PDFObjectElement* LookupObject(size_t nObjectNumber);
406 /// Access to the input document, even after the input stream is gone.
407 SvMemoryStream& GetEditBuffer();
408 /// Tokenize elements from current offset.
/// The parsed elements are appended to rElements.
/// NOTE(review): pObjectElement is presumably the enclosing object when
/// tokenizing inside one, nullptr otherwise -- confirm.
409 bool Tokenize(SvStream& rStream, TokenizeMode eMode,
410 std::vector<std::unique_ptr<PDFElement>>& rElements,
411 PDFObjectElement* pObjectElement);
412 /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID.
413 void SetIDObject(size_t nID, PDFObjectElement* pObject);
414 //@}
416 /// @name High-level functions, to be used by others.
417 //@{
418 /// Read elements from the start of the stream till its end.
419 bool Read(SvStream& rStream);
420 /// Sign the read document with xCertificate in the edit buffer.
421 bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate,
422 const OUString& rDescription, bool bAdES);
423 /// Serializes the contents of the edit buffer.
424 bool Write(SvStream& rStream);
425 /// Get a list of signatures embedded into this document.
426 std::vector<PDFObjectElement*> GetSignatureWidgets();
427 /// Remove the nth signature from read document in the edit buffer.
428 bool RemoveSignature(size_t nPosition);
429 //@}
432 } // namespace filter
433 } // namespace vcl
435 #endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
437 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */