pdf: deduplicate resources when copying from external PDF stream
[LibreOffice.git] / vcl / source / gdi / pdfobjectcopier.cxx
blob a953c864c122b4373ab92da619e4c4faa487da4e
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <pdf/objectcopier.hxx>
12 #include <rtl/strbuf.hxx>
13 #include <sal/log.hxx>
14 #include <sal/types.h>
15 #include <tools/stream.hxx>
16 #include <tools/zcodec.hxx>
17 #include <vcl/filter/pdfdocument.hxx>
18 #include <vcl/filter/pdfobjectcontainer.hxx>
20 #include "pdfwriter_impl.hxx"
22 namespace vcl
24 PDFObjectCopier::PDFObjectCopier(PDFObjectContainer& rContainer)
25 : m_rContainer(rContainer)
29 sal_Int32 PDFObjectCopier::copyExternalResource(SvMemoryStream& rDocBuffer,
30 filter::PDFObjectElement& rObject,
31 std::map<sal_Int32, sal_Int32>& rCopiedResources)
33 auto it = rCopiedResources.find(rObject.GetObjectValue());
34 if (it != rCopiedResources.end())
35 // This resource was already copied once, nothing to do.
36 return it->second;
38 sal_Int32 nObject = m_rContainer.createObject();
39 // Remember what is the ID of this object in our output.
40 rCopiedResources[rObject.GetObjectValue()] = nObject;
41 SAL_INFO("vcl.pdfwriter", "PDFObjectCopier::copyExternalResource: " << rObject.GetObjectValue()
42 << " -> " << nObject);
44 OStringBuffer aLine;
45 aLine.append(nObject);
46 aLine.append(" 0 obj\n");
47 if (rObject.GetDictionary())
49 aLine.append("<<");
51 // Complex case: can't copy the dictionary byte array as is, as it may contain references.
52 bool bDone = false;
53 sal_uInt64 nCopyStart = 0;
54 for (auto pReference : rObject.GetDictionaryReferences())
56 if (pReference)
58 filter::PDFObjectElement* pReferenced = pReference->LookupObject();
59 if (pReferenced)
61 // Copy the referenced object.
62 sal_Int32 nRef
63 = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
65 sal_uInt64 nReferenceStart = pReference->GetObjectElement().GetLocation();
66 sal_uInt64 nReferenceEnd = pReference->GetOffset();
67 sal_uInt64 nOffset = 0;
68 if (nCopyStart == 0)
69 // Dict start -> reference start.
70 nOffset = rObject.GetDictionaryOffset();
71 else
72 // Previous reference end -> reference start.
73 nOffset = nCopyStart;
74 aLine.append(static_cast<const char*>(rDocBuffer.GetData()) + nOffset,
75 nReferenceStart - nOffset);
76 // Write the updated reference.
77 aLine.append(" ");
78 aLine.append(nRef);
79 aLine.append(" 0 R");
80 // Start copying here next time.
81 nCopyStart = nReferenceEnd;
83 bDone = true;
88 if (bDone)
90 // Copy the last part here, in the complex case.
91 sal_uInt64 nDictEnd = rObject.GetDictionaryOffset() + rObject.GetDictionaryLength();
92 const sal_Int32 nLen = nDictEnd - nCopyStart;
93 if (nLen < 0)
94 SAL_WARN("vcl.pdfwriter", "copyExternalResource() failed");
95 else
96 aLine.append(static_cast<const char*>(rDocBuffer.GetData()) + nCopyStart, nLen);
98 else
99 // Can copy it as-is.
100 aLine.append(static_cast<const char*>(rDocBuffer.GetData())
101 + rObject.GetDictionaryOffset(),
102 rObject.GetDictionaryLength());
104 aLine.append(">>\n");
107 if (filter::PDFStreamElement* pStream = rObject.GetStream())
109 aLine.append("stream\n");
110 SvMemoryStream& rStream = pStream->GetMemory();
111 aLine.append(static_cast<const char*>(rStream.GetData()), rStream.GetSize());
112 aLine.append("\nendstream\n");
115 if (filter::PDFArrayElement* pArray = rObject.GetArray())
117 aLine.append("[");
119 const std::vector<filter::PDFElement*>& rElements = pArray->GetElements();
120 bool bDone = false;
121 // Complex case: can't copy the array byte array as is, as it may contain references.
122 sal_uInt64 nCopyStart = 0;
123 for (const auto pElement : rElements)
125 auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pElement);
126 if (pReference)
128 filter::PDFObjectElement* pReferenced = pReference->LookupObject();
129 if (pReferenced)
131 // Copy the referenced object.
132 sal_Int32 nRef
133 = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
135 sal_uInt64 nReferenceStart = pReference->GetObjectElement().GetLocation();
136 sal_uInt64 nReferenceEnd = pReference->GetOffset();
137 sal_uInt64 nOffset = 0;
138 if (nCopyStart == 0)
139 // Array start -> reference start.
140 nOffset = rObject.GetArrayOffset();
141 else
142 // Previous reference end -> reference start.
143 nOffset = nCopyStart;
144 aLine.append(static_cast<const char*>(rDocBuffer.GetData()) + nOffset,
145 nReferenceStart - nOffset);
147 // Write the updated reference.
148 aLine.append(" ");
149 aLine.append(nRef);
150 aLine.append(" 0 R");
151 // Start copying here next time.
152 nCopyStart = nReferenceEnd;
154 bDone = true;
159 if (bDone)
161 // Copy the last part here, in the complex case.
162 sal_uInt64 nArrEnd = rObject.GetArrayOffset() + rObject.GetArrayLength();
163 const sal_Int32 nLen = nArrEnd - nCopyStart;
164 if (nLen < 0)
165 SAL_WARN("vcl.pdfwriter", "copyExternalResource() failed");
166 else
167 aLine.append(static_cast<const char*>(rDocBuffer.GetData()) + nCopyStart, nLen);
169 else
170 // Can copy it as-is.
171 aLine.append(static_cast<const char*>(rDocBuffer.GetData()) + rObject.GetArrayOffset(),
172 rObject.GetArrayLength());
174 aLine.append("]\n");
177 // If the object has a number element outside a dictionary or array, copy that.
178 if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
180 aLine.append(static_cast<const char*>(rDocBuffer.GetData()) + pNumber->GetLocation(),
181 pNumber->GetLength());
182 aLine.append("\n");
185 aLine.append("endobj\n\n");
187 // We have the whole object, now write it to the output.
188 if (!m_rContainer.updateObject(nObject))
189 return -1;
190 if (!m_rContainer.writeBuffer(aLine.getStr(), aLine.getLength()))
191 return -1;
193 return nObject;
196 OString PDFObjectCopier::copyExternalResources(filter::PDFObjectElement& rPage,
197 const OString& rKind,
198 std::map<sal_Int32, sal_Int32>& rCopiedResources)
200 // A name - object ID map, IDs as they appear in our output, not the
201 // original ones.
202 std::map<OString, sal_Int32> aRet;
204 // Get the rKind subset of the resource dictionary.
205 std::map<OString, filter::PDFElement*> aItems;
206 if (auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources")))
208 // Resources is a direct dictionary.
209 filter::PDFElement* pLookup = pResources->LookupElement(rKind);
210 if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pLookup))
212 // rKind is an inline dictionary.
213 aItems = pDictionary->GetItems();
215 else if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pLookup))
217 // rKind refers to a dictionary.
218 filter::PDFObjectElement* pReferenced = pReference->LookupObject();
219 if (!pReferenced)
221 return OString();
224 aItems = pReferenced->GetDictionaryItems();
227 else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"))
229 // Resources is an indirect object.
230 filter::PDFElement* pValue = pPageResources->Lookup(rKind);
231 if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue))
232 // Kind is a direct dictionary.
233 aItems = pDictionary->GetItems();
234 else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind))
235 // Kind is an indirect object.
236 aItems = pObject->GetDictionaryItems();
238 if (aItems.empty())
239 return OString();
241 SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();
243 for (const auto& rItem : aItems)
245 // For each item copy it over to our output then insert it into aRet.
246 auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);
247 if (!pReference)
248 continue;
250 filter::PDFObjectElement* pValue = pReference->LookupObject();
251 if (!pValue)
252 continue;
254 // Then copying over an object copy its dictionary and its stream.
255 sal_Int32 nObject = copyExternalResource(rDocBuffer, *pValue, rCopiedResources);
256 aRet[rItem.first] = nObject;
259 // Build the dictionary entry string.
260 OStringBuffer sRet("/" + rKind + "<<");
261 for (const auto& rPair : aRet)
263 sRet.append("/")
264 .append(rPair.first)
265 .append(" ")
266 .append(OString::number(rPair.second))
267 .append(" 0 R");
269 sRet.append(">>");
271 return sRet.makeStringAndClear();
274 void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine)
276 // Maps from source object id (PDF image) to target object id (export result).
277 std::map<sal_Int32, sal_Int32> aCopiedResources;
278 copyPageResources(pPage, rLine, aCopiedResources);
281 void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine,
282 std::map<sal_Int32, sal_Int32>& rCopiedResources)
284 rLine.append(" /Resources <<");
285 static const std::initializer_list<OString> aKeys
286 = { "ColorSpace", "ExtGState", "Font", "XObject", "Shading" };
287 for (const auto& rKey : aKeys)
289 rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources));
291 rLine.append(">>");
294 sal_Int32 PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams,
295 SvMemoryStream& rStream, bool& rCompressed)
297 for (auto pContent : rContentStreams)
299 filter::PDFStreamElement* pPageStream = pContent->GetStream();
300 if (!pPageStream)
302 SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: contents has no stream");
303 continue;
306 SvMemoryStream& rPageStream = pPageStream->GetMemory();
308 auto pFilter = dynamic_cast<filter::PDFNameElement*>(pContent->Lookup("Filter"));
309 if (pFilter)
311 if (pFilter->GetValue() != "FlateDecode")
313 continue;
316 SvMemoryStream aMemoryStream;
317 ZCodec aZCodec;
318 rPageStream.Seek(0);
319 aZCodec.BeginCompression();
320 aZCodec.Decompress(rPageStream, aMemoryStream);
321 if (!aZCodec.EndCompression())
323 SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: decompression failed");
324 continue;
327 rStream.WriteBytes(aMemoryStream.GetData(), aMemoryStream.GetSize());
329 else
331 rStream.WriteBytes(rPageStream.GetData(), rPageStream.GetSize());
335 rCompressed = PDFWriterImpl::compressStream(&rStream);
337 return rStream.Tell();
341 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */