beta-0.89.2
[luatex.git] / source / libs / poppler / poppler-src / poppler / PDFDoc.cc
blob8287060ae6ce22c1963113bea9fc91bc04f4ab79
1 //========================================================================
2 //
3 // PDFDoc.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
9 //========================================================================
11 // Modified under the Poppler project - http://poppler.freedesktop.org
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
17 // Copyright (C) 2005, 2007-2009, 2011-2014 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
29 // Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de>
30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
31 // Copyright (C) 2013, 2014 Adrian Johnson <ajohnson@redneon.com>
32 // Copyright (C) 2013 Adam Reichold <adamreichold@myopera.com>
33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
36 // To see a description of the changes please see the Changelog file that
37 // came with your tarball or type make ChangeLog if you are building from git
39 //========================================================================
41 #include <config.h>
43 #ifdef USE_GCC_PRAGMAS
44 #pragma implementation
45 #endif
47 #include <ctype.h>
48 #include <locale.h>
49 #include <stdio.h>
50 #include <errno.h>
51 #include <stdlib.h>
52 #include <stddef.h>
53 #include <string.h>
54 #include <time.h>
55 #include <sys/stat.h>
56 #include "goo/gstrtod.h"
57 #include "goo/GooString.h"
58 #include "goo/gfile.h"
59 #include "poppler-config.h"
60 #include "GlobalParams.h"
61 #include "Page.h"
62 #include "Catalog.h"
63 #include "Stream.h"
64 #include "XRef.h"
65 #include "Linearization.h"
66 #include "Link.h"
67 #include "OutputDev.h"
68 #include "Error.h"
69 #include "ErrorCodes.h"
70 #include "Lexer.h"
71 #include "Parser.h"
72 #include "SecurityHandler.h"
73 #include "Decrypt.h"
74 #ifndef DISABLE_OUTLINE
75 #include "Outline.h"
76 #endif
77 #include "PDFDoc.h"
78 #include "Hints.h"
// pdfdocLocker() expands to a scoped MutexLocker on the document mutex in
// multithreaded builds, and to nothing otherwise.
#if MULTITHREADED
#  define pdfdocLocker()   MutexLocker locker(&mutex)
#else
#  define pdfdocLocker()
#endif

//------------------------------------------------------------------------

// Number of bytes read at the beginning of the file when looking for
// the '%PDF' header.
#define headerSearchSize 1024

// Length of a PDF document ID (PermanentId, UpdateId).
#define pdfIdLength 32

// Number of bytes read at the beginning of the file when looking for
// the linearization dictionary.
#define linearizationSearchSize 1024

// Number of bytes read at the end of the file when looking for
// 'startxref'.
#define xrefSearchSize 1024
99 //------------------------------------------------------------------------
100 // PDFDoc
101 //------------------------------------------------------------------------
103 void PDFDoc::init()
105 #if MULTITHREADED
106 gInitMutex(&mutex);
107 #endif
108 ok = gFalse;
109 errCode = errNone;
110 fileName = NULL;
111 file = NULL;
112 str = NULL;
113 xref = NULL;
114 linearization = NULL;
115 catalog = NULL;
116 hints = NULL;
117 #ifndef DISABLE_OUTLINE
118 outline = NULL;
119 #endif
120 startXRefPos = -1;
121 secHdlr = NULL;
122 pageCache = NULL;
125 PDFDoc::PDFDoc()
127 init();
130 PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
131 GooString *userPassword, void *guiDataA) {
132 Object obj;
133 #ifdef _WIN32
134 int n, i;
135 #endif
137 init();
139 fileName = fileNameA;
140 guiData = guiDataA;
141 #ifdef _WIN32
142 n = fileName->getLength();
143 fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
144 for (i = 0; i < n; ++i) {
145 fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
147 fileNameU[n] = L'\0';
148 #endif
150 // try to open file
151 file = GooFile::open(fileName);
152 if (file == NULL) {
153 // fopen() has failed.
154 // Keep a copy of the errno returned by fopen so that it can be
155 // referred to later.
156 fopenErrno = errno;
157 error(errIO, -1, "Couldn't open file '{0:t}': {1:s}.", fileName, strerror(errno));
158 errCode = errOpenFile;
159 return;
162 // create stream
163 obj.initNull();
164 str = new FileStream(file, 0, gFalse, file->size(), &obj);
166 ok = setup(ownerPassword, userPassword);
#ifdef _WIN32
// Windows-only constructor taking a wide-character file name of
// fileNameLen characters.  Both a wide copy and an 8-bit copy (each
// character truncated to char) of the name are stored.
PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
               GooString *userPassword, void *guiDataA) {
  init();

  guiData = guiDataA;

  // save both Unicode and 8-bit copies of the file name
  fileName = new GooString();
  fileNameU = (wchar_t *)gmallocn(fileNameLen + 1, sizeof(wchar_t));
  for (int k = 0; k < fileNameLen; ++k) {
    fileName->append((char)fileNameA[k]);
    fileNameU[k] = fileNameA[k];
  }
  fileNameU[fileNameLen] = L'\0';

  // try to open file
  // NB: _wfopen is only available in NT
  OSVERSIONINFO osInfo;
  osInfo.dwOSVersionInfoSize = sizeof(osInfo);
  GetVersionEx(&osInfo);
  file = (osInfo.dwPlatformId == VER_PLATFORM_WIN32_NT)
           ? GooFile::open(fileNameU)
           : GooFile::open(fileName);
  if (!file) {
    error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
    errCode = errOpenFile;
    return;
  }

  // create stream
  Object nullObj;
  nullObj.initNull();
  str = new FileStream(file, 0, gFalse, file->size(), &nullObj);

  ok = setup(ownerPassword, userPassword);
}
#endif
212 PDFDoc::PDFDoc(BaseStream *strA, GooString *ownerPassword,
213 GooString *userPassword, void *guiDataA) {
214 #ifdef _WIN32
215 int n, i;
216 #endif
218 init();
219 guiData = guiDataA;
220 if (strA->getFileName()) {
221 fileName = strA->getFileName()->copy();
222 #ifdef _WIN32
223 n = fileName->getLength();
224 fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
225 for (i = 0; i < n; ++i) {
226 fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
228 fileNameU[n] = L'\0';
229 #endif
230 } else {
231 fileName = NULL;
232 #ifdef _WIN32
233 fileNameU = NULL;
234 #endif
236 str = strA;
237 ok = setup(ownerPassword, userPassword);
240 GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
241 pdfdocLocker();
242 str->setPos(0, -1);
243 if (str->getPos() < 0)
245 error(errSyntaxError, -1, "Document base stream is not seekable");
246 return gFalse;
249 str->reset();
251 // check footer
252 // Adobe does not seem to enforce %%EOF, so we do the same
253 // if (!checkFooter()) return gFalse;
255 // check header
256 checkHeader();
258 GBool wasReconstructed = false;
260 // read xref table
261 xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed);
262 if (!xref->isOk()) {
263 if (wasReconstructed) {
264 delete xref;
265 startXRefPos = -1;
266 xref = new XRef(str, getStartXRef(gTrue), getMainXRefEntriesOffset(gTrue), &wasReconstructed);
268 if (!xref->isOk()) {
269 error(errSyntaxError, -1, "Couldn't read xref table");
270 errCode = xref->getErrorCode();
271 return gFalse;
275 // check for encryption
276 if (!checkEncryption(ownerPassword, userPassword)) {
277 errCode = errEncrypted;
278 return gFalse;
281 // read catalog
282 catalog = new Catalog(this);
283 if (catalog && !catalog->isOk()) {
284 if (!wasReconstructed)
286 // try one more time to contruct the Catalog, maybe the problem is damaged XRef
287 delete catalog;
288 delete xref;
289 xref = new XRef(str, 0, 0, NULL, true);
290 catalog = new Catalog(this);
293 if (catalog && !catalog->isOk()) {
294 error(errSyntaxError, -1, "Couldn't read page catalog");
295 errCode = errBadCatalog;
296 return gFalse;
300 // done
301 return gTrue;
304 PDFDoc::~PDFDoc() {
305 if (pageCache) {
306 for (int i = 0; i < getNumPages(); i++) {
307 if (pageCache[i]) {
308 delete pageCache[i];
311 gfree(pageCache);
313 delete secHdlr;
314 #ifndef DISABLE_OUTLINE
315 if (outline) {
316 delete outline;
318 #endif
319 if (catalog) {
320 delete catalog;
322 if (xref) {
323 delete xref;
325 if (hints) {
326 delete hints;
328 if (linearization) {
329 delete linearization;
331 if (str) {
332 delete str;
334 if (file) {
335 delete file;
337 if (fileName) {
338 delete fileName;
340 #ifdef _WIN32
341 if (fileNameU) {
342 gfree(fileNameU);
344 #endif
345 #if MULTITHREADED
346 gDestroyMutex(&mutex);
347 #endif
351 // Check for a %%EOF at the end of this stream
352 GBool PDFDoc::checkFooter() {
353 // we look in the last 1024 chars because Adobe does the same
354 char *eof = new char[1025];
355 Goffset pos = str->getPos();
356 str->setPos(1024, -1);
357 int i, ch;
358 for (i = 0; i < 1024; i++)
360 ch = str->getChar();
361 if (ch == EOF)
362 break;
363 eof[i] = ch;
365 eof[i] = '\0';
367 bool found = false;
368 for (i = i - 5; i >= 0; i--) {
369 if (strncmp (&eof[i], "%%EOF", 5) == 0) {
370 found = true;
371 break;
374 if (!found)
376 error(errSyntaxError, -1, "Document has not the mandatory ending %%EOF");
377 errCode = errDamaged;
378 delete[] eof;
379 return gFalse;
381 delete[] eof;
382 str->setPos(pos);
383 return gTrue;
386 // Check for a PDF header on this stream. Skip past some garbage
387 // if necessary.
388 void PDFDoc::checkHeader() {
389 char hdrBuf[headerSearchSize+1];
390 char *p;
391 char *tokptr;
392 int i;
394 pdfMajorVersion = 0;
395 pdfMinorVersion = 0;
396 for (i = 0; i < headerSearchSize; ++i) {
397 hdrBuf[i] = str->getChar();
399 hdrBuf[headerSearchSize] = '\0';
400 for (i = 0; i < headerSearchSize - 5; ++i) {
401 if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
402 break;
405 if (i >= headerSearchSize - 5) {
406 error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
407 return;
409 str->moveStart(i);
410 if (!(p = strtok_r(&hdrBuf[i+5], " \t\n\r", &tokptr))) {
411 error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
412 return;
414 sscanf(p, "%d.%d", &pdfMajorVersion, &pdfMinorVersion);
415 // We don't do the version check. Don't add it back in.
418 GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
419 Object encrypt;
420 GBool encrypted;
421 GBool ret;
423 xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
424 if ((encrypted = encrypt.isDict())) {
425 if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
426 if (secHdlr->isUnencrypted()) {
427 // no encryption
428 ret = gTrue;
429 } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
430 // authorization succeeded
431 xref->setEncryption(secHdlr->getPermissionFlags(),
432 secHdlr->getOwnerPasswordOk(),
433 secHdlr->getFileKey(),
434 secHdlr->getFileKeyLength(),
435 secHdlr->getEncVersion(),
436 secHdlr->getEncRevision(),
437 secHdlr->getEncAlgorithm());
438 ret = gTrue;
439 } else {
440 // authorization failed
441 ret = gFalse;
443 } else {
444 // couldn't find the matching security handler
445 ret = gFalse;
447 } else {
448 // document is not encrypted
449 ret = gTrue;
451 encrypt.free();
452 return ret;
455 void PDFDoc::displayPage(OutputDev *out, int page,
456 double hDPI, double vDPI, int rotate,
457 GBool useMediaBox, GBool crop, GBool printing,
458 GBool (*abortCheckCbk)(void *data),
459 void *abortCheckCbkData,
460 GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
461 void *annotDisplayDecideCbkData, GBool copyXRef) {
462 if (globalParams->getPrintCommands()) {
463 printf("***** page %d *****\n", page);
466 if (getPage(page))
467 getPage(page)->display(out, hDPI, vDPI,
468 rotate, useMediaBox, crop, printing,
469 abortCheckCbk, abortCheckCbkData,
470 annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
474 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
475 double hDPI, double vDPI, int rotate,
476 GBool useMediaBox, GBool crop, GBool printing,
477 GBool (*abortCheckCbk)(void *data),
478 void *abortCheckCbkData,
479 GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
480 void *annotDisplayDecideCbkData) {
481 int page;
483 for (page = firstPage; page <= lastPage; ++page) {
484 displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
485 abortCheckCbk, abortCheckCbkData,
486 annotDisplayDecideCbk, annotDisplayDecideCbkData);
490 void PDFDoc::displayPageSlice(OutputDev *out, int page,
491 double hDPI, double vDPI, int rotate,
492 GBool useMediaBox, GBool crop, GBool printing,
493 int sliceX, int sliceY, int sliceW, int sliceH,
494 GBool (*abortCheckCbk)(void *data),
495 void *abortCheckCbkData,
496 GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
497 void *annotDisplayDecideCbkData, GBool copyXRef) {
498 if (getPage(page))
499 getPage(page)->displaySlice(out, hDPI, vDPI,
500 rotate, useMediaBox, crop,
501 sliceX, sliceY, sliceW, sliceH,
502 printing,
503 abortCheckCbk, abortCheckCbkData,
504 annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
507 Links *PDFDoc::getLinks(int page) {
508 Page *p = getPage(page);
509 if (!p) {
510 return new Links (NULL);
512 return p->getLinks();
515 void PDFDoc::processLinks(OutputDev *out, int page) {
516 if (getPage(page))
517 getPage(page)->processLinks(out);
520 Linearization *PDFDoc::getLinearization()
522 if (!linearization) {
523 linearization = new Linearization(str);
525 return linearization;
528 GBool PDFDoc::isLinearized(GBool tryingToReconstruct) {
529 if ((str->getLength()) &&
530 (getLinearization()->getLength() == str->getLength()))
531 return gTrue;
532 else {
533 if (tryingToReconstruct)
534 return getLinearization()->getLength() > 0;
535 else
536 return gFalse;
540 static GBool
541 get_id (GooString *encodedidstring, GooString *id) {
542 const char *encodedid = encodedidstring->getCString();
543 char pdfid[pdfIdLength + 1];
544 int n;
546 if (encodedidstring->getLength() != pdfIdLength / 2)
547 return gFalse;
549 n = sprintf(pdfid, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
550 encodedid[0] & 0xff, encodedid[1] & 0xff, encodedid[2] & 0xff, encodedid[3] & 0xff,
551 encodedid[4] & 0xff, encodedid[5] & 0xff, encodedid[6] & 0xff, encodedid[7] & 0xff,
552 encodedid[8] & 0xff, encodedid[9] & 0xff, encodedid[10] & 0xff, encodedid[11] & 0xff,
553 encodedid[12] & 0xff, encodedid[13] & 0xff, encodedid[14] & 0xff, encodedid[15] & 0xff);
554 if (n != pdfIdLength)
555 return gFalse;
557 id->Set(pdfid, pdfIdLength);
558 return gTrue;
561 GBool PDFDoc::getID(GooString *permanent_id, GooString *update_id) {
562 Object obj;
563 xref->getTrailerDict()->dictLookup ("ID", &obj);
565 if (obj.isArray() && obj.arrayGetLength() == 2) {
566 Object obj2;
568 if (permanent_id) {
569 if (obj.arrayGet(0, &obj2)->isString()) {
570 if (!get_id (obj2.getString(), permanent_id)) {
571 obj2.free();
572 return gFalse;
574 } else {
575 error(errSyntaxError, -1, "Invalid permanent ID");
576 obj2.free();
577 return gFalse;
579 obj2.free();
582 if (update_id) {
583 if (obj.arrayGet(1, &obj2)->isString()) {
584 if (!get_id (obj2.getString(), update_id)) {
585 obj2.free();
586 return gFalse;
588 } else {
589 error(errSyntaxError, -1, "Invalid update ID");
590 obj2.free();
591 return gFalse;
593 obj2.free();
596 obj.free();
598 return gTrue;
600 obj.free();
602 return gFalse;
605 Hints *PDFDoc::getHints()
607 if (!hints && isLinearized()) {
608 hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
611 return hints;
614 int PDFDoc::savePageAs(GooString *name, int pageNo)
616 FILE *f;
617 OutStream *outStr;
618 XRef *yRef, *countRef;
619 int rootNum = getXRef()->getNumObjects() + 1;
621 // Make sure that special flags are set, because we are going to read
622 // all objects, including Unencrypted ones.
623 xref->scanSpecialFlags();
625 Guchar *fileKey;
626 CryptAlgorithm encAlgorithm;
627 int keyLength;
628 xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
630 if (pageNo < 1 || pageNo > getNumPages() || !getCatalog()->getPage(pageNo)) {
631 error(errInternal, -1, "Illegal pageNo: {0:d}({1:d})", pageNo, getNumPages() );
632 return errOpenFile;
634 PDFRectangle *cropBox = NULL;
635 if (getCatalog()->getPage(pageNo)->isCropped()) {
636 cropBox = getCatalog()->getPage(pageNo)->getCropBox();
638 replacePageDict(pageNo,
639 getCatalog()->getPage(pageNo)->getRotate(),
640 getCatalog()->getPage(pageNo)->getMediaBox(),
641 cropBox);
642 Ref *refPage = getCatalog()->getPageRef(pageNo);
643 Object page;
644 getXRef()->fetch(refPage->num, refPage->gen, &page);
646 if (!(f = fopen(name->getCString(), "wb"))) {
647 error(errIO, -1, "Couldn't open file '{0:t}'", name);
648 return errOpenFile;
650 outStr = new FileOutStream(f,0);
652 yRef = new XRef(getXRef()->getTrailerDict());
654 if (secHdlr != NULL && !secHdlr->isUnencrypted()) {
655 yRef->setEncryption(secHdlr->getPermissionFlags(),
656 secHdlr->getOwnerPasswordOk(), fileKey, keyLength, secHdlr->getEncVersion(), secHdlr->getEncRevision(), encAlgorithm);
658 countRef = new XRef();
659 Object *trailerObj = getXRef()->getTrailerDict();
660 if (trailerObj->isDict()) {
661 markPageObjects(trailerObj->getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
663 yRef->add(0, 65535, 0, gFalse);
664 writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
666 // get and mark info dict
667 Object infoObj;
668 getXRef()->getDocInfo(&infoObj);
669 if (infoObj.isDict()) {
670 Dict *infoDict = infoObj.getDict();
671 markPageObjects(infoDict, yRef, countRef, 0, refPage->num, rootNum + 2);
672 if (trailerObj->isDict()) {
673 Dict *trailerDict = trailerObj->getDict();
674 Object ref;
675 trailerDict->lookupNF("Info", &ref);
676 if (ref.isRef()) {
677 yRef->add(ref.getRef().num, ref.getRef().gen, 0, gTrue);
678 if (getXRef()->getEntry(ref.getRef().num)->type == xrefEntryCompressed) {
679 yRef->getEntry(ref.getRef().num)->type = xrefEntryCompressed;
682 ref.free();
685 infoObj.free();
687 // get and mark output intents etc.
688 Object catObj, pagesObj, resourcesObj, annotsObj, afObj;
689 getXRef()->getCatalog(&catObj);
690 Dict *catDict = catObj.getDict();
691 catDict->lookup("Pages", &pagesObj);
692 catDict->lookupNF("AcroForm", &afObj);
693 if (!afObj.isNull()) {
694 markAcroForm(&afObj, yRef, countRef, 0, refPage->num, rootNum + 2);
695 afObj.free();
697 Dict *pagesDict = pagesObj.getDict();
698 pagesDict->lookup("Resources", &resourcesObj);
699 if (resourcesObj.isDict())
700 markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
701 markPageObjects(catDict, yRef, countRef, 0, refPage->num, rootNum + 2);
703 Dict *pageDict = page.getDict();
704 if (resourcesObj.isNull() && !pageDict->hasKey("Resources")) {
705 Dict *resourceDict = getCatalog()->getPage(pageNo)->getResourceDict();
706 if (resourceDict != NULL) {
707 resourcesObj.initDict(resourceDict);
708 markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
711 markPageObjects(pageDict, yRef, countRef, 0, refPage->num, rootNum + 2);
712 pageDict->lookupNF("Annots", &annotsObj);
713 if (!annotsObj.isNull()) {
714 markAnnotations(&annotsObj, yRef, countRef, 0, refPage->num, rootNum + 2);
715 annotsObj.free();
717 yRef->markUnencrypted();
718 writePageObjects(outStr, yRef, 0);
720 yRef->add(rootNum,0,outStr->getPos(),gTrue);
721 outStr->printf("%d 0 obj\n", rootNum);
722 outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
723 for (int j = 0; j < catDict->getLength(); j++) {
724 const char *key = catDict->getKey(j);
725 if (strcmp(key, "Type") != 0 &&
726 strcmp(key, "Catalog") != 0 &&
727 strcmp(key, "Pages") != 0)
729 if (j > 0) outStr->printf(" ");
730 Object value; catDict->getValNF(j, &value);
731 outStr->printf("/%s ", key);
732 writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
733 value.free();
736 catObj.free();
737 pagesObj.free();
738 outStr->printf(">>\nendobj\n");
740 yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
741 outStr->printf("%d 0 obj\n", rootNum + 1);
742 outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum + 2);
743 if (resourcesObj.isDict()) {
744 outStr->printf("/Resources ");
745 writeObject(&resourcesObj, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
746 resourcesObj.free();
748 outStr->printf(">>\n");
749 outStr->printf("endobj\n");
751 yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
752 outStr->printf("%d 0 obj\n", rootNum + 2);
753 outStr->printf("<< ");
754 for (int n = 0; n < pageDict->getLength(); n++) {
755 if (n > 0) outStr->printf(" ");
756 const char *key = pageDict->getKey(n);
757 Object value; pageDict->getValNF(n, &value);
758 if (strcmp(key, "Parent") == 0) {
759 outStr->printf("/Parent %d 0 R", rootNum + 1);
760 } else {
761 outStr->printf("/%s ", key);
762 writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
764 value.free();
766 outStr->printf(" >>\nendobj\n");
767 page.free();
769 Goffset uxrefOffset = outStr->getPos();
770 Ref ref;
771 ref.num = rootNum;
772 ref.gen = 0;
773 Dict *trailerDict = createTrailerDict(rootNum + 3, gFalse, 0, &ref, getXRef(),
774 name->getCString(), uxrefOffset);
775 writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */,
776 uxrefOffset, outStr, getXRef());
777 delete trailerDict;
779 outStr->close();
780 fclose(f);
781 delete yRef;
782 delete countRef;
783 delete outStr;
785 return errNone;
788 int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
789 FILE *f;
790 OutStream *outStr;
791 int res;
793 if (!(f = fopen(name->getCString(), "wb"))) {
794 error(errIO, -1, "Couldn't open file '{0:t}'", name);
795 return errOpenFile;
797 outStr = new FileOutStream(f,0);
798 res = saveAs(outStr, mode);
799 delete outStr;
800 fclose(f);
801 return res;
804 int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode) {
806 // find if we have updated objects
807 GBool updated = gFalse;
808 for(int i=0; i<xref->getNumObjects(); i++) {
809 if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) {
810 updated = gTrue;
811 break;
815 if (!updated && mode == writeStandard) {
816 // simply copy the original file
817 saveWithoutChangesAs (outStr);
818 } else if (mode == writeForceRewrite) {
819 saveCompleteRewrite(outStr);
820 } else {
821 saveIncrementalUpdate(outStr);
824 return errNone;
827 int PDFDoc::saveWithoutChangesAs(GooString *name) {
828 FILE *f;
829 OutStream *outStr;
830 int res;
832 if (!(f = fopen(name->getCString(), "wb"))) {
833 error(errIO, -1, "Couldn't open file '{0:t}'", name);
834 return errOpenFile;
837 outStr = new FileOutStream(f,0);
838 res = saveWithoutChangesAs(outStr);
839 delete outStr;
841 fclose(f);
843 return res;
846 int PDFDoc::saveWithoutChangesAs(OutStream *outStr) {
847 int c;
849 BaseStream *copyStr = str->copy();
850 copyStr->reset();
851 while ((c = copyStr->getChar()) != EOF) {
852 outStr->put(c);
854 copyStr->close();
855 delete copyStr;
857 return errNone;
860 void PDFDoc::saveIncrementalUpdate (OutStream* outStr)
862 XRef *uxref;
863 int c;
864 //copy the original file
865 BaseStream *copyStr = str->copy();
866 copyStr->reset();
867 while ((c = copyStr->getChar()) != EOF) {
868 outStr->put(c);
870 copyStr->close();
871 delete copyStr;
873 Guchar *fileKey;
874 CryptAlgorithm encAlgorithm;
875 int keyLength;
876 xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
878 uxref = new XRef();
879 uxref->add(0, 65535, 0, gFalse);
880 xref->lock();
881 for(int i=0; i<xref->getNumObjects(); i++) {
882 if ((xref->getEntry(i)->type == xrefEntryFree) &&
883 (xref->getEntry(i)->gen == 0)) //we skip the irrelevant free objects
884 continue;
886 if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) { //we have an updated object
887 Ref ref;
888 ref.num = i;
889 ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen;
890 if (xref->getEntry(i)->type != xrefEntryFree) {
891 Object obj1;
892 xref->fetch(ref.num, ref.gen, &obj1, 1);
893 Goffset offset = writeObjectHeader(&ref, outStr);
894 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
895 writeObjectFooter(outStr);
896 uxref->add(ref.num, ref.gen, offset, gTrue);
897 obj1.free();
898 } else {
899 uxref->add(ref.num, ref.gen, 0, gFalse);
903 xref->unlock();
904 if (uxref->getNumObjects() == 0) { //we have nothing to update
905 delete uxref;
906 return;
909 Goffset uxrefOffset = outStr->getPos();
910 int numobjects = xref->getNumObjects();
911 const char *fileNameA = fileName ? fileName->getCString() : NULL;
912 Ref rootRef, uxrefStreamRef;
913 rootRef.num = getXRef()->getRootNum();
914 rootRef.gen = getXRef()->getRootGen();
916 // Output a xref stream if there is a xref stream already
917 GBool xRefStream = xref->isXRefStream();
919 if (xRefStream) {
920 // Append an entry for the xref stream itself
921 uxrefStreamRef.num = numobjects++;
922 uxrefStreamRef.gen = 0;
923 uxref->add(uxrefStreamRef.num, uxrefStreamRef.gen, uxrefOffset, gTrue);
926 Dict *trailerDict = createTrailerDict(numobjects, gTrue, getStartXRef(), &rootRef, getXRef(), fileNameA, uxrefOffset);
927 if (xRefStream) {
928 writeXRefStreamTrailer(trailerDict, uxref, &uxrefStreamRef, uxrefOffset, outStr, getXRef());
929 } else {
930 writeXRefTableTrailer(trailerDict, uxref, gFalse, uxrefOffset, outStr, getXRef());
933 delete trailerDict;
934 delete uxref;
937 void PDFDoc::saveCompleteRewrite (OutStream* outStr)
939 // Make sure that special flags are set, because we are going to read
940 // all objects, including Unencrypted ones.
941 xref->scanSpecialFlags();
943 Guchar *fileKey;
944 CryptAlgorithm encAlgorithm;
945 int keyLength;
946 xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
948 outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion);
949 XRef *uxref = new XRef();
950 uxref->add(0, 65535, 0, gFalse);
951 xref->lock();
952 for(int i=0; i<xref->getNumObjects(); i++) {
953 Object obj1;
954 Ref ref;
955 XRefEntryType type = xref->getEntry(i)->type;
956 if (type == xrefEntryFree) {
957 ref.num = i;
958 ref.gen = xref->getEntry(i)->gen;
959 /* the XRef class adds a lot of irrelevant free entries, we only want the significant one
960 and we don't want the one with num=0 because it has already been added (gen = 65535)*/
961 if (ref.gen > 0 && ref.num > 0)
962 uxref->add(ref.num, ref.gen, 0, gFalse);
963 } else if (xref->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
964 // This entry must not be written, put a free entry instead (with incremented gen)
965 ref.num = i;
966 ref.gen = xref->getEntry(i)->gen + 1;
967 uxref->add(ref.num, ref.gen, 0, gFalse);
968 } else if (type == xrefEntryUncompressed){
969 ref.num = i;
970 ref.gen = xref->getEntry(i)->gen;
971 xref->fetch(ref.num, ref.gen, &obj1, 1);
972 Goffset offset = writeObjectHeader(&ref, outStr);
973 // Write unencrypted objects in unencrypted form
974 if (xref->getEntry(i)->getFlag(XRefEntry::Unencrypted)) {
975 writeObject(&obj1, outStr, NULL, cryptRC4, 0, 0, 0);
976 } else {
977 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
979 writeObjectFooter(outStr);
980 uxref->add(ref.num, ref.gen, offset, gTrue);
981 obj1.free();
982 } else if (type == xrefEntryCompressed) {
983 ref.num = i;
984 ref.gen = 0; //compressed entries have gen == 0
985 xref->fetch(ref.num, ref.gen, &obj1, 1);
986 Goffset offset = writeObjectHeader(&ref, outStr);
987 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
988 writeObjectFooter(outStr);
989 uxref->add(ref.num, ref.gen, offset, gTrue);
990 obj1.free();
993 xref->unlock();
994 Goffset uxrefOffset = outStr->getPos();
995 writeXRefTableTrailer(uxrefOffset, uxref, gTrue /* write all entries */,
996 uxref->getNumObjects(), outStr, gFalse /* complete rewrite */);
997 delete uxref;
1000 void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
1001 CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1003 Object obj1;
1004 outStr->printf("<<");
1005 for (int i=0; i<dict->getLength(); i++) {
1006 GooString keyName(dict->getKey(i));
1007 GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
1008 outStr->printf("/%s ", keyNameToPrint->getCString());
1009 delete keyNameToPrint;
1010 writeObject(dict->getValNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1011 obj1.free();
1013 outStr->printf(">> ");
1016 void PDFDoc::writeStream (Stream* str, OutStream* outStr)
1018 outStr->printf("stream\r\n");
1019 str->reset();
1020 for (int c=str->getChar(); c!= EOF; c=str->getChar()) {
1021 outStr->printf("%c", c);
1023 outStr->printf("\r\nendstream\r\n");
1026 void PDFDoc::writeRawStream (Stream* str, OutStream* outStr)
1028 Object obj1;
1029 str->getDict()->lookup("Length", &obj1);
1030 if (!obj1.isInt() && !obj1.isInt64()) {
1031 error (errSyntaxError, -1, "PDFDoc::writeRawStream, no Length in stream dict");
1032 return;
1035 Goffset length;
1036 if (obj1.isInt())
1037 length = obj1.getInt();
1038 else
1039 length = obj1.getInt64();
1040 obj1.free();
1042 outStr->printf("stream\r\n");
1043 str->unfilteredReset();
1044 for (Goffset i = 0; i < length; i++) {
1045 int c = str->getUnfilteredChar();
1046 if (unlikely(c == EOF)) {
1047 error (errSyntaxError, -1, "PDFDoc::writeRawStream: EOF reading stream");
1048 break;
1050 outStr->printf("%c", c);
1052 str->reset();
1053 outStr->printf("\r\nendstream\r\n");
1056 void PDFDoc::writeString (GooString* s, OutStream* outStr, Guchar *fileKey,
1057 CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1059 // Encrypt string if encryption is enabled
1060 GooString *sEnc = NULL;
1061 if (fileKey) {
1062 Object obj;
1063 EncryptStream *enc = new EncryptStream(new MemStream(s->getCString(), 0, s->getLength(), obj.initNull()),
1064 fileKey, encAlgorithm, keyLength, objNum, objGen);
1065 sEnc = new GooString();
1066 int c;
1067 enc->reset();
1068 while ((c = enc->getChar()) != EOF) {
1069 sEnc->append((char)c);
1072 delete enc;
1073 s = sEnc;
1076 // Write data
1077 if (s->hasUnicodeMarker()) {
1078 //unicode string don't necessary end with \0
1079 const char* c = s->getCString();
1080 outStr->printf("(");
1081 for(int i=0; i<s->getLength(); i++) {
1082 char unescaped = *(c+i)&0x000000ff;
1083 //escape if needed
1084 if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
1085 outStr->printf("%c", '\\');
1086 outStr->printf("%c", unescaped);
1088 outStr->printf(") ");
1089 } else {
1090 const char* c = s->getCString();
1091 outStr->printf("(");
1092 for(int i=0; i<s->getLength(); i++) {
1093 char unescaped = *(c+i)&0x000000ff;
1094 //escape if needed
1095 if (unescaped == '\r')
1096 outStr->printf("\\r");
1097 else if (unescaped == '\n')
1098 outStr->printf("\\n");
1099 else {
1100 if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
1101 outStr->printf("%c", '\\');
1103 outStr->printf("%c", unescaped);
1106 outStr->printf(") ");
1109 delete sEnc;
1112 Goffset PDFDoc::writeObjectHeader (Ref *ref, OutStream* outStr)
1114 Goffset offset = outStr->getPos();
1115 outStr->printf("%i %i obj ", ref->num, ref->gen);
1116 return offset;
1119 void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
1120 CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1122 Array *array;
1123 Object obj1;
1124 Goffset tmp;
1126 switch (obj->getType()) {
1127 case objBool:
1128 outStr->printf("%s ", obj->getBool()?"true":"false");
1129 break;
1130 case objInt:
1131 outStr->printf("%i ", obj->getInt());
1132 break;
1133 case objInt64:
1134 outStr->printf("%lli ", obj->getInt64());
1135 break;
1136 case objReal:
1138 GooString s;
1139 s.appendf("{0:.10g}", obj->getReal());
1140 outStr->printf("%s ", s.getCString());
1141 break;
1143 case objString:
1144 writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, objNum, objGen);
1145 break;
1146 case objName:
1148 GooString name(obj->getName());
1149 GooString *nameToPrint = name.sanitizedName(gFalse /* non ps mode */);
1150 outStr->printf("/%s ", nameToPrint->getCString());
1151 delete nameToPrint;
1152 break;
1154 case objNull:
1155 outStr->printf( "null ");
1156 break;
1157 case objArray:
1158 array = obj->getArray();
1159 outStr->printf("[");
1160 for (int i=0; i<array->getLength(); i++) {
1161 writeObject(array->getNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1162 obj1.free();
1164 outStr->printf("] ");
1165 break;
1166 case objDict:
1167 writeDictionnary (obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1168 break;
1169 case objStream:
1171 //We can't modify stream with the current implementation (no write functions in Stream API)
1172 // => the only type of streams which that have been modified are internal streams (=strWeird)
1173 Stream *stream = obj->getStream();
1174 if (stream->getKind() == strWeird || stream->getKind() == strCrypt) {
1175 //we write the stream unencoded => TODO: write stream encoder
1177 // Encrypt stream
1178 EncryptStream *encStream = NULL;
1179 GBool removeFilter = gTrue;
1180 if (stream->getKind() == strWeird && fileKey) {
1181 Object filter;
1182 stream->getDict()->lookup("Filter", &filter);
1183 if (!filter.isName("Crypt")) {
1184 if (filter.isArray()) {
1185 for (int i = 0; i < filter.arrayGetLength(); i++) {
1186 Object filterEle;
1187 filter.arrayGet(i, &filterEle);
1188 if (filterEle.isName("Crypt")) {
1189 filterEle.free();
1190 removeFilter = gFalse;
1191 break;
1193 filterEle.free();
1195 if (removeFilter) {
1196 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1197 encStream->setAutoDelete(gFalse);
1198 stream = encStream;
1200 } else {
1201 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1202 encStream->setAutoDelete(gFalse);
1203 stream = encStream;
1205 } else {
1206 removeFilter = gFalse;
1208 filter.free();
1209 } else if (fileKey != NULL) { // Encrypt stream
1210 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1211 encStream->setAutoDelete(gFalse);
1212 stream = encStream;
1215 stream->reset();
1216 //recalculate stream length
1217 tmp = 0;
1218 for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
1219 tmp++;
1221 obj1.initInt64(tmp);
1222 stream->getDict()->set("Length", &obj1);
1224 //Remove Stream encoding
1225 if (removeFilter) {
1226 stream->getDict()->remove("Filter");
1228 stream->getDict()->remove("DecodeParms");
1230 writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1231 writeStream (stream,outStr);
1232 delete encStream;
1233 obj1.free();
1234 } else {
1235 //raw stream copy
1236 FilterStream *fs = dynamic_cast<FilterStream*>(stream);
1237 if (fs) {
1238 BaseStream *bs = fs->getBaseStream();
1239 if (bs) {
1240 Goffset streamEnd;
1241 if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
1242 Object val;
1243 val.initInt64(streamEnd - bs->getStart());
1244 stream->getDict()->set("Length", &val);
1248 writeDictionnary (stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1249 writeRawStream (stream, outStr);
1251 break;
1253 case objRef:
1254 outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
1255 break;
1256 case objCmd:
1257 outStr->printf("%s\n", obj->getCmd());
1258 break;
1259 case objError:
1260 outStr->printf("error\r\n");
1261 break;
1262 case objEOF:
1263 outStr->printf("eof\r\n");
1264 break;
1265 case objNone:
1266 outStr->printf("none\r\n");
1267 break;
1268 default:
1269 error(errUnimplemented, -1,"Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj->getType());
1270 break;
1274 void PDFDoc::writeObjectFooter (OutStream* outStr)
1276 outStr->printf("endobj\r\n");
1279 Dict *PDFDoc::createTrailerDict(int uxrefSize, GBool incrUpdate, Goffset startxRef,
1280 Ref *root, XRef *xRef, const char *fileName, Goffset fileSize)
1282 Dict *trailerDict = new Dict(xRef);
1283 Object obj1;
1284 obj1.initInt(uxrefSize);
1285 trailerDict->set("Size", &obj1);
1286 obj1.free();
1288 //build a new ID, as recommended in the reference, uses:
1289 // - current time
1290 // - file name
1291 // - file size
1292 // - values of entry in information dictionnary
1293 GooString message;
1294 char buffer[256];
1295 sprintf(buffer, "%i", (int)time(NULL));
1296 message.append(buffer);
1298 if (fileName)
1299 message.append(fileName);
1301 sprintf(buffer, "%lli", (long long)fileSize);
1302 message.append(buffer);
1304 //info dict -- only use text string
1305 if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
1306 for(int i=0; i<obj1.getDict()->getLength(); i++) {
1307 Object obj2;
1308 obj1.getDict()->getVal(i, &obj2);
1309 if (obj2.isString()) {
1310 message.append(obj2.getString());
1312 obj2.free();
1315 obj1.free();
1317 GBool hasEncrypt = gFalse;
1318 if (!xRef->getTrailerDict()->isNone()) {
1319 Object obj2;
1320 xRef->getTrailerDict()->dictLookupNF("Encrypt", &obj2);
1321 if (!obj2.isNull()) {
1322 trailerDict->set("Encrypt", &obj2);
1323 hasEncrypt = gTrue;
1324 obj2.free();
1328 //calculate md5 digest
1329 Guchar digest[16];
1330 md5((Guchar*)message.getCString(), message.getLength(), digest);
1331 obj1.initString(new GooString((const char*)digest, 16));
1333 //create ID array
1334 Object obj2,obj3,obj5;
1335 obj2.initArray(xRef);
1337 // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1338 if (incrUpdate || hasEncrypt) {
1339 Object obj4;
1340 //only update the second part of the array
1341 xRef->getTrailerDict()->getDict()->lookup("ID", &obj4);
1342 if (!obj4.isArray()) {
1343 error(errSyntaxWarning, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1344 } else {
1345 //Get the first part of the ID
1346 obj4.arrayGet(0,&obj3);
1348 obj2.arrayAdd(&obj3);
1349 obj2.arrayAdd(&obj1);
1350 trailerDict->set("ID", &obj2);
1352 obj4.free();
1353 } else {
1354 //new file => same values for the two identifiers
1355 obj2.arrayAdd(&obj1);
1356 obj1.initString(new GooString((const char*)digest, 16));
1357 obj2.arrayAdd(&obj1);
1358 trailerDict->set("ID", &obj2);
1361 obj1.initRef(root->num, root->gen);
1362 trailerDict->set("Root", &obj1);
1364 if (incrUpdate) {
1365 obj1.initInt64(startxRef);
1366 trailerDict->set("Prev", &obj1);
1369 if (!xRef->getTrailerDict()->isNone()) {
1370 xRef->getDocInfoNF(&obj5);
1371 if (!obj5.isNull()) {
1372 trailerDict->set("Info", &obj5);
1376 return trailerDict;
1379 void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAllEntries, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1381 uxref->writeTableToFile( outStr, writeAllEntries );
1382 outStr->printf( "trailer\r\n");
1383 writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1384 outStr->printf( "\r\nstartxref\r\n");
1385 outStr->printf( "%lli\r\n", uxrefOffset);
1386 outStr->printf( "%%%%EOF\r\n");
1389 void PDFDoc::writeXRefStreamTrailer (Dict *trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1391 GooString stmData;
1393 // Fill stmData and some trailerDict fields
1394 uxref->writeStreamToBuffer(&stmData, trailerDict, xRef);
1396 // Create XRef stream object and write it
1397 Object obj1;
1398 MemStream *mStream = new MemStream( stmData.getCString(), 0,
1399 stmData.getLength(), obj1.initDict(trailerDict) );
1400 writeObjectHeader(uxrefStreamRef, outStr);
1401 writeObject(obj1.initStream(mStream), outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1402 writeObjectFooter(outStr);
1403 obj1.free();
1405 outStr->printf( "startxref\r\n");
1406 outStr->printf( "%lli\r\n", uxrefOffset);
1407 outStr->printf( "%%%%EOF\r\n");
1410 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, GBool writeAllEntries,
1411 int uxrefSize, OutStream* outStr, GBool incrUpdate)
1413 const char *fileNameA = fileName ? fileName->getCString() : NULL;
1414 // file size (doesn't include the trailer)
1415 unsigned int fileSize = 0;
1416 int c;
1417 str->reset();
1418 while ((c = str->getChar()) != EOF) {
1419 fileSize++;
1421 str->close();
1422 Ref ref;
1423 ref.num = getXRef()->getRootNum();
1424 ref.gen = getXRef()->getRootGen();
1425 Dict * trailerDict = createTrailerDict(uxrefSize, incrUpdate, getStartXRef(), &ref,
1426 getXRef(), fileNameA, fileSize);
1427 writeXRefTableTrailer(trailerDict, uxref, writeAllEntries, uxrefOffset, outStr, getXRef());
1428 delete trailerDict;
1431 void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
1433 outStr->printf("%%PDF-%d.%d\n", major, minor);
1434 outStr->printf("%%\xE2\xE3\xCF\xD3\n");
1437 void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1439 Object obj1;
1440 for (int i=0; i<dict->getLength(); i++) {
1441 const char *key = dict->getKey(i);
1442 if (strcmp(key, "Annots") != 0) {
1443 markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1444 } else {
1445 Object annotsObj;
1446 dict->getValNF(i, &annotsObj);
1447 if (!annotsObj.isNull()) {
1448 markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum);
1449 annotsObj.free();
1452 obj1.free();
1456 void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1458 Array *array;
1459 Object obj1;
1461 switch (obj->getType()) {
1462 case objArray:
1463 array = obj->getArray();
1464 for (int i=0; i<array->getLength(); i++) {
1465 markObject(array->getNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1466 obj1.free();
1468 break;
1469 case objDict:
1470 markDictionnary (obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1471 break;
1472 case objStream:
1474 Stream *stream = obj->getStream();
1475 markDictionnary (stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1477 break;
1478 case objRef:
1480 if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
1481 if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
1482 return; // already marked as free => should be replaced
1484 xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, gTrue);
1485 if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
1486 xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
1489 if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1490 countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree)
1492 countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue);
1493 } else {
1494 XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
1495 entry->gen++;
1496 if (entry->gen > 9)
1497 break;
1499 Object obj1;
1500 getXRef()->fetch(obj->getRef().num, obj->getRef().gen, &obj1);
1501 markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum);
1502 obj1.free();
1504 break;
1505 default:
1506 break;
1510 void PDFDoc::replacePageDict(int pageNo, int rotate,
1511 PDFRectangle *mediaBox,
1512 PDFRectangle *cropBox)
1514 Ref *refPage = getCatalog()->getPageRef(pageNo);
1515 Object page;
1516 getXRef()->fetch(refPage->num, refPage->gen, &page);
1517 Dict *pageDict = page.getDict();
1518 pageDict->remove("MediaBoxssdf");
1519 pageDict->remove("MediaBox");
1520 pageDict->remove("CropBox");
1521 pageDict->remove("ArtBox");
1522 pageDict->remove("BleedBox");
1523 pageDict->remove("TrimBox");
1524 pageDict->remove("Rotate");
1525 Object mediaBoxObj;
1526 mediaBoxObj.initArray(getXRef());
1527 Object murx;
1528 murx.initReal(mediaBox->x1);
1529 Object mury;
1530 mury.initReal(mediaBox->y1);
1531 Object mllx;
1532 mllx.initReal(mediaBox->x2);
1533 Object mlly;
1534 mlly.initReal(mediaBox->y2);
1535 mediaBoxObj.arrayAdd(&murx);
1536 mediaBoxObj.arrayAdd(&mury);
1537 mediaBoxObj.arrayAdd(&mllx);
1538 mediaBoxObj.arrayAdd(&mlly);
1539 pageDict->add(copyString("MediaBox"), &mediaBoxObj);
1540 if (cropBox != NULL) {
1541 Object cropBoxObj;
1542 cropBoxObj.initArray(getXRef());
1543 Object curx;
1544 curx.initReal(cropBox->x1);
1545 Object cury;
1546 cury.initReal(cropBox->y1);
1547 Object cllx;
1548 cllx.initReal(cropBox->x2);
1549 Object clly;
1550 clly.initReal(cropBox->y2);
1551 cropBoxObj.arrayAdd(&curx);
1552 cropBoxObj.arrayAdd(&cury);
1553 cropBoxObj.arrayAdd(&cllx);
1554 cropBoxObj.arrayAdd(&clly);
1555 pageDict->add(copyString("CropBox"), &cropBoxObj);
1556 cropBoxObj.getArray()->incRef();
1557 pageDict->add(copyString("TrimBox"), &cropBoxObj);
1558 } else {
1559 mediaBoxObj.getArray()->incRef();
1560 pageDict->add(copyString("TrimBox"), &mediaBoxObj);
1562 Object rotateObj;
1563 rotateObj.initInt(rotate);
1564 pageDict->add(copyString("Rotate"), &rotateObj);
1565 getXRef()->setModifiedObject(&page, *refPage);
1566 page.free();
1569 void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1571 pageDict->remove("OpenAction");
1572 pageDict->remove("Outlines");
1573 pageDict->remove("StructTreeRoot");
1575 for (int n = 0; n < pageDict->getLength(); n++) {
1576 const char *key = pageDict->getKey(n);
1577 Object value; pageDict->getValNF(n, &value);
1578 if (strcmp(key, "Parent") != 0 &&
1579 strcmp(key, "Pages") != 0 &&
1580 strcmp(key, "AcroForm") != 0 &&
1581 strcmp(key, "Annots") != 0 &&
1582 strcmp(key, "P") != 0 &&
1583 strcmp(key, "Root") != 0) {
1584 markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum);
1586 value.free();
1590 GBool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum) {
1591 Object annots;
1592 GBool modified = gFalse;
1593 annotsObj->fetch(getXRef(), &annots);
1594 if (annots.isArray()) {
1595 Array *array = annots.getArray();
1596 for (int i=array->getLength() - 1; i >= 0; i--) {
1597 Object obj1;
1598 if (array->get(i, &obj1)->isDict()) {
1599 Object type;
1600 Dict *dict = obj1.getDict();
1601 dict->lookup("Type", &type);
1602 if (type.isName() && strcmp(type.getName(), "Annot") == 0) {
1603 Object obj2;
1604 if (dict->lookupNF("P", &obj2)->isRef()) {
1605 if (obj2.getRef().num == oldPageNum) {
1606 Object obj3;
1607 array->getNF(i, &obj3);
1608 if (obj3.isRef()) {
1609 Object *newRef = new Object();
1610 newRef->initRef(newPageNum, 0);
1611 dict->set("P", newRef);
1612 getXRef()->setModifiedObject(&obj1, obj3.getRef());
1614 obj3.free();
1615 } else if (obj2.getRef().num == newPageNum) {
1616 obj1.free();
1617 obj2.free();
1618 type.free();
1619 continue;
1620 } else {
1621 Object page;
1622 getXRef()->fetch(obj2.getRef().num, obj2.getRef().gen, &page);
1623 if (page.isDict()) {
1624 Object pagetype;
1625 Dict *dict = page.getDict();
1626 dict->lookup("Type", &pagetype);
1627 if (!pagetype.isName() || strcmp(pagetype.getName(), "Page") != 0) {
1628 obj1.free();
1629 obj2.free();
1630 type.free();
1631 page.free();
1632 pagetype.free();
1633 continue;
1635 pagetype.free();
1637 page.free();
1638 obj1.free();
1639 obj2.free();
1640 type.free();
1641 array->remove(i);
1642 modified = gTrue;
1643 continue;
1646 obj2.free();
1648 type.free();
1649 markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum);
1651 obj1.free();
1652 array->getNF(i, &obj1);
1653 if (obj1.isRef()) {
1654 if (obj1.getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
1655 if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryFree) {
1656 continue; // already marked as free => should be replaced
1658 xRef->add(obj1.getRef().num + numOffset, obj1.getRef().gen, 0, gTrue);
1659 if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryCompressed) {
1660 xRef->getEntry(obj1.getRef().num + numOffset)->type = xrefEntryCompressed;
1663 if (obj1.getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1664 countRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree)
1666 countRef->add(obj1.getRef().num + numOffset, 1, 0, gTrue);
1667 } else {
1668 XRefEntry *entry = countRef->getEntry(obj1.getRef().num + numOffset);
1669 entry->gen++;
1672 obj1.free();
1675 if (annotsObj->isRef()) {
1676 if (annotsObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
1677 if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryFree) {
1678 return modified; // already marked as free => should be replaced
1680 xRef->add(annotsObj->getRef().num + numOffset, annotsObj->getRef().gen, 0, gTrue);
1681 if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryCompressed) {
1682 xRef->getEntry(annotsObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1685 if (annotsObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1686 countRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree)
1688 countRef->add(annotsObj->getRef().num + numOffset, 1, 0, gTrue);
1689 } else {
1690 XRefEntry *entry = countRef->getEntry(annotsObj->getRef().num + numOffset);
1691 entry->gen++;
1693 getXRef()->setModifiedObject(&annots, annotsObj->getRef());
1695 annots.free();
1696 return modified;
1699 void PDFDoc::markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum) {
1700 Object acroform;
1701 GBool modified = gFalse;
1702 afObj->fetch(getXRef(), &acroform);
1703 if (acroform.isDict()) {
1704 Dict *dict = acroform.getDict();
1705 for (int i=0; i < dict->getLength(); i++) {
1706 if (strcmp(dict->getKey(i), "Fields") == 0) {
1707 Object fields;
1708 modified = markAnnotations(dict->getValNF(i, &fields), xRef, countRef, numOffset, oldRefNum, newRefNum);
1709 fields.free();
1710 } else {
1711 Object obj;
1712 markObject(dict->getValNF(i, &obj), xRef, countRef, numOffset, oldRefNum, newRefNum);
1713 obj.free();
1717 if (afObj->isRef()) {
1718 if (afObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
1719 if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryFree) {
1720 return; // already marked as free => should be replaced
1722 xRef->add(afObj->getRef().num + numOffset, afObj->getRef().gen, 0, gTrue);
1723 if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryCompressed) {
1724 xRef->getEntry(afObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1727 if (afObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1728 countRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree)
1730 countRef->add(afObj->getRef().num + numOffset, 1, 0, gTrue);
1731 } else {
1732 XRefEntry *entry = countRef->getEntry(afObj->getRef().num + numOffset);
1733 entry->gen++;
1735 if (modified){
1736 getXRef()->setModifiedObject(&acroform, afObj->getRef());
1739 acroform.free();
1740 return;
1743 Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset, GBool combine)
1745 Guint objectsCount = 0; //count the number of objects in the XRef(s)
1746 Guchar *fileKey;
1747 CryptAlgorithm encAlgorithm;
1748 int keyLength;
1749 xRef->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1751 for (int n = numOffset; n < xRef->getNumObjects(); n++) {
1752 if (xRef->getEntry(n)->type != xrefEntryFree) {
1753 Object obj;
1754 Ref ref;
1755 ref.num = n;
1756 ref.gen = xRef->getEntry(n)->gen;
1757 objectsCount++;
1758 getXRef()->fetch(ref.num - numOffset, ref.gen, &obj);
1759 Goffset offset = writeObjectHeader(&ref, outStr);
1760 if (combine) {
1761 writeObject(&obj, outStr, getXRef(), numOffset, NULL, cryptRC4, 0, 0, 0);
1762 } else if (xRef->getEntry(n)->getFlag(XRefEntry::Unencrypted)) {
1763 writeObject(&obj, outStr, NULL, cryptRC4, 0, 0, 0);
1764 } else {
1765 writeObject(&obj, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1767 writeObjectFooter(outStr);
1768 xRef->add(ref.num, ref.gen, offset, gTrue);
1769 obj.free();
1772 return objectsCount;
1775 #ifndef DISABLE_OUTLINE
1776 Outline *PDFDoc::getOutline()
1778 if (!outline) {
1779 pdfdocLocker();
1780 // read outline
1781 outline = new Outline(catalog->getOutline(), xref);
1784 return outline;
1786 #endif
1788 PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
1790 PDFDoc *doc = new PDFDoc();
1791 doc->errCode = errorCode;
1792 doc->fileName = fileNameA;
1794 return doc;
1797 long long PDFDoc::strToLongLong(char *s) {
1798 long long x, d;
1799 char *p;
1801 x = 0;
1802 for (p = s; *p && isdigit(*p & 0xff); ++p) {
1803 d = *p - '0';
1804 if (x > (LLONG_MAX - d) / 10) {
1805 break;
1807 x = 10 * x + d;
1809 return x;
1812 // Read the 'startxref' position.
1813 Goffset PDFDoc::getStartXRef(GBool tryingToReconstruct)
1815 if (startXRefPos == -1) {
1817 if (isLinearized(tryingToReconstruct)) {
1818 char buf[linearizationSearchSize+1];
1819 int c, n, i;
1821 str->setPos(0);
1822 for (n = 0; n < linearizationSearchSize; ++n) {
1823 if ((c = str->getChar()) == EOF) {
1824 break;
1826 buf[n] = c;
1828 buf[n] = '\0';
1830 // find end of first obj (linearization dictionary)
1831 startXRefPos = 0;
1832 for (i = 0; i < n; i++) {
1833 if (!strncmp("endobj", &buf[i], 6)) {
1834 i += 6;
1835 //skip whitespace
1836 while (buf[i] && Lexer::isSpace(buf[i])) ++i;
1837 startXRefPos = i;
1838 break;
1841 } else {
1842 char buf[xrefSearchSize+1];
1843 char *p;
1844 int c, n, i;
1846 // read last xrefSearchSize bytes
1847 int segnum = 0;
1848 int maxXRefSearch = 24576;
1849 if (str->getLength() < maxXRefSearch) maxXRefSearch = str->getLength();
1850 for (; (xrefSearchSize - 16) * segnum < maxXRefSearch; segnum++) {
1851 str->setPos((xrefSearchSize - 16) * segnum + xrefSearchSize, -1);
1852 for (n = 0; n < xrefSearchSize; ++n) {
1853 if ((c = str->getChar()) == EOF) {
1854 break;
1856 buf[n] = c;
1858 buf[n] = '\0';
1860 // find startxref
1861 for (i = n - 9; i >= 0; --i) {
1862 if (!strncmp(&buf[i], "startxref", 9)) {
1863 break;
1866 if (i < 0) {
1867 startXRefPos = 0;
1868 } else {
1869 for (p = &buf[i + 9]; isspace(*p); ++p);
1870 startXRefPos = strToLongLong(p);
1871 break;
1878 return startXRefPos;
1881 Goffset PDFDoc::getMainXRefEntriesOffset(GBool tryingToReconstruct)
1883 Guint mainXRefEntriesOffset = 0;
1885 if (isLinearized(tryingToReconstruct)) {
1886 mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
1889 return mainXRefEntriesOffset;
1892 int PDFDoc::getNumPages()
1894 if (isLinearized()) {
1895 int n;
1896 if ((n = getLinearization()->getNumPages())) {
1897 return n;
1901 return catalog->getNumPages();
1904 Page *PDFDoc::parsePage(int page)
1906 Page *p = NULL;
1907 Object obj;
1908 Ref pageRef;
1909 Dict *pageDict;
1911 pageRef.num = getHints()->getPageObjectNum(page);
1912 if (!pageRef.num) {
1913 error(errSyntaxWarning, -1, "Failed to get object num from hint tables for page {0:d}", page);
1914 return NULL;
1917 // check for bogus ref - this can happen in corrupted PDF files
1918 if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
1919 error(errSyntaxWarning, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef.num, page);
1920 return NULL;
1923 pageRef.gen = xref->getEntry(pageRef.num)->gen;
1924 xref->fetch(pageRef.num, pageRef.gen, &obj);
1925 if (!obj.isDict("Page")) {
1926 obj.free();
1927 error(errSyntaxWarning, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef.num, pageRef.gen);
1928 return NULL;
1930 pageDict = obj.getDict();
1932 p = new Page(this, page, pageDict, pageRef,
1933 new PageAttrs(NULL, pageDict), catalog->getForm());
1934 obj.free();
1936 return p;
1939 Page *PDFDoc::getPage(int page)
1941 if ((page < 1) || page > getNumPages()) return NULL;
1943 if (isLinearized()) {
1944 pdfdocLocker();
1945 if (!pageCache) {
1946 pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
1947 for (int i = 0; i < getNumPages(); i++) {
1948 pageCache[i] = NULL;
1951 if (!pageCache[page-1]) {
1952 pageCache[page-1] = parsePage(page);
1954 if (pageCache[page-1]) {
1955 return pageCache[page-1];
1956 } else {
1957 error(errSyntaxWarning, -1, "Failed parsing page {0:d} using hint tables", page);
1961 return catalog->getPage(page);