1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
9 //========================================================================
11 // Modified under the Poppler project - http://poppler.freedesktop.org
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
17 // Copyright (C) 2005, 2007-2009, 2011-2016 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
29 // Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de>
30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
31 // Copyright (C) 2013, 2014 Adrian Johnson <ajohnson@redneon.com>
32 // Copyright (C) 2013 Adam Reichold <adamreichold@myopera.com>
33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
35 // Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com>
36 // Copyright (C) 2015 André Esser <bepandre@hotmail.com>
37 // Copyright (C) 2016 Jakub Kucharski <jakubkucharski97@gmail.com>
39 // To see a description of the changes please see the Changelog file that
40 // came with your tarball or type make ChangeLog if you are building from git
42 //========================================================================
46 #ifdef USE_GCC_PRAGMAS
47 #pragma implementation
59 #include "goo/gstrtod.h"
60 #include "goo/GooString.h"
61 #include "goo/gfile.h"
62 #include "poppler-config.h"
63 #include "GlobalParams.h"
68 #include "Linearization.h"
70 #include "OutputDev.h"
72 #include "ErrorCodes.h"
75 #include "SecurityHandler.h"
77 #ifndef DISABLE_OUTLINE
84 # define pdfdocLocker() MutexLocker locker(&mutex)
86 # define pdfdocLocker()
89 //------------------------------------------------------------------------
91 #define headerSearchSize 1024 // read this many bytes at beginning of
92 // file to look for '%PDF'
93 #define pdfIdLength 32 // PDF Document IDs (PermanentId, UpdateId) length
95 #define linearizationSearchSize 1024 // read this many bytes at beginning of
96 // file to look for linearization
99 #define xrefSearchSize 1024 // read this many bytes at end of file
100 // to look for 'startxref'
102 //------------------------------------------------------------------------
104 //------------------------------------------------------------------------
117 linearization
= NULL
;
120 #ifndef DISABLE_OUTLINE
133 PDFDoc::PDFDoc(GooString
*fileNameA
, GooString
*ownerPassword
,
134 GooString
*userPassword
, void *guiDataA
) {
142 fileName
= fileNameA
;
145 n
= fileName
->getLength();
146 fileNameU
= (wchar_t *)gmallocn(n
+ 1, sizeof(wchar_t));
147 for (i
= 0; i
< n
; ++i
) {
148 fileNameU
[i
] = (wchar_t)(fileName
->getChar(i
) & 0xff);
150 fileNameU
[n
] = L
'\0';
154 file
= GooFile::open(fileName
);
156 // fopen() has failed.
157 // Keep a copy of the errno returned by fopen so that it can be
158 // referred to later.
160 error(errIO
, -1, "Couldn't open file '{0:t}': {1:s}.", fileName
, strerror(errno
));
161 errCode
= errOpenFile
;
167 str
= new FileStream(file
, 0, gFalse
, file
->size(), &obj
);
169 ok
= setup(ownerPassword
, userPassword
);
173 PDFDoc::PDFDoc(wchar_t *fileNameA
, int fileNameLen
, GooString
*ownerPassword
,
174 GooString
*userPassword
, void *guiDataA
) {
175 OSVERSIONINFO version
;
183 // save both Unicode and 8-bit copies of the file name
184 fileName
= new GooString();
185 fileNameU
= (wchar_t *)gmallocn(fileNameLen
+ 1, sizeof(wchar_t));
186 for (i
= 0; i
< fileNameLen
; ++i
) {
187 fileName
->append((char)fileNameA
[i
]);
188 fileNameU
[i
] = fileNameA
[i
];
190 fileNameU
[fileNameLen
] = L
'\0';
193 // NB: _wfopen is only available in NT
194 version
.dwOSVersionInfoSize
= sizeof(version
);
195 GetVersionEx(&version
);
196 if (version
.dwPlatformId
== VER_PLATFORM_WIN32_NT
) {
197 file
= GooFile::open(fileNameU
);
199 file
= GooFile::open(fileName
);
202 error(errIO
, -1, "Couldn't open file '{0:t}'", fileName
);
203 errCode
= errOpenFile
;
209 str
= new FileStream(file
, 0, gFalse
, file
->size(), &obj
);
211 ok
= setup(ownerPassword
, userPassword
);
215 PDFDoc::PDFDoc(BaseStream
*strA
, GooString
*ownerPassword
,
216 GooString
*userPassword
, void *guiDataA
) {
223 if (strA
->getFileName()) {
224 fileName
= strA
->getFileName()->copy();
226 n
= fileName
->getLength();
227 fileNameU
= (wchar_t *)gmallocn(n
+ 1, sizeof(wchar_t));
228 for (i
= 0; i
< n
; ++i
) {
229 fileNameU
[i
] = (wchar_t)(fileName
->getChar(i
) & 0xff);
231 fileNameU
[n
] = L
'\0';
240 ok
= setup(ownerPassword
, userPassword
);
243 GBool
PDFDoc::setup(GooString
*ownerPassword
, GooString
*userPassword
) {
246 if (str
->getPos() < 0)
248 error(errSyntaxError
, -1, "Document base stream is not seekable");
255 // Adobe does not seem to enforce %%EOF, so we do the same
256 // if (!checkFooter()) return gFalse;
261 GBool wasReconstructed
= false;
264 xref
= new XRef(str
, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed
);
266 if (wasReconstructed
) {
269 xref
= new XRef(str
, getStartXRef(gTrue
), getMainXRefEntriesOffset(gTrue
), &wasReconstructed
);
272 error(errSyntaxError
, -1, "Couldn't read xref table");
273 errCode
= xref
->getErrorCode();
278 // check for encryption
279 if (!checkEncryption(ownerPassword
, userPassword
)) {
280 errCode
= errEncrypted
;
285 catalog
= new Catalog(this);
286 if (catalog
&& !catalog
->isOk()) {
287 if (!wasReconstructed
)
289 // try one more time to construct the Catalog, maybe the problem is a damaged XRef
292 xref
= new XRef(str
, 0, 0, NULL
, true);
293 catalog
= new Catalog(this);
296 if (catalog
&& !catalog
->isOk()) {
297 error(errSyntaxError
, -1, "Couldn't read page catalog");
298 errCode
= errBadCatalog
;
309 for (int i
= 0; i
< getNumPages(); i
++) {
317 #ifndef DISABLE_OUTLINE
332 delete linearization
;
349 gDestroyMutex(&mutex
);
354 // Check for a %%EOF at the end of this stream
355 GBool
PDFDoc::checkFooter() {
356 // we look in the last 1024 chars because Adobe does the same
357 char *eof
= new char[1025];
358 Goffset pos
= str
->getPos();
359 str
->setPos(1024, -1);
361 for (i
= 0; i
< 1024; i
++)
371 for (i
= i
- 5; i
>= 0; i
--) {
372 if (strncmp (&eof
[i
], "%%EOF", 5) == 0) {
379 error(errSyntaxError
, -1, "Document has not the mandatory ending %%EOF");
380 errCode
= errDamaged
;
389 // Check for a PDF header on this stream. Skip past some garbage
391 void PDFDoc::checkHeader() {
392 char hdrBuf
[headerSearchSize
+1];
399 for (i
= 0; i
< headerSearchSize
; ++i
) {
400 hdrBuf
[i
] = str
->getChar();
402 hdrBuf
[headerSearchSize
] = '\0';
403 for (i
= 0; i
< headerSearchSize
- 5; ++i
) {
404 if (!strncmp(&hdrBuf
[i
], "%PDF-", 5)) {
408 if (i
>= headerSearchSize
- 5) {
409 error(errSyntaxWarning
, -1, "May not be a PDF file (continuing anyway)");
413 if (!(p
= strtok_r(&hdrBuf
[i
+5], " \t\n\r", &tokptr
))) {
414 error(errSyntaxWarning
, -1, "May not be a PDF file (continuing anyway)");
417 sscanf(p
, "%d.%d", &pdfMajorVersion
, &pdfMinorVersion
);
418 // We don't do the version check. Don't add it back in.
421 GBool
PDFDoc::checkEncryption(GooString
*ownerPassword
, GooString
*userPassword
) {
426 xref
->getTrailerDict()->dictLookup("Encrypt", &encrypt
);
427 if ((encrypted
= encrypt
.isDict())) {
428 if ((secHdlr
= SecurityHandler::make(this, &encrypt
))) {
429 if (secHdlr
->isUnencrypted()) {
432 } else if (secHdlr
->checkEncryption(ownerPassword
, userPassword
)) {
433 // authorization succeeded
434 xref
->setEncryption(secHdlr
->getPermissionFlags(),
435 secHdlr
->getOwnerPasswordOk(),
436 secHdlr
->getFileKey(),
437 secHdlr
->getFileKeyLength(),
438 secHdlr
->getEncVersion(),
439 secHdlr
->getEncRevision(),
440 secHdlr
->getEncAlgorithm());
443 // authorization failed
447 // couldn't find the matching security handler
451 // document is not encrypted
458 std::vector
<FormWidgetSignature
*> PDFDoc::getSignatureWidgets()
460 int num_pages
= getNumPages();
461 FormPageWidgets
*page_widgets
= NULL
;
462 std::vector
<FormWidgetSignature
*> widget_vector
;
464 for (int i
= 1; i
<= num_pages
; i
++) {
465 Page
*p
= getCatalog()->getPage(i
);
467 page_widgets
= p
->getFormWidgets();
468 for (int j
= 0; page_widgets
!= NULL
&& j
< page_widgets
->getNumWidgets(); j
++) {
469 if (page_widgets
->getWidget(j
)->getType() == formSignature
) {
470 widget_vector
.push_back(static_cast<FormWidgetSignature
*>(page_widgets
->getWidget(j
)));
476 return widget_vector
;
479 void PDFDoc::displayPage(OutputDev
*out
, int page
,
480 double hDPI
, double vDPI
, int rotate
,
481 GBool useMediaBox
, GBool crop
, GBool printing
,
482 GBool (*abortCheckCbk
)(void *data
),
483 void *abortCheckCbkData
,
484 GBool (*annotDisplayDecideCbk
)(Annot
*annot
, void *user_data
),
485 void *annotDisplayDecideCbkData
, GBool copyXRef
) {
486 if (globalParams
->getPrintCommands()) {
487 printf("***** page %d *****\n", page
);
491 getPage(page
)->display(out
, hDPI
, vDPI
,
492 rotate
, useMediaBox
, crop
, printing
,
493 abortCheckCbk
, abortCheckCbkData
,
494 annotDisplayDecideCbk
, annotDisplayDecideCbkData
, copyXRef
);
498 void PDFDoc::displayPages(OutputDev
*out
, int firstPage
, int lastPage
,
499 double hDPI
, double vDPI
, int rotate
,
500 GBool useMediaBox
, GBool crop
, GBool printing
,
501 GBool (*abortCheckCbk
)(void *data
),
502 void *abortCheckCbkData
,
503 GBool (*annotDisplayDecideCbk
)(Annot
*annot
, void *user_data
),
504 void *annotDisplayDecideCbkData
) {
507 for (page
= firstPage
; page
<= lastPage
; ++page
) {
508 displayPage(out
, page
, hDPI
, vDPI
, rotate
, useMediaBox
, crop
, printing
,
509 abortCheckCbk
, abortCheckCbkData
,
510 annotDisplayDecideCbk
, annotDisplayDecideCbkData
);
514 void PDFDoc::displayPageSlice(OutputDev
*out
, int page
,
515 double hDPI
, double vDPI
, int rotate
,
516 GBool useMediaBox
, GBool crop
, GBool printing
,
517 int sliceX
, int sliceY
, int sliceW
, int sliceH
,
518 GBool (*abortCheckCbk
)(void *data
),
519 void *abortCheckCbkData
,
520 GBool (*annotDisplayDecideCbk
)(Annot
*annot
, void *user_data
),
521 void *annotDisplayDecideCbkData
, GBool copyXRef
) {
523 getPage(page
)->displaySlice(out
, hDPI
, vDPI
,
524 rotate
, useMediaBox
, crop
,
525 sliceX
, sliceY
, sliceW
, sliceH
,
527 abortCheckCbk
, abortCheckCbkData
,
528 annotDisplayDecideCbk
, annotDisplayDecideCbkData
, copyXRef
);
531 Links
*PDFDoc::getLinks(int page
) {
532 Page
*p
= getPage(page
);
534 return new Links (NULL
);
536 return p
->getLinks();
539 void PDFDoc::processLinks(OutputDev
*out
, int page
) {
541 getPage(page
)->processLinks(out
);
544 Linearization
*PDFDoc::getLinearization()
546 if (!linearization
) {
547 linearization
= new Linearization(str
);
548 linearizationState
= 0;
550 return linearization
;
553 GBool
PDFDoc::checkLinearization() {
554 if (linearization
== NULL
)
556 if (linearizationState
== 1)
558 if (linearizationState
== 2)
561 hints
= new Hints(str
, linearization
, getXRef(), secHdlr
);
563 for (int page
= 1; page
<= linearization
->getNumPages(); page
++) {
567 pageRef
.num
= hints
->getPageObjectNum(page
);
569 linearizationState
= 2;
573 // check for bogus ref - this can happen in corrupted PDF files
574 if (pageRef
.num
< 0 || pageRef
.num
>= xref
->getNumObjects()) {
575 linearizationState
= 2;
579 pageRef
.gen
= xref
->getEntry(pageRef
.num
)->gen
;
580 xref
->fetch(pageRef
.num
, pageRef
.gen
, &obj
);
581 if (!obj
.isDict("Page")) {
583 linearizationState
= 2;
588 linearizationState
= 1;
592 GBool
PDFDoc::isLinearized(GBool tryingToReconstruct
) {
593 if ((str
->getLength()) &&
594 (getLinearization()->getLength() == str
->getLength()))
597 if (tryingToReconstruct
)
598 return getLinearization()->getLength() > 0;
604 void PDFDoc::setDocInfoModified(Object
*infoObj
)
607 getDocInfoNF(&infoObjRef
);
608 xref
->setModifiedObject(infoObj
, infoObjRef
.getRef());
612 void PDFDoc::setDocInfoStringEntry(const char *key
, GooString
*value
)
614 GBool removeEntry
= !value
|| value
->getLength() == 0;
620 getDocInfo(&infoObj
);
622 if (infoObj
.isNull() && removeEntry
) {
623 // No info dictionary, so no entry to remove.
627 createDocInfoIfNoneExists(&infoObj
);
631 gooStrObj
.initNull();
633 gooStrObj
.initString(value
);
636 // gooStrObj is set to value or null by now. The latter will cause a removal.
637 infoObj
.dictSet(key
, &gooStrObj
);
639 if (infoObj
.dictGetLength() == 0) {
640 // Info dictionary is empty. Remove it altogether.
643 setDocInfoModified(&infoObj
);
649 GooString
*PDFDoc::getDocInfoStringEntry(const char *key
) {
651 getDocInfo(&infoObj
);
652 if (!infoObj
.isDict()) {
657 infoObj
.dictLookup(key
, &entryObj
);
661 if (entryObj
.isString()) {
662 result
= entryObj
.takeString();
674 get_id (GooString
*encodedidstring
, GooString
*id
) {
675 const char *encodedid
= encodedidstring
->getCString();
676 char pdfid
[pdfIdLength
+ 1];
679 if (encodedidstring
->getLength() != pdfIdLength
/ 2)
682 n
= sprintf(pdfid
, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
683 encodedid
[0] & 0xff, encodedid
[1] & 0xff, encodedid
[2] & 0xff, encodedid
[3] & 0xff,
684 encodedid
[4] & 0xff, encodedid
[5] & 0xff, encodedid
[6] & 0xff, encodedid
[7] & 0xff,
685 encodedid
[8] & 0xff, encodedid
[9] & 0xff, encodedid
[10] & 0xff, encodedid
[11] & 0xff,
686 encodedid
[12] & 0xff, encodedid
[13] & 0xff, encodedid
[14] & 0xff, encodedid
[15] & 0xff);
687 if (n
!= pdfIdLength
)
690 id
->Set(pdfid
, pdfIdLength
);
694 GBool
PDFDoc::getID(GooString
*permanent_id
, GooString
*update_id
) {
696 xref
->getTrailerDict()->dictLookup ("ID", &obj
);
698 if (obj
.isArray() && obj
.arrayGetLength() == 2) {
702 if (obj
.arrayGet(0, &obj2
)->isString()) {
703 if (!get_id (obj2
.getString(), permanent_id
)) {
708 error(errSyntaxError
, -1, "Invalid permanent ID");
716 if (obj
.arrayGet(1, &obj2
)->isString()) {
717 if (!get_id (obj2
.getString(), update_id
)) {
722 error(errSyntaxError
, -1, "Invalid update ID");
738 Hints
*PDFDoc::getHints()
740 if (!hints
&& isLinearized()) {
741 hints
= new Hints(str
, getLinearization(), getXRef(), secHdlr
);
747 int PDFDoc::savePageAs(GooString
*name
, int pageNo
)
751 XRef
*yRef
, *countRef
;
752 int rootNum
= getXRef()->getNumObjects() + 1;
754 // Make sure that special flags are set, because we are going to read
755 // all objects, including Unencrypted ones.
756 xref
->scanSpecialFlags();
759 CryptAlgorithm encAlgorithm
;
761 xref
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
763 if (pageNo
< 1 || pageNo
> getNumPages() || !getCatalog()->getPage(pageNo
)) {
764 error(errInternal
, -1, "Illegal pageNo: {0:d}({1:d})", pageNo
, getNumPages() );
767 PDFRectangle
*cropBox
= NULL
;
768 if (getCatalog()->getPage(pageNo
)->isCropped()) {
769 cropBox
= getCatalog()->getPage(pageNo
)->getCropBox();
771 replacePageDict(pageNo
,
772 getCatalog()->getPage(pageNo
)->getRotate(),
773 getCatalog()->getPage(pageNo
)->getMediaBox(),
775 Ref
*refPage
= getCatalog()->getPageRef(pageNo
);
777 getXRef()->fetch(refPage
->num
, refPage
->gen
, &page
);
779 if (!(f
= fopen(name
->getCString(), "wb"))) {
780 error(errIO
, -1, "Couldn't open file '{0:t}'", name
);
783 outStr
= new FileOutStream(f
,0);
785 yRef
= new XRef(getXRef()->getTrailerDict());
787 if (secHdlr
!= NULL
&& !secHdlr
->isUnencrypted()) {
788 yRef
->setEncryption(secHdlr
->getPermissionFlags(),
789 secHdlr
->getOwnerPasswordOk(), fileKey
, keyLength
, secHdlr
->getEncVersion(), secHdlr
->getEncRevision(), encAlgorithm
);
791 countRef
= new XRef();
792 Object
*trailerObj
= getXRef()->getTrailerDict();
793 if (trailerObj
->isDict()) {
794 markPageObjects(trailerObj
->getDict(), yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
796 yRef
->add(0, 65535, 0, gFalse
);
797 writeHeader(outStr
, getPDFMajorVersion(), getPDFMinorVersion());
799 // get and mark info dict
801 getXRef()->getDocInfo(&infoObj
);
802 if (infoObj
.isDict()) {
803 Dict
*infoDict
= infoObj
.getDict();
804 markPageObjects(infoDict
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
805 if (trailerObj
->isDict()) {
806 Dict
*trailerDict
= trailerObj
->getDict();
808 trailerDict
->lookupNF("Info", &ref
);
810 yRef
->add(ref
.getRef().num
, ref
.getRef().gen
, 0, gTrue
);
811 if (getXRef()->getEntry(ref
.getRef().num
)->type
== xrefEntryCompressed
) {
812 yRef
->getEntry(ref
.getRef().num
)->type
= xrefEntryCompressed
;
820 // get and mark output intents etc.
821 Object catObj
, pagesObj
, resourcesObj
, annotsObj
, afObj
;
822 getXRef()->getCatalog(&catObj
);
823 Dict
*catDict
= catObj
.getDict();
824 catDict
->lookup("Pages", &pagesObj
);
825 catDict
->lookupNF("AcroForm", &afObj
);
826 if (!afObj
.isNull()) {
827 markAcroForm(&afObj
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
830 Dict
*pagesDict
= pagesObj
.getDict();
831 pagesDict
->lookup("Resources", &resourcesObj
);
832 if (resourcesObj
.isDict())
833 markPageObjects(resourcesObj
.getDict(), yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
834 markPageObjects(catDict
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
836 Dict
*pageDict
= page
.getDict();
837 if (resourcesObj
.isNull() && !pageDict
->hasKey("Resources")) {
838 Dict
*resourceDict
= getCatalog()->getPage(pageNo
)->getResourceDict();
839 if (resourceDict
!= NULL
) {
840 resourcesObj
.initDict(resourceDict
);
841 markPageObjects(resourcesObj
.getDict(), yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
844 markPageObjects(pageDict
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
845 pageDict
->lookupNF("Annots", &annotsObj
);
846 if (!annotsObj
.isNull()) {
847 markAnnotations(&annotsObj
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
850 yRef
->markUnencrypted();
851 writePageObjects(outStr
, yRef
, 0);
853 yRef
->add(rootNum
,0,outStr
->getPos(),gTrue
);
854 outStr
->printf("%d 0 obj\n", rootNum
);
855 outStr
->printf("<< /Type /Catalog /Pages %d 0 R", rootNum
+ 1);
856 for (int j
= 0; j
< catDict
->getLength(); j
++) {
857 const char *key
= catDict
->getKey(j
);
858 if (strcmp(key
, "Type") != 0 &&
859 strcmp(key
, "Catalog") != 0 &&
860 strcmp(key
, "Pages") != 0)
862 if (j
> 0) outStr
->printf(" ");
863 Object value
; catDict
->getValNF(j
, &value
);
864 outStr
->printf("/%s ", key
);
865 writeObject(&value
, outStr
, getXRef(), 0, NULL
, cryptRC4
, 0, 0, 0);
871 outStr
->printf(">>\nendobj\n");
873 yRef
->add(rootNum
+ 1,0,outStr
->getPos(),gTrue
);
874 outStr
->printf("%d 0 obj\n", rootNum
+ 1);
875 outStr
->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum
+ 2);
876 if (resourcesObj
.isDict()) {
877 outStr
->printf("/Resources ");
878 writeObject(&resourcesObj
, outStr
, getXRef(), 0, NULL
, cryptRC4
, 0, 0, 0);
881 outStr
->printf(">>\n");
882 outStr
->printf("endobj\n");
884 yRef
->add(rootNum
+ 2,0,outStr
->getPos(),gTrue
);
885 outStr
->printf("%d 0 obj\n", rootNum
+ 2);
886 outStr
->printf("<< ");
887 for (int n
= 0; n
< pageDict
->getLength(); n
++) {
888 if (n
> 0) outStr
->printf(" ");
889 const char *key
= pageDict
->getKey(n
);
890 Object value
; pageDict
->getValNF(n
, &value
);
891 if (strcmp(key
, "Parent") == 0) {
892 outStr
->printf("/Parent %d 0 R", rootNum
+ 1);
894 outStr
->printf("/%s ", key
);
895 writeObject(&value
, outStr
, getXRef(), 0, NULL
, cryptRC4
, 0, 0, 0);
899 outStr
->printf(" >>\nendobj\n");
902 Goffset uxrefOffset
= outStr
->getPos();
906 Dict
*trailerDict
= createTrailerDict(rootNum
+ 3, gFalse
, 0, &ref
, getXRef(),
907 name
->getCString(), uxrefOffset
);
908 writeXRefTableTrailer(trailerDict
, yRef
, gFalse
/* do not write unnecessary entries */,
909 uxrefOffset
, outStr
, getXRef());
921 int PDFDoc::saveAs(GooString
*name
, PDFWriteMode mode
) {
926 if (!(f
= fopen(name
->getCString(), "wb"))) {
927 error(errIO
, -1, "Couldn't open file '{0:t}'", name
);
930 outStr
= new FileOutStream(f
,0);
931 res
= saveAs(outStr
, mode
);
937 int PDFDoc::saveAs(OutStream
*outStr
, PDFWriteMode mode
) {
938 if (!xref
->isModified() && mode
== writeStandard
) {
939 // simply copy the original file
940 saveWithoutChangesAs (outStr
);
941 } else if (mode
== writeForceRewrite
) {
942 saveCompleteRewrite(outStr
);
944 saveIncrementalUpdate(outStr
);
950 int PDFDoc::saveWithoutChangesAs(GooString
*name
) {
955 if (!(f
= fopen(name
->getCString(), "wb"))) {
956 error(errIO
, -1, "Couldn't open file '{0:t}'", name
);
960 outStr
= new FileOutStream(f
,0);
961 res
= saveWithoutChangesAs(outStr
);
969 int PDFDoc::saveWithoutChangesAs(OutStream
*outStr
) {
972 BaseStream
*copyStr
= str
->copy();
974 while ((c
= copyStr
->getChar()) != EOF
) {
983 void PDFDoc::saveIncrementalUpdate (OutStream
* outStr
)
987 //copy the original file
988 BaseStream
*copyStr
= str
->copy();
990 while ((c
= copyStr
->getChar()) != EOF
) {
997 CryptAlgorithm encAlgorithm
;
999 xref
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
1002 uxref
->add(0, 65535, 0, gFalse
);
1004 for(int i
=0; i
<xref
->getNumObjects(); i
++) {
1005 if ((xref
->getEntry(i
)->type
== xrefEntryFree
) &&
1006 (xref
->getEntry(i
)->gen
== 0)) //we skip the irrelevant free objects
1009 if (xref
->getEntry(i
)->getFlag(XRefEntry::Updated
)) { //we have an updated object
1012 ref
.gen
= xref
->getEntry(i
)->type
== xrefEntryCompressed
? 0 : xref
->getEntry(i
)->gen
;
1013 if (xref
->getEntry(i
)->type
!= xrefEntryFree
) {
1015 xref
->fetch(ref
.num
, ref
.gen
, &obj1
, 1);
1016 Goffset offset
= writeObjectHeader(&ref
, outStr
);
1017 writeObject(&obj1
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
1018 writeObjectFooter(outStr
);
1019 uxref
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
1022 uxref
->add(ref
.num
, ref
.gen
, 0, gFalse
);
1027 if (uxref
->getNumObjects() == 0) { //we have nothing to update
1032 Goffset uxrefOffset
= outStr
->getPos();
1033 int numobjects
= xref
->getNumObjects();
1034 const char *fileNameA
= fileName
? fileName
->getCString() : NULL
;
1035 Ref rootRef
, uxrefStreamRef
;
1036 rootRef
.num
= getXRef()->getRootNum();
1037 rootRef
.gen
= getXRef()->getRootGen();
1039 // Output an xref stream if there is an xref stream already
1040 GBool xRefStream
= xref
->isXRefStream();
1043 // Append an entry for the xref stream itself
1044 uxrefStreamRef
.num
= numobjects
++;
1045 uxrefStreamRef
.gen
= 0;
1046 uxref
->add(uxrefStreamRef
.num
, uxrefStreamRef
.gen
, uxrefOffset
, gTrue
);
1049 Dict
*trailerDict
= createTrailerDict(numobjects
, gTrue
, getStartXRef(), &rootRef
, getXRef(), fileNameA
, uxrefOffset
);
1051 writeXRefStreamTrailer(trailerDict
, uxref
, &uxrefStreamRef
, uxrefOffset
, outStr
, getXRef());
1053 writeXRefTableTrailer(trailerDict
, uxref
, gFalse
, uxrefOffset
, outStr
, getXRef());
1060 void PDFDoc::saveCompleteRewrite (OutStream
* outStr
)
1062 // Make sure that special flags are set, because we are going to read
1063 // all objects, including Unencrypted ones.
1064 xref
->scanSpecialFlags();
1067 CryptAlgorithm encAlgorithm
;
1069 xref
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
1071 outStr
->printf("%%PDF-%d.%d\r\n",pdfMajorVersion
,pdfMinorVersion
);
1072 XRef
*uxref
= new XRef();
1073 uxref
->add(0, 65535, 0, gFalse
);
1075 for(int i
=0; i
<xref
->getNumObjects(); i
++) {
1078 XRefEntryType type
= xref
->getEntry(i
)->type
;
1079 if (type
== xrefEntryFree
) {
1081 ref
.gen
= xref
->getEntry(i
)->gen
;
1082 /* the XRef class adds a lot of irrelevant free entries; we only want the significant ones,
1083 and we don't want the one with num=0 because it has already been added (gen = 65535) */
1084 if (ref
.gen
> 0 && ref
.num
> 0)
1085 uxref
->add(ref
.num
, ref
.gen
, 0, gFalse
);
1086 } else if (xref
->getEntry(i
)->getFlag(XRefEntry::DontRewrite
)) {
1087 // This entry must not be written, put a free entry instead (with incremented gen)
1089 ref
.gen
= xref
->getEntry(i
)->gen
+ 1;
1090 uxref
->add(ref
.num
, ref
.gen
, 0, gFalse
);
1091 } else if (type
== xrefEntryUncompressed
){
1093 ref
.gen
= xref
->getEntry(i
)->gen
;
1094 xref
->fetch(ref
.num
, ref
.gen
, &obj1
, 1);
1095 Goffset offset
= writeObjectHeader(&ref
, outStr
);
1096 // Write unencrypted objects in unencrypted form
1097 if (xref
->getEntry(i
)->getFlag(XRefEntry::Unencrypted
)) {
1098 writeObject(&obj1
, outStr
, NULL
, cryptRC4
, 0, 0, 0);
1100 writeObject(&obj1
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
1102 writeObjectFooter(outStr
);
1103 uxref
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
1105 } else if (type
== xrefEntryCompressed
) {
1107 ref
.gen
= 0; //compressed entries have gen == 0
1108 xref
->fetch(ref
.num
, ref
.gen
, &obj1
, 1);
1109 Goffset offset
= writeObjectHeader(&ref
, outStr
);
1110 writeObject(&obj1
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
1111 writeObjectFooter(outStr
);
1112 uxref
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
1117 Goffset uxrefOffset
= outStr
->getPos();
1118 writeXRefTableTrailer(uxrefOffset
, uxref
, gTrue
/* write all entries */,
1119 uxref
->getNumObjects(), outStr
, gFalse
/* complete rewrite */);
1123 void PDFDoc::writeDictionnary (Dict
* dict
, OutStream
* outStr
, XRef
*xRef
, Guint numOffset
, Guchar
*fileKey
,
1124 CryptAlgorithm encAlgorithm
, int keyLength
, int objNum
, int objGen
)
1127 outStr
->printf("<<");
1128 for (int i
=0; i
<dict
->getLength(); i
++) {
1129 GooString
keyName(dict
->getKey(i
));
1130 GooString
*keyNameToPrint
= keyName
.sanitizedName(gFalse
/* non ps mode */);
1131 outStr
->printf("/%s ", keyNameToPrint
->getCString());
1132 delete keyNameToPrint
;
1133 writeObject(dict
->getValNF(i
, &obj1
), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1136 outStr
->printf(">> ");
1139 void PDFDoc::writeStream (Stream
* str
, OutStream
* outStr
)
1141 outStr
->printf("stream\r\n");
1143 for (int c
=str
->getChar(); c
!= EOF
; c
=str
->getChar()) {
1144 outStr
->printf("%c", c
);
1146 outStr
->printf("\r\nendstream\r\n");
1149 void PDFDoc::writeRawStream (Stream
* str
, OutStream
* outStr
)
1152 str
->getDict()->lookup("Length", &obj1
);
1153 if (!obj1
.isInt() && !obj1
.isInt64()) {
1154 error (errSyntaxError
, -1, "PDFDoc::writeRawStream, no Length in stream dict");
1160 length
= obj1
.getInt();
1162 length
= obj1
.getInt64();
1165 outStr
->printf("stream\r\n");
1166 str
->unfilteredReset();
1167 for (Goffset i
= 0; i
< length
; i
++) {
1168 int c
= str
->getUnfilteredChar();
1169 if (unlikely(c
== EOF
)) {
1170 error (errSyntaxError
, -1, "PDFDoc::writeRawStream: EOF reading stream");
1173 outStr
->printf("%c", c
);
1176 outStr
->printf("\r\nendstream\r\n");
1179 void PDFDoc::writeString (GooString
* s
, OutStream
* outStr
, Guchar
*fileKey
,
1180 CryptAlgorithm encAlgorithm
, int keyLength
, int objNum
, int objGen
)
1182 // Encrypt string if encryption is enabled
1183 GooString
*sEnc
= NULL
;
1186 EncryptStream
*enc
= new EncryptStream(new MemStream(s
->getCString(), 0, s
->getLength(), obj
.initNull()),
1187 fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1188 sEnc
= new GooString();
1191 while ((c
= enc
->getChar()) != EOF
) {
1192 sEnc
->append((char)c
);
1200 if (s
->hasUnicodeMarker()) {
1201 // Unicode strings don't necessarily end with \0
1202 const char* c
= s
->getCString();
1203 outStr
->printf("(");
1204 for(int i
=0; i
<s
->getLength(); i
++) {
1205 char unescaped
= *(c
+i
)&0x000000ff;
1207 if (unescaped
== '(' || unescaped
== ')' || unescaped
== '\\')
1208 outStr
->printf("%c", '\\');
1209 outStr
->printf("%c", unescaped
);
1211 outStr
->printf(") ");
1213 const char* c
= s
->getCString();
1214 outStr
->printf("(");
1215 for(int i
=0; i
<s
->getLength(); i
++) {
1216 char unescaped
= *(c
+i
)&0x000000ff;
1218 if (unescaped
== '\r')
1219 outStr
->printf("\\r");
1220 else if (unescaped
== '\n')
1221 outStr
->printf("\\n");
1223 if (unescaped
== '(' || unescaped
== ')' || unescaped
== '\\') {
1224 outStr
->printf("%c", '\\');
1226 outStr
->printf("%c", unescaped
);
1229 outStr
->printf(") ");
1235 Goffset
PDFDoc::writeObjectHeader (Ref
*ref
, OutStream
* outStr
)
1237 Goffset offset
= outStr
->getPos();
1238 outStr
->printf("%i %i obj ", ref
->num
, ref
->gen
);
1242 void PDFDoc::writeObject (Object
* obj
, OutStream
* outStr
, XRef
*xRef
, Guint numOffset
, Guchar
*fileKey
,
1243 CryptAlgorithm encAlgorithm
, int keyLength
, int objNum
, int objGen
)
1249 switch (obj
->getType()) {
1251 outStr
->printf("%s ", obj
->getBool()?"true":"false");
1254 outStr
->printf("%i ", obj
->getInt());
1257 outStr
->printf("%lli ", obj
->getInt64());
1262 s
.appendf("{0:.10g}", obj
->getReal());
1263 outStr
->printf("%s ", s
.getCString());
1267 writeString(obj
->getString(), outStr
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1271 GooString
name(obj
->getName());
1272 GooString
*nameToPrint
= name
.sanitizedName(gFalse
/* non ps mode */);
1273 outStr
->printf("/%s ", nameToPrint
->getCString());
1278 outStr
->printf( "null ");
1281 array
= obj
->getArray();
1282 outStr
->printf("[");
1283 for (int i
=0; i
<array
->getLength(); i
++) {
1284 writeObject(array
->getNF(i
, &obj1
), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1287 outStr
->printf("] ");
1290 writeDictionnary (obj
->getDict(), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1294 //We can't modify stream with the current implementation (no write functions in Stream API)
1295 // => the only streams that can have been modified are internal streams (=strWeird)
1296 Stream
*stream
= obj
->getStream();
1297 if (stream
->getKind() == strWeird
|| stream
->getKind() == strCrypt
) {
1298 //we write the stream unencoded => TODO: write stream encoder
1301 EncryptStream
*encStream
= NULL
;
1302 GBool removeFilter
= gTrue
;
1303 if (stream
->getKind() == strWeird
&& fileKey
) {
1305 stream
->getDict()->lookup("Filter", &filter
);
1306 if (!filter
.isName("Crypt")) {
1307 if (filter
.isArray()) {
1308 for (int i
= 0; i
< filter
.arrayGetLength(); i
++) {
1310 filter
.arrayGet(i
, &filterEle
);
1311 if (filterEle
.isName("Crypt")) {
1313 removeFilter
= gFalse
;
1319 encStream
= new EncryptStream(stream
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1320 encStream
->setAutoDelete(gFalse
);
1324 encStream
= new EncryptStream(stream
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1325 encStream
->setAutoDelete(gFalse
);
1329 removeFilter
= gFalse
;
1332 } else if (fileKey
!= NULL
) { // Encrypt stream
1333 encStream
= new EncryptStream(stream
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1334 encStream
->setAutoDelete(gFalse
);
1339 //recalculate stream length
1341 for (int c
=stream
->getChar(); c
!=EOF
; c
=stream
->getChar()) {
1344 obj1
.initInt64(tmp
);
1345 stream
->getDict()->set("Length", &obj1
);
1347 //Remove Stream encoding
1349 stream
->getDict()->remove("Filter");
1351 stream
->getDict()->remove("DecodeParms");
1353 writeDictionnary (stream
->getDict(),outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1354 writeStream (stream
,outStr
);
1359 FilterStream
*fs
= dynamic_cast<FilterStream
*>(stream
);
1361 BaseStream
*bs
= fs
->getBaseStream();
1364 if (xRef
->getStreamEnd(bs
->getStart(), &streamEnd
)) {
1366 val
.initInt64(streamEnd
- bs
->getStart());
1367 stream
->getDict()->set("Length", &val
);
1371 writeDictionnary (stream
->getDict(), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1372 writeRawStream (stream
, outStr
);
1377 outStr
->printf("%i %i R ", obj
->getRef().num
+ numOffset
, obj
->getRef().gen
);
1380 outStr
->printf("%s\n", obj
->getCmd());
1383 outStr
->printf("error\r\n");
1386 outStr
->printf("eof\r\n");
1389 outStr
->printf("none\r\n");
1392 error(errUnimplemented
, -1,"Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj
->getType());
1397 void PDFDoc::writeObjectFooter (OutStream
* outStr
)
1399 outStr
->printf("endobj\r\n");
1402 Dict
*PDFDoc::createTrailerDict(int uxrefSize
, GBool incrUpdate
, Goffset startxRef
,
1403 Ref
*root
, XRef
*xRef
, const char *fileName
, Goffset fileSize
)
1405 Dict
*trailerDict
= new Dict(xRef
);
1407 obj1
.initInt(uxrefSize
);
1408 trailerDict
->set("Size", &obj1
);
1411 //build a new ID, as recommended in the reference, uses:
1415 // - values of entry in information dictionnary
1418 sprintf(buffer
, "%i", (int)time(NULL
));
1419 message
.append(buffer
);
1422 message
.append(fileName
);
1424 sprintf(buffer
, "%lli", (long long)fileSize
);
1425 message
.append(buffer
);
1427 //info dict -- only use text string
1428 if (!xRef
->getTrailerDict()->isNone() && xRef
->getDocInfo(&obj1
)->isDict()) {
1429 for(int i
=0; i
<obj1
.getDict()->getLength(); i
++) {
1431 obj1
.getDict()->getVal(i
, &obj2
);
1432 if (obj2
.isString()) {
1433 message
.append(obj2
.getString());
1440 GBool hasEncrypt
= gFalse
;
1441 if (!xRef
->getTrailerDict()->isNone()) {
1443 xRef
->getTrailerDict()->dictLookupNF("Encrypt", &obj2
);
1444 if (!obj2
.isNull()) {
1445 trailerDict
->set("Encrypt", &obj2
);
1451 //calculate md5 digest
1453 md5((Guchar
*)message
.getCString(), message
.getLength(), digest
);
1454 obj1
.initString(new GooString((const char*)digest
, 16));
1457 Object obj2
,obj3
,obj5
;
1458 obj2
.initArray(xRef
);
1460 // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1461 if (incrUpdate
|| hasEncrypt
) {
1463 //only update the second part of the array
1464 xRef
->getTrailerDict()->getDict()->lookup("ID", &obj4
);
1465 if (!obj4
.isArray()) {
1466 error(errSyntaxWarning
, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1468 //Get the first part of the ID
1469 obj4
.arrayGet(0,&obj3
);
1471 obj2
.arrayAdd(&obj3
);
1472 obj2
.arrayAdd(&obj1
);
1473 trailerDict
->set("ID", &obj2
);
1477 //new file => same values for the two identifiers
1478 obj2
.arrayAdd(&obj1
);
1479 obj1
.initString(new GooString((const char*)digest
, 16));
1480 obj2
.arrayAdd(&obj1
);
1481 trailerDict
->set("ID", &obj2
);
1484 obj1
.initRef(root
->num
, root
->gen
);
1485 trailerDict
->set("Root", &obj1
);
1488 obj1
.initInt64(startxRef
);
1489 trailerDict
->set("Prev", &obj1
);
1492 if (!xRef
->getTrailerDict()->isNone()) {
1493 xRef
->getDocInfoNF(&obj5
);
1494 if (!obj5
.isNull()) {
1495 trailerDict
->set("Info", &obj5
);
1502 void PDFDoc::writeXRefTableTrailer(Dict
*trailerDict
, XRef
*uxref
, GBool writeAllEntries
, Goffset uxrefOffset
, OutStream
* outStr
, XRef
*xRef
)
1504 uxref
->writeTableToFile( outStr
, writeAllEntries
);
1505 outStr
->printf( "trailer\r\n");
1506 writeDictionnary(trailerDict
, outStr
, xRef
, 0, NULL
, cryptRC4
, 0, 0, 0);
1507 outStr
->printf( "\r\nstartxref\r\n");
1508 outStr
->printf( "%lli\r\n", uxrefOffset
);
1509 outStr
->printf( "%%%%EOF\r\n");
1512 void PDFDoc::writeXRefStreamTrailer (Dict
*trailerDict
, XRef
*uxref
, Ref
*uxrefStreamRef
, Goffset uxrefOffset
, OutStream
* outStr
, XRef
*xRef
)
1516 // Fill stmData and some trailerDict fields
1517 uxref
->writeStreamToBuffer(&stmData
, trailerDict
, xRef
);
1519 // Create XRef stream object and write it
1521 MemStream
*mStream
= new MemStream( stmData
.getCString(), 0,
1522 stmData
.getLength(), obj1
.initDict(trailerDict
) );
1523 writeObjectHeader(uxrefStreamRef
, outStr
);
1524 writeObject(obj1
.initStream(mStream
), outStr
, xRef
, 0, NULL
, cryptRC4
, 0, 0, 0);
1525 writeObjectFooter(outStr
);
1528 outStr
->printf( "startxref\r\n");
1529 outStr
->printf( "%lli\r\n", uxrefOffset
);
1530 outStr
->printf( "%%%%EOF\r\n");
1533 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset
, XRef
*uxref
, GBool writeAllEntries
,
1534 int uxrefSize
, OutStream
* outStr
, GBool incrUpdate
)
1536 const char *fileNameA
= fileName
? fileName
->getCString() : NULL
;
1537 // file size (doesn't include the trailer)
1538 unsigned int fileSize
= 0;
1541 while ((c
= str
->getChar()) != EOF
) {
1546 ref
.num
= getXRef()->getRootNum();
1547 ref
.gen
= getXRef()->getRootGen();
1548 Dict
* trailerDict
= createTrailerDict(uxrefSize
, incrUpdate
, getStartXRef(), &ref
,
1549 getXRef(), fileNameA
, fileSize
);
1550 writeXRefTableTrailer(trailerDict
, uxref
, writeAllEntries
, uxrefOffset
, outStr
, getXRef());
1554 void PDFDoc::writeHeader(OutStream
*outStr
, int major
, int minor
)
1556 outStr
->printf("%%PDF-%d.%d\n", major
, minor
);
1557 outStr
->printf("%%\xE2\xE3\xCF\xD3\n");
1560 void PDFDoc::markDictionnary (Dict
* dict
, XRef
* xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
)
1563 for (int i
=0; i
<dict
->getLength(); i
++) {
1564 const char *key
= dict
->getKey(i
);
1565 if (strcmp(key
, "Annots") != 0) {
1566 markObject(dict
->getValNF(i
, &obj1
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1569 dict
->getValNF(i
, &annotsObj
);
1570 if (!annotsObj
.isNull()) {
1571 markAnnotations(&annotsObj
, xRef
, countRef
, 0, oldRefNum
, newRefNum
);
1579 void PDFDoc::markObject (Object
* obj
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
)
1584 switch (obj
->getType()) {
1586 array
= obj
->getArray();
1587 for (int i
=0; i
<array
->getLength(); i
++) {
1588 markObject(array
->getNF(i
, &obj1
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1593 markDictionnary (obj
->getDict(), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1597 Stream
*stream
= obj
->getStream();
1598 markDictionnary (stream
->getDict(), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1603 if (obj
->getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(obj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1604 if (getXRef()->getEntry(obj
->getRef().num
)->type
== xrefEntryFree
) {
1605 return; // already marked as free => should be replaced
1607 xRef
->add(obj
->getRef().num
+ numOffset
, obj
->getRef().gen
, 0, gTrue
);
1608 if (getXRef()->getEntry(obj
->getRef().num
)->type
== xrefEntryCompressed
) {
1609 xRef
->getEntry(obj
->getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1612 if (obj
->getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1613 countRef
->getEntry(obj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1615 countRef
->add(obj
->getRef().num
+ numOffset
, 1, 0, gTrue
);
1617 XRefEntry
*entry
= countRef
->getEntry(obj
->getRef().num
+ numOffset
);
1623 getXRef()->fetch(obj
->getRef().num
, obj
->getRef().gen
, &obj1
);
1624 markObject(&obj1
, xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1633 void PDFDoc::replacePageDict(int pageNo
, int rotate
,
1634 PDFRectangle
*mediaBox
,
1635 PDFRectangle
*cropBox
)
1637 Ref
*refPage
= getCatalog()->getPageRef(pageNo
);
1639 getXRef()->fetch(refPage
->num
, refPage
->gen
, &page
);
1640 Dict
*pageDict
= page
.getDict();
1641 pageDict
->remove("MediaBoxssdf");
1642 pageDict
->remove("MediaBox");
1643 pageDict
->remove("CropBox");
1644 pageDict
->remove("ArtBox");
1645 pageDict
->remove("BleedBox");
1646 pageDict
->remove("TrimBox");
1647 pageDict
->remove("Rotate");
1649 mediaBoxObj
.initArray(getXRef());
1651 murx
.initReal(mediaBox
->x1
);
1653 mury
.initReal(mediaBox
->y1
);
1655 mllx
.initReal(mediaBox
->x2
);
1657 mlly
.initReal(mediaBox
->y2
);
1658 mediaBoxObj
.arrayAdd(&murx
);
1659 mediaBoxObj
.arrayAdd(&mury
);
1660 mediaBoxObj
.arrayAdd(&mllx
);
1661 mediaBoxObj
.arrayAdd(&mlly
);
1662 pageDict
->add(copyString("MediaBox"), &mediaBoxObj
);
1663 if (cropBox
!= NULL
) {
1665 cropBoxObj
.initArray(getXRef());
1667 curx
.initReal(cropBox
->x1
);
1669 cury
.initReal(cropBox
->y1
);
1671 cllx
.initReal(cropBox
->x2
);
1673 clly
.initReal(cropBox
->y2
);
1674 cropBoxObj
.arrayAdd(&curx
);
1675 cropBoxObj
.arrayAdd(&cury
);
1676 cropBoxObj
.arrayAdd(&cllx
);
1677 cropBoxObj
.arrayAdd(&clly
);
1678 pageDict
->add(copyString("CropBox"), &cropBoxObj
);
1679 cropBoxObj
.getArray()->incRef();
1680 pageDict
->add(copyString("TrimBox"), &cropBoxObj
);
1682 mediaBoxObj
.getArray()->incRef();
1683 pageDict
->add(copyString("TrimBox"), &mediaBoxObj
);
1686 rotateObj
.initInt(rotate
);
1687 pageDict
->add(copyString("Rotate"), &rotateObj
);
1688 getXRef()->setModifiedObject(&page
, *refPage
);
1692 void PDFDoc::markPageObjects(Dict
*pageDict
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
)
1694 pageDict
->remove("OpenAction");
1695 pageDict
->remove("Outlines");
1696 pageDict
->remove("StructTreeRoot");
1698 for (int n
= 0; n
< pageDict
->getLength(); n
++) {
1699 const char *key
= pageDict
->getKey(n
);
1700 Object value
; pageDict
->getValNF(n
, &value
);
1701 if (strcmp(key
, "Parent") != 0 &&
1702 strcmp(key
, "Pages") != 0 &&
1703 strcmp(key
, "AcroForm") != 0 &&
1704 strcmp(key
, "Annots") != 0 &&
1705 strcmp(key
, "P") != 0 &&
1706 strcmp(key
, "Root") != 0) {
1707 markObject(&value
, xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1713 GBool
PDFDoc::markAnnotations(Object
*annotsObj
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldPageNum
, int newPageNum
) {
1715 GBool modified
= gFalse
;
1716 annotsObj
->fetch(getXRef(), &annots
);
1717 if (annots
.isArray()) {
1718 Array
*array
= annots
.getArray();
1719 for (int i
=array
->getLength() - 1; i
>= 0; i
--) {
1721 if (array
->get(i
, &obj1
)->isDict()) {
1723 Dict
*dict
= obj1
.getDict();
1724 dict
->lookup("Type", &type
);
1725 if (type
.isName() && strcmp(type
.getName(), "Annot") == 0) {
1727 if (dict
->lookupNF("P", &obj2
)->isRef()) {
1728 if (obj2
.getRef().num
== oldPageNum
) {
1730 array
->getNF(i
, &obj3
);
1732 Object
*newRef
= new Object();
1733 newRef
->initRef(newPageNum
, 0);
1734 dict
->set("P", newRef
);
1735 getXRef()->setModifiedObject(&obj1
, obj3
.getRef());
1738 } else if (obj2
.getRef().num
== newPageNum
) {
1745 getXRef()->fetch(obj2
.getRef().num
, obj2
.getRef().gen
, &page
);
1746 if (page
.isDict()) {
1748 Dict
*dict
= page
.getDict();
1749 dict
->lookup("Type", &pagetype
);
1750 if (!pagetype
.isName() || strcmp(pagetype
.getName(), "Page") != 0) {
1772 markPageObjects(dict
, xRef
, countRef
, numOffset
, oldPageNum
, newPageNum
);
1775 array
->getNF(i
, &obj1
);
1777 if (obj1
.getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(obj1
.getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1778 if (getXRef()->getEntry(obj1
.getRef().num
)->type
== xrefEntryFree
) {
1779 continue; // already marked as free => should be replaced
1781 xRef
->add(obj1
.getRef().num
+ numOffset
, obj1
.getRef().gen
, 0, gTrue
);
1782 if (getXRef()->getEntry(obj1
.getRef().num
)->type
== xrefEntryCompressed
) {
1783 xRef
->getEntry(obj1
.getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1786 if (obj1
.getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1787 countRef
->getEntry(obj1
.getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1789 countRef
->add(obj1
.getRef().num
+ numOffset
, 1, 0, gTrue
);
1791 XRefEntry
*entry
= countRef
->getEntry(obj1
.getRef().num
+ numOffset
);
1798 if (annotsObj
->isRef()) {
1799 if (annotsObj
->getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(annotsObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1800 if (getXRef()->getEntry(annotsObj
->getRef().num
)->type
== xrefEntryFree
) {
1801 return modified
; // already marked as free => should be replaced
1803 xRef
->add(annotsObj
->getRef().num
+ numOffset
, annotsObj
->getRef().gen
, 0, gTrue
);
1804 if (getXRef()->getEntry(annotsObj
->getRef().num
)->type
== xrefEntryCompressed
) {
1805 xRef
->getEntry(annotsObj
->getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1808 if (annotsObj
->getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1809 countRef
->getEntry(annotsObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1811 countRef
->add(annotsObj
->getRef().num
+ numOffset
, 1, 0, gTrue
);
1813 XRefEntry
*entry
= countRef
->getEntry(annotsObj
->getRef().num
+ numOffset
);
1816 getXRef()->setModifiedObject(&annots
, annotsObj
->getRef());
1822 void PDFDoc::markAcroForm(Object
*afObj
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
) {
1824 GBool modified
= gFalse
;
1825 afObj
->fetch(getXRef(), &acroform
);
1826 if (acroform
.isDict()) {
1827 Dict
*dict
= acroform
.getDict();
1828 for (int i
=0; i
< dict
->getLength(); i
++) {
1829 if (strcmp(dict
->getKey(i
), "Fields") == 0) {
1831 modified
= markAnnotations(dict
->getValNF(i
, &fields
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1835 markObject(dict
->getValNF(i
, &obj
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1840 if (afObj
->isRef()) {
1841 if (afObj
->getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(afObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1842 if (getXRef()->getEntry(afObj
->getRef().num
)->type
== xrefEntryFree
) {
1843 return; // already marked as free => should be replaced
1845 xRef
->add(afObj
->getRef().num
+ numOffset
, afObj
->getRef().gen
, 0, gTrue
);
1846 if (getXRef()->getEntry(afObj
->getRef().num
)->type
== xrefEntryCompressed
) {
1847 xRef
->getEntry(afObj
->getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1850 if (afObj
->getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1851 countRef
->getEntry(afObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1853 countRef
->add(afObj
->getRef().num
+ numOffset
, 1, 0, gTrue
);
1855 XRefEntry
*entry
= countRef
->getEntry(afObj
->getRef().num
+ numOffset
);
1859 getXRef()->setModifiedObject(&acroform
, afObj
->getRef());
1866 Guint
PDFDoc::writePageObjects(OutStream
*outStr
, XRef
*xRef
, Guint numOffset
, GBool combine
)
1868 Guint objectsCount
= 0; //count the number of objects in the XRef(s)
1870 CryptAlgorithm encAlgorithm
;
1872 xRef
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
1874 for (int n
= numOffset
; n
< xRef
->getNumObjects(); n
++) {
1875 if (xRef
->getEntry(n
)->type
!= xrefEntryFree
) {
1879 ref
.gen
= xRef
->getEntry(n
)->gen
;
1881 getXRef()->fetch(ref
.num
- numOffset
, ref
.gen
, &obj
);
1882 Goffset offset
= writeObjectHeader(&ref
, outStr
);
1884 writeObject(&obj
, outStr
, getXRef(), numOffset
, NULL
, cryptRC4
, 0, 0, 0);
1885 } else if (xRef
->getEntry(n
)->getFlag(XRefEntry::Unencrypted
)) {
1886 writeObject(&obj
, outStr
, NULL
, cryptRC4
, 0, 0, 0);
1888 writeObject(&obj
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
1890 writeObjectFooter(outStr
);
1891 xRef
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
1895 return objectsCount
;
1898 #ifndef DISABLE_OUTLINE
1899 Outline
*PDFDoc::getOutline()
1904 outline
= new Outline(catalog
->getOutline(), xref
);
1911 PDFDoc
*PDFDoc::ErrorPDFDoc(int errorCode
, GooString
*fileNameA
)
1913 PDFDoc
*doc
= new PDFDoc();
1914 doc
->errCode
= errorCode
;
1915 doc
->fileName
= fileNameA
;
1920 long long PDFDoc::strToLongLong(char *s
) {
1925 for (p
= s
; *p
&& isdigit(*p
& 0xff); ++p
) {
1927 if (x
> (LLONG_MAX
- d
) / 10) {
1935 // Read the 'startxref' position.
1936 Goffset
PDFDoc::getStartXRef(GBool tryingToReconstruct
)
1938 if (startXRefPos
== -1) {
1940 if (isLinearized(tryingToReconstruct
)) {
1941 char buf
[linearizationSearchSize
+1];
1945 for (n
= 0; n
< linearizationSearchSize
; ++n
) {
1946 if ((c
= str
->getChar()) == EOF
) {
1953 // find end of first obj (linearization dictionary)
1955 for (i
= 0; i
< n
; i
++) {
1956 if (!strncmp("endobj", &buf
[i
], 6)) {
1959 while (buf
[i
] && Lexer::isSpace(buf
[i
])) ++i
;
1965 char buf
[xrefSearchSize
+1];
1969 // read last xrefSearchSize bytes
1971 int maxXRefSearch
= 24576;
1972 if (str
->getLength() < maxXRefSearch
) maxXRefSearch
= str
->getLength();
1973 for (; (xrefSearchSize
- 16) * segnum
< maxXRefSearch
; segnum
++) {
1974 str
->setPos((xrefSearchSize
- 16) * segnum
+ xrefSearchSize
, -1);
1975 for (n
= 0; n
< xrefSearchSize
; ++n
) {
1976 if ((c
= str
->getChar()) == EOF
) {
1984 for (i
= n
- 9; i
>= 0; --i
) {
1985 if (!strncmp(&buf
[i
], "startxref", 9)) {
1992 for (p
= &buf
[i
+ 9]; isspace(*p
); ++p
);
1993 startXRefPos
= strToLongLong(p
);
2001 return startXRefPos
;
2004 Goffset
PDFDoc::getMainXRefEntriesOffset(GBool tryingToReconstruct
)
2006 Guint mainXRefEntriesOffset
= 0;
2008 if (isLinearized(tryingToReconstruct
)) {
2009 mainXRefEntriesOffset
= getLinearization()->getMainXRefEntriesOffset();
2012 return mainXRefEntriesOffset
;
2015 int PDFDoc::getNumPages()
2017 if (isLinearized()) {
2019 if ((n
= getLinearization()->getNumPages())) {
2024 return catalog
->getNumPages();
2027 Page
*PDFDoc::parsePage(int page
)
2034 pageRef
.num
= getHints()->getPageObjectNum(page
);
2036 error(errSyntaxWarning
, -1, "Failed to get object num from hint tables for page {0:d}", page
);
2040 // check for bogus ref - this can happen in corrupted PDF files
2041 if (pageRef
.num
< 0 || pageRef
.num
>= xref
->getNumObjects()) {
2042 error(errSyntaxWarning
, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef
.num
, page
);
2046 pageRef
.gen
= xref
->getEntry(pageRef
.num
)->gen
;
2047 xref
->fetch(pageRef
.num
, pageRef
.gen
, &obj
);
2048 if (!obj
.isDict("Page")) {
2050 error(errSyntaxWarning
, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef
.num
, pageRef
.gen
);
2053 pageDict
= obj
.getDict();
2055 p
= new Page(this, page
, pageDict
, pageRef
,
2056 new PageAttrs(NULL
, pageDict
), catalog
->getForm());
2062 Page
*PDFDoc::getPage(int page
)
2064 if ((page
< 1) || page
> getNumPages()) return NULL
;
2066 if (isLinearized() && checkLinearization()) {
2069 pageCache
= (Page
**) gmallocn(getNumPages(), sizeof(Page
*));
2070 for (int i
= 0; i
< getNumPages(); i
++) {
2071 pageCache
[i
] = NULL
;
2074 if (!pageCache
[page
-1]) {
2075 pageCache
[page
-1] = parsePage(page
);
2077 if (pageCache
[page
-1]) {
2078 return pageCache
[page
-1];
2080 error(errSyntaxWarning
, -1, "Failed parsing page {0:d} using hint tables", page
);
2084 return catalog
->getPage(page
);