1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
9 //========================================================================
11 // Modified under the Poppler project - http://poppler.freedesktop.org
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
17 // Copyright (C) 2005, 2007-2009, 2011-2014 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
29 // Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de>
30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
31 // Copyright (C) 2013, 2014 Adrian Johnson <ajohnson@redneon.com>
32 // Copyright (C) 2013 Adam Reichold <adamreichold@myopera.com>
33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
36 // To see a description of the changes please see the Changelog file that
37 // came with your tarball or type make ChangeLog if you are building from git
39 //========================================================================
43 #ifdef USE_GCC_PRAGMAS
44 #pragma implementation
56 #include "goo/gstrtod.h"
57 #include "goo/GooString.h"
58 #include "goo/gfile.h"
59 #include "poppler-config.h"
60 #include "GlobalParams.h"
65 #include "Linearization.h"
67 #include "OutputDev.h"
69 #include "ErrorCodes.h"
72 #include "SecurityHandler.h"
74 #ifndef DISABLE_OUTLINE
81 # define pdfdocLocker() MutexLocker locker(&mutex)
83 # define pdfdocLocker()
86 //------------------------------------------------------------------------
88 #define headerSearchSize 1024 // read this many bytes at beginning of
89 // file to look for '%PDF'
90 #define pdfIdLength 32 // PDF Document IDs (PermanentId, UpdateId) length
92 #define linearizationSearchSize 1024 // read this many bytes at beginning of
93 // file to look for linearization
96 #define xrefSearchSize 1024 // read this many bytes at end of file
97 // to look for 'startxref'
99 //------------------------------------------------------------------------
101 //------------------------------------------------------------------------
114 linearization
= NULL
;
117 #ifndef DISABLE_OUTLINE
// Constructs a document from an 8-bit file name.
// Visible steps: keep fileNameA as fileName, build a wchar_t copy of the name
// in fileNameU (one wide char per byte, NUL-terminated), open the file with
// GooFile::open(), wrap it in a FileStream covering file->size() bytes, and
// store the result of setup(ownerPassword, userPassword) in ok.
// guiDataA is not referenced in the lines shown here.
// NOTE(review): the test that guards the error branch and any early return
// are not part of this excerpt -- confirm against the full file.
130 PDFDoc::PDFDoc(GooString
*fileNameA
, GooString
*ownerPassword
,
131 GooString
*userPassword
, void *guiDataA
) {
139 fileName
= fileNameA
;
// Widen the 8-bit name: each byte is masked with 0xff so it is not
// sign-extended when converted to wchar_t.
142 n
= fileName
->getLength();
143 fileNameU
= (wchar_t *)gmallocn(n
+ 1, sizeof(wchar_t));
144 for (i
= 0; i
< n
; ++i
) {
145 fileNameU
[i
] = (wchar_t)(fileName
->getChar(i
) & 0xff);
147 fileNameU
[n
] = L
'\0';
151 file
= GooFile::open(fileName
);
153 // fopen() has failed.
154 // Keep a copy of the errno returned by fopen so that it can be
155 // referred to later.
157 error(errIO
, -1, "Couldn't open file '{0:t}': {1:s}.", fileName
, strerror(errno
));
158 errCode
= errOpenFile
;
// Stream over the whole file, starting at offset 0.
164 str
= new FileStream(file
, 0, gFalse
, file
->size(), &obj
);
166 ok
= setup(ownerPassword
, userPassword
);
// Constructs a document from a wide-character file name of fileNameLen chars.
// Visible steps: build both an 8-bit copy (fileName, each wide char truncated
// to char) and a wide copy (fileNameU) of the name, query the OS version with
// GetVersionEx(), open the file through the wide name when the platform id is
// VER_PLATFORM_WIN32_NT and through the 8-bit name otherwise, then create a
// FileStream over the file and store setup()'s result in ok.
// guiDataA is not referenced in the lines shown here.
// NOTE(review): the `else` between the two GooFile::open() calls and the
// failure test before error() are not part of this excerpt.
170 PDFDoc::PDFDoc(wchar_t *fileNameA
, int fileNameLen
, GooString
*ownerPassword
,
171 GooString
*userPassword
, void *guiDataA
) {
172 OSVERSIONINFO version
;
180 // save both Unicode and 8-bit copies of the file name
181 fileName
= new GooString();
182 fileNameU
= (wchar_t *)gmallocn(fileNameLen
+ 1, sizeof(wchar_t));
183 for (i
= 0; i
< fileNameLen
; ++i
) {
// The (char) cast drops the high bits of every wide character, so the
// 8-bit copy is lossy for names outside the 8-bit range.
184 fileName
->append((char)fileNameA
[i
]);
185 fileNameU
[i
] = fileNameA
[i
];
187 fileNameU
[fileNameLen
] = L
'\0';
190 // NB: _wfopen is only available in NT
191 version
.dwOSVersionInfoSize
= sizeof(version
);
192 GetVersionEx(&version
);
193 if (version
.dwPlatformId
== VER_PLATFORM_WIN32_NT
) {
194 file
= GooFile::open(fileNameU
);
196 file
= GooFile::open(fileName
);
// Unlike the 8-bit-name constructor, this message carries no strerror() text.
199 error(errIO
, -1, "Couldn't open file '{0:t}'", fileName
);
200 errCode
= errOpenFile
;
206 str
= new FileStream(file
, 0, gFalse
, file
->size(), &obj
);
208 ok
= setup(ownerPassword
, userPassword
);
// Constructs a document on top of an existing BaseStream.
// When the stream reports a file name, a copy of it is stored in fileName and
// a widened, NUL-terminated copy in fileNameU; afterwards setup() is run with
// the two passwords and its result stored in ok.
// guiDataA is not referenced in the lines shown here.
// NOTE(review): the assignment of strA to the member stream and the branch
// taken when getFileName() is null are not part of this excerpt.
212 PDFDoc::PDFDoc(BaseStream
*strA
, GooString
*ownerPassword
,
213 GooString
*userPassword
, void *guiDataA
) {
220 if (strA
->getFileName()) {
// copy(): the document keeps its own GooString rather than the stream's.
221 fileName
= strA
->getFileName()->copy();
223 n
= fileName
->getLength();
224 fileNameU
= (wchar_t *)gmallocn(n
+ 1, sizeof(wchar_t));
225 for (i
= 0; i
< n
; ++i
) {
226 fileNameU
[i
] = (wchar_t)(fileName
->getChar(i
) & 0xff);
228 fileNameU
[n
] = L
'\0';
237 ok
= setup(ownerPassword
, userPassword
);
// Shared initialisation used by all constructors; returns a GBool.
// Visible sequence:
//   1. complain if the base stream reports a negative position
//      ("not seekable");
//   2. build the XRef from getStartXRef()/getMainXRefEntriesOffset();
//      if it reports that it was reconstructed, build it again with the
//      gTrue variants of both getters;
//   3. on xref failure log an error and take errCode from the XRef;
//   4. run checkEncryption(); on failure set errCode = errEncrypted;
//   5. build the Catalog; if it is not ok and the xref was not already
//      reconstructed, rebuild the XRef in forced-reconstruction form
//      (XRef(str, 0, 0, NULL, true)) and try the Catalog once more;
//   6. if the Catalog is still not ok set errCode = errBadCatalog.
// NOTE(review): the return statements, the deletes of the replaced
// xref/catalog objects and the condition guarding the final page loop are
// not part of this excerpt.
240 GBool
PDFDoc::setup(GooString
*ownerPassword
, GooString
*userPassword
) {
243 if (str
->getPos() < 0)
245 error(errSyntaxError
, -1, "Document base stream is not seekable");
252 // Adobe does not seem to enforce %%EOF, so we do the same
253 // if (!checkFooter()) return gFalse;
// Out-parameter filled in by the XRef constructor below.
258 GBool wasReconstructed
= false;
261 xref
= new XRef(str
, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed
);
263 if (wasReconstructed
) {
266 xref
= new XRef(str
, getStartXRef(gTrue
), getMainXRefEntriesOffset(gTrue
), &wasReconstructed
);
269 error(errSyntaxError
, -1, "Couldn't read xref table");
270 errCode
= xref
->getErrorCode();
275 // check for encryption
276 if (!checkEncryption(ownerPassword
, userPassword
)) {
277 errCode
= errEncrypted
;
282 catalog
= new Catalog(this);
283 if (catalog
&& !catalog
->isOk()) {
284 if (!wasReconstructed
)
286 // try one more time to construct the Catalog, maybe the problem is damaged XRef
289 xref
= new XRef(str
, 0, 0, NULL
, true);
290 catalog
= new Catalog(this);
293 if (catalog
&& !catalog
->isOk()) {
294 error(errSyntaxError
, -1, "Couldn't read page catalog");
295 errCode
= errBadCatalog
;
// Walks every page index from 0 to getNumPages()-1; the loop body is not
// part of this excerpt.
306 for (int i
= 0; i
< getNumPages(); i
++) {
314 #ifndef DISABLE_OUTLINE
329 delete linearization
;
346 gDestroyMutex(&mutex
);
351 // Check for a %%EOF at the end of this stream
// Remembers the current stream position, seeks 1024 bytes back from the end
// (setPos(1024, -1)), reads up to 1024 characters into a heap buffer and
// scans it backwards for the 5-byte marker "%%EOF". When the marker is
// missing an error is logged and errCode is set to errDamaged.
// The only reference to this function visible in this excerpt is the
// commented-out call in setup().
// NOTE(review): the buffer comes from `new char[1025]`; the matching
// delete[] and the restore of `pos` are not part of this excerpt -- confirm
// that every exit path releases the buffer and restores the position.
352 GBool
PDFDoc::checkFooter() {
353 // we look in the last 1024 chars because Adobe does the same
354 char *eof
= new char[1025];
355 Goffset pos
= str
->getPos();
356 str
->setPos(1024, -1);
358 for (i
= 0; i
< 1024; i
++)
// Start 5 bytes before the number of characters read so that the
// 5-byte strncmp stays inside the data that was actually filled in.
368 for (i
= i
- 5; i
>= 0; i
--) {
369 if (strncmp (&eof
[i
], "%%EOF", 5) == 0) {
376 error(errSyntaxError
, -1, "Document has not the mandatory ending %%EOF");
377 errCode
= errDamaged
;
386 // Check for a PDF header on this stream. Skip past some garbage
// Reads headerSearchSize characters from the stream into hdrBuf, searches
// them for the 5-byte signature "%PDF-", and parses the token that follows
// it as "<major>.<minor>" into pdfMajorVersion / pdfMinorVersion.
// A missing signature or a missing version token only produces a
// errSyntaxWarning ("continuing anyway").
// NOTE(review): str->getChar() is stored straight into a char, and the
// sscanf() result is not checked in the lines shown -- confirm how EOF and
// malformed version strings are meant to be handled.
388 void PDFDoc::checkHeader() {
// One extra byte so the buffer can always be NUL-terminated.
389 char hdrBuf
[headerSearchSize
+1];
396 for (i
= 0; i
< headerSearchSize
; ++i
) {
397 hdrBuf
[i
] = str
->getChar();
399 hdrBuf
[headerSearchSize
] = '\0';
// Stop 5 bytes early: strncmp compares 5 bytes starting at i.
400 for (i
= 0; i
< headerSearchSize
- 5; ++i
) {
401 if (!strncmp(&hdrBuf
[i
], "%PDF-", 5)) {
// i reached the loop bound, i.e. the signature was not found.
405 if (i
>= headerSearchSize
- 5) {
406 error(errSyntaxWarning
, -1, "May not be a PDF file (continuing anyway)");
// Version token = text after "%PDF-" up to the first blank/tab/CR/LF.
410 if (!(p
= strtok_r(&hdrBuf
[i
+5], " \t\n\r", &tokptr
))) {
411 error(errSyntaxWarning
, -1, "May not be a PDF file (continuing anyway)");
414 sscanf(p
, "%d.%d", &pdfMajorVersion
, &pdfMinorVersion
);
415 // We don't do the version check. Don't add it back in.
// Determines whether the document is encrypted and, if so, tries to unlock it.
// Looks up "Encrypt" in the trailer dictionary; `encrypted` is set to whether
// that entry is a dictionary. For an encrypted document a SecurityHandler is
// created via SecurityHandler::make(); if the handler reports the document as
// unencrypted nothing more is configured in the lines shown, otherwise the
// supplied passwords are checked and, on success, the handler's parameters
// are handed to xref->setEncryption().
// NOTE(review): the returned values for each branch are not part of this
// excerpt; only the branch comments below remain.
418 GBool
PDFDoc::checkEncryption(GooString
*ownerPassword
, GooString
*userPassword
) {
423 xref
->getTrailerDict()->dictLookup("Encrypt", &encrypt
);
// Assignment inside the condition is intentional (note the double parens).
424 if ((encrypted
= encrypt
.isDict())) {
425 if ((secHdlr
= SecurityHandler::make(this, &encrypt
))) {
426 if (secHdlr
->isUnencrypted()) {
429 } else if (secHdlr
->checkEncryption(ownerPassword
, userPassword
)) {
430 // authorization succeeded
431 xref
->setEncryption(secHdlr
->getPermissionFlags(),
432 secHdlr
->getOwnerPasswordOk(),
433 secHdlr
->getFileKey(),
434 secHdlr
->getFileKeyLength(),
435 secHdlr
->getEncVersion(),
436 secHdlr
->getEncRevision(),
437 secHdlr
->getEncAlgorithm());
440 // authorization failed
444 // couldn't find the matching security handler
448 // document is not encrypted
// Renders one page to `out` by forwarding every argument to
// Page::display() on the object returned by getPage(page).
// When globalParams->getPrintCommands() is set, a "***** page N *****"
// banner is printed to stdout first.
// NOTE(review): no null check on getPage(page) is visible here, whereas
// getLinks() keeps the page pointer and has a fallback return -- confirm
// whether a guard exists in the full file.
455 void PDFDoc::displayPage(OutputDev
*out
, int page
,
456 double hDPI
, double vDPI
, int rotate
,
457 GBool useMediaBox
, GBool crop
, GBool printing
,
458 GBool (*abortCheckCbk
)(void *data
),
459 void *abortCheckCbkData
,
460 GBool (*annotDisplayDecideCbk
)(Annot
*annot
, void *user_data
),
461 void *annotDisplayDecideCbkData
, GBool copyXRef
) {
462 if (globalParams
->getPrintCommands()) {
463 printf("***** page %d *****\n", page
);
467 getPage(page
)->display(out
, hDPI
, vDPI
,
468 rotate
, useMediaBox
, crop
, printing
,
469 abortCheckCbk
, abortCheckCbkData
,
470 annotDisplayDecideCbk
, annotDisplayDecideCbkData
, copyXRef
);
// Renders the inclusive page range [firstPage, lastPage] by calling
// displayPage() once per page with the same rendering arguments.
// This overload has no copyXRef parameter and passes none on, so
// displayPage() uses whatever default its declaration provides
// (declaration not visible in this excerpt).
474 void PDFDoc::displayPages(OutputDev
*out
, int firstPage
, int lastPage
,
475 double hDPI
, double vDPI
, int rotate
,
476 GBool useMediaBox
, GBool crop
, GBool printing
,
477 GBool (*abortCheckCbk
)(void *data
),
478 void *abortCheckCbkData
,
479 GBool (*annotDisplayDecideCbk
)(Annot
*annot
, void *user_data
),
480 void *annotDisplayDecideCbkData
) {
483 for (page
= firstPage
; page
<= lastPage
; ++page
) {
484 displayPage(out
, page
, hDPI
, vDPI
, rotate
, useMediaBox
, crop
, printing
,
485 abortCheckCbk
, abortCheckCbkData
,
486 annotDisplayDecideCbk
, annotDisplayDecideCbkData
);
// Renders the rectangle (sliceX, sliceY, sliceW, sliceH) of one page by
// forwarding to Page::displaySlice() on the object returned by getPage(page).
// NOTE(review): the `printing` argument is expected between `crop` and the
// slice coordinates but that source line is not part of this excerpt; no
// null check on getPage(page) is visible either.
490 void PDFDoc::displayPageSlice(OutputDev
*out
, int page
,
491 double hDPI
, double vDPI
, int rotate
,
492 GBool useMediaBox
, GBool crop
, GBool printing
,
493 int sliceX
, int sliceY
, int sliceW
, int sliceH
,
494 GBool (*abortCheckCbk
)(void *data
),
495 void *abortCheckCbkData
,
496 GBool (*annotDisplayDecideCbk
)(Annot
*annot
, void *user_data
),
497 void *annotDisplayDecideCbkData
, GBool copyXRef
) {
499 getPage(page
)->displaySlice(out
, hDPI
, vDPI
,
500 rotate
, useMediaBox
, crop
,
501 sliceX
, sliceY
, sliceW
, sliceH
,
503 abortCheckCbk
, abortCheckCbkData
,
504 annotDisplayDecideCbk
, annotDisplayDecideCbkData
, copyXRef
);
// Returns the Links object of a page.
// Two return paths are visible: a freshly allocated `new Links(NULL)` and
// the page's own p->getLinks().
// NOTE(review): the condition selecting the fallback (presumably a null
// page) is not part of this excerpt.
507 Links
*PDFDoc::getLinks(int page
) {
508 Page
*p
= getPage(page
);
510 return new Links (NULL
);
512 return p
->getLinks();
// Forwards to Page::processLinks(out) on the object returned by
// getPage(page).
// NOTE(review): no null check on getPage(page) is visible here.
515 void PDFDoc::processLinks(OutputDev
*out
, int page
) {
517 getPage(page
)->processLinks(out
);
// Returns the document's Linearization object, creating it from the base
// stream on first use and caching it in the `linearization` member.
520 Linearization
*PDFDoc::getLinearization()
522 if (!linearization
) {
523 linearization
= new Linearization(str
);
525 return linearization
;
// Reports whether the document is linearized.
// First test: the stream has a non-zero length and the length recorded in
// the linearization data equals it. When tryingToReconstruct is set, the
// visible fallback only requires the recorded length to be positive.
// NOTE(review): the return statements of the first test and of the final
// fall-through are not part of this excerpt.
528 GBool
PDFDoc::isLinearized(GBool tryingToReconstruct
) {
529 if ((str
->getLength()) &&
530 (getLinearization()->getLength() == str
->getLength()))
533 if (tryingToReconstruct
)
534 return getLinearization()->getLength() > 0;
// Helper: converts a binary document ID into its lowercase hex form.
// encodedidstring must be exactly pdfIdLength / 2 (= 16) bytes long; each
// byte is printed as two hex digits into a local buffer of pdfIdLength + 1
// chars, and the pdfIdLength-character result is stored into `id`.
// Both the input length and the number of characters sprintf() reports are
// checked.
// NOTE(review): the return type and the return statements of the two
// checks are not part of this excerpt.
541 get_id (GooString
*encodedidstring
, GooString
*id
) {
542 const char *encodedid
= encodedidstring
->getCString();
543 char pdfid
[pdfIdLength
+ 1];
546 if (encodedidstring
->getLength() != pdfIdLength
/ 2)
// "& 0xff" keeps negative chars from being printed as sign-extended ints.
549 n
= sprintf(pdfid
, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
550 encodedid
[0] & 0xff, encodedid
[1] & 0xff, encodedid
[2] & 0xff, encodedid
[3] & 0xff,
551 encodedid
[4] & 0xff, encodedid
[5] & 0xff, encodedid
[6] & 0xff, encodedid
[7] & 0xff,
552 encodedid
[8] & 0xff, encodedid
[9] & 0xff, encodedid
[10] & 0xff, encodedid
[11] & 0xff,
553 encodedid
[12] & 0xff, encodedid
[13] & 0xff, encodedid
[14] & 0xff, encodedid
[15] & 0xff);
554 if (n
!= pdfIdLength
)
557 id
->Set(pdfid
, pdfIdLength
);
// Reads the trailer's "ID" entry and fills the two output strings.
// The entry is only used when it is an array of exactly two elements:
// element 0 is converted with get_id() into permanent_id and element 1 into
// update_id. A non-string element or a failed conversion is reported as
// "Invalid permanent ID" / "Invalid update ID".
// NOTE(review): whether null output pointers are tolerated, and the values
// returned on each path, are not visible in this excerpt.
561 GBool
PDFDoc::getID(GooString
*permanent_id
, GooString
*update_id
) {
563 xref
->getTrailerDict()->dictLookup ("ID", &obj
);
565 if (obj
.isArray() && obj
.arrayGetLength() == 2) {
569 if (obj
.arrayGet(0, &obj2
)->isString()) {
570 if (!get_id (obj2
.getString(), permanent_id
)) {
575 error(errSyntaxError
, -1, "Invalid permanent ID");
583 if (obj
.arrayGet(1, &obj2
)->isString()) {
584 if (!get_id (obj2
.getString(), update_id
)) {
589 error(errSyntaxError
, -1, "Invalid update ID");
// Creates the Hints object on first use, and only when isLinearized()
// reports true; it is built from the base stream, the linearization data,
// the xref and the security handler.
// NOTE(review): the return statement is not part of this excerpt.
605 Hints
*PDFDoc::getHints()
607 if (!hints
&& isLinearized()) {
608 hints
= new Hints(str
, getLinearization(), getXRef(), secHdlr
);
// Writes a single page (1-based pageNo) of this document to a new PDF file
// called `name`.
// Visible outline:
//   * validate pageNo against getNumPages() and the catalog;
//   * normalise the page dictionary via replacePageDict() (rotate, media
//     box, crop box when the page is cropped);
//   * open the output file and create two XRef tables: yRef (objects to
//     write) and countRef;
//   * mark every object reachable from the trailer, info dict, catalog,
//     inherited /Resources, AcroForm, the page itself and its annotations;
//   * write the header and the marked objects, then three new objects:
//       rootNum     -> new /Catalog (copies all catalog keys except
//                      Type, Catalog and Pages),
//       rootNum + 1 -> new /Pages node with a single kid,
//       rootNum + 2 -> the page, with /Parent redirected to rootNum + 1;
//   * write the xref table and trailer.
// rootNum is getNumObjects() + 1, so the three new object numbers lie
// above the original xref's object count.
// NOTE(review): the returned error codes and the cleanup of
// outStr/yRef/countRef/f are not part of this excerpt.
614 int PDFDoc::savePageAs(GooString
*name
, int pageNo
)
618 XRef
*yRef
, *countRef
;
619 int rootNum
= getXRef()->getNumObjects() + 1;
621 // Make sure that special flags are set, because we are going to read
622 // all objects, including Unencrypted ones.
623 xref
->scanSpecialFlags();
626 CryptAlgorithm encAlgorithm
;
628 xref
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
630 if (pageNo
< 1 || pageNo
> getNumPages() || !getCatalog()->getPage(pageNo
)) {
631 error(errInternal
, -1, "Illegal pageNo: {0:d}({1:d})", pageNo
, getNumPages() );
// cropBox stays NULL unless the page is cropped.
634 PDFRectangle
*cropBox
= NULL
;
635 if (getCatalog()->getPage(pageNo
)->isCropped()) {
636 cropBox
= getCatalog()->getPage(pageNo
)->getCropBox();
638 replacePageDict(pageNo
,
639 getCatalog()->getPage(pageNo
)->getRotate(),
640 getCatalog()->getPage(pageNo
)->getMediaBox(),
642 Ref
*refPage
= getCatalog()->getPageRef(pageNo
);
644 getXRef()->fetch(refPage
->num
, refPage
->gen
, &page
);
646 if (!(f
= fopen(name
->getCString(), "wb"))) {
647 error(errIO
, -1, "Couldn't open file '{0:t}'", name
);
650 outStr
= new FileOutStream(f
,0);
652 yRef
= new XRef(getXRef()->getTrailerDict());
// Carry the source document's encryption settings over to the output xref.
654 if (secHdlr
!= NULL
&& !secHdlr
->isUnencrypted()) {
655 yRef
->setEncryption(secHdlr
->getPermissionFlags(),
656 secHdlr
->getOwnerPasswordOk(), fileKey
, keyLength
, secHdlr
->getEncVersion(), secHdlr
->getEncRevision(), encAlgorithm
);
658 countRef
= new XRef();
659 Object
*trailerObj
= getXRef()->getTrailerDict();
660 if (trailerObj
->isDict()) {
661 markPageObjects(trailerObj
->getDict(), yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
// Object 0 / generation 65535 is written as a free entry.
663 yRef
->add(0, 65535, 0, gFalse
);
664 writeHeader(outStr
, getPDFMajorVersion(), getPDFMinorVersion());
666 // get and mark info dict
668 getXRef()->getDocInfo(&infoObj
);
669 if (infoObj
.isDict()) {
670 Dict
*infoDict
= infoObj
.getDict();
671 markPageObjects(infoDict
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
672 if (trailerObj
->isDict()) {
673 Dict
*trailerDict
= trailerObj
->getDict();
675 trailerDict
->lookupNF("Info", &ref
);
677 yRef
->add(ref
.getRef().num
, ref
.getRef().gen
, 0, gTrue
);
678 if (getXRef()->getEntry(ref
.getRef().num
)->type
== xrefEntryCompressed
) {
679 yRef
->getEntry(ref
.getRef().num
)->type
= xrefEntryCompressed
;
687 // get and mark output intents etc.
688 Object catObj
, pagesObj
, resourcesObj
, annotsObj
, afObj
;
689 getXRef()->getCatalog(&catObj
);
// NOTE(review): catObj.getDict() and pagesObj.getDict() below are used
// without an isDict() test in the lines shown -- confirm that a damaged
// catalog / page tree cannot reach this point.
690 Dict
*catDict
= catObj
.getDict();
691 catDict
->lookup("Pages", &pagesObj
);
692 catDict
->lookupNF("AcroForm", &afObj
);
693 if (!afObj
.isNull()) {
694 markAcroForm(&afObj
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
697 Dict
*pagesDict
= pagesObj
.getDict();
698 pagesDict
->lookup("Resources", &resourcesObj
);
699 if (resourcesObj
.isDict())
700 markPageObjects(resourcesObj
.getDict(), yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
701 markPageObjects(catDict
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
703 Dict
*pageDict
= page
.getDict();
// Neither the page tree root nor the page itself carries /Resources:
// fall back to the resource dictionary resolved by the Page object.
704 if (resourcesObj
.isNull() && !pageDict
->hasKey("Resources")) {
705 Dict
*resourceDict
= getCatalog()->getPage(pageNo
)->getResourceDict();
706 if (resourceDict
!= NULL
) {
707 resourcesObj
.initDict(resourceDict
);
708 markPageObjects(resourcesObj
.getDict(), yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
711 markPageObjects(pageDict
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
712 pageDict
->lookupNF("Annots", &annotsObj
);
713 if (!annotsObj
.isNull()) {
714 markAnnotations(&annotsObj
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
717 yRef
->markUnencrypted();
718 writePageObjects(outStr
, yRef
, 0);
// --- new catalog object (rootNum) ---
720 yRef
->add(rootNum
,0,outStr
->getPos(),gTrue
);
721 outStr
->printf("%d 0 obj\n", rootNum
);
722 outStr
->printf("<< /Type /Catalog /Pages %d 0 R", rootNum
+ 1);
723 for (int j
= 0; j
< catDict
->getLength(); j
++) {
724 const char *key
= catDict
->getKey(j
);
// Type and Pages were already emitted above with new values.
725 if (strcmp(key
, "Type") != 0 &&
726 strcmp(key
, "Catalog") != 0 &&
727 strcmp(key
, "Pages") != 0)
729 if (j
> 0) outStr
->printf(" ");
730 Object value
; catDict
->getValNF(j
, &value
);
731 outStr
->printf("/%s ", key
);
732 writeObject(&value
, outStr
, getXRef(), 0, NULL
, cryptRC4
, 0, 0, 0);
738 outStr
->printf(">>\nendobj\n");
// --- new page tree node (rootNum + 1) with exactly one kid ---
740 yRef
->add(rootNum
+ 1,0,outStr
->getPos(),gTrue
);
741 outStr
->printf("%d 0 obj\n", rootNum
+ 1);
742 outStr
->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum
+ 2);
743 if (resourcesObj
.isDict()) {
744 outStr
->printf("/Resources ");
745 writeObject(&resourcesObj
, outStr
, getXRef(), 0, NULL
, cryptRC4
, 0, 0, 0);
748 outStr
->printf(">>\n");
749 outStr
->printf("endobj\n");
// --- the page itself (rootNum + 2) ---
751 yRef
->add(rootNum
+ 2,0,outStr
->getPos(),gTrue
);
752 outStr
->printf("%d 0 obj\n", rootNum
+ 2);
753 outStr
->printf("<< ");
754 for (int n
= 0; n
< pageDict
->getLength(); n
++) {
755 if (n
> 0) outStr
->printf(" ");
756 const char *key
= pageDict
->getKey(n
);
757 Object value
; pageDict
->getValNF(n
, &value
);
// /Parent must point at the new single-kid page tree node.
758 if (strcmp(key
, "Parent") == 0) {
759 outStr
->printf("/Parent %d 0 R", rootNum
+ 1);
761 outStr
->printf("/%s ", key
);
762 writeObject(&value
, outStr
, getXRef(), 0, NULL
, cryptRC4
, 0, 0, 0);
766 outStr
->printf(" >>\nendobj\n");
// The xref table starts at the current output position.
769 Goffset uxrefOffset
= outStr
->getPos();
773 Dict
*trailerDict
= createTrailerDict(rootNum
+ 3, gFalse
, 0, &ref
, getXRef(),
774 name
->getCString(), uxrefOffset
);
775 writeXRefTableTrailer(trailerDict
, yRef
, gFalse
/* do not write unnecessary entries */,
776 uxrefOffset
, outStr
, getXRef());
// Saves the document to the file `name`: opens it with fopen(..., "wb"),
// wraps the FILE* in a FileOutStream and delegates to
// saveAs(OutStream*, mode), keeping its result in `res`.
// A failed fopen() is reported through error(errIO, ...).
// NOTE(review): the returned values and the cleanup of outStr / f are not
// part of this excerpt.
788 int PDFDoc::saveAs(GooString
*name
, PDFWriteMode mode
) {
793 if (!(f
= fopen(name
->getCString(), "wb"))) {
794 error(errIO
, -1, "Couldn't open file '{0:t}'", name
);
797 outStr
= new FileOutStream(f
,0);
798 res
= saveAs(outStr
, mode
);
// Saves the document to an output stream, choosing the strategy from `mode`
// and from whether any xref entry carries the XRefEntry::Updated flag:
//   * nothing updated and mode == writeStandard -> byte copy of the
//     original (saveWithoutChangesAs);
//   * mode == writeForceRewrite                 -> saveCompleteRewrite;
//   * otherwise                                 -> saveIncrementalUpdate.
// NOTE(review): the loop body that sets `updated` and the return value are
// not part of this excerpt.
804 int PDFDoc::saveAs(OutStream
*outStr
, PDFWriteMode mode
) {
806 // find if we have updated objects
807 GBool updated
= gFalse
;
808 for(int i
=0; i
<xref
->getNumObjects(); i
++) {
809 if (xref
->getEntry(i
)->getFlag(XRefEntry::Updated
)) {
815 if (!updated
&& mode
== writeStandard
) {
816 // simply copy the original file
817 saveWithoutChangesAs (outStr
);
818 } else if (mode
== writeForceRewrite
) {
819 saveCompleteRewrite(outStr
);
821 saveIncrementalUpdate(outStr
);
// Copies the original document unchanged into the file `name`: opens it
// with fopen(..., "wb"), wraps it in a FileOutStream and delegates to
// saveWithoutChangesAs(OutStream*), keeping its result in `res`.
// NOTE(review): the returned values and the cleanup of outStr / f are not
// part of this excerpt.
827 int PDFDoc::saveWithoutChangesAs(GooString
*name
) {
832 if (!(f
= fopen(name
->getCString(), "wb"))) {
833 error(errIO
, -1, "Couldn't open file '{0:t}'", name
);
837 outStr
= new FileOutStream(f
,0);
838 res
= saveWithoutChangesAs(outStr
);
// Copies the original document to outStr character by character, reading
// from a copy of the base stream (str->copy()) until getChar() returns EOF.
// NOTE(review): the loop body, the reset/close of copyStr and the return
// value are not part of this excerpt.
846 int PDFDoc::saveWithoutChangesAs(OutStream
*outStr
) {
849 BaseStream
*copyStr
= str
->copy();
851 while ((c
= copyStr
->getChar()) != EOF
) {
// Writes the document as "original bytes + appended update section".
// Visible outline:
//   * copy the original stream to outStr;
//   * for every xref entry flagged XRefEntry::Updated, either write the
//     object again (non-free entries) and record its new offset in uxref,
//     or record it as free;
//   * if uxref ends up empty there is nothing to append;
//   * otherwise build a trailer dictionary (with getStartXRef() passed as
//     createTrailerDict()'s startxRef argument) and write the new
//     cross-reference data as an xref stream when the original uses one,
//     or as a classic table otherwise.
// NOTE(review): the creation of `uxref`, the `continue` after the free-entry
// test, the xRefStream if/else and all cleanup are not part of this excerpt.
860 void PDFDoc::saveIncrementalUpdate (OutStream
* outStr
)
864 //copy the original file
865 BaseStream
*copyStr
= str
->copy();
867 while ((c
= copyStr
->getChar()) != EOF
) {
874 CryptAlgorithm encAlgorithm
;
876 xref
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
// Object 0 / generation 65535 is recorded as a free entry.
879 uxref
->add(0, 65535, 0, gFalse
);
881 for(int i
=0; i
<xref
->getNumObjects(); i
++) {
882 if ((xref
->getEntry(i
)->type
== xrefEntryFree
) &&
883 (xref
->getEntry(i
)->gen
== 0)) //we skip the irrelevant free objects
886 if (xref
->getEntry(i
)->getFlag(XRefEntry::Updated
)) { //we have an updated object
// Compressed entries are always written with generation 0.
889 ref
.gen
= xref
->getEntry(i
)->type
== xrefEntryCompressed
? 0 : xref
->getEntry(i
)->gen
;
890 if (xref
->getEntry(i
)->type
!= xrefEntryFree
) {
892 xref
->fetch(ref
.num
, ref
.gen
, &obj1
, 1);
893 Goffset offset
= writeObjectHeader(&ref
, outStr
);
894 writeObject(&obj1
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
895 writeObjectFooter(outStr
);
896 uxref
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
899 uxref
->add(ref
.num
, ref
.gen
, 0, gFalse
);
904 if (uxref
->getNumObjects() == 0) { //we have nothing to update
909 Goffset uxrefOffset
= outStr
->getPos();
910 int numobjects
= xref
->getNumObjects();
// fileName may be NULL, in which case NULL is handed to createTrailerDict().
911 const char *fileNameA
= fileName
? fileName
->getCString() : NULL
;
912 Ref rootRef
, uxrefStreamRef
;
913 rootRef
.num
= getXRef()->getRootNum();
914 rootRef
.gen
= getXRef()->getRootGen();
916 // Output a xref stream if there is a xref stream already
917 GBool xRefStream
= xref
->isXRefStream();
920 // Append an entry for the xref stream itself
// Post-increment: the stream takes the next free object number and
// numobjects grows by one so the trailer /Size accounts for it.
921 uxrefStreamRef
.num
= numobjects
++;
922 uxrefStreamRef
.gen
= 0;
923 uxref
->add(uxrefStreamRef
.num
, uxrefStreamRef
.gen
, uxrefOffset
, gTrue
);
926 Dict
*trailerDict
= createTrailerDict(numobjects
, gTrue
, getStartXRef(), &rootRef
, getXRef(), fileNameA
, uxrefOffset
);
928 writeXRefStreamTrailer(trailerDict
, uxref
, &uxrefStreamRef
, uxrefOffset
, outStr
, getXRef());
930 writeXRefTableTrailer(trailerDict
, uxref
, gFalse
, uxrefOffset
, outStr
, getXRef());
// Writes the whole document from scratch: header, every object of the
// original xref, then a fresh xref table and trailer.
// Per xref entry:
//   * free                 -> re-added as free only when both gen and num
//                             are > 0;
//   * flagged DontRewrite  -> replaced by a free entry with gen + 1;
//   * uncompressed         -> fetched and written; entries flagged
//                             Unencrypted are written without a key,
//                             all others with the document key;
//   * compressed           -> fetched and written as a regular object with
//                             generation 0.
// NOTE(review): the declarations of ref/obj1/fileKey/keyLength, the
// assignments of ref.num and the cleanup of uxref are not part of this
// excerpt.
937 void PDFDoc::saveCompleteRewrite (OutStream
* outStr
)
939 // Make sure that special flags are set, because we are going to read
940 // all objects, including Unencrypted ones.
941 xref
->scanSpecialFlags();
944 CryptAlgorithm encAlgorithm
;
946 xref
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
// "%%" in the format yields a single literal '%' in the output.
948 outStr
->printf("%%PDF-%d.%d\r\n",pdfMajorVersion
,pdfMinorVersion
);
949 XRef
*uxref
= new XRef();
950 uxref
->add(0, 65535, 0, gFalse
);
952 for(int i
=0; i
<xref
->getNumObjects(); i
++) {
955 XRefEntryType type
= xref
->getEntry(i
)->type
;
956 if (type
== xrefEntryFree
) {
958 ref
.gen
= xref
->getEntry(i
)->gen
;
959 /* the XRef class adds a lot of irrelevant free entries, we only want the significant one
960 and we don't want the one with num=0 because it has already been added (gen = 65535)*/
961 if (ref
.gen
> 0 && ref
.num
> 0)
962 uxref
->add(ref
.num
, ref
.gen
, 0, gFalse
);
963 } else if (xref
->getEntry(i
)->getFlag(XRefEntry::DontRewrite
)) {
964 // This entry must not be written, put a free entry instead (with incremented gen)
966 ref
.gen
= xref
->getEntry(i
)->gen
+ 1;
967 uxref
->add(ref
.num
, ref
.gen
, 0, gFalse
);
968 } else if (type
== xrefEntryUncompressed
){
970 ref
.gen
= xref
->getEntry(i
)->gen
;
971 xref
->fetch(ref
.num
, ref
.gen
, &obj1
, 1);
972 Goffset offset
= writeObjectHeader(&ref
, outStr
);
973 // Write unencrypted objects in unencrypted form
974 if (xref
->getEntry(i
)->getFlag(XRefEntry::Unencrypted
)) {
975 writeObject(&obj1
, outStr
, NULL
, cryptRC4
, 0, 0, 0);
977 writeObject(&obj1
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
979 writeObjectFooter(outStr
);
980 uxref
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
982 } else if (type
== xrefEntryCompressed
) {
984 ref
.gen
= 0; //compressed entries have gen == 0
985 xref
->fetch(ref
.num
, ref
.gen
, &obj1
, 1);
986 Goffset offset
= writeObjectHeader(&ref
, outStr
);
987 writeObject(&obj1
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
988 writeObjectFooter(outStr
);
989 uxref
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
// The xref table starts right after the last object.
994 Goffset uxrefOffset
= outStr
->getPos();
995 writeXRefTableTrailer(uxrefOffset
, uxref
, gTrue
/* write all entries */,
996 uxref
->getNumObjects(), outStr
, gFalse
/* complete rewrite */);
// Serialises a dictionary as "<<" key/value pairs ">> ".
// Each key is passed through GooString::sanitizedName(gFalse) before being
// printed after a '/', and each value is fetched unresolved (getValNF) and
// written with writeObject() using the same offset/encryption arguments.
// The spelling "Dictionnary" is part of the existing method name.
// NOTE(review): the declaration of obj1 and its release after each value
// are not part of this excerpt.
1000 void PDFDoc::writeDictionnary (Dict
* dict
, OutStream
* outStr
, XRef
*xRef
, Guint numOffset
, Guchar
*fileKey
,
1001 CryptAlgorithm encAlgorithm
, int keyLength
, int objNum
, int objGen
)
1004 outStr
->printf("<<");
1005 for (int i
=0; i
<dict
->getLength(); i
++) {
1006 GooString
keyName(dict
->getKey(i
));
// sanitizedName() hands back a new GooString that is deleted below.
1007 GooString
*keyNameToPrint
= keyName
.sanitizedName(gFalse
/* non ps mode */);
1008 outStr
->printf("/%s ", keyNameToPrint
->getCString());
1009 delete keyNameToPrint
;
1010 writeObject(dict
->getValNF(i
, &obj1
), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1013 outStr
->printf(">> ");
// Writes a stream body: the keyword "stream", every character returned by
// str->getChar() until EOF, then "endstream".
// The data is read through getChar(), unlike writeRawStream() which uses
// getUnfilteredChar().
// NOTE(review): the reset/close of the stream around the loop is not part
// of this excerpt.
1016 void PDFDoc::writeStream (Stream
* str
, OutStream
* outStr
)
1018 outStr
->printf("stream\r\n");
1020 for (int c
=str
->getChar(); c
!= EOF
; c
=str
->getChar()) {
1021 outStr
->printf("%c", c
);
1023 outStr
->printf("\r\nendstream\r\n");
// Writes a stream body without decoding it: exactly `Length` characters are
// read with getUnfilteredChar() after unfilteredReset() and copied between
// the "stream" and "endstream" keywords.
// The stream dictionary must contain an integer (int or int64) "Length";
// otherwise an error is logged. Hitting EOF before `Length` characters were
// read is logged as well.
// NOTE(review): what happens after each error() call (return / break) and
// the declaration of `length` are not part of this excerpt.
1026 void PDFDoc::writeRawStream (Stream
* str
, OutStream
* outStr
)
1029 str
->getDict()->lookup("Length", &obj1
);
1030 if (!obj1
.isInt() && !obj1
.isInt64()) {
1031 error (errSyntaxError
, -1, "PDFDoc::writeRawStream, no Length in stream dict");
// Length may be stored as either a 32-bit or a 64-bit integer object.
1037 length
= obj1
.getInt();
1039 length
= obj1
.getInt64();
1042 outStr
->printf("stream\r\n");
1043 str
->unfilteredReset();
1044 for (Goffset i
= 0; i
< length
; i
++) {
1045 int c
= str
->getUnfilteredChar();
1046 if (unlikely(c
== EOF
)) {
1047 error (errSyntaxError
, -1, "PDFDoc::writeRawStream: EOF reading stream");
1050 outStr
->printf("%c", c
);
1053 outStr
->printf("\r\nendstream\r\n");
// Writes a string object as a PDF literal string "(...) ".
// When encryption applies, the bytes are first run through an EncryptStream
// over an in-memory copy of the string and collected in sEnc.
// Two output paths follow:
//   * strings with a Unicode marker: '(' , ')' and '\' are prefixed with a
//     backslash, every other byte is written as is;
//   * other strings: additionally CR and LF are written as the escape
//     sequences \r and \n.
// NOTE(review): the condition that enables encryption, the switch from `s`
// to sEnc, and the deletion of enc / sEnc are not part of this excerpt.
1056 void PDFDoc::writeString (GooString
* s
, OutStream
* outStr
, Guchar
*fileKey
,
1057 CryptAlgorithm encAlgorithm
, int keyLength
, int objNum
, int objGen
)
1059 // Encrypt string if encryption is enabled
1060 GooString
*sEnc
= NULL
;
1063 EncryptStream
*enc
= new EncryptStream(new MemStream(s
->getCString(), 0, s
->getLength(), obj
.initNull()),
1064 fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1065 sEnc
= new GooString();
1068 while ((c
= enc
->getChar()) != EOF
) {
1069 sEnc
->append((char)c
);
1077 if (s
->hasUnicodeMarker()) {
1078 // Unicode strings do not necessarily end with \0
1079 const char* c
= s
->getCString();
1080 outStr
->printf("(");
// Iterate by length, not up to a terminator: the data may contain NULs.
1081 for(int i
=0; i
<s
->getLength(); i
++) {
1082 char unescaped
= *(c
+i
)&0x000000ff;
1084 if (unescaped
== '(' || unescaped
== ')' || unescaped
== '\\')
1085 outStr
->printf("%c", '\\');
1086 outStr
->printf("%c", unescaped
);
1088 outStr
->printf(") ");
1090 const char* c
= s
->getCString();
1091 outStr
->printf("(");
1092 for(int i
=0; i
<s
->getLength(); i
++) {
1093 char unescaped
= *(c
+i
)&0x000000ff;
1095 if (unescaped
== '\r')
1096 outStr
->printf("\\r");
1097 else if (unescaped
== '\n')
1098 outStr
->printf("\\n");
1100 if (unescaped
== '(' || unescaped
== ')' || unescaped
== '\\') {
1101 outStr
->printf("%c", '\\');
1103 outStr
->printf("%c", unescaped
);
1106 outStr
->printf(") ");
// Writes the "<num> <gen> obj " line that opens an indirect object and
// captures the output position from before the write, so callers can record
// it in an xref table.
// NOTE(review): the `return offset;` statement is not part of this excerpt.
1112 Goffset
PDFDoc::writeObjectHeader (Ref
*ref
, OutStream
* outStr
)
1114 Goffset offset
= outStr
->getPos();
1115 outStr
->printf("%i %i obj ", ref
->num
, ref
->gen
);
// Serialises one Object to outStr, dispatching on obj->getType().
// Scalars are printed directly; strings go through writeString(); names are
// sanitised; arrays and dictionaries recurse; references are printed as
// "<num + numOffset> <gen> R".
// Streams take one of two paths:
//   * internal / crypt streams (strWeird, strCrypt): the data is read
//     decoded, optionally wrapped in an EncryptStream, /Length is
//     recomputed, /Filter and /DecodeParms are removed (subject to
//     removeFilter), then the dictionary and the data are written with
//     writeStream();
//   * all other streams: /Length is refreshed from the xref's stream end
//     when available and the raw bytes are copied with writeRawStream().
// NOTE(review): the `case` labels, `break`s, several `else` heads and the
// cleanup of encStream / temporaries are not part of this excerpt, so the
// exact branch structure of the Crypt-filter handling must be confirmed
// against the full file.
1119 void PDFDoc::writeObject (Object
* obj
, OutStream
* outStr
, XRef
*xRef
, Guint numOffset
, Guchar
*fileKey
,
1120 CryptAlgorithm encAlgorithm
, int keyLength
, int objNum
, int objGen
)
1126 switch (obj
->getType()) {
1128 outStr
->printf("%s ", obj
->getBool()?"true":"false");
1131 outStr
->printf("%i ", obj
->getInt());
1134 outStr
->printf("%lli ", obj
->getInt64());
// Reals are formatted with up to 10 significant digits.
1139 s
.appendf("{0:.10g}", obj
->getReal());
1140 outStr
->printf("%s ", s
.getCString());
1144 writeString(obj
->getString(), outStr
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1148 GooString
name(obj
->getName());
1149 GooString
*nameToPrint
= name
.sanitizedName(gFalse
/* non ps mode */);
1150 outStr
->printf("/%s ", nameToPrint
->getCString());
1155 outStr
->printf( "null ");
1158 array
= obj
->getArray();
1159 outStr
->printf("[");
1160 for (int i
=0; i
<array
->getLength(); i
++) {
1161 writeObject(array
->getNF(i
, &obj1
), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1164 outStr
->printf("] ");
1167 writeDictionnary (obj
->getDict(), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1171 // We can't modify streams with the current implementation (no write functions in Stream API)
1172 // => the only streams that can have been modified are internal streams (=strWeird)
1173 Stream
*stream
= obj
->getStream();
1174 if (stream
->getKind() == strWeird
|| stream
->getKind() == strCrypt
) {
1175 //we write the stream unencoded => TODO: write stream encoder
1178 EncryptStream
*encStream
= NULL
;
1179 GBool removeFilter
= gTrue
;
1180 if (stream
->getKind() == strWeird
&& fileKey
) {
// Look for a "Crypt" entry in /Filter, either as the single filter
// name or as an element of a filter array.
1182 stream
->getDict()->lookup("Filter", &filter
);
1183 if (!filter
.isName("Crypt")) {
1184 if (filter
.isArray()) {
1185 for (int i
= 0; i
< filter
.arrayGetLength(); i
++) {
1187 filter
.arrayGet(i
, &filterEle
);
1188 if (filterEle
.isName("Crypt")) {
1190 removeFilter
= gFalse
;
1196 encStream
= new EncryptStream(stream
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
// setAutoDelete(gFalse) is applied to every EncryptStream created here.
1197 encStream
->setAutoDelete(gFalse
);
1201 encStream
= new EncryptStream(stream
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1202 encStream
->setAutoDelete(gFalse
);
1206 removeFilter
= gFalse
;
1209 } else if (fileKey
!= NULL
) { // Encrypt stream
1210 encStream
= new EncryptStream(stream
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1211 encStream
->setAutoDelete(gFalse
);
1216 //recalculate stream length
// The length is obtained by reading the stream to EOF once.
1218 for (int c
=stream
->getChar(); c
!=EOF
; c
=stream
->getChar()) {
1221 obj1
.initInt64(tmp
);
1222 stream
->getDict()->set("Length", &obj1
);
1224 //Remove Stream encoding
1226 stream
->getDict()->remove("Filter");
1228 stream
->getDict()->remove("DecodeParms");
1230 writeDictionnary (stream
->getDict(),outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1231 writeStream (stream
,outStr
);
// Raw-copy path for ordinary (filtered) streams.
1236 FilterStream
*fs
= dynamic_cast<FilterStream
*>(stream
);
1238 BaseStream
*bs
= fs
->getBaseStream();
1241 if (xRef
->getStreamEnd(bs
->getStart(), &streamEnd
)) {
1243 val
.initInt64(streamEnd
- bs
->getStart());
1244 stream
->getDict()->set("Length", &val
);
1248 writeDictionnary (stream
->getDict(), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1249 writeRawStream (stream
, outStr
);
// numOffset shifts object numbers, e.g. when objects are renumbered.
1254 outStr
->printf("%i %i R ", obj
->getRef().num
+ numOffset
, obj
->getRef().gen
);
1257 outStr
->printf("%s\n", obj
->getCmd());
1260 outStr
->printf("error\r\n");
1263 outStr
->printf("eof\r\n");
1266 outStr
->printf("none\r\n");
1269 error(errUnimplemented
, -1,"Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj
->getType());
// Writes the "endobj" keyword (followed by CR LF) that closes an indirect
// object started with writeObjectHeader().
1274 void PDFDoc::writeObjectFooter (OutStream
* outStr
)
1276 outStr
->printf("endobj\r\n");
// Builds the trailer dictionary for a file that is about to be written.
// Entries set in the lines shown: Size (uxrefSize), ID, Root (from `root`),
// Prev (startxRef), plus Encrypt and Info copied from the original trailer
// when present.
// The ID value is an MD5 digest over: the current time, the file name, the
// file size and every string value of the document info dictionary.
//   * incremental update or encrypted file: the first ID element of the
//     original trailer is kept and only the second one is replaced;
//   * otherwise both elements are the new digest.
// NOTE(review): `message.append(fileName)` receives whatever the caller
// passed, and saveIncrementalUpdate() may pass NULL -- a guard is not
// visible in this excerpt. The size of `buffer` used by sprintf(), the
// condition around the Prev entry, the assignment of hasEncrypt and the
// return statement are not visible either.
1279 Dict
*PDFDoc::createTrailerDict(int uxrefSize
, GBool incrUpdate
, Goffset startxRef
,
1280 Ref
*root
, XRef
*xRef
, const char *fileName
, Goffset fileSize
)
1282 Dict
*trailerDict
= new Dict(xRef
);
1284 obj1
.initInt(uxrefSize
);
1285 trailerDict
->set("Size", &obj1
);
1288 //build a new ID, as recommended in the reference, uses:
1292 // - values of entry in information dictionary
1295 sprintf(buffer
, "%i", (int)time(NULL
));
1296 message
.append(buffer
);
1299 message
.append(fileName
);
1301 sprintf(buffer
, "%lli", (long long)fileSize
);
1302 message
.append(buffer
);
1304 //info dict -- only use text string
1305 if (!xRef
->getTrailerDict()->isNone() && xRef
->getDocInfo(&obj1
)->isDict()) {
1306 for(int i
=0; i
<obj1
.getDict()->getLength(); i
++) {
1308 obj1
.getDict()->getVal(i
, &obj2
);
1309 if (obj2
.isString()) {
1310 message
.append(obj2
.getString());
1317 GBool hasEncrypt
= gFalse
;
1318 if (!xRef
->getTrailerDict()->isNone()) {
// lookupNF: the Encrypt entry is copied without resolving a reference.
1320 xRef
->getTrailerDict()->dictLookupNF("Encrypt", &obj2
);
1321 if (!obj2
.isNull()) {
1322 trailerDict
->set("Encrypt", &obj2
);
1328 //calculate md5 digest
1330 md5((Guchar
*)message
.getCString(), message
.getLength(), digest
);
// The digest is stored as a 16-byte binary string.
1331 obj1
.initString(new GooString((const char*)digest
, 16));
1334 Object obj2
,obj3
,obj5
;
1335 obj2
.initArray(xRef
);
1337 // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1338 if (incrUpdate
|| hasEncrypt
) {
1340 //only update the second part of the array
1341 xRef
->getTrailerDict()->getDict()->lookup("ID", &obj4
);
1342 if (!obj4
.isArray()) {
1343 error(errSyntaxWarning
, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1345 //Get the first part of the ID
1346 obj4
.arrayGet(0,&obj3
);
1348 obj2
.arrayAdd(&obj3
);
1349 obj2
.arrayAdd(&obj1
);
1350 trailerDict
->set("ID", &obj2
);
1354 //new file => same values for the two identifiers
1355 obj2
.arrayAdd(&obj1
);
// A second string object with the same digest is created for element 1.
1356 obj1
.initString(new GooString((const char*)digest
, 16));
1357 obj2
.arrayAdd(&obj1
);
1358 trailerDict
->set("ID", &obj2
);
1361 obj1
.initRef(root
->num
, root
->gen
);
1362 trailerDict
->set("Root", &obj1
);
1365 obj1
.initInt64(startxRef
);
1366 trailerDict
->set("Prev", &obj1
);
1369 if (!xRef
->getTrailerDict()->isNone()) {
1370 xRef
->getDocInfoNF(&obj5
);
1371 if (!obj5
.isNull()) {
1372 trailerDict
->set("Info", &obj5
);
1379 void PDFDoc::writeXRefTableTrailer(Dict
*trailerDict
, XRef
*uxref
, GBool writeAllEntries
, Goffset uxrefOffset
, OutStream
* outStr
, XRef
*xRef
)
1381 uxref
->writeTableToFile( outStr
, writeAllEntries
);
1382 outStr
->printf( "trailer\r\n");
1383 writeDictionnary(trailerDict
, outStr
, xRef
, 0, NULL
, cryptRC4
, 0, 0, 0);
1384 outStr
->printf( "\r\nstartxref\r\n");
1385 outStr
->printf( "%lli\r\n", uxrefOffset
);
1386 outStr
->printf( "%%%%EOF\r\n");
1389 void PDFDoc::writeXRefStreamTrailer (Dict
*trailerDict
, XRef
*uxref
, Ref
*uxrefStreamRef
, Goffset uxrefOffset
, OutStream
* outStr
, XRef
*xRef
)
1393 // Fill stmData and some trailerDict fields
1394 uxref
->writeStreamToBuffer(&stmData
, trailerDict
, xRef
);
1396 // Create XRef stream object and write it
1398 MemStream
*mStream
= new MemStream( stmData
.getCString(), 0,
1399 stmData
.getLength(), obj1
.initDict(trailerDict
) );
1400 writeObjectHeader(uxrefStreamRef
, outStr
);
1401 writeObject(obj1
.initStream(mStream
), outStr
, xRef
, 0, NULL
, cryptRC4
, 0, 0, 0);
1402 writeObjectFooter(outStr
);
1405 outStr
->printf( "startxref\r\n");
1406 outStr
->printf( "%lli\r\n", uxrefOffset
);
1407 outStr
->printf( "%%%%EOF\r\n");
1410 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset
, XRef
*uxref
, GBool writeAllEntries
,
1411 int uxrefSize
, OutStream
* outStr
, GBool incrUpdate
)
1413 const char *fileNameA
= fileName
? fileName
->getCString() : NULL
;
1414 // file size (doesn't include the trailer)
1415 unsigned int fileSize
= 0;
1418 while ((c
= str
->getChar()) != EOF
) {
1423 ref
.num
= getXRef()->getRootNum();
1424 ref
.gen
= getXRef()->getRootGen();
1425 Dict
* trailerDict
= createTrailerDict(uxrefSize
, incrUpdate
, getStartXRef(), &ref
,
1426 getXRef(), fileNameA
, fileSize
);
1427 writeXRefTableTrailer(trailerDict
, uxref
, writeAllEntries
, uxrefOffset
, outStr
, getXRef());
1431 void PDFDoc::writeHeader(OutStream
*outStr
, int major
, int minor
)
1433 outStr
->printf("%%PDF-%d.%d\n", major
, minor
);
1434 outStr
->printf("%%\xE2\xE3\xCF\xD3\n");
1437 void PDFDoc::markDictionnary (Dict
* dict
, XRef
* xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
)
1440 for (int i
=0; i
<dict
->getLength(); i
++) {
1441 const char *key
= dict
->getKey(i
);
1442 if (strcmp(key
, "Annots") != 0) {
1443 markObject(dict
->getValNF(i
, &obj1
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1446 dict
->getValNF(i
, &annotsObj
);
1447 if (!annotsObj
.isNull()) {
1448 markAnnotations(&annotsObj
, xRef
, countRef
, 0, oldRefNum
, newRefNum
);
1456 void PDFDoc::markObject (Object
* obj
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
)
1461 switch (obj
->getType()) {
1463 array
= obj
->getArray();
1464 for (int i
=0; i
<array
->getLength(); i
++) {
1465 markObject(array
->getNF(i
, &obj1
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1470 markDictionnary (obj
->getDict(), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1474 Stream
*stream
= obj
->getStream();
1475 markDictionnary (stream
->getDict(), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1480 if (obj
->getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(obj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1481 if (getXRef()->getEntry(obj
->getRef().num
)->type
== xrefEntryFree
) {
1482 return; // already marked as free => should be replaced
1484 xRef
->add(obj
->getRef().num
+ numOffset
, obj
->getRef().gen
, 0, gTrue
);
1485 if (getXRef()->getEntry(obj
->getRef().num
)->type
== xrefEntryCompressed
) {
1486 xRef
->getEntry(obj
->getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1489 if (obj
->getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1490 countRef
->getEntry(obj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1492 countRef
->add(obj
->getRef().num
+ numOffset
, 1, 0, gTrue
);
1494 XRefEntry
*entry
= countRef
->getEntry(obj
->getRef().num
+ numOffset
);
1500 getXRef()->fetch(obj
->getRef().num
, obj
->getRef().gen
, &obj1
);
1501 markObject(&obj1
, xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1510 void PDFDoc::replacePageDict(int pageNo
, int rotate
,
1511 PDFRectangle
*mediaBox
,
1512 PDFRectangle
*cropBox
)
1514 Ref
*refPage
= getCatalog()->getPageRef(pageNo
);
1516 getXRef()->fetch(refPage
->num
, refPage
->gen
, &page
);
1517 Dict
*pageDict
= page
.getDict();
1518 pageDict
->remove("MediaBoxssdf");
1519 pageDict
->remove("MediaBox");
1520 pageDict
->remove("CropBox");
1521 pageDict
->remove("ArtBox");
1522 pageDict
->remove("BleedBox");
1523 pageDict
->remove("TrimBox");
1524 pageDict
->remove("Rotate");
1526 mediaBoxObj
.initArray(getXRef());
1528 murx
.initReal(mediaBox
->x1
);
1530 mury
.initReal(mediaBox
->y1
);
1532 mllx
.initReal(mediaBox
->x2
);
1534 mlly
.initReal(mediaBox
->y2
);
1535 mediaBoxObj
.arrayAdd(&murx
);
1536 mediaBoxObj
.arrayAdd(&mury
);
1537 mediaBoxObj
.arrayAdd(&mllx
);
1538 mediaBoxObj
.arrayAdd(&mlly
);
1539 pageDict
->add(copyString("MediaBox"), &mediaBoxObj
);
1540 if (cropBox
!= NULL
) {
1542 cropBoxObj
.initArray(getXRef());
1544 curx
.initReal(cropBox
->x1
);
1546 cury
.initReal(cropBox
->y1
);
1548 cllx
.initReal(cropBox
->x2
);
1550 clly
.initReal(cropBox
->y2
);
1551 cropBoxObj
.arrayAdd(&curx
);
1552 cropBoxObj
.arrayAdd(&cury
);
1553 cropBoxObj
.arrayAdd(&cllx
);
1554 cropBoxObj
.arrayAdd(&clly
);
1555 pageDict
->add(copyString("CropBox"), &cropBoxObj
);
1556 cropBoxObj
.getArray()->incRef();
1557 pageDict
->add(copyString("TrimBox"), &cropBoxObj
);
1559 mediaBoxObj
.getArray()->incRef();
1560 pageDict
->add(copyString("TrimBox"), &mediaBoxObj
);
1563 rotateObj
.initInt(rotate
);
1564 pageDict
->add(copyString("Rotate"), &rotateObj
);
1565 getXRef()->setModifiedObject(&page
, *refPage
);
1569 void PDFDoc::markPageObjects(Dict
*pageDict
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
)
1571 pageDict
->remove("OpenAction");
1572 pageDict
->remove("Outlines");
1573 pageDict
->remove("StructTreeRoot");
1575 for (int n
= 0; n
< pageDict
->getLength(); n
++) {
1576 const char *key
= pageDict
->getKey(n
);
1577 Object value
; pageDict
->getValNF(n
, &value
);
1578 if (strcmp(key
, "Parent") != 0 &&
1579 strcmp(key
, "Pages") != 0 &&
1580 strcmp(key
, "AcroForm") != 0 &&
1581 strcmp(key
, "Annots") != 0 &&
1582 strcmp(key
, "P") != 0 &&
1583 strcmp(key
, "Root") != 0) {
1584 markObject(&value
, xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1590 GBool
PDFDoc::markAnnotations(Object
*annotsObj
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldPageNum
, int newPageNum
) {
1592 GBool modified
= gFalse
;
1593 annotsObj
->fetch(getXRef(), &annots
);
1594 if (annots
.isArray()) {
1595 Array
*array
= annots
.getArray();
1596 for (int i
=array
->getLength() - 1; i
>= 0; i
--) {
1598 if (array
->get(i
, &obj1
)->isDict()) {
1600 Dict
*dict
= obj1
.getDict();
1601 dict
->lookup("Type", &type
);
1602 if (type
.isName() && strcmp(type
.getName(), "Annot") == 0) {
1604 if (dict
->lookupNF("P", &obj2
)->isRef()) {
1605 if (obj2
.getRef().num
== oldPageNum
) {
1607 array
->getNF(i
, &obj3
);
1609 Object
*newRef
= new Object();
1610 newRef
->initRef(newPageNum
, 0);
1611 dict
->set("P", newRef
);
1612 getXRef()->setModifiedObject(&obj1
, obj3
.getRef());
1615 } else if (obj2
.getRef().num
== newPageNum
) {
1622 getXRef()->fetch(obj2
.getRef().num
, obj2
.getRef().gen
, &page
);
1623 if (page
.isDict()) {
1625 Dict
*dict
= page
.getDict();
1626 dict
->lookup("Type", &pagetype
);
1627 if (!pagetype
.isName() || strcmp(pagetype
.getName(), "Page") != 0) {
1649 markPageObjects(dict
, xRef
, countRef
, numOffset
, oldPageNum
, newPageNum
);
1652 array
->getNF(i
, &obj1
);
1654 if (obj1
.getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(obj1
.getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1655 if (getXRef()->getEntry(obj1
.getRef().num
)->type
== xrefEntryFree
) {
1656 continue; // already marked as free => should be replaced
1658 xRef
->add(obj1
.getRef().num
+ numOffset
, obj1
.getRef().gen
, 0, gTrue
);
1659 if (getXRef()->getEntry(obj1
.getRef().num
)->type
== xrefEntryCompressed
) {
1660 xRef
->getEntry(obj1
.getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1663 if (obj1
.getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1664 countRef
->getEntry(obj1
.getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1666 countRef
->add(obj1
.getRef().num
+ numOffset
, 1, 0, gTrue
);
1668 XRefEntry
*entry
= countRef
->getEntry(obj1
.getRef().num
+ numOffset
);
1675 if (annotsObj
->isRef()) {
1676 if (annotsObj
->getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(annotsObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1677 if (getXRef()->getEntry(annotsObj
->getRef().num
)->type
== xrefEntryFree
) {
1678 return modified
; // already marked as free => should be replaced
1680 xRef
->add(annotsObj
->getRef().num
+ numOffset
, annotsObj
->getRef().gen
, 0, gTrue
);
1681 if (getXRef()->getEntry(annotsObj
->getRef().num
)->type
== xrefEntryCompressed
) {
1682 xRef
->getEntry(annotsObj
->getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1685 if (annotsObj
->getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1686 countRef
->getEntry(annotsObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1688 countRef
->add(annotsObj
->getRef().num
+ numOffset
, 1, 0, gTrue
);
1690 XRefEntry
*entry
= countRef
->getEntry(annotsObj
->getRef().num
+ numOffset
);
1693 getXRef()->setModifiedObject(&annots
, annotsObj
->getRef());
1699 void PDFDoc::markAcroForm(Object
*afObj
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
) {
1701 GBool modified
= gFalse
;
1702 afObj
->fetch(getXRef(), &acroform
);
1703 if (acroform
.isDict()) {
1704 Dict
*dict
= acroform
.getDict();
1705 for (int i
=0; i
< dict
->getLength(); i
++) {
1706 if (strcmp(dict
->getKey(i
), "Fields") == 0) {
1708 modified
= markAnnotations(dict
->getValNF(i
, &fields
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1712 markObject(dict
->getValNF(i
, &obj
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1717 if (afObj
->isRef()) {
1718 if (afObj
->getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(afObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1719 if (getXRef()->getEntry(afObj
->getRef().num
)->type
== xrefEntryFree
) {
1720 return; // already marked as free => should be replaced
1722 xRef
->add(afObj
->getRef().num
+ numOffset
, afObj
->getRef().gen
, 0, gTrue
);
1723 if (getXRef()->getEntry(afObj
->getRef().num
)->type
== xrefEntryCompressed
) {
1724 xRef
->getEntry(afObj
->getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1727 if (afObj
->getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1728 countRef
->getEntry(afObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1730 countRef
->add(afObj
->getRef().num
+ numOffset
, 1, 0, gTrue
);
1732 XRefEntry
*entry
= countRef
->getEntry(afObj
->getRef().num
+ numOffset
);
1736 getXRef()->setModifiedObject(&acroform
, afObj
->getRef());
1743 Guint
PDFDoc::writePageObjects(OutStream
*outStr
, XRef
*xRef
, Guint numOffset
, GBool combine
)
1745 Guint objectsCount
= 0; //count the number of objects in the XRef(s)
1747 CryptAlgorithm encAlgorithm
;
1749 xRef
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
1751 for (int n
= numOffset
; n
< xRef
->getNumObjects(); n
++) {
1752 if (xRef
->getEntry(n
)->type
!= xrefEntryFree
) {
1756 ref
.gen
= xRef
->getEntry(n
)->gen
;
1758 getXRef()->fetch(ref
.num
- numOffset
, ref
.gen
, &obj
);
1759 Goffset offset
= writeObjectHeader(&ref
, outStr
);
1761 writeObject(&obj
, outStr
, getXRef(), numOffset
, NULL
, cryptRC4
, 0, 0, 0);
1762 } else if (xRef
->getEntry(n
)->getFlag(XRefEntry::Unencrypted
)) {
1763 writeObject(&obj
, outStr
, NULL
, cryptRC4
, 0, 0, 0);
1765 writeObject(&obj
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
1767 writeObjectFooter(outStr
);
1768 xRef
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
1772 return objectsCount
;
1775 #ifndef DISABLE_OUTLINE
1776 Outline
*PDFDoc::getOutline()
1781 outline
= new Outline(catalog
->getOutline(), xref
);
1788 PDFDoc
*PDFDoc::ErrorPDFDoc(int errorCode
, GooString
*fileNameA
)
1790 PDFDoc
*doc
= new PDFDoc();
1791 doc
->errCode
= errorCode
;
1792 doc
->fileName
= fileNameA
;
1797 long long PDFDoc::strToLongLong(char *s
) {
1802 for (p
= s
; *p
&& isdigit(*p
& 0xff); ++p
) {
1804 if (x
> (LLONG_MAX
- d
) / 10) {
1812 // Read the 'startxref' position.
1813 Goffset
PDFDoc::getStartXRef(GBool tryingToReconstruct
)
1815 if (startXRefPos
== -1) {
1817 if (isLinearized(tryingToReconstruct
)) {
1818 char buf
[linearizationSearchSize
+1];
1822 for (n
= 0; n
< linearizationSearchSize
; ++n
) {
1823 if ((c
= str
->getChar()) == EOF
) {
1830 // find end of first obj (linearization dictionary)
1832 for (i
= 0; i
< n
; i
++) {
1833 if (!strncmp("endobj", &buf
[i
], 6)) {
1836 while (buf
[i
] && Lexer::isSpace(buf
[i
])) ++i
;
1842 char buf
[xrefSearchSize
+1];
1846 // read last xrefSearchSize bytes
1848 int maxXRefSearch
= 24576;
1849 if (str
->getLength() < maxXRefSearch
) maxXRefSearch
= str
->getLength();
1850 for (; (xrefSearchSize
- 16) * segnum
< maxXRefSearch
; segnum
++) {
1851 str
->setPos((xrefSearchSize
- 16) * segnum
+ xrefSearchSize
, -1);
1852 for (n
= 0; n
< xrefSearchSize
; ++n
) {
1853 if ((c
= str
->getChar()) == EOF
) {
1861 for (i
= n
- 9; i
>= 0; --i
) {
1862 if (!strncmp(&buf
[i
], "startxref", 9)) {
1869 for (p
= &buf
[i
+ 9]; isspace(*p
); ++p
);
1870 startXRefPos
= strToLongLong(p
);
1878 return startXRefPos
;
1881 Goffset
PDFDoc::getMainXRefEntriesOffset(GBool tryingToReconstruct
)
1883 Guint mainXRefEntriesOffset
= 0;
1885 if (isLinearized(tryingToReconstruct
)) {
1886 mainXRefEntriesOffset
= getLinearization()->getMainXRefEntriesOffset();
1889 return mainXRefEntriesOffset
;
1892 int PDFDoc::getNumPages()
1894 if (isLinearized()) {
1896 if ((n
= getLinearization()->getNumPages())) {
1901 return catalog
->getNumPages();
1904 Page
*PDFDoc::parsePage(int page
)
1911 pageRef
.num
= getHints()->getPageObjectNum(page
);
1913 error(errSyntaxWarning
, -1, "Failed to get object num from hint tables for page {0:d}", page
);
1917 // check for bogus ref - this can happen in corrupted PDF files
1918 if (pageRef
.num
< 0 || pageRef
.num
>= xref
->getNumObjects()) {
1919 error(errSyntaxWarning
, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef
.num
, page
);
1923 pageRef
.gen
= xref
->getEntry(pageRef
.num
)->gen
;
1924 xref
->fetch(pageRef
.num
, pageRef
.gen
, &obj
);
1925 if (!obj
.isDict("Page")) {
1927 error(errSyntaxWarning
, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef
.num
, pageRef
.gen
);
1930 pageDict
= obj
.getDict();
1932 p
= new Page(this, page
, pageDict
, pageRef
,
1933 new PageAttrs(NULL
, pageDict
), catalog
->getForm());
1939 Page
*PDFDoc::getPage(int page
)
1941 if ((page
< 1) || page
> getNumPages()) return NULL
;
1943 if (isLinearized()) {
1946 pageCache
= (Page
**) gmallocn(getNumPages(), sizeof(Page
*));
1947 for (int i
= 0; i
< getNumPages(); i
++) {
1948 pageCache
[i
] = NULL
;
1951 if (!pageCache
[page
-1]) {
1952 pageCache
[page
-1] = parsePage(page
);
1954 if (pageCache
[page
-1]) {
1955 return pageCache
[page
-1];
1957 error(errSyntaxWarning
, -1, "Failed parsing page {0:d} using hint tables", page
);
1961 return catalog
->getPage(page
);