1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
9 //========================================================================
11 // Modified under the Poppler project - http://poppler.freedesktop.org
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
17 // Copyright (C) 2005, 2007-2009, 2011-2014 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
29 // Copyright (C) 2011-2015 Thomas Freitag <Thomas.Freitag@alfa.de>
30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
31 // Copyright (C) 2013, 2014 Adrian Johnson <ajohnson@redneon.com>
32 // Copyright (C) 2013 Adam Reichold <adamreichold@myopera.com>
33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
36 // To see a description of the changes please see the Changelog file that
37 // came with your tarball or type make ChangeLog if you are building from git
39 //========================================================================
43 #ifdef USE_GCC_PRAGMAS
44 #pragma implementation
56 #include "goo/gstrtod.h"
57 #include "goo/GooString.h"
58 #include "goo/gfile.h"
59 #include "poppler-config.h"
60 #include "GlobalParams.h"
65 #include "Linearization.h"
67 #include "OutputDev.h"
69 #include "ErrorCodes.h"
72 #include "SecurityHandler.h"
74 #ifndef DISABLE_OUTLINE
81 # define pdfdocLocker() MutexLocker locker(&mutex)
83 # define pdfdocLocker()
86 //------------------------------------------------------------------------
88 #define headerSearchSize 1024 // read this many bytes at beginning of
89 // file to look for '%PDF'
90 #define pdfIdLength 32 // PDF Document IDs (PermanentId, UpdateId) length
92 #define linearizationSearchSize 1024 // read this many bytes at beginning of
93 // file to look for linearization
96 #define xrefSearchSize 1024 // read this many bytes at end of file
97 // to look for 'startxref'
99 //------------------------------------------------------------------------
101 //------------------------------------------------------------------------
114 linearization
= NULL
;
117 #ifndef DISABLE_OUTLINE
130 PDFDoc::PDFDoc(GooString
*fileNameA
, GooString
*ownerPassword
,
131 GooString
*userPassword
, void *guiDataA
) {
139 fileName
= fileNameA
;
142 n
= fileName
->getLength();
143 fileNameU
= (wchar_t *)gmallocn(n
+ 1, sizeof(wchar_t));
144 for (i
= 0; i
< n
; ++i
) {
145 fileNameU
[i
] = (wchar_t)(fileName
->getChar(i
) & 0xff);
147 fileNameU
[n
] = L
'\0';
151 file
= GooFile::open(fileName
);
153 // fopen() has failed.
154 // Keep a copy of the errno returned by fopen so that it can be
155 // referred to later.
157 error(errIO
, -1, "Couldn't open file '{0:t}': {1:s}.", fileName
, strerror(errno
));
158 errCode
= errOpenFile
;
164 str
= new FileStream(file
, 0, gFalse
, file
->size(), &obj
);
166 ok
= setup(ownerPassword
, userPassword
);
170 PDFDoc::PDFDoc(wchar_t *fileNameA
, int fileNameLen
, GooString
*ownerPassword
,
171 GooString
*userPassword
, void *guiDataA
) {
172 OSVERSIONINFO version
;
180 // save both Unicode and 8-bit copies of the file name
181 fileName
= new GooString();
182 fileNameU
= (wchar_t *)gmallocn(fileNameLen
+ 1, sizeof(wchar_t));
183 for (i
= 0; i
< fileNameLen
; ++i
) {
184 fileName
->append((char)fileNameA
[i
]);
185 fileNameU
[i
] = fileNameA
[i
];
187 fileNameU
[fileNameLen
] = L
'\0';
190 // NB: _wfopen is only available in NT
191 version
.dwOSVersionInfoSize
= sizeof(version
);
192 GetVersionEx(&version
);
193 if (version
.dwPlatformId
== VER_PLATFORM_WIN32_NT
) {
194 file
= GooFile::open(fileNameU
);
196 file
= GooFile::open(fileName
);
199 error(errIO
, -1, "Couldn't open file '{0:t}'", fileName
);
200 errCode
= errOpenFile
;
206 str
= new FileStream(file
, 0, gFalse
, file
->size(), &obj
);
208 ok
= setup(ownerPassword
, userPassword
);
212 PDFDoc::PDFDoc(BaseStream
*strA
, GooString
*ownerPassword
,
213 GooString
*userPassword
, void *guiDataA
) {
220 if (strA
->getFileName()) {
221 fileName
= strA
->getFileName()->copy();
223 n
= fileName
->getLength();
224 fileNameU
= (wchar_t *)gmallocn(n
+ 1, sizeof(wchar_t));
225 for (i
= 0; i
< n
; ++i
) {
226 fileNameU
[i
] = (wchar_t)(fileName
->getChar(i
) & 0xff);
228 fileNameU
[n
] = L
'\0';
237 ok
= setup(ownerPassword
, userPassword
);
240 GBool
PDFDoc::setup(GooString
*ownerPassword
, GooString
*userPassword
) {
243 if (str
->getPos() < 0)
245 error(errSyntaxError
, -1, "Document base stream is not seekable");
252 // Adobe does not seem to enforce %%EOF, so we do the same
253 // if (!checkFooter()) return gFalse;
258 GBool wasReconstructed
= false;
261 xref
= new XRef(str
, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed
);
263 if (wasReconstructed
) {
266 xref
= new XRef(str
, getStartXRef(gTrue
), getMainXRefEntriesOffset(gTrue
), &wasReconstructed
);
269 error(errSyntaxError
, -1, "Couldn't read xref table");
270 errCode
= xref
->getErrorCode();
275 // check for encryption
276 if (!checkEncryption(ownerPassword
, userPassword
)) {
277 errCode
= errEncrypted
;
282 catalog
= new Catalog(this);
283 if (catalog
&& !catalog
->isOk()) {
284 if (!wasReconstructed
)
286 // try one more time to construct the Catalog; maybe the problem is a damaged XRef
289 xref
= new XRef(str
, 0, 0, NULL
, true);
290 catalog
= new Catalog(this);
293 if (catalog
&& !catalog
->isOk()) {
294 error(errSyntaxError
, -1, "Couldn't read page catalog");
295 errCode
= errBadCatalog
;
306 for (int i
= 0; i
< getNumPages(); i
++) {
314 #ifndef DISABLE_OUTLINE
329 delete linearization
;
346 gDestroyMutex(&mutex
);
351 // Check for a %%EOF at the end of this stream
352 GBool
PDFDoc::checkFooter() {
353 // we look in the last 1024 chars because Adobe does the same
354 char *eof
= new char[1025];
355 Goffset pos
= str
->getPos();
356 str
->setPos(1024, -1);
358 for (i
= 0; i
< 1024; i
++)
368 for (i
= i
- 5; i
>= 0; i
--) {
369 if (strncmp (&eof
[i
], "%%EOF", 5) == 0) {
376 error(errSyntaxError
, -1, "Document has not the mandatory ending %%EOF");
377 errCode
= errDamaged
;
386 // Check for a PDF header on this stream. Skip past some garbage
388 void PDFDoc::checkHeader() {
389 char hdrBuf
[headerSearchSize
+1];
396 for (i
= 0; i
< headerSearchSize
; ++i
) {
397 hdrBuf
[i
] = str
->getChar();
399 hdrBuf
[headerSearchSize
] = '\0';
400 for (i
= 0; i
< headerSearchSize
- 5; ++i
) {
401 if (!strncmp(&hdrBuf
[i
], "%PDF-", 5)) {
405 if (i
>= headerSearchSize
- 5) {
406 error(errSyntaxWarning
, -1, "May not be a PDF file (continuing anyway)");
410 if (!(p
= strtok_r(&hdrBuf
[i
+5], " \t\n\r", &tokptr
))) {
411 error(errSyntaxWarning
, -1, "May not be a PDF file (continuing anyway)");
414 sscanf(p
, "%d.%d", &pdfMajorVersion
, &pdfMinorVersion
);
415 // We don't do the version check. Don't add it back in.
418 GBool
PDFDoc::checkEncryption(GooString
*ownerPassword
, GooString
*userPassword
) {
423 xref
->getTrailerDict()->dictLookup("Encrypt", &encrypt
);
424 if ((encrypted
= encrypt
.isDict())) {
425 if ((secHdlr
= SecurityHandler::make(this, &encrypt
))) {
426 if (secHdlr
->isUnencrypted()) {
429 } else if (secHdlr
->checkEncryption(ownerPassword
, userPassword
)) {
430 // authorization succeeded
431 xref
->setEncryption(secHdlr
->getPermissionFlags(),
432 secHdlr
->getOwnerPasswordOk(),
433 secHdlr
->getFileKey(),
434 secHdlr
->getFileKeyLength(),
435 secHdlr
->getEncVersion(),
436 secHdlr
->getEncRevision(),
437 secHdlr
->getEncAlgorithm());
440 // authorization failed
444 // couldn't find the matching security handler
448 // document is not encrypted
455 void PDFDoc::displayPage(OutputDev
*out
, int page
,
456 double hDPI
, double vDPI
, int rotate
,
457 GBool useMediaBox
, GBool crop
, GBool printing
,
458 GBool (*abortCheckCbk
)(void *data
),
459 void *abortCheckCbkData
,
460 GBool (*annotDisplayDecideCbk
)(Annot
*annot
, void *user_data
),
461 void *annotDisplayDecideCbkData
, GBool copyXRef
) {
462 if (globalParams
->getPrintCommands()) {
463 printf("***** page %d *****\n", page
);
467 getPage(page
)->display(out
, hDPI
, vDPI
,
468 rotate
, useMediaBox
, crop
, printing
,
469 abortCheckCbk
, abortCheckCbkData
,
470 annotDisplayDecideCbk
, annotDisplayDecideCbkData
, copyXRef
);
// Renders a contiguous range of pages to an output device.
//
// Calls displayPage() once for every page number from firstPage through
// lastPage (both inclusive), forwarding the same resolution, rotation,
// box/crop/printing flags and callback arguments to each call.
//
// NOTE(review): no copyXRef argument is passed to displayPage() here, so
// whatever default that parameter has applies — presumably declared in the
// header, which is not visible in this listing.
474 void PDFDoc::displayPages(OutputDev
*out
, int firstPage
, int lastPage
,
475 double hDPI
, double vDPI
, int rotate
,
476 GBool useMediaBox
, GBool crop
, GBool printing
,
477 GBool (*abortCheckCbk
)(void *data
),
478 void *abortCheckCbkData
,
479 GBool (*annotDisplayDecideCbk
)(Annot
*annot
, void *user_data
),
480 void *annotDisplayDecideCbkData
) {
// Walk the inclusive page range, delegating each page to displayPage().
483 for (page
= firstPage
; page
<= lastPage
; ++page
) {
484 displayPage(out
, page
, hDPI
, vDPI
, rotate
, useMediaBox
, crop
, printing
,
485 abortCheckCbk
, abortCheckCbkData
,
486 annotDisplayDecideCbk
, annotDisplayDecideCbkData
);
490 void PDFDoc::displayPageSlice(OutputDev
*out
, int page
,
491 double hDPI
, double vDPI
, int rotate
,
492 GBool useMediaBox
, GBool crop
, GBool printing
,
493 int sliceX
, int sliceY
, int sliceW
, int sliceH
,
494 GBool (*abortCheckCbk
)(void *data
),
495 void *abortCheckCbkData
,
496 GBool (*annotDisplayDecideCbk
)(Annot
*annot
, void *user_data
),
497 void *annotDisplayDecideCbkData
, GBool copyXRef
) {
499 getPage(page
)->displaySlice(out
, hDPI
, vDPI
,
500 rotate
, useMediaBox
, crop
,
501 sliceX
, sliceY
, sliceW
, sliceH
,
503 abortCheckCbk
, abortCheckCbkData
,
504 annotDisplayDecideCbk
, annotDisplayDecideCbkData
, copyXRef
);
507 Links
*PDFDoc::getLinks(int page
) {
508 Page
*p
= getPage(page
);
510 return new Links (NULL
);
512 return p
->getLinks();
515 void PDFDoc::processLinks(OutputDev
*out
, int page
) {
517 getPage(page
)->processLinks(out
);
// Returns the document's Linearization object.
//
// The object is created from the base stream `str` the first time this is
// called (when the `linearization` member is still null) and the stored
// pointer is returned on every call after that.
520 Linearization
*PDFDoc::getLinearization()
// Create on first use only.
522 if (!linearization
) {
523 linearization
= new Linearization(str
);
525 return linearization
;
528 GBool
PDFDoc::isLinearized(GBool tryingToReconstruct
) {
529 if ((str
->getLength()) &&
530 (getLinearization()->getLength() == str
->getLength()))
533 if (tryingToReconstruct
)
534 return getLinearization()->getLength() > 0;
// Converts a raw PDF document ID into its hexadecimal text form.
//
// encodedidstring: the binary ID; its length is checked against
//                  pdfIdLength / 2 (16 bytes) before conversion.
// id:              receives the pdfIdLength (32) hex characters via Set().
//
// NOTE(review): the statements guarded by the two `if` checks below are not
// visible in this listing. Callers test the result with `!get_id(...)` and
// report an invalid ID, so both checks presumably return gFalse — confirm
// against the full source.
541 get_id (GooString
*encodedidstring
, GooString
*id
) {
542 const char *encodedid
= encodedidstring
->getCString();
// Room for the 32 hex digits plus the terminating NUL written by sprintf.
543 char pdfid
[pdfIdLength
+ 1];
// Reject IDs that are not exactly 16 bytes long.
546 if (encodedidstring
->getLength() != pdfIdLength
/ 2)
// Format each byte as two lowercase hex digits; the `& 0xff` mask keeps a
// negative (signed) char from being printed as a sign-extended value.
549 n
= sprintf(pdfid
, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
550 encodedid
[0] & 0xff, encodedid
[1] & 0xff, encodedid
[2] & 0xff, encodedid
[3] & 0xff,
551 encodedid
[4] & 0xff, encodedid
[5] & 0xff, encodedid
[6] & 0xff, encodedid
[7] & 0xff,
552 encodedid
[8] & 0xff, encodedid
[9] & 0xff, encodedid
[10] & 0xff, encodedid
[11] & 0xff,
553 encodedid
[12] & 0xff, encodedid
[13] & 0xff, encodedid
[14] & 0xff, encodedid
[15] & 0xff);
// sprintf must have produced exactly pdfIdLength characters.
554 if (n
!= pdfIdLength
)
557 id
->Set(pdfid
, pdfIdLength
);
561 GBool
PDFDoc::getID(GooString
*permanent_id
, GooString
*update_id
) {
563 xref
->getTrailerDict()->dictLookup ("ID", &obj
);
565 if (obj
.isArray() && obj
.arrayGetLength() == 2) {
569 if (obj
.arrayGet(0, &obj2
)->isString()) {
570 if (!get_id (obj2
.getString(), permanent_id
)) {
575 error(errSyntaxError
, -1, "Invalid permanent ID");
583 if (obj
.arrayGet(1, &obj2
)->isString()) {
584 if (!get_id (obj2
.getString(), update_id
)) {
589 error(errSyntaxError
, -1, "Invalid update ID");
605 Hints
*PDFDoc::getHints()
607 if (!hints
&& isLinearized()) {
608 hints
= new Hints(str
, getLinearization(), getXRef(), secHdlr
);
614 int PDFDoc::savePageAs(GooString
*name
, int pageNo
)
618 XRef
*yRef
, *countRef
;
619 int rootNum
= getXRef()->getNumObjects() + 1;
621 // Make sure that special flags are set, because we are going to read
622 // all objects, including Unencrypted ones.
623 xref
->scanSpecialFlags();
626 CryptAlgorithm encAlgorithm
;
628 xref
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
630 if (pageNo
< 1 || pageNo
> getNumPages() || !getCatalog()->getPage(pageNo
)) {
631 error(errInternal
, -1, "Illegal pageNo: {0:d}({1:d})", pageNo
, getNumPages() );
634 PDFRectangle
*cropBox
= NULL
;
635 if (getCatalog()->getPage(pageNo
)->isCropped()) {
636 cropBox
= getCatalog()->getPage(pageNo
)->getCropBox();
638 replacePageDict(pageNo
,
639 getCatalog()->getPage(pageNo
)->getRotate(),
640 getCatalog()->getPage(pageNo
)->getMediaBox(),
642 Ref
*refPage
= getCatalog()->getPageRef(pageNo
);
644 getXRef()->fetch(refPage
->num
, refPage
->gen
, &page
);
646 if (!(f
= fopen(name
->getCString(), "wb"))) {
647 error(errIO
, -1, "Couldn't open file '{0:t}'", name
);
650 outStr
= new FileOutStream(f
,0);
652 yRef
= new XRef(getXRef()->getTrailerDict());
654 if (secHdlr
!= NULL
&& !secHdlr
->isUnencrypted()) {
655 yRef
->setEncryption(secHdlr
->getPermissionFlags(),
656 secHdlr
->getOwnerPasswordOk(), fileKey
, keyLength
, secHdlr
->getEncVersion(), secHdlr
->getEncRevision(), encAlgorithm
);
658 countRef
= new XRef();
659 Object
*trailerObj
= getXRef()->getTrailerDict();
660 if (trailerObj
->isDict()) {
661 markPageObjects(trailerObj
->getDict(), yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
663 yRef
->add(0, 65535, 0, gFalse
);
664 writeHeader(outStr
, getPDFMajorVersion(), getPDFMinorVersion());
666 // get and mark info dict
668 getXRef()->getDocInfo(&infoObj
);
669 if (infoObj
.isDict()) {
670 Dict
*infoDict
= infoObj
.getDict();
671 markPageObjects(infoDict
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
672 if (trailerObj
->isDict()) {
673 Dict
*trailerDict
= trailerObj
->getDict();
675 trailerDict
->lookupNF("Info", &ref
);
677 yRef
->add(ref
.getRef().num
, ref
.getRef().gen
, 0, gTrue
);
678 if (getXRef()->getEntry(ref
.getRef().num
)->type
== xrefEntryCompressed
) {
679 yRef
->getEntry(ref
.getRef().num
)->type
= xrefEntryCompressed
;
687 // get and mark output intents etc.
688 Object catObj
, pagesObj
, resourcesObj
, annotsObj
, afObj
;
689 getXRef()->getCatalog(&catObj
);
690 Dict
*catDict
= catObj
.getDict();
691 catDict
->lookup("Pages", &pagesObj
);
692 catDict
->lookupNF("AcroForm", &afObj
);
693 if (!afObj
.isNull()) {
694 markAcroForm(&afObj
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
697 Dict
*pagesDict
= pagesObj
.getDict();
698 pagesDict
->lookup("Resources", &resourcesObj
);
699 if (resourcesObj
.isDict())
700 markPageObjects(resourcesObj
.getDict(), yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
701 markPageObjects(catDict
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
703 Dict
*pageDict
= page
.getDict();
704 markPageObjects(pageDict
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
705 pageDict
->lookupNF("Annots", &annotsObj
);
706 if (!annotsObj
.isNull()) {
707 markAnnotations(&annotsObj
, yRef
, countRef
, 0, refPage
->num
, rootNum
+ 2);
710 yRef
->markUnencrypted();
711 writePageObjects(outStr
, yRef
, 0);
713 yRef
->add(rootNum
,0,outStr
->getPos(),gTrue
);
714 outStr
->printf("%d 0 obj\n", rootNum
);
715 outStr
->printf("<< /Type /Catalog /Pages %d 0 R", rootNum
+ 1);
716 for (int j
= 0; j
< catDict
->getLength(); j
++) {
717 const char *key
= catDict
->getKey(j
);
718 if (strcmp(key
, "Type") != 0 &&
719 strcmp(key
, "Catalog") != 0 &&
720 strcmp(key
, "Pages") != 0)
722 if (j
> 0) outStr
->printf(" ");
723 Object value
; catDict
->getValNF(j
, &value
);
724 outStr
->printf("/%s ", key
);
725 writeObject(&value
, outStr
, getXRef(), 0, NULL
, cryptRC4
, 0, 0, 0);
731 outStr
->printf(">>\nendobj\n");
733 yRef
->add(rootNum
+ 1,0,outStr
->getPos(),gTrue
);
734 outStr
->printf("%d 0 obj\n", rootNum
+ 1);
735 outStr
->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum
+ 2);
736 if (resourcesObj
.isDict()) {
737 outStr
->printf("/Resources ");
738 writeObject(&resourcesObj
, outStr
, getXRef(), 0, NULL
, cryptRC4
, 0, 0, 0);
741 outStr
->printf(">>\n");
742 outStr
->printf("endobj\n");
744 yRef
->add(rootNum
+ 2,0,outStr
->getPos(),gTrue
);
745 outStr
->printf("%d 0 obj\n", rootNum
+ 2);
746 outStr
->printf("<< ");
747 for (int n
= 0; n
< pageDict
->getLength(); n
++) {
748 if (n
> 0) outStr
->printf(" ");
749 const char *key
= pageDict
->getKey(n
);
750 Object value
; pageDict
->getValNF(n
, &value
);
751 if (strcmp(key
, "Parent") == 0) {
752 outStr
->printf("/Parent %d 0 R", rootNum
+ 1);
754 outStr
->printf("/%s ", key
);
755 writeObject(&value
, outStr
, getXRef(), 0, NULL
, cryptRC4
, 0, 0, 0);
759 outStr
->printf(" >>\nendobj\n");
762 Goffset uxrefOffset
= outStr
->getPos();
766 Dict
*trailerDict
= createTrailerDict(rootNum
+ 3, gFalse
, 0, &ref
, getXRef(),
767 name
->getCString(), uxrefOffset
);
768 writeXRefTableTrailer(trailerDict
, yRef
, gFalse
/* do not write unnecessary entries */,
769 uxrefOffset
, outStr
, getXRef());
781 int PDFDoc::saveAs(GooString
*name
, PDFWriteMode mode
) {
786 if (!(f
= fopen(name
->getCString(), "wb"))) {
787 error(errIO
, -1, "Couldn't open file '{0:t}'", name
);
790 outStr
= new FileOutStream(f
,0);
791 res
= saveAs(outStr
, mode
);
797 int PDFDoc::saveAs(OutStream
*outStr
, PDFWriteMode mode
) {
799 // find if we have updated objects
800 GBool updated
= gFalse
;
801 for(int i
=0; i
<xref
->getNumObjects(); i
++) {
802 if (xref
->getEntry(i
)->getFlag(XRefEntry::Updated
)) {
808 if (!updated
&& mode
== writeStandard
) {
809 // simply copy the original file
810 saveWithoutChangesAs (outStr
);
811 } else if (mode
== writeForceRewrite
) {
812 saveCompleteRewrite(outStr
);
814 saveIncrementalUpdate(outStr
);
820 int PDFDoc::saveWithoutChangesAs(GooString
*name
) {
825 if (!(f
= fopen(name
->getCString(), "wb"))) {
826 error(errIO
, -1, "Couldn't open file '{0:t}'", name
);
830 outStr
= new FileOutStream(f
,0);
831 res
= saveWithoutChangesAs(outStr
);
839 int PDFDoc::saveWithoutChangesAs(OutStream
*outStr
) {
842 BaseStream
*copyStr
= str
->copy();
844 while ((c
= copyStr
->getChar()) != EOF
) {
853 void PDFDoc::saveIncrementalUpdate (OutStream
* outStr
)
857 //copy the original file
858 BaseStream
*copyStr
= str
->copy();
860 while ((c
= copyStr
->getChar()) != EOF
) {
867 CryptAlgorithm encAlgorithm
;
869 xref
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
872 uxref
->add(0, 65535, 0, gFalse
);
874 for(int i
=0; i
<xref
->getNumObjects(); i
++) {
875 if ((xref
->getEntry(i
)->type
== xrefEntryFree
) &&
876 (xref
->getEntry(i
)->gen
== 0)) //we skip the irrelevant free objects
879 if (xref
->getEntry(i
)->getFlag(XRefEntry::Updated
)) { //we have an updated object
882 ref
.gen
= xref
->getEntry(i
)->type
== xrefEntryCompressed
? 0 : xref
->getEntry(i
)->gen
;
883 if (xref
->getEntry(i
)->type
!= xrefEntryFree
) {
885 xref
->fetch(ref
.num
, ref
.gen
, &obj1
, 1);
886 Goffset offset
= writeObjectHeader(&ref
, outStr
);
887 writeObject(&obj1
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
888 writeObjectFooter(outStr
);
889 uxref
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
892 uxref
->add(ref
.num
, ref
.gen
, 0, gFalse
);
897 if (uxref
->getNumObjects() == 0) { //we have nothing to update
902 Goffset uxrefOffset
= outStr
->getPos();
903 int numobjects
= xref
->getNumObjects();
904 const char *fileNameA
= fileName
? fileName
->getCString() : NULL
;
905 Ref rootRef
, uxrefStreamRef
;
906 rootRef
.num
= getXRef()->getRootNum();
907 rootRef
.gen
= getXRef()->getRootGen();
909 // Output an xref stream if the document already uses one
910 GBool xRefStream
= xref
->isXRefStream();
913 // Append an entry for the xref stream itself
914 uxrefStreamRef
.num
= numobjects
++;
915 uxrefStreamRef
.gen
= 0;
916 uxref
->add(uxrefStreamRef
.num
, uxrefStreamRef
.gen
, uxrefOffset
, gTrue
);
919 Dict
*trailerDict
= createTrailerDict(numobjects
, gTrue
, getStartXRef(), &rootRef
, getXRef(), fileNameA
, uxrefOffset
);
921 writeXRefStreamTrailer(trailerDict
, uxref
, &uxrefStreamRef
, uxrefOffset
, outStr
, getXRef());
923 writeXRefTableTrailer(trailerDict
, uxref
, gFalse
, uxrefOffset
, outStr
, getXRef());
930 void PDFDoc::saveCompleteRewrite (OutStream
* outStr
)
932 // Make sure that special flags are set, because we are going to read
933 // all objects, including Unencrypted ones.
934 xref
->scanSpecialFlags();
937 CryptAlgorithm encAlgorithm
;
939 xref
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
941 outStr
->printf("%%PDF-%d.%d\r\n",pdfMajorVersion
,pdfMinorVersion
);
942 XRef
*uxref
= new XRef();
943 uxref
->add(0, 65535, 0, gFalse
);
945 for(int i
=0; i
<xref
->getNumObjects(); i
++) {
948 XRefEntryType type
= xref
->getEntry(i
)->type
;
949 if (type
== xrefEntryFree
) {
951 ref
.gen
= xref
->getEntry(i
)->gen
;
952 /* The XRef class adds a lot of irrelevant free entries; we only want the significant ones,
953 and we don't want the one with num=0 because it has already been added (gen = 65535). */
954 if (ref
.gen
> 0 && ref
.num
> 0)
955 uxref
->add(ref
.num
, ref
.gen
, 0, gFalse
);
956 } else if (xref
->getEntry(i
)->getFlag(XRefEntry::DontRewrite
)) {
957 // This entry must not be written, put a free entry instead (with incremented gen)
959 ref
.gen
= xref
->getEntry(i
)->gen
+ 1;
960 uxref
->add(ref
.num
, ref
.gen
, 0, gFalse
);
961 } else if (type
== xrefEntryUncompressed
){
963 ref
.gen
= xref
->getEntry(i
)->gen
;
964 xref
->fetch(ref
.num
, ref
.gen
, &obj1
, 1);
965 Goffset offset
= writeObjectHeader(&ref
, outStr
);
966 // Write unencrypted objects in unencrypted form
967 if (xref
->getEntry(i
)->getFlag(XRefEntry::Unencrypted
)) {
968 writeObject(&obj1
, outStr
, NULL
, cryptRC4
, 0, 0, 0);
970 writeObject(&obj1
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
972 writeObjectFooter(outStr
);
973 uxref
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
975 } else if (type
== xrefEntryCompressed
) {
977 ref
.gen
= 0; //compressed entries have gen == 0
978 xref
->fetch(ref
.num
, ref
.gen
, &obj1
, 1);
979 Goffset offset
= writeObjectHeader(&ref
, outStr
);
980 writeObject(&obj1
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
981 writeObjectFooter(outStr
);
982 uxref
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
987 Goffset uxrefOffset
= outStr
->getPos();
988 writeXRefTableTrailer(uxrefOffset
, uxref
, gTrue
/* write all entries */,
989 uxref
->getNumObjects(), outStr
, gFalse
/* complete rewrite */);
// Serializes a dictionary to outStr in PDF syntax.
//
// Output is "<<", then for every entry "/key " followed by the entry's value,
// then ">> ". Values are obtained with getValNF() and written by writeObject().
//
// dict:       dictionary to write.
// outStr:     destination stream.
// xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen:
//             forwarded unchanged to writeObject() for each value.
//
// NOTE(review): the declaration of obj1 is not visible in this listing.
993 void PDFDoc::writeDictionnary (Dict
* dict
, OutStream
* outStr
, XRef
*xRef
, Guint numOffset
, Guchar
*fileKey
,
994 CryptAlgorithm encAlgorithm
, int keyLength
, int objNum
, int objGen
)
997 outStr
->printf("<<");
998 for (int i
=0; i
<dict
->getLength(); i
++) {
// Keys are emitted through sanitizedName() (non-PS mode) rather than raw.
999 GooString
keyName(dict
->getKey(i
));
1000 GooString
*keyNameToPrint
= keyName
.sanitizedName(gFalse
/* non ps mode */);
1001 outStr
->printf("/%s ", keyNameToPrint
->getCString());
// sanitizedName() handed back a heap object that this function must free.
1002 delete keyNameToPrint
;
1003 writeObject(dict
->getValNF(i
, &obj1
), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1006 outStr
->printf(">> ");
// Writes the contents of a stream to outStr, framed by the PDF "stream" and
// "endstream" keywords.
//
// str:    source stream; read with getChar() until it returns EOF.
// outStr: destination stream; receives the data one byte per printf call.
//
// NOTE(review): a statement between the "stream" keyword and the read loop is
// not visible in this listing (presumably a reset of str) — confirm.
1009 void PDFDoc::writeStream (Stream
* str
, OutStream
* outStr
)
1011 outStr
->printf("stream\r\n");
// Copy every byte getChar() yields until EOF.
1013 for (int c
=str
->getChar(); c
!= EOF
; c
=str
->getChar()) {
1014 outStr
->printf("%c", c
);
1016 outStr
->printf("\r\nendstream\r\n");
1019 void PDFDoc::writeRawStream (Stream
* str
, OutStream
* outStr
)
1022 str
->getDict()->lookup("Length", &obj1
);
1023 if (!obj1
.isInt() && !obj1
.isInt64()) {
1024 error (errSyntaxError
, -1, "PDFDoc::writeRawStream, no Length in stream dict");
1030 length
= obj1
.getInt();
1032 length
= obj1
.getInt64();
1035 outStr
->printf("stream\r\n");
1036 str
->unfilteredReset();
1037 for (Goffset i
= 0; i
< length
; i
++) {
1038 int c
= str
->getUnfilteredChar();
1039 if (unlikely(c
== EOF
)) {
1040 error (errSyntaxError
, -1, "PDFDoc::writeRawStream: EOF reading stream");
1043 outStr
->printf("%c", c
);
1046 outStr
->printf("\r\nendstream\r\n");
1049 void PDFDoc::writeString (GooString
* s
, OutStream
* outStr
, Guchar
*fileKey
,
1050 CryptAlgorithm encAlgorithm
, int keyLength
, int objNum
, int objGen
)
1052 // Encrypt string if encryption is enabled
1053 GooString
*sEnc
= NULL
;
1056 EncryptStream
*enc
= new EncryptStream(new MemStream(s
->getCString(), 0, s
->getLength(), obj
.initNull()),
1057 fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1058 sEnc
= new GooString();
1061 while ((c
= enc
->getChar()) != EOF
) {
1062 sEnc
->append((char)c
);
1070 if (s
->hasUnicodeMarker()) {
1071 // Unicode strings don't necessarily end with \0
1072 const char* c
= s
->getCString();
1073 outStr
->printf("(");
1074 for(int i
=0; i
<s
->getLength(); i
++) {
1075 char unescaped
= *(c
+i
)&0x000000ff;
1077 if (unescaped
== '(' || unescaped
== ')' || unescaped
== '\\')
1078 outStr
->printf("%c", '\\');
1079 outStr
->printf("%c", unescaped
);
1081 outStr
->printf(") ");
1083 const char* c
= s
->getCString();
1084 outStr
->printf("(");
1085 for(int i
=0; i
<s
->getLength(); i
++) {
1086 char unescaped
= *(c
+i
)&0x000000ff;
1088 if (unescaped
== '\r')
1089 outStr
->printf("\\r");
1090 else if (unescaped
== '\n')
1091 outStr
->printf("\\n");
1093 if (unescaped
== '(' || unescaped
== ')' || unescaped
== '\\') {
1094 outStr
->printf("%c", '\\');
1096 outStr
->printf("%c", unescaped
);
1099 outStr
->printf(") ");
// Writes the opening "<num> <gen> obj " line of an indirect object.
//
// ref:    object number and generation to print.
// outStr: destination stream.
//
// The stream position is sampled BEFORE the header is written; callers in
// this file store the result as the object's offset in the new xref table.
// NOTE(review): the return statement is not visible in this listing;
// presumably `offset` is what gets returned — confirm.
1105 Goffset
PDFDoc::writeObjectHeader (Ref
*ref
, OutStream
* outStr
)
1107 Goffset offset
= outStr
->getPos();
1108 outStr
->printf("%i %i obj ", ref
->num
, ref
->gen
);
1112 void PDFDoc::writeObject (Object
* obj
, OutStream
* outStr
, XRef
*xRef
, Guint numOffset
, Guchar
*fileKey
,
1113 CryptAlgorithm encAlgorithm
, int keyLength
, int objNum
, int objGen
)
1119 switch (obj
->getType()) {
1121 outStr
->printf("%s ", obj
->getBool()?"true":"false");
1124 outStr
->printf("%i ", obj
->getInt());
1127 outStr
->printf("%lli ", obj
->getInt64());
1132 s
.appendf("{0:.10g}", obj
->getReal());
1133 outStr
->printf("%s ", s
.getCString());
1137 writeString(obj
->getString(), outStr
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1141 GooString
name(obj
->getName());
1142 GooString
*nameToPrint
= name
.sanitizedName(gFalse
/* non ps mode */);
1143 outStr
->printf("/%s ", nameToPrint
->getCString());
1148 outStr
->printf( "null ");
1151 array
= obj
->getArray();
1152 outStr
->printf("[");
1153 for (int i
=0; i
<array
->getLength(); i
++) {
1154 writeObject(array
->getNF(i
, &obj1
), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1157 outStr
->printf("] ");
1160 writeDictionnary (obj
->getDict(), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1164 // We can't modify streams with the current implementation (no write functions in the Stream API)
1165 // => the only streams that can have been modified are internal streams (=strWeird)
1166 Stream
*stream
= obj
->getStream();
1167 if (stream
->getKind() == strWeird
|| stream
->getKind() == strCrypt
) {
1168 //we write the stream unencoded => TODO: write stream encoder
1171 EncryptStream
*encStream
= NULL
;
1172 GBool removeFilter
= gTrue
;
1173 if (stream
->getKind() == strWeird
&& fileKey
) {
1175 stream
->getDict()->lookup("Filter", &filter
);
1176 if (!filter
.isName("Crypt")) {
1177 if (filter
.isArray()) {
1178 for (int i
= 0; i
< filter
.arrayGetLength(); i
++) {
1180 filter
.arrayGet(i
, &filterEle
);
1181 if (filterEle
.isName("Crypt")) {
1183 removeFilter
= gFalse
;
1189 encStream
= new EncryptStream(stream
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1190 encStream
->setAutoDelete(gFalse
);
1194 encStream
= new EncryptStream(stream
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1195 encStream
->setAutoDelete(gFalse
);
1199 removeFilter
= gFalse
;
1202 } else if (fileKey
!= NULL
) { // Encrypt stream
1203 encStream
= new EncryptStream(stream
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1204 encStream
->setAutoDelete(gFalse
);
1209 //recalculate stream length
1211 for (int c
=stream
->getChar(); c
!=EOF
; c
=stream
->getChar()) {
1214 obj1
.initInt64(tmp
);
1215 stream
->getDict()->set("Length", &obj1
);
1217 //Remove Stream encoding
1219 stream
->getDict()->remove("Filter");
1221 stream
->getDict()->remove("DecodeParms");
1223 writeDictionnary (stream
->getDict(),outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1224 writeStream (stream
,outStr
);
1229 FilterStream
*fs
= dynamic_cast<FilterStream
*>(stream
);
1231 BaseStream
*bs
= fs
->getBaseStream();
1234 if (xRef
->getStreamEnd(bs
->getStart(), &streamEnd
)) {
1236 val
.initInt64(streamEnd
- bs
->getStart());
1237 stream
->getDict()->set("Length", &val
);
1241 writeDictionnary (stream
->getDict(), outStr
, xRef
, numOffset
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
);
1242 writeRawStream (stream
, outStr
);
1247 outStr
->printf("%i %i R ", obj
->getRef().num
+ numOffset
, obj
->getRef().gen
);
1250 outStr
->printf("%s\n", obj
->getCmd());
1253 outStr
->printf("error\r\n");
1256 outStr
->printf("eof\r\n");
1259 outStr
->printf("none\r\n");
1262 error(errUnimplemented
, -1,"Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj
->getType());
// Writes the "endobj" keyword followed by CRLF to outStr, closing an indirect
// object opened with writeObjectHeader().
1267 void PDFDoc::writeObjectFooter (OutStream
* outStr
)
1269 outStr
->printf("endobj\r\n");
1272 Dict
*PDFDoc::createTrailerDict(int uxrefSize
, GBool incrUpdate
, Goffset startxRef
,
1273 Ref
*root
, XRef
*xRef
, const char *fileName
, Goffset fileSize
)
1275 Dict
*trailerDict
= new Dict(xRef
);
1277 obj1
.initInt(uxrefSize
);
1278 trailerDict
->set("Size", &obj1
);
1281 //build a new ID, as recommended in the reference, uses:
1285 // - values of the entries in the information dictionary
1288 sprintf(buffer
, "%i", (int)time(NULL
));
1289 message
.append(buffer
);
1292 message
.append(fileName
);
1294 sprintf(buffer
, "%lli", (long long)fileSize
);
1295 message
.append(buffer
);
1297 //info dict -- only use text string
1298 if (!xRef
->getTrailerDict()->isNone() && xRef
->getDocInfo(&obj1
)->isDict()) {
1299 for(int i
=0; i
<obj1
.getDict()->getLength(); i
++) {
1301 obj1
.getDict()->getVal(i
, &obj2
);
1302 if (obj2
.isString()) {
1303 message
.append(obj2
.getString());
1310 GBool hasEncrypt
= gFalse
;
1311 if (!xRef
->getTrailerDict()->isNone()) {
1313 xRef
->getTrailerDict()->dictLookupNF("Encrypt", &obj2
);
1314 if (!obj2
.isNull()) {
1315 trailerDict
->set("Encrypt", &obj2
);
1321 //calculate md5 digest
1323 md5((Guchar
*)message
.getCString(), message
.getLength(), digest
);
1324 obj1
.initString(new GooString((const char*)digest
, 16));
1327 Object obj2
,obj3
,obj5
;
1328 obj2
.initArray(xRef
);
1330 // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1331 if (incrUpdate
|| hasEncrypt
) {
1333 //only update the second part of the array
1334 xRef
->getTrailerDict()->getDict()->lookup("ID", &obj4
);
1335 if (!obj4
.isArray()) {
1336 error(errSyntaxWarning
, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1338 //Get the first part of the ID
1339 obj4
.arrayGet(0,&obj3
);
1341 obj2
.arrayAdd(&obj3
);
1342 obj2
.arrayAdd(&obj1
);
1343 trailerDict
->set("ID", &obj2
);
1347 //new file => same values for the two identifiers
1348 obj2
.arrayAdd(&obj1
);
1349 obj1
.initString(new GooString((const char*)digest
, 16));
1350 obj2
.arrayAdd(&obj1
);
1351 trailerDict
->set("ID", &obj2
);
1354 obj1
.initRef(root
->num
, root
->gen
);
1355 trailerDict
->set("Root", &obj1
);
1358 obj1
.initInt64(startxRef
);
1359 trailerDict
->set("Prev", &obj1
);
1362 if (!xRef
->getTrailerDict()->isNone()) {
1363 xRef
->getDocInfoNF(&obj5
);
1364 if (!obj5
.isNull()) {
1365 trailerDict
->set("Info", &obj5
);
1372 void PDFDoc::writeXRefTableTrailer(Dict
*trailerDict
, XRef
*uxref
, GBool writeAllEntries
, Goffset uxrefOffset
, OutStream
* outStr
, XRef
*xRef
)
1374 uxref
->writeTableToFile( outStr
, writeAllEntries
);
1375 outStr
->printf( "trailer\r\n");
1376 writeDictionnary(trailerDict
, outStr
, xRef
, 0, NULL
, cryptRC4
, 0, 0, 0);
1377 outStr
->printf( "\r\nstartxref\r\n");
1378 outStr
->printf( "%lli\r\n", uxrefOffset
);
1379 outStr
->printf( "%%%%EOF\r\n");
1382 void PDFDoc::writeXRefStreamTrailer (Dict
*trailerDict
, XRef
*uxref
, Ref
*uxrefStreamRef
, Goffset uxrefOffset
, OutStream
* outStr
, XRef
*xRef
)
1386 // Fill stmData and some trailerDict fields
1387 uxref
->writeStreamToBuffer(&stmData
, trailerDict
, xRef
);
1389 // Create XRef stream object and write it
1391 MemStream
*mStream
= new MemStream( stmData
.getCString(), 0,
1392 stmData
.getLength(), obj1
.initDict(trailerDict
) );
1393 writeObjectHeader(uxrefStreamRef
, outStr
);
1394 writeObject(obj1
.initStream(mStream
), outStr
, xRef
, 0, NULL
, cryptRC4
, 0, 0, 0);
1395 writeObjectFooter(outStr
);
1398 outStr
->printf( "startxref\r\n");
1399 outStr
->printf( "%lli\r\n", uxrefOffset
);
1400 outStr
->printf( "%%%%EOF\r\n");
1403 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset
, XRef
*uxref
, GBool writeAllEntries
,
1404 int uxrefSize
, OutStream
* outStr
, GBool incrUpdate
)
1406 const char *fileNameA
= fileName
? fileName
->getCString() : NULL
;
1407 // file size (doesn't include the trailer)
1408 unsigned int fileSize
= 0;
1411 while ((c
= str
->getChar()) != EOF
) {
1416 ref
.num
= getXRef()->getRootNum();
1417 ref
.gen
= getXRef()->getRootGen();
1418 Dict
* trailerDict
= createTrailerDict(uxrefSize
, incrUpdate
, getStartXRef(), &ref
,
1419 getXRef(), fileNameA
, fileSize
);
1420 writeXRefTableTrailer(trailerDict
, uxref
, writeAllEntries
, uxrefOffset
, outStr
, getXRef());
1424 void PDFDoc::writeHeader(OutStream
*outStr
, int major
, int minor
)
1426 outStr
->printf("%%PDF-%d.%d\n", major
, minor
);
1427 outStr
->printf("%%\xE2\xE3\xCF\xD3\n");
1430 void PDFDoc::markDictionnary (Dict
* dict
, XRef
* xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
)
1433 for (int i
=0; i
<dict
->getLength(); i
++) {
1434 const char *key
= dict
->getKey(i
);
1435 if (strcmp(key
, "Annots") != 0) {
1436 markObject(dict
->getValNF(i
, &obj1
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1439 dict
->getValNF(i
, &annotsObj
);
1440 if (!annotsObj
.isNull()) {
1441 markAnnotations(&annotsObj
, xRef
, countRef
, 0, oldRefNum
, newRefNum
);
// Mark the objects reachable from obj in xRef and countRef.
//
// What the visible statements do, per branch of the switch on obj->getType():
//  - obj->getArray():  each element is passed to markObject recursively;
//  - obj->getDict():   the dictionary is handed to markDictionnary;
//  - obj->getStream(): the stream's dictionary is handed to markDictionnary;
//  - obj->getRef():    the referenced object number, shifted by numOffset, is
//                      registered in xRef and countRef, then the target is
//                      fetched from this document's own XRef and marked
//                      recursively.
//
// oldRefNum and newRefNum are only forwarded to the recursive calls.
//
// NOTE(review): case labels, local declarations and the statements that act on
// an already-present countRef entry are not part of this listing; confirm them
// against the complete file before relying on this description.
1449 void PDFDoc::markObject (Object
* obj
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
)
1454 switch (obj
->getType()) {
1456 array
= obj
->getArray();
1457 for (int i
=0; i
<array
->getLength(); i
++) {
1458 markObject(array
->getNF(i
, &obj1
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1463 markDictionnary (obj
->getDict(), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1467 Stream
*stream
= obj
->getStream();
1468 markDictionnary (stream
->getDict(), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
// Reference: act only when the shifted number lies beyond xRef's current
// size or its xRef entry is still free.
1473 if (obj
->getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(obj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1474 if (getXRef()->getEntry(obj
->getRef().num
)->type
== xrefEntryFree
) {
1475 return; // already marked as free => should be replaced
// Add the entry to xRef and carry over the "compressed" type from this
// document's own XRef entry.
1477 xRef
->add(obj
->getRef().num
+ numOffset
, obj
->getRef().gen
, 0, gTrue
);
1478 if (getXRef()->getEntry(obj
->getRef().num
)->type
== xrefEntryCompressed
) {
1479 xRef
->getEntry(obj
->getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
// Same test against countRef: a missing or free entry is added with a
// second argument of 1, otherwise the existing entry is looked up.
1482 if (obj
->getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1483 countRef
->getEntry(obj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1485 countRef
->add(obj
->getRef().num
+ numOffset
, 1, 0, gTrue
);
1487 XRefEntry
*entry
= countRef
->getEntry(obj
->getRef().num
+ numOffset
);
// Resolve the reference through this document's XRef and recurse into it.
1493 getXRef()->fetch(obj
->getRef().num
, obj
->getRef().gen
, &obj1
);
1494 markObject(&obj1
, xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1503 void PDFDoc::replacePageDict(int pageNo
, int rotate
,
1504 PDFRectangle
*mediaBox
,
1505 PDFRectangle
*cropBox
)
1507 Ref
*refPage
= getCatalog()->getPageRef(pageNo
);
1509 getXRef()->fetch(refPage
->num
, refPage
->gen
, &page
);
1510 Dict
*pageDict
= page
.getDict();
1511 pageDict
->remove("MediaBoxssdf");
1512 pageDict
->remove("MediaBox");
1513 pageDict
->remove("CropBox");
1514 pageDict
->remove("ArtBox");
1515 pageDict
->remove("BleedBox");
1516 pageDict
->remove("TrimBox");
1517 pageDict
->remove("Rotate");
1519 mediaBoxObj
.initArray(getXRef());
1521 murx
.initReal(mediaBox
->x1
);
1523 mury
.initReal(mediaBox
->y1
);
1525 mllx
.initReal(mediaBox
->x2
);
1527 mlly
.initReal(mediaBox
->y2
);
1528 mediaBoxObj
.arrayAdd(&murx
);
1529 mediaBoxObj
.arrayAdd(&mury
);
1530 mediaBoxObj
.arrayAdd(&mllx
);
1531 mediaBoxObj
.arrayAdd(&mlly
);
1532 pageDict
->add(copyString("MediaBox"), &mediaBoxObj
);
1533 if (cropBox
!= NULL
) {
1535 cropBoxObj
.initArray(getXRef());
1537 curx
.initReal(cropBox
->x1
);
1539 cury
.initReal(cropBox
->y1
);
1541 cllx
.initReal(cropBox
->x2
);
1543 clly
.initReal(cropBox
->y2
);
1544 cropBoxObj
.arrayAdd(&curx
);
1545 cropBoxObj
.arrayAdd(&cury
);
1546 cropBoxObj
.arrayAdd(&cllx
);
1547 cropBoxObj
.arrayAdd(&clly
);
1548 pageDict
->add(copyString("CropBox"), &cropBoxObj
);
1549 cropBoxObj
.getArray()->incRef();
1550 pageDict
->add(copyString("TrimBox"), &cropBoxObj
);
1552 mediaBoxObj
.getArray()->incRef();
1553 pageDict
->add(copyString("TrimBox"), &mediaBoxObj
);
1556 rotateObj
.initInt(rotate
);
1557 pageDict
->add(copyString("Rotate"), &rotateObj
);
1558 getXRef()->setModifiedObject(&page
, *refPage
);
1562 void PDFDoc::markPageObjects(Dict
*pageDict
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
)
1564 pageDict
->remove("OpenAction");
1565 pageDict
->remove("Outlines");
1566 pageDict
->remove("StructTreeRoot");
1568 for (int n
= 0; n
< pageDict
->getLength(); n
++) {
1569 const char *key
= pageDict
->getKey(n
);
1570 Object value
; pageDict
->getValNF(n
, &value
);
1571 if (strcmp(key
, "Parent") != 0 &&
1572 strcmp(key
, "Pages") != 0 &&
1573 strcmp(key
, "AcroForm") != 0 &&
1574 strcmp(key
, "Annots") != 0 &&
1575 strcmp(key
, "P") != 0 &&
1576 strcmp(key
, "Root") != 0) {
1577 markObject(&value
, xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
// Mark the annotations reachable from annotsObj and retarget their "P"
// (page) references.
//
// annotsObj is fetched through this document's XRef. When the result is an
// array, each dictionary element whose "Type" is "Annot" is examined: if its
// "P" entry is a reference to object oldPageNum it is replaced by a reference
// to newPageNum (generation 0) and the annotation is recorded as modified.
// The reference held by each array element, and annotsObj itself when it is a
// reference, are registered in xRef and countRef with numOffset added to the
// object number.
//
// Returns the `modified` flag, which starts out as gFalse.
//
// NOTE(review): many short lines of this function (declarations, else
// branches, the statements that set `modified`, cleanup calls) are absent from
// this listing; the comments below describe the visible statements only.
1583 GBool
PDFDoc::markAnnotations(Object
*annotsObj
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldPageNum
, int newPageNum
) {
1585 GBool modified
= gFalse
;
1586 annotsObj
->fetch(getXRef(), &annots
);
1587 if (annots
.isArray()) {
1588 Array
*array
= annots
.getArray();
// Elements are visited from the last index down to 0.
1589 for (int i
=array
->getLength() - 1; i
>= 0; i
--) {
1591 if (array
->get(i
, &obj1
)->isDict()) {
1593 Dict
*dict
= obj1
.getDict();
1594 dict
->lookup("Type", &type
);
1595 if (type
.isName() && strcmp(type
.getName(), "Annot") == 0) {
// Compare the object number of the "P" reference with the page numbers.
1597 if (dict
->lookupNF("P", &obj2
)->isRef()) {
1598 if (obj2
.getRef().num
== oldPageNum
) {
1600 array
->getNF(i
, &obj3
);
// Point "P" at the new page object and register the annotation as modified.
// NOTE(review): newRef is heap-allocated; its release is not visible in this
// listing - confirm that it is deleted.
1602 Object
*newRef
= new Object();
1603 newRef
->initRef(newPageNum
, 0);
1604 dict
->set("P", newRef
);
1605 getXRef()->setModifiedObject(&obj1
, obj3
.getRef());
1608 } else if (obj2
.getRef().num
== newPageNum
) {
// "P" refers to some other object: fetch it and check whether it is a
// dictionary of type "Page".
1615 getXRef()->fetch(obj2
.getRef().num
, obj2
.getRef().gen
, &page
);
1616 if (page
.isDict()) {
1618 Dict
*dict
= page
.getDict();
1619 dict
->lookup("Type", &pagetype
);
1620 if (!pagetype
.isName() || strcmp(pagetype
.getName(), "Page") != 0) {
1642 markPageObjects(dict
, xRef
, countRef
, numOffset
, oldPageNum
, newPageNum
);
// Register the array element's own reference (shifted by numOffset) in xRef
// and countRef.
1645 array
->getNF(i
, &obj1
);
1647 if (obj1
.getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(obj1
.getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1648 if (getXRef()->getEntry(obj1
.getRef().num
)->type
== xrefEntryFree
) {
1649 continue; // already marked as free => should be replaced
1651 xRef
->add(obj1
.getRef().num
+ numOffset
, obj1
.getRef().gen
, 0, gTrue
);
1652 if (getXRef()->getEntry(obj1
.getRef().num
)->type
== xrefEntryCompressed
) {
1653 xRef
->getEntry(obj1
.getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1656 if (obj1
.getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1657 countRef
->getEntry(obj1
.getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1659 countRef
->add(obj1
.getRef().num
+ numOffset
, 1, 0, gTrue
);
1661 XRefEntry
*entry
= countRef
->getEntry(obj1
.getRef().num
+ numOffset
);
// When the annotation container was itself passed as a reference, register
// that reference in xRef and countRef as well.
1668 if (annotsObj
->isRef()) {
1669 if (annotsObj
->getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(annotsObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1670 if (getXRef()->getEntry(annotsObj
->getRef().num
)->type
== xrefEntryFree
) {
1671 return modified
; // already marked as free => should be replaced
1673 xRef
->add(annotsObj
->getRef().num
+ numOffset
, annotsObj
->getRef().gen
, 0, gTrue
);
1674 if (getXRef()->getEntry(annotsObj
->getRef().num
)->type
== xrefEntryCompressed
) {
1675 xRef
->getEntry(annotsObj
->getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1678 if (annotsObj
->getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1679 countRef
->getEntry(annotsObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1681 countRef
->add(annotsObj
->getRef().num
+ numOffset
, 1, 0, gTrue
);
1683 XRefEntry
*entry
= countRef
->getEntry(annotsObj
->getRef().num
+ numOffset
);
// Record the fetched annotation container as modified under its reference
// (the condition guarding this call is not visible in this listing).
1686 getXRef()->setModifiedObject(&annots
, annotsObj
->getRef());
// Mark the objects reachable from an AcroForm object.
//
// afObj is fetched through this document's XRef. When the result is a
// dictionary, its "Fields" value is processed by markAnnotations (whose
// return value is stored in `modified`) and, per the visible statements,
// values are also passed to markObject. When afObj is a reference, that
// reference is registered in xRef and countRef with numOffset added to the
// object number, and the fetched dictionary is recorded as modified.
//
// NOTE(review): the else branch separating the "Fields" case from the
// markObject call, the local declarations and the condition guarding
// setModifiedObject are not part of this listing - confirm against the
// complete file.
1692 void PDFDoc::markAcroForm(Object
*afObj
, XRef
*xRef
, XRef
*countRef
, Guint numOffset
, int oldRefNum
, int newRefNum
) {
1694 GBool modified
= gFalse
;
1695 afObj
->fetch(getXRef(), &acroform
);
1696 if (acroform
.isDict()) {
1697 Dict
*dict
= acroform
.getDict();
1698 for (int i
=0; i
< dict
->getLength(); i
++) {
1699 if (strcmp(dict
->getKey(i
), "Fields") == 0) {
1701 modified
= markAnnotations(dict
->getValNF(i
, &fields
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
1705 markObject(dict
->getValNF(i
, &obj
), xRef
, countRef
, numOffset
, oldRefNum
, newRefNum
);
// Register the AcroForm reference itself (shifted by numOffset).
1710 if (afObj
->isRef()) {
1711 if (afObj
->getRef().num
+ (int) numOffset
>= xRef
->getNumObjects() || xRef
->getEntry(afObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
) {
1712 if (getXRef()->getEntry(afObj
->getRef().num
)->type
== xrefEntryFree
) {
1713 return; // already marked as free => should be replaced
1715 xRef
->add(afObj
->getRef().num
+ numOffset
, afObj
->getRef().gen
, 0, gTrue
);
1716 if (getXRef()->getEntry(afObj
->getRef().num
)->type
== xrefEntryCompressed
) {
1717 xRef
->getEntry(afObj
->getRef().num
+ numOffset
)->type
= xrefEntryCompressed
;
1720 if (afObj
->getRef().num
+ (int) numOffset
>= countRef
->getNumObjects() ||
1721 countRef
->getEntry(afObj
->getRef().num
+ numOffset
)->type
== xrefEntryFree
)
1723 countRef
->add(afObj
->getRef().num
+ numOffset
, 1, 0, gTrue
);
1725 XRefEntry
*entry
= countRef
->getEntry(afObj
->getRef().num
+ numOffset
);
1729 getXRef()->setModifiedObject(&acroform
, afObj
->getRef());
// Write every non-free object listed in xRef to outStr.
//
// Entries are visited from index numOffset up to xRef->getNumObjects(). For
// each entry that is not free, the object is fetched from this document's own
// XRef under number (ref.num - numOffset), written between an object header
// and footer, and its entry in xRef is re-added with the offset returned by
// writeObjectHeader.
//
// Three writeObject variants are visible: one that forwards numOffset with a
// NULL key, one with a NULL key for entries flagged XRefEntry::Unencrypted,
// and one that uses the encryption parameters obtained from xRef.
//
// Returns objectsCount.
//
// NOTE(review): the condition selecting the first writeObject variant (most
// likely the `combine` parameter), the assignment of ref.num and the statement
// that updates objectsCount are not part of this listing - confirm.
1736 Guint
PDFDoc::writePageObjects(OutStream
*outStr
, XRef
*xRef
, Guint numOffset
, GBool combine
)
1738 Guint objectsCount
= 0; //count the number of objects in the XRef(s)
1740 CryptAlgorithm encAlgorithm
;
1742 xRef
->getEncryptionParameters(&fileKey
, &encAlgorithm
, &keyLength
);
1744 for (int n
= numOffset
; n
< xRef
->getNumObjects(); n
++) {
1745 if (xRef
->getEntry(n
)->type
!= xrefEntryFree
) {
1749 ref
.gen
= xRef
->getEntry(n
)->gen
;
// The object is read under its un-shifted number in this document.
1751 getXRef()->fetch(ref
.num
- numOffset
, ref
.gen
, &obj
);
1752 Goffset offset
= writeObjectHeader(&ref
, outStr
);
1754 writeObject(&obj
, outStr
, getXRef(), numOffset
, NULL
, cryptRC4
, 0, 0, 0);
1755 } else if (xRef
->getEntry(n
)->getFlag(XRefEntry::Unencrypted
)) {
1756 writeObject(&obj
, outStr
, NULL
, cryptRC4
, 0, 0, 0);
1758 writeObject(&obj
, outStr
, fileKey
, encAlgorithm
, keyLength
, ref
.num
, ref
.gen
);
1760 writeObjectFooter(outStr
);
// Store the position returned by writeObjectHeader for this object.
1761 xRef
->add(ref
.num
, ref
.gen
, offset
, gTrue
);
1765 return objectsCount
;
// Outline support is compiled only when DISABLE_OUTLINE is not defined.
1768 #ifndef DISABLE_OUTLINE
// Accessor for the document outline. The visible statement builds an Outline
// from the catalog's outline object and this document's xref and stores it in
// the `outline` member.
// NOTE(review): the guard around the construction and the return statement
// are not part of this listing - presumably the object is created once and
// then returned; confirm.
1769 Outline
*PDFDoc::getOutline()
1774 outline
= new Outline(catalog
->getOutline(), xref
);
1781 PDFDoc
*PDFDoc::ErrorPDFDoc(int errorCode
, GooString
*fileNameA
)
1783 PDFDoc
*doc
= new PDFDoc();
1784 doc
->errCode
= errorCode
;
1785 doc
->fileName
= fileNameA
;
// Parse the run of decimal digits at the start of s into a long long.
//
// The loop advances p over s while the current character is a digit. The
// visible guard compares x against (LLONG_MAX - d) / 10, i.e. it detects the
// point where appending digit d to x would exceed LLONG_MAX.
//
// NOTE(review): the statements that compute d, accumulate x, react to the
// overflow guard and return the result are not part of this listing - confirm
// against the complete file.
1790 long long PDFDoc::strToLongLong(char *s
) {
// "*p & 0xff" keeps the argument passed to isdigit() non-negative.
1795 for (p
= s
; *p
&& isdigit(*p
& 0xff); ++p
) {
1797 if (x
> (LLONG_MAX
- d
) / 10) {
1805 // Read the 'startxref' position.
//
// The value is computed only while startXRefPos is still -1; the member is
// returned in every case.
//
// Linearized documents: up to linearizationSearchSize characters are read
// into buf and scanned for "endobj" (the end of the linearization
// dictionary); white space following it is skipped.
//
// Other documents: segments of xrefSearchSize characters are read, positioned
// with str->setPos(..., -1), for as long as (xrefSearchSize - 16) * segnum
// stays below maxXRefSearch (24576, reduced to the stream length when that is
// smaller). Each segment is scanned backwards for "startxref"; the number that
// follows, after white space, is parsed with strToLongLong.
//
// NOTE(review): declarations, the statements that fill buf, the loop exits and
// the linearized-case assignment to startXRefPos are not part of this listing.
1806 Goffset
PDFDoc::getStartXRef(GBool tryingToReconstruct
)
1808 if (startXRefPos
== -1) {
1810 if (isLinearized(tryingToReconstruct
)) {
1811 char buf
[linearizationSearchSize
+1];
1815 for (n
= 0; n
< linearizationSearchSize
; ++n
) {
1816 if ((c
= str
->getChar()) == EOF
) {
1823 // find end of first obj (linearization dictionary)
1825 for (i
= 0; i
< n
; i
++) {
1826 if (!strncmp("endobj", &buf
[i
], 6)) {
1829 while (buf
[i
] && Lexer::isSpace(buf
[i
])) ++i
;
1835 char buf
[xrefSearchSize
+1];
1839 // read last xrefSearchSize bytes
1841 int maxXRefSearch
= 24576;
1842 if (str
->getLength() < maxXRefSearch
) maxXRefSearch
= str
->getLength();
// Consecutive segments advance by (xrefSearchSize - 16), so neighbouring
// reads overlap by 16 characters.
1843 for (; (xrefSearchSize
- 16) * segnum
< maxXRefSearch
; segnum
++) {
1844 str
->setPos((xrefSearchSize
- 16) * segnum
+ xrefSearchSize
, -1);
1845 for (n
= 0; n
< xrefSearchSize
; ++n
) {
1846 if ((c
= str
->getChar()) == EOF
) {
// Scan the segment from its end towards its start; 9 is strlen("startxref").
1854 for (i
= n
- 9; i
>= 0; --i
) {
1855 if (!strncmp(&buf
[i
], "startxref", 9)) {
// Skip the white space between the keyword and the number.
1862 for (p
= &buf
[i
+ 9]; isspace(*p
); ++p
);
1863 startXRefPos
= strToLongLong(p
);
1871 return startXRefPos
;
1874 Goffset
PDFDoc::getMainXRefEntriesOffset(GBool tryingToReconstruct
)
1876 Guint mainXRefEntriesOffset
= 0;
1878 if (isLinearized(tryingToReconstruct
)) {
1879 mainXRefEntriesOffset
= getLinearization()->getMainXRefEntriesOffset();
1882 return mainXRefEntriesOffset
;
1885 int PDFDoc::getNumPages()
1887 if (isLinearized()) {
1889 if ((n
= getLinearization()->getNumPages())) {
1894 return catalog
->getNumPages();
// Build a Page object for page number `page` using the hint tables.
//
// Visible steps:
//  1. the page's object number is taken from getHints()->getPageObjectNum();
//  2. the number is rejected when it is negative or not below
//     xref->getNumObjects();
//  3. the generation is read from the xref entry and the object is fetched;
//  4. the object must be a dictionary of type "Page";
//  5. a Page is created from that dictionary, with PageAttrs built from a
//     NULL first argument and the page dictionary, and the catalog's form.
//
// Each failed check reports an errSyntaxWarning.
//
// NOTE(review): local declarations, the condition guarding the first error()
// call, the early returns and the final return are not part of this listing -
// confirm what is returned on failure.
1897 Page
*PDFDoc::parsePage(int page
)
1904 pageRef
.num
= getHints()->getPageObjectNum(page
);
1906 error(errSyntaxWarning
, -1, "Failed to get object num from hint tables for page {0:d}", page
);
1910 // check for bogus ref - this can happen in corrupted PDF files
1911 if (pageRef
.num
< 0 || pageRef
.num
>= xref
->getNumObjects()) {
1912 error(errSyntaxWarning
, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef
.num
, page
);
1916 pageRef
.gen
= xref
->getEntry(pageRef
.num
)->gen
;
1917 xref
->fetch(pageRef
.num
, pageRef
.gen
, &obj
);
1918 if (!obj
.isDict("Page")) {
1920 error(errSyntaxWarning
, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef
.num
, pageRef
.gen
);
1923 pageDict
= obj
.getDict();
1925 p
= new Page(this, page
, pageDict
, pageRef
,
1926 new PageAttrs(NULL
, pageDict
), catalog
->getForm());
// Return the Page object for the 1-based page number `page`, or NULL when the
// number is below 1 or above getNumPages().
//
// For linearized documents a cache indexed by (page - 1) is used: the visible
// code allocates one slot per page, sets every slot to NULL, fills the
// requested slot through parsePage() when it is empty and returns it when
// parsing succeeded. When the slot is still empty a warning is reported and,
// like for non-linearized documents, the catalog is asked for the page.
//
// NOTE(review): the condition guarding the cache allocation and any locking
// are not part of this listing - confirm against the complete file.
1932 Page
*PDFDoc::getPage(int page
)
1934 if ((page
< 1) || page
> getNumPages()) return NULL
;
1936 if (isLinearized()) {
1939 pageCache
= (Page
**) gmallocn(getNumPages(), sizeof(Page
*));
1940 for (int i
= 0; i
< getNumPages(); i
++) {
1941 pageCache
[i
] = NULL
;
// Parse the page only when its cache slot is still empty.
1944 if (!pageCache
[page
-1]) {
1945 pageCache
[page
-1] = parsePage(page
);
1947 if (pageCache
[page
-1]) {
1948 return pageCache
[page
-1];
1950 error(errSyntaxWarning
, -1, "Failed parsing page {0:d} using hint tables", page
);
// Fallback for non-linearized documents and for hint-table failures.
1954 return catalog
->getPage(page
);