1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
9 //========================================================================
11 // Modified under the Poppler project - http://poppler.freedesktop.org
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
16 // Copyright (C) 2005 Dan Sheridan <dan.sheridan@postman.org.uk>
17 // Copyright (C) 2005 Brad Hards <bradh@frogmouth.net>
18 // Copyright (C) 2006, 2008, 2010, 2012-2014 Albert Astals Cid <aacid@kde.org>
19 // Copyright (C) 2007-2008 Julien Rebetez <julienr@svn.gnome.org>
20 // Copyright (C) 2007 Carlos Garcia Campos <carlosgc@gnome.org>
21 // Copyright (C) 2009, 2010 Ilya Gorenbein <igorenbein@finjan.com>
22 // Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
23 // Copyright (C) 2012, 2013 Thomas Freitag <Thomas.Freitag@kabelmail.de>
24 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
25 // Copyright (C) 2013, 2014 Adrian Johnson <ajohnson@redneon.com>
26 // Copyright (C) 2013 Pino Toscano <pino@kde.org>
28 // To see a description of the changes please see the Changelog file that
29 // came with your tarball or type make ChangeLog if you are building from git
31 //========================================================================
35 #ifdef USE_GCC_PRAGMAS
36 #pragma implementation
46 #include "goo/gfile.h"
54 #include "ErrorCodes.h"
56 #include "PopplerCache.h"
58 //------------------------------------------------------------------------
60 // Note that the PDF spec numbers bits starting from 1 (e.g. bit 3 is 1<<2)
61 //------------------------------------------------------------------------
63 #define permPrint (1<<2) // bit 3
64 #define permChange (1<<3) // bit 4
65 #define permCopy (1<<4) // bit 5
66 #define permNotes (1<<5) // bit 6
67 #define permFillForm (1<<8) // bit 9
68 #define permAccessibility (1<<9) // bit 10
69 #define permAssemble (1<<10) // bit 11
70 #define permHighResPrint (1<<11) // bit 12
71 #define defPermFlags 0xfffc
74 # define xrefLocker() MutexLocker locker(&mutex)
75 # define xrefCondLocker(X) MutexLocker locker(&mutex, (X))
78 # define xrefCondLocker(X)
81 //------------------------------------------------------------------------
83 //------------------------------------------------------------------------
88 // Create an object stream, using object number <objStrNum>,
90 ObjectStream(XRef
*xref
, int objStrNumA
, int recursion
= 0);
92 GBool
isOk() { return ok
; }
96 // Return the object number of this object stream.
97 int getObjStrNum() { return objStrNum
; }
99 // Get the <objIdx>th object from this stream, which should be
100 // object number <objNum>, generation 0.
101 Object
*getObject(int objIdx
, int objNum
, Object
*obj
);
105 int objStrNum
; // object number of the object stream
106 int nObjects
; // number of objects in the stream
107 Object
*objs
; // the objects (length = nObjects)
108 int *objNums
; // the object numbers (length = nObjects)
112 class ObjectStreamKey
: public PopplerCacheKey
115 ObjectStreamKey(int num
) : objStrNum(num
)
119 bool operator==(const PopplerCacheKey
&key
) const
121 const ObjectStreamKey
*k
= static_cast<const ObjectStreamKey
*>(&key
);
122 return objStrNum
== k
->objStrNum
;
128 class ObjectStreamItem
: public PopplerCacheItem
131 ObjectStreamItem(ObjectStream
*objStr
) : objStream(objStr
)
140 ObjectStream
*objStream
;
143 ObjectStream::ObjectStream(XRef
*xref
, int objStrNumA
, int recursion
) {
147 Object objStr
, obj1
, obj2
;
151 objStrNum
= objStrNumA
;
157 if (!xref
->fetch(objStrNum
, 0, &objStr
, recursion
)->isStream()) {
161 if (!objStr
.streamGetDict()->lookup("N", &obj1
, recursion
)->isInt()) {
165 nObjects
= obj1
.getInt();
171 objStr
.streamGetDict()->lookup("First", &obj1
, recursion
);
172 if (!obj1
.isInt() && !obj1
.isInt64()) {
177 first
= obj1
.getInt();
179 first
= obj1
.getInt64();
185 // this is an arbitrary limit to avoid integer overflow problems
186 // in the 'new Object[nObjects]' call (Acrobat apparently limits
187 // object streams to 100-200 objects)
188 if (nObjects
> 1000000) {
189 error(errSyntaxError
, -1, "Too many objects in an object stream");
192 objs
= new Object
[nObjects
];
193 objNums
= (int *)gmallocn(nObjects
, sizeof(int));
194 offsets
= (Goffset
*)gmallocn(nObjects
, sizeof(Goffset
));
196 // parse the header: object numbers and offsets
197 objStr
.streamReset();
199 str
= new EmbedStream(objStr
.getStream(), &obj1
, gTrue
, first
);
200 parser
= new Parser(xref
, new Lexer(xref
, str
), gFalse
);
201 for (i
= 0; i
< nObjects
; ++i
) {
202 parser
->getObj(&obj1
);
203 parser
->getObj(&obj2
);
204 if (!obj1
.isInt() || !(obj2
.isInt() || obj2
.isInt64())) {
211 objNums
[i
] = obj1
.getInt();
213 offsets
[i
] = obj2
.getInt();
215 offsets
[i
] = obj2
.getInt64();
218 if (objNums
[i
] < 0 || offsets
[i
] < 0 ||
219 (i
> 0 && offsets
[i
] < offsets
[i
-1])) {
225 while (str
->getChar() != EOF
) ;
228 // skip to the first object - this shouldn't be necessary because
229 // the First key is supposed to be equal to offsets[0], but just in
231 for (Goffset pos
= first
; pos
< offsets
[0]; ++pos
) {
232 objStr
.getStream()->getChar();
236 for (i
= 0; i
< nObjects
; ++i
) {
238 if (i
== nObjects
- 1) {
239 str
= new EmbedStream(objStr
.getStream(), &obj1
, gFalse
, 0);
241 str
= new EmbedStream(objStr
.getStream(), &obj1
, gTrue
,
242 offsets
[i
+1] - offsets
[i
]);
244 parser
= new Parser(xref
, new Lexer(xref
, str
), gFalse
);
245 parser
->getObj(&objs
[i
]);
246 while (str
->getChar() != EOF
) ;
257 ObjectStream::~ObjectStream() {
261 for (i
= 0; i
< nObjects
; ++i
) {
269 Object
*ObjectStream::getObject(int objIdx
, int objNum
, Object
*obj
) {
270 if (objIdx
< 0 || objIdx
>= nObjects
|| objNum
!= objNums
[objIdx
]) {
271 return obj
->initNull();
273 return objs
[objIdx
].copy(obj
);
276 //------------------------------------------------------------------------
278 //------------------------------------------------------------------------
291 objStrs
= new PopplerCache(5);
292 mainXRefEntriesOffset
= 0;
294 scannedSpecialFlags
= gFalse
;
296 permFlags
= defPermFlags
;
297 ownerPasswordOk
= gFalse
;
306 XRef::XRef(Object
*trailerDictA
) {
309 if (trailerDictA
->isDict())
310 trailerDict
.initDict(trailerDictA
->getDict());
313 XRef::XRef(BaseStream
*strA
, Goffset pos
, Goffset mainXRefEntriesOffsetA
, GBool
*wasReconstructed
, GBool reconstruct
) {
317 mainXRefEntriesOffset
= mainXRefEntriesOffsetA
;
321 start
= str
->getStart();
322 prevXRefOffset
= mainXRefOffset
= pos
;
324 if (reconstruct
&& !(ok
= constructXRef(wasReconstructed
)))
326 errCode
= errDamaged
;
331 // if there was a problem with the 'startxref' position, try to
332 // reconstruct the xref table
333 if (prevXRefOffset
== 0) {
334 if (!(ok
= constructXRef(wasReconstructed
))) {
335 errCode
= errDamaged
;
339 // read the xref table
341 std::vector
<Goffset
> followedXRefStm
;
342 readXRef(&prevXRefOffset
, &followedXRefStm
, NULL
);
344 // if there was a problem with the xref table,
345 // try to reconstruct it
347 if (!(ok
= constructXRef(wasReconstructed
))) {
348 errCode
= errDamaged
;
354 // set size to (at least) the size specified in trailer dict
355 trailerDict
.dictLookupNF("Size", &obj
);
357 error(errSyntaxWarning
, -1, "No valid XRef size in trailer");
359 if (obj
.getInt() > size
) {
360 if (resize(obj
.getInt()) != obj
.getInt()) {
361 if (!(ok
= constructXRef(wasReconstructed
))) {
363 errCode
= errDamaged
;
371 // get the root dictionary (catalog) object
372 trailerDict
.dictLookupNF("Root", &obj
);
374 rootNum
= obj
.getRefNum();
375 rootGen
= obj
.getRefGen();
379 if (!(ok
= constructXRef(wasReconstructed
))) {
380 errCode
= errDamaged
;
385 // now set the trailer dictionary's xref pointer so we can fetch
386 // indirect objects from it
387 trailerDict
.getDict()->setXRef(this);
391 for(int i
=0; i
<size
; i
++) {
392 entries
[i
].obj
.free ();
407 gDestroyMutex(&mutex
);
412 XRef
*xref
= new XRef();
413 xref
->str
= str
->copy();
414 xref
->strOwner
= gTrue
;
415 xref
->encrypted
= encrypted
;
416 xref
->permFlags
= permFlags
;
417 xref
->ownerPasswordOk
= ownerPasswordOk
;
418 xref
->rootGen
= rootGen
;
419 xref
->rootNum
= rootNum
;
422 xref
->prevXRefOffset
= prevXRefOffset
;
423 xref
->mainXRefEntriesOffset
= mainXRefEntriesOffset
;
424 xref
->xRefStream
= xRefStream
;
425 trailerDict
.copy(&xref
->trailerDict
);
426 xref
->encAlgorithm
= encAlgorithm
;
427 xref
->encRevision
= encRevision
;
428 xref
->encVersion
= encVersion
;
429 xref
->permFlags
= permFlags
;
430 xref
->keyLength
= keyLength
;
431 xref
->permFlags
= permFlags
;
432 for (int i
= 0; i
< 32; i
++) {
433 xref
->fileKey
[i
] = fileKey
[i
];
436 if (xref
->reserve(size
) == 0) {
437 error(errSyntaxError
, -1, "unable to allocate {0:d} entries", size
);
442 for (int i
= 0; i
< size
; ++i
) {
443 xref
->entries
[i
].offset
= entries
[i
].offset
;
444 xref
->entries
[i
].type
= entries
[i
].type
;
445 xref
->entries
[i
].obj
.initNull ();
446 xref
->entries
[i
].flags
= entries
[i
].flags
;
447 xref
->entries
[i
].gen
= entries
[i
].gen
;
449 xref
->streamEndsLen
= streamEndsLen
;
450 if (streamEndsLen
!= 0) {
451 xref
->streamEnds
= (Goffset
*)gmalloc(streamEndsLen
* sizeof(Goffset
));
452 for (int i
= 0; i
< streamEndsLen
; i
++) {
453 xref
->streamEnds
[i
] = streamEnds
[i
];
459 int XRef::reserve(int newSize
)
461 if (newSize
> capacity
) {
464 for (realNewSize
= capacity
? 2 * capacity
: 1024;
465 newSize
> realNewSize
&& realNewSize
> 0;
467 if ((realNewSize
< 0) ||
468 (realNewSize
>= INT_MAX
/ (int)sizeof(XRefEntry
))) {
472 void *p
= greallocn_checkoverflow(entries
, realNewSize
, sizeof(XRefEntry
));
477 entries
= (XRefEntry
*) p
;
478 capacity
= realNewSize
;
485 int XRef::resize(int newSize
)
487 if (newSize
> size
) {
489 if (reserve(newSize
) < newSize
) return size
;
491 for (int i
= size
; i
< newSize
; ++i
) {
492 entries
[i
].offset
= -1;
493 entries
[i
].type
= xrefEntryNone
;
494 entries
[i
].obj
.initNull ();
495 entries
[i
].flags
= 0;
499 for (int i
= newSize
; i
< size
; i
++) {
500 entries
[i
].obj
.free ();
509 /* Read one xref table section. Also reads the associated trailer
510 * dictionary, and returns the prev pointer (if any).
512 * pos Points to a Goffset containing the offset of the XRef
513 * section to be read. If a prev pointer is found, *pos is
514 * updated with its value
515 * followedXRefStm Used in case of nested readXRef calls to spot circular
516 * references in XRefStm pointers
517 * xrefStreamObjsNum If not NULL, every time a XRef stream is encountered,
518 * its object number is appended
520 * gTrue if a prev pointer is found, otherwise gFalse
522 GBool
XRef::readXRef(Goffset
*pos
, std::vector
<Goffset
> *followedXRefStm
, std::vector
<int> *xrefStreamObjsNum
) {
527 // start up a parser, parse one token
529 parser
= new Parser(NULL
,
531 str
->makeSubStream(start
+ *pos
, gFalse
, 0, &obj
)),
533 parser
->getObj(&obj
, gTrue
);
535 // parse an old-style xref table
536 if (obj
.isCmd("xref")) {
538 more
= readXRefTable(parser
, pos
, followedXRefStm
, xrefStreamObjsNum
);
540 // parse an xref stream
541 } else if (obj
.isInt()) {
542 const int objNum
= obj
.getInt();
544 if (!parser
->getObj(&obj
, gTrue
)->isInt()) {
548 if (!parser
->getObj(&obj
, gTrue
)->isCmd("obj")) {
552 if (!parser
->getObj(&obj
)->isStream()) {
555 if (trailerDict
.isNone()) {
558 if (xrefStreamObjsNum
) {
559 xrefStreamObjsNum
->push_back(objNum
);
561 more
= readXRefStream(obj
.getStream(), pos
);
578 GBool
XRef::readXRefTable(Parser
*parser
, Goffset
*pos
, std::vector
<Goffset
> *followedXRefStm
, std::vector
<int> *xrefStreamObjsNum
) {
586 parser
->getObj(&obj
, gTrue
);
587 if (obj
.isCmd("trailer")) {
594 first
= obj
.getInt();
596 if (!parser
->getObj(&obj
, gTrue
)->isInt()) {
601 if (first
< 0 || n
< 0 || first
+ n
< 0) {
604 if (first
+ n
> size
) {
605 if (resize(first
+ n
) != first
+ n
) {
606 error(errSyntaxError
, -1, "Invalid 'obj' parameters'");
610 for (i
= first
; i
< first
+ n
; ++i
) {
611 parser
->getObj(&obj
, gTrue
);
613 entry
.offset
= obj
.getInt();
614 } else if (obj
.isInt64()) {
615 entry
.offset
= obj
.getInt64();
620 if (!parser
->getObj(&obj
, gTrue
)->isInt()) {
623 entry
.gen
= obj
.getInt();
624 entry
.obj
.initNull ();
627 parser
->getObj(&obj
, gTrue
);
628 if (obj
.isCmd("n")) {
629 entry
.type
= xrefEntryUncompressed
;
630 } else if (obj
.isCmd("f")) {
631 entry
.type
= xrefEntryFree
;
636 if (entries
[i
].offset
== -1) {
638 // PDF files of patents from the IBM Intellectual Property
639 // Network have a bug: the xref table claims to start at 1
641 if (i
== 1 && first
== 1 &&
642 entries
[1].offset
== 0 && entries
[1].gen
== 65535 &&
643 entries
[1].type
== xrefEntryFree
) {
645 entries
[0] = entries
[1];
646 entries
[1].offset
= -1;
652 // read the trailer dictionary
653 if (!parser
->getObj(&obj
)->isDict()) {
657 // get the 'Prev' pointer
658 obj
.getDict()->lookupNF("Prev", &obj2
);
659 if (obj2
.isInt() || obj2
.isInt64()) {
661 pos2
= obj2
.getInt();
663 pos2
= obj2
.getInt64();
668 error(errSyntaxWarning
, -1, "Infinite loop in xref table");
671 } else if (obj2
.isRef()) {
672 // certain buggy PDF generators generate "/Prev NNN 0 R" instead
674 pos2
= (Guint
)obj2
.getRefNum();
679 error(errSyntaxWarning
, -1, "Infinite loop in xref table");
687 // save the first trailer dictionary
688 if (trailerDict
.isNone()) {
689 obj
.copy(&trailerDict
);
692 // check for an 'XRefStm' key
693 obj
.getDict()->lookup("XRefStm", &obj2
);
694 if (obj2
.isInt() || obj2
.isInt64()) {
696 pos2
= obj2
.getInt();
698 pos2
= obj2
.getInt64();
699 for (size_t i
= 0; ok
== gTrue
&& i
< followedXRefStm
->size(); ++i
) {
700 if (followedXRefStm
->at(i
) == pos2
) {
705 followedXRefStm
->push_back(pos2
);
706 readXRef(&pos2
, followedXRefStm
, xrefStreamObjsNum
);
725 GBool
XRef::readXRefStream(Stream
*xrefStr
, Goffset
*pos
) {
729 Object obj
, obj2
, idx
;
730 int newSize
, first
, n
, i
;
732 dict
= xrefStr
->getDict();
734 if (!dict
->lookupNF("Size", &obj
)->isInt()) {
737 newSize
= obj
.getInt();
742 if (newSize
> size
) {
743 if (resize(newSize
) != newSize
) {
744 error(errSyntaxError
, -1, "Invalid 'size' parameter");
749 if (!dict
->lookupNF("W", &obj
)->isArray() ||
750 obj
.arrayGetLength() < 3) {
753 for (i
= 0; i
< 3; ++i
) {
754 if (!obj
.arrayGet(i
, &obj2
)->isInt()) {
758 w
[i
] = obj2
.getInt();
765 if (w
[0] > (int)sizeof(int) || w
[1] > (int)sizeof(long long) || w
[2] > (int)sizeof(int)) {
770 dict
->lookupNF("Index", &idx
);
772 for (i
= 0; i
+1 < idx
.arrayGetLength(); i
+= 2) {
773 if (!idx
.arrayGet(i
, &obj
)->isInt()) {
777 first
= obj
.getInt();
779 if (!idx
.arrayGet(i
+1, &obj
)->isInt()) {
785 if (first
< 0 || n
< 0 ||
786 !readXRefStreamSection(xrefStr
, w
, first
, n
)) {
792 if (!readXRefStreamSection(xrefStr
, w
, 0, newSize
)) {
799 dict
->lookupNF("Prev", &obj
);
803 } else if (obj
.isInt64()) {
804 *pos
= obj
.getInt64();
810 if (trailerDict
.isNone()) {
811 trailerDict
.initDict(dict
);
823 GBool
XRef::readXRefStreamSection(Stream
*xrefStr
, int *w
, int first
, int n
) {
824 unsigned long long offset
;
825 int type
, gen
, c
, i
, j
;
830 if (first
+ n
> size
) {
831 if (resize(first
+ n
) != size
) {
832 error(errSyntaxError
, -1, "Invalid 'size' inside xref table");
835 if (first
+ n
> size
) {
836 error(errSyntaxError
, -1, "Invalid 'first' or 'n' inside xref table");
840 for (i
= first
; i
< first
+ n
; ++i
) {
844 for (type
= 0, j
= 0; j
< w
[0]; ++j
) {
845 if ((c
= xrefStr
->getChar()) == EOF
) {
848 type
= (type
<< 8) + c
;
851 for (offset
= 0, j
= 0; j
< w
[1]; ++j
) {
852 if ((c
= xrefStr
->getChar()) == EOF
) {
855 offset
= (offset
<< 8) + c
;
857 if (offset
> (unsigned long long)GoffsetMax()) {
858 error(errSyntaxError
, -1, "Offset inside xref table too large for fseek");
861 for (gen
= 0, j
= 0; j
< w
[2]; ++j
) {
862 if ((c
= xrefStr
->getChar()) == EOF
) {
865 gen
= (gen
<< 8) + c
;
867 if (entries
[i
].offset
== -1) {
870 entries
[i
].offset
= offset
;
871 entries
[i
].gen
= gen
;
872 entries
[i
].type
= xrefEntryFree
;
875 entries
[i
].offset
= offset
;
876 entries
[i
].gen
= gen
;
877 entries
[i
].type
= xrefEntryUncompressed
;
880 entries
[i
].offset
= offset
;
881 entries
[i
].gen
= gen
;
882 entries
[i
].type
= xrefEntryCompressed
;
893 // Attempt to construct an xref table for a damaged file.
894 GBool
XRef::constructXRef(GBool
*wasReconstructed
, GBool needCatalogDict
) {
896 Object newTrailerDict
, obj
;
905 bool oneCycle
= true;
914 streamEndsLen
= streamEndsSize
= 0;
916 if (wasReconstructed
)
918 *wasReconstructed
= true;
924 if (!str
->getLine(buf
, 256)) {
930 while (*p
&& Lexer::isSpace(*p
& 0xff)) ++p
;
935 while( ( token
= strstr( p
, "endobj" ) ) || oneCycle
) {
944 // got trailer dictionary
945 if (!strncmp(p
, "trailer", 7)) {
947 parser
= new Parser(NULL
,
949 str
->makeSubStream(pos
+ 7, gFalse
, 0, &obj
)),
951 parser
->getObj(&newTrailerDict
);
952 if (newTrailerDict
.isDict()) {
953 newTrailerDict
.dictLookupNF("Root", &obj
);
954 if (obj
.isRef() && (!gotRoot
|| !needCatalogDict
) && rootNum
!= obj
.getRefNum()) {
955 rootNum
= obj
.getRefNum();
956 rootGen
= obj
.getRefGen();
957 if (!trailerDict
.isNone()) {
960 newTrailerDict
.copy(&trailerDict
);
965 newTrailerDict
.free();
969 } else if (isdigit(*p
& 0xff)) {
974 } while (*p
&& isdigit(*p
& 0xff));
975 if (isspace(*p
& 0xff)) {
978 } while (*p
&& isspace(*p
& 0xff));
979 if (isdigit(*p
& 0xff)) {
983 } while (*p
&& isdigit(*p
& 0xff));
984 if (isspace(*p
& 0xff)) {
987 } while (*p
&& isspace(*p
& 0xff));
988 if (!strncmp(p
, "obj", 3)) {
990 newSize
= (num
+ 1 + 255) & ~255;
992 error(errSyntaxError
, -1, "Bad object number");
995 if (resize(newSize
) != newSize
) {
996 error(errSyntaxError
, -1, "Invalid 'obj' parameters");
1000 if (entries
[num
].type
== xrefEntryFree
||
1001 gen
>= entries
[num
].gen
) {
1002 entries
[num
].offset
= pos
- start
;
1003 entries
[num
].gen
= gen
;
1004 entries
[num
].type
= xrefEntryUncompressed
;
1012 } else if (!strncmp(p
, "endstream", 9)) {
1013 if (streamEndsLen
== streamEndsSize
) {
1014 streamEndsSize
+= 64;
1015 if (streamEndsSize
>= INT_MAX
/ (int)sizeof(int)) {
1016 error(errSyntaxError
, -1, "Invalid 'endstream' parameter.");
1019 streamEnds
= (Goffset
*)greallocn(streamEnds
,
1020 streamEndsSize
, sizeof(Goffset
));
1022 streamEnds
[streamEndsLen
++] = pos
;
1025 p
= token
+ 6;// strlen( "endobj" ) = 6
1026 pos
+= offset
+ 6;// strlen( "endobj" ) = 6
1027 while (*p
&& Lexer::isSpace(*p
& 0xff)) {
1038 error(errSyntaxError
, -1, "Couldn't find trailer dictionary");
1042 void XRef::setEncryption(int permFlagsA
, GBool ownerPasswordOkA
,
1043 Guchar
*fileKeyA
, int keyLengthA
,
1044 int encVersionA
, int encRevisionA
,
1045 CryptAlgorithm encAlgorithmA
) {
1049 permFlags
= permFlagsA
;
1050 ownerPasswordOk
= ownerPasswordOkA
;
1051 if (keyLengthA
<= 32) {
1052 keyLength
= keyLengthA
;
1056 for (i
= 0; i
< keyLength
; ++i
) {
1057 fileKey
[i
] = fileKeyA
[i
];
1059 encVersion
= encVersionA
;
1060 encRevision
= encRevisionA
;
1061 encAlgorithm
= encAlgorithmA
;
1064 void XRef::getEncryptionParameters(Guchar
**fileKeyA
, CryptAlgorithm
*encAlgorithmA
,
1067 *fileKeyA
= fileKey
;
1068 *encAlgorithmA
= encAlgorithm
;
1069 *keyLengthA
= keyLength
;
1071 // null encryption parameters
1073 *encAlgorithmA
= cryptRC4
;
1078 GBool
XRef::okToPrint(GBool ignoreOwnerPW
) {
1079 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permPrint
);
1082 // we can print at high res if we are only doing security handler revision
1083 // 2 (and we are allowed to print at all), or with security handler rev
1084 // 3 and we are allowed to print, and bit 12 is set.
1085 GBool
XRef::okToPrintHighRes(GBool ignoreOwnerPW
) {
1087 if (2 == encRevision
) {
1088 return (okToPrint(ignoreOwnerPW
));
1089 } else if (encRevision
>= 3) {
1090 return (okToPrint(ignoreOwnerPW
) && (permFlags
& permHighResPrint
));
1092 // something weird - unknown security handler version
1100 GBool
XRef::okToChange(GBool ignoreOwnerPW
) {
1101 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permChange
);
1104 GBool
XRef::okToCopy(GBool ignoreOwnerPW
) {
1105 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permCopy
);
1108 GBool
XRef::okToAddNotes(GBool ignoreOwnerPW
) {
1109 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permNotes
);
1112 GBool
XRef::okToFillForm(GBool ignoreOwnerPW
) {
1113 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permFillForm
);
1116 GBool
XRef::okToAccessibility(GBool ignoreOwnerPW
) {
1117 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permAccessibility
);
1120 GBool
XRef::okToAssemble(GBool ignoreOwnerPW
) {
1121 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permAssemble
);
1124 Object
*XRef::getCatalog(Object
*catalog
) {
1125 Object
*obj
= fetch(rootNum
, rootGen
, catalog
);
1126 if (obj
->isDict()) {
1129 GBool wasReconstructed
= false;
1130 GBool ok
= constructXRef(&wasReconstructed
, gTrue
);
1131 return (ok
) ? fetch(rootNum
, rootGen
, catalog
) : obj
;
1134 Object
*XRef::fetch(int num
, int gen
, Object
*obj
, int recursion
) {
1137 Object obj1
, obj2
, obj3
;
1140 // check for bogus ref - this can happen in corrupted PDF files
1141 if (num
< 0 || num
>= size
) {
1146 if(!e
->obj
.isNull ()) { //check for updated object
1147 obj
= e
->obj
.copy(obj
);
1153 case xrefEntryUncompressed
:
1154 if (e
->gen
!= gen
) {
1158 parser
= new Parser(this,
1160 str
->makeSubStream(start
+ e
->offset
, gFalse
, 0, &obj1
)),
1162 parser
->getObj(&obj1
, recursion
);
1163 parser
->getObj(&obj2
, recursion
);
1164 parser
->getObj(&obj3
, recursion
);
1165 if (!obj1
.isInt() || obj1
.getInt() != num
||
1166 !obj2
.isInt() || obj2
.getInt() != gen
||
1167 !obj3
.isCmd("obj")) {
1168 // some buggy pdf have obj1234 for ints that represent 1234
1169 // try to recover here
1170 if (obj1
.isInt() && obj1
.getInt() == num
&&
1171 obj2
.isInt() && obj2
.getInt() == gen
&&
1173 char *cmd
= obj3
.getCmd();
1174 if (strlen(cmd
) > 3 &&
1179 long longNumber
= strtol(cmd
+ 3, &end_ptr
, 0);
1180 if (longNumber
<= INT_MAX
&& longNumber
>= INT_MIN
&& *end_ptr
== '\0') {
1181 int number
= longNumber
;
1182 error(errSyntaxWarning
, -1, "Cmd was not obj but {0:s}, assuming the creator meant obj {1:d}", cmd
, number
);
1183 obj
->initInt(number
);
1198 parser
->getObj(obj
, gFalse
, (encrypted
&& !e
->getFlag(XRefEntry::Unencrypted
)) ? fileKey
: NULL
,
1199 encAlgorithm
, keyLength
, num
, gen
, recursion
);
1206 case xrefEntryCompressed
:
1208 #if 0 // Adobe apparently ignores the generation number on compressed objects
1213 if (e
->offset
>= (Guint
)size
||
1214 entries
[e
->offset
].type
!= xrefEntryUncompressed
) {
1215 error(errSyntaxError
, -1, "Invalid object stream");
1219 ObjectStream
*objStr
= NULL
;
1220 ObjectStreamKey
key(e
->offset
);
1221 PopplerCacheItem
*item
= objStrs
->lookup(key
);
1223 ObjectStreamItem
*it
= static_cast<ObjectStreamItem
*>(item
);
1224 objStr
= it
->objStream
;
1228 objStr
= new ObjectStream(this, e
->offset
, recursion
+ 1);
1229 if (!objStr
->isOk()) {
1234 // XRef could be reconstructed in constructor of ObjectStream:
1236 ObjectStreamKey
*newkey
= new ObjectStreamKey(e
->offset
);
1237 ObjectStreamItem
*newitem
= new ObjectStreamItem(objStr
);
1238 objStrs
->put(newkey
, newitem
);
1241 objStr
->getObject(e
->gen
, num
, obj
);
1252 return obj
->initNull();
1261 void XRef::unlock() {
1263 gUnlockMutex(&mutex
);
1267 Object
*XRef::getDocInfo(Object
*obj
) {
1268 return trailerDict
.dictLookup("Info", obj
);
1271 // Added for the pdftex project.
1272 Object
*XRef::getDocInfoNF(Object
*obj
) {
1273 return trailerDict
.dictLookupNF("Info", obj
);
1276 GBool
XRef::getStreamEnd(Goffset streamStart
, Goffset
*streamEnd
) {
1279 if (streamEndsLen
== 0 ||
1280 streamStart
> streamEnds
[streamEndsLen
- 1]) {
1285 b
= streamEndsLen
- 1;
1286 // invariant: streamEnds[a] < streamStart <= streamEnds[b]
1289 if (streamStart
<= streamEnds
[m
]) {
1295 *streamEnd
= streamEnds
[b
];
1299 int XRef::getNumEntry(Goffset offset
)
1304 Goffset resOffset
= getEntry(0)->offset
;
1306 for (int i
= 1; i
< size
; ++i
)
1308 e
= getEntry(i
, gFalse
);
1309 if (e
->type
!= xrefEntryFree
&& e
->offset
< offset
&& e
->offset
>= resOffset
)
1312 resOffset
= e
->offset
;
1320 void XRef::add(int num
, int gen
, Goffset offs
, GBool used
) {
1323 if (num
>= capacity
) {
1324 entries
= (XRefEntry
*)greallocn(entries
, num
+ 1, sizeof(XRefEntry
));
1327 for (int i
= size
; i
< num
+ 1; ++i
) {
1328 entries
[i
].offset
= -1;
1329 entries
[i
].type
= xrefEntryFree
;
1330 entries
[i
].obj
.initNull ();
1331 entries
[i
].flags
= 0;
1336 XRefEntry
*e
= getEntry(num
);
1341 e
->type
= xrefEntryUncompressed
;
1344 e
->type
= xrefEntryFree
;
1349 void XRef::setModifiedObject (Object
* o
, Ref r
) {
1351 if (r
.num
< 0 || r
.num
>= size
) {
1352 error(errInternal
, -1,"XRef::setModifiedObject on unknown ref: {0:d}, {1:d}\n", r
.num
, r
.gen
);
1355 XRefEntry
*e
= getEntry(r
.num
);
1358 e
->setFlag(XRefEntry::Updated
, gTrue
);
1361 Ref
XRef::addIndirectObject (Object
* o
) {
1362 int entryIndexToUse
= -1;
1363 for (int i
= 1; entryIndexToUse
== -1 && i
< size
; ++i
) {
1364 XRefEntry
*e
= getEntry(i
, false /* complainIfMissing */);
1365 if (e
->type
== xrefEntryFree
&& e
->gen
!= 65535) {
1366 entryIndexToUse
= i
;
1371 if (entryIndexToUse
== -1) {
1372 entryIndexToUse
= size
;
1373 add(entryIndexToUse
, 0, 0, gFalse
);
1374 e
= getEntry(entryIndexToUse
);
1376 //reuse a free entry
1377 e
= getEntry(entryIndexToUse
);
1378 //we don't touch gen number, because it should have been
1379 //incremented when the object was deleted
1381 e
->type
= xrefEntryUncompressed
;
1383 e
->setFlag(XRefEntry::Updated
, gTrue
);
1386 r
.num
= entryIndexToUse
;
1391 void XRef::removeIndirectObject(Ref r
) {
1393 if (r
.num
< 0 || r
.num
>= size
) {
1394 error(errInternal
, -1,"XRef::removeIndirectObject on unknown ref: {0:d}, {1:d}\n", r
.num
, r
.gen
);
1397 XRefEntry
*e
= getEntry(r
.num
);
1398 if (e
->type
== xrefEntryFree
) {
1402 e
->type
= xrefEntryFree
;
1404 e
->setFlag(XRefEntry::Updated
, gTrue
);
1407 void XRef::writeXRef(XRef::XRefWriter
*writer
, GBool writeAllEntries
) {
1408 //create free entries linked-list
1409 if (getEntry(0)->gen
!= 65535) {
1410 error(errInternal
, -1, "XRef::writeXRef, entry 0 of the XRef is invalid (gen != 65535)\n");
1412 int lastFreeEntry
= 0;
1413 for (int i
=0; i
<size
; i
++) {
1414 if (getEntry(i
)->type
== xrefEntryFree
) {
1415 getEntry(lastFreeEntry
)->offset
= i
;
1419 getEntry(lastFreeEntry
)->offset
= 0;
1421 if (writeAllEntries
) {
1422 writer
->startSection(0, size
);
1423 for (int i
=0; i
<size
; i
++) {
1424 XRefEntry
*e
= getEntry(i
);
1425 if(e
->gen
> 65535) e
->gen
= 65535; //cap generation number to 65535 (required by PDFReference)
1426 writer
->writeEntry(e
->offset
, e
->gen
, e
->type
);
1432 for(j
=i
; j
<size
; j
++) { //look for consecutive entries
1433 if ((getEntry(j
)->type
== xrefEntryFree
) && (getEntry(j
)->gen
== 0))
1438 writer
->startSection(i
, j
-i
);
1439 for (int k
=i
; k
<j
; k
++) {
1440 XRefEntry
*e
= getEntry(k
);
1441 if(e
->gen
> 65535) e
->gen
= 65535; //cap generation number to 65535 (required by PDFReference)
1442 writer
->writeEntry(e
->offset
, e
->gen
, e
->type
);
1451 XRef::XRefTableWriter::XRefTableWriter(OutStream
* outStrA
) {
1455 void XRef::XRefTableWriter::startSection(int first
, int count
) {
1456 outStr
->printf("%i %i\r\n", first
, count
);
1459 void XRef::XRefTableWriter::writeEntry(Goffset offset
, int gen
, XRefEntryType type
) {
1460 outStr
->printf("%010lli %05i %c\r\n", (long long)offset
, gen
, (type
==xrefEntryFree
)?'f':'n');
1463 void XRef::writeTableToFile(OutStream
* outStr
, GBool writeAllEntries
) {
1464 XRefTableWriter
writer(outStr
);
1465 outStr
->printf("xref\r\n");
1466 writeXRef(&writer
, writeAllEntries
);
1469 XRef::XRefStreamWriter::XRefStreamWriter(Object
*indexA
, GooString
*stmBufA
, int offsetSizeA
) {
1472 offsetSize
= offsetSizeA
;
1475 void XRef::XRefStreamWriter::startSection(int first
, int count
) {
1477 index
->arrayAdd( obj
.initInt(first
) );
1478 index
->arrayAdd( obj
.initInt(count
) );
1481 void XRef::XRefStreamWriter::writeEntry(Goffset offset
, int gen
, XRefEntryType type
) {
1482 const int entryTotalSize
= 1 + offsetSize
+ 2; /* type + offset + gen */
1484 data
[0] = (type
==xrefEntryFree
) ? 0 : 1;
1485 for (int i
= offsetSize
; i
> 0; i
--) {
1486 data
[i
] = offset
& 0xff;
1489 data
[offsetSize
+ 1] = (gen
>> 8) & 0xff;
1490 data
[offsetSize
+ 2] = gen
& 0xff;
1491 stmBuf
->append(data
, entryTotalSize
);
1494 XRef::XRefPreScanWriter::XRefPreScanWriter() {
1495 hasOffsetsBeyond4GB
= gFalse
;
1498 void XRef::XRefPreScanWriter::startSection(int first
, int count
) {
1501 void XRef::XRefPreScanWriter::writeEntry(Goffset offset
, int gen
, XRefEntryType type
) {
1502 if (offset
>= 0x100000000ll
)
1503 hasOffsetsBeyond4GB
= gTrue
;
1506 void XRef::writeStreamToBuffer(GooString
*stmBuf
, Dict
*xrefDict
, XRef
*xref
) {
1508 index
.initArray(xref
);
1511 // First pass: determine whether all offsets fit in 4 bytes or not
1512 XRefPreScanWriter prescan
;
1513 writeXRef(&prescan
, gFalse
);
1514 const int offsetSize
= prescan
.hasOffsetsBeyond4GB
? sizeof(Goffset
) : 4;
1516 // Second pass: actually write the xref stream
1517 XRefStreamWriter
writer(&index
, stmBuf
, offsetSize
);
1518 writeXRef(&writer
, gFalse
);
1521 xrefDict
->set("Type", obj1
.initName("XRef"));
1522 xrefDict
->set("Index", &index
);
1523 obj2
.initArray(xref
);
1524 obj2
.arrayAdd( obj1
.initInt(1) );
1525 obj2
.arrayAdd( obj1
.initInt(offsetSize
) );
1526 obj2
.arrayAdd( obj1
.initInt(2) );
1527 xrefDict
->set("W", &obj2
);
1530 GBool
XRef::parseEntry(Goffset offset
, XRefEntry
*entry
)
1536 Parser parser
= Parser(NULL
, new Lexer(NULL
,
1537 str
->makeSubStream(offset
, gFalse
, 20, &obj
)), gTrue
);
1539 Object obj1
, obj2
, obj3
;
1540 if (((parser
.getObj(&obj1
)->isInt()) ||
1541 parser
.getObj(&obj1
)->isInt64()) &&
1542 (parser
.getObj(&obj2
)->isInt()) &&
1543 (parser
.getObj(&obj3
)->isCmd("n") || obj3
.isCmd("f"))) {
1545 entry
->offset
= obj1
.getInt64();
1547 entry
->offset
= obj1
.getInt();
1548 entry
->gen
= obj2
.getInt();
1549 entry
->type
= obj3
.isCmd("n") ? xrefEntryUncompressed
: xrefEntryFree
;
1550 entry
->obj
.initNull ();
1563 /* Traverse all XRef tables and, if untilEntryNum != -1, stop as soon as
1564 * untilEntryNum is found, or try to reconstruct the xref table if it's not
1565 * present in any xref.
1566 * If xrefStreamObjsNum is not NULL, it is filled with the list of the object
1567 * numbers of the XRef streams that have been traversed */
1568 void XRef::readXRefUntil(int untilEntryNum
, std::vector
<int> *xrefStreamObjsNum
)
1570 std::vector
<Goffset
> followedPrev
;
1571 while (prevXRefOffset
&& (untilEntryNum
== -1 || (untilEntryNum
< size
&& entries
[untilEntryNum
].type
== xrefEntryNone
))) {
1572 bool followed
= false;
1573 for (size_t j
= 0; j
< followedPrev
.size(); j
++) {
1574 if (followedPrev
.at(j
) == prevXRefOffset
) {
1580 error(errSyntaxError
, -1, "Circular XRef");
1581 if (!(ok
= constructXRef(NULL
))) {
1582 errCode
= errDamaged
;
1587 followedPrev
.push_back (prevXRefOffset
);
1589 std::vector
<Goffset
> followedXRefStm
;
1590 if (!readXRef(&prevXRefOffset
, &followedXRefStm
, xrefStreamObjsNum
)) {
1594 // if there was a problem with the xref table, or we haven't found the entry
1595 // we were looking for, try to reconstruct the xref
1596 if (!ok
|| (!prevXRefOffset
&& untilEntryNum
!= -1 && entries
[untilEntryNum
].type
== xrefEntryNone
)) {
1597 GBool wasReconstructed
= false;
1598 if (!(ok
= constructXRef(&wasReconstructed
))) {
1599 errCode
= errDamaged
;
1607 XRefEntry
*XRef::getEntry(int i
, GBool complainIfMissing
)
1609 if (i
>= size
|| entries
[i
].type
== xrefEntryNone
) {
1611 if ((!xRefStream
) && mainXRefEntriesOffset
) {
1612 if (!parseEntry(mainXRefEntriesOffset
+ 20*i
, &entries
[i
])) {
1613 error(errSyntaxError
, -1, "Failed to parse XRef entry [{0:d}].", i
);
1616 // Read XRef tables until the entry we're looking for is found
1619 // We might have reconstructed the xref
1620 // Check again i is in bounds
1621 if (unlikely(i
>= size
)) {
1622 static XRefEntry dummy
;
1625 dummy
.type
= xrefEntryNone
;
1630 if (entries
[i
].type
== xrefEntryNone
) {
1631 if (complainIfMissing
) {
1632 error(errSyntaxError
, -1, "Invalid XRef entry");
1634 entries
[i
].type
= xrefEntryFree
;
1642 // Recursively sets the Unencrypted flag in all referenced xref entries
1643 void XRef::markUnencrypted(Object
*obj
) {
1646 switch (obj
->getType()) {
1649 Array
*array
= obj
->getArray();
1650 for (int i
= 0; i
< array
->getLength(); i
++) {
1651 markUnencrypted(array
->getNF(i
, &obj1
));
1660 if (obj
->getType() == objStream
) {
1661 Stream
*stream
= obj
->getStream();
1662 dict
= stream
->getDict();
1664 dict
= obj
->getDict();
1666 for (int i
= 0; i
< dict
->getLength(); i
++) {
1667 markUnencrypted(dict
->getValNF(i
, &obj1
));
1674 Ref ref
= obj
->getRef();
1675 XRefEntry
*e
= getEntry(ref
.num
);
1676 if (e
->getFlag(XRefEntry::Unencrypted
))
1677 return; // We've already been here: prevent infinite recursion
1678 e
->setFlag(XRefEntry::Unencrypted
, gTrue
);
1679 fetch(ref
.num
, ref
.gen
, &obj1
);
1680 markUnencrypted(&obj1
);
1689 void XRef::scanSpecialFlags() {
1690 if (scannedSpecialFlags
) {
1693 scannedSpecialFlags
= gTrue
;
1695 // "Rewind" the XRef linked list, so that readXRefUntil re-reads all XRef
1696 // tables/streams, even those that had already been parsed
1697 prevXRefOffset
= mainXRefOffset
;
1699 std::vector
<int> xrefStreamObjNums
;
1700 if (!streamEndsLen
) { // don't do it for already reconstructed xref
1701 readXRefUntil(-1 /* read all xref sections */, &xrefStreamObjNums
);
1704 // Mark object streams as DontRewrite, because we write each object
1705 // individually in full rewrite mode.
1706 for (int i
= 0; i
< size
; ++i
) {
1707 if (entries
[i
].type
== xrefEntryCompressed
) {
1708 const int objStmNum
= entries
[i
].offset
;
1709 if (unlikely(objStmNum
< 0 || objStmNum
>= size
)) {
1710 error(errSyntaxError
, -1, "Compressed object offset out of xref bounds");
1712 getEntry(objStmNum
)->setFlag(XRefEntry::DontRewrite
, gTrue
);
1717 // Mark XRef streams objects as Unencrypted and DontRewrite
1718 for (size_t i
= 0; i
< xrefStreamObjNums
.size(); ++i
) {
1719 const int objNum
= xrefStreamObjNums
.at(i
);
1720 getEntry(objNum
)->setFlag(XRefEntry::Unencrypted
, gTrue
);
1721 getEntry(objNum
)->setFlag(XRefEntry::DontRewrite
, gTrue
);
1724 // Mark objects referred from the Encrypt dict as Unencrypted
1726 markUnencrypted(trailerDict
.dictLookupNF("Encrypt", &obj
));
1730 void XRef::markUnencrypted() {
1731 // Mark objects referred from the Encrypt dict as Unencrypted
1733 trailerDict
.dictLookupNF("Encrypt", &obj
);
1735 XRefEntry
*e
= getEntry(obj
.getRefNum());
1736 e
->setFlag(XRefEntry::Unencrypted
, gTrue
);