1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
26 #include "ErrorCodes.h"
29 //------------------------------------------------------------------------
// Size of the tail window examined by XRef::getStartXref(): it is passed
// to setPos() to seek relative to the end of the stream, bounds the read
// loop, and sizes the search buffer (xrefSearchSize + 1 bytes).
31 #define xrefSearchSize 1024 // read this many bytes at end of file
32 // to look for 'startxref'
34 //------------------------------------------------------------------------
36 //------------------------------------------------------------------------
// Permission bit masks. Each one is tested against XRef::permFlags by the
// matching okToPrint / okToChange / okToCopy / okToAddNotes method.
38 #define permPrint (1<<2)
39 #define permChange (1<<3)
40 #define permCopy (1<<4)
41 #define permNotes (1<<5)
// Value the XRef constructor assigns to permFlags; bits 2..5 are all set
// in 0xfffc, so every permission above is granted by default.
42 #define defPermFlags 0xfffc
44 //------------------------------------------------------------------------
46 //------------------------------------------------------------------------
// ObjectStream declaration fragment: constructor, accessors and the data
// members that the constructor fills in.
// NOTE(review): the class header, access specifiers and the destructor
// declaration are not visible in this excerpt.
51 // Create an xObject stream, using xObject number <objStrNum>,
// (the constructor fetches <objStrNumA> with generation 0 through <xref>).
53 ObjectStream(XRef
*xref
, int objStrNumA
);
57 // Return the xObject number of this xObject stream.
58 int getObjStrNum() { return objStrNum
; }
60 // Get the <objIdx>th xObject from this stream, which should be
61 // xObject number <objNum>, generation 0.
// The result is copied into <obj>; <obj> is set to a null object when
// <objIdx> is out of range or <objNum> does not match the stored number.
62 xObject
*getObject(int objIdx
, int objNum
, xObject
*obj
);
66 int objStrNum
; // xObject number of the xObject stream
67 int nObjects
; // number of xObjects in the stream
68 xObject
*objs
; // the xObjects (length = nObjects)
69 int *objNums
; // the xObject numbers (length = nObjects)
// Build an ObjectStream from object <objStrNumA>, generation 0, of <xref>:
// read the "N" and "First" entries of the stream dictionary, parse the
// header of (object number, offset) pairs, then parse each of the
// nObjects contained objects into objs[].
// NOTE(review): the bodies of the failure branches and any cleanup code
// are not visible in this excerpt.
72 ObjectStream::ObjectStream(XRef
*xref
, int objStrNumA
) {
76 xObject objStr
, obj1
, obj2
;
79 objStrNum
= objStrNumA
;
// the referenced object has to be a stream
84 if (!xref
->fetch(objStrNum
, 0, &objStr
)->isStream()) {
// "N" has to be an integer; it becomes the object count
88 if (!objStr
.streamGetDict()->lookup("N", &obj1
)->isInt()) {
92 nObjects
= obj1
.getInt();
// "First" has to be an integer; it is handed to the header EmbedStream
// below (presumably as its length limit -- confirm against EmbedStream)
98 if (!objStr
.streamGetDict()->lookup("First", &obj1
)->isInt()) {
102 first
= obj1
.getInt();
// NOTE(review): no range check on nObjects or first is visible in this
// excerpt -- confirm both are validated before these allocations.
108 objs
= new xObject
[nObjects
];
109 objNums
= (int *)gmallocn(nObjects
, sizeof(int));
110 offsets
= (int *)gmallocn(nObjects
, sizeof(int));
112 // parse the header: xObject numbers and offsets
113 objStr
.streamReset();
115 str
= new EmbedStream(objStr
.getStream(), &obj1
, gTrue
, first
);
116 parser
= new Parser(xref
, new Lexer(xref
, str
), gFalse
);
// one (object number, offset) pair per contained object
117 for (i
= 0; i
< nObjects
; ++i
) {
118 parser
->getObj(&obj1
);
119 parser
->getObj(&obj2
);
// both tokens of the pair must be integers
120 if (!obj1
.isInt() || !obj2
.isInt()) {
127 objNums
[i
] = obj1
.getInt();
128 offsets
[i
] = obj2
.getInt();
// this test catches negative values and offsets that decrease from one
// entry to the next (the branch body is not visible here)
131 if (objNums
[i
] < 0 || offsets
[i
] < 0 ||
132 (i
> 0 && offsets
[i
] < offsets
[i
-1])) {
// drain whatever is left of the header sub-stream
138 while (str
->getChar() != EOF
) ;
141 // skip to the first xObject - this shouldn't be necessary because
142 // the First key is supposed to be equal to offsets[0], but just in
// case: read and discard one byte per step from <first> up to offsets[0].
144 for (i
= first
; i
< offsets
[0]; ++i
) {
145 objStr
.getStream()->getChar();
148 // parse the xObjects
149 for (i
= 0; i
< nObjects
; ++i
) {
// the last object gets a sub-stream created with (gFalse, 0), every other
// object one created with (gTrue, distance to the next offset);
// presumably "unlimited" versus "limited to this object's bytes"
151 if (i
== nObjects
- 1) {
152 str
= new EmbedStream(objStr
.getStream(), &obj1
, gFalse
, 0);
154 str
= new EmbedStream(objStr
.getStream(), &obj1
, gTrue
,
155 offsets
[i
+1] - offsets
[i
]);
157 parser
= new Parser(xref
, new Lexer(xref
, str
), gFalse
);
158 parser
->getObj(&objs
[i
]);
// consume the rest of this object's sub-stream
159 while (str
->getChar() != EOF
) ;
// Destructor: walks all nObjects entries.
// NOTE(review): the per-entry cleanup statement and the release of the
// arrays are not visible in this excerpt.
170 ObjectStream::~ObjectStream() {
174 for (i
= 0; i
< nObjects
; ++i
) {
// Copy the <objIdx>th parsed object into <obj> and return the result of
// copy(). When <objIdx> is outside [0, nObjects) or <objNum> differs from
// objNums[objIdx], <obj> is initialised to null and that is returned.
182 xObject
*ObjectStream::getObject(int objIdx
, int objNum
, xObject
*obj
) {
// bounds are checked first, so objNums[objIdx] is only read when in range
183 if (objIdx
< 0 || objIdx
>= nObjects
|| objNum
!= objNums
[objIdx
]) {
184 return obj
->initNull();
186 return objs
[objIdx
].copy(obj
);
189 //------------------------------------------------------------------------
191 //------------------------------------------------------------------------
// Construct the cross-reference table for <strA>: set default permissions,
// locate the last xref section with getStartXref(), read sections with
// readXRef() until it returns false, and call constructXRef() as a
// fallback (errCode becomes errDamaged when reconstruction fails). Then
// read the "Root" reference from the trailer dictionary and attach this
// XRef to the trailer dictionary.
// NOTE(review): several member initialisations and the conditions guarding
// the fallback branches are not visible in this excerpt.
193 XRef::XRef(BaseStream
*strA
) {
206 permFlags
= defPermFlags
;
207 ownerPasswordOk
= gFalse
;
211 start
= str
->getStart();
212 pos
= getStartXref();
214 // if there was a problem with the 'startxref' position, try to
215 // reconstruct the xref table
217 if (!(ok
= constructXRef())) {
218 errCode
= errDamaged
;
222 // read the xref table
// readXRef() is given &pos; the loop ends when it returns false
224 while (readXRef(&pos
)) ;
226 // if there was a problem with the xref table,
227 // try to reconstruct it
229 if (!(ok
= constructXRef())) {
230 errCode
= errDamaged
;
236 // get the root dictionary (catalog) xObject
237 trailerDict
.dictLookupNF("Root", &obj
);
239 rootNum
= obj
.getRefNum();
240 rootGen
= obj
.getRefGen();
// another reconstruction attempt; its guard is not visible here
// (presumably taken when "Root" is not a reference -- confirm)
244 if (!(ok
= constructXRef())) {
245 errCode
= errDamaged
;
250 // now set the trailer dictionary's xref pointer so we can fetch
251 // indirect xObjects from it
252 trailerDict
.getDict()->setXRef(this);
266 // Read the 'startxref' position.
// Reads up to xrefSearchSize bytes from the end of the stream, searches
// them backwards for the keyword "startxref", skips the whitespace after
// it and converts the following digits with strToUnsigned(), storing the
// result in lastXRefPos.
// NOTE(review): the buffer fill, the not-found handling and the return
// statement are not visible in this excerpt.
267 Guint
XRef::getStartXref() {
// one extra byte beyond the search window
268 char buf
[xrefSearchSize
+1];
272 // read last xrefSearchSize bytes
273 str
->setPos(xrefSearchSize
, -1);
274 for (n
= 0; n
< xrefSearchSize
; ++n
) {
// reaching EOF is tested here, so fewer than xrefSearchSize bytes may be read
275 if ((c
= str
->getChar()) == EOF
) {
// scan from the last position where a 9-character keyword still fits
283 for (i
= n
- 9; i
>= 0; --i
) {
284 if (!strncmp(&buf
[i
], "startxref", 9)) {
// skip whitespace between the keyword and the number
291 for (p
= &buf
[i
+9]; isspace(*p
); ++p
) ;
292 lastXRefPos
= strToUnsigned(p
);
297 // Read one xref table section. Also reads the associated trailer
298 // dictionary, and returns the prev pointer (if any).
// The section is parsed from a sub-stream starting at start + *pos. The
// first token decides the format: the keyword "xref" selects
// readXRefTable(), an integer starts the "num gen obj <stream>" sequence
// handled by readXRefStream(). <pos> is forwarded to both helpers.
// NOTE(review): the error branches and the return statement are not
// visible in this excerpt.
299 GBool
XRef::readXRef(Guint
*pos
) {
304 // start up a parser, parse one token
306 parser
= new Parser(NULL
,
308 str
->makeSubStream(start
+ *pos
, gFalse
, 0, &obj
)),
310 parser
->getObj(&obj
);
312 // parse an old-style xref table
313 if (obj
.isCmd("xref")) {
315 more
= readXRefTable(parser
, pos
);
317 // parse an xref stream
318 } else if (obj
.isInt()) {
// second token: another integer
320 if (!parser
->getObj(&obj
)->isInt()) {
// third token: the keyword "obj"
324 if (!parser
->getObj(&obj
)->isCmd("obj")) {
// the object itself has to be a stream
328 if (!parser
->getObj(&obj
)->isStream()) {
331 more
= readXRefStream(obj
.getStream(), pos
);
// Parse an old-style xref table from <parser>: a sequence of subsections
// (first object number, count, then one "offset gen n|f" entry per
// object) terminated by the keyword "trailer", followed by the trailer
// dictionary. The "Prev" value of the trailer is written to *pos, the
// first trailer dictionary seen is kept in trailerDict, and an "XRefStm"
// entry is read as a further position.
// NOTE(review): the enclosing loop, the error branches, the update of
// <size>, the use of pos2 and the return statements are not visible in
// this excerpt.
348 GBool
XRef::readXRefTable(Parser
*parser
, Guint
*pos
) {
353 int first
, n
, newSize
, i
;
// next token: either "trailer" or the first object number of a subsection
356 parser
->getObj(&obj
);
357 if (obj
.isCmd("trailer")) {
364 first
= obj
.getInt();
// the token after the first object number has to be an integer
366 if (!parser
->getObj(&obj
)->isInt()) {
// reject negative values and a wrapped-around sum
// NOTE(review): "first + n < 0" depends on signed overflow, which is
// undefined behaviour in C++ -- consider an explicit range check.
371 if (first
< 0 || n
< 0 || first
+ n
< 0) {
// grow the table when the subsection reaches beyond the current size:
// start from twice the size (or 1024) and stop once first + n fits or
// newSize is no longer positive
374 if (first
+ n
> size
) {
375 for (newSize
= size
? 2 * size
: 1024;
376 first
+ n
> newSize
&& newSize
> 0;
381 entries
= (XRefEntry
*)greallocn(entries
, newSize
, sizeof(XRefEntry
));
// new slots are marked free, with offset 0xffffffff as the
// "not filled in yet" marker tested further down
382 for (i
= size
; i
< newSize
; ++i
) {
383 entries
[i
].offset
= 0xffffffff;
384 entries
[i
].type
= xrefEntryFree
;
// read the n entries of this subsection
388 for (i
= first
; i
< first
+ n
; ++i
) {
// field 1: offset
389 if (!parser
->getObj(&obj
)->isInt()) {
392 entry
.offset
= (Guint
)obj
.getInt();
// field 2: generation
394 if (!parser
->getObj(&obj
)->isInt()) {
397 entry
.gen
= obj
.getInt();
// field 3: "n" marks an in-use entry, "f" a free entry
399 parser
->getObj(&obj
);
400 if (obj
.isCmd("n")) {
401 entry
.type
= xrefEntryUncompressed
;
402 } else if (obj
.isCmd("f")) {
403 entry
.type
= xrefEntryFree
;
// the slot is only considered while it still holds the 0xffffffff marker
408 if (entries
[i
].offset
== 0xffffffff) {
410 // PDF files of patents from the IBM Intellectual Property
411 // Network have a bug: the xref table claims to start at 1
// The workaround below moves a free entry 1 (offset 0, generation 65535)
// of a table starting at 1 into slot 0 and resets slot 1 to the marker.
413 if (i
== 1 && first
== 1 &&
414 entries
[1].offset
== 0 && entries
[1].gen
== 65535 &&
415 entries
[1].type
== xrefEntryFree
) {
417 entries
[0] = entries
[1];
418 entries
[1].offset
= 0xffffffff;
424 // read the trailer dictionary
425 if (!parser
->getObj(&obj
)->isDict()) {
429 // get the 'Prev' pointer
430 obj
.getDict()->lookupNF("Prev", &obj2
);
432 *pos
= (Guint
)obj2
.getInt();
434 } else if (obj2
.isRef()) {
435 // certain buggy PDF generators generate "/Prev NNN 0 R" instead
// of a plain number; the reference's object number is used as position
437 *pos
= (Guint
)obj2
.getRefNum();
444 // save the first trailer dictionary
445 if (trailerDict
.isNone()) {
446 obj
.copy(&trailerDict
);
449 // check for an 'XRefStm' key
450 if (obj
.getDict()->lookup("XRefStm", &obj2
)->isInt()) {
451 pos2
= (Guint
)obj2
.getInt();
// Parse a cross-reference stream <xrefStr>. Reads "Size" (growing the
// entries table when needed), the three field widths in "W", and the
// optional "Index" array of (first, count) pairs; each range is decoded
// by readXRefStreamSection(). The "Prev" value is written to *pos and the
// stream dictionary becomes trailerDict when none has been saved yet.
// NOTE(review): the error branches, the update of <size>, the test that
// chooses between "Index" and the whole-table read, and the return
// statements are not visible in this excerpt.
469 GBool
XRef::readXRefStream(Stream
*xrefStr
, Guint
*pos
) {
473 xObject obj
, obj2
, idx
;
474 int newSize
, first
, n
, i
;
476 dict
= xrefStr
->getDict();
// "Size" has to be an integer
478 if (!dict
->lookupNF("Size", &obj
)->isInt()) {
481 newSize
= obj
.getInt();
// grow the table and mark the new slots free / not filled in
486 if (newSize
> size
) {
487 entries
= (XRefEntry
*)greallocn(entries
, newSize
, sizeof(XRefEntry
));
488 for (i
= size
; i
< newSize
; ++i
) {
489 entries
[i
].offset
= 0xffffffff;
490 entries
[i
].type
= xrefEntryFree
;
// "W" has to be an array with at least three elements
495 if (!dict
->lookupNF("W", &obj
)->isArray() ||
496 obj
.arrayGetLength() < 3) {
// copy the three field widths (in bytes) into w[]
499 for (i
= 0; i
< 3; ++i
) {
500 if (!obj
.arrayGet(i
, &obj2
)->isInt()) {
504 w
[i
] = obj2
.getInt();
// widths outside 0..4 take this branch (body not visible here)
506 if (w
[i
] < 0 || w
[i
] > 4) {
513 dict
->lookupNF("Index", &idx
);
// "Index" is consumed as (first, count) pairs; a trailing odd element is
// never reached because the loop requires i+1 to be in range
515 for (i
= 0; i
+1 < idx
.arrayGetLength(); i
+= 2) {
516 if (!idx
.arrayGet(i
, &obj
)->isInt()) {
520 first
= obj
.getInt();
522 if (!idx
.arrayGet(i
+1, &obj
)->isInt()) {
// negative ranges and section read failures share one branch
528 if (first
< 0 || n
< 0 ||
529 !readXRefStreamSection(xrefStr
, w
, first
, n
)) {
// alternative path: a single section covering entries 0 .. newSize-1
535 if (!readXRefStreamSection(xrefStr
, w
, 0, newSize
)) {
// position of the previous xref section, if any
542 dict
->lookupNF("Prev", &obj
);
544 *pos
= (Guint
)obj
.getInt();
// keep this dictionary as the trailer when none was saved before
550 if (trailerDict
.isNone()) {
551 trailerDict
.initDict(dict
);
// Decode <n> entries, starting at object number <first>, from the
// cross-reference stream <xrefStr>. Every entry consists of three
// big-endian fields of w[0], w[1] and w[2] bytes: type, offset and
// generation. Slots that still hold the 0xffffffff marker are filled in
// as free, uncompressed or compressed entries.
// NOTE(review): the EOF branches, the dispatch on <type>, the update of
// <size> and the return statements are not visible in this excerpt.
563 GBool
XRef::readXRefStreamSection(Stream
*xrefStr
, int *w
, int first
, int n
) {
565 int type
, gen
, c
, newSize
, i
, j
;
// grow the table when the section reaches beyond the current size:
// start from twice the size (or 1024) and stop once first + n fits or
// newSize is no longer positive
570 if (first
+ n
> size
) {
571 for (newSize
= size
? 2 * size
: 1024;
572 first
+ n
> newSize
&& newSize
> 0;
577 entries
= (XRefEntry
*)greallocn(entries
, newSize
, sizeof(XRefEntry
));
// new slots: free, offset marker 0xffffffff
578 for (i
= size
; i
< newSize
; ++i
) {
579 entries
[i
].offset
= 0xffffffff;
580 entries
[i
].type
= xrefEntryFree
;
584 for (i
= first
; i
< first
+ n
; ++i
) {
// field 1: entry type, w[0] bytes, most significant byte first
588 for (type
= 0, j
= 0; j
< w
[0]; ++j
) {
589 if ((c
= xrefStr
->getChar()) == EOF
) {
592 type
= (type
<< 8) + c
;
// field 2: offset, w[1] bytes
595 for (offset
= 0, j
= 0; j
< w
[1]; ++j
) {
596 if ((c
= xrefStr
->getChar()) == EOF
) {
599 offset
= (offset
<< 8) + c
;
// field 3: generation, w[2] bytes
601 for (gen
= 0, j
= 0; j
< w
[2]; ++j
) {
602 if ((c
= xrefStr
->getChar()) == EOF
) {
605 gen
= (gen
<< 8) + c
;
// only slots still holding the marker are written
607 if (entries
[i
].offset
== 0xffffffff) {
// stored as a free entry
610 entries
[i
].offset
= offset
;
611 entries
[i
].gen
= gen
;
612 entries
[i
].type
= xrefEntryFree
;
// stored as an uncompressed entry
615 entries
[i
].offset
= offset
;
616 entries
[i
].gen
= gen
;
617 entries
[i
].type
= xrefEntryUncompressed
;
// stored as a compressed entry; XRef::fetch() later passes such an
// entry's offset to ObjectStream as the stream's object number and its
// gen to getObject() as the index
620 entries
[i
].offset
= offset
;
621 entries
[i
].gen
= gen
;
622 entries
[i
].type
= xrefEntryCompressed
;
633 // Attempt to construct an xref table for a damaged file.
// Scans the stream line by line (lines of up to 256 bytes). A line
// starting with "trailer" is parsed as a trailer dictionary, a line of
// the form "<num> <gen> obj" records an uncompressed entry for <num>, and
// a line starting with "endstream" has its position appended to
// streamEnds.
// NOTE(review): the scan loop header, the number parsing statements, the
// update of <size> and the return statements are not visible in this
// excerpt.
634 GBool
XRef::constructXRef() {
636 xObject newTrailerDict
, obj
;
650 error(-1, "PDF file is damaged - attempting to reconstruct xref table...");
// start with an empty list of stream end positions
652 streamEndsLen
= streamEndsSize
= 0;
657 if (!str
->getLine(buf
, 256)) {
// skip leading whitespace
663 while (*p
&& Lexer::isSpace(*p
& 0xff)) ++p
;
665 // got trailer dictionary
666 if (!strncmp(p
, "trailer", 7)) {
// parse the dictionary from a sub-stream starting 7 bytes after <pos>
668 parser
= new Parser(NULL
,
670 str
->makeSubStream(pos
+ 7, gFalse
, 0, &obj
)),
672 parser
->getObj(&newTrailerDict
);
673 if (newTrailerDict
.isDict()) {
674 newTrailerDict
.dictLookupNF("Root", &obj
);
676 rootNum
= obj
.getRefNum();
677 rootGen
= obj
.getRefGen();
// a previously stored trailer dictionary is handled first (branch body
// not visible), then replaced by a copy of the new one
678 if (!trailerDict
.isNone()) {
681 newTrailerDict
.copy(&trailerDict
);
686 newTrailerDict
.free();
// candidate "<num> <gen> obj" line: digits, whitespace, digits,
// whitespace, then the keyword "obj"
690 } else if (isdigit(*p
)) {
695 } while (*p
&& isdigit(*p
));
699 } while (*p
&& isspace(*p
));
704 } while (*p
&& isdigit(*p
));
708 } while (*p
&& isspace(*p
));
709 if (!strncmp(p
, "obj", 3)) {
// table size needed for <num>, rounded up to a multiple of 256
711 newSize
= (num
+ 1 + 255) & ~255;
713 error(-1, "Bad xObject number");
716 entries
= (XRefEntry
*)
717 greallocn(entries
, newSize
, sizeof(XRefEntry
));
// new slots: free, offset marker 0xffffffff
718 for (i
= size
; i
< newSize
; ++i
) {
719 entries
[i
].offset
= 0xffffffff;
720 entries
[i
].type
= xrefEntryFree
;
// record this object when the slot is free or the new generation is not
// lower than the stored one; the offset is stored relative to <start>
724 if (entries
[num
].type
== xrefEntryFree
||
725 gen
>= entries
[num
].gen
) {
726 entries
[num
].offset
= pos
- start
;
727 entries
[num
].gen
= gen
;
728 entries
[num
].type
= xrefEntryUncompressed
;
736 } else if (!strncmp(p
, "endstream", 9)) {
// grow the list in steps of 64 when it is full
// NOTE(review): the element size passed is sizeof(int) although the
// array is used as Guint* -- same size on common platforms, confirm.
737 if (streamEndsLen
== streamEndsSize
) {
738 streamEndsSize
+= 64;
739 streamEnds
= (Guint
*)greallocn(streamEnds
,
740 streamEndsSize
, sizeof(int));
742 streamEnds
[streamEndsLen
++] = pos
;
749 error(-1, "Couldn't find trailer dictionary");
// Store the encryption parameters: permission flags, owner-password
// status, the file key (copied byte by byte into fileKey), its length,
// and the encryption version and algorithm.
// NOTE(review): the branch for keyLengthA > 16 is not visible in this
// excerpt; presumably the length is capped there -- confirm.
753 void XRef::setEncryption(int permFlagsA
, GBool ownerPasswordOkA
,
754 Guchar
*fileKeyA
, int keyLengthA
, int encVersionA
,
755 CryptAlgorithm encAlgorithmA
) {
759 permFlags
= permFlagsA
;
760 ownerPasswordOk
= ownerPasswordOkA
;
// lengths up to 16 bytes are taken as given
761 if (keyLengthA
<= 16) {
762 keyLength
= keyLengthA
;
// copy keyLength bytes of the caller's key
766 for (i
= 0; i
< keyLength
; ++i
) {
767 fileKey
[i
] = fileKeyA
[i
];
769 encVersion
= encVersionA
;
770 encAlgorithm
= encAlgorithmA
;
// Printing is allowed when the owner password was accepted (unless
// <ignoreOwnerPW> is set) or when the permPrint bit is set in permFlags.
773 GBool
XRef::okToPrint(GBool ignoreOwnerPW
) {
774 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permPrint
);
// Changing is allowed when the owner password was accepted (unless
// <ignoreOwnerPW> is set) or when the permChange bit is set in permFlags.
777 GBool
XRef::okToChange(GBool ignoreOwnerPW
) {
778 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permChange
);
// Copying is allowed when the owner password was accepted (unless
// <ignoreOwnerPW> is set) or when the permCopy bit is set in permFlags.
781 GBool
XRef::okToCopy(GBool ignoreOwnerPW
) {
782 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permCopy
);
// Adding notes is allowed when the owner password was accepted (unless
// <ignoreOwnerPW> is set) or when the permNotes bit is set in permFlags.
785 GBool
XRef::okToAddNotes(GBool ignoreOwnerPW
) {
786 return (!ignoreOwnerPW
&& ownerPasswordOk
) || (permFlags
& permNotes
);
// Fetch indirect object <num> <gen> into <obj>.
// Uncompressed entries are parsed from the file at start + entry offset
// after checking the "<num> <gen> obj" header. Compressed entries are
// taken from an ObjectStream, which is reused when it already belongs to
// the required stream. A null object is returned on the fallback path.
// NOTE(review): the entry lookup, the switch statement, the error jumps
// and the regular return are not visible in this excerpt.
789 xObject
*XRef::fetch(int num
, int gen
, xObject
*obj
) {
792 xObject obj1
, obj2
, obj3
;
794 // check for bogus ref - this can happen in corrupted PDF files
795 if (num
< 0 || num
>= size
) {
802 case xrefEntryUncompressed
:
// parse directly from the file at the recorded offset
807 parser
= new Parser(this,
809 str
->makeSubStream(start
+ e
->offset
, gFalse
, 0, &obj1
)),
// the object has to start with "<num> <gen> obj"
811 parser
->getObj(&obj1
);
812 parser
->getObj(&obj2
);
813 parser
->getObj(&obj3
);
814 if (!obj1
.isInt() || obj1
.getInt() != num
||
815 !obj2
.isInt() || obj2
.getInt() != gen
||
816 !obj3
.isCmd("obj")) {
// parse the object itself; the file key is passed only when <encrypted>
// is set, otherwise a null key
823 parser
->getObj(obj
, encrypted
? fileKey
: (Guchar
*)NULL
,
824 encAlgorithm
, keyLength
, num
, gen
);
831 case xrefEntryCompressed
:
// for compressed entries e->offset is the object number of the
// containing object stream; a new ObjectStream is created unless the
// current one already has that number
835 if (!objStr
|| objStr
->getObjStrNum() != (int)e
->offset
) {
839 objStr
= new ObjectStream(this, e
->offset
);
// e->gen is passed as the index inside the object stream
841 objStr
->getObject(e
->gen
, num
, obj
);
// fallback result: a null object
851 return obj
->initNull();
// Look up the "Info" entry of the trailer dictionary with dictLookup(),
// store it in <obj> and return the pointer dictLookup() returns.
854 xObject
*XRef::getDocInfo(xObject
*obj
) {
855 return trailerDict
.dictLookup("Info", obj
);
858 // Added for the pdftex project.
// Same lookup as getDocInfo(), but through dictLookupNF() (presumably the
// variant that does not resolve an indirect reference -- confirm).
859 xObject
*XRef::getDocInfoNF(xObject
*obj
) {
860 return trailerDict
.dictLookupNF("Info", obj
);
// Look up the recorded stream end for <streamStart> in streamEnds (the
// positions collected by constructXRef()) and store it in *streamEnd.
// The search keeps streamEnds[a] < streamStart <= streamEnds[b], so the
// value stored is the first recorded position not below <streamStart>.
// NOTE(review): this relies on streamEnds being in ascending order; the
// loop header, the updates of a and b and the return statements are not
// visible in this excerpt.
863 GBool
XRef::getStreamEnd(Guint streamStart
, Guint
*streamEnd
) {
// nothing recorded, or <streamStart> lies beyond the last recorded end
866 if (streamEndsLen
== 0 ||
867 streamStart
> streamEnds
[streamEndsLen
- 1]) {
872 b
= streamEndsLen
- 1;
873 // invariant: streamEnds[a] < streamStart <= streamEnds[b]
876 if (streamStart
<= streamEnds
[m
]) {
882 *streamEnd
= streamEnds
[b
];
886 Guint
XRef::strToUnsigned(char *s
) {
892 for (p
= s
, i
= 0; *p
&& isdigit(*p
) && i
< 10; ++p
, ++i
) {
893 x
= 10 * x
+ (*p
- '0');