Various Datatypes.
[AROS-Contrib.git] / arospdf / xpdf / XRef.cc
blobbf6c40d49ea00e647c99a1211ccc83a08d36de92
1 //========================================================================
2 //
3 // XRef.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
9 #include <aconf.h>
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
13 #endif
#include <stdlib.h>
#include <stddef.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>
#include "gmem.h"
#include "Object.h"
#include "Stream.h"
#include "Lexer.h"
#include "Parser.h"
#include "Dict.h"
#include "Error.h"
#include "ErrorCodes.h"
#include "XRef.h"
//------------------------------------------------------------------------

// Number of bytes read from the end of the file when searching for
// the 'startxref' keyword (see XRef::getStartXref).
#define xrefSearchSize 1024

//------------------------------------------------------------------------
// Permission bits
//------------------------------------------------------------------------

#define permPrint    (1<<2)	// tested by XRef::okToPrint
#define permChange   (1<<3)	// tested by XRef::okToChange
#define permCopy     (1<<4)	// tested by XRef::okToCopy
#define permNotes    (1<<5)	// tested by XRef::okToAddNotes

// Permission flags a new XRef starts with; setEncryption() replaces
// them with the flags passed by the caller.
#define defPermFlags 0xfffc
44 //------------------------------------------------------------------------
45 // ObjectStream
46 //------------------------------------------------------------------------
48 class ObjectStream {
49 public:
51 // Create an xObject stream, using xObject number <objStrNum>,
52 // generation 0.
53 ObjectStream(XRef *xref, int objStrNumA);
55 ~ObjectStream();
57 // Return the xObject number of this xObject stream.
58 int getObjStrNum() { return objStrNum; }
60 // Get the <objIdx>th xObject from this stream, which should be
61 // xObject number <objNum>, generation 0.
62 xObject *getObject(int objIdx, int objNum, xObject *obj);
64 private:
66 int objStrNum; // xObject number of the xObject stream
67 int nObjects; // number of xObjects in the stream
68 xObject *objs; // the xObjects (length = nObjects)
69 int *objNums; // the xObject numbers (length = nObjects)
72 ObjectStream::ObjectStream(XRef *xref, int objStrNumA) {
73 Stream *str;
74 Parser *parser;
75 int *offsets;
76 xObject objStr, obj1, obj2;
77 int first, i;
79 objStrNum = objStrNumA;
80 nObjects = 0;
81 objs = NULL;
82 objNums = NULL;
84 if (!xref->fetch(objStrNum, 0, &objStr)->isStream()) {
85 goto err1;
88 if (!objStr.streamGetDict()->lookup("N", &obj1)->isInt()) {
89 obj1.free();
90 goto err1;
92 nObjects = obj1.getInt();
93 obj1.free();
94 if (nObjects <= 0) {
95 goto err1;
98 if (!objStr.streamGetDict()->lookup("First", &obj1)->isInt()) {
99 obj1.free();
100 goto err1;
102 first = obj1.getInt();
103 obj1.free();
104 if (first < 0) {
105 goto err1;
108 objs = new xObject[nObjects];
109 objNums = (int *)gmallocn(nObjects, sizeof(int));
110 offsets = (int *)gmallocn(nObjects, sizeof(int));
112 // parse the header: xObject numbers and offsets
113 objStr.streamReset();
114 obj1.initNull();
115 str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first);
116 parser = new Parser(xref, new Lexer(xref, str), gFalse);
117 for (i = 0; i < nObjects; ++i) {
118 parser->getObj(&obj1);
119 parser->getObj(&obj2);
120 if (!obj1.isInt() || !obj2.isInt()) {
121 obj1.free();
122 obj2.free();
123 delete parser;
124 gfree(offsets);
125 goto err1;
127 objNums[i] = obj1.getInt();
128 offsets[i] = obj2.getInt();
129 obj1.free();
130 obj2.free();
131 if (objNums[i] < 0 || offsets[i] < 0 ||
132 (i > 0 && offsets[i] < offsets[i-1])) {
133 delete parser;
134 gfree(offsets);
135 goto err1;
138 while (str->getChar() != EOF) ;
139 delete parser;
141 // skip to the first xObject - this shouldn't be necessary because
142 // the First key is supposed to be equal to offsets[0], but just in
143 // case...
144 for (i = first; i < offsets[0]; ++i) {
145 objStr.getStream()->getChar();
148 // parse the xObjects
149 for (i = 0; i < nObjects; ++i) {
150 obj1.initNull();
151 if (i == nObjects - 1) {
152 str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0);
153 } else {
154 str = new EmbedStream(objStr.getStream(), &obj1, gTrue,
155 offsets[i+1] - offsets[i]);
157 parser = new Parser(xref, new Lexer(xref, str), gFalse);
158 parser->getObj(&objs[i]);
159 while (str->getChar() != EOF) ;
160 delete parser;
163 gfree(offsets);
165 err1:
166 objStr.free();
167 return;
170 ObjectStream::~ObjectStream() {
171 int i;
173 if (objs) {
174 for (i = 0; i < nObjects; ++i) {
175 objs[i].free();
177 delete[] objs;
179 gfree(objNums);
182 xObject *ObjectStream::getObject(int objIdx, int objNum, xObject *obj) {
183 if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) {
184 return obj->initNull();
186 return objs[objIdx].copy(obj);
189 //------------------------------------------------------------------------
190 // XRef
191 //------------------------------------------------------------------------
193 XRef::XRef(BaseStream *strA) {
194 Guint pos;
195 xObject obj;
197 ok = gTrue;
198 errCode = errNone;
199 size = 0;
200 entries = NULL;
201 streamEnds = NULL;
202 streamEndsLen = 0;
203 objStr = NULL;
205 encrypted = gFalse;
206 permFlags = defPermFlags;
207 ownerPasswordOk = gFalse;
209 // read the trailer
210 str = strA;
211 start = str->getStart();
212 pos = getStartXref();
214 // if there was a problem with the 'startxref' position, try to
215 // reconstruct the xref table
216 if (pos == 0) {
217 if (!(ok = constructXRef())) {
218 errCode = errDamaged;
219 return;
222 // read the xref table
223 } else {
224 while (readXRef(&pos)) ;
226 // if there was a problem with the xref table,
227 // try to reconstruct it
228 if (!ok) {
229 if (!(ok = constructXRef())) {
230 errCode = errDamaged;
231 return;
236 // get the root dictionary (catalog) xObject
237 trailerDict.dictLookupNF("Root", &obj);
238 if (obj.isRef()) {
239 rootNum = obj.getRefNum();
240 rootGen = obj.getRefGen();
241 obj.free();
242 } else {
243 obj.free();
244 if (!(ok = constructXRef())) {
245 errCode = errDamaged;
246 return;
250 // now set the trailer dictionary's xref pointer so we can fetch
251 // indirect xObjects from it
252 trailerDict.getDict()->setXRef(this);
255 XRef::~XRef() {
256 gfree(entries);
257 trailerDict.free();
258 if (streamEnds) {
259 gfree(streamEnds);
261 if (objStr) {
262 delete objStr;
266 // Read the 'startxref' position.
267 Guint XRef::getStartXref() {
268 char buf[xrefSearchSize+1];
269 char *p;
270 int c, n, i;
272 // read last xrefSearchSize bytes
273 str->setPos(xrefSearchSize, -1);
274 for (n = 0; n < xrefSearchSize; ++n) {
275 if ((c = str->getChar()) == EOF) {
276 break;
278 buf[n] = c;
280 buf[n] = '\0';
282 // find startxref
283 for (i = n - 9; i >= 0; --i) {
284 if (!strncmp(&buf[i], "startxref", 9)) {
285 break;
288 if (i < 0) {
289 return 0;
291 for (p = &buf[i+9]; isspace(*p); ++p) ;
292 lastXRefPos = strToUnsigned(p);
294 return lastXRefPos;
297 // Read one xref table section. Also reads the associated trailer
298 // dictionary, and returns the prev pointer (if any).
299 GBool XRef::readXRef(Guint *pos) {
300 Parser *parser;
301 xObject obj;
302 GBool more;
304 // start up a parser, parse one token
305 obj.initNull();
306 parser = new Parser(NULL,
307 new Lexer(NULL,
308 str->makeSubStream(start + *pos, gFalse, 0, &obj)),
309 gTrue);
310 parser->getObj(&obj);
312 // parse an old-style xref table
313 if (obj.isCmd("xref")) {
314 obj.free();
315 more = readXRefTable(parser, pos);
317 // parse an xref stream
318 } else if (obj.isInt()) {
319 obj.free();
320 if (!parser->getObj(&obj)->isInt()) {
321 goto err1;
323 obj.free();
324 if (!parser->getObj(&obj)->isCmd("obj")) {
325 goto err1;
327 obj.free();
328 if (!parser->getObj(&obj)->isStream()) {
329 goto err1;
331 more = readXRefStream(obj.getStream(), pos);
332 obj.free();
334 } else {
335 goto err1;
338 delete parser;
339 return more;
341 err1:
342 obj.free();
343 delete parser;
344 ok = gFalse;
345 return gFalse;
348 GBool XRef::readXRefTable(Parser *parser, Guint *pos) {
349 XRefEntry entry;
350 GBool more;
351 xObject obj, obj2;
352 Guint pos2;
353 int first, n, newSize, i;
355 while (1) {
356 parser->getObj(&obj);
357 if (obj.isCmd("trailer")) {
358 obj.free();
359 break;
361 if (!obj.isInt()) {
362 goto err1;
364 first = obj.getInt();
365 obj.free();
366 if (!parser->getObj(&obj)->isInt()) {
367 goto err1;
369 n = obj.getInt();
370 obj.free();
371 if (first < 0 || n < 0 || first + n < 0) {
372 goto err1;
374 if (first + n > size) {
375 for (newSize = size ? 2 * size : 1024;
376 first + n > newSize && newSize > 0;
377 newSize <<= 1) ;
378 if (newSize < 0) {
379 goto err1;
381 entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
382 for (i = size; i < newSize; ++i) {
383 entries[i].offset = 0xffffffff;
384 entries[i].type = xrefEntryFree;
386 size = newSize;
388 for (i = first; i < first + n; ++i) {
389 if (!parser->getObj(&obj)->isInt()) {
390 goto err1;
392 entry.offset = (Guint)obj.getInt();
393 obj.free();
394 if (!parser->getObj(&obj)->isInt()) {
395 goto err1;
397 entry.gen = obj.getInt();
398 obj.free();
399 parser->getObj(&obj);
400 if (obj.isCmd("n")) {
401 entry.type = xrefEntryUncompressed;
402 } else if (obj.isCmd("f")) {
403 entry.type = xrefEntryFree;
404 } else {
405 goto err1;
407 obj.free();
408 if (entries[i].offset == 0xffffffff) {
409 entries[i] = entry;
410 // PDF files of patents from the IBM Intellectual Property
411 // Network have a bug: the xref table claims to start at 1
412 // instead of 0.
413 if (i == 1 && first == 1 &&
414 entries[1].offset == 0 && entries[1].gen == 65535 &&
415 entries[1].type == xrefEntryFree) {
416 i = first = 0;
417 entries[0] = entries[1];
418 entries[1].offset = 0xffffffff;
424 // read the trailer dictionary
425 if (!parser->getObj(&obj)->isDict()) {
426 goto err1;
429 // get the 'Prev' pointer
430 obj.getDict()->lookupNF("Prev", &obj2);
431 if (obj2.isInt()) {
432 *pos = (Guint)obj2.getInt();
433 more = gTrue;
434 } else if (obj2.isRef()) {
435 // certain buggy PDF generators generate "/Prev NNN 0 R" instead
436 // of "/Prev NNN"
437 *pos = (Guint)obj2.getRefNum();
438 more = gTrue;
439 } else {
440 more = gFalse;
442 obj2.free();
444 // save the first trailer dictionary
445 if (trailerDict.isNone()) {
446 obj.copy(&trailerDict);
449 // check for an 'XRefStm' key
450 if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) {
451 pos2 = (Guint)obj2.getInt();
452 readXRef(&pos2);
453 if (!ok) {
454 obj2.free();
455 goto err1;
458 obj2.free();
460 obj.free();
461 return more;
463 err1:
464 obj.free();
465 ok = gFalse;
466 return gFalse;
469 GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) {
470 Dict *dict;
471 int w[3];
472 GBool more;
473 xObject obj, obj2, idx;
474 int newSize, first, n, i;
476 dict = xrefStr->getDict();
478 if (!dict->lookupNF("Size", &obj)->isInt()) {
479 goto err1;
481 newSize = obj.getInt();
482 obj.free();
483 if (newSize < 0) {
484 goto err1;
486 if (newSize > size) {
487 entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
488 for (i = size; i < newSize; ++i) {
489 entries[i].offset = 0xffffffff;
490 entries[i].type = xrefEntryFree;
492 size = newSize;
495 if (!dict->lookupNF("W", &obj)->isArray() ||
496 obj.arrayGetLength() < 3) {
497 goto err1;
499 for (i = 0; i < 3; ++i) {
500 if (!obj.arrayGet(i, &obj2)->isInt()) {
501 obj2.free();
502 goto err1;
504 w[i] = obj2.getInt();
505 obj2.free();
506 if (w[i] < 0 || w[i] > 4) {
507 goto err1;
510 obj.free();
512 xrefStr->reset();
513 dict->lookupNF("Index", &idx);
514 if (idx.isArray()) {
515 for (i = 0; i+1 < idx.arrayGetLength(); i += 2) {
516 if (!idx.arrayGet(i, &obj)->isInt()) {
517 idx.free();
518 goto err1;
520 first = obj.getInt();
521 obj.free();
522 if (!idx.arrayGet(i+1, &obj)->isInt()) {
523 idx.free();
524 goto err1;
526 n = obj.getInt();
527 obj.free();
528 if (first < 0 || n < 0 ||
529 !readXRefStreamSection(xrefStr, w, first, n)) {
530 idx.free();
531 goto err0;
534 } else {
535 if (!readXRefStreamSection(xrefStr, w, 0, newSize)) {
536 idx.free();
537 goto err0;
540 idx.free();
542 dict->lookupNF("Prev", &obj);
543 if (obj.isInt()) {
544 *pos = (Guint)obj.getInt();
545 more = gTrue;
546 } else {
547 more = gFalse;
549 obj.free();
550 if (trailerDict.isNone()) {
551 trailerDict.initDict(dict);
554 return more;
556 err1:
557 obj.free();
558 err0:
559 ok = gFalse;
560 return gFalse;
563 GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
564 Guint offset;
565 int type, gen, c, newSize, i, j;
567 if (first + n < 0) {
568 return gFalse;
570 if (first + n > size) {
571 for (newSize = size ? 2 * size : 1024;
572 first + n > newSize && newSize > 0;
573 newSize <<= 1) ;
574 if (newSize < 0) {
575 return gFalse;
577 entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
578 for (i = size; i < newSize; ++i) {
579 entries[i].offset = 0xffffffff;
580 entries[i].type = xrefEntryFree;
582 size = newSize;
584 for (i = first; i < first + n; ++i) {
585 if (w[0] == 0) {
586 type = 1;
587 } else {
588 for (type = 0, j = 0; j < w[0]; ++j) {
589 if ((c = xrefStr->getChar()) == EOF) {
590 return gFalse;
592 type = (type << 8) + c;
595 for (offset = 0, j = 0; j < w[1]; ++j) {
596 if ((c = xrefStr->getChar()) == EOF) {
597 return gFalse;
599 offset = (offset << 8) + c;
601 for (gen = 0, j = 0; j < w[2]; ++j) {
602 if ((c = xrefStr->getChar()) == EOF) {
603 return gFalse;
605 gen = (gen << 8) + c;
607 if (entries[i].offset == 0xffffffff) {
608 switch (type) {
609 case 0:
610 entries[i].offset = offset;
611 entries[i].gen = gen;
612 entries[i].type = xrefEntryFree;
613 break;
614 case 1:
615 entries[i].offset = offset;
616 entries[i].gen = gen;
617 entries[i].type = xrefEntryUncompressed;
618 break;
619 case 2:
620 entries[i].offset = offset;
621 entries[i].gen = gen;
622 entries[i].type = xrefEntryCompressed;
623 break;
624 default:
625 return gFalse;
630 return gTrue;
633 // Attempt to construct an xref table for a damaged file.
634 GBool XRef::constructXRef() {
635 Parser *parser;
636 xObject newTrailerDict, obj;
637 char buf[256];
638 Guint pos;
639 int num, gen;
640 int newSize;
641 int streamEndsSize;
642 char *p;
643 int i;
644 GBool gotRoot;
646 gfree(entries);
647 size = 0;
648 entries = NULL;
650 error(-1, "PDF file is damaged - attempting to reconstruct xref table...");
651 gotRoot = gFalse;
652 streamEndsLen = streamEndsSize = 0;
654 str->reset();
655 while (1) {
656 pos = str->getPos();
657 if (!str->getLine(buf, 256)) {
658 break;
660 p = buf;
662 // skip whitespace
663 while (*p && Lexer::isSpace(*p & 0xff)) ++p;
665 // got trailer dictionary
666 if (!strncmp(p, "trailer", 7)) {
667 obj.initNull();
668 parser = new Parser(NULL,
669 new Lexer(NULL,
670 str->makeSubStream(pos + 7, gFalse, 0, &obj)),
671 gFalse);
672 parser->getObj(&newTrailerDict);
673 if (newTrailerDict.isDict()) {
674 newTrailerDict.dictLookupNF("Root", &obj);
675 if (obj.isRef()) {
676 rootNum = obj.getRefNum();
677 rootGen = obj.getRefGen();
678 if (!trailerDict.isNone()) {
679 trailerDict.free();
681 newTrailerDict.copy(&trailerDict);
682 gotRoot = gTrue;
684 obj.free();
686 newTrailerDict.free();
687 delete parser;
689 // look for xObject
690 } else if (isdigit(*p)) {
691 num = atoi(p);
692 if (num > 0) {
693 do {
694 ++p;
695 } while (*p && isdigit(*p));
696 if (isspace(*p)) {
697 do {
698 ++p;
699 } while (*p && isspace(*p));
700 if (isdigit(*p)) {
701 gen = atoi(p);
702 do {
703 ++p;
704 } while (*p && isdigit(*p));
705 if (isspace(*p)) {
706 do {
707 ++p;
708 } while (*p && isspace(*p));
709 if (!strncmp(p, "obj", 3)) {
710 if (num >= size) {
711 newSize = (num + 1 + 255) & ~255;
712 if (newSize < 0) {
713 error(-1, "Bad xObject number");
714 return gFalse;
716 entries = (XRefEntry *)
717 greallocn(entries, newSize, sizeof(XRefEntry));
718 for (i = size; i < newSize; ++i) {
719 entries[i].offset = 0xffffffff;
720 entries[i].type = xrefEntryFree;
722 size = newSize;
724 if (entries[num].type == xrefEntryFree ||
725 gen >= entries[num].gen) {
726 entries[num].offset = pos - start;
727 entries[num].gen = gen;
728 entries[num].type = xrefEntryUncompressed;
736 } else if (!strncmp(p, "endstream", 9)) {
737 if (streamEndsLen == streamEndsSize) {
738 streamEndsSize += 64;
739 streamEnds = (Guint *)greallocn(streamEnds,
740 streamEndsSize, sizeof(int));
742 streamEnds[streamEndsLen++] = pos;
746 if (gotRoot)
747 return gTrue;
749 error(-1, "Couldn't find trailer dictionary");
750 return gFalse;
753 void XRef::setEncryption(int permFlagsA, GBool ownerPasswordOkA,
754 Guchar *fileKeyA, int keyLengthA, int encVersionA,
755 CryptAlgorithm encAlgorithmA) {
756 int i;
758 encrypted = gTrue;
759 permFlags = permFlagsA;
760 ownerPasswordOk = ownerPasswordOkA;
761 if (keyLengthA <= 16) {
762 keyLength = keyLengthA;
763 } else {
764 keyLength = 16;
766 for (i = 0; i < keyLength; ++i) {
767 fileKey[i] = fileKeyA[i];
769 encVersion = encVersionA;
770 encAlgorithm = encAlgorithmA;
773 GBool XRef::okToPrint(GBool ignoreOwnerPW) {
774 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint);
777 GBool XRef::okToChange(GBool ignoreOwnerPW) {
778 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange);
781 GBool XRef::okToCopy(GBool ignoreOwnerPW) {
782 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy);
785 GBool XRef::okToAddNotes(GBool ignoreOwnerPW) {
786 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes);
789 xObject *XRef::fetch(int num, int gen, xObject *obj) {
790 XRefEntry *e;
791 Parser *parser;
792 xObject obj1, obj2, obj3;
794 // check for bogus ref - this can happen in corrupted PDF files
795 if (num < 0 || num >= size) {
796 goto err;
799 e = &entries[num];
800 switch (e->type) {
802 case xrefEntryUncompressed:
803 if (e->gen != gen) {
804 goto err;
806 obj1.initNull();
807 parser = new Parser(this,
808 new Lexer(this,
809 str->makeSubStream(start + e->offset, gFalse, 0, &obj1)),
810 gTrue);
811 parser->getObj(&obj1);
812 parser->getObj(&obj2);
813 parser->getObj(&obj3);
814 if (!obj1.isInt() || obj1.getInt() != num ||
815 !obj2.isInt() || obj2.getInt() != gen ||
816 !obj3.isCmd("obj")) {
817 obj1.free();
818 obj2.free();
819 obj3.free();
820 delete parser;
821 goto err;
823 parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL,
824 encAlgorithm, keyLength, num, gen);
825 obj1.free();
826 obj2.free();
827 obj3.free();
828 delete parser;
829 break;
831 case xrefEntryCompressed:
832 if (gen != 0) {
833 goto err;
835 if (!objStr || objStr->getObjStrNum() != (int)e->offset) {
836 if (objStr) {
837 delete objStr;
839 objStr = new ObjectStream(this, e->offset);
841 objStr->getObject(e->gen, num, obj);
842 break;
844 default:
845 goto err;
848 return obj;
850 err:
851 return obj->initNull();
854 xObject *XRef::getDocInfo(xObject *obj) {
855 return trailerDict.dictLookup("Info", obj);
858 // Added for the pdftex project.
859 xObject *XRef::getDocInfoNF(xObject *obj) {
860 return trailerDict.dictLookupNF("Info", obj);
863 GBool XRef::getStreamEnd(Guint streamStart, Guint *streamEnd) {
864 int a, b, m;
866 if (streamEndsLen == 0 ||
867 streamStart > streamEnds[streamEndsLen - 1]) {
868 return gFalse;
871 a = -1;
872 b = streamEndsLen - 1;
873 // invariant: streamEnds[a] < streamStart <= streamEnds[b]
874 while (b - a > 1) {
875 m = (a + b) / 2;
876 if (streamStart <= streamEnds[m]) {
877 b = m;
878 } else {
879 a = m;
882 *streamEnd = streamEnds[b];
883 return gTrue;
886 Guint XRef::strToUnsigned(char *s) {
887 Guint x;
888 char *p;
889 int i;
891 x = 0;
892 for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
893 x = 10 * x + (*p - '0');
895 return x;