beta-0.89.2
[luatex.git] / source / libs / poppler / poppler-src / poppler / Parser.cc
blob28a54607f5ce9b12ab73a6789af8da45b583bb2b
1 //========================================================================
2 //
3 // Parser.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
9 //========================================================================
11 // Modified under the Poppler project - http://poppler.freedesktop.org
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
16 // Copyright (C) 2006, 2009, 201, 2010, 2013, 2014 Albert Astals Cid <aacid@kde.org>
17 // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
18 // Copyright (C) 2009 Ilya Gorenbein <igorenbein@finjan.com>
19 // Copyright (C) 2012 Hib Eris <hib@hiberis.nl>
20 // Copyright (C) 2013 Adrian Johnson <ajohnson@redneon.com>
21 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
23 // To see a description of the changes please see the Changelog file that
24 // came with your tarball or type make ChangeLog if you are building from git
26 //========================================================================
28 #include <config.h>
30 #ifdef USE_GCC_PRAGMAS
31 #pragma implementation
32 #endif
#include <stddef.h>
#include <limits>
#include "Object.h"
#include "Array.h"
#include "Dict.h"
#include "Decrypt.h"
#include "Parser.h"
#include "XRef.h"
#include "Error.h"
// Maximum nesting depth of objects accepted by the parser. The limit
// catches infinite loops in the object structure, and also protects
// against technically valid files whose deeply nested arrays would
// otherwise make the recursive parser exhaust the stack.
static const int recursionLimit = 500;
48 Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) {
49 xref = xrefA;
50 lexer = lexerA;
51 inlineImg = 0;
52 allowStreams = allowStreamsA;
53 lexer->getObj(&buf1);
54 lexer->getObj(&buf2);
57 Parser::~Parser() {
58 buf1.free();
59 buf2.free();
60 delete lexer;
63 Object *Parser::getObj(Object *obj, int recursion)
65 return getObj(obj, gFalse, NULL, cryptRC4, 0, 0, 0, recursion);
68 Object *Parser::getObj(Object *obj, GBool simpleOnly,
69 Guchar *fileKey,
70 CryptAlgorithm encAlgorithm, int keyLength,
71 int objNum, int objGen, int recursion,
72 GBool strict) {
73 char *key;
74 Stream *str;
75 Object obj2;
76 int num;
77 DecryptStream *decrypt;
78 GooString *s, *s2;
79 int c;
81 // refill buffer after inline image data
82 if (inlineImg == 2) {
83 buf1.free();
84 buf2.free();
85 lexer->getObj(&buf1);
86 lexer->getObj(&buf2);
87 inlineImg = 0;
90 // array
91 if (!simpleOnly && likely(recursion < recursionLimit) && buf1.isCmd("[")) {
92 shift();
93 obj->initArray(xref);
94 while (!buf1.isCmd("]") && !buf1.isEOF())
95 obj->arrayAdd(getObj(&obj2, gFalse, fileKey, encAlgorithm, keyLength,
96 objNum, objGen, recursion + 1));
97 if (buf1.isEOF()) {
98 error(errSyntaxError, getPos(), "End of file inside array");
99 if (strict) goto err;
101 shift();
103 // dictionary or stream
104 } else if (!simpleOnly && likely(recursion < recursionLimit) && buf1.isCmd("<<")) {
105 shift(objNum);
106 obj->initDict(xref);
107 while (!buf1.isCmd(">>") && !buf1.isEOF()) {
108 if (!buf1.isName()) {
109 error(errSyntaxError, getPos(), "Dictionary key must be a name object");
110 if (strict) goto err;
111 shift();
112 } else {
113 // buf1 might go away in shift(), so construct the key
114 key = copyString(buf1.getName());
115 shift();
116 if (buf1.isEOF() || buf1.isError()) {
117 gfree(key);
118 if (strict && buf1.isError()) goto err;
119 break;
121 obj->dictAdd(key, getObj(&obj2, gFalse, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1));
124 if (buf1.isEOF()) {
125 error(errSyntaxError, getPos(), "End of file inside dictionary");
126 if (strict) goto err;
128 // stream objects are not allowed inside content streams or
129 // object streams
130 if (buf2.isCmd("stream")) {
131 if (allowStreams && (str = makeStream(obj, fileKey, encAlgorithm, keyLength,
132 objNum, objGen, recursion + 1,
133 strict))) {
134 obj->initStream(str);
135 } else {
136 obj->free();
137 obj->initError();
139 } else {
140 shift();
143 // indirect reference or integer
144 } else if (buf1.isInt()) {
145 num = buf1.getInt();
146 shift();
147 if (buf1.isInt() && buf2.isCmd("R")) {
148 obj->initRef(num, buf1.getInt());
149 shift();
150 shift();
151 } else {
152 obj->initInt(num);
155 // string
156 } else if (buf1.isString() && fileKey) {
157 s = buf1.getString();
158 s2 = new GooString();
159 obj2.initNull();
160 decrypt = new DecryptStream(new MemStream(s->getCString(), 0,
161 s->getLength(), &obj2),
162 fileKey, encAlgorithm, keyLength,
163 objNum, objGen);
164 decrypt->reset();
165 while ((c = decrypt->getChar()) != EOF) {
166 s2->append((char)c);
168 delete decrypt;
169 obj->initString(s2);
170 shift();
172 // simple object
173 } else {
174 // avoid re-allocating memory for complex objects like strings by
175 // shallow copy of <buf1> to <obj> and nulling <buf1> so that
176 // subsequent buf1.free() won't free this memory
177 buf1.shallowCopy(obj);
178 buf1.initNull();
179 shift();
182 return obj;
184 err:
185 obj->free();
186 obj->initError();
187 return obj;
191 Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
192 CryptAlgorithm encAlgorithm, int keyLength,
193 int objNum, int objGen, int recursion,
194 GBool strict) {
195 Object obj;
196 BaseStream *baseStr;
197 Stream *str;
198 Goffset length;
199 Goffset pos, endPos;
201 // get stream start position
202 lexer->skipToNextLine();
203 if (!(str = lexer->getStream())) {
204 return NULL;
206 pos = str->getPos();
208 // get length
209 dict->dictLookup("Length", &obj, recursion);
210 if (obj.isInt()) {
211 length = obj.getInt();
212 obj.free();
213 } else if (obj.isInt64()) {
214 length = obj.getInt64();
215 obj.free();
216 } else {
217 error(errSyntaxError, getPos(), "Bad 'Length' attribute in stream");
218 obj.free();
219 if (strict) return NULL;
220 length = 0;
223 // check for length in damaged file
224 if (xref && xref->getStreamEnd(pos, &endPos)) {
225 length = endPos - pos;
228 // in badly damaged PDF files, we can run off the end of the input
229 // stream immediately after the "stream" token
230 if (!lexer->getStream()) {
231 return NULL;
233 baseStr = lexer->getStream()->getBaseStream();
235 // skip over stream data
236 if (Lexer::LOOK_VALUE_NOT_CACHED != lexer->lookCharLastValueCached) {
237 // take into account the fact that we've cached one value
238 pos = pos - 1;
239 lexer->lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED;
241 lexer->setPos(pos + length);
243 // refill token buffers and check for 'endstream'
244 shift(); // kill '>>'
245 shift("endstream", objNum); // kill 'stream'
246 if (buf1.isCmd("endstream")) {
247 shift();
248 } else {
249 error(errSyntaxError, getPos(), "Missing 'endstream' or incorrect stream length");
250 if (strict) return NULL;
251 if (xref && lexer->getStream()) {
252 // shift until we find the proper endstream or we change to another object or reach eof
253 length = lexer->getPos() - pos;
254 if (buf1.isCmd("endstream")) {
255 obj.initInt64(length);
256 dict->dictSet("Length", &obj);
257 obj.free();
259 } else {
260 // When building the xref we can't use it so use this
261 // kludge for broken PDF files: just add 5k to the length, and
262 // hope its enough
263 length += 5000;
267 // make base stream
268 str = baseStr->makeSubStream(pos, gTrue, length, dict);
270 // handle decryption
271 if (fileKey) {
272 str = new DecryptStream(str, fileKey, encAlgorithm, keyLength,
273 objNum, objGen);
276 // get filters
277 str = str->addFilters(dict, recursion);
279 return str;
282 void Parser::shift(int objNum) {
283 if (inlineImg > 0) {
284 if (inlineImg < 2) {
285 ++inlineImg;
286 } else {
287 // in a damaged content stream, if 'ID' shows up in the middle
288 // of a dictionary, we need to reset
289 inlineImg = 0;
291 } else if (buf2.isCmd("ID")) {
292 lexer->skipChar(); // skip char after 'ID' command
293 inlineImg = 1;
295 buf1.free();
296 buf2.shallowCopy(&buf1);
297 if (inlineImg > 0) // don't buffer inline image data
298 buf2.initNull();
299 else
300 lexer->getObj(&buf2, objNum);
303 void Parser::shift(const char *cmdA, int objNum) {
304 if (inlineImg > 0) {
305 if (inlineImg < 2) {
306 ++inlineImg;
307 } else {
308 // in a damaged content stream, if 'ID' shows up in the middle
309 // of a dictionary, we need to reset
310 inlineImg = 0;
312 } else if (buf2.isCmd("ID")) {
313 lexer->skipChar(); // skip char after 'ID' command
314 inlineImg = 1;
316 buf1.free();
317 buf2.shallowCopy(&buf1);
318 if (inlineImg > 0) {
319 buf2.initNull();
320 } else if (buf1.isCmd(cmdA)) {
321 lexer->getObj(&buf2, objNum);
322 } else {
323 lexer->getObj(&buf2, cmdA, objNum);