1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
9 //========================================================================
11 // Modified under the Poppler project - http://poppler.freedesktop.org
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
16 // Copyright (C) 2006, 2009, 201, 2010, 2013, 2014 Albert Astals Cid <aacid@kde.org>
17 // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
18 // Copyright (C) 2009 Ilya Gorenbein <igorenbein@finjan.com>
19 // Copyright (C) 2012 Hib Eris <hib@hiberis.nl>
20 // Copyright (C) 2013 Adrian Johnson <ajohnson@redneon.com>
21 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
23 // To see a description of the changes please see the Changelog file that
24 // came with your tarball or type make ChangeLog if you are building from git
26 //========================================================================
30 #ifdef USE_GCC_PRAGMAS
31 #pragma implementation
43 // Maximum nesting depth for objects. This is used to catch infinite loops
44 // in the object structure, and also technically valid files whose
45 // deeply nested arrays would otherwise make us consume all the stack
46 #define recursionLimit 500
// Parser constructor. Takes an XRef pointer, a Lexer pointer and a flag
// saying whether stream objects may be built by this parser.
// NOTE(review): only the allowStreams assignment is visible in this
// excerpt; how xrefA and lexerA are stored is not shown here.
48 Parser::Parser(XRef
*xrefA
, Lexer
*lexerA
, GBool allowStreamsA
) {
// remember the flag; getObj() checks it before calling makeStream()
52 allowStreams
= allowStreamsA
;
// Convenience overload of getObj(): parses the next object into *obj by
// forwarding to the full overload with simpleOnly = gFalse, a NULL third
// argument (presumably the file key, i.e. no decryption -- confirm against
// the full signature), cryptRC4 as the algorithm, zeros for the key
// length / object number / generation, and the caller's recursion depth.
63 Object
*Parser::getObj(Object
*obj
, int recursion
)
65 return getObj(obj
, gFalse
, NULL
, cryptRC4
, 0, 0, 0, recursion
);
// Full object parser. Fills *obj from the token buffers (buf1 / buf2) and
// returns an Object pointer. The cases visible in this excerpt are:
//   - arrays ("[" ... "]"),
//   - dictionaries ("<<" ... ">>"), optionally followed by a stream,
//   - integers and indirect references (<num> <int> R),
//   - strings, which are decrypted when fileKey is non-null,
//   - everything else, which is shallow-copied from buf1.
// Arrays and dictionaries are only entered when simpleOnly is false and
// recursion < recursionLimit; their children are parsed with recursion + 1.
// encAlgorithm, keyLength, objNum and objGen are passed through unchanged
// to nested getObj() calls and to makeStream() / DecryptStream.
68 Object
*Parser::getObj(Object
*obj
, GBool simpleOnly
,
70 CryptAlgorithm encAlgorithm
, int keyLength
,
71 int objNum
, int objGen
, int recursion
,
// stream used further down to decrypt string objects
77 DecryptStream
*decrypt
;
81 // refill buffer after inline image data
// array: only when compound objects are allowed and the nesting limit
// has not been reached
91 if (!simpleOnly
&& likely(recursion
< recursionLimit
) && buf1
.isCmd("[")) {
// parse elements recursively until the closing "]" or end of file
94 while (!buf1
.isCmd("]") && !buf1
.isEOF())
95 obj
->arrayAdd(getObj(&obj2
, gFalse
, fileKey
, encAlgorithm
, keyLength
,
96 objNum
, objGen
, recursion
+ 1));
// report an array cut off by end of file (the guarding condition is not
// visible in this excerpt)
98 error(errSyntaxError
, getPos(), "End of file inside array");
103 // dictionary or stream
104 } else if (!simpleOnly
&& likely(recursion
< recursionLimit
) && buf1
.isCmd("<<")) {
// read key/value pairs until the closing ">>" or end of file
107 while (!buf1
.isCmd(">>") && !buf1
.isEOF()) {
// keys must be name objects; in strict mode a bad key jumps to err
108 if (!buf1
.isName()) {
109 error(errSyntaxError
, getPos(), "Dictionary key must be a name object");
110 if (strict
) goto err
;
113 // buf1 might go away in shift(), so construct the key
114 key
= copyString(buf1
.getName());
// end of file or an error token after the key; in strict mode an error
// token jumps to err
116 if (buf1
.isEOF() || buf1
.isError()) {
118 if (strict
&& buf1
.isError()) goto err
;
// add the entry; its value is parsed recursively one nesting level deeper
121 obj
->dictAdd(key
, getObj(&obj2
, gFalse
, fileKey
, encAlgorithm
, keyLength
, objNum
, objGen
, recursion
+ 1));
// report a dictionary cut off by end of file (the guarding condition is
// not visible in this excerpt); strict mode jumps to err
125 error(errSyntaxError
, getPos(), "End of file inside dictionary");
126 if (strict
) goto err
;
128 // stream objects are not allowed inside content streams or
// a "stream" command in buf2 means the dictionary belongs to a stream
130 if (buf2
.isCmd("stream")) {
// build the stream only if this parser allows streams and makeStream()
// returns a non-null Stream
131 if (allowStreams
&& (str
= makeStream(obj
, fileKey
, encAlgorithm
, keyLength
,
132 objNum
, objGen
, recursion
+ 1,
// turn obj into a stream object wrapping str
134 obj
->initStream(str
);
143 // indirect reference or integer
144 } else if (buf1
.isInt()) {
// <num> <int> R: build an indirect reference from num and the integer
// currently in buf1
147 if (buf1
.isInt() && buf2
.isCmd("R")) {
148 obj
->initRef(num
, buf1
.getInt());
// string while a file key is present: the string gets decrypted
156 } else if (buf1
.isString() && fileKey
) {
157 s
= buf1
.getString();
158 s2
= new GooString();
// wrap the raw string bytes in a MemStream and read them back through a
// DecryptStream
160 decrypt
= new DecryptStream(new MemStream(s
->getCString(), 0,
161 s
->getLength(), &obj2
),
162 fileKey
, encAlgorithm
, keyLength
,
// pull decrypted characters one at a time until EOF
165 while ((c
= decrypt
->getChar()) != EOF
) {
174 // avoid re-allocating memory for complex objects like strings by
175 // shallow copy of <buf1> to <obj> and nulling <buf1> so that
176 // subsequent buf1.free() won't free this memory
177 buf1
.shallowCopy(obj
);
// Builds the Stream for a stream object whose dictionary is *dict.
// Steps visible in this excerpt:
//   1. move the lexer to the start of the stream data,
//   2. take the data length from the dictionary's "Length" entry (int or
//      int64), optionally overridden by xref->getStreamEnd(),
//   3. skip the data and check that 'endstream' follows, attempting to
//      recover the real length when it does not,
//   4. create a sub-stream of the base stream, wrap it in a DecryptStream
//      (the condition for this is not visible here) and apply the filters
//      described by the dictionary.
// Returns NULL on the visible strict-mode error paths.
191 Stream
*Parser::makeStream(Object
*dict
, Guchar
*fileKey
,
192 CryptAlgorithm encAlgorithm
, int keyLength
,
193 int objNum
, int objGen
, int recursion
,
201 // get stream start position
202 lexer
->skipToNextLine();
// no underlying stream available from the lexer
203 if (!(str
= lexer
->getStream())) {
// fetch the "Length" entry of the stream dictionary
209 dict
->dictLookup("Length", &obj
, recursion
);
// "Length" may be a plain int ...
211 length
= obj
.getInt();
// ... or a 64-bit integer
213 } else if (obj
.isInt64()) {
214 length
= obj
.getInt64();
// "Length" is neither of the accepted integer kinds
217 error(errSyntaxError
, getPos(), "Bad 'Length' attribute in stream");
219 if (strict
) return NULL
;
223 // check for length in damaged file
// when the xref knows where this stream ends, derive the length from that
224 if (xref
&& xref
->getStreamEnd(pos
, &endPos
)) {
225 length
= endPos
- pos
;
228 // in badly damaged PDF files, we can run off the end of the input
229 // stream immediately after the "stream" token
230 if (!lexer
->getStream()) {
// base stream from which the sub-stream is created below
233 baseStr
= lexer
->getStream()->getBaseStream();
235 // skip over stream data
236 if (Lexer::LOOK_VALUE_NOT_CACHED
!= lexer
->lookCharLastValueCached
) {
237 // take into account the fact that we've cached one value
// drop the lexer's cached look-ahead character
239 lexer
->lookCharLastValueCached
= Lexer::LOOK_VALUE_NOT_CACHED
;
// jump to the first byte after the stream data
241 lexer
->setPos(pos
+ length
);
243 // refill token buffers and check for 'endstream'
244 shift(); // kill '>>'
245 shift("endstream", objNum
); // kill 'stream'
246 if (buf1
.isCmd("endstream")) {
// 'endstream' was not where the length said it would be
249 error(errSyntaxError
, getPos(), "Missing 'endstream' or incorrect stream length");
250 if (strict
) return NULL
;
// recovery is attempted only with an xref and a usable lexer stream
251 if (xref
&& lexer
->getStream()) {
252 // shift until we find the proper endstream or we change to another object or reach eof
// recompute the length from the lexer's current position
253 length
= lexer
->getPos() - pos
;
// if 'endstream' was found, write the corrected length back into the
// dictionary as an int64
254 if (buf1
.isCmd("endstream")) {
255 obj
.initInt64(length
);
256 dict
->dictSet("Length", &obj
);
260 // When building the xref we can't use it so use this
261 // kludge for broken PDF files: just add 5k to the length, and
// sub-stream of the base stream starting at pos, limited to length
268 str
= baseStr
->makeSubStream(pos
, gTrue
, length
, dict
);
// wrap the sub-stream in a decrypting stream
// NOTE(review): the guarding condition is not visible in this excerpt
272 str
= new DecryptStream(str
, fileKey
, encAlgorithm
, keyLength
,
// apply the filters described by the stream dictionary
277 str
= str
->addFilters(dict
, recursion
);
// Advances the two-token look-ahead: buf2 is shallow-copied into buf1 and
// a new token is read from the lexer into buf2. Inline-image state
// (inlineImg, the 'ID' command) gets special handling first. objNum is
// passed through to lexer->getObj().
282 void Parser::shift(int objNum
) {
287 // in a damaged content stream, if 'ID' shows up in the middle
288 // of a dictionary, we need to reset
291 } else if (buf2
.isCmd("ID")) {
292 lexer
->skipChar(); // skip char after 'ID' command
// move the look-ahead token into the current-token slot
296 buf2
.shallowCopy(&buf1
);
297 if (inlineImg
> 0) // don't buffer inline image data
// read the next token into buf2
300 lexer
->getObj(&buf2
, objNum
);
// Variant of shift() that takes a command name (makeStream() calls it with
// "endstream"). It advances the look-ahead like shift(int), but picks the
// lexer call that refills buf2 based on whether buf1 already holds cmdA.
303 void Parser::shift(const char *cmdA
, int objNum
) {
308 // in a damaged content stream, if 'ID' shows up in the middle
309 // of a dictionary, we need to reset
312 } else if (buf2
.isCmd("ID")) {
313 lexer
->skipChar(); // skip char after 'ID' command
// move the look-ahead token into the current-token slot
317 buf2
.shallowCopy(&buf1
);
// buf1 already holds cmdA: read the next token the ordinary way
320 } else if (buf1
.isCmd(cmdA
)) {
321 lexer
->getObj(&buf2
, objNum
);
// presumably the fallback branch (its condition is not visible in this
// excerpt): use the lexer overload that is given cmdA
323 lexer
->getObj(&buf2
, cmdA
, objNum
);