1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
9 //========================================================================
11 // Modified under the Poppler project - http://poppler.freedesktop.org
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
16 // Copyright (C) 2006-2010, 2012-2014 Albert Astals Cid <aacid@kde.org>
17 // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
18 // Copyright (C) 2010 Carlos Garcia Campos <carlosgc@gnome.org>
19 // Copyright (C) 2012, 2013 Adrian Johnson <ajohnson@redneon.com>
20 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
22 // To see a description of the changes please see the Changelog file that
23 // came with your tarball or type make ChangeLog if you are building from git
25 //========================================================================
29 #ifdef USE_GCC_PRAGMAS
30 #pragma implementation
42 //------------------------------------------------------------------------
// Character classification table, indexed by byte value (0x00 - 0xff).
//
//   1 : white space -- 0x00 (NUL), 0x09 (HT), 0x0a (LF), 0x0c (FF),
//       0x0d (CR), 0x20 (space)
//   2 : delimiter   -- % ( ) / < > [ ] { }
//   0 : any other character
//
// A '1' in this array means the character is white space.  A '1' or
// '2' means the character ends a name or command.
static const char specialChars[256] = {
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
};
// Thresholds used while accumulating decimal digits into an int /
// long long.  For any value v <= limit, v * 10 + d (with 0 <= d <= 9)
// still fits in the corresponding type, so the exact (and more
// expensive) overflow test is only needed once v exceeds the limit.
static const int IntegerSafeLimit = (INT_MAX - 9) / 10;
static const long long LongLongSafeLimit = (LLONG_MAX - 9) / 10;
68 //------------------------------------------------------------------------
70 //------------------------------------------------------------------------
72 Lexer::Lexer(XRef
*xrefA
, Stream
*str
) {
75 lookCharLastValueCached
= LOOK_VALUE_NOT_CACHED
;
78 curStr
.initStream(str
);
79 streams
= new Array(xref
);
80 streams
->add(curStr
.copy(&obj
));
86 Lexer::Lexer(XRef
*xrefA
, Object
*obj
) {
89 lookCharLastValueCached
= LOOK_VALUE_NOT_CACHED
;
92 if (obj
->isStream()) {
93 streams
= new Array(xref
);
95 streams
->add(obj
->copy(&obj2
));
97 streams
= obj
->getArray();
101 if (streams
->getLength() > 0) {
102 streams
->get(strPtr
, &curStr
);
103 curStr
.streamReset();
108 if (!curStr
.isNone()) {
109 curStr
.streamClose();
117 int Lexer::getChar(GBool comesFromLook
) {
120 if (LOOK_VALUE_NOT_CACHED
!= lookCharLastValueCached
) {
121 c
= lookCharLastValueCached
;
122 lookCharLastValueCached
= LOOK_VALUE_NOT_CACHED
;
127 while (!curStr
.isNone() && (c
= curStr
.streamGetChar()) == EOF
) {
128 if (comesFromLook
== gTrue
) {
131 curStr
.streamClose();
134 if (strPtr
< streams
->getLength()) {
135 streams
->get(strPtr
, &curStr
);
136 curStr
.streamReset();
143 int Lexer::lookChar() {
145 if (LOOK_VALUE_NOT_CACHED
!= lookCharLastValueCached
) {
146 return lookCharLastValueCached
;
148 lookCharLastValueCached
= getChar(gTrue
);
149 if (lookCharLastValueCached
== EOF
) {
150 lookCharLastValueCached
= LOOK_VALUE_NOT_CACHED
;
153 return lookCharLastValueCached
;
157 Object
*Lexer::getObj(Object
*obj
, int objNum
) {
160 GBool comment
, neg
, done
, overflownInteger
, overflownLongLong
;
164 double xf
= 0, scale
;
168 // skip whitespace and comments
171 if ((c
= getChar()) == EOF
) {
172 return obj
->initEOF();
175 if (c
== '\r' || c
== '\n')
177 } else if (c
== '%') {
179 } else if (specialChars
[c
] != 1) {
184 // start reading token
188 case '0': case '1': case '2': case '3': case '4':
189 case '5': case '6': case '7': case '8': case '9':
190 case '+': case '-': case '.':
191 overflownInteger
= gFalse
;
192 overflownLongLong
= gFalse
;
197 } else if (c
== '.') {
199 } else if (c
!= '+') {
206 if (unlikely(overflownLongLong
)) {
207 xf
= xf
* 10.0 + (c
- '0');
208 } else if (unlikely (overflownInteger
)) {
209 if (unlikely(xll
> LongLongSafeLimit
) &&
210 (xll
> (LLONG_MAX
- (c
- '0')) / 10.0)) {
211 overflownLongLong
= gTrue
;
212 xf
= xll
* 10.0 + (c
- '0');
214 xll
= xll
* 10 + (c
- '0');
217 if (unlikely(xi
> IntegerSafeLimit
) &&
218 (xi
> (INT_MAX
- (c
- '0')) / 10.0)) {
219 overflownInteger
= gTrue
;
220 xll
= xi
* 10LL + (c
- '0');
222 xi
= xi
* 10 + (c
- '0');
225 } else if (c
== '.') {
237 if (unlikely(overflownInteger
)) {
238 if (overflownLongLong
) {
241 if (unlikely(xll
== INT_MIN
)) {
242 obj
->initInt(INT_MIN
);
252 if (likely(!overflownInteger
)) {
254 } else if (!overflownLongLong
) {
261 // ignore minus signs in the middle of numbers to match
263 error(errSyntaxWarning
, getPos(), "Badly formatted number");
271 xf
= xf
+ scale
* (c
- '0');
289 switch (c
= getChar()) {
293 // This breaks some PDF files, e.g., ones from Photoshop.
297 error(errSyntaxError
, getPos(), "Unterminated string");
307 if (--numParen
== 0) {
315 switch (c
= getChar()) {
336 case '0': case '1': case '2': case '3':
337 case '4': case '5': case '6': case '7':
340 if (c
>= '0' && c
<= '7') {
342 c2
= (c2
<< 3) + (c
- '0');
344 if (c
>= '0' && c
<= '7') {
346 c2
= (c2
<< 3) + (c
- '0');
359 error(errSyntaxError
, getPos(), "Unterminated string");
374 if (n
== tokBufSize
) {
376 s
= new GooString(tokBuf
, tokBufSize
);
378 s
->append(tokBuf
, tokBufSize
);
382 // we are growing see if the document is not malformed and we are growing too much
383 if (objNum
> 0 && xref
!= NULL
)
385 int newObjNum
= xref
->getNumEntry(curStr
.streamGetPos());
386 if (newObjNum
!= objNum
)
388 error(errSyntaxError
, getPos(), "Unterminated string");
401 s
= new GooString(tokBuf
, n
);
403 s
->append(tokBuf
, n
);
415 while ((c
= lookChar()) != EOF
&& !specialChars
[c
]) {
419 if (c2
>= '0' && c2
<= '9') {
421 } else if (c2
>= 'A' && c2
<= 'F') {
423 } else if (c2
>= 'a' && c2
<= 'f') {
431 if (c2
>= '0' && c2
<= '9') {
433 } else if (c2
>= 'A' && c2
<= 'F') {
435 } else if (c2
>= 'a' && c2
<= 'f') {
438 error(errSyntaxError
, getPos(), "Illegal digit in hex char in name");
442 // the PDF spec claims that names are limited to 127 chars, but
443 // Distiller 8 will produce longer names, and Acrobat 8 will
444 // accept longer names
446 if (n
< tokBufSize
) {
448 } else if (n
== tokBufSize
) {
449 error(errSyntaxError
, getPos(), "Warning: name token is longer than what the specification says it can be");
451 s
= new GooString(tokBuf
, n
);
456 if (n
< tokBufSize
) {
458 obj
->initName(tokBuf
);
460 obj
->initName(s
->getCString());
470 obj
->initCmd(tokBuf
);
473 // hex string or dict punctuation
480 tokBuf
[0] = tokBuf
[1] = '<';
482 obj
->initCmd(tokBuf
);
494 } else if (c
== EOF
) {
495 error(errSyntaxError
, getPos(), "Unterminated hex string");
497 } else if (specialChars
[c
] != 1) {
499 if (c
>= '0' && c
<= '9')
501 else if (c
>= 'A' && c
<= 'F')
503 else if (c
>= 'a' && c
<= 'f')
506 error(errSyntaxError
, getPos(), "Illegal character <{0:02x}> in hex string", c
);
508 if (n
== tokBufSize
) {
510 s
= new GooString(tokBuf
, tokBufSize
);
512 s
->append(tokBuf
, tokBufSize
);
524 s
= new GooString(tokBuf
, n
);
526 s
->append(tokBuf
, n
);
528 s
->append((char)(c2
<< 4));
538 tokBuf
[0] = tokBuf
[1] = '>';
540 obj
->initCmd(tokBuf
);
542 error(errSyntaxError
, getPos(), "Illegal character '>'");
551 error(errSyntaxError
, getPos(), "Illegal character '{0:c}'", c
);
560 while ((c
= lookChar()) != EOF
&& !specialChars
[c
]) {
562 if (++n
== tokBufSize
) {
563 error(errSyntaxError
, getPos(), "Command token too long");
569 if (tokBuf
[0] == 't' && !strcmp(tokBuf
, "true")) {
570 obj
->initBool(gTrue
);
571 } else if (tokBuf
[0] == 'f' && !strcmp(tokBuf
, "false")) {
572 obj
->initBool(gFalse
);
573 } else if (tokBuf
[0] == 'n' && !strcmp(tokBuf
, "null")) {
576 obj
->initCmd(tokBuf
);
584 Object
*Lexer::getObj(Object
*obj
, const char *cmdA
, int objNum
) {
590 // skip whitespace and comments
592 const char *cmd1
= tokBuf
;
594 while (strcmp(cmdA
, cmd1
) && (objNum
< 0 || (xref
&& xref
->getNumEntry(getPos()) == objNum
))) {
596 if ((c
= getChar()) == EOF
) {
597 return obj
->initEOF();
600 if (c
== '\r' || c
== '\n') {
603 } else if (c
== '%') {
605 } else if (specialChars
[c
] != 1) {
612 while ((c
= lookChar()) != EOF
&& specialChars
[c
] == 0) {
614 if (++n
== tokBufSize
) {
621 obj
->initCmd(tokBuf
);
626 void Lexer::skipToNextLine() {
631 if (c
== EOF
|| c
== '\n') {
635 if ((c
= lookChar()) == '\n') {
643 GBool
Lexer::isSpace(int c
) {
644 return c
>= 0 && c
<= 0xff && specialChars
[c
] == 1;