1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
22 //------------------------------------------------------------------------
24 // A '1' in this array means the character is white space. A '1' or
25 // '2' means the character ends a name or command.
//
// The table has one entry per byte value; each row below holds sixteen
// consecutive codes and is labelled with its high hex digit.
//   value 1: 0x00, 0x09 (TAB), 0x0a (LF), 0x0c (FF), 0x0d (CR), 0x20 (space)
//   value 2: % ( ) / < > [ ] { }
// Every other entry is 0.
26 static char specialChars
[256] = {
27 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
28 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
29 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
30 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x
31 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
32 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x
33 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x
34 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x
35 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
36 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
37 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax
38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx
39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx
40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // fx
45 //------------------------------------------------------------------------
47 //------------------------------------------------------------------------
// Construct a lexer that reads from a single stream.
// The stream is wrapped in curStr, and a copy of that wrapper becomes the
// only element added here to a newly allocated stream array.
// NOTE(review): obj is not declared in the lines visible in this listing;
// presumably a local temporary declared on an omitted line -- confirm.
49 Lexer::Lexer(XRef
*xref
, Stream
*str
) {
// wrap the caller's stream in the current-stream object
52 curStr
.initStream(str
);
// allocate the array of input streams, passing xref to its constructor
53 streams
= new Array(xref
);
// store a copy of the current-stream object in the array
54 streams
->add(curStr
.copy(&obj
));
// Construct a lexer from an object that is either a single stream or
// something that provides an array (obtained via getArray()).
// NOTE(review): obj2 and the initial value of strPtr are set on lines not
// visible in this listing -- confirm against the complete file.
60 Lexer::Lexer(XRef
*xref
, xObject
*obj
) {
// single stream: allocate a fresh array and add a copy of the object to it
63 if (obj
->isStream()) {
64 streams
= new Array(xref
);
66 streams
->add(obj
->copy(&obj2
));
// presumably the else branch (the "else" line is not visible here):
// use the array returned by the object directly
68 streams
= obj
->getArray();
// when the array is not empty, fetch the entry at index strPtr into curStr
72 if (streams
->getLength() > 0) {
73 streams
->get(strPtr
, &curStr
);
// a current stream was obtained; the action taken is on lines not visible
// here
79 if (!curStr
.isNone()) {
// Read the next character from the input.
// While a current stream exists and it reports EOF, the loop below tries to
// switch to another entry of the stream array.
// NOTE(review): the declaration of c, the update of strPtr and the return
// statement are on lines not visible in this listing.
88 int Lexer::getChar() {
// keep going as long as there is a current stream and it is exhausted
92 while (!curStr
.isNone() && (c
= curStr
.streamGetChar()) == EOF
) {
// still inside the stream array: load the entry at strPtr into curStr
96 if (strPtr
< streams
->getLength()) {
97 streams
->get(strPtr
, &curStr
);
// Return the result of streamLookChar() on the current stream.
// NOTE(review): judging by the Look/Get naming this presumably peeks
// without consuming the character -- confirm in the Stream API.
104 int Lexer::lookChar() {
// no current stream: handled separately (the value returned for this case
// is on a line not visible in this listing)
105 if (curStr
.isNone()) {
108 return curStr
.streamLookChar();
// Read the next token from the input and store it in obj through one of
// the init calls visible below (initEOF, initName, initCmd, initBool).
// At end of input the result of obj->initEOF() is returned.
// NOTE(review): this listing omits many lines of the function (the main
// switch header, most case labels, closing braces, the declarations of c,
// c2, n, s, xi, xf, scale and numParen, and the final return), so the
// comments below describe only what the visible lines do.
111 xObject
*Lexer::getObj(xObject
*obj
) {
114 GBool comment
, neg
, done
;
121 // skip whitespace and comments
// running out of input while skipping yields an EOF object
124 if ((c
= getChar()) == EOF
) {
125 return obj
->initEOF();
// CR or LF seen (the statement it guards is not visible here; presumably
// it ends a comment)
128 if (c
== '\r' || c
== '\n')
// percent sign seen (presumably starts a comment; body not visible)
130 } else if (c
== '%') {
// any character whose table entry is not 1, i.e. not white space
132 } else if (specialChars
[c
] != 1) {
137 // start reading token
// tokens that begin with a decimal digit
141 case '0': case '1': case '2': case '3': case '4':
142 case '5': case '6': case '7': case '8': case '9':
148 } else if (c
== '.') {
// fold one more decimal digit into the integer value
157 xi
= xi
* 10 + (c
- '0');
158 } else if (c
== '.') {
175 // ignore minus signs in the middle of numbers to match
// NOTE(review): the sentence above is cut off in this listing.
177 error(getPos(), "Badly formatted number");
// add one digit, weighted by the current scale, to the real value
185 xf
= xf
+ scale
* (c
- '0');
// string scanning: dispatch on the next character read
202 switch (c
= getChar()) {
206 // This breaks some PDF files, e.g., ones from Photoshop.
210 error(getPos(), "Unterminated string");
// decrement the parenthesis counter and test whether it reached zero
220 if (--numParen
== 0) {
// second dispatch on a freshly read character (presumably the character
// following a backslash; the enclosing case label is not visible)
228 switch (c
= getChar()) {
// octal digit: up to two further octal digits are folded into c2 below
249 case '0': case '1': case '2': case '3':
250 case '4': case '5': case '6': case '7':
253 if (c
>= '0' && c
<= '7') {
255 c2
= (c2
<< 3) + (c
- '0');
257 if (c
>= '0' && c
<= '7') {
259 c2
= (c2
<< 3) + (c
- '0');
272 error(getPos(), "Unterminated string");
// token buffer is full: move its contents into the GString s, either by
// creating s or by appending to it (the test that selects between the two
// is not visible here)
287 if (n
== tokBufSize
) {
289 s
= new GString(tokBuf
, tokBufSize
);
291 s
->append(tokBuf
, tokBufSize
);
// transfer of the first n buffered characters into s (create or append)
300 s
= new GString(tokBuf
, n
);
302 s
->append(tokBuf
, n
);
// name scanning: continue while the next character is neither EOF nor
// flagged in specialChars
310 while ((c
= lookChar()) != EOF
&& !specialChars
[c
]) {
// classify c2 as a hexadecimal digit (decimal, upper case, lower case)
314 if (c2
>= '0' && c2
<= '9') {
316 } else if (c2
>= 'A' && c2
<= 'F') {
318 } else if (c2
>= 'a' && c2
<= 'f') {
// same classification applied again (presumably to the second digit of a
// two-digit hex escape; the intervening lines are not visible)
326 if (c2
>= '0' && c2
<= '9') {
328 } else if (c2
>= 'A' && c2
<= 'F') {
330 } else if (c2
>= 'a' && c2
<= 'f') {
333 error(getPos(), "Illegal digit in hex char in name");
// report names that reach the token buffer size
337 if (++n
== tokBufSize
) {
338 error(getPos(), "Name token too long");
// hand the collected buffer to obj as a name
344 obj
->initName(tokBuf
);
// hand the buffer to obj as a command
352 obj
->initCmd(tokBuf
);
355 // hex string or dict punctuation
// two left angle brackets are stored in the buffer and emitted as a command
362 tokBuf
[0] = tokBuf
[1] = '<';
364 obj
->initCmd(tokBuf
);
// input ended inside a hex string
376 } else if (c
== EOF
) {
377 error(getPos(), "Unterminated hex string");
// characters that are not white space are examined as hex digits
379 } else if (specialChars
[c
] != 1) {
381 if (c
>= '0' && c
<= '9')
383 else if (c
>= 'A' && c
<= 'F')
385 else if (c
>= 'a' && c
<= 'f')
388 error(getPos(), "Illegal character <%02x> in hex string", c
);
// token buffer is full: move its contents into s (create or append)
390 if (n
== tokBufSize
) {
392 s
= new GString(tokBuf
, tokBufSize
);
394 s
->append(tokBuf
, tokBufSize
);
// transfer of the first n buffered characters into s (create or append)
406 s
= new GString(tokBuf
, n
);
408 s
->append(tokBuf
, n
);
// append c2 shifted into the high four bits as one more character
// (presumably a trailing unpaired hex digit; the guarding test is not
// visible here)
410 s
->append((char)(c2
<< 4));
// two right angle brackets are stored in the buffer and emitted as a command
420 tokBuf
[0] = tokBuf
[1] = '>';
422 obj
->initCmd(tokBuf
);
424 error(getPos(), "Illegal character '>'");
433 error(getPos(), "Illegal character '%c'", c
);
// command scanning: continue while the next character is neither EOF nor
// flagged in specialChars
442 while ((c
= lookChar()) != EOF
&& !specialChars
[c
]) {
// report commands that reach the token buffer size
444 if (++n
== tokBufSize
) {
445 error(getPos(), "Command token too long");
// keywords: the first character is compared before calling strcmp
451 if (tokBuf
[0] == 't' && !strcmp(tokBuf
, "true")) {
452 obj
->initBool(gTrue
);
453 } else if (tokBuf
[0] == 'f' && !strcmp(tokBuf
, "false")) {
454 obj
->initBool(gFalse
);
// "null" keyword (the statement in this branch is not visible here)
455 } else if (tokBuf
[0] == 'n' && !strcmp(tokBuf
, "null")) {
// anything else is emitted as a command
458 obj
->initCmd(tokBuf
);
// Skip input up to a line boundary.
// NOTE(review): only the two tests below are visible in this listing; the
// loop, the read of c and the handling of a carriage return are on omitted
// lines -- confirm against the complete file.
466 void Lexer::skipToNextLine() {
// end of input or a line feed
471 if (c
== EOF
|| c
== '\n') {
// the upcoming character is a line feed (presumably the LF of a CR LF
// pair)
475 if ((c
= lookChar()) == '\n') {
// Report whether c is a white-space character.
// True only when c lies in the range 0..0xff and its specialChars entry
// is 1; the range test runs first, so values outside 0..0xff (such as a
// negative EOF) never index the table.
483 GBool
Lexer::isSpace(int c
) {
484 return c
>= 0 && c
<= 0xff && specialChars
[c
] == 1;