Bringing apdf from vendor into main branch.
[AROS-Contrib.git] / apdf / xpdf / Lexer.cc
blob9f0c3ca8f361d46e151c00e3e0857d3f0a1dac5b
1 //========================================================================
2 //
3 // Lexer.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
#include <aconf.h>

#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif

#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include "Lexer.h"
#include "Error.h"
22 //------------------------------------------------------------------------
// Character classification table, indexed by character code (0..255).
//   1 = white space (NUL, HT, LF, FF, CR, space)
//   2 = delimiter ('%', '(', ')', '/', '<', '>', '[', ']', '{', '}')
//   0 = regular character
// Any non-zero entry (a '1' or a '2') ends a name or command token.
// The table is only ever read, so it is declared const.
static const char specialChars[256] = {
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x  NUL HT LF FF CR
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x  space % ( ) /
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x  < >
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x  [ ]
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x  { }
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
};
45 //------------------------------------------------------------------------
46 // Lexer
47 //------------------------------------------------------------------------
49 Lexer::Lexer(XRef *xref, Stream *str) {
50 Object obj;
52 curStr.initStream(str);
53 streams = new Array(xref);
54 streams->add(curStr.copy(&obj));
55 strPtr = 0;
56 freeArray = gTrue;
57 curStr.streamReset();
60 Lexer::Lexer(XRef *xref, Object *obj) {
61 Object obj2;
63 if (obj->isStream()) {
64 streams = new Array(xref);
65 freeArray = gTrue;
66 streams->add(obj->copy(&obj2));
67 } else {
68 streams = obj->getArray();
69 freeArray = gFalse;
71 strPtr = 0;
72 if (streams->getLength() > 0) {
73 streams->get(strPtr, &curStr);
74 curStr.streamReset();
78 Lexer::~Lexer() {
79 if (!curStr.isNone()) {
80 curStr.streamClose();
81 curStr.free();
83 if (freeArray) {
84 delete streams;
88 int Lexer::getChar() {
89 int c;
91 c = EOF;
92 while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
93 curStr.streamClose();
94 curStr.free();
95 ++strPtr;
96 if (strPtr < streams->getLength()) {
97 streams->get(strPtr, &curStr);
98 curStr.streamReset();
101 return c;
104 int Lexer::lookChar() {
105 if (curStr.isNone()) {
106 return EOF;
108 return curStr.streamLookChar();
111 Object *Lexer::getObj(Object *obj) {
112 char *p;
113 int c, c2;
114 GBool comment, neg, done;
115 int numParen;
116 int xi;
117 double xf, scale;
118 GString *s;
119 int n, m;
121 // skip whitespace and comments
122 comment = gFalse;
123 while (1) {
124 if ((c = getChar()) == EOF) {
125 return obj->initEOF();
127 if (comment) {
128 if (c == '\r' || c == '\n')
129 comment = gFalse;
130 } else if (c == '%') {
131 comment = gTrue;
132 } else if (specialChars[c] != 1) {
133 break;
137 // start reading token
138 switch (c) {
140 // number
141 case '0': case '1': case '2': case '3': case '4':
142 case '5': case '6': case '7': case '8': case '9':
143 case '-': case '.':
144 neg = gFalse;
145 xi = 0;
146 if (c == '-') {
147 neg = gTrue;
148 } else if (c == '.') {
149 goto doReal;
150 } else {
151 xi = c - '0';
153 while (1) {
154 c = lookChar();
155 if (isdigit(c)) {
156 getChar();
157 xi = xi * 10 + (c - '0');
158 } else if (c == '.') {
159 getChar();
160 goto doReal;
161 } else {
162 break;
165 if (neg)
166 xi = -xi;
167 obj->initInt(xi);
168 break;
169 doReal:
170 xf = xi;
171 scale = 0.1;
172 while (1) {
173 c = lookChar();
174 if (c == '-') {
175 // ignore minus signs in the middle of numbers to match
176 // Adobe's behavior
177 error(getPos(), "Badly formatted number");
178 getChar();
179 continue;
181 if (!isdigit(c)) {
182 break;
184 getChar();
185 xf = xf + scale * (c - '0');
186 scale *= 0.1;
188 if (neg)
189 xf = -xf;
190 obj->initReal(xf);
191 break;
193 // string
194 case '(':
195 p = tokBuf;
196 n = 0;
197 numParen = 1;
198 done = gFalse;
199 s = NULL;
200 do {
201 c2 = EOF;
202 switch (c = getChar()) {
204 case EOF:
205 #if 0
206 // This breaks some PDF files, e.g., ones from Photoshop.
207 case '\r':
208 case '\n':
209 #endif
210 error(getPos(), "Unterminated string");
211 done = gTrue;
212 break;
214 case '(':
215 ++numParen;
216 c2 = c;
217 break;
219 case ')':
220 if (--numParen == 0) {
221 done = gTrue;
222 } else {
223 c2 = c;
225 break;
227 case '\\':
228 switch (c = getChar()) {
229 case 'n':
230 c2 = '\n';
231 break;
232 case 'r':
233 c2 = '\r';
234 break;
235 case 't':
236 c2 = '\t';
237 break;
238 case 'b':
239 c2 = '\b';
240 break;
241 case 'f':
242 c2 = '\f';
243 break;
244 case '\\':
245 case '(':
246 case ')':
247 c2 = c;
248 break;
249 case '0': case '1': case '2': case '3':
250 case '4': case '5': case '6': case '7':
251 c2 = c - '0';
252 c = lookChar();
253 if (c >= '0' && c <= '7') {
254 getChar();
255 c2 = (c2 << 3) + (c - '0');
256 c = lookChar();
257 if (c >= '0' && c <= '7') {
258 getChar();
259 c2 = (c2 << 3) + (c - '0');
262 break;
263 case '\r':
264 c = lookChar();
265 if (c == '\n') {
266 getChar();
268 break;
269 case '\n':
270 break;
271 case EOF:
272 error(getPos(), "Unterminated string");
273 done = gTrue;
274 break;
275 default:
276 c2 = c;
277 break;
279 break;
281 default:
282 c2 = c;
283 break;
286 if (c2 != EOF) {
287 if (n == tokBufSize) {
288 if (!s)
289 s = new GString(tokBuf, tokBufSize);
290 else
291 s->append(tokBuf, tokBufSize);
292 p = tokBuf;
293 n = 0;
295 *p++ = (char)c2;
296 ++n;
298 } while (!done);
299 if (!s)
300 s = new GString(tokBuf, n);
301 else
302 s->append(tokBuf, n);
303 obj->initString(s);
304 break;
306 // name
307 case '/':
308 p = tokBuf;
309 n = 0;
310 while ((c = lookChar()) != EOF && !specialChars[c]) {
311 getChar();
312 if (c == '#') {
313 c2 = lookChar();
314 if (c2 >= '0' && c2 <= '9') {
315 c = c2 - '0';
316 } else if (c2 >= 'A' && c2 <= 'F') {
317 c = c2 - 'A' + 10;
318 } else if (c2 >= 'a' && c2 <= 'f') {
319 c = c2 - 'a' + 10;
320 } else {
321 goto notEscChar;
323 getChar();
324 c <<= 4;
325 c2 = getChar();
326 if (c2 >= '0' && c2 <= '9') {
327 c += c2 - '0';
328 } else if (c2 >= 'A' && c2 <= 'F') {
329 c += c2 - 'A' + 10;
330 } else if (c2 >= 'a' && c2 <= 'f') {
331 c += c2 - 'a' + 10;
332 } else {
333 error(getPos(), "Illegal digit in hex char in name");
336 notEscChar:
337 if (++n == tokBufSize) {
338 error(getPos(), "Name token too long");
339 break;
341 *p++ = c;
343 *p = '\0';
344 obj->initName(tokBuf);
345 break;
347 // array punctuation
348 case '[':
349 case ']':
350 tokBuf[0] = c;
351 tokBuf[1] = '\0';
352 obj->initCmd(tokBuf);
353 break;
355 // hex string or dict punctuation
356 case '<':
357 c = lookChar();
359 // dict punctuation
360 if (c == '<') {
361 getChar();
362 tokBuf[0] = tokBuf[1] = '<';
363 tokBuf[2] = '\0';
364 obj->initCmd(tokBuf);
366 // hex string
367 } else {
368 p = tokBuf;
369 m = n = 0;
370 c2 = 0;
371 s = NULL;
372 while (1) {
373 c = getChar();
374 if (c == '>') {
375 break;
376 } else if (c == EOF) {
377 error(getPos(), "Unterminated hex string");
378 break;
379 } else if (specialChars[c] != 1) {
380 c2 = c2 << 4;
381 if (c >= '0' && c <= '9')
382 c2 += c - '0';
383 else if (c >= 'A' && c <= 'F')
384 c2 += c - 'A' + 10;
385 else if (c >= 'a' && c <= 'f')
386 c2 += c - 'a' + 10;
387 else
388 error(getPos(), "Illegal character <%02x> in hex string", c);
389 if (++m == 2) {
390 if (n == tokBufSize) {
391 if (!s)
392 s = new GString(tokBuf, tokBufSize);
393 else
394 s->append(tokBuf, tokBufSize);
395 p = tokBuf;
396 n = 0;
398 *p++ = (char)c2;
399 ++n;
400 c2 = 0;
401 m = 0;
405 if (!s)
406 s = new GString(tokBuf, n);
407 else
408 s->append(tokBuf, n);
409 if (m == 1)
410 s->append((char)(c2 << 4));
411 obj->initString(s);
413 break;
415 // dict punctuation
416 case '>':
417 c = lookChar();
418 if (c == '>') {
419 getChar();
420 tokBuf[0] = tokBuf[1] = '>';
421 tokBuf[2] = '\0';
422 obj->initCmd(tokBuf);
423 } else {
424 error(getPos(), "Illegal character '>'");
425 obj->initError();
427 break;
429 // error
430 case ')':
431 case '{':
432 case '}':
433 error(getPos(), "Illegal character '%c'", c);
434 obj->initError();
435 break;
437 // command
438 default:
439 p = tokBuf;
440 *p++ = c;
441 n = 1;
442 while ((c = lookChar()) != EOF && !specialChars[c]) {
443 getChar();
444 if (++n == tokBufSize) {
445 error(getPos(), "Command token too long");
446 break;
448 *p++ = c;
450 *p = '\0';
451 if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
452 obj->initBool(gTrue);
453 } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
454 obj->initBool(gFalse);
455 } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
456 obj->initNull();
457 } else {
458 obj->initCmd(tokBuf);
460 break;
463 return obj;
466 void Lexer::skipToNextLine() {
467 int c;
469 while (1) {
470 c = getChar();
471 if (c == EOF || c == '\n') {
472 return;
474 if (c == '\r') {
475 if ((c = lookChar()) == '\n') {
476 getChar();
478 return;
483 GBool Lexer::isSpace(int c) {
484 return c >= 0 && c <= 0xff && specialChars[c] == 1;