Added package with the documentation and the examples
[lwc.git] / lex.c
blob420a419a8ea72a06182d1526559b37a98b40ce88
1 #include "global.h"
2 #include "SYS.h"
4 int *CODE, c_ntok, c_nsym;
6 /*********************************************************
7 expanding array of itokens
8 *********************************************************/
static void checkline ();
int enter_value (char *txt);

/*
 * Register the placeholder value "%<s1>+<s2>" that stands for the
 * concatenation of two string-literal values.  expand() recognises the
 * leading '%' and builds the joined text on first use.
 * Returns the id handed out by enter_value().
 */
static int concate_str (int s1, int s2)
{
	char joined [30];	/* '%' + two decimal ints + '+' + NUL fits easily */

	sprintf (joined, "%%%i+%i", s1, s2);
	return enter_value (joined);
}
20 static void enter_itoken (int i)
22 #define STORE_TOK(x) { output_itoken (GLOBAL, x); ++c_ntok; }
23 static int pstring;
24 static int perlopst;
25 static int pointsat;
27 if (!i) return;
29 /* =~ is valid C. If followed by string literal, sematically invalid though. Do perlop */
30 if (perlopst) {
31 if (perlopst == 1) {
32 if (i == '~') {
33 perlopst = 2;
34 return;
35 } else {
36 STORE_TOK ('=')
37 perlopst = 0;
39 } else {
40 if (i >= STRBASE)
41 STORE_TOK (PERLOP)
42 else { STORE_TOK ('=') STORE_TOK ('~') }
43 perlopst = 0;
45 } else if (i == '=') {
46 perlopst = 1;
47 if (pstring) {
48 STORE_TOK (pstring)
49 pstring = 0;
50 } else if (pointsat) {
51 STORE_TOK (POINTSAT)
52 pointsat = 0;
54 return;
57 if (pointsat) {
58 switch (i) {
59 case POINTSAT: case MINUSMINUS: case ASSIGNA: case ASSIGNS: case PLUSPLUS:
60 case GEQCMP: case OROR: case ANDAND: case EQCMP: case NEQCMP: case LEQCMP:
61 case '[': case '*': case '+': case '-': case '!': case '=': case '<': case '>':
62 case '~':
63 i += ESCBASE;
64 ndefault:
65 STORE_TOK (POINTSAT)
67 pointsat = 0;
68 } else if (i == POINTSAT) {
69 pointsat = 1;
70 if (pstring) {
71 STORE_TOK (pstring)
72 pstring = 0;
74 return;
77 if (i >= STRBASE) {
78 i -= STRBASE - VALBASE;
79 pstring = pstring ? concate_str (pstring, i) : i;
80 return;
83 if (pstring) {
84 STORE_TOK (pstring)
85 pstring = 0;
88 if (ISRESERVED (i)) {
89 if (i == RESERVED_true) i = enter_value ("1");
90 else if (i == RESERVED_false) i = enter_value ("0");
91 else if (i == RESERVED___extension__) return;
92 else if (i == RESERVED_bool) i = RESERVED_int;
93 else if (i == RESERVED_alloca) i = INTERN_alloca;
96 STORE_TOK (i)
98 checkline ();
101 int processor;
103 int enter_string (char *txt)
105 bool escape_quotes = *txt == '\'';
106 if (!processor && !escape_quotes)
107 return enter_value (txt) + STRBASE - VALBASE;
108 int r;
109 if (processor) {
110 txt = TP [processor] (txt + 1, strlen (txt + 1) - 1);
111 printf ("Willdo [%s]\n", txt);
112 r = processor = 0;
113 yydo_mem (txt, strlen (txt));
114 } else {
115 txt = escape_q_string (txt + 1, strlen (txt + 1) - 1);
116 r = enter_value (txt) + STRBASE - VALBASE;
118 free (txt);
119 return r;
121 /***************************************************************
122 Symbol/value tables
123 we'll take our chances with a simple binary tree
124 ***************************************************************/
/* One node of the unbalanced binary search trees keyed by text. */
typedef struct snode_t {
	struct snode_t *less, *more;	/* children */
	char *txt;			/* key */
	int id;				/* token id assigned to the key */
} snode;

/* roots of the identifier tree and of the constant-value tree */
static snode *symbol_tree, *value_tree;

/* next id to hand out for a new symbol / a new value */
static int symbol_inc, value_inc;
/* number of entries stored in the value table */
int c_nval;
137 static snode *newnode (char *txt, int id)
139 snode *nn = (snode*) malloc (sizeof (snode));
140 nn->less = nn->more = NULL;
141 nn->txt = txt;
142 nn->id = id;
143 return nn;
146 static char **c_symbol, **c_value;
147 static int csym_alloc, cval_alloc;
149 static int addsym (snode **nn, char *txt)
151 *nn = newnode (txt = strdup (txt), symbol_inc);
152 if (csym_alloc == c_nsym) {
153 c_symbol = (char**) realloc (c_symbol, (csym_alloc += 1024) * sizeof (char*));
154 memset (c_symbol + c_nsym, 0, 1024 * sizeof (char*));
156 if (!c_symbol [symbol_inc - IDENTBASE]) {
157 c_symbol [symbol_inc - IDENTBASE] = txt;
158 ++c_nsym;
160 return symbol_inc++;
163 int enter_symbol (char *txt)
165 snode *nn;
166 int c;
168 if (!(nn = symbol_tree))
169 return addsym (&symbol_tree, txt);
171 for (;;)
172 if (!(c = strcmp (txt, nn->txt))) return nn->id;
173 else if (c < 0)
174 if (nn->less) nn = nn->less;
175 else return addsym (&nn->less, txt);
176 else
177 if (nn->more) nn = nn->more;
178 else return addsym (&nn->more, txt);
182 static int addval (snode **nn, char *txt)
184 *nn = newnode (txt = strdup (txt), value_inc);
185 if (cval_alloc == c_nval)
186 c_value = (char**) realloc (c_value, (cval_alloc += 1024) * sizeof (char*));
187 c_value [value_inc - VALBASE] = txt;
188 ++c_nval;
189 return value_inc++;
192 int enter_value (char *txt)
194 snode *nn;
195 int c;
197 if (!(nn = value_tree))
198 return addval (&value_tree, txt);
200 for (;;)
201 if (!(c = strcmp (txt, nn->txt))) return nn->id;
202 else if (c < 0)
203 if (nn->less) nn = nn->less;
204 else return addval (&nn->less, txt);
205 else
206 if (nn->more) nn = nn->more;
207 else return addval (&nn->more, txt);
210 /*********************************************************
211 Linenumbers
212 *********************************************************/
/* "source line `line` is in effect from token count `p` on" */
typedef struct {
	int line, p;
} toklin;

/* flat array built by static_lines() and its length */
static toklin *linenumber;
static int nlinenumber;

#define TLCHUNK 1024

/* While lexing, line marks are collected in a list of fixed-size chunks. */
typedef struct ms_tl_t {
	struct ms_tl_t *next;
	toklin data [TLCHUNK];
	int i;			/* entries used in data[] */
} ms_tl;

static ms_tl *lfirst, *llast;
231 static void store_line (int line)
233 if (llast->i == TLCHUNK) {
234 llast->next = (ms_tl*) malloc (sizeof (ms_tl));
235 llast = llast->next;
236 llast->i = 0;
237 llast->next = NULL;
239 llast->data [llast->i].line = line;
240 llast->data [llast->i++].p = c_ntok;
241 ++nlinenumber;
/* current source line of the lexer */
int line = 1;

/* Record a line mark whenever `line` differs from the last recorded one. */
static void checkline ()
{
	static int pline = -1;

	if (line == pline)
		return;
	pline = line;
	store_line (line);
}
253 static void static_lines ()
255 int i, k;
256 ms_tl *p, *n;
258 linenumber = (toklin*) malloc (nlinenumber * sizeof (toklin));
260 for (k = 0, p = lfirst; p; p = n) {
261 for (i = 0; i < p->i; i++)
262 linenumber [k++] = p->data [i];
263 n = p->next;
264 free (p);
268 /* once linenumbers are frozen into one array */
269 int c_line_of (int p)
271 int s = 0, e = nlinenumber - 1, m;
273 while (e - s > 1)
274 if (linenumber [m = (e + s) / 2].p <= p)
275 s = m;
276 else e = m;
277 return linenumber [e].line;
279 /*********************************************************
280 File marks
281 *********************************************************/
/* "tokens from count `p` on come from `file`" */
typedef struct {
	char *file;
	int p;
} filemark;

static filemark *files;
/* index of the last mark while lexing (-1: none yet); static_files()
   turns it into the number of marks */
static int nfiles = -1;
291 static void store_file (char *file)
293 if (nfiles == -1) {
294 files [++nfiles].file = strdup (file);
295 files [nfiles].p = c_ntok;
296 return;
298 if (!strcmp (file, files [nfiles].file)) return;
299 if (files [nfiles].p == c_ntok) {
300 free (files [nfiles].file);
301 files [nfiles].file = strdup (file);
302 } else {
303 files [++nfiles].file = strdup (file);
304 files [nfiles].p = c_ntok;
308 static void static_files ()
310 files = realloc (files, ++nfiles * sizeof (filemark));
313 /* once files are frozen into one array */
314 char *c_file_of (int p)
316 int s = 0, e = nfiles, m;
318 while (e - s > 1)
319 if (files [m = (e + s) / 2].p < p)
320 s = m;
321 else e = m;
322 return files [s].file;
324 /******************************************************************************
325 -- fix the file/line to token data
326 ******************************************************************************/
327 void adjust_lines (NormPtr p, int adj)
329 int i;
330 for (i = 0; i < nfiles; i++)
331 if (files [i].p > p) break;
332 while (i < nfiles)
333 files [i++].p += adj;
334 // we fix not the line numbering
336 /******************************************************************************
337 -- lexical analyser
338 ******************************************************************************/
339 char *Cpp;
340 int Ci, Clen;
341 /******************************************************************************
343 ******************************************************************************/
/*
 * Character classes used by the lexer:
 *   0  anything else
 *   1  decimal digit
 *   2  letter or '_'
 *   3  letter that may end a numeric constant (f, l, u)
 *   4  exponent letter (e, p)
 */
static const signed char ll_ctypes [256] = {
	['a']=2, ['b']=2, ['c']=2, ['d']=2, ['e']=4, ['f']=3, ['g']=2, ['h']=2,
	['i']=2, ['j']=2, ['k']=2, ['l']=3, ['m']=2, ['n']=2, ['o']=2, ['p']=4,
	['q']=2, ['r']=2, ['s']=2, ['t']=2, ['u']=3, ['v']=2, ['w']=2, ['x']=2,
	['y']=2, ['z']=2,
	['A']=2, ['B']=2, ['C']=2, ['D']=2, ['E']=4, ['F']=3, ['G']=2, ['H']=2,
	['I']=2, ['J']=2, ['K']=2, ['L']=3, ['M']=2, ['N']=2, ['O']=2, ['P']=4,
	['Q']=2, ['R']=2, ['S']=2, ['T']=2, ['U']=3, ['V']=2, ['W']=2, ['X']=2,
	['Y']=2, ['Z']=2,
	['_']=2,
	['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1, ['5']=1, ['6']=1, ['7']=1,
	['8']=1, ['9']=1,
};

/* The argument is converted to unsigned char before indexing: the
   source buffer holds plain chars, and a byte >= 0x80 would otherwise
   become a negative index where char is signed. */
#define LL_CTYPE(x) (ll_ctypes [(unsigned char) (x)])

#define ISEXPON(x) (LL_CTYPE (x) == 4)
#define ISNIEND(x) (LL_CTYPE (x) == 3)
#define ISALPHA(x) (LL_CTYPE (x) >= 2)
#define ISDIGIT(x) (LL_CTYPE (x) == 1)
#define ISALNUM(x) (LL_CTYPE (x) > 0)
366 inline void skip_comment ()
368 Ci += 2;
370 for (;;) {
371 while (Cpp [Ci] != '*') {
372 if (Cpp [Ci++] == '\n') ++line;
373 if (Ci > Clen) fatal ("unterminated comment");
375 if (Cpp [++Ci] != '/') continue;
376 break;
379 ++Ci;
382 inline void skip_line ()
384 for (;;) {
385 while (Cpp [Ci] != '\n')
386 if (++Ci > Clen) return;
387 if (Cpp [Ci - 1] == '\\') {
388 ++line;
389 ++Ci;
390 continue;
392 break;
396 inline int skip_ws ()
398 int cl = 0;
400 for (;;) {
401 for (;;) {
402 if (Cpp [Ci] == ' ' || Cpp [Ci] == '\t') {
403 if (++Ci >= Clen) return cl;
404 continue;
406 if (Cpp [Ci] == '\n') {
407 ++line;
408 cl = 1;
409 if (++Ci >= Clen) return cl;
410 continue;
412 break;
414 if (Cpp [Ci] == '/') {
415 if (Cpp [Ci + 1] == '*')
416 skip_comment ();
417 else if (Cpp [Ci + 1] == '/')
418 skip_line ();
419 else return cl;
420 continue;
422 if (Cpp [Ci] == '\\' && Cpp [Ci + 1] == '\n') {
423 Ci += 2;
424 ++line;
425 continue;
427 return cl;
431 static int loadtext_directive ()
433 char filename [128], *p = filename;
434 int Cis = Ci + 127 > Clen ? Clen : Ci + 127;
436 skip_ws ();
437 if (Ci >= Clen || Cpp [Ci++] != '<') fatal (expand (RESERVED__loadtext));
438 while (Cpp [Ci] != '>') {
439 *p++ = Cpp [Ci++];
440 if (Ci >= Cis) fatal ("filename too long");
442 *p = 0, ++Ci;
444 if (!(p = loadtext (filename))) {
445 fprintf (stderr, "Can't open file %s\n", filename);
446 fatal ("");
448 return enter_string (p);
/* `_random_`: register the decimal text of one rand() result as a value. */
static int random_directive ()
{
	char digits [100];
	int value = rand ();

	sprintf (digits, "%i", value);
	return enter_value (digits);
}
/*
 * To be used inside a scanning loop.  When DO_CPP is defined, a
 * backslash-newline in the middle of a token is stepped over and the
 * enclosing loop is restarted with `continue`; otherwise the macro
 * expands to nothing.
 */
#ifdef DO_CPP
#define CATCH_BS_GHOST \
	if (Cpp [Ci] == '\\' && Cpp [Ci + 1] == '\n') {\
		Ci += 2;\
		++line;\
		continue;\
	}
#else
#define CATCH_BS_GHOST
#endif
469 static inline int get_ident ()
471 char identstr [64], *p = identstr;
472 int Cis = Ci + 63 > Clen ? Clen + 1: Ci + 63, i;
474 for (;;) {
475 while (ISALNUM (Cpp [Ci])) {
476 *p++ = Cpp [Ci++];
477 if (Ci >= Cis) fatal ("identifier out of file");
479 CATCH_BS_GHOST;
480 break;
482 *p = 0;
484 if ((i = enter_symbol (identstr)) == RESERVED__loadtext)
485 return loadtext_directive ();
486 if (i == RESERVED__random_)
487 return random_directive ();
488 return i;
491 static inline int get_vident ()
493 char identstr [64], *p = identstr;
494 int Cis = Ci + 63 > Clen ? Clen : Ci + 63;
496 for (;;) {
497 while (ISALNUM (Cpp [Ci])) {
498 *p++ = Cpp [Ci++];
499 if (Ci >= Cis) fatal ("identifier out of file");
501 CATCH_BS_GHOST;
502 break;
504 *p = 0;
505 return enter_value (identstr);
508 static int get_string ()
510 char *p = &Cpp [Ci], *a;
511 int len, qchar;
513 Ci += (*p == 'L') ? 2 : 1;
514 qchar = Cpp [Ci - 1];
515 for (;;) {
516 if (qchar == '"')
517 while (Cpp [Ci] != '\\' && Cpp [Ci] != '"')
518 { if (++Ci >= Clen) fatal ("forgoten \""); }
519 else
520 while (Cpp [Ci] != '\\' && Cpp [Ci] != '\'')
521 if (++Ci >= Clen) fatal ("forgoten '");
522 if (Cpp [Ci] == '\\') {
523 if (Cpp [++Ci] == '\n') ++line;
524 if (++Ci >= Clen) fatal ("forgotten \"");
525 continue;
527 break;
530 len = &Cpp [++Ci] - p;
531 a = (char*) alloca (len + 1);
532 strncpy (a, p, len);
533 a [len] = 0;
534 return enter_string (a);
537 static int get_char_const ()
539 char cchar [5], *p = cchar;
540 int Cis = Ci + 4 > Clen ? Clen : Ci + 4, Cisv = Ci;
542 if ((*p++ = Cpp [Ci++]) == 'L')
543 *p++ = Cpp [Ci++];
545 while (Cpp [Ci] != '\'') {
546 if ((*p++ = Cpp [Ci]) == '\\')
547 *p++ = Cpp [++Ci];
548 //if (++Ci >= Cis) fatal ("character constant too long");
549 if (++Ci >= Cis) {
550 Ci = Cisv;
551 return get_string ();
554 *p++ = Cpp [Ci++];
555 *p = 0;
556 return enter_value (cchar);
559 static inline int get_nconst ()
561 char nombre [64], *p = nombre;
562 int Cis = Ci + 31 > Clen ? Clen : Ci + 31;
564 for (;;) {
565 while (ISALNUM (Cpp [Ci]) /* XXX && Cpp [Ci] != '_' */) {
566 *p++ = Cpp [Ci++];
567 if (Ci >= Cis) fatal ("numeric constant too long");
569 CATCH_BS_GHOST;
570 break;
573 if (Cpp [Ci] == '.') {
574 *p++ = Cpp [Ci++];
575 for (;;) {
576 while (ISDIGIT (Cpp [Ci])) {
577 *p++ = Cpp [Ci++];
578 if (Ci >= Cis) fatal ("numeric constant too long");
580 CATCH_BS_GHOST;
581 break;
583 } else if (ISEXPON (Cpp [Ci])) {
584 *p++ = Cpp [Ci++];
585 if (Cpp [Ci] == '-' || Cpp [Ci] == '+')
586 *p++ = Cpp [Ci++];
587 for (;;) {
588 while (ISDIGIT (Cpp [Ci])) {
589 *p++ = Cpp [Ci++];
590 if (Ci >= Cis) fatal ("numeric constant too long");
592 CATCH_BS_GHOST;
593 break;
597 while (ISNIEND (Cpp [Ci])) {
598 *p++ = Cpp [Ci++];
599 if (Ci >= Cis) fatal ("numeric constant too long");
601 *p = 0;
603 return enter_value (nombre);
605 /******************************************************************************
606 -- main lex loop
607 ******************************************************************************/
608 static void preproc_line ()
610 // # <line> "file"
611 char file [200], *p1, *p2;
612 int l;
614 if (skip_ws ()) return;
616 if (Cpp [Ci++] == 'p') goto skippy;
617 p1 = (Cpp [++Ci] == 'l') ? strchr (&Cpp [Ci], ' ') : &Cpp [Ci];
618 line = strtol (p1, NULL, 10);
619 p1 = strchr (&Cpp [Ci], '"') + 1;
620 p2 = strchr (p1, '"');
621 l = p2 - p1;
622 Ci = p2 - Cpp;
623 strncpy (file, p1, l);
624 file [l] = 0;
625 store_file (file);
626 skippy:
627 while (Cpp [Ci] != '\n' && Ci < Clen)
628 ++Ci;
631 int do_yylex ()
633 int r;
634 Again:
635 if (Ci >= Clen)
636 return THE_END;
637 skip_ws ();
638 if (Ci >= Clen)
639 return THE_END;
641 if (ISDIGIT (Cpp [Ci]))
642 return get_nconst ();
643 else if (ISALPHA (Cpp [Ci]))
644 if (Cpp [Ci] == 'L')
645 if (Cpp [Ci + 1] == '\'') goto Lchar;
646 else if (Cpp [Ci + 1] == '"') goto Lstring;
647 else goto eelse;
648 else if (in2 (Cpp [Ci + 1], '"', '\''))
649 goto Qstring;
650 else eelse: // bad goto. bad bad bad
651 return get_ident ();
652 else switch (r = Cpp [Ci]) {
653 case '~': case ')':
654 case ';': case ',':
655 case '?': case ':':
656 case '[': case ']':
657 case '{': case '}': case '(':
658 ++Ci;
659 ncase '*':
660 if (Cpp [++Ci] == '=') {
661 r = ASSIGNM;
662 ++Ci;
664 break;
665 Qstring: processor = Cpp [Ci++];
666 Lstring:
667 case '"':
668 return get_string ();
669 ncase '\'':
670 Lchar:
671 return get_char_const ();
672 ncase '/':
673 if (Cpp [++Ci] == '=') {
674 r = ASSIGND;
675 ++Ci;
677 ncase '.':
678 if (ISDIGIT (Cpp [Ci + 1]))
679 return get_nconst ();
680 else if (Cpp [++Ci] == '.') {
681 if (Cpp [++Ci] != '.')
682 fatal ("the ellipsis is three dots");
683 r = ELLIPSIS;
684 ++Ci;
686 ncase '-':
687 switch (Cpp [++Ci]) {
688 case '>': r = POINTSAT; ++Ci;
689 ncase '-': r = MINUSMINUS; ++Ci;
690 ncase '=': r = ASSIGNS; ++Ci;
692 ncase '+':
693 switch (Cpp [++Ci]) {
694 case '+': r = PLUSPLUS; ++Ci;
695 ncase '=': r = ASSIGNA; ++Ci;
697 ncase '!':
698 if (Cpp [++Ci] == '=') {
699 r = NEQCMP;
700 ++Ci;
702 ncase '%':
703 if (Cpp [++Ci] == '=') {
704 r = ASSIGNR;
705 ++Ci;
707 ncase '^':
708 if (Cpp [++Ci] == '=') {
709 r = ASSIGNBX;
710 ++Ci;
712 ncase '&':
713 case '|':
714 ++Ci;
715 if (Cpp [Ci] == r) {
716 r = r == '&' ? ANDAND : OROR;
717 ++Ci;
718 } else if (Cpp [Ci] == '=') {
719 r = r == '&' ? ASSIGNBA : ASSIGNBO;
720 ++Ci;
722 ncase '=':
723 if (Cpp [++Ci] == '=') {
724 r = EQCMP;
725 ++Ci;
727 ncase '<':
728 case '>':
729 ++Ci;
730 if (Cpp [Ci] == r) {
731 if (Cpp [++Ci] == '=') {
732 ++Ci;
733 r = r == '>' ? ASSIGNRS : ASSIGNLS;
734 } else r = r == '>' ? RSH : LSH;
735 } else if (Cpp [Ci] == '=') {
736 ++Ci;
737 r = r == '>' ? GEQCMP : LEQCMP;
739 ncase '$':
740 if (ISALPHA (Cpp [++Ci]))
741 return get_vident ();
742 ncase '#':
743 #ifdef DO_CPP
744 if (!Ci || Cpp [Ci - 1] == '\n')
745 r = CPP_DIRECTIVE;
746 else if (Cpp [Ci + 1] == '#') {
747 ++Ci;
748 r = CPP_CONCAT;
750 ++Ci;
751 #else
752 ++Ci;
753 preproc_line ();
754 goto Again;
755 #endif
756 ncase '\n':
757 ++line;
758 case '\r':
759 case '\f':
760 ++Ci;
761 goto Again;
762 ndefault:
763 fprintf (stderr, "Ci = %c(%i) at %i/%i\n", Cpp [Ci], Cpp [Ci], Ci, line);
764 fatal ("invalid character");
767 return r;
769 /******************************************************************************
770 Initialization
771 ******************************************************************************/
772 #define ENTER_SYMBOL(x) \
773 symbol_inc = RESERVED_ ## x;\
774 enter_symbol (#x);
775 #define ALIAS_LEX(x, y) \
776 symbol_inc = RESERVED_ ## x;\
777 enter_symbol (#y);
778 #define ENTER_VALUE(x) \
779 value_inc = RESERVED_ ## x;\
780 enter_value (#x);
782 static void calc_binshift ();
784 Token RESERVED_attr_stdcall;
786 void initlex ()
788 lfirst = llast = (ms_tl*) malloc (sizeof (ms_tl));
789 lfirst->next = NULL;
790 lfirst->i = 0;
791 symbol_inc = 0;
792 files = (filemark*) malloc (1024 * sizeof (filemark));
793 ENTER_SYMBOL (inline);
794 ENTER_SYMBOL (do);
795 ENTER_SYMBOL (struct);
796 ENTER_SYMBOL (case);
797 ENTER_SYMBOL (for);
798 ENTER_SYMBOL (short);
799 ENTER_SYMBOL (union);
800 ENTER_SYMBOL (sizeof);
801 ENTER_SYMBOL (register);
802 ENTER_SYMBOL (break);
803 ENTER_SYMBOL (auto);
804 ENTER_SYMBOL (continue);
805 ENTER_SYMBOL (const);
806 ENTER_SYMBOL (default);
807 ENTER_SYMBOL (enum);
808 ENTER_SYMBOL (else);
809 ENTER_SYMBOL (extern);
810 ENTER_SYMBOL (goto);
811 ENTER_SYMBOL (if);
812 ENTER_SYMBOL (long);
813 ENTER_SYMBOL (return);
814 ENTER_SYMBOL (signed);
815 ENTER_SYMBOL (static);
816 ENTER_SYMBOL (switch);
817 ENTER_SYMBOL (typedef);
818 ENTER_SYMBOL (unsigned);
819 ENTER_SYMBOL (linkonce);
820 ENTER_SYMBOL (volatile);
821 ENTER_SYMBOL (while);
822 ENTER_SYMBOL (void);
823 ENTER_SYMBOL (int);
824 ENTER_SYMBOL (char);
825 ENTER_SYMBOL (float);
826 ENTER_SYMBOL (double);
827 ENTER_SYMBOL (modular);
828 ENTER_SYMBOL (class);
829 // extensive
830 ENTER_SYMBOL (__asm__);
831 ENTER_SYMBOL (__extension__);
832 ENTER_SYMBOL (__attribute__);
833 ENTER_SYMBOL (__restrict);
834 ENTER_SYMBOL (__thread);
835 ENTER_SYMBOL (__unwind__);
836 ENTER_SYMBOL (__noctor__);
837 // gnu damage
838 ALIAS_LEX (typeof, __typeof);
839 ALIAS_LEX (const, __const);
840 ALIAS_LEX (inline, __inline__);
841 ALIAS_LEX (signed, __signed__);
842 ALIAS_LEX (volatile, __volatile__);
843 ALIAS_LEX (inline, __inline);
844 ALIAS_LEX (const, __const__);
845 ALIAS_LEX (__asm__, asm);
846 ALIAS_LEX (__attribute__, __attribute);
847 // our own reserved words
848 ENTER_SYMBOL (true);
849 ENTER_SYMBOL (false);
850 ENTER_SYMBOL (template);
851 ENTER_SYMBOL (bool);
852 ENTER_SYMBOL (this);
853 ENTER_SYMBOL (new);
854 ENTER_SYMBOL (delete);
855 ENTER_SYMBOL (localloc);
856 ENTER_SYMBOL (virtual);
857 ENTER_SYMBOL (operator);
858 ENTER_SYMBOL (try);
859 ENTER_SYMBOL (throw);
860 ENTER_SYMBOL (benum);
861 ENTER_SYMBOL (typeof);
862 ENTER_SYMBOL (specialize);
863 ENTER_SYMBOL (postfix);
864 ENTER_SYMBOL (dereference);
865 ENTER_SYMBOL (RegExp);
866 ENTER_SYMBOL (final);
867 ENTER_SYMBOL (__declexpr__);
868 ENTER_SYMBOL (_lwc_config_);
869 ENTER_SYMBOL (__C__);
870 Ci = 0;
871 // pseudo reserved symbols
872 ENTER_SYMBOL (include);
873 ENTER_SYMBOL (define);
874 ENTER_SYMBOL (undef);
875 ENTER_SYMBOL (endif);
876 ENTER_SYMBOL (ifdef);
877 ENTER_SYMBOL (ifndef);
878 ENTER_SYMBOL (elif);
879 ENTER_SYMBOL (error);
880 ENTER_SYMBOL (line);
881 ENTER_SYMBOL (uses);
882 symbol_inc = RESERVED___VA_ARGS__;
883 enter_symbol ("__VA_ARGS__");
884 ENTER_SYMBOL (defined);
885 ENTER_SYMBOL (__LINE__);
886 ENTER_SYMBOL (__FILE__);
887 ENTER_SYMBOL (__TIME__);
888 ENTER_SYMBOL (__DATE__);
889 ENTER_SYMBOL (_);
890 ENTER_SYMBOL (ctor);
891 ENTER_SYMBOL (nothrow);
892 ENTER_SYMBOL (alias);
893 ENTER_SYMBOL (used);
894 ENTER_SYMBOL (dtor);
895 ENTER_SYMBOL (malloc);
896 ENTER_SYMBOL (free);
897 ENTER_SYMBOL (alloca);
898 ENTER_SYMBOL (__builtin_alloca);
899 ALIAS_LEX (alloca, __builtin_alloca);
900 ENTER_SYMBOL (private);
901 ENTER_SYMBOL (public);
902 ENTER_SYMBOL (__typeof__);
903 ENTER_SYMBOL (__enumstr__);
904 ENTER_SYMBOL (__inset__);
905 ENTER_SYMBOL (_v_p_t_r_);
906 ENTER_SYMBOL (_CLASS_);
907 ENTER_SYMBOL (typeid);
908 ENTER_SYMBOL (jmp_buf);
909 ENTER_SYMBOL (setjmp);
910 ENTER_SYMBOL (longjmp);
911 ENTER_SYMBOL (__on_throw__);
912 ENTER_SYMBOL (__emit_vtbl__);
914 ENTER_SYMBOL (__section__);
915 ENTER_SYMBOL (noreturn);
916 ENTER_SYMBOL (__label__);
917 ENTER_SYMBOL (__lwc_unwind);
918 ENTER_SYMBOL (__lwc_landingpad);
919 ENTER_SYMBOL (p);
920 ENTER_SYMBOL (a);
921 ENTER_SYMBOL (pos);
922 ENTER_SYMBOL (len);
923 ENTER_SYMBOL (constructor);
924 ENTER_SYMBOL (memcpy);
925 ENTER_SYMBOL (__builtin_memcpy);
926 ENTER_SYMBOL (strncmp);
927 ENTER_SYMBOL (strncasecmp);
928 ENTER_SYMBOL (__builtin_strncmp);
929 ENTER_SYMBOL (__builtin_strncasecmp);
930 ENTER_SYMBOL (_loadtext);
931 ENTER_SYMBOL (_random_);
932 ENTER_SYMBOL (__FUNCTION__);
933 ENTER_SYMBOL (__PRETTY_FUNCTION__);
934 ENTER_SYMBOL (size_t);
935 ENTER_SYMBOL (wchar_t);
936 ENTER_SYMBOL (min);
937 ENTER_SYMBOL (max);
938 ENTER_SYMBOL (charp_len);
939 ENTER_SYMBOL (abbrev);
940 ENTER_SYMBOL (strlen);
941 ENTER_SYMBOL (x);
942 ENTER_SYMBOL (X);
943 ENTER_SYMBOL (y);
944 ENTER_SYMBOL (i);
945 ENTER_SYMBOL (j);
946 ENTER_SYMBOL (s);
947 ENTER_SYMBOL (main);
948 ENTER_SYMBOL (oper_plus);
949 ENTER_SYMBOL (oper_minus);
950 ENTER_SYMBOL (oper_thingy);
951 ENTER_SYMBOL (oper_fcall);
952 ENTER_SYMBOL (oper_comma);
953 ENTER_SYMBOL (oper_mod);
954 ENTER_SYMBOL (oper_or);
955 ENTER_SYMBOL (oper_and);
956 ENTER_SYMBOL (oper_xor);
957 ENTER_SYMBOL (oper_lsh);
958 ENTER_SYMBOL (oper_rsh);
959 ENTER_SYMBOL (oper_mul);
960 ENTER_SYMBOL (oper_div);
961 ENTER_SYMBOL (oper_andand);
962 ENTER_SYMBOL (oper_oror);
963 ENTER_SYMBOL (oper_as_m);
964 ENTER_SYMBOL (oper_as_d);
965 ENTER_SYMBOL (oper_as_r);
966 ENTER_SYMBOL (oper_as_ba);
967 ENTER_SYMBOL (oper_as_bx);
968 ENTER_SYMBOL (oper_as_bo);
969 ENTER_SYMBOL (oper_as_rs);
970 ENTER_SYMBOL (oper_as_ls);
971 ENTER_SYMBOL (oper_star);
972 ENTER_SYMBOL (oper_excl);
973 ENTER_SYMBOL (oper_array);
974 ENTER_SYMBOL (oper_plusplus);
975 ENTER_SYMBOL (oper_minusminus);
976 ENTER_SYMBOL (oper_plusplusp);
977 ENTER_SYMBOL (oper_minusminusp);
978 ENTER_SYMBOL (oper_add);
979 ENTER_SYMBOL (oper_sub);
980 ENTER_SYMBOL (oper_gr);
981 ENTER_SYMBOL (oper_le);
982 ENTER_SYMBOL (oper_greq);
983 ENTER_SYMBOL (oper_leq);
984 ENTER_SYMBOL (oper_eq);
985 ENTER_SYMBOL (oper_neq);
986 ENTER_SYMBOL (oper_assign);
987 ENTER_SYMBOL (oper_as_a);
988 ENTER_SYMBOL (oper_as_s);
989 ENTER_SYMBOL (oper_pointsat);
990 ENTER_VALUE (0);
991 ENTER_VALUE (1);
992 ENTER_VALUE (3);
993 value_inc = RESERVED_C; enter_value ("\"C\"");
994 calc_binshift ();
995 RESERVED_attr_stdcall = enter_symbol ("__attribute__((stdcall))");
996 GLOBAL = new_stream ();
999 /******************************************************************************
1000 binshift values
1001 ******************************************************************************/
/* binshift[k] is the value token whose text is "0x%x" of (1 << k) */
int binshift [32];

/*
 * Enter the 32 single-bit hex constants and record their tokens.
 * The constants are entered in the fixed, non-sequential order below;
 * that order determines the ids they receive and is preserved.
 * The shift is done on an unsigned 1: `1 << 31` on a signed int is
 * undefined behaviour.  The printed text is unchanged.
 */
static void calc_binshift ()
{
	static const unsigned char order [32] = {
		16,  0, 31,  8, 24,  4, 12, 20, 28,  2,
		 6, 10, 14, 18, 22, 26, 30,  1,  3,  5,
		 7,  9, 11, 13, 15, 17, 19, 21, 23, 25,
		27, 29
	};
	char tmp [20];
	unsigned k;

	for (k = 0; k < sizeof order / sizeof order [0]; k++) {
		int sh = order [k];

		sprintf (tmp, "0x%x", 1u << sh);
		binshift [sh] = enter_value (tmp);
	}
}
1014 /******************************************************************************
1015 yydo interface
1016 ******************************************************************************/
/* Print `m` on stderr with the "lex-error: " prefix and terminate the
   program with exit status 1.  Does not return. */
void fatal (char *m)
{
	fprintf (stderr, "lex-error: %s\n", m);
	exit (1);
}
1023 char *tfile = "-no file-";
1025 void yydo_mem (char *data, int len)
1027 int token;
1029 SAVE_VAR (Clen, len);
1030 SAVE_VAR (Cpp, data);
1031 SAVE_VAR (Ci, 0);
1033 while ((token = do_yylex ()) != THE_END)
1034 if (token == CPP_DIRECTIVE)
1035 #ifdef DO_CPP
1036 if (!sys_cpp) cpp_directive ();
1037 else
1038 #endif
1039 preproc_line ();
1040 #ifdef DO_CPP
1041 else if (ISSYMBOL (token) && is_macro (token) != -1) {
1042 Token *E = expand_macro (token);
1043 int i;
1044 for (i = 0; E [i] != -1; i++)
1045 enter_itoken (E [i]);
1046 free (E);
1048 #endif
1049 else enter_itoken (token);
1051 RESTOR_VAR (Clen);
1052 RESTOR_VAR (Cpp);
1053 RESTOR_VAR (Ci);
1056 int yydo_file (char *file)
1058 #ifdef DEBUG
1059 static int depth;
1060 if (debugflag.CPP) {
1061 int i;
1062 for (i = depth++; i; --i) PRINTF (" ");
1063 PRINTF ("lex on file ["COLS"%s"COLE"]\n", file);
1065 #endif
1067 struct load_file L;
1068 ctor_load_file_ (&L, file);
1070 if (!L.success) return -1;
1072 SAVE_VAR (line, 1);
1073 SAVE_VAR (tfile, file);
1074 SAVE_VAR (current_file, file);
1075 store_file (tfile);
1077 yydo_mem (L.data, L.len);
1079 RESTOR_VAR (line);
1080 RESTOR_VAR (tfile);
1081 RESTOR_VAR (current_file);
1082 store_file (tfile);
1084 dtor_load_file_ (&L);
1086 #ifdef DEBUG
1087 if (debugflag.CPP) --depth;
1088 #endif
1090 return 0;
1093 int yydo (char *file)
1095 if (yydo_file (file) == -1)
1096 return -1;
1097 enter_itoken (';');
1098 enter_itoken (-1);
1099 enter_itoken (-1);
1100 CODE = combine_output (GLOBAL);
1102 static_lines ();
1103 static_files ();
1105 return 0;
1107 /******************************************************************************
1108 -- Dynamic symbols
1109 new identifiers may be generated during the translation.
1110 store these in a dynamic symbol table and make them printable
1111 by expand() below.
1112 ******************************************************************************/
1114 #define DCHUNK 512
1116 static char **dynsym;
1117 static int ndynsym, dynsymalloc;
1118 static snode *ns_tree;
1120 static snode *lookup_dynsym (char *s)
1122 snode *n = ns_tree;
1123 snode *r = (snode*) malloc (sizeof *r);
1124 int c;
1126 r->less = r->more = 0;
1127 r->id = -1;
1128 r->txt = s;
1130 if (!n) return ns_tree = r;
1132 for (;;)
1133 if ((c = strcmp (s, n->txt)) == 0) {
1134 free (r);
1135 return n;
1136 } else if (c > 0)
1137 if (n->less) n = n->less;
1138 else return n->less = r;
1139 else if (n->more) n = n->more;
1140 else return n->more = r;
1143 Token new_symbol (char *s)
1145 snode *z = lookup_dynsym (s);
1146 if (z->id != -1) {
1147 free (s);
1148 return DYNBASE + z->id;
1150 if (ndynsym == dynsymalloc) {
1151 dynsymalloc += DCHUNK;
1152 dynsym = realloc (dynsym, dynsymalloc * sizeof (char*));
1154 dynsym [z->id = ndynsym] = s;
1155 return DYNBASE + ndynsym++;
1158 Token new_value_int (int i)
1160 char tmp [20];
1161 sprintf (tmp, "%i", i);
1162 //return enter_value (strdup (tmp));
1163 return enter_value (tmp);
1166 Token new_value_string (char *s)
1168 return enter_value (escape_c_string (s, strlen (s)));
1171 Token stringify (Token t)
1173 char tmp [512];
1174 sprintf (tmp, "\"%s\"", expand (t));
1175 return enter_value (strdup (tmp));
1178 Token token_addchar (Token t, int c)
1180 char tmp [512];
1181 sprintf (tmp, "%s%c", expand (t), c);
1182 return enter_symbol (strdup (tmp));
1184 /******************************************************************************
1185 -- Add bogus values
1186 ******************************************************************************/
1187 void add_extra_values (char **v, int n)
1189 c_value = realloc (c_value, (c_nval + n) * sizeof v[0]);
1190 memcpy (c_value + c_nval, v, n * sizeof v[0]);
1191 c_nval += n;
1193 /******************************************************************************
1194 -- Lookup what is a constant
1195 ******************************************************************************/
1196 typeID type_of_const (Token c)
1198 char *t = c_value [c - VALBASE];
1199 if (t [0] == '"' || (t [0] == 'L' && t [1] == '"') || t [0] == '%')
1200 return typeID_charP;
1201 if (t [0] == '\'')
1202 return typeID_int;
1203 if (ISALPHA(t [0]))
1204 return typeID_int;
1205 /* XXX: checkout, '.' missing but 'e' present */
1206 return strchr (t, '.') ? typeID_float : typeID_int;
1209 bool is_literal (Token c)
1211 return ISVALUE (c) && c_value [c - VALBASE][0] == '"';
1213 /******************************************************************************
1214 -- Evaluate constant values
1215 ******************************************************************************/
1216 int eval_int (Token c)
1218 if (!ISVALUE (c) || type_of_const (c) != typeID_int)
1219 parse_error_ll ("Integer value expected");
1220 /* XXX: evaluate 'a', 'b', '0' character constants */
1221 return strtol (c_value [c - VALBASE], 0, 10);
1224 long long int eval_intll (Token c)
1226 if (!ISVALUE (c) || type_of_const (c) != typeID_int)
1227 parse_error_ll ("Integer value expected");
1228 return strtoll (c_value [c - VALBASE], 0, 10);
1230 /******************************************************************************
1231 -- Find specific symbols
1232 This is very rare. We care for special names which however are not
1233 reserved. Like "__FUNCTION__" "main", etc.
1234 The search is linear, O(N), but this is ok (for now at least)
1235 ******************************************************************************/
1236 Token Lookup_Symbol (char *s)
1238 int i;
1239 for (i = 0; i < c_nsym; i++)
1240 if (!strcmp (s, c_symbol [i]))
1241 return i + IDENTBASE;
1242 return 0;
1244 /******************************************************************************
1245 -- C expand
1246 expand the integer normalized tokens to strings
1247 ******************************************************************************/
1249 #define pstr(x) escop ? "->" x : x
1250 #define rcase(x) case RESERVED_ ## x: return #x
1251 #define ocase(x, y) case x: return y
1252 #define pcase(x, y) case x: return pstr (y)
1254 char *expand (int token)
1256 bool escop = 0;
1257 if (token >= IDENTBASE) {
1258 if (token < DYNBASE)
1259 return (token - IDENTBASE < c_nsym) ?
1260 c_symbol [token - IDENTBASE] : "/*bug*/";
1261 if (token >= STRBASE) token -= STRBASE - VALBASE;
1262 if (token >= VALBASE) {
1263 char *v = token - VALBASE < c_nval ? c_value [token - VALBASE] : "/*BUG*/";
1264 if (*v != '%') return v;
1265 int t1, t2;
1266 t1 = strtol (v + 1, &v, 10);
1267 t2 = strtol (v + 1, 0, 10);
1268 char *s1 = expand (t1), *s2 = expand (t2);
1269 char *s3 = strcpy (c_value [token - VALBASE] = (char*)
1270 malloc (strlen (s1) + strlen (s2) + 2), s1);
1271 strcpy (s3 + strlen (s3) - 1, s2 + 1);
1272 return s3;
1274 if (token >= ARGBASE) return "*argument*";
1275 return dynsym [token - DYNBASE];
1277 if (escop = token >= ESCBASE) token -= ESCBASE;
1278 if (token > 256) switch (token) {
1279 ocase (ELLIPSIS, "...");
1280 pcase (POINTSAT, "->");
1281 pcase (MINUSMINUS, "--");
1282 pcase (ASSIGNA, "+=");
1283 pcase (ASSIGNS, "-=");
1284 ocase (ASSIGNM, "*=");
1285 ocase (ASSIGND, "/=");
1286 ocase (ASSIGNR, "%=");
1287 ocase (ASSIGNBA, "&=");
1288 ocase (ASSIGNBO, "|=");
1289 ocase (ASSIGNBX, "^=");
1290 ocase (ASSIGNRS, ">>=");
1291 ocase (ASSIGNLS, "<<=");
1292 ocase (PERLOP, "=~");
1293 pcase (PLUSPLUS, "++");
1294 pcase (GEQCMP, ">=");
1295 ocase (LSH, "<<");
1296 pcase (OROR, "||");
1297 pcase (ANDAND, "&&");
1298 pcase (EQCMP, "==");
1299 pcase (NEQCMP, "!=");
1300 ocase (RSH, ">>");
1301 pcase (LEQCMP, "<=");
1302 ocase (MARKER, "-MARKER-");
1303 ocase (DBG_MARK, "\n/*+*+*+*+*+*+*+*+*+*+*+*+*/\n");
1304 ocase (NOOBJ, "/*not-an-object*/");
1305 default: return "n/A\n";
1306 case NOTHING:
1307 case UWMARK: return "";
1309 switch (token) {
1310 case '(': return "(";
1311 case ')': return ")";
1312 case '[': return pstr ("[");
1313 case ']': return "]";
1314 case ';': return ";";
1315 case ',': return ",";
1316 case ':': return ":";
1317 case '~': return pstr ("~");
1318 case '?': return "?";
1319 case '{': return "{";
1320 case '}': return "}";
1321 case '.': return ".";
1322 case '*': return pstr ("*");
1323 case '/': return "/";
1324 case '+': return pstr ("+");
1325 case '-': return pstr ("-");
1326 case '!': return pstr ("!");
1327 case '%': return "%";
1328 case '^': return "^";
1329 case '&': return "&";
1330 case '|': return "|";
1331 case '=': return pstr ("=");
1332 case '<': return pstr ("<");
1333 case '>': return pstr (">");
1334 case '"': return "\"";
1335 case '#': return "#";
1336 case CPP_CONCAT: return "##";
1337 case THE_END: return "/*End of unit*/";
1339 if (token < 0) return token == BLANKT ? "" : "/*-1*/";
1340 return "plonk!";