Fixed semi-colon parsing in C-style for loops
[delight/core.git] / dmd2 / lexer.c
blob12155b1439586f7cf636537ccfc3de4dac8b354b
2 // Compiler implementation of the D programming language
3 // Copyright (c) 1999-2008 by Digital Mars
4 // All Rights Reserved
5 // written by Walter Bright
6 // http://www.digitalmars.com
7 // License for redistribution is by either the Artistic License
8 // in artistic.txt, or the GNU General Public License in gnu.txt.
9 // See the included readme.txt for details.
11 /* NOTE: This file has been patched from the original DMD distribution to
12 work with the GDC compiler.
14 Modified by David Friedman, December 2006
17 /* Lexical Analyzer */
19 #include <stdio.h>
20 #include <string.h>
21 #include <ctype.h>
22 #include <stdarg.h>
23 #include <errno.h>
24 //#include <wchar.h>
25 #include <stdlib.h>
26 #include <assert.h>
27 #include <sys/time.h>
29 #ifdef IN_GCC
31 #include <time.h>
32 #include "mem.h"
34 #else
36 #if __GNUC__
37 #include <time.h>
38 #endif
40 #if _WIN32
41 #include "..\root\mem.h"
42 #else
43 #include "../root/mem.h"
44 #endif
45 #endif
47 #include "stringtable.h"
49 #include "lexer.h"
50 #include "utf.h"
51 #include "identifier.h"
52 #include "id.h"
53 #include "module.h"
55 #if _WIN32 && __DMC__
56 // from \dm\src\include\setlocal.h
57 extern "C" char * __cdecl __locale_decpoint;
58 #endif
60 extern int HtmlNamedEntity(unsigned char *p, int length);
62 #define LS 0x2028 // UTF line separator
63 #define PS 0x2029 // UTF paragraph separator
/********************************************
 * Do our own char maps
 *
 * cmtable[] holds, for every possible byte value, a bit set describing
 * whether the byte is an octal digit, a hex digit and/or an identifier
 * character.  The table is filled once by cmtable_init(); the inline
 * predicates below are then plain table lookups.
 */

static unsigned char cmtable[256];

const int CMoctal  = 0x1;   // byte is '0'..'7'
const int CMhex    = 0x2;   // byte is '0'..'9', 'a'..'f' or 'A'..'F'
const int CMidchar = 0x4;   // byte is an ASCII letter, digit or '_'

/* Each predicate returns non-zero when the corresponding bit is set for c. */
inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; }
inline unsigned char ishex   (unsigned char c) { return cmtable[c] & CMhex; }
inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; }

/********************************************
 * Populate cmtable[].
 *
 * Only ASCII bytes are ever classified.  isalnum() is locale dependent, so
 * it is consulted for values below 0x80 only; otherwise a non-"C" locale
 * could flag bytes >= 0x80 as identifier characters and the lexer would
 * consume pieces of a UTF-8 sequence without decoding them.
 */
static void cmtable_init()
{
    for (unsigned c = 0; c < sizeof(cmtable) / sizeof(cmtable[0]); c++)
    {
        const bool digit = ('0' <= c && c <= '9');

        if ('0' <= c && c <= '7')
            cmtable[c] |= CMoctal;
        if (digit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))
            cmtable[c] |= CMhex;
        if ((c < 0x80 && isalnum((int)c)) || c == '_')
            cmtable[c] |= CMidchar;
    }
}
93 /************************* Token **********************************************/
// Table of token spellings indexed by TOK value; Token::toChars(enum TOK)
// reads it and falls back to a generated "TOK<n>" name when an entry is null.
95 char *Token::tochars[TOKMAX];
// Token allocator: hands back the head of Lexer::freelist when one is
// available, otherwise defers to the global operator new.
97 void *Token::operator new(size_t size)
98 { Token *t;
100 if (Lexer::freelist)
// unlink the first recycled token and reuse its storage
102 t = Lexer::freelist;
103 Lexer::freelist = t->next;
104 return t;
// free list is empty: allocate fresh storage
107 return ::operator new(size);
110 #ifdef DEBUG
// Debug-only helper: writes this token's toChars() text and a newline to stdmsg.
111 void Token::print()
113 fprintf(stdmsg, "%s\n", toChars());
115 #endif
117 char *Token::toChars()
118 { char *p;
119 static char buffer[3 + 3 * sizeof(value) + 1];
121 p = buffer;
122 switch (value)
124 case TOKint32v:
125 #if IN_GCC
126 sprintf(buffer,"%d",(d_int32)int64value);
127 #else
128 sprintf(buffer,"%d",int32value);
129 #endif
130 break;
132 case TOKuns32v:
133 case TOKcharv:
134 case TOKwcharv:
135 case TOKdcharv:
136 #if IN_GCC
137 sprintf(buffer,"%uU",(d_uns32)uns64value);
138 #else
139 sprintf(buffer,"%uU",uns32value);
140 #endif
141 break;
143 case TOKint64v:
144 sprintf(buffer,"%"PRIdMAX"L",int64value);
145 break;
147 case TOKuns64v:
148 sprintf(buffer,"%"PRIuMAX"UL",uns64value);
149 break;
151 #if IN_GCC
152 case TOKfloat32v:
153 case TOKfloat64v:
154 case TOKfloat80v:
155 float80value.format(buffer, sizeof(buffer));
156 break;
157 case TOKimaginary32v:
158 case TOKimaginary64v:
159 case TOKimaginary80v:
160 float80value.format(buffer, sizeof(buffer));
161 // %% buffer
162 strcat(buffer, "i");
163 break;
164 #else
165 case TOKfloat32v:
166 sprintf(buffer,"%Lgf", float80value);
167 break;
169 case TOKfloat64v:
170 sprintf(buffer,"%Lg", float80value);
171 break;
173 case TOKfloat80v:
174 sprintf(buffer,"%LgL", float80value);
175 break;
177 case TOKimaginary32v:
178 sprintf(buffer,"%Lgfi", float80value);
179 break;
181 case TOKimaginary64v:
182 sprintf(buffer,"%Lgi", float80value);
183 break;
185 case TOKimaginary80v:
186 sprintf(buffer,"%LgLi", float80value);
187 break;
188 #endif
191 case TOKstring:
192 #if CSTRINGS
193 p = string;
194 #else
195 { OutBuffer buf;
197 buf.writeByte('"');
198 for (size_t i = 0; i < len; )
199 { unsigned c;
201 utf_decodeChar((unsigned char *)ustring, len, &i, &c);
202 switch (c)
204 case 0:
205 break;
207 case '"':
208 case '\\':
209 buf.writeByte('\\');
210 default:
211 if (isprint(c))
212 buf.writeByte(c);
213 else if (c <= 0x7F)
214 buf.printf("\\x%02x", c);
215 else if (c <= 0xFFFF)
216 buf.printf("\\u%04x", c);
217 else
218 buf.printf("\\U%08x", c);
219 continue;
221 break;
223 buf.writeByte('"');
224 if (postfix)
225 buf.writeByte('"');
226 buf.writeByte(0);
227 p = (char *)buf.extractData();
229 #endif
230 break;
232 case TOKidentifier:
233 case TOKenum:
234 case TOKstruct:
235 case TOKimport:
236 CASE_BASIC_TYPES:
237 p = ident->toChars();
238 break;
240 default:
241 p = toChars(value);
242 break;
244 return p;
// Spelling of a token kind: returns the tochars[] entry for value, or, when
// that entry is null, formats "TOK<number>" into a static buffer that the
// next such call overwrites.
247 char *Token::toChars(enum TOK value)
248 { char *p;
249 static char buffer[3 + 3 * sizeof(value) + 1];
251 p = tochars[value];
252 if (!p)
253 { sprintf(buffer,"TOK%d",value);
254 p = buffer;
256 return p;
259 /*************************** Lexer ********************************************/
// Static members of Lexer.
// freelist: recycled Token objects, pushed by nextToken() and popped by Token::operator new.
261 Token *Lexer::freelist = NULL;
// stringtable: scan() looks identifier spellings up here to find or create their Identifier.
262 StringTable Lexer::stringtable;
// stringbuffer: scratch buffer that the string-literal lexers reset, fill and then copy out of.
263 OutBuffer Lexer::stringbuffer;
// Construct a lexer over the text at base: lexing starts at base + begoffset
// and end is set to base + endoffset.  The location starts at line 1 of mod.
// doDocComment and commentToken are stored as given; anyToken starts at 0.
// If the text begins with "#!", that whole first line is skipped and the
// line number is set to 2.
265 Lexer::Lexer(Module *mod,
266 unsigned char *base, unsigned begoffset, unsigned endoffset,
267 int doDocComment, int commentToken)
268 : loc(mod, 1)
270 //printf("Lexer::Lexer(%p,%d)\n",base,length);
271 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod);
272 memset(&token,0,sizeof(token));
273 this->base = base;
274 this->end = base + endoffset;
275 p = base + begoffset;
276 this->mod = mod;
277 this->doDocComment = doDocComment;
278 this->anyToken = 0;
279 this->commentToken = commentToken;
280 //initKeywords();
282 /* If first line starts with '#!', ignore the line
285 if (p[0] == '#' && p[1] =='!')
287 p += 2;
// advance p to the end of the line (or end of input)
288 while (1)
289 { unsigned char c = *p;
290 switch (c)
292 case '\n':
293 p++;
294 break;
296 case '\r':
297 p++;
298 if (*p == '\n')
299 p++;
300 break;
// end of input: stop without consuming anything
302 case 0:
303 case 0x1A:
304 break;
306 default:
// non-ASCII byte: decode it, a Unicode LS or PS also ends the line
307 if (c & 0x80)
308 { unsigned u = decodeUTF();
309 if (u == PS || u == LS)
310 break;
312 p++;
313 continue;
315 break;
317 loc.linnum = 2;
// Report a lexing error at the lexer's current location (this->loc).
// The message is printed to stdmsg, prefixed with the location text when that
// is non-empty, only if mod is set and global.gag is clear.  fatal() is called
// once global.errors has reached 20, and global.errors is incremented.
// NOTE(review): the brace-only lines are missing from this listing, so which
// statements sit inside the if is inferred from the gaps in the line numbers.
322 void Lexer::error(const char *format, ...)
324 if (mod && !global.gag)
326 char *p = loc.toChars();
327 if (*p)
328 fprintf(stdmsg, "%s: ", p);
329 mem.free(p);
331 va_list ap;
332 va_start(ap, format);
333 vfprintf(stdmsg, format, ap);
334 va_end(ap);
336 fprintf(stdmsg, "\n");
337 fflush(stdmsg);
339 if (global.errors >= 20) // moderate blizzard of cascading messages
340 fatal();
342 global.errors++;
// Same as error(format, ...) but reports against the caller-supplied loc,
// which shadows the lexer's own loc member inside this function.
// NOTE(review): the brace-only lines are missing from this listing, so which
// statements sit inside the if is inferred from the gaps in the line numbers.
345 void Lexer::error(Loc loc, const char *format, ...)
347 if (mod && !global.gag)
349 char *p = loc.toChars();
350 if (*p)
351 fprintf(stdmsg, "%s: ", p);
352 mem.free(p);
354 va_list ap;
355 va_start(ap, format);
356 vfprintf(stdmsg, format, ap);
357 va_end(ap);
359 fprintf(stdmsg, "\n");
360 fflush(stdmsg);
362 if (global.errors >= 20) // moderate blizzard of cascading messages
363 fatal();
365 global.errors++;
// Advance the lexer's current token and return its kind (token.value).
// When a peeked token is already chained on token.next, it is copied into
// token and its node is pushed onto freelist; otherwise scan() fills token.
368 TOK Lexer::nextToken()
369 { Token *t;
371 if (token.next)
// consume the already-scanned lookahead token
373 t = token.next;
374 memcpy(&token,t,sizeof(Token));
// recycle the lookahead node
375 t->next = freelist;
376 freelist = t;
378 else
380 scan(&token);
382 //token.print();
383 return token.value;
// Return the token that follows ct without advancing the current token.
// If ct already has a successor it is returned as is; otherwise a new Token
// is scanned from the input and linked in as ct->next.
386 Token *Lexer::peek(Token *ct)
387 { Token *t;
389 if (ct->next)
390 t = ct->next;
391 else
393 t = new Token();
394 scan(t);
395 t->next = NULL;
396 ct->next = t;
398 return t;
401 /*********************************
402 * tk is on the opening (.
403 * Look ahead and return token that is past the closing ).
// Starting with tk on an opening parenthesis, peek forward while tracking
// parenthesis depth (parens) and curly-brace depth (curlynest).
// Returns the token after the matching closing parenthesis.  The scan also
// stops early, returning the stopping token itself, on end of file, on a
// closing curly with no matching opener inside the parentheses, or on a
// semicolon seen while curlynest is zero.
406 Token *Lexer::peekPastParen(Token *tk)
408 //printf("peekPastParen()\n");
409 int parens = 1;
410 int curlynest = 0;
411 while (1)
413 tk = peek(tk);
414 //tk->print();
415 switch (tk->value)
417 case TOKlparen:
418 parens++;
419 continue;
421 case TOKrparen:
422 --parens;
423 if (parens)
424 continue;
// matching close paren found: step to the token after it
425 tk = peek(tk);
426 break;
428 case TOKlcurly:
429 curlynest++;
430 continue;
432 case TOKrcurly:
433 if (--curlynest >= 0)
434 continue;
435 break;
// a semicolon only counts as a terminator outside any curly braces
437 case TOKsemicolon:
438 if (curlynest)
439 continue;
440 break;
442 case TOKeof:
443 break;
445 default:
446 continue;
448 return tk;
452 /**********************************
453 * Determine if string is a valid Identifier.
454 * Placed here because of commonality with Lexer functionality.
455 * Returns:
456 * 0 invalid
459 int Lexer::isValidIdentifier(char *p)
461 size_t len;
462 size_t idx;
464 if (!p || !*p)
465 goto Linvalid;
467 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars
468 goto Linvalid;
470 len = strlen(p);
471 idx = 0;
472 while (p[idx])
473 { dchar_t dc;
475 char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc);
476 if (q)
477 goto Linvalid;
479 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
480 goto Linvalid;
482 return 1;
484 Linvalid:
485 return 0;
488 /****************************
489 * Turn next token in buffer into a token.
// Lexer::scan: lex the next token starting at p and store it in *t.
// t's blockComment and lineComment are cleared first, and t->ptr records
// where the token text begins.  The loop skips white space, line ends
// (bumping loc.linnum), comments (unless commentToken is set, in which case
// a comment is returned as TOKcomment) and '#' lines handed to pragma().
// A 0 or 0x1A byte yields TOKeof.  Literals are delegated to number(),
// charConstant() and the various string-constant helpers.
// NOTE(review): brace-only lines, the "do" keywords of the do-while loops and
// comment terminators are missing from this listing; nesting has to be
// inferred from the gaps in the embedded line numbers.
492 void Lexer::scan(Token *t)
494 unsigned lastLine = loc.linnum;
495 unsigned linnum;
497 t->blockComment = NULL;
498 t->lineComment = NULL;
499 while (1)
501 t->ptr = p;
502 //printf("p = %p, *p = '%c'\n",p,*p);
503 switch (*p)
505 case 0:
506 case 0x1A:
507 t->value = TOKeof; // end of file
508 return;
510 case ' ':
511 case '\t':
512 case '\v':
513 case '\f':
514 p++;
515 continue; // skip white space
517 case '\r':
518 p++;
519 if (*p != '\n') // if CR stands by itself
520 loc.linnum++;
521 continue; // skip white space
523 case '\n':
524 p++;
525 loc.linnum++;
526 continue; // skip white space
528 case '0': case '1': case '2': case '3': case '4':
529 case '5': case '6': case '7': case '8': case '9':
530 t->value = number(t);
531 return;
533 #if CSTRINGS
534 case '\'':
535 t->value = charConstant(t, 0);
536 return;
538 case '"':
539 t->value = stringConstant(t,0);
540 return;
542 case 'l':
543 case 'L':
544 if (p[1] == '\'')
546 p++;
547 t->value = charConstant(t, 1);
548 return;
550 else if (p[1] == '"')
552 p++;
553 t->value = stringConstant(t, 1);
554 return;
556 #else
557 case '\'':
558 t->value = charConstant(t,0);
559 return;
// r"..." is a wysiwyg string; any other 'r' starts an identifier
561 case 'r':
562 if (p[1] != '"')
563 goto case_ident;
564 p++;
565 case '`':
566 t->value = wysiwygStringConstant(t, *p);
567 return;
// x"..." is a hex string; any other 'x' starts an identifier
569 case 'x':
570 if (p[1] != '"')
571 goto case_ident;
572 p++;
573 t->value = hexStringConstant(t);
574 return;
576 #if V2
// q"..." is a delimited string, q{...} a token string
577 case 'q':
578 if (p[1] == '"')
580 p++;
581 t->value = delimitedStringConstant(t);
582 return;
584 else if (p[1] == '{')
586 p++;
587 t->value = tokenStringConstant(t);
588 return;
590 else
591 goto case_ident;
592 #endif
594 case '"':
595 t->value = escapeStringConstant(t,0);
596 return;
598 case '\\': // escaped string literal
599 { unsigned c;
601 stringbuffer.reset();
// one escape sequence per backslash; the loop repeats while another backslash follows
604 p++;
605 switch (*p)
607 case 'u':
608 case 'U':
609 case '&':
610 c = escapeSequence();
611 stringbuffer.writeUTF8(c);
612 break;
614 default:
615 c = escapeSequence();
616 stringbuffer.writeByte(c);
617 break;
619 } while (*p == '\\');
// copy the accumulated bytes plus a terminating 0 into freshly allocated storage
620 t->len = stringbuffer.offset;
621 stringbuffer.writeByte(0);
622 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
623 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
624 t->postfix = 0;
625 t->value = TOKstring;
626 return;
629 case 'l':
630 case 'L':
631 #endif
632 case 'a': case 'b': case 'c': case 'd': case 'e':
633 case 'f': case 'g': case 'h': case 'i': case 'j':
634 case 'k': case 'm': case 'n': case 'o':
635 #if V2
636 case 'p': /*case 'q': case 'r':*/ case 's': case 't':
637 #else
638 case 'p': case 'q': /*case 'r':*/ case 's': case 't':
639 #endif
640 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
641 case 'z':
642 case 'A': case 'B': case 'C': case 'D': case 'E':
643 case 'F': case 'G': case 'H': case 'I': case 'J':
644 case 'K': case 'M': case 'N': case 'O':
645 case 'P': case 'Q': case 'R': case 'S': case 'T':
646 case 'U': case 'V': case 'W': case 'X': case 'Y':
647 case 'Z':
648 case '_':
// identifier or keyword: consume identifier characters, then look the
// spelling up in stringtable, creating the Identifier on first sight
649 case_ident:
650 { unsigned char c;
651 StringValue *sv;
652 Identifier *id;
656 c = *++p;
657 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
658 sv = stringtable.update((char *)t->ptr, p - t->ptr);
659 id = (Identifier *) sv->ptrvalue;
660 if (!id)
661 { id = new Identifier(sv->lstring.string,TOKidentifier);
662 sv->ptrvalue = id;
664 t->ident = id;
665 t->value = (enum TOK) id->value;
666 anyToken = 1;
667 if (*t->ptr == '_') // if special identifier token
// date, time and timestamp are formatted once from ctime() and then reused
669 static char date[11+1];
670 static char time[8+1];
671 static char timestamp[24+1];
673 if (!date[0]) // lazy evaluation
674 { time_t t;
675 char *p;
677 ::time(&t);
678 p = ctime(&t);
679 assert(p);
680 sprintf(date, "%.6s %.4s", p + 4, p + 20);
681 sprintf(time, "%.8s", p + 11);
682 sprintf(timestamp, "%.24s", p);
685 #if !V2
686 if (mod && id == Id::FILE)
688 t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars());
689 goto Lstring;
691 else if (mod && id == Id::LINE)
693 t->value = TOKint64v;
694 t->uns64value = loc.linnum;
696 else
697 #endif
698 if (id == Id::DATE)
700 t->ustring = (unsigned char *)date;
701 goto Lstring;
703 else if (id == Id::TIME)
705 t->ustring = (unsigned char *)time;
706 goto Lstring;
708 else if (id == Id::VENDOR)
710 #ifdef IN_GCC
711 t->ustring = (unsigned char *)"GDC";
712 #else
713 t->ustring = (unsigned char *)"Digital Mars D";
714 #endif
715 goto Lstring;
717 else if (id == Id::TIMESTAMP)
719 t->ustring = (unsigned char *)timestamp;
// shared tail for the special identifiers that turn into string literals
720 Lstring:
721 t->value = TOKstring;
722 Llen:
723 t->postfix = 0;
724 t->len = strlen((char *)t->ustring);
// version number token: digits before the last '.' become major, digits after it minor
726 else if (id == Id::VERSIONX)
727 { unsigned major = 0;
728 unsigned minor = 0;
730 for (char *p = global.version + 1; 1; p++)
732 char c = *p;
733 if (isdigit(c))
734 minor = minor * 10 + c - '0';
735 else if (c == '.')
736 { major = minor;
737 minor = 0;
739 else
740 break;
742 t->value = TOKint64v;
743 t->uns64value = major * 1000 + minor;
745 #if V2
746 else if (id == Id::EOFX)
748 t->value = TOKeof;
749 // Advance scanner to end of file
750 while (!(*p == 0 || *p == 0x1A))
751 p++;
753 #endif
755 //printf("t->value = %d\n",t->value);
756 return;
// '/' begins a divide-assign, a star comment, a // comment, a nesting plus comment, or plain divide
759 case '/':
760 p++;
761 switch (*p)
763 case '=':
764 p++;
765 t->value = TOKdivass;
766 return;
768 case '*':
769 p++;
770 linnum = loc.linnum;
771 while (1)
773 while (1)
774 { unsigned char c = *p;
775 switch (c)
777 case '/':
778 break;
780 case '\n':
781 loc.linnum++;
782 p++;
783 continue;
785 case '\r':
786 p++;
787 if (*p != '\n')
788 loc.linnum++;
789 continue;
791 case 0:
792 case 0x1A:
793 error("unterminated /* */ comment");
794 p = end;
795 t->value = TOKeof;
796 return;
798 default:
799 if (c & 0x80)
800 { unsigned u = decodeUTF();
801 if (u == PS || u == LS)
802 loc.linnum++;
804 p++;
805 continue;
807 break;
// a '/' closes the comment only when preceded by a '*' that is not the opening one
809 p++;
810 if (p[-2] == '*' && p - 3 != t->ptr)
811 break;
813 if (commentToken)
815 t->value = TOKcomment;
816 return;
818 else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
819 { // if /** but not /**/
820 getDocComment(t, lastLine == linnum);
822 continue;
824 case '/': // do // style comments
825 linnum = loc.linnum;
826 while (1)
827 { unsigned char c = *++p;
828 switch (c)
830 case '\n':
831 break;
833 case '\r':
834 if (p[1] == '\n')
835 p++;
836 break;
838 case 0:
839 case 0x1A:
840 if (commentToken)
842 p = end;
843 t->value = TOKcomment;
844 return;
846 if (doDocComment && t->ptr[2] == '/')
847 getDocComment(t, lastLine == linnum);
848 p = end;
849 t->value = TOKeof;
850 return;
852 default:
853 if (c & 0x80)
854 { unsigned u = decodeUTF();
855 if (u == PS || u == LS)
856 break;
858 continue;
860 break;
863 if (commentToken)
865 p++;
866 loc.linnum++;
867 t->value = TOKcomment;
868 return;
870 if (doDocComment && t->ptr[2] == '/')
871 getDocComment(t, lastLine == linnum);
873 p++;
874 loc.linnum++;
875 continue;
// nesting comment: nest counts the open levels and the comment ends when it returns to 0
877 case '+':
878 { int nest;
880 linnum = loc.linnum;
881 p++;
882 nest = 1;
883 while (1)
884 { unsigned char c = *p;
885 switch (c)
887 case '/':
888 p++;
889 if (*p == '+')
891 p++;
892 nest++;
894 continue;
896 case '+':
897 p++;
898 if (*p == '/')
900 p++;
901 if (--nest == 0)
902 break;
904 continue;
906 case '\r':
907 p++;
908 if (*p != '\n')
909 loc.linnum++;
910 continue;
912 case '\n':
913 loc.linnum++;
914 p++;
915 continue;
917 case 0:
918 case 0x1A:
919 error("unterminated /+ +/ comment");
920 p = end;
921 t->value = TOKeof;
922 return;
924 default:
925 if (c & 0x80)
926 { unsigned u = decodeUTF();
927 if (u == PS || u == LS)
928 loc.linnum++;
930 p++;
931 continue;
933 break;
935 if (commentToken)
937 t->value = TOKcomment;
938 return;
940 if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
941 { // if /++ but not /++/
942 getDocComment(t, lastLine == linnum);
944 continue;
947 t->value = TOKdiv;
948 return;
950 case '.':
951 p++;
952 if (isdigit(*p))
953 { /* Note that we don't allow ._1 and ._ as being
954 * valid floating point numbers.
956 p--;
957 t->value = inreal(t);
959 else if (p[0] == '.')
961 if (p[1] == '.')
962 { p += 2;
963 t->value = TOKdotdotdot;
965 else
966 { p++;
967 t->value = TOKslice;
970 else
971 t->value = TOKdot;
972 return;
974 case '&':
975 p++;
976 if (*p == '=')
977 { p++;
978 t->value = TOKandass;
980 else if (*p == '&')
981 { p++;
982 t->value = TOKandand;
984 else
985 t->value = TOKand;
986 return;
988 case '|':
989 p++;
990 if (*p == '=')
991 { p++;
992 t->value = TOKorass;
994 else if (*p == '|')
995 { p++;
996 t->value = TOKoror;
998 else
999 t->value = TOKor;
1000 return;
1002 case '-':
1003 p++;
1004 if (*p == '=')
1005 { p++;
1006 t->value = TOKminass;
1008 #if 0
1009 else if (*p == '>')
1010 { p++;
1011 t->value = TOKarrow;
1013 #endif
1014 else if (*p == '-')
1015 { p++;
1016 t->value = TOKminusminus;
1018 else
1019 t->value = TOKmin;
1020 return;
1022 case '+':
1023 p++;
1024 if (*p == '=')
1025 { p++;
1026 t->value = TOKaddass;
1028 else if (*p == '+')
1029 { p++;
1030 t->value = TOKplusplus;
1032 else
1033 t->value = TOKadd;
1034 return;
1036 case '<':
1037 p++;
1038 if (*p == '=')
1039 { p++;
1040 t->value = TOKle; // <=
1042 else if (*p == '<')
1043 { p++;
1044 if (*p == '=')
1045 { p++;
1046 t->value = TOKshlass; // <<=
1048 else
1049 t->value = TOKshl; // <<
1051 else if (*p == '>')
1052 { p++;
1053 if (*p == '=')
1054 { p++;
1055 t->value = TOKleg; // <>=
1057 else
1058 t->value = TOKlg; // <>
1060 else
1061 t->value = TOKlt; // <
1062 return;
1064 case '>':
1065 p++;
1066 if (*p == '=')
1067 { p++;
1068 t->value = TOKge; // >=
1070 else if (*p == '>')
1071 { p++;
1072 if (*p == '=')
1073 { p++;
1074 t->value = TOKshrass; // >>=
1076 else if (*p == '>')
1077 { p++;
1078 if (*p == '=')
1079 { p++;
1080 t->value = TOKushrass; // >>>=
1082 else
1083 t->value = TOKushr; // >>>
1085 else
1086 t->value = TOKshr; // >>
1088 else
1089 t->value = TOKgt; // >
1090 return;
1092 case '!':
1093 p++;
1094 if (*p == '=')
1095 { p++;
1096 if (*p == '=' && global.params.Dversion == 1)
1097 { p++;
1098 t->value = TOKnotidentity; // !==
1100 else
1101 t->value = TOKnotequal; // !=
1103 else if (*p == '<')
1104 { p++;
1105 if (*p == '>')
1106 { p++;
1107 if (*p == '=')
1108 { p++;
1109 t->value = TOKunord; // !<>=
1111 else
1112 t->value = TOKue; // !<>
1114 else if (*p == '=')
1115 { p++;
1116 t->value = TOKug; // !<=
1118 else
1119 t->value = TOKuge; // !<
1121 else if (*p == '>')
1122 { p++;
1123 if (*p == '=')
1124 { p++;
1125 t->value = TOKul; // !>=
1127 else
1128 t->value = TOKule; // !>
1130 else
1131 t->value = TOKnot; // !
1132 return;
1134 case '=':
1135 p++;
1136 if (*p == '=')
1137 { p++;
1138 if (*p == '=' && global.params.Dversion == 1)
1139 { p++;
1140 t->value = TOKidentity; // ===
1142 else
1143 t->value = TOKequal; // ==
1145 else
1146 t->value = TOKassign; // =
1147 return;
1149 case '~':
1150 p++;
1151 if (*p == '=')
1152 { p++;
1153 t->value = TOKcatass; // ~=
1155 else
1156 t->value = TOKtilde; // ~
1157 return;
// SINGLE expands to a case that consumes one character and returns the given token
1159 #define SINGLE(c,tok) case c: p++; t->value = tok; return;
1161 SINGLE('(', TOKlparen)
1162 SINGLE(')', TOKrparen)
1163 SINGLE('[', TOKlbracket)
1164 SINGLE(']', TOKrbracket)
1165 SINGLE('{', TOKlcurly)
1166 SINGLE('}', TOKrcurly)
1167 SINGLE('?', TOKquestion)
1168 SINGLE(',', TOKcomma)
1169 SINGLE(';', TOKsemicolon)
1170 SINGLE(':', TOKcolon)
1171 SINGLE('$', TOKdollar)
1173 #undef SINGLE
// DOUBLE expands to a case for c1 that yields tok2 when c2 follows, else tok1
1175 #define DOUBLE(c1,tok1,c2,tok2) \
1176 case c1: \
1177 p++; \
1178 if (*p == c2) \
1179 { p++; \
1180 t->value = tok2; \
1182 else \
1183 t->value = tok1; \
1184 return;
1186 DOUBLE('*', TOKmul, '=', TOKmulass)
1187 DOUBLE('%', TOKmod, '=', TOKmodass)
1188 DOUBLE('^', TOKxor, '=', TOKxorass)
1190 #undef DOUBLE
1192 case '#':
1193 p++;
1194 pragma();
1195 continue;
1197 default:
1198 { unsigned char c = *p;
1200 if (c & 0x80)
1201 { unsigned u = decodeUTF();
1203 // Check for start of unicode identifier
1204 if (isUniAlpha(u))
1205 goto case_ident;
1207 if (u == PS || u == LS)
1209 loc.linnum++;
1210 p++;
1211 continue;
// anything else is reported as unsupported and skipped
1214 if (isprint(c))
1215 error("unsupported char '%c'", c);
1216 else
1217 error("unsupported char 0x%02x", c);
1218 p++;
1219 continue;
1225 /*******************************************
1226 * Parse escape sequence.
// Decode the escape sequence whose first character (the one after the
// backslash) is at p, advance p past what was consumed, and return the
// resulting character value.
// Handles the single-character escapes, \x (2 hex digits), \u (4), \U (8),
// named entities of the form &name; resolved through HtmlNamedEntity(), and
// octal escapes of up to three digits.  At end of input a backslash is
// returned and nothing is consumed.  Malformed sequences are reported with
// error().
// NOTE(review): brace-only lines and the "do" of the octal loop are missing
// from this listing; nesting is inferred from the embedded line numbers.
1229 unsigned Lexer::escapeSequence()
1230 { unsigned c;
1231 int n;
1232 int ndigits;
1234 c = *p;
1235 switch (c)
// these characters stand for themselves
1237 case '\'':
1238 case '"':
1239 case '?':
1240 case '\\':
1241 Lconsume:
1242 p++;
1243 break;
1245 case 'a': c = 7; goto Lconsume;
1246 case 'b': c = 8; goto Lconsume;
1247 case 'f': c = 12; goto Lconsume;
1248 case 'n': c = 10; goto Lconsume;
1249 case 'r': c = 13; goto Lconsume;
1250 case 't': c = 9; goto Lconsume;
1251 case 'v': c = 11; goto Lconsume;
// hex escapes differ only in how many digits they require
1253 case 'u':
1254 ndigits = 4;
1255 goto Lhex;
1256 case 'U':
1257 ndigits = 8;
1258 goto Lhex;
1259 case 'x':
1260 ndigits = 2;
1261 Lhex:
1262 p++;
1263 c = *p;
1264 if (ishex(c))
1265 { unsigned v;
1267 n = 0;
1268 v = 0;
1269 while (1)
1271 if (isdigit(c))
1272 c -= '0';
1273 else if (islower(c))
1274 c -= 'a' - 10;
1275 else
1276 c -= 'A' - 10;
1277 v = v * 16 + c;
1278 c = *++p;
1279 if (++n == ndigits)
1280 break;
1281 if (!ishex(c))
1282 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
1283 break;
// the 2-digit \x form is exempt from the UTF validity check
1286 if (ndigits != 2 && !utf_isValidDchar(v))
1287 error("invalid UTF character \\U%08x", v);
1288 c = v;
1290 else
1291 error("undefined escape hex sequence \\%c\n",c);
1292 break;
1294 case '&': // named character entity
1295 for (unsigned char *idstart = ++p; 1; p++)
1297 switch (*p)
1299 case ';':
1300 c = HtmlNamedEntity(idstart, p - idstart);
// an all-ones result is treated as "name not found" and replaced by a space
1301 if (c == ~0)
1302 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
1303 c = ' ';
1305 p++;
1306 break;
1308 default:
1309 if (isalpha(*p) ||
1310 (p != idstart + 1 && isdigit(*p)))
1311 continue;
1312 error("unterminated named entity");
1313 break;
1315 break;
1317 break;
1319 case 0:
1320 case 0x1A: // end of file
1321 c = '\\';
1322 break;
1324 default:
1325 if (isoctal(c))
1326 { unsigned v;
1328 n = 0;
1329 v = 0;
// accumulate at most three octal digits
1332 v = v * 8 + (c - '0');
1333 c = *++p;
1334 } while (++n < 3 && isoctal(c));
1335 c = v;
1336 if (c > 0xFF)
1337 error("0%03o is larger than a byte", c);
1339 else
1340 error("undefined escape sequence \\%c\n",c);
1341 break;
1343 return c;
1346 /**************************************
// Lex a string literal in which no escapes are processed.  tc is the closing
// delimiter character; scan() passes the character at p, which is either the
// double quote of an r"..." literal or a backquote.
// Characters are copied to stringbuffer as they appear, except that a lone
// CR is stored as '\n' and the CR of a CR LF pair is dropped.  On the
// closing delimiter the text is copied into newly allocated storage
// (t->ustring, t->len), any postfix is read, and TOKstring is returned.
// Hitting end of input reports an error and yields an empty string token.
1349 TOK Lexer::wysiwygStringConstant(Token *t, int tc)
1350 { unsigned c;
1351 Loc start = loc;
// step over the opening delimiter
1353 p++;
1354 stringbuffer.reset();
1355 while (1)
1357 c = *p++;
1358 switch (c)
1360 case '\n':
1361 loc.linnum++;
1362 break;
1364 case '\r':
1365 if (*p == '\n')
1366 continue; // ignore
1367 c = '\n'; // treat EndOfLine as \n character
1368 loc.linnum++;
1369 break;
1371 case 0:
1372 case 0x1A:
1373 error("unterminated string constant starting at %s", start.toChars());
1374 t->ustring = (unsigned char *)"";
1375 t->len = 0;
1376 t->postfix = 0;
1377 return TOKstring;
// a quote character that is not the active delimiter is ordinary text
1379 case '"':
1380 case '`':
1381 if (c == tc)
1383 t->len = stringbuffer.offset;
1384 stringbuffer.writeByte(0);
1385 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1386 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1387 stringPostfix(t);
1388 return TOKstring;
1390 break;
1392 default:
// non-ASCII: back up to the lead byte, decode, and store the code point as UTF-8
1393 if (c & 0x80)
1394 { p--;
1395 unsigned u = decodeUTF();
1396 p++;
1397 if (u == PS || u == LS)
1398 loc.linnum++;
1399 stringbuffer.writeUTF8(u);
1400 continue;
1402 break;
1404 stringbuffer.writeByte(c);
1408 /**************************************
1409 * Lex hex strings:
1410 * x"0A ae 34FE BD"
1413 TOK Lexer::hexStringConstant(Token *t)
1414 { unsigned c;
1415 Loc start = loc;
1416 unsigned n = 0;
1417 unsigned v;
1419 p++;
1420 stringbuffer.reset();
1421 while (1)
1423 c = *p++;
1424 switch (c)
1426 case ' ':
1427 case '\t':
1428 case '\v':
1429 case '\f':
1430 continue; // skip white space
1432 case '\r':
1433 if (*p == '\n')
1434 continue; // ignore
1435 // Treat isolated '\r' as if it were a '\n'
1436 case '\n':
1437 loc.linnum++;
1438 continue;
1440 case 0:
1441 case 0x1A:
1442 error("unterminated string constant starting at %s", start.toChars());
1443 t->ustring = (unsigned char *)"";
1444 t->len = 0;
1445 t->postfix = 0;
1446 return TOKstring;
1448 case '"':
1449 if (n & 1)
1450 { error("odd number (%d) of hex characters in hex string", n);
1451 stringbuffer.writeByte(v);
1453 t->len = stringbuffer.offset;
1454 stringbuffer.writeByte(0);
1455 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1456 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1457 stringPostfix(t);
1458 return TOKstring;
1460 default:
1461 if (c >= '0' && c <= '9')
1462 c -= '0';
1463 else if (c >= 'a' && c <= 'f')
1464 c -= 'a' - 10;
1465 else if (c >= 'A' && c <= 'F')
1466 c -= 'A' - 10;
1467 else if (c & 0x80)
1468 { p--;
1469 unsigned u = decodeUTF();
1470 p++;
1471 if (u == PS || u == LS)
1472 loc.linnum++;
1473 else
1474 error("non-hex character \\u%x", u);
1476 else
1477 error("non-hex character '%c'", c);
1478 if (n & 1)
1479 { v = (v << 4) | c;
1480 stringbuffer.writeByte(v);
1482 else
1483 v = c;
1484 n++;
1485 break;
1491 #if V2
1492 /**************************************
1493 * Lex delimited strings:
1494 * q"(foo(xxx))" // "foo(xxx)"
1495 * q"[foo(]" // "foo("
1496 * q"/foo]/" // "foo]"
1497 * q"HERE
1498 * foo
1499 * HERE" // "foo\n"
1500 * Input:
1501 * p is on the "
// Lex a q"..." delimited string; p is on the double quote on entry.
// The first character after the quote selects the delimiter:
//   - one of ( { [ < opens a nesting delimiter closed by ) } ] > ;
//   - a letter, '_' or Unicode alpha starts a heredoc identifier, which is
//     read with scan() and must be followed by a blank rest of line;
//   - any other character is both the opening and the closing delimiter.
// Text up to the closing delimiter (or, for a heredoc, up to a line that
// starts with the same identifier) is collected in stringbuffer as UTF-8.
// The closing delimiter must be followed by a double quote.  Returns
// TOKstring; an unterminated literal reports an error and yields an empty
// string.
// NOTE(review): brace-only lines are missing from this listing, so the exact
// nesting of the branches below is inferred from the embedded line numbers.
1504 TOK Lexer::delimitedStringConstant(Token *t)
1505 { unsigned c;
1506 Loc start = loc;
1507 unsigned delimleft = 0;
1508 unsigned delimright = 0;
1509 unsigned nest = 1;
1510 unsigned nestcount;
1511 Identifier *hereid = NULL;
1512 unsigned blankrol = 0;
1513 unsigned startline = 0;
1515 p++;
1516 stringbuffer.reset();
1517 while (1)
1519 c = *p++;
1520 //printf("c = '%c'\n", c);
1521 switch (c)
1523 case '\n':
1524 Lnextline:
1525 loc.linnum++;
1526 startline = 1;
// the line end that finishes the heredoc identifier line is not part of the string
1527 if (blankrol)
1528 { blankrol = 0;
1529 continue;
1531 if (hereid)
1533 stringbuffer.writeUTF8(c);
1534 continue;
1536 break;
1538 case '\r':
1539 if (*p == '\n')
1540 continue; // ignore
1541 c = '\n'; // treat EndOfLine as \n character
1542 goto Lnextline;
1544 case 0:
1545 case 0x1A:
1546 goto Lerror;
1548 default:
1549 if (c & 0x80)
1550 { p--;
1551 c = decodeUTF();
1552 p++;
1553 if (c == PS || c == LS)
1554 goto Lnextline;
1556 break;
// first character after the quote: decide what kind of delimiter is in use
1558 if (delimleft == 0)
1559 { delimleft = c;
1560 nest = 1;
1561 nestcount = 1;
1562 if (c == '(')
1563 delimright = ')';
1564 else if (c == '{')
1565 delimright = '}';
1566 else if (c == '[')
1567 delimright = ']';
1568 else if (c == '<')
1569 delimright = '>';
1570 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1571 { // Start of identifier; must be a heredoc
1572 Token t;
1573 p--;
1574 scan(&t); // read in heredoc identifier
1575 if (t.value != TOKidentifier)
1576 { error("identifier expected for heredoc, not %s", t.toChars());
1577 delimright = c;
1579 else
1580 { hereid = t.ident;
1581 //printf("hereid = '%s'\n", hereid->toChars());
1582 blankrol = 1;
1584 nest = 0;
1586 else
1587 { delimright = c;
1588 nest = 0;
1591 else
1593 if (blankrol)
1594 { error("heredoc rest of line should be blank");
1595 blankrol = 0;
1596 continue;
// nesting delimiters: the string ends when the nesting count drops to zero
1598 if (nest == 1)
1600 if (c == delimleft)
1601 nestcount++;
1602 else if (c == delimright)
1603 { nestcount--;
1604 if (nestcount == 0)
1605 goto Ldone;
1608 else if (c == delimright)
1609 goto Ldone;
// at the start of a line, check whether the heredoc identifier appears
1610 if (startline && isalpha(c))
1611 { Token t;
1612 unsigned char *psave = p;
1613 p--;
1614 scan(&t); // read in possible heredoc identifier
1615 //printf("endid = '%s'\n", t.ident->toChars());
1616 if (t.value == TOKidentifier && t.ident->equals(hereid))
1617 { /* should check that rest of line is blank
1619 goto Ldone;
// not the terminator: rewind and treat the character as ordinary text
1621 p = psave;
1623 stringbuffer.writeUTF8(c);
1624 startline = 0;
1628 Ldone:
1629 if (*p == '"')
1630 p++;
1631 else
1632 error("delimited string must end in %c\"", delimright);
1633 t->len = stringbuffer.offset;
1634 stringbuffer.writeByte(0);
1635 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1636 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1637 stringPostfix(t);
1638 return TOKstring;
1640 Lerror:
1641 error("unterminated string constant starting at %s", start.toChars());
1642 t->ustring = (unsigned char *)"";
1643 t->len = 0;
1644 t->postfix = 0;
1645 return TOKstring;
1648 /**************************************
1649 * Lex delimited strings:
1650 * q{ foo(xxx) } // " foo(xxx) "
1651 * q{foo(} // "foo("
1652 * q{{foo}"}"} // "{foo}"}""
1653 * Input:
1654 * p is on the q
// Lex a q{...} token string.  The body is tokenised with scan() only to find
// the matching closing curly brace (nest tracks brace depth); the string
// value is the raw source text between the braces, copied into newly
// allocated, 0-terminated storage.  Returns TOKstring; reaching end of file
// first reports an error and yields an empty string.
// NOTE(review): pstart is set to p + 1, so this relies on p being on the
// opening curly brace at entry, which is how scan() calls it; the header
// comment in the file says p is on the q. Confirm which is intended.
1657 TOK Lexer::tokenStringConstant(Token *t)
1659 unsigned nest = 1;
1660 Loc start = loc;
1661 unsigned char *pstart = ++p;
1663 while (1)
1664 { Token tok;
1666 scan(&tok);
1667 switch (tok.value)
1669 case TOKlcurly:
1670 nest++;
1671 continue;
1673 case TOKrcurly:
1674 if (--nest == 0)
1675 goto Ldone;
1676 continue;
1678 case TOKeof:
1679 goto Lerror;
1681 default:
1682 continue;
1686 Ldone:
// p is just past the closing brace, so the text ends one character earlier
1687 t->len = p - 1 - pstart;
1688 t->ustring = (unsigned char *)mem.malloc(t->len + 1);
1689 memcpy(t->ustring, pstart, t->len);
1690 t->ustring[t->len] = 0;
1691 stringPostfix(t);
1692 return TOKstring;
1694 Lerror:
1695 error("unterminated token string constant starting at %s", start.toChars());
1696 t->ustring = (unsigned char *)"";
1697 t->len = 0;
1698 t->postfix = 0;
1699 return TOKstring;
1702 #endif
1705 /**************************************
// Lex a double-quoted string literal with escape processing; p is on the
// opening quote on entry.  Escapes are decoded by escapeSequence(): results
// of \u, \U and named-entity escapes are stored as UTF-8, other escape results as a
// single byte.  A lone CR, and a Unicode LS or PS, are stored as '\n'; the CR
// of a CR LF pair is dropped.  On the closing quote the text is copied into
// newly allocated storage, any postfix is read and TOKstring is returned.
// End of input reports an error and yields an empty string.
// The wide parameter is not used in this function.
1708 TOK Lexer::escapeStringConstant(Token *t, int wide)
1709 { unsigned c;
1710 Loc start = loc;
1712 p++;
1713 stringbuffer.reset();
1714 while (1)
1716 c = *p++;
1717 switch (c)
1719 case '\\':
1720 switch (*p)
1722 case 'u':
1723 case 'U':
1724 case '&':
1725 c = escapeSequence();
1726 stringbuffer.writeUTF8(c);
1727 continue;
1729 default:
1730 c = escapeSequence();
1731 break;
1733 break;
1735 case '\n':
1736 loc.linnum++;
1737 break;
1739 case '\r':
1740 if (*p == '\n')
1741 continue; // ignore
1742 c = '\n'; // treat EndOfLine as \n character
1743 loc.linnum++;
1744 break;
1746 case '"':
1747 t->len = stringbuffer.offset;
1748 stringbuffer.writeByte(0);
1749 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1750 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1751 stringPostfix(t);
1752 return TOKstring;
1754 case 0:
1755 case 0x1A:
// back up so the end-of-input byte is seen again by the caller
1756 p--;
1757 error("unterminated string constant starting at %s", start.toChars());
1758 t->ustring = (unsigned char *)"";
1759 t->len = 0;
1760 t->postfix = 0;
1761 return TOKstring;
1763 default:
1764 if (c & 0x80)
1766 p--;
1767 c = decodeUTF();
1768 if (c == LS || c == PS)
1769 { c = '\n';
1770 loc.linnum++;
1772 p++;
1773 stringbuffer.writeUTF8(c);
1774 continue;
1776 break;
1778 stringbuffer.writeByte(c);
1782 /**************************************
// Lex a character literal; p is on the opening single quote on entry.
// The character value is stored in t->uns64value and the token kind is
// returned: TOKcharv by default, TOKwcharv for a \u escape or a non-ASCII
// character in the 16-bit ranges tested below, TOKdcharv for \U, a named
// entity, or any other non-ASCII character.  A line end, end of input, an
// empty literal or a missing closing quote reports "unterminated character
// constant".
// The wide parameter is not used in this function.
// NOTE(review): the "L1:" label targeted by the goto below is missing from
// this listing (short lines were dropped); the gap in the embedded line
// numbers puts it directly after the '\n' case label.
1785 TOK Lexer::charConstant(Token *t, int wide)
1787 unsigned c;
1788 TOK tk = TOKcharv;
1790 //printf("Lexer::charConstant\n");
1791 p++;
1792 c = *p++;
1793 switch (c)
1795 case '\\':
1796 switch (*p)
1798 case 'u':
1799 t->uns64value = escapeSequence();
1800 tk = TOKwcharv;
1801 break;
1803 case 'U':
1804 case '&':
1805 t->uns64value = escapeSequence();
1806 tk = TOKdcharv;
1807 break;
1809 default:
1810 t->uns64value = escapeSequence();
1811 break;
1813 break;
1815 case '\n':
1817 loc.linnum++;
1818 case '\r':
1819 case 0:
1820 case 0x1A:
1821 case '\'':
1822 error("unterminated character constant");
1823 return tk;
1825 default:
1826 if (c & 0x80)
1828 p--;
1829 c = decodeUTF();
1830 p++;
1831 if (c == LS || c == PS)
1832 goto L1;
1833 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1834 tk = TOKwcharv;
1835 else
1836 tk = TOKdcharv;
1838 t->uns64value = c;
1839 break;
// the literal must be closed by a single quote
1842 if (*p != '\'')
1843 { error("unterminated character constant");
1844 return tk;
1846 p++;
1847 return tk;
1850 /***************************************
1851 * Get postfix of string literal.
1854 void Lexer::stringPostfix(Token *t)
1856 switch (*p)
1858 case 'c':
1859 case 'w':
1860 case 'd':
1861 t->postfix = *p;
1862 p++;
1863 break;
1865 default:
1866 t->postfix = 0;
1867 break;
/***************************************
 * Read \u or \U unicode sequence
 * Input:
 *	u	'u' or 'U'
 * Returns:
 *	the value of the hex digits read: 4 digits for 'u', 8 for 'U'.
 *	If a non-hex character is met first, an error is reported and the
 *	value accumulated so far is returned.
 *
 * This function is compiled out (#if 0).
 */

#if 0
unsigned Lexer::wchar(unsigned u)
{
    const unsigned nchars = (u == 'U') ? 8 : 4;
    unsigned value = 0;
    unsigned n = 0;

    while (1)
    {
	++p;				// advance first; p ends on the char after the digits
	if (n == nchars)
	    break;

	unsigned char c = *p;
	if (!ishex(c))
	{
	    error("\\%c sequence must be followed by %d hex characters", u, nchars);
	    break;
	}

	// Convert the hex digit to its numeric value
	if (isdigit(c))
	    c -= '0';
	else if (islower(c))
	    c -= 'a' - 10;
	else
	    c -= 'A' - 10;

	value = (value << 4) | c;
	n++;
    }
    return value;
}
#endif
1910 /**************************************
1911 * Read in a number.
1912 * If it's an integer, store it in tok.TKutok.Vlong.
1913 * integers can be decimal, octal or hex
1914 * Handle the suffixes U, UL, LU, L, etc.
1915 * If it's double, store it in tok.TKutok.Vdouble.
1916 * Returns:
1917 * TKnum
1918 * TKdouble,...
1921 TOK Lexer::number(Token *t)
1923 // We use a state machine to collect numbers
1924 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
1925 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
1926 STATE_hexh, STATE_error };
1927 enum STATE state;
1929 enum FLAGS
1930 { FLAGS_decimal = 1, // decimal
1931 FLAGS_unsigned = 2, // u or U suffix
1932 FLAGS_long = 4, // l or L suffix
1934 enum FLAGS flags = FLAGS_decimal;
1936 int i;
1937 int base;
1938 unsigned c;
1939 unsigned char *start;
1940 TOK result;
1942 //printf("Lexer::number()\n");
1943 state = STATE_initial;
1944 base = 0;
1945 stringbuffer.reset();
1946 start = p;
1947 while (1)
1949 c = *p;
1950 switch (state)
1952 case STATE_initial: // opening state
1953 if (c == '0')
1954 state = STATE_0;
1955 else
1956 state = STATE_decimal;
1957 break;
1959 case STATE_0:
1960 flags = (FLAGS) (flags & ~FLAGS_decimal);
1961 switch (c)
1963 #if ZEROH
1964 case 'H': // 0h
1965 case 'h':
1966 goto hexh;
1967 #endif
1968 case 'X':
1969 case 'x':
1970 state = STATE_hex0;
1971 break;
1973 case '.':
1974 if (p[1] == '.') // .. is a separate token
1975 goto done;
1976 case 'i':
1977 case 'f':
1978 case 'F':
1979 goto real;
1980 #if ZEROH
1981 case 'E':
1982 case 'e':
1983 goto case_hex;
1984 #endif
1985 case 'B':
1986 case 'b':
1987 state = STATE_binary0;
1988 break;
1990 case '0': case '1': case '2': case '3':
1991 case '4': case '5': case '6': case '7':
1992 state = STATE_octal;
1993 break;
1995 #if ZEROH
1996 case '8': case '9': case 'A':
1997 case 'C': case 'D': case 'F':
1998 case 'a': case 'c': case 'd': case 'f':
1999 case_hex:
2000 state = STATE_hexh;
2001 break;
2002 #endif
2003 case '_':
2004 state = STATE_octal;
2005 p++;
2006 continue;
2008 case 'L':
2009 if (p[1] == 'i')
2010 goto real;
2011 goto done;
2013 default:
2014 goto done;
2016 break;
2018 case STATE_decimal: // reading decimal number
2019 if (!isdigit(c))
2021 #if ZEROH
2022 if (ishex(c)
2023 || c == 'H' || c == 'h'
2025 goto hexh;
2026 #endif
2027 if (c == '_') // ignore embedded _
2028 { p++;
2029 continue;
2031 if (c == '.' && p[1] != '.')
2032 goto real;
2033 else if (c == 'i' || c == 'f' || c == 'F' ||
2034 c == 'e' || c == 'E')
2036 real: // It's a real number. Back up and rescan as a real
2037 p = start;
2038 return inreal(t);
2040 else if (c == 'L' && p[1] == 'i')
2041 goto real;
2042 goto done;
2044 break;
2046 case STATE_hex0: // reading hex number
2047 case STATE_hex:
2048 if (!ishex(c))
2050 if (c == '_') // ignore embedded _
2051 { p++;
2052 continue;
2054 if (c == '.' && p[1] != '.')
2055 goto real;
2056 if (c == 'P' || c == 'p' || c == 'i')
2057 goto real;
2058 if (state == STATE_hex0)
2059 error("Hex digit expected, not '%c'", c);
2060 goto done;
2062 state = STATE_hex;
2063 break;
2065 #if ZEROH
2066 hexh:
2067 state = STATE_hexh;
2068 case STATE_hexh: // parse numbers like 0FFh
2069 if (!ishex(c))
2071 if (c == 'H' || c == 'h')
2073 p++;
2074 base = 16;
2075 goto done;
2077 else
2079 // Check for something like 1E3 or 0E24
2080 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) ||
2081 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset))
2082 goto real;
2083 error("Hex digit expected, not '%c'", c);
2084 goto done;
2087 break;
2088 #endif
2090 case STATE_octal: // reading octal number
2091 case STATE_octale: // reading octal number with non-octal digits
2092 if (!isoctal(c))
2094 #if ZEROH
2095 if (ishex(c)
2096 || c == 'H' || c == 'h'
2098 goto hexh;
2099 #endif
2100 if (c == '_') // ignore embedded _
2101 { p++;
2102 continue;
2104 if (c == '.' && p[1] != '.')
2105 goto real;
2106 if (c == 'i')
2107 goto real;
2108 if (isdigit(c))
2110 state = STATE_octale;
2112 else
2113 goto done;
2115 break;
2117 case STATE_binary0: // starting binary number
2118 case STATE_binary: // reading binary number
2119 if (c != '0' && c != '1')
2121 #if ZEROH
2122 if (ishex(c)
2123 || c == 'H' || c == 'h'
2125 goto hexh;
2126 #endif
2127 if (c == '_') // ignore embedded _
2128 { p++;
2129 continue;
2131 if (state == STATE_binary0)
2132 { error("binary digit expected");
2133 state = STATE_error;
2134 break;
2136 else
2137 goto done;
2139 state = STATE_binary;
2140 break;
2142 case STATE_error: // for error recovery
2143 if (!isdigit(c)) // scan until non-digit
2144 goto done;
2145 break;
2147 default:
2148 assert(0);
2150 stringbuffer.writeByte(c);
2151 p++;
2153 done:
2154 stringbuffer.writeByte(0); // terminate string
2155 if (state == STATE_octale)
2156 error("Octal digit expected");
2158 uinteger_t n; // unsigned >=64 bit integer type
2160 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0))
2161 n = stringbuffer.data[0] - '0';
2162 else
2164 // Convert string to integer
2165 #if __DMC__
2166 errno = 0;
2167 n = strtoull((char *)stringbuffer.data,NULL,base);
2168 if (errno == ERANGE)
2169 error("integer overflow");
2170 #else
2171 // Not everybody implements strtoull()
2172 char *p = (char *)stringbuffer.data;
2173 int r = 10, d;
2175 if (*p == '0')
2177 if (p[1] == 'x' || p[1] == 'X')
2178 p += 2, r = 16;
2179 else if (p[1] == 'b' || p[1] == 'B')
2180 p += 2, r = 2;
2181 else if (isdigit(p[1]))
2182 p += 1, r = 8;
2185 n = 0;
2186 while (1)
2188 if (*p >= '0' && *p <= '9')
2189 d = *p - '0';
2190 else if (*p >= 'a' && *p <= 'z')
2191 d = *p - 'a' + 10;
2192 else if (*p >= 'A' && *p <= 'Z')
2193 d = *p - 'A' + 10;
2194 else
2195 break;
2196 if (d >= r)
2197 break;
2198 if (n && n * r + d <= n)
2200 error ("integer overflow");
2201 break;
2204 n = n * r + d;
2205 p++;
2207 #endif
2208 if (sizeof(n) > 8 &&
2209 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits
2210 error("integer overflow");
2213 // Parse trailing 'u', 'U', 'l' or 'L' in any combination
2214 while (1)
2215 { unsigned char f;
2217 switch (*p)
2218 { case 'U':
2219 case 'u':
2220 f = FLAGS_unsigned;
2221 goto L1;
2223 case 'l':
2224 if (1 || !global.params.useDeprecated)
2225 error("'l' suffix is deprecated, use 'L' instead");
2226 case 'L':
2227 f = FLAGS_long;
2229 p++;
2230 if (flags & f)
2231 error("unrecognized token");
2232 flags = (FLAGS) (flags | f);
2233 continue;
2234 default:
2235 break;
2237 break;
2240 switch (flags)
2242 case 0:
2243 /* Octal or Hexadecimal constant.
2244 * First that fits: int, uint, long, ulong
2246 if (n & 0x8000000000000000LL)
2247 result = TOKuns64v;
2248 else if (n & 0xFFFFFFFF00000000LL)
2249 result = TOKint64v;
2250 else if (n & 0x80000000)
2251 result = TOKuns32v;
2252 else
2253 result = TOKint32v;
2254 break;
2256 case FLAGS_decimal:
2257 /* First that fits: int, long, long long
2259 if (n & 0x8000000000000000LL)
2260 { error("signed integer overflow");
2261 result = TOKuns64v;
2263 else if (n & 0xFFFFFFFF80000000LL)
2264 result = TOKint64v;
2265 else
2266 result = TOKint32v;
2267 break;
2269 case FLAGS_unsigned:
2270 case FLAGS_decimal | FLAGS_unsigned:
2271 /* First that fits: uint, ulong
2273 if (n & 0xFFFFFFFF00000000LL)
2274 result = TOKuns64v;
2275 else
2276 result = TOKuns32v;
2277 break;
2279 case FLAGS_decimal | FLAGS_long:
2280 if (n & 0x8000000000000000LL)
2281 { error("signed integer overflow");
2282 result = TOKuns64v;
2284 else
2285 result = TOKint64v;
2286 break;
2288 case FLAGS_long:
2289 if (n & 0x8000000000000000LL)
2290 result = TOKuns64v;
2291 else
2292 result = TOKint64v;
2293 break;
2295 case FLAGS_unsigned | FLAGS_long:
2296 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
2297 result = TOKuns64v;
2298 break;
2300 default:
2301 #ifdef DEBUG
2302 printf("%x\n",flags);
2303 #endif
2304 assert(0);
2306 t->uns64value = n;
2307 return result;
2310 /**************************************
2311 * Read in characters, converting them to real.
2312 * Bugs:
2313 * Exponent overflow not detected.
2314 * Too much requested precision is not detected.
// Summary of the code below:
//   - The literal text is copied (without embedded underscores) into
//     stringbuffer by a state machine driven by dblstate.
//   - The text is then converted with strtold (real_t::parse under IN_GCC)
//     and the value stored in t->float80value.
//   - A trailing f/F selects TOKfloat32v, l/L selects TOKfloat80v, anything
//     else gives TOKfloat64v; a following i/I turns the result into the
//     matching TOKimaginary* token.
//   - errno is cleared after the long double conversion, so the final
//     ERANGE test only sees errors from the float or double re-parse.
2317 TOK Lexer::inreal(Token *t)
// Under DMC the function carries __in / __out contract blocks that check
// the first character and the kind of token returned.
2318 #ifdef __DMC__
2319 __in
2321 assert(*p == '.' || isdigit(*p));
2323 __out (result)
2325 switch (result)
2327 case TOKfloat32v:
2328 case TOKfloat64v:
2329 case TOKfloat80v:
2330 case TOKimaginary32v:
2331 case TOKimaginary64v:
2332 case TOKimaginary80v:
2333 break;
2335 default:
2336 assert(0);
2339 __body
2340 #endif /* __DMC__ */
2341 { int dblstate;
2342 unsigned c;
2343 char hex; // is this a hexadecimal-floating-constant?
2344 TOK result;
2346 //printf("Lexer::inreal()\n");
2347 stringbuffer.reset();
2348 dblstate = 0;
2349 hex = 0;
// Lnext is the re-entry point used to drop an embedded underscore: the
// character is skipped without being written to stringbuffer.
2350 Lnext:
2351 while (1)
2353 // Get next char from input
2354 c = *p++;
2355 //printf("dblstate = %d, c = '%c'\n", dblstate, c);
// The inner loop re-dispatches the same character after a state change
// (the continue statements below), and is left by break once the character
// has been accepted for writing.
2356 while (1)
2358 switch (dblstate)
2360 case 0: // opening state
2361 if (c == '0')
2362 dblstate = 9;
2363 else if (c == '.')
2364 dblstate = 3;
2365 else
2366 dblstate = 1;
2367 break;
// State 9: the character right after a leading 0; an x or X marks a
// hexadecimal floating constant, anything else is handled as state 1.
2369 case 9:
2370 dblstate = 1;
2371 if (c == 'X' || c == 'x')
2372 { hex++;
2373 break;
2375 case 1: // digits to left of .
2376 case 3: // digits to right of .
2377 case 7: // continuing exponent digits
2378 if (!isdigit(c) && !(hex && isxdigit(c)))
2380 if (c == '_')
2381 goto Lnext; // ignore embedded '_'
// Not a digit: move to the following state (2, 4 or 8) and look at the
// same character again.
2382 dblstate++;
2383 continue;
2385 break;
2387 case 2: // no more digits to left of .
2388 if (c == '.')
2389 { dblstate++;
2390 break;
2392 case 4: // no more digits to right of .
2393 if ((c == 'E' || c == 'e') ||
2394 hex && (c == 'P' || c == 'p'))
2395 { dblstate = 5;
2396 hex = 0; // exponent is always decimal
2397 break;
// Reaching this point with hex still set means a hexadecimal constant had
// no p or P exponent.
2399 if (hex)
2400 error("binary-exponent-part required");
2401 goto done;
2403 case 5: // looking immediately to right of E
2404 dblstate++;
2405 if (c == '-' || c == '+')
2406 break;
2407 case 6: // 1st exponent digit expected
2408 if (!isdigit(c))
2409 error("exponent expected");
2410 dblstate++;
2411 break;
2413 case 8: // past end of exponent digits
2414 goto done;
2416 break;
2418 stringbuffer.writeByte(c);
// The loop above read one character beyond the literal (c = *p++), so step
// back onto it before looking for suffixes.
2420 done:
2421 p--;
2423 stringbuffer.writeByte(0);
// Under DMC on Windows the decimal point used by the conversion routines is
// forced to a dot for the duration of the conversions and restored below.
2425 #if _WIN32 && __DMC__
2426 char *save = __locale_decpoint;
2427 __locale_decpoint = ".";
2428 #endif
2429 #ifdef IN_GCC
2430 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble);
2431 #else
2432 t->float80value = strtold((char *)stringbuffer.data, NULL);
2433 #endif
// errno is reset here, after the long double conversion; only the
// conversions in the switch below can make the ERANGE test at the end fire.
2434 errno = 0;
2435 switch (*p)
2437 case 'F':
2438 case 'f':
// The result of this conversion is discarded; it is performed so that a
// range error for the narrower type shows up in errno.
2439 #ifdef IN_GCC
2440 real_t::parse((char *)stringbuffer.data, real_t::Float);
2441 #else
2442 strtof((char *)stringbuffer.data, NULL);
2443 #endif
2444 result = TOKfloat32v;
2445 p++;
2446 break;
// No suffix: the literal is a double. The suffix character is not consumed.
2448 default:
2449 #ifdef IN_GCC
2450 real_t::parse((char *)stringbuffer.data, real_t::Double);
2451 #else
2452 strtod((char *)stringbuffer.data, NULL);
2453 #endif
2454 result = TOKfloat64v;
2455 break;
2457 case 'l':
2458 if (!global.params.useDeprecated)
2459 error("'l' suffix is deprecated, use 'L' instead");
2460 case 'L':
2461 result = TOKfloat80v;
2462 p++;
2463 break;
// An i (or deprecated I) after the optional size suffix makes the literal
// imaginary; the real token kind is mapped to the imaginary one of the
// same width.
2465 if (*p == 'i' || *p == 'I')
2467 if (!global.params.useDeprecated && *p == 'I')
2468 error("'I' suffix is deprecated, use 'i' instead");
2469 p++;
2470 switch (result)
2472 case TOKfloat32v:
2473 result = TOKimaginary32v;
2474 break;
2475 case TOKfloat64v:
2476 result = TOKimaginary64v;
2477 break;
2478 case TOKfloat80v:
2479 result = TOKimaginary80v;
2480 break;
2483 #if _WIN32 && __DMC__
2484 __locale_decpoint = save;
2485 #endif
2486 if (errno == ERANGE)
2487 error("number is not representable");
2488 return result;
2491 /*********************************************
2492 * Do pragma.
2493 * Currently, the only pragma supported is:
2494 * #line linnum [filespec]
2497 void Lexer::pragma()
2499 Token tok;
2500 int linnum;
2501 char *filespec = NULL;
2502 Loc loc = this->loc;
2504 scan(&tok);
2505 if (tok.value != TOKidentifier || tok.ident != Id::line)
2506 goto Lerr;
2508 scan(&tok);
2509 if (tok.value == TOKint32v || tok.value == TOKint64v)
2510 linnum = tok.uns64value - 1;
2511 else
2512 goto Lerr;
2514 while (1)
2516 switch (*p)
2518 case 0:
2519 case 0x1A:
2520 case '\n':
2521 Lnewline:
2522 this->loc.linnum = linnum;
2523 if (filespec)
2524 this->loc.filename = filespec;
2525 return;
2527 case '\r':
2528 p++;
2529 if (*p != '\n')
2530 { p--;
2531 goto Lnewline;
2533 continue;
2535 case ' ':
2536 case '\t':
2537 case '\v':
2538 case '\f':
2539 p++;
2540 continue; // skip white space
2542 case '_':
2543 if (mod && memcmp(p, "__FILE__", 8) == 0)
2545 p += 8;
2546 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars());
2548 continue;
2550 case '"':
2551 if (filespec)
2552 goto Lerr;
2553 stringbuffer.reset();
2554 p++;
2555 while (1)
2556 { unsigned c;
2558 c = *p;
2559 switch (c)
2561 case '\n':
2562 case '\r':
2563 case 0:
2564 case 0x1A:
2565 goto Lerr;
2567 case '"':
2568 stringbuffer.writeByte(0);
2569 filespec = mem.strdup((char *)stringbuffer.data);
2570 p++;
2571 break;
2573 default:
2574 if (c & 0x80)
2575 { unsigned u = decodeUTF();
2576 if (u == PS || u == LS)
2577 goto Lerr;
2579 stringbuffer.writeByte(c);
2580 p++;
2581 continue;
2583 break;
2585 continue;
2587 default:
2588 if (*p & 0x80)
2589 { unsigned u = decodeUTF();
2590 if (u == PS || u == LS)
2591 goto Lnewline;
2593 goto Lerr;
2597 Lerr:
2598 error(loc, "#line integer [\"filespec\"]\\n expected");
2602 /********************************************
2603 * Decode UTF character.
2604 * Issue error messages for invalid sequences.
2605 * Return decoded character, advance p to last character in UTF sequence.
2608 unsigned Lexer::decodeUTF()
2610 dchar_t u;
2611 unsigned char c;
2612 unsigned char *s = p;
2613 size_t len;
2614 size_t idx;
2615 char *msg;
2617 c = *s;
2618 assert(c & 0x80);
2620 // Check length of remaining string up to 6 UTF-8 characters
2621 for (len = 1; len < 6 && s[len]; len++)
2624 idx = 0;
2625 msg = utf_decodeChar(s, len, &idx, &u);
2626 p += idx - 1;
2627 if (msg)
2629 error("%s", msg);
2631 return u;
2635 /***************************************************
2636 * Parse doc comment embedded between t->ptr and p.
2637 * Remove trailing blanks and tabs from lines.
2638 * Replace all newlines with \n.
2639 * Remove leading comment character from each line.
2640 * Decide if it's a lineComment or a blockComment.
2641 * Append to previous one for this token.
// Params as used below:
//   t            token whose ptr field addresses the start of the comment and
//                whose lineComment or blockComment field receives the text
//   lineComment  nonzero if the caller considers this a candidate line
//                comment; it is only stored as one when anyToken is also set
2644 void Lexer::getDocComment(Token *t, unsigned lineComment)
2646 OutBuffer buf;
// ct is the third character of the comment; the tests below compare it
// against star, plus and slash to tell the comment flavours apart.
2647 unsigned char ct = t->ptr[2];
2648 unsigned char *q = t->ptr + 3; // start of comment text
// linestart is nonzero while only blanks have been seen since the last
// newline; a ct character met in that state is dropped as line decoration.
2649 int linestart = 0;
2651 unsigned char *qend = p;
// For star and plus comments the last two characters before p are left out
// (presumably the closing delimiter - TODO confirm against the caller).
2652 if (ct == '*' || ct == '+')
2653 qend -= 2;
2655 /* Scan over initial row of ****'s or ++++'s or ////'s
2657 for (; q < qend; q++)
2659 if (*q != ct)
2660 break;
2663 /* Remove trailing row of ****'s or ++++'s
2665 if (ct != '/')
2667 for (; q < qend; qend--)
2669 if (qend[-1] != ct)
2670 break;
// Main copy loop: each character is either skipped (continue) or falls out
// of the switch and is appended to buf.
2674 for (; q < qend; q++)
2676 unsigned char c = *q;
2678 switch (c)
2680 case '*':
2681 case '+':
2682 if (linestart && c == ct)
2683 { linestart = 0;
2684 /* Trim preceding whitespace up to preceding \n
2686 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2687 buf.offset--;
2688 continue;
2690 break;
// Blanks and tabs are copied and do not change linestart.
2692 case ' ':
2693 case '\t':
2694 break;
2696 case '\r':
2697 if (q[1] == '\n')
2698 continue; // skip the \r
2699 goto Lnewline;
2701 default:
// 226 128 168 and 226 128 169 are the UTF-8 encodings of LS (0x2028) and
// PS (0x2029); both are treated as a newline.
2702 if (c == 226)
2704 // If LS or PS
2705 if (q[1] == 128 &&
2706 (q[2] == 168 || q[2] == 169))
2708 q += 2;
2709 goto Lnewline;
2712 linestart = 0;
2713 break;
2715 Lnewline:
2716 c = '\n'; // replace all newlines with \n
2717 case '\n':
2718 linestart = 1;
2720 /* Trim trailing whitespace
2722 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2723 buf.offset--;
2725 break;
2727 buf.writeByte(c);
2730 // Always end with a newline
2731 if (!buf.offset || buf.data[buf.offset - 1] != '\n')
2732 buf.writeByte('\n');
// Terminate the text so it can be handled as a C string.
2734 buf.writeByte(0);
2736 // It's a line comment if the start of the doc comment comes
2737 // after other non-whitespace on the same line.
2738 unsigned char** dc = (lineComment && anyToken)
2739 ? &t->lineComment
2740 : &t->blockComment;
2742 // Combine with previous doc comment, if any
// When there is no previous comment the buffer contents are taken over
// directly with extractData instead of being copied.
2743 if (*dc)
2744 *dc = combineComments(*dc, (unsigned char *)buf.data);
2745 else
2746 *dc = (unsigned char *)buf.extractData();
2749 /********************************************
2750 * Combine two document comments into one.
2753 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2)
2755 unsigned char *c = c2;
2757 if (c1)
2758 { c = c1;
2759 if (c2)
2760 { size_t len1 = strlen((char *)c1);
2761 size_t len2 = strlen((char *)c2);
2763 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1);
2764 memcpy(c, c1, len1);
2765 c[len1] = '\n';
2766 memcpy(c + len1 + 1, c2, len2);
2767 c[len1 + 1 + len2] = 0;
2770 return c;
2773 /********************************************
2774 * Create an identifier in the string table.
2777 Identifier *Lexer::idPool(const char *s)
2779 size_t len = strlen(s);
2780 StringValue *sv = stringtable.update(s, len);
2781 Identifier *id = (Identifier *) sv->ptrvalue;
2782 if (!id)
2784 id = new Identifier(sv->lstring.string, TOKidentifier);
2785 sv->ptrvalue = id;
2787 return id;
2790 /*********************************************
2791 * Create a unique identifier using the prefix s.
2794 Identifier *Lexer::uniqueId(const char *s, int num)
2795 { char buffer[32];
2796 size_t slen = strlen(s);
2798 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer));
2799 sprintf(buffer, "%s%d", s, num);
2800 return idPool(buffer);
2803 Identifier *Lexer::uniqueId(const char *s)
2805 static int num;
2806 return uniqueId(s, ++num);
2809 /****************************************
2812 struct Keyword
2813 { char *name;
2814 enum TOK value;
2817 static Keyword keywords[] =
2819 // { "", TOK },
2821 { "this", TOKthis },
2822 { "super", TOKsuper },
2823 { "assert", TOKassert },
2824 { "null", TOKnull },
2825 { "true", TOKtrue },
2826 { "false", TOKfalse },
2827 { "cast", TOKcast },
2828 { "new", TOKnew },
2829 { "delete", TOKdelete },
2830 { "throw", TOKthrow },
2831 { "module", TOKmodule },
2832 { "pragma", TOKpragma },
2833 { "typeof", TOKtypeof },
2834 { "typeid", TOKtypeid },
2836 { "template", TOKtemplate },
2838 { "void", TOKvoid },
2839 { "byte", TOKint8 },
2840 { "ubyte", TOKuns8 },
2841 { "short", TOKint16 },
2842 { "ushort", TOKuns16 },
2843 { "int", TOKint32 },
2844 { "uint", TOKuns32 },
2845 { "long", TOKint64 },
2846 { "ulong", TOKuns64 },
2847 { "cent", TOKcent, },
2848 { "ucent", TOKucent, },
2849 { "float", TOKfloat32 },
2850 { "double", TOKfloat64 },
2851 { "real", TOKfloat80 },
2853 { "bool", TOKbool },
2854 { "char", TOKchar },
2855 { "wchar", TOKwchar },
2856 { "dchar", TOKdchar },
2858 { "ifloat", TOKimaginary32 },
2859 { "idouble", TOKimaginary64 },
2860 { "ireal", TOKimaginary80 },
2862 { "cfloat", TOKcomplex32 },
2863 { "cdouble", TOKcomplex64 },
2864 { "creal", TOKcomplex80 },
2866 { "delegate", TOKdelegate },
2867 { "function", TOKfunction },
2869 { "is", TOKis },
2870 { "if", TOKif },
2871 { "else", TOKelse },
2872 { "while", TOKwhile },
2873 { "for", TOKfor },
2874 { "do", TOKdo },
2875 { "switch", TOKswitch },
2876 { "case", TOKcase },
2877 { "default", TOKdefault },
2878 { "break", TOKbreak },
2879 { "continue", TOKcontinue },
2880 { "synchronized", TOKsynchronized },
2881 { "return", TOKreturn },
2882 { "goto", TOKgoto },
2883 { "try", TOKtry },
2884 { "catch", TOKcatch },
2885 { "finally", TOKfinally },
2886 { "with", TOKwith },
2887 { "asm", TOKasm },
2888 { "foreach", TOKforeach },
2889 { "foreach_reverse", TOKforeach_reverse },
2890 { "scope", TOKscope },
2892 { "struct", TOKstruct },
2893 { "class", TOKclass },
2894 { "interface", TOKinterface },
2895 { "union", TOKunion },
2896 { "enum", TOKenum },
2897 { "import", TOKimport },
2898 { "mixin", TOKmixin },
2899 { "static", TOKstatic },
2900 { "final", TOKfinal },
2901 { "const", TOKconst },
2902 { "typedef", TOKtypedef },
2903 { "alias", TOKalias },
2904 { "override", TOKoverride },
2905 { "abstract", TOKabstract },
2906 { "volatile", TOKvolatile },
2907 { "debug", TOKdebug },
2908 { "deprecated", TOKdeprecated },
2909 { "in", TOKin },
2910 { "out", TOKout },
2911 { "inout", TOKinout },
2912 { "lazy", TOKlazy },
2913 { "auto", TOKauto },
2915 { "align", TOKalign },
2916 { "extern", TOKextern },
2917 { "private", TOKprivate },
2918 { "package", TOKpackage },
2919 { "protected", TOKprotected },
2920 { "public", TOKpublic },
2921 { "export", TOKexport },
2923 { "body", TOKbody },
2924 { "invariant", TOKinvariant },
2925 { "unittest", TOKunittest },
2926 { "version", TOKversion },
2927 //{ "manifest", TOKmanifest },
2929 // Added after 1.0
2930 { "ref", TOKref },
2931 { "macro", TOKmacro },
2932 #if V2
2933 { "pure", TOKpure },
2934 { "nothrow", TOKnothrow },
2935 { "__thread", TOKtls },
2936 { "__traits", TOKtraits },
2937 { "__overloadset", TOKoverloadset },
2938 { "__FILE__", TOKfile },
2939 { "__LINE__", TOKline },
2940 #endif
2943 int Token::isKeyword()
2945 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++)
2947 if (keywords[u].value == value)
2948 return 1;
2950 return 0;
2953 void Lexer::initKeywords()
2954 { StringValue *sv;
2955 unsigned u;
2956 enum TOK v;
2957 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]);
2959 if (global.params.Dversion == 1)
2960 nkeywords -= 2;
2962 cmtable_init();
2964 for (u = 0; u < nkeywords; u++)
2965 { char *s;
2967 //printf("keyword[%d] = '%s'\n",u, keywords[u].name);
2968 s = keywords[u].name;
2969 v = keywords[u].value;
2970 sv = stringtable.insert(s, strlen(s));
2971 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v);
2973 //printf("tochars[%d] = '%s'\n",v, s);
2974 Token::tochars[v] = s;
2977 Token::tochars[TOKeof] = "EOF";
2978 Token::tochars[TOKlcurly] = "{";
2979 Token::tochars[TOKrcurly] = "}";
2980 Token::tochars[TOKlparen] = "(";
2981 Token::tochars[TOKrparen] = ")";
2982 Token::tochars[TOKlbracket] = "[";
2983 Token::tochars[TOKrbracket] = "]";
2984 Token::tochars[TOKsemicolon] = ";";
2985 Token::tochars[TOKcolon] = ":";
2986 Token::tochars[TOKcomma] = ",";
2987 Token::tochars[TOKdot] = ".";
2988 Token::tochars[TOKxor] = "^";
2989 Token::tochars[TOKxorass] = "^=";
2990 Token::tochars[TOKassign] = "=";
2991 Token::tochars[TOKconstruct] = "=";
2992 #if V2
2993 Token::tochars[TOKblit] = "=";
2994 #endif
2995 Token::tochars[TOKlt] = "<";
2996 Token::tochars[TOKgt] = ">";
2997 Token::tochars[TOKle] = "<=";
2998 Token::tochars[TOKge] = ">=";
2999 Token::tochars[TOKequal] = "==";
3000 Token::tochars[TOKnotequal] = "!=";
3001 Token::tochars[TOKnotidentity] = "!is";
3002 Token::tochars[TOKtobool] = "!!";
3004 Token::tochars[TOKunord] = "!<>=";
3005 Token::tochars[TOKue] = "!<>";
3006 Token::tochars[TOKlg] = "<>";
3007 Token::tochars[TOKleg] = "<>=";
3008 Token::tochars[TOKule] = "!>";
3009 Token::tochars[TOKul] = "!>=";
3010 Token::tochars[TOKuge] = "!<";
3011 Token::tochars[TOKug] = "!<=";
3013 Token::tochars[TOKnot] = "!";
3014 Token::tochars[TOKtobool] = "!!";
3015 Token::tochars[TOKshl] = "<<";
3016 Token::tochars[TOKshr] = ">>";
3017 Token::tochars[TOKushr] = ">>>";
3018 Token::tochars[TOKadd] = "+";
3019 Token::tochars[TOKmin] = "-";
3020 Token::tochars[TOKmul] = "*";
3021 Token::tochars[TOKdiv] = "/";
3022 Token::tochars[TOKmod] = "%";
3023 Token::tochars[TOKslice] = "..";
3024 Token::tochars[TOKdotdotdot] = "...";
3025 Token::tochars[TOKand] = "&";
3026 Token::tochars[TOKandand] = "&&";
3027 Token::tochars[TOKor] = "|";
3028 Token::tochars[TOKoror] = "||";
3029 Token::tochars[TOKarray] = "[]";
3030 Token::tochars[TOKindex] = "[i]";
3031 Token::tochars[TOKaddress] = "&";
3032 Token::tochars[TOKstar] = "*";
3033 Token::tochars[TOKtilde] = "~";
3034 Token::tochars[TOKdollar] = "$";
3035 Token::tochars[TOKcast] = "cast";
3036 Token::tochars[TOKplusplus] = "++";
3037 Token::tochars[TOKminusminus] = "--";
3038 Token::tochars[TOKtype] = "type";
3039 Token::tochars[TOKquestion] = "?";
3040 Token::tochars[TOKneg] = "-";
3041 Token::tochars[TOKuadd] = "+";
3042 Token::tochars[TOKvar] = "var";
3043 Token::tochars[TOKaddass] = "+=";
3044 Token::tochars[TOKminass] = "-=";
3045 Token::tochars[TOKmulass] = "*=";
3046 Token::tochars[TOKdivass] = "/=";
3047 Token::tochars[TOKmodass] = "%=";
3048 Token::tochars[TOKshlass] = "<<=";
3049 Token::tochars[TOKshrass] = ">>=";
3050 Token::tochars[TOKushrass] = ">>>=";
3051 Token::tochars[TOKandass] = "&=";
3052 Token::tochars[TOKorass] = "|=";
3053 Token::tochars[TOKcatass] = "~=";
3054 Token::tochars[TOKcat] = "~";
3055 Token::tochars[TOKcall] = "call";
3056 Token::tochars[TOKidentity] = "is";
3057 Token::tochars[TOKnotidentity] = "!is";
3059 Token::tochars[TOKorass] = "|=";
3060 Token::tochars[TOKidentifier] = "identifier";
3062 // For debugging
3063 Token::tochars[TOKdotexp] = "dotexp";
3064 Token::tochars[TOKdotti] = "dotti";
3065 Token::tochars[TOKdotvar] = "dotvar";
3066 Token::tochars[TOKdottype] = "dottype";
3067 Token::tochars[TOKsymoff] = "symoff";
3068 Token::tochars[TOKtypedot] = "typedot";
3069 Token::tochars[TOKarraylength] = "arraylength";
3070 Token::tochars[TOKarrayliteral] = "arrayliteral";
3071 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral";
3072 Token::tochars[TOKstructliteral] = "structliteral";
3073 Token::tochars[TOKstring] = "string";
3074 Token::tochars[TOKdsymbol] = "symbol";
3075 Token::tochars[TOKtuple] = "tuple";
3076 Token::tochars[TOKdeclaration] = "declaration";
3077 Token::tochars[TOKdottd] = "dottd";
3078 Token::tochars[TOKon_scope_exit] = "scope(exit)";