Fixed some lexing problems with DOS line-endings
[delight/core.git] / dmd / lexer.c
blob3b91e18e5de974572aeb3158075c0c8e250760fb
2 // Compiler implementation of the D programming language
3 // Copyright (c) 1999-2008 by Digital Mars
4 // All Rights Reserved
5 // written by Walter Bright
6 // http://www.digitalmars.com
7 // License for redistribution is by either the Artistic License
8 // in artistic.txt, or the GNU General Public License in gnu.txt.
9 // See the included readme.txt for details.
/* NOTE: This file has been patched from the original DMD distribution to
   work with the GDC compiler.

   Modified by David Friedman, December 2006
*/

/* Lexical Analyzer */
19 #include <stdio.h>
20 #include <string.h>
21 #include <ctype.h>
22 #include <stdarg.h>
23 #include <errno.h>
24 //#include <wchar.h>
25 #include <stdlib.h>
26 #include <assert.h>
27 #include <sys/time.h>
29 #ifdef IN_GCC
31 #include <time.h>
32 #include "mem.h"
34 #else
36 #if __GNUC__
37 #include <time.h>
38 #endif
40 #if _WIN32
41 #include "..\root\mem.h"
42 #else
43 #include "../root/mem.h"
44 #endif
45 #endif
47 #include "stringtable.h"
49 #include "lexer.h"
50 #include "utf.h"
51 #include "identifier.h"
52 #include "id.h"
53 #include "module.h"
55 #if _WIN32 && __DMC__
56 // from \dm\src\include\setlocal.h
57 extern "C" char * __cdecl __locale_decpoint;
58 #endif
60 extern int HtmlNamedEntity(unsigned char *p, int length);
62 #define LS 0x2028 // UTF line separator
63 #define PS 0x2029 // UTF paragraph separator
/********************************************
 * Do our own char maps
 *
 * cmtable[] holds one flag byte per possible byte value; the CM* constants
 * are the individual flag bits and isoctal()/ishex()/isidchar() test them.
 * The table is all zeroes until cmtable_init() has been called.
 */

static unsigned char cmtable[256];

const int CMoctal  = 0x1;       // '0'..'7'
const int CMhex    = 0x2;       // '0'..'9', 'a'..'f', 'A'..'F'
const int CMidchar = 0x4;       // ASCII letter, ASCII digit or '_'

inline unsigned char isoctal (unsigned char c) { return cmtable[c] & CMoctal; }
inline unsigned char ishex   (unsigned char c) { return cmtable[c] & CMhex; }
inline unsigned char isidchar(unsigned char c) { return cmtable[c] & CMidchar; }

/* Fill in cmtable[].
 *
 * The classes are built from explicit ASCII ranges instead of isdigit() /
 * isalnum(), so the result never depends on the process locale: bytes with
 * the high bit set are never identifier characters here and must be
 * classified by the caller after UTF-8 decoding.
 */
static void cmtable_init()
{
    for (unsigned c = 0; c < sizeof(cmtable) / sizeof(cmtable[0]); c++)
    {
        const bool digit  = ('0' <= c && c <= '9');
        const bool letter = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
        unsigned char flags = 0;

        if ('0' <= c && c <= '7')
            flags |= CMoctal;
        if (digit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))
            flags |= CMhex;
        if (digit || letter || c == '_')
            flags |= CMidchar;

        cmtable[c] = flags;
    }
}
93 /************************* Token **********************************************/
95 char *Token::tochars[TOKMAX];
97 void *Token::operator new(size_t size)
98 { Token *t;
100 if (Lexer::freelist)
102 t = Lexer::freelist;
103 Lexer::freelist = t->next;
104 return t;
107 return ::operator new(size);
#ifdef DEBUG
/* Debug aid: write the textual form of this token, followed by a newline,
 * to stdmsg.
 */
void Token::print()
{
    char *text = toChars();
    fprintf(stdmsg, "%s\n", text);
}
#endif
117 char *Token::toChars()
118 { char *p;
119 static char buffer[3 + 3 * sizeof(value) + 1];
121 p = buffer;
122 switch (value)
124 case TOKint32v:
125 #if IN_GCC
126 sprintf(buffer,"%d",(d_int32)int64value);
127 #else
128 sprintf(buffer,"%d",int32value);
129 #endif
130 break;
132 case TOKuns32v:
133 case TOKcharv:
134 case TOKwcharv:
135 case TOKdcharv:
136 #if IN_GCC
137 sprintf(buffer,"%uU",(d_uns32)uns64value);
138 #else
139 sprintf(buffer,"%uU",uns32value);
140 #endif
141 break;
143 case TOKint64v:
144 sprintf(buffer,"%"PRIdMAX"L",int64value);
145 break;
147 case TOKuns64v:
148 sprintf(buffer,"%"PRIuMAX"UL",uns64value);
149 break;
151 #if IN_GCC
152 case TOKfloat32v:
153 case TOKfloat64v:
154 case TOKfloat80v:
155 float80value.format(buffer, sizeof(buffer));
156 break;
157 case TOKimaginary32v:
158 case TOKimaginary64v:
159 case TOKimaginary80v:
160 float80value.format(buffer, sizeof(buffer));
161 // %% buffer
162 strcat(buffer, "i");
163 break;
164 #else
165 case TOKfloat32v:
166 sprintf(buffer,"%Lgf", float80value);
167 break;
169 case TOKfloat64v:
170 sprintf(buffer,"%Lg", float80value);
171 break;
173 case TOKfloat80v:
174 sprintf(buffer,"%LgL", float80value);
175 break;
177 case TOKimaginary32v:
178 sprintf(buffer,"%Lgfi", float80value);
179 break;
181 case TOKimaginary64v:
182 sprintf(buffer,"%Lgi", float80value);
183 break;
185 case TOKimaginary80v:
186 sprintf(buffer,"%LgLi", float80value);
187 break;
188 #endif
191 case TOKstring:
192 #if CSTRINGS
193 p = string;
194 #else
195 { OutBuffer buf;
197 buf.writeByte('"');
198 for (size_t i = 0; i < len; )
199 { unsigned c;
201 utf_decodeChar((unsigned char *)ustring, len, &i, &c);
202 switch (c)
204 case 0:
205 break;
207 case '"':
208 case '\\':
209 buf.writeByte('\\');
210 default:
211 if (isprint(c))
212 buf.writeByte(c);
213 else if (c <= 0x7F)
214 buf.printf("\\x%02x", c);
215 else if (c <= 0xFFFF)
216 buf.printf("\\u%04x", c);
217 else
218 buf.printf("\\U%08x", c);
219 continue;
221 break;
223 buf.writeByte('"');
224 if (postfix)
225 buf.writeByte('"');
226 buf.writeByte(0);
227 p = (char *)buf.extractData();
229 #endif
230 break;
232 case TOKidentifier:
233 case TOKenum:
234 case TOKstruct:
235 case TOKimport:
236 CASE_BASIC_TYPES:
237 p = ident->toChars();
238 break;
240 default:
241 p = toChars(value);
242 break;
244 return p;
247 char *Token::toChars(enum TOK value)
248 { char *p;
249 static char buffer[3 + 3 * sizeof(value) + 1];
251 p = tochars[value];
252 if (!p)
253 { sprintf(buffer,"TOK%d",value);
254 p = buffer;
256 return p;
259 /*************************** Lexer ********************************************/
261 Token *Lexer::freelist = NULL;
262 StringTable Lexer::stringtable;
263 OutBuffer Lexer::stringbuffer;
265 Lexer::Lexer(Module *mod,
266 unsigned char *base, unsigned begoffset, unsigned endoffset,
267 int doDocComment, int commentToken, bool dltSyntax)
268 : loc(mod, 1), dltSyntax(dltSyntax)
270 //printf("Lexer::Lexer(%p,%d)\n",base,length);
271 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod);
272 memset(&token,0,sizeof(token));
273 this->base = base;
274 this->end = base + endoffset;
275 p = base + begoffset;
276 this->mod = mod;
277 this->doDocComment = doDocComment;
278 this->anyToken = 0;
279 this->commentToken = commentToken;
280 this->nesting = 0;
281 this->indent = 0;
282 this->atStartOfLine = 1;
283 //initKeywords();
285 /* If first line starts with '#!', ignore the line
288 if (p[0] == '#' && p[1] =='!')
290 p += 2;
291 while (1)
292 { unsigned char c = *p;
293 switch (c)
295 case '\n':
296 p++;
297 break;
299 case '\r':
300 p++;
301 if (*p == '\n')
302 p++;
303 break;
305 case 0:
306 case 0x1A:
307 break;
309 default:
310 if (c & 0x80)
311 { unsigned u = decodeUTF();
312 if (u == PS || u == LS)
313 break;
315 p++;
316 continue;
318 break;
320 loc.linnum = 2;
325 void Lexer::error(const char *format, ...)
327 if (mod && !global.gag)
329 char *p = loc.toChars();
330 if (*p)
331 fprintf(stdmsg, "%s: ", p);
332 mem.free(p);
334 va_list ap;
335 va_start(ap, format);
336 vfprintf(stdmsg, format, ap);
337 va_end(ap);
339 fprintf(stdmsg, "\n");
340 fflush(stdmsg);
342 if (global.errors >= 20) // moderate blizzard of cascading messages
343 fatal();
345 global.errors++;
348 void Lexer::error(Loc loc, const char *format, ...)
350 if (mod && !global.gag)
352 char *p = loc.toChars();
353 if (*p)
354 fprintf(stdmsg, "%s: ", p);
355 mem.free(p);
357 va_list ap;
358 va_start(ap, format);
359 vfprintf(stdmsg, format, ap);
360 va_end(ap);
362 fprintf(stdmsg, "\n");
363 fflush(stdmsg);
365 if (global.errors >= 20) // moderate blizzard of cascading messages
366 fatal();
368 global.errors++;
371 TOK Lexer::nextToken()
372 { Token *t;
374 if (token.next)
376 t = token.next;
377 memcpy(&token,t,sizeof(Token));
378 t->next = freelist;
379 freelist = t;
381 else
383 scan(&token);
385 //token.print();
386 return token.value;
389 Token *Lexer::peek(Token *ct)
390 { Token *t;
392 if (ct->next)
393 t = ct->next;
394 else
396 t = new Token();
397 scan(t);
398 t->next = NULL;
399 ct->next = t;
401 return t;
404 /*********************************
405 * tk is on the opening (.
406 * Look ahead and return token that is past the closing ).
409 Token *Lexer::peekPastParen(Token *tk)
411 //printf("peekPastParen()\n");
412 int parens = 1;
413 int curlynest = 0;
414 while (1)
416 tk = peek(tk);
417 //tk->print();
418 switch (tk->value)
420 case TOKlparen:
421 parens++;
422 continue;
424 case TOKrparen:
425 --parens;
426 if (parens)
427 continue;
428 tk = peek(tk);
429 break;
431 case TOKlcurly:
432 curlynest++;
433 continue;
435 case TOKrcurly:
436 if (--curlynest >= 0)
437 continue;
438 break;
440 case TOKsemicolon:
441 if (curlynest)
442 continue;
443 break;
445 case TOKeof:
446 break;
448 default:
449 continue;
451 return tk;
455 /**********************************
456 * Determine if string is a valid Identifier.
457 * Placed here because of commonality with Lexer functionality.
458 * Returns:
459 * 0 invalid
462 int Lexer::isValidIdentifier(char *p)
464 size_t len;
465 size_t idx;
467 if (!p || !*p)
468 goto Linvalid;
470 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars
471 goto Linvalid;
473 len = strlen(p);
474 idx = 0;
475 while (p[idx])
476 { dchar_t dc;
478 char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc);
479 if (q)
480 goto Linvalid;
482 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
483 goto Linvalid;
485 return 1;
487 Linvalid:
488 return 0;
491 /****************************
492 * Turn next token in buffer into a token.
495 void Lexer::scan(Token *t)
497 unsigned lastLine = loc.linnum;
498 unsigned linnum;
500 t->blockComment = NULL;
501 t->lineComment = NULL;
502 while (1)
504 t->ptr = p;
506 if (dltSyntax && atStartOfLine) {
507 // Check indent
508 int i;
509 for (i = 0; p[i] == '\t'; i++) {
511 if (p[i] == ' ') {
512 error("Whitespace error: use tabs to indent!");
514 if (p[i] == '#') {
515 p += i;
516 atStartOfLine = 0;
517 } else if (p[i] != '\n' && p[i] != '\r') {
518 if (p[i] == '\0')
519 i = 0; // End-of-file always has no indent
520 if (i > indent) {
521 error("unexpected indentation (expected %d tabs, not %d)",
522 indent, i);
523 } else if (i < indent) {
524 indent -= 1;
525 t->value = TOKrcurly;
526 return;
528 atStartOfLine = 0;
529 } /* else ignore blank line */
532 //printf("p = %p, *p = '%c'\n",p,*p);
533 switch (*p)
535 case 0:
536 case 0x1A:
537 t->value = TOKeof; // end of file
538 return;
540 case ' ':
541 case '\t':
542 case '\v':
543 case '\f':
544 p++;
545 continue; // skip white space
547 case '\r':
548 if (p[1] == '\n') { // if CRLF
549 p++;
550 continue;
552 // fall-through
553 case '\n':
554 p++;
555 loc.linnum++;
556 if (dltSyntax && !nesting) {
557 atStartOfLine = 1;
558 t->value = TOKendline;
559 return;
561 continue; // Ignore newlines inside brackets
562 case '0': case '1': case '2': case '3': case '4':
563 case '5': case '6': case '7': case '8': case '9':
564 t->value = number(t);
565 return;
567 #if CSTRINGS
568 case '\'':
569 t->value = charConstant(t, 0);
570 return;
572 case '"':
573 t->value = stringConstant(t,0);
574 return;
576 case 'l':
577 case 'L':
578 if (p[1] == '\'')
580 p++;
581 t->value = charConstant(t, 1);
582 return;
584 else if (p[1] == '"')
586 p++;
587 t->value = stringConstant(t, 1);
588 return;
590 #else
591 case '\'':
592 t->value = charConstant(t,0);
593 return;
595 case 'r':
596 if (p[1] != '"')
597 goto case_ident;
598 p++;
599 case '`':
600 t->value = wysiwygStringConstant(t, *p);
601 return;
603 case 'x':
604 if (p[1] != '"')
605 goto case_ident;
606 p++;
607 t->value = hexStringConstant(t);
608 return;
610 #if V2
611 case 'q':
612 if (p[1] == '"')
614 p++;
615 t->value = delimitedStringConstant(t);
616 return;
618 else if (p[1] == '{')
620 p++;
621 t->value = tokenStringConstant(t);
622 return;
624 else
625 goto case_ident;
626 #endif
628 case '"':
629 t->value = escapeStringConstant(t,0);
630 return;
632 case '\\': // escaped string literal
633 { unsigned c;
635 stringbuffer.reset();
638 p++;
639 switch (*p)
641 case 'u':
642 case 'U':
643 case '&':
644 c = escapeSequence();
645 stringbuffer.writeUTF8(c);
646 break;
648 default:
649 c = escapeSequence();
650 stringbuffer.writeByte(c);
651 break;
653 } while (*p == '\\');
654 t->len = stringbuffer.offset;
655 stringbuffer.writeByte(0);
656 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
657 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
658 t->postfix = 0;
659 t->value = TOKstring;
660 return;
663 case 'l':
664 case 'L':
665 #endif
666 case 'a': case 'b': case 'c': case 'd': case 'e':
667 case 'f': case 'g': case 'h': case 'i': case 'j':
668 case 'k': case 'm': case 'n': case 'o':
669 #if V2
670 case 'p': /*case 'q': case 'r':*/ case 's': case 't':
671 #else
672 case 'p': case 'q': /*case 'r':*/ case 's': case 't':
673 #endif
674 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
675 case 'z':
676 case 'A': case 'B': case 'C': case 'D': case 'E':
677 case 'F': case 'G': case 'H': case 'I': case 'J':
678 case 'K': case 'M': case 'N': case 'O':
679 case 'P': case 'Q': case 'R': case 'S': case 'T':
680 case 'U': case 'V': case 'W': case 'X': case 'Y':
681 case 'Z':
682 case '_':
683 case_ident:
684 { unsigned char c;
685 StringValue *sv;
686 Identifier *id;
690 c = *++p;
691 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
692 sv = stringtable.update((char *)t->ptr, p - t->ptr);
693 id = (Identifier *) sv->ptrvalue;
694 if (!id)
695 { id = new Identifier(sv->lstring.string,TOKidentifier);
696 sv->ptrvalue = id;
698 t->ident = id;
699 t->value = (enum TOK) id->value;
700 anyToken = 1;
701 if (*t->ptr == '_') // if special identifier token
703 static char date[11+1];
704 static char time[8+1];
705 static char timestamp[24+1];
707 if (!date[0]) // lazy evaluation
708 { time_t t;
709 char *p;
711 ::time(&t);
712 p = ctime(&t);
713 assert(p);
714 sprintf(date, "%.6s %.4s", p + 4, p + 20);
715 sprintf(time, "%.8s", p + 11);
716 sprintf(timestamp, "%.24s", p);
719 if (mod && id == Id::FILE)
721 t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars());
722 goto Lstring;
724 else if (mod && id == Id::LINE)
726 t->value = TOKint64v;
727 t->uns64value = loc.linnum;
729 else if (id == Id::DATE)
731 t->ustring = (unsigned char *)date;
732 goto Lstring;
734 else if (id == Id::TIME)
736 t->ustring = (unsigned char *)time;
737 goto Lstring;
739 else if (id == Id::VENDOR)
741 #ifdef IN_GCC
742 t->ustring = (unsigned char *)"GDC";
743 #else
744 t->ustring = (unsigned char *)"Digital Mars D";
745 #endif
746 goto Lstring;
748 else if (id == Id::TIMESTAMP)
750 t->ustring = (unsigned char *)timestamp;
751 Lstring:
752 t->value = TOKstring;
753 Llen:
754 t->postfix = 0;
755 t->len = strlen((char *)t->ustring);
757 else if (id == Id::VERSIONX)
758 { unsigned major = 0;
759 unsigned minor = 0;
761 for (char *p = global.version + 1; 1; p++)
763 char c = *p;
764 if (isdigit(c))
765 minor = minor * 10 + c - '0';
766 else if (c == '.')
767 { major = minor;
768 minor = 0;
770 else
771 break;
773 t->value = TOKint64v;
774 t->uns64value = major * 1000 + minor;
776 #if V2
777 else if (id == Id::EOFX)
779 t->value = TOKeof;
780 // Advance scanner to end of file
781 while (!(*p == 0 || *p == 0x1A))
782 p++;
784 #endif
786 //printf("t->value = %d\n",t->value);
787 return;
790 case '/':
791 p++;
792 switch (*p)
794 case '=':
795 p++;
796 t->value = TOKdivass;
797 return;
799 case '*':
800 p++;
801 linnum = loc.linnum;
802 while (1)
804 while (1)
805 { unsigned char c = *p;
806 switch (c)
808 case '/':
809 break;
811 case '\n':
812 loc.linnum++;
813 p++;
814 continue;
816 case '\r':
817 p++;
818 if (*p != '\n')
819 loc.linnum++;
820 continue;
822 case 0:
823 case 0x1A:
824 error("unterminated /* */ comment");
825 p = end;
826 t->value = TOKeof;
827 return;
829 default:
830 if (c & 0x80)
831 { unsigned u = decodeUTF();
832 if (u == PS || u == LS)
833 loc.linnum++;
835 p++;
836 continue;
838 break;
840 p++;
841 if (p[-2] == '*' && p - 3 != t->ptr)
842 break;
844 if (commentToken)
846 t->value = TOKcomment;
847 return;
849 else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
850 { // if /** but not /**/
851 getDocComment(t, lastLine == linnum);
853 continue;
855 case '/': // do // style comments
856 linnum = loc.linnum;
857 while (1)
858 { unsigned char c = *++p;
859 switch (c)
861 case '\n':
862 break;
864 case '\r':
865 if (p[1] == '\n')
866 p++;
867 break;
869 case 0:
870 case 0x1A:
871 if (commentToken)
873 p = end;
874 t->value = TOKcomment;
875 return;
877 if (doDocComment && t->ptr[2] == '/')
878 getDocComment(t, lastLine == linnum);
879 p = end;
880 t->value = TOKeof;
881 return;
883 default:
884 if (c & 0x80)
885 { unsigned u = decodeUTF();
886 if (u == PS || u == LS)
887 break;
889 continue;
891 break;
894 if (commentToken)
896 p++;
897 loc.linnum++;
898 t->value = TOKcomment;
899 return;
901 if (doDocComment && t->ptr[2] == '/')
902 getDocComment(t, lastLine == linnum);
904 p++;
905 loc.linnum++;
906 continue;
908 case '+':
909 { int nest;
911 linnum = loc.linnum;
912 p++;
913 nest = 1;
914 while (1)
915 { unsigned char c = *p;
916 switch (c)
918 case '/':
919 p++;
920 if (*p == '+')
922 p++;
923 nest++;
925 continue;
927 case '+':
928 p++;
929 if (*p == '/')
931 p++;
932 if (--nest == 0)
933 break;
935 continue;
937 case '\r':
938 p++;
939 if (*p != '\n')
940 loc.linnum++;
941 continue;
943 case '\n':
944 loc.linnum++;
945 p++;
946 continue;
948 case 0:
949 case 0x1A:
950 error("unterminated /+ +/ comment");
951 p = end;
952 t->value = TOKeof;
953 return;
955 default:
956 if (c & 0x80)
957 { unsigned u = decodeUTF();
958 if (u == PS || u == LS)
959 loc.linnum++;
961 p++;
962 continue;
964 break;
966 if (commentToken)
968 t->value = TOKcomment;
969 return;
971 if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
972 { // if /++ but not /++/
973 getDocComment(t, lastLine == linnum);
975 continue;
978 t->value = TOKdiv;
979 return;
981 case '.':
982 p++;
983 if (isdigit(*p))
984 { /* Note that we don't allow ._1 and ._ as being
985 * valid floating point numbers.
987 p--;
988 t->value = inreal(t);
990 else if (p[0] == '.')
992 if (p[1] == '.')
993 { p += 2;
994 t->value = TOKdotdotdot;
996 else
997 { p++;
998 t->value = TOKslice;
1001 else
1002 t->value = TOKdot;
1003 return;
1005 case '&':
1006 p++;
1007 if (*p == '=')
1008 { p++;
1009 t->value = TOKandass;
1011 else if (*p == '&')
1012 { p++;
1013 t->value = TOKandand;
1014 if (dltSyntax)
1015 error("Use 'and' instead of '&&'");
1017 else
1018 t->value = TOKand;
1019 return;
1021 case '|':
1022 p++;
1023 if (*p == '=')
1024 { p++;
1025 t->value = TOKorass;
1027 else if (*p == '|')
1028 { p++;
1029 t->value = TOKoror;
1030 if (dltSyntax)
1031 error("Use 'or' instead of '||'");
1033 else
1034 t->value = TOKor;
1035 return;
1037 case '-':
1038 p++;
1039 if (*p == '=')
1040 { p++;
1041 t->value = TOKminass;
1043 #if 0
1044 else if (*p == '>')
1045 { p++;
1046 t->value = TOKarrow;
1048 #endif
1049 else if (*p == '-')
1050 { p++;
1051 t->value = TOKminusminus;
1053 else
1054 t->value = TOKmin;
1055 return;
1057 case '+':
1058 p++;
1059 if (*p == '=')
1060 { p++;
1061 t->value = TOKaddass;
1063 else if (*p == '+')
1064 { p++;
1065 t->value = TOKplusplus;
1067 else
1068 t->value = TOKadd;
1069 return;
1071 case '<':
1072 p++;
1073 if (*p == '=')
1074 { p++;
1075 t->value = TOKle; // <=
1077 else if (*p == '<')
1078 { p++;
1079 if (*p == '=')
1080 { p++;
1081 t->value = TOKshlass; // <<=
1083 else
1084 t->value = TOKshl; // <<
1086 else if (*p == '>')
1087 { p++;
1088 if (*p == '=')
1089 { p++;
1090 t->value = TOKleg; // <>=
1092 else
1093 t->value = TOKlg; // <>
1095 else
1096 t->value = TOKlt; // <
1097 return;
1099 case '>':
1100 p++;
1101 if (*p == '=')
1102 { p++;
1103 t->value = TOKge; // >=
1105 else if (*p == '>')
1106 { p++;
1107 if (*p == '=')
1108 { p++;
1109 t->value = TOKshrass; // >>=
1111 else if (*p == '>')
1112 { p++;
1113 if (*p == '=')
1114 { p++;
1115 t->value = TOKushrass; // >>>=
1117 else
1118 t->value = TOKushr; // >>>
1120 else
1121 t->value = TOKshr; // >>
1123 else
1124 t->value = TOKgt; // >
1125 return;
1127 case '!':
1128 p++;
1129 if (*p == '=')
1130 { p++;
1131 if (*p == '=' && global.params.Dversion == 1)
1132 { p++;
1133 t->value = TOKnotidentity; // !==
1135 else
1136 t->value = TOKnotequal; // !=
1138 else if (*p == '<')
1139 { p++;
1140 if (*p == '>')
1141 { p++;
1142 if (*p == '=')
1143 { p++;
1144 t->value = TOKunord; // !<>=
1146 else
1147 t->value = TOKue; // !<>
1149 else if (*p == '=')
1150 { p++;
1151 t->value = TOKug; // !<=
1153 else
1154 t->value = TOKuge; // !<
1156 else if (*p == '>')
1157 { p++;
1158 if (*p == '=')
1159 { p++;
1160 t->value = TOKul; // !>=
1162 else
1163 t->value = TOKule; // !>
1165 else
1166 t->value = TOKnot; // !
1167 return;
1169 case '=':
1170 p++;
1171 if (*p == '=')
1172 { p++;
1173 if (*p == '=' && global.params.Dversion == 1)
1174 { p++;
1175 t->value = TOKidentity; // ===
1177 else
1178 t->value = TOKequal; // ==
1180 else
1181 t->value = TOKassign; // =
1182 return;
1184 case '~':
1185 p++;
1186 if (*p == '=')
1187 { p++;
1188 t->value = TOKcatass; // ~=
1190 else
1191 t->value = TOKtilde; // ~
1192 return;
1194 #define NESTED(cin,tokin,cout,tokout) \
1195 case cin: nesting++; p++; t->value = tokin; return;\
1196 case cout: if (nesting == 0) {error("Unexpected '%c'", cout);} else {nesting--;} p++; t->value = tokout; return;
1198 NESTED('(', TOKlparen, ')', TOKrparen)
1199 NESTED('[', TOKlbracket, ']', TOKrbracket)
1200 NESTED('{', TOKlcurly, '}', TOKrcurly)
1201 #undef NESTED
1203 #define SINGLE(c,tok) case c: p++; t->value = tok; return;
1204 SINGLE('?', TOKquestion)
1205 SINGLE(',', TOKcomma)
1206 SINGLE(';', TOKsemicolon)
1207 SINGLE('$', TOKdollar)
1208 SINGLE('@', TOKat)
1210 #undef SINGLE
1212 case ':':
1213 p++;
1214 if (!nesting)
1215 indent += 1;
1216 t->value = TOKcolon;
1217 return;
1219 #define DOUBLE(c1,tok1,c2,tok2) \
1220 case c1: \
1221 p++; \
1222 if (*p == c2) \
1223 { p++; \
1224 t->value = tok2; \
1226 else \
1227 t->value = tok1; \
1228 return;
1230 DOUBLE('*', TOKmul, '=', TOKmulass)
1231 DOUBLE('%', TOKmod, '=', TOKmodass)
1232 DOUBLE('^', TOKxor, '=', TOKxorass)
1234 #undef DOUBLE
1236 case '#': // do # style comments and pragmas
1237 if (dltSyntax)
1239 do { p++; } while (*p != '\n');
1241 else
1243 p++;
1244 pragma();
1246 continue;
1248 default:
1249 { unsigned char c = *p;
1251 if (c & 0x80)
1252 { unsigned u = decodeUTF();
1254 // Check for start of unicode identifier
1255 if (isUniAlpha(u))
1256 goto case_ident;
1258 if (u == PS || u == LS)
1260 loc.linnum++;
1261 p++;
1262 continue;
1265 if (isprint(c))
1266 error("unsupported char '%c'", c);
1267 else
1268 error("unsupported char 0x%02x", c);
1269 p++;
1270 continue;
1276 /*******************************************
1277 * Parse escape sequence.
1280 unsigned Lexer::escapeSequence()
1281 { unsigned c;
1282 int n;
1283 int ndigits;
1285 c = *p;
1286 switch (c)
1288 case '\'':
1289 case '"':
1290 case '?':
1291 case '\\':
1292 Lconsume:
1293 p++;
1294 break;
1296 case 'a': c = 7; goto Lconsume;
1297 case 'b': c = 8; goto Lconsume;
1298 case 'f': c = 12; goto Lconsume;
1299 case 'n': c = 10; goto Lconsume;
1300 case 'r': c = 13; goto Lconsume;
1301 case 't': c = 9; goto Lconsume;
1302 case 'v': c = 11; goto Lconsume;
1304 case 'u':
1305 ndigits = 4;
1306 goto Lhex;
1307 case 'U':
1308 ndigits = 8;
1309 goto Lhex;
1310 case 'x':
1311 ndigits = 2;
1312 Lhex:
1313 p++;
1314 c = *p;
1315 if (ishex(c))
1316 { unsigned v;
1318 n = 0;
1319 v = 0;
1320 while (1)
1322 if (isdigit(c))
1323 c -= '0';
1324 else if (islower(c))
1325 c -= 'a' - 10;
1326 else
1327 c -= 'A' - 10;
1328 v = v * 16 + c;
1329 c = *++p;
1330 if (++n == ndigits)
1331 break;
1332 if (!ishex(c))
1333 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
1334 break;
1337 if (ndigits != 2 && !utf_isValidDchar(v))
1338 error("invalid UTF character \\U%08x", v);
1339 c = v;
1341 else
1342 error("undefined escape hex sequence \\%c\n",c);
1343 break;
1345 case '&': // named character entity
1346 for (unsigned char *idstart = ++p; 1; p++)
1348 switch (*p)
1350 case ';':
1351 c = HtmlNamedEntity(idstart, p - idstart);
1352 if (c == ~0)
1353 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
1354 c = ' ';
1356 p++;
1357 break;
1359 default:
1360 if (isalpha(*p) ||
1361 (p != idstart + 1 && isdigit(*p)))
1362 continue;
1363 error("unterminated named entity");
1364 break;
1366 break;
1368 break;
1370 case 0:
1371 case 0x1A: // end of file
1372 c = '\\';
1373 break;
1375 default:
1376 if (isoctal(c))
1377 { unsigned v;
1379 n = 0;
1380 v = 0;
1383 v = v * 8 + (c - '0');
1384 c = *++p;
1385 } while (++n < 3 && isoctal(c));
1386 c = v;
1387 if (c > 0xFF)
1388 error("0%03o is larger than a byte", c);
1390 else
1391 error("undefined escape sequence \\%c\n",c);
1392 break;
1394 return c;
1397 /**************************************
1400 TOK Lexer::wysiwygStringConstant(Token *t, int tc)
1401 { unsigned c;
1402 Loc start = loc;
1404 p++;
1405 stringbuffer.reset();
1406 while (1)
1408 c = *p++;
1409 switch (c)
1411 case '\n':
1412 loc.linnum++;
1413 break;
1415 case '\r':
1416 if (*p == '\n')
1417 continue; // ignore
1418 c = '\n'; // treat EndOfLine as \n character
1419 loc.linnum++;
1420 break;
1422 case 0:
1423 case 0x1A:
1424 error("unterminated string constant starting at %s", start.toChars());
1425 t->ustring = (unsigned char *)"";
1426 t->len = 0;
1427 t->postfix = 0;
1428 return TOKstring;
1430 case '"':
1431 case '`':
1432 if (c == tc)
1434 t->len = stringbuffer.offset;
1435 stringbuffer.writeByte(0);
1436 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1437 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1438 stringPostfix(t);
1439 return TOKstring;
1441 break;
1443 default:
1444 if (c & 0x80)
1445 { p--;
1446 unsigned u = decodeUTF();
1447 p++;
1448 if (u == PS || u == LS)
1449 loc.linnum++;
1450 stringbuffer.writeUTF8(u);
1451 continue;
1453 break;
1455 stringbuffer.writeByte(c);
1459 /**************************************
1460 * Lex hex strings:
1461 * x"0A ae 34FE BD"
1464 TOK Lexer::hexStringConstant(Token *t)
1465 { unsigned c;
1466 Loc start = loc;
1467 unsigned n = 0;
1468 unsigned v;
1470 p++;
1471 stringbuffer.reset();
1472 while (1)
1474 c = *p++;
1475 switch (c)
1477 case ' ':
1478 case '\t':
1479 case '\v':
1480 case '\f':
1481 continue; // skip white space
1483 case '\r':
1484 if (*p == '\n')
1485 continue; // ignore
1486 // Treat isolated '\r' as if it were a '\n'
1487 case '\n':
1488 loc.linnum++;
1489 continue;
1491 case 0:
1492 case 0x1A:
1493 error("unterminated string constant starting at %s", start.toChars());
1494 t->ustring = (unsigned char *)"";
1495 t->len = 0;
1496 t->postfix = 0;
1497 return TOKstring;
1499 case '"':
1500 if (n & 1)
1501 { error("odd number (%d) of hex characters in hex string", n);
1502 stringbuffer.writeByte(v);
1504 t->len = stringbuffer.offset;
1505 stringbuffer.writeByte(0);
1506 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1507 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1508 stringPostfix(t);
1509 return TOKstring;
1511 default:
1512 if (c >= '0' && c <= '9')
1513 c -= '0';
1514 else if (c >= 'a' && c <= 'f')
1515 c -= 'a' - 10;
1516 else if (c >= 'A' && c <= 'F')
1517 c -= 'A' - 10;
1518 else if (c & 0x80)
1519 { p--;
1520 unsigned u = decodeUTF();
1521 p++;
1522 if (u == PS || u == LS)
1523 loc.linnum++;
1524 else
1525 error("non-hex character \\u%x", u);
1527 else
1528 error("non-hex character '%c'", c);
1529 if (n & 1)
1530 { v = (v << 4) | c;
1531 stringbuffer.writeByte(v);
1533 else
1534 v = c;
1535 n++;
1536 break;
1542 #if V2
1543 /**************************************
1544 * Lex delimited strings:
1545 * q"(foo(xxx))" // "foo(xxx)"
1546 * q"[foo(]" // "foo("
1547 * q"/foo]/" // "foo]"
1548 * q"HERE
1549 * foo
1550 * HERE" // "foo\n"
1551 * Input:
1552 * p is on the "
1555 TOK Lexer::delimitedStringConstant(Token *t)
1556 { unsigned c;
1557 Loc start = loc;
1558 unsigned delimleft = 0;
1559 unsigned delimright = 0;
1560 unsigned nest = 1;
1561 unsigned nestcount;
1562 Identifier *hereid = NULL;
1563 unsigned blankrol = 0;
1564 unsigned startline = 0;
1566 p++;
1567 stringbuffer.reset();
1568 while (1)
1570 c = *p++;
1571 //printf("c = '%c'\n", c);
1572 switch (c)
1574 case '\n':
1575 Lnextline:
1576 loc.linnum++;
1577 startline = 1;
1578 if (blankrol)
1579 { blankrol = 0;
1580 continue;
1582 if (hereid)
1584 stringbuffer.writeUTF8(c);
1585 continue;
1587 break;
1589 case '\r':
1590 if (*p == '\n')
1591 continue; // ignore
1592 c = '\n'; // treat EndOfLine as \n character
1593 goto Lnextline;
1595 case 0:
1596 case 0x1A:
1597 goto Lerror;
1599 default:
1600 if (c & 0x80)
1601 { p--;
1602 c = decodeUTF();
1603 p++;
1604 if (c == PS || c == LS)
1605 goto Lnextline;
1607 break;
1609 if (delimleft == 0)
1610 { delimleft = c;
1611 nest = 1;
1612 nestcount = 1;
1613 if (c == '(')
1614 delimright = ')';
1615 else if (c == '{')
1616 delimright = '}';
1617 else if (c == '[')
1618 delimright = ']';
1619 else if (c == '<')
1620 delimright = '>';
1621 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1622 { // Start of identifier; must be a heredoc
1623 Token t;
1624 p--;
1625 scan(&t); // read in heredoc identifier
1626 if (t.value != TOKidentifier)
1627 { error("identifier expected for heredoc, not %s", t.toChars());
1628 delimright = c;
1630 else
1631 { hereid = t.ident;
1632 //printf("hereid = '%s'\n", hereid->toChars());
1633 blankrol = 1;
1635 nest = 0;
1637 else
1638 { delimright = c;
1639 nest = 0;
1642 else
1644 if (blankrol)
1645 { error("heredoc rest of line should be blank");
1646 blankrol = 0;
1647 continue;
1649 if (nest == 1)
1651 if (c == delimleft)
1652 nestcount++;
1653 else if (c == delimright)
1654 { nestcount--;
1655 if (nestcount == 0)
1656 goto Ldone;
1659 else if (c == delimright)
1660 goto Ldone;
1661 if (startline && isalpha(c))
1662 { Token t;
1663 unsigned char *psave = p;
1664 p--;
1665 scan(&t); // read in possible heredoc identifier
1666 //printf("endid = '%s'\n", t.ident->toChars());
1667 if (t.value == TOKidentifier && t.ident->equals(hereid))
1668 { /* should check that rest of line is blank
1670 goto Ldone;
1672 p = psave;
1674 stringbuffer.writeUTF8(c);
1675 startline = 0;
1679 Ldone:
1680 if (*p == '"')
1681 p++;
1682 else
1683 error("delimited string must end in %c\"", delimright);
1684 t->len = stringbuffer.offset;
1685 stringbuffer.writeByte(0);
1686 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1687 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1688 stringPostfix(t);
1689 return TOKstring;
1691 Lerror:
1692 error("unterminated string constant starting at %s", start.toChars());
1693 t->ustring = (unsigned char *)"";
1694 t->len = 0;
1695 t->postfix = 0;
1696 return TOKstring;
1699 /**************************************
1700 * Lex delimited strings:
1701 * q{ foo(xxx) } // " foo(xxx) "
1702 * q{foo(} // "foo("
1703 * q{{foo}"}"} // "{foo}"}""
1704 * Input:
1705 * p is on the q
1708 TOK Lexer::tokenStringConstant(Token *t)
1710 unsigned nest = 1;
1711 Loc start = loc;
1712 unsigned char *pstart = ++p;
1714 while (1)
1715 { Token tok;
1717 scan(&tok);
1718 switch (tok.value)
1720 case TOKlcurly:
1721 nest++;
1722 continue;
1724 case TOKrcurly:
1725 if (--nest == 0)
1726 goto Ldone;
1727 continue;
1729 case TOKeof:
1730 goto Lerror;
1732 default:
1733 continue;
1737 Ldone:
1738 t->len = p - 1 - pstart;
1739 t->ustring = (unsigned char *)mem.malloc(t->len + 1);
1740 memcpy(t->ustring, pstart, t->len);
1741 t->ustring[t->len] = 0;
1742 stringPostfix(t);
1743 return TOKstring;
1745 Lerror:
1746 error("unterminated token string constant starting at %s", start.toChars());
1747 t->ustring = (unsigned char *)"";
1748 t->len = 0;
1749 t->postfix = 0;
1750 return TOKstring;
1753 #endif
1756 /**************************************
1759 TOK Lexer::escapeStringConstant(Token *t, int wide)
1760 { unsigned c;
1761 Loc start = loc;
1763 p++;
1764 stringbuffer.reset();
1765 while (1)
1767 c = *p++;
1768 switch (c)
1770 case '\\':
1771 switch (*p)
1773 case 'u':
1774 case 'U':
1775 case '&':
1776 c = escapeSequence();
1777 stringbuffer.writeUTF8(c);
1778 continue;
1780 default:
1781 c = escapeSequence();
1782 break;
1784 break;
1786 case '\n':
1787 loc.linnum++;
1788 break;
1790 case '\r':
1791 if (*p == '\n')
1792 continue; // ignore
1793 c = '\n'; // treat EndOfLine as \n character
1794 loc.linnum++;
1795 break;
1797 case '"':
1798 t->len = stringbuffer.offset;
1799 stringbuffer.writeByte(0);
1800 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1801 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1802 stringPostfix(t);
1803 return TOKstring;
1805 case 0:
1806 case 0x1A:
1807 p--;
1808 error("unterminated string constant starting at %s", start.toChars());
1809 t->ustring = (unsigned char *)"";
1810 t->len = 0;
1811 t->postfix = 0;
1812 return TOKstring;
1814 default:
1815 if (c & 0x80)
1817 p--;
1818 c = decodeUTF();
1819 if (c == LS || c == PS)
1820 { c = '\n';
1821 loc.linnum++;
1823 p++;
1824 stringbuffer.writeUTF8(c);
1825 continue;
1827 break;
1829 stringbuffer.writeByte(c);
1833 /**************************************
1836 TOK Lexer::charConstant(Token *t, int wide)
1838 unsigned c;
1839 TOK tk = TOKcharv;
1841 //printf("Lexer::charConstant\n");
1842 p++;
1843 c = *p++;
1844 switch (c)
1846 case '\\':
1847 switch (*p)
1849 case 'u':
1850 t->uns64value = escapeSequence();
1851 tk = TOKwcharv;
1852 break;
1854 case 'U':
1855 case '&':
1856 t->uns64value = escapeSequence();
1857 tk = TOKdcharv;
1858 break;
1860 default:
1861 t->uns64value = escapeSequence();
1862 break;
1864 break;
1866 case '\n':
1868 loc.linnum++;
1869 case '\r':
1870 case 0:
1871 case 0x1A:
1872 case '\'':
1873 error("unterminated character constant");
1874 return tk;
1876 default:
1877 if (c & 0x80)
1879 p--;
1880 c = decodeUTF();
1881 p++;
1882 if (c == LS || c == PS)
1883 goto L1;
1884 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1885 tk = TOKwcharv;
1886 else
1887 tk = TOKdcharv;
1889 t->uns64value = c;
1890 break;
1893 if (*p != '\'')
1894 { error("unterminated character constant");
1895 return tk;
1897 p++;
1898 return tk;
1901 /***************************************
1902 * Get postfix of string literal.
1905 void Lexer::stringPostfix(Token *t)
1907 switch (*p)
1909 case 'c':
1910 case 'w':
1911 case 'd':
1912 t->postfix = *p;
1913 p++;
1914 break;
1916 default:
1917 t->postfix = 0;
1918 break;
/***************************************
 * Read \u or \U unicode sequence
 * Input:
 *	u	'u' or 'U'
 * Returns:
 *	the value accumulated from the hex digits read (4 for 'u',
 *	8 for 'U'); an error is reported if a non-hex character is met
 *	before all digits have been read.
 * This function is compiled out (#if 0).
 */

#if 0
unsigned Lexer::wchar(unsigned u)
{
    const unsigned nchars = (u == 'U') ? 8 : 4;
    unsigned result = 0;
    unsigned count = 0;

    for (;;)
    {
        ++p;                    // p is advanced before every check
        if (count == nchars)
            break;

        unsigned char digit = *p;
        if (!ishex(digit))
        {
            error("\\%c sequence must be followed by %d hex characters", u, nchars);
            break;
        }

        // Convert the hex character to its numeric value.
        if (isdigit(digit))
            digit -= '0';
        else if (islower(digit))
            digit -= 'a' - 10;
        else
            digit -= 'A' - 10;

        result <<= 4;
        result |= digit;
        count++;
    }
    return result;
}
#endif
1961 /**************************************
1962 * Read in a number.
1963 * If it's an integer, store it in tok.TKutok.Vlong.
1964 * integers can be decimal, octal or hex
1965 * Handle the suffixes U, UL, LU, L, etc.
1966 * If it's double, store it in tok.TKutok.Vdouble.
1967 * Returns:
1968 * TKnum
1969 * TKdouble,...
1972 TOK Lexer::number(Token *t)
1974 // We use a state machine to collect numbers
1975 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
1976 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
1977 STATE_hexh, STATE_error };
1978 enum STATE state;
1980 enum FLAGS
1981 { FLAGS_decimal = 1, // decimal
1982 FLAGS_unsigned = 2, // u or U suffix
1983 FLAGS_long = 4, // l or L suffix
1985 enum FLAGS flags = FLAGS_decimal;
1987 int i;
1988 int base;
1989 unsigned c;
1990 unsigned char *start;
1991 TOK result;
1993 //printf("Lexer::number()\n");
1994 state = STATE_initial;
1995 base = 0;
1996 stringbuffer.reset();
1997 start = p;
1998 while (1)
2000 c = *p;
2001 switch (state)
2003 case STATE_initial: // opening state
2004 if (c == '0')
2005 state = STATE_0;
2006 else
2007 state = STATE_decimal;
2008 break;
2010 case STATE_0:
2011 flags = (FLAGS) (flags & ~FLAGS_decimal);
2012 switch (c)
2014 #if ZEROH
2015 case 'H': // 0h
2016 case 'h':
2017 goto hexh;
2018 #endif
2019 case 'X':
2020 case 'x':
2021 state = STATE_hex0;
2022 break;
2024 case '.':
2025 if (p[1] == '.') // .. is a separate token
2026 goto done;
2027 case 'i':
2028 case 'f':
2029 case 'F':
2030 goto real;
2031 #if ZEROH
2032 case 'E':
2033 case 'e':
2034 goto case_hex;
2035 #endif
2036 case 'B':
2037 case 'b':
2038 state = STATE_binary0;
2039 break;
2041 case '0': case '1': case '2': case '3':
2042 case '4': case '5': case '6': case '7':
2043 state = STATE_octal;
2044 break;
2046 #if ZEROH
2047 case '8': case '9': case 'A':
2048 case 'C': case 'D': case 'F':
2049 case 'a': case 'c': case 'd': case 'f':
2050 case_hex:
2051 state = STATE_hexh;
2052 break;
2053 #endif
2054 case '_':
2055 state = STATE_octal;
2056 p++;
2057 continue;
2059 case 'L':
2060 if (p[1] == 'i')
2061 goto real;
2062 goto done;
2064 default:
2065 goto done;
2067 break;
2069 case STATE_decimal: // reading decimal number
2070 if (!isdigit(c))
2072 #if ZEROH
2073 if (ishex(c)
2074 || c == 'H' || c == 'h'
2076 goto hexh;
2077 #endif
2078 if (c == '_') // ignore embedded _
2079 { p++;
2080 continue;
2082 if (c == '.' && p[1] != '.')
2083 goto real;
2084 else if (c == 'i' || c == 'f' || c == 'F' ||
2085 c == 'e' || c == 'E')
2087 real: // It's a real number. Back up and rescan as a real
2088 p = start;
2089 return inreal(t);
2091 else if (c == 'L' && p[1] == 'i')
2092 goto real;
2093 goto done;
2095 break;
2097 case STATE_hex0: // reading hex number
2098 case STATE_hex:
2099 if (!ishex(c))
2101 if (c == '_') // ignore embedded _
2102 { p++;
2103 continue;
2105 if (c == '.' && p[1] != '.')
2106 goto real;
2107 if (c == 'P' || c == 'p' || c == 'i')
2108 goto real;
2109 if (state == STATE_hex0)
2110 error("Hex digit expected, not '%c'", c);
2111 goto done;
2113 state = STATE_hex;
2114 break;
2116 #if ZEROH
2117 hexh:
2118 state = STATE_hexh;
2119 case STATE_hexh: // parse numbers like 0FFh
2120 if (!ishex(c))
2122 if (c == 'H' || c == 'h')
2124 p++;
2125 base = 16;
2126 goto done;
2128 else
2130 // Check for something like 1E3 or 0E24
2131 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) ||
2132 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset))
2133 goto real;
2134 error("Hex digit expected, not '%c'", c);
2135 goto done;
2138 break;
2139 #endif
2141 case STATE_octal: // reading octal number
2142 case STATE_octale: // reading octal number with non-octal digits
2143 if (!isoctal(c))
2145 #if ZEROH
2146 if (ishex(c)
2147 || c == 'H' || c == 'h'
2149 goto hexh;
2150 #endif
2151 if (c == '_') // ignore embedded _
2152 { p++;
2153 continue;
2155 if (c == '.' && p[1] != '.')
2156 goto real;
2157 if (c == 'i')
2158 goto real;
2159 if (isdigit(c))
2161 state = STATE_octale;
2163 else
2164 goto done;
2166 break;
2168 case STATE_binary0: // starting binary number
2169 case STATE_binary: // reading binary number
2170 if (c != '0' && c != '1')
2172 #if ZEROH
2173 if (ishex(c)
2174 || c == 'H' || c == 'h'
2176 goto hexh;
2177 #endif
2178 if (c == '_') // ignore embedded _
2179 { p++;
2180 continue;
2182 if (state == STATE_binary0)
2183 { error("binary digit expected");
2184 state = STATE_error;
2185 break;
2187 else
2188 goto done;
2190 state = STATE_binary;
2191 break;
2193 case STATE_error: // for error recovery
2194 if (!isdigit(c)) // scan until non-digit
2195 goto done;
2196 break;
2198 default:
2199 assert(0);
2201 stringbuffer.writeByte(c);
2202 p++;
2204 done:
2205 stringbuffer.writeByte(0); // terminate string
2206 if (state == STATE_octale)
2207 error("Octal digit expected");
2209 uinteger_t n; // unsigned >=64 bit integer type
2211 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0))
2212 n = stringbuffer.data[0] - '0';
2213 else
2215 // Convert string to integer
2216 #if __DMC__
2217 errno = 0;
2218 n = strtoull((char *)stringbuffer.data,NULL,base);
2219 if (errno == ERANGE)
2220 error("integer overflow");
2221 #else
2222 // Not everybody implements strtoull()
2223 char *p = (char *)stringbuffer.data;
2224 int r = 10, d;
2226 if (*p == '0')
2228 if (p[1] == 'x' || p[1] == 'X')
2229 p += 2, r = 16;
2230 else if (p[1] == 'b' || p[1] == 'B')
2231 p += 2, r = 2;
2232 else if (isdigit(p[1]))
2233 p += 1, r = 8;
2236 n = 0;
2237 while (1)
2239 if (*p >= '0' && *p <= '9')
2240 d = *p - '0';
2241 else if (*p >= 'a' && *p <= 'z')
2242 d = *p - 'a' + 10;
2243 else if (*p >= 'A' && *p <= 'Z')
2244 d = *p - 'A' + 10;
2245 else
2246 break;
2247 if (d >= r)
2248 break;
2249 if (n && n * r + d <= n)
2251 error ("integer overflow");
2252 break;
2255 n = n * r + d;
2256 p++;
2258 #endif
2259 if (sizeof(n) > 8 &&
2260 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits
2261 error("integer overflow");
2264 // Parse trailing 'u', 'U', 'l' or 'L' in any combination
2265 while (1)
2266 { unsigned char f;
2268 switch (*p)
2269 { case 'U':
2270 case 'u':
2271 f = FLAGS_unsigned;
2272 goto L1;
2274 case 'l':
2275 if (1 || !global.params.useDeprecated)
2276 error("'l' suffix is deprecated, use 'L' instead");
2277 case 'L':
2278 f = FLAGS_long;
2280 p++;
2281 if (flags & f)
2282 error("unrecognized token");
2283 flags = (FLAGS) (flags | f);
2284 continue;
2285 default:
2286 break;
2288 break;
2291 switch (flags)
2293 case 0:
2294 /* Octal or Hexadecimal constant.
2295 * First that fits: int, uint, long, ulong
2297 if (n & 0x8000000000000000LL)
2298 result = TOKuns64v;
2299 else if (n & 0xFFFFFFFF00000000LL)
2300 result = TOKint64v;
2301 else if (n & 0x80000000)
2302 result = TOKuns32v;
2303 else
2304 result = TOKint32v;
2305 break;
2307 case FLAGS_decimal:
2308 /* First that fits: int, long, long long
2310 if (n & 0x8000000000000000LL)
2311 { error("signed integer overflow");
2312 result = TOKuns64v;
2314 else if (n & 0xFFFFFFFF80000000LL)
2315 result = TOKint64v;
2316 else
2317 result = TOKint32v;
2318 break;
2320 case FLAGS_unsigned:
2321 case FLAGS_decimal | FLAGS_unsigned:
2322 /* First that fits: uint, ulong
2324 if (n & 0xFFFFFFFF00000000LL)
2325 result = TOKuns64v;
2326 else
2327 result = TOKuns32v;
2328 break;
2330 case FLAGS_decimal | FLAGS_long:
2331 if (n & 0x8000000000000000LL)
2332 { error("signed integer overflow");
2333 result = TOKuns64v;
2335 else
2336 result = TOKint64v;
2337 break;
2339 case FLAGS_long:
2340 if (n & 0x8000000000000000LL)
2341 result = TOKuns64v;
2342 else
2343 result = TOKint64v;
2344 break;
2346 case FLAGS_unsigned | FLAGS_long:
2347 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
2348 result = TOKuns64v;
2349 break;
2351 default:
2352 #ifdef DEBUG
2353 printf("%x\n",flags);
2354 #endif
2355 assert(0);
2357 t->uns64value = n;
2358 return result;
2361 /**************************************
2362 * Read in characters, converting them to real.
2363 * Bugs:
2364 * Exponent overflow not detected.
2365 * Too much requested precision is not detected.
// Lex a floating point or imaginary literal starting at p.
// The characters of the literal are collected into stringbuffer by a
// small state machine (dblstate), embedded '_' characters are dropped,
// and the collected text is converted and stored in t->float80value.
// Returns TOKfloat32v/TOKfloat64v/TOKfloat80v according to the suffix
// (f or F, none, l or L), or the matching TOKimaginary* value when the
// literal is followed by i or I.
2368 TOK Lexer::inreal(Token *t)
// Contract blocks below are only compiled by DMC.
2369 #ifdef __DMC__
2370 __in
2372 assert(*p == '.' || isdigit(*p));
2374 __out (result)
2376 switch (result)
2378 case TOKfloat32v:
2379 case TOKfloat64v:
2380 case TOKfloat80v:
2381 case TOKimaginary32v:
2382 case TOKimaginary64v:
2383 case TOKimaginary80v:
2384 break;
2386 default:
2387 assert(0);
2390 __body
2391 #endif /* __DMC__ */
2392 { int dblstate;
2393 unsigned c;
2394 char hex; // is this a hexadecimal-floating-constant?
2395 TOK result;
2397 //printf("Lexer::inreal()\n");
2398 stringbuffer.reset();
2399 dblstate = 0;
2400 hex = 0;
// Lnext is the re-entry point used to skip a character without
// storing it (see the '_' handling below).
2401 Lnext:
2402 while (1)
2404 // Get next char from input
2405 c = *p++;
2406 //printf("dblstate = %d, c = '%c'\n", dblstate, c);
// Inner loop: 'continue' re-examines the same character in the new
// state; 'break' falls out to store the character and fetch the next.
2407 while (1)
2409 switch (dblstate)
2411 case 0: // opening state
2412 if (c == '0')
2413 dblstate = 9;
2414 else if (c == '.')
2415 dblstate = 3;
2416 else
2417 dblstate = 1;
2418 break;
// State 9: the character right after a leading '0'; an x or X marks
// the literal as hexadecimal, anything else is handled as state 1.
2420 case 9:
2421 dblstate = 1;
2422 if (c == 'X' || c == 'x')
2423 { hex++;
2424 break;
2426 case 1: // digits to left of .
2427 case 3: // digits to right of .
2428 case 7: // continuing exponent digits
2429 if (!isdigit(c) && !(hex && isxdigit(c)))
2431 if (c == '_')
2432 goto Lnext; // ignore embedded '_'
// Not a digit: move to the following state and look at c again.
2433 dblstate++;
2434 continue;
2436 break;
2438 case 2: // no more digits to left of .
2439 if (c == '.')
2440 { dblstate++;
2441 break;
2443 case 4: // no more digits to right of .
2444 if ((c == 'E' || c == 'e') ||
2445 hex && (c == 'P' || c == 'p'))
2446 { dblstate = 5;
2447 hex = 0; // exponent is always decimal
2448 break;
// hex is only still set here when no exponent marker was seen.
2450 if (hex)
2451 error("binary-exponent-part required");
2452 goto done;
2454 case 5: // looking immediately to right of E
2455 dblstate++;
2456 if (c == '-' || c == '+')
2457 break;
2458 case 6: // 1st exponent digit expected
2459 if (!isdigit(c))
2460 error("exponent expected");
2461 dblstate++;
2462 break;
2464 case 8: // past end of exponent digits
2465 goto done;
2467 break;
2469 stringbuffer.writeByte(c);
2471 done:
// Un-read the character that ended the literal.
2472 p--;
2474 stringbuffer.writeByte(0);
// With DMC on Windows the decimal point used by the conversion
// routines is forced to "." here and restored before returning.
2476 #if _WIN32 && __DMC__
2477 char *save = __locale_decpoint;
2478 __locale_decpoint = ".";
2479 #endif
2480 #ifdef IN_GCC
2481 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble);
2482 #else
2483 t->float80value = strtold((char *)stringbuffer.data, NULL);
2484 #endif
// errno is cleared after the conversion above, so only the strtof /
// strtod calls below can make the final ERANGE check fire.
// NOTE(review): the l/L branch performs no further conversion, so it
// appears literals with that suffix get no range check here - confirm
// whether that is intended.
2485 errno = 0;
2486 switch (*p)
2488 case 'F':
2489 case 'f':
// The converted value is discarded; the call is repeated at the
// narrower precision (in the non-GCC build, for its effect on errno).
2490 #ifdef IN_GCC
2491 real_t::parse((char *)stringbuffer.data, real_t::Float);
2492 #else
2493 strtof((char *)stringbuffer.data, NULL);
2494 #endif
2495 result = TOKfloat32v;
2496 p++;
2497 break;
2499 default:
2500 #ifdef IN_GCC
2501 real_t::parse((char *)stringbuffer.data, real_t::Double);
2502 #else
2503 strtod((char *)stringbuffer.data, NULL);
2504 #endif
2505 result = TOKfloat64v;
2506 break;
2508 case 'l':
2509 if (!global.params.useDeprecated)
2510 error("'l' suffix is deprecated, use 'L' instead");
2511 case 'L':
2512 result = TOKfloat80v;
2513 p++;
2514 break;
// A trailing i or I turns the real token into the imaginary token of
// the same precision.
2516 if (*p == 'i' || *p == 'I')
2518 if (!global.params.useDeprecated && *p == 'I')
2519 error("'I' suffix is deprecated, use 'i' instead");
2520 p++;
2521 switch (result)
2523 case TOKfloat32v:
2524 result = TOKimaginary32v;
2525 break;
2526 case TOKfloat64v:
2527 result = TOKimaginary64v;
2528 break;
2529 case TOKfloat80v:
2530 result = TOKimaginary80v;
2531 break;
2534 #if _WIN32 && __DMC__
2535 __locale_decpoint = save;
2536 #endif
2537 if (errno == ERANGE)
2538 error("number is not representable");
2539 return result;
2542 /*********************************************
2543 * Do pragma.
2544 * Currently, the only pragma supported is:
2545 * #line linnum [filespec]
2548 void Lexer::pragma()
2550 Token tok;
2551 int linnum;
2552 char *filespec = NULL;
2553 Loc loc = this->loc;
2555 while (isblank(*p)) p++;
2556 if (*p == '\n')
2557 goto Lerr;
2559 scan(&tok);
2560 if (tok.value != TOKidentifier || tok.ident != Id::line)
2561 goto Lerr;
2563 scan(&tok);
2564 if (tok.value == TOKint32v || tok.value == TOKint64v)
2565 linnum = tok.uns64value - 1;
2566 else
2567 goto Lerr;
2569 while (1)
2571 switch (*p)
2573 case 0:
2574 case 0x1A:
2575 case '\n':
2576 Lnewline:
2577 this->loc.linnum = linnum;
2578 if (filespec)
2579 this->loc.filename = filespec;
2580 return;
2582 case '\r':
2583 p++;
2584 if (*p != '\n')
2585 { p--;
2586 goto Lnewline;
2588 continue;
2590 case ' ':
2591 case '\t':
2592 case '\v':
2593 case '\f':
2594 p++;
2595 continue; // skip white space
2597 case '_':
2598 if (mod && memcmp(p, "__FILE__", 8) == 0)
2600 p += 8;
2601 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars());
2603 continue;
2605 case '"':
2606 if (filespec)
2607 goto Lerr;
2608 stringbuffer.reset();
2609 p++;
2610 while (1)
2611 { unsigned c;
2613 c = *p;
2614 switch (c)
2616 case '\n':
2617 case '\r':
2618 case 0:
2619 case 0x1A:
2620 goto Lerr;
2622 case '"':
2623 stringbuffer.writeByte(0);
2624 filespec = mem.strdup((char *)stringbuffer.data);
2625 p++;
2626 break;
2628 default:
2629 if (c & 0x80)
2630 { unsigned u = decodeUTF();
2631 if (u == PS || u == LS)
2632 goto Lerr;
2634 stringbuffer.writeByte(c);
2635 p++;
2636 continue;
2638 break;
2640 continue;
2642 default:
2643 if (*p & 0x80)
2644 { unsigned u = decodeUTF();
2645 if (u == PS || u == LS)
2646 goto Lnewline;
2648 goto Lerr;
2652 Lerr:
2653 error(loc, "#line integer [\"filespec\"]\\n expected");
2657 /********************************************
2658 * Decode UTF character.
2659 * Issue error messages for invalid sequences.
2660 * Return decoded character, advance p to last character in UTF sequence.
2663 unsigned Lexer::decodeUTF()
2665 dchar_t u;
2666 unsigned char c;
2667 unsigned char *s = p;
2668 size_t len;
2669 size_t idx;
2670 char *msg;
2672 c = *s;
2673 assert(c & 0x80);
2675 // Check length of remaining string up to 6 UTF-8 characters
2676 for (len = 1; len < 6 && s[len]; len++)
2679 idx = 0;
2680 msg = utf_decodeChar(s, len, &idx, &u);
2681 p += idx - 1;
2682 if (msg)
2684 error("%s", msg);
2686 return u;
2690 /***************************************************
2691 * Parse doc comment embedded between t->ptr and p.
2692 * Remove trailing blanks and tabs from lines.
2693 * Replace all newlines with \n.
2694 * Remove leading comment character from each line.
2695 * Decide if it's a lineComment or a blockComment.
2696 * Append to previous one for this token.
2699 void Lexer::getDocComment(Token *t, unsigned lineComment)
2701 OutBuffer buf;
2702 unsigned char ct = t->ptr[2];
2703 unsigned char *q = t->ptr + 3; // start of comment text
2704 int linestart = 0;
2706 unsigned char *qend = p;
2707 if (ct == '*' || ct == '+')
2708 qend -= 2;
2710 /* Scan over initial row of ****'s or ++++'s or ////'s
2712 for (; q < qend; q++)
2714 if (*q != ct)
2715 break;
2718 /* Remove trailing row of ****'s or ++++'s
2720 if (ct != '/')
2722 for (; q < qend; qend--)
2724 if (qend[-1] != ct)
2725 break;
2729 for (; q < qend; q++)
2731 unsigned char c = *q;
2733 switch (c)
2735 case '*':
2736 case '+':
2737 if (linestart && c == ct)
2738 { linestart = 0;
2739 /* Trim preceding whitespace up to preceding \n
2741 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2742 buf.offset--;
2743 continue;
2745 break;
2747 case ' ':
2748 case '\t':
2749 break;
2751 case '\r':
2752 if (q[1] == '\n')
2753 continue; // skip the \r
2754 goto Lnewline;
2756 default:
2757 if (c == 226)
2759 // If LS or PS
2760 if (q[1] == 128 &&
2761 (q[2] == 168 || q[2] == 169))
2763 q += 2;
2764 goto Lnewline;
2767 linestart = 0;
2768 break;
2770 Lnewline:
2771 c = '\n'; // replace all newlines with \n
2772 case '\n':
2773 linestart = 1;
2775 /* Trim trailing whitespace
2777 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2778 buf.offset--;
2780 break;
2782 buf.writeByte(c);
2785 // Always end with a newline
2786 if (!buf.offset || buf.data[buf.offset - 1] != '\n')
2787 buf.writeByte('\n');
2789 buf.writeByte(0);
2791 // It's a line comment if the start of the doc comment comes
2792 // after other non-whitespace on the same line.
2793 unsigned char** dc = (lineComment && anyToken)
2794 ? &t->lineComment
2795 : &t->blockComment;
2797 // Combine with previous doc comment, if any
2798 if (*dc)
2799 *dc = combineComments(*dc, (unsigned char *)buf.data);
2800 else
2801 *dc = (unsigned char *)buf.extractData();
2804 /********************************************
2805 * Combine two document comments into one.
2808 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2)
2810 unsigned char *c = c2;
2812 if (c1)
2813 { c = c1;
2814 if (c2)
2815 { size_t len1 = strlen((char *)c1);
2816 size_t len2 = strlen((char *)c2);
2818 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1);
2819 memcpy(c, c1, len1);
2820 c[len1] = '\n';
2821 memcpy(c + len1 + 1, c2, len2);
2822 c[len1 + 1 + len2] = 0;
2825 return c;
2828 /********************************************
2829 * Create an identifier in the string table.
2832 Identifier *Lexer::idPool(const char *s)
2834 size_t len = strlen(s);
2835 StringValue *sv = stringtable.update(s, len);
2836 Identifier *id = (Identifier *) sv->ptrvalue;
2837 if (!id)
2839 id = new Identifier(sv->lstring.string, TOKidentifier);
2840 sv->ptrvalue = id;
2842 return id;
2845 /*********************************************
2846 * Create a unique identifier using the prefix s.
2849 Identifier *Lexer::uniqueId(const char *s, int num)
2850 { char buffer[32];
2851 size_t slen = strlen(s);
2853 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer));
2854 sprintf(buffer, "%s%d", s, num);
2855 return idPool(buffer);
2858 Identifier *Lexer::uniqueId(const char *s)
2860 static int num;
2861 return uniqueId(s, ++num);
2864 /****************************************
// One entry of the keyword table: the keyword's spelling and the token
// value it lexes to.
2867 struct Keyword
2868 { char *name;
2869 enum TOK value;
// Table of all keywords.  Lexer::initKeywords() enters each name into
// the string table with its token value and records the name in
// Token::tochars[]; Token::isKeyword() searches this table by value.
// NOTE(review): initKeywords() leaves out the LAST TWO entries of this
// array when global.params.Dversion == 1, so the order at the end of
// the table matters.  Entries have been appended after "ref"/"macro"
// (the group under "Added after 1.0"); confirm that the two entries
// now dropped are the ones intended.
2872 static Keyword keywords[] =
2874 // { "", TOK },
2876 { "this", TOKthis },
2877 { "super", TOKsuper },
2878 { "assert", TOKassert },
2879 { "null", TOKnull },
2880 { "true", TOKtrue },
2881 { "false", TOKfalse },
2882 { "cast", TOKcast },
2883 { "new", TOKnew },
2884 { "delete", TOKdelete },
2885 { "throw", TOKthrow },
2886 { "module", TOKmodule },
2887 { "pragma", TOKpragma },
2888 { "typeof", TOKtypeof },
2889 { "typeid", TOKtypeid },
2891 { "template", TOKtemplate },
// Basic types
2893 { "void", TOKvoid },
2894 { "byte", TOKint8 },
2895 { "ubyte", TOKuns8 },
2896 { "short", TOKint16 },
2897 { "ushort", TOKuns16 },
2898 { "int", TOKint32 },
2899 { "uint", TOKuns32 },
2900 { "long", TOKint64 },
2901 { "ulong", TOKuns64 },
2902 { "cent", TOKcent, },
2903 { "ucent", TOKucent, },
2904 { "float", TOKfloat32 },
2905 { "double", TOKfloat64 },
2906 { "real", TOKfloat80 },
2908 { "bool", TOKbool },
2909 { "char", TOKchar },
2910 { "wchar", TOKwchar },
2911 { "dchar", TOKdchar },
2913 { "ifloat", TOKimaginary32 },
2914 { "idouble", TOKimaginary64 },
2915 { "ireal", TOKimaginary80 },
2917 { "cfloat", TOKcomplex32 },
2918 { "cdouble", TOKcomplex64 },
2919 { "creal", TOKcomplex80 },
2921 { "delegate", TOKdelegate },
2922 { "function", TOKfunction },
// Statements
2924 { "is", TOKis },
2925 { "if", TOKif },
2926 { "else", TOKelse },
2927 { "while", TOKwhile },
2928 { "for", TOKfor },
2929 { "do", TOKdo },
2930 { "switch", TOKswitch },
2931 { "case", TOKcase },
2932 { "default", TOKdefault },
2933 { "break", TOKbreak },
2934 { "continue", TOKcontinue },
2935 { "synchronized", TOKsynchronized },
2936 { "return", TOKreturn },
2937 { "goto", TOKgoto },
2938 { "try", TOKtry },
2939 { "catch", TOKcatch },
2940 { "finally", TOKfinally },
2941 { "with", TOKwith },
2942 { "asm", TOKasm },
2943 { "foreach", TOKforeach },
2944 { "foreach_reverse", TOKforeach_reverse },
2945 { "reversed", TOKreversed },
2946 { "scope", TOKscope },
// Declarations, storage classes and attributes
2948 { "struct", TOKstruct },
2949 { "class", TOKclass },
2950 { "interface", TOKinterface },
2951 { "union", TOKunion },
2952 { "enum", TOKenum },
2953 { "import", TOKimport },
2954 { "mixin", TOKmixin },
2955 { "static", TOKstatic },
2956 { "final", TOKfinal },
2957 { "const", TOKconst },
2958 { "typedef", TOKtypedef },
2959 { "alias", TOKalias },
2960 { "override", TOKoverride },
2961 { "abstract", TOKabstract },
2962 { "volatile", TOKvolatile },
2963 { "debug", TOKdebug },
2964 { "deprecated", TOKdeprecated },
2965 { "in", TOKin },
2966 { "out", TOKout },
2967 { "inout", TOKinout },
2968 { "lazy", TOKlazy },
2969 { "auto", TOKauto },
2971 { "align", TOKalign },
2972 { "extern", TOKextern },
2973 { "private", TOKprivate },
2974 { "package", TOKpackage },
2975 { "protected", TOKprotected },
2976 { "public", TOKpublic },
2977 { "export", TOKexport },
2979 { "body", TOKbody },
2980 { "invariant", TOKinvariant },
2981 { "unittest", TOKunittest },
2982 { "version", TOKversion },
2983 //{ "manifest", TOKmanifest },
2985 // Added after 1.0
2986 { "ref", TOKref },
2987 { "macro", TOKmacro },
// The word forms "and", "or" and "not" below share their token values
// with the operators whose tochars[] entries initKeywords() later sets
// to "&&", "||" and "!".
2990 // TAL
2991 { "and", TOKandand },
2992 { "or", TOKoror },
2993 { "not", TOKnot },
2994 { "extends", TOKextends },
2995 { "implements", TOKimplements },
2996 { "log_error", TOKlog_error },
2997 { "log_warning", TOKlog_warning },
2998 { "log_info", TOKlog_info },
2999 { "log_trace", TOKlog_trace },
// Only present when built with V2
3000 #if V2
3001 { "pure", TOKpure },
3002 { "nothrow", TOKnothrow },
3003 { "__traits", TOKtraits },
3004 { "__overloadset", TOKoverloadset },
3005 #endif
3008 int Token::isKeyword()
3010 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++)
3012 if (keywords[u].value == value)
3013 return 1;
3015 return 0;
3018 void Lexer::initKeywords()
3019 { StringValue *sv;
3020 unsigned u;
3021 enum TOK v;
3022 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]);
3024 if (global.params.Dversion == 1)
3025 nkeywords -= 2;
3027 cmtable_init();
3029 for (u = 0; u < nkeywords; u++)
3030 { char *s;
3032 //printf("keyword[%d] = '%s'\n",u, keywords[u].name);
3033 s = keywords[u].name;
3034 v = keywords[u].value;
3035 sv = stringtable.insert(s, strlen(s));
3036 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v);
3038 //printf("tochars[%d] = '%s'\n",v, s);
3039 Token::tochars[v] = s;
3042 Token::tochars[TOKeof] = "EOF";
3043 Token::tochars[TOKlcurly] = "{";
3044 Token::tochars[TOKrcurly] = "}";
3045 Token::tochars[TOKlparen] = "(";
3046 Token::tochars[TOKrparen] = ")";
3047 Token::tochars[TOKlbracket] = "[";
3048 Token::tochars[TOKrbracket] = "]";
3049 Token::tochars[TOKsemicolon] = ";";
3050 Token::tochars[TOKcolon] = ":";
3051 Token::tochars[TOKcomma] = ",";
3052 Token::tochars[TOKdot] = ".";
3053 Token::tochars[TOKxor] = "^";
3054 Token::tochars[TOKxorass] = "^=";
3055 Token::tochars[TOKassign] = "=";
3056 Token::tochars[TOKconstruct] = "=";
3057 #if V2
3058 Token::tochars[TOKblit] = "=";
3059 #endif
3060 Token::tochars[TOKlt] = "<";
3061 Token::tochars[TOKgt] = ">";
3062 Token::tochars[TOKle] = "<=";
3063 Token::tochars[TOKge] = ">=";
3064 Token::tochars[TOKequal] = "==";
3065 Token::tochars[TOKnotequal] = "!=";
3066 Token::tochars[TOKnotidentity] = "!is";
3067 Token::tochars[TOKtobool] = "!!";
3068 Token::tochars[TOKat] = "@";
3070 Token::tochars[TOKunord] = "!<>=";
3071 Token::tochars[TOKue] = "!<>";
3072 Token::tochars[TOKlg] = "<>";
3073 Token::tochars[TOKleg] = "<>=";
3074 Token::tochars[TOKule] = "!>";
3075 Token::tochars[TOKul] = "!>=";
3076 Token::tochars[TOKuge] = "!<";
3077 Token::tochars[TOKug] = "!<=";
3079 Token::tochars[TOKnot] = "!";
3080 Token::tochars[TOKtobool] = "!!";
3081 Token::tochars[TOKshl] = "<<";
3082 Token::tochars[TOKshr] = ">>";
3083 Token::tochars[TOKushr] = ">>>";
3084 Token::tochars[TOKadd] = "+";
3085 Token::tochars[TOKmin] = "-";
3086 Token::tochars[TOKmul] = "*";
3087 Token::tochars[TOKdiv] = "/";
3088 Token::tochars[TOKmod] = "%";
3089 Token::tochars[TOKslice] = "..";
3090 Token::tochars[TOKdotdotdot] = "...";
3091 Token::tochars[TOKand] = "&";
3092 Token::tochars[TOKandand] = "&&";
3093 Token::tochars[TOKor] = "|";
3094 Token::tochars[TOKoror] = "||";
3095 Token::tochars[TOKarray] = "[]";
3096 Token::tochars[TOKindex] = "[i]";
3097 Token::tochars[TOKaddress] = "&";
3098 Token::tochars[TOKstar] = "*";
3099 Token::tochars[TOKtilde] = "~";
3100 Token::tochars[TOKdollar] = "$";
3101 Token::tochars[TOKcast] = "cast";
3102 Token::tochars[TOKplusplus] = "++";
3103 Token::tochars[TOKminusminus] = "--";
3104 Token::tochars[TOKtype] = "type";
3105 Token::tochars[TOKquestion] = "?";
3106 Token::tochars[TOKneg] = "-";
3107 Token::tochars[TOKuadd] = "+";
3108 Token::tochars[TOKvar] = "var";
3109 Token::tochars[TOKaddass] = "+=";
3110 Token::tochars[TOKminass] = "-=";
3111 Token::tochars[TOKmulass] = "*=";
3112 Token::tochars[TOKdivass] = "/=";
3113 Token::tochars[TOKmodass] = "%=";
3114 Token::tochars[TOKshlass] = "<<=";
3115 Token::tochars[TOKshrass] = ">>=";
3116 Token::tochars[TOKushrass] = ">>>=";
3117 Token::tochars[TOKandass] = "&=";
3118 Token::tochars[TOKorass] = "|=";
3119 Token::tochars[TOKcatass] = "~=";
3120 Token::tochars[TOKcat] = "~";
3121 Token::tochars[TOKcall] = "call";
3122 Token::tochars[TOKidentity] = "is";
3123 Token::tochars[TOKnotidentity] = "!is";
3124 Token::tochars[TOKendline] = "\\n";
3126 Token::tochars[TOKorass] = "|=";
3127 Token::tochars[TOKidentifier] = "identifier";
3129 // For debugging
3130 Token::tochars[TOKdotexp] = "dotexp";
3131 Token::tochars[TOKdotti] = "dotti";
3132 Token::tochars[TOKdotvar] = "dotvar";
3133 Token::tochars[TOKdottype] = "dottype";
3134 Token::tochars[TOKsymoff] = "symoff";
3135 Token::tochars[TOKtypedot] = "typedot";
3136 Token::tochars[TOKarraylength] = "arraylength";
3137 Token::tochars[TOKarrayliteral] = "arrayliteral";
3138 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral";
3139 Token::tochars[TOKstructliteral] = "structliteral";
3140 Token::tochars[TOKstring] = "string";
3141 Token::tochars[TOKdsymbol] = "symbol";
3142 Token::tochars[TOKtuple] = "tuple";
3143 Token::tochars[TOKdeclaration] = "declaration";
3144 Token::tochars[TOKdottd] = "dottd";
3145 Token::tochars[TOKlogger] = "logger";
3146 Token::tochars[TOKon_scope_exit] = "scope(exit)";