Disallow x[y] if x has a maybe type
[delight/core.git] / dmd2 / lexer.c
blob7e6e6119e9183e45435d31992030347b5bcfa2f3
2 // Compiler implementation of the D programming language
3 // Copyright (c) 1999-2008 by Digital Mars
4 // All Rights Reserved
5 // written by Walter Bright
6 // http://www.digitalmars.com
7 // License for redistribution is by either the Artistic License
8 // in artistic.txt, or the GNU General Public License in gnu.txt.
9 // See the included readme.txt for details.
11 /* NOTE: This file has been patched from the original DMD distribution to
12 work with the GDC compiler.
14 Modified by David Friedman, December 2006
17 /* Lexical Analyzer */
19 #include <stdio.h>
20 #include <string.h>
21 #include <ctype.h>
22 #include <stdarg.h>
23 #include <errno.h>
24 //#include <wchar.h>
25 #include <stdlib.h>
26 #include <assert.h>
27 #include <sys/time.h>
29 #ifdef IN_GCC
31 #include <time.h>
32 #include "mem.h"
34 #else
36 #if __GNUC__
37 #include <time.h>
38 #endif
40 #if _WIN32
41 #include "..\root\mem.h"
42 #else
43 #include "../root/mem.h"
44 #endif
45 #endif
47 #include "stringtable.h"
49 #include "lexer.h"
50 #include "utf.h"
51 #include "identifier.h"
52 #include "id.h"
53 #include "module.h"
55 #if _WIN32 && __DMC__
56 // from \dm\src\include\setlocal.h
57 extern "C" char * __cdecl __locale_decpoint;
58 #endif
60 extern int HtmlNamedEntity(unsigned char *p, int length);
62 #define LS 0x2028 // UTF line separator
63 #define PS 0x2029 // UTF paragraph separator
/********************************************
 * Private character-class table.
 *
 * cmtable[] holds one flag byte per 8-bit character value; the CM* constants
 * name the individual flag bits and the is*() helpers test them.  The table is
 * all zero until cmtable_init() has been run.
 */

static unsigned char cmtable[256];

const int CMoctal  = 0x1;       // '0'..'7'
const int CMhex    = 0x2;       // decimal digit or a-f / A-F
const int CMidchar = 0x4;       // alphanumeric or '_'

inline unsigned char isoctal (unsigned char c) { return (unsigned char)(cmtable[c] & CMoctal); }
inline unsigned char ishex   (unsigned char c) { return (unsigned char)(cmtable[c] & CMhex); }
inline unsigned char isidchar(unsigned char c) { return (unsigned char)(cmtable[c] & CMidchar); }

/* Populate cmtable[] by classifying every possible byte value once. */
static void cmtable_init()
{
    const unsigned count = sizeof(cmtable) / sizeof(cmtable[0]);

    unsigned ch = 0;
    while (ch < count)
    {
        const bool octalDigit = (ch >= '0' && ch <= '7');
        const bool hexLetter  = (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');

        unsigned char flags = 0;
        if (octalDigit)
            flags |= CMoctal;
        if (isdigit(ch) || hexLetter)
            flags |= CMhex;
        if (isalnum(ch) || ch == '_')
            flags |= CMidchar;

        cmtable[ch] |= flags;
        ++ch;
    }
}
93 /************************* Token **********************************************/
// Spelling of each token kind, indexed by TOK value.  Token::toChars(enum TOK)
// reads this table and falls back to a generated "TOK<n>" name when an entry
// is null.
95 char *Token::tochars[TOKMAX];
97 void *Token::operator new(size_t size)
98 { Token *t;
100 if (Lexer::freelist)
102 t = Lexer::freelist;
103 Lexer::freelist = t->next;
104 return t;
107 return ::operator new(size);
#ifdef DEBUG
/* Debug aid: write this token's text, followed by a newline, to stdmsg. */
void Token::print()
{
    const char *text = toChars();
    fprintf(stdmsg, "%s\n", text);
}
#endif
117 char *Token::toChars()
118 { char *p;
119 static char buffer[3 + 3 * sizeof(value) + 1];
121 p = buffer;
122 switch (value)
124 case TOKint32v:
125 #if IN_GCC
126 sprintf(buffer,"%d",(d_int32)int64value);
127 #else
128 sprintf(buffer,"%d",int32value);
129 #endif
130 break;
132 case TOKuns32v:
133 case TOKcharv:
134 case TOKwcharv:
135 case TOKdcharv:
136 #if IN_GCC
137 sprintf(buffer,"%uU",(d_uns32)uns64value);
138 #else
139 sprintf(buffer,"%uU",uns32value);
140 #endif
141 break;
143 case TOKint64v:
144 sprintf(buffer,"%"PRIdMAX"L",int64value);
145 break;
147 case TOKuns64v:
148 sprintf(buffer,"%"PRIuMAX"UL",uns64value);
149 break;
151 #if IN_GCC
152 case TOKfloat32v:
153 case TOKfloat64v:
154 case TOKfloat80v:
155 float80value.format(buffer, sizeof(buffer));
156 break;
157 case TOKimaginary32v:
158 case TOKimaginary64v:
159 case TOKimaginary80v:
160 float80value.format(buffer, sizeof(buffer));
161 // %% buffer
162 strcat(buffer, "i");
163 break;
164 #else
165 case TOKfloat32v:
166 sprintf(buffer,"%Lgf", float80value);
167 break;
169 case TOKfloat64v:
170 sprintf(buffer,"%Lg", float80value);
171 break;
173 case TOKfloat80v:
174 sprintf(buffer,"%LgL", float80value);
175 break;
177 case TOKimaginary32v:
178 sprintf(buffer,"%Lgfi", float80value);
179 break;
181 case TOKimaginary64v:
182 sprintf(buffer,"%Lgi", float80value);
183 break;
185 case TOKimaginary80v:
186 sprintf(buffer,"%LgLi", float80value);
187 break;
188 #endif
191 case TOKstring:
192 #if CSTRINGS
193 p = string;
194 #else
195 { OutBuffer buf;
197 buf.writeByte('"');
198 for (size_t i = 0; i < len; )
199 { unsigned c;
201 utf_decodeChar((unsigned char *)ustring, len, &i, &c);
202 switch (c)
204 case 0:
205 break;
207 case '"':
208 case '\\':
209 buf.writeByte('\\');
210 default:
211 if (isprint(c))
212 buf.writeByte(c);
213 else if (c <= 0x7F)
214 buf.printf("\\x%02x", c);
215 else if (c <= 0xFFFF)
216 buf.printf("\\u%04x", c);
217 else
218 buf.printf("\\U%08x", c);
219 continue;
221 break;
223 buf.writeByte('"');
224 if (postfix)
225 buf.writeByte('"');
226 buf.writeByte(0);
227 p = (char *)buf.extractData();
229 #endif
230 break;
232 case TOKidentifier:
233 case TOKenum:
234 case TOKstruct:
235 case TOKimport:
236 CASE_BASIC_TYPES:
237 p = ident->toChars();
238 break;
240 default:
241 p = toChars(value);
242 break;
244 return p;
247 char *Token::toChars(enum TOK value)
248 { char *p;
249 static char buffer[3 + 3 * sizeof(value) + 1];
251 p = tochars[value];
252 if (!p)
253 { sprintf(buffer,"TOK%d",value);
254 p = buffer;
256 return p;
259 /*************************** Lexer ********************************************/
// Head of the recycled-Token list: nextToken() pushes consumed look-ahead
// tokens onto it and Token::operator new pops from it.
261 Token *Lexer::freelist = NULL;
// Identifier table that scan() consults through stringtable.update().
262 StringTable Lexer::stringtable;
// Scratch buffer the string-literal scanners reset, fill, and then copy out of.
263 OutBuffer Lexer::stringbuffer;
265 Lexer::Lexer(Module *mod,
266 unsigned char *base, unsigned begoffset, unsigned endoffset,
267 int doDocComment, int commentToken, bool dltSyntax)
268 : loc(mod, 1), dltSyntax(dltSyntax)
270 //printf("Lexer::Lexer(%p,%d)\n",base,length);
271 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod);
272 memset(&token,0,sizeof(token));
273 this->base = base;
274 this->end = base + endoffset;
275 p = base + begoffset;
276 this->mod = mod;
277 this->doDocComment = doDocComment;
278 this->anyToken = 0;
279 this->commentToken = commentToken;
280 this->nesting = 0;
281 this->indent = 0;
282 this->atStartOfLine = 1;
283 this->incLineno = 0;
284 //initKeywords();
286 /* If first line starts with '#!', ignore the line
289 if (p[0] == '#' && p[1] =='!')
291 p += 2;
292 while (1)
293 { unsigned char c = *p;
294 switch (c)
296 case '\n':
297 p++;
298 break;
300 case '\r':
301 p++;
302 if (*p == '\n')
303 p++;
304 break;
306 case 0:
307 case 0x1A:
308 break;
310 default:
311 if (c & 0x80)
312 { unsigned u = decodeUTF();
313 if (u == PS || u == LS)
314 break;
316 p++;
317 continue;
319 break;
321 loc.linnum = 2;
326 void Lexer::error(const char *format, ...)
328 if (mod && !global.gag)
330 char *p = loc.toChars();
331 if (*p)
332 fprintf(stdmsg, "%s: ", p);
333 mem.free(p);
335 va_list ap;
336 va_start(ap, format);
337 vfprintf(stdmsg, format, ap);
338 va_end(ap);
340 fprintf(stdmsg, "\n");
341 fflush(stdmsg);
343 if (global.errors >= 20) // moderate blizzard of cascading messages
344 fatal();
346 global.errors++;
349 void Lexer::error(Loc loc, const char *format, ...)
351 if (mod && !global.gag)
353 char *p = loc.toChars();
354 if (*p)
355 fprintf(stdmsg, "%s: ", p);
356 mem.free(p);
358 va_list ap;
359 va_start(ap, format);
360 vfprintf(stdmsg, format, ap);
361 va_end(ap);
363 fprintf(stdmsg, "\n");
364 fflush(stdmsg);
366 if (global.errors >= 20) // moderate blizzard of cascading messages
367 fatal();
369 global.errors++;
372 TOK Lexer::nextToken()
373 { Token *t;
375 if (token.next)
377 t = token.next;
378 memcpy(&token,t,sizeof(Token));
379 t->next = freelist;
380 freelist = t;
382 else
384 scan(&token);
386 //token.print();
387 return token.value;
390 Token *Lexer::peek(Token *ct)
391 { Token *t;
393 if (ct->next)
394 t = ct->next;
395 else
397 t = new Token();
398 scan(t);
399 t->next = NULL;
400 ct->next = t;
402 return t;
405 /*********************************
406 * tk is on the opening (.
407 * Look ahead and return token that is past the closing ).
410 Token *Lexer::peekPastParen(Token *tk)
412 //printf("peekPastParen()\n");
413 int parens = 1;
414 int curlynest = 0;
415 while (1)
417 tk = peek(tk);
418 //tk->print();
419 switch (tk->value)
421 case TOKlparen:
422 parens++;
423 continue;
425 case TOKrparen:
426 --parens;
427 if (parens)
428 continue;
429 tk = peek(tk);
430 break;
432 case TOKlcurly:
433 curlynest++;
434 continue;
436 case TOKrcurly:
437 if (--curlynest >= 0)
438 continue;
439 break;
441 case TOKsemicolon:
442 if (curlynest)
443 continue;
444 break;
446 case TOKeof:
447 break;
449 default:
450 continue;
452 return tk;
456 /**********************************
457 * Determine if string is a valid Identifier.
458 * Placed here because of commonality with Lexer functionality.
459 * Returns:
460 * 0 invalid
463 int Lexer::isValidIdentifier(char *p)
465 size_t len;
466 size_t idx;
468 if (!p || !*p)
469 goto Linvalid;
471 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars
472 goto Linvalid;
474 len = strlen(p);
475 idx = 0;
476 while (p[idx])
477 { dchar_t dc;
479 char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc);
480 if (q)
481 goto Linvalid;
483 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
484 goto Linvalid;
486 return 1;
488 Linvalid:
489 return 0;
492 /****************************
493 * Turn next token in buffer into a token.
// Lexer::scan: read the next token from the source text at `p` into *t.
//
// The main loop skips white space and comments, then dispatches on the first
// character of the token.  Each branch fills in t->value (and any payload such
// as t->ident or t->ustring) and returns, leaving `p` after the token.
// When dltSyntax is set the scanner also:
//   - compares the leading tabs of each line with `indent` and synthesises a
//     TOKrcurly for every level the line has dropped by;
//   - returns TOKendline for a newline outside brackets (nesting == 0), and
//     defers the line-number increment through incLineno;
//   - raises `indent` on a ':' seen outside brackets;
//   - treats '#' as the start of a comment.
496 void Lexer::scan(Token *t)
498 unsigned lastLine = loc.linnum;
499 unsigned linnum;
501 // Delayed line-number updating
502 if (incLineno)
504 assert(incLineno == 1);
505 incLineno = 0;
506 loc.linnum++;
509 t->blockComment = NULL;
510 t->lineComment = NULL;
511 while (1)
513 t->ptr = p;
515 if (dltSyntax && atStartOfLine) {
516 // Check indent
517 int i;
// Count the leading tabs of this line.
518 for (i = 0; p[i] == '\t'; i++) {
520 if (p[i] == ' ') {
521 error("Whitespace error: use tabs to indent!");
523 if (p[i] == '#') {
// A comment line takes no part in the indent check.
524 p += i;
525 atStartOfLine = 0;
526 } else if (p[i] != '\n' && p[i] != '\r') {
527 if (p[i] == '\0')
528 i = 0; // End-of-file always has no indent
529 if (i > indent) {
530 error("unexpected indentation (expected %d tabs, not %d)",
531 indent, i);
532 } else if (i < indent) {
// One block closes per call; atStartOfLine stays set, so the next call
// re-checks the same line.
533 indent -= 1;
534 t->value = TOKrcurly;
535 return;
537 atStartOfLine = 0;
538 } /* else ignore blank line */
541 //printf("p = %p, *p = '%c'\n",p,*p);
542 switch (*p)
544 case 0:
545 case 0x1A:
546 t->value = TOKeof; // end of file
547 return;
549 case ' ':
550 case '\t':
551 case '\v':
552 case '\f':
553 p++;
554 continue; // skip white space
556 case '\r':
557 if (p[1] == '\n') { // if CRLF
558 p++;
559 continue;
561 // fall-through
562 case '\n':
563 p++;
564 if (dltSyntax)
566 // Delay incrementing the line number until after sending
567 // the TOKendline, for better error messages
568 if (incLineno)
569 loc.linnum++;
570 incLineno = 1;
572 if (!nesting)
574 atStartOfLine = 1;
575 t->value = TOKendline;
576 return;
579 else
580 loc.linnum++;
581 continue; // Ignore newlines inside brackets
582 case '0': case '1': case '2': case '3': case '4':
583 case '5': case '6': case '7': case '8': case '9':
584 t->value = number(t);
585 return;
587 #if CSTRINGS
588 case '\'':
589 t->value = charConstant(t, 0);
590 return;
592 case '"':
593 t->value = stringConstant(t,0);
594 return;
596 case 'l':
597 case 'L':
598 if (p[1] == '\'')
600 p++;
601 t->value = charConstant(t, 1);
602 return;
604 else if (p[1] == '"')
606 p++;
607 t->value = stringConstant(t, 1);
608 return;
610 #else
611 case '\'':
612 t->value = charConstant(t,0);
613 return;
// r"..." and `...` are both wysiwyg strings; a lone 'r' is an identifier.
615 case 'r':
616 if (p[1] != '"')
617 goto case_ident;
618 p++;
619 case '`':
620 t->value = wysiwygStringConstant(t, *p);
621 return;
623 case 'x':
624 if (p[1] != '"')
625 goto case_ident;
626 p++;
627 t->value = hexStringConstant(t);
628 return;
630 #if V2
631 case 'q':
632 if (p[1] == '"')
634 p++;
635 t->value = delimitedStringConstant(t);
636 return;
638 else if (p[1] == '{')
640 p++;
641 t->value = tokenStringConstant(t);
642 return;
644 else
645 goto case_ident;
646 #endif
648 case '"':
649 t->value = escapeStringConstant(t,0);
650 return;
652 case '\\': // escaped string literal
653 { unsigned c;
655 stringbuffer.reset();
658 p++;
659 switch (*p)
661 case 'u':
662 case 'U':
663 case '&':
664 c = escapeSequence();
665 stringbuffer.writeUTF8(c);
666 break;
668 default:
669 c = escapeSequence();
670 stringbuffer.writeByte(c);
671 break;
673 } while (*p == '\\');
674 t->len = stringbuffer.offset;
675 stringbuffer.writeByte(0);
676 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
677 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
678 t->postfix = 0;
679 t->value = TOKstring;
680 return;
683 case 'l':
684 case 'L':
685 #endif
686 case 'a': case 'b': case 'c': case 'd': case 'e':
687 case 'f': case 'g': case 'h': case 'i': case 'j':
688 case 'k': case 'm': case 'n': case 'o':
689 #if V2
690 case 'p': /*case 'q': case 'r':*/ case 's': case 't':
691 #else
692 case 'p': case 'q': /*case 'r':*/ case 's': case 't':
693 #endif
694 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
695 case 'z':
696 case 'A': case 'B': case 'C': case 'D': case 'E':
697 case 'F': case 'G': case 'H': case 'I': case 'J':
698 case 'K': case 'M': case 'N': case 'O':
699 case 'P': case 'Q': case 'R': case 'S': case 'T':
700 case 'U': case 'V': case 'W': case 'X': case 'Y':
701 case 'Z':
702 case '_':
703 case_ident:
704 { unsigned char c;
705 StringValue *sv;
706 Identifier *id;
710 c = *++p;
711 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
// Look the spelling up in the identifier table, creating an Identifier the first time it is seen.
712 sv = stringtable.update((char *)t->ptr, p - t->ptr);
713 id = (Identifier *) sv->ptrvalue;
714 if (!id)
715 { id = new Identifier(sv->lstring.string,TOKidentifier);
716 sv->ptrvalue = id;
718 t->ident = id;
719 t->value = (enum TOK) id->value;
// Outside dltSyntax, tokens valued TOKand / TOKor / TOKnot are turned back into plain identifiers.
720 if (!dltSyntax)
722 if (t->value == TOKand ||
723 t->value == TOKor ||
724 t->value == TOKnot)
726 t->value = TOKidentifier;
729 anyToken = 1;
730 if (*t->ptr == '_') // if special identifier token
732 static char date[11+1];
733 static char time[8+1];
734 static char timestamp[24+1];
736 if (!date[0]) // lazy evaluation
737 { time_t t;
738 char *p;
740 ::time(&t);
741 p = ctime(&t);
742 assert(p);
743 sprintf(date, "%.6s %.4s", p + 4, p + 20);
744 sprintf(time, "%.8s", p + 11);
745 sprintf(timestamp, "%.24s", p);
748 #if !V2
749 if (mod && id == Id::FILE)
751 t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars());
752 goto Lstring;
754 else if (mod && id == Id::LINE)
756 t->value = TOKint64v;
757 t->uns64value = loc.linnum;
759 else
760 #endif
761 if (id == Id::DATE)
763 t->ustring = (unsigned char *)date;
764 goto Lstring;
766 else if (id == Id::TIME)
768 t->ustring = (unsigned char *)time;
769 goto Lstring;
771 else if (id == Id::VENDOR)
773 #ifdef IN_GCC
774 t->ustring = (unsigned char *)"GDC";
775 #else
776 t->ustring = (unsigned char *)"Digital Mars D";
777 #endif
778 goto Lstring;
780 else if (id == Id::TIMESTAMP)
782 t->ustring = (unsigned char *)timestamp;
783 Lstring:
784 t->value = TOKstring;
785 Llen:
786 t->postfix = 0;
787 t->len = strlen((char *)t->ustring);
789 else if (id == Id::VERSIONX)
790 { unsigned major = 0;
791 unsigned minor = 0;
// Parse "<major>.<minor>" starting at the second character of global.version.
793 for (char *p = global.version + 1; 1; p++)
795 char c = *p;
796 if (isdigit(c))
797 minor = minor * 10 + c - '0';
798 else if (c == '.')
799 { major = minor;
800 minor = 0;
802 else
803 break;
805 t->value = TOKint64v;
806 t->uns64value = major * 1000 + minor;
808 #if V2
809 else if (id == Id::EOFX)
811 t->value = TOKeof;
812 // Advance scanner to end of file
813 while (!(*p == 0 || *p == 0x1A))
814 p++;
816 #endif
818 //printf("t->value = %d\n",t->value);
819 return;
822 case '/':
823 p++;
824 switch (*p)
826 case '=':
827 p++;
828 t->value = TOKdivass;
829 return;
831 case '*':
832 p++;
833 linnum = loc.linnum;
834 while (1)
836 while (1)
837 { unsigned char c = *p;
838 switch (c)
840 case '/':
841 break;
843 case '\n':
844 loc.linnum++;
845 p++;
846 continue;
848 case '\r':
849 p++;
850 if (*p != '\n')
851 loc.linnum++;
852 continue;
854 case 0:
855 case 0x1A:
856 error("unterminated /* */ comment");
857 p = end;
858 t->value = TOKeof;
859 return;
861 default:
862 if (c & 0x80)
863 { unsigned u = decodeUTF();
864 if (u == PS || u == LS)
865 loc.linnum++;
867 p++;
868 continue;
870 break;
872 p++;
// The '/' only closes the comment when it follows a '*' that is not the opening one.
873 if (p[-2] == '*' && p - 3 != t->ptr)
874 break;
876 if (commentToken)
878 t->value = TOKcomment;
879 return;
881 else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
882 { // if /** but not /**/
883 getDocComment(t, lastLine == linnum);
885 continue;
887 case '/': // do // style comments
888 linnum = loc.linnum;
889 while (1)
890 { unsigned char c = *++p;
891 switch (c)
893 case '\n':
894 break;
896 case '\r':
897 if (p[1] == '\n')
898 p++;
899 break;
901 case 0:
902 case 0x1A:
903 if (commentToken)
905 p = end;
906 t->value = TOKcomment;
907 return;
909 if (doDocComment && t->ptr[2] == '/')
910 getDocComment(t, lastLine == linnum);
911 p = end;
912 t->value = TOKeof;
913 return;
915 default:
916 if (c & 0x80)
917 { unsigned u = decodeUTF();
918 if (u == PS || u == LS)
919 break;
921 continue;
923 break;
926 if (commentToken)
928 p++;
929 loc.linnum++;
930 t->value = TOKcomment;
931 return;
933 if (doDocComment && t->ptr[2] == '/')
934 getDocComment(t, lastLine == linnum);
936 p++;
937 loc.linnum++;
938 continue;
940 case '+':
941 { int nest;
943 linnum = loc.linnum;
944 p++;
945 nest = 1;
946 while (1)
947 { unsigned char c = *p;
948 switch (c)
950 case '/':
951 p++;
952 if (*p == '+')
954 p++;
955 nest++;
957 continue;
959 case '+':
960 p++;
961 if (*p == '/')
963 p++;
964 if (--nest == 0)
965 break;
967 continue;
969 case '\r':
970 p++;
971 if (*p != '\n')
972 loc.linnum++;
973 continue;
975 case '\n':
976 loc.linnum++;
977 p++;
978 continue;
980 case 0:
981 case 0x1A:
982 error("unterminated /+ +/ comment");
983 p = end;
984 t->value = TOKeof;
985 return;
987 default:
988 if (c & 0x80)
989 { unsigned u = decodeUTF();
990 if (u == PS || u == LS)
991 loc.linnum++;
993 p++;
994 continue;
996 break;
998 if (commentToken)
1000 t->value = TOKcomment;
1001 return;
1003 if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
1004 { // if /++ but not /++/
1005 getDocComment(t, lastLine == linnum);
1007 continue;
1010 t->value = TOKdiv;
1011 return;
1013 case '.':
1014 p++;
1015 if (isdigit(*p))
1016 { /* Note that we don't allow ._1 and ._ as being
1017 * valid floating point numbers.
1019 p--;
1020 t->value = inreal(t);
1022 else if (p[0] == '.')
1024 if (p[1] == '.')
1025 { p += 2;
1026 t->value = TOKdotdotdot;
1028 else
1029 { p++;
1030 t->value = TOKslice;
1033 else
1034 t->value = TOKdot;
1035 return;
1037 case '&':
1038 p++;
1039 if (*p == '=')
1040 { p++;
1041 t->value = TOKandass;
1043 else if (*p == '&')
1044 { p++;
1045 t->value = TOKandand;
1046 if (dltSyntax)
1047 error("Use 'and' instead of '&&'");
1049 else
1050 t->value = TOKand;
1051 return;
1053 case '|':
1054 p++;
1055 if (*p == '=')
1056 { p++;
1057 t->value = TOKorass;
1059 else if (*p == '|')
1060 { p++;
1061 t->value = TOKoror;
1062 if (dltSyntax)
1063 error("Use 'or' instead of '||'");
1065 else
1066 t->value = TOKor;
1067 return;
1069 case '-':
1070 p++;
1071 if (*p == '=')
1072 { p++;
1073 t->value = TOKminass;
1075 #if 0
1076 else if (*p == '>')
1077 { p++;
1078 t->value = TOKarrow;
1080 #endif
1081 else if (*p == '-')
1082 { p++;
1083 t->value = TOKminusminus;
1085 else
1086 t->value = TOKmin;
1087 return;
1089 case '+':
1090 p++;
1091 if (*p == '=')
1092 { p++;
1093 t->value = TOKaddass;
1095 else if (*p == '+')
1096 { p++;
1097 t->value = TOKplusplus;
1099 else
1100 t->value = TOKadd;
1101 return;
1103 case '<':
1104 p++;
1105 if (*p == '=')
1106 { p++;
1107 t->value = TOKle; // <=
1109 else if (*p == '<')
1110 { p++;
1111 if (*p == '=')
1112 { p++;
1113 t->value = TOKshlass; // <<=
1115 else
1116 t->value = TOKshl; // <<
1118 else if (*p == '>')
1119 { p++;
1120 if (*p == '=')
1121 { p++;
1122 t->value = TOKleg; // <>=
1124 else
1125 t->value = TOKlg; // <>
1127 else
1128 t->value = TOKlt; // <
1129 return;
1131 case '>':
1132 p++;
1133 if (*p == '=')
1134 { p++;
1135 t->value = TOKge; // >=
1137 else if (*p == '>')
1138 { p++;
1139 if (*p == '=')
1140 { p++;
1141 t->value = TOKshrass; // >>=
1143 else if (*p == '>')
1144 { p++;
1145 if (*p == '=')
1146 { p++;
1147 t->value = TOKushrass; // >>>=
1149 else
1150 t->value = TOKushr; // >>>
1152 else
1153 t->value = TOKshr; // >>
1155 else
1156 t->value = TOKgt; // >
1157 return;
1159 case '!':
1160 p++;
1161 if (*p == '=')
1162 { p++;
1163 if (*p == '=' && global.params.Dversion == 1)
1164 { p++;
1165 t->value = TOKnotidentity; // !==
1167 else
1168 t->value = TOKnotequal; // !=
1170 else if (*p == '<')
1171 { p++;
1172 if (*p == '>')
1173 { p++;
1174 if (*p == '=')
1175 { p++;
1176 t->value = TOKunord; // !<>=
1178 else
1179 t->value = TOKue; // !<>
1181 else if (*p == '=')
1182 { p++;
1183 t->value = TOKug; // !<=
1185 else
1186 t->value = TOKuge; // !<
1188 else if (*p == '>')
1189 { p++;
1190 if (*p == '=')
1191 { p++;
1192 t->value = TOKul; // !>=
1194 else
1195 t->value = TOKule; // !>
1197 else
1198 t->value = TOKnot; // !
1199 return;
1201 case '=':
1202 p++;
1203 if (*p == '=')
1204 { p++;
1205 if (*p == '=' && global.params.Dversion == 1)
1206 { p++;
1207 t->value = TOKidentity; // ===
1209 else
1210 t->value = TOKequal; // ==
1212 else
1213 t->value = TOKassign; // =
1214 return;
1216 case '~':
1217 p++;
1218 if (*p == '=')
1219 { p++;
1220 t->value = TOKcatass; // ~=
1222 else
1223 t->value = TOKtilde; // ~
1224 return;
// NESTED: bracket pairs keep `nesting` in step so newlines inside brackets are ignored.
1226 #define NESTED(cin,tokin,cout,tokout) \
1227 case cin: nesting++; p++; t->value = tokin; return;\
1228 case cout: if (nesting == 0) {error("Unexpected '%c'", cout);} else {nesting--;} p++; t->value = tokout; return;
1230 NESTED('(', TOKlparen, ')', TOKrparen)
1231 NESTED('[', TOKlbracket, ']', TOKrbracket)
1232 NESTED('{', TOKlcurly, '}', TOKrcurly)
1233 #undef NESTED
1235 #define SINGLE(c,tok) case c: p++; t->value = tok; return;
1236 SINGLE('?', TOKquestion)
1237 SINGLE(',', TOKcomma)
1238 SINGLE(';', TOKsemicolon)
1239 SINGLE('$', TOKdollar)
1240 SINGLE('@', TOKat)
1242 #undef SINGLE
// A ':' outside brackets raises the expected indent by one level.
1244 case ':':
1245 p++;
1246 if (!nesting)
1247 indent += 1;
1248 t->value = TOKcolon;
1249 return;
1251 #define DOUBLE(c1,tok1,c2,tok2) \
1252 case c1: \
1253 p++; \
1254 if (*p == c2) \
1255 { p++; \
1256 t->value = tok2; \
1258 else \
1259 t->value = tok1; \
1260 return;
1262 DOUBLE('*', TOKmul, '=', TOKmulass)
1263 DOUBLE('%', TOKmod, '=', TOKmodass)
1264 DOUBLE('^', TOKxor, '=', TOKxorass)
1266 #undef DOUBLE
1268 case '#': // do # style comments and pragmas
1269 if (dltSyntax)
// NOTE(review): this loop stops only at '\n'; it has no check for 0 or 0x1A,
// so a '#' comment on a last line without a newline runs past the end-of-file marker.
1271 do { p++; } while (*p != '\n');
1273 else
1275 p++;
1276 pragma();
1278 continue;
1280 default:
1281 { unsigned char c = *p;
1283 if (c & 0x80)
1284 { unsigned u = decodeUTF();
1286 // Check for start of unicode identifier
1287 if (isUniAlpha(u))
1288 goto case_ident;
1290 if (u == PS || u == LS)
1292 loc.linnum++;
1293 p++;
1294 continue;
1297 if (isprint(c))
1298 error("unsupported char '%c'", c);
1299 else
1300 error("unsupported char 0x%02x", c);
1301 p++;
1302 continue;
1308 /*******************************************
1309 * Parse escape sequence.
1312 unsigned Lexer::escapeSequence()
1313 { unsigned c;
1314 int n;
1315 int ndigits;
1317 c = *p;
1318 switch (c)
1320 case '\'':
1321 case '"':
1322 case '?':
1323 case '\\':
1324 Lconsume:
1325 p++;
1326 break;
1328 case 'a': c = 7; goto Lconsume;
1329 case 'b': c = 8; goto Lconsume;
1330 case 'f': c = 12; goto Lconsume;
1331 case 'n': c = 10; goto Lconsume;
1332 case 'r': c = 13; goto Lconsume;
1333 case 't': c = 9; goto Lconsume;
1334 case 'v': c = 11; goto Lconsume;
1336 case 'u':
1337 ndigits = 4;
1338 goto Lhex;
1339 case 'U':
1340 ndigits = 8;
1341 goto Lhex;
1342 case 'x':
1343 ndigits = 2;
1344 Lhex:
1345 p++;
1346 c = *p;
1347 if (ishex(c))
1348 { unsigned v;
1350 n = 0;
1351 v = 0;
1352 while (1)
1354 if (isdigit(c))
1355 c -= '0';
1356 else if (islower(c))
1357 c -= 'a' - 10;
1358 else
1359 c -= 'A' - 10;
1360 v = v * 16 + c;
1361 c = *++p;
1362 if (++n == ndigits)
1363 break;
1364 if (!ishex(c))
1365 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
1366 break;
1369 if (ndigits != 2 && !utf_isValidDchar(v))
1370 error("invalid UTF character \\U%08x", v);
1371 c = v;
1373 else
1374 error("undefined escape hex sequence \\%c\n",c);
1375 break;
1377 case '&': // named character entity
1378 for (unsigned char *idstart = ++p; 1; p++)
1380 switch (*p)
1382 case ';':
1383 c = HtmlNamedEntity(idstart, p - idstart);
1384 if (c == ~0)
1385 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
1386 c = ' ';
1388 p++;
1389 break;
1391 default:
1392 if (isalpha(*p) ||
1393 (p != idstart + 1 && isdigit(*p)))
1394 continue;
1395 error("unterminated named entity");
1396 break;
1398 break;
1400 break;
1402 case 0:
1403 case 0x1A: // end of file
1404 c = '\\';
1405 break;
1407 default:
1408 if (isoctal(c))
1409 { unsigned v;
1411 n = 0;
1412 v = 0;
1415 v = v * 8 + (c - '0');
1416 c = *++p;
1417 } while (++n < 3 && isoctal(c));
1418 c = v;
1419 if (c > 0xFF)
1420 error("0%03o is larger than a byte", c);
1422 else
1423 error("undefined escape sequence \\%c\n",c);
1424 break;
1426 return c;
1429 /**************************************
1432 TOK Lexer::wysiwygStringConstant(Token *t, int tc)
1433 { unsigned c;
1434 Loc start = loc;
1436 p++;
1437 stringbuffer.reset();
1438 while (1)
1440 c = *p++;
1441 switch (c)
1443 case '\n':
1444 loc.linnum++;
1445 break;
1447 case '\r':
1448 if (*p == '\n')
1449 continue; // ignore
1450 c = '\n'; // treat EndOfLine as \n character
1451 loc.linnum++;
1452 break;
1454 case 0:
1455 case 0x1A:
1456 error("unterminated string constant starting at %s", start.toChars());
1457 t->ustring = (unsigned char *)"";
1458 t->len = 0;
1459 t->postfix = 0;
1460 return TOKstring;
1462 case '"':
1463 case '`':
1464 if (c == tc)
1466 t->len = stringbuffer.offset;
1467 stringbuffer.writeByte(0);
1468 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1469 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1470 stringPostfix(t);
1471 return TOKstring;
1473 break;
1475 default:
1476 if (c & 0x80)
1477 { p--;
1478 unsigned u = decodeUTF();
1479 p++;
1480 if (u == PS || u == LS)
1481 loc.linnum++;
1482 stringbuffer.writeUTF8(u);
1483 continue;
1485 break;
1487 stringbuffer.writeByte(c);
1491 /**************************************
1492 * Lex hex strings:
1493 * x"0A ae 34FE BD"
1496 TOK Lexer::hexStringConstant(Token *t)
1497 { unsigned c;
1498 Loc start = loc;
1499 unsigned n = 0;
1500 unsigned v;
1502 p++;
1503 stringbuffer.reset();
1504 while (1)
1506 c = *p++;
1507 switch (c)
1509 case ' ':
1510 case '\t':
1511 case '\v':
1512 case '\f':
1513 continue; // skip white space
1515 case '\r':
1516 if (*p == '\n')
1517 continue; // ignore
1518 // Treat isolated '\r' as if it were a '\n'
1519 case '\n':
1520 loc.linnum++;
1521 continue;
1523 case 0:
1524 case 0x1A:
1525 error("unterminated string constant starting at %s", start.toChars());
1526 t->ustring = (unsigned char *)"";
1527 t->len = 0;
1528 t->postfix = 0;
1529 return TOKstring;
1531 case '"':
1532 if (n & 1)
1533 { error("odd number (%d) of hex characters in hex string", n);
1534 stringbuffer.writeByte(v);
1536 t->len = stringbuffer.offset;
1537 stringbuffer.writeByte(0);
1538 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1539 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1540 stringPostfix(t);
1541 return TOKstring;
1543 default:
1544 if (c >= '0' && c <= '9')
1545 c -= '0';
1546 else if (c >= 'a' && c <= 'f')
1547 c -= 'a' - 10;
1548 else if (c >= 'A' && c <= 'F')
1549 c -= 'A' - 10;
1550 else if (c & 0x80)
1551 { p--;
1552 unsigned u = decodeUTF();
1553 p++;
1554 if (u == PS || u == LS)
1555 loc.linnum++;
1556 else
1557 error("non-hex character \\u%x", u);
1559 else
1560 error("non-hex character '%c'", c);
1561 if (n & 1)
1562 { v = (v << 4) | c;
1563 stringbuffer.writeByte(v);
1565 else
1566 v = c;
1567 n++;
1568 break;
1574 #if V2
1575 /**************************************
1576 * Lex delimited strings:
1577 * q"(foo(xxx))" // "foo(xxx)"
1578 * q"[foo(]" // "foo("
1579 * q"/foo]/" // "foo]"
1580 * q"HERE
1581 * foo
1582 * HERE" // "foo\n"
1583 * Input:
1584 * p is on the "
1587 TOK Lexer::delimitedStringConstant(Token *t)
1588 { unsigned c;
1589 Loc start = loc;
1590 unsigned delimleft = 0;
1591 unsigned delimright = 0;
1592 unsigned nest = 1;
1593 unsigned nestcount;
1594 Identifier *hereid = NULL;
1595 unsigned blankrol = 0;
1596 unsigned startline = 0;
1598 p++;
1599 stringbuffer.reset();
1600 while (1)
1602 c = *p++;
1603 //printf("c = '%c'\n", c);
1604 switch (c)
1606 case '\n':
1607 Lnextline:
1608 loc.linnum++;
1609 startline = 1;
1610 if (blankrol)
1611 { blankrol = 0;
1612 continue;
1614 if (hereid)
1616 stringbuffer.writeUTF8(c);
1617 continue;
1619 break;
1621 case '\r':
1622 if (*p == '\n')
1623 continue; // ignore
1624 c = '\n'; // treat EndOfLine as \n character
1625 goto Lnextline;
1627 case 0:
1628 case 0x1A:
1629 goto Lerror;
1631 default:
1632 if (c & 0x80)
1633 { p--;
1634 c = decodeUTF();
1635 p++;
1636 if (c == PS || c == LS)
1637 goto Lnextline;
1639 break;
1641 if (delimleft == 0)
1642 { delimleft = c;
1643 nest = 1;
1644 nestcount = 1;
1645 if (c == '(')
1646 delimright = ')';
1647 else if (c == '{')
1648 delimright = '}';
1649 else if (c == '[')
1650 delimright = ']';
1651 else if (c == '<')
1652 delimright = '>';
1653 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1654 { // Start of identifier; must be a heredoc
1655 Token t;
1656 p--;
1657 scan(&t); // read in heredoc identifier
1658 if (t.value != TOKidentifier)
1659 { error("identifier expected for heredoc, not %s", t.toChars());
1660 delimright = c;
1662 else
1663 { hereid = t.ident;
1664 //printf("hereid = '%s'\n", hereid->toChars());
1665 blankrol = 1;
1667 nest = 0;
1669 else
1670 { delimright = c;
1671 nest = 0;
1674 else
1676 if (blankrol)
1677 { error("heredoc rest of line should be blank");
1678 blankrol = 0;
1679 continue;
1681 if (nest == 1)
1683 if (c == delimleft)
1684 nestcount++;
1685 else if (c == delimright)
1686 { nestcount--;
1687 if (nestcount == 0)
1688 goto Ldone;
1691 else if (c == delimright)
1692 goto Ldone;
1693 if (startline && isalpha(c))
1694 { Token t;
1695 unsigned char *psave = p;
1696 p--;
1697 scan(&t); // read in possible heredoc identifier
1698 //printf("endid = '%s'\n", t.ident->toChars());
1699 if (t.value == TOKidentifier && t.ident->equals(hereid))
1700 { /* should check that rest of line is blank
1702 goto Ldone;
1704 p = psave;
1706 stringbuffer.writeUTF8(c);
1707 startline = 0;
1711 Ldone:
1712 if (*p == '"')
1713 p++;
1714 else
1715 error("delimited string must end in %c\"", delimright);
1716 t->len = stringbuffer.offset;
1717 stringbuffer.writeByte(0);
1718 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1719 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1720 stringPostfix(t);
1721 return TOKstring;
1723 Lerror:
1724 error("unterminated string constant starting at %s", start.toChars());
1725 t->ustring = (unsigned char *)"";
1726 t->len = 0;
1727 t->postfix = 0;
1728 return TOKstring;
1731 /**************************************
1732 * Lex delimited strings:
1733 * q{ foo(xxx) } // " foo(xxx) "
1734 * q{foo(} // "foo("
1735 * q{{foo}"}"} // "{foo}"}""
1736 * Input:
1737 * p is on the q
1740 TOK Lexer::tokenStringConstant(Token *t)
1742 unsigned nest = 1;
1743 Loc start = loc;
1744 unsigned char *pstart = ++p;
1746 nesting++;
1747 while (1)
1748 { Token tok;
1750 scan(&tok);
1751 switch (tok.value)
1753 case TOKlcurly:
1754 nest++;
1755 continue;
1757 case TOKrcurly:
1758 if (--nest == 0)
1759 goto Ldone;
1760 continue;
1762 case TOKeof:
1763 goto Lerror;
1765 default:
1766 continue;
1770 Ldone:
1771 t->len = p - 1 - pstart;
1772 t->ustring = (unsigned char *)mem.malloc(t->len + 1);
1773 memcpy(t->ustring, pstart, t->len);
1774 t->ustring[t->len] = 0;
1775 stringPostfix(t);
1776 return TOKstring;
1778 Lerror:
1779 error("unterminated token string constant starting at %s", start.toChars());
1780 t->ustring = (unsigned char *)"";
1781 t->len = 0;
1782 t->postfix = 0;
1783 return TOKstring;
1786 #endif
1789 /**************************************
1792 TOK Lexer::escapeStringConstant(Token *t, int wide)
1793 { unsigned c;
1794 Loc start = loc;
1796 p++;
1797 stringbuffer.reset();
1798 while (1)
1800 c = *p++;
1801 switch (c)
1803 case '\\':
1804 switch (*p)
1806 case 'u':
1807 case 'U':
1808 case '&':
1809 c = escapeSequence();
1810 stringbuffer.writeUTF8(c);
1811 continue;
1813 default:
1814 c = escapeSequence();
1815 break;
1817 break;
1819 case '\n':
1820 loc.linnum++;
1821 break;
1823 case '\r':
1824 if (*p == '\n')
1825 continue; // ignore
1826 c = '\n'; // treat EndOfLine as \n character
1827 loc.linnum++;
1828 break;
1830 case '"':
1831 t->len = stringbuffer.offset;
1832 stringbuffer.writeByte(0);
1833 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1834 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1835 stringPostfix(t);
1836 return TOKstring;
1838 case 0:
1839 case 0x1A:
1840 p--;
1841 error("unterminated string constant starting at %s", start.toChars());
1842 t->ustring = (unsigned char *)"";
1843 t->len = 0;
1844 t->postfix = 0;
1845 return TOKstring;
1847 default:
1848 if (c & 0x80)
1850 p--;
1851 c = decodeUTF();
1852 if (c == LS || c == PS)
1853 { c = '\n';
1854 loc.linnum++;
1856 p++;
1857 stringbuffer.writeUTF8(c);
1858 continue;
1860 break;
1862 stringbuffer.writeByte(c);
1866 /**************************************
1869 TOK Lexer::charConstant(Token *t, int wide)
1871 unsigned c;
1872 TOK tk = TOKcharv;
1874 //printf("Lexer::charConstant\n");
1875 p++;
1876 c = *p++;
1877 switch (c)
1879 case '\\':
1880 switch (*p)
1882 case 'u':
1883 t->uns64value = escapeSequence();
1884 tk = TOKwcharv;
1885 break;
1887 case 'U':
1888 case '&':
1889 t->uns64value = escapeSequence();
1890 tk = TOKdcharv;
1891 break;
1893 default:
1894 t->uns64value = escapeSequence();
1895 break;
1897 break;
1899 case '\n':
1901 loc.linnum++;
1902 case '\r':
1903 case 0:
1904 case 0x1A:
1905 case '\'':
1906 error("unterminated character constant");
1907 return tk;
1909 default:
1910 if (c & 0x80)
1912 p--;
1913 c = decodeUTF();
1914 p++;
1915 if (c == LS || c == PS)
1916 goto L1;
1917 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1918 tk = TOKwcharv;
1919 else
1920 tk = TOKdcharv;
1922 t->uns64value = c;
1923 break;
1926 if (*p != '\'')
1927 { error("unterminated character constant");
1928 return tk;
1930 p++;
1931 return tk;
1934 /***************************************
1935 * Get postfix of string literal.
1938 void Lexer::stringPostfix(Token *t)
1940 switch (*p)
1942 case 'c':
1943 case 'w':
1944 case 'd':
1945 t->postfix = *p;
1946 p++;
1947 break;
1949 default:
1950 t->postfix = 0;
1951 break;
/***************************************
 * Read \u or \U unicode sequence
 * Input:
 *	u	'u' or 'U'; 'U' expects 8 hex digits, anything else 4
 * Returns:
 *	the value accumulated from the hex digits read
 * (Compiled out.)
 */

#if 0
unsigned Lexer::wchar(unsigned u)
{
    unsigned nchars = (u == 'U') ? 8 : 4;
    unsigned value = 0;
    unsigned n = 0;

    while (1)
    {
        unsigned char c;

        ++p;
        if (n == nchars)
            break;
        c = *p;
        if (!ishex(c))
        {
            error("\\%c sequence must be followed by %d hex characters", u, nchars);
            break;
        }
        // Turn the hex digit into its numeric value
        if (isdigit(c))
            c -= '0';
        else if (islower(c))
            c -= 'a' - 10;
        else
            c -= 'A' - 10;
        value = (value << 4) | c;
        n++;
    }
    return value;
}
#endif
1994 /**************************************
1995 * Read in a number.
1996 * If it's an integer, store it in tok.TKutok.Vlong.
1997 * integers can be decimal, octal or hex
1998 * Handle the suffixes U, UL, LU, L, etc.
1999 * If it's double, store it in tok.TKutok.Vdouble.
2000 * Returns:
2001 * TKnum
2002 * TKdouble,...
2005 TOK Lexer::number(Token *t)
2007 // We use a state machine to collect numbers
2008 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
2009 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
2010 STATE_hexh, STATE_error };
2011 enum STATE state;
2013 enum FLAGS
2014 { FLAGS_decimal = 1, // decimal
2015 FLAGS_unsigned = 2, // u or U suffix
2016 FLAGS_long = 4, // l or L suffix
2018 enum FLAGS flags = FLAGS_decimal;
2020 int i;
2021 int base;
2022 unsigned c;
2023 unsigned char *start;
2024 TOK result;
2026 //printf("Lexer::number()\n");
2027 state = STATE_initial;
2028 base = 0;
2029 stringbuffer.reset();
2030 start = p;
2031 while (1)
2033 c = *p;
2034 switch (state)
2036 case STATE_initial: // opening state
2037 if (c == '0')
2038 state = STATE_0;
2039 else
2040 state = STATE_decimal;
2041 break;
2043 case STATE_0:
2044 flags = (FLAGS) (flags & ~FLAGS_decimal);
2045 switch (c)
2047 #if ZEROH
2048 case 'H': // 0h
2049 case 'h':
2050 goto hexh;
2051 #endif
2052 case 'X':
2053 case 'x':
2054 state = STATE_hex0;
2055 break;
2057 case '.':
2058 if (p[1] == '.') // .. is a separate token
2059 goto done;
2060 case 'i':
2061 case 'f':
2062 case 'F':
2063 goto real;
2064 #if ZEROH
2065 case 'E':
2066 case 'e':
2067 goto case_hex;
2068 #endif
2069 case 'B':
2070 case 'b':
2071 state = STATE_binary0;
2072 break;
2074 case '0': case '1': case '2': case '3':
2075 case '4': case '5': case '6': case '7':
2076 state = STATE_octal;
2077 break;
2079 #if ZEROH
2080 case '8': case '9': case 'A':
2081 case 'C': case 'D': case 'F':
2082 case 'a': case 'c': case 'd': case 'f':
2083 case_hex:
2084 state = STATE_hexh;
2085 break;
2086 #endif
2087 case '_':
2088 state = STATE_octal;
2089 p++;
2090 continue;
2092 case 'L':
2093 if (p[1] == 'i')
2094 goto real;
2095 goto done;
2097 default:
2098 goto done;
2100 break;
2102 case STATE_decimal: // reading decimal number
2103 if (!isdigit(c))
2105 #if ZEROH
2106 if (ishex(c)
2107 || c == 'H' || c == 'h'
2109 goto hexh;
2110 #endif
2111 if (c == '_') // ignore embedded _
2112 { p++;
2113 continue;
2115 if (c == '.' && p[1] != '.')
2116 goto real;
2117 else if (c == 'i' || c == 'f' || c == 'F' ||
2118 c == 'e' || c == 'E')
2120 real: // It's a real number. Back up and rescan as a real
2121 p = start;
2122 return inreal(t);
2124 else if (c == 'L' && p[1] == 'i')
2125 goto real;
2126 goto done;
2128 break;
2130 case STATE_hex0: // reading hex number
2131 case STATE_hex:
2132 if (!ishex(c))
2134 if (c == '_') // ignore embedded _
2135 { p++;
2136 continue;
2138 if (c == '.' && p[1] != '.')
2139 goto real;
2140 if (c == 'P' || c == 'p' || c == 'i')
2141 goto real;
2142 if (state == STATE_hex0)
2143 error("Hex digit expected, not '%c'", c);
2144 goto done;
2146 state = STATE_hex;
2147 break;
2149 #if ZEROH
2150 hexh:
2151 state = STATE_hexh;
2152 case STATE_hexh: // parse numbers like 0FFh
2153 if (!ishex(c))
2155 if (c == 'H' || c == 'h')
2157 p++;
2158 base = 16;
2159 goto done;
2161 else
2163 // Check for something like 1E3 or 0E24
2164 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) ||
2165 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset))
2166 goto real;
2167 error("Hex digit expected, not '%c'", c);
2168 goto done;
2171 break;
2172 #endif
2174 case STATE_octal: // reading octal number
2175 case STATE_octale: // reading octal number with non-octal digits
2176 if (!isoctal(c))
2178 #if ZEROH
2179 if (ishex(c)
2180 || c == 'H' || c == 'h'
2182 goto hexh;
2183 #endif
2184 if (c == '_') // ignore embedded _
2185 { p++;
2186 continue;
2188 if (c == '.' && p[1] != '.')
2189 goto real;
2190 if (c == 'i')
2191 goto real;
2192 if (isdigit(c))
2194 state = STATE_octale;
2196 else
2197 goto done;
2199 break;
2201 case STATE_binary0: // starting binary number
2202 case STATE_binary: // reading binary number
2203 if (c != '0' && c != '1')
2205 #if ZEROH
2206 if (ishex(c)
2207 || c == 'H' || c == 'h'
2209 goto hexh;
2210 #endif
2211 if (c == '_') // ignore embedded _
2212 { p++;
2213 continue;
2215 if (state == STATE_binary0)
2216 { error("binary digit expected");
2217 state = STATE_error;
2218 break;
2220 else
2221 goto done;
2223 state = STATE_binary;
2224 break;
2226 case STATE_error: // for error recovery
2227 if (!isdigit(c)) // scan until non-digit
2228 goto done;
2229 break;
2231 default:
2232 assert(0);
2234 stringbuffer.writeByte(c);
2235 p++;
2237 done:
2238 stringbuffer.writeByte(0); // terminate string
2239 if (state == STATE_octale)
2240 error("Octal digit expected");
2242 uinteger_t n; // unsigned >=64 bit integer type
2244 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0))
2245 n = stringbuffer.data[0] - '0';
2246 else
2248 // Convert string to integer
2249 #if __DMC__
2250 errno = 0;
2251 n = strtoull((char *)stringbuffer.data,NULL,base);
2252 if (errno == ERANGE)
2253 error("integer overflow");
2254 #else
2255 // Not everybody implements strtoull()
2256 char *p = (char *)stringbuffer.data;
2257 int r = 10, d;
2259 if (*p == '0')
2261 if (p[1] == 'x' || p[1] == 'X')
2262 p += 2, r = 16;
2263 else if (p[1] == 'b' || p[1] == 'B')
2264 p += 2, r = 2;
2265 else if (isdigit(p[1]))
2266 p += 1, r = 8;
2269 n = 0;
2270 while (1)
2272 if (*p >= '0' && *p <= '9')
2273 d = *p - '0';
2274 else if (*p >= 'a' && *p <= 'z')
2275 d = *p - 'a' + 10;
2276 else if (*p >= 'A' && *p <= 'Z')
2277 d = *p - 'A' + 10;
2278 else
2279 break;
2280 if (d >= r)
2281 break;
2282 if (n && n * r + d <= n)
2284 error ("integer overflow");
2285 break;
2288 n = n * r + d;
2289 p++;
2291 #endif
2292 if (sizeof(n) > 8 &&
2293 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits
2294 error("integer overflow");
2297 // Parse trailing 'u', 'U', 'l' or 'L' in any combination
2298 while (1)
2299 { unsigned char f;
2301 switch (*p)
2302 { case 'U':
2303 case 'u':
2304 f = FLAGS_unsigned;
2305 goto L1;
2307 case 'l':
2308 if (1 || !global.params.useDeprecated)
2309 error("'l' suffix is deprecated, use 'L' instead");
2310 case 'L':
2311 f = FLAGS_long;
2313 p++;
2314 if (flags & f)
2315 error("unrecognized token");
2316 flags = (FLAGS) (flags | f);
2317 continue;
2318 default:
2319 break;
2321 break;
2324 switch (flags)
2326 case 0:
2327 /* Octal or Hexadecimal constant.
2328 * First that fits: int, uint, long, ulong
2330 if (n & 0x8000000000000000LL)
2331 result = TOKuns64v;
2332 else if (n & 0xFFFFFFFF00000000LL)
2333 result = TOKint64v;
2334 else if (n & 0x80000000)
2335 result = TOKuns32v;
2336 else
2337 result = TOKint32v;
2338 break;
2340 case FLAGS_decimal:
2341 /* First that fits: int, long, long long
2343 if (n & 0x8000000000000000LL)
2344 { error("signed integer overflow");
2345 result = TOKuns64v;
2347 else if (n & 0xFFFFFFFF80000000LL)
2348 result = TOKint64v;
2349 else
2350 result = TOKint32v;
2351 break;
2353 case FLAGS_unsigned:
2354 case FLAGS_decimal | FLAGS_unsigned:
2355 /* First that fits: uint, ulong
2357 if (n & 0xFFFFFFFF00000000LL)
2358 result = TOKuns64v;
2359 else
2360 result = TOKuns32v;
2361 break;
2363 case FLAGS_decimal | FLAGS_long:
2364 if (n & 0x8000000000000000LL)
2365 { error("signed integer overflow");
2366 result = TOKuns64v;
2368 else
2369 result = TOKint64v;
2370 break;
2372 case FLAGS_long:
2373 if (n & 0x8000000000000000LL)
2374 result = TOKuns64v;
2375 else
2376 result = TOKint64v;
2377 break;
2379 case FLAGS_unsigned | FLAGS_long:
2380 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
2381 result = TOKuns64v;
2382 break;
2384 default:
2385 #ifdef DEBUG
2386 printf("%x\n",flags);
2387 #endif
2388 assert(0);
2390 t->uns64value = n;
2391 return result;
2394 /**************************************
2395 * Read in characters, converting them to real.
2396 * Bugs:
2397 * Exponent overflow not detected.
2398 * Too much requested precision is not detected.
2401 TOK Lexer::inreal(Token *t)
2402 #ifdef __DMC__
2403 __in
2405 assert(*p == '.' || isdigit(*p));
2407 __out (result)
2409 switch (result)
2411 case TOKfloat32v:
2412 case TOKfloat64v:
2413 case TOKfloat80v:
2414 case TOKimaginary32v:
2415 case TOKimaginary64v:
2416 case TOKimaginary80v:
2417 break;
2419 default:
2420 assert(0);
2423 __body
2424 #endif /* __DMC__ */
2425 { int dblstate;
2426 unsigned c;
2427 char hex; // is this a hexadecimal-floating-constant?
2428 TOK result;
2430 //printf("Lexer::inreal()\n");
2431 stringbuffer.reset();
2432 dblstate = 0;
2433 hex = 0;
2434 Lnext:
2435 while (1)
2437 // Get next char from input
2438 c = *p++;
2439 //printf("dblstate = %d, c = '%c'\n", dblstate, c);
2440 while (1)
2442 switch (dblstate)
2444 case 0: // opening state
2445 if (c == '0')
2446 dblstate = 9;
2447 else if (c == '.')
2448 dblstate = 3;
2449 else
2450 dblstate = 1;
2451 break;
2453 case 9:
2454 dblstate = 1;
2455 if (c == 'X' || c == 'x')
2456 { hex++;
2457 break;
2459 case 1: // digits to left of .
2460 case 3: // digits to right of .
2461 case 7: // continuing exponent digits
2462 if (!isdigit(c) && !(hex && isxdigit(c)))
2464 if (c == '_')
2465 goto Lnext; // ignore embedded '_'
2466 dblstate++;
2467 continue;
2469 break;
2471 case 2: // no more digits to left of .
2472 if (c == '.')
2473 { dblstate++;
2474 break;
2476 case 4: // no more digits to right of .
2477 if ((c == 'E' || c == 'e') ||
2478 hex && (c == 'P' || c == 'p'))
2479 { dblstate = 5;
2480 hex = 0; // exponent is always decimal
2481 break;
2483 if (hex)
2484 error("binary-exponent-part required");
2485 goto done;
2487 case 5: // looking immediately to right of E
2488 dblstate++;
2489 if (c == '-' || c == '+')
2490 break;
2491 case 6: // 1st exponent digit expected
2492 if (!isdigit(c))
2493 error("exponent expected");
2494 dblstate++;
2495 break;
2497 case 8: // past end of exponent digits
2498 goto done;
2500 break;
2502 stringbuffer.writeByte(c);
2504 done:
2505 p--;
2507 stringbuffer.writeByte(0);
2509 #if _WIN32 && __DMC__
2510 char *save = __locale_decpoint;
2511 __locale_decpoint = ".";
2512 #endif
2513 #ifdef IN_GCC
2514 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble);
2515 #else
2516 t->float80value = strtold((char *)stringbuffer.data, NULL);
2517 #endif
2518 errno = 0;
2519 switch (*p)
2521 case 'F':
2522 case 'f':
2523 #ifdef IN_GCC
2524 real_t::parse((char *)stringbuffer.data, real_t::Float);
2525 #else
2526 strtof((char *)stringbuffer.data, NULL);
2527 #endif
2528 result = TOKfloat32v;
2529 p++;
2530 break;
2532 default:
2533 #ifdef IN_GCC
2534 real_t::parse((char *)stringbuffer.data, real_t::Double);
2535 #else
2536 strtod((char *)stringbuffer.data, NULL);
2537 #endif
2538 result = TOKfloat64v;
2539 break;
2541 case 'l':
2542 if (!global.params.useDeprecated)
2543 error("'l' suffix is deprecated, use 'L' instead");
2544 case 'L':
2545 result = TOKfloat80v;
2546 p++;
2547 break;
2549 if (*p == 'i' || *p == 'I')
2551 if (!global.params.useDeprecated && *p == 'I')
2552 error("'I' suffix is deprecated, use 'i' instead");
2553 p++;
2554 switch (result)
2556 case TOKfloat32v:
2557 result = TOKimaginary32v;
2558 break;
2559 case TOKfloat64v:
2560 result = TOKimaginary64v;
2561 break;
2562 case TOKfloat80v:
2563 result = TOKimaginary80v;
2564 break;
2567 #if _WIN32 && __DMC__
2568 __locale_decpoint = save;
2569 #endif
2570 if (errno == ERANGE)
2571 error("number is not representable");
2572 return result;
2575 /*********************************************
2576 * Do pragma.
2577 * Currently, the only pragma supported is:
2578 * #line linnum [filespec]
2581 void Lexer::pragma()
2583 Token tok;
2584 int linnum;
2585 char *filespec = NULL;
2586 Loc loc = this->loc;
2588 while (isblank(*p)) p++;
2589 if (*p == '\n')
2590 goto Lerr;
2592 scan(&tok);
2593 if (tok.value != TOKidentifier || tok.ident != Id::line)
2594 goto Lerr;
2596 scan(&tok);
2597 if (tok.value == TOKint32v || tok.value == TOKint64v)
2598 linnum = tok.uns64value - 1;
2599 else
2600 goto Lerr;
2602 while (1)
2604 switch (*p)
2606 case 0:
2607 case 0x1A:
2608 case '\n':
2609 Lnewline:
2610 this->loc.linnum = linnum;
2611 if (filespec)
2612 this->loc.filename = filespec;
2613 return;
2615 case '\r':
2616 p++;
2617 if (*p != '\n')
2618 { p--;
2619 goto Lnewline;
2621 continue;
2623 case ' ':
2624 case '\t':
2625 case '\v':
2626 case '\f':
2627 p++;
2628 continue; // skip white space
2630 case '_':
2631 if (mod && memcmp(p, "__FILE__", 8) == 0)
2633 p += 8;
2634 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars());
2636 continue;
2638 case '"':
2639 if (filespec)
2640 goto Lerr;
2641 stringbuffer.reset();
2642 p++;
2643 while (1)
2644 { unsigned c;
2646 c = *p;
2647 switch (c)
2649 case '\n':
2650 case '\r':
2651 case 0:
2652 case 0x1A:
2653 goto Lerr;
2655 case '"':
2656 stringbuffer.writeByte(0);
2657 filespec = mem.strdup((char *)stringbuffer.data);
2658 p++;
2659 break;
2661 default:
2662 if (c & 0x80)
2663 { unsigned u = decodeUTF();
2664 if (u == PS || u == LS)
2665 goto Lerr;
2667 stringbuffer.writeByte(c);
2668 p++;
2669 continue;
2671 break;
2673 continue;
2675 default:
2676 if (*p & 0x80)
2677 { unsigned u = decodeUTF();
2678 if (u == PS || u == LS)
2679 goto Lnewline;
2681 goto Lerr;
2685 Lerr:
2686 error(loc, "#line integer [\"filespec\"]\\n expected");
2690 /********************************************
2691 * Decode UTF character.
2692 * Issue error messages for invalid sequences.
2693 * Return decoded character, advance p to last character in UTF sequence.
2696 unsigned Lexer::decodeUTF()
2698 dchar_t u;
2699 unsigned char c;
2700 unsigned char *s = p;
2701 size_t len;
2702 size_t idx;
2703 char *msg;
2705 c = *s;
2706 assert(c & 0x80);
2708 // Check length of remaining string up to 6 UTF-8 characters
2709 for (len = 1; len < 6 && s[len]; len++)
2712 idx = 0;
2713 msg = utf_decodeChar(s, len, &idx, &u);
2714 p += idx - 1;
2715 if (msg)
2717 error("%s", msg);
2719 return u;
2723 /***************************************************
2724 * Parse doc comment embedded between t->ptr and p.
2725 * Remove trailing blanks and tabs from lines.
2726 * Replace all newlines with \n.
2727 * Remove leading comment character from each line.
2728 * Decide if it's a lineComment or a blockComment.
2729 * Append to previous one for this token.
2732 void Lexer::getDocComment(Token *t, unsigned lineComment)
2734 OutBuffer buf;
2735 unsigned char ct = t->ptr[2];
2736 unsigned char *q = t->ptr + 3; // start of comment text
2737 int linestart = 0;
2739 unsigned char *qend = p;
2740 if (ct == '*' || ct == '+')
2741 qend -= 2;
2743 /* Scan over initial row of ****'s or ++++'s or ////'s
2745 for (; q < qend; q++)
2747 if (*q != ct)
2748 break;
2751 /* Remove trailing row of ****'s or ++++'s
2753 if (ct != '/')
2755 for (; q < qend; qend--)
2757 if (qend[-1] != ct)
2758 break;
2762 for (; q < qend; q++)
2764 unsigned char c = *q;
2766 switch (c)
2768 case '*':
2769 case '+':
2770 if (linestart && c == ct)
2771 { linestart = 0;
2772 /* Trim preceding whitespace up to preceding \n
2774 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2775 buf.offset--;
2776 continue;
2778 break;
2780 case ' ':
2781 case '\t':
2782 break;
2784 case '\r':
2785 if (q[1] == '\n')
2786 continue; // skip the \r
2787 goto Lnewline;
2789 default:
2790 if (c == 226)
2792 // If LS or PS
2793 if (q[1] == 128 &&
2794 (q[2] == 168 || q[2] == 169))
2796 q += 2;
2797 goto Lnewline;
2800 linestart = 0;
2801 break;
2803 Lnewline:
2804 c = '\n'; // replace all newlines with \n
2805 case '\n':
2806 linestart = 1;
2808 /* Trim trailing whitespace
2810 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2811 buf.offset--;
2813 break;
2815 buf.writeByte(c);
2818 // Always end with a newline
2819 if (!buf.offset || buf.data[buf.offset - 1] != '\n')
2820 buf.writeByte('\n');
2822 buf.writeByte(0);
2824 // It's a line comment if the start of the doc comment comes
2825 // after other non-whitespace on the same line.
2826 unsigned char** dc = (lineComment && anyToken)
2827 ? &t->lineComment
2828 : &t->blockComment;
2830 // Combine with previous doc comment, if any
2831 if (*dc)
2832 *dc = combineComments(*dc, (unsigned char *)buf.data);
2833 else
2834 *dc = (unsigned char *)buf.extractData();
2837 /********************************************
2838 * Combine two document comments into one.
2841 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2)
2843 unsigned char *c = c2;
2845 if (c1)
2846 { c = c1;
2847 if (c2)
2848 { size_t len1 = strlen((char *)c1);
2849 size_t len2 = strlen((char *)c2);
2851 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1);
2852 memcpy(c, c1, len1);
2853 c[len1] = '\n';
2854 memcpy(c + len1 + 1, c2, len2);
2855 c[len1 + 1 + len2] = 0;
2858 return c;
2861 /********************************************
2862 * Create an identifier in the string table.
2865 Identifier *Lexer::idPool(const char *s)
2867 size_t len = strlen(s);
2868 StringValue *sv = stringtable.update(s, len);
2869 Identifier *id = (Identifier *) sv->ptrvalue;
2870 if (!id)
2872 id = new Identifier(sv->lstring.string, TOKidentifier);
2873 sv->ptrvalue = id;
2875 return id;
2878 /*********************************************
2879 * Create a unique identifier using the prefix s.
2882 Identifier *Lexer::uniqueId(const char *s, int num)
2883 { char buffer[32];
2884 size_t slen = strlen(s);
2886 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer));
2887 sprintf(buffer, "%s%d", s, num);
2888 return idPool(buffer);
2891 Identifier *Lexer::uniqueId(const char *s)
2893 static int num;
2894 return uniqueId(s, ++num);
2897 /****************************************
2900 struct Keyword
2901 { char *name;
2902 enum TOK value;
2905 static Keyword keywords[] =
2907 // { "", TOK },
2909 { "this", TOKthis },
2910 { "super", TOKsuper },
2911 { "assert", TOKassert },
2912 { "null", TOKnull },
2913 { "true", TOKtrue },
2914 { "false", TOKfalse },
2915 { "cast", TOKcast },
2916 { "new", TOKnew },
2917 { "delete", TOKdelete },
2918 { "throw", TOKthrow },
2919 { "module", TOKmodule },
2920 { "pragma", TOKpragma },
2921 { "typeof", TOKtypeof },
2922 { "typeid", TOKtypeid },
2924 { "template", TOKtemplate },
2926 { "void", TOKvoid },
2927 { "byte", TOKint8 },
2928 { "ubyte", TOKuns8 },
2929 { "short", TOKint16 },
2930 { "ushort", TOKuns16 },
2931 { "int", TOKint32 },
2932 { "uint", TOKuns32 },
2933 { "long", TOKint64 },
2934 { "ulong", TOKuns64 },
2935 { "cent", TOKcent, },
2936 { "ucent", TOKucent, },
2937 { "float", TOKfloat32 },
2938 { "double", TOKfloat64 },
2939 { "real", TOKfloat80 },
2941 { "bool", TOKbool },
2942 { "char", TOKchar },
2943 { "wchar", TOKwchar },
2944 { "dchar", TOKdchar },
2946 { "ifloat", TOKimaginary32 },
2947 { "idouble", TOKimaginary64 },
2948 { "ireal", TOKimaginary80 },
2950 { "cfloat", TOKcomplex32 },
2951 { "cdouble", TOKcomplex64 },
2952 { "creal", TOKcomplex80 },
2954 { "delegate", TOKdelegate },
2955 { "function", TOKfunction },
2957 { "is", TOKis },
2958 { "if", TOKif },
2959 { "else", TOKelse },
2960 { "while", TOKwhile },
2961 { "for", TOKfor },
2962 { "do", TOKdo },
2963 { "switch", TOKswitch },
2964 { "case", TOKcase },
2965 { "default", TOKdefault },
2966 { "break", TOKbreak },
2967 { "continue", TOKcontinue },
2968 { "synchronized", TOKsynchronized },
2969 { "return", TOKreturn },
2970 { "goto", TOKgoto },
2971 { "try", TOKtry },
2972 { "catch", TOKcatch },
2973 { "finally", TOKfinally },
2974 { "with", TOKwith },
2975 { "asm", TOKasm },
2976 { "foreach", TOKforeach },
2977 { "foreach_reverse", TOKforeach_reverse },
2978 { "reversed", TOKreversed },
2979 { "scope", TOKscope },
2981 { "struct", TOKstruct },
2982 { "class", TOKclass },
2983 { "interface", TOKinterface },
2984 { "union", TOKunion },
2985 { "enum", TOKenum },
2986 { "import", TOKimport },
2987 { "mixin", TOKmixin },
2988 { "static", TOKstatic },
2989 { "final", TOKfinal },
2990 { "const", TOKconst },
2991 { "typedef", TOKtypedef },
2992 { "alias", TOKalias },
2993 { "override", TOKoverride },
2994 { "abstract", TOKabstract },
2995 { "volatile", TOKvolatile },
2996 { "debug", TOKdebug },
2997 { "deprecated", TOKdeprecated },
2998 { "in", TOKin },
2999 { "out", TOKout },
3000 { "inout", TOKinout },
3001 { "lazy", TOKlazy },
3002 { "auto", TOKauto },
3004 { "align", TOKalign },
3005 { "extern", TOKextern },
3006 { "private", TOKprivate },
3007 { "package", TOKpackage },
3008 { "protected", TOKprotected },
3009 { "public", TOKpublic },
3010 { "export", TOKexport },
3012 { "body", TOKbody },
3013 { "invariant", TOKinvariant },
3014 { "unittest", TOKunittest },
3015 { "version", TOKversion },
3016 //{ "manifest", TOKmanifest },
3018 // Added after 1.0
3019 { "ref", TOKref },
3020 { "macro", TOKmacro },
3023 // TAL
3024 { "and", TOKandand },
3025 { "or", TOKoror },
3026 { "not", TOKnot },
3027 { "extends", TOKextends },
3028 { "implements", TOKimplements },
3029 { "log_error", TOKlog_error },
3030 { "log_warning", TOKlog_warning },
3031 { "log_info", TOKlog_info },
3032 { "log_trace", TOKlog_trace },
3033 #if V2
3034 { "pure", TOKpure },
3035 { "nothrow", TOKnothrow },
3036 { "__thread", TOKtls },
3037 { "__traits", TOKtraits },
3038 { "__overloadset", TOKoverloadset },
3039 { "__FILE__", TOKfile },
3040 { "__LINE__", TOKline },
3041 #endif
3044 int Token::isKeyword()
3046 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++)
3048 if (keywords[u].value == value)
3049 return 1;
3051 return 0;
3054 void Lexer::initKeywords()
3055 { StringValue *sv;
3056 unsigned u;
3057 enum TOK v;
3058 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]);
3060 if (global.params.Dversion == 1)
3061 nkeywords -= 2;
3063 cmtable_init();
3065 for (u = 0; u < nkeywords; u++)
3066 { char *s;
3068 //printf("keyword[%d] = '%s'\n",u, keywords[u].name);
3069 s = keywords[u].name;
3070 v = keywords[u].value;
3071 sv = stringtable.insert(s, strlen(s));
3072 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v);
3074 //printf("tochars[%d] = '%s'\n",v, s);
3075 Token::tochars[v] = s;
3078 Token::tochars[TOKeof] = "EOF";
3079 Token::tochars[TOKlcurly] = "{";
3080 Token::tochars[TOKrcurly] = "}";
3081 Token::tochars[TOKlparen] = "(";
3082 Token::tochars[TOKrparen] = ")";
3083 Token::tochars[TOKlbracket] = "[";
3084 Token::tochars[TOKrbracket] = "]";
3085 Token::tochars[TOKsemicolon] = ";";
3086 Token::tochars[TOKcolon] = ":";
3087 Token::tochars[TOKcomma] = ",";
3088 Token::tochars[TOKdot] = ".";
3089 Token::tochars[TOKxor] = "^";
3090 Token::tochars[TOKxorass] = "^=";
3091 Token::tochars[TOKassign] = "=";
3092 Token::tochars[TOKconstruct] = "=";
3093 #if V2
3094 Token::tochars[TOKblit] = "=";
3095 #endif
3096 Token::tochars[TOKlt] = "<";
3097 Token::tochars[TOKgt] = ">";
3098 Token::tochars[TOKle] = "<=";
3099 Token::tochars[TOKge] = ">=";
3100 Token::tochars[TOKequal] = "==";
3101 Token::tochars[TOKnotequal] = "!=";
3102 Token::tochars[TOKnotidentity] = "!is";
3103 Token::tochars[TOKtobool] = "!!";
3104 Token::tochars[TOKat] = "@";
3106 Token::tochars[TOKunord] = "!<>=";
3107 Token::tochars[TOKue] = "!<>";
3108 Token::tochars[TOKlg] = "<>";
3109 Token::tochars[TOKleg] = "<>=";
3110 Token::tochars[TOKule] = "!>";
3111 Token::tochars[TOKul] = "!>=";
3112 Token::tochars[TOKuge] = "!<";
3113 Token::tochars[TOKug] = "!<=";
3115 Token::tochars[TOKnot] = "!";
3116 Token::tochars[TOKtobool] = "!!";
3117 Token::tochars[TOKshl] = "<<";
3118 Token::tochars[TOKshr] = ">>";
3119 Token::tochars[TOKushr] = ">>>";
3120 Token::tochars[TOKadd] = "+";
3121 Token::tochars[TOKmin] = "-";
3122 Token::tochars[TOKmul] = "*";
3123 Token::tochars[TOKdiv] = "/";
3124 Token::tochars[TOKmod] = "%";
3125 Token::tochars[TOKslice] = "..";
3126 Token::tochars[TOKdotdotdot] = "...";
3127 Token::tochars[TOKand] = "&";
3128 Token::tochars[TOKandand] = "&&";
3129 Token::tochars[TOKor] = "|";
3130 Token::tochars[TOKoror] = "||";
3131 Token::tochars[TOKarray] = "[]";
3132 Token::tochars[TOKindex] = "[i]";
3133 Token::tochars[TOKaddress] = "&";
3134 Token::tochars[TOKstar] = "*";
3135 Token::tochars[TOKtilde] = "~";
3136 Token::tochars[TOKdollar] = "$";
3137 Token::tochars[TOKcast] = "cast";
3138 Token::tochars[TOKplusplus] = "++";
3139 Token::tochars[TOKminusminus] = "--";
3140 Token::tochars[TOKtype] = "type";
3141 Token::tochars[TOKquestion] = "?";
3142 Token::tochars[TOKneg] = "-";
3143 Token::tochars[TOKuadd] = "+";
3144 Token::tochars[TOKvar] = "var";
3145 Token::tochars[TOKaddass] = "+=";
3146 Token::tochars[TOKminass] = "-=";
3147 Token::tochars[TOKmulass] = "*=";
3148 Token::tochars[TOKdivass] = "/=";
3149 Token::tochars[TOKmodass] = "%=";
3150 Token::tochars[TOKshlass] = "<<=";
3151 Token::tochars[TOKshrass] = ">>=";
3152 Token::tochars[TOKushrass] = ">>>=";
3153 Token::tochars[TOKandass] = "&=";
3154 Token::tochars[TOKorass] = "|=";
3155 Token::tochars[TOKcatass] = "~=";
3156 Token::tochars[TOKcat] = "~";
3157 Token::tochars[TOKcall] = "call";
3158 Token::tochars[TOKidentity] = "is";
3159 Token::tochars[TOKnotidentity] = "!is";
3160 Token::tochars[TOKendline] = "\\n";
3162 Token::tochars[TOKorass] = "|=";
3163 Token::tochars[TOKidentifier] = "identifier";
3165 // For debugging
3166 Token::tochars[TOKdotexp] = "dotexp";
3167 Token::tochars[TOKdotti] = "dotti";
3168 Token::tochars[TOKdotvar] = "dotvar";
3169 Token::tochars[TOKdottype] = "dottype";
3170 Token::tochars[TOKsymoff] = "symoff";
3171 Token::tochars[TOKtypedot] = "typedot";
3172 Token::tochars[TOKarraylength] = "arraylength";
3173 Token::tochars[TOKarrayliteral] = "arrayliteral";
3174 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral";
3175 Token::tochars[TOKstructliteral] = "structliteral";
3176 Token::tochars[TOKstring] = "string";
3177 Token::tochars[TOKdsymbol] = "symbol";
3178 Token::tochars[TOKtuple] = "tuple";
3179 Token::tochars[TOKdeclaration] = "declaration";
3180 Token::tochars[TOKdottd] = "dottd";
3181 Token::tochars[TOKlogger] = "logger";
3182 Token::tochars[TOKon_scope_exit] = "scope(exit)";