Allow log_ statements outside of a class
[delight/core.git] / dmd / lexer.c
blobe8963ab1f2bef598113018a8c6d8eccb3c63ddef
2 // Compiler implementation of the D programming language
3 // Copyright (c) 1999-2008 by Digital Mars
4 // All Rights Reserved
5 // written by Walter Bright
6 // http://www.digitalmars.com
7 // License for redistribution is by either the Artistic License
8 // in artistic.txt, or the GNU General Public License in gnu.txt.
9 // See the included readme.txt for details.
/* NOTE: This file has been patched from the original DMD distribution to
   work with the GDC compiler.

   Modified by David Friedman, December 2006
*/
17 /* Lexical Analyzer */
19 #include <stdio.h>
20 #include <string.h>
21 #include <ctype.h>
22 #include <stdarg.h>
23 #include <errno.h>
24 //#include <wchar.h>
25 #include <stdlib.h>
26 #include <assert.h>
27 #include <sys/time.h>
29 #ifdef IN_GCC
31 #include <time.h>
32 #include "mem.h"
34 #else
36 #if __GNUC__
37 #include <time.h>
38 #endif
40 #if _WIN32
41 #include "..\root\mem.h"
42 #else
43 #include "../root/mem.h"
44 #endif
45 #endif
47 #include "stringtable.h"
49 #include "lexer.h"
50 #include "utf.h"
51 #include "identifier.h"
52 #include "id.h"
53 #include "module.h"
55 #if _WIN32 && __DMC__
56 // from \dm\src\include\setlocal.h
57 extern "C" char * __cdecl __locale_decpoint;
58 #endif
60 extern int HtmlNamedEntity(unsigned char *p, int length);
62 #define LS 0x2028 // UTF line separator
63 #define PS 0x2029 // UTF paragraph separator
/********************************************
 * Private character-class table.
 *
 * cmtable[] holds, for every byte value, a bit set describing whether the
 * byte is an octal digit, a hex digit and/or an identifier character.
 * cmtable_init() must run before the is*() helpers give meaningful answers.
 */

static unsigned char cmtable[256];

const int CMoctal  = 0x1;       // '0'..'7'
const int CMhex    = 0x2;       // decimal digit or a-f / A-F
const int CMidchar = 0x4;       // alphanumeric or '_'

/* Each helper returns the matching flag bit (non-zero) or 0. */
inline unsigned char isoctal (unsigned char c) { return (unsigned char)(cmtable[c] & CMoctal); }
inline unsigned char ishex   (unsigned char c) { return (unsigned char)(cmtable[c] & CMhex); }
inline unsigned char isidchar(unsigned char c) { return (unsigned char)(cmtable[c] & CMidchar); }

/* Fill in cmtable[] by OR-ing the class bits into every entry. */
static void cmtable_init()
{
    const unsigned entries = sizeof(cmtable) / sizeof(cmtable[0]);

    for (unsigned ch = 0; ch != entries; ++ch)
    {
        const bool octalDigit = (ch >= '0' && ch <= '7');
        const bool hexDigit   = isdigit(ch) ||
                                (ch >= 'a' && ch <= 'f') ||
                                (ch >= 'A' && ch <= 'F');
        const bool identChar  = isalnum(ch) || ch == '_';

        if (octalDigit)
            cmtable[ch] |= CMoctal;
        if (hexDigit)
            cmtable[ch] |= CMhex;
        if (identChar)
            cmtable[ch] |= CMidchar;
    }
}
93 /************************* Token **********************************************/
95 char *Token::tochars[TOKMAX];
97 void *Token::operator new(size_t size)
98 { Token *t;
100 if (Lexer::freelist)
102 t = Lexer::freelist;
103 Lexer::freelist = t->next;
104 return t;
107 return ::operator new(size);
#ifdef DEBUG
/* Debug aid: write this token's text and a newline to stdmsg. */
void Token::print()
{
    char *text = toChars();
    fprintf(stdmsg, "%s\n", text);
}
#endif
117 char *Token::toChars()
118 { char *p;
119 static char buffer[3 + 3 * sizeof(value) + 1];
121 p = buffer;
122 switch (value)
124 case TOKint32v:
125 #if IN_GCC
126 sprintf(buffer,"%d",(d_int32)int64value);
127 #else
128 sprintf(buffer,"%d",int32value);
129 #endif
130 break;
132 case TOKuns32v:
133 case TOKcharv:
134 case TOKwcharv:
135 case TOKdcharv:
136 #if IN_GCC
137 sprintf(buffer,"%uU",(d_uns32)uns64value);
138 #else
139 sprintf(buffer,"%uU",uns32value);
140 #endif
141 break;
143 case TOKint64v:
144 sprintf(buffer,"%"PRIdMAX"L",int64value);
145 break;
147 case TOKuns64v:
148 sprintf(buffer,"%"PRIuMAX"UL",uns64value);
149 break;
151 #if IN_GCC
152 case TOKfloat32v:
153 case TOKfloat64v:
154 case TOKfloat80v:
155 float80value.format(buffer, sizeof(buffer));
156 break;
157 case TOKimaginary32v:
158 case TOKimaginary64v:
159 case TOKimaginary80v:
160 float80value.format(buffer, sizeof(buffer));
161 // %% buffer
162 strcat(buffer, "i");
163 break;
164 #else
165 case TOKfloat32v:
166 sprintf(buffer,"%Lgf", float80value);
167 break;
169 case TOKfloat64v:
170 sprintf(buffer,"%Lg", float80value);
171 break;
173 case TOKfloat80v:
174 sprintf(buffer,"%LgL", float80value);
175 break;
177 case TOKimaginary32v:
178 sprintf(buffer,"%Lgfi", float80value);
179 break;
181 case TOKimaginary64v:
182 sprintf(buffer,"%Lgi", float80value);
183 break;
185 case TOKimaginary80v:
186 sprintf(buffer,"%LgLi", float80value);
187 break;
188 #endif
191 case TOKstring:
192 #if CSTRINGS
193 p = string;
194 #else
195 { OutBuffer buf;
197 buf.writeByte('"');
198 for (size_t i = 0; i < len; )
199 { unsigned c;
201 utf_decodeChar((unsigned char *)ustring, len, &i, &c);
202 switch (c)
204 case 0:
205 break;
207 case '"':
208 case '\\':
209 buf.writeByte('\\');
210 default:
211 if (isprint(c))
212 buf.writeByte(c);
213 else if (c <= 0x7F)
214 buf.printf("\\x%02x", c);
215 else if (c <= 0xFFFF)
216 buf.printf("\\u%04x", c);
217 else
218 buf.printf("\\U%08x", c);
219 continue;
221 break;
223 buf.writeByte('"');
224 if (postfix)
225 buf.writeByte('"');
226 buf.writeByte(0);
227 p = (char *)buf.extractData();
229 #endif
230 break;
232 case TOKidentifier:
233 case TOKenum:
234 case TOKstruct:
235 case TOKimport:
236 CASE_BASIC_TYPES:
237 p = ident->toChars();
238 break;
240 default:
241 p = toChars(value);
242 break;
244 return p;
247 char *Token::toChars(enum TOK value)
248 { char *p;
249 static char buffer[3 + 3 * sizeof(value) + 1];
251 p = tochars[value];
252 if (!p)
253 { sprintf(buffer,"TOK%d",value);
254 p = buffer;
256 return p;
259 /*************************** Lexer ********************************************/
// Head of the singly linked list of recycled Token objects; Token::operator
// new pops from it and Lexer::nextToken pushes consumed look-ahead tokens.
261 Token *Lexer::freelist = NULL;
// Table that Lexer::scan uses to map identifier spellings to Identifier objects.
262 StringTable Lexer::stringtable;
// Scratch buffer in which the string-literal lexers assemble literal contents.
263 OutBuffer Lexer::stringbuffer;
265 Lexer::Lexer(Module *mod,
266 unsigned char *base, unsigned begoffset, unsigned endoffset,
267 int doDocComment, int commentToken, bool dltSyntax)
268 : loc(mod, 1), dltSyntax(dltSyntax)
270 //printf("Lexer::Lexer(%p,%d)\n",base,length);
271 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod);
272 memset(&token,0,sizeof(token));
273 this->base = base;
274 this->end = base + endoffset;
275 p = base + begoffset;
276 this->mod = mod;
277 this->doDocComment = doDocComment;
278 this->anyToken = 0;
279 this->commentToken = commentToken;
280 this->nesting = 0;
281 this->indent = 0;
282 this->atStartOfLine = 1;
283 //initKeywords();
285 /* If first line starts with '#!', ignore the line
288 if (p[0] == '#' && p[1] =='!')
290 p += 2;
291 while (1)
292 { unsigned char c = *p;
293 switch (c)
295 case '\n':
296 p++;
297 break;
299 case '\r':
300 p++;
301 if (*p == '\n')
302 p++;
303 break;
305 case 0:
306 case 0x1A:
307 break;
309 default:
310 if (c & 0x80)
311 { unsigned u = decodeUTF();
312 if (u == PS || u == LS)
313 break;
315 p++;
316 continue;
318 break;
320 loc.linnum = 2;
325 void Lexer::error(const char *format, ...)
327 if (mod && !global.gag)
329 char *p = loc.toChars();
330 if (*p)
331 fprintf(stdmsg, "%s: ", p);
332 mem.free(p);
334 va_list ap;
335 va_start(ap, format);
336 vfprintf(stdmsg, format, ap);
337 va_end(ap);
339 fprintf(stdmsg, "\n");
340 fflush(stdmsg);
342 if (global.errors >= 20) // moderate blizzard of cascading messages
343 fatal();
345 global.errors++;
348 void Lexer::error(Loc loc, const char *format, ...)
350 if (mod && !global.gag)
352 char *p = loc.toChars();
353 if (*p)
354 fprintf(stdmsg, "%s: ", p);
355 mem.free(p);
357 va_list ap;
358 va_start(ap, format);
359 vfprintf(stdmsg, format, ap);
360 va_end(ap);
362 fprintf(stdmsg, "\n");
363 fflush(stdmsg);
365 if (global.errors >= 20) // moderate blizzard of cascading messages
366 fatal();
368 global.errors++;
371 TOK Lexer::nextToken()
372 { Token *t;
374 if (token.next)
376 t = token.next;
377 memcpy(&token,t,sizeof(Token));
378 t->next = freelist;
379 freelist = t;
381 else
383 scan(&token);
385 //token.print();
386 return token.value;
389 Token *Lexer::peek(Token *ct)
390 { Token *t;
392 if (ct->next)
393 t = ct->next;
394 else
396 t = new Token();
397 scan(t);
398 t->next = NULL;
399 ct->next = t;
401 return t;
404 /*********************************
405 * tk is on the opening (.
406 * Look ahead and return token that is past the closing ).
409 Token *Lexer::peekPastParen(Token *tk)
411 //printf("peekPastParen()\n");
412 int parens = 1;
413 int curlynest = 0;
414 while (1)
416 tk = peek(tk);
417 //tk->print();
418 switch (tk->value)
420 case TOKlparen:
421 parens++;
422 continue;
424 case TOKrparen:
425 --parens;
426 if (parens)
427 continue;
428 tk = peek(tk);
429 break;
431 case TOKlcurly:
432 curlynest++;
433 continue;
435 case TOKrcurly:
436 if (--curlynest >= 0)
437 continue;
438 break;
440 case TOKsemicolon:
441 if (curlynest)
442 continue;
443 break;
445 case TOKeof:
446 break;
448 default:
449 continue;
451 return tk;
455 /**********************************
456 * Determine if string is a valid Identifier.
457 * Placed here because of commonality with Lexer functionality.
458 * Returns:
459 * 0 invalid
462 int Lexer::isValidIdentifier(char *p)
464 size_t len;
465 size_t idx;
467 if (!p || !*p)
468 goto Linvalid;
470 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars
471 goto Linvalid;
473 len = strlen(p);
474 idx = 0;
475 while (p[idx])
476 { dchar_t dc;
478 char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc);
479 if (q)
480 goto Linvalid;
482 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
483 goto Linvalid;
485 return 1;
487 Linvalid:
488 return 0;
491 /****************************
492 * Turn next token in buffer into a token.
495 void Lexer::scan(Token *t)
497 unsigned lastLine = loc.linnum;
498 unsigned linnum;
500 t->blockComment = NULL;
501 t->lineComment = NULL;
502 while (1)
504 t->ptr = p;
506 if (dltSyntax && atStartOfLine) {
507 // Check indent
508 int i;
509 for (i = 0; p[i] == '\t'; i++) {
511 if (p[i] == ' ') {
512 error("Whitespace error: use tabs to indent!");
514 if (p[i] == '#') {
515 p += i;
516 atStartOfLine = 0;
517 } else if (p[i] != '\n') {
518 if (p[i] == '\0')
519 i = 0; // End-of-file always has no indent
520 if (i > indent) {
521 error("Unexpected indentation");
522 } else if (i < indent) {
523 indent -= 1;
524 t->value = TOKrcurly;
525 return;
527 atStartOfLine = 0;
528 } /* else ignore blank line */
531 //printf("p = %p, *p = '%c'\n",p,*p);
532 switch (*p)
534 case 0:
535 case 0x1A:
536 t->value = TOKeof; // end of file
537 return;
539 case ' ':
540 case '\t':
541 case '\v':
542 case '\f':
543 p++;
544 continue; // skip white space
546 case '\r':
547 if (*p == '\n') { // if CRLF
548 p++;
549 continue;
551 // fall-through
552 case '\n':
553 p++;
554 loc.linnum++;
555 if (dltSyntax && !nesting) {
556 atStartOfLine = 1;
557 t->value = TOKendline;
558 return;
560 continue; // Ignore newlines inside brackets
561 case '0': case '1': case '2': case '3': case '4':
562 case '5': case '6': case '7': case '8': case '9':
563 t->value = number(t);
564 return;
566 #if CSTRINGS
567 case '\'':
568 t->value = charConstant(t, 0);
569 return;
571 case '"':
572 t->value = stringConstant(t,0);
573 return;
575 case 'l':
576 case 'L':
577 if (p[1] == '\'')
579 p++;
580 t->value = charConstant(t, 1);
581 return;
583 else if (p[1] == '"')
585 p++;
586 t->value = stringConstant(t, 1);
587 return;
589 #else
590 case '\'':
591 t->value = charConstant(t,0);
592 return;
594 case 'r':
595 if (p[1] != '"')
596 goto case_ident;
597 p++;
598 case '`':
599 t->value = wysiwygStringConstant(t, *p);
600 return;
602 case 'x':
603 if (p[1] != '"')
604 goto case_ident;
605 p++;
606 t->value = hexStringConstant(t);
607 return;
609 #if V2
610 case 'q':
611 if (p[1] == '"')
613 p++;
614 t->value = delimitedStringConstant(t);
615 return;
617 else if (p[1] == '{')
619 p++;
620 t->value = tokenStringConstant(t);
621 return;
623 else
624 goto case_ident;
625 #endif
627 case '"':
628 t->value = escapeStringConstant(t,0);
629 return;
631 case '\\': // escaped string literal
632 { unsigned c;
634 stringbuffer.reset();
637 p++;
638 switch (*p)
640 case 'u':
641 case 'U':
642 case '&':
643 c = escapeSequence();
644 stringbuffer.writeUTF8(c);
645 break;
647 default:
648 c = escapeSequence();
649 stringbuffer.writeByte(c);
650 break;
652 } while (*p == '\\');
653 t->len = stringbuffer.offset;
654 stringbuffer.writeByte(0);
655 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
656 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
657 t->postfix = 0;
658 t->value = TOKstring;
659 return;
662 case 'l':
663 case 'L':
664 #endif
665 case 'a': case 'b': case 'c': case 'd': case 'e':
666 case 'f': case 'g': case 'h': case 'i': case 'j':
667 case 'k': case 'm': case 'n': case 'o':
668 #if V2
669 case 'p': /*case 'q': case 'r':*/ case 's': case 't':
670 #else
671 case 'p': case 'q': /*case 'r':*/ case 's': case 't':
672 #endif
673 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
674 case 'z':
675 case 'A': case 'B': case 'C': case 'D': case 'E':
676 case 'F': case 'G': case 'H': case 'I': case 'J':
677 case 'K': case 'M': case 'N': case 'O':
678 case 'P': case 'Q': case 'R': case 'S': case 'T':
679 case 'U': case 'V': case 'W': case 'X': case 'Y':
680 case 'Z':
681 case '_':
682 case_ident:
683 { unsigned char c;
684 StringValue *sv;
685 Identifier *id;
689 c = *++p;
690 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
691 sv = stringtable.update((char *)t->ptr, p - t->ptr);
692 id = (Identifier *) sv->ptrvalue;
693 if (!id)
694 { id = new Identifier(sv->lstring.string,TOKidentifier);
695 sv->ptrvalue = id;
697 t->ident = id;
698 t->value = (enum TOK) id->value;
699 anyToken = 1;
700 if (*t->ptr == '_') // if special identifier token
702 static char date[11+1];
703 static char time[8+1];
704 static char timestamp[24+1];
706 if (!date[0]) // lazy evaluation
707 { time_t t;
708 char *p;
710 ::time(&t);
711 p = ctime(&t);
712 assert(p);
713 sprintf(date, "%.6s %.4s", p + 4, p + 20);
714 sprintf(time, "%.8s", p + 11);
715 sprintf(timestamp, "%.24s", p);
718 if (mod && id == Id::FILE)
720 t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars());
721 goto Lstring;
723 else if (mod && id == Id::LINE)
725 t->value = TOKint64v;
726 t->uns64value = loc.linnum;
728 else if (id == Id::DATE)
730 t->ustring = (unsigned char *)date;
731 goto Lstring;
733 else if (id == Id::TIME)
735 t->ustring = (unsigned char *)time;
736 goto Lstring;
738 else if (id == Id::VENDOR)
740 #ifdef IN_GCC
741 t->ustring = (unsigned char *)"GDC";
742 #else
743 t->ustring = (unsigned char *)"Digital Mars D";
744 #endif
745 goto Lstring;
747 else if (id == Id::TIMESTAMP)
749 t->ustring = (unsigned char *)timestamp;
750 Lstring:
751 t->value = TOKstring;
752 Llen:
753 t->postfix = 0;
754 t->len = strlen((char *)t->ustring);
756 else if (id == Id::VERSIONX)
757 { unsigned major = 0;
758 unsigned minor = 0;
760 for (char *p = global.version + 1; 1; p++)
762 char c = *p;
763 if (isdigit(c))
764 minor = minor * 10 + c - '0';
765 else if (c == '.')
766 { major = minor;
767 minor = 0;
769 else
770 break;
772 t->value = TOKint64v;
773 t->uns64value = major * 1000 + minor;
775 #if V2
776 else if (id == Id::EOFX)
778 t->value = TOKeof;
779 // Advance scanner to end of file
780 while (!(*p == 0 || *p == 0x1A))
781 p++;
783 #endif
785 //printf("t->value = %d\n",t->value);
786 return;
789 case '/':
790 p++;
791 switch (*p)
793 case '=':
794 p++;
795 t->value = TOKdivass;
796 return;
798 case '*':
799 p++;
800 linnum = loc.linnum;
801 while (1)
803 while (1)
804 { unsigned char c = *p;
805 switch (c)
807 case '/':
808 break;
810 case '\n':
811 loc.linnum++;
812 p++;
813 continue;
815 case '\r':
816 p++;
817 if (*p != '\n')
818 loc.linnum++;
819 continue;
821 case 0:
822 case 0x1A:
823 error("unterminated /* */ comment");
824 p = end;
825 t->value = TOKeof;
826 return;
828 default:
829 if (c & 0x80)
830 { unsigned u = decodeUTF();
831 if (u == PS || u == LS)
832 loc.linnum++;
834 p++;
835 continue;
837 break;
839 p++;
840 if (p[-2] == '*' && p - 3 != t->ptr)
841 break;
843 if (commentToken)
845 t->value = TOKcomment;
846 return;
848 else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
849 { // if /** but not /**/
850 getDocComment(t, lastLine == linnum);
852 continue;
854 case '/': // do // style comments
855 linnum = loc.linnum;
856 while (1)
857 { unsigned char c = *++p;
858 switch (c)
860 case '\n':
861 break;
863 case '\r':
864 if (p[1] == '\n')
865 p++;
866 break;
868 case 0:
869 case 0x1A:
870 if (commentToken)
872 p = end;
873 t->value = TOKcomment;
874 return;
876 if (doDocComment && t->ptr[2] == '/')
877 getDocComment(t, lastLine == linnum);
878 p = end;
879 t->value = TOKeof;
880 return;
882 default:
883 if (c & 0x80)
884 { unsigned u = decodeUTF();
885 if (u == PS || u == LS)
886 break;
888 continue;
890 break;
893 if (commentToken)
895 p++;
896 loc.linnum++;
897 t->value = TOKcomment;
898 return;
900 if (doDocComment && t->ptr[2] == '/')
901 getDocComment(t, lastLine == linnum);
903 p++;
904 loc.linnum++;
905 continue;
907 case '+':
908 { int nest;
910 linnum = loc.linnum;
911 p++;
912 nest = 1;
913 while (1)
914 { unsigned char c = *p;
915 switch (c)
917 case '/':
918 p++;
919 if (*p == '+')
921 p++;
922 nest++;
924 continue;
926 case '+':
927 p++;
928 if (*p == '/')
930 p++;
931 if (--nest == 0)
932 break;
934 continue;
936 case '\r':
937 p++;
938 if (*p != '\n')
939 loc.linnum++;
940 continue;
942 case '\n':
943 loc.linnum++;
944 p++;
945 continue;
947 case 0:
948 case 0x1A:
949 error("unterminated /+ +/ comment");
950 p = end;
951 t->value = TOKeof;
952 return;
954 default:
955 if (c & 0x80)
956 { unsigned u = decodeUTF();
957 if (u == PS || u == LS)
958 loc.linnum++;
960 p++;
961 continue;
963 break;
965 if (commentToken)
967 t->value = TOKcomment;
968 return;
970 if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
971 { // if /++ but not /++/
972 getDocComment(t, lastLine == linnum);
974 continue;
977 t->value = TOKdiv;
978 return;
980 case '.':
981 p++;
982 if (isdigit(*p))
983 { /* Note that we don't allow ._1 and ._ as being
984 * valid floating point numbers.
986 p--;
987 t->value = inreal(t);
989 else if (p[0] == '.')
991 if (p[1] == '.')
992 { p += 2;
993 t->value = TOKdotdotdot;
995 else
996 { p++;
997 t->value = TOKslice;
1000 else
1001 t->value = TOKdot;
1002 return;
1004 case '&':
1005 p++;
1006 if (*p == '=')
1007 { p++;
1008 t->value = TOKandass;
1010 else if (*p == '&')
1011 { p++;
1012 t->value = TOKandand;
1014 else
1015 t->value = TOKand;
1016 return;
1018 case '|':
1019 p++;
1020 if (*p == '=')
1021 { p++;
1022 t->value = TOKorass;
1024 else if (*p == '|')
1025 { p++;
1026 t->value = TOKoror;
1028 else
1029 t->value = TOKor;
1030 return;
1032 case '-':
1033 p++;
1034 if (*p == '=')
1035 { p++;
1036 t->value = TOKminass;
1038 #if 0
1039 else if (*p == '>')
1040 { p++;
1041 t->value = TOKarrow;
1043 #endif
1044 else if (*p == '-')
1045 { p++;
1046 t->value = TOKminusminus;
1048 else
1049 t->value = TOKmin;
1050 return;
1052 case '+':
1053 p++;
1054 if (*p == '=')
1055 { p++;
1056 t->value = TOKaddass;
1058 else if (*p == '+')
1059 { p++;
1060 t->value = TOKplusplus;
1062 else
1063 t->value = TOKadd;
1064 return;
1066 case '<':
1067 p++;
1068 if (*p == '=')
1069 { p++;
1070 t->value = TOKle; // <=
1072 else if (*p == '<')
1073 { p++;
1074 if (*p == '=')
1075 { p++;
1076 t->value = TOKshlass; // <<=
1078 else
1079 t->value = TOKshl; // <<
1081 else if (*p == '>')
1082 { p++;
1083 if (*p == '=')
1084 { p++;
1085 t->value = TOKleg; // <>=
1087 else
1088 t->value = TOKlg; // <>
1090 else
1091 t->value = TOKlt; // <
1092 return;
1094 case '>':
1095 p++;
1096 if (*p == '=')
1097 { p++;
1098 t->value = TOKge; // >=
1100 else if (*p == '>')
1101 { p++;
1102 if (*p == '=')
1103 { p++;
1104 t->value = TOKshrass; // >>=
1106 else if (*p == '>')
1107 { p++;
1108 if (*p == '=')
1109 { p++;
1110 t->value = TOKushrass; // >>>=
1112 else
1113 t->value = TOKushr; // >>>
1115 else
1116 t->value = TOKshr; // >>
1118 else
1119 t->value = TOKgt; // >
1120 return;
1122 case '!':
1123 p++;
1124 if (*p == '=')
1125 { p++;
1126 if (*p == '=' && global.params.Dversion == 1)
1127 { p++;
1128 t->value = TOKnotidentity; // !==
1130 else
1131 t->value = TOKnotequal; // !=
1133 else if (*p == '<')
1134 { p++;
1135 if (*p == '>')
1136 { p++;
1137 if (*p == '=')
1138 { p++;
1139 t->value = TOKunord; // !<>=
1141 else
1142 t->value = TOKue; // !<>
1144 else if (*p == '=')
1145 { p++;
1146 t->value = TOKug; // !<=
1148 else
1149 t->value = TOKuge; // !<
1151 else if (*p == '>')
1152 { p++;
1153 if (*p == '=')
1154 { p++;
1155 t->value = TOKul; // !>=
1157 else
1158 t->value = TOKule; // !>
1160 else
1161 t->value = TOKnot; // !
1162 return;
1164 case '=':
1165 p++;
1166 if (*p == '=')
1167 { p++;
1168 if (*p == '=' && global.params.Dversion == 1)
1169 { p++;
1170 t->value = TOKidentity; // ===
1172 else
1173 t->value = TOKequal; // ==
1175 else
1176 t->value = TOKassign; // =
1177 return;
1179 case '~':
1180 p++;
1181 if (*p == '=')
1182 { p++;
1183 t->value = TOKcatass; // ~=
1185 else
1186 t->value = TOKtilde; // ~
1187 return;
1189 #define NESTED(cin,tokin,cout,tokout) \
1190 case cin: nesting++; p++; t->value = tokin; return;\
1191 case cout: if (nesting == 0) {error("Unexpected '%c'", cout);} else {nesting--;} p++; t->value = tokout; return;
1193 NESTED('(', TOKlparen, ')', TOKrparen)
1194 NESTED('[', TOKlbracket, ']', TOKrbracket)
1195 NESTED('{', TOKlcurly, '}', TOKrcurly)
1196 #undef NESTED
1198 #define SINGLE(c,tok) case c: p++; t->value = tok; return;
1199 SINGLE('?', TOKquestion)
1200 SINGLE(',', TOKcomma)
1201 SINGLE(';', TOKsemicolon)
1202 SINGLE('$', TOKdollar)
1203 SINGLE('@', TOKat)
1205 #undef SINGLE
1207 case ':':
1208 p++;
1209 if (!nesting)
1210 indent += 1;
1211 t->value = TOKcolon;
1212 return;
1214 #define DOUBLE(c1,tok1,c2,tok2) \
1215 case c1: \
1216 p++; \
1217 if (*p == c2) \
1218 { p++; \
1219 t->value = tok2; \
1221 else \
1222 t->value = tok1; \
1223 return;
1225 DOUBLE('*', TOKmul, '=', TOKmulass)
1226 DOUBLE('%', TOKmod, '=', TOKmodass)
1227 DOUBLE('^', TOKxor, '=', TOKxorass)
1229 #undef DOUBLE
1231 case '#': // do # style comments and pragmas
1232 p++;
1233 pragma();
1234 continue;
1236 default:
1237 { unsigned char c = *p;
1239 if (c & 0x80)
1240 { unsigned u = decodeUTF();
1242 // Check for start of unicode identifier
1243 if (isUniAlpha(u))
1244 goto case_ident;
1246 if (u == PS || u == LS)
1248 loc.linnum++;
1249 p++;
1250 continue;
1253 if (isprint(c))
1254 error("unsupported char '%c'", c);
1255 else
1256 error("unsupported char 0x%02x", c);
1257 p++;
1258 continue;
1264 /*******************************************
1265 * Parse escape sequence.
// Decode one escape sequence and return its value.
// On entry p points at the character that follows the backslash (the first
// statement reads *p); on return p has been advanced past the characters that
// were consumed.  Problems are reported through error() and a value is still
// returned.
1268 unsigned Lexer::escapeSequence()
1269 { unsigned c;
1270 int n;
1271 int ndigits;
1273 c = *p;
1274 switch (c)
// These four characters stand for themselves.
1276 case '\'':
1277 case '"':
1278 case '?':
1279 case '\\':
// Shared exit for the single-character escapes: step over the character.
1280 Lconsume:
1281 p++;
1282 break;
// Control-character escapes: replace c with the code and consume one char.
1284 case 'a': c = 7; goto Lconsume;
1285 case 'b': c = 8; goto Lconsume;
1286 case 'f': c = 12; goto Lconsume;
1287 case 'n': c = 10; goto Lconsume;
1288 case 'r': c = 13; goto Lconsume;
1289 case 't': c = 9; goto Lconsume;
1290 case 'v': c = 11; goto Lconsume;
// Hex escapes: \u takes 4 digits, \U takes 8, \x takes 2.
1292 case 'u':
1293 ndigits = 4;
1294 goto Lhex;
1295 case 'U':
1296 ndigits = 8;
1297 goto Lhex;
1298 case 'x':
1299 ndigits = 2;
1300 Lhex:
1301 p++;
1302 c = *p;
1303 if (ishex(c))
1304 { unsigned v;
1306 n = 0;
1307 v = 0;
// Accumulate hex digits into v until ndigits have been read or a non-hex
// character is met (the latter is reported as an error).
1308 while (1)
1310 if (isdigit(c))
1311 c -= '0';
1312 else if (islower(c))
1313 c -= 'a' - 10;
1314 else
1315 c -= 'A' - 10;
1316 v = v * 16 + c;
1317 c = *++p;
1318 if (++n == ndigits)
1319 break;
1320 if (!ishex(c))
1321 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
1322 break;
// Only the \u and \U forms are checked for being a valid UTF character.
1325 if (ndigits != 2 && !utf_isValidDchar(v))
1326 error("invalid UTF character \\U%08x", v);
1327 c = v;
1329 else
1330 error("undefined escape hex sequence \\%c\n",c);
1331 break;
// Scan up to the terminating ';' and look the name up with HtmlNamedEntity().
1333 case '&': // named character entity
1334 for (unsigned char *idstart = ++p; 1; p++)
1336 switch (*p)
1338 case ';':
1339 c = HtmlNamedEntity(idstart, p - idstart);
// A result of ~0 is treated as "unknown entity"; a space is substituted.
1340 if (c == ~0)
1341 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
1342 c = ' ';
1344 p++;
1345 break;
1347 default:
// Letters are accepted anywhere; digits are rejected at idstart + 1.
1348 if (isalpha(*p) ||
1349 (p != idstart + 1 && isdigit(*p)))
1350 continue;
1351 error("unterminated named entity");
1352 break;
1354 break;
1356 break;
// End of input directly after the backslash: return the backslash itself
// without advancing p.
1358 case 0:
1359 case 0x1A: // end of file
1360 c = '\\';
1361 break;
1363 default:
// Octal escape: one to three octal digits.
1364 if (isoctal(c))
1365 { unsigned v;
1367 n = 0;
1368 v = 0;
1371 v = v * 8 + (c - '0');
1372 c = *++p;
1373 } while (++n < 3 && isoctal(c));
1374 c = v;
1375 if (c > 0xFF)
1376 error("0%03o is larger than a byte", c);
1378 else
1379 error("undefined escape sequence \\%c\n",c);
1380 break;
1382 return c;
1385 /**************************************
1388 TOK Lexer::wysiwygStringConstant(Token *t, int tc)
1389 { unsigned c;
1390 Loc start = loc;
1392 p++;
1393 stringbuffer.reset();
1394 while (1)
1396 c = *p++;
1397 switch (c)
1399 case '\n':
1400 loc.linnum++;
1401 break;
1403 case '\r':
1404 if (*p == '\n')
1405 continue; // ignore
1406 c = '\n'; // treat EndOfLine as \n character
1407 loc.linnum++;
1408 break;
1410 case 0:
1411 case 0x1A:
1412 error("unterminated string constant starting at %s", start.toChars());
1413 t->ustring = (unsigned char *)"";
1414 t->len = 0;
1415 t->postfix = 0;
1416 return TOKstring;
1418 case '"':
1419 case '`':
1420 if (c == tc)
1422 t->len = stringbuffer.offset;
1423 stringbuffer.writeByte(0);
1424 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1425 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1426 stringPostfix(t);
1427 return TOKstring;
1429 break;
1431 default:
1432 if (c & 0x80)
1433 { p--;
1434 unsigned u = decodeUTF();
1435 p++;
1436 if (u == PS || u == LS)
1437 loc.linnum++;
1438 stringbuffer.writeUTF8(u);
1439 continue;
1441 break;
1443 stringbuffer.writeByte(c);
1447 /**************************************
1448 * Lex hex strings:
1449 * x"0A ae 34FE BD"
// Lex a hex string literal such as x"0A ae 34FE BD" into t.
// Pairs of hex digits are combined into bytes and collected in stringbuffer;
// white space and line endings between digits are skipped.  Always returns
// TOKstring, with an empty string after an "unterminated" error.
1452 TOK Lexer::hexStringConstant(Token *t)
1453 { unsigned c;
1454 Loc start = loc;
// n counts the hex digits seen so far; v holds the byte being assembled and
// is assigned on every even-numbered digit before it is read.
1455 unsigned n = 0;
1456 unsigned v;
// Step over the character p is on when the function is entered.
1458 p++;
1459 stringbuffer.reset();
1460 while (1)
1462 c = *p++;
1463 switch (c)
1465 case ' ':
1466 case '\t':
1467 case '\v':
1468 case '\f':
1469 continue; // skip white space
1471 case '\r':
1472 if (*p == '\n')
1473 continue; // ignore
1474 // Treat isolated '\r' as if it were a '\n'
1475 case '\n':
1476 loc.linnum++;
1477 continue;
1479 case 0:
1480 case 0x1A:
1481 error("unterminated string constant starting at %s", start.toChars());
1482 t->ustring = (unsigned char *)"";
1483 t->len = 0;
1484 t->postfix = 0;
1485 return TOKstring;
1487 case '"':
// An odd digit count is an error; the dangling high nibble is still written.
1488 if (n & 1)
1489 { error("odd number (%d) of hex characters in hex string", n);
1490 stringbuffer.writeByte(v);
// t->len excludes the terminating 0 that is appended next.
1492 t->len = stringbuffer.offset;
1493 stringbuffer.writeByte(0);
1494 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1495 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1496 stringPostfix(t);
1497 return TOKstring;
1499 default:
// Convert the digit character to its numeric value 0..15.
1500 if (c >= '0' && c <= '9')
1501 c -= '0';
1502 else if (c >= 'a' && c <= 'f')
1503 c -= 'a' - 10;
1504 else if (c >= 'A' && c <= 'F')
1505 c -= 'A' - 10;
1506 else if (c & 0x80)
1507 { p--;
1508 unsigned u = decodeUTF();
1509 p++;
1510 if (u == PS || u == LS)
1511 loc.linnum++;
1512 else
1513 error("non-hex character \\u%x", u);
1515 else
1516 error("non-hex character '%c'", c);
// NOTE(review): after either error above, and after a PS/LS line separator,
// control still reaches this point, so the unconverted c is counted and
// stored as if it were a hex digit -- confirm this recovery is intended.
1517 if (n & 1)
1518 { v = (v << 4) | c;
1519 stringbuffer.writeByte(v);
1521 else
1522 v = c;
1523 n++;
1524 break;
1530 #if V2
1531 /**************************************
1532 * Lex delimited strings:
1533 * q"(foo(xxx))" // "foo(xxx)"
1534 * q"[foo(]" // "foo("
1535 * q"/foo]/" // "foo]"
1536 * q"HERE
1537 * foo
1538 * HERE" // "foo\n"
1539 * Input:
1540 * p is on the "
1543 TOK Lexer::delimitedStringConstant(Token *t)
1544 { unsigned c;
1545 Loc start = loc;
1546 unsigned delimleft = 0;
1547 unsigned delimright = 0;
1548 unsigned nest = 1;
1549 unsigned nestcount;
1550 Identifier *hereid = NULL;
1551 unsigned blankrol = 0;
1552 unsigned startline = 0;
1554 p++;
1555 stringbuffer.reset();
1556 while (1)
1558 c = *p++;
1559 //printf("c = '%c'\n", c);
1560 switch (c)
1562 case '\n':
1563 Lnextline:
1564 loc.linnum++;
1565 startline = 1;
1566 if (blankrol)
1567 { blankrol = 0;
1568 continue;
1570 if (hereid)
1572 stringbuffer.writeUTF8(c);
1573 continue;
1575 break;
1577 case '\r':
1578 if (*p == '\n')
1579 continue; // ignore
1580 c = '\n'; // treat EndOfLine as \n character
1581 goto Lnextline;
1583 case 0:
1584 case 0x1A:
1585 goto Lerror;
1587 default:
1588 if (c & 0x80)
1589 { p--;
1590 c = decodeUTF();
1591 p++;
1592 if (c == PS || c == LS)
1593 goto Lnextline;
1595 break;
1597 if (delimleft == 0)
1598 { delimleft = c;
1599 nest = 1;
1600 nestcount = 1;
1601 if (c == '(')
1602 delimright = ')';
1603 else if (c == '{')
1604 delimright = '}';
1605 else if (c == '[')
1606 delimright = ']';
1607 else if (c == '<')
1608 delimright = '>';
1609 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1610 { // Start of identifier; must be a heredoc
1611 Token t;
1612 p--;
1613 scan(&t); // read in heredoc identifier
1614 if (t.value != TOKidentifier)
1615 { error("identifier expected for heredoc, not %s", t.toChars());
1616 delimright = c;
1618 else
1619 { hereid = t.ident;
1620 //printf("hereid = '%s'\n", hereid->toChars());
1621 blankrol = 1;
1623 nest = 0;
1625 else
1626 { delimright = c;
1627 nest = 0;
1630 else
1632 if (blankrol)
1633 { error("heredoc rest of line should be blank");
1634 blankrol = 0;
1635 continue;
1637 if (nest == 1)
1639 if (c == delimleft)
1640 nestcount++;
1641 else if (c == delimright)
1642 { nestcount--;
1643 if (nestcount == 0)
1644 goto Ldone;
1647 else if (c == delimright)
1648 goto Ldone;
1649 if (startline && isalpha(c))
1650 { Token t;
1651 unsigned char *psave = p;
1652 p--;
1653 scan(&t); // read in possible heredoc identifier
1654 //printf("endid = '%s'\n", t.ident->toChars());
1655 if (t.value == TOKidentifier && t.ident->equals(hereid))
1656 { /* should check that rest of line is blank
1658 goto Ldone;
1660 p = psave;
1662 stringbuffer.writeUTF8(c);
1663 startline = 0;
1667 Ldone:
1668 if (*p == '"')
1669 p++;
1670 else
1671 error("delimited string must end in %c\"", delimright);
1672 t->len = stringbuffer.offset;
1673 stringbuffer.writeByte(0);
1674 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1675 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1676 stringPostfix(t);
1677 return TOKstring;
1679 Lerror:
1680 error("unterminated string constant starting at %s", start.toChars());
1681 t->ustring = (unsigned char *)"";
1682 t->len = 0;
1683 t->postfix = 0;
1684 return TOKstring;
1687 /**************************************
1688 * Lex delimited strings:
1689 * q{ foo(xxx) } // " foo(xxx) "
1690 * q{foo(} // "foo("
1691 * q{{foo}"}"} // "{foo}"}""
1692 * Input:
1693 * p is on the q
1696 TOK Lexer::tokenStringConstant(Token *t)
1698 unsigned nest = 1;
1699 Loc start = loc;
1700 unsigned char *pstart = ++p;
1702 while (1)
1703 { Token tok;
1705 scan(&tok);
1706 switch (tok.value)
1708 case TOKlcurly:
1709 nest++;
1710 continue;
1712 case TOKrcurly:
1713 if (--nest == 0)
1714 goto Ldone;
1715 continue;
1717 case TOKeof:
1718 goto Lerror;
1720 default:
1721 continue;
1725 Ldone:
1726 t->len = p - 1 - pstart;
1727 t->ustring = (unsigned char *)mem.malloc(t->len + 1);
1728 memcpy(t->ustring, pstart, t->len);
1729 t->ustring[t->len] = 0;
1730 stringPostfix(t);
1731 return TOKstring;
1733 Lerror:
1734 error("unterminated token string constant starting at %s", start.toChars());
1735 t->ustring = (unsigned char *)"";
1736 t->len = 0;
1737 t->postfix = 0;
1738 return TOKstring;
1741 #endif
1744 /**************************************
1747 TOK Lexer::escapeStringConstant(Token *t, int wide)
1748 { unsigned c;
1749 Loc start = loc;
1751 p++;
1752 stringbuffer.reset();
1753 while (1)
1755 c = *p++;
1756 switch (c)
1758 case '\\':
1759 switch (*p)
1761 case 'u':
1762 case 'U':
1763 case '&':
1764 c = escapeSequence();
1765 stringbuffer.writeUTF8(c);
1766 continue;
1768 default:
1769 c = escapeSequence();
1770 break;
1772 break;
1774 case '\n':
1775 loc.linnum++;
1776 break;
1778 case '\r':
1779 if (*p == '\n')
1780 continue; // ignore
1781 c = '\n'; // treat EndOfLine as \n character
1782 loc.linnum++;
1783 break;
1785 case '"':
1786 t->len = stringbuffer.offset;
1787 stringbuffer.writeByte(0);
1788 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1789 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1790 stringPostfix(t);
1791 return TOKstring;
1793 case 0:
1794 case 0x1A:
1795 p--;
1796 error("unterminated string constant starting at %s", start.toChars());
1797 t->ustring = (unsigned char *)"";
1798 t->len = 0;
1799 t->postfix = 0;
1800 return TOKstring;
1802 default:
1803 if (c & 0x80)
1805 p--;
1806 c = decodeUTF();
1807 if (c == LS || c == PS)
1808 { c = '\n';
1809 loc.linnum++;
1811 p++;
1812 stringbuffer.writeUTF8(c);
1813 continue;
1815 break;
1817 stringbuffer.writeByte(c);
1821 /**************************************
1824 TOK Lexer::charConstant(Token *t, int wide)
1826 unsigned c;
1827 TOK tk = TOKcharv;
1829 //printf("Lexer::charConstant\n");
1830 p++;
1831 c = *p++;
1832 switch (c)
1834 case '\\':
1835 switch (*p)
1837 case 'u':
1838 t->uns64value = escapeSequence();
1839 tk = TOKwcharv;
1840 break;
1842 case 'U':
1843 case '&':
1844 t->uns64value = escapeSequence();
1845 tk = TOKdcharv;
1846 break;
1848 default:
1849 t->uns64value = escapeSequence();
1850 break;
1852 break;
1854 case '\n':
1856 loc.linnum++;
1857 case '\r':
1858 case 0:
1859 case 0x1A:
1860 case '\'':
1861 error("unterminated character constant");
1862 return tk;
1864 default:
1865 if (c & 0x80)
1867 p--;
1868 c = decodeUTF();
1869 p++;
1870 if (c == LS || c == PS)
1871 goto L1;
1872 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1873 tk = TOKwcharv;
1874 else
1875 tk = TOKdcharv;
1877 t->uns64value = c;
1878 break;
1881 if (*p != '\'')
1882 { error("unterminated character constant");
1883 return tk;
1885 p++;
1886 return tk;
1889 /***************************************
1890 * Get postfix of string literal.
1893 void Lexer::stringPostfix(Token *t)
1895 switch (*p)
1897 case 'c':
1898 case 'w':
1899 case 'd':
1900 t->postfix = *p;
1901 p++;
1902 break;
1904 default:
1905 t->postfix = 0;
1906 break;
/***************************************
 * Read \u or \U unicode sequence
 * Input:
 *	u	'u' or 'U'
 * Returns:
 *	the value accumulated from the hex digits that follow
 *	(8 digits are expected after 'U', 4 otherwise).
 * This routine is compiled out.
 */

#if 0
unsigned Lexer::wchar(unsigned u)
{
    const unsigned ndigits = (u == 'U') ? 8 : 4;
    unsigned result = 0;

    for (unsigned count = 0; ; count++)
    {
	++p;
	if (count == ndigits)
	    break;

	unsigned char digit = *p;
	if (!ishex(digit))
	{
	    error("\\%c sequence must be followed by %d hex characters", u, ndigits);
	    break;
	}

	// Turn the hex character into its numeric value.
	if (isdigit(digit))
	    digit -= '0';
	else if (islower(digit))
	    digit -= 'a' - 10;
	else
	    digit -= 'A' - 10;

	result = (result << 4) | digit;
    }
    return result;
}
#endif
1949 /**************************************
1950 * Read in a number.
1951 * If it's an integer, store it in tok.TKutok.Vlong.
1952 * integers can be decimal, octal or hex
1953 * Handle the suffixes U, UL, LU, L, etc.
1954 * If it's double, store it in tok.TKutok.Vdouble.
1955 * Returns:
1956 * TKnum
1957 * TKdouble,...
1960 TOK Lexer::number(Token *t)
1962 // We use a state machine to collect numbers
1963 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
1964 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
1965 STATE_hexh, STATE_error };
1966 enum STATE state;
1968 enum FLAGS
1969 { FLAGS_decimal = 1, // decimal
1970 FLAGS_unsigned = 2, // u or U suffix
1971 FLAGS_long = 4, // l or L suffix
1973 enum FLAGS flags = FLAGS_decimal;
1975 int i;
1976 int base;
1977 unsigned c;
1978 unsigned char *start;
1979 TOK result;
1981 //printf("Lexer::number()\n");
1982 state = STATE_initial;
1983 base = 0;
1984 stringbuffer.reset();
1985 start = p;
1986 while (1)
1988 c = *p;
1989 switch (state)
1991 case STATE_initial: // opening state
1992 if (c == '0')
1993 state = STATE_0;
1994 else
1995 state = STATE_decimal;
1996 break;
1998 case STATE_0:
1999 flags = (FLAGS) (flags & ~FLAGS_decimal);
2000 switch (c)
2002 #if ZEROH
2003 case 'H': // 0h
2004 case 'h':
2005 goto hexh;
2006 #endif
2007 case 'X':
2008 case 'x':
2009 state = STATE_hex0;
2010 break;
2012 case '.':
2013 if (p[1] == '.') // .. is a separate token
2014 goto done;
2015 case 'i':
2016 case 'f':
2017 case 'F':
2018 goto real;
2019 #if ZEROH
2020 case 'E':
2021 case 'e':
2022 goto case_hex;
2023 #endif
2024 case 'B':
2025 case 'b':
2026 state = STATE_binary0;
2027 break;
2029 case '0': case '1': case '2': case '3':
2030 case '4': case '5': case '6': case '7':
2031 state = STATE_octal;
2032 break;
2034 #if ZEROH
2035 case '8': case '9': case 'A':
2036 case 'C': case 'D': case 'F':
2037 case 'a': case 'c': case 'd': case 'f':
2038 case_hex:
2039 state = STATE_hexh;
2040 break;
2041 #endif
2042 case '_':
2043 state = STATE_octal;
2044 p++;
2045 continue;
2047 case 'L':
2048 if (p[1] == 'i')
2049 goto real;
2050 goto done;
2052 default:
2053 goto done;
2055 break;
2057 case STATE_decimal: // reading decimal number
2058 if (!isdigit(c))
2060 #if ZEROH
2061 if (ishex(c)
2062 || c == 'H' || c == 'h'
2064 goto hexh;
2065 #endif
2066 if (c == '_') // ignore embedded _
2067 { p++;
2068 continue;
2070 if (c == '.' && p[1] != '.')
2071 goto real;
2072 else if (c == 'i' || c == 'f' || c == 'F' ||
2073 c == 'e' || c == 'E')
2075 real: // It's a real number. Back up and rescan as a real
2076 p = start;
2077 return inreal(t);
2079 else if (c == 'L' && p[1] == 'i')
2080 goto real;
2081 goto done;
2083 break;
2085 case STATE_hex0: // reading hex number
2086 case STATE_hex:
2087 if (!ishex(c))
2089 if (c == '_') // ignore embedded _
2090 { p++;
2091 continue;
2093 if (c == '.' && p[1] != '.')
2094 goto real;
2095 if (c == 'P' || c == 'p' || c == 'i')
2096 goto real;
2097 if (state == STATE_hex0)
2098 error("Hex digit expected, not '%c'", c);
2099 goto done;
2101 state = STATE_hex;
2102 break;
2104 #if ZEROH
2105 hexh:
2106 state = STATE_hexh;
2107 case STATE_hexh: // parse numbers like 0FFh
2108 if (!ishex(c))
2110 if (c == 'H' || c == 'h')
2112 p++;
2113 base = 16;
2114 goto done;
2116 else
2118 // Check for something like 1E3 or 0E24
2119 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) ||
2120 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset))
2121 goto real;
2122 error("Hex digit expected, not '%c'", c);
2123 goto done;
2126 break;
2127 #endif
2129 case STATE_octal: // reading octal number
2130 case STATE_octale: // reading octal number with non-octal digits
2131 if (!isoctal(c))
2133 #if ZEROH
2134 if (ishex(c)
2135 || c == 'H' || c == 'h'
2137 goto hexh;
2138 #endif
2139 if (c == '_') // ignore embedded _
2140 { p++;
2141 continue;
2143 if (c == '.' && p[1] != '.')
2144 goto real;
2145 if (c == 'i')
2146 goto real;
2147 if (isdigit(c))
2149 state = STATE_octale;
2151 else
2152 goto done;
2154 break;
2156 case STATE_binary0: // starting binary number
2157 case STATE_binary: // reading binary number
2158 if (c != '0' && c != '1')
2160 #if ZEROH
2161 if (ishex(c)
2162 || c == 'H' || c == 'h'
2164 goto hexh;
2165 #endif
2166 if (c == '_') // ignore embedded _
2167 { p++;
2168 continue;
2170 if (state == STATE_binary0)
2171 { error("binary digit expected");
2172 state = STATE_error;
2173 break;
2175 else
2176 goto done;
2178 state = STATE_binary;
2179 break;
2181 case STATE_error: // for error recovery
2182 if (!isdigit(c)) // scan until non-digit
2183 goto done;
2184 break;
2186 default:
2187 assert(0);
2189 stringbuffer.writeByte(c);
2190 p++;
2192 done:
2193 stringbuffer.writeByte(0); // terminate string
2194 if (state == STATE_octale)
2195 error("Octal digit expected");
2197 uinteger_t n; // unsigned >=64 bit integer type
2199 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0))
2200 n = stringbuffer.data[0] - '0';
2201 else
2203 // Convert string to integer
2204 #if __DMC__
2205 errno = 0;
2206 n = strtoull((char *)stringbuffer.data,NULL,base);
2207 if (errno == ERANGE)
2208 error("integer overflow");
2209 #else
2210 // Not everybody implements strtoull()
2211 char *p = (char *)stringbuffer.data;
2212 int r = 10, d;
2214 if (*p == '0')
2216 if (p[1] == 'x' || p[1] == 'X')
2217 p += 2, r = 16;
2218 else if (p[1] == 'b' || p[1] == 'B')
2219 p += 2, r = 2;
2220 else if (isdigit(p[1]))
2221 p += 1, r = 8;
2224 n = 0;
2225 while (1)
2227 if (*p >= '0' && *p <= '9')
2228 d = *p - '0';
2229 else if (*p >= 'a' && *p <= 'z')
2230 d = *p - 'a' + 10;
2231 else if (*p >= 'A' && *p <= 'Z')
2232 d = *p - 'A' + 10;
2233 else
2234 break;
2235 if (d >= r)
2236 break;
2237 if (n && n * r + d <= n)
2239 error ("integer overflow");
2240 break;
2243 n = n * r + d;
2244 p++;
2246 #endif
2247 if (sizeof(n) > 8 &&
2248 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits
2249 error("integer overflow");
2252 // Parse trailing 'u', 'U', 'l' or 'L' in any combination
2253 while (1)
2254 { unsigned char f;
2256 switch (*p)
2257 { case 'U':
2258 case 'u':
2259 f = FLAGS_unsigned;
2260 goto L1;
2262 case 'l':
2263 if (1 || !global.params.useDeprecated)
2264 error("'l' suffix is deprecated, use 'L' instead");
2265 case 'L':
2266 f = FLAGS_long;
2268 p++;
2269 if (flags & f)
2270 error("unrecognized token");
2271 flags = (FLAGS) (flags | f);
2272 continue;
2273 default:
2274 break;
2276 break;
2279 switch (flags)
2281 case 0:
2282 /* Octal or Hexadecimal constant.
2283 * First that fits: int, uint, long, ulong
2285 if (n & 0x8000000000000000LL)
2286 result = TOKuns64v;
2287 else if (n & 0xFFFFFFFF00000000LL)
2288 result = TOKint64v;
2289 else if (n & 0x80000000)
2290 result = TOKuns32v;
2291 else
2292 result = TOKint32v;
2293 break;
2295 case FLAGS_decimal:
2296 /* First that fits: int, long, long long
2298 if (n & 0x8000000000000000LL)
2299 { error("signed integer overflow");
2300 result = TOKuns64v;
2302 else if (n & 0xFFFFFFFF80000000LL)
2303 result = TOKint64v;
2304 else
2305 result = TOKint32v;
2306 break;
2308 case FLAGS_unsigned:
2309 case FLAGS_decimal | FLAGS_unsigned:
2310 /* First that fits: uint, ulong
2312 if (n & 0xFFFFFFFF00000000LL)
2313 result = TOKuns64v;
2314 else
2315 result = TOKuns32v;
2316 break;
2318 case FLAGS_decimal | FLAGS_long:
2319 if (n & 0x8000000000000000LL)
2320 { error("signed integer overflow");
2321 result = TOKuns64v;
2323 else
2324 result = TOKint64v;
2325 break;
2327 case FLAGS_long:
2328 if (n & 0x8000000000000000LL)
2329 result = TOKuns64v;
2330 else
2331 result = TOKint64v;
2332 break;
2334 case FLAGS_unsigned | FLAGS_long:
2335 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
2336 result = TOKuns64v;
2337 break;
2339 default:
2340 #ifdef DEBUG
2341 printf("%x\n",flags);
2342 #endif
2343 assert(0);
2345 t->uns64value = n;
2346 return result;
2349 /**************************************
2350 * Read in characters, converting them to real.
2351 * Bugs:
2352 * Exponent overflow not detected.
2353 * Too much requested precision is not detected.
// Lex a floating-point literal starting at p.
// The characters of the literal are gathered into stringbuffer by the
// dblstate state machine below (embedded '_' are dropped), converted in
// one go, and the value is stored in t->float80value.
// Returns TOKfloat32v / TOKfloat64v / TOKfloat80v according to the
// f/F or l/L suffix (none means float64), or the matching
// TOKimaginary* token when an 'i' / 'I' suffix follows.
2356 TOK Lexer::inreal(Token *t)
// Entry/exit contracts, only compiled by DMC.
2357 #ifdef __DMC__
2358 __in
2360 assert(*p == '.' || isdigit(*p));
2362 __out (result)
2364 switch (result)
2366 case TOKfloat32v:
2367 case TOKfloat64v:
2368 case TOKfloat80v:
2369 case TOKimaginary32v:
2370 case TOKimaginary64v:
2371 case TOKimaginary80v:
2372 break;
2374 default:
2375 assert(0);
2378 __body
2379 #endif /* __DMC__ */
2380 { int dblstate;
2381 unsigned c;
2382 char hex; // is this a hexadecimal-floating-constant?
2383 TOK result;
2385 //printf("Lexer::inreal()\n");
2386 stringbuffer.reset();
2387 dblstate = 0;
2388 hex = 0;
// Outer loop: one input character per iteration. "goto Lnext" fetches
// the next character without storing the current one.
2389 Lnext:
2390 while (1)
2392 // Get next char from input
2393 c = *p++;
2394 //printf("dblstate = %d, c = '%c'\n", dblstate, c);
// Inner loop: "continue" re-dispatches the same character in the new
// state; "break" out of the switch falls to the break below and the
// character is stored.
2395 while (1)
2397 switch (dblstate)
2399 case 0: // opening state
2400 if (c == '0')
2401 dblstate = 9;
2402 else if (c == '.')
2403 dblstate = 3;
2404 else
2405 dblstate = 1;
2406 break;
// State 9: the previous character was a leading '0'; an x/X here marks
// a hex float. Anything else falls through to the digit states.
2408 case 9:
2409 dblstate = 1;
2410 if (c == 'X' || c == 'x')
2411 { hex++;
2412 break;
2414 case 1: // digits to left of .
2415 case 3: // digits to right of .
2416 case 7: // continuing exponent digits
2417 if (!isdigit(c) && !(hex && isxdigit(c)))
2419 if (c == '_')
2420 goto Lnext; // ignore embedded '_'
// Not a digit: move to the following state (2, 4 or 8) and look at
// this character again there.
2421 dblstate++;
2422 continue;
2424 break;
2426 case 2: // no more digits to left of .
2427 if (c == '.')
2428 { dblstate++;
2429 break;
// No '.' here: fall through and look for an exponent instead.
2431 case 4: // no more digits to right of .
2432 if ((c == 'E' || c == 'e') ||
2433 hex && (c == 'P' || c == 'p'))
2434 { dblstate = 5;
2435 hex = 0; // exponent is always decimal
2436 break;
2438 if (hex)
2439 error("binary-exponent-part required");
2440 goto done;
2442 case 5: // looking immediately to right of E
2443 dblstate++;
2444 if (c == '-' || c == '+')
2445 break;
// No sign: this character must itself be the first exponent digit.
2446 case 6: // 1st exponent digit expected
2447 if (!isdigit(c))
2448 error("exponent expected");
2449 dblstate++;
2450 break;
2452 case 8: // past end of exponent digits
2453 goto done;
2455 break;
2457 stringbuffer.writeByte(c);
// The character that ended the literal was already consumed by *p++;
// step back so p points at it again.
2459 done:
2460 p--;
2462 stringbuffer.writeByte(0);
// On DMC/Win32 the decimal point is forced to "." while converting
// and restored further down.
2464 #if _WIN32 && __DMC__
2465 char *save = __locale_decpoint;
2466 __locale_decpoint = ".";
2467 #endif
// The stored value is always the long-double conversion of the text.
2468 #ifdef IN_GCC
2469 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble);
2470 #else
2471 t->float80value = strtold((char *)stringbuffer.data, NULL);
2472 #endif
// errno is cleared after the conversion above, so the ERANGE test at
// the end only sees the float/double re-parse done in the switch.
2473 errno = 0;
2474 switch (*p)
2476 case 'F':
2477 case 'f':
// Result discarded: the text is parsed again at float precision.
2478 #ifdef IN_GCC
2479 real_t::parse((char *)stringbuffer.data, real_t::Float);
2480 #else
2481 strtof((char *)stringbuffer.data, NULL);
2482 #endif
2483 result = TOKfloat32v;
2484 p++;
2485 break;
// No suffix: parsed again at double precision, result discarded.
2487 default:
2488 #ifdef IN_GCC
2489 real_t::parse((char *)stringbuffer.data, real_t::Double);
2490 #else
2491 strtod((char *)stringbuffer.data, NULL);
2492 #endif
2493 result = TOKfloat64v;
2494 break;
2496 case 'l':
2497 if (!global.params.useDeprecated)
2498 error("'l' suffix is deprecated, use 'L' instead");
// falls through: 'l' is otherwise treated like 'L'
2499 case 'L':
2500 result = TOKfloat80v;
2501 p++;
2502 break;
// An i/I suffix turns the real token into the imaginary token of the
// same size.
2504 if (*p == 'i' || *p == 'I')
2506 if (!global.params.useDeprecated && *p == 'I')
2507 error("'I' suffix is deprecated, use 'i' instead");
2508 p++;
2509 switch (result)
2511 case TOKfloat32v:
2512 result = TOKimaginary32v;
2513 break;
2514 case TOKfloat64v:
2515 result = TOKimaginary64v;
2516 break;
2517 case TOKfloat80v:
2518 result = TOKimaginary80v;
2519 break;
2522 #if _WIN32 && __DMC__
2523 __locale_decpoint = save;
2524 #endif
2525 if (errno == ERANGE)
2526 error("number is not representable");
2527 return result;
2530 /*********************************************
2531 * Do pragma.
2532 * Currently, the only pragma supported is:
2533 * #line linnum [filespec]
2536 void Lexer::pragma()
2538 Token tok;
2539 int linnum;
2540 char *filespec = NULL;
2541 Loc loc = this->loc;
2543 while (isblank(*p)) p++;
2544 if (*p == '\n')
2545 goto Lerr;
2547 scan(&tok);
2548 if (tok.value != TOKidentifier || tok.ident != Id::line)
2549 goto Lerr;
2551 scan(&tok);
2552 if (tok.value == TOKint32v || tok.value == TOKint64v)
2553 linnum = tok.uns64value - 1;
2554 else
2555 goto Lerr;
2557 while (1)
2559 switch (*p)
2561 case 0:
2562 case 0x1A:
2563 case '\n':
2564 Lnewline:
2565 this->loc.linnum = linnum;
2566 if (filespec)
2567 this->loc.filename = filespec;
2568 return;
2570 case '\r':
2571 p++;
2572 if (*p != '\n')
2573 { p--;
2574 goto Lnewline;
2576 continue;
2578 case ' ':
2579 case '\t':
2580 case '\v':
2581 case '\f':
2582 p++;
2583 continue; // skip white space
2585 case '_':
2586 if (mod && memcmp(p, "__FILE__", 8) == 0)
2588 p += 8;
2589 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars());
2591 continue;
2593 case '"':
2594 if (filespec)
2595 goto Lerr;
2596 stringbuffer.reset();
2597 p++;
2598 while (1)
2599 { unsigned c;
2601 c = *p;
2602 switch (c)
2604 case '\n':
2605 case '\r':
2606 case 0:
2607 case 0x1A:
2608 goto Lerr;
2610 case '"':
2611 stringbuffer.writeByte(0);
2612 filespec = mem.strdup((char *)stringbuffer.data);
2613 p++;
2614 break;
2616 default:
2617 if (c & 0x80)
2618 { unsigned u = decodeUTF();
2619 if (u == PS || u == LS)
2620 goto Lerr;
2622 stringbuffer.writeByte(c);
2623 p++;
2624 continue;
2626 break;
2628 continue;
2630 default:
2631 if (*p & 0x80)
2632 { unsigned u = decodeUTF();
2633 if (u == PS || u == LS)
2634 goto Lnewline;
2636 goto Lerr;
2640 Lerr:
2641 // No problem: this is just a comment line
2642 while (*p != '\n')
2643 p++;
2645 // error(loc, "#line integer [\"filespec\"]\\n expected");
2649 /********************************************
2650 * Decode UTF character.
2651 * Issue error messages for invalid sequences.
2652 * Return decoded character, advance p to last character in UTF sequence.
2655 unsigned Lexer::decodeUTF()
2657 dchar_t u;
2658 unsigned char c;
2659 unsigned char *s = p;
2660 size_t len;
2661 size_t idx;
2662 char *msg;
2664 c = *s;
2665 assert(c & 0x80);
2667 // Check length of remaining string up to 6 UTF-8 characters
2668 for (len = 1; len < 6 && s[len]; len++)
2671 idx = 0;
2672 msg = utf_decodeChar(s, len, &idx, &u);
2673 p += idx - 1;
2674 if (msg)
2676 error("%s", msg);
2678 return u;
2682 /***************************************************
2683 * Parse doc comment embedded between t->ptr and p.
2684 * Remove trailing blanks and tabs from lines.
2685 * Replace all newlines with \n.
2686 * Remove leading comment character from each line.
2687 * Decide if it's a lineComment or a blockComment.
2688 * Append to previous one for this token.
2691 void Lexer::getDocComment(Token *t, unsigned lineComment)
2693 OutBuffer buf;
2694 unsigned char ct = t->ptr[2];
2695 unsigned char *q = t->ptr + 3; // start of comment text
2696 int linestart = 0;
2698 unsigned char *qend = p;
2699 if (ct == '*' || ct == '+')
2700 qend -= 2;
2702 /* Scan over initial row of ****'s or ++++'s or ////'s
2704 for (; q < qend; q++)
2706 if (*q != ct)
2707 break;
2710 /* Remove trailing row of ****'s or ++++'s
2712 if (ct != '/')
2714 for (; q < qend; qend--)
2716 if (qend[-1] != ct)
2717 break;
2721 for (; q < qend; q++)
2723 unsigned char c = *q;
2725 switch (c)
2727 case '*':
2728 case '+':
2729 if (linestart && c == ct)
2730 { linestart = 0;
2731 /* Trim preceding whitespace up to preceding \n
2733 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2734 buf.offset--;
2735 continue;
2737 break;
2739 case ' ':
2740 case '\t':
2741 break;
2743 case '\r':
2744 if (q[1] == '\n')
2745 continue; // skip the \r
2746 goto Lnewline;
2748 default:
2749 if (c == 226)
2751 // If LS or PS
2752 if (q[1] == 128 &&
2753 (q[2] == 168 || q[2] == 169))
2755 q += 2;
2756 goto Lnewline;
2759 linestart = 0;
2760 break;
2762 Lnewline:
2763 c = '\n'; // replace all newlines with \n
2764 case '\n':
2765 linestart = 1;
2767 /* Trim trailing whitespace
2769 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2770 buf.offset--;
2772 break;
2774 buf.writeByte(c);
2777 // Always end with a newline
2778 if (!buf.offset || buf.data[buf.offset - 1] != '\n')
2779 buf.writeByte('\n');
2781 buf.writeByte(0);
2783 // It's a line comment if the start of the doc comment comes
2784 // after other non-whitespace on the same line.
2785 unsigned char** dc = (lineComment && anyToken)
2786 ? &t->lineComment
2787 : &t->blockComment;
2789 // Combine with previous doc comment, if any
2790 if (*dc)
2791 *dc = combineComments(*dc, (unsigned char *)buf.data);
2792 else
2793 *dc = (unsigned char *)buf.extractData();
2796 /********************************************
2797 * Combine two document comments into one.
2800 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2)
2802 unsigned char *c = c2;
2804 if (c1)
2805 { c = c1;
2806 if (c2)
2807 { size_t len1 = strlen((char *)c1);
2808 size_t len2 = strlen((char *)c2);
2810 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1);
2811 memcpy(c, c1, len1);
2812 c[len1] = '\n';
2813 memcpy(c + len1 + 1, c2, len2);
2814 c[len1 + 1 + len2] = 0;
2817 return c;
2820 /********************************************
2821 * Create an identifier in the string table.
2824 Identifier *Lexer::idPool(const char *s)
2826 size_t len = strlen(s);
2827 StringValue *sv = stringtable.update(s, len);
2828 Identifier *id = (Identifier *) sv->ptrvalue;
2829 if (!id)
2831 id = new Identifier(sv->lstring.string, TOKidentifier);
2832 sv->ptrvalue = id;
2834 return id;
2837 /*********************************************
2838 * Create a unique identifier using the prefix s.
2841 Identifier *Lexer::uniqueId(const char *s, int num)
2842 { char buffer[32];
2843 size_t slen = strlen(s);
2845 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer));
2846 sprintf(buffer, "%s%d", s, num);
2847 return idPool(buffer);
2850 Identifier *Lexer::uniqueId(const char *s)
2852 static int num;
2853 return uniqueId(s, ++num);
2856 /****************************************
// One row of the keyword table below: the spelling of a keyword and
// the token value associated with it.
2859 struct Keyword
2860 { char *name;
2861 enum TOK value;
// Keyword table: maps each reserved word to its token value.
// Token::isKeyword() searches it by token value, and
// Lexer::initKeywords() enters each spelling into the string table.
// The order of the rows matters: initKeywords() leaves out the LAST two
// rows when global.params.Dversion == 1.
// NOTE(review): with V2 off, the last two rows are now "log_info" and
// "log_trace", not "ref" and "macro" -- confirm that trimming them is
// still what is wanted.
2864 static Keyword keywords[] =
2866 // { "", TOK },
2868 { "this", TOKthis },
2869 { "super", TOKsuper },
2870 { "assert", TOKassert },
2871 { "null", TOKnull },
2872 { "true", TOKtrue },
2873 { "false", TOKfalse },
2874 { "cast", TOKcast },
2875 { "new", TOKnew },
2876 { "delete", TOKdelete },
2877 { "throw", TOKthrow },
2878 { "module", TOKmodule },
2879 { "pragma", TOKpragma },
2880 { "typeof", TOKtypeof },
2881 { "typeid", TOKtypeid },
2883 { "template", TOKtemplate },
// Basic types
2885 { "void", TOKvoid },
2886 { "byte", TOKint8 },
2887 { "ubyte", TOKuns8 },
2888 { "short", TOKint16 },
2889 { "ushort", TOKuns16 },
2890 { "int", TOKint32 },
2891 { "uint", TOKuns32 },
2892 { "long", TOKint64 },
2893 { "ulong", TOKuns64 },
2894 { "cent", TOKcent, },
2895 { "ucent", TOKucent, },
2896 { "float", TOKfloat32 },
2897 { "double", TOKfloat64 },
2898 { "real", TOKfloat80 },
2900 { "bool", TOKbool },
2901 { "char", TOKchar },
2902 { "wchar", TOKwchar },
2903 { "dchar", TOKdchar },
2905 { "ifloat", TOKimaginary32 },
2906 { "idouble", TOKimaginary64 },
2907 { "ireal", TOKimaginary80 },
2909 { "cfloat", TOKcomplex32 },
2910 { "cdouble", TOKcomplex64 },
2911 { "creal", TOKcomplex80 },
2913 { "delegate", TOKdelegate },
2914 { "function", TOKfunction },
// Statements and control flow
2916 { "is", TOKis },
2917 { "if", TOKif },
2918 { "else", TOKelse },
2919 { "while", TOKwhile },
2920 { "for", TOKfor },
2921 { "do", TOKdo },
2922 { "switch", TOKswitch },
2923 { "case", TOKcase },
2924 { "default", TOKdefault },
2925 { "break", TOKbreak },
2926 { "continue", TOKcontinue },
2927 { "synchronized", TOKsynchronized },
2928 { "return", TOKreturn },
2929 { "goto", TOKgoto },
2930 { "try", TOKtry },
2931 { "catch", TOKcatch },
2932 { "finally", TOKfinally },
2933 { "with", TOKwith },
2934 { "asm", TOKasm },
2935 { "foreach", TOKforeach },
2936 { "foreach_reverse", TOKforeach_reverse },
2937 { "reversed", TOKreversed },
2938 { "scope", TOKscope },
// Declarations, storage classes and attributes
2940 { "struct", TOKstruct },
2941 { "class", TOKclass },
2942 { "interface", TOKinterface },
2943 { "union", TOKunion },
2944 { "enum", TOKenum },
2945 { "import", TOKimport },
2946 { "mixin", TOKmixin },
2947 { "static", TOKstatic },
2948 { "final", TOKfinal },
2949 { "const", TOKconst },
2950 { "typedef", TOKtypedef },
2951 { "alias", TOKalias },
2952 { "override", TOKoverride },
2953 { "abstract", TOKabstract },
2954 { "volatile", TOKvolatile },
2955 { "debug", TOKdebug },
2956 { "deprecated", TOKdeprecated },
2957 { "in", TOKin },
2958 { "out", TOKout },
2959 { "inout", TOKinout },
2960 { "lazy", TOKlazy },
2961 { "auto", TOKauto },
2963 { "align", TOKalign },
2964 { "extern", TOKextern },
2965 { "private", TOKprivate },
2966 { "package", TOKpackage },
2967 { "protected", TOKprotected },
2968 { "public", TOKpublic },
2969 { "export", TOKexport },
2971 { "body", TOKbody },
2972 { "invariant", TOKinvariant },
2973 { "unittest", TOKunittest },
2974 { "version", TOKversion },
2975 //{ "manifest", TOKmanifest },
2977 // Added after 1.0
2978 { "ref", TOKref },
2979 { "macro", TOKmacro },
// The word forms "and", "or" and "not" share the token values that
// initKeywords() spells "&&", "||" and "!".
2982 // TAL
2983 { "and", TOKandand },
2984 { "or", TOKoror },
2985 { "not", TOKnot },
2986 { "extends", TOKextends },
2987 { "log_error", TOKlog_error },
2988 { "log_warning", TOKlog_warning },
2989 { "log_info", TOKlog_info },
2990 { "log_trace", TOKlog_trace },
// Rows that are only compiled in when V2 is set
2991 #if V2
2992 { "pure", TOKpure },
2993 { "nothrow", TOKnothrow },
2994 { "__traits", TOKtraits },
2995 { "__overloadset", TOKoverloadset },
2996 #endif
2999 int Token::isKeyword()
3001 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++)
3003 if (keywords[u].value == value)
3004 return 1;
3006 return 0;
3009 void Lexer::initKeywords()
3010 { StringValue *sv;
3011 unsigned u;
3012 enum TOK v;
3013 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]);
3015 if (global.params.Dversion == 1)
3016 nkeywords -= 2;
3018 cmtable_init();
3020 for (u = 0; u < nkeywords; u++)
3021 { char *s;
3023 //printf("keyword[%d] = '%s'\n",u, keywords[u].name);
3024 s = keywords[u].name;
3025 v = keywords[u].value;
3026 sv = stringtable.insert(s, strlen(s));
3027 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v);
3029 //printf("tochars[%d] = '%s'\n",v, s);
3030 Token::tochars[v] = s;
3033 Token::tochars[TOKeof] = "EOF";
3034 Token::tochars[TOKlcurly] = "{";
3035 Token::tochars[TOKrcurly] = "}";
3036 Token::tochars[TOKlparen] = "(";
3037 Token::tochars[TOKrparen] = ")";
3038 Token::tochars[TOKlbracket] = "[";
3039 Token::tochars[TOKrbracket] = "]";
3040 Token::tochars[TOKsemicolon] = ";";
3041 Token::tochars[TOKcolon] = ":";
3042 Token::tochars[TOKcomma] = ",";
3043 Token::tochars[TOKdot] = ".";
3044 Token::tochars[TOKxor] = "^";
3045 Token::tochars[TOKxorass] = "^=";
3046 Token::tochars[TOKassign] = "=";
3047 Token::tochars[TOKconstruct] = "=";
3048 #if V2
3049 Token::tochars[TOKblit] = "=";
3050 #endif
3051 Token::tochars[TOKlt] = "<";
3052 Token::tochars[TOKgt] = ">";
3053 Token::tochars[TOKle] = "<=";
3054 Token::tochars[TOKge] = ">=";
3055 Token::tochars[TOKequal] = "==";
3056 Token::tochars[TOKnotequal] = "!=";
3057 Token::tochars[TOKnotidentity] = "!is";
3058 Token::tochars[TOKtobool] = "!!";
3059 Token::tochars[TOKat] = "@";
3061 Token::tochars[TOKunord] = "!<>=";
3062 Token::tochars[TOKue] = "!<>";
3063 Token::tochars[TOKlg] = "<>";
3064 Token::tochars[TOKleg] = "<>=";
3065 Token::tochars[TOKule] = "!>";
3066 Token::tochars[TOKul] = "!>=";
3067 Token::tochars[TOKuge] = "!<";
3068 Token::tochars[TOKug] = "!<=";
3070 Token::tochars[TOKnot] = "!";
3071 Token::tochars[TOKtobool] = "!!";
3072 Token::tochars[TOKshl] = "<<";
3073 Token::tochars[TOKshr] = ">>";
3074 Token::tochars[TOKushr] = ">>>";
3075 Token::tochars[TOKadd] = "+";
3076 Token::tochars[TOKmin] = "-";
3077 Token::tochars[TOKmul] = "*";
3078 Token::tochars[TOKdiv] = "/";
3079 Token::tochars[TOKmod] = "%";
3080 Token::tochars[TOKslice] = "..";
3081 Token::tochars[TOKdotdotdot] = "...";
3082 Token::tochars[TOKand] = "&";
3083 Token::tochars[TOKandand] = "&&";
3084 Token::tochars[TOKor] = "|";
3085 Token::tochars[TOKoror] = "||";
3086 Token::tochars[TOKarray] = "[]";
3087 Token::tochars[TOKindex] = "[i]";
3088 Token::tochars[TOKaddress] = "&";
3089 Token::tochars[TOKstar] = "*";
3090 Token::tochars[TOKtilde] = "~";
3091 Token::tochars[TOKdollar] = "$";
3092 Token::tochars[TOKcast] = "cast";
3093 Token::tochars[TOKplusplus] = "++";
3094 Token::tochars[TOKminusminus] = "--";
3095 Token::tochars[TOKtype] = "type";
3096 Token::tochars[TOKquestion] = "?";
3097 Token::tochars[TOKneg] = "-";
3098 Token::tochars[TOKuadd] = "+";
3099 Token::tochars[TOKvar] = "var";
3100 Token::tochars[TOKaddass] = "+=";
3101 Token::tochars[TOKminass] = "-=";
3102 Token::tochars[TOKmulass] = "*=";
3103 Token::tochars[TOKdivass] = "/=";
3104 Token::tochars[TOKmodass] = "%=";
3105 Token::tochars[TOKshlass] = "<<=";
3106 Token::tochars[TOKshrass] = ">>=";
3107 Token::tochars[TOKushrass] = ">>>=";
3108 Token::tochars[TOKandass] = "&=";
3109 Token::tochars[TOKorass] = "|=";
3110 Token::tochars[TOKcatass] = "~=";
3111 Token::tochars[TOKcat] = "~";
3112 Token::tochars[TOKcall] = "call";
3113 Token::tochars[TOKidentity] = "is";
3114 Token::tochars[TOKnotidentity] = "!is";
3115 Token::tochars[TOKendline] = "\\n";
3117 Token::tochars[TOKorass] = "|=";
3118 Token::tochars[TOKidentifier] = "identifier";
3120 // For debugging
3121 Token::tochars[TOKdotexp] = "dotexp";
3122 Token::tochars[TOKdotti] = "dotti";
3123 Token::tochars[TOKdotvar] = "dotvar";
3124 Token::tochars[TOKdottype] = "dottype";
3125 Token::tochars[TOKsymoff] = "symoff";
3126 Token::tochars[TOKtypedot] = "typedot";
3127 Token::tochars[TOKarraylength] = "arraylength";
3128 Token::tochars[TOKarrayliteral] = "arrayliteral";
3129 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral";
3130 Token::tochars[TOKstructliteral] = "structliteral";
3131 Token::tochars[TOKstring] = "string";
3132 Token::tochars[TOKdsymbol] = "symbol";
3133 Token::tochars[TOKtuple] = "tuple";
3134 Token::tochars[TOKdeclaration] = "declaration";
3135 Token::tochars[TOKdottd] = "dottd";
3136 Token::tochars[TOKlogger] = "logger";
3137 Token::tochars[TOKon_scope_exit] = "scope(exit)";