The expression "x in dict" is now a maybe type
[delight/core.git] / dmd / lexer.c
blob462659169e36ea648336d32aad0da9974e7abbd2
2 // Compiler implementation of the D programming language
3 // Copyright (c) 1999-2008 by Digital Mars
4 // All Rights Reserved
5 // written by Walter Bright
6 // http://www.digitalmars.com
7 // License for redistribution is by either the Artistic License
8 // in artistic.txt, or the GNU General Public License in gnu.txt.
9 // See the included readme.txt for details.
/* NOTE: This file has been patched from the original DMD distribution to
   work with the GDC compiler.

   Modified by David Friedman, December 2006
*/
17 /* Lexical Analyzer */
19 #include <stdio.h>
20 #include <string.h>
21 #include <ctype.h>
22 #include <stdarg.h>
23 #include <errno.h>
24 //#include <wchar.h>
25 #include <stdlib.h>
26 #include <assert.h>
27 #include <sys/time.h>
29 #ifdef IN_GCC
31 #include <time.h>
32 #include "mem.h"
34 #else
36 #if __GNUC__
37 #include <time.h>
38 #endif
40 #if _WIN32
41 #include "..\root\mem.h"
42 #else
43 #include "../root/mem.h"
44 #endif
45 #endif
47 #include "stringtable.h"
49 #include "lexer.h"
50 #include "utf.h"
51 #include "identifier.h"
52 #include "id.h"
53 #include "module.h"
55 #if _WIN32 && __DMC__
56 // from \dm\src\include\setlocal.h
57 extern "C" char * __cdecl __locale_decpoint;
58 #endif
60 extern int HtmlNamedEntity(unsigned char *p, int length);
62 #define LS 0x2028 // UTF line separator
63 #define PS 0x2029 // UTF paragraph separator
/********************************************
 * Private character-class table.
 *
 * cmtable[] holds one flag byte per 8-bit character value; the CM* bits say
 * whether that character is an octal digit, a hex digit, or a character
 * that may appear inside an identifier.  The table is all zero until
 * cmtable_init() has been called.
 */

static unsigned char cmtable[256];

const int CMoctal  = 0x1;
const int CMhex    = 0x2;
const int CMidchar = 0x4;

/* Each query returns the matching flag bit (non-zero) or 0. */
inline unsigned char isoctal(unsigned char c)
{
    return cmtable[c] & CMoctal;
}

inline unsigned char ishex(unsigned char c)
{
    return cmtable[c] & CMhex;
}

inline unsigned char isidchar(unsigned char c)
{
    return cmtable[c] & CMidchar;
}

/* Fill in cmtable[] for every character value 0..255. */
static void cmtable_init()
{
    const unsigned tableSize = sizeof(cmtable) / sizeof(cmtable[0]);

    for (unsigned ch = 0; ch != tableSize; ++ch)
    {
        const bool octalDigit = (ch >= '0' && ch <= '7');
        const bool hexLetter  = (ch >= 'a' && ch <= 'f') ||
                                (ch >= 'A' && ch <= 'F');

        if (octalDigit)
            cmtable[ch] |= CMoctal;
        if (hexLetter || isdigit(ch))
            cmtable[ch] |= CMhex;
        if (ch == '_' || isalnum(ch))
            cmtable[ch] |= CMidchar;
    }
}
93 /************************* Token **********************************************/
95 char *Token::tochars[TOKMAX];
97 void *Token::operator new(size_t size)
98 { Token *t;
100 if (Lexer::freelist)
102 t = Lexer::freelist;
103 Lexer::freelist = t->next;
104 return t;
107 return ::operator new(size);
#ifdef DEBUG
/* Debug helper: write the token's text form to stdmsg, one per line. */
void Token::print()
{
    char *text = toChars();

    fprintf(stdmsg, "%s\n", text);
}
#endif
117 char *Token::toChars()
118 { char *p;
119 static char buffer[3 + 3 * sizeof(value) + 1];
121 p = buffer;
122 switch (value)
124 case TOKint32v:
125 #if IN_GCC
126 sprintf(buffer,"%d",(d_int32)int64value);
127 #else
128 sprintf(buffer,"%d",int32value);
129 #endif
130 break;
132 case TOKuns32v:
133 case TOKcharv:
134 case TOKwcharv:
135 case TOKdcharv:
136 #if IN_GCC
137 sprintf(buffer,"%uU",(d_uns32)uns64value);
138 #else
139 sprintf(buffer,"%uU",uns32value);
140 #endif
141 break;
143 case TOKint64v:
144 sprintf(buffer,"%"PRIdMAX"L",int64value);
145 break;
147 case TOKuns64v:
148 sprintf(buffer,"%"PRIuMAX"UL",uns64value);
149 break;
151 #if IN_GCC
152 case TOKfloat32v:
153 case TOKfloat64v:
154 case TOKfloat80v:
155 float80value.format(buffer, sizeof(buffer));
156 break;
157 case TOKimaginary32v:
158 case TOKimaginary64v:
159 case TOKimaginary80v:
160 float80value.format(buffer, sizeof(buffer));
161 // %% buffer
162 strcat(buffer, "i");
163 break;
164 #else
165 case TOKfloat32v:
166 sprintf(buffer,"%Lgf", float80value);
167 break;
169 case TOKfloat64v:
170 sprintf(buffer,"%Lg", float80value);
171 break;
173 case TOKfloat80v:
174 sprintf(buffer,"%LgL", float80value);
175 break;
177 case TOKimaginary32v:
178 sprintf(buffer,"%Lgfi", float80value);
179 break;
181 case TOKimaginary64v:
182 sprintf(buffer,"%Lgi", float80value);
183 break;
185 case TOKimaginary80v:
186 sprintf(buffer,"%LgLi", float80value);
187 break;
188 #endif
191 case TOKstring:
192 #if CSTRINGS
193 p = string;
194 #else
195 { OutBuffer buf;
197 buf.writeByte('"');
198 for (size_t i = 0; i < len; )
199 { unsigned c;
201 utf_decodeChar((unsigned char *)ustring, len, &i, &c);
202 switch (c)
204 case 0:
205 break;
207 case '"':
208 case '\\':
209 buf.writeByte('\\');
210 default:
211 if (isprint(c))
212 buf.writeByte(c);
213 else if (c <= 0x7F)
214 buf.printf("\\x%02x", c);
215 else if (c <= 0xFFFF)
216 buf.printf("\\u%04x", c);
217 else
218 buf.printf("\\U%08x", c);
219 continue;
221 break;
223 buf.writeByte('"');
224 if (postfix)
225 buf.writeByte('"');
226 buf.writeByte(0);
227 p = (char *)buf.extractData();
229 #endif
230 break;
232 case TOKidentifier:
233 case TOKenum:
234 case TOKstruct:
235 case TOKimport:
236 CASE_BASIC_TYPES:
237 p = ident->toChars();
238 break;
240 default:
241 p = toChars(value);
242 break;
244 return p;
247 char *Token::toChars(enum TOK value)
248 { char *p;
249 static char buffer[3 + 3 * sizeof(value) + 1];
251 p = tochars[value];
252 if (!p)
253 { sprintf(buffer,"TOK%d",value);
254 p = buffer;
256 return p;
259 /*************************** Lexer ********************************************/
261 Token *Lexer::freelist = NULL;
262 StringTable Lexer::stringtable;
263 OutBuffer Lexer::stringbuffer;
265 Lexer::Lexer(Module *mod,
266 unsigned char *base, unsigned begoffset, unsigned endoffset,
267 int doDocComment, int commentToken, bool dltSyntax)
268 : loc(mod, 1), dltSyntax(dltSyntax)
270 //printf("Lexer::Lexer(%p,%d)\n",base,length);
271 //printf("lexer.mod = %p, %p\n", mod, this->loc.mod);
272 memset(&token,0,sizeof(token));
273 this->base = base;
274 this->end = base + endoffset;
275 p = base + begoffset;
276 this->mod = mod;
277 this->doDocComment = doDocComment;
278 this->anyToken = 0;
279 this->commentToken = commentToken;
280 this->nesting = 0;
281 this->indent = 0;
282 this->atStartOfLine = 1;
283 this->incLineno = 0;
284 //initKeywords();
286 /* If first line starts with '#!', ignore the line
289 if (p[0] == '#' && p[1] =='!')
291 p += 2;
292 while (1)
293 { unsigned char c = *p;
294 switch (c)
296 case '\n':
297 p++;
298 break;
300 case '\r':
301 p++;
302 if (*p == '\n')
303 p++;
304 break;
306 case 0:
307 case 0x1A:
308 break;
310 default:
311 if (c & 0x80)
312 { unsigned u = decodeUTF();
313 if (u == PS || u == LS)
314 break;
316 p++;
317 continue;
319 break;
321 loc.linnum = 2;
326 void Lexer::error(const char *format, ...)
328 if (mod && !global.gag)
330 char *p = loc.toChars();
331 if (*p)
332 fprintf(stdmsg, "%s: ", p);
333 mem.free(p);
335 va_list ap;
336 va_start(ap, format);
337 vfprintf(stdmsg, format, ap);
338 va_end(ap);
340 fprintf(stdmsg, "\n");
341 fflush(stdmsg);
343 if (global.errors >= 20) // moderate blizzard of cascading messages
344 fatal();
346 global.errors++;
349 void Lexer::error(Loc loc, const char *format, ...)
351 if (mod && !global.gag)
353 char *p = loc.toChars();
354 if (*p)
355 fprintf(stdmsg, "%s: ", p);
356 mem.free(p);
358 va_list ap;
359 va_start(ap, format);
360 vfprintf(stdmsg, format, ap);
361 va_end(ap);
363 fprintf(stdmsg, "\n");
364 fflush(stdmsg);
366 if (global.errors >= 20) // moderate blizzard of cascading messages
367 fatal();
369 global.errors++;
372 TOK Lexer::nextToken()
373 { Token *t;
375 if (token.next)
377 t = token.next;
378 memcpy(&token,t,sizeof(Token));
379 t->next = freelist;
380 freelist = t;
382 else
384 scan(&token);
386 //token.print();
387 return token.value;
390 Token *Lexer::peek(Token *ct)
391 { Token *t;
393 if (ct->next)
394 t = ct->next;
395 else
397 t = new Token();
398 scan(t);
399 t->next = NULL;
400 ct->next = t;
402 return t;
405 /*********************************
406 * tk is on the opening (.
407 * Look ahead and return token that is past the closing ).
410 Token *Lexer::peekPastParen(Token *tk)
412 //printf("peekPastParen()\n");
413 int parens = 1;
414 int curlynest = 0;
415 while (1)
417 tk = peek(tk);
418 //tk->print();
419 switch (tk->value)
421 case TOKlparen:
422 parens++;
423 continue;
425 case TOKrparen:
426 --parens;
427 if (parens)
428 continue;
429 tk = peek(tk);
430 break;
432 case TOKlcurly:
433 curlynest++;
434 continue;
436 case TOKrcurly:
437 if (--curlynest >= 0)
438 continue;
439 break;
441 case TOKsemicolon:
442 if (curlynest)
443 continue;
444 break;
446 case TOKeof:
447 break;
449 default:
450 continue;
452 return tk;
456 /**********************************
457 * Determine if string is a valid Identifier.
458 * Placed here because of commonality with Lexer functionality.
459 * Returns:
460 * 0 invalid
463 int Lexer::isValidIdentifier(char *p)
465 size_t len;
466 size_t idx;
468 if (!p || !*p)
469 goto Linvalid;
471 if (*p >= '0' && *p <= '9') // beware of isdigit() on signed chars
472 goto Linvalid;
474 len = strlen(p);
475 idx = 0;
476 while (p[idx])
477 { dchar_t dc;
479 char *q = utf_decodeChar((unsigned char *)p, len, &idx, &dc);
480 if (q)
481 goto Linvalid;
483 if (!((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
484 goto Linvalid;
486 return 1;
488 Linvalid:
489 return 0;
/****************************
 * Turn next token in buffer into a token.
 * The result is stored in *t, and p is advanced past the text consumed.
 */
// Lexer::scan: read the next token from the source text at p into *t.
//
// t->ptr is set to the start of the token text and t->value to its kind;
// depending on the kind, other fields (ident, ustring/len/postfix, numeric
// value) are filled in as well.  White space is skipped.  Comments are
// skipped too unless commentToken is set, in which case they are returned
// as TOKcomment.
//
// When dltSyntax is set:
//  - a newline outside any brackets returns TOKendline, and the line number
//    is bumped on the following call (incLineno);
//  - at the start of a line the leading tabs are counted and compared with
//    the expected depth `indent`: fewer tabs lowers `indent` by one and
//    returns a synthesized TOKrcurly, more tabs is reported as an error;
//  - a ':' outside any brackets raises `indent` by one;
//  - '#' starts a comment that runs to the end of the line.
496 void Lexer::scan(Token *t)
498 unsigned lastLine = loc.linnum;
499 unsigned linnum;
501 // Delayed line-number updating
502 if (incLineno)
504 assert(incLineno == 1);
505 incLineno = 0;
506 loc.linnum++;
509 t->blockComment = NULL;
510 t->lineComment = NULL;
511 while (1)
513 t->ptr = p;
// Indentation handling for dltSyntax.  p itself is not advanced past the
// tabs here; they are skipped as ordinary white space by the switch below.
515 if (dltSyntax && atStartOfLine) {
516 // Check indent
517 int i;
518 for (i = 0; p[i] == '\t'; i++) {
520 if (p[i] == ' ') {
521 error("Whitespace error: use tabs to indent!");
// A line holding only a '#' comment does not take part in the indent check.
523 if (p[i] == '#') {
524 p += i;
525 atStartOfLine = 0;
526 } else if (p[i] != '\n' && p[i] != '\r') {
527 if (p[i] == '\0')
528 i = 0; // End-of-file always has no indent
529 if (i > indent) {
530 error("unexpected indentation (expected %d tabs, not %d)",
531 indent, i);
532 } else if (i < indent) {
// Close one block per call; atStartOfLine stays set, so the next call
// compares the same line against the lowered indent again.
533 indent -= 1;
534 t->value = TOKrcurly;
535 return;
537 atStartOfLine = 0;
538 } /* else ignore blank line */
541 //printf("p = %p, *p = '%c'\n",p,*p);
542 switch (*p)
544 case 0:
545 case 0x1A:
546 t->value = TOKeof; // end of file
547 return;
549 case ' ':
550 case '\t':
551 case '\v':
552 case '\f':
553 p++;
554 continue; // skip white space
556 case '\r':
557 if (p[1] == '\n') { // if CRLF
558 p++;
559 continue;
561 // fall-through
562 case '\n':
563 p++;
564 if (dltSyntax)
566 // Delay incrementing the line number until after sending
567 // the TOKendline, for better error messages
568 assert(!incLineno);
569 incLineno++;
571 if (!nesting)
573 atStartOfLine = 1;
574 t->value = TOKendline;
575 return;
578 else
579 loc.linnum++;
580 continue; // Ignore newlines inside brackets
581 case '0': case '1': case '2': case '3': case '4':
582 case '5': case '6': case '7': case '8': case '9':
583 t->value = number(t);
584 return;
586 #if CSTRINGS
587 case '\'':
588 t->value = charConstant(t, 0);
589 return;
591 case '"':
592 t->value = stringConstant(t,0);
593 return;
595 case 'l':
596 case 'L':
597 if (p[1] == '\'')
599 p++;
600 t->value = charConstant(t, 1);
601 return;
603 else if (p[1] == '"')
605 p++;
606 t->value = stringConstant(t, 1);
607 return;
609 #else
610 case '\'':
611 t->value = charConstant(t,0);
612 return;
// r"..." and `...` are wysiwyg strings; a plain 'r' starts an identifier.
614 case 'r':
615 if (p[1] != '"')
616 goto case_ident;
617 p++;
618 case '`':
619 t->value = wysiwygStringConstant(t, *p);
620 return;
// x"..." is a hex string; a plain 'x' starts an identifier.
622 case 'x':
623 if (p[1] != '"')
624 goto case_ident;
625 p++;
626 t->value = hexStringConstant(t);
627 return;
629 #if V2
630 case 'q':
631 if (p[1] == '"')
633 p++;
634 t->value = delimitedStringConstant(t);
635 return;
637 else if (p[1] == '{')
639 p++;
640 t->value = tokenStringConstant(t);
641 return;
643 else
644 goto case_ident;
645 #endif
647 case '"':
648 t->value = escapeStringConstant(t,0);
649 return;
// A run of backslash escapes outside quotes forms one string token.
651 case '\\': // escaped string literal
652 { unsigned c;
654 stringbuffer.reset();
657 p++;
658 switch (*p)
660 case 'u':
661 case 'U':
662 case '&':
663 c = escapeSequence();
664 stringbuffer.writeUTF8(c);
665 break;
667 default:
668 c = escapeSequence();
669 stringbuffer.writeByte(c);
670 break;
672 } while (*p == '\\');
673 t->len = stringbuffer.offset;
674 stringbuffer.writeByte(0);
675 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
676 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
677 t->postfix = 0;
678 t->value = TOKstring;
679 return;
682 case 'l':
683 case 'L':
684 #endif
685 case 'a': case 'b': case 'c': case 'd': case 'e':
686 case 'f': case 'g': case 'h': case 'i': case 'j':
687 case 'k': case 'm': case 'n': case 'o':
688 #if V2
689 case 'p': /*case 'q': case 'r':*/ case 's': case 't':
690 #else
691 case 'p': case 'q': /*case 'r':*/ case 's': case 't':
692 #endif
693 case 'u': case 'v': case 'w': /*case 'x':*/ case 'y':
694 case 'z':
695 case 'A': case 'B': case 'C': case 'D': case 'E':
696 case 'F': case 'G': case 'H': case 'I': case 'J':
697 case 'K': case 'M': case 'N': case 'O':
698 case 'P': case 'Q': case 'R': case 'S': case 'T':
699 case 'U': case 'V': case 'W': case 'X': case 'Y':
700 case 'Z':
701 case '_':
// Identifier or keyword: the text is looked up in the shared string table,
// an Identifier is created on first sight, and the token kind is taken
// from that Identifier's value.
702 case_ident:
703 { unsigned char c;
704 StringValue *sv;
705 Identifier *id;
709 c = *++p;
710 } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
711 sv = stringtable.update((char *)t->ptr, p - t->ptr);
712 id = (Identifier *) sv->ptrvalue;
713 if (!id)
714 { id = new Identifier(sv->lstring.string,TOKidentifier);
715 sv->ptrvalue = id;
717 t->ident = id;
718 t->value = (enum TOK) id->value;
719 anyToken = 1;
// Identifiers starting with '_' may be special tokens (compared against
// Id::FILE, Id::LINE, Id::DATE, ...) that are replaced by a string or
// integer literal token.
720 if (*t->ptr == '_') // if special identifier token
722 static char date[11+1];
723 static char time[8+1];
724 static char timestamp[24+1];
// The date/time strings are cut out of ctime()'s text once, on first use.
726 if (!date[0]) // lazy evaluation
727 { time_t t;
728 char *p;
730 ::time(&t);
731 p = ctime(&t);
732 assert(p);
733 sprintf(date, "%.6s %.4s", p + 4, p + 20);
734 sprintf(time, "%.8s", p + 11);
735 sprintf(timestamp, "%.24s", p);
738 if (mod && id == Id::FILE)
740 t->ustring = (unsigned char *)(loc.filename ? loc.filename : mod->ident->toChars());
741 goto Lstring;
743 else if (mod && id == Id::LINE)
745 t->value = TOKint64v;
746 t->uns64value = loc.linnum;
748 else if (id == Id::DATE)
750 t->ustring = (unsigned char *)date;
751 goto Lstring;
753 else if (id == Id::TIME)
755 t->ustring = (unsigned char *)time;
756 goto Lstring;
758 else if (id == Id::VENDOR)
760 #ifdef IN_GCC
761 t->ustring = (unsigned char *)"GDC";
762 #else
763 t->ustring = (unsigned char *)"Digital Mars D";
764 #endif
765 goto Lstring;
767 else if (id == Id::TIMESTAMP)
769 t->ustring = (unsigned char *)timestamp;
770 Lstring:
771 t->value = TOKstring;
772 Llen:
773 t->postfix = 0;
774 t->len = strlen((char *)t->ustring);
// The version token becomes major * 1000 + minor, parsed from
// global.version starting at its second character.
776 else if (id == Id::VERSIONX)
777 { unsigned major = 0;
778 unsigned minor = 0;
780 for (char *p = global.version + 1; 1; p++)
782 char c = *p;
783 if (isdigit(c))
784 minor = minor * 10 + c - '0';
785 else if (c == '.')
786 { major = minor;
787 minor = 0;
789 else
790 break;
792 t->value = TOKint64v;
793 t->uns64value = major * 1000 + minor;
795 #if V2
796 else if (id == Id::EOFX)
798 t->value = TOKeof;
799 // Advance scanner to end of file
800 while (!(*p == 0 || *p == 0x1A))
801 p++;
803 #endif
805 //printf("t->value = %d\n",t->value);
806 return;
// '/' starts a /* */, // or /+ +/ comment, the /= operator, or plain
// division.
809 case '/':
810 p++;
811 switch (*p)
813 case '=':
814 p++;
815 t->value = TOKdivass;
816 return;
818 case '*':
819 p++;
820 linnum = loc.linnum;
821 while (1)
823 while (1)
824 { unsigned char c = *p;
825 switch (c)
827 case '/':
828 break;
830 case '\n':
831 loc.linnum++;
832 p++;
833 continue;
835 case '\r':
836 p++;
837 if (*p != '\n')
838 loc.linnum++;
839 continue;
841 case 0:
842 case 0x1A:
843 error("unterminated /* */ comment");
844 p = end;
845 t->value = TOKeof;
846 return;
848 default:
849 if (c & 0x80)
850 { unsigned u = decodeUTF();
851 if (u == PS || u == LS)
852 loc.linnum++;
854 p++;
855 continue;
857 break;
// A '/' ends the comment only when preceded by '*', and that '*' must not
// be the one that opened the comment.
859 p++;
860 if (p[-2] == '*' && p - 3 != t->ptr)
861 break;
863 if (commentToken)
865 t->value = TOKcomment;
866 return;
868 else if (doDocComment && t->ptr[2] == '*' && p - 4 != t->ptr)
869 { // if /** but not /**/
870 getDocComment(t, lastLine == linnum);
872 continue;
874 case '/': // do // style comments
875 linnum = loc.linnum;
876 while (1)
877 { unsigned char c = *++p;
878 switch (c)
880 case '\n':
881 break;
883 case '\r':
884 if (p[1] == '\n')
885 p++;
886 break;
888 case 0:
889 case 0x1A:
890 if (commentToken)
892 p = end;
893 t->value = TOKcomment;
894 return;
896 if (doDocComment && t->ptr[2] == '/')
897 getDocComment(t, lastLine == linnum);
898 p = end;
899 t->value = TOKeof;
900 return;
902 default:
903 if (c & 0x80)
904 { unsigned u = decodeUTF();
905 if (u == PS || u == LS)
906 break;
908 continue;
910 break;
913 if (commentToken)
915 p++;
916 loc.linnum++;
917 t->value = TOKcomment;
918 return;
920 if (doDocComment && t->ptr[2] == '/')
921 getDocComment(t, lastLine == linnum);
923 p++;
924 loc.linnum++;
925 continue;
// /+ +/ comments nest; `nest` counts the currently open levels.
927 case '+':
928 { int nest;
930 linnum = loc.linnum;
931 p++;
932 nest = 1;
933 while (1)
934 { unsigned char c = *p;
935 switch (c)
937 case '/':
938 p++;
939 if (*p == '+')
941 p++;
942 nest++;
944 continue;
946 case '+':
947 p++;
948 if (*p == '/')
950 p++;
951 if (--nest == 0)
952 break;
954 continue;
956 case '\r':
957 p++;
958 if (*p != '\n')
959 loc.linnum++;
960 continue;
962 case '\n':
963 loc.linnum++;
964 p++;
965 continue;
967 case 0:
968 case 0x1A:
969 error("unterminated /+ +/ comment");
970 p = end;
971 t->value = TOKeof;
972 return;
974 default:
975 if (c & 0x80)
976 { unsigned u = decodeUTF();
977 if (u == PS || u == LS)
978 loc.linnum++;
980 p++;
981 continue;
983 break;
985 if (commentToken)
987 t->value = TOKcomment;
988 return;
990 if (doDocComment && t->ptr[2] == '+' && p - 4 != t->ptr)
991 { // if /++ but not /++/
992 getDocComment(t, lastLine == linnum);
994 continue;
997 t->value = TOKdiv;
998 return;
// '.' followed by a digit is a floating point literal; otherwise it is
// '.', '..' or '...'.
1000 case '.':
1001 p++;
1002 if (isdigit(*p))
1003 { /* Note that we don't allow ._1 and ._ as being
1004 * valid floating point numbers.
1006 p--;
1007 t->value = inreal(t);
1009 else if (p[0] == '.')
1011 if (p[1] == '.')
1012 { p += 2;
1013 t->value = TOKdotdotdot;
1015 else
1016 { p++;
1017 t->value = TOKslice;
1020 else
1021 t->value = TOKdot;
1022 return;
1024 case '&':
1025 p++;
1026 if (*p == '=')
1027 { p++;
1028 t->value = TOKandass;
1030 else if (*p == '&')
1031 { p++;
1032 t->value = TOKandand;
// The token is still produced after the error is reported.
1033 if (dltSyntax)
1034 error("Use 'and' instead of '&&'");
1036 else
1037 t->value = TOKand;
1038 return;
1040 case '|':
1041 p++;
1042 if (*p == '=')
1043 { p++;
1044 t->value = TOKorass;
1046 else if (*p == '|')
1047 { p++;
1048 t->value = TOKoror;
// The token is still produced after the error is reported.
1049 if (dltSyntax)
1050 error("Use 'or' instead of '||'");
1052 else
1053 t->value = TOKor;
1054 return;
1056 case '-':
1057 p++;
1058 if (*p == '=')
1059 { p++;
1060 t->value = TOKminass;
1062 #if 0
1063 else if (*p == '>')
1064 { p++;
1065 t->value = TOKarrow;
1067 #endif
1068 else if (*p == '-')
1069 { p++;
1070 t->value = TOKminusminus;
1072 else
1073 t->value = TOKmin;
1074 return;
1076 case '+':
1077 p++;
1078 if (*p == '=')
1079 { p++;
1080 t->value = TOKaddass;
1082 else if (*p == '+')
1083 { p++;
1084 t->value = TOKplusplus;
1086 else
1087 t->value = TOKadd;
1088 return;
1090 case '<':
1091 p++;
1092 if (*p == '=')
1093 { p++;
1094 t->value = TOKle; // <=
1096 else if (*p == '<')
1097 { p++;
1098 if (*p == '=')
1099 { p++;
1100 t->value = TOKshlass; // <<=
1102 else
1103 t->value = TOKshl; // <<
1105 else if (*p == '>')
1106 { p++;
1107 if (*p == '=')
1108 { p++;
1109 t->value = TOKleg; // <>=
1111 else
1112 t->value = TOKlg; // <>
1114 else
1115 t->value = TOKlt; // <
1116 return;
1118 case '>':
1119 p++;
1120 if (*p == '=')
1121 { p++;
1122 t->value = TOKge; // >=
1124 else if (*p == '>')
1125 { p++;
1126 if (*p == '=')
1127 { p++;
1128 t->value = TOKshrass; // >>=
1130 else if (*p == '>')
1131 { p++;
1132 if (*p == '=')
1133 { p++;
1134 t->value = TOKushrass; // >>>=
1136 else
1137 t->value = TOKushr; // >>>
1139 else
1140 t->value = TOKshr; // >>
1142 else
1143 t->value = TOKgt; // >
1144 return;
1146 case '!':
1147 p++;
1148 if (*p == '=')
1149 { p++;
// '!==' is only recognised when global.params.Dversion is 1.
1150 if (*p == '=' && global.params.Dversion == 1)
1151 { p++;
1152 t->value = TOKnotidentity; // !==
1154 else
1155 t->value = TOKnotequal; // !=
1157 else if (*p == '<')
1158 { p++;
1159 if (*p == '>')
1160 { p++;
1161 if (*p == '=')
1162 { p++;
1163 t->value = TOKunord; // !<>=
1165 else
1166 t->value = TOKue; // !<>
1168 else if (*p == '=')
1169 { p++;
1170 t->value = TOKug; // !<=
1172 else
1173 t->value = TOKuge; // !<
1175 else if (*p == '>')
1176 { p++;
1177 if (*p == '=')
1178 { p++;
1179 t->value = TOKul; // !>=
1181 else
1182 t->value = TOKule; // !>
1184 else
1185 t->value = TOKnot; // !
1186 return;
1188 case '=':
1189 p++;
1190 if (*p == '=')
1191 { p++;
// '===' is only recognised when global.params.Dversion is 1.
1192 if (*p == '=' && global.params.Dversion == 1)
1193 { p++;
1194 t->value = TOKidentity; // ===
1196 else
1197 t->value = TOKequal; // ==
1199 else
1200 t->value = TOKassign; // =
1201 return;
1203 case '~':
1204 p++;
1205 if (*p == '=')
1206 { p++;
1207 t->value = TOKcatass; // ~=
1209 else
1210 t->value = TOKtilde; // ~
1211 return;
// Brackets keep `nesting` up to date: every opener increments it, every
// closer decrements it, and a closer seen at depth 0 is reported as an
// error but still returned as a token.
1213 #define NESTED(cin,tokin,cout,tokout) \
1214 case cin: nesting++; p++; t->value = tokin; return;\
1215 case cout: if (nesting == 0) {error("Unexpected '%c'", cout);} else {nesting--;} p++; t->value = tokout; return;
1217 NESTED('(', TOKlparen, ')', TOKrparen)
1218 NESTED('[', TOKlbracket, ']', TOKrbracket)
1219 NESTED('{', TOKlcurly, '}', TOKrcurly)
1220 #undef NESTED
1222 #define SINGLE(c,tok) case c: p++; t->value = tok; return;
1223 SINGLE('?', TOKquestion)
1224 SINGLE(',', TOKcomma)
1225 SINGLE(';', TOKsemicolon)
1226 SINGLE('$', TOKdollar)
1227 SINGLE('@', TOKat)
1229 #undef SINGLE
// A ':' outside brackets raises the expected indent by one.  This is not
// conditional on dltSyntax.
1231 case ':':
1232 p++;
1233 if (!nesting)
1234 indent += 1;
1235 t->value = TOKcolon;
1236 return;
1238 #define DOUBLE(c1,tok1,c2,tok2) \
1239 case c1: \
1240 p++; \
1241 if (*p == c2) \
1242 { p++; \
1243 t->value = tok2; \
1245 else \
1246 t->value = tok1; \
1247 return;
1249 DOUBLE('*', TOKmul, '=', TOKmulass)
1250 DOUBLE('%', TOKmod, '=', TOKmodass)
1251 DOUBLE('^', TOKxor, '=', TOKxorass)
1253 #undef DOUBLE
1255 case '#': // do # style comments and pragmas
1256 if (dltSyntax)
// NOTE(review): this loop stops only at '\n'; it does not test for 0 or
// 0x1A, so a '#' comment on a final line without a newline looks like it
// would read past the end-of-file marker -- confirm and add an EOF test.
1258 do { p++; } while (*p != '\n');
1260 else
1262 p++;
1263 pragma();
1265 continue;
1267 default:
1268 { unsigned char c = *p;
1270 if (c & 0x80)
1271 { unsigned u = decodeUTF();
1273 // Check for start of unicode identifier
1274 if (isUniAlpha(u))
1275 goto case_ident;
1277 if (u == PS || u == LS)
1279 loc.linnum++;
1280 p++;
1281 continue;
// Anything else is reported and skipped one byte at a time.
1284 if (isprint(c))
1285 error("unsupported char '%c'", c);
1286 else
1287 error("unsupported char 0x%02x", c);
1288 p++;
1289 continue;
1295 /*******************************************
1296 * Parse escape sequence.
1299 unsigned Lexer::escapeSequence()
1300 { unsigned c;
1301 int n;
1302 int ndigits;
1304 c = *p;
1305 switch (c)
1307 case '\'':
1308 case '"':
1309 case '?':
1310 case '\\':
1311 Lconsume:
1312 p++;
1313 break;
1315 case 'a': c = 7; goto Lconsume;
1316 case 'b': c = 8; goto Lconsume;
1317 case 'f': c = 12; goto Lconsume;
1318 case 'n': c = 10; goto Lconsume;
1319 case 'r': c = 13; goto Lconsume;
1320 case 't': c = 9; goto Lconsume;
1321 case 'v': c = 11; goto Lconsume;
1323 case 'u':
1324 ndigits = 4;
1325 goto Lhex;
1326 case 'U':
1327 ndigits = 8;
1328 goto Lhex;
1329 case 'x':
1330 ndigits = 2;
1331 Lhex:
1332 p++;
1333 c = *p;
1334 if (ishex(c))
1335 { unsigned v;
1337 n = 0;
1338 v = 0;
1339 while (1)
1341 if (isdigit(c))
1342 c -= '0';
1343 else if (islower(c))
1344 c -= 'a' - 10;
1345 else
1346 c -= 'A' - 10;
1347 v = v * 16 + c;
1348 c = *++p;
1349 if (++n == ndigits)
1350 break;
1351 if (!ishex(c))
1352 { error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
1353 break;
1356 if (ndigits != 2 && !utf_isValidDchar(v))
1357 error("invalid UTF character \\U%08x", v);
1358 c = v;
1360 else
1361 error("undefined escape hex sequence \\%c\n",c);
1362 break;
1364 case '&': // named character entity
1365 for (unsigned char *idstart = ++p; 1; p++)
1367 switch (*p)
1369 case ';':
1370 c = HtmlNamedEntity(idstart, p - idstart);
1371 if (c == ~0)
1372 { error("unnamed character entity &%.*s;", (int)(p - idstart), idstart);
1373 c = ' ';
1375 p++;
1376 break;
1378 default:
1379 if (isalpha(*p) ||
1380 (p != idstart + 1 && isdigit(*p)))
1381 continue;
1382 error("unterminated named entity");
1383 break;
1385 break;
1387 break;
1389 case 0:
1390 case 0x1A: // end of file
1391 c = '\\';
1392 break;
1394 default:
1395 if (isoctal(c))
1396 { unsigned v;
1398 n = 0;
1399 v = 0;
1402 v = v * 8 + (c - '0');
1403 c = *++p;
1404 } while (++n < 3 && isoctal(c));
1405 c = v;
1406 if (c > 0xFF)
1407 error("0%03o is larger than a byte", c);
1409 else
1410 error("undefined escape sequence \\%c\n",c);
1411 break;
1413 return c;
1416 /**************************************
1419 TOK Lexer::wysiwygStringConstant(Token *t, int tc)
1420 { unsigned c;
1421 Loc start = loc;
1423 p++;
1424 stringbuffer.reset();
1425 while (1)
1427 c = *p++;
1428 switch (c)
1430 case '\n':
1431 loc.linnum++;
1432 break;
1434 case '\r':
1435 if (*p == '\n')
1436 continue; // ignore
1437 c = '\n'; // treat EndOfLine as \n character
1438 loc.linnum++;
1439 break;
1441 case 0:
1442 case 0x1A:
1443 error("unterminated string constant starting at %s", start.toChars());
1444 t->ustring = (unsigned char *)"";
1445 t->len = 0;
1446 t->postfix = 0;
1447 return TOKstring;
1449 case '"':
1450 case '`':
1451 if (c == tc)
1453 t->len = stringbuffer.offset;
1454 stringbuffer.writeByte(0);
1455 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1456 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1457 stringPostfix(t);
1458 return TOKstring;
1460 break;
1462 default:
1463 if (c & 0x80)
1464 { p--;
1465 unsigned u = decodeUTF();
1466 p++;
1467 if (u == PS || u == LS)
1468 loc.linnum++;
1469 stringbuffer.writeUTF8(u);
1470 continue;
1472 break;
1474 stringbuffer.writeByte(c);
1478 /**************************************
1479 * Lex hex strings:
1480 * x"0A ae 34FE BD"
1483 TOK Lexer::hexStringConstant(Token *t)
1484 { unsigned c;
1485 Loc start = loc;
1486 unsigned n = 0;
1487 unsigned v;
1489 p++;
1490 stringbuffer.reset();
1491 while (1)
1493 c = *p++;
1494 switch (c)
1496 case ' ':
1497 case '\t':
1498 case '\v':
1499 case '\f':
1500 continue; // skip white space
1502 case '\r':
1503 if (*p == '\n')
1504 continue; // ignore
1505 // Treat isolated '\r' as if it were a '\n'
1506 case '\n':
1507 loc.linnum++;
1508 continue;
1510 case 0:
1511 case 0x1A:
1512 error("unterminated string constant starting at %s", start.toChars());
1513 t->ustring = (unsigned char *)"";
1514 t->len = 0;
1515 t->postfix = 0;
1516 return TOKstring;
1518 case '"':
1519 if (n & 1)
1520 { error("odd number (%d) of hex characters in hex string", n);
1521 stringbuffer.writeByte(v);
1523 t->len = stringbuffer.offset;
1524 stringbuffer.writeByte(0);
1525 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1526 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1527 stringPostfix(t);
1528 return TOKstring;
1530 default:
1531 if (c >= '0' && c <= '9')
1532 c -= '0';
1533 else if (c >= 'a' && c <= 'f')
1534 c -= 'a' - 10;
1535 else if (c >= 'A' && c <= 'F')
1536 c -= 'A' - 10;
1537 else if (c & 0x80)
1538 { p--;
1539 unsigned u = decodeUTF();
1540 p++;
1541 if (u == PS || u == LS)
1542 loc.linnum++;
1543 else
1544 error("non-hex character \\u%x", u);
1546 else
1547 error("non-hex character '%c'", c);
1548 if (n & 1)
1549 { v = (v << 4) | c;
1550 stringbuffer.writeByte(v);
1552 else
1553 v = c;
1554 n++;
1555 break;
1561 #if V2
1562 /**************************************
1563 * Lex delimited strings:
1564 * q"(foo(xxx))" // "foo(xxx)"
1565 * q"[foo(]" // "foo("
1566 * q"/foo]/" // "foo]"
1567 * q"HERE
1568 * foo
1569 * HERE" // "foo\n"
1570 * Input:
1571 * p is on the "
1574 TOK Lexer::delimitedStringConstant(Token *t)
1575 { unsigned c;
1576 Loc start = loc;
1577 unsigned delimleft = 0;
1578 unsigned delimright = 0;
1579 unsigned nest = 1;
1580 unsigned nestcount;
1581 Identifier *hereid = NULL;
1582 unsigned blankrol = 0;
1583 unsigned startline = 0;
1585 p++;
1586 stringbuffer.reset();
1587 while (1)
1589 c = *p++;
1590 //printf("c = '%c'\n", c);
1591 switch (c)
1593 case '\n':
1594 Lnextline:
1595 loc.linnum++;
1596 startline = 1;
1597 if (blankrol)
1598 { blankrol = 0;
1599 continue;
1601 if (hereid)
1603 stringbuffer.writeUTF8(c);
1604 continue;
1606 break;
1608 case '\r':
1609 if (*p == '\n')
1610 continue; // ignore
1611 c = '\n'; // treat EndOfLine as \n character
1612 goto Lnextline;
1614 case 0:
1615 case 0x1A:
1616 goto Lerror;
1618 default:
1619 if (c & 0x80)
1620 { p--;
1621 c = decodeUTF();
1622 p++;
1623 if (c == PS || c == LS)
1624 goto Lnextline;
1626 break;
1628 if (delimleft == 0)
1629 { delimleft = c;
1630 nest = 1;
1631 nestcount = 1;
1632 if (c == '(')
1633 delimright = ')';
1634 else if (c == '{')
1635 delimright = '}';
1636 else if (c == '[')
1637 delimright = ']';
1638 else if (c == '<')
1639 delimright = '>';
1640 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1641 { // Start of identifier; must be a heredoc
1642 Token t;
1643 p--;
1644 scan(&t); // read in heredoc identifier
1645 if (t.value != TOKidentifier)
1646 { error("identifier expected for heredoc, not %s", t.toChars());
1647 delimright = c;
1649 else
1650 { hereid = t.ident;
1651 //printf("hereid = '%s'\n", hereid->toChars());
1652 blankrol = 1;
1654 nest = 0;
1656 else
1657 { delimright = c;
1658 nest = 0;
1661 else
1663 if (blankrol)
1664 { error("heredoc rest of line should be blank");
1665 blankrol = 0;
1666 continue;
1668 if (nest == 1)
1670 if (c == delimleft)
1671 nestcount++;
1672 else if (c == delimright)
1673 { nestcount--;
1674 if (nestcount == 0)
1675 goto Ldone;
1678 else if (c == delimright)
1679 goto Ldone;
1680 if (startline && isalpha(c))
1681 { Token t;
1682 unsigned char *psave = p;
1683 p--;
1684 scan(&t); // read in possible heredoc identifier
1685 //printf("endid = '%s'\n", t.ident->toChars());
1686 if (t.value == TOKidentifier && t.ident->equals(hereid))
1687 { /* should check that rest of line is blank
1689 goto Ldone;
1691 p = psave;
1693 stringbuffer.writeUTF8(c);
1694 startline = 0;
1698 Ldone:
1699 if (*p == '"')
1700 p++;
1701 else
1702 error("delimited string must end in %c\"", delimright);
1703 t->len = stringbuffer.offset;
1704 stringbuffer.writeByte(0);
1705 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1706 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1707 stringPostfix(t);
1708 return TOKstring;
1710 Lerror:
1711 error("unterminated string constant starting at %s", start.toChars());
1712 t->ustring = (unsigned char *)"";
1713 t->len = 0;
1714 t->postfix = 0;
1715 return TOKstring;
1718 /**************************************
1719 * Lex delimited strings:
1720 * q{ foo(xxx) } // " foo(xxx) "
1721 * q{foo(} // "foo("
1722 * q{{foo}"}"} // "{foo}"}""
1723 * Input:
1724 * p is on the q
1727 TOK Lexer::tokenStringConstant(Token *t)
1729 unsigned nest = 1;
1730 Loc start = loc;
1731 unsigned char *pstart = ++p;
1733 while (1)
1734 { Token tok;
1736 scan(&tok);
1737 switch (tok.value)
1739 case TOKlcurly:
1740 nest++;
1741 continue;
1743 case TOKrcurly:
1744 if (--nest == 0)
1745 goto Ldone;
1746 continue;
1748 case TOKeof:
1749 goto Lerror;
1751 default:
1752 continue;
1756 Ldone:
1757 t->len = p - 1 - pstart;
1758 t->ustring = (unsigned char *)mem.malloc(t->len + 1);
1759 memcpy(t->ustring, pstart, t->len);
1760 t->ustring[t->len] = 0;
1761 stringPostfix(t);
1762 return TOKstring;
1764 Lerror:
1765 error("unterminated token string constant starting at %s", start.toChars());
1766 t->ustring = (unsigned char *)"";
1767 t->len = 0;
1768 t->postfix = 0;
1769 return TOKstring;
1772 #endif
1775 /**************************************
1778 TOK Lexer::escapeStringConstant(Token *t, int wide)
1779 { unsigned c;
1780 Loc start = loc;
1782 p++;
1783 stringbuffer.reset();
1784 while (1)
1786 c = *p++;
1787 switch (c)
1789 case '\\':
1790 switch (*p)
1792 case 'u':
1793 case 'U':
1794 case '&':
1795 c = escapeSequence();
1796 stringbuffer.writeUTF8(c);
1797 continue;
1799 default:
1800 c = escapeSequence();
1801 break;
1803 break;
1805 case '\n':
1806 loc.linnum++;
1807 break;
1809 case '\r':
1810 if (*p == '\n')
1811 continue; // ignore
1812 c = '\n'; // treat EndOfLine as \n character
1813 loc.linnum++;
1814 break;
1816 case '"':
1817 t->len = stringbuffer.offset;
1818 stringbuffer.writeByte(0);
1819 t->ustring = (unsigned char *)mem.malloc(stringbuffer.offset);
1820 memcpy(t->ustring, stringbuffer.data, stringbuffer.offset);
1821 stringPostfix(t);
1822 return TOKstring;
1824 case 0:
1825 case 0x1A:
1826 p--;
1827 error("unterminated string constant starting at %s", start.toChars());
1828 t->ustring = (unsigned char *)"";
1829 t->len = 0;
1830 t->postfix = 0;
1831 return TOKstring;
1833 default:
1834 if (c & 0x80)
1836 p--;
1837 c = decodeUTF();
1838 if (c == LS || c == PS)
1839 { c = '\n';
1840 loc.linnum++;
1842 p++;
1843 stringbuffer.writeUTF8(c);
1844 continue;
1846 break;
1848 stringbuffer.writeByte(c);
1852 /**************************************
1855 TOK Lexer::charConstant(Token *t, int wide)
1857 unsigned c;
1858 TOK tk = TOKcharv;
1860 //printf("Lexer::charConstant\n");
1861 p++;
1862 c = *p++;
1863 switch (c)
1865 case '\\':
1866 switch (*p)
1868 case 'u':
1869 t->uns64value = escapeSequence();
1870 tk = TOKwcharv;
1871 break;
1873 case 'U':
1874 case '&':
1875 t->uns64value = escapeSequence();
1876 tk = TOKdcharv;
1877 break;
1879 default:
1880 t->uns64value = escapeSequence();
1881 break;
1883 break;
1885 case '\n':
1887 loc.linnum++;
1888 case '\r':
1889 case 0:
1890 case 0x1A:
1891 case '\'':
1892 error("unterminated character constant");
1893 return tk;
1895 default:
1896 if (c & 0x80)
1898 p--;
1899 c = decodeUTF();
1900 p++;
1901 if (c == LS || c == PS)
1902 goto L1;
1903 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1904 tk = TOKwcharv;
1905 else
1906 tk = TOKdcharv;
1908 t->uns64value = c;
1909 break;
1912 if (*p != '\'')
1913 { error("unterminated character constant");
1914 return tk;
1916 p++;
1917 return tk;
1920 /***************************************
1921 * Get postfix of string literal.
1924 void Lexer::stringPostfix(Token *t)
1926 switch (*p)
1928 case 'c':
1929 case 'w':
1930 case 'd':
1931 t->postfix = *p;
1932 p++;
1933 break;
1935 default:
1936 t->postfix = 0;
1937 break;
/***************************************
 * Read \u or \U unicode sequence
 * Input:
 *      u       'u' or 'U'
 * Returns:
 *      the value accumulated from the hex digits read (4 digits for 'u',
 *      8 for 'U'). Reading stops early, after reporting an error, at the
 *      first character that is not a hex digit.
 * Note: this function is compiled out by the surrounding #if 0.
 */

#if 0
unsigned Lexer::wchar(unsigned u)
{
    const unsigned nchars = (u == 'U') ? 8 : 4;
    unsigned value = 0;

    for (unsigned n = 0; ; n++)
    {
        ++p;
        if (n == nchars)
            break;

        unsigned char digit = *p;
        if (!ishex(digit))
        {
            error("\\%c sequence must be followed by %d hex characters", u, nchars);
            break;
        }

        // Turn the hex digit character into its numeric value.
        if (isdigit(digit))
            digit -= '0';
        else if (islower(digit))
            digit -= 'a' - 10;
        else
            digit -= 'A' - 10;

        value = (value << 4) | digit;
    }
    return value;
}
#endif
1980 /**************************************
1981 * Read in a number.
1982 * If it's an integer, store it in tok.TKutok.Vlong.
1983 * integers can be decimal, octal or hex
1984 * Handle the suffixes U, UL, LU, L, etc.
1985 * If it's double, store it in tok.TKutok.Vdouble.
1986 * Returns:
1987 * TKnum
1988 * TKdouble,...
// Summary of what the code below does:
//  - Scans the characters at p with a small state machine, copying them into
//    stringbuffer. An embedded '_' is skipped without being copied.
//  - As soon as the text looks like a floating point literal, p is rewound to
//    where scanning started and the work is delegated to inreal(t).
//  - Otherwise the collected text is converted to an integer, the u/U/l/L
//    suffixes are consumed, and the token kind is chosen from the value and
//    the flags. The value is stored in t->uns64value.
// Returns one of TOKint32v, TOKuns32v, TOKint64v, TOKuns64v, or whatever
// inreal() returns.
1991 TOK Lexer::number(Token *t)
1993 // We use a state machine to collect numbers
1994 enum STATE { STATE_initial, STATE_0, STATE_decimal, STATE_octal, STATE_octale,
1995 STATE_hex, STATE_binary, STATE_hex0, STATE_binary0,
1996 STATE_hexh, STATE_error };
1997 enum STATE state;
1999 enum FLAGS
2000 { FLAGS_decimal = 1, // decimal
2001 FLAGS_unsigned = 2, // u or U suffix
2002 FLAGS_long = 4, // l or L suffix
2004 enum FLAGS flags = FLAGS_decimal;
2006 int i;
2007 int base;
2008 unsigned c;
2009 unsigned char *start;
2010 TOK result;
2012 //printf("Lexer::number()\n");
2013 state = STATE_initial;
2014 base = 0;
2015 stringbuffer.reset();
// start remembers where the literal begins so the real: path can rescan it.
2016 start = p;
2017 while (1)
2019 c = *p;
2020 switch (state)
2022 case STATE_initial: // opening state
2023 if (c == '0')
2024 state = STATE_0;
2025 else
2026 state = STATE_decimal;
2027 break;
// STATE_0: a leading '0' has been read; the next character selects the radix.
// FLAGS_decimal is cleared here, so any literal starting with 0 is treated as
// non-decimal when the token type is chosen further down.
2029 case STATE_0:
2030 flags = (FLAGS) (flags & ~FLAGS_decimal);
2031 switch (c)
2033 #if ZEROH
2034 case 'H': // 0h
2035 case 'h':
2036 goto hexh;
2037 #endif
2038 case 'X':
2039 case 'x':
2040 state = STATE_hex0;
2041 break;
2043 case '.':
2044 if (p[1] == '.') // .. is a separate token
2045 goto done;
// otherwise '.' falls through to the cases below and is handled as a real
2046 case 'i':
2047 case 'f':
2048 case 'F':
2049 goto real;
2050 #if ZEROH
2051 case 'E':
2052 case 'e':
2053 goto case_hex;
2054 #endif
2055 case 'B':
2056 case 'b':
2057 state = STATE_binary0;
2058 break;
2060 case '0': case '1': case '2': case '3':
2061 case '4': case '5': case '6': case '7':
2062 state = STATE_octal;
2063 break;
2065 #if ZEROH
2066 case '8': case '9': case 'A':
2067 case 'C': case 'D': case 'F':
2068 case 'a': case 'c': case 'd': case 'f':
2069 case_hex:
2070 state = STATE_hexh;
2071 break;
2072 #endif
2073 case '_':
2074 state = STATE_octal;
2075 p++;
2076 continue;
2078 case 'L':
2079 if (p[1] == 'i')
2080 goto real;
2081 goto done;
2083 default:
2084 goto done;
2086 break;
2088 case STATE_decimal: // reading decimal number
2089 if (!isdigit(c))
2091 #if ZEROH
2092 if (ishex(c)
2093 || c == 'H' || c == 'h'
2095 goto hexh;
2096 #endif
2097 if (c == '_') // ignore embedded _
2098 { p++;
2099 continue;
2101 if (c == '.' && p[1] != '.')
2102 goto real;
2103 else if (c == 'i' || c == 'f' || c == 'F' ||
2104 c == 'e' || c == 'E')
// The real: label lives inside this branch and is also the target of the
// goto real statements in the other states.
2106 real: // It's a real number. Back up and rescan as a real
2107 p = start;
2108 return inreal(t);
2110 else if (c == 'L' && p[1] == 'i')
2111 goto real;
2112 goto done;
2114 break;
// STATE_hex0 means "0x" was read but no digit yet; STATE_hex means at least
// one hex digit has been seen.
2116 case STATE_hex0: // reading hex number
2117 case STATE_hex:
2118 if (!ishex(c))
2120 if (c == '_') // ignore embedded _
2121 { p++;
2122 continue;
2124 if (c == '.' && p[1] != '.')
2125 goto real;
2126 if (c == 'P' || c == 'p' || c == 'i')
2127 goto real;
2128 if (state == STATE_hex0)
2129 error("Hex digit expected, not '%c'", c);
2130 goto done;
2132 state = STATE_hex;
2133 break;
2135 #if ZEROH
2136 hexh:
2137 state = STATE_hexh;
2138 case STATE_hexh: // parse numbers like 0FFh
2139 if (!ishex(c))
2141 if (c == 'H' || c == 'h')
2143 p++;
2144 base = 16;
2145 goto done;
2147 else
2149 // Check for something like 1E3 or 0E24
2150 if (memchr((char *)stringbuffer.data, 'E', stringbuffer.offset) ||
2151 memchr((char *)stringbuffer.data, 'e', stringbuffer.offset))
2152 goto real;
2153 error("Hex digit expected, not '%c'", c);
2154 goto done;
2157 break;
2158 #endif
// STATE_octale records that an 8 or 9 was seen in an octal literal; the error
// for it is reported once, after the done: label.
2160 case STATE_octal: // reading octal number
2161 case STATE_octale: // reading octal number with non-octal digits
2162 if (!isoctal(c))
2164 #if ZEROH
2165 if (ishex(c)
2166 || c == 'H' || c == 'h'
2168 goto hexh;
2169 #endif
2170 if (c == '_') // ignore embedded _
2171 { p++;
2172 continue;
2174 if (c == '.' && p[1] != '.')
2175 goto real;
2176 if (c == 'i')
2177 goto real;
2178 if (isdigit(c))
2180 state = STATE_octale;
2182 else
2183 goto done;
2185 break;
2187 case STATE_binary0: // starting binary number
2188 case STATE_binary: // reading binary number
2189 if (c != '0' && c != '1')
2191 #if ZEROH
2192 if (ishex(c)
2193 || c == 'H' || c == 'h'
2195 goto hexh;
2196 #endif
2197 if (c == '_') // ignore embedded _
2198 { p++;
2199 continue;
2201 if (state == STATE_binary0)
2202 { error("binary digit expected");
2203 state = STATE_error;
2204 break;
2206 else
2207 goto done;
2209 state = STATE_binary;
2210 break;
2212 case STATE_error: // for error recovery
2213 if (!isdigit(c)) // scan until non-digit
2214 goto done;
2215 break;
2217 default:
2218 assert(0);
// Reached only via break from the state switch: keep the character and advance.
2220 stringbuffer.writeByte(c);
2221 p++;
2223 done:
2224 stringbuffer.writeByte(0); // terminate string
2225 if (state == STATE_octale)
2226 error("Octal digit expected");
2228 uinteger_t n; // unsigned >=64 bit integer type
// Fast path: the buffer holds one digit plus the terminating 0.
2230 if (stringbuffer.offset == 2 && (state == STATE_decimal || state == STATE_0))
2231 n = stringbuffer.data[0] - '0';
2232 else
2234 // Convert string to integer
2235 #if __DMC__
2236 errno = 0;
2237 n = strtoull((char *)stringbuffer.data,NULL,base);
2238 if (errno == ERANGE)
2239 error("integer overflow");
2240 #else
2241 // Not everybody implements strtoull()
// This local p is a cursor into stringbuffer and shadows the lexer's input
// pointer for the rest of this else-branch only.
2242 char *p = (char *)stringbuffer.data;
2243 int r = 10, d;
2245 if (*p == '0')
2247 if (p[1] == 'x' || p[1] == 'X')
2248 p += 2, r = 16;
2249 else if (p[1] == 'b' || p[1] == 'B')
2250 p += 2, r = 2;
2251 else if (isdigit(p[1]))
2252 p += 1, r = 8;
2255 n = 0;
2256 while (1)
2258 if (*p >= '0' && *p <= '9')
2259 d = *p - '0';
2260 else if (*p >= 'a' && *p <= 'z')
2261 d = *p - 'a' + 10;
2262 else if (*p >= 'A' && *p <= 'Z')
2263 d = *p - 'A' + 10;
2264 else
2265 break;
2266 if (d >= r)
2267 break;
// NOTE(review): this test only notices a wrap-around when the wrapped result
// is not larger than n. If uinteger_t is exactly 64 bits wide, a literal such
// as 30000000000000000000 appears to wrap to a larger value and go unreported
// -- confirm, and consider comparing n against (max - d) / r instead.
2268 if (n && n * r + d <= n)
2270 error ("integer overflow");
2271 break;
2274 n = n * r + d;
2275 p++;
2277 #endif
2278 if (sizeof(n) > 8 &&
2279 n > 0xFFFFFFFFFFFFFFFFULL) // if n needs more than 64 bits
2280 error("integer overflow");
2283 // Parse trailing 'u', 'U', 'l' or 'L' in any combination
// Repeating a suffix that is already set reports "unrecognized token".
2284 while (1)
2285 { unsigned char f;
2287 switch (*p)
2288 { case 'U':
2289 case 'u':
2290 f = FLAGS_unsigned;
2291 goto L1;
2293 case 'l':
// The leading "1 ||" makes this condition always true, so a lower-case 'l'
// suffix is always reported as an error here.
2294 if (1 || !global.params.useDeprecated)
2295 error("'l' suffix is deprecated, use 'L' instead");
2296 case 'L':
2297 f = FLAGS_long;
2299 p++;
2300 if (flags & f)
2301 error("unrecognized token");
2302 flags = (FLAGS) (flags | f);
2303 continue;
2304 default:
2305 break;
2307 break;
// Choose the narrowest token type that can hold n, given the radix and suffixes.
2310 switch (flags)
2312 case 0:
2313 /* Octal or Hexadecimal constant.
2314 * First that fits: int, uint, long, ulong
2316 if (n & 0x8000000000000000LL)
2317 result = TOKuns64v;
2318 else if (n & 0xFFFFFFFF00000000LL)
2319 result = TOKint64v;
2320 else if (n & 0x80000000)
2321 result = TOKuns32v;
2322 else
2323 result = TOKint32v;
2324 break;
2326 case FLAGS_decimal:
2327 /* First that fits: int, long, long long
2329 if (n & 0x8000000000000000LL)
2330 { error("signed integer overflow");
2331 result = TOKuns64v;
2333 else if (n & 0xFFFFFFFF80000000LL)
2334 result = TOKint64v;
2335 else
2336 result = TOKint32v;
2337 break;
2339 case FLAGS_unsigned:
2340 case FLAGS_decimal | FLAGS_unsigned:
2341 /* First that fits: uint, ulong
2343 if (n & 0xFFFFFFFF00000000LL)
2344 result = TOKuns64v;
2345 else
2346 result = TOKuns32v;
2347 break;
2349 case FLAGS_decimal | FLAGS_long:
2350 if (n & 0x8000000000000000LL)
2351 { error("signed integer overflow");
2352 result = TOKuns64v;
2354 else
2355 result = TOKint64v;
2356 break;
2358 case FLAGS_long:
2359 if (n & 0x8000000000000000LL)
2360 result = TOKuns64v;
2361 else
2362 result = TOKint64v;
2363 break;
2365 case FLAGS_unsigned | FLAGS_long:
2366 case FLAGS_decimal | FLAGS_unsigned | FLAGS_long:
2367 result = TOKuns64v;
2368 break;
2370 default:
2371 #ifdef DEBUG
2372 printf("%x\n",flags);
2373 #endif
2374 assert(0);
2376 t->uns64value = n;
2377 return result;
2380 /**************************************
2381 * Read in characters, converting them to real.
2382 * Bugs:
2383 * Exponent overflow not detected.
2384 * Too much requested precision is not detected.
// Summary of what the code below does:
//  - Copies the characters of a floating point literal from p into
//    stringbuffer, driven by the dblstate state machine. An embedded '_' is
//    dropped.
//  - Parses the collected text into t->float80value.
//  - Consumes an optional f/F or l/L suffix and an optional i/I suffix, and
//    returns the matching TOKfloat*v or TOKimaginary*v token kind.
// The __in / __out / __body sections are only compiled under __DMC__.
2387 TOK Lexer::inreal(Token *t)
2388 #ifdef __DMC__
2389 __in
2391 assert(*p == '.' || isdigit(*p));
2393 __out (result)
2395 switch (result)
2397 case TOKfloat32v:
2398 case TOKfloat64v:
2399 case TOKfloat80v:
2400 case TOKimaginary32v:
2401 case TOKimaginary64v:
2402 case TOKimaginary80v:
2403 break;
2405 default:
2406 assert(0);
2409 __body
2410 #endif /* __DMC__ */
2411 { int dblstate;
2412 unsigned c;
2413 char hex; // is this a hexadecimal-floating-constant?
2414 TOK result;
2416 //printf("Lexer::inreal()\n");
2417 stringbuffer.reset();
2418 dblstate = 0;
2419 hex = 0;
// Lnext is the target of "goto Lnext" below: it fetches the next character
// without storing the current one.
2420 Lnext:
2421 while (1)
2423 // Get next char from input
2424 c = *p++;
2425 //printf("dblstate = %d, c = '%c'\n", dblstate, c);
// Inner loop: "continue" re-dispatches the same character in the next state,
// while "break" out of the switch leads to the character being stored.
2426 while (1)
2428 switch (dblstate)
2430 case 0: // opening state
2431 if (c == '0')
2432 dblstate = 9;
2433 else if (c == '.')
2434 dblstate = 3;
2435 else
2436 dblstate = 1;
2437 break;
// State 9: the character right after a leading '0'. An x or X marks a
// hexadecimal floating constant.
2439 case 9:
2440 dblstate = 1;
2441 if (c == 'X' || c == 'x')
2442 { hex++;
2443 break;
// anything else falls through and is treated as a digit to the left of '.'
2445 case 1: // digits to left of .
2446 case 3: // digits to right of .
2447 case 7: // continuing exponent digits
2448 if (!isdigit(c) && !(hex && isxdigit(c)))
2450 if (c == '_')
2451 goto Lnext; // ignore embedded '_'
2452 dblstate++;
2453 continue;
2455 break;
2457 case 2: // no more digits to left of .
2458 if (c == '.')
2459 { dblstate++;
2460 break;
2462 case 4: // no more digits to right of .
2463 if ((c == 'E' || c == 'e') ||
2464 hex && (c == 'P' || c == 'p'))
2465 { dblstate = 5;
2466 hex = 0; // exponent is always decimal
2467 break;
2469 if (hex)
2470 error("binary-exponent-part required");
2471 goto done;
2473 case 5: // looking immediately to right of E
2474 dblstate++;
2475 if (c == '-' || c == '+')
2476 break;
2477 case 6: // 1st exponent digit expected
2478 if (!isdigit(c))
2479 error("exponent expected");
2480 dblstate++;
2481 break;
2483 case 8: // past end of exponent digits
2484 goto done;
2486 break;
2488 stringbuffer.writeByte(c);
2490 done:
// The character that ended the literal was already consumed by *p++; give it back.
2491 p--;
2493 stringbuffer.writeByte(0);
2495 #if _WIN32 && __DMC__
2496 char *save = __locale_decpoint;
2497 __locale_decpoint = ".";
2498 #endif
2499 #ifdef IN_GCC
2500 t->float80value = real_t::parse((char *)stringbuffer.data, real_t::LongDouble);
2501 #else
2502 t->float80value = strtold((char *)stringbuffer.data, NULL);
2503 #endif
// errno is cleared only after the parse above. The parses in the switch below
// discard their results; they appear to exist only so that a range error for
// the narrower type shows up in errno for the check at the end.
// NOTE(review): as a consequence the l/L path, which performs no further
// parse, can never report "number is not representable" -- confirm whether
// that is intended.
2504 errno = 0;
2505 switch (*p)
2507 case 'F':
2508 case 'f':
2509 #ifdef IN_GCC
2510 real_t::parse((char *)stringbuffer.data, real_t::Float);
2511 #else
2512 strtof((char *)stringbuffer.data, NULL);
2513 #endif
2514 result = TOKfloat32v;
2515 p++;
2516 break;
2518 default:
2519 #ifdef IN_GCC
2520 real_t::parse((char *)stringbuffer.data, real_t::Double);
2521 #else
2522 strtod((char *)stringbuffer.data, NULL);
2523 #endif
2524 result = TOKfloat64v;
2525 break;
2527 case 'l':
2528 if (!global.params.useDeprecated)
2529 error("'l' suffix is deprecated, use 'L' instead");
2530 case 'L':
2531 result = TOKfloat80v;
2532 p++;
2533 break;
// An i or I suffix turns the real token kind into the matching imaginary kind.
2535 if (*p == 'i' || *p == 'I')
2537 if (!global.params.useDeprecated && *p == 'I')
2538 error("'I' suffix is deprecated, use 'i' instead");
2539 p++;
2540 switch (result)
2542 case TOKfloat32v:
2543 result = TOKimaginary32v;
2544 break;
2545 case TOKfloat64v:
2546 result = TOKimaginary64v;
2547 break;
2548 case TOKfloat80v:
2549 result = TOKimaginary80v;
2550 break;
2553 #if _WIN32 && __DMC__
2554 __locale_decpoint = save;
2555 #endif
2556 if (errno == ERANGE)
2557 error("number is not representable");
2558 return result;
2561 /*********************************************
2562 * Do pragma.
2563 * Currently, the only pragma supported is:
2564 * #line linnum [filespec]
2567 void Lexer::pragma()
2569 Token tok;
2570 int linnum;
2571 char *filespec = NULL;
2572 Loc loc = this->loc;
2574 while (isblank(*p)) p++;
2575 if (*p == '\n')
2576 goto Lerr;
2578 scan(&tok);
2579 if (tok.value != TOKidentifier || tok.ident != Id::line)
2580 goto Lerr;
2582 scan(&tok);
2583 if (tok.value == TOKint32v || tok.value == TOKint64v)
2584 linnum = tok.uns64value - 1;
2585 else
2586 goto Lerr;
2588 while (1)
2590 switch (*p)
2592 case 0:
2593 case 0x1A:
2594 case '\n':
2595 Lnewline:
2596 this->loc.linnum = linnum;
2597 if (filespec)
2598 this->loc.filename = filespec;
2599 return;
2601 case '\r':
2602 p++;
2603 if (*p != '\n')
2604 { p--;
2605 goto Lnewline;
2607 continue;
2609 case ' ':
2610 case '\t':
2611 case '\v':
2612 case '\f':
2613 p++;
2614 continue; // skip white space
2616 case '_':
2617 if (mod && memcmp(p, "__FILE__", 8) == 0)
2619 p += 8;
2620 filespec = mem.strdup(loc.filename ? loc.filename : mod->ident->toChars());
2622 continue;
2624 case '"':
2625 if (filespec)
2626 goto Lerr;
2627 stringbuffer.reset();
2628 p++;
2629 while (1)
2630 { unsigned c;
2632 c = *p;
2633 switch (c)
2635 case '\n':
2636 case '\r':
2637 case 0:
2638 case 0x1A:
2639 goto Lerr;
2641 case '"':
2642 stringbuffer.writeByte(0);
2643 filespec = mem.strdup((char *)stringbuffer.data);
2644 p++;
2645 break;
2647 default:
2648 if (c & 0x80)
2649 { unsigned u = decodeUTF();
2650 if (u == PS || u == LS)
2651 goto Lerr;
2653 stringbuffer.writeByte(c);
2654 p++;
2655 continue;
2657 break;
2659 continue;
2661 default:
2662 if (*p & 0x80)
2663 { unsigned u = decodeUTF();
2664 if (u == PS || u == LS)
2665 goto Lnewline;
2667 goto Lerr;
2671 Lerr:
2672 error(loc, "#line integer [\"filespec\"]\\n expected");
2676 /********************************************
2677 * Decode UTF character.
2678 * Issue error messages for invalid sequences.
2679 * Return decoded character, advance p to last character in UTF sequence.
2682 unsigned Lexer::decodeUTF()
2684 dchar_t u;
2685 unsigned char c;
2686 unsigned char *s = p;
2687 size_t len;
2688 size_t idx;
2689 char *msg;
2691 c = *s;
2692 assert(c & 0x80);
2694 // Check length of remaining string up to 6 UTF-8 characters
2695 for (len = 1; len < 6 && s[len]; len++)
2698 idx = 0;
2699 msg = utf_decodeChar(s, len, &idx, &u);
2700 p += idx - 1;
2701 if (msg)
2703 error("%s", msg);
2705 return u;
2709 /***************************************************
2710 * Parse doc comment embedded between t->ptr and p.
2711 * Remove trailing blanks and tabs from lines.
2712 * Replace all newlines with \n.
2713 * Remove leading comment character from each line.
2714 * Decide if it's a lineComment or a blockComment.
2715 * Append to previous one for this token.
2718 void Lexer::getDocComment(Token *t, unsigned lineComment)
2720 OutBuffer buf;
2721 unsigned char ct = t->ptr[2];
2722 unsigned char *q = t->ptr + 3; // start of comment text
2723 int linestart = 0;
2725 unsigned char *qend = p;
2726 if (ct == '*' || ct == '+')
2727 qend -= 2;
2729 /* Scan over initial row of ****'s or ++++'s or ////'s
2731 for (; q < qend; q++)
2733 if (*q != ct)
2734 break;
2737 /* Remove trailing row of ****'s or ++++'s
2739 if (ct != '/')
2741 for (; q < qend; qend--)
2743 if (qend[-1] != ct)
2744 break;
2748 for (; q < qend; q++)
2750 unsigned char c = *q;
2752 switch (c)
2754 case '*':
2755 case '+':
2756 if (linestart && c == ct)
2757 { linestart = 0;
2758 /* Trim preceding whitespace up to preceding \n
2760 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2761 buf.offset--;
2762 continue;
2764 break;
2766 case ' ':
2767 case '\t':
2768 break;
2770 case '\r':
2771 if (q[1] == '\n')
2772 continue; // skip the \r
2773 goto Lnewline;
2775 default:
2776 if (c == 226)
2778 // If LS or PS
2779 if (q[1] == 128 &&
2780 (q[2] == 168 || q[2] == 169))
2782 q += 2;
2783 goto Lnewline;
2786 linestart = 0;
2787 break;
2789 Lnewline:
2790 c = '\n'; // replace all newlines with \n
2791 case '\n':
2792 linestart = 1;
2794 /* Trim trailing whitespace
2796 while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t'))
2797 buf.offset--;
2799 break;
2801 buf.writeByte(c);
2804 // Always end with a newline
2805 if (!buf.offset || buf.data[buf.offset - 1] != '\n')
2806 buf.writeByte('\n');
2808 buf.writeByte(0);
2810 // It's a line comment if the start of the doc comment comes
2811 // after other non-whitespace on the same line.
2812 unsigned char** dc = (lineComment && anyToken)
2813 ? &t->lineComment
2814 : &t->blockComment;
2816 // Combine with previous doc comment, if any
2817 if (*dc)
2818 *dc = combineComments(*dc, (unsigned char *)buf.data);
2819 else
2820 *dc = (unsigned char *)buf.extractData();
2823 /********************************************
2824 * Combine two document comments into one.
2827 unsigned char *Lexer::combineComments(unsigned char *c1, unsigned char *c2)
2829 unsigned char *c = c2;
2831 if (c1)
2832 { c = c1;
2833 if (c2)
2834 { size_t len1 = strlen((char *)c1);
2835 size_t len2 = strlen((char *)c2);
2837 c = (unsigned char *)mem.malloc(len1 + 1 + len2 + 1);
2838 memcpy(c, c1, len1);
2839 c[len1] = '\n';
2840 memcpy(c + len1 + 1, c2, len2);
2841 c[len1 + 1 + len2] = 0;
2844 return c;
2847 /********************************************
2848 * Create an identifier in the string table.
2851 Identifier *Lexer::idPool(const char *s)
2853 size_t len = strlen(s);
2854 StringValue *sv = stringtable.update(s, len);
2855 Identifier *id = (Identifier *) sv->ptrvalue;
2856 if (!id)
2858 id = new Identifier(sv->lstring.string, TOKidentifier);
2859 sv->ptrvalue = id;
2861 return id;
2864 /*********************************************
2865 * Create a unique identifier using the prefix s.
2868 Identifier *Lexer::uniqueId(const char *s, int num)
2869 { char buffer[32];
2870 size_t slen = strlen(s);
2872 assert(slen + sizeof(num) * 3 + 1 <= sizeof(buffer));
2873 sprintf(buffer, "%s%d", s, num);
2874 return idPool(buffer);
2877 Identifier *Lexer::uniqueId(const char *s)
2879 static int num;
2880 return uniqueId(s, ++num);
2883 /****************************************
// One row of the reserved-word table: name is the spelling of the word as it
// appears in source text, value is the token kind it is given.
2886 struct Keyword
2887 { char *name;
2888 enum TOK value;
// Table of reserved words and the token each one lexes to.
// Lexer::initKeywords() inserts these rows into the string table and
// Token::isKeyword() searches the value column.
// NOTE(review): the ORDER OF THE LAST ROWS MATTERS. initKeywords() subtracts
// 2 from the row count when global.params.Dversion == 1, i.e. it skips the
// final two rows. With the rows as listed here, and V2 not defined, those
// are "log_info" and "log_trace" rather than the "ref" and "macro" rows
// marked "Added after 1.0" -- confirm that this is what is intended.
2891 static Keyword keywords[] =
2893 // { "", TOK },
2895 { "this", TOKthis },
2896 { "super", TOKsuper },
2897 { "assert", TOKassert },
2898 { "null", TOKnull },
2899 { "true", TOKtrue },
2900 { "false", TOKfalse },
2901 { "cast", TOKcast },
2902 { "new", TOKnew },
2903 { "delete", TOKdelete },
2904 { "throw", TOKthrow },
2905 { "module", TOKmodule },
2906 { "pragma", TOKpragma },
2907 { "typeof", TOKtypeof },
2908 { "typeid", TOKtypeid },
2910 { "template", TOKtemplate },
// Basic types
2912 { "void", TOKvoid },
2913 { "byte", TOKint8 },
2914 { "ubyte", TOKuns8 },
2915 { "short", TOKint16 },
2916 { "ushort", TOKuns16 },
2917 { "int", TOKint32 },
2918 { "uint", TOKuns32 },
2919 { "long", TOKint64 },
2920 { "ulong", TOKuns64 },
2921 { "cent", TOKcent, },
2922 { "ucent", TOKucent, },
2923 { "float", TOKfloat32 },
2924 { "double", TOKfloat64 },
2925 { "real", TOKfloat80 },
2927 { "bool", TOKbool },
2928 { "char", TOKchar },
2929 { "wchar", TOKwchar },
2930 { "dchar", TOKdchar },
2932 { "ifloat", TOKimaginary32 },
2933 { "idouble", TOKimaginary64 },
2934 { "ireal", TOKimaginary80 },
2936 { "cfloat", TOKcomplex32 },
2937 { "cdouble", TOKcomplex64 },
2938 { "creal", TOKcomplex80 },
2940 { "delegate", TOKdelegate },
2941 { "function", TOKfunction },
// Statements and control flow
2943 { "is", TOKis },
2944 { "if", TOKif },
2945 { "else", TOKelse },
2946 { "while", TOKwhile },
2947 { "for", TOKfor },
2948 { "do", TOKdo },
2949 { "switch", TOKswitch },
2950 { "case", TOKcase },
2951 { "default", TOKdefault },
2952 { "break", TOKbreak },
2953 { "continue", TOKcontinue },
2954 { "synchronized", TOKsynchronized },
2955 { "return", TOKreturn },
2956 { "goto", TOKgoto },
2957 { "try", TOKtry },
2958 { "catch", TOKcatch },
2959 { "finally", TOKfinally },
2960 { "with", TOKwith },
2961 { "asm", TOKasm },
2962 { "foreach", TOKforeach },
2963 { "foreach_reverse", TOKforeach_reverse },
2964 { "reversed", TOKreversed },
2965 { "scope", TOKscope },
// Declarations, storage classes and attributes
2967 { "struct", TOKstruct },
2968 { "class", TOKclass },
2969 { "interface", TOKinterface },
2970 { "union", TOKunion },
2971 { "enum", TOKenum },
2972 { "import", TOKimport },
2973 { "mixin", TOKmixin },
2974 { "static", TOKstatic },
2975 { "final", TOKfinal },
2976 { "const", TOKconst },
2977 { "typedef", TOKtypedef },
2978 { "alias", TOKalias },
2979 { "override", TOKoverride },
2980 { "abstract", TOKabstract },
2981 { "volatile", TOKvolatile },
2982 { "debug", TOKdebug },
2983 { "deprecated", TOKdeprecated },
2984 { "in", TOKin },
2985 { "out", TOKout },
2986 { "inout", TOKinout },
2987 { "lazy", TOKlazy },
2988 { "auto", TOKauto },
2990 { "align", TOKalign },
2991 { "extern", TOKextern },
2992 { "private", TOKprivate },
2993 { "package", TOKpackage },
2994 { "protected", TOKprotected },
2995 { "public", TOKpublic },
2996 { "export", TOKexport },
2998 { "body", TOKbody },
2999 { "invariant", TOKinvariant },
3000 { "unittest", TOKunittest },
3001 { "version", TOKversion },
3002 //{ "manifest", TOKmanifest },
3004 // Added after 1.0
3005 { "ref", TOKref },
3006 { "macro", TOKmacro },
3009 // TAL
// Word forms that map onto existing operator tokens ("and", "or", "not"),
// followed by further additions.
3010 { "and", TOKandand },
3011 { "or", TOKoror },
3012 { "not", TOKnot },
3013 { "extends", TOKextends },
3014 { "implements", TOKimplements },
3015 { "log_error", TOKlog_error },
3016 { "log_warning", TOKlog_warning },
3017 { "log_info", TOKlog_info },
3018 { "log_trace", TOKlog_trace },
3019 #if V2
3020 { "pure", TOKpure },
3021 { "nothrow", TOKnothrow },
3022 { "__traits", TOKtraits },
3023 { "__overloadset", TOKoverloadset },
3024 #endif
3027 int Token::isKeyword()
3029 for (unsigned u = 0; u < sizeof(keywords) / sizeof(keywords[0]); u++)
3031 if (keywords[u].value == value)
3032 return 1;
3034 return 0;
3037 void Lexer::initKeywords()
3038 { StringValue *sv;
3039 unsigned u;
3040 enum TOK v;
3041 unsigned nkeywords = sizeof(keywords) / sizeof(keywords[0]);
3043 if (global.params.Dversion == 1)
3044 nkeywords -= 2;
3046 cmtable_init();
3048 for (u = 0; u < nkeywords; u++)
3049 { char *s;
3051 //printf("keyword[%d] = '%s'\n",u, keywords[u].name);
3052 s = keywords[u].name;
3053 v = keywords[u].value;
3054 sv = stringtable.insert(s, strlen(s));
3055 sv->ptrvalue = (void *) new Identifier(sv->lstring.string,v);
3057 //printf("tochars[%d] = '%s'\n",v, s);
3058 Token::tochars[v] = s;
3061 Token::tochars[TOKeof] = "EOF";
3062 Token::tochars[TOKlcurly] = "{";
3063 Token::tochars[TOKrcurly] = "}";
3064 Token::tochars[TOKlparen] = "(";
3065 Token::tochars[TOKrparen] = ")";
3066 Token::tochars[TOKlbracket] = "[";
3067 Token::tochars[TOKrbracket] = "]";
3068 Token::tochars[TOKsemicolon] = ";";
3069 Token::tochars[TOKcolon] = ":";
3070 Token::tochars[TOKcomma] = ",";
3071 Token::tochars[TOKdot] = ".";
3072 Token::tochars[TOKxor] = "^";
3073 Token::tochars[TOKxorass] = "^=";
3074 Token::tochars[TOKassign] = "=";
3075 Token::tochars[TOKconstruct] = "=";
3076 #if V2
3077 Token::tochars[TOKblit] = "=";
3078 #endif
3079 Token::tochars[TOKlt] = "<";
3080 Token::tochars[TOKgt] = ">";
3081 Token::tochars[TOKle] = "<=";
3082 Token::tochars[TOKge] = ">=";
3083 Token::tochars[TOKequal] = "==";
3084 Token::tochars[TOKnotequal] = "!=";
3085 Token::tochars[TOKnotidentity] = "!is";
3086 Token::tochars[TOKtobool] = "!!";
3087 Token::tochars[TOKat] = "@";
3089 Token::tochars[TOKunord] = "!<>=";
3090 Token::tochars[TOKue] = "!<>";
3091 Token::tochars[TOKlg] = "<>";
3092 Token::tochars[TOKleg] = "<>=";
3093 Token::tochars[TOKule] = "!>";
3094 Token::tochars[TOKul] = "!>=";
3095 Token::tochars[TOKuge] = "!<";
3096 Token::tochars[TOKug] = "!<=";
3098 Token::tochars[TOKnot] = "!";
3099 Token::tochars[TOKtobool] = "!!";
3100 Token::tochars[TOKshl] = "<<";
3101 Token::tochars[TOKshr] = ">>";
3102 Token::tochars[TOKushr] = ">>>";
3103 Token::tochars[TOKadd] = "+";
3104 Token::tochars[TOKmin] = "-";
3105 Token::tochars[TOKmul] = "*";
3106 Token::tochars[TOKdiv] = "/";
3107 Token::tochars[TOKmod] = "%";
3108 Token::tochars[TOKslice] = "..";
3109 Token::tochars[TOKdotdotdot] = "...";
3110 Token::tochars[TOKand] = "&";
3111 Token::tochars[TOKandand] = "&&";
3112 Token::tochars[TOKor] = "|";
3113 Token::tochars[TOKoror] = "||";
3114 Token::tochars[TOKarray] = "[]";
3115 Token::tochars[TOKindex] = "[i]";
3116 Token::tochars[TOKaddress] = "&";
3117 Token::tochars[TOKstar] = "*";
3118 Token::tochars[TOKtilde] = "~";
3119 Token::tochars[TOKdollar] = "$";
3120 Token::tochars[TOKcast] = "cast";
3121 Token::tochars[TOKplusplus] = "++";
3122 Token::tochars[TOKminusminus] = "--";
3123 Token::tochars[TOKtype] = "type";
3124 Token::tochars[TOKquestion] = "?";
3125 Token::tochars[TOKneg] = "-";
3126 Token::tochars[TOKuadd] = "+";
3127 Token::tochars[TOKvar] = "var";
3128 Token::tochars[TOKaddass] = "+=";
3129 Token::tochars[TOKminass] = "-=";
3130 Token::tochars[TOKmulass] = "*=";
3131 Token::tochars[TOKdivass] = "/=";
3132 Token::tochars[TOKmodass] = "%=";
3133 Token::tochars[TOKshlass] = "<<=";
3134 Token::tochars[TOKshrass] = ">>=";
3135 Token::tochars[TOKushrass] = ">>>=";
3136 Token::tochars[TOKandass] = "&=";
3137 Token::tochars[TOKorass] = "|=";
3138 Token::tochars[TOKcatass] = "~=";
3139 Token::tochars[TOKcat] = "~";
3140 Token::tochars[TOKcall] = "call";
3141 Token::tochars[TOKidentity] = "is";
3142 Token::tochars[TOKnotidentity] = "!is";
3143 Token::tochars[TOKendline] = "\\n";
3145 Token::tochars[TOKorass] = "|=";
3146 Token::tochars[TOKidentifier] = "identifier";
3148 // For debugging
3149 Token::tochars[TOKdotexp] = "dotexp";
3150 Token::tochars[TOKdotti] = "dotti";
3151 Token::tochars[TOKdotvar] = "dotvar";
3152 Token::tochars[TOKdottype] = "dottype";
3153 Token::tochars[TOKsymoff] = "symoff";
3154 Token::tochars[TOKtypedot] = "typedot";
3155 Token::tochars[TOKarraylength] = "arraylength";
3156 Token::tochars[TOKarrayliteral] = "arrayliteral";
3157 Token::tochars[TOKassocarrayliteral] = "assocarrayliteral";
3158 Token::tochars[TOKstructliteral] = "structliteral";
3159 Token::tochars[TOKstring] = "string";
3160 Token::tochars[TOKdsymbol] = "symbol";
3161 Token::tochars[TOKtuple] = "tuple";
3162 Token::tochars[TOKdeclaration] = "declaration";
3163 Token::tochars[TOKdottd] = "dottd";
3164 Token::tochars[TOKlogger] = "logger";
3165 Token::tochars[TOKon_scope_exit] = "scope(exit)";