2 * Defines lexical tokens.
4 * Specification: $(LINK2 https://dlang.org/spec/lex.html#tokens, Tokens)
6 * Copyright: Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved
7 * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright)
8 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/tokens.d, _tokens.d)
10 * Documentation: https://dlang.org/phobos/dmd_tokens.html
11 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/tokens.d
16 import core
.stdc
.ctype
;
17 import core
.stdc
.stdio
;
18 import core
.stdc
.string
;
19 import dmd
.identifier
;
21 import dmd
.root
.ctfloat
;
22 import dmd
.common
.outbuffer
;
76 unsignedRightShiftAssign
,
77 concatenateAssign
, // ~=
236 moduleString
, // __MODULE__
237 functionString
, // __FUNCTION__
238 prettyFunction
, // __PRETTY_FUNCTION__
250 endOfLine
, // \n, \r, \u2028, \u2029
273 // C only extended keywords
313 dotTemplateDeclaration
,
326 delegateFunctionPointer
,
345 unsignedRightShiftAssign
,
347 concatenateAssign
, // ~=
348 concatenateElemAssign
,
349 concatenateDcharAssign
,
411 moduleString
, // __MODULE__
412 functionString
, // __FUNCTION__
413 prettyFunction
, // __PRETTY_FUNCTION__
423 compoundLiteral
, // ( type-name ) { initializer-list }
430 enum FirstCKeyword
= TOK
.inline
;
432 // Assert that all token enum members have consecutive values and
433 // that none of them overlap
435 foreach (idx
, enumName
; __traits(allMembers
, TOK
)) {
436 static if (idx
!= __traits(getMember
, TOK
, enumName
)) {
437 pragma(msg
, "Error: Expected TOK.", enumName
, " to be ", idx
, " but is ", __traits(getMember
, TOK
, enumName
));
444 /****************************************
447 private immutable TOK
[] keywords
=
510 TOK
.foreach_reverse_
,
580 // C only extended keywords
592 // Initialize the identifier pool
593 shared static this() nothrow
595 Identifier
.initTable();
596 foreach (kw
; keywords
)
598 //printf("keyword[%d] = '%s'\n",kw, Token.tochars[kw].ptr);
599 Identifier
.idPool(Token
.tochars
[kw
], kw
);
603 /************************************
604 * This is used to pick the C keywords out of the tokens.
605 * If it's not a C keyword, then it's an identifier.
607 static immutable TOK
[TOK
.max
+ 1] Ckeywords
=
611 TOK
[TOK
.max
+ 1] tab
= identifier
; // default to identifier
612 enum Ckwds
= [ auto_
, break_
, case_
, char_
, const_
, continue_
, default_
, do_
, float64
, else_
,
613 enum_
, extern_
, float32
, for_
, goto_
, if_
, inline
, int32
, int64
, register
,
614 restrict
, return_
, int16
, signed
, sizeof_
, static_
, struct_
, switch_
, typedef_
,
615 union_
, unsigned
, void_
, volatile, while_
, asm_
, typeof_
,
616 _Alignas
, _Alignof
, _Atomic
, _Bool
, _Complex
, _Generic
, _Imaginary
, _Noreturn
,
617 _Static_assert
, _Thread_local
,
618 _import
, __cdecl
, __declspec
, __stdcall
, __thread
, __pragma
, __int128
, __attribute__
,
622 tab
[kw
] = cast(TOK
) kw
;
628 struct InterpolatedSet
{
629 // all strings in the parts are zero terminated at length+1
633 /***********************************************************
635 extern (C
++) struct Token
639 const(char)* ptr
; // pointer to first character of this token within buffer
641 const(char)[] blockComment
; // doc comment string prior to this token
642 const(char)[] lineComment
; // doc comment for previous token
656 const(char)* ustring
; // UTF8 string
657 InterpolatedSet
* interpolatedSet
;
660 ubyte postfix
; // 'c', 'w', 'd'
666 extern (D
) private static immutable string
[TOK
.max
+ 1] tochars
=
671 TOK
.assert_
: "assert",
677 TOK
.delete_
: "delete",
679 TOK
.module_
: "module",
680 TOK
.pragma_
: "pragma",
681 TOK
.typeof_
: "typeof",
682 TOK
.typeid_
: "typeid",
683 TOK
.template_
: "template",
695 TOK
.float32
: "float",
696 TOK
.float64
: "double",
702 TOK
.imaginary32
: "ifloat",
703 TOK
.imaginary64
: "idouble",
704 TOK
.imaginary80
: "ireal",
705 TOK
.complex32
: "cfloat",
706 TOK
.complex64
: "cdouble",
707 TOK
.complex80
: "creal",
708 TOK
.delegate_
: "delegate",
709 TOK
.function_
: "function",
716 TOK
.switch_
: "switch",
718 TOK
.default_
: "default",
720 TOK
.continue_
: "continue",
721 TOK
.synchronized_
: "synchronized",
722 TOK
.return_
: "return",
726 TOK
.finally_
: "finally",
729 TOK
.foreach_
: "foreach",
730 TOK
.foreach_reverse_
: "foreach_reverse",
732 TOK
.struct_
: "struct",
734 TOK
.interface_
: "interface",
737 TOK
.import_
: "import",
739 TOK
.static_
: "static",
743 TOK
.override_
: "override",
744 TOK
.abstract_
: "abstract",
746 TOK
.deprecated_
: "deprecated",
753 TOK
.extern_
: "extern",
754 TOK
.private_
: "private",
755 TOK
.package_
: "package",
756 TOK
.protected_
: "protected",
757 TOK
.public_
: "public",
758 TOK
.export_
: "export",
759 TOK
.invariant_
: "invariant",
760 TOK
.unittest_
: "unittest",
761 TOK
.version_
: "version",
762 TOK
.argumentTypes
: "__argTypes",
763 TOK
.parameters
: "__parameters",
767 TOK
.nothrow_
: "nothrow",
768 TOK
.gshared
: "__gshared",
769 TOK
.traits
: "__traits",
770 TOK
.vector
: "__vector",
771 TOK
.file
: "__FILE__",
772 TOK
.fileFullPath
: "__FILE_FULL_PATH__",
773 TOK
.line
: "__LINE__",
774 TOK
.moduleString
: "__MODULE__",
775 TOK
.functionString
: "__FUNCTION__",
776 TOK
.prettyFunction
: "__PRETTY_FUNCTION__",
777 TOK
.shared_
: "shared",
778 TOK
.immutable_
: "immutable",
780 TOK
.endOfFile
: "End of File",
783 TOK
.leftParenthesis
: "(",
784 TOK
.rightParenthesis
: ")",
785 TOK
.leftBracket
: "[",
786 TOK
.rightBracket
: "]",
795 TOK
.greaterThan
: ">",
796 TOK
.lessOrEqual
: "<=",
797 TOK
.greaterOrEqual
: ">=",
802 TOK
.rightShift
: ">>",
803 TOK
.unsignedRightShift
: ">>>",
810 TOK
.dotDotDot
: "...",
818 TOK
.minusMinus
: "--",
826 TOK
.leftShiftAssign
: "<<=",
827 TOK
.rightShiftAssign
: ">>=",
828 TOK
.unsignedRightShiftAssign
: ">>>=",
831 TOK
.concatenateAssign
: "~=",
833 TOK
.notIdentity
: "!is",
834 TOK
.identifier
: "identifier",
837 TOK
.powAssign
: "^^=",
841 TOK
.colonColon
: "::",
845 TOK
.string_
: "string",
846 TOK
.interpolated
: "interpolated string",
847 TOK
.onScopeExit
: "scope(exit)",
848 TOK
.onScopeSuccess
: "scope(success)",
849 TOK
.onScopeFailure
: "scope(failure)",
852 TOK
.reserved
: "reserved",
853 TOK
.comment
: "comment",
854 TOK
.int32Literal
: "int32v",
855 TOK
.uns32Literal
: "uns32v",
856 TOK
.int64Literal
: "int64v",
857 TOK
.uns64Literal
: "uns64v",
858 TOK
.int128Literal
: "int128v",
859 TOK
.uns128Literal
: "uns128v",
860 TOK
.float32Literal
: "float32v",
861 TOK
.float64Literal
: "float64v",
862 TOK
.float80Literal
: "float80v",
863 TOK
.imaginary32Literal
: "imaginary32v",
864 TOK
.imaginary64Literal
: "imaginary64v",
865 TOK
.imaginary80Literal
: "imaginary80v",
866 TOK
.charLiteral
: "charv",
867 TOK
.wcharLiteral
: "wcharv",
868 TOK
.dcharLiteral
: "dcharv",
869 TOK
.wchar_tLiteral
: "wchar_tv",
870 TOK
.hexadecimalString
: "xstring",
871 TOK
.endOfLine
: "\\n",
872 TOK
.whitespace
: "whitespace",
875 TOK
.inline
: "inline",
876 TOK
.register
: "register",
877 TOK
.restrict
: "restrict",
878 TOK
.signed
: "signed",
879 TOK
.sizeof_
: "sizeof",
880 TOK
.typedef_
: "typedef",
881 TOK
.unsigned
: "unsigned",
882 TOK
.volatile : "volatile",
883 TOK
._Alignas
: "_Alignas",
884 TOK
._Alignof
: "_Alignof",
885 TOK
._Atomic
: "_Atomic",
887 TOK
._Complex
: "_Complex",
888 TOK
._Generic
: "_Generic",
889 TOK
._Imaginary
: "_Imaginary",
890 TOK
._Noreturn
: "_Noreturn",
891 TOK
._Static_assert
: "_Static_assert",
892 TOK
._Thread_local
: "_Thread_local",
894 // C only extended keywords
895 TOK
._assert
: "__check",
896 TOK
._import
: "__import",
897 TOK
.__cdecl
: "__cdecl",
898 TOK
.__declspec
: "__declspec",
899 TOK
.__stdcall
: "__stdcall",
900 TOK
.__thread
: "__thread",
901 TOK
.__pragma
: "__pragma",
902 TOK
.__int128
: "__int128",
903 TOK
.__attribute__
: "__attribute__",
914 extern (D
) int isKeyword() pure const @safe @nogc
916 foreach (kw
; keywords
)
924 extern(D
) void appendInterpolatedPart(const ref OutBuffer buf
) {
925 appendInterpolatedPart(cast(const(char)*)buf
[].ptr
, buf
.length
);
927 extern(D
) void appendInterpolatedPart(const(char)[] str) {
928 appendInterpolatedPart(str.ptr
, str.length
);
930 extern(D
) void appendInterpolatedPart(const(char)* ptr
, size_t length
) {
931 assert(value
== TOK
.interpolated
);
932 if (interpolatedSet
is null)
933 interpolatedSet
= new InterpolatedSet
;
935 auto s
= cast(char*)mem
.xmalloc_noscan(length
+ 1);
936 memcpy(s
, ptr
, length
);
939 interpolatedSet
.parts
~= cast(string
) s
[0 .. length
];
943 * Set to contents of ptr[0..length]
945 * ptr = pointer to string
946 * length = length of string
948 void setString(const(char)* ptr
, size_t length
)
951 auto s
= cast(char*)mem
.xmalloc_noscan(length
+ 1);
952 memcpy(s
, ptr
, length
);
955 len
= cast(uint)length
;
960 * Set to contents of buf
962 * buf = string (not zero terminated)
964 void setString(const ref OutBuffer buf
)
966 setString(cast(const(char)*)buf
[].ptr
, buf
.length
);
970 * Set to empty string
980 extern (C
++) const(char)* toChars() const
982 return toString().ptr
;
985 /*********************************
987 * a zero-terminated string representation of the token,
988 * sometimes reusing a static buffer, sometimes leaking memory
990 extern (D
) const(char)[] toString() const
992 const bufflen
= 3 + 3 * floatvalue
.sizeof
+ 1;
993 __gshared
char[bufflen
+ 2] buffer
; // extra 2 for suffixes
994 char* p
= &buffer
[0];
997 case TOK
.int32Literal
:
998 const length
= snprintf(p
, bufflen
, "%d", cast(int)intvalue
);
999 return p
[0 .. length
];
1001 case TOK
.uns32Literal
:
1002 case TOK
.wchar_tLiteral
:
1003 const length
= snprintf(p
, bufflen
, "%uU", cast(uint)unsvalue
);
1004 return p
[0 .. length
];
1006 case TOK
.wcharLiteral
:
1007 case TOK
.dcharLiteral
:
1008 case TOK
.charLiteral
:
1010 buf
.writeSingleCharLiteral(cast(dchar) intvalue
);
1011 return buf
.extractSlice(true);
1013 case TOK
.int64Literal
:
1014 const length
= snprintf(p
, bufflen
, "%lldL", cast(long)intvalue
);
1015 return p
[0 .. length
];
1017 case TOK
.uns64Literal
:
1018 const length
= snprintf(p
, bufflen
, "%lluUL", cast(ulong)unsvalue
);
1019 return p
[0 .. length
];
1021 case TOK
.float32Literal
:
1022 const length
= CTFloat
.sprint(p
, bufflen
, 'g', floatvalue
);
1025 return p
[0 .. length
+ 1];
1027 case TOK
.float64Literal
:
1028 const length
= CTFloat
.sprint(p
, bufflen
, 'g', floatvalue
);
1029 return p
[0 .. length
];
1031 case TOK
.float80Literal
:
1032 const length
= CTFloat
.sprint(p
, bufflen
, 'g', floatvalue
);
1035 return p
[0 .. length
+ 1];
1037 case TOK
.imaginary32Literal
:
1038 const length
= CTFloat
.sprint(p
, bufflen
, 'g', floatvalue
);
1040 p
[length
+ 1] = 'i';
1042 return p
[0 .. length
+ 2];
1044 case TOK
.imaginary64Literal
:
1045 const length
= CTFloat
.sprint(p
, bufflen
, 'g', floatvalue
);
1048 return p
[0 .. length
+ 1];
1050 case TOK
.imaginary80Literal
:
1051 const length
= CTFloat
.sprint(p
, bufflen
, 'g', floatvalue
);
1053 p
[length
+ 1] = 'i';
1055 return p
[0 .. length
+ 2];
1060 for (size_t i
= 0; i
< len
;)
1063 utf_decodeChar(ustring
[0 .. len
], i
, c
);
1064 writeCharLiteral(buf
, c
);
1068 buf
.writeByte(postfix
);
1069 return buf
.extractSlice(true);
1071 case TOK
.hexadecimalString
:
1075 foreach (size_t i
; 0 .. len
)
1079 buf
.printf("%02x", ustring
[i
]);
1083 buf
.writeByte(postfix
);
1084 return buf
.extractSlice(true);
1086 case TOK
.identifier
:
1107 case TOK
.imaginary32
:
1108 case TOK
.imaginary64
:
1109 case TOK
.imaginary80
:
1114 return ident
.toString();
1117 return tochars
[value
];
1121 static const(char)* toChars(TOK value
)
1123 return toString(value
).ptr
;
1126 extern (D
) static string
toString(TOK value
) pure nothrow @nogc @safe
1128 return tochars
[value
];
1133 * Write a character, using a readable escape sequence if needed
1135 * Useful for printing "" string literals in e.g. error messages, ddoc, or the `.stringof` property
1138 * buf = buffer to append character in
1139 * c = code point to write
1142 void writeCharLiteral(ref OutBuffer buf
, dchar c
)
1147 buf
.writestring("\\0");
1150 buf
.writestring("\\n");
1153 buf
.writestring("\\r");
1156 buf
.writestring("\\t");
1159 buf
.writestring("\\b");
1162 buf
.writestring("\\f");
1166 buf
.writeByte('\\');
1174 buf
.printf("\\x%02x", c
);
1176 else if (c
<= 0xFFFF)
1177 buf
.printf("\\u%04x", c
);
1179 buf
.printf("\\U%08x", c
);
1187 foreach(dchar d
; "a\n\r\t\b\f\0\x11\u7233\U00017233"d
)
1189 writeCharLiteral(buf
, d
);
1191 assert(buf
[] == `a\n\r\t\b\f\0\x11\u7233\U00017233`);
1195 * Write a single-quoted character literal
1197 * Useful for printing '' char literals in e.g. error messages, ddoc, or the `.stringof` property
1200 * buf = buffer to append character in
1201 * c = code point to write
1204 void writeSingleCharLiteral(ref OutBuffer buf
, dchar c
)
1206 buf
.writeByte('\'');
1208 buf
.writeByte('\\');
1213 writeCharLiteral(buf
, c
);
1215 buf
.writeByte('\'');
1221 writeSingleCharLiteral(buf
, '\'');
1222 assert(buf
[] == `'\''`);
1224 writeSingleCharLiteral(buf
, '"');
1225 assert(buf
[] == `'"'`);
1227 writeSingleCharLiteral(buf
, '\n');
1228 assert(buf
[] == `'\n'`);