d: Merge dmd, druntime d8e3976a58, phobos 7a6e95688
[official-gcc.git] / gcc / d / dmd / dmacro.d
blobc04fbec731d350538837b1a868573ad42b7ae742
1 /**
2 * Text macro processor for Ddoc.
4 * Copyright: Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved
5 * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright)
6 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/dmacro.d, _dmacro.d)
8 * Documentation: https://dlang.org/phobos/dmd_dmacro.html
9 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/dmacro.d
12 module dmd.dmacro;
14 import core.stdc.ctype;
15 import core.stdc.string;
16 import dmd.common.outbuffer;
17 import dmd.root.rmem;
19 @trusted:
/**
 * Table of Ddoc macros, keyed by macro name, together with the in-place
 * expansion engine that rewrites `$(NAME args)`, `$0`..`$9` and `$+`
 * inside an `OutBuffer`.
 */
struct MacroTable
{
    /**********************************
     * Define name=text macro.
     * If macro `name` already exists, replace the text for it.
     * Params:
     *  name = name of macro
     *  text = text of macro
     */
    void define(const(char)[] name, const(char)[] text) nothrow pure @safe
    {
        //printf("MacroTable::define('%.*s' = '%.*s')\n", cast(int)name.length, name.ptr, text.length, text.ptr);
        if (auto table = name in mactab)
        {
            // Existing entry: keep the Macro object, swap its replacement text.
            (*table).text = text;
            return;
        }
        mactab[name] = new Macro(name, text);
    }

    /// Predicate over a pointer into the text; used to recognise identifier start/tail characters.
    alias fp_t = bool function(const(char)* p) @nogc nothrow pure;

    /*****************************************************
     * Look for macros in buf and expand them in place.
     * Only look at the text in buf from start to pend.
     *
     * Two passes are made over `buf[start .. pend]`:
     * first `$0`..`$9` and `$+` are replaced using `arg`, then
     * `$(NAME ...)` invocations are replaced by the macro text, which is
     * itself expanded recursively.
     *
     * Params:
     *  buf = buffer holding the text; modified in place
     *  start = index of the first character to examine
     *  pend = index one past the last character to examine; updated to the
     *         new end of the region on success, left untouched on failure
     *  arg = argument text of the macro currently being expanded (may be empty)
     *  recursionLimit = remaining nesting budget; decremented on entry
     *  isIdStart = tells whether the pointed-to character can start a macro name
     *  isIdTail = tells whether the pointed-to character can continue a macro name
     *
     * Returns: `true` on success, `false` when the recursion limit was reached
     */
    bool expand(ref OutBuffer buf, size_t start, ref size_t pend, const(char)[] arg, int recursionLimit,
        fp_t isIdStart, fp_t isIdTail) nothrow pure
    {
        version (none)
        {
            printf("Macro::expand(buf[%d..%d], arg = '%.*s')\n", start, pend, cast(int)arg.length, arg.ptr);
            printf("Buf is: '%.*s'\n", cast(int)(pend - start), buf.data + start);
        }
        // limit recursive expansion
        recursionLimit--;
        if (recursionLimit < 0)
            return false;

        size_t end = pend;
        assert(start <= end);
        assert(end <= buf.length);

        /* First pass - replace $0
         */
        // Work on a private copy of arg: the caller's slice may point into buf,
        // which is edited below. The copy is released on every exit path,
        // including the early `return false` ones.
        arg = memdup(arg);
        scope (exit) mem.xfree(cast(char*)arg.ptr);

        for (size_t u = start; u + 1 < end;)
        {
            char* p = cast(char*)buf[].ptr; // buf.data is not loop invariant
            /* Look for $0, but not $$0, and replace it with arg.
             */
            if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+'))
            {
                if (u > start && p[u - 1] == '$')
                {
                    // Don't expand $$0, but replace it with $0
                    buf.remove(u - 1, 1);
                    end--;
                    u += 1; // now u is one past the closing '1'
                    continue;
                }
                char c = p[u + 1];
                int n = (c == '+') ? -1 : c - '0';
                const(char)[] marg;
                if (n == 0)
                {
                    marg = arg;
                }
                else
                    extractArgN(arg, marg, n);
                if (marg.length == 0)
                {
                    // Just remove macro invocation
                    //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    buf.remove(u, 2);
                    end -= 2;
                }
                else if (c == '+')
                {
                    // Replace '$+' with 'arg'
                    //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    buf.remove(u, 2);
                    buf.insert(u, marg);
                    end += marg.length - 2;
                    // Scan replaced text for further expansion
                    size_t mend = u + marg.length;
                    const success = expand(buf, u, mend, null, recursionLimit, isIdStart, isIdTail);
                    if (!success)
                        return false;
                    // Account for any growth/shrinkage caused by the nested expansion.
                    end += mend - (u + marg.length);
                    u = mend;
                }
                else
                {
                    // Replace '$1' with '\xFF{arg\xFF}'
                    //printf("Replacing '$%c' with '\xFF{%.*s\xFF}'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    ubyte[] slice = cast(ubyte[])buf[];
                    slice[u] = 0xFF;
                    slice[u + 1] = '{';
                    buf.insert(u + 2, marg);
                    buf.insert(u + 2 + marg.length, "\xFF}");
                    end += -2 + 2 + marg.length + 2;
                    // Scan replaced text for further expansion
                    size_t mend = u + 2 + marg.length;
                    const success = expand(buf, u + 2, mend, null, recursionLimit, isIdStart, isIdTail);
                    if (!success)
                        return false;
                    end += mend - (u + 2 + marg.length);
                    u = mend;
                }
                //printf("u = %d, end = %d\n", u, end);
                //printf("#%.*s#\n", cast(int)end, &buf.data[0]);
                continue;
            }
            u++;
        }

        /* Second pass - replace other macros
         */
        for (size_t u = start; u + 4 < end;)
        {
            char* p = cast(char*)buf[].ptr; // buf.data is not loop invariant
            /* A valid start of macro expansion is $(c, where c is
             * an id start character, and not $$(c.
             */
            if (p[u] == '$' && p[u + 1] == '(' && isIdStart(p + u + 2))
            {
                //printf("\tfound macro start '%c'\n", p[u + 2]);
                char* name = p + u + 2;
                size_t namelen = 0;
                const(char)[] marg;
                size_t v;

                // Heap buffer holding "name,marg" for DDOC_UNDEFINED_MACRO, if one
                // gets built below. Released when this invocation has been dealt
                // with, whichever branch handles it.
                char* prefixed = null;
                scope (exit) mem.xfree(prefixed);

                /* Scan forward to find end of macro name and
                 * beginning of macro argument (marg).
                 */
                for (v = u + 2; v < end; v += utfStride(p[v]))
                {
                    if (!isIdTail(p + v))
                    {
                        // We've gone past the end of the macro name.
                        namelen = v - (u + 2);
                        break;
                    }
                }
                v += extractArgN(p[v .. end], marg, 0);
                assert(v <= end);
                if (v < end)
                {
                    // v is on the closing ')'
                    if (u > start && p[u - 1] == '$')
                    {
                        // Don't expand $$(NAME), but replace it with $(NAME)
                        buf.remove(u - 1, 1);
                        end--;
                        u = v; // now u is one past the closing ')'
                        continue;
                    }
                    Macro* m = search(name[0 .. namelen]);
                    if (!m)
                    {
                        immutable undef = "DDOC_UNDEFINED_MACRO";
                        m = search(undef);
                        if (m)
                        {
                            // Macro was not defined, so this is an expansion of
                            // DDOC_UNDEFINED_MACRO. Prepend macro name to args.
                            // marg = name[ ] ~ "," ~ marg[ ];
                            if (marg.length)
                            {
                                prefixed = cast(char*)mem.xmalloc(namelen + 1 + marg.length);
                                assert(prefixed);
                                memcpy(prefixed, name, namelen);
                                prefixed[namelen] = ',';
                                memcpy(prefixed + namelen + 1, marg.ptr, marg.length);
                                marg = prefixed[0 .. marg.length + namelen + 1];
                            }
                            else
                            {
                                marg = name[0 .. namelen];
                            }
                        }
                    }
                    if (m)
                    {
                        if (m.inuse && marg.length == 0)
                        {
                            // Remove macro invocation
                            // NOTE(review): unlike the "replace with nothing" branch
                            // below, this path falls through to `u++`, so the character
                            // right after the removed invocation is not examined as a
                            // macro start. Kept as-is; confirm whether that is intended.
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                        }
                        else if (m.inuse && ((arg.length == marg.length && memcmp(arg.ptr, marg.ptr, arg.length) == 0) ||
                                             (arg.length + 4 == marg.length && marg[0] == 0xFF && marg[1] == '{' && memcmp(arg.ptr, marg.ptr + 2, arg.length) == 0 && marg[marg.length - 2] == 0xFF && marg[marg.length - 1] == '}')))
                        {
                            /* Recursive expansion:
                             *   marg is same as arg (with blue paint added)
                             * Just leave in place.
                             */
                        }
                        else
                        {
                            //printf("\tmacro '%.*s'(%.*s) = '%.*s'\n", cast(int)m.namelen, m.name, cast(int)marg.length, marg.ptr, cast(int)m.textlen, m.text);
                            // marg may point into buf, which is about to be resized:
                            // take a private copy and free it on every way out.
                            marg = memdup(marg);
                            scope (exit) mem.xfree(cast(char*)marg.ptr);

                            // Insert replacement text
                            buf.spread(v + 1, 2 + m.text.length + 2);
                            ubyte[] slice = cast(ubyte[])buf[];
                            slice[v + 1] = 0xFF;
                            slice[v + 2] = '{';
                            slice[v + 3 .. v + 3 + m.text.length] = cast(ubyte[])m.text[];
                            slice[v + 3 + m.text.length] = 0xFF;
                            slice[v + 3 + m.text.length + 1] = '}';
                            end += 2 + m.text.length + 2;

                            // Scan replaced text for further expansion
                            m.inuse++;
                            size_t mend = v + 1 + 2 + m.text.length + 2;
                            const success = expand(buf, v + 1, mend, marg, recursionLimit, isIdStart, isIdTail);
                            // Restore the in-use counter before looking at the result so
                            // that a failed expansion does not leave the macro permanently
                            // flagged as being expanded.
                            m.inuse--;
                            if (!success)
                                return false;
                            end += mend - (v + 1 + 2 + m.text.length + 2);
                            // Drop the original "$(NAME args)" text that precedes the expansion.
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                            u += mend - (v + 1);
                            //printf("u = %d, end = %d\n", u, end);
                            //printf("#%.*s#\n", cast(int)(end - u), &buf.data[u]);
                            continue;
                        }
                    }
                    else
                    {
                        // Replace $(NAME) with nothing
                        buf.remove(u, v + 1 - u);
                        end -= (v + 1 - u);
                        continue;
                    }
                }
            }
            u++;
        }
        pend = end;
        return true;
    }

  private:

    /**
     * Look up a macro by name.
     * Params:
     *  name = macro name
     * Returns: the table entry, or `null` if no macro of that name is defined
     */
    Macro* search(const(char)[] name) @nogc nothrow pure @safe
    {
        //printf("Macro::search(%.*s)\n", cast(int)name.length, name.ptr);
        if (auto table = name in mactab)
        {
            //printf("\tfound %d\n", table.textlen);
            return *table;
        }
        return null;
    }

    /// Defined macros, indexed by name.
    private Macro*[const(char)[]] mactab;
}
281 /* ************************************************************************ */
283 private:
/// One entry of the macro table: a name, its replacement text and a re-entrancy counter.
struct Macro
{
    /// Macro name.
    const(char)[] name;

    /// Replacement text substituted for an invocation of the macro.
    const(char)[] text;

    /// Non-zero while the macro is in use (don't expand).
    int inuse = 0;

    /// Create a macro `name` with replacement `text`; `inuse` starts at zero.
    this(const(char)[] name, const(char)[] text) @nogc nothrow pure @safe
    {
        this.text = text;
        this.name = name;
    }
}
/************************
 * Make mutable copy of slice p.
 * Params:
 *      p = slice
 * Returns:
 *      copy allocated with mem.xmalloc(); an empty (null) slice when `p` is
 *      empty, in which case nothing is allocated
 */
char[] memdup(const(char)[] p) nothrow pure
{
    const size_t len = p.length;
    // An empty slice may carry a null pointer. Passing null to memcpy is
    // undefined in C even for a length of 0, so skip the call altogether.
    if (len == 0)
        return null;
    char* copy = cast(char*)mem.xmalloc(len);
    memcpy(copy, p.ptr, len);
    return copy[0 .. len];
}
/**********************************************************
 * Given buffer buf[], extract argument marg[].
 *
 * Scans forward for the right parenthesis that closes the macro
 * invocation, honouring nested parentheses, quoted strings inside HTML
 * tags, `<!-- ... -->` comments and `\xFF{ ... \xFF}` spans left by
 * earlier macro insertions.
 *
 * Params:
 *      buf = source string
 *      marg = set to slice of buf[]
 *      n =     0:      get entire argument
 *              1..9:   get nth argument
 *              -1:     get 2nd through end
 * Returns:
 *      index into buf[] at which scanning stopped: the terminating `,` or
 *      `)`, or buf.length if neither was found
 */
size_t extractArgN(const(char)[] buf, out const(char)[] marg, int n) @nogc nothrow pure
{
    const text = buf.ptr;
    const limit = buf.length;

    uint depth = 1;         // parenthesis nesting; 1 == directly inside the invocation
    ubyte quote = 0;        // active quote character inside an HTML tag, else 0
    uint inComment = 0;     // inside <!-- ... -->
    uint inTag = 0;         // inside <tag ...>
    uint inExpansion = 0;   // nesting of \xFF{ ... \xFF} spans
    uint commas = 0;        // top-level commas seen so far
    size_t pos = 0;
    size_t argStart;

Lrestart:
    // Skip first space, if any, to find the start of the macro argument
    if (n != 1 && pos < limit && isspace(text[pos]))
        pos++;
    argStart = pos;

Lscan:
    for (; pos < limit; pos++)
    {
        char ch = text[pos];
        // True when ch is ordinary text: not in an expansion span, string or comment.
        const bool plain = !inExpansion && !quote && !inComment;

        switch (ch)
        {
        case ',':
            if (plain && depth == 1)
            {
                commas++;
                if (commas == 1 && n == -1)
                {
                    // "2nd through end": the argument starts after the first comma.
                    pos++;
                    goto Lrestart;
                }
                if (commas == n)
                    break Lscan;    // this comma ends the requested argument
                if (commas + 1 == n)
                {
                    // The requested argument starts right after this comma.
                    pos++;
                    goto Lrestart;
                }
            }
            break;

        case '(':
            if (plain)
                depth++;
            break;

        case ')':
            if (plain && --depth == 0)
                break Lscan;        // matching right parenthesis
            break;

        case '"':
        case '\'':
            if (!inExpansion && !inComment && inTag)
            {
                if (ch == quote)
                    quote = 0;
                else if (!quote)
                    quote = ch;
            }
            break;

        case '<':
            if (plain)
            {
                if (pos + 6 < limit && text[pos + 1] == '!' && text[pos + 2] == '-' && text[pos + 3] == '-')
                {
                    inComment = 1;
                    pos += 3;
                }
                else if (pos + 2 < limit && isalpha(text[pos + 1]))
                    inTag = 1;
            }
            break;

        case '>':
            if (!inExpansion)
                inTag = 0;
            break;

        case '-':
            if (!inExpansion && !quote && inComment && pos + 2 < limit && text[pos + 1] == '-' && text[pos + 2] == '>')
            {
                inComment = 0;
                pos += 2;
            }
            break;

        case 0xFF:
            if (pos + 1 < limit)
            {
                if (text[pos + 1] == '{')
                    inExpansion++;
                else if (text[pos + 1] == '}')
                    inExpansion--;
            }
            break;

        default:
            break;
        }
    }

    if (commas == 0 && n == -1)
        marg = text[pos .. pos];        // no 2nd argument present
    else
        marg = text[argStart .. pos];
    //printf("extractArg%d('%.*s') = '%.*s'\n", n, cast(int)end, p, cast(int)marg.length, marg.ptr);
    return pos;
}
/*****************************************
 * Get number of UTF-8 code units in code point that starts with `c`
 * Params:
 *      c = starting code unit
 * Returns: number of UTF-8 code units (i.e. bytes), else 1 on invalid UTF start
 */
@safe
int utfStride(char c) @nogc nothrow pure
{
    // Below 0x80 is a single-unit character; 0x80..0xBF is an invalid UTF start.
    // Both advance by one unit.
    if (c < 0xC0)
        return 1;
    if (c < 0xE0)
        return 2;
    if (c < 0xF0)
        return 3;
    if (c < 0xF8)
        return 4;
    if (c < 0xFC)
        return 5;
    if (c < 0xFE)
        return 6;
    // 0xFE and 0xFF: invalid UTF start
    return 1;
}
// Checks utfStride() at the first code unit of every lead-byte range.
unittest
{
    static struct Case
    {
        ubyte lead;     // first code unit
        int stride;     // expected utfStride() result
    }

    static immutable Case[] cases = [
        Case(0x00, 1),
        Case(0x80, 1),
        Case(0xC0, 2),
        Case(0xE0, 3),
        Case(0xF0, 4),
        Case(0xF8, 5),
        Case(0xFC, 6),
        Case(0xFE, 1),
    ];

    foreach (tc; cases)
        assert(utfStride(cast(char)tc.lead) == tc.stride);
}