expand.c: using dstrings for expansion, so we don't need to worry about too long...
[k8jam.git] / src / expand.c
blob2ba24c09b031a0bba8a0eaad5b9b3fb2a172df86
1 /*
2 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
4 * This file is part of Jam - see jam.c for Copyright information.
5 */
6 /*
7 * expand.c - expand a buffer, given variable values
9 * External routines:
11 * var_expand() - variable-expand input string into list of strings
13 * Internal routines:
15 * var_edit_parse() - parse : modifiers into PATHNAME structure
16 * var_edit_file() - copy input target name to output, modifying filename
17 * var_edit_shift() - do upshift/downshift mods
19 * 01/25/94 (seiwald) - $(X)$(UNDEF) was expanding like plain $(X)
20 * 04/13/94 (seiwald) - added shorthand L0 for null list pointer
21 * 01/20/00 (seiwald) - Upgraded from K&R to ANSI C
22 * 01/11/01 (seiwald) - added support for :E=emptyvalue, :J=joinval
23 * 01/13/01 (seiwald) - :UDJE work on non-filename strings
24 * 02/19/01 (seiwald) - make $($(var):J=x) join multiple values of var
25 * 01/25/02 (seiwald) - fixed broken $(v[1-]), by ian godin
26 * 10/22/02 (seiwald) - list_new() now does its own newstr()/copystr()
27 * 11/04/02 (seiwald) - const-ing for string literals
28 * 12/30/02 (armstrong) - fix out-of-bounds access in var_expand()
30 #include "jam.h"
31 #include "lists.h"
32 #include "variable.h"
33 #include "expand.h"
34 #include "pathsys.h"
35 #include "newstr.h"
38 typedef struct {
39 PATHNAME f; /* :GDBSMR -- pieces */
40 char parent; /* :P -- go to parent directory */
41 char filemods; /* one of the above applied */
42 char downshift; /* :L -- downshift result */
43 char upshift; /* :U -- upshift result */
44 char quote; /* :Q -- quote */
45 PATHPART empty; /* :E -- default for empties */
46 PATHPART join; /* :J -- join list with char */
47 } VAR_EDITS;
50 static void var_edit_parse (char *mods, VAR_EDITS *edits);
51 static void var_edit_file (const char *in, dstring_t *out, VAR_EDITS *edits);
53 static void var_edit_shift (dstring_t *out, int cpos, VAR_EDITS *edits);
54 static void var_edit_quote (dstring_t *out, int cpos);
/*
 * Stand-ins for ':', '[' and ']' inside a $(...) specification.
 * var_expand() substitutes them while copying the specification so that
 * the same characters coming from nested variable values are not taken
 * for modifier/subscript syntax.
 */
#define MAGIC_COLON  '\x01'
#define MAGIC_LEFT   '\x02'
#define MAGIC_RIGHT  '\x03'
63 * var_expand() - variable-expand input string into list of strings
65 * Would just copy input to output, performing variable expansion,
66 * except that since variables can contain multiple values the result
67 * of variable expansion may contain multiple values (a list). Properly
68 * performs "product" operations that occur in "$(var1)xxx$(var2)" or
69 * even "$($(var2))".
71 * Returns a newly created list.
73 * `end` can be NULL, it means: `in` is cstr.
75 LIST *var_expand (LIST *l, const char *in, const char *end, LOL *lol, int cancopyin) {
76 LIST *res;
77 dstring_t obuf, varspec;
78 const char *inp = in;
79 int depth;
80 if (end == NULL) end = in+strlen(in);
81 if (DEBUG_VAREXP) printf("expand '%.*s'\n", (int)(end-in), in);
82 /* this gets alot of cases: $(<) and $(>) */
83 if (end-in == 4 && in[0] == '$' && in[1] == '(' && in[3] == ')') {
84 switch (in[2]) {
85 case '1': case '<': return list_copy(l, lol_get(lol, 0));
86 case '2': case '>': return list_copy(l, lol_get(lol, 1));
89 dstr_init(&obuf);
90 /* just try simple copy of in to out */
91 while (in < end) {
92 if (in[0] == '$' && in[1] == '(') goto expand;
93 dstr_push_char(&obuf, *in++);
95 /* no variables expanded - just add copy of input string to list */
96 /* cancopyin is an optimization: if the input was already a list */
97 /* item, we can use the copystr() to put it on the new list, */
98 /* otherwise, we use the slower newstr() */
99 res = (cancopyin ? list_new(l, inp, 1) : list_new(l, dstr_cstr(&obuf), 0));
100 //if (DEBUG_VAREXP) printf("expand result (copy): '%s'\n", dstr_cstr(&obuf));
101 dstr_done(&obuf);
102 return res;
103 expand:
105 * Input so far (ignore blanks):
107 * stuff-in-outbuf $(variable) remainder
108 * ^ ^
109 * in end
110 * Output so far:
112 * stuff-in-outbuf $
113 * ^ ^
114 * out_buf out
117 * We didn't copy the $ of $(...), so no need to back up.
118 * We now find the matching close paren, copying the variable and
119 * modifiers between the $( and ) temporarily into out_buf, so that
120 * we can replace :'s with MAGIC_COLON. This is necessary to avoid
121 * being confused by modifier values that are variables containing
122 * :'s. Ugly.
124 dstr_init(&varspec);
125 depth = 1;
126 in += 2; /* skip $ and '(' */
127 while (in < end && depth) {
128 int ch = (unsigned char)(*in++);
129 //if (ch <= MAGIC_RIGHT) { fprintf(stderr, "FATAL: variable name can't contain control characters!\n"); abort(); }
130 switch (ch) {
131 case '(': ++depth; break;
132 case ')': --depth; break;
133 case ':': ch = MAGIC_COLON; break;
134 case '[': ch = MAGIC_LEFT; break;
135 case ']': ch = MAGIC_RIGHT; break;
137 dstr_push_char(&varspec, ch);
139 /* Copied) - back up. */
140 dstr_pop_char(&varspec);
141 //if (DEBUG_VAREXP) printf("expand varspec: '%s'\n", dstr_cstr(&varspec));
143 * Input so far (ignore blanks):
145 * stuff-in-outbuf $(variable) remainder
146 * ^ ^
147 * in end
148 * Output so far:
150 * stuff-in-outbuf variable
151 * ^ ^ ^
152 * out_buf out ov
154 * Later we will overwrite 'variable' in out_buf, but we'll be
155 * done with it by then. 'variable' may be a multi-element list,
156 * so may each value for '$(variable element)', and so may 'remainder'.
157 * Thus we produce a product of three lists.
160 LIST *variables = NULL;
161 LIST *remainder = NULL;
162 LIST *vars;
163 int cpos;
164 dstring_t varname;
165 dstr_init(&varname);
166 /* recursively expand variable name & rest of input */
167 if (dstr_cstr(&varspec)[0]) variables = var_expand(L0, dstr_cstr(&varspec), NULL, lol, 0);
168 if (in < end) remainder = var_expand(L0, in, end, lol, 0);
169 /* now produce the result chain */
170 cpos = dstr_len(&obuf);
171 /* for each variable name */
172 for (vars = variables; vars != NULL; vars = list_next(vars)) {
173 LIST *value, *evalue = NULL;
174 char *colon, *bracket, *vtmp;
175 int sub1 = 0, sub2 = -1;
176 VAR_EDITS edits;
177 /* look for a : modifier in the variable name */
178 /* must copy into varname so we can modify it */
179 dstr_clear(&varname);
180 dstr_push_cstr(&varname, vars->string);
181 vtmp = dstr_cstr(&varname); /* WARNING! don't forget to update `vtmp` if one of dstr updating functions called! */
182 //if (DEBUG_VAREXP) printf(" expand varname: '%s'\n", vtmp);
183 if ((colon = strchr(vtmp, MAGIC_COLON))) {
184 *colon = '\0';
185 var_edit_parse(colon+1, &edits);
187 /* look for [x-y] subscripting */
188 /* sub1 is x (0 default) */
189 /* sub2 is length (-1 means forever) */
190 if ((bracket = strchr(vtmp, MAGIC_LEFT))) {
191 char *dash;
192 if ((dash = strchr(bracket+1, '-'))) *dash = '\0';
193 sub1 = atoi(bracket+1)-1;
194 if (!dash) sub2 = 1;
195 else if (!dash[1] || dash[1] == MAGIC_RIGHT) sub2 = -1;
196 else sub2 = atoi(dash+1)-sub1;
197 *bracket = '\0';
199 /* get variable value, specially handling $(<), $(>), $(n) */
200 if (vtmp[0] == '<' && !vtmp[1]) value = lol_get(lol, 0);
201 else if (vtmp[0] == '>' && !vtmp[1]) value = lol_get(lol, 1);
202 else if (vtmp[0] >= '1' && vtmp[0] <= '9' && !vtmp[1]) value = lol_get(lol, vtmp[0]-'1');
203 else value = var_get(vtmp);
204 /* the fast path: $(x) - just copy the variable value */
205 /* this is only an optimization */
206 if (cpos == 0 && !bracket && !colon && in == end) {
207 //if (DEBUG_VAREXP) printf(" expand valcopy: '%s'\n", value->string);
208 l = list_copy(l, value);
209 continue;
211 /* handle start subscript */
212 while (sub1 > 0 && value) {
213 --sub1;
214 value = list_next(value);
216 /* empty w/ :E=default? */
217 if (!value && colon && edits.empty.ptr) evalue = value = list_new(L0, edits.empty.ptr, 0);
218 //if (DEBUG_VAREXP) { printf(" expanding values for var '%s': ", vtmp); list_print(value); printf("\n"); printf(" curout: (%d) '%s'\n", cpos, dstr_cstr(&obuf)); }
219 /* for each variable value */
220 for (; value; value = list_next(value)) {
221 /* handle end subscript (length actually) */
222 if (sub2 >= 0 && --sub2 < 0) break;
223 /* apply : mods, if present */
224 dstr_chop(&obuf, cpos);
225 if (colon != NULL) {
226 /* the following line will *ADD* chars to 'obuf' */
227 if (edits.filemods) var_edit_file(value->string, &obuf, &edits); else dstr_push_cstr(&obuf, value->string);
228 /* and the following lines will *CHANGE* chars in 'obuf' */
229 if (edits.upshift || edits.downshift) var_edit_shift(&obuf, cpos, &edits);
230 if (edits.quote) var_edit_quote(&obuf, cpos);
231 /* handle :J=joinval */
232 /* if we have more values for this var, just */
233 /* keep appending them (with the join value) */
234 /* rather than creating separate LIST elements */
235 if (edits.join.ptr && (list_next(value) || list_next(vars))) {
236 dstr_push_cstr(&obuf, edits.join.ptr);
237 cpos = dstr_len(&obuf);
238 continue;
240 } else {
241 /* no modifiers, just use literal value */
242 dstr_push_cstr(&obuf, value->string);
244 /* if no remainder, append result to output chain */
245 if (in == end) { l = list_new(l, dstr_cstr(&obuf), 0); continue; }
246 /* for each remainder, append the complete string to the output chain */
247 /* remember the end of the variable expansion so we can just tack on each instance of 'remainder' */
249 int np = dstr_len(&obuf);
250 //if (DEBUG_VAREXP) { printf(" remainders for var '%s': ", vtmp); list_print(remainder); printf("\n"); printf(" curout: (%d) '%s'\n", cpos, dstr_cstr(&obuf)); }
251 for (LIST *rem = remainder; rem; rem = list_next(rem)) {
252 dstr_chop(&obuf, np);
253 dstr_push_cstr(&obuf, rem->string);
254 l = list_new(l, dstr_cstr(&obuf), 0);
258 /* toss used empty */
259 if (evalue) list_free(evalue);
260 } /* for */
261 /* variables & remainder were gifts from var_expand and must be freed */
262 if (variables) list_free(variables);
263 if (remainder) list_free(remainder);
264 if (DEBUG_VAREXP) { printf("expanded to "); list_print(l); printf("\n"); }
265 dstr_done(&varname);
266 dstr_done(&varspec);
267 dstr_done(&obuf);
268 return l;
274 * var_edit_parse() - parse : modifiers into PATHNAME structure
276 * The : modifiers in a $(varname:modifier) currently support replacing
277 * or omitting elements of a filename, and so they are parsed into a
278 * PATHNAME structure (which contains pointers into the original string).
280 * Modifiers of the form "X=value" replace the component X with
281 * the given value. Modifiers without the "=value" cause everything
282 * but the component X to be omitted. X is one of:
284 * G <grist>
285 * D directory name
286 * B base name
287 * S .suffix
288 * M (member)
289 * R root directory - prepended to whole path
291 * This routine sets:
293 * f->f_xxx.ptr = 0
294 * f->f_xxx.len = 0
295 * -> leave the original component xxx
297 * f->f_xxx.ptr = string
298 * f->f_xxx.len = strlen( string )
299 * -> replace component xxx with string
301 * f->f_xxx.ptr = ""
302 * f->f_xxx.len = 0
303 * -> omit component xxx
305 * var_edit_file() below and path_build() obligingly follow this convention.
307 static void var_edit_parse (char *mods, VAR_EDITS *edits) {
308 int havezeroed = 0;
309 memset((char *)edits, 0, sizeof(*edits));
310 while (*mods) {
311 char *p;
312 PATHPART *fp;
313 switch (*mods++) {
314 case 'L': edits->downshift = 1; continue;
315 case 'U': edits->upshift = 1; continue;
316 case 'Q': edits->quote = 1; continue;
317 case 'P': edits->parent = edits->filemods = 1; continue;
318 case 'E': fp = &edits->empty; goto strval;
319 case 'J': fp = &edits->join; goto strval;
320 case 'G': fp = &edits->f.f_grist; goto fileval;
321 case 'R': fp = &edits->f.f_root; goto fileval;
322 case 'D': fp = &edits->f.f_dir; goto fileval;
323 case 'B': fp = &edits->f.f_base; goto fileval;
324 case 'S': fp = &edits->f.f_suffix; goto fileval;
325 case 'M': fp = &edits->f.f_member; goto fileval;
326 default: return; /* should complain, but so what... */
328 fileval:
329 /* handle :CHARS, where each char (without a following =) selects a particular file path element */
330 /* on the first such char, we deselect all others (by setting ptr = "", len = 0) */
331 /* and for each char we select that element (by setting ptr = 0) */
332 edits->filemods = 1;
333 if (*mods != '=') {
334 if (!havezeroed++) {
335 for (int i = 0; i < 6; ++i) {
336 edits->f.part[i].len = 0;
337 edits->f.part[i].ptr = "";
340 fp->ptr = 0;
341 continue;
343 strval:
344 /* handle :X=value, or :X */
345 if (*mods != '=') {
346 fp->ptr = "";
347 fp->len = 0;
348 } else {
349 //FIXME: ugly hack to allow things like $(>[2:S=.tiles]) (they are bad, but...)
350 p = mods;
351 while (*p && *p != MAGIC_COLON && *p != MAGIC_LEFT && *p != MAGIC_RIGHT) ++p;
352 if (*p) {
353 *p = 0;
354 fp->ptr = ++mods;
355 fp->len = p-mods;
356 mods = p+1;
357 } else {
358 fp->ptr = ++mods;
359 fp->len = strlen(fp->ptr);
360 mods += fp->len;
361 //fprintf(stderr, "len=%d [", fp->len); fwrite(fp->ptr, 1, fp->len, stderr); fprintf(stderr, "]\n");
363 // another ugly hack
364 if (fp == &edits->join) {
365 if (strcmp(fp->ptr, "|space|") == 0) { strcpy((char *)fp->ptr, " "); fp->len = 1; }
366 else if (strcmp(fp->ptr, "|tab|") == 0) { strcpy((char *)fp->ptr, "\t"); fp->len = 1; }
374 * var_edit_file() - copy input target name to output, modifying filename
376 static void var_edit_file (const char *in, dstring_t *out, VAR_EDITS *edits) {
377 PATHNAME pathname;
378 static char obuf[8192];
379 /* parse apart original filename, putting parts into "pathname" */
380 path_parse(in, &pathname);
381 /* replace any pathname with edits->f */
382 if (edits->f.f_grist.ptr) pathname.f_grist = edits->f.f_grist;
383 if (edits->f.f_root.ptr) pathname.f_root = edits->f.f_root;
384 if (edits->f.f_dir.ptr) pathname.f_dir = edits->f.f_dir;
385 if (edits->f.f_base.ptr) pathname.f_base = edits->f.f_base;
386 if (edits->f.f_suffix.ptr) pathname.f_suffix = edits->f.f_suffix;
387 if (edits->f.f_member.ptr) pathname.f_member = edits->f.f_member;
388 /* if requested, modify pathname to point to parent */
389 if (edits->parent) path_parent(&pathname);
390 /* put filename back together */
391 path_build(&pathname, obuf, 0);
392 dstr_push_cstr(out, obuf);
397 * var_edit_shift() - do upshift/downshift mods
399 static void var_edit_shift (dstring_t *out, int cpos, VAR_EDITS *edits) {
400 char *p = dstr_cstr(out)+cpos; /* it's safe */
401 if (edits->upshift) {
402 for(; *p; ++p) *p = toupper(*p);
403 } else if (edits->downshift) {
404 for(; *p; ++p) *p = tolower(*p);
/*
 * need_screen() - does the given char need to be screened?
 *
 * Returns 1 for characters that var_edit_quote() prefixes with a
 * backslash, 0 for everything else (including NUL and bytes >= 0x7e).
 */
static inline int need_screen (char ch) {
  const unsigned char c = (unsigned char)ch;

  if (c == 0) return 0;
  /* control characters, space and ! " # $ % & ' ( ) * */
  if (c <= '*') return 1;
  switch (c) {
    case ';': case '<':
    case '>': case '?':
    case '[': case '\\': case ']':
    case '`':
    case '{': case '|': case '}':
      return 1;
    default:
      return 0;
  }
}
425 * var_edit_quote() - do 'shell quoting'
427 static void var_edit_quote (dstring_t *out, int cpos) {
428 int needq = 0;
429 char *p = dstr_cstr(out)+cpos; /* it's safe */
430 for (const char *t = p; *t; ++t) if (need_screen(*t)) { needq = 1; break; }
431 if (needq) {
432 dstring_t ts;
433 dstr_init(&ts);
434 for (; *p; ++p) {
435 if (need_screen(*p)) dstr_push_char(&ts, '\\');
436 dstr_push_char(&ts, *p);
438 dstr_chop(out, cpos);
439 dstr_push_buf(out, dstr_cstr(&ts), dstr_len(&ts));
440 dstr_done(&ts);