built-in jambase is packed now
[k8jam.git] / src / expand.c
blobc18f42bd1d399e64923407b5945fe4268be53890
/*
 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
 *
 * This file is part of Jam - see jam.c for Copyright information.
 */
/*
 * expand.c - expand a buffer, given variable values
 *
 * External routines:
 *
 *  var_expand() - variable-expand input string into list of strings
 *
 * Internal routines:
 *
 *  var_edit_parse() - parse : modifiers into VAR_EDITS structure
 *  var_edit_file() - copy input target name to output, modifying filename
 *  var_edit_shift() - do upshift/downshift mods
 *  var_edit_quote() - do :Q quoting
 *
 * 01/25/94 (seiwald) - $(X)$(UNDEF) was expanding like plain $(X)
 * 04/13/94 (seiwald) - added shorthand L0 for null list pointer
 * 01/20/00 (seiwald) - Upgraded from K&R to ANSI C
 * 01/11/01 (seiwald) - added support for :E=emptyvalue, :J=joinval
 * 01/13/01 (seiwald) - :UDJE work on non-filename strings
 * 02/19/01 (seiwald) - make $($(var):J=x) join multiple values of var
 * 01/25/02 (seiwald) - fixed broken $(v[1-]), by ian godin
 * 10/22/02 (seiwald) - list_new() now does its own newstr()/copystr()
 * 11/04/02 (seiwald) - const-ing for string literals
 * 12/30/02 (armstrong) - fix out-of-bounds access in var_expand()
 */
30 #include "jam.h"
31 #include "lists.h"
32 #include "variable.h"
33 #include "expand.h"
34 #include "pathsys.h"
35 #include "newstr.h"
38 typedef struct {
39 PATHNAME f; /* :GDBSMR -- pieces */
40 char parent; /* :P -- go to parent directory */
41 char filemods; /* one of the above applied */
42 char downshift; /* :L -- downshift result */
43 char upshift; /* :U -- upshift result */
44 char quote; /* :Q -- quote */
45 PATHPART empty; /* :E -- default for empties */
46 PATHPART join; /* :J -- join list with char */
47 } VAR_EDITS;
50 static void var_edit_parse (char *mods, VAR_EDITS *edits);
51 static void var_edit_file (const char *in, char *out, VAR_EDITS *edits);
52 static void var_edit_shift (char *out, VAR_EDITS *edits);
53 static void var_edit_quote (char *out);
55 #define MAGIC_COLON '\001'
56 #define MAGIC_LEFT '\002'
57 #define MAGIC_RIGHT '\003'
61 * var_expand() - variable-expand input string into list of strings
63 * Would just copy input to output, performing variable expansion,
64 * except that since variables can contain multiple values the result
65 * of variable expansion may contain multiple values (a list). Properly
66 * performs "product" operations that occur in "$(var1)xxx$(var2)" or
67 * even "$($(var2))".
69 * Returns a newly created list.
71 LIST *var_expand (LIST *l, const char *in, const char *end, LOL *lol, int cancopyin) {
72 char out_buf[MAXSYM];
73 char *out = out_buf;
74 const char *inp = in;
75 char *ov; /* for temp copy of variable in outbuf */
76 int depth;
78 if (DEBUG_VAREXP) printf("expand '%.*s'\n", (int)(end-in), in);
79 /* this gets alot of cases: $(<) and $(>) */
80 if (end-in == 4 && in[0] == '$' && in[1] == '(' && in[3] == ')') {
81 switch (in[2]) {
82 case '1': case '<': return list_copy(l, lol_get(lol, 0));
83 case '2': case '>': return list_copy(l, lol_get(lol, 1));
86 /* just try simple copy of in to out */
87 while (in < end) {
88 if ((*out++ = *in++) == '$' && *in == '(') goto expand;
90 /* no variables expanded - just add copy of input string to list */
91 /* cancopyin is an optimization: if the input was already a list */
92 /* item, we can use the copystr() to put it on the new list, */
93 /* otherwise, we use the slower newstr() */
94 *out = '\0';
95 if (cancopyin) return list_new(l, inp, 1); else return list_new(l, out_buf, 0);
96 expand:
98 * Input so far (ignore blanks):
100 * stuff-in-outbuf $(variable) remainder
101 * ^ ^
102 * in end
103 * Output so far:
105 * stuff-in-outbuf $
106 * ^ ^
107 * out_buf out
110 * We just copied the $ of $(...), so back up one on the output.
111 * We now find the matching close paren, copying the variable and
112 * modifiers between the $(and) temporarily into out_buf, so that
113 * we can replace :'s with MAGIC_COLON. This is necessary to avoid
114 * being confused by modifier values that are variables containing
115 * :'s. Ugly.
117 depth = 1;
118 --out; ++in;
119 ov = out;
120 while (in < end && depth) {
121 switch (*ov++ = *in++) {
122 case '(': ++depth; break;
123 case ')': --depth; break;
124 case ':': ov[-1] = MAGIC_COLON; break;
125 case '[': ov[-1] = MAGIC_LEFT; break;
126 case ']': ov[-1] = MAGIC_RIGHT; break;
129 /* Copied) - back up. */
130 --ov;
132 * Input so far (ignore blanks):
134 * stuff-in-outbuf $(variable) remainder
135 * ^ ^
136 * in end
137 * Output so far:
139 * stuff-in-outbuf variable
140 * ^ ^ ^
141 * out_buf out ov
143 * Later we will overwrite 'variable' in out_buf, but we'll be
144 * done with it by then. 'variable' may be a multi-element list,
145 * so may each value for '$(variable element)', and so may 'remainder'.
146 * Thus we produce a product of three lists.
149 LIST *variables = 0;
150 LIST *remainder = 0;
151 LIST *vars;
153 /* recursively expand variable name & rest of input */
154 if (out < ov) variables = var_expand(L0, out, ov, lol, 0);
155 if (in < end) remainder = var_expand(L0, in, end, lol, 0);
156 /* now produce the result chain */
157 /* for each variable name */
158 for (vars = variables; vars; vars = list_next(vars)) {
159 LIST *value, *evalue = 0;
160 char *colon;
161 char *bracket;
162 char varname[MAXSYM];
163 int sub1 = 0, sub2 = -1;
164 VAR_EDITS edits;
166 /* look for a : modifier in the variable name */
167 /* must copy into varname so we can modify it */
168 strcpy(varname, vars->string);
169 if ((colon = strchr(varname, MAGIC_COLON))) {
170 *colon = '\0';
171 var_edit_parse(colon+1, &edits);
173 /* look for [x-y] subscripting */
174 /* sub1 is x (0 default) */
175 /* sub2 is length (-1 means forever) */
176 if ((bracket = strchr(varname, MAGIC_LEFT))) {
177 char *dash;
179 if ((dash = strchr(bracket+1, '-'))) *dash = '\0';
180 sub1 = atoi(bracket+1)-1;
181 if (!dash) sub2 = 1;
182 else if (!dash[1] || dash[1] == MAGIC_RIGHT) sub2 = -1;
183 else sub2 = atoi(dash+1)-sub1;
184 *bracket = '\0';
186 /* get variable value, specially handling $(<), $(>), $(n) */
187 if (varname[0] == '<' && !varname[1]) value = lol_get(lol, 0);
188 else if (varname[0] == '>' && !varname[1]) value = lol_get(lol, 1);
189 else if (varname[0] >= '1' && varname[0] <= '9' && !varname[1]) value = lol_get(lol, varname[0]-'1');
190 else value = var_get(varname);
191 /* the fast path: $(x) - just copy the variable value */
192 /* this is only an optimization */
193 if (out == out_buf && !bracket && !colon && in == end) {
194 l = list_copy(l, value);
195 continue;
197 /* handle start subscript */
198 while (sub1 > 0 && value) {
199 --sub1;
200 value = list_next(value);
202 /* empty w/ :E=default? */
203 if (!value && colon && edits.empty.ptr) evalue = value = list_new(L0, edits.empty.ptr, 0);
204 /* for each variable value */
205 for (; value; value = list_next(value)) {
206 LIST *rem;
207 char *out1;
209 /* handle end subscript (length actually) */
210 if (sub2 >= 0 && --sub2 < 0) break;
211 /* apply : mods, if present */
212 if (colon && edits.filemods) var_edit_file(value->string, out, &edits); else strcpy(out, value->string);
213 if (colon && (edits.upshift || edits.downshift)) var_edit_shift(out, &edits);
214 if (colon && edits.quote) var_edit_quote(out);
215 /* handle :J=joinval */
216 /* if we have more values for this var, just */
217 /* keep appending them (with the join value) */
218 /* rather than creating separate LIST elements */
219 if (colon && edits.join.ptr && (list_next(value) || list_next(vars))) {
220 out += strlen(out);
221 strcpy(out, edits.join.ptr);
222 out += strlen(out);
223 continue;
225 /* if no remainder, append result to output chain */
226 if (in == end) { l = list_new(l, out_buf, 0); continue; }
227 /* for each remainder, append the complete string to the output chain */
228 /* remember the end of the variable expansion so we can just tack on each instance of 'remainder' */
229 out1 = out+strlen(out);
230 for (rem = remainder; rem; rem = list_next(rem)) {
231 strcpy(out1, rem->string);
232 l = list_new(l, out_buf, 0);
235 /* toss used empty */
236 if (evalue) list_free(evalue);
237 } /* for */
238 /* variables & remainder were gifts from var_expand and must be freed */
239 if (variables) list_free(variables);
240 if (remainder) list_free(remainder);
241 if (DEBUG_VAREXP) {
242 printf("expanded to ");
243 list_print(l);
244 printf("\n");
246 return l;
252 * var_edit_parse() - parse : modifiers into PATHNAME structure
254 * The : modifiers in a $(varname:modifier) currently support replacing
255 * or omitting elements of a filename, and so they are parsed into a
256 * PATHNAME structure (which contains pointers into the original string).
258 * Modifiers of the form "X=value" replace the component X with
259 * the given value. Modifiers without the "=value" cause everything
260 * but the component X to be omitted. X is one of:
262 * G <grist>
263 * D directory name
264 * B base name
265 * S .suffix
266 * M (member)
267 * R root directory - prepended to whole path
269 * This routine sets:
271 * f->f_xxx.ptr = 0
272 * f->f_xxx.len = 0
273 * -> leave the original component xxx
275 * f->f_xxx.ptr = string
276 * f->f_xxx.len = strlen(string)
277 * -> replace component xxx with string
279 * f->f_xxx.ptr = ""
280 * f->f_xxx.len = 0
281 * -> omit component xxx
283 * var_edit_file() below and path_build() obligingly follow this convention.
285 static void var_edit_parse (char *mods, VAR_EDITS *edits) {
286 int havezeroed = 0;
288 memset((char *)edits, 0, sizeof(*edits));
289 while (*mods) {
290 char *p;
291 PATHPART *fp;
293 switch (*mods++) {
294 case 'L': edits->downshift = 1; continue;
295 case 'U': edits->upshift = 1; continue;
296 case 'Q': edits->quote = 1; continue;
297 case 'P': edits->parent = edits->filemods = 1; continue;
298 case 'E': fp = &edits->empty; goto strval;
299 case 'J': fp = &edits->join; goto strval;
300 case 'G': fp = &edits->f.f_grist; goto fileval;
301 case 'R': fp = &edits->f.f_root; goto fileval;
302 case 'D': fp = &edits->f.f_dir; goto fileval;
303 case 'B': fp = &edits->f.f_base; goto fileval;
304 case 'S': fp = &edits->f.f_suffix; goto fileval;
305 case 'M': fp = &edits->f.f_member; goto fileval;
306 default: return; /* should complain, but so what... */
308 fileval:
309 /* handle :CHARS, where each char (without a following =) selects a particular file path element */
310 /* on the first such char, we deselect all others (by setting ptr = "", len = 0) */
311 /* and for each char we select that element (by setting ptr = 0) */
312 edits->filemods = 1;
313 if (*mods != '=') {
314 if (!havezeroed++) {
315 int i;
317 for (i = 0; i < 6; i++) {
318 edits->f.part[i].len = 0;
319 edits->f.part[i].ptr = "";
322 fp->ptr = 0;
323 continue;
325 strval:
326 /* handle :X=value, or :X */
327 if (*mods != '=') {
328 fp->ptr = "";
329 fp->len = 0;
330 } else {
331 //FIXME: ugly hack to allow things like $(>[2:S=.tiles]) (they are bad, but...)
332 p = mods;
333 while (*p && *p != MAGIC_COLON && *p != MAGIC_LEFT && *p != MAGIC_RIGHT) ++p;
334 if (*p) {
335 *p = 0;
336 fp->ptr = ++mods;
337 fp->len = p-mods;
338 mods = p+1;
339 } else {
340 fp->ptr = ++mods;
341 fp->len = strlen(fp->ptr);
342 mods += fp->len;
343 //fprintf(stderr, "len=%d [", fp->len); fwrite(fp->ptr, 1, fp->len, stderr); fprintf(stderr, "]\n");
345 // another ugly hack
346 if (fp == &edits->join) {
347 if (strcmp(fp->ptr, "|space|") == 0) { strcpy((char *)fp->ptr, " "); fp->len = 1; }
348 else if (strcmp(fp->ptr, "|tab|") == 0) { strcpy((char *)fp->ptr, "\t"); fp->len = 1; }
356 * var_edit_file() - copy input target name to output, modifying filename
358 static void var_edit_file (const char *in, char *out, VAR_EDITS *edits) {
359 PATHNAME pathname;
361 /* parse apart original filename, putting parts into "pathname" */
362 path_parse(in, &pathname);
363 /* replace any pathname with edits->f */
364 if (edits->f.f_grist.ptr) pathname.f_grist = edits->f.f_grist;
365 if (edits->f.f_root.ptr) pathname.f_root = edits->f.f_root;
366 if (edits->f.f_dir.ptr) pathname.f_dir = edits->f.f_dir;
367 if (edits->f.f_base.ptr) pathname.f_base = edits->f.f_base;
368 if (edits->f.f_suffix.ptr) pathname.f_suffix = edits->f.f_suffix;
369 if (edits->f.f_member.ptr) pathname.f_member = edits->f.f_member;
370 /* if requested, modify pathname to point to parent */
371 if (edits->parent) path_parent(&pathname);
372 /* put filename back together */
373 path_build(&pathname, out, 0);
378 * var_edit_shift() - do upshift/downshift mods
380 static void var_edit_shift (char *out, VAR_EDITS *edits) {
381 /* handle upshifting, downshifting now */
382 if (edits->upshift) {
383 for(; *out; ++out) *out = toupper(*out);
384 } else if (edits->downshift) {
385 for(; *out; ++out) *out = tolower(*out);
/*
 * needToScreen() - does this character need a backslash in front of it?
 *
 * Used by var_edit_quote() for the :Q modifier.  Returns 1 for every
 * non-NUL character up to and including '*' (in ASCII: control
 * characters, space and !"#$%&'()*), for the backquote, and for
 * ; < > ? [ \ ] { | }.  Returns 0 for NUL and everything else.
 * The range tests rely on ASCII ordering.
 */
static int needToScreen (char ch) {
  unsigned char uch = (unsigned char)ch;
  if (!uch) return 0;
  if (uch <= '*' || uch == '`' ||
      (uch >= ';' && uch <= '<') ||
      (uch >= '>' && uch <= '?') ||
      (uch >= '[' && uch <= ']') ||
      (uch >= '{' && uch <= '}')) return 1;
  return 0;
}
402 static void var_edit_quote (char *out) {
403 /* handle quoting now */
404 int count;
405 char *p = out;
406 char *q;
408 count = 0;
409 for (p = out; *p; ++p) if (needToScreen(*p)) ++count;
410 q = p+count;
411 for (; p >= out;) {
412 if (needToScreen(*p)) {
413 *q-- = *p--;
414 *q-- = '\\';
415 } else {
416 *q-- = *p--;