Adjust inject/history handling for new, *ifs* trimmed input..
[s-mailx.git] / shexp.c
blob1d6dbf7bd3de51849944bc1762c0a4f28b08f516
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
5 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
6 * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7 */
8 /*
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 #undef n_FILE
37 #define n_FILE shexp
39 #ifndef HAVE_AMALGAMATION
40 # include "nail.h"
41 #endif
43 #include <pwd.h>
45 #ifdef HAVE_FNMATCH
46 # include <dirent.h>
47 # include <fnmatch.h>
48 #endif
50 /* POSIX says
51 * Environment variable names used by the utilities in the Shell and
52 * Utilities volume of POSIX.1-2008 consist solely of uppercase
53 * letters, digits, and the <underscore> ('_') from the characters
54 * defined in Portable Character Set and do not begin with a digit.
55 * Other characters may be permitted by an implementation;
56 * applications shall tolerate the presence of such names.
57 * We do support the hyphen-minus "-" (except in last position for ${x[:]-y}).
58 * We support some special parameter names for one-letter(++) variable names;
59 * these have counterparts in the code that manages internal variables,
60 * and some more special treatment below! */
/* True if C may occur in a variable name: alphanumeric, underscore, or
 * hyphen-minus */
61 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* True if C must not be the first character of a variable name (digits) */
62 #define a_SHEXP_ISVARC_BAD1ST(C) (digitchar(C)) /* (Actually assumed below!) */
/* True for the hyphen-minus; presumably "bad in last position" as the comment
 * above describes for ${x[:]-y} -- TODO confirm against the users */
63 #define a_SHEXP_ISVARC_BADNST(C) ((C) == '-')
/* Flags which steer a_shexp__quote(): bit 0 is a behaviour flag, bits 8 to 11
 * select the quote type(s), a_SHEXP_QUOTE_T_MASK covers all quote types */
enum a_shexp_quote_flags{
   a_SHEXP_QUOTE_NONE,
   a_SHEXP_QUOTE_ROUNDTRIP = 1u<<0, /* Result won't be consumed immediately */

   a_SHEXP_QUOTE_T_REVSOL = 1u<<8, /* Type: by reverse solidus */
   a_SHEXP_QUOTE_T_SINGLE = 1u<<9, /* Type: single-quotes */
   a_SHEXP_QUOTE_T_DOUBLE = 1u<<10, /* Type: double-quotes */
   a_SHEXP_QUOTE_T_DOLLAR = 1u<<11, /* Type: dollar-single-quotes */
   a_SHEXP_QUOTE_T_MASK = a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
         a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR,

   a_SHEXP_QUOTE__FREESHIFT = 16u
};
79 #ifdef HAVE_FNMATCH
80 struct a_shexp_glob_ctx{
81 char const *sgc_patdat; /* Remaining pattern (at and below level) */
82 size_t sgc_patlen;
83 struct n_string *sgc_outer; /* Resolved path up to this level */
84 ui32_t sgc_flags;
85 ui8_t sgc__dummy[4];
87 #endif
89 struct a_shexp_quote_ctx{
90 struct n_string *sqc_store; /* Result storage */
91 struct str sqc_input; /* Input data, topmost level */
92 ui32_t sqc_cnt_revso;
93 ui32_t sqc_cnt_single;
94 ui32_t sqc_cnt_double;
95 ui32_t sqc_cnt_dollar;
96 enum a_shexp_quote_flags sqc_flags;
97 ui8_t sqc__dummy[4];
100 struct a_shexp_quote_lvl{
101 struct a_shexp_quote_lvl *sql_link; /* Outer level */
102 struct str sql_dat; /* This level (has to) handle(d) */
103 enum a_shexp_quote_flags sql_flags;
104 ui8_t sql__dummy[4];
107 /* Locate the user's mailbox file (where new, unread mail is queued).
 * Unless force is set *inbox* and then $MAIL are consulted first, otherwise
 * (and as a fallback) the result is VAL_MAIL "/" user */
108 static char *a_shexp_findmail(char const *user, bool_t force);
110 /* Expand ^~/? and ^~USER/? constructs.
111 * Returns the completely resolved (maybe empty or identical to input)
112 * salloc()ed string */
113 static char *a_shexp_tilde(char const *s);
115 /* Expand the file pattern name via fnmatch(3) based directory matching.
 * Returns NULL on error, that is if nothing matches, or if multiple files
 * match without FEXP_MULTIOK being set in fexpm */
116 static char *a_shexp_globname(char const *name, enum fexp_mode fexpm);
117 #ifdef HAVE_FNMATCH
/* Helpers of a_shexp_globname(): the walk of one directory level, and the
 * qsort(3) comparison function used to sort multiple results */
118 static bool_t a_shexp__glob(struct a_shexp_glob_ctx *sgcp,
119 struct n_strlist **slpp);
120 static int a_shexp__globsort(void const *cvpa, void const *cvpb);
121 #endif
123 /* Parse an input string and create a sh(1)ell-quoted result */
124 static void a_shexp__quote(struct a_shexp_quote_ctx *sqcp,
125 struct a_shexp_quote_lvl *sqlp);
127 static char *
128 a_shexp_findmail(char const *user, bool_t force){
129 char *rv;
130 char const *cp;
131 NYD2_ENTER;
133 if(!force){
134 if((cp = ok_vlook(inbox)) != NULL && *cp != '\0'){
135 /* Folder extra introduced to avoid % recursion loops */
136 if((rv = fexpand(cp, FEXP_NSPECIAL | FEXP_NFOLDER | FEXP_NSHELL)
137 ) != NULL)
138 goto jleave;
139 n_err(_("*inbox* expansion failed, using $MAIL / builtin: %s\n"), cp);
142 if((cp = ok_vlook(MAIL)) != NULL){
143 rv = savestr(cp);
144 goto jleave;
148 /* C99 */{
149 size_t ul, i;
151 ul = strlen(user) +1;
152 i = sizeof(VAL_MAIL) -1 + 1 + ul;
154 rv = salloc(i);
155 memcpy(rv, VAL_MAIL, (i = sizeof(VAL_MAIL) -1));
156 rv[i] = '/';
157 memcpy(&rv[++i], user, ul);
159 jleave:
160 NYD2_LEAVE;
161 return rv;
164 static char *
165 a_shexp_tilde(char const *s){
166 struct passwd *pwp;
167 size_t nl, rl;
168 char const *rp, *np;
169 char *rv;
170 NYD2_ENTER;
172 if(*(rp = &s[1]) == '/' || *rp == '\0'){
173 np = ok_vlook(HOME);
174 rl = strlen(rp);
175 }else{
176 if((rp = strchr(np = rp, '/')) != NULL){
177 nl = PTR2SIZE(rp - np);
178 np = savestrbuf(np, nl);
179 rl = strlen(rp);
180 }else
181 rl = 0;
183 if((pwp = getpwnam(np)) == NULL){
184 rv = savestr(s);
185 goto jleave;
187 np = pwp->pw_dir;
190 nl = strlen(np);
191 rv = salloc(nl + 1 + rl +1);
192 memcpy(rv, np, nl);
193 if(rl > 0){
194 memcpy(rv + nl, rp, rl);
195 nl += rl;
197 rv[nl] = '\0';
198 jleave:
199 NYD2_LEAVE;
200 return rv;
203 static char *
204 a_shexp_globname(char const *name, enum fexp_mode fexpm){
205 #ifdef HAVE_FNMATCH
206 struct a_shexp_glob_ctx sgc;
207 struct n_string outer;
208 struct n_strlist *slp;
209 char *cp;
210 NYD_ENTER;
212 memset(&sgc, 0, sizeof sgc);
213 sgc.sgc_patlen = strlen(name);
214 sgc.sgc_patdat = savestrbuf(name, sgc.sgc_patlen);
215 sgc.sgc_outer = n_string_reserve(n_string_creat(&outer), sgc.sgc_patlen);
216 sgc.sgc_flags = ((fexpm & FEXP_SILENT) != 0);
217 slp = NULL;
218 if(a_shexp__glob(&sgc, &slp))
219 cp = (char*)1;
220 else
221 cp = NULL;
222 n_string_gut(&outer);
224 if(cp == NULL)
225 goto jleave;
227 if(slp == NULL){
228 cp = n_UNCONST(N_("File pattern does not match"));
229 goto jerr;
230 }else if(slp->sl_next == NULL)
231 cp = savestrbuf(slp->sl_dat, slp->sl_len);
232 else if(fexpm & FEXP_MULTIOK){
233 struct n_strlist **sorta, *xslp;
234 size_t i, no, l;
236 no = l = 0;
237 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next){
238 ++no;
239 l += xslp->sl_len + 1;
242 sorta = smalloc(sizeof(*sorta) * no);
243 no = 0;
244 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next)
245 sorta[no++] = xslp;
246 qsort(sorta, no, sizeof *sorta, &a_shexp__globsort);
248 cp = salloc(++l);
249 l = 0;
250 for(i = 0; i < no; ++i){
251 xslp = sorta[i];
252 memcpy(&cp[l], xslp->sl_dat, xslp->sl_len);
253 l += xslp->sl_len;
254 cp[l++] = '\0';
256 cp[l] = '\0';
258 free(sorta);
259 n_pstate |= n_PS_EXPAND_MULTIRESULT;
260 }else{
261 cp = n_UNCONST(N_("File pattern matches multiple results"));
262 goto jerr;
265 jleave:
266 while(slp != NULL){
267 struct n_strlist *tmp = slp;
269 slp = slp->sl_next;
270 free(tmp);
272 NYD_LEAVE;
273 return cp;
275 jerr:
276 if(!(fexpm & FEXP_SILENT)){
277 name = n_shexp_quote_cp(name, FAL0);
278 n_err("%s: %s\n", V_(cp), name);
280 cp = NULL;
281 goto jleave;
283 #else /* HAVE_FNMATCH */
284 n_UNUSED(fexpm);
286 if(!(fexpm & FEXP_SILENT))
287 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
288 return savestr(name);
289 #endif
292 #ifdef HAVE_FNMATCH
293 static bool_t
294 a_shexp__glob(struct a_shexp_glob_ctx *sgcp, struct n_strlist **slpp){
295 enum{a_SILENT = 1<<0, a_DEEP=1<<1, a_SALLOC=1<<2};
297 struct a_shexp_glob_ctx nsgc;
298 struct dirent *dep;
299 DIR *dp;
300 size_t old_outerlen;
301 char const *ccp, *myp;
302 NYD2_ENTER;
304 /* We need some special treatment for the outermost level */
305 if(!(sgcp->sgc_flags & a_DEEP)){
306 if(sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/'){
307 myp = n_string_cp(n_string_push_c(sgcp->sgc_outer, '/'));
308 ++sgcp->sgc_patdat;
309 --sgcp->sgc_patlen;
310 }else
311 myp = "./";
312 }else
313 myp = n_string_cp(sgcp->sgc_outer);
314 old_outerlen = sgcp->sgc_outer->s_len;
316 /* Separate current directory/pattern level from any possible remaining
317 * pattern in order to be able to use it for fnmatch(3) */
318 if((ccp = memchr(sgcp->sgc_patdat, '/', sgcp->sgc_patlen)) == NULL)
319 nsgc.sgc_patlen = 0;
320 else{
321 nsgc = *sgcp;
322 nsgc.sgc_flags |= a_DEEP;
323 sgcp->sgc_patlen = PTR2SIZE((nsgc.sgc_patdat = &ccp[1]) -
324 &sgcp->sgc_patdat[0]);
325 nsgc.sgc_patlen -= sgcp->sgc_patlen;
326 /* Trim solidus */
327 if(sgcp->sgc_patlen > 0){
328 assert(sgcp->sgc_patdat[sgcp->sgc_patlen -1] == '/');
329 ((char*)n_UNCONST(sgcp->sgc_patdat))[--sgcp->sgc_patlen] = '\0';
333 /* Our current directory level */
334 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
335 * xxx be the (sole) result depending on pattern surroundings, etc. */
336 if((dp = opendir(myp)) == NULL){
337 int err;
339 switch((err = n_err_no)){
340 case n_ERR_NOTDIR:
341 ccp = N_("cannot access paths under non-directory");
342 goto jerr;
343 case n_ERR_NOENT:
344 ccp = N_("path component of (sub)pattern non-existent");
345 goto jerr;
346 case n_ERR_ACCES:
347 ccp = N_("file permission for file (sub)pattern denied");
348 goto jerr;
349 default:
350 ccp = N_("cannot handle file (sub)pattern");
351 goto jerr;
355 /* As necessary, quote bytes in the current pattern */
356 /* C99 */{
357 char *ncp;
358 size_t i;
359 bool_t need;
361 for(need = FAL0, i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
362 switch(*myp){
363 case '\'': case '"': case '\\': case '$':
364 case ' ': case '\t':
365 need = TRU1;
366 ++i;
367 /* FALLTHRU */
368 default:
369 ++i;
370 break;
373 if(need){
374 ncp = salloc(i +1);
375 for(i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
376 switch(*myp){
377 case '\'': case '"': case '\\': case '$':
378 case ' ': case '\t':
379 ncp[i++] = '\\';
380 /* FALLTHRU */
381 default:
382 ncp[i++] = *myp;
383 break;
385 ncp[i] = '\0';
386 myp = ncp;
387 }else
388 myp = sgcp->sgc_patdat;
391 while((dep = readdir(dp)) != NULL){
392 switch(fnmatch(myp, dep->d_name, FNM_PATHNAME | FNM_PERIOD)){
393 case 0:{
394 /* A match expresses the desire to recurse if there is more pattern */
395 if(nsgc.sgc_patlen > 0){
396 bool_t isdir;
398 n_string_push_cp((sgcp->sgc_outer->s_len > 1
399 ? n_string_push_c(sgcp->sgc_outer, '/') : sgcp->sgc_outer),
400 dep->d_name);
402 isdir = FAL0;
403 #ifdef HAVE_DIRENT_TYPE
404 if(dep->d_type == DT_DIR)
405 isdir = TRU1;
406 else if(dep->d_type == DT_LNK || dep->d_type == DT_UNKNOWN)
407 #endif
409 struct stat sb;
411 if(stat(n_string_cp(sgcp->sgc_outer), &sb)){
412 ccp = N_("I/O error when querying file status");
413 goto jerr;
414 }else if(S_ISDIR(sb.st_mode))
415 isdir = TRU1;
418 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
419 * TODO Instead save away a list of such n_string's for later */
420 if(isdir && !a_shexp__glob(&nsgc, slpp)){
421 ccp = (char*)1;
422 goto jleave;
425 n_string_trunc(sgcp->sgc_outer, old_outerlen);
426 }else{
427 struct n_strlist *slp;
428 size_t i, j;
430 i = strlen(dep->d_name);
431 j = (old_outerlen > 0) ? old_outerlen + 1 + i : i;
432 slp = n_STRLIST_ALLOC(j);
433 *slpp = slp;
434 slpp = &slp->sl_next;
435 slp->sl_next = NULL;
436 if((j = old_outerlen) > 0){
437 memcpy(&slp->sl_dat[0], sgcp->sgc_outer->s_dat, j);
438 if(slp->sl_dat[j -1] != '/')
439 slp->sl_dat[j++] = '/';
441 memcpy(&slp->sl_dat[j], dep->d_name, i);
442 slp->sl_dat[j += i] = '\0';
443 slp->sl_len = j;
445 } break;
446 case FNM_NOMATCH:
447 break;
448 default:
449 ccp = N_("fnmatch(3) cannot handle file (sub)pattern");
450 goto jerr;
454 ccp = NULL;
455 jleave:
456 if(dp != NULL)
457 closedir(dp);
458 NYD2_LEAVE;
459 return (ccp == NULL);
461 jerr:
462 if(!(sgcp->sgc_flags & a_SILENT)){
463 char const *s2, *s3;
465 if(sgcp->sgc_outer->s_len > 0){
466 s2 = n_shexp_quote_cp(n_string_cp(sgcp->sgc_outer), FAL0);
467 s3 = "/";
468 }else
469 s2 = s3 = n_empty;
471 n_err("%s: %s%s%s\n", V_(ccp), s2, s3,
472 n_shexp_quote_cp(sgcp->sgc_patdat, FAL0));
474 goto jleave;
477 static int
478 a_shexp__globsort(void const *cvpa, void const *cvpb){
479 int rv;
480 struct n_strlist const * const *slpa, * const *slpb;
481 NYD2_ENTER;
483 slpa = cvpa;
484 slpb = cvpb;
485 rv = asccasecmp((*slpa)->sl_dat, (*slpb)->sl_dat);
486 NYD2_LEAVE;
487 return rv;
489 #endif /* HAVE_FNMATCH */
/* Create a sh(1)ell-quoted rendition of the data described by sqlp and append
 * it to sqcp->sqc_store.
 * The work is done in two steps: the first loop only classifies the input
 * bytes in order to find out which type of quote is necessary, thereafter the
 * levels are walked and the (quoted) bytes are pushed to the store.
 * Since a_SHEXP_QUOTE_RECURSE is #undef'd right below, "jrecurse" is but an
 * alias of "jstep", and all #ifdef a_SHEXP_QUOTE_RECURSE parts are compiled
 * out */
491 static void
492 a_shexp__quote(struct a_shexp_quote_ctx *sqcp, struct a_shexp_quote_lvl *sqlp){
493 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
494 * XXX use the recursive implementation because of stateful encodings.
495 * XXX I.e., if a quoted substring cannot be self-contained - the data after
496 * XXX the quote relies on "the former state", then this doesn't make sense.
497 * XXX Therefore this is not fully programmed out but instead only detects
498 * XXX the "most fancy" quoting necessary, and directly does that.
499 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
500 * XXX Otherwise we rather have to convert to wide first and act on that,
501 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
502 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
503 #ifdef a_SHEXP_QUOTE_RECURSE
504 # define jrecurse jrecurse
505 struct a_shexp_quote_lvl sql;
506 #else
507 # define jrecurse jstep
508 #endif
509 struct n_visual_info_ctx vic;
510 union {struct a_shexp_quote_lvl *head; struct n_string *store;} u;
511 ui32_t flags;
512 size_t il;
513 char const *ib, *ib_base;
514 NYD2_ENTER;
516 ib_base = ib = sqlp->sql_dat.s;
517 il = sqlp->sql_dat.l;
518 flags = sqlp->sql_flags;
520 /* Iterate over the entire input, classify characters and type of quotes
521 * along the way. Whenever a quote change has to be applied, adjust flags
522 * for the new situation -, setup sql.* and recurse- */
523 while(il > 0){
524 char c;
526 c = *ib;
/* Control characters need $'..' quotes, except that a horizontal tabulator
 * is fine within any quote type that is already in effect */
527 if(cntrlchar(c)){
528 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
529 goto jstep;
530 if(c == '\t' && (flags & (a_SHEXP_QUOTE_T_REVSOL |
531 a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOUBLE)))
532 goto jstep;
533 #ifdef a_SHEXP_QUOTE_RECURSE
534 ++sqcp->sqc_cnt_dollar;
535 #endif
536 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
537 goto jrecurse;
538 }else if(blankspacechar(c) || c == '|' || c == '&' || c == ';' ||
539 /* Whereas we don't support those, quote them for the sh(1)ell */
540 c == '(' || c == ')' || c == '<' || c == '>' ||
541 c == '"' || c == '$'){
542 if(flags & a_SHEXP_QUOTE_T_MASK)
543 goto jstep;
544 #ifdef a_SHEXP_QUOTE_RECURSE
545 ++sqcp->sqc_cnt_single;
546 #endif
547 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
548 goto jrecurse;
/* A single-quote cannot be expressed within single-quotes: go for $'..'
 * unless a quote type other than T_SINGLE is in effect already */
549 }else if(c == '\''){
550 if(flags & (a_SHEXP_QUOTE_T_MASK & ~a_SHEXP_QUOTE_T_SINGLE))
551 goto jstep;
552 #ifdef a_SHEXP_QUOTE_RECURSE
553 ++sqcp->sqc_cnt_dollar;
554 #endif
555 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
556 goto jrecurse;
/* The number sign only matters as the very first byte of the input */
557 }else if(c == '\\' || (c == '#' && ib == ib_base)){
558 if(flags & a_SHEXP_QUOTE_T_MASK)
559 goto jstep;
560 #ifdef a_SHEXP_QUOTE_RECURSE
561 ++sqcp->sqc_cnt_single;
562 #endif
563 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
564 goto jrecurse;
565 }else if(!asciichar(c)){
566 /* Need to keep together multibytes */
567 #ifdef a_SHEXP_QUOTE_RECURSE
568 memset(&vic, 0, sizeof vic);
569 vic.vic_indat = ib;
570 vic.vic_inlen = il;
571 n_visual_info(&vic,
572 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
573 #endif
574 /* xxx check whether resulting \u would be ASCII */
575 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP) ||
576 (flags & a_SHEXP_QUOTE_T_DOLLAR)){
577 #ifdef a_SHEXP_QUOTE_RECURSE
578 ib = vic.vic_oudat;
579 il = vic.vic_oulen;
580 continue;
581 #else
582 goto jstep;
583 #endif
585 #ifdef a_SHEXP_QUOTE_RECURSE
586 ++sqcp->sqc_cnt_dollar;
587 #endif
588 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
589 goto jrecurse;
590 }else
591 jstep:
592 ++ib, --il;
594 sqlp->sql_flags = flags;
596 /* Level made the great and completed processing input. Reverse the list of
597 * levels, detect the "most fancy" quote type needed along this way */
598 /* XXX Due to restriction as above very crude */
/* (il is reused to estimate the size of the result: the length of each level,
 * plus half of that for any level which needs quoting) */
599 for(flags = 0, il = 0, u.head = NULL; sqlp != NULL;){
600 struct a_shexp_quote_lvl *tmp;
602 tmp = sqlp->sql_link;
603 sqlp->sql_link = u.head;
604 u.head = sqlp;
605 il += sqlp->sql_dat.l;
606 if(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK)
607 il += (sqlp->sql_dat.l >> 1);
608 flags |= sqlp->sql_flags;
609 sqlp = tmp;
611 sqlp = u.head;
613 /* Finally work the substrings in the correct order, adjusting quotes along
614 * the way as necessary. Start off with the "most fancy" quote, so that
615 * the user sees an overall boundary she can orientate herself on.
616 * We do it like that to be able to give the user some "encapsulation
617 * experience", to address what strikes me is a problem of sh(1)ell quoting:
618 * different to, e.g., perl(1), where you see at a glance where a string
619 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
620 * visual appearance of "a string" as such */
621 u.store = n_string_reserve(sqcp->sqc_store, il);
/* Open the outermost quote; precedence is $'..', then "..", then '..' */
623 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
624 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
625 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
626 }else if(flags & a_SHEXP_QUOTE_T_DOUBLE){
627 u.store = n_string_push_c(u.store, '"');
628 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOUBLE;
629 }else if(flags & a_SHEXP_QUOTE_T_SINGLE){
630 u.store = n_string_push_c(u.store, '\'');
631 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
632 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
633 flags &= ~a_SHEXP_QUOTE_T_MASK;
635 /* Work all the levels */
636 for(; sqlp != NULL; sqlp = sqlp->sql_link){
637 /* As necessary update our mode of quoting */
638 #ifdef a_SHEXP_QUOTE_RECURSE
639 il = 0;
641 switch(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK){
642 case a_SHEXP_QUOTE_T_DOLLAR:
643 if(!(flags & a_SHEXP_QUOTE_T_DOLLAR))
644 il = a_SHEXP_QUOTE_T_DOLLAR;
645 break;
646 case a_SHEXP_QUOTE_T_DOUBLE:
647 if(!(flags & (a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
648 il = a_SHEXP_QUOTE_T_DOLLAR;
649 break;
650 case a_SHEXP_QUOTE_T_SINGLE:
651 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
652 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
653 il = a_SHEXP_QUOTE_T_SINGLE;
654 break;
655 default:
656 case a_SHEXP_QUOTE_T_REVSOL:
657 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
658 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
659 il = a_SHEXP_QUOTE_T_REVSOL;
660 break;
663 if(il != 0){
664 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
665 u.store = n_string_push_c(u.store, '\'');
666 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
667 u.store = n_string_push_c(u.store, '"');
668 flags &= ~a_SHEXP_QUOTE_T_MASK;
670 flags |= (ui32_t)il;
671 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
672 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
673 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
674 u.store = n_string_push_c(u.store, '"');
675 else if(flags & a_SHEXP_QUOTE_T_SINGLE)
676 u.store = n_string_push_c(u.store, '\'');
678 #endif /* a_SHEXP_QUOTE_RECURSE */
680 /* Work the level's substring */
681 ib = sqlp->sql_dat.s;
682 il = sqlp->sql_dat.l;
684 while(il > 0){
685 char c2, c;
687 c = *ib;
689 if(cntrlchar(c)){
690 assert(c == '\t' || (flags & a_SHEXP_QUOTE_T_DOLLAR));
691 assert((flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
692 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)));
/* Map control characters which have a mnemonic escape; c2 keeps the original
 * byte, so that "c == c2" below means no mnemonic exists, and the byte is
 * written in the \cX form instead */
693 switch((c2 = c)){
694 case 0x07: c = 'a'; break;
695 case 0x08: c = 'b'; break;
696 case 0x0A: c = 'n'; break;
697 case 0x0B: c = 'v'; break;
698 case 0x0C: c = 'f'; break;
699 case 0x0D: c = 'r'; break;
700 case 0x1B: c = 'E'; break;
701 default: break;
702 case 0x09:
703 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
704 c = 't';
705 break;
707 if(flags & a_SHEXP_QUOTE_T_REVSOL)
708 u.store = n_string_push_c(u.store, '\\');
709 goto jpush;
711 u.store = n_string_push_c(u.store, '\\');
712 if(c == c2){
713 u.store = n_string_push_c(u.store, 'c');
714 c ^= 0x40;
716 goto jpush;
717 }else if(blankspacechar(c) || c == '|' || c == '&' || c == ';' ||
718 /* Whereas we don't support those, quote them for the sh(1)ell */
719 c == '(' || c == ')' || c == '<' || c == '>' ||
720 c == '"' || c == '$'){
721 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
722 goto jpush;
723 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE));
724 u.store = n_string_push_c(u.store, '\\');
725 goto jpush;
726 }else if(c == '\''){
727 if(flags & a_SHEXP_QUOTE_T_DOUBLE)
728 goto jpush;
729 assert(!(flags & a_SHEXP_QUOTE_T_SINGLE));
730 u.store = n_string_push_c(u.store, '\\');
731 goto jpush;
732 }else if(c == '\\' || (c == '#' && ib == ib_base)){
733 if(flags & a_SHEXP_QUOTE_T_SINGLE)
734 goto jpush;
735 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE |
736 a_SHEXP_QUOTE_T_DOLLAR));
737 u.store = n_string_push_c(u.store, '\\');
738 goto jpush;
739 }else if(asciichar(c)){
740 /* Shorthand: we can simply push that thing out */
741 jpush:
742 u.store = n_string_push_c(u.store, c);
743 ++ib, --il;
744 }else{
745 /* Not an ASCII character, take care not to split up multibyte
746 * sequences etc. For the sake of compile testing, don't enwrap in
747 * HAVE_ALWAYS_UNICODE_LOCALE || HAVE_NATCH_CHAR */
748 if(n_psonce & n_PSO_UNICODE){
749 ui32_t uc;
750 char const *ib2;
751 size_t il2, il3;
753 ib2 = ib;
754 il3 = il2 = il;
755 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
756 char itoa[32];
757 char const *cp;
759 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
/* Write a \u / \U escape in roundtrip mode or for U+FFFD, otherwise copy the
 * il2 bytes of the sequence unchanged */
760 if((flags & a_SHEXP_QUOTE_ROUNDTRIP) || uc == 0xFFFDu){
761 /* Use padding to make ambiguities impossible */
762 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
763 (uc > 0xFFFFu ? 'U' : 'u'),
764 (int)(uc > 0xFFFFu ? 8 : 4), uc);
765 cp = itoa;
766 }else{
767 il3 = il2;
768 cp = &ib[0];
770 u.store = n_string_push_buf(u.store, cp, il3);
771 ib += il2, il -= il2;
772 continue;
776 memset(&vic, 0, sizeof vic);
777 vic.vic_indat = ib;
778 vic.vic_inlen = il;
779 n_visual_info(&vic,
780 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
782 /* Work this substring as sensitive as possible */
783 il -= vic.vic_oulen;
784 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP))
785 u.store = n_string_push_buf(u.store, ib, il);
786 #ifdef HAVE_ICONV
787 else if((vic.vic_indat = n_iconv_onetime_cp(n_ICONV_NONE,
788 "utf-8", ok_vlook(ttycharset), savestrbuf(ib, il))) != NULL){
789 ui32_t uc;
790 char const *ib2;
791 size_t il2, il3;
793 il3 = il2 = strlen(ib2 = vic.vic_indat);
794 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
795 char itoa[32];
797 il2 = PTR2SIZE(&ib2[0] - &vic.vic_indat[0]);
798 /* Use padding to make ambiguities impossible */
799 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
800 (uc > 0xFFFFu ? 'U' : 'u'),
801 (int)(uc > 0xFFFFu ? 8 : 4), uc);
802 u.store = n_string_push_buf(u.store, itoa, il3);
803 }else
804 goto Jxseq;
806 #endif
807 else
808 #ifdef HAVE_ICONV
809 Jxseq:
810 #endif
/* Last resort: one \xHH escape per byte; "\xFF" is pushed as a template,
 * n_c_to_hex_base16() presumably overwrites its two digits -- TODO confirm */
811 while(il-- > 0){
812 u.store = n_string_push_buf(u.store, "\\xFF",
813 sizeof("\\xFF") -1);
814 n_c_to_hex_base16(&u.store->s_dat[u.store->s_len - 2], *ib++);
817 ib = vic.vic_oudat;
818 il = vic.vic_oulen;
823 /* Close an open quote */
824 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
825 u.store = n_string_push_c(u.store, '\'');
826 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
827 u.store = n_string_push_c(u.store, '"');
828 #ifdef a_SHEXP_QUOTE_RECURSE
829 jleave:
830 #endif
831 NYD2_LEAVE;
832 return;
834 #ifdef a_SHEXP_QUOTE_RECURSE
835 jrecurse:
836 sqlp->sql_dat.l -= il;
838 sql.sql_link = sqlp;
839 sql.sql_dat.s = n_UNCONST(ib);
840 sql.sql_dat.l = il;
841 sql.sql_flags = flags;
842 a_shexp__quote(sqcp, &sql);
843 goto jleave;
844 #endif
846 #undef jrecurse
847 #undef a_SHEXP_QUOTE_RECURSE
/* Expand the file or folder name name according to the mode bits in fexpm:
 * shortcuts (unless FEXP_NSHORTCUT), an optional "PROTO://" prefix, the
 * special names "%", "%USER", "%:NAME", "#" and "&" (unless FEXP_NSPECIAL),
 * a leading "+" (unless FEXP_NFOLDER), shell variable / meta character
 * expansion and a leading "~".
 * n_PS_EXPAND_MULTIRESULT is cleared from n_pstate upon entry.
 * Returns the expanded name or NULL on error; a result which has not been
 * created dynamically (dyn) is passed through savestr() before return */
850 FL char *
851 fexpand(char const *name, enum fexp_mode fexpm) /* TODO in parts: -> URL::!! */
853 struct str proto, s;
854 char const *res, *cp;
855 bool_t dyn, haveproto;
856 NYD_ENTER;
858 n_pstate &= ~n_PS_EXPAND_MULTIRESULT;
859 dyn = FAL0;
861 /* The order of evaluation is "%" and "#" expand into constants.
862 * "&" can expand into "+". "+" can expand into shell meta characters.
863 * Shell meta characters expand into constants.
864 * This way, we make no recursive expansion */
865 if ((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
866 res = n_UNCONST(name);
/* Split off an alphanumeric "PROTO://" prefix: it is remembered in proto and
 * prepended again at jislocal */
868 jprotonext:
869 n_UNINIT(proto.s, NULL), n_UNINIT(proto.l, 0);
870 haveproto = FAL0;
871 for(cp = res; *cp && *cp != ':'; ++cp)
872 if(!alnumchar(*cp))
873 goto jnoproto;
874 if(cp[0] == ':' && cp[1] == '/' && cp[2] == '/'){
875 haveproto = TRU1;
876 proto.s = n_UNCONST(res);
877 cp += 3;
878 proto.l = PTR2SIZE(cp - res);
879 res = cp;
882 jnoproto:
883 if(!(fexpm & FEXP_NSPECIAL)){
884 jnext:
885 dyn = FAL0;
886 switch (*res) {
/* "%:NAME" restarts with NAME, "%USER" forces the mailbox of USER, and a sole
 * "%" is the mailbox of $LOGNAME, the result of which is expanded again */
887 case '%':
888 if(res[1] == ':' && res[2] != '\0'){
889 res = &res[2];
890 goto jprotonext;
891 }else{
892 bool_t force;
894 force = (res[1] != '\0');
895 res = a_shexp_findmail((force ? &res[1] : ok_vlook(LOGNAME)),
896 force);
897 if(force)
898 goto jislocal;
900 goto jnext;
/* A sole "#" is the previous file */
901 case '#':
902 if (res[1] != '\0')
903 break;
904 if (prevfile[0] == '\0') {
905 n_err(_("No previous file\n"));
906 res = NULL;
907 goto jleave;
909 res = prevfile;
910 goto jislocal;
/* A sole "&" is $MBOX */
911 case '&':
912 if (res[1] == '\0')
913 res = ok_vlook(MBOX);
914 break;
918 /* POSIX: if *folder* unset or null, "+" shall be retained */
919 if (!(fexpm & FEXP_NFOLDER) && *res == '+' &&
920 *(cp = n_folder_query()) != '\0') {
921 res = str_concat_csvl(&s, cp, &res[1], NULL)->s;
922 dyn = TRU1;
924 /* TODO *folder* can't start with %[:], can it!?! */
925 if (res[0] == '%' && res[1] == ':') {
926 res += 2;
927 goto jprotonext;
931 /* Do some meta expansions */
932 if((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
933 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
934 : anyof(res, "{}[]*?$"))){
935 bool_t doexp;
/* Unless FEXP_NOPROTO is set, only expand names which which_protocol()
 * classifies as PROTO_FILE or PROTO_MAILDIR */
937 if(fexpm & FEXP_NOPROTO)
938 doexp = TRU1;
939 else{
940 cp = haveproto ? savecat(savestrbuf(proto.s, proto.l), res) : res;
942 switch(which_protocol(cp, TRU1, FAL0, NULL)){
943 case PROTO_FILE:
944 case PROTO_MAILDIR:
945 doexp = TRU1;
946 break;
947 default:
948 doexp = FAL0;
949 break;
953 if(doexp){
954 struct str shin;
955 struct n_string shou, *shoup;
957 shin.s = n_UNCONST(res);
958 shin.l = UIZ_MAX;
959 shoup = n_string_creat_auto(&shou);
/* All tokens are collected in shoup until the parser reports
 * n_SHEXP_STATE_STOP */
960 for(;;){
961 enum n_shexp_state shs;
963 /* TODO shexp: take care to not include backtick eval once avail! */
964 shs = n_shexp_parse_token((n_SHEXP_PARSE_LOG_D_V |
965 n_SHEXP_PARSE_QUOTE_AUTO_FIXED | n_SHEXP_PARSE_QUOTE_AUTO_DQ |
966 n_SHEXP_PARSE_QUOTE_AUTO_CLOSE), shoup, &shin, NULL);
967 if(shs & n_SHEXP_STATE_STOP)
968 break;
970 res = n_string_cp(shoup);
971 shoup = n_string_drop_ownership(shoup);
972 dyn = TRU1;
974 if(res[0] == '~')
975 res = a_shexp_tilde(res);
977 if(!(fexpm & FEXP_NSHELL) &&
978 (res = a_shexp_globname(res, fexpm)) == NULL)
979 goto jleave;
980 dyn = TRU1;
981 }/* else no tilde */
982 }else if(res[0] == '~'){
983 res = a_shexp_tilde(res);
984 dyn = TRU1;
987 jislocal:
988 if(res != NULL && haveproto){
989 res = savecat(savestrbuf(proto.s, proto.l), res);
990 dyn = TRU1;
/* With FEXP_LOCAL anything but a file or maildir is an error */
993 if(fexpm & FEXP_LOCAL){
994 switch (which_protocol(res, FAL0, FAL0, NULL)) {
995 case PROTO_FILE:
996 case PROTO_MAILDIR: /* Cannot happen since we don't stat(2), but.. */
997 break;
998 default:
999 n_err(_("Not a local file or directory: %s\n"),
1000 n_shexp_quote_cp(name, FAL0));
1001 res = NULL;
1002 break;
1006 jleave:
1007 if(res != NULL && !dyn)
1008 res = savestr(res);
1009 NYD_LEAVE;
1010 return n_UNCONST(res);
1013 FL enum n_shexp_state
1014 n_shexp_parse_token(enum n_shexp_parse_flags flags, struct n_string *store,
1015 struct str *input, void const **cookie){
1016 /* TODO shexp_parse_token: WCHAR; $IFS (sp20=' '; echo a $sp20 b; ..) */
1017 char c2, c, quotec, utf[8];
1018 enum n_shexp_state rv;
1019 size_t i, il;
1020 char const *ifs, *ifs_ws, *ib_save, *ib;
1021 enum{
1022 a_NONE = 0,
1023 a_SKIPQ = 1u<<0, /* Skip rest of this quote (\u0 ..) */
1024 a_SKIPT = 1u<<1, /* Skip entire token (\c@) */
1025 a_SKIPMASK = a_SKIPQ | a_SKIPT,
1026 a_SURPLUS = 1u<<2, /* Extended sequence interpretation */
1027 a_NTOKEN = 1u<<3, /* "New token": e.g., comments are possible */
1028 a_BRACE = 1u<<4, /* Variable substitution: brace enclosed */
1029 a_DIGIT1 = 1u<<5, /* ..first character was digit */
1030 a_NONDIGIT = 1u<<6, /* ..has seen any non-digits */
1031 a_VARSUBST_MASK = n_BITENUM_MASK(4, 6),
1033 a_ROUND_MASK = a_SKIPT | (int)~n_BITENUM_MASK(0, 7),
1034 a_COOKIE = 1u<<8,
1035 a_EXPLODE = 1u<<9,
1036 a_CONSUME = 1u<<10, /* When done, "consume" remaining input */
1037 a_TMP = 1u<<30
1038 } state;
1039 NYD2_ENTER;
1041 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
1042 assert(input != NULL);
1043 assert(input->l == 0 || input->s != NULL);
1044 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
1045 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
1046 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
1047 assert(!(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED) ||
1048 (flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK));
1050 if((flags & n_SHEXP_PARSE_LOG_D_V) && (n_poption & n_PO_D_V))
1051 flags |= n_SHEXP_PARSE_LOG;
1052 if(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)
1053 flags |= n_SHEXP_PARSE_QUOTE_AUTO_CLOSE;
1055 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
1056 store = n_string_trunc(store, 0);
1058 if(flags & n_SHEXP_PARSE_IFS_VAR){
1059 ifs = ok_vlook(ifs);
1060 ifs_ws = ok_vlook(ifs_ws);
1063 state = a_NONE;
1064 ib = input->s;
1065 if((il = input->l) == UIZ_MAX)
1066 input->l = il = strlen(ib);
1067 n_UNINIT(c, '\0');
1069 if(cookie != NULL && *cookie != NULL){
1070 assert(!(flags & n_SHEXP_PARSE_DRYRUN));
1071 state |= a_COOKIE;
1074 jrestart_empty:
1075 rv = n_SHEXP_STATE_NONE;
1076 state &= a_ROUND_MASK;
1078 /* In cookie mode, the next ARGV entry is the token already, unchanged,
1079 * since it has already been expanded before! */
1080 if(state & a_COOKIE){
1081 char const * const *xcookie, *cp;
1083 i = store->s_len;
1084 xcookie = *cookie;
1085 if((store = n_string_push_cp(store, *xcookie))->s_len > 0)
1086 rv |= n_SHEXP_STATE_OUTPUT;
1087 if(*++xcookie == NULL){
1088 *cookie = NULL;
1089 state &= ~a_COOKIE;
1090 flags |= n_SHEXP_PARSE_QUOTE_AUTO_DQ; /* ..why we are here! */
1091 }else
1092 *cookie = n_UNCONST(xcookie);
1094 for(cp = &n_string_cp(store)[i]; (c = *cp++) != '\0';)
1095 if(cntrlchar(c)){
1096 rv |= n_SHEXP_STATE_CONTROL;
1097 break;
1100 /* The last exploded cookie will join with the yielded input token, so
1101 * simply fall through in this case */
1102 if(state & a_COOKIE)
1103 goto jleave_quick;
1104 }else{
1105 jrestart:
1106 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1107 if(flags & n_SHEXP_PARSE_IFS_VAR){
1108 for(; il > 0; ++ib, --il)
1109 if(strchr(ifs_ws, *ib) == NULL)
1110 break;
1111 }else{
1112 for(; il > 0; ++ib, --il)
1113 if(!blankspacechar(*ib))
1114 break;
1117 input->s = n_UNCONST(ib);
1118 input->l = il;
1121 if(il == 0){
1122 rv |= n_SHEXP_STATE_STOP;
1123 goto jleave;
1126 if(store != NULL)
1127 store = n_string_reserve(store, n_MIN(il, 32)); /* XXX */
1129 switch(flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK){
1130 case n_SHEXP_PARSE_QUOTE_AUTO_SQ:
1131 quotec = '\'';
1132 break;
1133 case n_SHEXP_PARSE_QUOTE_AUTO_DQ:
1134 quotec = '"';
1135 if(0){
1136 case n_SHEXP_PARSE_QUOTE_AUTO_DSQ:
1137 quotec = '\'';
1139 state |= a_SURPLUS;
1140 break;
1141 default:
1142 quotec = '\0';
1143 state |= a_NTOKEN;
1144 break;
1147 while(il > 0){
1148 --il, c = *ib++;
1150 /* If no quote-mode active.. */
1151 if(quotec == '\0'){
1152 if(c == '"' || c == '\''){
1153 quotec = c;
1154 if(c == '"')
1155 state |= a_SURPLUS;
1156 else
1157 state &= ~a_SURPLUS;
1158 state &= ~a_NTOKEN;
1159 continue;
1160 }else if(c == '$'){
1161 if(il > 0){
1162 state &= ~a_NTOKEN;
1163 if(*ib == '\''){
1164 --il, ++ib;
1165 quotec = '\'';
1166 state |= a_SURPLUS;
1167 continue;
1168 }else
1169 goto J_var_expand;
1171 }else if(c == '\\'){
1172 /* Outside of quotes this just escapes any next character, but a sole
1173 * <reverse solidus> at EOS is left unchanged */
1174 if(il > 0)
1175 --il, c = *ib++;
1176 state &= ~a_NTOKEN;
1178 /* A comment may it be if no token has yet started */
1179 else if(c == '#' && (state & a_NTOKEN)){
1180 rv |= n_SHEXP_STATE_STOP;
1181 goto jleave;
1183 /* Metacharacters which separate tokens must be turned on explicitly */
1184 else if(c == '|'){
1185 rv |= n_SHEXP_STATE_META_VERTBAR;
1186 /* The parsed sequence may be _the_ output, so ensure we don't
1187 * include the metacharacter, then. */
1188 /*if(flags & n_SHEXP_PARSE_META_VERTBAR)*/
1189 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1190 ++il, --ib;
1191 break;
1192 }else if(c == '&'){
1193 rv |= n_SHEXP_STATE_META_AMPERSAND;
1194 /* The parsed sequence may be _the_ output, so ensure we don't
1195 * include the metacharacter, then. */
1196 /*if(flags & n_SHEXP_PARSE_META_AMPERSAND)*/
1197 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1198 ++il, --ib;
1199 break;
1200 }else if(c == ';'){
1201 rv |= n_SHEXP_STATE_META_SEMICOLON;
1202 /* The parsed sequence may be _the_ output, so ensure we don't
1203 * include the metacharacter, then. */
1204 if(flags & n_SHEXP_PARSE_META_SEMICOLON){
1205 if(il > 0)
1206 n_go_input_inject(n_GO_INPUT_INJECT_COMMIT, ib, il);
1207 state |= a_CONSUME;
1208 rv |= n_SHEXP_STATE_STOP;
1210 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1211 ++il, --ib;
1212 break;
1213 }else if(c == ',' && (flags &
1214 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA))){
1215 /* The parsed sequence may be _the_ output, so ensure we don't
1216 * include the metacharacter, then. */
1217 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1218 ++il, --ib;
1219 break;
1220 }else if((!(flags & n_SHEXP_PARSE_IFS_VAR) && blankchar(c)) ||
1221 ((flags & n_SHEXP_PARSE_IFS_VAR) && strchr(ifs, c) != NULL)){
1222 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
1223 /* The parsed sequence may be _the_ output, so ensure we don't
1224 * include the metacharacter, then. */
1225 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1226 ++il, --ib;
1227 break;
1229 state |= a_NTOKEN;
1230 }else
1231 state &= ~a_NTOKEN;
1232 }else{
1233 /* Quote-mode */
1234 assert(!(state & a_NTOKEN));
1235 if(c == quotec && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)){
1236 state &= a_ROUND_MASK;
1237 quotec = '\0';
1238 /* Users may need to recognize the presence of empty quotes */
1239 rv |= n_SHEXP_STATE_OUTPUT;
1240 continue;
1241 }else if(c == '\\' && (state & a_SURPLUS)){
1242 ib_save = ib - 1;
1243 /* A sole <reverse solidus> at EOS is treated as-is! This is ok
1244 * since the "closing quote" error will occur next, anyway */
1245 if(il == 0)
1246 break;
1247 else if((c2 = *ib) == quotec){
1248 --il, ++ib;
1249 c = quotec;
1250 }else if(quotec == '"'){
1251 /* Double quotes, POSIX says:
1252 * The <backslash> shall retain its special meaning as an
1253 * escape character (see Section 2.2.1) only when followed
1254 * by one of the following characters when considered
1255 * special: $ ` " \ <newline> */
1256 switch(c2){
1257 case '$':
1258 case '`':
1259 /* case '"': already handled via c2 == quotec */
1260 case '\\':
1261 --il, ++ib;
1262 c = c2;
1263 /* FALLTHRU */
1264 default:
1265 break;
1267 }else{
1268 /* Dollar-single-quote */
1269 --il, ++ib;
1270 switch(c2){
1271 case '"':
1272 /* case '\'': already handled via c2 == quotec */
1273 case '\\':
1274 c = c2;
1275 break;
1277 case 'b': c = '\b'; break;
1278 case 'f': c = '\f'; break;
1279 case 'n': c = '\n'; break;
1280 case 'r': c = '\r'; break;
1281 case 't': c = '\t'; break;
1282 case 'v': c = '\v'; break;
1284 case 'E':
1285 case 'e': c = '\033'; break;
1287 /* Control character */
1288 case 'c':
1289 if(il == 0)
1290 goto j_dollar_ungetc;
1291 --il, c2 = *ib++;
1292 if(state & a_SKIPMASK)
1293 continue;
1294 c = upperconv(c2) ^ 0x40;
1295 if((ui8_t)c > 0x1F && c != 0x7F){ /* ASCII C0: 0..1F, 7F */
1296 if(flags & n_SHEXP_PARSE_LOG)
1297 n_err(_("Invalid \\c notation: %.*s\n"),
1298 (int)input->l, input->s);
1299 rv |= n_SHEXP_STATE_ERR_CONTROL;
1301 /* As an implementation-defined extension, support \c@
1302 * EQ printf(1) alike \c */
1303 if(c == '\0'){
1304 state |= a_SKIPT;
1305 continue;
1307 break;
1309 /* Octal sequence: 1 to 3 octal bytes */
1310 case '0':
1311 /* As an extension (dependent on where you look, echo(1), or
1312 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1313 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1314 c2 = c;
1315 --il, ++ib;
1317 /* FALLTHRU */
1318 case '1': case '2': case '3':
1319 case '4': case '5': case '6': case '7':
1320 c2 -= '0';
1321 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1322 c2 = (c2 << 3) | (c - '0');
1323 --il, ++ib;
1325 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1326 if(!(state & a_SKIPMASK) && (ui8_t)c2 > 0x1F){
1327 if(flags & n_SHEXP_PARSE_LOG)
1328 n_err(_("\\0 argument exceeds a byte: %.*s\n"),
1329 (int)input->l, input->s);
1330 rv |= n_SHEXP_STATE_ERR_NUMBER;
1331 --il, ++ib;
1332 /* Write unchanged */
1333 je_ib_save:
1334 rv |= n_SHEXP_STATE_OUTPUT;
1335 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1336 store = n_string_push_buf(store, ib_save,
1337 PTR2SIZE(ib - ib_save));
1338 continue;
1340 c2 = (c2 << 3) | (c -= '0');
1341 --il, ++ib;
1343 if(state & a_SKIPMASK)
1344 continue;
1345 if((c = c2) == '\0'){
1346 state |= a_SKIPQ;
1347 continue;
1349 break;
1351 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
1352 case 'U':
1353 i = 8;
1354 if(0){
1355 /* FALLTHRU */
1356 case 'u':
1357 i = 4;
1359 if(il == 0)
1360 goto j_dollar_ungetc;
1361 if(0){
1362 /* FALLTHRU */
1364 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1365 case 'X':
1366 case 'x':
1367 if(il == 0)
1368 goto j_dollar_ungetc;
1369 i = 2;
1371 /* C99 */{
1372 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
1373 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1375 size_t no, j;
1377 i = n_MIN(il, i);
1378 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
1379 c = *ib;
1380 if(hexchar(c)){
1381 no <<= 4;
1382 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
1383 : ((c) <= 'F' ? 55 : 87)))];
1384 }else if(j == 0){
1385 if(state & a_SKIPMASK)
1386 break;
1387 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
1388 if(flags & n_SHEXP_PARSE_LOG)
1389 n_err(_("Invalid \\%c notation: %.*s\n"),
1390 c2, (int)input->l, input->s);
1391 rv |= n_SHEXP_STATE_ERR_NUMBER;
1392 goto je_ib_save;
1393 }else
1394 break;
1397 /* Unicode massage */
1398 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
1399 if((c = (char)no) == '\0')
1400 state |= a_SKIPQ;
1401 }else if(no == 0)
1402 state |= a_SKIPQ;
1403 else if(!(state & a_SKIPMASK)){
1404 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1405 store = n_string_reserve(store, n_MAX(j, 4));
1407 c2 = FAL0;
1408 if(no > 0x10FFFF){ /* XXX magic; CText */
1409 if(flags & n_SHEXP_PARSE_LOG)
1410 n_err(_("\\U argument exceeds 0x10FFFF: %.*s\n"),
1411 (int)input->l, input->s);
1412 rv |= n_SHEXP_STATE_ERR_NUMBER;
1413 /* But normalize the output anyway */
1414 goto Je_uni_norm;
1417 j = n_utf32_to_utf8(no, utf);
1419 if(n_psonce & n_PSO_UNICODE){
1420 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
1421 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1422 store = n_string_push_buf(store, utf, j);
1423 continue;
1425 #ifdef HAVE_ICONV
1426 else{
1427 char *icp;
1429 icp = n_iconv_onetime_cp(n_ICONV_NONE,
1430 NULL, NULL, utf);
1431 if(icp != NULL){
1432 rv |= n_SHEXP_STATE_OUTPUT;
1433 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1434 store = n_string_push_cp(store, icp);
1435 continue;
1438 #endif
1439 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Je_uni_norm:{
1440 char itoa[32];
1442 rv |= n_SHEXP_STATE_OUTPUT |
1443 n_SHEXP_STATE_ERR_UNICODE;
1444 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1445 (no > 0xFFFFu ? 'U' : 'u'),
1446 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
1447 store = n_string_push_buf(store, itoa, i);
1449 continue;
1451 if(state & a_SKIPMASK)
1452 continue;
1454 break;
1456 /* Extension: \$ can be used to expand a variable.
1457 * Bug|ad effect: if conversion fails, not written "as-is" */
1458 case '$':
1459 if(il == 0)
1460 goto j_dollar_ungetc;
1461 goto J_var_expand;
1463 default:
1464 j_dollar_ungetc:
1465 /* Follow bash(1) behaviour, print sequence unchanged */
1466 ++il, --ib;
1467 break;
1470 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
1471 state &= ~a_VARSUBST_MASK;
1472 if(*ib == '{')
1473 state |= a_BRACE;
1475 if(!(state & a_BRACE) || il > 1){
1476 char const *cp, *vp;
1478 ib_save = ib - 1;
1479 if(state & a_BRACE)
1480 --il, ++ib;
1481 vp = ib;
1482 state &= ~a_EXPLODE;
1484 for(i = 0; il > 0; --il, ++ib, ++i){
1485 /* We have some special cases regarding macro-local special
1486 * parameters, so ensure these don't cause failure.
1487 * This has counterparts in the code that manages internal
1488 * variables! */
1489 c = *ib;
1490 if(!a_SHEXP_ISVARC(c)){
1491 if(i == 0 && (c == '*' || c == '@' || c == '#' ||
1492 c == '?' || c == '!' || c == '^')){
1493 /* Skip over multiplexer */
1494 if(c == '^')
1495 continue;
1496 if(c == '@'){
1497 if(quotec == '"')
1498 state |= a_EXPLODE;
1500 --il, ++ib;
1501 ++i;
1503 break;
1504 }else if(a_SHEXP_ISVARC_BAD1ST(c)){
1505 if(i == 0)
1506 state |= a_DIGIT1;
1507 }else
1508 state |= a_NONDIGIT;
1511 if(state & a_SKIPMASK){
1512 if((state & a_BRACE) && il > 0 && *ib == '}')
1513 --il, ++ib;
1514 continue;
1517 /* XXX Digit in first place is not supported, however we do
1518 * XXX support all digits because these refer to macro-local
1519 * XXX variables; if we would have a notion of whether we're in
1520 * XXX a macro this could be made more fine grained */
1521 if((state & (a_DIGIT1 | a_NONDIGIT)) == (a_DIGIT1 | a_NONDIGIT)){
1522 if(state & a_BRACE){
1523 if(il > 0 && *ib == '}')
1524 --il, ++ib;
1525 else
1526 rv |= n_SHEXP_STATE_ERR_BRACE;
1528 if(flags & n_SHEXP_PARSE_LOG)
1529 n_err(_("Invalid identifier for ${}: %.*s\n"),
1530 (int)input->l, input->s);
1531 rv |= n_SHEXP_STATE_ERR_IDENTIFIER;
1532 goto je_ib_save;
1533 }else if(i == 0){
1534 if(state & a_BRACE){
1535 if(flags & n_SHEXP_PARSE_LOG)
1536 n_err(_("Bad substitution for ${}: %.*s\n"),
1537 (int)input->l, input->s);
1538 rv |= n_SHEXP_STATE_ERR_BADSUB;
1539 if(il > 0 && *ib == '}')
1540 --il, ++ib;
1541 else
1542 rv |= n_SHEXP_STATE_ERR_BRACE;
1543 goto je_ib_save;
1545 c = '$';
1546 }else{
1547 if(state & a_BRACE){
1548 if(il == 0 || *ib != '}'){
1549 if(flags & n_SHEXP_PARSE_LOG)
1550 n_err(_("No closing brace for ${}: %.*s\n"),
1551 (int)input->l, input->s);
1552 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN |
1553 n_SHEXP_STATE_ERR_BRACE;
1554 goto je_ib_save;
1556 --il, ++ib;
1559 if(flags & n_SHEXP_PARSE_DRYRUN)
1560 continue;
1562 /* We may shall explode "${@}" to a series of successive,
1563 * properly quoted tokens (instead). The first exploded
1564 * cookie will join with the current token */
1565 if((state & a_EXPLODE) && !(flags & n_SHEXP_PARSE_DRYRUN) &&
1566 cookie != NULL && n_var_vexplode(cookie)){
1567 state |= a_COOKIE;
1568 input->s = n_UNCONST(ib);
1569 input->l = il;
1570 goto jrestart_empty;
1573 /* Check getenv(3) shall no internal variable exist!
1574 * XXX We have some common idioms, avoid memory for them
1575 * XXX Even better would be var_vlook_buf()! */
1576 if(i == 1){
1577 switch(*vp){
1578 case '?': vp = n_qm; break;
1579 case '!': vp = n_em; break;
1580 case '*': vp = n_star; break;
1581 case '@': vp = n_at; break;
1582 case '#': vp = n_ns; break;
1583 default: goto j_var_look_buf;
1585 }else
1586 j_var_look_buf:
1587 vp = savestrbuf(vp, i);
1589 if((cp = n_var_vlook(vp, TRU1)) != NULL){
1590 rv |= n_SHEXP_STATE_OUTPUT;
1591 store = n_string_push_cp(store, cp);
1592 for(; (c = *cp) != '\0'; ++cp)
1593 if(cntrlchar(c)){
1594 rv |= n_SHEXP_STATE_CONTROL;
1595 break;
1598 continue;
1601 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
1602 continue;
1606 if(!(state & a_SKIPMASK)){
1607 rv |= n_SHEXP_STATE_OUTPUT;
1608 if(cntrlchar(c))
1609 rv |= n_SHEXP_STATE_CONTROL;
1610 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1611 store = n_string_push_c(store, c);
1615 if(quotec != '\0' && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_CLOSE)){
1616 if(flags & n_SHEXP_PARSE_LOG)
1617 n_err(_("No closing quote: %.*s\n"), (int)input->l, input->s);
1618 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
1621 jleave:
1622 assert(!(state & a_COOKIE));
1623 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
1624 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
1625 rv |= n_SHEXP_STATE_OUTPUT;
1628 if(state & a_CONSUME){
1629 input->s = n_UNCONST(&ib[il]);
1630 input->l = 0;
1631 }else{
1632 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1633 if(flags & n_SHEXP_PARSE_IFS_VAR){
1634 for(; il > 0; ++ib, --il)
1635 if(strchr(ifs_ws, *ib) == NULL)
1636 break;
1637 }else{
1638 for(; il > 0; ++ib, --il)
1639 if(!blankchar(*ib))
1640 break;
1643 input->l = il;
1644 input->s = n_UNCONST(ib);
1647 if(!(rv & n_SHEXP_STATE_STOP)){
1648 if(!(rv & n_SHEXP_STATE_OUTPUT) && (flags & n_SHEXP_PARSE_IGNORE_EMPTY) &&
1649 il > 0)
1650 goto jrestart_empty;
1651 if(/*!(rv & n_SHEXP_STATE_OUTPUT) &&*/ il == 0)
1652 rv |= n_SHEXP_STATE_STOP;
1655 if((state & a_SKIPT) && !(rv & n_SHEXP_STATE_STOP) &&
1656 (flags & n__SHEXP_PARSE_META_MASK))
1657 goto jrestart;
1658 jleave_quick:
1659 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1660 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1661 NYD2_LEAVE;
1662 return rv;
1665 FL char *
1666 n_shexp_parse_token_cp(enum n_shexp_parse_flags flags, char const **cp){
1667 struct str input;
1668 struct n_string sou, *soup;
1669 char *rv;
1670 enum n_shexp_state shs;
1671 NYD2_ENTER;
1673 assert(cp != NULL);
1675 input.s = n_UNCONST(*cp);
1676 input.l = UIZ_MAX;
1677 soup = n_string_creat_auto(&sou);
1679 shs = n_shexp_parse_token(flags, soup, &input, NULL);
1680 if(shs & n_SHEXP_STATE_ERR_MASK){
1681 soup = n_string_assign_cp(soup, *cp);
1682 *cp = NULL;
1683 }else
1684 *cp = input.s;
1686 rv = n_string_cp(soup);
1687 /*n_string_gut(n_string_drop_ownership(soup));*/
1688 NYD2_LEAVE;
1689 return rv;
1692 FL struct n_string *
1693 n_shexp_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1694 struct a_shexp_quote_lvl sql;
1695 struct a_shexp_quote_ctx sqc;
1696 NYD2_ENTER;
1698 assert(store != NULL);
1699 assert(input != NULL);
1700 assert(input->l == 0 || input->s != NULL);
1702 memset(&sqc, 0, sizeof sqc);
1703 sqc.sqc_store = store;
1704 sqc.sqc_input.s = input->s;
1705 if((sqc.sqc_input.l = input->l) == UIZ_MAX)
1706 sqc.sqc_input.l = strlen(input->s);
1707 sqc.sqc_flags = rndtrip ? a_SHEXP_QUOTE_ROUNDTRIP : a_SHEXP_QUOTE_NONE;
1709 if(sqc.sqc_input.l == 0)
1710 store = n_string_push_buf(store, "''", sizeof("''") -1);
1711 else{
1712 memset(&sql, 0, sizeof sql);
1713 sql.sql_dat = sqc.sqc_input;
1714 sql.sql_flags = sqc.sqc_flags;
1715 a_shexp__quote(&sqc, &sql);
1717 NYD2_LEAVE;
1718 return store;
1721 FL char *
1722 n_shexp_quote_cp(char const *cp, bool_t rndtrip){
1723 struct n_string store;
1724 struct str input;
1725 char *rv;
1726 NYD2_ENTER;
1728 assert(cp != NULL);
1730 input.s = n_UNCONST(cp);
1731 input.l = UIZ_MAX;
1732 rv = n_string_cp(n_shexp_quote(n_string_creat_auto(&store), &input,
1733 rndtrip));
1734 n_string_gut(n_string_drop_ownership(&store));
1735 NYD2_LEAVE;
1736 return rv;
1739 FL bool_t
1740 n_shexp_is_valid_varname(char const *name){
1741 char lc, c;
1742 bool_t rv;
1743 NYD2_ENTER;
1745 rv = FAL0;
1747 for(lc = '\0'; (c = *name++) != '\0'; lc = c)
1748 if(!a_SHEXP_ISVARC(c))
1749 goto jleave;
1750 else if(lc == '\0' && a_SHEXP_ISVARC_BAD1ST(c))
1751 goto jleave;
1752 if(a_SHEXP_ISVARC_BADNST(lc))
1753 goto jleave;
1755 rv = TRU1;
1756 jleave:
1757 NYD2_LEAVE;
1758 return rv;
1761 FL int
1762 c_shcodec(void *v){
1763 struct str in;
1764 struct n_string sou_b, *soup;
1765 si32_t nerrn;
1766 size_t alen;
1767 bool_t norndtrip;
1768 char const **argv, *varname, *act, *cp;
1770 soup = n_string_creat_auto(&sou_b);
1771 argv = v;
1772 varname = (n_pstate & n_PS_ARGMOD_VPUT) ? *argv++ : NULL;
1774 act = *argv;
1775 for(cp = act; *cp != '\0' && !blankspacechar(*cp); ++cp)
1777 if((norndtrip = (*act == '+')))
1778 ++act;
1779 if(act == cp)
1780 goto jesynopsis;
1781 alen = PTR2SIZE(cp - act);
1782 if(*cp != '\0')
1783 ++cp;
1785 in.l = strlen(in.s = n_UNCONST(cp));
1786 nerrn = n_ERR_NONE;
1788 if(is_ascncaseprefix(act, "encode", alen))
1789 soup = n_shexp_quote(soup, &in, !norndtrip);
1790 else if(!norndtrip && is_ascncaseprefix(act, "decode", alen)){
1791 for(;;){
1792 enum n_shexp_state shs;
1794 shs = n_shexp_parse_token((n_SHEXP_PARSE_LOG |
1795 n_SHEXP_PARSE_IGNORE_EMPTY), soup, &in, NULL);
1796 if(shs & n_SHEXP_STATE_ERR_MASK){
1797 soup = n_string_assign_cp(soup, cp);
1798 nerrn = n_ERR_CANCELED;
1799 v = NULL;
1800 break;
1802 if(shs & n_SHEXP_STATE_STOP)
1803 break;
1805 }else
1806 goto jesynopsis;
1808 assert(cp != NULL);
1809 if(varname != NULL){
1810 cp = n_string_cp(soup);
1811 if(!n_var_vset(varname, (uintptr_t)cp)){
1812 nerrn = n_ERR_NOTSUP;
1813 cp = NULL;
1815 }else{
1816 struct str out;
1818 in.s = n_string_cp(soup);
1819 in.l = soup->s_len;
1820 makeprint(&in, &out);
1821 if(fprintf(n_stdout, "%s\n", out.s) < 0){
1822 nerrn = n_err_no;
1823 cp = NULL;
1825 free(out.s);
1828 jleave:
1829 n_pstate_err_no = nerrn;
1830 NYD_LEAVE;
1831 return (cp != NULL ? 0 : 1);
1832 jesynopsis:
1833 n_err(_("Synopsis: shcodec: <[+]e[ncode]|d[ecode]> <rest-of-line>\n"));
1834 nerrn = n_ERR_INVAL;
1835 cp = NULL;
1836 goto jleave;
1839 /* s-it-mode */