mk-release.inc: update
[s-mailx.git] / shexp.c
blobc321c71f8dd43aea3e68a4b757b22a33863b3a65
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
5 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
6 * Copyright (c) 2012 - 2016 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7 */
8 /*
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 #undef n_FILE
37 #define n_FILE shexp
39 #ifndef HAVE_AMALGAMATION
40 # include "nail.h"
41 #endif
43 #include <sys/wait.h>
45 #include <pwd.h>
47 #ifdef HAVE_FNMATCH
48 # include <dirent.h>
49 # include <fnmatch.h>
50 #endif
52 /* POSIX says
53 * Environment variable names used by the utilities in the Shell and
54 * Utilities volume of POSIX.1-2008 consist solely of uppercase
55 * letters, digits, and the <underscore> ('_') from the characters
56 * defined in Portable Character Set and do not begin with a digit.
57 * Other characters may be permitted by an implementation;
58 * applications shall tolerate the presence of such names.
59 * We do support the hyphen "-" because it is common for mailx. */
60 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* Flags used by a_shexp__quote(): bit 0 is a behaviour switch, bits 8 to 11
 * denote the type of quoting and are combined in a_SHEXP_QUOTE_T_MASK */
62 enum a_shexp_quote_flags{
63 a_SHEXP_QUOTE_NONE,
64 a_SHEXP_QUOTE_ROUNDTRIP = 1u<<0, /* Result won't be consumed immediately */
66 a_SHEXP_QUOTE_T_REVSOL = 1u<<8, /* Type: by reverse solidus */
67 a_SHEXP_QUOTE_T_SINGLE = 1u<<9, /* Type: single-quotes */
68 a_SHEXP_QUOTE_T_DOUBLE = 1u<<10, /* Type: double-quotes */
69 a_SHEXP_QUOTE_T_DOLLAR = 1u<<11, /* Type: dollar-single-quotes */
/* Union of all quote type bits */
70 a_SHEXP_QUOTE_T_MASK = a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
71 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR,
/* NOTE(review): presumably the first bit position free for private flags;
 * it is not referenced by the code visible here -- confirm */
73 a_SHEXP_QUOTE__FREESHIFT = 16u
/* One level of the variable expansion done by a_shexp_var(): each level
 * handles either a literal run or one $VARIABLE, and the levels are joined
 * via .svs_next once the entire input has been worked */
76 struct a_shexp_var_stack {
77 struct a_shexp_var_stack *svs_next; /* Outer stack frame */
78 char const *svs_value; /* Remaining value to expand */
79 size_t svs_len; /* Length of .svs_dat this level */
80 char const *svs_dat; /* Result data of this level */
81 bool_t svs_bsesc; /* Shall backslash escaping be performed */
82 ui8_t svs__dummy[7]; /* (Presumably padding only; never accessed here) */
85 #ifdef HAVE_FNMATCH
/* State of one directory level of a_shexp__glob() */
86 struct a_shexp_glob_ctx{
87 char const *sgc_patdat; /* Remaining pattern (at and below level) */
88 size_t sgc_patlen; /* Length of .sgc_patdat in bytes */
89 struct n_string *sgc_outer; /* Resolved path up to this level */
90 ui32_t sgc_flags; /* Bits of the enum local to a_shexp__glob() */
91 ui8_t sgc__dummy[4]; /* (Presumably padding only; never accessed here) */
93 #endif
/* Overall context of a a_shexp__quote() run, shared by all levels.
 * The .sqc_cnt_* counters are only incremented by code which is guarded by
 * a_SHEXP_QUOTE_RECURSE, a macro that a_shexp__quote() #undef's */
95 struct a_shexp_quote_ctx{
96 struct n_string *sqc_store; /* Result storage */
97 struct str sqc_input; /* Input data, topmost level */
98 ui32_t sqc_cnt_revso;
99 ui32_t sqc_cnt_single;
100 ui32_t sqc_cnt_double;
101 ui32_t sqc_cnt_dollar;
102 enum a_shexp_quote_flags sqc_flags;
103 ui8_t sqc__dummy[4]; /* (Presumably padding only; never accessed here) */
/* One level (substring plus the quote flags in effect for it) of
 * a_shexp__quote(); levels form a list through .sql_link */
106 struct a_shexp_quote_lvl{
107 struct a_shexp_quote_lvl *sql_link; /* Outer level */
108 struct str sql_dat; /* This level (has to) handle(d) */
109 enum a_shexp_quote_flags sql_flags; /* a_SHEXP_QUOTE_* bits of this level */
110 ui8_t sql__dummy[4]; /* (Presumably padding only; never accessed here) */
113 /* Locate the mailbox file of user (where new, unread mail is queued).
 * With force, or if the MAIL variable is not set, VAL_MAIL "/" user is used,
 * otherwise the expansion of the MAIL variable */
114 static char * _findmail(char const *user, bool_t force);
116 /* Expand ^~/? and ^~USER/? constructs.
117 * Returns the completely resolved (maybe empty or identical to input)
118 * salloc()ed string */
119 static char *a_shexp_tilde(char const *s);
121 /* (Try to) Expand any shell variable in s.
122 * Returns the completely resolved (maybe empty) salloc()ed string.
123 * Logs on error */
124 static char *a_shexp_var(struct a_shexp_var_stack *svsp);
126 /* Expand filename patterns in name by matching directory entries with
 * fnmatch(3). May return NULL on error.
 * The __glob() helper works one directory level of the pattern and recurses
 * for deeper levels, __globsort() is the qsort(3) comparison function that
 * is used if multiple matches are to be returned */
127 static char *a_shexp_globname(char const *name, enum fexp_mode fexpm);
128 #ifdef HAVE_FNMATCH
129 static bool_t a_shexp__glob(struct a_shexp_glob_ctx *sgcp,
130 struct n_strlist **slpp);
131 static int a_shexp__globsort(void const *cvpa, void const *cvpb);
132 #endif
134 /* Parse an input string and create a sh(1)ell-quoted result */
135 static void a_shexp__quote(struct a_shexp_quote_ctx *sqcp,
136 struct a_shexp_quote_lvl *sqlp);
138 static char *
139 _findmail(char const *user, bool_t force)
141 char *rv;
142 char const *cp;
143 NYD_ENTER;
145 if (force || (cp = ok_vlook(MAIL)) == NULL) {
146 size_t ul = strlen(user), i = sizeof(VAL_MAIL) -1 + 1 + ul +1;
148 rv = salloc(i);
149 memcpy(rv, VAL_MAIL, i = sizeof(VAL_MAIL));
150 rv[i] = '/';
151 memcpy(&rv[++i], user, ul +1);
152 } else if ((rv = fexpand(cp, FEXP_NSHELL)) == NULL)
153 rv = savestr(cp);
154 NYD_LEAVE;
155 return rv;
158 static char *
159 a_shexp_tilde(char const *s){
160 struct passwd *pwp;
161 size_t nl, rl;
162 char const *rp, *np;
163 char *rv;
164 NYD2_ENTER;
166 if(*(rp = &s[1]) == '/' || *rp == '\0'){
167 np = ok_vlook(HOME);
168 rl = strlen(rp);
169 }else{
170 if((rp = strchr(np = rp, '/')) != NULL){
171 nl = PTR2SIZE(rp - np);
172 np = savestrbuf(np, nl);
173 rl = strlen(rp);
174 }else
175 rl = 0;
177 if((pwp = getpwnam(np)) == NULL){
178 rv = savestr(s);
179 goto jleave;
181 np = pwp->pw_dir;
184 nl = strlen(np);
185 rv = salloc(nl + 1 + rl +1);
186 memcpy(rv, np, nl);
187 if(rl > 0){
188 memcpy(rv + nl, rp, rl);
189 nl += rl;
191 rv[nl] = '\0';
192 jleave:
193 NYD2_LEAVE;
194 return rv;
/* Expand the shell variables in svsp->svs_value.
 * Each invocation handles one piece of the input, either a literal run up to
 * the next unescaped $ or one $NAME / ${NAME} reference, and stores the
 * result of the piece in svsp->svs_dat / ->svs_len.  If input remains the
 * function recurses with a new stack frame that links back to svsp; the
 * innermost invocation concatenates all pieces into one salloc()ed string,
 * which is the return value of all levels */
197 static char *
198 a_shexp_var(struct a_shexp_var_stack *svsp)
200 struct a_shexp_var_stack next, *np, *tmp;
201 char const *vp;
202 char lc, c, *cp, *rv;
203 size_t i;
204 NYD2_ENTER;
/* Literal data: scan until NUL or a $ which is not preceded by a backslash */
206 if (*(vp = svsp->svs_value) != '$') {
207 bool_t bsesc = svsp->svs_bsesc;
208 union {bool_t hadbs; char c;} u = {FAL0};
210 svsp->svs_dat = vp;
211 for (lc = '\0', i = 0; ((c = *vp) != '\0'); ++i, ++vp) {
212 if (c == '$' && lc != '\\')
213 break;
214 if (!bsesc)
215 continue;
/* lc tracks a pending backslash; .hadbs notes that a backslash escape was
 * seen so that the run needs to be copied and unescaped below */
216 lc = (lc == '\\') ? (u.hadbs = TRU1, '\0') : c;
218 svsp->svs_len = i;
/* Work on a copy and drop the escaping backslashes; an escaped backslash
 * results in a single backslash */
220 if (u.hadbs) {
221 svsp->svs_dat = cp = savestrbuf(svsp->svs_dat, i);
223 for (lc = '\0', rv = cp; (u.c = *cp++) != '\0';) {
224 if (u.c != '\\' || lc == '\\')
225 *rv++ = u.c;
226 lc = (lc == '\\') ? '\0' : u.c;
228 *rv = '\0';
230 svsp->svs_len = PTR2SIZE(rv - svsp->svs_dat);
232 } else {
/* A variable reference: lc is reused as the "name is enclosed in braces" flag */
233 if ((lc = (*++vp == '{')))
234 ++vp;
236 svsp->svs_dat = vp;
237 for (i = 0; (c = *vp) != '\0'; ++i, ++vp)
238 if (!a_SHEXP_ISVARC(c))
239 break;
241 if (lc) {
/* Without a closing brace the entire remaining input is taken literally
 * and the expansion ends */
242 if (c != '}') {
243 n_err(_("Variable name misses closing }: %s\n"),
244 svsp->svs_value);
245 svsp->svs_len = strlen(svsp->svs_value);
246 svsp->svs_dat = svsp->svs_value;
247 goto junroll;
249 c = *++vp;
252 svsp->svs_len = i;
253 /* Check getenv(3) shall no internal variable exist! */
254 if ((rv = vok_vlook(cp = savestrbuf(svsp->svs_dat, i))) != NULL ||
255 (rv = getenv(cp)) != NULL)
256 svsp->svs_len = strlen(svsp->svs_dat = rv);
257 else
258 svsp->svs_len = 0, svsp->svs_dat = UNCONST("");
/* c is the byte following the piece just handled: more input means recursion */
260 if (c != '\0')
261 goto jrecurse;
263 /* That level made the great and completed encoding. Build result */
264 junroll:
/* Reverse the frame list (innermost frame comes first so far) and sum up
 * the total length along the way */
265 for (i = 0, np = svsp, svsp = NULL; np != NULL;) {
266 i += np->svs_len;
267 tmp = np->svs_next;
268 np->svs_next = svsp;
269 svsp = np;
270 np = tmp;
/* Then concatenate all the pieces in input order */
273 cp = rv = salloc(i +1);
274 while (svsp != NULL) {
275 np = svsp;
276 svsp = svsp->svs_next;
277 memcpy(cp, np->svs_dat, np->svs_len);
278 cp += np->svs_len;
280 *cp = '\0';
282 jleave:
283 NYD2_LEAVE;
284 return rv;
285 jrecurse:
/* The new frame lives on our stack, which is fine since the result is
 * completely built before the recursion returns */
286 memset(&next, 0, sizeof next);
287 next.svs_next = svsp;
288 next.svs_value = vp;
289 next.svs_bsesc = svsp->svs_bsesc;
290 rv = a_shexp_var(&next);
291 goto jleave;
/* Expand the filename pattern name.
 * With HAVE_FNMATCH the matches are collected via a_shexp__glob(): a single
 * match is returned as a savestrbuf() copy; multiple matches are only
 * accepted with FEXP_MULTIOK, in which case they are sorted and returned as
 * a salloc()ed sequence of NUL terminated strings that is ended by an
 * additional NUL, and PS_EXPAND_MULTIRESULT is set in pstate.
 * NULL is returned on error or if nothing matched; errors are logged unless
 * FEXP_SILENT is set.
 * Without HAVE_FNMATCH a savestr() copy of name is returned */
294 static char *
295 a_shexp_globname(char const *name, enum fexp_mode fexpm){
296 #ifdef HAVE_FNMATCH
297 struct a_shexp_glob_ctx sgc;
298 struct n_string outer;
299 struct n_strlist *slp;
300 char *cp;
301 NYD_ENTER;
/* The pattern is copied because a_shexp__glob() modifies it in place */
303 memset(&sgc, 0, sizeof sgc);
304 sgc.sgc_patlen = strlen(name);
305 sgc.sgc_patdat = savestrbuf(name, sgc.sgc_patlen);
306 sgc.sgc_outer = n_string_reserve(n_string_creat(&outer), sgc.sgc_patlen);
/* The boolean value 1 equals the a_SILENT bit of a_shexp__glob() */
307 sgc.sgc_flags = ((fexpm & FEXP_SILENT) != 0);
308 slp = NULL;
/* cp is solely used as a success indicator at first */
309 if(a_shexp__glob(&sgc, &slp))
310 cp = (char*)1;
311 else
312 cp = NULL;
313 n_string_gut(&outer);
315 if(cp == NULL)
316 goto jleave;
318 if(slp == NULL){
319 cp = UNCONST(N_("File pattern does not match"));
320 goto jerr;
321 }else if(slp->sl_next == NULL)
322 cp = savestrbuf(slp->sl_dat, slp->sl_len);
323 else if(fexpm & FEXP_MULTIOK){
324 struct n_strlist **sorta, *xslp;
325 size_t i, no, l;
/* Count the matches and the storage needed, including one NUL per match */
327 no = l = 0;
328 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next){
329 ++no;
330 l += xslp->sl_len + 1;
/* Sort by means of a temporary array of list node pointers */
333 sorta = smalloc(sizeof(*sorta) * no);
334 no = 0;
335 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next)
336 sorta[no++] = xslp;
337 qsort(sorta, no, sizeof *sorta, &a_shexp__globsort);
/* One more byte for the NUL that terminates the entire sequence */
339 cp = salloc(++l);
340 l = 0;
341 for(i = 0; i < no; ++i){
342 xslp = sorta[i];
343 memcpy(&cp[l], xslp->sl_dat, xslp->sl_len);
344 l += xslp->sl_len;
345 cp[l++] = '\0';
347 cp[l] = '\0';
349 free(sorta);
350 pstate |= PS_EXPAND_MULTIRESULT;
351 }else{
352 cp = UNCONST(N_("File pattern matches multiple results"));
353 goto jerr;
356 jleave:
/* The list nodes of the matches are always released */
357 while(slp != NULL){
358 struct n_strlist *tmp = slp;
360 slp = slp->sl_next;
361 free(tmp);
363 NYD_LEAVE;
364 return cp;
366 jerr:
/* cp is the (untranslated) error message here */
367 if(!(fexpm & FEXP_SILENT)){
368 name = n_shell_quote_cp(name, FAL0);
369 n_err("%s: %s\n", V_(cp), name);
371 cp = NULL;
372 goto jleave;
374 #else /* HAVE_FNMATCH */
375 UNUSED(fexpm);
377 if(!(fexpm & FEXP_SILENT))
378 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
379 return savestr(name);
380 #endif
383 #ifdef HAVE_FNMATCH
/* Work one directory level of a filename pattern.
 * The first path component of sgcp->sgc_patdat is matched with fnmatch(3)
 * against the entries of the directory resolved so far (sgcp->sgc_outer, or
 * "./" respectively "/" on the outermost level).  If more pattern components
 * remain the function recurses for matching directories, otherwise each
 * match is appended as a new node to the list at *slpp.
 * Returns TRU1 on success and FAL0 on error; errors are logged unless the
 * a_SILENT flag is set.  Note the pattern storage is modified in place */
384 static bool_t
385 a_shexp__glob(struct a_shexp_glob_ctx *sgcp, struct n_strlist **slpp){
/* a_SALLOC is not used by the code of this function */
386 enum{a_SILENT = 1<<0, a_DEEP=1<<1, a_SALLOC=1<<2};
388 struct a_shexp_glob_ctx nsgc;
389 struct dirent *dep;
390 DIR *dp;
391 size_t old_outerlen;
392 char const *ccp, *myp;
393 NYD2_ENTER;
395 /* We need some special treatment for the outermost level */
396 if(!(sgcp->sgc_flags & a_DEEP)){
397 if(sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/'){
398 myp = n_string_cp(n_string_push_c(sgcp->sgc_outer, '/'));
399 ++sgcp->sgc_patdat;
400 --sgcp->sgc_patlen;
401 }else
402 myp = "./";
403 }else
404 myp = n_string_cp(sgcp->sgc_outer);
/* Remember the length of the resolved path so that it can be restored after
 * a directory entry has been appended for recursion purposes */
405 old_outerlen = sgcp->sgc_outer->s_len;
407 /* Separate current directory/pattern level from any possible remaining
408 * pattern in order to be able to use it for fnmatch(3) */
/* nsgc.sgc_patlen == 0 denotes "this is the last level"; otherwise nsgc
 * describes the remaining pattern after the solidus */
409 if((ccp = memchr(sgcp->sgc_patdat, '/', sgcp->sgc_patlen)) == NULL)
410 nsgc.sgc_patlen = 0;
411 else{
412 nsgc = *sgcp;
413 nsgc.sgc_flags |= a_DEEP;
414 sgcp->sgc_patlen = PTR2SIZE((nsgc.sgc_patdat = &ccp[1]) -
415 &sgcp->sgc_patdat[0]);
416 nsgc.sgc_patlen -= sgcp->sgc_patlen;
417 /* Trim solidus */
418 if(sgcp->sgc_patlen > 0){
419 assert(sgcp->sgc_patdat[sgcp->sgc_patlen -1] == '/');
420 ((char*)UNCONST(sgcp->sgc_patdat))[--sgcp->sgc_patlen] = '\0';
424 /* Our current directory level */
425 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
426 * xxx be the (sole) result depending on pattern surroundings, etc. */
427 if((dp = opendir(myp)) == NULL){
428 int err;
430 switch((err = errno)){
431 case ENOTDIR:
432 ccp = N_("cannot access paths under non-directory");
433 goto jerr;
434 case ENOENT:
435 ccp = N_("path component of (sub)pattern non-existent");
436 goto jerr;
437 case EACCES:
438 ccp = N_("file permission for file (sub)pattern denied");
439 goto jerr;
440 default:
441 ccp = N_("cannot handle file (sub)pattern");
442 goto jerr;
446 /* As necessary, quote bytes in the current pattern */
/* I.e., quotes, reverse solidus, dollar, space and tabulator are prefixed
 * with a reverse solidus so that fnmatch(3) takes them literally.  The first
 * pass counts the bytes needed, the second pass creates the copy */
447 /* C99 */{
448 char *ncp;
449 size_t i;
450 bool_t need;
452 for(need = FAL0, i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
453 switch(*myp){
454 case '\'': case '"': case '\\': case '$':
455 case ' ': case '\t':
456 need = TRU1;
457 ++i;
458 /* FALLTHRU */
459 default:
460 ++i;
461 break;
464 if(need){
465 ncp = salloc(i +1);
466 for(i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
467 switch(*myp){
468 case '\'': case '"': case '\\': case '$':
469 case ' ': case '\t':
470 ncp[i++] = '\\';
471 /* FALLTHRU */
472 default:
473 ncp[i++] = *myp;
474 break;
476 ncp[i] = '\0';
477 myp = ncp;
478 }else
479 myp = sgcp->sgc_patdat;
/* From here on myp is the pattern of this level as passed to fnmatch(3) */
482 while((dep = readdir(dp)) != NULL){
483 switch(fnmatch(myp, dep->d_name, FNM_PATHNAME | FNM_PERIOD)){
484 case 0:{
485 /* A match expresses the desire to recurse if there is more pattern */
486 if(nsgc.sgc_patlen > 0){
487 bool_t isdir;
/* Temporarily extend the resolved path by the matching entry */
489 n_string_push_cp((sgcp->sgc_outer->s_len > 1
490 ? n_string_push_c(sgcp->sgc_outer, '/') : sgcp->sgc_outer),
491 dep->d_name);
/* Only recurse into directories; stat(2) is used if the entry type is not
 * available or does not give a definite answer */
493 isdir = FAL0;
494 #ifdef HAVE_DIRENT_TYPE
495 if(dep->d_type == DT_DIR)
496 isdir = TRU1;
497 else if(dep->d_type == DT_LNK || dep->d_type == DT_UNKNOWN)
498 #endif
500 struct stat sb;
502 if(stat(n_string_cp(sgcp->sgc_outer), &sb)){
503 ccp = N_("I/O error when querying file status");
504 goto jerr;
505 }else if(S_ISDIR(sb.st_mode))
506 isdir = TRU1;
509 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
510 * TODO Instead save away a list of such n_string's for later */
/* NOTE(review): the nested call advances only its own copy of slpp, so it
 * looks as if matches found below a second directory overwrite (and leak)
 * those found below the first one -- confirm.
 * NOTE(review): the nested call also shortens nsgc.sgc_patlen and replaces
 * a solidus of the pattern with NUL, yet nsgc is reused for the next
 * directory; patterns with three or more levels may thus be affected --
 * confirm */
511 if(isdir && !a_shexp__glob(&nsgc, slpp)){
/* The nested level has logged already: only signal the failure */
512 ccp = (char*)1;
513 goto jleave;
516 n_string_trunc(sgcp->sgc_outer, old_outerlen);
517 }else{
518 struct n_strlist *slp;
519 size_t i, j;
/* Last level: append a node which consists of the resolved path, a solidus
 * as necessary, and the name of the entry */
521 i = strlen(dep->d_name);
522 j = (old_outerlen > 0) ? old_outerlen + 1 + i : i;
523 slp = n_STRLIST_MALLOC(j);
524 *slpp = slp;
525 slpp = &slp->sl_next;
526 slp->sl_next = NULL;
527 if((j = old_outerlen) > 0){
528 memcpy(&slp->sl_dat[0], sgcp->sgc_outer->s_dat, j);
529 if(slp->sl_dat[j -1] != '/')
530 slp->sl_dat[j++] = '/';
532 memcpy(&slp->sl_dat[j], dep->d_name, i);
533 slp->sl_dat[j += i] = '\0';
534 slp->sl_len = j;
536 } break;
537 case FNM_NOMATCH:
538 break;
539 default:
540 ccp = N_("fnmatch(3) cannot handle file (sub)pattern");
541 goto jerr;
/* ccp doubles as the error indicator: NULL means success */
545 ccp = NULL;
546 jleave:
547 if(dp != NULL)
548 closedir(dp);
549 NYD2_LEAVE;
550 return (ccp == NULL);
552 jerr:
/* ccp is the (untranslated) error message here; it remains non-NULL so
 * that FAL0 is returned */
553 if(!(sgcp->sgc_flags & a_SILENT)){
554 char const *s2, *s3;
556 if(sgcp->sgc_outer->s_len > 0){
557 s2 = n_shell_quote_cp(n_string_cp(sgcp->sgc_outer), FAL0);
558 s3 = "/";
559 }else
560 s2 = s3 = "";
562 n_err("%s: %s%s%s\n", V_(ccp), s2, s3,
563 n_shell_quote_cp(sgcp->sgc_patdat, FAL0));
565 goto jleave;
568 static int
569 a_shexp__globsort(void const *cvpa, void const *cvpb){
570 int rv;
571 struct n_strlist const * const *slpa, * const *slpb;
572 NYD2_ENTER;
574 slpa = cvpa;
575 slpb = cvpb;
576 rv = asccasecmp((*slpa)->sl_dat, (*slpb)->sl_dat);
577 NYD2_LEAVE;
578 return rv;
580 #endif /* HAVE_FNMATCH */
/* Create a sh(1)ell-quoted version of the data of sqlp in sqcp->sqc_store.
 * Since a_SHEXP_QUOTE_RECURSE is #undef'd below this works in two passes:
 * the first classifies all input bytes in order to detect the "most fancy"
 * type of quote that is needed, the second pushes the opening quote (if
 * any), the input with escapes applied as necessary for the chosen quote
 * type, and the closing quote */
582 static void
583 a_shexp__quote(struct a_shexp_quote_ctx *sqcp, struct a_shexp_quote_lvl *sqlp){
584 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
585 * XXX use the recursive implementation because of stateful encodings.
586 * XXX I.e., if a quoted substring cannot be self-contained - the data after
587 * XXX the quote relies on "the former state", then this doesn't make sense.
588 * XXX Therefore this is not fully programmed out but instead only detects
589 * XXX the "most fancy" quoting necessary, and directly does that.
590 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
591 * XXX Otherwise we rather have to convert to wide first and act on that,
592 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
593 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
/* Without recursion "goto jrecurse" simply continues with the next byte */
594 #ifdef a_SHEXP_QUOTE_RECURSE
595 # define jrecurse jrecurse
596 struct a_shexp_quote_lvl sql;
597 #else
598 # define jrecurse jstep
599 #endif
600 struct n_visual_info_ctx vic;
601 union {struct a_shexp_quote_lvl *head; struct n_string *store;} u;
602 ui32_t flags;
603 size_t il;
604 char const *ib;
605 NYD2_ENTER;
607 ib = sqlp->sql_dat.s;
608 il = sqlp->sql_dat.l;
609 flags = sqlp->sql_flags;
611 /* Iterate over the entire input, classify characters and type of quotes
612 * along the way. Whenever a quote change has to be applied, adjust flags
613 * for the new situation -, setup sql.* and recurse- */
614 while(il > 0){
615 char c;
617 c = *ib;
/* Control characters require dollar-single-quotes, except for a tabulator
 * if any type of quote is in effect already */
618 if(cntrlchar(c)){
619 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
620 goto jstep;
621 if(c == '\t' && (flags & (a_SHEXP_QUOTE_T_REVSOL |
622 a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOUBLE)))
623 goto jstep;
624 #ifdef a_SHEXP_QUOTE_RECURSE
625 ++sqcp->sqc_cnt_dollar;
626 #endif
627 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
628 goto jrecurse;
/* Blanks, double quote and dollar are fine with any quote; choose
 * single-quotes if there is none yet */
629 }else if(blankspacechar(c) || c == '"' || c == '$'){
630 if(flags & a_SHEXP_QUOTE_T_MASK)
631 goto jstep;
632 #ifdef a_SHEXP_QUOTE_RECURSE
633 ++sqcp->sqc_cnt_single;
634 #endif
635 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
636 goto jrecurse;
/* A single quote cannot be expressed within single-quotes: switch to
 * dollar-single-quotes unless another type is in effect */
637 }else if(c == '\''){
638 if(flags & (a_SHEXP_QUOTE_T_MASK & ~a_SHEXP_QUOTE_T_SINGLE))
639 goto jstep;
640 #ifdef a_SHEXP_QUOTE_RECURSE
641 ++sqcp->sqc_cnt_dollar;
642 #endif
643 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
644 goto jrecurse;
645 }else if(c == '\\'){
646 if(flags & a_SHEXP_QUOTE_T_MASK)
647 goto jstep;
648 #ifdef a_SHEXP_QUOTE_RECURSE
649 ++sqcp->sqc_cnt_single;
650 #endif
651 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
652 goto jrecurse;
/* Non-ASCII only enforces dollar-single-quotes in ROUNDTRIP mode */
653 }else if(!asciichar(c)){
654 /* Need to keep together multibytes */
655 #ifdef a_SHEXP_QUOTE_RECURSE
656 memset(&vic, 0, sizeof vic);
657 vic.vic_indat = ib;
658 vic.vic_inlen = il;
659 n_visual_info(&vic,
660 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
661 #endif
662 /* xxx check whether resulting \u would be ASCII */
663 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP) ||
664 (flags & a_SHEXP_QUOTE_T_DOLLAR)){
665 #ifdef a_SHEXP_QUOTE_RECURSE
666 ib = vic.vic_oudat;
667 il = vic.vic_oulen;
668 continue;
669 #else
670 goto jstep;
671 #endif
673 #ifdef a_SHEXP_QUOTE_RECURSE
674 ++sqcp->sqc_cnt_dollar;
675 #endif
676 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
677 goto jrecurse;
678 }else
679 jstep:
680 ++ib, --il;
682 sqlp->sql_flags = flags;
684 /* Level made the great and completed processing input. Reverse the list of
685 * levels, detect the "most fancy" quote type needed along this way */
686 /* XXX Due to restriction as above very crude */
/* il is reused as an estimate of the result size: levels which need quotes
 * are accounted with one and a half times their length */
687 for(flags = 0, il = 0, u.head = NULL; sqlp != NULL;){
688 struct a_shexp_quote_lvl *tmp;
690 tmp = sqlp->sql_link;
691 sqlp->sql_link = u.head;
692 u.head = sqlp;
693 il += sqlp->sql_dat.l;
694 if(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK)
695 il += (sqlp->sql_dat.l >> 1);
696 flags |= sqlp->sql_flags;
697 sqlp = tmp;
699 sqlp = u.head;
701 /* Finally work the substrings in the correct order, adjusting quotes along
702 * the way as necessary. Start off with the "most fancy" quote, so that
703 * the user sees an overall boundary she can orientate herself on.
704 * We do it like that to be able to give the user some "encapsulation
705 * experience", to address what strikes me is a problem of sh(1)ell quoting:
706 * different to, e.g., perl(1), where you see at a glance where a string
707 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
708 * visual appearance of "a string" as such */
/* From here on the union member .store is the active one */
709 u.store = n_string_reserve(sqcp->sqc_store, il);
/* Open the quote and reduce flags to exactly one (or no) quote type */
711 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
712 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
713 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
714 }else if(flags & a_SHEXP_QUOTE_T_DOUBLE){
715 u.store = n_string_push_c(u.store, '"');
716 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOUBLE;
717 }else if(flags & a_SHEXP_QUOTE_T_SINGLE){
718 u.store = n_string_push_c(u.store, '\'');
719 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
720 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
721 flags &= ~a_SHEXP_QUOTE_T_MASK;
723 /* Work all the levels */
724 for(; sqlp != NULL; sqlp = sqlp->sql_link){
725 /* As necessary update our mode of quoting */
726 #ifdef a_SHEXP_QUOTE_RECURSE
727 il = 0;
729 switch(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK){
730 case a_SHEXP_QUOTE_T_DOLLAR:
731 if(!(flags & a_SHEXP_QUOTE_T_DOLLAR))
732 il = a_SHEXP_QUOTE_T_DOLLAR;
733 break;
734 case a_SHEXP_QUOTE_T_DOUBLE:
735 if(!(flags & (a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
736 il = a_SHEXP_QUOTE_T_DOLLAR;
737 break;
738 case a_SHEXP_QUOTE_T_SINGLE:
739 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
740 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
741 il = a_SHEXP_QUOTE_T_SINGLE;
742 break;
743 default:
744 case a_SHEXP_QUOTE_T_REVSOL:
745 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
746 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
747 il = a_SHEXP_QUOTE_T_REVSOL;
748 break;
751 if(il != 0){
752 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
753 u.store = n_string_push_c(u.store, '\'');
754 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
755 u.store = n_string_push_c(u.store, '"');
756 flags &= ~a_SHEXP_QUOTE_T_MASK;
758 flags |= (ui32_t)il;
759 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
760 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
761 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
762 u.store = n_string_push_c(u.store, '"');
763 else if(flags & a_SHEXP_QUOTE_T_SINGLE)
764 u.store = n_string_push_c(u.store, '\'');
766 #endif /* a_SHEXP_QUOTE_RECURSE */
768 /* Work the level's substring */
769 ib = sqlp->sql_dat.s;
770 il = sqlp->sql_dat.l;
772 while(il > 0){
773 char c2, c;
775 c = *ib;
/* Control characters: use the mnemonic escape if one exists, otherwise
 * the \cX notation (c2 keeps the original value to detect the latter) */
777 if(cntrlchar(c)){
778 assert(c == '\t' || (flags & a_SHEXP_QUOTE_T_DOLLAR));
779 assert((flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
780 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)));
781 switch((c2 = c)){
782 case 0x07: c = 'a'; break;
783 case 0x08: c = 'b'; break;
784 case 0x0A: c = 'n'; break;
785 case 0x0B: c = 'v'; break;
786 case 0x0C: c = 'f'; break;
787 case 0x0D: c = 'r'; break;
788 case 0x1B: c = 'E'; break;
789 default: break;
/* A tabulator is only converted to \t within dollar-single-quotes,
 * otherwise it is pushed as such */
790 case 0x09:
791 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
792 c = 't';
793 break;
795 if(flags & a_SHEXP_QUOTE_T_REVSOL)
796 u.store = n_string_push_c(u.store, '\\');
797 goto jpush;
799 u.store = n_string_push_c(u.store, '\\');
800 if(c == c2){
801 u.store = n_string_push_c(u.store, 'c');
802 c ^= 0x40;
804 goto jpush;
805 }else if(blankspacechar(c) || c == '"' || c == '$'){
806 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
807 goto jpush;
808 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE));
809 u.store = n_string_push_c(u.store, '\\');
810 goto jpush;
811 }else if(c == '\''){
812 if(flags & a_SHEXP_QUOTE_T_DOUBLE)
813 goto jpush;
814 assert(!(flags & a_SHEXP_QUOTE_T_SINGLE));
815 u.store = n_string_push_c(u.store, '\\');
816 goto jpush;
817 }else if(c == '\\'){
818 if(flags & a_SHEXP_QUOTE_T_SINGLE)
819 goto jpush;
820 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE |
821 a_SHEXP_QUOTE_T_DOLLAR));
822 u.store = n_string_push_c(u.store, '\\');
823 goto jpush;
824 }else if(asciichar(c)){
825 /* Shorthand: we can simply push that thing out */
826 jpush:
827 u.store = n_string_push_c(u.store, c);
828 ++ib, --il;
829 }else{
830 /* Not an ASCII character, take care not to split up multibyte
831 * sequences etc. */
832 #ifdef HAVE_NATCH_CHAR
/* In a Unicode environment decode the UTF-8 sequence directly: it is pushed
 * as such, or, in ROUNDTRIP mode or for U+FFFD, as a \u / \U escape */
833 if(options & OPT_UNICODE){
834 ui32_t uc;
835 char const *ib2;
836 size_t il2, il3;
838 ib2 = ib;
839 il3 = il2 = il;
840 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
841 char itoa[32];
842 char const *cp;
844 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
845 if((flags & a_SHEXP_QUOTE_ROUNDTRIP) || uc == 0xFFFDu){
846 /* Use padding to make ambiguities impossible */
847 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
848 (uc > 0xFFFFu ? 'U' : 'u'),
849 (int)(uc > 0xFFFFu ? 8 : 4), uc);
850 cp = itoa;
851 }else{
852 il3 = il2;
853 cp = &ib[0];
855 u.store = n_string_push_buf(u.store, cp, il3);
856 ib += il2, il -= il2;
857 continue;
860 #endif /* HAVE_NATCH_CHAR */
/* Otherwise let n_visual_info() isolate one character of the input */
862 memset(&vic, 0, sizeof vic);
863 vic.vic_indat = ib;
864 vic.vic_inlen = il;
865 n_visual_info(&vic,
866 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
868 /* Work this substring as sensitive as possible */
/* il becomes the count of bytes which have been consumed for the character */
869 il -= vic.vic_oulen;
870 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP))
871 u.store = n_string_push_buf(u.store, ib, il);
872 #ifdef HAVE_ICONV
/* ROUNDTRIP mode: try to convert the character to UTF-8 in order to push
 * a \u / \U escape */
873 else if((vic.vic_indat = n_iconv_onetime_cp("utf-8",
874 charset_get_lc(), savestrbuf(ib, il), FAL0)) != NULL){
875 ui32_t uc;
876 char const *ib2;
877 size_t il2, il3;
879 il3 = il2 = strlen(ib2 = vic.vic_indat);
880 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
881 char itoa[32];
883 il2 = PTR2SIZE(&ib2[0] - &vic.vic_indat[0]);
884 /* Use padding to make ambiguities impossible */
885 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
886 (uc > 0xFFFFu ? 'U' : 'u'),
887 (int)(uc > 0xFFFFu ? 8 : 4), uc);
888 u.store = n_string_push_buf(u.store, itoa, il3);
889 }else
890 goto Jxseq;
892 #endif
893 else
894 #ifdef HAVE_ICONV
895 Jxseq:
896 #endif
/* Last resort: push each byte as \xHH; the "FF" placeholder is overwritten
 * with the hexadecimal value of the byte */
897 while(il-- > 0){
898 u.store = n_string_push_buf(u.store, "\\xFF",
899 sizeof("\\xFF") -1);
900 n_c_to_hex_base16(&u.store->s_dat[u.store->s_len - 2], *ib++);
/* Continue after the character that has just been handled */
903 ib = vic.vic_oudat;
904 il = vic.vic_oulen;
909 /* Close an open quote */
910 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
911 u.store = n_string_push_c(u.store, '\'');
912 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
913 u.store = n_string_push_c(u.store, '"');
914 #ifdef a_SHEXP_QUOTE_RECURSE
915 jleave:
916 #endif
917 NYD2_LEAVE;
918 return;
920 #ifdef a_SHEXP_QUOTE_RECURSE
921 jrecurse:
922 sqlp->sql_dat.l -= il;
924 sql.sql_link = sqlp;
925 sql.sql_dat.s = UNCONST(ib);
926 sql.sql_dat.l = il;
927 sql.sql_flags = flags;
928 a_shexp__quote(sqcp, &sql);
929 goto jleave;
930 #endif
932 #undef jrecurse
933 #undef a_SHEXP_QUOTE_RECURSE
/* Expand the file or folder name `name' as directed by fexpm:
 * shortcuts (unless FEXP_NSHORTCUT), the special names "%", "%USER", "%:NAME",
 * "#" and "&", a leading "+" (relative to the folder as of folder_query()),
 * shell variables, a leading tilde and filename patterns.
 * PS_EXPAND_MULTIRESULT of pstate is cleared on entry.
 * Returns NULL on error, otherwise the result, of which a savestr() copy is
 * created shall no expansion step have created new storage */
936 FL char *
937 fexpand(char const *name, enum fexp_mode fexpm)
939 struct str s;
940 char const *cp, *res;
941 bool_t dyn;
942 NYD_ENTER;
944 pstate &= ~PS_EXPAND_MULTIRESULT;
946 /* The order of evaluation is "%" and "#" expand into constants.
947 * "&" can expand into "+". "+" can expand into shell meta characters.
948 * Shell meta characters expand into constants.
949 * This way, we make no recursive expansion */
950 if ((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
951 res = UNCONST(name);
953 jnext:
/* dyn tracks whether res has been newly created by an expansion step */
954 dyn = FAL0;
955 switch (*res) {
956 case '%':
/* "%:NAME": strip the prefix and restart with NAME */
957 if (res[1] == ':' && res[2] != '\0') {
958 res = &res[2];
959 goto jnext;
/* "%" is our own mailbox, "%USER" is forcefully the one of USER */
961 res = _findmail((res[1] != '\0' ? res + 1 : myname), (res[1] != '\0'));
962 goto jislocal;
963 case '#':
/* Only a sole "#" denotes the previous file */
964 if (res[1] != '\0')
965 break;
966 if (prevfile[0] == '\0') {
967 n_err(_("No previous file\n"));
968 res = NULL;
969 goto jleave;
971 res = prevfile;
972 goto jislocal;
973 case '&':
/* Only a sole "&" denotes the value of the MBOX variable */
974 if (res[1] == '\0')
975 res = ok_vlook(MBOX);
976 break;
979 /* POSIX: if *folder* unset or null, "+" shall be retained */
980 if (*res == '+' && *(cp = folder_query()) != '\0') {
981 size_t i = strlen(cp);
/* Join folder and name, inserting a solidus as necessary */
983 res = str_concat_csvl(&s, cp,
984 ((i == 0 || cp[i -1] == '/') ? "" : "/"), res + 1, NULL)->s;
985 dyn = TRU1;
987 /* TODO *folder* can't start with %[:], can it!?! */
988 if (res[0] == '%' && res[1] == ':') {
989 res += 2;
990 goto jnext;
994 /* Do some meta expansions */
/* I.e., unless only FEXP_NVAR is set, and if res contains a "$" (with
 * FEXP_NSHELL) or any shell meta character (otherwise) */
995 if((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
996 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
997 : anyof(res, "{}[]*?$"))){
998 bool_t doexp;
/* Unless FEXP_NOPROTO is set only local names are expanded */
1000 if(fexpm & FEXP_NOPROTO)
1001 doexp = TRU1;
1002 else switch(which_protocol(res)){
1003 case PROTO_FILE:
1004 case PROTO_MAILDIR:
1005 doexp = TRU1;
1006 break;
1007 default:
1008 doexp = FAL0;
1009 break;
1012 if(doexp){
1013 struct a_shexp_var_stack top;
/* Variables first, then tilde, then (without FEXP_NSHELL) patterns */
1015 memset(&top, 0, sizeof top);
1016 top.svs_value = res;
1017 top.svs_bsesc = TRU1;
1018 res = a_shexp_var(&top);
1020 if(res[0] == '~')
1021 res = a_shexp_tilde(res);
1023 if(!(fexpm & FEXP_NSHELL) &&
1024 (res = a_shexp_globname(res, fexpm)) == NULL)
1025 goto jleave;
1026 dyn = TRU1;
1027 }/* else no tilde */
1028 }else if(res[0] == '~'){
1029 res = a_shexp_tilde(res);
1030 dyn = TRU1;
1033 jislocal:
/* With FEXP_LOCAL the result must denote a file or a maildir */
1034 if (fexpm & FEXP_LOCAL)
1035 switch (which_protocol(res)) {
1036 case PROTO_FILE:
1037 case PROTO_MAILDIR:
1038 break;
1039 default:
1040 n_err(_("Not a local file or directory: %s\n"),
1041 n_shell_quote_cp(name, FAL0));
1042 res = NULL;
1043 break;
1046 jleave:
/* Never return storage which has not been created by an expansion step */
1047 if(res != NULL && !dyn)
1048 res = savestr(res);
1049 NYD_LEAVE;
1050 return UNCONST(res);
/* Decode one character at *s, which may be given as a backslash escape
 * sequence, and return its value; *s is advanced past the bytes consumed.
 * NUL is returned (and *s is not advanced) at the end of the string.
 * If use_nail_extensions is set \&, \?, \$ and \@ are handled specially */
1053 FL int
1054 n_shell_expand_escape(char const **s, bool_t use_nail_extensions)/* TODO DROP!*/
1056 char const *xs;
1057 int c, n;
1058 NYD2_ENTER;
1060 xs = *s;
/* End of string and any byte but a backslash are returned as such */
1062 if ((c = *xs & 0xFF) == '\0')
1063 goto jleave;
1064 ++xs;
1065 if (c != '\\')
1066 goto jleave;
/* Cases which "break" consume the byte after the backslash at the end of the
 * function, cases which "goto jleave" have already advanced xs themselves */
1068 switch ((c = *xs & 0xFF)) {
1069 case 'a': c = '\a'; break;
1070 case 'b': c = '\b'; break;
1071 case 'c': c = PROMPT_STOP; break;
1072 case 'f': c = '\f'; break;
1073 case 'n': c = '\n'; break;
1074 case 'r': c = '\r'; break;
1075 case 't': c = '\t'; break;
1076 case 'v': c = '\v'; break;
1078 /* ESCape */
1079 case 'E':
1080 case 'e':
1081 c = '\033';
1082 break;
1084 /* Hexadecimal TODO uses ASCII */
/* One or two hexadecimal digits; without any digit a backslash is returned
 * and the "x" is left in the input */
1085 case 'X':
1086 case 'x': {
1087 static ui8_t const hexatoi[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
1088 #undef a_HEX
1089 #define a_HEX(n) \
1090 hexatoi[(ui8_t)((n) - ((n) <= '9' ? 48 : ((n) <= 'F' ? 55 : 87)))]
1092 c = 0;
1093 ++xs;
1094 if(hexchar(*xs))
1095 c = a_HEX(*xs);
1096 else{
1097 --xs;
1098 if(options & OPT_D_V)
1099 n_err(_("Invalid \\xNUMBER notation in: %s\n"), xs - 1);
1100 c = '\\';
1101 goto jleave;
1103 ++xs;
1104 if(hexchar(*xs)){
1105 c <<= 4;
1106 c += a_HEX(*xs);
1107 ++xs;
1109 goto jleave;
1111 #undef a_HEX
1113 /* octal, with optional 0 prefix */
/* The default case lives inside of the if(0) block: any byte not handled by
 * another case is tried as an octal number of up to three digits, too */
1114 case '0':
1115 ++xs;
1116 if(0){
1117 default:
1118 if(*xs == '\0'){
1119 c = '\\';
1120 break;
1123 for (c = 0, n = 3; n-- > 0 && octalchar(*xs); ++xs) {
1124 c <<= 3;
1125 c |= *xs - '0';
1127 goto jleave;
1129 /* S-nail extension for nice (get)prompt(()) support */
1130 case '&':
1131 case '?':
1132 case '$':
1133 case '@':
1134 if (use_nail_extensions) {
1135 switch (c) {
1136 case '&': c = ok_blook(bsdcompat) ? '&' : '?'; break;
1137 case '?': c = (pstate & PS_EVAL_ERROR) ? '1' : '0'; break;
1138 case '$': c = PROMPT_DOLLAR; break;
1139 case '@': c = PROMPT_AT; break;
1141 break;
1144 /* FALLTHRU */
1145 case '\0':
1146 /* A sole <backslash> at EOS is treated as-is! */
1147 c = '\\';
1148 /* FALLTHRU */
1149 case '\\':
1150 break;
1153 ++xs;
1154 jleave:
1155 *s = xs;
1156 NYD2_LEAVE;
1157 return c;
/* Parse one shell-style token off the front of input.
 *
 * store: string that receives the unquoted / expanded token; it may only be
 *    NULL when n_SHEXP_PARSE_DRYRUN is set (see the asserts below).
 * input: buffer to parse; input->l == UIZ_MAX means "use strlen(input->s)".
 *    On return input->s / input->l describe the not yet consumed remainder.
 * flags: n_SHEXP_PARSE_* bits which select trimming, separators, logging and
 *    dry-run behaviour.
 *
 * Handled here: '..', ".." and $'..' quoting, <backslash> escapes, "#"
 * comments at the start of a token, $VAR / ${VAR} expansion, and blank or
 * comma token separators.
 * Returns a bitmask of n_SHEXP_STATE_* values: _OUTPUT once something (even
 * an empty quote) was produced, _STOP when there is nothing (more) to parse,
 * a comment started or \c@ was seen, plus _ERR_* bits for detected problems. */
1160 FL enum n_shexp_state
1161 n_shell_parse_token(struct n_string *store, struct str *input, /* TODO WCHAR */
1162 enum n_shexp_parse_flags flags){
/* Scratch buffer for the UTF-8 form of a \u / \U code point */
1163 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
1164 char utf[8];
1165 #endif
/* c: current byte, c2: lookahead / escape selector, quotec: active quote
 * character or '\0' when outside of quotes */
1166 char c2, c, quotec;
1167 enum{
1168 a_NONE = 0,
1169 a_SKIPQ = 1<<0, /* Skip rest of this quote (\c0 ..) */
1170 a_SURPLUS = 1<<1, /* Extended sequence interpretation */
1171 a_NTOKEN = 1<<2 /* "New token": e.g., comments are possible */
1172 } state;
1173 enum n_shexp_state rv;
/* il: bytes left in the input, ib: read cursor, ib_save: start of the
 * sequence currently being decoded (used to emit it unchanged on error) */
1174 size_t i, il;
1175 char const *ib_save, *ib;
1176 NYD2_ENTER;
1177 UNINIT(c, '\0');
1179 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
1180 assert(input != NULL);
1181 assert(input->l == 0 || input->s != NULL);
1182 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
1183 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
1184 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
/* _LOG_D_V is conditional logging: it becomes _LOG only if OPT_D_V is set */
1186 if((flags & n_SHEXP_PARSE_LOG_D_V) && (options & OPT_D_V))
1187 flags |= n_SHEXP_PARSE_LOG;
1189 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
1190 store = n_string_trunc(store, 0);
1192 ib = input->s;
1193 if((il = input->l) == UIZ_MAX)
1194 il = strlen(ib);
/* Also re-entered from the end of the function when an empty token is to be
 * ignored (n_SHEXP_PARSE_IGNORE_EMPTY) and input is left */
1196 jrestart_empty:
1197 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1198 for(; il > 0; ++ib, --il)
1199 if(!blankspacechar(*ib))
1200 break;
/* Remember where this token starts: error messages and the dry-run copy at
 * jleave refer to input->s / input->l */
1202 input->s = UNCONST(ib);
1203 input->l = il;
1205 if(il == 0){
1206 rv = n_SHEXP_STATE_STOP;
1207 goto jleave;
1210 if(store != NULL)
1211 store = n_string_reserve(store, MIN(il, 32)); /* XXX */
/* Main loop: consume one byte per iteration; "continue" means that nothing
 * (more) is to be written for it, falling through to the bottom writes c */
1213 for(rv = n_SHEXP_STATE_NONE, state = a_NTOKEN, quotec = '\0'; il > 0;){
1214 --il, c = *ib++;
1216 /* If no quote-mode active.. */
1217 if(quotec == '\0'){
/* Opening quote: only double quotes enable extended (a_SURPLUS) handling */
1218 if(c == '"' || c == '\''){
1219 quotec = c;
1220 if(c == '"')
1221 state |= a_SURPLUS;
1222 else
1223 state &= ~a_SURPLUS;
1224 state &= ~a_NTOKEN;
1225 continue;
/* "$'" opens a dollar-single-quote, any other "$X" is a variable expansion;
 * a "$" which is the very last input byte is not handled here */
1226 }else if(c == '$'){
1227 if(il > 0){
1228 state &= ~a_NTOKEN;
1229 if(*ib == '\''){
1230 --il, ++ib;
1231 quotec = '\'';
1232 state |= a_SURPLUS;
1233 continue;
1234 }else
1235 goto J_var_expand;
1237 }else if(c == '\\'){
1238 /* Outside of quotes this just escapes any next character, but a sole
1239 * <backslash> at EOS is left unchanged */
1240 if(il > 0)
1241 --il, c = *ib++;
1242 state &= ~a_NTOKEN;
/* A comment is only recognized where a new token could start */
1243 }else if(c == '#' && (state & a_NTOKEN)){
1244 rv |= n_SHEXP_STATE_STOP;
1245 goto jleave;
/* With either comma flag a comma ends the token; the comma stays consumed */
1246 }else if(c == ',' && (flags &
1247 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA)))
1248 break;
/* A blank ends the token and is pushed back, unless only comma separates */
1249 else if(blankchar(c)){
1250 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
1251 ++il, --ib;
1252 break;
1254 state |= a_NTOKEN;
1255 }else
1256 state &= ~a_NTOKEN;
1257 }else{
1258 /* Quote-mode */
1259 assert(!(state & a_NTOKEN));
/* Closing quote: all state bits, including a_SKIPQ, are reset */
1260 if(c == quotec){
1261 state = a_NONE;
1262 quotec = '\0';
1263 /* Users may need to recognize the presence of empty quotes */
1264 rv |= n_SHEXP_STATE_OUTPUT;
1265 continue;
/* <backslash> is only special in ".." and $'..' (both set a_SURPLUS) */
1266 }else if(c == '\\' && (state & a_SURPLUS)){
1267 ib_save = ib - 1;
1268 /* A sole <backslash> at EOS is treated as-is! This is ok since
1269 * the "closing quote" error will occur next, anyway */
1270 if(il == 0)
1271 break;
1272 else if((c2 = *ib) == quotec){
1273 --il, ++ib;
1274 c = quotec;
1275 }else if(quotec == '"'){
1276 /* Double quotes:
1277 * The <backslash> shall retain its special meaning as an
1278 * escape character (see Section 2.2.1) only when followed
1279 * by one of the following characters when considered
1280 * special: $ ` " \ <newline> */
1281 switch(c2){
1282 case '$':
1283 case '`':
1284 /* case '"': already handled via c2 == quotec */
1285 case '\\':
1286 --il, ++ib;
1287 c = c2;
1288 /* FALLTHRU */
1289 default:
1290 break;
1292 }else{
1293 /* Dollar-single-quote */
/* The selector byte c2 is consumed up front; j_dollar_ungetc undoes this */
1294 --il, ++ib;
1295 switch(c2){
1296 case '"':
1297 /* case '\'': already handled via c2 == quotec */
1298 case '\\':
1299 c = c2;
1300 break;
1302 case 'b': c = '\b'; break;
1303 case 'f': c = '\f'; break;
1304 case 'n': c = '\n'; break;
1305 case 'r': c = '\r'; break;
1306 case 't': c = '\t'; break;
1307 case 'v': c = '\v'; break;
1309 case 'E':
1310 case 'e': c = '\033'; break;
1312 /* Control character */
1313 case 'c':
1314 if(il == 0)
1315 goto j_dollar_ungetc;
1316 --il, c2 = *ib++;
1317 if(state & a_SKIPQ)
1318 continue;
/* \cX maps to the uppercased X with bit 0x40 toggled */
1319 c = upperconv(c2) ^ 0x40;
1320 if((ui8_t)c > 0x1F && c != 0x7F){ /* ASCII C0: 0..1F, 7F */
1321 if(flags & n_SHEXP_PARSE_LOG)
1322 n_err(_("Invalid \\c notation: %.*s\n"),
1323 (int)input->l, input->s);
1324 rv |= n_SHEXP_STATE_ERR_CONTROL;
1326 /* As an implementation-defined extension, support \c@
1327 * EQ printf(1) alike \c */
1328 if(c == '\0'){
1329 rv |= n_SHEXP_STATE_STOP;
1330 goto jleave;
1332 break;
1334 /* Octal sequence: 1 to 3 octal bytes */
1335 case '0':
1336 /* As an extension (dependent on where you look, echo(1), or
1337 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1338 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1339 c2 = c;
1340 --il, ++ib;
1342 /* FALLTHRU */
1343 case '1': case '2': case '3':
1344 case '4': case '5': case '6': case '7':
/* c2 accumulates the numeric value from here on */
1345 c2 -= '0';
1346 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1347 c2 = (c2 << 3) | (c - '0');
1348 --il, ++ib;
1350 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
/* A value above 0x1F shifted by three more bits would not fit into a byte */
1351 if((ui8_t)c2 > 0x1F){
1352 if(flags & n_SHEXP_PARSE_LOG)
1353 n_err(_("\\0 argument exceeds a byte: %.*s\n"),
1354 (int)input->l, input->s);
1355 rv |= n_SHEXP_STATE_ERR_NUMBER;
1356 --il, ++ib;
1357 /* Write unchanged */
/* Shared error exit: emit the raw bytes from ib_save up to ib */
1358 je_ib_save:
1359 rv |= n_SHEXP_STATE_OUTPUT;
1360 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1361 store = n_string_push_buf(store, ib_save,
1362 PTR2SIZE(ib - ib_save));
1363 continue;
1365 c2 = (c2 << 3) | (c -= '0');
1366 --il, ++ib;
/* A resulting NUL byte makes the rest of this quote be skipped */
1368 if((c = c2) == '\0')
1369 state |= a_SKIPQ;
1370 if(state & a_SKIPQ)
1371 continue;
1372 break;
1374 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
/* The if(0) blocks below only exist to host case labels: each entry point
 * sets its own maximum digit count i and then joins the common decoder */
1375 case 'U':
1376 i = 8;
1377 if(0){
1378 /* FALLTHRU */
1379 case 'u':
1380 i = 4;
1382 if(il == 0)
1383 goto j_dollar_ungetc;
1384 if(0){
1385 /* FALLTHRU */
1387 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1388 case 'X':
1389 case 'x':
1390 if(il == 0)
1391 goto j_dollar_ungetc;
1392 i = 2;
1394 /* C99 */{
1395 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
1396 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
/* no: decoded number, j: count of hexadecimal digits consumed */
1398 size_t no, j;
1400 i = MIN(il, i);
1401 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
1402 c = *ib;
1403 if(hexchar(c)){
1404 no <<= 4;
1405 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
1406 : ((c) <= 'F' ? 55 : 87)))];
/* Not a single digit followed the selector: report and write unchanged */
1407 }else if(j == 0){
1408 if(state & a_SKIPQ)
1409 break;
1410 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
1411 if(flags & n_SHEXP_PARSE_LOG)
1412 n_err(_("Invalid \\%c notation: %.*s\n"),
1413 c2, (int)input->l, input->s);
1414 rv |= n_SHEXP_STATE_ERR_NUMBER;
1415 goto je_ib_save;
1416 }else
1417 break;
1420 /* Unicode massage */
/* \x values and ASCII code points are written as the single byte c via the
 * common path at the loop bottom; a value of 0 sets a_SKIPQ instead */
1421 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
1422 if((c = (char)no) == '\0')
1423 state |= a_SKIPQ;
1424 }else if(no == 0)
1425 state |= a_SKIPQ;
1426 else if(!(state & a_SKIPQ)){
1427 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1428 store = n_string_reserve(store, MAX(j, 4));
1430 c2 = FAL0;
1431 if(no > 0x10FFFF){ /* XXX magic; CText */
1432 if(flags & n_SHEXP_PARSE_LOG)
1433 n_err(_("\\U argument exceeds 0x10FFFF: %.*s\n"),
1434 (int)input->l, input->s);
1435 rv |= n_SHEXP_STATE_ERR_NUMBER;
1436 /* But normalize the output anyway */
1437 goto Je_uni_norm;
/* Output preference: UTF-8 directly if OPT_UNICODE is set, else an iconv
 * conversion, else the normalized \u / \U text further below */
1440 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
1441 j = n_utf32_to_utf8(no, utf);
1442 #endif
1443 #ifdef HAVE_NATCH_CHAR
1444 if(options & OPT_UNICODE){
1445 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
1446 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1447 store = n_string_push_buf(store, utf, j);
1448 continue;
1450 #endif
1451 #ifdef HAVE_ICONV
1452 /* C99 */{
1453 char *icp;
1455 icp = n_iconv_onetime_cp(NULL, NULL, utf, FAL0);
1456 if(icp != NULL){
1457 rv |= n_SHEXP_STATE_OUTPUT;
1458 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1459 store = n_string_push_cp(store, icp);
1460 continue;
1463 #endif
/* NOTE(review): "goto Je_uni_norm" above jumps into this block without
 * evaluating the DRYRUN test, so store is pushed to in dry-run mode, too,
 * although the entry assert permits store == NULL then -- TODO confirm */
1464 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Je_uni_norm:{
1465 char itoa[32];
1467 rv |= n_SHEXP_STATE_OUTPUT |
1468 n_SHEXP_STATE_ERR_UNICODE;
/* Written as \uXXXX, or as \UXXXXXXXX for values above 0xFFFF */
1469 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1470 (no > 0xFFFFu ? 'U' : 'u'),
1471 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
1472 store = n_string_push_buf(store, itoa, i);
1474 continue;
1476 if(state & a_SKIPQ)
1477 continue;
1479 break;
1481 /* Extension: \$ can be used to expand a variable.
1482 * Bug|ad effect: if conversion fails, not written "as-is" */
1483 case '$':
1484 if(il == 0)
1485 goto j_dollar_ungetc;
1486 goto J_var_expand;
1488 default:
1489 j_dollar_ungetc:
1490 /* Follow bash behaviour, print sequence unchanged */
/* Un-consume the selector byte; c still is the <backslash> */
1491 ++il, --ib;
1492 break;
/* Variable expansion; reached for "$" inside double quotes, and via goto for
 * an unquoted "$" as well as for "\$" inside $'..' */
1495 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
1496 bool_t brace;
1498 if(!(brace = (*ib == '{')) || il > 1){
1499 char const *cp, *vp;
1501 ib_save = ib - 1;
1502 il -= brace;
1503 vp = (ib += brace);
/* Collect the variable name: i counts its bytes, vp points to its start */
1505 for(i = 0; il > 0 && (c = *ib, a_SHEXP_ISVARC(c)); ++i)
1506 --il, ++ib;
1508 if(brace){
1509 if(il == 0 || *ib != '}'){
1510 if(state & a_SKIPQ){
1511 assert((state & a_SURPLUS) && quotec == '\'');
1512 continue;
1514 if(flags & n_SHEXP_PARSE_LOG)
1515 n_err(_("Closing brace missing for ${VAR}: %.*s\n"),
1516 (int)input->l, input->s);
1517 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN |
1518 n_SHEXP_STATE_ERR_BRACE;
1519 goto je_ib_save;
1521 --il, ++ib;
1524 if(state & a_SKIPQ)
1525 continue;
/* No name at all: "${}" is an error, whereas a plain "$" is kept literally */
1527 if(i == 0){
1528 if(brace){
1529 if(flags & n_SHEXP_PARSE_LOG)
1530 n_err(_("Bad substitution (${}): %.*s\n"),
1531 (int)input->l, input->s);
1532 rv |= n_SHEXP_STATE_ERR_BADSUB;
1533 goto je_ib_save;
1535 c = '$';
1536 }else if(flags & n_SHEXP_PARSE_DRYRUN)
1537 continue;
1538 else{
1539 vp = savestrbuf(vp, i);
1540 /* Check getenv(3) shall no internal variable exist! */
1541 if((cp = vok_vlook(vp)) != NULL || (cp = getenv(vp)) != NULL){
1542 rv |= n_SHEXP_STATE_OUTPUT;
1543 store = n_string_push_cp(store, cp);
/* Flag the result if the expanded value contains a control character */
1544 for(; (c = *cp) != '\0'; ++cp)
1545 if(cntrlchar(c)){
1546 rv |= n_SHEXP_STATE_CONTROL;
1547 break;
1550 continue;
/* Command substitution is not implemented: the backquote itself is dropped */
1553 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
1554 continue;
/* Common output path: write the (possibly translated) byte c */
1558 if(!(state & a_SKIPQ)){
1559 rv |= n_SHEXP_STATE_OUTPUT;
1560 if(cntrlchar(c))
1561 rv |= n_SHEXP_STATE_CONTROL;
1562 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1563 store = n_string_push_c(store, c);
/* Leaving the loop with a quote still open is an error */
1567 if(quotec != '\0'){
1568 if(flags & n_SHEXP_PARSE_LOG)
1569 n_err(_("no closing quote: %.*s\n"), (int)input->l, input->s);
1570 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
1573 jleave:
/* A dry run with a store copies the raw consumed input verbatim */
1574 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
1575 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
1576 rv |= n_SHEXP_STATE_OUTPUT;
1579 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1580 for(; il > 0; ++ib, --il)
1581 if(!blankchar(*ib))
1582 break;
/* Publish the unconsumed remainder to the caller */
1584 input->l = il;
1585 input->s = UNCONST(ib);
/* Without an explicit STOP: either retry on an ignorable empty token while
 * input is left, or report STOP if nothing was produced and nothing is left */
1587 if(!(rv & n_SHEXP_STATE_STOP)){
1588 if(il > 0 && !(rv & n_SHEXP_STATE_OUTPUT) &&
1589 (flags & n_SHEXP_PARSE_IGNORE_EMPTY))
1590 goto jrestart_empty;
1591 if(!(rv & n_SHEXP_STATE_OUTPUT) && il == 0)
1592 rv |= n_SHEXP_STATE_STOP;
1594 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1595 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1596 NYD2_LEAVE;
1597 return rv;
1600 FL enum n_shexp_state
1601 n_shell_parse_token_buf(char **store, char const *indat, size_t inlen,
1602 enum n_shexp_parse_flags flags){
1603 struct n_string ss;
1604 struct str is;
1605 enum n_shexp_state shs;
1606 NYD2_ENTER;
1608 assert(store != NULL);
1609 assert(inlen == 0 || indat != NULL);
1611 n_string_creat_auto(&ss);
1612 is.s = UNCONST(indat);
1613 is.l = inlen;
1615 shs = n_shell_parse_token(&ss, &is, flags);
1616 if(is.l > 0)
1617 shs &= ~n_SHEXP_STATE_STOP;
1618 else
1619 shs |= n_SHEXP_STATE_STOP;
1620 *store = n_string_cp(&ss);
1621 n_string_drop_ownership(&ss);
1623 n_string_gut(&ss);
1624 NYD2_LEAVE;
1625 return shs;
1628 FL struct n_string *
1629 n_shell_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1630 struct a_shexp_quote_lvl sql;
1631 struct a_shexp_quote_ctx sqc;
1632 NYD2_ENTER;
1634 assert(store != NULL);
1635 assert(input != NULL);
1636 assert(input->l == 0 || input->s != NULL);
1638 memset(&sqc, 0, sizeof sqc);
1639 sqc.sqc_store = store;
1640 sqc.sqc_input.s = input->s;
1641 if((sqc.sqc_input.l = input->l) == UIZ_MAX)
1642 sqc.sqc_input.l = strlen(input->s);
1643 sqc.sqc_flags = rndtrip ? a_SHEXP_QUOTE_ROUNDTRIP : a_SHEXP_QUOTE_NONE;
1645 if(sqc.sqc_input.l == 0)
1646 store = n_string_push_buf(store, "''", sizeof("''") -1);
1647 else{
1648 memset(&sql, 0, sizeof sql);
1649 sql.sql_dat = sqc.sqc_input;
1650 sql.sql_flags = sqc.sqc_flags;
1651 a_shexp__quote(&sqc, &sql);
1653 NYD2_LEAVE;
1654 return store;
1657 FL char *
1658 n_shell_quote_cp(char const *cp, bool_t rndtrip){
1659 struct n_string store;
1660 struct str input;
1661 char *rv;
1662 NYD2_ENTER;
1664 assert(cp != NULL);
1666 input.s = UNCONST(cp);
1667 input.l = UIZ_MAX;
1668 rv = n_string_cp(n_shell_quote(n_string_creat_auto(&store), &input,
1669 rndtrip));
1670 n_string_gut(n_string_drop_ownership(&store));
1671 NYD2_LEAVE;
1672 return rv;
1675 /* s-it-mode */