Generate lowercase MIME charset=/RFC 2231 charset strings
[s-mailx.git] / shexp.c
blobbec7f3bdc0097140c9b01e801502ec7afa4a65d1
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
5 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
6 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
7 */
8 /*
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 #undef n_FILE
37 #define n_FILE shexp
39 #ifndef HAVE_AMALGAMATION
40 # include "nail.h"
41 #endif
43 #include <sys/wait.h>
45 #include <pwd.h>
47 #ifdef HAVE_FNMATCH
48 # include <dirent.h>
49 # include <fnmatch.h>
50 #endif
52 /* POSIX says
53 * Environment variable names used by the utilities in the Shell and
54 * Utilities volume of POSIX.1-2008 consist solely of uppercase
55 * letters, digits, and the <underscore> ('_') from the characters
56 * defined in Portable Character Set and do not begin with a digit.
57 * Other characters may be permitted by an implementation;
58 * applications shall tolerate the presence of such names.
59 * We do support the hyphen "-" because it is common for mailx. */
/* True if C may be part of a variable name here: anything alnumchar()
 * accepts, plus underscore and hyphen.
 * C is evaluated up to three times, so do not pass expressions with side
 * effects */
60 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* Flags which drive a_shexp__quote().
 * The _T_* bits name the kind of quoting in effect, _T_MASK selects them all;
 * a_shexp__quote() always clears _T_MASK before it sets a new type bit */
62 enum a_shexp_quote_flags{
63 a_SHEXP_QUOTE_NONE,
64 a_SHEXP_QUOTE_ROUNDTRIP = 1<<0, /* Result won't be consumed immediately */
66 a_SHEXP_QUOTE_T_REVSOL = 1<<8, /* Type: by reverse solidus */
67 a_SHEXP_QUOTE_T_SINGLE = 1<<9, /* Type: single-quotes */
68 a_SHEXP_QUOTE_T_DOUBLE = 1<<10, /* Type: double-quotes */
69 a_SHEXP_QUOTE_T_DOLLAR = 1<<11, /* Type: dollar-single-quotes */
70 a_SHEXP_QUOTE_T_MASK = a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
71 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR,
/* NOTE(review): presumably the first bit position not used by the flags
 * above; not referenced in the visible part of this file - confirm */
73 a_SHEXP_QUOTE__FREESHIFT = 16
/* One frame of a_shexp_var(): each recursion level handles one segment of
 * the input (literal text or one variable reference) and links to the frame
 * of its caller; the innermost level joins the .svs_dat of all frames */
76 struct a_shexp_var_stack {
77 struct a_shexp_var_stack *svs_next; /* Outer stack frame */
78 char const *svs_value; /* Remaining value to expand */
79 size_t svs_len; /* Length of .svs_dat this level */
80 char const *svs_dat; /* Result data of this level */
81 bool_t svs_bsesc; /* Shall backslash escaping be performed */
84 #ifdef HAVE_FNMATCH
/* State of one directory level of a_shexp__glob().
 * a_shexp_globname() sets up the outermost instance, a_shexp__glob() derives
 * the instances of deeper levels from a copy of its own one */
85 struct a_shexp_glob_ctx{
86 char const *sgc_patdat; /* Remaining pattern (at and below level) */
87 size_t sgc_patlen; /* Length of .sgc_patdat, in bytes */
88 struct n_string *sgc_outer; /* Resolved path up to this level */
89 ui32_t sgc_flags; /* Bits of the enum local to a_shexp__glob() */
91 #endif
/* Data shared by all levels of a_shexp__quote().
 * The .sqc_cnt_* members are only touched by code which is conditionalized
 * on a_SHEXP_QUOTE_RECURSE, which a_shexp__quote() #undef's itself */
93 struct a_shexp_quote_ctx{
94 struct n_string *sqc_store; /* Result storage */
95 struct str sqc_input; /* Input data, topmost level */
96 ui32_t sqc_cnt_revso;
97 ui32_t sqc_cnt_single;
98 ui32_t sqc_cnt_double;
99 ui32_t sqc_cnt_dollar;
100 enum a_shexp_quote_flags sqc_flags;
/* One input slice of a_shexp__quote() together with the quote flags that
 * were determined for it; levels are linked from inner to outer and are
 * reversed by a_shexp__quote() before output is generated */
103 struct a_shexp_quote_lvl{
104 struct a_shexp_quote_lvl *sql_link; /* Outer level */
105 struct str sql_dat; /* This level (has to) handle(d) */
106 enum a_shexp_quote_flags sql_flags;
109 /* Locate the user's mailbox file (where new, unread mail is queued) */
110 static char * _findmail(char const *user, bool_t force);
112 /* Expand ^~/? and ^~USER/? constructs.
113 * Returns the completely resolved (maybe empty or identical to input)
114 * salloc()ed string */
115 static char *a_shexp_tilde(char const *s);
117 /* (Try to) Expand any shell variable in s.
118 * Returns the completely resolved (maybe empty) salloc()ed string.
119 * Logs on error */
120 static char *a_shexp_var(struct a_shexp_var_stack *svsp);
122 /* Perform fnmatch(3). May return NULL on error */
123 static char *a_shexp_globname(char const *name, enum fexp_mode fexpm);
124 #ifdef HAVE_FNMATCH
125 static bool_t a_shexp__glob(struct a_shexp_glob_ctx *sgcp,
126 struct n_strlist **slpp);
127 static int a_shexp__globsort(void const *cvpa, void const *cvpb);
128 #endif
130 /* Parse an input string and create a sh(1)ell-quoted result */
131 static void a_shexp__quote(struct a_shexp_quote_ctx *sqcp,
132 struct a_shexp_quote_lvl *sqlp);
134 static char *
135 _findmail(char const *user, bool_t force)
137 char *rv;
138 char const *cp;
139 NYD_ENTER;
141 if (force || (cp = ok_vlook(MAIL)) == NULL) {
142 size_t ul = strlen(user), i = sizeof(MAILSPOOL) -1 + 1 + ul +1;
144 rv = salloc(i);
145 memcpy(rv, MAILSPOOL, i = sizeof(MAILSPOOL));
146 rv[i] = '/';
147 memcpy(&rv[++i], user, ul +1);
148 } else if ((rv = fexpand(cp, FEXP_NSHELL)) == NULL)
149 rv = savestr(cp);
150 NYD_LEAVE;
151 return rv;
154 static char *
155 a_shexp_tilde(char const *s){
156 struct passwd *pwp;
157 size_t nl, rl;
158 char const *rp, *np;
159 char *rv;
160 NYD2_ENTER;
162 if(*(rp = &s[1]) == '/' || *rp == '\0'){
163 np = ok_vlook(HOME);
164 rl = strlen(rp);
165 }else{
166 if((rp = strchr(np = rp, '/')) != NULL){
167 nl = PTR2SIZE(rp - np);
168 np = savestrbuf(np, nl);
169 rl = strlen(rp);
170 }else
171 rl = 0;
173 if((pwp = getpwnam(np)) == NULL){
174 rv = savestr(s);
175 goto jleave;
177 np = pwp->pw_dir;
180 nl = strlen(np);
181 rv = salloc(nl + 1 + rl +1);
182 memcpy(rv, np, nl);
183 if(rl > 0){
184 memcpy(rv + nl, rp, rl);
185 nl += rl;
187 rv[nl] = '\0';
188 jleave:
189 NYD2_LEAVE;
190 return rv;
/* Expand $NAME and ${NAME} references in svsp->svs_value.
 * Each invocation handles exactly one segment: either literal text up to the
 * next '$' that is not preceded by a backslash, or a single variable
 * reference.  If input remains it recurses with a new frame (which lives on
 * the stack of this invocation) for the rest.  The invocation which reaches
 * the end of the input reverses the frame list and concatenates the .svs_dat
 * of all frames, outermost first, into one salloc()ed string, and this is
 * handed back through all levels.
 * Variables are looked up via vok_vlook() first and getenv(3) second, an
 * unknown variable expands to the empty string */
193 static char *
194 a_shexp_var(struct a_shexp_var_stack *svsp)
196 struct a_shexp_var_stack next, *np, *tmp;
197 char const *vp;
198 char lc, c, *cp, *rv;
199 size_t i;
200 NYD2_ENTER;
/* Literal segment: scan up to the next unescaped '$' or the end of input */
202 if (*(vp = svsp->svs_value) != '$') {
203 bool_t bsesc = svsp->svs_bsesc;
204 union {bool_t hadbs; char c;} u = {FAL0};
206 svsp->svs_dat = vp;
207 for (lc = '\0', i = 0; ((c = *vp) != '\0'); ++i, ++vp) {
208 if (c == '$' && lc != '\\')
209 break;
/* Without backslash escaping lc remains NUL, so that any '$' terminates */
210 if (!bsesc)
211 continue;
/* A character that follows a backslash is consumed by it: remember that an
 * escape was seen and reset lc so that "\\$" does not escape the '$' */
212 lc = (lc == '\\') ? (u.hadbs = TRU1, '\0') : c;
214 svsp->svs_len = i;
/* Escapes were seen: work on a copy and strip the escaping backslashes.
 * (The union is reused here, .c is the current character from now on) */
216 if (u.hadbs) {
217 svsp->svs_dat = cp = savestrbuf(svsp->svs_dat, i);
219 for (lc = '\0', rv = cp; (u.c = *cp++) != '\0';) {
220 if (u.c != '\\' || lc == '\\')
221 *rv++ = u.c;
222 lc = (lc == '\\') ? '\0' : u.c;
224 *rv = '\0';
226 svsp->svs_len = PTR2SIZE(rv - svsp->svs_dat);
/* Variable segment: lc is reused as the "name is enclosed in braces" flag */
228 } else {
229 if ((lc = (*++vp == '{')))
230 ++vp;
232 svsp->svs_dat = vp;
233 for (i = 0; (c = *vp) != '\0'; ++i, ++vp)
234 if (!a_SHEXP_ISVARC(c))
235 break;
237 if (lc) {
/* Unterminated ${: log, then take the entire remaining input literally and
 * finish without further expansion */
238 if (c != '}') {
239 n_err(_("Variable name misses closing \"}\": %s\n"),
240 svsp->svs_value);
241 svsp->svs_len = strlen(svsp->svs_value);
242 svsp->svs_dat = svsp->svs_value;
243 goto junroll;
/* Step over the closing brace; c is the first character after the reference */
245 c = *++vp;
248 svsp->svs_len = i;
249 /* Check getenv(3) shall no internal variable exist! */
250 if ((rv = vok_vlook(cp = savestrbuf(svsp->svs_dat, i))) != NULL ||
251 (rv = getenv(cp)) != NULL)
252 svsp->svs_len = strlen(svsp->svs_dat = rv);
253 else
254 svsp->svs_len = 0, svsp->svs_dat = UNCONST("");
/* More input follows this segment: let the next level deal with it */
256 if (c != '\0')
257 goto jrecurse;
259 /* That level made the great and completed encoding. Build result */
260 junroll:
/* Reverse the frame list in place (so that the outermost frame comes first)
 * and sum up the length of all segments */
261 for (i = 0, np = svsp, svsp = NULL; np != NULL;) {
262 i += np->svs_len;
263 tmp = np->svs_next;
264 np->svs_next = svsp;
265 svsp = np;
266 np = tmp;
269 cp = rv = salloc(i +1);
270 while (svsp != NULL) {
271 np = svsp;
272 svsp = svsp->svs_next;
273 memcpy(cp, np->svs_dat, np->svs_len);
274 cp += np->svs_len;
276 *cp = '\0';
278 jleave:
279 NYD2_LEAVE;
280 return rv;
/* The new frame inherits the escaping mode and continues at vp */
281 jrecurse:
282 memset(&next, 0, sizeof next);
283 next.svs_next = svsp;
284 next.svs_value = vp;
285 next.svs_bsesc = svsp->svs_bsesc;
286 rv = a_shexp_var(&next);
287 goto jleave;
290 static char *
291 a_shexp_globname(char const *name, enum fexp_mode fexpm){
292 #ifdef HAVE_FNMATCH
293 struct a_shexp_glob_ctx sgc;
294 struct n_string outer;
295 struct n_strlist *slp;
296 char *cp;
297 NYD_ENTER;
299 memset(&sgc, 0, sizeof sgc);
300 sgc.sgc_patlen = strlen(name);
301 sgc.sgc_patdat = savestrbuf(name, sgc.sgc_patlen);
302 sgc.sgc_outer = n_string_reserve(n_string_creat(&outer), sgc.sgc_patlen);
303 sgc.sgc_flags = ((fexpm & FEXP_SILENT) != 0);
304 slp = NULL;
305 if(a_shexp__glob(&sgc, &slp))
306 cp = (char*)1;
307 else
308 cp = NULL;
309 n_string_gut(&outer);
311 if(cp == NULL)
312 goto jleave;
314 if(slp == NULL){
315 cp = UNCONST(N_("File pattern does not match"));
316 goto jerr;
317 }else if(slp->sl_next == NULL)
318 cp = savestrbuf(slp->sl_dat, slp->sl_len);
319 else if(fexpm & FEXP_MULTIOK){
320 struct n_strlist **sorta, *xslp;
321 size_t i, no, l;
323 no = l = 0;
324 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next){
325 ++no;
326 l += xslp->sl_len + 1;
329 sorta = smalloc(sizeof(*sorta) * no);
330 no = 0;
331 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next)
332 sorta[no++] = xslp;
333 qsort(sorta, no, sizeof *sorta, &a_shexp__globsort);
335 cp = salloc(++l);
336 l = 0;
337 for(i = 0; i < no; ++i){
338 xslp = sorta[i];
339 memcpy(&cp[l], xslp->sl_dat, xslp->sl_len);
340 l += xslp->sl_len;
341 cp[l++] = '\0';
343 cp[l] = '\0';
345 free(sorta);
346 pstate |= PS_EXPAND_MULTIRESULT;
347 }else{
348 cp = UNCONST(N_("File pattern matches multiple results"));
349 goto jerr;
352 jleave:
353 while(slp != NULL){
354 struct n_strlist *tmp = slp;
356 slp = slp->sl_next;
357 free(tmp);
359 NYD_LEAVE;
360 return cp;
362 jerr:
363 if(!(fexpm & FEXP_SILENT)){
364 name = n_shell_quote_cp(name, FAL0);
365 n_err("%s: %s\n", V_(cp), name);
367 cp = NULL;
368 goto jleave;
370 #else /* HAVE_FNMATCH */
371 UNUSED(fexpm);
373 if(!(fexpm & FEXP_SILENT))
374 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
375 return savestr(name);
376 #endif
379 #ifdef HAVE_FNMATCH
380 static bool_t
381 a_shexp__glob(struct a_shexp_glob_ctx *sgcp, struct n_strlist **slpp){
382 enum{a_SILENT = 1<<0, a_DEEP=1<<1, a_SALLOC=1<<2};
384 struct a_shexp_glob_ctx nsgc;
385 struct dirent *dep;
386 DIR *dp;
387 size_t old_outerlen;
388 char const *ccp, *myp;
389 NYD2_ENTER;
391 /* We need some special treatment for the outermost level */
392 if(!(sgcp->sgc_flags & a_DEEP)){
393 if(sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/'){
394 myp = n_string_cp(n_string_push_c(sgcp->sgc_outer, '/'));
395 ++sgcp->sgc_patdat;
396 --sgcp->sgc_patlen;
397 }else
398 myp = "./";
399 }else
400 myp = n_string_cp(sgcp->sgc_outer);
401 old_outerlen = sgcp->sgc_outer->s_len;
403 /* Separate current directory/pattern level from any possible remaining
404 * pattern in order to be able to use it for fnmatch(3) */
405 if((ccp = memchr(sgcp->sgc_patdat, '/', sgcp->sgc_patlen)) == NULL)
406 nsgc.sgc_patlen = 0;
407 else{
408 nsgc = *sgcp;
409 nsgc.sgc_flags |= a_DEEP;
410 sgcp->sgc_patlen = PTR2SIZE((nsgc.sgc_patdat = &ccp[1]) -
411 &sgcp->sgc_patdat[0]);
412 nsgc.sgc_patlen -= sgcp->sgc_patlen;
413 /* Trim solidus */
414 if(sgcp->sgc_patlen > 0){
415 assert(sgcp->sgc_patdat[sgcp->sgc_patlen -1] == '/');
416 ((char*)UNCONST(sgcp->sgc_patdat))[--sgcp->sgc_patlen] = '\0';
420 /* Our current directory level */
421 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
422 * xxx be the (sole) result depending on pattern surroundings, etc. */
423 if((dp = opendir(myp)) == NULL){
424 int err;
426 switch((err = errno)){
427 case ENOTDIR:
428 ccp = N_("cannot access paths under non-directory");
429 goto jerr;
430 case ENOENT:
431 ccp = N_("path component of (sub)pattern non-existent");
432 goto jerr;
433 case EACCES:
434 ccp = N_("file permission for file (sub)pattern denied");
435 goto jerr;
436 default:
437 ccp = N_("cannot handle file (sub)pattern");
438 goto jerr;
442 /* As necessary, quote bytes in the current pattern */
443 /* C99 */{
444 char *ncp;
445 size_t i;
446 bool_t need;
448 for(need = FAL0, i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
449 switch(*myp){
450 case '\'': case '"': case '\\': case '$':
451 case ' ': case '\t':
452 need = TRU1;
453 ++i;
454 /* FALLTHRU */
455 default:
456 ++i;
457 break;
460 if(need){
461 ncp = salloc(i +1);
462 for(i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
463 switch(*myp){
464 case '\'': case '"': case '\\': case '$':
465 case ' ': case '\t':
466 ncp[i++] = '\\';
467 /* FALLTHRU */
468 default:
469 ncp[i++] = *myp;
470 break;
472 ncp[i] = '\0';
473 myp = ncp;
474 }else
475 myp = sgcp->sgc_patdat;
478 while((dep = readdir(dp)) != NULL){
479 switch(fnmatch(myp, dep->d_name, FNM_PATHNAME | FNM_PERIOD)){
480 case 0:{
481 /* A match expresses the desire to recurse if there is more pattern */
482 if(nsgc.sgc_patlen > 0){
483 bool_t isdir;
485 n_string_push_cp((sgcp->sgc_outer->s_len > 1
486 ? n_string_push_c(sgcp->sgc_outer, '/') : sgcp->sgc_outer),
487 dep->d_name);
489 isdir = FAL0;
490 #ifdef HAVE_DIRENT_TYPE
491 if(dep->d_type == DT_DIR)
492 isdir = TRU1;
493 else if(dep->d_type == DT_LNK || dep->d_type == DT_UNKNOWN)
494 #endif
496 struct stat sb;
498 if(stat(n_string_cp(sgcp->sgc_outer), &sb)){
499 ccp = N_("I/O error when querying file status");
500 goto jerr;
501 }else if(S_ISDIR(sb.st_mode))
502 isdir = TRU1;
505 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
506 * TODO Instead save away a list of such n_string's for later */
507 if(isdir && !a_shexp__glob(&nsgc, slpp)){
508 ccp = (char*)1;
509 goto jleave;
512 n_string_trunc(sgcp->sgc_outer, old_outerlen);
513 }else{
514 struct n_strlist *slp;
515 size_t i, j;
517 i = strlen(dep->d_name);
518 j = (old_outerlen > 0) ? old_outerlen + 1 + i : i;
519 slp = n_STRLIST_MALLOC(j);
520 *slpp = slp;
521 slpp = &slp->sl_next;
522 slp->sl_next = NULL;
523 if((j = old_outerlen) > 0){
524 memcpy(&slp->sl_dat[0], sgcp->sgc_outer->s_dat, j);
525 if(slp->sl_dat[j -1] != '/')
526 slp->sl_dat[j++] = '/';
528 memcpy(&slp->sl_dat[j], dep->d_name, i);
529 slp->sl_dat[j += i] = '\0';
530 slp->sl_len = j;
532 } break;
533 case FNM_NOMATCH:
534 break;
535 default:
536 ccp = N_("fnmatch(3) cannot handle file (sub)pattern");
537 goto jerr;
541 ccp = NULL;
542 jleave:
543 if(dp != NULL)
544 closedir(dp);
545 NYD2_LEAVE;
546 return (ccp == NULL);
548 jerr:
549 if(!(sgcp->sgc_flags & a_SILENT)){
550 char const *s2, *s3;
552 if(sgcp->sgc_outer->s_len > 0){
553 s2 = n_shell_quote_cp(n_string_cp(sgcp->sgc_outer), FAL0);
554 s3 = "/";
555 }else
556 s2 = s3 = "";
558 n_err("%s: %s%s%s\n", V_(ccp), s2, s3,
559 n_shell_quote_cp(sgcp->sgc_patdat, FAL0));
561 goto jleave;
564 static int
565 a_shexp__globsort(void const *cvpa, void const *cvpb){
566 int rv;
567 struct n_strlist const * const *slpa, * const *slpb;
568 NYD2_ENTER;
570 slpa = cvpa;
571 slpb = cvpb;
572 rv = asccasecmp((*slpa)->sl_dat, (*slpb)->sl_dat);
573 NYD2_LEAVE;
574 return rv;
576 #endif /* HAVE_FNMATCH */
/* Create a sh(1)ell-quoted version of the input of sqlp and append it to
 * sqcp->sqc_store.
 * The work is done in three steps: (1) the input is scanned in order to
 * find out which kind of quoting its bytes require, (2) the list of levels
 * is reversed and the flags of all levels are merged, (3) the opening quote
 * is written, the data is copied with the escapes that the chosen quote type
 * requires, and the quote is closed.
 * Since a_SHEXP_QUOTE_RECURSE is #undef'ined below the label jrecurse is
 * only an alias of jstep, so step (1) never splits the input into multiple
 * levels but solely accumulates the strongest quote type in flags */
578 static void
579 a_shexp__quote(struct a_shexp_quote_ctx *sqcp, struct a_shexp_quote_lvl *sqlp){
580 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
581 * XXX use the recursive implementation because of stateful encodings.
582 * XXX I.e., if a quoted substring cannot be self-contained - the data after
583 * XXX the quote relies on "the former state", then this doesn't make sense.
584 * XXX Therefore this is not fully programmed out but instead only detects
585 * XXX the "most fancy" quoting necessary, and directly does that.
586 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
587 * XXX Otherwise we rather have to convert to wide first and act on that,
588 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
589 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
590 #ifdef a_SHEXP_QUOTE_RECURSE
591 # define jrecurse jrecurse
592 struct a_shexp_quote_lvl sql;
593 #else
594 # define jrecurse jstep
595 #endif
596 struct n_visual_info_ctx vic;
/* The union is used sequentially: as list head while levels are reversed,
 * as the output string thereafter */
597 union {struct a_shexp_quote_lvl *head; struct n_string *store;} u;
598 ui32_t flags;
599 size_t il;
600 char const *ib;
601 NYD2_ENTER;
603 ib = sqlp->sql_dat.s;
604 il = sqlp->sql_dat.l;
605 flags = sqlp->sql_flags;
607 /* Iterate over the entire input, classify characters and type of quotes
608 * along the way. Whenever a quote change has to be applied, adjust flags
609 * for the new situation -, setup sql.* and recurse- */
610 while(il > 0){
611 char c;
613 c = *ib;
/* Control characters require dollar-single-quotes, except for a tabulator
 * if any other quote type is active already */
614 if(cntrlchar(c)){
615 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
616 goto jstep;
617 if(c == '\t' && (flags & (a_SHEXP_QUOTE_T_REVSOL |
618 a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOUBLE)))
619 goto jstep;
620 #ifdef a_SHEXP_QUOTE_RECURSE
621 ++sqcp->sqc_cnt_dollar;
622 #endif
623 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
624 goto jrecurse;
/* Whitespace, double quote and dollar: single-quotes if nothing is active */
625 }else if(blankspacechar(c) || c == '"' || c == '$'){
626 if(flags & a_SHEXP_QUOTE_T_MASK)
627 goto jstep;
628 #ifdef a_SHEXP_QUOTE_RECURSE
629 ++sqcp->sqc_cnt_single;
630 #endif
631 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
632 goto jrecurse;
/* A single quote upgrades "none" as well as single-quotes to
 * dollar-single-quotes */
633 }else if(c == '\''){
634 if(flags & (a_SHEXP_QUOTE_T_MASK & ~a_SHEXP_QUOTE_T_SINGLE))
635 goto jstep;
636 #ifdef a_SHEXP_QUOTE_RECURSE
637 ++sqcp->sqc_cnt_dollar;
638 #endif
639 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
640 goto jrecurse;
641 }else if(c == '\\'){
642 if(flags & a_SHEXP_QUOTE_T_MASK)
643 goto jstep;
644 #ifdef a_SHEXP_QUOTE_RECURSE
645 ++sqcp->sqc_cnt_single;
646 #endif
647 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
648 goto jrecurse;
/* Non-ASCII only enforces dollar-single-quotes in ROUNDTRIP mode */
649 }else if(!asciichar(c)){
650 /* Need to keep together multibytes */
651 #ifdef a_SHEXP_QUOTE_RECURSE
652 memset(&vic, 0, sizeof vic);
653 vic.vic_indat = ib;
654 vic.vic_inlen = il;
655 n_visual_info(&vic,
656 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
657 #endif
658 /* xxx check wether resulting \u would be ASCII */
659 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP) ||
660 (flags & a_SHEXP_QUOTE_T_DOLLAR)){
661 #ifdef a_SHEXP_QUOTE_RECURSE
662 ib = vic.vic_oudat;
663 il = vic.vic_oulen;
664 continue;
665 #else
666 goto jstep;
667 #endif
669 #ifdef a_SHEXP_QUOTE_RECURSE
670 ++sqcp->sqc_cnt_dollar;
671 #endif
672 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
673 goto jrecurse;
674 }else
675 jstep:
676 ++ib, --il;
678 sqlp->sql_flags = flags;
680 /* Level made the great and completed processing input. Reverse the list of
681 * levels, detect the "most fancy" quote type needed along this way */
682 /* XXX Due to restriction as above very crude */
/* il is reused to estimate the output size: the length of a level plus half
 * of that if the level is to be quoted */
683 for(flags = 0, il = 0, u.head = NULL; sqlp != NULL;){
684 struct a_shexp_quote_lvl *tmp;
686 tmp = sqlp->sql_link;
687 sqlp->sql_link = u.head;
688 u.head = sqlp;
689 il += sqlp->sql_dat.l;
690 if(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK)
691 il += (sqlp->sql_dat.l >> 1);
692 flags |= sqlp->sql_flags;
693 sqlp = tmp;
695 sqlp = u.head;
697 /* Finally work the substrings in the correct order, adjusting quotes along
698 * the way as necessary. Start off with the "most fancy" quote, so that
699 * the user sees an overall boundary she can orientate herself on.
700 * We do it like that to be able to give the user some "encapsulation
701 * experience", to address what strikes me is a problem of sh(1)ell quoting:
702 * different to, e.g., perl(1), where you see at a glance where a string
703 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
704 * visual appearance of "a string" as such */
705 u.store = n_string_reserve(sqcp->sqc_store, il);
/* Open the quote: precedence is dollar-single, double, single, none */
707 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
708 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
709 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
710 }else if(flags & a_SHEXP_QUOTE_T_DOUBLE){
711 u.store = n_string_push_c(u.store, '"');
712 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOUBLE;
713 }else if(flags & a_SHEXP_QUOTE_T_SINGLE){
714 u.store = n_string_push_c(u.store, '\'');
715 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
716 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
717 flags &= ~a_SHEXP_QUOTE_T_MASK;
719 /* Work all the levels */
720 for(; sqlp != NULL; sqlp = sqlp->sql_link){
721 /* As necessary update our mode of quoting */
722 #ifdef a_SHEXP_QUOTE_RECURSE
723 il = 0;
725 switch(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK){
726 case a_SHEXP_QUOTE_T_DOLLAR:
727 if(!(flags & a_SHEXP_QUOTE_T_DOLLAR))
728 il = a_SHEXP_QUOTE_T_DOLLAR;
729 break;
730 case a_SHEXP_QUOTE_T_DOUBLE:
731 if(!(flags & (a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
732 il = a_SHEXP_QUOTE_T_DOLLAR;
733 break;
734 case a_SHEXP_QUOTE_T_SINGLE:
735 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
736 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
737 il = a_SHEXP_QUOTE_T_SINGLE;
738 break;
739 default:
740 case a_SHEXP_QUOTE_T_REVSOL:
741 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
742 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
743 il = a_SHEXP_QUOTE_T_REVSOL;
744 break;
747 if(il != 0){
748 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
749 u.store = n_string_push_c(u.store, '\'');
750 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
751 u.store = n_string_push_c(u.store, '"');
752 flags &= ~a_SHEXP_QUOTE_T_MASK;
754 flags |= (ui32_t)il;
755 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
756 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
757 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
758 u.store = n_string_push_c(u.store, '"');
759 else if(flags & a_SHEXP_QUOTE_T_SINGLE)
760 u.store = n_string_push_c(u.store, '\'');
762 #endif /* a_SHEXP_QUOTE_RECURSE */
764 /* Work the level's substring */
765 ib = sqlp->sql_dat.s;
766 il = sqlp->sql_dat.l;
768 while(il > 0){
769 char c2, c;
771 c = *ib;
/* Control characters: use the mnemonic escape if there is one, otherwise
 * (c is unchanged by the switch) the \cX form, X being c ^ 0x40.
 * A tabulator is passed through unless in dollar-single-quotes */
773 if(cntrlchar(c)){
774 assert(c == '\t' || (flags & a_SHEXP_QUOTE_T_DOLLAR));
775 assert((flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
776 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)));
777 switch((c2 = c)){
778 case 0x07: c = 'a'; break;
779 case 0x08: c = 'b'; break;
780 case 0x0A: c = 'n'; break;
781 case 0x0B: c = 'v'; break;
782 case 0x0C: c = 'f'; break;
783 case 0x0D: c = 'r'; break;
784 case 0x1B: c = 'E'; break;
785 default: break;
786 case 0x09:
787 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
788 c = 't';
789 break;
791 if(flags & a_SHEXP_QUOTE_T_REVSOL)
792 u.store = n_string_push_c(u.store, '\\');
793 goto jpush;
795 u.store = n_string_push_c(u.store, '\\');
796 if(c == c2){
797 u.store = n_string_push_c(u.store, 'c');
798 c ^= 0x40;
800 goto jpush;
801 }else if(blankspacechar(c) || c == '"' || c == '$'){
802 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
803 goto jpush;
804 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE));
805 u.store = n_string_push_c(u.store, '\\');
806 goto jpush;
807 }else if(c == '\''){
808 if(flags & a_SHEXP_QUOTE_T_DOUBLE)
809 goto jpush;
810 assert(!(flags & a_SHEXP_QUOTE_T_SINGLE));
811 u.store = n_string_push_c(u.store, '\\');
812 goto jpush;
813 }else if(c == '\\'){
814 if(flags & a_SHEXP_QUOTE_T_SINGLE)
815 goto jpush;
816 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE |
817 a_SHEXP_QUOTE_T_DOLLAR));
818 u.store = n_string_push_c(u.store, '\\');
819 goto jpush;
820 }else if(asciichar(c)){
821 /* Shorthand: we can simply push that thing out */
822 jpush:
823 u.store = n_string_push_c(u.store, c);
824 ++ib, --il;
825 }else{
826 /* Not an ASCII character, take care not to split up multibyte
827 * sequences etc. */
828 #ifdef HAVE_NATCH_CHAR
/* With OPT_UNICODE the input is decoded as UTF-8 directly: in ROUNDTRIP mode,
 * and for U+FFFD, a \uXXXX or \UXXXXXXXX escape is generated, otherwise the
 * bytes of the character are copied.  If decoding fails the generic code
 * below takes over */
829 if(options & OPT_UNICODE){
830 ui32_t uc;
831 char const *ib2;
832 size_t il2, il3;
834 ib2 = ib;
835 il3 = il2 = il;
836 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
837 char itoa[32];
838 char const *cp;
840 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
841 if((flags & a_SHEXP_QUOTE_ROUNDTRIP) || uc == 0xFFFDu){
842 /* Use padding to make ambiguities impossible */
843 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
844 (uc > 0xFFFFu ? 'U' : 'u'),
845 (int)(uc > 0xFFFFu ? 8 : 4), uc);
846 cp = itoa;
847 }else{
848 il3 = il2;
849 cp = &ib[0];
851 u.store = n_string_push_buf(u.store, cp, il3);
852 ib += il2, il -= il2;
853 continue;
856 #endif /* HAVE_NATCH_CHAR */
858 memset(&vic, 0, sizeof vic);
859 vic.vic_indat = ib;
860 vic.vic_inlen = il;
861 n_visual_info(&vic,
862 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
864 /* Work this substring as sensitive as possible */
/* NOTE(review): presumably .vic_oudat/.vic_oulen describe the input which
 * remains after the one character, so that il becomes the byte length of
 * that character here; position and length are reloaded from vic once the
 * character has been worked - confirm against n_visual_info() */
865 il -= vic.vic_oulen;
866 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP))
867 u.store = n_string_push_buf(u.store, ib, il);
868 #ifdef HAVE_ICONV
/* ROUNDTRIP: try to convert the character to UTF-8 in order to create a
 * \u / \U escape from it */
869 else if((vic.vic_indat = n_iconv_onetime_cp("utf-8",
870 charset_get_lc(), savestrbuf(ib, il), FAL0)) != NULL){
871 ui32_t uc;
872 char const *ib2;
873 size_t il2, il3;
875 il3 = il2 = strlen(ib2 = vic.vic_indat);
876 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
877 char itoa[32];
879 il2 = PTR2SIZE(&ib2[0] - &vic.vic_indat[0]);
880 /* Use padding to make ambiguities impossible */
881 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
882 (uc > 0xFFFFu ? 'U' : 'u'),
883 (int)(uc > 0xFFFFu ? 8 : 4), uc);
884 u.store = n_string_push_buf(u.store, itoa, il3);
885 }else
886 goto Jxseq;
888 #endif
889 else
890 #ifdef HAVE_ICONV
891 Jxseq:
892 #endif
/* Last resort: one \xHH escape per byte; the "FF" of the pushed template is
 * overwritten with the hexadecimal value of the byte */
893 while(il-- > 0){
894 u.store = n_string_push_buf(u.store, "\\xFF",
895 sizeof("\\xFF") -1);
896 n_c_to_hex_base16(&u.store->s_dat[u.store->s_len - 2], *ib++);
899 ib = vic.vic_oudat;
900 il = vic.vic_oulen;
905 /* Close an open quote */
906 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
907 u.store = n_string_push_c(u.store, '\'');
908 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
909 u.store = n_string_push_c(u.store, '"');
910 #ifdef a_SHEXP_QUOTE_RECURSE
911 jleave:
912 #endif
913 NYD2_LEAVE;
914 return;
916 #ifdef a_SHEXP_QUOTE_RECURSE
917 jrecurse:
918 sqlp->sql_dat.l -= il;
920 sql.sql_link = sqlp;
921 sql.sql_dat.s = UNCONST(ib);
922 sql.sql_dat.l = il;
923 sql.sql_flags = flags;
924 a_shexp__quote(sqcp, &sql);
925 goto jleave;
926 #endif
928 #undef jrecurse
929 #undef a_SHEXP_QUOTE_RECURSE
/* Expand the file name name according to the mode bits in fexpm.
 * In order: shortcut_expand() is consulted (unless FEXP_NSHORTCUT), then the
 * special leading characters "%", "#", "&" and "+" are worked, then shell
 * variable, tilde and (unless FEXP_NSHELL) filename pattern expansion are
 * performed, and finally, with FEXP_LOCAL, it is verified that the result
 * denotes a file or maildir.
 * Returns NULL on error.  Otherwise the result is either storage that was
 * created by one of the expansion steps, or a savestr() copy.
 * PS_EXPAND_MULTIRESULT is cleared from pstate on entry */
932 FL char *
933 fexpand(char const *name, enum fexp_mode fexpm)
935 struct str s;
936 char const *cp, *res;
/* dyn: res is the product of an expansion, no copy is needed on return */
937 bool_t dyn;
938 NYD_ENTER;
940 pstate &= ~PS_EXPAND_MULTIRESULT;
942 /* The order of evaluation is "%" and "#" expand into constants.
943 * "&" can expand into "+". "+" can expand into shell meta characters.
944 * Shell meta characters expand into constants.
945 * This way, we make no recursive expansion */
946 if ((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
947 res = UNCONST(name);
949 jnext:
950 dyn = FAL0;
951 switch (*res) {
952 case '%':
/* "%:NAME": strip the prefix and restart with NAME */
953 if (res[1] == ':' && res[2] != '\0') {
954 res = &res[2];
955 goto jnext;
/* "%" is the mailbox of myname, "%USER" is the (forced) spool file of USER */
957 res = _findmail((res[1] != '\0' ? res + 1 : myname), (res[1] != '\0'));
958 goto jislocal;
959 case '#':
/* Only a sole "#" is special: the previous file */
960 if (res[1] != '\0')
961 break;
962 if (prevfile[0] == '\0') {
963 n_err(_("No previous file\n"));
964 res = NULL;
965 goto jleave;
967 res = prevfile;
968 goto jislocal;
969 case '&':
/* Only a sole "&" is special: the value of MBOX */
970 if (res[1] == '\0')
971 res = ok_vlook(MBOX);
972 break;
975 /* POSIX: if *folder* unset or null, "+" shall be retained */
976 if (*res == '+' && *(cp = folder_query()) != '\0') {
977 size_t i = strlen(cp);
979 res = str_concat_csvl(&s, cp,
980 ((i == 0 || cp[i -1] == '/') ? "" : "/"), res + 1, NULL)->s;
981 dyn = TRU1;
983 /* TODO *folder* can't start with %[:], can it!?! */
984 if (res[0] == '%' && res[1] == ':') {
985 res += 2;
986 goto jnext;
990 /* Do some meta expansions */
/* Entered unless FEXP_NVAR is set without FEXP_NSHELL.  With FEXP_NSHELL only
 * a "$" triggers, otherwise any of the listed meta characters does */
991 if((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
992 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
993 : anyof(res, "{}[]*?$"))){
994 bool_t doexp;
/* Unless FEXP_NOPROTO is set only file and maildir names are expanded */
996 if(fexpm & FEXP_NOPROTO)
997 doexp = TRU1;
998 else switch(which_protocol(res)){
999 case PROTO_FILE:
1000 case PROTO_MAILDIR:
1001 doexp = TRU1;
1002 break;
1003 default:
1004 doexp = FAL0;
1005 break;
1008 if(doexp){
1009 struct a_shexp_var_stack top;
1011 memset(&top, 0, sizeof top);
1012 top.svs_value = res;
1013 top.svs_bsesc = TRU1;
1014 res = a_shexp_var(&top);
1016 if(res[0] == '~')
1017 res = a_shexp_tilde(res);
/* A failing filename pattern expansion fails the entire expansion */
1019 if(!(fexpm & FEXP_NSHELL) &&
1020 (res = a_shexp_globname(res, fexpm)) == NULL)
1021 goto jleave;
1022 dyn = TRU1;
1023 }/* else no tilde */
1024 }else if(res[0] == '~'){
1025 res = a_shexp_tilde(res);
1026 dyn = TRU1;
1029 jislocal:
1030 if (fexpm & FEXP_LOCAL)
1031 switch (which_protocol(res)) {
1032 case PROTO_FILE:
1033 case PROTO_MAILDIR:
1034 break;
1035 default:
1036 n_err(_("Not a local file or directory: %s\n"),
1037 n_shell_quote_cp(name, FAL0));
1038 res = NULL;
1039 break;
1042 jleave:
1043 if(res != NULL && !dyn)
1044 res = savestr(res);
1045 NYD_LEAVE;
1046 return UNCONST(res);
/* Interpret the (possibly reverse solidus escaped) character at *s, store
 * the position after the consumed input in *s, and return the value of the
 * character.
 * At end of string NUL is returned and *s is not advanced.  A character
 * other than reverse solidus is returned as is.  Otherwise the sequences
 * \a \b \c \f \n \r \t \v \e \E \\, hexadecimal \xH[H] (also \X) and octal
 * (up to three digits, optionally preceded by one 0) are understood.
 * With use_nail_extensions \& \? \$ and \@ are interpreted, too, otherwise
 * they, like a sole reverse solidus at end of string, result in a reverse
 * solidus */
1049 FL int
1050 n_shell_expand_escape(char const **s, bool_t use_nail_extensions)/* TODO DROP!*/
1052 char const *xs;
1053 int c, n;
1054 NYD2_ENTER;
1056 xs = *s;
1058 if ((c = *xs & 0xFF) == '\0')
1059 goto jleave;
1060 ++xs;
1061 if (c != '\\')
1062 goto jleave;
/* From here on xs points to the character after the reverse solidus */
1064 switch ((c = *xs & 0xFF)) {
1065 case 'a': c = '\a'; break;
1066 case 'b': c = '\b'; break;
1067 case 'c': c = PROMPT_STOP; break;
1068 case 'f': c = '\f'; break;
1069 case 'n': c = '\n'; break;
1070 case 'r': c = '\r'; break;
1071 case 't': c = '\t'; break;
1072 case 'v': c = '\v'; break;
1074 /* ESCape */
1075 case 'E':
1076 case 'e':
1077 c = '\033';
1078 break;
1080 /* Hexadecimal TODO uses ASCII */
1081 case 'X':
1082 case 'x': {
/* a_HEX() maps an ASCII hexadecimal digit to its value by subtracting the
 * code of '0' (48), of 'A' - 10 (55) or of 'a' - 10 (87), respectively */
1083 static ui8_t const hexatoi[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
1084 #undef a_HEX
1085 #define a_HEX(n) \
1086 hexatoi[(ui8_t)((n) - ((n) <= '9' ? 48 : ((n) <= 'F' ? 55 : 87)))]
1088 c = 0;
1089 ++xs;
1090 if(hexchar(*xs))
1091 c = a_HEX(*xs);
1092 else{
/* No digit at all: return the reverse solidus, *s will point to the x */
1093 --xs;
1094 if(options & OPT_D_V)
1095 n_err(_("Invalid \"\\xNUMBER\" notation in \"%s\"\n"), xs - 1);
1096 c = '\\';
1097 goto jleave;
/* An optional second digit */
1099 ++xs;
1100 if(hexchar(*xs)){
1101 c <<= 4;
1102 c += a_HEX(*xs);
1103 ++xs;
1105 goto jleave;
1107 #undef a_HEX
1109 /* octal, with optional 0 prefix */
1110 case '0':
1111 ++xs;
/* The never entered if(0) only encloses the check of the default: case, so
 * that both, case '0' and default, share the digit loop which follows */
1112 if(0){
1113 default:
1114 if(*xs == '\0'){
1115 c = '\\';
1116 break;
/* If there is no octal digit at xs the result is 0 and nothing (more) is
 * consumed */
1119 for (c = 0, n = 3; n-- > 0 && octalchar(*xs); ++xs) {
1120 c <<= 3;
1121 c |= *xs - '0';
1123 goto jleave;
1125 /* S-nail extension for nice (get)prompt(()) support */
1126 case '&':
1127 case '?':
1128 case '$':
1129 case '@':
1130 if (use_nail_extensions) {
1131 switch (c) {
1132 case '&': c = ok_blook(bsdcompat) ? '&' : '?'; break;
1133 case '?': c = (pstate & PS_EVAL_ERROR) ? '1' : '0'; break;
1134 case '$': c = PROMPT_DOLLAR; break;
1135 case '@': c = PROMPT_AT; break;
1137 break;
1140 /* FALLTHRU */
1141 case '\0':
1142 /* A sole <backslash> at EOS is treated as-is! */
1143 c = '\\';
1144 /* FALLTHRU */
1145 case '\\':
1146 break;
/* Reached via break only: step over the character that was switch()ed on */
1149 ++xs;
1150 jleave:
1151 *s = xs;
1152 NYD2_LEAVE;
1153 return c;
1156 FL enum n_shexp_state
1157 n_shell_parse_token(struct n_string *store, struct str *input, /* TODO WCHAR */
1158 enum n_shexp_parse_flags flags){
1159 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
1160 char utf[8];
1161 #endif
1162 char c2, c, quotec;
1163 enum{
1164 a_NONE = 0,
1165 a_SKIPQ = 1<<0, /* Skip rest of this quote (\c0 ..) */
1166 a_SURPLUS = 1<<1, /* Extended sequence interpretation */
1167 a_NTOKEN = 1<<2 /* "New token": e.g., comments are possible */
1168 } state;
1169 enum n_shexp_state rv;
1170 size_t i, il;
1171 char const *ib_save, *ib;
1172 NYD2_ENTER;
1173 UNINIT(c, '\0');
1175 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
1176 assert(input != NULL);
1177 assert(input->l == 0 || input->s != NULL);
1178 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
1179 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
1180 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
1182 if((flags & n_SHEXP_PARSE_LOG_D_V) && (options & OPT_D_V))
1183 flags |= n_SHEXP_PARSE_LOG;
1185 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
1186 store = n_string_trunc(store, 0);
1188 ib = input->s;
1189 if((il = input->l) == UIZ_MAX)
1190 il = strlen(ib);
1192 jrestart_empty:
1193 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1194 for(; il > 0; ++ib, --il)
1195 if(!blankspacechar(*ib))
1196 break;
1198 input->s = UNCONST(ib);
1199 input->l = il;
1201 if(il == 0){
1202 rv = n_SHEXP_STATE_STOP;
1203 goto jleave;
1206 if(store != NULL)
1207 store = n_string_reserve(store, MIN(il, 32)); /* XXX */
1209 for(rv = n_SHEXP_STATE_NONE, state = a_NTOKEN, quotec = '\0'; il > 0;){
1210 --il, c = *ib++;
1212 /* If no quote-mode active.. */
1213 if(quotec == '\0'){
1214 if(c == '"' || c == '\''){
1215 quotec = c;
1216 if(c == '"')
1217 state |= a_SURPLUS;
1218 else
1219 state &= ~a_SURPLUS;
1220 state &= ~a_NTOKEN;
1221 continue;
1222 }else if(c == '$'){
1223 if(il > 0){
1224 state &= ~a_NTOKEN;
1225 if(*ib == '\''){
1226 --il, ++ib;
1227 quotec = '\'';
1228 state |= a_SURPLUS;
1229 continue;
1230 }else
1231 goto J_var_expand;
1233 }else if(c == '\\'){
1234 /* Outside of quotes this just escapes any next character, but a sole
1235 * <backslash> at EOS is left unchanged */
1236 if(il > 0)
1237 --il, c = *ib++;
1238 state &= ~a_NTOKEN;
1239 }else if(c == '#' && (state & a_NTOKEN)){
1240 rv |= n_SHEXP_STATE_STOP;
1241 goto jleave;
1242 }else if(c == ',' && (flags &
1243 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA)))
1244 break;
1245 else if(blankchar(c)){
1246 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
1247 ++il, --ib;
1248 break;
1250 state |= a_NTOKEN;
1251 }else
1252 state &= ~a_NTOKEN;
1253 }else{
1254 /* Quote-mode */
1255 assert(!(state & a_NTOKEN));
1256 if(c == quotec){
1257 state = a_NONE;
1258 quotec = '\0';
1259 /* Users may need to recognize the presence of empty quotes */
1260 rv |= n_SHEXP_STATE_OUTPUT;
1261 continue;
1262 }else if(c == '\\' && (state & a_SURPLUS)){
1263 ib_save = ib - 1;
1264 /* A sole <backslash> at EOS is treated as-is! This is ok since
1265 * the "closing quote" error will occur next, anyway */
1266 if(il == 0)
1267 break;
1268 else if((c2 = *ib) == quotec){
1269 --il, ++ib;
1270 c = quotec;
1271 }else if(quotec == '"'){
1272 /* Double quotes:
1273 * The <backslash> shall retain its special meaning as an
1274 * escape character (see Section 2.2.1) only when followed
1275 * by one of the following characters when considered
1276 * special: $ ` " \ <newline> */
1277 switch(c2){
1278 case '$':
1279 case '`':
1280 /* case '"': already handled via c2 == quotec */
1281 case '\\':
1282 --il, ++ib;
1283 c = c2;
1284 /* FALLTHRU */
1285 default:
1286 break;
1288 }else{
1289 /* Dollar-single-quote */
1290 --il, ++ib;
1291 switch(c2){
1292 case '"':
1293 /* case '\'': already handled via c2 == quotec */
1294 case '\\':
1295 c = c2;
1296 break;
1298 case 'b': c = '\b'; break;
1299 case 'f': c = '\f'; break;
1300 case 'n': c = '\n'; break;
1301 case 'r': c = '\r'; break;
1302 case 't': c = '\t'; break;
1303 case 'v': c = '\v'; break;
1305 case 'E':
1306 case 'e': c = '\033'; break;
1308 /* Control character */
1309 case 'c':
1310 if(il == 0)
1311 goto j_dollar_ungetc;
1312 --il, c2 = *ib++;
1313 if(state & a_SKIPQ)
1314 continue;
1315 c = upperconv(c2) ^ 0x40;
1316 if((ui8_t)c > 0x1F && c != 0x7F){ /* ASCII C0: 0..1F, 7F */
1317 if(flags & n_SHEXP_PARSE_LOG)
1318 n_err(_("Invalid \"\\c\" notation: %.*s\n"),
1319 (int)input->l, input->s);
1320 rv |= n_SHEXP_STATE_ERR_CONTROL;
1322 /* As an implementation-defined extension, support \c@
1323 * EQ printf(1) alike \c */
1324 if(c == '\0'){
1325 rv |= n_SHEXP_STATE_STOP;
1326 goto jleave;
1328 break;
1330 /* Octal sequence: 1 to 3 octal bytes */
1331 case '0':
1332 /* As an extension (dependent on where you look, echo(1), or
1333 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1334 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1335 c2 = c;
1336 --il, ++ib;
1338 /* FALLTHRU */
1339 case '1': case '2': case '3':
1340 case '4': case '5': case '6': case '7':
1341 c2 -= '0';
1342 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1343 c2 = (c2 << 3) | (c - '0');
1344 --il, ++ib;
1346 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1347 if((ui8_t)c2 > 0x1F){
1348 if(flags & n_SHEXP_PARSE_LOG)
1349 n_err(_("\"\\0\" argument exceeds a byte: "
1350 "%.*s\n"), (int)input->l, input->s);
1351 rv |= n_SHEXP_STATE_ERR_NUMBER;
1352 --il, ++ib;
1353 /* Write unchanged */
1354 je_ib_save:
1355 rv |= n_SHEXP_STATE_OUTPUT;
1356 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1357 store = n_string_push_buf(store, ib_save,
1358 PTR2SIZE(ib - ib_save));
1359 continue;
1361 c2 = (c2 << 3) | (c -= '0');
1362 --il, ++ib;
1364 if((c = c2) == '\0')
1365 state |= a_SKIPQ;
1366 if(state & a_SKIPQ)
1367 continue;
1368 break;
1370 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
1371 case 'U':
1372 i = 8;
1373 if(0){
1374 /* FALLTHRU */
1375 case 'u':
1376 i = 4;
1378 if(il == 0)
1379 goto j_dollar_ungetc;
1380 if(0){
1381 /* FALLTHRU */
1383 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1384 case 'X':
1385 case 'x':
1386 if(il == 0)
1387 goto j_dollar_ungetc;
1388 i = 2;
1390 /* C99 */{
1391 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
1392 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1394 size_t no, j;
1396 i = MIN(il, i);
1397 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
1398 c = *ib;
1399 if(hexchar(c)){
1400 no <<= 4;
1401 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
1402 : ((c) <= 'F' ? 55 : 87)))];
1403 }else if(j == 0){
1404 if(state & a_SKIPQ)
1405 break;
1406 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
1407 if(flags & n_SHEXP_PARSE_LOG)
1408 n_err(_("Invalid \"\\%c\" notation: %.*s\n"),
1409 c2, (int)input->l, input->s);
1410 rv |= n_SHEXP_STATE_ERR_NUMBER;
1411 goto je_ib_save;
1412 }else
1413 break;
1416 /* Unicode massage */
1417 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
1418 if((c = (char)no) == '\0')
1419 state |= a_SKIPQ;
1420 }else if(no == 0)
1421 state |= a_SKIPQ;
1422 else if(!(state & a_SKIPQ)){
1423 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1424 store = n_string_reserve(store, MAX(j, 4));
1426 c2 = FAL0;
1427 if(no > 0x10FFFF){ /* XXX magic; CText */
1428 if(flags & n_SHEXP_PARSE_LOG)
1429 n_err(_("\"\\U\" argument exceeds 0x10FFFF: "
1430 "%.*s\n"), (int)input->l, input->s);
1431 rv |= n_SHEXP_STATE_ERR_NUMBER;
1432 /* But normalize the output anyway */
1433 goto Je_uni_norm;
1436 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
1437 j = n_utf32_to_utf8(no, utf);
1438 #endif
1439 #ifdef HAVE_NATCH_CHAR
1440 if(options & OPT_UNICODE){
1441 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
1442 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1443 store = n_string_push_buf(store, utf, j);
1444 continue;
1446 #endif
1447 #ifdef HAVE_ICONV
1448 /* C99 */{
1449 char *icp;
1451 icp = n_iconv_onetime_cp(NULL, NULL, utf, FAL0);
1452 if(icp != NULL){
1453 rv |= n_SHEXP_STATE_OUTPUT;
1454 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1455 store = n_string_push_cp(store, icp);
1456 continue;
1459 #endif
1460 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Je_uni_norm:{
1461 char itoa[32];
1463 rv |= n_SHEXP_STATE_OUTPUT |
1464 n_SHEXP_STATE_ERR_UNICODE;
1465 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1466 (no > 0xFFFFu ? 'U' : 'u'),
1467 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
1468 store = n_string_push_buf(store, itoa, i);
1470 continue;
1472 if(state & a_SKIPQ)
1473 continue;
1475 break;
1477 /* Extension: \$ can be used to expand a variable.
1478 * Bug|ad effect: if conversion fails, not written "as-is" */
1479 case '$':
1480 if(il == 0)
1481 goto j_dollar_ungetc;
1482 goto J_var_expand;
1484 default:
1485 j_dollar_ungetc:
1486 /* Follow bash behaviour, print sequence unchanged */
1487 ++il, --ib;
1488 break;
1491 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
1492 bool_t brace;
1494 if(!(brace = (*ib == '{')) || il > 1){
1495 char const *cp, *vp;
1497 ib_save = ib - 1;
1498 il -= brace;
1499 vp = (ib += brace);
1501 for(i = 0; il > 0 && (c = *ib, a_SHEXP_ISVARC(c)); ++i)
1502 --il, ++ib;
1504 if(brace){
1505 if(il == 0 || *ib != '}'){
1506 if(state & a_SKIPQ){
1507 assert((state & a_SURPLUS) && quotec == '\'');
1508 continue;
1510 if(flags & n_SHEXP_PARSE_LOG)
1511 n_err(_("Closing brace missing for ${VAR}: %.*s\n"),
1512 (int)input->l, input->s);
1513 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN |
1514 n_SHEXP_STATE_ERR_BRACE;
1515 goto je_ib_save;
1517 --il, ++ib;
1520 if(state & a_SKIPQ)
1521 continue;
1523 if(i == 0){
1524 if(brace){
1525 if(flags & n_SHEXP_PARSE_LOG)
1526 n_err(_("Bad substitution (${}): %.*s\n"),
1527 (int)input->l, input->s);
1528 rv |= n_SHEXP_STATE_ERR_BADSUB;
1529 goto je_ib_save;
1531 c = '$';
1532 }else if(flags & n_SHEXP_PARSE_DRYRUN)
1533 continue;
1534 else{
1535 vp = savestrbuf(vp, i);
1536 /* Check getenv(3) shall no internal variable exist! */
1537 if((cp = vok_vlook(vp)) != NULL || (cp = getenv(vp)) != NULL){
1538 rv |= n_SHEXP_STATE_OUTPUT;
1539 store = n_string_push_cp(store, cp);
1540 for(; (c = *cp) != '\0'; ++cp)
1541 if(cntrlchar(c)){
1542 rv |= n_SHEXP_STATE_CONTROL;
1543 break;
1546 continue;
1549 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
1550 continue;
1554 if(!(state & a_SKIPQ)){
1555 rv |= n_SHEXP_STATE_OUTPUT;
1556 if(cntrlchar(c))
1557 rv |= n_SHEXP_STATE_CONTROL;
1558 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1559 store = n_string_push_c(store, c);
1563 if(quotec != '\0'){
1564 if(flags & n_SHEXP_PARSE_LOG)
1565 n_err(_("no closing quote: %.*s\n"), (int)input->l, input->s);
1566 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
1569 jleave:
1570 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
1571 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
1572 rv |= n_SHEXP_STATE_OUTPUT;
1575 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1576 for(; il > 0; ++ib, --il)
1577 if(!blankchar(*ib))
1578 break;
1580 input->l = il;
1581 input->s = UNCONST(ib);
1583 if(!(rv & n_SHEXP_STATE_STOP)){
1584 if(il > 0 && !(rv & n_SHEXP_STATE_OUTPUT) &&
1585 (flags & n_SHEXP_PARSE_IGNORE_EMPTY))
1586 goto jrestart_empty;
1587 if(!(rv & n_SHEXP_STATE_OUTPUT) && il == 0)
1588 rv |= n_SHEXP_STATE_STOP;
1590 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1591 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1592 NYD2_LEAVE;
1593 return rv;
1596 FL enum n_shexp_state
1597 n_shell_parse_token_buf(char **store, char const *indat, size_t inlen,
1598 enum n_shexp_parse_flags flags){
1599 struct n_string ss;
1600 struct str is;
1601 enum n_shexp_state shs;
1602 NYD2_ENTER;
1604 assert(store != NULL);
1605 assert(inlen == 0 || indat != NULL);
1607 n_string_creat_auto(&ss);
1608 is.s = UNCONST(indat);
1609 is.l = inlen;
1611 shs = n_shell_parse_token(&ss, &is, flags);
1612 if(is.l > 0)
1613 shs &= ~n_SHEXP_STATE_STOP;
1614 else
1615 shs |= n_SHEXP_STATE_STOP;
1616 *store = n_string_cp(&ss);
1617 n_string_drop_ownership(&ss);
1619 n_string_gut(&ss);
1620 NYD2_LEAVE;
1621 return shs;
1624 FL struct n_string *
1625 n_shell_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1626 struct a_shexp_quote_lvl sql;
1627 struct a_shexp_quote_ctx sqc;
1628 NYD2_ENTER;
1630 assert(store != NULL);
1631 assert(input != NULL);
1632 assert(input->l == 0 || input->s != NULL);
1634 memset(&sqc, 0, sizeof sqc);
1635 sqc.sqc_store = store;
1636 sqc.sqc_input.s = input->s;
1637 if((sqc.sqc_input.l = input->l) == UIZ_MAX)
1638 sqc.sqc_input.l = strlen(input->s);
1639 sqc.sqc_flags = rndtrip ? a_SHEXP_QUOTE_ROUNDTRIP : a_SHEXP_QUOTE_NONE;
1641 if(sqc.sqc_input.l == 0)
1642 store = n_string_push_buf(store, "''", sizeof("''") -1);
1643 else{
1644 memset(&sql, 0, sizeof sql);
1645 sql.sql_dat = sqc.sqc_input;
1646 sql.sql_flags = sqc.sqc_flags;
1647 a_shexp__quote(&sqc, &sql);
1649 NYD2_LEAVE;
1650 return store;
1653 FL char *
1654 n_shell_quote_cp(char const *cp, bool_t rndtrip){
1655 struct n_string store;
1656 struct str input;
1657 char *rv;
1658 NYD2_ENTER;
1660 assert(cp != NULL);
1662 input.s = UNCONST(cp);
1663 input.l = UIZ_MAX;
1664 rv = n_string_cp(n_shell_quote(n_string_creat_auto(&store), &input,
1665 rndtrip));
1666 n_string_gut(n_string_drop_ownership(&store));
1667 NYD2_LEAVE;
1668 return rv;
1671 /* s-it-mode */