nail.1: short review of the terminal/MLE section
[s-mailx.git] / shexp.c
blob3374902bf85a3e6ca72ec0701f4d2fdd60d43cc6
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
5 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
6 * Copyright (c) 2012 - 2016 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7 */
8 /*
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 #undef n_FILE
37 #define n_FILE shexp
39 #ifndef HAVE_AMALGAMATION
40 # include "nail.h"
41 #endif
43 #include <sys/wait.h>
45 #include <pwd.h>
47 #ifdef HAVE_FNMATCH
48 # include <dirent.h>
49 # include <fnmatch.h>
50 #endif
52 /* POSIX says
53 * Environment variable names used by the utilities in the Shell and
54 * Utilities volume of POSIX.1-2008 consist solely of uppercase
55 * letters, digits, and the <underscore> ('_') from the characters
56 * defined in Portable Character Set and do not begin with a digit.
57 * Other characters may be permitted by an implementation;
58 * applications shall tolerate the presence of such names.
59 * We do support the hyphen "-" because it is common for mailx. */
60 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* Flags used by the quoting code (a_shexp__quote()): one general option bit
 * plus one bit per sh(1)ell quote type */
62 enum a_shexp_quote_flags{
63 a_SHEXP_QUOTE_NONE,
64 a_SHEXP_QUOTE_ROUNDTRIP = 1u<<0, /* Result won't be consumed immediately */
66 a_SHEXP_QUOTE_T_REVSOL = 1u<<8, /* Type: by reverse solidus */
67 a_SHEXP_QUOTE_T_SINGLE = 1u<<9, /* Type: single-quotes */
68 a_SHEXP_QUOTE_T_DOUBLE = 1u<<10, /* Type: double-quotes */
69 a_SHEXP_QUOTE_T_DOLLAR = 1u<<11, /* Type: dollar-single-quotes */
/* Union of all quote type bits */
70 a_SHEXP_QUOTE_T_MASK = a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
71 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR,
73 a_SHEXP_QUOTE__FREESHIFT = 16u
/* One frame of the expansion stack of a_shexp_var(): each frame stores the
 * result of a single segment; frames are linked from the inner to the outer
 * level */
76 struct a_shexp_var_stack {
77 struct a_shexp_var_stack *svs_next; /* Outer stack frame */
78 char const *svs_value; /* Remaining value to expand */
79 size_t svs_len; /* Length of .svs_dat this level */
80 char const *svs_dat; /* Result data of this level */
81 bool_t svs_bsesc; /* Shall backslash escaping be performed */
82 ui8_t svs__dummy[7]; /* Not referenced by the code here; presumably padding */
85 #ifdef HAVE_FNMATCH
/* State of one directory level of a_shexp__glob() */
86 struct a_shexp_glob_ctx{
87 char const *sgc_patdat; /* Remaining pattern (at and below level) */
88 size_t sgc_patlen; /* Byte length of .sgc_patdat */
89 struct n_string *sgc_outer; /* Resolved path up to this level */
90 ui32_t sgc_flags; /* Bits of the enum local to a_shexp__glob() */
91 ui8_t sgc__dummy[4]; /* Not referenced by the code here; presumably padding */
93 #endif
/* Overall state of one quoting run of a_shexp__quote() */
95 struct a_shexp_quote_ctx{
96 struct n_string *sqc_store; /* Result storage */
97 struct str sqc_input; /* Input data, topmost level */
/* The counters are only incremented when a_SHEXP_QUOTE_RECURSE is defined */
98 ui32_t sqc_cnt_revso;
99 ui32_t sqc_cnt_single;
100 ui32_t sqc_cnt_double;
101 ui32_t sqc_cnt_dollar;
102 enum a_shexp_quote_flags sqc_flags;
103 ui8_t sqc__dummy[4]; /* Not referenced by the code here; presumably padding */
/* One (sub)string level of a_shexp__quote(), together with the quote flags
 * which were determined for it */
106 struct a_shexp_quote_lvl{
107 struct a_shexp_quote_lvl *sql_link; /* Outer level */
108 struct str sql_dat; /* This level (has to) handle(d) */
109 enum a_shexp_quote_flags sql_flags; /* Quote type(s) required by .sql_dat */
110 ui8_t sql__dummy[4]; /* Not referenced by the code here; presumably padding */
113 /* Locate the user's mailbox file (where new, unread mail is queued) */
114 static char *a_shexp_findmail(char const *user, bool_t force);
116 /* Expand ^~/? and ^~USER/? constructs.
117 * Returns the completely resolved (maybe empty or identical to input)
118 * salloc()ed string */
119 static char *a_shexp_tilde(char const *s);
121 /* (Try to) Expand any shell variable in s.
122 * Returns the completely resolved (maybe empty) salloc()ed string.
123 * Logs on error */
124 static char *a_shexp_var(struct a_shexp_var_stack *svsp);
126 /* Perform filename pattern expansion of name via fnmatch(3).
 * Returns NULL on error, including "no match" and, unless FEXP_MULTIOK is
 * set in fexpm, "multiple matches".
 * a_shexp__glob() works one directory level of the pattern,
 * a_shexp__globsort() is the qsort(3) comparison used to order results */
127 static char *a_shexp_globname(char const *name, enum fexp_mode fexpm);
128 #ifdef HAVE_FNMATCH
129 static bool_t a_shexp__glob(struct a_shexp_glob_ctx *sgcp,
130 struct n_strlist **slpp);
131 static int a_shexp__globsort(void const *cvpa, void const *cvpb);
132 #endif
134 /* Parse an input string and create a sh(1)ell-quoted result */
135 static void a_shexp__quote(struct a_shexp_quote_ctx *sqcp,
136 struct a_shexp_quote_lvl *sqlp);
138 static char *
139 a_shexp_findmail(char const *user, bool_t force){
140 char *rv;
141 char const *cp;
142 NYD2_ENTER;
144 if(!force){
145 if((cp = ok_vlook(inbox)) != NULL && *cp != '\0'){
146 /* Folder extra introduced to avoid % recursion loops */
147 if((rv = fexpand(cp, FEXP_NSPECIAL | FEXP_NFOLDER | FEXP_NSHELL)
148 ) != NULL)
149 goto jleave;
150 n_err(_("*inbox* expansion failed, using $MAIL / builtin: %s\n"), cp);
153 if((cp = ok_vlook(MAIL)) != NULL){
154 rv = savestr(cp);
155 goto jleave;
159 /* C99 */{
160 size_t ul, i;
162 ul = strlen(user) +1;
163 i = sizeof(VAL_MAIL) -1 + 1 + ul;
165 rv = salloc(i);
166 memcpy(rv, VAL_MAIL, i = sizeof(VAL_MAIL));
167 rv[i] = '/';
168 memcpy(&rv[++i], user, ul);
170 jleave:
171 NYD2_LEAVE;
172 return rv;
175 static char *
176 a_shexp_tilde(char const *s){
177 struct passwd *pwp;
178 size_t nl, rl;
179 char const *rp, *np;
180 char *rv;
181 NYD2_ENTER;
183 if(*(rp = &s[1]) == '/' || *rp == '\0'){
184 np = ok_vlook(HOME);
185 rl = strlen(rp);
186 }else{
187 if((rp = strchr(np = rp, '/')) != NULL){
188 nl = PTR2SIZE(rp - np);
189 np = savestrbuf(np, nl);
190 rl = strlen(rp);
191 }else
192 rl = 0;
194 if((pwp = getpwnam(np)) == NULL){
195 rv = savestr(s);
196 goto jleave;
198 np = pwp->pw_dir;
201 nl = strlen(np);
202 rv = salloc(nl + 1 + rl +1);
203 memcpy(rv, np, nl);
204 if(rl > 0){
205 memcpy(rv + nl, rp, rl);
206 nl += rl;
208 rv[nl] = '\0';
209 jleave:
210 NYD2_LEAVE;
211 return rv;
/* Expand variable references in svsp->svs_value.
 * One invocation handles a single segment: either literal text up to the next
 * '$' (with .svs_bsesc set: up to the next '$' which is not preceded by a
 * reverse solidus), or one $NAME or ${NAME} reference.
 * If input remains after the segment a new frame is linked to svsp via
 * .svs_next and the function recurses on it.  The innermost invocation
 * reverses the frame list and joins the .svs_dat / .svs_len pieces of all
 * frames into one salloc()ed string, which is handed back through all levels.
 * A ${ without closing } is logged, the remaining input is then taken as-is */
214 static char *
215 a_shexp_var(struct a_shexp_var_stack *svsp)
217 struct a_shexp_var_stack next, *np, *tmp;
218 char const *vp;
219 char lc, c, *cp, *rv;
220 size_t i;
221 NYD2_ENTER;
/* Literal segment */
223 if (*(vp = svsp->svs_value) != '$') {
224 bool_t bsesc = svsp->svs_bsesc;
/* .hadbs and .c share storage: .hadbs is only needed until tested below */
225 union {bool_t hadbs; char c;} u = {FAL0};
227 svsp->svs_dat = vp;
/* lc tracks the last byte; it is only updated when escaping is enabled, and
 * is reset after an escaped byte so that "\\" does not escape its successor */
228 for (lc = '\0', i = 0; ((c = *vp) != '\0'); ++i, ++vp) {
229 if (c == '$' && lc != '\\')
230 break;
231 if (!bsesc)
232 continue;
233 lc = (lc == '\\') ? (u.hadbs = TRU1, '\0') : c;
235 svsp->svs_len = i;
/* Escapes were seen: work on a copy and drop each escaping reverse solidus */
237 if (u.hadbs) {
238 svsp->svs_dat = cp = savestrbuf(svsp->svs_dat, i);
240 for (lc = '\0', rv = cp; (u.c = *cp++) != '\0';) {
241 if (u.c != '\\' || lc == '\\')
242 *rv++ = u.c;
243 lc = (lc == '\\') ? '\0' : u.c;
245 *rv = '\0';
247 svsp->svs_len = PTR2SIZE(rv - svsp->svs_dat);
/* Variable reference; lc is reused as the "brace form ${..}" indicator */
249 } else {
250 if ((lc = (*++vp == '{')))
251 ++vp;
/* Collect the name: all bytes which satisfy a_SHEXP_ISVARC() */
253 svsp->svs_dat = vp;
254 for (i = 0; (c = *vp) != '\0'; ++i, ++vp)
255 if (!a_SHEXP_ISVARC(c))
256 break;
258 if (lc) {
259 if (c != '}') {
260 n_err(_("Variable name misses closing }: %s\n"),
261 svsp->svs_value);
262 svsp->svs_len = strlen(svsp->svs_value);
263 svsp->svs_dat = svsp->svs_value;
264 goto junroll;
/* Step over the closing brace */
266 c = *++vp;
269 svsp->svs_len = i;
270 /* Check getenv(3) shall no internal variable exist! */
271 if ((rv = vok_vlook(cp = savestrbuf(svsp->svs_dat, i))) != NULL ||
272 (rv = getenv(cp)) != NULL)
273 svsp->svs_len = strlen(svsp->svs_dat = rv);
274 else
/* Unknown variables expand to the empty string */
275 svsp->svs_len = 0, svsp->svs_dat = UNCONST("");
/* More input follows this segment: handle it in a new frame */
277 if (c != '\0')
278 goto jrecurse;
280 /* That level made the great and completed encoding. Build result */
281 junroll:
/* Reverse the list so that it starts with the outermost frame, and sum up
 * the total length along the way */
282 for (i = 0, np = svsp, svsp = NULL; np != NULL;) {
283 i += np->svs_len;
284 tmp = np->svs_next;
285 np->svs_next = svsp;
286 svsp = np;
287 np = tmp;
/* Join the pieces in input order */
290 cp = rv = salloc(i +1);
291 while (svsp != NULL) {
292 np = svsp;
293 svsp = svsp->svs_next;
294 memcpy(cp, np->svs_dat, np->svs_len);
295 cp += np->svs_len;
297 *cp = '\0';
299 jleave:
300 NYD2_LEAVE;
301 return rv;
302 jrecurse:
/* The new frame inherits the escape mode and continues at vp */
303 memset(&next, 0, sizeof next);
304 next.svs_next = svsp;
305 next.svs_value = vp;
306 next.svs_bsesc = svsp->svs_bsesc;
307 rv = a_shexp_var(&next);
308 goto jleave;
/* Expand the filename pattern name.
 * With HAVE_FNMATCH: a_shexp__glob() collects all matches as a list.
 * - On a_shexp__glob() failure NULL is returned.
 * - No match, or multiple matches without FEXP_MULTIOK in fexpm: NULL is
 *   returned, a message is logged unless FEXP_SILENT is set.
 * - Exactly one match: a savestrbuf() copy of it is returned.
 * - Multiple matches with FEXP_MULTIOK: PS_EXPAND_MULTIRESULT is set in pstate
 *   and a salloc()ed buffer is returned which contains all matches, ordered
 *   via a_shexp__globsort(), each terminated by NUL, with one additional NUL
 *   after the last entry.
 * The list nodes are free()d before returning.
 * Without HAVE_FNMATCH: a savestr() copy of name is returned, a message is
 * logged unless FEXP_SILENT is set */
311 static char *
312 a_shexp_globname(char const *name, enum fexp_mode fexpm){
313 #ifdef HAVE_FNMATCH
314 struct a_shexp_glob_ctx sgc;
315 struct n_string outer;
316 struct n_strlist *slp;
317 char *cp;
318 NYD_ENTER;
/* The pattern is copied (a_shexp__glob() modifies it in place) */
320 memset(&sgc, 0, sizeof sgc);
321 sgc.sgc_patlen = strlen(name);
322 sgc.sgc_patdat = savestrbuf(name, sgc.sgc_patlen);
323 sgc.sgc_outer = n_string_reserve(n_string_creat(&outer), sgc.sgc_patlen);
/* Yields 1, which is the a_SILENT bit of a_shexp__glob() */
324 sgc.sgc_flags = ((fexpm & FEXP_SILENT) != 0);
325 slp = NULL;
/* (char*)1 only serves as a non-NULL "success" marker until cp is assigned */
326 if(a_shexp__glob(&sgc, &slp))
327 cp = (char*)1;
328 else
329 cp = NULL;
330 n_string_gut(&outer);
332 if(cp == NULL)
333 goto jleave;
335 if(slp == NULL){
336 cp = UNCONST(N_("File pattern does not match"));
337 goto jerr;
338 }else if(slp->sl_next == NULL)
339 cp = savestrbuf(slp->sl_dat, slp->sl_len);
340 else if(fexpm & FEXP_MULTIOK){
341 struct n_strlist **sorta, *xslp;
342 size_t i, no, l;
/* Count the entries and the bytes needed, including one NUL per entry */
344 no = l = 0;
345 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next){
346 ++no;
347 l += xslp->sl_len + 1;
/* Sort an array of node pointers, the list itself is left as is */
350 sorta = smalloc(sizeof(*sorta) * no);
351 no = 0;
352 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next)
353 sorta[no++] = xslp;
354 qsort(sorta, no, sizeof *sorta, &a_shexp__globsort);
/* +1 for the NUL that terminates the entire sequence */
356 cp = salloc(++l);
357 l = 0;
358 for(i = 0; i < no; ++i){
359 xslp = sorta[i];
360 memcpy(&cp[l], xslp->sl_dat, xslp->sl_len);
361 l += xslp->sl_len;
362 cp[l++] = '\0';
364 cp[l] = '\0';
366 free(sorta);
367 pstate |= PS_EXPAND_MULTIRESULT;
368 }else{
369 cp = UNCONST(N_("File pattern matches multiple results"));
370 goto jerr;
373 jleave:
374 while(slp != NULL){
375 struct n_strlist *tmp = slp;
377 slp = slp->sl_next;
378 free(tmp);
380 NYD_LEAVE;
381 return cp;
/* cp is the (untranslated) message at this point */
383 jerr:
384 if(!(fexpm & FEXP_SILENT)){
385 name = n_shexp_quote_cp(name, FAL0);
386 n_err("%s: %s\n", V_(cp), name);
388 cp = NULL;
389 goto jleave;
391 #else /* HAVE_FNMATCH */
392 UNUSED(fexpm);
394 if(!(fexpm & FEXP_SILENT))
395 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
396 return savestr(name);
397 #endif
400 #ifdef HAVE_FNMATCH
/* Work one directory level of the pattern in sgcp.
 * The directory to read is the path in sgcp->sgc_outer, or, at the outermost
 * level, "/" for patterns with a leading solidus and "./" otherwise.
 * The pattern is split at the first solidus: the leading component is matched
 * against each directory entry via fnmatch(3) (FNM_PATHNAME | FNM_PERIOD).
 * If more pattern follows, matching directories are descended into
 * recursively, otherwise each match is stored as a malloc()ed n_strlist node
 * in *slpp.
 * Returns TRU1 on success and FAL0 on error; errors are logged unless the
 * a_SILENT bit is set in sgcp->sgc_flags.
 * NOTE(review): the recursive call receives the very same nsgc and slpp for
 * each matching directory, whereas the callee trims the pattern of its
 * context in place and the slpp of this level is not advanced past nodes
 * the callee stored.  It therefore looks like results of an earlier matching
 * directory can be overwritten by a later one - to be confirmed */
401 static bool_t
402 a_shexp__glob(struct a_shexp_glob_ctx *sgcp, struct n_strlist **slpp){
403 enum{a_SILENT = 1<<0, a_DEEP=1<<1, a_SALLOC=1<<2};
405 struct a_shexp_glob_ctx nsgc;
406 struct dirent *dep;
407 DIR *dp;
408 size_t old_outerlen;
409 char const *ccp, *myp;
410 NYD2_ENTER;
412 /* We need some special treatment for the outermost level */
413 if(!(sgcp->sgc_flags & a_DEEP)){
414 if(sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/'){
415 myp = n_string_cp(n_string_push_c(sgcp->sgc_outer, '/'));
416 ++sgcp->sgc_patdat;
417 --sgcp->sgc_patlen;
418 }else
419 myp = "./";
420 }else
421 myp = n_string_cp(sgcp->sgc_outer);
/* Remembered in order to restore .sgc_outer after each descent */
422 old_outerlen = sgcp->sgc_outer->s_len;
424 /* Separate current directory/pattern level from any possible remaining
425 * pattern in order to be able to use it for fnmatch(3) */
/* Only nsgc.sgc_patlen is valid (and used) if there is no further level */
426 if((ccp = memchr(sgcp->sgc_patdat, '/', sgcp->sgc_patlen)) == NULL)
427 nsgc.sgc_patlen = 0;
428 else{
429 nsgc = *sgcp;
430 nsgc.sgc_flags |= a_DEEP;
431 sgcp->sgc_patlen = PTR2SIZE((nsgc.sgc_patdat = &ccp[1]) -
432 &sgcp->sgc_patdat[0]);
433 nsgc.sgc_patlen -= sgcp->sgc_patlen;
434 /* Trim solidus */
435 if(sgcp->sgc_patlen > 0){
436 assert(sgcp->sgc_patdat[sgcp->sgc_patlen -1] == '/');
437 ((char*)UNCONST(sgcp->sgc_patdat))[--sgcp->sgc_patlen] = '\0';
441 /* Our current directory level */
442 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
443 * xxx be the (sole) result depending on pattern surroundings, etc. */
444 if((dp = opendir(myp)) == NULL){
445 int err;
447 switch((err = errno)){
448 case ENOTDIR:
449 ccp = N_("cannot access paths under non-directory");
450 goto jerr;
451 case ENOENT:
452 ccp = N_("path component of (sub)pattern non-existent");
453 goto jerr;
454 case EACCES:
455 ccp = N_("file permission for file (sub)pattern denied");
456 goto jerr;
457 default:
458 ccp = N_("cannot handle file (sub)pattern");
459 goto jerr;
463 /* As necessary, quote bytes in the current pattern */
/* First pass: detect whether quoting is needed and count the bytes needed.
 * Second pass: create a salloc()ed copy with a reverse solidus before each
 * quote character, reverse solidus, dollar, space and tabulator */
464 /* C99 */{
465 char *ncp;
466 size_t i;
467 bool_t need;
469 for(need = FAL0, i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
470 switch(*myp){
471 case '\'': case '"': case '\\': case '$':
472 case ' ': case '\t':
473 need = TRU1;
474 ++i;
475 /* FALLTHRU */
476 default:
477 ++i;
478 break;
481 if(need){
482 ncp = salloc(i +1);
483 for(i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
484 switch(*myp){
485 case '\'': case '"': case '\\': case '$':
486 case ' ': case '\t':
487 ncp[i++] = '\\';
488 /* FALLTHRU */
489 default:
490 ncp[i++] = *myp;
491 break;
493 ncp[i] = '\0';
494 myp = ncp;
495 }else
496 myp = sgcp->sgc_patdat;
499 while((dep = readdir(dp)) != NULL){
500 switch(fnmatch(myp, dep->d_name, FNM_PATHNAME | FNM_PERIOD)){
501 case 0:{
502 /* A match expresses the desire to recurse if there is more pattern */
503 if(nsgc.sgc_patlen > 0){
504 bool_t isdir;
/* Extend the resolved path by the entry; a separating solidus is only
 * pushed if the path is longer than one byte */
506 n_string_push_cp((sgcp->sgc_outer->s_len > 1
507 ? n_string_push_c(sgcp->sgc_outer, '/') : sgcp->sgc_outer),
508 dep->d_name);
/* Use the directory entry type where available, stat(2) for symbolic links,
 * unknown types and if the entry type is not available at all */
510 isdir = FAL0;
511 #ifdef HAVE_DIRENT_TYPE
512 if(dep->d_type == DT_DIR)
513 isdir = TRU1;
514 else if(dep->d_type == DT_LNK || dep->d_type == DT_UNKNOWN)
515 #endif
517 struct stat sb;
519 if(stat(n_string_cp(sgcp->sgc_outer), &sb)){
520 ccp = N_("I/O error when querying file status");
521 goto jerr;
522 }else if(S_ISDIR(sb.st_mode))
523 isdir = TRU1;
526 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
527 * TODO Instead save away a list of such n_string's for later */
/* The callee logged already: (char*)1 is only a non-NULL failure marker */
528 if(isdir && !a_shexp__glob(&nsgc, slpp)){
529 ccp = (char*)1;
530 goto jleave;
533 n_string_trunc(sgcp->sgc_outer, old_outerlen);
534 }else{
535 struct n_strlist *slp;
536 size_t i, j;
/* Last pattern level: store "resolved path / entry name" as a result node */
538 i = strlen(dep->d_name);
539 j = (old_outerlen > 0) ? old_outerlen + 1 + i : i;
540 slp = n_STRLIST_MALLOC(j);
541 *slpp = slp;
542 slpp = &slp->sl_next;
543 slp->sl_next = NULL;
544 if((j = old_outerlen) > 0){
545 memcpy(&slp->sl_dat[0], sgcp->sgc_outer->s_dat, j);
546 if(slp->sl_dat[j -1] != '/')
547 slp->sl_dat[j++] = '/';
549 memcpy(&slp->sl_dat[j], dep->d_name, i);
550 slp->sl_dat[j += i] = '\0';
551 slp->sl_len = j;
553 } break;
554 case FNM_NOMATCH:
555 break;
556 default:
557 ccp = N_("fnmatch(3) cannot handle file (sub)pattern");
558 goto jerr;
/* ccp doubles as the status: NULL means success */
562 ccp = NULL;
563 jleave:
564 if(dp != NULL)
565 closedir(dp);
566 NYD2_LEAVE;
567 return (ccp == NULL);
/* ccp is the (untranslated) message; it is non-NULL, so FAL0 is returned */
569 jerr:
570 if(!(sgcp->sgc_flags & a_SILENT)){
571 char const *s2, *s3;
573 if(sgcp->sgc_outer->s_len > 0){
574 s2 = n_shexp_quote_cp(n_string_cp(sgcp->sgc_outer), FAL0);
575 s3 = "/";
576 }else
577 s2 = s3 = "";
579 n_err("%s: %s%s%s\n", V_(ccp), s2, s3,
580 n_shexp_quote_cp(sgcp->sgc_patdat, FAL0));
582 goto jleave;
585 static int
586 a_shexp__globsort(void const *cvpa, void const *cvpb){
587 int rv;
588 struct n_strlist const * const *slpa, * const *slpb;
589 NYD2_ENTER;
591 slpa = cvpa;
592 slpb = cvpb;
593 rv = asccasecmp((*slpa)->sl_dat, (*slpb)->sl_dat);
594 NYD2_LEAVE;
595 return rv;
597 #endif /* HAVE_FNMATCH */
/* Create a sh(1)ell-quoted version of the data of sqlp (and the levels linked
 * to it) in sqcp->sqc_store.
 * The work is done in three steps:
 * 1. classify all input bytes in order to find the quote type(s) needed,
 * 2. reverse the list of levels, thereby joining their flags and estimating
 *    the size of the result,
 * 3. open the "most fancy" quote type which is needed, emit all bytes with
 *    the escaping this type requires, and close the quote again.
 * a_SHEXP_QUOTE_RECURSE is #undef'ined below, therefore jrecurse is an alias
 * of jstep: there is only one level, and a byte which requires a fancier
 * quote type only upgrades flags before it is stepped over */
599 static void
600 a_shexp__quote(struct a_shexp_quote_ctx *sqcp, struct a_shexp_quote_lvl *sqlp){
601 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
602 * XXX use the recursive implementation because of stateful encodings.
603 * XXX I.e., if a quoted substring cannot be self-contained - the data after
604 * XXX the quote relies on "the former state", then this doesn't make sense.
605 * XXX Therefore this is not fully programmed out but instead only detects
606 * XXX the "most fancy" quoting necessary, and directly does that.
607 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
608 * XXX Otherwise we rather have to convert to wide first and act on that,
609 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
610 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
611 #ifdef a_SHEXP_QUOTE_RECURSE
612 # define jrecurse jrecurse
613 struct a_shexp_quote_lvl sql;
614 #else
615 # define jrecurse jstep
616 #endif
617 struct n_visual_info_ctx vic;
/* .head is used while reversing the level list, .store while producing output */
618 union {struct a_shexp_quote_lvl *head; struct n_string *store;} u;
619 ui32_t flags;
620 size_t il;
621 char const *ib;
622 NYD2_ENTER;
624 ib = sqlp->sql_dat.s;
625 il = sqlp->sql_dat.l;
626 flags = sqlp->sql_flags;
628 /* Iterate over the entire input, classify characters and type of quotes
629 * along the way. Whenever a quote change has to be applied, adjust flags
630 * for the new situation -, setup sql.* and recurse- */
631 while(il > 0){
632 char c;
634 c = *ib;
/* Control characters require dollar-single-quotes, except for a tabulator if
 * any other quote type is already active */
635 if(cntrlchar(c)){
636 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
637 goto jstep;
638 if(c == '\t' && (flags & (a_SHEXP_QUOTE_T_REVSOL |
639 a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOUBLE)))
640 goto jstep;
641 #ifdef a_SHEXP_QUOTE_RECURSE
642 ++sqcp->sqc_cnt_dollar;
643 #endif
644 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
645 goto jrecurse;
/* Whitespace, double-quote and dollar: single-quotes unless some quote type
 * is active already */
646 }else if(blankspacechar(c) || c == '"' || c == '$'){
647 if(flags & a_SHEXP_QUOTE_T_MASK)
648 goto jstep;
649 #ifdef a_SHEXP_QUOTE_RECURSE
650 ++sqcp->sqc_cnt_single;
651 #endif
652 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
653 goto jrecurse;
/* A single-quote cannot be expressed within single-quotes: upgrade to
 * dollar-single-quotes unless another (non-single) type is active */
654 }else if(c == '\''){
655 if(flags & (a_SHEXP_QUOTE_T_MASK & ~a_SHEXP_QUOTE_T_SINGLE))
656 goto jstep;
657 #ifdef a_SHEXP_QUOTE_RECURSE
658 ++sqcp->sqc_cnt_dollar;
659 #endif
660 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
661 goto jrecurse;
662 }else if(c == '\\'){
663 if(flags & a_SHEXP_QUOTE_T_MASK)
664 goto jstep;
665 #ifdef a_SHEXP_QUOTE_RECURSE
666 ++sqcp->sqc_cnt_single;
667 #endif
668 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
669 goto jrecurse;
/* Non-ASCII bytes only enforce dollar-single-quotes in ROUNDTRIP mode */
670 }else if(!asciichar(c)){
671 /* Need to keep together multibytes */
672 #ifdef a_SHEXP_QUOTE_RECURSE
673 memset(&vic, 0, sizeof vic);
674 vic.vic_indat = ib;
675 vic.vic_inlen = il;
676 n_visual_info(&vic,
677 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
678 #endif
679 /* xxx check whether resulting \u would be ASCII */
680 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP) ||
681 (flags & a_SHEXP_QUOTE_T_DOLLAR)){
682 #ifdef a_SHEXP_QUOTE_RECURSE
683 ib = vic.vic_oudat;
684 il = vic.vic_oulen;
685 continue;
686 #else
687 goto jstep;
688 #endif
690 #ifdef a_SHEXP_QUOTE_RECURSE
691 ++sqcp->sqc_cnt_dollar;
692 #endif
693 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
694 goto jrecurse;
695 }else
696 jstep:
697 ++ib, --il;
/* Remember what this level needs */
699 sqlp->sql_flags = flags;
701 /* Level made the great and completed processing input. Reverse the list of
702 * levels, detect the "most fancy" quote type needed along this way */
703 /* XXX Due to restriction as above very crude */
/* il sums up the size estimate: the data length, plus half of it again for
 * levels which require quoting */
704 for(flags = 0, il = 0, u.head = NULL; sqlp != NULL;){
705 struct a_shexp_quote_lvl *tmp;
707 tmp = sqlp->sql_link;
708 sqlp->sql_link = u.head;
709 u.head = sqlp;
710 il += sqlp->sql_dat.l;
711 if(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK)
712 il += (sqlp->sql_dat.l >> 1);
713 flags |= sqlp->sql_flags;
714 sqlp = tmp;
716 sqlp = u.head;
718 /* Finally work the substrings in the correct order, adjusting quotes along
719 * the way as necessary. Start off with the "most fancy" quote, so that
720 * the user sees an overall boundary she can orientate herself on.
721 * We do it like that to be able to give the user some "encapsulation
722 * experience", to address what strikes me is a problem of sh(1)ell quoting:
723 * different to, e.g., perl(1), where you see at a glance where a string
724 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
725 * visual appearance of "a string" as such */
726 u.store = n_string_reserve(sqcp->sqc_store, il);
/* Open the quote; precedence is dollar, then double, then single.  From here
 * on flags contains at most one quote type bit */
728 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
729 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
730 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
731 }else if(flags & a_SHEXP_QUOTE_T_DOUBLE){
732 u.store = n_string_push_c(u.store, '"');
733 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOUBLE;
734 }else if(flags & a_SHEXP_QUOTE_T_SINGLE){
735 u.store = n_string_push_c(u.store, '\'');
736 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
737 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
738 flags &= ~a_SHEXP_QUOTE_T_MASK;
740 /* Work all the levels */
741 for(; sqlp != NULL; sqlp = sqlp->sql_link){
742 /* As necessary update our mode of quoting */
743 #ifdef a_SHEXP_QUOTE_RECURSE
744 il = 0;
746 switch(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK){
747 case a_SHEXP_QUOTE_T_DOLLAR:
748 if(!(flags & a_SHEXP_QUOTE_T_DOLLAR))
749 il = a_SHEXP_QUOTE_T_DOLLAR;
750 break;
751 case a_SHEXP_QUOTE_T_DOUBLE:
752 if(!(flags & (a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
753 il = a_SHEXP_QUOTE_T_DOLLAR;
754 break;
755 case a_SHEXP_QUOTE_T_SINGLE:
756 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
757 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
758 il = a_SHEXP_QUOTE_T_SINGLE;
759 break;
760 default:
761 case a_SHEXP_QUOTE_T_REVSOL:
762 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
763 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
764 il = a_SHEXP_QUOTE_T_REVSOL;
765 break;
768 if(il != 0){
769 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
770 u.store = n_string_push_c(u.store, '\'');
771 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
772 u.store = n_string_push_c(u.store, '"');
773 flags &= ~a_SHEXP_QUOTE_T_MASK;
775 flags |= (ui32_t)il;
776 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
777 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
778 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
779 u.store = n_string_push_c(u.store, '"');
780 else if(flags & a_SHEXP_QUOTE_T_SINGLE)
781 u.store = n_string_push_c(u.store, '\'');
783 #endif /* a_SHEXP_QUOTE_RECURSE */
785 /* Work the level's substring */
786 ib = sqlp->sql_dat.s;
787 il = sqlp->sql_dat.l;
789 while(il > 0){
790 char c2, c;
792 c = *ib;
/* Control characters: a tabulator is pushed as-is (after a reverse solidus
 * in T_REVSOL mode) unless in dollar-single-quotes, where it becomes \t like
 * the other characters with a mnemonic; any other control character is
 * written as \c followed by the character XORed with 0x40 */
794 if(cntrlchar(c)){
795 assert(c == '\t' || (flags & a_SHEXP_QUOTE_T_DOLLAR));
796 assert((flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
797 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)));
798 switch((c2 = c)){
799 case 0x07: c = 'a'; break;
800 case 0x08: c = 'b'; break;
801 case 0x0A: c = 'n'; break;
802 case 0x0B: c = 'v'; break;
803 case 0x0C: c = 'f'; break;
804 case 0x0D: c = 'r'; break;
805 case 0x1B: c = 'E'; break;
806 default: break;
807 case 0x09:
808 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
809 c = 't';
810 break;
812 if(flags & a_SHEXP_QUOTE_T_REVSOL)
813 u.store = n_string_push_c(u.store, '\\');
814 goto jpush;
816 u.store = n_string_push_c(u.store, '\\');
/* c == c2 means there was no mnemonic for this control character */
817 if(c == c2){
818 u.store = n_string_push_c(u.store, 'c');
819 c ^= 0x40;
821 goto jpush;
822 }else if(blankspacechar(c) || c == '"' || c == '$'){
823 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
824 goto jpush;
825 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE));
826 u.store = n_string_push_c(u.store, '\\');
827 goto jpush;
828 }else if(c == '\''){
829 if(flags & a_SHEXP_QUOTE_T_DOUBLE)
830 goto jpush;
831 assert(!(flags & a_SHEXP_QUOTE_T_SINGLE));
832 u.store = n_string_push_c(u.store, '\\');
833 goto jpush;
834 }else if(c == '\\'){
835 if(flags & a_SHEXP_QUOTE_T_SINGLE)
836 goto jpush;
837 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE |
838 a_SHEXP_QUOTE_T_DOLLAR));
839 u.store = n_string_push_c(u.store, '\\');
840 goto jpush;
841 }else if(asciichar(c)){
842 /* Shorthand: we can simply push that thing out */
843 jpush:
844 u.store = n_string_push_c(u.store, c);
845 ++ib, --il;
846 }else{
847 /* Not an ASCII character, take care not to split up multibyte
848 * sequences etc. */
/* With OPT_UNICODE set the input is decoded as UTF-8: in ROUNDTRIP mode, or
 * if the character is U+FFFD, a \uXXXX or \UXXXXXXXX escape is written, else
 * the bytes of the sequence are copied as-is */
849 #ifdef HAVE_NATCH_CHAR
850 if(options & OPT_UNICODE){
851 ui32_t uc;
852 char const *ib2;
853 size_t il2, il3;
855 ib2 = ib;
856 il3 = il2 = il;
857 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
858 char itoa[32];
859 char const *cp;
/* il2 becomes the byte length of the decoded sequence */
861 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
862 if((flags & a_SHEXP_QUOTE_ROUNDTRIP) || uc == 0xFFFDu){
863 /* Use padding to make ambiguities impossible */
864 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
865 (uc > 0xFFFFu ? 'U' : 'u'),
866 (int)(uc > 0xFFFFu ? 8 : 4), uc);
867 cp = itoa;
868 }else{
869 il3 = il2;
870 cp = &ib[0];
872 u.store = n_string_push_buf(u.store, cp, il3);
873 ib += il2, il -= il2;
874 continue;
877 #endif /* HAVE_NATCH_CHAR */
/* Otherwise n_visual_info() isolates one character; presumably .vic_oudat
 * and .vic_oulen describe the unconsumed remainder afterwards - this is how
 * they are used below */
879 memset(&vic, 0, sizeof vic);
880 vic.vic_indat = ib;
881 vic.vic_inlen = il;
882 n_visual_info(&vic,
883 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
885 /* Work this substring as sensitive as possible */
/* il temporarily is the byte length of the isolated character */
886 il -= vic.vic_oulen;
887 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP))
888 u.store = n_string_push_buf(u.store, ib, il);
/* ROUNDTRIP: try a conversion to UTF-8 in order to write a \u / \U escape */
889 #ifdef HAVE_ICONV
890 else if((vic.vic_indat = n_iconv_onetime_cp(n_ICONV_NONE,
891 "utf-8", charset_get_lc(), savestrbuf(ib, il))) != NULL){
892 ui32_t uc;
893 char const *ib2;
894 size_t il2, il3;
896 il3 = il2 = strlen(ib2 = vic.vic_indat);
897 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
898 char itoa[32];
900 il2 = PTR2SIZE(&ib2[0] - &vic.vic_indat[0]);
901 /* Use padding to make ambiguities impossible */
902 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
903 (uc > 0xFFFFu ? 'U' : 'u'),
904 (int)(uc > 0xFFFFu ? 8 : 4), uc);
905 u.store = n_string_push_buf(u.store, itoa, il3);
906 }else
907 goto Jxseq;
909 #endif
/* Last resort: each byte is written as a \xHH sequence, the "FF" placeholder
 * is overwritten with the hexadecimal value of the byte */
910 else
911 #ifdef HAVE_ICONV
912 Jxseq:
913 #endif
914 while(il-- > 0){
915 u.store = n_string_push_buf(u.store, "\\xFF",
916 sizeof("\\xFF") -1);
917 n_c_to_hex_base16(&u.store->s_dat[u.store->s_len - 2], *ib++);
/* Continue behind the character which was just handled */
920 ib = vic.vic_oudat;
921 il = vic.vic_oulen;
926 /* Close an open quote */
927 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
928 u.store = n_string_push_c(u.store, '\'');
929 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
930 u.store = n_string_push_c(u.store, '"');
931 #ifdef a_SHEXP_QUOTE_RECURSE
932 jleave:
933 #endif
934 NYD2_LEAVE;
935 return;
937 #ifdef a_SHEXP_QUOTE_RECURSE
938 jrecurse:
939 sqlp->sql_dat.l -= il;
941 sql.sql_link = sqlp;
942 sql.sql_dat.s = UNCONST(ib);
943 sql.sql_dat.l = il;
944 sql.sql_flags = flags;
945 a_shexp__quote(sqcp, &sql);
946 goto jleave;
947 #endif
949 #undef jrecurse
950 #undef a_SHEXP_QUOTE_RECURSE
953 FL char *
954 fexpand(char const *name, enum fexp_mode fexpm)
956 struct str s;
957 char const *cp, *res;
958 bool_t dyn;
959 NYD_ENTER;
961 pstate &= ~PS_EXPAND_MULTIRESULT;
963 /* The order of evaluation is "%" and "#" expand into constants.
964 * "&" can expand into "+". "+" can expand into shell meta characters.
965 * Shell meta characters expand into constants.
966 * This way, we make no recursive expansion */
967 if ((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
968 res = UNCONST(name);
970 if(!(fexpm & FEXP_NSPECIAL)){
971 jnext:
972 dyn = FAL0;
973 switch (*res) {
974 case '%':
975 if(res[1] == ':' && res[2] != '\0')
976 res = &res[2];
977 else{
978 bool_t force;
980 force = (res[1] != '\0');
981 res = a_shexp_findmail((force ? &res[1] : myname), force);
982 if(force)
983 goto jislocal;
985 goto jnext;
986 case '#':
987 if (res[1] != '\0')
988 break;
989 if (prevfile[0] == '\0') {
990 n_err(_("No previous file\n"));
991 res = NULL;
992 goto jleave;
994 res = prevfile;
995 goto jislocal;
996 case '&':
997 if (res[1] == '\0')
998 res = ok_vlook(MBOX);
999 break;
1003 /* POSIX: if *folder* unset or null, "+" shall be retained */
1004 if (!(fexpm & FEXP_NFOLDER) && *res == '+' &&
1005 *(cp = folder_query()) != '\0') {
1006 res = str_concat_csvl(&s, cp, &res[1], NULL)->s;
1007 dyn = TRU1;
1009 /* TODO *folder* can't start with %[:], can it!?! */
1010 if (res[0] == '%' && res[1] == ':') {
1011 res += 2;
1012 goto jnext;
1016 /* Do some meta expansions */
1017 if((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
1018 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
1019 : anyof(res, "{}[]*?$"))){
1020 bool_t doexp;
1022 if(fexpm & FEXP_NOPROTO)
1023 doexp = TRU1;
1024 else switch(which_protocol(res)){
1025 case PROTO_FILE:
1026 case PROTO_MAILDIR:
1027 doexp = TRU1;
1028 break;
1029 default:
1030 doexp = FAL0;
1031 break;
1034 if(doexp){
1035 struct a_shexp_var_stack top;
1037 memset(&top, 0, sizeof top);
1038 top.svs_value = res;
1039 top.svs_bsesc = TRU1;
1040 res = a_shexp_var(&top);
1042 if(res[0] == '~')
1043 res = a_shexp_tilde(res);
1045 if(!(fexpm & FEXP_NSHELL) &&
1046 (res = a_shexp_globname(res, fexpm)) == NULL)
1047 goto jleave;
1048 dyn = TRU1;
1049 }/* else no tilde */
1050 }else if(res[0] == '~'){
1051 res = a_shexp_tilde(res);
1052 dyn = TRU1;
1055 jislocal:
1056 if (fexpm & FEXP_LOCAL)
1057 switch (which_protocol(res)) {
1058 case PROTO_FILE:
1059 case PROTO_MAILDIR:
1060 break;
1061 default:
1062 n_err(_("Not a local file or directory: %s\n"),
1063 n_shexp_quote_cp(name, FAL0));
1064 res = NULL;
1065 break;
1068 jleave:
1069 if(res != NULL && !dyn)
1070 res = savestr(res);
1071 NYD_LEAVE;
1072 return UNCONST(res);
1075 FL int
1076 n_shexp_expand_escape(char const **s, bool_t use_nail_extensions)/* TODO DROP!*/
1078 char const *xs;
1079 int c, n;
1080 NYD2_ENTER;
1082 xs = *s;
1084 if ((c = *xs & 0xFF) == '\0')
1085 goto jleave;
1086 ++xs;
1087 if (c != '\\')
1088 goto jleave;
1090 switch ((c = *xs & 0xFF)) {
1091 case 'a': c = '\a'; break;
1092 case 'b': c = '\b'; break;
1093 case 'c': c = PROMPT_STOP; break;
1094 case 'f': c = '\f'; break;
1095 case 'n': c = '\n'; break;
1096 case 'r': c = '\r'; break;
1097 case 't': c = '\t'; break;
1098 case 'v': c = '\v'; break;
1100 /* ESCape */
1101 case 'E':
1102 case 'e':
1103 c = '\033';
1104 break;
1106 /* Hexadecimal TODO uses ASCII */
1107 case 'X':
1108 case 'x': {
1109 static ui8_t const hexatoi[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
1110 #undef a_HEX
1111 #define a_HEX(n) \
1112 hexatoi[(ui8_t)((n) - ((n) <= '9' ? 48 : ((n) <= 'F' ? 55 : 87)))]
1114 c = 0;
1115 ++xs;
1116 if(hexchar(*xs))
1117 c = a_HEX(*xs);
1118 else{
1119 --xs;
1120 if(options & OPT_D_V)
1121 n_err(_("Invalid \\xNUMBER notation in: %s\n"), xs - 1);
1122 c = '\\';
1123 goto jleave;
1125 ++xs;
1126 if(hexchar(*xs)){
1127 c <<= 4;
1128 c += a_HEX(*xs);
1129 ++xs;
1131 goto jleave;
1133 #undef a_HEX
1135 /* octal, with optional 0 prefix */
1136 case '0':
1137 ++xs;
1138 if(0){
1139 default:
1140 if(*xs == '\0'){
1141 c = '\\';
1142 break;
1145 for (c = 0, n = 3; n-- > 0 && octalchar(*xs); ++xs) {
1146 c <<= 3;
1147 c |= *xs - '0';
1149 goto jleave;
1151 /* S-nail extension for nice (get)prompt(()) support */
1152 case '&':
1153 case '?':
1154 case '$':
1155 case '@':
1156 if (use_nail_extensions) {
1157 switch (c) {
1158 case '&': c = ok_blook(bsdcompat) ? '&' : '?'; break;
1159 case '?': c = (pstate & PS_EVAL_ERROR) ? '1' : '0'; break;
1160 case '$': c = PROMPT_DOLLAR; break;
1161 case '@': c = PROMPT_AT; break;
1163 break;
1166 /* FALLTHRU */
1167 case '\0':
1168 /* A sole <backslash> at EOS is treated as-is! */
1169 c = '\\';
1170 /* FALLTHRU */
1171 case '\\':
1172 break;
1175 ++xs;
1176 jleave:
1177 *s = xs;
1178 NYD2_LEAVE;
1179 return c;
/* Parse one shell-style token from input.
 *
 * store: receives the unquoted / expanded token; may only be NULL if
 *    n_SHEXP_PARSE_DRYRUN is set (see the first assertion).
 * input: the buffer to parse; a length of UIZ_MAX causes strlen() to be used.
 *    On return input->s and input->l describe the unconsumed remainder.
 * flags, as used in this function:
 * - n_SHEXP_PARSE_LOG_D_V: turned into n_SHEXP_PARSE_LOG if OPT_D_V is set;
 * - n_SHEXP_PARSE_LOG: report errors via n_err();
 * - n_SHEXP_PARSE_TRUNC: truncate store before parsing;
 * - n_SHEXP_PARSE_TRIMSPACE: skip blanks before and after the token;
 * - n_SHEXP_PARSE_IFS_ADD_COMMA, n_SHEXP_PARSE_IFS_IS_COMMA: an unquoted
 *   comma ends the token; with IS_COMMA an unquoted blank does not;
 * - n_SHEXP_PARSE_IGNORE_EMPTY: restart if nothing was output but input is
 *   left;
 * - n_SHEXP_PARSE_DRYRUN: do not expand into store; if store is not NULL the
 *   consumed raw input is appended to it instead.
 *
 * Returns a bit mix of n_SHEXP_STATE_*: OUTPUT if anything was produced
 * (empty quotes count), STOP if parsing cannot continue (no input left,
 * a comment was seen, or \c@ was used), plus CONTROL / UNICODE / ERR_* bits. */
1182 FL enum n_shexp_state
1183 n_shexp_parse_token(struct n_string *store, struct str *input, /* TODO WCHAR */
1184 enum n_shexp_parse_flags flags){
1185 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
1186 char utf[8];
1187 #endif
1188 char c2, c, quotec;
1189 enum{
1190 a_NONE = 0,
1191 a_SKIPQ = 1<<0, /* Skip rest of this quote (\c0 ..) */
1192 a_SURPLUS = 1<<1, /* Extended sequence interpretation */
1193 a_NTOKEN = 1<<2 /* "New token": e.g., comments are possible */
1194 } state;
1195 enum n_shexp_state rv;
1196 size_t i, il;
1197 char const *ib_save, *ib;
1198 NYD2_ENTER;
1199 UNINIT(c, '\0');
1201 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
1202 assert(input != NULL);
1203 assert(input->l == 0 || input->s != NULL);
1204 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
1205 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
1206 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
/* Conditional logging becomes unconditional if OPT_D_V is set in options */
1208 if((flags & n_SHEXP_PARSE_LOG_D_V) && (options & OPT_D_V))
1209 flags |= n_SHEXP_PARSE_LOG;
1211 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
1212 store = n_string_trunc(store, 0);
/* ib / il are the working cursor and the number of bytes left */
1214 ib = input->s;
1215 if((il = input->l) == UIZ_MAX)
1216 il = strlen(ib);
1218 jrestart_empty:
1219 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1220 for(; il > 0; ++ib, --il)
1221 if(!blankspacechar(*ib))
1222 break;
/* Publish the (trimmed) start: the error messages and the DRYRUN copy at
 * jleave use input->s as the beginning of this token */
1224 input->s = UNCONST(ib);
1225 input->l = il;
1227 if(il == 0){
1228 rv = n_SHEXP_STATE_STOP;
1229 goto jleave;
1232 if(store != NULL)
1233 store = n_string_reserve(store, MIN(il, 32)); /* XXX */
/* Main loop, one input byte per round.  quotec is the active quote character,
 * or NUL outside of quotes; $'..' is tracked as quotec '\'' plus a_SURPLUS */
1235 for(rv = n_SHEXP_STATE_NONE, state = a_NTOKEN, quotec = '\0'; il > 0;){
1236 --il, c = *ib++;
1238 /* If no quote-mode active.. */
1239 if(quotec == '\0'){
1240 if(c == '"' || c == '\''){
1241 quotec = c;
1242 if(c == '"')
1243 state |= a_SURPLUS;
1244 else
1245 state &= ~a_SURPLUS;
1246 state &= ~a_NTOKEN;
1247 continue;
1248 }else if(c == '$'){
/* $' opens a dollar-single-quote, any other $ tries a variable expansion;
 * a $ as the very last byte is taken literally */
1249 if(il > 0){
1250 state &= ~a_NTOKEN;
1251 if(*ib == '\''){
1252 --il, ++ib;
1253 quotec = '\'';
1254 state |= a_SURPLUS;
1255 continue;
1256 }else
1257 goto J_var_expand;
1259 }else if(c == '\\'){
1260 /* Outside of quotes this just escapes any next character, but a sole
1261 * <backslash> at EOS is left unchanged */
1262 if(il > 0)
1263 --il, c = *ib++;
1264 state &= ~a_NTOKEN;
/* A # starts a comment only where a new token may begin */
1265 }else if(c == '#' && (state & a_NTOKEN)){
1266 rv |= n_SHEXP_STATE_STOP;
1267 goto jleave;
1268 }else if(c == ',' && (flags &
1269 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA)))
1270 break;
1271 else if(blankchar(c)){
/* Unless only commas separate, a blank ends the token; it is pushed back so
 * that it is still part of the unconsumed input */
1272 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
1273 ++il, --ib;
1274 break;
1276 state |= a_NTOKEN;
1277 }else
1278 state &= ~a_NTOKEN;
1279 }else{
1280 /* Quote-mode */
1281 assert(!(state & a_NTOKEN));
1282 if(c == quotec){
/* Closing quote: this also resets a_SKIPQ and a_SURPLUS */
1283 state = a_NONE;
1284 quotec = '\0';
1285 /* Users may need to recognize the presence of empty quotes */
1286 rv |= n_SHEXP_STATE_OUTPUT;
1287 continue;
1288 }else if(c == '\\' && (state & a_SURPLUS)){
/* Remember where the sequence starts so that je_ib_save can write it
 * unchanged on error */
1289 ib_save = ib - 1;
1290 /* A sole <backslash> at EOS is treated as-is! This is ok since
1291 * the "closing quote" error will occur next, anyway */
1292 if(il == 0)
1293 break;
1294 else if((c2 = *ib) == quotec){
1295 --il, ++ib;
1296 c = quotec;
1297 }else if(quotec == '"'){
1298 /* Double quotes:
1299 * The <backslash> shall retain its special meaning as an
1300 * escape character (see Section 2.2.1) only when followed
1301 * by one of the following characters when considered
1302 * special: $ ` " \ <newline> */
1303 switch(c2){
1304 case '$':
1305 case '`':
1306 /* case '"': already handled via c2 == quotec */
1307 case '\\':
1308 --il, ++ib;
1309 c = c2;
1310 /* FALLTHRU */
1311 default:
1312 break;
1314 }else{
1315 /* Dollar-single-quote */
1316 --il, ++ib;
1317 switch(c2){
1318 case '"':
1319 /* case '\'': already handled via c2 == quotec */
1320 case '\\':
1321 c = c2;
1322 break;
1324 case 'b': c = '\b'; break;
1325 case 'f': c = '\f'; break;
1326 case 'n': c = '\n'; break;
1327 case 'r': c = '\r'; break;
1328 case 't': c = '\t'; break;
1329 case 'v': c = '\v'; break;
1331 case 'E':
1332 case 'e': c = '\033'; break;
1334 /* Control character */
1335 case 'c':
1336 if(il == 0)
1337 goto j_dollar_ungetc;
1338 --il, c2 = *ib++;
1339 if(state & a_SKIPQ)
1340 continue;
1341 c = upperconv(c2) ^ 0x40;
1342 if((ui8_t)c > 0x1F && c != 0x7F){ /* ASCII C0: 0..1F, 7F */
1343 if(flags & n_SHEXP_PARSE_LOG)
1344 n_err(_("Invalid \\c notation: %.*s\n"),
1345 (int)input->l, input->s);
1346 rv |= n_SHEXP_STATE_ERR_CONTROL;
1348 /* As an implementation-defined extension, support \c@
1349 * EQ printf(1) alike \c */
1350 if(c == '\0'){
1351 rv |= n_SHEXP_STATE_STOP;
1352 goto jleave;
1354 break;
1356 /* Octal sequence: 1 to 3 octal bytes */
1357 case '0':
1358 /* As an extension (dependent on where you look, echo(1), or
1359 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1360 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1361 c2 = c;
1362 --il, ++ib;
1364 /* FALLTHRU */
1365 case '1': case '2': case '3':
1366 case '4': case '5': case '6': case '7':
1367 c2 -= '0';
1368 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1369 c2 = (c2 << 3) | (c - '0');
1370 --il, ++ib;
1372 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
/* A third digit only fits into a byte if the value so far is at most 0x1F */
1373 if((ui8_t)c2 > 0x1F){
1374 if(flags & n_SHEXP_PARSE_LOG)
1375 n_err(_("\\0 argument exceeds a byte: %.*s\n"),
1376 (int)input->l, input->s);
1377 rv |= n_SHEXP_STATE_ERR_NUMBER;
1378 --il, ++ib;
1379 /* Write unchanged */
/* Shared error exit: output the raw bytes from ib_save up to ib (not for
 * DRYRUN) and go on with the next byte */
1380 je_ib_save:
1381 rv |= n_SHEXP_STATE_OUTPUT;
1382 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1383 store = n_string_push_buf(store, ib_save,
1384 PTR2SIZE(ib - ib_save));
1385 continue;
1387 c2 = (c2 << 3) | (c -= '0');
1388 --il, ++ib;
/* A resulting NUL byte enables a_SKIPQ, which suppresses output until the
 * closing quote resets state */
1390 if((c = c2) == '\0')
1391 state |= a_SKIPQ;
1392 if(state & a_SKIPQ)
1393 continue;
1394 break;
1396 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
1397 case 'U':
1398 i = 8;
1399 if(0){
1400 /* FALLTHRU */
1401 case 'u':
1402 i = 4;
1404 if(il == 0)
1405 goto j_dollar_ungetc;
1406 if(0){
1407 /* FALLTHRU */
1409 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1410 case 'X':
1411 case 'x':
1412 if(il == 0)
1413 goto j_dollar_ungetc;
1414 i = 2;
1416 /* C99 */{
1417 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
1418 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1420 size_t no, j;
/* Collect at most i hexadecimal digits into no; j counts the digits seen */
1422 i = MIN(il, i);
1423 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
1424 c = *ib;
1425 if(hexchar(c)){
1426 no <<= 4;
1427 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
1428 : ((c) <= 'F' ? 55 : 87)))];
1429 }else if(j == 0){
1430 if(state & a_SKIPQ)
1431 break;
1432 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
1433 if(flags & n_SHEXP_PARSE_LOG)
1434 n_err(_("Invalid \\%c notation: %.*s\n"),
1435 c2, (int)input->l, input->s);
1436 rv |= n_SHEXP_STATE_ERR_NUMBER;
1437 goto je_ib_save;
1438 }else
1439 break;
1442 /* Unicode massage */
1443 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
1444 if((c = (char)no) == '\0')
1445 state |= a_SKIPQ;
1446 }else if(no == 0)
1447 state |= a_SKIPQ;
1448 else if(!(state & a_SKIPQ)){
1449 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1450 store = n_string_reserve(store, MAX(j, 4));
1452 c2 = FAL0;
1453 if(no > 0x10FFFF){ /* XXX magic; CText */
1454 if(flags & n_SHEXP_PARSE_LOG)
1455 n_err(_("\\U argument exceeds 0x10FFFF: %.*s\n"),
1456 (int)input->l, input->s);
1457 rv |= n_SHEXP_STATE_ERR_NUMBER;
1458 /* But normalize the output anyway */
1459 goto Je_uni_norm;
1462 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
1463 j = n_utf32_to_utf8(no, utf);
1464 #endif
1465 #ifdef HAVE_NATCH_CHAR
1466 if(options & OPT_UNICODE){
1467 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
1468 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1469 store = n_string_push_buf(store, utf, j);
1470 continue;
1472 #endif
1473 #ifdef HAVE_ICONV
1474 /* C99 */{
1475 char *icp;
1477 icp = n_iconv_onetime_cp(n_ICONV_NONE,
1478 NULL, NULL, utf);
1479 if(icp != NULL){
1480 rv |= n_SHEXP_STATE_OUTPUT;
1481 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1482 store = n_string_push_cp(store, icp);
1483 continue;
1486 #endif
/* Fallback: write the code point back as a normalized \uXXXX / \UXXXXXXXX.
 * NOTE(review): the goto Je_uni_norm above (no > 0x10FFFF) jumps behind the
 * DRYRUN test of this if, so in that case store is pushed to even for
 * DRYRUN, where the entry assertion allows store to be NULL -- confirm
 * whether this is intended */
1487 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Je_uni_norm:{
1488 char itoa[32];
1490 rv |= n_SHEXP_STATE_OUTPUT |
1491 n_SHEXP_STATE_ERR_UNICODE;
1492 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1493 (no > 0xFFFFu ? 'U' : 'u'),
1494 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
1495 store = n_string_push_buf(store, itoa, i);
1497 continue;
1499 if(state & a_SKIPQ)
1500 continue;
1502 break;
1504 /* Extension: \$ can be used to expand a variable.
1505 * Bug|ad effect: if conversion fails, not written "as-is" */
1506 case '$':
1507 if(il == 0)
1508 goto j_dollar_ungetc;
1509 goto J_var_expand;
1511 default:
1512 j_dollar_ungetc:
1513 /* Follow bash behaviour, print sequence unchanged */
1514 ++il, --ib;
1515 break;
/* Variable expansion; also entered via goto for an unquoted $ and for \$
 * within a dollar-single-quote */
1518 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
1519 bool_t brace;
1521 if(!(brace = (*ib == '{')) || il > 1){
1522 char const *cp, *vp;
1524 ib_save = ib - 1;
1525 il -= brace;
1526 vp = (ib += brace);
/* i becomes the length of the variable name that starts at vp */
1528 for(i = 0; il > 0 && (c = *ib, a_SHEXP_ISVARC(c)); ++i)
1529 --il, ++ib;
1531 if(brace){
1532 if(il == 0 || *ib != '}'){
1533 if(state & a_SKIPQ){
1534 assert((state & a_SURPLUS) && quotec == '\'');
1535 continue;
1537 if(flags & n_SHEXP_PARSE_LOG)
1538 n_err(_("Closing brace missing for ${VAR}: %.*s\n"),
1539 (int)input->l, input->s);
1540 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN |
1541 n_SHEXP_STATE_ERR_BRACE;
1542 goto je_ib_save;
1544 --il, ++ib;
1547 if(state & a_SKIPQ)
1548 continue;
/* Without a name a plain $ is output literally, whereas ${} is an error */
1550 if(i == 0){
1551 if(brace){
1552 if(flags & n_SHEXP_PARSE_LOG)
1553 n_err(_("Bad substitution (${}): %.*s\n"),
1554 (int)input->l, input->s);
1555 rv |= n_SHEXP_STATE_ERR_BADSUB;
1556 goto je_ib_save;
1558 c = '$';
1559 }else if(flags & n_SHEXP_PARSE_DRYRUN)
1560 continue;
1561 else{
1562 vp = savestrbuf(vp, i);
1563 /* Check getenv(3) shall no internal variable exist! */
1564 if((cp = vok_vlook(vp)) != NULL || (cp = getenv(vp)) != NULL){
1565 rv |= n_SHEXP_STATE_OUTPUT;
1566 store = n_string_push_cp(store, cp);
/* Flag the result if the expanded value contains a control character */
1567 for(; (c = *cp) != '\0'; ++cp)
1568 if(cntrlchar(c)){
1569 rv |= n_SHEXP_STATE_CONTROL;
1570 break;
1573 continue;
1576 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
1577 continue;
/* Common tail: output the (possibly translated) byte c unless skipping */
1581 if(!(state & a_SKIPQ)){
1582 rv |= n_SHEXP_STATE_OUTPUT;
1583 if(cntrlchar(c))
1584 rv |= n_SHEXP_STATE_CONTROL;
1585 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1586 store = n_string_push_c(store, c);
/* The loop ended while a quote was still open */
1590 if(quotec != '\0'){
1591 if(flags & n_SHEXP_PARSE_LOG)
1592 n_err(_("no closing quote: %.*s\n"), (int)input->l, input->s);
1593 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
1596 jleave:
/* DRYRUN with a store: hand over the raw input consumed for this token */
1597 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
1598 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
1599 rv |= n_SHEXP_STATE_OUTPUT;
1602 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1603 for(; il > 0; ++ib, --il)
1604 if(!blankchar(*ib))
1605 break;
/* Report the unconsumed remainder back to the caller */
1607 input->l = il;
1608 input->s = UNCONST(ib);
/* Not yet stopped: either retry on an empty result (IGNORE_EMPTY) or signal
 * STOP once nothing was output and no input is left */
1610 if(!(rv & n_SHEXP_STATE_STOP)){
1611 if(il > 0 && !(rv & n_SHEXP_STATE_OUTPUT) &&
1612 (flags & n_SHEXP_PARSE_IGNORE_EMPTY))
1613 goto jrestart_empty;
1614 if(!(rv & n_SHEXP_STATE_OUTPUT) && il == 0)
1615 rv |= n_SHEXP_STATE_STOP;
1617 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1618 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1619 NYD2_LEAVE;
1620 return rv;
1623 FL enum n_shexp_state
1624 n_shexp_parse_token_buf(char **store, char const *indat, size_t inlen,
1625 enum n_shexp_parse_flags flags){
1626 struct n_string ss;
1627 struct str is;
1628 enum n_shexp_state shs;
1629 NYD2_ENTER;
1631 assert(store != NULL);
1632 assert(inlen == 0 || indat != NULL);
1634 n_string_creat_auto(&ss);
1635 is.s = UNCONST(indat);
1636 is.l = inlen;
1638 shs = n_shexp_parse_token(&ss, &is, flags);
1639 if(is.l > 0)
1640 shs &= ~n_SHEXP_STATE_STOP;
1641 else
1642 shs |= n_SHEXP_STATE_STOP;
1643 *store = n_string_cp(&ss);
1644 n_string_drop_ownership(&ss);
1646 n_string_gut(&ss);
1647 NYD2_LEAVE;
1648 return shs;
1651 FL struct n_string *
1652 n_shexp_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1653 struct a_shexp_quote_lvl sql;
1654 struct a_shexp_quote_ctx sqc;
1655 NYD2_ENTER;
1657 assert(store != NULL);
1658 assert(input != NULL);
1659 assert(input->l == 0 || input->s != NULL);
1661 memset(&sqc, 0, sizeof sqc);
1662 sqc.sqc_store = store;
1663 sqc.sqc_input.s = input->s;
1664 if((sqc.sqc_input.l = input->l) == UIZ_MAX)
1665 sqc.sqc_input.l = strlen(input->s);
1666 sqc.sqc_flags = rndtrip ? a_SHEXP_QUOTE_ROUNDTRIP : a_SHEXP_QUOTE_NONE;
1668 if(sqc.sqc_input.l == 0)
1669 store = n_string_push_buf(store, "''", sizeof("''") -1);
1670 else{
1671 memset(&sql, 0, sizeof sql);
1672 sql.sql_dat = sqc.sqc_input;
1673 sql.sql_flags = sqc.sqc_flags;
1674 a_shexp__quote(&sqc, &sql);
1676 NYD2_LEAVE;
1677 return store;
1680 FL char *
1681 n_shexp_quote_cp(char const *cp, bool_t rndtrip){
1682 struct n_string store;
1683 struct str input;
1684 char *rv;
1685 NYD2_ENTER;
1687 assert(cp != NULL);
1689 input.s = UNCONST(cp);
1690 input.l = UIZ_MAX;
1691 rv = n_string_cp(n_shexp_quote(n_string_creat_auto(&store), &input,
1692 rndtrip));
1693 n_string_gut(n_string_drop_ownership(&store));
1694 NYD2_LEAVE;
1695 return rv;
1698 /* s-it-mode */