Add OPT_ALWAYS_UNICODE_LOCALE (Predrag Punosevac)..
[s-mailx.git] / shexp.c
blob536bd7cce5309ee6c50f1e767a0e13c4dca0c1ee
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
5 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
6 * Copyright (c) 2012 - 2016 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7 */
8 /*
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36 #undef n_FILE
37 #define n_FILE shexp
39 #ifndef HAVE_AMALGAMATION
40 # include "nail.h"
41 #endif
43 #include <sys/wait.h>
45 #include <pwd.h>
47 #ifdef HAVE_FNMATCH
48 # include <dirent.h>
49 # include <fnmatch.h>
50 #endif
52 /* POSIX says
53 * Environment variable names used by the utilities in the Shell and
54 * Utilities volume of POSIX.1-2008 consist solely of uppercase
55 * letters, digits, and the <underscore> ('_') from the characters
56 * defined in Portable Character Set and do not begin with a digit.
57 * Other characters may be permitted by an implementation;
58 * applications shall tolerate the presence of such names.
59 * We do support the hyphen "-" because it is common for mailx. */
/* True if C may be part of a variable name: alnumchar(C), '_' or '-'.
 * C is evaluated up to three times, so it must be free of side effects */
60 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* Flags used by a_shexp__quote(): bit 0 is a behaviour flag, bits 8 to 11
 * name the type of quoting and are combined in a_SHEXP_QUOTE_T_MASK */
62 enum a_shexp_quote_flags{
63 a_SHEXP_QUOTE_NONE,
64 a_SHEXP_QUOTE_ROUNDTRIP = 1u<<0, /* Result won't be consumed immediately */
66 a_SHEXP_QUOTE_T_REVSOL = 1u<<8, /* Type: by reverse solidus */
67 a_SHEXP_QUOTE_T_SINGLE = 1u<<9, /* Type: single-quotes */
68 a_SHEXP_QUOTE_T_DOUBLE = 1u<<10, /* Type: double-quotes */
69 a_SHEXP_QUOTE_T_DOLLAR = 1u<<11, /* Type: dollar-single-quotes */
70 a_SHEXP_QUOTE_T_MASK = a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
71 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR,
/* NOTE(review): presumably the first bit position not used by the flags
 * above; it is not referenced in the visible part of this file - confirm */
73 a_SHEXP_QUOTE__FREESHIFT = 16u
/* One frame per expanded segment of a_shexp_var(): each recursion level links
 * a new frame to the one of its caller via .svs_next */
76 struct a_shexp_var_stack {
77 struct a_shexp_var_stack *svs_next; /* Outer stack frame */
78 char const *svs_value; /* Remaining value to expand */
79 size_t svs_len; /* Length of .svs_dat this level */
80 char const *svs_dat; /* Result data of this level */
81 bool_t svs_bsesc; /* Shall backslash escaping be performed */
82 ui8_t svs__dummy[7];
85 #ifdef HAVE_FNMATCH
/* State of one directory level of a_shexp__glob() */
86 struct a_shexp_glob_ctx{
87 char const *sgc_patdat; /* Remaining pattern (at and below level) */
88 size_t sgc_patlen; /* Length of .sgc_patdat */
89 struct n_string *sgc_outer; /* Resolved path up to this level */
90 ui32_t sgc_flags; /* a_SILENT, a_DEEP.. bits local to a_shexp__glob() */
91 ui8_t sgc__dummy[4];
93 #endif
/* Overall state of one quoting operation as passed to a_shexp__quote().
 * Of the .sqc_cnt_* counters only _single and _dollar are touched by
 * a_shexp__quote(), and only if a_SHEXP_QUOTE_RECURSE is defined */
95 struct a_shexp_quote_ctx{
96 struct n_string *sqc_store; /* Result storage */
97 struct str sqc_input; /* Input data, topmost level */
98 ui32_t sqc_cnt_revso;
99 ui32_t sqc_cnt_single;
100 ui32_t sqc_cnt_double;
101 ui32_t sqc_cnt_dollar;
102 enum a_shexp_quote_flags sqc_flags;
103 ui8_t sqc__dummy[4];
/* One (sub)string of the input of a_shexp__quote() plus the quote type
 * flags which have been detected as necessary for it */
106 struct a_shexp_quote_lvl{
107 struct a_shexp_quote_lvl *sql_link; /* Outer level */
108 struct str sql_dat; /* This level (has to) handle(d) */
109 enum a_shexp_quote_flags sql_flags; /* a_SHEXP_QUOTE_* of this level */
110 ui8_t sql__dummy[4];
113 /* Locate the user's mailbox file (where new, unread mail is queued).
 * Unless force is set *inbox* and then $MAIL are tried first; the last
 * resort is the path built from VAL_MAIL and user */
114 static char *a_shexp_findmail(char const *user, bool_t force);
116 /* Expand ^~/? and ^~USER/? constructs.
117 * Returns the completely resolved (maybe empty or identical to input)
118 * salloc()ed string */
119 static char *a_shexp_tilde(char const *s);
121 /* (Try to) Expand any shell variable in s.
122 * Returns the completely resolved (maybe empty) salloc()ed string.
123 * Logs on error */
124 static char *a_shexp_var(struct a_shexp_var_stack *svsp);
126 /* Perform fnmatch(3). May return NULL on error.
 * a_shexp__glob() works one directory level of the pattern and collects
 * matches in *slpp, a_shexp__globsort() is the qsort(3) comparator which is
 * used when multiple matches are joined */
127 static char *a_shexp_globname(char const *name, enum fexp_mode fexpm);
128 #ifdef HAVE_FNMATCH
129 static bool_t a_shexp__glob(struct a_shexp_glob_ctx *sgcp,
130 struct n_strlist **slpp);
131 static int a_shexp__globsort(void const *cvpa, void const *cvpb);
132 #endif
134 /* Parse an input string and create a sh(1)ell-quoted result.
 * The result is appended to sqcp->sqc_store */
135 static void a_shexp__quote(struct a_shexp_quote_ctx *sqcp,
136 struct a_shexp_quote_lvl *sqlp);
138 static char *
139 a_shexp_findmail(char const *user, bool_t force){
140 char *rv;
141 char const *cp;
142 NYD2_ENTER;
144 if(!force){
145 if((cp = ok_vlook(inbox)) != NULL && *cp != '\0'){
146 /* Folder extra introduced to avoid % recursion loops */
147 if((rv = fexpand(cp, FEXP_NSPECIAL | FEXP_NFOLDER | FEXP_NSHELL)
148 ) != NULL)
149 goto jleave;
150 n_err(_("*inbox* expansion failed, using $MAIL / builtin: %s\n"), cp);
153 if((cp = ok_vlook(MAIL)) != NULL){
154 rv = savestr(cp);
155 goto jleave;
159 /* C99 */{
160 size_t ul, i;
162 ul = strlen(user) +1;
163 i = sizeof(VAL_MAIL) -1 + 1 + ul;
165 rv = salloc(i);
166 memcpy(rv, VAL_MAIL, i = sizeof(VAL_MAIL));
167 rv[i] = '/';
168 memcpy(&rv[++i], user, ul);
170 jleave:
171 NYD2_LEAVE;
172 return rv;
175 static char *
176 a_shexp_tilde(char const *s){
177 struct passwd *pwp;
178 size_t nl, rl;
179 char const *rp, *np;
180 char *rv;
181 NYD2_ENTER;
183 if(*(rp = &s[1]) == '/' || *rp == '\0'){
184 np = ok_vlook(HOME);
185 rl = strlen(rp);
186 }else{
187 if((rp = strchr(np = rp, '/')) != NULL){
188 nl = PTR2SIZE(rp - np);
189 np = savestrbuf(np, nl);
190 rl = strlen(rp);
191 }else
192 rl = 0;
194 if((pwp = getpwnam(np)) == NULL){
195 rv = savestr(s);
196 goto jleave;
198 np = pwp->pw_dir;
201 nl = strlen(np);
202 rv = salloc(nl + 1 + rl +1);
203 memcpy(rv, np, nl);
204 if(rl > 0){
205 memcpy(rv + nl, rp, rl);
206 nl += rl;
208 rv[nl] = '\0';
209 jleave:
210 NYD2_LEAVE;
211 return rv;
/* Expand shell variables in svsp->svs_value, one segment per invocation.
 * A segment is either a literal run up to the next '$' (which terminates the
 * run unless the preceding byte was an unescaped backslash and .svs_bsesc is
 * set; in that mode escaping backslashes are removed from the run), or one
 * $NAME / ${NAME} reference, which is looked up via vok_vlook() and then
 * getenv(3) and expands to the empty string if neither knows it.
 * If input remains after the segment the function recurses with a new frame
 * that links to svsp.  The innermost invocation reverses the frame list,
 * joins the data of all frames in a salloc()ed buffer and returns that */
214 static char *
215 a_shexp_var(struct a_shexp_var_stack *svsp)
217 struct a_shexp_var_stack next, *np, *tmp;
218 char const *vp;
219 char lc, c, *cp, *rv;
220 size_t i;
221 NYD2_ENTER;
223 if (*(vp = svsp->svs_value) != '$') {
224 bool_t bsesc = svsp->svs_bsesc;
225 union {bool_t hadbs; char c;} u = {FAL0};
227 svsp->svs_dat = vp;
/* Scan the literal run; lc tracks a pending backslash only if bsesc */
228 for (lc = '\0', i = 0; ((c = *vp) != '\0'); ++i, ++vp) {
229 if (c == '$' && lc != '\\')
230 break;
231 if (!bsesc)
232 continue;
233 lc = (lc == '\\') ? (u.hadbs = TRU1, '\0') : c;
235 svsp->svs_len = i;
/* An escape was seen: work on a copy and drop the escaping backslashes */
237 if (u.hadbs) {
238 svsp->svs_dat = cp = savestrbuf(svsp->svs_dat, i);
240 for (lc = '\0', rv = cp; (u.c = *cp++) != '\0';) {
241 if (u.c != '\\' || lc == '\\')
242 *rv++ = u.c;
243 lc = (lc == '\\') ? '\0' : u.c;
245 *rv = '\0';
247 svsp->svs_len = PTR2SIZE(rv - svsp->svs_dat);
249 } else {
/* lc is reused as a flag: the name is enclosed in braces */
250 if ((lc = (*++vp == '{')))
251 ++vp;
253 svsp->svs_dat = vp;
254 for (i = 0; (c = *vp) != '\0'; ++i, ++vp)
255 if (!a_SHEXP_ISVARC(c))
256 break;
258 if (lc) {
259 if (c != '}') {
/* Error: this level takes the entire remaining input verbatim */
260 n_err(_("Variable name misses closing }: %s\n"),
261 svsp->svs_value);
262 svsp->svs_len = strlen(svsp->svs_value);
263 svsp->svs_dat = svsp->svs_value;
264 goto junroll;
266 c = *++vp;
269 svsp->svs_len = i;
270 /* Check getenv(3) shall no internal variable exist! */
271 if ((rv = vok_vlook(cp = savestrbuf(svsp->svs_dat, i))) != NULL ||
272 (rv = getenv(cp)) != NULL)
273 svsp->svs_len = strlen(svsp->svs_dat = rv);
274 else
275 svsp->svs_len = 0, svsp->svs_dat = UNCONST("");
/* More input follows this segment: handle it one level deeper */
277 if (c != '\0')
278 goto jrecurse;
280 /* That level made the great and completed encoding. Build result */
281 junroll:
/* Reverse the frame list so that it starts with the outermost frame, and
 * sum up the length of all segments */
282 for (i = 0, np = svsp, svsp = NULL; np != NULL;) {
283 i += np->svs_len;
284 tmp = np->svs_next;
285 np->svs_next = svsp;
286 svsp = np;
287 np = tmp;
290 cp = rv = salloc(i +1);
291 while (svsp != NULL) {
292 np = svsp;
293 svsp = svsp->svs_next;
294 memcpy(cp, np->svs_dat, np->svs_len);
295 cp += np->svs_len;
297 *cp = '\0';
299 jleave:
300 NYD2_LEAVE;
301 return rv;
302 jrecurse:
303 memset(&next, 0, sizeof next);
304 next.svs_next = svsp;
305 next.svs_value = vp;
306 next.svs_bsesc = svsp->svs_bsesc;
307 rv = a_shexp_var(&next);
308 goto jleave;
311 static char *
312 a_shexp_globname(char const *name, enum fexp_mode fexpm){
313 #ifdef HAVE_FNMATCH
314 struct a_shexp_glob_ctx sgc;
315 struct n_string outer;
316 struct n_strlist *slp;
317 char *cp;
318 NYD_ENTER;
320 memset(&sgc, 0, sizeof sgc);
321 sgc.sgc_patlen = strlen(name);
322 sgc.sgc_patdat = savestrbuf(name, sgc.sgc_patlen);
323 sgc.sgc_outer = n_string_reserve(n_string_creat(&outer), sgc.sgc_patlen);
324 sgc.sgc_flags = ((fexpm & FEXP_SILENT) != 0);
325 slp = NULL;
326 if(a_shexp__glob(&sgc, &slp))
327 cp = (char*)1;
328 else
329 cp = NULL;
330 n_string_gut(&outer);
332 if(cp == NULL)
333 goto jleave;
335 if(slp == NULL){
336 cp = UNCONST(N_("File pattern does not match"));
337 goto jerr;
338 }else if(slp->sl_next == NULL)
339 cp = savestrbuf(slp->sl_dat, slp->sl_len);
340 else if(fexpm & FEXP_MULTIOK){
341 struct n_strlist **sorta, *xslp;
342 size_t i, no, l;
344 no = l = 0;
345 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next){
346 ++no;
347 l += xslp->sl_len + 1;
350 sorta = smalloc(sizeof(*sorta) * no);
351 no = 0;
352 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next)
353 sorta[no++] = xslp;
354 qsort(sorta, no, sizeof *sorta, &a_shexp__globsort);
356 cp = salloc(++l);
357 l = 0;
358 for(i = 0; i < no; ++i){
359 xslp = sorta[i];
360 memcpy(&cp[l], xslp->sl_dat, xslp->sl_len);
361 l += xslp->sl_len;
362 cp[l++] = '\0';
364 cp[l] = '\0';
366 free(sorta);
367 pstate |= PS_EXPAND_MULTIRESULT;
368 }else{
369 cp = UNCONST(N_("File pattern matches multiple results"));
370 goto jerr;
373 jleave:
374 while(slp != NULL){
375 struct n_strlist *tmp = slp;
377 slp = slp->sl_next;
378 free(tmp);
380 NYD_LEAVE;
381 return cp;
383 jerr:
384 if(!(fexpm & FEXP_SILENT)){
385 name = n_shexp_quote_cp(name, FAL0);
386 n_err("%s: %s\n", V_(cp), name);
388 cp = NULL;
389 goto jleave;
391 #else /* HAVE_FNMATCH */
392 UNUSED(fexpm);
394 if(!(fexpm & FEXP_SILENT))
395 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
396 return savestr(name);
397 #endif
400 #ifdef HAVE_FNMATCH
/* Work one solidus-separated level of the pattern in sgcp.
 * The directory to read is "/" or "./" at the outermost level (a_DEEP not
 * set) and the path collected in sgcp->sgc_outer otherwise.  Its entries are
 * matched against the level's pattern with fnmatch(3): if more pattern
 * remains the function recurses for entries which are directories, otherwise
 * the resulting path is stored in a newly allocated node at *slpp.
 * Returns whether no error occurred; errors are logged unless a_SILENT is
 * set in sgcp->sgc_flags.
 * NOTE(review): the recursive call receives &nsgc and shrinks its
 * .sgc_patlen (and overwrites the solidus in the pattern) when it splits off
 * its own level, so a second matching directory seems to recurse with an
 * already shortened pattern - confirm for patterns of three or more levels.
 * NOTE(review): slpp is only advanced locally, nodes which are stored by a
 * recursive call do not advance the slpp of this level - confirm that matches
 * below several directories do not overwrite each other */
401 static bool_t
402 a_shexp__glob(struct a_shexp_glob_ctx *sgcp, struct n_strlist **slpp){
403 enum{a_SILENT = 1<<0, a_DEEP=1<<1, a_SALLOC=1<<2};
405 struct a_shexp_glob_ctx nsgc;
406 struct dirent *dep;
407 DIR *dp;
408 size_t old_outerlen;
409 char const *ccp, *myp;
410 NYD2_ENTER;
412 /* We need some special treatment for the outermost level */
413 if(!(sgcp->sgc_flags & a_DEEP)){
414 if(sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/'){
415 myp = n_string_cp(n_string_push_c(sgcp->sgc_outer, '/'));
416 ++sgcp->sgc_patdat;
417 --sgcp->sgc_patlen;
418 }else
419 myp = "./";
420 }else
421 myp = n_string_cp(sgcp->sgc_outer);
/* Entries are appended to, and later truncated off, this prefix */
422 old_outerlen = sgcp->sgc_outer->s_len;
424 /* Separate current directory/pattern level from any possible remaining
425 * pattern in order to be able to use it for fnmatch(3) */
426 if((ccp = memchr(sgcp->sgc_patdat, '/', sgcp->sgc_patlen)) == NULL)
427 nsgc.sgc_patlen = 0;
428 else{
429 nsgc = *sgcp;
430 nsgc.sgc_flags |= a_DEEP;
431 sgcp->sgc_patlen = PTR2SIZE((nsgc.sgc_patdat = &ccp[1]) -
432 &sgcp->sgc_patdat[0]);
433 nsgc.sgc_patlen -= sgcp->sgc_patlen;
434 /* Trim solidus */
435 if(sgcp->sgc_patlen > 0){
436 assert(sgcp->sgc_patdat[sgcp->sgc_patlen -1] == '/');
437 ((char*)UNCONST(sgcp->sgc_patdat))[--sgcp->sgc_patlen] = '\0';
441 /* Our current directory level */
442 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
443 * xxx be the (sole) result depending on pattern surroundings, etc. */
444 if((dp = opendir(myp)) == NULL){
445 int err;
447 switch((err = errno)){
448 case ENOTDIR:
449 ccp = N_("cannot access paths under non-directory");
450 goto jerr;
451 case ENOENT:
452 ccp = N_("path component of (sub)pattern non-existent");
453 goto jerr;
454 case EACCES:
455 ccp = N_("file permission for file (sub)pattern denied");
456 goto jerr;
457 default:
458 ccp = N_("cannot handle file (sub)pattern");
459 goto jerr;
463 /* As necessary, quote bytes in the current pattern */
464 /* C99 */{
465 char *ncp;
466 size_t i;
467 bool_t need;
/* First pass: compute the length of the quoted pattern */
469 for(need = FAL0, i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
470 switch(*myp){
471 case '\'': case '"': case '\\': case '$':
472 case ' ': case '\t':
473 need = TRU1;
474 ++i;
475 /* FALLTHRU */
476 default:
477 ++i;
478 break;
/* Second pass: create a copy with a reverse solidus before those bytes */
481 if(need){
482 ncp = salloc(i +1);
483 for(i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
484 switch(*myp){
485 case '\'': case '"': case '\\': case '$':
486 case ' ': case '\t':
487 ncp[i++] = '\\';
488 /* FALLTHRU */
489 default:
490 ncp[i++] = *myp;
491 break;
493 ncp[i] = '\0';
494 myp = ncp;
495 }else
496 myp = sgcp->sgc_patdat;
499 while((dep = readdir(dp)) != NULL){
500 switch(fnmatch(myp, dep->d_name, FNM_PATHNAME | FNM_PERIOD)){
501 case 0:{
502 /* A match expresses the desire to recurse if there is more pattern */
503 if(nsgc.sgc_patlen > 0){
504 bool_t isdir;
506 n_string_push_cp((sgcp->sgc_outer->s_len > 1
507 ? n_string_push_c(sgcp->sgc_outer, '/') : sgcp->sgc_outer),
508 dep->d_name);
/* Use the directory entry type if available, stat(2) otherwise or if the
 * type is a symbolic link or unknown */
510 isdir = FAL0;
511 #ifdef HAVE_DIRENT_TYPE
512 if(dep->d_type == DT_DIR)
513 isdir = TRU1;
514 else if(dep->d_type == DT_LNK || dep->d_type == DT_UNKNOWN)
515 #endif
517 struct stat sb;
519 if(stat(n_string_cp(sgcp->sgc_outer), &sb)){
520 ccp = N_("I/O error when querying file status");
521 goto jerr;
522 }else if(S_ISDIR(sb.st_mode))
523 isdir = TRU1;
526 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
527 * TODO Instead save away a list of such n_string's for later */
528 if(isdir && !a_shexp__glob(&nsgc, slpp)){
/* Any non-NULL value signals failure, the inner level has logged already */
529 ccp = (char*)1;
530 goto jleave;
533 n_string_trunc(sgcp->sgc_outer, old_outerlen);
534 }else{
535 struct n_strlist *slp;
536 size_t i, j;
/* Final level: store the path prefix, a solidus as necessary, and the name */
538 i = strlen(dep->d_name);
539 j = (old_outerlen > 0) ? old_outerlen + 1 + i : i;
540 slp = n_STRLIST_MALLOC(j);
541 *slpp = slp;
542 slpp = &slp->sl_next;
543 slp->sl_next = NULL;
544 if((j = old_outerlen) > 0){
545 memcpy(&slp->sl_dat[0], sgcp->sgc_outer->s_dat, j);
546 if(slp->sl_dat[j -1] != '/')
547 slp->sl_dat[j++] = '/';
549 memcpy(&slp->sl_dat[j], dep->d_name, i);
550 slp->sl_dat[j += i] = '\0';
551 slp->sl_len = j;
553 } break;
554 case FNM_NOMATCH:
555 break;
556 default:
557 ccp = N_("fnmatch(3) cannot handle file (sub)pattern");
558 goto jerr;
562 ccp = NULL;
563 jleave:
564 if(dp != NULL)
565 closedir(dp);
566 NYD2_LEAVE;
567 return (ccp == NULL);
569 jerr:
570 if(!(sgcp->sgc_flags & a_SILENT)){
571 char const *s2, *s3;
573 if(sgcp->sgc_outer->s_len > 0){
574 s2 = n_shexp_quote_cp(n_string_cp(sgcp->sgc_outer), FAL0);
575 s3 = "/";
576 }else
577 s2 = s3 = "";
579 n_err("%s: %s%s%s\n", V_(ccp), s2, s3,
580 n_shexp_quote_cp(sgcp->sgc_patdat, FAL0));
582 goto jleave;
585 static int
586 a_shexp__globsort(void const *cvpa, void const *cvpb){
587 int rv;
588 struct n_strlist const * const *slpa, * const *slpb;
589 NYD2_ENTER;
591 slpa = cvpa;
592 slpb = cvpb;
593 rv = asccasecmp((*slpa)->sl_dat, (*slpb)->sl_dat);
594 NYD2_LEAVE;
595 return rv;
597 #endif /* HAVE_FNMATCH */
/* Create a sh(1)ell-quoted version of the data of sqlp and append it to
 * sqcp->sqc_store.
 * a_SHEXP_QUOTE_RECURSE is #undef'ined right below, so jrecurse is only an
 * alias of jstep: the first loop merely walks the input and raises flags to
 * the quote type it detects as necessary.  Afterwards the list of levels is
 * reversed, the opening quote of the chosen type is pushed, all bytes are
 * pushed (escaped as the active quote type requires), and the quote is
 * closed again */
599 static void
600 a_shexp__quote(struct a_shexp_quote_ctx *sqcp, struct a_shexp_quote_lvl *sqlp){
601 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
602 * XXX use the recursive implementation because of stateful encodings.
603 * XXX I.e., if a quoted substring cannot be self-contained - the data after
604 * XXX the quote relies on "the former state", then this doesn't make sense.
605 * XXX Therefore this is not fully programmed out but instead only detects
606 * XXX the "most fancy" quoting necessary, and directly does that.
607 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
608 * XXX Otherwise we rather have to convert to wide first and act on that,
609 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
610 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
611 #ifdef a_SHEXP_QUOTE_RECURSE
612 # define jrecurse jrecurse
613 struct a_shexp_quote_lvl sql;
614 #else
615 # define jrecurse jstep
616 #endif
617 struct n_visual_info_ctx vic;
618 union {struct a_shexp_quote_lvl *head; struct n_string *store;} u;
619 ui32_t flags;
620 size_t il;
621 char const *ib;
622 NYD2_ENTER;
624 ib = sqlp->sql_dat.s;
625 il = sqlp->sql_dat.l;
626 flags = sqlp->sql_flags;
628 /* Iterate over the entire input, classify characters and type of quotes
629 * along the way. Whenever a quote change has to be applied, adjust flags
630 * for the new situation -, setup sql.* and recurse- */
631 while(il > 0){
632 char c;
634 c = *ib;
/* Control characters need dollar-single-quotes, except for a tabulator if
 * some other quote type has already been chosen */
635 if(cntrlchar(c)){
636 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
637 goto jstep;
638 if(c == '\t' && (flags & (a_SHEXP_QUOTE_T_REVSOL |
639 a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOUBLE)))
640 goto jstep;
641 #ifdef a_SHEXP_QUOTE_RECURSE
642 ++sqcp->sqc_cnt_dollar;
643 #endif
644 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
645 goto jrecurse;
646 }else if(blankspacechar(c) || c == '"' || c == '$'){
647 if(flags & a_SHEXP_QUOTE_T_MASK)
648 goto jstep;
649 #ifdef a_SHEXP_QUOTE_RECURSE
650 ++sqcp->sqc_cnt_single;
651 #endif
652 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
653 goto jrecurse;
/* A single-quote upgrades no quoting or single-quotes to dollar quotes */
654 }else if(c == '\''){
655 if(flags & (a_SHEXP_QUOTE_T_MASK & ~a_SHEXP_QUOTE_T_SINGLE))
656 goto jstep;
657 #ifdef a_SHEXP_QUOTE_RECURSE
658 ++sqcp->sqc_cnt_dollar;
659 #endif
660 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
661 goto jrecurse;
662 }else if(c == '\\'){
663 if(flags & a_SHEXP_QUOTE_T_MASK)
664 goto jstep;
665 #ifdef a_SHEXP_QUOTE_RECURSE
666 ++sqcp->sqc_cnt_single;
667 #endif
668 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
669 goto jrecurse;
670 }else if(!asciichar(c)){
671 /* Need to keep together multibytes */
672 #ifdef a_SHEXP_QUOTE_RECURSE
673 memset(&vic, 0, sizeof vic);
674 vic.vic_indat = ib;
675 vic.vic_inlen = il;
676 n_visual_info(&vic,
677 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
678 #endif
679 /* xxx check whether resulting \u would be ASCII */
680 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP) ||
681 (flags & a_SHEXP_QUOTE_T_DOLLAR)){
682 #ifdef a_SHEXP_QUOTE_RECURSE
683 ib = vic.vic_oudat;
684 il = vic.vic_oulen;
685 continue;
686 #else
687 goto jstep;
688 #endif
690 #ifdef a_SHEXP_QUOTE_RECURSE
691 ++sqcp->sqc_cnt_dollar;
692 #endif
693 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
694 goto jrecurse;
695 }else
696 jstep:
697 ++ib, --il;
699 sqlp->sql_flags = flags;
701 /* Level made the great and completed processing input. Reverse the list of
702 * levels, detect the "most fancy" quote type needed along this way */
703 /* XXX Due to restriction as above very crude */
/* il sums up a size estimate for the reservation of the result storage:
 * levels which need quoting count one and a half times their length */
704 for(flags = 0, il = 0, u.head = NULL; sqlp != NULL;){
705 struct a_shexp_quote_lvl *tmp;
707 tmp = sqlp->sql_link;
708 sqlp->sql_link = u.head;
709 u.head = sqlp;
710 il += sqlp->sql_dat.l;
711 if(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK)
712 il += (sqlp->sql_dat.l >> 1);
713 flags |= sqlp->sql_flags;
714 sqlp = tmp;
716 sqlp = u.head;
718 /* Finally work the substrings in the correct order, adjusting quotes along
719 * the way as necessary. Start off with the "most fancy" quote, so that
720 * the user sees an overall boundary she can orientate herself on.
721 * We do it like that to be able to give the user some "encapsulation
722 * experience", to address what strikes me is a problem of sh(1)ell quoting:
723 * different to, e.g., perl(1), where you see at a glance where a string
724 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
725 * visual appearance of "a string" as such */
726 u.store = n_string_reserve(sqcp->sqc_store, il);
728 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
729 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
730 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
731 }else if(flags & a_SHEXP_QUOTE_T_DOUBLE){
732 u.store = n_string_push_c(u.store, '"');
733 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOUBLE;
734 }else if(flags & a_SHEXP_QUOTE_T_SINGLE){
735 u.store = n_string_push_c(u.store, '\'');
736 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
737 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
738 flags &= ~a_SHEXP_QUOTE_T_MASK;
740 /* Work all the levels */
741 for(; sqlp != NULL; sqlp = sqlp->sql_link){
742 /* As necessary update our mode of quoting */
743 #ifdef a_SHEXP_QUOTE_RECURSE
744 il = 0;
746 switch(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK){
747 case a_SHEXP_QUOTE_T_DOLLAR:
748 if(!(flags & a_SHEXP_QUOTE_T_DOLLAR))
749 il = a_SHEXP_QUOTE_T_DOLLAR;
750 break;
751 case a_SHEXP_QUOTE_T_DOUBLE:
752 if(!(flags & (a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
753 il = a_SHEXP_QUOTE_T_DOLLAR;
754 break;
755 case a_SHEXP_QUOTE_T_SINGLE:
756 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
757 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
758 il = a_SHEXP_QUOTE_T_SINGLE;
759 break;
760 default:
761 case a_SHEXP_QUOTE_T_REVSOL:
762 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
763 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
764 il = a_SHEXP_QUOTE_T_REVSOL;
765 break;
768 if(il != 0){
769 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
770 u.store = n_string_push_c(u.store, '\'');
771 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
772 u.store = n_string_push_c(u.store, '"');
773 flags &= ~a_SHEXP_QUOTE_T_MASK;
775 flags |= (ui32_t)il;
776 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
777 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
778 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
779 u.store = n_string_push_c(u.store, '"');
780 else if(flags & a_SHEXP_QUOTE_T_SINGLE)
781 u.store = n_string_push_c(u.store, '\'');
783 #endif /* a_SHEXP_QUOTE_RECURSE */
785 /* Work the level's substring */
786 ib = sqlp->sql_dat.s;
787 il = sqlp->sql_dat.l;
789 while(il > 0){
790 char c2, c;
792 c = *ib;
794 if(cntrlchar(c)){
795 assert(c == '\t' || (flags & a_SHEXP_QUOTE_T_DOLLAR));
796 assert((flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
797 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)));
/* Map to the letter of a named escape; if c is left unchanged (c == c2) the
 * \c notation with c ^ 0x40 is used further below */
798 switch((c2 = c)){
799 case 0x07: c = 'a'; break;
800 case 0x08: c = 'b'; break;
801 case 0x0A: c = 'n'; break;
802 case 0x0B: c = 'v'; break;
803 case 0x0C: c = 'f'; break;
804 case 0x0D: c = 'r'; break;
805 case 0x1B: c = 'E'; break;
806 default: break;
807 case 0x09:
808 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
809 c = 't';
810 break;
812 if(flags & a_SHEXP_QUOTE_T_REVSOL)
813 u.store = n_string_push_c(u.store, '\\');
814 goto jpush;
816 u.store = n_string_push_c(u.store, '\\');
817 if(c == c2){
818 u.store = n_string_push_c(u.store, 'c');
819 c ^= 0x40;
821 goto jpush;
822 }else if(blankspacechar(c) || c == '"' || c == '$'){
823 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
824 goto jpush;
825 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE));
826 u.store = n_string_push_c(u.store, '\\');
827 goto jpush;
828 }else if(c == '\''){
829 if(flags & a_SHEXP_QUOTE_T_DOUBLE)
830 goto jpush;
831 assert(!(flags & a_SHEXP_QUOTE_T_SINGLE));
832 u.store = n_string_push_c(u.store, '\\');
833 goto jpush;
834 }else if(c == '\\'){
835 if(flags & a_SHEXP_QUOTE_T_SINGLE)
836 goto jpush;
837 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE |
838 a_SHEXP_QUOTE_T_DOLLAR));
839 u.store = n_string_push_c(u.store, '\\');
840 goto jpush;
841 }else if(asciichar(c)){
842 /* Shorthand: we can simply push that thing out */
843 jpush:
844 u.store = n_string_push_c(u.store, c);
845 ++ib, --il;
846 }else{
847 /* Not an ASCII character, take care not to split up multibyte
848 * sequences etc. For the sake of compile testing, don't enwrap in
849 * HAVE_ALWAYS_UNICODE_LOCALE || HAVE_NATCH_CHAR */
850 if(options & OPT_UNICODE){
851 ui32_t uc;
852 char const *ib2;
853 size_t il2, il3;
855 ib2 = ib;
856 il3 = il2 = il;
857 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
858 char itoa[32];
859 char const *cp;
/* il2 becomes the number of input bytes the decoded character consumed */
861 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
862 if((flags & a_SHEXP_QUOTE_ROUNDTRIP) || uc == 0xFFFDu){
863 /* Use padding to make ambiguities impossible */
864 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
865 (uc > 0xFFFFu ? 'U' : 'u'),
866 (int)(uc > 0xFFFFu ? 8 : 4), uc);
867 cp = itoa;
868 }else{
869 il3 = il2;
870 cp = &ib[0];
872 u.store = n_string_push_buf(u.store, cp, il3);
873 ib += il2, il -= il2;
874 continue;
/* No Unicode locale, or the UTF-8 decoding failed: let n_visual_info()
 * isolate one character */
878 memset(&vic, 0, sizeof vic);
879 vic.vic_indat = ib;
880 vic.vic_inlen = il;
881 n_visual_info(&vic,
882 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
884 /* Work this substring as sensitive as possible */
/* NOTE(review): presumably .vic_oulen is the count of bytes which remain
 * after the character, so that il becomes the length of the character
 * itself; ib and il are reset from vic below - confirm */
885 il -= vic.vic_oulen;
886 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP))
887 u.store = n_string_push_buf(u.store, ib, il);
888 #ifdef HAVE_ICONV
889 else if((vic.vic_indat = n_iconv_onetime_cp(n_ICONV_NONE,
890 "utf-8", charset_get_lc(), savestrbuf(ib, il))) != NULL){
891 ui32_t uc;
892 char const *ib2;
893 size_t il2, il3;
895 il3 = il2 = strlen(ib2 = vic.vic_indat);
896 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
897 char itoa[32];
899 il2 = PTR2SIZE(&ib2[0] - &vic.vic_indat[0]);
900 /* Use padding to make ambiguities impossible */
901 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
902 (uc > 0xFFFFu ? 'U' : 'u'),
903 (int)(uc > 0xFFFFu ? 8 : 4), uc);
904 u.store = n_string_push_buf(u.store, itoa, il3);
905 }else
906 goto Jxseq;
908 #endif
909 else
910 #ifdef HAVE_ICONV
911 Jxseq:
912 #endif
/* Last resort: one \xHH sequence per byte, the "FF" placeholder which was
 * just pushed is overwritten with the hexadecimal digits of the byte */
913 while(il-- > 0){
914 u.store = n_string_push_buf(u.store, "\\xFF",
915 sizeof("\\xFF") -1);
916 n_c_to_hex_base16(&u.store->s_dat[u.store->s_len - 2], *ib++);
919 ib = vic.vic_oudat;
920 il = vic.vic_oulen;
925 /* Close an open quote */
926 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
927 u.store = n_string_push_c(u.store, '\'');
928 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
929 u.store = n_string_push_c(u.store, '"');
930 #ifdef a_SHEXP_QUOTE_RECURSE
931 jleave:
932 #endif
933 NYD2_LEAVE;
934 return;
936 #ifdef a_SHEXP_QUOTE_RECURSE
937 jrecurse:
938 sqlp->sql_dat.l -= il;
940 sql.sql_link = sqlp;
941 sql.sql_dat.s = UNCONST(ib);
942 sql.sql_dat.l = il;
943 sql.sql_flags = flags;
944 a_shexp__quote(sqcp, &sql);
945 goto jleave;
946 #endif
948 #undef jrecurse
949 #undef a_SHEXP_QUOTE_RECURSE
952 FL char *
953 fexpand(char const *name, enum fexp_mode fexpm)
955 struct str s;
956 char const *cp, *res;
957 bool_t dyn;
958 NYD_ENTER;
960 pstate &= ~PS_EXPAND_MULTIRESULT;
962 /* The order of evaluation is "%" and "#" expand into constants.
963 * "&" can expand into "+". "+" can expand into shell meta characters.
964 * Shell meta characters expand into constants.
965 * This way, we make no recursive expansion */
966 if ((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
967 res = UNCONST(name);
969 if(!(fexpm & FEXP_NSPECIAL)){
970 jnext:
971 dyn = FAL0;
972 switch (*res) {
973 case '%':
974 if(res[1] == ':' && res[2] != '\0')
975 res = &res[2];
976 else{
977 bool_t force;
979 force = (res[1] != '\0');
980 res = a_shexp_findmail((force ? &res[1] : myname), force);
981 if(force)
982 goto jislocal;
984 goto jnext;
985 case '#':
986 if (res[1] != '\0')
987 break;
988 if (prevfile[0] == '\0') {
989 n_err(_("No previous file\n"));
990 res = NULL;
991 goto jleave;
993 res = prevfile;
994 goto jislocal;
995 case '&':
996 if (res[1] == '\0')
997 res = ok_vlook(MBOX);
998 break;
1002 /* POSIX: if *folder* unset or null, "+" shall be retained */
1003 if (!(fexpm & FEXP_NFOLDER) && *res == '+' &&
1004 *(cp = folder_query()) != '\0') {
1005 res = str_concat_csvl(&s, cp, &res[1], NULL)->s;
1006 dyn = TRU1;
1008 /* TODO *folder* can't start with %[:], can it!?! */
1009 if (res[0] == '%' && res[1] == ':') {
1010 res += 2;
1011 goto jnext;
1015 /* Do some meta expansions */
1016 if((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
1017 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
1018 : anyof(res, "{}[]*?$"))){
1019 bool_t doexp;
1021 if(fexpm & FEXP_NOPROTO)
1022 doexp = TRU1;
1023 else switch(which_protocol(res)){
1024 case PROTO_FILE:
1025 case PROTO_MAILDIR:
1026 doexp = TRU1;
1027 break;
1028 default:
1029 doexp = FAL0;
1030 break;
1033 if(doexp){
1034 struct a_shexp_var_stack top;
1036 memset(&top, 0, sizeof top);
1037 top.svs_value = res;
1038 top.svs_bsesc = TRU1;
1039 res = a_shexp_var(&top);
1041 if(res[0] == '~')
1042 res = a_shexp_tilde(res);
1044 if(!(fexpm & FEXP_NSHELL) &&
1045 (res = a_shexp_globname(res, fexpm)) == NULL)
1046 goto jleave;
1047 dyn = TRU1;
1048 }/* else no tilde */
1049 }else if(res[0] == '~'){
1050 res = a_shexp_tilde(res);
1051 dyn = TRU1;
1054 jislocal:
1055 if (fexpm & FEXP_LOCAL)
1056 switch (which_protocol(res)) {
1057 case PROTO_FILE:
1058 case PROTO_MAILDIR:
1059 break;
1060 default:
1061 n_err(_("Not a local file or directory: %s\n"),
1062 n_shexp_quote_cp(name, FAL0));
1063 res = NULL;
1064 break;
1067 jleave:
1068 if(res != NULL && !dyn)
1069 res = savestr(res);
1070 NYD_LEAVE;
1071 return UNCONST(res);
1074 FL int
1075 n_shexp_expand_escape(char const **s, bool_t use_nail_extensions)/* TODO DROP!*/
1077 char const *xs;
1078 int c, n;
1079 NYD2_ENTER;
1081 xs = *s;
1083 if ((c = *xs & 0xFF) == '\0')
1084 goto jleave;
1085 ++xs;
1086 if (c != '\\')
1087 goto jleave;
1089 switch ((c = *xs & 0xFF)) {
1090 case 'a': c = '\a'; break;
1091 case 'b': c = '\b'; break;
1092 case 'c': c = PROMPT_STOP; break;
1093 case 'f': c = '\f'; break;
1094 case 'n': c = '\n'; break;
1095 case 'r': c = '\r'; break;
1096 case 't': c = '\t'; break;
1097 case 'v': c = '\v'; break;
1099 /* ESCape */
1100 case 'E':
1101 case 'e':
1102 c = '\033';
1103 break;
1105 /* Hexadecimal TODO uses ASCII */
1106 case 'X':
1107 case 'x': {
1108 static ui8_t const hexatoi[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
1109 #undef a_HEX
1110 #define a_HEX(n) \
1111 hexatoi[(ui8_t)((n) - ((n) <= '9' ? 48 : ((n) <= 'F' ? 55 : 87)))]
1113 c = 0;
1114 ++xs;
1115 if(hexchar(*xs))
1116 c = a_HEX(*xs);
1117 else{
1118 --xs;
1119 if(options & OPT_D_V)
1120 n_err(_("Invalid \\xNUMBER notation in: %s\n"), xs - 1);
1121 c = '\\';
1122 goto jleave;
1124 ++xs;
1125 if(hexchar(*xs)){
1126 c <<= 4;
1127 c += a_HEX(*xs);
1128 ++xs;
1130 goto jleave;
1132 #undef a_HEX
1134 /* octal, with optional 0 prefix */
1135 case '0':
1136 ++xs;
1137 if(0){
1138 default:
1139 if(*xs == '\0'){
1140 c = '\\';
1141 break;
1144 for (c = 0, n = 3; n-- > 0 && octalchar(*xs); ++xs) {
1145 c <<= 3;
1146 c |= *xs - '0';
1148 goto jleave;
1150 /* S-nail extension for nice (get)prompt(()) support */
1151 case '&':
1152 case '?':
1153 case '$':
1154 case '@':
1155 if (use_nail_extensions) {
1156 switch (c) {
1157 case '&': c = ok_blook(bsdcompat) ? '&' : '?'; break;
1158 case '?': c = (pstate & PS_EVAL_ERROR) ? '1' : '0'; break;
1159 case '$': c = PROMPT_DOLLAR; break;
1160 case '@': c = PROMPT_AT; break;
1162 break;
1165 /* FALLTHRU */
1166 case '\0':
1167 /* A sole <backslash> at EOS is treated as-is! */
1168 c = '\\';
1169 /* FALLTHRU */
1170 case '\\':
1171 break;
1174 ++xs;
1175 jleave:
1176 *s = xs;
1177 NYD2_LEAVE;
1178 return c;
/* Parse one shell-style token from INPUT and, unless n_SHEXP_PARSE_DRYRUN is
 * set, append its expansion to STORE (which is truncated first when
 * n_SHEXP_PARSE_TRUNC is given).
 * Understood are 'single', "double" and $'dollar-single' quotes, backslash
 * escapes, $VAR / ${VAR} expansion (vok_vlook(), then getenv(3)), and "#"
 * comments at positions where a new token may start.
 * An INPUT length of UIZ_MAX means that strlen() is used to determine it.
 * On return INPUT has been advanced past whatever was consumed.
 * The result is a bitmix of n_SHEXP_STATE_* flags: _OUTPUT if something
 * (even an empty quote) was produced, _STOP when a comment or \c@ ended
 * parsing or when nothing was output and the input is exhausted, plus
 * _ERR_* / _CONTROL / _UNICODE bits describing what was encountered */
1181 FL enum n_shexp_state
1182 n_shexp_parse_token(struct n_string *store, struct str *input, /* TODO WCHAR */
1183 enum n_shexp_parse_flags flags){
1184 char utf[8];
1185 char c2, c, quotec;
1186 enum{
1187 a_NONE = 0,
1188 a_SKIPQ = 1<<0, /* Skip rest of this quote (\c0 ..) */
1189 a_SURPLUS = 1<<1, /* Extended sequence interpretation */
1190 a_NTOKEN = 1<<2 /* "New token": e.g., comments are possible */
1191 } state;
1192 enum n_shexp_state rv;
1193 size_t i, il;
1194 char const *ib_save, *ib;
1195 NYD2_ENTER;
1196 UNINIT(c, '\0');
/* A store is only optional for dry runs; the LOG / LOG_D_V and the
 * IFS_ADD_COMMA / IFS_IS_COMMA flag pairs are mutually exclusive */
1198 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
1199 assert(input != NULL);
1200 assert(input->l == 0 || input->s != NULL);
1201 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
1202 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
1203 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
/* LOG_D_V turns into plain LOG if the OPT_D_V option bits are set */
1205 if((flags & n_SHEXP_PARSE_LOG_D_V) && (options & OPT_D_V))
1206 flags |= n_SHEXP_PARSE_LOG;
1208 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
1209 store = n_string_trunc(store, 0);
/* ib / il are the read cursor and the number of bytes left */
1211 ib = input->s;
1212 if((il = input->l) == UIZ_MAX)
1213 il = strlen(ib);
/* (Re)start of a token; also reached from the end of the function when an
 * empty token is to be ignored */
1215 jrestart_empty:
1216 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1217 for(; il > 0; ++ib, --il)
1218 if(!blankspacechar(*ib))
1219 break;
/* Error messages below print INPUT, so it is synchronized with the cursor */
1221 input->s = UNCONST(ib);
1222 input->l = il;
1224 if(il == 0){
1225 rv = n_SHEXP_STATE_STOP;
1226 goto jleave;
1229 if(store != NULL)
1230 store = n_string_reserve(store, MIN(il, 32)); /* XXX */
/* Main loop: fetch one byte per iteration.  Branches which did their own
 * output (or which produce none) "continue", all others fall through to the
 * generic "push c" code at the end of the loop body */
1232 for(rv = n_SHEXP_STATE_NONE, state = a_NTOKEN, quotec = '\0'; il > 0;){
1233 --il, c = *ib++;
1235 /* If no quote-mode active.. */
1236 if(quotec == '\0'){
1237 if(c == '"' || c == '\''){
1238 quotec = c;
1239 if(c == '"')
1240 state |= a_SURPLUS;
1241 else
1242 state &= ~a_SURPLUS;
1243 state &= ~a_NTOKEN;
1244 continue;
1245 }else if(c == '$'){
1246 if(il > 0){
1247 state &= ~a_NTOKEN;
/* $' opens a dollar-single-quote, which is tracked as quotec '\'' plus
 * a_SURPLUS; any other $ is a candidate for variable expansion */
1248 if(*ib == '\''){
1249 --il, ++ib;
1250 quotec = '\'';
1251 state |= a_SURPLUS;
1252 continue;
1253 }else
1254 goto J_var_expand;
1256 }else if(c == '\\'){
1257 /* Outside of quotes this just escapes any next character, but a sole
1258 * <backslash> at EOS is left unchanged */
1259 if(il > 0)
1260 --il, c = *ib++;
1261 state &= ~a_NTOKEN;
1262 }else if(c == '#' && (state & a_NTOKEN)){
1263 rv |= n_SHEXP_STATE_STOP;
1264 goto jleave;
/* A separating comma is consumed, whereas a separating blank is pushed back
 * before the token is ended */
1265 }else if(c == ',' && (flags &
1266 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA)))
1267 break;
1268 else if(blankchar(c)){
1269 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
1270 ++il, --ib;
1271 break;
1273 state |= a_NTOKEN;
1274 }else
1275 state &= ~a_NTOKEN;
1276 }else{
1277 /* Quote-mode */
1278 assert(!(state & a_NTOKEN));
1279 if(c == quotec){
1280 state = a_NONE;
1281 quotec = '\0';
1282 /* Users may need to recognize the presence of empty quotes */
1283 rv |= n_SHEXP_STATE_OUTPUT;
1284 continue;
1285 }else if(c == '\\' && (state & a_SURPLUS)){
/* Remember where the sequence starts so that je_ib_save can copy it verbatim */
1286 ib_save = ib - 1;
1287 /* A sole <backslash> at EOS is treated as-is! This is ok since
1288 * the "closing quote" error will occur next, anyway */
1289 if(il == 0)
1290 break;
1291 else if((c2 = *ib) == quotec){
1292 --il, ++ib;
1293 c = quotec;
1294 }else if(quotec == '"'){
1295 /* Double quotes:
1296 * The <backslash> shall retain its special meaning as an
1297 * escape character (see Section 2.2.1) only when followed
1298 * by one of the following characters when considered
1299 * special: $ ` " \ <newline> */
1300 switch(c2){
1301 case '$':
1302 case '`':
1303 /* case '"': already handled via c2 == quotec */
1304 case '\\':
1305 --il, ++ib;
1306 c = c2;
1307 /* FALLTHRU */
1308 default:
1309 break;
1311 }else{
1312 /* Dollar-single-quote */
1313 --il, ++ib;
1314 switch(c2){
1315 case '"':
1316 /* case '\'': already handled via c2 == quotec */
1317 case '\\':
1318 c = c2;
1319 break;
1321 case 'b': c = '\b'; break;
1322 case 'f': c = '\f'; break;
1323 case 'n': c = '\n'; break;
1324 case 'r': c = '\r'; break;
1325 case 't': c = '\t'; break;
1326 case 'v': c = '\v'; break;
1328 case 'E':
1329 case 'e': c = '\033'; break;
1331 /* Control character */
1332 case 'c':
1333 if(il == 0)
1334 goto j_dollar_ungetc;
1335 --il, c2 = *ib++;
1336 if(state & a_SKIPQ)
1337 continue;
1338 c = upperconv(c2) ^ 0x40;
1339 if((ui8_t)c > 0x1F && c != 0x7F){ /* ASCII C0: 0..1F, 7F */
1340 if(flags & n_SHEXP_PARSE_LOG)
1341 n_err(_("Invalid \\c notation: %.*s\n"),
1342 (int)input->l, input->s);
1343 rv |= n_SHEXP_STATE_ERR_CONTROL;
1345 /* As an implementation-defined extension, support \c@
1346 * EQ printf(1) alike \c */
1347 if(c == '\0'){
1348 rv |= n_SHEXP_STATE_STOP;
1349 goto jleave;
1351 break;
1353 /* Octal sequence: 1 to 3 octal bytes */
1354 case '0':
1355 /* As an extension (dependent on where you look, echo(1), or
1356 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1357 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1358 c2 = c;
1359 --il, ++ib;
1361 /* FALLTHRU */
1362 case '1': case '2': case '3':
1363 case '4': case '5': case '6': case '7':
1364 c2 -= '0';
1365 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1366 c2 = (c2 << 3) | (c - '0');
1367 --il, ++ib;
/* A third digit is only acceptable if the value accumulated so far is at
 * most 0x1F (037), otherwise the result would not fit into a byte */
1369 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1370 if((ui8_t)c2 > 0x1F){
1371 if(flags & n_SHEXP_PARSE_LOG)
1372 n_err(_("\\0 argument exceeds a byte: %.*s\n"),
1373 (int)input->l, input->s);
1374 rv |= n_SHEXP_STATE_ERR_NUMBER;
1375 --il, ++ib;
1376 /* Write unchanged */
1377 je_ib_save:
1378 rv |= n_SHEXP_STATE_OUTPUT;
1379 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1380 store = n_string_push_buf(store, ib_save,
1381 PTR2SIZE(ib - ib_save));
1382 continue;
1384 c2 = (c2 << 3) | (c -= '0');
1385 --il, ++ib;
/* A decoded NUL byte switches on a_SKIPQ: the rest of the quote is consumed
 * without generating output */
1387 if((c = c2) == '\0')
1388 state |= a_SKIPQ;
1389 if(state & a_SKIPQ)
1390 continue;
1391 break;
1393 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
1394 case 'U':
1395 i = 8;
1396 if(0){
1397 /* FALLTHRU */
1398 case 'u':
1399 i = 4;
1401 if(il == 0)
1402 goto j_dollar_ungetc;
1403 if(0){
1404 /* FALLTHRU */
1406 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1407 case 'X':
1408 case 'x':
1409 if(il == 0)
1410 goto j_dollar_ungetc;
1411 i = 2;
/* Shared by \U, \u, \X and \x: i is the maximum number of digits, no collects
 * the value and j counts the digits that were actually seen */
1413 /* C99 */{
1414 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
1415 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1417 size_t no, j;
1419 i = MIN(il, i);
1420 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
1421 c = *ib;
1422 if(hexchar(c)){
1423 no <<= 4;
1424 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
1425 : ((c) <= 'F' ? 55 : 87)))];
1426 }else if(j == 0){
1427 if(state & a_SKIPQ)
1428 break;
1429 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
1430 if(flags & n_SHEXP_PARSE_LOG)
1431 n_err(_("Invalid \\%c notation: %.*s\n"),
1432 c2, (int)input->l, input->s);
1433 rv |= n_SHEXP_STATE_ERR_NUMBER;
1434 goto je_ib_save;
1435 }else
1436 break;
1439 /* Unicode massage */
1440 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
1441 if((c = (char)no) == '\0')
1442 state |= a_SKIPQ;
1443 }else if(no == 0)
1444 state |= a_SKIPQ;
1445 else if(!(state & a_SKIPQ)){
1446 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1447 store = n_string_reserve(store, MAX(j, 4));
1449 c2 = FAL0;
1450 if(no > 0x10FFFF){ /* XXX magic; CText */
1451 if(flags & n_SHEXP_PARSE_LOG)
1452 n_err(_("\\U argument exceeds 0x10FFFF: %.*s\n"),
1453 (int)input->l, input->s);
1454 rv |= n_SHEXP_STATE_ERR_NUMBER;
1455 /* But normalize the output anyway */
1456 goto Je_uni_norm;
1459 j = n_utf32_to_utf8(no, utf);
/* With OPT_UNICODE set the UTF-8 bytes are stored directly; otherwise (and
 * given HAVE_ICONV) a one-time conversion of them is tried */
1461 if(options & OPT_UNICODE){
1462 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
1463 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1464 store = n_string_push_buf(store, utf, j);
1465 continue;
1467 #ifdef HAVE_ICONV
1468 else{
1469 char *icp;
1471 icp = n_iconv_onetime_cp(n_ICONV_NONE,
1472 NULL, NULL, utf);
1473 if(icp != NULL){
1474 rv |= n_SHEXP_STATE_OUTPUT;
1475 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1476 store = n_string_push_cp(store, icp);
1477 continue;
1480 #endif
/* Fallback: store the normalized \uXXXX / \UXXXXXXXX notation itself.
 * NOTE(review): "goto Je_uni_norm" above enters this block without evaluating
 * the DRYRUN test, so the push below then also runs in dry-run mode, where
 * store is allowed to be NULL -- confirm whether that can happen */
1481 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Je_uni_norm:{
1482 char itoa[32];
1484 rv |= n_SHEXP_STATE_OUTPUT |
1485 n_SHEXP_STATE_ERR_UNICODE;
1486 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1487 (no > 0xFFFFu ? 'U' : 'u'),
1488 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
1489 store = n_string_push_buf(store, itoa, i);
1491 continue;
1493 if(state & a_SKIPQ)
1494 continue;
1496 break;
1498 /* Extension: \$ can be used to expand a variable.
1499 * Bug|ad effect: if conversion fails, not written "as-is" */
1500 case '$':
1501 if(il == 0)
1502 goto j_dollar_ungetc;
1503 goto J_var_expand;
1505 default:
1506 j_dollar_ungetc:
1507 /* Follow bash behaviour, print sequence unchanged */
1508 ++il, --ib;
1509 break;
/* Variable expansion; also jumped to for an unquoted $ and for \$ within a
 * dollar-single-quote */
1512 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
1513 bool_t brace;
1515 if(!(brace = (*ib == '{')) || il > 1){
1516 char const *cp, *vp;
1518 ib_save = ib - 1;
1519 il -= brace;
1520 vp = (ib += brace);
/* Collect the variable name: vp points to its start, i becomes its length */
1522 for(i = 0; il > 0 && (c = *ib, a_SHEXP_ISVARC(c)); ++i)
1523 --il, ++ib;
1525 if(brace){
1526 if(il == 0 || *ib != '}'){
1527 if(state & a_SKIPQ){
1528 assert((state & a_SURPLUS) && quotec == '\'');
1529 continue;
1531 if(flags & n_SHEXP_PARSE_LOG)
1532 n_err(_("Closing brace missing for ${VAR}: %.*s\n"),
1533 (int)input->l, input->s);
1534 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN |
1535 n_SHEXP_STATE_ERR_BRACE;
1536 goto je_ib_save;
1538 --il, ++ib;
1541 if(state & a_SKIPQ)
1542 continue;
/* No name at all: ${} is an error, whereas a lone $ is output literally via
 * the generic code below */
1544 if(i == 0){
1545 if(brace){
1546 if(flags & n_SHEXP_PARSE_LOG)
1547 n_err(_("Bad substitution (${}): %.*s\n"),
1548 (int)input->l, input->s);
1549 rv |= n_SHEXP_STATE_ERR_BADSUB;
1550 goto je_ib_save;
1552 c = '$';
1553 }else if(flags & n_SHEXP_PARSE_DRYRUN)
1554 continue;
1555 else{
1556 vp = savestrbuf(vp, i);
1557 /* Check getenv(3) shall no internal variable exist! */
1558 if((cp = vok_vlook(vp)) != NULL || (cp = getenv(vp)) != NULL){
1559 rv |= n_SHEXP_STATE_OUTPUT;
1560 store = n_string_push_cp(store, cp);
1561 for(; (c = *cp) != '\0'; ++cp)
1562 if(cntrlchar(c)){
1563 rv |= n_SHEXP_STATE_CONTROL;
1564 break;
1567 continue;
1570 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
1571 continue;
/* Generic output of the current byte, suppressed while a_SKIPQ is active */
1575 if(!(state & a_SKIPQ)){
1576 rv |= n_SHEXP_STATE_OUTPUT;
1577 if(cntrlchar(c))
1578 rv |= n_SHEXP_STATE_CONTROL;
1579 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1580 store = n_string_push_c(store, c);
/* A quote which is still open is an error unless QUOTE_AUTOCLOSE was given */
1584 if(quotec != '\0' && !(flags & n_SHEXP_PARSE_QUOTE_AUTOCLOSE)){
1585 if(flags & n_SHEXP_PARSE_LOG)
1586 n_err(_("no closing quote: %.*s\n"), (int)input->l, input->s);
1587 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
1590 jleave:
/* A dry run with a store collects the raw, unexpanded input that was consumed */
1591 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
1592 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
1593 rv |= n_SHEXP_STATE_OUTPUT;
1596 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1597 for(; il > 0; ++ib, --il)
1598 if(!blankchar(*ib))
1599 break;
/* Report the unconsumed remainder back to the caller */
1601 input->l = il;
1602 input->s = UNCONST(ib);
/* Unless parsing was stopped explicitly: start over if an empty token is to be
 * ignored and input is left, and flag STOP once the input is exhausted without
 * any output */
1604 if(!(rv & n_SHEXP_STATE_STOP)){
1605 if(il > 0 && !(rv & n_SHEXP_STATE_OUTPUT) &&
1606 (flags & n_SHEXP_PARSE_IGNORE_EMPTY))
1607 goto jrestart_empty;
1608 if(!(rv & n_SHEXP_STATE_OUTPUT) && il == 0)
1609 rv |= n_SHEXP_STATE_STOP;
1611 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1612 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1613 NYD2_LEAVE;
1614 return rv;
1617 FL enum n_shexp_state
1618 n_shexp_parse_token_buf(char **store, char const *indat, size_t inlen,
1619 enum n_shexp_parse_flags flags){
1620 struct n_string ss;
1621 struct str is;
1622 enum n_shexp_state shs;
1623 NYD2_ENTER;
1625 assert(store != NULL);
1626 assert(inlen == 0 || indat != NULL);
1628 n_string_creat_auto(&ss);
1629 is.s = UNCONST(indat);
1630 is.l = inlen;
1632 shs = n_shexp_parse_token(&ss, &is, flags);
1633 if(is.l > 0)
1634 shs &= ~n_SHEXP_STATE_STOP;
1635 else
1636 shs |= n_SHEXP_STATE_STOP;
1637 *store = n_string_cp(&ss);
1638 n_string_drop_ownership(&ss);
1640 n_string_gut(&ss);
1641 NYD2_LEAVE;
1642 return shs;
1645 FL struct n_string *
1646 n_shexp_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1647 struct a_shexp_quote_lvl sql;
1648 struct a_shexp_quote_ctx sqc;
1649 NYD2_ENTER;
1651 assert(store != NULL);
1652 assert(input != NULL);
1653 assert(input->l == 0 || input->s != NULL);
1655 memset(&sqc, 0, sizeof sqc);
1656 sqc.sqc_store = store;
1657 sqc.sqc_input.s = input->s;
1658 if((sqc.sqc_input.l = input->l) == UIZ_MAX)
1659 sqc.sqc_input.l = strlen(input->s);
1660 sqc.sqc_flags = rndtrip ? a_SHEXP_QUOTE_ROUNDTRIP : a_SHEXP_QUOTE_NONE;
1662 if(sqc.sqc_input.l == 0)
1663 store = n_string_push_buf(store, "''", sizeof("''") -1);
1664 else{
1665 memset(&sql, 0, sizeof sql);
1666 sql.sql_dat = sqc.sqc_input;
1667 sql.sql_flags = sqc.sqc_flags;
1668 a_shexp__quote(&sqc, &sql);
1670 NYD2_LEAVE;
1671 return store;
1674 FL char *
1675 n_shexp_quote_cp(char const *cp, bool_t rndtrip){
1676 struct n_string store;
1677 struct str input;
1678 char *rv;
1679 NYD2_ENTER;
1681 assert(cp != NULL);
1683 input.s = UNCONST(cp);
1684 input.l = UIZ_MAX;
1685 rv = n_string_cp(n_shexp_quote(n_string_creat_auto(&store), &input,
1686 rndtrip));
1687 n_string_gut(n_string_drop_ownership(&store));
1688 NYD2_LEAVE;
1689 return rv;
1692 /* s-it-mode */