privsep.c: and just verify the box is also in CWD (wapiflapi)
[s-mailx.git] / shexp.c
blob6556d949980a0d5ab86b962ab00d9ad104c8b622
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
5 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
6 * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7 */
8 /*
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 #undef n_FILE
37 #define n_FILE shexp
39 #ifndef HAVE_AMALGAMATION
40 # include "nail.h"
41 #endif
43 #include <sys/wait.h>
45 #include <pwd.h>
47 #ifdef HAVE_FNMATCH
48 # include <dirent.h>
49 # include <fnmatch.h>
50 #endif
52 /* POSIX says
53 * Environment variable names used by the utilities in the Shell and
54 * Utilities volume of POSIX.1-2008 consist solely of uppercase
55 * letters, digits, and the <underscore> ('_') from the characters
56 * defined in Portable Character Set and do not begin with a digit.
57 * Other characters may be permitted by an implementation;
58 * applications shall tolerate the presence of such names.
59 * We do support the hyphen "-" because it is common for mailx. */
60 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* Flags that steer a_shexp__quote(): one behaviour bit plus the quote types
 * (a_SHEXP_QUOTE_T_*) a piece of input may require */
62 enum a_shexp_quote_flags{
63 a_SHEXP_QUOTE_NONE, /* No flag set (0) */
64 a_SHEXP_QUOTE_ROUNDTRIP = 1u<<0, /* Result won't be consumed immediately */
66 a_SHEXP_QUOTE_T_REVSOL = 1u<<8, /* Type: by reverse solidus */
67 a_SHEXP_QUOTE_T_SINGLE = 1u<<9, /* Type: single-quotes */
68 a_SHEXP_QUOTE_T_DOUBLE = 1u<<10, /* Type: double-quotes */
69 a_SHEXP_QUOTE_T_DOLLAR = 1u<<11, /* Type: dollar-single-quotes */
/* All quote type bits: used to test for, and to replace, the active type */
70 a_SHEXP_QUOTE_T_MASK = a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
71 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR,
/* NOTE(review): presumably the first bit position not claimed above; it is
 * not referenced in the visible part of the file -- confirm */
73 a_SHEXP_QUOTE__FREESHIFT = 16u
/* One segment of an a_shexp_var() expansion.  Each recursion level of that
 * function fills in one frame (a literal run or one variable's value) and
 * links it to the frame of its caller; the innermost level finally joins the
 * .svs_dat pieces of all frames */
76 struct a_shexp_var_stack {
77 struct a_shexp_var_stack *svs_next; /* Outer stack frame */
78 char const *svs_value; /* Remaining value to expand */
79 size_t svs_len; /* Length of .svs_dat this level */
80 char const *svs_dat; /* Result data of this level */
81 bool_t svs_bsesc; /* Shall backslash escaping be performed */
82 ui8_t svs__dummy[7]; /* Unused in the visible code; presumably padding */
85 #ifdef HAVE_FNMATCH
/* State of one directory level of a_shexp__glob() */
86 struct a_shexp_glob_ctx{
87 char const *sgc_patdat; /* Remaining pattern (at and below level) */
88 size_t sgc_patlen; /* Byte length of .sgc_patdat */
89 struct n_string *sgc_outer; /* Resolved path up to this level */
90 ui32_t sgc_flags; /* Bits of the enum local to a_shexp__glob() */
91 ui8_t sgc__dummy[4]; /* Unused in the visible code; presumably padding */
93 #endif
/* Job-wide state of a_shexp__quote().
 * The .sqc_cnt_* counters are only incremented inside a_SHEXP_QUOTE_RECURSE
 * conditionals, which a_shexp__quote() currently #undef's */
95 struct a_shexp_quote_ctx{
96 struct n_string *sqc_store; /* Result storage */
97 struct str sqc_input; /* Input data, topmost level */
98 ui32_t sqc_cnt_revso;
99 ui32_t sqc_cnt_single;
100 ui32_t sqc_cnt_double;
101 ui32_t sqc_cnt_dollar;
102 enum a_shexp_quote_flags sqc_flags;
103 ui8_t sqc__dummy[4]; /* Unused in the visible code; presumably padding */
/* One level (substring plus the quote flags determined for it) that
 * a_shexp__quote() works on; levels are chained via .sql_link */
106 struct a_shexp_quote_lvl{
107 struct a_shexp_quote_lvl *sql_link; /* Outer level */
108 struct str sql_dat; /* This level (has to) handle(d) */
109 enum a_shexp_quote_flags sql_flags; /* Quote type(s) detected/in effect */
110 ui8_t sql__dummy[4]; /* Unused in the visible code; presumably padding */
113 /* Locate the user's mailbox file (where new, unread mail is queued).
 * Unless force is set, *inbox* and then $MAIL take precedence over the
 * builtin VAL_MAIL "/" user path */
114 static char *a_shexp_findmail(char const *user, bool_t force);
116 /* Expand ^~/? and ^~USER/? constructs.
117 * Returns the completely resolved (maybe empty or identical to input)
118 * salloc()ed string */
119 static char *a_shexp_tilde(char const *s);
121 /* (Try to) Expand any shell variable in svsp->svs_value.
122 * Returns the completely resolved (maybe empty) salloc()ed string.
123 * Logs on error */
124 static char *a_shexp_var(struct a_shexp_var_stack *svsp);
126 /* Expand filename patterns in name via fnmatch(3).  May return NULL on error.
 * Without HAVE_FNMATCH a copy of name is returned instead */
127 static char *a_shexp_globname(char const *name, enum fexp_mode fexpm);
128 #ifdef HAVE_FNMATCH
/* Per-directory-level worker and qsort(3) comparator of a_shexp_globname() */
129 static bool_t a_shexp__glob(struct a_shexp_glob_ctx *sgcp,
130 struct n_strlist **slpp);
131 static int a_shexp__globsort(void const *cvpa, void const *cvpb);
132 #endif
134 /* Parse an input string and create a sh(1)ell-quoted result in
 * sqcp->sqc_store */
135 static void a_shexp__quote(struct a_shexp_quote_ctx *sqcp,
136 struct a_shexp_quote_lvl *sqlp);
138 static char *
139 a_shexp_findmail(char const *user, bool_t force){
140 char *rv;
141 char const *cp;
142 NYD2_ENTER;
144 if(!force){
145 if((cp = ok_vlook(inbox)) != NULL && *cp != '\0'){
146 /* Folder extra introduced to avoid % recursion loops */
147 if((rv = fexpand(cp, FEXP_NSPECIAL | FEXP_NFOLDER | FEXP_NSHELL)
148 ) != NULL)
149 goto jleave;
150 n_err(_("*inbox* expansion failed, using $MAIL / builtin: %s\n"), cp);
153 if((cp = ok_vlook(MAIL)) != NULL){
154 rv = savestr(cp);
155 goto jleave;
159 /* C99 */{
160 size_t ul, i;
162 ul = strlen(user) +1;
163 i = sizeof(VAL_MAIL) -1 + 1 + ul;
165 rv = salloc(i);
166 memcpy(rv, VAL_MAIL, (i = sizeof(VAL_MAIL) -1));
167 rv[i] = '/';
168 memcpy(&rv[++i], user, ul);
170 jleave:
171 NYD2_LEAVE;
172 return rv;
175 static char *
176 a_shexp_tilde(char const *s){
177 struct passwd *pwp;
178 size_t nl, rl;
179 char const *rp, *np;
180 char *rv;
181 NYD2_ENTER;
183 if(*(rp = &s[1]) == '/' || *rp == '\0'){
184 np = ok_vlook(HOME);
185 rl = strlen(rp);
186 }else{
187 if((rp = strchr(np = rp, '/')) != NULL){
188 nl = PTR2SIZE(rp - np);
189 np = savestrbuf(np, nl);
190 rl = strlen(rp);
191 }else
192 rl = 0;
194 if((pwp = getpwnam(np)) == NULL){
195 rv = savestr(s);
196 goto jleave;
198 np = pwp->pw_dir;
201 nl = strlen(np);
202 rv = salloc(nl + 1 + rl +1);
203 memcpy(rv, np, nl);
204 if(rl > 0){
205 memcpy(rv + nl, rp, rl);
206 nl += rl;
208 rv[nl] = '\0';
209 jleave:
210 NYD2_LEAVE;
211 return rv;
/* Expand "$NAME" and "${NAME}" references in svsp->svs_value.
 * Each invocation consumes exactly one segment -- a literal run up to the
 * next "$" (not preceded by a backslash when .svs_bsesc tracking is on), or
 * one variable reference -- and records it in .svs_dat / .svs_len.  If input
 * remains it recurses with a new frame that links back to svsp.
 * The innermost level reverses the frame list, concatenates all segments
 * into one salloc()ed string, and that string is returned by every level.
 * A "${" without closing "}" is logged and the remaining input is taken
 * literally */
214 static char *
215 a_shexp_var(struct a_shexp_var_stack *svsp)
217 struct a_shexp_var_stack next, *np, *tmp;
218 char const *vp;
219 char lc, c, *cp, *rv;
220 size_t i;
221 NYD2_ENTER;
/* Literal segment: scan to the terminating NUL or the next active "$" */
223 if (*(vp = svsp->svs_value) != '$') {
224 bool_t bsesc = svsp->svs_bsesc;
225 union {bool_t hadbs; char c;} u = {FAL0};
227 svsp->svs_dat = vp;
228 for (lc = '\0', i = 0; ((c = *vp) != '\0'); ++i, ++vp) {
229 if (c == '$' && lc != '\\')
230 break;
231 if (!bsesc)
232 continue;
/* lc remembers the preceding byte; a backslash that followed a backslash
 * is consumed as a pair (and noted in u.hadbs) */
233 lc = (lc == '\\') ? (u.hadbs = TRU1, '\0') : c;
235 svsp->svs_len = i;
/* Escapes were seen: work on a private copy and squeeze backslashes out */
237 if (u.hadbs) {
238 svsp->svs_dat = cp = savestrbuf(svsp->svs_dat, i);
240 for (lc = '\0', rv = cp; (u.c = *cp++) != '\0';) {
241 if (u.c != '\\' || lc == '\\')
242 *rv++ = u.c;
243 lc = (lc == '\\') ? '\0' : u.c;
245 *rv = '\0';
247 svsp->svs_len = PTR2SIZE(rv - svsp->svs_dat);
249 } else {
/* Variable reference; lc doubles as the "name is {brace} enclosed" flag */
250 if ((lc = (*++vp == '{')))
251 ++vp;
253 svsp->svs_dat = vp;
254 for (i = 0; (c = *vp) != '\0'; ++i, ++vp)
255 if (!a_SHEXP_ISVARC(c))
256 break;
258 if (lc) {
259 if (c != '}') {
260 n_err(_("Variable name misses closing }: %s\n"),
261 svsp->svs_value);
262 svsp->svs_len = strlen(svsp->svs_value);
263 svsp->svs_dat = svsp->svs_value;
264 goto junroll;
266 c = *++vp;
269 svsp->svs_len = i;
270 /* Check getenv(3) shall no internal variable exist! */
271 if ((rv = vok_vlook(cp = savestrbuf(svsp->svs_dat, i))) != NULL ||
272 (rv = getenv(cp)) != NULL)
273 svsp->svs_len = strlen(svsp->svs_dat = rv);
274 else
275 svsp->svs_len = 0, svsp->svs_dat = n_UNCONST(n_empty);
/* c is the byte that ended this segment: anything but NUL means more input */
277 if (c != '\0')
278 goto jrecurse;
280 /* That level made the great and completed encoding. Build result */
281 junroll:
/* Reverse the frame list so that it starts with the outermost (first)
 * segment, summing up the total length in i along the way */
282 for (i = 0, np = svsp, svsp = NULL; np != NULL;) {
283 i += np->svs_len;
284 tmp = np->svs_next;
285 np->svs_next = svsp;
286 svsp = np;
287 np = tmp;
290 cp = rv = salloc(i +1);
291 while (svsp != NULL) {
292 np = svsp;
293 svsp = svsp->svs_next;
294 memcpy(cp, np->svs_dat, np->svs_len);
295 cp += np->svs_len;
297 *cp = '\0';
299 jleave:
300 NYD2_LEAVE;
301 return rv;
302 jrecurse:
/* Handle the remaining input in a new frame that links back to this one */
303 memset(&next, 0, sizeof next);
304 next.svs_next = svsp;
305 next.svs_value = vp;
306 next.svs_bsesc = svsp->svs_bsesc;
307 rv = a_shexp_var(&next);
308 goto jleave;
/* Expand the filename pattern name.
 * With HAVE_FNMATCH the pattern is resolved via a_shexp__glob():
 * - no match is an error,
 * - a single match is returned as a salloc()ed string,
 * - multiple matches are an error unless fexpm has FEXP_MULTIOK, in which
 *   case they are sorted with a_shexp__globsort() and returned as a sequence
 *   of NUL terminated strings followed by one more NUL, and
 *   PS_EXPAND_MULTIRESULT is set in pstate.
 * Errors are logged unless FEXP_SILENT is set, and result in NULL.
 * Without HAVE_FNMATCH a savestr() copy of name is returned */
311 static char *
312 a_shexp_globname(char const *name, enum fexp_mode fexpm){
313 #ifdef HAVE_FNMATCH
314 struct a_shexp_glob_ctx sgc;
315 struct n_string outer;
316 struct n_strlist *slp;
317 char *cp;
318 NYD_ENTER;
/* The worker gets a private copy of the pattern and an empty path buffer */
320 memset(&sgc, 0, sizeof sgc);
321 sgc.sgc_patlen = strlen(name);
322 sgc.sgc_patdat = savestrbuf(name, sgc.sgc_patlen);
323 sgc.sgc_outer = n_string_reserve(n_string_creat(&outer), sgc.sgc_patlen);
/* Evaluates to 1 for silent mode, i.e., a_SILENT of a_shexp__glob() */
324 sgc.sgc_flags = ((fexpm & FEXP_SILENT) != 0);
325 slp = NULL;
/* (char*)1 is only a non-NULL "worker succeeded" marker, never dereferenced */
326 if(a_shexp__glob(&sgc, &slp))
327 cp = (char*)1;
328 else
329 cp = NULL;
330 n_string_gut(&outer);
332 if(cp == NULL)
333 goto jleave;
335 if(slp == NULL){
336 cp = n_UNCONST(N_("File pattern does not match"));
337 goto jerr;
338 }else if(slp->sl_next == NULL)
339 cp = savestrbuf(slp->sl_dat, slp->sl_len);
340 else if(fexpm & FEXP_MULTIOK){
341 struct n_strlist **sorta, *xslp;
342 size_t i, no, l;
/* Count the matches and the bytes needed, including one NUL per match */
344 no = l = 0;
345 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next){
346 ++no;
347 l += xslp->sl_len + 1;
350 sorta = smalloc(sizeof(*sorta) * no);
351 no = 0;
352 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next)
353 sorta[no++] = xslp;
354 qsort(sorta, no, sizeof *sorta, &a_shexp__globsort);
/* ++l makes room for the additional NUL that terminates the sequence */
356 cp = salloc(++l);
357 l = 0;
358 for(i = 0; i < no; ++i){
359 xslp = sorta[i];
360 memcpy(&cp[l], xslp->sl_dat, xslp->sl_len);
361 l += xslp->sl_len;
362 cp[l++] = '\0';
364 cp[l] = '\0';
366 free(sorta);
367 pstate |= PS_EXPAND_MULTIRESULT;
368 }else{
369 cp = n_UNCONST(N_("File pattern matches multiple results"));
370 goto jerr;
373 jleave:
/* The match list is released on every path */
374 while(slp != NULL){
375 struct n_strlist *tmp = slp;
377 slp = slp->sl_next;
378 free(tmp);
380 NYD_LEAVE;
381 return cp;
383 jerr:
/* At this point cp holds the (untranslated) error message */
384 if(!(fexpm & FEXP_SILENT)){
385 name = n_shexp_quote_cp(name, FAL0);
386 n_err("%s: %s\n", V_(cp), name);
388 cp = NULL;
389 goto jleave;
391 #else /* HAVE_FNMATCH */
392 n_UNUSED(fexpm);
394 if(!(fexpm & FEXP_SILENT))
395 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
396 return savestr(name);
397 #endif
400 #ifdef HAVE_FNMATCH
401 static bool_t
402 a_shexp__glob(struct a_shexp_glob_ctx *sgcp, struct n_strlist **slpp){
403 enum{a_SILENT = 1<<0, a_DEEP=1<<1, a_SALLOC=1<<2};
405 struct a_shexp_glob_ctx nsgc;
406 struct dirent *dep;
407 DIR *dp;
408 size_t old_outerlen;
409 char const *ccp, *myp;
410 NYD2_ENTER;
412 /* We need some special treatment for the outermost level */
413 if(!(sgcp->sgc_flags & a_DEEP)){
414 if(sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/'){
415 myp = n_string_cp(n_string_push_c(sgcp->sgc_outer, '/'));
416 ++sgcp->sgc_patdat;
417 --sgcp->sgc_patlen;
418 }else
419 myp = "./";
420 }else
421 myp = n_string_cp(sgcp->sgc_outer);
422 old_outerlen = sgcp->sgc_outer->s_len;
424 /* Separate current directory/pattern level from any possible remaining
425 * pattern in order to be able to use it for fnmatch(3) */
426 if((ccp = memchr(sgcp->sgc_patdat, '/', sgcp->sgc_patlen)) == NULL)
427 nsgc.sgc_patlen = 0;
428 else{
429 nsgc = *sgcp;
430 nsgc.sgc_flags |= a_DEEP;
431 sgcp->sgc_patlen = PTR2SIZE((nsgc.sgc_patdat = &ccp[1]) -
432 &sgcp->sgc_patdat[0]);
433 nsgc.sgc_patlen -= sgcp->sgc_patlen;
434 /* Trim solidus */
435 if(sgcp->sgc_patlen > 0){
436 assert(sgcp->sgc_patdat[sgcp->sgc_patlen -1] == '/');
437 ((char*)n_UNCONST(sgcp->sgc_patdat))[--sgcp->sgc_patlen] = '\0';
441 /* Our current directory level */
442 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
443 * xxx be the (sole) result depending on pattern surroundings, etc. */
444 if((dp = opendir(myp)) == NULL){
445 int err;
447 switch((err = errno)){
448 case ENOTDIR:
449 ccp = N_("cannot access paths under non-directory");
450 goto jerr;
451 case ENOENT:
452 ccp = N_("path component of (sub)pattern non-existent");
453 goto jerr;
454 case EACCES:
455 ccp = N_("file permission for file (sub)pattern denied");
456 goto jerr;
457 default:
458 ccp = N_("cannot handle file (sub)pattern");
459 goto jerr;
463 /* As necessary, quote bytes in the current pattern */
464 /* C99 */{
465 char *ncp;
466 size_t i;
467 bool_t need;
469 for(need = FAL0, i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
470 switch(*myp){
471 case '\'': case '"': case '\\': case '$':
472 case ' ': case '\t':
473 need = TRU1;
474 ++i;
475 /* FALLTHRU */
476 default:
477 ++i;
478 break;
481 if(need){
482 ncp = salloc(i +1);
483 for(i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
484 switch(*myp){
485 case '\'': case '"': case '\\': case '$':
486 case ' ': case '\t':
487 ncp[i++] = '\\';
488 /* FALLTHRU */
489 default:
490 ncp[i++] = *myp;
491 break;
493 ncp[i] = '\0';
494 myp = ncp;
495 }else
496 myp = sgcp->sgc_patdat;
499 while((dep = readdir(dp)) != NULL){
500 switch(fnmatch(myp, dep->d_name, FNM_PATHNAME | FNM_PERIOD)){
501 case 0:{
502 /* A match expresses the desire to recurse if there is more pattern */
503 if(nsgc.sgc_patlen > 0){
504 bool_t isdir;
506 n_string_push_cp((sgcp->sgc_outer->s_len > 1
507 ? n_string_push_c(sgcp->sgc_outer, '/') : sgcp->sgc_outer),
508 dep->d_name);
510 isdir = FAL0;
511 #ifdef HAVE_DIRENT_TYPE
512 if(dep->d_type == DT_DIR)
513 isdir = TRU1;
514 else if(dep->d_type == DT_LNK || dep->d_type == DT_UNKNOWN)
515 #endif
517 struct stat sb;
519 if(stat(n_string_cp(sgcp->sgc_outer), &sb)){
520 ccp = N_("I/O error when querying file status");
521 goto jerr;
522 }else if(S_ISDIR(sb.st_mode))
523 isdir = TRU1;
526 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
527 * TODO Instead save away a list of such n_string's for later */
528 if(isdir && !a_shexp__glob(&nsgc, slpp)){
529 ccp = (char*)1;
530 goto jleave;
533 n_string_trunc(sgcp->sgc_outer, old_outerlen);
534 }else{
535 struct n_strlist *slp;
536 size_t i, j;
538 i = strlen(dep->d_name);
539 j = (old_outerlen > 0) ? old_outerlen + 1 + i : i;
540 slp = n_STRLIST_MALLOC(j);
541 *slpp = slp;
542 slpp = &slp->sl_next;
543 slp->sl_next = NULL;
544 if((j = old_outerlen) > 0){
545 memcpy(&slp->sl_dat[0], sgcp->sgc_outer->s_dat, j);
546 if(slp->sl_dat[j -1] != '/')
547 slp->sl_dat[j++] = '/';
549 memcpy(&slp->sl_dat[j], dep->d_name, i);
550 slp->sl_dat[j += i] = '\0';
551 slp->sl_len = j;
553 } break;
554 case FNM_NOMATCH:
555 break;
556 default:
557 ccp = N_("fnmatch(3) cannot handle file (sub)pattern");
558 goto jerr;
562 ccp = NULL;
563 jleave:
564 if(dp != NULL)
565 closedir(dp);
566 NYD2_LEAVE;
567 return (ccp == NULL);
569 jerr:
570 if(!(sgcp->sgc_flags & a_SILENT)){
571 char const *s2, *s3;
573 if(sgcp->sgc_outer->s_len > 0){
574 s2 = n_shexp_quote_cp(n_string_cp(sgcp->sgc_outer), FAL0);
575 s3 = "/";
576 }else
577 s2 = s3 = n_empty;
579 n_err("%s: %s%s%s\n", V_(ccp), s2, s3,
580 n_shexp_quote_cp(sgcp->sgc_patdat, FAL0));
582 goto jleave;
585 static int
586 a_shexp__globsort(void const *cvpa, void const *cvpb){
587 int rv;
588 struct n_strlist const * const *slpa, * const *slpb;
589 NYD2_ENTER;
591 slpa = cvpa;
592 slpb = cvpb;
593 rv = asccasecmp((*slpa)->sl_dat, (*slpb)->sl_dat);
594 NYD2_LEAVE;
595 return rv;
597 #endif /* HAVE_FNMATCH */
/* Append a sh(1)ell-quoted representation of sqlp->sql_dat to
 * sqcp->sqc_store.
 * Works in three steps: (1) classify all input bytes in order to detect the
 * quote type(s) needed, (2) reverse the list of levels and join their flags
 * to find the "most fancy" quote, (3) emit opening quote, data and closing
 * quote.
 * Since a_SHEXP_QUOTE_RECURSE is #undef'd below, "jrecurse" is an alias of
 * "jstep": no nested levels are created and the entire input ends up in one
 * quote of the strongest type detected */
599 static void
600 a_shexp__quote(struct a_shexp_quote_ctx *sqcp, struct a_shexp_quote_lvl *sqlp){
601 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
602 * XXX use the recursive implementation because of stateful encodings.
603 * XXX I.e., if a quoted substring cannot be self-contained - the data after
604 * XXX the quote relies on "the former state", then this doesn't make sense.
605 * XXX Therefore this is not fully programmed out but instead only detects
606 * XXX the "most fancy" quoting necessary, and directly does that.
607 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
608 * XXX Otherwise we rather have to convert to wide first and act on that,
609 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
610 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
611 #ifdef a_SHEXP_QUOTE_RECURSE
612 # define jrecurse jrecurse
613 struct a_shexp_quote_lvl sql;
614 #else
615 # define jrecurse jstep
616 #endif
617 struct n_visual_info_ctx vic;
618 union {struct a_shexp_quote_lvl *head; struct n_string *store;} u;
619 ui32_t flags;
620 size_t il;
621 char const *ib;
622 NYD2_ENTER;
624 ib = sqlp->sql_dat.s;
625 il = sqlp->sql_dat.l;
626 flags = sqlp->sql_flags;
628 /* Iterate over the entire input, classify characters and type of quotes
629 * along the way. Whenever a quote change has to be applied, adjust flags
630 * for the new situation -, setup sql.* and recurse- */
631 while(il > 0){
632 char c;
634 c = *ib;
/* Control characters require $'..', except for a tabulator when any other
 * quote type is already in effect */
635 if(cntrlchar(c)){
636 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
637 goto jstep;
638 if(c == '\t' && (flags & (a_SHEXP_QUOTE_T_REVSOL |
639 a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOUBLE)))
640 goto jstep;
641 #ifdef a_SHEXP_QUOTE_RECURSE
642 ++sqcp->sqc_cnt_dollar;
643 #endif
644 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
645 goto jrecurse;
/* Blanks, double quote and dollar need single quotes unless already quoted */
646 }else if(blankspacechar(c) || c == '"' || c == '$'){
647 if(flags & a_SHEXP_QUOTE_T_MASK)
648 goto jstep;
649 #ifdef a_SHEXP_QUOTE_RECURSE
650 ++sqcp->sqc_cnt_single;
651 #endif
652 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
653 goto jrecurse;
/* A single quote cannot live in single quotes: upgrade to $'..' */
654 }else if(c == '\''){
655 if(flags & (a_SHEXP_QUOTE_T_MASK & ~a_SHEXP_QUOTE_T_SINGLE))
656 goto jstep;
657 #ifdef a_SHEXP_QUOTE_RECURSE
658 ++sqcp->sqc_cnt_dollar;
659 #endif
660 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
661 goto jrecurse;
662 }else if(c == '\\'){
663 if(flags & a_SHEXP_QUOTE_T_MASK)
664 goto jstep;
665 #ifdef a_SHEXP_QUOTE_RECURSE
666 ++sqcp->sqc_cnt_single;
667 #endif
668 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
669 goto jrecurse;
/* Non-ASCII forces $'..' only in ROUNDTRIP mode (escapes will be generated) */
670 }else if(!asciichar(c)){
671 /* Need to keep together multibytes */
672 #ifdef a_SHEXP_QUOTE_RECURSE
673 memset(&vic, 0, sizeof vic);
674 vic.vic_indat = ib;
675 vic.vic_inlen = il;
676 n_visual_info(&vic,
677 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
678 #endif
679 /* xxx check whether resulting \u would be ASCII */
680 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP) ||
681 (flags & a_SHEXP_QUOTE_T_DOLLAR)){
682 #ifdef a_SHEXP_QUOTE_RECURSE
683 ib = vic.vic_oudat;
684 il = vic.vic_oulen;
685 continue;
686 #else
687 goto jstep;
688 #endif
690 #ifdef a_SHEXP_QUOTE_RECURSE
691 ++sqcp->sqc_cnt_dollar;
692 #endif
693 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
694 goto jrecurse;
695 }else
696 jstep:
697 ++ib, --il;
699 sqlp->sql_flags = flags;
701 /* Level made the great and completed processing input. Reverse the list of
702 * levels, detect the "most fancy" quote type needed along this way */
703 /* XXX Due to restriction as above very crude */
704 for(flags = 0, il = 0, u.head = NULL; sqlp != NULL;){
705 struct a_shexp_quote_lvl *tmp;
707 tmp = sqlp->sql_link;
708 sqlp->sql_link = u.head;
709 u.head = sqlp;
/* il estimates the storage to reserve: add 50% for levels that need quotes */
710 il += sqlp->sql_dat.l;
711 if(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK)
712 il += (sqlp->sql_dat.l >> 1);
713 flags |= sqlp->sql_flags;
714 sqlp = tmp;
716 sqlp = u.head;
718 /* Finally work the substrings in the correct order, adjusting quotes along
719 * the way as necessary. Start off with the "most fancy" quote, so that
720 * the user sees an overall boundary she can orientate herself on.
721 * We do it like that to be able to give the user some "encapsulation
722 * experience", to address what strikes me is a problem of sh(1)ell quoting:
723 * different to, e.g., perl(1), where you see at a glance where a string
724 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
725 * visual appearance of "a string" as such */
726 u.store = n_string_reserve(sqcp->sqc_store, il);
728 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
729 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
730 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
731 }else if(flags & a_SHEXP_QUOTE_T_DOUBLE){
732 u.store = n_string_push_c(u.store, '"');
733 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOUBLE;
734 }else if(flags & a_SHEXP_QUOTE_T_SINGLE){
735 u.store = n_string_push_c(u.store, '\'');
736 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
737 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
738 flags &= ~a_SHEXP_QUOTE_T_MASK;
740 /* Work all the levels */
741 for(; sqlp != NULL; sqlp = sqlp->sql_link){
742 /* As necessary update our mode of quoting */
743 #ifdef a_SHEXP_QUOTE_RECURSE
744 il = 0;
746 switch(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK){
747 case a_SHEXP_QUOTE_T_DOLLAR:
748 if(!(flags & a_SHEXP_QUOTE_T_DOLLAR))
749 il = a_SHEXP_QUOTE_T_DOLLAR;
750 break;
751 case a_SHEXP_QUOTE_T_DOUBLE:
752 if(!(flags & (a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
753 il = a_SHEXP_QUOTE_T_DOLLAR;
754 break;
755 case a_SHEXP_QUOTE_T_SINGLE:
756 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
757 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
758 il = a_SHEXP_QUOTE_T_SINGLE;
759 break;
760 default:
761 case a_SHEXP_QUOTE_T_REVSOL:
762 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
763 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
764 il = a_SHEXP_QUOTE_T_REVSOL;
765 break;
768 if(il != 0){
769 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
770 u.store = n_string_push_c(u.store, '\'');
771 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
772 u.store = n_string_push_c(u.store, '"');
773 flags &= ~a_SHEXP_QUOTE_T_MASK;
775 flags |= (ui32_t)il;
776 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
777 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
778 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
779 u.store = n_string_push_c(u.store, '"');
780 else if(flags & a_SHEXP_QUOTE_T_SINGLE)
781 u.store = n_string_push_c(u.store, '\'');
783 #endif /* a_SHEXP_QUOTE_RECURSE */
785 /* Work the level's substring */
786 ib = sqlp->sql_dat.s;
787 il = sqlp->sql_dat.l;
789 while(il > 0){
790 char c2, c;
792 c = *ib;
794 if(cntrlchar(c)){
795 assert(c == '\t' || (flags & a_SHEXP_QUOTE_T_DOLLAR));
796 assert((flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
797 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)));
/* Map to the letter of a named escape; c2 keeps the original value so that
 * "no named escape exists" can be detected below */
798 switch((c2 = c)){
799 case 0x07: c = 'a'; break;
800 case 0x08: c = 'b'; break;
801 case 0x0A: c = 'n'; break;
802 case 0x0B: c = 'v'; break;
803 case 0x0C: c = 'f'; break;
804 case 0x0D: c = 'r'; break;
805 case 0x1B: c = 'E'; break;
806 default: break;
807 case 0x09:
808 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
809 c = 't';
810 break;
812 if(flags & a_SHEXP_QUOTE_T_REVSOL)
813 u.store = n_string_push_c(u.store, '\\');
814 goto jpush;
816 u.store = n_string_push_c(u.store, '\\');
/* No named escape: emit the \cX form, X being the character with bit 0x40
 * toggled */
817 if(c == c2){
818 u.store = n_string_push_c(u.store, 'c');
819 c ^= 0x40;
821 goto jpush;
822 }else if(blankspacechar(c) || c == '"' || c == '$'){
823 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
824 goto jpush;
825 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE));
826 u.store = n_string_push_c(u.store, '\\');
827 goto jpush;
828 }else if(c == '\''){
829 if(flags & a_SHEXP_QUOTE_T_DOUBLE)
830 goto jpush;
831 assert(!(flags & a_SHEXP_QUOTE_T_SINGLE));
832 u.store = n_string_push_c(u.store, '\\');
833 goto jpush;
834 }else if(c == '\\'){
835 if(flags & a_SHEXP_QUOTE_T_SINGLE)
836 goto jpush;
837 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE |
838 a_SHEXP_QUOTE_T_DOLLAR));
839 u.store = n_string_push_c(u.store, '\\');
840 goto jpush;
841 }else if(asciichar(c)){
842 /* Shorthand: we can simply push that thing out */
843 jpush:
844 u.store = n_string_push_c(u.store, c);
845 ++ib, --il;
846 }else{
847 /* Not an ASCII character, take care not to split up multibyte
848 * sequences etc. For the sake of compile testing, don't enwrap in
849 * HAVE_ALWAYS_UNICODE_LOCALE || HAVE_NATCH_CHAR */
/* With OPT_UNICODE the input is decoded as UTF-8: a \u / \U escape is
 * generated in ROUNDTRIP mode or for U+FFFD, otherwise the sequence is
 * copied verbatim.  If decoding fails the generic path below is taken */
850 if(options & OPT_UNICODE){
851 ui32_t uc;
852 char const *ib2;
853 size_t il2, il3;
855 ib2 = ib;
856 il3 = il2 = il;
857 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
858 char itoa[32];
859 char const *cp;
861 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
862 if((flags & a_SHEXP_QUOTE_ROUNDTRIP) || uc == 0xFFFDu){
863 /* Use padding to make ambiguities impossible */
864 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
865 (uc > 0xFFFFu ? 'U' : 'u'),
866 (int)(uc > 0xFFFFu ? 8 : 4), uc);
867 cp = itoa;
868 }else{
869 il3 = il2;
870 cp = &ib[0];
872 u.store = n_string_push_buf(u.store, cp, il3);
873 ib += il2, il -= il2;
874 continue;
/* Generic path: let n_visual_info() isolate one character.
 * NOTE(review): .vic_oudat/.vic_oulen presumably describe the input that
 * remains after that character -- confirm against n_visual_info() */
878 memset(&vic, 0, sizeof vic);
879 vic.vic_indat = ib;
880 vic.vic_inlen = il;
881 n_visual_info(&vic,
882 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
884 /* Work this substring as sensitive as possible */
885 il -= vic.vic_oulen;
886 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP))
887 u.store = n_string_push_buf(u.store, ib, il);
888 #ifdef HAVE_ICONV
889 else if((vic.vic_indat = n_iconv_onetime_cp(n_ICONV_NONE,
890 "utf-8", ok_vlook(ttycharset), savestrbuf(ib, il))) != NULL){
891 ui32_t uc;
892 char const *ib2;
893 size_t il2, il3;
895 il3 = il2 = strlen(ib2 = vic.vic_indat);
896 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
897 char itoa[32];
899 il2 = PTR2SIZE(&ib2[0] - &vic.vic_indat[0]);
900 /* Use padding to make ambiguities impossible */
901 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
902 (uc > 0xFFFFu ? 'U' : 'u'),
903 (int)(uc > 0xFFFFu ? 8 : 4), uc);
904 u.store = n_string_push_buf(u.store, itoa, il3);
905 }else
906 goto Jxseq;
908 #endif
909 else
910 #ifdef HAVE_ICONV
911 Jxseq:
912 #endif
/* Last resort: one \xHH escape per byte; "FF" is pushed as a placeholder
 * that n_c_to_hex_base16() is handed for (presumably) overwriting */
913 while(il-- > 0){
914 u.store = n_string_push_buf(u.store, "\\xFF",
915 sizeof("\\xFF") -1);
916 n_c_to_hex_base16(&u.store->s_dat[u.store->s_len - 2], *ib++);
919 ib = vic.vic_oudat;
920 il = vic.vic_oulen;
925 /* Close an open quote */
926 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
927 u.store = n_string_push_c(u.store, '\'');
928 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
929 u.store = n_string_push_c(u.store, '"');
930 #ifdef a_SHEXP_QUOTE_RECURSE
931 jleave:
932 #endif
933 NYD2_LEAVE;
934 return;
936 #ifdef a_SHEXP_QUOTE_RECURSE
937 jrecurse:
938 sqlp->sql_dat.l -= il;
940 sql.sql_link = sqlp;
941 sql.sql_dat.s = n_UNCONST(ib);
942 sql.sql_dat.l = il;
943 sql.sql_flags = flags;
944 a_shexp__quote(sqcp, &sql);
945 goto jleave;
946 #endif
948 #undef jrecurse
949 #undef a_SHEXP_QUOTE_RECURSE
952 FL char *
953 fexpand(char const *name, enum fexp_mode fexpm)
955 struct str s;
956 char const *cp, *res;
957 bool_t dyn;
958 NYD_ENTER;
960 pstate &= ~PS_EXPAND_MULTIRESULT;
962 /* The order of evaluation is "%" and "#" expand into constants.
963 * "&" can expand into "+". "+" can expand into shell meta characters.
964 * Shell meta characters expand into constants.
965 * This way, we make no recursive expansion */
966 if ((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
967 res = n_UNCONST(name);
969 if(!(fexpm & FEXP_NSPECIAL)){
970 jnext:
971 dyn = FAL0;
972 switch (*res) {
973 case '%':
974 if(res[1] == ':' && res[2] != '\0')
975 res = &res[2];
976 else{
977 bool_t force;
979 force = (res[1] != '\0');
980 res = a_shexp_findmail((force ? &res[1] : ok_vlook(LOGNAME)),
981 force);
982 if(force)
983 goto jislocal;
985 goto jnext;
986 case '#':
987 if (res[1] != '\0')
988 break;
989 if (prevfile[0] == '\0') {
990 n_err(_("No previous file\n"));
991 res = NULL;
992 goto jleave;
994 res = prevfile;
995 goto jislocal;
996 case '&':
997 if (res[1] == '\0')
998 res = ok_vlook(MBOX);
999 break;
1003 /* POSIX: if *folder* unset or null, "+" shall be retained */
1004 if (!(fexpm & FEXP_NFOLDER) && *res == '+' &&
1005 *(cp = folder_query()) != '\0') {
1006 res = str_concat_csvl(&s, cp, &res[1], NULL)->s;
1007 dyn = TRU1;
1009 /* TODO *folder* can't start with %[:], can it!?! */
1010 if (res[0] == '%' && res[1] == ':') {
1011 res += 2;
1012 goto jnext;
1016 /* Do some meta expansions */
1017 if((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
1018 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
1019 : anyof(res, "{}[]*?$"))){
1020 bool_t doexp;
1022 if(fexpm & FEXP_NOPROTO)
1023 doexp = TRU1;
1024 else switch(which_protocol(res)){
1025 case PROTO_FILE:
1026 case PROTO_MAILDIR:
1027 doexp = TRU1;
1028 break;
1029 default:
1030 doexp = FAL0;
1031 break;
1034 if(doexp){
1035 struct a_shexp_var_stack top;
1037 memset(&top, 0, sizeof top);
1038 top.svs_value = res;
1039 top.svs_bsesc = TRU1;
1040 res = a_shexp_var(&top);
1042 if(res[0] == '~')
1043 res = a_shexp_tilde(res);
1045 if(!(fexpm & FEXP_NSHELL) &&
1046 (res = a_shexp_globname(res, fexpm)) == NULL)
1047 goto jleave;
1048 dyn = TRU1;
1049 }/* else no tilde */
1050 }else if(res[0] == '~'){
1051 res = a_shexp_tilde(res);
1052 dyn = TRU1;
1055 jislocal:
1056 if (fexpm & FEXP_LOCAL)
1057 switch (which_protocol(res)) {
1058 case PROTO_FILE:
1059 case PROTO_MAILDIR:
1060 break;
1061 default:
1062 n_err(_("Not a local file or directory: %s\n"),
1063 n_shexp_quote_cp(name, FAL0));
1064 res = NULL;
1065 break;
1068 jleave:
1069 if(res != NULL && !dyn)
1070 res = savestr(res);
1071 NYD_LEAVE;
1072 return n_UNCONST(res);
/* Parse a single shell-style token from the front of `input'.
 *
 * store: string that receives the unquoted / expanded bytes of the token.
 *    May only be NULL when n_SHEXP_PARSE_DRYRUN is set (asserted below).
 *    With DRYRUN and a non-NULL store nothing is expanded; instead the raw
 *    consumed input bytes are pushed once at `jleave'.
 * input: the text to parse; `l == UIZ_MAX' means "use strlen(s)".  On return
 *    `s' / `l' have been advanced past everything that was consumed.
 * flags: n_SHEXP_PARSE_* bits, see the individual tests in the body.
 *
 * Returns a bitmask of n_SHEXP_STATE_* values: _OUTPUT once something
 * (including an empty quote pair) was produced, _STOP when the input is
 * exhausted or parsing was ended by a comment or a NUL-valued \c sequence,
 * plus the _ERR_*, _CONTROL and _UNICODE bits set along the way. */
1075 FL enum n_shexp_state
1076 n_shexp_parse_token(struct n_string *store, struct str *input, /* TODO WCHAR */
1077 enum n_shexp_parse_flags flags){
1078 char utf[8];
1079 char c2, c, quotec;
1080 enum{
1081 a_NONE = 0,
1082 a_SKIPQ = 1<<0, /* Skip rest of this quote (\c0 ..) */
1083 a_SURPLUS = 1<<1, /* Extended sequence interpretation */
1084 a_NTOKEN = 1<<2 /* "New token": e.g., comments are possible */
1085 } state;
1086 enum n_shexp_state rv;
1087 size_t i, il;
1088 char const *ib_save, *ib;
1089 NYD2_ENTER;
1090 n_UNINIT(c, '\0');
/* Argument contract: some flag combinations are mutually exclusive */
1092 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
1093 assert(input != NULL);
1094 assert(input->l == 0 || input->s != NULL);
1095 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
1096 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
1097 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
1098 assert(!(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED) ||
1099 (flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK));
/* Derive implied flags: LOG_D_V turns into LOG only if OPT_D_V is active, and
 * a fixed automatic quote is always closed automatically at end of input */
1101 if((flags & n_SHEXP_PARSE_LOG_D_V) && (options & OPT_D_V))
1102 flags |= n_SHEXP_PARSE_LOG;
1103 if(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)
1104 flags |= n_SHEXP_PARSE_QUOTE_AUTO_CLOSE;
1106 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
1107 store = n_string_trunc(store, 0);
/* ib / il form the read cursor: next input byte and number of bytes left */
1109 ib = input->s;
1110 if((il = input->l) == UIZ_MAX)
1111 il = strlen(ib);
/* Re-entered from the bottom when IGNORE_EMPTY is set and the round
 * consumed input without producing output */
1113 jrestart_empty:
1114 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1115 for(; il > 0; ++ib, --il)
1116 if(!blankspacechar(*ib))
1117 break;
/* Remember where this token starts; error messages and the DRYRUN copy at
 * jleave refer to input->s / input->l */
1119 input->s = n_UNCONST(ib);
1120 input->l = il;
1122 if(il == 0){
1123 rv = n_SHEXP_STATE_STOP;
1124 goto jleave;
1127 if(store != NULL)
1128 store = n_string_reserve(store, n_MIN(il, 32)); /* XXX */
/* Select the initial quoting mode: QUOTE_AUTO_* start as if inside '..',
 * ".." or $'..' respectively, otherwise parsing starts unquoted with a_NTOKEN
 * set.  The `if(0)' lets the _DSQ case label share the a_SURPLUS assignment
 * that follows the _DQ case */
1130 rv = n_SHEXP_STATE_NONE;
1131 switch(flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK){
1132 case n_SHEXP_PARSE_QUOTE_AUTO_SQ:
1133 quotec = '\'';
1134 state = a_NONE;
1135 break;
1136 case n_SHEXP_PARSE_QUOTE_AUTO_DQ:
1137 quotec = '"';
1138 if(0){
1139 case n_SHEXP_PARSE_QUOTE_AUTO_DSQ:
1140 quotec = '\'';
1142 state = a_SURPLUS;
1143 break;
1144 default:
1145 quotec = '\0';
1146 state = a_NTOKEN;
1147 break;
/* Main loop: consume one byte per iteration.  A `continue' means the byte(s)
 * were handled completely; falling to the bottom of the loop body pushes `c'
 * to store (unless a_SKIPQ or DRYRUN) */
1150 while(il > 0){
1151 --il, c = *ib++;
1153 /* If no quote-mode active.. */
1154 if(quotec == '\0'){
1155 if(c == '"' || c == '\''){
1156 quotec = c;
1157 if(c == '"')
1158 state |= a_SURPLUS;
1159 else
1160 state &= ~a_SURPLUS;
1161 state &= ~a_NTOKEN;
1162 continue;
1163 }else if(c == '$'){
1164 if(il > 0){
1165 state &= ~a_NTOKEN;
/* $' opens a dollar-single-quote, any other $X is tried as a variable */
1166 if(*ib == '\''){
1167 --il, ++ib;
1168 quotec = '\'';
1169 state |= a_SURPLUS;
1170 continue;
1171 }else
1172 goto J_var_expand;
1174 }else if(c == '\\'){
1175 /* Outside of quotes this just escapes any next character, but a sole
1176 * <backslash> at EOS is left unchanged */
1177 if(il > 0)
1178 --il, c = *ib++;
1179 state &= ~a_NTOKEN;
1180 }else if(c == '#' && (state & a_NTOKEN)){
1181 rv |= n_SHEXP_STATE_STOP;
1182 goto jleave;
1183 }else if(c == ',' && (flags &
1184 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA)))
1185 break;
1186 else if(blankchar(c)){
1187 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
/* Un-consume the blank: it terminates the token but stays in the input */
1188 ++il, --ib;
1189 break;
1191 state |= a_NTOKEN;
1192 }else
1193 state &= ~a_NTOKEN;
1194 }else{
1195 /* Quote-mode */
1196 assert(!(state & a_NTOKEN));
1197 if(c == quotec && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)){
1198 state = a_NONE;
1199 quotec = '\0';
1200 /* Users may need to recognize the presence of empty quotes */
1201 rv |= n_SHEXP_STATE_OUTPUT;
1202 continue;
1203 }else if(c == '\\' && (state & a_SURPLUS)){
/* ib_save marks the backslash so that je_ib_save can emit the whole
 * sequence unchanged on error */
1204 ib_save = ib - 1;
1205 /* A sole <backslash> at EOS is treated as-is! This is ok since
1206 * the "closing quote" error will occur next, anyway */
1207 if(il == 0)
1208 break;
1209 else if((c2 = *ib) == quotec){
1210 --il, ++ib;
1211 c = quotec;
1212 }else if(quotec == '"'){
1213 /* Double quotes:
1214 * The <backslash> shall retain its special meaning as an
1215 * escape character (see Section 2.2.1) only when followed
1216 * by one of the following characters when considered
1217 * special: $ ` " \ <newline> */
1218 switch(c2){
1219 case '$':
1220 case '`':
1221 /* case '"': already handled via c2 == quotec */
1222 case '\\':
1223 --il, ++ib;
1224 c = c2;
1225 /* FALLTHRU */
1226 default:
1227 break;
1229 }else{
1230 /* Dollar-single-quote */
1231 --il, ++ib;
1232 switch(c2){
1233 case '"':
1234 /* case '\'': already handled via c2 == quotec */
1235 case '\\':
1236 c = c2;
1237 break;
1239 case 'b': c = '\b'; break;
1240 case 'f': c = '\f'; break;
1241 case 'n': c = '\n'; break;
1242 case 'r': c = '\r'; break;
1243 case 't': c = '\t'; break;
1244 case 'v': c = '\v'; break;
1246 case 'E':
1247 case 'e': c = '\033'; break;
1249 /* Control character */
1250 case 'c':
1251 if(il == 0)
1252 goto j_dollar_ungetc;
1253 --il, c2 = *ib++;
1254 if(state & a_SKIPQ)
1255 continue;
1256 c = upperconv(c2) ^ 0x40;
1257 if((ui8_t)c > 0x1F && c != 0x7F){ /* ASCII C0: 0..1F, 7F */
1258 if(flags & n_SHEXP_PARSE_LOG)
1259 n_err(_("Invalid \\c notation: %.*s\n"),
1260 (int)input->l, input->s);
1261 rv |= n_SHEXP_STATE_ERR_CONTROL;
1263 /* As an implementation-defined extension, support \c@
1264 * EQ printf(1) alike \c */
1265 if(c == '\0'){
1266 rv |= n_SHEXP_STATE_STOP;
1267 goto jleave;
1269 break;
1271 /* Octal sequence: 1 to 3 octal bytes */
1272 case '0':
1273 /* As an extension (dependent on where you look, echo(1), or
1274 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1275 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1276 c2 = c;
1277 --il, ++ib;
1279 /* FALLTHRU */
1280 case '1': case '2': case '3':
1281 case '4': case '5': case '6': case '7':
1282 c2 -= '0';
1283 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1284 c2 = (c2 << 3) | (c - '0');
1285 --il, ++ib;
1287 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
/* Two digits above 037 cannot take a third one without leaving 8 bits */
1288 if((ui8_t)c2 > 0x1F){
1289 if(flags & n_SHEXP_PARSE_LOG)
1290 n_err(_("\\0 argument exceeds a byte: %.*s\n"),
1291 (int)input->l, input->s);
1292 rv |= n_SHEXP_STATE_ERR_NUMBER;
1293 --il, ++ib;
1294 /* Write unchanged */
1295 je_ib_save:
1296 rv |= n_SHEXP_STATE_OUTPUT;
1297 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1298 store = n_string_push_buf(store, ib_save,
1299 PTR2SIZE(ib - ib_save));
1300 continue;
1302 c2 = (c2 << 3) | (c -= '0');
1303 --il, ++ib;
/* A NUL result switches to a_SKIPQ: subsequent bytes are not stored */
1305 if((c = c2) == '\0')
1306 state |= a_SKIPQ;
1307 if(state & a_SKIPQ)
1308 continue;
1309 break;
1311 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
1312 case 'U':
1313 i = 8;
1314 if(0){
1315 /* FALLTHRU */
1316 case 'u':
1317 i = 4;
1319 if(il == 0)
1320 goto j_dollar_ungetc;
1321 if(0){
1322 /* FALLTHRU */
1324 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1325 case 'X':
1326 case 'x':
1327 if(il == 0)
1328 goto j_dollar_ungetc;
1329 i = 2;
1331 /* C99 */{
1332 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
1333 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1335 size_t no, j;
/* Accumulate at most `i' hex digits into `no'; `j' counts digits seen */
1337 i = n_MIN(il, i);
1338 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
1339 c = *ib;
1340 if(hexchar(c)){
1341 no <<= 4;
1342 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
1343 : ((c) <= 'F' ? 55 : 87)))];
1344 }else if(j == 0){
1345 if(state & a_SKIPQ)
1346 break;
1347 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
1348 if(flags & n_SHEXP_PARSE_LOG)
1349 n_err(_("Invalid \\%c notation: %.*s\n"),
1350 c2, (int)input->l, input->s);
1351 rv |= n_SHEXP_STATE_ERR_NUMBER;
1352 goto je_ib_save;
1353 }else
1354 break;
1357 /* Unicode massage */
1358 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
1359 if((c = (char)no) == '\0')
1360 state |= a_SKIPQ;
1361 }else if(no == 0)
1362 state |= a_SKIPQ;
1363 else if(!(state & a_SKIPQ)){
1364 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1365 store = n_string_reserve(store, n_MAX(j, 4));
1367 c2 = FAL0;
1368 if(no > 0x10FFFF){ /* XXX magic; CText */
1369 if(flags & n_SHEXP_PARSE_LOG)
1370 n_err(_("\\U argument exceeds 0x10FFFF: %.*s\n"),
1371 (int)input->l, input->s);
1372 rv |= n_SHEXP_STATE_ERR_NUMBER;
1373 /* But normalize the output anyway */
1374 goto Je_uni_norm;
1377 j = n_utf32_to_utf8(no, utf);
1379 if(options & OPT_UNICODE){
1380 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
1381 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1382 store = n_string_push_buf(store, utf, j);
1383 continue;
1385 #ifdef HAVE_ICONV
1386 else{
1387 char *icp;
1389 icp = n_iconv_onetime_cp(n_ICONV_NONE,
1390 NULL, NULL, utf);
1391 if(icp != NULL){
1392 rv |= n_SHEXP_STATE_OUTPUT;
1393 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1394 store = n_string_push_cp(store, icp);
1395 continue;
1398 #endif
/* Fallback: store a normalized \uXXXX / \UXXXXXXXX escape as text */
1399 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Je_uni_norm:{
1400 char itoa[32];
1402 rv |= n_SHEXP_STATE_OUTPUT |
1403 n_SHEXP_STATE_ERR_UNICODE;
1404 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1405 (no > 0xFFFFu ? 'U' : 'u'),
1406 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
1407 store = n_string_push_buf(store, itoa, i);
1409 continue;
1411 if(state & a_SKIPQ)
1412 continue;
1414 break;
1416 /* Extension: \$ can be used to expand a variable.
1417 * Bug|ad effect: if conversion fails, not written "as-is" */
1418 case '$':
1419 if(il == 0)
1420 goto j_dollar_ungetc;
1421 goto J_var_expand;
1423 default:
1424 j_dollar_ungetc:
1425 /* Follow bash behaviour, print sequence unchanged */
1426 ++il, --ib;
1427 break;
/* Variable expansion; also the jump target for unquoted $X and for \$
 * inside of $'..' */
1430 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
1431 bool_t brace;
1433 if(!(brace = (*ib == '{')) || il > 1){
1434 char const *cp, *vp;
1436 ib_save = ib - 1;
1437 il -= brace;
1438 vp = (ib += brace);
/* Scan the variable name: `vp' is its start, `i' its length */
1440 for(i = 0; il > 0 && (c = *ib, a_SHEXP_ISVARC(c)); ++i)
1441 --il, ++ib;
1443 if(state & a_SKIPQ){
1444 if(brace && il > 0 && *ib == '}')
1445 --il, ++ib;
1446 continue;
1449 if(i == 0){
1450 if(brace){
1451 if(flags & n_SHEXP_PARSE_LOG)
1452 n_err(_("Bad substitution (${}): %.*s\n Near %.*s\n"),
1453 (int)input->l, input->s, (int)il, ib);
1454 rv |= n_SHEXP_STATE_ERR_BADSUB;
1455 goto je_ib_save;
1457 c = '$';
1458 }else{
1459 if(brace){
1460 if(il == 0 || *ib != '}'){
1461 if(flags & n_SHEXP_PARSE_LOG)
1462 n_err(_("Closing brace missing for ${VAR}: %.*s\n"
1463 " Near: %.*s\n"),
1464 (int)input->l, input->s, (int)il, ib);
1465 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN |
1466 n_SHEXP_STATE_ERR_BRACE;
1467 goto je_ib_save;
1469 --il, ++ib;
1472 if(flags & n_SHEXP_PARSE_DRYRUN)
1473 continue;
1475 /* Check getenv(3) shall no internal variable exist! */
1476 vp = savestrbuf(vp, i);
1477 if((cp = vok_vlook(vp)) != NULL || (cp = getenv(vp)) != NULL){
1478 rv |= n_SHEXP_STATE_OUTPUT;
1479 store = n_string_push_cp(store, cp);
/* Report whether the expanded value contains control characters */
1480 for(; (c = *cp) != '\0'; ++cp)
1481 if(cntrlchar(c)){
1482 rv |= n_SHEXP_STATE_CONTROL;
1483 break;
1486 continue;
1489 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
1490 continue;
/* Ordinary byte (or the result of an escape sequence): store it */
1494 if(!(state & a_SKIPQ)){
1495 rv |= n_SHEXP_STATE_OUTPUT;
1496 if(cntrlchar(c))
1497 rv |= n_SHEXP_STATE_CONTROL;
1498 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1499 store = n_string_push_c(store, c);
/* Input ended (or a separator was hit) while still inside a quote */
1503 if(quotec != '\0' && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_CLOSE)){
1504 if(flags & n_SHEXP_PARSE_LOG)
1505 n_err(_("no closing quote: %.*s\n"), (int)input->l, input->s);
1506 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
1509 jleave:
/* In a dry run the raw, still quoted input that was consumed is copied */
1510 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
1511 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
1512 rv |= n_SHEXP_STATE_OUTPUT;
/* NOTE(review): the leading trim above uses blankspacechar(), this trailing
 * trim uses blankchar() -- confirm the difference is intended */
1515 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1516 for(; il > 0; ++ib, --il)
1517 if(!blankchar(*ib))
1518 break;
/* Hand the unconsumed remainder back to the caller */
1520 input->l = il;
1521 input->s = n_UNCONST(ib);
1523 if(!(rv & n_SHEXP_STATE_STOP)){
1524 if(il > 0 && !(rv & n_SHEXP_STATE_OUTPUT) &&
1525 (flags & n_SHEXP_PARSE_IGNORE_EMPTY))
1526 goto jrestart_empty;
1527 if(/*!(rv & n_SHEXP_STATE_OUTPUT) &&*/ il == 0)
1528 rv |= n_SHEXP_STATE_STOP;
1530 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1531 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1532 NYD2_LEAVE;
1533 return rv;
1536 FL enum n_shexp_state
1537 n_shexp_parse_token_buf(char **store, char const *indat, size_t inlen,
1538 enum n_shexp_parse_flags flags){
1539 struct n_string ss;
1540 struct str is;
1541 enum n_shexp_state shs;
1542 NYD2_ENTER;
1544 assert(store != NULL);
1545 assert(inlen == 0 || indat != NULL);
1547 n_string_creat_auto(&ss);
1548 is.s = n_UNCONST(indat);
1549 is.l = inlen;
1551 shs = n_shexp_parse_token(&ss, &is, flags);
1552 if(is.l > 0)
1553 shs &= ~n_SHEXP_STATE_STOP;
1554 else
1555 shs |= n_SHEXP_STATE_STOP;
1556 *store = n_string_cp(&ss);
1557 n_string_drop_ownership(&ss);
1559 n_string_gut(&ss);
1560 NYD2_LEAVE;
1561 return shs;
1564 FL struct n_string *
1565 n_shexp_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1566 struct a_shexp_quote_lvl sql;
1567 struct a_shexp_quote_ctx sqc;
1568 NYD2_ENTER;
1570 assert(store != NULL);
1571 assert(input != NULL);
1572 assert(input->l == 0 || input->s != NULL);
1574 memset(&sqc, 0, sizeof sqc);
1575 sqc.sqc_store = store;
1576 sqc.sqc_input.s = input->s;
1577 if((sqc.sqc_input.l = input->l) == UIZ_MAX)
1578 sqc.sqc_input.l = strlen(input->s);
1579 sqc.sqc_flags = rndtrip ? a_SHEXP_QUOTE_ROUNDTRIP : a_SHEXP_QUOTE_NONE;
1581 if(sqc.sqc_input.l == 0)
1582 store = n_string_push_buf(store, "''", sizeof("''") -1);
1583 else{
1584 memset(&sql, 0, sizeof sql);
1585 sql.sql_dat = sqc.sqc_input;
1586 sql.sql_flags = sqc.sqc_flags;
1587 a_shexp__quote(&sqc, &sql);
1589 NYD2_LEAVE;
1590 return store;
1593 FL char *
1594 n_shexp_quote_cp(char const *cp, bool_t rndtrip){
1595 struct n_string store;
1596 struct str input;
1597 char *rv;
1598 NYD2_ENTER;
1600 assert(cp != NULL);
1602 input.s = n_UNCONST(cp);
1603 input.l = UIZ_MAX;
1604 rv = n_string_cp(n_shexp_quote(n_string_creat_auto(&store), &input,
1605 rndtrip));
1606 n_string_gut(n_string_drop_ownership(&store));
1607 NYD2_LEAVE;
1608 return rv;
1611 FL bool_t
1612 n_shexp_is_valid_varname(char const *name){
1613 char c;
1614 bool_t rv;
1615 NYD2_ENTER;
1617 for(rv = TRU1; (c = *name++) != '\0';)
1618 if(!a_SHEXP_ISVARC(c)){
1619 rv = FAL0;
1620 break;
1622 NYD2_LEAVE;
1623 return rv;
1626 /* s-it-mode */