Tweak previous, it added a bad memory access
[s-mailx.git] / shexp.c
blob01a2771efe26ac0fc82191525736b556481d6917
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
5 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
6 * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7 */
8 /*
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 #undef n_FILE
37 #define n_FILE shexp
39 #ifndef HAVE_AMALGAMATION
40 # include "nail.h"
41 #endif
43 #include <sys/wait.h>
45 #include <pwd.h>
47 #ifdef HAVE_FNMATCH
48 # include <dirent.h>
49 # include <fnmatch.h>
50 #endif
/* POSIX says
 *   Environment variable names used by the utilities in the Shell and
 *   Utilities volume of POSIX.1-2008 consist solely of uppercase
 *   letters, digits, and the <underscore> ('_') from the characters
 *   defined in Portable Character Set and do not begin with a digit.
 *   Other characters may be permitted by an implementation;
 *   applications shall tolerate the presence of such names.
 * We do support the hyphen-minus "-" (except in last position for ${x[:]-y}).
 * We support some special parameter names for one-letter(++) variable names;
 * these have counterparts in the code that manages internal variables,
 * and some more special treatment below! */

/* Whether C may appear anywhere in a variable name */
#define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* Whether C is not allowed as the first byte of a variable name */
#define a_SHEXP_ISVARC_BAD1ST(C) (digitchar(C)) /* (Actually assumed below!) */
/* Whether C is not allowed as the last byte of a variable name */
#define a_SHEXP_ISVARC_BADNST(C) ((C) == '-')
/* Flags which steer a_shexp__quote(): bit 0 is a behaviour flag, bits 8-11
 * name the quote type (a_SHEXP_QUOTE_T_MASK covers all of them) */
enum a_shexp_quote_flags{
   a_SHEXP_QUOTE_NONE,
   a_SHEXP_QUOTE_ROUNDTRIP = 1u<<0, /* Result won't be consumed immediately */

   a_SHEXP_QUOTE_T_REVSOL = 1u<<8,  /* Type: by reverse solidus */
   a_SHEXP_QUOTE_T_SINGLE = 1u<<9,  /* Type: single-quotes */
   a_SHEXP_QUOTE_T_DOUBLE = 1u<<10, /* Type: double-quotes */
   a_SHEXP_QUOTE_T_DOLLAR = 1u<<11, /* Type: dollar-single-quotes */
   a_SHEXP_QUOTE_T_MASK = a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
         a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR,

   a_SHEXP_QUOTE__FREESHIFT = 16u
};
#ifdef HAVE_FNMATCH
/* State of one directory level of a_shexp__glob() */
struct a_shexp_glob_ctx{
   char const *sgc_patdat;       /* Remaining pattern (at and below level) */
   size_t sgc_patlen;
   struct n_string *sgc_outer;   /* Resolved path up to this level */
   ui32_t sgc_flags;
   ui8_t sgc__dummy[4];
};
#endif
91 struct a_shexp_quote_ctx{
92 struct n_string *sqc_store; /* Result storage */
93 struct str sqc_input; /* Input data, topmost level */
94 ui32_t sqc_cnt_revso;
95 ui32_t sqc_cnt_single;
96 ui32_t sqc_cnt_double;
97 ui32_t sqc_cnt_dollar;
98 enum a_shexp_quote_flags sqc_flags;
99 ui8_t sqc__dummy[4];
102 struct a_shexp_quote_lvl{
103 struct a_shexp_quote_lvl *sql_link; /* Outer level */
104 struct str sql_dat; /* This level (has to) handle(d) */
105 enum a_shexp_quote_flags sql_flags;
106 ui8_t sql__dummy[4];
109 /* Locate the user's mailbox file (where new, unread mail is queued) */
110 static char *a_shexp_findmail(char const *user, bool_t force);
112 /* Expand ^~/? and ^~USER/? constructs.
113 * Returns the completely resolved (maybe empty or identical to input)
114 * salloc()ed string */
115 static char *a_shexp_tilde(char const *s);
117 /* Perform fnmatch(3). May return NULL on error */
118 static char *a_shexp_globname(char const *name, enum fexp_mode fexpm);
119 #ifdef HAVE_FNMATCH
120 static bool_t a_shexp__glob(struct a_shexp_glob_ctx *sgcp,
121 struct n_strlist **slpp);
122 static int a_shexp__globsort(void const *cvpa, void const *cvpb);
123 #endif
125 /* Parse an input string and create a sh(1)ell-quoted result */
126 static void a_shexp__quote(struct a_shexp_quote_ctx *sqcp,
127 struct a_shexp_quote_lvl *sqlp);
129 static char *
130 a_shexp_findmail(char const *user, bool_t force){
131 char *rv;
132 char const *cp;
133 NYD2_ENTER;
135 if(!force){
136 if((cp = ok_vlook(inbox)) != NULL && *cp != '\0'){
137 /* Folder extra introduced to avoid % recursion loops */
138 if((rv = fexpand(cp, FEXP_NSPECIAL | FEXP_NFOLDER | FEXP_NSHELL)
139 ) != NULL)
140 goto jleave;
141 n_err(_("*inbox* expansion failed, using $MAIL / builtin: %s\n"), cp);
144 if((cp = ok_vlook(MAIL)) != NULL){
145 rv = savestr(cp);
146 goto jleave;
150 /* C99 */{
151 size_t ul, i;
153 ul = strlen(user) +1;
154 i = sizeof(VAL_MAIL) -1 + 1 + ul;
156 rv = salloc(i);
157 memcpy(rv, VAL_MAIL, (i = sizeof(VAL_MAIL) -1));
158 rv[i] = '/';
159 memcpy(&rv[++i], user, ul);
161 jleave:
162 NYD2_LEAVE;
163 return rv;
166 static char *
167 a_shexp_tilde(char const *s){
168 struct passwd *pwp;
169 size_t nl, rl;
170 char const *rp, *np;
171 char *rv;
172 NYD2_ENTER;
174 if(*(rp = &s[1]) == '/' || *rp == '\0'){
175 np = ok_vlook(HOME);
176 rl = strlen(rp);
177 }else{
178 if((rp = strchr(np = rp, '/')) != NULL){
179 nl = PTR2SIZE(rp - np);
180 np = savestrbuf(np, nl);
181 rl = strlen(rp);
182 }else
183 rl = 0;
185 if((pwp = getpwnam(np)) == NULL){
186 rv = savestr(s);
187 goto jleave;
189 np = pwp->pw_dir;
192 nl = strlen(np);
193 rv = salloc(nl + 1 + rl +1);
194 memcpy(rv, np, nl);
195 if(rl > 0){
196 memcpy(rv + nl, rp, rl);
197 nl += rl;
199 rv[nl] = '\0';
200 jleave:
201 NYD2_LEAVE;
202 return rv;
205 static char *
206 a_shexp_globname(char const *name, enum fexp_mode fexpm){
207 #ifdef HAVE_FNMATCH
208 struct a_shexp_glob_ctx sgc;
209 struct n_string outer;
210 struct n_strlist *slp;
211 char *cp;
212 NYD_ENTER;
214 memset(&sgc, 0, sizeof sgc);
215 sgc.sgc_patlen = strlen(name);
216 sgc.sgc_patdat = savestrbuf(name, sgc.sgc_patlen);
217 sgc.sgc_outer = n_string_reserve(n_string_creat(&outer), sgc.sgc_patlen);
218 sgc.sgc_flags = ((fexpm & FEXP_SILENT) != 0);
219 slp = NULL;
220 if(a_shexp__glob(&sgc, &slp))
221 cp = (char*)1;
222 else
223 cp = NULL;
224 n_string_gut(&outer);
226 if(cp == NULL)
227 goto jleave;
229 if(slp == NULL){
230 cp = n_UNCONST(N_("File pattern does not match"));
231 goto jerr;
232 }else if(slp->sl_next == NULL)
233 cp = savestrbuf(slp->sl_dat, slp->sl_len);
234 else if(fexpm & FEXP_MULTIOK){
235 struct n_strlist **sorta, *xslp;
236 size_t i, no, l;
238 no = l = 0;
239 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next){
240 ++no;
241 l += xslp->sl_len + 1;
244 sorta = smalloc(sizeof(*sorta) * no);
245 no = 0;
246 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next)
247 sorta[no++] = xslp;
248 qsort(sorta, no, sizeof *sorta, &a_shexp__globsort);
250 cp = salloc(++l);
251 l = 0;
252 for(i = 0; i < no; ++i){
253 xslp = sorta[i];
254 memcpy(&cp[l], xslp->sl_dat, xslp->sl_len);
255 l += xslp->sl_len;
256 cp[l++] = '\0';
258 cp[l] = '\0';
260 free(sorta);
261 n_pstate |= n_PS_EXPAND_MULTIRESULT;
262 }else{
263 cp = n_UNCONST(N_("File pattern matches multiple results"));
264 goto jerr;
267 jleave:
268 while(slp != NULL){
269 struct n_strlist *tmp = slp;
271 slp = slp->sl_next;
272 free(tmp);
274 NYD_LEAVE;
275 return cp;
277 jerr:
278 if(!(fexpm & FEXP_SILENT)){
279 name = n_shexp_quote_cp(name, FAL0);
280 n_err("%s: %s\n", V_(cp), name);
282 cp = NULL;
283 goto jleave;
285 #else /* HAVE_FNMATCH */
286 n_UNUSED(fexpm);
288 if(!(fexpm & FEXP_SILENT))
289 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
290 return savestr(name);
291 #endif
294 #ifdef HAVE_FNMATCH
295 static bool_t
296 a_shexp__glob(struct a_shexp_glob_ctx *sgcp, struct n_strlist **slpp){
297 enum{a_SILENT = 1<<0, a_DEEP=1<<1, a_SALLOC=1<<2};
299 struct a_shexp_glob_ctx nsgc;
300 struct dirent *dep;
301 DIR *dp;
302 size_t old_outerlen;
303 char const *ccp, *myp;
304 NYD2_ENTER;
306 /* We need some special treatment for the outermost level */
307 if(!(sgcp->sgc_flags & a_DEEP)){
308 if(sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/'){
309 myp = n_string_cp(n_string_push_c(sgcp->sgc_outer, '/'));
310 ++sgcp->sgc_patdat;
311 --sgcp->sgc_patlen;
312 }else
313 myp = "./";
314 }else
315 myp = n_string_cp(sgcp->sgc_outer);
316 old_outerlen = sgcp->sgc_outer->s_len;
318 /* Separate current directory/pattern level from any possible remaining
319 * pattern in order to be able to use it for fnmatch(3) */
320 if((ccp = memchr(sgcp->sgc_patdat, '/', sgcp->sgc_patlen)) == NULL)
321 nsgc.sgc_patlen = 0;
322 else{
323 nsgc = *sgcp;
324 nsgc.sgc_flags |= a_DEEP;
325 sgcp->sgc_patlen = PTR2SIZE((nsgc.sgc_patdat = &ccp[1]) -
326 &sgcp->sgc_patdat[0]);
327 nsgc.sgc_patlen -= sgcp->sgc_patlen;
328 /* Trim solidus */
329 if(sgcp->sgc_patlen > 0){
330 assert(sgcp->sgc_patdat[sgcp->sgc_patlen -1] == '/');
331 ((char*)n_UNCONST(sgcp->sgc_patdat))[--sgcp->sgc_patlen] = '\0';
335 /* Our current directory level */
336 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
337 * xxx be the (sole) result depending on pattern surroundings, etc. */
338 if((dp = opendir(myp)) == NULL){
339 int err;
341 switch((err = n_err_no)){
342 case n_ERR_NOTDIR:
343 ccp = N_("cannot access paths under non-directory");
344 goto jerr;
345 case n_ERR_NOENT:
346 ccp = N_("path component of (sub)pattern non-existent");
347 goto jerr;
348 case n_ERR_ACCES:
349 ccp = N_("file permission for file (sub)pattern denied");
350 goto jerr;
351 default:
352 ccp = N_("cannot handle file (sub)pattern");
353 goto jerr;
357 /* As necessary, quote bytes in the current pattern */
358 /* C99 */{
359 char *ncp;
360 size_t i;
361 bool_t need;
363 for(need = FAL0, i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
364 switch(*myp){
365 case '\'': case '"': case '\\': case '$':
366 case ' ': case '\t':
367 need = TRU1;
368 ++i;
369 /* FALLTHRU */
370 default:
371 ++i;
372 break;
375 if(need){
376 ncp = salloc(i +1);
377 for(i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
378 switch(*myp){
379 case '\'': case '"': case '\\': case '$':
380 case ' ': case '\t':
381 ncp[i++] = '\\';
382 /* FALLTHRU */
383 default:
384 ncp[i++] = *myp;
385 break;
387 ncp[i] = '\0';
388 myp = ncp;
389 }else
390 myp = sgcp->sgc_patdat;
393 while((dep = readdir(dp)) != NULL){
394 switch(fnmatch(myp, dep->d_name, FNM_PATHNAME | FNM_PERIOD)){
395 case 0:{
396 /* A match expresses the desire to recurse if there is more pattern */
397 if(nsgc.sgc_patlen > 0){
398 bool_t isdir;
400 n_string_push_cp((sgcp->sgc_outer->s_len > 1
401 ? n_string_push_c(sgcp->sgc_outer, '/') : sgcp->sgc_outer),
402 dep->d_name);
404 isdir = FAL0;
405 #ifdef HAVE_DIRENT_TYPE
406 if(dep->d_type == DT_DIR)
407 isdir = TRU1;
408 else if(dep->d_type == DT_LNK || dep->d_type == DT_UNKNOWN)
409 #endif
411 struct stat sb;
413 if(stat(n_string_cp(sgcp->sgc_outer), &sb)){
414 ccp = N_("I/O error when querying file status");
415 goto jerr;
416 }else if(S_ISDIR(sb.st_mode))
417 isdir = TRU1;
420 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
421 * TODO Instead save away a list of such n_string's for later */
422 if(isdir && !a_shexp__glob(&nsgc, slpp)){
423 ccp = (char*)1;
424 goto jleave;
427 n_string_trunc(sgcp->sgc_outer, old_outerlen);
428 }else{
429 struct n_strlist *slp;
430 size_t i, j;
432 i = strlen(dep->d_name);
433 j = (old_outerlen > 0) ? old_outerlen + 1 + i : i;
434 slp = n_STRLIST_MALLOC(j);
435 *slpp = slp;
436 slpp = &slp->sl_next;
437 slp->sl_next = NULL;
438 if((j = old_outerlen) > 0){
439 memcpy(&slp->sl_dat[0], sgcp->sgc_outer->s_dat, j);
440 if(slp->sl_dat[j -1] != '/')
441 slp->sl_dat[j++] = '/';
443 memcpy(&slp->sl_dat[j], dep->d_name, i);
444 slp->sl_dat[j += i] = '\0';
445 slp->sl_len = j;
447 } break;
448 case FNM_NOMATCH:
449 break;
450 default:
451 ccp = N_("fnmatch(3) cannot handle file (sub)pattern");
452 goto jerr;
456 ccp = NULL;
457 jleave:
458 if(dp != NULL)
459 closedir(dp);
460 NYD2_LEAVE;
461 return (ccp == NULL);
463 jerr:
464 if(!(sgcp->sgc_flags & a_SILENT)){
465 char const *s2, *s3;
467 if(sgcp->sgc_outer->s_len > 0){
468 s2 = n_shexp_quote_cp(n_string_cp(sgcp->sgc_outer), FAL0);
469 s3 = "/";
470 }else
471 s2 = s3 = n_empty;
473 n_err("%s: %s%s%s\n", V_(ccp), s2, s3,
474 n_shexp_quote_cp(sgcp->sgc_patdat, FAL0));
476 goto jleave;
479 static int
480 a_shexp__globsort(void const *cvpa, void const *cvpb){
481 int rv;
482 struct n_strlist const * const *slpa, * const *slpb;
483 NYD2_ENTER;
485 slpa = cvpa;
486 slpb = cvpb;
487 rv = asccasecmp((*slpa)->sl_dat, (*slpb)->sl_dat);
488 NYD2_LEAVE;
489 return rv;
491 #endif /* HAVE_FNMATCH */
493 static void
494 a_shexp__quote(struct a_shexp_quote_ctx *sqcp, struct a_shexp_quote_lvl *sqlp){
495 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
496 * XXX use the recursive implementation because of stateful encodings.
497 * XXX I.e., if a quoted substring cannot be self-contained - the data after
498 * XXX the quote relies on "the former state", then this doesn't make sense.
499 * XXX Therefore this is not fully programmed out but instead only detects
500 * XXX the "most fancy" quoting necessary, and directly does that.
501 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
502 * XXX Otherwise we rather have to convert to wide first and act on that,
503 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
504 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
505 #ifdef a_SHEXP_QUOTE_RECURSE
506 # define jrecurse jrecurse
507 struct a_shexp_quote_lvl sql;
508 #else
509 # define jrecurse jstep
510 #endif
511 struct n_visual_info_ctx vic;
512 union {struct a_shexp_quote_lvl *head; struct n_string *store;} u;
513 ui32_t flags;
514 size_t il;
515 char const *ib, *ib_base;
516 NYD2_ENTER;
518 ib_base = ib = sqlp->sql_dat.s;
519 il = sqlp->sql_dat.l;
520 flags = sqlp->sql_flags;
522 /* Iterate over the entire input, classify characters and type of quotes
523 * along the way. Whenever a quote change has to be applied, adjust flags
524 * for the new situation -, setup sql.* and recurse- */
525 while(il > 0){
526 char c;
528 c = *ib;
529 if(cntrlchar(c)){
530 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
531 goto jstep;
532 if(c == '\t' && (flags & (a_SHEXP_QUOTE_T_REVSOL |
533 a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOUBLE)))
534 goto jstep;
535 #ifdef a_SHEXP_QUOTE_RECURSE
536 ++sqcp->sqc_cnt_dollar;
537 #endif
538 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
539 goto jrecurse;
540 }else if(blankspacechar(c) || c == '|' || c == '&' || c == ';' ||
541 /* Whereas we don't support those, quote them for the sh(1)ell */
542 c == '(' || c == ')' || c == '<' || c == '>' ||
543 c == '"' || c == '$'){
544 if(flags & a_SHEXP_QUOTE_T_MASK)
545 goto jstep;
546 #ifdef a_SHEXP_QUOTE_RECURSE
547 ++sqcp->sqc_cnt_single;
548 #endif
549 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
550 goto jrecurse;
551 }else if(c == '\''){
552 if(flags & (a_SHEXP_QUOTE_T_MASK & ~a_SHEXP_QUOTE_T_SINGLE))
553 goto jstep;
554 #ifdef a_SHEXP_QUOTE_RECURSE
555 ++sqcp->sqc_cnt_dollar;
556 #endif
557 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
558 goto jrecurse;
559 }else if(c == '\\' || (c == '#' && ib == ib_base)){
560 if(flags & a_SHEXP_QUOTE_T_MASK)
561 goto jstep;
562 #ifdef a_SHEXP_QUOTE_RECURSE
563 ++sqcp->sqc_cnt_single;
564 #endif
565 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
566 goto jrecurse;
567 }else if(!asciichar(c)){
568 /* Need to keep together multibytes */
569 #ifdef a_SHEXP_QUOTE_RECURSE
570 memset(&vic, 0, sizeof vic);
571 vic.vic_indat = ib;
572 vic.vic_inlen = il;
573 n_visual_info(&vic,
574 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
575 #endif
576 /* xxx check whether resulting \u would be ASCII */
577 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP) ||
578 (flags & a_SHEXP_QUOTE_T_DOLLAR)){
579 #ifdef a_SHEXP_QUOTE_RECURSE
580 ib = vic.vic_oudat;
581 il = vic.vic_oulen;
582 continue;
583 #else
584 goto jstep;
585 #endif
587 #ifdef a_SHEXP_QUOTE_RECURSE
588 ++sqcp->sqc_cnt_dollar;
589 #endif
590 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
591 goto jrecurse;
592 }else
593 jstep:
594 ++ib, --il;
596 sqlp->sql_flags = flags;
598 /* Level made the great and completed processing input. Reverse the list of
599 * levels, detect the "most fancy" quote type needed along this way */
600 /* XXX Due to restriction as above very crude */
601 for(flags = 0, il = 0, u.head = NULL; sqlp != NULL;){
602 struct a_shexp_quote_lvl *tmp;
604 tmp = sqlp->sql_link;
605 sqlp->sql_link = u.head;
606 u.head = sqlp;
607 il += sqlp->sql_dat.l;
608 if(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK)
609 il += (sqlp->sql_dat.l >> 1);
610 flags |= sqlp->sql_flags;
611 sqlp = tmp;
613 sqlp = u.head;
615 /* Finally work the substrings in the correct order, adjusting quotes along
616 * the way as necessary. Start off with the "most fancy" quote, so that
617 * the user sees an overall boundary she can orientate herself on.
618 * We do it like that to be able to give the user some "encapsulation
619 * experience", to address what strikes me is a problem of sh(1)ell quoting:
620 * different to, e.g., perl(1), where you see at a glance where a string
621 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
622 * visual appearance of "a string" as such */
623 u.store = n_string_reserve(sqcp->sqc_store, il);
625 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
626 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
627 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
628 }else if(flags & a_SHEXP_QUOTE_T_DOUBLE){
629 u.store = n_string_push_c(u.store, '"');
630 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOUBLE;
631 }else if(flags & a_SHEXP_QUOTE_T_SINGLE){
632 u.store = n_string_push_c(u.store, '\'');
633 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
634 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
635 flags &= ~a_SHEXP_QUOTE_T_MASK;
637 /* Work all the levels */
638 for(; sqlp != NULL; sqlp = sqlp->sql_link){
639 /* As necessary update our mode of quoting */
640 #ifdef a_SHEXP_QUOTE_RECURSE
641 il = 0;
643 switch(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK){
644 case a_SHEXP_QUOTE_T_DOLLAR:
645 if(!(flags & a_SHEXP_QUOTE_T_DOLLAR))
646 il = a_SHEXP_QUOTE_T_DOLLAR;
647 break;
648 case a_SHEXP_QUOTE_T_DOUBLE:
649 if(!(flags & (a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
650 il = a_SHEXP_QUOTE_T_DOLLAR;
651 break;
652 case a_SHEXP_QUOTE_T_SINGLE:
653 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
654 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
655 il = a_SHEXP_QUOTE_T_SINGLE;
656 break;
657 default:
658 case a_SHEXP_QUOTE_T_REVSOL:
659 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
660 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
661 il = a_SHEXP_QUOTE_T_REVSOL;
662 break;
665 if(il != 0){
666 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
667 u.store = n_string_push_c(u.store, '\'');
668 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
669 u.store = n_string_push_c(u.store, '"');
670 flags &= ~a_SHEXP_QUOTE_T_MASK;
672 flags |= (ui32_t)il;
673 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
674 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
675 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
676 u.store = n_string_push_c(u.store, '"');
677 else if(flags & a_SHEXP_QUOTE_T_SINGLE)
678 u.store = n_string_push_c(u.store, '\'');
680 #endif /* a_SHEXP_QUOTE_RECURSE */
682 /* Work the level's substring */
683 ib = sqlp->sql_dat.s;
684 il = sqlp->sql_dat.l;
686 while(il > 0){
687 char c2, c;
689 c = *ib;
691 if(cntrlchar(c)){
692 assert(c == '\t' || (flags & a_SHEXP_QUOTE_T_DOLLAR));
693 assert((flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
694 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)));
695 switch((c2 = c)){
696 case 0x07: c = 'a'; break;
697 case 0x08: c = 'b'; break;
698 case 0x0A: c = 'n'; break;
699 case 0x0B: c = 'v'; break;
700 case 0x0C: c = 'f'; break;
701 case 0x0D: c = 'r'; break;
702 case 0x1B: c = 'E'; break;
703 default: break;
704 case 0x09:
705 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
706 c = 't';
707 break;
709 if(flags & a_SHEXP_QUOTE_T_REVSOL)
710 u.store = n_string_push_c(u.store, '\\');
711 goto jpush;
713 u.store = n_string_push_c(u.store, '\\');
714 if(c == c2){
715 u.store = n_string_push_c(u.store, 'c');
716 c ^= 0x40;
718 goto jpush;
719 }else if(blankspacechar(c) || c == '|' || c == '&' || c == ';' ||
720 /* Whereas we don't support those, quote them for the sh(1)ell */
721 c == '(' || c == ')' || c == '<' || c == '>' ||
722 c == '"' || c == '$'){
723 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
724 goto jpush;
725 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE));
726 u.store = n_string_push_c(u.store, '\\');
727 goto jpush;
728 }else if(c == '\''){
729 if(flags & a_SHEXP_QUOTE_T_DOUBLE)
730 goto jpush;
731 assert(!(flags & a_SHEXP_QUOTE_T_SINGLE));
732 u.store = n_string_push_c(u.store, '\\');
733 goto jpush;
734 }else if(c == '\\' || (c == '#' && ib == ib_base)){
735 if(flags & a_SHEXP_QUOTE_T_SINGLE)
736 goto jpush;
737 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE |
738 a_SHEXP_QUOTE_T_DOLLAR));
739 u.store = n_string_push_c(u.store, '\\');
740 goto jpush;
741 }else if(asciichar(c)){
742 /* Shorthand: we can simply push that thing out */
743 jpush:
744 u.store = n_string_push_c(u.store, c);
745 ++ib, --il;
746 }else{
747 /* Not an ASCII character, take care not to split up multibyte
748 * sequences etc. For the sake of compile testing, don't enwrap in
749 * HAVE_ALWAYS_UNICODE_LOCALE || HAVE_NATCH_CHAR */
750 if(n_psonce & n_PSO_UNICODE){
751 ui32_t uc;
752 char const *ib2;
753 size_t il2, il3;
755 ib2 = ib;
756 il3 = il2 = il;
757 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
758 char itoa[32];
759 char const *cp;
761 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
762 if((flags & a_SHEXP_QUOTE_ROUNDTRIP) || uc == 0xFFFDu){
763 /* Use padding to make ambiguities impossible */
764 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
765 (uc > 0xFFFFu ? 'U' : 'u'),
766 (int)(uc > 0xFFFFu ? 8 : 4), uc);
767 cp = itoa;
768 }else{
769 il3 = il2;
770 cp = &ib[0];
772 u.store = n_string_push_buf(u.store, cp, il3);
773 ib += il2, il -= il2;
774 continue;
778 memset(&vic, 0, sizeof vic);
779 vic.vic_indat = ib;
780 vic.vic_inlen = il;
781 n_visual_info(&vic,
782 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
784 /* Work this substring as sensitive as possible */
785 il -= vic.vic_oulen;
786 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP))
787 u.store = n_string_push_buf(u.store, ib, il);
788 #ifdef HAVE_ICONV
789 else if((vic.vic_indat = n_iconv_onetime_cp(n_ICONV_NONE,
790 "utf-8", ok_vlook(ttycharset), savestrbuf(ib, il))) != NULL){
791 ui32_t uc;
792 char const *ib2;
793 size_t il2, il3;
795 il3 = il2 = strlen(ib2 = vic.vic_indat);
796 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
797 char itoa[32];
799 il2 = PTR2SIZE(&ib2[0] - &vic.vic_indat[0]);
800 /* Use padding to make ambiguities impossible */
801 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
802 (uc > 0xFFFFu ? 'U' : 'u'),
803 (int)(uc > 0xFFFFu ? 8 : 4), uc);
804 u.store = n_string_push_buf(u.store, itoa, il3);
805 }else
806 goto Jxseq;
808 #endif
809 else
810 #ifdef HAVE_ICONV
811 Jxseq:
812 #endif
813 while(il-- > 0){
814 u.store = n_string_push_buf(u.store, "\\xFF",
815 sizeof("\\xFF") -1);
816 n_c_to_hex_base16(&u.store->s_dat[u.store->s_len - 2], *ib++);
819 ib = vic.vic_oudat;
820 il = vic.vic_oulen;
825 /* Close an open quote */
826 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
827 u.store = n_string_push_c(u.store, '\'');
828 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
829 u.store = n_string_push_c(u.store, '"');
830 #ifdef a_SHEXP_QUOTE_RECURSE
831 jleave:
832 #endif
833 NYD2_LEAVE;
834 return;
836 #ifdef a_SHEXP_QUOTE_RECURSE
837 jrecurse:
838 sqlp->sql_dat.l -= il;
840 sql.sql_link = sqlp;
841 sql.sql_dat.s = n_UNCONST(ib);
842 sql.sql_dat.l = il;
843 sql.sql_flags = flags;
844 a_shexp__quote(sqcp, &sql);
845 goto jleave;
846 #endif
848 #undef jrecurse
849 #undef a_SHEXP_QUOTE_RECURSE
852 FL char *
853 fexpand(char const *name, enum fexp_mode fexpm)
855 struct str s;
856 char const *cp, *res;
857 bool_t dyn;
858 NYD_ENTER;
860 n_pstate &= ~n_PS_EXPAND_MULTIRESULT;
862 /* The order of evaluation is "%" and "#" expand into constants.
863 * "&" can expand into "+". "+" can expand into shell meta characters.
864 * Shell meta characters expand into constants.
865 * This way, we make no recursive expansion */
866 if ((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
867 res = n_UNCONST(name);
869 if(!(fexpm & FEXP_NSPECIAL)){
870 jnext:
871 dyn = FAL0;
872 switch (*res) {
873 case '%':
874 if(res[1] == ':' && res[2] != '\0')
875 res = &res[2];
876 else{
877 bool_t force;
879 force = (res[1] != '\0');
880 res = a_shexp_findmail((force ? &res[1] : ok_vlook(LOGNAME)),
881 force);
882 if(force)
883 goto jislocal;
885 goto jnext;
886 case '#':
887 if (res[1] != '\0')
888 break;
889 if (prevfile[0] == '\0') {
890 n_err(_("No previous file\n"));
891 res = NULL;
892 goto jleave;
894 res = prevfile;
895 goto jislocal;
896 case '&':
897 if (res[1] == '\0')
898 res = ok_vlook(MBOX);
899 break;
903 /* POSIX: if *folder* unset or null, "+" shall be retained */
904 if (!(fexpm & FEXP_NFOLDER) && *res == '+' &&
905 *(cp = folder_query()) != '\0') {
906 res = str_concat_csvl(&s, cp, &res[1], NULL)->s;
907 dyn = TRU1;
909 /* TODO *folder* can't start with %[:], can it!?! */
910 if (res[0] == '%' && res[1] == ':') {
911 res += 2;
912 goto jnext;
916 /* Do some meta expansions */
917 if((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
918 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
919 : anyof(res, "{}[]*?$"))){
920 bool_t doexp;
922 if(fexpm & FEXP_NOPROTO)
923 doexp = TRU1;
924 else switch(which_protocol(res)){
925 case PROTO_FILE:
926 case PROTO_MAILDIR:
927 doexp = TRU1;
928 break;
929 default:
930 doexp = FAL0;
931 break;
934 if(doexp){
935 struct str shin;
936 struct n_string shou, *shoup;
938 shin.s = n_UNCONST(res);
939 shin.l = UIZ_MAX;
940 shoup = n_string_creat_auto(&shou);
941 for(;;){
942 enum n_shexp_state shs;
944 /* TODO shexp: take care to not include backtick eval once avail! */
945 shs = n_shexp_parse_token((n_SHEXP_PARSE_LOG_D_V |
946 n_SHEXP_PARSE_QUOTE_AUTO_FIXED | n_SHEXP_PARSE_QUOTE_AUTO_DQ |
947 n_SHEXP_PARSE_QUOTE_AUTO_CLOSE), shoup, &shin, NULL);
948 if(shs & n_SHEXP_STATE_STOP)
949 break;
951 res = n_string_cp(shoup);
952 shoup = n_string_drop_ownership(shoup);
953 dyn = TRU1;
955 if(res[0] == '~')
956 res = a_shexp_tilde(res);
958 if(!(fexpm & FEXP_NSHELL) &&
959 (res = a_shexp_globname(res, fexpm)) == NULL)
960 goto jleave;
961 dyn = TRU1;
962 }/* else no tilde */
963 }else if(res[0] == '~'){
964 res = a_shexp_tilde(res);
965 dyn = TRU1;
968 jislocal:
969 if (fexpm & FEXP_LOCAL)
970 switch (which_protocol(res)) {
971 case PROTO_FILE:
972 case PROTO_MAILDIR:
973 break;
974 default:
975 n_err(_("Not a local file or directory: %s\n"),
976 n_shexp_quote_cp(name, FAL0));
977 res = NULL;
978 break;
981 jleave:
982 if(res != NULL && !dyn)
983 res = savestr(res);
984 NYD_LEAVE;
985 return n_UNCONST(res);
988 FL enum n_shexp_state
989 n_shexp_parse_token(enum n_shexp_parse_flags flags, struct n_string *store,
990 struct str *input, void const **cookie){
991 /* TODO shexp_parse_token: WCHAR; $IFS (sp20=' '; echo a $sp20 b; ..) */
992 char c2, c, quotec, utf[8];
993 enum n_shexp_state rv;
994 size_t i, il;
995 char const *ib_save, *ib;
996 enum{
997 a_NONE = 0,
998 a_SKIPQ = 1u<<0, /* Skip rest of this quote (\u0 ..) */
999 a_SKIPT = 1u<<1, /* Skip entire token (\c@) */
1000 a_SKIPMASK = a_SKIPQ | a_SKIPT,
1001 a_SURPLUS = 1u<<2, /* Extended sequence interpretation */
1002 a_NTOKEN = 1u<<3, /* "New token": e.g., comments are possible */
1003 a_BRACE = 1u<<4, /* Variable substitution: brace enclosed */
1004 a_DIGIT1 = 1u<<5, /* ..first character was digit */
1005 a_NONDIGIT = 1u<<6, /* ..has seen any non-digits */
1006 a_VARSUBST_MASK = n_BITENUM_MASK(4, 6),
1008 a_ROUND_MASK = a_SKIPT | (int)~n_BITENUM_MASK(0, 7),
1009 a_COOKIE = 1u<<8,
1010 a_EXPLODE = 1u<<9,
1011 a_CONSUME = 1u<<10, /* When done, "consume" remaining input */
1012 a_TMP = 1u<<30
1013 } state;
1014 NYD2_ENTER;
1016 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
1017 assert(input != NULL);
1018 assert(input->l == 0 || input->s != NULL);
1019 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
1020 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
1021 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
1022 assert(!(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED) ||
1023 (flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK));
1025 if((flags & n_SHEXP_PARSE_LOG_D_V) && (n_poption & n_PO_D_V))
1026 flags |= n_SHEXP_PARSE_LOG;
1027 if(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)
1028 flags |= n_SHEXP_PARSE_QUOTE_AUTO_CLOSE;
1030 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
1031 store = n_string_trunc(store, 0);
1033 state = a_NONE;
1034 ib = input->s;
1035 if((il = input->l) == UIZ_MAX)
1036 input->l = il = strlen(ib);
1037 n_UNINIT(c, '\0');
1039 if(cookie != NULL && *cookie != NULL){
1040 assert(!(flags & n_SHEXP_PARSE_DRYRUN));
1041 state |= a_COOKIE;
1044 jrestart_empty:
1045 rv = n_SHEXP_STATE_NONE;
1046 state &= a_ROUND_MASK;
1048 /* In cookie mode, the next ARGV entry is the token already, unchanged,
1049 * since it has already been expanded before! */
1050 if(state & a_COOKIE){
1051 char const * const *xcookie, *cp;
1053 i = store->s_len;
1054 xcookie = *cookie;
1055 if((store = n_string_push_cp(store, *xcookie))->s_len > 0)
1056 rv |= n_SHEXP_STATE_OUTPUT;
1057 if(*++xcookie == NULL){
1058 *cookie = NULL;
1059 state &= ~a_COOKIE;
1060 flags |= n_SHEXP_PARSE_QUOTE_AUTO_DQ; /* ..why we are here! */
1061 }else
1062 *cookie = n_UNCONST(xcookie);
1064 for(cp = &n_string_cp(store)[i]; (c = *cp++) != '\0';)
1065 if(cntrlchar(c)){
1066 rv |= n_SHEXP_STATE_CONTROL;
1067 break;
1070 /* The last exploded cookie will join with the yielded input token, so
1071 * simply fall through in this case */
1072 if(state & a_COOKIE)
1073 goto jleave_quick;
1074 }else{
1075 jrestart:
1076 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1077 for(; il > 0; ++ib, --il)
1078 if(!blankspacechar(*ib))
1079 break;
1081 input->s = n_UNCONST(ib);
1082 input->l = il;
1085 if(il == 0){
1086 rv |= n_SHEXP_STATE_STOP;
1087 goto jleave;
1090 if(store != NULL)
1091 store = n_string_reserve(store, n_MIN(il, 32)); /* XXX */
1093 switch(flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK){
1094 case n_SHEXP_PARSE_QUOTE_AUTO_SQ:
1095 quotec = '\'';
1096 break;
1097 case n_SHEXP_PARSE_QUOTE_AUTO_DQ:
1098 quotec = '"';
1099 if(0){
1100 case n_SHEXP_PARSE_QUOTE_AUTO_DSQ:
1101 quotec = '\'';
1103 state |= a_SURPLUS;
1104 break;
1105 default:
1106 quotec = '\0';
1107 state |= a_NTOKEN;
1108 break;
1111 while(il > 0){
1112 --il, c = *ib++;
1114 /* If no quote-mode active.. */
1115 if(quotec == '\0'){
1116 if(c == '"' || c == '\''){
1117 quotec = c;
1118 if(c == '"')
1119 state |= a_SURPLUS;
1120 else
1121 state &= ~a_SURPLUS;
1122 state &= ~a_NTOKEN;
1123 continue;
1124 }else if(c == '$'){
1125 if(il > 0){
1126 state &= ~a_NTOKEN;
1127 if(*ib == '\''){
1128 --il, ++ib;
1129 quotec = '\'';
1130 state |= a_SURPLUS;
1131 continue;
1132 }else
1133 goto J_var_expand;
1135 }else if(c == '\\'){
1136 /* Outside of quotes this just escapes any next character, but a sole
1137 * <reverse solidus> at EOS is left unchanged */
1138 if(il > 0)
1139 --il, c = *ib++;
1140 state &= ~a_NTOKEN;
1142 /* This may be a comment if no token has started yet */
1143 else if(c == '#' && (state & a_NTOKEN)){
1144 rv |= n_SHEXP_STATE_STOP;
1145 goto jleave;
1147 /* Metacharacters which separate tokens must be turned on explicitly */
1148 else if(c == '|'){
1149 rv |= n_SHEXP_STATE_META_VERTBAR;
1150 /* The parsed sequence may be _the_ output, so ensure we don't
1151 * include the metacharacter, then. */
1152 /*if(flags & n_SHEXP_PARSE_META_VERTBAR)*/
1153 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1154 ++il, --ib;
1155 break;
1156 }else if(c == '&'){
1157 rv |= n_SHEXP_STATE_META_AMPERSAND;
1158 /* The parsed sequence may be _the_ output, so ensure we don't
1159 * include the metacharacter, then. */
1160 /*if(flags & n_SHEXP_PARSE_META_AMPERSAND)*/
1161 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1162 ++il, --ib;
1163 break;
1164 }else if(c == ';'){
1165 rv |= n_SHEXP_STATE_META_SEMICOLON;
1166 /* The parsed sequence may be _the_ output, so ensure we don't
1167 * include the metacharacter, then. */
1168 if(flags & n_SHEXP_PARSE_META_SEMICOLON){
1169 if(il > 0)
1170 n_go_input_inject(n_GO_INPUT_INJECT_COMMIT, ib, il);
1171 state |= a_CONSUME;
1172 rv |= n_SHEXP_STATE_STOP;
1174 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1175 ++il, --ib;
1176 break;
1177 }else if(c == ',' && (flags &
1178 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA))){
1179 /* The parsed sequence may be _the_ output, so ensure we don't
1180 * include the metacharacter, then. */
1181 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1182 ++il, --ib;
1183 break;
1184 }else if(blankchar(c)){
1185 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
1186 /* The parsed sequence may be _the_ output, so ensure we don't
1187 * include the metacharacter, then. */
1188 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1189 ++il, --ib;
1190 break;
1192 state |= a_NTOKEN;
1193 }else
1194 state &= ~a_NTOKEN;
1195 }else{
1196 /* Quote-mode */
1197 assert(!(state & a_NTOKEN));
1198 if(c == quotec && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)){
1199 state &= a_ROUND_MASK;
1200 quotec = '\0';
1201 /* Users may need to recognize the presence of empty quotes */
1202 rv |= n_SHEXP_STATE_OUTPUT;
1203 continue;
1204 }else if(c == '\\' && (state & a_SURPLUS)){
1205 ib_save = ib - 1;
1206 /* A sole <reverse solidus> at EOS is treated as-is! This is ok
1207 * since the "closing quote" error will occur next, anyway */
1208 if(il == 0)
1209 break;
1210 else if((c2 = *ib) == quotec){
1211 --il, ++ib;
1212 c = quotec;
1213 }else if(quotec == '"'){
1214 /* Double quotes, POSIX says:
1215 * The <backslash> shall retain its special meaning as an
1216 * escape character (see Section 2.2.1) only when followed
1217 * by one of the following characters when considered
1218 * special: $ ` " \ <newline> */
1219 switch(c2){
1220 case '$':
1221 case '`':
1222 /* case '"': already handled via c2 == quotec */
1223 case '\\':
1224 --il, ++ib;
1225 c = c2;
1226 /* FALLTHRU */
1227 default:
1228 break;
1230 }else{
1231 /* Dollar-single-quote */
1232 --il, ++ib;
1233 switch(c2){
1234 case '"':
1235 /* case '\'': already handled via c2 == quotec */
1236 case '\\':
1237 c = c2;
1238 break;
1240 case 'b': c = '\b'; break;
1241 case 'f': c = '\f'; break;
1242 case 'n': c = '\n'; break;
1243 case 'r': c = '\r'; break;
1244 case 't': c = '\t'; break;
1245 case 'v': c = '\v'; break;
1247 case 'E':
1248 case 'e': c = '\033'; break;
1250 /* Control character */
1251 case 'c':
1252 if(il == 0)
1253 goto j_dollar_ungetc;
1254 --il, c2 = *ib++;
1255 if(state & a_SKIPMASK)
1256 continue;
1257 c = upperconv(c2) ^ 0x40;
1258 if((ui8_t)c > 0x1F && c != 0x7F){ /* ASCII C0: 0..1F, 7F */
1259 if(flags & n_SHEXP_PARSE_LOG)
1260 n_err(_("Invalid \\c notation: %.*s\n"),
1261 (int)input->l, input->s);
1262 rv |= n_SHEXP_STATE_ERR_CONTROL;
1264 /* As an implementation-defined extension, support \c@
1265 * EQ printf(1) alike \c */
1266 if(c == '\0'){
1267 state |= a_SKIPT;
1268 continue;
1270 break;
1272 /* Octal sequence: 1 to 3 octal bytes */
1273 case '0':
1274 /* As an extension (dependent on where you look, echo(1), or
1275 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1276 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1277 c2 = c;
1278 --il, ++ib;
1280 /* FALLTHRU */
1281 case '1': case '2': case '3':
1282 case '4': case '5': case '6': case '7':
1283 c2 -= '0';
1284 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1285 c2 = (c2 << 3) | (c - '0');
1286 --il, ++ib;
1288 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1289 if(!(state & a_SKIPMASK) && (ui8_t)c2 > 0x1F){
1290 if(flags & n_SHEXP_PARSE_LOG)
1291 n_err(_("\\0 argument exceeds a byte: %.*s\n"),
1292 (int)input->l, input->s);
1293 rv |= n_SHEXP_STATE_ERR_NUMBER;
1294 --il, ++ib;
1295 /* Write unchanged */
1296 je_ib_save:
1297 rv |= n_SHEXP_STATE_OUTPUT;
1298 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1299 store = n_string_push_buf(store, ib_save,
1300 PTR2SIZE(ib - ib_save));
1301 continue;
1303 c2 = (c2 << 3) | (c -= '0');
1304 --il, ++ib;
1306 if(state & a_SKIPMASK)
1307 continue;
1308 if((c = c2) == '\0'){
1309 state |= a_SKIPQ;
1310 continue;
1312 break;
1314 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
1315 case 'U':
1316 i = 8;
1317 if(0){
1318 /* FALLTHRU */
1319 case 'u':
1320 i = 4;
1322 if(il == 0)
1323 goto j_dollar_ungetc;
1324 if(0){
1325 /* FALLTHRU */
1327 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1328 case 'X':
1329 case 'x':
1330 if(il == 0)
1331 goto j_dollar_ungetc;
1332 i = 2;
1334 /* C99 */{
1335 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
1336 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1338 size_t no, j;
1340 i = n_MIN(il, i);
1341 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
1342 c = *ib;
1343 if(hexchar(c)){
1344 no <<= 4;
1345 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
1346 : ((c) <= 'F' ? 55 : 87)))];
1347 }else if(j == 0){
1348 if(state & a_SKIPMASK)
1349 break;
1350 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
1351 if(flags & n_SHEXP_PARSE_LOG)
1352 n_err(_("Invalid \\%c notation: %.*s\n"),
1353 c2, (int)input->l, input->s);
1354 rv |= n_SHEXP_STATE_ERR_NUMBER;
1355 goto je_ib_save;
1356 }else
1357 break;
1360 /* Unicode massage */
1361 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
1362 if((c = (char)no) == '\0')
1363 state |= a_SKIPQ;
1364 }else if(no == 0)
1365 state |= a_SKIPQ;
1366 else if(!(state & a_SKIPMASK)){
1367 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1368 store = n_string_reserve(store, n_MAX(j, 4));
1370 c2 = FAL0;
1371 if(no > 0x10FFFF){ /* XXX magic; CText */
1372 if(flags & n_SHEXP_PARSE_LOG)
1373 n_err(_("\\U argument exceeds 0x10FFFF: %.*s\n"),
1374 (int)input->l, input->s);
1375 rv |= n_SHEXP_STATE_ERR_NUMBER;
1376 /* But normalize the output anyway */
1377 goto Je_uni_norm;
1380 j = n_utf32_to_utf8(no, utf);
1382 if(n_psonce & n_PSO_UNICODE){
1383 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
1384 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1385 store = n_string_push_buf(store, utf, j);
1386 continue;
1388 #ifdef HAVE_ICONV
1389 else{
1390 char *icp;
1392 icp = n_iconv_onetime_cp(n_ICONV_NONE,
1393 NULL, NULL, utf);
1394 if(icp != NULL){
1395 rv |= n_SHEXP_STATE_OUTPUT;
1396 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1397 store = n_string_push_cp(store, icp);
1398 continue;
1401 #endif
1402 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Je_uni_norm:{
1403 char itoa[32];
1405 rv |= n_SHEXP_STATE_OUTPUT |
1406 n_SHEXP_STATE_ERR_UNICODE;
1407 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1408 (no > 0xFFFFu ? 'U' : 'u'),
1409 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
1410 store = n_string_push_buf(store, itoa, i);
1412 continue;
1414 if(state & a_SKIPMASK)
1415 continue;
1417 break;
1419 /* Extension: \$ can be used to expand a variable.
1420 * Bug|ad effect: if conversion fails, not written "as-is" */
1421 case '$':
1422 if(il == 0)
1423 goto j_dollar_ungetc;
1424 goto J_var_expand;
1426 default:
1427 j_dollar_ungetc:
1428 /* Follow bash(1) behaviour, print sequence unchanged */
1429 ++il, --ib;
1430 break;
1433 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
1434 state &= ~a_VARSUBST_MASK;
1435 if(*ib == '{')
1436 state |= a_BRACE;
1438 if(!(state & a_BRACE) || il > 1){
1439 char const *cp, *vp;
1441 ib_save = ib - 1;
1442 if(state & a_BRACE)
1443 --il, ++ib;
1444 vp = ib;
1445 state &= ~a_EXPLODE;
1447 for(i = 0; il > 0; --il, ++ib, ++i){
1448 /* We have some special cases regarding macro-local special
1449 * parameters, so ensure these don't cause failure.
1450 * This has counterparts in the code that manages internal
1451 * variables! */
1452 c = *ib;
1453 if(!a_SHEXP_ISVARC(c)){
1454 if(i == 0 && (c == '*' || c == '@' || c == '#' ||
1455 c == '?' || c == '!' || c == '^')){
1456 /* Skip over multiplexer */
1457 if(c == '^')
1458 continue;
1459 if(c == '@'){
1460 if(quotec == '"')
1461 state |= a_EXPLODE;
1463 --il, ++ib;
1464 ++i;
1466 break;
1467 }else if(a_SHEXP_ISVARC_BAD1ST(c)){
1468 if(i == 0)
1469 state |= a_DIGIT1;
1470 }else
1471 state |= a_NONDIGIT;
1474 if(state & a_SKIPMASK){
1475 if((state & a_BRACE) && il > 0 && *ib == '}')
1476 --il, ++ib;
1477 continue;
1480 /* XXX Digit in first place is not supported, however we do
1481 * XXX support all digits because these refer to macro-local
1482 * XXX variables; if we would have a notion of whether we're in
1483 * XXX a macro this could be made more fine grained */
1484 if((state & (a_DIGIT1 | a_NONDIGIT)) == (a_DIGIT1 | a_NONDIGIT)){
1485 if(state & a_BRACE){
1486 if(il > 0 && *ib == '}')
1487 --il, ++ib;
1488 else
1489 rv |= n_SHEXP_STATE_ERR_BRACE;
1491 if(flags & n_SHEXP_PARSE_LOG)
1492 n_err(_("Invalid identifier for ${}: %.*s\n"),
1493 (int)input->l, input->s);
1494 rv |= n_SHEXP_STATE_ERR_IDENTIFIER;
1495 goto je_ib_save;
1496 }else if(i == 0){
1497 if(state & a_BRACE){
1498 if(flags & n_SHEXP_PARSE_LOG)
1499 n_err(_("Bad substitution for ${}: %.*s\n"),
1500 (int)input->l, input->s);
1501 rv |= n_SHEXP_STATE_ERR_BADSUB;
1502 if(il > 0 && *ib == '}')
1503 --il, ++ib;
1504 else
1505 rv |= n_SHEXP_STATE_ERR_BRACE;
1506 goto je_ib_save;
1508 c = '$';
1509 }else{
1510 if(state & a_BRACE){
1511 if(il == 0 || *ib != '}'){
1512 if(flags & n_SHEXP_PARSE_LOG)
1513 n_err(_("No closing brace for ${}: %.*s\n"),
1514 (int)input->l, input->s);
1515 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN |
1516 n_SHEXP_STATE_ERR_BRACE;
1517 goto je_ib_save;
1519 --il, ++ib;
1522 if(flags & n_SHEXP_PARSE_DRYRUN)
1523 continue;
1525 /* We may have to explode "${@}" into a series of successive,
1526 * properly quoted tokens instead. The first exploded
1527 * cookie will join with the current token */
1528 if((state & a_EXPLODE) && !(flags & n_SHEXP_PARSE_DRYRUN) &&
1529 cookie != NULL && n_var_vexplode(cookie)){
1530 state |= a_COOKIE;
1531 input->s = n_UNCONST(ib);
1532 input->l = il;
1533 goto jrestart_empty;
1536 /* Check getenv(3) shall no internal variable exist!
1537 * XXX We have some common idioms, avoid memory for them
1538 * XXX Even better would be var_vlook_buf()! */
1539 if(i == 1){
1540 switch(*vp){
1541 case '?': vp = n_qm; break;
1542 case '!': vp = n_em; break;
1543 case '*': vp = n_star; break;
1544 case '@': vp = n_at; break;
1545 case '#': vp = n_ns; break;
1546 default: goto j_var_look_buf;
1548 }else
1549 j_var_look_buf:
1550 vp = savestrbuf(vp, i);
1552 if((cp = n_var_vlook(vp, TRU1)) != NULL){
1553 rv |= n_SHEXP_STATE_OUTPUT;
1554 store = n_string_push_cp(store, cp);
1555 for(; (c = *cp) != '\0'; ++cp)
1556 if(cntrlchar(c)){
1557 rv |= n_SHEXP_STATE_CONTROL;
1558 break;
1561 continue;
1564 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
1565 continue;
1569 if(!(state & a_SKIPMASK)){
1570 rv |= n_SHEXP_STATE_OUTPUT;
1571 if(cntrlchar(c))
1572 rv |= n_SHEXP_STATE_CONTROL;
1573 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1574 store = n_string_push_c(store, c);
1578 if(quotec != '\0' && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_CLOSE)){
1579 if(flags & n_SHEXP_PARSE_LOG)
1580 n_err(_("No closing quote: %.*s\n"), (int)input->l, input->s);
1581 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
1584 jleave:
1585 assert(!(state & a_COOKIE));
1586 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
1587 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
1588 rv |= n_SHEXP_STATE_OUTPUT;
1591 if(state & a_CONSUME){
1592 input->s = n_UNCONST(&ib[il]);
1593 input->l = 0;
1594 }else{
1595 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1596 for(; il > 0; ++ib, --il)
1597 if(!blankchar(*ib))
1598 break;
1600 input->l = il;
1601 input->s = n_UNCONST(ib);
1604 if(!(rv & n_SHEXP_STATE_STOP)){
1605 if(!(rv & n_SHEXP_STATE_OUTPUT) && (flags & n_SHEXP_PARSE_IGNORE_EMPTY) &&
1606 il > 0)
1607 goto jrestart_empty;
1608 if(/*!(rv & n_SHEXP_STATE_OUTPUT) &&*/ il == 0)
1609 rv |= n_SHEXP_STATE_STOP;
1612 if((state & a_SKIPT) && !(rv & n_SHEXP_STATE_STOP) &&
1613 (flags & n__SHEXP_PARSE_META_MASK))
1614 goto jrestart;
1615 jleave_quick:
1616 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1617 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1618 NYD2_LEAVE;
1619 return rv;
1622 FL char *
1623 n_shexp_parse_token_cp(enum n_shexp_parse_flags flags, char const **cp){
1624 struct str input;
1625 struct n_string sou, *soup;
1626 char *rv;
1627 enum n_shexp_state shs;
1628 NYD2_ENTER;
1630 assert(cp != NULL);
1632 input.s = n_UNCONST(*cp);
1633 input.l = UIZ_MAX;
1634 soup = n_string_creat_auto(&sou);
1636 shs = n_shexp_parse_token(flags, soup, &input, NULL);
1637 if(shs & n_SHEXP_STATE_ERR_MASK){
1638 soup = n_string_assign_cp(soup, *cp);
1639 *cp = NULL;
1640 }else
1641 *cp = input.s;
1643 rv = n_string_cp(soup);
1644 /*n_string_gut(n_string_drop_ownership(soup));*/
1645 NYD2_LEAVE;
1646 return rv;
1649 FL struct n_string *
1650 n_shexp_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1651 struct a_shexp_quote_lvl sql;
1652 struct a_shexp_quote_ctx sqc;
1653 NYD2_ENTER;
1655 assert(store != NULL);
1656 assert(input != NULL);
1657 assert(input->l == 0 || input->s != NULL);
1659 memset(&sqc, 0, sizeof sqc);
1660 sqc.sqc_store = store;
1661 sqc.sqc_input.s = input->s;
1662 if((sqc.sqc_input.l = input->l) == UIZ_MAX)
1663 sqc.sqc_input.l = strlen(input->s);
1664 sqc.sqc_flags = rndtrip ? a_SHEXP_QUOTE_ROUNDTRIP : a_SHEXP_QUOTE_NONE;
1666 if(sqc.sqc_input.l == 0)
1667 store = n_string_push_buf(store, "''", sizeof("''") -1);
1668 else{
1669 memset(&sql, 0, sizeof sql);
1670 sql.sql_dat = sqc.sqc_input;
1671 sql.sql_flags = sqc.sqc_flags;
1672 a_shexp__quote(&sqc, &sql);
1674 NYD2_LEAVE;
1675 return store;
1678 FL char *
1679 n_shexp_quote_cp(char const *cp, bool_t rndtrip){
1680 struct n_string store;
1681 struct str input;
1682 char *rv;
1683 NYD2_ENTER;
1685 assert(cp != NULL);
1687 input.s = n_UNCONST(cp);
1688 input.l = UIZ_MAX;
1689 rv = n_string_cp(n_shexp_quote(n_string_creat_auto(&store), &input,
1690 rndtrip));
1691 n_string_gut(n_string_drop_ownership(&store));
1692 NYD2_LEAVE;
1693 return rv;
1696 FL bool_t
1697 n_shexp_is_valid_varname(char const *name){
1698 char lc, c;
1699 bool_t rv;
1700 NYD2_ENTER;
1702 rv = FAL0;
1704 for(lc = '\0'; (c = *name++) != '\0'; lc = c)
1705 if(!a_SHEXP_ISVARC(c))
1706 goto jleave;
1707 else if(lc == '\0' && a_SHEXP_ISVARC_BAD1ST(c))
1708 goto jleave;
1709 if(a_SHEXP_ISVARC_BADNST(lc))
1710 goto jleave;
1712 rv = TRU1;
1713 jleave:
1714 NYD2_LEAVE;
1715 return rv;
1718 FL int
1719 c_shcodec(void *v){
1720 struct str in;
1721 struct n_string sou_b, *soup;
1722 si32_t nerrn;
1723 size_t alen;
1724 bool_t norndtrip;
1725 char const **argv, *varname, *act, *cp;
1727 soup = n_string_creat_auto(&sou_b);
1728 argv = v;
1729 varname = (n_pstate & n_PS_ARGMOD_VPUT) ? *argv++ : NULL;
1731 act = *argv;
1732 for(cp = act; *cp != '\0' && !blankspacechar(*cp); ++cp)
1734 if((norndtrip = (*act == '+')))
1735 ++act;
1736 if(act == cp)
1737 goto jesynopsis;
1738 alen = PTR2SIZE(cp - act);
1739 if(*cp != '\0')
1740 ++cp;
1742 in.l = strlen(in.s = n_UNCONST(cp));
1743 nerrn = n_ERR_NONE;
1745 if(is_ascncaseprefix(act, "encode", alen))
1746 soup = n_shexp_quote(soup, &in, !norndtrip);
1747 else if(!norndtrip && is_ascncaseprefix(act, "decode", alen)){
1748 for(;;){
1749 enum n_shexp_state shs;
1751 shs = n_shexp_parse_token((n_SHEXP_PARSE_LOG |
1752 n_SHEXP_PARSE_IGNORE_EMPTY), soup, &in, NULL);
1753 if(shs & n_SHEXP_STATE_ERR_MASK){
1754 soup = n_string_assign_cp(soup, cp);
1755 nerrn = n_ERR_CANCELED;
1756 v = NULL;
1757 break;
1759 if(shs & n_SHEXP_STATE_STOP)
1760 break;
1762 }else
1763 goto jesynopsis;
1765 assert(cp != NULL);
1766 if(varname != NULL){
1767 cp = n_string_cp(soup);
1768 if(!n_var_vset(varname, (uintptr_t)cp)){
1769 nerrn = n_ERR_NOTSUP;
1770 cp = NULL;
1772 }else{
1773 struct str out;
1775 in.s = n_string_cp(soup);
1776 in.l = soup->s_len;
1777 makeprint(&in, &out);
1778 if(fprintf(n_stdout, "%s\n", out.s) < 0){
1779 nerrn = n_err_no;
1780 cp = NULL;
1782 free(out.s);
1785 jleave:
1786 n_pstate_err_no = nerrn;
1787 NYD_LEAVE;
1788 return (cp != NULL ? 0 : 1);
1789 jesynopsis:
1790 n_err(_("Synopsis: shcodec: <[+]e[ncode]|d[ecode]> <rest-of-line>\n"));
1791 nerrn = n_ERR_INVAL;
1792 cp = NULL;
1793 goto jleave;
1796 /* s-it-mode */