Add n_shexp_parse_token_cp()
[s-mailx.git] / shexp.c
blobdf8505c01915f865982ddec4702b2bd6c1f6d58e
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
5 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
6 * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7 */
8 /*
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 #undef n_FILE
37 #define n_FILE shexp
39 #ifndef HAVE_AMALGAMATION
40 # include "nail.h"
41 #endif
43 #include <sys/wait.h>
45 #include <pwd.h>
47 #ifdef HAVE_FNMATCH
48 # include <dirent.h>
49 # include <fnmatch.h>
50 #endif
52 /* POSIX says
53 * Environment variable names used by the utilities in the Shell and
54 * Utilities volume of POSIX.1-2008 consist solely of uppercase
55 * letters, digits, and the <underscore> ('_') from the characters
56 * defined in Portable Character Set and do not begin with a digit.
57 * Other characters may be permitted by an implementation;
58 * applications shall tolerate the presence of such names.
59 * We do support the hyphen "-" because it is common for mailx.
60 * We support some special parameter names for one-letter variable names;
61 * note these have counterparts in the code that manages internal variables! */
/* True if C may be part of a variable name: alphanumeric, underscore or
 * hyphen */
62 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* True if C is one of the special one-letter parameter names * @ # ? ! */
63 #define a_SHEXP_ISVARC_SPECIAL1(C) \
64 ((C) == '*' || (C) == '@' || (C) == '#' || (C) == '?' || (C) == '!')
/* Flags used by a_shexp__quote(): bit 0 is an option, bits 8 to 11 name the
 * type of quoting that is (to be) applied */
66 enum a_shexp_quote_flags{
67 a_SHEXP_QUOTE_NONE,
68 a_SHEXP_QUOTE_ROUNDTRIP = 1u<<0, /* Result won't be consumed immediately */
70 a_SHEXP_QUOTE_T_REVSOL = 1u<<8, /* Type: by reverse solidus */
71 a_SHEXP_QUOTE_T_SINGLE = 1u<<9, /* Type: single-quotes */
72 a_SHEXP_QUOTE_T_DOUBLE = 1u<<10, /* Type: double-quotes */
73 a_SHEXP_QUOTE_T_DOLLAR = 1u<<11, /* Type: dollar-single-quotes */
/* All of the type bits in one mask */
74 a_SHEXP_QUOTE_T_MASK = a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
75 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR,
/* NOTE(review): not referenced in the visible code; presumably the first bit
 * position that is not taken by the flags above -- confirm */
77 a_SHEXP_QUOTE__FREESHIFT = 16u
80 #ifdef HAVE_FNMATCH
/* Context of one directory level of a_shexp__glob(); a_shexp_globname() sets
 * up the outermost one, deeper ones are derived from it while recursing */
81 struct a_shexp_glob_ctx{
82 char const *sgc_patdat; /* Remaining pattern (at and below level) */
83 size_t sgc_patlen; /* Number of bytes at .sgc_patdat */
84 struct n_string *sgc_outer; /* Resolved path up to this level */
85 ui32_t sgc_flags; /* Bits of the enum local to a_shexp__glob() */
86 ui8_t sgc__dummy[4]; /* Unused in the visible code (presumably padding) */
88 #endif
/* Overall state of one quote operation, shared by all a_shexp_quote_lvl */
90 struct a_shexp_quote_ctx{
91 struct n_string *sqc_store; /* Result storage */
92 struct str sqc_input; /* Input data, topmost level */
/* The counters are only incremented by a_shexp__quote() in code that is
 * conditionalized via a_SHEXP_QUOTE_RECURSE */
93 ui32_t sqc_cnt_revso;
94 ui32_t sqc_cnt_single;
95 ui32_t sqc_cnt_double;
96 ui32_t sqc_cnt_dollar;
97 enum a_shexp_quote_flags sqc_flags;
98 ui8_t sqc__dummy[4]; /* Unused in the visible code (presumably padding) */
/* One (sub)string of the input together with the quote flags that apply to
 * it; levels form a singly linked list which a_shexp__quote() reverses
 * before generating output */
101 struct a_shexp_quote_lvl{
102 struct a_shexp_quote_lvl *sql_link; /* Outer level */
103 struct str sql_dat; /* This level (has to) handle(d) */
104 enum a_shexp_quote_flags sql_flags; /* Quote type etc. of this level */
105 ui8_t sql__dummy[4]; /* Unused in the visible code (presumably padding) */
108 /* Locate the mailbox file of user (where new, unread mail is queued).
 * Unless force is set *inbox* and then $MAIL are tried before falling back to
 * the builtin VAL_MAIL "/" user */
109 static char *a_shexp_findmail(char const *user, bool_t force);
111 /* Expand ^~/? and ^~USER/? constructs.
112 * Returns the completely resolved (maybe empty or identical to input)
113 * salloc()ed string */
114 static char *a_shexp_tilde(char const *s);
116 /* Perform fnmatch(3) based filename globbing on name.
 * Returns NULL on error; without fnmatch(3) support name is returned as a
 * savestr()d copy */
117 static char *a_shexp_globname(char const *name, enum fexp_mode fexpm);
118 #ifdef HAVE_FNMATCH
/* Worker of a_shexp_globname() that handles one directory level and appends
 * matches to *slpp; __globsort() is the qsort(3) comparison function used for
 * multiple results */
119 static bool_t a_shexp__glob(struct a_shexp_glob_ctx *sgcp,
120 struct n_strlist **slpp);
121 static int a_shexp__globsort(void const *cvpa, void const *cvpb);
122 #endif
124 /* Parse an input string and create a sh(1)ell-quoted result in
 * sqcp->sqc_store */
125 static void a_shexp__quote(struct a_shexp_quote_ctx *sqcp,
126 struct a_shexp_quote_lvl *sqlp);
128 static char *
129 a_shexp_findmail(char const *user, bool_t force){
130 char *rv;
131 char const *cp;
132 NYD2_ENTER;
134 if(!force){
135 if((cp = ok_vlook(inbox)) != NULL && *cp != '\0'){
136 /* Folder extra introduced to avoid % recursion loops */
137 if((rv = fexpand(cp, FEXP_NSPECIAL | FEXP_NFOLDER | FEXP_NSHELL)
138 ) != NULL)
139 goto jleave;
140 n_err(_("*inbox* expansion failed, using $MAIL / builtin: %s\n"), cp);
143 if((cp = ok_vlook(MAIL)) != NULL){
144 rv = savestr(cp);
145 goto jleave;
149 /* C99 */{
150 size_t ul, i;
152 ul = strlen(user) +1;
153 i = sizeof(VAL_MAIL) -1 + 1 + ul;
155 rv = salloc(i);
156 memcpy(rv, VAL_MAIL, (i = sizeof(VAL_MAIL) -1));
157 rv[i] = '/';
158 memcpy(&rv[++i], user, ul);
160 jleave:
161 NYD2_LEAVE;
162 return rv;
165 static char *
166 a_shexp_tilde(char const *s){
167 struct passwd *pwp;
168 size_t nl, rl;
169 char const *rp, *np;
170 char *rv;
171 NYD2_ENTER;
173 if(*(rp = &s[1]) == '/' || *rp == '\0'){
174 np = ok_vlook(HOME);
175 rl = strlen(rp);
176 }else{
177 if((rp = strchr(np = rp, '/')) != NULL){
178 nl = PTR2SIZE(rp - np);
179 np = savestrbuf(np, nl);
180 rl = strlen(rp);
181 }else
182 rl = 0;
184 if((pwp = getpwnam(np)) == NULL){
185 rv = savestr(s);
186 goto jleave;
188 np = pwp->pw_dir;
191 nl = strlen(np);
192 rv = salloc(nl + 1 + rl +1);
193 memcpy(rv, np, nl);
194 if(rl > 0){
195 memcpy(rv + nl, rp, rl);
196 nl += rl;
198 rv[nl] = '\0';
199 jleave:
200 NYD2_LEAVE;
201 return rv;
204 static char *
205 a_shexp_globname(char const *name, enum fexp_mode fexpm){
206 #ifdef HAVE_FNMATCH
207 struct a_shexp_glob_ctx sgc;
208 struct n_string outer;
209 struct n_strlist *slp;
210 char *cp;
211 NYD_ENTER;
213 memset(&sgc, 0, sizeof sgc);
214 sgc.sgc_patlen = strlen(name);
215 sgc.sgc_patdat = savestrbuf(name, sgc.sgc_patlen);
216 sgc.sgc_outer = n_string_reserve(n_string_creat(&outer), sgc.sgc_patlen);
217 sgc.sgc_flags = ((fexpm & FEXP_SILENT) != 0);
218 slp = NULL;
219 if(a_shexp__glob(&sgc, &slp))
220 cp = (char*)1;
221 else
222 cp = NULL;
223 n_string_gut(&outer);
225 if(cp == NULL)
226 goto jleave;
228 if(slp == NULL){
229 cp = n_UNCONST(N_("File pattern does not match"));
230 goto jerr;
231 }else if(slp->sl_next == NULL)
232 cp = savestrbuf(slp->sl_dat, slp->sl_len);
233 else if(fexpm & FEXP_MULTIOK){
234 struct n_strlist **sorta, *xslp;
235 size_t i, no, l;
237 no = l = 0;
238 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next){
239 ++no;
240 l += xslp->sl_len + 1;
243 sorta = smalloc(sizeof(*sorta) * no);
244 no = 0;
245 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next)
246 sorta[no++] = xslp;
247 qsort(sorta, no, sizeof *sorta, &a_shexp__globsort);
249 cp = salloc(++l);
250 l = 0;
251 for(i = 0; i < no; ++i){
252 xslp = sorta[i];
253 memcpy(&cp[l], xslp->sl_dat, xslp->sl_len);
254 l += xslp->sl_len;
255 cp[l++] = '\0';
257 cp[l] = '\0';
259 free(sorta);
260 n_pstate |= n_PS_EXPAND_MULTIRESULT;
261 }else{
262 cp = n_UNCONST(N_("File pattern matches multiple results"));
263 goto jerr;
266 jleave:
267 while(slp != NULL){
268 struct n_strlist *tmp = slp;
270 slp = slp->sl_next;
271 free(tmp);
273 NYD_LEAVE;
274 return cp;
276 jerr:
277 if(!(fexpm & FEXP_SILENT)){
278 name = n_shexp_quote_cp(name, FAL0);
279 n_err("%s: %s\n", V_(cp), name);
281 cp = NULL;
282 goto jleave;
284 #else /* HAVE_FNMATCH */
285 n_UNUSED(fexpm);
287 if(!(fexpm & FEXP_SILENT))
288 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
289 return savestr(name);
290 #endif
293 #ifdef HAVE_FNMATCH
294 static bool_t
295 a_shexp__glob(struct a_shexp_glob_ctx *sgcp, struct n_strlist **slpp){
296 enum{a_SILENT = 1<<0, a_DEEP=1<<1, a_SALLOC=1<<2};
298 struct a_shexp_glob_ctx nsgc;
299 struct dirent *dep;
300 DIR *dp;
301 size_t old_outerlen;
302 char const *ccp, *myp;
303 NYD2_ENTER;
305 /* We need some special treatment for the outermost level */
306 if(!(sgcp->sgc_flags & a_DEEP)){
307 if(sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/'){
308 myp = n_string_cp(n_string_push_c(sgcp->sgc_outer, '/'));
309 ++sgcp->sgc_patdat;
310 --sgcp->sgc_patlen;
311 }else
312 myp = "./";
313 }else
314 myp = n_string_cp(sgcp->sgc_outer);
315 old_outerlen = sgcp->sgc_outer->s_len;
317 /* Separate current directory/pattern level from any possible remaining
318 * pattern in order to be able to use it for fnmatch(3) */
319 if((ccp = memchr(sgcp->sgc_patdat, '/', sgcp->sgc_patlen)) == NULL)
320 nsgc.sgc_patlen = 0;
321 else{
322 nsgc = *sgcp;
323 nsgc.sgc_flags |= a_DEEP;
324 sgcp->sgc_patlen = PTR2SIZE((nsgc.sgc_patdat = &ccp[1]) -
325 &sgcp->sgc_patdat[0]);
326 nsgc.sgc_patlen -= sgcp->sgc_patlen;
327 /* Trim solidus */
328 if(sgcp->sgc_patlen > 0){
329 assert(sgcp->sgc_patdat[sgcp->sgc_patlen -1] == '/');
330 ((char*)n_UNCONST(sgcp->sgc_patdat))[--sgcp->sgc_patlen] = '\0';
334 /* Our current directory level */
335 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
336 * xxx be the (sole) result depending on pattern surroundings, etc. */
337 if((dp = opendir(myp)) == NULL){
338 int err;
340 switch((err = errno)){
341 case ENOTDIR:
342 ccp = N_("cannot access paths under non-directory");
343 goto jerr;
344 case ENOENT:
345 ccp = N_("path component of (sub)pattern non-existent");
346 goto jerr;
347 case EACCES:
348 ccp = N_("file permission for file (sub)pattern denied");
349 goto jerr;
350 default:
351 ccp = N_("cannot handle file (sub)pattern");
352 goto jerr;
356 /* As necessary, quote bytes in the current pattern */
357 /* C99 */{
358 char *ncp;
359 size_t i;
360 bool_t need;
362 for(need = FAL0, i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
363 switch(*myp){
364 case '\'': case '"': case '\\': case '$':
365 case ' ': case '\t':
366 need = TRU1;
367 ++i;
368 /* FALLTHRU */
369 default:
370 ++i;
371 break;
374 if(need){
375 ncp = salloc(i +1);
376 for(i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
377 switch(*myp){
378 case '\'': case '"': case '\\': case '$':
379 case ' ': case '\t':
380 ncp[i++] = '\\';
381 /* FALLTHRU */
382 default:
383 ncp[i++] = *myp;
384 break;
386 ncp[i] = '\0';
387 myp = ncp;
388 }else
389 myp = sgcp->sgc_patdat;
392 while((dep = readdir(dp)) != NULL){
393 switch(fnmatch(myp, dep->d_name, FNM_PATHNAME | FNM_PERIOD)){
394 case 0:{
395 /* A match expresses the desire to recurse if there is more pattern */
396 if(nsgc.sgc_patlen > 0){
397 bool_t isdir;
399 n_string_push_cp((sgcp->sgc_outer->s_len > 1
400 ? n_string_push_c(sgcp->sgc_outer, '/') : sgcp->sgc_outer),
401 dep->d_name);
403 isdir = FAL0;
404 #ifdef HAVE_DIRENT_TYPE
405 if(dep->d_type == DT_DIR)
406 isdir = TRU1;
407 else if(dep->d_type == DT_LNK || dep->d_type == DT_UNKNOWN)
408 #endif
410 struct stat sb;
412 if(stat(n_string_cp(sgcp->sgc_outer), &sb)){
413 ccp = N_("I/O error when querying file status");
414 goto jerr;
415 }else if(S_ISDIR(sb.st_mode))
416 isdir = TRU1;
419 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
420 * TODO Instead save away a list of such n_string's for later */
421 if(isdir && !a_shexp__glob(&nsgc, slpp)){
422 ccp = (char*)1;
423 goto jleave;
426 n_string_trunc(sgcp->sgc_outer, old_outerlen);
427 }else{
428 struct n_strlist *slp;
429 size_t i, j;
431 i = strlen(dep->d_name);
432 j = (old_outerlen > 0) ? old_outerlen + 1 + i : i;
433 slp = n_STRLIST_MALLOC(j);
434 *slpp = slp;
435 slpp = &slp->sl_next;
436 slp->sl_next = NULL;
437 if((j = old_outerlen) > 0){
438 memcpy(&slp->sl_dat[0], sgcp->sgc_outer->s_dat, j);
439 if(slp->sl_dat[j -1] != '/')
440 slp->sl_dat[j++] = '/';
442 memcpy(&slp->sl_dat[j], dep->d_name, i);
443 slp->sl_dat[j += i] = '\0';
444 slp->sl_len = j;
446 } break;
447 case FNM_NOMATCH:
448 break;
449 default:
450 ccp = N_("fnmatch(3) cannot handle file (sub)pattern");
451 goto jerr;
455 ccp = NULL;
456 jleave:
457 if(dp != NULL)
458 closedir(dp);
459 NYD2_LEAVE;
460 return (ccp == NULL);
462 jerr:
463 if(!(sgcp->sgc_flags & a_SILENT)){
464 char const *s2, *s3;
466 if(sgcp->sgc_outer->s_len > 0){
467 s2 = n_shexp_quote_cp(n_string_cp(sgcp->sgc_outer), FAL0);
468 s3 = "/";
469 }else
470 s2 = s3 = n_empty;
472 n_err("%s: %s%s%s\n", V_(ccp), s2, s3,
473 n_shexp_quote_cp(sgcp->sgc_patdat, FAL0));
475 goto jleave;
478 static int
479 a_shexp__globsort(void const *cvpa, void const *cvpb){
480 int rv;
481 struct n_strlist const * const *slpa, * const *slpb;
482 NYD2_ENTER;
484 slpa = cvpa;
485 slpb = cvpb;
486 rv = asccasecmp((*slpa)->sl_dat, (*slpb)->sl_dat);
487 NYD2_LEAVE;
488 return rv;
490 #endif /* HAVE_FNMATCH */
/* Create a sh(1)ell-quoted version of the data of sqlp in sqcp->sqc_store.
 * Three steps: (1) classify all input bytes in order to find out which types
 * of quoting are needed, (2) reverse the list of levels, sum up a size
 * estimate and the union of all flags, (3) open the "most fancy" quote that
 * is needed, emit the (escaped) data, close the quote.
 * Because a_SHEXP_QUOTE_RECURSE is #undef'd right below, "jrecurse" is just
 * an alias of "jstep": there is only one level, no recursion takes place */
492 static void
493 a_shexp__quote(struct a_shexp_quote_ctx *sqcp, struct a_shexp_quote_lvl *sqlp){
494 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
495 * XXX use the recursive implementation because of stateful encodings.
496 * XXX I.e., if a quoted substring cannot be self-contained - the data after
497 * XXX the quote relies on "the former state", then this doesn't make sense.
498 * XXX Therefore this is not fully programmed out but instead only detects
499 * XXX the "most fancy" quoting necessary, and directly does that.
500 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
501 * XXX Otherwise we rather have to convert to wide first and act on that,
502 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
503 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
504 #ifdef a_SHEXP_QUOTE_RECURSE
505 # define jrecurse jrecurse
506 struct a_shexp_quote_lvl sql;
507 #else
508 # define jrecurse jstep
509 #endif
510 struct n_visual_info_ctx vic;
/* .head is used while reversing the level list, .store while writing */
511 union {struct a_shexp_quote_lvl *head; struct n_string *store;} u;
512 ui32_t flags;
513 size_t il;
514 char const *ib, *ib_base;
515 NYD2_ENTER;
517 ib_base = ib = sqlp->sql_dat.s;
518 il = sqlp->sql_dat.l;
519 flags = sqlp->sql_flags;
521 /* Iterate over the entire input, classify characters and type of quotes
522 * along the way. Whenever a quote change has to be applied, adjust flags
523 * for the new situation -, setup sql.* and recurse- */
524 while(il > 0){
525 char c;
527 c = *ib;
/* Control characters need dollar-single-quotes, except that a tabulator is
 * fine within any quote that has already been chosen */
528 if(cntrlchar(c)){
529 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
530 goto jstep;
531 if(c == '\t' && (flags & (a_SHEXP_QUOTE_T_REVSOL |
532 a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOUBLE)))
533 goto jstep;
534 #ifdef a_SHEXP_QUOTE_RECURSE
535 ++sqcp->sqc_cnt_dollar;
536 #endif
537 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
538 goto jrecurse;
/* Whitespace and shell metacharacters: single-quotes, unless some type of
 * quote has already been chosen */
539 }else if(blankspacechar(c) || c == '|' || c == '&' || c == ';' ||
540 /* Whereas we don't support those, quote them for the sh(1)ell */
541 c == '(' || c == ')' || c == '<' || c == '>' ||
542 c == '"' || c == '$'){
543 if(flags & a_SHEXP_QUOTE_T_MASK)
544 goto jstep;
545 #ifdef a_SHEXP_QUOTE_RECURSE
546 ++sqcp->sqc_cnt_single;
547 #endif
548 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
549 goto jrecurse;
/* A single-quote upgrades "no quote" and single-quotes to
 * dollar-single-quotes */
550 }else if(c == '\''){
551 if(flags & (a_SHEXP_QUOTE_T_MASK & ~a_SHEXP_QUOTE_T_SINGLE))
552 goto jstep;
553 #ifdef a_SHEXP_QUOTE_RECURSE
554 ++sqcp->sqc_cnt_dollar;
555 #endif
556 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
557 goto jrecurse;
/* Reverse solidus anywhere, and a number sign as the very first byte */
558 }else if(c == '\\' || (c == '#' && ib == ib_base)){
559 if(flags & a_SHEXP_QUOTE_T_MASK)
560 goto jstep;
561 #ifdef a_SHEXP_QUOTE_RECURSE
562 ++sqcp->sqc_cnt_single;
563 #endif
564 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
565 goto jrecurse;
/* Non-ASCII only enforces dollar-single-quotes in ROUNDTRIP mode */
566 }else if(!asciichar(c)){
567 /* Need to keep together multibytes */
568 #ifdef a_SHEXP_QUOTE_RECURSE
569 memset(&vic, 0, sizeof vic);
570 vic.vic_indat = ib;
571 vic.vic_inlen = il;
572 n_visual_info(&vic,
573 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
574 #endif
575 /* xxx check whether resulting \u would be ASCII */
576 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP) ||
577 (flags & a_SHEXP_QUOTE_T_DOLLAR)){
578 #ifdef a_SHEXP_QUOTE_RECURSE
579 ib = vic.vic_oudat;
580 il = vic.vic_oulen;
581 continue;
582 #else
583 goto jstep;
584 #endif
586 #ifdef a_SHEXP_QUOTE_RECURSE
587 ++sqcp->sqc_cnt_dollar;
588 #endif
589 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
590 goto jrecurse;
591 }else
592 jstep:
593 ++ib, --il;
/* Remember the classification result in the level */
595 sqlp->sql_flags = flags;
597 /* Level made the great and completed processing input. Reverse the list of
598 * levels, detect the "most fancy" quote type needed along this way */
599 /* XXX Due to restriction as above very crude */
600 for(flags = 0, il = 0, u.head = NULL; sqlp != NULL;){
601 struct a_shexp_quote_lvl *tmp;
603 tmp = sqlp->sql_link;
604 sqlp->sql_link = u.head;
605 u.head = sqlp;
/* Size estimate: the data itself, plus half of that if it is quoted */
606 il += sqlp->sql_dat.l;
607 if(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK)
608 il += (sqlp->sql_dat.l >> 1);
609 flags |= sqlp->sql_flags;
610 sqlp = tmp;
612 sqlp = u.head;
614 /* Finally work the substrings in the correct order, adjusting quotes along
615 * the way as necessary. Start off with the "most fancy" quote, so that
616 * the user sees an overall boundary she can orientate herself on.
617 * We do it like that to be able to give the user some "encapsulation
618 * experience", to address what strikes me is a problem of sh(1)ell quoting:
619 * different to, e.g., perl(1), where you see at a glance where a string
620 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
621 * visual appearance of "a string" as such */
622 u.store = n_string_reserve(sqcp->sqc_store, il);
/* Open the quote; precedence is dollar, double, single, none */
624 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
625 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
626 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
627 }else if(flags & a_SHEXP_QUOTE_T_DOUBLE){
628 u.store = n_string_push_c(u.store, '"');
629 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOUBLE;
630 }else if(flags & a_SHEXP_QUOTE_T_SINGLE){
631 u.store = n_string_push_c(u.store, '\'');
632 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
633 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
634 flags &= ~a_SHEXP_QUOTE_T_MASK;
636 /* Work all the levels */
637 for(; sqlp != NULL; sqlp = sqlp->sql_link){
638 /* As necessary update our mode of quoting */
639 #ifdef a_SHEXP_QUOTE_RECURSE
640 il = 0;
642 switch(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK){
643 case a_SHEXP_QUOTE_T_DOLLAR:
644 if(!(flags & a_SHEXP_QUOTE_T_DOLLAR))
645 il = a_SHEXP_QUOTE_T_DOLLAR;
646 break;
647 case a_SHEXP_QUOTE_T_DOUBLE:
648 if(!(flags & (a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
649 il = a_SHEXP_QUOTE_T_DOLLAR;
650 break;
651 case a_SHEXP_QUOTE_T_SINGLE:
652 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
653 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
654 il = a_SHEXP_QUOTE_T_SINGLE;
655 break;
656 default:
657 case a_SHEXP_QUOTE_T_REVSOL:
658 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
659 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
660 il = a_SHEXP_QUOTE_T_REVSOL;
661 break;
664 if(il != 0){
665 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
666 u.store = n_string_push_c(u.store, '\'');
667 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
668 u.store = n_string_push_c(u.store, '"');
669 flags &= ~a_SHEXP_QUOTE_T_MASK;
671 flags |= (ui32_t)il;
672 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
673 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
674 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
675 u.store = n_string_push_c(u.store, '"');
676 else if(flags & a_SHEXP_QUOTE_T_SINGLE)
677 u.store = n_string_push_c(u.store, '\'');
679 #endif /* a_SHEXP_QUOTE_RECURSE */
681 /* Work the level's substring */
682 ib = sqlp->sql_dat.s;
683 il = sqlp->sql_dat.l;
685 while(il > 0){
686 char c2, c;
688 c = *ib;
690 if(cntrlchar(c)){
691 assert(c == '\t' || (flags & a_SHEXP_QUOTE_T_DOLLAR));
692 assert((flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
693 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)));
/* Map to the mnemonic escape letter if there is one; c2 keeps the original
 * so that "no mnemonic" can be detected below via c == c2 */
694 switch((c2 = c)){
695 case 0x07: c = 'a'; break;
696 case 0x08: c = 'b'; break;
697 case 0x0A: c = 'n'; break;
698 case 0x0B: c = 'v'; break;
699 case 0x0C: c = 'f'; break;
700 case 0x0D: c = 'r'; break;
701 case 0x1B: c = 'E'; break;
702 default: break;
/* A tabulator becomes \t in dollar-single-quotes only, otherwise it is
 * passed through (reverse solidus escaped in REVSOL mode) */
703 case 0x09:
704 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
705 c = 't';
706 break;
708 if(flags & a_SHEXP_QUOTE_T_REVSOL)
709 u.store = n_string_push_c(u.store, '\\');
710 goto jpush;
712 u.store = n_string_push_c(u.store, '\\');
/* No mnemonic: emit as \c plus the control character XORed with 0x40 */
713 if(c == c2){
714 u.store = n_string_push_c(u.store, 'c');
715 c ^= 0x40;
717 goto jpush;
718 }else if(blankspacechar(c) || c == '|' || c == '&' || c == ';' ||
719 /* Whereas we don't support those, quote them for the sh(1)ell */
720 c == '(' || c == ')' || c == '<' || c == '>' ||
721 c == '"' || c == '$'){
722 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
723 goto jpush;
724 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE));
725 u.store = n_string_push_c(u.store, '\\');
726 goto jpush;
727 }else if(c == '\''){
728 if(flags & a_SHEXP_QUOTE_T_DOUBLE)
729 goto jpush;
730 assert(!(flags & a_SHEXP_QUOTE_T_SINGLE));
731 u.store = n_string_push_c(u.store, '\\');
732 goto jpush;
733 }else if(c == '\\' || (c == '#' && ib == ib_base)){
734 if(flags & a_SHEXP_QUOTE_T_SINGLE)
735 goto jpush;
736 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE |
737 a_SHEXP_QUOTE_T_DOLLAR));
738 u.store = n_string_push_c(u.store, '\\');
739 goto jpush;
740 }else if(asciichar(c)){
741 /* Shorthand: we can simply push that thing out */
742 jpush:
743 u.store = n_string_push_c(u.store, c);
744 ++ib, --il;
745 }else{
746 /* Not an ASCII character, take care not to split up multibyte
747 * sequences etc. For the sake of compile testing, don't enwrap in
748 * HAVE_ALWAYS_UNICODE_LOCALE || HAVE_NATCH_CHAR */
/* With n_PSO_UNICODE set the input is decoded as UTF-8 directly */
749 if(n_psonce & n_PSO_UNICODE){
750 ui32_t uc;
751 char const *ib2;
752 size_t il2, il3;
754 ib2 = ib;
755 il3 = il2 = il;
756 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
757 char itoa[32];
758 char const *cp;
/* il2: number of input bytes the decoder consumed */
760 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
761 if((flags & a_SHEXP_QUOTE_ROUNDTRIP) || uc == 0xFFFDu){
762 /* Use padding to make ambiguities impossible */
763 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
764 (uc > 0xFFFFu ? 'U' : 'u'),
765 (int)(uc > 0xFFFFu ? 8 : 4), uc);
766 cp = itoa;
767 }else{
768 il3 = il2;
769 cp = &ib[0];
771 u.store = n_string_push_buf(u.store, cp, il3);
772 ib += il2, il -= il2;
773 continue;
/* Otherwise (or if decoding failed) let n_visual_info() isolate one
 * character */
777 memset(&vic, 0, sizeof vic);
778 vic.vic_indat = ib;
779 vic.vic_inlen = il;
780 n_visual_info(&vic,
781 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
783 /* Work this substring as sensitive as possible */
/* From here on il is the length of that one character only; ib and il are
 * set to the remaining input again once it has been written */
784 il -= vic.vic_oulen;
785 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP))
786 u.store = n_string_push_buf(u.store, ib, il);
787 #ifdef HAVE_ICONV
/* ROUNDTRIP: try a conversion via iconv and emit a \u or \U escape */
788 else if((vic.vic_indat = n_iconv_onetime_cp(n_ICONV_NONE,
789 "utf-8", ok_vlook(ttycharset), savestrbuf(ib, il))) != NULL){
790 ui32_t uc;
791 char const *ib2;
792 size_t il2, il3;
794 il3 = il2 = strlen(ib2 = vic.vic_indat);
795 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
796 char itoa[32];
798 il2 = PTR2SIZE(&ib2[0] - &vic.vic_indat[0]);
799 /* Use padding to make ambiguities impossible */
800 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
801 (uc > 0xFFFFu ? 'U' : 'u'),
802 (int)(uc > 0xFFFFu ? 8 : 4), uc);
803 u.store = n_string_push_buf(u.store, itoa, il3);
804 }else
805 goto Jxseq;
807 #endif
/* Last resort: one \xHH escape per byte; the "FF" placeholder is
 * overwritten via n_c_to_hex_base16() */
808 else
809 #ifdef HAVE_ICONV
810 Jxseq:
811 #endif
812 while(il-- > 0){
813 u.store = n_string_push_buf(u.store, "\\xFF",
814 sizeof("\\xFF") -1);
815 n_c_to_hex_base16(&u.store->s_dat[u.store->s_len - 2], *ib++);
818 ib = vic.vic_oudat;
819 il = vic.vic_oulen;
824 /* Close an open quote */
825 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
826 u.store = n_string_push_c(u.store, '\'');
827 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
828 u.store = n_string_push_c(u.store, '"');
829 #ifdef a_SHEXP_QUOTE_RECURSE
830 jleave:
831 #endif
832 NYD2_LEAVE;
833 return;
/* Recursive variant (compiled out): cut off what is still unhandled and let
 * a new level deal with it */
835 #ifdef a_SHEXP_QUOTE_RECURSE
836 jrecurse:
837 sqlp->sql_dat.l -= il;
839 sql.sql_link = sqlp;
840 sql.sql_dat.s = n_UNCONST(ib);
841 sql.sql_dat.l = il;
842 sql.sql_flags = flags;
843 a_shexp__quote(sqcp, &sql);
844 goto jleave;
845 #endif
847 #undef jrecurse
848 #undef a_SHEXP_QUOTE_RECURSE
/* Expand the file or folder name `name' as directed by the bits in fexpm.
 * In order: n_PS_EXPAND_MULTIRESULT is cleared in n_pstate; unless
 * FEXP_NSHORTCUT is set shortcut_expand() is tried; unless FEXP_NSPECIAL is
 * set a leading %, # or & is resolved; unless FEXP_NFOLDER is set a leading +
 * is replaced with a non-empty folder_query(); then variable, tilde and
 * (unless FEXP_NSHELL is set) filename pattern expansion take place; finally,
 * with FEXP_LOCAL, anything that which_protocol() does not classify as
 * PROTO_FILE or PROTO_MAILDIR is rejected.
 * Returns NULL on error.  A result that was not newly created by one of the
 * expansion steps is passed through savestr() before it is returned */
851 FL char *
852 fexpand(char const *name, enum fexp_mode fexpm)
854 struct str s;
855 char const *cp, *res;
/* dyn: whether res has been created by an expansion step, see jleave */
856 bool_t dyn;
857 NYD_ENTER;
859 n_pstate &= ~n_PS_EXPAND_MULTIRESULT;
861 /* The order of evaluation is "%" and "#" expand into constants.
862 * "&" can expand into "+". "+" can expand into shell meta characters.
863 * Shell meta characters expand into constants.
864 * This way, we make no recursive expansion */
865 if ((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
866 res = n_UNCONST(name);
868 if(!(fexpm & FEXP_NSPECIAL)){
869 jnext:
870 dyn = FAL0;
871 switch (*res) {
/* "%:NAME" is stripped to NAME and looked at again, a sole "%" is the
 * mailbox of $LOGNAME, "%USER" that of USER (enforcing the builtin
 * location) */
872 case '%':
873 if(res[1] == ':' && res[2] != '\0')
874 res = &res[2];
875 else{
876 bool_t force;
878 force = (res[1] != '\0');
879 res = a_shexp_findmail((force ? &res[1] : ok_vlook(LOGNAME)),
880 force);
881 if(force)
882 goto jislocal;
884 goto jnext;
/* A sole "#" is the previous file */
885 case '#':
886 if (res[1] != '\0')
887 break;
888 if (prevfile[0] == '\0') {
889 n_err(_("No previous file\n"));
890 res = NULL;
891 goto jleave;
893 res = prevfile;
894 goto jislocal;
/* A sole "&" is $MBOX */
895 case '&':
896 if (res[1] == '\0')
897 res = ok_vlook(MBOX);
898 break;
902 /* POSIX: if *folder* unset or null, "+" shall be retained */
903 if (!(fexpm & FEXP_NFOLDER) && *res == '+' &&
904 *(cp = folder_query()) != '\0') {
905 res = str_concat_csvl(&s, cp, &res[1], NULL)->s;
906 dyn = TRU1;
908 /* TODO *folder* can't start with %[:], can it!?! */
909 if (res[0] == '%' && res[1] == ':') {
910 res += 2;
911 goto jnext;
915 /* Do some meta expansions */
/* Skipped entirely if only FEXP_NVAR of the two bits is set; with FEXP_NSHELL
 * only a "$" triggers, otherwise any of the bytes in the anyof() set does */
916 if((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
917 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
918 : anyof(res, "{}[]*?$"))){
919 bool_t doexp;
/* Only expand if FEXP_NOPROTO is set or the name is a file or maildir */
921 if(fexpm & FEXP_NOPROTO)
922 doexp = TRU1;
923 else switch(which_protocol(res)){
924 case PROTO_FILE:
925 case PROTO_MAILDIR:
926 doexp = TRU1;
927 break;
928 default:
929 doexp = FAL0;
930 break;
933 if(doexp){
934 struct str shin;
935 struct n_string shou, *shoup;
937 shin.s = n_UNCONST(res);
938 shin.l = UIZ_MAX;
939 shoup = n_string_creat_auto(&shou);
/* Parse tokens into shoup until the parser reports STOP */
940 for(;;){
941 enum n_shexp_state shs;
943 /* TODO shexp: take care to not include backtick eval once avail! */
944 shs = n_shexp_parse_token((n_SHEXP_PARSE_LOG_D_V |
945 n_SHEXP_PARSE_QUOTE_AUTO_FIXED | n_SHEXP_PARSE_QUOTE_AUTO_DQ |
946 n_SHEXP_PARSE_QUOTE_AUTO_CLOSE), shoup, &shin, NULL);
947 if(shs & n_SHEXP_STATE_STOP)
948 break;
950 res = n_string_cp(shoup);
951 shoup = n_string_drop_ownership(shoup);
952 dyn = TRU1;
954 if(res[0] == '~')
955 res = a_shexp_tilde(res);
957 if(!(fexpm & FEXP_NSHELL) &&
958 (res = a_shexp_globname(res, fexpm)) == NULL)
959 goto jleave;
960 dyn = TRU1;
961 }/* else no tilde */
962 }else if(res[0] == '~'){
963 res = a_shexp_tilde(res);
964 dyn = TRU1;
967 jislocal:
968 if (fexpm & FEXP_LOCAL)
969 switch (which_protocol(res)) {
970 case PROTO_FILE:
971 case PROTO_MAILDIR:
972 break;
973 default:
974 n_err(_("Not a local file or directory: %s\n"),
975 n_shexp_quote_cp(name, FAL0));
976 res = NULL;
977 break;
980 jleave:
981 if(res != NULL && !dyn)
982 res = savestr(res);
983 NYD_LEAVE;
984 return n_UNCONST(res);
987 FL enum n_shexp_state
988 n_shexp_parse_token(enum n_shexp_parse_flags flags, struct n_string *store,
989 struct str *input, void const **cookie){
990 /* TODO shexp_parse_token: WCHAR; $IFS (sp20=' '; echo a $sp20 b; ..) */
991 char c2, c, quotec, utf[8];
992 enum n_shexp_state rv;
993 size_t i, il;
994 char const *ib_save, *ib;
995 enum{
996 a_NONE = 0,
997 a_SKIPQ = 1<<0, /* Skip rest of this quote (\c0 ..) */
998 a_SURPLUS = 1<<1, /* Extended sequence interpretation */
999 a_NTOKEN = 1<<2, /* "New token": e.g., comments are possible */
1000 a_ROUND_MASK = ~((1<<8) - 1),
1001 a_COOKIE = 1<<8,
1002 a_EXPLODE = 1<<9,
1003 a_CONSUME = 1<<10 /* When done, "consume" remaining input */
1004 } state;
1005 NYD2_ENTER;
1007 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
1008 assert(input != NULL);
1009 assert(input->l == 0 || input->s != NULL);
1010 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
1011 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
1012 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
1013 assert(!(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED) ||
1014 (flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK));
1016 if((flags & n_SHEXP_PARSE_LOG_D_V) && (n_poption & n_PO_D_V))
1017 flags |= n_SHEXP_PARSE_LOG;
1018 if(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)
1019 flags |= n_SHEXP_PARSE_QUOTE_AUTO_CLOSE;
1021 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
1022 store = n_string_trunc(store, 0);
1024 state = a_NONE;
1025 ib = input->s;
1026 if((il = input->l) == UIZ_MAX)
1027 input->l = il = strlen(ib);
1028 n_UNINIT(c, '\0');
1030 if(cookie != NULL && *cookie != NULL){
1031 assert(!(flags & n_SHEXP_PARSE_DRYRUN));
1032 state |= a_COOKIE;
1035 jrestart_empty:
1036 rv = n_SHEXP_STATE_NONE;
1037 state &= a_ROUND_MASK;
1039 /* In cookie mode, the next ARGV entry is the token already, unchanged,
1040 * since it has already been expanded before! */
1041 if(state & a_COOKIE){
1042 char const * const *xcookie, *cp;
1044 i = store->s_len;
1045 xcookie = *cookie;
1046 if((store = n_string_push_cp(store, *xcookie))->s_len > 0)
1047 rv |= n_SHEXP_STATE_OUTPUT;
1048 if(*++xcookie == NULL){
1049 *cookie = NULL;
1050 state &= ~a_COOKIE;
1051 flags |= n_SHEXP_PARSE_QUOTE_AUTO_DQ; /* ..why we are here! */
1052 }else
1053 *cookie = n_UNCONST(xcookie);
1055 for(cp = &n_string_cp(store)[i]; (c = *cp++) != '\0';)
1056 if(cntrlchar(c)){
1057 rv |= n_SHEXP_STATE_CONTROL;
1058 break;
1061 /* The last exploded cookie will join with the yielded input token, so
1062 * simply fall through in this case */
1063 if(state & a_COOKIE)
1064 goto jleave_quick;
1065 }else{
1066 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1067 for(; il > 0; ++ib, --il)
1068 if(!blankspacechar(*ib))
1069 break;
1071 input->s = n_UNCONST(ib);
1072 input->l = il;
1075 if(il == 0){
1076 rv |= n_SHEXP_STATE_STOP;
1077 goto jleave;
1080 if(store != NULL)
1081 store = n_string_reserve(store, n_MIN(il, 32)); /* XXX */
1083 switch(flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK){
1084 case n_SHEXP_PARSE_QUOTE_AUTO_SQ:
1085 quotec = '\'';
1086 break;
1087 case n_SHEXP_PARSE_QUOTE_AUTO_DQ:
1088 quotec = '"';
1089 if(0){
1090 case n_SHEXP_PARSE_QUOTE_AUTO_DSQ:
1091 quotec = '\'';
1093 state |= a_SURPLUS;
1094 break;
1095 default:
1096 quotec = '\0';
1097 state |= a_NTOKEN;
1098 break;
1101 while(il > 0){
1102 --il, c = *ib++;
1104 /* If no quote-mode active.. */
1105 if(quotec == '\0'){
1106 if(c == '"' || c == '\''){
1107 quotec = c;
1108 if(c == '"')
1109 state |= a_SURPLUS;
1110 else
1111 state &= ~a_SURPLUS;
1112 state &= ~a_NTOKEN;
1113 continue;
1114 }else if(c == '$'){
1115 if(il > 0){
1116 state &= ~a_NTOKEN;
1117 if(*ib == '\''){
1118 --il, ++ib;
1119 quotec = '\'';
1120 state |= a_SURPLUS;
1121 continue;
1122 }else
1123 goto J_var_expand;
1125 }else if(c == '\\'){
1126 /* Outside of quotes this just escapes any next character, but a sole
1127 * <backslash> at EOS is left unchanged */
1128 if(il > 0)
1129 --il, c = *ib++;
1130 state &= ~a_NTOKEN;
1132 /* A comment may it be if no token has yet started */
1133 else if(c == '#' && (state & a_NTOKEN)){
1134 rv |= n_SHEXP_STATE_STOP;
1135 goto jleave;
1137 /* Metacharacters which separate tokens must be turned on explicitly */
1138 else if(c == '|'){
1139 rv |= n_SHEXP_STATE_META_VERTBAR;
1140 /* The parsed sequence may be _the_ output, so ensure we don't
1141 * include the metacharacter, then. */
1142 if(flags & n_SHEXP_PARSE_DRYRUN)
1143 ++il, --ib;
1144 /*else if(flags & n_SHEXP_PARSE_META_VERTBAR)*/
1145 break;
1146 }else if(c == '&'){
1147 rv |= n_SHEXP_STATE_META_AMPERSAND;
1148 /* The parsed sequence may be _the_ output, so ensure we don't
1149 * include the metacharacter, then. */
1150 if(flags & n_SHEXP_PARSE_DRYRUN)
1151 ++il, --ib;
1152 /*else if(flags & n_SHEXP_PARSE_META_AMPERSAND)*/
1153 break;
1154 }else if(c == ';'){
1155 rv |= n_SHEXP_STATE_META_SEMICOLON;
1156 /* The parsed sequence may be _the_ output, so ensure we don't
1157 * include the metacharacter, then. */
1158 if(flags & n_SHEXP_PARSE_DRYRUN)
1159 ++il, --ib;
1160 else if(flags & n_SHEXP_PARSE_META_SEMICOLON){
1161 if(il > 0)
1162 n_source_inject_input(n_INPUT_INJECT_COMMIT, ib, il);
1163 state |= a_CONSUME;
1164 rv |= n_SHEXP_STATE_STOP;
1166 break;
1167 }else if(c == ',' && (flags &
1168 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA))){
1169 /* The parsed sequence may be _the_ output, so ensure we don't
1170 * include the metacharacter, then. */
1171 if(flags & n_SHEXP_PARSE_DRYRUN)
1172 ++il, --ib;
1173 break;
1174 }else if(blankchar(c)){
1175 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
1176 /* The parsed sequence may be _the_ output, so ensure we don't
1177 * include the metacharacter, then. */
1178 if(flags & n_SHEXP_PARSE_DRYRUN)
1179 ++il, --ib;
1180 break;
1182 state |= a_NTOKEN;
1183 }else
1184 state &= ~a_NTOKEN;
1185 }else{
1186 /* Quote-mode */
1187 assert(!(state & a_NTOKEN));
1188 if(c == quotec && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)){
1189 state &= a_ROUND_MASK;
1190 quotec = '\0';
1191 /* Users may need to recognize the presence of empty quotes */
1192 rv |= n_SHEXP_STATE_OUTPUT;
1193 continue;
1194 }else if(c == '\\' && (state & a_SURPLUS)){
1195 ib_save = ib - 1;
1196 /* A sole <backslash> at EOS is treated as-is! This is ok since
1197 * the "closing quote" error will occur next, anyway */
1198 if(il == 0)
1199 break;
1200 else if((c2 = *ib) == quotec){
1201 --il, ++ib;
1202 c = quotec;
1203 }else if(quotec == '"'){
1204 /* Double quotes:
1205 * The <backslash> shall retain its special meaning as an
1206 * escape character (see Section 2.2.1) only when followed
1207 * by one of the following characters when considered
1208 * special: $ ` " \ <newline> */
1209 switch(c2){
1210 case '$':
1211 case '`':
1212 /* case '"': already handled via c2 == quotec */
1213 case '\\':
1214 --il, ++ib;
1215 c = c2;
1216 /* FALLTHRU */
1217 default:
1218 break;
1220 }else{
1221 /* Dollar-single-quote */
1222 --il, ++ib;
1223 switch(c2){
1224 case '"':
1225 /* case '\'': already handled via c2 == quotec */
1226 case '\\':
1227 c = c2;
1228 break;
1230 case 'b': c = '\b'; break;
1231 case 'f': c = '\f'; break;
1232 case 'n': c = '\n'; break;
1233 case 'r': c = '\r'; break;
1234 case 't': c = '\t'; break;
1235 case 'v': c = '\v'; break;
1237 case 'E':
1238 case 'e': c = '\033'; break;
1240 /* Control character */
1241 case 'c':
1242 if(il == 0)
1243 goto j_dollar_ungetc;
1244 --il, c2 = *ib++;
1245 if(state & a_SKIPQ)
1246 continue;
1247 c = upperconv(c2) ^ 0x40;
1248 if((ui8_t)c > 0x1F && c != 0x7F){ /* ASCII C0: 0..1F, 7F */
1249 if(flags & n_SHEXP_PARSE_LOG)
1250 n_err(_("Invalid \\c notation: %.*s\n"),
1251 (int)input->l, input->s);
1252 rv |= n_SHEXP_STATE_ERR_CONTROL;
1254 /* As an implementation-defined extension, support \c@
1255 * EQ printf(1) alike \c */
1256 if(c == '\0'){
1257 rv |= n_SHEXP_STATE_STOP;
1258 goto jleave;
1260 break;
1262 /* Octal sequence: 1 to 3 octal bytes */
1263 case '0':
1264 /* As an extension (dependent on where you look, echo(1), or
1265 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1266 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1267 c2 = c;
1268 --il, ++ib;
1270 /* FALLTHRU */
1271 case '1': case '2': case '3':
1272 case '4': case '5': case '6': case '7':
1273 c2 -= '0';
1274 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1275 c2 = (c2 << 3) | (c - '0');
1276 --il, ++ib;
1278 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1279 if((ui8_t)c2 > 0x1F){
1280 if(flags & n_SHEXP_PARSE_LOG)
1281 n_err(_("\\0 argument exceeds a byte: %.*s\n"),
1282 (int)input->l, input->s);
1283 rv |= n_SHEXP_STATE_ERR_NUMBER;
1284 --il, ++ib;
1285 /* Write unchanged */
1286 je_ib_save:
1287 rv |= n_SHEXP_STATE_OUTPUT;
1288 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1289 store = n_string_push_buf(store, ib_save,
1290 PTR2SIZE(ib - ib_save));
1291 continue;
1293 c2 = (c2 << 3) | (c -= '0');
1294 --il, ++ib;
1296 if((c = c2) == '\0')
1297 state |= a_SKIPQ;
1298 if(state & a_SKIPQ)
1299 continue;
1300 break;
1302 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
1303 case 'U':
1304 i = 8;
1305 if(0){
1306 /* FALLTHRU */
1307 case 'u':
1308 i = 4;
1310 if(il == 0)
1311 goto j_dollar_ungetc;
1312 if(0){
1313 /* FALLTHRU */
1315 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1316 case 'X':
1317 case 'x':
1318 if(il == 0)
1319 goto j_dollar_ungetc;
1320 i = 2;
1322 /* C99 */{
1323 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
1324 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1326 size_t no, j;
1328 i = n_MIN(il, i);
1329 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
1330 c = *ib;
1331 if(hexchar(c)){
1332 no <<= 4;
1333 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
1334 : ((c) <= 'F' ? 55 : 87)))];
1335 }else if(j == 0){
1336 if(state & a_SKIPQ)
1337 break;
1338 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
1339 if(flags & n_SHEXP_PARSE_LOG)
1340 n_err(_("Invalid \\%c notation: %.*s\n"),
1341 c2, (int)input->l, input->s);
1342 rv |= n_SHEXP_STATE_ERR_NUMBER;
1343 goto je_ib_save;
1344 }else
1345 break;
1348 /* Unicode massage */
1349 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
1350 if((c = (char)no) == '\0')
1351 state |= a_SKIPQ;
1352 }else if(no == 0)
1353 state |= a_SKIPQ;
1354 else if(!(state & a_SKIPQ)){
1355 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1356 store = n_string_reserve(store, n_MAX(j, 4));
1358 c2 = FAL0;
1359 if(no > 0x10FFFF){ /* XXX magic; CText */
1360 if(flags & n_SHEXP_PARSE_LOG)
1361 n_err(_("\\U argument exceeds 0x10FFFF: %.*s\n"),
1362 (int)input->l, input->s);
1363 rv |= n_SHEXP_STATE_ERR_NUMBER;
1364 /* But normalize the output anyway */
1365 goto Je_uni_norm;
1368 j = n_utf32_to_utf8(no, utf);
1370 if(n_psonce & n_PSO_UNICODE){
1371 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
1372 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1373 store = n_string_push_buf(store, utf, j);
1374 continue;
1376 #ifdef HAVE_ICONV
1377 else{
1378 char *icp;
1380 icp = n_iconv_onetime_cp(n_ICONV_NONE,
1381 NULL, NULL, utf);
1382 if(icp != NULL){
1383 rv |= n_SHEXP_STATE_OUTPUT;
1384 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1385 store = n_string_push_cp(store, icp);
1386 continue;
1389 #endif
1390 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Je_uni_norm:{
1391 char itoa[32];
1393 rv |= n_SHEXP_STATE_OUTPUT |
1394 n_SHEXP_STATE_ERR_UNICODE;
1395 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1396 (no > 0xFFFFu ? 'U' : 'u'),
1397 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
1398 store = n_string_push_buf(store, itoa, i);
1400 continue;
1402 if(state & a_SKIPQ)
1403 continue;
1405 break;
1407 /* Extension: \$ can be used to expand a variable.
1408 * Bug|ad effect: if conversion fails, not written "as-is" */
1409 case '$':
1410 if(il == 0)
1411 goto j_dollar_ungetc;
1412 goto J_var_expand;
1414 default:
1415 j_dollar_ungetc:
1416 /* Follow bash(1) behaviour, print sequence unchanged */
1417 ++il, --ib;
1418 break;
1421 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
1422 bool_t brace;
1424 if(!(brace = (*ib == '{')) || il > 1){
1425 char const *cp, *vp;
1427 ib_save = ib - 1;
1428 il -= brace;
1429 vp = (ib += brace);
1430 state &= ~a_EXPLODE;
1432 for(i = 0; il > 0; --il, ++ib, ++i){
1433 /* We have some special cases regarding macro-local special
1434 * parameters, so ensure these don't cause failure */
1435 c = *ib;
1436 if(!a_SHEXP_ISVARC(c)){
1437 if(i == 0 && a_SHEXP_ISVARC_SPECIAL1(c)){
1438 if(c == '@' && quotec == '"')
1439 state |= a_EXPLODE;
1440 --il, ++ib;
1441 ++i;
1443 break;
1447 if(state & a_SKIPQ){
1448 if(brace && il > 0 && *ib == '}')
1449 --il, ++ib;
1450 continue;
1453 if(i == 0){
1454 if(brace){
1455 if(flags & n_SHEXP_PARSE_LOG)
1456 n_err(_("Bad substitution (${}): %.*s\n Near %.*s\n"),
1457 (int)input->l, input->s, (int)il, ib);
1458 rv |= n_SHEXP_STATE_ERR_BADSUB;
1459 goto je_ib_save;
1461 c = '$';
1462 }else{
1463 if(brace){
1464 if(il == 0 || *ib != '}'){
1465 if(flags & n_SHEXP_PARSE_LOG)
1466 n_err(_("Missing closing brace for ${VAR}: %.*s\n"
1467 " Near: %.*s\n"),
1468 (int)input->l, input->s, (int)il, ib);
1469 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN |
1470 n_SHEXP_STATE_ERR_BRACE;
1471 goto je_ib_save;
1473 --il, ++ib;
1476 if(flags & n_SHEXP_PARSE_DRYRUN)
1477 continue;
1479 /* We may shall explode "${@}" to a series of successive,
1480 * properly quoted tokens (instead). The first exploded
1481 * cookie will join with the current token */
1482 if((state & a_EXPLODE) && !(flags & n_SHEXP_PARSE_DRYRUN) &&
1483 cookie != NULL && n_var_vexplode(cookie)){
1484 state |= a_COOKIE;
1485 input->s = n_UNCONST(ib);
1486 input->l = il;
1487 goto jrestart_empty;
1490 /* Check getenv(3) shall no internal variable exist! */
1491 vp = savestrbuf(vp, i);
1492 if((cp = n_var_vlook(vp, TRU1)) != NULL){
1493 rv |= n_SHEXP_STATE_OUTPUT;
1494 store = n_string_push_cp(store, cp);
1495 for(; (c = *cp) != '\0'; ++cp)
1496 if(cntrlchar(c)){
1497 rv |= n_SHEXP_STATE_CONTROL;
1498 break;
1501 continue;
1504 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
1505 continue;
1509 if(!(state & a_SKIPQ)){
1510 rv |= n_SHEXP_STATE_OUTPUT;
1511 if(cntrlchar(c))
1512 rv |= n_SHEXP_STATE_CONTROL;
1513 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1514 store = n_string_push_c(store, c);
1518 if(quotec != '\0' && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_CLOSE)){
1519 if(flags & n_SHEXP_PARSE_LOG)
1520 n_err(_("no closing quote: %.*s\n"), (int)input->l, input->s);
1521 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
1524 jleave:
1525 assert(!(state & a_COOKIE));
1526 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
1527 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
1528 rv |= n_SHEXP_STATE_OUTPUT;
1531 if(state & a_CONSUME){
1532 input->s = n_UNCONST(&ib[il]);
1533 input->l = 0;
1534 }else{
1535 if(flags & n_SHEXP_PARSE_TRIMSPACE){
1536 for(; il > 0; ++ib, --il)
1537 if(!blankchar(*ib))
1538 break;
1540 input->l = il;
1541 input->s = n_UNCONST(ib);
1544 if(!(rv & n_SHEXP_STATE_STOP)){
1545 if(!(rv & n_SHEXP_STATE_OUTPUT) && (flags & n_SHEXP_PARSE_IGNORE_EMPTY) &&
1546 il > 0)
1547 goto jrestart_empty;
1548 if(/*!(rv & n_SHEXP_STATE_OUTPUT) &&*/ il == 0)
1549 rv |= n_SHEXP_STATE_STOP;
1551 jleave_quick:
1552 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1553 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1554 NYD2_LEAVE;
1555 return rv;
1558 FL char *
1559 n_shexp_parse_token_cp(enum n_shexp_parse_flags flags, char const **cp){
1560 struct str input;
1561 struct n_string sou, *soup;
1562 char *rv;
1563 enum n_shexp_state shs;
1564 NYD2_ENTER;
1566 assert(cp != NULL);
1568 input.s = n_UNCONST(*cp);
1569 input.l = UIZ_MAX;
1570 soup = n_string_creat_auto(&sou);
1572 shs = n_shexp_parse_token(flags, soup, &input, NULL);
1573 if(shs & n_SHEXP_STATE_ERR_MASK){
1574 soup = n_string_assign_cp(soup, *cp);
1575 *cp = NULL;
1576 }else
1577 *cp = input.s;
1579 rv = n_string_cp(soup);
1580 /*n_string_gut(n_string_drop_ownership(soup));*/
1581 NYD2_LEAVE;
1582 return rv;
1585 FL struct n_string *
1586 n_shexp_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1587 struct a_shexp_quote_lvl sql;
1588 struct a_shexp_quote_ctx sqc;
1589 NYD2_ENTER;
1591 assert(store != NULL);
1592 assert(input != NULL);
1593 assert(input->l == 0 || input->s != NULL);
1595 memset(&sqc, 0, sizeof sqc);
1596 sqc.sqc_store = store;
1597 sqc.sqc_input.s = input->s;
1598 if((sqc.sqc_input.l = input->l) == UIZ_MAX)
1599 sqc.sqc_input.l = strlen(input->s);
1600 sqc.sqc_flags = rndtrip ? a_SHEXP_QUOTE_ROUNDTRIP : a_SHEXP_QUOTE_NONE;
1602 if(sqc.sqc_input.l == 0)
1603 store = n_string_push_buf(store, "''", sizeof("''") -1);
1604 else{
1605 memset(&sql, 0, sizeof sql);
1606 sql.sql_dat = sqc.sqc_input;
1607 sql.sql_flags = sqc.sqc_flags;
1608 a_shexp__quote(&sqc, &sql);
1610 NYD2_LEAVE;
1611 return store;
1614 FL char *
1615 n_shexp_quote_cp(char const *cp, bool_t rndtrip){
1616 struct n_string store;
1617 struct str input;
1618 char *rv;
1619 NYD2_ENTER;
1621 assert(cp != NULL);
1623 input.s = n_UNCONST(cp);
1624 input.l = UIZ_MAX;
1625 rv = n_string_cp(n_shexp_quote(n_string_creat_auto(&store), &input,
1626 rndtrip));
1627 n_string_gut(n_string_drop_ownership(&store));
1628 NYD2_LEAVE;
1629 return rv;
1632 FL bool_t
1633 n_shexp_is_valid_varname(char const *name){
1634 char c;
1635 bool_t rv;
1636 NYD2_ENTER;
1638 for(rv = TRU1; (c = *name++) != '\0';)
1639 if(!a_SHEXP_ISVARC(c)){
1640 rv = FAL0;
1641 break;
1643 NYD2_LEAVE;
1644 return rv;
1647 /* s-it-mode */