n_shell_quote*(): add a "roundtrip" argument
[s-mailx.git] / shexp.c
blobd433005fb17448fd6475d64b8dfad1bcec9aa9f0
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE shexp
38 #ifndef HAVE_AMALGAMATION
39 # include "nail.h"
40 #endif
42 #include <sys/wait.h>
44 #include <pwd.h>
46 #ifdef HAVE_WORDEXP
47 # include <wordexp.h>
48 #endif
/* Test whether C may be part of a variable name.
 * The Shell and Utilities volume of POSIX.1-2008 says that the environment
 * variable names used by its utilities consist solely of uppercase letters,
 * digits and the <underscore> ('_') of the Portable Character Set and do
 * not begin with a digit, but also that other characters may be permitted
 * by an implementation, and that applications shall tolerate such names.
 * In addition the hyphen "-" is accepted because it is common for mailx.
 * Note: the argument is evaluated more than once */
#define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
60 struct shvar_stack {
61 struct shvar_stack *shs_next; /* Outer stack frame */
62 char const *shs_value; /* Remaining value to expand */
63 size_t shs_len; /* gth of .shs_dat this level */
64 char const *shs_dat; /* Result data of this level */
65 bool_t *shs_err; /* Or NULL */
66 bool_t shs_bsesc; /* Shall backslash escaping be performed */
69 /* Locate the user's mailbox file (where new, unread mail is queued) */
70 static char * _findmail(char const *user, bool_t force);
72 /* Perform shell meta character expansion TODO obsolete (INSECURE!) */
73 static char * _globname(char const *name, enum fexp_mode fexpm);
75 /* Perform shell variable expansion */
76 static char * _sh_exp_var(struct shvar_stack *shsp);
78 static char *
79 _findmail(char const *user, bool_t force)
81 char *rv;
82 char const *cp;
83 NYD_ENTER;
85 if (force || (cp = ok_vlook(MAIL)) == NULL) {
86 size_t ul = strlen(user), i = sizeof(MAILSPOOL) -1 + 1 + ul +1;
88 rv = salloc(i);
89 memcpy(rv, MAILSPOOL, i = sizeof(MAILSPOOL));
90 rv[i] = '/';
91 memcpy(&rv[++i], user, ul +1);
92 } else if ((rv = fexpand(cp, FEXP_NSHELL)) == NULL)
93 rv = savestr(cp);
94 NYD_LEAVE;
95 return rv;
98 static char *
99 _globname(char const *name, enum fexp_mode fexpm)
101 #ifdef HAVE_WORDEXP
102 wordexp_t we;
103 char *cp = NULL;
104 sigset_t nset;
105 int i;
106 NYD_ENTER;
108 /* Mac OS X Snow Leopard and Linux don't init fields on error, causing
109 * SIGSEGV in wordfree(3); so let's just always zero it ourselfs */
110 memset(&we, 0, sizeof we);
112 /* Some systems (notably Open UNIX 8.0.0) fork a shell for wordexp()
113 * and wait, which will fail if our SIGCHLD handler is active */
114 sigemptyset(&nset);
115 sigaddset(&nset, SIGCHLD);
116 sigprocmask(SIG_BLOCK, &nset, NULL);
117 # ifndef WRDE_NOCMD
118 # define WRDE_NOCMD 0
119 # endif
120 i = wordexp(name, &we, WRDE_NOCMD);
121 sigprocmask(SIG_UNBLOCK, &nset, NULL);
123 switch (i) {
124 case 0:
125 break;
126 #ifdef WRDE_CMDSUB
127 case WRDE_CMDSUB:
128 if (!(fexpm & FEXP_SILENT))
129 n_err(_("\"%s\": Command substitution not allowed\n"), name);
130 goto jleave;
131 #endif
132 case WRDE_NOSPACE:
133 if (!(fexpm & FEXP_SILENT))
134 n_err(_("\"%s\": Expansion buffer overflow\n"), name);
135 goto jleave;
136 case WRDE_BADCHAR:
137 case WRDE_SYNTAX:
138 default:
139 if (!(fexpm & FEXP_SILENT))
140 n_err(_("Syntax error in \"%s\"\n"), name);
141 goto jleave;
144 switch (we.we_wordc) {
145 case 1:
146 cp = savestr(we.we_wordv[0]);
147 break;
148 case 0:
149 if (!(fexpm & FEXP_SILENT))
150 n_err(_("\"%s\": No match\n"), name);
151 break;
152 default:
153 if (fexpm & FEXP_MULTIOK) {
154 size_t j, l;
156 for (l = 0, j = 0; j < we.we_wordc; ++j)
157 l += strlen(we.we_wordv[j]) + 1;
158 ++l;
159 cp = salloc(l);
160 for (l = 0, j = 0; j < we.we_wordc; ++j) {
161 size_t x = strlen(we.we_wordv[j]);
162 memcpy(cp + l, we.we_wordv[j], x);
163 l += x;
164 cp[l++] = ' ';
166 cp[l] = '\0';
167 } else if (!(fexpm & FEXP_SILENT))
168 n_err(_("\"%s\": Ambiguous\n"), name);
169 break;
171 jleave:
172 wordfree(&we);
173 NYD_LEAVE;
174 return cp;
176 #else /* HAVE_WORDEXP */
177 UNUSED(fexpm);
179 if(options & OPT_D_V)
180 n_err(_("wordexp(3) not available, cannot perform expansion\n"));
181 return savestr(name);
182 #endif
185 static char *
186 _sh_exp_var(struct shvar_stack *shsp)
188 struct shvar_stack next, *np, *tmp;
189 char const *vp;
190 char lc, c, *cp, *rv;
191 size_t i;
192 NYD2_ENTER;
194 if (*(vp = shsp->shs_value) != '$') {
195 bool_t bsesc = shsp->shs_bsesc;
196 union {bool_t hadbs; char c;} u = {FAL0};
198 shsp->shs_dat = vp;
199 for (lc = '\0', i = 0; ((c = *vp) != '\0'); ++i, ++vp) {
200 if (c == '$' && lc != '\\')
201 break;
202 if (!bsesc)
203 continue;
204 lc = (lc == '\\') ? (u.hadbs = TRU1, '\0') : c;
206 shsp->shs_len = i;
208 if (u.hadbs) {
209 shsp->shs_dat = cp = savestrbuf(shsp->shs_dat, i);
211 for (lc = '\0', rv = cp; (u.c = *cp++) != '\0';) {
212 if (u.c != '\\' || lc == '\\')
213 *rv++ = u.c;
214 lc = (lc == '\\') ? '\0' : u.c;
216 *rv = '\0';
218 shsp->shs_len = PTR2SIZE(rv - shsp->shs_dat);
220 } else {
221 if ((lc = (*++vp == '{')))
222 ++vp;
224 shsp->shs_dat = vp;
225 for (i = 0; (c = *vp) != '\0'; ++i, ++vp)
226 if (!a_SHEXP_ISVARC(c))
227 break;
229 if (lc) {
230 if (c != '}') {
231 n_err(_("Variable name misses closing \"}\": %s\n"),
232 shsp->shs_value);
233 shsp->shs_len = strlen(shsp->shs_value);
234 shsp->shs_dat = shsp->shs_value;
235 if (shsp->shs_err != NULL)
236 *shsp->shs_err = TRU1;
237 goto junroll;
239 c = *++vp;
242 shsp->shs_len = i;
243 /* Check getenv(3) shall no internal variable exist! */
244 if ((rv = vok_vlook(cp = savestrbuf(shsp->shs_dat, i))) != NULL ||
245 (rv = getenv(cp)) != NULL)
246 shsp->shs_len = strlen(shsp->shs_dat = rv);
247 else
248 shsp->shs_len = 0, shsp->shs_dat = UNCONST("");
250 if (c != '\0')
251 goto jrecurse;
253 /* That level made the great and completed encoding. Build result */
254 junroll:
255 for (i = 0, np = shsp, shsp = NULL; np != NULL;) {
256 i += np->shs_len;
257 tmp = np->shs_next;
258 np->shs_next = shsp;
259 shsp = np;
260 np = tmp;
263 cp = rv = salloc(i +1);
264 while (shsp != NULL) {
265 np = shsp;
266 shsp = shsp->shs_next;
267 memcpy(cp, np->shs_dat, np->shs_len);
268 cp += np->shs_len;
270 *cp = '\0';
272 jleave:
273 NYD2_LEAVE;
274 return rv;
275 jrecurse:
276 memset(&next, 0, sizeof next);
277 next.shs_next = shsp;
278 next.shs_value = vp;
279 next.shs_err = shsp->shs_err;
280 next.shs_bsesc = shsp->shs_bsesc;
281 rv = _sh_exp_var(&next);
282 goto jleave;
285 FL char *
286 fexpand(char const *name, enum fexp_mode fexpm)
288 struct str s;
289 char const *cp, *res;
290 bool_t dyn;
291 NYD_ENTER;
293 /* The order of evaluation is "%" and "#" expand into constants.
294 * "&" can expand into "+". "+" can expand into shell meta characters.
295 * Shell meta characters expand into constants.
296 * This way, we make no recursive expansion */
297 if ((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
298 res = UNCONST(name);
300 jnext:
301 dyn = FAL0;
302 switch (*res) {
303 case '%':
304 if (res[1] == ':' && res[2] != '\0') {
305 res = &res[2];
306 goto jnext;
308 res = _findmail((res[1] != '\0' ? res + 1 : myname), (res[1] != '\0'));
309 goto jislocal;
310 case '#':
311 if (res[1] != '\0')
312 break;
313 if (prevfile[0] == '\0') {
314 n_err(_("No previous file\n"));
315 res = NULL;
316 goto jleave;
318 res = prevfile;
319 goto jislocal;
320 case '&':
321 if (res[1] == '\0')
322 res = ok_vlook(MBOX);
323 break;
326 /* POSIX: if *folder* unset or null, "+" shall be retained */
327 if (*res == '+' && *(cp = folder_query()) != '\0') {
328 size_t i = strlen(cp);
330 res = str_concat_csvl(&s, cp,
331 ((i == 0 || cp[i -1] == '/') ? "" : "/"), res + 1, NULL)->s;
332 dyn = TRU1;
334 /* TODO *folder* can't start with %[:], can it!?! */
335 if (res[0] == '%' && res[1] == ':') {
336 res += 2;
337 goto jnext;
341 /* Catch the most common shell meta character */
342 if (res[0] == '~') {
343 res = n_shell_expand_tilde(res, NULL);
344 dyn = TRU1;
347 if ((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
348 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
349 : anyof(res, "|&;<>{}()[]*?$`'\"\\"))) {
350 bool_t doexp;
352 if(fexpm & FEXP_NOPROTO)
353 doexp = TRU1;
354 else switch(which_protocol(res)){
355 case PROTO_FILE:
356 case PROTO_MAILDIR:
357 doexp = TRU1;
358 break;
359 default:
360 doexp = FAL0;
361 break;
364 if(doexp){
365 res = (fexpm & FEXP_NSHELL) ? n_shell_expand_var(res, TRU1, NULL)
366 : _globname(res, fexpm);
367 dyn = TRU1;
371 jislocal:
372 if (fexpm & FEXP_LOCAL)
373 switch (which_protocol(res)) {
374 case PROTO_FILE:
375 case PROTO_MAILDIR:
376 break;
377 default:
378 n_err(_("Not a local file or directory: %s\n"),
379 n_shell_quote_cp(name, FAL0));
380 res = NULL;
381 break;
384 jleave:
385 if (res && !dyn)
386 res = savestr(res);
387 NYD_LEAVE;
388 return UNCONST(res);
391 FL char *
392 n_shell_expand_tilde(char const *s, bool_t *err_or_null)
394 struct passwd *pwp;
395 size_t nl, rl;
396 char const *rp, *np;
397 char *rv;
398 bool_t err;
399 NYD2_ENTER;
401 err = FAL0;
403 if (s[0] != '~')
404 goto jasis;
406 if (*(rp = s + 1) == '/' || *rp == '\0')
407 np = ok_vlook(HOME);
408 else {
409 if ((rp = strchr(s + 1, '/')) == NULL)
410 rp = (np = UNCONST(s)) + 1;
411 else {
412 nl = PTR2SIZE(rp - s);
413 np = savestrbuf(s, nl);
416 if ((pwp = getpwnam(np)) == NULL) {
417 err = TRU1;
418 goto jasis;
420 np = pwp->pw_name;
423 nl = strlen(np);
424 rl = strlen(rp);
425 rv = salloc(nl + 1 + rl +1);
426 memcpy(rv, np, nl);
427 if (rl > 0) {
428 memcpy(rv + nl, rp, rl);
429 nl += rl;
431 rv[nl] = '\0';
432 goto jleave;
434 jasis:
435 rv = savestr(s);
436 jleave:
437 if (err_or_null != NULL)
438 *err_or_null = err;
439 NYD2_LEAVE;
440 return rv;
443 FL char *
444 n_shell_expand_var(char const *s, bool_t bsescape, bool_t *err_or_null)
446 struct shvar_stack top;
447 char *rv;
448 NYD2_ENTER;
450 memset(&top, 0, sizeof top);
452 top.shs_value = s;
453 if ((top.shs_err = err_or_null) != NULL)
454 *err_or_null = FAL0;
455 top.shs_bsesc = bsescape;
456 rv = _sh_exp_var(&top);
457 NYD2_LEAVE;
458 return rv;
461 FL int
462 n_shell_expand_escape(char const **s, bool_t use_nail_extensions)/* TODO DROP!*/
464 char const *xs;
465 int c, n;
466 NYD2_ENTER;
468 xs = *s;
470 if ((c = *xs & 0xFF) == '\0')
471 goto jleave;
472 ++xs;
473 if (c != '\\')
474 goto jleave;
476 switch ((c = *xs & 0xFF)) {
477 case 'a': c = '\a'; break;
478 case 'b': c = '\b'; break;
479 case 'c': c = PROMPT_STOP; break;
480 case 'f': c = '\f'; break;
481 case 'n': c = '\n'; break;
482 case 'r': c = '\r'; break;
483 case 't': c = '\t'; break;
484 case 'v': c = '\v'; break;
486 /* ESCape */
487 case 'E':
488 case 'e':
489 c = '\033';
490 break;
492 /* Hexadecimal TODO uses ASCII */
493 case 'X':
494 case 'x': {
495 static ui8_t const hexatoi[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
496 #undef a_HEX
497 #define a_HEX(n) \
498 hexatoi[(ui8_t)((n) - ((n) <= '9' ? 48 : ((n) <= 'F' ? 55 : 87)))]
500 c = 0;
501 ++xs;
502 if(hexchar(*xs))
503 c = a_HEX(*xs);
504 else{
505 --xs;
506 if(options & OPT_D_V)
507 n_err(_("Invalid \"\\xNUMBER\" notation in \"%s\"\n"), xs - 1);
508 c = '\\';
509 goto jleave;
511 ++xs;
512 if(hexchar(*xs)){
513 c <<= 4;
514 c += a_HEX(*xs);
515 ++xs;
517 goto jleave;
519 #undef a_HEX
521 /* octal, with optional 0 prefix */
522 case '0':
523 ++xs;
524 if(0){
525 default:
526 if(*xs == '\0'){
527 c = '\\';
528 break;
531 for (c = 0, n = 3; n-- > 0 && octalchar(*xs); ++xs) {
532 c <<= 3;
533 c |= *xs - '0';
535 goto jleave;
537 /* S-nail extension for nice (get)prompt(()) support */
538 case '&':
539 case '?':
540 case '$':
541 case '@':
542 if (use_nail_extensions) {
543 switch (c) {
544 case '&': c = ok_blook(bsdcompat) ? '&' : '?'; break;
545 case '?': c = (pstate & PS_EVAL_ERROR) ? '1' : '0'; break;
546 case '$': c = PROMPT_DOLLAR; break;
547 case '@': c = PROMPT_AT; break;
549 break;
552 /* FALLTHRU */
553 case '\0':
554 /* A sole <backslash> at EOS is treated as-is! */
555 c = '\\';
556 /* FALLTHRU */
557 case '\\':
558 break;
561 ++xs;
562 jleave:
563 *s = xs;
564 NYD2_LEAVE;
565 return c;
568 FL enum n_shexp_state
569 n_shell_parse_token(struct n_string *store, struct str *input, /* TODO WCHAR */
570 enum n_shexp_parse_flags flags){
571 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
572 char utf[8];
573 #endif
574 char c2, c, quotec;
575 enum{
576 a_NONE = 0,
577 a_SKIPQ = 1<<0, /* Skip rest of this quote (\c0 ..) */
578 a_SURPLUS = 1<<1, /* Extended sequence interpretation */
579 a_NTOKEN = 1<<2 /* "New token": e.g., comments are possible */
580 } state;
581 enum n_shexp_state rv;
582 size_t i, il;
583 char const *ib_save, *ib;
584 NYD2_ENTER;
585 UNINIT(c, '\0');
587 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
588 assert(input != NULL);
589 assert(input->l == 0 || input->s != NULL);
590 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
591 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
592 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
594 if((flags & n_SHEXP_PARSE_LOG_D_V) && (options & OPT_D_V))
595 flags |= n_SHEXP_PARSE_LOG;
597 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
598 store = n_string_trunc(store, 0);
600 ib = input->s;
601 if((il = input->l) == UIZ_MAX)
602 il = strlen(ib);
604 jrestart_empty:
605 if(flags & n_SHEXP_PARSE_TRIMSPACE){
606 for(; il > 0; ++ib, --il)
607 if(!blankspacechar(*ib))
608 break;
610 input->s = UNCONST(ib);
611 input->l = il;
613 if(il == 0){
614 rv = n_SHEXP_STATE_STOP;
615 goto jleave;
618 if(store != NULL)
619 store = n_string_reserve(store, MIN(il, 32)); /* XXX */
621 for(rv = n_SHEXP_STATE_NONE, state = a_NTOKEN, quotec = '\0'; il > 0;){
622 --il, c = *ib++;
624 /* If no quote-mode active.. */
625 if(quotec == '\0'){
626 if(c == '"' || c == '\''){
627 quotec = c;
628 if(c == '"')
629 state |= a_SURPLUS;
630 else
631 state &= ~a_SURPLUS;
632 state &= ~a_NTOKEN;
633 continue;
634 }else if(c == '$'){
635 if(il > 0){
636 state &= ~a_NTOKEN;
637 if(*ib == '\''){
638 --il, ++ib;
639 quotec = '\'';
640 state |= a_SURPLUS;
641 continue;
642 }else
643 goto J_var_expand;
645 }else if(c == '\\'){
646 /* Outside of quotes this just escapes any next character, but a sole
647 * <backslash> at EOS is left unchanged */
648 if(il > 0)
649 --il, c = *ib++;
650 state &= ~a_NTOKEN;
651 }else if(c == '#' && (state & a_NTOKEN)){
652 rv |= n_SHEXP_STATE_STOP;
653 goto jleave;
654 }else if(c == ',' && (flags &
655 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA)))
656 break;
657 else if(blankchar(c)){
658 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
659 ++il, --ib;
660 break;
662 state |= a_NTOKEN;
663 }else
664 state &= ~a_NTOKEN;
665 }else{
666 /* Quote-mode */
667 assert(!(state & a_NTOKEN));
668 if(c == quotec){
669 state = a_NONE;
670 quotec = '\0';
671 /* Users may need to recognize the presence of empty quotes */
672 rv |= n_SHEXP_STATE_OUTPUT;
673 continue;
674 }else if(c == '\\' && (state & a_SURPLUS)){
675 ib_save = ib - 1;
676 /* A sole <backslash> at EOS is treated as-is! This is ok since
677 * the "closing quote" error will occur next, anyway */
678 if(il == 0)
679 break;
680 else if((c2 = *ib) == quotec){
681 --il, ++ib;
682 c = quotec;
683 }else if(quotec == '"'){
684 /* Double quotes:
685 * The <backslash> shall retain its special meaning as an
686 * escape character (see Section 2.2.1) only when followed
687 * by one of the following characters when considered
688 * special: $ ` " \ <newline> */
689 switch(c2){
690 case '$':
691 case '`':
692 /* case '"': already handled via c2 == quotec */
693 case '\\':
694 --il, ++ib;
695 c = c2;
696 /* FALLTHRU */
697 default:
698 break;
700 }else{
701 /* Dollar-single-quote */
702 --il, ++ib;
703 switch(c2){
704 case '"':
705 /* case '\'': already handled via c2 == quotec */
706 case '\\':
707 c = c2;
708 break;
710 case 'b': c = '\b'; break;
711 case 'f': c = '\f'; break;
712 case 'n': c = '\n'; break;
713 case 'r': c = '\r'; break;
714 case 't': c = '\t'; break;
715 case 'v': c = '\v'; break;
717 case 'E':
718 case 'e': c = '\033'; break;
720 /* Control character */
721 case 'c':
722 if(il == 0)
723 goto j_dollar_ungetc;
724 --il, c2 = *ib++;
725 if(state & a_SKIPQ)
726 continue;
727 c = upperconv(c2) ^ 0x40;
728 if((ui8_t)c > 0x1F && c != 0x7F){ /* ASCII C0: 0..1F, 7F */
729 if(flags & n_SHEXP_PARSE_LOG)
730 n_err(_("Invalid \"\\c\" notation: %.*s\n"),
731 (int)input->l, input->s);
732 rv |= n_SHEXP_STATE_ERR_CONTROL;
734 /* As an implementation-defined extension, support \c@
735 * EQ printf(1) alike \c */
736 if(c == '\0'){
737 rv |= n_SHEXP_STATE_STOP;
738 goto jleave;
740 break;
742 /* Octal sequence: 1 to 3 octal bytes */
743 case '0':
744 /* As an extension (dependent on where you look, echo(1), or
745 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
746 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
747 c2 = c;
748 --il, ++ib;
750 /* FALLTHRU */
751 case '1': case '2': case '3':
752 case '4': case '5': case '6': case '7':
753 c2 -= '0';
754 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
755 c2 = (c2 << 3) | (c - '0');
756 --il, ++ib;
758 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
759 if((ui8_t)c2 > 0x1F){
760 if(flags & n_SHEXP_PARSE_LOG)
761 n_err(_("\"\\0\" argument exceeds a byte: "
762 "%.*s\n"), (int)input->l, input->s);
763 rv |= n_SHEXP_STATE_ERR_NUMBER;
764 --il, ++ib;
765 /* Write unchanged */
766 je_ib_save:
767 rv |= n_SHEXP_STATE_OUTPUT;
768 if(!(flags & n_SHEXP_PARSE_DRYRUN))
769 store = n_string_push_buf(store, ib_save,
770 PTR2SIZE(ib - ib_save));
771 continue;
773 c2 = (c2 << 3) | (c -= '0');
774 --il, ++ib;
776 if((c = c2) == '\0')
777 state |= a_SKIPQ;
778 if(state & a_SKIPQ)
779 continue;
780 break;
782 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
783 case 'U':
784 i = 8;
785 if(0){
786 /* FALLTHRU */
787 case 'u':
788 i = 4;
790 if(il == 0)
791 goto j_dollar_ungetc;
792 if(0){
793 /* FALLTHRU */
795 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
796 case 'X':
797 case 'x':
798 if(il == 0)
799 goto j_dollar_ungetc;
800 i = 2;
802 /* C99 */{
803 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
804 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
806 size_t no, j;
808 i = MIN(il, i);
809 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
810 c = *ib;
811 if(hexchar(c)){
812 no <<= 4;
813 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
814 : ((c) <= 'F' ? 55 : 87)))];
815 }else if(j == 0){
816 if(state & a_SKIPQ)
817 break;
818 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
819 if(flags & n_SHEXP_PARSE_LOG)
820 n_err(_("Invalid \"\\%c\" notation: %.*s\n"),
821 c2, (int)input->l, input->s);
822 rv |= n_SHEXP_STATE_ERR_NUMBER;
823 goto je_ib_save;
824 }else
825 break;
828 /* Unicode massage */
829 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
830 if((c = (char)no) == '\0')
831 state |= a_SKIPQ;
832 }else if(no == 0)
833 state |= a_SKIPQ;
834 else if(!(state & a_SKIPQ)){
835 if(!(flags & n_SHEXP_PARSE_DRYRUN))
836 store = n_string_reserve(store, MAX(j, 4));
838 c2 = FAL0;
839 if(no > 0x10FFFF){ /* XXX magic; CText */
840 if(flags & n_SHEXP_PARSE_LOG)
841 n_err(_("\"\\U\" argument exceeds 0x10FFFF: "
842 "%.*s\n"), (int)input->l, input->s);
843 rv |= n_SHEXP_STATE_ERR_NUMBER;
844 /* But normalize the output anyway */
845 goto Je_uni_norm;
848 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
849 j = n_utf32_to_utf8(no, utf);
850 #endif
851 #ifdef HAVE_NATCH_CHAR
852 if(options & OPT_UNICODE){
853 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
854 if(!(flags & n_SHEXP_PARSE_DRYRUN))
855 store = n_string_push_buf(store, utf, j);
856 continue;
858 #endif
859 #ifdef HAVE_ICONV
860 /* C99 */{
861 char *icp;
863 icp = n_iconv_onetime_cp(NULL, NULL, utf, FAL0);
864 if(icp != NULL){
865 rv |= n_SHEXP_STATE_OUTPUT;
866 if(!(flags & n_SHEXP_PARSE_DRYRUN))
867 store = n_string_push_cp(store, icp);
868 continue;
871 #endif
872 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Je_uni_norm:{
873 char itoa[32];
875 rv |= n_SHEXP_STATE_OUTPUT |
876 n_SHEXP_STATE_ERR_UNICODE;
877 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
878 (no > 0xFFFFu ? 'U' : 'u'),
879 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
880 store = n_string_push_buf(store, itoa, i);
882 continue;
884 if(state & a_SKIPQ)
885 continue;
887 break;
889 /* Extension: \$ can be used to expand a variable.
890 * Bug|ad effect: if conversion fails, not written "as-is" */
891 case '$':
892 if(il == 0)
893 goto j_dollar_ungetc;
894 goto J_var_expand;
896 default:
897 j_dollar_ungetc:
898 /* Follow bash behaviour, print sequence unchanged */
899 ++il, --ib;
900 break;
903 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
904 bool_t brace;
906 if(!(brace = (*ib == '{')) || il > 1){
907 char const *cp, *vp;
909 ib_save = ib - 1;
910 il -= brace;
911 vp = (ib += brace);
913 for(i = 0; il > 0 && (c = *ib, a_SHEXP_ISVARC(c)); ++i)
914 --il, ++ib;
916 if(brace){
917 if(il == 0 || *ib != '}'){
918 if(state & a_SKIPQ){
919 assert((state & a_SURPLUS) && quotec == '\'');
920 continue;
922 if(flags & n_SHEXP_PARSE_LOG)
923 n_err(_("Closing brace missing for ${VAR}: %.*s\n"),
924 (int)input->l, input->s);
925 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN |
926 n_SHEXP_STATE_ERR_BRACE;
927 goto je_ib_save;
929 --il, ++ib;
932 if(state & a_SKIPQ)
933 continue;
935 if(i == 0){
936 if(brace){
937 if(flags & n_SHEXP_PARSE_LOG)
938 n_err(_("Bad substitution (${}): %.*s\n"),
939 (int)input->l, input->s);
940 rv |= n_SHEXP_STATE_ERR_BADSUB;
941 goto je_ib_save;
943 c = '$';
944 }else if(flags & n_SHEXP_PARSE_DRYRUN)
945 continue;
946 else{
947 vp = savestrbuf(vp, i);
948 /* Check getenv(3) shall no internal variable exist! */
949 if((cp = vok_vlook(vp)) != NULL || (cp = getenv(vp)) != NULL){
950 rv |= n_SHEXP_STATE_OUTPUT;
951 store = n_string_push_cp(store, cp);
952 for(; (c = *cp) != '\0'; ++cp)
953 if(cntrlchar(c)){
954 rv |= n_SHEXP_STATE_CONTROL;
955 break;
958 continue;
961 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
962 continue;
966 if(!(state & a_SKIPQ)){
967 rv |= n_SHEXP_STATE_OUTPUT;
968 if(cntrlchar(c))
969 rv |= n_SHEXP_STATE_CONTROL;
970 if(!(flags & n_SHEXP_PARSE_DRYRUN))
971 store = n_string_push_c(store, c);
975 if(quotec != '\0'){
976 if(flags & n_SHEXP_PARSE_LOG)
977 n_err(_("no closing quote: %.*s\n"), (int)input->l, input->s);
978 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
981 jleave:
982 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
983 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
984 rv |= n_SHEXP_STATE_OUTPUT;
987 if(flags & n_SHEXP_PARSE_TRIMSPACE){
988 for(; il > 0; ++ib, --il)
989 if(!blankchar(*ib))
990 break;
992 input->l = il;
993 input->s = UNCONST(ib);
995 if(!(rv & n_SHEXP_STATE_STOP)){
996 if(il > 0 && !(rv & n_SHEXP_STATE_OUTPUT) &&
997 (flags & n_SHEXP_PARSE_IGNORE_EMPTY))
998 goto jrestart_empty;
999 if(!(rv & n_SHEXP_STATE_OUTPUT) && il == 0)
1000 rv |= n_SHEXP_STATE_STOP;
1002 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1003 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1004 NYD2_LEAVE;
1005 return rv;
1008 FL struct n_string *
1009 n_shell_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1010 /* TODO In v15 we need to save (possibly normalize) away user input,
1011 * TODO so that the ORIGINAL (normalized) input can be used directly.
1012 * Until then, stay somewhat primitive */
1013 #if 0
1014 struct n_visual_info_ctx vic;
1015 #endif
1016 enum{a_QNONE, a_QSINGLE, a_QDOLLAR} quote;
1017 size_t il;
1018 char const *ib;
1019 NYD2_ENTER;
1021 assert(store != NULL);
1022 assert(input != NULL);
1023 assert(input->l == 0 || input->s != NULL);
1025 ib = input->s;
1026 if((il = input->l) == UIZ_MAX)
1027 il = strlen(ib);
1029 /* An empty string needs to be quoted */
1030 if(il == 0){
1031 store = n_string_push_buf(store, "''", sizeof("''") -1);
1032 goto jleave;
1035 #if 0
1036 memset(&vic, 0, sizeof vic);
1037 vic.vic_indat = ib;
1038 vic.vic_inlen = il;
1039 vic.vic_flags = n_VISUAL_INFO_WOUT_CREATE | n_VISUAL_INFO_WOUT_SALLOC;
1040 i = n_visual_info(&vic);
1041 #endif
1043 store = n_string_reserve(store, il + (il >> 2)); /* XXX */
1044 quote = a_QNONE;
1046 #if 0
1047 def HAVE_C90AMEND1 /* TODO wchar! */
1048 if(i){
1049 wchar_t *wcp;
1052 }else
1053 #endif /* HAVE_C90AMEND1 */
1054 while(il > 0){
1055 enum{a_NONE, a_CNTRL, a_SPACE, a_SQ, a_BS, a_NASCII} ct;
1056 char c;
1058 /* Classify character and type of quote, if necessary.
1059 * Try shorthands whenever possible */
1060 c = *ib;
1061 if(cntrlchar(c))
1062 ct = a_CNTRL;
1063 else if(blankspacechar(c) || c == '"' || c == '$'){
1064 if(quote == a_QSINGLE || quote == a_QDOLLAR)
1065 goto jc_one;
1066 ct = a_SPACE;
1067 }else if(c == '\'')
1068 ct = a_SQ;
1069 else if(c == '\\'){
1070 if(quote == a_QSINGLE)
1071 goto jc_one;
1072 ct = a_BS;
1073 }else if(!asciichar(c)){
1074 if(!rndtrip)
1075 goto jc_one;
1076 ct = a_NASCII;
1077 }else{
1078 /* Shorthand: we can simply push that thing out */
1079 jc_one:
1080 store = n_string_push_c(store, c);
1081 ++ib, --il;
1082 continue;
1085 /* We have to take care for quotes, try to reuse what we have */
1086 if(quote == a_QNONE){
1087 switch(ct){
1088 case a_NONE:
1089 case a_SPACE:
1090 case a_BS:
1091 /* See XXX note beloq on a_QNONE! */
1092 store = n_string_push_c(store, '\'');
1093 quote = a_QSINGLE;
1094 goto jc_one;
1095 case a_SQ:
1096 /* XXX a_QNONE backslash escaping of a single character is
1097 * XXX disabled, because that starts looking bad if it is
1098 * XXX needed more than once. We'd need to count in a dryrun
1099 * XXX first, then decide whether it should be used!
1100 * XXX store = n_string_push_c(store, '\\');
1101 * XXX goto jc_one; */
1102 goto jc_qdollar;
1103 case a_NASCII:
1104 assert(rndtrip);
1105 /* FALLTHRU */
1106 case a_CNTRL:
1107 jc_qdollar:
1108 store = n_string_push_buf(store, "$'", sizeof("$'") -1);
1109 quote = a_QDOLLAR;
1110 break;
1112 }else if(quote == a_QSINGLE){
1113 switch(ct){
1114 case a_NONE:
1115 case a_SPACE:
1116 case a_BS:
1117 assert(0);
1118 case a_NASCII:
1119 assert(rndtrip);
1120 /* FALLTHRU */
1121 case a_CNTRL:
1122 store = n_string_push_c(store, '\'');
1123 goto jc_qdollar;
1124 case a_SQ:
1125 /* xxx For SQ we possibly should also simply go for QDOLLAR now? */
1126 store = n_string_push_c(store, '\'');
1127 quote = a_QNONE;
1128 store = n_string_push_c(store, '\\');
1129 goto jc_one;
1133 assert(quote == a_QDOLLAR);
1134 switch(ct){
1135 case a_NONE:
1136 case a_SPACE:
1137 assert(0);
1138 case a_SQ:
1139 case a_BS:
1140 store = n_string_push_c(store, '\\');
1141 goto jc_one;
1142 case a_CNTRL:{
1143 char c2;
1145 store = n_string_push_c(store, '\\');
1146 switch(c2 = c){
1147 case 0x07: c = 'a'; break;
1148 case 0x08: c = 'b'; break;
1149 case 0x09: c = 't'; break;
1150 case 0x0A: c = 'n'; break;
1151 case 0x0B: c = 'v'; break;
1152 case 0x0C: c = 'f'; break;
1153 case 0x0D: c = 'r'; break;
1154 default: break;
1156 if(c == c2){
1157 store = n_string_push_c(store, 'c');
1158 c ^= 0x40;
1160 goto jc_one;
1161 } break;
1162 case a_NASCII:
1163 assert(rndtrip);
1164 #ifdef HAVE_NATCH_CHAR
1165 if(options & OPT_UNICODE){
1166 ui32_t u;
1167 char const *ib2 = ib;
1168 size_t il2 = il, il3 = il2;
1170 if((u = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
1171 char itoa[32];
1172 char const *cp;
1174 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
1175 if(rndtrip || u == 0xFFFD/* TODO CText */){
1176 cp = itoa;
1177 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1178 (u > 0xFFFFu ? 'U' : 'u'),
1179 (int)(u > 0xFFFFu ? 8 : 4), u);
1180 }else{
1181 cp = &ib[0];
1182 il3 = il2 + 1;
1184 store = n_string_push_buf(store, cp, il3);
1185 ib += il2, il -= il2;
1186 goto jc_useq;
1189 #endif /* HAVE_NATCH_CHAR */
1191 store = n_string_push_buf(store, "\\xFF", sizeof("\\xFF") -1);
1192 n_c_to_hex_base16(&store->s_dat[store->s_len - 2], c);
1193 ++ib, --il;
1194 #ifdef HAVE_NATCH_CHAR
1195 jc_useq:
1196 #endif
1197 if(il > 0 && hexchar(ib[1])){
1198 store = n_string_push_c(store, '\'');
1199 quote = a_QNONE;
1201 break;
1205 if(quote == a_QSINGLE || quote == a_QDOLLAR)
1206 store = n_string_push_c(store, '\'');
1207 jleave:
1208 NYD2_LEAVE;
1209 return store;
1212 FL char *
1213 n_shell_quote_cp(char const *cp, bool_t rndtrip){
1214 struct n_string store;
1215 struct str input;
1216 char *rv;
1217 NYD2_ENTER;
1219 assert(cp != NULL);
1221 input.s = UNCONST(cp);
1222 input.l = UIZ_MAX;
1223 rv = n_string_cp(n_shell_quote(n_string_creat_auto(&store), &input,
1224 rndtrip));
1225 n_string_gut(n_string_drop_ownership(&store));
1226 NYD2_LEAVE;
1227 return rv;
1230 /* s-it-mode */