Add PS_WYSHLIST_SAW_CONTROL and auto-check for control characters
[s-mailx.git] / shexp.c
blob49428d7d7e9c26dce75f7f1fd06f2a21b59712ff
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE shexp
38 #ifndef HAVE_AMALGAMATION
39 # include "nail.h"
40 #endif
42 #include <sys/wait.h>
44 #include <pwd.h>
46 #ifdef HAVE_WORDEXP
47 # include <wordexp.h>
48 #endif
50 /* POSIX says
51 * Environment variable names used by the utilities in the Shell and
52 * Utilities volume of POSIX.1-2008 consist solely of uppercase
53 * letters, digits, and the <underscore> ('_') from the characters
54 * defined in Portable Character Set and do not begin with a digit.
55 * Other characters may be permitted by an implementation;
56 * applications shall tolerate the presence of such names.
57 * We do support the hyphen "-" because it is common for mailx. */
/* True if C may be part of a variable name: alphanumeric, underscore or
 * hyphen.  The argument is expanded several times, so only pass expressions
 * without side effects */
58 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* One frame of the expansion stack used by _sh_exp_var(): each recursion
 * level fills in the data of one input segment and links to the frame of the
 * level which invoked it */
60 struct shvar_stack {
61 struct shvar_stack *shs_next; /* Outer stack frame */
62 char const *shs_value; /* Remaining value to expand */
63 size_t shs_len; /* Length of .shs_dat this level */
64 char const *shs_dat; /* Result data of this level */
65 bool_t *shs_err; /* Or NULL; set to TRU1 on a missing closing brace */
66 bool_t shs_bsesc; /* Shall backslash escaping be performed */
69 /* Locate the user's mailbox file (where new, unread mail is queued).
 * With force set, or if the MAIL variable is unset, the name is built from
 * MAILSPOOL and user, otherwise the value of MAIL is expanded */
70 static char * _findmail(char const *user, bool_t force);
72 /* Perform shell meta character expansion TODO obsolete (INSECURE!).
 * Returns NULL on failure; errors are logged unless FEXP_SILENT is set */
73 static char * _globname(char const *name, enum fexp_mode fexpm);
75 /* Perform shell variable expansion of shsp->shs_value, one segment per
 * recursion level */
76 static char * _sh_exp_var(struct shvar_stack *shsp);
78 static char *
79 _findmail(char const *user, bool_t force)
81 char *rv;
82 char const *cp;
83 NYD_ENTER;
85 if (force || (cp = ok_vlook(MAIL)) == NULL) {
86 size_t ul = strlen(user), i = sizeof(MAILSPOOL) -1 + 1 + ul +1;
88 rv = salloc(i);
89 memcpy(rv, MAILSPOOL, i = sizeof(MAILSPOOL));
90 rv[i] = '/';
91 memcpy(&rv[++i], user, ul +1);
92 } else if ((rv = fexpand(cp, FEXP_NSHELL)) == NULL)
93 rv = savestr(cp);
94 NYD_LEAVE;
95 return rv;
98 static char *
99 _globname(char const *name, enum fexp_mode fexpm)
101 #ifdef HAVE_WORDEXP
102 wordexp_t we;
103 char *cp = NULL;
104 sigset_t nset;
105 int i;
106 NYD_ENTER;
108 /* Mac OS X Snow Leopard and Linux don't init fields on error, causing
109 * SIGSEGV in wordfree(3); so let's just always zero it ourselfs */
110 memset(&we, 0, sizeof we);
112 /* Some systems (notably Open UNIX 8.0.0) fork a shell for wordexp()
113 * and wait, which will fail if our SIGCHLD handler is active */
114 sigemptyset(&nset);
115 sigaddset(&nset, SIGCHLD);
116 sigprocmask(SIG_BLOCK, &nset, NULL);
117 # ifndef WRDE_NOCMD
118 # define WRDE_NOCMD 0
119 # endif
120 i = wordexp(name, &we, WRDE_NOCMD);
121 sigprocmask(SIG_UNBLOCK, &nset, NULL);
123 switch (i) {
124 case 0:
125 break;
126 #ifdef WRDE_CMDSUB
127 case WRDE_CMDSUB:
128 if (!(fexpm & FEXP_SILENT))
129 n_err(_("\"%s\": Command substitution not allowed\n"), name);
130 goto jleave;
131 #endif
132 case WRDE_NOSPACE:
133 if (!(fexpm & FEXP_SILENT))
134 n_err(_("\"%s\": Expansion buffer overflow\n"), name);
135 goto jleave;
136 case WRDE_BADCHAR:
137 case WRDE_SYNTAX:
138 default:
139 if (!(fexpm & FEXP_SILENT))
140 n_err(_("Syntax error in \"%s\"\n"), name);
141 goto jleave;
144 switch (we.we_wordc) {
145 case 1:
146 cp = savestr(we.we_wordv[0]);
147 break;
148 case 0:
149 if (!(fexpm & FEXP_SILENT))
150 n_err(_("\"%s\": No match\n"), name);
151 break;
152 default:
153 if (fexpm & FEXP_MULTIOK) {
154 size_t j, l;
156 for (l = 0, j = 0; j < we.we_wordc; ++j)
157 l += strlen(we.we_wordv[j]) + 1;
158 ++l;
159 cp = salloc(l);
160 for (l = 0, j = 0; j < we.we_wordc; ++j) {
161 size_t x = strlen(we.we_wordv[j]);
162 memcpy(cp + l, we.we_wordv[j], x);
163 l += x;
164 cp[l++] = ' ';
166 cp[l] = '\0';
167 } else if (!(fexpm & FEXP_SILENT))
168 n_err(_("\"%s\": Ambiguous\n"), name);
169 break;
171 jleave:
172 wordfree(&we);
173 NYD_LEAVE;
174 return cp;
176 #else /* HAVE_WORDEXP */
177 struct stat sbuf;
178 char xname[PATH_MAX +1], cmdbuf[PATH_MAX +1], /* also used for files */
179 cp = NULL;
180 int pivec[2], pid, l, waits;
181 NYD_ENTER;
183 if (pipe(pivec) < 0) {
184 n_perr(_("pipe"), 0);
185 goto jleave;
187 snprintf(cmdbuf, sizeof cmdbuf, "echo %s", name);
188 pid = start_command(ok_vlook(SHELL), NULL, COMMAND_FD_NULL, pivec[1],
189 "-c", cmdbuf, NULL, NULL);
190 if (pid < 0) {
191 close(pivec[0]);
192 close(pivec[1]);
193 goto jleave;
195 close(pivec[1]);
197 jagain:
198 l = read(pivec[0], xname, sizeof xname);
199 if (l < 0) {
200 if (errno == EINTR)
201 goto jagain;
202 n_perr(_("read"), 0);
203 close(pivec[0]);
204 goto jleave;
206 close(pivec[0]);
207 if (!wait_child(pid, &waits) && WTERMSIG(waits) != SIGPIPE) {
208 if (!(fexpm & FEXP_SILENT))
209 n_err(_("\"%s\": Expansion failed\n"), name);
210 goto jleave;
212 if (l == 0) {
213 if (!(fexpm & FEXP_SILENT))
214 n_err(_("\"%s\": No match\n"), name);
215 goto jleave;
217 if (l == sizeof xname) {
218 if (!(fexpm & FEXP_SILENT))
219 n_err(_("\"%s\": Expansion buffer overflow\n"), name);
220 goto jleave;
222 xname[l] = 0;
223 for (cp = xname + l - 1; *cp == '\n' && cp > xname; --cp)
225 cp[1] = '\0';
226 if (!(fexpm & FEXP_MULTIOK) && strchr(xname, ' ') != NULL &&
227 stat(xname, &sbuf) < 0) {
228 if (!(fexpm & FEXP_SILENT))
229 n_err(_("\"%s\": Ambiguous\n"), name);
230 cp = NULL;
231 goto jleave;
233 cp = savestr(xname);
234 jleave:
235 NYD_LEAVE;
236 return cp;
237 #endif /* !HAVE_WORDEXP */
/* Expand one segment of shsp->shs_value, then recurse for what remains.
 *
 * If the value does not start with '$' the segment is the literal text up to
 * the next '$' or the end of string; with .shs_bsesc set a '$' that follows
 * an unescaped backslash does not end the segment, and if any such escape
 * was seen the segment is copied and the escaping backslashes are removed.
 *
 * Otherwise the segment is a variable name, optionally enclosed in braces,
 * consisting of a_SHEXP_ISVARC() characters.  It is looked up via
 * vok_vlook() first and getenv(3) second, and expands to the empty string if
 * neither knows it.  A missing closing brace is logged, sets *.shs_err (if
 * that is not NULL) and turns the entire remaining value into the literal
 * data of this level.
 *
 * As long as input remains a new frame is linked in front of shsp and the
 * function recurses (jrecurse).  The innermost level reverses the frame
 * list, sums up the lengths and concatenates all .shs_dat pieces into one
 * salloc()ed, NUL terminated string (junroll), which every level returns */
240 static char *
241 _sh_exp_var(struct shvar_stack *shsp)
243 struct shvar_stack next, *np, *tmp;
244 char const *vp;
245 char lc, c, *cp, *rv;
246 size_t i;
247 NYD2_ENTER;
249 if (*(vp = shsp->shs_value) != '$') {
250 bool_t bsesc = shsp->shs_bsesc;
251 union {bool_t hadbs; char c;} u = {FAL0};
253 shsp->shs_dat = vp;
/* lc tracks the preceding character, but only if escaping is enabled */
254 for (lc = '\0', i = 0; ((c = *vp) != '\0'); ++i, ++vp) {
255 if (c == '$' && lc != '\\')
256 break;
257 if (!bsesc)
258 continue;
259 lc = (lc == '\\') ? (u.hadbs = TRU1, '\0') : c;
261 shsp->shs_len = i;
263 if (u.hadbs) {
264 shsp->shs_dat = cp = savestrbuf(shsp->shs_dat, i);
/* Compact the copy in place, dropping each escaping backslash */
266 for (lc = '\0', rv = cp; (u.c = *cp++) != '\0';) {
267 if (u.c != '\\' || lc == '\\')
268 *rv++ = u.c;
269 lc = (lc == '\\') ? '\0' : u.c;
271 *rv = '\0';
273 shsp->shs_len = PTR2SIZE(rv - shsp->shs_dat);
275 } else {
/* lc is reused as the "name is enclosed in braces" flag */
276 if ((lc = (*++vp == '{')))
277 ++vp;
279 shsp->shs_dat = vp;
280 for (i = 0; (c = *vp) != '\0'; ++i, ++vp)
281 if (!a_SHEXP_ISVARC(c))
282 break;
284 if (lc) {
285 if (c != '}') {
286 n_err(_("Variable name misses closing \"}\": \"%s\"\n"),
287 shsp->shs_value);
288 shsp->shs_len = strlen(shsp->shs_value);
289 shsp->shs_dat = shsp->shs_value;
290 if (shsp->shs_err != NULL)
291 *shsp->shs_err = TRU1;
292 goto junroll;
294 c = *++vp;
297 shsp->shs_len = i;
298 /* Check getenv(3) shall no internal variable exist! */
299 if ((rv = vok_vlook(cp = savestrbuf(shsp->shs_dat, i))) != NULL ||
300 (rv = getenv(cp)) != NULL)
301 shsp->shs_len = strlen(shsp->shs_dat = rv);
302 else
303 shsp->shs_len = 0, shsp->shs_dat = UNCONST("");
305 if (c != '\0')
306 goto jrecurse;
308 /* That level made the great and completed encoding. Build result */
309 junroll:
/* Reverse the list so that it runs from outermost to innermost frame */
310 for (i = 0, np = shsp, shsp = NULL; np != NULL;) {
311 i += np->shs_len;
312 tmp = np->shs_next;
313 np->shs_next = shsp;
314 shsp = np;
315 np = tmp;
318 cp = rv = salloc(i +1);
319 while (shsp != NULL) {
320 np = shsp;
321 shsp = shsp->shs_next;
322 memcpy(cp, np->shs_dat, np->shs_len);
323 cp += np->shs_len;
325 *cp = '\0';
327 jleave:
328 NYD2_LEAVE;
329 return rv;
330 jrecurse:
331 memset(&next, 0, sizeof next);
332 next.shs_next = shsp;
333 next.shs_value = vp;
334 next.shs_err = shsp->shs_err;
335 next.shs_bsesc = shsp->shs_bsesc;
336 rv = _sh_exp_var(&next);
337 goto jleave;
/* Expand the file name NAME as directed by the FEXPM flags and return the
 * result, or NULL on error.
 *
 * Steps, in order:
 * - Unless FEXP_NSHORTCUT is set NAME is replaced by its shortcut_expand()
 *   result, if there is one.
 * - A leading "%:" followed by more text is stripped and the rest is
 *   evaluated anew; any other leading "%" yields _findmail() of the text
 *   after it (forced), or of myname if nothing follows.
 * - A sole "#" yields prevfile; it is an error if that is empty.
 * - A sole "&" yields the value of the MBOX variable.
 * - A leading "+" is replaced by the folder_query() result plus a separating
 *   slash (if that result is not empty); a resulting "%:" prefix is
 *   stripped and evaluation restarts.
 * - A leading "~" is passed through n_shell_expand_tilde().
 * - If shell meta characters are present and either FEXP_NOPROTO is set or
 *   which_protocol() reports PROTO_FILE / PROTO_MAILDIR, the name is passed
 *   to n_shell_expand_var() (FEXP_NSHELL) or to _globname().
 * - With FEXP_LOCAL anything that is not PROTO_FILE / PROTO_MAILDIR is
 *   rejected with an error message.
 *
 * "dyn" records whether res was produced by one of the expansion helpers;
 * if not, the result is duplicated via savestr() before it is returned.
 *
 * NOTE(review): _globname() can return NULL, and res is then handed to
 * which_protocol() at jislocal if FEXP_LOCAL is set -- confirm that
 * which_protocol() tolerates NULL.  Likewise the MBOX lookup result is
 * dereferenced without a NULL check; presumably MBOX always has a value --
 * verify */
340 FL char *
341 fexpand(char const *name, enum fexp_mode fexpm)
343 struct str s;
344 char const *cp, *res;
345 bool_t dyn;
346 NYD_ENTER;
348 /* The order of evaluation is "%" and "#" expand into constants.
349 * "&" can expand into "+". "+" can expand into shell meta characters.
350 * Shell meta characters expand into constants.
351 * This way, we make no recursive expansion */
352 if ((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
353 res = UNCONST(name);
355 jnext:
356 dyn = FAL0;
357 switch (*res) {
358 case '%':
359 if (res[1] == ':' && res[2] != '\0') {
360 res = &res[2];
361 goto jnext;
363 res = _findmail((res[1] != '\0' ? res + 1 : myname), (res[1] != '\0'));
364 goto jislocal;
365 case '#':
366 if (res[1] != '\0')
367 break;
368 if (prevfile[0] == '\0') {
369 n_err(_("No previous file\n"));
370 res = NULL;
371 goto jleave;
373 res = prevfile;
374 goto jislocal;
375 case '&':
376 if (res[1] == '\0')
377 res = ok_vlook(MBOX);
378 break;
381 /* POSIX: if *folder* unset or null, "+" shall be retained */
382 if (*res == '+' && *(cp = folder_query()) != '\0') {
383 size_t i = strlen(cp);
385 res = str_concat_csvl(&s, cp,
386 ((i == 0 || cp[i -1] == '/') ? "" : "/"), res + 1, NULL)->s;
387 dyn = TRU1;
389 /* TODO *folder* can't start with %[:], can it!?! */
390 if (res[0] == '%' && res[1] == ':') {
391 res += 2;
392 goto jnext;
396 /* Catch the most common shell meta character */
397 if (res[0] == '~') {
398 res = n_shell_expand_tilde(res, NULL);
399 dyn = TRU1;
402 if (anyof(res, "|&;<>{}()[]*?$`'\"\\")) {
403 bool_t doexp;
405 if(fexpm & FEXP_NOPROTO)
406 doexp = TRU1;
407 else switch(which_protocol(res)){
408 case PROTO_FILE:
409 case PROTO_MAILDIR:
410 doexp = TRU1;
411 break;
412 default:
413 doexp = FAL0;
414 break;
417 if(doexp){
418 res = (fexpm & FEXP_NSHELL) ? n_shell_expand_var(res, TRU1, NULL)
419 : _globname(res, fexpm);
420 dyn = TRU1;
424 jislocal:
425 if (fexpm & FEXP_LOCAL)
426 switch (which_protocol(res)) {
427 case PROTO_FILE:
428 case PROTO_MAILDIR:
429 break;
430 default:
431 n_err(_("Not a local file or directory: \"%s\"\n"), name);
432 res = NULL;
433 break;
436 jleave:
437 if (res && !dyn)
438 res = savestr(res);
439 NYD_LEAVE;
440 return UNCONST(res);
443 FL char *
444 fexpand_nshell_quote(char const *name)
446 size_t i, j;
447 char *rv, c;
448 NYD_ENTER;
450 for (i = j = 0; (c = name[i]) != '\0'; ++i)
451 if (c == '\\')
452 ++j;
454 if (j == 0)
455 rv = savestrbuf(name, i);
456 else {
457 rv = salloc(i + j +1);
458 for (i = j = 0; (c = name[i]) != '\0'; ++i) {
459 rv[j++] = c;
460 if (c == '\\')
461 rv[j++] = c;
463 rv[j] = '\0';
465 NYD_LEAVE;
466 return rv;
469 FL char *
470 n_shell_expand_tilde(char const *s, bool_t *err_or_null)
472 struct passwd *pwp;
473 size_t nl, rl;
474 char const *rp, *np;
475 char *rv;
476 bool_t err;
477 NYD2_ENTER;
479 err = FAL0;
481 if (s[0] != '~')
482 goto jasis;
484 if (*(rp = s + 1) == '/' || *rp == '\0')
485 np = ok_vlook(HOME);
486 else {
487 if ((rp = strchr(s + 1, '/')) == NULL)
488 rp = (np = UNCONST(s)) + 1;
489 else {
490 nl = PTR2SIZE(rp - s);
491 np = savestrbuf(s, nl);
494 if ((pwp = getpwnam(np)) == NULL) {
495 err = TRU1;
496 goto jasis;
498 np = pwp->pw_name;
501 nl = strlen(np);
502 rl = strlen(rp);
503 rv = salloc(nl + 1 + rl +1);
504 memcpy(rv, np, nl);
505 if (rl > 0) {
506 memcpy(rv + nl, rp, rl);
507 nl += rl;
509 rv[nl] = '\0';
510 goto jleave;
512 jasis:
513 rv = savestr(s);
514 jleave:
515 if (err_or_null != NULL)
516 *err_or_null = err;
517 NYD2_LEAVE;
518 return rv;
521 FL char *
522 n_shell_expand_var(char const *s, bool_t bsescape, bool_t *err_or_null)
524 struct shvar_stack top;
525 char *rv;
526 NYD2_ENTER;
528 memset(&top, 0, sizeof top);
530 top.shs_value = s;
531 if ((top.shs_err = err_or_null) != NULL)
532 *err_or_null = FAL0;
533 top.shs_bsesc = bsescape;
534 rv = _sh_exp_var(&top);
535 NYD2_LEAVE;
536 return rv;
539 FL int
540 n_shell_expand_escape(char const **s, bool_t use_nail_extensions)/* TODO DROP!*/
542 char const *xs;
543 int c, n;
544 NYD2_ENTER;
546 xs = *s;
548 if ((c = *xs & 0xFF) == '\0')
549 goto jleave;
550 ++xs;
551 if (c != '\\')
552 goto jleave;
554 switch ((c = *xs & 0xFF)) {
555 case 'a': c = '\a'; break;
556 case 'b': c = '\b'; break;
557 case 'c': c = PROMPT_STOP; break;
558 case 'f': c = '\f'; break;
559 case 'n': c = '\n'; break;
560 case 'r': c = '\r'; break;
561 case 't': c = '\t'; break;
562 case 'v': c = '\v'; break;
564 /* ESCape */
565 case 'E':
566 case 'e':
567 c = '\033';
568 break;
570 /* Hexadecimal TODO uses ASCII */
571 case 'X':
572 case 'x': {
573 static ui8_t const hexatoi[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
574 #undef a_HEX
575 #define a_HEX(n) \
576 hexatoi[(ui8_t)((n) - ((n) <= '9' ? 48 : ((n) <= 'F' ? 55 : 87)))]
578 c = 0;
579 ++xs;
580 if(hexchar(*xs))
581 c = a_HEX(*xs);
582 else{
583 --xs;
584 if(options & OPT_D_V)
585 n_err(_("Invalid \"\\xNUMBER\" notation in \"%s\"\n"), xs - 1);
586 c = '\\';
587 goto jleave;
589 ++xs;
590 if(hexchar(*xs)){
591 c <<= 4;
592 c += a_HEX(*xs);
593 ++xs;
595 goto jleave;
597 #undef a_HEX
599 /* octal, with optional 0 prefix */
600 case '0':
601 ++xs;
602 if(0){
603 default:
604 if(*xs == '\0'){
605 c = '\\';
606 break;
609 for (c = 0, n = 3; n-- > 0 && octalchar(*xs); ++xs) {
610 c <<= 3;
611 c |= *xs - '0';
613 goto jleave;
615 /* S-nail extension for nice (get)prompt(()) support */
616 case '&':
617 case '?':
618 case '$':
619 case '@':
620 if (use_nail_extensions) {
621 switch (c) {
622 case '&': c = ok_blook(bsdcompat) ? '&' : '?'; break;
623 case '?': c = (pstate & PS_EVAL_ERROR) ? '1' : '0'; break;
624 case '$': c = PROMPT_DOLLAR; break;
625 case '@': c = PROMPT_AT; break;
627 break;
630 /* FALLTHRU */
631 case '\0':
632 /* A sole <backslash> at EOS is treated as-is! */
633 c = '\\';
634 /* FALLTHRU */
635 case '\\':
636 break;
639 ++xs;
640 jleave:
641 *s = xs;
642 NYD2_LEAVE;
643 return c;
/* Parse one shell-style token from INPUT and append its unquoted, expanded
 * content to STORE.
 *
 * INPUT->s / ->l describe the text to parse; a length of UIZ_MAX means
 * "use strlen()".  On return both are updated to describe the unconsumed
 * remainder.  DOLOG controls whether errors are reported via n_err(); the
 * value TRUM1 means "only if OPT_D_V is set".
 *
 * Outside of quotes: ' and " open a quote, $' opens a dollar-single-quote,
 * a $ followed by anything else starts a variable expansion, a backslash
 * escapes the next character, # stops parsing (n_SHEXP_STATE_STOP) and a
 * blank ends the token.
 * Inside '...' everything up to the closing quote is literal.
 * Inside "..." a backslash only escapes $ ` " and \, $ expands a variable,
 * and a backquote is dropped (TODO shell command).
 * Inside $'...' the sequences \" \' \\ \b \f \n \r \t \v \E \e, \cX
 * (control character), \0ooo and \ooo (octal), \xHH, \uHHHH, \UHHHHHHHH and
 * \$ (variable expansion) are understood; any other sequence is stored
 * unchanged.  (Note that \a is not among the recognized sequences.)
 * Once a sequence decodes to NUL, "skipq" suppresses all further output
 * until the quote is closed.
 *
 * Variables are looked up via vok_vlook() first, getenv(3) second; unknown
 * variables expand to nothing.
 *
 * The return value is a bit set of n_SHEXP_STATE_* flags, among them error
 * indications for bad \c, number and Unicode sequences, missing braces, bad
 * substitutions and unclosed quotes.  n_SHEXP_STATE_CONTROL is set whenever
 * a control character was stored; in that case PS_WYSHLIST_SAW_CONTROL is
 * additionally set in pstate */
646 FL enum n_shexp_state
647 n_shell_parse_token(struct n_string *store, struct str *input, bool_t dolog){
648 char c2, c, quotec;
649 bool_t skipq, surplus;
650 enum n_shexp_state rv;
651 size_t i, il;
652 char const *ib;
653 NYD2_ENTER;
654 UNINIT(c, '\0');
656 assert(store != NULL);
657 assert(input != NULL);
658 assert(input->l == 0 || input->s != NULL);
660 if(dolog == TRUM1)
661 dolog = ((options & OPT_D_V) != 0);
663 ib = input->s;
664 if((il = input->l) == UIZ_MAX)
665 input->l = il = strlen(ib);
667 store = n_string_reserve(store, MIN(il, 32)); /* XXX */
/* quotec: active quote character or NUL; surplus: backslash sequences are
 * interpreted in the active quote; skipq: discard output until quote end */
669 for(rv = n_SHEXP_STATE_NONE, skipq = surplus = FAL0, quotec = '\0'; il > 0;){
670 --il, c = *ib++;
672 /* If no quote-mode active.. */
673 if(quotec == '\0'){
674 if(c == '"' || c == '\''){
675 quotec = c;
676 surplus = (c == '"');
677 continue;
678 }else if(c == '$'){
679 if(il > 0){
680 if(*ib == '\''){
681 --il, ++ib;
682 quotec = '\'';
683 surplus = TRU1;
684 continue;
685 }else
686 goto J_var_expand;
688 }else if(c == '\\'){
689 /* Outside of quotes this just escapes any next character, but a sole
690 * <backslash> at EOS is left unchanged */
691 if(il > 0)
692 --il, c = *ib++;
693 }else if(c == '#'){
694 rv |= n_SHEXP_STATE_STOP;
695 goto jleave;
696 }else if(blankchar(c))
697 break;
698 }else{
699 /* Quote-mode */
700 if(c == quotec){
701 skipq = surplus = FAL0;
702 quotec = '\0';
703 continue;
704 }else if(c == '\\' && surplus){
/* Start of the sequence, for storing it unchanged upon errors (jeasis) */
705 char const *ib_save = ib - 1;
707 /* A sole <backslash> at EOS is treated as-is! */
708 if(il == 0)
709 break;
710 else if((c2 = *ib) == quotec){
711 --il, ++ib;
712 c = quotec;
713 }else if(quotec == '"'){
714 /* Double quotes:
715 * The <backslash> shall retain its special meaning as an
716 * escape character (see Section 2.2.1) only when followed
717 * by one of the following characters when considered
718 * special: $ ` " \ <newline> */
719 switch(c2){
720 case '$':
721 case '`':
722 /* case '"': already handled via c2 == quotec */
723 case '\\':
724 --il, ++ib;
725 c = c2;
726 /* FALLTHRU */
727 default:
728 break;
730 }else{
731 /* Dollar-single-quote */
732 --il, ++ib;
733 switch(c2){
734 case '"':
735 /* case '\'': already handled via c2 == quotec */
736 case '\\':
737 c = c2;
738 break;
740 case 'b': c = '\b'; break;
741 case 'f': c = '\f'; break;
742 case 'n': c = '\n'; break;
743 case 'r': c = '\r'; break;
744 case 't': c = '\t'; break;
745 case 'v': c = '\v'; break;
747 case 'E':
748 case 'e': c = '\033'; break;
750 /* Control character */
751 case 'c':
752 if(il == 0)
753 goto j_dollar_ungetc;
754 --il, c2 = *ib++;
755 if(skipq)
756 continue;
757 c = upperconv(c2) ^ 0x40;
758 if((ui8_t)c > 0x1F && c != 0x7F){ /* ASCII C0: 0..1F, 7F */
759 if(dolog)
760 n_err(_("Invalid \"\\c\" notation: \"%.*s\"\n"),
761 (int)input->l, input->s);
762 rv |= n_SHEXP_STATE_ERR_CONTROL;
764 /* As an implementation-defined extension, support \c@
765 * EQ printf(1) alike \c */
766 if(c == '\0'){
767 rv |= n_SHEXP_STATE_STOP;
768 goto jleave;
770 break;
772 /* Octal sequence: 1 to 3 octal bytes */
773 case '0':
774 /* As an extension (dependent on where you look, echo(1), or
775 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
776 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
777 c2 = c;
778 --il, ++ib;
780 /* FALLTHRU */
781 case '1': case '2': case '3':
782 case '4': case '5': case '6': case '7':
783 c2 -= '0';
784 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
785 c2 = (c2 << 3) | (c - '0');
786 --il, ++ib;
788 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
/* A third digit would shift the value beyond eight bits */
789 if((ui8_t)c2 > 0x1F){
790 if(dolog)
791 n_err(_("\"\\0\" argument exceeds a byte: "
792 "\"%.*s\"\n"), (int)input->l, input->s);
793 rv |= n_SHEXP_STATE_ERR_NUMBER;
794 --il, ++ib;
795 goto jeasis;
797 c2 = (c2 << 3) | (c -= '0');
798 --il, ++ib;
800 if((c = c2) == '\0')
801 skipq = TRU1;
802 if(skipq)
803 continue;
804 break;
806 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
807 case 'U':
808 i = 8;
809 if(0){
810 /* FALLTHRU */
811 case 'u':
812 i = 4;
814 if(il == 0)
815 goto j_dollar_ungetc;
816 if(0){
817 /* FALLTHRU */
819 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
820 case 'X':
821 case 'x':
822 if(il == 0)
823 goto j_dollar_ungetc;
824 i = 2;
826 /* C99 */{
827 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
828 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
830 size_t no, j;
/* i: maximum number of digits; j: digits consumed; no: the value */
832 i = MIN(il, i);
833 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
834 c = *ib;
835 if(hexchar(c)){
836 no <<= 4;
837 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
838 : ((c) <= 'F' ? 55 : 87)))];
839 }else if(j == 0){
840 if(skipq)
841 break;
842 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
843 if(dolog)
844 n_err(_("Invalid \"\\%c\" notation: \"%.*s\"\n"),
845 c2, (int)input->l, input->s);
846 rv |= n_SHEXP_STATE_ERR_NUMBER;
847 goto jeasis;
848 }else
849 break;
852 /* Unicode massage */
853 if(c2 != 'U' && c2 != 'u'){
854 if((c = (char)no) == '\0')
855 skipq = TRU1;
856 }else if(no == 0)
857 skipq = TRU1;
858 else if(!skipq){
859 store = n_string_reserve(store, MAX(j, 4));
861 c2 = FAL0;
862 if(no > 0x10FFFF){ /* XXX magic; CText */
863 if(dolog)
864 n_err(_("\"\\U\" argument exceeds 0x10FFFF: "
865 "\"%.*s\"\n"), (int)input->l, input->s);
866 rv |= n_SHEXP_STATE_UNICODE |
867 n_SHEXP_STATE_ERR_NUMBER |
868 n_SHEXP_STATE_ERR_UNICODE;
869 goto jeunicode;
870 }else if((options & OPT_UNICODE) ||
871 (c2 = n_uasciichar(no))){
872 char utf[8];
874 if(!c2)
875 rv |= n_SHEXP_STATE_UNICODE;
876 j = n_utf32_to_utf8(no, utf);
877 store = n_string_push_buf(store, utf, j);
878 }else{
879 /* Write unchanged */
880 jeunicode:
881 rv |= n_SHEXP_STATE_UNICODE |
882 n_SHEXP_STATE_ERR_UNICODE;
883 jeasis:
884 store = n_string_push_buf(store, ib_save,
885 PTR2SIZE(ib - ib_save));
886 continue;
888 if(n_uasciichar(no) && cntrlchar(no)) /* TODO ctext */
889 rv |= n_SHEXP_STATE_CONTROL;
890 continue;
892 if(skipq)
893 continue;
895 break;
897 /* Extension: \$ can be used to expand a variable.
898 * Bug|ad effect: if conversion fails, not written "as-is" */
899 case '$':
900 if(il == 0)
901 goto j_dollar_ungetc;
902 goto J_var_expand;
904 default:
905 j_dollar_ungetc:
906 /* Follow bash behaviour, print sequence unchanged */
907 ++il, --ib;
908 break;
911 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
912 bool_t brace;
914 if(!(brace = (*ib == '{')) || il > 1){
915 char const *cp, *vp;
917 il -= brace;
918 vp = (ib += brace);
/* i counts the characters of the variable name which starts at vp */
920 for(i = 0; il > 0 && (c = *ib, a_SHEXP_ISVARC(c)); ++i)
921 --il, ++ib;
923 if(brace){
924 if(il == 0 || *ib != '}'){
925 if(skipq){
926 assert(surplus && quotec == '\'');
927 continue;
929 if(dolog)
930 n_err(_("Closing brace missing for ${VAR}: \"%.*s\"\n"),
931 (int)input->l, input->s);
932 rv |= n_SHEXP_STATE_STOP |
933 n_SHEXP_STATE_ERR_QUOTEOPEN | n_SHEXP_STATE_ERR_BRACE;
934 goto jleave;
936 --il, ++ib;
939 if(skipq)
940 continue;
942 if(i == 0){
943 if(brace){
944 if(dolog)
945 n_err(_("Bad substitution (${}): \"%.*s\"\n"),
946 (int)input->l, input->s);
947 rv |= n_SHEXP_STATE_STOP | n_SHEXP_STATE_ERR_BADSUB;
948 goto jleave;
950 c = '$';
951 }else{
952 vp = savestrbuf(vp, i);
953 /* Check getenv(3) shall no internal variable exist! */
954 if((cp = vok_vlook(vp)) != NULL || (cp = getenv(vp)) != NULL){
955 store = n_string_push_cp(store, cp);
956 for(; (c = *cp) != '\0'; ++cp)
957 if(cntrlchar(c)){
958 rv |= n_SHEXP_STATE_CONTROL;
959 break;
962 continue;
965 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
966 continue;
970 if(!skipq){
971 if(cntrlchar(c))
972 rv |= n_SHEXP_STATE_CONTROL;
973 store = n_string_push_c(store, c);
977 if(quotec != '\0'){
978 if(dolog)
979 n_err(_("Missing closing quote in: \"%.*s\"\n"),
980 (int)input->l, input->s);
981 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
983 jleave:
/* Publish the "saw a control character" state globally */
984 if(rv & n_SHEXP_STATE_CONTROL)
985 pstate |= PS_WYSHLIST_SAW_CONTROL;
986 input->s = UNCONST(ib);
987 input->l = il;
988 NYD2_LEAVE;
989 return rv;
992 FL struct n_string *
993 n_shell_quote(struct n_string *store, struct str const *input){
994 /* TODO In v15 we need to save (possibly normalize) away user input,
995 * TODO so that the ORIGINAL (normalized) input can be used directly.
996 * Because we're the last, stay primitive */
997 bool_t qflag;
998 size_t j, i, il;
999 char const *ib;
1000 NYD2_ENTER;
1002 assert(store != NULL);
1003 assert(input != NULL);
1004 assert(input->l == 0 || input->s != NULL);
1006 ib = input->s;
1007 if((il = input->l) == UIZ_MAX)
1008 il = strlen(ib);
1010 /* Calculate necessary buffer space */
1011 if(il == 0)
1012 qflag = TRU1, j = 0;
1013 else for(qflag = FAL0, j = sizeof("''") -1, i = 0; i < il; ++i){
1014 char c = ib[i];
1016 if(c == '\'' || !asciichar(c) || cntrlchar(c)){
1017 qflag |= TRUM1;
1018 j += sizeof("\\0377") -1;
1019 }else if(c == '\\' || c == '$' || blankchar(c)){
1020 qflag |= TRU1;
1021 j += sizeof("\\ ") -1;
1022 }else
1023 ++j;
1025 store = n_string_reserve(store, j + 3);
1027 if(!qflag)
1028 store = n_string_push_buf(store, ib, il);
1029 else if(qflag == TRU1){
1030 store = n_string_push_c(store, '\'');
1031 store = n_string_push_buf(store, ib, il);
1032 store = n_string_push_c(store, '\'');
1033 }else{
1034 store = n_string_push_buf(store, "$'", sizeof("$'") -1);
1036 for(qflag = FAL0, j = 0, i = 0; i < il; ++i){
1037 char c = ib[i];
1039 if(c == '\'' || !asciichar(c) || cntrlchar(c)){
1040 store = n_string_push_c(store, '\\');
1041 if(cntrlchar(c)){
1042 char c2 = c;
1044 switch(c){
1045 case 0x07: c = 'a'; break;
1046 case 0x08: c = 'b'; break;
1047 case 0x09: c = 't'; break;
1048 case 0x0A: c = 'n'; break;
1049 case 0x0B: c = 'v'; break;
1050 case 0x0C: c = 'f'; break;
1051 case 0x0D: c = 'r'; break;
1052 default: break;
1054 if(c == c2){
1055 store = n_string_push_c(store, 'c');
1056 c ^= 0x40;
1058 store = n_string_push_c(store, c);
1059 continue;
1060 }else if(c != '\''){
1061 store = n_string_push_buf(store, "xFF", sizeof("xFF") -1);
1062 n_c_to_hex_base16(&store->s_dat[store->s_len - 2], c);
1063 continue;
1066 store = n_string_push_c(store, c);
1068 store = n_string_push_c(store, '\'');
1070 NYD2_LEAVE;
1071 return store;
1074 FL char *
1075 n_shell_quote_cp(char const *cp){
1076 struct n_string store;
1077 struct str input;
1078 char *rv;
1079 NYD2_ENTER;
1081 assert(cp != NULL);
1083 input.s = UNCONST(cp);
1084 input.l = UIZ_MAX;
1085 rv = n_string_cp(n_shell_quote(n_string_creat_auto(&store), &input));
1086 n_string_gut(n_string_drop_ownership(&store));
1087 NYD2_LEAVE;
1088 return rv;
1091 /* s-it-mode */