1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 #ifndef HAVE_AMALGAMATION
51 * Environment variable names used by the utilities in the Shell and
52 * Utilities volume of POSIX.1-2008 consist solely of uppercase
53 * letters, digits, and the <underscore> ('_') from the characters
54 * defined in Portable Character Set and do not begin with a digit.
55 * Other characters may be permitted by an implementation;
56 * applications shall tolerate the presence of such names.
57 * We do support the hyphen "-" because it is common for mailx. */
58 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
61 struct shvar_stack
*shs_next
; /* Outer stack frame */
62 char const *shs_value
; /* Remaining value to expand */
63 size_t shs_len
; /* Length of .shs_dat this level */
64 char const *shs_dat
; /* Result data of this level */
65 bool_t
*shs_err
; /* Or NULL */
66 bool_t shs_bsesc
; /* Shall backslash escaping be performed */
69 /* Locate the user's mailbox file (where new, unread mail is queued) */
70 static char * _findmail(char const *user
, bool_t force
);
72 /* Perform shell meta character expansion TODO obsolete (INSECURE!) */
73 static char * _globname(char const *name
, enum fexp_mode fexpm
);
75 /* Perform shell variable expansion */
76 static char * _sh_exp_var(struct shvar_stack
*shsp
);
79 _findmail(char const *user
, bool_t force
)
85 if (force
|| (cp
= ok_vlook(MAIL
)) == NULL
) {
86 size_t ul
= strlen(user
), i
= sizeof(MAILSPOOL
) -1 + 1 + ul
+1;
89 memcpy(rv
, MAILSPOOL
, i
= sizeof(MAILSPOOL
));
91 memcpy(&rv
[++i
], user
, ul
+1);
92 } else if ((rv
= fexpand(cp
, FEXP_NSHELL
)) == NULL
)
99 _globname(char const *name
, enum fexp_mode fexpm
)
108 /* Mac OS X Snow Leopard and Linux don't init fields on error, causing
109 * SIGSEGV in wordfree(3); so let's just always zero it ourselves */
110 memset(&we
, 0, sizeof we
);
112 /* Some systems (notably Open UNIX 8.0.0) fork a shell for wordexp()
113 * and wait, which will fail if our SIGCHLD handler is active */
115 sigaddset(&nset
, SIGCHLD
);
116 sigprocmask(SIG_BLOCK
, &nset
, NULL
);
118 # define WRDE_NOCMD 0
120 i
= wordexp(name
, &we
, WRDE_NOCMD
);
121 sigprocmask(SIG_UNBLOCK
, &nset
, NULL
);
128 if (!(fexpm
& FEXP_SILENT
))
129 n_err(_("\"%s\": Command substitution not allowed\n"), name
);
133 if (!(fexpm
& FEXP_SILENT
))
134 n_err(_("\"%s\": Expansion buffer overflow\n"), name
);
139 if (!(fexpm
& FEXP_SILENT
))
140 n_err(_("Syntax error in \"%s\"\n"), name
);
144 switch (we
.we_wordc
) {
146 cp
= savestr(we
.we_wordv
[0]);
149 if (!(fexpm
& FEXP_SILENT
))
150 n_err(_("\"%s\": No match\n"), name
);
153 if (fexpm
& FEXP_MULTIOK
) {
156 for (l
= 0, j
= 0; j
< we
.we_wordc
; ++j
)
157 l
+= strlen(we
.we_wordv
[j
]) + 1;
160 for (l
= 0, j
= 0; j
< we
.we_wordc
; ++j
) {
161 size_t x
= strlen(we
.we_wordv
[j
]);
162 memcpy(cp
+ l
, we
.we_wordv
[j
], x
);
167 } else if (!(fexpm
& FEXP_SILENT
))
168 n_err(_("\"%s\": Ambiguous\n"), name
);
176 #else /* HAVE_WORDEXP */
179 if(options
& OPT_D_V
)
180 n_err(_("wordexp(3) not available, cannot perform expansion\n"));
181 return savestr(name
);
186 _sh_exp_var(struct shvar_stack
*shsp
)
188 struct shvar_stack next
, *np
, *tmp
;
190 char lc
, c
, *cp
, *rv
;
194 if (*(vp
= shsp
->shs_value
) != '$') {
195 bool_t bsesc
= shsp
->shs_bsesc
;
196 union {bool_t hadbs
; char c
;} u
= {FAL0
};
199 for (lc
= '\0', i
= 0; ((c
= *vp
) != '\0'); ++i
, ++vp
) {
200 if (c
== '$' && lc
!= '\\')
204 lc
= (lc
== '\\') ? (u
.hadbs
= TRU1
, '\0') : c
;
209 shsp
->shs_dat
= cp
= savestrbuf(shsp
->shs_dat
, i
);
211 for (lc
= '\0', rv
= cp
; (u
.c
= *cp
++) != '\0';) {
212 if (u
.c
!= '\\' || lc
== '\\')
214 lc
= (lc
== '\\') ? '\0' : u
.c
;
218 shsp
->shs_len
= PTR2SIZE(rv
- shsp
->shs_dat
);
221 if ((lc
= (*++vp
== '{')))
225 for (i
= 0; (c
= *vp
) != '\0'; ++i
, ++vp
)
226 if (!a_SHEXP_ISVARC(c
))
231 n_err(_("Variable name misses closing \"}\": %s\n"),
233 shsp
->shs_len
= strlen(shsp
->shs_value
);
234 shsp
->shs_dat
= shsp
->shs_value
;
235 if (shsp
->shs_err
!= NULL
)
236 *shsp
->shs_err
= TRU1
;
243 /* Fall back to getenv(3) if no internal variable of that name exists */
244 if ((rv
= vok_vlook(cp
= savestrbuf(shsp
->shs_dat
, i
))) != NULL
||
245 (rv
= getenv(cp
)) != NULL
)
246 shsp
->shs_len
= strlen(shsp
->shs_dat
= rv
);
248 shsp
->shs_len
= 0, shsp
->shs_dat
= UNCONST("");
253 /* That level made the great and completed encoding. Build result */
255 for (i
= 0, np
= shsp
, shsp
= NULL
; np
!= NULL
;) {
263 cp
= rv
= salloc(i
+1);
264 while (shsp
!= NULL
) {
266 shsp
= shsp
->shs_next
;
267 memcpy(cp
, np
->shs_dat
, np
->shs_len
);
276 memset(&next
, 0, sizeof next
);
277 next
.shs_next
= shsp
;
279 next
.shs_err
= shsp
->shs_err
;
280 next
.shs_bsesc
= shsp
->shs_bsesc
;
281 rv
= _sh_exp_var(&next
);
286 fexpand(char const *name
, enum fexp_mode fexpm
)
289 char const *cp
, *res
;
293 /* The order of evaluation is "%" and "#" expand into constants.
294 * "&" can expand into "+". "+" can expand into shell meta characters.
295 * Shell meta characters expand into constants.
296 * This way, we make no recursive expansion */
297 if ((fexpm
& FEXP_NSHORTCUT
) || (res
= shortcut_expand(name
)) == NULL
)
304 if (res
[1] == ':' && res
[2] != '\0') {
308 res
= _findmail((res
[1] != '\0' ? res
+ 1 : myname
), (res
[1] != '\0'));
313 if (prevfile
[0] == '\0') {
314 n_err(_("No previous file\n"));
322 res
= ok_vlook(MBOX
);
326 /* POSIX: if *folder* unset or null, "+" shall be retained */
327 if (*res
== '+' && *(cp
= folder_query()) != '\0') {
328 size_t i
= strlen(cp
);
330 res
= str_concat_csvl(&s
, cp
,
331 ((i
== 0 || cp
[i
-1] == '/') ? "" : "/"), res
+ 1, NULL
)->s
;
334 /* TODO *folder* can't start with %[:], can it!?! */
335 if (res
[0] == '%' && res
[1] == ':') {
341 /* Catch the most common shell meta character */
343 res
= n_shell_expand_tilde(res
, NULL
);
347 if ((fexpm
& (FEXP_NSHELL
| FEXP_NVAR
)) != FEXP_NVAR
&&
348 ((fexpm
& FEXP_NSHELL
) ? (strchr(res
, '$') != NULL
)
349 : anyof(res
, "|&;<>{}()[]*?$`'\"\\"))) {
352 if(fexpm
& FEXP_NOPROTO
)
354 else switch(which_protocol(res
)){
365 res
= (fexpm
& FEXP_NSHELL
) ? n_shell_expand_var(res
, TRU1
, NULL
)
366 : _globname(res
, fexpm
);
372 if (fexpm
& FEXP_LOCAL
)
373 switch (which_protocol(res
)) {
378 n_err(_("Not a local file or directory: %s\n"),
379 n_shell_quote_cp(name
, FAL0
));
392 n_shell_expand_tilde(char const *s
, bool_t
*err_or_null
)
406 if (*(rp
= s
+ 1) == '/' || *rp
== '\0')
409 if ((rp
= strchr(s
+ 1, '/')) == NULL
)
410 rp
= (np
= UNCONST(s
)) + 1;
412 nl
= PTR2SIZE(rp
- s
);
413 np
= savestrbuf(s
, nl
);
416 if ((pwp
= getpwnam(np
)) == NULL
) {
425 rv
= salloc(nl
+ 1 + rl
+1);
428 memcpy(rv
+ nl
, rp
, rl
);
437 if (err_or_null
!= NULL
)
444 n_shell_expand_var(char const *s
, bool_t bsescape
, bool_t
*err_or_null
)
446 struct shvar_stack top
;
450 memset(&top
, 0, sizeof top
);
453 if ((top
.shs_err
= err_or_null
) != NULL
)
455 top
.shs_bsesc
= bsescape
;
456 rv
= _sh_exp_var(&top
);
462 n_shell_expand_escape(char const **s
, bool_t use_nail_extensions
)/* TODO DROP!*/
470 if ((c
= *xs
& 0xFF) == '\0')
476 switch ((c
= *xs
& 0xFF)) {
477 case 'a': c
= '\a'; break;
478 case 'b': c
= '\b'; break;
479 case 'c': c
= PROMPT_STOP
; break;
480 case 'f': c
= '\f'; break;
481 case 'n': c
= '\n'; break;
482 case 'r': c
= '\r'; break;
483 case 't': c
= '\t'; break;
484 case 'v': c
= '\v'; break;
492 /* Hexadecimal TODO uses ASCII */
495 static ui8_t
const hexatoi
[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
498 hexatoi[(ui8_t)((n) - ((n) <= '9' ? 48 : ((n) <= 'F' ? 55 : 87)))]
506 if(options
& OPT_D_V
)
507 n_err(_("Invalid \"\\xNUMBER\" notation in \"%s\"\n"), xs
- 1);
521 /* octal, with optional 0 prefix */
531 for (c
= 0, n
= 3; n
-- > 0 && octalchar(*xs
); ++xs
) {
537 /* S-nail extension for nice (get)prompt(()) support */
542 if (use_nail_extensions
) {
544 case '&': c
= ok_blook(bsdcompat
) ? '&' : '?'; break;
545 case '?': c
= (pstate
& PS_EVAL_ERROR
) ? '1' : '0'; break;
546 case '$': c
= PROMPT_DOLLAR
; break;
547 case '@': c
= PROMPT_AT
; break;
554 /* A sole <backslash> at EOS is treated as-is! */
568 FL
enum n_shexp_state
569 n_shell_parse_token(struct n_string
*store
, struct str
*input
, /* TODO WCHAR */
570 enum n_shexp_parse_flags flags
){
571 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
577 a_SKIPQ
= 1<<0, /* Skip rest of this quote (\c0 ..) */
578 a_SURPLUS
= 1<<1, /* Extended sequence interpretation */
579 a_NTOKEN
= 1<<2 /* "New token": e.g., comments are possible */
581 enum n_shexp_state rv
;
583 char const *ib_save
, *ib
;
587 assert((flags
& n_SHEXP_PARSE_DRYRUN
) || store
!= NULL
);
588 assert(input
!= NULL
);
589 assert(input
->l
== 0 || input
->s
!= NULL
);
590 assert(!(flags
& n_SHEXP_PARSE_LOG
) || !(flags
& n_SHEXP_PARSE_LOG_D_V
));
591 assert(!(flags
& n_SHEXP_PARSE_IFS_ADD_COMMA
) ||
592 !(flags
& n_SHEXP_PARSE_IFS_IS_COMMA
));
594 if((flags
& n_SHEXP_PARSE_LOG_D_V
) && (options
& OPT_D_V
))
595 flags
|= n_SHEXP_PARSE_LOG
;
597 if((flags
& n_SHEXP_PARSE_TRUNC
) && store
!= NULL
)
598 store
= n_string_trunc(store
, 0);
601 if((il
= input
->l
) == UIZ_MAX
)
605 if(flags
& n_SHEXP_PARSE_TRIMSPACE
){
606 for(; il
> 0; ++ib
, --il
)
607 if(!blankspacechar(*ib
))
610 input
->s
= UNCONST(ib
);
614 rv
= n_SHEXP_STATE_STOP
;
619 store
= n_string_reserve(store
, MIN(il
, 32)); /* XXX */
621 for(rv
= n_SHEXP_STATE_NONE
, state
= a_NTOKEN
, quotec
= '\0'; il
> 0;){
624 /* If no quote-mode active.. */
626 if(c
== '"' || c
== '\''){
646 /* Outside of quotes this just escapes any next character, but a sole
647 * <backslash> at EOS is left unchanged */
651 }else if(c
== '#' && (state
& a_NTOKEN
)){
652 rv
|= n_SHEXP_STATE_STOP
;
654 }else if(c
== ',' && (flags
&
655 (n_SHEXP_PARSE_IFS_ADD_COMMA
| n_SHEXP_PARSE_IFS_IS_COMMA
)))
657 else if(blankchar(c
)){
658 if(!(flags
& n_SHEXP_PARSE_IFS_IS_COMMA
)){
667 assert(!(state
& a_NTOKEN
));
671 /* Users may need to recognize the presence of empty quotes */
672 rv
|= n_SHEXP_STATE_OUTPUT
;
674 }else if(c
== '\\' && (state
& a_SURPLUS
)){
676 /* A sole <backslash> at EOS is treated as-is! This is ok since
677 * the "closing quote" error will occur next, anyway */
680 else if((c2
= *ib
) == quotec
){
683 }else if(quotec
== '"'){
685 * The <backslash> shall retain its special meaning as an
686 * escape character (see Section 2.2.1) only when followed
687 * by one of the following characters when considered
688 * special: $ ` " \ <newline> */
692 /* case '"': already handled via c2 == quotec */
701 /* Dollar-single-quote */
705 /* case '\'': already handled via c2 == quotec */
710 case 'b': c
= '\b'; break;
711 case 'f': c
= '\f'; break;
712 case 'n': c
= '\n'; break;
713 case 'r': c
= '\r'; break;
714 case 't': c
= '\t'; break;
715 case 'v': c
= '\v'; break;
718 case 'e': c
= '\033'; break;
720 /* Control character */
723 goto j_dollar_ungetc
;
727 c
= upperconv(c2
) ^ 0x40;
728 if((ui8_t
)c
> 0x1F && c
!= 0x7F){ /* ASCII C0: 0..1F, 7F */
729 if(flags
& n_SHEXP_PARSE_LOG
)
730 n_err(_("Invalid \"\\c\" notation: %.*s\n"),
731 (int)input
->l
, input
->s
);
732 rv
|= n_SHEXP_STATE_ERR_CONTROL
;
734 /* As an implementation-defined extension, support \c@
735 * EQ printf(1) alike \c */
737 rv
|= n_SHEXP_STATE_STOP
;
742 /* Octal sequence: 1 to 3 octal digits */
744 /* As an extension (dependent on where you look, echo(1), or
745 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
746 if(il
> 0 && (c
= *ib
) >= '0' && c
<= '7'){
751 case '1': case '2': case '3':
752 case '4': case '5': case '6': case '7':
754 if(il
> 0 && (c
= *ib
) >= '0' && c
<= '7'){
755 c2
= (c2
<< 3) | (c
- '0');
758 if(il
> 0 && (c
= *ib
) >= '0' && c
<= '7'){
759 if((ui8_t
)c2
> 0x1F){
760 if(flags
& n_SHEXP_PARSE_LOG
)
761 n_err(_("\"\\0\" argument exceeds a byte: "
762 "%.*s\n"), (int)input
->l
, input
->s
);
763 rv
|= n_SHEXP_STATE_ERR_NUMBER
;
765 /* Write unchanged */
767 rv
|= n_SHEXP_STATE_OUTPUT
;
768 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
769 store
= n_string_push_buf(store
, ib_save
,
770 PTR2SIZE(ib
- ib_save
));
773 c2
= (c2
<< 3) | (c
-= '0');
782 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal digits */
791 goto j_dollar_ungetc
;
795 /* Hexadecimal sequence, 1 or 2 hexadecimal digits */
799 goto j_dollar_ungetc
;
803 static ui8_t
const hexatoi
[] = { /* XXX uses ASCII */
804 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
809 for(no
= j
= 0; i
-- > 0; --il
, ++ib
, ++j
){
813 no
+= hexatoi
[(ui8_t
)((c
) - ((c
) <= '9' ? 48
814 : ((c
) <= 'F' ? 55 : 87)))];
818 c2
= (c2
== 'U' || c2
== 'u') ? 'u' : 'x';
819 if(flags
& n_SHEXP_PARSE_LOG
)
820 n_err(_("Invalid \"\\%c\" notation: %.*s\n"),
821 c2
, (int)input
->l
, input
->s
);
822 rv
|= n_SHEXP_STATE_ERR_NUMBER
;
828 /* Unicode massage */
829 if((c2
!= 'U' && c2
!= 'u') || n_uasciichar(no
)){
830 if((c
= (char)no
) == '\0')
834 else if(!(state
& a_SKIPQ
)){
835 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
836 store
= n_string_reserve(store
, MAX(j
, 4));
839 if(no
> 0x10FFFF){ /* XXX magic; CText */
840 if(flags
& n_SHEXP_PARSE_LOG
)
841 n_err(_("\"\\U\" argument exceeds 0x10FFFF: "
842 "%.*s\n"), (int)input
->l
, input
->s
);
843 rv
|= n_SHEXP_STATE_ERR_NUMBER
;
844 /* But normalize the output anyway */
848 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
849 j
= n_utf32_to_utf8(no
, utf
);
851 #ifdef HAVE_NATCH_CHAR
852 if(options
& OPT_UNICODE
){
853 rv
|= n_SHEXP_STATE_OUTPUT
| n_SHEXP_STATE_UNICODE
;
854 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
855 store
= n_string_push_buf(store
, utf
, j
);
863 icp
= n_iconv_onetime_cp(NULL
, NULL
, utf
, FAL0
);
865 rv
|= n_SHEXP_STATE_OUTPUT
;
866 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
867 store
= n_string_push_cp(store
, icp
);
872 if(!(flags
& n_SHEXP_PARSE_DRYRUN
)) Je_uni_norm
:{
875 rv
|= n_SHEXP_STATE_OUTPUT
|
876 n_SHEXP_STATE_ERR_UNICODE
;
877 i
= snprintf(itoa
, sizeof itoa
, "\\%c%0*X",
878 (no
> 0xFFFFu
? 'U' : 'u'),
879 (int)(no
> 0xFFFFu
? 8 : 4), (ui32_t
)no
);
880 store
= n_string_push_buf(store
, itoa
, i
);
889 /* Extension: \$ can be used to expand a variable.
890 * Bug|ad effect: if conversion fails, not written "as-is" */
893 goto j_dollar_ungetc
;
898 /* Follow bash behaviour, print sequence unchanged */
903 }else if(c
== '$' && quotec
== '"' && il
> 0) J_var_expand
:{
906 if(!(brace
= (*ib
== '{')) || il
> 1){
913 for(i
= 0; il
> 0 && (c
= *ib
, a_SHEXP_ISVARC(c
)); ++i
)
917 if(il
== 0 || *ib
!= '}'){
919 assert((state
& a_SURPLUS
) && quotec
== '\'');
922 if(flags
& n_SHEXP_PARSE_LOG
)
923 n_err(_("Closing brace missing for ${VAR}: %.*s\n"),
924 (int)input
->l
, input
->s
);
925 rv
|= n_SHEXP_STATE_ERR_QUOTEOPEN
|
926 n_SHEXP_STATE_ERR_BRACE
;
937 if(flags
& n_SHEXP_PARSE_LOG
)
938 n_err(_("Bad substitution (${}): %.*s\n"),
939 (int)input
->l
, input
->s
);
940 rv
|= n_SHEXP_STATE_ERR_BADSUB
;
944 }else if(flags
& n_SHEXP_PARSE_DRYRUN
)
947 vp
= savestrbuf(vp
, i
);
948 /* Fall back to getenv(3) if no internal variable of that name exists */
949 if((cp
= vok_vlook(vp
)) != NULL
|| (cp
= getenv(vp
)) != NULL
){
950 rv
|= n_SHEXP_STATE_OUTPUT
;
951 store
= n_string_push_cp(store
, cp
);
952 for(; (c
= *cp
) != '\0'; ++cp
)
954 rv
|= n_SHEXP_STATE_CONTROL
;
961 }else if(c
== '`' && quotec
== '"' && il
> 0){ /* TODO shell command */
966 if(!(state
& a_SKIPQ
)){
967 rv
|= n_SHEXP_STATE_OUTPUT
;
969 rv
|= n_SHEXP_STATE_CONTROL
;
970 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
971 store
= n_string_push_c(store
, c
);
976 if(flags
& n_SHEXP_PARSE_LOG
)
977 n_err(_("no closing quote: %.*s\n"), (int)input
->l
, input
->s
);
978 rv
|= n_SHEXP_STATE_ERR_QUOTEOPEN
;
982 if((flags
& n_SHEXP_PARSE_DRYRUN
) && store
!= NULL
){
983 store
= n_string_push_buf(store
, input
->s
, PTR2SIZE(ib
- input
->s
));
984 rv
|= n_SHEXP_STATE_OUTPUT
;
987 if(flags
& n_SHEXP_PARSE_TRIMSPACE
){
988 for(; il
> 0; ++ib
, --il
)
993 input
->s
= UNCONST(ib
);
995 if(!(rv
& n_SHEXP_STATE_STOP
)){
996 if(il
> 0 && !(rv
& n_SHEXP_STATE_OUTPUT
) &&
997 (flags
& n_SHEXP_PARSE_IGNORE_EMPTY
))
999 if(!(rv
& n_SHEXP_STATE_OUTPUT
) && il
== 0)
1000 rv
|= n_SHEXP_STATE_STOP
;
1002 assert((rv
& n_SHEXP_STATE_OUTPUT
) || !(rv
& n_SHEXP_STATE_UNICODE
));
1003 assert((rv
& n_SHEXP_STATE_OUTPUT
) || !(rv
& n_SHEXP_STATE_CONTROL
));
1008 FL
struct n_string
*
1009 n_shell_quote(struct n_string
*store
, struct str
const *input
, bool_t rndtrip
){
1010 /* TODO In v15 we need to save (possibly normalize) away user input,
1011 * TODO so that the ORIGINAL (normalized) input can be used directly.
1012 * Until then, stay somewhat primitive */
1014 struct n_visual_info_ctx vic
;
1016 enum{a_QNONE
, a_QSINGLE
, a_QDOLLAR
} quote
;
1021 assert(store
!= NULL
);
1022 assert(input
!= NULL
);
1023 assert(input
->l
== 0 || input
->s
!= NULL
);
1026 if((il
= input
->l
) == UIZ_MAX
)
1029 /* An empty string needs to be quoted */
1031 store
= n_string_push_buf(store
, "''", sizeof("''") -1);
1036 memset(&vic
, 0, sizeof vic
);
1039 vic
.vic_flags
= n_VISUAL_INFO_WOUT_CREATE
| n_VISUAL_INFO_WOUT_SALLOC
;
1040 i
= n_visual_info(&vic
);
1043 store
= n_string_reserve(store
, il
+ (il
>> 2)); /* XXX */
1047 def HAVE_C90AMEND1
/* TODO wchar! */
1053 #endif /* HAVE_C90AMEND1 */
1055 enum{a_NONE
, a_CNTRL
, a_SPACE
, a_SQ
, a_BS
, a_NASCII
} ct
;
1058 /* Classify character and type of quote, if necessary.
1059 * Try shorthands whenever possible */
1063 else if(blankspacechar(c
) || c
== '"' || c
== '$'){
1064 if(quote
== a_QSINGLE
|| quote
== a_QDOLLAR
)
1070 if(quote
== a_QSINGLE
)
1073 }else if(!asciichar(c
)){
1078 /* Shorthand: we can simply push that thing out */
1080 store
= n_string_push_c(store
, c
);
1085 /* We have to take care for quotes, try to reuse what we have */
1086 if(quote
== a_QNONE
){
1091 /* See XXX note below on a_QNONE! */
1092 store
= n_string_push_c(store
, '\'');
1096 /* XXX a_QNONE backslash escaping of a single character is
1097 * XXX disabled, because that starts looking bad if it is
1098 * XXX needed more than once. We'd need to count in a dryrun
1099 * XXX first, then decide whether it should be used!
1100 * XXX store = n_string_push_c(store, '\\');
1101 * XXX goto jc_one; */
1108 store
= n_string_push_buf(store
, "$'", sizeof("$'") -1);
1112 }else if(quote
== a_QSINGLE
){
1122 store
= n_string_push_c(store
, '\'');
1125 /* xxx For SQ we possibly should also simply go for QDOLLAR now? */
1126 store
= n_string_push_c(store
, '\'');
1128 store
= n_string_push_c(store
, '\\');
1133 assert(quote
== a_QDOLLAR
);
1140 store
= n_string_push_c(store
, '\\');
1145 store
= n_string_push_c(store
, '\\');
1147 case 0x07: c
= 'a'; break;
1148 case 0x08: c
= 'b'; break;
1149 case 0x09: c
= 't'; break;
1150 case 0x0A: c
= 'n'; break;
1151 case 0x0B: c
= 'v'; break;
1152 case 0x0C: c
= 'f'; break;
1153 case 0x0D: c
= 'r'; break;
1157 store
= n_string_push_c(store
, 'c');
1164 #ifdef HAVE_NATCH_CHAR
1165 if(options
& OPT_UNICODE
){
1167 char const *ib2
= ib
;
1168 size_t il2
= il
, il3
= il2
;
1170 if((u
= n_utf8_to_utf32(&ib2
, &il2
)) != UI32_MAX
){
1174 il2
= PTR2SIZE(&ib2
[0] - &ib
[0]);
1175 if(rndtrip
|| u
== 0xFFFD/* TODO CText */){
1177 il3
= snprintf(itoa
, sizeof itoa
, "\\%c%0*X",
1178 (u
> 0xFFFFu
? 'U' : 'u'),
1179 (int)(u
> 0xFFFFu
? 8 : 4), u
);
1184 store
= n_string_push_buf(store
, cp
, il3
);
1185 ib
+= il2
, il
-= il2
;
1189 #endif /* HAVE_NATCH_CHAR */
1191 store
= n_string_push_buf(store
, "\\xFF", sizeof("\\xFF") -1);
1192 n_c_to_hex_base16(&store
->s_dat
[store
->s_len
- 2], c
);
1194 #ifdef HAVE_NATCH_CHAR
1197 if(il
> 0 && hexchar(ib
[1])){
1198 store
= n_string_push_c(store
, '\'');
1205 if(quote
== a_QSINGLE
|| quote
== a_QDOLLAR
)
1206 store
= n_string_push_c(store
, '\'');
1213 n_shell_quote_cp(char const *cp
, bool_t rndtrip
){
1214 struct n_string store
;
1221 input
.s
= UNCONST(cp
);
1223 rv
= n_string_cp(n_shell_quote(n_string_creat_auto(&store
), &input
,
1225 n_string_gut(n_string_drop_ownership(&store
));