1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
5 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
6 * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 #ifndef HAVE_AMALGAMATION
53 * Environment variable names used by the utilities in the Shell and
54 * Utilities volume of POSIX.1-2008 consist solely of uppercase
55 * letters, digits, and the <underscore> ('_') from the characters
56 * defined in Portable Character Set and do not begin with a digit.
57 * Other characters may be permitted by an implementation;
58 * applications shall tolerate the presence of such names.
59 * We do support the hyphen "-" because it is common for mailx.
60 * We support some special parameter names for one-letter variable names;
61 * note these have counterparts in the code that manages internal variables! */
62 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
63 #define a_SHEXP_ISVARC_SPECIAL1(C) \
64 ((C) == '*' || (C) == '@' || (C) == '#' || (C) == '?')
66 enum a_shexp_quote_flags
{
68 a_SHEXP_QUOTE_ROUNDTRIP
= 1u<<0, /* Result won't be consumed immediately */
70 a_SHEXP_QUOTE_T_REVSOL
= 1u<<8, /* Type: by reverse solidus */
71 a_SHEXP_QUOTE_T_SINGLE
= 1u<<9, /* Type: single-quotes */
72 a_SHEXP_QUOTE_T_DOUBLE
= 1u<<10, /* Type: double-quotes */
73 a_SHEXP_QUOTE_T_DOLLAR
= 1u<<11, /* Type: dollar-single-quotes */
74 a_SHEXP_QUOTE_T_MASK
= a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_SINGLE
|
75 a_SHEXP_QUOTE_T_DOUBLE
| a_SHEXP_QUOTE_T_DOLLAR
,
77 a_SHEXP_QUOTE__FREESHIFT
= 16u
81 struct a_shexp_glob_ctx
{
82 char const *sgc_patdat
; /* Remaining pattern (at and below level) */
84 struct n_string
*sgc_outer
; /* Resolved path up to this level */
90 struct a_shexp_quote_ctx
{
91 struct n_string
*sqc_store
; /* Result storage */
92 struct str sqc_input
; /* Input data, topmost level */
94 ui32_t sqc_cnt_single
;
95 ui32_t sqc_cnt_double
;
96 ui32_t sqc_cnt_dollar
;
97 enum a_shexp_quote_flags sqc_flags
;
101 struct a_shexp_quote_lvl
{
102 struct a_shexp_quote_lvl
*sql_link
; /* Outer level */
103 struct str sql_dat
; /* This level (has to) handle(d) */
104 enum a_shexp_quote_flags sql_flags
;
108 /* Locate the user's mailbox file (where new, unread mail is queued) */
109 static char *a_shexp_findmail(char const *user
, bool_t force
);
111 /* Expand ^~/? and ^~USER/? constructs.
112 * Returns the completely resolved (maybe empty or identical to input)
113 * salloc()ed string */
114 static char *a_shexp_tilde(char const *s
);
116 /* Perform fnmatch(3). May return NULL on error */
117 static char *a_shexp_globname(char const *name
, enum fexp_mode fexpm
);
119 static bool_t
a_shexp__glob(struct a_shexp_glob_ctx
*sgcp
,
120 struct n_strlist
**slpp
);
121 static int a_shexp__globsort(void const *cvpa
, void const *cvpb
);
124 /* Parse an input string and create a sh(1)ell-quoted result */
125 static void a_shexp__quote(struct a_shexp_quote_ctx
*sqcp
,
126 struct a_shexp_quote_lvl
*sqlp
);
129 a_shexp_findmail(char const *user
, bool_t force
){
135 if((cp
= ok_vlook(inbox
)) != NULL
&& *cp
!= '\0'){
136 /* Folder extra introduced to avoid % recursion loops */
137 if((rv
= fexpand(cp
, FEXP_NSPECIAL
| FEXP_NFOLDER
| FEXP_NSHELL
)
140 n_err(_("*inbox* expansion failed, using $MAIL / builtin: %s\n"), cp
);
143 if((cp
= ok_vlook(MAIL
)) != NULL
){
152 ul
= strlen(user
) +1;
153 i
= sizeof(VAL_MAIL
) -1 + 1 + ul
;
156 memcpy(rv
, VAL_MAIL
, (i
= sizeof(VAL_MAIL
) -1));
158 memcpy(&rv
[++i
], user
, ul
);
166 a_shexp_tilde(char const *s
){
173 if(*(rp
= &s
[1]) == '/' || *rp
== '\0'){
177 if((rp
= strchr(np
= rp
, '/')) != NULL
){
178 nl
= PTR2SIZE(rp
- np
);
179 np
= savestrbuf(np
, nl
);
184 if((pwp
= getpwnam(np
)) == NULL
){
192 rv
= salloc(nl
+ 1 + rl
+1);
195 memcpy(rv
+ nl
, rp
, rl
);
205 a_shexp_globname(char const *name
, enum fexp_mode fexpm
){
207 struct a_shexp_glob_ctx sgc
;
208 struct n_string outer
;
209 struct n_strlist
*slp
;
213 memset(&sgc
, 0, sizeof sgc
);
214 sgc
.sgc_patlen
= strlen(name
);
215 sgc
.sgc_patdat
= savestrbuf(name
, sgc
.sgc_patlen
);
216 sgc
.sgc_outer
= n_string_reserve(n_string_creat(&outer
), sgc
.sgc_patlen
);
217 sgc
.sgc_flags
= ((fexpm
& FEXP_SILENT
) != 0);
219 if(a_shexp__glob(&sgc
, &slp
))
223 n_string_gut(&outer
);
229 cp
= n_UNCONST(N_("File pattern does not match"));
231 }else if(slp
->sl_next
== NULL
)
232 cp
= savestrbuf(slp
->sl_dat
, slp
->sl_len
);
233 else if(fexpm
& FEXP_MULTIOK
){
234 struct n_strlist
**sorta
, *xslp
;
238 for(xslp
= slp
; xslp
!= NULL
; xslp
= xslp
->sl_next
){
240 l
+= xslp
->sl_len
+ 1;
243 sorta
= smalloc(sizeof(*sorta
) * no
);
245 for(xslp
= slp
; xslp
!= NULL
; xslp
= xslp
->sl_next
)
247 qsort(sorta
, no
, sizeof *sorta
, &a_shexp__globsort
);
251 for(i
= 0; i
< no
; ++i
){
253 memcpy(&cp
[l
], xslp
->sl_dat
, xslp
->sl_len
);
260 n_pstate
|= n_PS_EXPAND_MULTIRESULT
;
262 cp
= n_UNCONST(N_("File pattern matches multiple results"));
268 struct n_strlist
*tmp
= slp
;
277 if(!(fexpm
& FEXP_SILENT
)){
278 name
= n_shexp_quote_cp(name
, FAL0
);
279 n_err("%s: %s\n", V_(cp
), name
);
284 #else /* HAVE_FNMATCH */
287 if(!(fexpm
& FEXP_SILENT
))
288 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
289 return savestr(name
);
295 a_shexp__glob(struct a_shexp_glob_ctx
*sgcp
, struct n_strlist
**slpp
){
296 enum{a_SILENT
= 1<<0, a_DEEP
=1<<1, a_SALLOC
=1<<2};
298 struct a_shexp_glob_ctx nsgc
;
302 char const *ccp
, *myp
;
305 /* We need some special treatment for the outermost level */
306 if(!(sgcp
->sgc_flags
& a_DEEP
)){
307 if(sgcp
->sgc_patlen
> 0 && sgcp
->sgc_patdat
[0] == '/'){
308 myp
= n_string_cp(n_string_push_c(sgcp
->sgc_outer
, '/'));
314 myp
= n_string_cp(sgcp
->sgc_outer
);
315 old_outerlen
= sgcp
->sgc_outer
->s_len
;
317 /* Separate current directory/pattern level from any possible remaining
318 * pattern in order to be able to use it for fnmatch(3) */
319 if((ccp
= memchr(sgcp
->sgc_patdat
, '/', sgcp
->sgc_patlen
)) == NULL
)
323 nsgc
.sgc_flags
|= a_DEEP
;
324 sgcp
->sgc_patlen
= PTR2SIZE((nsgc
.sgc_patdat
= &ccp
[1]) -
325 &sgcp
->sgc_patdat
[0]);
326 nsgc
.sgc_patlen
-= sgcp
->sgc_patlen
;
328 if(sgcp
->sgc_patlen
> 0){
329 assert(sgcp
->sgc_patdat
[sgcp
->sgc_patlen
-1] == '/');
330 ((char*)n_UNCONST(sgcp
->sgc_patdat
))[--sgcp
->sgc_patlen
] = '\0';
334 /* Our current directory level */
335 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
336 * xxx be the (sole) result depending on pattern surroundings, etc. */
337 if((dp
= opendir(myp
)) == NULL
){
340 switch((err
= errno
)){
342 ccp
= N_("cannot access paths under non-directory");
345 ccp
= N_("path component of (sub)pattern non-existent");
348 ccp
= N_("file permission for file (sub)pattern denied");
351 ccp
= N_("cannot handle file (sub)pattern");
356 /* As necessary, quote bytes in the current pattern */
362 for(need
= FAL0
, i
= 0, myp
= sgcp
->sgc_patdat
; *myp
!= '\0'; ++myp
)
364 case '\'': case '"': case '\\': case '$':
376 for(i
= 0, myp
= sgcp
->sgc_patdat
; *myp
!= '\0'; ++myp
)
378 case '\'': case '"': case '\\': case '$':
389 myp
= sgcp
->sgc_patdat
;
392 while((dep
= readdir(dp
)) != NULL
){
393 switch(fnmatch(myp
, dep
->d_name
, FNM_PATHNAME
| FNM_PERIOD
)){
395 /* A match expresses the desire to recurse if there is more pattern */
396 if(nsgc
.sgc_patlen
> 0){
399 n_string_push_cp((sgcp
->sgc_outer
->s_len
> 1
400 ? n_string_push_c(sgcp
->sgc_outer
, '/') : sgcp
->sgc_outer
),
404 #ifdef HAVE_DIRENT_TYPE
405 if(dep
->d_type
== DT_DIR
)
407 else if(dep
->d_type
== DT_LNK
|| dep
->d_type
== DT_UNKNOWN
)
412 if(stat(n_string_cp(sgcp
->sgc_outer
), &sb
)){
413 ccp
= N_("I/O error when querying file status");
415 }else if(S_ISDIR(sb
.st_mode
))
419 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
420 * TODO Instead save away a list of such n_string's for later */
421 if(isdir
&& !a_shexp__glob(&nsgc
, slpp
)){
426 n_string_trunc(sgcp
->sgc_outer
, old_outerlen
);
428 struct n_strlist
*slp
;
431 i
= strlen(dep
->d_name
);
432 j
= (old_outerlen
> 0) ? old_outerlen
+ 1 + i
: i
;
433 slp
= n_STRLIST_MALLOC(j
);
435 slpp
= &slp
->sl_next
;
437 if((j
= old_outerlen
) > 0){
438 memcpy(&slp
->sl_dat
[0], sgcp
->sgc_outer
->s_dat
, j
);
439 if(slp
->sl_dat
[j
-1] != '/')
440 slp
->sl_dat
[j
++] = '/';
442 memcpy(&slp
->sl_dat
[j
], dep
->d_name
, i
);
443 slp
->sl_dat
[j
+= i
] = '\0';
450 ccp
= N_("fnmatch(3) cannot handle file (sub)pattern");
460 return (ccp
== NULL
);
463 if(!(sgcp
->sgc_flags
& a_SILENT
)){
466 if(sgcp
->sgc_outer
->s_len
> 0){
467 s2
= n_shexp_quote_cp(n_string_cp(sgcp
->sgc_outer
), FAL0
);
472 n_err("%s: %s%s%s\n", V_(ccp
), s2
, s3
,
473 n_shexp_quote_cp(sgcp
->sgc_patdat
, FAL0
));
479 a_shexp__globsort(void const *cvpa
, void const *cvpb
){
481 struct n_strlist
const * const *slpa
, * const *slpb
;
486 rv
= asccasecmp((*slpa
)->sl_dat
, (*slpb
)->sl_dat
);
490 #endif /* HAVE_FNMATCH */
493 a_shexp__quote(struct a_shexp_quote_ctx
*sqcp
, struct a_shexp_quote_lvl
*sqlp
){
494 /* XXX Due to the problems the ISO C multibyte interface causes with stateful
495 * XXX encodings we cannot use the recursive implementation.
496 * XXX I.e., if a quoted substring cannot be self-contained - the data after
497 * XXX the quote relies on "the former state", then this doesn't make sense.
498 * XXX Therefore this is not fully programmed out but instead only detects
499 * XXX the "most fancy" quoting necessary, and directly does that.
500 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
501 * XXX Otherwise we rather have to convert to wide first and act on that,
502 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
503 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
504 #ifdef a_SHEXP_QUOTE_RECURSE
505 # define jrecurse jrecurse
506 struct a_shexp_quote_lvl sql
;
508 # define jrecurse jstep
510 struct n_visual_info_ctx vic
;
511 union {struct a_shexp_quote_lvl
*head
; struct n_string
*store
;} u
;
517 ib
= sqlp
->sql_dat
.s
;
518 il
= sqlp
->sql_dat
.l
;
519 flags
= sqlp
->sql_flags
;
521 /* Iterate over the entire input, classify characters and type of quotes
522 * along the way. Whenever a quote change has to be applied, adjust flags
523 * for the new situation -, setup sql.* and recurse- */
529 if(flags
& a_SHEXP_QUOTE_T_DOLLAR
)
531 if(c
== '\t' && (flags
& (a_SHEXP_QUOTE_T_REVSOL
|
532 a_SHEXP_QUOTE_T_SINGLE
| a_SHEXP_QUOTE_T_DOUBLE
)))
534 #ifdef a_SHEXP_QUOTE_RECURSE
535 ++sqcp
->sqc_cnt_dollar
;
537 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_DOLLAR
;
539 }else if(blankspacechar(c
) || c
== '"' || c
== '$'){
540 if(flags
& a_SHEXP_QUOTE_T_MASK
)
542 #ifdef a_SHEXP_QUOTE_RECURSE
543 ++sqcp
->sqc_cnt_single
;
545 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_SINGLE
;
548 if(flags
& (a_SHEXP_QUOTE_T_MASK
& ~a_SHEXP_QUOTE_T_SINGLE
))
550 #ifdef a_SHEXP_QUOTE_RECURSE
551 ++sqcp
->sqc_cnt_dollar
;
553 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_DOLLAR
;
556 if(flags
& a_SHEXP_QUOTE_T_MASK
)
558 #ifdef a_SHEXP_QUOTE_RECURSE
559 ++sqcp
->sqc_cnt_single
;
561 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_SINGLE
;
563 }else if(!asciichar(c
)){
564 /* Need to keep together multibytes */
565 #ifdef a_SHEXP_QUOTE_RECURSE
566 memset(&vic
, 0, sizeof vic
);
570 n_VISUAL_INFO_ONE_CHAR
| n_VISUAL_INFO_SKIP_ERRORS
);
572 /* xxx check whether resulting \u would be ASCII */
573 if(!(flags
& a_SHEXP_QUOTE_ROUNDTRIP
) ||
574 (flags
& a_SHEXP_QUOTE_T_DOLLAR
)){
575 #ifdef a_SHEXP_QUOTE_RECURSE
583 #ifdef a_SHEXP_QUOTE_RECURSE
584 ++sqcp
->sqc_cnt_dollar
;
586 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_DOLLAR
;
592 sqlp
->sql_flags
= flags
;
594 /* This level has completed processing its input. Reverse the list of
595 * levels, detect the "most fancy" quote type needed along this way */
596 /* XXX Due to restriction as above very crude */
597 for(flags
= 0, il
= 0, u
.head
= NULL
; sqlp
!= NULL
;){
598 struct a_shexp_quote_lvl
*tmp
;
600 tmp
= sqlp
->sql_link
;
601 sqlp
->sql_link
= u
.head
;
603 il
+= sqlp
->sql_dat
.l
;
604 if(sqlp
->sql_flags
& a_SHEXP_QUOTE_T_MASK
)
605 il
+= (sqlp
->sql_dat
.l
>> 1);
606 flags
|= sqlp
->sql_flags
;
611 /* Finally work the substrings in the correct order, adjusting quotes along
612 * the way as necessary. Start off with the "most fancy" quote, so that
613 * the user sees an overall boundary she can orientate herself on.
614 * We do it like that to be able to give the user some "encapsulation
615 * experience", to address what strikes me is a problem of sh(1)ell quoting:
616 * different to, e.g., perl(1), where you see at a glance where a string
617 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
618 * visual appearance of "a string" as such */
619 u
.store
= n_string_reserve(sqcp
->sqc_store
, il
);
621 if(flags
& a_SHEXP_QUOTE_T_DOLLAR
){
622 u
.store
= n_string_push_buf(u
.store
, "$'", sizeof("$'") -1);
623 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_DOLLAR
;
624 }else if(flags
& a_SHEXP_QUOTE_T_DOUBLE
){
625 u
.store
= n_string_push_c(u
.store
, '"');
626 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_DOUBLE
;
627 }else if(flags
& a_SHEXP_QUOTE_T_SINGLE
){
628 u
.store
= n_string_push_c(u
.store
, '\'');
629 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_SINGLE
;
630 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
631 flags
&= ~a_SHEXP_QUOTE_T_MASK
;
633 /* Work all the levels */
634 for(; sqlp
!= NULL
; sqlp
= sqlp
->sql_link
){
635 /* As necessary update our mode of quoting */
636 #ifdef a_SHEXP_QUOTE_RECURSE
639 switch(sqlp
->sql_flags
& a_SHEXP_QUOTE_T_MASK
){
640 case a_SHEXP_QUOTE_T_DOLLAR
:
641 if(!(flags
& a_SHEXP_QUOTE_T_DOLLAR
))
642 il
= a_SHEXP_QUOTE_T_DOLLAR
;
644 case a_SHEXP_QUOTE_T_DOUBLE
:
645 if(!(flags
& (a_SHEXP_QUOTE_T_DOUBLE
| a_SHEXP_QUOTE_T_DOLLAR
)))
646 il
= a_SHEXP_QUOTE_T_DOLLAR
;
648 case a_SHEXP_QUOTE_T_SINGLE
:
649 if(!(flags
& (a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_SINGLE
|
650 a_SHEXP_QUOTE_T_DOUBLE
| a_SHEXP_QUOTE_T_DOLLAR
)))
651 il
= a_SHEXP_QUOTE_T_SINGLE
;
654 case a_SHEXP_QUOTE_T_REVSOL
:
655 if(!(flags
& (a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_SINGLE
|
656 a_SHEXP_QUOTE_T_DOUBLE
| a_SHEXP_QUOTE_T_DOLLAR
)))
657 il
= a_SHEXP_QUOTE_T_REVSOL
;
662 if(flags
& (a_SHEXP_QUOTE_T_SINGLE
| a_SHEXP_QUOTE_T_DOLLAR
))
663 u
.store
= n_string_push_c(u
.store
, '\'');
664 else if(flags
& a_SHEXP_QUOTE_T_DOUBLE
)
665 u
.store
= n_string_push_c(u
.store
, '"');
666 flags
&= ~a_SHEXP_QUOTE_T_MASK
;
669 if(flags
& a_SHEXP_QUOTE_T_DOLLAR
)
670 u
.store
= n_string_push_buf(u
.store
, "$'", sizeof("$'") -1);
671 else if(flags
& a_SHEXP_QUOTE_T_DOUBLE
)
672 u
.store
= n_string_push_c(u
.store
, '"');
673 else if(flags
& a_SHEXP_QUOTE_T_SINGLE
)
674 u
.store
= n_string_push_c(u
.store
, '\'');
676 #endif /* a_SHEXP_QUOTE_RECURSE */
678 /* Work the level's substring */
679 ib
= sqlp
->sql_dat
.s
;
680 il
= sqlp
->sql_dat
.l
;
688 assert(c
== '\t' || (flags
& a_SHEXP_QUOTE_T_DOLLAR
));
689 assert((flags
& (a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_SINGLE
|
690 a_SHEXP_QUOTE_T_DOUBLE
| a_SHEXP_QUOTE_T_DOLLAR
)));
692 case 0x07: c
= 'a'; break;
693 case 0x08: c
= 'b'; break;
694 case 0x0A: c
= 'n'; break;
695 case 0x0B: c
= 'v'; break;
696 case 0x0C: c
= 'f'; break;
697 case 0x0D: c
= 'r'; break;
698 case 0x1B: c
= 'E'; break;
701 if(flags
& a_SHEXP_QUOTE_T_DOLLAR
){
705 if(flags
& a_SHEXP_QUOTE_T_REVSOL
)
706 u
.store
= n_string_push_c(u
.store
, '\\');
709 u
.store
= n_string_push_c(u
.store
, '\\');
711 u
.store
= n_string_push_c(u
.store
, 'c');
715 }else if(blankspacechar(c
) || c
== '"' || c
== '$'){
716 if(flags
& (a_SHEXP_QUOTE_T_SINGLE
| a_SHEXP_QUOTE_T_DOLLAR
))
718 assert(flags
& (a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_DOUBLE
));
719 u
.store
= n_string_push_c(u
.store
, '\\');
722 if(flags
& a_SHEXP_QUOTE_T_DOUBLE
)
724 assert(!(flags
& a_SHEXP_QUOTE_T_SINGLE
));
725 u
.store
= n_string_push_c(u
.store
, '\\');
728 if(flags
& a_SHEXP_QUOTE_T_SINGLE
)
730 assert(flags
& (a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_DOUBLE
|
731 a_SHEXP_QUOTE_T_DOLLAR
));
732 u
.store
= n_string_push_c(u
.store
, '\\');
734 }else if(asciichar(c
)){
735 /* Shorthand: we can simply push that thing out */
737 u
.store
= n_string_push_c(u
.store
, c
);
740 /* Not an ASCII character, take care not to split up multibyte
741 * sequences etc. For the sake of compile testing, don't enwrap in
742 * HAVE_ALWAYS_UNICODE_LOCALE || HAVE_NATCH_CHAR */
743 if(n_psonce
& n_PSO_UNICODE
){
750 if((uc
= n_utf8_to_utf32(&ib2
, &il2
)) != UI32_MAX
){
754 il2
= PTR2SIZE(&ib2
[0] - &ib
[0]);
755 if((flags
& a_SHEXP_QUOTE_ROUNDTRIP
) || uc
== 0xFFFDu
){
756 /* Use padding to make ambiguities impossible */
757 il3
= snprintf(itoa
, sizeof itoa
, "\\%c%0*X",
758 (uc
> 0xFFFFu
? 'U' : 'u'),
759 (int)(uc
> 0xFFFFu
? 8 : 4), uc
);
765 u
.store
= n_string_push_buf(u
.store
, cp
, il3
);
766 ib
+= il2
, il
-= il2
;
771 memset(&vic
, 0, sizeof vic
);
775 n_VISUAL_INFO_ONE_CHAR
| n_VISUAL_INFO_SKIP_ERRORS
);
777 /* Work this substring as sensitively as possible */
779 if(!(flags
& a_SHEXP_QUOTE_ROUNDTRIP
))
780 u
.store
= n_string_push_buf(u
.store
, ib
, il
);
782 else if((vic
.vic_indat
= n_iconv_onetime_cp(n_ICONV_NONE
,
783 "utf-8", ok_vlook(ttycharset
), savestrbuf(ib
, il
))) != NULL
){
788 il3
= il2
= strlen(ib2
= vic
.vic_indat
);
789 if((uc
= n_utf8_to_utf32(&ib2
, &il2
)) != UI32_MAX
){
792 il2
= PTR2SIZE(&ib2
[0] - &vic
.vic_indat
[0]);
793 /* Use padding to make ambiguities impossible */
794 il3
= snprintf(itoa
, sizeof itoa
, "\\%c%0*X",
795 (uc
> 0xFFFFu
? 'U' : 'u'),
796 (int)(uc
> 0xFFFFu
? 8 : 4), uc
);
797 u
.store
= n_string_push_buf(u
.store
, itoa
, il3
);
807 u
.store
= n_string_push_buf(u
.store
, "\\xFF",
809 n_c_to_hex_base16(&u
.store
->s_dat
[u
.store
->s_len
- 2], *ib
++);
818 /* Close an open quote */
819 if(flags
& (a_SHEXP_QUOTE_T_SINGLE
| a_SHEXP_QUOTE_T_DOLLAR
))
820 u
.store
= n_string_push_c(u
.store
, '\'');
821 else if(flags
& a_SHEXP_QUOTE_T_DOUBLE
)
822 u
.store
= n_string_push_c(u
.store
, '"');
823 #ifdef a_SHEXP_QUOTE_RECURSE
829 #ifdef a_SHEXP_QUOTE_RECURSE
831 sqlp
->sql_dat
.l
-= il
;
834 sql
.sql_dat
.s
= n_UNCONST(ib
);
836 sql
.sql_flags
= flags
;
837 a_shexp__quote(sqcp
, &sql
);
842 #undef a_SHEXP_QUOTE_RECURSE
846 fexpand(char const *name
, enum fexp_mode fexpm
)
849 char const *cp
, *res
;
853 n_pstate
&= ~n_PS_EXPAND_MULTIRESULT
;
855 /* The order of evaluation is "%" and "#" expand into constants.
856 * "&" can expand into "+". "+" can expand into shell meta characters.
857 * Shell meta characters expand into constants.
858 * This way, we make no recursive expansion */
859 if ((fexpm
& FEXP_NSHORTCUT
) || (res
= shortcut_expand(name
)) == NULL
)
860 res
= n_UNCONST(name
);
862 if(!(fexpm
& FEXP_NSPECIAL
)){
867 if(res
[1] == ':' && res
[2] != '\0')
872 force
= (res
[1] != '\0');
873 res
= a_shexp_findmail((force
? &res
[1] : ok_vlook(LOGNAME
)),
882 if (prevfile
[0] == '\0') {
883 n_err(_("No previous file\n"));
891 res
= ok_vlook(MBOX
);
896 /* POSIX: if *folder* unset or null, "+" shall be retained */
897 if (!(fexpm
& FEXP_NFOLDER
) && *res
== '+' &&
898 *(cp
= folder_query()) != '\0') {
899 res
= str_concat_csvl(&s
, cp
, &res
[1], NULL
)->s
;
902 /* TODO *folder* can't start with %[:], can it!?! */
903 if (res
[0] == '%' && res
[1] == ':') {
909 /* Do some meta expansions */
910 if((fexpm
& (FEXP_NSHELL
| FEXP_NVAR
)) != FEXP_NVAR
&&
911 ((fexpm
& FEXP_NSHELL
) ? (strchr(res
, '$') != NULL
)
912 : anyof(res
, "{}[]*?$"))){
915 if(fexpm
& FEXP_NOPROTO
)
917 else switch(which_protocol(res
)){
929 struct n_string shou
, *shoup
;
931 shin
.s
= n_UNCONST(res
);
933 shoup
= n_string_creat_auto(&shou
);
935 enum n_shexp_state shs
;
937 /* TODO shexp: take care to not include backtick eval once avail! */
938 shs
= n_shexp_parse_token(shoup
, &shin
, n_SHEXP_PARSE_LOG_D_V
|
939 n_SHEXP_PARSE_QUOTE_AUTO_FIXED
| n_SHEXP_PARSE_QUOTE_AUTO_DQ
|
940 n_SHEXP_PARSE_QUOTE_AUTO_CLOSE
);
941 if(shs
& n_SHEXP_STATE_STOP
)
944 res
= n_string_cp(shoup
);
945 shoup
= n_string_drop_ownership(shoup
);
949 res
= a_shexp_tilde(res
);
951 if(!(fexpm
& FEXP_NSHELL
) &&
952 (res
= a_shexp_globname(res
, fexpm
)) == NULL
)
956 }else if(res
[0] == '~'){
957 res
= a_shexp_tilde(res
);
962 if (fexpm
& FEXP_LOCAL
)
963 switch (which_protocol(res
)) {
968 n_err(_("Not a local file or directory: %s\n"),
969 n_shexp_quote_cp(name
, FAL0
));
975 if(res
!= NULL
&& !dyn
)
978 return n_UNCONST(res
);
981 FL
enum n_shexp_state
982 n_shexp_parse_token(struct n_string
*store
, struct str
*input
, /* TODO WCHAR */
983 enum n_shexp_parse_flags flags
){
988 a_SKIPQ
= 1<<0, /* Skip rest of this quote (\c0 ..) */
989 a_SURPLUS
= 1<<1, /* Extended sequence interpretation */
990 a_NTOKEN
= 1<<2 /* "New token": e.g., comments are possible */
992 enum n_shexp_state rv
;
994 char const *ib_save
, *ib
;
998 assert((flags
& n_SHEXP_PARSE_DRYRUN
) || store
!= NULL
);
999 assert(input
!= NULL
);
1000 assert(input
->l
== 0 || input
->s
!= NULL
);
1001 assert(!(flags
& n_SHEXP_PARSE_LOG
) || !(flags
& n_SHEXP_PARSE_LOG_D_V
));
1002 assert(!(flags
& n_SHEXP_PARSE_IFS_ADD_COMMA
) ||
1003 !(flags
& n_SHEXP_PARSE_IFS_IS_COMMA
));
1004 assert(!(flags
& n_SHEXP_PARSE_QUOTE_AUTO_FIXED
) ||
1005 (flags
& n__SHEXP_PARSE_QUOTE_AUTO_MASK
));
1007 if((flags
& n_SHEXP_PARSE_LOG_D_V
) && (n_poption
& n_PO_D_V
))
1008 flags
|= n_SHEXP_PARSE_LOG
;
1009 if(flags
& n_SHEXP_PARSE_QUOTE_AUTO_FIXED
)
1010 flags
|= n_SHEXP_PARSE_QUOTE_AUTO_CLOSE
;
1012 if((flags
& n_SHEXP_PARSE_TRUNC
) && store
!= NULL
)
1013 store
= n_string_trunc(store
, 0);
1016 if((il
= input
->l
) == UIZ_MAX
)
1020 if(flags
& n_SHEXP_PARSE_TRIMSPACE
){
1021 for(; il
> 0; ++ib
, --il
)
1022 if(!blankspacechar(*ib
))
1025 input
->s
= n_UNCONST(ib
);
1029 rv
= n_SHEXP_STATE_STOP
;
1034 store
= n_string_reserve(store
, n_MIN(il
, 32)); /* XXX */
1036 rv
= n_SHEXP_STATE_NONE
;
1037 switch(flags
& n__SHEXP_PARSE_QUOTE_AUTO_MASK
){
1038 case n_SHEXP_PARSE_QUOTE_AUTO_SQ
:
1042 case n_SHEXP_PARSE_QUOTE_AUTO_DQ
:
1045 case n_SHEXP_PARSE_QUOTE_AUTO_DSQ
:
1059 /* If no quote-mode active.. */
1061 if(c
== '"' || c
== '\''){
1066 state
&= ~a_SURPLUS
;
1080 }else if(c
== '\\'){
1081 /* Outside of quotes this just escapes any next character, but a sole
1082 * <backslash> at EOS is left unchanged */
1086 }else if(c
== '#' && (state
& a_NTOKEN
)){
1087 rv
|= n_SHEXP_STATE_STOP
;
1089 }else if(c
== ',' && (flags
&
1090 (n_SHEXP_PARSE_IFS_ADD_COMMA
| n_SHEXP_PARSE_IFS_IS_COMMA
)))
1092 else if(blankchar(c
)){
1093 if(!(flags
& n_SHEXP_PARSE_IFS_IS_COMMA
)){
1102 assert(!(state
& a_NTOKEN
));
1103 if(c
== quotec
&& !(flags
& n_SHEXP_PARSE_QUOTE_AUTO_FIXED
)){
1106 /* Users may need to recognize the presence of empty quotes */
1107 rv
|= n_SHEXP_STATE_OUTPUT
;
1109 }else if(c
== '\\' && (state
& a_SURPLUS
)){
1111 /* A sole <backslash> at EOS is treated as-is! This is ok since
1112 * the "closing quote" error will occur next, anyway */
1115 else if((c2
= *ib
) == quotec
){
1118 }else if(quotec
== '"'){
1120 * The <backslash> shall retain its special meaning as an
1121 * escape character (see Section 2.2.1) only when followed
1122 * by one of the following characters when considered
1123 * special: $ ` " \ <newline> */
1127 /* case '"': already handled via c2 == quotec */
1136 /* Dollar-single-quote */
1140 /* case '\'': already handled via c2 == quotec */
1145 case 'b': c
= '\b'; break;
1146 case 'f': c
= '\f'; break;
1147 case 'n': c
= '\n'; break;
1148 case 'r': c
= '\r'; break;
1149 case 't': c
= '\t'; break;
1150 case 'v': c
= '\v'; break;
1153 case 'e': c
= '\033'; break;
1155 /* Control character */
1158 goto j_dollar_ungetc
;
1162 c
= upperconv(c2
) ^ 0x40;
1163 if((ui8_t
)c
> 0x1F && c
!= 0x7F){ /* ASCII C0: 0..1F, 7F */
1164 if(flags
& n_SHEXP_PARSE_LOG
)
1165 n_err(_("Invalid \\c notation: %.*s\n"),
1166 (int)input
->l
, input
->s
);
1167 rv
|= n_SHEXP_STATE_ERR_CONTROL
;
1169 /* As an implementation-defined extension, support \c@
1170 * as an equivalent of the printf(1)-alike \c */
1172 rv
|= n_SHEXP_STATE_STOP
;
1177 /* Octal sequence: 1 to 3 octal digits */
1179 /* As an extension (dependent on where you look, echo(1), or
1180 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1181 if(il
> 0 && (c
= *ib
) >= '0' && c
<= '7'){
1186 case '1': case '2': case '3':
1187 case '4': case '5': case '6': case '7':
1189 if(il
> 0 && (c
= *ib
) >= '0' && c
<= '7'){
1190 c2
= (c2
<< 3) | (c
- '0');
1193 if(il
> 0 && (c
= *ib
) >= '0' && c
<= '7'){
1194 if((ui8_t
)c2
> 0x1F){
1195 if(flags
& n_SHEXP_PARSE_LOG
)
1196 n_err(_("\\0 argument exceeds a byte: %.*s\n"),
1197 (int)input
->l
, input
->s
);
1198 rv
|= n_SHEXP_STATE_ERR_NUMBER
;
1200 /* Write unchanged */
1202 rv
|= n_SHEXP_STATE_OUTPUT
;
1203 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
1204 store
= n_string_push_buf(store
, ib_save
,
1205 PTR2SIZE(ib
- ib_save
));
1208 c2
= (c2
<< 3) | (c
-= '0');
1211 if((c
= c2
) == '\0')
1217 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal digits */
1226 goto j_dollar_ungetc
;
1230 /* Hexadecimal sequence, 1 or 2 hexadecimal digits */
1234 goto j_dollar_ungetc
;
1238 static ui8_t
const hexatoi
[] = { /* XXX uses ASCII */
1239 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1244 for(no
= j
= 0; i
-- > 0; --il
, ++ib
, ++j
){
1248 no
+= hexatoi
[(ui8_t
)((c
) - ((c
) <= '9' ? 48
1249 : ((c
) <= 'F' ? 55 : 87)))];
1253 c2
= (c2
== 'U' || c2
== 'u') ? 'u' : 'x';
1254 if(flags
& n_SHEXP_PARSE_LOG
)
1255 n_err(_("Invalid \\%c notation: %.*s\n"),
1256 c2
, (int)input
->l
, input
->s
);
1257 rv
|= n_SHEXP_STATE_ERR_NUMBER
;
1263 /* Unicode massage */
1264 if((c2
!= 'U' && c2
!= 'u') || n_uasciichar(no
)){
1265 if((c
= (char)no
) == '\0')
1269 else if(!(state
& a_SKIPQ
)){
1270 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
1271 store
= n_string_reserve(store
, n_MAX(j
, 4));
1274 if(no
> 0x10FFFF){ /* XXX magic; CText */
1275 if(flags
& n_SHEXP_PARSE_LOG
)
1276 n_err(_("\\U argument exceeds 0x10FFFF: %.*s\n"),
1277 (int)input
->l
, input
->s
);
1278 rv
|= n_SHEXP_STATE_ERR_NUMBER
;
1279 /* But normalize the output anyway */
1283 j
= n_utf32_to_utf8(no
, utf
);
1285 if(n_psonce
& n_PSO_UNICODE
){
1286 rv
|= n_SHEXP_STATE_OUTPUT
| n_SHEXP_STATE_UNICODE
;
1287 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
1288 store
= n_string_push_buf(store
, utf
, j
);
1295 icp
= n_iconv_onetime_cp(n_ICONV_NONE
,
1298 rv
|= n_SHEXP_STATE_OUTPUT
;
1299 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
1300 store
= n_string_push_cp(store
, icp
);
1305 if(!(flags
& n_SHEXP_PARSE_DRYRUN
)) Je_uni_norm
:{
1308 rv
|= n_SHEXP_STATE_OUTPUT
|
1309 n_SHEXP_STATE_ERR_UNICODE
;
1310 i
= snprintf(itoa
, sizeof itoa
, "\\%c%0*X",
1311 (no
> 0xFFFFu
? 'U' : 'u'),
1312 (int)(no
> 0xFFFFu
? 8 : 4), (ui32_t
)no
);
1313 store
= n_string_push_buf(store
, itoa
, i
);
1322 /* Extension: \$ can be used to expand a variable.
1323 * Bug|ad effect: if conversion fails, not written "as-is" */
1326 goto j_dollar_ungetc
;
1331 /* Follow bash behaviour, print sequence unchanged */
1336 }else if(c
== '$' && quotec
== '"' && il
> 0) J_var_expand
:{
1339 if(!(brace
= (*ib
== '{')) || il
> 1){
1340 char const *cp
, *vp
;
1346 for(i
= 0; il
> 0; --il
, ++ib
, ++i
){
1347 /* We have some special cases regarding macro-local special
1348 * parameters, so ensure these don't cause failure */
1350 if(!a_SHEXP_ISVARC(c
)){
1351 if(i
== 0 && a_SHEXP_ISVARC_SPECIAL1(c
)){
1359 if(state
& a_SKIPQ
){
1360 if(brace
&& il
> 0 && *ib
== '}')
1367 if(flags
& n_SHEXP_PARSE_LOG
)
1368 n_err(_("Bad substitution (${}): %.*s\n Near %.*s\n"),
1369 (int)input
->l
, input
->s
, (int)il
, ib
);
1370 rv
|= n_SHEXP_STATE_ERR_BADSUB
;
1376 if(il
== 0 || *ib
!= '}'){
1377 if(flags
& n_SHEXP_PARSE_LOG
)
1378 n_err(_("Missing closing brace for ${VAR}: %.*s\n"
1380 (int)input
->l
, input
->s
, (int)il
, ib
);
1381 rv
|= n_SHEXP_STATE_ERR_QUOTEOPEN
|
1382 n_SHEXP_STATE_ERR_BRACE
;
1388 if(flags
& n_SHEXP_PARSE_DRYRUN
)
1391 /* Check getenv(3) should no internal variable exist! */
1392 /* TODO Expansion of $-* and $-@ not shell compatible, if
1393 * TODO that occurs within double quotes.
1394 * TODO Same notes on that in accmacvar.c, shexp.c */
1395 vp
= savestrbuf(vp
, i
);
1396 if((cp
= n_var_vlook(vp
, TRU1
)) != NULL
){
1397 rv
|= n_SHEXP_STATE_OUTPUT
;
1398 store
= n_string_push_cp(store
, cp
);
1399 for(; (c
= *cp
) != '\0'; ++cp
)
1401 rv
|= n_SHEXP_STATE_CONTROL
;
1408 }else if(c
== '`' && quotec
== '"' && il
> 0){ /* TODO shell command */
1413 if(!(state
& a_SKIPQ
)){
1414 rv
|= n_SHEXP_STATE_OUTPUT
;
1416 rv
|= n_SHEXP_STATE_CONTROL
;
1417 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
1418 store
= n_string_push_c(store
, c
);
1422 if(quotec
!= '\0' && !(flags
& n_SHEXP_PARSE_QUOTE_AUTO_CLOSE
)){
1423 if(flags
& n_SHEXP_PARSE_LOG
)
1424 n_err(_("no closing quote: %.*s\n"), (int)input
->l
, input
->s
);
1425 rv
|= n_SHEXP_STATE_ERR_QUOTEOPEN
;
1429 if((flags
& n_SHEXP_PARSE_DRYRUN
) && store
!= NULL
){
1430 store
= n_string_push_buf(store
, input
->s
, PTR2SIZE(ib
- input
->s
));
1431 rv
|= n_SHEXP_STATE_OUTPUT
;
1434 if(flags
& n_SHEXP_PARSE_TRIMSPACE
){
1435 for(; il
> 0; ++ib
, --il
)
1440 input
->s
= n_UNCONST(ib
);
1442 if(!(rv
& n_SHEXP_STATE_STOP
)){
1443 if(il
> 0 && !(rv
& n_SHEXP_STATE_OUTPUT
) &&
1444 (flags
& n_SHEXP_PARSE_IGNORE_EMPTY
))
1445 goto jrestart_empty
;
1446 if(/*!(rv & n_SHEXP_STATE_OUTPUT) &&*/ il
== 0)
1447 rv
|= n_SHEXP_STATE_STOP
;
1449 assert((rv
& n_SHEXP_STATE_OUTPUT
) || !(rv
& n_SHEXP_STATE_UNICODE
));
1450 assert((rv
& n_SHEXP_STATE_OUTPUT
) || !(rv
& n_SHEXP_STATE_CONTROL
));
1455 FL
enum n_shexp_state
1456 n_shexp_parse_token_buf(char **store
, char const *indat
, size_t inlen
,
1457 enum n_shexp_parse_flags flags
){
1460 enum n_shexp_state shs
;
1463 assert(store
!= NULL
);
1464 assert(inlen
== 0 || indat
!= NULL
);
1466 n_string_creat_auto(&ss
);
1467 is
.s
= n_UNCONST(indat
);
1470 shs
= n_shexp_parse_token(&ss
, &is
, flags
);
1472 shs
&= ~n_SHEXP_STATE_STOP
;
1474 shs
|= n_SHEXP_STATE_STOP
;
1475 *store
= n_string_cp(&ss
);
1476 n_string_drop_ownership(&ss
);
1483 FL
struct n_string
*
1484 n_shexp_quote(struct n_string
*store
, struct str
const *input
, bool_t rndtrip
){
1485 struct a_shexp_quote_lvl sql
;
1486 struct a_shexp_quote_ctx sqc
;
1489 assert(store
!= NULL
);
1490 assert(input
!= NULL
);
1491 assert(input
->l
== 0 || input
->s
!= NULL
);
1493 memset(&sqc
, 0, sizeof sqc
);
1494 sqc
.sqc_store
= store
;
1495 sqc
.sqc_input
.s
= input
->s
;
1496 if((sqc
.sqc_input
.l
= input
->l
) == UIZ_MAX
)
1497 sqc
.sqc_input
.l
= strlen(input
->s
);
1498 sqc
.sqc_flags
= rndtrip
? a_SHEXP_QUOTE_ROUNDTRIP
: a_SHEXP_QUOTE_NONE
;
1500 if(sqc
.sqc_input
.l
== 0)
1501 store
= n_string_push_buf(store
, "''", sizeof("''") -1);
1503 memset(&sql
, 0, sizeof sql
);
1504 sql
.sql_dat
= sqc
.sqc_input
;
1505 sql
.sql_flags
= sqc
.sqc_flags
;
1506 a_shexp__quote(&sqc
, &sql
);
1513 n_shexp_quote_cp(char const *cp
, bool_t rndtrip
){
1514 struct n_string store
;
1521 input
.s
= n_UNCONST(cp
);
1523 rv
= n_string_cp(n_shexp_quote(n_string_creat_auto(&store
), &input
,
1525 n_string_gut(n_string_drop_ownership(&store
));
1531 n_shexp_is_valid_varname(char const *name
){
1536 for(rv
= TRU1
; (c
= *name
++) != '\0';)
1537 if(!a_SHEXP_ISVARC(c
)){