1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
4 *@ TODO "Magic solidus" used as path separator.
6 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
7 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
10 * Copyright (c) 1980, 1993
11 * The Regents of the University of California. All rights reserved.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 #ifndef HAVE_AMALGAMATION
52 * Environment variable names used by the utilities in the Shell and
53 * Utilities volume of POSIX.1-2008 consist solely of uppercase
54 * letters, digits, and the <underscore> ('_') from the characters
55 * defined in Portable Character Set and do not begin with a digit.
56 * Other characters may be permitted by an implementation;
57 * applications shall tolerate the presence of such names.
58 * We do support the hyphen-minus "-" (except in last position for ${x[:]-y}).
59 * We support some special parameter names for one-letter(++) variable names;
60 * these have counterparts in the code that manages internal variables,
61 * and some more special treatment below! */
/* True if C may appear in a variable name: alphanumeric, underscore or
 * hyphen-minus */
62 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* True if C, although a valid name character, must not be the first one
 * (that is, a digit) */
63 #define a_SHEXP_ISVARC_BAD1ST(C) (digitchar(C)) /* (Actually assumed below!) */
/* True if C is the hyphen-minus; presumably the "not in last position" check
 * which keeps ${x[:]-y} unambiguous -- NOTE(review): confirm against users */
64 #define a_SHEXP_ISVARC_BADNST(C) ((C) == '-')
66 enum a_shexp_quote_flags
{
68 a_SHEXP_QUOTE_ROUNDTRIP
= 1u<<0, /* Result won't be consumed immediately */
70 a_SHEXP_QUOTE_T_REVSOL
= 1u<<8, /* Type: by reverse solidus */
71 a_SHEXP_QUOTE_T_SINGLE
= 1u<<9, /* Type: single-quotes */
72 a_SHEXP_QUOTE_T_DOUBLE
= 1u<<10, /* Type: double-quotes */
73 a_SHEXP_QUOTE_T_DOLLAR
= 1u<<11, /* Type: dollar-single-quotes */
74 a_SHEXP_QUOTE_T_MASK
= a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_SINGLE
|
75 a_SHEXP_QUOTE_T_DOUBLE
| a_SHEXP_QUOTE_T_DOLLAR
,
77 a_SHEXP_QUOTE__FREESHIFT
= 16u
81 struct a_shexp_glob_ctx
{
82 char const *sgc_patdat
; /* Remaining pattern (at and below level) */
84 struct n_string
*sgc_outer
; /* Resolved path up to this level */
90 struct a_shexp_quote_ctx
{
91 struct n_string
*sqc_store
; /* Result storage */
92 struct str sqc_input
; /* Input data, topmost level */
94 ui32_t sqc_cnt_single
;
95 ui32_t sqc_cnt_double
;
96 ui32_t sqc_cnt_dollar
;
97 enum a_shexp_quote_flags sqc_flags
;
101 struct a_shexp_quote_lvl
{
102 struct a_shexp_quote_lvl
*sql_link
; /* Outer level */
103 struct str sql_dat
; /* This level (has to) handle(d) */
104 enum a_shexp_quote_flags sql_flags
;
108 /* Locate the user's mailbox file (where new, unread mail is queued) */
109 static char *a_shexp_findmail(char const *user
, bool_t force
);
111 /* Expand ^~/? and ^~USER/? constructs.
112 * Returns the completely resolved (maybe empty or identical to input)
113 * salloc()ed string */
114 static char *a_shexp_tilde(char const *s
);
116 /* Perform fnmatch(3). May return NULL on error */
117 static char *a_shexp_globname(char const *name
, enum fexp_mode fexpm
);
119 static bool_t
a_shexp__glob(struct a_shexp_glob_ctx
*sgcp
,
120 struct n_strlist
**slpp
);
121 static int a_shexp__globsort(void const *cvpa
, void const *cvpb
);
124 /* Parse an input string and create a sh(1)ell-quoted result */
125 static void a_shexp__quote(struct a_shexp_quote_ctx
*sqcp
,
126 struct a_shexp_quote_lvl
*sqlp
);
129 a_shexp_findmail(char const *user
, bool_t force
){
135 if((cp
= ok_vlook(inbox
)) != NULL
&& *cp
!= '\0'){
136 /* _NFOLDER extra introduced to avoid % recursion loops */
137 if((rv
= fexpand(cp
, FEXP_NSPECIAL
| FEXP_NFOLDER
| FEXP_NSHELL
)
140 n_err(_("*inbox* expansion failed, using $MAIL / built-in: %s\n"), cp
);
142 /* Heirloom compatibility: an IMAP *folder* becomes "%" */
144 else if(cp
== NULL
&& !strcmp(user
, ok_vlook(LOGNAME
)) &&
145 which_protocol(cp
= n_folder_query(), FAL0
, FAL0
, NULL
)
147 /* TODO Compat handling of *folder* with IMAP! */
148 n_OBSOLETE("no more expansion of *folder* in \"%\": "
149 "please set *inbox*");
155 if((cp
= ok_vlook(MAIL
)) != NULL
){
164 ul
= strlen(user
) +1;
165 i
= sizeof(VAL_MAIL
) -1 + 1 + ul
;
168 memcpy(rv
, VAL_MAIL
, (i
= sizeof(VAL_MAIL
) -1));
170 memcpy(&rv
[++i
], user
, ul
);
178 a_shexp_tilde(char const *s
){
185 if(*(rp
= &s
[1]) == '/' || *rp
== '\0'){
189 if((rp
= strchr(np
= rp
, '/')) != NULL
){
190 nl
= PTR2SIZE(rp
- np
);
191 np
= savestrbuf(np
, nl
);
196 if((pwp
= getpwnam(np
)) == NULL
){
204 rv
= salloc(nl
+ 1 + rl
+1);
207 memcpy(rv
+ nl
, rp
, rl
);
217 a_shexp_globname(char const *name
, enum fexp_mode fexpm
){
219 struct a_shexp_glob_ctx sgc
;
220 struct n_string outer
;
221 struct n_strlist
*slp
;
225 memset(&sgc
, 0, sizeof sgc
);
226 sgc
.sgc_patlen
= strlen(name
);
227 sgc
.sgc_patdat
= savestrbuf(name
, sgc
.sgc_patlen
);
228 sgc
.sgc_outer
= n_string_reserve(n_string_creat(&outer
), sgc
.sgc_patlen
);
229 sgc
.sgc_flags
= ((fexpm
& FEXP_SILENT
) != 0);
231 if(a_shexp__glob(&sgc
, &slp
))
235 n_string_gut(&outer
);
241 cp
= n_UNCONST(N_("File pattern does not match"));
243 }else if(slp
->sl_next
== NULL
)
244 cp
= savestrbuf(slp
->sl_dat
, slp
->sl_len
);
245 else if(fexpm
& FEXP_MULTIOK
){
246 struct n_strlist
**sorta
, *xslp
;
250 for(xslp
= slp
; xslp
!= NULL
; xslp
= xslp
->sl_next
){
252 l
+= xslp
->sl_len
+ 1;
255 sorta
= smalloc(sizeof(*sorta
) * no
);
257 for(xslp
= slp
; xslp
!= NULL
; xslp
= xslp
->sl_next
)
259 qsort(sorta
, no
, sizeof *sorta
, &a_shexp__globsort
);
263 for(i
= 0; i
< no
; ++i
){
265 memcpy(&cp
[l
], xslp
->sl_dat
, xslp
->sl_len
);
272 n_pstate
|= n_PS_EXPAND_MULTIRESULT
;
274 cp
= n_UNCONST(N_("File pattern matches multiple results"));
280 struct n_strlist
*tmp
= slp
;
289 if(!(fexpm
& FEXP_SILENT
)){
290 name
= n_shexp_quote_cp(name
, FAL0
);
291 n_err("%s: %s\n", V_(cp
), name
);
296 #else /* HAVE_FNMATCH */
299 if(!(fexpm
& FEXP_SILENT
))
300 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
301 return savestr(name
);
307 a_shexp__glob(struct a_shexp_glob_ctx
*sgcp
, struct n_strlist
**slpp
){
308 enum{a_SILENT
= 1<<0, a_DEEP
=1<<1, a_SALLOC
=1<<2};
310 struct a_shexp_glob_ctx nsgc
;
314 char const *ccp
, *myp
;
317 /* We need some special treatment for the outermost level.
318 * All along our way, normalize path separators */
319 if(!(sgcp
->sgc_flags
& a_DEEP
)){
320 if(sgcp
->sgc_patlen
> 0 && sgcp
->sgc_patdat
[0] == '/'){
321 myp
= n_string_cp(n_string_push_c(sgcp
->sgc_outer
, '/'));
324 while(--sgcp
->sgc_patlen
> 0 && sgcp
->sgc_patdat
[0] == '/');
328 myp
= n_string_cp(sgcp
->sgc_outer
);
329 old_outerlen
= sgcp
->sgc_outer
->s_len
;
331 /* Separate current directory/pattern level from any possible remaining
332 * pattern in order to be able to use it for fnmatch(3) */
333 if((ccp
= memchr(sgcp
->sgc_patdat
, '/', sgcp
->sgc_patlen
)) == NULL
)
337 nsgc
.sgc_flags
|= a_DEEP
;
338 sgcp
->sgc_patlen
= PTR2SIZE((nsgc
.sgc_patdat
= &ccp
[1]) -
339 &sgcp
->sgc_patdat
[0]);
340 nsgc
.sgc_patlen
-= sgcp
->sgc_patlen
;
342 /* Trim solidus, everywhere */
343 if(sgcp
->sgc_patlen
> 0){
344 assert(sgcp
->sgc_patdat
[sgcp
->sgc_patlen
-1] == '/');
345 ((char*)n_UNCONST(sgcp
->sgc_patdat
))[--sgcp
->sgc_patlen
] = '\0';
347 while(nsgc
.sgc_patlen
> 0 && nsgc
.sgc_patdat
[0] == '/'){
353 /* Our current directory level */
354 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
355 * xxx be the (sole) result depending on pattern surroundings, etc. */
356 if((dp
= opendir(myp
)) == NULL
){
359 switch((err
= n_err_no
)){
361 ccp
= N_("cannot access paths under non-directory");
364 ccp
= N_("path component of (sub)pattern non-existent");
367 ccp
= N_("file permission for file (sub)pattern denied");
371 ccp
= N_("file descriptor limit reached, cannot open directory");
374 ccp
= N_("cannot open path component as directory");
379 /* As necessary, quote bytes in the current pattern TODO This will not
380 * TODO truly work out in case the user would try to quote a character
381 * TODO class, for example: in "\[a-z]" the "\" would be doubled! For that
382 * TODO to work out, we need the original user input or the shell-expression
383 * TODO parse tree, otherwise we do not know what is desired! */
389 for(need
= FAL0
, i
= 0, myp
= sgcp
->sgc_patdat
; *myp
!= '\0'; ++myp
)
391 case '\'': case '"': case '\\': case '$':
403 for(i
= 0, myp
= sgcp
->sgc_patdat
; *myp
!= '\0'; ++myp
)
405 case '\'': case '"': case '\\': case '$':
416 myp
= sgcp
->sgc_patdat
;
419 while((dep
= readdir(dp
)) != NULL
){
420 switch(fnmatch(myp
, dep
->d_name
, FNM_PATHNAME
| FNM_PERIOD
)){
422 /* A match expresses the desire to recurse if there is more pattern */
423 if(nsgc
.sgc_patlen
> 0){
426 n_string_push_cp((sgcp
->sgc_outer
->s_len
> 1
427 ? n_string_push_c(sgcp
->sgc_outer
, '/') : sgcp
->sgc_outer
),
431 #ifdef HAVE_DIRENT_TYPE
432 if(dep
->d_type
== DT_DIR
)
434 else if(dep
->d_type
== DT_LNK
|| dep
->d_type
== DT_UNKNOWN
)
439 if(stat(n_string_cp(sgcp
->sgc_outer
), &sb
)){
440 ccp
= N_("I/O error when querying file status");
442 }else if(S_ISDIR(sb
.st_mode
))
446 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
447 * TODO Instead save away a list of such n_string's for later */
448 if(isdir
&& !a_shexp__glob(&nsgc
, slpp
)){
453 n_string_trunc(sgcp
->sgc_outer
, old_outerlen
);
455 struct n_strlist
*slp
;
458 i
= strlen(dep
->d_name
);
459 j
= (old_outerlen
> 0) ? old_outerlen
+ 1 + i
: i
;
460 slp
= n_STRLIST_ALLOC(j
);
462 slpp
= &slp
->sl_next
;
464 if((j
= old_outerlen
) > 0){
465 memcpy(&slp
->sl_dat
[0], sgcp
->sgc_outer
->s_dat
, j
);
466 if(slp
->sl_dat
[j
-1] != '/')
467 slp
->sl_dat
[j
++] = '/';
469 memcpy(&slp
->sl_dat
[j
], dep
->d_name
, i
);
470 slp
->sl_dat
[j
+= i
] = '\0';
477 ccp
= N_("fnmatch(3) cannot handle file (sub)pattern");
487 return (ccp
== NULL
);
490 if(!(sgcp
->sgc_flags
& a_SILENT
)){
493 if(sgcp
->sgc_outer
->s_len
> 0){
494 s2
= n_shexp_quote_cp(n_string_cp(sgcp
->sgc_outer
), FAL0
);
499 n_err("%s: %s%s%s\n", V_(ccp
), s2
, s3
,
500 n_shexp_quote_cp(sgcp
->sgc_patdat
, FAL0
));
506 a_shexp__globsort(void const *cvpa
, void const *cvpb
){
508 struct n_strlist
const * const *slpa
, * const *slpb
;
513 rv
= asccasecmp((*slpa
)->sl_dat
, (*slpb
)->sl_dat
);
517 #endif /* HAVE_FNMATCH */
520 a_shexp__quote(struct a_shexp_quote_ctx
*sqcp
, struct a_shexp_quote_lvl
*sqlp
){
521 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
522 * XXX use the recursive implementation because of stateful encodings.
523 * XXX I.e., if a quoted substring cannot be self-contained - the data after
524 * XXX the quote relies on "the former state", then this doesn't make sense.
525 * XXX Therefore this is not fully programmed out but instead only detects
526 * XXX the "most fancy" quoting necessary, and directly does that.
527 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
528 * XXX Otherwise we rather have to convert to wide first and act on that,
529 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
530 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
531 #ifdef a_SHEXP_QUOTE_RECURSE
532 # define jrecurse jrecurse
533 struct a_shexp_quote_lvl sql
;
535 # define jrecurse jstep
537 struct n_visual_info_ctx vic
;
538 union {struct a_shexp_quote_lvl
*head
; struct n_string
*store
;} u
;
541 char const *ib
, *ib_base
;
544 ib_base
= ib
= sqlp
->sql_dat
.s
;
545 il
= sqlp
->sql_dat
.l
;
546 flags
= sqlp
->sql_flags
;
548 /* Iterate over the entire input, classify characters and type of quotes
549 * along the way. Whenever a quote change has to be applied, adjust flags
550 * for the new situation -, setup sql.* and recurse- */
556 if(flags
& a_SHEXP_QUOTE_T_DOLLAR
)
558 if(c
== '\t' && (flags
& (a_SHEXP_QUOTE_T_REVSOL
|
559 a_SHEXP_QUOTE_T_SINGLE
| a_SHEXP_QUOTE_T_DOUBLE
)))
561 #ifdef a_SHEXP_QUOTE_RECURSE
562 ++sqcp
->sqc_cnt_dollar
;
564 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_DOLLAR
;
566 }else if(blankspacechar(c
) || c
== '|' || c
== '&' || c
== ';' ||
567 /* Whereas we don't support those, quote them for the sh(1)ell */
568 c
== '(' || c
== ')' || c
== '<' || c
== '>' ||
569 c
== '"' || c
== '$'){
570 if(flags
& a_SHEXP_QUOTE_T_MASK
)
572 #ifdef a_SHEXP_QUOTE_RECURSE
573 ++sqcp
->sqc_cnt_single
;
575 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_SINGLE
;
578 if(flags
& (a_SHEXP_QUOTE_T_MASK
& ~a_SHEXP_QUOTE_T_SINGLE
))
580 #ifdef a_SHEXP_QUOTE_RECURSE
581 ++sqcp
->sqc_cnt_dollar
;
583 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_DOLLAR
;
585 }else if(c
== '\\' || (c
== '#' && ib
== ib_base
)){
586 if(flags
& a_SHEXP_QUOTE_T_MASK
)
588 #ifdef a_SHEXP_QUOTE_RECURSE
589 ++sqcp
->sqc_cnt_single
;
591 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_SINGLE
;
593 }else if(!asciichar(c
)){
594 /* Need to keep together multibytes */
595 #ifdef a_SHEXP_QUOTE_RECURSE
596 memset(&vic
, 0, sizeof vic
);
600 n_VISUAL_INFO_ONE_CHAR
| n_VISUAL_INFO_SKIP_ERRORS
);
602 /* xxx check whether resulting \u would be ASCII */
603 if(!(flags
& a_SHEXP_QUOTE_ROUNDTRIP
) ||
604 (flags
& a_SHEXP_QUOTE_T_DOLLAR
)){
605 #ifdef a_SHEXP_QUOTE_RECURSE
613 #ifdef a_SHEXP_QUOTE_RECURSE
614 ++sqcp
->sqc_cnt_dollar
;
616 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_DOLLAR
;
622 sqlp
->sql_flags
= flags
;
624 /* This level has finished processing its input. Reverse the list of
625 * levels, detect the "most fancy" quote type needed along this way */
626 /* XXX Due to restriction as above very crude */
627 for(flags
= 0, il
= 0, u
.head
= NULL
; sqlp
!= NULL
;){
628 struct a_shexp_quote_lvl
*tmp
;
630 tmp
= sqlp
->sql_link
;
631 sqlp
->sql_link
= u
.head
;
633 il
+= sqlp
->sql_dat
.l
;
634 if(sqlp
->sql_flags
& a_SHEXP_QUOTE_T_MASK
)
635 il
+= (sqlp
->sql_dat
.l
>> 1);
636 flags
|= sqlp
->sql_flags
;
641 /* Finally work the substrings in the correct order, adjusting quotes along
642 * the way as necessary. Start off with the "most fancy" quote, so that
643 * the user sees an overall boundary she can orientate herself on.
644 * We do it like that to be able to give the user some "encapsulation
645 * experience", to address what strikes me as a problem of sh(1)ell quoting:
646 * different to, e.g., perl(1), where you see at a glance where a string
647 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
648 * visual appearance of "a string" as such */
649 u
.store
= n_string_reserve(sqcp
->sqc_store
, il
);
651 if(flags
& a_SHEXP_QUOTE_T_DOLLAR
){
652 u
.store
= n_string_push_buf(u
.store
, "$'", sizeof("$'") -1);
653 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_DOLLAR
;
654 }else if(flags
& a_SHEXP_QUOTE_T_DOUBLE
){
655 u
.store
= n_string_push_c(u
.store
, '"');
656 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_DOUBLE
;
657 }else if(flags
& a_SHEXP_QUOTE_T_SINGLE
){
658 u
.store
= n_string_push_c(u
.store
, '\'');
659 flags
= (flags
& ~a_SHEXP_QUOTE_T_MASK
) | a_SHEXP_QUOTE_T_SINGLE
;
660 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
661 flags
&= ~a_SHEXP_QUOTE_T_MASK
;
663 /* Work all the levels */
664 for(; sqlp
!= NULL
; sqlp
= sqlp
->sql_link
){
665 /* As necessary update our mode of quoting */
666 #ifdef a_SHEXP_QUOTE_RECURSE
669 switch(sqlp
->sql_flags
& a_SHEXP_QUOTE_T_MASK
){
670 case a_SHEXP_QUOTE_T_DOLLAR
:
671 if(!(flags
& a_SHEXP_QUOTE_T_DOLLAR
))
672 il
= a_SHEXP_QUOTE_T_DOLLAR
;
674 case a_SHEXP_QUOTE_T_DOUBLE
:
675 if(!(flags
& (a_SHEXP_QUOTE_T_DOUBLE
| a_SHEXP_QUOTE_T_DOLLAR
)))
676 il
= a_SHEXP_QUOTE_T_DOLLAR
;
678 case a_SHEXP_QUOTE_T_SINGLE
:
679 if(!(flags
& (a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_SINGLE
|
680 a_SHEXP_QUOTE_T_DOUBLE
| a_SHEXP_QUOTE_T_DOLLAR
)))
681 il
= a_SHEXP_QUOTE_T_SINGLE
;
684 case a_SHEXP_QUOTE_T_REVSOL
:
685 if(!(flags
& (a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_SINGLE
|
686 a_SHEXP_QUOTE_T_DOUBLE
| a_SHEXP_QUOTE_T_DOLLAR
)))
687 il
= a_SHEXP_QUOTE_T_REVSOL
;
692 if(flags
& (a_SHEXP_QUOTE_T_SINGLE
| a_SHEXP_QUOTE_T_DOLLAR
))
693 u
.store
= n_string_push_c(u
.store
, '\'');
694 else if(flags
& a_SHEXP_QUOTE_T_DOUBLE
)
695 u
.store
= n_string_push_c(u
.store
, '"');
696 flags
&= ~a_SHEXP_QUOTE_T_MASK
;
699 if(flags
& a_SHEXP_QUOTE_T_DOLLAR
)
700 u
.store
= n_string_push_buf(u
.store
, "$'", sizeof("$'") -1);
701 else if(flags
& a_SHEXP_QUOTE_T_DOUBLE
)
702 u
.store
= n_string_push_c(u
.store
, '"');
703 else if(flags
& a_SHEXP_QUOTE_T_SINGLE
)
704 u
.store
= n_string_push_c(u
.store
, '\'');
706 #endif /* a_SHEXP_QUOTE_RECURSE */
708 /* Work the level's substring */
709 ib
= sqlp
->sql_dat
.s
;
710 il
= sqlp
->sql_dat
.l
;
718 assert(c
== '\t' || (flags
& a_SHEXP_QUOTE_T_DOLLAR
));
719 assert((flags
& (a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_SINGLE
|
720 a_SHEXP_QUOTE_T_DOUBLE
| a_SHEXP_QUOTE_T_DOLLAR
)));
722 case 0x07: c
= 'a'; break;
723 case 0x08: c
= 'b'; break;
724 case 0x0A: c
= 'n'; break;
725 case 0x0B: c
= 'v'; break;
726 case 0x0C: c
= 'f'; break;
727 case 0x0D: c
= 'r'; break;
728 case 0x1B: c
= 'E'; break;
731 if(flags
& a_SHEXP_QUOTE_T_DOLLAR
){
735 if(flags
& a_SHEXP_QUOTE_T_REVSOL
)
736 u
.store
= n_string_push_c(u
.store
, '\\');
739 u
.store
= n_string_push_c(u
.store
, '\\');
741 u
.store
= n_string_push_c(u
.store
, 'c');
745 }else if(blankspacechar(c
) || c
== '|' || c
== '&' || c
== ';' ||
746 /* Whereas we don't support those, quote them for the sh(1)ell */
747 c
== '(' || c
== ')' || c
== '<' || c
== '>' ||
748 c
== '"' || c
== '$'){
749 if(flags
& (a_SHEXP_QUOTE_T_SINGLE
| a_SHEXP_QUOTE_T_DOLLAR
))
751 assert(flags
& (a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_DOUBLE
));
752 u
.store
= n_string_push_c(u
.store
, '\\');
755 if(flags
& a_SHEXP_QUOTE_T_DOUBLE
)
757 assert(!(flags
& a_SHEXP_QUOTE_T_SINGLE
));
758 u
.store
= n_string_push_c(u
.store
, '\\');
760 }else if(c
== '\\' || (c
== '#' && ib
== ib_base
)){
761 if(flags
& a_SHEXP_QUOTE_T_SINGLE
)
763 assert(flags
& (a_SHEXP_QUOTE_T_REVSOL
| a_SHEXP_QUOTE_T_DOUBLE
|
764 a_SHEXP_QUOTE_T_DOLLAR
));
765 u
.store
= n_string_push_c(u
.store
, '\\');
767 }else if(asciichar(c
)){
768 /* Shorthand: we can simply push that thing out */
770 u
.store
= n_string_push_c(u
.store
, c
);
773 /* Not an ASCII character, take care not to split up multibyte
774 * sequences etc. For the sake of compile testing, don't enwrap in
775 * HAVE_ALWAYS_UNICODE_LOCALE || HAVE_NATCH_CHAR */
776 if(n_psonce
& n_PSO_UNICODE
){
783 if((uc
= n_utf8_to_utf32(&ib2
, &il2
)) != UI32_MAX
){
787 il2
= PTR2SIZE(&ib2
[0] - &ib
[0]);
788 if((flags
& a_SHEXP_QUOTE_ROUNDTRIP
) || uc
== 0xFFFDu
){
789 /* Use padding to make ambiguities impossible */
790 il3
= snprintf(itoa
, sizeof itoa
, "\\%c%0*X",
791 (uc
> 0xFFFFu
? 'U' : 'u'),
792 (int)(uc
> 0xFFFFu
? 8 : 4), uc
);
798 u
.store
= n_string_push_buf(u
.store
, cp
, il3
);
799 ib
+= il2
, il
-= il2
;
804 memset(&vic
, 0, sizeof vic
);
808 n_VISUAL_INFO_ONE_CHAR
| n_VISUAL_INFO_SKIP_ERRORS
);
810 /* Work this substring as sensitive as possible */
812 if(!(flags
& a_SHEXP_QUOTE_ROUNDTRIP
))
813 u
.store
= n_string_push_buf(u
.store
, ib
, il
);
815 else if((vic
.vic_indat
= n_iconv_onetime_cp(n_ICONV_NONE
,
816 "utf-8", ok_vlook(ttycharset
), savestrbuf(ib
, il
))) != NULL
){
821 il2
= strlen(ib2
= vic
.vic_indat
);
822 if((uc
= n_utf8_to_utf32(&ib2
, &il2
)) != UI32_MAX
){
825 il2
= PTR2SIZE(&ib2
[0] - &vic
.vic_indat
[0]);
826 /* Use padding to make ambiguities impossible */
827 il3
= snprintf(itoa
, sizeof itoa
, "\\%c%0*X",
828 (uc
> 0xFFFFu
? 'U' : 'u'),
829 (int)(uc
> 0xFFFFu
? 8 : 4), uc
);
830 u
.store
= n_string_push_buf(u
.store
, itoa
, il3
);
840 u
.store
= n_string_push_buf(u
.store
, "\\xFF",
842 n_c_to_hex_base16(&u
.store
->s_dat
[u
.store
->s_len
- 2], *ib
++);
851 /* Close an open quote */
852 if(flags
& (a_SHEXP_QUOTE_T_SINGLE
| a_SHEXP_QUOTE_T_DOLLAR
))
853 u
.store
= n_string_push_c(u
.store
, '\'');
854 else if(flags
& a_SHEXP_QUOTE_T_DOUBLE
)
855 u
.store
= n_string_push_c(u
.store
, '"');
856 #ifdef a_SHEXP_QUOTE_RECURSE
862 #ifdef a_SHEXP_QUOTE_RECURSE
864 sqlp
->sql_dat
.l
-= il
;
867 sql
.sql_dat
.s
= n_UNCONST(ib
);
869 sql
.sql_flags
= flags
;
870 a_shexp__quote(sqcp
, &sql
);
875 #undef a_SHEXP_QUOTE_RECURSE
879 fexpand(char const *name
, enum fexp_mode fexpm
) /* TODO in parts: -> URL::!! */
882 char const *res
, *cp
;
883 bool_t dyn
, haveproto
;
886 n_pstate
&= ~n_PS_EXPAND_MULTIRESULT
;
889 /* The order of evaluation is "%" and "#" expand into constants.
890 * "&" can expand into "+". "+" can expand into shell meta characters.
891 * Shell meta characters expand into constants.
892 * This way, we make no recursive expansion */
893 if((fexpm
& FEXP_NSHORTCUT
) || (res
= shortcut_expand(name
)) == NULL
)
894 res
= n_UNCONST(name
);
897 n_UNINIT(proto
.s
, NULL
), n_UNINIT(proto
.l
, 0);
899 for(cp
= res
; *cp
&& *cp
!= ':'; ++cp
)
902 if(cp
[0] == ':' && cp
[1] == '/' && cp
[2] == '/'){
904 proto
.s
= n_UNCONST(res
);
906 proto
.l
= PTR2SIZE(cp
- res
);
911 if(!(fexpm
& FEXP_NSPECIAL
)){
916 if(res
[1] == ':' && res
[2] != '\0'){
922 force
= (res
[1] != '\0');
923 res
= a_shexp_findmail((force
? &res
[1] : ok_vlook(LOGNAME
)),
932 if (prevfile
[0] == '\0') {
933 n_err(_("No previous file\n"));
941 res
= ok_vlook(MBOX
);
949 if(res
[0] == '@' && which_protocol(mailname
, FAL0
, FAL0
, NULL
)
951 res
= str_concat_csvl(&s
, protbase(mailname
), "/", &res
[1], NULL
)->s
;
956 /* POSIX: if *folder* unset or null, "+" shall be retained */
957 if(!(fexpm
& FEXP_NFOLDER
) && *res
== '+' &&
958 *(cp
= n_folder_query()) != '\0'){
959 res
= str_concat_csvl(&s
, cp
, &res
[1], NULL
)->s
;
963 /* Do some meta expansions */
964 if((fexpm
& (FEXP_NSHELL
| FEXP_NVAR
)) != FEXP_NVAR
&&
965 ((fexpm
& FEXP_NSHELL
) ? (strchr(res
, '$') != NULL
)
966 : n_anyof_cp("{}[]*?$", res
))){
969 if(fexpm
& FEXP_NOPROTO
)
972 cp
= haveproto
? savecat(savestrbuf(proto
.s
, proto
.l
), res
) : res
;
974 switch(which_protocol(cp
, TRU1
, FAL0
, NULL
)){
987 struct n_string shou
, *shoup
;
989 shin
.s
= n_UNCONST(res
);
991 shoup
= n_string_creat_auto(&shou
);
993 enum n_shexp_state shs
;
995 /* TODO shexp: take care to not include backtick eval once avail! */
996 shs
= n_shexp_parse_token((n_SHEXP_PARSE_LOG_D_V
|
997 n_SHEXP_PARSE_QUOTE_AUTO_FIXED
| n_SHEXP_PARSE_QUOTE_AUTO_DQ
|
998 n_SHEXP_PARSE_QUOTE_AUTO_CLOSE
), shoup
, &shin
, NULL
);
999 if(shs
& n_SHEXP_STATE_STOP
)
1002 res
= n_string_cp(shoup
);
1003 /*shoup = n_string_drop_ownership(shoup);*/
1007 res
= a_shexp_tilde(res
);
1009 if(!(fexpm
& FEXP_NSHELL
) &&
1010 (res
= a_shexp_globname(res
, fexpm
)) == NULL
)
1013 }/* else no tilde */
1014 }else if(res
[0] == '~'){
1015 res
= a_shexp_tilde(res
);
1020 if(res
!= NULL
&& haveproto
){
1021 res
= savecat(savestrbuf(proto
.s
, proto
.l
), res
);
1025 if(fexpm
& FEXP_LOCAL
){
1026 switch (which_protocol(res
, FAL0
, FAL0
, NULL
)) {
1028 case PROTO_MAILDIR
: /* Cannot happen since we don't stat(2), but.. */
1031 n_err(_("Not a local file or directory: %s\n"),
1032 n_shexp_quote_cp(name
, FAL0
));
1039 if(res
!= NULL
&& !dyn
)
1042 return n_UNCONST(res
);
1045 FL
enum n_shexp_state
1046 n_shexp_parse_token(enum n_shexp_parse_flags flags
, struct n_string
*store
,
1047 struct str
*input
, void const **cookie
){
1048 /* TODO shexp_parse_token: WCHAR
1049 * TODO This needs to be rewritten in order to support $(( )) and $( )
1050 * TODO and ${xyYZ} and the possibly infinite recursion they bring along,
1051 * TODO too. We need a carrier struct, then, and can nicely split this
1052 * TODO big big thing up in little pieces!
1053 * TODO This means it should produce a tree of objects, so that callees
1054 * TODO can recognize whether something happened inside single/double etc.
1055 * TODO quotes; e.g., to requote "'[a-z]'" to, e.g., "\[a-z]", etc.! */
1056 ui32_t last_known_meta_trim_len
;
1057 char c2
, c
, quotec
, utf
[8];
1058 enum n_shexp_state rv
;
1060 char const *ifs
, *ifs_ws
, *ib_save
, *ib
;
1063 a_SKIPQ
= 1u<<0, /* Skip rest of this quote (\u0 ..) */
1064 a_SKIPT
= 1u<<1, /* Skip entire token (\c@) */
1065 a_SKIPMASK
= a_SKIPQ
| a_SKIPT
,
1066 a_SURPLUS
= 1u<<2, /* Extended sequence interpretation */
1067 a_NTOKEN
= 1u<<3, /* "New token": e.g., comments are possible */
1068 a_BRACE
= 1u<<4, /* Variable substitution: brace enclosed */
1069 a_DIGIT1
= 1u<<5, /* ..first character was digit */
1070 a_NONDIGIT
= 1u<<6, /* ..has seen any non-digits */
1071 a_VARSUBST_MASK
= n_BITENUM_MASK(4, 6),
1073 a_ROUND_MASK
= a_SKIPT
| (int)~n_BITENUM_MASK(0, 7),
1076 a_CONSUME
= 1u<<10, /* When done, "consume" remaining input */
1081 assert((flags
& n_SHEXP_PARSE_DRYRUN
) || store
!= NULL
);
1082 assert(input
!= NULL
);
1083 assert(input
->l
== 0 || input
->s
!= NULL
);
1084 assert(!(flags
& n_SHEXP_PARSE_LOG
) || !(flags
& n_SHEXP_PARSE_LOG_D_V
));
1085 assert(!(flags
& n_SHEXP_PARSE_IFS_ADD_COMMA
) ||
1086 !(flags
& n_SHEXP_PARSE_IFS_IS_COMMA
));
1087 assert(!(flags
& n_SHEXP_PARSE_QUOTE_AUTO_FIXED
) ||
1088 (flags
& n__SHEXP_PARSE_QUOTE_AUTO_MASK
));
1090 if((flags
& n_SHEXP_PARSE_LOG_D_V
) && (n_poption
& n_PO_D_V
))
1091 flags
|= n_SHEXP_PARSE_LOG
;
1092 if(flags
& n_SHEXP_PARSE_QUOTE_AUTO_FIXED
)
1093 flags
|= n_SHEXP_PARSE_QUOTE_AUTO_CLOSE
;
1095 if((flags
& n_SHEXP_PARSE_TRUNC
) && store
!= NULL
)
1096 store
= n_string_trunc(store
, 0);
1098 if(flags
& (n_SHEXP_PARSE_IFS_VAR
| n_SHEXP_PARSE_TRIM_IFSSPACE
)){
1099 ifs
= ok_vlook(ifs
);
1100 ifs_ws
= ok_vlook(ifs_ws
);
1102 n_UNINIT(ifs
, n_empty
);
1103 n_UNINIT(ifs_ws
, n_empty
);
1108 if((il
= input
->l
) == UIZ_MAX
)
1109 input
->l
= il
= strlen(ib
);
1112 if(cookie
!= NULL
&& *cookie
!= NULL
){
1113 assert(!(flags
& n_SHEXP_PARSE_DRYRUN
));
1117 rv
= n_SHEXP_STATE_NONE
;
1119 rv
&= n_SHEXP_STATE_WS_LEAD
;
1120 state
&= a_ROUND_MASK
;
1122 /* In cookie mode, the next ARGV entry is the token already, unchanged,
1123 * since it has already been expanded before! */
1124 if(state
& a_COOKIE
){
1125 char const * const *xcookie
, *cp
;
1129 if((store
= n_string_push_cp(store
, *xcookie
))->s_len
> 0)
1130 rv
|= n_SHEXP_STATE_OUTPUT
;
1131 if(*++xcookie
== NULL
){
1134 flags
|= n_SHEXP_PARSE_QUOTE_AUTO_DQ
; /* ..why we are here! */
1136 *cookie
= n_UNCONST(xcookie
);
1138 for(cp
= &n_string_cp(store
)[i
]; (c
= *cp
++) != '\0';)
1140 rv
|= n_SHEXP_STATE_CONTROL
;
1144 /* The last exploded cookie will join with the yielded input token, so
1145 * simply fall through in this case */
1146 if(state
& a_COOKIE
)
1150 if(flags
& n_SHEXP_PARSE_TRIM_SPACE
){
1151 for(; il
> 0; ++ib
, --il
){
1152 if(!blankspacechar(*ib
))
1154 rv
|= n_SHEXP_STATE_WS_LEAD
;
1158 if(flags
& n_SHEXP_PARSE_TRIM_IFSSPACE
){
1159 for(; il
> 0; ++ib
, --il
){
1160 if(strchr(ifs_ws
, *ib
) == NULL
)
1162 rv
|= n_SHEXP_STATE_WS_LEAD
;
1166 input
->s
= n_UNCONST(ib
);
1171 rv
|= n_SHEXP_STATE_STOP
;
1176 store
= n_string_reserve(store
, n_MIN(il
, 32)); /* XXX */
1178 switch(flags
& n__SHEXP_PARSE_QUOTE_AUTO_MASK
){
1179 case n_SHEXP_PARSE_QUOTE_AUTO_SQ
:
1181 rv
|= n_SHEXP_STATE_QUOTE
;
1183 case n_SHEXP_PARSE_QUOTE_AUTO_DQ
:
1186 case n_SHEXP_PARSE_QUOTE_AUTO_DSQ
:
1189 rv
|= n_SHEXP_STATE_QUOTE
;
1198 /* TODO n_SHEXP_PARSE_META_SEMICOLON++, well, hack: we are not the shell,
1199 * TODO we are not a language, and therefore the general *ifs-ws* and normal
1200 * TODO whitespace trimming that input lines undergo (in a_go_evaluate())
1201 * TODO has already happened, our result will be used *as is*, and therefore
1202 * TODO we need to be aware of and remove trailing unquoted WS that would
1203 * TODO otherwise remain, after we have seen a semicolon sequencer.
1204 * By sheer luck we only need to track this in non-quote-mode */
1205 last_known_meta_trim_len
= UI32_MAX
;
1207 while(il
> 0){ /* {{{ */
1210 /* If no quote-mode active.. */
1212 if(c
== '"' || c
== '\''){
1217 state
&= ~a_SURPLUS
;
1219 last_known_meta_trim_len
= UI32_MAX
;
1220 rv
|= n_SHEXP_STATE_QUOTE
;
1225 last_known_meta_trim_len
= UI32_MAX
;
1230 rv
|= n_SHEXP_STATE_QUOTE
;
1235 }else if(c
== '\\'){
1236 /* Outside of quotes this just escapes any next character, but a sole
1237 * <reverse solidus> at EOS is left unchanged */
1241 last_known_meta_trim_len
= UI32_MAX
;
1243 /* This may be a comment, but only if no token has started yet */
1244 else if(c
== '#' && (state
& a_NTOKEN
)){
1245 rv
|= n_SHEXP_STATE_STOP
;
1246 /*last_known_meta_trim_len = UI32_MAX;*/
1249 /* Metacharacters which separate tokens must be turned on explicitly */
1250 else if(c
== '|' && (flags
& n_SHEXP_PARSE_META_VERTBAR
)){
1251 rv
|= n_SHEXP_STATE_META_VERTBAR
;
1253 /* The parsed sequence may be _the_ output, so ensure we don't
1254 * include the metacharacter, then. */
1255 if(flags
& (n_SHEXP_PARSE_DRYRUN
| n_SHEXP_PARSE_META_KEEP
))
1257 /*last_known_meta_trim_len = UI32_MAX;*/
1259 }else if(c
== '&' && (flags
& n_SHEXP_PARSE_META_AMPERSAND
)){
1260 rv
|= n_SHEXP_STATE_META_AMPERSAND
;
1262 /* The parsed sequence may be _the_ output, so ensure we don't
1263 * include the metacharacter, then. */
1264 if(flags
& (n_SHEXP_PARSE_DRYRUN
| n_SHEXP_PARSE_META_KEEP
))
1266 /*last_known_meta_trim_len = UI32_MAX;*/
1268 }else if(c
== ';' && (flags
& n_SHEXP_PARSE_META_SEMICOLON
)){
1270 n_go_input_inject(n_GO_INPUT_INJECT_COMMIT
, ib
, il
);
1271 rv
|= n_SHEXP_STATE_META_SEMICOLON
| n_SHEXP_STATE_STOP
;
1273 if(!(flags
& n_SHEXP_PARSE_DRYRUN
) && (rv
& n_SHEXP_STATE_OUTPUT
) &&
1274 last_known_meta_trim_len
!= UI32_MAX
)
1275 store
= n_string_trunc(store
, last_known_meta_trim_len
);
1277 /* The parsed sequence may be _the_ output, so ensure we don't
1278 * include the metacharacter, then. */
1279 if(flags
& (n_SHEXP_PARSE_DRYRUN
| n_SHEXP_PARSE_META_KEEP
))
1281 /*last_known_meta_trim_len = UI32_MAX;*/
1283 }else if(c
== ',' && (flags
&
1284 (n_SHEXP_PARSE_IFS_ADD_COMMA
| n_SHEXP_PARSE_IFS_IS_COMMA
))){
1285 /* The parsed sequence may be _the_ output, so ensure we don't
1286 * include the metacharacter, then. */
1287 if(flags
& (n_SHEXP_PARSE_DRYRUN
| n_SHEXP_PARSE_META_KEEP
))
1289 /*last_known_meta_trim_len = UI32_MAX;*/
1294 blnk
= blankchar(c
) ? 1 : 0;
1295 blnk
|= ((flags
& (n_SHEXP_PARSE_IFS_VAR
|
1296 n_SHEXP_PARSE_TRIM_IFSSPACE
)) &&
1297 strchr(ifs_ws
, c
) != NULL
) ? 2 : 0;
1299 if((!(flags
& n_SHEXP_PARSE_IFS_VAR
) && (blnk
& 1)) ||
1300 ((flags
& n_SHEXP_PARSE_IFS_VAR
) &&
1301 ((blnk
& 2) || strchr(ifs
, c
) != NULL
))){
1302 if(!(flags
& n_SHEXP_PARSE_IFS_IS_COMMA
)){
1303 /* The parsed sequence may be _the_ output, so ensure we don't
1304 * include the metacharacter, then. */
1305 if(flags
& (n_SHEXP_PARSE_DRYRUN
| n_SHEXP_PARSE_META_KEEP
))
1307 /*last_known_meta_trim_len = UI32_MAX;*/
1314 if(blnk
&& store
!= NULL
){
1315 if(last_known_meta_trim_len
== UI32_MAX
)
1316 last_known_meta_trim_len
= store
->s_len
;
1318 last_known_meta_trim_len
= UI32_MAX
;
1322 assert(!(state
& a_NTOKEN
));
1323 if(c
== quotec
&& !(flags
& n_SHEXP_PARSE_QUOTE_AUTO_FIXED
)){
1324 state
&= a_ROUND_MASK
;
1326 /* Users may need to recognize the presence of empty quotes */
1327 rv
|= n_SHEXP_STATE_OUTPUT
;
1329 }else if(c
== '\\' && (state
& a_SURPLUS
)){
1331 /* A sole <reverse solidus> at EOS is treated as-is! This is ok
1332 * since the "closing quote" error will occur next, anyway */
1335 else if((c2
= *ib
) == quotec
){
1338 }else if(quotec
== '"'){
1339 /* Double quotes, POSIX says:
1340 * The <backslash> shall retain its special meaning as an
1341 * escape character (see Section 2.2.1) only when followed
1342 * by one of the following characters when considered
1343 * special: $ ` " \ <newline> */
1347 /* case '"': already handled via c2 == quotec */
1356 /* Dollar-single-quote */
1360 /* case '\'': already handled via c2 == quotec */
1365 case 'b': c
= '\b'; break;
1366 case 'f': c
= '\f'; break;
1367 case 'n': c
= '\n'; break;
1368 case 'r': c
= '\r'; break;
1369 case 't': c
= '\t'; break;
1370 case 'v': c
= '\v'; break;
1373 case 'e': c
= '\033'; break;
1375 /* Control character */
1378 goto j_dollar_ungetc
;
1380 if(state
& a_SKIPMASK
)
1382 /* ASCII C0: 0..1F, 7F <- @.._ (+ a-z -> A-Z), ? */
1383 c
= upperconv(c2
) ^ 0x40;
1384 if((ui8_t
)c
> 0x1F && c
!= 0x7F){
1385 if(flags
& n_SHEXP_PARSE_LOG
)
1386 n_err(_("Invalid \\c notation: %.*s: %.*s\n"),
1387 (int)input
->l
, input
->s
,
1388 (int)PTR2SIZE(ib
- ib_save
), ib_save
);
1389 rv
|= n_SHEXP_STATE_ERR_CONTROL
;
1391 /* As an implementation-defined extension, support \c@,
1392 * which behaves like the \c of printf(1) */
1399 /* Octal sequence: 1 to 3 octal bytes */
1401 /* As an extension (dependent on where you look, echo(1), or
1402 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1403 if(il
> 0 && (c
= *ib
) >= '0' && c
<= '7'){
1408 case '1': case '2': case '3':
1409 case '4': case '5': case '6': case '7':
1411 if(il
> 0 && (c
= *ib
) >= '0' && c
<= '7'){
1412 c2
= (c2
<< 3) | (c
- '0');
1415 if(il
> 0 && (c
= *ib
) >= '0' && c
<= '7'){
1416 if(!(state
& a_SKIPMASK
) && (ui8_t
)c2
> 0x1F){
1417 rv
|= n_SHEXP_STATE_ERR_NUMBER
;
1419 if(flags
& n_SHEXP_PARSE_LOG
)
1420 n_err(_("\\0 argument exceeds a byte: %.*s: %.*s\n"),
1421 (int)input
->l
, input
->s
,
1422 (int)PTR2SIZE(ib
- ib_save
), ib_save
);
1423 /* Write unchanged */
1425 rv
|= n_SHEXP_STATE_OUTPUT
;
1426 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
1427 store
= n_string_push_buf(store
, ib_save
,
1428 PTR2SIZE(ib
- ib_save
));
1431 c2
= (c2
<< 3) | (c
-= '0');
1434 if(state
& a_SKIPMASK
)
1436 if((c
= c2
) == '\0'){
1442 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
1451 goto j_dollar_ungetc
;
1455 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1459 goto j_dollar_ungetc
;
1463 static ui8_t
const hexatoi
[] = { /* XXX uses ASCII */
1464 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1469 for(no
= j
= 0; i
-- > 0; --il
, ++ib
, ++j
){
1473 no
+= hexatoi
[(ui8_t
)((c
) - ((c
) <= '9' ? 48
1474 : ((c
) <= 'F' ? 55 : 87)))];
1476 if(state
& a_SKIPMASK
)
1478 c2
= (c2
== 'U' || c2
== 'u') ? 'u' : 'x';
1479 if(flags
& n_SHEXP_PARSE_LOG
)
1480 n_err(_("Invalid \\%c notation: %.*s: %.*s\n"),
1481 c2
, (int)input
->l
, input
->s
,
1482 (int)PTR2SIZE(ib
- ib_save
), ib_save
);
1483 rv
|= n_SHEXP_STATE_ERR_NUMBER
;
1489 /* Unicode massage */
1490 if((c2
!= 'U' && c2
!= 'u') || n_uasciichar(no
)){
1491 if((c
= (char)no
) == '\0')
1495 else if(!(state
& a_SKIPMASK
)){
1496 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
1497 store
= n_string_reserve(store
, n_MAX(j
, 4));
1499 if(no
> 0x10FFFF){ /* XXX magic; CText */
1500 if(flags
& n_SHEXP_PARSE_LOG
)
1501 n_err(_("\\U argument exceeds 0x10FFFF: %.*s: "
1503 (int)input
->l
, input
->s
,
1504 (int)PTR2SIZE(ib
- ib_save
), ib_save
);
1505 rv
|= n_SHEXP_STATE_ERR_NUMBER
;
1506 /* But normalize the output anyway */
1510 j
= n_utf32_to_utf8(no
, utf
);
1512 if(n_psonce
& n_PSO_UNICODE
){
1513 rv
|= n_SHEXP_STATE_OUTPUT
| n_SHEXP_STATE_UNICODE
;
1514 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
1515 store
= n_string_push_buf(store
, utf
, j
);
1522 icp
= n_iconv_onetime_cp(n_ICONV_NONE
,
1525 rv
|= n_SHEXP_STATE_OUTPUT
;
1526 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
1527 store
= n_string_push_cp(store
, icp
);
1532 if(!(flags
& n_SHEXP_PARSE_DRYRUN
)) Jerr_uni_norm
:{
1535 rv
|= n_SHEXP_STATE_OUTPUT
|
1536 n_SHEXP_STATE_ERR_UNICODE
;
1537 i
= snprintf(itoa
, sizeof itoa
, "\\%c%0*X",
1538 (no
> 0xFFFFu
? 'U' : 'u'),
1539 (int)(no
> 0xFFFFu
? 8 : 4), (ui32_t
)no
);
1540 store
= n_string_push_buf(store
, itoa
, i
);
1544 if(state
& a_SKIPMASK
)
1549 /* Extension: \$ can be used to expand a variable.
1550 * B(ug|ad) effect: if conversion fails, not written "as-is" */
1553 goto j_dollar_ungetc
;
1558 /* Follow bash(1) behaviour, print sequence unchanged */
1563 }else if(c
== '$' && quotec
== '"' && il
> 0) J_var_expand
:{
1564 state
&= ~a_VARSUBST_MASK
;
1568 /* Scan variable name */
1569 if(!(state
& a_BRACE
) || il
> 1){
1570 char const *cp
, *vp
;
1576 state
&= ~a_EXPLODE
;
1578 for(i
= 0; il
> 0; --il
, ++ib
, ++i
){
1579 /* We have some special cases regarding special parameters,
1580 * so ensure these don't cause failure. This code has
1581 * counterparts in code that manages internal variables! */
1583 if(!a_SHEXP_ISVARC(c
)){
1585 /* Simply skip over multiplexer */
1588 if(c
== '*' || c
== '@' || c
== '#' || c
== '?' ||
1599 }else if(a_SHEXP_ISVARC_BAD1ST(c
)){
1603 state
|= a_NONDIGIT
;
1606 /* In skip mode, be easy and.. skip over */
1607 if(state
& a_SKIPMASK
){
1608 if((state
& a_BRACE
) && il
> 0 && *ib
== '}')
1613 /* Handle the scan error cases */
1614 if((state
& (a_DIGIT1
| a_NONDIGIT
)) == (a_DIGIT1
| a_NONDIGIT
)){
1615 if(state
& a_BRACE
){
1616 if(il
> 0 && *ib
== '}')
1619 rv
|= n_SHEXP_STATE_ERR_GROUPOPEN
;
1621 if(flags
& n_SHEXP_PARSE_LOG
)
1622 n_err(_("Invalid identifier for ${}: %.*s: %.*s\n"),
1623 (int)input
->l
, input
->s
,
1624 (int)PTR2SIZE(ib
- ib_save
), ib_save
);
1625 rv
|= n_SHEXP_STATE_ERR_IDENTIFIER
;
1628 if(state
& a_BRACE
){
1629 if(il
== 0 || *ib
!= '}'){
1630 if(flags
& n_SHEXP_PARSE_LOG
)
1631 n_err(_("No closing brace for ${}: %.*s: %.*s\n"),
1632 (int)input
->l
, input
->s
,
1633 (int)PTR2SIZE(ib
- ib_save
), ib_save
);
1634 rv
|= n_SHEXP_STATE_ERR_GROUPOPEN
;
1640 if(flags
& n_SHEXP_PARSE_LOG
)
1641 n_err(_("Bad substitution for ${}: %.*s: %.*s\n"),
1642 (int)input
->l
, input
->s
,
1643 (int)PTR2SIZE(ib
- ib_save
), ib_save
);
1644 rv
|= n_SHEXP_STATE_ERR_BADSUB
;
1648 /* Simply write dollar as-is? */
1651 if(state
& a_BRACE
){
1652 if(il
== 0 || *ib
!= '}'){
1653 if(flags
& n_SHEXP_PARSE_LOG
)
1654 n_err(_("No closing brace for ${}: %.*s: %.*s\n"),
1655 (int)input
->l
, input
->s
,
1656 (int)PTR2SIZE(ib
- ib_save
), ib_save
);
1657 rv
|= n_SHEXP_STATE_ERR_GROUPOPEN
;
1663 if(flags
& n_SHEXP_PARSE_LOG
)
1664 n_err(_("Bad substitution for ${}: %.*s: %.*s\n"),
1665 (int)input
->l
, input
->s
,
1666 (int)PTR2SIZE(ib
- ib_save
), ib_save
);
1667 rv
|= n_SHEXP_STATE_ERR_BADSUB
;
1672 if(flags
& n_SHEXP_PARSE_DRYRUN
)
1675 /* We may need to explode "${@}" into a series of successive,
1676 * properly quoted tokens (instead). The first exploded
1677 * cookie will join with the current token */
1678 if(n_UNLIKELY(state
& a_EXPLODE
) &&
1679 !(flags
& n_SHEXP_PARSE_DRYRUN
) && cookie
!= NULL
){
1680 if(n_var_vexplode(cookie
))
1682 /* On the other hand, if $@ expands to nothing and is the
1683 * sole content of this quote then act like the shell does
1684 * and throw away the entire atxplode construct */
1685 else if(!(rv
& n_SHEXP_STATE_OUTPUT
) &&
1686 il
== 1 && *ib
== '"' &&
1687 ib_save
== &input
->s
[1] && ib_save
[-1] == '"')
1691 input
->s
= n_UNCONST(ib
);
1693 goto jrestart_empty
;
1696 /* Check getenv(3) should no internal variable exist!
1697 * XXX We have some common idioms, avoid allocating memory for them
1698 * XXX Even better would be var_vlook_buf()! */
1701 case '?': vp
= n_qm
; break;
1702 case '!': vp
= n_em
; break;
1703 case '*': vp
= n_star
; break;
1704 case '@': vp
= n_at
; break;
1705 case '#': vp
= n_ns
; break;
1706 default: goto j_var_look_buf
;
1710 vp
= savestrbuf(vp
, i
);
1712 if((cp
= n_var_vlook(vp
, TRU1
)) != NULL
){
1713 rv
|= n_SHEXP_STATE_OUTPUT
;
1714 store
= n_string_push_cp(store
, cp
);
1715 for(; (c
= *cp
) != '\0'; ++cp
)
1717 rv
|= n_SHEXP_STATE_CONTROL
;
1724 }else if(c
== '`' && quotec
== '"' && il
> 0){ /* TODO shell command */
1729 if(!(state
& a_SKIPMASK
)){
1730 rv
|= n_SHEXP_STATE_OUTPUT
;
1732 rv
|= n_SHEXP_STATE_CONTROL
;
1733 if(!(flags
& n_SHEXP_PARSE_DRYRUN
))
1734 store
= n_string_push_c(store
, c
);
1738 if(quotec
!= '\0' && !(flags
& n_SHEXP_PARSE_QUOTE_AUTO_CLOSE
)){
1739 if(flags
& n_SHEXP_PARSE_LOG
)
1740 n_err(_("No closing quote: %.*s\n"), (int)input
->l
, input
->s
);
1741 rv
|= n_SHEXP_STATE_ERR_QUOTEOPEN
;
1745 assert(!(state
& a_COOKIE
));
1746 if((flags
& n_SHEXP_PARSE_DRYRUN
) && store
!= NULL
){
1747 store
= n_string_push_buf(store
, input
->s
, PTR2SIZE(ib
- input
->s
));
1748 rv
|= n_SHEXP_STATE_OUTPUT
;
1751 if(state
& a_CONSUME
){
1752 input
->s
= n_UNCONST(&ib
[il
]);
1755 if(flags
& n_SHEXP_PARSE_TRIM_SPACE
){
1756 for(; il
> 0; ++ib
, --il
){
1757 if(!blankspacechar(*ib
))
1759 rv
|= n_SHEXP_STATE_WS_TRAIL
;
1763 if(flags
& n_SHEXP_PARSE_TRIM_IFSSPACE
){
1764 for(; il
> 0; ++ib
, --il
){
1765 if(strchr(ifs_ws
, *ib
) == NULL
)
1767 rv
|= n_SHEXP_STATE_WS_TRAIL
;
1772 input
->s
= n_UNCONST(ib
);
1775 if(!(rv
& n_SHEXP_STATE_STOP
)){
1776 if(!(rv
& (n_SHEXP_STATE_OUTPUT
| n_SHEXP_STATE_META_MASK
)) &&
1777 (flags
& n_SHEXP_PARSE_IGNORE_EMPTY
) && il
> 0)
1778 goto jrestart_empty
;
1779 if(/*!(rv & n_SHEXP_STATE_OUTPUT) &&*/ il
== 0)
1780 rv
|= n_SHEXP_STATE_STOP
;
1783 if((state
& a_SKIPT
) && !(rv
& n_SHEXP_STATE_STOP
) &&
1784 (flags
& n_SHEXP_PARSE_META_MASK
))
1787 assert((rv
& n_SHEXP_STATE_OUTPUT
) || !(rv
& n_SHEXP_STATE_UNICODE
));
1788 assert((rv
& n_SHEXP_STATE_OUTPUT
) || !(rv
& n_SHEXP_STATE_CONTROL
));
/* C-string front end for n_shexp_parse_token(): the text at *cp is handed to
 * the parser together with flags, the result is collected in an auto-created
 * n_string, and rv receives its n_string_cp() form.
 * NOTE(review): several lines of this function are not visible in this view
 * (the remaining declarations, how input.l and *cp are set, the return) --
 * confirm against the complete file */
1794 n_shexp_parse_token_cp(enum n_shexp_parse_flags flags
, char const **cp
){
1796 struct n_string sou
, *soup
;
1798 enum n_shexp_state shs
;
/* Parse directly from the caller's buffer */
1803 input
.s
= n_UNCONST(*cp
);
1805 soup
= n_string_creat_auto(&sou
);
/* No cookie is passed (last argument is NULL) */
1807 shs
= n_shexp_parse_token(flags
, soup
, &input
, NULL
);
/* On a parse error the original *cp text is assigned to the output string */
1808 if(shs
& n_SHEXP_STATE_ERR_MASK
){
1809 soup
= n_string_assign_cp(soup
, *cp
);
1814 rv
= n_string_cp(soup
);
1815 /*n_string_gut(n_string_drop_ownership(soup));*/
/* Entry point for quoting input into store: a quote context (sqc) and a quote
 * level (sql) are set up and handed to a_shexp__quote().
 * store and input must not be NULL, and input->s may only be NULL if
 * input->l is 0 (all asserted).  rndtrip selects a_SHEXP_QUOTE_ROUNDTRIP,
 * otherwise a_SHEXP_QUOTE_NONE is used.
 * NOTE(review): the return statement is not visible in this view; presumably
 * store is returned -- confirm */
1820 FL
struct n_string
*
1821 n_shexp_quote(struct n_string
*store
, struct str
const *input
, bool_t rndtrip
){
1822 struct a_shexp_quote_lvl sql
;
1823 struct a_shexp_quote_ctx sqc
;
1826 assert(store
!= NULL
);
1827 assert(input
!= NULL
);
1828 assert(input
->l
== 0 || input
->s
!= NULL
);
1830 memset(&sqc
, 0, sizeof sqc
);
1831 sqc
.sqc_store
= store
;
1832 sqc
.sqc_input
.s
= input
->s
;
/* A length of UIZ_MAX means "NUL terminated": measure the string then */
1833 if((sqc
.sqc_input
.l
= input
->l
) == UIZ_MAX
)
1834 sqc
.sqc_input
.l
= strlen(input
->s
);
1835 sqc
.sqc_flags
= rndtrip
? a_SHEXP_QUOTE_ROUNDTRIP
: a_SHEXP_QUOTE_NONE
;
/* Empty input is emitted as an empty pair of single quotes */
1837 if(sqc
.sqc_input
.l
== 0)
1838 store
= n_string_push_buf(store
, "''", sizeof("''") -1);
/* NOTE(review): presumably the else-branch of the empty check -- the
 * connecting line is not visible here.
 * The level starts out with the entire input and the flags of the context */
1840 memset(&sql
, 0, sizeof sql
);
1841 sql
.sql_dat
= sqc
.sqc_input
;
1842 sql
.sql_flags
= sqc
.sqc_flags
;
1843 a_shexp__quote(&sqc
, &sql
);
/* C-string convenience wrapper around n_shexp_quote(): cp becomes the input
 * string, the output goes to a temporary auto-created n_string, and rv
 * receives the n_string_cp() form of the result.
 * NOTE(review): the remaining declarations, the setup of the input length,
 * the tail of the n_shexp_quote() call (presumably passing rndtrip) and the
 * return are not visible in this view -- confirm against the complete file */
1850 n_shexp_quote_cp(char const *cp
, bool_t rndtrip
){
1851 struct n_string store
;
1858 input
.s
= n_UNCONST(cp
);
1860 rv
= n_string_cp(n_shexp_quote(n_string_creat_auto(&store
), &input
,
/* rv is obtained before ownership of the buffer is dropped.
 * NOTE(review): presumably this keeps rv usable after the string object is
 * gutted -- confirm n_string_drop_ownership() semantics */
1862 n_string_gut(n_string_drop_ownership(&store
));
/* Check name character by character against the a_SHEXP_ISVARC*() classes.
 * NOTE(review): the statements executed when a test matches, as well as the
 * return value, are not visible in this view; presumably a failing test
 * rejects the name -- confirm against the complete file */
1868 n_shexp_is_valid_varname(char const *name
){
/* lc carries the previously seen character, and is NUL for the first one */
1875 for(lc
= '\0'; (c
= *name
++) != '\0'; lc
= c
)
/* Every character must belong to the variable name class.. */
1876 if(!a_SHEXP_ISVARC(c
))
/* ..and the very first one is additionally tested against BAD1ST */
1878 else if(lc
== '\0' && a_SHEXP_ISVARC_BAD1ST(c
))
/* NOTE(review): presumably evaluated after the loop, so that lc is the final
 * character of name -- confirm */
1880 if(a_SHEXP_ISVARC_BADNST(lc
))
1890 c_shcodec(void *vp
){
1892 struct n_string sou_b
, *soup
;
1896 char const **argv
, *varname
, *act
, *cp
;
1898 soup
= n_string_creat_auto(&sou_b
);
1900 varname
= (n_pstate
& n_PS_ARGMOD_VPUT
) ? *argv
++ : NULL
;
1903 for(cp
= act
; *cp
!= '\0' && !blankspacechar(*cp
); ++cp
)
1905 if((norndtrip
= (*act
== '+')))
1909 alen
= PTR2SIZE(cp
- act
);
1913 in
.l
= strlen(in
.s
= n_UNCONST(cp
));
1916 if(is_ascncaseprefix(act
, "encode", alen
))
1917 soup
= n_shexp_quote(soup
, &in
, !norndtrip
);
1918 else if(!norndtrip
&& is_ascncaseprefix(act
, "decode", alen
)){
1920 enum n_shexp_state shs
;
1922 shs
= n_shexp_parse_token((n_SHEXP_PARSE_LOG
|
1923 n_SHEXP_PARSE_IGNORE_EMPTY
), soup
, &in
, NULL
);
1924 if(shs
& n_SHEXP_STATE_ERR_MASK
){
1925 soup
= n_string_assign_cp(soup
, cp
);
1926 nerrn
= n_ERR_CANCELED
;
1930 if(shs
& n_SHEXP_STATE_STOP
)
1936 if(varname
!= NULL
){
1937 cp
= n_string_cp(soup
);
1938 if(!n_var_vset(varname
, (uintptr_t)cp
)){
1939 nerrn
= n_ERR_NOTSUP
;
1945 in
.s
= n_string_cp(soup
);
1947 makeprint(&in
, &out
);
1948 if(fprintf(n_stdout
, "%s\n", out
.s
) < 0){
1956 n_pstate_err_no
= nerrn
;
1958 return (vp
!= NULL
? 0 : 1);
1960 n_err(_("Synopsis: shcodec: <[+]e[ncode]|d[ecode]> <rest-of-line>\n"));
1961 nerrn
= n_ERR_INVAL
;