cc-test.sh: drop useless "set -o noglob" detection; tweak usage()
[s-mailx.git] / shexp.c
blob5d4a720687affa3026136c0966b6cdf348d4e7f0
/*@ S-nail - a mail user agent derived from Berkeley Mail.
 *@ Shell "word", file- and other name expansions, incl. file globbing.
 *@ TODO v15: peek signal states while opendir/readdir/etc.
 *@ TODO "Magic solidus" used as path separator.
 *
 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
 */
/*
 * Copyright (c) 1980, 1993
 *      The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
37 #undef n_FILE
38 #define n_FILE shexp
40 #ifndef HAVE_AMALGAMATION
41 # include "nail.h"
42 #endif
44 #include <pwd.h>
46 #ifdef HAVE_FNMATCH
47 # include <dirent.h>
48 # include <fnmatch.h>
49 #endif
/* POSIX says
 *   Environment variable names used by the utilities in the Shell and
 *   Utilities volume of POSIX.1-2008 consist solely of uppercase
 *   letters, digits, and the <underscore> ('_') from the characters
 *   defined in Portable Character Set and do not begin with a digit.
 *   Other characters may be permitted by an implementation;
 *   applications shall tolerate the presence of such names.
 * We do support the hyphen-minus "-" (except in last position for ${x[:]-y}).
 * We support some special parameter names for one-letter(++) variable names;
 * these have counterparts in the code that manages internal variables,
 * and some more special treatment below! */
/* Is C acceptable anywhere inside a variable name? */
#define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* Is C unacceptable as the first byte of a variable name? */
#define a_SHEXP_ISVARC_BAD1ST(C) (digitchar(C)) /* (Actually assumed below!) */
/* Is C unacceptable as the last byte of a variable name? */
#define a_SHEXP_ISVARC_BADNST(C) ((C) == '-')
/* Flags that steer a_shexp__quote(): bit 0 is a behaviour switch, bits 8..11
 * name the type of quoting in effect */
enum a_shexp_quote_flags{
   a_SHEXP_QUOTE_NONE,
   a_SHEXP_QUOTE_ROUNDTRIP = 1u<<0, /* Result won't be consumed immediately */

   a_SHEXP_QUOTE_T_REVSOL = 1u<<8,  /* Type: by reverse solidus */
   a_SHEXP_QUOTE_T_SINGLE = 1u<<9,  /* Type: single-quotes */
   a_SHEXP_QUOTE_T_DOUBLE = 1u<<10, /* Type: double-quotes */
   a_SHEXP_QUOTE_T_DOLLAR = 1u<<11, /* Type: dollar-single-quotes */
   a_SHEXP_QUOTE_T_MASK = a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
         a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR,

   a_SHEXP_QUOTE__FREESHIFT = 16u
};
#ifdef HAVE_FNMATCH
/* State of one directory level of a_shexp__glob() */
struct a_shexp_glob_ctx{
   char const *sgc_patdat;       /* Remaining pattern (at and below level) */
   size_t sgc_patlen;
   struct n_string *sgc_outer;   /* Resolved path up to this level */
   ui32_t sgc_flags;
   ui8_t sgc__dummy[4];
};
#endif
90 struct a_shexp_quote_ctx{
91 struct n_string *sqc_store; /* Result storage */
92 struct str sqc_input; /* Input data, topmost level */
93 ui32_t sqc_cnt_revso;
94 ui32_t sqc_cnt_single;
95 ui32_t sqc_cnt_double;
96 ui32_t sqc_cnt_dollar;
97 enum a_shexp_quote_flags sqc_flags;
98 ui8_t sqc__dummy[4];
101 struct a_shexp_quote_lvl{
102 struct a_shexp_quote_lvl *sql_link; /* Outer level */
103 struct str sql_dat; /* This level (has to) handle(d) */
104 enum a_shexp_quote_flags sql_flags;
105 ui8_t sql__dummy[4];
108 /* Locate the user's mailbox file (where new, unread mail is queued) */
109 static char *a_shexp_findmail(char const *user, bool_t force);
111 /* Expand ^~/? and ^~USER/? constructs.
112 * Returns the completely resolved (maybe empty or identical to input)
113 * salloc()ed string */
114 static char *a_shexp_tilde(char const *s);
116 /* Perform fnmatch(3). May return NULL on error */
117 static char *a_shexp_globname(char const *name, enum fexp_mode fexpm);
118 #ifdef HAVE_FNMATCH
119 static bool_t a_shexp__glob(struct a_shexp_glob_ctx *sgcp,
120 struct n_strlist **slpp);
121 static int a_shexp__globsort(void const *cvpa, void const *cvpb);
122 #endif
124 /* Parse an input string and create a sh(1)ell-quoted result */
125 static void a_shexp__quote(struct a_shexp_quote_ctx *sqcp,
126 struct a_shexp_quote_lvl *sqlp);
128 static char *
129 a_shexp_findmail(char const *user, bool_t force){
130 char *rv;
131 char const *cp;
132 NYD2_ENTER;
134 if(!force){
135 if((cp = ok_vlook(inbox)) != NULL && *cp != '\0'){
136 /* _NFOLDER extra introduced to avoid % recursion loops */
137 if((rv = fexpand(cp, FEXP_NSPECIAL | FEXP_NFOLDER | FEXP_NSHELL)
138 ) != NULL)
139 goto jleave;
140 n_err(_("*inbox* expansion failed, using $MAIL / built-in: %s\n"), cp);
142 /* Heirloom compatibility: an IMAP *folder* becomes "%" */
143 #ifdef HAVE_IMAP
144 else if(cp == NULL && !strcmp(user, ok_vlook(LOGNAME)) &&
145 which_protocol(cp = n_folder_query(), FAL0, FAL0, NULL)
146 == PROTO_IMAP){
147 /* TODO Compat handling of *folder* with IMAP! */
148 n_OBSOLETE("no more expansion of *folder* in \"%\": "
149 "please set *inbox*");
150 rv = savestr(cp);
151 goto jleave;
153 #endif
155 if((cp = ok_vlook(MAIL)) != NULL){
156 rv = savestr(cp);
157 goto jleave;
161 /* C99 */{
162 size_t ul, i;
164 ul = strlen(user) +1;
165 i = sizeof(VAL_MAIL) -1 + 1 + ul;
167 rv = salloc(i);
168 memcpy(rv, VAL_MAIL, (i = sizeof(VAL_MAIL) -1));
169 rv[i] = '/';
170 memcpy(&rv[++i], user, ul);
172 jleave:
173 NYD2_LEAVE;
174 return rv;
177 static char *
178 a_shexp_tilde(char const *s){
179 struct passwd *pwp;
180 size_t nl, rl;
181 char const *rp, *np;
182 char *rv;
183 NYD2_ENTER;
185 if(*(rp = &s[1]) == '/' || *rp == '\0'){
186 np = ok_vlook(HOME);
187 rl = strlen(rp);
188 }else{
189 if((rp = strchr(np = rp, '/')) != NULL){
190 nl = PTR2SIZE(rp - np);
191 np = savestrbuf(np, nl);
192 rl = strlen(rp);
193 }else
194 rl = 0;
196 if((pwp = getpwnam(np)) == NULL){
197 rv = savestr(s);
198 goto jleave;
200 np = pwp->pw_dir;
203 nl = strlen(np);
204 rv = salloc(nl + 1 + rl +1);
205 memcpy(rv, np, nl);
206 if(rl > 0){
207 memcpy(rv + nl, rp, rl);
208 nl += rl;
210 rv[nl] = '\0';
211 jleave:
212 NYD2_LEAVE;
213 return rv;
216 static char *
217 a_shexp_globname(char const *name, enum fexp_mode fexpm){
218 #ifdef HAVE_FNMATCH
219 struct a_shexp_glob_ctx sgc;
220 struct n_string outer;
221 struct n_strlist *slp;
222 char *cp;
223 NYD_ENTER;
225 memset(&sgc, 0, sizeof sgc);
226 sgc.sgc_patlen = strlen(name);
227 sgc.sgc_patdat = savestrbuf(name, sgc.sgc_patlen);
228 sgc.sgc_outer = n_string_reserve(n_string_creat(&outer), sgc.sgc_patlen);
229 sgc.sgc_flags = ((fexpm & FEXP_SILENT) != 0);
230 slp = NULL;
231 if(a_shexp__glob(&sgc, &slp))
232 cp = (char*)1;
233 else
234 cp = NULL;
235 n_string_gut(&outer);
237 if(cp == NULL)
238 goto jleave;
240 if(slp == NULL){
241 cp = n_UNCONST(N_("File pattern does not match"));
242 goto jerr;
243 }else if(slp->sl_next == NULL)
244 cp = savestrbuf(slp->sl_dat, slp->sl_len);
245 else if(fexpm & FEXP_MULTIOK){
246 struct n_strlist **sorta, *xslp;
247 size_t i, no, l;
249 no = l = 0;
250 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next){
251 ++no;
252 l += xslp->sl_len + 1;
255 sorta = smalloc(sizeof(*sorta) * no);
256 no = 0;
257 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next)
258 sorta[no++] = xslp;
259 qsort(sorta, no, sizeof *sorta, &a_shexp__globsort);
261 cp = salloc(++l);
262 l = 0;
263 for(i = 0; i < no; ++i){
264 xslp = sorta[i];
265 memcpy(&cp[l], xslp->sl_dat, xslp->sl_len);
266 l += xslp->sl_len;
267 cp[l++] = '\0';
269 cp[l] = '\0';
271 free(sorta);
272 n_pstate |= n_PS_EXPAND_MULTIRESULT;
273 }else{
274 cp = n_UNCONST(N_("File pattern matches multiple results"));
275 goto jerr;
278 jleave:
279 while(slp != NULL){
280 struct n_strlist *tmp = slp;
282 slp = slp->sl_next;
283 free(tmp);
285 NYD_LEAVE;
286 return cp;
288 jerr:
289 if(!(fexpm & FEXP_SILENT)){
290 name = n_shexp_quote_cp(name, FAL0);
291 n_err("%s: %s\n", V_(cp), name);
293 cp = NULL;
294 goto jleave;
296 #else /* HAVE_FNMATCH */
297 n_UNUSED(fexpm);
299 if(!(fexpm & FEXP_SILENT))
300 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
301 return savestr(name);
302 #endif
305 #ifdef HAVE_FNMATCH
306 static bool_t
307 a_shexp__glob(struct a_shexp_glob_ctx *sgcp, struct n_strlist **slpp){
308 enum{a_SILENT = 1<<0, a_DEEP=1<<1, a_SALLOC=1<<2};
310 struct a_shexp_glob_ctx nsgc;
311 struct dirent *dep;
312 DIR *dp;
313 size_t old_outerlen;
314 char const *ccp, *myp;
315 NYD2_ENTER;
317 /* We need some special treatment for the outermost level.
318 * All along our way, normalize path separators */
319 if(!(sgcp->sgc_flags & a_DEEP)){
320 if(sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/'){
321 myp = n_string_cp(n_string_push_c(sgcp->sgc_outer, '/'));
323 ++sgcp->sgc_patdat;
324 while(--sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/');
325 }else
326 myp = "./";
327 }else
328 myp = n_string_cp(sgcp->sgc_outer);
329 old_outerlen = sgcp->sgc_outer->s_len;
331 /* Separate current directory/pattern level from any possible remaining
332 * pattern in order to be able to use it for fnmatch(3) */
333 if((ccp = memchr(sgcp->sgc_patdat, '/', sgcp->sgc_patlen)) == NULL)
334 nsgc.sgc_patlen = 0;
335 else{
336 nsgc = *sgcp;
337 nsgc.sgc_flags |= a_DEEP;
338 sgcp->sgc_patlen = PTR2SIZE((nsgc.sgc_patdat = &ccp[1]) -
339 &sgcp->sgc_patdat[0]);
340 nsgc.sgc_patlen -= sgcp->sgc_patlen;
342 /* Trim solidus, everywhere */
343 if(sgcp->sgc_patlen > 0){
344 assert(sgcp->sgc_patdat[sgcp->sgc_patlen -1] == '/');
345 ((char*)n_UNCONST(sgcp->sgc_patdat))[--sgcp->sgc_patlen] = '\0';
347 while(nsgc.sgc_patlen > 0 && nsgc.sgc_patdat[0] == '/'){
348 --nsgc.sgc_patlen;
349 ++nsgc.sgc_patdat;
353 /* Our current directory level */
354 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
355 * xxx be the (sole) result depending on pattern surroundings, etc. */
356 if((dp = opendir(myp)) == NULL){
357 int err;
359 switch((err = n_err_no)){
360 case n_ERR_NOTDIR:
361 ccp = N_("cannot access paths under non-directory");
362 goto jerr;
363 case n_ERR_NOENT:
364 ccp = N_("path component of (sub)pattern non-existent");
365 goto jerr;
366 case n_ERR_ACCES:
367 ccp = N_("file permission for file (sub)pattern denied");
368 goto jerr;
369 case n_ERR_NFILE:
370 case n_ERR_MFILE:
371 ccp = N_("file descriptor limit reached, cannot open directory");
372 goto jerr;
373 default:
374 ccp = N_("cannot open path component as directory");
375 goto jerr;
379 /* As necessary, quote bytes in the current pattern TODO This will not
380 * TODO truly work out in case the user would try to quote a character
381 * TODO class, for example: in "\[a-z]" the "\" would be doubled! For that
382 * TODO to work out, we need the original user input or the shell-expression
383 * TODO parse tree, otherwise we do not know what is desired! */
384 /* C99 */{
385 char *ncp;
386 size_t i;
387 bool_t need;
389 for(need = FAL0, i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
390 switch(*myp){
391 case '\'': case '"': case '\\': case '$':
392 case ' ': case '\t':
393 need = TRU1;
394 ++i;
395 /* FALLTHRU */
396 default:
397 ++i;
398 break;
401 if(need){
402 ncp = salloc(i +1);
403 for(i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
404 switch(*myp){
405 case '\'': case '"': case '\\': case '$':
406 case ' ': case '\t':
407 ncp[i++] = '\\';
408 /* FALLTHRU */
409 default:
410 ncp[i++] = *myp;
411 break;
413 ncp[i] = '\0';
414 myp = ncp;
415 }else
416 myp = sgcp->sgc_patdat;
419 while((dep = readdir(dp)) != NULL){
420 switch(fnmatch(myp, dep->d_name, FNM_PATHNAME | FNM_PERIOD)){
421 case 0:{
422 /* A match expresses the desire to recurse if there is more pattern */
423 if(nsgc.sgc_patlen > 0){
424 bool_t isdir;
426 n_string_push_cp((sgcp->sgc_outer->s_len > 1
427 ? n_string_push_c(sgcp->sgc_outer, '/') : sgcp->sgc_outer),
428 dep->d_name);
430 isdir = FAL0;
431 #ifdef HAVE_DIRENT_TYPE
432 if(dep->d_type == DT_DIR)
433 isdir = TRU1;
434 else if(dep->d_type == DT_LNK || dep->d_type == DT_UNKNOWN)
435 #endif
437 struct stat sb;
439 if(stat(n_string_cp(sgcp->sgc_outer), &sb)){
440 ccp = N_("I/O error when querying file status");
441 goto jerr;
442 }else if(S_ISDIR(sb.st_mode))
443 isdir = TRU1;
446 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
447 * TODO Instead save away a list of such n_string's for later */
448 if(isdir && !a_shexp__glob(&nsgc, slpp)){
449 ccp = (char*)1;
450 goto jleave;
453 n_string_trunc(sgcp->sgc_outer, old_outerlen);
454 }else{
455 struct n_strlist *slp;
456 size_t i, j;
458 i = strlen(dep->d_name);
459 j = (old_outerlen > 0) ? old_outerlen + 1 + i : i;
460 slp = n_STRLIST_ALLOC(j);
461 *slpp = slp;
462 slpp = &slp->sl_next;
463 slp->sl_next = NULL;
464 if((j = old_outerlen) > 0){
465 memcpy(&slp->sl_dat[0], sgcp->sgc_outer->s_dat, j);
466 if(slp->sl_dat[j -1] != '/')
467 slp->sl_dat[j++] = '/';
469 memcpy(&slp->sl_dat[j], dep->d_name, i);
470 slp->sl_dat[j += i] = '\0';
471 slp->sl_len = j;
473 }break;
474 case FNM_NOMATCH:
475 break;
476 default:
477 ccp = N_("fnmatch(3) cannot handle file (sub)pattern");
478 goto jerr;
482 ccp = NULL;
483 jleave:
484 if(dp != NULL)
485 closedir(dp);
486 NYD2_LEAVE;
487 return (ccp == NULL);
489 jerr:
490 if(!(sgcp->sgc_flags & a_SILENT)){
491 char const *s2, *s3;
493 if(sgcp->sgc_outer->s_len > 0){
494 s2 = n_shexp_quote_cp(n_string_cp(sgcp->sgc_outer), FAL0);
495 s3 = "/";
496 }else
497 s2 = s3 = n_empty;
499 n_err("%s: %s%s%s\n", V_(ccp), s2, s3,
500 n_shexp_quote_cp(sgcp->sgc_patdat, FAL0));
502 goto jleave;
505 static int
506 a_shexp__globsort(void const *cvpa, void const *cvpb){
507 int rv;
508 struct n_strlist const * const *slpa, * const *slpb;
509 NYD2_ENTER;
511 slpa = cvpa;
512 slpb = cvpb;
513 rv = asccasecmp((*slpa)->sl_dat, (*slpb)->sl_dat);
514 NYD2_LEAVE;
515 return rv;
517 #endif /* HAVE_FNMATCH */
519 static void
520 a_shexp__quote(struct a_shexp_quote_ctx *sqcp, struct a_shexp_quote_lvl *sqlp){
521 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
522 * XXX use the recursive implementation because of stateful encodings.
523 * XXX I.e., if a quoted substring cannot be self-contained - the data after
524 * XXX the quote relies on "the former state", then this doesn't make sense.
525 * XXX Therefore this is not fully programmed out but instead only detects
526 * XXX the "most fancy" quoting necessary, and directly does that.
527 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
528 * XXX Otherwise we rather have to convert to wide first and act on that,
529 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
530 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
531 #ifdef a_SHEXP_QUOTE_RECURSE
532 # define jrecurse jrecurse
533 struct a_shexp_quote_lvl sql;
534 #else
535 # define jrecurse jstep
536 #endif
537 struct n_visual_info_ctx vic;
538 union {struct a_shexp_quote_lvl *head; struct n_string *store;} u;
539 ui32_t flags;
540 size_t il;
541 char const *ib, *ib_base;
542 NYD2_ENTER;
544 ib_base = ib = sqlp->sql_dat.s;
545 il = sqlp->sql_dat.l;
546 flags = sqlp->sql_flags;
548 /* Iterate over the entire input, classify characters and type of quotes
549 * along the way. Whenever a quote change has to be applied, adjust flags
550 * for the new situation -, setup sql.* and recurse- */
551 while(il > 0){
552 char c;
554 c = *ib;
555 if(cntrlchar(c)){
556 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
557 goto jstep;
558 if(c == '\t' && (flags & (a_SHEXP_QUOTE_T_REVSOL |
559 a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOUBLE)))
560 goto jstep;
561 #ifdef a_SHEXP_QUOTE_RECURSE
562 ++sqcp->sqc_cnt_dollar;
563 #endif
564 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
565 goto jrecurse;
566 }else if(blankspacechar(c) || c == '|' || c == '&' || c == ';' ||
567 /* Whereas we don't support those, quote them for the sh(1)ell */
568 c == '(' || c == ')' || c == '<' || c == '>' ||
569 c == '"' || c == '$'){
570 if(flags & a_SHEXP_QUOTE_T_MASK)
571 goto jstep;
572 #ifdef a_SHEXP_QUOTE_RECURSE
573 ++sqcp->sqc_cnt_single;
574 #endif
575 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
576 goto jrecurse;
577 }else if(c == '\''){
578 if(flags & (a_SHEXP_QUOTE_T_MASK & ~a_SHEXP_QUOTE_T_SINGLE))
579 goto jstep;
580 #ifdef a_SHEXP_QUOTE_RECURSE
581 ++sqcp->sqc_cnt_dollar;
582 #endif
583 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
584 goto jrecurse;
585 }else if(c == '\\' || (c == '#' && ib == ib_base)){
586 if(flags & a_SHEXP_QUOTE_T_MASK)
587 goto jstep;
588 #ifdef a_SHEXP_QUOTE_RECURSE
589 ++sqcp->sqc_cnt_single;
590 #endif
591 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
592 goto jrecurse;
593 }else if(!asciichar(c)){
594 /* Need to keep together multibytes */
595 #ifdef a_SHEXP_QUOTE_RECURSE
596 memset(&vic, 0, sizeof vic);
597 vic.vic_indat = ib;
598 vic.vic_inlen = il;
599 n_visual_info(&vic,
600 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
601 #endif
602 /* xxx check whether resulting \u would be ASCII */
603 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP) ||
604 (flags & a_SHEXP_QUOTE_T_DOLLAR)){
605 #ifdef a_SHEXP_QUOTE_RECURSE
606 ib = vic.vic_oudat;
607 il = vic.vic_oulen;
608 continue;
609 #else
610 goto jstep;
611 #endif
613 #ifdef a_SHEXP_QUOTE_RECURSE
614 ++sqcp->sqc_cnt_dollar;
615 #endif
616 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
617 goto jrecurse;
618 }else
619 jstep:
620 ++ib, --il;
622 sqlp->sql_flags = flags;
624 /* Level made the great and completed processing input. Reverse the list of
625 * levels, detect the "most fancy" quote type needed along this way */
626 /* XXX Due to restriction as above very crude */
627 for(flags = 0, il = 0, u.head = NULL; sqlp != NULL;){
628 struct a_shexp_quote_lvl *tmp;
630 tmp = sqlp->sql_link;
631 sqlp->sql_link = u.head;
632 u.head = sqlp;
633 il += sqlp->sql_dat.l;
634 if(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK)
635 il += (sqlp->sql_dat.l >> 1);
636 flags |= sqlp->sql_flags;
637 sqlp = tmp;
639 sqlp = u.head;
641 /* Finally work the substrings in the correct order, adjusting quotes along
642 * the way as necessary. Start off with the "most fancy" quote, so that
643 * the user sees an overall boundary she can orientate herself on.
644 * We do it like that to be able to give the user some "encapsulation
645 * experience", to address what strikes me is a problem of sh(1)ell quoting:
646 * different to, e.g., perl(1), where you see at a glance where a string
647 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
648 * visual appearance of "a string" as such */
649 u.store = n_string_reserve(sqcp->sqc_store, il);
651 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
652 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
653 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
654 }else if(flags & a_SHEXP_QUOTE_T_DOUBLE){
655 u.store = n_string_push_c(u.store, '"');
656 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOUBLE;
657 }else if(flags & a_SHEXP_QUOTE_T_SINGLE){
658 u.store = n_string_push_c(u.store, '\'');
659 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
660 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
661 flags &= ~a_SHEXP_QUOTE_T_MASK;
663 /* Work all the levels */
664 for(; sqlp != NULL; sqlp = sqlp->sql_link){
665 /* As necessary update our mode of quoting */
666 #ifdef a_SHEXP_QUOTE_RECURSE
667 il = 0;
669 switch(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK){
670 case a_SHEXP_QUOTE_T_DOLLAR:
671 if(!(flags & a_SHEXP_QUOTE_T_DOLLAR))
672 il = a_SHEXP_QUOTE_T_DOLLAR;
673 break;
674 case a_SHEXP_QUOTE_T_DOUBLE:
675 if(!(flags & (a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
676 il = a_SHEXP_QUOTE_T_DOLLAR;
677 break;
678 case a_SHEXP_QUOTE_T_SINGLE:
679 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
680 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
681 il = a_SHEXP_QUOTE_T_SINGLE;
682 break;
683 default:
684 case a_SHEXP_QUOTE_T_REVSOL:
685 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
686 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
687 il = a_SHEXP_QUOTE_T_REVSOL;
688 break;
691 if(il != 0){
692 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
693 u.store = n_string_push_c(u.store, '\'');
694 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
695 u.store = n_string_push_c(u.store, '"');
696 flags &= ~a_SHEXP_QUOTE_T_MASK;
698 flags |= (ui32_t)il;
699 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
700 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
701 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
702 u.store = n_string_push_c(u.store, '"');
703 else if(flags & a_SHEXP_QUOTE_T_SINGLE)
704 u.store = n_string_push_c(u.store, '\'');
706 #endif /* a_SHEXP_QUOTE_RECURSE */
708 /* Work the level's substring */
709 ib = sqlp->sql_dat.s;
710 il = sqlp->sql_dat.l;
712 while(il > 0){
713 char c2, c;
715 c = *ib;
717 if(cntrlchar(c)){
718 assert(c == '\t' || (flags & a_SHEXP_QUOTE_T_DOLLAR));
719 assert((flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
720 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)));
721 switch((c2 = c)){
722 case 0x07: c = 'a'; break;
723 case 0x08: c = 'b'; break;
724 case 0x0A: c = 'n'; break;
725 case 0x0B: c = 'v'; break;
726 case 0x0C: c = 'f'; break;
727 case 0x0D: c = 'r'; break;
728 case 0x1B: c = 'E'; break;
729 default: break;
730 case 0x09:
731 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
732 c = 't';
733 break;
735 if(flags & a_SHEXP_QUOTE_T_REVSOL)
736 u.store = n_string_push_c(u.store, '\\');
737 goto jpush;
739 u.store = n_string_push_c(u.store, '\\');
740 if(c == c2){
741 u.store = n_string_push_c(u.store, 'c');
742 c ^= 0x40;
744 goto jpush;
745 }else if(blankspacechar(c) || c == '|' || c == '&' || c == ';' ||
746 /* Whereas we don't support those, quote them for the sh(1)ell */
747 c == '(' || c == ')' || c == '<' || c == '>' ||
748 c == '"' || c == '$'){
749 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
750 goto jpush;
751 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE));
752 u.store = n_string_push_c(u.store, '\\');
753 goto jpush;
754 }else if(c == '\''){
755 if(flags & a_SHEXP_QUOTE_T_DOUBLE)
756 goto jpush;
757 assert(!(flags & a_SHEXP_QUOTE_T_SINGLE));
758 u.store = n_string_push_c(u.store, '\\');
759 goto jpush;
760 }else if(c == '\\' || (c == '#' && ib == ib_base)){
761 if(flags & a_SHEXP_QUOTE_T_SINGLE)
762 goto jpush;
763 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE |
764 a_SHEXP_QUOTE_T_DOLLAR));
765 u.store = n_string_push_c(u.store, '\\');
766 goto jpush;
767 }else if(asciichar(c)){
768 /* Shorthand: we can simply push that thing out */
769 jpush:
770 u.store = n_string_push_c(u.store, c);
771 ++ib, --il;
772 }else{
773 /* Not an ASCII character, take care not to split up multibyte
774 * sequences etc. For the sake of compile testing, don't enwrap in
775 * HAVE_ALWAYS_UNICODE_LOCALE || HAVE_NATCH_CHAR */
776 if(n_psonce & n_PSO_UNICODE){
777 ui32_t uc;
778 char const *ib2;
779 size_t il2, il3;
781 ib2 = ib;
782 il2 = il;
783 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
784 char itoa[32];
785 char const *cp;
787 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
788 if((flags & a_SHEXP_QUOTE_ROUNDTRIP) || uc == 0xFFFDu){
789 /* Use padding to make ambiguities impossible */
790 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
791 (uc > 0xFFFFu ? 'U' : 'u'),
792 (int)(uc > 0xFFFFu ? 8 : 4), uc);
793 cp = itoa;
794 }else{
795 il3 = il2;
796 cp = &ib[0];
798 u.store = n_string_push_buf(u.store, cp, il3);
799 ib += il2, il -= il2;
800 continue;
804 memset(&vic, 0, sizeof vic);
805 vic.vic_indat = ib;
806 vic.vic_inlen = il;
807 n_visual_info(&vic,
808 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
810 /* Work this substring as sensitive as possible */
811 il -= vic.vic_oulen;
812 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP))
813 u.store = n_string_push_buf(u.store, ib, il);
814 #ifdef HAVE_ICONV
815 else if((vic.vic_indat = n_iconv_onetime_cp(n_ICONV_NONE,
816 "utf-8", ok_vlook(ttycharset), savestrbuf(ib, il))) != NULL){
817 ui32_t uc;
818 char const *ib2;
819 size_t il2, il3;
821 il2 = strlen(ib2 = vic.vic_indat);
822 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
823 char itoa[32];
825 il2 = PTR2SIZE(&ib2[0] - &vic.vic_indat[0]);
826 /* Use padding to make ambiguities impossible */
827 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
828 (uc > 0xFFFFu ? 'U' : 'u'),
829 (int)(uc > 0xFFFFu ? 8 : 4), uc);
830 u.store = n_string_push_buf(u.store, itoa, il3);
831 }else
832 goto Jxseq;
834 #endif
835 else
836 #ifdef HAVE_ICONV
837 Jxseq:
838 #endif
839 while(il-- > 0){
840 u.store = n_string_push_buf(u.store, "\\xFF",
841 sizeof("\\xFF") -1);
842 n_c_to_hex_base16(&u.store->s_dat[u.store->s_len - 2], *ib++);
845 ib = vic.vic_oudat;
846 il = vic.vic_oulen;
851 /* Close an open quote */
852 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
853 u.store = n_string_push_c(u.store, '\'');
854 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
855 u.store = n_string_push_c(u.store, '"');
856 #ifdef a_SHEXP_QUOTE_RECURSE
857 jleave:
858 #endif
859 NYD2_LEAVE;
860 return;
862 #ifdef a_SHEXP_QUOTE_RECURSE
863 jrecurse:
864 sqlp->sql_dat.l -= il;
866 sql.sql_link = sqlp;
867 sql.sql_dat.s = n_UNCONST(ib);
868 sql.sql_dat.l = il;
869 sql.sql_flags = flags;
870 a_shexp__quote(sqcp, &sql);
871 goto jleave;
872 #endif
874 #undef jrecurse
875 #undef a_SHEXP_QUOTE_RECURSE
878 FL char *
879 fexpand(char const *name, enum fexp_mode fexpm) /* TODO in parts: -> URL::!! */
881 struct str proto, s;
882 char const *res, *cp;
883 bool_t dyn, haveproto;
884 NYD_ENTER;
886 n_pstate &= ~n_PS_EXPAND_MULTIRESULT;
887 dyn = FAL0;
889 /* The order of evaluation is "%" and "#" expand into constants.
890 * "&" can expand into "+". "+" can expand into shell meta characters.
891 * Shell meta characters expand into constants.
892 * This way, we make no recursive expansion */
893 if((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
894 res = n_UNCONST(name);
896 jprotonext:
897 n_UNINIT(proto.s, NULL), n_UNINIT(proto.l, 0);
898 haveproto = FAL0;
899 for(cp = res; *cp && *cp != ':'; ++cp)
900 if(!alnumchar(*cp))
901 goto jnoproto;
902 if(cp[0] == ':' && cp[1] == '/' && cp[2] == '/'){
903 haveproto = TRU1;
904 proto.s = n_UNCONST(res);
905 cp += 3;
906 proto.l = PTR2SIZE(cp - res);
907 res = cp;
910 jnoproto:
911 if(!(fexpm & FEXP_NSPECIAL)){
912 jnext:
913 dyn = FAL0;
914 switch(*res){
915 case '%':
916 if(res[1] == ':' && res[2] != '\0'){
917 res = &res[2];
918 goto jprotonext;
919 }else{
920 bool_t force;
922 force = (res[1] != '\0');
923 res = a_shexp_findmail((force ? &res[1] : ok_vlook(LOGNAME)),
924 force);
925 if(force)
926 goto jislocal;
928 goto jnext;
929 case '#':
930 if (res[1] != '\0')
931 break;
932 if (prevfile[0] == '\0') {
933 n_err(_("No previous file\n"));
934 res = NULL;
935 goto jleave;
937 res = prevfile;
938 goto jislocal;
939 case '&':
940 if (res[1] == '\0')
941 res = ok_vlook(MBOX);
942 break;
943 default:
944 break;
948 #ifdef HAVE_IMAP
949 if(res[0] == '@' && which_protocol(mailname, FAL0, FAL0, NULL)
950 == PROTO_IMAP){
951 res = str_concat_csvl(&s, protbase(mailname), "/", &res[1], NULL)->s;
952 dyn = TRU1;
954 #endif
956 /* POSIX: if *folder* unset or null, "+" shall be retained */
957 if(!(fexpm & FEXP_NFOLDER) && *res == '+' &&
958 *(cp = n_folder_query()) != '\0'){
959 res = str_concat_csvl(&s, cp, &res[1], NULL)->s;
960 dyn = TRU1;
963 /* Do some meta expansions */
964 if((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
965 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
966 : n_anyof_cp("{}[]*?$", res))){
967 bool_t doexp;
969 if(fexpm & FEXP_NOPROTO)
970 doexp = TRU1;
971 else{
972 cp = haveproto ? savecat(savestrbuf(proto.s, proto.l), res) : res;
974 switch(which_protocol(cp, TRU1, FAL0, NULL)){
975 case PROTO_FILE:
976 case PROTO_MAILDIR:
977 doexp = TRU1;
978 break;
979 default:
980 doexp = FAL0;
981 break;
985 if(doexp){
986 struct str shin;
987 struct n_string shou, *shoup;
989 shin.s = n_UNCONST(res);
990 shin.l = UIZ_MAX;
991 shoup = n_string_creat_auto(&shou);
992 for(;;){
993 enum n_shexp_state shs;
995 /* TODO shexp: take care to not include backtick eval once avail! */
996 shs = n_shexp_parse_token((n_SHEXP_PARSE_LOG_D_V |
997 n_SHEXP_PARSE_QUOTE_AUTO_FIXED | n_SHEXP_PARSE_QUOTE_AUTO_DQ |
998 n_SHEXP_PARSE_QUOTE_AUTO_CLOSE), shoup, &shin, NULL);
999 if(shs & n_SHEXP_STATE_STOP)
1000 break;
1002 res = n_string_cp(shoup);
1003 /*shoup = n_string_drop_ownership(shoup);*/
1004 dyn = TRU1;
1006 if(res[0] == '~')
1007 res = a_shexp_tilde(res);
1009 if(!(fexpm & FEXP_NSHELL) &&
1010 (res = a_shexp_globname(res, fexpm)) == NULL)
1011 goto jleave;
1012 dyn = TRU1;
1013 }/* else no tilde */
1014 }else if(res[0] == '~'){
1015 res = a_shexp_tilde(res);
1016 dyn = TRU1;
1019 jislocal:
1020 if(res != NULL && haveproto){
1021 res = savecat(savestrbuf(proto.s, proto.l), res);
1022 dyn = TRU1;
1025 if(fexpm & FEXP_LOCAL){
1026 switch (which_protocol(res, FAL0, FAL0, NULL)) {
1027 case PROTO_FILE:
1028 case PROTO_MAILDIR: /* Cannot happen since we don't stat(2), but.. */
1029 break;
1030 default:
1031 n_err(_("Not a local file or directory: %s\n"),
1032 n_shexp_quote_cp(name, FAL0));
1033 res = NULL;
1034 break;
1038 jleave:
1039 if(res != NULL && !dyn)
1040 res = savestr(res);
1041 NYD_LEAVE;
1042 return n_UNCONST(res);
/* Parse one shell-style token from input.
 *
 * flags:  n_shexp_parse_flags bits which select whitespace / *ifs* trimming,
 *         which metacharacters terminate a token, logging, automatic quote
 *         modes and dry-run operation.
 * store:  receives the expanded token; may only be NULL in DRYRUN mode (see
 *         the entry assertions).  It is truncated first with PARSE_TRUNC.
 * input:  the data to parse; a length of UIZ_MAX means "NUL terminated" and
 *         is replaced by the strlen(3).  On return .s/.l have been advanced
 *         past what was consumed.
 * cookie: if non-NULL and *cookie is non-NULL the next entry of that string
 *         array is pushed to store unchanged ("cookie mode"); an exploding
 *         "$@" inside double quotes sets it up via n_var_vexplode().
 *
 * Returns a bitmask of n_shexp_state: OUTPUT if something was (or would be)
 * produced, STOP once input is exhausted or a comment / semicolon sequencer
 * was seen, ERR_* bits for malformed sequences, META_* bits for the
 * metacharacter that terminated the token. */
1045 FL enum n_shexp_state
1046 n_shexp_parse_token(enum n_shexp_parse_flags flags, struct n_string *store,
1047 struct str *input, void const **cookie){
1048 /* TODO shexp_parse_token: WCHAR
1049 * TODO This needs to be rewritten in order to support $(( )) and $( )
1050 * TODO and ${xyYZ} and the possibly infinite recursion they bring along,
1051 * TODO too. We need a carrier struct, then, and can nicely split this
1052 * TODO big big thing up in little pieces!
1053 * TODO This means it should produce a tree of objects, so that callees
1054 * TODO can recognize whether something happened inside single/double etc.
1055 * TODO quotes; e.g., to requote "'[a-z]'" to, e.g., "\[a-z]", etc.! */
1056 ui32_t last_known_meta_trim_len;
1057 char c2, c, quotec, utf[8];
1058 enum n_shexp_state rv;
1059 size_t i, il;
1060 char const *ifs, *ifs_ws, *ib_save, *ib;
1061 enum{
1062 a_NONE = 0,
1063 a_SKIPQ = 1u<<0, /* Skip rest of this quote (\u0 ..) */
1064 a_SKIPT = 1u<<1, /* Skip entire token (\c@) */
1065 a_SKIPMASK = a_SKIPQ | a_SKIPT,
1066 a_SURPLUS = 1u<<2, /* Extended sequence interpretation */
1067 a_NTOKEN = 1u<<3, /* "New token": e.g., comments are possible */
1068 a_BRACE = 1u<<4, /* Variable substitution: brace enclosed */
1069 a_DIGIT1 = 1u<<5, /* ..first character was digit */
1070 a_NONDIGIT = 1u<<6, /* ..has seen any non-digits */
1071 a_VARSUBST_MASK = n_BITENUM_MASK(4, 6),
1073 a_ROUND_MASK = a_SKIPT | (int)~n_BITENUM_MASK(0, 7),
1074 a_COOKIE = 1u<<8,
1075 a_EXPLODE = 1u<<9,
1076 a_CONSUME = 1u<<10, /* When done, "consume" remaining input */
1077 a_TMP = 1u<<30
1078 } state;
1079 NYD2_ENTER;
1081 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
1082 assert(input != NULL);
1083 assert(input->l == 0 || input->s != NULL);
1084 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
1085 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
1086 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
1087 assert(!(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED) ||
1088 (flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK));
/* LOG_D_V is a conditional LOG: it only logs if n_PO_D_V is set in
 * n_poption, and is turned into plain LOG here once for the whole run */
1090 if((flags & n_SHEXP_PARSE_LOG_D_V) && (n_poption & n_PO_D_V))
1091 flags |= n_SHEXP_PARSE_LOG;
1092 if(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)
1093 flags |= n_SHEXP_PARSE_QUOTE_AUTO_CLOSE;
1095 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
1096 store = n_string_trunc(store, 0);
1098 if(flags & (n_SHEXP_PARSE_IFS_VAR | n_SHEXP_PARSE_TRIM_IFSSPACE)){
1099 ifs = ok_vlook(ifs);
1100 ifs_ws = ok_vlook(ifs_ws);
1101 }else{
1102 n_UNINIT(ifs, n_empty);
1103 n_UNINIT(ifs_ws, n_empty);
1106 state = a_NONE;
1107 ib = input->s;
/* A length of UIZ_MAX denotes a NUL terminated string: measure it, and
 * also store the real length back into the caller's object */
1108 if((il = input->l) == UIZ_MAX)
1109 input->l = il = strlen(ib);
1110 n_UNINIT(c, '\0');
1112 if(cookie != NULL && *cookie != NULL){
1113 assert(!(flags & n_SHEXP_PARSE_DRYRUN));
1114 state |= a_COOKIE;
1117 rv = n_SHEXP_STATE_NONE;
1118 jrestart_empty:
1119 rv &= n_SHEXP_STATE_WS_LEAD;
1120 state &= a_ROUND_MASK;
1122 /* In cookie mode, the next ARGV entry is the token already, unchanged,
1123 * since it has already been expanded before! */
1124 if(state & a_COOKIE){
1125 char const * const *xcookie, *cp;
1127 i = store->s_len;
1128 xcookie = *cookie;
1129 if((store = n_string_push_cp(store, *xcookie))->s_len > 0)
1130 rv |= n_SHEXP_STATE_OUTPUT;
1131 if(*++xcookie == NULL){
1132 *cookie = NULL;
1133 state &= ~a_COOKIE;
1134 flags |= n_SHEXP_PARSE_QUOTE_AUTO_DQ; /* ..why we are here! */
1135 }else
1136 *cookie = n_UNCONST(xcookie);
1138 for(cp = &n_string_cp(store)[i]; (c = *cp++) != '\0';)
1139 if(cntrlchar(c)){
1140 rv |= n_SHEXP_STATE_CONTROL;
1141 break;
1144 /* The last exploded cookie will join with the yielded input token, so
1145 * simply fall through in this case */
1146 if(state & a_COOKIE)
1147 goto jleave_quick;
1148 }else{
1149 jrestart:
1150 if(flags & n_SHEXP_PARSE_TRIM_SPACE){
1151 for(; il > 0; ++ib, --il){
1152 if(!blankspacechar(*ib))
1153 break;
1154 rv |= n_SHEXP_STATE_WS_LEAD;
1158 if(flags & n_SHEXP_PARSE_TRIM_IFSSPACE){
1159 for(; il > 0; ++ib, --il){
1160 if(strchr(ifs_ws, *ib) == NULL)
1161 break;
1162 rv |= n_SHEXP_STATE_WS_LEAD;
1166 input->s = n_UNCONST(ib);
1167 input->l = il;
1170 if(il == 0){
1171 rv |= n_SHEXP_STATE_STOP;
1172 goto jleave;
1175 if(store != NULL)
1176 store = n_string_reserve(store, n_MIN(il, 32)); /* XXX */
/* The automatic quote modes start the token as if the respective quote had
 * already been opened; only without any of them a new token (a_NTOKEN, and
 * thus a comment) is possible */
1178 switch(flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK){
1179 case n_SHEXP_PARSE_QUOTE_AUTO_SQ:
1180 quotec = '\'';
1181 rv |= n_SHEXP_STATE_QUOTE;
1182 break;
1183 case n_SHEXP_PARSE_QUOTE_AUTO_DQ:
1184 quotec = '"';
1185 if(0){
1186 case n_SHEXP_PARSE_QUOTE_AUTO_DSQ:
1187 quotec = '\'';
1189 rv |= n_SHEXP_STATE_QUOTE;
1190 state |= a_SURPLUS;
1191 break;
1192 default:
1193 quotec = '\0';
1194 state |= a_NTOKEN;
1195 break;
1198 /* TODO n_SHEXP_PARSE_META_SEMICOLON++, well, hack: we are not the shell,
1199 * TODO we are not a language, and therefore the general *ifs-ws* and normal
1200 * TODO whitespace trimming that input lines undergo (in a_go_evaluate())
1201 * TODO has already happened, our result will be used *as is*, and therefore
1202 * TODO we need to be aware of and remove trailing unquoted WS that would
1203 * TODO otherwise remain, after we have seen a semicolon sequencer.
1204 * By sheer luck we only need to track this in non-quote-mode */
1205 last_known_meta_trim_len = UI32_MAX;
1207 while(il > 0){ /* {{{ */
1208 --il, c = *ib++;
1210 /* If no quote-mode active.. */
1211 if(quotec == '\0'){
1212 if(c == '"' || c == '\''){
1213 quotec = c;
1214 if(c == '"')
1215 state |= a_SURPLUS;
1216 else
1217 state &= ~a_SURPLUS;
1218 state &= ~a_NTOKEN;
1219 last_known_meta_trim_len = UI32_MAX;
1220 rv |= n_SHEXP_STATE_QUOTE;
1221 continue;
1222 }else if(c == '$'){
1223 if(il > 0){
1224 state &= ~a_NTOKEN;
1225 last_known_meta_trim_len = UI32_MAX;
1226 if(*ib == '\''){
1227 --il, ++ib;
1228 quotec = '\'';
1229 state |= a_SURPLUS;
1230 rv |= n_SHEXP_STATE_QUOTE;
1231 continue;
1232 }else
1233 goto J_var_expand;
1235 }else if(c == '\\'){
1236 /* Outside of quotes this just escapes any next character, but a sole
1237 * <reverse solidus> at EOS is left unchanged */
1238 if(il > 0)
1239 --il, c = *ib++;
1240 state &= ~a_NTOKEN;
1241 last_known_meta_trim_len = UI32_MAX;
1243 /* A comment may it be if no token has yet started */
1244 else if(c == '#' && (state & a_NTOKEN)){
1245 rv |= n_SHEXP_STATE_STOP;
1246 /*last_known_meta_trim_len = UI32_MAX;*/
1247 goto jleave;
1249 /* Metacharacters which separate tokens must be turned on explicitly */
1250 else if(c == '|' && (flags & n_SHEXP_PARSE_META_VERTBAR)){
1251 rv |= n_SHEXP_STATE_META_VERTBAR;
1253 /* The parsed sequence may be _the_ output, so ensure we don't
1254 * include the metacharacter, then. */
1255 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1256 ++il, --ib;
1257 /*last_known_meta_trim_len = UI32_MAX;*/
1258 break;
1259 }else if(c == '&' && (flags & n_SHEXP_PARSE_META_AMPERSAND)){
1260 rv |= n_SHEXP_STATE_META_AMPERSAND;
1262 /* The parsed sequence may be _the_ output, so ensure we don't
1263 * include the metacharacter, then. */
1264 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1265 ++il, --ib;
1266 /*last_known_meta_trim_len = UI32_MAX;*/
1267 break;
1268 }else if(c == ';' && (flags & n_SHEXP_PARSE_META_SEMICOLON)){
1269 if(il > 0)
1270 n_go_input_inject(n_GO_INPUT_INJECT_COMMIT, ib, il);
1271 rv |= n_SHEXP_STATE_META_SEMICOLON | n_SHEXP_STATE_STOP;
1272 state |= a_CONSUME;
1273 if(!(flags & n_SHEXP_PARSE_DRYRUN) && (rv & n_SHEXP_STATE_OUTPUT) &&
1274 last_known_meta_trim_len != UI32_MAX)
1275 store = n_string_trunc(store, last_known_meta_trim_len);
1277 /* The parsed sequence may be _the_ output, so ensure we don't
1278 * include the metacharacter, then. */
1279 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1280 ++il, --ib;
1281 /*last_known_meta_trim_len = UI32_MAX;*/
1282 break;
1283 }else if(c == ',' && (flags &
1284 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA))){
1285 /* The parsed sequence may be _the_ output, so ensure we don't
1286 * include the metacharacter, then. */
1287 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1288 ++il, --ib;
1289 /*last_known_meta_trim_len = UI32_MAX;*/
1290 break;
1291 }else{
1292 ui8_t blnk;
1294 blnk = blankchar(c) ? 1 : 0;
1295 blnk |= ((flags & (n_SHEXP_PARSE_IFS_VAR |
1296 n_SHEXP_PARSE_TRIM_IFSSPACE)) &&
1297 strchr(ifs_ws, c) != NULL) ? 2 : 0;
1299 if((!(flags & n_SHEXP_PARSE_IFS_VAR) && (blnk & 1)) ||
1300 ((flags & n_SHEXP_PARSE_IFS_VAR) &&
1301 ((blnk & 2) || strchr(ifs, c) != NULL))){
1302 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
1303 /* The parsed sequence may be _the_ output, so ensure we don't
1304 * include the metacharacter, then. */
1305 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1306 ++il, --ib;
1307 /*last_known_meta_trim_len = UI32_MAX;*/
1308 break;
1310 state |= a_NTOKEN;
1311 }else
1312 state &= ~a_NTOKEN;
1314 if(blnk && store != NULL){
1315 if(last_known_meta_trim_len == UI32_MAX)
1316 last_known_meta_trim_len = store->s_len;
1317 }else
1318 last_known_meta_trim_len = UI32_MAX;
1320 }else{
1321 /* Quote-mode */
1322 assert(!(state & a_NTOKEN));
1323 if(c == quotec && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)){
1324 state &= a_ROUND_MASK;
1325 quotec = '\0';
1326 /* Users may need to recognize the presence of empty quotes */
1327 rv |= n_SHEXP_STATE_OUTPUT;
1328 continue;
1329 }else if(c == '\\' && (state & a_SURPLUS)){
1330 ib_save = ib - 1;
1331 /* A sole <reverse solidus> at EOS is treated as-is! This is ok
1332 * since the "closing quote" error will occur next, anyway */
1333 if(il == 0)
1335 else if((c2 = *ib) == quotec){
1336 --il, ++ib;
1337 c = quotec;
1338 }else if(quotec == '"'){
1339 /* Double quotes, POSIX says:
1340 * The <backslash> shall retain its special meaning as an
1341 * escape character (see Section 2.2.1) only when followed
1342 * by one of the following characters when considered
1343 * special: $ ` " \ <newline> */
1344 switch(c2){
1345 case '$':
1346 case '`':
1347 /* case '"': already handled via c2 == quotec */
1348 case '\\':
1349 --il, ++ib;
1350 c = c2;
1351 /* FALLTHRU */
1352 default:
1353 break;
1355 }else{
1356 /* Dollar-single-quote */
1357 --il, ++ib;
1358 switch(c2){
1359 case '"':
1360 /* case '\'': already handled via c2 == quotec */
1361 case '\\':
1362 c = c2;
1363 break;
1365 case 'b': c = '\b'; break;
1366 case 'f': c = '\f'; break;
1367 case 'n': c = '\n'; break;
1368 case 'r': c = '\r'; break;
1369 case 't': c = '\t'; break;
1370 case 'v': c = '\v'; break;
1372 case 'E':
1373 case 'e': c = '\033'; break;
1375 /* Control character */
1376 case 'c':
1377 if(il == 0)
1378 goto j_dollar_ungetc;
1379 --il, c2 = *ib++;
1380 if(state & a_SKIPMASK)
1381 continue;
1382 /* ASCII C0: 0..1F, 7F <- @.._ (+ a-z -> A-Z), ? */
1383 c = upperconv(c2) ^ 0x40;
1384 if((ui8_t)c > 0x1F && c != 0x7F){
1385 if(flags & n_SHEXP_PARSE_LOG)
1386 n_err(_("Invalid \\c notation: %.*s: %.*s\n"),
1387 (int)input->l, input->s,
1388 (int)PTR2SIZE(ib - ib_save), ib_save);
1389 rv |= n_SHEXP_STATE_ERR_CONTROL;
1391 /* As an implementation-defined extension, support \c@
1392 * EQ printf(1) alike \c */
1393 if(c == '\0'){
1394 state |= a_SKIPT;
1395 continue;
1397 break;
1399 /* Octal sequence: 1 to 3 octal bytes */
1400 case '0':
1401 /* As an extension (dependent on where you look, echo(1), or
1402 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1403 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1404 c2 = c;
1405 --il, ++ib;
1407 /* FALLTHRU */
1408 case '1': case '2': case '3':
1409 case '4': case '5': case '6': case '7':
1410 c2 -= '0';
1411 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1412 c2 = (c2 << 3) | (c - '0');
1413 --il, ++ib;
1415 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1416 if(!(state & a_SKIPMASK) && (ui8_t)c2 > 0x1F){
1417 rv |= n_SHEXP_STATE_ERR_NUMBER;
1418 --il, ++ib;
1419 if(flags & n_SHEXP_PARSE_LOG)
1420 n_err(_("\\0 argument exceeds a byte: %.*s: %.*s\n"),
1421 (int)input->l, input->s,
1422 (int)PTR2SIZE(ib - ib_save), ib_save);
1423 /* Write unchanged */
1424 jerr_ib_save:
1425 rv |= n_SHEXP_STATE_OUTPUT;
1426 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1427 store = n_string_push_buf(store, ib_save,
1428 PTR2SIZE(ib - ib_save));
1429 continue;
1431 c2 = (c2 << 3) | (c -= '0');
1432 --il, ++ib;
1434 if(state & a_SKIPMASK)
1435 continue;
1436 if((c = c2) == '\0'){
1437 state |= a_SKIPQ;
1438 continue;
1440 break;
1442 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
1443 case 'U':
1444 i = 8;
1445 if(0){
1446 /* FALLTHRU */
1447 case 'u':
1448 i = 4;
1450 if(il == 0)
1451 goto j_dollar_ungetc;
1452 if(0){
1453 /* FALLTHRU */
1455 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1456 case 'X':
1457 case 'x':
1458 if(il == 0)
1459 goto j_dollar_ungetc;
1460 i = 2;
1462 /* C99 */{
1463 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
1464 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1466 size_t no, j;
1468 i = n_MIN(il, i);
1469 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
1470 c = *ib;
1471 if(hexchar(c)){
1472 no <<= 4;
1473 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
1474 : ((c) <= 'F' ? 55 : 87)))];
1475 }else if(j == 0){
1476 if(state & a_SKIPMASK)
1477 break;
1478 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
1479 if(flags & n_SHEXP_PARSE_LOG)
1480 n_err(_("Invalid \\%c notation: %.*s: %.*s\n"),
1481 c2, (int)input->l, input->s,
1482 (int)PTR2SIZE(ib - ib_save), ib_save);
1483 rv |= n_SHEXP_STATE_ERR_NUMBER;
1484 goto jerr_ib_save;
1485 }else
1486 break;
1489 /* Unicode massage */
1490 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
1491 if((c = (char)no) == '\0')
1492 state |= a_SKIPQ;
1493 }else if(no == 0)
1494 state |= a_SKIPQ;
1495 else if(!(state & a_SKIPMASK)){
1496 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1497 store = n_string_reserve(store, n_MAX(j, 4));
/* NOTE(review): the goto in the following branch enters the Jerr_uni_norm
 * block from outside of its if(!(flags & n_SHEXP_PARSE_DRYRUN)) guard, so it
 * looks as if a dry-run would push to store here (which may be NULL in
 * dry-run mode according to the entry assertion) -- confirm, and guard */
1499 if(no > 0x10FFFF){ /* XXX magic; CText */
1500 if(flags & n_SHEXP_PARSE_LOG)
1501 n_err(_("\\U argument exceeds 0x10FFFF: %.*s: "
1502 "%.*s\n"),
1503 (int)input->l, input->s,
1504 (int)PTR2SIZE(ib - ib_save), ib_save);
1505 rv |= n_SHEXP_STATE_ERR_NUMBER;
1506 /* But normalize the output anyway */
1507 goto Jerr_uni_norm;
1510 j = n_utf32_to_utf8(no, utf);
1512 if(n_psonce & n_PSO_UNICODE){
1513 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
1514 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1515 store = n_string_push_buf(store, utf, j);
1516 continue;
1518 #ifdef HAVE_ICONV
1519 else{
1520 char *icp;
1522 icp = n_iconv_onetime_cp(n_ICONV_NONE,
1523 NULL, NULL, utf);
1524 if(icp != NULL){
1525 rv |= n_SHEXP_STATE_OUTPUT;
1526 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1527 store = n_string_push_cp(store, icp);
1528 continue;
1531 #endif
1532 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Jerr_uni_norm:{
1533 char itoa[32];
1535 rv |= n_SHEXP_STATE_OUTPUT |
1536 n_SHEXP_STATE_ERR_UNICODE;
1537 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1538 (no > 0xFFFFu ? 'U' : 'u'),
1539 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
1540 store = n_string_push_buf(store, itoa, i);
1542 continue;
1544 if(state & a_SKIPMASK)
1545 continue;
1547 break;
1549 /* Extension: \$ can be used to expand a variable.
1550 * B(ug|ad) effect: if conversion fails, not written "as-is" */
1551 case '$':
1552 if(il == 0)
1553 goto j_dollar_ungetc;
1554 goto J_var_expand;
1556 default:
1557 j_dollar_ungetc:
1558 /* Follow bash(1) behaviour, print sequence unchanged */
1559 ++il, --ib;
1560 break;
1563 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
1564 state &= ~a_VARSUBST_MASK;
1565 if(*ib == '{')
1566 state |= a_BRACE;
1568 /* Scan variable name */
1569 if(!(state & a_BRACE) || il > 1){
1570 char const *cp, *vp;
1572 ib_save = ib - 1;
1573 if(state & a_BRACE)
1574 --il, ++ib;
1575 vp = ib;
1576 state &= ~a_EXPLODE;
1578 for(i = 0; il > 0; --il, ++ib, ++i){
1579 /* We have some special cases regarding special parameters,
1580 * so ensure these don't cause failure. This code has
1581 * counterparts in code that manages internal variables! */
1582 c = *ib;
1583 if(!a_SHEXP_ISVARC(c)){
1584 if(i == 0){
1585 /* Simply skip over multiplexer */
1586 if(c == '^')
1587 continue;
1588 if(c == '*' || c == '@' || c == '#' || c == '?' ||
1589 c == '!'){
1590 if(c == '@'){
1591 if(quotec == '"')
1592 state |= a_EXPLODE;
1594 --il, ++ib;
1595 ++i;
1598 break;
1599 }else if(a_SHEXP_ISVARC_BAD1ST(c)){
1600 if(i == 0)
1601 state |= a_DIGIT1;
1602 }else
1603 state |= a_NONDIGIT;
1606 /* In skip mode, be easy and.. skip over */
1607 if(state & a_SKIPMASK){
1608 if((state & a_BRACE) && il > 0 && *ib == '}')
1609 --il, ++ib;
1610 continue;
1613 /* Handle the scan error cases */
1614 if((state & (a_DIGIT1 | a_NONDIGIT)) == (a_DIGIT1 | a_NONDIGIT)){
1615 if(state & a_BRACE){
1616 if(il > 0 && *ib == '}')
1617 --il, ++ib;
1618 else
1619 rv |= n_SHEXP_STATE_ERR_GROUPOPEN;
1621 if(flags & n_SHEXP_PARSE_LOG)
1622 n_err(_("Invalid identifier for ${}: %.*s: %.*s\n"),
1623 (int)input->l, input->s,
1624 (int)PTR2SIZE(ib - ib_save), ib_save);
1625 rv |= n_SHEXP_STATE_ERR_IDENTIFIER;
1626 goto jerr_ib_save;
1627 }else if(i == 0){
1628 if(state & a_BRACE){
1629 if(il == 0 || *ib != '}'){
1630 if(flags & n_SHEXP_PARSE_LOG)
1631 n_err(_("No closing brace for ${}: %.*s: %.*s\n"),
1632 (int)input->l, input->s,
1633 (int)PTR2SIZE(ib - ib_save), ib_save);
1634 rv |= n_SHEXP_STATE_ERR_GROUPOPEN;
1635 goto jerr_ib_save;
1637 --il, ++ib;
1639 if(i == 0){
1640 if(flags & n_SHEXP_PARSE_LOG)
1641 n_err(_("Bad substitution for ${}: %.*s: %.*s\n"),
1642 (int)input->l, input->s,
1643 (int)PTR2SIZE(ib - ib_save), ib_save);
1644 rv |= n_SHEXP_STATE_ERR_BADSUB;
1645 goto jerr_ib_save;
1648 /* Simply write dollar as-is? */
1649 c = '$';
1650 }else{
1651 if(state & a_BRACE){
1652 if(il == 0 || *ib != '}'){
1653 if(flags & n_SHEXP_PARSE_LOG)
1654 n_err(_("No closing brace for ${}: %.*s: %.*s\n"),
1655 (int)input->l, input->s,
1656 (int)PTR2SIZE(ib - ib_save), ib_save);
1657 rv |= n_SHEXP_STATE_ERR_GROUPOPEN;
1658 goto jerr_ib_save;
1660 --il, ++ib;
1662 if(i == 0){
1663 if(flags & n_SHEXP_PARSE_LOG)
1664 n_err(_("Bad substitution for ${}: %.*s: %.*s\n"),
1665 (int)input->l, input->s,
1666 (int)PTR2SIZE(ib - ib_save), ib_save);
1667 rv |= n_SHEXP_STATE_ERR_BADSUB;
1668 goto jerr_ib_save;
1672 if(flags & n_SHEXP_PARSE_DRYRUN)
1673 continue;
1675 /* We may shall explode "${@}" to a series of successive,
1676 * properly quoted tokens (instead). The first exploded
1677 * cookie will join with the current token */
1678 if(n_UNLIKELY(state & a_EXPLODE) &&
1679 !(flags & n_SHEXP_PARSE_DRYRUN) && cookie != NULL){
1680 if(n_var_vexplode(cookie))
1681 state |= a_COOKIE;
1682 /* On the other hand, if $@ expands to nothing and is the
1683 * sole content of this quote then act like the shell does
1684 * and throw away the entire atxplode construct */
1685 else if(!(rv & n_SHEXP_STATE_OUTPUT) &&
1686 il == 1 && *ib == '"' &&
1687 ib_save == &input->s[1] && ib_save[-1] == '"')
1688 ++ib, --il;
1689 else
1690 continue;
1691 input->s = n_UNCONST(ib);
1692 input->l = il;
1693 goto jrestart_empty;
1696 /* Check getenv(3) shall no internal variable exist!
1697 * XXX We have some common idioms, avoid memory for them
1698 * XXX Even better would be var_vlook_buf()! */
1699 if(i == 1){
1700 switch(*vp){
1701 case '?': vp = n_qm; break;
1702 case '!': vp = n_em; break;
1703 case '*': vp = n_star; break;
1704 case '@': vp = n_at; break;
1705 case '#': vp = n_ns; break;
1706 default: goto j_var_look_buf;
1708 }else
1709 j_var_look_buf:
1710 vp = savestrbuf(vp, i);
1712 if((cp = n_var_vlook(vp, TRU1)) != NULL){
1713 rv |= n_SHEXP_STATE_OUTPUT;
1714 store = n_string_push_cp(store, cp);
1715 for(; (c = *cp) != '\0'; ++cp)
1716 if(cntrlchar(c)){
1717 rv |= n_SHEXP_STATE_CONTROL;
1718 break;
1721 continue;
1724 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
1725 continue;
1729 if(!(state & a_SKIPMASK)){
1730 rv |= n_SHEXP_STATE_OUTPUT;
1731 if(cntrlchar(c))
1732 rv |= n_SHEXP_STATE_CONTROL;
1733 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1734 store = n_string_push_c(store, c);
1736 } /* }}} */
1738 if(quotec != '\0' && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_CLOSE)){
1739 if(flags & n_SHEXP_PARSE_LOG)
1740 n_err(_("No closing quote: %.*s\n"), (int)input->l, input->s);
1741 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
1744 jleave:
1745 assert(!(state & a_COOKIE));
/* Dry-run with a store: hand back the raw, unexpanded input that has been
 * consumed for this token */
1746 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
1747 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
1748 rv |= n_SHEXP_STATE_OUTPUT;
/* a_CONSUME is set by the semicolon sequencer, which has injected the
 * remaining input via n_go_input_inject(): report input as fully used up */
1751 if(state & a_CONSUME){
1752 input->s = n_UNCONST(&ib[il]);
1753 input->l = 0;
1754 }else{
1755 if(flags & n_SHEXP_PARSE_TRIM_SPACE){
1756 for(; il > 0; ++ib, --il){
1757 if(!blankspacechar(*ib))
1758 break;
1759 rv |= n_SHEXP_STATE_WS_TRAIL;
1763 if(flags & n_SHEXP_PARSE_TRIM_IFSSPACE){
1764 for(; il > 0; ++ib, --il){
1765 if(strchr(ifs_ws, *ib) == NULL)
1766 break;
1767 rv |= n_SHEXP_STATE_WS_TRAIL;
1771 input->l = il;
1772 input->s = n_UNCONST(ib);
1775 if(!(rv & n_SHEXP_STATE_STOP)){
1776 if(!(rv & (n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_META_MASK)) &&
1777 (flags & n_SHEXP_PARSE_IGNORE_EMPTY) && il > 0)
1778 goto jrestart_empty;
1779 if(/*!(rv & n_SHEXP_STATE_OUTPUT) &&*/ il == 0)
1780 rv |= n_SHEXP_STATE_STOP;
1783 if((state & a_SKIPT) && !(rv & n_SHEXP_STATE_STOP) &&
1784 (flags & n_SHEXP_PARSE_META_MASK))
1785 goto jrestart;
1786 jleave_quick:
1787 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1788 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1789 NYD2_LEAVE;
1790 return rv;
1793 FL char *
1794 n_shexp_parse_token_cp(enum n_shexp_parse_flags flags, char const **cp){
1795 struct str input;
1796 struct n_string sou, *soup;
1797 char *rv;
1798 enum n_shexp_state shs;
1799 NYD2_ENTER;
1801 assert(cp != NULL);
1803 input.s = n_UNCONST(*cp);
1804 input.l = UIZ_MAX;
1805 soup = n_string_creat_auto(&sou);
1807 shs = n_shexp_parse_token(flags, soup, &input, NULL);
1808 if(shs & n_SHEXP_STATE_ERR_MASK){
1809 soup = n_string_assign_cp(soup, *cp);
1810 *cp = NULL;
1811 }else
1812 *cp = input.s;
1814 rv = n_string_cp(soup);
1815 /*n_string_gut(n_string_drop_ownership(soup));*/
1816 NYD2_LEAVE;
1817 return rv;
1820 FL struct n_string *
1821 n_shexp_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1822 struct a_shexp_quote_lvl sql;
1823 struct a_shexp_quote_ctx sqc;
1824 NYD2_ENTER;
1826 assert(store != NULL);
1827 assert(input != NULL);
1828 assert(input->l == 0 || input->s != NULL);
1830 memset(&sqc, 0, sizeof sqc);
1831 sqc.sqc_store = store;
1832 sqc.sqc_input.s = input->s;
1833 if((sqc.sqc_input.l = input->l) == UIZ_MAX)
1834 sqc.sqc_input.l = strlen(input->s);
1835 sqc.sqc_flags = rndtrip ? a_SHEXP_QUOTE_ROUNDTRIP : a_SHEXP_QUOTE_NONE;
1837 if(sqc.sqc_input.l == 0)
1838 store = n_string_push_buf(store, "''", sizeof("''") -1);
1839 else{
1840 memset(&sql, 0, sizeof sql);
1841 sql.sql_dat = sqc.sqc_input;
1842 sql.sql_flags = sqc.sqc_flags;
1843 a_shexp__quote(&sqc, &sql);
1845 NYD2_LEAVE;
1846 return store;
1849 FL char *
1850 n_shexp_quote_cp(char const *cp, bool_t rndtrip){
1851 struct n_string store;
1852 struct str input;
1853 char *rv;
1854 NYD2_ENTER;
1856 assert(cp != NULL);
1858 input.s = n_UNCONST(cp);
1859 input.l = UIZ_MAX;
1860 rv = n_string_cp(n_shexp_quote(n_string_creat_auto(&store), &input,
1861 rndtrip));
1862 n_string_gut(n_string_drop_ownership(&store));
1863 NYD2_LEAVE;
1864 return rv;
1867 FL bool_t
1868 n_shexp_is_valid_varname(char const *name){
1869 char lc, c;
1870 bool_t rv;
1871 NYD2_ENTER;
1873 rv = FAL0;
1875 for(lc = '\0'; (c = *name++) != '\0'; lc = c)
1876 if(!a_SHEXP_ISVARC(c))
1877 goto jleave;
1878 else if(lc == '\0' && a_SHEXP_ISVARC_BAD1ST(c))
1879 goto jleave;
1880 if(a_SHEXP_ISVARC_BADNST(lc))
1881 goto jleave;
1883 rv = TRU1;
1884 jleave:
1885 NYD2_LEAVE;
1886 return rv;
1889 FL int
1890 c_shcodec(void *vp){
1891 struct str in;
1892 struct n_string sou_b, *soup;
1893 si32_t nerrn;
1894 size_t alen;
1895 bool_t norndtrip;
1896 char const **argv, *varname, *act, *cp;
1898 soup = n_string_creat_auto(&sou_b);
1899 argv = vp;
1900 varname = (n_pstate & n_PS_ARGMOD_VPUT) ? *argv++ : NULL;
1902 act = *argv;
1903 for(cp = act; *cp != '\0' && !blankspacechar(*cp); ++cp)
1905 if((norndtrip = (*act == '+')))
1906 ++act;
1907 if(act == cp)
1908 goto jesynopsis;
1909 alen = PTR2SIZE(cp - act);
1910 if(*cp != '\0')
1911 ++cp;
1913 in.l = strlen(in.s = n_UNCONST(cp));
1914 nerrn = n_ERR_NONE;
1916 if(is_ascncaseprefix(act, "encode", alen))
1917 soup = n_shexp_quote(soup, &in, !norndtrip);
1918 else if(!norndtrip && is_ascncaseprefix(act, "decode", alen)){
1919 for(;;){
1920 enum n_shexp_state shs;
1922 shs = n_shexp_parse_token((n_SHEXP_PARSE_LOG |
1923 n_SHEXP_PARSE_IGNORE_EMPTY), soup, &in, NULL);
1924 if(shs & n_SHEXP_STATE_ERR_MASK){
1925 soup = n_string_assign_cp(soup, cp);
1926 nerrn = n_ERR_CANCELED;
1927 vp = NULL;
1928 break;
1930 if(shs & n_SHEXP_STATE_STOP)
1931 break;
1933 }else
1934 goto jesynopsis;
1936 if(varname != NULL){
1937 cp = n_string_cp(soup);
1938 if(!n_var_vset(varname, (uintptr_t)cp)){
1939 nerrn = n_ERR_NOTSUP;
1940 vp = NULL;
1942 }else{
1943 struct str out;
1945 in.s = n_string_cp(soup);
1946 in.l = soup->s_len;
1947 makeprint(&in, &out);
1948 if(fprintf(n_stdout, "%s\n", out.s) < 0){
1949 nerrn = n_err_no;
1950 vp = NULL;
1952 free(out.s);
1955 jleave:
1956 n_pstate_err_no = nerrn;
1957 NYD_LEAVE;
1958 return (vp != NULL ? 0 : 1);
1959 jesynopsis:
1960 n_err(_("Synopsis: shcodec: <[+]e[ncode]|d[ecode]> <rest-of-line>\n"));
1961 nerrn = n_ERR_INVAL;
1962 vp = NULL;
1963 goto jleave;
1966 /* s-it-mode */