README: review
[s-mailx.git] / shexp.c
blob461db89d8996ae15d8c16094eb6d7b47eb0ec738
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Shell "word", file- and other name expansions, incl. file globbing.
3 *@ TODO v15: peek signal states while opendir/readdir/etc.
4 *@ TODO "Magic solidus" used as path separator.
6 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
7 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
8 * SPDX-License-Identifier: BSD-3-Clause TODO ISC
9 */
11 * Copyright (c) 1980, 1993
12 * The Regents of the University of California. All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
38 #undef n_FILE
39 #define n_FILE shexp
41 #ifndef HAVE_AMALGAMATION
42 # include "nail.h"
43 #endif
45 #include <pwd.h>
47 #ifdef HAVE_FNMATCH
48 # include <dirent.h>
49 # include <fnmatch.h>
50 #endif
52 /* POSIX says
53 * Environment variable names used by the utilities in the Shell and
54 * Utilities volume of POSIX.1-2008 consist solely of uppercase
55 * letters, digits, and the <underscore> ('_') from the characters
56 * defined in Portable Character Set and do not begin with a digit.
57 * Other characters may be permitted by an implementation;
58 * applications shall tolerate the presence of such names.
59 * We do support the hyphen-minus "-" (except in last position for ${x[:]-y}).
60 * We support some special parameter names for one-letter(++) variable names;
61 * these have counterparts in the code that manages internal variables,
62 * and some more special treatment below! */
/* True if C may occur in a variable name: alphanumeric, '_' or '-' */
63 #define a_SHEXP_ISVARC(C) (alnumchar(C) || (C) == '_' || (C) == '-')
/* True if C is not acceptable as the first byte of a variable name (digits) */
64 #define a_SHEXP_ISVARC_BAD1ST(C) (digitchar(C)) /* (Actually assumed below!) */
/* True if C is not acceptable as the last byte of a variable name: the
 * hyphen-minus, which would clash with ${x[:]-y} as stated above */
65 #define a_SHEXP_ISVARC_BADNST(C) ((C) == '-')
/* Control flags of a_shexp__quote(): one mode bit (ROUNDTRIP) plus the quote
 * type bits (T_*), the latter being collected in a_SHEXP_QUOTE_T_MASK */
67 enum a_shexp_quote_flags{
68 a_SHEXP_QUOTE_NONE,
69 a_SHEXP_QUOTE_ROUNDTRIP = 1u<<0, /* Result won't be consumed immediately */
/* Quote types a_shexp__quote() can decide for */
71 a_SHEXP_QUOTE_T_REVSOL = 1u<<8, /* Type: by reverse solidus */
72 a_SHEXP_QUOTE_T_SINGLE = 1u<<9, /* Type: single-quotes */
73 a_SHEXP_QUOTE_T_DOUBLE = 1u<<10, /* Type: double-quotes */
74 a_SHEXP_QUOTE_T_DOLLAR = 1u<<11, /* Type: dollar-single-quotes */
75 a_SHEXP_QUOTE_T_MASK = a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
76 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR,
/* NOTE(review): presumably the first bit shift not claimed by the flags
 * above; it is not referenced in the visible code -- confirm */
78 a_SHEXP_QUOTE__FREESHIFT = 16u
81 #ifdef HAVE_FNMATCH
/* State of one directory level of a_shexp__glob(); a_shexp_globname() sets up
 * the outermost one, deeper levels are copies created by a_shexp__glob() */
82 struct a_shexp_glob_ctx{
83 char const *sgc_patdat; /* Remaining pattern (at and below level) */
/* Number of bytes of sgc_patdat which are still to be worked */
84 size_t sgc_patlen;
85 struct n_string *sgc_outer; /* Resolved path up to this level */
/* Bits of the a_SILENT / a_DEEP enumeration local to a_shexp__glob() */
86 ui32_t sgc_flags;
/* NOTE(review): looks like explicit padding, never accessed in visible code */
87 ui8_t sgc__dummy[4];
89 #endif
/* Context which is shared by all levels of one a_shexp__quote() run */
91 struct a_shexp_quote_ctx{
92 struct n_string *sqc_store; /* Result storage */
93 struct str sqc_input; /* Input data, topmost level */
/* Counters per quote type; a_shexp__quote() only increments them in code
 * which is guarded by a_SHEXP_QUOTE_RECURSE (currently #undef'd there) */
94 ui32_t sqc_cnt_revso;
95 ui32_t sqc_cnt_single;
96 ui32_t sqc_cnt_double;
97 ui32_t sqc_cnt_dollar;
98 enum a_shexp_quote_flags sqc_flags;
/* NOTE(review): looks like explicit padding, never accessed in visible code */
99 ui8_t sqc__dummy[4];
/* One (sub)string level of a_shexp__quote(); levels form a singly linked list
 * via sql_link which a_shexp__quote() reverses before generating output */
102 struct a_shexp_quote_lvl{
103 struct a_shexp_quote_lvl *sql_link; /* Outer level */
104 struct str sql_dat; /* This level (has to) handle(d) */
/* Quote type (and mode) flags that were determined for this level */
105 enum a_shexp_quote_flags sql_flags;
/* NOTE(review): looks like explicit padding, never accessed in visible code */
106 ui8_t sql__dummy[4];
109 /* Locate the user's mailbox file (where new, unread mail is queued) */
/* If force is set *inbox* and $MAIL are not consulted, the result is always
 * the built-in VAL_MAIL "/" user path */
110 static char *a_shexp_findmail(char const *user, bool_t force);
112 /* Expand ^~/? and ^~USER/? constructs.
113 * Returns the completely resolved (maybe empty or identical to input)
114 * n_autorec_alloc()ed string */
115 static char *a_shexp_tilde(char const *s);
117 /* Perform fnmatch(3). May return NULL on error */
/* With FEXP_MULTIOK several matches are returned as consecutive NUL terminated
 * strings followed by an additional NUL, and n_PS_EXPAND_MULTIRESULT is set */
118 static char *a_shexp_globname(char const *name, enum fexp_mode fexpm);
119 #ifdef HAVE_FNMATCH
/* Directory walker of a_shexp_globname(): works one pattern level, recurses
 * for deeper ones, appends matches to *slpp; returns FAL0 on error */
120 static bool_t a_shexp__glob(struct a_shexp_glob_ctx *sgcp,
121 struct n_strlist **slpp);
/* qsort(3) comparator on (struct n_strlist*) elements, via asccasecmp() */
122 static int a_shexp__globsort(void const *cvpa, void const *cvpb);
123 #endif
125 /* Parse an input string and create a sh(1)ell-quoted result */
126 static void a_shexp__quote(struct a_shexp_quote_ctx *sqcp,
127 struct a_shexp_quote_lvl *sqlp);
129 static char *
130 a_shexp_findmail(char const *user, bool_t force){
131 char *rv;
132 char const *cp;
133 NYD2_ENTER;
135 if(!force){
136 if((cp = ok_vlook(inbox)) != NULL && *cp != '\0'){
137 /* _NFOLDER extra introduced to avoid % recursion loops */
138 if((rv = fexpand(cp, FEXP_NSPECIAL | FEXP_NFOLDER | FEXP_NSHELL)
139 ) != NULL)
140 goto jleave;
141 n_err(_("*inbox* expansion failed, using $MAIL / built-in: %s\n"), cp);
143 /* Heirloom compatibility: an IMAP *folder* becomes "%" */
144 #ifdef HAVE_IMAP
145 else if(cp == NULL && !strcmp(user, ok_vlook(LOGNAME)) &&
146 which_protocol(cp = n_folder_query(), FAL0, FAL0, NULL)
147 == PROTO_IMAP){
148 /* TODO Compat handling of *folder* with IMAP! */
149 n_OBSOLETE("no more expansion of *folder* in \"%\": "
150 "please set *inbox*");
151 rv = savestr(cp);
152 goto jleave;
154 #endif
156 if((cp = ok_vlook(MAIL)) != NULL){
157 rv = savestr(cp);
158 goto jleave;
162 /* C99 */{
163 size_t ul, i;
165 ul = strlen(user) +1;
166 i = sizeof(VAL_MAIL) -1 + 1 + ul;
168 rv = n_autorec_alloc(i);
169 memcpy(rv, VAL_MAIL, (i = sizeof(VAL_MAIL) -1));
170 rv[i] = '/';
171 memcpy(&rv[++i], user, ul);
173 jleave:
174 NYD2_LEAVE;
175 return rv;
178 static char *
179 a_shexp_tilde(char const *s){
180 struct passwd *pwp;
181 size_t nl, rl;
182 char const *rp, *np;
183 char *rv;
184 NYD2_ENTER;
186 if(*(rp = &s[1]) == '/' || *rp == '\0'){
187 np = ok_vlook(HOME);
188 rl = strlen(rp);
189 }else{
190 if((rp = strchr(np = rp, '/')) != NULL){
191 nl = PTR2SIZE(rp - np);
192 np = savestrbuf(np, nl);
193 rl = strlen(rp);
194 }else
195 rl = 0;
197 if((pwp = getpwnam(np)) == NULL){
198 rv = savestr(s);
199 goto jleave;
201 np = pwp->pw_dir;
204 nl = strlen(np);
205 rv = n_autorec_alloc(nl + 1 + rl +1);
206 memcpy(rv, np, nl);
207 if(rl > 0){
208 memcpy(rv + nl, rp, rl);
209 nl += rl;
211 rv[nl] = '\0';
212 jleave:
213 NYD2_LEAVE;
214 return rv;
217 static char *
218 a_shexp_globname(char const *name, enum fexp_mode fexpm){
219 #ifdef HAVE_FNMATCH
220 struct a_shexp_glob_ctx sgc;
221 struct n_string outer;
222 struct n_strlist *slp;
223 char *cp;
224 NYD_ENTER;
226 memset(&sgc, 0, sizeof sgc);
227 sgc.sgc_patlen = strlen(name);
228 sgc.sgc_patdat = savestrbuf(name, sgc.sgc_patlen);
229 sgc.sgc_outer = n_string_reserve(n_string_creat(&outer), sgc.sgc_patlen);
230 sgc.sgc_flags = ((fexpm & FEXP_SILENT) != 0);
231 slp = NULL;
232 if(a_shexp__glob(&sgc, &slp))
233 cp = (char*)1;
234 else
235 cp = NULL;
236 n_string_gut(&outer);
238 if(cp == NULL)
239 goto jleave;
241 if(slp == NULL){
242 cp = n_UNCONST(N_("File pattern does not match"));
243 goto jerr;
244 }else if(slp->sl_next == NULL)
245 cp = savestrbuf(slp->sl_dat, slp->sl_len);
246 else if(fexpm & FEXP_MULTIOK){
247 struct n_strlist **sorta, *xslp;
248 size_t i, no, l;
250 no = l = 0;
251 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next){
252 ++no;
253 l += xslp->sl_len + 1;
256 sorta = n_alloc(sizeof(*sorta) * no);
257 no = 0;
258 for(xslp = slp; xslp != NULL; xslp = xslp->sl_next)
259 sorta[no++] = xslp;
260 qsort(sorta, no, sizeof *sorta, &a_shexp__globsort);
262 cp = n_autorec_alloc(++l);
263 l = 0;
264 for(i = 0; i < no; ++i){
265 xslp = sorta[i];
266 memcpy(&cp[l], xslp->sl_dat, xslp->sl_len);
267 l += xslp->sl_len;
268 cp[l++] = '\0';
270 cp[l] = '\0';
272 n_free(sorta);
273 n_pstate |= n_PS_EXPAND_MULTIRESULT;
274 }else{
275 cp = n_UNCONST(N_("File pattern matches multiple results"));
276 goto jerr;
279 jleave:
280 while(slp != NULL){
281 struct n_strlist *tmp = slp;
283 slp = slp->sl_next;
284 n_free(tmp);
286 NYD_LEAVE;
287 return cp;
289 jerr:
290 if(!(fexpm & FEXP_SILENT)){
291 name = n_shexp_quote_cp(name, FAL0);
292 n_err("%s: %s\n", V_(cp), name);
294 cp = NULL;
295 goto jleave;
297 #else /* HAVE_FNMATCH */
298 n_UNUSED(fexpm);
300 if(!(fexpm & FEXP_SILENT))
301 n_err(_("No filename pattern (fnmatch(3)) support compiled in\n"));
302 return savestr(name);
303 #endif
306 #ifdef HAVE_FNMATCH
307 static bool_t
308 a_shexp__glob(struct a_shexp_glob_ctx *sgcp, struct n_strlist **slpp){
309 enum{a_SILENT = 1<<0, a_DEEP=1<<1, a_SALLOC=1<<2};
311 struct a_shexp_glob_ctx nsgc;
312 struct dirent *dep;
313 DIR *dp;
314 size_t old_outerlen;
315 char const *ccp, *myp;
316 NYD2_ENTER;
318 /* We need some special treatment for the outermost level.
319 * All along our way, normalize path separators */
320 if(!(sgcp->sgc_flags & a_DEEP)){
321 if(sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/'){
322 myp = n_string_cp(n_string_push_c(sgcp->sgc_outer, '/'));
324 ++sgcp->sgc_patdat;
325 while(--sgcp->sgc_patlen > 0 && sgcp->sgc_patdat[0] == '/');
326 }else
327 myp = "./";
328 }else
329 myp = n_string_cp(sgcp->sgc_outer);
330 old_outerlen = sgcp->sgc_outer->s_len;
332 /* Separate current directory/pattern level from any possible remaining
333 * pattern in order to be able to use it for fnmatch(3) */
334 if((ccp = memchr(sgcp->sgc_patdat, '/', sgcp->sgc_patlen)) == NULL)
335 nsgc.sgc_patlen = 0;
336 else{
337 nsgc = *sgcp;
338 nsgc.sgc_flags |= a_DEEP;
339 sgcp->sgc_patlen = PTR2SIZE((nsgc.sgc_patdat = &ccp[1]) -
340 &sgcp->sgc_patdat[0]);
341 nsgc.sgc_patlen -= sgcp->sgc_patlen;
343 /* Trim solidus, everywhere */
344 if(sgcp->sgc_patlen > 0){
345 assert(sgcp->sgc_patdat[sgcp->sgc_patlen -1] == '/');
346 ((char*)n_UNCONST(sgcp->sgc_patdat))[--sgcp->sgc_patlen] = '\0';
348 while(nsgc.sgc_patlen > 0 && nsgc.sgc_patdat[0] == '/'){
349 --nsgc.sgc_patlen;
350 ++nsgc.sgc_patdat;
354 /* Our current directory level */
355 /* xxx Plenty of room for optimizations, like quickshot lstat(2) which may
356 * xxx be the (sole) result depending on pattern surroundings, etc. */
357 if((dp = opendir(myp)) == NULL){
358 int err;
360 switch((err = n_err_no)){
361 case n_ERR_NOTDIR:
362 ccp = N_("cannot access paths under non-directory");
363 goto jerr;
364 case n_ERR_NOENT:
365 ccp = N_("path component of (sub)pattern non-existent");
366 goto jerr;
367 case n_ERR_ACCES:
368 ccp = N_("file permission for file (sub)pattern denied");
369 goto jerr;
370 case n_ERR_NFILE:
371 case n_ERR_MFILE:
372 ccp = N_("file descriptor limit reached, cannot open directory");
373 goto jerr;
374 default:
375 ccp = N_("cannot open path component as directory");
376 goto jerr;
380 /* As necessary, quote bytes in the current pattern TODO This will not
381 * TODO truly work out in case the user would try to quote a character
382 * TODO class, for example: in "\[a-z]" the "\" would be doubled! For that
383 * TODO to work out, we need the original user input or the shell-expression
384 * TODO parse tree, otherwise we do not know what is desired! */
385 /* C99 */{
386 char *ncp;
387 size_t i;
388 bool_t need;
390 for(need = FAL0, i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
391 switch(*myp){
392 case '\'': case '"': case '\\': case '$':
393 case ' ': case '\t':
394 need = TRU1;
395 ++i;
396 /* FALLTHRU */
397 default:
398 ++i;
399 break;
402 if(need){
403 ncp = n_autorec_alloc(i +1);
404 for(i = 0, myp = sgcp->sgc_patdat; *myp != '\0'; ++myp)
405 switch(*myp){
406 case '\'': case '"': case '\\': case '$':
407 case ' ': case '\t':
408 ncp[i++] = '\\';
409 /* FALLTHRU */
410 default:
411 ncp[i++] = *myp;
412 break;
414 ncp[i] = '\0';
415 myp = ncp;
416 }else
417 myp = sgcp->sgc_patdat;
420 while((dep = readdir(dp)) != NULL){
421 switch(fnmatch(myp, dep->d_name, FNM_PATHNAME | FNM_PERIOD)){
422 case 0:{
423 /* A match expresses the desire to recurse if there is more pattern */
424 if(nsgc.sgc_patlen > 0){
425 bool_t isdir;
427 n_string_push_cp((sgcp->sgc_outer->s_len > 1
428 ? n_string_push_c(sgcp->sgc_outer, '/') : sgcp->sgc_outer),
429 dep->d_name);
431 isdir = FAL0;
432 #ifdef HAVE_DIRENT_TYPE
433 if(dep->d_type == DT_DIR)
434 isdir = TRU1;
435 else if(dep->d_type == DT_LNK || dep->d_type == DT_UNKNOWN)
436 #endif
438 struct stat sb;
440 if(stat(n_string_cp(sgcp->sgc_outer), &sb)){
441 ccp = N_("I/O error when querying file status");
442 goto jerr;
443 }else if(S_ISDIR(sb.st_mode))
444 isdir = TRU1;
447 /* TODO We recurse with current dir FD open, which could E[MN]FILE!
448 * TODO Instead save away a list of such n_string's for later */
449 if(isdir && !a_shexp__glob(&nsgc, slpp)){
450 ccp = (char*)1;
451 goto jleave;
454 n_string_trunc(sgcp->sgc_outer, old_outerlen);
455 }else{
456 struct n_strlist *slp;
457 size_t i, j;
459 i = strlen(dep->d_name);
460 j = (old_outerlen > 0) ? old_outerlen + 1 + i : i;
461 slp = n_STRLIST_ALLOC(j);
462 *slpp = slp;
463 slpp = &slp->sl_next;
464 slp->sl_next = NULL;
465 if((j = old_outerlen) > 0){
466 memcpy(&slp->sl_dat[0], sgcp->sgc_outer->s_dat, j);
467 if(slp->sl_dat[j -1] != '/')
468 slp->sl_dat[j++] = '/';
470 memcpy(&slp->sl_dat[j], dep->d_name, i);
471 slp->sl_dat[j += i] = '\0';
472 slp->sl_len = j;
474 }break;
475 case FNM_NOMATCH:
476 break;
477 default:
478 ccp = N_("fnmatch(3) cannot handle file (sub)pattern");
479 goto jerr;
483 ccp = NULL;
484 jleave:
485 if(dp != NULL)
486 closedir(dp);
487 NYD2_LEAVE;
488 return (ccp == NULL);
490 jerr:
491 if(!(sgcp->sgc_flags & a_SILENT)){
492 char const *s2, *s3;
494 if(sgcp->sgc_outer->s_len > 0){
495 s2 = n_shexp_quote_cp(n_string_cp(sgcp->sgc_outer), FAL0);
496 s3 = "/";
497 }else
498 s2 = s3 = n_empty;
500 n_err("%s: %s%s%s\n", V_(ccp), s2, s3,
501 n_shexp_quote_cp(sgcp->sgc_patdat, FAL0));
503 goto jleave;
506 static int
507 a_shexp__globsort(void const *cvpa, void const *cvpb){
508 int rv;
509 struct n_strlist const * const *slpa, * const *slpb;
510 NYD2_ENTER;
512 slpa = cvpa;
513 slpb = cvpb;
514 rv = asccasecmp((*slpa)->sl_dat, (*slpb)->sl_dat);
515 NYD2_LEAVE;
516 return rv;
518 #endif /* HAVE_FNMATCH */
/* Create a sh(1)ell-quoted version of the data of level sqlp and append it to
 * sqcp->sqc_store.
 * a_SHEXP_QUOTE_RECURSE is #undef'd right below, therefore "jrecurse" is only
 * an alias of "jstep": the first loop does nothing but scan the input and
 * record the quote type it asks for in flags.  Thereafter the list of levels
 * is reversed, the "most fancy" quote type seen is opened once, all data is
 * written in that style, and the quote is closed */
520 static void
521 a_shexp__quote(struct a_shexp_quote_ctx *sqcp, struct a_shexp_quote_lvl *sqlp){
522 /* XXX Because of the problems caused by ISO C multibyte interface we cannot
523 * XXX use the recursive implementation because of stateful encodings.
524 * XXX I.e., if a quoted substring cannot be self-contained - the data after
525 * XXX the quote relies on "the former state", then this doesn't make sense.
526 * XXX Therefore this is not fully programmed out but instead only detects
527 * XXX the "most fancy" quoting necessary, and directly does that.
528 * XXX As a result of this, T_REVSOL and T_DOUBLE are not even considered.
529 * XXX Otherwise we rather have to convert to wide first and act on that,
530 * XXX e.g., call visual_info(n_VISUAL_INFO_WOUT_CREATE) on entire input */
531 #undef a_SHEXP_QUOTE_RECURSE /* XXX (Needs complete revisit, then) */
532 #ifdef a_SHEXP_QUOTE_RECURSE
533 # define jrecurse jrecurse
534 struct a_shexp_quote_lvl sql;
535 #else
536 # define jrecurse jstep
537 #endif
538 struct n_visual_info_ctx vic;
/* The two members are used one after the other: head while the level list is
 * reversed, store while output is generated */
539 union {struct a_shexp_quote_lvl *head; struct n_string *store;} u;
540 ui32_t flags;
541 size_t il;
542 char const *ib, *ib_base;
543 NYD2_ENTER;
545 ib_base = ib = sqlp->sql_dat.s;
546 il = sqlp->sql_dat.l;
547 flags = sqlp->sql_flags;
549 /* Iterate over the entire input, classify characters and type of quotes
550 * along the way. Whenever a quote change has to be applied, adjust flags
551 * for the new situation -, setup sql.* and recurse- */
/* (Without a_SHEXP_QUOTE_RECURSE this pass never produces output: each branch
 * either leaves flags alone or switches the quote type, then steps one byte) */
552 while(il > 0){
553 char c;
555 c = *ib;
556 if(cntrlchar(c)){
557 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
558 goto jstep;
559 if(c == '\t' && (flags & (a_SHEXP_QUOTE_T_REVSOL |
560 a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOUBLE)))
561 goto jstep;
562 #ifdef a_SHEXP_QUOTE_RECURSE
563 ++sqcp->sqc_cnt_dollar;
564 #endif
565 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
566 goto jrecurse;
567 }else if(blankspacechar(c) || c == '|' || c == '&' || c == ';' ||
568 /* Whereas we don't support those, quote them for the sh(1)ell */
569 c == '(' || c == ')' || c == '<' || c == '>' ||
570 c == '"' || c == '$'){
571 if(flags & a_SHEXP_QUOTE_T_MASK)
572 goto jstep;
573 #ifdef a_SHEXP_QUOTE_RECURSE
574 ++sqcp->sqc_cnt_single;
575 #endif
576 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
577 goto jrecurse;
578 }else if(c == '\''){
/* A single quote cannot be expressed within single-quotes: upgrade to $'' */
579 if(flags & (a_SHEXP_QUOTE_T_MASK & ~a_SHEXP_QUOTE_T_SINGLE))
580 goto jstep;
581 #ifdef a_SHEXP_QUOTE_RECURSE
582 ++sqcp->sqc_cnt_dollar;
583 #endif
584 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
585 goto jrecurse;
586 }else if(c == '\\' || (c == '#' && ib == ib_base)){
587 if(flags & a_SHEXP_QUOTE_T_MASK)
588 goto jstep;
589 #ifdef a_SHEXP_QUOTE_RECURSE
590 ++sqcp->sqc_cnt_single;
591 #endif
592 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
593 goto jrecurse;
594 }else if(!asciichar(c)){
595 /* Need to keep together multibytes */
596 #ifdef a_SHEXP_QUOTE_RECURSE
597 memset(&vic, 0, sizeof vic);
598 vic.vic_indat = ib;
599 vic.vic_inlen = il;
600 n_visual_info(&vic,
601 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
602 #endif
603 /* xxx check whether resulting \u would be ASCII */
604 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP) ||
605 (flags & a_SHEXP_QUOTE_T_DOLLAR)){
606 #ifdef a_SHEXP_QUOTE_RECURSE
607 ib = vic.vic_oudat;
608 il = vic.vic_oulen;
609 continue;
610 #else
611 goto jstep;
612 #endif
614 #ifdef a_SHEXP_QUOTE_RECURSE
615 ++sqcp->sqc_cnt_dollar;
616 #endif
617 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
618 goto jrecurse;
619 }else
620 jstep:
621 ++ib, --il;
623 sqlp->sql_flags = flags;
625 /* Level made the great and completed processing input. Reverse the list of
626 * levels, detect the "most fancy" quote type needed along this way */
627 /* XXX Due to restriction as above very crude */
/* il becomes the size hint for the store: the data length of all levels, plus
 * half of it again for each level which needs any quote type */
628 for(flags = 0, il = 0, u.head = NULL; sqlp != NULL;){
629 struct a_shexp_quote_lvl *tmp;
631 tmp = sqlp->sql_link;
632 sqlp->sql_link = u.head;
633 u.head = sqlp;
634 il += sqlp->sql_dat.l;
635 if(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK)
636 il += (sqlp->sql_dat.l >> 1);
637 flags |= sqlp->sql_flags;
638 sqlp = tmp;
640 sqlp = u.head;
642 /* Finally work the substrings in the correct order, adjusting quotes along
643 * the way as necessary. Start off with the "most fancy" quote, so that
644 * the user sees an overall boundary she can orientate herself on.
645 * We do it like that to be able to give the user some "encapsulation
646 * experience", to address what strikes me is a problem of sh(1)ell quoting:
647 * different to, e.g., perl(1), where you see at a glance where a string
648 * starts and ends, sh(1) quoting occurs at the "top level", disrupting the
649 * visual appearance of "a string" as such */
650 u.store = n_string_reserve(sqcp->sqc_store, il);
/* Open the quote; flags is reduced to the single type bit which was chosen */
652 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
653 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
654 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOLLAR;
655 }else if(flags & a_SHEXP_QUOTE_T_DOUBLE){
656 u.store = n_string_push_c(u.store, '"');
657 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_DOUBLE;
658 }else if(flags & a_SHEXP_QUOTE_T_SINGLE){
659 u.store = n_string_push_c(u.store, '\'');
660 flags = (flags & ~a_SHEXP_QUOTE_T_MASK) | a_SHEXP_QUOTE_T_SINGLE;
661 }else /*if(flags & a_SHEXP_QUOTE_T_REVSOL)*/
662 flags &= ~a_SHEXP_QUOTE_T_MASK;
664 /* Work all the levels */
665 for(; sqlp != NULL; sqlp = sqlp->sql_link){
666 /* As necessary update our mode of quoting */
667 #ifdef a_SHEXP_QUOTE_RECURSE
668 il = 0;
670 switch(sqlp->sql_flags & a_SHEXP_QUOTE_T_MASK){
671 case a_SHEXP_QUOTE_T_DOLLAR:
672 if(!(flags & a_SHEXP_QUOTE_T_DOLLAR))
673 il = a_SHEXP_QUOTE_T_DOLLAR;
674 break;
675 case a_SHEXP_QUOTE_T_DOUBLE:
676 if(!(flags & (a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
677 il = a_SHEXP_QUOTE_T_DOLLAR;
678 break;
679 case a_SHEXP_QUOTE_T_SINGLE:
680 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
681 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
682 il = a_SHEXP_QUOTE_T_SINGLE;
683 break;
684 default:
685 case a_SHEXP_QUOTE_T_REVSOL:
686 if(!(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
687 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)))
688 il = a_SHEXP_QUOTE_T_REVSOL;
689 break;
692 if(il != 0){
693 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
694 u.store = n_string_push_c(u.store, '\'');
695 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
696 u.store = n_string_push_c(u.store, '"');
697 flags &= ~a_SHEXP_QUOTE_T_MASK;
699 flags |= (ui32_t)il;
700 if(flags & a_SHEXP_QUOTE_T_DOLLAR)
701 u.store = n_string_push_buf(u.store, "$'", sizeof("$'") -1);
702 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
703 u.store = n_string_push_c(u.store, '"');
704 else if(flags & a_SHEXP_QUOTE_T_SINGLE)
705 u.store = n_string_push_c(u.store, '\'');
707 #endif /* a_SHEXP_QUOTE_RECURSE */
709 /* Work the level's substring */
710 ib = sqlp->sql_dat.s;
711 il = sqlp->sql_dat.l;
713 while(il > 0){
714 char c2, c;
716 c = *ib;
718 if(cntrlchar(c)){
719 assert(c == '\t' || (flags & a_SHEXP_QUOTE_T_DOLLAR));
720 assert((flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_SINGLE |
721 a_SHEXP_QUOTE_T_DOUBLE | a_SHEXP_QUOTE_T_DOLLAR)));
/* Map to the mnemonic escape letter if there is one; c2 keeps the original
 * value so that "no mnemonic" can be detected via c == c2 below */
722 switch((c2 = c)){
723 case 0x07: c = 'a'; break;
724 case 0x08: c = 'b'; break;
725 case 0x0A: c = 'n'; break;
726 case 0x0B: c = 'v'; break;
727 case 0x0C: c = 'f'; break;
728 case 0x0D: c = 'r'; break;
729 case 0x1B: c = 'E'; break;
730 default: break;
731 case 0x09:
732 if(flags & a_SHEXP_QUOTE_T_DOLLAR){
733 c = 't';
734 break;
736 if(flags & a_SHEXP_QUOTE_T_REVSOL)
737 u.store = n_string_push_c(u.store, '\\');
738 goto jpush;
740 u.store = n_string_push_c(u.store, '\\');
/* Control characters without mnemonic are written as \cX, X being c ^ 0x40 */
741 if(c == c2){
742 u.store = n_string_push_c(u.store, 'c');
743 c ^= 0x40;
745 goto jpush;
746 }else if(blankspacechar(c) || c == '|' || c == '&' || c == ';' ||
747 /* Whereas we don't support those, quote them for the sh(1)ell */
748 c == '(' || c == ')' || c == '<' || c == '>' ||
749 c == '"' || c == '$'){
750 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
751 goto jpush;
752 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE));
753 u.store = n_string_push_c(u.store, '\\');
754 goto jpush;
755 }else if(c == '\''){
756 if(flags & a_SHEXP_QUOTE_T_DOUBLE)
757 goto jpush;
758 assert(!(flags & a_SHEXP_QUOTE_T_SINGLE));
759 u.store = n_string_push_c(u.store, '\\');
760 goto jpush;
761 }else if(c == '\\' || (c == '#' && ib == ib_base)){
762 if(flags & a_SHEXP_QUOTE_T_SINGLE)
763 goto jpush;
764 assert(flags & (a_SHEXP_QUOTE_T_REVSOL | a_SHEXP_QUOTE_T_DOUBLE |
765 a_SHEXP_QUOTE_T_DOLLAR));
766 u.store = n_string_push_c(u.store, '\\');
767 goto jpush;
768 }else if(asciichar(c)){
769 /* Shorthand: we can simply push that thing out */
770 jpush:
771 u.store = n_string_push_c(u.store, c);
772 ++ib, --il;
773 }else{
774 /* Not an ASCII character, take care not to split up multibyte
775 * sequences etc. For the sake of compile testing, don't enwrap in
776 * HAVE_ALWAYS_UNICODE_LOCALE || HAVE_NATCH_CHAR */
777 if(n_psonce & n_PSO_UNICODE){
778 ui32_t uc;
779 char const *ib2;
780 size_t il2, il3;
782 ib2 = ib;
783 il2 = il;
784 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
785 char itoa[32];
786 char const *cp;
/* il2 is reused: from here on it is the byte length of the decoded sequence */
788 il2 = PTR2SIZE(&ib2[0] - &ib[0]);
789 if((flags & a_SHEXP_QUOTE_ROUNDTRIP) || uc == 0xFFFDu){
790 /* Use padding to make ambiguities impossible */
791 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
792 (uc > 0xFFFFu ? 'U' : 'u'),
793 (int)(uc > 0xFFFFu ? 8 : 4), uc);
794 cp = itoa;
795 }else{
796 il3 = il2;
797 cp = &ib[0];
799 u.store = n_string_push_buf(u.store, cp, il3);
800 ib += il2, il -= il2;
801 continue;
/* Not a Unicode locale, or the UTF-8 decoding failed: let n_visual_info()
 * isolate one character */
805 memset(&vic, 0, sizeof vic);
806 vic.vic_indat = ib;
807 vic.vic_inlen = il;
808 n_visual_info(&vic,
809 n_VISUAL_INFO_ONE_CHAR | n_VISUAL_INFO_SKIP_ERRORS);
811 /* Work this substring as sensitive as possible */
/* il temporarily is the length of what was consumed; the remaining input is
 * taken over from vic after this block */
812 il -= vic.vic_oulen;
813 if(!(flags & a_SHEXP_QUOTE_ROUNDTRIP))
814 u.store = n_string_push_buf(u.store, ib, il);
815 #ifdef HAVE_ICONV
816 else if((vic.vic_indat = n_iconv_onetime_cp(n_ICONV_NONE,
817 "utf-8", ok_vlook(ttycharset), savestrbuf(ib, il))) != NULL){
818 ui32_t uc;
819 char const *ib2;
820 size_t il2, il3;
822 il2 = strlen(ib2 = vic.vic_indat);
823 if((uc = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){
824 char itoa[32];
826 il2 = PTR2SIZE(&ib2[0] - &vic.vic_indat[0]);
827 /* Use padding to make ambiguities impossible */
828 il3 = snprintf(itoa, sizeof itoa, "\\%c%0*X",
829 (uc > 0xFFFFu ? 'U' : 'u'),
830 (int)(uc > 0xFFFFu ? 8 : 4), uc);
831 u.store = n_string_push_buf(u.store, itoa, il3);
832 }else
833 goto Jxseq;
835 #endif
836 else
837 #ifdef HAVE_ICONV
838 Jxseq:
839 #endif
/* Last resort: one \xHH escape per byte.  The "FF" that is pushed is only a
 * placeholder; n_c_to_hex_base16() is handed the position of these two bytes
 * (presumably it stores the two hex digits of the byte there -- confirm) */
840 while(il-- > 0){
841 u.store = n_string_push_buf(u.store, "\\xFF",
842 sizeof("\\xFF") -1);
843 n_c_to_hex_base16(&u.store->s_dat[u.store->s_len - 2], *ib++);
846 ib = vic.vic_oudat;
847 il = vic.vic_oulen;
852 /* Close an open quote */
853 if(flags & (a_SHEXP_QUOTE_T_SINGLE | a_SHEXP_QUOTE_T_DOLLAR))
854 u.store = n_string_push_c(u.store, '\'');
855 else if(flags & a_SHEXP_QUOTE_T_DOUBLE)
856 u.store = n_string_push_c(u.store, '"');
857 #ifdef a_SHEXP_QUOTE_RECURSE
858 jleave:
859 #endif
860 NYD2_LEAVE;
861 return;
863 #ifdef a_SHEXP_QUOTE_RECURSE
864 jrecurse:
865 sqlp->sql_dat.l -= il;
867 sql.sql_link = sqlp;
868 sql.sql_dat.s = n_UNCONST(ib);
869 sql.sql_dat.l = il;
870 sql.sql_flags = flags;
871 a_shexp__quote(sqcp, &sql);
872 goto jleave;
873 #endif
875 #undef jrecurse
876 #undef a_SHEXP_QUOTE_RECURSE
/* Expand the file / folder name `name' as directed by the fexpm mode bits.
 * The steps, each of which can be disabled by a FEXP_N* bit: shortcuts, the
 * special names "%", "%user", "%:name", "#" and "&", with HAVE_IMAP "@", the
 * "+" folder prefix, shell variable / tilde / filename pattern expansion.
 * A leading "protocol://" is split off first and re-attached at the end.
 * Returns NULL on error.  n_PS_EXPAND_MULTIRESULT is cleared on entry (and
 * may be set by a_shexp_globname()) */
879 FL char *
880 fexpand(char const *name, enum fexp_mode fexpm) /* TODO in parts: -> URL::!! */
882 struct str proto, s;
883 char const *res, *cp;
884 bool_t dyn, haveproto;
885 NYD_ENTER;
887 n_pstate &= ~n_PS_EXPAND_MULTIRESULT;
888 dyn = FAL0;
890 /* The order of evaluation is "%" and "#" expand into constants.
891 * "&" can expand into "+". "+" can expand into shell meta characters.
892 * Shell meta characters expand into constants.
893 * This way, we make no recursive expansion */
894 if((fexpm & FEXP_NSHORTCUT) || (res = shortcut_expand(name)) == NULL)
895 res = n_UNCONST(name);
897 jprotonext:
898 n_UNINIT(proto.s, NULL), n_UNINIT(proto.l, 0);
899 haveproto = FAL0;
/* A leading run of alphanumerics which is followed by "://" is the protocol;
 * proto covers it including the "://", res is moved behind it */
900 for(cp = res; *cp && *cp != ':'; ++cp)
901 if(!alnumchar(*cp))
902 goto jnoproto;
903 if(cp[0] == ':' && cp[1] == '/' && cp[2] == '/'){
904 haveproto = TRU1;
905 proto.s = n_UNCONST(res);
906 cp += 3;
907 proto.l = PTR2SIZE(cp - res);
908 res = cp;
911 jnoproto:
912 if(!(fexpm & FEXP_NSPECIAL)){
913 jnext:
914 dyn = FAL0;
915 switch(*res){
916 case '%':
/* "%:name" restarts with name; "%user" forces the built-in mailbox path of
 * user; a plain "%" is resolved for $LOGNAME and the result is inspected
 * once more via jnext */
917 if(res[1] == ':' && res[2] != '\0'){
918 res = &res[2];
919 goto jprotonext;
920 }else{
921 bool_t force;
923 force = (res[1] != '\0');
924 res = a_shexp_findmail((force ? &res[1] : ok_vlook(LOGNAME)),
925 force);
926 if(force)
927 goto jislocal;
929 goto jnext;
930 case '#':
931 if (res[1] != '\0')
932 break;
933 if (prevfile[0] == '\0') {
934 n_err(_("No previous file\n"));
935 res = NULL;
936 goto jleave;
938 res = prevfile;
939 goto jislocal;
940 case '&':
941 if (res[1] == '\0')
942 res = ok_vlook(MBOX);
943 break;
944 default:
945 break;
949 #ifdef HAVE_IMAP
950 if(res[0] == '@' && which_protocol(mailname, FAL0, FAL0, NULL)
951 == PROTO_IMAP){
952 res = str_concat_csvl(&s, protbase(mailname), "/", &res[1], NULL)->s;
953 dyn = TRU1;
955 #endif
957 /* POSIX: if *folder* unset or null, "+" shall be retained */
958 if(!(fexpm & FEXP_NFOLDER) && *res == '+' &&
959 *(cp = n_folder_query()) != '\0'){
960 res = str_concat_csvl(&s, cp, &res[1], NULL)->s;
961 dyn = TRU1;
964 /* Do some meta expansions */
/* Skipped if FEXP_NVAR is set without FEXP_NSHELL.  With FEXP_NSHELL only a
 * "$" triggers expansion, otherwise any byte of "{}[]*?$" does */
965 if((fexpm & (FEXP_NSHELL | FEXP_NVAR)) != FEXP_NVAR &&
966 ((fexpm & FEXP_NSHELL) ? (strchr(res, '$') != NULL)
967 : n_anyof_cp("{}[]*?$", res))){
968 bool_t doexp;
970 if(fexpm & FEXP_NOPROTO)
971 doexp = TRU1;
972 else{
/* Only file and maildir targets are subject to shell expansion */
973 cp = haveproto ? savecat(savestrbuf(proto.s, proto.l), res) : res;
975 switch(which_protocol(cp, TRU1, FAL0, NULL)){
976 case PROTO_FILE:
977 case PROTO_MAILDIR:
978 doexp = TRU1;
979 break;
980 default:
981 doexp = FAL0;
982 break;
986 if(doexp){
987 struct str shin;
988 struct n_string shou, *shoup;
990 shin.s = n_UNCONST(res);
991 shin.l = UIZ_MAX;
992 shoup = n_string_creat_auto(&shou);
/* Parse token after token into the very same store until the parser signals
 * n_SHEXP_STATE_STOP */
993 for(;;){
994 enum n_shexp_state shs;
996 /* TODO shexp: take care to not include backtick eval once avail! */
997 shs = n_shexp_parse_token((n_SHEXP_PARSE_LOG_D_V |
998 n_SHEXP_PARSE_QUOTE_AUTO_FIXED | n_SHEXP_PARSE_QUOTE_AUTO_DQ |
999 n_SHEXP_PARSE_QUOTE_AUTO_CLOSE), shoup, &shin, NULL);
1000 if(shs & n_SHEXP_STATE_STOP)
1001 break;
1003 res = n_string_cp(shoup);
1004 /*shoup = n_string_drop_ownership(shoup);*/
1005 dyn = TRU1;
1007 if(res[0] == '~')
1008 res = a_shexp_tilde(res);
1010 if(!(fexpm & FEXP_NSHELL) &&
1011 (res = a_shexp_globname(res, fexpm)) == NULL)
1012 goto jleave;
1013 dyn = TRU1;
1014 }/* else no tilde */
1015 }else if(res[0] == '~'){
1016 res = a_shexp_tilde(res);
1017 dyn = TRU1;
1020 jislocal:
1021 if(res != NULL && haveproto){
1022 res = savecat(savestrbuf(proto.s, proto.l), res);
1023 dyn = TRU1;
1026 if(fexpm & (FEXP_LOCAL | FEXP_LOCAL_FILE)){
/* Beware, the "case PROTO_FILE" label is placed inside of the if body of the
 * PROTO_MAILDIR case: a maildir is only accepted without FEXP_LOCAL_FILE (and
 * otherwise drops into the error default), a file is always accepted.  With
 * FEXP_LOCAL_FILE the result is replaced by what which_protocol() stored in
 * cp (presumably the name without protocol prefix -- confirm) */
1027 switch (which_protocol(res, FAL0, FAL0, &cp)) {
1028 case PROTO_MAILDIR:
1029 if(!(fexpm & FEXP_LOCAL_FILE)){
1030 /* FALLTHRU */
1031 case PROTO_FILE:
1032 if(fexpm & FEXP_LOCAL_FILE){
1033 res = cp;
1034 dyn = FAL0;
1036 break;
1038 /* FALLTHRU */
1039 default:
1040 n_err(_("Not a local file or directory: %s\n"),
1041 n_shexp_quote_cp(name, FAL0));
1042 res = NULL;
1043 break;
1047 jleave:
/* Anything which was not flagged as dynamically created is copied */
1048 if(res != NULL && !dyn)
1049 res = savestr(res);
1050 NYD_LEAVE;
1051 return n_UNCONST(res);
/* Parse the next shell-style token from INPUT.
 * Unless n_SHEXP_PARSE_DRYRUN is set the expanded token is appended to STORE
 * (which is truncated first if n_SHEXP_PARSE_TRUNC is set); in dry-run mode
 * the raw input that was consumed is appended instead, if STORE is given.
 * INPUT->l may be UIZ_MAX, in which case strlen(INPUT->s) is used.  On return
 * INPUT has been advanced past what was consumed.
 * COOKIE, if not NULL, carries an exploded "$@" over successive calls: while
 * *COOKIE is not NULL each call yields the next entry of that vector.
 * Returns a bitmix of n_SHEXP_STATE_* flags (output seen, errors, STOP, ..). */
1054 FL enum n_shexp_state
1055 n_shexp_parse_token(enum n_shexp_parse_flags flags, struct n_string *store,
1056 struct str *input, void const **cookie){
1057 /* TODO shexp_parse_token: WCHAR
1058 * TODO This needs to be rewritten in order to support $(( )) and $( )
1059 * TODO and ${xyYZ} and the possibly infinite recursion they bring along,
1060 * TODO too. We need a carrier struct, then, and can nicely split this
1061 * TODO big big thing up in little pieces!
1062 * TODO This means it should produce a tree of objects, so that callees
1063 * TODO can recognize whether something happened inside single/double etc.
1064 * TODO quotes; e.g., to requote "'[a-z]'" to, e.g., "\[a-z]", etc.! */
1065 ui32_t last_known_meta_trim_len;
1066 char c2, c, quotec, utf[8];
1067 enum n_shexp_state rv;
1068 size_t i, il;
1069 char const *ifs, *ifs_ws, *ib_save, *ib;
1070 enum{
1071 a_NONE = 0,
1072 a_SKIPQ = 1u<<0, /* Skip rest of this quote (\u0 ..) */
1073 a_SKIPT = 1u<<1, /* Skip entire token (\c@) */
1074 a_SKIPMASK = a_SKIPQ | a_SKIPT,
1075 a_SURPLUS = 1u<<2, /* Extended sequence interpretation */
1076 a_NTOKEN = 1u<<3, /* "New token": e.g., comments are possible */
1077 a_BRACE = 1u<<4, /* Variable substitution: brace enclosed */
1078 a_DIGIT1 = 1u<<5, /* ..first character was digit */
1079 a_NONDIGIT = 1u<<6, /* ..has seen any non-digits */
1080 a_VARSUBST_MASK = n_BITENUM_MASK(4, 6),
1082 a_ROUND_MASK = a_SKIPT | (int)~n_BITENUM_MASK(0, 7),
1083 a_COOKIE = 1u<<8,
1084 a_EXPLODE = 1u<<9,
1085 a_CONSUME = 1u<<10, /* When done, "consume" remaining input */
1086 a_TMP = 1u<<30
1087 } state;
1088 NYD2_ENTER;
1090 assert((flags & n_SHEXP_PARSE_DRYRUN) || store != NULL);
1091 assert(input != NULL);
1092 assert(input->l == 0 || input->s != NULL);
1093 assert(!(flags & n_SHEXP_PARSE_LOG) || !(flags & n_SHEXP_PARSE_LOG_D_V));
1094 assert(!(flags & n_SHEXP_PARSE_IFS_ADD_COMMA) ||
1095 !(flags & n_SHEXP_PARSE_IFS_IS_COMMA));
1096 assert(!(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED) ||
1097 (flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK));
/* Derive implied flags: LOG_D_V turns into LOG only if n_PO_D_V is set in
 * n_poption, and QUOTE_AUTO_FIXED always implies QUOTE_AUTO_CLOSE */
1099 if((flags & n_SHEXP_PARSE_LOG_D_V) && (n_poption & n_PO_D_V))
1100 flags |= n_SHEXP_PARSE_LOG;
1101 if(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)
1102 flags |= n_SHEXP_PARSE_QUOTE_AUTO_CLOSE;
1104 if((flags & n_SHEXP_PARSE_TRUNC) && store != NULL)
1105 store = n_string_trunc(store, 0);
/* The ifs / ifs_ws variables are only looked up if the flags need them */
1107 if(flags & (n_SHEXP_PARSE_IFS_VAR | n_SHEXP_PARSE_TRIM_IFSSPACE)){
1108 ifs = ok_vlook(ifs);
1109 ifs_ws = ok_vlook(ifs_ws);
1110 }else{
1111 n_UNINIT(ifs, n_empty);
1112 n_UNINIT(ifs_ws, n_empty);
1115 state = a_NONE;
1116 ib = input->s;
/* A length of UIZ_MAX means: NUL terminated, compute (and store) the length */
1117 if((il = input->l) == UIZ_MAX)
1118 input->l = il = strlen(ib);
1119 n_UNINIT(c, '\0');
1121 if(cookie != NULL && *cookie != NULL){
1122 assert(!(flags & n_SHEXP_PARSE_DRYRUN));
1123 state |= a_COOKIE;
1126 rv = n_SHEXP_STATE_NONE;
1127 jrestart_empty:
1128 rv &= n_SHEXP_STATE_WS_LEAD;
1129 state &= a_ROUND_MASK;
1131 /* In cookie mode, the next ARGV entry is the token already, unchanged,
1132 * since it has already been expanded before! */
1133 if(state & a_COOKIE){
1134 char const * const *xcookie, *cp;
1136 i = store->s_len;
1137 xcookie = *cookie;
1138 if((store = n_string_push_cp(store, *xcookie))->s_len > 0)
1139 rv |= n_SHEXP_STATE_OUTPUT;
1140 if(*++xcookie == NULL){
1141 *cookie = NULL;
1142 state &= ~a_COOKIE;
1143 flags |= n_SHEXP_PARSE_QUOTE_AUTO_DQ; /* ..why we are here! */
1144 }else
1145 *cookie = n_UNCONST(xcookie);
1147 for(cp = &n_string_cp(store)[i]; (c = *cp++) != '\0';)
1148 if(cntrlchar(c)){
1149 rv |= n_SHEXP_STATE_CONTROL;
1150 break;
1153 /* The last exploded cookie will join with the yielded input token, so
1154 * simply fall through in this case */
1155 if(state & a_COOKIE)
1156 goto jleave_quick;
1157 }else{
1158 jrestart:
1159 if(flags & n_SHEXP_PARSE_TRIM_SPACE){
1160 for(; il > 0; ++ib, --il){
1161 if(!blankspacechar(*ib))
1162 break;
1163 rv |= n_SHEXP_STATE_WS_LEAD;
1167 if(flags & n_SHEXP_PARSE_TRIM_IFSSPACE){
1168 for(; il > 0; ++ib, --il){
1169 if(strchr(ifs_ws, *ib) == NULL)
1170 break;
1171 rv |= n_SHEXP_STATE_WS_LEAD;
1175 input->s = n_UNCONST(ib);
1176 input->l = il;
1179 if(il == 0){
1180 rv |= n_SHEXP_STATE_STOP;
1181 goto jleave;
1184 if(store != NULL)
1185 store = n_string_reserve(store, n_MIN(il, 32)); /* XXX */
/* Pre-select the quote mode if the caller requested automatic quoting.
 * The if(0) lets the DQ case jump over the DSQ assignment of quotec, so that
 * both cases share the statements which follow */
1187 switch(flags & n__SHEXP_PARSE_QUOTE_AUTO_MASK){
1188 case n_SHEXP_PARSE_QUOTE_AUTO_SQ:
1189 quotec = '\'';
1190 rv |= n_SHEXP_STATE_QUOTE;
1191 break;
1192 case n_SHEXP_PARSE_QUOTE_AUTO_DQ:
1193 quotec = '"';
1194 if(0){
1195 case n_SHEXP_PARSE_QUOTE_AUTO_DSQ:
1196 quotec = '\'';
1198 rv |= n_SHEXP_STATE_QUOTE;
1199 state |= a_SURPLUS;
1200 break;
1201 default:
1202 quotec = '\0';
1203 state |= a_NTOKEN;
1204 break;
1207 /* TODO n_SHEXP_PARSE_META_SEMICOLON++, well, hack: we are not the shell,
1208 * TODO we are not a language, and therefore the general *ifs-ws* and normal
1209 * TODO whitespace trimming that input lines undergo (in a_go_evaluate())
1210 * TODO has already happened, our result will be used *as is*, and therefore
1211 * TODO we need to be aware of and remove trailing unquoted WS that would
1212 * TODO otherwise remain, after we have seen a semicolon sequencer.
1213 * By sheer luck we only need to track this in non-quote-mode */
1214 last_known_meta_trim_len = UI32_MAX;
1216 while(il > 0){ /* {{{ */
1217 --il, c = *ib++;
1219 /* If no quote-mode active.. */
1220 if(quotec == '\0'){
1221 if(c == '"' || c == '\''){
1222 quotec = c;
1223 if(c == '"')
1224 state |= a_SURPLUS;
1225 else
1226 state &= ~a_SURPLUS;
1227 state &= ~a_NTOKEN;
1228 last_known_meta_trim_len = UI32_MAX;
1229 rv |= n_SHEXP_STATE_QUOTE;
1230 continue;
1231 }else if(c == '$'){
1232 if(il > 0){
1233 state &= ~a_NTOKEN;
1234 last_known_meta_trim_len = UI32_MAX;
1235 if(*ib == '\''){
1236 --il, ++ib;
1237 quotec = '\'';
1238 state |= a_SURPLUS;
1239 rv |= n_SHEXP_STATE_QUOTE;
1240 continue;
1241 }else
1242 goto J_var_expand;
1244 }else if(c == '\\'){
1245 /* Outside of quotes this just escapes any next character, but a sole
1246 * <reverse solidus> at EOS is left unchanged */
1247 if(il > 0)
1248 --il, c = *ib++;
1249 state &= ~a_NTOKEN;
1250 last_known_meta_trim_len = UI32_MAX;
1252 /* A comment may it be if no token has yet started */
1253 else if(c == '#' && (state & a_NTOKEN)){
1254 rv |= n_SHEXP_STATE_STOP;
1255 /*last_known_meta_trim_len = UI32_MAX;*/
1256 goto jleave;
1258 /* Metacharacters which separate tokens must be turned on explicitly */
1259 else if(c == '|' && (flags & n_SHEXP_PARSE_META_VERTBAR)){
1260 rv |= n_SHEXP_STATE_META_VERTBAR;
1262 /* The parsed sequence may be _the_ output, so ensure we don't
1263 * include the metacharacter, then. */
1264 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1265 ++il, --ib;
1266 /*last_known_meta_trim_len = UI32_MAX;*/
1267 break;
1268 }else if(c == '&' && (flags & n_SHEXP_PARSE_META_AMPERSAND)){
1269 rv |= n_SHEXP_STATE_META_AMPERSAND;
1271 /* The parsed sequence may be _the_ output, so ensure we don't
1272 * include the metacharacter, then. */
1273 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1274 ++il, --ib;
1275 /*last_known_meta_trim_len = UI32_MAX;*/
1276 break;
1277 }else if(c == ';' && (flags & n_SHEXP_PARSE_META_SEMICOLON)){
1278 if(il > 0)
1279 n_go_input_inject(n_GO_INPUT_INJECT_COMMIT, ib, il);
1280 rv |= n_SHEXP_STATE_META_SEMICOLON | n_SHEXP_STATE_STOP;
1281 state |= a_CONSUME;
1282 if(!(flags & n_SHEXP_PARSE_DRYRUN) && (rv & n_SHEXP_STATE_OUTPUT) &&
1283 last_known_meta_trim_len != UI32_MAX)
1284 store = n_string_trunc(store, last_known_meta_trim_len);
1286 /* The parsed sequence may be _the_ output, so ensure we don't
1287 * include the metacharacter, then. */
1288 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1289 ++il, --ib;
1290 /*last_known_meta_trim_len = UI32_MAX;*/
1291 break;
1292 }else if(c == ',' && (flags &
1293 (n_SHEXP_PARSE_IFS_ADD_COMMA | n_SHEXP_PARSE_IFS_IS_COMMA))){
1294 /* The parsed sequence may be _the_ output, so ensure we don't
1295 * include the metacharacter, then. */
1296 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1297 ++il, --ib;
1298 /*last_known_meta_trim_len = UI32_MAX;*/
1299 break;
1300 }else{
1301 ui8_t blnk;
/* blnk bit 1: c is a blank character; bit 2: c is contained in ifs_ws */
1303 blnk = blankchar(c) ? 1 : 0;
1304 blnk |= ((flags & (n_SHEXP_PARSE_IFS_VAR |
1305 n_SHEXP_PARSE_TRIM_IFSSPACE)) &&
1306 strchr(ifs_ws, c) != NULL) ? 2 : 0;
1308 if((!(flags & n_SHEXP_PARSE_IFS_VAR) && (blnk & 1)) ||
1309 ((flags & n_SHEXP_PARSE_IFS_VAR) &&
1310 ((blnk & 2) || strchr(ifs, c) != NULL))){
1311 if(!(flags & n_SHEXP_PARSE_IFS_IS_COMMA)){
1312 /* The parsed sequence may be _the_ output, so ensure we don't
1313 * include the metacharacter, then. */
1314 if(flags & (n_SHEXP_PARSE_DRYRUN | n_SHEXP_PARSE_META_KEEP))
1315 ++il, --ib;
1316 /*last_known_meta_trim_len = UI32_MAX;*/
1317 break;
1319 state |= a_NTOKEN;
1320 }else
1321 state &= ~a_NTOKEN;
1323 if(blnk && store != NULL){
1324 if(last_known_meta_trim_len == UI32_MAX)
1325 last_known_meta_trim_len = store->s_len;
1326 }else
1327 last_known_meta_trim_len = UI32_MAX;
1329 }else{
1330 /* Quote-mode */
1331 assert(!(state & a_NTOKEN));
1332 if(c == quotec && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_FIXED)){
1333 state &= a_ROUND_MASK;
1334 quotec = '\0';
1335 /* Users may need to recognize the presence of empty quotes */
1336 rv |= n_SHEXP_STATE_OUTPUT;
1337 continue;
1338 }else if(c == '\\' && (state & a_SURPLUS)){
1339 ib_save = ib - 1;
1340 /* A sole <reverse solidus> at EOS is treated as-is! This is ok
1341 * since the "closing quote" error will occur next, anyway */
1342 if(il == 0)
1344 else if((c2 = *ib) == quotec){
1345 --il, ++ib;
1346 c = quotec;
1347 }else if(quotec == '"'){
1348 /* Double quotes, POSIX says:
1349 * The <backslash> shall retain its special meaning as an
1350 * escape character (see Section 2.2.1) only when followed
1351 * by one of the following characters when considered
1352 * special: $ ` " \ <newline> */
1353 switch(c2){
1354 case '$':
1355 case '`':
1356 /* case '"': already handled via c2 == quotec */
1357 case '\\':
1358 --il, ++ib;
1359 c = c2;
1360 /* FALLTHRU */
1361 default:
1362 break;
1364 }else{
1365 /* Dollar-single-quote */
1366 --il, ++ib;
1367 switch(c2){
1368 case '"':
1369 /* case '\'': already handled via c2 == quotec */
1370 case '\\':
1371 c = c2;
1372 break;
1374 case 'b': c = '\b'; break;
1375 case 'f': c = '\f'; break;
1376 case 'n': c = '\n'; break;
1377 case 'r': c = '\r'; break;
1378 case 't': c = '\t'; break;
1379 case 'v': c = '\v'; break;
1381 case 'E':
1382 case 'e': c = '\033'; break;
1384 /* Control character */
1385 case 'c':
1386 if(il == 0)
1387 goto j_dollar_ungetc;
1388 --il, c2 = *ib++;
1389 if(state & a_SKIPMASK)
1390 continue;
1391 /* ASCII C0: 0..1F, 7F <- @.._ (+ a-z -> A-Z), ? */
1392 c = upperconv(c2) ^ 0x40;
1393 if((ui8_t)c > 0x1F && c != 0x7F){
1394 if(flags & n_SHEXP_PARSE_LOG)
1395 n_err(_("Invalid \\c notation: %.*s: %.*s\n"),
1396 (int)input->l, input->s,
1397 (int)PTR2SIZE(ib - ib_save), ib_save);
1398 rv |= n_SHEXP_STATE_ERR_CONTROL;
1400 /* As an implementation-defined extension, support \c@
1401 * EQ printf(1) alike \c */
1402 if(c == '\0'){
1403 state |= a_SKIPT;
1404 continue;
1406 break;
1408 /* Octal sequence: 1 to 3 octal bytes */
1409 case '0':
1410 /* As an extension (dependent on where you look, echo(1), or
1411 * awk(1)/tr(1) etc.), allow leading "0" octal indicator */
1412 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1413 c2 = c;
1414 --il, ++ib;
1416 /* FALLTHRU */
1417 case '1': case '2': case '3':
1418 case '4': case '5': case '6': case '7':
1419 c2 -= '0';
1420 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1421 c2 = (c2 << 3) | (c - '0');
1422 --il, ++ib;
1424 if(il > 0 && (c = *ib) >= '0' && c <= '7'){
1425 if(!(state & a_SKIPMASK) && (ui8_t)c2 > 0x1F){
1426 rv |= n_SHEXP_STATE_ERR_NUMBER;
1427 --il, ++ib;
1428 if(flags & n_SHEXP_PARSE_LOG)
1429 n_err(_("\\0 argument exceeds a byte: %.*s: %.*s\n"),
1430 (int)input->l, input->s,
1431 (int)PTR2SIZE(ib - ib_save), ib_save);
1432 /* Write unchanged */
1433 jerr_ib_save:
1434 rv |= n_SHEXP_STATE_OUTPUT;
1435 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1436 store = n_string_push_buf(store, ib_save,
1437 PTR2SIZE(ib - ib_save));
1438 continue;
1440 c2 = (c2 << 3) | (c -= '0');
1441 --il, ++ib;
1443 if(state & a_SKIPMASK)
1444 continue;
1445 if((c = c2) == '\0'){
1446 state |= a_SKIPQ;
1447 continue;
1449 break;
/* The if(0) constructs below let the \U, \u and \x cases share one
 * conversion loop while each sets its own maximum digit count in i */
1451 /* ISO 10646 / Unicode sequence, 8 or 4 hexadecimal bytes */
1452 case 'U':
1453 i = 8;
1454 if(0){
1455 /* FALLTHRU */
1456 case 'u':
1457 i = 4;
1459 if(il == 0)
1460 goto j_dollar_ungetc;
1461 if(0){
1462 /* FALLTHRU */
1464 /* Hexadecimal sequence, 1 or 2 hexadecimal bytes */
1465 case 'X':
1466 case 'x':
1467 if(il == 0)
1468 goto j_dollar_ungetc;
1469 i = 2;
1471 /* C99 */{
1472 static ui8_t const hexatoi[] = { /* XXX uses ASCII */
1473 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1475 size_t no, j;
1477 i = n_MIN(il, i);
1478 for(no = j = 0; i-- > 0; --il, ++ib, ++j){
1479 c = *ib;
1480 if(hexchar(c)){
1481 no <<= 4;
1482 no += hexatoi[(ui8_t)((c) - ((c) <= '9' ? 48
1483 : ((c) <= 'F' ? 55 : 87)))];
1484 }else if(j == 0){
1485 if(state & a_SKIPMASK)
1486 break;
1487 c2 = (c2 == 'U' || c2 == 'u') ? 'u' : 'x';
1488 if(flags & n_SHEXP_PARSE_LOG)
1489 n_err(_("Invalid \\%c notation: %.*s: %.*s\n"),
1490 c2, (int)input->l, input->s,
1491 (int)PTR2SIZE(ib - ib_save), ib_save);
1492 rv |= n_SHEXP_STATE_ERR_NUMBER;
1493 goto jerr_ib_save;
1494 }else
1495 break;
1498 /* Unicode massage */
1499 if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){
1500 if((c = (char)no) == '\0')
1501 state |= a_SKIPQ;
1502 }else if(no == 0)
1503 state |= a_SKIPQ;
1504 else if(!(state & a_SKIPMASK)){
1505 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1506 store = n_string_reserve(store, n_MAX(j, 4));
1508 if(no > 0x10FFFF){ /* XXX magic; CText */
1509 if(flags & n_SHEXP_PARSE_LOG)
1510 n_err(_("\\U argument exceeds 0x10FFFF: %.*s: "
1511 "%.*s\n"),
1512 (int)input->l, input->s,
1513 (int)PTR2SIZE(ib - ib_save), ib_save);
1514 rv |= n_SHEXP_STATE_ERR_NUMBER;
1515 /* But normalize the output anyway */
1516 goto Jerr_uni_norm;
1519 j = n_utf32_to_utf8(no, utf);
1521 if(n_psonce & n_PSO_UNICODE){
1522 rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE;
1523 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1524 store = n_string_push_buf(store, utf, j);
1525 continue;
1527 #ifdef HAVE_ICONV
1528 else{
1529 char *icp;
1531 icp = n_iconv_onetime_cp(n_ICONV_NONE,
1532 NULL, NULL, utf);
1533 if(icp != NULL){
1534 rv |= n_SHEXP_STATE_OUTPUT;
1535 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1536 store = n_string_push_cp(store, icp);
1537 continue;
1540 #endif
1541 if(!(flags & n_SHEXP_PARSE_DRYRUN)) Jerr_uni_norm:{
1542 char itoa[32];
1544 rv |= n_SHEXP_STATE_OUTPUT |
1545 n_SHEXP_STATE_ERR_UNICODE;
1546 i = snprintf(itoa, sizeof itoa, "\\%c%0*X",
1547 (no > 0xFFFFu ? 'U' : 'u'),
1548 (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no);
1549 store = n_string_push_buf(store, itoa, i);
1551 continue;
1553 if(state & a_SKIPMASK)
1554 continue;
1556 break;
1558 /* Extension: \$ can be used to expand a variable.
1559 * B(ug|ad) effect: if conversion fails, not written "as-is" */
1560 case '$':
1561 if(il == 0)
1562 goto j_dollar_ungetc;
1563 goto J_var_expand;
1565 default:
1566 j_dollar_ungetc:
1567 /* Follow bash(1) behaviour, print sequence unchanged */
1568 ++il, --ib;
1569 break;
1572 }else if(c == '$' && quotec == '"' && il > 0) J_var_expand:{
1573 state &= ~a_VARSUBST_MASK;
1574 if(*ib == '{')
1575 state |= a_BRACE;
1577 /* Scan variable name */
1578 if(!(state & a_BRACE) || il > 1){
1579 char const *cp, *vp;
1581 ib_save = ib - 1;
1582 if(state & a_BRACE)
1583 --il, ++ib;
1584 vp = ib;
1585 state &= ~a_EXPLODE;
1587 for(i = 0; il > 0; --il, ++ib, ++i){
1588 /* We have some special cases regarding special parameters,
1589 * so ensure these don't cause failure. This code has
1590 * counterparts in code that manages internal variables! */
1591 c = *ib;
1592 if(!a_SHEXP_ISVARC(c)){
1593 if(i == 0){
1594 /* Simply skip over multiplexer */
1595 if(c == '^')
1596 continue;
1597 if(c == '*' || c == '@' || c == '#' || c == '?' ||
1598 c == '!'){
1599 if(c == '@'){
1600 if(quotec == '"')
1601 state |= a_EXPLODE;
1603 --il, ++ib;
1604 ++i;
1607 break;
1608 }else if(a_SHEXP_ISVARC_BAD1ST(c)){
1609 if(i == 0)
1610 state |= a_DIGIT1;
1611 }else
1612 state |= a_NONDIGIT;
1615 /* In skip mode, be easy and.. skip over */
1616 if(state & a_SKIPMASK){
1617 if((state & a_BRACE) && il > 0 && *ib == '}')
1618 --il, ++ib;
1619 continue;
1622 /* Handle the scan error cases */
1623 if((state & (a_DIGIT1 | a_NONDIGIT)) == (a_DIGIT1 | a_NONDIGIT)){
1624 if(state & a_BRACE){
1625 if(il > 0 && *ib == '}')
1626 --il, ++ib;
1627 else
1628 rv |= n_SHEXP_STATE_ERR_GROUPOPEN;
1630 if(flags & n_SHEXP_PARSE_LOG)
1631 n_err(_("Invalid identifier for ${}: %.*s: %.*s\n"),
1632 (int)input->l, input->s,
1633 (int)PTR2SIZE(ib - ib_save), ib_save);
1634 rv |= n_SHEXP_STATE_ERR_IDENTIFIER;
1635 goto jerr_ib_save;
1636 }else if(i == 0){
1637 if(state & a_BRACE){
1638 if(il == 0 || *ib != '}'){
1639 if(flags & n_SHEXP_PARSE_LOG)
1640 n_err(_("No closing brace for ${}: %.*s: %.*s\n"),
1641 (int)input->l, input->s,
1642 (int)PTR2SIZE(ib - ib_save), ib_save);
1643 rv |= n_SHEXP_STATE_ERR_GROUPOPEN;
1644 goto jerr_ib_save;
1646 --il, ++ib;
1648 if(i == 0){
1649 if(flags & n_SHEXP_PARSE_LOG)
1650 n_err(_("Bad substitution for ${}: %.*s: %.*s\n"),
1651 (int)input->l, input->s,
1652 (int)PTR2SIZE(ib - ib_save), ib_save);
1653 rv |= n_SHEXP_STATE_ERR_BADSUB;
1654 goto jerr_ib_save;
1657 /* Simply write dollar as-is? */
1658 c = '$';
1659 }else{
1660 if(state & a_BRACE){
1661 if(il == 0 || *ib != '}'){
1662 if(flags & n_SHEXP_PARSE_LOG)
1663 n_err(_("No closing brace for ${}: %.*s: %.*s\n"),
1664 (int)input->l, input->s,
1665 (int)PTR2SIZE(ib - ib_save), ib_save);
1666 rv |= n_SHEXP_STATE_ERR_GROUPOPEN;
1667 goto jerr_ib_save;
1669 --il, ++ib;
1671 if(i == 0){
1672 if(flags & n_SHEXP_PARSE_LOG)
1673 n_err(_("Bad substitution for ${}: %.*s: %.*s\n"),
1674 (int)input->l, input->s,
1675 (int)PTR2SIZE(ib - ib_save), ib_save);
1676 rv |= n_SHEXP_STATE_ERR_BADSUB;
1677 goto jerr_ib_save;
1681 if(flags & n_SHEXP_PARSE_DRYRUN)
1682 continue;
1684 /* We may shall explode "${@}" to a series of successive,
1685 * properly quoted tokens (instead). The first exploded
1686 * cookie will join with the current token */
1687 if(n_UNLIKELY(state & a_EXPLODE) &&
1688 !(flags & n_SHEXP_PARSE_DRYRUN) && cookie != NULL){
1689 if(n_var_vexplode(cookie))
1690 state |= a_COOKIE;
1691 /* On the other hand, if $@ expands to nothing and is the
1692 * sole content of this quote then act like the shell does
1693 * and throw away the entire atxplode construct */
1694 else if(!(rv & n_SHEXP_STATE_OUTPUT) &&
1695 il == 1 && *ib == '"' &&
1696 ib_save == &input->s[1] && ib_save[-1] == '"')
1697 ++ib, --il;
1698 else
1699 continue;
1700 input->s = n_UNCONST(ib);
1701 input->l = il;
1702 goto jrestart_empty;
1705 /* Check getenv(3) shall no internal variable exist!
1706 * XXX We have some common idioms, avoid memory for them
1707 * XXX Even better would be var_vlook_buf()! */
1708 if(i == 1){
1709 switch(*vp){
1710 case '?': vp = n_qm; break;
1711 case '!': vp = n_em; break;
1712 case '*': vp = n_star; break;
1713 case '@': vp = n_at; break;
1714 case '#': vp = n_ns; break;
1715 default: goto j_var_look_buf;
1717 }else
1718 j_var_look_buf:
1719 vp = savestrbuf(vp, i);
1721 if((cp = n_var_vlook(vp, TRU1)) != NULL){
1722 rv |= n_SHEXP_STATE_OUTPUT;
1723 store = n_string_push_cp(store, cp);
1724 for(; (c = *cp) != '\0'; ++cp)
1725 if(cntrlchar(c)){
1726 rv |= n_SHEXP_STATE_CONTROL;
1727 break;
1730 continue;
1733 }else if(c == '`' && quotec == '"' && il > 0){ /* TODO shell command */
1734 continue;
/* Plain character (or the result of an escape sequence): emit it unless
 * the rest of the quote / token is being skipped */
1738 if(!(state & a_SKIPMASK)){
1739 rv |= n_SHEXP_STATE_OUTPUT;
1740 if(cntrlchar(c))
1741 rv |= n_SHEXP_STATE_CONTROL;
1742 if(!(flags & n_SHEXP_PARSE_DRYRUN))
1743 store = n_string_push_c(store, c);
1745 } /* }}} */
/* A quote which is still open here is an error unless QUOTE_AUTO_CLOSE is
 * set */
1747 if(quotec != '\0' && !(flags & n_SHEXP_PARSE_QUOTE_AUTO_CLOSE)){
1748 if(flags & n_SHEXP_PARSE_LOG)
1749 n_err(_("No closing quote: %.*s\n"), (int)input->l, input->s);
1750 rv |= n_SHEXP_STATE_ERR_QUOTEOPEN;
1753 jleave:
1754 assert(!(state & a_COOKIE));
/* In dry-run mode the store receives the raw input that was consumed */
1755 if((flags & n_SHEXP_PARSE_DRYRUN) && store != NULL){
1756 store = n_string_push_buf(store, input->s, PTR2SIZE(ib - input->s));
1757 rv |= n_SHEXP_STATE_OUTPUT;
/* a_CONSUME (set for a semicolon sequencer): report all input as consumed.
 * Otherwise trim trailing whitespace as requested and advance INPUT */
1760 if(state & a_CONSUME){
1761 input->s = n_UNCONST(&ib[il]);
1762 input->l = 0;
1763 }else{
1764 if(flags & n_SHEXP_PARSE_TRIM_SPACE){
1765 for(; il > 0; ++ib, --il){
1766 if(!blankspacechar(*ib))
1767 break;
1768 rv |= n_SHEXP_STATE_WS_TRAIL;
1772 if(flags & n_SHEXP_PARSE_TRIM_IFSSPACE){
1773 for(; il > 0; ++ib, --il){
1774 if(strchr(ifs_ws, *ib) == NULL)
1775 break;
1776 rv |= n_SHEXP_STATE_WS_TRAIL;
1780 input->l = il;
1781 input->s = n_UNCONST(ib);
/* Without output or metacharacter, and with IGNORE_EMPTY set, go for
 * another round as long as input remains; running out of input means STOP */
1784 if(!(rv & n_SHEXP_STATE_STOP)){
1785 if(!(rv & (n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_META_MASK)) &&
1786 (flags & n_SHEXP_PARSE_IGNORE_EMPTY) && il > 0)
1787 goto jrestart_empty;
1788 if(/*!(rv & n_SHEXP_STATE_OUTPUT) &&*/ il == 0)
1789 rv |= n_SHEXP_STATE_STOP;
1792 if((state & a_SKIPT) && !(rv & n_SHEXP_STATE_STOP) &&
1793 (flags & n_SHEXP_PARSE_META_MASK))
1794 goto jrestart;
1795 jleave_quick:
1796 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_UNICODE));
1797 assert((rv & n_SHEXP_STATE_OUTPUT) || !(rv & n_SHEXP_STATE_CONTROL));
1798 NYD2_LEAVE;
1799 return rv;
1802 FL char *
1803 n_shexp_parse_token_cp(enum n_shexp_parse_flags flags, char const **cp){
1804 struct str input;
1805 struct n_string sou, *soup;
1806 char *rv;
1807 enum n_shexp_state shs;
1808 NYD2_ENTER;
1810 assert(cp != NULL);
1812 input.s = n_UNCONST(*cp);
1813 input.l = UIZ_MAX;
1814 soup = n_string_creat_auto(&sou);
1816 shs = n_shexp_parse_token(flags, soup, &input, NULL);
1817 if(shs & n_SHEXP_STATE_ERR_MASK){
1818 soup = n_string_assign_cp(soup, *cp);
1819 *cp = NULL;
1820 }else
1821 *cp = input.s;
1823 rv = n_string_cp(soup);
1824 /*n_string_gut(n_string_drop_ownership(soup));*/
1825 NYD2_LEAVE;
1826 return rv;
1829 FL struct n_string *
1830 n_shexp_quote(struct n_string *store, struct str const *input, bool_t rndtrip){
1831 struct a_shexp_quote_lvl sql;
1832 struct a_shexp_quote_ctx sqc;
1833 NYD2_ENTER;
1835 assert(store != NULL);
1836 assert(input != NULL);
1837 assert(input->l == 0 || input->s != NULL);
1839 memset(&sqc, 0, sizeof sqc);
1840 sqc.sqc_store = store;
1841 sqc.sqc_input.s = input->s;
1842 if((sqc.sqc_input.l = input->l) == UIZ_MAX)
1843 sqc.sqc_input.l = strlen(input->s);
1844 sqc.sqc_flags = rndtrip ? a_SHEXP_QUOTE_ROUNDTRIP : a_SHEXP_QUOTE_NONE;
1846 if(sqc.sqc_input.l == 0)
1847 store = n_string_push_buf(store, "''", sizeof("''") -1);
1848 else{
1849 memset(&sql, 0, sizeof sql);
1850 sql.sql_dat = sqc.sqc_input;
1851 sql.sql_flags = sqc.sqc_flags;
1852 a_shexp__quote(&sqc, &sql);
1854 NYD2_LEAVE;
1855 return store;
1858 FL char *
1859 n_shexp_quote_cp(char const *cp, bool_t rndtrip){
1860 struct n_string store;
1861 struct str input;
1862 char *rv;
1863 NYD2_ENTER;
1865 assert(cp != NULL);
1867 input.s = n_UNCONST(cp);
1868 input.l = UIZ_MAX;
1869 rv = n_string_cp(n_shexp_quote(n_string_creat_auto(&store), &input,
1870 rndtrip));
1871 n_string_gut(n_string_drop_ownership(&store));
1872 NYD2_LEAVE;
1873 return rv;
1876 FL bool_t
1877 n_shexp_is_valid_varname(char const *name){
1878 char lc, c;
1879 bool_t rv;
1880 NYD2_ENTER;
1882 rv = FAL0;
1884 for(lc = '\0'; (c = *name++) != '\0'; lc = c)
1885 if(!a_SHEXP_ISVARC(c))
1886 goto jleave;
1887 else if(lc == '\0' && a_SHEXP_ISVARC_BAD1ST(c))
1888 goto jleave;
1889 if(a_SHEXP_ISVARC_BADNST(lc))
1890 goto jleave;
1892 rv = TRU1;
1893 jleave:
1894 NYD2_LEAVE;
1895 return rv;
1898 FL int
1899 c_shcodec(void *vp){
1900 struct str in;
1901 struct n_string sou_b, *soup;
1902 si32_t nerrn;
1903 size_t alen;
1904 bool_t norndtrip;
1905 char const **argv, *varname, *act, *cp;
1907 soup = n_string_creat_auto(&sou_b);
1908 argv = vp;
1909 varname = (n_pstate & n_PS_ARGMOD_VPUT) ? *argv++ : NULL;
1911 act = *argv;
1912 for(cp = act; *cp != '\0' && !blankspacechar(*cp); ++cp)
1914 if((norndtrip = (*act == '+')))
1915 ++act;
1916 if(act == cp)
1917 goto jesynopsis;
1918 alen = PTR2SIZE(cp - act);
1919 if(*cp != '\0')
1920 ++cp;
1922 in.l = strlen(in.s = n_UNCONST(cp));
1923 nerrn = n_ERR_NONE;
1925 if(is_ascncaseprefix(act, "encode", alen))
1926 soup = n_shexp_quote(soup, &in, !norndtrip);
1927 else if(!norndtrip && is_ascncaseprefix(act, "decode", alen)){
1928 for(;;){
1929 enum n_shexp_state shs;
1931 shs = n_shexp_parse_token((n_SHEXP_PARSE_LOG |
1932 n_SHEXP_PARSE_IGNORE_EMPTY), soup, &in, NULL);
1933 if(shs & n_SHEXP_STATE_ERR_MASK){
1934 soup = n_string_assign_cp(soup, cp);
1935 nerrn = n_ERR_CANCELED;
1936 vp = NULL;
1937 break;
1939 if(shs & n_SHEXP_STATE_STOP)
1940 break;
1942 }else
1943 goto jesynopsis;
1945 if(varname != NULL){
1946 cp = n_string_cp(soup);
1947 if(!n_var_vset(varname, (uintptr_t)cp)){
1948 nerrn = n_ERR_NOTSUP;
1949 vp = NULL;
1951 }else{
1952 struct str out;
1954 in.s = n_string_cp(soup);
1955 in.l = soup->s_len;
1956 makeprint(&in, &out);
1957 if(fprintf(n_stdout, "%s\n", out.s) < 0){
1958 nerrn = n_err_no;
1959 vp = NULL;
1961 n_free(out.s);
1964 jleave:
1965 n_pstate_err_no = nerrn;
1966 NYD_LEAVE;
1967 return (vp != NULL ? 0 : 1);
1968 jesynopsis:
1969 n_err(_("Synopsis: shcodec: <[+]e[ncode]|d[ecode]> <rest-of-line>\n"));
1970 nerrn = n_ERR_INVAL;
1971 vp = NULL;
1972 goto jleave;
1975 /* s-it-mode */