a_go_evaluate(): do not alloc n_MAXARGC args unless (TODO really) needed
[s-mailx.git] / strings.c
bloba150221122b29f5a9c2177b0af8d0b447bd663af
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE strings
38 #ifndef HAVE_AMALGAMATION
39 # include "nail.h"
40 #endif
42 #include <ctype.h>
44 FL char *
45 (savestr)(char const *str n_MEMORY_DEBUG_ARGS)
47 size_t size;
48 char *news;
49 NYD_ENTER;
51 size = strlen(str);
52 news = (n_autorec_alloc_from_pool)(NULL, size +1 n_MEMORY_DEBUG_ARGSCALL);
53 if(size > 0)
54 memcpy(news, str, size);
55 news[size] = '\0';
56 NYD_LEAVE;
57 return news;
60 FL char *
61 (savestrbuf)(char const *sbuf, size_t sbuf_len n_MEMORY_DEBUG_ARGS)
63 char *news;
64 NYD_ENTER;
66 news = (n_autorec_alloc_from_pool)(NULL, sbuf_len +1
67 n_MEMORY_DEBUG_ARGSCALL);
68 if(sbuf_len > 0)
69 memcpy(news, sbuf, sbuf_len);
70 news[sbuf_len] = 0;
71 NYD_LEAVE;
72 return news;
75 FL char *
76 (savecatsep)(char const *s1, char sep, char const *s2 n_MEMORY_DEBUG_ARGS)
78 size_t l1, l2;
79 char *news;
80 NYD_ENTER;
82 l1 = (s1 != NULL) ? strlen(s1) : 0;
83 l2 = strlen(s2);
84 news = (n_autorec_alloc_from_pool)(NULL, l1 + (sep != '\0') + l2 +1
85 n_MEMORY_DEBUG_ARGSCALL);
86 if (l1 > 0) {
87 memcpy(news + 0, s1, l1);
88 if (sep != '\0')
89 news[l1++] = sep;
91 if(l2 > 0)
92 memcpy(news + l1, s2, l2);
93 news[l1 + l2] = '\0';
94 NYD_LEAVE;
95 return news;
99 * Support routines, auto-reclaimed storage
102 FL struct str *
103 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
105 va_list vl;
106 size_t l;
107 char const *cs;
108 NYD_ENTER;
110 va_start(vl, self);
111 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
112 l += strlen(cs);
113 va_end(vl);
115 self->l = l;
116 self->s = n_autorec_alloc(l +1);
118 va_start(vl, self);
119 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
120 size_t i;
122 i = strlen(cs);
123 if(i > 0){
124 memcpy(self->s + l, cs, i);
125 l += i;
128 self->s[l] = '\0';
129 va_end(vl);
130 NYD_LEAVE;
131 return self;
134 FL struct str *
135 (str_concat_cpa)(struct str *self, char const * const *cpa,
136 char const *sep_o_null n_MEMORY_DEBUG_ARGS)
138 size_t sonl, l;
139 char const * const *xcpa;
140 NYD_ENTER;
142 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
144 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
145 l += strlen(*xcpa) + sonl;
147 self->l = l;
148 self->s = (n_autorec_alloc_from_pool)(NULL, l +1 n_MEMORY_DEBUG_ARGSCALL);
150 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
151 size_t i;
153 i = strlen(*xcpa);
154 if(i > 0){
155 memcpy(self->s + l, *xcpa, i);
156 l += i;
158 if (sonl > 0) {
159 memcpy(self->s + l, sep_o_null, sonl);
160 l += sonl;
163 self->s[l] = '\0';
164 NYD_LEAVE;
165 return self;
169 * Routines that are not related to auto-reclaimed storage follow.
172 FL bool_t
173 n_anyof_buf(char const *template, char const *dat, size_t len){
174 char c;
175 NYD2_ENTER;
177 if(len == UIZ_MAX){
178 while((c = *template++) != '\0')
179 if(strchr(dat, c) != NULL)
180 break;
181 }else if(len > 0){
182 while((c = *template++) != '\0')
183 if(memchr(dat, c, len) != NULL)
184 break;
185 }else
186 c = '\0';
187 NYD2_LEAVE;
188 return (c != '\0');
191 FL char *
192 n_strsep(char **iolist, char sep, bool_t ignore_empty){
193 char *base, *cp;
194 NYD2_ENTER;
196 for(base = *iolist; base != NULL; base = *iolist){
197 while(*base != '\0' && blankspacechar(*base))
198 ++base;
200 cp = strchr(base, sep);
201 if(cp != NULL)
202 *iolist = &cp[1];
203 else{
204 *iolist = NULL;
205 cp = &base[strlen(base)];
207 while(cp > base && blankspacechar(cp[-1]))
208 --cp;
209 *cp = '\0';
210 if(*base != '\0' || !ignore_empty)
211 break;
213 NYD2_LEAVE;
214 return base;
217 FL char *
218 n_strsep_esc(char **iolist, char sep, bool_t ignore_empty){
219 char *cp, c, *base;
220 bool_t isesc, anyesc;
221 NYD2_ENTER;
223 for(base = *iolist; base != NULL; base = *iolist){
224 while((c = *base) != '\0' && blankspacechar(c))
225 ++base;
227 for(isesc = anyesc = FAL0, cp = base;; ++cp){
228 if(n_UNLIKELY((c = *cp) == '\0')){
229 *iolist = NULL;
230 break;
231 }else if(!isesc){
232 if(c == sep){
233 *iolist = &cp[1];
234 break;
236 isesc = (c == '\\');
237 }else{
238 isesc = FAL0;
239 anyesc |= (c == sep);
243 while(cp > base && blankspacechar(cp[-1]))
244 --cp;
245 *cp = '\0';
247 if(*base != '\0'){
248 if(anyesc){
249 char *ins;
251 for(ins = cp = base;; ++ins)
252 if((c = *cp) == '\\' && cp[1] == sep){
253 *ins = sep;
254 cp += 2;
255 }else if((*ins = (++cp, c)) == '\0')
256 break;
260 if(*base != '\0' || !ignore_empty)
261 break;
263 NYD2_LEAVE;
264 return base;
267 FL bool_t
268 is_prefix(char const *as1, char const *as2) /* TODO arg order */
270 char c;
271 NYD2_ENTER;
273 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
274 if (*as2 == '\0')
275 break;
276 NYD2_LEAVE;
277 return (c == '\0');
280 FL char *
281 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
283 char const *cp;
284 size_t i;
285 char c, *rv;
286 NYD2_ENTER;
288 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
289 if (c == '"' || c == '\\')
290 ++i;
291 rv = n_autorec_alloc(i +1);
293 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
294 if (c == '"' || c == '\\')
295 rv[i++] = '\\';
296 rv[i] = '\0';
297 NYD2_LEAVE;
298 return rv;
/* Locate the last word ("last string") of linebuf, which is modified in place:
 * trailing whitespace is cut, and the word is split off from what precedes it
 * by storing a NUL.  The word may be enclosed in single or double quotes, in
 * which case a quote character that is preceded by a reverse solidus does not
 * end it (the reverse solidus is removed); with strip set the enclosing
 * quotes are removed, too.
 * Returns the word, or NULL if linebuf or the word is empty, in which case
 * *needs_list is FAL0.  Otherwise *needs_list is set to whether the returned
 * pointer differs from (the possibly advanced) linebuf and linebuf is not
 * empty; linebuf is advanced to the word if only whitespace precedes it */
301 FL char *
302 laststring(char *linebuf, bool_t *needs_list, bool_t strip)
304 char *cp, *p, quoted;
305 NYD_ENTER;
307 /* Anything to do at all? */
308 if (*(cp = linebuf) == '\0')
309 goto jnull;
310 cp += strlen(linebuf) -1;
312 /* Strip away trailing blanks */
313 while (spacechar(*cp) && cp > linebuf)
314 --cp;
315 cp[1] = '\0';
/* A single remaining byte is the last string already */
316 if (cp == linebuf)
317 goto jleave;
319 /* Now search for the BOS of the "last string" */
/* quoted holds the closing quote character, or a space to indicate that the
 * word is delimited by whitespace */
320 quoted = *cp;
321 if (quoted == '\'' || quoted == '"') {
322 if (strip)
323 *cp = '\0';
324 } else
325 quoted = ' ';
327 while (cp > linebuf) {
328 --cp;
329 if (quoted != ' ') {
330 if (*cp != quoted)
331 continue;
332 } else if (!spacechar(*cp))
333 continue;
/* A delimiter which is not preceded by a reverse solidus starts the word */
334 if (cp == linebuf || cp[-1] != '\\') {
335 /* When in whitespace mode, WS prefix doesn't belong */
336 if (quoted == ' ')
337 ++cp;
338 break;
340 /* Expand the escaped quote character */
341 for (p = --cp; (p[0] = p[1]) != '\0'; ++p)
/* With strip, shift the word one byte to the left to drop the opening quote */
344 if (strip && quoted != ' ' && *cp == quoted)
345 for (p = cp; (p[0] = p[1]) != '\0'; ++p)
348 /* The "last string" has been skipped over, but still, try to step backwards
349 * until we are at BOS or see whitespace, so as to make possible things like
350 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
351 while (cp > linebuf) {
352 --cp;
353 if (spacechar(*cp)) {
354 p = cp;
355 *cp++ = '\0';
356 /* We can furtherly release our callees if we now decide whether the
357 * remaining non-"last string" line content contains non-WS */
358 while (--p >= linebuf)
359 if (!spacechar(*p))
360 goto jleave;
361 linebuf = cp;
362 break;
366 jleave:
/* NOTE(review): cp cannot be NULL on any visible path that reaches here */
367 if (cp != NULL && *cp == '\0')
368 goto jnull;
369 *needs_list = (cp != linebuf && *linebuf != '\0');
370 j_leave:
371 NYD_LEAVE;
372 return cp;
373 jnull:
374 *needs_list = FAL0;
375 cp = NULL;
376 goto j_leave;
379 FL void
380 makelow(char *cp) /* TODO isn't that crap? --> */
382 NYD_ENTER;
383 #ifdef HAVE_C90AMEND1
384 if (n_mb_cur_max > 1) {
385 char *tp = cp;
386 wchar_t wc;
387 int len;
389 while (*cp != '\0') {
390 len = mbtowc(&wc, cp, n_mb_cur_max);
391 if (len < 0)
392 *tp++ = *cp++;
393 else {
394 wc = towlower(wc);
395 if (wctomb(tp, wc) == len)
396 tp += len, cp += len;
397 else
398 *tp++ = *cp++; /* <-- at least here */
401 } else
402 #endif
405 *cp = tolower((uc_i)*cp);
406 while (*cp++ != '\0');
408 NYD_LEAVE;
/* Test whether sub occurs in str, ignoring case: both sides are uppercased
 * before they are compared.  With HAVE_C90AMEND1 and n_mb_cur_max > 1 the
 * comparison is performed on wide characters obtained via mbtowc(), with a
 * jump to the bytewise comparison if a conversion fails.
 * Returns true if the end of sub was reached, i.e., all of it was matched */
411 FL bool_t
412 substr(char const *str, char const *sub)
414 char const *cp, *backup;
415 NYD_ENTER;
/* cp walks sub, backup remembers where in str the current attempt started */
417 cp = sub;
418 backup = str;
419 while (*str != '\0' && *cp != '\0') {
420 #ifdef HAVE_C90AMEND1
421 if (n_mb_cur_max > 1) {
422 wchar_t c, c2;
423 int sz;
/* NOTE(review): if only the second conversion fails cp has been advanced by
 * a whole character already when the bytewise path is entered -- confirm */
425 if ((sz = mbtowc(&c, cp, n_mb_cur_max)) == -1)
426 goto Jsinglebyte;
427 cp += sz;
428 if ((sz = mbtowc(&c2, str, n_mb_cur_max)) == -1)
429 goto Jsinglebyte;
430 str += sz;
431 c = towupper(c);
432 c2 = towupper(c2);
/* Mismatch: restart matching sub one character after the last start */
433 if (c != c2) {
434 if ((sz = mbtowc(&c, backup, n_mb_cur_max)) > 0) {
435 backup += sz;
436 str = backup;
437 } else
438 str = ++backup;
439 cp = sub;
441 } else
442 Jsinglebyte:
443 #endif
445 int c, c2;
447 c = *cp++ & 0377;
448 if (islower(c))
449 c = toupper(c);
450 c2 = *str++ & 0377;
451 if (islower(c2))
452 c2 = toupper(c2);
/* Mismatch: restart matching sub one byte after the last start */
453 if (c != c2) {
454 str = ++backup;
455 cp = sub;
459 NYD_LEAVE;
460 return (*cp == '\0');
463 FL char *
464 sstpcpy(char *dst, char const *src)
466 NYD2_ENTER;
467 while ((*dst = *src++) != '\0')
468 ++dst;
469 NYD2_LEAVE;
470 return dst;
473 FL char *
474 (sstrdup)(char const *cp n_MEMORY_DEBUG_ARGS)
476 char *dp;
477 NYD2_ENTER;
479 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) n_MEMORY_DEBUG_ARGSCALL);
480 NYD2_LEAVE;
481 return dp;
484 FL char *
485 (sbufdup)(char const *cp, size_t len n_MEMORY_DEBUG_ARGS)
487 char *dp = NULL;
488 NYD2_ENTER;
490 dp = (n_alloc)(len +1 n_MEMORY_DEBUG_ARGSCALL);
491 if (cp != NULL)
492 memcpy(dp, cp, len);
493 dp[len] = '\0';
494 NYD2_LEAVE;
495 return dp;
498 FL ssize_t
499 n_strscpy(char *dst, char const *src, size_t dstsize){
500 ssize_t rv;
501 NYD2_ENTER;
503 if(n_LIKELY(dstsize > 0)){
504 rv = 0;
506 if((dst[rv] = src[rv]) == '\0')
507 goto jleave;
508 ++rv;
509 }while(--dstsize > 0);
510 dst[--rv] = '\0';
512 #ifdef HAVE_DEVEL
513 else
514 assert(dstsize > 0);
515 #endif
516 rv = -1;
517 jleave:
518 NYD2_LEAVE;
519 return rv;
522 FL int
523 asccasecmp(char const *s1, char const *s2)
525 int cmp;
526 NYD2_ENTER;
528 for (;;) {
529 char c1 = *s1++, c2 = *s2++;
530 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
531 break;
533 NYD2_LEAVE;
534 return cmp;
537 FL int
538 ascncasecmp(char const *s1, char const *s2, size_t sz)
540 int cmp = 0;
541 NYD2_ENTER;
543 while (sz-- > 0) {
544 char c1 = *s1++, c2 = *s2++;
545 cmp = (ui8_t)lowerconv(c1);
546 cmp -= (ui8_t)lowerconv(c2);
547 if (cmp != 0 || c1 == '\0')
548 break;
550 NYD2_LEAVE;
551 return cmp;
554 FL char const *
555 asccasestr(char const *s1, char const *s2)
557 char c2, c1;
558 NYD2_ENTER;
560 for (c2 = *s2++, c2 = lowerconv(c2);;) {
561 if ((c1 = *s1++) == '\0') {
562 s1 = NULL;
563 break;
565 if (lowerconv(c1) == c2 && is_asccaseprefix(s2, s1)) {
566 --s1;
567 break;
570 NYD2_LEAVE;
571 return s1;
574 FL bool_t
575 is_asccaseprefix(char const *as1, char const *as2) /* TODO arg order */
577 char c1, c2;
578 NYD2_ENTER;
580 for(;; ++as1, ++as2){
581 c1 = *as1;
582 c1 = lowerconv(c1);
583 c2 = *as2;
584 c2 = lowerconv(c2);
586 if(c1 != c2 || c1 == '\0')
587 break;
588 if(c2 == '\0')
589 break;
591 NYD2_LEAVE;
592 return (c1 == '\0');
595 FL bool_t
596 is_ascncaseprefix(char const *as1, char const *as2, size_t sz)
598 char c1, c2;
599 bool_t rv;
600 NYD2_ENTER;
602 for(rv = TRU1; sz-- > 0; ++as1, ++as2){
603 c1 = *as1;
604 c1 = lowerconv(c1);
605 c2 = *as2;
606 c2 = lowerconv(c2);
608 if(!(rv = (c1 == c2)) || c1 == '\0')
609 break;
610 if(c2 == '\0')
611 break;
613 NYD2_LEAVE;
614 return rv;
618 FL struct str *
619 (n_str_assign_buf)(struct str *self, char const *buf, uiz_t buflen
620 n_MEMORY_DEBUG_ARGS){
621 NYD_ENTER;
622 if(buflen == UIZ_MAX)
623 buflen = (buf == NULL) ? 0 : strlen(buf);
625 assert(buflen == 0 || buf != NULL);
627 if(n_LIKELY(buflen > 0)){
628 self->s = (n_realloc)(self->s, (self->l = buflen) +1
629 n_MEMORY_DEBUG_ARGSCALL);
630 memcpy(self->s, buf, buflen);
631 self->s[buflen] = '\0';
632 }else
633 self->l = 0;
634 NYD_LEAVE;
635 return self;
638 FL struct str *
639 (n_str_add_buf)(struct str *self, char const *buf, uiz_t buflen
640 n_MEMORY_DEBUG_ARGS){
641 NYD_ENTER;
642 if(buflen == UIZ_MAX)
643 buflen = (buf == NULL) ? 0 : strlen(buf);
645 assert(buflen == 0 || buf != NULL);
647 if(buflen > 0) {
648 size_t osl = self->l, nsl = osl + buflen;
650 self->s = (n_realloc)(self->s, (self->l = nsl) +1
651 n_MEMORY_DEBUG_ARGSCALL);
652 memcpy(self->s + osl, buf, buflen);
653 self->s[nsl] = '\0';
655 NYD_LEAVE;
656 return self;
659 FL struct str *
660 n_str_trim(struct str *self, enum n_str_trim_flags stf){
661 size_t l;
662 char const *cp;
663 NYD2_ENTER;
665 cp = self->s;
667 if((l = self->l) > 0 && (stf & n_STR_TRIM_FRONT)){
668 while(spacechar(*cp)){
669 ++cp;
670 if(--l == 0)
671 break;
673 self->s = n_UNCONST(cp);
676 if(l > 0 && (stf & n_STR_TRIM_END)){
677 for(cp += l -1; spacechar(*cp); --cp)
678 if(--l == 0)
679 break;
681 self->l = l;
683 NYD2_LEAVE;
684 return self;
687 FL struct str *
688 n_str_trim_ifs(struct str *self, bool_t dodefaults){
689 char s, t, n, c;
690 char const *ifs, *cp;
691 size_t l, i;
692 NYD2_ENTER;
694 if((l = self->l) == 0)
695 goto jleave;
697 ifs = ok_vlook(ifs_ws);
698 cp = self->s;
699 s = t = n = '\0';
701 /* Check whether we can go fast(er) path */
702 for(i = 0; (c = ifs[i]) != '\0'; ++i){
703 switch(c){
704 case ' ': s = c; break;
705 case '\t': t = c; break;
706 case '\n': n = c; break;
707 default:
708 /* Need to go the slow path */
709 while(strchr(ifs, *cp) != NULL){
710 ++cp;
711 if(--l == 0)
712 break;
714 self->s = n_UNCONST(cp);
716 if(l > 0){
717 for(cp += l -1; strchr(ifs, *cp) != NULL;){
718 if(--l == 0)
719 break;
720 /* An uneven number of reverse solidus escapes last WS! */
721 else if(*--cp == '\\'){
722 siz_t j;
724 for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
726 if(j & 1){
727 ++l;
728 break;
733 self->l = l;
735 if(!dodefaults)
736 goto jleave;
737 cp = self->s;
738 ++i;
739 break;
743 /* No ifs-ws? No more data? No trimming */
744 if(l == 0 || (i == 0 && !dodefaults))
745 goto jleave;
747 if(dodefaults){
748 s = ' ';
749 t = '\t';
750 n = '\n';
753 if(l > 0){
754 while((c = *cp) != '\0' && (c == s || c == t || c == n)){
755 ++cp;
756 if(--l == 0)
757 break;
759 self->s = n_UNCONST(cp);
762 if(l > 0){
763 for(cp += l -1; (c = *cp) != '\0' && (c == s || c == t || c == n);){
764 if(--l == 0)
765 break;
766 /* An uneven number of reverse solidus escapes last WS! */
767 else if(*--cp == '\\'){
768 siz_t j;
770 for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
772 if(j & 1){
773 ++l;
774 break;
779 self->l = l;
780 jleave:
781 NYD2_LEAVE;
782 return self;
786 * struct n_string TODO extend, optimize
789 FL struct n_string *
790 (n_string_clear)(struct n_string *self n_MEMORY_DEBUG_ARGS){
791 NYD_ENTER;
793 assert(self != NULL);
795 if(self->s_size != 0){
796 if(!self->s_auto){
797 (n_free)(self->s_dat n_MEMORY_DEBUG_ARGSCALL);
799 self->s_len = self->s_auto = self->s_size = 0;
800 self->s_dat = NULL;
802 NYD_LEAVE;
803 return self;
806 FL struct n_string *
807 (n_string_reserve)(struct n_string *self, size_t noof n_MEMORY_DEBUG_ARGS){
808 ui32_t i, l, s;
809 NYD_ENTER;
810 assert(self != NULL);
812 s = self->s_size;
813 l = self->s_len;
814 if((size_t)SI32_MAX - n_ALIGN(1) - l <= noof)
815 n_panic(_("Memory allocation too large"));
817 if((i = s - l) <= ++noof){
818 i += l + (ui32_t)noof;
819 i = n_ALIGN(i);
820 self->s_size = i -1;
822 if(!self->s_auto)
823 self->s_dat = (n_realloc)(self->s_dat, i n_MEMORY_DEBUG_ARGSCALL);
824 else{
825 char *ndat = (n_autorec_alloc_from_pool)(NULL, i
826 n_MEMORY_DEBUG_ARGSCALL);
828 if(l > 0)
829 memcpy(ndat, self->s_dat, l);
830 self->s_dat = ndat;
833 NYD_LEAVE;
834 return self;
837 FL struct n_string *
838 (n_string_resize)(struct n_string *self, size_t nlen n_MEMORY_DEBUG_ARGS){
839 NYD_ENTER;
840 assert(self != NULL);
842 if(UICMP(z, SI32_MAX, <=, nlen))
843 n_panic(_("Memory allocation too large"));
845 if(self->s_len < nlen)
846 self = (n_string_reserve)(self, nlen n_MEMORY_DEBUG_ARGSCALL);
847 self->s_len = (ui32_t)nlen;
848 NYD_LEAVE;
849 return self;
852 FL struct n_string *
853 (n_string_push_buf)(struct n_string *self, char const *buf, size_t buflen
854 n_MEMORY_DEBUG_ARGS){
855 NYD_ENTER;
857 assert(self != NULL);
858 assert(buflen == 0 || buf != NULL);
860 if(buflen == UIZ_MAX)
861 buflen = (buf == NULL) ? 0 : strlen(buf);
863 if(buflen > 0){
864 ui32_t i;
866 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
867 memcpy(&self->s_dat[i = self->s_len], buf, buflen);
868 self->s_len = (i += (ui32_t)buflen);
870 NYD_LEAVE;
871 return self;
874 FL struct n_string *
875 (n_string_push_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
876 NYD_ENTER;
878 assert(self != NULL);
880 if(self->s_len + 1 >= self->s_size)
881 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
882 self->s_dat[self->s_len++] = c;
883 NYD_LEAVE;
884 return self;
887 FL struct n_string *
888 (n_string_unshift_buf)(struct n_string *self, char const *buf, size_t buflen
889 n_MEMORY_DEBUG_ARGS){
890 NYD_ENTER;
892 assert(self != NULL);
893 assert(buflen == 0 || buf != NULL);
895 if(buflen == UIZ_MAX)
896 buflen = (buf == NULL) ? 0 : strlen(buf);
898 if(buflen > 0){
899 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
900 if(self->s_len > 0)
901 memmove(&self->s_dat[buflen], self->s_dat, self->s_len);
902 memcpy(self->s_dat, buf, buflen);
903 self->s_len += (ui32_t)buflen;
905 NYD_LEAVE;
906 return self;
909 FL struct n_string *
910 (n_string_unshift_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
911 NYD_ENTER;
913 assert(self != NULL);
915 if(self->s_len + 1 >= self->s_size)
916 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
917 if(self->s_len > 0)
918 memmove(&self->s_dat[1], self->s_dat, self->s_len);
919 self->s_dat[0] = c;
920 ++self->s_len;
921 NYD_LEAVE;
922 return self;
925 FL struct n_string *
926 (n_string_insert_buf)(struct n_string *self, size_t idx,
927 char const *buf, size_t buflen n_MEMORY_DEBUG_ARGS){
928 NYD_ENTER;
930 assert(self != NULL);
931 assert(buflen == 0 || buf != NULL);
932 assert(idx <= self->s_len);
934 if(buflen == UIZ_MAX)
935 buflen = (buf == NULL) ? 0 : strlen(buf);
937 if(buflen > 0){
938 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
939 if(self->s_len > 0)
940 memmove(&self->s_dat[idx + buflen], &self->s_dat[idx],
941 self->s_len - idx);
942 memcpy(&self->s_dat[idx], buf, buflen);
943 self->s_len += (ui32_t)buflen;
945 NYD_LEAVE;
946 return self;
949 FL struct n_string *
950 (n_string_insert_c)(struct n_string *self, size_t idx,
951 char c n_MEMORY_DEBUG_ARGS){
952 NYD_ENTER;
954 assert(self != NULL);
955 assert(idx <= self->s_len);
957 if(self->s_len + 1 >= self->s_size)
958 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
959 if(self->s_len > 0)
960 memmove(&self->s_dat[idx + 1], &self->s_dat[idx], self->s_len - idx);
961 self->s_dat[idx] = c;
962 ++self->s_len;
963 NYD_LEAVE;
964 return self;
967 FL struct n_string *
968 n_string_cut(struct n_string *self, size_t idx, size_t len){
969 NYD_ENTER;
971 assert(self != NULL);
972 assert(UIZ_MAX - idx > len);
973 assert(SI32_MAX >= idx + len);
974 assert(idx + len <= self->s_len);
976 if(len > 0)
977 memmove(&self->s_dat[idx], &self->s_dat[idx + len],
978 (self->s_len -= len) - idx);
979 NYD_LEAVE;
980 return self;
983 FL char *
984 (n_string_cp)(struct n_string *self n_MEMORY_DEBUG_ARGS){
985 char *rv;
986 NYD2_ENTER;
988 assert(self != NULL);
990 if(self->s_size == 0)
991 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
993 (rv = self->s_dat)[self->s_len] = '\0';
994 NYD2_LEAVE;
995 return rv;
998 FL char const *
999 n_string_cp_const(struct n_string const *self){
1000 char const *rv;
1001 NYD2_ENTER;
1003 assert(self != NULL);
1005 if(self->s_size != 0){
1006 ((struct n_string*)n_UNCONST(self))->s_dat[self->s_len] = '\0';
1007 rv = self->s_dat;
1008 }else
1009 rv = n_empty;
1010 NYD2_LEAVE;
1011 return rv;
1015 * UTF-8
1018 FL ui32_t
1019 n_utf8_to_utf32(char const **bdat, size_t *blen){
1020 ui32_t c, x, x1;
1021 char const *cp, *cpx;
1022 size_t l, lx;
1023 NYD2_ENTER;
1025 lx = l = *blen - 1;
1026 x = (ui8_t)*(cp = *bdat);
1027 cpx = ++cp;
1029 if(n_LIKELY(x <= 0x7Fu))
1030 c = x;
1031 /* 0xF8, but Unicode guarantees maximum of 0x10FFFFu -> F4 8F BF BF.
1032 * Unicode 9.0, 3.9, UTF-8, Table 3-7. Well-Formed UTF-8 Byte Sequences */
1033 else if(n_LIKELY(x > 0xC0u && x <= 0xF4u)){
1034 if(n_LIKELY(x < 0xE0u)){
1035 if(n_UNLIKELY(l < 1))
1036 goto jenobuf;
1037 --l;
1039 c = (x &= 0x1Fu);
1040 }else if(n_LIKELY(x < 0xF0u)){
1041 if(n_UNLIKELY(l < 2))
1042 goto jenobuf;
1043 l -= 2;
1045 x1 = x;
1046 c = (x &= 0x0Fu);
1048 /* Second byte constraints */
1049 x = (ui8_t)*cp++;
1050 switch(x1){
1051 case 0xE0u:
1052 if(n_UNLIKELY(x < 0xA0u || x > 0xBFu))
1053 goto jerr;
1054 break;
1055 case 0xEDu:
1056 if(n_UNLIKELY(x < 0x80u || x > 0x9Fu))
1057 goto jerr;
1058 break;
1059 default:
1060 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1061 goto jerr;
1062 break;
1064 c <<= 6;
1065 c |= (x &= 0x3Fu);
1066 }else{
1067 if(n_UNLIKELY(l < 3))
1068 goto jenobuf;
1069 l -= 3;
1071 x1 = x;
1072 c = (x &= 0x07u);
1074 /* Second byte constraints */
1075 x = (ui8_t)*cp++;
1076 switch(x1){
1077 case 0xF0u:
1078 if(n_UNLIKELY(x < 0x90u || x > 0xBFu))
1079 goto jerr;
1080 break;
1081 case 0xF4u:
1082 if(n_UNLIKELY((x & 0xF0u) != 0x80u)) /* 80..8F */
1083 goto jerr;
1084 break;
1085 default:
1086 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1087 goto jerr;
1088 break;
1090 c <<= 6;
1091 c |= (x &= 0x3Fu);
1093 x = (ui8_t)*cp++;
1094 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1095 goto jerr;
1096 c <<= 6;
1097 c |= (x &= 0x3Fu);
1100 x = (ui8_t)*cp++;
1101 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1102 goto jerr;
1103 c <<= 6;
1104 c |= x & 0x3Fu;
1105 }else
1106 goto jerr;
1108 cpx = cp;
1109 lx = l;
1110 jleave:
1111 *bdat = cpx;
1112 *blen = lx;
1113 NYD2_LEAVE;
1114 return c;
1115 jenobuf:
1116 jerr:
1117 c = UI32_MAX;
1118 goto jleave;
1121 FL size_t
1122 n_utf32_to_utf8(ui32_t c, char *buf)
1124 struct {
1125 ui32_t lower_bound;
1126 ui32_t upper_bound;
1127 ui8_t enc_leader;
1128 ui8_t enc_lval;
1129 ui8_t dec_leader_mask;
1130 ui8_t dec_leader_val_mask;
1131 ui8_t dec_bytes_togo;
1132 ui8_t cat_index;
1133 ui8_t __dummy[2];
1134 } const _cat[] = {
1135 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
1136 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
1137 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
1138 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
1139 /* xxx _from_utf32() simply assumes magic code points for surrogates!
1140 * xxx (However, should we ever get yet another surrogate range we
1141 * xxx need to deal with that all over the place anyway? */
1142 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
1143 {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
1144 }, *catp = _cat;
1145 size_t l;
1147 if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
1148 if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
1149 if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
1150 if (c <= _cat[3].upper_bound) {
1151 /* Surrogates may not be converted (Compatibility rule C10) */
1152 if (c >= 0xD800u && c <= 0xDFFFu)
1153 goto jerr;
1154 catp += 3;
1155 goto j3;
1157 if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
1158 jerr:
1159 c = 0xFFFDu; /* Unicode replacement character */
1160 catp += 3;
1161 goto j3;
1163 buf[3] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1165 buf[2] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1167 buf[1] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1169 buf[0] = (char)catp->enc_leader | (char)(c);
1171 buf[catp->enc_lval] = '\0';
1172 l = catp->enc_lval;
1173 NYD2_LEAVE;
1174 return l;
1178 * Our iconv(3) wrapper
1181 FL char *
1182 n_iconv_normalize_name(char const *cset){
1183 char *cp, c, *tcp, tc;
1184 bool_t any;
1185 NYD2_ENTER;
1187 /* We need to strip //SUFFIXes off, we want to normalize to all lowercase,
1188 * and we perform some slight content testing, too */
1189 for(any = FAL0, cp = n_UNCONST(cset); (c = *cp) != '\0'; ++cp){
1190 if(!alnumchar(c) && !punctchar(c)){
1191 n_err(_("Invalid character set name %s\n"),
1192 n_shexp_quote_cp(cset, FAL0));
1193 cset = NULL;
1194 goto jleave;
1195 }else if(c == '/')
1196 break;
1197 else if(upperchar(c))
1198 any = TRU1;
1201 if(any || c != '\0'){
1202 cp = savestrbuf(cset, PTR2SIZE(cp - cset));
1203 for(tcp = cp; (tc = *tcp) != '\0'; ++tcp)
1204 *tcp = lowerconv(tc);
1206 if(c != '\0' && (n_poption & n_PO_D_V))
1207 n_err(_("Stripped off character set suffix: %s -> %s\n"),
1208 n_shexp_quote_cp(cset, FAL0), n_shexp_quote_cp(cp, FAL0));
1210 cset = cp;
1212 jleave:
1213 NYD2_LEAVE;
1214 return n_UNCONST(cset);
1217 FL bool_t
1218 n_iconv_name_is_ascii(char const *cset){ /* TODO ctext/su */
1219 bool_t rv;
1220 NYD2_ENTER;
1222 /* In MIME preference order */
1223 rv = (!asccasecmp(cset, "US-ASCII") || !asccasecmp(cset, "ASCII") ||
1224 !asccasecmp(cset, "ANSI_X3.4-1968") ||
1225 !asccasecmp(cset, "iso-ir-6") ||
1226 !asccasecmp(cset, "ANSI_X3.4-1986") ||
1227 !asccasecmp(cset, "ISO_646.irv:1991") ||
1228 !asccasecmp(cset, "ISO646-US") || !asccasecmp(cset, "us") ||
1229 !asccasecmp(cset, "IBM367") || !asccasecmp(cset, "cp367") ||
1230 !asccasecmp(cset, "csASCII"));
1231 NYD2_LEAVE;
1232 return rv;
1235 #ifdef HAVE_ICONV
1236 FL iconv_t
1237 n_iconv_open(char const *tocode, char const *fromcode){
1238 iconv_t id;
1239 NYD_ENTER;
1241 if((!asccasecmp(fromcode, "unknown-8bit") ||
1242 !asccasecmp(fromcode, "binary")) &&
1243 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
1244 fromcode = ok_vlook(CHARSET_8BIT_OKEY);
1246 id = iconv_open(tocode, fromcode);
1248 /* If the encoding names are equal at this point, they are just not
1249 * understood by iconv(), and we cannot sensibly use it in any way. We do
1250 * not perform this as an optimization above since iconv() can otherwise be
1251 * used to check the validity of the input even with identical encoding
1252 * names */
1253 if (id == (iconv_t)-1 && !asccasecmp(tocode, fromcode))
1254 n_err_no = n_ERR_NONE;
1255 NYD_LEAVE;
1256 return id;
1259 FL void
1260 n_iconv_close(iconv_t cd){
1261 NYD_ENTER;
1262 iconv_close(cd);
1263 if(cd == iconvd)
1264 iconvd = (iconv_t)-1;
1265 NYD_LEAVE;
1268 FL void
1269 n_iconv_reset(iconv_t cd){
1270 NYD_ENTER;
1271 iconv(cd, NULL, NULL, NULL, NULL);
1272 NYD_LEAVE;
/* __INBCAST(S) casts the input buffer argument of iconv(3) to the pointer
 * type that is chosen by the preprocessor tests below; (char **) is used if
 * none of them applies.  It is #undef'ined again after n_iconv_buf() */
1275 /* (2012-09-24: export and use it exclusively to isolate prototype problems
1276 * (*inb* is 'char const **' except in POSIX) in a single place.
1277 * GNU libiconv even allows for configuration time const/non-const..
1278 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
1279 * support compiler invocations which bail on error, so no -Werror */
1280 /* Citrus project? */
1281 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
1282 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
1283 # if n_OS_DRAGONFLY
1284 # define __INBCAST(S) (char ** __restrict__)n_UNCONST(S)
1285 # else
1286 # define __INBCAST(S) (char const **)n_UNCONST(S)
1287 # endif
1288 # elif n_OS_SUNOS || n_OS_SOLARIS
1289 # define __INBCAST(S) (char const ** __restrict__)n_UNCONST(S)
1290 # endif
1291 # ifndef __INBCAST
1292 # define __INBCAST(S) (char **)n_UNCONST(S)
1293 # endif
/* Convert the *inbleft bytes at *inb to the *outbleft bytes at *outb via
 * iconv(3), which updates all four according to the progress made.
 * Returns 0 on success, otherwise an error number, which is also stored in
 * n_iconv_err_no:
 * - n_ERR_NOENT if iconv(3) reported conversions (a result other than 0 and
 *   (size_t)-1) and n_ICONV_IGN_NOREVERSE is not set in icf.
 * - n_ERR_2BIG if the output buffer is too small.
 * - Any other error that iconv(3) left in n_err_no.
 * With n_ICONV_IGN_ILSEQ an input byte which causes n_ERR_ILSEQ is skipped,
 * and a question mark is stored in the output instead, or n_unirepl if
 * n_ICONV_UNIREPL is set (which is only honoured if n_PSO_UNICODE is set in
 * n_psonce); the conversion then continues */
1295 FL int
1296 n_iconv_buf(iconv_t cd, enum n_iconv_flags icf,
1297 char const **inb, size_t *inbleft, char **outb, size_t *outbleft){
1298 int err;
1299 NYD2_ENTER;
1301 if((icf & n_ICONV_UNIREPL) && !(n_psonce & n_PSO_UNICODE))
1302 icf &= ~n_ICONV_UNIREPL;
1304 for(;;){
1305 size_t sz;
/* A result of 0 means everything was converted */
1307 if((sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft)) == 0)
1308 break;
1309 if(sz != (size_t)-1){
1310 if(!(icf & n_ICONV_IGN_NOREVERSE)){
1311 err = n_ERR_NOENT;
1312 goto jleave;
1314 break;
1317 if((err = n_err_no) == n_ERR_2BIG)
1318 goto jleave;
/* Only illegal sequences can be worked around, and only on request */
1320 if(!(icf & n_ICONV_IGN_ILSEQ) || err != n_ERR_ILSEQ)
1321 goto jleave;
1322 if(*inbleft > 0){
1323 ++(*inb);
1324 --(*inbleft);
1325 if(icf & n_ICONV_UNIREPL){
1326 if(*outbleft >= sizeof(n_unirepl) -1){
1327 memcpy(*outb, n_unirepl, sizeof(n_unirepl) -1);
1328 *outb += sizeof(n_unirepl) -1;
1329 *outbleft -= sizeof(n_unirepl) -1;
1330 continue;
1332 }else if(*outbleft > 0){
1333 *(*outb)++ = '?';
1334 --*outbleft;
1335 continue;
/* NOTE(review): the offending input byte has been skipped already at this
 * point, so it seems a caller that retries with a larger output buffer will
 * not get a replacement character for it -- confirm */
1337 err = n_ERR_2BIG;
1338 goto jleave;
1339 }else if(*outbleft > 0){
1340 **outb = '\0';
1341 goto jleave;
1344 err = 0;
1345 jleave:
1346 n_iconv_err_no = err;
1347 NYD2_LEAVE;
1348 return err;
1350 # undef __INBCAST
/* Convert the content of in via n_iconv_buf() and store the result in out.
 * The buffer of out is handed to a struct n_string for the duration of the
 * call, and is grown in a loop for as long as n_iconv_buf() fails with
 * n_ERR_2BIG; on return out->s and out->l describe the NUL terminated result
 * of what was converted.
 * If in_rest_or_null is not NULL it is set to the unconverted rest of in.
 * Returns 0 on success, otherwise an error number: n_ERR_INVAL if the sizes
 * involved cannot be handled by struct n_string (out is not touched if this
 * is detected upfront), or the error of n_iconv_buf() */
1352 FL int
1353 n_iconv_str(iconv_t cd, enum n_iconv_flags icf,
1354 struct str *out, struct str const *in, struct str *in_rest_or_null){
1355 struct n_string s, *sp = &s;
1356 char const *ib;
1357 int err;
1358 size_t il;
1359 NYD2_ENTER;
1361 il = in->l;
1362 if(!n_string_get_can_book(il) || !n_string_get_can_book(out->l)){
1363 err = n_ERR_INVAL;
1364 goto j_leave;
1366 ib = in->s;
1368 sp = n_string_creat(sp);
1369 sp = n_string_take_ownership(sp, out->s, out->l, 0);
1371 for(;;){
1372 char *ob_base, *ob;
1373 size_t ol, nol;
/* ol is the amount of output so far; nol becomes the new overall size, which
 * is at least as large as the remaining input */
1375 if((nol = ol = sp->s_len) < il)
1376 nol = il;
1377 assert(sizeof(sp->s_len) == sizeof(ui32_t));
1378 if(nol < 128)
1379 nol += 32;
1380 else{
1381 ui64_t xnol;
/* NOTE(review): the cast applies to the result of the shift, which is still
 * performed in size_t -- confirm this cannot overflow where size_t has
 * 32 bits */
1383 xnol = (ui64_t)(nol << 1) - (nol >> 4);
1384 if(!n_string_can_book(sp, xnol)){
1385 xnol = ol + 64;
1386 if(!n_string_can_book(sp, xnol)){
1387 err = n_ERR_INVAL;
1388 goto jleave;
1391 nol = (size_t)xnol;
1393 sp = n_string_resize(sp, nol);
/* Convert into the free space behind the output collected so far */
1395 ob = ob_base = &sp->s_dat[ol];
1396 nol -= ol;
1397 err = n_iconv_buf(cd, icf, &ib, &il, &ob, &nol);
/* Cut the length down to what has actually been produced */
1399 sp = n_string_trunc(sp, ol + PTR2SIZE(ob - ob_base));
1400 if(err == 0 || err != n_ERR_2BIG)
1401 break;
1404 if(in_rest_or_null != NULL){
1405 in_rest_or_null->s = n_UNCONST(ib);
1406 in_rest_or_null->l = il;
1409 jleave:
/* Hand the buffer back to out */
1410 out->s = n_string_cp(sp);
1411 out->l = sp->s_len;
1412 sp = n_string_drop_ownership(sp);
1413 /* n_string_gut(sp)*/
1414 j_leave:
1415 NYD2_LEAVE;
1416 return err;
1419 FL char *
1420 n_iconv_onetime_cp(enum n_iconv_flags icf,
1421 char const *tocode, char const *fromcode, char const *input){
1422 struct str out, in;
1423 iconv_t icd;
1424 char *rv;
1425 NYD2_ENTER;
1427 rv = NULL;
1428 if(tocode == NULL)
1429 tocode = ok_vlook(ttycharset);
1430 if(fromcode == NULL)
1431 fromcode = "utf-8";
1433 if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
1434 goto jleave;
1436 in.l = strlen(in.s = n_UNCONST(input)); /* logical */
1437 out.s = NULL, out.l = 0;
1438 if(!n_iconv_str(icd, icf, &out, &in, NULL))
1439 rv = savestrbuf(out.s, out.l);
1440 if(out.s != NULL)
1441 n_free(out.s);
1443 iconv_close(icd);
1444 jleave:
1445 NYD2_LEAVE;
1446 return rv;
1448 #endif /* HAVE_ICONV */
1450 /* s-it-mode */