Fix [1785be65] from 2017-03-xx
[s-mailx.git] / strings.c
blob4166027964c87fad4685365558ada07e8cecac25
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE strings
38 #ifndef HAVE_AMALGAMATION
39 # include "nail.h"
40 #endif
42 #include <ctype.h>
44 FL char *
45 (savestr)(char const *str n_MEMORY_DEBUG_ARGS)
47 size_t size;
48 char *news;
49 NYD_ENTER;
51 size = strlen(str);
52 news = (n_autorec_alloc_from_pool)(NULL, size +1 n_MEMORY_DEBUG_ARGSCALL);
53 if(size > 0)
54 memcpy(news, str, size);
55 news[size] = '\0';
56 NYD_LEAVE;
57 return news;
60 FL char *
61 (savestrbuf)(char const *sbuf, size_t sbuf_len n_MEMORY_DEBUG_ARGS)
63 char *news;
64 NYD_ENTER;
66 news = (n_autorec_alloc_from_pool)(NULL, sbuf_len +1
67 n_MEMORY_DEBUG_ARGSCALL);
68 if(sbuf_len > 0)
69 memcpy(news, sbuf, sbuf_len);
70 news[sbuf_len] = 0;
71 NYD_LEAVE;
72 return news;
75 FL char *
76 (savecatsep)(char const *s1, char sep, char const *s2 n_MEMORY_DEBUG_ARGS)
78 size_t l1, l2;
79 char *news;
80 NYD_ENTER;
82 l1 = (s1 != NULL) ? strlen(s1) : 0;
83 l2 = strlen(s2);
84 news = (n_autorec_alloc_from_pool)(NULL, l1 + (sep != '\0') + l2 +1
85 n_MEMORY_DEBUG_ARGSCALL);
86 if (l1 > 0) {
87 memcpy(news + 0, s1, l1);
88 if (sep != '\0')
89 news[l1++] = sep;
91 if(l2 > 0)
92 memcpy(news + l1, s2, l2);
93 news[l1 + l2] = '\0';
94 NYD_LEAVE;
95 return news;
/*
 * Support routines, auto-reclaimed storage
 */
102 FL char *
103 (i_strdup)(char const *src n_MEMORY_DEBUG_ARGS)
105 size_t sz;
106 char *dest;
107 NYD_ENTER;
109 sz = strlen(src) +1;
110 dest = (n_autorec_alloc_from_pool)(NULL, sz n_MEMORY_DEBUG_ARGSCALL);
111 if(sz > 1)
112 i_strcpy(dest, src, sz);
113 else
114 dest[sz] = '\0';
115 NYD_LEAVE;
116 return dest;
119 FL struct str *
120 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
122 va_list vl;
123 size_t l;
124 char const *cs;
125 NYD_ENTER;
127 va_start(vl, self);
128 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
129 l += strlen(cs);
130 va_end(vl);
132 self->l = l;
133 self->s = salloc(l +1);
135 va_start(vl, self);
136 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
137 size_t i;
139 i = strlen(cs);
140 if(i > 0){
141 memcpy(self->s + l, cs, i);
142 l += i;
145 self->s[l] = '\0';
146 va_end(vl);
147 NYD_LEAVE;
148 return self;
151 FL struct str *
152 (str_concat_cpa)(struct str *self, char const * const *cpa,
153 char const *sep_o_null n_MEMORY_DEBUG_ARGS)
155 size_t sonl, l;
156 char const * const *xcpa;
157 NYD_ENTER;
159 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
161 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
162 l += strlen(*xcpa) + sonl;
164 self->l = l;
165 self->s = (n_autorec_alloc_from_pool)(NULL, l +1 n_MEMORY_DEBUG_ARGSCALL);
167 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
168 size_t i;
170 i = strlen(*xcpa);
171 if(i > 0){
172 memcpy(self->s + l, *xcpa, i);
173 l += i;
175 if (sonl > 0) {
176 memcpy(self->s + l, sep_o_null, sonl);
177 l += sonl;
180 self->s[l] = '\0';
181 NYD_LEAVE;
182 return self;
/*
 * Routines that are not related to auto-reclaimed storage follow.
 */
189 FL bool_t
190 n_anyof_buf(char const *template, char const *dat, size_t len){
191 char c;
192 NYD2_ENTER;
194 if(len == UIZ_MAX){
195 while((c = *template++) != '\0')
196 if(strchr(dat, c) != NULL)
197 break;
198 }else if(len > 0){
199 while((c = *template++) != '\0')
200 if(memchr(dat, c, len) != NULL)
201 break;
202 }else
203 c = '\0';
204 NYD2_LEAVE;
205 return (c != '\0');
208 FL char *
209 n_strsep(char **iolist, char sep, bool_t ignore_empty){
210 char *base, *cp;
211 NYD2_ENTER;
213 for(base = *iolist; base != NULL; base = *iolist){
214 while(*base != '\0' && blankspacechar(*base))
215 ++base;
217 cp = strchr(base, sep);
218 if(cp != NULL)
219 *iolist = &cp[1];
220 else{
221 *iolist = NULL;
222 cp = &base[strlen(base)];
224 while(cp > base && blankspacechar(cp[-1]))
225 --cp;
226 *cp = '\0';
227 if(*base != '\0' || !ignore_empty)
228 break;
230 NYD2_LEAVE;
231 return base;
/* Like n_strsep(), but a sep which is preceded by a reverse solidus does not
 * terminate the token.  The list is modified in place: leading and trailing
 * blankspacechar()s are stripped, and if any escaped sep was seen each
 * "\sep" pair inside the token is reduced to a plain sep.
 * *iolist is advanced past the separator, or set to NULL at end of input.
 * Returns the token, or NULL once the list is exhausted */
234 FL char *
235 n_strsep_esc(char **iolist, char sep, bool_t ignore_empty){
236 char *cp, c, *base;
237 bool_t isesc, anyesc;
238 NYD2_ENTER;
240 for(base = *iolist; base != NULL; base = *iolist){
/* Skip leading whitespace */
241 while((c = *base) != '\0' && blankspacechar(c))
242 ++base;
/* Search the token end; isesc is set while the previous byte was an
 * unescaped backslash, anyesc notes that an escaped sep was passed */
244 for(isesc = anyesc = FAL0, cp = base;; ++cp){
245 if(n_UNLIKELY((c = *cp) == '\0')){
246 *iolist = NULL;
247 break;
248 }else if(!isesc){
249 if(c == sep){
250 *iolist = &cp[1];
251 break;
253 isesc = (c == '\\');
254 }else{
255 isesc = FAL0;
256 anyesc |= (c == sep);
/* Strip trailing whitespace and terminate the token */
260 while(cp > base && blankspacechar(cp[-1]))
261 --cp;
262 *cp = '\0';
264 if(*base != '\0'){
265 if(anyesc){
266 char *ins;
/* Compact in place: ins is the write, cp the read position */
268 for(ins = cp = base;; ++ins)
269 if((c = *cp) == '\\' && cp[1] == sep){
270 *ins = sep;
271 cp += 2;
272 }else if((*ins = (++cp, c)) == '\0')
273 break;
/* Empty tokens are only returned if the caller asked for them */
277 if(*base != '\0' || !ignore_empty)
278 break;
280 NYD2_LEAVE;
281 return base;
284 FL void
285 i_strcpy(char *dest, char const *src, size_t size)
287 NYD2_ENTER;
288 if(size > 0){
289 for(;; ++dest, ++src)
290 if((*dest = lowerconv(*src)) == '\0'){
291 break;
292 }else if(--size == 0){
293 *dest = '\0';
294 break;
297 NYD2_LEAVE;
300 FL bool_t
301 is_prefix(char const *as1, char const *as2) /* TODO arg order */
303 char c;
304 NYD2_ENTER;
306 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
307 if (*as2 == '\0')
308 break;
309 NYD2_LEAVE;
310 return (c == '\0');
313 FL char *
314 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
316 char const *cp;
317 size_t i;
318 char c, *rv;
319 NYD2_ENTER;
321 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
322 if (c == '"' || c == '\\')
323 ++i;
324 rv = salloc(i +1);
326 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
327 if (c == '"' || c == '\\')
328 rv[i++] = '\\';
329 rv[i] = '\0';
330 NYD2_LEAVE;
331 return rv;
/* Locate the "last string" of linebuf, which is modified in place.
 * Trailing whitespace is cut off; the last string then either is the final
 * whitespace-delimited word, or, if the line ends with ' or ", extends back
 * to the matching quote which is not preceded by a reverse solidus (the
 * escaping reverse solidus of passed quotes / whitespace is removed).
 * If strip is set the surrounding quotes are removed.
 * Returns a pointer to the last string, or NULL if there is none, in which
 * case *needs_list is FAL0; otherwise *needs_list is computed at jleave from
 * whether the result differs from (the possibly advanced) linebuf and
 * whether that is non-empty */
334 FL char *
335 laststring(char *linebuf, bool_t *needs_list, bool_t strip)
337 char *cp, *p, quoted;
338 NYD_ENTER;
340 /* Anything to do at all? */
341 if (*(cp = linebuf) == '\0')
342 goto jnull;
343 cp += strlen(linebuf) -1;
345 /* Strip away trailing blanks */
346 while (spacechar(*cp) && cp > linebuf)
347 --cp;
348 cp[1] = '\0';
349 if (cp == linebuf)
350 goto jleave;
352 /* Now search for the BOS of the "last string" */
/* quoted holds the closing quote character, or ' ' for whitespace mode */
353 quoted = *cp;
354 if (quoted == '\'' || quoted == '"') {
355 if (strip)
356 *cp = '\0';
357 } else
358 quoted = ' ';
360 while (cp > linebuf) {
361 --cp;
362 if (quoted != ' ') {
363 if (*cp != quoted)
364 continue;
365 } else if (!spacechar(*cp))
366 continue;
/* A delimiter which is not escaped ends the backward search */
367 if (cp == linebuf || cp[-1] != '\\') {
368 /* When in whitespace mode, WS prefix doesn't belong */
369 if (quoted == ' ')
370 ++cp;
371 break;
373 /* Expand the escaped quote character */
374 for (p = --cp; (p[0] = p[1]) != '\0'; ++p)
/* With strip, also remove the opening quote by shifting the string left */
377 if (strip && quoted != ' ' && *cp == quoted)
378 for (p = cp; (p[0] = p[1]) != '\0'; ++p)
381 /* The "last string" has been skipped over, but still, try to step backwards
382 * until we are at BOS or see whitespace, so as to make possible things like
383 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
384 while (cp > linebuf) {
385 --cp;
386 if (spacechar(*cp)) {
387 p = cp;
388 *cp++ = '\0';
389 /* We can furtherly release our callees if we now decide whether the
390 * remaining non-"last string" line content contains non-WS */
391 while (--p >= linebuf)
392 if (!spacechar(*p))
393 goto jleave;
/* Only whitespace precedes: let linebuf start at the last string */
394 linebuf = cp;
395 break;
399 jleave:
400 if (cp != NULL && *cp == '\0')
401 goto jnull;
402 *needs_list = (cp != linebuf && *linebuf != '\0');
403 j_leave:
404 NYD_LEAVE;
405 return cp;
406 jnull:
407 *needs_list = FAL0;
408 cp = NULL;
409 goto j_leave;
/* Convert the NUL-terminated string cp to lowercase in place.
 * With HAVE_C90AMEND1 and n_mb_cur_max > 1 the string is walked with
 * mbtowc(), each wide character is lowercased with towlower() and written
 * back with wctomb() provided the byte length did not change; bytes which
 * fail to convert are copied as-is.  Otherwise each byte is passed through
 * tolower() */
412 FL void
413 makelow(char *cp) /* TODO isn't that crap? --> */
415 NYD_ENTER;
416 #ifdef HAVE_C90AMEND1
417 if (n_mb_cur_max > 1) {
/* tp is the write position, cp the read position */
418 char *tp = cp;
419 wchar_t wc;
420 int len;
422 while (*cp != '\0') {
423 len = mbtowc(&wc, cp, n_mb_cur_max);
424 if (len < 0)
425 *tp++ = *cp++;
426 else {
427 wc = towlower(wc);
/* Only store the lowercased form if it occupies the same number of bytes */
428 if (wctomb(tp, wc) == len)
429 tp += len, cp += len;
430 else
431 *tp++ = *cp++; /* <-- at least here */
434 } else
435 #endif
/* Single-byte path */
438 *cp = tolower((uc_i)*cp);
439 while (*cp++ != '\0');
441 NYD_LEAVE;
/* Case-insensitive substring test: returns true if the whole of sub was
 * matched somewhere within str (also if sub is empty).
 * Characters are compared after conversion to uppercase; with
 * HAVE_C90AMEND1 and n_mb_cur_max > 1 the comparison works on wide
 * characters decoded with mbtowc(), falling back to the single-byte
 * comparison if decoding fails.  backup marks the start of the current
 * match attempt in str; upon mismatch the search restarts behind it */
444 FL bool_t
445 substr(char const *str, char const *sub)
447 char const *cp, *backup;
448 NYD_ENTER;
450 cp = sub;
451 backup = str;
452 while (*str != '\0' && *cp != '\0') {
453 #ifdef HAVE_C90AMEND1
454 if (n_mb_cur_max > 1) {
455 wchar_t c, c2;
456 int sz;
458 if ((sz = mbtowc(&c, cp, n_mb_cur_max)) == -1)
459 goto Jsinglebyte;
460 cp += sz;
461 if ((sz = mbtowc(&c2, str, n_mb_cur_max)) == -1)
462 goto Jsinglebyte;
463 str += sz;
464 c = towupper(c);
465 c2 = towupper(c2);
466 if (c != c2) {
/* Mismatch: advance backup by one (multibyte) character and retry */
467 if ((sz = mbtowc(&c, backup, n_mb_cur_max)) > 0) {
468 backup += sz;
469 str = backup;
470 } else
471 str = ++backup;
472 cp = sub;
474 } else
475 Jsinglebyte:
476 #endif
478 int c, c2;
/* Mask to 8 bits so that the ctype functions see a non-negative value */
480 c = *cp++ & 0377;
481 if (islower(c))
482 c = toupper(c);
483 c2 = *str++ & 0377;
484 if (islower(c2))
485 c2 = toupper(c2);
486 if (c != c2) {
487 str = ++backup;
488 cp = sub;
492 NYD_LEAVE;
493 return (*cp == '\0');
496 FL char *
497 sstpcpy(char *dst, char const *src)
499 NYD2_ENTER;
500 while ((*dst = *src++) != '\0')
501 ++dst;
502 NYD2_LEAVE;
503 return dst;
506 FL char *
507 (sstrdup)(char const *cp n_MEMORY_DEBUG_ARGS)
509 char *dp;
510 NYD2_ENTER;
512 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) n_MEMORY_DEBUG_ARGSCALL);
513 NYD2_LEAVE;
514 return dp;
517 FL char *
518 (sbufdup)(char const *cp, size_t len n_MEMORY_DEBUG_ARGS)
520 char *dp = NULL;
521 NYD2_ENTER;
523 dp = (n_alloc)(len +1 n_MEMORY_DEBUG_ARGSCALL);
524 if (cp != NULL)
525 memcpy(dp, cp, len);
526 dp[len] = '\0';
527 NYD2_LEAVE;
528 return dp;
531 FL ssize_t
532 n_strscpy(char *dst, char const *src, size_t dstsize){
533 ssize_t rv;
534 NYD2_ENTER;
536 if(n_LIKELY(dstsize > 0)){
537 rv = 0;
539 if((dst[rv] = src[rv]) == '\0')
540 goto jleave;
541 ++rv;
542 }while(--dstsize > 0);
543 dst[--rv] = '\0';
545 #ifdef HAVE_DEVEL
546 else
547 assert(dstsize > 0);
548 #endif
549 rv = -1;
550 jleave:
551 NYD2_LEAVE;
552 return rv;
555 FL int
556 asccasecmp(char const *s1, char const *s2)
558 int cmp;
559 NYD2_ENTER;
561 for (;;) {
562 char c1 = *s1++, c2 = *s2++;
563 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
564 break;
566 NYD2_LEAVE;
567 return cmp;
570 FL int
571 ascncasecmp(char const *s1, char const *s2, size_t sz)
573 int cmp = 0;
574 NYD2_ENTER;
576 while (sz-- > 0) {
577 char c1 = *s1++, c2 = *s2++;
578 cmp = (ui8_t)lowerconv(c1);
579 cmp -= (ui8_t)lowerconv(c2);
580 if (cmp != 0 || c1 == '\0')
581 break;
583 NYD2_LEAVE;
584 return cmp;
587 FL char const *
588 asccasestr(char const *s1, char const *s2)
590 char c2, c1;
591 NYD2_ENTER;
593 for (c2 = *s2++, c2 = lowerconv(c2);;) {
594 if ((c1 = *s1++) == '\0') {
595 s1 = NULL;
596 break;
598 if (lowerconv(c1) == c2 && is_asccaseprefix(s2, s1)) {
599 --s1;
600 break;
603 NYD2_LEAVE;
604 return s1;
607 FL bool_t
608 is_asccaseprefix(char const *as1, char const *as2) /* TODO arg order */
610 char c1, c2;
611 NYD2_ENTER;
613 for(;; ++as1, ++as2){
614 c1 = *as1;
615 c1 = lowerconv(c1);
616 c2 = *as2;
617 c2 = lowerconv(c2);
619 if(c1 != c2 || c1 == '\0')
620 break;
621 if(c2 == '\0')
622 break;
624 NYD2_LEAVE;
625 return (c1 == '\0');
628 FL bool_t
629 is_ascncaseprefix(char const *as1, char const *as2, size_t sz)
631 char c1, c2;
632 bool_t rv;
633 NYD2_ENTER;
635 for(rv = TRU1; sz-- > 0; ++as1, ++as2){
636 c1 = *as1;
637 c1 = lowerconv(c1);
638 c2 = *as2;
639 c2 = lowerconv(c2);
641 if(!(rv = (c1 == c2)) || c1 == '\0')
642 break;
643 if(c2 == '\0')
644 break;
646 NYD2_LEAVE;
647 return rv;
651 FL struct str *
652 (n_str_assign_buf)(struct str *self, char const *buf, uiz_t buflen
653 n_MEMORY_DEBUG_ARGS){
654 NYD_ENTER;
655 if(buflen == UIZ_MAX)
656 buflen = (buf == NULL) ? 0 : strlen(buf);
658 assert(buflen == 0 || buf != NULL);
660 if(n_LIKELY(buflen > 0)){
661 self->s = (n_realloc)(self->s, (self->l = buflen) +1
662 n_MEMORY_DEBUG_ARGSCALL);
663 memcpy(self->s, buf, buflen);
664 self->s[buflen] = '\0';
665 }else
666 self->l = 0;
667 NYD_LEAVE;
668 return self;
671 FL struct str *
672 (n_str_add_buf)(struct str *self, char const *buf, uiz_t buflen
673 n_MEMORY_DEBUG_ARGS){
674 NYD_ENTER;
675 if(buflen == UIZ_MAX)
676 buflen = (buf == NULL) ? 0 : strlen(buf);
678 assert(buflen == 0 || buf != NULL);
680 if(buflen > 0) {
681 size_t osl = self->l, nsl = osl + buflen;
683 self->s = (n_realloc)(self->s, (self->l = nsl) +1
684 n_MEMORY_DEBUG_ARGSCALL);
685 memcpy(self->s + osl, buf, buflen);
686 self->s[nsl] = '\0';
688 NYD_LEAVE;
689 return self;
692 FL struct str *
693 n_str_trim(struct str *self, enum n_str_trim_flags stf){
694 size_t l;
695 char const *cp;
696 NYD2_ENTER;
698 cp = self->s;
700 if((l = self->l) > 0 && (stf & n_STR_TRIM_FRONT)){
701 while(spacechar(*cp)){
702 ++cp;
703 if(--l == 0)
704 break;
706 self->s = n_UNCONST(cp);
709 if(l > 0 && (stf & n_STR_TRIM_END)){
710 for(cp += l -1; spacechar(*cp); --cp)
711 if(--l == 0)
712 break;
714 self->l = l;
716 NYD2_LEAVE;
717 return self;
/* Trim self at both ends according to the value of the variable ifs_ws (as
 * looked up via ok_vlook()), and, if dodefaults is set, also according to
 * the default set of space, tabulator and newline.
 * If ifs_ws consists only of those three characters a fast path with direct
 * comparisons is used, otherwise strchr() is applied to the ifs_ws value.
 * When trimming at the end a whitespace character which is preceded by an
 * odd number of reverse solidus characters is kept.
 * Only self->s and self->l are adjusted.  Returns self */
720 FL struct str *
721 n_str_trim_ifs(struct str *self, bool_t dodefaults){
/* s, t, n hold the space, tab, newline to trim, or NUL if not to be trimmed */
722 char s, t, n, c;
723 char const *ifs, *cp;
724 size_t l, i;
725 NYD2_ENTER;
727 if((l = self->l) == 0)
728 goto jleave;
730 ifs = ok_vlook(ifs_ws);
731 cp = self->s;
732 s = t = n = '\0';
734 /* Check whether we can go fast(er) path */
735 for(i = 0; (c = ifs[i]) != '\0'; ++i){
736 switch(c){
737 case ' ': s = c; break;
738 case '\t': t = c; break;
739 case '\n': n = c; break;
740 default:
741 /* Need to go the slow path */
742 while(strchr(ifs, *cp) != NULL){
743 ++cp;
744 if(--l == 0)
745 break;
747 self->s = n_UNCONST(cp);
749 if(l > 0){
750 for(cp += l -1; strchr(ifs, *cp) != NULL;){
751 if(--l == 0)
752 break;
753 /* An uneven number of reverse solidus escapes last WS! */
754 else if(*--cp == '\\'){
755 siz_t j;
/* Count the run of reverse solidus characters which precedes the WS */
757 for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
759 if(j & 1){
760 ++l;
761 break;
766 self->l = l;
768 if(!dodefaults)
769 goto jleave;
770 cp = self->s;
771 ++i;
772 break;
776 /* No ifs-ws? No more data? No trimming */
777 if(l == 0 || (i == 0 && !dodefaults))
778 goto jleave;
780 if(dodefaults){
781 s = ' ';
782 t = '\t';
783 n = '\n';
/* Fast path: trim the front ... */
786 if(l > 0){
787 while((c = *cp) != '\0' && (c == s || c == t || c == n)){
788 ++cp;
789 if(--l == 0)
790 break;
792 self->s = n_UNCONST(cp);
/* ... and the end */
795 if(l > 0){
796 for(cp += l -1; (c = *cp) != '\0' && (c == s || c == t || c == n);){
797 if(--l == 0)
798 break;
799 /* An uneven number of reverse solidus escapes last WS! */
800 else if(*--cp == '\\'){
801 siz_t j;
803 for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
805 if(j & 1){
806 ++l;
807 break;
812 self->l = l;
813 jleave:
814 NYD2_LEAVE;
815 return self;
/*
 * struct n_string TODO extend, optimize
 */
822 FL struct n_string *
823 (n_string_clear)(struct n_string *self n_MEMORY_DEBUG_ARGS){
824 NYD_ENTER;
826 assert(self != NULL);
828 if(self->s_size != 0){
829 if(!self->s_auto){
830 (n_free)(self->s_dat n_MEMORY_DEBUG_ARGSCALL);
832 self->s_len = self->s_auto = self->s_size = 0;
833 self->s_dat = NULL;
835 NYD_LEAVE;
836 return self;
839 FL struct n_string *
840 (n_string_reserve)(struct n_string *self, size_t noof n_MEMORY_DEBUG_ARGS){
841 ui32_t i, l, s;
842 NYD_ENTER;
843 assert(self != NULL);
845 s = self->s_size;
846 l = self->s_len;
847 if((size_t)SI32_MAX - n_ALIGN(1) - l <= noof)
848 n_panic(_("Memory allocation too large"));
850 if((i = s - l) <= ++noof){
851 i += l + (ui32_t)noof;
852 i = n_ALIGN(i);
853 self->s_size = i -1;
855 if(!self->s_auto)
856 self->s_dat = (n_realloc)(self->s_dat, i n_MEMORY_DEBUG_ARGSCALL);
857 else{
858 char *ndat = (n_autorec_alloc_from_pool)(NULL, i
859 n_MEMORY_DEBUG_ARGSCALL);
861 if(l > 0)
862 memcpy(ndat, self->s_dat, l);
863 self->s_dat = ndat;
866 NYD_LEAVE;
867 return self;
870 FL struct n_string *
871 (n_string_resize)(struct n_string *self, size_t nlen n_MEMORY_DEBUG_ARGS){
872 NYD_ENTER;
873 assert(self != NULL);
875 if(UICMP(z, SI32_MAX, <=, nlen))
876 n_panic(_("Memory allocation too large"));
878 if(self->s_len < nlen)
879 self = (n_string_reserve)(self, nlen n_MEMORY_DEBUG_ARGSCALL);
880 self->s_len = (ui32_t)nlen;
881 NYD_LEAVE;
882 return self;
885 FL struct n_string *
886 (n_string_push_buf)(struct n_string *self, char const *buf, size_t buflen
887 n_MEMORY_DEBUG_ARGS){
888 NYD_ENTER;
890 assert(self != NULL);
891 assert(buflen == 0 || buf != NULL);
893 if(buflen == UIZ_MAX)
894 buflen = (buf == NULL) ? 0 : strlen(buf);
896 if(buflen > 0){
897 ui32_t i;
899 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
900 memcpy(&self->s_dat[i = self->s_len], buf, buflen);
901 self->s_len = (i += (ui32_t)buflen);
903 NYD_LEAVE;
904 return self;
907 FL struct n_string *
908 (n_string_push_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
909 NYD_ENTER;
911 assert(self != NULL);
913 if(self->s_len + 1 >= self->s_size)
914 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
915 self->s_dat[self->s_len++] = c;
916 NYD_LEAVE;
917 return self;
920 FL struct n_string *
921 (n_string_unshift_buf)(struct n_string *self, char const *buf, size_t buflen
922 n_MEMORY_DEBUG_ARGS){
923 NYD_ENTER;
925 assert(self != NULL);
926 assert(buflen == 0 || buf != NULL);
928 if(buflen == UIZ_MAX)
929 buflen = (buf == NULL) ? 0 : strlen(buf);
931 if(buflen > 0){
932 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
933 if(self->s_len > 0)
934 memmove(&self->s_dat[buflen], self->s_dat, self->s_len);
935 memcpy(self->s_dat, buf, buflen);
936 self->s_len += (ui32_t)buflen;
938 NYD_LEAVE;
939 return self;
942 FL struct n_string *
943 (n_string_unshift_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
944 NYD_ENTER;
946 assert(self != NULL);
948 if(self->s_len + 1 >= self->s_size)
949 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
950 if(self->s_len > 0)
951 memmove(&self->s_dat[1], self->s_dat, self->s_len);
952 self->s_dat[0] = c;
953 ++self->s_len;
954 NYD_LEAVE;
955 return self;
958 FL struct n_string *
959 (n_string_insert_buf)(struct n_string *self, size_t idx,
960 char const *buf, size_t buflen n_MEMORY_DEBUG_ARGS){
961 NYD_ENTER;
963 assert(self != NULL);
964 assert(buflen == 0 || buf != NULL);
965 assert(idx <= self->s_len);
967 if(buflen == UIZ_MAX)
968 buflen = (buf == NULL) ? 0 : strlen(buf);
970 if(buflen > 0){
971 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
972 if(self->s_len > 0)
973 memmove(&self->s_dat[idx + buflen], &self->s_dat[idx],
974 self->s_len - idx);
975 memcpy(&self->s_dat[idx], buf, buflen);
976 self->s_len += (ui32_t)buflen;
978 NYD_LEAVE;
979 return self;
982 FL struct n_string *
983 (n_string_insert_c)(struct n_string *self, size_t idx,
984 char c n_MEMORY_DEBUG_ARGS){
985 NYD_ENTER;
987 assert(self != NULL);
988 assert(idx <= self->s_len);
990 if(self->s_len + 1 >= self->s_size)
991 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
992 if(self->s_len > 0)
993 memmove(&self->s_dat[idx + 1], &self->s_dat[idx], self->s_len - idx);
994 self->s_dat[idx] = c;
995 ++self->s_len;
996 NYD_LEAVE;
997 return self;
1000 FL struct n_string *
1001 n_string_cut(struct n_string *self, size_t idx, size_t len){
1002 NYD_ENTER;
1004 assert(self != NULL);
1005 assert(UIZ_MAX - idx > len);
1006 assert(SI32_MAX >= idx + len);
1007 assert(idx + len <= self->s_len);
1009 if(len > 0)
1010 memmove(&self->s_dat[idx], &self->s_dat[idx + len],
1011 (self->s_len -= len) - idx);
1012 NYD_LEAVE;
1013 return self;
1016 FL char *
1017 (n_string_cp)(struct n_string *self n_MEMORY_DEBUG_ARGS){
1018 char *rv;
1019 NYD2_ENTER;
1021 assert(self != NULL);
1023 if(self->s_size == 0)
1024 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
1026 (rv = self->s_dat)[self->s_len] = '\0';
1027 NYD2_LEAVE;
1028 return rv;
1031 FL char const *
1032 n_string_cp_const(struct n_string const *self){
1033 char const *rv;
1034 NYD2_ENTER;
1036 assert(self != NULL);
1038 if(self->s_size != 0){
1039 ((struct n_string*)n_UNCONST(self))->s_dat[self->s_len] = '\0';
1040 rv = self->s_dat;
1041 }else
1042 rv = n_empty;
1043 NYD2_LEAVE;
1044 return rv;
/*
 * UTF-8
 */
1051 FL ui32_t
1052 n_utf8_to_utf32(char const **bdat, size_t *blen){
1053 ui32_t c, x, x1;
1054 char const *cp, *cpx;
1055 size_t l, lx;
1056 NYD2_ENTER;
1058 lx = l = *blen - 1;
1059 x = (ui8_t)*(cp = *bdat);
1060 cpx = ++cp;
1062 if(n_LIKELY(x <= 0x7Fu))
1063 c = x;
1064 /* 0xF8, but Unicode guarantees maximum of 0x10FFFFu -> F4 8F BF BF.
1065 * Unicode 9.0, 3.9, UTF-8, Table 3-7. Well-Formed UTF-8 Byte Sequences */
1066 else if(n_LIKELY(x > 0xC0u && x <= 0xF4u)){
1067 if(n_LIKELY(x < 0xE0u)){
1068 if(n_UNLIKELY(l < 1))
1069 goto jenobuf;
1070 --l;
1072 c = (x &= 0x1Fu);
1073 }else if(n_LIKELY(x < 0xF0u)){
1074 if(n_UNLIKELY(l < 2))
1075 goto jenobuf;
1076 l -= 2;
1078 x1 = x;
1079 c = (x &= 0x0Fu);
1081 /* Second byte constraints */
1082 x = (ui8_t)*cp++;
1083 switch(x1){
1084 case 0xE0u:
1085 if(n_UNLIKELY(x < 0xA0u || x > 0xBFu))
1086 goto jerr;
1087 break;
1088 case 0xEDu:
1089 if(n_UNLIKELY(x < 0x80u || x > 0x9Fu))
1090 goto jerr;
1091 break;
1092 default:
1093 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1094 goto jerr;
1095 break;
1097 c <<= 6;
1098 c |= (x &= 0x3Fu);
1099 }else{
1100 if(n_UNLIKELY(l < 3))
1101 goto jenobuf;
1102 l -= 3;
1104 x1 = x;
1105 c = (x &= 0x07u);
1107 /* Second byte constraints */
1108 x = (ui8_t)*cp++;
1109 switch(x1){
1110 case 0xF0u:
1111 if(n_UNLIKELY(x < 0x90u || x > 0xBFu))
1112 goto jerr;
1113 break;
1114 case 0xF4u:
1115 if(n_UNLIKELY((x & 0xF0u) != 0x80u)) /* 80..8F */
1116 goto jerr;
1117 break;
1118 default:
1119 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1120 goto jerr;
1121 break;
1123 c <<= 6;
1124 c |= (x &= 0x3Fu);
1126 x = (ui8_t)*cp++;
1127 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1128 goto jerr;
1129 c <<= 6;
1130 c |= (x &= 0x3Fu);
1133 x = (ui8_t)*cp++;
1134 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1135 goto jerr;
1136 c <<= 6;
1137 c |= x & 0x3Fu;
1138 }else
1139 goto jerr;
1141 cpx = cp;
1142 lx = l;
1143 jleave:
1144 *bdat = cpx;
1145 *blen = lx;
1146 NYD2_LEAVE;
1147 return c;
1148 jenobuf:
1149 jerr:
1150 c = UI32_MAX;
1151 goto jleave;
1154 FL size_t
1155 n_utf32_to_utf8(ui32_t c, char *buf)
1157 struct {
1158 ui32_t lower_bound;
1159 ui32_t upper_bound;
1160 ui8_t enc_leader;
1161 ui8_t enc_lval;
1162 ui8_t dec_leader_mask;
1163 ui8_t dec_leader_val_mask;
1164 ui8_t dec_bytes_togo;
1165 ui8_t cat_index;
1166 ui8_t __dummy[2];
1167 } const _cat[] = {
1168 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
1169 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
1170 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
1171 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
1172 /* xxx _from_utf32() simply assumes magic code points for surrogates!
1173 * xxx (However, should we ever get yet another surrogate range we
1174 * xxx need to deal with that all over the place anyway? */
1175 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
1176 {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
1177 }, *catp = _cat;
1178 size_t l;
1180 if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
1181 if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
1182 if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
1183 if (c <= _cat[3].upper_bound) {
1184 /* Surrogates may not be converted (Compatibility rule C10) */
1185 if (c >= 0xD800u && c <= 0xDFFFu)
1186 goto jerr;
1187 catp += 3;
1188 goto j3;
1190 if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
1191 jerr:
1192 c = 0xFFFDu; /* Unicode replacement character */
1193 catp += 3;
1194 goto j3;
1196 buf[3] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1198 buf[2] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1200 buf[1] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1202 buf[0] = (char)catp->enc_leader | (char)(c);
1204 buf[catp->enc_lval] = '\0';
1205 l = catp->enc_lval;
1206 NYD2_LEAVE;
1207 return l;
/*
 * Our iconv(3) wrapper
 */
1214 FL char *
1215 n_iconv_normalize_name(char const *cset){
1216 char *cp, c, *tcp, tc;
1217 bool_t any;
1218 NYD2_ENTER;
1220 /* We need to strip //SUFFIXes off, we want to normalize to all lowercase,
1221 * and we perform some slight content testing, too */
1222 for(any = FAL0, cp = n_UNCONST(cset); (c = *cp) != '\0'; ++cp){
1223 if(!alnumchar(c) && !punctchar(c)){
1224 n_err(_("Invalid character set name %s\n"),
1225 n_shexp_quote_cp(cset, FAL0));
1226 cset = NULL;
1227 goto jleave;
1228 }else if(c == '/')
1229 break;
1230 else if(upperchar(c))
1231 any = TRU1;
1234 if(any || c != '\0'){
1235 cp = savestrbuf(cset, PTR2SIZE(cp - cset));
1236 for(tcp = cp; (tc = *tcp) != '\0'; ++tcp)
1237 *tcp = lowerconv(tc);
1239 if(c != '\0' && (n_poption & n_PO_D_V))
1240 n_err(_("Stripped off character set suffix: %s -> %s\n"),
1241 n_shexp_quote_cp(cset, FAL0), n_shexp_quote_cp(cp, FAL0));
1243 cset = cp;
1245 jleave:
1246 NYD2_LEAVE;
1247 return n_UNCONST(cset);
1250 FL bool_t
1251 n_iconv_name_is_ascii(char const *cset){ /* TODO ctext/su */
1252 bool_t rv;
1253 NYD2_ENTER;
1255 /* In MIME preference order */
1256 rv = (!asccasecmp(cset, "US-ASCII") || !asccasecmp(cset, "ASCII") ||
1257 !asccasecmp(cset, "ANSI_X3.4-1968") ||
1258 !asccasecmp(cset, "iso-ir-6") ||
1259 !asccasecmp(cset, "ANSI_X3.4-1986") ||
1260 !asccasecmp(cset, "ISO_646.irv:1991") ||
1261 !asccasecmp(cset, "ISO646-US") || !asccasecmp(cset, "us") ||
1262 !asccasecmp(cset, "IBM367") || !asccasecmp(cset, "cp367") ||
1263 !asccasecmp(cset, "csASCII"));
1264 NYD2_LEAVE;
1265 return rv;
1268 #ifdef HAVE_ICONV
1269 FL iconv_t
1270 n_iconv_open(char const *tocode, char const *fromcode){
1271 iconv_t id;
1272 NYD_ENTER;
1274 if((!asccasecmp(fromcode, "unknown-8bit") ||
1275 !asccasecmp(fromcode, "binary")) &&
1276 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
1277 fromcode = ok_vlook(CHARSET_8BIT_OKEY);
1279 id = iconv_open(tocode, fromcode);
1281 /* If the encoding names are equal at this point, they are just not
1282 * understood by iconv(), and we cannot sensibly use it in any way. We do
1283 * not perform this as an optimization above since iconv() can otherwise be
1284 * used to check the validity of the input even with identical encoding
1285 * names */
1286 if (id == (iconv_t)-1 && !asccasecmp(tocode, fromcode))
1287 n_err_no = n_ERR_NONE;
1288 NYD_LEAVE;
1289 return id;
1292 FL void
1293 n_iconv_close(iconv_t cd){
1294 NYD_ENTER;
1295 iconv_close(cd);
1296 if(cd == iconvd)
1297 iconvd = (iconv_t)-1;
1298 NYD_LEAVE;
1301 FL void
1302 n_iconv_reset(iconv_t cd){
1303 NYD_ENTER;
1304 iconv(cd, NULL, NULL, NULL, NULL);
1305 NYD_LEAVE;
/* __INBCAST(S) casts the input buffer argument which is handed to iconv(3)
 * to the pointer type selected for the platform below; unless a special
 * case matched the cast is to (char **) */
1308 /* (2012-09-24: export and use it exclusively to isolate prototype problems
1309 * (*inb* is 'char const **' except in POSIX) in a single place.
1310 * GNU libiconv even allows for configuration time const/non-const..
1311 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
1312 * support compiler invocations which bail on error, so no -Werror */
1313 /* Citrus project? */
1314 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
1315 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
1316 # if n_OS_DRAGONFLY
1317 # define __INBCAST(S) (char ** __restrict__)n_UNCONST(S)
1318 # else
1319 # define __INBCAST(S) (char const **)n_UNCONST(S)
1320 # endif
1321 # elif n_OS_SUNOS || n_OS_SOLARIS
1322 # define __INBCAST(S) (char const ** __restrict__)n_UNCONST(S)
1323 # endif
/* The fallback if none of the above matched */
1324 # ifndef __INBCAST
1325 # define __INBCAST(S) (char **)n_UNCONST(S)
1326 # endif
/* Convert the input buffer *inb / *inbleft to the output buffer *outb /
 * *outbleft with iconv(3) on cd; all four are updated as by iconv(3).
 * icf controls error handling:
 * - n_ICONV_UNIREPL is ignored unless n_PSO_UNICODE is set in n_psonce.
 * - If iconv(3) reports a positive count the result is n_ERR_NOENT unless
 *   n_ICONV_IGN_NOREVERSE is set.
 * - With n_ICONV_IGN_ILSEQ an n_ERR_ILSEQ failure causes one input byte to
 *   be skipped and n_unirepl (with n_ICONV_UNIREPL) or a question mark to
 *   be stored instead, as long as the output buffer has room; otherwise the
 *   result is n_ERR_2BIG.
 * Returns 0 upon success or an n_ERR_* value, which is also stored in
 * n_iconv_err_no */
1328 FL int
1329 n_iconv_buf(iconv_t cd, enum n_iconv_flags icf,
1330 char const **inb, size_t *inbleft, char **outb, size_t *outbleft){
1331 int err;
1332 NYD2_ENTER;
1334 if((icf & n_ICONV_UNIREPL) && !(n_psonce & n_PSO_UNICODE))
1335 icf &= ~n_ICONV_UNIREPL;
1337 for(;;){
1338 size_t sz;
/* A result of 0 means everything was converted */
1340 if((sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft)) == 0)
1341 break;
/* Neither 0 nor (size_t)-1: a count reported by iconv(3) */
1342 if(sz != (size_t)-1){
1343 if(!(icf & n_ICONV_IGN_NOREVERSE)){
1344 err = n_ERR_NOENT;
1345 goto jleave;
1347 break;
/* Output buffer exhaustion is always handed back to the caller */
1350 if((err = n_err_no) == n_ERR_2BIG)
1351 goto jleave;
1353 if(!(icf & n_ICONV_IGN_ILSEQ) || err != n_ERR_ILSEQ)
1354 goto jleave;
/* Skip over the offending input byte and store a replacement */
1355 if(*inbleft > 0){
1356 ++(*inb);
1357 --(*inbleft);
1358 if(icf & n_ICONV_UNIREPL){
1359 if(*outbleft >= sizeof(n_unirepl) -1){
1360 memcpy(*outb, n_unirepl, sizeof(n_unirepl) -1);
1361 *outb += sizeof(n_unirepl) -1;
1362 *outbleft -= sizeof(n_unirepl) -1;
1363 continue;
1365 }else if(*outbleft > 0){
1366 *(*outb)++ = '?';
1367 --*outbleft;
1368 continue;
/* No room for the replacement */
1370 err = n_ERR_2BIG;
1371 goto jleave;
1372 }else if(*outbleft > 0){
1373 **outb = '\0';
1374 goto jleave;
1377 err = 0;
1378 jleave:
1379 n_iconv_err_no = err;
1380 NYD2_LEAVE;
1381 return err;
1383 # undef __INBCAST
/* Convert the content of in with n_iconv_buf() on cd and append the result
 * to out, the buffer of which is taken over by a temporary n_string, grown
 * as necessary, and handed back NUL-terminated (via n_string_cp()) on
 * return.  If in_rest_or_null is not NULL it is set to the unconverted
 * remainder of the input after the conversion loop.
 * Returns 0 upon success or an n_ERR_* value: n_ERR_INVAL if the sizes
 * involved cannot be handled, otherwise the result of n_iconv_buf() */
1385 FL int
1386 n_iconv_str(iconv_t cd, enum n_iconv_flags icf,
1387 struct str *out, struct str const *in, struct str *in_rest_or_null){
1388 struct n_string s, *sp = &s;
1389 char const *ib;
1390 int err;
1391 size_t il;
1392 NYD2_ENTER;
1394 il = in->l;
1395 if(!n_string_get_can_book(il) || !n_string_get_can_book(out->l)){
1396 err = n_ERR_INVAL;
1397 goto j_leave;
1399 ib = in->s;
/* Let the n_string work on the buffer of out */
1401 sp = n_string_creat(sp);
1402 sp = n_string_take_ownership(sp, out->s, out->l, 0);
1404 for(;;){
1405 char *ob_base, *ob;
1406 size_t ol, nol;
/* ol is the current output length, nol the size to try in this round */
1408 if((nol = ol = sp->s_len) < il)
1409 nol = il;
1410 assert(sizeof(sp->s_len) == sizeof(ui32_t));
1411 if(nol < 128)
1412 nol += 32;
1413 else{
1414 ui64_t xnol;
1416 xnol = (ui64_t)(nol << 1) - (nol >> 4);
/* If that much cannot be booked retry with a small increment */
1417 if(!n_string_can_book(sp, xnol)){
1418 xnol = ol + 64;
1419 if(!n_string_can_book(sp, xnol)){
1420 err = n_ERR_INVAL;
1421 goto jleave;
1424 nol = (size_t)xnol;
1426 sp = n_string_resize(sp, nol);
/* Convert into the space behind the existing content */
1428 ob = ob_base = &sp->s_dat[ol];
1429 nol -= ol;
1430 err = n_iconv_buf(cd, icf, &ib, &il, &ob, &nol);
/* Cut the length back to what has really been produced */
1432 sp = n_string_trunc(sp, ol + PTR2SIZE(ob - ob_base));
/* Only n_ERR_2BIG causes another round with a larger buffer */
1433 if(err == 0 || err != n_ERR_2BIG)
1434 break;
1437 if(in_rest_or_null != NULL){
1438 in_rest_or_null->s = n_UNCONST(ib);
1439 in_rest_or_null->l = il;
1442 jleave:
/* Hand the buffer back to out */
1443 out->s = n_string_cp(sp);
1444 out->l = sp->s_len;
1445 sp = n_string_drop_ownership(sp);
1446 /* n_string_gut(sp)*/
1447 j_leave:
1448 NYD2_LEAVE;
1449 return err;
1452 FL char *
1453 n_iconv_onetime_cp(enum n_iconv_flags icf,
1454 char const *tocode, char const *fromcode, char const *input){
1455 struct str out, in;
1456 iconv_t icd;
1457 char *rv;
1458 NYD2_ENTER;
1460 rv = NULL;
1461 if(tocode == NULL)
1462 tocode = ok_vlook(ttycharset);
1463 if(fromcode == NULL)
1464 fromcode = "utf-8";
1466 if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
1467 goto jleave;
1469 in.l = strlen(in.s = n_UNCONST(input)); /* logical */
1470 out.s = NULL, out.l = 0;
1471 if(!n_iconv_str(icd, icf, &out, &in, NULL))
1472 rv = savestrbuf(out.s, out.l);
1473 if(out.s != NULL)
1474 free(out.s);
1476 iconv_close(icd);
1477 jleave:
1478 NYD2_LEAVE;
1479 return rv;
1481 #endif /* HAVE_ICONV */
1483 /* s-it-mode */