TODO: How High The Moon
[s-mailx.git] / strings.c
blob54c72f71d3631b83a2dcdf46a1ac5699b816907c
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 * SPDX-License-Identifier: BSD-3-Clause
7 */
8 /*
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 #undef n_FILE
37 #define n_FILE strings
#ifndef HAVE_AMALGAMATION
# include "nail.h"
#endif

#include <ctype.h>
#include <limits.h>
45 FL char *
46 (savestr)(char const *str n_MEMORY_DEBUG_ARGS)
48 size_t size;
49 char *news;
50 NYD_ENTER;
52 size = strlen(str);
53 news = (n_autorec_alloc_from_pool)(NULL, size +1 n_MEMORY_DEBUG_ARGSCALL);
54 if(size > 0)
55 memcpy(news, str, size);
56 news[size] = '\0';
57 NYD_LEAVE;
58 return news;
61 FL char *
62 (savestrbuf)(char const *sbuf, size_t sbuf_len n_MEMORY_DEBUG_ARGS)
64 char *news;
65 NYD_ENTER;
67 news = (n_autorec_alloc_from_pool)(NULL, sbuf_len +1
68 n_MEMORY_DEBUG_ARGSCALL);
69 if(sbuf_len > 0)
70 memcpy(news, sbuf, sbuf_len);
71 news[sbuf_len] = 0;
72 NYD_LEAVE;
73 return news;
76 FL char *
77 (savecatsep)(char const *s1, char sep, char const *s2 n_MEMORY_DEBUG_ARGS)
79 size_t l1, l2;
80 char *news;
81 NYD_ENTER;
83 l1 = (s1 != NULL) ? strlen(s1) : 0;
84 l2 = strlen(s2);
85 news = (n_autorec_alloc_from_pool)(NULL, l1 + (sep != '\0') + l2 +1
86 n_MEMORY_DEBUG_ARGSCALL);
87 if (l1 > 0) {
88 memcpy(news + 0, s1, l1);
89 if (sep != '\0')
90 news[l1++] = sep;
92 if(l2 > 0)
93 memcpy(news + l1, s2, l2);
94 news[l1 + l2] = '\0';
95 NYD_LEAVE;
96 return news;
100 * Support routines, auto-reclaimed storage
103 FL struct str *
104 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
106 va_list vl;
107 size_t l;
108 char const *cs;
109 NYD_ENTER;
111 va_start(vl, self);
112 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
113 l += strlen(cs);
114 va_end(vl);
116 self->l = l;
117 self->s = n_autorec_alloc(l +1);
119 va_start(vl, self);
120 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
121 size_t i;
123 i = strlen(cs);
124 if(i > 0){
125 memcpy(self->s + l, cs, i);
126 l += i;
129 self->s[l] = '\0';
130 va_end(vl);
131 NYD_LEAVE;
132 return self;
135 FL struct str *
136 (str_concat_cpa)(struct str *self, char const * const *cpa,
137 char const *sep_o_null n_MEMORY_DEBUG_ARGS)
139 size_t sonl, l;
140 char const * const *xcpa;
141 NYD_ENTER;
143 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
145 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
146 l += strlen(*xcpa) + sonl;
148 self->l = l;
149 self->s = (n_autorec_alloc_from_pool)(NULL, l +1 n_MEMORY_DEBUG_ARGSCALL);
151 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
152 size_t i;
154 i = strlen(*xcpa);
155 if(i > 0){
156 memcpy(self->s + l, *xcpa, i);
157 l += i;
159 if (sonl > 0) {
160 memcpy(self->s + l, sep_o_null, sonl);
161 l += sonl;
164 self->s[l] = '\0';
165 NYD_LEAVE;
166 return self;
170 * Routines that are not related to auto-reclaimed storage follow.
173 FL bool_t
174 n_anyof_buf(char const *template, char const *dat, size_t len){
175 char c;
176 NYD2_ENTER;
178 if(len == UIZ_MAX){
179 while((c = *template++) != '\0')
180 if(strchr(dat, c) != NULL)
181 break;
182 }else if(len > 0){
183 while((c = *template++) != '\0')
184 if(memchr(dat, c, len) != NULL)
185 break;
186 }else
187 c = '\0';
188 NYD2_LEAVE;
189 return (c != '\0');
192 FL char *
193 n_strsep(char **iolist, char sep, bool_t ignore_empty){
194 char *base, *cp;
195 NYD2_ENTER;
197 for(base = *iolist; base != NULL; base = *iolist){
198 while(*base != '\0' && blankspacechar(*base))
199 ++base;
201 cp = strchr(base, sep);
202 if(cp != NULL)
203 *iolist = &cp[1];
204 else{
205 *iolist = NULL;
206 cp = &base[strlen(base)];
208 while(cp > base && blankspacechar(cp[-1]))
209 --cp;
210 *cp = '\0';
211 if(*base != '\0' || !ignore_empty)
212 break;
214 NYD2_LEAVE;
215 return base;
218 FL char *
219 n_strsep_esc(char **iolist, char sep, bool_t ignore_empty){
220 char *cp, c, *base;
221 bool_t isesc, anyesc;
222 NYD2_ENTER;
224 for(base = *iolist; base != NULL; base = *iolist){
225 while((c = *base) != '\0' && blankspacechar(c))
226 ++base;
228 for(isesc = anyesc = FAL0, cp = base;; ++cp){
229 if(n_UNLIKELY((c = *cp) == '\0')){
230 *iolist = NULL;
231 break;
232 }else if(!isesc){
233 if(c == sep){
234 *iolist = &cp[1];
235 break;
237 isesc = (c == '\\');
238 }else{
239 isesc = FAL0;
240 anyesc |= (c == sep);
244 while(cp > base && blankspacechar(cp[-1]))
245 --cp;
246 *cp = '\0';
248 if(*base != '\0'){
249 if(anyesc){
250 char *ins;
252 for(ins = cp = base;; ++ins)
253 if((c = *cp) == '\\' && cp[1] == sep){
254 *ins = sep;
255 cp += 2;
256 }else if((*ins = (++cp, c)) == '\0')
257 break;
261 if(*base != '\0' || !ignore_empty)
262 break;
264 NYD2_LEAVE;
265 return base;
268 FL bool_t
269 is_prefix(char const *as1, char const *as2) /* TODO arg order */
271 char c;
272 NYD2_ENTER;
274 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
275 if (*as2 == '\0')
276 break;
277 NYD2_LEAVE;
278 return (c == '\0');
281 FL char *
282 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
284 char const *cp;
285 size_t i;
286 char c, *rv;
287 NYD2_ENTER;
289 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
290 if (c == '"' || c == '\\')
291 ++i;
292 rv = n_autorec_alloc(i +1);
294 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
295 if (c == '"' || c == '\\')
296 rv[i++] = '\\';
297 rv[i] = '\0';
298 NYD2_LEAVE;
299 return rv;
302 FL void
303 makelow(char *cp) /* TODO isn't that crap? --> */
305 NYD_ENTER;
306 #ifdef HAVE_C90AMEND1
307 if (n_mb_cur_max > 1) {
308 char *tp = cp;
309 wchar_t wc;
310 int len;
312 while (*cp != '\0') {
313 len = mbtowc(&wc, cp, n_mb_cur_max);
314 if (len < 0)
315 *tp++ = *cp++;
316 else {
317 wc = towlower(wc);
318 if (wctomb(tp, wc) == len)
319 tp += len, cp += len;
320 else
321 *tp++ = *cp++; /* <-- at least here */
324 } else
325 #endif
328 *cp = tolower((uc_i)*cp);
329 while (*cp++ != '\0');
331 NYD_LEAVE;
334 FL bool_t
335 substr(char const *str, char const *sub)
337 char const *cp, *backup;
338 NYD_ENTER;
340 cp = sub;
341 backup = str;
342 while (*str != '\0' && *cp != '\0') {
343 #ifdef HAVE_C90AMEND1
344 if (n_mb_cur_max > 1) {
345 wchar_t c, c2;
346 int sz;
348 if ((sz = mbtowc(&c, cp, n_mb_cur_max)) == -1)
349 goto Jsinglebyte;
350 cp += sz;
351 if ((sz = mbtowc(&c2, str, n_mb_cur_max)) == -1)
352 goto Jsinglebyte;
353 str += sz;
354 c = towupper(c);
355 c2 = towupper(c2);
356 if (c != c2) {
357 if ((sz = mbtowc(&c, backup, n_mb_cur_max)) > 0) {
358 backup += sz;
359 str = backup;
360 } else
361 str = ++backup;
362 cp = sub;
364 } else
365 Jsinglebyte:
366 #endif
368 int c, c2;
370 c = *cp++ & 0377;
371 if (islower(c))
372 c = toupper(c);
373 c2 = *str++ & 0377;
374 if (islower(c2))
375 c2 = toupper(c2);
376 if (c != c2) {
377 str = ++backup;
378 cp = sub;
382 NYD_LEAVE;
383 return (*cp == '\0');
386 FL char *
387 sstpcpy(char *dst, char const *src)
389 NYD2_ENTER;
390 while ((*dst = *src++) != '\0')
391 ++dst;
392 NYD2_LEAVE;
393 return dst;
396 FL char *
397 (sstrdup)(char const *cp n_MEMORY_DEBUG_ARGS)
399 char *dp;
400 NYD2_ENTER;
402 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) n_MEMORY_DEBUG_ARGSCALL);
403 NYD2_LEAVE;
404 return dp;
407 FL char *
408 (sbufdup)(char const *cp, size_t len n_MEMORY_DEBUG_ARGS)
410 char *dp = NULL;
411 NYD2_ENTER;
413 dp = (n_alloc)(len +1 n_MEMORY_DEBUG_ARGSCALL);
414 if (cp != NULL)
415 memcpy(dp, cp, len);
416 dp[len] = '\0';
417 NYD2_LEAVE;
418 return dp;
421 FL ssize_t
422 n_strscpy(char *dst, char const *src, size_t dstsize){
423 ssize_t rv;
424 NYD2_ENTER;
426 if(n_LIKELY(dstsize > 0)){
427 rv = 0;
429 if((dst[rv] = src[rv]) == '\0')
430 goto jleave;
431 ++rv;
432 }while(--dstsize > 0);
433 dst[--rv] = '\0';
435 #ifdef HAVE_DEVEL
436 else
437 assert(dstsize > 0);
438 #endif
439 rv = -1;
440 jleave:
441 NYD2_LEAVE;
442 return rv;
445 FL int
446 asccasecmp(char const *s1, char const *s2)
448 int cmp;
449 NYD2_ENTER;
451 for (;;) {
452 char c1 = *s1++, c2 = *s2++;
453 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
454 break;
456 NYD2_LEAVE;
457 return cmp;
460 FL int
461 ascncasecmp(char const *s1, char const *s2, size_t sz)
463 int cmp = 0;
464 NYD2_ENTER;
466 while (sz-- > 0) {
467 char c1 = *s1++, c2 = *s2++;
468 cmp = (ui8_t)lowerconv(c1);
469 cmp -= (ui8_t)lowerconv(c2);
470 if (cmp != 0 || c1 == '\0')
471 break;
473 NYD2_LEAVE;
474 return cmp;
477 FL char const *
478 asccasestr(char const *s1, char const *s2)
480 char c2, c1;
481 NYD2_ENTER;
483 for (c2 = *s2++, c2 = lowerconv(c2);;) {
484 if ((c1 = *s1++) == '\0') {
485 s1 = NULL;
486 break;
488 if (lowerconv(c1) == c2 && is_asccaseprefix(s2, s1)) {
489 --s1;
490 break;
493 NYD2_LEAVE;
494 return s1;
497 FL bool_t
498 is_asccaseprefix(char const *as1, char const *as2) /* TODO arg order */
500 char c1, c2;
501 NYD2_ENTER;
503 for(;; ++as1, ++as2){
504 c1 = *as1;
505 c1 = lowerconv(c1);
506 c2 = *as2;
507 c2 = lowerconv(c2);
509 if(c1 != c2 || c1 == '\0')
510 break;
511 if(c2 == '\0')
512 break;
514 NYD2_LEAVE;
515 return (c1 == '\0');
518 FL bool_t
519 is_ascncaseprefix(char const *as1, char const *as2, size_t sz)
521 char c1, c2;
522 bool_t rv;
523 NYD2_ENTER;
525 for(rv = TRU1; sz-- > 0; ++as1, ++as2){
526 c1 = *as1;
527 c1 = lowerconv(c1);
528 c2 = *as2;
529 c2 = lowerconv(c2);
531 if(!(rv = (c1 == c2)) || c1 == '\0')
532 break;
533 if(c2 == '\0')
534 break;
536 NYD2_LEAVE;
537 return rv;
541 FL struct str *
542 (n_str_assign_buf)(struct str *self, char const *buf, uiz_t buflen
543 n_MEMORY_DEBUG_ARGS){
544 NYD_ENTER;
545 if(buflen == UIZ_MAX)
546 buflen = (buf == NULL) ? 0 : strlen(buf);
548 assert(buflen == 0 || buf != NULL);
550 if(n_LIKELY(buflen > 0)){
551 self->s = (n_realloc)(self->s, (self->l = buflen) +1
552 n_MEMORY_DEBUG_ARGSCALL);
553 memcpy(self->s, buf, buflen);
554 self->s[buflen] = '\0';
555 }else
556 self->l = 0;
557 NYD_LEAVE;
558 return self;
561 FL struct str *
562 (n_str_add_buf)(struct str *self, char const *buf, uiz_t buflen
563 n_MEMORY_DEBUG_ARGS){
564 NYD_ENTER;
565 if(buflen == UIZ_MAX)
566 buflen = (buf == NULL) ? 0 : strlen(buf);
568 assert(buflen == 0 || buf != NULL);
570 if(buflen > 0) {
571 size_t osl = self->l, nsl = osl + buflen;
573 self->s = (n_realloc)(self->s, (self->l = nsl) +1
574 n_MEMORY_DEBUG_ARGSCALL);
575 memcpy(self->s + osl, buf, buflen);
576 self->s[nsl] = '\0';
578 NYD_LEAVE;
579 return self;
582 FL struct str *
583 n_str_trim(struct str *self, enum n_str_trim_flags stf){
584 size_t l;
585 char const *cp;
586 NYD2_ENTER;
588 cp = self->s;
590 if((l = self->l) > 0 && (stf & n_STR_TRIM_FRONT)){
591 while(spacechar(*cp)){
592 ++cp;
593 if(--l == 0)
594 break;
596 self->s = n_UNCONST(cp);
599 if(l > 0 && (stf & n_STR_TRIM_END)){
600 for(cp += l -1; spacechar(*cp); --cp)
601 if(--l == 0)
602 break;
604 self->l = l;
606 NYD2_LEAVE;
607 return self;
/* Trim, at both ends of self, the characters listed in the variable looked up
 * via ok_vlook(ifs_ws); if dodefaults is set then space, tab and newline are
 * trimmed, too.
 * Only self->s and self->l are adjusted, the buffer content is not modified.
 * At the end a whitespace character which is preceded by an uneven number of
 * reverse solidus characters is considered escaped and is kept.
 * Returns self. */
610 FL struct str *
611 n_str_trim_ifs(struct str *self, bool_t dodefaults){
612 char s, t, n, c;
613 char const *ifs, *cp;
614 size_t l, i;
615 NYD2_ENTER;
/* Nothing to do for an empty string */
617 if((l = self->l) == 0)
618 goto jleave;
620 ifs = ok_vlook(ifs_ws);
621 cp = self->s;
/* s, t and n hold the respective character once it is to be trimmed by the
 * fast path below, and NUL otherwise */
622 s = t = n = '\0';
624 /* Check whether we can go fast(er) path */
625 for(i = 0; (c = ifs[i]) != '\0'; ++i){
626 switch(c){
627 case ' ': s = c; break;
628 case '\t': t = c; break;
629 case '\n': n = c; break;
630 default:
631 /* Need to go the slow path */
/* Front: skip all bytes which are contained in ifs.
 * NOTE(review): strchr() also matches the terminating NUL of ifs, so a NUL
 * byte within the first l bytes of self would be skipped, too -- confirm that
 * callers never pass embedded NULs */
632 while(strchr(ifs, *cp) != NULL){
633 ++cp;
634 if(--l == 0)
635 break;
637 self->s = n_UNCONST(cp);
/* End: walk backwards from the last byte */
639 if(l > 0){
640 for(cp += l -1; strchr(ifs, *cp) != NULL;){
641 if(--l == 0)
642 break;
643 /* An uneven number of reverse solidus escapes last WS! */
644 else if(*--cp == '\\'){
645 siz_t j;
/* j counts the consecutive reverse solidus characters which precede the
 * whitespace, bounded by the remaining length l */
647 for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
/* Uneven count: the whitespace is escaped, so take it back in and stop */
649 if(j & 1){
650 ++l;
651 break;
656 self->l = l;
/* Without dodefaults the slow path result is final; otherwise continue with
 * the trimmed string, i is incremented so that the "no ifs-ws" test below
 * cannot trigger */
658 if(!dodefaults)
659 goto jleave;
660 cp = self->s;
661 ++i;
662 break;
666 /* No ifs-ws? No more data? No trimming */
667 if(l == 0 || (i == 0 && !dodefaults))
668 goto jleave;
/* With dodefaults all three default whitespace characters are trimmed */
670 if(dodefaults){
671 s = ' ';
672 t = '\t';
673 n = '\n';
/* Fast path, front: a NUL byte never matches, so unset (NUL) s, t or n are
 * ignored automatically */
676 if(l > 0){
677 while((c = *cp) != '\0' && (c == s || c == t || c == n)){
678 ++cp;
679 if(--l == 0)
680 break;
682 self->s = n_UNCONST(cp);
/* Fast path, end: same backward walk and escape handling as the slow path */
685 if(l > 0){
686 for(cp += l -1; (c = *cp) != '\0' && (c == s || c == t || c == n);){
687 if(--l == 0)
688 break;
689 /* An uneven number of reverse solidus escapes last WS! */
690 else if(*--cp == '\\'){
691 siz_t j;
693 for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
695 if(j & 1){
696 ++l;
697 break;
702 self->l = l;
703 jleave:
704 NYD2_LEAVE;
705 return self;
709 * struct n_string TODO extend, optimize
712 FL struct n_string *
713 (n_string_clear)(struct n_string *self n_MEMORY_DEBUG_ARGS){
714 NYD_ENTER;
716 assert(self != NULL);
718 if(self->s_size != 0){
719 if(!self->s_auto){
720 (n_free)(self->s_dat n_MEMORY_DEBUG_ARGSCALL);
722 self->s_len = self->s_auto = self->s_size = 0;
723 self->s_dat = NULL;
725 NYD_LEAVE;
726 return self;
729 FL struct n_string *
730 (n_string_reserve)(struct n_string *self, size_t noof n_MEMORY_DEBUG_ARGS){
731 ui32_t i, l, s;
732 NYD_ENTER;
733 assert(self != NULL);
735 s = self->s_size;
736 l = self->s_len;
737 if((size_t)SI32_MAX - n_ALIGN(1) - l <= noof)
738 n_panic(_("Memory allocation too large"));
740 if((i = s - l) <= ++noof){
741 i += l + (ui32_t)noof;
742 i = n_ALIGN(i);
743 self->s_size = i -1;
745 if(!self->s_auto)
746 self->s_dat = (n_realloc)(self->s_dat, i n_MEMORY_DEBUG_ARGSCALL);
747 else{
748 char *ndat = (n_autorec_alloc_from_pool)(NULL, i
749 n_MEMORY_DEBUG_ARGSCALL);
751 if(l > 0)
752 memcpy(ndat, self->s_dat, l);
753 self->s_dat = ndat;
756 NYD_LEAVE;
757 return self;
760 FL struct n_string *
761 (n_string_resize)(struct n_string *self, size_t nlen n_MEMORY_DEBUG_ARGS){
762 NYD_ENTER;
763 assert(self != NULL);
765 if(UICMP(z, SI32_MAX, <=, nlen))
766 n_panic(_("Memory allocation too large"));
768 if(self->s_len < nlen)
769 self = (n_string_reserve)(self, nlen n_MEMORY_DEBUG_ARGSCALL);
770 self->s_len = (ui32_t)nlen;
771 NYD_LEAVE;
772 return self;
775 FL struct n_string *
776 (n_string_push_buf)(struct n_string *self, char const *buf, size_t buflen
777 n_MEMORY_DEBUG_ARGS){
778 NYD_ENTER;
780 assert(self != NULL);
781 assert(buflen == 0 || buf != NULL);
783 if(buflen == UIZ_MAX)
784 buflen = (buf == NULL) ? 0 : strlen(buf);
786 if(buflen > 0){
787 ui32_t i;
789 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
790 memcpy(&self->s_dat[i = self->s_len], buf, buflen);
791 self->s_len = (i += (ui32_t)buflen);
793 NYD_LEAVE;
794 return self;
797 FL struct n_string *
798 (n_string_push_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
799 NYD_ENTER;
801 assert(self != NULL);
803 if(self->s_len + 1 >= self->s_size)
804 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
805 self->s_dat[self->s_len++] = c;
806 NYD_LEAVE;
807 return self;
810 FL struct n_string *
811 (n_string_unshift_buf)(struct n_string *self, char const *buf, size_t buflen
812 n_MEMORY_DEBUG_ARGS){
813 NYD_ENTER;
815 assert(self != NULL);
816 assert(buflen == 0 || buf != NULL);
818 if(buflen == UIZ_MAX)
819 buflen = (buf == NULL) ? 0 : strlen(buf);
821 if(buflen > 0){
822 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
823 if(self->s_len > 0)
824 memmove(&self->s_dat[buflen], self->s_dat, self->s_len);
825 memcpy(self->s_dat, buf, buflen);
826 self->s_len += (ui32_t)buflen;
828 NYD_LEAVE;
829 return self;
832 FL struct n_string *
833 (n_string_unshift_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
834 NYD_ENTER;
836 assert(self != NULL);
838 if(self->s_len + 1 >= self->s_size)
839 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
840 if(self->s_len > 0)
841 memmove(&self->s_dat[1], self->s_dat, self->s_len);
842 self->s_dat[0] = c;
843 ++self->s_len;
844 NYD_LEAVE;
845 return self;
848 FL struct n_string *
849 (n_string_insert_buf)(struct n_string *self, size_t idx,
850 char const *buf, size_t buflen n_MEMORY_DEBUG_ARGS){
851 NYD_ENTER;
853 assert(self != NULL);
854 assert(buflen == 0 || buf != NULL);
855 assert(idx <= self->s_len);
857 if(buflen == UIZ_MAX)
858 buflen = (buf == NULL) ? 0 : strlen(buf);
860 if(buflen > 0){
861 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
862 if(self->s_len > 0)
863 memmove(&self->s_dat[idx + buflen], &self->s_dat[idx],
864 self->s_len - idx);
865 memcpy(&self->s_dat[idx], buf, buflen);
866 self->s_len += (ui32_t)buflen;
868 NYD_LEAVE;
869 return self;
872 FL struct n_string *
873 (n_string_insert_c)(struct n_string *self, size_t idx,
874 char c n_MEMORY_DEBUG_ARGS){
875 NYD_ENTER;
877 assert(self != NULL);
878 assert(idx <= self->s_len);
880 if(self->s_len + 1 >= self->s_size)
881 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
882 if(self->s_len > 0)
883 memmove(&self->s_dat[idx + 1], &self->s_dat[idx], self->s_len - idx);
884 self->s_dat[idx] = c;
885 ++self->s_len;
886 NYD_LEAVE;
887 return self;
890 FL struct n_string *
891 n_string_cut(struct n_string *self, size_t idx, size_t len){
892 NYD_ENTER;
894 assert(self != NULL);
895 assert(UIZ_MAX - idx > len);
896 assert(SI32_MAX >= idx + len);
897 assert(idx + len <= self->s_len);
899 if(len > 0)
900 memmove(&self->s_dat[idx], &self->s_dat[idx + len],
901 (self->s_len -= len) - idx);
902 NYD_LEAVE;
903 return self;
906 FL char *
907 (n_string_cp)(struct n_string *self n_MEMORY_DEBUG_ARGS){
908 char *rv;
909 NYD2_ENTER;
911 assert(self != NULL);
913 if(self->s_size == 0)
914 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
916 (rv = self->s_dat)[self->s_len] = '\0';
917 NYD2_LEAVE;
918 return rv;
921 FL char const *
922 n_string_cp_const(struct n_string const *self){
923 char const *rv;
924 NYD2_ENTER;
926 assert(self != NULL);
928 if(self->s_size != 0){
929 ((struct n_string*)n_UNCONST(self))->s_dat[self->s_len] = '\0';
930 rv = self->s_dat;
931 }else
932 rv = n_empty;
933 NYD2_LEAVE;
934 return rv;
938 * UTF-8
941 FL ui32_t
942 n_utf8_to_utf32(char const **bdat, size_t *blen){
943 ui32_t c, x, x1;
944 char const *cp, *cpx;
945 size_t l, lx;
946 NYD2_ENTER;
948 lx = l = *blen - 1;
949 x = (ui8_t)*(cp = *bdat);
950 cpx = ++cp;
952 if(n_LIKELY(x <= 0x7Fu))
953 c = x;
954 /* 0xF8, but Unicode guarantees maximum of 0x10FFFFu -> F4 8F BF BF.
955 * Unicode 9.0, 3.9, UTF-8, Table 3-7. Well-Formed UTF-8 Byte Sequences */
956 else if(n_LIKELY(x > 0xC0u && x <= 0xF4u)){
957 if(n_LIKELY(x < 0xE0u)){
958 if(n_UNLIKELY(l < 1))
959 goto jenobuf;
960 --l;
962 c = (x &= 0x1Fu);
963 }else if(n_LIKELY(x < 0xF0u)){
964 if(n_UNLIKELY(l < 2))
965 goto jenobuf;
966 l -= 2;
968 x1 = x;
969 c = (x &= 0x0Fu);
971 /* Second byte constraints */
972 x = (ui8_t)*cp++;
973 switch(x1){
974 case 0xE0u:
975 if(n_UNLIKELY(x < 0xA0u || x > 0xBFu))
976 goto jerr;
977 break;
978 case 0xEDu:
979 if(n_UNLIKELY(x < 0x80u || x > 0x9Fu))
980 goto jerr;
981 break;
982 default:
983 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
984 goto jerr;
985 break;
987 c <<= 6;
988 c |= (x &= 0x3Fu);
989 }else{
990 if(n_UNLIKELY(l < 3))
991 goto jenobuf;
992 l -= 3;
994 x1 = x;
995 c = (x &= 0x07u);
997 /* Second byte constraints */
998 x = (ui8_t)*cp++;
999 switch(x1){
1000 case 0xF0u:
1001 if(n_UNLIKELY(x < 0x90u || x > 0xBFu))
1002 goto jerr;
1003 break;
1004 case 0xF4u:
1005 if(n_UNLIKELY((x & 0xF0u) != 0x80u)) /* 80..8F */
1006 goto jerr;
1007 break;
1008 default:
1009 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1010 goto jerr;
1011 break;
1013 c <<= 6;
1014 c |= (x &= 0x3Fu);
1016 x = (ui8_t)*cp++;
1017 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1018 goto jerr;
1019 c <<= 6;
1020 c |= (x &= 0x3Fu);
1023 x = (ui8_t)*cp++;
1024 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1025 goto jerr;
1026 c <<= 6;
1027 c |= x & 0x3Fu;
1028 }else
1029 goto jerr;
1031 cpx = cp;
1032 lx = l;
1033 jleave:
1034 *bdat = cpx;
1035 *blen = lx;
1036 NYD2_LEAVE;
1037 return c;
1038 jenobuf:
1039 jerr:
1040 c = UI32_MAX;
1041 goto jleave;
1044 FL size_t
1045 n_utf32_to_utf8(ui32_t c, char *buf)
1047 struct {
1048 ui32_t lower_bound;
1049 ui32_t upper_bound;
1050 ui8_t enc_leader;
1051 ui8_t enc_lval;
1052 ui8_t dec_leader_mask;
1053 ui8_t dec_leader_val_mask;
1054 ui8_t dec_bytes_togo;
1055 ui8_t cat_index;
1056 ui8_t __dummy[2];
1057 } const _cat[] = {
1058 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
1059 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
1060 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
1061 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
1062 /* xxx _from_utf32() simply assumes magic code points for surrogates!
1063 * xxx (However, should we ever get yet another surrogate range we
1064 * xxx need to deal with that all over the place anyway? */
1065 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
1066 {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
1067 }, *catp = _cat;
1068 size_t l;
1070 if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
1071 if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
1072 if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
1073 if (c <= _cat[3].upper_bound) {
1074 /* Surrogates may not be converted (Compatibility rule C10) */
1075 if (c >= 0xD800u && c <= 0xDFFFu)
1076 goto jerr;
1077 catp += 3;
1078 goto j3;
1080 if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
1081 jerr:
1082 c = 0xFFFDu; /* Unicode replacement character */
1083 catp += 3;
1084 goto j3;
1086 buf[3] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1088 buf[2] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1090 buf[1] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1092 buf[0] = (char)catp->enc_leader | (char)(c);
1094 buf[catp->enc_lval] = '\0';
1095 l = catp->enc_lval;
1096 NYD2_LEAVE;
1097 return l;
1101 * Our iconv(3) wrapper
1104 FL char *
1105 n_iconv_normalize_name(char const *cset){
1106 char *cp, c, *tcp, tc;
1107 bool_t any;
1108 NYD2_ENTER;
1110 /* We need to strip //SUFFIXes off, we want to normalize to all lowercase,
1111 * and we perform some slight content testing, too */
1112 for(any = FAL0, cp = n_UNCONST(cset); (c = *cp) != '\0'; ++cp){
1113 if(!alnumchar(c) && !punctchar(c)){
1114 n_err(_("Invalid character set name %s\n"),
1115 n_shexp_quote_cp(cset, FAL0));
1116 cset = NULL;
1117 goto jleave;
1118 }else if(c == '/')
1119 break;
1120 else if(upperchar(c))
1121 any = TRU1;
1124 if(any || c != '\0'){
1125 cp = savestrbuf(cset, PTR2SIZE(cp - cset));
1126 for(tcp = cp; (tc = *tcp) != '\0'; ++tcp)
1127 *tcp = lowerconv(tc);
1129 if(c != '\0' && (n_poption & n_PO_D_V))
1130 n_err(_("Stripped off character set suffix: %s -> %s\n"),
1131 n_shexp_quote_cp(cset, FAL0), n_shexp_quote_cp(cp, FAL0));
1133 cset = cp;
1135 jleave:
1136 NYD2_LEAVE;
1137 return n_UNCONST(cset);
1140 FL bool_t
1141 n_iconv_name_is_ascii(char const *cset){ /* TODO ctext/su */
1142 bool_t rv;
1143 NYD2_ENTER;
1145 /* In MIME preference order */
1146 rv = (!asccasecmp(cset, "US-ASCII") || !asccasecmp(cset, "ASCII") ||
1147 !asccasecmp(cset, "ANSI_X3.4-1968") ||
1148 !asccasecmp(cset, "iso-ir-6") ||
1149 !asccasecmp(cset, "ANSI_X3.4-1986") ||
1150 !asccasecmp(cset, "ISO_646.irv:1991") ||
1151 !asccasecmp(cset, "ISO646-US") || !asccasecmp(cset, "us") ||
1152 !asccasecmp(cset, "IBM367") || !asccasecmp(cset, "cp367") ||
1153 !asccasecmp(cset, "csASCII"));
1154 NYD2_LEAVE;
1155 return rv;
1158 #ifdef HAVE_ICONV
1159 FL iconv_t
1160 n_iconv_open(char const *tocode, char const *fromcode){
1161 iconv_t id;
1162 NYD_ENTER;
1164 if((!asccasecmp(fromcode, "unknown-8bit") ||
1165 !asccasecmp(fromcode, "binary")) &&
1166 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
1167 fromcode = ok_vlook(CHARSET_8BIT_OKEY);
1169 id = iconv_open(tocode, fromcode);
1171 /* If the encoding names are equal at this point, they are just not
1172 * understood by iconv(), and we cannot sensibly use it in any way. We do
1173 * not perform this as an optimization above since iconv() can otherwise be
1174 * used to check the validity of the input even with identical encoding
1175 * names */
1176 if (id == (iconv_t)-1 && !asccasecmp(tocode, fromcode))
1177 n_err_no = n_ERR_NONE;
1178 NYD_LEAVE;
1179 return id;
1182 FL void
1183 n_iconv_close(iconv_t cd){
1184 NYD_ENTER;
1185 iconv_close(cd);
1186 if(cd == iconvd)
1187 iconvd = (iconv_t)-1;
1188 NYD_LEAVE;
1191 FL void
1192 n_iconv_reset(iconv_t cd){
1193 NYD_ENTER;
1194 iconv(cd, NULL, NULL, NULL, NULL);
1195 NYD_LEAVE;
1198 /* (2012-09-24: export and use it exclusively to isolate prototype problems
1199 * (*inb* is 'char const **' except in POSIX) in a single place.
1200 * GNU libiconv even allows for configuration time const/non-const..
1201 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
1202 * support compiler invocations which bail on error, so no -Werror */
1203 /* Citrus project? */
1204 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
1205 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
1206 # if n_OS_DRAGONFLY
1207 # define __INBCAST(S) (char ** __restrict__)n_UNCONST(S)
1208 # else
1209 # define __INBCAST(S) (char const **)n_UNCONST(S)
1210 # endif
1211 # elif n_OS_SUNOS || n_OS_SOLARIS
1212 # define __INBCAST(S) (char const ** __restrict__)n_UNCONST(S)
1213 # endif
1214 # ifndef __INBCAST
1215 # define __INBCAST(S) (char **)n_UNCONST(S)
1216 # endif
1218 FL int
1219 n_iconv_buf(iconv_t cd, enum n_iconv_flags icf,
1220 char const **inb, size_t *inbleft, char **outb, size_t *outbleft){
1221 int err;
1222 NYD2_ENTER;
1224 if((icf & n_ICONV_UNIREPL) && !(n_psonce & n_PSO_UNICODE))
1225 icf &= ~n_ICONV_UNIREPL;
1227 for(;;){
1228 size_t sz;
1230 if((sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft)) == 0)
1231 break;
1232 if(sz != (size_t)-1){
1233 if(!(icf & n_ICONV_IGN_NOREVERSE)){
1234 err = n_ERR_NOENT;
1235 goto jleave;
1237 break;
1240 if((err = n_err_no) == n_ERR_2BIG)
1241 goto jleave;
1243 if(!(icf & n_ICONV_IGN_ILSEQ) || err != n_ERR_ILSEQ)
1244 goto jleave;
1245 if(*inbleft > 0){
1246 ++(*inb);
1247 --(*inbleft);
1248 if(icf & n_ICONV_UNIREPL){
1249 if(*outbleft >= sizeof(n_unirepl) -1){
1250 memcpy(*outb, n_unirepl, sizeof(n_unirepl) -1);
1251 *outb += sizeof(n_unirepl) -1;
1252 *outbleft -= sizeof(n_unirepl) -1;
1253 continue;
1255 }else if(*outbleft > 0){
1256 *(*outb)++ = '?';
1257 --*outbleft;
1258 continue;
1260 err = n_ERR_2BIG;
1261 goto jleave;
1262 }else if(*outbleft > 0){
1263 **outb = '\0';
1264 goto jleave;
1267 err = 0;
1268 jleave:
1269 n_iconv_err_no = err;
1270 NYD2_LEAVE;
1271 return err;
1273 # undef __INBCAST
/* Convert the content of in via n_iconv_buf() and append the result to out.
 * out->s is taken over by a temporary n_string (sp) for the duration of the
 * conversion, so it may get reallocated; upon return out->s is the
 * NUL-terminated buffer of that string, out->l its length.
 * If in_rest_or_null is not NULL it is set to the unconverted rest of the
 * input once the conversion loop has been left regularly.
 * Returns 0 on success, n_ERR_INVAL if the sizes involved cannot be handled
 * by n_string, or the error of n_iconv_buf(). */
1275 FL int
1276 n_iconv_str(iconv_t cd, enum n_iconv_flags icf,
1277 struct str *out, struct str const *in, struct str *in_rest_or_null){
1278 struct n_string s, *sp = &s;
1279 char const *ib;
1280 int err;
1281 size_t il;
1282 NYD2_ENTER;
/* Refuse lengths n_string cannot deal with before anything is touched; this
 * path leaves via j_leave, so out is not modified at all */
1284 il = in->l;
1285 if(!n_string_get_can_book(il) || !n_string_get_can_book(out->l)){
1286 err = n_ERR_INVAL;
1287 goto j_leave;
1289 ib = in->s;
/* Let the n_string manage the existing output buffer */
1291 sp = n_string_creat(sp);
1292 sp = n_string_take_ownership(sp, out->s, out->l, 0);
/* Convert; as long as the output buffer is too small, enlarge it and retry */
1294 for(;;){
1295 char *ob_base, *ob;
1296 size_t ol, nol;
/* ol is the current output length, nol becomes the new size of the string:
 * at least the larger of ol and the remaining input length, plus 32 bytes
 * for small sizes, and roughly doubled otherwise */
1298 if((nol = ol = sp->s_len) < il)
1299 nol = il;
1300 assert(sizeof(sp->s_len) == sizeof(ui32_t));
1301 if(nol < 128)
1302 nol += 32;
1303 else{
1304 ui64_t xnol;
/* If that much cannot be booked, try to get along with 64 additional bytes
 * before giving up */
1306 xnol = (ui64_t)(nol << 1) - (nol >> 4);
1307 if(!n_string_can_book(sp, xnol)){
1308 xnol = ol + 64;
1309 if(!n_string_can_book(sp, xnol)){
1310 err = n_ERR_INVAL;
1311 goto jleave;
1314 nol = (size_t)xnol;
1316 sp = n_string_resize(sp, nol);
/* Convert into the space behind the existing output; nol now is the number
 * of bytes which are available there */
1318 ob = ob_base = &sp->s_dat[ol];
1319 nol -= ol;
1320 err = n_iconv_buf(cd, icf, &ib, &il, &ob, &nol);
/* Cut the string down to what has actually been written */
1322 sp = n_string_trunc(sp, ol + PTR2SIZE(ob - ob_base));
/* Only a too small output buffer is worth another round */
1323 if(err == 0 || err != n_ERR_2BIG)
1324 break;
1327 if(in_rest_or_null != NULL){
1328 in_rest_or_null->s = n_UNCONST(ib);
1329 in_rest_or_null->l = il;
/* Hand the (NUL-terminated) buffer back to out, then detach it from sp */
1332 jleave:
1333 out->s = n_string_cp(sp);
1334 out->l = sp->s_len;
1335 sp = n_string_drop_ownership(sp);
1336 /* n_string_gut(sp)*/
1337 j_leave:
1338 NYD2_LEAVE;
1339 return err;
1342 FL char *
1343 n_iconv_onetime_cp(enum n_iconv_flags icf,
1344 char const *tocode, char const *fromcode, char const *input){
1345 struct str out, in;
1346 iconv_t icd;
1347 char *rv;
1348 NYD2_ENTER;
1350 rv = NULL;
1351 if(tocode == NULL)
1352 tocode = ok_vlook(ttycharset);
1353 if(fromcode == NULL)
1354 fromcode = "utf-8";
1356 if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
1357 goto jleave;
1359 in.l = strlen(in.s = n_UNCONST(input)); /* logical */
1360 out.s = NULL, out.l = 0;
1361 if(!n_iconv_str(icd, icf, &out, &in, NULL))
1362 rv = savestrbuf(out.s, out.l);
1363 if(out.s != NULL)
1364 n_free(out.s);
1366 iconv_close(icd);
1367 jleave:
1368 NYD2_LEAVE;
1369 return rv;
1371 #endif /* HAVE_ICONV */
1373 /* s-it-mode */