*quote-as-attachment*: handle in compose-mode, very last..
[s-mailx.git] / strings.c
blob4952fa340664f8ce52dd362bdfef7224391f441e
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE strings
#ifndef HAVE_AMALGAMATION
# include "nail.h"
#endif

#include <ctype.h>
#include <limits.h>
44 FL char *
45 (savestr)(char const *str n_MEMORY_DEBUG_ARGS)
47 size_t size;
48 char *news;
49 NYD_ENTER;
51 size = strlen(str);
52 news = (n_autorec_alloc_from_pool)(NULL, size +1 n_MEMORY_DEBUG_ARGSCALL);
53 if(size > 0)
54 memcpy(news, str, size);
55 news[size] = '\0';
56 NYD_LEAVE;
57 return news;
60 FL char *
61 (savestrbuf)(char const *sbuf, size_t sbuf_len n_MEMORY_DEBUG_ARGS)
63 char *news;
64 NYD_ENTER;
66 news = (n_autorec_alloc_from_pool)(NULL, sbuf_len +1
67 n_MEMORY_DEBUG_ARGSCALL);
68 if(sbuf_len > 0)
69 memcpy(news, sbuf, sbuf_len);
70 news[sbuf_len] = 0;
71 NYD_LEAVE;
72 return news;
75 FL char *
76 (savecatsep)(char const *s1, char sep, char const *s2 n_MEMORY_DEBUG_ARGS)
78 size_t l1, l2;
79 char *news;
80 NYD_ENTER;
82 l1 = (s1 != NULL) ? strlen(s1) : 0;
83 l2 = strlen(s2);
84 news = (n_autorec_alloc_from_pool)(NULL, l1 + (sep != '\0') + l2 +1
85 n_MEMORY_DEBUG_ARGSCALL);
86 if (l1 > 0) {
87 memcpy(news + 0, s1, l1);
88 if (sep != '\0')
89 news[l1++] = sep;
91 if(l2 > 0)
92 memcpy(news + l1, s2, l2);
93 news[l1 + l2] = '\0';
94 NYD_LEAVE;
95 return news;
99 * Support routines, auto-reclaimed storage
102 FL struct str *
103 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
105 va_list vl;
106 size_t l;
107 char const *cs;
108 NYD_ENTER;
110 va_start(vl, self);
111 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
112 l += strlen(cs);
113 va_end(vl);
115 self->l = l;
116 self->s = n_autorec_alloc(l +1);
118 va_start(vl, self);
119 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
120 size_t i;
122 i = strlen(cs);
123 if(i > 0){
124 memcpy(self->s + l, cs, i);
125 l += i;
128 self->s[l] = '\0';
129 va_end(vl);
130 NYD_LEAVE;
131 return self;
134 FL struct str *
135 (str_concat_cpa)(struct str *self, char const * const *cpa,
136 char const *sep_o_null n_MEMORY_DEBUG_ARGS)
138 size_t sonl, l;
139 char const * const *xcpa;
140 NYD_ENTER;
142 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
144 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
145 l += strlen(*xcpa) + sonl;
147 self->l = l;
148 self->s = (n_autorec_alloc_from_pool)(NULL, l +1 n_MEMORY_DEBUG_ARGSCALL);
150 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
151 size_t i;
153 i = strlen(*xcpa);
154 if(i > 0){
155 memcpy(self->s + l, *xcpa, i);
156 l += i;
158 if (sonl > 0) {
159 memcpy(self->s + l, sep_o_null, sonl);
160 l += sonl;
163 self->s[l] = '\0';
164 NYD_LEAVE;
165 return self;
169 * Routines that are not related to auto-reclaimed storage follow.
172 FL bool_t
173 n_anyof_buf(char const *template, char const *dat, size_t len){
174 char c;
175 NYD2_ENTER;
177 if(len == UIZ_MAX){
178 while((c = *template++) != '\0')
179 if(strchr(dat, c) != NULL)
180 break;
181 }else if(len > 0){
182 while((c = *template++) != '\0')
183 if(memchr(dat, c, len) != NULL)
184 break;
185 }else
186 c = '\0';
187 NYD2_LEAVE;
188 return (c != '\0');
191 FL char *
192 n_strsep(char **iolist, char sep, bool_t ignore_empty){
193 char *base, *cp;
194 NYD2_ENTER;
196 for(base = *iolist; base != NULL; base = *iolist){
197 while(*base != '\0' && blankspacechar(*base))
198 ++base;
200 cp = strchr(base, sep);
201 if(cp != NULL)
202 *iolist = &cp[1];
203 else{
204 *iolist = NULL;
205 cp = &base[strlen(base)];
207 while(cp > base && blankspacechar(cp[-1]))
208 --cp;
209 *cp = '\0';
210 if(*base != '\0' || !ignore_empty)
211 break;
213 NYD2_LEAVE;
214 return base;
/* Like n_strsep(), but a separator preceded by a reverse solidus does not
 * terminate the token: such "\sep" pairs are collapsed to a plain sep within
 * the returned token.  The buffer is modified in place; *iolist is advanced
 * past the terminating separator or set to NULL at end of input */
FL char *
n_strsep_esc(char **iolist, char sep, bool_t ignore_empty){
   char *cp, c, *base;
   bool_t isesc, anyesc;
   NYD2_ENTER;

   for(base = *iolist; base != NULL; base = *iolist){
      /* Skip leading blanks */
      while((c = *base) != '\0' && blankspacechar(c))
         ++base;

      /* Find the first unescaped separator; remember in anyesc whether any
       * escaped separator was passed, so the compaction below can be skipped
       * otherwise */
      for(isesc = anyesc = FAL0, cp = base;; ++cp){
         if(n_UNLIKELY((c = *cp) == '\0')){
            *iolist = NULL;
            break;
         }else if(!isesc){
            if(c == sep){
               *iolist = &cp[1];
               break;
            }
            isesc = (c == '\\');
         }else{
            isesc = FAL0;
            anyesc |= (c == sep);
         }
      }

      /* Strip trailing blanks and terminate the token */
      while(cp > base && blankspacechar(cp[-1]))
         --cp;
      *cp = '\0';

      if(*base != '\0'){
         if(anyesc){
            char *ins;

            /* In-place compaction: ins is the write, cp the read position.
             * Only a reverse solidus directly followed by sep is dropped */
            for(ins = cp = base;; ++ins)
               if((c = *cp) == '\\' && cp[1] == sep){
                  *ins = sep;
                  cp += 2;
               }else if((*ins = (++cp, c)) == '\0')
                  break;
         }
      }

      if(*base != '\0' || !ignore_empty)
         break;
   }
   NYD2_LEAVE;
   return base;
}
267 FL bool_t
268 is_prefix(char const *as1, char const *as2) /* TODO arg order */
270 char c;
271 NYD2_ENTER;
273 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
274 if (*as2 == '\0')
275 break;
276 NYD2_LEAVE;
277 return (c == '\0');
280 FL char *
281 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
283 char const *cp;
284 size_t i;
285 char c, *rv;
286 NYD2_ENTER;
288 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
289 if (c == '"' || c == '\\')
290 ++i;
291 rv = n_autorec_alloc(i +1);
293 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
294 if (c == '"' || c == '\\')
295 rv[i++] = '\\';
296 rv[i] = '\0';
297 NYD2_LEAVE;
298 return rv;
301 FL void
302 makelow(char *cp) /* TODO isn't that crap? --> */
304 NYD_ENTER;
305 #ifdef HAVE_C90AMEND1
306 if (n_mb_cur_max > 1) {
307 char *tp = cp;
308 wchar_t wc;
309 int len;
311 while (*cp != '\0') {
312 len = mbtowc(&wc, cp, n_mb_cur_max);
313 if (len < 0)
314 *tp++ = *cp++;
315 else {
316 wc = towlower(wc);
317 if (wctomb(tp, wc) == len)
318 tp += len, cp += len;
319 else
320 *tp++ = *cp++; /* <-- at least here */
323 } else
324 #endif
327 *cp = tolower((uc_i)*cp);
328 while (*cp++ != '\0');
330 NYD_LEAVE;
333 FL bool_t
334 substr(char const *str, char const *sub)
336 char const *cp, *backup;
337 NYD_ENTER;
339 cp = sub;
340 backup = str;
341 while (*str != '\0' && *cp != '\0') {
342 #ifdef HAVE_C90AMEND1
343 if (n_mb_cur_max > 1) {
344 wchar_t c, c2;
345 int sz;
347 if ((sz = mbtowc(&c, cp, n_mb_cur_max)) == -1)
348 goto Jsinglebyte;
349 cp += sz;
350 if ((sz = mbtowc(&c2, str, n_mb_cur_max)) == -1)
351 goto Jsinglebyte;
352 str += sz;
353 c = towupper(c);
354 c2 = towupper(c2);
355 if (c != c2) {
356 if ((sz = mbtowc(&c, backup, n_mb_cur_max)) > 0) {
357 backup += sz;
358 str = backup;
359 } else
360 str = ++backup;
361 cp = sub;
363 } else
364 Jsinglebyte:
365 #endif
367 int c, c2;
369 c = *cp++ & 0377;
370 if (islower(c))
371 c = toupper(c);
372 c2 = *str++ & 0377;
373 if (islower(c2))
374 c2 = toupper(c2);
375 if (c != c2) {
376 str = ++backup;
377 cp = sub;
381 NYD_LEAVE;
382 return (*cp == '\0');
385 FL char *
386 sstpcpy(char *dst, char const *src)
388 NYD2_ENTER;
389 while ((*dst = *src++) != '\0')
390 ++dst;
391 NYD2_LEAVE;
392 return dst;
395 FL char *
396 (sstrdup)(char const *cp n_MEMORY_DEBUG_ARGS)
398 char *dp;
399 NYD2_ENTER;
401 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) n_MEMORY_DEBUG_ARGSCALL);
402 NYD2_LEAVE;
403 return dp;
406 FL char *
407 (sbufdup)(char const *cp, size_t len n_MEMORY_DEBUG_ARGS)
409 char *dp = NULL;
410 NYD2_ENTER;
412 dp = (n_alloc)(len +1 n_MEMORY_DEBUG_ARGSCALL);
413 if (cp != NULL)
414 memcpy(dp, cp, len);
415 dp[len] = '\0';
416 NYD2_LEAVE;
417 return dp;
420 FL ssize_t
421 n_strscpy(char *dst, char const *src, size_t dstsize){
422 ssize_t rv;
423 NYD2_ENTER;
425 if(n_LIKELY(dstsize > 0)){
426 rv = 0;
428 if((dst[rv] = src[rv]) == '\0')
429 goto jleave;
430 ++rv;
431 }while(--dstsize > 0);
432 dst[--rv] = '\0';
434 #ifdef HAVE_DEVEL
435 else
436 assert(dstsize > 0);
437 #endif
438 rv = -1;
439 jleave:
440 NYD2_LEAVE;
441 return rv;
444 FL int
445 asccasecmp(char const *s1, char const *s2)
447 int cmp;
448 NYD2_ENTER;
450 for (;;) {
451 char c1 = *s1++, c2 = *s2++;
452 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
453 break;
455 NYD2_LEAVE;
456 return cmp;
459 FL int
460 ascncasecmp(char const *s1, char const *s2, size_t sz)
462 int cmp = 0;
463 NYD2_ENTER;
465 while (sz-- > 0) {
466 char c1 = *s1++, c2 = *s2++;
467 cmp = (ui8_t)lowerconv(c1);
468 cmp -= (ui8_t)lowerconv(c2);
469 if (cmp != 0 || c1 == '\0')
470 break;
472 NYD2_LEAVE;
473 return cmp;
476 FL char const *
477 asccasestr(char const *s1, char const *s2)
479 char c2, c1;
480 NYD2_ENTER;
482 for (c2 = *s2++, c2 = lowerconv(c2);;) {
483 if ((c1 = *s1++) == '\0') {
484 s1 = NULL;
485 break;
487 if (lowerconv(c1) == c2 && is_asccaseprefix(s2, s1)) {
488 --s1;
489 break;
492 NYD2_LEAVE;
493 return s1;
496 FL bool_t
497 is_asccaseprefix(char const *as1, char const *as2) /* TODO arg order */
499 char c1, c2;
500 NYD2_ENTER;
502 for(;; ++as1, ++as2){
503 c1 = *as1;
504 c1 = lowerconv(c1);
505 c2 = *as2;
506 c2 = lowerconv(c2);
508 if(c1 != c2 || c1 == '\0')
509 break;
510 if(c2 == '\0')
511 break;
513 NYD2_LEAVE;
514 return (c1 == '\0');
517 FL bool_t
518 is_ascncaseprefix(char const *as1, char const *as2, size_t sz)
520 char c1, c2;
521 bool_t rv;
522 NYD2_ENTER;
524 for(rv = TRU1; sz-- > 0; ++as1, ++as2){
525 c1 = *as1;
526 c1 = lowerconv(c1);
527 c2 = *as2;
528 c2 = lowerconv(c2);
530 if(!(rv = (c1 == c2)) || c1 == '\0')
531 break;
532 if(c2 == '\0')
533 break;
535 NYD2_LEAVE;
536 return rv;
540 FL struct str *
541 (n_str_assign_buf)(struct str *self, char const *buf, uiz_t buflen
542 n_MEMORY_DEBUG_ARGS){
543 NYD_ENTER;
544 if(buflen == UIZ_MAX)
545 buflen = (buf == NULL) ? 0 : strlen(buf);
547 assert(buflen == 0 || buf != NULL);
549 if(n_LIKELY(buflen > 0)){
550 self->s = (n_realloc)(self->s, (self->l = buflen) +1
551 n_MEMORY_DEBUG_ARGSCALL);
552 memcpy(self->s, buf, buflen);
553 self->s[buflen] = '\0';
554 }else
555 self->l = 0;
556 NYD_LEAVE;
557 return self;
560 FL struct str *
561 (n_str_add_buf)(struct str *self, char const *buf, uiz_t buflen
562 n_MEMORY_DEBUG_ARGS){
563 NYD_ENTER;
564 if(buflen == UIZ_MAX)
565 buflen = (buf == NULL) ? 0 : strlen(buf);
567 assert(buflen == 0 || buf != NULL);
569 if(buflen > 0) {
570 size_t osl = self->l, nsl = osl + buflen;
572 self->s = (n_realloc)(self->s, (self->l = nsl) +1
573 n_MEMORY_DEBUG_ARGSCALL);
574 memcpy(self->s + osl, buf, buflen);
575 self->s[nsl] = '\0';
577 NYD_LEAVE;
578 return self;
581 FL struct str *
582 n_str_trim(struct str *self, enum n_str_trim_flags stf){
583 size_t l;
584 char const *cp;
585 NYD2_ENTER;
587 cp = self->s;
589 if((l = self->l) > 0 && (stf & n_STR_TRIM_FRONT)){
590 while(spacechar(*cp)){
591 ++cp;
592 if(--l == 0)
593 break;
595 self->s = n_UNCONST(cp);
598 if(l > 0 && (stf & n_STR_TRIM_END)){
599 for(cp += l -1; spacechar(*cp); --cp)
600 if(--l == 0)
601 break;
603 self->l = l;
605 NYD2_LEAVE;
606 return self;
/* Trim self at both ends according to the characters in the value returned
 * by ok_vlook(ifs_ws); with dodefaults space, tabulator and newline are
 * trimmed in addition.  At the end, a whitespace byte that is preceded by an
 * uneven number of reverse solidus characters is kept (it is escaped).
 * Only self->s and self->l are adjusted, no data is moved.  Returns self.
 * NOTE(review): ok_vlook(ifs_ws) is assumed to never return NULL -- confirm */
FL struct str *
n_str_trim_ifs(struct str *self, bool_t dodefaults){
   char s, t, n, c;
   char const *ifs, *cp;
   size_t l, i;
   NYD2_ENTER;

   if((l = self->l) == 0)
      goto jleave;

   ifs = ok_vlook(ifs_ws);
   cp = self->s;
   s = t = n = '\0';

   /* Check whether we can go fast(er) path */
   for(i = 0; (c = ifs[i]) != '\0'; ++i){
      switch(c){
      case ' ': s = c; break;
      case '\t': t = c; break;
      case '\n': n = c; break;
      default:
         /* Need to go the slow path */
         /* NOTE(review): strchr() also matches the terminating NUL of ifs,
          * so a NUL byte in the data is treated like whitespace here */
         while(strchr(ifs, *cp) != NULL){
            ++cp;
            if(--l == 0)
               break;
         }
         self->s = n_UNCONST(cp);

         if(l > 0){
            for(cp += l -1; strchr(ifs, *cp) != NULL;){
               if(--l == 0)
                  break;
               /* An uneven number of reverse solidus escapes last WS! */
               else if(*--cp == '\\'){
                  siz_t j;

                  /* Count the run of reverse solidus before the WS byte */
                  for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
                     ;
                  if(j & 1){
                     ++l;
                     break;
                  }
               }
            }
         }
         self->l = l;

         if(!dodefaults)
            goto jleave;
         cp = self->s;
         /* This break only leaves the switch, the scan of ifs continues */
         ++i;
         break;
      }
   }

   /* No ifs-ws? No more data? No trimming */
   if(l == 0 || (i == 0 && !dodefaults))
      goto jleave;

   if(dodefaults){
      s = ' ';
      t = '\t';
      n = '\n';
   }

   if(l > 0){
      while((c = *cp) != '\0' && (c == s || c == t || c == n)){
         ++cp;
         if(--l == 0)
            break;
      }
      self->s = n_UNCONST(cp);
   }

   if(l > 0){
      for(cp += l -1; (c = *cp) != '\0' && (c == s || c == t || c == n);){
         if(--l == 0)
            break;
         /* An uneven number of reverse solidus escapes last WS! */
         else if(*--cp == '\\'){
            siz_t j;

            for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
               ;
            if(j & 1){
               ++l;
               break;
            }
         }
      }
   }
   self->l = l;
jleave:
   NYD2_LEAVE;
   return self;
}
708 * struct n_string TODO extend, optimize
711 FL struct n_string *
712 (n_string_clear)(struct n_string *self n_MEMORY_DEBUG_ARGS){
713 NYD_ENTER;
715 assert(self != NULL);
717 if(self->s_size != 0){
718 if(!self->s_auto){
719 (n_free)(self->s_dat n_MEMORY_DEBUG_ARGSCALL);
721 self->s_len = self->s_auto = self->s_size = 0;
722 self->s_dat = NULL;
724 NYD_LEAVE;
725 return self;
/* Ensure that self can take noof more bytes.  Panics via n_panic() if the
 * request would exceed the SI32_MAX based limit.  If the free space
 * (s_size - s_len) is not larger than noof +1 the buffer is grown to an
 * n_ALIGN()ed size: via n_realloc(), or, if s_auto is set, by allocating
 * from n_autorec_alloc_from_pool() and copying the s_len used bytes.
 * s_len is not changed.  Returns self */
FL struct n_string *
(n_string_reserve)(struct n_string *self, size_t noof n_MEMORY_DEBUG_ARGS){
   ui32_t i, l, s;
   NYD_ENTER;
   assert(self != NULL);

   s = self->s_size;
   l = self->s_len;
   if((size_t)SI32_MAX - n_ALIGN(1) - l <= noof)
      n_panic(_("Memory allocation too large"));

   /* noof is incremented first; presumably this accounts for the NUL that
    * n_string_cp() stores -- TODO confirm */
   if((i = s - l) <= ++noof){
      i += l + (ui32_t)noof;
      i = n_ALIGN(i);
      /* s_size is kept one less than the number of allocated bytes */
      self->s_size = i -1;

      if(!self->s_auto)
         self->s_dat = (n_realloc)(self->s_dat, i n_MEMORY_DEBUG_ARGSCALL);
      else{
         char *ndat = (n_autorec_alloc_from_pool)(NULL, i
               n_MEMORY_DEBUG_ARGSCALL);

         if(l > 0)
            memcpy(ndat, self->s_dat, l);
         self->s_dat = ndat;
      }
   }
   NYD_LEAVE;
   return self;
}
759 FL struct n_string *
760 (n_string_resize)(struct n_string *self, size_t nlen n_MEMORY_DEBUG_ARGS){
761 NYD_ENTER;
762 assert(self != NULL);
764 if(UICMP(z, SI32_MAX, <=, nlen))
765 n_panic(_("Memory allocation too large"));
767 if(self->s_len < nlen)
768 self = (n_string_reserve)(self, nlen n_MEMORY_DEBUG_ARGSCALL);
769 self->s_len = (ui32_t)nlen;
770 NYD_LEAVE;
771 return self;
774 FL struct n_string *
775 (n_string_push_buf)(struct n_string *self, char const *buf, size_t buflen
776 n_MEMORY_DEBUG_ARGS){
777 NYD_ENTER;
779 assert(self != NULL);
780 assert(buflen == 0 || buf != NULL);
782 if(buflen == UIZ_MAX)
783 buflen = (buf == NULL) ? 0 : strlen(buf);
785 if(buflen > 0){
786 ui32_t i;
788 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
789 memcpy(&self->s_dat[i = self->s_len], buf, buflen);
790 self->s_len = (i += (ui32_t)buflen);
792 NYD_LEAVE;
793 return self;
796 FL struct n_string *
797 (n_string_push_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
798 NYD_ENTER;
800 assert(self != NULL);
802 if(self->s_len + 1 >= self->s_size)
803 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
804 self->s_dat[self->s_len++] = c;
805 NYD_LEAVE;
806 return self;
809 FL struct n_string *
810 (n_string_unshift_buf)(struct n_string *self, char const *buf, size_t buflen
811 n_MEMORY_DEBUG_ARGS){
812 NYD_ENTER;
814 assert(self != NULL);
815 assert(buflen == 0 || buf != NULL);
817 if(buflen == UIZ_MAX)
818 buflen = (buf == NULL) ? 0 : strlen(buf);
820 if(buflen > 0){
821 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
822 if(self->s_len > 0)
823 memmove(&self->s_dat[buflen], self->s_dat, self->s_len);
824 memcpy(self->s_dat, buf, buflen);
825 self->s_len += (ui32_t)buflen;
827 NYD_LEAVE;
828 return self;
831 FL struct n_string *
832 (n_string_unshift_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
833 NYD_ENTER;
835 assert(self != NULL);
837 if(self->s_len + 1 >= self->s_size)
838 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
839 if(self->s_len > 0)
840 memmove(&self->s_dat[1], self->s_dat, self->s_len);
841 self->s_dat[0] = c;
842 ++self->s_len;
843 NYD_LEAVE;
844 return self;
847 FL struct n_string *
848 (n_string_insert_buf)(struct n_string *self, size_t idx,
849 char const *buf, size_t buflen n_MEMORY_DEBUG_ARGS){
850 NYD_ENTER;
852 assert(self != NULL);
853 assert(buflen == 0 || buf != NULL);
854 assert(idx <= self->s_len);
856 if(buflen == UIZ_MAX)
857 buflen = (buf == NULL) ? 0 : strlen(buf);
859 if(buflen > 0){
860 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
861 if(self->s_len > 0)
862 memmove(&self->s_dat[idx + buflen], &self->s_dat[idx],
863 self->s_len - idx);
864 memcpy(&self->s_dat[idx], buf, buflen);
865 self->s_len += (ui32_t)buflen;
867 NYD_LEAVE;
868 return self;
871 FL struct n_string *
872 (n_string_insert_c)(struct n_string *self, size_t idx,
873 char c n_MEMORY_DEBUG_ARGS){
874 NYD_ENTER;
876 assert(self != NULL);
877 assert(idx <= self->s_len);
879 if(self->s_len + 1 >= self->s_size)
880 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
881 if(self->s_len > 0)
882 memmove(&self->s_dat[idx + 1], &self->s_dat[idx], self->s_len - idx);
883 self->s_dat[idx] = c;
884 ++self->s_len;
885 NYD_LEAVE;
886 return self;
889 FL struct n_string *
890 n_string_cut(struct n_string *self, size_t idx, size_t len){
891 NYD_ENTER;
893 assert(self != NULL);
894 assert(UIZ_MAX - idx > len);
895 assert(SI32_MAX >= idx + len);
896 assert(idx + len <= self->s_len);
898 if(len > 0)
899 memmove(&self->s_dat[idx], &self->s_dat[idx + len],
900 (self->s_len -= len) - idx);
901 NYD_LEAVE;
902 return self;
905 FL char *
906 (n_string_cp)(struct n_string *self n_MEMORY_DEBUG_ARGS){
907 char *rv;
908 NYD2_ENTER;
910 assert(self != NULL);
912 if(self->s_size == 0)
913 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
915 (rv = self->s_dat)[self->s_len] = '\0';
916 NYD2_LEAVE;
917 return rv;
920 FL char const *
921 n_string_cp_const(struct n_string const *self){
922 char const *rv;
923 NYD2_ENTER;
925 assert(self != NULL);
927 if(self->s_size != 0){
928 ((struct n_string*)n_UNCONST(self))->s_dat[self->s_len] = '\0';
929 rv = self->s_dat;
930 }else
931 rv = n_empty;
932 NYD2_LEAVE;
933 return rv;
937 * UTF-8
940 FL ui32_t
941 n_utf8_to_utf32(char const **bdat, size_t *blen){
942 ui32_t c, x, x1;
943 char const *cp, *cpx;
944 size_t l, lx;
945 NYD2_ENTER;
947 lx = l = *blen - 1;
948 x = (ui8_t)*(cp = *bdat);
949 cpx = ++cp;
951 if(n_LIKELY(x <= 0x7Fu))
952 c = x;
953 /* 0xF8, but Unicode guarantees maximum of 0x10FFFFu -> F4 8F BF BF.
954 * Unicode 9.0, 3.9, UTF-8, Table 3-7. Well-Formed UTF-8 Byte Sequences */
955 else if(n_LIKELY(x > 0xC0u && x <= 0xF4u)){
956 if(n_LIKELY(x < 0xE0u)){
957 if(n_UNLIKELY(l < 1))
958 goto jenobuf;
959 --l;
961 c = (x &= 0x1Fu);
962 }else if(n_LIKELY(x < 0xF0u)){
963 if(n_UNLIKELY(l < 2))
964 goto jenobuf;
965 l -= 2;
967 x1 = x;
968 c = (x &= 0x0Fu);
970 /* Second byte constraints */
971 x = (ui8_t)*cp++;
972 switch(x1){
973 case 0xE0u:
974 if(n_UNLIKELY(x < 0xA0u || x > 0xBFu))
975 goto jerr;
976 break;
977 case 0xEDu:
978 if(n_UNLIKELY(x < 0x80u || x > 0x9Fu))
979 goto jerr;
980 break;
981 default:
982 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
983 goto jerr;
984 break;
986 c <<= 6;
987 c |= (x &= 0x3Fu);
988 }else{
989 if(n_UNLIKELY(l < 3))
990 goto jenobuf;
991 l -= 3;
993 x1 = x;
994 c = (x &= 0x07u);
996 /* Second byte constraints */
997 x = (ui8_t)*cp++;
998 switch(x1){
999 case 0xF0u:
1000 if(n_UNLIKELY(x < 0x90u || x > 0xBFu))
1001 goto jerr;
1002 break;
1003 case 0xF4u:
1004 if(n_UNLIKELY((x & 0xF0u) != 0x80u)) /* 80..8F */
1005 goto jerr;
1006 break;
1007 default:
1008 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1009 goto jerr;
1010 break;
1012 c <<= 6;
1013 c |= (x &= 0x3Fu);
1015 x = (ui8_t)*cp++;
1016 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1017 goto jerr;
1018 c <<= 6;
1019 c |= (x &= 0x3Fu);
1022 x = (ui8_t)*cp++;
1023 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1024 goto jerr;
1025 c <<= 6;
1026 c |= x & 0x3Fu;
1027 }else
1028 goto jerr;
1030 cpx = cp;
1031 lx = l;
1032 jleave:
1033 *bdat = cpx;
1034 *blen = lx;
1035 NYD2_LEAVE;
1036 return c;
1037 jenobuf:
1038 jerr:
1039 c = UI32_MAX;
1040 goto jleave;
1043 FL size_t
1044 n_utf32_to_utf8(ui32_t c, char *buf)
1046 struct {
1047 ui32_t lower_bound;
1048 ui32_t upper_bound;
1049 ui8_t enc_leader;
1050 ui8_t enc_lval;
1051 ui8_t dec_leader_mask;
1052 ui8_t dec_leader_val_mask;
1053 ui8_t dec_bytes_togo;
1054 ui8_t cat_index;
1055 ui8_t __dummy[2];
1056 } const _cat[] = {
1057 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
1058 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
1059 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
1060 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
1061 /* xxx _from_utf32() simply assumes magic code points for surrogates!
1062 * xxx (However, should we ever get yet another surrogate range we
1063 * xxx need to deal with that all over the place anyway? */
1064 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
1065 {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
1066 }, *catp = _cat;
1067 size_t l;
1069 if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
1070 if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
1071 if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
1072 if (c <= _cat[3].upper_bound) {
1073 /* Surrogates may not be converted (Compatibility rule C10) */
1074 if (c >= 0xD800u && c <= 0xDFFFu)
1075 goto jerr;
1076 catp += 3;
1077 goto j3;
1079 if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
1080 jerr:
1081 c = 0xFFFDu; /* Unicode replacement character */
1082 catp += 3;
1083 goto j3;
1085 buf[3] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1087 buf[2] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1089 buf[1] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1091 buf[0] = (char)catp->enc_leader | (char)(c);
1093 buf[catp->enc_lval] = '\0';
1094 l = catp->enc_lval;
1095 NYD2_LEAVE;
1096 return l;
1100 * Our iconv(3) wrapper
1103 FL char *
1104 n_iconv_normalize_name(char const *cset){
1105 char *cp, c, *tcp, tc;
1106 bool_t any;
1107 NYD2_ENTER;
1109 /* We need to strip //SUFFIXes off, we want to normalize to all lowercase,
1110 * and we perform some slight content testing, too */
1111 for(any = FAL0, cp = n_UNCONST(cset); (c = *cp) != '\0'; ++cp){
1112 if(!alnumchar(c) && !punctchar(c)){
1113 n_err(_("Invalid character set name %s\n"),
1114 n_shexp_quote_cp(cset, FAL0));
1115 cset = NULL;
1116 goto jleave;
1117 }else if(c == '/')
1118 break;
1119 else if(upperchar(c))
1120 any = TRU1;
1123 if(any || c != '\0'){
1124 cp = savestrbuf(cset, PTR2SIZE(cp - cset));
1125 for(tcp = cp; (tc = *tcp) != '\0'; ++tcp)
1126 *tcp = lowerconv(tc);
1128 if(c != '\0' && (n_poption & n_PO_D_V))
1129 n_err(_("Stripped off character set suffix: %s -> %s\n"),
1130 n_shexp_quote_cp(cset, FAL0), n_shexp_quote_cp(cp, FAL0));
1132 cset = cp;
1134 jleave:
1135 NYD2_LEAVE;
1136 return n_UNCONST(cset);
1139 FL bool_t
1140 n_iconv_name_is_ascii(char const *cset){ /* TODO ctext/su */
1141 bool_t rv;
1142 NYD2_ENTER;
1144 /* In MIME preference order */
1145 rv = (!asccasecmp(cset, "US-ASCII") || !asccasecmp(cset, "ASCII") ||
1146 !asccasecmp(cset, "ANSI_X3.4-1968") ||
1147 !asccasecmp(cset, "iso-ir-6") ||
1148 !asccasecmp(cset, "ANSI_X3.4-1986") ||
1149 !asccasecmp(cset, "ISO_646.irv:1991") ||
1150 !asccasecmp(cset, "ISO646-US") || !asccasecmp(cset, "us") ||
1151 !asccasecmp(cset, "IBM367") || !asccasecmp(cset, "cp367") ||
1152 !asccasecmp(cset, "csASCII"));
1153 NYD2_LEAVE;
1154 return rv;
1157 #ifdef HAVE_ICONV
1158 FL iconv_t
1159 n_iconv_open(char const *tocode, char const *fromcode){
1160 iconv_t id;
1161 NYD_ENTER;
1163 if((!asccasecmp(fromcode, "unknown-8bit") ||
1164 !asccasecmp(fromcode, "binary")) &&
1165 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
1166 fromcode = ok_vlook(CHARSET_8BIT_OKEY);
1168 id = iconv_open(tocode, fromcode);
1170 /* If the encoding names are equal at this point, they are just not
1171 * understood by iconv(), and we cannot sensibly use it in any way. We do
1172 * not perform this as an optimization above since iconv() can otherwise be
1173 * used to check the validity of the input even with identical encoding
1174 * names */
1175 if (id == (iconv_t)-1 && !asccasecmp(tocode, fromcode))
1176 n_err_no = n_ERR_NONE;
1177 NYD_LEAVE;
1178 return id;
1181 FL void
1182 n_iconv_close(iconv_t cd){
1183 NYD_ENTER;
1184 iconv_close(cd);
1185 if(cd == iconvd)
1186 iconvd = (iconv_t)-1;
1187 NYD_LEAVE;
1190 FL void
1191 n_iconv_reset(iconv_t cd){
1192 NYD_ENTER;
1193 iconv(cd, NULL, NULL, NULL, NULL);
1194 NYD_LEAVE;
/* __INBCAST(S) casts the input buffer argument of iconv(3) to the type the
 * platform prototype expects.
 * (2012-09-24: export and use it exclusively to isolate prototype problems
 * (*inb* is 'char const **' except in POSIX) in a single place.
 * GNU libiconv even allows for configuration time const/non-const..
 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
 * support compiler invocations which bail on error, so no -Werror.)
 * The fallback is the plain 'char **' cast */
 /* Citrus project? */
# if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
  /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
#  if n_OS_DRAGONFLY
#   define __INBCAST(S) (char ** __restrict__)n_UNCONST(S)
#  else
#   define __INBCAST(S) (char const **)n_UNCONST(S)
#  endif
# elif n_OS_SUNOS || n_OS_SOLARIS
#  define __INBCAST(S) (char const ** __restrict__)n_UNCONST(S)
# endif
# ifndef __INBCAST
#  define __INBCAST(S) (char **)n_UNCONST(S)
# endif
1217 FL int
1218 n_iconv_buf(iconv_t cd, enum n_iconv_flags icf,
1219 char const **inb, size_t *inbleft, char **outb, size_t *outbleft){
1220 int err;
1221 NYD2_ENTER;
1223 if((icf & n_ICONV_UNIREPL) && !(n_psonce & n_PSO_UNICODE))
1224 icf &= ~n_ICONV_UNIREPL;
1226 for(;;){
1227 size_t sz;
1229 if((sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft)) == 0)
1230 break;
1231 if(sz != (size_t)-1){
1232 if(!(icf & n_ICONV_IGN_NOREVERSE)){
1233 err = n_ERR_NOENT;
1234 goto jleave;
1236 break;
1239 if((err = n_err_no) == n_ERR_2BIG)
1240 goto jleave;
1242 if(!(icf & n_ICONV_IGN_ILSEQ) || err != n_ERR_ILSEQ)
1243 goto jleave;
1244 if(*inbleft > 0){
1245 ++(*inb);
1246 --(*inbleft);
1247 if(icf & n_ICONV_UNIREPL){
1248 if(*outbleft >= sizeof(n_unirepl) -1){
1249 memcpy(*outb, n_unirepl, sizeof(n_unirepl) -1);
1250 *outb += sizeof(n_unirepl) -1;
1251 *outbleft -= sizeof(n_unirepl) -1;
1252 continue;
1254 }else if(*outbleft > 0){
1255 *(*outb)++ = '?';
1256 --*outbleft;
1257 continue;
1259 err = n_ERR_2BIG;
1260 goto jleave;
1261 }else if(*outbleft > 0){
1262 **outb = '\0';
1263 goto jleave;
1266 err = 0;
1267 jleave:
1268 n_iconv_err_no = err;
1269 NYD2_LEAVE;
1270 return err;
1272 # undef __INBCAST
1274 FL int
1275 n_iconv_str(iconv_t cd, enum n_iconv_flags icf,
1276 struct str *out, struct str const *in, struct str *in_rest_or_null){
1277 struct n_string s, *sp = &s;
1278 char const *ib;
1279 int err;
1280 size_t il;
1281 NYD2_ENTER;
1283 il = in->l;
1284 if(!n_string_get_can_book(il) || !n_string_get_can_book(out->l)){
1285 err = n_ERR_INVAL;
1286 goto j_leave;
1288 ib = in->s;
1290 sp = n_string_creat(sp);
1291 sp = n_string_take_ownership(sp, out->s, out->l, 0);
1293 for(;;){
1294 char *ob_base, *ob;
1295 size_t ol, nol;
1297 if((nol = ol = sp->s_len) < il)
1298 nol = il;
1299 assert(sizeof(sp->s_len) == sizeof(ui32_t));
1300 if(nol < 128)
1301 nol += 32;
1302 else{
1303 ui64_t xnol;
1305 xnol = (ui64_t)(nol << 1) - (nol >> 4);
1306 if(!n_string_can_book(sp, xnol)){
1307 xnol = ol + 64;
1308 if(!n_string_can_book(sp, xnol)){
1309 err = n_ERR_INVAL;
1310 goto jleave;
1313 nol = (size_t)xnol;
1315 sp = n_string_resize(sp, nol);
1317 ob = ob_base = &sp->s_dat[ol];
1318 nol -= ol;
1319 err = n_iconv_buf(cd, icf, &ib, &il, &ob, &nol);
1321 sp = n_string_trunc(sp, ol + PTR2SIZE(ob - ob_base));
1322 if(err == 0 || err != n_ERR_2BIG)
1323 break;
1326 if(in_rest_or_null != NULL){
1327 in_rest_or_null->s = n_UNCONST(ib);
1328 in_rest_or_null->l = il;
1331 jleave:
1332 out->s = n_string_cp(sp);
1333 out->l = sp->s_len;
1334 sp = n_string_drop_ownership(sp);
1335 /* n_string_gut(sp)*/
1336 j_leave:
1337 NYD2_LEAVE;
1338 return err;
1341 FL char *
1342 n_iconv_onetime_cp(enum n_iconv_flags icf,
1343 char const *tocode, char const *fromcode, char const *input){
1344 struct str out, in;
1345 iconv_t icd;
1346 char *rv;
1347 NYD2_ENTER;
1349 rv = NULL;
1350 if(tocode == NULL)
1351 tocode = ok_vlook(ttycharset);
1352 if(fromcode == NULL)
1353 fromcode = "utf-8";
1355 if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
1356 goto jleave;
1358 in.l = strlen(in.s = n_UNCONST(input)); /* logical */
1359 out.s = NULL, out.l = 0;
1360 if(!n_iconv_str(icd, icf, &out, &in, NULL))
1361 rv = savestrbuf(out.s, out.l);
1362 if(out.s != NULL)
1363 n_free(out.s);
1365 iconv_close(icd);
1366 jleave:
1367 NYD2_LEAVE;
1368 return rv;
1370 #endif /* HAVE_ICONV */
1372 /* s-it-mode */