n_utf8_to_utf32(): FIX: implement compliant to Unicode 9.0, 3.9, UTF-8
[s-mailx.git] / strings.c
blob055892700a96a1e8152a949e30bf04a5d5f37ad7
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE strings
#ifndef HAVE_AMALGAMATION
# include "nail.h"
#endif

#include <ctype.h>
#include <limits.h>
44 FL char *
45 (savestr)(char const *str n_MEMORY_DEBUG_ARGS)
47 size_t size;
48 char *news;
49 NYD_ENTER;
51 size = strlen(str);
52 news = (n_autorec_alloc_from_pool)(NULL, size +1 n_MEMORY_DEBUG_ARGSCALL);
53 if(size > 0)
54 memcpy(news, str, size);
55 news[size] = '\0';
56 NYD_LEAVE;
57 return news;
60 FL char *
61 (savestrbuf)(char const *sbuf, size_t sbuf_len n_MEMORY_DEBUG_ARGS)
63 char *news;
64 NYD_ENTER;
66 news = (n_autorec_alloc_from_pool)(NULL, sbuf_len +1
67 n_MEMORY_DEBUG_ARGSCALL);
68 if(sbuf_len > 0)
69 memcpy(news, sbuf, sbuf_len);
70 news[sbuf_len] = 0;
71 NYD_LEAVE;
72 return news;
75 FL char *
76 (savecatsep)(char const *s1, char sep, char const *s2 n_MEMORY_DEBUG_ARGS)
78 size_t l1, l2;
79 char *news;
80 NYD_ENTER;
82 l1 = (s1 != NULL) ? strlen(s1) : 0;
83 l2 = strlen(s2);
84 news = (n_autorec_alloc_from_pool)(NULL, l1 + (sep != '\0') + l2 +1
85 n_MEMORY_DEBUG_ARGSCALL);
86 if (l1 > 0) {
87 memcpy(news + 0, s1, l1);
88 if (sep != '\0')
89 news[l1++] = sep;
91 if(l2 > 0)
92 memcpy(news + l1, s2, l2);
93 news[l1 + l2] = '\0';
94 NYD_LEAVE;
95 return news;
/*
 * Support routines, auto-reclaimed storage
 */
102 FL char *
103 (i_strdup)(char const *src n_MEMORY_DEBUG_ARGS)
105 size_t sz;
106 char *dest;
107 NYD_ENTER;
109 sz = strlen(src) +1;
110 dest = (n_autorec_alloc_from_pool)(NULL, sz n_MEMORY_DEBUG_ARGSCALL);
111 if(sz > 1)
112 i_strcpy(dest, src, sz);
113 else
114 dest[sz] = '\0';
115 NYD_LEAVE;
116 return dest;
119 FL struct str *
120 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
122 va_list vl;
123 size_t l;
124 char const *cs;
125 NYD_ENTER;
127 va_start(vl, self);
128 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
129 l += strlen(cs);
130 va_end(vl);
132 self->l = l;
133 self->s = salloc(l +1);
135 va_start(vl, self);
136 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
137 size_t i;
139 i = strlen(cs);
140 if(i > 0){
141 memcpy(self->s + l, cs, i);
142 l += i;
145 self->s[l] = '\0';
146 va_end(vl);
147 NYD_LEAVE;
148 return self;
151 FL struct str *
152 (str_concat_cpa)(struct str *self, char const * const *cpa,
153 char const *sep_o_null n_MEMORY_DEBUG_ARGS)
155 size_t sonl, l;
156 char const * const *xcpa;
157 NYD_ENTER;
159 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
161 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
162 l += strlen(*xcpa) + sonl;
164 self->l = l;
165 self->s = (n_autorec_alloc_from_pool)(NULL, l +1 n_MEMORY_DEBUG_ARGSCALL);
167 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
168 size_t i;
170 i = strlen(*xcpa);
171 if(i > 0){
172 memcpy(self->s + l, *xcpa, i);
173 l += i;
175 if (sonl > 0) {
176 memcpy(self->s + l, sep_o_null, sonl);
177 l += sonl;
180 self->s[l] = '\0';
181 NYD_LEAVE;
182 return self;
/*
 * Routines that are not related to auto-reclaimed storage follow.
 */
189 FL bool_t
190 n_anyof_buf(char const *template, char const *dat, size_t len){
191 char c;
192 NYD2_ENTER;
194 if(len == UIZ_MAX){
195 while((c = *template++) != '\0')
196 if(strchr(dat, c) != NULL)
197 break;
198 }else if(len > 0){
199 while((c = *template++) != '\0')
200 if(memchr(dat, c, len) != NULL)
201 break;
202 }else
203 c = '\0';
204 NYD2_LEAVE;
205 return (c != '\0');
208 FL char *
209 n_strsep(char **iolist, char sep, bool_t ignore_empty){
210 char *base, *cp;
211 NYD2_ENTER;
213 for(base = *iolist; base != NULL; base = *iolist){
214 while(*base != '\0' && blankspacechar(*base))
215 ++base;
217 cp = strchr(base, sep);
218 if(cp != NULL)
219 *iolist = &cp[1];
220 else{
221 *iolist = NULL;
222 cp = &base[strlen(base)];
224 while(cp > base && blankspacechar(cp[-1]))
225 --cp;
226 *cp = '\0';
227 if(*base != '\0' || !ignore_empty)
228 break;
230 NYD2_LEAVE;
231 return base;
234 FL char *
235 n_strsep_esc(char **iolist, char sep, bool_t ignore_empty){
236 char *cp, c, *base;
237 bool_t isesc, anyesc;
238 NYD2_ENTER;
240 for(base = *iolist; base != NULL; base = *iolist){
241 while((c = *base) != '\0' && blankspacechar(c))
242 ++base;
244 for(isesc = anyesc = FAL0, cp = base;; ++cp){
245 if(n_UNLIKELY((c = *cp) == '\0')){
246 *iolist = NULL;
247 break;
248 }else if(!isesc){
249 if(c == sep){
250 *iolist = &cp[1];
251 break;
253 isesc = (c == '\\');
254 }else{
255 isesc = FAL0;
256 anyesc |= (c == sep);
260 while(cp > base && blankspacechar(cp[-1]))
261 --cp;
262 *cp = '\0';
264 if(*base != '\0'){
265 if(anyesc){
266 char *ins;
268 for(ins = cp = base;; ++ins)
269 if((c = *cp) == '\\' && cp[1] == sep){
270 *ins = sep;
271 cp += 2;
272 }else if((*ins = (++cp, c)) == '\0')
273 break;
277 if(*base != '\0' || !ignore_empty)
278 break;
280 NYD2_LEAVE;
281 return base;
284 FL void
285 i_strcpy(char *dest, char const *src, size_t size)
287 NYD2_ENTER;
288 if(size > 0){
289 for(;; ++dest, ++src)
290 if((*dest = lowerconv(*src)) == '\0'){
291 break;
292 }else if(--size == 0){
293 *dest = '\0';
294 break;
297 NYD2_LEAVE;
300 FL bool_t
301 is_prefix(char const *as1, char const *as2) /* TODO arg order */
303 char c;
304 NYD2_ENTER;
306 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
307 if (*as2 == '\0')
308 break;
309 NYD2_LEAVE;
310 return (c == '\0');
313 FL char *
314 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
316 char const *cp;
317 size_t i;
318 char c, *rv;
319 NYD2_ENTER;
321 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
322 if (c == '"' || c == '\\')
323 ++i;
324 rv = salloc(i +1);
326 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
327 if (c == '"' || c == '\\')
328 rv[i++] = '\\';
329 rv[i] = '\0';
330 NYD2_LEAVE;
331 return rv;
334 FL char *
335 laststring(char *linebuf, bool_t *needs_list, bool_t strip)
337 char *cp, *p, quoted;
338 NYD_ENTER;
340 /* Anything to do at all? */
341 if (*(cp = linebuf) == '\0')
342 goto jnull;
343 cp += strlen(linebuf) -1;
345 /* Strip away trailing blanks */
346 while (spacechar(*cp) && cp > linebuf)
347 --cp;
348 cp[1] = '\0';
349 if (cp == linebuf)
350 goto jleave;
352 /* Now search for the BOS of the "last string" */
353 quoted = *cp;
354 if (quoted == '\'' || quoted == '"') {
355 if (strip)
356 *cp = '\0';
357 } else
358 quoted = ' ';
360 while (cp > linebuf) {
361 --cp;
362 if (quoted != ' ') {
363 if (*cp != quoted)
364 continue;
365 } else if (!spacechar(*cp))
366 continue;
367 if (cp == linebuf || cp[-1] != '\\') {
368 /* When in whitespace mode, WS prefix doesn't belong */
369 if (quoted == ' ')
370 ++cp;
371 break;
373 /* Expand the escaped quote character */
374 for (p = --cp; (p[0] = p[1]) != '\0'; ++p)
377 if (strip && quoted != ' ' && *cp == quoted)
378 for (p = cp; (p[0] = p[1]) != '\0'; ++p)
381 /* The "last string" has been skipped over, but still, try to step backwards
382 * until we are at BOS or see whitespace, so as to make possible things like
383 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
384 while (cp > linebuf) {
385 --cp;
386 if (spacechar(*cp)) {
387 p = cp;
388 *cp++ = '\0';
389 /* We can furtherly release our callees if we now decide whether the
390 * remaining non-"last string" line content contains non-WS */
391 while (--p >= linebuf)
392 if (!spacechar(*p))
393 goto jleave;
394 linebuf = cp;
395 break;
399 jleave:
400 if (cp != NULL && *cp == '\0')
401 goto jnull;
402 *needs_list = (cp != linebuf && *linebuf != '\0');
403 j_leave:
404 NYD_LEAVE;
405 return cp;
406 jnull:
407 *needs_list = FAL0;
408 cp = NULL;
409 goto j_leave;
412 FL void
413 makelow(char *cp) /* TODO isn't that crap? --> */
415 NYD_ENTER;
416 #ifdef HAVE_C90AMEND1
417 if (n_mb_cur_max > 1) {
418 char *tp = cp;
419 wchar_t wc;
420 int len;
422 while (*cp != '\0') {
423 len = mbtowc(&wc, cp, n_mb_cur_max);
424 if (len < 0)
425 *tp++ = *cp++;
426 else {
427 wc = towlower(wc);
428 if (wctomb(tp, wc) == len)
429 tp += len, cp += len;
430 else
431 *tp++ = *cp++; /* <-- at least here */
434 } else
435 #endif
438 *cp = tolower((uc_i)*cp);
439 while (*cp++ != '\0');
441 NYD_LEAVE;
444 FL bool_t
445 substr(char const *str, char const *sub)
447 char const *cp, *backup;
448 NYD_ENTER;
450 cp = sub;
451 backup = str;
452 while (*str != '\0' && *cp != '\0') {
453 #ifdef HAVE_C90AMEND1
454 if (n_mb_cur_max > 1) {
455 wchar_t c, c2;
456 int sz;
458 if ((sz = mbtowc(&c, cp, n_mb_cur_max)) == -1)
459 goto Jsinglebyte;
460 cp += sz;
461 if ((sz = mbtowc(&c2, str, n_mb_cur_max)) == -1)
462 goto Jsinglebyte;
463 str += sz;
464 c = towupper(c);
465 c2 = towupper(c2);
466 if (c != c2) {
467 if ((sz = mbtowc(&c, backup, n_mb_cur_max)) > 0) {
468 backup += sz;
469 str = backup;
470 } else
471 str = ++backup;
472 cp = sub;
474 } else
475 Jsinglebyte:
476 #endif
478 int c, c2;
480 c = *cp++ & 0377;
481 if (islower(c))
482 c = toupper(c);
483 c2 = *str++ & 0377;
484 if (islower(c2))
485 c2 = toupper(c2);
486 if (c != c2) {
487 str = ++backup;
488 cp = sub;
492 NYD_LEAVE;
493 return (*cp == '\0');
496 FL char *
497 sstpcpy(char *dst, char const *src)
499 NYD2_ENTER;
500 while ((*dst = *src++) != '\0')
501 ++dst;
502 NYD2_LEAVE;
503 return dst;
506 FL char *
507 (sstrdup)(char const *cp n_MEMORY_DEBUG_ARGS)
509 char *dp;
510 NYD2_ENTER;
512 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) n_MEMORY_DEBUG_ARGSCALL);
513 NYD2_LEAVE;
514 return dp;
517 FL char *
518 (sbufdup)(char const *cp, size_t len n_MEMORY_DEBUG_ARGS)
520 char *dp = NULL;
521 NYD2_ENTER;
523 dp = (n_alloc)(len +1 n_MEMORY_DEBUG_ARGSCALL);
524 if (cp != NULL)
525 memcpy(dp, cp, len);
526 dp[len] = '\0';
527 NYD2_LEAVE;
528 return dp;
531 FL ssize_t
532 n_strscpy(char *dst, char const *src, size_t dstsize){
533 ssize_t rv;
534 NYD2_ENTER;
536 if(n_LIKELY(dstsize > 0)){
537 rv = 0;
539 if((dst[rv] = src[rv]) == '\0')
540 goto jleave;
541 ++rv;
542 }while(--dstsize > 0);
543 dst[--rv] = '\0';
545 #ifdef HAVE_DEVEL
546 else
547 assert(dstsize > 0);
548 #endif
549 rv = -1;
550 jleave:
551 NYD2_LEAVE;
552 return rv;
555 FL int
556 asccasecmp(char const *s1, char const *s2)
558 int cmp;
559 NYD2_ENTER;
561 for (;;) {
562 char c1 = *s1++, c2 = *s2++;
563 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
564 break;
566 NYD2_LEAVE;
567 return cmp;
570 FL int
571 ascncasecmp(char const *s1, char const *s2, size_t sz)
573 int cmp = 0;
574 NYD2_ENTER;
576 while (sz-- > 0) {
577 char c1 = *s1++, c2 = *s2++;
578 cmp = (ui8_t)lowerconv(c1);
579 cmp -= (ui8_t)lowerconv(c2);
580 if (cmp != 0 || c1 == '\0')
581 break;
583 NYD2_LEAVE;
584 return cmp;
587 FL char const *
588 asccasestr(char const *s1, char const *s2)
590 char c2, c1;
591 NYD2_ENTER;
593 for (c2 = *s2++, c2 = lowerconv(c2);;) {
594 if ((c1 = *s1++) == '\0') {
595 s1 = NULL;
596 break;
598 if (lowerconv(c1) == c2 && is_asccaseprefix(s2, s1)) {
599 --s1;
600 break;
603 NYD2_LEAVE;
604 return s1;
607 FL bool_t
608 is_asccaseprefix(char const *as1, char const *as2) /* TODO arg order */
610 char c1, c2;
611 NYD2_ENTER;
613 for(;; ++as1, ++as2){
614 c1 = *as1;
615 c1 = lowerconv(c1);
616 c2 = *as2;
617 c2 = lowerconv(c2);
619 if(c1 != c2 || c1 == '\0')
620 break;
621 if(c2 == '\0')
622 break;
624 NYD2_LEAVE;
625 return (c1 == '\0');
628 FL bool_t
629 is_ascncaseprefix(char const *as1, char const *as2, size_t sz)
631 char c1, c2;
632 bool_t rv;
633 NYD2_ENTER;
635 for(rv = TRU1; sz-- > 0; ++as1, ++as2){
636 c1 = *as1;
637 c1 = lowerconv(c1);
638 c2 = *as2;
639 c2 = lowerconv(c2);
641 if(!(rv = (c1 == c2)) || c1 == '\0')
642 break;
643 if(c2 == '\0')
644 break;
646 NYD2_LEAVE;
647 return rv;
651 FL struct str *
652 (n_str_assign_buf)(struct str *self, char const *buf, uiz_t buflen
653 n_MEMORY_DEBUG_ARGS){
654 NYD_ENTER;
655 if(buflen == UIZ_MAX)
656 buflen = (buf == NULL) ? 0 : strlen(buf);
658 assert(buflen == 0 || buf != NULL);
660 if(n_LIKELY(buflen > 0)){
661 self->s = (n_realloc)(self->s, (self->l = buflen) +1
662 n_MEMORY_DEBUG_ARGSCALL);
663 memcpy(self->s, buf, buflen);
664 self->s[buflen] = '\0';
665 }else
666 self->l = 0;
667 NYD_LEAVE;
668 return self;
671 FL struct str *
672 (n_str_add_buf)(struct str *self, char const *buf, uiz_t buflen
673 n_MEMORY_DEBUG_ARGS){
674 NYD_ENTER;
675 if(buflen == UIZ_MAX)
676 buflen = (buf == NULL) ? 0 : strlen(buf);
678 assert(buflen == 0 || buf != NULL);
680 if(buflen > 0) {
681 size_t osl = self->l, nsl = osl + buflen;
683 self->s = (n_realloc)(self->s, (self->l = nsl) +1
684 n_MEMORY_DEBUG_ARGSCALL);
685 memcpy(self->s + osl, buf, buflen);
686 self->s[nsl] = '\0';
688 NYD_LEAVE;
689 return self;
692 FL struct str *
693 n_str_trim(struct str *self, enum n_str_trim_flags stf){
694 size_t l;
695 char const *cp;
696 NYD2_ENTER;
698 cp = self->s;
700 if((l = self->l) > 0 && (stf & n_STR_TRIM_FRONT)){
701 while(spacechar(*cp)){
702 ++cp;
703 if(--l == 0)
704 break;
706 self->s = n_UNCONST(cp);
709 if(l > 0 && (stf & n_STR_TRIM_END)){
710 for(cp += l -1; spacechar(*cp); --cp)
711 if(--l == 0)
712 break;
714 self->l = l;
716 NYD2_LEAVE;
717 return self;
720 FL struct str *
721 n_str_trim_ifs(struct str *self, bool_t dodefaults){
722 char s, t, n, c;
723 char const *ifs, *cp;
724 size_t l, i;
725 NYD2_ENTER;
727 if((l = self->l) == 0)
728 goto jleave;
730 ifs = ok_vlook(ifs_ws);
731 cp = self->s;
732 s = t = n = '\0';
734 /* Check whether we can go fast(er) path */
735 for(i = 0; (c = ifs[i]) != '\0'; ++i){
736 switch(c){
737 case ' ': s = c; break;
738 case '\t': t = c; break;
739 case '\n': n = c; break;
740 default:
741 /* Need to go the slow path */
742 while(strchr(ifs, *cp) != NULL){
743 ++cp;
744 if(--l == 0)
745 break;
747 self->s = n_UNCONST(cp);
749 if(l > 0){
750 for(cp += l -1; strchr(ifs, *cp) != NULL;){
751 if(--l == 0)
752 break;
753 /* An uneven number of reverse solidus escapes last WS! */
754 else if(*--cp == '\\'){
755 siz_t j;
757 for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
759 if(j & 1){
760 ++l;
761 break;
766 self->l = l;
768 if(!dodefaults)
769 goto jleave;
770 cp = self->s;
771 ++i;
772 break;
776 /* No ifs-ws? No more data? No trimming */
777 if(l == 0 || (i == 0 && !dodefaults))
778 goto jleave;
780 if(dodefaults){
781 s = ' ';
782 t = '\t';
783 n = '\n';
786 if(l > 0){
787 while((c = *cp) != '\0' && (c == s || c == t || c == n)){
788 ++cp;
789 if(--l == 0)
790 break;
792 self->s = n_UNCONST(cp);
795 if(l > 0){
796 for(cp += l -1; (c = *cp) != '\0' && (c == s || c == t || c == n);){
797 if(--l == 0)
798 break;
799 /* An uneven number of reverse solidus escapes last WS! */
800 else if(*--cp == '\\'){
801 siz_t j;
803 for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
805 if(j & 1){
806 ++l;
807 break;
812 self->l = l;
813 jleave:
814 NYD2_LEAVE;
815 return self;
/*
 * struct n_string TODO extend, optimize
 */
822 FL struct n_string *
823 (n_string_clear)(struct n_string *self n_MEMORY_DEBUG_ARGS){
824 NYD_ENTER;
826 assert(self != NULL);
828 if(self->s_size != 0){
829 if(!self->s_auto){
830 (n_free)(self->s_dat n_MEMORY_DEBUG_ARGSCALL);
832 self->s_len = self->s_auto = self->s_size = 0;
833 self->s_dat = NULL;
835 NYD_LEAVE;
836 return self;
839 FL struct n_string *
840 (n_string_reserve)(struct n_string *self, size_t noof n_MEMORY_DEBUG_ARGS){
841 ui32_t i, l, s;
842 NYD_ENTER;
844 assert(self != NULL);
846 s = self->s_size;
847 l = self->s_len;
848 #if 0 /* FIXME memory alloc too large */
849 if(SI32_MAX - n_ALIGN(1) - l <= noof)
850 n_panic(_("Memory allocation too large"));
851 #endif
853 if((i = s - l) <= ++noof){
854 i += l + (ui32_t)noof;
855 i = n_ALIGN(i);
856 self->s_size = i -1;
858 if(!self->s_auto)
859 self->s_dat = (n_realloc)(self->s_dat, i n_MEMORY_DEBUG_ARGSCALL);
860 else{
861 char *ndat = (n_autorec_alloc_from_pool)(NULL, i
862 n_MEMORY_DEBUG_ARGSCALL);
864 if(l > 0)
865 memcpy(ndat, self->s_dat, l);
866 self->s_dat = ndat;
869 NYD_LEAVE;
870 return self;
873 FL struct n_string *
874 (n_string_resize)(struct n_string *self, size_t nlen n_MEMORY_DEBUG_ARGS){
875 NYD_ENTER;
877 assert(self != NULL);
878 #if 0 /* FIXME memory alloc too large */
879 if(SI32_MAX - n_ALIGN(1) - l <= noof)
880 n_panic(_("Memory allocation too large"));
881 #endif
883 if(self->s_len < nlen)
884 self = (n_string_reserve)(self, nlen n_MEMORY_DEBUG_ARGSCALL);
885 self->s_len = (ui32_t)nlen;
886 NYD_LEAVE;
887 return self;
890 FL struct n_string *
891 (n_string_push_buf)(struct n_string *self, char const *buf, size_t buflen
892 n_MEMORY_DEBUG_ARGS){
893 NYD_ENTER;
895 assert(self != NULL);
896 assert(buflen == 0 || buf != NULL);
898 if(buflen == UIZ_MAX)
899 buflen = (buf == NULL) ? 0 : strlen(buf);
901 if(buflen > 0){
902 ui32_t i;
904 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
905 memcpy(&self->s_dat[i = self->s_len], buf, buflen);
906 self->s_len = (i += (ui32_t)buflen);
908 NYD_LEAVE;
909 return self;
912 FL struct n_string *
913 (n_string_push_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
914 NYD_ENTER;
916 assert(self != NULL);
918 if(self->s_len + 1 >= self->s_size)
919 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
920 self->s_dat[self->s_len++] = c;
921 NYD_LEAVE;
922 return self;
925 FL struct n_string *
926 (n_string_unshift_buf)(struct n_string *self, char const *buf, size_t buflen
927 n_MEMORY_DEBUG_ARGS){
928 NYD_ENTER;
930 assert(self != NULL);
931 assert(buflen == 0 || buf != NULL);
933 if(buflen == UIZ_MAX)
934 buflen = (buf == NULL) ? 0 : strlen(buf);
936 if(buflen > 0){
937 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
938 if(self->s_len > 0)
939 memmove(&self->s_dat[buflen], self->s_dat, self->s_len);
940 memcpy(self->s_dat, buf, buflen);
941 self->s_len += (ui32_t)buflen;
943 NYD_LEAVE;
944 return self;
947 FL struct n_string *
948 (n_string_unshift_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
949 NYD_ENTER;
951 assert(self != NULL);
953 if(self->s_len + 1 >= self->s_size)
954 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
955 if(self->s_len > 0)
956 memmove(&self->s_dat[1], self->s_dat, self->s_len);
957 self->s_dat[0] = c;
958 ++self->s_len;
959 NYD_LEAVE;
960 return self;
963 FL struct n_string *
964 (n_string_insert_buf)(struct n_string *self, size_t idx,
965 char const *buf, size_t buflen n_MEMORY_DEBUG_ARGS){
966 NYD_ENTER;
968 assert(self != NULL);
969 assert(buflen == 0 || buf != NULL);
970 assert(idx <= self->s_len);
972 if(buflen == UIZ_MAX)
973 buflen = (buf == NULL) ? 0 : strlen(buf);
975 if(buflen > 0){
976 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
977 if(self->s_len > 0)
978 memmove(&self->s_dat[idx + buflen], &self->s_dat[idx],
979 self->s_len - idx);
980 memcpy(&self->s_dat[idx], buf, buflen);
981 self->s_len += (ui32_t)buflen;
983 NYD_LEAVE;
984 return self;
987 FL struct n_string *
988 (n_string_insert_c)(struct n_string *self, size_t idx,
989 char c n_MEMORY_DEBUG_ARGS){
990 NYD_ENTER;
992 assert(self != NULL);
993 assert(idx <= self->s_len);
995 if(self->s_len + 1 >= self->s_size)
996 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
997 if(self->s_len > 0)
998 memmove(&self->s_dat[idx + 1], &self->s_dat[idx], self->s_len - idx);
999 self->s_dat[idx] = c;
1000 ++self->s_len;
1001 NYD_LEAVE;
1002 return self;
1005 FL struct n_string *
1006 n_string_cut(struct n_string *self, size_t idx, size_t len){
1007 NYD_ENTER;
1009 assert(self != NULL);
1010 assert(UIZ_MAX - idx > len);
1011 assert(SI32_MAX >= idx + len);
1012 assert(idx + len <= self->s_len);
1014 if(len > 0)
1015 memmove(&self->s_dat[idx], &self->s_dat[idx + len],
1016 (self->s_len -= len) - idx);
1017 NYD_LEAVE;
1018 return self;
1021 FL char *
1022 (n_string_cp)(struct n_string *self n_MEMORY_DEBUG_ARGS){
1023 char *rv;
1024 NYD2_ENTER;
1026 assert(self != NULL);
1028 if(self->s_size == 0)
1029 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
1031 (rv = self->s_dat)[self->s_len] = '\0';
1032 NYD2_LEAVE;
1033 return rv;
1036 FL char const *
1037 n_string_cp_const(struct n_string const *self){
1038 char const *rv;
1039 NYD2_ENTER;
1041 assert(self != NULL);
1043 if(self->s_size != 0){
1044 ((struct n_string*)n_UNCONST(self))->s_dat[self->s_len] = '\0';
1045 rv = self->s_dat;
1046 }else
1047 rv = n_empty;
1048 NYD2_LEAVE;
1049 return rv;
/*
 * UTF-8
 */
1056 FL ui32_t
1057 n_utf8_to_utf32(char const **bdat, size_t *blen){
1058 ui32_t c, x, x1;
1059 char const *cp, *cpx;
1060 size_t l, lx;
1061 NYD2_ENTER;
1063 lx = l = *blen - 1;
1064 x = *(cp = *bdat);
1065 cpx = ++cp;
1067 if(n_LIKELY(x <= 0x7Fu))
1068 c = x;
1069 /* 0xF8, but Unicode guarantees maximum of 0x10FFFFu -> F4 8F BF BF.
1070 * Unicode 9.0, 3.9, UTF-8, Table 3-7. Well-Formed UTF-8 Byte Sequences */
1071 else if(n_LIKELY(x > 0xC0u && x <= 0xF4u)){
1072 if(n_LIKELY(x < 0xE0u)){
1073 if(n_UNLIKELY(l < 1))
1074 goto jenobuf;
1075 --l;
1077 c = (x &= 0x1Fu);
1078 }else if(n_LIKELY(x < 0xF0u)){
1079 if(n_UNLIKELY(l < 2))
1080 goto jenobuf;
1081 l -= 2;
1083 x1 = x;
1084 c = (x &= 0x0Fu);
1086 /* Second byte constraints */
1087 x = (ui8_t)*cp++;
1088 switch(x1){
1089 case 0xE0u:
1090 if(n_UNLIKELY(x < 0xA0u || x > 0xBFu))
1091 goto jerr;
1092 break;
1093 case 0xEDu:
1094 if(n_UNLIKELY(x < 0x80u || x > 0x9Fu))
1095 goto jerr;
1096 break;
1097 default:
1098 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1099 goto jerr;
1100 break;
1102 c <<= 6;
1103 c |= (x &= 0x3Fu);
1104 }else{
1105 if(n_UNLIKELY(l < 3))
1106 goto jenobuf;
1107 l -= 3;
1109 x1 = x;
1110 c = (x &= 0x07u);
1112 /* Second byte constraints */
1113 x = (ui8_t)*cp++;
1114 switch(x1){
1115 case 0xF0u:
1116 if(n_UNLIKELY(x < 0x90u || x > 0xBFu))
1117 goto jerr;
1118 break;
1119 case 0xF4u:
1120 if(n_UNLIKELY((x & 0xF0u) != 0x80u)) /* 80..8F */
1121 goto jerr;
1122 break;
1123 default:
1124 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1125 goto jerr;
1126 break;
1128 c <<= 6;
1129 c |= (x &= 0x3Fu);
1131 x = (ui8_t)*cp++;
1132 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1133 goto jerr;
1134 c <<= 6;
1135 c |= (x &= 0x3Fu);
1138 x = (ui8_t)*cp++;
1139 if(n_UNLIKELY((x & 0xC0u) != 0x80u))
1140 goto jerr;
1141 c <<= 6;
1142 c |= x & 0x3Fu;
1143 }else
1144 goto jerr;
1146 cpx = cp;
1147 lx = l;
1148 jleave:
1149 *bdat = cpx;
1150 *blen = lx;
1151 NYD2_LEAVE;
1152 return c;
1153 jenobuf:
1154 jerr:
1155 c = UI32_MAX;
1156 goto jleave;
1159 FL size_t
1160 n_utf32_to_utf8(ui32_t c, char *buf)
1162 struct {
1163 ui32_t lower_bound;
1164 ui32_t upper_bound;
1165 ui8_t enc_leader;
1166 ui8_t enc_lval;
1167 ui8_t dec_leader_mask;
1168 ui8_t dec_leader_val_mask;
1169 ui8_t dec_bytes_togo;
1170 ui8_t cat_index;
1171 ui8_t __dummy[2];
1172 } const _cat[] = {
1173 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
1174 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
1175 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
1176 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
1177 /* xxx _from_utf32() simply assumes magic code points for surrogates!
1178 * xxx (However, should we ever get yet another surrogate range we
1179 * xxx need to deal with that all over the place anyway? */
1180 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
1181 {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
1182 }, *catp = _cat;
1183 size_t l;
1185 if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
1186 if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
1187 if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
1188 if (c <= _cat[3].upper_bound) {
1189 /* Surrogates may not be converted (Compatibility rule C10) */
1190 if (c >= 0xD800u && c <= 0xDFFFu)
1191 goto jerr;
1192 catp += 3;
1193 goto j3;
1195 if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
1196 jerr:
1197 c = 0xFFFDu; /* Unicode replacement character */
1198 catp += 3;
1199 goto j3;
1201 buf[3] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1203 buf[2] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1205 buf[1] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1207 buf[0] = (char)catp->enc_leader | (char)(c);
1209 buf[catp->enc_lval] = '\0';
1210 l = catp->enc_lval;
1211 NYD2_LEAVE;
1212 return l;
/*
 * Our iconv(3) wrapper
 */
1219 #ifdef HAVE_ICONV
1220 FL iconv_t
1221 n_iconv_open(char const *tocode, char const *fromcode){
1222 iconv_t id;
1223 NYD_ENTER;
1225 if((!asccasecmp(fromcode, "unknown-8bit") ||
1226 !asccasecmp(fromcode, "binary")) &&
1227 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
1228 fromcode = ok_vlook(CHARSET_8BIT_OKEY);
1230 id = iconv_open(tocode, fromcode);
1232 /* If the encoding names are equal at this point, they are just not
1233 * understood by iconv(), and we cannot sensibly use it in any way. We do
1234 * not perform this as an optimization above since iconv() can otherwise be
1235 * used to check the validity of the input even with identical encoding
1236 * names */
1237 if (id == (iconv_t)-1 && !asccasecmp(tocode, fromcode))
1238 n_err_no = n_ERR_NONE;
1239 NYD_LEAVE;
1240 return id;
1243 FL void
1244 n_iconv_close(iconv_t cd){
1245 NYD_ENTER;
1246 iconv_close(cd);
1247 if(cd == iconvd)
1248 iconvd = (iconv_t)-1;
1249 NYD_LEAVE;
1252 FL void
1253 n_iconv_reset(iconv_t cd){
1254 NYD_ENTER;
1255 iconv(cd, NULL, NULL, NULL, NULL);
1256 NYD_LEAVE;
/* __INBCAST() casts the input buffer argument to what the iconv(3) prototype
 * of the system wants: *inb* is `char const **' except in POSIX, and GNU
 * libiconv even allows for configuration time const/non-const.
 * (2012-09-24: export and use it exclusively to isolate these prototype
 * problems in a single place.)
 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
 * support compiler invocations which bail on error, so no -Werror */
/* Citrus project? */
# if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
  /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
#  if n_OS_DRAGONFLY
#   define __INBCAST(S) (char ** __restrict__)n_UNCONST(S)
#  else
#   define __INBCAST(S) (char const **)n_UNCONST(S)
#  endif
# elif n_OS_SUNOS || n_OS_SOLARIS
#  define __INBCAST(S) (char const ** __restrict__)n_UNCONST(S)
# endif
/* Anything else gets the prototype of POSIX */
# ifndef __INBCAST
#  define __INBCAST(S) (char **)n_UNCONST(S)
# endif
1279 FL int
1280 n_iconv_buf(iconv_t cd, enum n_iconv_flags icf,
1281 char const **inb, size_t *inbleft, char **outb, size_t *outbleft){
1282 int err;
1283 NYD2_ENTER;
1285 if((icf & n_ICONV_UNIREPL) && !(n_psonce & n_PSO_UNICODE))
1286 icf &= ~n_ICONV_UNIREPL;
1288 for(;;){
1289 size_t sz;
1291 sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft);
1292 if(sz > 0 && !(icf & n_ICONV_IGN_NOREVERSE)){
1293 err = n_ERR_NOENT;
1294 goto jleave;
1296 if(sz != (size_t)-1)
1297 break;
1299 err = n_err_no;
1300 if(!(icf & n_ICONV_IGN_ILSEQ) || err != n_ERR_ILSEQ)
1301 goto jleave;
1302 if(*inbleft > 0){
1303 ++(*inb);
1304 --(*inbleft);
1305 if(icf & n_ICONV_UNIREPL){
1306 if(*outbleft >= sizeof(n_unirepl) -1){
1307 memcpy(*outb, n_unirepl, sizeof(n_unirepl) -1);
1308 *outb += sizeof(n_unirepl) -1;
1309 *outbleft -= sizeof(n_unirepl) -1;
1310 continue;
1312 }else if(*outbleft > 0){
1313 *(*outb)++ = '?';
1314 --*outbleft;
1315 continue;
1317 err = n_ERR_2BIG;
1318 goto jleave;
1319 }else if(*outbleft > 0){
1320 **outb = '\0';
1321 goto jleave;
1324 err = 0;
1325 jleave:
1326 n_iconv_err_no = err;
1327 NYD2_LEAVE;
1328 return err;
1330 # undef __INBCAST
1332 FL int
1333 n_iconv_str(iconv_t cd, enum n_iconv_flags icf,
1334 struct str *out, struct str const *in, struct str *in_rest_or_null)
1336 int err;
1337 char *obb, *ob;
1338 char const *ib;
1339 size_t olb, ol, il;
1340 NYD2_ENTER;
1342 obb = out->s;
1343 olb = out->l;
1344 ol = in->l;
1346 ol = (ol << 1) - (ol >> 4);
1347 if (olb <= ol) {
1348 olb = ol;
1349 goto jrealloc;
1352 for (;;) {
1353 ib = in->s;
1354 il = in->l;
1355 ob = obb;
1356 ol = olb;
1357 if((err = n_iconv_buf(cd, icf, &ib, &il, &ob, &ol)) == 0 ||
1358 err != n_ERR_2BIG)
1359 break;
1360 olb += in->l;
1361 jrealloc:
1362 obb = n_realloc(obb, olb +1);
1365 if (in_rest_or_null != NULL) {
1366 in_rest_or_null->s = n_UNCONST(ib);
1367 in_rest_or_null->l = il;
1369 out->s = obb;
1370 out->s[out->l = olb - ol] = '\0';
1371 NYD2_LEAVE;
1372 return err;
1375 FL char *
1376 n_iconv_onetime_cp(enum n_iconv_flags icf,
1377 char const *tocode, char const *fromcode, char const *input){
1378 struct str out, in;
1379 iconv_t icd;
1380 char *rv;
1381 NYD2_ENTER;
1383 rv = NULL;
1384 if(tocode == NULL)
1385 tocode = ok_vlook(ttycharset);
1386 if(fromcode == NULL)
1387 fromcode = "utf-8";
1389 if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
1390 goto jleave;
1392 in.l = strlen(in.s = n_UNCONST(input)); /* logical */
1393 out.s = NULL, out.l = 0;
1394 if(!n_iconv_str(icd, icf, &out, &in, NULL))
1395 rv = savestrbuf(out.s, out.l);
1396 if(out.s != NULL)
1397 free(out.s);
1399 iconv_close(icd);
1400 jleave:
1401 NYD2_LEAVE;
1402 return rv;
1404 #endif /* HAVE_ICONV */
1406 /* s-it-mode */