THANKS: Solar Designer
[s-mailx.git] / strings.c
blob453329c4257d3c183e249080729a5a37208a653e
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE strings
38 #ifndef HAVE_AMALGAMATION
39 # include "nail.h"
40 #endif
42 #include <ctype.h>
44 FL char *
45 (savestr)(char const *str n_MEMORY_DEBUG_ARGS)
47 size_t size;
48 char *news;
49 NYD_ENTER;
51 size = strlen(str);
52 news = (n_autorec_alloc_from_pool)(NULL, size +1 n_MEMORY_DEBUG_ARGSCALL);
53 if(size > 0)
54 memcpy(news, str, size);
55 news[size] = '\0';
56 NYD_LEAVE;
57 return news;
60 FL char *
61 (savestrbuf)(char const *sbuf, size_t sbuf_len n_MEMORY_DEBUG_ARGS)
63 char *news;
64 NYD_ENTER;
66 news = (n_autorec_alloc_from_pool)(NULL, sbuf_len +1
67 n_MEMORY_DEBUG_ARGSCALL);
68 if(sbuf_len > 0)
69 memcpy(news, sbuf, sbuf_len);
70 news[sbuf_len] = 0;
71 NYD_LEAVE;
72 return news;
75 FL char *
76 (savecatsep)(char const *s1, char sep, char const *s2 n_MEMORY_DEBUG_ARGS)
78 size_t l1, l2;
79 char *news;
80 NYD_ENTER;
82 l1 = (s1 != NULL) ? strlen(s1) : 0;
83 l2 = strlen(s2);
84 news = (n_autorec_alloc_from_pool)(NULL, l1 + (sep != '\0') + l2 +1
85 n_MEMORY_DEBUG_ARGSCALL);
86 if (l1 > 0) {
87 memcpy(news + 0, s1, l1);
88 if (sep != '\0')
89 news[l1++] = sep;
91 if(l2 > 0)
92 memcpy(news + l1, s2, l2);
93 news[l1 + l2] = '\0';
94 NYD_LEAVE;
95 return news;
/*
 * Support routines, auto-reclaimed storage
 */
102 FL char *
103 (i_strdup)(char const *src n_MEMORY_DEBUG_ARGS)
105 size_t sz;
106 char *dest;
107 NYD_ENTER;
109 sz = strlen(src);
110 dest = (n_autorec_alloc_from_pool)(NULL, sz +1 n_MEMORY_DEBUG_ARGSCALL);
111 if(sz > 0)
112 i_strcpy(dest, src, sz);
113 dest[sz] = '\0';
114 NYD_LEAVE;
115 return dest;
118 FL struct str *
119 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
121 va_list vl;
122 size_t l;
123 char const *cs;
124 NYD_ENTER;
126 va_start(vl, self);
127 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
128 l += strlen(cs);
129 va_end(vl);
131 self->l = l;
132 self->s = salloc(l +1);
134 va_start(vl, self);
135 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
136 size_t i;
138 i = strlen(cs);
139 if(i > 0){
140 memcpy(self->s + l, cs, i);
141 l += i;
144 self->s[l] = '\0';
145 va_end(vl);
146 NYD_LEAVE;
147 return self;
150 FL struct str *
151 (str_concat_cpa)(struct str *self, char const * const *cpa,
152 char const *sep_o_null n_MEMORY_DEBUG_ARGS)
154 size_t sonl, l;
155 char const * const *xcpa;
156 NYD_ENTER;
158 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
160 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
161 l += strlen(*xcpa) + sonl;
163 self->l = l;
164 self->s = (n_autorec_alloc_from_pool)(NULL, l +1 n_MEMORY_DEBUG_ARGSCALL);
166 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
167 size_t i;
169 i = strlen(*xcpa);
170 if(i > 0){
171 memcpy(self->s + l, *xcpa, i);
172 l += i;
174 if (sonl > 0) {
175 memcpy(self->s + l, sep_o_null, sonl);
176 l += sonl;
179 self->s[l] = '\0';
180 NYD_LEAVE;
181 return self;
/*
 * Routines that are not related to auto-reclaimed storage follow.
 */
188 FL bool_t
189 n_anyof_buf(char const *template, char const *dat, size_t len){
190 char c;
191 NYD2_ENTER;
193 if(len == UIZ_MAX){
194 while((c = *template++) != '\0')
195 if(strchr(dat, c) != NULL)
196 break;
197 }else if(len > 0){
198 while((c = *template++) != '\0')
199 if(memchr(dat, c, len) != NULL)
200 break;
201 }else
202 c = '\0';
203 NYD2_LEAVE;
204 return (c != '\0');
207 FL char *
208 n_strsep(char **iolist, char sep, bool_t ignore_empty){
209 char *base, *cp;
210 NYD2_ENTER;
212 for(base = *iolist; base != NULL; base = *iolist){
213 while(*base != '\0' && blankspacechar(*base))
214 ++base;
216 cp = strchr(base, sep);
217 if(cp != NULL)
218 *iolist = &cp[1];
219 else{
220 *iolist = NULL;
221 cp = &base[strlen(base)];
223 while(cp > base && blankspacechar(cp[-1]))
224 --cp;
225 *cp = '\0';
226 if(*base != '\0' || !ignore_empty)
227 break;
229 NYD2_LEAVE;
230 return base;
233 FL char *
234 n_strsep_esc(char **iolist, char sep, bool_t ignore_empty){
235 char *cp, c, *base;
236 bool_t isesc, anyesc;
237 NYD2_ENTER;
239 for(base = *iolist; base != NULL; base = *iolist){
240 while((c = *base) != '\0' && blankspacechar(c))
241 ++base;
243 for(isesc = anyesc = FAL0, cp = base;; ++cp){
244 if(n_UNLIKELY((c = *cp) == '\0')){
245 *iolist = NULL;
246 break;
247 }else if(!isesc){
248 if(c == sep){
249 *iolist = &cp[1];
250 break;
252 isesc = (c == '\\');
253 }else{
254 isesc = FAL0;
255 anyesc |= (c == sep);
259 while(cp > base && blankspacechar(cp[-1]))
260 --cp;
261 *cp = '\0';
263 if(*base != '\0'){
264 if(anyesc){
265 char *ins;
267 for(ins = cp = base;; ++ins)
268 if((c = *cp) == '\\' && cp[1] == sep){
269 *ins = sep;
270 cp += 2;
271 }else if((*ins = (++cp, c)) == '\0')
272 break;
276 if(*base != '\0' || !ignore_empty)
277 break;
279 NYD2_LEAVE;
280 return base;
283 FL void
284 i_strcpy(char *dest, char const *src, size_t size)
286 NYD2_ENTER;
287 if (size > 0) {
288 for (;; ++dest, ++src)
289 if ((*dest = lowerconv(*src)) == '\0') {
290 break;
291 } else if (--size == 0) {
292 *dest = '\0';
293 break;
296 NYD2_LEAVE;
299 FL bool_t
300 is_prefix(char const *as1, char const *as2) /* TODO arg order */
302 char c;
303 NYD2_ENTER;
305 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
306 if (*as2 == '\0')
307 break;
308 NYD2_LEAVE;
309 return (c == '\0');
312 FL char *
313 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
315 char const *cp;
316 size_t i;
317 char c, *rv;
318 NYD2_ENTER;
320 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
321 if (c == '"' || c == '\\')
322 ++i;
323 rv = salloc(i +1);
325 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
326 if (c == '"' || c == '\\')
327 rv[i++] = '\\';
328 rv[i] = '\0';
329 NYD2_LEAVE;
330 return rv;
333 FL char *
334 laststring(char *linebuf, bool_t *needs_list, bool_t strip)
336 char *cp, *p, quoted;
337 NYD_ENTER;
339 /* Anything to do at all? */
340 if (*(cp = linebuf) == '\0')
341 goto jnull;
342 cp += strlen(linebuf) -1;
344 /* Strip away trailing blanks */
345 while (spacechar(*cp) && cp > linebuf)
346 --cp;
347 cp[1] = '\0';
348 if (cp == linebuf)
349 goto jleave;
351 /* Now search for the BOS of the "last string" */
352 quoted = *cp;
353 if (quoted == '\'' || quoted == '"') {
354 if (strip)
355 *cp = '\0';
356 } else
357 quoted = ' ';
359 while (cp > linebuf) {
360 --cp;
361 if (quoted != ' ') {
362 if (*cp != quoted)
363 continue;
364 } else if (!spacechar(*cp))
365 continue;
366 if (cp == linebuf || cp[-1] != '\\') {
367 /* When in whitespace mode, WS prefix doesn't belong */
368 if (quoted == ' ')
369 ++cp;
370 break;
372 /* Expand the escaped quote character */
373 for (p = --cp; (p[0] = p[1]) != '\0'; ++p)
376 if (strip && quoted != ' ' && *cp == quoted)
377 for (p = cp; (p[0] = p[1]) != '\0'; ++p)
380 /* The "last string" has been skipped over, but still, try to step backwards
381 * until we are at BOS or see whitespace, so as to make possible things like
382 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
383 while (cp > linebuf) {
384 --cp;
385 if (spacechar(*cp)) {
386 p = cp;
387 *cp++ = '\0';
388 /* We can furtherly release our callees if we now decide whether the
389 * remaining non-"last string" line content contains non-WS */
390 while (--p >= linebuf)
391 if (!spacechar(*p))
392 goto jleave;
393 linebuf = cp;
394 break;
398 jleave:
399 if (cp != NULL && *cp == '\0')
400 goto jnull;
401 *needs_list = (cp != linebuf && *linebuf != '\0');
402 j_leave:
403 NYD_LEAVE;
404 return cp;
405 jnull:
406 *needs_list = FAL0;
407 cp = NULL;
408 goto j_leave;
411 FL void
412 makelow(char *cp) /* TODO isn't that crap? --> */
414 NYD_ENTER;
415 #ifdef HAVE_C90AMEND1
416 if (n_mb_cur_max > 1) {
417 char *tp = cp;
418 wchar_t wc;
419 int len;
421 while (*cp != '\0') {
422 len = mbtowc(&wc, cp, n_mb_cur_max);
423 if (len < 0)
424 *tp++ = *cp++;
425 else {
426 wc = towlower(wc);
427 if (wctomb(tp, wc) == len)
428 tp += len, cp += len;
429 else
430 *tp++ = *cp++; /* <-- at least here */
433 } else
434 #endif
437 *cp = tolower((uc_i)*cp);
438 while (*cp++ != '\0');
440 NYD_LEAVE;
443 FL bool_t
444 substr(char const *str, char const *sub)
446 char const *cp, *backup;
447 NYD_ENTER;
449 cp = sub;
450 backup = str;
451 while (*str != '\0' && *cp != '\0') {
452 #ifdef HAVE_C90AMEND1
453 if (n_mb_cur_max > 1) {
454 wchar_t c, c2;
455 int sz;
457 if ((sz = mbtowc(&c, cp, n_mb_cur_max)) == -1)
458 goto Jsinglebyte;
459 cp += sz;
460 if ((sz = mbtowc(&c2, str, n_mb_cur_max)) == -1)
461 goto Jsinglebyte;
462 str += sz;
463 c = towupper(c);
464 c2 = towupper(c2);
465 if (c != c2) {
466 if ((sz = mbtowc(&c, backup, n_mb_cur_max)) > 0) {
467 backup += sz;
468 str = backup;
469 } else
470 str = ++backup;
471 cp = sub;
473 } else
474 Jsinglebyte:
475 #endif
477 int c, c2;
479 c = *cp++ & 0377;
480 if (islower(c))
481 c = toupper(c);
482 c2 = *str++ & 0377;
483 if (islower(c2))
484 c2 = toupper(c2);
485 if (c != c2) {
486 str = ++backup;
487 cp = sub;
491 NYD_LEAVE;
492 return (*cp == '\0');
495 FL char *
496 sstpcpy(char *dst, char const *src)
498 NYD2_ENTER;
499 while ((*dst = *src++) != '\0')
500 ++dst;
501 NYD2_LEAVE;
502 return dst;
505 FL char *
506 (sstrdup)(char const *cp n_MEMORY_DEBUG_ARGS)
508 char *dp;
509 NYD2_ENTER;
511 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) n_MEMORY_DEBUG_ARGSCALL);
512 NYD2_LEAVE;
513 return dp;
516 FL char *
517 (sbufdup)(char const *cp, size_t len n_MEMORY_DEBUG_ARGS)
519 char *dp = NULL;
520 NYD2_ENTER;
522 dp = (n_alloc)(len +1 n_MEMORY_DEBUG_ARGSCALL);
523 if (cp != NULL)
524 memcpy(dp, cp, len);
525 dp[len] = '\0';
526 NYD2_LEAVE;
527 return dp;
530 FL ssize_t
531 n_strscpy(char *dst, char const *src, size_t dstsize){
532 ssize_t rv;
533 NYD2_ENTER;
535 if(n_LIKELY(dstsize > 0)){
536 rv = 0;
538 if((dst[rv] = src[rv]) == '\0')
539 goto jleave;
540 ++rv;
541 }while(--dstsize > 0);
542 dst[--rv] = '\0';
544 #ifdef HAVE_DEVEL
545 else
546 assert(dstsize > 0);
547 #endif
548 rv = -1;
549 jleave:
550 NYD2_LEAVE;
551 return rv;
554 FL int
555 asccasecmp(char const *s1, char const *s2)
557 int cmp;
558 NYD2_ENTER;
560 for (;;) {
561 char c1 = *s1++, c2 = *s2++;
562 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
563 break;
565 NYD2_LEAVE;
566 return cmp;
569 FL int
570 ascncasecmp(char const *s1, char const *s2, size_t sz)
572 int cmp = 0;
573 NYD2_ENTER;
575 while (sz-- > 0) {
576 char c1 = *s1++, c2 = *s2++;
577 cmp = (ui8_t)lowerconv(c1);
578 cmp -= (ui8_t)lowerconv(c2);
579 if (cmp != 0 || c1 == '\0')
580 break;
582 NYD2_LEAVE;
583 return cmp;
586 FL char const *
587 asccasestr(char const *s1, char const *s2)
589 char c2, c1;
590 NYD2_ENTER;
592 for (c2 = *s2++, c2 = lowerconv(c2);;) {
593 if ((c1 = *s1++) == '\0') {
594 s1 = NULL;
595 break;
597 if (lowerconv(c1) == c2 && is_asccaseprefix(s2, s1)) {
598 --s1;
599 break;
602 NYD2_LEAVE;
603 return s1;
606 FL bool_t
607 is_asccaseprefix(char const *as1, char const *as2) /* TODO arg order */
609 char c1, c2;
610 NYD2_ENTER;
612 for(;; ++as1, ++as2){
613 c1 = *as1;
614 c1 = lowerconv(c1);
615 c2 = *as2;
616 c2 = lowerconv(c2);
618 if(c1 != c2 || c1 == '\0')
619 break;
620 if(c2 == '\0')
621 break;
623 NYD2_LEAVE;
624 return (c1 == '\0');
627 FL bool_t
628 is_ascncaseprefix(char const *as1, char const *as2, size_t sz)
630 char c1, c2;
631 bool_t rv;
632 NYD2_ENTER;
634 for(rv = TRU1; sz-- > 0; ++as1, ++as2){
635 c1 = *as1;
636 c1 = lowerconv(c1);
637 c2 = *as2;
638 c2 = lowerconv(c2);
640 if(!(rv = (c1 == c2)) || c1 == '\0')
641 break;
642 if(c2 == '\0')
643 break;
645 NYD2_LEAVE;
646 return rv;
650 FL struct str *
651 (n_str_assign_buf)(struct str *self, char const *buf, uiz_t buflen
652 n_MEMORY_DEBUG_ARGS){
653 NYD_ENTER;
654 if(buflen == UIZ_MAX)
655 buflen = (buf == NULL) ? 0 : strlen(buf);
657 assert(buflen == 0 || buf != NULL);
659 if(n_LIKELY(buflen > 0)){
660 self->s = (n_realloc)(self->s, (self->l = buflen) +1
661 n_MEMORY_DEBUG_ARGSCALL);
662 memcpy(self->s, buf, buflen);
663 self->s[buflen] = '\0';
664 }else
665 self->l = 0;
666 NYD_LEAVE;
667 return self;
670 FL struct str *
671 (n_str_add_buf)(struct str *self, char const *buf, uiz_t buflen
672 n_MEMORY_DEBUG_ARGS){
673 NYD_ENTER;
674 if(buflen == UIZ_MAX)
675 buflen = (buf == NULL) ? 0 : strlen(buf);
677 assert(buflen == 0 || buf != NULL);
679 if(buflen > 0) {
680 size_t osl = self->l, nsl = osl + buflen;
682 self->s = (n_realloc)(self->s, (self->l = nsl) +1
683 n_MEMORY_DEBUG_ARGSCALL);
684 memcpy(self->s + osl, buf, buflen);
685 self->s[nsl] = '\0';
687 NYD_LEAVE;
688 return self;
691 FL struct str *
692 n_str_trim(struct str *self, enum n_str_trim_flags stf){
693 size_t l;
694 char const *cp;
695 NYD2_ENTER;
697 cp = self->s;
699 if((l = self->l) > 0 && (stf & n_STR_TRIM_FRONT)){
700 while(spacechar(*cp)){
701 ++cp;
702 if(--l == 0)
703 break;
705 self->s = n_UNCONST(cp);
708 if(l > 0 && (stf & n_STR_TRIM_END)){
709 for(cp += l -1; spacechar(*cp); --cp)
710 if(--l == 0)
711 break;
713 self->l = l;
715 NYD2_LEAVE;
716 return self;
719 FL struct str *
720 n_str_trim_ifs(struct str *self, bool_t dodefaults){
721 char s, t, n, c;
722 char const *ifs, *cp;
723 size_t l, i;
724 NYD2_ENTER;
726 if((l = self->l) == 0)
727 goto jleave;
729 ifs = ok_vlook(ifs_ws);
730 cp = self->s;
731 s = t = n = '\0';
733 /* Check whether we can go fast(er) path */
734 for(i = 0; (c = ifs[i]) != '\0'; ++i){
735 switch(c){
736 case ' ': s = c; break;
737 case '\t': t = c; break;
738 case '\n': n = c; break;
739 default:
740 /* Need to go the slow path */
741 while(strchr(ifs, *cp) != NULL){
742 ++cp;
743 if(--l == 0)
744 break;
746 self->s = n_UNCONST(cp);
748 if(l > 0){
749 for(cp += l -1; strchr(ifs, *cp) != NULL;){
750 if(--l == 0)
751 break;
752 /* An uneven number of reverse solidus escapes last WS! */
753 else if(*--cp == '\\'){
754 siz_t j;
756 for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
758 if(j & 1){
759 ++l;
760 break;
765 self->l = l;
767 if(!dodefaults)
768 goto jleave;
769 cp = self->s;
770 ++i;
771 break;
775 /* No ifs-ws? No more data? No trimming */
776 if(l == 0 || (i == 0 && !dodefaults))
777 goto jleave;
779 if(dodefaults){
780 s = ' ';
781 t = '\t';
782 n = '\n';
785 if(l > 0){
786 while((c = *cp) != '\0' && (c == s || c == t || c == n)){
787 ++cp;
788 if(--l == 0)
789 break;
791 self->s = n_UNCONST(cp);
794 if(l > 0){
795 for(cp += l -1; (c = *cp) != '\0' && (c == s || c == t || c == n);){
796 if(--l == 0)
797 break;
798 /* An uneven number of reverse solidus escapes last WS! */
799 else if(*--cp == '\\'){
800 siz_t j;
802 for(j = 1; l - (uiz_t)j > 0 && cp[-j] == '\\'; ++j)
804 if(j & 1){
805 ++l;
806 break;
811 self->l = l;
812 jleave:
813 NYD2_LEAVE;
814 return self;
/*
 * struct n_string TODO extend, optimize
 */
821 FL struct n_string *
822 (n_string_clear)(struct n_string *self n_MEMORY_DEBUG_ARGS){
823 NYD_ENTER;
825 assert(self != NULL);
827 if(self->s_size != 0){
828 if(!self->s_auto){
829 (n_free)(self->s_dat n_MEMORY_DEBUG_ARGSCALL);
831 self->s_len = self->s_auto = self->s_size = 0;
832 self->s_dat = NULL;
834 NYD_LEAVE;
835 return self;
838 FL struct n_string *
839 (n_string_reserve)(struct n_string *self, size_t noof n_MEMORY_DEBUG_ARGS){
840 ui32_t i, l, s;
841 NYD_ENTER;
843 assert(self != NULL);
845 s = self->s_size;
846 l = self->s_len;
847 #if 0 /* FIXME memory alloc too large */
848 if(SI32_MAX - n_ALIGN(1) - l <= noof)
849 n_panic(_("Memory allocation too large"));
850 #endif
852 if((i = s - l) <= ++noof){
853 i += l + (ui32_t)noof;
854 i = n_ALIGN(i);
855 self->s_size = i -1;
857 if(!self->s_auto)
858 self->s_dat = (n_realloc)(self->s_dat, i n_MEMORY_DEBUG_ARGSCALL);
859 else{
860 char *ndat = (n_autorec_alloc_from_pool)(NULL, i
861 n_MEMORY_DEBUG_ARGSCALL);
863 if(l > 0)
864 memcpy(ndat, self->s_dat, l);
865 self->s_dat = ndat;
868 NYD_LEAVE;
869 return self;
872 FL struct n_string *
873 (n_string_resize)(struct n_string *self, size_t nlen n_MEMORY_DEBUG_ARGS){
874 NYD_ENTER;
876 assert(self != NULL);
877 #if 0 /* FIXME memory alloc too large */
878 if(SI32_MAX - n_ALIGN(1) - l <= noof)
879 n_panic(_("Memory allocation too large"));
880 #endif
882 if(self->s_len < nlen)
883 self = (n_string_reserve)(self, nlen n_MEMORY_DEBUG_ARGSCALL);
884 self->s_len = (ui32_t)nlen;
885 NYD_LEAVE;
886 return self;
889 FL struct n_string *
890 (n_string_push_buf)(struct n_string *self, char const *buf, size_t buflen
891 n_MEMORY_DEBUG_ARGS){
892 NYD_ENTER;
894 assert(self != NULL);
895 assert(buflen == 0 || buf != NULL);
897 if(buflen == UIZ_MAX)
898 buflen = (buf == NULL) ? 0 : strlen(buf);
900 if(buflen > 0){
901 ui32_t i;
903 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
904 memcpy(&self->s_dat[i = self->s_len], buf, buflen);
905 self->s_len = (i += (ui32_t)buflen);
907 NYD_LEAVE;
908 return self;
911 FL struct n_string *
912 (n_string_push_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
913 NYD_ENTER;
915 assert(self != NULL);
917 if(self->s_len + 1 >= self->s_size)
918 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
919 self->s_dat[self->s_len++] = c;
920 NYD_LEAVE;
921 return self;
924 FL struct n_string *
925 (n_string_unshift_buf)(struct n_string *self, char const *buf, size_t buflen
926 n_MEMORY_DEBUG_ARGS){
927 NYD_ENTER;
929 assert(self != NULL);
930 assert(buflen == 0 || buf != NULL);
932 if(buflen == UIZ_MAX)
933 buflen = (buf == NULL) ? 0 : strlen(buf);
935 if(buflen > 0){
936 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
937 if(self->s_len > 0)
938 memmove(&self->s_dat[buflen], self->s_dat, self->s_len);
939 memcpy(self->s_dat, buf, buflen);
940 self->s_len += (ui32_t)buflen;
942 NYD_LEAVE;
943 return self;
946 FL struct n_string *
947 (n_string_unshift_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
948 NYD_ENTER;
950 assert(self != NULL);
952 if(self->s_len + 1 >= self->s_size)
953 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
954 if(self->s_len > 0)
955 memmove(&self->s_dat[1], self->s_dat, self->s_len);
956 self->s_dat[0] = c;
957 ++self->s_len;
958 NYD_LEAVE;
959 return self;
962 FL struct n_string *
963 (n_string_insert_buf)(struct n_string *self, size_t idx,
964 char const *buf, size_t buflen n_MEMORY_DEBUG_ARGS){
965 NYD_ENTER;
967 assert(self != NULL);
968 assert(buflen == 0 || buf != NULL);
969 assert(idx <= self->s_len);
971 if(buflen == UIZ_MAX)
972 buflen = (buf == NULL) ? 0 : strlen(buf);
974 if(buflen > 0){
975 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
976 if(self->s_len > 0)
977 memmove(&self->s_dat[idx + buflen], &self->s_dat[idx],
978 self->s_len - idx);
979 memcpy(&self->s_dat[idx], buf, buflen);
980 self->s_len += (ui32_t)buflen;
982 NYD_LEAVE;
983 return self;
986 FL struct n_string *
987 (n_string_insert_c)(struct n_string *self, size_t idx,
988 char c n_MEMORY_DEBUG_ARGS){
989 NYD_ENTER;
991 assert(self != NULL);
992 assert(idx <= self->s_len);
994 if(self->s_len + 1 >= self->s_size)
995 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
996 if(self->s_len > 0)
997 memmove(&self->s_dat[idx + 1], &self->s_dat[idx], self->s_len - idx);
998 self->s_dat[idx] = c;
999 ++self->s_len;
1000 NYD_LEAVE;
1001 return self;
1004 FL struct n_string *
1005 n_string_cut(struct n_string *self, size_t idx, size_t len){
1006 NYD_ENTER;
1008 assert(self != NULL);
1009 assert(UIZ_MAX - idx > len);
1010 assert(SI32_MAX >= idx + len);
1011 assert(idx + len <= self->s_len);
1013 if(len > 0)
1014 memmove(&self->s_dat[idx], &self->s_dat[idx + len],
1015 (self->s_len -= len) - idx);
1016 NYD_LEAVE;
1017 return self;
1020 FL char *
1021 (n_string_cp)(struct n_string *self n_MEMORY_DEBUG_ARGS){
1022 char *rv;
1023 NYD2_ENTER;
1025 assert(self != NULL);
1027 if(self->s_size == 0)
1028 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
1030 (rv = self->s_dat)[self->s_len] = '\0';
1031 NYD2_LEAVE;
1032 return rv;
1035 FL char const *
1036 n_string_cp_const(struct n_string const *self){
1037 char const *rv;
1038 NYD2_ENTER;
1040 assert(self != NULL);
1042 if(self->s_size != 0){
1043 ((struct n_string*)n_UNCONST(self))->s_dat[self->s_len] = '\0';
1044 rv = self->s_dat;
1045 }else
1046 rv = n_empty;
1047 NYD2_LEAVE;
1048 return rv;
/*
 * UTF-8
 */
1055 FL ui32_t
1056 n_utf8_to_utf32(char const **bdat, size_t *blen) /* TODO check false UTF8 */
1058 char const *cp;
1059 size_t l;
1060 ui32_t c, x;
1061 NYD2_ENTER;
1063 cp = *bdat;
1064 l = *blen - 1;
1065 x = (ui8_t)*cp++;
1067 if (x <= 0x7Fu)
1068 c = x;
1069 else {
1070 /* TODO UTF-8 decoder false sequences: Zhang Boyang, TinyCC [a82c11f] */
1071 if ((x & 0xE0u) == 0xC0u) {
1072 if (l < 1)
1073 goto jerr;
1074 l -= 1;
1075 c = x & ~0xC0u;
1076 } else if ((x & 0xF0u) == 0xE0u) {
1077 if (l < 2)
1078 goto jerr;
1079 l -= 2;
1080 c = x & ~0xE0u;
1081 c <<= 6;
1082 x = (ui8_t)*cp++;
1083 c |= x & 0x7Fu;
1084 } else {
1085 if (l < 3)
1086 goto jerr;
1087 l -= 3;
1088 c = x & ~0xF0u;
1089 c <<= 6;
1090 x = (ui8_t)*cp++;
1091 c |= x & 0x7Fu;
1092 c <<= 6;
1093 x = (ui8_t)*cp++;
1094 c |= x & 0x7Fu;
1096 c <<= 6;
1097 x = (ui8_t)*cp++;
1098 c |= x & 0x7Fu;
1101 jleave:
1102 *bdat = cp;
1103 *blen = l;
1104 NYD2_LEAVE;
1105 return c;
1106 jerr:
1107 c = UI32_MAX;
1108 goto jleave;
1111 FL size_t
1112 n_utf32_to_utf8(ui32_t c, char *buf)
1114 struct {
1115 ui32_t lower_bound;
1116 ui32_t upper_bound;
1117 ui8_t enc_leader;
1118 ui8_t enc_lval;
1119 ui8_t dec_leader_mask;
1120 ui8_t dec_leader_val_mask;
1121 ui8_t dec_bytes_togo;
1122 ui8_t cat_index;
1123 ui8_t __dummy[2];
1124 } const _cat[] = {
1125 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
1126 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
1127 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
1128 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
1129 /* xxx _from_utf32() simply assumes magic code points for surrogates!
1130 * xxx (However, should we ever get yet another surrogate range we
1131 * xxx need to deal with that all over the place anyway? */
1132 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
1133 {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
1134 }, *catp = _cat;
1135 size_t l;
1137 if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
1138 if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
1139 if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
1140 if (c <= _cat[3].upper_bound) {
1141 /* Surrogates may not be converted (Compatibility rule C10) */
1142 if (c >= 0xD800u && c <= 0xDFFFu)
1143 goto jerr;
1144 catp += 3;
1145 goto j3;
1147 if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
1148 jerr:
1149 c = 0xFFFDu; /* Unicode replacement character */
1150 catp += 3;
1151 goto j3;
1153 buf[3] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1155 buf[2] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1157 buf[1] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
1159 buf[0] = (char)catp->enc_leader | (char)(c);
1161 buf[catp->enc_lval] = '\0';
1162 l = catp->enc_lval;
1163 NYD2_LEAVE;
1164 return l;
/*
 * Our iconv(3) wrapper
 */
1171 #ifdef HAVE_ICONV
1172 FL iconv_t
1173 n_iconv_open(char const *tocode, char const *fromcode){
1174 iconv_t id;
1175 NYD_ENTER;
1177 if((!asccasecmp(fromcode, "unknown-8bit") ||
1178 !asccasecmp(fromcode, "binary")) &&
1179 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
1180 fromcode = ok_vlook(CHARSET_8BIT_OKEY);
1182 id = iconv_open(tocode, fromcode);
1184 /* If the encoding names are equal at this point, they are just not
1185 * understood by iconv(), and we cannot sensibly use it in any way. We do
1186 * not perform this as an optimization above since iconv() can otherwise be
1187 * used to check the validity of the input even with identical encoding
1188 * names */
1189 if (id == (iconv_t)-1 && !asccasecmp(tocode, fromcode))
1190 n_err_no = n_ERR_NONE;
1191 NYD_LEAVE;
1192 return id;
1195 FL void
1196 n_iconv_close(iconv_t cd){
1197 NYD_ENTER;
1198 iconv_close(cd);
1199 if(cd == iconvd)
1200 iconvd = (iconv_t)-1;
1201 NYD_LEAVE;
1204 FL void
1205 n_iconv_reset(iconv_t cd){
1206 NYD_ENTER;
1207 iconv(cd, NULL, NULL, NULL, NULL);
1208 NYD_LEAVE;
1211 /* (2012-09-24: export and use it exclusively to isolate prototype problems
1212 * (*inb* is 'char const **' except in POSIX) in a single place.
1213 * GNU libiconv even allows for configuration time const/non-const..
1214 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
1215 * support compiler invocations which bail on error, so no -Werror */
1216 /* Citrus project? */
1217 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
1218 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
1219 # if n_OS_DRAGONFLY
1220 # define __INBCAST(S) (char ** __restrict__)n_UNCONST(S)
1221 # else
1222 # define __INBCAST(S) (char const **)n_UNCONST(S)
1223 # endif
1224 # elif n_OS_SUNOS || n_OS_SOLARIS
1225 # define __INBCAST(S) (char const ** __restrict__)n_UNCONST(S)
1226 # endif
1227 # ifndef __INBCAST
1228 # define __INBCAST(S) (char **)n_UNCONST(S)
1229 # endif
1231 FL int
1232 n_iconv_buf(iconv_t cd, enum n_iconv_flags icf,
1233 char const **inb, size_t *inbleft, char **outb, size_t *outbleft){
1234 int err;
1235 NYD2_ENTER;
1237 if((icf & n_ICONV_UNIREPL) && !(n_psonce & n_PSO_UNICODE))
1238 icf &= ~n_ICONV_UNIREPL;
1240 for(;;){
1241 size_t sz;
1243 sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft);
1244 if(sz > 0 && !(icf & n_ICONV_IGN_NOREVERSE)){
1245 err = n_ERR_NOENT;
1246 goto jleave;
1248 if(sz != (size_t)-1)
1249 break;
1251 err = n_err_no;
1252 if(!(icf & n_ICONV_IGN_ILSEQ) || err != n_ERR_ILSEQ)
1253 goto jleave;
1254 if(*inbleft > 0){
1255 ++(*inb);
1256 --(*inbleft);
1257 if(icf & n_ICONV_UNIREPL){
1258 if(*outbleft >= sizeof(n_unirepl) -1){
1259 memcpy(*outb, n_unirepl, sizeof(n_unirepl) -1);
1260 *outb += sizeof(n_unirepl) -1;
1261 *outbleft -= sizeof(n_unirepl) -1;
1262 continue;
1264 }else if(*outbleft > 0){
1265 *(*outb)++ = '?';
1266 --*outbleft;
1267 continue;
1269 err = n_ERR_2BIG;
1270 goto jleave;
1271 }else if(*outbleft > 0){
1272 **outb = '\0';
1273 goto jleave;
1276 err = 0;
1277 jleave:
1278 n_iconv_err_no = err;
1279 NYD2_LEAVE;
1280 return err;
1282 # undef __INBCAST
1284 FL int
1285 n_iconv_str(iconv_t cd, enum n_iconv_flags icf,
1286 struct str *out, struct str const *in, struct str *in_rest_or_null)
1288 int err;
1289 char *obb, *ob;
1290 char const *ib;
1291 size_t olb, ol, il;
1292 NYD2_ENTER;
1294 obb = out->s;
1295 olb = out->l;
1296 ol = in->l;
1298 ol = (ol << 1) - (ol >> 4);
1299 if (olb <= ol) {
1300 olb = ol;
1301 goto jrealloc;
1304 for (;;) {
1305 ib = in->s;
1306 il = in->l;
1307 ob = obb;
1308 ol = olb;
1309 if((err = n_iconv_buf(cd, icf, &ib, &il, &ob, &ol)) == 0 ||
1310 err != n_ERR_2BIG)
1311 break;
1312 olb += in->l;
1313 jrealloc:
1314 obb = n_realloc(obb, olb +1);
1317 if (in_rest_or_null != NULL) {
1318 in_rest_or_null->s = n_UNCONST(ib);
1319 in_rest_or_null->l = il;
1321 out->s = obb;
1322 out->s[out->l = olb - ol] = '\0';
1323 NYD2_LEAVE;
1324 return err;
1327 FL char *
1328 n_iconv_onetime_cp(enum n_iconv_flags icf,
1329 char const *tocode, char const *fromcode, char const *input){
1330 struct str out, in;
1331 iconv_t icd;
1332 char *rv;
1333 NYD2_ENTER;
1335 rv = NULL;
1336 if(tocode == NULL)
1337 tocode = ok_vlook(ttycharset);
1338 if(fromcode == NULL)
1339 fromcode = "utf-8";
1341 if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
1342 goto jleave;
1344 in.l = strlen(in.s = n_UNCONST(input)); /* logical */
1345 out.s = NULL, out.l = 0;
1346 if(!n_iconv_str(icd, icf, &out, &in, NULL))
1347 rv = savestrbuf(out.s, out.l);
1348 if(out.s != NULL)
1349 free(out.s);
1351 iconv_close(icd);
1352 jleave:
1353 NYD2_LEAVE;
1354 return rv;
1356 #endif /* HAVE_ICONV */
1358 /* s-it-mode */