(BWDIC!) Allow `source' in `call'ed macros..
[s-mailx.git] / strings.c
blob369c0d4e60c9c593682cfcb78dea3c74c22a1a9b
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE strings
#ifndef HAVE_AMALGAMATION
# include "nail.h"
#endif

#include <ctype.h>
#include <limits.h>

44 FL char *
45 (savestr)(char const *str SALLOC_DEBUG_ARGS)
47 size_t size;
48 char *news;
49 NYD_ENTER;
51 size = strlen(str) +1;
52 news = (salloc)(size SALLOC_DEBUG_ARGSCALL);
53 memcpy(news, str, size);
54 NYD_LEAVE;
55 return news;
58 FL char *
59 (savestrbuf)(char const *sbuf, size_t sbuf_len SALLOC_DEBUG_ARGS)
61 char *news;
62 NYD_ENTER;
64 news = (salloc)(sbuf_len +1 SALLOC_DEBUG_ARGSCALL);
65 memcpy(news, sbuf, sbuf_len);
66 news[sbuf_len] = 0;
67 NYD_LEAVE;
68 return news;
71 FL char *
72 (savecatsep)(char const *s1, char sep, char const *s2 SALLOC_DEBUG_ARGS)
74 size_t l1, l2;
75 char *news;
76 NYD_ENTER;
78 l1 = (s1 != NULL) ? strlen(s1) : 0;
79 l2 = strlen(s2);
80 news = (salloc)(l1 + (sep != '\0') + l2 +1 SALLOC_DEBUG_ARGSCALL);
81 if (l1 > 0) {
82 memcpy(news + 0, s1, l1);
83 if (sep != '\0')
84 news[l1++] = sep;
86 memcpy(news + l1, s2, l2);
87 news[l1 + l2] = '\0';
88 NYD_LEAVE;
89 return news;
/*
 * Support routines, auto-reclaimed storage
 */

96 FL char *
97 (i_strdup)(char const *src SALLOC_DEBUG_ARGS)
99 size_t sz;
100 char *dest;
101 NYD_ENTER;
103 sz = strlen(src) +1;
104 dest = (salloc)(sz SALLOC_DEBUG_ARGSCALL);
105 i_strcpy(dest, src, sz);
106 NYD_LEAVE;
107 return dest;
110 FL struct str *
111 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
113 va_list vl;
114 size_t l;
115 char const *cs;
116 NYD_ENTER;
118 va_start(vl, self);
119 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
120 l += strlen(cs);
121 va_end(vl);
123 self->l = l;
124 self->s = salloc(l +1);
126 va_start(vl, self);
127 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
128 size_t i = strlen(cs);
129 memcpy(self->s + l, cs, i);
130 l += i;
132 self->s[l] = '\0';
133 va_end(vl);
134 NYD_LEAVE;
135 return self;
138 FL struct str *
139 (str_concat_cpa)(struct str *self, char const * const *cpa,
140 char const *sep_o_null SALLOC_DEBUG_ARGS)
142 size_t sonl, l;
143 char const * const *xcpa;
144 NYD_ENTER;
146 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
148 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
149 l += strlen(*xcpa) + sonl;
151 self->l = l;
152 self->s = (salloc)(l +1 SALLOC_DEBUG_ARGSCALL);
154 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
155 size_t i = strlen(*xcpa);
156 memcpy(self->s + l, *xcpa, i);
157 l += i;
158 if (sonl > 0) {
159 memcpy(self->s + l, sep_o_null, sonl);
160 l += sonl;
163 self->s[l] = '\0';
164 NYD_LEAVE;
165 return self;
/*
 * Routines that are not related to auto-reclaimed storage follow.
 */

172 FL int
173 anyof(char const *s1, char const *s2)
175 NYD2_ENTER;
176 for (; *s1 != '\0'; ++s1)
177 if (strchr(s2, *s1) != NULL)
178 break;
179 NYD2_LEAVE;
180 return (*s1 != '\0');
183 FL char *
184 n_strsep(char **iolist, char sep, bool_t ignore_empty)
186 char *base, *cp;
187 NYD2_ENTER;
189 for (base = *iolist; base != NULL; base = *iolist) {
190 while (*base != '\0' && blankspacechar(*base))
191 ++base;
192 cp = strchr(base, sep);
193 if (cp != NULL)
194 *iolist = cp + 1;
195 else {
196 *iolist = NULL;
197 cp = base + strlen(base);
199 while (cp > base && blankspacechar(cp[-1]))
200 --cp;
201 *cp = '\0';
202 if (*base != '\0' || !ignore_empty)
203 break;
205 NYD2_LEAVE;
206 return base;
209 FL char *
210 n_strescsep(char **iolist, char sep, bool_t ignore_empty){
211 char *cp, c, *base;
212 bool_t isesc, anyesc;
213 NYD2_ENTER;
215 assert(sep != '\0');
217 for(base = *iolist; base != NULL; base = *iolist){
218 while((c = *base) != '\0' && blankspacechar(c))
219 ++base;
221 for(isesc = anyesc = FAL0, cp = base;; ++cp){
222 if(UNLIKELY((c = *cp) == '\0')){
223 *iolist = NULL;
224 break;
225 }else if(!isesc){
226 if(c == sep){
227 *iolist = cp + 1;
228 break;
230 isesc = (c == '\\');
231 }else{
232 isesc = FAL0;
233 anyesc |= (c == sep);
237 while(cp > base && blankspacechar(cp[-1]))
238 --cp;
239 *cp = '\0';
241 if(*base != '\0'){
242 if(anyesc){
243 char *ins;
245 for(ins = cp = base;; ++ins)
246 if((c = *cp) == '\\' && cp[1] == sep){
247 *ins = sep;
248 cp += 2;
249 }else if((*ins = (++cp, c)) == '\0')
250 break;
252 break;
254 if(!ignore_empty)
255 break;
257 NYD2_LEAVE;
258 return base;
261 FL void
262 i_strcpy(char *dest, char const *src, size_t size)
264 NYD2_ENTER;
265 if (size > 0) {
266 for (;; ++dest, ++src)
267 if ((*dest = lowerconv(*src)) == '\0') {
268 break;
269 } else if (--size == 0) {
270 *dest = '\0';
271 break;
274 NYD2_LEAVE;
277 FL int
278 is_prefix(char const *as1, char const *as2)
280 char c;
281 NYD2_ENTER;
283 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
284 if (*as2 == '\0')
285 break;
286 NYD2_LEAVE;
287 return (c == '\0');
290 FL char *
291 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
293 char const *cp;
294 size_t i;
295 char c, *rv;
296 NYD2_ENTER;
298 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
299 if (c == '"' || c == '\\')
300 ++i;
301 rv = salloc(i +1);
303 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
304 if (c == '"' || c == '\\')
305 rv[i++] = '\\';
306 rv[i] = '\0';
307 NYD2_LEAVE;
308 return rv;
311 FL char *
312 laststring(char *linebuf, bool_t *needs_list, bool_t strip)
314 char *cp, *p, quoted;
315 NYD_ENTER;
317 /* Anything to do at all? */
318 if (*(cp = linebuf) == '\0')
319 goto jnull;
320 cp += strlen(linebuf) -1;
322 /* Strip away trailing blanks */
323 while (spacechar(*cp) && cp > linebuf)
324 --cp;
325 cp[1] = '\0';
326 if (cp == linebuf)
327 goto jleave;
329 /* Now search for the BOS of the "last string" */
330 quoted = *cp;
331 if (quoted == '\'' || quoted == '"') {
332 if (strip)
333 *cp = '\0';
334 } else
335 quoted = ' ';
337 while (cp > linebuf) {
338 --cp;
339 if (quoted != ' ') {
340 if (*cp != quoted)
341 continue;
342 } else if (!spacechar(*cp))
343 continue;
344 if (cp == linebuf || cp[-1] != '\\') {
345 /* When in whitespace mode, WS prefix doesn't belong */
346 if (quoted == ' ')
347 ++cp;
348 break;
350 /* Expand the escaped quote character */
351 for (p = --cp; (p[0] = p[1]) != '\0'; ++p)
354 if (strip && quoted != ' ' && *cp == quoted)
355 for (p = cp; (p[0] = p[1]) != '\0'; ++p)
358 /* The "last string" has been skipped over, but still, try to step backwards
359 * until we are at BOS or see whitespace, so as to make possible things like
360 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
361 while (cp > linebuf) {
362 --cp;
363 if (spacechar(*cp)) {
364 p = cp;
365 *cp++ = '\0';
366 /* We can furtherly release our callees if we now decide wether the
367 * remaining non-"last string" line content contains non-WS */
368 while (--p >= linebuf)
369 if (!spacechar(*p))
370 goto jleave;
371 linebuf = cp;
372 break;
376 jleave:
377 if (cp != NULL && *cp == '\0')
378 goto jnull;
379 *needs_list = (cp != linebuf && *linebuf != '\0');
380 j_leave:
381 NYD_LEAVE;
382 return cp;
383 jnull:
384 *needs_list = FAL0;
385 cp = NULL;
386 goto j_leave;
389 FL void
390 makelow(char *cp) /* TODO isn't that crap? --> */
392 NYD_ENTER;
393 #ifdef HAVE_C90AMEND1
394 if (mb_cur_max > 1) {
395 char *tp = cp;
396 wchar_t wc;
397 int len;
399 while (*cp != '\0') {
400 len = mbtowc(&wc, cp, mb_cur_max);
401 if (len < 0)
402 *tp++ = *cp++;
403 else {
404 wc = towlower(wc);
405 if (wctomb(tp, wc) == len)
406 tp += len, cp += len;
407 else
408 *tp++ = *cp++; /* <-- at least here */
411 } else
412 #endif
415 *cp = tolower((uc_i)*cp);
416 while (*cp++ != '\0');
418 NYD_LEAVE;
421 FL bool_t
422 substr(char const *str, char const *sub)
424 char const *cp, *backup;
425 NYD_ENTER;
427 cp = sub;
428 backup = str;
429 while (*str != '\0' && *cp != '\0') {
430 #ifdef HAVE_C90AMEND1
431 if (mb_cur_max > 1) {
432 wchar_t c, c2;
433 int sz;
435 if ((sz = mbtowc(&c, cp, mb_cur_max)) == -1)
436 goto Jsinglebyte;
437 cp += sz;
438 if ((sz = mbtowc(&c2, str, mb_cur_max)) == -1)
439 goto Jsinglebyte;
440 str += sz;
441 c = towupper(c);
442 c2 = towupper(c2);
443 if (c != c2) {
444 if ((sz = mbtowc(&c, backup, mb_cur_max)) > 0) {
445 backup += sz;
446 str = backup;
447 } else
448 str = ++backup;
449 cp = sub;
451 } else
452 Jsinglebyte:
453 #endif
455 int c, c2;
457 c = *cp++ & 0377;
458 if (islower(c))
459 c = toupper(c);
460 c2 = *str++ & 0377;
461 if (islower(c2))
462 c2 = toupper(c2);
463 if (c != c2) {
464 str = ++backup;
465 cp = sub;
469 NYD_LEAVE;
470 return (*cp == '\0');
473 FL char *
474 sstpcpy(char *dst, char const *src)
476 NYD2_ENTER;
477 while ((*dst = *src++) != '\0')
478 ++dst;
479 NYD2_LEAVE;
480 return dst;
483 FL char *
484 (sstrdup)(char const *cp SMALLOC_DEBUG_ARGS)
486 char *dp;
487 NYD2_ENTER;
489 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) SMALLOC_DEBUG_ARGSCALL);
490 NYD2_LEAVE;
491 return dp;
494 FL char *
495 (sbufdup)(char const *cp, size_t len SMALLOC_DEBUG_ARGS)
497 char *dp = NULL;
498 NYD2_ENTER;
500 dp = (smalloc)(len +1 SMALLOC_DEBUG_ARGSCALL);
501 if (cp != NULL)
502 memcpy(dp, cp, len);
503 dp[len] = '\0';
504 NYD2_LEAVE;
505 return dp;
508 FL ssize_t
509 n_strscpy(char *dst, char const *src, size_t dstsize){
510 ssize_t rv;
511 NYD2_ENTER;
513 if(LIKELY(dstsize > 0)){
514 rv = 0;
516 if((dst[rv] = src[rv]) == '\0')
517 goto jleave;
518 ++rv;
519 }while(--dstsize > 0);
520 dst[--rv] = '\0';
522 #ifdef HAVE_DEVEL
523 else
524 assert(dstsize > 0);
525 #endif
526 rv = -1;
527 jleave:
528 NYD2_LEAVE;
529 return rv;
532 FL int
533 asccasecmp(char const *s1, char const *s2)
535 int cmp;
536 NYD2_ENTER;
538 for (;;) {
539 char c1 = *s1++, c2 = *s2++;
540 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
541 break;
543 NYD2_LEAVE;
544 return cmp;
547 FL int
548 ascncasecmp(char const *s1, char const *s2, size_t sz)
550 int cmp = 0;
551 NYD2_ENTER;
553 while (sz-- > 0) {
554 char c1 = *s1++, c2 = *s2++;
555 cmp = (ui8_t)lowerconv(c1);
556 cmp -= (ui8_t)lowerconv(c2);
557 if (cmp != 0 || c1 == '\0')
558 break;
560 NYD2_LEAVE;
561 return cmp;
564 FL char const *
565 asccasestr(char const *s1, char const *s2)
567 char c2, c1;
568 NYD2_ENTER;
570 for (c2 = *s2++, c2 = lowerconv(c2);;) {
571 if ((c1 = *s1++) == '\0') {
572 s1 = NULL;
573 break;
575 if (lowerconv(c1) == c2 && is_asccaseprefix(s1, s2)) {
576 --s1;
577 break;
580 NYD2_LEAVE;
581 return s1;
584 FL bool_t
585 is_asccaseprefix(char const *as1, char const *as2)
587 bool_t rv = FAL0;
588 NYD2_ENTER;
590 for (;; ++as1, ++as2) {
591 char c1 = lowerconv(*as1), c2 = lowerconv(*as2);
593 if ((rv = (c2 == '\0')))
594 break;
595 if (c1 != c2)
596 break;
598 NYD2_LEAVE;
599 return rv;
602 FL struct str *
603 (n_str_assign_buf)(struct str *self, char const *buf, uiz_t buflen
604 SMALLOC_DEBUG_ARGS){
605 NYD_ENTER;
606 if(buflen == UIZ_MAX)
607 buflen = (buf == NULL) ? 0 : strlen(buf);
609 assert(buflen == 0 || buf != NULL);
611 if(LIKELY(buflen > 0)){
612 self->s = (srealloc)(self->s, (self->l = buflen) +1
613 SMALLOC_DEBUG_ARGSCALL);
614 memcpy(self->s, buf, buflen);
615 self->s[buflen] = '\0';
616 }else
617 self->l = 0;
618 NYD_LEAVE;
619 return self;
622 FL struct str *
623 (n_str_add_buf)(struct str *self, char const *buf, uiz_t buflen
624 SMALLOC_DEBUG_ARGS){
625 NYD_ENTER;
626 if(buflen == UIZ_MAX)
627 buflen = (buf == NULL) ? 0 : strlen(buf);
629 assert(buflen == 0 || buf != NULL);
631 if(buflen > 0) {
632 size_t osl = self->l, nsl = osl + buflen;
634 self->s = (srealloc)(self->s, (self->l = nsl) +1 SMALLOC_DEBUG_ARGSCALL);
635 memcpy(self->s + osl, buf, buflen);
636 self->s[nsl] = '\0';
638 NYD_LEAVE;
639 return self;
/*
 * struct n_string TODO extend, optimize
 */

646 FL struct n_string *
647 (n_string_clear)(struct n_string *self SMALLOC_DEBUG_ARGS){
648 NYD_ENTER;
650 assert(self != NULL);
652 if(self->s_size != 0){
653 if(!self->s_auto){
654 #ifdef HAVE_DEBUG
655 sfree(self->s_dat SMALLOC_DEBUG_ARGSCALL);
656 #else
657 free(self->s_dat);
658 #endif
660 self->s_len = self->s_auto = self->s_size = 0;
661 self->s_dat = NULL;
663 NYD_LEAVE;
664 return self;
667 FL struct n_string *
668 (n_string_reserve)(struct n_string *self, size_t noof SMALLOC_DEBUG_ARGS){
669 ui32_t i, l, s;
670 NYD_ENTER;
672 assert(self != NULL);
674 s = self->s_size;
675 l = self->s_len;
676 #if 0 /* FIXME memory alloc too large */
677 if(SI32_MAX - n_ALIGN(1) - l <= noof)
678 n_panic(_("Memory allocation too large"));
679 #endif
681 if((i = s - l) <= noof){
682 i += 1 + l + (ui32_t)noof;
683 i = n_ALIGN(i);
684 self->s_size = i -1;
686 if(!self->s_auto)
687 self->s_dat = (srealloc)(self->s_dat, i SMALLOC_DEBUG_ARGSCALL);
688 else{
689 char *ndat = (salloc)(i SALLOC_DEBUG_ARGSCALL);
691 if(l > 0)
692 memcpy(ndat, self->s_dat, l);
693 self->s_dat = ndat;
696 NYD_LEAVE;
697 return self;
700 FL struct n_string *
701 (n_string_push_buf)(struct n_string *self, char const *buf, size_t buflen
702 SMALLOC_DEBUG_ARGS){
703 NYD_ENTER;
705 assert(self != NULL);
706 assert(buflen == 0 || buf != NULL);
708 if(buflen == UIZ_MAX)
709 buflen = (buf == NULL) ? 0 : strlen(buf);
711 if(buflen > 0){
712 ui32_t i;
714 self = (n_string_reserve)(self, buflen SMALLOC_DEBUG_ARGSCALL);
715 memcpy(self->s_dat + (i = self->s_len), buf, buflen);
716 self->s_len = (i += (ui32_t)buflen);
718 NYD_LEAVE;
719 return self;
722 FL struct n_string *
723 (n_string_push_c)(struct n_string *self, char c SMALLOC_DEBUG_ARGS){
724 NYD_ENTER;
726 assert(self != NULL);
728 if(self->s_len + 1 >= self->s_size)
729 self = (n_string_reserve)(self, 1 SMALLOC_DEBUG_ARGSCALL);
730 self->s_dat[self->s_len++] = c;
731 NYD_LEAVE;
732 return self;
735 FL struct n_string *
736 (n_string_unshift_buf)(struct n_string *self, char const *buf, size_t buflen
737 SMALLOC_DEBUG_ARGS){
738 NYD_ENTER;
740 assert(self != NULL);
741 assert(buflen == 0 || buf != NULL);
743 if(buflen == UIZ_MAX)
744 buflen = (buf == NULL) ? 0 : strlen(buf);
746 if(buflen > 0){
747 self = (n_string_reserve)(self, buflen SMALLOC_DEBUG_ARGSCALL);
748 if(self->s_len > 0)
749 memmove(self->s_dat + buflen, self->s_dat, self->s_len);
750 memcpy(self->s_dat, buf, buflen);
751 self->s_len += (ui32_t)buflen;
753 NYD_LEAVE;
754 return self;
757 FL struct n_string *
758 (n_string_unshift_c)(struct n_string *self, char c SMALLOC_DEBUG_ARGS){
759 NYD_ENTER;
761 assert(self != NULL);
763 if(self->s_len + 1 >= self->s_size)
764 self = (n_string_reserve)(self, 1 SMALLOC_DEBUG_ARGSCALL);
765 if(self->s_len > 0)
766 memmove(self->s_dat + 1, self->s_dat, self->s_len);
767 self->s_dat[0] = c;
768 ++self->s_len;
769 NYD_LEAVE;
770 return self;
773 FL char *
774 (n_string_cp)(struct n_string *self SMALLOC_DEBUG_ARGS){
775 char *rv;
776 NYD2_ENTER;
778 assert(self != NULL);
780 if(self->s_size == 0)
781 self = (n_string_reserve)(self, 1 SMALLOC_DEBUG_ARGSCALL);
783 (rv = self->s_dat)[self->s_len] = '\0';
784 NYD2_LEAVE;
785 return rv;
788 FL char const *
789 n_string_cp_const(struct n_string const *self){
790 char const *rv;
791 NYD2_ENTER;
793 assert(self != NULL);
795 if(self->s_size != 0){
796 ((struct n_string*)UNCONST(self))->s_dat[self->s_len] = '\0';
797 rv = self->s_dat;
798 }else
799 rv = "";
800 NYD2_LEAVE;
801 return rv;
/*
 * UTF-8
 */

#ifdef HAVE_NATCH_CHAR
/* Decode one UTF-8 sequence from *bdat, of which *blen bytes are available
 * (at least one is read unconditionally).  On success the code point is
 * returned and *bdat / *blen are advanced behind the sequence.  If fewer
 * bytes than the leading byte announces are available UI32_MAX is returned
 * and *bdat / *blen are advanced by one byte only.
 * Neither leading nor continuation bytes are validated */
FL ui32_t
n_utf8_to_utf32(char const **bdat, size_t *blen) /* TODO check false UTF8 */
{
   char const *cp;
   size_t l, more;
   ui32_t c, x;
   NYD2_ENTER;

   cp = *bdat;
   l = *blen - 1;
   x = (ui8_t)*cp++;

   if (x <= 0x7F)
      c = x;
   else {
      /* Leading byte: extract its payload, and determine how many
       * continuation bytes follow */
      if ((x & 0xE0) == 0xC0) {
         more = 1;
         c = x & ~0xC0;
      } else if ((x & 0xF0) == 0xE0) {
         more = 2;
         c = x & ~0xE0;
      } else {
         more = 3;
         c = x & ~0xF0;
      }
      if (l < more)
         goto jerr;
      l -= more;

      for (; more > 0; --more) {
         c <<= 6;
         x = (ui8_t)*cp++;
         c |= x & 0x7F;
      }
   }

jleave:
   *bdat = cp;
   *blen = l;
   NYD2_LEAVE;
   return c;
jerr:
   c = UI32_MAX;
   goto jleave;
}
#endif /* HAVE_NATCH_CHAR */

#ifdef HAVE_FILTER_HTML_TAGSOUP
/* Encode the code point c as UTF-8 into buf and NUL terminate it; at most
 * five bytes are written, the terminator included.  Surrogates (U+D800 to
 * U+DFFF) and values above U+10FFFF are replaced with U+FFFD.
 * Returns the number of bytes stored, the terminator not counted; this is 0
 * if c is 0 */
FL size_t
n_utf32_to_utf8(ui32_t c, char *buf)
{
   struct {
      ui32_t   lower_bound;
      ui32_t   upper_bound;
      ui8_t    enc_leader;
      ui8_t    enc_lval;
      ui8_t    dec_leader_mask;
      ui8_t    dec_leader_val_mask;
      ui8_t    dec_bytes_togo;
      ui8_t    cat_index;
      ui8_t    __dummy[2];
   } const _cat[] = {
      {0x00000000, 0x00000000, 0x00, 0, 0x00,      0x00,   0, 0, {0,}},
      {0x00000000, 0x0000007F, 0x00, 1, 0x80,      0x7F, 1-1, 1, {0,}},
      {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
      /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
      /* xxx _from_utf32() simply assumes magic code points for surrogates!
       * xxx (However, should we ever get yet another surrogate range we
       * xxx need to deal with that all over the place anyway? */
      {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
      {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
   }, *catp = _cat;
   size_t l;
   NYD2_ENTER; /* Balances the NYD2_LEAVE below */

   /* Choose the category, i.e., the length of the encoding, then jump into
    * the store cascade below, which falls through from the last byte of the
    * sequence to the first */
   if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
   if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
   if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
   if (c <= _cat[3].upper_bound) {
      /* Surrogates may not be converted (Compatibility rule C10) */
      if (c >= 0xD800u && c <= 0xDFFFu)
         goto jerr;
      catp += 3;
      goto j3;
   }
   if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
jerr:
   c = 0xFFFDu; /* Unicode replacement character */
   catp += 3;
   goto j3;
j4:
   buf[3] = (char)0x80 | (char)(c & 0x3F); c >>= 6;
j3:
   buf[2] = (char)0x80 | (char)(c & 0x3F); c >>= 6;
j2:
   buf[1] = (char)0x80 | (char)(c & 0x3F); c >>= 6;
j1:
   buf[0] = (char)catp->enc_leader | (char)(c);
j0:
   buf[catp->enc_lval] = '\0';
   l = catp->enc_lval;
   NYD2_LEAVE;
   return l;
}
#endif /* HAVE_FILTER_HTML_TAGSOUP */

/*
 * Our iconv(3) wrapper
 */
926 #ifdef HAVE_ICONV
928 static void _ic_toupper(char *dest, char const *src);
929 static void _ic_stripdash(char *p);
931 static void
932 _ic_toupper(char *dest, char const *src)
934 NYD2_ENTER;
936 *dest++ = upperconv(*src);
937 while (*src++ != '\0');
938 NYD2_LEAVE;
941 static void
942 _ic_stripdash(char *p)
944 char *q = p;
945 NYD2_ENTER;
948 if (*(q = p) != '-')
949 ++q;
950 while (*p++ != '\0');
951 NYD2_LEAVE;
954 FL iconv_t
955 n_iconv_open(char const *tocode, char const *fromcode)
957 iconv_t id;
958 char *t, *f;
959 NYD_ENTER;
961 if ((!asccasecmp(fromcode, "unknown-8bit") ||
962 !asccasecmp(fromcode, "binary")) &&
963 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
964 fromcode = charset_get_8bit();
966 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
967 goto jleave;
969 /* Remove the "iso-" prefixes for Solaris */
970 if (!ascncasecmp(tocode, "iso-", 4))
971 tocode += 4;
972 else if (!ascncasecmp(tocode, "iso", 3))
973 tocode += 3;
974 if (!ascncasecmp(fromcode, "iso-", 4))
975 fromcode += 4;
976 else if (!ascncasecmp(fromcode, "iso", 3))
977 fromcode += 3;
978 if (*tocode == '\0' || *fromcode == '\0') {
979 id = (iconv_t)-1;
980 goto jleave;
982 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
983 goto jleave;
985 /* Solaris prefers upper-case charset names. Don't ask... */
986 t = salloc(strlen(tocode) +1);
987 _ic_toupper(t, tocode);
988 f = salloc(strlen(fromcode) +1);
989 _ic_toupper(f, fromcode);
990 if ((id = iconv_open(t, f)) != (iconv_t)-1)
991 goto jleave;
993 /* Strip dashes for UnixWare */
994 _ic_stripdash(t);
995 _ic_stripdash(f);
996 if ((id = iconv_open(t, f)) != (iconv_t)-1)
997 goto jleave;
999 /* Add your vendor's sillynesses here */
1001 /* If the encoding names are equal at this point, they are just not
1002 * understood by iconv(), and we cannot sensibly use it in any way. We do
1003 * not perform this as an optimization above since iconv() can otherwise be
1004 * used to check the validity of the input even with identical encoding
1005 * names */
1006 if (!strcmp(t, f))
1007 errno = 0;
1008 jleave:
1009 NYD_LEAVE;
1010 return id;
1013 FL void
1014 n_iconv_close(iconv_t cd)
1016 NYD_ENTER;
1017 iconv_close(cd);
1018 if (cd == iconvd)
1019 iconvd = (iconv_t)-1;
1020 NYD_LEAVE;
1023 FL void
1024 n_iconv_reset(iconv_t cd)
1026 NYD_ENTER;
1027 iconv(cd, NULL, NULL, NULL, NULL);
1028 NYD_LEAVE;
/* (2012-09-24: export and use it exclusively to isolate prototype problems
 * (*inb* is 'char const **' except in POSIX) in a single place.
 * GNU libiconv even allows for configuration time const/non-const..
 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
 * support compiler invocations which bail on error, so no -Werror */
/* __INBCAST(S) casts S to the type that is guessed for the second argument
 * of iconv(3) */
/* Citrus project? */
# if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
   /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
#  if OS_DRAGONFLY
#   define __INBCAST(S) (char ** __restrict__)UNCONST(S)
#  else
#   define __INBCAST(S) (char const **)UNCONST(S)
#  endif
# elif OS_SUNOS || OS_SOLARIS
#  define __INBCAST(S) (char const ** __restrict__)UNCONST(S)
# else
   /* Anything else: the prototype that POSIX uses */
#  define __INBCAST(S) (char **)UNCONST(S)
# endif

1051 FL int
1052 n_iconv_buf(iconv_t cd, char const **inb, size_t *inbleft,/*XXX redo iconv use*/
1053 char **outb, size_t *outbleft, bool_t skipilseq)
1055 int err = 0;
1056 NYD2_ENTER;
1058 for (;;) {
1059 size_t sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft);
1060 if (sz != (size_t)-1)
1061 break;
1062 err = errno;
1063 if (!skipilseq || err != EILSEQ)
1064 break;
1065 if (*inbleft > 0) {
1066 ++(*inb);
1067 --(*inbleft);
1068 } else if (*outbleft > 0) {
1069 **outb = '\0';
1070 break;
1072 if (*outbleft > 0/* TODO 0xFFFD 2*/) {
1073 /* TODO 0xFFFD (*outb)[0] = '[';
1074 * TODO (*outb)[1] = '?';
1075 * TODO 0xFFFD (*outb)[2] = ']';
1076 * TODO (*outb) += 3;
1077 * TODO (*outbleft) -= 3; */
1078 *(*outb)++ = '?';
1079 --*outbleft;
1080 } else {
1081 err = E2BIG;
1082 break;
1084 err = 0;
1086 NYD2_LEAVE;
1087 return err;
1089 # undef __INBCAST
1091 FL int
1092 n_iconv_str(iconv_t cd, struct str *out, struct str const *in,
1093 struct str *in_rest_or_null, bool_t skipilseq)
1095 int err;
1096 char *obb, *ob;
1097 char const *ib;
1098 size_t olb, ol, il;
1099 NYD2_ENTER;
1101 err = 0;
1102 obb = out->s;
1103 olb = out->l;
1104 ol = in->l;
1106 ol = (ol << 1) - (ol >> 4);
1107 if (olb <= ol) {
1108 olb = ol;
1109 goto jrealloc;
1112 for (;;) {
1113 ib = in->s;
1114 il = in->l;
1115 ob = obb;
1116 ol = olb;
1117 err = n_iconv_buf(cd, &ib, &il, &ob, &ol, skipilseq);
1118 if (err == 0 || err != E2BIG)
1119 break;
1120 err = 0;
1121 olb += in->l;
1122 jrealloc:
1123 obb = srealloc(obb, olb +1);
1126 if (in_rest_or_null != NULL) {
1127 in_rest_or_null->s = UNCONST(ib);
1128 in_rest_or_null->l = il;
1130 out->s = obb;
1131 out->s[out->l = olb - ol] = '\0';
1132 NYD2_LEAVE;
1133 return err;
1135 #endif /* HAVE_ICONV */
1137 /* s-it-mode */