Add OPT_ALWAYS_UNICODE_LOCALE (Predrag Punosevac)..
[s-mailx.git] / strings.c
blob95bfe1f0475bc9a959ccc255862c03f5ff518b78
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2016 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE strings
#ifndef HAVE_AMALGAMATION
# include "nail.h"
#endif

#include <ctype.h>
#include <limits.h>
44 FL char *
45 (savestr)(char const *str SALLOC_DEBUG_ARGS)
47 size_t size;
48 char *news;
49 NYD_ENTER;
51 size = strlen(str) +1;
52 news = (salloc)(size SALLOC_DEBUG_ARGSCALL);
53 memcpy(news, str, size);
54 NYD_LEAVE;
55 return news;
58 FL char *
59 (savestrbuf)(char const *sbuf, size_t sbuf_len SALLOC_DEBUG_ARGS)
61 char *news;
62 NYD_ENTER;
64 news = (salloc)(sbuf_len +1 SALLOC_DEBUG_ARGSCALL);
65 memcpy(news, sbuf, sbuf_len);
66 news[sbuf_len] = 0;
67 NYD_LEAVE;
68 return news;
71 FL char *
72 (savecatsep)(char const *s1, char sep, char const *s2 SALLOC_DEBUG_ARGS)
74 size_t l1, l2;
75 char *news;
76 NYD_ENTER;
78 l1 = (s1 != NULL) ? strlen(s1) : 0;
79 l2 = strlen(s2);
80 news = (salloc)(l1 + (sep != '\0') + l2 +1 SALLOC_DEBUG_ARGSCALL);
81 if (l1 > 0) {
82 memcpy(news + 0, s1, l1);
83 if (sep != '\0')
84 news[l1++] = sep;
86 memcpy(news + l1, s2, l2);
87 news[l1 + l2] = '\0';
88 NYD_LEAVE;
89 return news;
/*
 * Support routines, auto-reclaimed storage
 */
96 FL char *
97 (i_strdup)(char const *src SALLOC_DEBUG_ARGS)
99 size_t sz;
100 char *dest;
101 NYD_ENTER;
103 sz = strlen(src) +1;
104 dest = (salloc)(sz SALLOC_DEBUG_ARGSCALL);
105 i_strcpy(dest, src, sz);
106 NYD_LEAVE;
107 return dest;
110 FL struct str *
111 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
113 va_list vl;
114 size_t l;
115 char const *cs;
116 NYD_ENTER;
118 va_start(vl, self);
119 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
120 l += strlen(cs);
121 va_end(vl);
123 self->l = l;
124 self->s = salloc(l +1);
126 va_start(vl, self);
127 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
128 size_t i = strlen(cs);
129 memcpy(self->s + l, cs, i);
130 l += i;
132 self->s[l] = '\0';
133 va_end(vl);
134 NYD_LEAVE;
135 return self;
138 FL struct str *
139 (str_concat_cpa)(struct str *self, char const * const *cpa,
140 char const *sep_o_null SALLOC_DEBUG_ARGS)
142 size_t sonl, l;
143 char const * const *xcpa;
144 NYD_ENTER;
146 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
148 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
149 l += strlen(*xcpa) + sonl;
151 self->l = l;
152 self->s = (salloc)(l +1 SALLOC_DEBUG_ARGSCALL);
154 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
155 size_t i = strlen(*xcpa);
156 memcpy(self->s + l, *xcpa, i);
157 l += i;
158 if (sonl > 0) {
159 memcpy(self->s + l, sep_o_null, sonl);
160 l += sonl;
163 self->s[l] = '\0';
164 NYD_LEAVE;
165 return self;
/*
 * Routines that are not related to auto-reclaimed storage follow.
 */
172 FL int
173 anyof(char const *s1, char const *s2)
175 NYD2_ENTER;
176 for (; *s1 != '\0'; ++s1)
177 if (strchr(s2, *s1) != NULL)
178 break;
179 NYD2_LEAVE;
180 return (*s1 != '\0');
183 FL char *
184 n_strsep(char **iolist, char sep, bool_t ignore_empty)
186 char *base, *cp;
187 NYD2_ENTER;
189 for (base = *iolist; base != NULL; base = *iolist) {
190 while (*base != '\0' && blankspacechar(*base))
191 ++base;
192 cp = strchr(base, sep);
193 if (cp != NULL)
194 *iolist = cp + 1;
195 else {
196 *iolist = NULL;
197 cp = base + strlen(base);
199 while (cp > base && blankspacechar(cp[-1]))
200 --cp;
201 *cp = '\0';
202 if (*base != '\0' || !ignore_empty)
203 break;
205 NYD2_LEAVE;
206 return base;
209 FL void
210 i_strcpy(char *dest, char const *src, size_t size)
212 NYD2_ENTER;
213 if (size > 0) {
214 for (;; ++dest, ++src)
215 if ((*dest = lowerconv(*src)) == '\0') {
216 break;
217 } else if (--size == 0) {
218 *dest = '\0';
219 break;
222 NYD2_LEAVE;
225 FL int
226 is_prefix(char const *as1, char const *as2)
228 char c;
229 NYD2_ENTER;
231 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
232 if (*as2 == '\0')
233 break;
234 NYD2_LEAVE;
235 return (c == '\0');
238 FL char *
239 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
241 char const *cp;
242 size_t i;
243 char c, *rv;
244 NYD2_ENTER;
246 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
247 if (c == '"' || c == '\\')
248 ++i;
249 rv = salloc(i +1);
251 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
252 if (c == '"' || c == '\\')
253 rv[i++] = '\\';
254 rv[i] = '\0';
255 NYD2_LEAVE;
256 return rv;
259 FL char *
260 laststring(char *linebuf, bool_t *needs_list, bool_t strip)
262 char *cp, *p, quoted;
263 NYD_ENTER;
265 /* Anything to do at all? */
266 if (*(cp = linebuf) == '\0')
267 goto jnull;
268 cp += strlen(linebuf) -1;
270 /* Strip away trailing blanks */
271 while (spacechar(*cp) && cp > linebuf)
272 --cp;
273 cp[1] = '\0';
274 if (cp == linebuf)
275 goto jleave;
277 /* Now search for the BOS of the "last string" */
278 quoted = *cp;
279 if (quoted == '\'' || quoted == '"') {
280 if (strip)
281 *cp = '\0';
282 } else
283 quoted = ' ';
285 while (cp > linebuf) {
286 --cp;
287 if (quoted != ' ') {
288 if (*cp != quoted)
289 continue;
290 } else if (!spacechar(*cp))
291 continue;
292 if (cp == linebuf || cp[-1] != '\\') {
293 /* When in whitespace mode, WS prefix doesn't belong */
294 if (quoted == ' ')
295 ++cp;
296 break;
298 /* Expand the escaped quote character */
299 for (p = --cp; (p[0] = p[1]) != '\0'; ++p)
302 if (strip && quoted != ' ' && *cp == quoted)
303 for (p = cp; (p[0] = p[1]) != '\0'; ++p)
306 /* The "last string" has been skipped over, but still, try to step backwards
307 * until we are at BOS or see whitespace, so as to make possible things like
308 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
309 while (cp > linebuf) {
310 --cp;
311 if (spacechar(*cp)) {
312 p = cp;
313 *cp++ = '\0';
314 /* We can furtherly release our callees if we now decide whether the
315 * remaining non-"last string" line content contains non-WS */
316 while (--p >= linebuf)
317 if (!spacechar(*p))
318 goto jleave;
319 linebuf = cp;
320 break;
324 jleave:
325 if (cp != NULL && *cp == '\0')
326 goto jnull;
327 *needs_list = (cp != linebuf && *linebuf != '\0');
328 j_leave:
329 NYD_LEAVE;
330 return cp;
331 jnull:
332 *needs_list = FAL0;
333 cp = NULL;
334 goto j_leave;
337 FL void
338 makelow(char *cp) /* TODO isn't that crap? --> */
340 NYD_ENTER;
341 #ifdef HAVE_C90AMEND1
342 if (mb_cur_max > 1) {
343 char *tp = cp;
344 wchar_t wc;
345 int len;
347 while (*cp != '\0') {
348 len = mbtowc(&wc, cp, mb_cur_max);
349 if (len < 0)
350 *tp++ = *cp++;
351 else {
352 wc = towlower(wc);
353 if (wctomb(tp, wc) == len)
354 tp += len, cp += len;
355 else
356 *tp++ = *cp++; /* <-- at least here */
359 } else
360 #endif
363 *cp = tolower((uc_i)*cp);
364 while (*cp++ != '\0');
366 NYD_LEAVE;
369 FL bool_t
370 substr(char const *str, char const *sub)
372 char const *cp, *backup;
373 NYD_ENTER;
375 cp = sub;
376 backup = str;
377 while (*str != '\0' && *cp != '\0') {
378 #ifdef HAVE_C90AMEND1
379 if (mb_cur_max > 1) {
380 wchar_t c, c2;
381 int sz;
383 if ((sz = mbtowc(&c, cp, mb_cur_max)) == -1)
384 goto Jsinglebyte;
385 cp += sz;
386 if ((sz = mbtowc(&c2, str, mb_cur_max)) == -1)
387 goto Jsinglebyte;
388 str += sz;
389 c = towupper(c);
390 c2 = towupper(c2);
391 if (c != c2) {
392 if ((sz = mbtowc(&c, backup, mb_cur_max)) > 0) {
393 backup += sz;
394 str = backup;
395 } else
396 str = ++backup;
397 cp = sub;
399 } else
400 Jsinglebyte:
401 #endif
403 int c, c2;
405 c = *cp++ & 0377;
406 if (islower(c))
407 c = toupper(c);
408 c2 = *str++ & 0377;
409 if (islower(c2))
410 c2 = toupper(c2);
411 if (c != c2) {
412 str = ++backup;
413 cp = sub;
417 NYD_LEAVE;
418 return (*cp == '\0');
421 FL char *
422 sstpcpy(char *dst, char const *src)
424 NYD2_ENTER;
425 while ((*dst = *src++) != '\0')
426 ++dst;
427 NYD2_LEAVE;
428 return dst;
431 FL char *
432 (sstrdup)(char const *cp SMALLOC_DEBUG_ARGS)
434 char *dp;
435 NYD2_ENTER;
437 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) SMALLOC_DEBUG_ARGSCALL);
438 NYD2_LEAVE;
439 return dp;
442 FL char *
443 (sbufdup)(char const *cp, size_t len SMALLOC_DEBUG_ARGS)
445 char *dp = NULL;
446 NYD2_ENTER;
448 dp = (smalloc)(len +1 SMALLOC_DEBUG_ARGSCALL);
449 if (cp != NULL)
450 memcpy(dp, cp, len);
451 dp[len] = '\0';
452 NYD2_LEAVE;
453 return dp;
456 FL ssize_t
457 n_strscpy(char *dst, char const *src, size_t dstsize){
458 ssize_t rv;
459 NYD2_ENTER;
461 if(LIKELY(dstsize > 0)){
462 rv = 0;
464 if((dst[rv] = src[rv]) == '\0')
465 goto jleave;
466 ++rv;
467 }while(--dstsize > 0);
468 dst[--rv] = '\0';
470 #ifdef HAVE_DEVEL
471 else
472 assert(dstsize > 0);
473 #endif
474 rv = -1;
475 jleave:
476 NYD2_LEAVE;
477 return rv;
480 FL int
481 asccasecmp(char const *s1, char const *s2)
483 int cmp;
484 NYD2_ENTER;
486 for (;;) {
487 char c1 = *s1++, c2 = *s2++;
488 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
489 break;
491 NYD2_LEAVE;
492 return cmp;
495 FL int
496 ascncasecmp(char const *s1, char const *s2, size_t sz)
498 int cmp = 0;
499 NYD2_ENTER;
501 while (sz-- > 0) {
502 char c1 = *s1++, c2 = *s2++;
503 cmp = (ui8_t)lowerconv(c1);
504 cmp -= (ui8_t)lowerconv(c2);
505 if (cmp != 0 || c1 == '\0')
506 break;
508 NYD2_LEAVE;
509 return cmp;
512 FL char const *
513 asccasestr(char const *s1, char const *s2)
515 char c2, c1;
516 NYD2_ENTER;
518 for (c2 = *s2++, c2 = lowerconv(c2);;) {
519 if ((c1 = *s1++) == '\0') {
520 s1 = NULL;
521 break;
523 if (lowerconv(c1) == c2 && is_asccaseprefix(s1, s2)) {
524 --s1;
525 break;
528 NYD2_LEAVE;
529 return s1;
532 FL bool_t
533 is_asccaseprefix(char const *as1, char const *as2)
535 bool_t rv = FAL0;
536 NYD2_ENTER;
538 for (;; ++as1, ++as2) {
539 char c1 = lowerconv(*as1), c2 = lowerconv(*as2);
541 if ((rv = (c2 == '\0')))
542 break;
543 if (c1 != c2)
544 break;
546 NYD2_LEAVE;
547 return rv;
550 FL struct str *
551 (n_str_assign_buf)(struct str *self, char const *buf, uiz_t buflen
552 SMALLOC_DEBUG_ARGS){
553 NYD_ENTER;
554 if(buflen == UIZ_MAX)
555 buflen = (buf == NULL) ? 0 : strlen(buf);
557 assert(buflen == 0 || buf != NULL);
559 if(LIKELY(buflen > 0)){
560 self->s = (srealloc)(self->s, (self->l = buflen) +1
561 SMALLOC_DEBUG_ARGSCALL);
562 memcpy(self->s, buf, buflen);
563 self->s[buflen] = '\0';
564 }else
565 self->l = 0;
566 NYD_LEAVE;
567 return self;
570 FL struct str *
571 (n_str_add_buf)(struct str *self, char const *buf, uiz_t buflen
572 SMALLOC_DEBUG_ARGS){
573 NYD_ENTER;
574 if(buflen == UIZ_MAX)
575 buflen = (buf == NULL) ? 0 : strlen(buf);
577 assert(buflen == 0 || buf != NULL);
579 if(buflen > 0) {
580 size_t osl = self->l, nsl = osl + buflen;
582 self->s = (srealloc)(self->s, (self->l = nsl) +1 SMALLOC_DEBUG_ARGSCALL);
583 memcpy(self->s + osl, buf, buflen);
584 self->s[nsl] = '\0';
586 NYD_LEAVE;
587 return self;
/*
 * struct n_string TODO extend, optimize
 */
594 FL struct n_string *
595 (n_string_clear)(struct n_string *self SMALLOC_DEBUG_ARGS){
596 NYD_ENTER;
598 assert(self != NULL);
600 if(self->s_size != 0){
601 if(!self->s_auto){
602 #ifdef HAVE_MEMORY_DEBUG
603 sfree(self->s_dat SMALLOC_DEBUG_ARGSCALL);
604 #else
605 free(self->s_dat);
606 #endif
608 self->s_len = self->s_auto = self->s_size = 0;
609 self->s_dat = NULL;
611 NYD_LEAVE;
612 return self;
615 FL struct n_string *
616 (n_string_reserve)(struct n_string *self, size_t noof SMALLOC_DEBUG_ARGS){
617 ui32_t i, l, s;
618 NYD_ENTER;
620 assert(self != NULL);
622 s = self->s_size;
623 l = self->s_len;
624 #if 0 /* FIXME memory alloc too large */
625 if(SI32_MAX - n_ALIGN(1) - l <= noof)
626 n_panic(_("Memory allocation too large"));
627 #endif
629 if((i = s - l) <= noof){
630 i += 1 + l + (ui32_t)noof;
631 i = n_ALIGN(i);
632 self->s_size = i -1;
634 if(!self->s_auto)
635 self->s_dat = (srealloc)(self->s_dat, i SMALLOC_DEBUG_ARGSCALL);
636 else{
637 char *ndat = (salloc)(i SALLOC_DEBUG_ARGSCALL);
639 if(l > 0)
640 memcpy(ndat, self->s_dat, l);
641 self->s_dat = ndat;
644 NYD_LEAVE;
645 return self;
648 FL struct n_string *
649 (n_string_resize)(struct n_string *self, size_t nlen SMALLOC_DEBUG_ARGS){
650 NYD_ENTER;
652 assert(self != NULL);
653 #if 0 /* FIXME memory alloc too large */
654 if(SI32_MAX - n_ALIGN(1) - l <= noof)
655 n_panic(_("Memory allocation too large"));
656 #endif
658 if(self->s_len < nlen)
659 self = (n_string_reserve)(self, nlen SMALLOC_DEBUG_ARGSCALL);
660 self->s_len = (ui32_t)nlen;
661 NYD_LEAVE;
662 return self;
665 FL struct n_string *
666 (n_string_push_buf)(struct n_string *self, char const *buf, size_t buflen
667 SMALLOC_DEBUG_ARGS){
668 NYD_ENTER;
670 assert(self != NULL);
671 assert(buflen == 0 || buf != NULL);
673 if(buflen == UIZ_MAX)
674 buflen = (buf == NULL) ? 0 : strlen(buf);
676 if(buflen > 0){
677 ui32_t i;
679 self = (n_string_reserve)(self, buflen SMALLOC_DEBUG_ARGSCALL);
680 memcpy(self->s_dat + (i = self->s_len), buf, buflen);
681 self->s_len = (i += (ui32_t)buflen);
683 NYD_LEAVE;
684 return self;
687 FL struct n_string *
688 (n_string_push_c)(struct n_string *self, char c SMALLOC_DEBUG_ARGS){
689 NYD_ENTER;
691 assert(self != NULL);
693 if(self->s_len + 1 >= self->s_size)
694 self = (n_string_reserve)(self, 1 SMALLOC_DEBUG_ARGSCALL);
695 self->s_dat[self->s_len++] = c;
696 NYD_LEAVE;
697 return self;
700 FL struct n_string *
701 (n_string_unshift_buf)(struct n_string *self, char const *buf, size_t buflen
702 SMALLOC_DEBUG_ARGS){
703 NYD_ENTER;
705 assert(self != NULL);
706 assert(buflen == 0 || buf != NULL);
708 if(buflen == UIZ_MAX)
709 buflen = (buf == NULL) ? 0 : strlen(buf);
711 if(buflen > 0){
712 self = (n_string_reserve)(self, buflen SMALLOC_DEBUG_ARGSCALL);
713 if(self->s_len > 0)
714 memmove(self->s_dat + buflen, self->s_dat, self->s_len);
715 memcpy(self->s_dat, buf, buflen);
716 self->s_len += (ui32_t)buflen;
718 NYD_LEAVE;
719 return self;
722 FL struct n_string *
723 (n_string_unshift_c)(struct n_string *self, char c SMALLOC_DEBUG_ARGS){
724 NYD_ENTER;
726 assert(self != NULL);
728 if(self->s_len + 1 >= self->s_size)
729 self = (n_string_reserve)(self, 1 SMALLOC_DEBUG_ARGSCALL);
730 if(self->s_len > 0)
731 memmove(self->s_dat + 1, self->s_dat, self->s_len);
732 self->s_dat[0] = c;
733 ++self->s_len;
734 NYD_LEAVE;
735 return self;
738 FL char *
739 (n_string_cp)(struct n_string *self SMALLOC_DEBUG_ARGS){
740 char *rv;
741 NYD2_ENTER;
743 assert(self != NULL);
745 if(self->s_size == 0)
746 self = (n_string_reserve)(self, 1 SMALLOC_DEBUG_ARGSCALL);
748 (rv = self->s_dat)[self->s_len] = '\0';
749 NYD2_LEAVE;
750 return rv;
753 FL char const *
754 n_string_cp_const(struct n_string const *self){
755 char const *rv;
756 NYD2_ENTER;
758 assert(self != NULL);
760 if(self->s_size != 0){
761 ((struct n_string*)UNCONST(self))->s_dat[self->s_len] = '\0';
762 rv = self->s_dat;
763 }else
764 rv = "";
765 NYD2_LEAVE;
766 return rv;
/*
 * UTF-8
 */
773 FL ui32_t
774 n_utf8_to_utf32(char const **bdat, size_t *blen) /* TODO check false UTF8 */
776 char const *cp;
777 size_t l;
778 ui32_t c, x;
779 NYD2_ENTER;
781 cp = *bdat;
782 l = *blen - 1;
783 x = (ui8_t)*cp++;
785 if (x <= 0x7Fu)
786 c = x;
787 else {
788 if ((x & 0xE0u) == 0xC0u) {
789 if (l < 1)
790 goto jerr;
791 l -= 1;
792 c = x & ~0xC0u;
793 } else if ((x & 0xF0u) == 0xE0u) {
794 if (l < 2)
795 goto jerr;
796 l -= 2;
797 c = x & ~0xE0u;
798 c <<= 6;
799 x = (ui8_t)*cp++;
800 c |= x & 0x7Fu;
801 } else {
802 if (l < 3)
803 goto jerr;
804 l -= 3;
805 c = x & ~0xF0u;
806 c <<= 6;
807 x = (ui8_t)*cp++;
808 c |= x & 0x7Fu;
809 c <<= 6;
810 x = (ui8_t)*cp++;
811 c |= x & 0x7Fu;
813 c <<= 6;
814 x = (ui8_t)*cp++;
815 c |= x & 0x7Fu;
818 jleave:
819 *bdat = cp;
820 *blen = l;
821 NYD2_LEAVE;
822 return c;
823 jerr:
824 c = UI32_MAX;
825 goto jleave;
828 FL size_t
829 n_utf32_to_utf8(ui32_t c, char *buf)
831 struct {
832 ui32_t lower_bound;
833 ui32_t upper_bound;
834 ui8_t enc_leader;
835 ui8_t enc_lval;
836 ui8_t dec_leader_mask;
837 ui8_t dec_leader_val_mask;
838 ui8_t dec_bytes_togo;
839 ui8_t cat_index;
840 ui8_t __dummy[2];
841 } const _cat[] = {
842 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
843 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
844 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
845 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
846 /* xxx _from_utf32() simply assumes magic code points for surrogates!
847 * xxx (However, should we ever get yet another surrogate range we
848 * xxx need to deal with that all over the place anyway? */
849 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
850 {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
851 }, *catp = _cat;
852 size_t l;
854 if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
855 if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
856 if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
857 if (c <= _cat[3].upper_bound) {
858 /* Surrogates may not be converted (Compatibility rule C10) */
859 if (c >= 0xD800u && c <= 0xDFFFu)
860 goto jerr;
861 catp += 3;
862 goto j3;
864 if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
865 jerr:
866 c = 0xFFFDu; /* Unicode replacement character */
867 catp += 3;
868 goto j3;
870 buf[3] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
872 buf[2] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
874 buf[1] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
876 buf[0] = (char)catp->enc_leader | (char)(c);
878 buf[catp->enc_lval] = '\0';
879 l = catp->enc_lval;
880 NYD2_LEAVE;
881 return l;
/*
 * Our iconv(3) wrapper
 */
887 #ifdef HAVE_ICONV
889 static void _ic_toupper(char *dest, char const *src);
890 static void _ic_stripdash(char *p);
892 static void
893 _ic_toupper(char *dest, char const *src)
895 NYD2_ENTER;
897 *dest++ = upperconv(*src);
898 while (*src++ != '\0');
899 NYD2_LEAVE;
902 static void
903 _ic_stripdash(char *p)
905 char *q = p;
906 NYD2_ENTER;
909 if (*(q = p) != '-')
910 ++q;
911 while (*p++ != '\0');
912 NYD2_LEAVE;
915 FL iconv_t
916 n_iconv_open(char const *tocode, char const *fromcode)
918 iconv_t id;
919 char *t, *f;
920 NYD_ENTER;
922 if ((!asccasecmp(fromcode, "unknown-8bit") ||
923 !asccasecmp(fromcode, "binary")) &&
924 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
925 fromcode = charset_get_8bit();
927 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
928 goto jleave;
930 /* Remove the "iso-" prefixes for Solaris */
931 if (!ascncasecmp(tocode, "iso-", 4))
932 tocode += 4;
933 else if (!ascncasecmp(tocode, "iso", 3))
934 tocode += 3;
935 if (!ascncasecmp(fromcode, "iso-", 4))
936 fromcode += 4;
937 else if (!ascncasecmp(fromcode, "iso", 3))
938 fromcode += 3;
939 if (*tocode == '\0' || *fromcode == '\0') {
940 id = (iconv_t)-1;
941 goto jleave;
943 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
944 goto jleave;
946 /* Solaris prefers upper-case charset names. Don't ask... */
947 t = salloc(strlen(tocode) +1);
948 _ic_toupper(t, tocode);
949 f = salloc(strlen(fromcode) +1);
950 _ic_toupper(f, fromcode);
951 if ((id = iconv_open(t, f)) != (iconv_t)-1)
952 goto jleave;
954 /* Strip dashes for UnixWare */
955 _ic_stripdash(t);
956 _ic_stripdash(f);
957 if ((id = iconv_open(t, f)) != (iconv_t)-1)
958 goto jleave;
960 /* Add your vendor's sillynesses here */
962 /* If the encoding names are equal at this point, they are just not
963 * understood by iconv(), and we cannot sensibly use it in any way. We do
964 * not perform this as an optimization above since iconv() can otherwise be
965 * used to check the validity of the input even with identical encoding
966 * names */
967 if (!strcmp(t, f))
968 errno = 0;
969 jleave:
970 NYD_LEAVE;
971 return id;
974 FL void
975 n_iconv_close(iconv_t cd)
977 NYD_ENTER;
978 iconv_close(cd);
979 if (cd == iconvd)
980 iconvd = (iconv_t)-1;
981 NYD_LEAVE;
984 FL void
985 n_iconv_reset(iconv_t cd)
987 NYD_ENTER;
988 iconv(cd, NULL, NULL, NULL, NULL);
989 NYD_LEAVE;
992 /* (2012-09-24: export and use it exclusively to isolate prototype problems
993 * (*inb* is 'char const **' except in POSIX) in a single place.
994 * GNU libiconv even allows for configuration time const/non-const..
995 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
996 * support compiler invocations which bail on error, so no -Werror */
997 /* Citrus project? */
998 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
999 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
1000 # if OS_DRAGONFLY
1001 # define __INBCAST(S) (char ** __restrict__)UNCONST(S)
1002 # else
1003 # define __INBCAST(S) (char const **)UNCONST(S)
1004 # endif
1005 # elif OS_SUNOS || OS_SOLARIS
1006 # define __INBCAST(S) (char const ** __restrict__)UNCONST(S)
1007 # endif
1008 # ifndef __INBCAST
1009 # define __INBCAST(S) (char **)UNCONST(S)
1010 # endif
1012 FL int
1013 n_iconv_buf(iconv_t cd, enum n_iconv_flags icf,
1014 char const **inb, size_t *inbleft, char **outb, size_t *outbleft){
1015 int err;
1016 NYD2_ENTER;
1018 for(;;){
1019 size_t sz;
1021 sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft);
1022 if(sz > 0 && !(icf & n_ICONV_IGN_NOREVERSE)){
1023 err = ENOENT;
1024 goto jleave;
1026 if(sz != (size_t)-1)
1027 break;
1029 err = errno;
1030 if(!(icf & n_ICONV_IGN_ILSEQ) || err != EILSEQ)
1031 goto jleave;
1032 if(*inbleft > 0){
1033 ++(*inb);
1034 --(*inbleft);
1035 if(*outbleft > 0/* TODO unicode replacement 0xFFFD */){
1036 *(*outb)++ = '?';
1037 --*outbleft;
1038 }else{
1039 err = E2BIG;
1040 goto jleave;
1042 }else if(*outbleft > 0){
1043 **outb = '\0';
1044 goto jleave;
1047 err = 0;
1048 jleave:
1049 NYD2_LEAVE;
1050 return err;
1052 # undef __INBCAST
1054 FL int
1055 n_iconv_str(iconv_t cd, enum n_iconv_flags icf,
1056 struct str *out, struct str const *in, struct str *in_rest_or_null)
1058 int err;
1059 char *obb, *ob;
1060 char const *ib;
1061 size_t olb, ol, il;
1062 NYD2_ENTER;
1064 err = 0;
1065 obb = out->s;
1066 olb = out->l;
1067 ol = in->l;
1069 ol = (ol << 1) - (ol >> 4);
1070 if (olb <= ol) {
1071 olb = ol;
1072 goto jrealloc;
1075 for (;;) {
1076 ib = in->s;
1077 il = in->l;
1078 ob = obb;
1079 ol = olb;
1080 if((err = n_iconv_buf(cd, icf, &ib, &il, &ob, &ol)) == 0 || err != E2BIG)
1081 break;
1082 err = 0;
1083 olb += in->l;
1084 jrealloc:
1085 obb = srealloc(obb, olb +1);
1088 if (in_rest_or_null != NULL) {
1089 in_rest_or_null->s = UNCONST(ib);
1090 in_rest_or_null->l = il;
1092 out->s = obb;
1093 out->s[out->l = olb - ol] = '\0';
1094 NYD2_LEAVE;
1095 return err;
1098 FL char *
1099 n_iconv_onetime_cp(enum n_iconv_flags icf,
1100 char const *tocode, char const *fromcode, char const *input){
1101 struct str out, in;
1102 iconv_t icd;
1103 char *rv;
1104 NYD2_ENTER;
1106 rv = NULL;
1107 if(tocode == NULL)
1108 tocode = charset_get_lc();
1109 if(fromcode == NULL)
1110 fromcode = "utf-8";
1112 if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
1113 goto jleave;
1115 in.l = strlen(in.s = UNCONST(input)); /* logical */
1116 out.s = NULL, out.l = 0;
1117 if(!n_iconv_str(icd, icf, &out, &in, NULL))
1118 rv = savestrbuf(out.s, out.l);
1119 if(out.s != NULL)
1120 free(out.s);
1122 iconv_close(icd);
1123 jleave:
1124 NYD2_LEAVE;
1125 return rv;
1127 #endif /* HAVE_ICONV */
1129 /* s-it-mode */