Tweak VERBOSE handling (William Yodlowsky)..
[s-mailx.git] / strings.c
blob37349464cc444b422f205b48aacc30da1e665016
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE strings
#ifndef HAVE_AMALGAMATION
# include "nail.h"
#endif

#include <ctype.h>
#include <limits.h>
44 FL char *
45 (savestr)(char const *str SALLOC_DEBUG_ARGS)
47 size_t size;
48 char *news;
49 NYD_ENTER;
51 size = strlen(str) +1;
52 news = (salloc)(size SALLOC_DEBUG_ARGSCALL);
53 memcpy(news, str, size);
54 NYD_LEAVE;
55 return news;
58 FL char *
59 (savestrbuf)(char const *sbuf, size_t sbuf_len SALLOC_DEBUG_ARGS)
61 char *news;
62 NYD_ENTER;
64 news = (salloc)(sbuf_len +1 SALLOC_DEBUG_ARGSCALL);
65 memcpy(news, sbuf, sbuf_len);
66 news[sbuf_len] = 0;
67 NYD_LEAVE;
68 return news;
71 FL char *
72 (savecatsep)(char const *s1, char sep, char const *s2 SALLOC_DEBUG_ARGS)
74 size_t l1, l2;
75 char *news;
76 NYD_ENTER;
78 l1 = (s1 != NULL) ? strlen(s1) : 0;
79 l2 = strlen(s2);
80 news = (salloc)(l1 + (sep != '\0') + l2 +1 SALLOC_DEBUG_ARGSCALL);
81 if (l1 > 0) {
82 memcpy(news + 0, s1, l1);
83 if (sep != '\0')
84 news[l1++] = sep;
86 memcpy(news + l1, s2, l2);
87 news[l1 + l2] = '\0';
88 NYD_LEAVE;
89 return news;
93 * Support routines, auto-reclaimed storage
96 FL char *
97 (i_strdup)(char const *src SALLOC_DEBUG_ARGS)
99 size_t sz;
100 char *dest;
101 NYD_ENTER;
103 sz = strlen(src) +1;
104 dest = (salloc)(sz SALLOC_DEBUG_ARGSCALL);
105 i_strcpy(dest, src, sz);
106 NYD_LEAVE;
107 return dest;
110 FL struct str *
111 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
113 va_list vl;
114 size_t l;
115 char const *cs;
116 NYD_ENTER;
118 va_start(vl, self);
119 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
120 l += strlen(cs);
121 va_end(vl);
123 self->l = l;
124 self->s = salloc(l +1);
126 va_start(vl, self);
127 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
128 size_t i = strlen(cs);
129 memcpy(self->s + l, cs, i);
130 l += i;
132 self->s[l] = '\0';
133 va_end(vl);
134 NYD_LEAVE;
135 return self;
138 FL struct str *
139 (str_concat_cpa)(struct str *self, char const * const *cpa,
140 char const *sep_o_null SALLOC_DEBUG_ARGS)
142 size_t sonl, l;
143 char const * const *xcpa;
144 NYD_ENTER;
146 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
148 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
149 l += strlen(*xcpa) + sonl;
151 self->l = l;
152 self->s = (salloc)(l +1 SALLOC_DEBUG_ARGSCALL);
154 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
155 size_t i = strlen(*xcpa);
156 memcpy(self->s + l, *xcpa, i);
157 l += i;
158 if (sonl > 0) {
159 memcpy(self->s + l, sep_o_null, sonl);
160 l += sonl;
163 self->s[l] = '\0';
164 NYD_LEAVE;
165 return self;
169 * Routines that are not related to auto-reclaimed storage follow.
172 FL int
173 anyof(char const *s1, char const *s2)
175 NYD2_ENTER;
176 for (; *s1 != '\0'; ++s1)
177 if (strchr(s2, *s1) != NULL)
178 break;
179 NYD2_LEAVE;
180 return (*s1 != '\0');
183 FL char *
184 n_strsep(char **iolist, char sep, bool_t ignore_empty)
186 char *base, *cp;
187 NYD2_ENTER;
189 for (base = *iolist; base != NULL; base = *iolist) {
190 while (*base != '\0' && blankspacechar(*base))
191 ++base;
192 cp = strchr(base, sep);
193 if (cp != NULL)
194 *iolist = cp + 1;
195 else {
196 *iolist = NULL;
197 cp = base + strlen(base);
199 while (cp > base && blankspacechar(cp[-1]))
200 --cp;
201 *cp = '\0';
202 if (*base != '\0' || !ignore_empty)
203 break;
205 NYD2_LEAVE;
206 return base;
209 FL void
210 i_strcpy(char *dest, char const *src, size_t size)
212 NYD2_ENTER;
213 if (size > 0) {
214 for (;; ++dest, ++src)
215 if ((*dest = lowerconv(*src)) == '\0') {
216 break;
217 } else if (--size == 0) {
218 *dest = '\0';
219 break;
222 NYD2_LEAVE;
225 FL int
226 is_prefix(char const *as1, char const *as2)
228 char c;
229 NYD2_ENTER;
231 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
232 if (*as2 == '\0')
233 break;
234 NYD2_LEAVE;
235 return (c == '\0');
238 FL char *
239 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
241 char const *cp;
242 size_t i;
243 char c, *rv;
244 NYD2_ENTER;
246 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
247 if (c == '"' || c == '\\')
248 ++i;
249 rv = salloc(i +1);
251 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
252 if (c == '"' || c == '\\')
253 rv[i++] = '\\';
254 rv[i] = '\0';
255 NYD2_LEAVE;
256 return rv;
/* Isolate the "last string" of linebuf, which is modified in place.
 * Trailing spacechar()s are cut off first.  If the line then ends with a
 * single or double quote the last string extends back to the matching
 * unescaped quote (backslash-escaped quotes are collapsed), otherwise it
 * extends back to the preceding unescaped whitespace.  With strip set the
 * enclosing quote characters are removed.
 * Returns a pointer to the last string, or NULL if linebuf is empty or the
 * resulting string is empty.  *needs_list is always set: to FAL0 when NULL
 * is returned, otherwise to whether the returned pointer differs from (the
 * possibly advanced) linebuf and that prefix is non-empty */
259 FL char *
260 laststring(char *linebuf, bool_t *needs_list, bool_t strip)
262 char *cp, *p, quoted;
263 NYD_ENTER;
265 /* Anything to do at all? */
266 if (*(cp = linebuf) == '\0')
267 goto jnull;
268 cp += strlen(linebuf) -1;
270 /* Strip away trailing blanks */
271 while (spacechar(*cp) && cp > linebuf)
272 --cp;
273 cp[1] = '\0';
274 if (cp == linebuf)
275 goto jleave;
277 /* Now search for the BOS of the "last string" */
/* quoted holds the closing quote character, or ' ' for whitespace mode */
278 quoted = *cp;
279 if (quoted == '\'' || quoted == '"') {
280 if (strip)
281 *cp = '\0';
282 } else
283 quoted = ' ';
285 while (cp > linebuf) {
286 --cp;
287 if (quoted != ' ') {
288 if (*cp != quoted)
289 continue;
290 } else if (!spacechar(*cp))
291 continue;
/* Found a delimiter; it only counts if not preceded by a backslash */
292 if (cp == linebuf || cp[-1] != '\\') {
293 /* When in whitespace mode, WS prefix doesn't belong */
294 if (quoted == ' ')
295 ++cp;
296 break;
298 /* Expand the escaped quote character */
299 for (p = --cp; (p[0] = p[1]) != '\0'; ++p)
/* With strip, remove the opening quote by shifting the string left */
302 if (strip && quoted != ' ' && *cp == quoted)
303 for (p = cp; (p[0] = p[1]) != '\0'; ++p)
306 /* The "last string" has been skipped over, but still, try to step backwards
307 * until we are at BOS or see whitespace, so as to make possible things like
308 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
309 while (cp > linebuf) {
310 --cp;
311 if (spacechar(*cp)) {
312 p = cp;
313 *cp++ = '\0';
314 /* We can furtherly release our callees if we now decide wether the
315 * remaining non-"last string" line content contains non-WS */
316 while (--p >= linebuf)
317 if (!spacechar(*p))
318 goto jleave;
319 linebuf = cp;
320 break;
324 jleave:
325 if (cp != NULL && *cp == '\0')
326 goto jnull;
327 *needs_list = (cp != linebuf && *linebuf != '\0');
328 j_leave:
329 NYD_LEAVE;
330 return cp;
331 jnull:
332 *needs_list = FAL0;
333 cp = NULL;
334 goto j_leave;
337 FL void
338 makelow(char *cp) /* TODO isn't that crap? --> */
340 NYD_ENTER;
341 #ifdef HAVE_C90AMEND1
342 if (mb_cur_max > 1) {
343 char *tp = cp;
344 wchar_t wc;
345 int len;
347 while (*cp != '\0') {
348 len = mbtowc(&wc, cp, mb_cur_max);
349 if (len < 0)
350 *tp++ = *cp++;
351 else {
352 wc = towlower(wc);
353 if (wctomb(tp, wc) == len)
354 tp += len, cp += len;
355 else
356 *tp++ = *cp++; /* <-- at least here */
359 } else
360 #endif
363 *cp = tolower((uc_i)*cp);
364 while (*cp++ != '\0');
366 NYD_LEAVE;
/* Case-insensitive substring test: return whether sub occurs in str.
 * The result is true once all of sub has been matched (so an empty sub
 * always matches).  On a mismatch the search restarts one character behind
 * the previous starting position (backup).
 * With HAVE_C90AMEND1 and mb_cur_max > 1 characters are decoded via mbtowc()
 * and compared after towupper(); if decoding fails the single byte
 * comparison below is used for that step.
 * NOTE(review): when the second mbtowc() (on str) fails, cp has already been
 * advanced past the decoded sub character before the single byte code runs
 * -- looks like the two strings may then be compared misaligned; confirm */
369 FL bool_t
370 substr(char const *str, char const *sub)
372 char const *cp, *backup;
373 NYD_ENTER;
375 cp = sub;
376 backup = str;
377 while (*str != '\0' && *cp != '\0') {
378 #ifdef HAVE_C90AMEND1
379 if (mb_cur_max > 1) {
380 wchar_t c, c2;
381 int sz;
383 if ((sz = mbtowc(&c, cp, mb_cur_max)) == -1)
384 goto Jsinglebyte;
385 cp += sz;
386 if ((sz = mbtowc(&c2, str, mb_cur_max)) == -1)
387 goto Jsinglebyte;
388 str += sz;
389 c = towupper(c);
390 c2 = towupper(c2);
391 if (c != c2) {
/* Mismatch: step backup over one (multibyte) character and restart */
392 if ((sz = mbtowc(&c, backup, mb_cur_max)) > 0) {
393 backup += sz;
394 str = backup;
395 } else
396 str = ++backup;
397 cp = sub;
399 } else
400 Jsinglebyte:
401 #endif
403 int c, c2;
/* Bytes are masked to 0..255 before being handed to the ctype functions */
405 c = *cp++ & 0377;
406 if (islower(c))
407 c = toupper(c);
408 c2 = *str++ & 0377;
409 if (islower(c2))
410 c2 = toupper(c2);
411 if (c != c2) {
412 str = ++backup;
413 cp = sub;
417 NYD_LEAVE;
418 return (*cp == '\0');
421 FL char *
422 sstpcpy(char *dst, char const *src)
424 NYD2_ENTER;
425 while ((*dst = *src++) != '\0')
426 ++dst;
427 NYD2_LEAVE;
428 return dst;
431 FL char *
432 (sstrdup)(char const *cp SMALLOC_DEBUG_ARGS)
434 char *dp;
435 NYD2_ENTER;
437 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) SMALLOC_DEBUG_ARGSCALL);
438 NYD2_LEAVE;
439 return dp;
442 FL char *
443 (sbufdup)(char const *cp, size_t len SMALLOC_DEBUG_ARGS)
445 char *dp = NULL;
446 NYD2_ENTER;
448 dp = (smalloc)(len +1 SMALLOC_DEBUG_ARGSCALL);
449 if (cp != NULL)
450 memcpy(dp, cp, len);
451 dp[len] = '\0';
452 NYD2_LEAVE;
453 return dp;
456 FL ssize_t
457 n_strscpy(char *dst, char const *src, size_t dstsize){
458 ssize_t rv;
459 NYD2_ENTER;
461 if(LIKELY(dstsize > 0)){
462 rv = 0;
464 if((dst[rv] = src[rv]) == '\0')
465 goto jleave;
466 ++rv;
467 }while(--dstsize > 0);
468 dst[--rv] = '\0';
470 #ifdef HAVE_DEVEL
471 else
472 assert(dstsize > 0);
473 #endif
474 rv = -1;
475 jleave:
476 NYD2_LEAVE;
477 return rv;
480 FL int
481 asccasecmp(char const *s1, char const *s2)
483 int cmp;
484 NYD2_ENTER;
486 for (;;) {
487 char c1 = *s1++, c2 = *s2++;
488 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
489 break;
491 NYD2_LEAVE;
492 return cmp;
495 FL int
496 ascncasecmp(char const *s1, char const *s2, size_t sz)
498 int cmp = 0;
499 NYD2_ENTER;
501 while (sz-- > 0) {
502 char c1 = *s1++, c2 = *s2++;
503 cmp = (ui8_t)lowerconv(c1);
504 cmp -= (ui8_t)lowerconv(c2);
505 if (cmp != 0 || c1 == '\0')
506 break;
508 NYD2_LEAVE;
509 return cmp;
512 FL char const *
513 asccasestr(char const *s1, char const *s2)
515 char c2, c1;
516 NYD2_ENTER;
518 for (c2 = *s2++, c2 = lowerconv(c2);;) {
519 if ((c1 = *s1++) == '\0') {
520 s1 = NULL;
521 break;
523 if (lowerconv(c1) == c2 && is_asccaseprefix(s1, s2)) {
524 --s1;
525 break;
528 NYD2_LEAVE;
529 return s1;
532 FL bool_t
533 is_asccaseprefix(char const *as1, char const *as2)
535 bool_t rv = FAL0;
536 NYD2_ENTER;
538 for (;; ++as1, ++as2) {
539 char c1 = lowerconv(*as1), c2 = lowerconv(*as2);
541 if ((rv = (c2 == '\0')))
542 break;
543 if (c1 != c2)
544 break;
546 NYD2_LEAVE;
547 return rv;
550 FL struct str *
551 (n_str_assign_buf)(struct str *self, char const *buf, uiz_t buflen
552 SMALLOC_DEBUG_ARGS){
553 NYD_ENTER;
554 if(buflen == UIZ_MAX)
555 buflen = (buf == NULL) ? 0 : strlen(buf);
557 assert(buflen == 0 || buf != NULL);
559 if(LIKELY(buflen > 0)){
560 self->s = (srealloc)(self->s, (self->l = buflen) +1
561 SMALLOC_DEBUG_ARGSCALL);
562 memcpy(self->s, buf, buflen);
563 self->s[buflen] = '\0';
564 }else
565 self->l = 0;
566 NYD_LEAVE;
567 return self;
570 FL struct str *
571 (n_str_add_buf)(struct str *self, char const *buf, uiz_t buflen
572 SMALLOC_DEBUG_ARGS){
573 NYD_ENTER;
574 if(buflen == UIZ_MAX)
575 buflen = (buf == NULL) ? 0 : strlen(buf);
577 assert(buflen == 0 || buf != NULL);
579 if(buflen > 0) {
580 size_t osl = self->l, nsl = osl + buflen;
582 self->s = (srealloc)(self->s, (self->l = nsl) +1 SMALLOC_DEBUG_ARGSCALL);
583 memcpy(self->s + osl, buf, buflen);
584 self->s[nsl] = '\0';
586 NYD_LEAVE;
587 return self;
591 * struct n_string TODO extend, optimize
594 FL struct n_string *
595 (n_string_clear)(struct n_string *self SMALLOC_DEBUG_ARGS){
596 NYD_ENTER;
598 assert(self != NULL);
600 if(self->s_size != 0){
601 if(!self->s_auto){
602 #ifdef HAVE_MEMORY_DEBUG
603 sfree(self->s_dat SMALLOC_DEBUG_ARGSCALL);
604 #else
605 free(self->s_dat);
606 #endif
608 self->s_len = self->s_auto = self->s_size = 0;
609 self->s_dat = NULL;
611 NYD_LEAVE;
612 return self;
/* Ensure that self can take up noof more bytes.
 * If the free room (s_size - s_len) is not larger than noof, the buffer is
 * regrown to n_ALIGN(s_size + 1 + noof) bytes; s_size is set to that value
 * minus one.  Non-auto strings are resized via srealloc(); for s_auto
 * strings a new salloc() buffer is obtained and the s_len bytes in use are
 * copied over (the old buffer is not released here).
 * NOTE(review): the size arithmetic is done in ui32_t and the overflow check
 * is compiled out (#if 0), so a huge noof is silently truncated -- confirm
 * that callers cannot pass such values */
615 FL struct n_string *
616 (n_string_reserve)(struct n_string *self, size_t noof SMALLOC_DEBUG_ARGS){
617 ui32_t i, l, s;
618 NYD_ENTER;
620 assert(self != NULL);
622 s = self->s_size;
623 l = self->s_len;
624 #if 0 /* FIXME memory alloc too large */
625 if(SI32_MAX - n_ALIGN(1) - l <= noof)
626 n_panic(_("Memory allocation too large"));
627 #endif
/* i: currently unused room; grow if it does not exceed the request */
629 if((i = s - l) <= noof){
/* (s - l) + 1 + l + noof == s + 1 + noof */
630 i += 1 + l + (ui32_t)noof;
631 i = n_ALIGN(i);
632 self->s_size = i -1;
634 if(!self->s_auto)
635 self->s_dat = (srealloc)(self->s_dat, i SMALLOC_DEBUG_ARGSCALL);
636 else{
637 char *ndat = (salloc)(i SALLOC_DEBUG_ARGSCALL);
639 if(l > 0)
640 memcpy(ndat, self->s_dat, l);
641 self->s_dat = ndat;
644 NYD_LEAVE;
645 return self;
648 FL struct n_string *
649 (n_string_push_buf)(struct n_string *self, char const *buf, size_t buflen
650 SMALLOC_DEBUG_ARGS){
651 NYD_ENTER;
653 assert(self != NULL);
654 assert(buflen == 0 || buf != NULL);
656 if(buflen == UIZ_MAX)
657 buflen = (buf == NULL) ? 0 : strlen(buf);
659 if(buflen > 0){
660 ui32_t i;
662 self = (n_string_reserve)(self, buflen SMALLOC_DEBUG_ARGSCALL);
663 memcpy(self->s_dat + (i = self->s_len), buf, buflen);
664 self->s_len = (i += (ui32_t)buflen);
666 NYD_LEAVE;
667 return self;
670 FL struct n_string *
671 (n_string_push_c)(struct n_string *self, char c SMALLOC_DEBUG_ARGS){
672 NYD_ENTER;
674 assert(self != NULL);
676 if(self->s_len + 1 >= self->s_size)
677 self = (n_string_reserve)(self, 1 SMALLOC_DEBUG_ARGSCALL);
678 self->s_dat[self->s_len++] = c;
679 NYD_LEAVE;
680 return self;
683 FL struct n_string *
684 (n_string_unshift_buf)(struct n_string *self, char const *buf, size_t buflen
685 SMALLOC_DEBUG_ARGS){
686 NYD_ENTER;
688 assert(self != NULL);
689 assert(buflen == 0 || buf != NULL);
691 if(buflen == UIZ_MAX)
692 buflen = (buf == NULL) ? 0 : strlen(buf);
694 if(buflen > 0){
695 self = (n_string_reserve)(self, buflen SMALLOC_DEBUG_ARGSCALL);
696 if(self->s_len > 0)
697 memmove(self->s_dat + buflen, self->s_dat, self->s_len);
698 memcpy(self->s_dat, buf, buflen);
699 self->s_len += (ui32_t)buflen;
701 NYD_LEAVE;
702 return self;
705 FL struct n_string *
706 (n_string_unshift_c)(struct n_string *self, char c SMALLOC_DEBUG_ARGS){
707 NYD_ENTER;
709 assert(self != NULL);
711 if(self->s_len + 1 >= self->s_size)
712 self = (n_string_reserve)(self, 1 SMALLOC_DEBUG_ARGSCALL);
713 if(self->s_len > 0)
714 memmove(self->s_dat + 1, self->s_dat, self->s_len);
715 self->s_dat[0] = c;
716 ++self->s_len;
717 NYD_LEAVE;
718 return self;
721 FL char *
722 (n_string_cp)(struct n_string *self SMALLOC_DEBUG_ARGS){
723 char *rv;
724 NYD2_ENTER;
726 assert(self != NULL);
728 if(self->s_size == 0)
729 self = (n_string_reserve)(self, 1 SMALLOC_DEBUG_ARGSCALL);
731 (rv = self->s_dat)[self->s_len] = '\0';
732 NYD2_LEAVE;
733 return rv;
736 FL char const *
737 n_string_cp_const(struct n_string const *self){
738 char const *rv;
739 NYD2_ENTER;
741 assert(self != NULL);
743 if(self->s_size != 0){
744 ((struct n_string*)UNCONST(self))->s_dat[self->s_len] = '\0';
745 rv = self->s_dat;
746 }else
747 rv = "";
748 NYD2_LEAVE;
749 return rv;
753 * UTF-8
756 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
757 FL ui32_t
758 n_utf8_to_utf32(char const **bdat, size_t *blen) /* TODO check false UTF8 */
760 char const *cp;
761 size_t l;
762 ui32_t c, x;
763 NYD2_ENTER;
765 cp = *bdat;
766 l = *blen - 1;
767 x = (ui8_t)*cp++;
769 if (x <= 0x7F)
770 c = x;
771 else {
772 if ((x & 0xE0) == 0xC0) {
773 if (l < 1)
774 goto jerr;
775 l -= 1;
776 c = x & ~0xC0;
777 } else if ((x & 0xF0) == 0xE0) {
778 if (l < 2)
779 goto jerr;
780 l -= 2;
781 c = x & ~0xE0;
782 c <<= 6;
783 x = (ui8_t)*cp++;
784 c |= x & 0x7F;
785 } else {
786 if (l < 3)
787 goto jerr;
788 l -= 3;
789 c = x & ~0xF0;
790 c <<= 6;
791 x = (ui8_t)*cp++;
792 c |= x & 0x7F;
793 c <<= 6;
794 x = (ui8_t)*cp++;
795 c |= x & 0x7F;
797 c <<= 6;
798 x = (ui8_t)*cp++;
799 c |= x & 0x7F;
802 jleave:
803 *bdat = cp;
804 *blen = l;
805 NYD2_LEAVE;
806 return c;
807 jerr:
808 c = UI32_MAX;
809 goto jleave;
812 FL size_t
813 n_utf32_to_utf8(ui32_t c, char *buf)
815 struct {
816 ui32_t lower_bound;
817 ui32_t upper_bound;
818 ui8_t enc_leader;
819 ui8_t enc_lval;
820 ui8_t dec_leader_mask;
821 ui8_t dec_leader_val_mask;
822 ui8_t dec_bytes_togo;
823 ui8_t cat_index;
824 ui8_t __dummy[2];
825 } const _cat[] = {
826 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
827 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
828 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
829 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
830 /* xxx _from_utf32() simply assumes magic code points for surrogates!
831 * xxx (However, should we ever get yet another surrogate range we
832 * xxx need to deal with that all over the place anyway? */
833 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
834 {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
835 }, *catp = _cat;
836 size_t l;
838 if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
839 if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
840 if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
841 if (c <= _cat[3].upper_bound) {
842 /* Surrogates may not be converted (Compatibility rule C10) */
843 if (c >= 0xD800u && c <= 0xDFFFu)
844 goto jerr;
845 catp += 3;
846 goto j3;
848 if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
849 jerr:
850 c = 0xFFFDu; /* Unicode replacement character */
851 catp += 3;
852 goto j3;
854 buf[3] = (char)0x80 | (char)(c & 0x3F); c >>= 6;
856 buf[2] = (char)0x80 | (char)(c & 0x3F); c >>= 6;
858 buf[1] = (char)0x80 | (char)(c & 0x3F); c >>= 6;
860 buf[0] = (char)catp->enc_leader | (char)(c);
862 buf[catp->enc_lval] = '\0';
863 l = catp->enc_lval;
864 NYD2_LEAVE;
865 return l;
867 #endif /* HAVE_NATCH_CHAR || HAVE_ICONV */
870 * Our iconv(3) wrapper
872 #ifdef HAVE_ICONV
874 static void _ic_toupper(char *dest, char const *src);
875 static void _ic_stripdash(char *p);
877 static void
878 _ic_toupper(char *dest, char const *src)
880 NYD2_ENTER;
882 *dest++ = upperconv(*src);
883 while (*src++ != '\0');
884 NYD2_LEAVE;
887 static void
888 _ic_stripdash(char *p)
890 char *q = p;
891 NYD2_ENTER;
894 if (*(q = p) != '-')
895 ++q;
896 while (*p++ != '\0');
897 NYD2_LEAVE;
/* Open an iconv(3) descriptor for converting fromcode to tocode, trying
 * several spellings of the charset names until iconv_open() accepts one.
 * Returns the descriptor, or (iconv_t)-1 if all attempts failed.
 * A fromcode of "unknown-8bit" or "binary" is first replaced with the value
 * of the charset_unknown_8bit variable or else with charset_get_8bit().
 * Attempts, in order: names as given; with a leading "iso-" / "iso"
 * removed; those converted to upper case; those with dashes removed */
900 FL iconv_t
901 n_iconv_open(char const *tocode, char const *fromcode)
903 iconv_t id;
904 char *t, *f;
905 NYD_ENTER;
907 if ((!asccasecmp(fromcode, "unknown-8bit") ||
908 !asccasecmp(fromcode, "binary")) &&
909 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
910 fromcode = charset_get_8bit();
912 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
913 goto jleave;
915 /* Remove the "iso-" prefixes for Solaris */
916 if (!ascncasecmp(tocode, "iso-", 4))
917 tocode += 4;
918 else if (!ascncasecmp(tocode, "iso", 3))
919 tocode += 3;
920 if (!ascncasecmp(fromcode, "iso-", 4))
921 fromcode += 4;
922 else if (!ascncasecmp(fromcode, "iso", 3))
923 fromcode += 3;
/* Nothing left of a name after removing the prefix: give up */
924 if (*tocode == '\0' || *fromcode == '\0') {
925 id = (iconv_t)-1;
926 goto jleave;
928 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
929 goto jleave;
931 /* Solaris prefers upper-case charset names. Don't ask... */
/* t and f are working copies in salloc() storage, modified in place below */
932 t = salloc(strlen(tocode) +1);
933 _ic_toupper(t, tocode);
934 f = salloc(strlen(fromcode) +1);
935 _ic_toupper(f, fromcode);
936 if ((id = iconv_open(t, f)) != (iconv_t)-1)
937 goto jleave;
939 /* Strip dashes for UnixWare */
940 _ic_stripdash(t);
941 _ic_stripdash(f);
942 if ((id = iconv_open(t, f)) != (iconv_t)-1)
943 goto jleave;
945 /* Add your vendor's sillynesses here */
947 /* If the encoding names are equal at this point, they are just not
948 * understood by iconv(), and we cannot sensibly use it in any way. We do
949 * not perform this as an optimization above since iconv() can otherwise be
950 * used to check the validity of the input even with identical encoding
951 * names */
/* id is still (iconv_t)-1 here; errno is cleared to flag this case */
952 if (!strcmp(t, f))
953 errno = 0;
954 jleave:
955 NYD_LEAVE;
956 return id;
959 FL void
960 n_iconv_close(iconv_t cd)
962 NYD_ENTER;
963 iconv_close(cd);
964 if (cd == iconvd)
965 iconvd = (iconv_t)-1;
966 NYD_LEAVE;
969 FL void
970 n_iconv_reset(iconv_t cd)
972 NYD_ENTER;
973 iconv(cd, NULL, NULL, NULL, NULL);
974 NYD_LEAVE;
977 /* (2012-09-24: export and use it exclusively to isolate prototype problems
978 * (*inb* is 'char const **' except in POSIX) in a single place.
979 * GNU libiconv even allows for configuration time const/non-const..
980 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
981 * support compiler invocations which bail on error, so no -Werror */
982 /* Citrus project? */
983 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
984 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
985 # if OS_DRAGONFLY
986 # define __INBCAST(S) (char ** __restrict__)UNCONST(S)
987 # else
988 # define __INBCAST(S) (char const **)UNCONST(S)
989 # endif
990 # elif OS_SUNOS || OS_SOLARIS
991 # define __INBCAST(S) (char const ** __restrict__)UNCONST(S)
992 # endif
993 # ifndef __INBCAST
994 # define __INBCAST(S) (char **)UNCONST(S)
995 # endif
/* Convert *inbleft bytes at *inb into the *outbleft bytes at *outb via
 * iconv(3); all four are updated as iconv() does.
 * Returns 0 on success, otherwise the errno value of the failed iconv().
 * With skipilseq set, an EILSEQ failure is handled by skipping one input
 * byte, storing a '?' in the output and retrying.  If the output has no room
 * for the '?', E2BIG is returned.  If EILSEQ occurs with no input left, a
 * NUL is stored (when there is room) and EILSEQ is returned */
997 FL int
998 n_iconv_buf(iconv_t cd, char const **inb, size_t *inbleft,/*XXX redo iconv use*/
999 char **outb, size_t *outbleft, bool_t skipilseq)
1001 int err = 0;
1002 NYD2_ENTER;
1004 for (;;) {
1005 size_t sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft);
1006 if (sz != (size_t)-1)
1007 break;
/* Save errno at once, before anything else can change it */
1008 err = errno;
1009 if (!skipilseq || err != EILSEQ)
1010 break;
/* Skip the offending input byte */
1011 if (*inbleft > 0) {
1012 ++(*inb);
1013 --(*inbleft);
1014 } else if (*outbleft > 0) {
1015 **outb = '\0';
1016 break;
/* Emit the replacement for the skipped byte */
1018 if (*outbleft > 0/* TODO 0xFFFD 2*/) {
1019 /* TODO 0xFFFD (*outb)[0] = '[';
1020 * TODO (*outb)[1] = '?';
1021 * TODO 0xFFFD (*outb)[2] = ']';
1022 * TODO (*outb) += 3;
1023 * TODO (*outbleft) -= 3; */
1024 *(*outb)++ = '?';
1025 --*outbleft;
1026 } else {
1027 err = E2BIG;
1028 break;
1030 err = 0;
1032 NYD2_LEAVE;
1033 return err;
1035 # undef __INBCAST
/* Convert the content of in via n_iconv_buf() into out.
 * On entry out->s / out->l are taken as an existing srealloc()able buffer
 * and its size; the buffer is grown as necessary.  On return out->s is the
 * (possibly moved) buffer, NUL-terminated, and out->l is the number of bytes
 * produced.  If in_rest_or_null is not NULL it receives the position and
 * length of the input that was not consumed.
 * Returns 0 on success, otherwise the error of n_iconv_buf() (E2BIG is
 * handled internally by enlarging the buffer and starting over) */
1037 FL int
1038 n_iconv_str(iconv_t cd, struct str *out, struct str const *in,
1039 struct str *in_rest_or_null, bool_t skipilseq)
1041 int err;
1042 char *obb, *ob;
1043 char const *ib;
1044 size_t olb, ol, il;
1045 NYD2_ENTER;
1047 err = 0;
1048 obb = out->s;
1049 olb = out->l;
1050 ol = in->l;
/* Initial output size guess: twice the input length minus a sixteenth of it;
 * if the buffer on hand is not larger, allocate first (jumps into the loop) */
1052 ol = (ol << 1) - (ol >> 4);
1053 if (olb <= ol) {
1054 olb = ol;
1055 goto jrealloc;
1058 for (;;) {
/* Each round restarts with the complete input and output buffer */
1059 ib = in->s;
1060 il = in->l;
1061 ob = obb;
1062 ol = olb;
1063 err = n_iconv_buf(cd, &ib, &il, &ob, &ol, skipilseq);
1064 if (err == 0 || err != E2BIG)
1065 break;
/* Output too small: grow by the input length and try again */
1066 err = 0;
1067 olb += in->l;
1068 jrealloc:
1069 obb = srealloc(obb, olb +1);
1072 if (in_rest_or_null != NULL) {
1073 in_rest_or_null->s = UNCONST(ib);
1074 in_rest_or_null->l = il;
1076 out->s = obb;
/* olb - ol: buffer size minus the room that is left == bytes produced */
1077 out->s[out->l = olb - ol] = '\0';
1078 NYD2_LEAVE;
1079 return err;
1082 FL char *
1083 n_iconv_onetime_cp(char const *tocode, char const *fromcode,
1084 char const *input, bool_t skipilseq){
1085 struct str out, in;
1086 iconv_t icd;
1087 char *rv;
1088 NYD2_ENTER;
1090 rv = NULL;
1091 if(tocode == NULL)
1092 tocode = charset_get_lc();
1093 if(fromcode == NULL)
1094 fromcode = "utf-8";
1096 if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
1097 goto jleave;
1099 in.l = strlen(in.s = UNCONST(input)); /* logical */
1100 out.s = NULL, out.l = 0;
1101 if(!n_iconv_str(icd, &out, &in, NULL, skipilseq))
1102 rv = savestrbuf(out.s, out.l);
1103 if(out.s != NULL)
1104 free(out.s);
1106 iconv_close(icd);
1107 jleave:
1108 NYD2_LEAVE;
1109 return rv;
1111 #endif /* HAVE_ICONV */
1113 /* s-it-mode */