nail.1: tweak text of -a
[s-mailx.git] / strings.c
blob493c916ab03107fcacca2416fc275b30540748be
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2016 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 */
7 /*
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 #undef n_FILE
36 #define n_FILE strings
#ifndef HAVE_AMALGAMATION
# include "nail.h"
#endif

#include <ctype.h>
#include <limits.h>
44 FL char *
45 (savestr)(char const *str n_MEMORY_DEBUG_ARGS)
47 size_t size;
48 char *news;
49 NYD_ENTER;
51 size = strlen(str) +1;
52 news = (n_autorec_alloc)(NULL, size n_MEMORY_DEBUG_ARGSCALL);
53 memcpy(news, str, size);
54 NYD_LEAVE;
55 return news;
58 FL char *
59 (savestrbuf)(char const *sbuf, size_t sbuf_len n_MEMORY_DEBUG_ARGS)
61 char *news;
62 NYD_ENTER;
64 news = (n_autorec_alloc)(NULL, sbuf_len +1 n_MEMORY_DEBUG_ARGSCALL);
65 memcpy(news, sbuf, sbuf_len);
66 news[sbuf_len] = 0;
67 NYD_LEAVE;
68 return news;
71 FL char *
72 (savecatsep)(char const *s1, char sep, char const *s2 n_MEMORY_DEBUG_ARGS)
74 size_t l1, l2;
75 char *news;
76 NYD_ENTER;
78 l1 = (s1 != NULL) ? strlen(s1) : 0;
79 l2 = strlen(s2);
80 news = (n_autorec_alloc)(NULL, l1 + (sep != '\0') + l2 +1
81 n_MEMORY_DEBUG_ARGSCALL);
82 if (l1 > 0) {
83 memcpy(news + 0, s1, l1);
84 if (sep != '\0')
85 news[l1++] = sep;
87 memcpy(news + l1, s2, l2);
88 news[l1 + l2] = '\0';
89 NYD_LEAVE;
90 return news;
/*
 * Support routines, auto-reclaimed storage
 */
97 FL char *
98 (i_strdup)(char const *src n_MEMORY_DEBUG_ARGS)
100 size_t sz;
101 char *dest;
102 NYD_ENTER;
104 sz = strlen(src) +1;
105 dest = (n_autorec_alloc)(NULL, sz n_MEMORY_DEBUG_ARGSCALL);
106 i_strcpy(dest, src, sz);
107 NYD_LEAVE;
108 return dest;
111 FL struct str *
112 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
114 va_list vl;
115 size_t l;
116 char const *cs;
117 NYD_ENTER;
119 va_start(vl, self);
120 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
121 l += strlen(cs);
122 va_end(vl);
124 self->l = l;
125 self->s = salloc(l +1);
127 va_start(vl, self);
128 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
129 size_t i = strlen(cs);
130 memcpy(self->s + l, cs, i);
131 l += i;
133 self->s[l] = '\0';
134 va_end(vl);
135 NYD_LEAVE;
136 return self;
139 FL struct str *
140 (str_concat_cpa)(struct str *self, char const * const *cpa,
141 char const *sep_o_null n_MEMORY_DEBUG_ARGS)
143 size_t sonl, l;
144 char const * const *xcpa;
145 NYD_ENTER;
147 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
149 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
150 l += strlen(*xcpa) + sonl;
152 self->l = l;
153 self->s = (n_autorec_alloc)(NULL, l +1 n_MEMORY_DEBUG_ARGSCALL);
155 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
156 size_t i = strlen(*xcpa);
157 memcpy(self->s + l, *xcpa, i);
158 l += i;
159 if (sonl > 0) {
160 memcpy(self->s + l, sep_o_null, sonl);
161 l += sonl;
164 self->s[l] = '\0';
165 NYD_LEAVE;
166 return self;
/*
 * Routines that are not related to auto-reclaimed storage follow.
 */
173 FL int
174 anyof(char const *s1, char const *s2)
176 NYD2_ENTER;
177 for (; *s1 != '\0'; ++s1)
178 if (strchr(s2, *s1) != NULL)
179 break;
180 NYD2_LEAVE;
181 return (*s1 != '\0');
184 FL char *
185 n_strsep(char **iolist, char sep, bool_t ignore_empty)
187 char *base, *cp;
188 NYD2_ENTER;
190 for (base = *iolist; base != NULL; base = *iolist) {
191 while (*base != '\0' && blankspacechar(*base))
192 ++base;
193 cp = strchr(base, sep);
194 if (cp != NULL)
195 *iolist = cp + 1;
196 else {
197 *iolist = NULL;
198 cp = base + strlen(base);
200 while (cp > base && blankspacechar(cp[-1]))
201 --cp;
202 *cp = '\0';
203 if (*base != '\0' || !ignore_empty)
204 break;
206 NYD2_LEAVE;
207 return base;
210 FL void
211 i_strcpy(char *dest, char const *src, size_t size)
213 NYD2_ENTER;
214 if (size > 0) {
215 for (;; ++dest, ++src)
216 if ((*dest = lowerconv(*src)) == '\0') {
217 break;
218 } else if (--size == 0) {
219 *dest = '\0';
220 break;
223 NYD2_LEAVE;
226 FL int
227 is_prefix(char const *as1, char const *as2)
229 char c;
230 NYD2_ENTER;
232 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
233 if (*as2 == '\0')
234 break;
235 NYD2_LEAVE;
236 return (c == '\0');
239 FL char *
240 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
242 char const *cp;
243 size_t i;
244 char c, *rv;
245 NYD2_ENTER;
247 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
248 if (c == '"' || c == '\\')
249 ++i;
250 rv = salloc(i +1);
252 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
253 if (c == '"' || c == '\\')
254 rv[i++] = '\\';
255 rv[i] = '\0';
256 NYD2_LEAVE;
257 return rv;
260 FL char *
261 laststring(char *linebuf, bool_t *needs_list, bool_t strip)
263 char *cp, *p, quoted;
264 NYD_ENTER;
266 /* Anything to do at all? */
267 if (*(cp = linebuf) == '\0')
268 goto jnull;
269 cp += strlen(linebuf) -1;
271 /* Strip away trailing blanks */
272 while (spacechar(*cp) && cp > linebuf)
273 --cp;
274 cp[1] = '\0';
275 if (cp == linebuf)
276 goto jleave;
278 /* Now search for the BOS of the "last string" */
279 quoted = *cp;
280 if (quoted == '\'' || quoted == '"') {
281 if (strip)
282 *cp = '\0';
283 } else
284 quoted = ' ';
286 while (cp > linebuf) {
287 --cp;
288 if (quoted != ' ') {
289 if (*cp != quoted)
290 continue;
291 } else if (!spacechar(*cp))
292 continue;
293 if (cp == linebuf || cp[-1] != '\\') {
294 /* When in whitespace mode, WS prefix doesn't belong */
295 if (quoted == ' ')
296 ++cp;
297 break;
299 /* Expand the escaped quote character */
300 for (p = --cp; (p[0] = p[1]) != '\0'; ++p)
303 if (strip && quoted != ' ' && *cp == quoted)
304 for (p = cp; (p[0] = p[1]) != '\0'; ++p)
307 /* The "last string" has been skipped over, but still, try to step backwards
308 * until we are at BOS or see whitespace, so as to make possible things like
309 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
310 while (cp > linebuf) {
311 --cp;
312 if (spacechar(*cp)) {
313 p = cp;
314 *cp++ = '\0';
315 /* We can furtherly release our callees if we now decide whether the
316 * remaining non-"last string" line content contains non-WS */
317 while (--p >= linebuf)
318 if (!spacechar(*p))
319 goto jleave;
320 linebuf = cp;
321 break;
325 jleave:
326 if (cp != NULL && *cp == '\0')
327 goto jnull;
328 *needs_list = (cp != linebuf && *linebuf != '\0');
329 j_leave:
330 NYD_LEAVE;
331 return cp;
332 jnull:
333 *needs_list = FAL0;
334 cp = NULL;
335 goto j_leave;
338 FL void
339 makelow(char *cp) /* TODO isn't that crap? --> */
341 NYD_ENTER;
342 #ifdef HAVE_C90AMEND1
343 if (mb_cur_max > 1) {
344 char *tp = cp;
345 wchar_t wc;
346 int len;
348 while (*cp != '\0') {
349 len = mbtowc(&wc, cp, mb_cur_max);
350 if (len < 0)
351 *tp++ = *cp++;
352 else {
353 wc = towlower(wc);
354 if (wctomb(tp, wc) == len)
355 tp += len, cp += len;
356 else
357 *tp++ = *cp++; /* <-- at least here */
360 } else
361 #endif
364 *cp = tolower((uc_i)*cp);
365 while (*cp++ != '\0');
367 NYD_LEAVE;
370 FL bool_t
371 substr(char const *str, char const *sub)
373 char const *cp, *backup;
374 NYD_ENTER;
376 cp = sub;
377 backup = str;
378 while (*str != '\0' && *cp != '\0') {
379 #ifdef HAVE_C90AMEND1
380 if (mb_cur_max > 1) {
381 wchar_t c, c2;
382 int sz;
384 if ((sz = mbtowc(&c, cp, mb_cur_max)) == -1)
385 goto Jsinglebyte;
386 cp += sz;
387 if ((sz = mbtowc(&c2, str, mb_cur_max)) == -1)
388 goto Jsinglebyte;
389 str += sz;
390 c = towupper(c);
391 c2 = towupper(c2);
392 if (c != c2) {
393 if ((sz = mbtowc(&c, backup, mb_cur_max)) > 0) {
394 backup += sz;
395 str = backup;
396 } else
397 str = ++backup;
398 cp = sub;
400 } else
401 Jsinglebyte:
402 #endif
404 int c, c2;
406 c = *cp++ & 0377;
407 if (islower(c))
408 c = toupper(c);
409 c2 = *str++ & 0377;
410 if (islower(c2))
411 c2 = toupper(c2);
412 if (c != c2) {
413 str = ++backup;
414 cp = sub;
418 NYD_LEAVE;
419 return (*cp == '\0');
422 FL char *
423 sstpcpy(char *dst, char const *src)
425 NYD2_ENTER;
426 while ((*dst = *src++) != '\0')
427 ++dst;
428 NYD2_LEAVE;
429 return dst;
432 FL char *
433 (sstrdup)(char const *cp n_MEMORY_DEBUG_ARGS)
435 char *dp;
436 NYD2_ENTER;
438 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) n_MEMORY_DEBUG_ARGSCALL);
439 NYD2_LEAVE;
440 return dp;
443 FL char *
444 (sbufdup)(char const *cp, size_t len n_MEMORY_DEBUG_ARGS)
446 char *dp = NULL;
447 NYD2_ENTER;
449 dp = (n_alloc)(len +1 n_MEMORY_DEBUG_ARGSCALL);
450 if (cp != NULL)
451 memcpy(dp, cp, len);
452 dp[len] = '\0';
453 NYD2_LEAVE;
454 return dp;
457 FL ssize_t
458 n_strscpy(char *dst, char const *src, size_t dstsize){
459 ssize_t rv;
460 NYD2_ENTER;
462 if(n_LIKELY(dstsize > 0)){
463 rv = 0;
465 if((dst[rv] = src[rv]) == '\0')
466 goto jleave;
467 ++rv;
468 }while(--dstsize > 0);
469 dst[--rv] = '\0';
471 #ifdef HAVE_DEVEL
472 else
473 assert(dstsize > 0);
474 #endif
475 rv = -1;
476 jleave:
477 NYD2_LEAVE;
478 return rv;
481 FL int
482 asccasecmp(char const *s1, char const *s2)
484 int cmp;
485 NYD2_ENTER;
487 for (;;) {
488 char c1 = *s1++, c2 = *s2++;
489 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
490 break;
492 NYD2_LEAVE;
493 return cmp;
496 FL int
497 ascncasecmp(char const *s1, char const *s2, size_t sz)
499 int cmp = 0;
500 NYD2_ENTER;
502 while (sz-- > 0) {
503 char c1 = *s1++, c2 = *s2++;
504 cmp = (ui8_t)lowerconv(c1);
505 cmp -= (ui8_t)lowerconv(c2);
506 if (cmp != 0 || c1 == '\0')
507 break;
509 NYD2_LEAVE;
510 return cmp;
513 FL char const *
514 asccasestr(char const *s1, char const *s2)
516 char c2, c1;
517 NYD2_ENTER;
519 for (c2 = *s2++, c2 = lowerconv(c2);;) {
520 if ((c1 = *s1++) == '\0') {
521 s1 = NULL;
522 break;
524 if (lowerconv(c1) == c2 && is_asccaseprefix(s1, s2)) {
525 --s1;
526 break;
529 NYD2_LEAVE;
530 return s1;
533 FL bool_t
534 is_asccaseprefix(char const *as1, char const *as2)
536 bool_t rv = FAL0;
537 NYD2_ENTER;
539 for (;; ++as1, ++as2) {
540 char c1 = lowerconv(*as1), c2 = lowerconv(*as2);
542 if ((rv = (c2 == '\0')))
543 break;
544 if (c1 != c2)
545 break;
547 NYD2_LEAVE;
548 return rv;
551 FL struct str *
552 (n_str_assign_buf)(struct str *self, char const *buf, uiz_t buflen
553 n_MEMORY_DEBUG_ARGS){
554 NYD_ENTER;
555 if(buflen == UIZ_MAX)
556 buflen = (buf == NULL) ? 0 : strlen(buf);
558 assert(buflen == 0 || buf != NULL);
560 if(n_LIKELY(buflen > 0)){
561 self->s = (n_realloc)(self->s, (self->l = buflen) +1
562 n_MEMORY_DEBUG_ARGSCALL);
563 memcpy(self->s, buf, buflen);
564 self->s[buflen] = '\0';
565 }else
566 self->l = 0;
567 NYD_LEAVE;
568 return self;
571 FL struct str *
572 (n_str_add_buf)(struct str *self, char const *buf, uiz_t buflen
573 n_MEMORY_DEBUG_ARGS){
574 NYD_ENTER;
575 if(buflen == UIZ_MAX)
576 buflen = (buf == NULL) ? 0 : strlen(buf);
578 assert(buflen == 0 || buf != NULL);
580 if(buflen > 0) {
581 size_t osl = self->l, nsl = osl + buflen;
583 self->s = (n_realloc)(self->s, (self->l = nsl) +1
584 n_MEMORY_DEBUG_ARGSCALL);
585 memcpy(self->s + osl, buf, buflen);
586 self->s[nsl] = '\0';
588 NYD_LEAVE;
589 return self;
/*
 * struct n_string TODO extend, optimize
 */
596 FL struct n_string *
597 (n_string_clear)(struct n_string *self n_MEMORY_DEBUG_ARGS){
598 NYD_ENTER;
600 assert(self != NULL);
602 if(self->s_size != 0){
603 if(!self->s_auto){
604 (n_free)(self->s_dat n_MEMORY_DEBUG_ARGSCALL);
606 self->s_len = self->s_auto = self->s_size = 0;
607 self->s_dat = NULL;
609 NYD_LEAVE;
610 return self;
613 FL struct n_string *
614 (n_string_reserve)(struct n_string *self, size_t noof n_MEMORY_DEBUG_ARGS){
615 ui32_t i, l, s;
616 NYD_ENTER;
618 assert(self != NULL);
620 s = self->s_size;
621 l = self->s_len;
622 #if 0 /* FIXME memory alloc too large */
623 if(SI32_MAX - n_ALIGN(1) - l <= noof)
624 n_panic(_("Memory allocation too large"));
625 #endif
627 if((i = s - l) <= ++noof){
628 i += l + (ui32_t)noof;
629 i = n_ALIGN(i);
630 self->s_size = i -1;
632 if(!self->s_auto)
633 self->s_dat = (n_realloc)(self->s_dat, i n_MEMORY_DEBUG_ARGSCALL);
634 else{
635 char *ndat = (n_autorec_alloc)(NULL, i n_MEMORY_DEBUG_ARGSCALL);
637 if(l > 0)
638 memcpy(ndat, self->s_dat, l);
639 self->s_dat = ndat;
642 NYD_LEAVE;
643 return self;
646 FL struct n_string *
647 (n_string_resize)(struct n_string *self, size_t nlen n_MEMORY_DEBUG_ARGS){
648 NYD_ENTER;
650 assert(self != NULL);
651 #if 0 /* FIXME memory alloc too large */
652 if(SI32_MAX - n_ALIGN(1) - l <= noof)
653 n_panic(_("Memory allocation too large"));
654 #endif
656 if(self->s_len < nlen)
657 self = (n_string_reserve)(self, nlen n_MEMORY_DEBUG_ARGSCALL);
658 self->s_len = (ui32_t)nlen;
659 NYD_LEAVE;
660 return self;
663 FL struct n_string *
664 (n_string_push_buf)(struct n_string *self, char const *buf, size_t buflen
665 n_MEMORY_DEBUG_ARGS){
666 NYD_ENTER;
668 assert(self != NULL);
669 assert(buflen == 0 || buf != NULL);
671 if(buflen == UIZ_MAX)
672 buflen = (buf == NULL) ? 0 : strlen(buf);
674 if(buflen > 0){
675 ui32_t i;
677 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
678 memcpy(&self->s_dat[i = self->s_len], buf, buflen);
679 self->s_len = (i += (ui32_t)buflen);
681 NYD_LEAVE;
682 return self;
685 FL struct n_string *
686 (n_string_push_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
687 NYD_ENTER;
689 assert(self != NULL);
691 if(self->s_len + 1 >= self->s_size)
692 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
693 self->s_dat[self->s_len++] = c;
694 NYD_LEAVE;
695 return self;
698 FL struct n_string *
699 (n_string_unshift_buf)(struct n_string *self, char const *buf, size_t buflen
700 n_MEMORY_DEBUG_ARGS){
701 NYD_ENTER;
703 assert(self != NULL);
704 assert(buflen == 0 || buf != NULL);
706 if(buflen == UIZ_MAX)
707 buflen = (buf == NULL) ? 0 : strlen(buf);
709 if(buflen > 0){
710 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
711 if(self->s_len > 0)
712 memmove(&self->s_dat[buflen], self->s_dat, self->s_len);
713 memcpy(self->s_dat, buf, buflen);
714 self->s_len += (ui32_t)buflen;
716 NYD_LEAVE;
717 return self;
720 FL struct n_string *
721 (n_string_unshift_c)(struct n_string *self, char c n_MEMORY_DEBUG_ARGS){
722 NYD_ENTER;
724 assert(self != NULL);
726 if(self->s_len + 1 >= self->s_size)
727 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
728 if(self->s_len > 0)
729 memmove(&self->s_dat[1], self->s_dat, self->s_len);
730 self->s_dat[0] = c;
731 ++self->s_len;
732 NYD_LEAVE;
733 return self;
736 FL struct n_string *
737 (n_string_insert_buf)(struct n_string *self, size_t idx,
738 char const *buf, size_t buflen n_MEMORY_DEBUG_ARGS){
739 NYD_ENTER;
741 assert(self != NULL);
742 assert(buflen == 0 || buf != NULL);
743 assert(idx <= self->s_len);
745 if(buflen == UIZ_MAX)
746 buflen = (buf == NULL) ? 0 : strlen(buf);
748 if(buflen > 0){
749 self = (n_string_reserve)(self, buflen n_MEMORY_DEBUG_ARGSCALL);
750 if(self->s_len > 0)
751 memmove(&self->s_dat[idx + buflen], &self->s_dat[idx],
752 self->s_len - idx);
753 memcpy(&self->s_dat[idx], buf, buflen);
754 self->s_len += (ui32_t)buflen;
756 NYD_LEAVE;
757 return self;
760 FL struct n_string *
761 (n_string_insert_c)(struct n_string *self, size_t idx,
762 char c n_MEMORY_DEBUG_ARGS){
763 NYD_ENTER;
765 assert(self != NULL);
766 assert(idx <= self->s_len);
768 if(self->s_len + 1 >= self->s_size)
769 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
770 if(self->s_len > 0)
771 memmove(&self->s_dat[idx + 1], &self->s_dat[idx], self->s_len - idx);
772 self->s_dat[idx] = c;
773 ++self->s_len;
774 NYD_LEAVE;
775 return self;
778 FL struct n_string *
779 n_string_cut(struct n_string *self, size_t idx, size_t len){
780 NYD_ENTER;
782 assert(self != NULL);
783 assert(UIZ_MAX - idx > len);
784 assert(SI32_MAX >= idx + len);
785 assert(idx + len <= self->s_len);
787 if(len > 0)
788 memmove(&self->s_dat[idx], &self->s_dat[idx + len],
789 (self->s_len -= len) - idx);
790 NYD_LEAVE;
791 return self;
794 FL char *
795 (n_string_cp)(struct n_string *self n_MEMORY_DEBUG_ARGS){
796 char *rv;
797 NYD2_ENTER;
799 assert(self != NULL);
801 if(self->s_size == 0)
802 self = (n_string_reserve)(self, 1 n_MEMORY_DEBUG_ARGSCALL);
804 (rv = self->s_dat)[self->s_len] = '\0';
805 NYD2_LEAVE;
806 return rv;
809 FL char const *
810 n_string_cp_const(struct n_string const *self){
811 char const *rv;
812 NYD2_ENTER;
814 assert(self != NULL);
816 if(self->s_size != 0){
817 ((struct n_string*)n_UNCONST(self))->s_dat[self->s_len] = '\0';
818 rv = self->s_dat;
819 }else
820 rv = n_empty;
821 NYD2_LEAVE;
822 return rv;
/*
 * UTF-8
 */
829 FL ui32_t
830 n_utf8_to_utf32(char const **bdat, size_t *blen) /* TODO check false UTF8 */
832 char const *cp;
833 size_t l;
834 ui32_t c, x;
835 NYD2_ENTER;
837 cp = *bdat;
838 l = *blen - 1;
839 x = (ui8_t)*cp++;
841 if (x <= 0x7Fu)
842 c = x;
843 else {
844 if ((x & 0xE0u) == 0xC0u) {
845 if (l < 1)
846 goto jerr;
847 l -= 1;
848 c = x & ~0xC0u;
849 } else if ((x & 0xF0u) == 0xE0u) {
850 if (l < 2)
851 goto jerr;
852 l -= 2;
853 c = x & ~0xE0u;
854 c <<= 6;
855 x = (ui8_t)*cp++;
856 c |= x & 0x7Fu;
857 } else {
858 if (l < 3)
859 goto jerr;
860 l -= 3;
861 c = x & ~0xF0u;
862 c <<= 6;
863 x = (ui8_t)*cp++;
864 c |= x & 0x7Fu;
865 c <<= 6;
866 x = (ui8_t)*cp++;
867 c |= x & 0x7Fu;
869 c <<= 6;
870 x = (ui8_t)*cp++;
871 c |= x & 0x7Fu;
874 jleave:
875 *bdat = cp;
876 *blen = l;
877 NYD2_LEAVE;
878 return c;
879 jerr:
880 c = UI32_MAX;
881 goto jleave;
884 FL size_t
885 n_utf32_to_utf8(ui32_t c, char *buf)
887 struct {
888 ui32_t lower_bound;
889 ui32_t upper_bound;
890 ui8_t enc_leader;
891 ui8_t enc_lval;
892 ui8_t dec_leader_mask;
893 ui8_t dec_leader_val_mask;
894 ui8_t dec_bytes_togo;
895 ui8_t cat_index;
896 ui8_t __dummy[2];
897 } const _cat[] = {
898 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
899 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
900 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
901 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
902 /* xxx _from_utf32() simply assumes magic code points for surrogates!
903 * xxx (However, should we ever get yet another surrogate range we
904 * xxx need to deal with that all over the place anyway? */
905 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
906 {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
907 }, *catp = _cat;
908 size_t l;
910 if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
911 if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
912 if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
913 if (c <= _cat[3].upper_bound) {
914 /* Surrogates may not be converted (Compatibility rule C10) */
915 if (c >= 0xD800u && c <= 0xDFFFu)
916 goto jerr;
917 catp += 3;
918 goto j3;
920 if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
921 jerr:
922 c = 0xFFFDu; /* Unicode replacement character */
923 catp += 3;
924 goto j3;
926 buf[3] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
928 buf[2] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
930 buf[1] = (char)0x80u | (char)(c & 0x3Fu); c >>= 6;
932 buf[0] = (char)catp->enc_leader | (char)(c);
934 buf[catp->enc_lval] = '\0';
935 l = catp->enc_lval;
936 NYD2_LEAVE;
937 return l;
/*
 * Our iconv(3) wrapper
 */
943 #ifdef HAVE_ICONV
945 static void _ic_toupper(char *dest, char const *src);
946 static void _ic_stripdash(char *p);
948 static void
949 _ic_toupper(char *dest, char const *src)
951 NYD2_ENTER;
953 *dest++ = upperconv(*src);
954 while (*src++ != '\0');
955 NYD2_LEAVE;
958 static void
959 _ic_stripdash(char *p)
961 char *q = p;
962 NYD2_ENTER;
965 if (*(q = p) != '-')
966 ++q;
967 while (*p++ != '\0');
968 NYD2_LEAVE;
971 FL iconv_t
972 n_iconv_open(char const *tocode, char const *fromcode)
974 iconv_t id;
975 char *t, *f;
976 NYD_ENTER;
978 if ((!asccasecmp(fromcode, "unknown-8bit") ||
979 !asccasecmp(fromcode, "binary")) &&
980 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
981 fromcode = charset_get_8bit();
983 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
984 goto jleave;
986 /* Remove the "iso-" prefixes for Solaris */
987 if (!ascncasecmp(tocode, "iso-", 4))
988 tocode += 4;
989 else if (!ascncasecmp(tocode, "iso", 3))
990 tocode += 3;
991 if (!ascncasecmp(fromcode, "iso-", 4))
992 fromcode += 4;
993 else if (!ascncasecmp(fromcode, "iso", 3))
994 fromcode += 3;
995 if (*tocode == '\0' || *fromcode == '\0') {
996 id = (iconv_t)-1;
997 goto jleave;
999 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
1000 goto jleave;
1002 /* Solaris prefers upper-case charset names. Don't ask... */
1003 t = salloc(strlen(tocode) +1);
1004 _ic_toupper(t, tocode);
1005 f = salloc(strlen(fromcode) +1);
1006 _ic_toupper(f, fromcode);
1007 if ((id = iconv_open(t, f)) != (iconv_t)-1)
1008 goto jleave;
1010 /* Strip dashes for UnixWare */
1011 _ic_stripdash(t);
1012 _ic_stripdash(f);
1013 if ((id = iconv_open(t, f)) != (iconv_t)-1)
1014 goto jleave;
1016 /* Add your vendor's sillynesses here */
1018 /* If the encoding names are equal at this point, they are just not
1019 * understood by iconv(), and we cannot sensibly use it in any way. We do
1020 * not perform this as an optimization above since iconv() can otherwise be
1021 * used to check the validity of the input even with identical encoding
1022 * names */
1023 if (!strcmp(t, f))
1024 errno = 0;
1025 jleave:
1026 NYD_LEAVE;
1027 return id;
1030 FL void
1031 n_iconv_close(iconv_t cd)
1033 NYD_ENTER;
1034 iconv_close(cd);
1035 if (cd == iconvd)
1036 iconvd = (iconv_t)-1;
1037 NYD_LEAVE;
1040 FL void
1041 n_iconv_reset(iconv_t cd)
1043 NYD_ENTER;
1044 iconv(cd, NULL, NULL, NULL, NULL);
1045 NYD_LEAVE;
1048 /* (2012-09-24: export and use it exclusively to isolate prototype problems
1049 * (*inb* is 'char const **' except in POSIX) in a single place.
1050 * GNU libiconv even allows for configuration time const/non-const..
1051 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
1052 * support compiler invocations which bail on error, so no -Werror */
1053 /* Citrus project? */
1054 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
1055 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
1056 # if OS_DRAGONFLY
1057 # define __INBCAST(S) (char ** __restrict__)n_UNCONST(S)
1058 # else
1059 # define __INBCAST(S) (char const **)n_UNCONST(S)
1060 # endif
1061 # elif OS_SUNOS || OS_SOLARIS
1062 # define __INBCAST(S) (char const ** __restrict__)n_UNCONST(S)
1063 # endif
1064 # ifndef __INBCAST
1065 # define __INBCAST(S) (char **)n_UNCONST(S)
1066 # endif
1068 FL int
1069 n_iconv_buf(iconv_t cd, enum n_iconv_flags icf,
1070 char const **inb, size_t *inbleft, char **outb, size_t *outbleft){
1071 int err;
1072 NYD2_ENTER;
1074 if((icf & n_ICONV_UNIREPL) && !(options & OPT_UNICODE))
1075 icf &= ~n_ICONV_UNIREPL;
1077 for(;;){
1078 size_t sz;
1080 sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft);
1081 if(sz > 0 && !(icf & n_ICONV_IGN_NOREVERSE)){
1082 err = ENOENT;
1083 goto jleave;
1085 if(sz != (size_t)-1)
1086 break;
1088 err = errno;
1089 if(!(icf & n_ICONV_IGN_ILSEQ) || err != EILSEQ)
1090 goto jleave;
1091 if(*inbleft > 0){
1092 ++(*inb);
1093 --(*inbleft);
1094 if(icf & n_ICONV_UNIREPL){
1095 if(*outbleft >= 3){
1096 (*outb)[0] = '\xEF';
1097 (*outb)[1] = '\xBF';
1098 (*outb)[2] = '\xBD';
1099 *outb += 3;
1100 *outbleft -= 3;
1101 continue;
1103 }else if(*outbleft > 0){
1104 *(*outb)++ = '?';
1105 --*outbleft;
1106 continue;
1108 err = E2BIG;
1109 goto jleave;
1110 }else if(*outbleft > 0){
1111 **outb = '\0';
1112 goto jleave;
1115 err = 0;
1116 jleave:
1117 NYD2_LEAVE;
1118 return err;
1120 # undef __INBCAST
1122 FL int
1123 n_iconv_str(iconv_t cd, enum n_iconv_flags icf,
1124 struct str *out, struct str const *in, struct str *in_rest_or_null)
1126 int err;
1127 char *obb, *ob;
1128 char const *ib;
1129 size_t olb, ol, il;
1130 NYD2_ENTER;
1132 err = 0;
1133 obb = out->s;
1134 olb = out->l;
1135 ol = in->l;
1137 ol = (ol << 1) - (ol >> 4);
1138 if (olb <= ol) {
1139 olb = ol;
1140 goto jrealloc;
1143 for (;;) {
1144 ib = in->s;
1145 il = in->l;
1146 ob = obb;
1147 ol = olb;
1148 if((err = n_iconv_buf(cd, icf, &ib, &il, &ob, &ol)) == 0 || err != E2BIG)
1149 break;
1150 err = 0;
1151 olb += in->l;
1152 jrealloc:
1153 obb = n_realloc(obb, olb +1);
1156 if (in_rest_or_null != NULL) {
1157 in_rest_or_null->s = n_UNCONST(ib);
1158 in_rest_or_null->l = il;
1160 out->s = obb;
1161 out->s[out->l = olb - ol] = '\0';
1162 NYD2_LEAVE;
1163 return err;
1166 FL char *
1167 n_iconv_onetime_cp(enum n_iconv_flags icf,
1168 char const *tocode, char const *fromcode, char const *input){
1169 struct str out, in;
1170 iconv_t icd;
1171 char *rv;
1172 NYD2_ENTER;
1174 rv = NULL;
1175 if(tocode == NULL)
1176 tocode = charset_get_lc();
1177 if(fromcode == NULL)
1178 fromcode = "utf-8";
1180 if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
1181 goto jleave;
1183 in.l = strlen(in.s = n_UNCONST(input)); /* logical */
1184 out.s = NULL, out.l = 0;
1185 if(!n_iconv_str(icd, icf, &out, &in, NULL))
1186 rv = savestrbuf(out.s, out.l);
1187 if(out.s != NULL)
1188 free(out.s);
1190 iconv_close(icd);
1191 jleave:
1192 NYD2_LEAVE;
1193 return rv;
1195 #endif /* HAVE_ICONV */
1197 /* s-it-mode */