1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ String support routines.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2016 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 #define n_FILE strings
38 #ifndef HAVE_AMALGAMATION
45 (savestr
)(char const *str SALLOC_DEBUG_ARGS
)
51 size
= strlen(str
) +1;
52 news
= (salloc
)(size SALLOC_DEBUG_ARGSCALL
);
53 memcpy(news
, str
, size
);
59 (savestrbuf
)(char const *sbuf
, size_t sbuf_len SALLOC_DEBUG_ARGS
)
64 news
= (salloc
)(sbuf_len
+1 SALLOC_DEBUG_ARGSCALL
);
65 memcpy(news
, sbuf
, sbuf_len
);
72 (savecatsep
)(char const *s1
, char sep
, char const *s2 SALLOC_DEBUG_ARGS
)
78 l1
= (s1
!= NULL
) ? strlen(s1
) : 0;
80 news
= (salloc
)(l1
+ (sep
!= '\0') + l2
+1 SALLOC_DEBUG_ARGSCALL
);
82 memcpy(news
+ 0, s1
, l1
);
86 memcpy(news
+ l1
, s2
, l2
);
93 * Support routines, auto-reclaimed storage
97 (i_strdup
)(char const *src SALLOC_DEBUG_ARGS
)
104 dest
= (salloc
)(sz SALLOC_DEBUG_ARGSCALL
);
105 i_strcpy(dest
, src
, sz
);
111 str_concat_csvl(struct str
*self
, ...) /* XXX onepass maybe better here */
119 for (l
= 0; (cs
= va_arg(vl
, char const*)) != NULL
;)
124 self
->s
= salloc(l
+1);
127 for (l
= 0; (cs
= va_arg(vl
, char const*)) != NULL
;) {
128 size_t i
= strlen(cs
);
129 memcpy(self
->s
+ l
, cs
, i
);
139 (str_concat_cpa
)(struct str
*self
, char const * const *cpa
,
140 char const *sep_o_null SALLOC_DEBUG_ARGS
)
143 char const * const *xcpa
;
146 sonl
= (sep_o_null
!= NULL
) ? strlen(sep_o_null
) : 0;
148 for (l
= 0, xcpa
= cpa
; *xcpa
!= NULL
; ++xcpa
)
149 l
+= strlen(*xcpa
) + sonl
;
152 self
->s
= (salloc
)(l
+1 SALLOC_DEBUG_ARGSCALL
);
154 for (l
= 0, xcpa
= cpa
; *xcpa
!= NULL
; ++xcpa
) {
155 size_t i
= strlen(*xcpa
);
156 memcpy(self
->s
+ l
, *xcpa
, i
);
159 memcpy(self
->s
+ l
, sep_o_null
, sonl
);
169 * Routines that are not related to auto-reclaimed storage follow.
173 anyof(char const *s1
, char const *s2
)
176 for (; *s1
!= '\0'; ++s1
)
177 if (strchr(s2
, *s1
) != NULL
)
180 return (*s1
!= '\0');
184 n_strsep(char **iolist
, char sep
, bool_t ignore_empty
)
189 for (base
= *iolist
; base
!= NULL
; base
= *iolist
) {
190 while (*base
!= '\0' && blankspacechar(*base
))
192 cp
= strchr(base
, sep
);
197 cp
= base
+ strlen(base
);
199 while (cp
> base
&& blankspacechar(cp
[-1]))
202 if (*base
!= '\0' || !ignore_empty
)
210 i_strcpy(char *dest
, char const *src
, size_t size
)
214 for (;; ++dest
, ++src
)
215 if ((*dest
= lowerconv(*src
)) == '\0') {
217 } else if (--size
== 0) {
226 is_prefix(char const *as1
, char const *as2
)
231 for (; (c
= *as1
) == *as2
&& c
!= '\0'; ++as1
, ++as2
)
239 string_quote(char const *v
) /* TODO too simpleminded (getrawlist(), +++ ..) */
246 for (i
= 0, cp
= v
; (c
= *cp
) != '\0'; ++i
, ++cp
)
247 if (c
== '"' || c
== '\\')
251 for (i
= 0, cp
= v
; (c
= *cp
) != '\0'; rv
[i
++] = c
, ++cp
)
252 if (c
== '"' || c
== '\\')
260 laststring(char *linebuf
, bool_t
*needs_list
, bool_t strip
)
262 char *cp
, *p
, quoted
;
265 /* Anything to do at all? */
266 if (*(cp
= linebuf
) == '\0')
268 cp
+= strlen(linebuf
) -1;
270 /* Strip away trailing blanks */
271 while (spacechar(*cp
) && cp
> linebuf
)
277 /* Now search for the BOS of the "last string" */
279 if (quoted
== '\'' || quoted
== '"') {
285 while (cp
> linebuf
) {
290 } else if (!spacechar(*cp
))
292 if (cp
== linebuf
|| cp
[-1] != '\\') {
293 /* When in whitespace mode, WS prefix doesn't belong */
298 /* Expand the escaped quote character */
299 for (p
= --cp
; (p
[0] = p
[1]) != '\0'; ++p
)
302 if (strip
&& quoted
!= ' ' && *cp
== quoted
)
303 for (p
= cp
; (p
[0] = p
[1]) != '\0'; ++p
)
306 /* The "last string" has been skipped over, but still, try to step backwards
307 * until we are at BOS or see whitespace, so as to make possible things like
308 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
309 while (cp
> linebuf
) {
311 if (spacechar(*cp
)) {
314 /* We can furtherly release our callees if we now decide whether the
315 * remaining non-"last string" line content contains non-WS */
316 while (--p
>= linebuf
)
325 if (cp
!= NULL
&& *cp
== '\0')
327 *needs_list
= (cp
!= linebuf
&& *linebuf
!= '\0');
338 makelow(char *cp
) /* TODO isn't that crap? --> */
341 #ifdef HAVE_C90AMEND1
342 if (mb_cur_max
> 1) {
347 while (*cp
!= '\0') {
348 len
= mbtowc(&wc
, cp
, mb_cur_max
);
353 if (wctomb(tp
, wc
) == len
)
354 tp
+= len
, cp
+= len
;
356 *tp
++ = *cp
++; /* <-- at least here */
363 *cp
= tolower((uc_i
)*cp
);
364 while (*cp
++ != '\0');
370 substr(char const *str
, char const *sub
)
372 char const *cp
, *backup
;
377 while (*str
!= '\0' && *cp
!= '\0') {
378 #ifdef HAVE_C90AMEND1
379 if (mb_cur_max
> 1) {
383 if ((sz
= mbtowc(&c
, cp
, mb_cur_max
)) == -1)
386 if ((sz
= mbtowc(&c2
, str
, mb_cur_max
)) == -1)
392 if ((sz
= mbtowc(&c
, backup
, mb_cur_max
)) > 0) {
418 return (*cp
== '\0');
422 sstpcpy(char *dst
, char const *src
)
425 while ((*dst
= *src
++) != '\0')
432 (sstrdup
)(char const *cp SMALLOC_DEBUG_ARGS
)
437 dp
= (cp
== NULL
) ? NULL
: (sbufdup
)(cp
, strlen(cp
) SMALLOC_DEBUG_ARGSCALL
);
443 (sbufdup
)(char const *cp
, size_t len SMALLOC_DEBUG_ARGS
)
448 dp
= (smalloc
)(len
+1 SMALLOC_DEBUG_ARGSCALL
);
457 n_strscpy(char *dst
, char const *src
, size_t dstsize
){
461 if(LIKELY(dstsize
> 0)){
464 if((dst
[rv
] = src
[rv
]) == '\0')
467 }while(--dstsize
> 0);
481 asccasecmp(char const *s1
, char const *s2
)
487 char c1
= *s1
++, c2
= *s2
++;
488 if ((cmp
= lowerconv(c1
) - lowerconv(c2
)) != 0 || c1
== '\0')
496 ascncasecmp(char const *s1
, char const *s2
, size_t sz
)
502 char c1
= *s1
++, c2
= *s2
++;
503 cmp
= (ui8_t
)lowerconv(c1
);
504 cmp
-= (ui8_t
)lowerconv(c2
);
505 if (cmp
!= 0 || c1
== '\0')
513 asccasestr(char const *s1
, char const *s2
)
518 for (c2
= *s2
++, c2
= lowerconv(c2
);;) {
519 if ((c1
= *s1
++) == '\0') {
523 if (lowerconv(c1
) == c2
&& is_asccaseprefix(s1
, s2
)) {
533 is_asccaseprefix(char const *as1
, char const *as2
)
538 for (;; ++as1
, ++as2
) {
539 char c1
= lowerconv(*as1
), c2
= lowerconv(*as2
);
541 if ((rv
= (c2
== '\0')))
551 (n_str_assign_buf
)(struct str
*self
, char const *buf
, uiz_t buflen
554 if(buflen
== UIZ_MAX
)
555 buflen
= (buf
== NULL
) ? 0 : strlen(buf
);
557 assert(buflen
== 0 || buf
!= NULL
);
559 if(LIKELY(buflen
> 0)){
560 self
->s
= (srealloc
)(self
->s
, (self
->l
= buflen
) +1
561 SMALLOC_DEBUG_ARGSCALL
);
562 memcpy(self
->s
, buf
, buflen
);
563 self
->s
[buflen
] = '\0';
571 (n_str_add_buf
)(struct str
*self
, char const *buf
, uiz_t buflen
574 if(buflen
== UIZ_MAX
)
575 buflen
= (buf
== NULL
) ? 0 : strlen(buf
);
577 assert(buflen
== 0 || buf
!= NULL
);
580 size_t osl
= self
->l
, nsl
= osl
+ buflen
;
582 self
->s
= (srealloc
)(self
->s
, (self
->l
= nsl
) +1 SMALLOC_DEBUG_ARGSCALL
);
583 memcpy(self
->s
+ osl
, buf
, buflen
);
591 * struct n_string TODO extend, optimize
595 (n_string_clear
)(struct n_string
*self SMALLOC_DEBUG_ARGS
){
598 assert(self
!= NULL
);
600 if(self
->s_size
!= 0){
602 #ifdef HAVE_MEMORY_DEBUG
603 sfree(self
->s_dat SMALLOC_DEBUG_ARGSCALL
);
608 self
->s_len
= self
->s_auto
= self
->s_size
= 0;
616 (n_string_reserve
)(struct n_string
*self
, size_t noof SMALLOC_DEBUG_ARGS
){
620 assert(self
!= NULL
);
624 #if 0 /* FIXME memory alloc too large */
625 if(SI32_MAX
- n_ALIGN(1) - l
<= noof
)
626 n_panic(_("Memory allocation too large"));
629 if((i
= s
- l
) <= noof
){
630 i
+= 1 + l
+ (ui32_t
)noof
;
635 self
->s_dat
= (srealloc
)(self
->s_dat
, i SMALLOC_DEBUG_ARGSCALL
);
637 char *ndat
= (salloc
)(i SALLOC_DEBUG_ARGSCALL
);
640 memcpy(ndat
, self
->s_dat
, l
);
649 (n_string_resize
)(struct n_string
*self
, size_t nlen SMALLOC_DEBUG_ARGS
){
652 assert(self
!= NULL
);
653 #if 0 /* FIXME memory alloc too large */
654 if(SI32_MAX
- n_ALIGN(1) - l
<= noof
)
655 n_panic(_("Memory allocation too large"));
658 if(self
->s_len
< nlen
)
659 self
= (n_string_reserve
)(self
, nlen SMALLOC_DEBUG_ARGSCALL
);
660 self
->s_len
= (ui32_t
)nlen
;
666 (n_string_push_buf
)(struct n_string
*self
, char const *buf
, size_t buflen
670 assert(self
!= NULL
);
671 assert(buflen
== 0 || buf
!= NULL
);
673 if(buflen
== UIZ_MAX
)
674 buflen
= (buf
== NULL
) ? 0 : strlen(buf
);
679 self
= (n_string_reserve
)(self
, buflen SMALLOC_DEBUG_ARGSCALL
);
680 memcpy(self
->s_dat
+ (i
= self
->s_len
), buf
, buflen
);
681 self
->s_len
= (i
+= (ui32_t
)buflen
);
688 (n_string_push_c
)(struct n_string
*self
, char c SMALLOC_DEBUG_ARGS
){
691 assert(self
!= NULL
);
693 if(self
->s_len
+ 1 >= self
->s_size
)
694 self
= (n_string_reserve
)(self
, 1 SMALLOC_DEBUG_ARGSCALL
);
695 self
->s_dat
[self
->s_len
++] = c
;
701 (n_string_unshift_buf
)(struct n_string
*self
, char const *buf
, size_t buflen
705 assert(self
!= NULL
);
706 assert(buflen
== 0 || buf
!= NULL
);
708 if(buflen
== UIZ_MAX
)
709 buflen
= (buf
== NULL
) ? 0 : strlen(buf
);
712 self
= (n_string_reserve
)(self
, buflen SMALLOC_DEBUG_ARGSCALL
);
714 memmove(self
->s_dat
+ buflen
, self
->s_dat
, self
->s_len
);
715 memcpy(self
->s_dat
, buf
, buflen
);
716 self
->s_len
+= (ui32_t
)buflen
;
723 (n_string_unshift_c
)(struct n_string
*self
, char c SMALLOC_DEBUG_ARGS
){
726 assert(self
!= NULL
);
728 if(self
->s_len
+ 1 >= self
->s_size
)
729 self
= (n_string_reserve
)(self
, 1 SMALLOC_DEBUG_ARGSCALL
);
731 memmove(self
->s_dat
+ 1, self
->s_dat
, self
->s_len
);
739 (n_string_cp
)(struct n_string
*self SMALLOC_DEBUG_ARGS
){
743 assert(self
!= NULL
);
745 if(self
->s_size
== 0)
746 self
= (n_string_reserve
)(self
, 1 SMALLOC_DEBUG_ARGSCALL
);
748 (rv
= self
->s_dat
)[self
->s_len
] = '\0';
754 n_string_cp_const(struct n_string
const *self
){
758 assert(self
!= NULL
);
760 if(self
->s_size
!= 0){
761 ((struct n_string
*)UNCONST(self
))->s_dat
[self
->s_len
] = '\0';
773 #if defined HAVE_NATCH_CHAR || defined HAVE_ICONV
775 n_utf8_to_utf32(char const **bdat
, size_t *blen
) /* TODO check false UTF8 */
789 if ((x
& 0xE0u
) == 0xC0u
) {
794 } else if ((x
& 0xF0u
) == 0xE0u
) {
830 n_utf32_to_utf8(ui32_t c
, char *buf
)
837 ui8_t dec_leader_mask
;
838 ui8_t dec_leader_val_mask
;
839 ui8_t dec_bytes_togo
;
843 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
844 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
845 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
846 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
847 /* xxx _from_utf32() simply assumes magic code points for surrogates!
848 * xxx (However, should we ever get yet another surrogate range we
849 * xxx need to deal with that all over the place anyway? */
850 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
851 {0x00010000, 0x0010FFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
855 if (c
<= _cat
[0].upper_bound
) { catp
+= 0; goto j0
; }
856 if (c
<= _cat
[1].upper_bound
) { catp
+= 1; goto j1
; }
857 if (c
<= _cat
[2].upper_bound
) { catp
+= 2; goto j2
; }
858 if (c
<= _cat
[3].upper_bound
) {
859 /* Surrogates may not be converted (Compatibility rule C10) */
860 if (c
>= 0xD800u
&& c
<= 0xDFFFu
)
865 if (c
<= _cat
[4].upper_bound
) { catp
+= 4; goto j4
; }
867 c
= 0xFFFDu
; /* Unicode replacement character */
871 buf
[3] = (char)0x80u
| (char)(c
& 0x3Fu
); c
>>= 6;
873 buf
[2] = (char)0x80u
| (char)(c
& 0x3Fu
); c
>>= 6;
875 buf
[1] = (char)0x80u
| (char)(c
& 0x3Fu
); c
>>= 6;
877 buf
[0] = (char)catp
->enc_leader
| (char)(c
);
879 buf
[catp
->enc_lval
] = '\0';
884 #endif /* HAVE_NATCH_CHAR || HAVE_ICONV */
887 * Our iconv(3) wrapper
891 static void _ic_toupper(char *dest
, char const *src
);
892 static void _ic_stripdash(char *p
);
895 _ic_toupper(char *dest
, char const *src
)
899 *dest
++ = upperconv(*src
);
900 while (*src
++ != '\0');
905 _ic_stripdash(char *p
)
913 while (*p
++ != '\0');
918 n_iconv_open(char const *tocode
, char const *fromcode
)
924 if ((!asccasecmp(fromcode
, "unknown-8bit") ||
925 !asccasecmp(fromcode
, "binary")) &&
926 (fromcode
= ok_vlook(charset_unknown_8bit
)) == NULL
)
927 fromcode
= charset_get_8bit();
929 if ((id
= iconv_open(tocode
, fromcode
)) != (iconv_t
)-1)
932 /* Remove the "iso-" prefixes for Solaris */
933 if (!ascncasecmp(tocode
, "iso-", 4))
935 else if (!ascncasecmp(tocode
, "iso", 3))
937 if (!ascncasecmp(fromcode
, "iso-", 4))
939 else if (!ascncasecmp(fromcode
, "iso", 3))
941 if (*tocode
== '\0' || *fromcode
== '\0') {
945 if ((id
= iconv_open(tocode
, fromcode
)) != (iconv_t
)-1)
948 /* Solaris prefers upper-case charset names. Don't ask... */
949 t
= salloc(strlen(tocode
) +1);
950 _ic_toupper(t
, tocode
);
951 f
= salloc(strlen(fromcode
) +1);
952 _ic_toupper(f
, fromcode
);
953 if ((id
= iconv_open(t
, f
)) != (iconv_t
)-1)
956 /* Strip dashes for UnixWare */
959 if ((id
= iconv_open(t
, f
)) != (iconv_t
)-1)
962 /* Add your vendor's sillynesses here */
964 /* If the encoding names are equal at this point, they are just not
965 * understood by iconv(), and we cannot sensibly use it in any way. We do
966 * not perform this as an optimization above since iconv() can otherwise be
967 * used to check the validity of the input even with identical encoding
977 n_iconv_close(iconv_t cd
)
982 iconvd
= (iconv_t
)-1;
987 n_iconv_reset(iconv_t cd
)
990 iconv(cd
, NULL
, NULL
, NULL
, NULL
);
994 /* (2012-09-24: export and use it exclusively to isolate prototype problems
995 * (*inb* is 'char const **' except in POSIX) in a single place.
996 * GNU libiconv even allows for configuration time const/non-const..
997 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
998 * support compiler invocations which bail on error, so no -Werror */
999 /* Citrus project? */
1000 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
1001 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
1003 # define __INBCAST(S) (char ** __restrict__)UNCONST(S)
1005 # define __INBCAST(S) (char const **)UNCONST(S)
1007 # elif OS_SUNOS || OS_SOLARIS
1008 # define __INBCAST(S) (char const ** __restrict__)UNCONST(S)
1011 # define __INBCAST(S) (char **)UNCONST(S)
1015 n_iconv_buf(iconv_t cd
, enum n_iconv_flags icf
,
1016 char const **inb
, size_t *inbleft
, char **outb
, size_t *outbleft
){
1023 sz
= iconv(cd
, __INBCAST(inb
), inbleft
, outb
, outbleft
);
1024 if(sz
> 0 && !(icf
& n_ICONV_IGN_NOREVERSE
)){
1028 if(sz
!= (size_t)-1)
1032 if(!(icf
& n_ICONV_IGN_ILSEQ
) || err
!= EILSEQ
)
1037 if(*outbleft
> 0/* TODO unicode replacement 0xFFFD */){
1044 }else if(*outbleft
> 0){
1057 n_iconv_str(iconv_t cd
, enum n_iconv_flags icf
,
1058 struct str
*out
, struct str
const *in
, struct str
*in_rest_or_null
)
1071 ol
= (ol
<< 1) - (ol
>> 4);
1082 if((err
= n_iconv_buf(cd
, icf
, &ib
, &il
, &ob
, &ol
)) == 0 || err
!= E2BIG
)
1087 obb
= srealloc(obb
, olb
+1);
1090 if (in_rest_or_null
!= NULL
) {
1091 in_rest_or_null
->s
= UNCONST(ib
);
1092 in_rest_or_null
->l
= il
;
1095 out
->s
[out
->l
= olb
- ol
] = '\0';
1101 n_iconv_onetime_cp(enum n_iconv_flags icf
,
1102 char const *tocode
, char const *fromcode
, char const *input
){
1110 tocode
= charset_get_lc();
1111 if(fromcode
== NULL
)
1114 if((icd
= iconv_open(tocode
, fromcode
)) == (iconv_t
)-1)
1117 in
.l
= strlen(in
.s
= UNCONST(input
)); /* logical */
1118 out
.s
= NULL
, out
.l
= 0;
1119 if(!n_iconv_str(icd
, icf
, &out
, &in
, NULL
))
1120 rv
= savestrbuf(out
.s
, out
.l
);
1129 #endif /* HAVE_ICONV */