1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Auto-reclaimed string allocation and support routines that build on top of
3 *@ them. Strings handed out by those are reclaimed at the top of the command
4 *@ loop each time, so they need not be freed.
5 *@ And below this series we do collect all other plain string support routines
6 *@ in here, including those which use normal heap memory.
8 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
9 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
12 * Copyright (c) 1980, 1993
13 * The Regents of the University of California. All rights reserved.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 * must display the following acknowledgement:
25 * This product includes software developed by the University of
26 * California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 * may be used to endorse or promote products derived from this software
29 * without specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 #define n_FILE strings
46 #ifndef HAVE_AMALGAMATION
52 /* In debug mode the "string dope" allocations are enwrapped in canaries, just
53 * as we do with our normal memory allocator */
55 # define _SHOPE_SIZE (2u * 8 * sizeof(char) + sizeof(struct schunk))
57 CTA(sizeof(char) == sizeof(ui8_t
));
72 #endif /* HAVE_DEBUG */
79 #define SALIGN (sizeof(union __align__) - 1)
81 CTA(ISPOW2(SALIGN
+ 1));
85 char *_bot
; /* For spreserve() */
86 char *_relax
; /* If !NULL, used by srelax() instead of ._bot */
87 char *_max
; /* Max usable byte */
88 char *_caster
; /* NULL if full */
91 /* Single instance builtin buffer. Room for anything, most of the time */
94 char b_buf
[SBUFFER_BUILTIN
- sizeof(struct b_base
)];
96 #define SBLTIN_SIZE SIZEOF_FIELD(struct b_bltin, b_buf)
98 /* Dynamically allocated buffers to overcome shortage, always released again
99 * once the command loop ticks (without relaxation or during PS_SOURCING) */
101 struct b_base b_base
;
102 char b_buf
[SBUFFER_SIZE
- sizeof(struct b_base
)];
104 #define SDYN_SIZE SIZEOF_FIELD(struct b_dyn, b_buf)
106 /* The multiplexer of the several real b_* */
109 char b_buf
[VFIELD_SIZE(SALIGN
+ 1)];
112 /* Requests that exceed SDYN_SIZE-1 and thus cannot be handled by string dope
113 * are always served by the normal memory allocator (which panics if memory
114 * cannot be served). Note such an allocation has not yet occurred, it is only
115 * included as a security fallback bypass */
117 struct hugebuf
*hb_next
;
118 char hb_buf
[VFIELD_SIZE(SALIGN
+ 1)];
121 static struct b_bltin _builtin_buf
;
122 static struct buffer
*_buf_head
, *_buf_list
, *_buf_server
, *_buf_relax
;
123 static size_t _relax_recur_no
;
124 static struct hugebuf
*_huge_list
;
126 static size_t _all_cnt
, _all_cycnt
, _all_cycnt_max
,
127 _all_size
, _all_cysize
, _all_cysize_max
, _all_min
,
129 _all_bufcnt
, _all_cybufcnt
, _all_cybufcnt_max
,
130 _all_resetreqs
, _all_resets
;
133 /* sreset() / srelax() release a buffer, check the canaries of all chunks */
135 static void _salloc_bcheck(struct buffer
*b
);
140 _salloc_bcheck(struct buffer
*b
)
145 pmax
.cp
= (b
->b
._caster
== NULL
) ? b
->b
._max
: b
->b
._caster
;
148 while (pp
.cp
< pmax
.cp
) {
155 pp
.cp
+= c
->full_size
;
160 if (x
.ui8p
[0] != 0xDE) i
|= 1<<0;
161 if (x
.ui8p
[1] != 0xAA) i
|= 1<<1;
162 if (x
.ui8p
[2] != 0x55) i
|= 1<<2;
163 if (x
.ui8p
[3] != 0xAD) i
|= 1<<3;
164 if (x
.ui8p
[4] != 0xBE) i
|= 1<<4;
165 if (x
.ui8p
[5] != 0x55) i
|= 1<<5;
166 if (x
.ui8p
[6] != 0xAA) i
|= 1<<6;
167 if (x
.ui8p
[7] != 0xEF) i
|= 1<<7;
169 alert("sdope %p: corrupt lower canary: 0x%02X, size %u: %s, line %u",
170 ux
, i
, c
->usr_size
, c
->file
, c
->line
);
171 x
.cp
+= 8 + c
->usr_size
;
174 if (x
.ui8p
[0] != 0xDE) i
|= 1<<0;
175 if (x
.ui8p
[1] != 0xAA) i
|= 1<<1;
176 if (x
.ui8p
[2] != 0x55) i
|= 1<<2;
177 if (x
.ui8p
[3] != 0xAD) i
|= 1<<3;
178 if (x
.ui8p
[4] != 0xBE) i
|= 1<<4;
179 if (x
.ui8p
[5] != 0x55) i
|= 1<<5;
180 if (x
.ui8p
[6] != 0xAA) i
|= 1<<6;
181 if (x
.ui8p
[7] != 0xEF) i
|= 1<<7;
183 alert("sdope %p: corrupt upper canary: 0x%02X, size %u: %s, line %u",
184 ux
, i
, c
->usr_size
, c
->file
, c
->line
);
191 (salloc
)(size_t size SALLOC_DEBUG_ARGS
)
193 DBG( size_t orig_size
= size
; )
194 union {struct buffer
*b
; struct hugebuf
*hb
; char *cp
;} u
;
206 _all_cycnt_max
= MAX(_all_cycnt_max
, _all_cycnt
);
209 _all_cysize_max
= MAX(_all_cysize_max
, _all_cysize
);
210 _all_min
= (_all_max
== 0) ? size
: MIN(_all_min
, size
);
211 _all_max
= MAX(_all_max
, size
);
212 _all_wast
+= size
- orig_size
;
216 if (size
>= SDYN_SIZE
- 1)
217 alert("salloc() of %" PRIuZ
" bytes from \"%s\", line %d\n",
218 size
, mdbg_file
, mdbg_line
);
221 /* Huge allocations are special */
222 if (UNLIKELY(size
>= SDYN_SIZE
- 1))
225 /* Search for a buffer with enough free space to serve request */
226 if ((u
.b
= _buf_server
) != NULL
)
229 for (u
.b
= _buf_head
; u
.b
!= NULL
; u
.b
= u
.b
->b
._next
) {
233 if (u
.b
== _buf_server
) {
234 if (u
.b
== _buf_head
&& (u
.b
= _buf_head
->b
._next
) != NULL
) {
245 if (PTRCMP(y
, <=, z
)) {
246 /* Alignment is the one thing, the other is what is usually allocated,
247 * and here about 40 bytes seems to be a good cut to avoid non-usable
248 * non-NULL casters. However, because of _salloc_bcheck(), we may not
249 * set ._caster to NULL because then it would check all chunks up to
250 * ._max, which surely doesn't work; speed is no issue with DEBUG */
251 u
.b
->b
._caster
= NDBG( PTRCMP(y
+ 42 + 16, >=, z
) ? NULL
: ) y
;
257 /* Need a new buffer */
258 if (_buf_head
== NULL
) {
259 struct b_bltin
*b
= &_builtin_buf
;
260 b
->b_base
._max
= b
->b_buf
+ SBLTIN_SIZE
- 1;
261 _buf_head
= (struct buffer
*)b
;
267 _all_cybufcnt_max
= MAX(_all_cybufcnt_max
, _all_cybufcnt
);
269 u
.b
= smalloc(sizeof(struct b_dyn
));
270 u
.b
->b
._max
= u
.b
->b_buf
+ SDYN_SIZE
- 1;
272 if (_buf_list
!= NULL
)
273 _buf_list
->b
._next
= u
.b
;
274 _buf_server
= _buf_list
= u
.b
;
276 u
.b
->b
._caster
= (u
.b
->b
._bot
= u
.b
->b_buf
) + size
;
277 u
.b
->b
._relax
= NULL
;
281 /* Encapsulate user chunk in debug canaries */
289 xc
->file
= mdbg_file
;
290 xc
->line
= mdbg_line
;
291 xc
->usr_size
= (ui16_t
)orig_size
;
292 xc
->full_size
= (ui16_t
)size
;
294 xl
.ui8p
[0]=0xDE; xl
.ui8p
[1]=0xAA; xl
.ui8p
[2]=0x55; xl
.ui8p
[3]=0xAD;
295 xl
.ui8p
[4]=0xBE; xl
.ui8p
[5]=0x55; xl
.ui8p
[6]=0xAA; xl
.ui8p
[7]=0xEF;
299 xu
.ui8p
[0]=0xDE; xu
.ui8p
[1]=0xAA; xu
.ui8p
[2]=0x55; xu
.ui8p
[3]=0xAD;
300 xu
.ui8p
[4]=0xBE; xu
.ui8p
[5]=0x55; xu
.ui8p
[6]=0xAA; xu
.ui8p
[7]=0xEF;
307 u
.hb
= smalloc(sizeof(*u
.hb
) - VFIELD_SIZEOF(struct hugebuf
, hb_buf
) +
309 u
.hb
->hb_next
= _huge_list
;
316 (csalloc
)(size_t nmemb
, size_t size SALLOC_DEBUG_ARGS
)
322 vp
= (salloc
)(size SALLOC_DEBUG_ARGSCALL
);
329 sreset(bool_t only_if_relaxed
)
331 struct buffer
*blh
, *bh
;
334 DBG( ++_all_resetreqs
; )
336 /* Reset relaxation after any jump is a MUST */
337 if (_relax_recur_no
> 0)
341 if (only_if_relaxed
&& _relax_recur_no
== 0)
345 _all_cycnt
= _all_cysize
= 0;
346 _all_cybufcnt
= (_buf_head
!= NULL
&& _buf_head
->b
._next
!= NULL
);
350 /* Reset relaxation after jump */
351 if (_relax_recur_no
> 0) {
353 assert(_relax_recur_no
== 0);
357 if ((bh
= _buf_head
) != NULL
) {
359 struct buffer
*x
= bh
;
361 DBG( _salloc_bcheck(x
); )
363 /* Give away all buffers that are not covered by sreset().
364 * _buf_head is builtin and thus cannot be free()d */
365 if (blh
!= NULL
&& x
->b
._bot
== x
->b_buf
) {
370 x
->b
._caster
= x
->b
._bot
;
372 DBG( memset(x
->b
._caster
, 0377,
373 PTR2SIZE(x
->b
._max
- x
->b
._caster
)); )
375 } while (bh
!= NULL
);
377 _buf_server
= _buf_head
;
382 while (_huge_list
!= NULL
) {
383 struct hugebuf
*hb
= _huge_list
;
384 _huge_list
= hb
->hb_next
;
399 if (_relax_recur_no
++ == 0) {
400 for (b
= _buf_head
; b
!= NULL
; b
= b
->b
._next
)
401 b
->b
._relax
= b
->b
._caster
;
402 _buf_relax
= _buf_server
;
413 assert(_relax_recur_no
> 0);
415 if (--_relax_recur_no
== 0) {
416 for (b
= _buf_head
; b
!= NULL
; b
= b
->b
._next
) {
417 DBG( _salloc_bcheck(b
); )
418 b
->b
._caster
= (b
->b
._relax
!= NULL
) ? b
->b
._relax
: b
->b
._bot
;
426 fprintf(stderr
, "srelax_rele(): recursion >0!\n");
434 /* The purpose of relaxation is only that it is possible to reset the
435 * casters, *not* to give back memory to the system. We are presumably in
436 * an iteration over all messages of a mailbox, and it'd be quite
437 * counterproductive to give the system allocator a chance to waste time */
441 assert(_relax_recur_no
> 0);
443 if (_relax_recur_no
== 1) {
444 for (b
= _buf_head
; b
!= NULL
; b
= b
->b
._next
) {
445 DBG( _salloc_bcheck(b
); )
446 b
->b
._caster
= (b
->b
._relax
!= NULL
) ? b
->b
._relax
: b
->b
._bot
;
447 DBG( memset(b
->b
._caster
, 0377, PTR2SIZE(b
->b
._max
- b
->b
._caster
)); )
459 for (b
= _buf_head
; b
!= NULL
; b
= b
->b
._next
)
460 b
->b
._bot
= b
->b
._caster
;
472 excess
= (_all_cybufcnt_max
* SDYN_SIZE
) + SBLTIN_SIZE
;
473 excess
= (excess
>= _all_cysize_max
) ? 0 : _all_cysize_max
- excess
;
475 printf("String usage statistics (cycle means one sreset() cycle):\n"
476 " Buffer allocs ever/max a time : %" PRIuZ
"/%" PRIuZ
"\n"
477 " .. size of the builtin/dynamic: %" PRIuZ
"/%" PRIuZ
"\n"
478 " Overall alloc count/bytes : %" PRIuZ
"/%" PRIuZ
"\n"
479 " .. bytes min/max/align wastage: %" PRIuZ
"/%" PRIuZ
"/%" PRIuZ
"\n"
480 " sreset() cycles : %" PRIuZ
" (%" PRIuZ
" performed)\n"
481 " Cycle max.: alloc count/bytes : %" PRIuZ
"/%" PRIuZ
"+%" PRIuZ
"\n",
482 _all_bufcnt
, _all_cybufcnt_max
,
483 SBLTIN_SIZE
, SDYN_SIZE
,
485 _all_min
, _all_max
, _all_wast
,
486 _all_resetreqs
, _all_resets
,
487 _all_cycnt_max
, _all_cysize_max
, excess
);
494 (savestr
)(char const *str SALLOC_DEBUG_ARGS
)
500 size
= strlen(str
) +1;
501 news
= (salloc
)(size SALLOC_DEBUG_ARGSCALL
);
502 memcpy(news
, str
, size
);
508 (savestrbuf
)(char const *sbuf
, size_t sbuf_len SALLOC_DEBUG_ARGS
)
513 news
= (salloc
)(sbuf_len
+1 SALLOC_DEBUG_ARGSCALL
);
514 memcpy(news
, sbuf
, sbuf_len
);
521 (savecatsep
)(char const *s1
, char sep
, char const *s2 SALLOC_DEBUG_ARGS
)
527 l1
= (s1
!= NULL
) ? strlen(s1
) : 0;
529 news
= (salloc
)(l1
+ (sep
!= '\0') + l2
+1 SALLOC_DEBUG_ARGSCALL
);
531 memcpy(news
+ 0, s1
, l1
);
535 memcpy(news
+ l1
, s2
, l2
);
536 news
[l1
+ l2
] = '\0';
542 * Support routines, auto-reclaimed storage
546 (i_strdup
)(char const *src SALLOC_DEBUG_ARGS
)
553 dest
= (salloc
)(sz SALLOC_DEBUG_ARGSCALL
);
554 i_strcpy(dest
, src
, sz
);
560 (protbase
)(char const *cp SALLOC_DEBUG_ARGS
) /* TODO obsolete */
565 np
= n
= (salloc
)(strlen(cp
) +1 SALLOC_DEBUG_ARGSCALL
);
567 /* Just ignore the `is-system-mailbox' prefix XXX */
568 if (cp
[0] == '%' && cp
[1] == ':')
571 while (*cp
!= '\0') {
572 if (cp
[0] == ':' && cp
[1] == '/' && cp
[2] == '/') {
576 } else if (cp
[0] == '/')
587 str_concat_csvl(struct str
*self
, ...) /* XXX onepass maybe better here */
595 for (l
= 0; (cs
= va_arg(vl
, char const*)) != NULL
;)
600 self
->s
= salloc(l
+1);
603 for (l
= 0; (cs
= va_arg(vl
, char const*)) != NULL
;) {
604 size_t i
= strlen(cs
);
605 memcpy(self
->s
+ l
, cs
, i
);
615 (str_concat_cpa
)(struct str
*self
, char const * const *cpa
,
616 char const *sep_o_null SALLOC_DEBUG_ARGS
)
619 char const * const *xcpa
;
622 sonl
= (sep_o_null
!= NULL
) ? strlen(sep_o_null
) : 0;
624 for (l
= 0, xcpa
= cpa
; *xcpa
!= NULL
; ++xcpa
)
625 l
+= strlen(*xcpa
) + sonl
;
628 self
->s
= (salloc
)(l
+1 SALLOC_DEBUG_ARGSCALL
);
630 for (l
= 0, xcpa
= cpa
; *xcpa
!= NULL
; ++xcpa
) {
631 size_t i
= strlen(*xcpa
);
632 memcpy(self
->s
+ l
, *xcpa
, i
);
635 memcpy(self
->s
+ l
, sep_o_null
, sonl
);
645 * Routines that are not related to auto-reclaimed storage follow.
649 anyof(char const *s1
, char const *s2
)
652 for (; *s1
!= '\0'; ++s1
)
653 if (strchr(s2
, *s1
) != NULL
)
656 return (*s1
!= '\0');
660 n_strsep(char **iolist
, char sep
, bool_t ignore_empty
)
665 for (base
= *iolist
; base
!= NULL
; base
= *iolist
) {
666 while (*base
!= '\0' && blankspacechar(*base
))
668 cp
= strchr(base
, sep
);
673 cp
= base
+ strlen(base
);
675 while (cp
> base
&& blankspacechar(cp
[-1]))
678 if (*base
!= '\0' || !ignore_empty
)
686 i_strcpy(char *dest
, char const *src
, size_t size
)
690 for (;; ++dest
, ++src
)
691 if ((*dest
= lowerconv(*src
)) == '\0') {
693 } else if (--size
== 0) {
702 is_prefix(char const *as1
, char const *as2
)
707 for (; (c
= *as1
) == *as2
&& c
!= '\0'; ++as1
, ++as2
)
715 string_quote(char const *v
) /* TODO too simpleminded (getrawlist(), +++ ..) */
722 for (i
= 0, cp
= v
; (c
= *cp
) != '\0'; ++i
, ++cp
)
723 if (c
== '"' || c
== '\\')
727 for (i
= 0, cp
= v
; (c
= *cp
) != '\0'; rv
[i
++] = c
, ++cp
)
728 if (c
== '"' || c
== '\\')
736 laststring(char *linebuf
, bool_t
*needs_list
, bool_t strip
)
738 char *cp
, *p
, quoted
;
741 /* Anything to do at all? */
742 if (*(cp
= linebuf
) == '\0')
744 cp
+= strlen(linebuf
) -1;
746 /* Strip away trailing blanks */
747 while (whitechar(*cp
) && cp
> linebuf
)
753 /* Now search for the BOS of the "last string" */
755 if (quoted
== '\'' || quoted
== '"') {
761 while (cp
> linebuf
) {
766 } else if (!whitechar(*cp
))
768 if (cp
== linebuf
|| cp
[-1] != '\\') {
769 /* When in whitespace mode, WS prefix doesn't belong */
774 /* Expand the escaped quote character */
775 for (p
= --cp
; (p
[0] = p
[1]) != '\0'; ++p
)
778 if (strip
&& quoted
!= ' ' && *cp
== quoted
)
779 for (p
= cp
; (p
[0] = p
[1]) != '\0'; ++p
)
782 /* The "last string" has been skipped over, but still, try to step backwards
783 * until we are at BOS or see whitespace, so as to make possible things like
784 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
785 while (cp
> linebuf
) {
787 if (whitechar(*cp
)) {
790 /* We can further relieve our callers if we now decide whether the
791 * remaining non-"last string" line content contains non-WS */
792 while (--p
>= linebuf
)
801 if (cp
!= NULL
&& *cp
== '\0')
803 *needs_list
= (cp
!= linebuf
&& *linebuf
!= '\0');
814 makelow(char *cp
) /* TODO isn't that crap? --> */
817 #ifdef HAVE_C90AMEND1
818 if (mb_cur_max
> 1) {
823 while (*cp
!= '\0') {
824 len
= mbtowc(&wc
, cp
, mb_cur_max
);
829 if (wctomb(tp
, wc
) == len
)
830 tp
+= len
, cp
+= len
;
832 *tp
++ = *cp
++; /* <-- at least here */
839 *cp
= tolower((uc_i
)*cp
);
840 while (*cp
++ != '\0');
846 substr(char const *str
, char const *sub
)
848 char const *cp
, *backup
;
853 while (*str
!= '\0' && *cp
!= '\0') {
854 #ifdef HAVE_C90AMEND1
855 if (mb_cur_max
> 1) {
859 if ((sz
= mbtowc(&c
, cp
, mb_cur_max
)) == -1)
862 if ((sz
= mbtowc(&c2
, str
, mb_cur_max
)) == -1)
868 if ((sz
= mbtowc(&c
, backup
, mb_cur_max
)) > 0) {
894 return (*cp
== '\0');
897 #ifndef HAVE_SNPRINTF
899 snprintf(char *str
, size_t size
, char const *format
, ...) /* XXX DANGER! */
905 va_start(ap
, format
);
906 ret
= vsprintf(str
, format
, ap
);
916 sstpcpy(char *dst
, char const *src
)
919 while ((*dst
= *src
++) != '\0')
926 (sstrdup
)(char const *cp SMALLOC_DEBUG_ARGS
)
931 dp
= (cp
== NULL
) ? NULL
: (sbufdup
)(cp
, strlen(cp
) SMALLOC_DEBUG_ARGSCALL
);
937 (sbufdup
)(char const *cp
, size_t len SMALLOC_DEBUG_ARGS
)
942 dp
= (smalloc
)(len
+1 SMALLOC_DEBUG_ARGSCALL
);
951 n_strlcpy(char *dst
, char const *src
, size_t len
)
957 dst
= strncpy(dst
, src
, len
);
964 asccasecmp(char const *s1
, char const *s2
)
970 char c1
= *s1
++, c2
= *s2
++;
971 if ((cmp
= lowerconv(c1
) - lowerconv(c2
)) != 0 || c1
== '\0')
979 ascncasecmp(char const *s1
, char const *s2
, size_t sz
)
985 char c1
= *s1
++, c2
= *s2
++;
986 cmp
= (ui8_t
)lowerconv(c1
);
987 cmp
-= (ui8_t
)lowerconv(c2
);
988 if (cmp
!= 0 || c1
== '\0')
996 asccasestr(char const *s1
, char const *s2
)
1001 for (c2
= *s2
++, c2
= lowerconv(c2
);;) {
1002 if ((c1
= *s1
++) == '\0') {
1006 if (lowerconv(c1
) == c2
&& is_asccaseprefix(s1
, s2
)) {
1016 is_asccaseprefix(char const *as1
, char const *as2
)
1021 for (;; ++as1
, ++as2
) {
1022 char c1
= lowerconv(*as1
), c2
= lowerconv(*as2
);
1024 if ((rv
= (c2
== '\0')))
1034 (n_str_dup
)(struct str
*self
, struct str
const *t SMALLOC_DEBUG_ARGS
)
1037 if (t
!= NULL
&& t
->l
> 0) {
1039 self
->s
= (srealloc
)(self
->s
, t
->l
+1 SMALLOC_DEBUG_ARGSCALL
);
1040 memcpy(self
->s
, t
->s
, t
->l
+1);
1048 (n_str_add_buf
)(struct str
*self
, char const *buf
, size_t buflen
1053 size_t sl
= self
->l
;
1054 self
->l
= sl
+ buflen
;
1055 self
->s
= (srealloc
)(self
->s
, self
->l
+1 SMALLOC_DEBUG_ARGSCALL
);
1056 memcpy(self
->s
+ sl
, buf
, buflen
);
1057 self
->s
[self
->l
] = '\0';
1067 #ifdef HAVE_NATCH_CHAR
1069 n_utf8_to_utf32(char const **bdat
, size_t *blen
)
1083 if ((x
& 0xE0) == 0xC0) {
1088 } else if ((x
& 0xF0) == 0xE0) {
1122 #endif /* HAVE_NATCH_CHAR */
1124 #ifdef HAVE_FILTER_HTML_TAGSOUP
1126 n_utf32_to_utf8(ui32_t c
, char *buf
)
1133 ui8_t dec_leader_mask
;
1134 ui8_t dec_leader_val_mask
;
1135 ui8_t dec_bytes_togo
;
1139 {0x00000000, 0x00000000, 0x00, 0, 0x00, 0x00, 0, 0, {0,}},
1140 {0x00000000, 0x0000007F, 0x00, 1, 0x80, 0x7F, 1-1, 1, {0,}},
1141 {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
1142 /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
1143 /* xxx _from_utf32() simply assumes magic code points for surrogates!
1144 * xxx (However, should we ever get yet another surrogate range we
1145 * xxx need to deal with that all over the place anyway? */
1146 {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
1147 {0x00010000, 0x001FFFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
1151 if (c
<= _cat
[0].upper_bound
) { catp
+= 0; goto j0
; }
1152 if (c
<= _cat
[1].upper_bound
) { catp
+= 1; goto j1
; }
1153 if (c
<= _cat
[2].upper_bound
) { catp
+= 2; goto j2
; }
1154 if (c
<= _cat
[3].upper_bound
) {
1155 /* Surrogates may not be converted (Compatibility rule C10) */
1156 if (c
>= 0xD800u
&& c
<= 0xDFFFu
)
1161 if (c
<= _cat
[4].upper_bound
) { catp
+= 4; goto j4
; }
1163 c
= 0xFFFDu
; /* Unicode replacement character */
1167 buf
[3] = (char)0x80 | (char)(c
& 0x3F); c
>>= 6;
1169 buf
[2] = (char)0x80 | (char)(c
& 0x3F); c
>>= 6;
1171 buf
[1] = (char)0x80 | (char)(c
& 0x3F); c
>>= 6;
1173 buf
[0] = (char)catp
->enc_leader
| (char)(c
);
1175 buf
[catp
->enc_lval
] = '\0';
1180 #endif /* HAVE_FILTER_HTML_TAGSOUP */
1183 * Our iconv(3) wrapper
1187 static void _ic_toupper(char *dest
, char const *src
);
1188 static void _ic_stripdash(char *p
);
1191 _ic_toupper(char *dest
, char const *src
)
1195 *dest
++ = upperconv(*src
);
1196 while (*src
++ != '\0');
1201 _ic_stripdash(char *p
)
1207 if (*(q
= p
) != '-')
1209 while (*p
++ != '\0');
1214 n_iconv_open(char const *tocode
, char const *fromcode
)
1220 if (!asccasecmp(fromcode
, "unknown-8bit") &&
1221 (fromcode
= ok_vlook(charset_unknown_8bit
)) == NULL
)
1222 fromcode
= charset_get_8bit();
1224 if ((id
= iconv_open(tocode
, fromcode
)) != (iconv_t
)-1)
1227 /* Remove the "iso-" prefixes for Solaris */
1228 if (!ascncasecmp(tocode
, "iso-", 4))
1230 else if (!ascncasecmp(tocode
, "iso", 3))
1232 if (!ascncasecmp(fromcode
, "iso-", 4))
1234 else if (!ascncasecmp(fromcode
, "iso", 3))
1236 if (*tocode
== '\0' || *fromcode
== '\0') {
1240 if ((id
= iconv_open(tocode
, fromcode
)) != (iconv_t
)-1)
1243 /* Solaris prefers upper-case charset names. Don't ask... */
1244 t
= salloc(strlen(tocode
) +1);
1245 _ic_toupper(t
, tocode
);
1246 f
= salloc(strlen(fromcode
) +1);
1247 _ic_toupper(f
, fromcode
);
1248 if ((id
= iconv_open(t
, f
)) != (iconv_t
)-1)
1251 /* Strip dashes for UnixWare */
1254 if ((id
= iconv_open(t
, f
)) != (iconv_t
)-1)
1257 /* Add your vendor's sillinesses here */
1259 /* If the encoding names are equal at this point, they are just not
1260 * understood by iconv(), and we cannot sensibly use it in any way. We do
1261 * not perform this as an optimization above since iconv() can otherwise be
1262 * used to check the validity of the input even with identical encoding
1272 n_iconv_close(iconv_t cd
)
1277 iconvd
= (iconv_t
)-1;
1282 n_iconv_reset(iconv_t cd
)
1285 iconv(cd
, NULL
, NULL
, NULL
, NULL
);
1289 /* (2012-09-24: export and use it exclusively to isolate prototype problems
1290 * (*inb* is 'char const **' except in POSIX) in a single place.
1291 * GNU libiconv even allows for configuration time const/non-const..
1292 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
1293 * support compiler invocations which bail on error, so no -Werror */
1294 /* Citrus project? */
1295 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
1296 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
1298 # define __INBCAST(S) (char ** __restrict__)UNCONST(S)
1300 # define __INBCAST(S) (char const **)UNCONST(S)
1302 # elif OS_SUNOS || OS_SOLARIS
1303 # define __INBCAST(S) (char const ** __restrict__)UNCONST(S)
1306 # define __INBCAST(S) (char **)UNCONST(S)
1310 n_iconv_buf(iconv_t cd
, char const **inb
, size_t *inbleft
,/*XXX redo iconv use*/
1311 char **outb
, size_t *outbleft
, bool_t skipilseq
)
1317 size_t sz
= iconv(cd
, __INBCAST(inb
), inbleft
, outb
, outbleft
);
1318 if (sz
!= (size_t)-1)
1321 if (!skipilseq
|| err
!= EILSEQ
)
1326 } else if (*outbleft
> 0) {
1330 if (*outbleft
> 0/* TODO 0xFFFD 2*/) {
1331 /* TODO 0xFFFD (*outb)[0] = '[';
1332 * TODO (*outb)[1] = '?';
1333 * TODO 0xFFFD (*outb)[2] = ']';
1334 * TODO (*outb) += 3;
1335 * TODO (*outbleft) -= 3; */
1350 n_iconv_str(iconv_t cd
, struct str
*out
, struct str
const *in
,
1351 struct str
*in_rest_or_null
, bool_t skipilseq
)
1364 ol
= (ol
<< 1) - (ol
>> 4);
1375 err
= n_iconv_buf(cd
, &ib
, &il
, &ob
, &ol
, skipilseq
);
1376 if (err
== 0 || err
!= E2BIG
)
1381 obb
= srealloc(obb
, olb
+1);
1384 if (in_rest_or_null
!= NULL
) {
1385 in_rest_or_null
->s
= UNCONST(ib
);
1386 in_rest_or_null
->l
= il
;
1389 out
->s
[out
->l
= olb
- ol
] = '\0';
1393 #endif /* HAVE_ICONV */