IMAP: [a65afca] (Add *imap-delim{,-{{,USER@}HOST}}*) for real now
[s-mailx.git] / strings.c
blob35eef87da012aed739ff5e6088442fc8a244b0a7
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Auto-reclaimed string allocation and support routines that build on top of
3 *@ them. Strings handed out by those are reclaimed at the top of the command
4 *@ loop each time, so they need not be freed.
5 *@ And below this series we do collect all other plain string support routines
6 *@ in here, including those which use normal heap memory.
8 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
9 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
12 * Copyright (c) 1980, 1993
13 * The Regents of the University of California. All rights reserved.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
39 #undef n_FILE
40 #define n_FILE strings
#ifndef HAVE_AMALGAMATION
# include "nail.h"
#endif

#include <ctype.h>
#include <limits.h>
48 /* In debug mode the "string dope" allocations are enwrapped in canaries, just
49 * as we do with our normal memory allocator */
50 #ifdef HAVE_DEBUG
/* Per-allocation overhead in debug mode: two canaries of 8 bytes each (one
 * below and one above the user chunk) plus the struct schunk header */
51 # define _SHOPE_SIZE (2u * 8 * sizeof(char) + sizeof(struct schunk))
/* presumably a compile-time assertion: canaries are accessed as ui8_t */
53 CTA(sizeof(char) == sizeof(ui8_t));
/* Header that salloc() places in front of each debug chunk */
55 struct schunk {
/* Source location of the salloc() call (taken from mdbg_file / mdbg_line) */
56 char const *file;
57 ui32_t line;
/* Size as requested by the caller, before alignment */
58 ui16_t usr_size;
/* Aligned size including _SHOPE_SIZE; _salloc_bcheck() steps by this */
59 ui16_t full_size;
/* Pointer overlay used to walk chunk header, canary bytes and user data */
62 union sptr {
63 void *p;
64 struct schunk *c;
65 char *cp;
66 ui8_t *ui8p;
68 #endif /* HAVE_DEBUG */
/* The size of this union defines the allocation granularity: salloc() rounds
 * each request up to a multiple of (SALIGN + 1) via "+= SALIGN; &= ~SALIGN" */
70 union __align__ {
71 char *cp;
72 size_t sz;
73 ul_i ul;
/* Bit mask form of the granularity, i.e., granularity minus one */
75 #define SALIGN (sizeof(union __align__) - 1)
/* The masking in salloc() only works if the granularity is a power of two */
77 CTA(ISPOW2(SALIGN + 1));
79 struct b_base {
80 struct buffer *_next;
81 char *_bot; /* For spreserve() */
82 char *_relax; /* If !NULL, used by srelax() instead of ._bot */
83 char *_max; /* Max usable byte */
84 char *_caster; /* NULL if full */
87 /* Single instance builtin buffer. Room for anything, most of the time */
88 struct b_bltin {
89 struct b_base b_base;
90 char b_buf[SBUFFER_BUILTIN - sizeof(struct b_base)];
92 #define SBLTIN_SIZE SIZEOF_FIELD(struct b_bltin, b_buf)
94 /* Dynamically allocated buffers to overcome shortage, always released again
95 * once the command loop ticks (without relaxation or during PS_SOURCING) */
96 struct b_dyn {
97 struct b_base b_base;
98 char b_buf[SBUFFER_SIZE - sizeof(struct b_base)];
100 #define SDYN_SIZE SIZEOF_FIELD(struct b_dyn, b_buf)
102 /* The multiplexer of the several real b_* */
103 struct buffer {
104 struct b_base b;
105 char b_buf[VFIELD_SIZE(SALIGN + 1)];
108 /* Requests that exceed SDYN_SIZE-1 and thus cannot be handled by string dope
109 * are always served by the normal memory allocator (which panics if memory
110 * cannot be served). Note such an allocation has not yet occurred, it is only
111 * included as a security fallback bypass */
112 struct hugebuf {
113 struct hugebuf *hb_next;
114 char hb_buf[VFIELD_SIZE(SALIGN + 1)];
/* The statically allocated first buffer; salloc() installs it as _buf_head on
 * the very first allocation */
117 static struct b_bltin _builtin_buf;
/* _buf_head: first buffer of the chain; _buf_list: last buffer, new ones are
 * appended behind it; _buf_server: buffer salloc() tries first;
 * _buf_relax: copy of _buf_server taken by srelax_hold(), reset to NULL by
 * srelax_rele() and sreset() */
118 static struct buffer *_buf_head, *_buf_list, *_buf_server, *_buf_relax;
/* Nesting depth of srelax_hold() calls */
119 static size_t _relax_recur_no;
/* Chain of oversized allocations, released by sreset() */
120 static struct hugebuf *_huge_list;
121 #ifdef HAVE_DEBUG
/* Statistic counters, maintained by salloc() / sreset() and printed by
 * c_sstats(); the "cy" ones are reset by each performed sreset() */
122 static size_t _all_cnt, _all_cycnt, _all_cycnt_max,
123 _all_size, _all_cysize, _all_cysize_max, _all_min,
124 _all_max, _all_wast,
125 _all_bufcnt, _all_cybufcnt, _all_cybufcnt_max,
126 _all_resetreqs, _all_resets;
127 #endif
129 /* sreset() / srelax() release a buffer, check the canaries of all chunks */
130 #ifdef HAVE_DEBUG
131 static void _salloc_bcheck(struct buffer *b);
132 #endif
#ifdef HAVE_DEBUG
/* Compare the 8 canary bytes at cp against the expected pattern.
 * Bit N of the result is set if byte N differs, so 0 means "intact" */
static ui8_t
_salloc_canary_diff(ui8_t const *cp)
{
   static ui8_t const pattern[8] = {
      0xDE, 0xAA, 0x55, 0xAD, 0xBE, 0x55, 0xAA, 0xEF
   };
   ui8_t rv = 0;
   unsigned int bit;

   for (bit = 0; bit < 8; ++bit)
      if (cp[bit] != pattern[bit])
         rv |= (ui8_t)(1u << bit);
   return rv;
}

/* Walk all chunks of buffer b, from ._bot up to ._caster (or ._max if the
 * caster is NULL), and alert about each damaged lower or upper canary */
static void
_salloc_bcheck(struct buffer *b)
{
   union sptr end, cur;
   /*NYD2_ENTER;*/

   end.cp = (b->b._caster != NULL) ? b->b._caster : b->b._max;
   cur.cp = b->b._bot;

   while (cur.cp < end.cp) {
      struct schunk *chunk = cur.c;
      union sptr can;
      void *usr;
      ui8_t bad;

      /* Step to the next chunk first, then inspect this one */
      cur.cp += chunk->full_size;
      can.p = chunk + 1;
      usr = can.cp + 8;

      bad = _salloc_canary_diff(can.ui8p);
      if (bad != 0)
         n_alert("sdope %p: corrupt lower canary: 0x%02X, size %u: %s, line %u",
            usr, bad, chunk->usr_size, chunk->file, chunk->line);

      /* The upper canary directly follows the user bytes */
      can.cp += 8 + chunk->usr_size;
      bad = _salloc_canary_diff(can.ui8p);
      if (bad != 0)
         n_alert("sdope %p: corrupt upper canary: 0x%02X, size %u: %s, line %u",
            usr, bad, chunk->usr_size, chunk->file, chunk->line);
   }
   /*NYD2_LEAVE;*/
}
#endif
/* Allocate size bytes from the auto-reclaimed buffers and return a pointer to
 * them.  The storage is not freed individually; sreset() reclaims it.
 * A size of 0 is treated as 1, and sizes are rounded up to (SALIGN + 1).
 * Requests of SDYN_SIZE - 1 or more bytes bypass the buffers and are served
 * via smalloc() and tracked in _huge_list.
 * With HAVE_DEBUG the user chunk is wrapped in a schunk header and two
 * 8 byte canaries, and statistics are updated */
186 FL void *
187 (salloc)(size_t size SALLOC_DEBUG_ARGS)
189 DBG( size_t orig_size = size; )
190 union {struct buffer *b; struct hugebuf *hb; char *cp;} u;
191 char *x, *y, *z;
192 NYD2_ENTER;
/* Never hand out zero bytes, then round up to the alignment granularity */
194 if (size == 0)
195 ++size;
196 size += SALIGN;
197 size &= ~SALIGN;
199 #ifdef HAVE_DEBUG
200 ++_all_cnt;
201 ++_all_cycnt;
202 _all_cycnt_max = MAX(_all_cycnt_max, _all_cycnt);
203 _all_size += size;
204 _all_cysize += size;
205 _all_cysize_max = MAX(_all_cysize_max, _all_cysize);
206 _all_min = (_all_max == 0) ? size : MIN(_all_min, size);
207 _all_max = MAX(_all_max, size);
208 _all_wast += size - orig_size;
/* From here on size includes the debug header and both canaries */
210 size += _SHOPE_SIZE;
212 if (size >= SDYN_SIZE - 1)
213 n_alert("salloc() of %" PRIuZ " bytes from \"%s\", line %d",
214 size, mdbg_file, mdbg_line);
215 #endif
217 /* Huge allocations are special */
218 if (UNLIKELY(size >= SDYN_SIZE - 1))
219 goto jhuge;
221 /* Search for a buffer with enough free space to serve request */
/* Try the preferred buffer first by jumping into the loop body */
222 if ((u.b = _buf_server) != NULL)
223 goto jumpin;
224 jredo:
225 for (u.b = _buf_head; u.b != NULL; u.b = u.b->b._next) {
226 jumpin:
227 x = u.b->b._caster;
/* A NULL caster marks a full buffer.  If that is the preferred buffer and
 * also the list head, prefer the head's successor (if any); otherwise drop
 * the preference and rescan the whole list from the head */
228 if (x == NULL) {
229 if (u.b == _buf_server) {
230 if (u.b == _buf_head && (u.b = _buf_head->b._next) != NULL) {
231 _buf_server = u.b;
232 goto jumpin;
234 _buf_server = NULL;
235 goto jredo;
237 continue;
239 y = x + size;
240 z = u.b->b._max;
241 if (PTRCMP(y, <=, z)) {
242 /* Alignment is the one thing, the other is what is usually allocated,
243 * and here about 40 bytes seems to be a good cut to avoid non-usable
244 * non-NULL casters. However, because of _salloc_bcheck(), we may not
245 * set ._caster to NULL because then it would check all chunks up to
246 * ._max, which surely doesn't work; speed is no issue with DEBUG */
247 u.b->b._caster = NDBG( PTRCMP(y + 42 + 16, >=, z) ? NULL : ) y;
248 u.cp = x;
249 goto jleave;
253 /* Need a new buffer */
/* The very first buffer is the static builtin one, later ones are dynamic */
254 if (_buf_head == NULL) {
255 struct b_bltin *b = &_builtin_buf;
256 b->b_base._max = b->b_buf + SBLTIN_SIZE - 1;
257 _buf_head = (struct buffer*)b;
258 u.b = _buf_head;
259 } else {
260 #ifdef HAVE_DEBUG
261 ++_all_bufcnt;
262 ++_all_cybufcnt;
263 _all_cybufcnt_max = MAX(_all_cybufcnt_max, _all_cybufcnt);
264 #endif
265 u.b = smalloc(sizeof(struct b_dyn));
266 u.b->b._max = u.b->b_buf + SDYN_SIZE - 1;
/* Append to the chain; the new buffer becomes tail and preferred server,
 * and the request is served from its very beginning */
268 if (_buf_list != NULL)
269 _buf_list->b._next = u.b;
270 _buf_server = _buf_list = u.b;
271 u.b->b._next = NULL;
272 u.b->b._caster = (u.b->b._bot = u.b->b_buf) + size;
273 u.b->b._relax = NULL;
274 u.cp = u.b->b._bot;
276 jleave:
277 /* Encapsulate user chunk in debug canaries */
278 #ifdef HAVE_DEBUG
280 union sptr xl, xu;
281 struct schunk *xc;
/* Layout: schunk header, lower canary, user bytes, upper canary */
283 xl.p = u.cp;
284 xc = xl.c;
285 xc->file = mdbg_file;
286 xc->line = mdbg_line;
287 xc->usr_size = (ui16_t)orig_size;
288 xc->full_size = (ui16_t)size;
289 xl.p = xc + 1;
290 xl.ui8p[0]=0xDE; xl.ui8p[1]=0xAA; xl.ui8p[2]=0x55; xl.ui8p[3]=0xAD;
291 xl.ui8p[4]=0xBE; xl.ui8p[5]=0x55; xl.ui8p[6]=0xAA; xl.ui8p[7]=0xEF;
/* The caller receives the address right behind the lower canary */
292 u.cp = xl.cp + 8;
293 xu.p = u.cp;
294 xu.cp += orig_size;
295 xu.ui8p[0]=0xDE; xu.ui8p[1]=0xAA; xu.ui8p[2]=0x55; xu.ui8p[3]=0xAD;
296 xu.ui8p[4]=0xBE; xu.ui8p[5]=0x55; xu.ui8p[6]=0xAA; xu.ui8p[7]=0xEF;
298 #endif
299 NYD2_LEAVE;
300 return u.cp;
/* Oversized request: dedicated heap block, pushed onto _huge_list */
302 jhuge:
303 u.hb = smalloc(sizeof(*u.hb) - VFIELD_SIZEOF(struct hugebuf, hb_buf) +
304 size +1);
305 u.hb->hb_next = _huge_list;
306 _huge_list = u.hb;
307 u.cp = u.hb->hb_buf;
308 goto jleave;
311 FL void *
312 (csalloc)(size_t nmemb, size_t size SALLOC_DEBUG_ARGS)
314 void *vp;
315 NYD2_ENTER;
317 size *= nmemb;
318 vp = (salloc)(size SALLOC_DEBUG_ARGSCALL);
319 memset(vp, 0, size);
320 NYD2_LEAVE;
321 return vp;
/* Reclaim all auto-reclaimed storage handed out by salloc().
 * Nothing is reclaimed while noreset is set, nor, if only_if_relaxed is
 * given, unless relaxation is active; in the noreset case an active
 * relaxation is still released.
 * Buffers whose preserved bottom (see spreserve()) is still at the buffer
 * start are freed, except for the first one; the remaining ones are rewound
 * to their bottom.  All huge allocations are freed */
324 FL void
325 sreset(bool_t only_if_relaxed)
327 struct buffer *blh, *bh;
328 NYD_ENTER;
330 DBG( ++_all_resetreqs; )
331 if (noreset) {
332 /* Reset relaxation after any jump is a MUST */
333 if (_relax_recur_no > 0)
334 srelax_rele();
335 goto jleave;
337 if (only_if_relaxed && _relax_recur_no == 0)
338 goto jleave;
340 #ifdef HAVE_DEBUG
341 _all_cycnt = _all_cysize = 0;
342 _all_cybufcnt = (_buf_head != NULL && _buf_head->b._next != NULL);
343 ++_all_resets;
344 #endif
346 /* Reset relaxation after jump */
347 if (_relax_recur_no > 0) {
348 srelax_rele();
349 assert(_relax_recur_no == 0);
/* blh tracks the last buffer that was kept, bh the next one to visit */
352 blh = NULL;
353 if ((bh = _buf_head) != NULL) {
354 do {
355 struct buffer *x = bh;
356 bh = x->b._next;
357 DBG( _salloc_bcheck(x); )
359 /* Give away all buffers that are not covered by sreset().
360 * _buf_head is builtin and thus cannot be free()d */
/* blh is NULL only for the first buffer, which is therefore always kept */
361 if (blh != NULL && x->b._bot == x->b_buf) {
362 blh->b._next = bh;
363 free(x);
364 } else {
365 blh = x;
366 x->b._caster = x->b._bot;
367 x->b._relax = NULL;
/* Debug: overwrite the reclaimed area to expose stale users */
368 DBG( memset(x->b._caster, 0377,
369 PTR2SIZE(x->b._max - x->b._caster)); )
371 } while (bh != NULL);
/* Serve from the head again; the last kept buffer is the new tail */
373 _buf_server = _buf_head;
374 _buf_list = blh;
375 _buf_relax = NULL;
378 while (_huge_list != NULL) {
379 struct hugebuf *hb = _huge_list;
380 _huge_list = hb->hb_next;
381 free(hb);
384 DBG( smemreset(); )
385 jleave:
386 NYD_LEAVE;
389 FL void
390 srelax_hold(void)
392 struct buffer *b;
393 NYD_ENTER;
395 if (_relax_recur_no++ == 0) {
396 for (b = _buf_head; b != NULL; b = b->b._next)
397 b->b._relax = b->b._caster;
398 _buf_relax = _buf_server;
400 NYD_LEAVE;
403 FL void
404 srelax_rele(void)
406 struct buffer *b;
407 NYD_ENTER;
409 assert(_relax_recur_no > 0);
411 if (--_relax_recur_no == 0) {
412 for (b = _buf_head; b != NULL; b = b->b._next) {
413 DBG( _salloc_bcheck(b); )
414 b->b._caster = (b->b._relax != NULL) ? b->b._relax : b->b._bot;
415 b->b._relax = NULL;
418 _buf_relax = NULL;
420 #ifdef HAVE_DEVEL
421 else
422 n_err("srelax_rele(): recursion >0!\n");
423 #endif
424 NYD_LEAVE;
427 FL void
428 srelax(void)
430 /* The purpose of relaxation is only that it is possible to reset the
431 * casters, *not* to give back memory to the system. We are presumably in
432 * an iteration over all messages of a mailbox, and it'd be quite
433 * counterproductive to give the system allocator a chance to waste time */
434 struct buffer *b;
435 NYD_ENTER;
437 assert(_relax_recur_no > 0);
439 if (_relax_recur_no == 1) {
440 for (b = _buf_head; b != NULL; b = b->b._next) {
441 DBG( _salloc_bcheck(b); )
442 b->b._caster = (b->b._relax != NULL) ? b->b._relax : b->b._bot;
443 DBG( memset(b->b._caster, 0377, PTR2SIZE(b->b._max - b->b._caster)); )
446 NYD_LEAVE;
449 FL void
450 spreserve(void)
452 struct buffer *b;
453 NYD_ENTER;
455 for (b = _buf_head; b != NULL; b = b->b._next)
456 b->b._bot = b->b._caster;
457 NYD_LEAVE;
#ifdef HAVE_DEBUG
/* Print the allocator statistics that salloc() / sreset() have collected.
 * The argument is unused, the return value is always 0 */
FL int
c_sstats(void *v)
{
   size_t pool, excess;
   NYD_ENTER;
   UNUSED(v);

   /* Bytes which exceeded the maximum buffer capacity of a cycle, if any */
   pool = (_all_cybufcnt_max * SDYN_SIZE) + SBLTIN_SIZE;
   if (pool >= _all_cysize_max)
      excess = 0;
   else
      excess = _all_cysize_max - pool;

   printf("String usage statistics (cycle means one sreset() cycle):\n"
      " Buffer allocs ever/max a time : %" PRIuZ "/%" PRIuZ "\n"
      " .. size of the builtin/dynamic: %" PRIuZ "/%" PRIuZ "\n"
      " Overall alloc count/bytes : %" PRIuZ "/%" PRIuZ "\n"
      " .. bytes min/max/align wastage: %" PRIuZ "/%" PRIuZ "/%" PRIuZ "\n"
      " sreset() cycles : %" PRIuZ " (%" PRIuZ " performed)\n"
      " Cycle max.: alloc count/bytes : %" PRIuZ "/%" PRIuZ "+%" PRIuZ "\n",
      _all_bufcnt, _all_cybufcnt_max,
      SBLTIN_SIZE, SDYN_SIZE,
      _all_cnt, _all_size,
      _all_min, _all_max, _all_wast,
      _all_resetreqs, _all_resets,
      _all_cycnt_max, _all_cysize_max, excess);
   NYD_LEAVE;
   return 0;
}
#endif
489 FL char *
490 (savestr)(char const *str SALLOC_DEBUG_ARGS)
492 size_t size;
493 char *news;
494 NYD_ENTER;
496 size = strlen(str) +1;
497 news = (salloc)(size SALLOC_DEBUG_ARGSCALL);
498 memcpy(news, str, size);
499 NYD_LEAVE;
500 return news;
503 FL char *
504 (savestrbuf)(char const *sbuf, size_t sbuf_len SALLOC_DEBUG_ARGS)
506 char *news;
507 NYD_ENTER;
509 news = (salloc)(sbuf_len +1 SALLOC_DEBUG_ARGSCALL);
510 memcpy(news, sbuf, sbuf_len);
511 news[sbuf_len] = 0;
512 NYD_LEAVE;
513 return news;
516 FL char *
517 (savecatsep)(char const *s1, char sep, char const *s2 SALLOC_DEBUG_ARGS)
519 size_t l1, l2;
520 char *news;
521 NYD_ENTER;
523 l1 = (s1 != NULL) ? strlen(s1) : 0;
524 l2 = strlen(s2);
525 news = (salloc)(l1 + (sep != '\0') + l2 +1 SALLOC_DEBUG_ARGSCALL);
526 if (l1 > 0) {
527 memcpy(news + 0, s1, l1);
528 if (sep != '\0')
529 news[l1++] = sep;
531 memcpy(news + l1, s2, l2);
532 news[l1 + l2] = '\0';
533 NYD_LEAVE;
534 return news;
/*
 * Support routines, auto-reclaimed storage
 */
541 FL char *
542 (i_strdup)(char const *src SALLOC_DEBUG_ARGS)
544 size_t sz;
545 char *dest;
546 NYD_ENTER;
548 sz = strlen(src) +1;
549 dest = (salloc)(sz SALLOC_DEBUG_ARGSCALL);
550 i_strcpy(dest, src, sz);
551 NYD_LEAVE;
552 return dest;
555 FL char *
556 (protbase)(char const *cp SALLOC_DEBUG_ARGS) /* TODO obsolete */
558 char *n, *np;
559 NYD_ENTER;
561 np = n = (salloc)(strlen(cp) +1 SALLOC_DEBUG_ARGSCALL);
563 /* Just ignore the `is-system-mailbox' prefix XXX */
564 if (cp[0] == '%' && cp[1] == ':')
565 cp += 2;
567 while (*cp != '\0') {
568 if (cp[0] == ':' && cp[1] == '/' && cp[2] == '/') {
569 *np++ = *cp++;
570 *np++ = *cp++;
571 *np++ = *cp++;
572 } else if (cp[0] == '/')
573 break;
574 else
575 *np++ = *cp++;
577 *np = '\0';
578 NYD_LEAVE;
579 return n;
582 FL struct str *
583 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
585 va_list vl;
586 size_t l;
587 char const *cs;
588 NYD_ENTER;
590 va_start(vl, self);
591 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
592 l += strlen(cs);
593 va_end(vl);
595 self->l = l;
596 self->s = salloc(l +1);
598 va_start(vl, self);
599 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
600 size_t i = strlen(cs);
601 memcpy(self->s + l, cs, i);
602 l += i;
604 self->s[l] = '\0';
605 va_end(vl);
606 NYD_LEAVE;
607 return self;
610 FL struct str *
611 (str_concat_cpa)(struct str *self, char const * const *cpa,
612 char const *sep_o_null SALLOC_DEBUG_ARGS)
614 size_t sonl, l;
615 char const * const *xcpa;
616 NYD_ENTER;
618 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
620 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
621 l += strlen(*xcpa) + sonl;
623 self->l = l;
624 self->s = (salloc)(l +1 SALLOC_DEBUG_ARGSCALL);
626 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
627 size_t i = strlen(*xcpa);
628 memcpy(self->s + l, *xcpa, i);
629 l += i;
630 if (sonl > 0) {
631 memcpy(self->s + l, sep_o_null, sonl);
632 l += sonl;
635 self->s[l] = '\0';
636 NYD_LEAVE;
637 return self;
/*
 * Routines that are not related to auto-reclaimed storage follow.
 */
644 FL int
645 anyof(char const *s1, char const *s2)
647 NYD2_ENTER;
648 for (; *s1 != '\0'; ++s1)
649 if (strchr(s2, *s1) != NULL)
650 break;
651 NYD2_LEAVE;
652 return (*s1 != '\0');
655 FL char *
656 n_strsep(char **iolist, char sep, bool_t ignore_empty)
658 char *base, *cp;
659 NYD2_ENTER;
661 for (base = *iolist; base != NULL; base = *iolist) {
662 while (*base != '\0' && blankspacechar(*base))
663 ++base;
664 cp = strchr(base, sep);
665 if (cp != NULL)
666 *iolist = cp + 1;
667 else {
668 *iolist = NULL;
669 cp = base + strlen(base);
671 while (cp > base && blankspacechar(cp[-1]))
672 --cp;
673 *cp = '\0';
674 if (*base != '\0' || !ignore_empty)
675 break;
677 NYD2_LEAVE;
678 return base;
681 FL void
682 i_strcpy(char *dest, char const *src, size_t size)
684 NYD2_ENTER;
685 if (size > 0) {
686 for (;; ++dest, ++src)
687 if ((*dest = lowerconv(*src)) == '\0') {
688 break;
689 } else if (--size == 0) {
690 *dest = '\0';
691 break;
694 NYD2_LEAVE;
697 FL int
698 is_prefix(char const *as1, char const *as2)
700 char c;
701 NYD2_ENTER;
703 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
704 if (*as2 == '\0')
705 break;
706 NYD2_LEAVE;
707 return (c == '\0');
710 FL char *
711 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
713 char const *cp;
714 size_t i;
715 char c, *rv;
716 NYD2_ENTER;
718 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
719 if (c == '"' || c == '\\')
720 ++i;
721 rv = salloc(i +1);
723 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
724 if (c == '"' || c == '\\')
725 rv[i++] = '\\';
726 rv[i] = '\0';
727 NYD2_LEAVE;
728 return rv;
/* Isolate, in place, the last word of linebuf and return a pointer to it, or
 * NULL if there is none (empty line, or the isolated word is empty).
 * The word is either delimited by whitespace or, if the line ends with ' or
 * ", by the matching unescaped quote; with strip the quotes are removed.
 * *needs_list is set to FAL0 when NULL is returned, otherwise to whether the
 * returned word does not start at linebuf while linebuf is not empty --
 * presumably "a message list precedes the word"; verify against callers */
731 FL char *
732 laststring(char *linebuf, bool_t *needs_list, bool_t strip)
734 char *cp, *p, quoted;
735 NYD_ENTER;
737 /* Anything to do at all? */
738 if (*(cp = linebuf) == '\0')
739 goto jnull;
740 cp += strlen(linebuf) -1;
742 /* Strip away trailing blanks */
743 while (whitechar(*cp) && cp > linebuf)
744 --cp;
745 cp[1] = '\0';
746 if (cp == linebuf)
747 goto jleave;
749 /* Now search for the BOS of the "last string" */
/* quoted is the closing quote character, or a space in whitespace mode */
750 quoted = *cp;
751 if (quoted == '\'' || quoted == '"') {
752 if (strip)
753 *cp = '\0';
754 } else
755 quoted = ' ';
/* Walk backwards to the opening delimiter */
757 while (cp > linebuf) {
758 --cp;
759 if (quoted != ' ') {
760 if (*cp != quoted)
761 continue;
762 } else if (!whitechar(*cp))
763 continue;
/* A delimiter counts only if it is not preceded by a backslash */
764 if (cp == linebuf || cp[-1] != '\\') {
765 /* When in whitespace mode, WS prefix doesn't belong */
766 if (quoted == ' ')
767 ++cp;
768 break;
770 /* Expand the escaped quote character */
/* Shifts the rest of the string one byte to the left, over the backslash */
771 for (p = --cp; (p[0] = p[1]) != '\0'; ++p)
/* Remove the opening quote the same way if stripping was requested */
774 if (strip && quoted != ' ' && *cp == quoted)
775 for (p = cp; (p[0] = p[1]) != '\0'; ++p)
778 /* The "last string" has been skipped over, but still, try to step backwards
779 * until we are at BOS or see whitespace, so as to make possible things like
780 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
781 while (cp > linebuf) {
782 --cp;
783 if (whitechar(*cp)) {
784 p = cp;
/* Terminate whatever precedes the word; cp then addresses the word */
785 *cp++ = '\0';
786 /* We can furtherly release our callees if we now decide wether the
787 * remaining non-"last string" line content contains non-WS */
788 while (--p >= linebuf)
789 if (!whitechar(*p))
790 goto jleave;
/* Only whitespace precedes the word: treat the word as the entire line */
791 linebuf = cp;
792 break;
796 jleave:
797 if (cp != NULL && *cp == '\0')
798 goto jnull;
799 *needs_list = (cp != linebuf && *linebuf != '\0');
800 j_leave:
801 NYD_LEAVE;
802 return cp;
803 jnull:
804 *needs_list = FAL0;
805 cp = NULL;
806 goto j_leave;
809 FL void
810 makelow(char *cp) /* TODO isn't that crap? --> */
812 NYD_ENTER;
813 #ifdef HAVE_C90AMEND1
814 if (mb_cur_max > 1) {
815 char *tp = cp;
816 wchar_t wc;
817 int len;
819 while (*cp != '\0') {
820 len = mbtowc(&wc, cp, mb_cur_max);
821 if (len < 0)
822 *tp++ = *cp++;
823 else {
824 wc = towlower(wc);
825 if (wctomb(tp, wc) == len)
826 tp += len, cp += len;
827 else
828 *tp++ = *cp++; /* <-- at least here */
831 } else
832 #endif
835 *cp = tolower((uc_i)*cp);
836 while (*cp++ != '\0');
838 NYD_LEAVE;
/* Case-insensitive substring test: returns true if all of sub was matched
 * somewhere in str (thus also for an empty sub).
 * With HAVE_C90AMEND1 and mb_cur_max > 1 characters are compared as wide
 * characters via towupper(); if a byte sequence cannot be converted the
 * bytewise toupper() comparison below is used for that step instead */
841 FL bool_t
842 substr(char const *str, char const *sub)
844 char const *cp, *backup;
845 NYD_ENTER;
/* cp walks sub, str walks the candidate match; backup is where the current
 * match attempt started */
847 cp = sub;
848 backup = str;
849 while (*str != '\0' && *cp != '\0') {
850 #ifdef HAVE_C90AMEND1
851 if (mb_cur_max > 1) {
852 wchar_t c, c2;
853 int sz;
855 if ((sz = mbtowc(&c, cp, mb_cur_max)) == -1)
856 goto Jsinglebyte;
857 cp += sz;
858 if ((sz = mbtowc(&c2, str, mb_cur_max)) == -1)
859 goto Jsinglebyte;
860 str += sz;
861 c = towupper(c);
862 c2 = towupper(c2);
/* Mismatch: restart the attempt one character behind its former start */
863 if (c != c2) {
864 if ((sz = mbtowc(&c, backup, mb_cur_max)) > 0) {
865 backup += sz;
866 str = backup;
867 } else
868 str = ++backup;
869 cp = sub;
871 } else
872 Jsinglebyte:
873 #endif
875 int c, c2;
/* Mask to 0..255 so that the ctype functions get a valid argument */
877 c = *cp++ & 0377;
878 if (islower(c))
879 c = toupper(c);
880 c2 = *str++ & 0377;
881 if (islower(c2))
882 c2 = toupper(c2);
/* Mismatch: restart the attempt one byte behind its former start */
883 if (c != c2) {
884 str = ++backup;
885 cp = sub;
889 NYD_LEAVE;
890 return (*cp == '\0');
893 FL char *
894 sstpcpy(char *dst, char const *src)
896 NYD2_ENTER;
897 while ((*dst = *src++) != '\0')
898 ++dst;
899 NYD2_LEAVE;
900 return dst;
903 FL char *
904 (sstrdup)(char const *cp SMALLOC_DEBUG_ARGS)
906 char *dp;
907 NYD2_ENTER;
909 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) SMALLOC_DEBUG_ARGSCALL);
910 NYD2_LEAVE;
911 return dp;
914 FL char *
915 (sbufdup)(char const *cp, size_t len SMALLOC_DEBUG_ARGS)
917 char *dp = NULL;
918 NYD2_ENTER;
920 dp = (smalloc)(len +1 SMALLOC_DEBUG_ARGSCALL);
921 if (cp != NULL)
922 memcpy(dp, cp, len);
923 dp[len] = '\0';
924 NYD2_LEAVE;
925 return dp;
928 FL char *
929 n_strlcpy(char *dst, char const *src, size_t len)
931 NYD2_ENTER;
933 assert(len > 0);
935 dst = strncpy(dst, src, len);
936 dst[len -1] = '\0';
937 NYD2_LEAVE;
938 return dst;
941 FL int
942 asccasecmp(char const *s1, char const *s2)
944 int cmp;
945 NYD2_ENTER;
947 for (;;) {
948 char c1 = *s1++, c2 = *s2++;
949 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
950 break;
952 NYD2_LEAVE;
953 return cmp;
956 FL int
957 ascncasecmp(char const *s1, char const *s2, size_t sz)
959 int cmp = 0;
960 NYD2_ENTER;
962 while (sz-- > 0) {
963 char c1 = *s1++, c2 = *s2++;
964 cmp = (ui8_t)lowerconv(c1);
965 cmp -= (ui8_t)lowerconv(c2);
966 if (cmp != 0 || c1 == '\0')
967 break;
969 NYD2_LEAVE;
970 return cmp;
973 FL char const *
974 asccasestr(char const *s1, char const *s2)
976 char c2, c1;
977 NYD2_ENTER;
979 for (c2 = *s2++, c2 = lowerconv(c2);;) {
980 if ((c1 = *s1++) == '\0') {
981 s1 = NULL;
982 break;
984 if (lowerconv(c1) == c2 && is_asccaseprefix(s1, s2)) {
985 --s1;
986 break;
989 NYD2_LEAVE;
990 return s1;
993 FL bool_t
994 is_asccaseprefix(char const *as1, char const *as2)
996 bool_t rv = FAL0;
997 NYD2_ENTER;
999 for (;; ++as1, ++as2) {
1000 char c1 = lowerconv(*as1), c2 = lowerconv(*as2);
1002 if ((rv = (c2 == '\0')))
1003 break;
1004 if (c1 != c2)
1005 break;
1007 NYD2_LEAVE;
1008 return rv;
1011 FL struct str *
1012 (n_str_dup)(struct str *self, struct str const *t SMALLOC_DEBUG_ARGS)
1014 NYD_ENTER;
1015 if (t != NULL && t->l > 0) {
1016 self->l = t->l;
1017 self->s = (srealloc)(self->s, t->l +1 SMALLOC_DEBUG_ARGSCALL);
1018 memcpy(self->s, t->s, t->l);
1019 self->s[t->l] = '\0';
1020 } else
1021 self->l = 0;
1022 NYD_LEAVE;
1023 return self;
1026 FL struct str *
1027 (n_str_add_buf)(struct str *self, char const *buf, size_t buflen
1028 SMALLOC_DEBUG_ARGS)
1030 NYD_ENTER;
1031 if (buflen != 0) {
1032 size_t sl = self->l;
1033 self->l = sl + buflen;
1034 self->s = (srealloc)(self->s, self->l +1 SMALLOC_DEBUG_ARGSCALL);
1035 memcpy(self->s + sl, buf, buflen);
1036 self->s[self->l] = '\0';
1038 NYD_LEAVE;
1039 return self;
/*
 * UTF-8
 */
#ifdef HAVE_NATCH_CHAR
/* Decode one UTF-8 sequence of one to four bytes from *bdat, of which *blen
 * bytes are available, and return the code point.
 * On success *bdat / *blen are stepped over the consumed sequence.
 * On error UI32_MAX is returned and exactly one byte is consumed; errors are
 * a truncated sequence, an invalid leading byte (a stray continuation byte,
 * or 0xF8 and above), or a byte within the sequence which is not a
 * continuation byte.  If *blen is 0 then UI32_MAX is returned without
 * reading or consuming anything.
 * TODO overlong forms, surrogates and values above U+10FFFF are not
 * TODO rejected */
FL ui32_t
n_utf8_to_utf32(char const **bdat, size_t *blen) /* TODO check false UTF8 */
{
   char const *cp;
   size_t l, togo, i;
   ui32_t c, x;
   NYD2_ENTER;

   cp = *bdat;
   l = *blen;

   /* Without this guard the length would wrap around below */
   if (l == 0) {
      c = UI32_MAX;
      goto jleave;
   }
   --l;
   x = (ui8_t)*cp++;

   if (x <= 0x7F) {
      c = x;
      goto jleave;
   }

   /* The leading byte defines the number of continuation bytes and carries
    * the most significant bits of the code point */
   if ((x & 0xE0) == 0xC0) {
      togo = 1;
      c = x & 0x1F;
   } else if ((x & 0xF0) == 0xE0) {
      togo = 2;
      c = x & 0x0F;
   } else if ((x & 0xF8) == 0xF0) {
      togo = 3;
      c = x & 0x07;
   } else
      goto jerr;

   if (l < togo)
      goto jerr;

   /* Each continuation byte must be 10xxxxxx, and adds six bits */
   for (i = 0; i < togo; ++i) {
      x = (ui8_t)cp[i];
      if ((x & 0xC0) != 0x80)
         goto jerr;
      c = (c << 6) | (x & 0x3F);
   }
   cp += togo;
   l -= togo;

jleave:
   *bdat = cp;
   *blen = l;
   NYD2_LEAVE;
   return c;
jerr:
   c = UI32_MAX;
   goto jleave;
}
#endif /* HAVE_NATCH_CHAR */
#ifdef HAVE_FILTER_HTML_TAGSOUP
/* Encode the code point c as UTF-8 into buf, NUL terminate it, and return
 * the number of bytes written, not counting the NUL; buf must thus provide
 * room for at least 5 bytes.
 * U+0000 results in the empty string (return value 0).
 * Surrogates (U+D800 - U+DFFF, which may not be converted: Compatibility
 * rule C10) and values above 0x1FFFFF are replaced with U+FFFD, the Unicode
 * replacement character */
FL size_t
n_utf32_to_utf8(ui32_t c, char *buf)
{
   ui8_t leader;
   size_t l, i;
   NYD2_ENTER;

   /* Determine the length of the sequence and the marker bits of byte 0 */
   if (c == 0) {
      l = 0;
      leader = 0x00;
   } else if (c <= 0x0000007Fu) {
      l = 1;
      leader = 0x00;
   } else if (c <= 0x000007FFu) {
      l = 2;
      leader = 0xC0;
   } else if (c <= 0x0000FFFFu && !(c >= 0xD800u && c <= 0xDFFFu)) {
      l = 3;
      leader = 0xE0;
   } else if (c > 0x0000FFFFu && c <= 0x001FFFFFu) {
      l = 4;
      leader = 0xF0;
   } else {
      c = 0xFFFDu; /* Unicode replacement character */
      l = 3;
      leader = 0xE0;
   }

   /* Continuation bytes carry six bits each, least significant ones last */
   for (i = l; i > 1;) {
      buf[--i] = (char)(0x80 | (c & 0x3F));
      c >>= 6;
   }
   if (l > 0)
      buf[0] = (char)(leader | c);
   buf[l] = '\0';
   NYD2_LEAVE;
   return l;
}
#endif /* HAVE_FILTER_HTML_TAGSOUP */
/*
 * Our iconv(3) wrapper
 */
1164 #ifdef HAVE_ICONV
1166 static void _ic_toupper(char *dest, char const *src);
1167 static void _ic_stripdash(char *p);
1169 static void
1170 _ic_toupper(char *dest, char const *src)
1172 NYD2_ENTER;
1174 *dest++ = upperconv(*src);
1175 while (*src++ != '\0');
1176 NYD2_LEAVE;
1179 static void
1180 _ic_stripdash(char *p)
1182 char *q = p;
1183 NYD2_ENTER;
1186 if (*(q = p) != '-')
1187 ++q;
1188 while (*p++ != '\0');
1189 NYD2_LEAVE;
1192 FL iconv_t
1193 n_iconv_open(char const *tocode, char const *fromcode)
1195 iconv_t id;
1196 char *t, *f;
1197 NYD_ENTER;
1199 if (!asccasecmp(fromcode, "unknown-8bit") &&
1200 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
1201 fromcode = charset_get_8bit();
1203 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
1204 goto jleave;
1206 /* Remove the "iso-" prefixes for Solaris */
1207 if (!ascncasecmp(tocode, "iso-", 4))
1208 tocode += 4;
1209 else if (!ascncasecmp(tocode, "iso", 3))
1210 tocode += 3;
1211 if (!ascncasecmp(fromcode, "iso-", 4))
1212 fromcode += 4;
1213 else if (!ascncasecmp(fromcode, "iso", 3))
1214 fromcode += 3;
1215 if (*tocode == '\0' || *fromcode == '\0') {
1216 id = (iconv_t)-1;
1217 goto jleave;
1219 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
1220 goto jleave;
1222 /* Solaris prefers upper-case charset names. Don't ask... */
1223 t = salloc(strlen(tocode) +1);
1224 _ic_toupper(t, tocode);
1225 f = salloc(strlen(fromcode) +1);
1226 _ic_toupper(f, fromcode);
1227 if ((id = iconv_open(t, f)) != (iconv_t)-1)
1228 goto jleave;
1230 /* Strip dashes for UnixWare */
1231 _ic_stripdash(t);
1232 _ic_stripdash(f);
1233 if ((id = iconv_open(t, f)) != (iconv_t)-1)
1234 goto jleave;
1236 /* Add your vendor's sillynesses here */
1238 /* If the encoding names are equal at this point, they are just not
1239 * understood by iconv(), and we cannot sensibly use it in any way. We do
1240 * not perform this as an optimization above since iconv() can otherwise be
1241 * used to check the validity of the input even with identical encoding
1242 * names */
1243 if (!strcmp(t, f))
1244 errno = 0;
1245 jleave:
1246 NYD_LEAVE;
1247 return id;
1250 FL void
1251 n_iconv_close(iconv_t cd)
1253 NYD_ENTER;
1254 iconv_close(cd);
1255 if (cd == iconvd)
1256 iconvd = (iconv_t)-1;
1257 NYD_LEAVE;
1260 FL void
1261 n_iconv_reset(iconv_t cd)
1263 NYD_ENTER;
1264 iconv(cd, NULL, NULL, NULL, NULL);
1265 NYD_LEAVE;
1268 /* (2012-09-24: export and use it exclusively to isolate prototype problems
1269 * (*inb* is 'char const **' except in POSIX) in a single place.
1270 * GNU libiconv even allows for configuration time const/non-const..
1271 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
1272 * support compiler invocations which bail on error, so no -Werror */
1273 /* Citrus project? */
1274 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
1275 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
1276 # if OS_DRAGONFLY
1277 # define __INBCAST(S) (char ** __restrict__)UNCONST(S)
1278 # else
1279 # define __INBCAST(S) (char const **)UNCONST(S)
1280 # endif
1281 # elif OS_SUNOS || OS_SOLARIS
1282 # define __INBCAST(S) (char const ** __restrict__)UNCONST(S)
1283 # endif
1284 # ifndef __INBCAST
1285 # define __INBCAST(S) (char **)UNCONST(S)
1286 # endif
1288 FL int
1289 n_iconv_buf(iconv_t cd, char const **inb, size_t *inbleft,/*XXX redo iconv use*/
1290 char **outb, size_t *outbleft, bool_t skipilseq)
1292 int err = 0;
1293 NYD2_ENTER;
1295 for (;;) {
1296 size_t sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft);
1297 if (sz != (size_t)-1)
1298 break;
1299 err = errno;
1300 if (!skipilseq || err != EILSEQ)
1301 break;
1302 if (*inbleft > 0) {
1303 ++(*inb);
1304 --(*inbleft);
1305 } else if (*outbleft > 0) {
1306 **outb = '\0';
1307 break;
1309 if (*outbleft > 0/* TODO 0xFFFD 2*/) {
1310 /* TODO 0xFFFD (*outb)[0] = '[';
1311 * TODO (*outb)[1] = '?';
1312 * TODO 0xFFFD (*outb)[2] = ']';
1313 * TODO (*outb) += 3;
1314 * TODO (*outbleft) -= 3; */
1315 *(*outb)++ = '?';
1316 --*outbleft;
1317 } else {
1318 err = E2BIG;
1319 break;
1321 err = 0;
1323 NYD2_LEAVE;
1324 return err;
1326 # undef __INBCAST
1328 FL int
1329 n_iconv_str(iconv_t cd, struct str *out, struct str const *in,
1330 struct str *in_rest_or_null, bool_t skipilseq)
1332 int err;
1333 char *obb, *ob;
1334 char const *ib;
1335 size_t olb, ol, il;
1336 NYD2_ENTER;
1338 err = 0;
1339 obb = out->s;
1340 olb = out->l;
1341 ol = in->l;
1343 ol = (ol << 1) - (ol >> 4);
1344 if (olb <= ol) {
1345 olb = ol;
1346 goto jrealloc;
1349 for (;;) {
1350 ib = in->s;
1351 il = in->l;
1352 ob = obb;
1353 ol = olb;
1354 err = n_iconv_buf(cd, &ib, &il, &ob, &ol, skipilseq);
1355 if (err == 0 || err != E2BIG)
1356 break;
1357 err = 0;
1358 olb += in->l;
1359 jrealloc:
1360 obb = srealloc(obb, olb +1);
1363 if (in_rest_or_null != NULL) {
1364 in_rest_or_null->s = UNCONST(ib);
1365 in_rest_or_null->l = il;
1367 out->s = obb;
1368 out->s[out->l = olb - ol] = '\0';
1369 NYD2_LEAVE;
1370 return err;
1372 #endif /* HAVE_ICONV */
1374 /* s-it-mode */