nail.h: add some OS_ constants
[s-mailx.git] / strings.c
blobf5807ce7be1462253a53aa49520c6cdb788cb93a
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Auto-reclaimed string allocation and support routines that build on top of
3 *@ them. Strings handed out by those are reclaimed at the top of the command
4 *@ loop each time, so they need not be freed.
5 *@ And below this series we do collect all other plain string support routines
6 *@ in here, including those which use normal heap memory.
8 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
9 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
12 * Copyright (c) 1980, 1993
13 * The Regents of the University of California. All rights reserved.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 * must display the following acknowledgement:
25 * This product includes software developed by the University of
26 * California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 * may be used to endorse or promote products derived from this software
29 * without specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
43 #undef n_FILE
44 #define n_FILE strings
#ifndef HAVE_AMALGAMATION
# include "nail.h"
#endif

#include <ctype.h>
#include <limits.h>
/* Debug builds wrap every "string dope" chunk just like the normal memory
 * allocator does: a struct schunk header, an 8 byte lower canary, the user
 * data, and an 8 byte upper canary */
#ifdef HAVE_DEBUG
/* Per-chunk overhead: two canaries of 8 bytes each plus the header */
# define _SHOPE_SIZE (2u * 8 * sizeof(char) + sizeof(struct schunk))

CTA(sizeof(char) == sizeof(ui8_t));

/* Header that salloc() places in front of each debug chunk */
struct schunk {
   char const *file;    /* Source file of the allocation request */
   ui32_t line;         /* Source line of the allocation request */
   ui16_t usr_size;     /* Size as requested by the caller */
   ui16_t full_size;    /* Aligned size plus _SHOPE_SIZE */
};

/* One address, seen as untyped memory, chunk header, string or raw bytes */
union sptr {
   void *p;
   struct schunk *c;
   char *cp;
   ui8_t *ui8p;
};
#endif /* HAVE_DEBUG */
74 union __align__ {
75 char *cp;
76 size_t sz;
77 ul_i ul;
79 #define SALIGN (sizeof(union __align__) - 1)
81 CTA(ISPOW2(SALIGN + 1));
/* Bookkeeping shared by all buffer flavours */
struct b_base {
   struct buffer *_next;   /* Next buffer of the chain */
   char *_bot;             /* Reset position; moved up by spreserve() */
   char *_relax;           /* If !NULL, used by srelax() instead of ._bot */
   char *_max;             /* Max usable byte */
   char *_caster;          /* Next free byte; NULL if full */
};
91 /* Single instance builtin buffer. Room for anything, most of the time */
92 struct b_bltin {
93 struct b_base b_base;
94 char b_buf[SBUFFER_BUILTIN - sizeof(struct b_base)];
96 #define SBLTIN_SIZE SIZEOF_FIELD(struct b_bltin, b_buf)
98 /* Dynamically allocated buffers to overcome shortage, always released again
99 * once the command loop ticks (without relaxation or during PS_SOURCING) */
100 struct b_dyn {
101 struct b_base b_base;
102 char b_buf[SBUFFER_SIZE - sizeof(struct b_base)];
104 #define SDYN_SIZE SIZEOF_FIELD(struct b_dyn, b_buf)
106 /* The multiplexer of the several real b_* */
107 struct buffer {
108 struct b_base b;
109 char b_buf[VFIELD_SIZE(SALIGN + 1)];
112 /* Requests that exceed SDYN_SIZE-1 and thus cannot be handled by string dope
113 * are always served by the normal memory allocator (which panics if memory
114 * cannot be served). Note such an allocation has not yet occurred, it is only
115 * included as a security fallback bypass */
116 struct hugebuf {
117 struct hugebuf *hb_next;
118 char hb_buf[VFIELD_SIZE(SALIGN + 1)];
121 static struct b_bltin _builtin_buf;
122 static struct buffer *_buf_head, *_buf_list, *_buf_server, *_buf_relax;
123 static size_t _relax_recur_no;
124 static struct hugebuf *_huge_list;
125 #ifdef HAVE_DEBUG
126 static size_t _all_cnt, _all_cycnt, _all_cycnt_max,
127 _all_size, _all_cysize, _all_cysize_max, _all_min,
128 _all_max, _all_wast,
129 _all_bufcnt, _all_cybufcnt, _all_cybufcnt_max,
130 _all_resetreqs, _all_resets;
131 #endif
133 /* sreset() / srelax() release a buffer, check the canaries of all chunks */
134 #ifdef HAVE_DEBUG
135 static void _salloc_bcheck(struct buffer *b);
136 #endif
#ifdef HAVE_DEBUG
/* Compare the 8 bytes at p with the canary pattern.  Bit N of the result is
 * set if byte N differs, so 0 means the canary is intact */
static ui8_t
_salloc_canary_diff(ui8_t const *p)
{
   static ui8_t const pattern[8] = {
      0xDE, 0xAA, 0x55, 0xAD, 0xBE, 0x55, 0xAA, 0xEF
   };
   ui8_t bad = 0;
   unsigned int j;

   for (j = 0; j < 8; ++j)
      if (p[j] != pattern[j])
         bad |= (ui8_t)(1u << j);
   return bad;
}

/* Walk all chunks of b between ._bot and ._caster (or ._max if the buffer is
 * marked full) and alert() about each damaged lower or upper canary */
static void
_salloc_bcheck(struct buffer *b)
{
   union sptr end, cur;
   /*NYD2_ENTER;*/

   end.cp = (b->b._caster != NULL) ? b->b._caster : b->b._max;
   cur.cp = b->b._bot;

   while (cur.cp < end.cp) {
      struct schunk *chunk = cur.c;
      union sptr can;
      void *usr;
      ui8_t bad;

      /* Step to the next chunk right away, the header tells its size */
      cur.cp += chunk->full_size;

      /* The lower canary follows the header, user data follows the canary */
      can.p = chunk + 1;
      usr = can.cp + 8;

      if ((bad = _salloc_canary_diff(can.ui8p)) != 0)
         alert("sdope %p: corrupt lower canary: 0x%02X, size %u: %s, line %u",
            usr, bad, chunk->usr_size, chunk->file, chunk->line);

      /* The upper canary sits directly behind the user data */
      can.cp += 8 + chunk->usr_size;

      if ((bad = _salloc_canary_diff(can.ui8p)) != 0)
         alert("sdope %p: corrupt upper canary: 0x%02X, size %u: %s, line %u",
            usr, bad, chunk->usr_size, chunk->file, chunk->line);
   }
   /*NYD2_LEAVE;*/
}
#endif
190 FL void *
191 (salloc)(size_t size SALLOC_DEBUG_ARGS)
193 DBG( size_t orig_size = size; )
194 union {struct buffer *b; struct hugebuf *hb; char *cp;} u;
195 char *x, *y, *z;
196 NYD2_ENTER;
198 if (size == 0)
199 ++size;
200 size += SALIGN;
201 size &= ~SALIGN;
203 #ifdef HAVE_DEBUG
204 ++_all_cnt;
205 ++_all_cycnt;
206 _all_cycnt_max = MAX(_all_cycnt_max, _all_cycnt);
207 _all_size += size;
208 _all_cysize += size;
209 _all_cysize_max = MAX(_all_cysize_max, _all_cysize);
210 _all_min = (_all_max == 0) ? size : MIN(_all_min, size);
211 _all_max = MAX(_all_max, size);
212 _all_wast += size - orig_size;
214 size += _SHOPE_SIZE;
216 if (size >= SDYN_SIZE - 1)
217 alert("salloc() of %" PRIuZ " bytes from \"%s\", line %d\n",
218 size, mdbg_file, mdbg_line);
219 #endif
221 /* Huge allocations are special */
222 if (UNLIKELY(size >= SDYN_SIZE - 1))
223 goto jhuge;
225 /* Search for a buffer with enough free space to serve request */
226 if ((u.b = _buf_server) != NULL)
227 goto jumpin;
228 jredo:
229 for (u.b = _buf_head; u.b != NULL; u.b = u.b->b._next) {
230 jumpin:
231 x = u.b->b._caster;
232 if (x == NULL) {
233 if (u.b == _buf_server) {
234 if (u.b == _buf_head && (u.b = _buf_head->b._next) != NULL) {
235 _buf_server = u.b;
236 goto jumpin;
238 _buf_server = NULL;
239 goto jredo;
241 continue;
243 y = x + size;
244 z = u.b->b._max;
245 if (PTRCMP(y, <=, z)) {
246 /* Alignment is the one thing, the other is what is usually allocated,
247 * and here about 40 bytes seems to be a good cut to avoid non-usable
248 * non-NULL casters. However, because of _salloc_bcheck(), we may not
249 * set ._caster to NULL because then it would check all chunks up to
250 * ._max, which surely doesn't work; speed is no issue with DEBUG */
251 u.b->b._caster = NDBG( PTRCMP(y + 42 + 16, >=, z) ? NULL : ) y;
252 u.cp = x;
253 goto jleave;
257 /* Need a new buffer */
258 if (_buf_head == NULL) {
259 struct b_bltin *b = &_builtin_buf;
260 b->b_base._max = b->b_buf + SBLTIN_SIZE - 1;
261 _buf_head = (struct buffer*)b;
262 u.b = _buf_head;
263 } else {
264 #ifdef HAVE_DEBUG
265 ++_all_bufcnt;
266 ++_all_cybufcnt;
267 _all_cybufcnt_max = MAX(_all_cybufcnt_max, _all_cybufcnt);
268 #endif
269 u.b = smalloc(sizeof(struct b_dyn));
270 u.b->b._max = u.b->b_buf + SDYN_SIZE - 1;
272 if (_buf_list != NULL)
273 _buf_list->b._next = u.b;
274 _buf_server = _buf_list = u.b;
275 u.b->b._next = NULL;
276 u.b->b._caster = (u.b->b._bot = u.b->b_buf) + size;
277 u.b->b._relax = NULL;
278 u.cp = u.b->b._bot;
280 jleave:
281 /* Encapsulate user chunk in debug canaries */
282 #ifdef HAVE_DEBUG
284 union sptr xl, xu;
285 struct schunk *xc;
287 xl.p = u.cp;
288 xc = xl.c;
289 xc->file = mdbg_file;
290 xc->line = mdbg_line;
291 xc->usr_size = (ui16_t)orig_size;
292 xc->full_size = (ui16_t)size;
293 xl.p = xc + 1;
294 xl.ui8p[0]=0xDE; xl.ui8p[1]=0xAA; xl.ui8p[2]=0x55; xl.ui8p[3]=0xAD;
295 xl.ui8p[4]=0xBE; xl.ui8p[5]=0x55; xl.ui8p[6]=0xAA; xl.ui8p[7]=0xEF;
296 u.cp = xl.cp + 8;
297 xu.p = u.cp;
298 xu.cp += orig_size;
299 xu.ui8p[0]=0xDE; xu.ui8p[1]=0xAA; xu.ui8p[2]=0x55; xu.ui8p[3]=0xAD;
300 xu.ui8p[4]=0xBE; xu.ui8p[5]=0x55; xu.ui8p[6]=0xAA; xu.ui8p[7]=0xEF;
302 #endif
303 NYD2_LEAVE;
304 return u.cp;
306 jhuge:
307 u.hb = smalloc(sizeof(*u.hb) - VFIELD_SIZEOF(struct hugebuf, hb_buf) +
308 size +1);
309 u.hb->hb_next = _huge_list;
310 _huge_list = u.hb;
311 u.cp = u.hb->hb_buf;
312 goto jleave;
315 FL void *
316 (csalloc)(size_t nmemb, size_t size SALLOC_DEBUG_ARGS)
318 void *vp;
319 NYD2_ENTER;
321 size *= nmemb;
322 vp = (salloc)(size SALLOC_DEBUG_ARGSCALL);
323 memset(vp, 0, size);
324 NYD2_LEAVE;
325 return vp;
328 FL void
329 sreset(bool_t only_if_relaxed)
331 struct buffer *blh, *bh;
332 NYD_ENTER;
334 DBG( ++_all_resetreqs; )
335 if (noreset) {
336 /* Reset relaxation after any jump is a MUST */
337 if (_relax_recur_no > 0)
338 srelax_rele();
339 goto jleave;
341 if (only_if_relaxed && _relax_recur_no == 0)
342 goto jleave;
344 #ifdef HAVE_DEBUG
345 _all_cycnt = _all_cysize = 0;
346 _all_cybufcnt = (_buf_head != NULL && _buf_head->b._next != NULL);
347 ++_all_resets;
348 #endif
350 /* Reset relaxation after jump */
351 if (_relax_recur_no > 0) {
352 srelax_rele();
353 assert(_relax_recur_no == 0);
356 blh = NULL;
357 if ((bh = _buf_head) != NULL) {
358 do {
359 struct buffer *x = bh;
360 bh = x->b._next;
361 DBG( _salloc_bcheck(x); )
363 /* Give away all buffers that are not covered by sreset().
364 * _buf_head is builtin and thus cannot be free()d */
365 if (blh != NULL && x->b._bot == x->b_buf) {
366 blh->b._next = bh;
367 free(x);
368 } else {
369 blh = x;
370 x->b._caster = x->b._bot;
371 x->b._relax = NULL;
372 DBG( memset(x->b._caster, 0377,
373 PTR2SIZE(x->b._max - x->b._caster)); )
375 } while (bh != NULL);
377 _buf_server = _buf_head;
378 _buf_list = blh;
379 _buf_relax = NULL;
382 while (_huge_list != NULL) {
383 struct hugebuf *hb = _huge_list;
384 _huge_list = hb->hb_next;
385 free(hb);
388 DBG( smemreset(); )
389 jleave:
390 NYD_LEAVE;
393 FL void
394 srelax_hold(void)
396 struct buffer *b;
397 NYD_ENTER;
399 if (_relax_recur_no++ == 0) {
400 for (b = _buf_head; b != NULL; b = b->b._next)
401 b->b._relax = b->b._caster;
402 _buf_relax = _buf_server;
404 NYD_LEAVE;
407 FL void
408 srelax_rele(void)
410 struct buffer *b;
411 NYD_ENTER;
413 assert(_relax_recur_no > 0);
415 if (--_relax_recur_no == 0) {
416 for (b = _buf_head; b != NULL; b = b->b._next) {
417 DBG( _salloc_bcheck(b); )
418 b->b._caster = (b->b._relax != NULL) ? b->b._relax : b->b._bot;
419 b->b._relax = NULL;
422 _buf_relax = NULL;
424 #ifdef HAVE_DEVEL
425 else
426 fprintf(stderr, "srelax_rele(): recursion >0!\n");
427 #endif
428 NYD_LEAVE;
431 FL void
432 srelax(void)
434 /* The purpose of relaxation is only that it is possible to reset the
435 * casters, *not* to give back memory to the system. We are presumably in
436 * an iteration over all messages of a mailbox, and it'd be quite
437 * counterproductive to give the system allocator a chance to waste time */
438 struct buffer *b;
439 NYD_ENTER;
441 assert(_relax_recur_no > 0);
443 if (_relax_recur_no == 1) {
444 for (b = _buf_head; b != NULL; b = b->b._next) {
445 DBG( _salloc_bcheck(b); )
446 b->b._caster = (b->b._relax != NULL) ? b->b._relax : b->b._bot;
447 DBG( memset(b->b._caster, 0377, PTR2SIZE(b->b._max - b->b._caster)); )
450 NYD_LEAVE;
453 FL void
454 spreserve(void)
456 struct buffer *b;
457 NYD_ENTER;
459 for (b = _buf_head; b != NULL; b = b->b._next)
460 b->b._bot = b->b._caster;
461 NYD_LEAVE;
#ifdef HAVE_DEBUG
/* Print the statistics collected by salloc() and sreset().  The argument is
 * not used; always returns 0 */
FL int
c_sstats(void *v)
{
   size_t capacity, excess;
   NYD_ENTER;
   UNUSED(v);

   /* Bytes by which the largest cycle exceeded what the buffers offered */
   capacity = (_all_cybufcnt_max * SDYN_SIZE) + SBLTIN_SIZE;
   if (capacity >= _all_cysize_max)
      excess = 0;
   else
      excess = _all_cysize_max - capacity;

   printf("String usage statistics (cycle means one sreset() cycle):\n"
      " Buffer allocs ever/max a time : %" PRIuZ "/%" PRIuZ "\n"
      " .. size of the builtin/dynamic: %" PRIuZ "/%" PRIuZ "\n"
      " Overall alloc count/bytes : %" PRIuZ "/%" PRIuZ "\n"
      " .. bytes min/max/align wastage: %" PRIuZ "/%" PRIuZ "/%" PRIuZ "\n"
      " sreset() cycles : %" PRIuZ " (%" PRIuZ " performed)\n"
      " Cycle max.: alloc count/bytes : %" PRIuZ "/%" PRIuZ "+%" PRIuZ "\n",
      _all_bufcnt, _all_cybufcnt_max,
      SBLTIN_SIZE, SDYN_SIZE,
      _all_cnt, _all_size,
      _all_min, _all_max, _all_wast,
      _all_resetreqs, _all_resets,
      _all_cycnt_max, _all_cysize_max, excess);
   NYD_LEAVE;
   return 0;
}
#endif
493 FL char *
494 (savestr)(char const *str SALLOC_DEBUG_ARGS)
496 size_t size;
497 char *news;
498 NYD_ENTER;
500 size = strlen(str) +1;
501 news = (salloc)(size SALLOC_DEBUG_ARGSCALL);
502 memcpy(news, str, size);
503 NYD_LEAVE;
504 return news;
507 FL char *
508 (savestrbuf)(char const *sbuf, size_t sbuf_len SALLOC_DEBUG_ARGS)
510 char *news;
511 NYD_ENTER;
513 news = (salloc)(sbuf_len +1 SALLOC_DEBUG_ARGSCALL);
514 memcpy(news, sbuf, sbuf_len);
515 news[sbuf_len] = 0;
516 NYD_LEAVE;
517 return news;
520 FL char *
521 (savecatsep)(char const *s1, char sep, char const *s2 SALLOC_DEBUG_ARGS)
523 size_t l1, l2;
524 char *news;
525 NYD_ENTER;
527 l1 = (s1 != NULL) ? strlen(s1) : 0;
528 l2 = strlen(s2);
529 news = (salloc)(l1 + (sep != '\0') + l2 +1 SALLOC_DEBUG_ARGSCALL);
530 if (l1 > 0) {
531 memcpy(news + 0, s1, l1);
532 if (sep != '\0')
533 news[l1++] = sep;
535 memcpy(news + l1, s2, l2);
536 news[l1 + l2] = '\0';
537 NYD_LEAVE;
538 return news;
/*
 * Support routines, auto-reclaimed storage
 */
545 FL char *
546 (i_strdup)(char const *src SALLOC_DEBUG_ARGS)
548 size_t sz;
549 char *dest;
550 NYD_ENTER;
552 sz = strlen(src) +1;
553 dest = (salloc)(sz SALLOC_DEBUG_ARGSCALL);
554 i_strcpy(dest, src, sz);
555 NYD_LEAVE;
556 return dest;
559 FL char *
560 (protbase)(char const *cp SALLOC_DEBUG_ARGS) /* TODO obsolete */
562 char *n, *np;
563 NYD_ENTER;
565 np = n = (salloc)(strlen(cp) +1 SALLOC_DEBUG_ARGSCALL);
567 /* Just ignore the `is-system-mailbox' prefix XXX */
568 if (cp[0] == '%' && cp[1] == ':')
569 cp += 2;
571 while (*cp != '\0') {
572 if (cp[0] == ':' && cp[1] == '/' && cp[2] == '/') {
573 *np++ = *cp++;
574 *np++ = *cp++;
575 *np++ = *cp++;
576 } else if (cp[0] == '/')
577 break;
578 else
579 *np++ = *cp++;
581 *np = '\0';
582 NYD_LEAVE;
583 return n;
586 FL struct str *
587 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
589 va_list vl;
590 size_t l;
591 char const *cs;
592 NYD_ENTER;
594 va_start(vl, self);
595 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
596 l += strlen(cs);
597 va_end(vl);
599 self->l = l;
600 self->s = salloc(l +1);
602 va_start(vl, self);
603 for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
604 size_t i = strlen(cs);
605 memcpy(self->s + l, cs, i);
606 l += i;
608 self->s[l] = '\0';
609 va_end(vl);
610 NYD_LEAVE;
611 return self;
614 FL struct str *
615 (str_concat_cpa)(struct str *self, char const * const *cpa,
616 char const *sep_o_null SALLOC_DEBUG_ARGS)
618 size_t sonl, l;
619 char const * const *xcpa;
620 NYD_ENTER;
622 sonl = (sep_o_null != NULL) ? strlen(sep_o_null) : 0;
624 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
625 l += strlen(*xcpa) + sonl;
627 self->l = l;
628 self->s = (salloc)(l +1 SALLOC_DEBUG_ARGSCALL);
630 for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
631 size_t i = strlen(*xcpa);
632 memcpy(self->s + l, *xcpa, i);
633 l += i;
634 if (sonl > 0) {
635 memcpy(self->s + l, sep_o_null, sonl);
636 l += sonl;
639 self->s[l] = '\0';
640 NYD_LEAVE;
641 return self;
/*
 * Routines that are not related to auto-reclaimed storage follow.
 */
648 FL int
649 anyof(char const *s1, char const *s2)
651 NYD2_ENTER;
652 for (; *s1 != '\0'; ++s1)
653 if (strchr(s2, *s1) != NULL)
654 break;
655 NYD2_LEAVE;
656 return (*s1 != '\0');
659 FL char *
660 n_strsep(char **iolist, char sep, bool_t ignore_empty)
662 char *base, *cp;
663 NYD2_ENTER;
665 for (base = *iolist; base != NULL; base = *iolist) {
666 while (*base != '\0' && blankspacechar(*base))
667 ++base;
668 cp = strchr(base, sep);
669 if (cp != NULL)
670 *iolist = cp + 1;
671 else {
672 *iolist = NULL;
673 cp = base + strlen(base);
675 while (cp > base && blankspacechar(cp[-1]))
676 --cp;
677 *cp = '\0';
678 if (*base != '\0' || !ignore_empty)
679 break;
681 NYD2_LEAVE;
682 return base;
685 FL void
686 i_strcpy(char *dest, char const *src, size_t size)
688 NYD2_ENTER;
689 if (size > 0) {
690 for (;; ++dest, ++src)
691 if ((*dest = lowerconv(*src)) == '\0') {
692 break;
693 } else if (--size == 0) {
694 *dest = '\0';
695 break;
698 NYD2_LEAVE;
701 FL int
702 is_prefix(char const *as1, char const *as2)
704 char c;
705 NYD2_ENTER;
707 for (; (c = *as1) == *as2 && c != '\0'; ++as1, ++as2)
708 if (*as2 == '\0')
709 break;
710 NYD2_LEAVE;
711 return (c == '\0');
714 FL char *
715 string_quote(char const *v) /* TODO too simpleminded (getrawlist(), +++ ..) */
717 char const *cp;
718 size_t i;
719 char c, *rv;
720 NYD2_ENTER;
722 for (i = 0, cp = v; (c = *cp) != '\0'; ++i, ++cp)
723 if (c == '"' || c == '\\')
724 ++i;
725 rv = salloc(i +1);
727 for (i = 0, cp = v; (c = *cp) != '\0'; rv[i++] = c, ++cp)
728 if (c == '"' || c == '\\')
729 rv[i++] = '\\';
730 rv[i] = '\0';
731 NYD2_LEAVE;
732 return rv;
735 FL char *
736 laststring(char *linebuf, bool_t *needs_list, bool_t strip)
738 char *cp, *p, quoted;
739 NYD_ENTER;
741 /* Anything to do at all? */
742 if (*(cp = linebuf) == '\0')
743 goto jnull;
744 cp += strlen(linebuf) -1;
746 /* Strip away trailing blanks */
747 while (whitechar(*cp) && cp > linebuf)
748 --cp;
749 cp[1] = '\0';
750 if (cp == linebuf)
751 goto jleave;
753 /* Now search for the BOS of the "last string" */
754 quoted = *cp;
755 if (quoted == '\'' || quoted == '"') {
756 if (strip)
757 *cp = '\0';
758 } else
759 quoted = ' ';
761 while (cp > linebuf) {
762 --cp;
763 if (quoted != ' ') {
764 if (*cp != quoted)
765 continue;
766 } else if (!whitechar(*cp))
767 continue;
768 if (cp == linebuf || cp[-1] != '\\') {
769 /* When in whitespace mode, WS prefix doesn't belong */
770 if (quoted == ' ')
771 ++cp;
772 break;
774 /* Expand the escaped quote character */
775 for (p = --cp; (p[0] = p[1]) != '\0'; ++p)
778 if (strip && quoted != ' ' && *cp == quoted)
779 for (p = cp; (p[0] = p[1]) != '\0'; ++p)
782 /* The "last string" has been skipped over, but still, try to step backwards
783 * until we are at BOS or see whitespace, so as to make possible things like
784 * "? copy +'x y.mbox'" or even "? copy +x\ y.mbox" */
785 while (cp > linebuf) {
786 --cp;
787 if (whitechar(*cp)) {
788 p = cp;
789 *cp++ = '\0';
790 /* We can furtherly release our callees if we now decide wether the
791 * remaining non-"last string" line content contains non-WS */
792 while (--p >= linebuf)
793 if (!whitechar(*p))
794 goto jleave;
795 linebuf = cp;
796 break;
800 jleave:
801 if (cp != NULL && *cp == '\0')
802 goto jnull;
803 *needs_list = (cp != linebuf && *linebuf != '\0');
804 j_leave:
805 NYD_LEAVE;
806 return cp;
807 jnull:
808 *needs_list = FAL0;
809 cp = NULL;
810 goto j_leave;
813 FL void
814 makelow(char *cp) /* TODO isn't that crap? --> */
816 NYD_ENTER;
817 #ifdef HAVE_C90AMEND1
818 if (mb_cur_max > 1) {
819 char *tp = cp;
820 wchar_t wc;
821 int len;
823 while (*cp != '\0') {
824 len = mbtowc(&wc, cp, mb_cur_max);
825 if (len < 0)
826 *tp++ = *cp++;
827 else {
828 wc = towlower(wc);
829 if (wctomb(tp, wc) == len)
830 tp += len, cp += len;
831 else
832 *tp++ = *cp++; /* <-- at least here */
835 } else
836 #endif
839 *cp = tolower((uc_i)*cp);
840 while (*cp++ != '\0');
842 NYD_LEAVE;
845 FL bool_t
846 substr(char const *str, char const *sub)
848 char const *cp, *backup;
849 NYD_ENTER;
851 cp = sub;
852 backup = str;
853 while (*str != '\0' && *cp != '\0') {
854 #ifdef HAVE_C90AMEND1
855 if (mb_cur_max > 1) {
856 wchar_t c, c2;
857 int sz;
859 if ((sz = mbtowc(&c, cp, mb_cur_max)) == -1)
860 goto Jsinglebyte;
861 cp += sz;
862 if ((sz = mbtowc(&c2, str, mb_cur_max)) == -1)
863 goto Jsinglebyte;
864 str += sz;
865 c = towupper(c);
866 c2 = towupper(c2);
867 if (c != c2) {
868 if ((sz = mbtowc(&c, backup, mb_cur_max)) > 0) {
869 backup += sz;
870 str = backup;
871 } else
872 str = ++backup;
873 cp = sub;
875 } else
876 Jsinglebyte:
877 #endif
879 int c, c2;
881 c = *cp++ & 0377;
882 if (islower(c))
883 c = toupper(c);
884 c2 = *str++ & 0377;
885 if (islower(c2))
886 c2 = toupper(c2);
887 if (c != c2) {
888 str = ++backup;
889 cp = sub;
893 NYD_LEAVE;
894 return (*cp == '\0');
897 #ifndef HAVE_SNPRINTF
898 FL int
899 snprintf(char *str, size_t size, char const *format, ...) /* XXX DANGER! */
901 va_list ap;
902 int ret;
903 NYD2_ENTER;
905 va_start(ap, format);
906 ret = vsprintf(str, format, ap);
907 va_end(ap);
908 if (ret < 0)
909 ret = strlen(str);
910 NYD2_LEAVE;
911 return ret;
913 #endif
915 FL char *
916 sstpcpy(char *dst, char const *src)
918 NYD2_ENTER;
919 while ((*dst = *src++) != '\0')
920 ++dst;
921 NYD2_LEAVE;
922 return dst;
925 FL char *
926 (sstrdup)(char const *cp SMALLOC_DEBUG_ARGS)
928 char *dp;
929 NYD2_ENTER;
931 dp = (cp == NULL) ? NULL : (sbufdup)(cp, strlen(cp) SMALLOC_DEBUG_ARGSCALL);
932 NYD2_LEAVE;
933 return dp;
936 FL char *
937 (sbufdup)(char const *cp, size_t len SMALLOC_DEBUG_ARGS)
939 char *dp = NULL;
940 NYD2_ENTER;
942 dp = (smalloc)(len +1 SMALLOC_DEBUG_ARGSCALL);
943 if (cp != NULL)
944 memcpy(dp, cp, len);
945 dp[len] = '\0';
946 NYD2_LEAVE;
947 return dp;
950 FL char *
951 n_strlcpy(char *dst, char const *src, size_t len)
953 NYD2_ENTER;
955 assert(len > 0);
957 dst = strncpy(dst, src, len);
958 dst[len -1] = '\0';
959 NYD2_LEAVE;
960 return dst;
963 FL int
964 asccasecmp(char const *s1, char const *s2)
966 int cmp;
967 NYD2_ENTER;
969 for (;;) {
970 char c1 = *s1++, c2 = *s2++;
971 if ((cmp = lowerconv(c1) - lowerconv(c2)) != 0 || c1 == '\0')
972 break;
974 NYD2_LEAVE;
975 return cmp;
978 FL int
979 ascncasecmp(char const *s1, char const *s2, size_t sz)
981 int cmp = 0;
982 NYD2_ENTER;
984 while (sz-- > 0) {
985 char c1 = *s1++, c2 = *s2++;
986 cmp = (ui8_t)lowerconv(c1);
987 cmp -= (ui8_t)lowerconv(c2);
988 if (cmp != 0 || c1 == '\0')
989 break;
991 NYD2_LEAVE;
992 return cmp;
995 FL char const *
996 asccasestr(char const *s1, char const *s2)
998 char c2, c1;
999 NYD2_ENTER;
1001 for (c2 = *s2++, c2 = lowerconv(c2);;) {
1002 if ((c1 = *s1++) == '\0') {
1003 s1 = NULL;
1004 break;
1006 if (lowerconv(c1) == c2 && is_asccaseprefix(s1, s2)) {
1007 --s1;
1008 break;
1011 NYD2_LEAVE;
1012 return s1;
1015 FL bool_t
1016 is_asccaseprefix(char const *as1, char const *as2)
1018 bool_t rv = FAL0;
1019 NYD2_ENTER;
1021 for (;; ++as1, ++as2) {
1022 char c1 = lowerconv(*as1), c2 = lowerconv(*as2);
1024 if ((rv = (c2 == '\0')))
1025 break;
1026 if (c1 != c2)
1027 break;
1029 NYD2_LEAVE;
1030 return rv;
1033 FL struct str *
1034 (n_str_dup)(struct str *self, struct str const *t SMALLOC_DEBUG_ARGS)
1036 NYD_ENTER;
1037 if (t != NULL && t->l > 0) {
1038 self->l = t->l;
1039 self->s = (srealloc)(self->s, t->l +1 SMALLOC_DEBUG_ARGSCALL);
1040 memcpy(self->s, t->s, t->l +1);
1041 } else
1042 self->l = 0;
1043 NYD_LEAVE;
1044 return self;
1047 FL struct str *
1048 (n_str_add_buf)(struct str *self, char const *buf, size_t buflen
1049 SMALLOC_DEBUG_ARGS)
1051 NYD_ENTER;
1052 if (buflen != 0) {
1053 size_t sl = self->l;
1054 self->l = sl + buflen;
1055 self->s = (srealloc)(self->s, self->l +1 SMALLOC_DEBUG_ARGSCALL);
1056 memcpy(self->s + sl, buf, buflen);
1057 self->s[self->l] = '\0';
1059 NYD_LEAVE;
1060 return self;
/*
 * UTF-8
 */
#ifdef HAVE_NATCH_CHAR
/* Decode one UTF-8 sequence from *bdat, of which *blen bytes are available.
 * On success the code point is returned and *bdat / *blen are moved behind
 * the sequence.  If the sequence is truncated UI32_MAX is returned and only
 * the leading byte is consumed; if *blen is 0 UI32_MAX is returned and
 * nothing is changed.
 * Continuation bytes are not validated, and any leading byte above 0x7F which
 * is not that of a two or three byte sequence is taken to start a four byte
 * sequence */
FL ui32_t
n_utf8_to_utf32(char const **bdat, size_t *blen)
{
   char const *cp;
   size_t l;
   ui32_t c, x;
   NYD2_ENTER;

   /* No input: do not read, and do not let the length wrap around */
   if (*blen == 0) {
      c = UI32_MAX;
      goto j_leave;
   }

   cp = *bdat;
   l = *blen - 1;    /* Bytes which follow the leading byte */
   x = (ui8_t)*cp++;

   if (x <= 0x7F)
      c = x;
   else {
      /* l does not include the leading byte, so a sequence of N bytes needs
       * l >= N - 1.  Each branch decodes all but the last continuation byte,
       * the last one is common code */
      if ((x & 0xE0) == 0xC0) {
         if (l < 1)
            goto jerr;
         l -= 1;
         c = x & ~0xC0;
      } else if ((x & 0xF0) == 0xE0) {
         if (l < 2)
            goto jerr;
         l -= 2;
         c = x & ~0xE0;
         c <<= 6;
         x = (ui8_t)*cp++;
         c |= x & 0x3F;
      } else {
         if (l < 3)
            goto jerr;
         l -= 3;
         c = x & ~0xF0;
         c <<= 6;
         x = (ui8_t)*cp++;
         c |= x & 0x3F;
         c <<= 6;
         x = (ui8_t)*cp++;
         c |= x & 0x3F;
      }
      /* A continuation byte carries six bits of payload */
      c <<= 6;
      x = (ui8_t)*cp++;
      c |= x & 0x3F;
   }

jleave:
   *bdat = cp;
   *blen = l;
j_leave:
   NYD2_LEAVE;
   return c;
jerr:
   c = UI32_MAX;
   goto jleave;
}
#endif /* HAVE_NATCH_CHAR */
#ifdef HAVE_FILTER_HTML_TAGSOUP
/* Encode the code point c as UTF-8 into buf and NUL terminate it; at most
 * five bytes are written, including the NUL.  Surrogates (U+D800 - U+DFFF)
 * and values above 0x1FFFFF are replaced with U+FFFD.  Returns the number of
 * bytes written, not counting the NUL; that is 0 for c == 0 */
FL size_t
n_utf32_to_utf8(ui32_t c, char *buf)
{
   struct {
      ui32_t lower_bound;
      ui32_t upper_bound;
      ui8_t enc_leader;
      ui8_t enc_lval;
      ui8_t dec_leader_mask;
      ui8_t dec_leader_val_mask;
      ui8_t dec_bytes_togo;
      ui8_t cat_index;
      ui8_t __dummy[2];
   } const _cat[] = {
      {0x00000000, 0x00000000, 0x00, 0, 0x00,      0x00,   0, 0, {0,}},
      {0x00000000, 0x0000007F, 0x00, 1, 0x80,      0x7F, 1-1, 1, {0,}},
      {0x00000080, 0x000007FF, 0xC0, 2, 0xE0, 0xFF-0xE0, 2-1, 2, {0,}},
      /* We assume surrogates are U+D800 - U+DFFF, _cat index 3 */
      /* xxx _from_utf32() simply assumes magic code points for surrogates!
       * xxx (However, should we ever get yet another surrogate range we
       * xxx need to deal with that all over the place anyway? */
      {0x00000800, 0x0000FFFF, 0xE0, 3, 0xF0, 0xFF-0xF0, 3-1, 3, {0,}},
      {0x00010000, 0x001FFFFF, 0xF0, 4, 0xF8, 0xFF-0xF8, 4-1, 4, {0,}},
   }, *catp = _cat;
   size_t l;
   NYD2_ENTER;

   /* Select the category, i.e., the length of the encoding */
   if (c <= _cat[0].upper_bound) { catp += 0; goto j0; }
   if (c <= _cat[1].upper_bound) { catp += 1; goto j1; }
   if (c <= _cat[2].upper_bound) { catp += 2; goto j2; }
   if (c <= _cat[3].upper_bound) {
      /* Surrogates may not be converted (Compatibility rule C10) */
      if (c >= 0xD800u && c <= 0xDFFFu)
         goto jerr;
      catp += 3;
      goto j3;
   }
   if (c <= _cat[4].upper_bound) { catp += 4; goto j4; }
jerr:
   c = 0xFFFDu; /* Unicode replacement character */
   catp += 3;
   goto j3;

   /* Continuation bytes are written back to front, six bits each, falling
    * through down to the leading byte */
j4:
   buf[3] = (char)0x80 | (char)(c & 0x3F); c >>= 6;
j3:
   buf[2] = (char)0x80 | (char)(c & 0x3F); c >>= 6;
j2:
   buf[1] = (char)0x80 | (char)(c & 0x3F); c >>= 6;
j1:
   buf[0] = (char)catp->enc_leader | (char)(c);
j0:
   buf[catp->enc_lval] = '\0';
   l = catp->enc_lval;
   NYD2_LEAVE;
   return l;
}
#endif /* HAVE_FILTER_HTML_TAGSOUP */
/*
 * Our iconv(3) wrapper
 */
1185 #ifdef HAVE_ICONV
1187 static void _ic_toupper(char *dest, char const *src);
1188 static void _ic_stripdash(char *p);
1190 static void
1191 _ic_toupper(char *dest, char const *src)
1193 NYD2_ENTER;
1195 *dest++ = upperconv(*src);
1196 while (*src++ != '\0');
1197 NYD2_LEAVE;
1200 static void
1201 _ic_stripdash(char *p)
1203 char *q = p;
1204 NYD2_ENTER;
1207 if (*(q = p) != '-')
1208 ++q;
1209 while (*p++ != '\0');
1210 NYD2_LEAVE;
1213 FL iconv_t
1214 n_iconv_open(char const *tocode, char const *fromcode)
1216 iconv_t id;
1217 char *t, *f;
1218 NYD_ENTER;
1220 if (!asccasecmp(fromcode, "unknown-8bit") &&
1221 (fromcode = ok_vlook(charset_unknown_8bit)) == NULL)
1222 fromcode = charset_get_8bit();
1224 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
1225 goto jleave;
1227 /* Remove the "iso-" prefixes for Solaris */
1228 if (!ascncasecmp(tocode, "iso-", 4))
1229 tocode += 4;
1230 else if (!ascncasecmp(tocode, "iso", 3))
1231 tocode += 3;
1232 if (!ascncasecmp(fromcode, "iso-", 4))
1233 fromcode += 4;
1234 else if (!ascncasecmp(fromcode, "iso", 3))
1235 fromcode += 3;
1236 if (*tocode == '\0' || *fromcode == '\0') {
1237 id = (iconv_t)-1;
1238 goto jleave;
1240 if ((id = iconv_open(tocode, fromcode)) != (iconv_t)-1)
1241 goto jleave;
1243 /* Solaris prefers upper-case charset names. Don't ask... */
1244 t = salloc(strlen(tocode) +1);
1245 _ic_toupper(t, tocode);
1246 f = salloc(strlen(fromcode) +1);
1247 _ic_toupper(f, fromcode);
1248 if ((id = iconv_open(t, f)) != (iconv_t)-1)
1249 goto jleave;
1251 /* Strip dashes for UnixWare */
1252 _ic_stripdash(t);
1253 _ic_stripdash(f);
1254 if ((id = iconv_open(t, f)) != (iconv_t)-1)
1255 goto jleave;
1257 /* Add your vendor's sillynesses here */
1259 /* If the encoding names are equal at this point, they are just not
1260 * understood by iconv(), and we cannot sensibly use it in any way. We do
1261 * not perform this as an optimization above since iconv() can otherwise be
1262 * used to check the validity of the input even with identical encoding
1263 * names */
1264 if (!strcmp(t, f))
1265 errno = 0;
1266 jleave:
1267 NYD_LEAVE;
1268 return id;
1271 FL void
1272 n_iconv_close(iconv_t cd)
1274 NYD_ENTER;
1275 iconv_close(cd);
1276 if (cd == iconvd)
1277 iconvd = (iconv_t)-1;
1278 NYD_LEAVE;
1281 FL void
1282 n_iconv_reset(iconv_t cd)
1284 NYD_ENTER;
1285 iconv(cd, NULL, NULL, NULL, NULL);
1286 NYD_LEAVE;
1289 /* (2012-09-24: export and use it exclusively to isolate prototype problems
1290 * (*inb* is 'char const **' except in POSIX) in a single place.
1291 * GNU libiconv even allows for configuration time const/non-const..
1292 * In the end it's an ugly guess, but we can't do better since make(1) doesn't
1293 * support compiler invocations which bail on error, so no -Werror */
1294 /* Citrus project? */
1295 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
1296 /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
1297 # ifdef __DragonFly__
1298 # define __INBCAST(S) (char ** __restrict__)UNCONST(S)
1299 # else
1300 # define __INBCAST(S) (char const **)UNCONST(S)
1301 # endif
1302 # endif
1303 # ifndef __INBCAST
1304 # define __INBCAST(S) (char **)UNCONST(S)
1305 # endif
1307 FL int
1308 n_iconv_buf(iconv_t cd, char const **inb, size_t *inbleft,/*XXX redo iconv use*/
1309 char **outb, size_t *outbleft, bool_t skipilseq)
1311 int err = 0;
1312 NYD2_ENTER;
1314 for (;;) {
1315 size_t sz = iconv(cd, __INBCAST(inb), inbleft, outb, outbleft);
1316 if (sz != (size_t)-1)
1317 break;
1318 err = errno;
1319 if (!skipilseq || err != EILSEQ)
1320 break;
1321 if (*inbleft > 0) {
1322 ++(*inb);
1323 --(*inbleft);
1324 } else if (*outbleft > 0) {
1325 **outb = '\0';
1326 break;
1328 if (*outbleft > 0/* TODO 0xFFFD 2*/) {
1329 /* TODO 0xFFFD (*outb)[0] = '[';
1330 * TODO (*outb)[1] = '?';
1331 * TODO 0xFFFD (*outb)[2] = ']';
1332 * TODO (*outb) += 3;
1333 * TODO (*outbleft) -= 3; */
1334 *(*outb)++ = '?';
1335 --*outbleft;
1336 } else {
1337 err = E2BIG;
1338 break;
1340 err = 0;
1342 NYD2_LEAVE;
1343 return err;
1345 # undef __INBCAST
1347 FL int
1348 n_iconv_str(iconv_t cd, struct str *out, struct str const *in,
1349 struct str *in_rest_or_null, bool_t skipilseq)
1351 int err;
1352 char *obb, *ob;
1353 char const *ib;
1354 size_t olb, ol, il;
1355 NYD2_ENTER;
1357 err = 0;
1358 obb = out->s;
1359 olb = out->l;
1360 ol = in->l;
1362 ol = (ol << 1) - (ol >> 4);
1363 if (olb <= ol) {
1364 olb = ol;
1365 goto jrealloc;
1368 for (;;) {
1369 ib = in->s;
1370 il = in->l;
1371 ob = obb;
1372 ol = olb;
1373 err = n_iconv_buf(cd, &ib, &il, &ob, &ol, skipilseq);
1374 if (err == 0 || err != E2BIG)
1375 break;
1376 err = 0;
1377 olb += in->l;
1378 jrealloc:
1379 obb = srealloc(obb, olb +1);
1382 if (in_rest_or_null != NULL) {
1383 in_rest_or_null->s = UNCONST(ib);
1384 in_rest_or_null->l = il;
1386 out->s = obb;
1387 out->s[out->l = olb - ol] = '\0';
1388 NYD2_LEAVE;
1389 return err;
1391 #endif /* HAVE_ICONV */
1393 /* s-it-mode */