1 /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.46 2015/05/04 15:31:13 christos Exp $ */
3 * tc.str.c: Short string package
4 * This has been a lesson in how to write buggy code!
7 * Copyright (c) 1980, 1991 The Regents of the University of California.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 RCSID("$tcsh: tc.str.c,v 3.46 2015/05/04 15:31:13 christos Exp $")
41 #define MALLOC_INCR 128
43 #define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */
45 #define MALLOC_SURPLUS 0
50 one_mbtowc(Char
*pwc
, const char *s
, size_t n
)
54 len
= rt_mbtowc(pwc
, s
, n
);
57 *pwc
= (unsigned char)*s
| INVALID_BYTE
;
65 one_wctomb(char *s
, Char wchar
)
69 if (wchar
& INVALID_BYTE
) {
74 if (wchar
>= 0x10000) {
75 /* UTF-16 systems can't handle these values directly in calls to
76 wctomb. Convert value to UTF-16 surrogate and call wcstombs to
77 convert the "string" to the correct multibyte representation,
81 ws
[0] = 0xd800 | (wchar
>> 10);
82 ws
[1] = 0xdc00 | (wchar
& 0x3ff);
84 /* The return value of wcstombs excludes the trailing 0, so len is
85 the number of bytes in the multibyte form of the Unicode char. */
86 len
= wcstombs (s
, ws
, MB_CUR_MAX
+ 1);
89 len
= wctomb(s
, (wchar_t) wchar
);
99 rt_mbtowc(Char
*pwc
, const char *s
, size_t n
)
102 char back
[MB_LEN_MAX
];
104 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
105 # if defined(AUTOSET_KANJI)
106 static mbstate_t mb_zero
, mb
;
108 * Work around the Shift-JIS encoding that translates unshifted 7 bit ASCII!
110 if (!adrof(STRnokanji
) && n
&& pwc
&& s
&& (*s
== '\\' || *s
== '~') &&
111 !memcmp(&mb
, &mb_zero
, sizeof(mb
)))
120 memset (&mb
, 0, sizeof mb
);
121 ret
= mbrtowc(&tmp
, s
, n
, &mb
);
123 ret
= mbtowc(&tmp
, s
, n
);
127 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
128 if (tmp
>= 0xd800 && tmp
<= 0xdbff) {
129 /* UTF-16 surrogate pair. Fetch second half and compute
130 UTF-32 value. Dispense with the inverse test in this case. */
131 size_t n2
= mbrtowc(&tmp
, s
+ ret
, n
- ret
, &mb
);
132 if (n2
== 0 || n2
== (size_t)-1 || n2
== (size_t)-2)
135 *pwc
= (((*pwc
& 0x3ff) << 10) | (tmp
& 0x3ff)) + 0x10000;
140 if (wctomb(back
, *pwc
) != ret
|| memcmp(s
, back
, ret
) != 0)
143 } else if (ret
== -2)
154 blk2short(char **src
)
162 for (n
= 0; src
[n
] != NULL
; n
++)
164 sdst
= dst
= xmalloc((n
+ 1) * sizeof(Char
*));
166 for (; *src
!= NULL
; src
++)
173 short2blk(Char
**src
)
181 for (n
= 0; src
[n
] != NULL
; n
++)
183 sdst
= dst
= xmalloc((n
+ 1) * sizeof(char *));
185 for (; *src
!= NULL
; src
++)
186 *dst
++ = strsave(short2str(*src
));
192 str2short(const char *src
)
194 static struct Strbuf buf
; /* = Strbuf_INIT; */
203 src
+= one_mbtowc(&wc
, src
, MB_LEN_MAX
);
204 Strbuf_append1(&buf
, wc
);
206 Strbuf_terminate(&buf
);
211 short2str(const Char
*src
)
213 static char *sdst
= NULL
;
214 static size_t dstsize
= 0;
221 dstsize
= MALLOC_INCR
;
222 sdst
= xmalloc((dstsize
+ MALLOC_SURPLUS
) * sizeof(char));
225 edst
= &dst
[dstsize
];
227 dst
+= one_wctomb(dst
, *src
& CHAR
);
233 dstsize
+= MALLOC_INCR
;
234 sdst
= xrealloc(sdst
, (dstsize
+ MALLOC_SURPLUS
) * sizeof(char));
235 edst
= &sdst
[dstsize
];
236 dst
= &edst
[-MALLOC_INCR
];
237 while (wdst
> wedst
) {
247 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
249 s_strcpy(Char
*dst
, const Char
*src
)
254 while ((*dst
++ = *src
++) != '\0')
260 s_strncpy(Char
*dst
, const Char
*src
, size_t n
)
269 if ((*dst
++ = *src
++) == '\0') {
279 s_strcat(Char
*dst
, const Char
*src
)
281 Strcpy(Strend(dst
), src
);
287 s_strncat(Char
*dst
, const Char
*src
, size_t n
)
300 if ((*dst
++ = *src
++) == '\0')
312 s_strchr(const Char
*str
, int ch
)
316 return ((Char
*)(intptr_t)str
);
322 s_strrchr(const Char
*str
, int ch
)
331 return ((Char
*)(intptr_t)rstr
);
335 s_strlen(const Char
*str
)
339 for (n
= 0; *str
++; n
++)
345 s_strcmp(const Char
*str1
, const Char
*str2
)
347 for (; *str1
&& *str1
== *str2
; str1
++, str2
++)
350 * The following case analysis is necessary so that characters which look
351 * negative collate low against normal characters but high against the
354 if (*str1
== '\0' && *str2
== '\0')
356 else if (*str1
== '\0')
358 else if (*str2
== '\0')
361 return (*str1
- *str2
);
365 s_strncmp(const Char
*str1
, const Char
*str2
, size_t n
)
370 if (*str1
!= *str2
) {
372 * The following case analysis is necessary so that characters
373 * which look negative collate low against normal characters
374 * but high against the end-of-string NUL.
378 else if (*str2
== '\0')
381 return (*str1
- *str2
);
389 #endif /* not WIDE_STRINGS */
392 s_strcasecmp(const Char
*str1
, const Char
*str2
)
395 wint_t l1
= 0, l2
= 0;
396 for (; *str1
; str1
++, str2
++)
399 else if ((l1
= towlower(*str1
)) != (l2
= towlower(*str2
)))
402 unsigned char l1
= 0, l2
= 0;
403 for (; *str1
; str1
++, str2
++)
406 else if ((l1
= tolower((unsigned char)*str1
)) !=
407 (l2
= tolower((unsigned char)*str2
)))
411 * The following case analysis is necessary so that characters which look
412 * negative collate low against normal characters but high against the
415 if (*str1
== '\0' && *str2
== '\0')
417 else if (*str1
== '\0')
419 else if (*str2
== '\0')
421 else if (l1
== l2
) /* They are zero when they are equal */
422 return (*str1
- *str2
);
428 s_strnsave(const Char
*s
, size_t len
)
432 n
= xmalloc((len
+ 1) * sizeof (*n
));
433 memcpy(n
, s
, len
* sizeof (*n
));
439 s_strsave(const Char
*s
)
446 size
= (Strlen(s
) + 1) * sizeof(*n
);
453 s_strspl(const Char
*cp
, const Char
*dp
)
466 res
= xmalloc(((p
- cp
) + (q
- dp
) - 1) * sizeof(Char
));
467 for (ep
= res
, q
= cp
; (*ep
++ = *q
++) != '\0';)
469 for (ep
--, q
= dp
; (*ep
++ = *q
++) != '\0';)
475 s_strend(const Char
*cp
)
478 return ((Char
*)(intptr_t) cp
);
481 return ((Char
*)(intptr_t) cp
);
485 s_strstr(const Char
*s
, const Char
*t
)
493 return ((Char
*)(intptr_t) s
);
494 while (*ss
++ == *tt
++);
495 } while (*s
++ != '\0');
499 #else /* !SHORT_STRINGS */
501 caching_strip(const char *s
)
503 static char *buf
= NULL
;
504 static size_t buf_size
= 0;
509 size
= strlen(s
) + 1;
510 if (buf_size
< size
) {
511 buf
= xrealloc(buf
, size
);
514 memcpy(buf
, s
, size
);
521 short2qstr(const Char
*src
)
523 static char *sdst
= NULL
;
524 static size_t dstsize
= 0;
531 dstsize
= MALLOC_INCR
;
532 sdst
= xmalloc((dstsize
+ MALLOC_SURPLUS
) * sizeof(char));
535 edst
= &dst
[dstsize
];
540 dstsize
+= MALLOC_INCR
;
541 sdst
= xrealloc(sdst
,
542 (dstsize
+ MALLOC_SURPLUS
) * sizeof(char));
543 edst
= &sdst
[dstsize
];
544 dst
= &edst
[-MALLOC_INCR
];
547 dst
+= one_wctomb(dst
, *src
& CHAR
);
550 ptrdiff_t i
= dst
- edst
;
551 dstsize
+= MALLOC_INCR
;
552 sdst
= xrealloc(sdst
, (dstsize
+ MALLOC_SURPLUS
) * sizeof(char));
553 edst
= &sdst
[dstsize
];
554 dst
= &edst
[-MALLOC_INCR
+ i
];
564 return xcalloc(1, sizeof(struct blk_buf
));
568 bb_store(struct blk_buf
*bb
, Char
*str
)
570 if (bb
->len
== bb
->size
) { /* Keep space for terminating NULL */
572 bb
->size
= 16; /* Arbitrary */
575 bb
->vec
= xrealloc(bb
->vec
, bb
->size
* sizeof (*bb
->vec
));
577 bb
->vec
[bb
->len
] = str
;
581 bb_append(struct blk_buf
*bb
, Char
*str
)
588 bb_cleanup(void *xbb
)
593 bb
= (struct blk_buf
*)xbb
;
595 for (i
= 0; i
< bb
->len
; i
++)
611 bb_finish(struct blk_buf
*bb
)
614 return xrealloc(bb
->vec
, (bb
->len
+ 1) * sizeof (*bb
->vec
));
617 #define DO_STRBUF(STRBUF, CHAR, STRLEN) \
620 STRBUF##_alloc(void) \
622 return xcalloc(1, sizeof(struct STRBUF)); \
626 STRBUF##_store1(struct STRBUF *buf, CHAR c) \
628 if (buf->size == buf->len) { \
629 if (buf->size == 0) \
630 buf->size = 64; /* Arbitrary */ \
633 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
636 buf->s[buf->len] = c; \
639 /* Like strbuf_append1(buf, '\0'), but don't advance len */ \
641 STRBUF##_terminate(struct STRBUF *buf) \
643 STRBUF##_store1(buf, '\0'); \
647 STRBUF##_append1(struct STRBUF *buf, CHAR c) \
649 STRBUF##_store1(buf, c); \
654 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \
656 if (buf->size < buf->len + len) { \
657 if (buf->size == 0) \
658 buf->size = 64; /* Arbitrary */ \
659 while (buf->size < buf->len + len) \
661 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
663 memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \
668 STRBUF##_append(struct STRBUF *buf, const CHAR *s) \
670 STRBUF##_appendn(buf, s, STRLEN(s)); \
674 STRBUF##_finish(struct STRBUF *buf) \
676 STRBUF##_append1(buf, 0); \
677 return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \
681 STRBUF##_cleanup(void *xbuf) \
683 struct STRBUF *buf; \
690 STRBUF##_free(void *xbuf) \
692 STRBUF##_cleanup(xbuf); \
696 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
698 DO_STRBUF(strbuf
, char, strlen
);
699 DO_STRBUF(Strbuf
, Char
, Strlen
);