contrib/tcsh-6/tc.str.c

   1 /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.46 2015/05/04 15:31:13 christos Exp $ */
   2 /*
   3  * tc.str.c: Short string package
   4  *           This has been a lesson of how to write buggy code!
   5  */
   6 /*-
   7  * Copyright (c) 1980, 1991 The Regents of the University of California.
   8  * All rights reserved.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 3. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34 #include "sh.h"
  35
  36 #include <assert.h>
  37 #include <limits.h>
  38
  39 RCSID("$tcsh: tc.str.c,v 3.46 2015/05/04 15:31:13 christos Exp $")
  40
  41 #define MALLOC_INCR     128
  42 #ifdef WIDE_STRINGS
  43 #define MALLOC_SURPLUS  MB_LEN_MAX /* Space for one multibyte character */
  44 #else
  45 #define MALLOC_SURPLUS  0
  46 #endif
  47
  48 #ifdef WIDE_STRINGS
  49 size_t
  50 one_mbtowc(Char *pwc, const char *s, size_t n)
  51 {
  52     int len;
  53
  54     len = rt_mbtowc(pwc, s, n);
  55     if (len == -1) {
  56         reset_mbtowc();
  57         *pwc = (unsigned char)*s | INVALID_BYTE;
  58     }
  59     if (len <= 0)
  60         len = 1;
  61     return len;
  62 }
  63
  64 size_t
  65 one_wctomb(char *s, Char wchar)
  66 {
  67     int len;
  68
  69     if (wchar & INVALID_BYTE) {
  70         s[0] = wchar & 0xFF;
  71         len = 1;
  72     } else {
  73 #ifdef UTF16_STRINGS
  74         if (wchar >= 0x10000) {
  75             /* UTF-16 systems can't handle these values directly in calls to
  76                wctomb.  Convert value to UTF-16 surrogate and call wcstombs to
  77                convert the "string" to the correct multibyte representation,
  78                if any. */
  79             wchar_t ws[3];
  80             wchar -= 0x10000;
  81             ws[0] = 0xd800 | (wchar >> 10);
  82             ws[1] = 0xdc00 | (wchar & 0x3ff);
  83             ws[2] = 0;
  84             /* The return value of wcstombs excludes the trailing 0, so len is
  85                the correct number of multibytes for the Unicode char. */
  86             len = wcstombs (s, ws, MB_CUR_MAX + 1);
  87         } else
  88 #endif
  89         len = wctomb(s, (wchar_t) wchar);
  90         if (len == -1)
  91             s[0] = wchar;
  92         if (len <= 0)
  93             len = 1;
  94     }
  95     return len;
  96 }
  97
  98 int
  99 rt_mbtowc(Char *pwc, const char *s, size_t n)
 100 {
 101     int ret;
 102     char back[MB_LEN_MAX];
 103     wchar_t tmp;
 104 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
 105 # if defined(AUTOSET_KANJI)
 106     static mbstate_t mb_zero, mb;
 107     /*
 108      * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
 109      */
 110     if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
 111         !memcmp(&mb, &mb_zero, sizeof(mb)))
 112     {
 113         *pwc = *s;
 114         return 1;
 115     }
 116 # else
 117     mbstate_t mb;
 118 # endif
 119
 120     memset (&mb, 0, sizeof mb);
 121     ret = mbrtowc(&tmp, s, n, &mb);
 122 #else
 123     ret = mbtowc(&tmp, s, n);
 124 #endif
 125     if (ret > 0) {
 126         *pwc = tmp;
 127 #if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
 128         if (tmp >= 0xd800 && tmp <= 0xdbff) {
 129             /* UTF-16 surrogate pair.  Fetch second half and compute
 130                UTF-32 value.  Dispense with the inverse test in this case. */
 131             size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
 132             if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
 133                 ret = -1;
 134             else {
 135                 *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
 136                 ret += n2;
 137             }
 138         } else
 139 #endif
 140         if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
 141             ret = -1;
 142
 143     } else if (ret == -2)
 144         ret = -1;
 145     else if (ret == 0)
 146         *pwc = '\0';
 147
 148     return ret;
 149 }
 150 #endif
 151
 152 #ifdef SHORT_STRINGS
 153 Char  **
 154 blk2short(char **src)
 155 {
 156     size_t     n;
 157     Char **sdst, **dst;
 158
 159     /*
 160      * Count
 161      */
 162     for (n = 0; src[n] != NULL; n++)
 163         continue;
 164     sdst = dst = xmalloc((n + 1) * sizeof(Char *));
 165
 166     for (; *src != NULL; src++)
 167         *dst++ = SAVE(*src);
 168     *dst = NULL;
 169     return (sdst);
 170 }
 171
 172 char  **
 173 short2blk(Char **src)
 174 {
 175     size_t     n;
 176     char **sdst, **dst;
 177
 178     /*
 179      * Count
 180      */
 181     for (n = 0; src[n] != NULL; n++)
 182         continue;
 183     sdst = dst = xmalloc((n + 1) * sizeof(char *));
 184
 185     for (; *src != NULL; src++)
 186         *dst++ = strsave(short2str(*src));
 187     *dst = NULL;
 188     return (sdst);
 189 }
 190
 191 Char   *
 192 str2short(const char *src)
 193 {
 194     static struct Strbuf buf; /* = Strbuf_INIT; */
 195
 196     if (src == NULL)
 197         return (NULL);
 198
 199     buf.len = 0;
 200     while (*src) {
 201         Char wc;
 202
 203         src += one_mbtowc(&wc, src, MB_LEN_MAX);
 204         Strbuf_append1(&buf, wc);
 205     }
 206     Strbuf_terminate(&buf);
 207     return buf.s;
 208 }
 209
 210 char   *
 211 short2str(const Char *src)
 212 {
 213     static char *sdst = NULL;
 214     static size_t dstsize = 0;
 215     char *dst, *edst;
 216
 217     if (src == NULL)
 218         return (NULL);
 219
 220     if (sdst == NULL) {
 221         dstsize = MALLOC_INCR;
 222         sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
 223     }
 224     dst = sdst;
 225     edst = &dst[dstsize];
 226     while (*src) {
 227         dst += one_wctomb(dst, *src & CHAR);
 228         src++;
 229         if (dst >= edst) {
 230             char *wdst = dst;
 231             char *wedst = edst;
 232
 233             dstsize += MALLOC_INCR;
 234             sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
 235             edst = &sdst[dstsize];
 236             dst = &edst[-MALLOC_INCR];
 237             while (wdst > wedst) {
 238                 dst++;
 239                 wdst--;
 240             }
 241         }
 242     }
 243     *dst = 0;
 244     return (sdst);
 245 }
 246
 247 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
 248 Char   *
 249 s_strcpy(Char *dst, const Char *src)
 250 {
 251     Char *sdst;
 252
 253     sdst = dst;
 254     while ((*dst++ = *src++) != '\0')
 255         continue;
 256     return (sdst);
 257 }
 258
 259 Char   *
 260 s_strncpy(Char *dst, const Char *src, size_t n)
 261 {
 262     Char *sdst;
 263
 264     if (n == 0)
 265         return(dst);
 266
 267     sdst = dst;
 268     do
 269         if ((*dst++ = *src++) == '\0') {
 270             while (--n != 0)
 271                 *dst++ = '\0';
 272             return(sdst);
 273         }
 274     while (--n != 0);
 275     return (sdst);
 276 }
 277
 278 Char   *
 279 s_strcat(Char *dst, const Char *src)
 280 {
 281     Strcpy(Strend(dst), src);
 282     return dst;
 283 }
 284
 285 #ifdef NOTUSED
 286 Char   *
 287 s_strncat(Char *dst, const Char *src, size_t n)
 288 {
 289     Char *sdst;
 290
 291     if (n == 0)
 292         return (dst);
 293
 294     sdst = dst;
 295
 296     while (*dst)
 297         dst++;
 298
 299     do
 300         if ((*dst++ = *src++) == '\0')
 301             return(sdst);
 302     while (--n != 0)
 303         continue;
 304
 305     *dst = '\0';
 306     return (sdst);
 307 }
 308
 309 #endif
 310
 311 Char   *
 312 s_strchr(const Char *str, int ch)
 313 {
 314     do
 315         if (*str == ch)
 316             return ((Char *)(intptr_t)str);
 317     while (*str++);
 318     return (NULL);
 319 }
 320
 321 Char   *
 322 s_strrchr(const Char *str, int ch)
 323 {
 324     const Char *rstr;
 325
 326     rstr = NULL;
 327     do
 328         if (*str == ch)
 329             rstr = str;
 330     while (*str++);
 331     return ((Char *)(intptr_t)rstr);
 332 }
 333
 334 size_t
 335 s_strlen(const Char *str)
 336 {
 337     size_t n;
 338
 339     for (n = 0; *str++; n++)
 340         continue;
 341     return (n);
 342 }
 343
 344 int
 345 s_strcmp(const Char *str1, const Char *str2)
 346 {
 347     for (; *str1 && *str1 == *str2; str1++, str2++)
 348         continue;
 349     /*
 350      * The following case analysis is necessary so that characters which look
 351      * negative collate low against normal characters but high against the
 352      * end-of-string NUL.
 353      */
 354     if (*str1 == '\0' && *str2 == '\0')
 355         return (0);
 356     else if (*str1 == '\0')
 357         return (-1);
 358     else if (*str2 == '\0')
 359         return (1);
 360     else
 361         return (*str1 - *str2);
 362 }
 363
 364 int
 365 s_strncmp(const Char *str1, const Char *str2, size_t n)
 366 {
 367     if (n == 0)
 368         return (0);
 369     do {
 370         if (*str1 != *str2) {
 371             /*
 372              * The following case analysis is necessary so that characters
 373              * which look negative collate low against normal characters
 374              * but high against the end-of-string NUL.
 375              */
 376             if (*str1 == '\0')
 377                 return (-1);
 378             else if (*str2 == '\0')
 379                 return (1);
 380             else
 381                 return (*str1 - *str2);
 382         }
 383         if (*str1 == '\0')
 384             return(0);
 385         str1++, str2++;
 386     } while (--n != 0);
 387     return(0);
 388 }
 389 #endif /* not WIDE_STRINGS */
 390
 391 int
 392 s_strcasecmp(const Char *str1, const Char *str2)
 393 {
 394 #ifdef WIDE_STRINGS
 395     wint_t l1 = 0, l2 = 0;
 396     for (; *str1; str1++, str2++)
 397         if (*str1 == *str2)
 398             l1 = l2 = 0;
 399         else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
 400             break;
 401 #else
 402     unsigned char l1 = 0, l2 = 0;
 403     for (; *str1; str1++, str2++)
 404         if (*str1 == *str2)
 405                 l1 = l2 = 0;
 406         else if ((l1 = tolower((unsigned char)*str1)) !=
 407             (l2 = tolower((unsigned char)*str2)))
 408             break;
 409 #endif
 410     /*
 411      * The following case analysis is necessary so that characters which look
 412      * negative collate low against normal characters but high against the
 413      * end-of-string NUL.
 414      */
 415     if (*str1 == '\0' && *str2 == '\0')
 416         return (0);
 417     else if (*str1 == '\0')
 418         return (-1);
 419     else if (*str2 == '\0')
 420         return (1);
 421     else if (l1 == l2)  /* They are zero when they are equal */
 422         return (*str1 - *str2);
 423     else
 424         return (l1 - l2);
 425 }
 426
 427 Char   *
 428 s_strnsave(const Char *s, size_t len)
 429 {
 430     Char *n;
 431
 432     n = xmalloc((len + 1) * sizeof (*n));
 433     memcpy(n, s, len * sizeof (*n));
 434     n[len] = '\0';
 435     return n;
 436 }
 437
 438 Char   *
 439 s_strsave(const Char *s)
 440 {
 441     Char   *n;
 442     size_t size;
 443
 444     if (s == NULL)
 445         s = STRNULL;
 446     size = (Strlen(s) + 1) * sizeof(*n);
 447     n = xmalloc(size);
 448     memcpy(n, s, size);
 449     return (n);
 450 }
 451
 452 Char   *
 453 s_strspl(const Char *cp, const Char *dp)
 454 {
 455     Char *res, *ep;
 456     const Char *p, *q;
 457
 458     if (!cp)
 459         cp = STRNULL;
 460     if (!dp)
 461         dp = STRNULL;
 462     for (p = cp; *p++;)
 463         continue;
 464     for (q = dp; *q++;)
 465         continue;
 466     res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
 467     for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
 468         continue;
 469     for (ep--, q = dp; (*ep++ = *q++) != '\0';)
 470         continue;
 471     return (res);
 472 }
 473
 474 Char   *
 475 s_strend(const Char *cp)
 476 {
 477     if (!cp)
 478         return ((Char *)(intptr_t) cp);
 479     while (*cp)
 480         cp++;
 481     return ((Char *)(intptr_t) cp);
 482 }
 483
 484 Char   *
 485 s_strstr(const Char *s, const Char *t)
 486 {
 487     do {
 488         const Char *ss = s;
 489         const Char *tt = t;
 490
 491         do
 492             if (*tt == '\0')
 493                 return ((Char *)(intptr_t) s);
 494         while (*ss++ == *tt++);
 495     } while (*s++ != '\0');
 496     return (NULL);
 497 }
 498
 499 #else /* !SHORT_STRINGS */
 500 char *
 501 caching_strip(const char *s)
 502 {
 503     static char *buf = NULL;
 504     static size_t buf_size = 0;
 505     size_t size;
 506
 507     if (s == NULL)
 508       return NULL;
 509     size = strlen(s) + 1;
 510     if (buf_size < size) {
 511         buf = xrealloc(buf, size);
 512         buf_size = size;
 513     }
 514     memcpy(buf, s, size);
 515     strip(buf);
 516     return buf;
 517 }
 518 #endif
 519
 520 char   *
 521 short2qstr(const Char *src)
 522 {
 523     static char *sdst = NULL;
 524     static size_t dstsize = 0;
 525     char *dst, *edst;
 526
 527     if (src == NULL)
 528         return (NULL);
 529
 530     if (sdst == NULL) {
 531         dstsize = MALLOC_INCR;
 532         sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
 533     }
 534     dst = sdst;
 535     edst = &dst[dstsize];
 536     while (*src) {
 537         if (*src & QUOTE) {
 538             *dst++ = '\\';
 539             if (dst == edst) {
 540                 dstsize += MALLOC_INCR;
 541                 sdst = xrealloc(sdst,
 542                                 (dstsize + MALLOC_SURPLUS) * sizeof(char));
 543                 edst = &sdst[dstsize];
 544                 dst = &edst[-MALLOC_INCR];
 545             }
 546         }
 547         dst += one_wctomb(dst, *src & CHAR);
 548         src++;
 549         if (dst >= edst) {
 550             ptrdiff_t i = dst - edst;
 551             dstsize += MALLOC_INCR;
 552             sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
 553             edst = &sdst[dstsize];
 554             dst = &edst[-MALLOC_INCR + i];
 555         }
 556     }
 557     *dst = 0;
 558     return (sdst);
 559 }
 560
 561 struct blk_buf *
 562 bb_alloc(void)
 563 {
 564     return xcalloc(1, sizeof(struct blk_buf));
 565 }
 566
 567 static void
 568 bb_store(struct blk_buf *bb, Char *str)
 569 {
 570     if (bb->len == bb->size) { /* Keep space for terminating NULL */
 571         if (bb->size == 0)
 572             bb->size = 16; /* Arbitrary */
 573         else
 574             bb->size *= 2;
 575         bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
 576     }
 577     bb->vec[bb->len] = str;
 578 }
 579
 580 void
 581 bb_append(struct blk_buf *bb, Char *str)
 582 {
 583     bb_store(bb, str);
 584     bb->len++;
 585 }
 586
 587 void
 588 bb_cleanup(void *xbb)
 589 {
 590     struct blk_buf *bb;
 591     size_t i;
 592
 593     bb = (struct blk_buf *)xbb;
 594     if (bb->vec) {
 595         for (i = 0; i < bb->len; i++)
 596             xfree(bb->vec[i]);
 597         xfree(bb->vec);
 598     }
 599     bb->vec = NULL;
 600     bb->len = 0;
 601 }
 602
 603 void
 604 bb_free(void *bb)
 605 {
 606     bb_cleanup(bb);
 607     xfree(bb);
 608 }
 609
 610 Char **
 611 bb_finish(struct blk_buf *bb)
 612 {
 613     bb_store(bb, NULL);
 614     return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
 615 }
 616
 617 #define DO_STRBUF(STRBUF, CHAR, STRLEN)                         \
 618                                                                 \
 619 struct STRBUF *                                                 \
 620 STRBUF##_alloc(void)                                            \
 621 {                                                               \
 622     return xcalloc(1, sizeof(struct STRBUF));                   \
 623 }                                                               \
 624                                                                 \
 625 static void                                                     \
 626 STRBUF##_store1(struct STRBUF *buf, CHAR c)                     \
 627 {                                                               \
 628     if (buf->size == buf->len) {                                \
 629         if (buf->size == 0)                                     \
 630             buf->size = 64; /* Arbitrary */                     \
 631         else                                                    \
 632             buf->size *= 2;                                     \
 633         buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
 634     }                                                           \
 635     assert(buf->s);                                             \
 636     buf->s[buf->len] = c;                                       \
 637 }                                                               \
 638                                                                 \
 639 /* Like strbuf_append1(buf, '\0'), but don't advance len */     \
 640 void                                                            \
 641 STRBUF##_terminate(struct STRBUF *buf)                          \
 642 {                                                               \
 643     STRBUF##_store1(buf, '\0');                                 \
 644 }                                                               \
 645                                                                 \
 646 void                                                            \
 647 STRBUF##_append1(struct STRBUF *buf, CHAR c)                    \
 648 {                                                               \
 649     STRBUF##_store1(buf, c);                                    \
 650     buf->len++;                                                 \
 651 }                                                               \
 652                                                                 \
 653 void                                                            \
 654 STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \
 655 {                                                               \
 656     if (buf->size < buf->len + len) {                           \
 657         if (buf->size == 0)                                     \
 658             buf->size = 64; /* Arbitrary */                     \
 659         while (buf->size < buf->len + len)                      \
 660             buf->size *= 2;                                     \
 661         buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
 662     }                                                           \
 663     memcpy(buf->s + buf->len, s, len * sizeof(*buf->s));        \
 664     buf->len += len;                                            \
 665 }                                                               \
 666                                                                 \
 667 void                                                            \
 668 STRBUF##_append(struct STRBUF *buf, const CHAR *s)              \
 669 {                                                               \
 670     STRBUF##_appendn(buf, s, STRLEN(s));                        \
 671 }                                                               \
 672                                                                 \
 673 CHAR *                                                          \
 674 STRBUF##_finish(struct STRBUF *buf)                             \
 675 {                                                               \
 676     STRBUF##_append1(buf, 0);                                   \
 677     return xrealloc(buf->s, buf->len * sizeof(*buf->s));        \
 678 }                                                               \
 679                                                                 \
 680 void                                                            \
 681 STRBUF##_cleanup(void *xbuf)                                    \
 682 {                                                               \
 683     struct STRBUF *buf;                                         \
 684                                                                 \
 685     buf = xbuf;                                                 \
 686     xfree(buf->s);                                              \
 687 }                                                               \
 688                                                                 \
 689 void                                                            \
 690 STRBUF##_free(void *xbuf)                                       \
 691 {                                                               \
 692     STRBUF##_cleanup(xbuf);                                     \
 693     xfree(xbuf);                                                \
 694 }                                                               \
 695                                                                 \
 696 const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
 697
 698 DO_STRBUF(strbuf, char, strlen);
 699 DO_STRBUF(Strbuf, Char, Strlen);