src/basic/string-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <errno.h>
   4 #include <stdarg.h>
   5 #include <stdint.h>
   6 #include <stdio.h>
   7 #include <stdlib.h>
   8
   9 #include "alloc-util.h"
  10 #include "escape.h"
  11 #include "extract-word.h"
  12 #include "fd-util.h"
  13 #include "fileio.h"
  14 #include "gunicode.h"
  15 #include "locale-util.h"
  16 #include "macro.h"
  17 #include "memory-util.h"
  18 #include "memstream-util.h"
  19 #include "path-util.h"
  20 #include "string-util.h"
  21 #include "strv.h"
  22 #include "terminal-util.h"
  23 #include "utf8.h"
  24
  25 char* first_word(const char *s, const char *word) {
  26         size_t sl, wl;
  27         const char *p;
  28
  29         assert(s);
  30         assert(word);
  31
  32         /* Checks if the string starts with the specified word, either
  33          * followed by NUL or by whitespace. Returns a pointer to the
  34          * NUL or the first character after the whitespace. */
  35
  36         sl = strlen(s);
  37         wl = strlen(word);
  38
  39         if (sl < wl)
  40                 return NULL;
  41
  42         if (wl == 0)
  43                 return (char*) s;
  44
  45         if (memcmp(s, word, wl) != 0)
  46                 return NULL;
  47
  48         p = s + wl;
  49         if (*p == 0)
  50                 return (char*) p;
  51
  52         if (!strchr(WHITESPACE, *p))
  53                 return NULL;
  54
  55         p += strspn(p, WHITESPACE);
  56         return (char*) p;
  57 }
  58
  59 char *strnappend(const char *s, const char *suffix, size_t b) {
  60         size_t a;
  61         char *r;
  62
  63         if (!s && !suffix)
  64                 return strdup("");
  65
  66         if (!s)
  67                 return strndup(suffix, b);
  68
  69         if (!suffix)
  70                 return strdup(s);
  71
  72         assert(s);
  73         assert(suffix);
  74
  75         a = strlen(s);
  76         if (b > SIZE_MAX - a)
  77                 return NULL;
  78
  79         r = new(char, a+b+1);
  80         if (!r)
  81                 return NULL;
  82
  83         memcpy(r, s, a);
  84         memcpy(r+a, suffix, b);
  85         r[a+b] = 0;
  86
  87         return r;
  88 }
  89
  90 char *strjoin_real(const char *x, ...) {
  91         va_list ap;
  92         size_t l = 1;
  93         char *r, *p;
  94
  95         va_start(ap, x);
  96         for (const char *t = x; t; t = va_arg(ap, const char *)) {
  97                 size_t n;
  98
  99                 n = strlen(t);
 100                 if (n > SIZE_MAX - l) {
 101                         va_end(ap);
 102                         return NULL;
 103                 }
 104                 l += n;
 105         }
 106         va_end(ap);
 107
 108         p = r = new(char, l);
 109         if (!r)
 110                 return NULL;
 111
 112         va_start(ap, x);
 113         for (const char *t = x; t; t = va_arg(ap, const char *))
 114                 p = stpcpy(p, t);
 115         va_end(ap);
 116
 117         *p = 0;
 118
 119         return r;
 120 }
 121
 122 char *strstrip(char *s) {
 123         if (!s)
 124                 return NULL;
 125
 126         /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */
 127
 128         return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE);
 129 }
 130
 131 char *delete_chars(char *s, const char *bad) {
 132         char *f, *t;
 133
 134         /* Drops all specified bad characters, regardless where in the string */
 135
 136         if (!s)
 137                 return NULL;
 138
 139         if (!bad)
 140                 bad = WHITESPACE;
 141
 142         for (f = s, t = s; *f; f++) {
 143                 if (strchr(bad, *f))
 144                         continue;
 145
 146                 *(t++) = *f;
 147         }
 148
 149         *t = 0;
 150
 151         return s;
 152 }
 153
 154 char *delete_trailing_chars(char *s, const char *bad) {
 155         char *c = s;
 156
 157         /* Drops all specified bad characters, at the end of the string */
 158
 159         if (!s)
 160                 return NULL;
 161
 162         if (!bad)
 163                 bad = WHITESPACE;
 164
 165         for (char *p = s; *p; p++)
 166                 if (!strchr(bad, *p))
 167                         c = p + 1;
 168
 169         *c = 0;
 170
 171         return s;
 172 }
 173
 174 char *truncate_nl_full(char *s, size_t *ret_len) {
 175         size_t n;
 176
 177         assert(s);
 178
 179         n = strcspn(s, NEWLINE);
 180         s[n] = '\0';
 181         if (ret_len)
 182                 *ret_len = n;
 183         return s;
 184 }
 185
 186 char ascii_tolower(char x) {
 187
 188         if (x >= 'A' && x <= 'Z')
 189                 return x - 'A' + 'a';
 190
 191         return x;
 192 }
 193
 194 char ascii_toupper(char x) {
 195
 196         if (x >= 'a' && x <= 'z')
 197                 return x - 'a' + 'A';
 198
 199         return x;
 200 }
 201
 202 char *ascii_strlower(char *t) {
 203         assert(t);
 204
 205         for (char *p = t; *p; p++)
 206                 *p = ascii_tolower(*p);
 207
 208         return t;
 209 }
 210
 211 char *ascii_strupper(char *t) {
 212         assert(t);
 213
 214         for (char *p = t; *p; p++)
 215                 *p = ascii_toupper(*p);
 216
 217         return t;
 218 }
 219
 220 char *ascii_strlower_n(char *t, size_t n) {
 221         if (n <= 0)
 222                 return t;
 223
 224         for (size_t i = 0; i < n; i++)
 225                 t[i] = ascii_tolower(t[i]);
 226
 227         return t;
 228 }
 229
 230 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
 231
 232         for (; n > 0; a++, b++, n--) {
 233                 int x, y;
 234
 235                 x = (int) (uint8_t) ascii_tolower(*a);
 236                 y = (int) (uint8_t) ascii_tolower(*b);
 237
 238                 if (x != y)
 239                         return x - y;
 240         }
 241
 242         return 0;
 243 }
 244
 245 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
 246         int r;
 247
 248         r = ascii_strcasecmp_n(a, b, MIN(n, m));
 249         if (r != 0)
 250                 return r;
 251
 252         return CMP(n, m);
 253 }
 254
 255 bool chars_intersect(const char *a, const char *b) {
 256         /* Returns true if any of the chars in a are in b. */
 257         for (const char *p = a; *p; p++)
 258                 if (strchr(b, *p))
 259                         return true;
 260
 261         return false;
 262 }
 263
 264 bool string_has_cc(const char *p, const char *ok) {
 265         assert(p);
 266
 267         /*
 268          * Check if a string contains control characters. If 'ok' is
 269          * non-NULL it may be a string containing additional CCs to be
 270          * considered OK.
 271          */
 272
 273         for (const char *t = p; *t; t++) {
 274                 if (ok && strchr(ok, *t))
 275                         continue;
 276
 277                 if (char_is_cc(*t))
 278                         return true;
 279         }
 280
 281         return false;
 282 }
 283
 284 static int write_ellipsis(char *buf, bool unicode) {
 285         if (unicode || is_locale_utf8()) {
 286                 buf[0] = 0xe2; /* tri-dot ellipsis: … */
 287                 buf[1] = 0x80;
 288                 buf[2] = 0xa6;
 289         } else {
 290                 buf[0] = '.';
 291                 buf[1] = '.';
 292                 buf[2] = '.';
 293         }
 294
 295         return 3;
 296 }
 297
 298 static size_t ansi_sequence_length(const char *s, size_t len) {
 299         assert(s);
 300
 301         if (len < 2)
 302                 return 0;
 303
 304         if (s[0] != 0x1B)  /* ASCII 27, aka ESC, aka Ctrl-[ */
 305                 return 0;  /* Not the start of a sequence */
 306
 307         if (s[1] == 0x5B) { /* [, start of CSI sequence */
 308                 size_t i = 2;
 309
 310                 if (i == len)
 311                         return 0;
 312
 313                 while (s[i] >= 0x30 && s[i] <= 0x3F) /* Parameter bytes */
 314                         if (++i == len)
 315                                 return 0;
 316                 while (s[i] >= 0x20 && s[i] <= 0x2F) /* Intermediate bytes */
 317                         if (++i == len)
 318                                 return 0;
 319                 if (s[i] >= 0x40 && s[i] <= 0x7E) /* Final byte */
 320                         return i + 1;
 321                 return 0;  /* Bad sequence */
 322
 323         } else if (s[1] >= 0x40 && s[1] <= 0x5F) /* other non-CSI Fe sequence */
 324                 return 2;
 325
 326         return 0;  /* Bad escape? */
 327 }
 328
 329 static bool string_has_ansi_sequence(const char *s, size_t len) {
 330         const char *t = s;
 331
 332         while ((t = memchr(s, 0x1B, len - (t - s))))
 333                 if (ansi_sequence_length(t, len - (t - s)) > 0)
 334                         return true;
 335         return false;
 336 }
 337
 338 static size_t previous_ansi_sequence(const char *s, size_t length, const char **ret_where) {
 339         /* Locate the previous ANSI sequence and save its start in *ret_where and return length. */
 340
 341         for (size_t i = length - 2; i > 0; i--) {  /* -2 because at least two bytes are needed */
 342                 size_t slen = ansi_sequence_length(s + (i - 1), length - (i - 1));
 343                 if (slen == 0)
 344                         continue;
 345
 346                 *ret_where = s + (i - 1);
 347                 return slen;
 348         }
 349
 350         *ret_where = NULL;
 351         return 0;
 352 }
 353
 354 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 355         size_t x, need_space, suffix_len;
 356         char *t;
 357
 358         assert(s);
 359         assert(percent <= 100);
 360         assert(new_length != SIZE_MAX);
 361
 362         if (old_length <= new_length)
 363                 return strndup(s, old_length);
 364
 365         /* Special case short ellipsations */
 366         switch (new_length) {
 367
 368         case 0:
 369                 return strdup("");
 370
 371         case 1:
 372                 if (is_locale_utf8())
 373                         return strdup("…");
 374                 else
 375                         return strdup(".");
 376
 377         case 2:
 378                 if (!is_locale_utf8())
 379                         return strdup("..");
 380
 381                 break;
 382
 383         default:
 384                 break;
 385         }
 386
 387         /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
 388          * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
 389          * either for the UTF-8 encoded character or for three ASCII characters. */
 390         need_space = is_locale_utf8() ? 1 : 3;
 391
 392         t = new(char, new_length+3);
 393         if (!t)
 394                 return NULL;
 395
 396         assert(new_length >= need_space);
 397
 398         x = ((new_length - need_space) * percent + 50) / 100;
 399         assert(x <= new_length - need_space);
 400
 401         memcpy(t, s, x);
 402         write_ellipsis(t + x, false);
 403         suffix_len = new_length - x - need_space;
 404         memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
 405         *(t + x + 3 + suffix_len) = '\0';
 406
 407         return t;
 408 }
 409
char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
        size_t x, k, len, len2;
        const char *i, *j;
        int r;

        /* Shortens the old_length bytes at s so that they take up no more than new_length character
         * cells, replacing the part that is cut out with an ellipsis. percent (0…100) selects where
         * the ellipsis is placed: 0 means at the beginning, 100 at the end.
         *
         * Returns a newly allocated string. Returns NULL if an allocation fails or if
         * utf8_encoded_to_unichar() reports an error for the input. A new_length of SIZE_MAX
         * disables ellipsation and yields a plain copy.
         *
         * Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
         * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
         * strings.
         *
         * Ellipsation is done in a locale-dependent way:
         * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
         * 2. Otherwise, a unicode ellipsis is used ("…")
         *
         * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
         * the current locale is UTF-8.
         */

        assert(s);
        assert(percent <= 100);

        if (new_length == SIZE_MAX)
                return strndup(s, old_length);

        if (new_length == 0)
                return strdup("");

        bool has_ansi_seq = string_has_ansi_sequence(s, old_length);

        /* If no multibyte characters or ANSI sequences, use ascii_ellipsize_mem for speed */
        if (!has_ansi_seq && ascii_is_valid_n(s, old_length))
                return ascii_ellipsize_mem(s, old_length, new_length, percent);

        /* x is the number of cells to keep in front of the ellipsis; one cell is reserved for the
         * ellipsis itself. */
        x = (new_length - 1) * percent / 100;
        assert(x <= new_length - 1);

        /* Walk forward from the start: i ends up right after the last character of the head, k counts
         * the cells used up so far. */
        k = 0;
        for (i = s; i < s + old_length; ) {
                size_t slen = has_ansi_seq ? ansi_sequence_length(i, old_length - (i - s)) : 0;
                if (slen > 0) {
                        i += slen;
                        continue;  /* ANSI sequences don't take up any space in output */
                }

                char32_t c;
                r = utf8_encoded_to_unichar(i, &c);
                if (r < 0)
                        return NULL;

                /* Characters for which unichar_iswide() is true are counted as two cells */
                int w = unichar_iswide(c) ? 2 : 1;
                if (k + w > x)
                        break;

                k += w;
                i += r;
        }

        const char *ansi_start = s + old_length;
        size_t ansi_len = 0;

        /* Now walk backwards from the end: j ends up at the first character of the tail. k keeps
         * counting, so that head and tail together stay within new_length cells. */
        for (const char *t = j = s + old_length; t > i && k < new_length; ) {
                char32_t c;
                int w;
                const char *tt;

                if (has_ansi_seq && ansi_start >= t)
                        /* Figure out the previous ANSI sequence, if any */
                        ansi_len = previous_ansi_sequence(s, t - s, &ansi_start);

                /* If the sequence extends all the way to the current position, skip it. */
                if (has_ansi_seq && ansi_len > 0 && ansi_start + ansi_len == t) {
                        t = ansi_start;
                        continue;
                }

                tt = utf8_prev_char(t);
                r = utf8_encoded_to_unichar(tt, &c);
                if (r < 0)
                        return NULL;

                w = unichar_iswide(c) ? 2 : 1;
                if (k + w > new_length)
                        break;

                k += w;
                j = t = tt;  /* j should always point to the first "real" character */
        }

        /* We don't actually need to ellipsize */
        if (i >= j)
                return memdup_suffix0(s, old_length);

        if (k >= new_length) {
                /* Make space for ellipsis, if required and possible. We know that the edge character is not
                 * part of an ANSI sequence (because then we'd skip it). If the last character we looked at
                 * was wide, we don't need to make space. */
                if (j < s + old_length)
                        j = utf8_next_char(j);
                else if (i > s)
                        i = utf8_prev_char(i);
        }

        /* len: bytes of the head, len2: bytes of the tail */
        len = i - s;
        len2 = s + old_length - j;

        /* If we have ANSI, allow the same length as the source string + ellipsis. It'd be too involved to
         * figure out what exact space is needed. Strings with ANSI sequences are most likely to be fairly
         * short anyway. */
        size_t alloc_len = has_ansi_seq ? old_length + 3 + 1 : len + 3 + len2 + 1;

        char *e = new(char, alloc_len);
        if (!e)
                return NULL;

        memcpy_safe(e, s, len);
        write_ellipsis(e + len, true);

        /* dst is the write position right after the three ellipsis bytes */
        char *dst = e + len + 3;

        if (has_ansi_seq)
                /* Copy over any ANSI sequences in full */
                for (const char *p = s + len; p < j; ) {
                        size_t slen = ansi_sequence_length(p, j - p);
                        if (slen > 0) {
                                dst = mempcpy(dst, p, slen);
                                p += slen;
                        } else
                                p = utf8_next_char(p);
                }

        memcpy_safe(dst, j, len2);
        dst[len2] = '\0';

        return e;
}
 549
char *cellescape(char *buf, size_t len, const char *s) {
        /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
         * characters are copied as they are, everything else is escaped. The result
         * is different than if escaping and ellipsization were performed in two
         * separate steps, because each escape sequence is either stored in full or skipped.
         *
         * This function should be used for logging strings which are expected to
         * be plain ASCII, in a safe way.
         *
         * An ellipsis will be used if s is too long. It is always placed at the
         * very end.
         *
         * len must be at least 1, the result is always NUL-terminated. Returns buf.
         */

        /* i: number of bytes written to buf so far
         * last_char_width: ring buffer with the widths of the last four escaped characters written
         * k: next slot to use in that ring buffer */
        size_t i = 0, last_char_width[4] = {}, k = 0;

        assert(len > 0); /* at least a terminating NUL */

        for (;;) {
                char four[4];
                int w;

                if (*s == 0) /* terminating NUL detected? then we are done! */
                        goto done;

                /* w is the number of bytes cescape_char() placed in four[] for this character */
                w = cescape_char(*s, four);
                if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's
                                      * ellipsize at the previous location */
                        break;

                /* OK, there was space, let's add this escaped character to the buffer */
                memcpy(buf + i, four, w);
                i += w;

                /* And remember its width in the ring buffer */
                last_char_width[k] = w;
                k = (k + 1) % 4;

                s++;
        }

        /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
         * bytes (ellipsis plus NUL) ideally, but if the buffer is shorter than that in the first place, take
         * what we can get */
        for (size_t j = 0; j < ELEMENTSOF(last_char_width); j++) {

                if (i + 4 <= len) /* nice, we reached our space goal */
                        break;

                /* Step back to the most recently written character */
                k = k == 0 ? 3 : k - 1;
                if (last_char_width[k] == 0) /* bummer, we reached the beginning of the string */
                        break;

                assert(i >= last_char_width[k]);
                i -= last_char_width[k];
        }

        if (i + 4 <= len) /* yay, enough space */
                i += write_ellipsis(buf + i, false);
        else if (i + 3 <= len) { /* only space for ".." */
                buf[i++] = '.';
                buf[i++] = '.';
        } else if (i + 2 <= len) /* only space for a single "." */
                buf[i++] = '.';
        else
                assert(i + 1 <= len);

 done:
        buf[i] = '\0';
        return buf;
}
 619
 620 char* strshorten(char *s, size_t l) {
 621         assert(s);
 622
 623         if (strnlen(s, l+1) > l)
 624                 s[l] = 0;
 625
 626         return s;
 627 }
 628
 629 int strgrowpad0(char **s, size_t l) {
 630         assert(s);
 631
 632         char *q = realloc(*s, l);
 633         if (!q)
 634                 return -ENOMEM;
 635         *s = q;
 636
 637         size_t sz = strlen(*s);
 638         memzero(*s + sz, l - sz);
 639         return 0;
 640 }
 641
 642 char *strreplace(const char *text, const char *old_string, const char *new_string) {
 643         size_t l, old_len, new_len;
 644         char *t, *ret = NULL;
 645         const char *f;
 646
 647         assert(old_string);
 648         assert(new_string);
 649
 650         if (!text)
 651                 return NULL;
 652
 653         old_len = strlen(old_string);
 654         new_len = strlen(new_string);
 655
 656         l = strlen(text);
 657         if (!GREEDY_REALLOC(ret, l+1))
 658                 return NULL;
 659
 660         f = text;
 661         t = ret;
 662         while (*f) {
 663                 size_t d, nl;
 664
 665                 if (!startswith(f, old_string)) {
 666                         *(t++) = *(f++);
 667                         continue;
 668                 }
 669
 670                 d = t - ret;
 671                 nl = l - old_len + new_len;
 672
 673                 if (!GREEDY_REALLOC(ret, nl + 1))
 674                         return mfree(ret);
 675
 676                 l = nl;
 677                 t = ret + d;
 678
 679                 t = stpcpy(t, new_string);
 680                 f += old_len;
 681         }
 682
 683         *t = 0;
 684         return ret;
 685 }
 686
 687 static void advance_offsets(
 688                 ssize_t diff,
 689                 size_t offsets[2], /* note: we can't use [static 2] here, since this may be NULL */
 690                 size_t shift[static 2],
 691                 size_t size) {
 692
 693         if (!offsets)
 694                 return;
 695
 696         assert(shift);
 697
 698         if ((size_t) diff < offsets[0])
 699                 shift[0] += size;
 700         if ((size_t) diff < offsets[1])
 701                 shift[1] += size;
 702 }
 703
char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
        /* begin: first byte after the "ESC [" or "ESC ]" introducer of the sequence currently being
         * parsed; used to rewind if the sequence turns out to be invalid */
        const char *begin = NULL;
        enum {
                STATE_OTHER,   /* regular text */
                STATE_ESCAPE,  /* saw ESC */
                STATE_CSI,     /* inside ESC '[' … */
                STATE_CSO,     /* inside ESC ']' … */
        } state = STATE_OTHER;
        _cleanup_(memstream_done) MemStream m = {};
        /* shift: how far each of the two highlight offsets has to be moved
         * n_carriage_returns: number of \r seen that have not been written out (yet) */
        size_t isz, shift[2] = {}, n_carriage_returns = 0;
        FILE *f;

        assert(ibuf);
        assert(*ibuf);

        /* This does four things:
         *
         * 1. Replaces TABs by 8 spaces
         * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
         * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
         * 4. Strip trailing \r characters (since they would "move the cursor", but have no
         *    other effect).
         *
         * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as
         * are any other special characters. Truncated ANSI sequences are left-as is too. This call is
         * supposed to suppress the most basic formatting noise, but nothing else.
         *
         * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate.
         *
         * The input is the buffer *ibuf, of *_isz bytes, or, if _isz is NULL, up to its terminating
         * NUL. On success the old buffer is released, *ibuf is replaced by the processed one, and
         * *ibuf is returned; _isz is handed to memstream_finalize(), which presumably stores the
         * new size there. If highlight is non-NULL its two offsets are adjusted by the amounts
         * accumulated via advance_offsets(). Returns NULL if the memory stream cannot be set up or
         * finalized, in which case *ibuf is left untouched. */

        isz = _isz ? *_isz : strlen(*ibuf);

        /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we
         * created f here and it doesn't leave our scope. */
        f = memstream_init(&m);
        if (!f)
                return NULL;

        /* Note that the loop runs one step past the end of the input ("EOT"), so that every state
         * gets a chance to flush a sequence that was cut off. */
        for (const char *i = *ibuf; i < *ibuf + isz + 1; i++) {

                switch (state) {

                case STATE_OTHER:
                        if (i >= *ibuf + isz) /* EOT */
                                break;

                        if (*i == '\r') {
                                /* Hold back \r until we know what follows */
                                n_carriage_returns++;
                                break;
                        } else if (*i == '\n')
                                /* Ignore carriage returns before new line */
                                n_carriage_returns = 0;
                        /* Something else followed, hence write out the \r we held back */
                        for (; n_carriage_returns > 0; n_carriage_returns--)
                                fputc('\r', f);

                        if (*i == '\x1B')
                                state = STATE_ESCAPE;
                        else if (*i == '\t') {
                                fputs("        ", f);
                                /* One byte became eight, i.e. seven more than before */
                                advance_offsets(i - *ibuf, highlight, shift, 7);
                        } else
                                fputc(*i, f);

                        break;

                case STATE_ESCAPE:
                        assert(n_carriage_returns == 0);

                        if (i >= *ibuf + isz) { /* EOT */
                                fputc('\x1B', f);
                                advance_offsets(i - *ibuf, highlight, shift, 1);
                                break;
                        } else if (*i == '[') { /* ANSI CSI */
                                state = STATE_CSI;
                                begin = i + 1;
                        } else if (*i == ']') { /* ANSI CSO */
                                state = STATE_CSO;
                                begin = i + 1;
                        } else {
                                /* Some other escape sequence: pass it through unmodified */
                                fputc('\x1B', f);
                                fputc(*i, f);
                                advance_offsets(i - *ibuf, highlight, shift, 1);
                                state = STATE_OTHER;
                        }

                        break;

                case STATE_CSI:
                        assert(n_carriage_returns == 0);

                        if (i >= *ibuf + isz || /* EOT … */
                            !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
                                /* Not a color sequence after all: write out the introducer, then
                                 * rewind so that the rest is processed as regular text (the loop's
                                 * i++ brings us back to begin). */
                                fputc('\x1B', f);
                                fputc('[', f);
                                advance_offsets(i - *ibuf, highlight, shift, 2);
                                state = STATE_OTHER;
                                i = begin-1;
                        } else if (*i == 'm')
                                /* End of the sequence. Nothing was written for it, i.e. it is dropped. */
                                state = STATE_OTHER;

                        break;

                case STATE_CSO:
                        assert(n_carriage_returns == 0);

                        if (i >= *ibuf + isz || /* EOT … */
                            (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
                                /* Same as for CSI: write out the introducer and reprocess the rest */
                                fputc('\x1B', f);
                                fputc(']', f);
                                advance_offsets(i - *ibuf, highlight, shift, 2);
                                state = STATE_OTHER;
                                i = begin-1;
                        } else if (*i == '\a')
                                /* BEL ends the sequence, which is dropped as a whole */
                                state = STATE_OTHER;

                        break;
                }
        }

        char *obuf;
        if (memstream_finalize(&m, &obuf, _isz) < 0)
                return NULL;

        free_and_replace(*ibuf, obuf);

        if (highlight) {
                highlight[0] += shift[0];
                highlight[1] += shift[1];
        }

        return *ibuf;
}
 835
 836 char *strextend_with_separator_internal(char **x, const char *separator, ...) {
 837         size_t f, l, l_separator;
 838         bool need_separator;
 839         char *nr, *p;
 840         va_list ap;
 841
 842         assert(x);
 843
 844         l = f = strlen_ptr(*x);
 845
 846         need_separator = !isempty(*x);
 847         l_separator = strlen_ptr(separator);
 848
 849         va_start(ap, separator);
 850         for (;;) {
 851                 const char *t;
 852                 size_t n;
 853
 854                 t = va_arg(ap, const char *);
 855                 if (!t)
 856                         break;
 857
 858                 n = strlen(t);
 859
 860                 if (need_separator)
 861                         n += l_separator;
 862
 863                 if (n >= SIZE_MAX - l) {
 864                         va_end(ap);
 865                         return NULL;
 866                 }
 867
 868                 l += n;
 869                 need_separator = true;
 870         }
 871         va_end(ap);
 872
 873         need_separator = !isempty(*x);
 874
 875         nr = realloc(*x, GREEDY_ALLOC_ROUND_UP(l+1));
 876         if (!nr)
 877                 return NULL;
 878
 879         *x = nr;
 880         p = nr + f;
 881
 882         va_start(ap, separator);
 883         for (;;) {
 884                 const char *t;
 885
 886                 t = va_arg(ap, const char *);
 887                 if (!t)
 888                         break;
 889
 890                 if (need_separator && separator)
 891                         p = stpcpy(p, separator);
 892
 893                 p = stpcpy(p, t);
 894
 895                 need_separator = true;
 896         }
 897         va_end(ap);
 898
 899         assert(p == nr + l);
 900
 901         *p = 0;
 902
 903         return p;
 904 }
 905
int strextendf_with_separator(char **x, const char *separator, const char *format, ...) {
        /* m: length of the existing string
         * a: usable size of the allocation behind *x
         * l_separator: number of separator bytes to insert (0 if *x is NULL or empty) */
        size_t m, a, l_separator;
        va_list ap;
        int l;

        /* Appends a formatted string to the specified string. Don't use this in inner loops, since then
         * we'll spend a ton of time in determining the length of the string passed in, over and over
         * again.
         *
         * If *x is neither NULL nor empty and separator is non-NULL, the separator is inserted between
         * the existing contents and the formatted text. *x may be reallocated.
         *
         * Returns 0 on success and -ENOMEM if memory allocation fails or a size calculation would
         * overflow. */

        assert(x);
        assert(format);

        l_separator = isempty(*x) ? 0 : strlen_ptr(separator);

        /* Let's try to use the allocated buffer, if there's room at the end still. Otherwise let's extend by 64 chars. */
        if (*x) {
                m = strlen(*x);
                a = MALLOC_SIZEOF_SAFE(*x);
                assert(a >= m + 1);
        } else
                m = a = 0;

        if (a - m < 17 + l_separator) { /* if there's less than 16 chars space (plus NUL and separator), then enlarge the buffer first */
                char *n;

                if (_unlikely_(l_separator > SIZE_MAX - 64)) /* overflow check #1 */
                        return -ENOMEM;
                if (_unlikely_(m > SIZE_MAX - 64 - l_separator)) /* overflow check #2 */
                        return -ENOMEM;

                n = realloc(*x, m + 64 + l_separator);
                if (!n)
                        return -ENOMEM;

                *x = n;
                a = MALLOC_SIZEOF_SAFE(*x);
        }

        /* Now, let's try to format the string into it */
        memcpy_safe(*x + m, separator, l_separator);
        va_start(ap, format);
        l = vsnprintf(*x + m + l_separator, a - m - l_separator, format, ap);
        va_end(ap);

        assert(l >= 0);

        /* vsnprintf() returns the length the full output has, even if it had to be truncated */
        if ((size_t) l < a - m - l_separator) {
                char *n;

                /* Nice! This worked. We are done. But first, let's return the extra space we don't
                 * need. This should be a cheap operation, since we only lower the allocation size here,
                 * never increase. */
                n = realloc(*x, m + (size_t) l + l_separator + 1);
                if (n)
                        *x = n;
        } else {
                char *n;

                /* Wasn't enough. Then let's allocate exactly what we need. */

                if (_unlikely_((size_t) l > SIZE_MAX - (l_separator + 1))) /* overflow check #1 */
                        goto oom;
                if (_unlikely_(m > SIZE_MAX - ((size_t) l + l_separator + 1))) /* overflow check #2 */
                        goto oom;

                a = m + (size_t) l + l_separator + 1;
                n = realloc(*x, a);
                if (!n)
                        goto oom;
                *x = n;

                /* Second attempt, the va_list has to be started anew for it */
                va_start(ap, format);
                l = vsnprintf(*x + m + l_separator, a - m - l_separator, format, ap);
                va_end(ap);

                assert((size_t) l < a - m - l_separator);
        }

        return 0;

oom:
        /* truncate the bytes added after the first vsnprintf() attempt again */
        (*x)[m] = 0;
        return -ENOMEM;
}
 991
 992 char *strextendn(char **x, const char *s, size_t l) {
 993         assert(x);
 994         assert(s || l == 0);
 995
 996         if (l == SIZE_MAX)
 997                 l = strlen_ptr(s);
 998         else if (l > 0)
 999                 l = strnlen(s, l); /* ignore trailing noise */
1000
1001         if (l > 0 || !*x) {
1002                 size_t q;
1003                 char *m;
1004
1005                 q = strlen_ptr(*x);
1006                 m = realloc(*x, q + l + 1);
1007                 if (!m)
1008                         return NULL;
1009
1010                 memcpy_safe(m + q, s, l);
1011                 m[q + l] = 0;
1012
1013                 *x = m;
1014         }
1015
1016         return *x;
1017 }
1018
char *strrep(const char *s, unsigned n) {
        char *r, *p;
        size_t l;

        /* Returns a newly allocated string consisting of 'n' copies of 's' placed back to back. If 'n' is
         * zero or 's' is empty the result is the empty string. Returns NULL if memory could not be
         * allocated or if the result would not fit into size_t. The caller has to free() the result. */

        assert(s);

        l = strlen(s);

        /* Refuse if l * n + 1 would wrap around: malloc() would otherwise hand out a buffer that is too
         * small and the copy loop below would write past its end. */
        if (l > 0 && n > (SIZE_MAX - 1) / l)
                return NULL;

        p = r = malloc(l * n + 1);
        if (!r)
                return NULL;

        /* We know the length already, hence copy with memcpy() rather than scanning 's' for its NUL on
         * every round. */
        for (unsigned i = 0; i < n; i++) {
                memcpy(p, s, l);
                p += l;
        }

        *p = 0;
        return r;
}
1036
int split_pair(const char *s, const char *sep, char **l, char **r) {
        char *hit, *left, *right;

        /* Splits 's' at the first occurrence of 'sep' into two newly allocated strings: the part before
         * the separator is returned in *l, the part after it in *r. Returns 0 on success, -EINVAL if
         * 'sep' is empty or does not occur in 's', and -ENOMEM if out of memory. The output parameters are
         * only written on success. */

        assert(s);
        assert(sep);
        assert(l);
        assert(r);

        if (isempty(sep))
                return -EINVAL;

        hit = strstr(s, sep);
        if (!hit)
                return -EINVAL;

        left = strndup(s, hit - s);
        if (!left)
                return -ENOMEM;

        right = strdup(hit + strlen(sep));
        if (!right) {
                free(left);
                return -ENOMEM;
        }

        *r = right;
        *l = left;

        return 0;
}
1067
int free_and_strdup(char **p, const char *s) {
        char *copy = NULL;

        /* Makes *p a freshly strdup()ed copy of 's' (or NULL if 's' is NULL), releasing whatever *p
         * pointed to before. Returns 0 if *p already compared equal to 's' and nothing was done, 1 if the
         * string was replaced, and -ENOMEM if the copy could not be allocated (in which case *p is left
         * unchanged). */

        assert(p);

        if (streq_ptr(*p, s))
                return 0;

        if (s) {
                copy = strdup(s);
                if (!copy)
                        return -ENOMEM;
        }

        free_and_replace(*p, copy);

        return 1;
}
1090
int free_and_strndup(char **p, const char *s, size_t l) {
        char *copy = NULL;

        /* Makes *p a freshly strndup()ed copy of at most 'l' bytes of 's' (or NULL if 's' is NULL),
         * releasing the previous string. Returns 0 if *p already holds exactly that value, 1 if it was
         * replaced, and -ENOMEM if the copy could not be allocated (in which case *p is left
         * unchanged). */

        assert(p);
        assert(s || l == 0);

        if (s) {
                /* Unchanged if the first 'l' bytes agree and the old string does not continue beyond
                 * them. The length is checked first so that (*p)[l] is only read when it lies within the
                 * old string. */
                if (*p && strneq(*p, s, l) && (l > strlen(*p) || (*p)[l] == '\0'))
                        return 0;

                copy = strndup(s, l);
                if (!copy)
                        return -ENOMEM;
        } else if (!*p)
                return 0; /* NULL before, NULL after: nothing to do */

        free_and_replace(*p, copy);
        return 1;
}
1116
1117 bool string_is_safe(const char *p) {
1118         if (!p)
1119                 return false;
1120
1121         /* Checks if the specified string contains no quotes or control characters */
1122
1123         for (const char *t = p; *t; t++) {
1124                 if (*t > 0 && *t < ' ') /* no control characters */
1125                         return false;
1126
1127                 if (strchr(QUOTES "\\\x7f", *t))
1128                         return false;
1129         }
1130
1131         return true;
1132 }
1133
char* string_erase(char *x) {
        /* A delicious drop of snake-oil! Meant for memory that held passphrases or similar secrets, once
         * we are done with them: hands the string's bytes (everything up to its NUL terminator) to
         * explicit_bzero_safe() and returns the very same pointer. NULL is passed through untouched. */

        if (x)
                explicit_bzero_safe(x, strlen(x));

        return x;
}
1143
int string_truncate_lines(const char *s, size_t n_lines, char **ret) {
        const char *p = s, *e = s;
        bool truncation_applied = false;
        char *copy;
        size_t n = 0;

        assert(s);

        /* Truncate after the specified number of lines. Returns > 0 if a truncation was applied or == 0 if
         * there were fewer lines in the string anyway. Trailing newlines on input are ignored, and not
         * generated either. The result is returned as a newly allocated string in *ret. Returns -ENOMEM if
         * that allocation fails, in which case *ret is not written.
         *
         * In the loop below 'p' points to the beginning of the line currently looked at, 'n' counts the
         * lines already accepted, and 'e' points right behind the last non-empty line accepted so far. */

        for (;;) {
                size_t k;

                /* Length of the current line, not counting the terminating newline or NUL */
                k = strcspn(p, "\n");

                if (p[k] == 0) {
                        /* This line is not terminated by a newline, i.e. it is the last one */

                        if (k == 0) /* final empty line */
                                break;

                        if (n >= n_lines) /* above threshold */
                                break;

                        e = p + k; /* last line to include */
                        break;
                }

                assert(p[k] == '\n');

                if (n >= n_lines)
                        break;

                /* Empty lines count towards the limit, but do not move 'e' forward: this way newlines at
                 * the end of the part we keep are dropped. */
                if (k > 0)
                        e = p + k;

                p += k + 1;
                n++;
        }

        /* e points after the last character we want to keep */
        if (isempty(e))
                /* Nothing follows the part we keep, hence return the input in full */
                copy = strdup(s);
        else {
                if (!in_charset(e, "\n")) /* We only consider things truncated if we remove something that
                                           * isn't a new-line or a series of them */
                        truncation_applied = true;

                copy = strndup(s, e - s);
        }
        if (!copy)
                return -ENOMEM;

        *ret = copy;
        return truncation_applied;
}
1200
int string_extract_line(const char *s, size_t i, char **ret) {
        const char *line = s, *nl;
        char *copy;

        /* Returns line number 'i' (counting from zero) of the string 's' in *ret. The return value is > 0
         * if further text follows the returned line, == 0 if the returned line is the last one or 'i'
         * lies beyond the end of the string (in which case an empty string is returned), and -ENOMEM if
         * out of memory.
         *
         * As a special optimization, if the first line is requested and the string consists of a single
         * line only, *ret is set to NULL: the caller shall then use the input string as is. This avoids a
         * memory allocation in a very common case. */

        /* First, skip over the 'i' lines in front of the one we are interested in */
        for (size_t skipped = 0; skipped < i; skipped++) {
                nl = strchr(line, '\n');
                if (!nl) {
                        /* The string ends before we reach the requested line, return an empty line */
                        copy = strdup("");
                        if (!copy)
                                return -ENOMEM;

                        *ret = copy;
                        return 0; /* The end */
                }

                line = nl + 1;
        }

        /* 'line' now points to the beginning of the line we are looking for */
        nl = strchr(line, '\n');
        if (nl) {
                copy = strndup(line, nl - line);
                if (!copy)
                        return -ENOMEM;

                *ret = copy;
                return !isempty(nl + 1); /* more coming? */
        }

        if (line == s) {
                *ret = NULL; /* Just use the input string */
                return 0;
        }

        copy = strdup(line);
        if (!copy)
                return -ENOMEM;

        *ret = copy;
        return 0; /* The end */
}
1261
1262 int string_contains_word_strv(const char *string, const char *separators, char **words, const char **ret_word) {
1263         /* In the default mode with no separators specified, we split on whitespace and
1264          * don't coalesce separators. */
1265         const ExtractFlags flags = separators ? EXTRACT_DONT_COALESCE_SEPARATORS : 0;
1266
1267         const char *found = NULL;
1268
1269         for (const char *p = string;;) {
1270                 _cleanup_free_ char *w = NULL;
1271                 int r;
1272
1273                 r = extract_first_word(&p, &w, separators, flags);
1274                 if (r < 0)
1275                         return r;
1276                 if (r == 0)
1277                         break;
1278
1279                 found = strv_find(words, w);
1280                 if (found)
1281                         break;
1282         }
1283
1284         if (ret_word)
1285                 *ret_word = found;
1286         return !!found;
1287 }
1288
1289 bool streq_skip_trailing_chars(const char *s1, const char *s2, const char *ok) {
1290         if (!s1 && !s2)
1291                 return true;
1292         if (!s1 || !s2)
1293                 return false;
1294
1295         if (!ok)
1296                 ok = WHITESPACE;
1297
1298         for (; *s1 && *s2; s1++, s2++)
1299                 if (*s1 != *s2)
1300                         break;
1301
1302         return in_charset(s1, ok) && in_charset(s2, ok);
1303 }
1304
char *string_replace_char(char *str, char old_char, char new_char) {
        /* Replaces every occurrence of 'old_char' in 'str' by 'new_char', in place, and returns 'str'.
         * Neither character may be NUL, and the two must differ. */

        assert(str);
        assert(old_char != '\0');
        assert(new_char != '\0');
        assert(old_char != new_char);

        for (char *c = str; *c != '\0'; c++)
                if (*c == old_char)
                        *c = new_char;

        return str;
}
1316
1317 int make_cstring(const char *s, size_t n, MakeCStringMode mode, char **ret) {
1318         char *b;
1319
1320         assert(s || n == 0);
1321         assert(mode >= 0);
1322         assert(mode < _MAKE_CSTRING_MODE_MAX);
1323
1324         /* Converts a sized character buffer into a NUL-terminated NUL string, refusing if there are embedded
1325          * NUL bytes. Whether to expect a trailing NUL byte can be specified via 'mode' */
1326
1327         if (n == 0) {
1328                 if (mode == MAKE_CSTRING_REQUIRE_TRAILING_NUL)
1329                         return -EINVAL;
1330
1331                 if (!ret)
1332                         return 0;
1333
1334                 b = new0(char, 1);
1335         } else {
1336                 const char *nul;
1337
1338                 nul = memchr(s, 0, n);
1339                 if (nul) {
1340                         if (nul < s + n - 1 || /* embedded NUL? */
1341                             mode == MAKE_CSTRING_REFUSE_TRAILING_NUL)
1342                                 return -EINVAL;
1343
1344                         n--;
1345                 } else if (mode == MAKE_CSTRING_REQUIRE_TRAILING_NUL)
1346                         return -EINVAL;
1347
1348                 if (!ret)
1349                         return 0;
1350
1351                 b = memdup_suffix0(s, n);
1352         }
1353         if (!b)
1354                 return -ENOMEM;
1355
1356         *ret = b;
1357         return 0;
1358 }
1359
size_t strspn_from_end(const char *str, const char *accept) {
        size_t len, i;

        /* Like strspn(), but counting from the end of the string: returns the number of characters at the
         * end of 'str' that are all contained in 'accept'. Returns 0 if either argument is NULL or
         * empty. */

        if (isempty(str) || isempty(accept))
                return 0;

        len = strlen(str);

        /* Walk backwards until we hit a character that is not in the accepted set */
        for (i = len; i > 0; i--)
                if (!strchr(accept, str[i - 1]))
                        break;

        return len - i;
}
1374
char *strdupspn(const char *a, const char *accept) {
        /* Returns a newly allocated copy of the initial part of 'a' that consists only of characters
         * from 'accept'. If either argument is NULL or empty an empty string is returned. */

        return isempty(a) || isempty(accept) ? strdup("") : strndup(a, strspn(a, accept));
}
1381
char *strdupcspn(const char *a, const char *reject) {
        /* Returns a newly allocated copy of the initial part of 'a' that contains none of the characters
         * from 'reject'. If 'a' is NULL or empty an empty string is returned, if 'reject' is NULL or empty
         * all of 'a' is copied. */

        if (isempty(a))
                return strdup("");

        return isempty(reject) ? strdup(a) : strndup(a, strcspn(a, reject));
}
1390
char *find_line_startswith(const char *haystack, const char *needle) {
        assert(haystack);
        assert(needle);

        /* Looks for the first line in 'haystack' that begins with 'needle', and returns a pointer to the
         * character right behind the match. Returns NULL if there is no such line. */

        for (char *hit = strstr(haystack, needle); hit; hit = strstr(hit + 1, needle))
                /* A match only counts at the beginning of a line, i.e. at the very start of the buffer
                 * or directly after a newline */
                if (hit == haystack || hit[-1] == '\n')
                        return hit + strlen(needle);

        return NULL;
}
1413
1414 char *startswith_strv(const char *string, char **strv) {
1415         char *found = NULL;
1416
1417         STRV_FOREACH(i, strv) {
1418                 found = startswith(string, *i);
1419                 if (found)
1420                         break;
1421         }
1422
1423         return found;
1424 }
1425
1426 bool version_is_valid(const char *s) {
1427         if (isempty(s))
1428                 return false;
1429
1430         if (!filename_part_is_valid(s))
1431                 return false;
1432
1433         /* This is a superset of the characters used by semver. We additionally allow "," and "_". */
1434         if (!in_charset(s, ALPHANUMERICAL ".,_-+"))
1435                 return false;
1436
1437         return true;
1438 }
1439
1440 bool version_is_valid_versionspec(const char *s) {
1441         if (!filename_part_is_valid(s))
1442                 return false;
1443
1444         if (!in_charset(s, ALPHANUMERICAL "-.~^"))
1445                 return false;
1446
1447         return true;
1448 }