lib/strutil/strutil.c

   1 /*
   2    Common strings utilities
   3
   4    Copyright (C) 2007, 2011, 2013
   5    The Free Software Foundation, Inc.
   6
   7    Written by:
   8    Rostislav Benes, 2007
   9
  10    This file is part of the Midnight Commander.
  11
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  24  */
  25
  26 #include <config.h>
  27
  28 #include <stdlib.h>
  29 #include <langinfo.h>
  30 #include <string.h>
  31 #include <errno.h>
  32
  33 #include "lib/global.h"
  34 #include "lib/strutil.h"
  35
  36 /*names, that are used for utf-8 */
  37 static const char *str_utf8_encodings[] = {
  38     "utf-8",
  39     "utf8",
  40     NULL
  41 };
  42
  43 /* standard 8bit encodings, no wide or multibytes characters */
  44 static const char *str_8bit_encodings[] = {
  45     "cp-1251",
  46     "cp1251",
  47     "cp-1250",
  48     "cp1250",
  49     "cp-866",
  50     "cp866",
  51     "ibm-866",
  52     "ibm866",
  53     "cp-850",
  54     "cp850",
  55     "cp-852",
  56     "cp852",
  57     "iso-8859",
  58     "iso8859",
  59     "koi8",
  60     NULL
  61 };
  62
  63 /* terminal encoding */
  64 static char *codeset = NULL;
  65 static char *term_encoding = NULL;
  66 /* function for encoding specific operations */
  67 static struct str_class used_class;
  68
  69 GIConv str_cnv_to_term;
  70 GIConv str_cnv_from_term;
  71 GIConv str_cnv_not_convert = INVALID_CONV;
  72
  73 /* if enc is same encoding like on terminal */
  74 static int
  75 str_test_not_convert (const char *enc)
  76 {
  77     return g_ascii_strcasecmp (enc, codeset) == 0;
  78 }
  79
  80 GIConv
  81 str_crt_conv_to (const char *to_enc)
  82 {
  83     return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
  84 }
  85
  86 GIConv
  87 str_crt_conv_from (const char *from_enc)
  88 {
  89     return (!str_test_not_convert (from_enc))
  90         ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
  91 }
  92
  93 void
  94 str_close_conv (GIConv conv)
  95 {
  96     if (conv != str_cnv_not_convert)
  97         g_iconv_close (conv);
  98 }
  99
 100 static estr_t
 101 _str_convert (GIConv coder, const char *string, int size, GString * buffer)
 102 {
 103     estr_t state = ESTR_SUCCESS;
 104     gssize left;
 105     gsize bytes_read = 0;
 106     gsize bytes_written = 0;
 107
 108     errno = 0;                  /* FIXME: is it really needed? */
 109
 110     if (coder == INVALID_CONV)
 111         return ESTR_FAILURE;
 112
 113     if (string == NULL || buffer == NULL)
 114         return ESTR_FAILURE;
 115
 116     /*
 117        if (! used_class.is_valid_string (string))
 118        {
 119        return ESTR_FAILURE;
 120        }
 121      */
 122     if (size < 0)
 123         size = strlen (string);
 124     else
 125     {
 126         left = strlen (string);
 127         if (left < size)
 128             size = left;
 129     }
 130
 131     left = size;
 132     g_iconv (coder, NULL, NULL, NULL, NULL);
 133
 134     while (left != 0)
 135     {
 136         gchar *tmp_buff;
 137         GError *error = NULL;
 138
 139         tmp_buff = g_convert_with_iconv ((const gchar *) string,
 140                                          left, coder, &bytes_read, &bytes_written, &error);
 141         if (error != NULL)
 142         {
 143             int code = error->code;
 144
 145             g_error_free (error);
 146             error = NULL;
 147
 148             switch (code)
 149             {
 150             case G_CONVERT_ERROR_NO_CONVERSION:
 151                 /* Conversion between the requested character sets is not supported. */
 152                 tmp_buff = g_strnfill (strlen (string), '?');
 153                 g_string_append (buffer, tmp_buff);
 154                 g_free (tmp_buff);
 155                 return ESTR_FAILURE;
 156
 157             case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
 158                 /* Invalid byte sequence in conversion input. */
 159                 if ((tmp_buff == NULL) && (bytes_read != 0))
 160                     /* recode valid byte sequence */
 161                     tmp_buff = g_convert_with_iconv ((const gchar *) string,
 162                                                      bytes_read, coder, NULL, NULL, NULL);
 163
 164                 if (tmp_buff != NULL)
 165                 {
 166                     g_string_append (buffer, tmp_buff);
 167                     g_free (tmp_buff);
 168                 }
 169
 170                 if ((int) bytes_read >= left)
 171                     return ESTR_PROBLEM;
 172
 173                 string += bytes_read + 1;
 174                 size -= (bytes_read + 1);
 175                 left -= (bytes_read + 1);
 176                 g_string_append_c (buffer, *(string - 1));
 177                 state = ESTR_PROBLEM;
 178                 break;
 179
 180             case G_CONVERT_ERROR_PARTIAL_INPUT:
 181                 /* Partial character sequence at end of input. */
 182                 g_string_append (buffer, tmp_buff);
 183                 g_free (tmp_buff);
 184                 if ((int) bytes_read < left)
 185                 {
 186                     left = left - bytes_read;
 187                     tmp_buff = g_strnfill (left, '?');
 188                     g_string_append (buffer, tmp_buff);
 189                     g_free (tmp_buff);
 190                 }
 191                 return ESTR_PROBLEM;
 192
 193             case G_CONVERT_ERROR_BAD_URI:      /* Don't know how handle this error :( */
 194             case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH:    /* Don't know how handle this error :( */
 195             case G_CONVERT_ERROR_FAILED:       /* Conversion failed for some reason. */
 196             default:
 197                 g_free (tmp_buff);
 198                 return ESTR_FAILURE;
 199             }
 200         }
 201         else if (tmp_buff == NULL)
 202         {
 203             g_string_append (buffer, string);
 204             return ESTR_PROBLEM;
 205         }
 206         else if (*tmp_buff == '\0')
 207         {
 208             g_free (tmp_buff);
 209             g_string_append (buffer, string);
 210             return state;
 211         }
 212         else
 213         {
 214             g_string_append (buffer, tmp_buff);
 215             g_free (tmp_buff);
 216             string += bytes_read;
 217             left -= bytes_read;
 218         }
 219     }
 220
 221     return state;
 222 }
 223
 224 estr_t
 225 str_convert (GIConv coder, const char *string, GString * buffer)
 226 {
 227     return _str_convert (coder, string, -1, buffer);
 228 }
 229
 230 estr_t
 231 str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
 232 {
 233     return _str_convert (coder, string, size, buffer);
 234 }
 235
 236 gchar *
 237 str_conv_gerror_message (GError * error, const char *def_msg)
 238 {
 239     return used_class.conv_gerror_message (error, def_msg);
 240 }
 241
 242 estr_t
 243 str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
 244 {
 245     estr_t result = ESTR_SUCCESS;
 246
 247     if (coder == str_cnv_not_convert)
 248         g_string_append (buffer, string != NULL ? string : "");
 249     else
 250         result = _str_convert (coder, string, -1, buffer);
 251
 252     return result;
 253 }
 254
 255 estr_t
 256 str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
 257 {
 258     return used_class.vfs_convert_to (coder, string, size, buffer);
 259 }
 260
 261 void
 262 str_printf (GString * buffer, const char *format, ...)
 263 {
 264     va_list ap;
 265     va_start (ap, format);
 266
 267 #if GLIB_CHECK_VERSION (2, 14, 0)
 268     g_string_append_vprintf (buffer, format, ap);
 269 #else
 270     {
 271         gchar *tmp;
 272
 273         tmp = g_strdup_vprintf (format, ap);
 274         g_string_append (buffer, tmp);
 275         g_free (tmp);
 276     }
 277 #endif
 278     va_end (ap);
 279 }
 280
 281 void
 282 str_insert_replace_char (GString * buffer)
 283 {
 284     used_class.insert_replace_char (buffer);
 285 }
 286
 287 estr_t
 288 str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
 289 {
 290     size_t left;
 291     size_t cnv;
 292
 293     g_iconv (conv, NULL, NULL, NULL, NULL);
 294
 295     left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
 296
 297     cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
 298     if (cnv == (size_t) (-1))
 299         return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
 300
 301     output[0] = '\0';
 302     return ESTR_SUCCESS;
 303 }
 304
 305
 306 const char *
 307 str_detect_termencoding (void)
 308 {
 309     if (term_encoding == NULL)
 310     {
 311         /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
 312            to utf-8 or UTF-8.
 313            On Mac OS X, it returns the same case as the LANG input.
 314            So let tranform result of nl_langinfo (CODESET) to upper case  unconditionally. */
 315         term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
 316     }
 317
 318     return term_encoding;
 319 }
 320
 321 static int
 322 str_test_encoding_class (const char *encoding, const char **table)
 323 {
 324     int result = 0;
 325
 326     if (encoding != NULL)
 327     {
 328         int t;
 329
 330         for (t = 0; table[t] != NULL; t++)
 331             if (g_ascii_strncasecmp (encoding, table[t], strlen (table[t])) == 0)
 332                 result++;
 333     }
 334
 335     return result;
 336 }
 337
 338 static void
 339 str_choose_str_functions (void)
 340 {
 341     if (str_test_encoding_class (codeset, str_utf8_encodings))
 342         used_class = str_utf8_init ();
 343     else if (str_test_encoding_class (codeset, str_8bit_encodings))
 344         used_class = str_8bit_init ();
 345     else
 346         used_class = str_ascii_init ();
 347 }
 348
 349 gboolean
 350 str_isutf8 (const char *codeset_name)
 351 {
 352     return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
 353 }
 354
 355 void
 356 str_init_strings (const char *termenc)
 357 {
 358     codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
 359
 360     str_cnv_not_convert = g_iconv_open (codeset, codeset);
 361     if (str_cnv_not_convert == INVALID_CONV)
 362     {
 363         if (termenc != NULL)
 364         {
 365             g_free (codeset);
 366             codeset = g_strdup (str_detect_termencoding ());
 367             str_cnv_not_convert = g_iconv_open (codeset, codeset);
 368         }
 369
 370         if (str_cnv_not_convert == INVALID_CONV)
 371         {
 372             g_free (codeset);
 373             codeset = g_strdup (DEFAULT_CHARSET);
 374             str_cnv_not_convert = g_iconv_open (codeset, codeset);
 375         }
 376     }
 377
 378     str_cnv_to_term = str_cnv_not_convert;
 379     str_cnv_from_term = str_cnv_not_convert;
 380
 381     str_choose_str_functions ();
 382 }
 383
 384 void
 385 str_uninit_strings (void)
 386 {
 387     if (str_cnv_not_convert != INVALID_CONV)
 388         g_iconv_close (str_cnv_not_convert);
 389     g_free (term_encoding);
 390     g_free (codeset);
 391 }
 392
 393 const char *
 394 str_term_form (const char *text)
 395 {
 396     return used_class.term_form (text);
 397 }
 398
 399 const char *
 400 str_fit_to_term (const char *text, int width, align_crt_t just_mode)
 401 {
 402     return used_class.fit_to_term (text, width, just_mode);
 403 }
 404
 405 const char *
 406 str_term_trim (const char *text, int width)
 407 {
 408     return used_class.term_trim (text, width);
 409 }
 410
 411 const char *
 412 str_term_substring (const char *text, int start, int width)
 413 {
 414     return used_class.term_substring (text, start, width);
 415 }
 416
 417 char *
 418 str_get_next_char (char *text)
 419 {
 420
 421     used_class.cnext_char ((const char **) &text);
 422     return text;
 423 }
 424
 425 const char *
 426 str_cget_next_char (const char *text)
 427 {
 428     used_class.cnext_char (&text);
 429     return text;
 430 }
 431
 432 void
 433 str_next_char (char **text)
 434 {
 435     used_class.cnext_char ((const char **) text);
 436 }
 437
 438 void
 439 str_cnext_char (const char **text)
 440 {
 441     used_class.cnext_char (text);
 442 }
 443
 444 char *
 445 str_get_prev_char (char *text)
 446 {
 447     used_class.cprev_char ((const char **) &text);
 448     return text;
 449 }
 450
 451 const char *
 452 str_cget_prev_char (const char *text)
 453 {
 454     used_class.cprev_char (&text);
 455     return text;
 456 }
 457
 458 void
 459 str_prev_char (char **text)
 460 {
 461     used_class.cprev_char ((const char **) text);
 462 }
 463
 464 void
 465 str_cprev_char (const char **text)
 466 {
 467     used_class.cprev_char (text);
 468 }
 469
 470 char *
 471 str_get_next_char_safe (char *text)
 472 {
 473     used_class.cnext_char_safe ((const char **) &text);
 474     return text;
 475 }
 476
 477 const char *
 478 str_cget_next_char_safe (const char *text)
 479 {
 480     used_class.cnext_char_safe (&text);
 481     return text;
 482 }
 483
 484 void
 485 str_next_char_safe (char **text)
 486 {
 487     used_class.cnext_char_safe ((const char **) text);
 488 }
 489
 490 void
 491 str_cnext_char_safe (const char **text)
 492 {
 493     used_class.cnext_char_safe (text);
 494 }
 495
 496 char *
 497 str_get_prev_char_safe (char *text)
 498 {
 499     used_class.cprev_char_safe ((const char **) &text);
 500     return text;
 501 }
 502
 503 const char *
 504 str_cget_prev_char_safe (const char *text)
 505 {
 506     used_class.cprev_char_safe (&text);
 507     return text;
 508 }
 509
 510 void
 511 str_prev_char_safe (char **text)
 512 {
 513     used_class.cprev_char_safe ((const char **) text);
 514 }
 515
 516 void
 517 str_cprev_char_safe (const char **text)
 518 {
 519     used_class.cprev_char_safe (text);
 520 }
 521
 522 int
 523 str_next_noncomb_char (char **text)
 524 {
 525     return used_class.cnext_noncomb_char ((const char **) text);
 526 }
 527
 528 int
 529 str_cnext_noncomb_char (const char **text)
 530 {
 531     return used_class.cnext_noncomb_char (text);
 532 }
 533
 534 int
 535 str_prev_noncomb_char (char **text, const char *begin)
 536 {
 537     return used_class.cprev_noncomb_char ((const char **) text, begin);
 538 }
 539
 540 int
 541 str_cprev_noncomb_char (const char **text, const char *begin)
 542 {
 543     return used_class.cprev_noncomb_char (text, begin);
 544 }
 545
 546 int
 547 str_is_valid_char (const char *ch, size_t size)
 548 {
 549     return used_class.is_valid_char (ch, size);
 550 }
 551
 552 int
 553 str_term_width1 (const char *text)
 554 {
 555     return used_class.term_width1 (text);
 556 }
 557
 558 int
 559 str_term_width2 (const char *text, size_t length)
 560 {
 561     return used_class.term_width2 (text, length);
 562 }
 563
 564 int
 565 str_term_char_width (const char *text)
 566 {
 567     return used_class.term_char_width (text);
 568 }
 569
 570 int
 571 str_offset_to_pos (const char *text, size_t length)
 572 {
 573     return used_class.offset_to_pos (text, length);
 574 }
 575
 576 int
 577 str_length (const char *text)
 578 {
 579     return used_class.length (text);
 580 }
 581
 582 int
 583 str_length_char (const char *text)
 584 {
 585     return str_cget_next_char_safe (text) - text;
 586 }
 587
 588 int
 589 str_length2 (const char *text, int size)
 590 {
 591     return used_class.length2 (text, size);
 592 }
 593
 594 int
 595 str_length_noncomb (const char *text)
 596 {
 597     return used_class.length_noncomb (text);
 598 }
 599
 600 int
 601 str_column_to_pos (const char *text, size_t pos)
 602 {
 603     return used_class.column_to_pos (text, pos);
 604 }
 605
 606 int
 607 str_isspace (const char *ch)
 608 {
 609     return used_class.char_isspace (ch);
 610 }
 611
 612 int
 613 str_ispunct (const char *ch)
 614 {
 615     return used_class.char_ispunct (ch);
 616 }
 617
 618 int
 619 str_isalnum (const char *ch)
 620 {
 621     return used_class.char_isalnum (ch);
 622 }
 623
 624 int
 625 str_isdigit (const char *ch)
 626 {
 627     return used_class.char_isdigit (ch);
 628 }
 629
 630 int
 631 str_toupper (const char *ch, char **out, size_t * remain)
 632 {
 633     return used_class.char_toupper (ch, out, remain);
 634 }
 635
 636 int
 637 str_tolower (const char *ch, char **out, size_t * remain)
 638 {
 639     return used_class.char_tolower (ch, out, remain);
 640 }
 641
 642 int
 643 str_isprint (const char *ch)
 644 {
 645     return used_class.char_isprint (ch);
 646 }
 647
 648 gboolean
 649 str_iscombiningmark (const char *ch)
 650 {
 651     return used_class.char_iscombiningmark (ch);
 652 }
 653
 654 const char *
 655 str_trunc (const char *text, int width)
 656 {
 657     return used_class.trunc (text, width);
 658 }
 659
 660 char *
 661 str_create_search_needle (const char *needle, int case_sen)
 662 {
 663     return used_class.create_search_needle (needle, case_sen);
 664 }
 665
 666
 667 void
 668 str_release_search_needle (char *needle, int case_sen)
 669 {
 670     used_class.release_search_needle (needle, case_sen);
 671 }
 672
 673 const char *
 674 str_search_first (const char *text, const char *search, int case_sen)
 675 {
 676     return used_class.search_first (text, search, case_sen);
 677 }
 678
 679 const char *
 680 str_search_last (const char *text, const char *search, int case_sen)
 681 {
 682     return used_class.search_last (text, search, case_sen);
 683 }
 684
 685 int
 686 str_is_valid_string (const char *text)
 687 {
 688     return used_class.is_valid_string (text);
 689 }
 690
 691 int
 692 str_compare (const char *t1, const char *t2)
 693 {
 694     return used_class.compare (t1, t2);
 695 }
 696
 697 int
 698 str_ncompare (const char *t1, const char *t2)
 699 {
 700     return used_class.ncompare (t1, t2);
 701 }
 702
 703 int
 704 str_casecmp (const char *t1, const char *t2)
 705 {
 706     return used_class.casecmp (t1, t2);
 707 }
 708
 709 int
 710 str_ncasecmp (const char *t1, const char *t2)
 711 {
 712     return used_class.ncasecmp (t1, t2);
 713 }
 714
 715 int
 716 str_prefix (const char *text, const char *prefix)
 717 {
 718     return used_class.prefix (text, prefix);
 719 }
 720
 721 int
 722 str_caseprefix (const char *text, const char *prefix)
 723 {
 724     return used_class.caseprefix (text, prefix);
 725 }
 726
 727 void
 728 str_fix_string (char *text)
 729 {
 730     used_class.fix_string (text);
 731 }
 732
 733 char *
 734 str_create_key (const char *text, int case_sen)
 735 {
 736     return used_class.create_key (text, case_sen);
 737 }
 738
 739 char *
 740 str_create_key_for_filename (const char *text, int case_sen)
 741 {
 742     return used_class.create_key_for_filename (text, case_sen);
 743 }
 744
 745 int
 746 str_key_collate (const char *t1, const char *t2, int case_sen)
 747 {
 748     return used_class.key_collate (t1, t2, case_sen);
 749 }
 750
 751 void
 752 str_release_key (char *key, int case_sen)
 753 {
 754     used_class.release_key (key, case_sen);
 755 }
 756
 757 void
 758 str_msg_term_size (const char *text, int *lines, int *columns)
 759 {
 760     char *p, *tmp;
 761     char *q;
 762     char c = '\0';
 763     int width;
 764
 765     *lines = 1;
 766     *columns = 0;
 767
 768     tmp = g_strdup (text);
 769     p = tmp;
 770
 771     while (TRUE)
 772     {
 773         q = strchr (p, '\n');
 774         if (q != NULL)
 775         {
 776             c = q[0];
 777             q[0] = '\0';
 778         }
 779
 780         width = str_term_width1 (p);
 781         if (width > *columns)
 782             *columns = width;
 783
 784         if (q == NULL)
 785             break;
 786
 787         q[0] = c;
 788         p = q + 1;
 789         (*lines)++;
 790     }
 791
 792     g_free (tmp);
 793 }
 794
 795 /* --------------------------------------------------------------------------------------------- */
 796
 797 char *
 798 strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
 799 {
 800     char *semi;
 801     ssize_t len;
 802
 803     len = strlen (haystack);
 804
 805     do
 806     {
 807         semi = g_strrstr_len (haystack, len, needle);
 808         if (semi == NULL)
 809             return NULL;
 810         len = semi - haystack - 1;
 811     }
 812     while (skip_count-- != 0);
 813
 814     return semi;
 815 }
 816
 817 /* --------------------------------------------------------------------------------------------- */
 818 /* Interprete string as a non-negative decimal integer, optionally multiplied by various values.
 819  *
 820  * @param str input value
 821  * @param invalid set to TRUE if "str" does not represent a number in this format
 822  *
 823  * @return non-integer representation of "str", 0 in case of error.
 824  */
 825
 826 uintmax_t
 827 parse_integer (const char *str, gboolean * invalid)
 828 {
 829     uintmax_t n;
 830     char *suffix;
 831     strtol_error_t e;
 832
 833     e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0");
 834     if (e == LONGINT_INVALID_SUFFIX_CHAR && *suffix == 'x')
 835     {
 836         uintmax_t multiplier;
 837
 838         multiplier = parse_integer (suffix + 1, invalid);
 839         if (multiplier != 0 && n * multiplier / multiplier != n)
 840         {
 841             *invalid = TRUE;
 842             return 0;
 843         }
 844
 845         n *= multiplier;
 846     }
 847     else if (e != LONGINT_OK)
 848     {
 849         *invalid = TRUE;
 850         n = 0;
 851     }
 852
 853     return n;
 854 }
 855
 856 /* --------------------------------------------------------------------------------------------- */