lib/strutil/strutil.c

   1 /*
   2    Common strings utilities
   3
   4    Copyright (C) 2007, 2011
   5    The Free Software Foundation, Inc.
   6
   7    Written by:
   8    Rostislav Benes, 2007
   9
  10    This file is part of the Midnight Commander.
  11
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  24
  25  */
  26
  27 #include <config.h>
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <langinfo.h>
  31 #include <string.h>
  32 #include <errno.h>
  33 #include <stdarg.h>
  34
  35 #include "lib/global.h"
  36 #include "lib/strutil.h"
  37
/* Encoding-name prefixes that select the UTF-8 string class.
   Entries are matched as case-insensitive prefixes by str_test_encoding_class ();
   the table is NULL-terminated. */
static const char *str_utf8_encodings[] = {
    "utf-8",
    "utf8",
    NULL
};
  44
/* Encoding-name prefixes of standard 8-bit encodings (no wide or multibyte characters).
   Entries are matched as case-insensitive prefixes by str_test_encoding_class ();
   the table is NULL-terminated. */
static const char *str_8bit_encodings[] = {
    "cp-1251",
    "cp1251",
    "cp-1250",
    "cp1250",
    "cp-866",
    "cp866",
    "ibm-866",
    "ibm866",
    "cp-850",
    "cp850",
    "cp-852",
    "cp852",
    "iso-8859",
    "iso8859",
    "koi8",
    NULL
};
  64
/* name of the encoding in use; allocated and set by str_init_strings () */
static char *codeset = NULL;
/* upper-cased nl_langinfo (CODESET) result, cached by str_detect_termencoding () */
static char *term_encoding = NULL;
/* functions for encoding-specific operations; chosen by str_choose_str_functions () */
static struct str_class used_class;

/* converters to/from the terminal; str_init_strings () points both at str_cnv_not_convert */
GIConv str_cnv_to_term;
GIConv str_cnv_from_term;
/* codeset -> codeset converter; also used as the "no conversion needed" marker */
GIConv str_cnv_not_convert = INVALID_CONV;
  74
  75 /* if enc is same encoding like on terminal */
  76 static int
  77 str_test_not_convert (const char *enc)
  78 {
  79     return g_ascii_strcasecmp (enc, codeset) == 0;
  80 }
  81
  82 GIConv
  83 str_crt_conv_to (const char *to_enc)
  84 {
  85     return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
  86 }
  87
  88 GIConv
  89 str_crt_conv_from (const char *from_enc)
  90 {
  91     return (!str_test_not_convert (from_enc))
  92         ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
  93 }
  94
  95 void
  96 str_close_conv (GIConv conv)
  97 {
  98     if (conv != str_cnv_not_convert)
  99         g_iconv_close (conv);
 100 }
 101
 102 static estr_t
 103 _str_convert (GIConv coder, const char *string, int size, GString * buffer)
 104 {
 105     estr_t state = ESTR_SUCCESS;
 106     gchar *tmp_buff = NULL;
 107     gssize left;
 108     gsize bytes_read = 0;
 109     gsize bytes_written = 0;
 110     GError *error = NULL;
 111     errno = 0;
 112
 113     if (coder == INVALID_CONV)
 114         return ESTR_FAILURE;
 115
 116     if (string == NULL || buffer == NULL)
 117         return ESTR_FAILURE;
 118
 119     /*
 120        if (! used_class.is_valid_string (string))
 121        {
 122        return ESTR_FAILURE;
 123        }
 124      */
 125     if (size < 0)
 126     {
 127         size = strlen (string);
 128     }
 129     else
 130     {
 131         left = strlen (string);
 132         if (left < size)
 133             size = left;
 134     }
 135
 136     left = size;
 137     g_iconv (coder, NULL, NULL, NULL, NULL);
 138
 139     while (left)
 140     {
 141         tmp_buff = g_convert_with_iconv ((const gchar *) string,
 142                                          left, coder, &bytes_read, &bytes_written, &error);
 143         if (error)
 144         {
 145             int code = error->code;
 146
 147             g_error_free (error);
 148             error = NULL;
 149
 150             switch (code)
 151             {
 152             case G_CONVERT_ERROR_NO_CONVERSION:
 153                 /* Conversion between the requested character sets is not supported. */
 154                 tmp_buff = g_strnfill (strlen (string), '?');
 155                 g_string_append (buffer, tmp_buff);
 156                 g_free (tmp_buff);
 157                 return ESTR_FAILURE;
 158
 159             case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
 160                 /* Invalid byte sequence in conversion input. */
 161                 if ((tmp_buff == NULL) && (bytes_read != 0))
 162                     /* recode valid byte sequence */
 163                     tmp_buff = g_convert_with_iconv ((const gchar *) string,
 164                                                      bytes_read, coder, NULL, NULL, NULL);
 165
 166                 if (tmp_buff != NULL)
 167                 {
 168                     g_string_append (buffer, tmp_buff);
 169                     g_free (tmp_buff);
 170                 }
 171
 172                 if ((int) bytes_read < left)
 173                 {
 174                     string += bytes_read + 1;
 175                     size -= (bytes_read + 1);
 176                     left -= (bytes_read + 1);
 177                     g_string_append_c (buffer, *(string - 1));
 178                 }
 179                 else
 180                 {
 181                     return ESTR_PROBLEM;
 182                 }
 183                 state = ESTR_PROBLEM;
 184                 break;
 185
 186             case G_CONVERT_ERROR_PARTIAL_INPUT:
 187                 /* Partial character sequence at end of input. */
 188                 g_string_append (buffer, tmp_buff);
 189                 g_free (tmp_buff);
 190                 if ((int) bytes_read < left)
 191                 {
 192                     left = left - bytes_read;
 193                     tmp_buff = g_strnfill (left, '?');
 194                     g_string_append (buffer, tmp_buff);
 195                     g_free (tmp_buff);
 196                 }
 197                 return ESTR_PROBLEM;
 198
 199             case G_CONVERT_ERROR_BAD_URI:      /* Don't know how handle this error :( */
 200             case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH:    /* Don't know how handle this error :( */
 201             case G_CONVERT_ERROR_FAILED:       /* Conversion failed for some reason. */
 202             default:
 203                 g_free (tmp_buff);
 204                 return ESTR_FAILURE;
 205             }
 206         }
 207         else
 208         {
 209             if (tmp_buff != NULL)
 210             {
 211                 if (*tmp_buff)
 212                 {
 213                     g_string_append (buffer, tmp_buff);
 214                     g_free (tmp_buff);
 215                     string += bytes_read;
 216                     left -= bytes_read;
 217                 }
 218                 else
 219                 {
 220                     g_free (tmp_buff);
 221                     g_string_append (buffer, string);
 222                     return state;
 223                 }
 224             }
 225             else
 226             {
 227                 g_string_append (buffer, string);
 228                 return ESTR_PROBLEM;
 229             }
 230         }
 231     }
 232     return state;
 233 }
 234
 235 estr_t
 236 str_convert (GIConv coder, const char *string, GString * buffer)
 237 {
 238     return _str_convert (coder, string, -1, buffer);
 239 }
 240
 241 estr_t
 242 str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
 243 {
 244     return _str_convert (coder, string, size, buffer);
 245 }
 246
 247 gchar *
 248 str_conv_gerror_message (GError * error, const char *def_msg)
 249 {
 250     return used_class.conv_gerror_message (error, def_msg);
 251 }
 252
 253 estr_t
 254 str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
 255 {
 256     estr_t result;
 257
 258     if (coder == str_cnv_not_convert)
 259     {
 260         g_string_append (buffer, string != NULL ? string : "");
 261         result = ESTR_SUCCESS;
 262     }
 263     else
 264         result = _str_convert (coder, string, -1, buffer);
 265
 266     return result;
 267 }
 268
 269 estr_t
 270 str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
 271 {
 272     return used_class.vfs_convert_to (coder, string, size, buffer);
 273 }
 274
 275 void
 276 str_printf (GString * buffer, const char *format, ...)
 277 {
 278     va_list ap;
 279     va_start (ap, format);
 280 #if GLIB_CHECK_VERSION (2, 14, 0)
 281     g_string_append_vprintf (buffer, format, ap);
 282 #else
 283     {
 284         gchar *tmp;
 285         tmp = g_strdup_vprintf (format, ap);
 286         g_string_append (buffer, tmp);
 287         g_free (tmp);
 288     }
 289 #endif
 290     va_end (ap);
 291 }
 292
/* Forward BUFFER to the insert_replace_char hook of the active string class (used_class);
   presumably the hook appends that class's replacement character -- see the class code. */
void
str_insert_replace_char (GString * buffer)
{
    used_class.insert_replace_char (buffer);
}
 298
 299 estr_t
 300 str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
 301 {
 302     size_t left;
 303     size_t cnv;
 304
 305     g_iconv (conv, NULL, NULL, NULL, NULL);
 306
 307     left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
 308
 309     cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
 310     if (cnv == (size_t) (-1))
 311     {
 312         return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
 313     }
 314     else
 315     {
 316         output[0] = '\0';
 317         return ESTR_SUCCESS;
 318     }
 319 }
 320
 321
 322 const char *
 323 str_detect_termencoding (void)
 324 {
 325     if (term_encoding == NULL)
 326     {
 327         /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
 328            to utf-8 or UTF-8.
 329            On Mac OS X, it returns the same case as the LANG input.
 330            So let tranform result of nl_langinfo (CODESET) to upper case  unconditionally. */
 331         term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
 332     }
 333
 334     return term_encoding;
 335 }
 336
 337 static int
 338 str_test_encoding_class (const char *encoding, const char **table)
 339 {
 340     int t;
 341     int result = 0;
 342     if (encoding == NULL)
 343         return result;
 344
 345     for (t = 0; table[t] != NULL; t++)
 346     {
 347         result += (g_ascii_strncasecmp (encoding, table[t], strlen (table[t])) == 0);
 348     }
 349     return result;
 350 }
 351
 352 static void
 353 str_choose_str_functions (void)
 354 {
 355     if (str_test_encoding_class (codeset, str_utf8_encodings))
 356     {
 357         used_class = str_utf8_init ();
 358     }
 359     else if (str_test_encoding_class (codeset, str_8bit_encodings))
 360     {
 361         used_class = str_8bit_init ();
 362     }
 363     else
 364     {
 365         used_class = str_ascii_init ();
 366     }
 367 }
 368
 369 gboolean
 370 str_isutf8 (const char *codeset_name)
 371 {
 372     return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
 373 }
 374
 375 void
 376 str_init_strings (const char *termenc)
 377 {
 378     codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
 379
 380     str_cnv_not_convert = g_iconv_open (codeset, codeset);
 381     if (str_cnv_not_convert == INVALID_CONV)
 382     {
 383         if (termenc != NULL)
 384         {
 385             g_free (codeset);
 386             codeset = g_strdup (str_detect_termencoding ());
 387             str_cnv_not_convert = g_iconv_open (codeset, codeset);
 388         }
 389
 390         if (str_cnv_not_convert == INVALID_CONV)
 391         {
 392             g_free (codeset);
 393             codeset = g_strdup ("ASCII");
 394             str_cnv_not_convert = g_iconv_open (codeset, codeset);
 395         }
 396     }
 397
 398     str_cnv_to_term = str_cnv_not_convert;
 399     str_cnv_from_term = str_cnv_not_convert;
 400
 401     str_choose_str_functions ();
 402 }
 403
 404 void
 405 str_uninit_strings (void)
 406 {
 407     if (str_cnv_not_convert != INVALID_CONV)
 408         g_iconv_close (str_cnv_not_convert);
 409     g_free (term_encoding);
 410     g_free (codeset);
 411 }
 412
 413 const char *
 414 str_term_form (const char *text)
 415 {
 416     return used_class.term_form (text);
 417 }
 418
 419 const char *
 420 str_fit_to_term (const char *text, int width, align_crt_t just_mode)
 421 {
 422     return used_class.fit_to_term (text, width, just_mode);
 423 }
 424
 425 const char *
 426 str_term_trim (const char *text, int width)
 427 {
 428     return used_class.term_trim (text, width);
 429 }
 430
 431 const char *
 432 str_term_substring (const char *text, int start, int width)
 433 {
 434     return used_class.term_substring (text, start, width);
 435 }
 436
 437 char *
 438 str_get_next_char (char *text)
 439 {
 440
 441     used_class.cnext_char ((const char **) &text);
 442     return text;
 443 }
 444
 445 const char *
 446 str_cget_next_char (const char *text)
 447 {
 448     used_class.cnext_char (&text);
 449     return text;
 450 }
 451
 452 void
 453 str_next_char (char **text)
 454 {
 455     used_class.cnext_char ((const char **) text);
 456 }
 457
/* Forward TEXT to the cnext_char hook of the active string class (used_class);
   the hook gets the caller's pointer, so any update to *TEXT is visible to the caller. */
void
str_cnext_char (const char **text)
{
    used_class.cnext_char (text);
}
 463
 464 char *
 465 str_get_prev_char (char *text)
 466 {
 467     used_class.cprev_char ((const char **) &text);
 468     return text;
 469 }
 470
 471 const char *
 472 str_cget_prev_char (const char *text)
 473 {
 474     used_class.cprev_char (&text);
 475     return text;
 476 }
 477
 478 void
 479 str_prev_char (char **text)
 480 {
 481     used_class.cprev_char ((const char **) text);
 482 }
 483
/* Forward TEXT to the cprev_char hook of the active string class (used_class);
   the hook gets the caller's pointer, so any update to *TEXT is visible to the caller. */
void
str_cprev_char (const char **text)
{
    used_class.cprev_char (text);
}
 489
 490 char *
 491 str_get_next_char_safe (char *text)
 492 {
 493     used_class.cnext_char_safe ((const char **) &text);
 494     return text;
 495 }
 496
 497 const char *
 498 str_cget_next_char_safe (const char *text)
 499 {
 500     used_class.cnext_char_safe (&text);
 501     return text;
 502 }
 503
 504 void
 505 str_next_char_safe (char **text)
 506 {
 507     used_class.cnext_char_safe ((const char **) text);
 508 }
 509
/* Forward TEXT to the cnext_char_safe hook of the active string class (used_class);
   the hook gets the caller's pointer, so any update to *TEXT is visible to the caller. */
void
str_cnext_char_safe (const char **text)
{
    used_class.cnext_char_safe (text);
}
 515
 516 char *
 517 str_get_prev_char_safe (char *text)
 518 {
 519     used_class.cprev_char_safe ((const char **) &text);
 520     return text;
 521 }
 522
 523 const char *
 524 str_cget_prev_char_safe (const char *text)
 525 {
 526     used_class.cprev_char_safe (&text);
 527     return text;
 528 }
 529
 530 void
 531 str_prev_char_safe (char **text)
 532 {
 533     used_class.cprev_char_safe ((const char **) text);
 534 }
 535
/* Forward TEXT to the cprev_char_safe hook of the active string class (used_class);
   the hook gets the caller's pointer, so any update to *TEXT is visible to the caller. */
void
str_cprev_char_safe (const char **text)
{
    used_class.cprev_char_safe (text);
}
 541
 542 int
 543 str_next_noncomb_char (char **text)
 544 {
 545     return used_class.cnext_noncomb_char ((const char **) text);
 546 }
 547
 548 int
 549 str_cnext_noncomb_char (const char **text)
 550 {
 551     return used_class.cnext_noncomb_char (text);
 552 }
 553
 554 int
 555 str_prev_noncomb_char (char **text, const char *begin)
 556 {
 557     return used_class.cprev_noncomb_char ((const char **) text, begin);
 558 }
 559
 560 int
 561 str_cprev_noncomb_char (const char **text, const char *begin)
 562 {
 563     return used_class.cprev_noncomb_char (text, begin);
 564 }
 565
 566 int
 567 str_is_valid_char (const char *ch, size_t size)
 568 {
 569     return used_class.is_valid_char (ch, size);
 570 }
 571
 572 int
 573 str_term_width1 (const char *text)
 574 {
 575     return used_class.term_width1 (text);
 576 }
 577
 578 int
 579 str_term_width2 (const char *text, size_t length)
 580 {
 581     return used_class.term_width2 (text, length);
 582 }
 583
 584 int
 585 str_term_char_width (const char *text)
 586 {
 587     return used_class.term_char_width (text);
 588 }
 589
 590 int
 591 str_offset_to_pos (const char *text, size_t length)
 592 {
 593     return used_class.offset_to_pos (text, length);
 594 }
 595
 596 int
 597 str_length (const char *text)
 598 {
 599     return used_class.length (text);
 600 }
 601
 602 int
 603 str_length_char (const char *text)
 604 {
 605     return str_cget_next_char_safe (text) - text;
 606 }
 607
 608 int
 609 str_length2 (const char *text, int size)
 610 {
 611     return used_class.length2 (text, size);
 612 }
 613
 614 int
 615 str_length_noncomb (const char *text)
 616 {
 617     return used_class.length_noncomb (text);
 618 }
 619
 620 int
 621 str_column_to_pos (const char *text, size_t pos)
 622 {
 623     return used_class.column_to_pos (text, pos);
 624 }
 625
 626 int
 627 str_isspace (const char *ch)
 628 {
 629     return used_class.char_isspace (ch);
 630 }
 631
 632 int
 633 str_ispunct (const char *ch)
 634 {
 635     return used_class.char_ispunct (ch);
 636 }
 637
 638 int
 639 str_isalnum (const char *ch)
 640 {
 641     return used_class.char_isalnum (ch);
 642 }
 643
 644 int
 645 str_isdigit (const char *ch)
 646 {
 647     return used_class.char_isdigit (ch);
 648 }
 649
 650 int
 651 str_toupper (const char *ch, char **out, size_t * remain)
 652 {
 653     return used_class.char_toupper (ch, out, remain);
 654 }
 655
 656 int
 657 str_tolower (const char *ch, char **out, size_t * remain)
 658 {
 659     return used_class.char_tolower (ch, out, remain);
 660 }
 661
 662 int
 663 str_isprint (const char *ch)
 664 {
 665     return used_class.char_isprint (ch);
 666 }
 667
 668 gboolean
 669 str_iscombiningmark (const char *ch)
 670 {
 671     return used_class.char_iscombiningmark (ch);
 672 }
 673
 674 const char *
 675 str_trunc (const char *text, int width)
 676 {
 677     return used_class.trunc (text, width);
 678 }
 679
 680 char *
 681 str_create_search_needle (const char *needle, int case_sen)
 682 {
 683     return used_class.create_search_needle (needle, case_sen);
 684 }
 685
 686
/* Forward NEEDLE and CASE_SEN to the release_search_needle hook of the active
   string class (used_class); counterpart of str_create_search_needle (). */
void
str_release_search_needle (char *needle, int case_sen)
{
    used_class.release_search_needle (needle, case_sen);
}
 692
 693 const char *
 694 str_search_first (const char *text, const char *search, int case_sen)
 695 {
 696     return used_class.search_first (text, search, case_sen);
 697 }
 698
 699 const char *
 700 str_search_last (const char *text, const char *search, int case_sen)
 701 {
 702     return used_class.search_last (text, search, case_sen);
 703 }
 704
 705 int
 706 str_is_valid_string (const char *text)
 707 {
 708     return used_class.is_valid_string (text);
 709 }
 710
 711 int
 712 str_compare (const char *t1, const char *t2)
 713 {
 714     return used_class.compare (t1, t2);
 715 }
 716
 717 int
 718 str_ncompare (const char *t1, const char *t2)
 719 {
 720     return used_class.ncompare (t1, t2);
 721 }
 722
 723 int
 724 str_casecmp (const char *t1, const char *t2)
 725 {
 726     return used_class.casecmp (t1, t2);
 727 }
 728
 729 int
 730 str_ncasecmp (const char *t1, const char *t2)
 731 {
 732     return used_class.ncasecmp (t1, t2);
 733 }
 734
 735 int
 736 str_prefix (const char *text, const char *prefix)
 737 {
 738     return used_class.prefix (text, prefix);
 739 }
 740
 741 int
 742 str_caseprefix (const char *text, const char *prefix)
 743 {
 744     return used_class.caseprefix (text, prefix);
 745 }
 746
/* Forward the writable TEXT to the fix_string hook of the active string class
   (used_class); presumably the hook repairs invalid sequences in place -- see the
   class code. */
void
str_fix_string (char *text)
{
    used_class.fix_string (text);
}
 752
 753 char *
 754 str_create_key (const char *text, int case_sen)
 755 {
 756     return used_class.create_key (text, case_sen);
 757 }
 758
 759 char *
 760 str_create_key_for_filename (const char *text, int case_sen)
 761 {
 762     return used_class.create_key_for_filename (text, case_sen);
 763 }
 764
 765 int
 766 str_key_collate (const char *t1, const char *t2, int case_sen)
 767 {
 768     return used_class.key_collate (t1, t2, case_sen);
 769 }
 770
/* Forward KEY and CASE_SEN to the release_key hook of the active string class
   (used_class); counterpart of str_create_key (). */
void
str_release_key (char *key, int case_sen)
{
    used_class.release_key (key, case_sen);
}
 776
 777 void
 778 str_msg_term_size (const char *text, int *lines, int *columns)
 779 {
 780     char *p, *tmp;
 781     char *q;
 782     char c = '\0';
 783     int width;
 784
 785     *lines = 1;
 786     *columns = 0;
 787
 788     tmp = g_strdup (text);
 789     p = tmp;
 790
 791     while (TRUE)
 792     {
 793         q = strchr (p, '\n');
 794         if (q != NULL)
 795         {
 796             c = q[0];
 797             q[0] = '\0';
 798         }
 799
 800         width = str_term_width1 (p);
 801         if (width > *columns)
 802             *columns = width;
 803
 804         if (q == NULL)
 805             break;
 806
 807         q[0] = c;
 808         p = q + 1;
 809         (*lines)++;
 810     }
 811
 812     g_free (tmp);
 813 }
 814
 815 /* --------------------------------------------------------------------------------------------- */
 816
 817 char *
 818 strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
 819 {
 820     char *semi;
 821     ssize_t len;
 822
 823     len = strlen (haystack);
 824
 825     do
 826     {
 827         semi = g_strrstr_len (haystack, len, needle);
 828         if (semi == NULL)
 829             return NULL;
 830         len = semi - haystack - 1;
 831     }
 832     while (skip_count-- != 0);
 833     return semi;
 834 }
 835
 836 /* --------------------------------------------------------------------------------------------- */