lib/strutil/strutil.c

   1 /*
   2    Common strings utilities
   3
   4    Copyright (C) 2007, 2011
   5    The Free Software Foundation, Inc.
   6
   7    Written by:
   8    Rostislav Benes, 2007
   9
  10    The file_date routine is mostly from GNU's fileutils package,
  11    written by Richard Stallman and David MacKenzie.
  12
  13    This file is part of the Midnight Commander.
  14
  15    The Midnight Commander is free software: you can redistribute it
  16    and/or modify it under the terms of the GNU General Public License as
  17    published by the Free Software Foundation, either version 3 of the License,
  18    or (at your option) any later version.
  19
  20    The Midnight Commander is distributed in the hope that it will be useful,
  21    but WITHOUT ANY WARRANTY; without even the implied warranty of
  22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  23    GNU General Public License for more details.
  24
  25    You should have received a copy of the GNU General Public License
  26    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  27
  28  */
  29
  30 #include <config.h>
  31 #include <stdlib.h>
  32 #include <stdio.h>
  33 #include <langinfo.h>
  34 #include <string.h>
  35 #include <errno.h>
  36 #include <stdarg.h>
  37
  38 #include "lib/global.h"
  39 #include "lib/strutil.h"
  40
  41 /*names, that are used for utf-8 */
  42 static const char *str_utf8_encodings[] = {
  43     "utf-8",
  44     "utf8",
  45     NULL
  46 };
  47
  48 /* standard 8bit encodings, no wide or multibytes characters */
  49 static const char *str_8bit_encodings[] = {
  50     "cp-1251",
  51     "cp1251",
  52     "cp-1250",
  53     "cp1250",
  54     "cp-866",
  55     "cp866",
  56     "ibm-866",
  57     "ibm866",
  58     "cp-850",
  59     "cp850",
  60     "cp-852",
  61     "cp852",
  62     "iso-8859",
  63     "iso8859",
  64     "koi8",
  65     NULL
  66 };
  67
  68 /* terminal encoding */
  69 static char *codeset = NULL;
  70 static char *term_encoding = NULL;
  71 /* function for encoding specific operations */
  72 static struct str_class used_class;
  73
  74 GIConv str_cnv_to_term;
  75 GIConv str_cnv_from_term;
  76 GIConv str_cnv_not_convert = INVALID_CONV;
  77
  78 /* if enc is same encoding like on terminal */
  79 static int
  80 str_test_not_convert (const char *enc)
  81 {
  82     return g_ascii_strcasecmp (enc, codeset) == 0;
  83 }
  84
  85 GIConv
  86 str_crt_conv_to (const char *to_enc)
  87 {
  88     return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
  89 }
  90
  91 GIConv
  92 str_crt_conv_from (const char *from_enc)
  93 {
  94     return (!str_test_not_convert (from_enc))
  95         ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
  96 }
  97
  98 void
  99 str_close_conv (GIConv conv)
 100 {
 101     if (conv != str_cnv_not_convert)
 102         g_iconv_close (conv);
 103 }
 104
 105 static estr_t
 106 _str_convert (GIConv coder, const char *string, int size, GString * buffer)
 107 {
 108     estr_t state = ESTR_SUCCESS;
 109     gchar *tmp_buff = NULL;
 110     gssize left;
 111     gsize bytes_read = 0;
 112     gsize bytes_written = 0;
 113     GError *error = NULL;
 114     errno = 0;
 115
 116     if (coder == INVALID_CONV)
 117         return ESTR_FAILURE;
 118
 119     if (string == NULL || buffer == NULL)
 120         return ESTR_FAILURE;
 121
 122     /*
 123        if (! used_class.is_valid_string (string))
 124        {
 125        return ESTR_FAILURE;
 126        }
 127      */
 128     if (size < 0)
 129     {
 130         size = strlen (string);
 131     }
 132     else
 133     {
 134         left = strlen (string);
 135         if (left < size)
 136             size = left;
 137     }
 138
 139     left = size;
 140     g_iconv (coder, NULL, NULL, NULL, NULL);
 141
 142     while (left)
 143     {
 144         tmp_buff = g_convert_with_iconv ((const gchar *) string,
 145                                          left, coder, &bytes_read, &bytes_written, &error);
 146         if (error)
 147         {
 148             int code = error->code;
 149
 150             g_error_free (error);
 151             error = NULL;
 152
 153             switch (code)
 154             {
 155             case G_CONVERT_ERROR_NO_CONVERSION:
 156                 /* Conversion between the requested character sets is not supported. */
 157                 tmp_buff = g_strnfill (strlen (string), '?');
 158                 g_string_append (buffer, tmp_buff);
 159                 g_free (tmp_buff);
 160                 return ESTR_FAILURE;
 161
 162             case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
 163                 /* Invalid byte sequence in conversion input. */
 164                 if ((tmp_buff == NULL) && (bytes_read != 0))
 165                     /* recode valid byte sequence */
 166                     tmp_buff = g_convert_with_iconv ((const gchar *) string,
 167                                                      bytes_read, coder, NULL, NULL, NULL);
 168
 169                 if (tmp_buff != NULL)
 170                 {
 171                     g_string_append (buffer, tmp_buff);
 172                     g_free (tmp_buff);
 173                 }
 174
 175                 if ((int) bytes_read < left)
 176                 {
 177                     string += bytes_read + 1;
 178                     size -= (bytes_read + 1);
 179                     left -= (bytes_read + 1);
 180                     g_string_append_c (buffer, *(string - 1));
 181                 }
 182                 else
 183                 {
 184                     return ESTR_PROBLEM;
 185                 }
 186                 state = ESTR_PROBLEM;
 187                 break;
 188
 189             case G_CONVERT_ERROR_PARTIAL_INPUT:
 190                 /* Partial character sequence at end of input. */
 191                 g_string_append (buffer, tmp_buff);
 192                 g_free (tmp_buff);
 193                 if ((int) bytes_read < left)
 194                 {
 195                     left = left - bytes_read;
 196                     tmp_buff = g_strnfill (left, '?');
 197                     g_string_append (buffer, tmp_buff);
 198                     g_free (tmp_buff);
 199                 }
 200                 return ESTR_PROBLEM;
 201
 202             case G_CONVERT_ERROR_BAD_URI:      /* Don't know how handle this error :( */
 203             case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH:    /* Don't know how handle this error :( */
 204             case G_CONVERT_ERROR_FAILED:       /* Conversion failed for some reason. */
 205             default:
 206                 g_free (tmp_buff);
 207                 return ESTR_FAILURE;
 208             }
 209         }
 210         else
 211         {
 212             if (tmp_buff != NULL)
 213             {
 214                 if (*tmp_buff)
 215                 {
 216                     g_string_append (buffer, tmp_buff);
 217                     g_free (tmp_buff);
 218                     string += bytes_read;
 219                     left -= bytes_read;
 220                 }
 221                 else
 222                 {
 223                     g_free (tmp_buff);
 224                     g_string_append (buffer, string);
 225                     return state;
 226                 }
 227             }
 228             else
 229             {
 230                 g_string_append (buffer, string);
 231                 return ESTR_PROBLEM;
 232             }
 233         }
 234     }
 235     return state;
 236 }
 237
 238 estr_t
 239 str_convert (GIConv coder, const char *string, GString * buffer)
 240 {
 241     return _str_convert (coder, string, -1, buffer);
 242 }
 243
 244 estr_t
 245 str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
 246 {
 247     return _str_convert (coder, string, size, buffer);
 248 }
 249
 250 gchar *
 251 str_conv_gerror_message (GError * error, const char *def_msg)
 252 {
 253     return used_class.conv_gerror_message (error, def_msg);
 254 }
 255
 256 estr_t
 257 str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
 258 {
 259     estr_t result;
 260
 261     if (coder == str_cnv_not_convert)
 262     {
 263         g_string_append (buffer, string != NULL ? string : "");
 264         result = ESTR_SUCCESS;
 265     }
 266     else
 267         result = _str_convert (coder, string, -1, buffer);
 268
 269     return result;
 270 }
 271
 272 estr_t
 273 str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
 274 {
 275     return used_class.vfs_convert_to (coder, string, size, buffer);
 276 }
 277
 278 void
 279 str_printf (GString * buffer, const char *format, ...)
 280 {
 281     va_list ap;
 282     va_start (ap, format);
 283 #if GLIB_CHECK_VERSION (2, 14, 0)
 284     g_string_append_vprintf (buffer, format, ap);
 285 #else
 286     {
 287         gchar *tmp;
 288         tmp = g_strdup_vprintf (format, ap);
 289         g_string_append (buffer, tmp);
 290         g_free (tmp);
 291     }
 292 #endif
 293     va_end (ap);
 294 }
 295
 296 void
 297 str_insert_replace_char (GString * buffer)
 298 {
 299     used_class.insert_replace_char (buffer);
 300 }
 301
 302 estr_t
 303 str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
 304 {
 305     size_t left;
 306     size_t cnv;
 307
 308     g_iconv (conv, NULL, NULL, NULL, NULL);
 309
 310     left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
 311
 312     cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
 313     if (cnv == (size_t) (-1))
 314     {
 315         return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
 316     }
 317     else
 318     {
 319         output[0] = '\0';
 320         return ESTR_SUCCESS;
 321     }
 322 }
 323
 324
 325 const char *
 326 str_detect_termencoding (void)
 327 {
 328     if (term_encoding == NULL)
 329     {
 330         /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
 331            to utf-8 or UTF-8.
 332            On Mac OS X, it returns the same case as the LANG input.
 333            So let tranform result of nl_langinfo (CODESET) to upper case  unconditionally. */
 334         term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
 335     }
 336
 337     return term_encoding;
 338 }
 339
 340 static int
 341 str_test_encoding_class (const char *encoding, const char **table)
 342 {
 343     int t;
 344     int result = 0;
 345     if (encoding == NULL)
 346         return result;
 347
 348     for (t = 0; table[t] != NULL; t++)
 349     {
 350         result += (g_ascii_strncasecmp (encoding, table[t], strlen (table[t])) == 0);
 351     }
 352     return result;
 353 }
 354
 355 static void
 356 str_choose_str_functions (void)
 357 {
 358     if (str_test_encoding_class (codeset, str_utf8_encodings))
 359     {
 360         used_class = str_utf8_init ();
 361     }
 362     else if (str_test_encoding_class (codeset, str_8bit_encodings))
 363     {
 364         used_class = str_8bit_init ();
 365     }
 366     else
 367     {
 368         used_class = str_ascii_init ();
 369     }
 370 }
 371
 372 gboolean
 373 str_isutf8 (const char *codeset_name)
 374 {
 375     return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
 376 }
 377
 378 void
 379 str_init_strings (const char *termenc)
 380 {
 381     codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
 382
 383     str_cnv_not_convert = g_iconv_open (codeset, codeset);
 384     if (str_cnv_not_convert == INVALID_CONV)
 385     {
 386         if (termenc != NULL)
 387         {
 388             g_free (codeset);
 389             codeset = g_strdup (str_detect_termencoding ());
 390             str_cnv_not_convert = g_iconv_open (codeset, codeset);
 391         }
 392
 393         if (str_cnv_not_convert == INVALID_CONV)
 394         {
 395             g_free (codeset);
 396             codeset = g_strdup ("ASCII");
 397             str_cnv_not_convert = g_iconv_open (codeset, codeset);
 398         }
 399     }
 400
 401     str_cnv_to_term = str_cnv_not_convert;
 402     str_cnv_from_term = str_cnv_not_convert;
 403
 404     str_choose_str_functions ();
 405 }
 406
 407 void
 408 str_uninit_strings (void)
 409 {
 410     if (str_cnv_not_convert != INVALID_CONV)
 411         g_iconv_close (str_cnv_not_convert);
 412     g_free (term_encoding);
 413     g_free (codeset);
 414 }
 415
 416 const char *
 417 str_term_form (const char *text)
 418 {
 419     return used_class.term_form (text);
 420 }
 421
 422 const char *
 423 str_fit_to_term (const char *text, int width, align_crt_t just_mode)
 424 {
 425     return used_class.fit_to_term (text, width, just_mode);
 426 }
 427
 428 const char *
 429 str_term_trim (const char *text, int width)
 430 {
 431     return used_class.term_trim (text, width);
 432 }
 433
 434 const char *
 435 str_term_substring (const char *text, int start, int width)
 436 {
 437     return used_class.term_substring (text, start, width);
 438 }
 439
 440 char *
 441 str_get_next_char (char *text)
 442 {
 443
 444     used_class.cnext_char ((const char **) &text);
 445     return text;
 446 }
 447
 448 const char *
 449 str_cget_next_char (const char *text)
 450 {
 451     used_class.cnext_char (&text);
 452     return text;
 453 }
 454
 455 void
 456 str_next_char (char **text)
 457 {
 458     used_class.cnext_char ((const char **) text);
 459 }
 460
 461 void
 462 str_cnext_char (const char **text)
 463 {
 464     used_class.cnext_char (text);
 465 }
 466
 467 char *
 468 str_get_prev_char (char *text)
 469 {
 470     used_class.cprev_char ((const char **) &text);
 471     return text;
 472 }
 473
 474 const char *
 475 str_cget_prev_char (const char *text)
 476 {
 477     used_class.cprev_char (&text);
 478     return text;
 479 }
 480
 481 void
 482 str_prev_char (char **text)
 483 {
 484     used_class.cprev_char ((const char **) text);
 485 }
 486
 487 void
 488 str_cprev_char (const char **text)
 489 {
 490     used_class.cprev_char (text);
 491 }
 492
 493 char *
 494 str_get_next_char_safe (char *text)
 495 {
 496     used_class.cnext_char_safe ((const char **) &text);
 497     return text;
 498 }
 499
 500 const char *
 501 str_cget_next_char_safe (const char *text)
 502 {
 503     used_class.cnext_char_safe (&text);
 504     return text;
 505 }
 506
 507 void
 508 str_next_char_safe (char **text)
 509 {
 510     used_class.cnext_char_safe ((const char **) text);
 511 }
 512
 513 void
 514 str_cnext_char_safe (const char **text)
 515 {
 516     used_class.cnext_char_safe (text);
 517 }
 518
 519 char *
 520 str_get_prev_char_safe (char *text)
 521 {
 522     used_class.cprev_char_safe ((const char **) &text);
 523     return text;
 524 }
 525
 526 const char *
 527 str_cget_prev_char_safe (const char *text)
 528 {
 529     used_class.cprev_char_safe (&text);
 530     return text;
 531 }
 532
 533 void
 534 str_prev_char_safe (char **text)
 535 {
 536     used_class.cprev_char_safe ((const char **) text);
 537 }
 538
 539 void
 540 str_cprev_char_safe (const char **text)
 541 {
 542     used_class.cprev_char_safe (text);
 543 }
 544
 545 int
 546 str_next_noncomb_char (char **text)
 547 {
 548     return used_class.cnext_noncomb_char ((const char **) text);
 549 }
 550
 551 int
 552 str_cnext_noncomb_char (const char **text)
 553 {
 554     return used_class.cnext_noncomb_char (text);
 555 }
 556
 557 int
 558 str_prev_noncomb_char (char **text, const char *begin)
 559 {
 560     return used_class.cprev_noncomb_char ((const char **) text, begin);
 561 }
 562
 563 int
 564 str_cprev_noncomb_char (const char **text, const char *begin)
 565 {
 566     return used_class.cprev_noncomb_char (text, begin);
 567 }
 568
 569 int
 570 str_is_valid_char (const char *ch, size_t size)
 571 {
 572     return used_class.is_valid_char (ch, size);
 573 }
 574
 575 int
 576 str_term_width1 (const char *text)
 577 {
 578     return used_class.term_width1 (text);
 579 }
 580
 581 int
 582 str_term_width2 (const char *text, size_t length)
 583 {
 584     return used_class.term_width2 (text, length);
 585 }
 586
 587 int
 588 str_term_char_width (const char *text)
 589 {
 590     return used_class.term_char_width (text);
 591 }
 592
 593 int
 594 str_offset_to_pos (const char *text, size_t length)
 595 {
 596     return used_class.offset_to_pos (text, length);
 597 }
 598
 599 int
 600 str_length (const char *text)
 601 {
 602     return used_class.length (text);
 603 }
 604
 605 int
 606 str_length_char (const char *text)
 607 {
 608     return str_cget_next_char_safe (text) - text;
 609 }
 610
 611 int
 612 str_length2 (const char *text, int size)
 613 {
 614     return used_class.length2 (text, size);
 615 }
 616
 617 int
 618 str_length_noncomb (const char *text)
 619 {
 620     return used_class.length_noncomb (text);
 621 }
 622
 623 int
 624 str_column_to_pos (const char *text, size_t pos)
 625 {
 626     return used_class.column_to_pos (text, pos);
 627 }
 628
 629 int
 630 str_isspace (const char *ch)
 631 {
 632     return used_class.isspace (ch);
 633 }
 634
 635 int
 636 str_ispunct (const char *ch)
 637 {
 638     return used_class.ispunct (ch);
 639 }
 640
 641 int
 642 str_isalnum (const char *ch)
 643 {
 644     return used_class.isalnum (ch);
 645 }
 646
 647 int
 648 str_isdigit (const char *ch)
 649 {
 650     return used_class.isdigit (ch);
 651 }
 652
 653 int
 654 str_toupper (const char *ch, char **out, size_t * remain)
 655 {
 656     return used_class.toupper (ch, out, remain);
 657 }
 658
 659 int
 660 str_tolower (const char *ch, char **out, size_t * remain)
 661 {
 662     return used_class.tolower (ch, out, remain);
 663 }
 664
 665 int
 666 str_isprint (const char *ch)
 667 {
 668     return used_class.isprint (ch);
 669 }
 670
 671 int
 672 str_iscombiningmark (const char *ch)
 673 {
 674     return used_class.iscombiningmark (ch);
 675 }
 676
 677 const char *
 678 str_trunc (const char *text, int width)
 679 {
 680     return used_class.trunc (text, width);
 681 }
 682
 683 char *
 684 str_create_search_needle (const char *needle, int case_sen)
 685 {
 686     return used_class.create_search_needle (needle, case_sen);
 687 }
 688
 689
 690 void
 691 str_release_search_needle (char *needle, int case_sen)
 692 {
 693     used_class.release_search_needle (needle, case_sen);
 694 }
 695
 696 const char *
 697 str_search_first (const char *text, const char *search, int case_sen)
 698 {
 699     return used_class.search_first (text, search, case_sen);
 700 }
 701
 702 const char *
 703 str_search_last (const char *text, const char *search, int case_sen)
 704 {
 705     return used_class.search_last (text, search, case_sen);
 706 }
 707
 708 int
 709 str_is_valid_string (const char *text)
 710 {
 711     return used_class.is_valid_string (text);
 712 }
 713
 714 int
 715 str_compare (const char *t1, const char *t2)
 716 {
 717     return used_class.compare (t1, t2);
 718 }
 719
 720 int
 721 str_ncompare (const char *t1, const char *t2)
 722 {
 723     return used_class.ncompare (t1, t2);
 724 }
 725
 726 int
 727 str_casecmp (const char *t1, const char *t2)
 728 {
 729     return used_class.casecmp (t1, t2);
 730 }
 731
 732 int
 733 str_ncasecmp (const char *t1, const char *t2)
 734 {
 735     return used_class.ncasecmp (t1, t2);
 736 }
 737
 738 int
 739 str_prefix (const char *text, const char *prefix)
 740 {
 741     return used_class.prefix (text, prefix);
 742 }
 743
 744 int
 745 str_caseprefix (const char *text, const char *prefix)
 746 {
 747     return used_class.caseprefix (text, prefix);
 748 }
 749
 750 void
 751 str_fix_string (char *text)
 752 {
 753     used_class.fix_string (text);
 754 }
 755
 756 char *
 757 str_create_key (const char *text, int case_sen)
 758 {
 759     return used_class.create_key (text, case_sen);
 760 }
 761
 762 char *
 763 str_create_key_for_filename (const char *text, int case_sen)
 764 {
 765     return used_class.create_key_for_filename (text, case_sen);
 766 }
 767
 768 int
 769 str_key_collate (const char *t1, const char *t2, int case_sen)
 770 {
 771     return used_class.key_collate (t1, t2, case_sen);
 772 }
 773
 774 void
 775 str_release_key (char *key, int case_sen)
 776 {
 777     used_class.release_key (key, case_sen);
 778 }
 779
 780 void
 781 str_msg_term_size (const char *text, int *lines, int *columns)
 782 {
 783     char *p, *tmp;
 784     char *q;
 785     char c = '\0';
 786     int width;
 787
 788     *lines = 1;
 789     *columns = 0;
 790
 791     tmp = g_strdup (text);
 792     p = tmp;
 793
 794     while (TRUE)
 795     {
 796         q = strchr (p, '\n');
 797         if (q != NULL)
 798         {
 799             c = q[0];
 800             q[0] = '\0';
 801         }
 802
 803         width = str_term_width1 (p);
 804         if (width > *columns)
 805             *columns = width;
 806
 807         if (q == NULL)
 808             break;
 809
 810         q[0] = c;
 811         p = q + 1;
 812         (*lines)++;
 813     }
 814
 815     g_free (tmp);
 816 }
 817
 818 /* --------------------------------------------------------------------------------------------- */
 819
 820 char *
 821 strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
 822 {
 823     char *semi;
 824     ssize_t len;
 825
 826     len = strlen (haystack);
 827
 828     do
 829     {
 830         semi = g_strrstr_len (haystack, len, needle);
 831         if (semi == NULL)
 832             return NULL;
 833         len = semi - haystack - 1;
 834     }
 835     while (skip_count-- != 0);
 836     return semi;
 837 }
 838
 839 /* --------------------------------------------------------------------------------------------- */