Merge branch '3578_mcext_fixes'
[midnight-commander.git] / lib / strutil / strutil.c
blobb3c346eb76292926fa953dfe19f784a249e3d0b0
1 /*
2 Common strings utilities
4 Copyright (C) 2007-2016
5 Free Software Foundation, Inc.
7 Written by:
8 Rostislav Benes, 2007
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 #include <config.h>
28 #include <stdlib.h>
29 #include <langinfo.h>
30 #include <string.h>
31 #include <errno.h>
33 #include "lib/global.h"
34 #include "lib/strutil.h"
36 /*** global variables ****************************************************************************/
38 GIConv str_cnv_to_term;
39 GIConv str_cnv_from_term;
40 GIConv str_cnv_not_convert = INVALID_CONV;
42 /*** file scope macro definitions ****************************************************************/
44 /*** file scope type declarations ****************************************************************/
46 /*** file scope variables ************************************************************************/
/* Codeset name prefixes recognised as UTF-8 (NULL-terminated list,
   matched case-insensitively by str_test_encoding_class ()) */
static const char *str_utf8_encodings[] = {
    "utf-8",
    "utf8",
    NULL
};
/* Codeset name prefixes of standard 8-bit encodings: no wide or multibyte
   characters (NULL-terminated list) */
static const char *str_8bit_encodings[] = {
    "cp-1251",
    "cp1251",
    "cp-1250",
    "cp1250",
    "cp-866",
    "cp866",
    "ibm-866",
    "ibm866",
    "cp-850",
    "cp850",
    "cp-852",
    "cp852",
    "iso-8859",
    "iso8859",
    "koi8",
    NULL
};
75 /* terminal encoding */
76 static char *codeset = NULL;
77 static char *term_encoding = NULL;
78 /* function for encoding specific operations */
79 static struct str_class used_class;
81 /* --------------------------------------------------------------------------------------------- */
82 /*** file scope functions ************************************************************************/
83 /* --------------------------------------------------------------------------------------------- */
85 /* if enc is same encoding like on terminal */
86 static int
87 str_test_not_convert (const char *enc)
89 return g_ascii_strcasecmp (enc, codeset) == 0;
92 /* --------------------------------------------------------------------------------------------- */
/* Convert STRING with CODER and append the result to BUFFER.
 *
 * SIZE limits how many bytes of STRING are converted; a negative SIZE, or one
 * larger than strlen (STRING), means the whole NUL-terminated string.
 *
 * Returns ESTR_FAILURE when CODER is INVALID_CONV, when STRING or BUFFER is
 * NULL, when the conversion is unsupported, or on an unclassified conversion
 * error; ESTR_PROBLEM when part of the input could not be converted and was
 * copied verbatim or replaced by '?'; otherwise the accumulated state
 * (ESTR_SUCCESS if nothing went wrong).
 */
94 static estr_t
95 _str_convert (GIConv coder, const char *string, int size, GString * buffer)
97 estr_t state = ESTR_SUCCESS;
98 gssize left;
99 gsize bytes_read = 0;
100 gsize bytes_written = 0;
102 errno = 0; /* FIXME: is it really needed? */
104 if (coder == INVALID_CONV)
105 return ESTR_FAILURE;
107 if (string == NULL || buffer == NULL)
108 return ESTR_FAILURE;
/* NOTE(review): the spacing of "! used_class" and the gaps in the embedded line
   numbering suggest this check sat inside a comment whose delimiters were lost
   in extraction - confirm against the real file before treating it as live code. */
111 if (! used_class.is_valid_string (string))
113 return ESTR_FAILURE;
/* clamp SIZE to the actual length of STRING */
116 if (size < 0)
117 size = strlen (string);
118 else
120 left = strlen (string);
121 if (left < size)
122 size = left;
125 left = size;
/* g_iconv with all-NULL buffers: presumably resets the converter state, as iconv(3) does */
126 g_iconv (coder, NULL, NULL, NULL, NULL);
/* convert chunk by chunk; LEFT counts the input bytes still to be handled */
128 while (left != 0)
130 gchar *tmp_buff;
131 GError *mcerror = NULL;
133 tmp_buff = g_convert_with_iconv ((const gchar *) string,
134 left, coder, &bytes_read, &bytes_written, &mcerror);
135 if (mcerror != NULL)
/* keep only the error code; the GError itself is released immediately */
137 int code = mcerror->code;
139 g_error_free (mcerror);
140 mcerror = NULL;
142 switch (code)
144 case G_CONVERT_ERROR_NO_CONVERSION:
145 /* Conversion between the requested character sets is not supported. */
/* emit one '?' per byte of the remaining STRING (strlen, not LEFT) and give up */
146 tmp_buff = g_strnfill (strlen (string), '?');
147 g_string_append (buffer, tmp_buff);
148 g_free (tmp_buff);
149 return ESTR_FAILURE;
151 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
152 /* Invalid byte sequence in conversion input. */
153 if ((tmp_buff == NULL) && (bytes_read != 0))
154 /* recode valid byte sequence */
155 tmp_buff = g_convert_with_iconv ((const gchar *) string,
156 bytes_read, coder, NULL, NULL, NULL);
158 if (tmp_buff != NULL)
160 g_string_append (buffer, tmp_buff);
161 g_free (tmp_buff);
/* nothing left after the converted prefix: stop here */
164 if ((int) bytes_read >= left)
165 return ESTR_PROBLEM;
/* step over the offending byte, copy it to BUFFER unchanged and carry on */
167 string += bytes_read + 1;
168 size -= (bytes_read + 1);
169 left -= (bytes_read + 1);
170 g_string_append_c (buffer, *(string - 1));
171 state = ESTR_PROBLEM;
172 break;
174 case G_CONVERT_ERROR_PARTIAL_INPUT:
175 /* Partial character sequence at end of input. */
/* NOTE(review): tmp_buff may be NULL here if g_convert_with_iconv returns NULL on
   error - confirm g_string_append tolerates that */
176 g_string_append (buffer, tmp_buff);
177 g_free (tmp_buff);
/* replace every unconverted trailing byte with '?' */
178 if ((int) bytes_read < left)
180 left = left - bytes_read;
181 tmp_buff = g_strnfill (left, '?');
182 g_string_append (buffer, tmp_buff);
183 g_free (tmp_buff);
185 return ESTR_PROBLEM;
187 case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
188 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
189 case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
190 default:
191 g_free (tmp_buff);
192 return ESTR_FAILURE;
/* no error reported but no output either: pass the rest of STRING through as is */
195 else if (tmp_buff == NULL)
197 g_string_append (buffer, string);
198 return ESTR_PROBLEM;
/* empty conversion result: append the rest of STRING unconverted and finish */
200 else if (*tmp_buff == '\0')
202 g_free (tmp_buff);
203 g_string_append (buffer, string);
204 return state;
/* normal case: append the converted chunk and advance past the consumed input */
206 else
208 g_string_append (buffer, tmp_buff);
209 g_free (tmp_buff);
210 string += bytes_read;
211 left -= bytes_read;
215 return state;
218 /* --------------------------------------------------------------------------------------------- */
/* Count the entries of the NULL-terminated TABLE that are an ASCII
   case-insensitive prefix of ENCODING. Returns 0 when ENCODING is NULL. */
static int
str_test_encoding_class (const char *encoding, const char **table)
{
    const char **entry;
    int hits = 0;

    if (encoding == NULL)
        return 0;

    for (entry = table; *entry != NULL; entry++)
    {
        size_t prefix_len;

        prefix_len = strlen (*entry);
        if (g_ascii_strncasecmp (encoding, *entry, prefix_len) == 0)
            hits++;
    }

    return hits;
}
237 /* --------------------------------------------------------------------------------------------- */
239 static void
240 str_choose_str_functions (void)
242 if (str_test_encoding_class (codeset, str_utf8_encodings))
243 used_class = str_utf8_init ();
244 else if (str_test_encoding_class (codeset, str_8bit_encodings))
245 used_class = str_8bit_init ();
246 else
247 used_class = str_ascii_init ();
250 /* --------------------------------------------------------------------------------------------- */
251 /*** public functions ****************************************************************************/
252 /* --------------------------------------------------------------------------------------------- */
254 GIConv
255 str_crt_conv_to (const char *to_enc)
257 return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
260 /* --------------------------------------------------------------------------------------------- */
262 GIConv
263 str_crt_conv_from (const char *from_enc)
265 return (!str_test_not_convert (from_enc))
266 ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
269 /* --------------------------------------------------------------------------------------------- */
271 void
272 str_close_conv (GIConv conv)
274 if (conv != str_cnv_not_convert)
275 g_iconv_close (conv);
278 /* --------------------------------------------------------------------------------------------- */
280 estr_t
281 str_convert (GIConv coder, const char *string, GString * buffer)
283 return _str_convert (coder, string, -1, buffer);
286 /* --------------------------------------------------------------------------------------------- */
288 estr_t
289 str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
291 return _str_convert (coder, string, size, buffer);
294 /* --------------------------------------------------------------------------------------------- */
296 gchar *
297 str_conv_gerror_message (GError * mcerror, const char *def_msg)
299 return used_class.conv_gerror_message (mcerror, def_msg);
302 /* --------------------------------------------------------------------------------------------- */
304 estr_t
305 str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
307 estr_t result = ESTR_SUCCESS;
309 if (coder == str_cnv_not_convert)
310 g_string_append (buffer, string != NULL ? string : "");
311 else
312 result = _str_convert (coder, string, -1, buffer);
314 return result;
317 /* --------------------------------------------------------------------------------------------- */
319 estr_t
320 str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
322 return used_class.vfs_convert_to (coder, string, size, buffer);
325 /* --------------------------------------------------------------------------------------------- */
327 void
328 str_printf (GString * buffer, const char *format, ...)
330 va_list ap;
331 va_start (ap, format);
333 g_string_append_vprintf (buffer, format, ap);
334 va_end (ap);
337 /* --------------------------------------------------------------------------------------------- */
339 void
340 str_insert_replace_char (GString * buffer)
342 used_class.insert_replace_char (buffer);
345 /* --------------------------------------------------------------------------------------------- */
347 estr_t
348 str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
350 size_t left;
351 size_t cnv;
353 g_iconv (conv, NULL, NULL, NULL, NULL);
355 left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
357 cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
358 if (cnv == (size_t) (-1))
359 return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
361 output[0] = '\0';
362 return ESTR_SUCCESS;
365 /* --------------------------------------------------------------------------------------------- */
367 const char *
368 str_detect_termencoding (void)
370 if (term_encoding == NULL)
372 /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
373 to utf-8 or UTF-8.
374 On Mac OS X, it returns the same case as the LANG input.
375 So let tranform result of nl_langinfo (CODESET) to upper case unconditionally. */
376 term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
379 return term_encoding;
382 /* --------------------------------------------------------------------------------------------- */
384 gboolean
385 str_isutf8 (const char *codeset_name)
387 return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
390 /* --------------------------------------------------------------------------------------------- */
392 void
393 str_init_strings (const char *termenc)
395 codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
397 str_cnv_not_convert = g_iconv_open (codeset, codeset);
398 if (str_cnv_not_convert == INVALID_CONV)
400 if (termenc != NULL)
402 g_free (codeset);
403 codeset = g_strdup (str_detect_termencoding ());
404 str_cnv_not_convert = g_iconv_open (codeset, codeset);
407 if (str_cnv_not_convert == INVALID_CONV)
409 g_free (codeset);
410 codeset = g_strdup (DEFAULT_CHARSET);
411 str_cnv_not_convert = g_iconv_open (codeset, codeset);
415 str_cnv_to_term = str_cnv_not_convert;
416 str_cnv_from_term = str_cnv_not_convert;
418 str_choose_str_functions ();
421 /* --------------------------------------------------------------------------------------------- */
423 void
424 str_uninit_strings (void)
426 if (str_cnv_not_convert != INVALID_CONV)
427 g_iconv_close (str_cnv_not_convert);
428 g_free (term_encoding);
429 g_free (codeset);
432 /* --------------------------------------------------------------------------------------------- */
434 const char *
435 str_term_form (const char *text)
437 return used_class.term_form (text);
440 /* --------------------------------------------------------------------------------------------- */
442 const char *
443 str_fit_to_term (const char *text, int width, align_crt_t just_mode)
445 return used_class.fit_to_term (text, width, just_mode);
448 /* --------------------------------------------------------------------------------------------- */
450 const char *
451 str_term_trim (const char *text, int width)
453 return used_class.term_trim (text, width);
456 /* --------------------------------------------------------------------------------------------- */
458 const char *
459 str_term_substring (const char *text, int start, int width)
461 return used_class.term_substring (text, start, width);
464 /* --------------------------------------------------------------------------------------------- */
466 char *
467 str_get_next_char (char *text)
470 used_class.cnext_char ((const char **) &text);
471 return text;
474 /* --------------------------------------------------------------------------------------------- */
476 const char *
477 str_cget_next_char (const char *text)
479 used_class.cnext_char (&text);
480 return text;
483 /* --------------------------------------------------------------------------------------------- */
485 void
486 str_next_char (char **text)
488 used_class.cnext_char ((const char **) text);
491 /* --------------------------------------------------------------------------------------------- */
493 void
494 str_cnext_char (const char **text)
496 used_class.cnext_char (text);
499 /* --------------------------------------------------------------------------------------------- */
501 char *
502 str_get_prev_char (char *text)
504 used_class.cprev_char ((const char **) &text);
505 return text;
508 /* --------------------------------------------------------------------------------------------- */
510 const char *
511 str_cget_prev_char (const char *text)
513 used_class.cprev_char (&text);
514 return text;
517 /* --------------------------------------------------------------------------------------------- */
519 void
520 str_prev_char (char **text)
522 used_class.cprev_char ((const char **) text);
525 /* --------------------------------------------------------------------------------------------- */
527 void
528 str_cprev_char (const char **text)
530 used_class.cprev_char (text);
533 /* --------------------------------------------------------------------------------------------- */
535 char *
536 str_get_next_char_safe (char *text)
538 used_class.cnext_char_safe ((const char **) &text);
539 return text;
542 /* --------------------------------------------------------------------------------------------- */
544 const char *
545 str_cget_next_char_safe (const char *text)
547 used_class.cnext_char_safe (&text);
548 return text;
551 /* --------------------------------------------------------------------------------------------- */
553 void
554 str_next_char_safe (char **text)
556 used_class.cnext_char_safe ((const char **) text);
559 /* --------------------------------------------------------------------------------------------- */
561 void
562 str_cnext_char_safe (const char **text)
564 used_class.cnext_char_safe (text);
567 /* --------------------------------------------------------------------------------------------- */
569 char *
570 str_get_prev_char_safe (char *text)
572 used_class.cprev_char_safe ((const char **) &text);
573 return text;
576 /* --------------------------------------------------------------------------------------------- */
578 const char *
579 str_cget_prev_char_safe (const char *text)
581 used_class.cprev_char_safe (&text);
582 return text;
585 /* --------------------------------------------------------------------------------------------- */
587 void
588 str_prev_char_safe (char **text)
590 used_class.cprev_char_safe ((const char **) text);
593 /* --------------------------------------------------------------------------------------------- */
595 void
596 str_cprev_char_safe (const char **text)
598 used_class.cprev_char_safe (text);
601 /* --------------------------------------------------------------------------------------------- */
604 str_next_noncomb_char (char **text)
606 return used_class.cnext_noncomb_char ((const char **) text);
609 /* --------------------------------------------------------------------------------------------- */
612 str_cnext_noncomb_char (const char **text)
614 return used_class.cnext_noncomb_char (text);
617 /* --------------------------------------------------------------------------------------------- */
620 str_prev_noncomb_char (char **text, const char *begin)
622 return used_class.cprev_noncomb_char ((const char **) text, begin);
625 /* --------------------------------------------------------------------------------------------- */
628 str_cprev_noncomb_char (const char **text, const char *begin)
630 return used_class.cprev_noncomb_char (text, begin);
633 /* --------------------------------------------------------------------------------------------- */
636 str_is_valid_char (const char *ch, size_t size)
638 return used_class.is_valid_char (ch, size);
641 /* --------------------------------------------------------------------------------------------- */
644 str_term_width1 (const char *text)
646 return used_class.term_width1 (text);
649 /* --------------------------------------------------------------------------------------------- */
652 str_term_width2 (const char *text, size_t length)
654 return used_class.term_width2 (text, length);
657 /* --------------------------------------------------------------------------------------------- */
660 str_term_char_width (const char *text)
662 return used_class.term_char_width (text);
665 /* --------------------------------------------------------------------------------------------- */
668 str_offset_to_pos (const char *text, size_t length)
670 return used_class.offset_to_pos (text, length);
673 /* --------------------------------------------------------------------------------------------- */
676 str_length (const char *text)
678 return used_class.length (text);
681 /* --------------------------------------------------------------------------------------------- */
/* Number of bytes between TEXT and the position reported by
   str_cget_next_char_safe (TEXT). */
int
str_length_char (const char *text)
{
    const char *next;

    next = str_cget_next_char_safe (text);
    return next - text;
}
689 /* --------------------------------------------------------------------------------------------- */
692 str_length2 (const char *text, int size)
694 return used_class.length2 (text, size);
697 /* --------------------------------------------------------------------------------------------- */
700 str_length_noncomb (const char *text)
702 return used_class.length_noncomb (text);
705 /* --------------------------------------------------------------------------------------------- */
708 str_column_to_pos (const char *text, size_t pos)
710 return used_class.column_to_pos (text, pos);
713 /* --------------------------------------------------------------------------------------------- */
716 str_isspace (const char *ch)
718 return used_class.char_isspace (ch);
721 /* --------------------------------------------------------------------------------------------- */
724 str_ispunct (const char *ch)
726 return used_class.char_ispunct (ch);
729 /* --------------------------------------------------------------------------------------------- */
732 str_isalnum (const char *ch)
734 return used_class.char_isalnum (ch);
737 /* --------------------------------------------------------------------------------------------- */
740 str_isdigit (const char *ch)
742 return used_class.char_isdigit (ch);
745 /* --------------------------------------------------------------------------------------------- */
748 str_toupper (const char *ch, char **out, size_t * remain)
750 return used_class.char_toupper (ch, out, remain);
753 /* --------------------------------------------------------------------------------------------- */
756 str_tolower (const char *ch, char **out, size_t * remain)
758 return used_class.char_tolower (ch, out, remain);
761 /* --------------------------------------------------------------------------------------------- */
764 str_isprint (const char *ch)
766 return used_class.char_isprint (ch);
769 /* --------------------------------------------------------------------------------------------- */
771 gboolean
772 str_iscombiningmark (const char *ch)
774 return used_class.char_iscombiningmark (ch);
777 /* --------------------------------------------------------------------------------------------- */
779 const char *
780 str_trunc (const char *text, int width)
782 return used_class.trunc (text, width);
785 /* --------------------------------------------------------------------------------------------- */
787 char *
788 str_create_search_needle (const char *needle, int case_sen)
790 return used_class.create_search_needle (needle, case_sen);
793 /* --------------------------------------------------------------------------------------------- */
795 void
796 str_release_search_needle (char *needle, int case_sen)
798 used_class.release_search_needle (needle, case_sen);
801 /* --------------------------------------------------------------------------------------------- */
803 const char *
804 str_search_first (const char *text, const char *search, int case_sen)
806 return used_class.search_first (text, search, case_sen);
809 /* --------------------------------------------------------------------------------------------- */
811 const char *
812 str_search_last (const char *text, const char *search, int case_sen)
814 return used_class.search_last (text, search, case_sen);
817 /* --------------------------------------------------------------------------------------------- */
820 str_is_valid_string (const char *text)
822 return used_class.is_valid_string (text);
825 /* --------------------------------------------------------------------------------------------- */
828 str_compare (const char *t1, const char *t2)
830 return used_class.compare (t1, t2);
833 /* --------------------------------------------------------------------------------------------- */
836 str_ncompare (const char *t1, const char *t2)
838 return used_class.ncompare (t1, t2);
841 /* --------------------------------------------------------------------------------------------- */
844 str_casecmp (const char *t1, const char *t2)
846 return used_class.casecmp (t1, t2);
849 /* --------------------------------------------------------------------------------------------- */
852 str_ncasecmp (const char *t1, const char *t2)
854 return used_class.ncasecmp (t1, t2);
857 /* --------------------------------------------------------------------------------------------- */
860 str_prefix (const char *text, const char *prefix)
862 return used_class.prefix (text, prefix);
865 /* --------------------------------------------------------------------------------------------- */
868 str_caseprefix (const char *text, const char *prefix)
870 return used_class.caseprefix (text, prefix);
873 /* --------------------------------------------------------------------------------------------- */
875 void
876 str_fix_string (char *text)
878 used_class.fix_string (text);
881 /* --------------------------------------------------------------------------------------------- */
883 char *
884 str_create_key (const char *text, int case_sen)
886 return used_class.create_key (text, case_sen);
889 /* --------------------------------------------------------------------------------------------- */
891 char *
892 str_create_key_for_filename (const char *text, int case_sen)
894 return used_class.create_key_for_filename (text, case_sen);
897 /* --------------------------------------------------------------------------------------------- */
900 str_key_collate (const char *t1, const char *t2, int case_sen)
902 return used_class.key_collate (t1, t2, case_sen);
905 /* --------------------------------------------------------------------------------------------- */
907 void
908 str_release_key (char *key, int case_sen)
910 used_class.release_key (key, case_sen);
913 /* --------------------------------------------------------------------------------------------- */
/* Measure a multi-line message.
 *
 * On return *LINES is the number of '\n'-separated lines in TEXT (at least 1)
 * and *COLUMNS is the largest str_term_width1 () value of any single line.
 * TEXT itself is not modified: the work is done on a private copy.
 */
void
str_msg_term_size (const char *text, int *lines, int *columns)
{
    char *copy;
    char *line_start;

    *lines = 1;
    *columns = 0;

    copy = g_strdup (text);
    line_start = copy;

    for (;;)
    {
        char *eol;
        int line_width;

        /* cut the current line off temporarily so that it can be measured alone */
        eol = strchr (line_start, '\n');
        if (eol != NULL)
            *eol = '\0';

        line_width = str_term_width1 (line_start);
        if (*columns < line_width)
            *columns = line_width;

        if (eol == NULL)
            break;

        /* put the newline back and continue with the next line */
        *eol = '\n';
        line_start = eol + 1;
        (*lines)++;
    }

    g_free (copy);
}
954 /* --------------------------------------------------------------------------------------------- */
956 char *
957 strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
959 char *semi;
960 ssize_t len;
962 len = strlen (haystack);
966 semi = g_strrstr_len (haystack, len, needle);
967 if (semi == NULL)
968 return NULL;
969 len = semi - haystack - 1;
971 while (skip_count-- != 0);
973 return semi;
976 /* --------------------------------------------------------------------------------------------- */
977 /* Interprete string as a non-negative decimal integer, optionally multiplied by various values.
979 * @param str input value
980 * @param invalid set to TRUE if "str" does not represent a number in this format
982 * @return non-integer representation of "str", 0 in case of error.
985 uintmax_t
986 parse_integer (const char *str, gboolean * invalid)
988 uintmax_t n;
989 char *suffix;
990 strtol_error_t e;
992 e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0");
993 if (e == LONGINT_INVALID_SUFFIX_CHAR && *suffix == 'x')
995 uintmax_t multiplier;
997 multiplier = parse_integer (suffix + 1, invalid);
998 if (multiplier != 0 && n * multiplier / multiplier != n)
1000 *invalid = TRUE;
1001 return 0;
1004 n *= multiplier;
1006 else if (e != LONGINT_OK)
1008 *invalid = TRUE;
1009 n = 0;
1012 return n;
1015 /* --------------------------------------------------------------------------------------------- */