Updated Italian translation
[midnight-commander.git] / lib / strutil / strutil.c
blobbc7f6ae4af2724ac57fe655f44c59149d63abaf0
1 /*
2 Common strings utilities
4 Copyright (C) 2007-2024
5 Free Software Foundation, Inc.
7 Written by:
8 Rostislav Benes, 2007
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 #include <config.h>
28 #include <stdlib.h>
29 #include <langinfo.h>
30 #include <string.h>
31 #include <errno.h>
33 #include "lib/global.h"
34 #include "lib/util.h" /* MC_PTR_FREE */
35 #include "lib/strutil.h"
37 /*** global variables ****************************************************************************/
39 GIConv str_cnv_to_term;
40 GIConv str_cnv_from_term;
41 GIConv str_cnv_not_convert = INVALID_CONV;
43 /*** file scope macro definitions ****************************************************************/
45 /*** file scope type declarations ****************************************************************/
47 /*** forward declarations (file scope functions) *************************************************/
49 /*** file scope variables ************************************************************************/
/* Encoding-name prefixes that select the UTF-8 string class (NULL-terminated list). */
static const char *const str_utf8_encodings[] = { "utf-8", "utf8", NULL };
/* Encoding-name prefixes of plain 8-bit codesets: no wide or multibyte characters
   (NULL-terminated list). */
static const char *const str_8bit_encodings[] = {
#ifdef __sun
    /* Solaris uses different names for the Windows 1251 encoding */
    "ansi-1251", "ansi1251",
#else
    "cp-1251", "cp1251",
#endif
    "cp-1250", "cp1250",
    "cp-866", "cp866",
    "ibm-866", "ibm866",
    "cp-850", "cp850",
    "cp-852", "cp852",
    "iso-8859", "iso8859",
    "koi8",
    NULL
};
84 /* terminal encoding */
85 static char *codeset = NULL;
86 static char *term_encoding = NULL;
87 /* function for encoding specific operations */
88 static struct str_class used_class;
90 /* --------------------------------------------------------------------------------------------- */
91 /*** file scope functions ************************************************************************/
92 /* --------------------------------------------------------------------------------------------- */
94 /* if enc is same encoding like on terminal */
95 static int
96 str_test_not_convert (const char *enc)
98 return g_ascii_strcasecmp (enc, codeset) == 0;
101 /* --------------------------------------------------------------------------------------------- */
103 static estr_t
104 _str_convert (GIConv coder, const char *string, int size, GString *buffer)
106 estr_t state = ESTR_SUCCESS;
107 gssize left;
108 gsize bytes_read = 0;
109 gsize bytes_written = 0;
111 errno = 0; /* FIXME: is it really needed? */
113 if (coder == INVALID_CONV)
114 return ESTR_FAILURE;
116 if (string == NULL || buffer == NULL)
117 return ESTR_FAILURE;
120 if (! used_class.is_valid_string (string))
122 return ESTR_FAILURE;
125 if (size < 0)
126 size = strlen (string);
127 else
129 left = strlen (string);
130 if (left < size)
131 size = left;
134 left = size;
135 g_iconv (coder, NULL, NULL, NULL, NULL);
137 while (left != 0)
139 gchar *tmp_buff;
140 GError *mcerror = NULL;
142 tmp_buff = g_convert_with_iconv ((const gchar *) string,
143 left, coder, &bytes_read, &bytes_written, &mcerror);
144 if (mcerror != NULL)
146 int code = mcerror->code;
148 g_error_free (mcerror);
149 mcerror = NULL;
151 switch (code)
153 case G_CONVERT_ERROR_NO_CONVERSION:
154 /* Conversion between the requested character sets is not supported. */
155 g_free (tmp_buff);
156 mc_g_string_append_c_len (buffer, '?', strlen (string));
157 return ESTR_FAILURE;
159 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
160 /* Invalid byte sequence in conversion input. */
161 if ((tmp_buff == NULL) && (bytes_read != 0))
162 /* recode valid byte sequence */
163 tmp_buff = g_convert_with_iconv ((const gchar *) string,
164 bytes_read, coder, NULL, NULL, NULL);
166 if (tmp_buff != NULL)
168 g_string_append (buffer, tmp_buff);
169 g_free (tmp_buff);
172 if ((int) bytes_read >= left)
173 return ESTR_PROBLEM;
175 string += bytes_read + 1;
176 size -= (bytes_read + 1);
177 left -= (bytes_read + 1);
178 g_string_append_c (buffer, *(string - 1));
179 state = ESTR_PROBLEM;
180 break;
182 case G_CONVERT_ERROR_PARTIAL_INPUT:
183 /* Partial character sequence at end of input. */
184 g_string_append (buffer, tmp_buff);
185 g_free (tmp_buff);
186 if ((int) bytes_read < left)
187 mc_g_string_append_c_len (buffer, '?', left - bytes_read);
188 return ESTR_PROBLEM;
190 case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
191 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
192 case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
193 default:
194 g_free (tmp_buff);
195 return ESTR_FAILURE;
198 else if (tmp_buff == NULL)
200 g_string_append (buffer, string);
201 return ESTR_PROBLEM;
203 else if (*tmp_buff == '\0')
205 g_free (tmp_buff);
206 g_string_append (buffer, string);
207 return state;
209 else
211 g_string_append (buffer, tmp_buff);
212 g_free (tmp_buff);
213 string += bytes_read;
214 left -= bytes_read;
218 return state;
221 /* --------------------------------------------------------------------------------------------- */
/* Count the entries of TABLE (a NULL-terminated list) that are a prefix of ENCODING.
   The comparison ignores ASCII case. A NULL ENCODING matches nothing and yields 0. */
static int
str_test_encoding_class (const char *encoding, const char *const *table)
{
    const char *const *entry;
    int matches = 0;

    if (encoding == NULL)
        return 0;

    for (entry = table; *entry != NULL; entry++)
    {
        const size_t prefix_len = strlen (*entry);

        if (g_ascii_strncasecmp (encoding, *entry, prefix_len) == 0)
            matches++;
    }

    return matches;
}
240 /* --------------------------------------------------------------------------------------------- */
242 static void
243 str_choose_str_functions (void)
245 if (str_test_encoding_class (codeset, str_utf8_encodings))
246 used_class = str_utf8_init ();
247 else if (str_test_encoding_class (codeset, str_8bit_encodings))
248 used_class = str_8bit_init ();
249 else
250 used_class = str_ascii_init ();
253 /* --------------------------------------------------------------------------------------------- */
254 /*** public functions ****************************************************************************/
255 /* --------------------------------------------------------------------------------------------- */
257 GIConv
258 str_crt_conv_to (const char *to_enc)
260 return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
263 /* --------------------------------------------------------------------------------------------- */
265 GIConv
266 str_crt_conv_from (const char *from_enc)
268 return (!str_test_not_convert (from_enc))
269 ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
272 /* --------------------------------------------------------------------------------------------- */
274 void
275 str_close_conv (GIConv conv)
277 if (conv != str_cnv_not_convert)
278 g_iconv_close (conv);
281 /* --------------------------------------------------------------------------------------------- */
283 estr_t
284 str_convert (GIConv coder, const char *string, GString *buffer)
286 return _str_convert (coder, string, -1, buffer);
289 /* --------------------------------------------------------------------------------------------- */
291 estr_t
292 str_nconvert (GIConv coder, const char *string, int size, GString *buffer)
294 return _str_convert (coder, string, size, buffer);
297 /* --------------------------------------------------------------------------------------------- */
299 gchar *
300 str_conv_gerror_message (GError *mcerror, const char *def_msg)
302 return used_class.conv_gerror_message (mcerror, def_msg);
305 /* --------------------------------------------------------------------------------------------- */
307 estr_t
308 str_vfs_convert_from (GIConv coder, const char *string, GString *buffer)
310 estr_t result = ESTR_SUCCESS;
312 if (coder == str_cnv_not_convert)
313 g_string_append (buffer, string != NULL ? string : "");
314 else
315 result = _str_convert (coder, string, -1, buffer);
317 return result;
320 /* --------------------------------------------------------------------------------------------- */
322 estr_t
323 str_vfs_convert_to (GIConv coder, const char *string, int size, GString *buffer)
325 return used_class.vfs_convert_to (coder, string, size, buffer);
328 /* --------------------------------------------------------------------------------------------- */
330 void
331 str_printf (GString *buffer, const char *format, ...)
333 va_list ap;
334 va_start (ap, format);
336 g_string_append_vprintf (buffer, format, ap);
337 va_end (ap);
340 /* --------------------------------------------------------------------------------------------- */
342 void
343 str_insert_replace_char (GString *buffer)
345 used_class.insert_replace_char (buffer);
348 /* --------------------------------------------------------------------------------------------- */
350 estr_t
351 str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
353 size_t left;
354 size_t cnv;
356 g_iconv (conv, NULL, NULL, NULL, NULL);
358 left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
360 cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
361 if (cnv == (size_t) (-1))
362 return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
364 output[0] = '\0';
365 return ESTR_SUCCESS;
368 /* --------------------------------------------------------------------------------------------- */
370 const char *
371 str_detect_termencoding (void)
373 if (term_encoding == NULL)
375 /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
376 to utf-8 or UTF-8.
377 On Mac OS X, it returns the same case as the LANG input.
378 So let transform result of nl_langinfo (CODESET) to upper case unconditionally. */
379 term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
382 return term_encoding;
385 /* --------------------------------------------------------------------------------------------- */
387 gboolean
388 str_isutf8 (const char *codeset_name)
390 return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
393 /* --------------------------------------------------------------------------------------------- */
395 void
396 str_init_strings (const char *termenc)
398 codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
400 str_cnv_not_convert = g_iconv_open (codeset, codeset);
401 if (str_cnv_not_convert == INVALID_CONV)
403 if (termenc != NULL)
405 g_free (codeset);
406 codeset = g_strdup (str_detect_termencoding ());
407 str_cnv_not_convert = g_iconv_open (codeset, codeset);
410 if (str_cnv_not_convert == INVALID_CONV)
412 g_free (codeset);
413 codeset = g_strdup (DEFAULT_CHARSET);
414 str_cnv_not_convert = g_iconv_open (codeset, codeset);
418 str_cnv_to_term = str_cnv_not_convert;
419 str_cnv_from_term = str_cnv_not_convert;
421 str_choose_str_functions ();
424 /* --------------------------------------------------------------------------------------------- */
426 void
427 str_uninit_strings (void)
429 if (str_cnv_not_convert != INVALID_CONV)
430 g_iconv_close (str_cnv_not_convert);
431 /* NULL-ize pointers to avoid double free in unit tests */
432 MC_PTR_FREE (term_encoding);
433 MC_PTR_FREE (codeset);
436 /* --------------------------------------------------------------------------------------------- */
438 const char *
439 str_term_form (const char *text)
441 return used_class.term_form (text);
444 /* --------------------------------------------------------------------------------------------- */
446 const char *
447 str_fit_to_term (const char *text, int width, align_crt_t just_mode)
449 return used_class.fit_to_term (text, width, just_mode);
452 /* --------------------------------------------------------------------------------------------- */
454 const char *
455 str_term_trim (const char *text, int width)
457 return used_class.term_trim (text, width);
460 /* --------------------------------------------------------------------------------------------- */
462 const char *
463 str_term_substring (const char *text, int start, int width)
465 return used_class.term_substring (text, start, width);
468 /* --------------------------------------------------------------------------------------------- */
470 char *
471 str_get_next_char (char *text)
474 used_class.cnext_char ((const char **) &text);
475 return text;
478 /* --------------------------------------------------------------------------------------------- */
480 const char *
481 str_cget_next_char (const char *text)
483 used_class.cnext_char (&text);
484 return text;
487 /* --------------------------------------------------------------------------------------------- */
489 void
490 str_next_char (char **text)
492 used_class.cnext_char ((const char **) text);
495 /* --------------------------------------------------------------------------------------------- */
497 void
498 str_cnext_char (const char **text)
500 used_class.cnext_char (text);
503 /* --------------------------------------------------------------------------------------------- */
505 char *
506 str_get_prev_char (char *text)
508 used_class.cprev_char ((const char **) &text);
509 return text;
512 /* --------------------------------------------------------------------------------------------- */
514 const char *
515 str_cget_prev_char (const char *text)
517 used_class.cprev_char (&text);
518 return text;
521 /* --------------------------------------------------------------------------------------------- */
523 void
524 str_prev_char (char **text)
526 used_class.cprev_char ((const char **) text);
529 /* --------------------------------------------------------------------------------------------- */
531 void
532 str_cprev_char (const char **text)
534 used_class.cprev_char (text);
537 /* --------------------------------------------------------------------------------------------- */
539 char *
540 str_get_next_char_safe (char *text)
542 used_class.cnext_char_safe ((const char **) &text);
543 return text;
546 /* --------------------------------------------------------------------------------------------- */
548 const char *
549 str_cget_next_char_safe (const char *text)
551 used_class.cnext_char_safe (&text);
552 return text;
555 /* --------------------------------------------------------------------------------------------- */
557 void
558 str_next_char_safe (char **text)
560 used_class.cnext_char_safe ((const char **) text);
563 /* --------------------------------------------------------------------------------------------- */
565 void
566 str_cnext_char_safe (const char **text)
568 used_class.cnext_char_safe (text);
571 /* --------------------------------------------------------------------------------------------- */
573 char *
574 str_get_prev_char_safe (char *text)
576 used_class.cprev_char_safe ((const char **) &text);
577 return text;
580 /* --------------------------------------------------------------------------------------------- */
582 const char *
583 str_cget_prev_char_safe (const char *text)
585 used_class.cprev_char_safe (&text);
586 return text;
589 /* --------------------------------------------------------------------------------------------- */
591 void
592 str_prev_char_safe (char **text)
594 used_class.cprev_char_safe ((const char **) text);
597 /* --------------------------------------------------------------------------------------------- */
599 void
600 str_cprev_char_safe (const char **text)
602 used_class.cprev_char_safe (text);
605 /* --------------------------------------------------------------------------------------------- */
608 str_next_noncomb_char (char **text)
610 return used_class.cnext_noncomb_char ((const char **) text);
613 /* --------------------------------------------------------------------------------------------- */
616 str_cnext_noncomb_char (const char **text)
618 return used_class.cnext_noncomb_char (text);
621 /* --------------------------------------------------------------------------------------------- */
624 str_prev_noncomb_char (char **text, const char *begin)
626 return used_class.cprev_noncomb_char ((const char **) text, begin);
629 /* --------------------------------------------------------------------------------------------- */
632 str_cprev_noncomb_char (const char **text, const char *begin)
634 return used_class.cprev_noncomb_char (text, begin);
637 /* --------------------------------------------------------------------------------------------- */
640 str_is_valid_char (const char *ch, size_t size)
642 return used_class.is_valid_char (ch, size);
645 /* --------------------------------------------------------------------------------------------- */
648 str_term_width1 (const char *text)
650 return used_class.term_width1 (text);
653 /* --------------------------------------------------------------------------------------------- */
656 str_term_width2 (const char *text, size_t length)
658 return used_class.term_width2 (text, length);
661 /* --------------------------------------------------------------------------------------------- */
664 str_term_char_width (const char *text)
666 return used_class.term_char_width (text);
669 /* --------------------------------------------------------------------------------------------- */
672 str_offset_to_pos (const char *text, size_t length)
674 return used_class.offset_to_pos (text, length);
677 /* --------------------------------------------------------------------------------------------- */
680 str_length (const char *text)
682 return used_class.length (text);
685 /* --------------------------------------------------------------------------------------------- */
/* Return how many bytes str_cget_next_char_safe() advances from TEXT,
   i.e. the byte length of the character at TEXT. */
int
str_length_char (const char *text)
{
    const char *next = str_cget_next_char_safe (text);

    return next - text;
}
693 /* --------------------------------------------------------------------------------------------- */
696 str_length2 (const char *text, int size)
698 return used_class.length2 (text, size);
701 /* --------------------------------------------------------------------------------------------- */
704 str_length_noncomb (const char *text)
706 return used_class.length_noncomb (text);
709 /* --------------------------------------------------------------------------------------------- */
712 str_column_to_pos (const char *text, size_t pos)
714 return used_class.column_to_pos (text, pos);
717 /* --------------------------------------------------------------------------------------------- */
719 gboolean
720 str_isspace (const char *ch)
722 return used_class.char_isspace (ch);
725 /* --------------------------------------------------------------------------------------------- */
727 gboolean
728 str_ispunct (const char *ch)
730 return used_class.char_ispunct (ch);
733 /* --------------------------------------------------------------------------------------------- */
735 gboolean
736 str_isalnum (const char *ch)
738 return used_class.char_isalnum (ch);
741 /* --------------------------------------------------------------------------------------------- */
743 gboolean
744 str_isdigit (const char *ch)
746 return used_class.char_isdigit (ch);
749 /* --------------------------------------------------------------------------------------------- */
751 gboolean
752 str_toupper (const char *ch, char **out, size_t *remain)
754 return used_class.char_toupper (ch, out, remain);
757 /* --------------------------------------------------------------------------------------------- */
759 gboolean
760 str_tolower (const char *ch, char **out, size_t *remain)
762 return used_class.char_tolower (ch, out, remain);
765 /* --------------------------------------------------------------------------------------------- */
767 gboolean
768 str_isprint (const char *ch)
770 return used_class.char_isprint (ch);
773 /* --------------------------------------------------------------------------------------------- */
775 gboolean
776 str_iscombiningmark (const char *ch)
778 return used_class.char_iscombiningmark (ch);
781 /* --------------------------------------------------------------------------------------------- */
783 const char *
784 str_trunc (const char *text, int width)
786 return used_class.trunc (text, width);
789 /* --------------------------------------------------------------------------------------------- */
791 char *
792 str_create_search_needle (const char *needle, gboolean case_sen)
794 return used_class.create_search_needle (needle, case_sen);
797 /* --------------------------------------------------------------------------------------------- */
799 void
800 str_release_search_needle (char *needle, gboolean case_sen)
802 used_class.release_search_needle (needle, case_sen);
805 /* --------------------------------------------------------------------------------------------- */
807 const char *
808 str_search_first (const char *text, const char *search, gboolean case_sen)
810 return used_class.search_first (text, search, case_sen);
813 /* --------------------------------------------------------------------------------------------- */
815 const char *
816 str_search_last (const char *text, const char *search, gboolean case_sen)
818 return used_class.search_last (text, search, case_sen);
821 /* --------------------------------------------------------------------------------------------- */
823 gboolean
824 str_is_valid_string (const char *text)
826 return used_class.is_valid_string (text);
829 /* --------------------------------------------------------------------------------------------- */
832 str_compare (const char *t1, const char *t2)
834 return used_class.compare (t1, t2);
837 /* --------------------------------------------------------------------------------------------- */
840 str_ncompare (const char *t1, const char *t2)
842 return used_class.ncompare (t1, t2);
845 /* --------------------------------------------------------------------------------------------- */
848 str_casecmp (const char *t1, const char *t2)
850 return used_class.casecmp (t1, t2);
853 /* --------------------------------------------------------------------------------------------- */
856 str_ncasecmp (const char *t1, const char *t2)
858 return used_class.ncasecmp (t1, t2);
861 /* --------------------------------------------------------------------------------------------- */
864 str_prefix (const char *text, const char *prefix)
866 return used_class.prefix (text, prefix);
869 /* --------------------------------------------------------------------------------------------- */
872 str_caseprefix (const char *text, const char *prefix)
874 return used_class.caseprefix (text, prefix);
877 /* --------------------------------------------------------------------------------------------- */
879 void
880 str_fix_string (char *text)
882 used_class.fix_string (text);
885 /* --------------------------------------------------------------------------------------------- */
887 char *
888 str_create_key (const char *text, gboolean case_sen)
890 return used_class.create_key (text, case_sen);
893 /* --------------------------------------------------------------------------------------------- */
895 char *
896 str_create_key_for_filename (const char *text, gboolean case_sen)
898 return used_class.create_key_for_filename (text, case_sen);
901 /* --------------------------------------------------------------------------------------------- */
904 str_key_collate (const char *t1, const char *t2, gboolean case_sen)
906 return used_class.key_collate (t1, t2, case_sen);
909 /* --------------------------------------------------------------------------------------------- */
911 void
912 str_release_key (char *key, gboolean case_sen)
914 used_class.release_key (key, case_sen);
917 /* --------------------------------------------------------------------------------------------- */
919 void
920 str_msg_term_size (const char *text, int *lines, int *columns)
922 char *p, *tmp;
923 char *q;
924 char c = '\0';
926 *lines = 1;
927 *columns = 0;
929 tmp = g_strdup (text);
930 p = tmp;
932 while (TRUE)
934 int width;
936 q = strchr (p, '\n');
937 if (q != NULL)
939 c = q[0];
940 q[0] = '\0';
943 width = str_term_width1 (p);
944 if (width > *columns)
945 *columns = width;
947 if (q == NULL)
948 break;
950 q[0] = c;
951 p = q + 1;
952 (*lines)++;
955 g_free (tmp);
958 /* --------------------------------------------------------------------------------------------- */
960 char *
961 strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
963 char *semi;
964 ssize_t len;
966 len = strlen (haystack);
970 semi = g_strrstr_len (haystack, len, needle);
971 if (semi == NULL)
972 return NULL;
973 len = semi - haystack - 1;
975 while (skip_count-- != 0);
977 return semi;
980 /* --------------------------------------------------------------------------------------------- */
981 /* Interpret string as a non-negative decimal integer, optionally multiplied by various values.
983 * @param str input value
984 * @param invalid set to TRUE if "str" does not represent a number in this format
986 * @return non-negative integer representation of "str", 0 in case of error.
989 uintmax_t
990 parse_integer (const char *str, gboolean *invalid)
992 uintmax_t n;
993 char *suffix;
994 strtol_error_t e;
996 e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0");
997 if (e == LONGINT_INVALID_SUFFIX_CHAR && *suffix == 'x')
999 uintmax_t multiplier;
1001 multiplier = parse_integer (suffix + 1, invalid);
1002 if (multiplier != 0 && n * multiplier / multiplier != n)
1004 *invalid = TRUE;
1005 return 0;
1008 n *= multiplier;
1010 else if (e != LONGINT_OK)
1012 *invalid = TRUE;
1013 n = 0;
1016 return n;
1019 /* --------------------------------------------------------------------------------------------- */