2 Common strings utilities
4 Copyright (C) 2007-2024
5 Free Software Foundation, Inc.
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
33 #include "lib/global.h"
34 #include "lib/util.h" /* MC_PTR_FREE */
35 #include "lib/strutil.h"
37 /*** global variables ****************************************************************************/
/* Global iconv descriptors. Only str_cnv_not_convert has an explicit initializer
   (INVALID_CONV) here; all three are assigned in str_init_strings (), where the
   to/from-terminal descriptors are set to the same value as str_cnv_not_convert. */
39 GIConv str_cnv_to_term
;
40 GIConv str_cnv_from_term
;
41 GIConv str_cnv_not_convert
= INVALID_CONV
;
43 /*** file scope macro definitions ****************************************************************/
45 /*** file scope type declarations ****************************************************************/
47 /*** forward declarations (file scope functions) *************************************************/
49 /*** file scope variables ************************************************************************/
51 /* Encoding names that are treated as UTF-8 */
52 static const char *const str_utf8_encodings
[] = {
58 /* Standard 8-bit encodings: no wide or multibyte characters */
59 static const char *const str_8bit_encodings
[] = {
60 /* Solaris uses different names for the Windows-1251 encoding */
84 /* Name of the encoding in use; set (heap-allocated) in str_init_strings ()
      and released in str_uninit_strings () */
85 static char *codeset
= NULL
;
/* Cached, upper-cased result of nl_langinfo (CODESET); filled on first use by
   str_detect_termencoding () and released in str_uninit_strings () */
86 static char *term_encoding
= NULL
;
87 /* Table of encoding-specific operations; selected by str_choose_str_functions () */
88 static struct str_class used_class
;
90 /* --------------------------------------------------------------------------------------------- */
91 /*** file scope functions ************************************************************************/
92 /* --------------------------------------------------------------------------------------------- */
94 /* Tell whether enc names the same encoding as codeset.
      The comparison is an ASCII case-insensitive string compare; the result is
      non-zero (true) when the names are equal. */
96 str_test_not_convert (const char *enc
)
98 return g_ascii_strcasecmp (enc
, codeset
) == 0;
101 /* --------------------------------------------------------------------------------------------- */
/* Convert string with the iconv descriptor coder and append the output to buffer.
 *
 * coder  - iconv descriptor; compared against INVALID_CONV before use
 * string - input text; checked against NULL and with used_class.is_valid_string ()
 * size   - byte count supplied by the caller (str_convert () passes -1)
 * buffer - GString that receives the converted text; checked against NULL
 *
 * The visible code resets the converter, calls g_convert_with_iconv () and, when
 * a GError is reported, handles it per error code: unsupported conversion,
 * illegal sequence (keeps the valid prefix, copies the offending byte through
 * and records ESTR_PROBLEM) and partial input (pads the unread tail with '?').
 *
 * NOTE(review): the loop header, braces, several assignments and all return
 * statements are not visible here, so the result of each guard below is not
 * documented.
 */
104 _str_convert (GIConv coder
, const char *string
, int size
, GString
*buffer
)
106 estr_t state
= ESTR_SUCCESS
;
108 gsize bytes_read
= 0;
109 gsize bytes_written
= 0;
111 errno
= 0; /* FIXME: is it really needed? */
/* Guard: unusable converter */
113 if (coder
== INVALID_CONV
)
/* Guard: missing input or missing output buffer */
116 if (string
== NULL
|| buffer
== NULL
)
/* Guard: input that the active encoding class rejects as invalid */
120 if (! used_class.is_valid_string (string))
126 size
= strlen (string
);
/* left: number of input bytes still to be converted */
129 left
= strlen (string
);
/* All-NULL buffers: puts the converter back into its initial state (iconv convention) */
135 g_iconv (coder
, NULL
, NULL
, NULL
, NULL
);
140 GError
*mcerror
= NULL
;
142 tmp_buff
= g_convert_with_iconv ((const gchar
*) string
,
143 left
, coder
, &bytes_read
, &bytes_written
, &mcerror
);
/* Keep the error code, since the GError itself is released right away */
146 int code
= mcerror
->code
;
148 g_error_free (mcerror
);
153 case G_CONVERT_ERROR_NO_CONVERSION
:
154 /* Conversion between the requested character sets is not supported. */
/* presumably emits one '?' per input byte - confirm against mc_g_string_append_c_len () */
156 mc_g_string_append_c_len (buffer
, '?', strlen (string
));
159 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE
:
160 /* Invalid byte sequence in conversion input. */
161 if ((tmp_buff
== NULL
) && (bytes_read
!= 0))
162 /* Recode only the leading bytes_read bytes, which were read successfully */
163 tmp_buff
= g_convert_with_iconv ((const gchar
*) string
,
164 bytes_read
, coder
, NULL
, NULL
, NULL
);
166 if (tmp_buff
!= NULL
)
168 g_string_append (buffer
, tmp_buff
);
/* Nothing left after the part that was read */
172 if ((int) bytes_read
>= left
)
/* Step over the converted prefix plus the one offending byte ... */
175 string
+= bytes_read
+ 1;
176 size
-= (bytes_read
+ 1);
177 left
-= (bytes_read
+ 1);
/* ... copy that offending byte to the output unchanged and flag the problem */
178 g_string_append_c (buffer
, *(string
- 1));
179 state
= ESTR_PROBLEM
;
182 case G_CONVERT_ERROR_PARTIAL_INPUT
:
183 /* Partial character sequence at end of input. */
184 g_string_append (buffer
, tmp_buff
);
/* Replace the unread tail (left - bytes_read bytes) with '?' */
186 if ((int) bytes_read
< left
)
187 mc_g_string_append_c_len (buffer
, '?', left
- bytes_read
);
190 case G_CONVERT_ERROR_BAD_URI
: /* Don't know how to handle this error :( */
191 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH
: /* Don't know how to handle this error :( */
192 case G_CONVERT_ERROR_FAILED
: /* Conversion failed for some reason. */
/* No error reported but no output either: pass the input through unchanged */
198 else if (tmp_buff
== NULL
)
200 g_string_append (buffer
, string
);
/* Empty conversion result: pass the input through unchanged as well */
203 else if (*tmp_buff
== '\0')
206 g_string_append (buffer
, string
);
/* Regular case: append the converted text and advance past the consumed input */
211 g_string_append (buffer
, tmp_buff
);
213 string
+= bytes_read
;
221 /* --------------------------------------------------------------------------------------------- */
/* Look for encoding in the NULL-terminated list table.
 * An entry matches when it is an ASCII case-insensitive prefix of encoding:
 * the compare is limited to strlen (table[t]) characters.
 * A NULL encoding skips the scan.
 * NOTE(review): the statements that record and return the match are not
 * visible here; str_isutf8 () treats a non-zero result as "found".
 */
224 str_test_encoding_class (const char *encoding
, const char *const *table
)
228 if (encoding
!= NULL
)
232 for (t
= 0; table
[t
] != NULL
; t
++)
233 if (g_ascii_strncasecmp (encoding
, table
[t
], strlen (table
[t
])) == 0)
240 /* --------------------------------------------------------------------------------------------- */
/* Fill used_class according to codeset: the UTF-8 implementation when codeset
 * matches str_utf8_encodings, the 8-bit implementation when it matches
 * str_8bit_encodings, and str_ascii_init () as the third alternative
 * (NOTE(review): the `else` introducing it is not visible here).
 */
243 str_choose_str_functions (void)
245 if (str_test_encoding_class (codeset
, str_utf8_encodings
))
246 used_class
= str_utf8_init ();
247 else if (str_test_encoding_class (codeset
, str_8bit_encodings
))
248 used_class
= str_8bit_init ();
250 used_class
= str_ascii_init ();
253 /* --------------------------------------------------------------------------------------------- */
254 /*** public functions ****************************************************************************/
255 /* --------------------------------------------------------------------------------------------- */
/* Create a converter from codeset to to_enc (g_iconv_open () takes the target
 * first). When to_enc is the same encoding as codeset, no new descriptor is
 * opened and the shared str_cnv_not_convert is returned instead.
 */
258 str_crt_conv_to (const char *to_enc
)
260 return (!str_test_not_convert (to_enc
)) ? g_iconv_open (to_enc
, codeset
) : str_cnv_not_convert
;
263 /* --------------------------------------------------------------------------------------------- */
/* Create a converter from from_enc to codeset (g_iconv_open () takes the target
 * first). When from_enc is the same encoding as codeset, the shared
 * str_cnv_not_convert is returned instead of a new descriptor.
 */
266 str_crt_conv_from (const char *from_enc
)
268 return (!str_test_not_convert (from_enc
))
269 ? g_iconv_open (codeset
, from_enc
) : str_cnv_not_convert
;
272 /* --------------------------------------------------------------------------------------------- */
/* Close conv with g_iconv_close (), except when it is the shared
 * str_cnv_not_convert descriptor, which is left open here
 * (str_uninit_strings () closes that one).
 */
275 str_close_conv (GIConv conv
)
277 if (conv
!= str_cnv_not_convert
)
278 g_iconv_close (conv
);
281 /* --------------------------------------------------------------------------------------------- */
/* Convert the whole of string with coder into buffer: forwards to
 * _str_convert () with size -1 and returns its result.
 */
284 str_convert (GIConv coder
, const char *string
, GString
*buffer
)
286 return _str_convert (coder
, string
, -1, buffer
);
289 /* --------------------------------------------------------------------------------------------- */
/* Same as str_convert (), but passes the caller-supplied size on to
 * _str_convert () and returns its result.
 */
292 str_nconvert (GIConv coder
, const char *string
, int size
, GString
*buffer
)
294 return _str_convert (coder
, string
, size
, buffer
);
297 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns what the conv_gerror_message handler of the active
 * encoding class (used_class) produces for mcerror and def_msg.
 */
300 str_conv_gerror_message (GError
*mcerror
, const char *def_msg
)
302 return used_class
.conv_gerror_message (mcerror
, def_msg
);
305 /* --------------------------------------------------------------------------------------------- */
/* Append string, converted with coder, to buffer.
 * When coder is the shared str_cnv_not_convert descriptor the text is appended
 * as is (a NULL string is appended as ""); the other branch runs
 * _str_convert () on the whole string (size -1) and keeps its status in result.
 * NOTE(review): the `else` and the final return are not visible here;
 * presumably result is what gets returned.
 */
308 str_vfs_convert_from (GIConv coder
, const char *string
, GString
*buffer
)
310 estr_t result
= ESTR_SUCCESS
;
312 if (coder
== str_cnv_not_convert
)
313 g_string_append (buffer
, string
!= NULL
? string
: "");
315 result
= _str_convert (coder
, string
, -1, buffer
);
320 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the vfs_convert_to handler of the active
 * encoding class (used_class), called with the arguments unchanged.
 */
323 str_vfs_convert_to (GIConv coder
, const char *string
, int size
, GString
*buffer
)
325 return used_class
.vfs_convert_to (coder
, string
, size
, buffer
);
328 /* --------------------------------------------------------------------------------------------- */
/* Append printf-style formatted text to buffer: the variadic arguments after
 * format are collected with va_start () and handed to
 * g_string_append_vprintf ().
 * NOTE(review): the matching va_end () is not visible here.
 */
331 str_printf (GString
*buffer
, const char *format
, ...)
334 va_start (ap
, format
);
336 g_string_append_vprintf (buffer
, format
, ap
);
340 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: calls the insert_replace_char handler of the active encoding
 * class (used_class) on buffer.
 */
343 str_insert_replace_char (GString
*buffer
)
345 used_class
.insert_replace_char (buffer
);
348 /* --------------------------------------------------------------------------------------------- */
/* Convert the bytes at keys with conv into the caller's output area.
 *
 * conv     - iconv descriptor; its state is reset before the conversion
 * keys     - input bytes
 * ch_size  - number of input bytes, or (size_t) (-1) to use strlen (keys)
 * output   - destination
 * out_size - space available at output
 *
 * When g_iconv () fails, ESTR_PROBLEM is returned if errno is EINVAL and
 * ESTR_FAILURE for any other errno.
 * NOTE(review): the code after the failure check (success path) is not
 * visible here.
 */
351 str_translate_char (GIConv conv
, const char *keys
, size_t ch_size
, char *output
, size_t out_size
)
/* All-NULL buffers: puts the converter back into its initial state (iconv convention) */
356 g_iconv (conv
, NULL
, NULL
, NULL
, NULL
);
358 left
= (ch_size
== (size_t) (-1)) ? strlen (keys
) : ch_size
;
/* g_iconv () advances the local copies of keys/output and decrements left/out_size */
360 cnv
= g_iconv (conv
, (gchar
**) & keys
, &left
, &output
, &out_size
);
361 if (cnv
== (size_t) (-1))
362 return (errno
== EINVAL
) ? ESTR_PROBLEM
: ESTR_FAILURE
;
368 /* --------------------------------------------------------------------------------------------- */
/* Return the upper-cased codeset name reported by nl_langinfo (CODESET).
 * The value is computed on the first call, stored in the file-scope
 * term_encoding and returned from there on later calls; the returned pointer
 * is that cached string, not a fresh copy.
 */
371 str_detect_termencoding (void)
373 if (term_encoding
== NULL
)
375 /* On Linux, nl_langinfo (CODESET) returns upper-case UTF-8 however LANG is spelled.
       On Mac OS X, it returns the same case as the LANG input.
       So transform the result of nl_langinfo (CODESET) to upper case unconditionally. */
379 term_encoding
= g_ascii_strup (nl_langinfo (CODESET
), -1);
382 return term_encoding
;
385 /* --------------------------------------------------------------------------------------------- */
/* Tell whether codeset_name is one of the UTF-8 names: true when
 * str_test_encoding_class () reports a non-zero result for it against
 * str_utf8_encodings.
 */
388 str_isutf8 (const char *codeset_name
)
390 return (str_test_encoding_class (codeset_name
, str_utf8_encodings
) != 0);
393 /* --------------------------------------------------------------------------------------------- */
/* Initialize the module state for the encoding termenc.
 *
 * termenc - encoding name to use, or NULL to use the detected one
 *
 * Steps visible here:
 *  1. codeset becomes an upper-cased copy of termenc, or a copy of
 *     str_detect_termencoding () when termenc is NULL;
 *  2. an identity converter codeset -> codeset is opened as str_cnv_not_convert;
 *  3. if that fails, the detected encoding is tried, then DEFAULT_CHARSET;
 *  4. str_cnv_to_term and str_cnv_from_term are set to the same descriptor;
 *  5. the encoding-specific function table is chosen.
 * NOTE(review): the lines between the retries (extra conditions, freeing of
 * the previous codeset) are not visible here - confirm there is no leak.
 */
396 str_init_strings (const char *termenc
)
398 codeset
= termenc
!= NULL
? g_ascii_strup (termenc
, -1) : g_strdup (str_detect_termencoding ());
400 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
/* First fallback: the encoding detected from the locale */
401 if (str_cnv_not_convert
== INVALID_CONV
)
406 codeset
= g_strdup (str_detect_termencoding ());
407 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
/* Second fallback: the compiled-in default charset */
410 if (str_cnv_not_convert
== INVALID_CONV
)
413 codeset
= g_strdup (DEFAULT_CHARSET
);
414 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
418 str_cnv_to_term
= str_cnv_not_convert
;
419 str_cnv_from_term
= str_cnv_not_convert
;
421 str_choose_str_functions ();
424 /* --------------------------------------------------------------------------------------------- */
/* Release the module state: close str_cnv_not_convert when it is a valid
 * descriptor, then free term_encoding and codeset.
 */
427 str_uninit_strings (void)
429 if (str_cnv_not_convert
!= INVALID_CONV
)
430 g_iconv_close (str_cnv_not_convert
);
431 /* NULL-ize pointers to avoid double free in unit tests */
432 MC_PTR_FREE (term_encoding
);
433 MC_PTR_FREE (codeset
);
436 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the term_form handler of the active
 * encoding class (used_class) for text.
 */
439 str_term_form (const char *text
)
441 return used_class
.term_form (text
);
444 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the fit_to_term handler of the active
 * encoding class (used_class), called with text, width and just_mode unchanged.
 */
447 str_fit_to_term (const char *text
, int width
, align_crt_t just_mode
)
449 return used_class
.fit_to_term (text
, width
, just_mode
);
452 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the term_trim handler of the active
 * encoding class (used_class) for text and width.
 */
455 str_term_trim (const char *text
, int width
)
457 return used_class
.term_trim (text
, width
);
460 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the term_substring handler of the active
 * encoding class (used_class) for text, start and width.
 */
463 str_term_substring (const char *text
, int start
, int width
)
465 return used_class
.term_substring (text
, start
, width
);
468 /* --------------------------------------------------------------------------------------------- */
/* Runs the cnext_char handler of used_class on the address of the local copy
 * of text, so the caller's own pointer is not modified.
 * NOTE(review): the return statement is not visible here; presumably the
 * updated text is returned.
 */
471 str_get_next_char (char *text
)
474 used_class
.cnext_char ((const char **) &text
);
478 /* --------------------------------------------------------------------------------------------- */
/* Const-pointer variant of str_get_next_char (): runs the cnext_char handler
 * of used_class on the address of the local copy of text.
 * NOTE(review): the return statement is not visible here; presumably the
 * updated text is returned.
 */
481 str_cget_next_char (const char *text
)
483 used_class
.cnext_char (&text
);
487 /* --------------------------------------------------------------------------------------------- */
/* Runs the cnext_char handler of used_class directly on the caller's pointer
 * (cast to const char **), so *text is updated in place by the handler.
 */
490 str_next_char (char **text
)
492 used_class
.cnext_char ((const char **) text
);
495 /* --------------------------------------------------------------------------------------------- */
/* Const-pointer variant of str_next_char (): runs the cnext_char handler of
 * used_class directly on the caller's pointer.
 */
498 str_cnext_char (const char **text
)
500 used_class
.cnext_char (text
);
503 /* --------------------------------------------------------------------------------------------- */
/* Runs the cprev_char handler of used_class on the address of the local copy
 * of text, so the caller's own pointer is not modified.
 * NOTE(review): the return statement is not visible here; presumably the
 * updated text is returned.
 */
506 str_get_prev_char (char *text
)
508 used_class
.cprev_char ((const char **) &text
);
512 /* --------------------------------------------------------------------------------------------- */
/* Const-pointer variant of str_get_prev_char (): runs the cprev_char handler
 * of used_class on the address of the local copy of text.
 * NOTE(review): the return statement is not visible here; presumably the
 * updated text is returned.
 */
515 str_cget_prev_char (const char *text
)
517 used_class
.cprev_char (&text
);
521 /* --------------------------------------------------------------------------------------------- */
/* Runs the cprev_char handler of used_class directly on the caller's pointer
 * (cast to const char **), so *text is updated in place by the handler.
 */
524 str_prev_char (char **text
)
526 used_class
.cprev_char ((const char **) text
);
529 /* --------------------------------------------------------------------------------------------- */
/* Const-pointer variant of str_prev_char (): runs the cprev_char handler of
 * used_class directly on the caller's pointer.
 */
532 str_cprev_char (const char **text
)
534 used_class
.cprev_char (text
);
537 /* --------------------------------------------------------------------------------------------- */
/* Runs the cnext_char_safe handler of used_class on the address of the local
 * copy of text, so the caller's own pointer is not modified.
 * NOTE(review): the return statement is not visible here; presumably the
 * updated text is returned.
 */
540 str_get_next_char_safe (char *text
)
542 used_class
.cnext_char_safe ((const char **) &text
);
546 /* --------------------------------------------------------------------------------------------- */
/* Const-pointer variant of str_get_next_char_safe (): runs the cnext_char_safe
 * handler of used_class on the address of the local copy of text.
 * NOTE(review): the return statement is not visible here; str_length_char ()
 * subtracts text from this function's result, so a pointer is returned.
 */
549 str_cget_next_char_safe (const char *text
)
551 used_class
.cnext_char_safe (&text
);
555 /* --------------------------------------------------------------------------------------------- */
/* Runs the cnext_char_safe handler of used_class directly on the caller's
 * pointer (cast to const char **), so *text is updated in place by the handler.
 */
558 str_next_char_safe (char **text
)
560 used_class
.cnext_char_safe ((const char **) text
);
563 /* --------------------------------------------------------------------------------------------- */
/* Const-pointer variant of str_next_char_safe (): runs the cnext_char_safe
 * handler of used_class directly on the caller's pointer.
 */
566 str_cnext_char_safe (const char **text
)
568 used_class
.cnext_char_safe (text
);
571 /* --------------------------------------------------------------------------------------------- */
/* Runs the cprev_char_safe handler of used_class on the address of the local
 * copy of text, so the caller's own pointer is not modified.
 * NOTE(review): the return statement is not visible here; presumably the
 * updated text is returned.
 */
574 str_get_prev_char_safe (char *text
)
576 used_class
.cprev_char_safe ((const char **) &text
);
580 /* --------------------------------------------------------------------------------------------- */
/* Const-pointer variant of str_get_prev_char_safe (): runs the cprev_char_safe
 * handler of used_class on the address of the local copy of text.
 * NOTE(review): the return statement is not visible here; presumably the
 * updated text is returned.
 */
583 str_cget_prev_char_safe (const char *text
)
585 used_class
.cprev_char_safe (&text
);
589 /* --------------------------------------------------------------------------------------------- */
/* Runs the cprev_char_safe handler of used_class directly on the caller's
 * pointer (cast to const char **), so *text is updated in place by the handler.
 */
592 str_prev_char_safe (char **text
)
594 used_class
.cprev_char_safe ((const char **) text
);
597 /* --------------------------------------------------------------------------------------------- */
/* Const-pointer variant of str_prev_char_safe (): runs the cprev_char_safe
 * handler of used_class directly on the caller's pointer.
 */
600 str_cprev_char_safe (const char **text
)
602 used_class
.cprev_char_safe (text
);
605 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: runs the cnext_noncomb_char handler of used_class on the
 * caller's pointer (cast to const char **) and returns the handler's result.
 */
608 str_next_noncomb_char (char **text
)
610 return used_class
.cnext_noncomb_char ((const char **) text
);
613 /* --------------------------------------------------------------------------------------------- */
/* Const-pointer variant of str_next_noncomb_char (): returns the result of the
 * cnext_noncomb_char handler of used_class for the caller's pointer.
 */
616 str_cnext_noncomb_char (const char **text
)
618 return used_class
.cnext_noncomb_char (text
);
621 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: runs the cprev_noncomb_char handler of used_class on the
 * caller's pointer (cast to const char **) together with begin, and returns
 * the handler's result.
 */
624 str_prev_noncomb_char (char **text
, const char *begin
)
626 return used_class
.cprev_noncomb_char ((const char **) text
, begin
);
629 /* --------------------------------------------------------------------------------------------- */
/* Const-pointer variant of str_prev_noncomb_char (): returns the result of the
 * cprev_noncomb_char handler of used_class for text and begin.
 */
632 str_cprev_noncomb_char (const char **text
, const char *begin
)
634 return used_class
.cprev_noncomb_char (text
, begin
);
637 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the is_valid_char handler of the active
 * encoding class (used_class) for ch and size.
 */
640 str_is_valid_char (const char *ch
, size_t size
)
642 return used_class
.is_valid_char (ch
, size
);
645 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the term_width1 handler of the active
 * encoding class (used_class) for text.
 */
648 str_term_width1 (const char *text
)
650 return used_class
.term_width1 (text
);
653 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the term_width2 handler of the active
 * encoding class (used_class) for text and length.
 */
656 str_term_width2 (const char *text
, size_t length
)
658 return used_class
.term_width2 (text
, length
);
661 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the term_char_width handler of the active
 * encoding class (used_class) for text.
 */
664 str_term_char_width (const char *text
)
666 return used_class
.term_char_width (text
);
669 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the offset_to_pos handler of the active
 * encoding class (used_class) for text and length.
 */
672 str_offset_to_pos (const char *text
, size_t length
)
674 return used_class
.offset_to_pos (text
, length
);
677 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the length handler of the active encoding
 * class (used_class) for text.
 */
680 str_length (const char *text
)
682 return used_class
.length (text
);
685 /* --------------------------------------------------------------------------------------------- */
/* Return the distance, in bytes, between text and the pointer that
 * str_cget_next_char_safe () yields for it.
 */
688 str_length_char (const char *text
)
690 return str_cget_next_char_safe (text
) - text
;
693 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the length2 handler of the active encoding
 * class (used_class) for text and size.
 */
696 str_length2 (const char *text
, int size
)
698 return used_class
.length2 (text
, size
);
701 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the length_noncomb handler of the active
 * encoding class (used_class) for text.
 */
704 str_length_noncomb (const char *text
)
706 return used_class
.length_noncomb (text
);
709 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the column_to_pos handler of the active
 * encoding class (used_class) for text and pos.
 */
712 str_column_to_pos (const char *text
, size_t pos
)
714 return used_class
.column_to_pos (text
, pos
);
717 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the char_isspace handler of the active
 * encoding class (used_class) for ch.
 */
720 str_isspace (const char *ch
)
722 return used_class
.char_isspace (ch
);
725 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the char_ispunct handler of the active
 * encoding class (used_class) for ch.
 */
728 str_ispunct (const char *ch
)
730 return used_class
.char_ispunct (ch
);
733 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the char_isalnum handler of the active
 * encoding class (used_class) for ch.
 */
736 str_isalnum (const char *ch
)
738 return used_class
.char_isalnum (ch
);
741 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the char_isdigit handler of the active
 * encoding class (used_class) for ch.
 */
744 str_isdigit (const char *ch
)
746 return used_class
.char_isdigit (ch
);
749 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the char_toupper handler of the active
 * encoding class (used_class), called with ch, out and remain unchanged.
 */
752 str_toupper (const char *ch
, char **out
, size_t *remain
)
754 return used_class
.char_toupper (ch
, out
, remain
);
757 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the char_tolower handler of the active
 * encoding class (used_class), called with ch, out and remain unchanged.
 */
760 str_tolower (const char *ch
, char **out
, size_t *remain
)
762 return used_class
.char_tolower (ch
, out
, remain
);
765 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the char_isprint handler of the active
 * encoding class (used_class) for ch.
 */
768 str_isprint (const char *ch
)
770 return used_class
.char_isprint (ch
);
773 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the char_iscombiningmark handler of the
 * active encoding class (used_class) for ch.
 */
776 str_iscombiningmark (const char *ch
)
778 return used_class
.char_iscombiningmark (ch
);
781 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the trunc handler of the active encoding
 * class (used_class) for text and width.
 */
784 str_trunc (const char *text
, int width
)
786 return used_class
.trunc (text
, width
);
789 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the create_search_needle handler of the
 * active encoding class (used_class) for needle and case_sen.
 * The counterpart for disposing of the result is str_release_search_needle ().
 */
792 str_create_search_needle (const char *needle
, gboolean case_sen
)
794 return used_class
.create_search_needle (needle
, case_sen
);
797 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: calls the release_search_needle handler of the active encoding
 * class (used_class) with needle and case_sen.
 */
800 str_release_search_needle (char *needle
, gboolean case_sen
)
802 used_class
.release_search_needle (needle
, case_sen
);
805 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the search_first handler of the active
 * encoding class (used_class) for text, search and case_sen.
 */
808 str_search_first (const char *text
, const char *search
, gboolean case_sen
)
810 return used_class
.search_first (text
, search
, case_sen
);
813 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the search_last handler of the active
 * encoding class (used_class) for text, search and case_sen.
 */
816 str_search_last (const char *text
, const char *search
, gboolean case_sen
)
818 return used_class
.search_last (text
, search
, case_sen
);
821 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the is_valid_string handler of the active
 * encoding class (used_class) for text.
 */
824 str_is_valid_string (const char *text
)
826 return used_class
.is_valid_string (text
);
829 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the compare handler of the active encoding
 * class (used_class) for t1 and t2.
 */
832 str_compare (const char *t1
, const char *t2
)
834 return used_class
.compare (t1
, t2
);
837 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the ncompare handler of the active
 * encoding class (used_class) for t1 and t2.
 */
840 str_ncompare (const char *t1
, const char *t2
)
842 return used_class
.ncompare (t1
, t2
);
845 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the casecmp handler of the active encoding
 * class (used_class) for t1 and t2.
 */
848 str_casecmp (const char *t1
, const char *t2
)
850 return used_class
.casecmp (t1
, t2
);
853 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the ncasecmp handler of the active
 * encoding class (used_class) for t1 and t2.
 */
856 str_ncasecmp (const char *t1
, const char *t2
)
858 return used_class
.ncasecmp (t1
, t2
);
861 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the prefix handler of the active encoding
 * class (used_class) for text and prefix.
 */
864 str_prefix (const char *text
, const char *prefix
)
866 return used_class
.prefix (text
, prefix
);
869 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the caseprefix handler of the active
 * encoding class (used_class) for text and prefix.
 */
872 str_caseprefix (const char *text
, const char *prefix
)
874 return used_class
.caseprefix (text
, prefix
);
877 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: calls the fix_string handler of the active encoding class
 * (used_class) on text; text is passed as a non-const pointer.
 */
880 str_fix_string (char *text
)
882 used_class
.fix_string (text
);
885 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the create_key handler of the active
 * encoding class (used_class) for text and case_sen.
 * The counterpart for disposing of the result is str_release_key ().
 */
888 str_create_key (const char *text
, gboolean case_sen
)
890 return used_class
.create_key (text
, case_sen
);
893 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the create_key_for_filename handler of the
 * active encoding class (used_class) for text and case_sen.
 */
896 str_create_key_for_filename (const char *text
, gboolean case_sen
)
898 return used_class
.create_key_for_filename (text
, case_sen
);
901 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: returns the result of the key_collate handler of the active
 * encoding class (used_class) for t1, t2 and case_sen.
 */
904 str_key_collate (const char *t1
, const char *t2
, gboolean case_sen
)
906 return used_class
.key_collate (t1
, t2
, case_sen
);
909 /* --------------------------------------------------------------------------------------------- */
/* Dispatcher: calls the release_key handler of the active encoding class
 * (used_class) with key and case_sen.
 */
912 str_release_key (char *key
, gboolean case_sen
)
914 used_class
.release_key (key
, case_sen
);
917 /* --------------------------------------------------------------------------------------------- */
/* Measure a multi-line message.
 * The visible code works on a private copy of text, looks for '\n' separators
 * with strchr (), measures a piece with str_term_width1 () and compares that
 * width with *columns.
 * NOTE(review): the loop, the updates of *lines and *columns and the release
 * of the copy are not visible here; presumably *lines receives the number of
 * lines and *columns the largest width - confirm.
 */
920 str_msg_term_size (const char *text
, int *lines
, int *columns
)
/* Private, writable copy of the message */
929 tmp
= g_strdup (text
);
/* End of the current line, if any */
936 q
= strchr (p
, '\n');
943 width
= str_term_width1 (p
);
944 if (width
> *columns
)
958 /* --------------------------------------------------------------------------------------------- */
/* Search haystack backwards for needle, skipping matches.
 * Each pass looks for the last occurrence of needle within the first len bytes
 * of haystack, then shrinks len to end before that occurrence, so the next
 * pass finds an earlier one. The pass is repeated while the post-decremented
 * skip_count is non-zero.
 * NOTE(review): the `do` header, the handling of a failed search (semi == NULL)
 * and the return statement are not visible here.
 */
961 strrstr_skip_count (const char *haystack
, const char *needle
, size_t skip_count
)
966 len
= strlen (haystack
);
970 semi
= g_strrstr_len (haystack
, len
, needle
);
/* Continue the search strictly before the occurrence just found */
973 len
= semi
- haystack
- 1;
975 while (skip_count
-- != 0);
980 /* --------------------------------------------------------------------------------------------- */
981 /* Interpret string as a non-negative decimal integer, optionally multiplied by various values.
983 * @param str input value
984 * @param invalid set to TRUE if "str" does not represent a number in this format
986 * @return non-negative integer representation of "str", 0 in case of error. */
/* Parse str with xstrtoumax () in base 10, accepting the multiplier suffixes
 * listed in "bcEGkKMPTwYZ0". An "x" after the number introduces a second
 * factor, which is parsed by a recursive call on the rest of the string.
 * NOTE(review): the statements that set *invalid, perform the multiplication
 * and return the value are not visible here.
 */
990 parse_integer (const char *str
, gboolean
*invalid
)
996 e
= xstrtoumax (str
, &suffix
, 10, &n
, "bcEGkKMPTwYZ0");
/* "NUMBERxNUMBER" form: the text after 'x' is a multiplier */
997 if (e
== LONGINT_INVALID_SUFFIX_CHAR
&& *suffix
== 'x')
999 uintmax_t multiplier
;
1001 multiplier
= parse_integer (suffix
+ 1, invalid
);
/* Overflow test: dividing the product back must give n again */
1002 if (multiplier
!= 0 && n
* multiplier
/ multiplier
!= n
)
/* Any other xstrtoumax () failure */
1010 else if (e
!= LONGINT_OK
)
1019 /* --------------------------------------------------------------------------------------------- */