2 Common strings utilities
4 Copyright (C) 2007-2016
5 Free Software Foundation, Inc.
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
33 #include "lib/global.h"
34 #include "lib/strutil.h"
36 /*** global variables ****************************************************************************/
/* Global iconv descriptors. str_init_strings() assigns str_cnv_not_convert to both
   terminal converters below. */
38 GIConv str_cnv_to_term
;
39 GIConv str_cnv_from_term
;
/* Starts as INVALID_CONV; str_init_strings() replaces it with a descriptor opened
   from codeset to codeset. */
40 GIConv str_cnv_not_convert
= INVALID_CONV
;
42 /*** file scope macro definitions ****************************************************************/
44 /*** file scope type declarations ****************************************************************/
46 /*** file scope variables ************************************************************************/
48 /* names that are used for UTF-8 */
49 static const char *str_utf8_encodings
[] = {
55 /* standard 8-bit encodings, no wide or multibyte characters */
56 static const char *str_8bit_encodings
[] = {
75 /* terminal encoding */
/* codeset: name set by str_init_strings() and compared against in str_test_not_convert() */
76 static char *codeset
= NULL
;
/* term_encoding: upper-cased nl_langinfo (CODESET) result, filled on first use by
   str_detect_termencoding() */
77 static char *term_encoding
= NULL
;
78 /* table of encoding-specific operations, assigned in str_choose_str_functions() */
79 static struct str_class used_class
;
81 /* --------------------------------------------------------------------------------------------- */
82 /*** file scope functions ************************************************************************/
83 /* --------------------------------------------------------------------------------------------- */
85 /* test whether enc is the same encoding as the one used on the terminal */
/* Compares enc with the file-scope codeset name, ignoring ASCII case;
   the returned expression is true when the two names are equal. */
87 str_test_not_convert (const char *enc
)
89 return g_ascii_strcasecmp (enc
, codeset
) == 0;
92 /* --------------------------------------------------------------------------------------------- */
/* Converts string through the iconv descriptor coder and appends the output to buffer.
   str_convert() calls this with size -1, str_nconvert() with an explicit size.
   Visible handling of GError codes from g_convert_with_iconv():
   - NO_CONVERSION: appends one question mark per input byte (strlen of string);
   - ILLEGAL_SEQUENCE: appends the convertible prefix, then copies the offending
     byte through unchanged and records ESTR_PROBLEM in state;
   - PARTIAL_INPUT: appends what was converted and pads the rest with question marks.
   NOTE(review): this excerpt omits lines of the function (return type, braces,
   some declarations, loop/switch keywords, returns and frees), so the exact control
   flow between the statements below must be confirmed against the full source. */
95 _str_convert (GIConv coder
, const char *string
, int size
, GString
* buffer
)
97 estr_t state
= ESTR_SUCCESS
;
100 gsize bytes_written
= 0;
102 errno
= 0; /* FIXME: is it really needed? */
/* argument checks: an invalid descriptor or a NULL string/buffer is tested first */
104 if (coder
== INVALID_CONV
)
107 if (string
== NULL
|| buffer
== NULL
)
/* NOTE(review): surrounding lines are missing here; confirm whether this validity
   check is active code */
111 if (! used_class.is_valid_string (string))
117 size
= strlen (string
);
120 left
= strlen (string
);
/* calling g_iconv with NULL buffers resets the converter state before use */
126 g_iconv (coder
, NULL
, NULL
, NULL
, NULL
);
131 GError
*mcerror
= NULL
;
/* convert up to left bytes; bytes_read reports how much input was consumed */
133 tmp_buff
= g_convert_with_iconv ((const gchar
*) string
,
134 left
, coder
, &bytes_read
, &bytes_written
, &mcerror
);
/* the error code is copied out before the GError is released */
137 int code
= mcerror
->code
;
139 g_error_free (mcerror
);
144 case G_CONVERT_ERROR_NO_CONVERSION
:
145 /* Conversion between the requested character sets is not supported. */
146 tmp_buff
= g_strnfill (strlen (string
), '?');
147 g_string_append (buffer
, tmp_buff
);
151 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE
:
152 /* Invalid byte sequence in conversion input. */
153 if ((tmp_buff
== NULL
) && (bytes_read
!= 0))
154 /* recode valid byte sequence */
155 tmp_buff
= g_convert_with_iconv ((const gchar
*) string
,
156 bytes_read
, coder
, NULL
, NULL
, NULL
);
158 if (tmp_buff
!= NULL
)
160 g_string_append (buffer
, tmp_buff
);
164 if ((int) bytes_read
>= left
)
/* step past the converted prefix plus the one offending byte, then append that
   byte to buffer unconverted and mark the overall result as ESTR_PROBLEM */
167 string
+= bytes_read
+ 1;
168 size
-= (bytes_read
+ 1);
169 left
-= (bytes_read
+ 1);
170 g_string_append_c (buffer
, *(string
- 1));
171 state
= ESTR_PROBLEM
;
174 case G_CONVERT_ERROR_PARTIAL_INPUT
:
175 /* Partial character sequence at end of input. */
176 g_string_append (buffer
, tmp_buff
);
178 if ((int) bytes_read
< left
)
/* the unread remainder is represented by one question mark per byte */
180 left
= left
- bytes_read
;
181 tmp_buff
= g_strnfill (left
, '?');
182 g_string_append (buffer
, tmp_buff
);
187 case G_CONVERT_ERROR_BAD_URI
: /* Don't know how to handle this error :( */
188 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH
: /* Don't know how to handle this error :( */
189 case G_CONVERT_ERROR_FAILED
: /* Conversion failed for some reason. */
/* no GError: a NULL or empty conversion result makes the visible code append the
   input string itself */
195 else if (tmp_buff
== NULL
)
197 g_string_append (buffer
, string
);
200 else if (*tmp_buff
== '\0')
218 /* --------------------------------------------------------------------------------------------- */
/* Checks encoding against a NULL-terminated table of encoding names.
   Each entry is compared ignoring ASCII case and only over the length of the table
   entry, so an entry matches when it is a prefix of encoding.
   A NULL encoding skips the scan.
   NOTE(review): the action taken on a match and the return statement are not
   visible in this excerpt. */
221 str_test_encoding_class (const char *encoding
, const char **table
)
225 if (encoding
!= NULL
)
229 for (t
= 0; table
[t
] != NULL
; t
++)
230 if (g_ascii_strncasecmp (encoding
, table
[t
], strlen (table
[t
])) == 0)
237 /* --------------------------------------------------------------------------------------------- */
/* Selects the operations table used_class from the current codeset:
   str_utf8_init() when codeset matches str_utf8_encodings,
   str_8bit_init() when it matches str_8bit_encodings.
   The remaining assignment uses str_ascii_init(); presumably this is the fallback
   branch - the else keyword is not visible in this excerpt, confirm. */
240 str_choose_str_functions (void)
242 if (str_test_encoding_class (codeset
, str_utf8_encodings
))
243 used_class
= str_utf8_init ();
244 else if (str_test_encoding_class (codeset
, str_8bit_encodings
))
245 used_class
= str_8bit_init ();
247 used_class
= str_ascii_init ();
250 /* --------------------------------------------------------------------------------------------- */
251 /*** public functions ****************************************************************************/
252 /* --------------------------------------------------------------------------------------------- */
/* Returns a descriptor for converting from codeset to to_enc
   (g_iconv_open takes the target encoding first).
   When to_enc equals codeset, the shared str_cnv_not_convert is returned instead
   of opening a new descriptor. */
255 str_crt_conv_to (const char *to_enc
)
257 return (!str_test_not_convert (to_enc
)) ? g_iconv_open (to_enc
, codeset
) : str_cnv_not_convert
;
260 /* --------------------------------------------------------------------------------------------- */
/* Returns a descriptor for converting from from_enc to codeset
   (g_iconv_open takes the target encoding first).
   When from_enc equals codeset, the shared str_cnv_not_convert is returned instead
   of opening a new descriptor. */
263 str_crt_conv_from (const char *from_enc
)
265 return (!str_test_not_convert (from_enc
))
266 ? g_iconv_open (codeset
, from_enc
) : str_cnv_not_convert
;
269 /* --------------------------------------------------------------------------------------------- */
/* Closes conv with g_iconv_close, except when conv is the shared
   str_cnv_not_convert descriptor, which is left open. */
272 str_close_conv (GIConv conv
)
274 if (conv
!= str_cnv_not_convert
)
275 g_iconv_close (conv
);
278 /* --------------------------------------------------------------------------------------------- */
/* Converts string via _str_convert, passing -1 as the size argument, and appends
   the result to buffer. Returns the status from _str_convert. */
281 str_convert (GIConv coder
, const char *string
, GString
* buffer
)
283 return _str_convert (coder
, string
, -1, buffer
);
286 /* --------------------------------------------------------------------------------------------- */
/* Same as str_convert but forwards the caller-supplied size to _str_convert.
   Returns the status from _str_convert. */
289 str_nconvert (GIConv coder
, const char *string
, int size
, GString
* buffer
)
291 return _str_convert (coder
, string
, size
, buffer
);
294 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.conv_gerror_message (mcerror, def_msg). */
297 str_conv_gerror_message (GError
* mcerror
, const char *def_msg
)
299 return used_class
.conv_gerror_message (mcerror
, def_msg
);
302 /* --------------------------------------------------------------------------------------------- */
/* Appends string to buffer, converting it through coder when needed.
   When coder is the shared str_cnv_not_convert descriptor, string is appended
   unchanged (a NULL string is appended as an empty string).
   The other visible statement converts via _str_convert with size -1 and stores
   its status in result; presumably this is the else branch - confirm.
   NOTE(review): the return statement is not visible in this excerpt. */
305 str_vfs_convert_from (GIConv coder
, const char *string
, GString
* buffer
)
307 estr_t result
= ESTR_SUCCESS
;
309 if (coder
== str_cnv_not_convert
)
310 g_string_append (buffer
, string
!= NULL
? string
: "");
312 result
= _str_convert (coder
, string
, -1, buffer
);
317 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.vfs_convert_to (coder, string, size, buffer). */
320 str_vfs_convert_to (GIConv coder
, const char *string
, int size
, GString
* buffer
)
322 return used_class
.vfs_convert_to (coder
, string
, size
, buffer
);
325 /* --------------------------------------------------------------------------------------------- */
/* Appends printf-style formatted text to buffer: the variable arguments after
   format are collected with va_start and handed to g_string_append_vprintf.
   NOTE(review): the va_list declaration and the matching va_end are not visible
   in this excerpt. */
328 str_printf (GString
* buffer
, const char *format
, ...)
331 va_start (ap
, format
);
333 g_string_append_vprintf (buffer
, format
, ap
);
337 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: invokes used_class.insert_replace_char on buffer. */
340 str_insert_replace_char (GString
* buffer
)
342 used_class
.insert_replace_char (buffer
);
345 /* --------------------------------------------------------------------------------------------- */
/* Converts one key sequence through conv into the caller-supplied output buffer.
   The input length is strlen (keys) when ch_size is (size_t) -1, otherwise ch_size.
   When g_iconv reports failure, errno EINVAL maps to ESTR_PROBLEM and any other
   errno to ESTR_FAILURE.
   NOTE(review): the success path (what is written after the converted bytes and
   the value returned) is not visible in this excerpt. */
348 str_translate_char (GIConv conv
, const char *keys
, size_t ch_size
, char *output
, size_t out_size
)
/* calling g_iconv with NULL buffers resets the converter state before use */
353 g_iconv (conv
, NULL
, NULL
, NULL
, NULL
);
355 left
= (ch_size
== (size_t) (-1)) ? strlen (keys
) : ch_size
;
/* g_iconv advances the local keys/output pointers and decrements left/out_size */
357 cnv
= g_iconv (conv
, (gchar
**) & keys
, &left
, &output
, &out_size
);
358 if (cnv
== (size_t) (-1))
359 return (errno
== EINVAL
) ? ESTR_PROBLEM
: ESTR_FAILURE
;
365 /* --------------------------------------------------------------------------------------------- */
/* Returns term_encoding, computing it on first use as the upper-cased copy of
   nl_langinfo (CODESET). The string is stored in the file-scope term_encoding
   and released by str_uninit_strings(). */
368 str_detect_termencoding (void)
370 if (term_encoding
== NULL
)
372 /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
374 On Mac OS X, it returns the same case as the LANG input.
375 So transform the result of nl_langinfo (CODESET) to upper case unconditionally. */
376 term_encoding
= g_ascii_strup (nl_langinfo (CODESET
), -1);
379 return term_encoding
;
382 /* --------------------------------------------------------------------------------------------- */
/* True when str_test_encoding_class reports a non-zero result for codeset_name
   against the str_utf8_encodings table. */
385 str_isutf8 (const char *codeset_name
)
387 return (str_test_encoding_class (codeset_name
, str_utf8_encodings
) != 0);
390 /* --------------------------------------------------------------------------------------------- */
/* Initializes the module state: codeset, the identity converter
   str_cnv_not_convert, both terminal converters and used_class.
   codeset is the upper-cased termenc when one is given, otherwise a copy of the
   detected terminal encoding. If a descriptor from codeset to itself cannot be
   opened, the visible code retries with the detected terminal encoding and then
   with DEFAULT_CHARSET.
   NOTE(review): lines between the visible statements (braces, any release of the
   previous codeset string, extra conditions) are not shown in this excerpt. */
393 str_init_strings (const char *termenc
)
395 codeset
= termenc
!= NULL
? g_ascii_strup (termenc
, -1) : g_strdup (str_detect_termencoding ());
397 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
/* first fallback: the encoding reported for the terminal */
398 if (str_cnv_not_convert
== INVALID_CONV
)
403 codeset
= g_strdup (str_detect_termencoding ());
404 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
/* second fallback: the compiled-in DEFAULT_CHARSET */
407 if (str_cnv_not_convert
== INVALID_CONV
)
410 codeset
= g_strdup (DEFAULT_CHARSET
);
411 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
/* both terminal converters start out as the identity descriptor */
415 str_cnv_to_term
= str_cnv_not_convert
;
416 str_cnv_from_term
= str_cnv_not_convert
;
/* pick the operations table that matches the final codeset */
418 str_choose_str_functions ();
421 /* --------------------------------------------------------------------------------------------- */
/* Releases module state: closes str_cnv_not_convert when it is a valid descriptor
   and frees the cached term_encoding string.
   NOTE(review): further cleanup lines may be missing from this excerpt. */
424 str_uninit_strings (void)
426 if (str_cnv_not_convert
!= INVALID_CONV
)
427 g_iconv_close (str_cnv_not_convert
);
428 g_free (term_encoding
);
432 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.term_form (text). */
435 str_term_form (const char *text
)
437 return used_class
.term_form (text
);
440 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.fit_to_term (text, width, just_mode). */
443 str_fit_to_term (const char *text
, int width
, align_crt_t just_mode
)
445 return used_class
.fit_to_term (text
, width
, just_mode
);
448 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.term_trim (text, width). */
451 str_term_trim (const char *text
, int width
)
453 return used_class
.term_trim (text
, width
);
456 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.term_substring (text, start, width). */
459 str_term_substring (const char *text
, int start
, int width
)
461 return used_class
.term_substring (text
, start
, width
);
464 /* --------------------------------------------------------------------------------------------- */
/* Calls used_class.cnext_char on the address of this function's own copy of text,
   so the call cannot modify the pointer held by the caller.
   NOTE(review): the return statement is not visible in this excerpt. */
467 str_get_next_char (char *text
)
470 used_class
.cnext_char ((const char **) &text
);
474 /* --------------------------------------------------------------------------------------------- */
/* Const variant: calls used_class.cnext_char on the address of this function's own
   copy of text.
   NOTE(review): the return statement is not visible in this excerpt. */
477 str_cget_next_char (const char *text
)
479 used_class
.cnext_char (&text
);
483 /* --------------------------------------------------------------------------------------------- */
/* Hands the caller's pointer (cast to const char **) to used_class.cnext_char,
   which may update it in place. */
486 str_next_char (char **text
)
488 used_class
.cnext_char ((const char **) text
);
491 /* --------------------------------------------------------------------------------------------- */
/* Hands the caller's pointer to used_class.cnext_char, which may update it in place. */
494 str_cnext_char (const char **text
)
496 used_class
.cnext_char (text
);
499 /* --------------------------------------------------------------------------------------------- */
/* Calls used_class.cprev_char on the address of this function's own copy of text,
   so the call cannot modify the pointer held by the caller.
   NOTE(review): the return statement is not visible in this excerpt. */
502 str_get_prev_char (char *text
)
504 used_class
.cprev_char ((const char **) &text
);
508 /* --------------------------------------------------------------------------------------------- */
/* Const variant: calls used_class.cprev_char on the address of this function's own
   copy of text.
   NOTE(review): the return statement is not visible in this excerpt. */
511 str_cget_prev_char (const char *text
)
513 used_class
.cprev_char (&text
);
517 /* --------------------------------------------------------------------------------------------- */
/* Hands the caller's pointer (cast to const char **) to used_class.cprev_char,
   which may update it in place. */
520 str_prev_char (char **text
)
522 used_class
.cprev_char ((const char **) text
);
525 /* --------------------------------------------------------------------------------------------- */
/* Hands the caller's pointer to used_class.cprev_char, which may update it in place. */
528 str_cprev_char (const char **text
)
530 used_class
.cprev_char (text
);
533 /* --------------------------------------------------------------------------------------------- */
/* Calls used_class.cnext_char_safe on the address of this function's own copy of
   text, so the call cannot modify the pointer held by the caller.
   NOTE(review): the return statement is not visible in this excerpt. */
536 str_get_next_char_safe (char *text
)
538 used_class
.cnext_char_safe ((const char **) &text
);
542 /* --------------------------------------------------------------------------------------------- */
/* Const variant: calls used_class.cnext_char_safe on the address of this function's
   own copy of text. str_length_char() subtracts text from this function's result.
   NOTE(review): the return statement is not visible in this excerpt. */
545 str_cget_next_char_safe (const char *text
)
547 used_class
.cnext_char_safe (&text
);
551 /* --------------------------------------------------------------------------------------------- */
/* Hands the caller's pointer (cast to const char **) to used_class.cnext_char_safe,
   which may update it in place. */
554 str_next_char_safe (char **text
)
556 used_class
.cnext_char_safe ((const char **) text
);
559 /* --------------------------------------------------------------------------------------------- */
/* Hands the caller's pointer to used_class.cnext_char_safe, which may update it
   in place. */
562 str_cnext_char_safe (const char **text
)
564 used_class
.cnext_char_safe (text
);
567 /* --------------------------------------------------------------------------------------------- */
/* Calls used_class.cprev_char_safe on the address of this function's own copy of
   text, so the call cannot modify the pointer held by the caller.
   NOTE(review): the return statement is not visible in this excerpt. */
570 str_get_prev_char_safe (char *text
)
572 used_class
.cprev_char_safe ((const char **) &text
);
576 /* --------------------------------------------------------------------------------------------- */
/* Const variant: calls used_class.cprev_char_safe on the address of this function's
   own copy of text.
   NOTE(review): the return statement is not visible in this excerpt. */
579 str_cget_prev_char_safe (const char *text
)
581 used_class
.cprev_char_safe (&text
);
585 /* --------------------------------------------------------------------------------------------- */
/* Hands the caller's pointer (cast to const char **) to used_class.cprev_char_safe,
   which may update it in place. */
588 str_prev_char_safe (char **text
)
590 used_class
.cprev_char_safe ((const char **) text
);
593 /* --------------------------------------------------------------------------------------------- */
/* Hands the caller's pointer to used_class.cprev_char_safe, which may update it
   in place. */
596 str_cprev_char_safe (const char **text
)
598 used_class
.cprev_char_safe (text
);
601 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.cnext_noncomb_char for the caller's pointer,
   cast to const char **. */
604 str_next_noncomb_char (char **text
)
606 return used_class
.cnext_noncomb_char ((const char **) text
);
609 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.cnext_noncomb_char (text). */
612 str_cnext_noncomb_char (const char **text
)
614 return used_class
.cnext_noncomb_char (text
);
617 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.cprev_noncomb_char for the caller's pointer
   (cast to const char **) and the begin pointer. */
620 str_prev_noncomb_char (char **text
, const char *begin
)
622 return used_class
.cprev_noncomb_char ((const char **) text
, begin
);
625 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.cprev_noncomb_char (text, begin). */
628 str_cprev_noncomb_char (const char **text
, const char *begin
)
630 return used_class
.cprev_noncomb_char (text
, begin
);
633 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.is_valid_char (ch, size). */
636 str_is_valid_char (const char *ch
, size_t size
)
638 return used_class
.is_valid_char (ch
, size
);
641 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.term_width1 (text). */
644 str_term_width1 (const char *text
)
646 return used_class
.term_width1 (text
);
649 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.term_width2 (text, length). */
652 str_term_width2 (const char *text
, size_t length
)
654 return used_class
.term_width2 (text
, length
);
657 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.term_char_width (text). */
660 str_term_char_width (const char *text
)
662 return used_class
.term_char_width (text
);
665 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.offset_to_pos (text, length). */
668 str_offset_to_pos (const char *text
, size_t length
)
670 return used_class
.offset_to_pos (text
, length
);
673 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.length (text). */
676 str_length (const char *text
)
678 return used_class
.length (text
);
681 /* --------------------------------------------------------------------------------------------- */
/* Returns the pointer distance between the result of
   str_cget_next_char_safe (text) and text itself. */
684 str_length_char (const char *text
)
686 return str_cget_next_char_safe (text
) - text
;
689 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.length2 (text, size). */
692 str_length2 (const char *text
, int size
)
694 return used_class
.length2 (text
, size
);
697 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.length_noncomb (text). */
700 str_length_noncomb (const char *text
)
702 return used_class
.length_noncomb (text
);
705 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.column_to_pos (text, pos). */
708 str_column_to_pos (const char *text
, size_t pos
)
710 return used_class
.column_to_pos (text
, pos
);
713 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.char_isspace (ch). */
716 str_isspace (const char *ch
)
718 return used_class
.char_isspace (ch
);
721 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.char_ispunct (ch). */
724 str_ispunct (const char *ch
)
726 return used_class
.char_ispunct (ch
);
729 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.char_isalnum (ch). */
732 str_isalnum (const char *ch
)
734 return used_class
.char_isalnum (ch
);
737 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.char_isdigit (ch). */
740 str_isdigit (const char *ch
)
742 return used_class
.char_isdigit (ch
);
745 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.char_toupper (ch, out, remain). */
748 str_toupper (const char *ch
, char **out
, size_t * remain
)
750 return used_class
.char_toupper (ch
, out
, remain
);
753 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.char_tolower (ch, out, remain). */
756 str_tolower (const char *ch
, char **out
, size_t * remain
)
758 return used_class
.char_tolower (ch
, out
, remain
);
761 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.char_isprint (ch). */
764 str_isprint (const char *ch
)
766 return used_class
.char_isprint (ch
);
769 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.char_iscombiningmark (ch). */
772 str_iscombiningmark (const char *ch
)
774 return used_class
.char_iscombiningmark (ch
);
777 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.trunc (text, width). */
780 str_trunc (const char *text
, int width
)
782 return used_class
.trunc (text
, width
);
785 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.create_search_needle (needle, case_sen). */
788 str_create_search_needle (const char *needle
, int case_sen
)
790 return used_class
.create_search_needle (needle
, case_sen
);
793 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: invokes used_class.release_search_needle (needle, case_sen). */
796 str_release_search_needle (char *needle
, int case_sen
)
798 used_class
.release_search_needle (needle
, case_sen
);
801 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.search_first (text, search, case_sen). */
804 str_search_first (const char *text
, const char *search
, int case_sen
)
806 return used_class
.search_first (text
, search
, case_sen
);
809 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.search_last (text, search, case_sen). */
812 str_search_last (const char *text
, const char *search
, int case_sen
)
814 return used_class
.search_last (text
, search
, case_sen
);
817 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.is_valid_string (text). */
820 str_is_valid_string (const char *text
)
822 return used_class
.is_valid_string (text
);
825 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.compare (t1, t2). */
828 str_compare (const char *t1
, const char *t2
)
830 return used_class
.compare (t1
, t2
);
833 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.ncompare (t1, t2). */
836 str_ncompare (const char *t1
, const char *t2
)
838 return used_class
.ncompare (t1
, t2
);
841 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.casecmp (t1, t2). */
844 str_casecmp (const char *t1
, const char *t2
)
846 return used_class
.casecmp (t1
, t2
);
849 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.ncasecmp (t1, t2). */
852 str_ncasecmp (const char *t1
, const char *t2
)
854 return used_class
.ncasecmp (t1
, t2
);
857 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.prefix (text, prefix). */
860 str_prefix (const char *text
, const char *prefix
)
862 return used_class
.prefix (text
, prefix
);
865 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.caseprefix (text, prefix). */
868 str_caseprefix (const char *text
, const char *prefix
)
870 return used_class
.caseprefix (text
, prefix
);
873 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: invokes used_class.fix_string on the writable text buffer. */
876 str_fix_string (char *text
)
878 used_class
.fix_string (text
);
881 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.create_key (text, case_sen). */
884 str_create_key (const char *text
, int case_sen
)
886 return used_class
.create_key (text
, case_sen
);
889 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.create_key_for_filename (text, case_sen). */
892 str_create_key_for_filename (const char *text
, int case_sen
)
894 return used_class
.create_key_for_filename (text
, case_sen
);
897 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: returns used_class.key_collate (t1, t2, case_sen). */
900 str_key_collate (const char *t1
, const char *t2
, int case_sen
)
902 return used_class
.key_collate (t1
, t2
, case_sen
);
905 /* --------------------------------------------------------------------------------------------- */
/* Thin wrapper: invokes used_class.release_key (key, case_sen). */
908 str_release_key (char *key
, int case_sen
)
910 used_class
.release_key (key
, case_sen
);
913 /* --------------------------------------------------------------------------------------------- */
/* Works on a private copy of text (g_strdup), looks for newline characters with
   strchr and measures a piece with str_term_width1, comparing that width with
   the value behind columns.
   NOTE(review): the loop structure and the writes to the lines and columns outputs
   are not visible in this excerpt; presumably the function reports the line count
   and the widest line of text - confirm against the full source. */
916 str_msg_term_size (const char *text
, int *lines
, int *columns
)
925 tmp
= g_strdup (text
);
932 q
= strchr (p
, '\n');
939 width
= str_term_width1 (p
);
940 if (width
> *columns
)
954 /* --------------------------------------------------------------------------------------------- */
/* Searches haystack backwards for needle with g_strrstr_len, starting with a window
   of strlen (haystack) bytes. After a search the window length is set to the offset
   of the found position minus one, and the loop condition counts skip_count down.
   NOTE(review): the loop opening, the handling of a failed search and the return
   statement are not visible in this excerpt. */
957 strrstr_skip_count (const char *haystack
, const char *needle
, size_t skip_count
)
962 len
= strlen (haystack
);
966 semi
= g_strrstr_len (haystack
, len
, needle
);
969 len
= semi
- haystack
- 1;
971 while (skip_count
-- != 0);
976 /* --------------------------------------------------------------------------------------------- */
977 /* Interpret string as a non-negative decimal integer, optionally multiplied by various values.
979 * @param str input value
980 * @param invalid set to TRUE if "str" does not represent a number in this format
982 * @return integer representation of "str", 0 in case of error.
986 parse_integer (const char *str
, gboolean
* invalid
)
/* parse a base-10 number; the last argument lists the suffix letters accepted by
   xstrtoumax */
992 e
= xstrtoumax (str
, &suffix
, 10, &n
, "bcEGkKMPTwYZ0");
/* an x right after the number introduces a multiplier, parsed recursively from the
   text that follows it */
993 if (e
== LONGINT_INVALID_SUFFIX_CHAR
&& *suffix
== 'x')
995 uintmax_t multiplier
;
997 multiplier
= parse_integer (suffix
+ 1, invalid
);
/* overflow test: dividing the product by the multiplier must give n back */
998 if (multiplier
!= 0 && n
* multiplier
/ multiplier
!= n
)
/* NOTE(review): the bodies of these branches and the return statement are not
   visible in this excerpt */
1006 else if (e
!= LONGINT_OK
)
1015 /* --------------------------------------------------------------------------------------------- */