2 Common strings utilities
4 Copyright (C) 2007, 2011
5 The Free Software Foundation, Inc.
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
35 #include "lib/global.h"
36 #include "lib/strutil.h"
38 /* Encoding names that are treated as UTF-8. */
39 static const char *str_utf8_encodings
[] = {
45 /* Standard 8-bit encodings: no wide or multibyte characters. */
46 static const char *str_8bit_encodings
[] = {
65 /* Terminal encoding: codeset is assigned in str_init_strings (),
   term_encoding caches the value computed by str_detect_termencoding (). */
66 static char *codeset
= NULL
;
67 static char *term_encoding
= NULL
;
68 /* Functions for encoding-specific operations; filled in by str_choose_str_functions (). */
69 static struct str_class used_class
;
/* Converters to and from the terminal encoding. str_init_strings () points both
   at str_cnv_not_convert, the descriptor handed out when source and target
   encodings are the same. */
71 GIConv str_cnv_to_term
;
72 GIConv str_cnv_from_term
;
73 GIConv str_cnv_not_convert
= INVALID_CONV
;
75 /* Return nonzero when enc names the same encoding as the terminal codeset
   (ASCII case-insensitive comparison). */
77 str_test_not_convert (const char *enc
)
79 return g_ascii_strcasecmp (enc
, codeset
) == 0;
/* Create a converter from the terminal codeset to to_enc.
   When to_enc is the terminal codeset itself, the shared str_cnv_not_convert
   descriptor is returned instead of opening a new one. */
83 str_crt_conv_to (const char *to_enc
)
85 return (!str_test_not_convert (to_enc
)) ? g_iconv_open (to_enc
, codeset
) : str_cnv_not_convert
;
/* Create a converter from from_enc to the terminal codeset.
   When from_enc is the terminal codeset itself, the shared str_cnv_not_convert
   descriptor is returned instead of opening a new one. */
89 str_crt_conv_from (const char *from_enc
)
91 return (!str_test_not_convert (from_enc
))
92 ? g_iconv_open (codeset
, from_enc
) : str_cnv_not_convert
;
/* Dispose of conv, except when it is the shared str_cnv_not_convert descriptor.
   NOTE(review): the guarded statement is not visible in this listing;
   presumably it closes conv -- confirm. */
96 str_close_conv (GIConv conv
)
98 if (conv
!= str_cnv_not_convert
)
/* Convert string with the iconv descriptor coder and append the output to buffer.
   size is the number of input bytes; callers in this file pass -1 or an explicit
   count (see str_convert () and str_nconvert ()).
   The input is converted with g_convert_with_iconv (); on error the GLib error
   code selects the recovery:
     - NO_CONVERSION: a run of '?' as long as the input is appended;
     - ILLEGAL_SEQUENCE: the valid prefix is recoded and appended, the offending
       byte is copied through unchanged, and state becomes ESTR_PROBLEM;
     - PARTIAL_INPUT: what was converted is appended, the rest is filled with '?'.
   NOTE(review): several lines of this function (early returns, loop, breaks,
   frees, final return) are not visible in this listing. */
103 _str_convert (GIConv coder
, const char *string
, int size
, GString
* buffer
)
105 estr_t state
= ESTR_SUCCESS
;
106 gchar
*tmp_buff
= NULL
;
108 gsize bytes_read
= 0;
109 gsize bytes_written
= 0;
110 GError
*error
= NULL
;
113 if (coder
== INVALID_CONV
)
116 if (string
== NULL
|| buffer
== NULL
)
120 if (! used_class.is_valid_string (string))
127 size
= strlen (string
);
131 left
= strlen (string
);
/* all-NULL arguments: reset coder to its initial conversion state */
137 g_iconv (coder
, NULL
, NULL
, NULL
, NULL
);
141 tmp_buff
= g_convert_with_iconv ((const gchar
*) string
,
142 left
, coder
, &bytes_read
, &bytes_written
, &error
);
/* remember the error code before the GError is released */
145 int code
= error
->code
;
147 g_error_free (error
);
152 case G_CONVERT_ERROR_NO_CONVERSION
:
153 /* Conversion between the requested character sets is not supported. */
154 tmp_buff
= g_strnfill (strlen (string
), '?');
155 g_string_append (buffer
, tmp_buff
);
159 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE
:
160 /* Invalid byte sequence in conversion input. */
161 if ((tmp_buff
== NULL
) && (bytes_read
!= 0))
162 /* recode valid byte sequence */
163 tmp_buff
= g_convert_with_iconv ((const gchar
*) string
,
164 bytes_read
, coder
, NULL
, NULL
, NULL
);
166 if (tmp_buff
!= NULL
)
168 g_string_append (buffer
, tmp_buff
);
/* step over the offending byte and copy it to the output unchanged */
172 if ((int) bytes_read
< left
)
174 string
+= bytes_read
+ 1;
175 size
-= (bytes_read
+ 1);
176 left
-= (bytes_read
+ 1);
177 g_string_append_c (buffer
, *(string
- 1));
183 state
= ESTR_PROBLEM
;
186 case G_CONVERT_ERROR_PARTIAL_INPUT
:
187 /* Partial character sequence at end of input. */
188 g_string_append (buffer
, tmp_buff
);
/* replace the unconverted tail with '?' characters */
190 if ((int) bytes_read
< left
)
192 left
= left
- bytes_read
;
193 tmp_buff
= g_strnfill (left
, '?');
194 g_string_append (buffer
, tmp_buff
);
199 case G_CONVERT_ERROR_BAD_URI
: /* Don't know how to handle this error :( */
200 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH
: /* Don't know how to handle this error :( */
201 case G_CONVERT_ERROR_FAILED
: /* Conversion failed for some reason. */
209 if (tmp_buff
!= NULL
)
213 g_string_append (buffer
, tmp_buff
);
215 string
+= bytes_read
;
221 g_string_append (buffer
, string
);
227 g_string_append (buffer
, string
);
/* Convert string with coder and append the result to buffer.
   Calls _str_convert () with -1 as the size argument. */
236 str_convert (GIConv coder
, const char *string
, GString
* buffer
)
238 return _str_convert (coder
, string
, -1, buffer
);
/* Convert string with coder and append the result to buffer,
   forwarding the caller's size to _str_convert (). */
242 str_nconvert (GIConv coder
, const char *string
, int size
, GString
* buffer
)
244 return _str_convert (coder
, string
, size
, buffer
);
/* Forward error and def_msg to the conv_gerror_message operation of the
   active encoding class (used_class) and return its result. */
248 str_conv_gerror_message (GError
* error
, const char *def_msg
)
250 return used_class
.conv_gerror_message (error
, def_msg
);
/* Append string, converted with coder, to buffer.
   When coder is str_cnv_not_convert the text is appended unchanged (a NULL
   string is treated as "") and result is ESTR_SUCCESS; otherwise result is
   whatever _str_convert () yields for the string with size -1. */
254 str_vfs_convert_from (GIConv coder
, const char *string
, GString
* buffer
)
258 if (coder
== str_cnv_not_convert
)
260 g_string_append (buffer
, string
!= NULL
? string
: "");
261 result
= ESTR_SUCCESS
;
264 result
= _str_convert (coder
, string
, -1, buffer
);
/* Forward all arguments to the vfs_convert_to operation of the active
   encoding class (used_class) and return its result. */
270 str_vfs_convert_to (GIConv coder
, const char *string
, int size
, GString
* buffer
)
272 return used_class
.vfs_convert_to (coder
, string
, size
, buffer
);
/* Append printf-style formatted text to buffer.
   With GLib >= 2.14 the text is appended directly by g_string_append_vprintf ();
   the other branch formats into a temporary string with g_strdup_vprintf () and
   appends that. */
276 str_printf (GString
* buffer
, const char *format
, ...)
279 va_start (ap
, format
);
280 #if GLIB_CHECK_VERSION (2, 14, 0)
281 g_string_append_vprintf (buffer
, format
, ap
);
285 tmp
= g_strdup_vprintf (format
, ap
);
286 g_string_append (buffer
, tmp
);
/* Forward buffer to the insert_replace_char operation of the active
   encoding class (used_class). */
294 str_insert_replace_char (GString
* buffer
)
296 used_class
.insert_replace_char (buffer
);
/* Convert the byte sequence keys with conv into output.
   ch_size is the input length in bytes; (size_t) (-1) means "use strlen (keys)".
   out_size is the space available in output.
   The converter is reset before use. When g_iconv () reports failure, the result
   is ESTR_PROBLEM if errno is EINVAL and ESTR_FAILURE otherwise. */
300 str_translate_char (GIConv conv
, const char *keys
, size_t ch_size
, char *output
, size_t out_size
)
/* all-NULL arguments: reset conv to its initial conversion state */
305 g_iconv (conv
, NULL
, NULL
, NULL
, NULL
);
307 left
= (ch_size
== (size_t) (-1)) ? strlen (keys
) : ch_size
;
309 cnv
= g_iconv (conv
, (gchar
**) & keys
, &left
, &output
, &out_size
);
310 if (cnv
== (size_t) (-1))
312 return (errno
== EINVAL
) ? ESTR_PROBLEM
: ESTR_FAILURE
;
/* Return the terminal encoding name. On first use it is obtained from
   nl_langinfo (CODESET), upper-cased, and cached in term_encoding. */
323 str_detect_termencoding (void)
325 if (term_encoding
== NULL
)
327 /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
329 On Mac OS X, it returns the same case as the LANG input.
330 So let's transform the result of nl_langinfo (CODESET) to upper case unconditionally. */
331 term_encoding
= g_ascii_strup (nl_langinfo (CODESET
), -1);
334 return term_encoding
;
/* Count the entries of the NULL-terminated table that encoding starts with.
   Each entry is compared case-insensitively (ASCII) over the entry's own length,
   so an entry matches any encoding name it is a prefix of.
   A NULL encoding is checked for before the loop. */
338 str_test_encoding_class (const char *encoding
, const char **table
)
342 if (encoding
== NULL
)
345 for (t
= 0; table
[t
] != NULL
; t
++)
347 result
+= (g_ascii_strncasecmp (encoding
, table
[t
], strlen (table
[t
])) == 0);
/* Select the implementation stored in used_class according to codeset:
   str_utf8_init () when codeset matches str_utf8_encodings,
   str_8bit_init () when it matches str_8bit_encodings;
   str_ascii_init () is the remaining assignment (presumably the fallback
   branch -- the else keyword is not visible in this listing). */
353 str_choose_str_functions (void)
355 if (str_test_encoding_class (codeset
, str_utf8_encodings
))
357 used_class
= str_utf8_init ();
359 else if (str_test_encoding_class (codeset
, str_8bit_encodings
))
361 used_class
= str_8bit_init ();
365 used_class
= str_ascii_init ();
/* Return nonzero when codeset_name matches at least one entry of
   str_utf8_encodings (see str_test_encoding_class ()). */
370 str_isutf8 (const char *codeset_name
)
372 return (str_test_encoding_class (codeset_name
, str_utf8_encodings
) != 0);
/* Initialise the string layer for the terminal encoding termenc.
   codeset becomes the upper-cased termenc, or a copy of the detected terminal
   encoding when termenc is NULL. str_cnv_not_convert is opened as a
   codeset -> codeset converter; if that yields INVALID_CONV the detected
   encoding and then "ASCII" are tried. Finally both terminal converters are
   set to str_cnv_not_convert and the encoding class is chosen.
   NOTE(review): codeset is reassigned on the fallback paths; whether the
   previous string is freed first is not visible in this listing -- confirm. */
376 str_init_strings (const char *termenc
)
378 codeset
= termenc
!= NULL
? g_ascii_strup (termenc
, -1) : g_strdup (str_detect_termencoding ());
380 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
381 if (str_cnv_not_convert
== INVALID_CONV
)
386 codeset
= g_strdup (str_detect_termencoding ());
387 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
390 if (str_cnv_not_convert
== INVALID_CONV
)
393 codeset
= g_strdup ("ASCII");
394 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
398 str_cnv_to_term
= str_cnv_not_convert
;
399 str_cnv_from_term
= str_cnv_not_convert
;
401 str_choose_str_functions ();
/* Tear down what str_init_strings () set up: close str_cnv_not_convert when it
   is a valid descriptor and free the cached term_encoding.
   NOTE(review): no release of codeset is visible in this listing -- confirm
   whether it is freed elsewhere. */
405 str_uninit_strings (void)
407 if (str_cnv_not_convert
!= INVALID_CONV
)
408 g_iconv_close (str_cnv_not_convert
);
409 g_free (term_encoding
);
/* Return the result of the term_form operation of the active encoding class
   (used_class) applied to text. */
414 str_term_form (const char *text
)
416 return used_class
.term_form (text
);
/* Return the result of used_class.fit_to_term for text, the requested width
   and the alignment mode just_mode. */
420 str_fit_to_term (const char *text
, int width
, align_crt_t just_mode
)
422 return used_class
.fit_to_term (text
, width
, just_mode
);
/* Return the result of used_class.term_trim for text and width. */
426 str_term_trim (const char *text
, int width
)
428 return used_class
.term_trim (text
, width
);
/* Return the result of used_class.term_substring for text, start and width. */
432 str_term_substring (const char *text
, int start
, int width
)
434 return used_class
.term_substring (text
, start
, width
);
/* Apply used_class.cnext_char to the address of the local text pointer
   (cast to const char **).
   NOTE(review): presumably the updated text is then returned -- that statement
   is not visible in this listing. */
438 str_get_next_char (char *text
)
441 used_class
.cnext_char ((const char **) &text
);
/* Const variant: apply used_class.cnext_char to the address of the local
   text pointer.
   NOTE(review): presumably the updated text is then returned -- that statement
   is not visible in this listing. */
446 str_cget_next_char (const char *text
)
448 used_class
.cnext_char (&text
);
/* Apply used_class.cnext_char to the caller's pointer; text is passed through
   with a cast to const char **. */
453 str_next_char (char **text
)
455 used_class
.cnext_char ((const char **) text
);
/* Apply used_class.cnext_char to the caller's const pointer. */
459 str_cnext_char (const char **text
)
461 used_class
.cnext_char (text
);
/* Apply used_class.cprev_char to the address of the local text pointer
   (cast to const char **).
   NOTE(review): presumably the updated text is then returned -- that statement
   is not visible in this listing. */
465 str_get_prev_char (char *text
)
467 used_class
.cprev_char ((const char **) &text
);
/* Const variant: apply used_class.cprev_char to the address of the local
   text pointer.
   NOTE(review): presumably the updated text is then returned -- that statement
   is not visible in this listing. */
472 str_cget_prev_char (const char *text
)
474 used_class
.cprev_char (&text
);
/* Apply used_class.cprev_char to the caller's pointer; text is passed through
   with a cast to const char **. */
479 str_prev_char (char **text
)
481 used_class
.cprev_char ((const char **) text
);
/* Apply used_class.cprev_char to the caller's const pointer. */
485 str_cprev_char (const char **text
)
487 used_class
.cprev_char (text
);
/* Apply used_class.cnext_char_safe to the address of the local text pointer
   (cast to const char **).
   NOTE(review): presumably the updated text is then returned -- that statement
   is not visible in this listing. */
491 str_get_next_char_safe (char *text
)
493 used_class
.cnext_char_safe ((const char **) &text
);
/* Const variant: apply used_class.cnext_char_safe to the address of the local
   text pointer. str_length_char () uses the value returned by this function as
   a pointer into the same string.
   NOTE(review): the return statement itself is not visible in this listing. */
498 str_cget_next_char_safe (const char *text
)
500 used_class
.cnext_char_safe (&text
);
/* Apply used_class.cnext_char_safe to the caller's pointer; text is passed
   through with a cast to const char **. */
505 str_next_char_safe (char **text
)
507 used_class
.cnext_char_safe ((const char **) text
);
/* Apply used_class.cnext_char_safe to the caller's const pointer. */
511 str_cnext_char_safe (const char **text
)
513 used_class
.cnext_char_safe (text
);
/* Apply used_class.cprev_char_safe to the address of the local text pointer
   (cast to const char **).
   NOTE(review): presumably the updated text is then returned -- that statement
   is not visible in this listing. */
517 str_get_prev_char_safe (char *text
)
519 used_class
.cprev_char_safe ((const char **) &text
);
/* Const variant: apply used_class.cprev_char_safe to the address of the local
   text pointer.
   NOTE(review): presumably the updated text is then returned -- that statement
   is not visible in this listing. */
524 str_cget_prev_char_safe (const char *text
)
526 used_class
.cprev_char_safe (&text
);
/* Apply used_class.cprev_char_safe to the caller's pointer; text is passed
   through with a cast to const char **. */
531 str_prev_char_safe (char **text
)
533 used_class
.cprev_char_safe ((const char **) text
);
/* Apply used_class.cprev_char_safe to the caller's const pointer. */
537 str_cprev_char_safe (const char **text
)
539 used_class
.cprev_char_safe (text
);
/* Return the result of used_class.cnext_noncomb_char applied to the caller's
   pointer (cast to const char **). */
543 str_next_noncomb_char (char **text
)
545 return used_class
.cnext_noncomb_char ((const char **) text
);
/* Return the result of used_class.cnext_noncomb_char applied to the caller's
   const pointer. */
549 str_cnext_noncomb_char (const char **text
)
551 return used_class
.cnext_noncomb_char (text
);
/* Return the result of used_class.cprev_noncomb_char for the caller's pointer
   (cast to const char **) and begin. */
555 str_prev_noncomb_char (char **text
, const char *begin
)
557 return used_class
.cprev_noncomb_char ((const char **) text
, begin
);
/* Return the result of used_class.cprev_noncomb_char for the caller's const
   pointer and begin. */
561 str_cprev_noncomb_char (const char **text
, const char *begin
)
563 return used_class
.cprev_noncomb_char (text
, begin
);
/* Return the result of used_class.is_valid_char for ch and size. */
567 str_is_valid_char (const char *ch
, size_t size
)
569 return used_class
.is_valid_char (ch
, size
);
/* Return the result of used_class.term_width1 for text. */
573 str_term_width1 (const char *text
)
575 return used_class
.term_width1 (text
);
/* Return the result of used_class.term_width2 for text and length. */
579 str_term_width2 (const char *text
, size_t length
)
581 return used_class
.term_width2 (text
, length
);
/* Return the result of used_class.term_char_width for text. */
585 str_term_char_width (const char *text
)
587 return used_class
.term_char_width (text
);
/* Return the result of used_class.offset_to_pos for text and length. */
591 str_offset_to_pos (const char *text
, size_t length
)
593 return used_class
.offset_to_pos (text
, length
);
/* Return the result of used_class.length for text. */
597 str_length (const char *text
)
599 return used_class
.length (text
);
/* Return the distance in bytes from text to the position returned by
   str_cget_next_char_safe (text). */
603 str_length_char (const char *text
)
605 return str_cget_next_char_safe (text
) - text
;
/* Return the result of used_class.length2 for text and size. */
609 str_length2 (const char *text
, int size
)
611 return used_class
.length2 (text
, size
);
/* Return the result of used_class.length_noncomb for text. */
615 str_length_noncomb (const char *text
)
617 return used_class
.length_noncomb (text
);
/* Return the result of used_class.column_to_pos for text and pos. */
621 str_column_to_pos (const char *text
, size_t pos
)
623 return used_class
.column_to_pos (text
, pos
);
/* Return the result of used_class.char_isspace for the character at ch. */
627 str_isspace (const char *ch
)
629 return used_class
.char_isspace (ch
);
/* Return the result of used_class.char_ispunct for the character at ch. */
633 str_ispunct (const char *ch
)
635 return used_class
.char_ispunct (ch
);
/* Return the result of used_class.char_isalnum for the character at ch. */
639 str_isalnum (const char *ch
)
641 return used_class
.char_isalnum (ch
);
/* Return the result of used_class.char_isdigit for the character at ch. */
645 str_isdigit (const char *ch
)
647 return used_class
.char_isdigit (ch
);
/* Return the result of used_class.char_toupper; ch, out and remain are
   forwarded unchanged. */
651 str_toupper (const char *ch
, char **out
, size_t * remain
)
653 return used_class
.char_toupper (ch
, out
, remain
);
/* Return the result of used_class.char_tolower; ch, out and remain are
   forwarded unchanged. */
657 str_tolower (const char *ch
, char **out
, size_t * remain
)
659 return used_class
.char_tolower (ch
, out
, remain
);
/* Return the result of used_class.char_isprint for the character at ch. */
663 str_isprint (const char *ch
)
665 return used_class
.char_isprint (ch
);
/* Return the result of used_class.char_iscombiningmark for the character at ch. */
669 str_iscombiningmark (const char *ch
)
671 return used_class
.char_iscombiningmark (ch
);
/* Return the result of used_class.trunc for text and width. */
675 str_trunc (const char *text
, int width
)
677 return used_class
.trunc (text
, width
);
/* Return the result of used_class.create_search_needle for needle and the
   case-sensitivity flag case_sen. */
681 str_create_search_needle (const char *needle
, int case_sen
)
683 return used_class
.create_search_needle (needle
, case_sen
);
/* Hand needle and case_sen to used_class.release_search_needle. */
688 str_release_search_needle (char *needle
, int case_sen
)
690 used_class
.release_search_needle (needle
, case_sen
);
/* Return the result of used_class.search_first for text, search and the
   case-sensitivity flag case_sen. */
694 str_search_first (const char *text
, const char *search
, int case_sen
)
696 return used_class
.search_first (text
, search
, case_sen
);
/* Return the result of used_class.search_last for text, search and the
   case-sensitivity flag case_sen. */
700 str_search_last (const char *text
, const char *search
, int case_sen
)
702 return used_class
.search_last (text
, search
, case_sen
);
/* Return the result of used_class.is_valid_string for text. */
706 str_is_valid_string (const char *text
)
708 return used_class
.is_valid_string (text
);
/* Return the result of used_class.compare for t1 and t2. */
712 str_compare (const char *t1
, const char *t2
)
714 return used_class
.compare (t1
, t2
);
/* Return the result of used_class.ncompare for t1 and t2. */
718 str_ncompare (const char *t1
, const char *t2
)
720 return used_class
.ncompare (t1
, t2
);
/* Return the result of used_class.casecmp for t1 and t2. */
724 str_casecmp (const char *t1
, const char *t2
)
726 return used_class
.casecmp (t1
, t2
);
/* Return the result of used_class.ncasecmp for t1 and t2. */
730 str_ncasecmp (const char *t1
, const char *t2
)
732 return used_class
.ncasecmp (t1
, t2
);
/* Return the result of used_class.prefix for text and prefix. */
736 str_prefix (const char *text
, const char *prefix
)
738 return used_class
.prefix (text
, prefix
);
/* Return the result of used_class.caseprefix for text and prefix. */
742 str_caseprefix (const char *text
, const char *prefix
)
744 return used_class
.caseprefix (text
, prefix
);
/* Hand the writable string text to used_class.fix_string. */
748 str_fix_string (char *text
)
750 used_class
.fix_string (text
);
/* Return the result of used_class.create_key for text and the
   case-sensitivity flag case_sen. */
754 str_create_key (const char *text
, int case_sen
)
756 return used_class
.create_key (text
, case_sen
);
/* Return the result of used_class.create_key_for_filename for text and the
   case-sensitivity flag case_sen. */
760 str_create_key_for_filename (const char *text
, int case_sen
)
762 return used_class
.create_key_for_filename (text
, case_sen
);
/* Return the result of used_class.key_collate for the keys t1 and t2 and the
   case-sensitivity flag case_sen. */
766 str_key_collate (const char *t1
, const char *t2
, int case_sen
)
768 return used_class
.key_collate (t1
, t2
, case_sen
);
/* Hand key and case_sen to used_class.release_key. */
772 str_release_key (char *key
, int case_sen
)
774 used_class
.release_key (key
, case_sen
);
/* Measure a message: works on a private copy of text (g_strdup), looks for
   '\n' separators with strchr, and compares the terminal width of each piece
   (str_term_width1) against *columns.
   NOTE(review): the loop, the updates of *lines and *columns and the release
   of the copy are not visible in this listing -- confirm against the full source. */
778 str_msg_term_size (const char *text
, int *lines
, int *columns
)
788 tmp
= g_strdup (text
);
793 q
= strchr (p
, '\n');
800 width
= str_term_width1 (p
);
801 if (width
> *columns
)
815 /* --------------------------------------------------------------------------------------------- */
/* Search haystack backwards for needle with g_strrstr_len ().
   After a hit, len is reduced to the offset just before it, so the next search
   only sees the text preceding the previous match. The loop condition
   post-decrements skip_count, so the search runs at most skip_count + 1 times.
   NOTE(review): the handling of a failed search and the return statement are
   not visible in this listing. */
818 strrstr_skip_count (const char *haystack
, const char *needle
, size_t skip_count
)
823 len
= strlen (haystack
);
827 semi
= g_strrstr_len (haystack
, len
, needle
);
830 len
= semi
- haystack
- 1;
832 while (skip_count
-- != 0);
836 /* --------------------------------------------------------------------------------------------- */