2 Common strings utilities
4 Copyright (C) 2007, 2011
5 The Free Software Foundation, Inc.
10 The file_date routine is mostly from GNU's fileutils package,
11 written by Richard Stallman and David MacKenzie.
13 This file is part of the Midnight Commander.
15 The Midnight Commander is free software: you can redistribute it
16 and/or modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation, either version 3 of the License,
18 or (at your option) any later version.
20 The Midnight Commander is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program. If not, see <http://www.gnu.org/licenses/>.
38 #include "lib/global.h"
39 #include "lib/strutil.h"
41 /* Encoding names that are treated as UTF-8. */
42 static const char *str_utf8_encodings
[] = {
48 /* Standard 8-bit encodings: no wide or multibyte characters. */
49 static const char *str_8bit_encodings
[] = {
68 /* Terminal encoding name; assigned in str_init_strings (). */
69 static char *codeset
= NULL
;
/* Result of nl_langinfo (CODESET), upper-cased; filled lazily by
   str_detect_termencoding () and released in str_uninit_strings (). */
70 static char *term_encoding
= NULL
;
71 /* Functions for encoding-specific operations; selected by
   str_choose_str_functions () from the value of codeset. */
72 static struct str_class used_class
;
/* Converter handles; str_init_strings () sets the two *_term handles to the
   same value as str_cnv_not_convert. */
74 GIConv str_cnv_to_term
;
75 GIConv str_cnv_from_term
;
76 GIConv str_cnv_not_convert
= INVALID_CONV
;
78 /* Return whether enc names the same encoding as the terminal codeset.
   The comparison is ASCII case-insensitive (g_ascii_strcasecmp). */
80 str_test_not_convert (const char *enc
)
82 return g_ascii_strcasecmp (enc
, codeset
) == 0;
/* Create a converter from the terminal codeset to to_enc.
   When to_enc equals the terminal codeset, no converter is opened and the
   shared str_cnv_not_convert handle is returned instead. */
86 str_crt_conv_to (const char *to_enc
)
88 return (!str_test_not_convert (to_enc
)) ? g_iconv_open (to_enc
, codeset
) : str_cnv_not_convert
;
/* Create a converter from from_enc to the terminal codeset.
   When from_enc equals the terminal codeset, no converter is opened and the
   shared str_cnv_not_convert handle is returned instead. */
92 str_crt_conv_from (const char *from_enc
)
94 return (!str_test_not_convert (from_enc
))
95 ? g_iconv_open (codeset
, from_enc
) : str_cnv_not_convert
;
/* Close conv with g_iconv_close (), unless it is the shared
   str_cnv_not_convert handle, which is left open. */
99 str_close_conv (GIConv conv
)
101 if (conv
!= str_cnv_not_convert
)
102 g_iconv_close (conv
);
/* Convert string with coder and append the result to buffer.
   coder  - iconv handle; compared against INVALID_CONV before use
   string - input text; checked against NULL
   size   - input length as passed by callers (str_convert () passes -1)
   buffer - output GString; checked against NULL
   The state variable starts as ESTR_SUCCESS and is set to ESTR_PROBLEM after
   an illegal input sequence has been handled.
   NOTE(review): several control-flow lines (returns, loop, switch header,
   braces) are not visible in this view; the comments below describe only the
   statements that are. */
106 _str_convert (GIConv coder
, const char *string
, int size
, GString
* buffer
)
108 estr_t state
= ESTR_SUCCESS
;
109 gchar
*tmp_buff
= NULL
;
111 gsize bytes_read
= 0;
112 gsize bytes_written
= 0;
113 GError
*error
= NULL
;
116 if (coder
== INVALID_CONV
)
119 if (string
== NULL
|| buffer
== NULL
)
123 if (! used_class.is_valid_string (string))
130 size
= strlen (string
);
134 left
= strlen (string
);
/* Calling g_iconv with all-NULL buffers resets the converter state. */
140 g_iconv (coder
, NULL
, NULL
, NULL
, NULL
);
/* Convert the next left bytes; bytes_read reports how much input was
   consumed, which the error branches below rely on. */
144 tmp_buff
= g_convert_with_iconv ((const gchar
*) string
,
145 left
, coder
, &bytes_read
, &bytes_written
, &error
);
/* The error code is copied out before the GError is freed. */
148 int code
= error
->code
;
150 g_error_free (error
);
155 case G_CONVERT_ERROR_NO_CONVERSION
:
156 /* Conversion between the requested character sets is not supported. */
/* Emit one '?' per input byte in place of the unconvertible text. */
157 tmp_buff
= g_strnfill (strlen (string
), '?');
158 g_string_append (buffer
, tmp_buff
);
162 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE
:
163 /* Invalid byte sequence in conversion input. */
164 if ((tmp_buff
== NULL
) && (bytes_read
!= 0))
165 /* recode valid byte sequence */
166 tmp_buff
= g_convert_with_iconv ((const gchar
*) string
,
167 bytes_read
, coder
, NULL
, NULL
, NULL
);
169 if (tmp_buff
!= NULL
)
171 g_string_append (buffer
, tmp_buff
);
175 if ((int) bytes_read
< left
)
/* Step over the converted prefix plus the one offending byte, then copy
   that offending byte to the output unchanged. */
177 string
+= bytes_read
+ 1;
178 size
-= (bytes_read
+ 1);
179 left
-= (bytes_read
+ 1);
180 g_string_append_c (buffer
, *(string
- 1));
186 state
= ESTR_PROBLEM
;
189 case G_CONVERT_ERROR_PARTIAL_INPUT
:
190 /* Partial character sequence at end of input. */
/* NOTE(review): tmp_buff is the result of the g_convert_with_iconv call that
   just reported an error, and the ILLEGAL_SEQUENCE branch explicitly allows
   for it being NULL; confirm it cannot be NULL here before appending. */
191 g_string_append (buffer
, tmp_buff
);
193 if ((int) bytes_read
< left
)
/* Replace every unconsumed trailing byte with '?'. */
195 left
= left
- bytes_read
;
196 tmp_buff
= g_strnfill (left
, '?');
197 g_string_append (buffer
, tmp_buff
);
202 case G_CONVERT_ERROR_BAD_URI
: /* Don't know how to handle this error :( */
203 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH
: /* Don't know how to handle this error :( */
204 case G_CONVERT_ERROR_FAILED
: /* Conversion failed for some reason. */
212 if (tmp_buff
!= NULL
)
216 g_string_append (buffer
, tmp_buff
);
218 string
+= bytes_read
;
224 g_string_append (buffer
, string
);
230 g_string_append (buffer
, string
);
/* Convert string with coder and append to buffer.
   Same as _str_convert () with size fixed to -1. */
239 str_convert (GIConv coder
, const char *string
, GString
* buffer
)
241 return _str_convert (coder
, string
, -1, buffer
);
/* Convert string with coder and append to buffer, forwarding the
   caller-supplied size to _str_convert () unchanged. */
245 str_nconvert (GIConv coder
, const char *string
, int size
, GString
* buffer
)
247 return _str_convert (coder
, string
, size
, buffer
);
/* Dispatch to the active string class:
   returns used_class.conv_gerror_message (error, def_msg). */
251 str_conv_gerror_message (GError
* error
, const char *def_msg
)
253 return used_class
.conv_gerror_message (error
, def_msg
);
/* Append string to buffer, converting it with coder when needed.
   If coder is the shared str_cnv_not_convert handle, string is appended
   verbatim (a NULL string is appended as "") and the result is ESTR_SUCCESS.
   The other visible path calls _str_convert () with size -1. */
257 str_vfs_convert_from (GIConv coder
, const char *string
, GString
* buffer
)
261 if (coder
== str_cnv_not_convert
)
263 g_string_append (buffer
, string
!= NULL
? string
: "");
264 result
= ESTR_SUCCESS
;
267 result
= _str_convert (coder
, string
, -1, buffer
);
/* Dispatch to the active string class:
   returns used_class.vfs_convert_to (coder, string, size, buffer). */
273 str_vfs_convert_to (GIConv coder
, const char *string
, int size
, GString
* buffer
)
275 return used_class
.vfs_convert_to (coder
, string
, size
, buffer
);
/* Append printf-style formatted text to buffer.
   With GLib >= 2.14 this uses g_string_append_vprintf () directly. The
   remaining statements format into a temporary string with
   g_strdup_vprintf () and append that; presumably they form the #else branch
   (the #else/#endif lines are not visible in this view). */
279 str_printf (GString
* buffer
, const char *format
, ...)
282 va_start (ap
, format
);
283 #if GLIB_CHECK_VERSION (2, 14, 0)
284 g_string_append_vprintf (buffer
, format
, ap
);
288 tmp
= g_strdup_vprintf (format
, ap
);
289 g_string_append (buffer
, tmp
);
/* Dispatch to the active string class:
   calls used_class.insert_replace_char (buffer). */
297 str_insert_replace_char (GString
* buffer
)
299 used_class
.insert_replace_char (buffer
);
/* Convert one character from keys into output using conv.
   conv     - iconv handle; its state is reset first
   keys     - input bytes
   ch_size  - number of input bytes, or (size_t) (-1) to use strlen (keys)
   output   - destination buffer
   out_size - capacity of output in bytes
   When g_iconv () fails, the result is ESTR_PROBLEM if errno is EINVAL and
   ESTR_FAILURE otherwise. The success path is not visible in this view. */
303 str_translate_char (GIConv conv
, const char *keys
, size_t ch_size
, char *output
, size_t out_size
)
/* Calling g_iconv with all-NULL buffers resets the converter state. */
308 g_iconv (conv
, NULL
, NULL
, NULL
, NULL
);
310 left
= (ch_size
== (size_t) (-1)) ? strlen (keys
) : ch_size
;
312 cnv
= g_iconv (conv
, (gchar
**) & keys
, &left
, &output
, &out_size
);
313 if (cnv
== (size_t) (-1))
315 return (errno
== EINVAL
) ? ESTR_PROBLEM
: ESTR_FAILURE
;
/* Return the terminal encoding name reported by nl_langinfo (CODESET),
   converted to upper case. The value is computed on the first call and
   cached in term_encoding; later calls return the cached string. */
326 str_detect_termencoding (void)
328 if (term_encoding
== NULL
)
330 /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
332 On Mac OS X, it returns the same case as the LANG input.
333 So transform the result of nl_langinfo (CODESET) to upper case unconditionally. */
334 term_encoding
= g_ascii_strup (nl_langinfo (CODESET
), -1);
337 return term_encoding
;
/* Count the entries of the NULL-terminated table that match encoding.
   An entry matches when it is an ASCII case-insensitive prefix of encoding:
   only the first strlen (table[t]) bytes are compared.
   A NULL encoding is checked for first; the statement taken in that case is
   not visible in this view. */
341 str_test_encoding_class (const char *encoding
, const char **table
)
345 if (encoding
== NULL
)
348 for (t
= 0; table
[t
] != NULL
; t
++)
350 result
+= (g_ascii_strncasecmp (encoding
, table
[t
], strlen (table
[t
])) == 0);
/* Select the implementation stored in used_class from the current codeset:
   str_utf8_init () if codeset matches str_utf8_encodings,
   str_8bit_init () if it matches str_8bit_encodings.
   The final assignment uses str_ascii_init (); presumably it is the fallback
   branch (its else line is not visible in this view). */
356 str_choose_str_functions (void)
358 if (str_test_encoding_class (codeset
, str_utf8_encodings
))
360 used_class
= str_utf8_init ();
362 else if (str_test_encoding_class (codeset
, str_8bit_encodings
))
364 used_class
= str_8bit_init ();
368 used_class
= str_ascii_init ();
/* Return non-zero when codeset_name matches an entry of str_utf8_encodings
   (see str_test_encoding_class () for the matching rule). */
373 str_isutf8 (const char *codeset_name
)
375 return (str_test_encoding_class (codeset_name
, str_utf8_encodings
) != 0);
/* Initialise the module state for the terminal encoding.
   termenc - requested encoding name, or NULL to use the detected one.
   codeset becomes an upper-cased copy of termenc, or a copy of
   str_detect_termencoding (). An identity converter (codeset -> codeset) is
   opened into str_cnv_not_convert; if that fails the detected encoding is
   tried, and then "ASCII". Finally both terminal converters are set to the
   identity converter and the string class is chosen.
   NOTE(review): codeset is reassigned twice below; confirm the previous
   allocation is released on the lines that are not visible in this view. */
379 str_init_strings (const char *termenc
)
381 codeset
= termenc
!= NULL
? g_ascii_strup (termenc
, -1) : g_strdup (str_detect_termencoding ());
383 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
384 if (str_cnv_not_convert
== INVALID_CONV
)
/* First fallback: the encoding detected from the locale. */
389 codeset
= g_strdup (str_detect_termencoding ());
390 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
393 if (str_cnv_not_convert
== INVALID_CONV
)
/* Second fallback: plain ASCII. */
396 codeset
= g_strdup ("ASCII");
397 str_cnv_not_convert
= g_iconv_open (codeset
, codeset
);
401 str_cnv_to_term
= str_cnv_not_convert
;
402 str_cnv_from_term
= str_cnv_not_convert
;
404 str_choose_str_functions ();
/* Release module state: close the identity converter if it was opened and
   free the cached term_encoding string.
   NOTE(review): releasing codeset is not visible in this view; confirm it
   happens on the lines that follow. */
408 str_uninit_strings (void)
410 if (str_cnv_not_convert
!= INVALID_CONV
)
411 g_iconv_close (str_cnv_not_convert
);
412 g_free (term_encoding
);
/* Dispatch to the active string class: returns used_class.term_form (text). */
417 str_term_form (const char *text
)
419 return used_class
.term_form (text
);
/* Dispatch to the active string class:
   returns used_class.fit_to_term (text, width, just_mode). */
423 str_fit_to_term (const char *text
, int width
, align_crt_t just_mode
)
425 return used_class
.fit_to_term (text
, width
, just_mode
);
/* Dispatch to the active string class:
   returns used_class.term_trim (text, width). */
429 str_term_trim (const char *text
, int width
)
431 return used_class
.term_trim (text
, width
);
/* Dispatch to the active string class:
   returns used_class.term_substring (text, start, width). */
435 str_term_substring (const char *text
, int start
, int width
)
437 return used_class
.term_substring (text
, start
, width
);
/* Pass the address of the local text pointer to used_class.cnext_char, so the
   caller's pointer is not modified. The return statement is not visible in
   this view. */
441 str_get_next_char (char *text
)
444 used_class
.cnext_char ((const char **) &text
);
/* Const variant: pass the address of the local text pointer to
   used_class.cnext_char. The return statement is not visible in this view. */
449 str_cget_next_char (const char *text
)
451 used_class
.cnext_char (&text
);
/* Pass the caller's pointer (cast to const char **) to
   used_class.cnext_char, which may update *text. */
456 str_next_char (char **text
)
458 used_class
.cnext_char ((const char **) text
);
/* Const variant: pass the caller's pointer to used_class.cnext_char, which
   may update *text. */
462 str_cnext_char (const char **text
)
464 used_class
.cnext_char (text
);
/* Pass the address of the local text pointer to used_class.cprev_char, so the
   caller's pointer is not modified. The return statement is not visible in
   this view. */
468 str_get_prev_char (char *text
)
470 used_class
.cprev_char ((const char **) &text
);
/* Const variant: pass the address of the local text pointer to
   used_class.cprev_char. The return statement is not visible in this view. */
475 str_cget_prev_char (const char *text
)
477 used_class
.cprev_char (&text
);
/* Pass the caller's pointer (cast to const char **) to
   used_class.cprev_char, which may update *text. */
482 str_prev_char (char **text
)
484 used_class
.cprev_char ((const char **) text
);
/* Const variant: pass the caller's pointer to used_class.cprev_char, which
   may update *text. */
488 str_cprev_char (const char **text
)
490 used_class
.cprev_char (text
);
/* Pass the address of the local text pointer to used_class.cnext_char_safe,
   so the caller's pointer is not modified. The return statement is not
   visible in this view. */
494 str_get_next_char_safe (char *text
)
496 used_class
.cnext_char_safe ((const char **) &text
);
/* Const variant: pass the address of the local text pointer to
   used_class.cnext_char_safe. The return statement is not visible in this
   view. */
501 str_cget_next_char_safe (const char *text
)
503 used_class
.cnext_char_safe (&text
);
/* Pass the caller's pointer (cast to const char **) to
   used_class.cnext_char_safe, which may update *text. */
508 str_next_char_safe (char **text
)
510 used_class
.cnext_char_safe ((const char **) text
);
/* Const variant: pass the caller's pointer to used_class.cnext_char_safe,
   which may update *text. */
514 str_cnext_char_safe (const char **text
)
516 used_class
.cnext_char_safe (text
);
/* Pass the address of the local text pointer to used_class.cprev_char_safe,
   so the caller's pointer is not modified. The return statement is not
   visible in this view. */
520 str_get_prev_char_safe (char *text
)
522 used_class
.cprev_char_safe ((const char **) &text
);
/* Const variant: pass the address of the local text pointer to
   used_class.cprev_char_safe. The return statement is not visible in this
   view. */
527 str_cget_prev_char_safe (const char *text
)
529 used_class
.cprev_char_safe (&text
);
/* Pass the caller's pointer (cast to const char **) to
   used_class.cprev_char_safe, which may update *text. */
534 str_prev_char_safe (char **text
)
536 used_class
.cprev_char_safe ((const char **) text
);
/* Const variant: pass the caller's pointer to used_class.cprev_char_safe,
   which may update *text. */
540 str_cprev_char_safe (const char **text
)
542 used_class
.cprev_char_safe (text
);
/* Dispatch to the active string class: returns
   used_class.cnext_noncomb_char (text), with text cast to const char **. */
546 str_next_noncomb_char (char **text
)
548 return used_class
.cnext_noncomb_char ((const char **) text
);
/* Dispatch to the active string class:
   returns used_class.cnext_noncomb_char (text). */
552 str_cnext_noncomb_char (const char **text
)
554 return used_class
.cnext_noncomb_char (text
);
/* Dispatch to the active string class: returns
   used_class.cprev_noncomb_char (text, begin), with text cast to
   const char **. */
558 str_prev_noncomb_char (char **text
, const char *begin
)
560 return used_class
.cprev_noncomb_char ((const char **) text
, begin
);
/* Dispatch to the active string class:
   returns used_class.cprev_noncomb_char (text, begin). */
564 str_cprev_noncomb_char (const char **text
, const char *begin
)
566 return used_class
.cprev_noncomb_char (text
, begin
);
/* Dispatch to the active string class:
   returns used_class.is_valid_char (ch, size). */
570 str_is_valid_char (const char *ch
, size_t size
)
572 return used_class
.is_valid_char (ch
, size
);
/* Dispatch to the active string class: returns used_class.term_width1 (text). */
576 str_term_width1 (const char *text
)
578 return used_class
.term_width1 (text
);
/* Dispatch to the active string class:
   returns used_class.term_width2 (text, length). */
582 str_term_width2 (const char *text
, size_t length
)
584 return used_class
.term_width2 (text
, length
);
/* Dispatch to the active string class:
   returns used_class.term_char_width (text). */
588 str_term_char_width (const char *text
)
590 return used_class
.term_char_width (text
);
/* Dispatch to the active string class:
   returns used_class.offset_to_pos (text, length). */
594 str_offset_to_pos (const char *text
, size_t length
)
596 return used_class
.offset_to_pos (text
, length
);
/* Dispatch to the active string class: returns used_class.length (text). */
600 str_length (const char *text
)
602 return used_class
.length (text
);
/* Return the distance in bytes between text and the pointer returned by
   str_cget_next_char_safe (text). */
606 str_length_char (const char *text
)
608 return str_cget_next_char_safe (text
) - text
;
/* Dispatch to the active string class:
   returns used_class.length2 (text, size). */
612 str_length2 (const char *text
, int size
)
614 return used_class
.length2 (text
, size
);
/* Dispatch to the active string class:
   returns used_class.length_noncomb (text). */
618 str_length_noncomb (const char *text
)
620 return used_class
.length_noncomb (text
);
/* Dispatch to the active string class:
   returns used_class.column_to_pos (text, pos). */
624 str_column_to_pos (const char *text
, size_t pos
)
626 return used_class
.column_to_pos (text
, pos
);
/* Dispatch to the active string class: returns used_class.isspace (ch). */
630 str_isspace (const char *ch
)
632 return used_class
.isspace (ch
);
/* Dispatch to the active string class: returns used_class.ispunct (ch). */
636 str_ispunct (const char *ch
)
638 return used_class
.ispunct (ch
);
/* Dispatch to the active string class: returns used_class.isalnum (ch). */
642 str_isalnum (const char *ch
)
644 return used_class
.isalnum (ch
);
/* Dispatch to the active string class: returns used_class.isdigit (ch). */
648 str_isdigit (const char *ch
)
650 return used_class
.isdigit (ch
);
/* Dispatch to the active string class:
   returns used_class.toupper (ch, out, remain). */
654 str_toupper (const char *ch
, char **out
, size_t * remain
)
656 return used_class
.toupper (ch
, out
, remain
);
/* Dispatch to the active string class:
   returns used_class.tolower (ch, out, remain). */
660 str_tolower (const char *ch
, char **out
, size_t * remain
)
662 return used_class
.tolower (ch
, out
, remain
);
/* Dispatch to the active string class: returns used_class.isprint (ch). */
666 str_isprint (const char *ch
)
668 return used_class
.isprint (ch
);
/* Dispatch to the active string class:
   returns used_class.iscombiningmark (ch). */
672 str_iscombiningmark (const char *ch
)
674 return used_class
.iscombiningmark (ch
);
/* Dispatch to the active string class:
   returns used_class.trunc (text, width). */
678 str_trunc (const char *text
, int width
)
680 return used_class
.trunc (text
, width
);
/* Dispatch to the active string class:
   returns used_class.create_search_needle (needle, case_sen). */
684 str_create_search_needle (const char *needle
, int case_sen
)
686 return used_class
.create_search_needle (needle
, case_sen
);
/* Dispatch to the active string class:
   calls used_class.release_search_needle (needle, case_sen). */
691 str_release_search_needle (char *needle
, int case_sen
)
693 used_class
.release_search_needle (needle
, case_sen
);
/* Dispatch to the active string class:
   returns used_class.search_first (text, search, case_sen). */
697 str_search_first (const char *text
, const char *search
, int case_sen
)
699 return used_class
.search_first (text
, search
, case_sen
);
/* Dispatch to the active string class:
   returns used_class.search_last (text, search, case_sen). */
703 str_search_last (const char *text
, const char *search
, int case_sen
)
705 return used_class
.search_last (text
, search
, case_sen
);
/* Dispatch to the active string class:
   returns used_class.is_valid_string (text). */
709 str_is_valid_string (const char *text
)
711 return used_class
.is_valid_string (text
);
/* Dispatch to the active string class: returns used_class.compare (t1, t2). */
715 str_compare (const char *t1
, const char *t2
)
717 return used_class
.compare (t1
, t2
);
/* Dispatch to the active string class: returns used_class.ncompare (t1, t2). */
721 str_ncompare (const char *t1
, const char *t2
)
723 return used_class
.ncompare (t1
, t2
);
/* Dispatch to the active string class: returns used_class.casecmp (t1, t2). */
727 str_casecmp (const char *t1
, const char *t2
)
729 return used_class
.casecmp (t1
, t2
);
/* Dispatch to the active string class: returns used_class.ncasecmp (t1, t2). */
733 str_ncasecmp (const char *t1
, const char *t2
)
735 return used_class
.ncasecmp (t1
, t2
);
/* Dispatch to the active string class:
   returns used_class.prefix (text, prefix). */
739 str_prefix (const char *text
, const char *prefix
)
741 return used_class
.prefix (text
, prefix
);
/* Dispatch to the active string class:
   returns used_class.caseprefix (text, prefix). */
745 str_caseprefix (const char *text
, const char *prefix
)
747 return used_class
.caseprefix (text
, prefix
);
/* Dispatch to the active string class: calls used_class.fix_string (text). */
751 str_fix_string (char *text
)
753 used_class
.fix_string (text
);
/* Dispatch to the active string class:
   returns used_class.create_key (text, case_sen). */
757 str_create_key (const char *text
, int case_sen
)
759 return used_class
.create_key (text
, case_sen
);
/* Dispatch to the active string class:
   returns used_class.create_key_for_filename (text, case_sen). */
763 str_create_key_for_filename (const char *text
, int case_sen
)
765 return used_class
.create_key_for_filename (text
, case_sen
);
/* Dispatch to the active string class:
   returns used_class.key_collate (t1, t2, case_sen). */
769 str_key_collate (const char *t1
, const char *t2
, int case_sen
)
771 return used_class
.key_collate (t1
, t2
, case_sen
);
/* Dispatch to the active string class:
   calls used_class.release_key (key, case_sen). */
775 str_release_key (char *key
, int case_sen
)
777 used_class
.release_key (key
, case_sen
);
/* Measure text for display, reporting through *lines and *columns.
   The visible statements work on a g_strdup () copy of text, look for '\n'
   with strchr (), measure a piece with str_term_width1 () and compare that
   width against *columns.
   NOTE(review): the loop structure, the updates of *lines and *columns and
   the release of the copy are not visible in this view. */
781 str_msg_term_size (const char *text
, int *lines
, int *columns
)
791 tmp
= g_strdup (text
);
796 q
= strchr (p
, '\n');
803 width
= str_term_width1 (p
);
804 if (width
> *columns
)
818 /* --------------------------------------------------------------------------------------------- */
/* Search haystack backwards for needle, repeating the search on a shrinking
   prefix while skip_count is counted down.
   Each pass calls g_strrstr_len () on the first len bytes of haystack and
   then sets len to the offset just before the match.
   NOTE(review): when the match is at offset 0, len becomes -1 (given a
   signed type), and g_strrstr_len () treats a negative length as "search the
   whole string"; confirm this is intended. Handling of a NULL result from
   g_strrstr_len () is not visible in this view. */
821 strrstr_skip_count (const char *haystack
, const char *needle
, size_t skip_count
)
826 len
= strlen (haystack
);
830 semi
= g_strrstr_len (haystack
, len
, needle
);
833 len
= semi
- haystack
- 1;
835 while (skip_count
-- != 0);
839 /* --------------------------------------------------------------------------------------------- */