Refactoring: use define DEFAULT_CHARSET for "ASCII" charset.
[midnight-commander.git] / lib / strutil / strutil.c
blob184fa86f7cf002469c7beab3154c27fc7958326a
1 /*
2 Common strings utilities
4 Copyright (C) 2007, 2011, 2013
5 The Free Software Foundation, Inc.
7 Written by:
8 Rostislav Benes, 2007
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 #include <config.h>
28 #include <stdlib.h>
29 #include <langinfo.h>
30 #include <string.h>
31 #include <errno.h>
33 #include "lib/global.h"
34 #include "lib/strutil.h"
/* Codeset name prefixes that select the UTF-8 string class.
   The table is NULL-terminated and is matched by str_test_encoding_class (). */
static const char *str_utf8_encodings[] = {
    "utf-8",
    "utf8",
    NULL
};
/* Codeset name prefixes of plain 8-bit encodings (no wide or multibyte characters).
   The table is NULL-terminated and is matched by str_test_encoding_class (). */
static const char *str_8bit_encodings[] = {
    "cp-1251",
    "cp1251",
    "cp-1250",
    "cp1250",
    "cp-866",
    "cp866",
    "ibm-866",
    "ibm866",
    "cp-850",
    "cp850",
    "cp-852",
    "cp852",
    "iso-8859",
    "iso8859",
    "koi8",
    NULL
};
63 /* terminal encoding */
64 static char *codeset = NULL;
65 static char *term_encoding = NULL;
66 /* function for encoding specific operations */
67 static struct str_class used_class;
69 GIConv str_cnv_to_term;
70 GIConv str_cnv_from_term;
71 GIConv str_cnv_not_convert = INVALID_CONV;
73 /* if enc is same encoding like on terminal */
74 static int
75 str_test_not_convert (const char *enc)
77 return g_ascii_strcasecmp (enc, codeset) == 0;
80 GIConv
81 str_crt_conv_to (const char *to_enc)
83 return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
86 GIConv
87 str_crt_conv_from (const char *from_enc)
89 return (!str_test_not_convert (from_enc))
90 ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
93 void
94 str_close_conv (GIConv conv)
96 if (conv != str_cnv_not_convert)
97 g_iconv_close (conv);
100 static estr_t
101 _str_convert (GIConv coder, const char *string, int size, GString * buffer)
103 estr_t state = ESTR_SUCCESS;
104 gssize left;
105 gsize bytes_read = 0;
106 gsize bytes_written = 0;
108 errno = 0; /* FIXME: is it really needed? */
110 if (coder == INVALID_CONV)
111 return ESTR_FAILURE;
113 if (string == NULL || buffer == NULL)
114 return ESTR_FAILURE;
117 if (! used_class.is_valid_string (string))
119 return ESTR_FAILURE;
122 if (size < 0)
123 size = strlen (string);
124 else
126 left = strlen (string);
127 if (left < size)
128 size = left;
131 left = size;
132 g_iconv (coder, NULL, NULL, NULL, NULL);
134 while (left != 0)
136 gchar *tmp_buff;
137 GError *error = NULL;
139 tmp_buff = g_convert_with_iconv ((const gchar *) string,
140 left, coder, &bytes_read, &bytes_written, &error);
141 if (error != NULL)
143 int code = error->code;
145 g_error_free (error);
146 error = NULL;
148 switch (code)
150 case G_CONVERT_ERROR_NO_CONVERSION:
151 /* Conversion between the requested character sets is not supported. */
152 tmp_buff = g_strnfill (strlen (string), '?');
153 g_string_append (buffer, tmp_buff);
154 g_free (tmp_buff);
155 return ESTR_FAILURE;
157 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
158 /* Invalid byte sequence in conversion input. */
159 if ((tmp_buff == NULL) && (bytes_read != 0))
160 /* recode valid byte sequence */
161 tmp_buff = g_convert_with_iconv ((const gchar *) string,
162 bytes_read, coder, NULL, NULL, NULL);
164 if (tmp_buff != NULL)
166 g_string_append (buffer, tmp_buff);
167 g_free (tmp_buff);
170 if ((int) bytes_read >= left)
171 return ESTR_PROBLEM;
173 string += bytes_read + 1;
174 size -= (bytes_read + 1);
175 left -= (bytes_read + 1);
176 g_string_append_c (buffer, *(string - 1));
177 state = ESTR_PROBLEM;
178 break;
180 case G_CONVERT_ERROR_PARTIAL_INPUT:
181 /* Partial character sequence at end of input. */
182 g_string_append (buffer, tmp_buff);
183 g_free (tmp_buff);
184 if ((int) bytes_read < left)
186 left = left - bytes_read;
187 tmp_buff = g_strnfill (left, '?');
188 g_string_append (buffer, tmp_buff);
189 g_free (tmp_buff);
191 return ESTR_PROBLEM;
193 case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
194 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
195 case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
196 default:
197 g_free (tmp_buff);
198 return ESTR_FAILURE;
201 else if (tmp_buff == NULL)
203 g_string_append (buffer, string);
204 return ESTR_PROBLEM;
206 else if (*tmp_buff == '\0')
208 g_free (tmp_buff);
209 g_string_append (buffer, string);
210 return state;
212 else
214 g_string_append (buffer, tmp_buff);
215 g_free (tmp_buff);
216 string += bytes_read;
217 left -= bytes_read;
221 return state;
224 estr_t
225 str_convert (GIConv coder, const char *string, GString * buffer)
227 return _str_convert (coder, string, -1, buffer);
230 estr_t
231 str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
233 return _str_convert (coder, string, size, buffer);
236 gchar *
237 str_conv_gerror_message (GError * error, const char *def_msg)
239 return used_class.conv_gerror_message (error, def_msg);
242 estr_t
243 str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
245 estr_t result = ESTR_SUCCESS;
247 if (coder == str_cnv_not_convert)
248 g_string_append (buffer, string != NULL ? string : "");
249 else
250 result = _str_convert (coder, string, -1, buffer);
252 return result;
255 estr_t
256 str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
258 return used_class.vfs_convert_to (coder, string, size, buffer);
261 void
262 str_printf (GString * buffer, const char *format, ...)
264 va_list ap;
265 va_start (ap, format);
267 #if GLIB_CHECK_VERSION (2, 14, 0)
268 g_string_append_vprintf (buffer, format, ap);
269 #else
271 gchar *tmp;
273 tmp = g_strdup_vprintf (format, ap);
274 g_string_append (buffer, tmp);
275 g_free (tmp);
277 #endif
278 va_end (ap);
281 void
282 str_insert_replace_char (GString * buffer)
284 used_class.insert_replace_char (buffer);
287 estr_t
288 str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
290 size_t left;
291 size_t cnv;
293 g_iconv (conv, NULL, NULL, NULL, NULL);
295 left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
297 cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
298 if (cnv == (size_t) (-1))
299 return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
301 output[0] = '\0';
302 return ESTR_SUCCESS;
306 const char *
307 str_detect_termencoding (void)
309 if (term_encoding == NULL)
311 /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
312 to utf-8 or UTF-8.
313 On Mac OS X, it returns the same case as the LANG input.
314 So let tranform result of nl_langinfo (CODESET) to upper case unconditionally. */
315 term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
318 return term_encoding;
/* Count the entries of the NULL-terminated TABLE that are a prefix of
   ENCODING (ASCII case-insensitive).  Returns 0 when ENCODING is NULL. */
static int
str_test_encoding_class (const char *encoding, const char **table)
{
    const char **entry;
    int matches = 0;

    if (encoding == NULL)
        return 0;

    for (entry = table; *entry != NULL; entry++)
        if (g_ascii_strncasecmp (encoding, *entry, strlen (*entry)) == 0)
            matches++;

    return matches;
}
338 static void
339 str_choose_str_functions (void)
341 if (str_test_encoding_class (codeset, str_utf8_encodings))
342 used_class = str_utf8_init ();
343 else if (str_test_encoding_class (codeset, str_8bit_encodings))
344 used_class = str_8bit_init ();
345 else
346 used_class = str_ascii_init ();
349 gboolean
350 str_isutf8 (const char *codeset_name)
352 return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
355 void
356 str_init_strings (const char *termenc)
358 codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
360 str_cnv_not_convert = g_iconv_open (codeset, codeset);
361 if (str_cnv_not_convert == INVALID_CONV)
363 if (termenc != NULL)
365 g_free (codeset);
366 codeset = g_strdup (str_detect_termencoding ());
367 str_cnv_not_convert = g_iconv_open (codeset, codeset);
370 if (str_cnv_not_convert == INVALID_CONV)
372 g_free (codeset);
373 codeset = g_strdup (DEFAULT_CHARSET);
374 str_cnv_not_convert = g_iconv_open (codeset, codeset);
378 str_cnv_to_term = str_cnv_not_convert;
379 str_cnv_from_term = str_cnv_not_convert;
381 str_choose_str_functions ();
384 void
385 str_uninit_strings (void)
387 if (str_cnv_not_convert != INVALID_CONV)
388 g_iconv_close (str_cnv_not_convert);
389 g_free (term_encoding);
390 g_free (codeset);
393 const char *
394 str_term_form (const char *text)
396 return used_class.term_form (text);
399 const char *
400 str_fit_to_term (const char *text, int width, align_crt_t just_mode)
402 return used_class.fit_to_term (text, width, just_mode);
405 const char *
406 str_term_trim (const char *text, int width)
408 return used_class.term_trim (text, width);
411 const char *
412 str_term_substring (const char *text, int start, int width)
414 return used_class.term_substring (text, start, width);
417 char *
418 str_get_next_char (char *text)
421 used_class.cnext_char ((const char **) &text);
422 return text;
425 const char *
426 str_cget_next_char (const char *text)
428 used_class.cnext_char (&text);
429 return text;
432 void
433 str_next_char (char **text)
435 used_class.cnext_char ((const char **) text);
438 void
439 str_cnext_char (const char **text)
441 used_class.cnext_char (text);
444 char *
445 str_get_prev_char (char *text)
447 used_class.cprev_char ((const char **) &text);
448 return text;
451 const char *
452 str_cget_prev_char (const char *text)
454 used_class.cprev_char (&text);
455 return text;
458 void
459 str_prev_char (char **text)
461 used_class.cprev_char ((const char **) text);
464 void
465 str_cprev_char (const char **text)
467 used_class.cprev_char (text);
470 char *
471 str_get_next_char_safe (char *text)
473 used_class.cnext_char_safe ((const char **) &text);
474 return text;
477 const char *
478 str_cget_next_char_safe (const char *text)
480 used_class.cnext_char_safe (&text);
481 return text;
484 void
485 str_next_char_safe (char **text)
487 used_class.cnext_char_safe ((const char **) text);
490 void
491 str_cnext_char_safe (const char **text)
493 used_class.cnext_char_safe (text);
496 char *
497 str_get_prev_char_safe (char *text)
499 used_class.cprev_char_safe ((const char **) &text);
500 return text;
503 const char *
504 str_cget_prev_char_safe (const char *text)
506 used_class.cprev_char_safe (&text);
507 return text;
510 void
511 str_prev_char_safe (char **text)
513 used_class.cprev_char_safe ((const char **) text);
516 void
517 str_cprev_char_safe (const char **text)
519 used_class.cprev_char_safe (text);
523 str_next_noncomb_char (char **text)
525 return used_class.cnext_noncomb_char ((const char **) text);
529 str_cnext_noncomb_char (const char **text)
531 return used_class.cnext_noncomb_char (text);
535 str_prev_noncomb_char (char **text, const char *begin)
537 return used_class.cprev_noncomb_char ((const char **) text, begin);
541 str_cprev_noncomb_char (const char **text, const char *begin)
543 return used_class.cprev_noncomb_char (text, begin);
547 str_is_valid_char (const char *ch, size_t size)
549 return used_class.is_valid_char (ch, size);
553 str_term_width1 (const char *text)
555 return used_class.term_width1 (text);
559 str_term_width2 (const char *text, size_t length)
561 return used_class.term_width2 (text, length);
565 str_term_char_width (const char *text)
567 return used_class.term_char_width (text);
571 str_offset_to_pos (const char *text, size_t length)
573 return used_class.offset_to_pos (text, length);
577 str_length (const char *text)
579 return used_class.length (text);
/* Return the distance in bytes between TEXT and the position that
   str_cget_next_char_safe () reports for it. */
int
str_length_char (const char *text)
{
    const char *next = str_cget_next_char_safe (text);

    return next - text;
}
589 str_length2 (const char *text, int size)
591 return used_class.length2 (text, size);
595 str_length_noncomb (const char *text)
597 return used_class.length_noncomb (text);
601 str_column_to_pos (const char *text, size_t pos)
603 return used_class.column_to_pos (text, pos);
607 str_isspace (const char *ch)
609 return used_class.char_isspace (ch);
613 str_ispunct (const char *ch)
615 return used_class.char_ispunct (ch);
619 str_isalnum (const char *ch)
621 return used_class.char_isalnum (ch);
625 str_isdigit (const char *ch)
627 return used_class.char_isdigit (ch);
631 str_toupper (const char *ch, char **out, size_t * remain)
633 return used_class.char_toupper (ch, out, remain);
637 str_tolower (const char *ch, char **out, size_t * remain)
639 return used_class.char_tolower (ch, out, remain);
643 str_isprint (const char *ch)
645 return used_class.char_isprint (ch);
648 gboolean
649 str_iscombiningmark (const char *ch)
651 return used_class.char_iscombiningmark (ch);
654 const char *
655 str_trunc (const char *text, int width)
657 return used_class.trunc (text, width);
660 char *
661 str_create_search_needle (const char *needle, int case_sen)
663 return used_class.create_search_needle (needle, case_sen);
667 void
668 str_release_search_needle (char *needle, int case_sen)
670 used_class.release_search_needle (needle, case_sen);
673 const char *
674 str_search_first (const char *text, const char *search, int case_sen)
676 return used_class.search_first (text, search, case_sen);
679 const char *
680 str_search_last (const char *text, const char *search, int case_sen)
682 return used_class.search_last (text, search, case_sen);
686 str_is_valid_string (const char *text)
688 return used_class.is_valid_string (text);
692 str_compare (const char *t1, const char *t2)
694 return used_class.compare (t1, t2);
698 str_ncompare (const char *t1, const char *t2)
700 return used_class.ncompare (t1, t2);
704 str_casecmp (const char *t1, const char *t2)
706 return used_class.casecmp (t1, t2);
710 str_ncasecmp (const char *t1, const char *t2)
712 return used_class.ncasecmp (t1, t2);
716 str_prefix (const char *text, const char *prefix)
718 return used_class.prefix (text, prefix);
722 str_caseprefix (const char *text, const char *prefix)
724 return used_class.caseprefix (text, prefix);
727 void
728 str_fix_string (char *text)
730 used_class.fix_string (text);
733 char *
734 str_create_key (const char *text, int case_sen)
736 return used_class.create_key (text, case_sen);
739 char *
740 str_create_key_for_filename (const char *text, int case_sen)
742 return used_class.create_key_for_filename (text, case_sen);
746 str_key_collate (const char *t1, const char *t2, int case_sen)
748 return used_class.key_collate (t1, t2, case_sen);
751 void
752 str_release_key (char *key, int case_sen)
754 used_class.release_key (key, case_sen);
/* Measure a message made of '\n'-separated lines.
 *
 * text    - the message; a private copy is made and split, TEXT is not modified
 * lines   - receives the number of lines (1 + number of '\n' characters)
 * columns - receives the largest str_term_width1 () of any line
 */
void
str_msg_term_size (const char *text, int *lines, int *columns)
{
    char *copy;
    char *line;

    *lines = 1;
    *columns = 0;

    copy = g_strdup (text);

    for (line = copy;; (*lines)++)
    {
        char *eol = strchr (line, '\n');
        int width;

        /* cut the copy at the line end so that only this line is measured */
        if (eol != NULL)
            *eol = '\0';

        width = str_term_width1 (line);
        if (*columns < width)
            *columns = width;

        if (eol == NULL)
            break;

        /* put the line break back and continue after it */
        *eol = '\n';
        line = eol + 1;
    }

    g_free (copy);
}
795 /* --------------------------------------------------------------------------------------------- */
797 char *
798 strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
800 char *semi;
801 ssize_t len;
803 len = strlen (haystack);
807 semi = g_strrstr_len (haystack, len, needle);
808 if (semi == NULL)
809 return NULL;
810 len = semi - haystack - 1;
812 while (skip_count-- != 0);
814 return semi;
817 /* --------------------------------------------------------------------------------------------- */
818 /* Interprete string as a non-negative decimal integer, optionally multiplied by various values.
820 * @param str input value
821 * @param invalid set to TRUE if "str" does not represent a number in this format
823 * @return non-integer representation of "str", 0 in case of error.
826 uintmax_t
827 parse_integer (const char *str, gboolean * invalid)
829 uintmax_t n;
830 char *suffix;
831 strtol_error_t e;
833 e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0");
834 if (e == LONGINT_INVALID_SUFFIX_CHAR && *suffix == 'x')
836 uintmax_t multiplier;
838 multiplier = parse_integer (suffix + 1, invalid);
839 if (multiplier != 0 && n * multiplier / multiplier != n)
841 *invalid = TRUE;
842 return 0;
845 n *= multiplier;
847 else if (e != LONGINT_OK)
849 *invalid = TRUE;
850 n = 0;
853 return n;
856 /* --------------------------------------------------------------------------------------------- */