Remove irrelevant comments about file_date() function.
[midnight-commander.git] / lib / strutil / strutil.c
blob0f9a5042d6a8c4ea96f2b7f72e15f6be1b7609ba
/*
   Common strings utilities

   Copyright (C) 2007, 2011
   The Free Software Foundation, Inc.

   Written by:
   Rostislav Benes, 2007

   This file is part of the Midnight Commander.

   The Midnight Commander is free software: you can redistribute it
   and/or modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation, either version 3 of the License,
   or (at your option) any later version.

   The Midnight Commander is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
27 #include <config.h>
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <langinfo.h>
31 #include <string.h>
32 #include <errno.h>
33 #include <stdarg.h>
35 #include "lib/global.h"
36 #include "lib/strutil.h"
/* Codeset name prefixes that select the UTF-8 string class.
   The table is NULL-terminated; str_test_encoding_class () compares each entry
   against the start of a codeset name, ignoring ASCII case. */
static const char *str_utf8_encodings[] = {
    "utf-8",
    "utf8",
    NULL
};
/* Codeset name prefixes of standard 8-bit encodings (no wide or multibyte
   characters).  The table is NULL-terminated; str_test_encoding_class ()
   compares each entry against the start of a codeset name, ignoring ASCII case. */
static const char *str_8bit_encodings[] = {
    "cp-1251",
    "cp1251",
    "cp-1250",
    "cp1250",
    "cp-866",
    "cp866",
    "ibm-866",
    "ibm866",
    "cp-850",
    "cp850",
    "cp-852",
    "cp852",
    "iso-8859",
    "iso8859",
    "koi8",
    NULL
};
65 /* terminal encoding */
66 static char *codeset = NULL;
67 static char *term_encoding = NULL;
68 /* function for encoding specific operations */
69 static struct str_class used_class;
71 GIConv str_cnv_to_term;
72 GIConv str_cnv_from_term;
73 GIConv str_cnv_not_convert = INVALID_CONV;
75 /* if enc is same encoding like on terminal */
76 static int
77 str_test_not_convert (const char *enc)
79 return g_ascii_strcasecmp (enc, codeset) == 0;
82 GIConv
83 str_crt_conv_to (const char *to_enc)
85 return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
88 GIConv
89 str_crt_conv_from (const char *from_enc)
91 return (!str_test_not_convert (from_enc))
92 ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
95 void
96 str_close_conv (GIConv conv)
98 if (conv != str_cnv_not_convert)
99 g_iconv_close (conv);
102 static estr_t
103 _str_convert (GIConv coder, const char *string, int size, GString * buffer)
105 estr_t state = ESTR_SUCCESS;
106 gchar *tmp_buff = NULL;
107 gssize left;
108 gsize bytes_read = 0;
109 gsize bytes_written = 0;
110 GError *error = NULL;
111 errno = 0;
113 if (coder == INVALID_CONV)
114 return ESTR_FAILURE;
116 if (string == NULL || buffer == NULL)
117 return ESTR_FAILURE;
120 if (! used_class.is_valid_string (string))
122 return ESTR_FAILURE;
125 if (size < 0)
127 size = strlen (string);
129 else
131 left = strlen (string);
132 if (left < size)
133 size = left;
136 left = size;
137 g_iconv (coder, NULL, NULL, NULL, NULL);
139 while (left)
141 tmp_buff = g_convert_with_iconv ((const gchar *) string,
142 left, coder, &bytes_read, &bytes_written, &error);
143 if (error)
145 int code = error->code;
147 g_error_free (error);
148 error = NULL;
150 switch (code)
152 case G_CONVERT_ERROR_NO_CONVERSION:
153 /* Conversion between the requested character sets is not supported. */
154 tmp_buff = g_strnfill (strlen (string), '?');
155 g_string_append (buffer, tmp_buff);
156 g_free (tmp_buff);
157 return ESTR_FAILURE;
159 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
160 /* Invalid byte sequence in conversion input. */
161 if ((tmp_buff == NULL) && (bytes_read != 0))
162 /* recode valid byte sequence */
163 tmp_buff = g_convert_with_iconv ((const gchar *) string,
164 bytes_read, coder, NULL, NULL, NULL);
166 if (tmp_buff != NULL)
168 g_string_append (buffer, tmp_buff);
169 g_free (tmp_buff);
172 if ((int) bytes_read < left)
174 string += bytes_read + 1;
175 size -= (bytes_read + 1);
176 left -= (bytes_read + 1);
177 g_string_append_c (buffer, *(string - 1));
179 else
181 return ESTR_PROBLEM;
183 state = ESTR_PROBLEM;
184 break;
186 case G_CONVERT_ERROR_PARTIAL_INPUT:
187 /* Partial character sequence at end of input. */
188 g_string_append (buffer, tmp_buff);
189 g_free (tmp_buff);
190 if ((int) bytes_read < left)
192 left = left - bytes_read;
193 tmp_buff = g_strnfill (left, '?');
194 g_string_append (buffer, tmp_buff);
195 g_free (tmp_buff);
197 return ESTR_PROBLEM;
199 case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
200 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
201 case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
202 default:
203 g_free (tmp_buff);
204 return ESTR_FAILURE;
207 else
209 if (tmp_buff != NULL)
211 if (*tmp_buff)
213 g_string_append (buffer, tmp_buff);
214 g_free (tmp_buff);
215 string += bytes_read;
216 left -= bytes_read;
218 else
220 g_free (tmp_buff);
221 g_string_append (buffer, string);
222 return state;
225 else
227 g_string_append (buffer, string);
228 return ESTR_PROBLEM;
232 return state;
235 estr_t
236 str_convert (GIConv coder, const char *string, GString * buffer)
238 return _str_convert (coder, string, -1, buffer);
241 estr_t
242 str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
244 return _str_convert (coder, string, size, buffer);
247 gchar *
248 str_conv_gerror_message (GError * error, const char *def_msg)
250 return used_class.conv_gerror_message (error, def_msg);
253 estr_t
254 str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
256 estr_t result;
258 if (coder == str_cnv_not_convert)
260 g_string_append (buffer, string != NULL ? string : "");
261 result = ESTR_SUCCESS;
263 else
264 result = _str_convert (coder, string, -1, buffer);
266 return result;
269 estr_t
270 str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
272 return used_class.vfs_convert_to (coder, string, size, buffer);
275 void
276 str_printf (GString * buffer, const char *format, ...)
278 va_list ap;
279 va_start (ap, format);
280 #if GLIB_CHECK_VERSION (2, 14, 0)
281 g_string_append_vprintf (buffer, format, ap);
282 #else
284 gchar *tmp;
285 tmp = g_strdup_vprintf (format, ap);
286 g_string_append (buffer, tmp);
287 g_free (tmp);
289 #endif
290 va_end (ap);
293 void
294 str_insert_replace_char (GString * buffer)
296 used_class.insert_replace_char (buffer);
299 estr_t
300 str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
302 size_t left;
303 size_t cnv;
305 g_iconv (conv, NULL, NULL, NULL, NULL);
307 left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
309 cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
310 if (cnv == (size_t) (-1))
312 return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
314 else
316 output[0] = '\0';
317 return ESTR_SUCCESS;
322 const char *
323 str_detect_termencoding (void)
325 if (term_encoding == NULL)
327 /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
328 to utf-8 or UTF-8.
329 On Mac OS X, it returns the same case as the LANG input.
330 So let tranform result of nl_langinfo (CODESET) to upper case unconditionally. */
331 term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
334 return term_encoding;
/* Count the entries of the NULL-terminated TABLE that are a prefix of ENCODING
   (ASCII case-insensitive).  Returns 0 when ENCODING is NULL or nothing matches. */
static int
str_test_encoding_class (const char *encoding, const char **table)
{
    int t;
    int result = 0;

    if (encoding == NULL)
        return result;

    for (t = 0; table[t] != NULL; t++)
    {
        result += (g_ascii_strncasecmp (encoding, table[t], strlen (table[t])) == 0);
    }

    return result;
}
352 static void
353 str_choose_str_functions (void)
355 if (str_test_encoding_class (codeset, str_utf8_encodings))
357 used_class = str_utf8_init ();
359 else if (str_test_encoding_class (codeset, str_8bit_encodings))
361 used_class = str_8bit_init ();
363 else
365 used_class = str_ascii_init ();
369 gboolean
370 str_isutf8 (const char *codeset_name)
372 return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
375 void
376 str_init_strings (const char *termenc)
378 codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
380 str_cnv_not_convert = g_iconv_open (codeset, codeset);
381 if (str_cnv_not_convert == INVALID_CONV)
383 if (termenc != NULL)
385 g_free (codeset);
386 codeset = g_strdup (str_detect_termencoding ());
387 str_cnv_not_convert = g_iconv_open (codeset, codeset);
390 if (str_cnv_not_convert == INVALID_CONV)
392 g_free (codeset);
393 codeset = g_strdup ("ASCII");
394 str_cnv_not_convert = g_iconv_open (codeset, codeset);
398 str_cnv_to_term = str_cnv_not_convert;
399 str_cnv_from_term = str_cnv_not_convert;
401 str_choose_str_functions ();
404 void
405 str_uninit_strings (void)
407 if (str_cnv_not_convert != INVALID_CONV)
408 g_iconv_close (str_cnv_not_convert);
409 g_free (term_encoding);
410 g_free (codeset);
413 const char *
414 str_term_form (const char *text)
416 return used_class.term_form (text);
419 const char *
420 str_fit_to_term (const char *text, int width, align_crt_t just_mode)
422 return used_class.fit_to_term (text, width, just_mode);
425 const char *
426 str_term_trim (const char *text, int width)
428 return used_class.term_trim (text, width);
431 const char *
432 str_term_substring (const char *text, int start, int width)
434 return used_class.term_substring (text, start, width);
437 char *
438 str_get_next_char (char *text)
441 used_class.cnext_char ((const char **) &text);
442 return text;
445 const char *
446 str_cget_next_char (const char *text)
448 used_class.cnext_char (&text);
449 return text;
452 void
453 str_next_char (char **text)
455 used_class.cnext_char ((const char **) text);
458 void
459 str_cnext_char (const char **text)
461 used_class.cnext_char (text);
464 char *
465 str_get_prev_char (char *text)
467 used_class.cprev_char ((const char **) &text);
468 return text;
471 const char *
472 str_cget_prev_char (const char *text)
474 used_class.cprev_char (&text);
475 return text;
478 void
479 str_prev_char (char **text)
481 used_class.cprev_char ((const char **) text);
484 void
485 str_cprev_char (const char **text)
487 used_class.cprev_char (text);
490 char *
491 str_get_next_char_safe (char *text)
493 used_class.cnext_char_safe ((const char **) &text);
494 return text;
497 const char *
498 str_cget_next_char_safe (const char *text)
500 used_class.cnext_char_safe (&text);
501 return text;
504 void
505 str_next_char_safe (char **text)
507 used_class.cnext_char_safe ((const char **) text);
510 void
511 str_cnext_char_safe (const char **text)
513 used_class.cnext_char_safe (text);
516 char *
517 str_get_prev_char_safe (char *text)
519 used_class.cprev_char_safe ((const char **) &text);
520 return text;
523 const char *
524 str_cget_prev_char_safe (const char *text)
526 used_class.cprev_char_safe (&text);
527 return text;
530 void
531 str_prev_char_safe (char **text)
533 used_class.cprev_char_safe ((const char **) text);
536 void
537 str_cprev_char_safe (const char **text)
539 used_class.cprev_char_safe (text);
543 str_next_noncomb_char (char **text)
545 return used_class.cnext_noncomb_char ((const char **) text);
549 str_cnext_noncomb_char (const char **text)
551 return used_class.cnext_noncomb_char (text);
555 str_prev_noncomb_char (char **text, const char *begin)
557 return used_class.cprev_noncomb_char ((const char **) text, begin);
561 str_cprev_noncomb_char (const char **text, const char *begin)
563 return used_class.cprev_noncomb_char (text, begin);
567 str_is_valid_char (const char *ch, size_t size)
569 return used_class.is_valid_char (ch, size);
573 str_term_width1 (const char *text)
575 return used_class.term_width1 (text);
579 str_term_width2 (const char *text, size_t length)
581 return used_class.term_width2 (text, length);
585 str_term_char_width (const char *text)
587 return used_class.term_char_width (text);
591 str_offset_to_pos (const char *text, size_t length)
593 return used_class.offset_to_pos (text, length);
597 str_length (const char *text)
599 return used_class.length (text);
603 str_length_char (const char *text)
605 return str_cget_next_char_safe (text) - text;
609 str_length2 (const char *text, int size)
611 return used_class.length2 (text, size);
615 str_length_noncomb (const char *text)
617 return used_class.length_noncomb (text);
621 str_column_to_pos (const char *text, size_t pos)
623 return used_class.column_to_pos (text, pos);
627 str_isspace (const char *ch)
629 return used_class.char_isspace (ch);
633 str_ispunct (const char *ch)
635 return used_class.char_ispunct (ch);
639 str_isalnum (const char *ch)
641 return used_class.char_isalnum (ch);
645 str_isdigit (const char *ch)
647 return used_class.char_isdigit (ch);
651 str_toupper (const char *ch, char **out, size_t * remain)
653 return used_class.char_toupper (ch, out, remain);
657 str_tolower (const char *ch, char **out, size_t * remain)
659 return used_class.char_tolower (ch, out, remain);
663 str_isprint (const char *ch)
665 return used_class.char_isprint (ch);
668 gboolean
669 str_iscombiningmark (const char *ch)
671 return used_class.char_iscombiningmark (ch);
674 const char *
675 str_trunc (const char *text, int width)
677 return used_class.trunc (text, width);
680 char *
681 str_create_search_needle (const char *needle, int case_sen)
683 return used_class.create_search_needle (needle, case_sen);
687 void
688 str_release_search_needle (char *needle, int case_sen)
690 used_class.release_search_needle (needle, case_sen);
693 const char *
694 str_search_first (const char *text, const char *search, int case_sen)
696 return used_class.search_first (text, search, case_sen);
699 const char *
700 str_search_last (const char *text, const char *search, int case_sen)
702 return used_class.search_last (text, search, case_sen);
706 str_is_valid_string (const char *text)
708 return used_class.is_valid_string (text);
712 str_compare (const char *t1, const char *t2)
714 return used_class.compare (t1, t2);
718 str_ncompare (const char *t1, const char *t2)
720 return used_class.ncompare (t1, t2);
724 str_casecmp (const char *t1, const char *t2)
726 return used_class.casecmp (t1, t2);
730 str_ncasecmp (const char *t1, const char *t2)
732 return used_class.ncasecmp (t1, t2);
736 str_prefix (const char *text, const char *prefix)
738 return used_class.prefix (text, prefix);
742 str_caseprefix (const char *text, const char *prefix)
744 return used_class.caseprefix (text, prefix);
747 void
748 str_fix_string (char *text)
750 used_class.fix_string (text);
753 char *
754 str_create_key (const char *text, int case_sen)
756 return used_class.create_key (text, case_sen);
759 char *
760 str_create_key_for_filename (const char *text, int case_sen)
762 return used_class.create_key_for_filename (text, case_sen);
766 str_key_collate (const char *t1, const char *t2, int case_sen)
768 return used_class.key_collate (t1, t2, case_sen);
771 void
772 str_release_key (char *key, int case_sen)
774 used_class.release_key (key, case_sen);
777 void
778 str_msg_term_size (const char *text, int *lines, int *columns)
780 char *p, *tmp;
781 char *q;
782 char c = '\0';
783 int width;
785 *lines = 1;
786 *columns = 0;
788 tmp = g_strdup (text);
789 p = tmp;
791 while (TRUE)
793 q = strchr (p, '\n');
794 if (q != NULL)
796 c = q[0];
797 q[0] = '\0';
800 width = str_term_width1 (p);
801 if (width > *columns)
802 *columns = width;
804 if (q == NULL)
805 break;
807 q[0] = c;
808 p = q + 1;
809 (*lines)++;
812 g_free (tmp);
815 /* --------------------------------------------------------------------------------------------- */
/*
 * Search HAYSTACK backwards for NEEDLE, skipping the last SKIP_COUNT matches.
 *
 * Each further search is limited to the part of HAYSTACK that ends one byte
 * before the previous match.  Returns a pointer to the match, or NULL when
 * there are not enough matches.
 *
 * NOTE(review): the "- 1" below also excludes the byte immediately preceding
 * the previous match from the next search; kept as in the original - confirm
 * this is intended.
 */
char *
strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
{
    char *semi;
    ssize_t len;

    len = strlen (haystack);

    do
    {
        /* A previous match at offset 0 leaves nothing to search.  A negative length
           must not reach g_strrstr_len (): it means "search the whole string" there,
           which would restart the search from the end. */
        if (len < 0)
            return NULL;

        semi = g_strrstr_len (haystack, len, needle);
        if (semi == NULL)
            return NULL;
        len = semi - haystack - 1;
    }
    while (skip_count-- != 0);
    return semi;
}
836 /* --------------------------------------------------------------------------------------------- */