Updated doc/NEWS file
[midnight-commander.git] / lib / strutil / strutil.c
blob6ffef8c69eaf055e067605d10906b4a7e3c08fc8
1 /*
2 Common strings utilities
4 Copyright (C) 2007, 2011
5 The Free Software Foundation, Inc.
7 Written by:
8 Rostislav Benes, 2007
10 The file_date routine is mostly from GNU's fileutils package,
11 written by Richard Stallman and David MacKenzie.
13 This file is part of the Midnight Commander.
15 The Midnight Commander is free software: you can redistribute it
16 and/or modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation, either version 3 of the License,
18 or (at your option) any later version.
20 The Midnight Commander is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program. If not, see <http://www.gnu.org/licenses/>.
30 #include <config.h>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <langinfo.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <stdarg.h>
38 #include "lib/global.h"
39 #include "lib/strutil.h"
/* Codeset name prefixes that select the UTF-8 string class; the list is NULL-terminated. */
static const char *str_utf8_encodings[] = {
    "utf-8",
    "utf8",
    NULL
};
/* Codeset name prefixes of plain 8-bit encodings (no wide or multibyte characters);
   the list is NULL-terminated. */
static const char *str_8bit_encodings[] = {
    "cp-1251",
    "cp1251",
    "cp-1250",
    "cp1250",
    "cp-866",
    "cp866",
    "ibm-866",
    "ibm866",
    "cp-850",
    "cp850",
    "cp-852",
    "cp852",
    "iso-8859",
    "iso8859",
    "koi8",
    NULL
};
68 /* terminal encoding */
69 static char *codeset = NULL;
70 static char *term_encoding = NULL;
71 /* function for encoding specific operations */
72 static struct str_class used_class;
74 GIConv str_cnv_to_term;
75 GIConv str_cnv_from_term;
76 GIConv str_cnv_not_convert = INVALID_CONV;
78 /* if enc is same encoding like on terminal */
79 static int
80 str_test_not_convert (const char *enc)
82 return g_ascii_strcasecmp (enc, codeset) == 0;
85 GIConv
86 str_crt_conv_to (const char *to_enc)
88 return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
91 GIConv
92 str_crt_conv_from (const char *from_enc)
94 return (!str_test_not_convert (from_enc))
95 ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
98 void
99 str_close_conv (GIConv conv)
101 if (conv != str_cnv_not_convert)
102 g_iconv_close (conv);
105 static estr_t
106 _str_convert (GIConv coder, const char *string, int size, GString * buffer)
108 estr_t state = ESTR_SUCCESS;
109 gchar *tmp_buff = NULL;
110 gssize left;
111 gsize bytes_read = 0;
112 gsize bytes_written = 0;
113 GError *error = NULL;
114 errno = 0;
116 if (coder == INVALID_CONV)
117 return ESTR_FAILURE;
119 if (string == NULL || buffer == NULL)
120 return ESTR_FAILURE;
123 if (! used_class.is_valid_string (string))
125 return ESTR_FAILURE;
128 if (size < 0)
130 size = strlen (string);
132 else
134 left = strlen (string);
135 if (left < size)
136 size = left;
139 left = size;
140 g_iconv (coder, NULL, NULL, NULL, NULL);
142 while (left)
144 tmp_buff = g_convert_with_iconv ((const gchar *) string,
145 left, coder, &bytes_read, &bytes_written, &error);
146 if (error)
148 int code = error->code;
150 g_error_free (error);
151 error = NULL;
153 switch (code)
155 case G_CONVERT_ERROR_NO_CONVERSION:
156 /* Conversion between the requested character sets is not supported. */
157 tmp_buff = g_strnfill (strlen (string), '?');
158 g_string_append (buffer, tmp_buff);
159 g_free (tmp_buff);
160 return ESTR_FAILURE;
162 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
163 /* Invalid byte sequence in conversion input. */
164 if ((tmp_buff == NULL) && (bytes_read != 0))
165 /* recode valid byte sequence */
166 tmp_buff = g_convert_with_iconv ((const gchar *) string,
167 bytes_read, coder, NULL, NULL, NULL);
169 if (tmp_buff != NULL)
171 g_string_append (buffer, tmp_buff);
172 g_free (tmp_buff);
175 if ((int) bytes_read < left)
177 string += bytes_read + 1;
178 size -= (bytes_read + 1);
179 left -= (bytes_read + 1);
180 g_string_append_c (buffer, *(string - 1));
182 else
184 return ESTR_PROBLEM;
186 state = ESTR_PROBLEM;
187 break;
189 case G_CONVERT_ERROR_PARTIAL_INPUT:
190 /* Partial character sequence at end of input. */
191 g_string_append (buffer, tmp_buff);
192 g_free (tmp_buff);
193 if ((int) bytes_read < left)
195 left = left - bytes_read;
196 tmp_buff = g_strnfill (left, '?');
197 g_string_append (buffer, tmp_buff);
198 g_free (tmp_buff);
200 return ESTR_PROBLEM;
202 case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
203 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
204 case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
205 default:
206 g_free (tmp_buff);
207 return ESTR_FAILURE;
210 else
212 if (tmp_buff != NULL)
214 if (*tmp_buff)
216 g_string_append (buffer, tmp_buff);
217 g_free (tmp_buff);
218 string += bytes_read;
219 left -= bytes_read;
221 else
223 g_free (tmp_buff);
224 g_string_append (buffer, string);
225 return state;
228 else
230 g_string_append (buffer, string);
231 return ESTR_PROBLEM;
235 return state;
238 estr_t
239 str_convert (GIConv coder, const char *string, GString * buffer)
241 return _str_convert (coder, string, -1, buffer);
244 estr_t
245 str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
247 return _str_convert (coder, string, size, buffer);
250 gchar *
251 str_conv_gerror_message (GError * error, const char *def_msg)
253 return used_class.conv_gerror_message (error, def_msg);
256 estr_t
257 str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
259 estr_t result;
261 if (coder == str_cnv_not_convert)
263 g_string_append (buffer, string != NULL ? string : "");
264 result = ESTR_SUCCESS;
266 else
267 result = _str_convert (coder, string, -1, buffer);
269 return result;
272 estr_t
273 str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
275 return used_class.vfs_convert_to (coder, string, size, buffer);
278 void
279 str_printf (GString * buffer, const char *format, ...)
281 va_list ap;
282 va_start (ap, format);
283 #if GLIB_CHECK_VERSION (2, 14, 0)
284 g_string_append_vprintf (buffer, format, ap);
285 #else
287 gchar *tmp;
288 tmp = g_strdup_vprintf (format, ap);
289 g_string_append (buffer, tmp);
290 g_free (tmp);
292 #endif
293 va_end (ap);
296 void
297 str_insert_replace_char (GString * buffer)
299 used_class.insert_replace_char (buffer);
302 estr_t
303 str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
305 size_t left;
306 size_t cnv;
308 g_iconv (conv, NULL, NULL, NULL, NULL);
310 left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
312 cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
313 if (cnv == (size_t) (-1))
315 return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
317 else
319 output[0] = '\0';
320 return ESTR_SUCCESS;
325 const char *
326 str_detect_termencoding (void)
328 if (term_encoding == NULL)
330 /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
331 to utf-8 or UTF-8.
332 On Mac OS X, it returns the same case as the LANG input.
333 So let tranform result of nl_langinfo (CODESET) to upper case unconditionally. */
334 term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
337 return term_encoding;
/* Count the entries of the NULL-terminated TABLE that are an ASCII case-insensitive
   prefix of ENCODING.  Returns 0 for a NULL encoding. */
static int
str_test_encoding_class (const char *encoding, const char **table)
{
    const char **entry;
    int matches = 0;

    if (encoding == NULL)
        return 0;

    for (entry = table; *entry != NULL; entry++)
        if (g_ascii_strncasecmp (encoding, *entry, strlen (*entry)) == 0)
            matches++;

    return matches;
}
355 static void
356 str_choose_str_functions (void)
358 if (str_test_encoding_class (codeset, str_utf8_encodings))
360 used_class = str_utf8_init ();
362 else if (str_test_encoding_class (codeset, str_8bit_encodings))
364 used_class = str_8bit_init ();
366 else
368 used_class = str_ascii_init ();
372 gboolean
373 str_isutf8 (const char *codeset_name)
375 return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
378 void
379 str_init_strings (const char *termenc)
381 codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
383 str_cnv_not_convert = g_iconv_open (codeset, codeset);
384 if (str_cnv_not_convert == INVALID_CONV)
386 if (termenc != NULL)
388 g_free (codeset);
389 codeset = g_strdup (str_detect_termencoding ());
390 str_cnv_not_convert = g_iconv_open (codeset, codeset);
393 if (str_cnv_not_convert == INVALID_CONV)
395 g_free (codeset);
396 codeset = g_strdup ("ASCII");
397 str_cnv_not_convert = g_iconv_open (codeset, codeset);
401 str_cnv_to_term = str_cnv_not_convert;
402 str_cnv_from_term = str_cnv_not_convert;
404 str_choose_str_functions ();
407 void
408 str_uninit_strings (void)
410 if (str_cnv_not_convert != INVALID_CONV)
411 g_iconv_close (str_cnv_not_convert);
412 g_free (term_encoding);
413 g_free (codeset);
416 const char *
417 str_term_form (const char *text)
419 return used_class.term_form (text);
422 const char *
423 str_fit_to_term (const char *text, int width, align_crt_t just_mode)
425 return used_class.fit_to_term (text, width, just_mode);
428 const char *
429 str_term_trim (const char *text, int width)
431 return used_class.term_trim (text, width);
434 const char *
435 str_term_substring (const char *text, int start, int width)
437 return used_class.term_substring (text, start, width);
440 char *
441 str_get_next_char (char *text)
444 used_class.cnext_char ((const char **) &text);
445 return text;
448 const char *
449 str_cget_next_char (const char *text)
451 used_class.cnext_char (&text);
452 return text;
455 void
456 str_next_char (char **text)
458 used_class.cnext_char ((const char **) text);
461 void
462 str_cnext_char (const char **text)
464 used_class.cnext_char (text);
467 char *
468 str_get_prev_char (char *text)
470 used_class.cprev_char ((const char **) &text);
471 return text;
474 const char *
475 str_cget_prev_char (const char *text)
477 used_class.cprev_char (&text);
478 return text;
481 void
482 str_prev_char (char **text)
484 used_class.cprev_char ((const char **) text);
487 void
488 str_cprev_char (const char **text)
490 used_class.cprev_char (text);
493 char *
494 str_get_next_char_safe (char *text)
496 used_class.cnext_char_safe ((const char **) &text);
497 return text;
500 const char *
501 str_cget_next_char_safe (const char *text)
503 used_class.cnext_char_safe (&text);
504 return text;
507 void
508 str_next_char_safe (char **text)
510 used_class.cnext_char_safe ((const char **) text);
513 void
514 str_cnext_char_safe (const char **text)
516 used_class.cnext_char_safe (text);
519 char *
520 str_get_prev_char_safe (char *text)
522 used_class.cprev_char_safe ((const char **) &text);
523 return text;
526 const char *
527 str_cget_prev_char_safe (const char *text)
529 used_class.cprev_char_safe (&text);
530 return text;
533 void
534 str_prev_char_safe (char **text)
536 used_class.cprev_char_safe ((const char **) text);
539 void
540 str_cprev_char_safe (const char **text)
542 used_class.cprev_char_safe (text);
546 str_next_noncomb_char (char **text)
548 return used_class.cnext_noncomb_char ((const char **) text);
552 str_cnext_noncomb_char (const char **text)
554 return used_class.cnext_noncomb_char (text);
558 str_prev_noncomb_char (char **text, const char *begin)
560 return used_class.cprev_noncomb_char ((const char **) text, begin);
564 str_cprev_noncomb_char (const char **text, const char *begin)
566 return used_class.cprev_noncomb_char (text, begin);
570 str_is_valid_char (const char *ch, size_t size)
572 return used_class.is_valid_char (ch, size);
576 str_term_width1 (const char *text)
578 return used_class.term_width1 (text);
582 str_term_width2 (const char *text, size_t length)
584 return used_class.term_width2 (text, length);
588 str_term_char_width (const char *text)
590 return used_class.term_char_width (text);
594 str_offset_to_pos (const char *text, size_t length)
596 return used_class.offset_to_pos (text, length);
600 str_length (const char *text)
602 return used_class.length (text);
/* Return the distance in bytes between TEXT and the position that
   str_cget_next_char_safe () reports for it. */
int
str_length_char (const char *text)
{
    const char *next = str_cget_next_char_safe (text);

    return next - text;
}
612 str_length2 (const char *text, int size)
614 return used_class.length2 (text, size);
618 str_length_noncomb (const char *text)
620 return used_class.length_noncomb (text);
624 str_column_to_pos (const char *text, size_t pos)
626 return used_class.column_to_pos (text, pos);
630 str_isspace (const char *ch)
632 return used_class.char_isspace (ch);
636 str_ispunct (const char *ch)
638 return used_class.char_ispunct (ch);
642 str_isalnum (const char *ch)
644 return used_class.char_isalnum (ch);
648 str_isdigit (const char *ch)
650 return used_class.char_isdigit (ch);
654 str_toupper (const char *ch, char **out, size_t * remain)
656 return used_class.char_toupper (ch, out, remain);
660 str_tolower (const char *ch, char **out, size_t * remain)
662 return used_class.char_tolower (ch, out, remain);
666 str_isprint (const char *ch)
668 return used_class.char_isprint (ch);
671 gboolean
672 str_iscombiningmark (const char *ch)
674 return used_class.char_iscombiningmark (ch);
677 const char *
678 str_trunc (const char *text, int width)
680 return used_class.trunc (text, width);
683 char *
684 str_create_search_needle (const char *needle, int case_sen)
686 return used_class.create_search_needle (needle, case_sen);
690 void
691 str_release_search_needle (char *needle, int case_sen)
693 used_class.release_search_needle (needle, case_sen);
696 const char *
697 str_search_first (const char *text, const char *search, int case_sen)
699 return used_class.search_first (text, search, case_sen);
702 const char *
703 str_search_last (const char *text, const char *search, int case_sen)
705 return used_class.search_last (text, search, case_sen);
709 str_is_valid_string (const char *text)
711 return used_class.is_valid_string (text);
715 str_compare (const char *t1, const char *t2)
717 return used_class.compare (t1, t2);
721 str_ncompare (const char *t1, const char *t2)
723 return used_class.ncompare (t1, t2);
727 str_casecmp (const char *t1, const char *t2)
729 return used_class.casecmp (t1, t2);
733 str_ncasecmp (const char *t1, const char *t2)
735 return used_class.ncasecmp (t1, t2);
739 str_prefix (const char *text, const char *prefix)
741 return used_class.prefix (text, prefix);
745 str_caseprefix (const char *text, const char *prefix)
747 return used_class.caseprefix (text, prefix);
750 void
751 str_fix_string (char *text)
753 used_class.fix_string (text);
756 char *
757 str_create_key (const char *text, int case_sen)
759 return used_class.create_key (text, case_sen);
762 char *
763 str_create_key_for_filename (const char *text, int case_sen)
765 return used_class.create_key_for_filename (text, case_sen);
769 str_key_collate (const char *t1, const char *t2, int case_sen)
771 return used_class.key_collate (t1, t2, case_sen);
774 void
775 str_release_key (char *key, int case_sen)
777 used_class.release_key (key, case_sen);
/* Measure a message made of '\n'-separated lines.
   On return *LINES holds the number of lines (at least 1) and *COLUMNS the largest
   str_term_width1 () value found among them.  TEXT itself is not modified: the
   measuring is done on a private copy. */
void
str_msg_term_size (const char *text, int *lines, int *columns)
{
    char *copy;
    char *line;

    *lines = 1;
    *columns = 0;

    copy = g_strdup (text);
    line = copy;

    for (;;)
    {
        char *eol = strchr (line, '\n');
        int width;

        /* cut the copy at the line end so that only this line is measured */
        if (eol != NULL)
            *eol = '\0';

        width = str_term_width1 (line);
        if (*columns < width)
            *columns = width;

        if (eol == NULL)
            break;

        /* put the newline back and step to the following line */
        *eol = '\n';
        line = eol + 1;
        (*lines)++;
    }

    g_free (copy);
}
818 /* --------------------------------------------------------------------------------------------- */
/* Search HAYSTACK backwards for NEEDLE, ignoring the last SKIP_COUNT occurrences.
   Each further search is limited to the part of HAYSTACK that ends one byte before
   the previous match.  Returns a pointer to the match inside HAYSTACK, or NULL when
   there are not enough occurrences. */
char *
strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
{
    char *semi;
    ssize_t len;

    len = strlen (haystack);

    for (;;)
    {
        semi = g_strrstr_len (haystack, len, needle);
        if (semi == NULL)
            return NULL;

        if (skip_count-- == 0)
            return semi;

        len = semi - haystack - 1;
        /* A match at the very start leaves nothing in front of it.  A negative length
           must not reach g_strrstr_len (): it means "search the whole string" there
           and would find the already skipped occurrences again. */
        if (len < 0)
            return NULL;
    }
}
839 /* --------------------------------------------------------------------------------------------- */