Moved dir $(srcdir)/syntax into $(srcdir)/misc/syntax
[midnight-commander.git] / src / strutil.c
blob5755548e51a7fcff6dc1852edacd29cb5ee3f4e8
/* common strings utilities
   Copyright (C) 2007 Free Software Foundation, Inc.

   Written 2007 by:
   Rostislav Benes

   The file_date routine is mostly from GNU's fileutils package,
   written by Richard Stallman and David MacKenzie.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
25 #include <config.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <langinfo.h>
29 #include <string.h>
30 #include <errno.h>
31 #include <stdarg.h>
33 #include "global.h"
34 #include "strutil.h"
/* Codeset names that select the UTF-8 string class.  The table is
   terminated by a NULL sentinel. */
static const char *str_utf8_encodings[] = {
    "utf-8", "utf8",
    NULL
};
/* Codeset names of standard 8-bit encodings (no wide or multibyte
   characters).  The table is terminated by a NULL sentinel. */
static const char *str_8bit_encodings[] = {
    "cp-1251", "cp1251",
    "cp-1250", "cp1250",
    "cp-866", "cp866",
    "ibm-866", "ibm866",
    "cp-850", "cp850",
    "cp-852", "cp852",
    "iso-8859", "iso8859",
    "koi8",
    NULL
};
63 /* terminal encoding*/
64 static char *codeset = NULL;
65 /* function for encoding specific operations*/
66 static struct str_class used_class;
68 GIConv str_cnv_to_term;
69 GIConv str_cnv_from_term;
70 GIConv str_cnv_not_convert;
72 /* if enc is same encoding like on terminal*/
73 static int
74 str_test_not_convert (const char *enc)
76 return g_ascii_strcasecmp (enc, codeset) == 0;
79 GIConv
80 str_crt_conv_to (const char *to_enc)
82 return (!str_test_not_convert (to_enc))
83 ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
86 GIConv
87 str_crt_conv_from (const char *from_enc)
89 return (!str_test_not_convert (from_enc))
90 ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
93 void
94 str_close_conv (GIConv conv)
96 if (conv != str_cnv_not_convert)
97 g_iconv_close (conv);
100 static estr_t
101 _str_convert (GIConv coder, const char *string, int size, GString * buffer)
103 estr_t state = ESTR_SUCCESS;
104 gchar *tmp_buff = NULL;
105 gssize left;
106 gsize bytes_read = 0;
107 gsize bytes_written = 0;
108 GError *error = NULL;
109 errno = 0;
111 if (coder == INVALID_CONV)
112 return ESTR_FAILURE;
114 if (string == NULL || buffer == NULL)
115 return ESTR_FAILURE;
118 if (! used_class.is_valid_string (string))
120 return ESTR_FAILURE;
123 if (size < 0)
125 size = strlen (string);
127 else
129 left = strlen (string);
130 if (left < size)
131 size = left;
134 left = size;
135 g_iconv (coder, NULL, NULL, NULL, NULL);
137 while (left)
139 tmp_buff = g_convert_with_iconv ((const gchar *) string,
140 left,
141 coder,
142 &bytes_read,
143 &bytes_written, &error);
144 if (error)
146 int code = error->code;
148 g_error_free (error);
149 error = NULL;
151 switch (code)
153 case G_CONVERT_ERROR_NO_CONVERSION:
154 /* Conversion between the requested character sets is not supported. */
155 tmp_buff = g_strnfill (strlen (string), '?');
156 g_string_append (buffer, tmp_buff);
157 g_free (tmp_buff);
158 return ESTR_FAILURE;
160 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
161 /* Invalid byte sequence in conversion input. */
162 if ((tmp_buff == NULL) && (bytes_read != 0))
163 /* recode valid byte sequence */
164 tmp_buff = g_convert_with_iconv ((const gchar *) string,
165 bytes_read,
166 coder, NULL, NULL, NULL);
168 if (tmp_buff != NULL)
170 g_string_append (buffer, tmp_buff);
171 g_free (tmp_buff);
174 if ((int)bytes_read < left)
176 string += bytes_read + 1;
177 size -= (bytes_read + 1);
178 left -= (bytes_read + 1);
179 g_string_append_c (buffer, *(string-1));
181 else
183 return ESTR_PROBLEM;
185 state = ESTR_PROBLEM;
186 break;
188 case G_CONVERT_ERROR_PARTIAL_INPUT:
189 /* Partial character sequence at end of input. */
190 g_string_append (buffer, tmp_buff);
191 g_free (tmp_buff);
192 if ((int)bytes_read < left)
194 left = left - bytes_read;
195 tmp_buff = g_strnfill (left, '?');
196 g_string_append (buffer, tmp_buff);
197 g_free (tmp_buff);
199 return ESTR_PROBLEM;
201 case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
202 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
203 case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
204 default:
205 g_free (tmp_buff);
206 return ESTR_FAILURE;
209 else
211 if (tmp_buff != NULL)
213 if (*tmp_buff)
215 g_string_append (buffer, tmp_buff);
216 g_free (tmp_buff);
217 string += bytes_read;
218 left -= bytes_read;
220 else
222 g_free (tmp_buff);
223 g_string_append (buffer, string);
224 return state;
227 else
229 g_string_append (buffer, string);
230 return ESTR_PROBLEM;
234 return state;
237 estr_t
238 str_convert (GIConv coder, const char *string, GString * buffer)
240 return _str_convert (coder, string, -1, buffer);
243 estr_t
244 str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
246 return _str_convert (coder, string, size, buffer);
249 gchar *
250 str_conv_gerror_message (GError *error, const char *def_msg)
252 return used_class.conv_gerror_message (error, def_msg);
255 estr_t
256 str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
258 estr_t result;
260 if (coder == str_cnv_not_convert)
262 g_string_append (buffer, string != NULL ? string : "");
263 result = ESTR_SUCCESS;
265 else
266 result = _str_convert (coder, string, -1, buffer);
268 return result;
271 estr_t
272 str_vfs_convert_to (GIConv coder, const char *string, int size,
273 GString * buffer)
275 return used_class.vfs_convert_to (coder, string, size, buffer);
278 void
279 str_printf (GString * buffer, const char *format, ...)
281 va_list ap;
282 va_start (ap, format);
283 #if GLIB_CHECK_VERSION (2, 14, 0)
284 g_string_append_vprintf (buffer, format, ap);
285 #else
287 gchar *tmp;
288 tmp = g_strdup_vprintf (format, ap);
289 g_string_append (buffer, tmp);
290 g_free(tmp);
292 #endif
293 va_end (ap);
296 void
297 str_insert_replace_char (GString * buffer)
299 used_class.insert_replace_char (buffer);
302 estr_t
303 str_translate_char (GIConv conv, const char *keys, size_t ch_size,
304 char *output, size_t out_size)
306 size_t left;
307 size_t cnv;
309 g_iconv (conv, NULL, NULL, NULL, NULL);
311 left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
313 cnv = g_iconv (conv, (gchar **) &keys, &left, &output, &out_size);
314 if (cnv == (size_t)(-1)) {
315 return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
316 } else {
317 output[0] = '\0';
318 return ESTR_SUCCESS;
/* Return the codeset name reported by nl_langinfo (CODESET). */
const char *
str_detect_termencoding (void)
{
    const char *detected = nl_langinfo (CODESET);

    return detected;
}
/* Count the entries of the NULL-terminated TABLE that are ASCII
   case-insensitive prefixes of ENCODING.  Returns 0 when ENCODING is NULL. */
static int
str_test_encoding_class (const char *encoding, const char **table)
{
    const char **entry;
    int matches = 0;

    if (encoding == NULL)
        return 0;

    for (entry = table; *entry != NULL; entry++)
    {
        if (g_ascii_strncasecmp (encoding, *entry, strlen (*entry)) == 0)
            matches++;
    }

    return matches;
}
345 static void
346 str_choose_str_functions ()
348 if (str_test_encoding_class (codeset, str_utf8_encodings))
350 used_class = str_utf8_init ();
352 else if (str_test_encoding_class (codeset, str_8bit_encodings))
354 used_class = str_8bit_init ();
356 else
358 used_class = str_ascii_init ();
363 str_isutf8 (const char *codeset_name)
365 int result = 0;
366 if (str_test_encoding_class (codeset_name, str_utf8_encodings))
368 result = 1;
370 return result;
373 void
374 str_init_strings (const char *termenc)
376 codeset = g_strdup ((termenc != NULL)
377 ? termenc
378 : str_detect_termencoding ());
380 str_cnv_not_convert = g_iconv_open (codeset, codeset);
381 if (str_cnv_not_convert == INVALID_CONV)
383 if (termenc != NULL)
385 g_free (codeset);
386 codeset = g_strdup (str_detect_termencoding ());
387 str_cnv_not_convert = g_iconv_open (codeset, codeset);
390 if (str_cnv_not_convert == INVALID_CONV)
392 g_free (codeset);
393 codeset = g_strdup ("ascii");
394 str_cnv_not_convert = g_iconv_open (codeset, codeset);
398 str_cnv_to_term = str_cnv_not_convert;
399 str_cnv_from_term = str_cnv_not_convert;
401 str_choose_str_functions ();
404 void
405 str_uninit_strings (void)
407 if (str_cnv_not_convert != INVALID_CONV)
408 g_iconv_close (str_cnv_not_convert);
409 g_free (codeset);
412 const char *
413 str_term_form (const char *text)
415 return used_class.term_form (text);
418 const char *
419 str_fit_to_term (const char *text, int width, align_crt_t just_mode)
421 return used_class.fit_to_term (text, width, just_mode);
424 const char *
425 str_term_trim (const char *text, int width)
427 return used_class.term_trim (text, width);
430 void
431 str_msg_term_size (const char *text, int *lines, int *columns)
433 used_class.msg_term_size (text, lines, columns);
436 const char *
437 str_term_substring (const char *text, int start, int width)
439 return used_class.term_substring (text, start, width);
442 char *
443 str_get_next_char (char *text)
446 used_class.cnext_char ((const char **) &text);
447 return text;
450 const char *
451 str_cget_next_char (const char *text)
453 used_class.cnext_char(&text);
454 return text;
457 void
458 str_next_char (char **text)
460 used_class.cnext_char ((const char **) text);
463 void
464 str_cnext_char (const char **text)
466 used_class.cnext_char (text);
469 char *
470 str_get_prev_char (char *text)
472 used_class.cprev_char ((const char **) &text);
473 return text;
476 const char *
477 str_cget_prev_char (const char *text)
479 used_class.cprev_char (&text);
480 return text;
483 void
484 str_prev_char (char **text)
486 used_class.cprev_char ((const char **) text);
489 void
490 str_cprev_char (const char **text)
492 used_class.cprev_char (text);
495 char *
496 str_get_next_char_safe (char *text)
498 used_class.cnext_char_safe ((const char **) &text);
499 return text;
502 const char *
503 str_cget_next_char_safe (const char *text)
505 used_class.cnext_char_safe (&text);
506 return text;
509 void
510 str_next_char_safe (char **text)
512 used_class.cnext_char_safe ((const char **) text);
515 void
516 str_cnext_char_safe (const char **text)
518 used_class.cnext_char_safe (text);
521 char *
522 str_get_prev_char_safe (char *text)
524 used_class.cprev_char_safe ((const char **) &text);
525 return text;
528 const char *
529 str_cget_prev_char_safe (const char *text)
531 used_class.cprev_char_safe (&text);
532 return text;
535 void
536 str_prev_char_safe (char **text)
538 used_class.cprev_char_safe ((const char **) text);
541 void
542 str_cprev_char_safe (const char **text)
544 used_class.cprev_char_safe (text);
548 str_next_noncomb_char (char **text)
550 return used_class.cnext_noncomb_char ((const char **) text);
554 str_cnext_noncomb_char (const char **text)
556 return used_class.cnext_noncomb_char (text);
560 str_prev_noncomb_char (char **text, const char *begin)
562 return used_class.cprev_noncomb_char ((const char **) text, begin);
566 str_cprev_noncomb_char (const char **text, const char *begin)
568 return used_class.cprev_noncomb_char (text, begin);
572 str_is_valid_char (const char *ch, size_t size)
574 return used_class.is_valid_char (ch, size);
578 str_term_width1 (const char *text)
580 return used_class.term_width1 (text);
584 str_term_width2 (const char *text, size_t length)
586 return used_class.term_width2 (text, length);
590 str_term_char_width (const char *text)
592 return used_class.term_char_width (text);
596 str_offset_to_pos (const char *text, size_t length)
598 return used_class.offset_to_pos (text, length);
602 str_length (const char *text)
604 return used_class.length (text);
/* Return the distance in bytes between TEXT and the pointer that
   str_cget_next_char_safe () returns for it. */
int
str_length_char (const char *text)
{
    const char *next = str_cget_next_char_safe (text);

    return next - text;
}
614 str_length2 (const char *text, int size)
616 return used_class.length2 (text, size);
620 str_length_noncomb (const char *text)
622 return used_class.length_noncomb (text);
626 str_column_to_pos (const char *text, size_t pos)
628 return used_class.column_to_pos (text, pos);
632 str_isspace (const char *ch)
634 return used_class.isspace (ch);
638 str_ispunct (const char *ch)
640 return used_class.ispunct (ch);
644 str_isalnum (const char *ch)
646 return used_class.isalnum (ch);
650 str_isdigit (const char *ch)
652 return used_class.isdigit (ch);
656 str_toupper (const char *ch, char **out, size_t * remain)
658 return used_class.toupper (ch, out, remain);
662 str_tolower (const char *ch, char **out, size_t * remain)
664 return used_class.tolower (ch, out, remain);
668 str_isprint (const char *ch)
670 return used_class.isprint (ch);
674 str_iscombiningmark (const char *ch)
676 return used_class.iscombiningmark (ch);
679 const char *
680 str_trunc (const char *text, int width)
682 return used_class.trunc (text, width);
685 char *
686 str_create_search_needle (const char *needle, int case_sen)
688 return used_class.create_search_needle (needle, case_sen);
692 void
693 str_release_search_needle (char *needle, int case_sen)
695 used_class.release_search_needle (needle, case_sen);
698 const char *
699 str_search_first (const char *text, const char *search, int case_sen)
701 return used_class.search_first (text, search, case_sen);
704 const char *
705 str_search_last (const char *text, const char *search, int case_sen)
707 return used_class.search_last (text, search, case_sen);
711 str_is_valid_string (const char *text)
713 return used_class.is_valid_string (text);
717 str_compare (const char *t1, const char *t2)
719 return used_class.compare (t1, t2);
723 str_ncompare (const char *t1, const char *t2)
725 return used_class.ncompare (t1, t2);
729 str_casecmp (const char *t1, const char *t2)
731 return used_class.casecmp (t1, t2);
735 str_ncasecmp (const char *t1, const char *t2)
737 return used_class.ncasecmp (t1, t2);
741 str_prefix (const char *text, const char *prefix)
743 return used_class.prefix (text, prefix);
747 str_caseprefix (const char *text, const char *prefix)
749 return used_class.caseprefix (text, prefix);
752 void
753 str_fix_string (char *text)
755 used_class.fix_string (text);
758 char *
759 str_create_key (const char *text, int case_sen)
761 return used_class.create_key (text, case_sen);
764 char *
765 str_create_key_for_filename (const char *text, int case_sen)
767 return used_class.create_key_for_filename (text, case_sen);
771 str_key_collate (const char *t1, const char *t2, int case_sen)
773 return used_class.key_collate (t1, t2, case_sen);
776 void
777 str_release_key (char *key, int case_sen)
779 used_class.release_key (key, case_sen);