Temporary commit. Half-fix searching.
[midnight-commander.git] / src / strutil.c
blob308709b31515a5cae2bb091b2542832dd56bc8df
1 /* common strings utilities
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 Written 2007 by:
5 Rostislav Benes
7 The file_date routine is mostly from GNU's fileutils package,
8 written by Richard Stallman and David MacKenzie.
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 #include <config.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <iconv.h>
29 #include <glib.h>
30 #include <langinfo.h>
31 #include <string.h>
32 #include <errno.h>
33 #include <stdarg.h>
35 #include "global.h"
36 #include "strutil.h"
/* Spellings under which the UTF-8 encoding may be named; NULL-terminated. */
static const char *str_utf8_encodings[] = { "utf-8", "utf8", NULL };
/*
 * Name prefixes of plain 8-bit encodings (no wide or multibyte characters);
 * NULL-terminated.  Entries are matched as prefixes by
 * str_test_encoding_class ().
 */
static const char *str_8bit_encodings[] = {
    "cp-1251", "cp1251",
    "cp-1250", "cp1250",
    "cp-866", "cp866",
    "cp-850", "cp850",
    "cp-852", "cp852",
    "iso-8859", "iso8859",
    "koi8",
    NULL
};
63 // terminal encoding
64 static char *codeset;
65 // function for encoding specific operations
66 static struct str_class used_class;
68 GIConv str_cnv_to_term;
69 GIConv str_cnv_from_term;
70 GIConv str_cnv_not_convert;
72 // if enc is same encoding like on terminal
73 static int
74 str_test_not_convert (const char *enc)
76 return g_ascii_strcasecmp (enc, codeset) == 0;
79 GIConv
80 str_crt_conv_to (const char *to_enc)
82 return (!str_test_not_convert (to_enc))
83 ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
86 GIConv
87 str_crt_conv_from (const char *from_enc)
89 return (!str_test_not_convert (from_enc))
90 ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
93 void
94 str_close_conv (GIConv conv)
96 if (conv != str_cnv_not_convert)
97 g_iconv_close (conv);
100 static int
101 _str_convert (GIConv coder, char *string, int size, GString * buffer)
103 int state;
104 gchar *tmp_buff;
105 gssize left;
106 gsize bytes_read, bytes_written;
107 GError *error = NULL;
109 errno = 0;
111 if (used_class.is_valid_string (string))
113 state = 0;
114 if (size < 0)
116 size = strlen (string);
118 else
120 left = strlen (string);
121 if (left < size)
122 size = left;
124 left = size;
126 if (coder == (GIConv) (-1))
127 return ESTR_FAILURE;
129 g_iconv (coder, NULL, NULL, NULL, NULL);
131 while (left)
133 tmp_buff = g_convert_with_iconv ((const gchar *) string,
134 left,
135 coder,
136 &bytes_read,
137 &bytes_written, &error);
139 if (error)
141 switch (error->code)
143 case G_CONVERT_ERROR_NO_CONVERSION:
144 /* Conversion between the requested character sets is not supported. */
145 tmp_buff = g_strnfill (strlen (string), '?');
146 g_string_append (buffer, tmp_buff);
147 g_free (tmp_buff);
148 g_error_free (error);
149 return ESTR_PROBLEM;
150 break;
151 case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
152 /* Invalid byte sequence in conversion input. */
153 g_string_append (buffer, tmp_buff);
154 g_string_append (buffer, "?");
155 g_free (tmp_buff);
156 if (bytes_read < left)
158 string += bytes_read + 1;
159 size -= (bytes_read + 1);
160 left -= (bytes_read + 1);
162 else
164 g_error_free (error);
165 return ESTR_PROBLEM;
167 state = ESTR_PROBLEM;
168 break;
169 case G_CONVERT_ERROR_PARTIAL_INPUT:
170 /* Partial character sequence at end of input. */
171 g_error_free (error);
172 g_string_append (buffer, tmp_buff);
173 g_free (tmp_buff);
174 if (bytes_read < left)
176 left = left - bytes_read;
177 tmp_buff = g_strnfill (left, '?');
178 g_string_append (buffer, tmp_buff);
179 g_free (tmp_buff);
181 return ESTR_PROBLEM;
182 break;
183 case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
184 case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
185 case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
186 default:
187 g_error_free (error);
188 if (tmp_buff)
189 g_free (tmp_buff);
191 return ESTR_FAILURE;
193 g_error_free (error);
195 else
197 g_string_append (buffer, tmp_buff);
198 g_free (tmp_buff);
199 string += bytes_read;
200 left -= bytes_read;
203 return state;
205 else
206 return ESTR_FAILURE;
210 str_convert (GIConv coder, char *string, GString * buffer)
212 int result;
214 result = _str_convert (coder, string, -1, buffer);
216 return result;
220 str_nconvert (GIConv coder, char *string, int size, GString * buffer)
222 int result;
224 result = _str_convert (coder, string, size, buffer);
226 return result;
230 str_vfs_convert_from (GIConv coder, char *string, GString * buffer)
232 int result;
234 if (coder == str_cnv_not_convert)
236 g_string_append (buffer, string);
237 result = 0;
239 else
240 result = _str_convert (coder, string, -1, buffer);
242 return result;
246 str_vfs_convert_to (GIConv coder, const char *string, int size,
247 GString * buffer)
249 return used_class.vfs_convert_to (coder, string, size, buffer);
252 void
253 str_printf (GString * buffer, const char *format, ...)
255 gchar *tmp;
256 va_list ap;
257 va_start (ap, format);
259 * more simple call:
260 g_string_append_vprintf (buffer, format, ap);
261 * but not all versions of glib2 have this function :(
263 tmp = g_strdup_vprintf ( format, ap);
264 g_string_append (buffer, tmp);
265 g_free(tmp);
266 va_end (ap);
269 void
270 str_insert_replace_char (GString * buffer)
272 used_class.insert_replace_char (buffer);
276 str_translate_char (GIConv conv, char *keys, size_t ch_size,
277 char *output, size_t out_size)
279 size_t left;
280 size_t cnv;
282 g_iconv (conv, NULL, NULL, NULL, NULL);
284 left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
286 cnv = g_iconv (conv, &keys, &left, &output, &out_size);
287 if (cnv == (size_t)(-1)) {
288 if (errno == EINVAL) return ESTR_PROBLEM; else return ESTR_FAILURE;
289 } else {
290 output[0] = '\0';
291 return 0;
/* Return the codeset name reported by nl_langinfo (CODESET). */
static const char *
str_detect_termencoding (void)
{
    const char *detected = nl_langinfo (CODESET);

    return detected;
}
/*
 * Count the entries of the NULL-terminated TABLE that are an ASCII
 * case-insensitive prefix of ENCODING.  Callers treat a non-zero result as
 * "ENCODING belongs to this class".
 */
static int
str_test_encoding_class (const char *encoding, const char **table)
{
    const char **entry;
    int matches = 0;

    for (entry = table; *entry != NULL; entry++)
    {
        if (g_ascii_strncasecmp (encoding, *entry, strlen (*entry)) == 0)
            matches++;
    }

    return matches;
}
316 static void
317 str_choose_str_functions ()
319 if (str_test_encoding_class (codeset, str_utf8_encodings))
321 used_class = str_utf8_init ();
323 else if (str_test_encoding_class (codeset, str_8bit_encodings))
325 used_class = str_8bit_init ();
327 else
329 used_class = str_ascii_init ();
334 str_isutf8 (char *codeset_name)
336 int result = 0;
337 if (str_test_encoding_class (codeset_name, str_utf8_encodings))
339 result = 1;
341 return result;
344 void
345 str_init_strings (const char *termenc)
347 codeset = g_strdup ((termenc != NULL)
348 ? termenc
349 : str_detect_termencoding ());
351 str_cnv_not_convert = g_iconv_open (codeset, codeset);
352 if (str_cnv_not_convert == INVALID_CONV)
354 if (termenc != NULL)
356 g_free (codeset);
357 codeset = g_strdup (str_detect_termencoding ());
358 str_cnv_not_convert = g_iconv_open (codeset, codeset);
361 if (str_cnv_not_convert == INVALID_CONV)
363 g_free (codeset);
364 codeset = g_strdup ("ascii");
365 str_cnv_not_convert = g_iconv_open (codeset, codeset);
369 str_cnv_to_term = str_cnv_not_convert;
370 str_cnv_from_term = str_cnv_not_convert;
372 str_choose_str_functions ();
375 void
376 str_uninit_strings ()
378 g_iconv_close (str_cnv_not_convert);
381 const char *
382 str_term_form (const char *text)
384 return used_class.term_form (text);
387 const char *
388 str_fit_to_term (const char *text, int width, int just_mode)
390 return used_class.fit_to_term (text, width, just_mode);
393 const char *
394 str_term_trim (const char *text, int width)
396 return used_class.term_trim (text, width);
399 void
400 str_msg_term_size (const char *text, int *lines, int *columns)
402 return used_class.msg_term_size (text, lines, columns);
405 const char *
406 str_term_substring (const char *text, int start, int width)
408 return used_class.term_substring (text, start, width);
411 char *
412 str_get_next_char (char *text)
415 used_class.cnext_char ((const char **) &text);
416 return text;
419 const char *
420 str_cget_next_char (const char *text)
422 used_class.cnext_char(&text);
423 return text;
426 void
427 str_next_char (char **text)
429 used_class.cnext_char ((const char **) text);
432 void
433 str_cnext_char (const char **text)
435 used_class.cnext_char (text);
438 char *
439 str_get_prev_char (char *text)
441 used_class.cprev_char ((const char **) &text);
442 return text;
445 const char *
446 str_cget_prev_char (const char *text)
448 used_class.cprev_char (&text);
449 return text;
452 void
453 str_prev_char (char **text)
455 used_class.cprev_char ((const char **) text);
458 void
459 str_cprev_char (const char **text)
461 used_class.cprev_char (text);
464 char *
465 str_get_next_char_safe (char *text)
467 used_class.cnext_char_safe ((const char **) &text);
468 return text;
471 const char *
472 str_cget_next_char_safe (const char *text)
474 used_class.cnext_char_safe (&text);
475 return text;
478 void
479 str_next_char_safe (char **text)
481 used_class.cnext_char_safe ((const char **) text);
484 void
485 str_cnext_char_safe (const char **text)
487 used_class.cnext_char_safe (text);
490 char *
491 str_get_prev_char_safe (char *text)
493 used_class.cprev_char_safe ((const char **) &text);
494 return text;
497 const char *
498 str_cget_prev_char_safe (const char *text)
500 used_class.cprev_char_safe (&text);
501 return text;
504 void
505 str_prev_char_safe (char **text)
507 used_class.cprev_char_safe ((const char **) text);
510 void
511 str_cprev_char_safe (const char **text)
513 used_class.cprev_char_safe (text);
517 str_next_noncomb_char (char **text)
519 return used_class.cnext_noncomb_char ((const char **) text);
523 str_cnext_noncomb_char (const char **text)
525 return used_class.cnext_noncomb_char (text);
529 str_prev_noncomb_char (char **text, const char *begin)
531 return used_class.cprev_noncomb_char ((const char **) text, begin);
535 str_cprev_noncomb_char (const char **text, const char *begin)
537 return used_class.cprev_noncomb_char (text, begin);
541 str_is_valid_char (const char *ch, size_t size)
543 return used_class.is_valid_char (ch, size);
547 str_term_width1 (const char *text)
549 return used_class.term_width1 (text);
553 str_term_width2 (const char *text, size_t length)
555 return used_class.term_width2 (text, length);
559 str_term_char_width (const char *text)
561 return used_class.term_char_width (text);
565 str_offset_to_pos (const char *text, size_t length)
567 return used_class.offset_to_pos (text, length);
571 str_length (const char *text)
573 return used_class.length (text);
/*
 * Return the distance in bytes between TEXT and the position reported by
 * str_cget_next_char_safe (TEXT).
 */
int
str_length_char (const char *text)
{
    const char *next = str_cget_next_char_safe (text);

    return next - text;
}
583 str_length2 (const char *text, int size)
585 return used_class.length2 (text, size);
589 str_length_noncomb (const char *text)
591 return used_class.length_noncomb (text);
595 str_column_to_pos (const char *text, size_t pos)
597 return used_class.column_to_pos (text, pos);
601 str_isspace (const char *ch)
603 return used_class.isspace (ch);
607 str_ispunct (const char *ch)
609 return used_class.ispunct (ch);
613 str_isalnum (const char *ch)
615 return used_class.isalnum (ch);
619 str_isdigit (const char *ch)
621 return used_class.isdigit (ch);
625 str_toupper (const char *ch, char **out, size_t * remain)
627 return used_class.toupper (ch, out, remain);
631 str_tolower (const char *ch, char **out, size_t * remain)
633 return used_class.tolower (ch, out, remain);
637 str_isprint (const char *ch)
639 return used_class.isprint (ch);
643 str_iscombiningmark (const char *ch)
645 return used_class.iscombiningmark (ch);
648 const char *
649 str_trunc (const char *text, int width)
651 return used_class.trunc (text, width);
654 char *
655 str_create_search_needle (const char *needle, int case_sen)
657 return used_class.create_search_needle (needle, case_sen);
661 void
662 str_release_search_needle (char *needle, int case_sen)
664 used_class.release_search_needle (needle, case_sen);
667 const char *
668 str_search_first (const char *text, const char *search, int case_sen)
670 return used_class.search_first (text, search, case_sen);
673 const char *
674 str_search_last (const char *text, const char *search, int case_sen)
676 return used_class.search_last (text, search, case_sen);
680 str_is_valid_string (const char *text)
682 return used_class.is_valid_string (text);
686 str_compare (const char *t1, const char *t2)
688 return used_class.compare (t1, t2);
692 str_ncompare (const char *t1, const char *t2)
694 return used_class.ncompare (t1, t2);
698 str_casecmp (const char *t1, const char *t2)
700 return used_class.casecmp (t1, t2);
704 str_ncasecmp (const char *t1, const char *t2)
706 return used_class.ncasecmp (t1, t2);
710 str_prefix (const char *text, const char *prefix)
712 return used_class.prefix (text, prefix);
716 str_caseprefix (const char *text, const char *prefix)
718 return used_class.caseprefix (text, prefix);
721 void
722 str_fix_string (char *text)
724 used_class.fix_string (text);
727 char *
728 str_create_key (const char *text, int case_sen)
730 return used_class.create_key (text, case_sen);
733 char *
734 str_create_key_for_filename (const char *text, int case_sen)
736 return used_class.create_key_for_filename (text, case_sen);
740 str_key_collate (const char *t1, const char *t2, int case_sen)
742 return used_class.key_collate (t1, t2, case_sen);
745 void
746 str_release_key (char *key, int case_sen)
748 used_class.release_key (key, case_sen);