2 UTF-8 strings utilities
4 Copyright (C) 2007, 2011
5 The Free Software Foundation, Inc.
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
34 #include "lib/global.h"
35 #include "lib/strutil.h"
37 /* UTF-8 handling below relies on functions from glib */
/* Bytes EF BF BD: the UTF-8 encoding of U+FFFD (REPLACEMENT CHARACTER).
 * Appended by str_utf8_insert_replace_char() and copied into the output of
 * str_utf8_make_make_term_form(). */
39 static const char replch
[] = "\xEF\xBF\xBD";
/* Report whether the code point uni is a mark: its g_unichar_type() category
 * is compared against G_UNICODE_COMBINING_MARK, G_UNICODE_ENCLOSING_MARK and
 * G_UNICODE_NON_SPACING_MARK, and the result is true if any of them matches.
 * NOTE(review): the return type and the declaration of `type` are not visible
 * in this listing. */
42 str_unichar_iscombiningmark (gunichar uni
)
46 type
= g_unichar_type (uni
);
47 return (type
== G_UNICODE_COMBINING_MARK
)
48 || (type
== G_UNICODE_ENCLOSING_MARK
) || (type
== G_UNICODE_NON_SPACING_MARK
);
/* Append the replacement sequence replch to buffer. */
52 str_utf8_insert_replace_char (GString
* buffer
)
54 g_string_append (buffer
, replch
);
/* Return the result of g_utf8_validate() for text; the length argument -1
 * means text is treated as NUL-terminated. */
58 str_utf8_is_valid_string (const char *text
)
60 return g_utf8_validate (text
, -1, NULL
);
/* Classify the character at ch by switching on the value of
 * g_utf8_get_char_validated (ch, size).
 * NOTE(review): the case labels and returned values are not visible in this
 * listing; str_utf8_cnext_char_safe() treats a result of 1 as "valid". */
64 str_utf8_is_valid_char (const char *ch
, size_t size
)
66 switch (g_utf8_get_char_validated (ch
, size
))
/* Move *text forward by one character with g_utf8_next_char().
 * No validation is performed here. */
78 str_utf8_cnext_char (const char **text
)
80 (*text
) = g_utf8_next_char (*text
);
/* Move *text backward by one character with g_utf8_prev_char().
 * No validation is performed here. */
84 str_utf8_cprev_char (const char **text
)
86 (*text
) = g_utf8_prev_char (*text
);
/* Validating variant of str_utf8_cnext_char(): when
 * str_utf8_is_valid_char() returns 1 for the character at *text, advance
 * with g_utf8_next_char().
 * NOTE(review): the handling of any other result is not visible in this
 * listing. */
90 str_utf8_cnext_char_safe (const char **text
)
92 if (str_utf8_is_valid_char (*text
, -1) == 1)
93 (*text
) = g_utf8_next_char (*text
);
/* Validating variant of str_utf8_cprev_char(): g_utf8_prev_char() yields a
 * candidate position (result), and a copy of it (t) is moved forward again
 * with str_utf8_cnext_char_safe().
 * NOTE(review): the statements that use t to decide the new value of *text
 * are not visible in this listing. */
99 str_utf8_cprev_char_safe (const char **text
)
101 const char *result
= g_utf8_prev_char (*text
);
102 const char *t
= result
;
103 str_utf8_cnext_char_safe (&t
);
/* Walk text up to its terminating NUL. Each position is decoded with
 * g_utf8_get_char_validated(); when the result is neither (gunichar) -1 nor
 * (gunichar) -2 (the two failure values tested), the character is skipped
 * with g_utf8_next_char().
 * NOTE(review): the branch taken for undecodable bytes is not visible in
 * this listing. */
111 str_utf8_fix_string (char *text
)
115 while (text
[0] != '\0')
117 uni
= g_utf8_get_char_validated (text
, -1);
118 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
120 text
= g_utf8_next_char (text
);
/* Decode the first character of text and return g_unichar_isspace() for it.
 * The decoded value is passed on without checking for the decoder's failure
 * values. */
131 str_utf8_isspace (const char *text
)
133 gunichar uni
= g_utf8_get_char_validated (text
, -1);
134 return g_unichar_isspace (uni
);
/* Decode the first character of text and return g_unichar_ispunct() for it.
 * The decoded value is passed on without checking for the decoder's failure
 * values. */
138 str_utf8_ispunct (const char *text
)
140 gunichar uni
= g_utf8_get_char_validated (text
, -1);
141 return g_unichar_ispunct (uni
);
/* Decode the first character of text and return g_unichar_isalnum() for it.
 * The decoded value is passed on without checking for the decoder's failure
 * values. */
145 str_utf8_isalnum (const char *text
)
147 gunichar uni
= g_utf8_get_char_validated (text
, -1);
148 return g_unichar_isalnum (uni
);
/* Decode the first character of text and return g_unichar_isdigit() for it.
 * The decoded value is passed on without checking for the decoder's failure
 * values. */
152 str_utf8_isdigit (const char *text
)
154 gunichar uni
= g_utf8_get_char_validated (text
, -1);
155 return g_unichar_isdigit (uni
);
/* Decode the first character of ch and return g_unichar_isprint() for it.
 * The decoded value is passed on without checking for the decoder's failure
 * values. */
159 str_utf8_isprint (const char *ch
)
161 gunichar uni
= g_utf8_get_char_validated (ch
, -1);
162 return g_unichar_isprint (uni
);
/* Decode the first character of ch and return
 * str_unichar_iscombiningmark() for it. The decoded value is passed on
 * without checking for the decoder's failure values. */
166 str_utf8_iscombiningmark (const char *ch
)
168 gunichar uni
= g_utf8_get_char_validated (ch
, -1);
169 return str_unichar_iscombiningmark (uni
);
/* Advance *text with str_utf8_cnext_char_safe() while the string has not
 * ended, testing after each step whether the character reached is a
 * combining mark.
 * NOTE(review): the action taken on a non-combining character (presumably
 * leaving the loop) and the returned value are not visible in this listing. */
173 str_utf8_cnext_noncomb_char (const char **text
)
176 while ((*text
)[0] != '\0')
178 str_utf8_cnext_char_safe (text
);
180 if (!str_utf8_iscombiningmark (*text
))
/* Step *text backwards with str_utf8_cprev_char_safe() until it equals
 * begin, testing after each step whether the character reached is a
 * combining mark.
 * NOTE(review): the action taken on a non-combining character (presumably
 * leaving the loop) and the returned value are not visible in this listing. */
187 str_utf8_cprev_noncomb_char (const char **text
, const char *begin
)
190 while ((*text
) != begin
)
192 str_utf8_cprev_char_safe (text
);
194 if (!str_utf8_iscombiningmark (*text
))
/* Upper-case the single character at text and encode it at *out.
 * The character is decoded with g_utf8_get_char_validated() and tested
 * against the failure values (gunichar) -1 / -2, then converted with
 * g_unichar_toupper(). g_unichar_to_utf8() is called first with a NULL
 * buffer (which only measures the encoded length) and then with *out.
 * NOTE(review): the failure return, the comparison against *remain and the
 * updates of *out / *remain are not visible in this listing. */
201 str_utf8_toupper (const char *text
, char **out
, size_t * remain
)
206 uni
= g_utf8_get_char_validated (text
, -1);
207 if (uni
== (gunichar
) (-1) || uni
== (gunichar
) (-2))
210 uni
= g_unichar_toupper (uni
);
211 left
= g_unichar_to_utf8 (uni
, NULL
);
215 left
= g_unichar_to_utf8 (uni
, *out
);
/* Lower-case the single character at text and encode it at *out.
 * The character is decoded with g_utf8_get_char_validated() and tested
 * against the failure values (gunichar) -1 / -2, then converted with
 * g_unichar_tolower(). g_unichar_to_utf8() is called first with a NULL
 * buffer (which only measures the encoded length) and then with *out.
 * NOTE(review): the failure return, the comparison against *remain and the
 * updates of *out / *remain are not visible in this listing. */
222 str_utf8_tolower (const char *text
, char **out
, size_t * remain
)
227 uni
= g_utf8_get_char_validated (text
, -1);
228 if (uni
== (gunichar
) (-1) || uni
== (gunichar
) (-2))
231 uni
= g_unichar_tolower (uni
);
232 left
= g_unichar_to_utf8 (uni
, NULL
);
236 left
= g_unichar_to_utf8 (uni
, *out
);
/* Count the characters of text, tolerating invalid UTF-8.
 * While g_utf8_validate() fails on the remainder (start) and the string has
 * not ended, the valid run [start, end) is counted with g_utf8_strlen().
 * One visible path counts the whole text with a single g_utf8_strlen()
 * call; another adds the length of a final run [start, end).
 * NOTE(review): the conditions selecting those paths, the accounting for
 * the invalid byte itself and the advance of start are not visible in this
 * listing. */
243 str_utf8_length (const char *text
)
250 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
254 result
+= g_utf8_strlen (start
, end
- start
);
262 result
= g_utf8_strlen (text
, -1);
266 if (start
[0] != '\0' && start
!= end
)
268 result
+= g_utf8_strlen (start
, end
- start
);
/* Size-limited counterpart of str_utf8_length(): the validation loop also
 * requires size > 0, each valid run is counted over at most
 * min (end - start, size) bytes, and (size > 0) is added once per loop pass.
 * One visible path counts the text with g_utf8_strlen (text, size).
 * NOTE(review): the updates of size and start and the conditions selecting
 * the final paths are not visible in this listing. */
276 str_utf8_length2 (const char *text
, int size
)
283 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0' && size
> 0)
287 result
+= g_utf8_strlen (start
, min (end
- start
, size
));
290 result
+= (size
> 0);
297 result
= g_utf8_strlen (text
, size
);
301 if (start
[0] != '\0' && start
!= end
&& size
> 0)
303 result
+= g_utf8_strlen (start
, min (end
- start
, size
));
/* Walk a cursor t over text using str_utf8_cnext_noncomb_char().
 * NOTE(review): the loop and the counter are not visible in this listing;
 * presumably this counts the characters of text that are not combining
 * marks -- confirm against the complete source. */
311 str_utf8_length_noncomb (const char *text
)
314 const char *t
= text
;
318 str_utf8_cnext_noncomb_char (&t
);
/* Substitute the character at *string with '?': the byte length of that
 * character (distance to g_utf8_next_char()) is subtracted from *left and a
 * '?' is appended to buffer.
 * NOTE(review): no update of *string itself is visible in this listing. */
327 str_utf8_questmark_sustb (char **string, size_t * left, GString * buffer)
329 char *next = g_utf8_next_char (*string);
330 (*left) -= next - (*string);
332 g_string_append_c (buffer, '?');
/* Return a newly allocated (g_strdup) message for a conversion error:
 * error->message when both error and its message are non-NULL, otherwise
 * def_msg, or the empty string when def_msg is NULL. */
337 str_utf8_conv_gerror_message (GError
* error
, const char *def_msg
)
339 if ((error
!= NULL
) && (error
->message
!= NULL
))
340 return g_strdup (error
->message
);
342 return g_strdup (def_msg
!= NULL
? def_msg
: "");
/* Convert string (size bytes) into buffer using coder.
 * When coder is str_cnv_not_convert, the bytes are appended to buffer
 * unchanged and the result is ESTR_SUCCESS. The other visible path
 * (presumably the else branch, which is not visible in this listing)
 * delegates to str_nconvert(). */
346 str_utf8_vfs_convert_to (GIConv coder
, const char *string
, int size
, GString
* buffer
)
350 if (coder
== str_cnv_not_convert
)
352 g_string_append_len (buffer
, string
, size
);
353 result
= ESTR_SUCCESS
;
356 result
= str_nconvert (coder
, (char *) string
, size
, buffer
);
/* Character buffer of BUF_MEDIUM * 6 bytes. Code below accesses a member
 * named text on struct term_form (result.text, pre_form->text).
 * NOTE(review): the enclosing declaration is not visible in this listing. */
363 char text
[BUF_MEDIUM
* 6];
368 /* Utility function that makes the string valid UTF-8 with all characters
369 * printable; it also returns the width of the string.
 *
 * The result is built in a function-local static struct term_form, so each
 * call overwrites the previous result. length limits processing; the value
 * (size_t) -1 is tested separately at the end of the loop body.
 * Undecodable input is replaced by the replch sequence. result.compose is
 * set when a combining mark is encountered.
 * NOTE(review): width accounting, the handling of non-printable characters
 * and buffer-limit checks are not visible in this listing. */
370 static const struct term_form
*
371 str_utf8_make_make_term_form (const char *text
, size_t length
)
373 static struct term_form result
;
378 result
.text
[0] = '\0';
380 result
.compose
= FALSE
;
381 actual
= result
.text
;
383 /* check whether text starts with a combining character;
384 * add a space at the beginning in that case */
385 if (length
!= 0 && text
[0] != '\0')
387 uni
= g_utf8_get_char_validated (text
, -1);
388 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
390 if (str_unichar_iscombiningmark (uni
))
395 result
.compose
= TRUE
;
400 while (length
!= 0 && text
[0] != '\0')
402 uni
= g_utf8_get_char_validated (text
, -1);
403 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
405 if (g_unichar_isprint (uni
))
407 left
= g_unichar_to_utf8 (uni
, actual
);
409 if (str_unichar_iscombiningmark (uni
))
410 result
.compose
= TRUE
;
414 if (g_unichar_iswide (uni
))
424 text
= g_utf8_next_char (text
);
429 /*actual[0] = '?'; */
430 memcpy (actual
, replch
, strlen (replch
));
431 actual
+= strlen (replch
);
434 if (length
!= (size_t) (-1))
/* Produce the terminal form of text in a function-local static buffer
 * (overwritten by each call). The pre-form comes from
 * str_utf8_make_make_term_form(). When it is flagged compose, its
 * G_NORMALIZE_DEFAULT_COMPOSE normalization is copied into the buffer; the
 * other visible path copies the pre-form text directly. Both copies are
 * bounded by g_strlcpy().
 * NOTE(review): the release of `composed` and the return statement are not
 * visible in this listing. */
443 str_utf8_term_form (const char *text
)
445 static char result
[BUF_MEDIUM
* 6];
446 const struct term_form
*pre_form
;
449 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
450 if (pre_form
->compose
)
452 composed
= g_utf8_normalize (pre_form
->text
, -1, G_NORMALIZE_DEFAULT_COMPOSE
);
453 g_strlcpy (result
, composed
, sizeof (result
));
458 g_strlcpy (result
, pre_form
->text
, sizeof (result
));
472 /* Utility function that copies all characters from tool->cheked to
 * tool->actual.
 *
 * tool->compose is reset to FALSE and then set as soon as a combining mark
 * is copied. For every character the encoded length is measured first
 * (g_unichar_to_utf8 with a NULL buffer) and compared with tool->remain;
 * the character is then encoded at tool->actual, and actual, remain and
 * cheked are advanced.
 * NOTE(review): the action taken when remain <= left, and the return value,
 * are not visible in this listing. */
474 utf8_tool_copy_chars_to_end (struct utf8_tool
*tool
)
479 tool
->compose
= FALSE
;
481 while (tool
->cheked
[0] != '\0')
483 uni
= g_utf8_get_char (tool
->cheked
);
484 tool
->compose
= tool
->compose
|| str_unichar_iscombiningmark (uni
);
485 left
= g_unichar_to_utf8 (uni
, NULL
);
486 if (tool
->remain
<= left
)
488 left
= g_unichar_to_utf8 (uni
, tool
->actual
);
489 tool
->actual
+= left
;
490 tool
->remain
-= left
;
491 tool
->cheked
= g_utf8_next_char (tool
->cheked
);
496 /* Utility function that copies characters from tool->cheked to
497 * tool->actual while tool->ident is smaller than to_ident.
 *
 * For non-combining characters a width w is evaluated (with a
 * g_unichar_iswide() test) and tool->ident + w is compared with to_ident;
 * tool->compose is set on another visible path (presumably for combining
 * marks). Each copied character is measured, checked against tool->remain,
 * encoded at tool->actual, and the cursors are advanced.
 * NOTE(review): the assignments to w and tool->ident, the early exits and
 * the return value are not visible in this listing. */
499 utf8_tool_copy_chars_to (struct utf8_tool
*tool
, int to_ident
)
505 tool
->compose
= FALSE
;
507 while (tool
->cheked
[0] != '\0')
509 uni
= g_utf8_get_char (tool
->cheked
);
510 if (!str_unichar_iscombiningmark (uni
))
513 if (g_unichar_iswide (uni
))
515 if (tool
->ident
+ w
> to_ident
)
521 tool
->compose
= TRUE
;
524 left
= g_unichar_to_utf8 (uni
, NULL
);
525 if (tool
->remain
<= left
)
527 left
= g_unichar_to_utf8 (uni
, tool
->actual
);
528 tool
->actual
+= left
;
529 tool
->remain
-= left
;
530 tool
->cheked
= g_utf8_next_char (tool
->cheked
);
536 /* Utility function that adds count spaces to tool->actual.
 *
 * tool->remain is first compared with count (cast to gsize); the spaces
 * are written with memset(), then actual is advanced and remain reduced by
 * count.
 * NOTE(review): the action taken when remain <= count, any guard for
 * non-positive count, and the return value are not visible in this
 * listing. */
538 utf8_tool_insert_space (struct utf8_tool
*tool
, int count
)
542 if (tool
->remain
<= (gsize
) count
)
544 memset (tool
->actual
, ' ', count
);
545 tool
->actual
+= count
;
546 tool
->remain
-= count
;
550 /* Utility function that adds one character to tool->actual.
 *
 * tool->remain is compared with 1 before ch is stored at tool->actual[0].
 * NOTE(review): the action taken when remain <= 1, the cursor updates and
 * the return value are not visible in this listing. */
552 utf8_tool_insert_char (struct utf8_tool
*tool
, char ch
)
554 if (tool
->remain
<= 1)
556 tool
->actual
[0] = ch
;
562 /* Utility function that skips characters from tool->cheked until
563 * tool->ident is greater than or equal to to_ident.
 *
 * The main loop runs while to_ident > tool->ident and input remains; it
 * advances tool->cheked one character at a time and tests each character
 * for combining-mark and wide status. Afterwards any combining marks
 * found at tool->cheked are skipped as well.
 * NOTE(review): the updates of tool->ident and the return value are not
 * visible in this listing. */
565 utf8_tool_skip_chars_to (struct utf8_tool
*tool
, int to_ident
)
569 while (to_ident
> tool
->ident
&& tool
->cheked
[0] != '\0')
571 uni
= g_utf8_get_char (tool
->cheked
);
572 if (!str_unichar_iscombiningmark (uni
))
575 if (g_unichar_iswide (uni
))
578 tool
->cheked
= g_utf8_next_char (tool
->cheked
);
580 uni
= g_utf8_get_char (tool
->cheked
);
581 while (str_unichar_iscombiningmark (uni
))
583 tool
->cheked
= g_utf8_next_char (tool
->cheked
);
584 uni
= g_utf8_get_char (tool
->cheked
);
/* Replace the contents of buffer with its G_NORMALIZE_DEFAULT_COMPOSE
 * normalization, truncated by g_strlcpy() to size bytes including the
 * terminating NUL.
 * NOTE(review): g_utf8_normalize() returns newly allocated memory; its
 * release is not visible in this listing. */
590 utf8_tool_compose (char *buffer
, size_t size
)
592 char *composed
= g_utf8_normalize (buffer
, -1, G_NORMALIZE_DEFAULT_COMPOSE
);
593 g_strlcpy (buffer
, composed
, size
);
/* Fit text into exactly `width` terminal columns according to just_mode,
 * building the result in a function-local static buffer (overwritten by
 * each call).
 *
 * Visible structure:
 *  - pre-form width <= width: tool.ident is chosen via HIDE_FIT (just_mode)
 *    (half of the free space, or all of it, on the visible cases), then
 *    ident spaces, the whole text, and the remaining spaces are emitted.
 *  - IS_FIT (just_mode): the first width / 2 columns are copied, '~' is
 *    inserted, input is skipped up to column pre_form->width - width + 1,
 *    and the rest is copied and padded.
 *  - otherwise: ident is again chosen via HIDE_FIT, then skip / pad / copy
 *    up to width / pad.
 * Finally the buffer is NUL-terminated and passed to utf8_tool_compose().
 * NOTE(review): the case labels, else keywords, the condition guarding the
 * compose call and the return statement are not visible in this listing. */
599 str_utf8_fit_to_term (const char *text
, int width
, align_crt_t just_mode
)
601 static char result
[BUF_MEDIUM
* 6];
602 const struct term_form
*pre_form
;
603 struct utf8_tool tool
;
605 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
606 tool
.cheked
= pre_form
->text
;
607 tool
.actual
= result
;
608 tool
.remain
= sizeof (result
);
609 tool
.compose
= FALSE
;
611 if (pre_form
->width
<= (gsize
) width
)
614 switch (HIDE_FIT (just_mode
))
618 tool
.ident
= (width
- pre_form
->width
) / 2;
621 tool
.ident
= width
- pre_form
->width
;
625 utf8_tool_insert_space (&tool
, tool
.ident
);
626 utf8_tool_copy_chars_to_end (&tool
);
627 utf8_tool_insert_space (&tool
, width
- pre_form
->width
- tool
.ident
);
631 if (IS_FIT (just_mode
))
634 utf8_tool_copy_chars_to (&tool
, width
/ 2);
635 utf8_tool_insert_char (&tool
, '~');
638 utf8_tool_skip_chars_to (&tool
, pre_form
->width
- width
+ 1);
639 utf8_tool_copy_chars_to_end (&tool
);
640 utf8_tool_insert_space (&tool
, width
- (pre_form
->width
- tool
.ident
+ 1));
645 switch (HIDE_FIT (just_mode
))
648 tool
.ident
= (width
- pre_form
->width
) / 2;
651 tool
.ident
= width
- pre_form
->width
;
655 utf8_tool_skip_chars_to (&tool
, 0);
656 utf8_tool_insert_space (&tool
, tool
.ident
);
657 utf8_tool_copy_chars_to (&tool
, width
);
658 utf8_tool_insert_space (&tool
, width
- tool
.ident
);
662 tool
.actual
[0] = '\0';
664 utf8_tool_compose (result
, sizeof (result
));
/* Trim text to at most `width` terminal columns, building the result in a
 * function-local static buffer (overwritten by each call).
 *
 * When width is smaller than the pre-form width, one visible path fills the
 * output with `width` dots, and another writes three dots, skips input up
 * to column pre_form->width - width + 3 and copies the rest. Otherwise the
 * text is copied whole. The buffer is then NUL-terminated and passed to
 * utf8_tool_compose().
 * NOTE(review): the condition choosing between the two dotted paths, the
 * cursor updates after the three-dot memset, the condition guarding the
 * compose call and the return statement are not visible in this listing. */
669 str_utf8_term_trim (const char *text
, int width
)
671 static char result
[BUF_MEDIUM
* 6];
672 const struct term_form
*pre_form
;
673 struct utf8_tool tool
;
681 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
683 tool
.cheked
= pre_form
->text
;
684 tool
.actual
= result
;
685 tool
.remain
= sizeof (result
);
686 tool
.compose
= FALSE
;
688 if ((gsize
) width
< pre_form
->width
)
692 memset (tool
.actual
, '.', width
);
693 tool
.actual
+= width
;
694 tool
.remain
-= width
;
698 memset (tool
.actual
, '.', 3);
703 utf8_tool_skip_chars_to (&tool
, pre_form
->width
- width
+ 3);
704 utf8_tool_copy_chars_to_end (&tool
);
709 utf8_tool_copy_chars_to_end (&tool
);
712 tool
.actual
[0] = '\0';
714 utf8_tool_compose (result
, sizeof (result
));
/* Return the width member of the pre-form that
 * str_utf8_make_make_term_form() computes for text, limited by length. */
719 str_utf8_term_width2 (const char *text
, size_t length
)
721 const struct term_form
*result
;
723 result
= str_utf8_make_make_term_form (text
, length
);
724 return result
->width
;
/* Width of the whole text: str_utf8_term_width2() called with the
 * length value (size_t) -1. */
728 str_utf8_term_width1 (const char *text
)
730 return str_utf8_term_width2 (text
, (size_t) (-1));
/* Column width of the first character of text: 0 for a combining mark,
 * 2 for a wide character (g_unichar_iswide), 1 otherwise. The decoded value
 * is used without checking for the decoder's failure values. */
734 str_utf8_term_char_width (const char *text
)
736 gunichar uni
= g_utf8_get_char_validated (text
, -1);
737 return (str_unichar_iscombiningmark (uni
)) ? 0 : ((g_unichar_iswide (uni
)) ? 2 : 1);
/* Extract a `width`-column slice of text, building the result in a
 * function-local static buffer (overwritten by each call).
 *
 * Visible steps: skip input up to column 0, pad with tool.ident spaces,
 * copy characters up to column `width`, pad with width - tool.ident
 * spaces, NUL-terminate, then call utf8_tool_compose().
 * NOTE(review): the use of `start` (presumably the initial value of
 * tool.ident), the condition guarding the compose call and the return
 * statement are not visible in this listing. */
741 str_utf8_term_substring (const char *text
, int start
, int width
)
743 static char result
[BUF_MEDIUM
* 6];
744 const struct term_form
*pre_form
;
745 struct utf8_tool tool
;
747 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
749 tool
.cheked
= pre_form
->text
;
750 tool
.actual
= result
;
751 tool
.remain
= sizeof (result
);
752 tool
.compose
= FALSE
;
755 utf8_tool_skip_chars_to (&tool
, 0);
758 utf8_tool_insert_space (&tool
, tool
.ident
);
760 utf8_tool_copy_chars_to (&tool
, width
);
761 utf8_tool_insert_space (&tool
, width
- tool
.ident
);
763 tool
.actual
[0] = '\0';
765 utf8_tool_compose (result
, sizeof (result
));
/* Shorten text to `width` terminal columns by cutting out its middle,
 * building the result in a function-local static buffer of
 * MC_MAXPATHLEN * 6 * 2 bytes (overwritten by each call).
 *
 * When the pre-form is wider than width: the first width / 2 columns are
 * copied, '~' is inserted, input is skipped up to column
 * pre_form->width - width + 1 and the rest is copied. Otherwise the text
 * is copied whole. The buffer is then NUL-terminated and passed to
 * utf8_tool_compose().
 * NOTE(review): the initialisation of tool.ident, the condition guarding
 * the compose call and the return statement are not visible in this
 * listing. */
770 str_utf8_trunc (const char *text
, int width
)
772 static char result
[MC_MAXPATHLEN
* 6 * 2];
773 const struct term_form
*pre_form
;
774 struct utf8_tool tool
;
776 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
778 tool
.cheked
= pre_form
->text
;
779 tool
.actual
= result
;
780 tool
.remain
= sizeof (result
);
781 tool
.compose
= FALSE
;
783 if (pre_form
->width
> (gsize
) width
)
786 utf8_tool_copy_chars_to (&tool
, width
/ 2);
787 utf8_tool_insert_char (&tool
, '~');
790 utf8_tool_skip_chars_to (&tool
, pre_form
->width
- width
+ 1);
791 utf8_tool_copy_chars_to_end (&tool
);
795 utf8_tool_copy_chars_to_end (&tool
);
798 tool
.actual
[0] = '\0';
800 utf8_tool_compose (result
, sizeof (result
));
/* Convert a character offset (length) into a byte offset within text.
 * Valid text is measured directly with g_utf8_offset_to_pointer(). The
 * other visible path copies text into a temporary GString, repairs the
 * copy with str_utf8_fix_string(), measures the offset on that copy, and
 * frees the GString.
 * NOTE(review): the final return statement is not visible in this
 * listing. */
805 str_utf8_offset_to_pos (const char *text
, size_t length
)
807 if (str_utf8_is_valid_string (text
))
808 return g_utf8_offset_to_pointer (text
, length
) - text
;
812 GString
*buffer
= g_string_new (text
);
814 str_utf8_fix_string (buffer
->str
);
815 result
= g_utf8_offset_to_pointer (buffer
->str
, length
) - buffer
->str
;
816 g_string_free (buffer
, TRUE
);
/* Map a terminal column (pos) to a position within text.
 * The loop decodes each character with g_utf8_get_char_validated() using a
 * maximum length of 6 bytes (unlike the -1 used elsewhere in this file),
 * tests whether it is printable, non-combining and wide, advances text, and
 * compares the accumulated width with pos.
 * NOTE(review): the width and result updates, the handling of undecodable
 * bytes and the return statements are not visible in this listing. */
822 str_utf8_column_to_pos (const char *text
, size_t pos
)
831 while (text
[0] != '\0')
833 uni
= g_utf8_get_char_validated (text
, 6);
834 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
836 if (g_unichar_isprint (uni
))
838 if (!str_unichar_iscombiningmark (uni
))
841 if (g_unichar_iswide (uni
))
849 text
= g_utf8_next_char (text
);
856 if ((gsize
) width
> pos
)
/* Prepare needle for searching. One visible path returns its
 * G_NORMALIZE_ALL normalization directly; the other case-folds it with
 * g_utf8_casefold() first and normalizes the folded copy.
 * NOTE(review): the tests on needle / case_sen that select the path
 * (presumably folding only when case_sen is 0), the release of `fold` and
 * the final return are not visible in this listing. */
866 str_utf8_create_search_needle (const char *needle
, int case_sen
)
872 return g_utf8_normalize (needle
, -1, G_NORMALIZE_ALL
);
876 char *fold
= g_utf8_casefold (needle
, -1);
877 char *result
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
/* Counterpart of str_utf8_create_search_needle().
 * NOTE(review): the body is not visible in this listing; presumably it
 * releases needle -- confirm against the complete source. */
887 str_utf8_release_search_needle (char *needle
, int case_sen
)
/* Find the first occurrence of search in text.
 * text is case-folded unless case_sen is set (in which case the original
 * pointer is reused) and then normalized with G_NORMALIZE_ALL; search is
 * used as given. Candidates are located with g_strstr_len(). A candidate
 * is examined further only if it does not begin on a combining mark
 * (unless it is at the very start) and is not followed by one. Two cursors
 * (m and result) are advanced with str_utf8_cnext_noncomb_char(). After a
 * rejected candidate, match moves forward one character. The loop repeats
 * while match is non-NULL and no result has been found.
 * NOTE(review): the loop that walks m / result, the release of the
 * temporary strings and the return statement are not visible in this
 * listing. */
895 str_utf8_search_first (const char *text
, const char *search
, int case_sen
)
900 const char *result
= NULL
;
903 fold_text
= (case_sen
) ? (char *) text
: g_utf8_casefold (text
, -1);
904 deco_text
= g_utf8_normalize (fold_text
, -1, G_NORMALIZE_ALL
);
909 match
= g_strstr_len (match
, -1, search
);
912 if ((!str_utf8_iscombiningmark (match
) || (match
== deco_text
)) &&
913 !str_utf8_iscombiningmark (match
+ strlen (search
)))
920 str_utf8_cnext_noncomb_char (&m
);
921 str_utf8_cnext_noncomb_char (&result
);
926 str_utf8_cnext_char (&match
);
930 while (match
!= NULL
&& result
== NULL
);
/* Find the last occurrence of search in text.
 * text is case-folded unless case_sen is set and then normalized with
 * G_NORMALIZE_ALL; search is used as given. Candidates are located from
 * the end with g_strrstr_len(). A candidate is examined further only if it
 * does not begin on a combining mark (unless it is at the very start) and
 * is not followed by one. Two cursors (m and result) are advanced with
 * str_utf8_cnext_noncomb_char(). The loop repeats while match is non-NULL
 * and no result has been found.
 * NOTE(review): the handling of a rejected candidate, the loop that walks
 * m / result, the release of the temporary strings and the return statement
 * are not visible in this listing. */
940 str_utf8_search_last (const char *text
, const char *search
, int case_sen
)
945 const char *result
= NULL
;
948 fold_text
= (case_sen
) ? (char *) text
: g_utf8_casefold (text
, -1);
949 deco_text
= g_utf8_normalize (fold_text
, -1, G_NORMALIZE_ALL
);
953 match
= g_strrstr_len (deco_text
, -1, search
);
956 if ((!str_utf8_iscombiningmark (match
) || (match
== deco_text
)) &&
957 !str_utf8_iscombiningmark (match
+ strlen (search
)))
964 str_utf8_cnext_noncomb_char (&m
);
965 str_utf8_cnext_noncomb_char (&result
);
974 while (match
!= NULL
&& result
== NULL
);
/* Normalize text with G_NORMALIZE_ALL while tolerating invalid UTF-8.
 * Valid runs [start, end) found by g_utf8_validate() are normalized and
 * appended to the GString `fixed`; the byte at `end` (where validation
 * stopped) is appended unchanged. One visible path normalizes the whole
 * text in a single call and discards `fixed`; the other hands the
 * accumulated string over via g_string_free (fixed, FALSE).
 * NOTE(review): the conditions selecting those paths, the release of `tmp`
 * and the return statement are not visible in this listing. */
984 str_utf8_normalize (const char *text
)
992 fixed
= g_string_sized_new (4);
995 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
999 tmp
= g_utf8_normalize (start
, end
- start
, G_NORMALIZE_ALL
);
1000 g_string_append (fixed
, tmp
);
1003 g_string_append_c (fixed
, end
[0]);
1009 result
= g_utf8_normalize (text
, -1, G_NORMALIZE_ALL
);
1010 g_string_free (fixed
, TRUE
);
1014 if (start
[0] != '\0' && start
!= end
)
1016 tmp
= g_utf8_normalize (start
, end
- start
, G_NORMALIZE_ALL
);
1017 g_string_append (fixed
, tmp
);
1020 result
= g_string_free (fixed
, FALSE
);
/* Case-fold and then normalize (G_NORMALIZE_ALL) text while tolerating
 * invalid UTF-8. Same structure as str_utf8_normalize(), with each valid
 * run passed through g_utf8_casefold() before normalization; the byte at
 * `end` (where validation stopped) is appended unchanged.
 * NOTE(review): the conditions selecting the single-call and accumulated
 * paths, the release of `fold` / `tmp` and the return statement are not
 * visible in this listing. */
1027 str_utf8_casefold_normalize (const char *text
)
1035 fixed
= g_string_sized_new (4);
1038 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
1042 fold
= g_utf8_casefold (start
, end
- start
);
1043 tmp
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
1044 g_string_append (fixed
, tmp
);
1048 g_string_append_c (fixed
, end
[0]);
1054 fold
= g_utf8_casefold (text
, -1);
1055 result
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
1057 g_string_free (fixed
, TRUE
);
1061 if (start
[0] != '\0' && start
!= end
)
1063 fold
= g_utf8_casefold (start
, end
- start
);
1064 tmp
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
1065 g_string_append (fixed
, tmp
);
1069 result
= g_string_free (fixed
, FALSE
);
/* Compare t1 and t2 with strcmp() applied to their str_utf8_normalize()
 * forms.
 * NOTE(review): the release of n1 / n2 and the return statement are not
 * visible in this listing. */
1076 str_utf8_compare (const char *t1
, const char *t2
)
1081 n1
= str_utf8_normalize (t1
);
1082 n2
= str_utf8_normalize (t2
);
1084 result
= strcmp (n1
, n2
);
/* Compare the normalized forms of t1 and t2 over the byte length of the
 * shorter one only (strncmp with min of the two strlen values), so the
 * result is 0 when one normalized string is a prefix of the other.
 * NOTE(review): the release of n1 / n2 and the return statement are not
 * visible in this listing. */
1093 str_utf8_ncompare (const char *t1
, const char *t2
)
1098 n1
= str_utf8_normalize (t1
);
1099 n2
= str_utf8_normalize (t2
);
1101 result
= strncmp (n1
, n2
, min (strlen (n1
), strlen (n2
)));
/* Case-insensitive comparison: strcmp() applied to the
 * str_utf8_casefold_normalize() forms of t1 and t2.
 * NOTE(review): the release of n1 / n2 and the return statement are not
 * visible in this listing. */
1110 str_utf8_casecmp (const char *t1
, const char *t2
)
1115 n1
= str_utf8_casefold_normalize (t1
);
1116 n2
= str_utf8_casefold_normalize (t2
);
1118 result
= strcmp (n1
, n2
);
/* Case-insensitive comparison of the case-folded, normalized forms of t1
 * and t2 over the byte length of the shorter one only (strncmp with min of
 * the two strlen values).
 * NOTE(review): the release of n1 / n2 and the return statement are not
 * visible in this listing. */
1127 str_utf8_ncasecmp (const char *t1
, const char *t2
)
1132 n1
= str_utf8_casefold_normalize (t1
);
1133 n2
= str_utf8_casefold_normalize (t2
);
1135 result
= strncmp (n1
, n2
, min (strlen (n1
), strlen (n2
)));
/* Measure the common prefix of text and prefix on their normalized forms.
 * Two cursor pairs walk both strings one character at a time
 * (str_utf8_cnext_char_safe). The walk continues while both strings have
 * input; the characters are compared first by encoded length and then
 * bytewise with strncmp().
 * NOTE(review): the declarations of nt / np, the cursor updates, the exits
 * on mismatch, the release of t / p and the returned value are not visible
 * in this listing. */
1144 str_utf8_prefix (const char *text
, const char *prefix
)
1146 char *t
= str_utf8_normalize (text
);
1147 char *p
= str_utf8_normalize (prefix
);
1150 const char *nnt
= t
;
1151 const char *nnp
= p
;
1154 while (nt
[0] != '\0' && np
[0] != '\0')
1156 str_utf8_cnext_char_safe (&nnt
);
1157 str_utf8_cnext_char_safe (&nnp
);
1158 if (nnt
- nt
!= nnp
- np
)
1160 if (strncmp (nt
, np
, nnt
- nt
) != 0)
/* Case-insensitive counterpart of str_utf8_prefix(): the same
 * character-by-character walk, performed on the
 * str_utf8_casefold_normalize() forms of text and prefix.
 * NOTE(review): the declarations of nt / np, the cursor updates, the exits
 * on mismatch, the release of t / p and the returned value are not visible
 * in this listing. */
1175 str_utf8_caseprefix (const char *text
, const char *prefix
)
1177 char *t
= str_utf8_casefold_normalize (text
);
1178 char *p
= str_utf8_casefold_normalize (prefix
);
1181 const char *nnt
= t
;
1182 const char *nnp
= p
;
1185 while (nt
[0] != '\0' && np
[0] != '\0')
1187 str_utf8_cnext_char_safe (&nnt
);
1188 str_utf8_cnext_char_safe (&nnp
);
1189 if (nnt
- nt
!= nnp
- np
)
1191 if (strncmp (nt
, np
, nnt
- nt
) != 0)
/* Build a comparison key for text using the supplied key generator
 * (keygen has the signature of g_utf8_collate_key()).
 *
 * One visible path simply uses str_utf8_normalize (text) (presumably when
 * case_sen is set -- the test is not visible). On the other path:
 *  - a leading '.' is detected (dot) and a literal '.' is appended to the
 *    GString `fixed` on a visible path;
 *  - each valid UTF-8 run [start, end) is case-folded, passed to keygen and
 *    appended; the byte at `end` (where validation stopped) is appended
 *    unchanged;
 *  - if a single fold + keygen call covers the input, its key is used
 *    directly and `fixed` is discarded; the `dot && start == text + 1`
 *    case instead appends that key after the dot and returns `fixed`;
 *  - otherwise a final run, if any, is appended and `fixed` is handed over
 *    via g_string_free (fixed, FALSE).
 * NOTE(review): the branch conditions, the initialisation and advance of
 * start, the release of fold / key and the return statement are not
 * visible in this listing. */
1206 str_utf8_create_key_gen (const char *text
, int case_sen
,
1207 gchar
* (*keygen
) (const gchar
* text
, gssize size
))
1213 result
= str_utf8_normalize (text
);
1219 const char *start
, *end
;
1222 dot
= text
[0] == '.';
1223 fixed
= g_string_sized_new (16);
1230 g_string_append_c (fixed
, '.');
1233 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
1237 fold
= g_utf8_casefold (start
, end
- start
);
1238 key
= keygen (fold
, -1);
1239 g_string_append (fixed
, key
);
1243 g_string_append_c (fixed
, end
[0]);
1249 fold
= g_utf8_casefold (start
, -1);
1250 result
= keygen (fold
, -1);
1252 g_string_free (fixed
, TRUE
);
1254 else if (dot
&& (start
== text
+ 1))
1256 fold
= g_utf8_casefold (start
, -1);
1257 key
= keygen (fold
, -1);
1258 g_string_append (fixed
, key
);
1261 result
= g_string_free (fixed
, FALSE
);
1265 if (start
[0] != '\0' && start
!= end
)
1267 fold
= g_utf8_casefold (start
, end
- start
);
1268 key
= keygen (fold
, -1);
1269 g_string_append (fixed
, key
);
1273 result
= g_string_free (fixed
, FALSE
);
/* Create a comparison key for text by calling str_utf8_create_key_gen()
 * with g_utf8_collate_key() as the key generator. */
1280 str_utf8_create_key (const char *text
, int case_sen
)
1282 return str_utf8_create_key_gen (text
, case_sen
, g_utf8_collate_key
);
/* Only compiled when MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME is defined:
 * create a comparison key for a file name by calling
 * str_utf8_create_key_gen() with g_utf8_collate_key_for_filename() as the
 * key generator.
 * NOTE(review): the matching #endif is not visible in this listing. */
1285 #ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME
1287 str_utf8_create_key_for_filename (const char *text
, int case_sen
)
1289 return str_utf8_create_key_gen (text
, case_sen
, g_utf8_collate_key_for_filename
);
/* Compare two previously created keys bytewise with strcmp(). case_sen is
 * not used by the visible code.
 * NOTE(review): lines between the signature and the return statement are
 * missing from this listing. */
1294 str_utf8_key_collate (const char *t1
, const char *t2
, int case_sen
)
1297 return strcmp (t1
, t2
);
/* Counterpart of str_utf8_create_key().
 * NOTE(review): the body is not visible in this listing; presumably it
 * releases key -- confirm against the complete source. */
1301 str_utf8_release_key (char *key
, int case_sen
)
/* Fill a struct str_class with the UTF-8 implementations defined in this
 * file, one member per operation.
 * create_key_for_filename is bound to str_utf8_create_key_for_filename()
 * under MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME; the second assignment to
 * that member (str_utf8_create_key) is presumably the #else branch.
 * NOTE(review): the #else / #endif directives and the return statement are
 * not visible in this listing. */
1308 str_utf8_init (void)
1310 struct str_class result
;
1312 result
.conv_gerror_message
= str_utf8_conv_gerror_message
;
1313 result
.vfs_convert_to
= str_utf8_vfs_convert_to
;
1314 result
.insert_replace_char
= str_utf8_insert_replace_char
;
1315 result
.is_valid_string
= str_utf8_is_valid_string
;
1316 result
.is_valid_char
= str_utf8_is_valid_char
;
1317 result
.cnext_char
= str_utf8_cnext_char
;
1318 result
.cprev_char
= str_utf8_cprev_char
;
1319 result
.cnext_char_safe
= str_utf8_cnext_char_safe
;
1320 result
.cprev_char_safe
= str_utf8_cprev_char_safe
;
1321 result
.cnext_noncomb_char
= str_utf8_cnext_noncomb_char
;
1322 result
.cprev_noncomb_char
= str_utf8_cprev_noncomb_char
;
1323 result
.char_isspace
= str_utf8_isspace
;
1324 result
.char_ispunct
= str_utf8_ispunct
;
1325 result
.char_isalnum
= str_utf8_isalnum
;
1326 result
.char_isdigit
= str_utf8_isdigit
;
1327 result
.char_isprint
= str_utf8_isprint
;
1328 result
.char_iscombiningmark
= str_utf8_iscombiningmark
;
1329 result
.char_toupper
= str_utf8_toupper
;
1330 result
.char_tolower
= str_utf8_tolower
;
1331 result
.length
= str_utf8_length
;
1332 result
.length2
= str_utf8_length2
;
1333 result
.length_noncomb
= str_utf8_length_noncomb
;
1334 result
.fix_string
= str_utf8_fix_string
;
1335 result
.term_form
= str_utf8_term_form
;
1336 result
.fit_to_term
= str_utf8_fit_to_term
;
1337 result
.term_trim
= str_utf8_term_trim
;
1338 result
.term_width2
= str_utf8_term_width2
;
1339 result
.term_width1
= str_utf8_term_width1
;
1340 result
.term_char_width
= str_utf8_term_char_width
;
1341 result
.term_substring
= str_utf8_term_substring
;
1342 result
.trunc
= str_utf8_trunc
;
1343 result
.offset_to_pos
= str_utf8_offset_to_pos
;
1344 result
.column_to_pos
= str_utf8_column_to_pos
;
1345 result
.create_search_needle
= str_utf8_create_search_needle
;
1346 result
.release_search_needle
= str_utf8_release_search_needle
;
1347 result
.search_first
= str_utf8_search_first
;
1348 result
.search_last
= str_utf8_search_last
;
1349 result
.compare
= str_utf8_compare
;
1350 result
.ncompare
= str_utf8_ncompare
;
1351 result
.casecmp
= str_utf8_casecmp
;
1352 result
.ncasecmp
= str_utf8_ncasecmp
;
1353 result
.prefix
= str_utf8_prefix
;
1354 result
.caseprefix
= str_utf8_caseprefix
;
1355 result
.create_key
= str_utf8_create_key
;
1356 #ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME
1357 /* case insensitive sort files in "a1 a2 a10" order */
1358 result
.create_key_for_filename
= str_utf8_create_key_for_filename
;
1360 /* case insensitive sort files in "a1 a10 a2" order */
1361 result
.create_key_for_filename
= str_utf8_create_key
;
1363 result
.key_collate
= str_utf8_key_collate
;
1364 result
.release_key
= str_utf8_release_key
;