2 UTF-8 strings utilities
4 Copyright (C) 2007, 2011
5 The Free Software Foundation, Inc.
10 The file_date routine is mostly from GNU's fileutils package,
11 written by Richard Stallman and David MacKenzie.
13 This file is part of the Midnight Commander.
15 The Midnight Commander is free software: you can redistribute it
16 and/or modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation, either version 3 of the License,
18 or (at your option) any later version.
20 The Midnight Commander is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program. If not, see <http://www.gnu.org/licenses/>.
37 #include "lib/global.h"
38 #include "lib/strutil.h"
40 /* uses the UTF-8 functions provided by glib */
/* UTF-8 byte sequence EF BF BD, i.e. U+FFFD REPLACEMENT CHARACTER. */
42 static const char replch
[] = "\xEF\xBF\xBD";
45 str_unichar_iscombiningmark (gunichar uni
)
49 type
= g_unichar_type (uni
);
50 return (type
== G_UNICODE_COMBINING_MARK
)
51 || (type
== G_UNICODE_ENCLOSING_MARK
) || (type
== G_UNICODE_NON_SPACING_MARK
);
55 str_utf8_insert_replace_char (GString
* buffer
)
57 g_string_append (buffer
, replch
);
61 str_utf8_is_valid_string (const char *text
)
63 return g_utf8_validate (text
, -1, NULL
);
67 str_utf8_is_valid_char (const char *ch
, size_t size
)
69 switch (g_utf8_get_char_validated (ch
, size
))
81 str_utf8_cnext_char (const char **text
)
83 (*text
) = g_utf8_next_char (*text
);
87 str_utf8_cprev_char (const char **text
)
89 (*text
) = g_utf8_prev_char (*text
);
93 str_utf8_cnext_char_safe (const char **text
)
95 if (str_utf8_is_valid_char (*text
, -1) == 1)
96 (*text
) = g_utf8_next_char (*text
);
102 str_utf8_cprev_char_safe (const char **text
)
104 const char *result
= g_utf8_prev_char (*text
);
105 const char *t
= result
;
106 str_utf8_cnext_char_safe (&t
);
114 str_utf8_fix_string (char *text
)
118 while (text
[0] != '\0')
120 uni
= g_utf8_get_char_validated (text
, -1);
121 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
123 text
= g_utf8_next_char (text
);
134 str_utf8_isspace (const char *text
)
136 gunichar uni
= g_utf8_get_char_validated (text
, -1);
137 return g_unichar_isspace (uni
);
141 str_utf8_ispunct (const char *text
)
143 gunichar uni
= g_utf8_get_char_validated (text
, -1);
144 return g_unichar_ispunct (uni
);
148 str_utf8_isalnum (const char *text
)
150 gunichar uni
= g_utf8_get_char_validated (text
, -1);
151 return g_unichar_isalnum (uni
);
155 str_utf8_isdigit (const char *text
)
157 gunichar uni
= g_utf8_get_char_validated (text
, -1);
158 return g_unichar_isdigit (uni
);
162 str_utf8_isprint (const char *ch
)
164 gunichar uni
= g_utf8_get_char_validated (ch
, -1);
165 return g_unichar_isprint (uni
);
169 str_utf8_iscombiningmark (const char *ch
)
171 gunichar uni
= g_utf8_get_char_validated (ch
, -1);
172 return str_unichar_iscombiningmark (uni
);
176 str_utf8_cnext_noncomb_char (const char **text
)
179 while ((*text
)[0] != '\0')
181 str_utf8_cnext_char_safe (text
);
183 if (!str_utf8_iscombiningmark (*text
))
190 str_utf8_cprev_noncomb_char (const char **text
, const char *begin
)
193 while ((*text
) != begin
)
195 str_utf8_cprev_char_safe (text
);
197 if (!str_utf8_iscombiningmark (*text
))
204 str_utf8_toupper (const char *text
, char **out
, size_t * remain
)
209 uni
= g_utf8_get_char_validated (text
, -1);
210 if (uni
== (gunichar
) (-1) || uni
== (gunichar
) (-2))
213 uni
= g_unichar_toupper (uni
);
214 left
= g_unichar_to_utf8 (uni
, NULL
);
218 left
= g_unichar_to_utf8 (uni
, *out
);
225 str_utf8_tolower (const char *text
, char **out
, size_t * remain
)
230 uni
= g_utf8_get_char_validated (text
, -1);
231 if (uni
== (gunichar
) (-1) || uni
== (gunichar
) (-2))
234 uni
= g_unichar_tolower (uni
);
235 left
= g_unichar_to_utf8 (uni
, NULL
);
239 left
= g_unichar_to_utf8 (uni
, *out
);
246 str_utf8_length (const char *text
)
253 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
257 result
+= g_utf8_strlen (start
, end
- start
);
265 result
= g_utf8_strlen (text
, -1);
269 if (start
[0] != '\0' && start
!= end
)
271 result
+= g_utf8_strlen (start
, end
- start
);
279 str_utf8_length2 (const char *text
, int size
)
286 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0' && size
> 0)
290 result
+= g_utf8_strlen (start
, min (end
- start
, size
));
293 result
+= (size
> 0);
300 result
= g_utf8_strlen (text
, size
);
304 if (start
[0] != '\0' && start
!= end
&& size
> 0)
306 result
+= g_utf8_strlen (start
, min (end
- start
, size
));
314 str_utf8_length_noncomb (const char *text
)
317 const char *t
= text
;
321 str_utf8_cnext_noncomb_char (&t
);
330 str_utf8_questmark_sustb (char **string, size_t * left, GString * buffer)
332 char *next = g_utf8_next_char (*string);
333 (*left) -= next - (*string);
335 g_string_append_c (buffer, '?');
340 str_utf8_conv_gerror_message (GError
* error
, const char *def_msg
)
342 if ((error
!= NULL
) && (error
->message
!= NULL
))
343 return g_strdup (error
->message
);
345 return g_strdup (def_msg
!= NULL
? def_msg
: "");
349 str_utf8_vfs_convert_to (GIConv coder
, const char *string
, int size
, GString
* buffer
)
353 if (coder
== str_cnv_not_convert
)
355 g_string_append_len (buffer
, string
, size
);
356 result
= ESTR_SUCCESS
;
359 result
= str_nconvert (coder
, (char *) string
, size
, buffer
);
366 char text
[BUF_MEDIUM
* 6];
371 /* utility function that makes the string valid UTF-8 with all characters printable;
372 * it also returns the width of the string */
373 static const struct term_form
*
374 str_utf8_make_make_term_form (const char *text
, size_t length
)
376 static struct term_form result
;
381 result
.text
[0] = '\0';
383 result
.compose
= FALSE
;
384 actual
= result
.text
;
386 /* check whether text starts with a combining character;
387 * if so, add a space at the beginning */
388 if (length
!= 0 && text
[0] != '\0')
390 uni
= g_utf8_get_char_validated (text
, -1);
391 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
393 if (str_unichar_iscombiningmark (uni
))
398 result
.compose
= TRUE
;
403 while (length
!= 0 && text
[0] != '\0')
405 uni
= g_utf8_get_char_validated (text
, -1);
406 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
408 if (g_unichar_isprint (uni
))
410 left
= g_unichar_to_utf8 (uni
, actual
);
412 if (str_unichar_iscombiningmark (uni
))
413 result
.compose
= TRUE
;
417 if (g_unichar_iswide (uni
))
427 text
= g_utf8_next_char (text
);
432 /*actual[0] = '?'; */
433 memcpy (actual
, replch
, strlen (replch
));
434 actual
+= strlen (replch
);
437 if (length
!= (size_t) (-1))
446 str_utf8_term_form (const char *text
)
448 static char result
[BUF_MEDIUM
* 6];
449 const struct term_form
*pre_form
;
452 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
453 if (pre_form
->compose
)
455 composed
= g_utf8_normalize (pre_form
->text
, -1, G_NORMALIZE_DEFAULT_COMPOSE
);
456 g_strlcpy (result
, composed
, sizeof (result
));
461 g_strlcpy (result
, pre_form
->text
, sizeof (result
));
475 /* utility function that copies all characters from cheked to actual */
477 utf8_tool_copy_chars_to_end (struct utf8_tool
*tool
)
482 tool
->compose
= FALSE
;
484 while (tool
->cheked
[0] != '\0')
486 uni
= g_utf8_get_char (tool
->cheked
);
487 tool
->compose
= tool
->compose
|| str_unichar_iscombiningmark (uni
);
488 left
= g_unichar_to_utf8 (uni
, NULL
);
489 if (tool
->remain
<= left
)
491 left
= g_unichar_to_utf8 (uni
, tool
->actual
);
492 tool
->actual
+= left
;
493 tool
->remain
-= left
;
494 tool
->cheked
= g_utf8_next_char (tool
->cheked
);
499 /* utility function that copies characters from cheked to actual as long as
500 * ident does not exceed to_ident */
502 utf8_tool_copy_chars_to (struct utf8_tool
*tool
, int to_ident
)
508 tool
->compose
= FALSE
;
510 while (tool
->cheked
[0] != '\0')
512 uni
= g_utf8_get_char (tool
->cheked
);
513 if (!str_unichar_iscombiningmark (uni
))
516 if (g_unichar_iswide (uni
))
518 if (tool
->ident
+ w
> to_ident
)
524 tool
->compose
= TRUE
;
527 left
= g_unichar_to_utf8 (uni
, NULL
);
528 if (tool
->remain
<= left
)
530 left
= g_unichar_to_utf8 (uni
, tool
->actual
);
531 tool
->actual
+= left
;
532 tool
->remain
-= left
;
533 tool
->cheked
= g_utf8_next_char (tool
->cheked
);
539 /* utility function that adds count spaces to actual */
541 utf8_tool_insert_space (struct utf8_tool
*tool
, int count
)
545 if (tool
->remain
<= (gsize
) count
)
547 memset (tool
->actual
, ' ', count
);
548 tool
->actual
+= count
;
549 tool
->remain
-= count
;
553 /* utility function that adds one character to actual */
555 utf8_tool_insert_char (struct utf8_tool
*tool
, char ch
)
557 if (tool
->remain
<= 1)
559 tool
->actual
[0] = ch
;
565 /* utility function that skips characters from cheked until ident is greater than or
566 * equal to to_ident */
568 utf8_tool_skip_chars_to (struct utf8_tool
*tool
, int to_ident
)
572 while (to_ident
> tool
->ident
&& tool
->cheked
[0] != '\0')
574 uni
= g_utf8_get_char (tool
->cheked
);
575 if (!str_unichar_iscombiningmark (uni
))
578 if (g_unichar_iswide (uni
))
581 tool
->cheked
= g_utf8_next_char (tool
->cheked
);
583 uni
= g_utf8_get_char (tool
->cheked
);
584 while (str_unichar_iscombiningmark (uni
))
586 tool
->cheked
= g_utf8_next_char (tool
->cheked
);
587 uni
= g_utf8_get_char (tool
->cheked
);
593 utf8_tool_compose (char *buffer
, size_t size
)
595 char *composed
= g_utf8_normalize (buffer
, -1, G_NORMALIZE_DEFAULT_COMPOSE
);
596 g_strlcpy (buffer
, composed
, size
);
602 str_utf8_fit_to_term (const char *text
, int width
, align_crt_t just_mode
)
604 static char result
[BUF_MEDIUM
* 6];
605 const struct term_form
*pre_form
;
606 struct utf8_tool tool
;
608 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
609 tool
.cheked
= pre_form
->text
;
610 tool
.actual
= result
;
611 tool
.remain
= sizeof (result
);
612 tool
.compose
= FALSE
;
614 if (pre_form
->width
<= (gsize
) width
)
617 switch (HIDE_FIT (just_mode
))
621 tool
.ident
= (width
- pre_form
->width
) / 2;
624 tool
.ident
= width
- pre_form
->width
;
628 utf8_tool_insert_space (&tool
, tool
.ident
);
629 utf8_tool_copy_chars_to_end (&tool
);
630 utf8_tool_insert_space (&tool
, width
- pre_form
->width
- tool
.ident
);
634 if (IS_FIT (just_mode
))
637 utf8_tool_copy_chars_to (&tool
, width
/ 2);
638 utf8_tool_insert_char (&tool
, '~');
641 utf8_tool_skip_chars_to (&tool
, pre_form
->width
- width
+ 1);
642 utf8_tool_copy_chars_to_end (&tool
);
643 utf8_tool_insert_space (&tool
, width
- (pre_form
->width
- tool
.ident
+ 1));
648 switch (HIDE_FIT (just_mode
))
651 tool
.ident
= (width
- pre_form
->width
) / 2;
654 tool
.ident
= width
- pre_form
->width
;
658 utf8_tool_skip_chars_to (&tool
, 0);
659 utf8_tool_insert_space (&tool
, tool
.ident
);
660 utf8_tool_copy_chars_to (&tool
, width
);
661 utf8_tool_insert_space (&tool
, width
- tool
.ident
);
665 tool
.actual
[0] = '\0';
667 utf8_tool_compose (result
, sizeof (result
));
672 str_utf8_term_trim (const char *text
, int width
)
674 static char result
[BUF_MEDIUM
* 6];
675 const struct term_form
*pre_form
;
676 struct utf8_tool tool
;
684 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
686 tool
.cheked
= pre_form
->text
;
687 tool
.actual
= result
;
688 tool
.remain
= sizeof (result
);
689 tool
.compose
= FALSE
;
691 if ((gsize
) width
< pre_form
->width
)
695 memset (tool
.actual
, '.', width
);
696 tool
.actual
+= width
;
697 tool
.remain
-= width
;
701 memset (tool
.actual
, '.', 3);
706 utf8_tool_skip_chars_to (&tool
, pre_form
->width
- width
+ 3);
707 utf8_tool_copy_chars_to_end (&tool
);
712 utf8_tool_copy_chars_to_end (&tool
);
715 tool
.actual
[0] = '\0';
717 utf8_tool_compose (result
, sizeof (result
));
722 str_utf8_term_width2 (const char *text
, size_t length
)
724 const struct term_form
*result
;
726 result
= str_utf8_make_make_term_form (text
, length
);
727 return result
->width
;
731 str_utf8_term_width1 (const char *text
)
733 return str_utf8_term_width2 (text
, (size_t) (-1));
737 str_utf8_term_char_width (const char *text
)
739 gunichar uni
= g_utf8_get_char_validated (text
, -1);
740 return (str_unichar_iscombiningmark (uni
)) ? 0 : ((g_unichar_iswide (uni
)) ? 2 : 1);
744 str_utf8_term_substring (const char *text
, int start
, int width
)
746 static char result
[BUF_MEDIUM
* 6];
747 const struct term_form
*pre_form
;
748 struct utf8_tool tool
;
750 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
752 tool
.cheked
= pre_form
->text
;
753 tool
.actual
= result
;
754 tool
.remain
= sizeof (result
);
755 tool
.compose
= FALSE
;
758 utf8_tool_skip_chars_to (&tool
, 0);
761 utf8_tool_insert_space (&tool
, tool
.ident
);
763 utf8_tool_copy_chars_to (&tool
, width
);
764 utf8_tool_insert_space (&tool
, width
- tool
.ident
);
766 tool
.actual
[0] = '\0';
768 utf8_tool_compose (result
, sizeof (result
));
773 str_utf8_trunc (const char *text
, int width
)
775 static char result
[MC_MAXPATHLEN
* 6 * 2];
776 const struct term_form
*pre_form
;
777 struct utf8_tool tool
;
779 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
781 tool
.cheked
= pre_form
->text
;
782 tool
.actual
= result
;
783 tool
.remain
= sizeof (result
);
784 tool
.compose
= FALSE
;
786 if (pre_form
->width
> (gsize
) width
)
789 utf8_tool_copy_chars_to (&tool
, width
/ 2);
790 utf8_tool_insert_char (&tool
, '~');
793 utf8_tool_skip_chars_to (&tool
, pre_form
->width
- width
+ 1);
794 utf8_tool_copy_chars_to_end (&tool
);
798 utf8_tool_copy_chars_to_end (&tool
);
801 tool
.actual
[0] = '\0';
803 utf8_tool_compose (result
, sizeof (result
));
808 str_utf8_offset_to_pos (const char *text
, size_t length
)
810 if (str_utf8_is_valid_string (text
))
811 return g_utf8_offset_to_pointer (text
, length
) - text
;
815 GString
*buffer
= g_string_new (text
);
817 str_utf8_fix_string (buffer
->str
);
818 result
= g_utf8_offset_to_pointer (buffer
->str
, length
) - buffer
->str
;
819 g_string_free (buffer
, TRUE
);
825 str_utf8_column_to_pos (const char *text
, size_t pos
)
834 while (text
[0] != '\0')
836 uni
= g_utf8_get_char_validated (text
, 6);
837 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
839 if (g_unichar_isprint (uni
))
841 if (!str_unichar_iscombiningmark (uni
))
844 if (g_unichar_iswide (uni
))
852 text
= g_utf8_next_char (text
);
859 if ((gsize
) width
> pos
)
869 str_utf8_create_search_needle (const char *needle
, int case_sen
)
875 return g_utf8_normalize (needle
, -1, G_NORMALIZE_ALL
);
879 char *fold
= g_utf8_casefold (needle
, -1);
880 char *result
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
890 str_utf8_release_search_needle (char *needle
, int case_sen
)
898 str_utf8_search_first (const char *text
, const char *search
, int case_sen
)
903 const char *result
= NULL
;
906 fold_text
= (case_sen
) ? (char *) text
: g_utf8_casefold (text
, -1);
907 deco_text
= g_utf8_normalize (fold_text
, -1, G_NORMALIZE_ALL
);
912 match
= g_strstr_len (match
, -1, search
);
915 if ((!str_utf8_iscombiningmark (match
) || (match
== deco_text
)) &&
916 !str_utf8_iscombiningmark (match
+ strlen (search
)))
923 str_utf8_cnext_noncomb_char (&m
);
924 str_utf8_cnext_noncomb_char (&result
);
929 str_utf8_cnext_char (&match
);
933 while (match
!= NULL
&& result
== NULL
);
943 str_utf8_search_last (const char *text
, const char *search
, int case_sen
)
948 const char *result
= NULL
;
951 fold_text
= (case_sen
) ? (char *) text
: g_utf8_casefold (text
, -1);
952 deco_text
= g_utf8_normalize (fold_text
, -1, G_NORMALIZE_ALL
);
956 match
= g_strrstr_len (deco_text
, -1, search
);
959 if ((!str_utf8_iscombiningmark (match
) || (match
== deco_text
)) &&
960 !str_utf8_iscombiningmark (match
+ strlen (search
)))
967 str_utf8_cnext_noncomb_char (&m
);
968 str_utf8_cnext_noncomb_char (&result
);
977 while (match
!= NULL
&& result
== NULL
);
987 str_utf8_normalize (const char *text
)
995 fixed
= g_string_sized_new (4);
998 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
1002 tmp
= g_utf8_normalize (start
, end
- start
, G_NORMALIZE_ALL
);
1003 g_string_append (fixed
, tmp
);
1006 g_string_append_c (fixed
, end
[0]);
1012 result
= g_utf8_normalize (text
, -1, G_NORMALIZE_ALL
);
1013 g_string_free (fixed
, TRUE
);
1017 if (start
[0] != '\0' && start
!= end
)
1019 tmp
= g_utf8_normalize (start
, end
- start
, G_NORMALIZE_ALL
);
1020 g_string_append (fixed
, tmp
);
1023 result
= g_string_free (fixed
, FALSE
);
1030 str_utf8_casefold_normalize (const char *text
)
1038 fixed
= g_string_sized_new (4);
1041 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
1045 fold
= g_utf8_casefold (start
, end
- start
);
1046 tmp
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
1047 g_string_append (fixed
, tmp
);
1051 g_string_append_c (fixed
, end
[0]);
1057 fold
= g_utf8_casefold (text
, -1);
1058 result
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
1060 g_string_free (fixed
, TRUE
);
1064 if (start
[0] != '\0' && start
!= end
)
1066 fold
= g_utf8_casefold (start
, end
- start
);
1067 tmp
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
1068 g_string_append (fixed
, tmp
);
1072 result
= g_string_free (fixed
, FALSE
);
1079 str_utf8_compare (const char *t1
, const char *t2
)
1084 n1
= str_utf8_normalize (t1
);
1085 n2
= str_utf8_normalize (t2
);
1087 result
= strcmp (n1
, n2
);
1096 str_utf8_ncompare (const char *t1
, const char *t2
)
1101 n1
= str_utf8_normalize (t1
);
1102 n2
= str_utf8_normalize (t2
);
1104 result
= strncmp (n1
, n2
, min (strlen (n1
), strlen (n2
)));
1113 str_utf8_casecmp (const char *t1
, const char *t2
)
1118 n1
= str_utf8_casefold_normalize (t1
);
1119 n2
= str_utf8_casefold_normalize (t2
);
1121 result
= strcmp (n1
, n2
);
1130 str_utf8_ncasecmp (const char *t1
, const char *t2
)
1135 n1
= str_utf8_casefold_normalize (t1
);
1136 n2
= str_utf8_casefold_normalize (t2
);
1138 result
= strncmp (n1
, n2
, min (strlen (n1
), strlen (n2
)));
1147 str_utf8_prefix (const char *text
, const char *prefix
)
1149 char *t
= str_utf8_normalize (text
);
1150 char *p
= str_utf8_normalize (prefix
);
1153 const char *nnt
= t
;
1154 const char *nnp
= p
;
1157 while (nt
[0] != '\0' && np
[0] != '\0')
1159 str_utf8_cnext_char_safe (&nnt
);
1160 str_utf8_cnext_char_safe (&nnp
);
1161 if (nnt
- nt
!= nnp
- np
)
1163 if (strncmp (nt
, np
, nnt
- nt
) != 0)
1178 str_utf8_caseprefix (const char *text
, const char *prefix
)
1180 char *t
= str_utf8_casefold_normalize (text
);
1181 char *p
= str_utf8_casefold_normalize (prefix
);
1184 const char *nnt
= t
;
1185 const char *nnp
= p
;
1188 while (nt
[0] != '\0' && np
[0] != '\0')
1190 str_utf8_cnext_char_safe (&nnt
);
1191 str_utf8_cnext_char_safe (&nnp
);
1192 if (nnt
- nt
!= nnp
- np
)
1194 if (strncmp (nt
, np
, nnt
- nt
) != 0)
1209 str_utf8_create_key_gen (const char *text
, int case_sen
,
1210 gchar
* (*keygen
) (const gchar
* text
, gssize size
))
1216 result
= str_utf8_normalize (text
);
1222 const char *start
, *end
;
1225 dot
= text
[0] == '.';
1226 fixed
= g_string_sized_new (16);
1233 g_string_append_c (fixed
, '.');
1236 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
1240 fold
= g_utf8_casefold (start
, end
- start
);
1241 key
= keygen (fold
, -1);
1242 g_string_append (fixed
, key
);
1246 g_string_append_c (fixed
, end
[0]);
1252 fold
= g_utf8_casefold (start
, -1);
1253 result
= keygen (fold
, -1);
1255 g_string_free (fixed
, TRUE
);
1257 else if (dot
&& (start
== text
+ 1))
1259 fold
= g_utf8_casefold (start
, -1);
1260 key
= keygen (fold
, -1);
1261 g_string_append (fixed
, key
);
1264 result
= g_string_free (fixed
, FALSE
);
1268 if (start
[0] != '\0' && start
!= end
)
1270 fold
= g_utf8_casefold (start
, end
- start
);
1271 key
= keygen (fold
, -1);
1272 g_string_append (fixed
, key
);
1276 result
= g_string_free (fixed
, FALSE
);
1283 str_utf8_create_key (const char *text
, int case_sen
)
1285 return str_utf8_create_key_gen (text
, case_sen
, g_utf8_collate_key
);
1288 #ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME
1290 str_utf8_create_key_for_filename (const char *text
, int case_sen
)
1292 return str_utf8_create_key_gen (text
, case_sen
, g_utf8_collate_key_for_filename
);
1297 str_utf8_key_collate (const char *t1
, const char *t2
, int case_sen
)
1300 return strcmp (t1
, t2
);
1304 str_utf8_release_key (char *key
, int case_sen
)
1311 str_utf8_init (void)
1313 struct str_class result
;
1315 result
.conv_gerror_message
= str_utf8_conv_gerror_message
;
1316 result
.vfs_convert_to
= str_utf8_vfs_convert_to
;
1317 result
.insert_replace_char
= str_utf8_insert_replace_char
;
1318 result
.is_valid_string
= str_utf8_is_valid_string
;
1319 result
.is_valid_char
= str_utf8_is_valid_char
;
1320 result
.cnext_char
= str_utf8_cnext_char
;
1321 result
.cprev_char
= str_utf8_cprev_char
;
1322 result
.cnext_char_safe
= str_utf8_cnext_char_safe
;
1323 result
.cprev_char_safe
= str_utf8_cprev_char_safe
;
1324 result
.cnext_noncomb_char
= str_utf8_cnext_noncomb_char
;
1325 result
.cprev_noncomb_char
= str_utf8_cprev_noncomb_char
;
1326 result
.char_isspace
= str_utf8_isspace
;
1327 result
.char_ispunct
= str_utf8_ispunct
;
1328 result
.char_isalnum
= str_utf8_isalnum
;
1329 result
.char_isdigit
= str_utf8_isdigit
;
1330 result
.char_isprint
= str_utf8_isprint
;
1331 result
.char_iscombiningmark
= str_utf8_iscombiningmark
;
1332 result
.char_toupper
= str_utf8_toupper
;
1333 result
.char_tolower
= str_utf8_tolower
;
1334 result
.length
= str_utf8_length
;
1335 result
.length2
= str_utf8_length2
;
1336 result
.length_noncomb
= str_utf8_length_noncomb
;
1337 result
.fix_string
= str_utf8_fix_string
;
1338 result
.term_form
= str_utf8_term_form
;
1339 result
.fit_to_term
= str_utf8_fit_to_term
;
1340 result
.term_trim
= str_utf8_term_trim
;
1341 result
.term_width2
= str_utf8_term_width2
;
1342 result
.term_width1
= str_utf8_term_width1
;
1343 result
.term_char_width
= str_utf8_term_char_width
;
1344 result
.term_substring
= str_utf8_term_substring
;
1345 result
.trunc
= str_utf8_trunc
;
1346 result
.offset_to_pos
= str_utf8_offset_to_pos
;
1347 result
.column_to_pos
= str_utf8_column_to_pos
;
1348 result
.create_search_needle
= str_utf8_create_search_needle
;
1349 result
.release_search_needle
= str_utf8_release_search_needle
;
1350 result
.search_first
= str_utf8_search_first
;
1351 result
.search_last
= str_utf8_search_last
;
1352 result
.compare
= str_utf8_compare
;
1353 result
.ncompare
= str_utf8_ncompare
;
1354 result
.casecmp
= str_utf8_casecmp
;
1355 result
.ncasecmp
= str_utf8_ncasecmp
;
1356 result
.prefix
= str_utf8_prefix
;
1357 result
.caseprefix
= str_utf8_caseprefix
;
1358 result
.create_key
= str_utf8_create_key
;
1359 #ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME
1360 /* sort files case-insensitively in "a1 a2 a10" order */
1361 result
.create_key_for_filename
= str_utf8_create_key_for_filename
;
1363 /* sort files case-insensitively in "a1 a10 a2" order */
1364 result
.create_key_for_filename
= str_utf8_create_key
;
1366 result
.key_collate
= str_utf8_key_collate
;
1367 result
.release_key
= str_utf8_release_key
;