2 UTF-8 strings utilities
4 Copyright (C) 2007-2016
5 Free Software Foundation, Inc.
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
32 #include "lib/global.h"
33 #include "lib/strutil.h"
35 /* uses the UTF-8 functions provided by GLib */
37 /*** global variables ****************************************************************************/
39 /*** file scope macro definitions ****************************************************************/
41 /*** file scope type declarations ****************************************************************/
54 char text
[BUF_MEDIUM
* 6];
59 /*** file scope variables ************************************************************************/
61 static const char replch
[] = "\xEF\xBF\xBD";
63 /* --------------------------------------------------------------------------------------------- */
64 /*** file scope functions ************************************************************************/
65 /* --------------------------------------------------------------------------------------------- */
68 str_unichar_iscombiningmark (gunichar uni
)
72 type
= g_unichar_type (uni
);
73 return (type
== G_UNICODE_COMBINING_MARK
)
74 || (type
== G_UNICODE_ENCLOSING_MARK
) || (type
== G_UNICODE_NON_SPACING_MARK
);
77 /* --------------------------------------------------------------------------------------------- */
80 str_utf8_insert_replace_char (GString
* buffer
)
82 g_string_append (buffer
, replch
);
85 /* --------------------------------------------------------------------------------------------- */
88 str_utf8_is_valid_string (const char *text
)
90 return g_utf8_validate (text
, -1, NULL
);
93 /* --------------------------------------------------------------------------------------------- */
96 str_utf8_is_valid_char (const char *ch
, size_t size
)
98 switch (g_utf8_get_char_validated (ch
, size
))
100 case (gunichar
) (-2):
102 case (gunichar
) (-1):
109 /* --------------------------------------------------------------------------------------------- */
112 str_utf8_cnext_char (const char **text
)
114 (*text
) = g_utf8_next_char (*text
);
117 /* --------------------------------------------------------------------------------------------- */
120 str_utf8_cprev_char (const char **text
)
122 (*text
) = g_utf8_prev_char (*text
);
125 /* --------------------------------------------------------------------------------------------- */
128 str_utf8_cnext_char_safe (const char **text
)
130 if (str_utf8_is_valid_char (*text
, -1) == 1)
131 (*text
) = g_utf8_next_char (*text
);
136 /* --------------------------------------------------------------------------------------------- */
139 str_utf8_cprev_char_safe (const char **text
)
141 const char *result
, *t
;
143 result
= g_utf8_prev_char (*text
);
145 str_utf8_cnext_char_safe (&t
);
152 /* --------------------------------------------------------------------------------------------- */
155 str_utf8_fix_string (char *text
)
157 while (text
[0] != '\0')
161 uni
= g_utf8_get_char_validated (text
, -1);
162 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
163 text
= g_utf8_next_char (text
);
172 /* --------------------------------------------------------------------------------------------- */
175 str_utf8_isspace (const char *text
)
179 uni
= g_utf8_get_char_validated (text
, -1);
180 return g_unichar_isspace (uni
);
183 /* --------------------------------------------------------------------------------------------- */
186 str_utf8_ispunct (const char *text
)
190 uni
= g_utf8_get_char_validated (text
, -1);
191 return g_unichar_ispunct (uni
);
194 /* --------------------------------------------------------------------------------------------- */
197 str_utf8_isalnum (const char *text
)
201 uni
= g_utf8_get_char_validated (text
, -1);
202 return g_unichar_isalnum (uni
);
205 /* --------------------------------------------------------------------------------------------- */
208 str_utf8_isdigit (const char *text
)
212 uni
= g_utf8_get_char_validated (text
, -1);
213 return g_unichar_isdigit (uni
);
216 /* --------------------------------------------------------------------------------------------- */
219 str_utf8_isprint (const char *ch
)
223 uni
= g_utf8_get_char_validated (ch
, -1);
224 return g_unichar_isprint (uni
);
227 /* --------------------------------------------------------------------------------------------- */
230 str_utf8_iscombiningmark (const char *ch
)
234 uni
= g_utf8_get_char_validated (ch
, -1);
235 return str_unichar_iscombiningmark (uni
);
238 /* --------------------------------------------------------------------------------------------- */
241 str_utf8_cnext_noncomb_char (const char **text
)
245 while ((*text
)[0] != '\0')
247 str_utf8_cnext_char_safe (text
);
249 if (!str_utf8_iscombiningmark (*text
))
256 /* --------------------------------------------------------------------------------------------- */
259 str_utf8_cprev_noncomb_char (const char **text
, const char *begin
)
263 while ((*text
) != begin
)
265 str_utf8_cprev_char_safe (text
);
267 if (!str_utf8_iscombiningmark (*text
))
274 /* --------------------------------------------------------------------------------------------- */
277 str_utf8_toupper (const char *text
, char **out
, size_t * remain
)
282 uni
= g_utf8_get_char_validated (text
, -1);
283 if (uni
== (gunichar
) (-1) || uni
== (gunichar
) (-2))
286 uni
= g_unichar_toupper (uni
);
287 left
= g_unichar_to_utf8 (uni
, NULL
);
291 left
= g_unichar_to_utf8 (uni
, *out
);
297 /* --------------------------------------------------------------------------------------------- */
300 str_utf8_tolower (const char *text
, char **out
, size_t * remain
)
305 uni
= g_utf8_get_char_validated (text
, -1);
306 if (uni
== (gunichar
) (-1) || uni
== (gunichar
) (-2))
309 uni
= g_unichar_tolower (uni
);
310 left
= g_unichar_to_utf8 (uni
, NULL
);
314 left
= g_unichar_to_utf8 (uni
, *out
);
320 /* --------------------------------------------------------------------------------------------- */
323 str_utf8_length (const char *text
)
330 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
333 result
+= g_utf8_strlen (start
, end
- start
);
340 result
= g_utf8_strlen (text
, -1);
341 else if (start
[0] != '\0' && start
!= end
)
342 result
+= g_utf8_strlen (start
, end
- start
);
347 /* --------------------------------------------------------------------------------------------- */
350 str_utf8_length2 (const char *text
, int size
)
357 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0' && size
> 0)
361 result
+= g_utf8_strlen (start
, MIN (end
- start
, size
));
364 result
+= (size
> 0);
370 result
= g_utf8_strlen (text
, size
);
371 else if (start
[0] != '\0' && start
!= end
&& size
> 0)
372 result
+= g_utf8_strlen (start
, MIN (end
- start
, size
));
377 /* --------------------------------------------------------------------------------------------- */
380 str_utf8_length_noncomb (const char *text
)
383 const char *t
= text
;
387 str_utf8_cnext_noncomb_char (&t
);
394 /* --------------------------------------------------------------------------------------------- */
398 str_utf8_questmark_sustb (char **string
, size_t * left
, GString
* buffer
)
402 next
= g_utf8_next_char (*string
);
403 (*left
) -= next
- (*string
);
405 g_string_append_c (buffer
, '?');
409 /* --------------------------------------------------------------------------------------------- */
412 str_utf8_conv_gerror_message (GError
* mcerror
, const char *def_msg
)
415 return g_strdup (mcerror
->message
);
417 return g_strdup (def_msg
!= NULL
? def_msg
: "");
420 /* --------------------------------------------------------------------------------------------- */
423 str_utf8_vfs_convert_to (GIConv coder
, const char *string
, int size
, GString
* buffer
)
425 estr_t result
= ESTR_SUCCESS
;
427 if (coder
== str_cnv_not_convert
)
428 g_string_append_len (buffer
, string
, size
);
430 result
= str_nconvert (coder
, string
, size
, buffer
);
435 /* --------------------------------------------------------------------------------------------- */
436 /* utility function that makes the string valid UTF-8 with all characters printable;
437 * it also returns the width of the string */
439 static const struct term_form
*
440 str_utf8_make_make_term_form (const char *text
, size_t length
)
442 static struct term_form result
;
447 result
.text
[0] = '\0';
449 result
.compose
= FALSE
;
450 actual
= result
.text
;
452 /* check if the text starts with a combining character;
453 * add a space at the beginning in this case */
454 if (length
!= 0 && text
[0] != '\0')
456 uni
= g_utf8_get_char_validated (text
, -1);
457 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2))
458 && str_unichar_iscombiningmark (uni
))
463 result
.compose
= TRUE
;
467 while (length
!= 0 && text
[0] != '\0')
469 uni
= g_utf8_get_char_validated (text
, -1);
470 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
472 if (g_unichar_isprint (uni
))
474 left
= g_unichar_to_utf8 (uni
, actual
);
476 if (str_unichar_iscombiningmark (uni
))
477 result
.compose
= TRUE
;
481 if (g_unichar_iswide (uni
))
491 text
= g_utf8_next_char (text
);
496 /*actual[0] = '?'; */
497 memcpy (actual
, replch
, strlen (replch
));
498 actual
+= strlen (replch
);
502 if (length
!= (size_t) (-1))
510 /* --------------------------------------------------------------------------------------------- */
513 str_utf8_term_form (const char *text
)
515 static char result
[BUF_MEDIUM
* 6];
516 const struct term_form
*pre_form
;
518 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
519 if (pre_form
->compose
)
523 composed
= g_utf8_normalize (pre_form
->text
, -1, G_NORMALIZE_DEFAULT_COMPOSE
);
524 g_strlcpy (result
, composed
, sizeof (result
));
528 g_strlcpy (result
, pre_form
->text
, sizeof (result
));
533 /* --------------------------------------------------------------------------------------------- */
534 /* utility function, that copies all characters from checked to actual */
537 utf8_tool_copy_chars_to_end (struct utf8_tool
*tool
)
539 tool
->compose
= FALSE
;
541 while (tool
->checked
[0] != '\0')
546 uni
= g_utf8_get_char (tool
->checked
);
547 tool
->compose
= tool
->compose
|| str_unichar_iscombiningmark (uni
);
548 left
= g_unichar_to_utf8 (uni
, NULL
);
549 if (tool
->remain
<= left
)
551 left
= g_unichar_to_utf8 (uni
, tool
->actual
);
552 tool
->actual
+= left
;
553 tool
->remain
-= left
;
554 tool
->checked
= g_utf8_next_char (tool
->checked
);
560 /* --------------------------------------------------------------------------------------------- */
561 /* utility function that copies characters from checked to actual while ident is
562 * smaller than to_ident */
565 utf8_tool_copy_chars_to (struct utf8_tool
*tool
, int to_ident
)
567 tool
->compose
= FALSE
;
569 while (tool
->checked
[0] != '\0')
575 uni
= g_utf8_get_char (tool
->checked
);
576 if (str_unichar_iscombiningmark (uni
))
577 tool
->compose
= TRUE
;
581 if (g_unichar_iswide (uni
))
583 if (tool
->ident
+ w
> to_ident
)
587 left
= g_unichar_to_utf8 (uni
, NULL
);
588 if (tool
->remain
<= left
)
590 left
= g_unichar_to_utf8 (uni
, tool
->actual
);
591 tool
->actual
+= left
;
592 tool
->remain
-= left
;
593 tool
->checked
= g_utf8_next_char (tool
->checked
);
600 /* --------------------------------------------------------------------------------------------- */
601 /* utility function, adds count spaces to actual */
604 utf8_tool_insert_space (struct utf8_tool
*tool
, int count
)
608 if (tool
->remain
<= (gsize
) count
)
611 memset (tool
->actual
, ' ', count
);
612 tool
->actual
+= count
;
613 tool
->remain
-= count
;
617 /* --------------------------------------------------------------------------------------------- */
618 /* utility function, adds one character to actual */
621 utf8_tool_insert_char (struct utf8_tool
*tool
, char ch
)
623 if (tool
->remain
<= 1)
626 tool
->actual
[0] = ch
;
632 /* --------------------------------------------------------------------------------------------- */
633 /* utility function that skips characters from checked until ident is greater than or
634 * equal to to_ident */
637 utf8_tool_skip_chars_to (struct utf8_tool
*tool
, int to_ident
)
641 while (to_ident
> tool
->ident
&& tool
->checked
[0] != '\0')
643 uni
= g_utf8_get_char (tool
->checked
);
644 if (!str_unichar_iscombiningmark (uni
))
647 if (g_unichar_iswide (uni
))
650 tool
->checked
= g_utf8_next_char (tool
->checked
);
653 uni
= g_utf8_get_char (tool
->checked
);
654 while (str_unichar_iscombiningmark (uni
))
656 tool
->checked
= g_utf8_next_char (tool
->checked
);
657 uni
= g_utf8_get_char (tool
->checked
);
663 /* --------------------------------------------------------------------------------------------- */
666 utf8_tool_compose (char *buffer
, size_t size
)
670 composed
= g_utf8_normalize (buffer
, -1, G_NORMALIZE_DEFAULT_COMPOSE
);
671 g_strlcpy (buffer
, composed
, size
);
675 /* --------------------------------------------------------------------------------------------- */
678 str_utf8_fit_to_term (const char *text
, int width
, align_crt_t just_mode
)
680 static char result
[BUF_MEDIUM
* 6];
681 const struct term_form
*pre_form
;
682 struct utf8_tool tool
;
684 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
685 tool
.checked
= pre_form
->text
;
686 tool
.actual
= result
;
687 tool
.remain
= sizeof (result
);
688 tool
.compose
= FALSE
;
690 if (pre_form
->width
<= (gsize
) width
)
692 switch (HIDE_FIT (just_mode
))
696 tool
.ident
= (width
- pre_form
->width
) / 2;
699 tool
.ident
= width
- pre_form
->width
;
706 utf8_tool_insert_space (&tool
, tool
.ident
);
707 utf8_tool_copy_chars_to_end (&tool
);
708 utf8_tool_insert_space (&tool
, width
- pre_form
->width
- tool
.ident
);
710 else if (IS_FIT (just_mode
))
713 utf8_tool_copy_chars_to (&tool
, width
/ 2);
714 utf8_tool_insert_char (&tool
, '~');
717 utf8_tool_skip_chars_to (&tool
, pre_form
->width
- width
+ 1);
718 utf8_tool_copy_chars_to_end (&tool
);
719 utf8_tool_insert_space (&tool
, width
- (pre_form
->width
- tool
.ident
+ 1));
723 switch (HIDE_FIT (just_mode
))
726 tool
.ident
= (width
- pre_form
->width
) / 2;
729 tool
.ident
= width
- pre_form
->width
;
736 utf8_tool_skip_chars_to (&tool
, 0);
737 utf8_tool_insert_space (&tool
, tool
.ident
);
738 utf8_tool_copy_chars_to (&tool
, width
);
739 utf8_tool_insert_space (&tool
, width
- tool
.ident
);
742 tool
.actual
[0] = '\0';
744 utf8_tool_compose (result
, sizeof (result
));
748 /* --------------------------------------------------------------------------------------------- */
751 str_utf8_term_trim (const char *text
, int width
)
753 static char result
[BUF_MEDIUM
* 6];
754 const struct term_form
*pre_form
;
755 struct utf8_tool tool
;
763 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
765 tool
.checked
= pre_form
->text
;
766 tool
.actual
= result
;
767 tool
.remain
= sizeof (result
);
768 tool
.compose
= FALSE
;
770 if ((gsize
) width
>= pre_form
->width
)
771 utf8_tool_copy_chars_to_end (&tool
);
774 memset (tool
.actual
, '.', width
);
775 tool
.actual
+= width
;
776 tool
.remain
-= width
;
780 memset (tool
.actual
, '.', 3);
785 utf8_tool_skip_chars_to (&tool
, pre_form
->width
- width
+ 3);
786 utf8_tool_copy_chars_to_end (&tool
);
789 tool
.actual
[0] = '\0';
791 utf8_tool_compose (result
, sizeof (result
));
795 /* --------------------------------------------------------------------------------------------- */
798 str_utf8_term_width2 (const char *text
, size_t length
)
800 const struct term_form
*result
;
802 result
= str_utf8_make_make_term_form (text
, length
);
803 return result
->width
;
806 /* --------------------------------------------------------------------------------------------- */
809 str_utf8_term_width1 (const char *text
)
811 return str_utf8_term_width2 (text
, (size_t) (-1));
814 /* --------------------------------------------------------------------------------------------- */
817 str_utf8_term_char_width (const char *text
)
821 uni
= g_utf8_get_char_validated (text
, -1);
822 return (str_unichar_iscombiningmark (uni
)) ? 0 : ((g_unichar_iswide (uni
)) ? 2 : 1);
825 /* --------------------------------------------------------------------------------------------- */
828 str_utf8_term_substring (const char *text
, int start
, int width
)
830 static char result
[BUF_MEDIUM
* 6];
831 const struct term_form
*pre_form
;
832 struct utf8_tool tool
;
834 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
836 tool
.checked
= pre_form
->text
;
837 tool
.actual
= result
;
838 tool
.remain
= sizeof (result
);
839 tool
.compose
= FALSE
;
842 utf8_tool_skip_chars_to (&tool
, 0);
845 utf8_tool_insert_space (&tool
, tool
.ident
);
847 utf8_tool_copy_chars_to (&tool
, width
);
848 utf8_tool_insert_space (&tool
, width
- tool
.ident
);
850 tool
.actual
[0] = '\0';
852 utf8_tool_compose (result
, sizeof (result
));
856 /* --------------------------------------------------------------------------------------------- */
859 str_utf8_trunc (const char *text
, int width
)
861 static char result
[MC_MAXPATHLEN
* 6 * 2];
862 const struct term_form
*pre_form
;
863 struct utf8_tool tool
;
865 pre_form
= str_utf8_make_make_term_form (text
, (size_t) (-1));
867 tool
.checked
= pre_form
->text
;
868 tool
.actual
= result
;
869 tool
.remain
= sizeof (result
);
870 tool
.compose
= FALSE
;
872 if (pre_form
->width
<= (gsize
) width
)
873 utf8_tool_copy_chars_to_end (&tool
);
877 utf8_tool_copy_chars_to (&tool
, width
/ 2);
878 utf8_tool_insert_char (&tool
, '~');
881 utf8_tool_skip_chars_to (&tool
, pre_form
->width
- width
+ 1);
882 utf8_tool_copy_chars_to_end (&tool
);
885 tool
.actual
[0] = '\0';
887 utf8_tool_compose (result
, sizeof (result
));
891 /* --------------------------------------------------------------------------------------------- */
894 str_utf8_offset_to_pos (const char *text
, size_t length
)
896 if (str_utf8_is_valid_string (text
))
897 return g_utf8_offset_to_pointer (text
, length
) - text
;
903 buffer
= g_string_new (text
);
904 str_utf8_fix_string (buffer
->str
);
905 result
= g_utf8_offset_to_pointer (buffer
->str
, length
) - buffer
->str
;
906 g_string_free (buffer
, TRUE
);
911 /* --------------------------------------------------------------------------------------------- */
914 str_utf8_column_to_pos (const char *text
, size_t pos
)
919 while (text
[0] != '\0')
923 uni
= g_utf8_get_char_validated (text
, 6);
924 if ((uni
!= (gunichar
) (-1)) && (uni
!= (gunichar
) (-2)))
926 if (g_unichar_isprint (uni
))
928 if (!str_unichar_iscombiningmark (uni
))
931 if (g_unichar_iswide (uni
))
939 text
= g_utf8_next_char (text
);
947 if ((gsize
) width
> pos
)
956 /* --------------------------------------------------------------------------------------------- */
959 str_utf8_create_search_needle (const char *needle
, int case_sen
)
967 return g_utf8_normalize (needle
, -1, G_NORMALIZE_ALL
);
970 fold
= g_utf8_casefold (needle
, -1);
971 result
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
976 /* --------------------------------------------------------------------------------------------- */
979 str_utf8_release_search_needle (char *needle
, int case_sen
)
985 /* --------------------------------------------------------------------------------------------- */
988 str_utf8_search_first (const char *text
, const char *search
, int case_sen
)
993 const char *result
= NULL
;
996 fold_text
= (case_sen
) ? (char *) text
: g_utf8_casefold (text
, -1);
997 deco_text
= g_utf8_normalize (fold_text
, -1, G_NORMALIZE_ALL
);
1002 match
= g_strstr_len (match
, -1, search
);
1005 if ((!str_utf8_iscombiningmark (match
) || (match
== deco_text
)) &&
1006 !str_utf8_iscombiningmark (match
+ strlen (search
)))
1012 str_utf8_cnext_noncomb_char (&m
);
1013 str_utf8_cnext_noncomb_char (&result
);
1017 str_utf8_cnext_char (&match
);
1020 while (match
!= NULL
&& result
== NULL
);
1029 /* --------------------------------------------------------------------------------------------- */
1032 str_utf8_search_last (const char *text
, const char *search
, int case_sen
)
1037 const char *result
= NULL
;
1040 fold_text
= (case_sen
) ? (char *) text
: g_utf8_casefold (text
, -1);
1041 deco_text
= g_utf8_normalize (fold_text
, -1, G_NORMALIZE_ALL
);
1045 match
= g_strrstr_len (deco_text
, -1, search
);
1048 if ((!str_utf8_iscombiningmark (match
) || (match
== deco_text
)) &&
1049 !str_utf8_iscombiningmark (match
+ strlen (search
)))
1055 str_utf8_cnext_noncomb_char (&m
);
1056 str_utf8_cnext_noncomb_char (&result
);
1063 while (match
!= NULL
&& result
== NULL
);
1072 /* --------------------------------------------------------------------------------------------- */
1075 str_utf8_normalize (const char *text
)
1083 fixed
= g_string_sized_new (4);
1086 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
1090 tmp
= g_utf8_normalize (start
, end
- start
, G_NORMALIZE_ALL
);
1091 g_string_append (fixed
, tmp
);
1094 g_string_append_c (fixed
, end
[0]);
1100 result
= g_utf8_normalize (text
, -1, G_NORMALIZE_ALL
);
1101 g_string_free (fixed
, TRUE
);
1105 if (start
[0] != '\0' && start
!= end
)
1107 tmp
= g_utf8_normalize (start
, end
- start
, G_NORMALIZE_ALL
);
1108 g_string_append (fixed
, tmp
);
1111 result
= g_string_free (fixed
, FALSE
);
1117 /* --------------------------------------------------------------------------------------------- */
1120 str_utf8_casefold_normalize (const char *text
)
1128 fixed
= g_string_sized_new (4);
1131 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
1135 fold
= g_utf8_casefold (start
, end
- start
);
1136 tmp
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
1137 g_string_append (fixed
, tmp
);
1141 g_string_append_c (fixed
, end
[0]);
1147 fold
= g_utf8_casefold (text
, -1);
1148 result
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
1150 g_string_free (fixed
, TRUE
);
1154 if (start
[0] != '\0' && start
!= end
)
1156 fold
= g_utf8_casefold (start
, end
- start
);
1157 tmp
= g_utf8_normalize (fold
, -1, G_NORMALIZE_ALL
);
1158 g_string_append (fixed
, tmp
);
1162 result
= g_string_free (fixed
, FALSE
);
1168 /* --------------------------------------------------------------------------------------------- */
1171 str_utf8_compare (const char *t1
, const char *t2
)
1176 n1
= str_utf8_normalize (t1
);
1177 n2
= str_utf8_normalize (t2
);
1179 result
= strcmp (n1
, n2
);
1187 /* --------------------------------------------------------------------------------------------- */
1190 str_utf8_ncompare (const char *t1
, const char *t2
)
1196 n1
= str_utf8_normalize (t1
);
1197 n2
= str_utf8_normalize (t2
);
1201 result
= strncmp (n1
, n2
, MIN (l1
, l2
));
1209 /* --------------------------------------------------------------------------------------------- */
1212 str_utf8_casecmp (const char *t1
, const char *t2
)
1217 n1
= str_utf8_casefold_normalize (t1
);
1218 n2
= str_utf8_casefold_normalize (t2
);
1220 result
= strcmp (n1
, n2
);
1228 /* --------------------------------------------------------------------------------------------- */
1231 str_utf8_ncasecmp (const char *t1
, const char *t2
)
1237 n1
= str_utf8_casefold_normalize (t1
);
1238 n2
= str_utf8_casefold_normalize (t2
);
1242 result
= strncmp (n1
, n2
, MIN (l1
, l2
));
1250 /* --------------------------------------------------------------------------------------------- */
1253 str_utf8_prefix (const char *text
, const char *prefix
)
1256 const char *nt
, *np
;
1257 const char *nnt
, *nnp
;
1260 t
= str_utf8_normalize (text
);
1261 p
= str_utf8_normalize (prefix
);
1267 while (nt
[0] != '\0' && np
[0] != '\0')
1269 str_utf8_cnext_char_safe (&nnt
);
1270 str_utf8_cnext_char_safe (&nnp
);
1271 if (nnt
- nt
!= nnp
- np
)
1273 if (strncmp (nt
, np
, nnt
- nt
) != 0)
1287 /* --------------------------------------------------------------------------------------------- */
1290 str_utf8_caseprefix (const char *text
, const char *prefix
)
1293 const char *nt
, *np
;
1294 const char *nnt
, *nnp
;
1297 t
= str_utf8_casefold_normalize (text
);
1298 p
= str_utf8_casefold_normalize (prefix
);
1304 while (nt
[0] != '\0' && np
[0] != '\0')
1306 str_utf8_cnext_char_safe (&nnt
);
1307 str_utf8_cnext_char_safe (&nnp
);
1308 if (nnt
- nt
!= nnp
- np
)
1310 if (strncmp (nt
, np
, nnt
- nt
) != 0)
1324 /* --------------------------------------------------------------------------------------------- */
1327 str_utf8_create_key_gen (const char *text
, int case_sen
,
1328 gchar
* (*keygen
) (const gchar
* text
, gssize size
))
1333 result
= str_utf8_normalize (text
);
1338 const char *start
, *end
;
1341 dot
= text
[0] == '.';
1342 fixed
= g_string_sized_new (16);
1349 g_string_append_c (fixed
, '.');
1352 while (!g_utf8_validate (start
, -1, &end
) && start
[0] != '\0')
1356 fold
= g_utf8_casefold (start
, end
- start
);
1357 key
= keygen (fold
, -1);
1358 g_string_append (fixed
, key
);
1362 g_string_append_c (fixed
, end
[0]);
1368 fold
= g_utf8_casefold (start
, -1);
1369 result
= keygen (fold
, -1);
1371 g_string_free (fixed
, TRUE
);
1373 else if (dot
&& (start
== text
+ 1))
1375 fold
= g_utf8_casefold (start
, -1);
1376 key
= keygen (fold
, -1);
1377 g_string_append (fixed
, key
);
1380 result
= g_string_free (fixed
, FALSE
);
1384 if (start
[0] != '\0' && start
!= end
)
1386 fold
= g_utf8_casefold (start
, end
- start
);
1387 key
= keygen (fold
, -1);
1388 g_string_append (fixed
, key
);
1392 result
= g_string_free (fixed
, FALSE
);
1398 /* --------------------------------------------------------------------------------------------- */
1401 str_utf8_create_key (const char *text
, int case_sen
)
1403 return str_utf8_create_key_gen (text
, case_sen
, g_utf8_collate_key
);
1406 /* --------------------------------------------------------------------------------------------- */
1408 #ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME
1410 str_utf8_create_key_for_filename (const char *text
, int case_sen
)
1412 return str_utf8_create_key_gen (text
, case_sen
, g_utf8_collate_key_for_filename
);
1416 /* --------------------------------------------------------------------------------------------- */
1419 str_utf8_key_collate (const char *t1
, const char *t2
, int case_sen
)
1422 return strcmp (t1
, t2
);
1425 /* --------------------------------------------------------------------------------------------- */
1428 str_utf8_release_key (char *key
, int case_sen
)
1434 /* --------------------------------------------------------------------------------------------- */
1435 /*** public functions ****************************************************************************/
1436 /* --------------------------------------------------------------------------------------------- */
1439 str_utf8_init (void)
1441 struct str_class result
;
1443 result
.conv_gerror_message
= str_utf8_conv_gerror_message
;
1444 result
.vfs_convert_to
= str_utf8_vfs_convert_to
;
1445 result
.insert_replace_char
= str_utf8_insert_replace_char
;
1446 result
.is_valid_string
= str_utf8_is_valid_string
;
1447 result
.is_valid_char
= str_utf8_is_valid_char
;
1448 result
.cnext_char
= str_utf8_cnext_char
;
1449 result
.cprev_char
= str_utf8_cprev_char
;
1450 result
.cnext_char_safe
= str_utf8_cnext_char_safe
;
1451 result
.cprev_char_safe
= str_utf8_cprev_char_safe
;
1452 result
.cnext_noncomb_char
= str_utf8_cnext_noncomb_char
;
1453 result
.cprev_noncomb_char
= str_utf8_cprev_noncomb_char
;
1454 result
.char_isspace
= str_utf8_isspace
;
1455 result
.char_ispunct
= str_utf8_ispunct
;
1456 result
.char_isalnum
= str_utf8_isalnum
;
1457 result
.char_isdigit
= str_utf8_isdigit
;
1458 result
.char_isprint
= str_utf8_isprint
;
1459 result
.char_iscombiningmark
= str_utf8_iscombiningmark
;
1460 result
.char_toupper
= str_utf8_toupper
;
1461 result
.char_tolower
= str_utf8_tolower
;
1462 result
.length
= str_utf8_length
;
1463 result
.length2
= str_utf8_length2
;
1464 result
.length_noncomb
= str_utf8_length_noncomb
;
1465 result
.fix_string
= str_utf8_fix_string
;
1466 result
.term_form
= str_utf8_term_form
;
1467 result
.fit_to_term
= str_utf8_fit_to_term
;
1468 result
.term_trim
= str_utf8_term_trim
;
1469 result
.term_width2
= str_utf8_term_width2
;
1470 result
.term_width1
= str_utf8_term_width1
;
1471 result
.term_char_width
= str_utf8_term_char_width
;
1472 result
.term_substring
= str_utf8_term_substring
;
1473 result
.trunc
= str_utf8_trunc
;
1474 result
.offset_to_pos
= str_utf8_offset_to_pos
;
1475 result
.column_to_pos
= str_utf8_column_to_pos
;
1476 result
.create_search_needle
= str_utf8_create_search_needle
;
1477 result
.release_search_needle
= str_utf8_release_search_needle
;
1478 result
.search_first
= str_utf8_search_first
;
1479 result
.search_last
= str_utf8_search_last
;
1480 result
.compare
= str_utf8_compare
;
1481 result
.ncompare
= str_utf8_ncompare
;
1482 result
.casecmp
= str_utf8_casecmp
;
1483 result
.ncasecmp
= str_utf8_ncasecmp
;
1484 result
.prefix
= str_utf8_prefix
;
1485 result
.caseprefix
= str_utf8_caseprefix
;
1486 result
.create_key
= str_utf8_create_key
;
1487 #ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME
1488 /* case insensitive sort files in "a1 a2 a10" order */
1489 result
.create_key_for_filename
= str_utf8_create_key_for_filename
;
1491 /* case insensitive sort files in "a1 a10 a2" order */
1492 result
.create_key_for_filename
= str_utf8_create_key
;
1494 result
.key_collate
= str_utf8_key_collate
;
1495 result
.release_key
= str_utf8_release_key
;
1500 /* --------------------------------------------------------------------------------------------- */