Merge branch '4572_cleanup'
[midnight-commander.git] / lib / strutil / strutil8bit.c
blob6564f1d7d9d3bdfb0010ecec3dd6138f8b88ee8c
1 /*
2 8bit strings utilities
4 Copyright (C) 2007-2024
5 Free Software Foundation, Inc.
7 Written by:
8 Rostislav Benes, 2007
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 #include <config.h>
28 #include <ctype.h>
29 #include <stdlib.h>
31 #include "lib/global.h"
32 #include "lib/strutil.h"
/* Functions for single-byte encodings: every character has width 1 and is
 * handled with the standard system functions.
 * There are only small differences between the functions in strutil8bit.c
 * and those in strutilascii.c.
 */
40 /*** global variables ****************************************************************************/
42 /*** file scope macro definitions ****************************************************************/
/*
 * Inlines to equalize 'char' signedness for single 'char' encodings.
 * Instead of writing
 *    isspace ((unsigned char) c);
 * you can write
 *    char_isspace (c);
 */
/* Generates 'static inline int char_<name> (char c)', which forwards to <name> ()
 * after converting the argument to 'unsigned char', so that the value handed to
 * the <ctype.h> routine does not depend on the signedness of plain 'char'. */
#define DECLARE_CTYPE_WRAPPER(func_name)                                                          \
static inline int                                                                                 \
char_##func_name (char c)                                                                         \
{                                                                                                 \
    const unsigned char uc = (unsigned char) c;                                                   \
                                                                                                  \
    return func_name ((int) uc);                                                                  \
}

/*** file scope type declarations ****************************************************************/

/*** forward declarations (file scope functions) *************************************************/

/*** file scope variables ************************************************************************/

/* Character appended by str_8bit_insert_replace_char (). */
static const char replch = '?';

/* --------------------------------------------------------------------------------------------- */
/*** file scope functions ************************************************************************/
/* --------------------------------------------------------------------------------------------- */

/* Instantiate char_isalnum () ... char_tolower () used throughout this file. */
/* *INDENT-OFF* */
DECLARE_CTYPE_WRAPPER (isalnum)
DECLARE_CTYPE_WRAPPER (isdigit)
DECLARE_CTYPE_WRAPPER (isprint)
DECLARE_CTYPE_WRAPPER (ispunct)
DECLARE_CTYPE_WRAPPER (isspace)
DECLARE_CTYPE_WRAPPER (toupper)
DECLARE_CTYPE_WRAPPER (tolower)
/* *INDENT-ON* */
79 /* --------------------------------------------------------------------------------------------- */
81 static void
82 str_8bit_insert_replace_char (GString *buffer)
84 g_string_append_c (buffer, replch);
87 /* --------------------------------------------------------------------------------------------- */
89 static gboolean
90 str_8bit_is_valid_string (const char *text)
92 (void) text;
93 return TRUE;
96 /* --------------------------------------------------------------------------------------------- */
/* Validity check for a single character: both arguments are ignored and
 * 1 is always returned. */
static int
str_8bit_is_valid_char (const char *ch, size_t size)
{
    (void) size;
    (void) ch;

    return 1;
}
106 /* --------------------------------------------------------------------------------------------- */
/* Advance *text by one byte. No end-of-string check is made. */
static void
str_8bit_cnext_char (const char **text)
{
    *text = *text + 1;
}
114 /* --------------------------------------------------------------------------------------------- */
/* Move *text back by one byte. No start-of-string check is made. */
static void
str_8bit_cprev_char (const char **text)
{
    *text = *text - 1;
}
122 /* --------------------------------------------------------------------------------------------- */
/* Step *text forward by one byte unless it already points at the terminating
 * NUL. Returns 1 when the pointer was advanced, 0 otherwise. */
static int
str_8bit_cnext_noncomb_char (const char **text)
{
    const char *cur = *text;

    if (cur[0] == '\0')
        return 0;

    *text = cur + 1;
    return 1;
}
134 /* --------------------------------------------------------------------------------------------- */
/* Step *text back by one byte unless it already equals @begin.
 * Returns 1 when the pointer was moved, 0 otherwise. */
static int
str_8bit_cprev_noncomb_char (const char **text, const char *begin)
{
    const char *cur = *text;

    if (cur == begin)
        return 0;

    *text = cur - 1;
    return 1;
}
146 /* --------------------------------------------------------------------------------------------- */
148 static gboolean
149 str_8bit_isspace (const char *text)
151 return char_isspace (text[0]) != 0;
154 /* --------------------------------------------------------------------------------------------- */
156 static gboolean
157 str_8bit_ispunct (const char *text)
159 return char_ispunct (text[0]) != 0;
162 /* --------------------------------------------------------------------------------------------- */
164 static gboolean
165 str_8bit_isalnum (const char *text)
167 return char_isalnum (text[0]) != 0;
170 /* --------------------------------------------------------------------------------------------- */
172 static gboolean
173 str_8bit_isdigit (const char *text)
175 return char_isdigit (text[0]) != 0;
178 /* --------------------------------------------------------------------------------------------- */
180 static gboolean
181 str_8bit_isprint (const char *text)
183 return char_isprint (text[0]) != 0;
186 /* --------------------------------------------------------------------------------------------- */
188 static gboolean
189 str_8bit_iscombiningmark (const char *text)
191 (void) text;
192 return FALSE;
195 /* --------------------------------------------------------------------------------------------- */
197 static int
198 str_8bit_toupper (const char *text, char **out, size_t *remain)
200 if (*remain <= 1)
201 return FALSE;
203 (*out)[0] = char_toupper (text[0]);
204 (*out)++;
205 (*remain)--;
206 return TRUE;
209 /* --------------------------------------------------------------------------------------------- */
211 static gboolean
212 str_8bit_tolower (const char *text, char **out, size_t *remain)
214 if (*remain <= 1)
215 return FALSE;
217 (*out)[0] = char_tolower (text[0]);
218 (*out)++;
219 (*remain)--;
220 return TRUE;
223 /* --------------------------------------------------------------------------------------------- */
/* Length of @text in characters, i.e. strlen () converted to int. */
static int
str_8bit_length (const char *text)
{
    const size_t bytes = strlen (text);

    return (int) bytes;
}
231 /* --------------------------------------------------------------------------------------------- */
/*
 * Length of @text limited to @size: returns the smaller of strlen (text) and
 * @size. A negative @size means "no limit" and yields strlen (text).
 */
static int
str_8bit_length2 (const char *text, int size)
{
    const size_t length = strlen (text);

    if (size < 0)
        return (int) length;

    if (length < (size_t) size)
        return (int) length;

    return size;
}
243 /* --------------------------------------------------------------------------------------------- */
245 static gchar *
246 str_8bit_conv_gerror_message (GError *mcerror, const char *def_msg)
248 GIConv conv;
249 gchar *ret;
251 /* glib messages are in UTF-8 charset */
252 conv = str_crt_conv_from ("UTF-8");
254 if (conv == INVALID_CONV)
255 ret = g_strdup (def_msg != NULL ? def_msg : "");
256 else
258 GString *buf;
260 buf = g_string_new ("");
262 if (str_convert (conv, mcerror->message, buf) != ESTR_FAILURE)
263 ret = g_string_free (buf, FALSE);
264 else
266 ret = g_strdup (def_msg != NULL ? def_msg : "");
267 g_string_free (buf, TRUE);
270 str_close_conv (conv);
273 return ret;
276 /* --------------------------------------------------------------------------------------------- */
278 static estr_t
279 str_8bit_vfs_convert_to (GIConv coder, const char *string, int size, GString *buffer)
281 estr_t result = ESTR_SUCCESS;
283 if (coder == str_cnv_not_convert)
284 g_string_append_len (buffer, string, size);
285 else
286 result = str_nconvert (coder, string, size, buffer);
288 return result;
291 /* --------------------------------------------------------------------------------------------- */
293 static const char *
294 str_8bit_term_form (const char *text)
296 static char result[BUF_MEDIUM];
297 char *actual;
298 size_t remain;
299 size_t length;
300 size_t pos = 0;
302 actual = result;
303 remain = sizeof (result);
304 length = strlen (text);
306 for (; pos < length && remain > 1; pos++, actual++, remain--)
307 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
309 actual[0] = '\0';
310 return result;
313 /* --------------------------------------------------------------------------------------------- */
315 static const char *
316 str_8bit_fit_to_term (const char *text, int width, align_crt_t just_mode)
318 static char result[BUF_MEDIUM];
319 char *actual;
320 size_t remain;
321 int ident = 0;
322 size_t length;
323 size_t pos = 0;
325 length = strlen (text);
326 actual = result;
327 remain = sizeof (result);
329 if ((int) length <= width)
331 switch (HIDE_FIT (just_mode))
333 case J_CENTER_LEFT:
334 case J_CENTER:
335 ident = (width - length) / 2;
336 break;
337 case J_RIGHT:
338 ident = width - length;
339 break;
340 default:
341 break;
344 if ((int) remain <= ident)
345 goto finally;
346 memset (actual, ' ', ident);
347 actual += ident;
348 remain -= ident;
350 for (; pos < length && remain > 1; pos++, actual++, remain--)
351 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
353 if (width - length - ident > 0)
355 if (remain <= width - length - ident)
356 goto finally;
357 memset (actual, ' ', width - length - ident);
358 actual += width - length - ident;
361 else if (IS_FIT (just_mode))
363 for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
364 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
366 if (remain <= 1)
367 goto finally;
368 actual[0] = '~';
369 actual++;
370 remain--;
372 pos += length - width + 1;
373 for (; pos < length && remain > 1; pos++, actual++, remain--)
374 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
376 else
378 switch (HIDE_FIT (just_mode))
380 case J_CENTER:
381 ident = (length - width) / 2;
382 break;
383 case J_RIGHT:
384 ident = length - width;
385 break;
386 default:
387 break;
390 pos += ident;
391 for (; pos < (gsize) (ident + width) && remain > 1; pos++, actual++, remain--)
392 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
395 finally:
396 if (actual >= result + sizeof (result))
397 actual = result + sizeof (result) - 1;
398 actual[0] = '\0';
399 return result;
402 /* --------------------------------------------------------------------------------------------- */
404 static const char *
405 str_8bit_term_trim (const char *text, int width)
407 static char result[BUF_MEDIUM];
408 size_t remain;
409 char *actual;
410 size_t length;
412 length = strlen (text);
413 actual = result;
414 remain = sizeof (result);
416 if (width > 0)
418 size_t pos;
420 if (width >= (int) length)
422 for (pos = 0; pos < length && remain > 1; pos++, actual++, remain--)
423 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
425 else if (width <= 3)
427 memset (actual, '.', width);
428 actual += width;
430 else
432 memset (actual, '.', 3);
433 actual += 3;
434 remain -= 3;
436 for (pos = length - width + 3; pos < length && remain > 1; pos++, actual++, remain--)
437 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
441 actual[0] = '\0';
442 return result;
445 /* --------------------------------------------------------------------------------------------- */
/*
 * Width of the first @length characters of @text: the smaller of
 * strlen (text) and @length. Passing (size_t) (-1) as @length means
 * "whole string".
 */
static int
str_8bit_term_width2 (const char *text, size_t length)
{
    const size_t text_len = strlen (text);

    if (length == (size_t) (-1) || text_len < length)
        return (int) text_len;

    return (int) length;
}
457 /* --------------------------------------------------------------------------------------------- */
/* Width of the whole of @text: str_8bit_term_width2 () without a limit. */
static int
str_8bit_term_width1 (const char *text)
{
    const size_t whole_string = (size_t) (-1);

    return str_8bit_term_width2 (text, whole_string);
}
465 /* --------------------------------------------------------------------------------------------- */
/* Width of one character: the argument is ignored and 1 is always returned. */
static int
str_8bit_term_char_width (const char *text)
{
    (void) text;

    return 1;
}
474 /* --------------------------------------------------------------------------------------------- */
476 static const char *
477 str_8bit_term_substring (const char *text, int start, int width)
479 static char result[BUF_MEDIUM];
480 size_t remain;
481 char *actual;
482 size_t length;
484 actual = result;
485 remain = sizeof (result);
486 length = strlen (text);
488 if (start < (int) length)
490 size_t pos;
492 for (pos = start; pos < length && width > 0 && remain > 1;
493 pos++, width--, actual++, remain--)
494 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
497 for (; width > 0 && remain > 1; actual++, remain--, width--)
498 actual[0] = ' ';
500 actual[0] = '\0';
501 return result;
504 /* --------------------------------------------------------------------------------------------- */
506 static const char *
507 str_8bit_trunc (const char *text, int width)
509 static char result[MC_MAXPATHLEN];
510 int remain;
511 char *actual;
512 size_t pos = 0;
513 size_t length;
515 actual = result;
516 remain = sizeof (result);
517 length = strlen (text);
519 if ((int) length > width)
521 for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
522 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
524 if (remain <= 1)
525 goto finally;
526 actual[0] = '~';
527 actual++;
528 remain--;
530 pos += length - width + 1;
531 for (; pos < length && remain > 1; pos++, actual++, remain--)
532 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
534 else
536 for (; pos < length && remain > 1; pos++, actual++, remain--)
537 actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
540 finally:
541 actual[0] = '\0';
542 return result;
545 /* --------------------------------------------------------------------------------------------- */
/* Map @length to a position: @text is ignored and @length is returned
 * converted to int. */
static int
str_8bit_offset_to_pos (const char *text, size_t length)
{
    const int position = (int) length;

    (void) text;

    return position;
}
554 /* --------------------------------------------------------------------------------------------- */
/* Map @pos to a position: @text is ignored and @pos is returned
 * converted to int. */
static int
str_8bit_column_to_pos (const char *text, size_t pos)
{
    const int position = (int) pos;

    (void) text;

    return position;
}
563 /* --------------------------------------------------------------------------------------------- */
565 static char *
566 str_8bit_create_search_needle (const char *needle, gboolean case_sen)
568 (void) case_sen;
569 return (char *) needle;
572 /* --------------------------------------------------------------------------------------------- */
574 static void
575 str_8bit_release_search_needle (char *needle, gboolean case_sen)
577 (void) case_sen;
578 (void) needle;
581 /* --------------------------------------------------------------------------------------------- */
/*
 * Return a newly allocated (g_strdup) copy of @str with every byte passed
 * through tolower (). Returns NULL when @str is NULL.
 */
static char *
str_8bit_strdown (const char *str)
{
    char *lowered;
    size_t i;

    if (str == NULL)
        return NULL;

    lowered = g_strdup (str);

    for (i = 0; lowered[i] != '\0'; i++)
        lowered[i] = char_tolower (lowered[i]);

    return lowered;
}
599 /* --------------------------------------------------------------------------------------------- */
601 static const char *
602 str_8bit_search_first (const char *text, const char *search, gboolean case_sen)
604 char *fold_text;
605 char *fold_search;
606 const char *match;
608 fold_text = case_sen ? (char *) text : str_8bit_strdown (text);
609 fold_search = case_sen ? (char *) search : str_8bit_strdown (search);
611 match = g_strstr_len (fold_text, -1, fold_search);
612 if (match != NULL)
614 size_t offset;
616 offset = match - fold_text;
617 match = text + offset;
620 if (!case_sen)
622 g_free (fold_text);
623 g_free (fold_search);
626 return match;
629 /* --------------------------------------------------------------------------------------------- */
631 static const char *
632 str_8bit_search_last (const char *text, const char *search, gboolean case_sen)
634 char *fold_text;
635 char *fold_search;
636 const char *match;
638 fold_text = case_sen ? (char *) text : str_8bit_strdown (text);
639 fold_search = case_sen ? (char *) search : str_8bit_strdown (search);
641 match = g_strrstr_len (fold_text, -1, fold_search);
642 if (match != NULL)
644 size_t offset;
646 offset = match - fold_text;
647 match = text + offset;
650 if (!case_sen)
652 g_free (fold_text);
653 g_free (fold_search);
656 return match;
659 /* --------------------------------------------------------------------------------------------- */
/* Byte-wise comparison of two strings; returns the result of strcmp (). */
static int
str_8bit_compare (const char *t1, const char *t2)
{
    const int order = strcmp (t1, t2);

    return order;
}
667 /* --------------------------------------------------------------------------------------------- */
/*
 * Compare two strings over the length of the shorter one, so a string
 * compares equal to any string it is a prefix of. Returns the result of
 * strncmp ().
 */
static int
str_8bit_ncompare (const char *t1, const char *t2)
{
    const size_t l1 = strlen (t1);
    const size_t l2 = strlen (t2);
    const size_t common = (l1 < l2) ? l1 : l2;

    return strncmp (t1, t2, common);
}
680 /* --------------------------------------------------------------------------------------------- */
682 static int
683 str_8bit_casecmp (const char *s1, const char *s2)
685 /* code from GLib */
687 #ifdef HAVE_STRCASECMP
688 g_return_val_if_fail (s1 != NULL, 0);
689 g_return_val_if_fail (s2 != NULL, 0);
691 return strcasecmp (s1, s2);
692 #else
694 g_return_val_if_fail (s1 != NULL, 0);
695 g_return_val_if_fail (s2 != NULL, 0);
697 for (; *s1 != '\0' && *s2 != '\0'; s1++, s2++)
699 gint c1, c2;
701 /* According to A. Cox, some platforms have islower's that
702 * don't work right on non-uppercase
704 c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
705 c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
706 if (c1 != c2)
707 return (c1 - c2);
710 return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
711 #endif
714 /* --------------------------------------------------------------------------------------------- */
716 static int
717 str_8bit_ncasecmp (const char *s1, const char *s2)
719 size_t l1, l2;
720 size_t n;
722 g_return_val_if_fail (s1 != NULL, 0);
723 g_return_val_if_fail (s2 != NULL, 0);
725 l1 = strlen (s1);
726 l2 = strlen (s2);
727 n = MIN (l1, l2);
729 /* code from GLib */
731 #ifdef HAVE_STRNCASECMP
732 return strncasecmp (s1, s2, n);
733 #else
735 for (; *s1 != '\0' && *s2 != '\0'; s1++, s2++)
737 gint c1, c2;
739 n--;
740 /* According to A. Cox, some platforms have islower's that
741 * don't work right on non-uppercase
743 c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
744 c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
745 if (c1 != c2)
746 return (c1 - c2);
749 if (n == 0)
750 return 0;
752 return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
754 #endif
757 /* --------------------------------------------------------------------------------------------- */
/* Length of the longest common prefix of @text and @prefix (exact match). */
static int
str_8bit_prefix (const char *text, const char *prefix)
{
    int common = 0;

    while (text[common] != '\0' && prefix[common] != '\0' && text[common] == prefix[common])
        common++;

    return common;
}
770 /* --------------------------------------------------------------------------------------------- */
/* Length of the longest common prefix of @text and @prefix, comparing the
 * characters after toupper (). */
static int
str_8bit_caseprefix (const char *text, const char *prefix)
{
    int common = 0;

    while (text[common] != '\0' && prefix[common] != '\0'
           && char_toupper (text[common]) == char_toupper (prefix[common]))
        common++;

    return common;
}
783 /* --------------------------------------------------------------------------------------------- */
/* String repair hook: does nothing, @text is left untouched. */
static void
str_8bit_fix_string (char *text)
{
    (void) text;

    return;
}
791 /* --------------------------------------------------------------------------------------------- */
793 static char *
794 str_8bit_create_key (const char *text, gboolean case_sen)
796 return case_sen ? (char *) text : str_8bit_strdown (text);
799 /* --------------------------------------------------------------------------------------------- */
801 static int
802 str_8bit_key_collate (const char *t1, const char *t2, gboolean case_sen)
804 return case_sen ? strcmp (t1, t2) : strcoll (t1, t2);
807 /* --------------------------------------------------------------------------------------------- */
809 static void
810 str_8bit_release_key (char *key, gboolean case_sen)
812 if (!case_sen)
813 g_free (key);
816 /* --------------------------------------------------------------------------------------------- */
817 /*** public functions ****************************************************************************/
818 /* --------------------------------------------------------------------------------------------- */
820 struct str_class
821 str_8bit_init (void)
823 struct str_class result;
825 result.conv_gerror_message = str_8bit_conv_gerror_message;
826 result.vfs_convert_to = str_8bit_vfs_convert_to;
827 result.insert_replace_char = str_8bit_insert_replace_char;
828 result.is_valid_string = str_8bit_is_valid_string;
829 result.is_valid_char = str_8bit_is_valid_char;
830 result.cnext_char = str_8bit_cnext_char;
831 result.cprev_char = str_8bit_cprev_char;
832 result.cnext_char_safe = str_8bit_cnext_char;
833 result.cprev_char_safe = str_8bit_cprev_char;
834 result.cnext_noncomb_char = str_8bit_cnext_noncomb_char;
835 result.cprev_noncomb_char = str_8bit_cprev_noncomb_char;
836 result.char_isspace = str_8bit_isspace;
837 result.char_ispunct = str_8bit_ispunct;
838 result.char_isalnum = str_8bit_isalnum;
839 result.char_isdigit = str_8bit_isdigit;
840 result.char_isprint = str_8bit_isprint;
841 result.char_iscombiningmark = str_8bit_iscombiningmark;
842 result.char_toupper = str_8bit_toupper;
843 result.char_tolower = str_8bit_tolower;
844 result.length = str_8bit_length;
845 result.length2 = str_8bit_length2;
846 result.length_noncomb = str_8bit_length;
847 result.fix_string = str_8bit_fix_string;
848 result.term_form = str_8bit_term_form;
849 result.fit_to_term = str_8bit_fit_to_term;
850 result.term_trim = str_8bit_term_trim;
851 result.term_width2 = str_8bit_term_width2;
852 result.term_width1 = str_8bit_term_width1;
853 result.term_char_width = str_8bit_term_char_width;
854 result.term_substring = str_8bit_term_substring;
855 result.trunc = str_8bit_trunc;
856 result.offset_to_pos = str_8bit_offset_to_pos;
857 result.column_to_pos = str_8bit_column_to_pos;
858 result.create_search_needle = str_8bit_create_search_needle;
859 result.release_search_needle = str_8bit_release_search_needle;
860 result.search_first = str_8bit_search_first;
861 result.search_last = str_8bit_search_last;
862 result.compare = str_8bit_compare;
863 result.ncompare = str_8bit_ncompare;
864 result.casecmp = str_8bit_casecmp;
865 result.ncasecmp = str_8bit_ncasecmp;
866 result.prefix = str_8bit_prefix;
867 result.caseprefix = str_8bit_caseprefix;
868 result.create_key = str_8bit_create_key;
869 result.create_key_for_filename = str_8bit_create_key;
870 result.key_collate = str_8bit_key_collate;
871 result.release_key = str_8bit_release_key;
873 return result;
876 /* --------------------------------------------------------------------------------------------- */