4 #include "lib/global.h" /* include glib.h */
10 /* Header file for strutil.c, strutilascii.c, strutil8bit.c, strutilutf8.c.
11 * There are two sorts of functions:
12 * 1. functions for working with growing strings and converting strings between
13 * different encodings.
14 * (implemented directly in strutil.c)
15 * 2. functions that hide differences between encodings derived from ASCII.
16 * (implemented separately in strutilascii.c, strutil8bit.c, strutilutf8.c)
17 * documentation is written for the UTF-8 version of the functions.
21 * functions that work with invalid strings are marked with "I"
23 * invalid bytes of a string are handled as one-byte characters with width 1; they
24 * are displayed as question marks. I-marked comparing functions try to keep
25 * the original value of these bytes.
28 /* combining characters
29 * displaying: all are handled as zero-width characters, except a combining character
30 * at the beginning of a string; that character has width one (a space is added before it),
31 * so str_term_width is not good for computing the width of single characters
32 * (it never returns zero, except for an empty string)
33 * for compatibility, strings are composed before displaying
34 * comparing: all strings are decomposed before comparing; n-compare
35 * functions do not work as usual, because equal strings do not have to be
36 * the same length in UTF-8. So they return 0 if one string is a prefix of the other
38 * str_prefix is used to determine how many characters from one string are
39 * a prefix of the second string. However, str_prefix returns the number of characters in
40 * decomposed form. (used in do_search (screen.c))
43 /*** typedefs(not structures) and defined constants **********************************************/
45 #define IS_FIT(x) ((x) & 0x0010) /* non-zero when the 0x0010 bit is set in x */
46 #define MAKE_FIT(x) ((x) | 0x0010) /* x with the 0x0010 bit set */
47 #define HIDE_FIT(x) ((x) & 0x000f) /* only the low four bits of x; the 0x0010 bit is masked off */
49 #define INVALID_CONV ((GIConv) (-1)) /* returned by str_crt_conv_from/str_crt_conv_to when the encoding is not supported */
51 /*** enums ***************************************************************************************/
53 /* results of conversion function
57 /* success means that the conversion has finished successfully
60 /* problem means that not every character was successfully converted (they are
61 * replaced with a question mark), so it is impossible to convert the string back.
64 /* failure means, that conversion is not possible (example: wrong encoding
70 /* alignment strings on terminal
77 /* if there is enough space for the string on the terminal,
78 * the string is centered, otherwise it is aligned to the left */
80 /* fit alignment: if the string is too long, it is truncated with '~' */
84 J_CENTER_LEFT_FIT
= 0x14
87 /* string-to-integer parsing results
93 /* These two values can be ORed together, to indicate that both errors occurred. */
95 LONGINT_INVALID_SUFFIX_CHAR
= 2,
97 LONGINT_INVALID_SUFFIX_CHAR_WITH_OVERFLOW
= (LONGINT_INVALID_SUFFIX_CHAR
| LONGINT_OVERFLOW
),
101 /*** structures declarations (and typedefs of structures)*****************************************/
103 /* all functions in str_class must be defined for every encoding */
107 gchar
*(*conv_gerror_message
) (GError
* error
, const char *def_msg
);
108 /*I*/ estr_t (*vfs_convert_to
) (GIConv coder
, const char *string
, int size
, GString
* buffer
);
109 /*I*/ void (*insert_replace_char
) (GString
* buffer
);
110 gboolean (*is_valid_string
) (const char *);
111 /*I*/ int (*is_valid_char
) (const char *, size_t);
112 /*I*/ void (*cnext_char
) (const char **);
113 void (*cprev_char
) (const char **);
114 void (*cnext_char_safe
) (const char **);
115 /*I*/ void (*cprev_char_safe
) (const char **);
116 /*I*/ int (*cnext_noncomb_char
) (const char **text
);
117 /*I*/ int (*cprev_noncomb_char
) (const char **text
, const char *begin
);
118 /*I*/ gboolean (*char_isspace
) (const char *);
119 /*I*/ gboolean (*char_ispunct
) (const char *);
120 /*I*/ gboolean (*char_isalnum
) (const char *);
121 /*I*/ gboolean (*char_isdigit
) (const char *);
122 /*I*/ gboolean (*char_isprint
) (const char *);
123 /*I*/ gboolean (*char_iscombiningmark
) (const char *);
124 /*I*/ int (*length
) (const char *);
125 /*I*/ int (*length2
) (const char *, int);
126 /*I*/ int (*length_noncomb
) (const char *);
127 /*I*/ gboolean (*char_toupper
) (const char *, char **, size_t *);
128 gboolean (*char_tolower
) (const char *, char **, size_t *);
129 void (*fix_string
) (char *);
130 /*I*/ const char *(*term_form
) (const char *);
131 /*I*/ const char *(*fit_to_term
) (const char *, int, align_crt_t
);
132 /*I*/ const char *(*term_trim
) (const char *text
, int width
);
133 /*I*/ const char *(*term_substring
) (const char *, int, int);
134 /*I*/ int (*term_width1
) (const char *);
135 /*I*/ int (*term_width2
) (const char *, size_t);
136 /*I*/ int (*term_char_width
) (const char *);
137 /*I*/ const char *(*trunc
) (const char *, int);
138 /*I*/ int (*offset_to_pos
) (const char *, size_t);
139 /*I*/ int (*column_to_pos
) (const char *, size_t);
140 /*I*/ char *(*create_search_needle
) (const char *, gboolean
);
141 void (*release_search_needle
) (char *, gboolean
);
142 const char *(*search_first
) (const char *, const char *, gboolean
);
143 const char *(*search_last
) (const char *, const char *, gboolean
);
144 int (*compare
) (const char *, const char *);
145 /*I*/ int (*ncompare
) (const char *, const char *);
146 /*I*/ int (*casecmp
) (const char *, const char *);
147 /*I*/ int (*ncasecmp
) (const char *, const char *);
148 /*I*/ int (*prefix
) (const char *, const char *);
149 /*I*/ int (*caseprefix
) (const char *, const char *);
150 /*I*/ char *(*create_key
) (const char *text
, gboolean case_sen
);
151 /*I*/ char *(*create_key_for_filename
) (const char *text
, gboolean case_sen
);
152 /*I*/ int (*key_collate
) (const char *t1
, const char *t2
, gboolean case_sen
);
153 /*I*/ void (*release_key
) (char *key
, gboolean case_sen
);
157 /*** global variables defined in .c file *********************************************************/
159 /* standard convertors */
160 extern GIConv str_cnv_to_term
;
161 extern GIConv str_cnv_from_term
;
162 /* from terminal encoding to terminal encoding */
163 extern GIConv str_cnv_not_convert
;
165 /*** declarations of public functions ************************************************************/
167 struct str_class
str_utf8_init (void);
168 struct str_class
str_8bit_init (void);
169 struct str_class
str_ascii_init (void);
171 /* create convertor from "from_enc" to terminal encoding
172 * if "from_enc" is not supported return INVALID_CONV
174 GIConv
str_crt_conv_from (const char *);
176 /* create convertor from terminal encoding to "to_enc"
177 * if "to_enc" is not supported return INVALID_CONV
179 GIConv
str_crt_conv_to (const char *);
181 /* close convertor, do not close str_cnv_to_term, str_cnv_from_term,
182 * str_cnv_not_convert
184 void str_close_conv (GIConv
);
186 /* return one of the unused buffers (.used == 0) or create a new one
187 * the returned buffer has .used set to 1
190 /* convert string using coder, result of conversion is appended at end of buffer
191 * return ESTR_SUCCESS if there was no problem.
192 * otherwise return ESTR_PROBLEM or ESTR_FAILURE
194 estr_t
str_convert (GIConv
, const char *, GString
*);
195 estr_t
str_nconvert (GIConv
, const char *, int, GString
*);
197 /* convert GError message (which is in UTF-8) to terminal charset
198 * def_msg is used if the result of the error->message conversion is ESTR_FAILURE
199 * return newly allocated null-terminated string, which needs to be freed
202 gchar
*str_conv_gerror_message (GError
* error
, const char *def_msg
);
204 /* return only ESTR_SUCCESS or ESTR_FAILURE, because vfs must be able to convert
205 * result to original string. (so no replace with questionmark)
206 * if coder is str_cnv_from_term or str_cnv_not_convert, string is only copied,
207 * so is possible to show file, that is not valid in terminal encoding
209 estr_t
str_vfs_convert_from (GIConv
, const char *, GString
*);
211 /* if coder is str_cnv_to_term or str_cnv_not_convert, string is only copied,
212 * does replace with questionmark
215 estr_t
str_vfs_convert_to (GIConv
, const char *, int, GString
*);
217 /* printf function for str_buffer, append result of printf at the end of buffer
220 void str_printf (GString
* buffer
, const char *format
, ...) G_GNUC_PRINTF (2, 3);
223 /* add standard replacement character in terminal encoding
225 void str_insert_replace_char (GString
*);
227 /* init strings and set terminal encoding,
228 * if is termenc NULL, detect terminal encoding
229 * create all str_cnv_* and set functions for terminal encoding
231 void str_init_strings (const char *termenc
);
233 /* free all str_buffer and all str_cnv_*
235 void str_uninit_strings (void);
237 /* try to convert characters in ch to output using conv
238 * ch_size is size of ch, can be (size_t)(-1) (-1 only for ASCII
239 * compatible encoding, for other must be set)
240 * return ESTR_SUCCESS if conversion was successful,
241 * ESTR_PROBLEM if ch contains only part of characters,
242 * ESTR_FAILURE if conversion is not possible
244 estr_t
str_translate_char (GIConv conv
, const char *ch
, size_t ch_size
,
245 char *output
, size_t out_size
);
247 /* test, if text is valid in terminal encoding
250 gboolean
str_is_valid_string (const char *text
);
252 /* test whether the first char of ch is valid
253 * size is how many bytes the characters occupy, could be (size_t)(-1)
254 * return 1 if it is valid, -1 if it is invalid or -2 if it is only part of
255 * multibyte character
258 int str_is_valid_char (const char *ch
, size_t size
);
260 /* return next characters after text, do not call on the end of string
262 char *str_get_next_char (char *text
);
263 const char *str_cget_next_char (const char *text
);
265 /* return previous characters before text, do not call on the start of strings
267 char *str_get_prev_char (char *text
);
268 const char *str_cget_prev_char (const char *text
);
270 /* set text to next characters, do not call on the end of string
272 void str_next_char (char **text
);
273 void str_cnext_char (const char **text
);
275 /* set text to previous characters, do not call on the start of strings
277 void str_prev_char (char **text
);
278 void str_cprev_char (const char **text
);
280 /* return next characters after text, do not call on the end of string
281 * works with invalid string
284 char *str_get_next_char_safe (char *text
);
285 const char *str_cget_next_char_safe (const char *text
);
287 /* return previous characters before text, do not call on the start of strings
288 * works with invalid string
291 char *str_get_prev_char_safe (char *text
);
292 const char *str_cget_prev_char_safe (const char *text
);
294 /* set text to next characters, do not call on the end of string
295 * works with invalid string
298 void str_next_char_safe (char **text
);
299 void str_cnext_char_safe (const char **text
);
301 /* set text to previous characters, do not call on the start of strings
302 * works with invalid string
305 void str_prev_char_safe (char **text
);
306 void str_cprev_char_safe (const char **text
);
308 /* set text to next noncombining characters, check the end of text
309 * return how many characters was skipped
310 * works with invalid string
313 int str_next_noncomb_char (char **text
);
314 int str_cnext_noncomb_char (const char **text
);
316 /* set text to previous noncombining characters, search stop at begin
317 * return how many characters was skipped
318 * works with invalid string
321 int str_prev_noncomb_char (char **text
, const char *begin
);
322 int str_cprev_noncomb_char (const char **text
, const char *begin
);
324 /* if first characters in ch is space, tabulator or new lines
327 gboolean
str_isspace (const char *ch
);
329 /* if first characters in ch is punctuation or symbol
332 gboolean
str_ispunct (const char *ch
);
334 /* if first characters in ch is alphanum
337 gboolean
str_isalnum (const char *ch
);
339 /* if first characters in ch is digit
342 gboolean
str_isdigit (const char *ch
);
344 /* if first characters in ch is printable
347 gboolean
str_isprint (const char *ch
);
349 /* if first character in ch is a combining mark (only in utf-8)
350 * combining marks are assumed to be zero width
353 gboolean
str_iscombiningmark (const char *ch
);
355 /* write upper form of first character in ch into out
356 * decrease remain by size of returned character
357 * if out is not big enough, do nothing
359 gboolean
str_toupper (const char *ch
, char **out
, size_t * remain
);
361 /* write lower form of first character in ch into out
362 * decrease remain by size of returned character
363 * if out is not big enough, do nothing
365 gboolean
str_tolower (const char *ch
, char **out
, size_t * remain
);
367 /* return length of text in characters
370 int str_length (const char *text
);
372 /* return length of text in characters, limit to size
375 int str_length2 (const char *text
, int size
);
377 /* return length of one char
380 int str_length_char (const char *);
382 /* return length of text in characters, count only noncombining characters
385 int str_length_noncomb (const char *text
);
387 /* replace all invalid characters in text with questionmark
388 * after return, text is valid string in terminal encoding
391 void str_fix_string (char *text
);
393 /* replace all invalid characters in text with questionmark
394 * replace all unprintable characters with '.'
395 * return static allocated string, "text" is not changed
396 * returned string do not need to be freed
399 const char *str_term_form (const char *text
);
401 /* like str_term_form, but text can be aligned to width
402 * alignment is specified in just_mode (J_LEFT, J_LEFT_FIT, ...)
403 * result is completed with spaces to width
406 const char *str_fit_to_term (const char *text
, int width
, align_crt_t just_mode
);
408 /* like str_term_form, but when text is wider than width, three dots are
409 * inserted at begin and result is completed with suffix of text
410 * no additional spaces are inserted
413 const char *str_term_trim (const char *text
, int width
);
416 /* like str_term_form, but return only specified substring
417 * start - column (position) on terminal, where substring begin
418 * result is completed with spaces to width
421 const char *str_term_substring (const char *text
, int start
, int width
);
423 /* return the width that text will occupy on the terminal
426 int str_term_width1 (const char *text
);
428 /* return the width that text will occupy on the terminal
429 * text is limited by length in characters
432 int str_term_width2 (const char *text
, size_t length
);
434 /* return the width that the character will occupy on the terminal
435 * combining characters are always zero width
438 int str_term_char_width (const char *text
);
440 /* convert position in characters to position in bytes
443 int str_offset_to_pos (const char *text
, size_t length
);
445 /* convert position on terminal to position in characters
448 int str_column_to_pos (const char *text
, size_t pos
);
450 /* like str_fit_to_term with just_mode = J_LEFT_FIT,
451 * but do not insert additional spaces
454 const char *str_trunc (const char *text
, int width
);
456 /* create needle that will be searched for in str_search_first/last,
457 * so needle can be reused
458 * in UTF-8 return normalized form of needle
460 char *str_create_search_needle (const char *needle
, gboolean case_sen
);
462 /* free needle returned by str_create_search_needle
464 void str_release_search_needle (char *needle
, gboolean case_sen
);
466 /* search for first occurrence of needle in text
468 const char *str_search_first (const char *text
, const char *needle
, gboolean case_sen
);
470 /* search for last occurrence of needle in text
472 const char *str_search_last (const char *text
, const char *needle
, gboolean case_sen
);
474 /* case sensitive compare two strings
477 int str_compare (const char *t1
, const char *t2
);
479 /* case sensitive compare two strings
480 * if one string is prefix of the other string, return 0
483 int str_ncompare (const char *t1
, const char *t2
);
485 /* case insensitive compare two strings
488 int str_casecmp (const char *t1
, const char *t2
);
490 /* case insensitive compare two strings
491 * if one string is prefix of the other string, return 0
494 int str_ncasecmp (const char *t1
, const char *t2
);
496 /* return how many bytes are the same from the start in text and prefix
497 * both strings are decomposed before comparing and the return value is counted
498 * in decomposed form, too. calling with prefix, prefix, you get the size in bytes
499 * of prefix in decomposed form,
502 int str_prefix (const char *text
, const char *prefix
);
504 /* case insensitive version of str_prefix
507 int str_caseprefix (const char *text
, const char *prefix
);
509 /* create a key that is used by str_key_collate
512 char *str_create_key (const char *text
, gboolean case_sen
);
514 /* create a key that is used by str_key_collate
515 * should be aware of dot '.' in text
518 char *str_create_key_for_filename (const char *text
, gboolean case_sen
);
520 /* compare two strings using LC_COLLATE, if possible
521 * if case_sen is set, comparing is case sensitive,
522 * case_sen must be the same for str_create_key, str_key_collate and str_release_key
525 int str_key_collate (const char *t1
, const char *t2
, gboolean case_sen
);
527 /* release key created by str_create_key; the only right way to release a key
530 void str_release_key (char *key
, gboolean case_sen
);
532 /* return TRUE if codeset_name is utf8 or utf-8
535 gboolean
str_isutf8 (const char *codeset_name
);
537 const char *str_detect_termencoding (void);
539 int str_verscmp (const char *s1
, const char *s2
);
541 /* return how many lines and columns will text occupy on terminal
543 void str_msg_term_size (const char *text
, int *lines
, int *columns
);
546 * skip the first occurrences of needle in haystack
548 * @param haystack pointer to string
549 * @param needle pointer to string
550 * @param skip_count number of needle occurrences to skip
552 * @return pointer to skip_count+1 needle (or NULL if not found).
555 char *strrstr_skip_count (const char *haystack
, const char *needle
, size_t skip_count
);
557 char *str_replace_all (const char *haystack
, const char *needle
, const char *replacement
);
559 strtol_error_t
xstrtoumax (const char *s
, char **ptr
, int base
, uintmax_t * val
,
560 const char *valid_suffixes
);
561 uintmax_t parse_integer (const char *str
, gboolean
* invalid
);
563 /* --------------------------------------------------------------------------------------------- */
564 /*** inline functions ****************************************************************************/
565 /* --------------------------------------------------------------------------------------------- */
568 str_replace (char *s
, char from
, char to
)
570 for (; *s
!= '\0'; s
++)
577 /* --------------------------------------------------------------------------------------------- */
579 * strcpy is unsafe on overlapping memory areas, so define memmove-alike
581 * Makes sense only when:
584 * * dest and src are pointers to one object (as Roland Illig pointed out).
586 * We can't use str*cpy functions here:
587 * http://kerneltrap.org/mailarchive/openbsd-misc/2008/5/27/1951294
589 * @param dest pointer to string
590 * @param src pointer to string
592 * @return dest (the pointer returned by memmove)
597 str_move (char *dest
, const char *src
)
601 g_assert (dest
<= src
);
603 n
= strlen (src
) + 1; /* + '\0' */
605 return (char *) memmove (dest
, src
, n
);
608 /* --------------------------------------------------------------------------------------------- */
610 #endif /* MC_STRUTIL_H */