Merge branch '1858_segfault_in_search'
[midnight-commander.git] / src / strutil.h
blobe379b553f6b888178c82e25f4c95d07dfb3c6d52
1 #ifndef MC_STRUTIL_H
2 #define MC_STRUTIL_H
4 #include "global.h" /* include glib.h */
/* Header file for strutil.c, strutilascii.c, strutil8bit.c, strutilutf8.c.
 * There are two sorts of functions:
 * 1. functions for working with growing strings and for converting strings
 *    between different encodings
 *    (implemented directly in strutil.c)
 * 2. functions that hide the differences between encodings derived from ASCII
 *    (implemented separately in strutilascii.c, strutil8bit.c, strutilutf8.c);
 *    the documentation is written for the UTF-8 version of these functions.
 */
/* invalid strings
 * functions that work with invalid strings are marked with "I"
 * in the documentation
 * invalid bytes of a string are handled as one-byte characters with width 1;
 * they are displayed as question marks. I-marked comparing functions try to
 * keep the original value of these bytes.
 */
/* combining characters
 * displaying: all are handled as zero-width characters, except a combining
 * character at the beginning of a string; this character has width one (a
 * space is added before it), so str_term_width is not good for computing the
 * width of single characters (it never returns zero, except for an empty
 * string)
 * for compatibility, strings are composed before displaying
 * comparing: comparing decomposes all strings before comparing; n-compare
 * functions do not work as is usual, because the same strings do not have to
 * be the same length in UTF-8. So they return 0 if one string is a prefix of
 * the other one.
 * str_prefix is used to determine how many characters from one string are
 * a prefix in the second string. However, str_prefix returns the number of
 * characters in decomposed form. (used in do_search (screen.c))
 */
/* Results of the conversion functions. */
typedef enum {
    /* success means that the conversion has been finished successfully */
    ESTR_SUCCESS = 0,
    /* problem means that not every character was successfully converted
     * (they are replaced with question marks), so it is impossible to
     * convert the string back */
    ESTR_PROBLEM = 1,
    /* failure means that the conversion is not possible (example: wrong
     * encoding of the input string) */
    ESTR_FAILURE = 2
} estr_t;
/* Alignment of strings on the terminal.
 * Every *_FIT value is the matching plain value with bit 0x10 set.
 */
typedef enum {
    J_LEFT = 0x01,
    J_RIGHT = 0x02,
    J_CENTER = 0x03,
    /* if there is enough space for the string on the terminal,
     * the string is centered, otherwise it is aligned to the left */
    J_CENTER_LEFT = 0x04,
    /* fit alignment: if the string is too long, it is truncated with '~' */
    J_LEFT_FIT = 0x11,
    J_RIGHT_FIT = 0x12,
    J_CENTER_FIT = 0x13,
    J_CENTER_LEFT_FIT = 0x14
} align_crt_t;
/* Helpers for the "fit" flag (bit 0x10) of an alignment value.
 * IS_FIT yields non-zero (0x10, not 1) when the flag is set,
 * MAKE_FIT sets the flag, HIDE_FIT keeps only the low four bits.
 */
#define IS_FIT(x) ((x) & 0x0010)
#define MAKE_FIT(x) ((x) | 0x0010)
#define HIDE_FIT(x) ((x) & 0x000f)
/* Value of a converter that could not be created: (GIConv) (-1). */
#define INVALID_CONV ((GIConv) (-1))
77 /* standard convertors */
78 extern GIConv str_cnv_to_term;
79 extern GIConv str_cnv_from_term;
80 /* from terminal encoding to terminal encoding */
81 extern GIConv str_cnv_not_convert;
83 /* all functions in str_class must be defined for every encoding */
84 struct str_class {
85 gchar *(*conv_gerror_message) (GError *error, const char *def_msg); /*I*/
86 estr_t (*vfs_convert_to) (GIConv coder, const char *string,
87 int size, GString *buffer); /*I*/
88 void (*insert_replace_char) (GString *buffer);
89 int (*is_valid_string) (const char *); /*I*/
90 int (*is_valid_char) (const char *, size_t); /*I*/
91 void (*cnext_char) (const char **);
92 void (*cprev_char) (const char **);
93 void (*cnext_char_safe) (const char **); /*I*/
94 void (*cprev_char_safe) (const char **); /*I*/
95 int (*cnext_noncomb_char) (const char **text); /*I*/
96 int (*cprev_noncomb_char) (const char **text, const char *begin); /*I*/
97 int (*isspace) (const char *); /*I*/
98 int (*ispunct) (const char *); /*I*/
99 int (*isalnum) (const char *); /*I*/
100 int (*isdigit) (const char *); /*I*/
101 int (*isprint) (const char *); /*I*/
102 int (*iscombiningmark) (const char *); /*I*/
103 int (*length) (const char *); /*I*/
104 int (*length2) (const char *, int); /*I*/
105 int (*length_noncomb) (const char *); /*I*/
106 int (*toupper) (const char *, char **, size_t *);
107 int (*tolower) (const char *, char **, size_t *);
108 void (*fix_string) (char *); /*I*/
109 const char *(*term_form) (const char *); /*I*/
110 const char *(*fit_to_term) (const char *, int, align_crt_t); /*I*/
111 const char *(*term_trim) (const char *text, int width); /*I*/
112 void (*msg_term_size) (const char *, int *, int *); /*I*/
113 const char *(*term_substring) (const char *, int, int); /*I*/
114 int (*term_width1) (const char *); /*I*/
115 int (*term_width2) (const char *, size_t); /*I*/
116 int (*term_char_width) (const char *); /*I*/
117 const char *(*trunc) (const char *, int); /*I*/
118 int (*offset_to_pos) (const char *, size_t); /*I*/
119 int (*column_to_pos) (const char *, size_t); /*I*/
120 char *(*create_search_needle) (const char *, int);
121 void (*release_search_needle) (char *, int);
122 const char *(*search_first) (const char *, const char *, int);
123 const char *(*search_last) (const char *, const char *, int);
124 int (*compare) (const char *, const char *); /*I*/
125 int (*ncompare) (const char *, const char *); /*I*/
126 int (*casecmp) (const char *, const char *); /*I*/
127 int (*ncasecmp) (const char *, const char *); /*I*/
128 int (*prefix) (const char *, const char *); /*I*/
129 int (*caseprefix) (const char *, const char *); /*I*/
130 char *(*create_key) (const char *text, int case_sen); /*I*/
131 char *(*create_key_for_filename) (const char *text, int case_sen); /*I*/
132 int (*key_collate) (const char *t1, const char *t2, int case_sen); /*I*/
133 void (*release_key) (char *key, int case_sen); /*I*/
/* Each function returns, by value, a struct str_class for one encoding
 * family (UTF-8, 8-bit, ASCII).
 */
struct str_class str_utf8_init (void);
struct str_class str_8bit_init (void);
struct str_class str_ascii_init (void);
140 /* create convertor from "from_enc" to terminal encoding
141 * if "from_enc" is not supported return INVALID_CONV
143 GIConv str_crt_conv_from (const char *);
145 /* create convertor from terminal encoding to "to_enc"
146 * if "to_enc" is not supported return INVALID_CONV
148 GIConv str_crt_conv_to (const char *);
150 /* close convertor, do not close str_cnv_to_term, str_cnv_from_term,
151 * str_cnv_not_convert
153 void str_close_conv (GIConv);
/* return one of the unused buffers (.used == 0) or create a new one
 * the returned buffer has .used set to 1
 * NOTE(review): no declaration follows this comment in this file.
 */
159 /* convert string using coder, result of conversion is appended at end of buffer
160 * return ESTR_SUCCESS if there was no problem.
161 * otherwise return ESTR_PROBLEM or ESTR_FAILURE
163 estr_t str_convert (GIConv, const char *, GString *);
164 estr_t str_nconvert (GIConv, const char *, int, GString *);
166 /* convert GError message (which in UTF-8) to terminal charset
167 * def_char is used if result of error->str conversion if ESTR_FAILURE
168 * return new allocated null-terminated string, which is need to be freed
171 gchar *str_conv_gerror_message (GError *error, const char *def_msg);
173 /* return only ESTR_SUCCESS or ESTR_FAILURE, because vfs must be able to convert
174 * result to original string. (so no replace with questionmark)
175 * if coder is str_cnv_from_term or str_cnv_not_convert, string is only copied,
176 * so is possible to show file, that is not valid in terminal encoding
178 estr_t str_vfs_convert_from (GIConv, const char *, GString *);
180 /* if coder is str_cnv_to_term or str_cnv_not_convert, string is only copied,
181 * does replace with questionmark
184 estr_t str_vfs_convert_to (GIConv, const char *, int, GString *);
186 /* printf functin for str_buffer, append result of printf at the end of buffer
188 void
189 str_printf (GString *, const char *, ...);
191 /* add standard replacement character in terminal encoding
193 void str_insert_replace_char (GString *);
/* init strings and set the terminal encoding;
 * if termenc is NULL, detect the terminal encoding
 * create all str_cnv_* and set the functions for the terminal encoding
 */
void str_init_strings (const char *termenc);

/* free all str_buffer and all str_cnv_*
 */
void str_uninit_strings (void);
205 /* try convert characters in ch to output using conv
206 * ch_size is size of ch, can by (size_t)(-1) (-1 only for ASCII
207 * compatible encoding, for other must be set)
208 * return ESTR_SUCCESS if conversion was successfully,
209 * ESTR_PROBLEM if ch contains only part of characters,
210 * ESTR_FAILURE if conversion is not possible
212 estr_t str_translate_char (GIConv conv, const char *ch, size_t ch_size,
213 char *output, size_t out_size);
/* test if text is valid in the terminal encoding
 * I
 */
int str_is_valid_string (const char *text);

/* test if the first char of ch is valid
 * size: how many bytes the character occupies, could be (size_t)(-1)
 * return 1 if it is valid, -1 if it is invalid or -2 if it is only part of
 * a multibyte character
 * I
 */
int str_is_valid_char (const char *ch, size_t size);
/* return the next character after text; do not call at the end of a string
 */
char *str_get_next_char (char *text);
const char *str_cget_next_char (const char *text);

/* return the previous character before text; do not call at the start of
 * a string
 */
char *str_get_prev_char (char *text);
const char *str_cget_prev_char (const char *text);

/* set text to the next character; do not call at the end of a string
 */
void str_next_char (char **text);
void str_cnext_char (const char **text);

/* set text to the previous character; do not call at the start of a string
 */
void str_prev_char (char **text);
void str_cprev_char (const char **text);
/* return the next character after text; do not call at the end of a string
 * works with invalid strings
 * I
 */
char *str_get_next_char_safe (char *text);
const char *str_cget_next_char_safe (const char *text);

/* return the previous character before text; do not call at the start of
 * a string
 * works with invalid strings
 * I
 */
char *str_get_prev_char_safe (char *text);
const char *str_cget_prev_char_safe (const char *text);

/* set text to the next character; do not call at the end of a string
 * works with invalid strings
 * I
 */
void str_next_char_safe (char **text);
void str_cnext_char_safe (const char **text);

/* set text to the previous character; do not call at the start of a string
 * works with invalid strings
 * I
 */
void str_prev_char_safe (char **text);
void str_cprev_char_safe (const char **text);
/* set text to the next noncombining character; checks for the end of text
 * return how many characters were skipped
 * works with invalid strings
 * I
 */
int str_next_noncomb_char (char **text);
int str_cnext_noncomb_char (const char **text);

/* set text to the previous noncombining character; the search stops at begin
 * return how many characters were skipped
 * works with invalid strings
 * I
 */
int str_prev_noncomb_char (char **text, const char *begin);
int str_cprev_noncomb_char (const char **text, const char *begin);
/* if the first character in ch is a space, tabulator or new line
 * I
 */
int str_isspace (const char *ch);

/* if the first character in ch is punctuation or a symbol
 * I
 */
int str_ispunct (const char *ch);

/* if the first character in ch is alphanumeric
 * I
 */
int str_isalnum (const char *ch);

/* if the first character in ch is a digit
 * I
 */
int str_isdigit (const char *ch);

/* if the first character in ch is printable
 * I
 */
int str_isprint (const char *ch);

/* if the first character in ch is a combining mark (only in UTF-8)
 * combining marks are assumed to be zero width
 * I
 */
int str_iscombiningmark (const char *ch);
/* write the upper form of the first character in ch into out
 * decrease remain by the size of the written character
 * if out is not big enough, do nothing
 */
int str_toupper (const char *ch, char **out, size_t *remain);

/* write the lower form of the first character in ch into out
 * decrease remain by the size of the written character
 * if out is not big enough, do nothing
 */
int str_tolower (const char *ch, char **out, size_t *remain);
/* return the length of text in characters
 * I
 */
int str_length (const char *text);

/* return the length of text in characters, limited to size
 * I
 */
int str_length2 (const char *text, int size);

/* return the length of one char
 * I
 */
int str_length_char (const char *);

/* return the length of text in characters, counting only noncombining
 * characters
 * I
 */
int str_length_noncomb (const char *text);
/* replace all invalid characters in text with question marks
 * after return, text is a valid string in the terminal encoding
 * I
 */
void str_fix_string (char *text);

/* replace all invalid characters in text with question marks
 * replace all unprintable characters with '.'
 * return a statically allocated string; "text" is not changed
 * the returned string does not need to be freed
 * I
 */
const char *str_term_form (const char *text);
369 /* like str_term_form, but text can be alignment to width
370 * alignment is specified in just_mode (J_LEFT, J_LEFT_FIT, ...)
371 * result is completed with spaces to width
374 const char *str_fit_to_term (const char *text, int width, align_crt_t just_mode);
/* like str_term_form, but when text is wider than width, three dots are
 * inserted at the beginning and the result is completed with a suffix of text
 * no additional spaces are inserted
 * I
 */
const char *str_term_trim (const char *text, int width);
/* return how many lines and columns text will occupy on the terminal
 * I
 */
void str_msg_term_size (const char *text, int *lines, int *columns);

/* like str_term_form, but return only the specified substring
 * start - column (position) on the terminal where the substring begins
 * the result is completed with spaces to width
 * I
 */
const char *str_term_substring (const char *text, int start, int width);
/* return the width that text will occupy on the terminal
 * I
 */
int str_term_width1 (const char *text);

/* return the width that text will occupy on the terminal
 * text is limited by length in characters
 * I
 */
int str_term_width2 (const char *text, size_t length);

/* return the width that the character will occupy on the terminal
 * combining characters are always zero width
 * I
 */
int str_term_char_width (const char *text);
/* convert a position in characters to a position in bytes
 * I
 */
int str_offset_to_pos (const char *text, size_t length);

/* convert a position on the terminal to a position in characters
 * I
 */
int str_column_to_pos (const char *text, size_t pos);

/* like str_fit_to_term with just_mode = J_LEFT_FIT,
 * but do not insert additional spaces
 * I
 */
const char *str_trunc (const char *text, int width);
/* create a needle that will be searched for in str_search_first/last,
 * so the needle can be reused
 * in UTF-8 return the normalized form of needle
 */
char *str_create_search_needle (const char *needle, int case_sen);

/* free a needle returned by str_create_search_needle
 */
void str_release_search_needle (char *needle, int case_sen);

/* search for the first occurrence of needle in text
 */
const char *str_search_first (const char *text, const char *needle, int case_sen);

/* search for the last occurrence of needle in text
 */
const char *str_search_last (const char *text, const char *needle, int case_sen);
/* case sensitive comparison of two strings
 * I
 */
int str_compare (const char *t1, const char *t2);

/* case sensitive comparison of two strings
 * if one string is a prefix of the other string, return 0
 * I
 */
int str_ncompare (const char *t1, const char *t2);

/* case insensitive comparison of two strings
 * I
 */
int str_casecmp (const char *t1, const char *t2);

/* case insensitive comparison of two strings
 * if one string is a prefix of the other string, return 0
 * I
 */
int str_ncasecmp (const char *t1, const char *t2);
/* return how many bytes are the same from the start in text and prefix
 * both strings are decomposed before comparing and the return value is
 * counted in decomposed form, too. calling with prefix, prefix, you get the
 * size in bytes of prefix in decomposed form
 * NOTE(review): the note on combining characters near the top of this file
 * says "characters" rather than "bytes" - confirm the unit in strutil.c
 * I
 */
int str_prefix (const char *text, const char *prefix);

/* case insensitive version of str_prefix
 * I
 */
int str_caseprefix (const char *text, const char *prefix);
/* create a key that is used by str_key_collate
 * I
 */
char *str_create_key (const char *text, int case_sen);

/* create a key that is used by str_key_collate
 * should be aware of the dot '.' in text
 * I
 */
char *str_create_key_for_filename (const char *text, int case_sen);

/* compare two strings using LC_COLLATE, if possible
 * if case_sen is set, the comparison is case sensitive
 * case_sen must be the same for str_create_key, str_key_collate and
 * str_release_key
 * I
 */
int str_key_collate (const char *t1, const char *t2, int case_sen);

/* release a key created by str_create_key; the only right way to release
 * a key
 * I
 */
void str_release_key (char *key, int case_sen);
/* return 1 if codeset_name is utf8 or utf-8
 */
int str_isutf8 (const char *codeset_name);

/* NOTE(review): undocumented in the original; presumably returns the name of
 * the detected terminal encoding - confirm the semantics and the ownership of
 * the returned string in strutil.c
 */
const char *str_detect_termencoding (void);
511 #endif /* MC_STRUTIL_H*/