Updated doc/NEWS file
[midnight-commander.git] / lib / strutil.h
blob833e7eda75b8491a3c8b9f06c4d5fbc863cbdecc
1 #ifndef MC_STRUTIL_H
2 #define MC_STRUTIL_H
4 #include "lib/global.h" /* include glib.h */
6 #include <sys/types.h>
7 #include <string.h>
8 #ifdef HAVE_ASSERT_H
9 #include <assert.h> /* assert() */
10 #endif
12 /* Header file for strutil.c, strutilascii.c, strutil8bit.c, strutilutf8.c.
13 * There are two sorts of functions:
14 * 1. functions for working with growing strings and converting strings between
15 * different encodings.
16 * (implemented directly in strutil.c)
17 * 2. functions that hide differences between encodings derived from ASCII.
18 * (implemented separately in strutilascii.c, strutil8bit.c, strutilutf8.c)
19 * documentation is written for the UTF-8 version of the functions.
22 /* invalid strings
23 * functions that work with invalid strings are marked with "I"
24 * in documentation
25 * invalid bytes of string are handled as one byte characters with width 1, they
26 * are displayed as question marks, I-marked comparing functions try to keep
27 * the original value of these bytes.
30 /* combining characters
31 * displaying: all are handled as zero width characters, except a combining
32 * character at the beginning of a string; this character has width one (a space
33 * is added before it), so str_term_width is not good for computing the width of
34 * single characters (it never returns zero, except for an empty string)
35 * for compatibility, strings are composed before displaying
36 * comparing: comparing decomposes all strings before comparing, n-compare
37 * functions do not work as is usual, because same strings do not have to be
38 * same length in UTF-8. So they return 0 if one string is prefix of the other
39 * one.
40 * str_prefix is used to determine how many characters from one string are
41 * a prefix in the second string. However, str_prefix returns the number of
42 * characters in decomposed form. (used in do_search (screen.c))
45 /*** typedefs(not structures) and defined constants **********************************************/
/* Helpers for the "fit" flag of an alignment value (see align_crt_t):
 * bit 0x10 is the fit flag, the low four bits hold the base alignment. */
#define IS_FIT(x) ((x) & 0x0010)        /* non-zero when the fit flag is set */
#define MAKE_FIT(x) ((x) | 0x0010)      /* same alignment with the fit flag set */
#define HIDE_FIT(x) ((x) & 0x000f)      /* base alignment with the fit flag removed */

/* returned by the str_crt_conv_* functions when the encoding is not supported */
#define INVALID_CONV ((GIConv) (-1))
53 /*** enums ***************************************************************************************/
/* results of conversion function
 */
typedef enum
{
    /* success means that the conversion has been finished successfully
     */
    ESTR_SUCCESS = 0,
    /* problem means that not every character was successfully converted (they
     * are replaced with a question mark), so it is impossible to convert the
     * string back.
     */
    ESTR_PROBLEM = 1,
    /* failure means that conversion is not possible (example: wrong encoding
     * of input string)
     */
    ESTR_FAILURE = 2
} estr_t;
/* alignment of strings on terminal
 */
typedef enum
{
    J_LEFT = 0x01,
    J_RIGHT = 0x02,
    J_CENTER = 0x03,
    /* if there is enough space for string on terminal,
     * string is centered otherwise is aligned to left */
    J_CENTER_LEFT = 0x04,
    /* fit alignment: if the string is too long, it is truncated with '~';
     * each value is the matching base alignment with bit 0x10 set */
    J_LEFT_FIT = 0x11,
    J_RIGHT_FIT = 0x12,
    J_CENTER_FIT = 0x13,
    J_CENTER_LEFT_FIT = 0x14
} align_crt_t;
89 /*** structures declarations (and typedefs of structures)*****************************************/
91 /* all functions in str_class must be defined for every encoding */
92 struct str_class
94 gchar *(*conv_gerror_message) (GError * error, const char *def_msg);
95 /*I*/ estr_t (*vfs_convert_to) (GIConv coder, const char *string, int size, GString * buffer);
96 /*I*/ void (*insert_replace_char) (GString * buffer);
97 int (*is_valid_string) (const char *);
98 /*I*/ int (*is_valid_char) (const char *, size_t);
99 /*I*/ void (*cnext_char) (const char **);
100 void (*cprev_char) (const char **);
101 void (*cnext_char_safe) (const char **);
102 /*I*/ void (*cprev_char_safe) (const char **);
103 /*I*/ int (*cnext_noncomb_char) (const char **text);
104 /*I*/ int (*cprev_noncomb_char) (const char **text, const char *begin);
105 /*I*/ int (*char_isspace) (const char *);
106 /*I*/ int (*char_ispunct) (const char *);
107 /*I*/ int (*char_isalnum) (const char *);
108 /*I*/ int (*char_isdigit) (const char *);
109 /*I*/ int (*char_isprint) (const char *);
110 /*I*/ gboolean (*char_iscombiningmark) (const char *);
111 /*I*/ int (*length) (const char *);
112 /*I*/ int (*length2) (const char *, int);
113 /*I*/ int (*length_noncomb) (const char *);
114 /*I*/ int (*char_toupper) (const char *, char **, size_t *);
115 int (*char_tolower) (const char *, char **, size_t *);
116 void (*fix_string) (char *);
117 /*I*/ const char *(*term_form) (const char *);
118 /*I*/ const char *(*fit_to_term) (const char *, int, align_crt_t);
119 /*I*/ const char *(*term_trim) (const char *text, int width);
120 /*I*/ const char *(*term_substring) (const char *, int, int);
121 /*I*/ int (*term_width1) (const char *);
122 /*I*/ int (*term_width2) (const char *, size_t);
123 /*I*/ int (*term_char_width) (const char *);
124 /*I*/ const char *(*trunc) (const char *, int);
125 /*I*/ int (*offset_to_pos) (const char *, size_t);
126 /*I*/ int (*column_to_pos) (const char *, size_t);
127 /*I*/ char *(*create_search_needle) (const char *, int);
128 void (*release_search_needle) (char *, int);
129 const char *(*search_first) (const char *, const char *, int);
130 const char *(*search_last) (const char *, const char *, int);
131 int (*compare) (const char *, const char *);
132 /*I*/ int (*ncompare) (const char *, const char *);
133 /*I*/ int (*casecmp) (const char *, const char *);
134 /*I*/ int (*ncasecmp) (const char *, const char *);
135 /*I*/ int (*prefix) (const char *, const char *);
136 /*I*/ int (*caseprefix) (const char *, const char *);
137 /*I*/ char *(*create_key) (const char *text, int case_sen);
138 /*I*/ char *(*create_key_for_filename) (const char *text, int case_sen);
139 /*I*/ int (*key_collate) (const char *t1, const char *t2, int case_sen);
140 /*I*/ void (*release_key) (char *key, int case_sen);
141 /*I*/};
143 /*** global variables defined in .c file *********************************************************/
145 /* standard convertors */
146 extern GIConv str_cnv_to_term;
147 extern GIConv str_cnv_from_term;
148 /* from terminal encoding to terminal encoding */
149 extern GIConv str_cnv_not_convert;
151 /*** declarations of public functions ************************************************************/
153 struct str_class str_utf8_init (void);
154 struct str_class str_8bit_init (void);
155 struct str_class str_ascii_init (void);
157 /* create convertor from "from_enc" to terminal encoding
158 * if "from_enc" is not supported return INVALID_CONV
160 GIConv str_crt_conv_from (const char *);
162 /* create convertor from terminal encoding to "to_enc"
163 * if "to_enc" is not supported return INVALID_CONV
165 GIConv str_crt_conv_to (const char *);
167 /* close convertor, do not close str_cnv_to_term, str_cnv_from_term,
168 * str_cnv_not_convert
170 void str_close_conv (GIConv);
172 /* return one of the unused buffers (.used == 0) or create a new one
173 * the returned buffer has .used set to 1
176 /* convert string using coder, result of conversion is appended at end of buffer
177 * return ESTR_SUCCESS if there was no problem.
178 * otherwise return ESTR_PROBLEM or ESTR_FAILURE
180 estr_t str_convert (GIConv, const char *, GString *);
181 estr_t str_nconvert (GIConv, const char *, int, GString *);
183 /* convert GError message (which is in UTF-8) to terminal charset
184 * def_msg is used if the result of the error->message conversion is ESTR_FAILURE
185 * return newly allocated null-terminated string, which needs to be freed
188 gchar *str_conv_gerror_message (GError * error, const char *def_msg);
190 /* return only ESTR_SUCCESS or ESTR_FAILURE, because vfs must be able to convert
191 * result to original string. (so no replace with questionmark)
192 * if coder is str_cnv_from_term or str_cnv_not_convert, string is only copied,
193 * so is possible to show file, that is not valid in terminal encoding
195 estr_t str_vfs_convert_from (GIConv, const char *, GString *);
197 /* if coder is str_cnv_to_term or str_cnv_not_convert, string is only copied,
198 * does replace with questionmark
201 estr_t str_vfs_convert_to (GIConv, const char *, int, GString *);
203 /* printf function for str_buffer, appends result of printf at the end of buffer
205 void str_printf (GString *, const char *, ...);
207 /* add standard replacement character in terminal encoding
209 void str_insert_replace_char (GString *);
211 /* init strings and set terminal encoding,
212 * if is termenc NULL, detect terminal encoding
213 * create all str_cnv_* and set functions for terminal encoding
215 void str_init_strings (const char *termenc);
217 /* free all str_buffer and all str_cnv_*
219 void str_uninit_strings (void);
221 /* try to convert characters in ch to output using conv
222 * ch_size is size of ch, can be (size_t)(-1) (-1 only for ASCII
223 * compatible encoding, for other must be set)
224 * return ESTR_SUCCESS if conversion was successful,
225 * ESTR_PROBLEM if ch contains only part of characters,
226 * ESTR_FAILURE if conversion is not possible
228 estr_t str_translate_char (GIConv conv, const char *ch, size_t ch_size,
229 char *output, size_t out_size);
231 /* test, if text is valid in terminal encoding
234 int str_is_valid_string (const char *text);
236 /* test, if first char of ch is valid
237 * size, how many bytes characters occupied, could be (size_t)(-1)
238 * return 1 if it is valid, -1 if it is invalid or -2 if it is only part of
239 * multibyte character
242 int str_is_valid_char (const char *ch, size_t size);
244 /* return next characters after text, do not call on the end of string
246 char *str_get_next_char (char *text);
247 const char *str_cget_next_char (const char *text);
249 /* return previous characters before text, do not call on the start of strings
251 char *str_get_prev_char (char *text);
252 const char *str_cget_prev_char (const char *text);
254 /* set text to next characters, do not call on the end of string
256 void str_next_char (char **text);
257 void str_cnext_char (const char **text);
259 /* set text to previous characters, do not call on the start of strings
261 void str_prev_char (char **text);
262 void str_cprev_char (const char **text);
264 /* return next characters after text, do not call on the end of string
265 * works with invalid string
268 char *str_get_next_char_safe (char *text);
269 const char *str_cget_next_char_safe (const char *text);
271 /* return previous characters before text, do not call on the start of strings
272 * works with invalid string
275 char *str_get_prev_char_safe (char *text);
276 const char *str_cget_prev_char_safe (const char *text);
278 /* set text to next characters, do not call on the end of string
279 * works with invalid string
282 void str_next_char_safe (char **text);
283 void str_cnext_char_safe (const char **text);
285 /* set text to previous characters, do not call on the start of strings
286 * works with invalid string
289 void str_prev_char_safe (char **text);
290 void str_cprev_char_safe (const char **text);
292 /* set text to next noncombining characters, check the end of text
293 * return how many characters was skipped
294 * works with invalid string
297 int str_next_noncomb_char (char **text);
298 int str_cnext_noncomb_char (const char **text);
300 /* set text to previous noncombining characters, search stop at begin
301 * return how many characters was skipped
302 * works with invalid string
305 int str_prev_noncomb_char (char **text, const char *begin);
306 int str_cprev_noncomb_char (const char **text, const char *begin);
308 /* if first characters in ch is space, tabulator or new lines
311 int str_isspace (const char *ch);
313 /* if first characters in ch is punctuation or symbol
316 int str_ispunct (const char *ch);
318 /* if first characters in ch is alphanum
321 int str_isalnum (const char *ch);
323 /* if first characters in ch is digit
326 int str_isdigit (const char *ch);
328 /* if first characters in ch is printable
331 int str_isprint (const char *ch);
333 /* if first character in ch is a combining mark (only in utf-8)
334 * combining marks are assumed to be zero width
337 gboolean str_iscombiningmark (const char *ch);
339 /* write upper form of first character in ch into out
340 * decrease remain by size of returned character
341 * if out is not big enough, do nothing
343 int str_toupper (const char *ch, char **out, size_t * remain);
345 /* write lower form of first character in ch into out
346 * decrease remain by size of returned character
347 * if out is not big enough, do nothing
349 int str_tolower (const char *ch, char **out, size_t * remain);
351 /* return length of text in characters
354 int str_length (const char *text);
356 /* return length of text in characters, limit to size
359 int str_length2 (const char *text, int size);
361 /* return length of one char
364 int str_length_char (const char *);
366 /* return length of text in characters, count only noncombining characters
369 int str_length_noncomb (const char *text);
371 /* replace all invalid characters in text with questionmark
372 * after return, text is valid string in terminal encoding
375 void str_fix_string (char *text);
377 /* replace all invalid characters in text with questionmark
378 * replace all unprintable characters with '.'
379 * return static allocated string, "text" is not changed
380 * returned string do not need to be freed
383 const char *str_term_form (const char *text);
385 /* like str_term_form, but text can be alignment to width
386 * alignment is specified in just_mode (J_LEFT, J_LEFT_FIT, ...)
387 * result is completed with spaces to width
390 const char *str_fit_to_term (const char *text, int width, align_crt_t just_mode);
392 /* like str_term_form, but when text is wider than width, three dots are
393 * inserted at begin and result is completed with suffix of text
394 * no additional spaces are inserted
397 const char *str_term_trim (const char *text, int width);
400 /* like str_term_form, but return only specified substring
401 * start - column (position) on terminal, where substring begin
402 * result is completed with spaces to width
405 const char *str_term_substring (const char *text, int start, int width);
407 /* return width, that will be text occupied on terminal
410 int str_term_width1 (const char *text);
412 /* return width, that will be text occupied on terminal
413 * text is limited by length in characters
416 int str_term_width2 (const char *text, size_t length);
418 /* return width, that will be character occupied on terminal
419 * combining characters are always zero width
422 int str_term_char_width (const char *text);
424 /* convert position in characters to position in bytes
427 int str_offset_to_pos (const char *text, size_t length);
429 /* convert position on terminal to position in characters
432 int str_column_to_pos (const char *text, size_t pos);
434 /* like str_fit_to_term with just_mode = J_LEFT_FIT,
435 * but do not insert additional spaces
438 const char *str_trunc (const char *text, int width);
440 /* create needle, that will be searched in str_search_first/last,
441 * so needle can be reused
442 * in UTF-8 return normalized form of needle
444 char *str_create_search_needle (const char *needle, int case_sen);
446 /* free needle returned by str_create_search_needle
448 void str_release_search_needle (char *needle, int case_sen);
450 /* search for first occurrence of search in text
452 const char *str_search_first (const char *text, const char *needle, int case_sen);
454 /* search for last occurrence of search in text
456 const char *str_search_last (const char *text, const char *needle, int case_sen);
458 /* case sensitive compare two strings
461 int str_compare (const char *t1, const char *t2);
463 /* case sensitive compare two strings
464 * if one string is prefix of the other string, return 0
467 int str_ncompare (const char *t1, const char *t2);
469 /* case insensitive compare two strings
472 int str_casecmp (const char *t1, const char *t2);
474 /* case insensitive compare two strings
475 * if one string is prefix of the other string, return 0
478 int str_ncasecmp (const char *t1, const char *t2);
480 /* return how many bytes are the same from start in text and prefix
481 * both strings are decomposed before comparing and return value is counted
482 * in decomposed form, too. calling with prefix, prefix, you get size in bytes
483 * of prefix in decomposed form,
486 int str_prefix (const char *text, const char *prefix);
488 /* case insensitive version of str_prefix
491 int str_caseprefix (const char *text, const char *prefix);
493 /* create a key that is used by str_key_collate
496 char *str_create_key (const char *text, int case_sen);
498 /* create a key that is used by str_key_collate
499 * should be aware of dot '.' in text
502 char *str_create_key_for_filename (const char *text, int case_sen);
504 /* compare two string using LC_COLLATE, if is possible
505 * if case_sen is set, comparing is case sensitive,
506 * case_sen must be same for str_create_key, str_key_collate and str_release_key
509 int str_key_collate (const char *t1, const char *t2, int case_sen);
511 /* release key created by str_create_key, the only right way to release a key
514 void str_release_key (char *key, int case_sen);
516 /* return TRUE if codeset_name is utf8 or utf-8
519 gboolean str_isutf8 (const char *codeset_name);
521 const char *str_detect_termencoding (void);
523 int str_verscmp (const char *s1, const char *s2);
525 /* return how many lines and columns will text occupy on terminal
527 void str_msg_term_size (const char *text, int *lines, int *columns);
530 * skip first needle's in haystack
532 * @param haystack pointer to string
533 * @param needle pointer to string
534 * @param skip_count skip first bytes
536 * @return pointer to skip_count+1 needle (or NULL if not found).
539 char *strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count);
541 /*** inline functions ****************************************************************************/
/**
 * Replace every occurrence of one byte in a NUL-terminated string, in place.
 *
 * @param s    string to modify; it is dereferenced unconditionally, so it
 *             must not be NULL
 * @param from byte to look for
 * @param to   byte stored in place of every match
 */
static inline void
str_replace (char *s, char from, char to)
{
    for (; *s != '\0'; s++)
    {
        if (*s == from)
            *s = to;
    }
}
/**
 * strcpy is unsafe on overlapping memory areas, so define a memmove-alike
 * string function.
 * It makes sense only when:
 *  * dest <= src
 *    AND
 *  * dest and src are pointers into one object (as Roland Illig pointed out).
 *
 * We can't use str*cpy funs here:
 * http://kerneltrap.org/mailarchive/openbsd-misc/2008/5/27/1951294
 *
 * @param dest pointer to string
 * @param src pointer to string
 *
 * @return dest (the value returned by memmove); no memory is allocated
 */

static inline char *
str_move (char *dest, const char *src)
{
    size_t n;

#ifdef HAVE_ASSERT_H
    assert (dest <= src);
#endif

    n = strlen (src) + 1;       /* + '\0' */

    return (char *) memmove (dest, src, n);
}
585 #endif /* MC_STRUTIL_H */