4 // First some base level utility routines
11 // convert UTF-16 characters to UTF-8
12 char * u16_u8(char * dest
, int size
, const w_char
* src
, int srclen
);
14 // convert UTF-8 characters to UTF-16
15 int u8_u16(w_char
* dest
, int size
, const char * src
);
// sort a vector of 2-byte flags
// NOTE(review): presumably sorts the entries of flags between indices
// begin and end; whether end is inclusive is not visible from this
// declaration -- confirm against the definition.
18 void flag_qsort(unsigned short flags
[], int begin
, int end
);
20 // binary search in 2-byte vector
21 int flag_bsearch(unsigned short flags
[], unsigned short flag
, int right
);
23 // remove end of line char(s)
24 void mychomp(char * s
);
// duplicate string
// NOTE(review): presumably returns a separately allocated copy of s
// (compare myrevstrdup, "duplicate reverse of string"); who frees the
// result is not visible from this declaration -- confirm.
27 char * mystrdup(const char * s
);
29 // duplicate reverse of string
30 char * myrevstrdup(const char * s
);
32 // parse into tokens with char delimiter
33 char * mystrsep(char ** sptr
, const char delim
);
34 // parse into tokens with char delimiter
35 char * mystrsep2(char ** sptr
, const char delim
);
37 // replace a substring in a string (presumably: text, pattern, replacement -- TODO confirm argument order)
38 char * mystrrep(char *, const char *, const char *);
40 // append s to the end of every line in lines
41 void strlinecat(char * lines
, const char * s
);
43 // tokenize into lines with new line
44 int line_tok(const char * text
, char *** lines
);
46 // tokenize into lines at newlines and remove duplicate lines in place
47 char * line_uniq(char * text
);
49 // change \n to c in place
50 char * line_join(char * text
, char c
);
52 // leave only last {[^}]*} pattern in string
53 char * delete_zeros(char * morphout
);
56 int reverseword(char *);
59 int reverseword_utf(char *);
61 // character encoding information
68 // Unicode character encoding information
71 unsigned short cupper
;
72 unsigned short clower
;
75 struct unicode_info2
{
77 unsigned short cupper
;
78 unsigned short clower
;
81 int initialize_utf_tbl();
83 unsigned short unicodetoupper(unsigned short c
, int langnum
);
84 unsigned short unicodetolower(unsigned short c
, int langnum
);
85 int unicodeisalpha(unsigned short c
);
88 const char * enc_name
;
89 struct cs_info
* cs_table
;
92 // language to encoding default map
100 struct cs_info
* get_current_cs(const char * es
);
102 const char * get_default_enc(const char * lang
);
104 int get_lang_num(const char * lang
);
106 // convert null terminated string to all caps using encoding
107 void enmkallcap(char * d
, const char * p
, const char * encoding
);
109 // convert null terminated string to all lowercase using encoding
110 void enmkallsmall(char * d
, const char * p
, const char * encoding
);
112 // convert null terminated string to have initial capital using encoding
113 void enmkinitcap(char * d
, const char * p
, const char * encoding
);
115 // convert null terminated string to all caps
116 void mkallcap(char * p
, const struct cs_info
* csconv
);
118 // convert null terminated string to all lowercase
119 void mkallsmall(char * p
, const struct cs_info
* csconv
);
121 // convert null terminated string to have initial capital
122 void mkinitcap(char * p
, const struct cs_info
* csconv
);
124 // convert first nc characters of UTF-16 (w_char) string to lowercase
125 void mkallsmall_utf(w_char
* u
, int nc
, int langnum
);
127 // convert first nc characters of UTF-16 (w_char) string to uppercase
128 void mkallcap_utf(w_char
* u
, int nc
, int langnum
);
130 // strip all ignored characters in the string
131 void remove_ignored_chars_utf(char * word
, unsigned short ignored_chars
[], int ignored_len
);
133 // strip all ignored characters in the string
134 void remove_ignored_chars(char * word
, char * ignored_chars
);
136 int parse_string(char * line
, char ** out
, const char * name
);
138 int parse_array(char * line
, char ** out
,
139 unsigned short ** out_utf16
, int * out_utf16_len
, const char * name
, int utf8
);