/*
 * Copyright 2004-2005 Timo Hirvonen
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
/* Holds one decoded unicode character, as opposed to a single UTF-8 byte. */
typedef unsigned int uchar;
/*
 * @byte any byte in UTF-8 string
 *
 * Returns 1 if @byte is the first byte of unicode char, 0 otherwise
 */
static inline int u_is_first_byte(unsigned char byte)
{
	/*
	 * UTF-8 continuation bytes have the bit pattern 10xxxxxx, i.e. their
	 * top two bits equal 2; every other byte starts a character.
	 */
	return (byte >> 6) != 2;
}
36 * @uch potential unicode character
38 * Returns 1 if @uch is valid unicode character, 0 otherwise
40 static inline int u_is_unicode(uchar uch
)
42 return uch
<= 0x0010ffffU
;
46 * Returns size of @uch in bytes
48 static inline int u_char_size(uchar uch
)
50 if (uch
<= 0x0000007fU
) {
52 } else if (uch
<= 0x000007ffU
) {
54 } else if (uch
<= 0x0000ffffU
) {
56 } else if (uch
<= 0x0010ffffU
) {
/*
 * @uch unicode character
 *
 * Returns width of @uch (1 or 2)
 */
extern int u_char_width(uchar uch);
/*
 * @str any null-terminated string
 *
 * Returns 1 if @str is valid UTF-8 string, 0 otherwise.
 */
extern int u_is_valid(const char *str);
/*
 * @str null-terminated UTF-8 string
 *
 * Returns length of @str in UTF-8 characters.
 */
extern int u_strlen(const char *str);
/*
 * @str null-terminated UTF-8 string
 *
 * Returns width of @str.
 */
extern int u_str_width(const char *str);
/*
 * @str null-terminated UTF-8 string
 * @len number of characters to measure
 *
 * Returns width of the first @len characters in @str.
 */
extern int u_str_nwidth(const char *str, int len);
/*
 * @str null-terminated UTF-8 string
 * @idx pointer to byte index in @str (not UTF-8 character index!)
 * @uch pointer to returned unicode character
 *
 * NOTE(review): @idx is presumably advanced past the decoded character;
 * confirm in the implementation.
 */
extern void u_get_char(const char *str, int *idx, uchar *uch);
/*
 * @str destination buffer
 * @idx pointer to byte index in @str (not UTF-8 character index!)
 * @uch unicode character
 *
 * NOTE(review): @idx is presumably advanced past the bytes written;
 * confirm in the implementation.
 */
extern void u_set_char(char *str, int *idx, uchar uch);
/*
 * @dst destination buffer
 * @src null-terminated UTF-8 string
 * @width how much to copy
 *
 * Copies at most @width worth of characters, less if null byte was hit.
 * Null byte is _never_ copied.
 * Actual width of copied characters is stored to @width.
 *
 * Returns number of _bytes_ copied.
 */
extern int u_copy_chars(char *dst, const char *src, int *width);
/*
 * @str null-terminated UTF-8 string, must be long enough
 * @width how much to skip
 *
 * Skips UTF-8 characters until @width has been covered.
 * Total width of skipped characters is stored to @width.
 * Returned @width can be the given @width + 1 if the last skipped
 * character was double width.
 *
 * Returns number of _bytes_ skipped.
 */
extern int u_skip_chars(const char *str, int *width);
/*
 * NOTE(review): presumably a case-insensitive comparison of two UTF-8
 * strings in the manner of strcasecmp(); not documented in this header,
 * confirm against the implementation.
 */
extern int u_strcasecmp(const char *a, const char *b);
/*
 * NOTE(review): presumably a length-limited, case-insensitive comparison of
 * two UTF-8 strings; whether @len counts characters or bytes is not visible
 * in this header, confirm against the implementation.
 */
extern int u_strncasecmp(const char *a, const char *b, int len);
/*
 * NOTE(review): presumably a case-insensitive search for @part inside @text
 * in the manner of strcasestr(); not documented in this header, confirm
 * against the implementation.
 */
extern char *u_strcasestr(const char *text, const char *part);
/*
 * @text filename (locale's encoding)
 *
 * NOTE(review): the encoding expected for @part is not stated in this
 * header; confirm against the implementation.
 */
extern char *u_strcasestr_filename(const char *text, const char *part);