mkv: Improve PCI events handling
[vlc.git] / include / vlc_charset.h
blob2397e1f9ede8b07ec9e7d2c7b00f76e65b77c4ca
1 /*****************************************************************************
2 * vlc_charset.h: Unicode UTF-8 wrappers function
3 *****************************************************************************
4 * Copyright (C) 2003-2005 VLC authors and VideoLAN
5 * Copyright © 2005-2010 Rémi Denis-Courmont
7 * Author: Rémi Denis-Courmont <rem # videolan,org>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 #ifndef VLC_CHARSET_H
25 #define VLC_CHARSET_H 1
27 /**
28 * \file
29 * Characters sets handling
31 * \ingroup strings
32 * @{
35 /**
36 * Decodes a code point from UTF-8.
38 * Converts the first character in a UTF-8 sequence into a Unicode code point.
40 * \param str an UTF-8 bytes sequence [IN]
41 * \param pwc address of a location to store the code point [OUT]
43 * \return the number of bytes occupied by the decoded code point
45 * \retval (size_t)-1 not a valid UTF-8 sequence
46 * \retval 0 null character (i.e. str points to an empty string)
47 * \retval 1 (non-null) ASCII character
48 * \retval 2-4 non-ASCII character
50 VLC_API size_t vlc_towc(const char *str, uint32_t *restrict pwc);
52 /**
53 * Checks UTF-8 validity.
55 * Checks whether a null-terminated string is a valid UTF-8 bytes sequence.
57 * \param str string to check
59 * \retval str the string is a valid null-terminated UTF-8 sequence
60 * \retval NULL the string is not an UTF-8 sequence
62 VLC_USED static inline const char *IsUTF8(const char *str)
64 size_t n;
65 uint32_t cp;
67 while ((n = vlc_towc(str, &cp)) != 0)
68 if (likely(n != (size_t)-1))
69 str += n;
70 else
71 return NULL;
72 return str;
75 /**
76 * Checks ASCII validity.
78 * Checks whether a null-terminated string is a valid ASCII bytes sequence
79 * (non-printable ASCII characters 1-31 are permitted).
81 * \param str string to check
83 * \retval str the string is a valid null-terminated ASCII sequence
84 * \retval NULL the string is not an ASCII sequence
86 VLC_USED static inline const char *IsASCII(const char *str)
88 unsigned char c;
90 for (const char *p = str; (c = *p) != '\0'; p++)
91 if (c >= 0x80)
92 return NULL;
93 return str;
/**
 * Removes non-UTF-8 sequences.
 *
 * Replaces invalid or <i>over-long</i> UTF-8 bytes sequences within a
 * null-terminated string with question marks. This is so that the string can
 * be printed at least partially.
 *
 * \warning Do not use this where correctness is critical. Use IsUTF8() and
 * handle the error case instead. This function is mainly for display or debug.
 *
 * \note Converting from Latin-1 to UTF-8 in place is not possible (the string
 * size would be increased). So it is not attempted even if it would otherwise
 * be less disruptive.
 *
 * \param str string to sanitize in place
 *
 * \retval str the string is a valid null-terminated UTF-8 sequence
 * (i.e. no changes were made)
 * \retval NULL the string is not an UTF-8 sequence
 * (i.e. at least one byte was replaced with a question mark)
 */
static inline char *EnsureUTF8(char *str)
{
    char *ret = str;
    size_t n;
    uint32_t cp;

    while ((n = vlc_towc(str, &cp)) != 0)
    {
        if (likely(n != (size_t)-1))
            str += n;
        else
        {
            /* Overwrite the offending byte and resume right after it */
            *str++ = '?';
            ret = NULL;
        }
    }
    return ret;
}
131 /* iconv wrappers (defined in src/extras/libc.c) */
132 #define VLC_ICONV_ERR ((size_t) -1)
133 typedef void *vlc_iconv_t;
134 VLC_API vlc_iconv_t vlc_iconv_open( const char *, const char * ) VLC_USED;
135 VLC_API size_t vlc_iconv( vlc_iconv_t, const char **, size_t *, char **, size_t * ) VLC_USED;
136 VLC_API int vlc_iconv_close( vlc_iconv_t );
138 #include <stdarg.h>
140 VLC_API int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap );
141 VLC_API int utf8_fprintf( FILE *, const char *, ... ) VLC_FORMAT( 2, 3 );
142 VLC_API char * vlc_strcasestr(const char *, const char *) VLC_USED;
144 VLC_API char * FromCharset( const char *charset, const void *data, size_t data_size ) VLC_USED;
145 VLC_API void * ToCharset( const char *charset, const char *in, size_t *outsize ) VLC_USED;
#ifdef __APPLE__
# include <CoreFoundation/CoreFoundation.h>

/* Obtains a copy of the contents of a CFString in specified encoding.
 * Returns char* (must be freed by caller) or NULL on failure.
 */
VLC_USED static inline char *FromCFString(const CFStringRef cfString,
    const CFStringEncoding cfStringEncoding)
{
    // Try the quick way to obtain the buffer
    const char *tmpBuffer = CFStringGetCStringPtr(cfString, cfStringEncoding);

    if (tmpBuffer != NULL) {
        return strdup(tmpBuffer);
    }

    // The quick way did not work, try the long way
    CFIndex length = CFStringGetLength(cfString);
    CFIndex maxSize =
        CFStringGetMaximumSizeForEncoding(length, cfStringEncoding);

    // If result would exceed LONG_MAX, kCFNotFound is returned
    if (unlikely(maxSize == kCFNotFound)) {
        return NULL;
    }

    // Account for the null terminator
    maxSize++;

    char *buffer = (char *)malloc(maxSize);

    if (unlikely(buffer == NULL)) {
        return NULL;
    }

    // Copy CFString in requested encoding to buffer
    Boolean success = CFStringGetCString(cfString, buffer, maxSize, cfStringEncoding);

    // On conversion failure the buffer is released and NULL is returned
    if (!success)
        FREENULL(buffer);
    return buffer;
}
#endif
191 #ifdef _WIN32
192 VLC_USED
193 static inline char *FromWide (const wchar_t *wide)
195 size_t len = WideCharToMultiByte (CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);
196 if (len == 0)
197 return NULL;
199 char *out = (char *)malloc (len);
201 if (likely(out))
202 WideCharToMultiByte (CP_UTF8, 0, wide, -1, out, len, NULL, NULL);
203 return out;
206 VLC_USED
207 static inline wchar_t *ToWide (const char *utf8)
209 int len = MultiByteToWideChar (CP_UTF8, 0, utf8, -1, NULL, 0);
210 if (len == 0)
211 return NULL;
213 wchar_t *out = (wchar_t *)malloc (len * sizeof (wchar_t));
215 if (likely(out))
216 MultiByteToWideChar (CP_UTF8, 0, utf8, -1, out, len);
217 return out;
220 VLC_USED VLC_MALLOC
221 static inline char *ToCodePage (unsigned cp, const char *utf8)
223 wchar_t *wide = ToWide (utf8);
224 if (wide == NULL)
225 return NULL;
227 size_t len = WideCharToMultiByte (cp, 0, wide, -1, NULL, 0, NULL, NULL);
228 if (len == 0) {
229 free(wide);
230 return NULL;
233 char *out = (char *)malloc (len);
234 if (likely(out != NULL))
235 WideCharToMultiByte (cp, 0, wide, -1, out, len, NULL, NULL);
236 free (wide);
237 return out;
240 VLC_USED VLC_MALLOC
241 static inline char *FromCodePage (unsigned cp, const char *mb)
243 int len = MultiByteToWideChar (cp, 0, mb, -1, NULL, 0);
244 if (len == 0)
245 return NULL;
247 wchar_t *wide = (wchar_t *)malloc (len * sizeof (wchar_t));
248 if (unlikely(wide == NULL))
249 return NULL;
250 MultiByteToWideChar (cp, 0, mb, -1, wide, len);
252 char *utf8 = FromWide (wide);
253 free (wide);
254 return utf8;
257 VLC_USED VLC_MALLOC
258 static inline char *FromANSI (const char *ansi)
260 return FromCodePage (GetACP (), ansi);
263 VLC_USED VLC_MALLOC
264 static inline char *ToANSI (const char *utf8)
266 return ToCodePage (GetACP (), utf8);
/* On Windows the locale helpers map onto the ANSI code page conversions.
 * Both the plain and the "Dup" variants return heap strings, so LocaleFree()
 * is a plain free(). */
# define FromLocale FromANSI
# define ToLocale ToANSI
# define LocaleFree(s) free((char *)(s))
# define FromLocaleDup FromANSI
# define ToLocaleDup ToANSI
275 #elif defined(__OS2__)
277 VLC_USED static inline char *FromLocale (const char *locale)
279 return locale ? FromCharset ((char *)"", locale, strlen(locale)) : NULL;
282 VLC_USED static inline char *ToLocale (const char *utf8)
284 size_t outsize;
285 return utf8 ? (char *)ToCharset ("", utf8, &outsize) : NULL;
288 VLC_USED static inline void LocaleFree (const char *str)
290 free ((char *)str);
293 VLC_USED static inline char *FromLocaleDup (const char *locale)
295 return FromCharset ("", locale, strlen(locale));
298 VLC_USED static inline char *ToLocaleDup (const char *utf8)
300 size_t outsize;
301 return (char *)ToCharset ("", utf8, &outsize);
304 #else
306 # define FromLocale(l) (l)
307 # define ToLocale(u) (u)
308 # define LocaleFree(s) ((void)(s))
309 # define FromLocaleDup strdup
310 # define ToLocaleDup strdup
311 #endif
/**
 * Converts a nul-terminated string from ISO-8859-1 to UTF-8.
 *
 * Bytes below 0x80 are copied unchanged; each byte at or above 0x80 is
 * expanded into a two-byte UTF-8 sequence.
 *
 * \param latin nul-terminated ISO-8859-1 string (must not be NULL)
 * \return a heap-allocated UTF-8 string to release with free(),
 *         or NULL if the allocation failed
 */
static inline char *FromLatin1 (const char *latin)
{
    /* Worst case: every input byte becomes two output bytes, plus the nul */
    char *str = (char *)malloc (2 * strlen (latin) + 1), *utf8 = str;
    unsigned char c;

    if (str == NULL)
        return NULL;

    while ((c = *(latin++)) != '\0')
    {
        if (c >= 0x80)
        {
            *(utf8++) = (char)(0xC0 | (c >> 6));
            *(utf8++) = (char)(0x80 | (c & 0x3F));
        }
        else
            *(utf8++) = (char)c;
    }
    *(utf8++) = '\0';

    /* Shrink to the size actually used; if that fails, the original
     * (larger) buffer is still valid and is returned instead. */
    utf8 = (char *)realloc (str, (size_t)(utf8 - str));
    return utf8 ? utf8 : str;
}
340 /** @} */
342 VLC_API double us_strtod( const char *, char ** ) VLC_USED;
343 VLC_API float us_strtof( const char *, char ** ) VLC_USED;
344 VLC_API double us_atof( const char * ) VLC_USED;
345 VLC_API int us_vasprintf( char **, const char *, va_list );
346 VLC_API int us_asprintf( char **, const char *, ... ) VLC_USED;
348 #endif