UTF-8: New function unicode_fold_label_case and a related script.
[elinks.git] / src / intl / charsets.h
blobae8fe97ffff0b8752b648803130ea641314a6381
1 #ifndef EL__INTL_CHARSETS_H
2 #define EL__INTL_CHARSETS_H
/* Unsigned 32-bit integer type used for UCS/Unicode character values. */
4 typedef uint32_t unicode_val_T;
6 /* UCS/Unicode replacement character.
 * NOTE(review): 0xFFFFFFFD is not U+FFFD REPLACEMENT CHARACTER and lies above
 * U+10FFFF, the last Unicode code point -- presumably a deliberate
 * out-of-band "no character" marker; confirm against the users of this
 * macro before relying on either reading. */
7 #define UCS_NO_CHAR ((unicode_val_T) 0xFFFFFFFD)
9 /* &nbsp; replacement character. See u2cp(). */
10 #define NBSP_CHAR ((unsigned char) 1)
/* The same byte value as NBSP_CHAR, as a one-byte string literal. */
11 #define NBSP_CHAR_STRING "\001"
/*
 * Node of a charset conversion table.
 *
 * A node carries either a byte string (u.str) or a pointer to a further
 * table of nodes (u.tbl).
 * NOTE(review): `t` presumably selects which member of `u` is valid
 * (string vs. nested table) -- confirm against get_translation_table().
 */
struct conv_table {
	int t;
	union {
		unsigned char *str;
		struct conv_table *tbl;
	} u;
};
/* Selects how convert_string() treats the characters it walks over. */
enum convert_string_mode {
	CSM_DEFAULT, /* Convert any char. */
	CSM_QUERY, /* Special handling of '&' and '=' chars. */
	CSM_FORM, /* Special handling of '&' and '=' chars in forms. */
	CSM_NONE, /* Convert nothing. */
};
/* Returns a pointer to a conversion table for a pair of integer arguments.
 * NOTE(review): the two ints are presumably the source and destination
 * codepage indexes -- confirm against the definition. */
struct conv_table *get_translation_table(int, int);

/* Looks up the entity named by the first @len bytes of @str for @encoding.
 * The length parameter is called @len rather than `strlen` so that the
 * prototype does not shadow the standard library function; parameter names
 * in a prototype do not have to match the definition.
 * NOTE(review): return value on an unknown entity is not visible here. */
unsigned char *get_entity_string(const unsigned char *str, int len, int encoding);
31 /* The convert_string() name is also used by Samba (version 3.0.3), which
32 * provides libnss_wins.so.2, which is called somewhere inside
33 * _nss_wins_gethostbyname_r(). This name clash causes the elinks hostname
34 * lookup thread to crash so we need to rename the symbol. */
35 /* Reported by Derek Poon and filed as bug 453 */
/* From here on the preprocessor rewrites every use of the identifier
 * convert_string -- including the prototype below -- to
 * convert_string_elinks. */
36 #undef convert_string
37 #define convert_string convert_string_elinks
39 /* This routine converts a string from one charset to another according to the
40 * passed @convert_table, potentially also decoding SGML (HTML) entities along
41 * the way (according to @mode). It either returns a dynamically allocated
42 * converted string of length @length, or, if @callback is non-NULL, calls
43 * it every few bytes instead and always returns NULL (@length is undefined).
44 * Note that it's ok not to care and pass NULL as @length.
 * NOTE(review): @callback_data is presumably handed to @callback as its
 * `data` argument, with `buf`/`buflen` describing the converted chunk --
 * confirm against the definition. */
45 unsigned char *convert_string(struct conv_table *convert_table,
46 unsigned char *chars, int charslen, int cp,
47 enum convert_string_mode mode, int *length,
48 void (*callback)(void *data, unsigned char *buf, int buflen),
49 void *callback_data);
/* Codepage lookup helpers. Only the signatures are visible in this header,
 * so the notes below are assumptions to be confirmed against the
 * definitions. */
/* NOTE(review): presumably maps a charset name string to its integer
 * codepage index. */
51 int get_cp_index(unsigned char *);
/* NOTE(review): presumably the inverse lookups -- a codepage index to its
 * display name and to its MIME name. */
52 unsigned char *get_cp_name(int);
53 unsigned char *get_cp_mime_name(int);
/* NOTE(review): presumably nonzero when the given codepage index denotes
 * UTF-8. */
54 int is_cp_utf8(int);
/* NOTE(review): presumably releases conversion tables built by
 * get_translation_table(); takes no arguments. */
55 void free_conv_table(void);
#ifdef CONFIG_UTF_8
/* UTF-8 helpers, only declared when ELinks is built with CONFIG_UTF_8.
 *
 * None of these prototypes carries `inline`: the bodies are not part of
 * this header, and C99 (6.7.4) requires a function with external linkage
 * that is declared `inline` to be defined in the same translation unit.
 * Declaring them plainly here keeps every includer well-formed and makes
 * the definition the ordinary external one, whether or not it is itself
 * marked `inline`. */
unsigned char *encode_utf_8(unicode_val_T);
unsigned char *utf8_prevchar(unsigned char *, int, unsigned char *);
int utf8charlen(const unsigned char *);
int utf8_char2cells(unsigned char *, unsigned char *);
int utf8_ptr2cells(unsigned char *, unsigned char *);
int utf8_ptr2chars(unsigned char *, unsigned char *);
int utf8_cells2bytes(unsigned char *, int, unsigned char *);
int unicode_to_cell(unicode_val_T);
unicode_val_T unicode_fold_label_case(unicode_val_T);
int strlen_utf8(unsigned char **);
unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);
unicode_val_T cp2u(int, unsigned char);
#endif /* CONFIG_UTF_8 */
/* NOTE(review): presumably converts a character of the given codepage to
 * its UTF-8 byte sequence -- inferred from the name only; confirm against
 * the definition. */
71 unsigned char *cp2utf_8(int, int);
/* Function that the u2cp() and u2cp_no_nbsp() macros below expand to; the
 * two macros differ only in the value passed as @no_nbsp_hack. */
73 unsigned char *u2cp_(unicode_val_T, int, int no_nbsp_hack);
/* u2cp_() with no_nbsp_hack = 0. */
74 #define u2cp(u, to) u2cp_(u, to, 0)
/* u2cp_() with no_nbsp_hack = 1. */
75 #define u2cp_no_nbsp(u, to) u2cp_(u, to, 1)
/* Set-up and tear-down pair for the charsets lookup; both take no arguments
 * and return nothing.
 * NOTE(review): presumably init must be called before get_cp_index() is
 * used and free at shutdown -- confirm against the callers. */
77 void init_charsets_lookup(void);
78 void free_charsets_lookup(void);
80 #endif