1 #ifndef EL__INTL_CHARSETS_H
2 #define EL__INTL_CHARSETS_H
/* A single UCS/Unicode character value, held in an unsigned 32-bit integer. */
typedef uint32_t unicode_val_T;

/* UCS/Unicode replacement character, as used by this code.
 * NOTE(review): the value is 0xFFFFFFFD, which lies above U+10FFFF and thus
 * outside the Unicode code space; it is not the standard U+FFFD REPLACEMENT
 * CHARACTER. Presumably that is deliberate, so that it can never collide
 * with a real character -- confirm before changing it. */
#define UCS_NO_CHAR ((unicode_val_T) 0xFFFFFFFD)
/* &nbsp; (no-break space) replacement character. See u2cp().
 * NBSP_CHAR_STRING holds the same byte (value 1) as a one-character string
 * literal. */
#define NBSP_CHAR ((unsigned char) 1)
#define NBSP_CHAR_STRING "\001"
/* NOTE(review): as written, this declares a file-scope pointer named `tbl'
 * to struct conv_table, and struct conv_table itself is never defined in the
 * visible part of this header. It looks like a fragment of a larger
 * definition (possibly a member of struct conv_table itself) -- TODO confirm
 * against the original header. The leading "17" is not valid C either and
 * looks like line-number residue. */
17 struct conv_table
*tbl
;
/* Selects how convert_string() treats the characters it converts; passed as
 * that function's @mode argument. */
enum convert_string_mode {
	CSM_DEFAULT, /* Convert any char. */
	CSM_QUERY, /* Special handling of '&' and '=' chars. */
	CSM_FORM, /* Special handling of '&' and '=' chars in forms. */
	CSM_NONE, /* Convert nothing. */
};
/* Returns a pointer to a conversion table selected by two ints.
 * NOTE(review): the arguments are unnamed; presumably they are the source and
 * destination codepage indices (cf. get_cp_index()) -- confirm against the
 * definition. */
struct conv_table *get_translation_table(int, int);

/* NOTE(review): judging by the names only, this maps the entity text in @str
 * (@strlen bytes long) to its replacement string for the codepage @encoding
 * -- confirm against the definition. Within this prototype the parameter
 * name @strlen shadows the C library function of the same name. */
unsigned char *get_entity_string(const unsigned char *str, const int strlen,
				 int encoding);
/* The convert_string() name is also used by Samba (version 3.0.3), which
 * provides libnss_wins.so.2, which is called somewhere inside
 * _nss_wins_gethostbyname_r(). This name clash causes the elinks hostname
 * lookup thread to crash, so we need to rename the symbol.
 * Reported by Derek Poon and filed as bug 453. */
#define convert_string convert_string_elinks
/* This routine converts a string from one charset to another according to
 * the passed @convert_table, potentially also decoding SGML (HTML) entities
 * along the way (according to @mode). It either returns a dynamically
 * allocated converted string and stores its length in *@length, or, if
 * @callback is non-NULL, it calls @callback every few bytes instead and
 * always returns NULL (in which case *@length is undefined).
 * Note that it's ok not to care and pass NULL as @length. */
/* NOTE(review): this prototype is incomplete as it stands. The parameter
 * list ends with a trailing comma after @callback and is never closed with
 * ")" and ";", so at least one further parameter is missing -- presumably an
 * opaque pointer that is handed to @callback as its @data argument. TODO:
 * restore it from the original header before relying on this declaration.
 * The leading numbers (45, 46, 47, 48) are not valid C and look like
 * line-number residue.
 *
 * Visible parameters: the conversion table, the input bytes @chars with
 * their count @charslen, an int @cp, the conversion @mode, an optional
 * out-parameter @length, and @callback, which receives (data, buf, buflen). */
45 unsigned char *convert_string(struct conv_table
*convert_table
,
46 unsigned char *chars
, int charslen
, int cp
,
47 enum convert_string_mode mode
, int *length
,
48 void (*callback
)(void *data
, unsigned char *buf
, int buflen
),
/* Codepage lookups.
 * NOTE(review): the parameters are unnamed, so the following is inferred from
 * the function names and types only -- confirm against the definitions:
 * get_cp_index() presumably maps a charset name to the int index that the
 * other two functions take; get_cp_name() and get_cp_mime_name() presumably
 * return the display name and the MIME name for such an index. */
int get_cp_index(unsigned char *);
unsigned char *get_cp_name(int);
unsigned char *get_cp_mime_name(int);
/* Takes no arguments and returns nothing.
 * NOTE(review): by its name this releases conversion-table data (struct
 * conv_table) held by the charset code -- confirm against the definition. */
void free_conv_table(void);
#ifdef CONFIG_UTF_8
/* UTF-8 support; only declared when CONFIG_UTF_8 is defined.
 *
 * These are plain external prototypes on purpose: none of the functions is
 * defined in this header, and ISO C (C99/C11 6.7.4) requires a function that
 * is declared `inline' to be defined in the same translation unit.
 *
 * NOTE(review): the parameters are unnamed, so the short descriptions below
 * are inferred from the function names and types only -- confirm them
 * against the definitions. */

/* Presumably: encode one Unicode value as a UTF-8 byte string. */
unsigned char *encode_utf_8(unicode_val_T);

/* Presumably: step back to the character preceding a position in a buffer. */
unsigned char *utf8_prevchar(unsigned char *, int, unsigned char *);

/* Presumably: byte length of the UTF-8 character starting at the pointer. */
int utf8charlen(const unsigned char *);

/* Presumably: conversions between bytes, characters and screen cells for a
 * UTF-8 string delimited by a start and an end pointer. */
int utf8_char2cells(unsigned char *, unsigned char *);
int utf8_ptr2cells(unsigned char *, unsigned char *);
int utf8_ptr2chars(unsigned char *, unsigned char *);
int utf8_cells2bytes(unsigned char *, int, unsigned char *);

/* Presumably: number of screen cells occupied by one Unicode value. */
int unicode_to_cell(unicode_val_T);

/* Presumably: case folding of one Unicode value for label matching. */
unicode_val_T unicode_fold_label_case(unicode_val_T);

/* Presumably: length of a UTF-8 string; takes the string pointer by
 * reference, so it may advance it. */
int strlen_utf8(unsigned char **);

/* Presumably: decode the next UTF-8 character, bounded by an end pointer;
 * takes the string pointer by reference, so it may advance it. */
unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);

/* Presumably: convert one byte of the given codepage to its Unicode value. */
unicode_val_T cp2u(int, unsigned char);
#endif /* CONFIG_UTF_8 */
/* Returns a byte string for a pair of ints.
 * NOTE(review): by its name this yields the UTF-8 encoding of a character
 * taken from a codepage; which argument is the codepage and which the
 * character is not visible here -- confirm against the definition. */
unsigned char *cp2utf_8(int, int);
73 unsigned char *u2cp_(unicode_val_T
, int, int no_nbsp_hack
);
74 #define u2cp(u, to) u2cp_(u, to, 0)
75 #define u2cp_no_nbsp(u, to) u2cp_(u, to, 1)
/* Set-up and tear-down pair for the charsets lookup; both take no arguments
 * and return nothing.
 * NOTE(review): what exactly is built and released is not visible in this
 * header -- see the definitions. */
void init_charsets_lookup(void);
void free_charsets_lookup(void);