Make PHP 5 happy.
[elinks.git] / src / intl / charsets.c
blobd25c558f634a2fd7c06e223b634fa82c3d486b82
1 /* Charsets convertor */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #if HAVE_LANGINFO_CODESET
8 #include <langinfo.h>
9 #endif
11 #include <ctype.h>
12 #include <stdlib.h>
14 #include "elinks.h"
16 #include "document/options.h"
17 #include "intl/charsets.h"
18 #include "util/conv.h"
19 #include "util/error.h"
20 #include "util/fastfind.h"
21 #include "util/memory.h"
22 #include "util/string.h"
25 /* Fix namespace clash on MacOS. */
26 #define table table_elinks
28 struct table_entry {
29 unsigned char c;
30 unicode_val_T u;
33 struct codepage_desc {
34 unsigned char *name;
35 unsigned char **aliases;
36 struct table_entry *table;
39 #include "intl/codepage.inc"
40 #include "intl/uni_7b.inc"
41 #include "intl/entity.inc"
/* strings[b] is a one-character, NUL-terminated string for byte value b,
 * so conversion tables can point at a string without allocating one.
 *
 * NOTE(review): slots 0x17 and 0x1f hold "\033" (ESC) instead of their own
 * byte value.  The data is reproduced as found; confirm whether this is a
 * deliberate remapping before "fixing" it. */
static char strings[256][2] = {
	/* 0x00 */ "\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007",
	/* 0x08 */ "\010", "\011", "\012", "\013", "\014", "\015", "\016", "\017",
	/* 0x10 */ "\020", "\021", "\022", "\023", "\024", "\025", "\026", "\033",
	/* 0x18 */ "\030", "\031", "\032", "\033", "\034", "\035", "\036", "\033",
	/* 0x20 */ "\040", "\041", "\042", "\043", "\044", "\045", "\046", "\047",
	/* 0x28 */ "\050", "\051", "\052", "\053", "\054", "\055", "\056", "\057",
	/* 0x30 */ "\060", "\061", "\062", "\063", "\064", "\065", "\066", "\067",
	/* 0x38 */ "\070", "\071", "\072", "\073", "\074", "\075", "\076", "\077",
	/* 0x40 */ "\100", "\101", "\102", "\103", "\104", "\105", "\106", "\107",
	/* 0x48 */ "\110", "\111", "\112", "\113", "\114", "\115", "\116", "\117",
	/* 0x50 */ "\120", "\121", "\122", "\123", "\124", "\125", "\126", "\127",
	/* 0x58 */ "\130", "\131", "\132", "\133", "\134", "\135", "\136", "\137",
	/* 0x60 */ "\140", "\141", "\142", "\143", "\144", "\145", "\146", "\147",
	/* 0x68 */ "\150", "\151", "\152", "\153", "\154", "\155", "\156", "\157",
	/* 0x70 */ "\160", "\161", "\162", "\163", "\164", "\165", "\166", "\167",
	/* 0x78 */ "\170", "\171", "\172", "\173", "\174", "\175", "\176", "\177",
	/* 0x80 */ "\200", "\201", "\202", "\203", "\204", "\205", "\206", "\207",
	/* 0x88 */ "\210", "\211", "\212", "\213", "\214", "\215", "\216", "\217",
	/* 0x90 */ "\220", "\221", "\222", "\223", "\224", "\225", "\226", "\227",
	/* 0x98 */ "\230", "\231", "\232", "\233", "\234", "\235", "\236", "\237",
	/* 0xa0 */ "\240", "\241", "\242", "\243", "\244", "\245", "\246", "\247",
	/* 0xa8 */ "\250", "\251", "\252", "\253", "\254", "\255", "\256", "\257",
	/* 0xb0 */ "\260", "\261", "\262", "\263", "\264", "\265", "\266", "\267",
	/* 0xb8 */ "\270", "\271", "\272", "\273", "\274", "\275", "\276", "\277",
	/* 0xc0 */ "\300", "\301", "\302", "\303", "\304", "\305", "\306", "\307",
	/* 0xc8 */ "\310", "\311", "\312", "\313", "\314", "\315", "\316", "\317",
	/* 0xd0 */ "\320", "\321", "\322", "\323", "\324", "\325", "\326", "\327",
	/* 0xd8 */ "\330", "\331", "\332", "\333", "\334", "\335", "\336", "\337",
	/* 0xe0 */ "\340", "\341", "\342", "\343", "\344", "\345", "\346", "\347",
	/* 0xe8 */ "\350", "\351", "\352", "\353", "\354", "\355", "\356", "\357",
	/* 0xf0 */ "\360", "\361", "\362", "\363", "\364", "\365", "\366", "\367",
	/* 0xf8 */ "\370", "\371", "\372", "\373", "\374", "\375", "\376", "\377",
};
79 static void
80 free_translation_table(struct conv_table *p)
82 int i;
84 for (i = 0; i < 256; i++)
85 if (p[i].t)
86 free_translation_table(p[i].u.tbl);
88 mem_free(p);
91 static unsigned char *no_str = "*";
93 static void
94 new_translation_table(struct conv_table *p)
96 int i;
98 for (i = 0; i < 256; i++)
99 if (p[i].t)
100 free_translation_table(p[i].u.tbl);
101 for (i = 0; i < 128; i++) {
102 p[i].t = 0;
103 p[i].u.str = strings[i];
105 for (; i < 256; i++) {
106 p[i].t = 0;
107 p[i].u.str = no_str;
/* Binary search over @table[0 .. @entries - 1], which must be sorted in
 * ascending order of member @entry.  On return @result holds the index of
 * the element whose @entry equals @key, or -1 when there is none.
 *
 * The "not found" value is stored by the loop condition itself: once the
 * range is empty, `(result) = -1` is evaluated and its negation (0) ends
 * the loop.  @key and @table are evaluated more than once. */
#define BIN_SEARCH(table, entry, entries, key, result)			\
{									\
	long _lo = 0, _hi = (entries) - 1;				\
									\
	while (_lo <= _hi || !((result) = -1)) {			\
		long _mid = (_lo + _hi) / 2;				\
									\
		if ((table)[_mid].entry == (key)) {			\
			(result) = _mid;				\
			break;						\
		}							\
		if ((table)[_mid].entry > (key))			\
			_hi = _mid - 1;					\
		else							\
			_lo = _mid + 1;					\
	}								\
}
127 static const unicode_val_T strange_chars[32] = {
128 0x20ac, 0x0000, 0x002a, 0x0000, 0x201e, 0x2026, 0x2020, 0x2021,
129 0x005e, 0x2030, 0x0160, 0x003c, 0x0152, 0x0000, 0x0000, 0x0000,
130 0x0000, 0x0060, 0x0027, 0x0022, 0x0022, 0x002a, 0x2013, 0x2014,
131 0x007e, 0x2122, 0x0161, 0x003e, 0x0153, 0x0000, 0x0000, 0x0000,
134 #define SYSTEM_CHARSET_FLAG 128
136 unsigned char *
137 u2cp_(unicode_val_T u, int to, int no_nbsp_hack)
139 int j;
140 int s;
142 if (u < 128) return strings[u];
143 /* To mark non breaking spaces, we use a special char NBSP_CHAR. */
144 if (u == 0xa0) return no_nbsp_hack ? " " : NBSP_CHAR_STRING;
145 if (u == 0xad) return "";
147 if (u < 0xa0) {
148 unicode_val_T strange = strange_chars[u - 0x80];
150 if (!strange) return NULL;
151 return u2cp_(strange, to, no_nbsp_hack);
154 to &= ~SYSTEM_CHARSET_FLAG;
156 for (j = 0; codepages[to].table[j].c; j++)
157 if (codepages[to].table[j].u == u)
158 return strings[codepages[to].table[j].c];
160 BIN_SEARCH(unicode_7b, x, N_UNICODE_7B, u, s);
161 if (s != -1) return unicode_7b[s].s;
163 return no_str;
166 static unsigned char utf_buffer[7];
168 static unsigned char *
169 encode_utf_8(unicode_val_T u)
171 memset(utf_buffer, 0, 7);
173 if (u < 0x80)
174 utf_buffer[0] = u;
175 else if (u < 0x800)
176 utf_buffer[0] = 0xc0 | ((u >> 6) & 0x1f),
177 utf_buffer[1] = 0x80 | (u & 0x3f);
178 else if (u < 0x10000)
179 utf_buffer[0] = 0xe0 | ((u >> 12) & 0x0f),
180 utf_buffer[1] = 0x80 | ((u >> 6) & 0x3f),
181 utf_buffer[2] = 0x80 | (u & 0x3f);
182 else if (u < 0x200000)
183 utf_buffer[0] = 0xf0 | ((u >> 18) & 0x0f),
184 utf_buffer[1] = 0x80 | ((u >> 12) & 0x3f),
185 utf_buffer[2] = 0x80 | ((u >> 6) & 0x3f),
186 utf_buffer[3] = 0x80 | (u & 0x3f);
187 else if (u < 0x4000000)
188 utf_buffer[0] = 0xf8 | ((u >> 24) & 0x0f),
189 utf_buffer[1] = 0x80 | ((u >> 18) & 0x3f),
190 utf_buffer[2] = 0x80 | ((u >> 12) & 0x3f),
191 utf_buffer[3] = 0x80 | ((u >> 6) & 0x3f),
192 utf_buffer[4] = 0x80 | (u & 0x3f);
193 else utf_buffer[0] = 0xfc | ((u >> 30) & 0x01),
194 utf_buffer[1] = 0x80 | ((u >> 24) & 0x3f),
195 utf_buffer[2] = 0x80 | ((u >> 18) & 0x3f),
196 utf_buffer[3] = 0x80 | ((u >> 12) & 0x3f),
197 utf_buffer[4] = 0x80 | ((u >> 6) & 0x3f),
198 utf_buffer[5] = 0x80 | (u & 0x3f);
200 return utf_buffer;
203 /* This slow and ugly code is used by the terminal utf_8_io */
204 unsigned char *
205 cp2utf_8(int from, int c)
207 int j;
209 from &= ~SYSTEM_CHARSET_FLAG;
211 if (codepages[from].table == table_utf_8 || c < 128)
212 return strings[c];
214 for (j = 0; codepages[from].table[j].c; j++)
215 if (codepages[from].table[j].c == c)
216 return encode_utf_8(codepages[from].table[j].u);
218 return encode_utf_8(UCS_NO_CHAR);
221 static void
222 add_utf_8(struct conv_table *ct, unicode_val_T u, unsigned char *str)
224 unsigned char *p = encode_utf_8(u);
226 while (p[1]) {
227 if (ct[*p].t) ct = ct[*p].u.tbl;
228 else {
229 struct conv_table *nct;
231 assertm(ct[*p].u.str == no_str, "bad utf encoding #1");
232 if_assert_failed return;
234 nct = mem_calloc(256, sizeof(*nct));
235 if (!nct) return;
236 new_translation_table(nct);
237 ct[*p].t = 1;
238 ct[*p].u.tbl = nct;
239 ct = nct;
241 p++;
244 assertm(!ct[*p].t, "bad utf encoding #2");
245 if_assert_failed return;
247 if (ct[*p].u.str == no_str)
248 ct[*p].u.str = str;
251 struct conv_table utf_table[256];
252 int utf_table_init = 1;
254 static void
255 free_utf_table(void)
257 int i;
259 for (i = 128; i < 256; i++)
260 mem_free(utf_table[i].u.str);
263 static struct conv_table *
264 get_translation_table_to_utf_8(int from)
266 int i;
267 static int lfr = -1;
269 if (from == -1) return NULL;
270 from &= ~SYSTEM_CHARSET_FLAG;
271 if (from == lfr) return utf_table;
272 if (utf_table_init)
273 memset(utf_table, 0, sizeof(utf_table)),
274 utf_table_init = 0;
275 else
276 free_utf_table();
278 for (i = 0; i < 128; i++)
279 utf_table[i].u.str = strings[i];
281 if (codepages[from].table == table_utf_8) {
282 for (i = 128; i < 256; i++)
283 utf_table[i].u.str = stracpy(strings[i]);
284 return utf_table;
287 for (i = 128; i < 256; i++)
288 utf_table[i].u.str = NULL;
290 for (i = 0; codepages[from].table[i].c; i++) {
291 unicode_val_T u = codepages[from].table[i].u;
293 if (!utf_table[codepages[from].table[i].c].u.str)
294 utf_table[codepages[from].table[i].c].u.str =
295 stracpy(encode_utf_8(u));
298 for (i = 128; i < 256; i++)
299 if (!utf_table[i].u.str)
300 utf_table[i].u.str = stracpy(no_str);
302 return utf_table;
305 struct conv_table table[256];
306 static int first = 1;
308 void
309 free_conv_table(void)
311 if (!utf_table_init) free_utf_table();
312 if (first) {
313 memset(table, 0, sizeof(table));
314 first = 0;
316 new_translation_table(table);
320 struct conv_table *
321 get_translation_table(int from, int to)
323 static int lfr = -1;
324 static int lto = -1;
326 from &= ~SYSTEM_CHARSET_FLAG;
327 to &= ~SYSTEM_CHARSET_FLAG;
328 if (first) {
329 memset(table, 0, sizeof(table));
330 first = 0;
332 if (/*from == to ||*/ from == -1 || to == -1)
333 return NULL;
334 if (codepages[to].table == table_utf_8)
335 return get_translation_table_to_utf_8(from);
336 if (from == lfr && to == lto)
337 return table;
338 lfr = from;
339 lto = to;
340 new_translation_table(table);
342 if (codepages[from].table == table_utf_8) {
343 int i;
345 for (i = 0; codepages[to].table[i].c; i++)
346 add_utf_8(table, codepages[to].table[i].u,
347 strings[codepages[to].table[i].c]);
349 for (i = 0; unicode_7b[i].x != -1; i++)
350 if (unicode_7b[i].x >= 0x80)
351 add_utf_8(table, unicode_7b[i].x,
352 unicode_7b[i].s);
354 } else {
355 int i;
357 for (i = 128; i < 256; i++) {
358 int j;
360 for (j = 0; codepages[from].table[j].c; j++) {
361 if (codepages[from].table[j].c == i) {
362 unsigned char *u;
364 u = u2cp(codepages[from].table[j].u, to);
365 if (u) table[i].u.str = u;
366 break;
372 return table;
/* Compare the @l2 bytes at @s1 with the NUL-terminated string @s2.
 *
 * Returns 1 or -1 at the first differing byte (unsigned comparison).  If
 * all @l2 bytes match, returns 0 when @s2 ends right there and -1 when
 * @s2 is longer.  @s1 does not need to be NUL-terminated. */
static inline int
xxstrcmp(unsigned char *s1, unsigned char *s2, int l2)
{
	for (; l2; s1++, s2++, l2--) {
		if (*s1 != *s2)
			return (*s1 > *s2) ? 1 : -1;
	}

	return *s2 ? -1 : 0;
}
389 /* Entity cache debugging purpose. */
390 #if 0
391 #define DEBUG_ENTITY_CACHE
392 #else
393 #undef DEBUG_ENTITY_CACHE
394 #endif
/* One cached entity lookup. */
struct entity_cache {
	unsigned int hits;	/* how often this entry was found */
	int strlen;		/* length of str, without the NUL */
	int encoding;		/* codepage the result was computed for */
	unsigned char *result;	/* translation; NULL caches a failed lookup */
	unsigned char str[20];	/* Suffice in any case. */
};

/* Sort callback: orders cache entries by descending hit count. */
static int
hits_cmp(struct entity_cache *a, struct entity_cache *b)
{
	if (a->hits > b->hits) return -1;
	if (a->hits < b->hits) return 1;
	return 0;
}
412 static int
413 compare_entities(const void *key_, const void *element_)
415 struct string *key = (struct string *) key_;
416 struct entity *element = (struct entity *) element_;
417 int length = key->length;
418 unsigned char *first = key->source;
419 unsigned char *second = element->s;
421 return xxstrcmp(first, second, length);
424 unsigned char *
425 get_entity_string(const unsigned char *str, const int strlen, int encoding)
427 #define ENTITY_CACHE_SIZE 10 /* 10 seems a good value. */
428 #define ENTITY_CACHE_MAXLEN 9 /* entities with length >= ENTITY_CACHE_MAXLEN or == 1
429 will go in [0] table */
430 static struct entity_cache entity_cache[ENTITY_CACHE_MAXLEN][ENTITY_CACHE_SIZE];
431 static unsigned int nb_entity_cache[ENTITY_CACHE_MAXLEN];
432 static int first_time = 1;
433 unsigned int slen;
434 unsigned char *result = NULL;
436 if (strlen <= 0) return NULL;
438 if (first_time) {
439 memset(&nb_entity_cache, 0, ENTITY_CACHE_MAXLEN * sizeof(unsigned int));
440 first_time = 0;
443 /* Check if cached. A test on many websites (freshmeat.net + whole ELinks website
444 * + google + slashdot + websites that result from a search for test on google,
445 * + various ones) show a quite impressive improvment:
446 * Top ten is:
447 * 0: hits=2459 l=4 st='nbsp'
448 * 1: hits=2152 l=6 st='eacute'
449 * 2: hits=235 l=6 st='egrave'
450 * 3: hits=136 l=6 st='agrave'
451 * 4: hits=100 l=3 st='amp'
452 * 5: hits=40 l=5 st='laquo'
453 * 6: hits=8 l=4 st='copy'
454 * 7: hits=5 l=2 st='gt'
455 * 8: hits=2 l=2 st='lt'
456 * 9: hits=1 l=6 st='middot'
458 * Most of the time cache hit ratio is near 95%.
460 * A long test shows: 15186 hits vs. 24 misses and mean iteration
461 * count is kept < 2 (worst case 1.58). Not so bad ;)
463 * --Zas */
465 /* entities with length >= ENTITY_CACHE_MAXLEN or == 1 will go in [0] table */
466 slen = (strlen > 1 && strlen < ENTITY_CACHE_MAXLEN) ? strlen : 0;
468 if (strlen < ENTITY_CACHE_MAXLEN && nb_entity_cache[slen] > 0) {
469 int i;
471 for (i = 0; i < nb_entity_cache[slen]; i++) {
472 if (entity_cache[slen][i].encoding == encoding
473 && !memcmp(str, entity_cache[slen][i].str, strlen)) {
474 #ifdef DEBUG_ENTITY_CACHE
475 static double total_iter = 0;
476 static unsigned long hit_count = 0;
478 total_iter += i + 1;
479 hit_count++;
480 fprintf(stderr, "hit after %d iter. (mean = %0.2f)\n", i + 1, total_iter / (double) hit_count);
481 #endif
482 if (entity_cache[slen][i].hits < (unsigned int) ~0)
483 entity_cache[slen][i].hits++;
484 return entity_cache[slen][i].result;
487 #ifdef DEBUG_ENTITY_CACHE
488 fprintf(stderr, "miss\n");
489 #endif
492 if (*str == '#') { /* Numeric entity. */
493 int l = (int) strlen;
494 unsigned char *st = (unsigned char *) str;
495 unicode_val_T n = 0;
497 if (l == 1) goto end; /* &#; ? */
498 st++, l--;
499 if ((*st | 32) == 'x') { /* Hexadecimal */
501 if (l == 1 || l > 9) goto end; /* xFFFFFFFF max. */
502 st++, l--;
503 do {
504 unsigned char c = (*(st++) | 32);
506 if (isdigit(c))
507 n = (n << 4) | (c - '0');
508 else if (isxdigit(c))
509 n = (n << 4) | (c - 'a' + 10);
510 else
511 goto end; /* Bad char. */
512 } while (--l);
513 } else { /* Decimal */
514 if (l > 10) goto end; /* 4294967295 max. */
515 do {
516 unsigned char c = *(st++);
518 if (isdigit(c))
519 n = n * 10 + c - '0';
520 else
521 goto end; /* Bad char. */
522 /* Limit to 0xFFFFFFFF. */
523 if (n >= (unicode_val_T) 0xFFFFFFFFu)
524 goto end;
525 } while (--l);
528 result = u2cp(n, encoding);
530 #ifdef DEBUG_ENTITY_CACHE
531 fprintf(stderr, "%lu %016x %s\n", (unsigned long) n , n, result);
532 #endif
533 } else { /* Text entity. */
534 struct string key = INIT_STRING((unsigned char *) str, strlen);
535 struct entity *element = bsearch((void *) &key, entities,
536 N_ENTITIES,
537 sizeof(*element),
538 compare_entities);
540 if (element) result = u2cp(element->c, encoding);
543 end:
544 /* Take care of potential buffer overflow. */
545 if (strlen < sizeof(entity_cache[slen][0].str)) {
546 struct entity_cache *ece = &entity_cache[slen][nb_entity_cache[slen]];
548 /* Copy new entry to cache. */
549 ece->hits = 1;
550 ece->strlen = strlen;
551 ece->encoding = encoding;
552 ece->result = result;
553 memcpy(ece->str, str, strlen);
554 ece->str[strlen] = '\0';
556 /* Increment number of cache entries if possible. */
557 if (nb_entity_cache[slen] < ENTITY_CACHE_SIZE) nb_entity_cache[slen]++;
559 #ifdef DEBUG_ENTITY_CACHE
560 fprintf(stderr, "Added in [%u]: l=%d st='%s'\n", slen,
561 entity_cache[slen][0].strlen, entity_cache[slen][0].str);
563 #endif
565 /* Sort entries by hit order. */
566 if (nb_entity_cache[slen] > 1)
567 qsort(&entity_cache[slen][0], nb_entity_cache[slen],
568 sizeof(entity_cache[slen][0]), (void *) hits_cmp);
570 #ifdef DEBUG_ENTITY_CACHE
572 unsigned int i;
574 fprintf(stderr, "- Cache entries [%u] -\n", slen);
575 for (i = 0; i < nb_entity_cache[slen] ; i++)
576 fprintf(stderr, "%d: hits=%u l=%d st='%s'\n", i,
577 entity_cache[slen][i].hits, entity_cache[slen][i].strlen,
578 entity_cache[slen][i].str);
579 fprintf(stderr, "-----------------\n");
581 #endif
583 return result;
586 unsigned char *
587 convert_string(struct conv_table *convert_table,
588 unsigned char *chars, int charslen, int cp,
589 enum convert_string_mode mode, int *length,
590 void (*callback)(void *data, unsigned char *buf, int buflen),
591 void *callback_data)
593 unsigned char *buffer;
594 int bufferpos = 0;
595 int charspos = 0;
597 if (!convert_table && !memchr(chars, '&', charslen)) {
598 if (callback) {
599 if (charslen) callback(callback_data, chars, charslen);
600 return NULL;
601 } else {
602 return memacpy(chars, charslen);
606 /* Buffer allocation */
608 buffer = mem_alloc(ALLOC_GR + 1 /* trailing \0 */);
609 if (!buffer) return NULL;
611 /* Iterate ;-) */
613 while (charspos < charslen) {
614 unsigned char *translit;
616 #define PUTC do { \
617 buffer[bufferpos++] = chars[charspos++]; \
618 translit = ""; \
619 goto flush; \
620 } while (0)
622 if (chars[charspos] != '&') {
623 struct conv_table *t;
624 int i;
626 if (chars[charspos] < 128 || !convert_table) PUTC;
628 t = convert_table;
629 i = charspos;
631 while (t[chars[i]].t) {
632 t = t[chars[i++]].u.tbl;
633 if (i >= charslen) PUTC;
636 translit = t[chars[i]].u.str;
637 charspos = i + 1;
639 } else if (mode == CSM_FORM || mode == CSM_NONE) {
640 PUTC;
642 } else {
643 int start = charspos + 1;
644 int i = start;
646 while (i < charslen
647 && (isasciialpha(chars[i])
648 || isdigit(chars[i])
649 || (chars[i] == '#')))
650 i++;
652 /* This prevents bug 213: we were expanding "entities"
653 * in URL query strings. */
654 /* XXX: But this disables &nbsp&nbsp usage, which
655 * appears to be relatively common! --pasky */
656 if ((mode == CSM_DEFAULT || (chars[i] != '&' && chars[i] != '='))
657 && i > start
658 && !isasciialpha(chars[i]) && !isdigit(chars[i])) {
659 translit = get_entity_string(&chars[start], i - start,
660 cp);
661 if (chars[i] != ';') {
662 /* Eat &nbsp &nbsp<foo> happily, but
663 * pull back from the character after
664 * entity string if it is not the valid
665 * terminator. */
666 i--;
669 if (!translit) PUTC;
670 charspos = i + (i < charslen);
671 } else PUTC;
674 if (!translit[0]) continue;
676 if (!translit[1]) {
677 buffer[bufferpos++] = translit[0];
678 translit = "";
679 goto flush;
682 while (*translit) {
683 unsigned char *new;
685 buffer[bufferpos++] = *(translit++);
686 flush:
687 if (bufferpos & (ALLOC_GR - 1)) continue;
689 if (callback) {
690 buffer[bufferpos] = 0;
691 callback(callback_data, buffer, bufferpos);
692 bufferpos = 0;
693 } else {
694 new = mem_realloc(buffer, bufferpos + ALLOC_GR);
695 if (!new) {
696 mem_free(buffer);
697 return NULL;
699 buffer = new;
702 #undef PUTC
705 /* Say bye */
707 buffer[bufferpos] = 0;
708 if (length) *length = bufferpos;
710 if (callback) {
711 if (bufferpos) callback(callback_data, buffer, bufferpos);
712 mem_free(buffer);
713 return NULL;
714 } else {
715 return buffer;
720 #ifndef USE_FASTFIND
722 get_cp_index(unsigned char *name)
724 int i, a;
725 int syscp = 0;
727 if (!strcasecmp(name, "System")) {
728 #if HAVE_LANGINFO_CODESET
729 name = nl_langinfo(CODESET);
730 syscp = SYSTEM_CHARSET_FLAG;
731 #else
732 name = "us-ascii";
733 #endif
736 for (i = 0; codepages[i].name; i++) {
737 for (a = 0; codepages[i].aliases[a]; a++) {
738 /* In the past, we looked for the longest substring
739 * in all the names; it is way too expensive, though:
741 * % cumulative self self total
742 * time seconds seconds calls us/call us/call name
743 * 3.00 0.66 0.03 1325 22.64 22.64 get_cp_index
745 * Anything called from redraw_screen() is in fact
746 * relatively expensive, even if it's called just
747 * once. So we will do a simple strcasecmp() here.
750 if (!strcasecmp(name, codepages[i].aliases[a]))
751 return i | syscp;
755 if (syscp) {
756 return get_cp_index("us-ascii") | syscp;
757 } else {
758 return -1;
762 #else
764 static unsigned int i_name = 0;
765 static unsigned int i_alias = 0;
767 /* Reset internal list pointer */
768 void
769 charsets_list_reset(void)
771 i_name = 0;
772 i_alias = 0;
775 /* Returns a pointer to a struct that contains current key and data pointers
776 * and increment internal pointer. It returns NULL when key is NULL. */
777 struct fastfind_key_value *
778 charsets_list_next(void)
780 static struct fastfind_key_value kv;
782 if (!codepages[i_name].name) return NULL;
784 kv.key = codepages[i_name].aliases[i_alias];
785 kv.data = &codepages[i_name];
787 if (codepages[i_name].aliases[i_alias + 1])
788 i_alias++;
789 else {
790 i_name++;
791 i_alias = 0;
794 return &kv;
797 static struct fastfind_index ff_charsets_index
798 = INIT_FASTFIND_INDEX("charsets_lookup", charsets_list_reset, charsets_list_next);
800 /* It searchs for a charset named @name or one of its aliases and
801 * returns index for it or -1 if not found. */
803 get_cp_index(unsigned char *name)
805 struct codepage_desc *codepage;
806 int syscp = 0;
808 if (!strcasecmp(name, "System")) {
809 #if HAVE_LANGINFO_CODESET
810 name = nl_langinfo(CODESET);
811 syscp = SYSTEM_CHARSET_FLAG;
812 #else
813 name = "us-ascii";
814 #endif
817 codepage = fastfind_search(&ff_charsets_index, name, strlen(name));
818 if (codepage) {
819 assert(codepages <= codepage && codepage < codepages + N_CODEPAGES);
820 return (codepage - codepages) | syscp;
822 } else if (syscp) {
823 return get_cp_index("us-ascii") | syscp;
825 } else {
826 return -1;
830 #endif /* USE_FASTFIND */
/* Build the fastfind index used by get_cp_index().  Does nothing when
 * the file is compiled without USE_FASTFIND. */
void
init_charsets_lookup(void)
{
#ifdef USE_FASTFIND
	fastfind_index(&ff_charsets_index, FF_COMPRESS);
#endif
}
/* Release the fastfind index set up by init_charsets_lookup().  Does
 * nothing when the file is compiled without USE_FASTFIND. */
void
free_charsets_lookup(void)
{
#ifdef USE_FASTFIND
	fastfind_done(&ff_charsets_index);
#endif
}
848 unsigned char *
849 get_cp_name(int cp_index)
851 if (cp_index < 0) return "none";
852 if (cp_index & SYSTEM_CHARSET_FLAG) return "System";
854 return codepages[cp_index].name;
857 unsigned char *
858 get_cp_mime_name(int cp_index)
860 if (cp_index < 0) return "none";
861 if (cp_index & SYSTEM_CHARSET_FLAG) return "System";
862 if (!codepages[cp_index].aliases) return NULL;
864 return codepages[cp_index].aliases[0];
868 is_cp_special(int cp_index)
870 cp_index &= ~SYSTEM_CHARSET_FLAG;
871 return codepages[cp_index].table == table_utf_8;