it's now safe (i hope) to include Jambase.configure multiple times
[k8jam.git] / src / unigen.c
blob9e032c7f802755db325c59f9b6ef8341a2ed5e9f
/* coded by Ketmar // Vampire Avalon (psyc://ketmar.no-ip.org/~Ketmar)
 * Understanding is not required. Only obedience.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// Strip leading and trailing whitespace from `str` in place.
//
// The surviving characters are moved to the start of the buffer, so the
// returned pointer is always `str` itself. Returns NULL if `str` is NULL.
static char *trim (char *str) {
  size_t start, len;
  if (str == NULL) return NULL;
  len = strlen(str);
  // work with indices: a pointer scan would have to step before `str`
  // for empty or all-blank strings, which is undefined behaviour
  // (the cast keeps isspace() defined for bytes >= 0x80 when char is signed)
  while (len > 0 && isspace((unsigned char)str[len-1])) --len;
  for (start = 0; start < len && isspace((unsigned char)str[start]); ++start) ;
  len -= start;
  if (start > 0) memmove(str, str+start, len);
  str[len] = 0;
  return str;
}
// Parse `s` as a non-negative hexadecimal number.
//
// `s` is trimmed in place first. Returns the parsed value, or -1 if `s`
// is NULL, empty, has trailing garbage, is negative, or does not fit
// into an int.
static int hex2num (char *s) {
  char *end;
  long res;
  if (s == NULL) return -1;
  trim(s);
  if (!s[0]) return -1;
  errno = 0;
  res = strtol(s, &end, 16);
  // validate as long *before* narrowing, so oversized input cannot
  // wrap around into a plausible-looking int
  if (end == s || end[0] || errno == ERANGE) return -1;
  if (res < 0 || res > INT_MAX) return -1;
  return (int)res;
}
// One parsed input record, as filled in by read_record().
// `name` and `class` point into read_record()'s static line buffer,
// so they are only valid until the next record is read.
typedef struct UCInfo {
  int code;           // code point of this record
  const char *name;   // second field of the record
  const char *class;  // third field; main() compares it with "Lu" and "Ll"
  int upper;          // uppercase mapping (equals `code` when the field is empty)
  int lower;          // lowercase mapping (equals `code` when the field is empty)
} UCInfo;
// Tokenizer cursor: start of the next unread field of the current line,
// or NULL when there is no current line or its fields are exhausted.
static char *stx = NULL;

// Return the next ';'-separated field of the line `stx` points into.
//
// The field is NUL-terminated in place (the delimiter is overwritten).
// A line with N delimiters yields N+1 fields, so empty fields (including
// an empty last one) are returned as "". Once all fields have been
// handed out, NULL is returned, which lets callers detect short records.
static char *tok (void) {
  char *s = stx;
  if (s == NULL) return NULL;
  while (*stx && *stx != ';') ++stx;
  if (*stx) {
    *stx++ = 0;
  } else {
    // that was the last field; nothing more to return
    stx = NULL;
  }
  return s;
}
65 // <0: eof; 0: bad record; >0: good record
66 static int read_record (FILE *fi, UCInfo *ui) {
67 static char str[8192];
68 char *tk, *u, *l;
69 int f;
70 if (fgets(str, sizeof(str)-1, fi) == NULL) return -1;
71 stx = trim(str);
72 if ((tk = tok()) == NULL) return 0;
73 if ((ui->code = hex2num(tk)) < 0) return 0;
74 if (ui->code > 65535) return -1;
75 if ((ui->name = trim(tok())) == NULL) return 0;
76 if ((ui->class = trim(tok())) == NULL) return 0;
77 // skip unused fields
78 for (f = 9; f > 0; --f) if (tok() == NULL) { printf("%d\n", f); return 0; }
79 if ((u = trim(tok())) == NULL) return 0;
80 if ((l = trim(tok())) == NULL) return 0;
81 if (!u[0]) ui->upper = ui->code; else ui->upper = hex2num(u);
82 if (!l[0]) ui->lower = ui->code; else ui->lower = hex2num(l);
83 if (ui->upper < 0 || ui->lower < 0) return 0;
84 if (ui->upper > 65535 || ui->lower > 65535) abort();
85 return 1;
// One entry of the generated case-mapping table.
typedef struct {
  int code;  // source code point
  int l;     // its lowercase mapping
  int u;     // its uppercase mapping
} mm;

// Collected entries; main() appends to it and then writes it out.
static mm map[65535];


// qsort() comparator: orders `mm` entries by ascending `code`.
static int mm_cmp (const void *p0, const void *p1) {
  const int lhs = ((const mm *)p0)->code;
  const int rhs = ((const mm *)p1)->code;
  // yields -1, 0 or 1 without risking overflow from a subtraction
  return (lhs > rhs)-(lhs < rhs);
}
103 int main (int argc, char *argv[]) {
104 UCInfo ui;
105 int rc, f, totalmap = 0;
106 FILE *fi, *fo;
107 if (argc != 3) { fprintf(stderr, "usage: %s unitable.c unitable.txt\n", argv[0]); exit(1); }
108 if ((fi = fopen(argv[2], "r")) == NULL) { fprintf(stderr, "FATAL: can't open input file: '%s'\n", argv[2]); exit(1); }
109 for (;;) {
110 rc = read_record(fi, &ui);
111 if (rc < 0) break;
112 if (rc == 0) continue;
113 if (ui.code < 128) continue;
114 if (strcmp(ui.class, "Lu") != 0 && strcmp(ui.class, "Ll") != 0) continue;
115 if (ui.upper == ui.code && ui.lower == ui.code) continue;
116 for (f = 0; f < totalmap; ++f) {
117 if (map[f].code == ui.code) { fprintf(stderr, "FATAL: duplicate entries in the map!\n"); exit(1); }
118 if (map[f].code > ui.code) { fprintf(stderr, "FATAL: invalid entry order in the map!\n"); exit(1); }
120 if (totalmap > 65535) { fprintf(stderr, "FATAL: too many entries in the map!\n"); exit(1); }
121 map[totalmap].code = ui.code;
122 map[totalmap].l = ui.lower;
123 map[totalmap].u = ui.upper;
124 ++totalmap;
126 fclose(fi);
127 qsort(map, totalmap, sizeof(map[0]), mm_cmp);
128 fo = fopen(argv[1], "w");
129 if (fo == NULL) { fprintf(stderr, "FATAL: can't create output file: '%s'\n", argv[1]); exit(1); }
130 fprintf(fo, "static const struct casemap unicode_case_mapping[%d] = {\n", totalmap);
131 for (f = 0; f < totalmap; ++f) fprintf(fo, "{0x%04x,0x%04x,0x%04x},\n", map[f].code, map[f].l, map[f].u);
132 fprintf(fo, "%s\n", "};");
133 fclose(fo);
134 return 0;