Add support for Thumb-only processors.
[uclibc-ng.git] / utils / poparser.c
blob236a1d995c239a75406addb802d8b9221e47f810
1 #include <ctype.h>
2 #include <assert.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <iconv.h>
6 #include "poparser.h"
7 #include "StringEscape.h"
/* Nonzero when the NUL-terminated strings A and B compare equal. */
#define streq(A, B) (!strcmp((A), (B)))

/* If the string S begins with W, evaluates to a pointer to the first char of
 * S after that prefix; otherwise evaluates to NULL.
 * W must be a string literal (or char array): its length is taken with
 * sizeof. S is evaluated twice, so it must be free of side effects.
 * strncmp is used rather than memcmp so that a subject shorter than W is
 * never read beyond its terminating NUL. */
#define strstarts(S, W) (strncmp((S), (W), sizeof(W) - 1) ? NULL : ((S) + (sizeof(W) - 1)))

/* Counter driving the skipping of entries flagged "#, fuzzy".
 * get_type_and_start() sets it to 2 (or increments it when already nonzero)
 * on a comment line containing ", fuzzy"; poparser_feed_line() decrements it
 * on msgid/msgctxt lines and ignores input while it is still nonzero;
 * poparser_init() resets it to 0. */
static unsigned fuzzymark = 0;
13 static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) {
14 enum po_entry result_type;
15 char *x, *y;
16 size_t start = (size_t) lp;
17 while(isspace(*lp) && lp < end) lp++;
18 if(lp[0] == '#') {
19 char *s;
20 if((s = strstr(lp, ", fuzzy"))) {
21 if(fuzzymark != 0) fuzzymark++;
22 else fuzzymark=2;
24 inv:
25 *stringstart = 0;
26 return pe_invalid;
27 } else if((y = strstarts(lp, "msg"))) {
28 if((x = strstarts(y, "id")) && isspace(*x))
29 result_type = pe_msgid;
30 else if ((x = strstarts(y, "id_plural")) && isspace(*x))
31 result_type = pe_plural;
32 else if ((x = strstarts(y, "ctxt")) && isspace(*x))
33 result_type = pe_ctxt;
34 else if ((x = strstarts(y, "str")) && (isspace(*x) ||
35 (x[0] == '[' && (x[1]-'0') < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x))))
36 result_type = pe_msgstr;
37 else
38 goto inv;
39 while(isspace(*x) && x < end) x++;
40 if(*x != '"') abort();
41 conv:
42 *stringstart = ((size_t) x - start) + 1;
43 } else if(lp[0] == '"') {
44 if(!(*info->charset)) {
45 if((x = strstr(lp, "charset="))) {
46 // charset=xxx\\n
47 int len = strlen(x+=8) - 4;
48 assert(len <= 11);
49 if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) {
50 memcpy(info->charset, x, len);
51 info->charset[len] = 0;
55 if((x = strstr(lp, "nplurals=")))
56 if(*(x+9) - '0')
57 info->nplurals = *(x+9) - '0';
58 result_type = pe_str;
59 x = lp;
60 goto conv;
61 } else {
62 goto inv;
64 return result_type;
67 /* expects a pointer to the first char after a opening " in a string,
68 * converts the string into convbuf, and returns the length of that string */
69 static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) {
70 size_t result = 0;
71 char* e = x + strlen(x);
72 assert(e > x && e < end && *e == 0);
73 e--;
74 while(isspace(*e)) e--;
75 if(*e != '"') abort();
76 *e = 0;
77 char *s;
78 if(*info->charset) {
79 iconv_t ret = iconv_open("UTF-8", info->charset);
80 if(ret != (iconv_t)-1) {
81 size_t a=end-x, b=a*4;
82 char mid[b], *midp=mid;
83 iconv(iconv_open("UTF-8", info->charset), &x, &a, &midp, &b);
84 if((s = strstr(mid, "charset=")))
85 memcpy(s+8, "UTF-8\\n\0", 8);
86 result = unescape(mid, convbuf, convbuflen);
87 // iconv doesnt recognize the encoding
88 } else result = unescape(x, convbuf, convbuflen);
89 } else result = unescape(x, convbuf, convbuflen);
90 return result;
94 void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata) {
95 p->buf = workbuf;
96 p->bufsize = bufsize;
97 p->cb = cb;
98 p->prev_type = pe_invalid;
99 p->prev_rtype = pe_invalid;
100 p->curr_len = 0;
101 p->cbdata = cbdata;
102 *(p->info.charset) = 0;
103 // nplurals = 2 by default
104 p->info.nplurals = 2;
105 fuzzymark = 0;
/* Actions poparser_feed_line() selects from its transition table. */
enum lineactions {
	la_incr  = 0, /* append the line's string to the accumulated text */
	la_proc  = 1, /* report the accumulated entry, then start a new one */
	la_abort = 2, /* invalid sequence of lines: abort() */
	la_nop   = 3, /* nothing to do */
	la_max   = 4, /* number of actions */
};
116 /* return 0 on success */
117 int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) {
118 char *convbuf = p->buf;
119 size_t convbuflen = p->bufsize;
120 size_t strstart;
122 static const enum lineactions action_tbl[pe_max][pe_max] = {
123 // pe_str will never be set as curr_type
124 [pe_str] = {
125 [pe_str] = la_abort,
126 [pe_msgid] = la_abort,
127 [pe_ctxt] = la_abort,
128 [pe_plural] = la_abort,
129 [pe_msgstr] = la_abort,
130 [pe_invalid] = la_abort,
132 [pe_msgid] = {
133 [pe_str] = la_incr,
134 [pe_msgid] = la_abort,
135 [pe_ctxt] = la_abort,
136 [pe_plural] = la_proc,
137 [pe_msgstr] = la_proc,
138 [pe_invalid] = la_proc,
140 [pe_ctxt] = {
141 [pe_str] = la_incr,
142 [pe_msgid] = la_proc,
143 [pe_ctxt] = la_abort,
144 [pe_plural] = la_abort,
145 [pe_msgstr] = la_abort,
146 [pe_invalid] = la_proc,
148 [pe_plural] = {
149 [pe_str] = la_incr,
150 [pe_msgid] = la_abort,
151 [pe_ctxt] = la_abort,
152 [pe_plural] = la_abort,
153 [pe_msgstr] = la_proc,
154 [pe_invalid] = la_proc,
156 [pe_msgstr] = {
157 [pe_str] = la_incr,
158 [pe_msgid] = la_proc,
159 [pe_ctxt] = la_proc,
160 [pe_plural] = la_abort,
161 [pe_msgstr] = la_proc,
162 [pe_invalid] = la_proc,
164 [pe_invalid] = {
165 [pe_str] = la_nop,
166 [pe_msgid] = la_incr,
167 [pe_ctxt] = la_incr,
168 [pe_plural] = la_nop,
169 [pe_msgstr] = la_nop,
170 [pe_invalid] = la_nop,
174 enum po_entry type;
176 type = get_type_and_start(&p->info, line, line + buflen, &strstart);
177 if(p->prev_rtype != pe_invalid && action_tbl[p->prev_rtype][type] == la_abort)
178 abort();
179 if(type != pe_invalid && type != pe_str)
180 p->prev_rtype = type;
181 if(fuzzymark) {
182 if(type == pe_ctxt && fuzzymark == 1) fuzzymark--;
183 if(type == pe_msgid) fuzzymark--;
184 if(fuzzymark > 0) return 0;
186 switch(action_tbl[p->prev_type][type]) {
187 case la_incr:
188 assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural || pe_ctxt);
189 p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len);
190 break;
191 case la_proc:
192 assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural || p->prev_type == pe_ctxt);
193 p->info.text = convbuf;
194 p->info.textlen = p->curr_len;
195 p->info.type = p->prev_type;
196 p->cb(&p->info, p->cbdata);
197 if(type != pe_invalid)
198 p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen);
199 else
200 p->curr_len = 0;
201 break;
202 case la_nop:
203 break;
204 case la_abort:
205 default:
206 abort();
207 // todo : return error code
209 if(type != pe_str) {
210 p->prev_type = type;
212 return 0;
/* Ends parsing by feeding one empty line, which poparser_feed_line() treats
 * like any other non-entry line: an entry still being collected is handed to
 * the callback. Returns the result of that call. */
int poparser_finish(struct po_parser *p) {
	/* 4 zero bytes; presumably sized so that the prefix comparisons done on
	 * the line stay inside the buffer - TODO confirm */
	char blank[4] = { 0 };
	return poparser_feed_line(p, blank, sizeof blank);
}