7 #include "StringEscape.h"
/* Evaluates to non-zero when the C strings A and B are equal. */
#define streq(A, B) (!strcmp((A), (B)))

/*
 * If string S begins with the string literal W, evaluates to a pointer to the
 * first character of S after that prefix; otherwise evaluates to NULL.
 *
 * W must be a string literal (or char array) because its length is taken with
 * sizeof. strncmp is used rather than memcmp so the comparison stops at the
 * terminator of S instead of reading sizeof(W) - 1 bytes unconditionally.
 * S is evaluated more than once: do not pass an expression with side effects.
 */
#define strstarts(S, W) (strncmp((S), (W), sizeof(W) - 1) ? NULL : ((S) + (sizeof(W) - 1)))

/*
 * File-wide counter used while handling "fuzzy" entries: it is bumped when a
 * ", fuzzy" marker is seen, decremented on msgctxt/msgid lines, and lines are
 * dropped by poparser_feed_line while it is greater than zero.
 */
static unsigned fuzzymark = 0;
/*
 * Classify the line starting at lp and locate the start of its string payload.
 *
 * info        - parser state; nplurals is read to validate "msgstr[N]", and
 *               charset / nplurals are updated from quoted header lines.
 * lp          - start of the line.
 * end         - bound used by the whitespace-skipping loops.
 * stringstart - receives the offset, relative to the original lp, of the
 *               character that follows the opening double quote.
 *
 * Returns the po_entry kind detected for the line. Calls abort() when a
 * recognized msg* keyword is not followed by a double-quoted string.
 */
13 static enum po_entry
get_type_and_start(struct po_info
*info
, char* lp
, char* end
, size_t *stringstart
) {
14 enum po_entry result_type
;
/* remember where the line began so stringstart can be stored as an offset */
16 size_t start
= (size_t) lp
;
/* skip leading whitespace.
 * NOTE(review): *lp is read before the lp < end bound is tested. */
17 while(isspace(*lp
) && lp
< end
) lp
++;
/* a ", fuzzy" flag on this line: the counter is only incremented here when it
 * is already non-zero */
20 if((s
= strstr(lp
, ", fuzzy"))) {
21 if(fuzzymark
!= 0) fuzzymark
++;
/* keyword lines: y points just past the common "msg" prefix */
27 } else if((y
= strstarts(lp
, "msg"))) {
/* "id" must be followed by whitespace, so "msgid_plural" is not caught by this
 * first test and falls through to the next one */
28 if((x
= strstarts(y
, "id")) && isspace(*x
))
29 result_type
= pe_msgid
;
30 else if ((x
= strstarts(y
, "id_plural")) && isspace(*x
))
31 result_type
= pe_plural
;
32 else if ((x
= strstarts(y
, "ctxt")) && isspace(*x
))
33 result_type
= pe_ctxt
;
/* plain "msgstr", or "msgstr[N]" with a single character N whose digit value is
 * below info->nplurals; the x += 3 inside the condition steps over "[N]".
 * NOTE(review): there is no lower-bound check on x[1] - '0'. */
34 else if ((x
= strstarts(y
, "str")) && (isspace(*x
) ||
35 (x
[0] == '[' && (x
[1]-'0') < info
->nplurals
&& x
[2] == ']' && (x
+= 3) && isspace(*x
))))
36 result_type
= pe_msgstr
;
/* skip the whitespace between the keyword and the opening quote.
 * NOTE(review): *x is read before the x < end bound is tested. */
39 while(isspace(*x
) && x
< end
) x
++;
/* the payload has to start with a double quote */
40 if(*x
!= '"') abort();
/* offset of the first character after the opening quote */
42 *stringstart
= ((size_t) x
- start
) + 1;
/* a line that starts directly with a quoted string */
43 } else if(lp
[0] == '"') {
/* only look for a charset while none has been recorded yet */
44 if(!(*info
->charset
)) {
45 if((x
= strstr(lp
, "charset="))) {
/* x is advanced past "charset="; len is the remaining length minus 4.
 * NOTE(review): presumably the 4 covers the trailing escaped newline, closing
 * quote and line terminator - confirm. No bound on len against the capacity of
 * info->charset is visible before the memcpy below. */
47 int len
= strlen(x
+=8) - 4;
/* store the name only when it is neither "UTF-8" nor "utf-8" */
49 if(strncmp(x
, "UTF-8", 5) && strncmp(x
, "utf-8", 5)) {
50 memcpy(info
->charset
, x
, len
);
51 info
->charset
[len
] = 0;
/* plural count: the single character following "nplurals=" is taken as a digit */
55 if((x
= strstr(lp
, "nplurals=")))
57 info
->nplurals
= *(x
+9) - '0';
67 /* expects a pointer to the first char after an opening " in a string,
68 * converts the string into convbuf, and returns the length of that string */
/*
 * Unescape the quoted string beginning at x into convbuf, first converting it
 * to UTF-8 with iconv when a conversion from info->charset can be opened.
 *
 * info       - supplies the source charset name.
 * x          - start of the string text.
 * end        - end of the line buffer; end - x is used as the iconv input size.
 * convbuf    - destination buffer handed to unescape().
 * convbuflen - capacity of convbuf handed to unescape().
 *
 * The value produced by unescape() is stored in result.
 * Calls abort() when the text does not end in a double quote.
 */
69 static size_t get_length_and_convert(struct po_info
*info
, char* x
, char* end
, char* convbuf
, size_t convbuflen
) {
/* e starts at the string terminator of x */
71 char* e
= x
+ strlen(x
);
72 assert(e
> x
&& e
< end
&& *e
== 0);
/* walk backwards over trailing whitespace; the last other character must be
 * the closing quote */
74 while(isspace(*e
)) e
--;
75 if(*e
!= '"') abort();
79 iconv_t ret
= iconv_open("UTF-8", info
->charset
);
80 if(ret
!= (iconv_t
)-1) {
/* a: input byte count, b: output capacity (four output bytes per input byte) */
81 size_t a
=end
-x
, b
=a
*4;
/* mid is a variable-length array receiving the converted text */
82 char mid
[b
], *midp
=mid
;
/* NOTE(review): the conversion runs on a second, freshly opened descriptor
 * instead of ret; no iconv_close is visible for either one, and the return
 * value of iconv is not examined. */
83 iconv(iconv_open("UTF-8", info
->charset
), &x
, &a
, &midp
, &b
);
/* if the converted text carries a "charset=" value, overwrite it with UTF-8,
 * a backslash, 'n' and a terminating NUL (8 bytes in total) */
84 if((s
= strstr(mid
, "charset=")))
85 memcpy(s
+8, "UTF-8\\n\0", 8);
86 result
= unescape(mid
, convbuf
, convbuflen
);
87 // iconv doesn't recognize the encoding: unescape the unconverted text
88 } else result
= unescape(x
, convbuf
, convbuflen
);
/* fallback for the enclosing condition: unescape the unconverted text */
89 } else result
= unescape(x
, convbuf
, convbuflen
);
/*
 * Put a po_parser into its initial state.
 *
 * p       - parser to initialise.
 * workbuf - working buffer and
 * bufsize - its size; presumably stored as p->buf / p->bufsize - confirm.
 * cb      - callback and
 * cbdata  - its opaque argument; presumably stored as p->cb / p->cbdata - confirm.
 */
94 void poparser_init(struct po_parser
*p
, char* workbuf
, size_t bufsize
, poparser_callback cb
, void* cbdata
) {
/* no previous entry has been seen yet */
98 p
->prev_type
= pe_invalid
;
99 p
->prev_rtype
= pe_invalid
;
/* empty charset string: no source encoding known yet */
102 *(p
->info
.charset
) = 0;
103 // nplurals = 2 by default
104 p
->info
.nplurals
= 2;
116 /* return 0 on success */
/*
 * Feed one line of a .po file to the parser.
 *
 * p      - parser state.
 * line   - the line text.
 * buflen - size of the line buffer; line + buflen is passed on as the end bound.
 *
 * The line is classified with get_type_and_start(), and the action taken is
 * looked up in action_tbl using the previous entry type and the new one: string
 * text is either appended to the pending entry in p->buf, or the pending entry
 * is delivered through p->cb and a new one is started.
 */
117 int poparser_feed_line(struct po_parser
*p
, char* line
, size_t buflen
) {
118 char *convbuf
= p
->buf
;
119 size_t convbuflen
= p
->bufsize
;
/* action_tbl[previous type][type of this line] -> action to perform.
 * Each group of designated initializers below fills one row. */
122 static const enum lineactions action_tbl
[pe_max
][pe_max
] = {
123 // pe_str will never be set as curr_type
126 [pe_msgid
] = la_abort
,
127 [pe_ctxt
] = la_abort
,
128 [pe_plural
] = la_abort
,
129 [pe_msgstr
] = la_abort
,
130 [pe_invalid
] = la_abort
,
134 [pe_msgid
] = la_abort
,
135 [pe_ctxt
] = la_abort
,
136 [pe_plural
] = la_proc
,
137 [pe_msgstr
] = la_proc
,
138 [pe_invalid
] = la_proc
,
142 [pe_msgid
] = la_proc
,
143 [pe_ctxt
] = la_abort
,
144 [pe_plural
] = la_abort
,
145 [pe_msgstr
] = la_abort
,
146 [pe_invalid
] = la_proc
,
150 [pe_msgid
] = la_abort
,
151 [pe_ctxt
] = la_abort
,
152 [pe_plural
] = la_abort
,
153 [pe_msgstr
] = la_proc
,
154 [pe_invalid
] = la_proc
,
158 [pe_msgid
] = la_proc
,
160 [pe_plural
] = la_abort
,
161 [pe_msgstr
] = la_proc
,
162 [pe_invalid
] = la_proc
,
166 [pe_msgid
] = la_incr
,
168 [pe_plural
] = la_nop
,
169 [pe_msgstr
] = la_nop
,
170 [pe_invalid
] = la_nop
,
/* classify the line; strstart receives the offset of its string payload */
176 type
= get_type_and_start(&p
->info
, line
, line
+ buflen
, &strstart
);
/* check the transition from the last keyword entry (prev_rtype) to this line */
177 if(p
->prev_rtype
!= pe_invalid
&& action_tbl
[p
->prev_rtype
][type
] == la_abort
)
/* prev_rtype only follows keyword entries: pe_invalid and pe_str lines leave
 * it unchanged */
179 if(type
!= pe_invalid
&& type
!= pe_str
)
180 p
->prev_rtype
= type
;
/* fuzzy handling: the counter is decremented on msgctxt (only when it equals 1)
 * and on msgid; while it stays above zero the line is dropped.
 * NOTE(review): fuzzymark is unsigned, so the msgid decrement relies on the
 * counter being non-zero here - confirm a guard encloses these statements. */
182 if(type
== pe_ctxt
&& fuzzymark
== 1) fuzzymark
--;
183 if(type
== pe_msgid
) fuzzymark
--;
184 if(fuzzymark
> 0) return 0;
/* dispatch on the transition from the previous entry type to this line */
186 switch(action_tbl
[p
->prev_type
][type
]) {
/* NOTE(review): the last operand is the bare constant pe_ctxt rather than a
 * comparison; "type == pe_ctxt" looks intended. */
188 assert(type
== pe_msgid
|| type
== pe_msgstr
|| type
== pe_str
|| type
== pe_plural
|| pe_ctxt
);
/* append this line's text after what is already held in the buffer */
189 p
->curr_len
+= get_length_and_convert(&p
->info
, line
+ strstart
, line
+ buflen
, convbuf
+ p
->curr_len
, convbuflen
- p
->curr_len
);
192 assert(p
->prev_type
== pe_msgid
|| p
->prev_type
== pe_msgstr
|| p
->prev_type
== pe_plural
|| p
->prev_type
== pe_ctxt
);
/* hand the completed previous entry to the callback */
193 p
->info
.text
= convbuf
;
194 p
->info
.textlen
= p
->curr_len
;
195 p
->info
.type
= p
->prev_type
;
196 p
->cb(&p
->info
, p
->cbdata
);
/* start collecting the new entry from the beginning of the buffer */
197 if(type
!= pe_invalid
)
198 p
->curr_len
= get_length_and_convert(&p
->info
, line
+ strstart
, line
+ buflen
, convbuf
, convbuflen
);
207 // todo : return error code
/*
 * Finish parsing by feeding the buffer `empty` to poparser_feed_line() and
 * returning that call's result.
 */
215 int poparser_finish(struct po_parser
*p
) {
217 return poparser_feed_line(p
, empty
, sizeof(empty
));