depend on __UCLIBC_HAVE_STATX__
[uclibc-ng.git] / utils / msgfmt.c
blob6256eed27d9b5462566271a496cccbeebd42cb67
1 /* msgfmt utility (C) 2012 rofl0r
2 * released under the MIT license, see LICENSE for details */
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <ctype.h>
7 #include <assert.h>
8 #include "poparser.h"
10 // in DO_NOTHING mode, we simply write the msgid twice, once for msgid, once for msgstr.
11 // TODO: maybe make it write "" instead of echoing the msgid.
12 //#define DO_NOTHING
/* Print the one-line usage summary to stdout and terminate the process
 * with exit status 1. Never returns. */
__attribute__((noreturn))
static void syntax(void) {
	fprintf(stdout,
	"Usage: msgfmt [OPTION] filename.po ...\n");
	exit(1);
}
/* Print the version banner to stdout and terminate the process with exit
 * status 0. Never returns. */
__attribute__((noreturn))
static void version(void) {
	fprintf(stdout,
	"these are not (GNU gettext-tools) 99.9999.9999\n");
	exit(0);
}
/* streq(A, B): non-zero when the two NUL-terminated strings are equal. */
#define streq(A, B) (!strcmp((A), (B)))
/* strstarts(S, W): W must be a string literal (its length is taken with
 * sizeof). Yields a pointer just past the prefix when S begins with W,
 * NULL otherwise. strncmp stops at S's terminator, so an S shorter than W
 * is never read past its end (memcmp gave no such guarantee). */
#define strstarts(S, W) (strncmp((S), (W), sizeof(W) - 1) ? NULL : ((S) + (sizeof(W) - 1)))
/* Header written verbatim (via fwrite) at the start of the output file. */
struct mo_hdr {
	unsigned magic;         // 0x950412de in def_hdr
	int rev;                // left 0 by def_hdr
	unsigned numstring;     // number of entries; set from the msgid count in process()
	unsigned off_tbl_org;   // file offset of the original-string table
	unsigned off_tbl_trans; // file offset of the translation table
	unsigned hash_tbl_size; // left 0: no hash table is emitted by this file
	unsigned off_tbl_hash;  // left 0: no hash table is emitted by this file
};
/* file layout:
	header
	strtable (lengths/offsets)
	transtable (lengths/offsets)
	[hashtable]
	strings section
	translations section */
49 const struct mo_hdr def_hdr = {
50 0x950412de,
53 sizeof(struct mo_hdr),
// pass 0: collect numbers of strings, calculate size and offsets for tables
// print header
// pass 1: create in-memory string tables
enum passes {
	pass_first = 0,
	pass_collect_sizes = pass_first, // alias: the first pass only gathers counts/sizes
	pass_second,
	pass_max,
};
/* One table entry as written to the output file: string length and
 * offset. process() turns the buffer-relative offset into a file offset
 * just before writing. */
struct strtbl {
	unsigned len, off;
};
74 struct strmap {
75 struct strtbl str, *trans;
78 struct callbackdata {
79 enum passes pass;
80 unsigned off;
81 FILE* out;
82 unsigned msgidbuf1_len;
83 unsigned msgidbuf2_len;
84 unsigned pluralbuf1_len;
85 unsigned pluralbuf2_len;
86 unsigned ctxtbuf_len;
87 unsigned msgstr1_len;
88 unsigned msgstr2_len;
89 unsigned pluralstr_count;
90 unsigned string_maxlen;
91 char* msgidbuf1;
92 char* msgidbuf2;
93 char* pluralbuf1;
94 char* pluralbuf2;
95 char* msgctxtbuf;
96 char* msgstrbuf1;
97 char* msgstrbuf2;
98 unsigned priv_type;
99 unsigned priv_len;
100 unsigned num[pe_maxstr];
101 unsigned len[pe_maxstr];
102 struct strmap *strlist;
103 struct strtbl *translist;
104 char *strbuffer[pe_maxstr];
105 unsigned stroff[pe_maxstr];
106 unsigned curr[pe_maxstr];
109 static struct callbackdata *cb_for_qsort;
110 int strmap_comp(const void *a_, const void *b_) {
111 const struct strmap *a = a_, *b = b_;
112 return strcmp(cb_for_qsort->strbuffer[0] + a->str.off, cb_for_qsort->strbuffer[0] + b->str.off);
/* Kinds of system-dependent format placeholders that are expanded. */
enum sysdep_types {
	st_priu32 = 0,
	st_priu64,
	st_priumax,
	st_max
};

/* Placeholder spellings. Byte 0 of each row is the placeholder length, the
 * text follows. Rows are 11 bytes wide so that the longest entry
 * (1 length byte + 9 characters) still gets its terminating NUL; with a
 * width of 10 that row was unterminated and strlen() on it in replace()
 * ran past the end of the table. */
static const char sysdep_str[][11]={
	[st_priu32]  = "\x08<PRIu32>",
	[st_priu64]  = "\x08<PRIu64>",
	[st_priumax] = "\x09<PRIuMAX>",
};

/* Replacement spellings. Byte 0 of each row is the number of alternatives,
 * followed by that many NUL-terminated strings back to back. */
static const char sysdep_repl[][8]={
	[st_priu32]  = "\x02lu\0u",
	[st_priu64]  = "\x02lu\0llu",
	[st_priumax] = "\x01ju"
};
132 static const char *get_repl(enum sysdep_types type, unsigned nr) {
133 assert(nr < (unsigned)sysdep_repl[type][0]);
134 const char* p = sysdep_repl[type]+1;
135 while(nr--) p+=strlen(p)+1;
136 return p;
/* Replace, in place, every occurrence of `what` inside `text` by `with`.
 * `textlen` is the number of bytes of `text` to scan (terminator excluded);
 * the terminator is moved along with the tail. `with` must not be longer
 * than `what` (asserted), so the text can only shrink. */
static void replace(char* text, unsigned textlen, const char* what, const char * with) {
	char*p = text;
	size_t la = strlen(what), li=strlen(with);
	assert(la >= li);
	// an empty pattern matches everywhere and would never make progress
	if(la == 0) return;
	while(textlen >= la) {
		if(!memcmp(p,what,la)) {
			memcpy(p, with, li);
			textlen -= la;
			// close the gap; +1 carries the terminating NUL along
			memmove(p+li,p+la,textlen+1);
			p+=li;
		} else {
			p++;
			textlen--;
		}
	}
}
154 static unsigned get_form(enum sysdep_types type, unsigned no, unsigned occurences[st_max]) {
155 unsigned i,divisor = 1;
156 for(i=type+1;i<st_max;i++) if(occurences[i]) divisor *= sysdep_repl[i][0];
157 return (no/divisor)%sysdep_repl[type][0];
159 static char** sysdep_transform(const char* text, unsigned textlen, unsigned *len, unsigned *count, int simulate) {
160 unsigned occurences[st_max] = {0};
161 const char *p=text,*o;
162 unsigned i,j, l = textlen;
163 while(l && (o=strchr(p, '<'))) {
164 l-=o-p;p=o;
165 unsigned f = 0;
166 for(i=0;i<st_max;i++)
167 if(l>=(unsigned)sysdep_str[i][0] && !memcmp(p,sysdep_str[i]+1,sysdep_str[i][0])) {
168 occurences[i]++;
169 f=1;
170 p+=sysdep_str[i][0];
171 l-=sysdep_str[i][0];
172 break;
174 if(!f) p++,l--;
176 *count = 1;
177 for(i=0;i<st_max;i++) if(occurences[i]) *count *= sysdep_repl[i][0];
178 l = textlen * *count;
179 for(i=0;i<*count;i++) for(j=0;j<st_max;j++)
180 if(occurences[j]) l-= occurences[j] * (sysdep_str[j][0] - strlen(get_repl(j, get_form(j, i, occurences))));
181 *len = l+*count-1;
183 char **out = 0;
184 if(!simulate) {
185 out = malloc((sizeof(char*)+textlen+1) * *count);
186 assert(out);
187 char *p = (void*)(out+*count);
188 for(i=0;i<*count;i++) {
189 out[i]=p;
190 memcpy(p, text, textlen+1);
191 p+=textlen+1;
193 for(i=0;i<*count;i++) for(j=0;j<st_max;j++)
194 if(occurences[j])
195 replace(out[i], textlen, sysdep_str[j]+1, get_repl(j, get_form(j, i, occurences)));
198 return out;
201 static inline void writemsg(struct callbackdata *d) {
202 if(d->msgidbuf1_len != 0) {
203 if(!d->strlist[d->curr[pe_msgid]].str.off)
204 d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid];
206 if(d->ctxtbuf_len != 0) {
207 memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len);
208 d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len;
209 d->stroff[pe_msgid]+=d->ctxtbuf_len;
211 memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf1, d->msgidbuf1_len);
212 d->stroff[pe_msgid]+=d->msgidbuf1_len;
213 d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf1_len-1;
214 if(d->pluralbuf1_len != 0) {
215 memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf1, d->pluralbuf1_len);
216 d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf1_len;
217 d->stroff[pe_msgid]+=d->pluralbuf1_len;
219 d->curr[pe_msgid]++;
221 if(d->msgidbuf2_len != 0) {
222 if(!d->strlist[d->curr[pe_msgid]].str.off)
223 d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid];
225 if(d->ctxtbuf_len != 0) {
226 memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len);
227 d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len;
228 d->stroff[pe_msgid]+=d->ctxtbuf_len;
230 memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf2, d->msgidbuf2_len);
231 d->stroff[pe_msgid]+=d->msgidbuf2_len;
232 d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf2_len-1;
233 if(d->pluralbuf2_len != 0) {
234 memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf2, d->pluralbuf2_len);
235 d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf2_len;
236 d->stroff[pe_msgid]+=d->pluralbuf2_len;
238 d->curr[pe_msgid]++;
241 d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=0;
244 static inline void writestr(struct callbackdata *d, struct po_info *info) {
245 // msgid xx; msgstr ""; is widely happened, it's invalid
247 // https://github.com/sabotage-linux/gettext-tiny/issues/1
248 // no invalid, when empty, check d->num[pe_msgid]
249 if(!d->pluralstr_count && d->num[pe_msgid] > 0) {
250 d->len[pe_msgid]-=d->msgidbuf1_len;
251 d->len[pe_msgid]-=d->msgidbuf2_len;
252 d->len[pe_plural]-=d->pluralbuf1_len;
253 d->len[pe_plural]-=d->pluralbuf2_len;
254 d->len[pe_ctxt]-=d->ctxtbuf_len;
255 d->len[pe_msgstr]--;
256 d->num[pe_msgid]--;
257 d->num[pe_msgstr]--;
258 d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0;
259 return;
262 if(d->pluralstr_count && d->pluralstr_count <= info->nplurals) {
263 writemsg(d);
264 // plural <= nplurals is allowed
265 d->translist[d->curr[pe_msgstr]].len=d->msgstr1_len-1;
266 d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr];
267 d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]];
269 memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf1, d->msgstr1_len);
270 d->stroff[pe_msgstr]+=d->msgstr1_len;
271 d->curr[pe_msgstr]++;
273 if(d->msgstr2_len) {
274 d->translist[d->curr[pe_msgstr]].len=d->msgstr2_len-1;
275 d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr];
276 d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]];
278 memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf2, d->msgstr2_len);
279 d->stroff[pe_msgstr]+=d->msgstr2_len;
280 d->curr[pe_msgstr]++;
283 d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0;
287 int process_line_callback(struct po_info* info, void* user) {
288 struct callbackdata *d = (struct callbackdata *) user;
289 assert(info->type == pe_msgid || info->type == pe_ctxt || info->type == pe_msgstr || info->type == pe_plural);
290 char **sysdeps;
291 unsigned len, count, i, l;
292 switch(d->pass) {
293 case pass_collect_sizes:
294 sysdep_transform(info->text, info->textlen, &len, &count, 1);
295 d->num[info->type] += count;
296 if(info->type == pe_msgid && count == 2 && d->priv_type == pe_ctxt) {
297 // ctxt meets msgid with sysdeps, multiply num and len to suit it
298 d->len[pe_ctxt] += d->priv_len +1;
299 d->num[pe_ctxt]++;
301 if(count != 1 && info->type == pe_ctxt) {
302 // except msgid, str, plural, all other types should not have sysdeps
303 abort();
306 d->priv_type = info->type;
307 d->priv_len = len;
308 d->len[info->type] += len +1;
310 if(len+1 > d->string_maxlen)
311 d->string_maxlen = len+1;
312 break;
313 case pass_second:
314 sysdeps = sysdep_transform(info->text, info->textlen, &len, &count, 0);
315 for(i=0;i<count;i++) {
316 l = strlen(sysdeps[i]);
317 assert(l+1 <= d->string_maxlen);
318 if(info->type == pe_msgid) {
319 if(i==0 && d->msgidbuf1_len)
320 writestr(d, info);
322 // just copy, it's written down when writemsg()
323 if(i==0) {
324 memcpy(d->msgidbuf1, sysdeps[i], l+1);
325 d->msgidbuf1_len = l+1;
326 } else {
327 memcpy(d->msgidbuf2, sysdeps[i], l+1);
328 d->msgidbuf2_len = l+1;
330 } else if(info->type == pe_plural) {
331 if(i==0) {
332 memcpy(d->pluralbuf1, sysdeps[i], l+1);
333 d->pluralbuf1_len = l+1;
334 } else {
335 memcpy(d->pluralbuf2, sysdeps[i], l+1);
336 d->pluralbuf2_len = l+1;
338 } else if(info->type == pe_ctxt) {
339 writestr(d, info);
340 d->ctxtbuf_len = l+1;
341 memcpy(d->msgctxtbuf, sysdeps[i], l);
342 d->msgctxtbuf[l] = 0x4;//EOT
343 } else {
344 // just copy, it's written down when writestr()
345 if(l) {
346 if(i==0) {
347 memcpy(&d->msgstrbuf1[d->msgstr1_len], sysdeps[i], l+1);
348 d->msgstr1_len += l+1;
349 d->pluralstr_count++;
350 } else {
351 // sysdeps exist
352 memcpy(&d->msgstrbuf2[d->msgstr2_len], sysdeps[i], l+1);
353 d->msgstr2_len += l+1;
358 free(sysdeps);
359 break;
360 default:
361 abort();
363 return 0;
366 int process(FILE *in, FILE *out) {
367 struct mo_hdr mohdr = def_hdr;
368 char line[4096]; char *lp;
369 char convbuf[16384];
371 struct callbackdata d = {
372 .num = {
373 [pe_msgid] = 0,
374 [pe_msgstr] = 0,
375 [pe_plural] = 0,
376 [pe_ctxt] = 0,
378 .len = {
379 [pe_msgid] = 0,
380 [pe_msgstr] = 0,
381 [pe_plural] = 0,
382 [pe_ctxt] = 0,
384 .off = 0,
385 .out = out,
386 .pass = pass_first,
387 .ctxtbuf_len = 0,
388 .pluralbuf1_len = 0,
389 .pluralbuf2_len = 0,
390 .msgidbuf1_len = 0,
391 .msgidbuf2_len = 0,
392 .msgstr1_len = 0,
393 .msgstr2_len = 0,
394 .pluralstr_count = 0,
395 .string_maxlen = 0,
398 struct po_parser pb, *p = &pb;
400 mohdr.off_tbl_trans = mohdr.off_tbl_org;
401 for(d.pass = pass_first; d.pass <= pass_second; d.pass++) {
402 if(d.pass == pass_second) {
403 // start of second pass:
404 // ensure we dont output when there's no strings at all
405 if(d.num[pe_msgid] == 0) {
406 return 1;
409 // check that data gathered in first pass is consistent
410 if((d.num[pe_msgstr] < d.num[pe_msgid]) || (d.num[pe_msgstr] > (d.num[pe_msgid] + d.num[pe_plural] * (p->info.nplurals - 1)))) {
411 // one should actually abort here,
412 // but gnu gettext simply writes an empty .mo and returns success.
413 //abort();
414 fprintf(stderr, "warning: mismatch of msgid/msgstr count, writing empty .mo file\n");
415 d.num[pe_msgid] = 0;
416 return 0;
419 d.msgidbuf1 = calloc(d.string_maxlen*5+2*d.string_maxlen*p->info.nplurals, 1);
420 d.msgidbuf2 = d.msgidbuf1 + d.string_maxlen;
421 d.pluralbuf1 = d.msgidbuf2 + d.string_maxlen;
422 d.pluralbuf2 = d.pluralbuf1 + d.string_maxlen;
423 d.msgctxtbuf = d.pluralbuf2 + d.string_maxlen;
424 d.msgstrbuf1 = d.msgctxtbuf + d.string_maxlen;
425 d.msgstrbuf2 = d.msgstrbuf1 + d.string_maxlen*p->info.nplurals;
427 d.strlist = calloc(d.num[pe_msgid] * sizeof(struct strmap), 1);
428 d.translist = calloc(d.num[pe_msgstr] * sizeof(struct strtbl), 1);
429 d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1);
430 d.strbuffer[pe_msgstr] = calloc(d.len[pe_msgstr], 1);
431 d.stroff[pe_msgid] = d.stroff[pe_msgstr] = 0;
432 assert(d.msgidbuf1 && d.strlist && d.translist && d.strbuffer[pe_msgid] && d.strbuffer[pe_msgstr]);
435 poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d);
437 while((lp = fgets(line, sizeof(line), in))) {
438 poparser_feed_line(p, lp, sizeof(line));
440 poparser_finish(p);
441 if(d.pass == pass_second)
442 writestr(&d, &p->info);
444 if(d.pass == pass_second) {
445 // calculate header fields from len and num arrays
446 mohdr.numstring = d.num[pe_msgid];
447 mohdr.off_tbl_org = sizeof(struct mo_hdr);
448 mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2);
449 // set offset startvalue
450 d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2);
452 fseek(in, 0, SEEK_SET);
455 cb_for_qsort = &d;
456 qsort(d.strlist, d.num[pe_msgid], sizeof (struct strmap), strmap_comp);
457 unsigned i;
459 // print header
460 fwrite(&mohdr, sizeof(mohdr), 1, out);
461 for(i = 0; i < d.num[pe_msgid]; i++) {
462 d.strlist[i].str.off += d.off;
463 fwrite(&d.strlist[i].str, sizeof(struct strtbl), 1, d.out);
465 for(i = 0; i < d.num[pe_msgid]; i++) {
466 d.strlist[i].trans->off += d.off + d.len[pe_msgid] + d.len[pe_plural] + d.len[pe_ctxt];
467 fwrite(d.strlist[i].trans, sizeof(struct strtbl), 1, d.out);
469 fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1, d.out);
470 fwrite(d.strbuffer[pe_msgstr], d.len[pe_msgstr], 1, d.out);
472 return 0;
/* Open the file named `fn` and store the stream in *dest.
 *
 * out != 0: open for writing; "-" selects stdout.
 * out == 0: open for reading; "-" copies all of stdin into a temporary
 *           file and returns that, rewound, because process() needs a
 *           stream it can read twice.
 *
 * Any failure prints a diagnostic with perror() and exits with status 1,
 * so *dest is a valid stream whenever this function returns. */
void set_file(int out, char* fn, FILE** dest) {
	if(streq(fn, "-")) {
		if(out) {
			*dest = stdout;
		} else {
			char b[4096];
			size_t n=0;
			FILE* tmpf = tmpfile();
			if(!tmpf) {
				// continuing would hand a NULL stream to fwrite()/fseek()
				perror("tmpfile");
				exit(1);
			}

			while((n=fread(b, sizeof(*b), sizeof(b), stdin)) > 0) {
				// a short write would silently truncate the input
				if(fwrite(b, sizeof(*b), n, tmpf) != n) {
					perror("fwrite");
					exit(1);
				}
			}

			fseek(tmpf, 0, SEEK_SET);
			*dest = tmpf;
		}
	} else {
		*dest = fopen(fn, out ? "w" : "r");
	}
	if(!*dest) {
		perror("fopen");
		exit(1);
	}
}
/* Command-line entry point.
 *
 * Recognised: -o FILE / --output-file=FILE, -l LOCALE / --locale=LOCALE,
 * -d DIR, -V / --version, -h / --help. A number of other GNU msgfmt
 * options are accepted and ignored. The first remaining argument is the
 * input file ("-" means stdin); later ones are ignored.
 *
 * When both a locale and a destination are known, only an empty
 * "<dest>/<locale>.msg" file is created. Otherwise the input is compiled
 * to the output file (default "messages.mo"); if process() returns 1 the
 * output file is removed again. */
int main(int argc, char**argv) {
	if(argc == 1) syntax();
	int arg = 1;
	FILE *out = NULL;
	FILE *in = NULL;
	int expect_in_fn = 1;
	char* locale = NULL;
	char* dest = NULL;
	// result of a prefix test; kept apart from `dest` so that a failed
	// test cannot overwrite a destination that was already chosen
	char* val;
	#define A argv[arg]
	for(; arg < argc; arg++) {
		if(A[0] == '-') {
			if(A[1] == '-') {
				if(
					streq(A+2, "java") ||
					streq(A+2, "java2") ||
					streq(A+2, "csharp") ||
					streq(A+2, "csharp-resources") ||
					streq(A+2, "tcl") ||
					streq(A+2, "qt") ||
					streq(A+2, "strict") ||
					streq(A+2, "properties-input") ||
					streq(A+2, "stringtable-input") ||
					streq(A+2, "use-fuzzy") ||
					strstarts(A+2, "alignment=") ||
					streq(A+2, "check") ||
					streq(A+2, "check-format") ||
					streq(A+2, "check-header") ||
					streq(A+2, "check-domain") ||
					streq(A+2, "check-compatibility") ||
					streq(A+2, "check-accelerators") ||
					streq(A+2, "no-hash") ||
					streq(A+2, "verbose") ||
					streq(A+2, "statistics") ||
					strstarts(A+2, "check-accelerators=") ||
					strstarts(A+2, "resource=")
				) {
					// accepted for compatibility, ignored
				} else if((val = strstarts(A+2, "locale="))) {
					locale = val;
				} else if((val = strstarts(A+2, "output-file="))) {
					dest = val;
					set_file(1, dest, &out);
				} else if(streq(A+2, "version")) {
					version();
				} else if(streq(A+2, "help")) {
					syntax();
				} else if (expect_in_fn) {
					set_file(0, A, &in);
					expect_in_fn = 0;
				}
			} else if(streq(A + 1, "o")) {
				// the option needs a value: never step onto argv[argc]
				if(++arg >= argc) syntax();
				dest = A;
				set_file(1, A, &out);
			} else if(
				streq(A+1, "j") ||
				streq(A+1, "r") ||
				streq(A+1, "P") ||
				streq(A+1, "f") ||
				streq(A+1, "a") ||
				streq(A+1, "c") ||
				streq(A+1, "v") ||
				streq(A+1, "C")
			) {
				// accepted for compatibility, ignored
			} else if (streq(A+1, "V")) {
				version();
			} else if (streq(A+1, "h")) {
				syntax();
			} else if (streq(A+1, "l")) {
				if(++arg >= argc) syntax();
				locale = A;
			} else if (streq(A+1, "d")) {
				if(++arg >= argc) syntax();
				dest = A;
			} else if (expect_in_fn) {
				// also reached for a lone "-", i.e. read from stdin
				set_file(0, A, &in);
				expect_in_fn = 0;
			}
		} else if (expect_in_fn) {
			set_file(0, A, &in);
			expect_in_fn = 0;
		}
	}

	if (locale != NULL && dest != NULL) {
		// only create an empty <dest>/<locale>.msg; no catalog is compiled
		int sz = snprintf(NULL, 0, "%s/%s.msg", dest, locale);
		char msg[sz+1];
		snprintf(msg, sizeof(msg), "%s/%s.msg", dest, locale);
		FILE *fp = fopen(msg, "w");
		if (fp) {
			fclose(fp);
			return 0;
		} else return 1;
	}

	if(out == NULL) {
		dest = "messages.mo";
		set_file(1, "messages.mo", &out);
	}

	if(in == NULL || out == NULL) {
		return 1;
	}
	int ret = process(in, out);
	// only the output needs flushing; fflush() on an input stream is
	// undefined in ISO C
	fflush(out);
	if(in != stdin) fclose(in);
	if(out != stdout) fclose(out);

	// process() returned 1: nothing was written, drop the empty output file
	if (ret == 1) {
		return remove(dest);
	}
	return ret;
}