1 /* msgfmt utility (C) 2012 rofl0r
2 * released under the MIT license, see LICENSE for details */
10 // in DO_NOTHING mode, we simply write the msgid twice, once for msgid, once for msgstr.
11 // TODO: maybe make it write "" instead of echoing the msgid.
14 __attribute__((noreturn
))
15 static void syntax(void) {
17 "Usage: msgfmt [OPTION] filename.po ...\n");
21 __attribute__((noreturn
))
22 static void version(void) {
24 "these are not (GNU gettext-tools) 99.9999.9999\n");
28 #define streq(A, B) (!strcmp(A, B))
29 #define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1)))
36 unsigned off_tbl_trans
;
37 unsigned hash_tbl_size
;
38 unsigned off_tbl_hash
;
43 strtable (lengths/offsets)
44 transtable (lengths/offsets)
47 translations section */
49 const struct mo_hdr def_hdr
= {
53 sizeof(struct mo_hdr
),
60 // pass 0: collect numbers of strings, calculate size and offsets for tables
62 // pass 1: create in-memory string tables
65 pass_collect_sizes
= pass_first
,
75 struct strtbl str
, *trans
;
82 unsigned msgidbuf1_len
;
83 unsigned msgidbuf2_len
;
84 unsigned pluralbuf1_len
;
85 unsigned pluralbuf2_len
;
89 unsigned pluralstr_count
;
90 unsigned string_maxlen
;
100 unsigned num
[pe_maxstr
];
101 unsigned len
[pe_maxstr
];
102 struct strmap
*strlist
;
103 struct strtbl
*translist
;
104 char *strbuffer
[pe_maxstr
];
105 unsigned stroff
[pe_maxstr
];
106 unsigned curr
[pe_maxstr
];
109 static struct callbackdata
*cb_for_qsort
;
110 int strmap_comp(const void *a_
, const void *b_
) {
111 const struct strmap
*a
= a_
, *b
= b_
;
112 return strcmp(cb_for_qsort
->strbuffer
[0] + a
->str
.off
, cb_for_qsort
->strbuffer
[0] + b
->str
.off
);
122 static const char sysdep_str
[][10]={
123 [st_priu32
] = "\x08<PRIu32>",
124 [st_priu64
] = "\x08<PRIu64>",
125 [st_priumax
] = "\x09<PRIuMAX>",
127 static const char sysdep_repl
[][8]={
128 [st_priu32
] = "\x02lu\0u",
129 [st_priu64
] = "\x02lu\0llu",
130 [st_priumax
] = "\x01ju"
132 static const char *get_repl(enum sysdep_types type
, unsigned nr
) {
133 assert(nr
< (unsigned)sysdep_repl
[type
][0]);
134 const char* p
= sysdep_repl
[type
]+1;
135 while(nr
--) p
+=strlen(p
)+1;
138 static void replace(char* text
, unsigned textlen
, const char* what
, const char * with
) {
140 size_t la
= strlen(what
), li
=strlen(with
);
142 for(p
=text
;textlen
>= la
;) {
143 if(!memcmp(p
,what
,la
)) {
146 memmove(p
+li
,p
+la
,textlen
+1);
154 static unsigned get_form(enum sysdep_types type
, unsigned no
, unsigned occurences
[st_max
]) {
155 unsigned i
,divisor
= 1;
156 for(i
=type
+1;i
<st_max
;i
++) if(occurences
[i
]) divisor
*= sysdep_repl
[i
][0];
157 return (no
/divisor
)%sysdep_repl
[type
][0];
159 static char** sysdep_transform(const char* text
, unsigned textlen
, unsigned *len
, unsigned *count
, int simulate
) {
160 unsigned occurences
[st_max
] = {0};
161 const char *p
=text
,*o
;
162 unsigned i
,j
, l
= textlen
;
163 while(l
&& (o
=strchr(p
, '<'))) {
166 for(i
=0;i
<st_max
;i
++)
167 if(l
>=(unsigned)sysdep_str
[i
][0] && !memcmp(p
,sysdep_str
[i
]+1,sysdep_str
[i
][0])) {
177 for(i
=0;i
<st_max
;i
++) if(occurences
[i
]) *count
*= sysdep_repl
[i
][0];
178 l
= textlen
* *count
;
179 for(i
=0;i
<*count
;i
++) for(j
=0;j
<st_max
;j
++)
180 if(occurences
[j
]) l
-= occurences
[j
] * (sysdep_str
[j
][0] - strlen(get_repl(j
, get_form(j
, i
, occurences
))));
185 out
= malloc((sizeof(char*)+textlen
+1) * *count
);
187 char *p
= (void*)(out
+*count
);
188 for(i
=0;i
<*count
;i
++) {
190 memcpy(p
, text
, textlen
+1);
193 for(i
=0;i
<*count
;i
++) for(j
=0;j
<st_max
;j
++)
195 replace(out
[i
], textlen
, sysdep_str
[j
]+1, get_repl(j
, get_form(j
, i
, occurences
)));
201 static inline void writemsg(struct callbackdata
*d
) {
202 if(d
->msgidbuf1_len
!= 0) {
203 if(!d
->strlist
[d
->curr
[pe_msgid
]].str
.off
)
204 d
->strlist
[d
->curr
[pe_msgid
]].str
.off
=d
->stroff
[pe_msgid
];
206 if(d
->ctxtbuf_len
!= 0) {
207 memcpy(d
->strbuffer
[pe_msgid
] + d
->stroff
[pe_msgid
], d
->msgctxtbuf
, d
->ctxtbuf_len
);
208 d
->strlist
[d
->curr
[pe_msgid
]].str
.len
+=d
->ctxtbuf_len
;
209 d
->stroff
[pe_msgid
]+=d
->ctxtbuf_len
;
211 memcpy(d
->strbuffer
[pe_msgid
] + d
->stroff
[pe_msgid
], d
->msgidbuf1
, d
->msgidbuf1_len
);
212 d
->stroff
[pe_msgid
]+=d
->msgidbuf1_len
;
213 d
->strlist
[d
->curr
[pe_msgid
]].str
.len
+=d
->msgidbuf1_len
-1;
214 if(d
->pluralbuf1_len
!= 0) {
215 memcpy(d
->strbuffer
[pe_msgid
] + d
->stroff
[pe_msgid
], d
->pluralbuf1
, d
->pluralbuf1_len
);
216 d
->strlist
[d
->curr
[pe_msgid
]].str
.len
+=d
->pluralbuf1_len
;
217 d
->stroff
[pe_msgid
]+=d
->pluralbuf1_len
;
221 if(d
->msgidbuf2_len
!= 0) {
222 if(!d
->strlist
[d
->curr
[pe_msgid
]].str
.off
)
223 d
->strlist
[d
->curr
[pe_msgid
]].str
.off
=d
->stroff
[pe_msgid
];
225 if(d
->ctxtbuf_len
!= 0) {
226 memcpy(d
->strbuffer
[pe_msgid
] + d
->stroff
[pe_msgid
], d
->msgctxtbuf
, d
->ctxtbuf_len
);
227 d
->strlist
[d
->curr
[pe_msgid
]].str
.len
+=d
->ctxtbuf_len
;
228 d
->stroff
[pe_msgid
]+=d
->ctxtbuf_len
;
230 memcpy(d
->strbuffer
[pe_msgid
] + d
->stroff
[pe_msgid
], d
->msgidbuf2
, d
->msgidbuf2_len
);
231 d
->stroff
[pe_msgid
]+=d
->msgidbuf2_len
;
232 d
->strlist
[d
->curr
[pe_msgid
]].str
.len
+=d
->msgidbuf2_len
-1;
233 if(d
->pluralbuf2_len
!= 0) {
234 memcpy(d
->strbuffer
[pe_msgid
] + d
->stroff
[pe_msgid
], d
->pluralbuf2
, d
->pluralbuf2_len
);
235 d
->strlist
[d
->curr
[pe_msgid
]].str
.len
+=d
->pluralbuf2_len
;
236 d
->stroff
[pe_msgid
]+=d
->pluralbuf2_len
;
241 d
->pluralbuf2_len
=d
->pluralbuf1_len
=d
->ctxtbuf_len
=d
->msgidbuf1_len
=d
->msgidbuf2_len
=0;
244 static inline void writestr(struct callbackdata
*d
, struct po_info
*info
) {
245 // msgid xx; msgstr ""; occurs widely in practice, but it is invalid
247 // https://github.com/sabotage-linux/gettext-tiny/issues/1
248 // no invalid, when empty, check d->num[pe_msgid]
249 if(!d
->pluralstr_count
&& d
->num
[pe_msgid
] > 0) {
250 d
->len
[pe_msgid
]-=d
->msgidbuf1_len
;
251 d
->len
[pe_msgid
]-=d
->msgidbuf2_len
;
252 d
->len
[pe_plural
]-=d
->pluralbuf1_len
;
253 d
->len
[pe_plural
]-=d
->pluralbuf2_len
;
254 d
->len
[pe_ctxt
]-=d
->ctxtbuf_len
;
258 d
->pluralbuf2_len
=d
->pluralbuf1_len
=d
->ctxtbuf_len
=d
->msgidbuf1_len
=d
->msgidbuf2_len
=d
->msgstr1_len
=d
->msgstr2_len
=d
->pluralstr_count
=0;
262 if(d
->pluralstr_count
&& d
->pluralstr_count
<= info
->nplurals
) {
264 // plural <= nplurals is allowed
265 d
->translist
[d
->curr
[pe_msgstr
]].len
=d
->msgstr1_len
-1;
266 d
->translist
[d
->curr
[pe_msgstr
]].off
=d
->stroff
[pe_msgstr
];
267 d
->strlist
[d
->curr
[pe_msgstr
]].trans
= &d
->translist
[d
->curr
[pe_msgstr
]];
269 memcpy(d
->strbuffer
[pe_msgstr
] + d
->stroff
[pe_msgstr
], d
->msgstrbuf1
, d
->msgstr1_len
);
270 d
->stroff
[pe_msgstr
]+=d
->msgstr1_len
;
271 d
->curr
[pe_msgstr
]++;
274 d
->translist
[d
->curr
[pe_msgstr
]].len
=d
->msgstr2_len
-1;
275 d
->translist
[d
->curr
[pe_msgstr
]].off
=d
->stroff
[pe_msgstr
];
276 d
->strlist
[d
->curr
[pe_msgstr
]].trans
= &d
->translist
[d
->curr
[pe_msgstr
]];
278 memcpy(d
->strbuffer
[pe_msgstr
] + d
->stroff
[pe_msgstr
], d
->msgstrbuf2
, d
->msgstr2_len
);
279 d
->stroff
[pe_msgstr
]+=d
->msgstr2_len
;
280 d
->curr
[pe_msgstr
]++;
283 d
->msgstr1_len
=d
->msgstr2_len
=d
->pluralstr_count
=0;
287 int process_line_callback(struct po_info
* info
, void* user
) {
288 struct callbackdata
*d
= (struct callbackdata
*) user
;
289 assert(info
->type
== pe_msgid
|| info
->type
== pe_ctxt
|| info
->type
== pe_msgstr
|| info
->type
== pe_plural
);
291 unsigned len
, count
, i
, l
;
293 case pass_collect_sizes
:
294 sysdep_transform(info
->text
, info
->textlen
, &len
, &count
, 1);
295 d
->num
[info
->type
] += count
;
296 if(info
->type
== pe_msgid
&& count
== 2 && d
->priv_type
== pe_ctxt
) {
297 // ctxt meets msgid with sysdeps, multiply num and len to suit it
298 d
->len
[pe_ctxt
] += d
->priv_len
+1;
301 if(count
!= 1 && info
->type
== pe_ctxt
) {
302 // except msgid, str, plural, all other types should not have sysdeps
306 d
->priv_type
= info
->type
;
308 d
->len
[info
->type
] += len
+1;
310 if(len
+1 > d
->string_maxlen
)
311 d
->string_maxlen
= len
+1;
314 sysdeps
= sysdep_transform(info
->text
, info
->textlen
, &len
, &count
, 0);
315 for(i
=0;i
<count
;i
++) {
316 l
= strlen(sysdeps
[i
]);
317 assert(l
+1 <= d
->string_maxlen
);
318 if(info
->type
== pe_msgid
) {
319 if(i
==0 && d
->msgidbuf1_len
)
322 // just copy, it's written down when writemsg()
324 memcpy(d
->msgidbuf1
, sysdeps
[i
], l
+1);
325 d
->msgidbuf1_len
= l
+1;
327 memcpy(d
->msgidbuf2
, sysdeps
[i
], l
+1);
328 d
->msgidbuf2_len
= l
+1;
330 } else if(info
->type
== pe_plural
) {
332 memcpy(d
->pluralbuf1
, sysdeps
[i
], l
+1);
333 d
->pluralbuf1_len
= l
+1;
335 memcpy(d
->pluralbuf2
, sysdeps
[i
], l
+1);
336 d
->pluralbuf2_len
= l
+1;
338 } else if(info
->type
== pe_ctxt
) {
340 d
->ctxtbuf_len
= l
+1;
341 memcpy(d
->msgctxtbuf
, sysdeps
[i
], l
);
342 d
->msgctxtbuf
[l
] = 0x4;//EOT
344 // just copy, it's written down when writestr()
347 memcpy(&d
->msgstrbuf1
[d
->msgstr1_len
], sysdeps
[i
], l
+1);
348 d
->msgstr1_len
+= l
+1;
349 d
->pluralstr_count
++;
352 memcpy(&d
->msgstrbuf2
[d
->msgstr2_len
], sysdeps
[i
], l
+1);
353 d
->msgstr2_len
+= l
+1;
366 int process(FILE *in
, FILE *out
) {
367 struct mo_hdr mohdr
= def_hdr
;
368 char line
[4096]; char *lp
;
371 struct callbackdata d
= {
394 .pluralstr_count
= 0,
398 struct po_parser pb
, *p
= &pb
;
400 mohdr
.off_tbl_trans
= mohdr
.off_tbl_org
;
401 for(d
.pass
= pass_first
; d
.pass
<= pass_second
; d
.pass
++) {
402 if(d
.pass
== pass_second
) {
403 // start of second pass:
404 // ensure we don't output when there are no strings at all
405 if(d
.num
[pe_msgid
] == 0) {
409 // check that data gathered in first pass is consistent
410 if((d
.num
[pe_msgstr
] < d
.num
[pe_msgid
]) || (d
.num
[pe_msgstr
] > (d
.num
[pe_msgid
] + d
.num
[pe_plural
] * (p
->info
.nplurals
- 1)))) {
411 // one should actually abort here,
412 // but gnu gettext simply writes an empty .mo and returns success.
414 fprintf(stderr
, "warning: mismatch of msgid/msgstr count, writing empty .mo file\n");
419 d
.msgidbuf1
= calloc(d
.string_maxlen
*5+2*d
.string_maxlen
*p
->info
.nplurals
, 1);
420 d
.msgidbuf2
= d
.msgidbuf1
+ d
.string_maxlen
;
421 d
.pluralbuf1
= d
.msgidbuf2
+ d
.string_maxlen
;
422 d
.pluralbuf2
= d
.pluralbuf1
+ d
.string_maxlen
;
423 d
.msgctxtbuf
= d
.pluralbuf2
+ d
.string_maxlen
;
424 d
.msgstrbuf1
= d
.msgctxtbuf
+ d
.string_maxlen
;
425 d
.msgstrbuf2
= d
.msgstrbuf1
+ d
.string_maxlen
*p
->info
.nplurals
;
427 d
.strlist
= calloc(d
.num
[pe_msgid
] * sizeof(struct strmap
), 1);
428 d
.translist
= calloc(d
.num
[pe_msgstr
] * sizeof(struct strtbl
), 1);
429 d
.strbuffer
[pe_msgid
] = calloc(d
.len
[pe_msgid
]+d
.len
[pe_plural
]+d
.len
[pe_ctxt
], 1);
430 d
.strbuffer
[pe_msgstr
] = calloc(d
.len
[pe_msgstr
], 1);
431 d
.stroff
[pe_msgid
] = d
.stroff
[pe_msgstr
] = 0;
432 assert(d
.msgidbuf1
&& d
.strlist
&& d
.translist
&& d
.strbuffer
[pe_msgid
] && d
.strbuffer
[pe_msgstr
]);
435 poparser_init(p
, convbuf
, sizeof(convbuf
), process_line_callback
, &d
);
437 while((lp
= fgets(line
, sizeof(line
), in
))) {
438 poparser_feed_line(p
, lp
, sizeof(line
));
441 if(d
.pass
== pass_second
)
442 writestr(&d
, &p
->info
);
444 if(d
.pass
== pass_second
) {
445 // calculate header fields from len and num arrays
446 mohdr
.numstring
= d
.num
[pe_msgid
];
447 mohdr
.off_tbl_org
= sizeof(struct mo_hdr
);
448 mohdr
.off_tbl_trans
= mohdr
.off_tbl_org
+ d
.num
[pe_msgid
] * (sizeof(unsigned)*2);
449 // set offset startvalue
450 d
.off
= mohdr
.off_tbl_trans
+ d
.num
[pe_msgid
] * (sizeof(unsigned)*2);
452 fseek(in
, 0, SEEK_SET
);
456 qsort(d
.strlist
, d
.num
[pe_msgid
], sizeof (struct strmap
), strmap_comp
);
460 fwrite(&mohdr
, sizeof(mohdr
), 1, out
);
461 for(i
= 0; i
< d
.num
[pe_msgid
]; i
++) {
462 d
.strlist
[i
].str
.off
+= d
.off
;
463 fwrite(&d
.strlist
[i
].str
, sizeof(struct strtbl
), 1, d
.out
);
465 for(i
= 0; i
< d
.num
[pe_msgid
]; i
++) {
466 d
.strlist
[i
].trans
->off
+= d
.off
+ d
.len
[pe_msgid
] + d
.len
[pe_plural
] + d
.len
[pe_ctxt
];
467 fwrite(d
.strlist
[i
].trans
, sizeof(struct strtbl
), 1, d
.out
);
469 fwrite(d
.strbuffer
[pe_msgid
], d
.len
[pe_msgid
]+d
.len
[pe_plural
]+d
.len
[pe_ctxt
], 1, d
.out
);
470 fwrite(d
.strbuffer
[pe_msgstr
], d
.len
[pe_msgstr
], 1, d
.out
);
476 void set_file(int out
, char* fn
, FILE** dest
) {
483 FILE* tmpf
= tmpfile();
487 while((n
=fread(b
, sizeof(*b
), sizeof(b
), stdin
)) > 0)
488 fwrite(b
, sizeof(*b
), n
, tmpf
);
490 fseek(tmpf
, 0, SEEK_SET
);
494 *dest
= fopen(fn
, out
? "w" : "r");
502 int main(int argc
, char**argv
) {
503 if(argc
== 1) syntax();
507 int expect_in_fn
= 1;
511 for(; arg
< argc
; arg
++) {
515 streq(A
+2, "java") ||
516 streq(A
+2, "java2") ||
517 streq(A
+2, "csharp") ||
518 streq(A
+2, "csharp-resources") ||
521 streq(A
+2, "strict") ||
522 streq(A
+2, "properties-input") ||
523 streq(A
+2, "stringtable-input") ||
524 streq(A
+2, "use-fuzzy") ||
525 strstarts(A
+2, "alignment=") ||
526 streq(A
+2, "check") ||
527 streq(A
+2, "check-format") ||
528 streq(A
+2, "check-header") ||
529 streq(A
+2, "check-domain") ||
530 streq(A
+2, "check-compatibility") ||
531 streq(A
+2, "check-accelerators") ||
532 streq(A
+2, "no-hash") ||
533 streq(A
+2, "verbose") ||
534 streq(A
+2, "statistics") ||
535 strstarts(A
+2, "check-accelerators=") ||
536 strstarts(A
+2, "resource=")
538 } else if((dest
= strstarts(A
+2, "locale="))) {
540 } else if((dest
= strstarts(A
+2, "output-file="))) {
541 set_file(1, dest
, &out
);
542 } else if(streq(A
+2, "version")) {
544 } else if(streq(A
+2, "help")) {
546 } else if (expect_in_fn
) {
550 } else if(streq(A
+ 1, "o")) {
553 set_file(1, A
, &out
);
564 } else if (streq(A
+1, "V")) {
566 } else if (streq(A
+1, "h")) {
568 } else if (streq(A
+1, "l")) {
571 } else if (streq(A
+1, "d")) {
574 } else if (expect_in_fn
) {
578 } else if (expect_in_fn
) {
584 if (locale
!= NULL
&& dest
!= NULL
) {
585 int sz
= snprintf(NULL
, 0, "%s/%s.msg", dest
, locale
);
587 snprintf(msg
, sizeof(msg
), "%s/%s.msg", dest
, locale
);
588 FILE *fp
= fopen(msg
, "w");
596 dest
= "messages.mo";
597 set_file(1, "messages.mo", &out
);
600 if(in
== NULL
|| out
== NULL
) {
603 int ret
= process(in
, out
);
604 fflush(in
); fflush(out
);
605 if(in
!= stdin
) fclose(in
);
606 if(out
!= stdout
) fclose(out
);