[CI] Need netpbm package on trusty
[survex.git] / lib / po-to-msg.pl
blobbdd9b37a88fb5455964a446b7c23687523f9d3ad
1 #!/usr/bin/perl -w
2 require 5.008;
3 use bytes;
4 use strict;
5 use Locale::PO;
7 use integer;
9 # Magic identifier (12 bytes)
10 my $magic = "Svx\nMsg\r\n\xfe\xff\0";
11 # Designed to be corrupted by ASCII ftp, top bit stripping (or
12 # being used for parity). Contains a zero byte so more likely
13 # to be flagged as data (e.g. by perl's "-B" test).
15 my $srcdir = $0;
16 $srcdir =~ s!/[^/]+$!!;
18 my $major = 0;
19 my $minor = 8;
21 # File format (multi-byte numbers in network order (bigendian)):
22 # 12 bytes: Magic identifier
23 # 1 byte: File format major version number (0)
24 # 1 byte: File format minor version number (8)
25 # 2 bytes: Number of messages (N)
26 # 4 bytes: Offset from XXXX to end of file
27 # XXXX:
28 # N*:
29 # <message> NUL
31 my %msgs = ();
32 ${$msgs{'en'}}[0] = '©';
34 # my %uses = ();
36 my $file;
38 my %n = ();
39 my %loc = ();
40 $file = "$srcdir/survex.pot";
41 my $num_list = Locale::PO->load_file_asarray($file);
42 foreach my $po_entry (@{$num_list}) {
43 my $ref = $po_entry->reference;
44 (defined $ref && $ref =~ /^n:(\d+)$/m) or next;
45 my $msgno = $1;
46 my $key = $po_entry->msgid;
47 my $msg = c_unescape($po_entry->dequote($key));
48 my $where = $file . ":" . $po_entry->loaded_line_number;
49 ${$loc{'en'}}[$msgno] = $where;
50 if (${$msgs{'en'}}[$msgno]) {
51 print STDERR "$where: warning: already had message $msgno for language 'en'\n";
53 ${$msgs{'en'}}[$msgno] = $msg;
54 ++$n{$msgno};
56 my $last = 0;
57 for (sort { $a <=> $b } keys %n) {
58 if ($_ > $last + 1) {
59 print STDERR "$file: Unused msg numbers: ", join(" ", $last + 1 .. $_ - 1), "\n";
61 $last = $_;
63 print STDERR "$file: Last used msg number: $last\n";
64 %n = ();
66 my %fuzzy;
67 my %c_format;
68 for my $po_file (@ARGV) {
69 my $language = $po_file;
70 $language =~ s/\.po$//;
72 $file = "$srcdir/$po_file";
73 my $po_hash = Locale::PO->load_file_ashash($file);
75 if (exists $$po_hash{'""'}) {
76 if ($$po_hash{'""'}->msgstr =~ /^(?:.*\\n)?Language:\s*([^\s\\]+)/im) {
77 if ($language ne $1) {
78 my $line = 3 + scalar(@{[$& =~ /(\\n)/g]});
79 print STDERR "$file:$line: Language code '$1' doesn't match '$language' from filename\n";
81 } else {
82 my $line = 2 + scalar(@{[$$po_hash{'""'}->msgstr =~ /(\\n)/g]});
83 print STDERR "$file:$line: No suitable 'Language:' field in header\n";
85 } else {
86 print STDERR "$file:1: Expected 'msgid \"\"' with header\n";
89 my $fuzzy = 0;
90 foreach my $po_entry (@{$num_list}) {
91 my $ref = $po_entry->reference;
92 (defined $ref && $ref =~ /^n:(\d+)$/m) or next;
93 my $msgno = $1;
94 my $key = $po_entry->msgid;
95 my $ent = $$po_hash{$key};
96 my $where = $file . ":" . $po_entry->loaded_line_number;
97 ${$loc{$language}}[$msgno] = $where;
98 if (defined $ent) {
99 my $msg = c_unescape($po_entry->dequote($ent->msgstr));
100 next if $msg eq '';
101 if (${$msgs{$language}}[$msgno]) {
102 print STDERR "$where: warning: already had message $msgno for language $language\n";
104 ${$msgs{$language}}[$msgno] = $msg;
105 $ent->fuzzy() and ++$fuzzy;
107 $po_entry->c_format and $c_format{$language}[$msgno]++;
109 $fuzzy{$language} = $fuzzy;
112 my $lang;
113 my @langs = sort grep ! /_\*$/, keys %msgs;
115 my $num_msgs = -1;
116 foreach $lang (@langs) {
117 my $aref = $msgs{$lang};
118 $num_msgs = scalar @$aref if scalar @$aref > $num_msgs;
121 foreach $lang (@langs) {
122 my $fnm = $lang;
123 $file = "$srcdir/$lang.po";
124 $fnm =~ s/(_.*)$/\U$1/;
125 open OUT, ">$fnm.msg" or die $!;
127 my $aref = $msgs{$lang};
129 my $parentaref;
130 my $mainlang = $lang;
131 $parentaref = $msgs{$mainlang} if $mainlang =~ s/_.*$//;
133 print OUT $magic or die $!;
134 print OUT pack("CCn", $major, $minor, $num_msgs) or die $!;
136 my $buff = '';
137 my $missing = 0;
139 for my $n (0 .. $num_msgs - 1) {
140 my $warned = 0;
141 my $msg = $$aref[$n];
142 if (!defined $msg) {
143 $msg = $$parentaref[$n] if defined $parentaref;
144 if (!defined $msg) {
145 $msg = ${$msgs{'en'}}[$n];
146 # don't report if we have a parent (as the omission will be
147 # reported there)
148 if (defined $msg && $msg ne '' && $msg ne '©' && !defined $parentaref) {
149 ++$missing;
150 $warned = 1;
151 } else {
152 $msg = '' if !defined $msg;
155 } else {
156 if ($lang ne 'en') {
157 my $c_format = $c_format{$lang}[$n] // 0;
158 sanity_check("Message $n in language $lang", $msg, ${$msgs{'en'}}[$n], ${$loc{$lang}}[$n], $c_format);
161 $buff .= $msg . "\0";
164 print OUT pack('N',length($buff)), $buff or die $!;
165 close OUT or die $!;
167 my $fuzzy = $fuzzy{$lang};
168 if ($missing || $fuzzy) {
169 print STDERR "Warning: $file: ";
170 if ($missing) {
171 print STDERR "$missing missing message(s)";
172 if ($fuzzy) {
173 print STDERR " and $fuzzy fuzzy message(s)";
175 } else {
176 print STDERR "$fuzzy fuzzy message(s)";
178 print STDERR " for $lang\n";
182 sub sanity_check {
183 my ($what, $msg, $orig, $where, $c_format) = @_;
184 if ($c_format) {
185 # check printf-like specifiers match
186 # allow valid printf specifiers, or %<any letter> to support strftime
187 # and other printf-like formats.
188 my @pcent_m = grep /\%/, split /(%(?:[-#0 +'I]*(?:[0-9]*|\*|\*m\$)(?:\.[0-9]*)?(?:hh|ll|[hlLqjzt])?[diouxXeEfFgGaAcsCSpn]|[a-zA-Z]))/, $msg;
189 my @pcent_o = grep /\%/, split /(%(?:[-#0 +'I]*(?:[0-9]*|\*|\*m\$)(?:\.[0-9]*)?(?:hh|ll|[hlLqjzt])?[diouxXeEfFgGaAcsCSpn]|[a-zA-Z]))/, $orig;
190 while (scalar @pcent_m || scalar @pcent_o) {
191 if (!scalar @pcent_m) {
192 print STDERR "$where: warning: $what misses out \%spec $pcent_o[0]\n";
193 } elsif (!scalar @pcent_o) {
194 print STDERR "$where: warning: $what has extra \%spec $pcent_m[0]\n";
195 } elsif ($pcent_m[0] ne $pcent_o[0]) {
196 print STDERR "$where: warning: $what has \%spec $pcent_m[0] instead of $pcent_o[0]\n";
198 pop @pcent_m;
199 pop @pcent_o;
203 # Check for missing (or added) ellipses (...)
204 if ($msg =~ /\.\.\./ && $orig !~ /\.\.\./) {
205 print STDERR "$where: warning: $what has ellipses but original doesn't\n";
206 } elsif ($msg !~ /\.\.\./ && $orig =~ /\.\.\./) {
207 print STDERR "$where: warning: $what is missing ellipses\n";
210 # Check for missing (or added) menu shortcut (&)
211 if ($msg =~ /\&[A-Za-z\xc2-\xf4]/ && $orig !~ /\&[A-Za-z]/) {
212 print STDERR "$where: warning: $what has menu shortcut but original doesn't\n";
213 } elsif ($msg !~ /\&[A-Za-z\xc2-\xf4]/ && $orig =~ /\&[A-Za-z]/) {
214 print STDERR "$where: warning: $what is missing menu shortcut\n";
217 # Check for missing (or added) double quotes (“ and ”)
218 if (scalar($msg =~ s/(?:“|»)/$&/g) != scalar($orig =~ s/“/$&/g)) {
219 print STDERR "$where: warning: $what has different numbers of “\n";
220 print STDERR "$orig\n$msg\n\n";
222 if (scalar($msg =~ s/(?:”|«)/$&/g) != scalar($orig =~ s/”/$&/g)) {
223 print STDERR "$where: warning: $what has different numbers of ”\n";
224 print STDERR "$orig\n$msg\n\n";
227 # Check for missing (or added) menu accelerator "##"
228 if ($msg =~ /\#\#/ && $orig !~ /\#\#/) {
229 print STDERR "$where: warning: $what has menu accelerator but original doesn't\n";
230 } elsif ($msg !~ /\#\#/ && $orig =~ /\#\#/) {
231 print STDERR "$where: warning: $what is missing menu accelerator\n";
232 } elsif ($orig =~ /\#\#(.*)/) {
233 my $acc_o = $1;
234 my ($acc_m) = $msg =~ /\#\#(.*)/;
235 if ($acc_o ne $acc_m) {
236 print STDERR "$where: warning: $what has menu accelerator $acc_m instead of $acc_o\n";
240 # Check for missing (or added) menu accelerator "\t"
241 if ($msg =~ /\t/ && $orig !~ /\t/) {
242 print STDERR "$where: warning: $what has menu accelerator but original doesn't\n";
243 } elsif ($msg !~ /\t/ && $orig =~ /\t/) {
244 print STDERR "$where: warning: $what is missing menu accelerator\n";
245 } elsif ($orig =~ /\t(.*)/) {
246 my $acc_o = $1;
247 my ($acc_m) = $msg =~ /\t(.*)/;
248 if ($acc_o ne $acc_m) {
249 print STDERR "$where: warning: $what has menu accelerator $acc_m instead of $acc_o\n";
254 sub c_unescape {
255 my $str = shift @_;
256 $str =~ s/\\(x..|0|[0-7][0-7][0-7]|.)/&c_unescape_char($1)/ge;
257 return $str;
260 sub c_unescape_char {
261 my $ch = shift @_;
262 if ($ch eq '0' || $ch eq 'x00' || $ch eq '000') {
263 print STDERR "Nul byte in translation! (\\$ch)\n";
264 exit(1);
266 return $ch if $ch eq '"' || $ch eq '\\';
267 return "\n" if $ch eq "n";
268 return "\t" if $ch eq "t";
269 return "\r" if $ch eq "r";
270 return "\f" if $ch eq "f";
271 return chr(hex(substr($ch,1))) if $ch =~ /^x../;
272 return chr(oct($ch)) if $ch =~ /^[0-7][0-7][0-7]/;
273 print STDERR "Unknown C-escape in translation! (\\$ch)\n";
274 exit(1);