Bug 26922: Regression tests
[koha.git] / misc / translator / po2json
blob410bd68c41f9f9553518a24fdb76fbcccb5448d7
1 #!/usr/bin/env perl
2 # PODNAME: po2json
3 # ABSTRACT: Command line tool for converting a po file into a Gettext.js compatible json dataset
5 # Copyright (C) 2008, Joshua I. Miller E<lt>unrtst@cpan.orgE<gt>, all
6 # rights reserved.
8 # This program is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU Library General Public License as published
10 # by the Free Software Foundation; either version 2, or (at your option)
11 # any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # Library General Public License for more details.
18 # You should have received a copy of the GNU Library General Public
19 # License along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
21 # USA.
24 use strict;
25 use JSON 2.53;
26 use Locale::PO 0.24;
27 use File::Basename qw(basename);
29 my $gettext_context_glue = "\004";
31 sub usage {
32 return "$0 {-p} {file.po} > {outputfile.json}
33 -p : do pretty-printing of json data\n";
36 &main;
38 sub main
40 my $src;
42 my $pretty = 0;
43 if ($ARGV[0] =~ /^--?p$/) {
44 shift @ARGV;
45 $pretty = 1;
48 if (length($ARGV[0]))
50 if ($ARGV[0] =~ /^-h/) {
51 print &usage;
52 exit 1;
55 unless (-r $ARGV[0]) {
56 print "ERROR: Unable to read file [$ARGV[0]]\n";
57 die &usage;
60 $src = $ARGV[0];
61 } else {
62 die &usage;
65 # we'll be building this data struct
66 my $json = {};
68 my $plural_form_count;
69 # get po object stack
70 my $pos = Locale::PO->load_file_asarray($src) or die "Can't parse po file [$src].";
73 foreach my $po (@$pos)
75 my $qmsgid1 = $po->msgid;
76 my $msgid1 = $po->dequote( $qmsgid1 );
78 # on the header
79 if (length($msgid1) == 0)
81 my $qmsgstr = $po->msgstr;
82 my $cur = $po->dequote( $qmsgstr );
83 my %cur;
84 foreach my $h (split(/\n/, $cur))
86 next unless length($h);
87 my @h = split(':', $h, 2);
89 if (length($cur{$h[0]})) {
90 warn "SKIPPING DUPLICATE HEADER LINE: $h\n";
91 } elsif ($h[0] =~ /#-#-#-#-#/) {
92 warn "SKIPPING ERROR MARKER IN HEADER: $h\n";
93 } elsif (@h == 2) {
94 $cur{$h[0]} = $h[1];
95 } else {
96 warn "PROBLEM LINE IN HEADER: $h\n";
97 $cur{$h} = '';
101 # init header ref
102 $$json{''} ||= {};
104 # populate header ref
105 foreach my $key (keys %cur) {
106 $$json{''}{$key} = length($cur{$key}) ? $cur{$key} : '';
109 # save plural form count
110 if ($$json{''}{'Plural-Forms'}) {
111 my $t = $$json{''}{'Plural-Forms'};
112 $t =~ s/^\s*//;
113 if ($t =~ /nplurals=(\d+)/) {
114 $plural_form_count = $1;
115 } else {
116 die "ERROR parsing plural forms header [$t]\n";
118 } else {
119 warn "NO PLURAL FORM HEADER FOUND - DEFAULTING TO 2\n";
120 # just default to 2
121 $plural_form_count = 2;
124 # on a normal msgid
125 } else {
126 my $qmsgctxt = $po->msgctxt;
127 my $msgctxt;
128 $msgctxt = $po->dequote($qmsgctxt) if $qmsgctxt;
130 # build the new msgid key
131 my $msg_ctxt_id = defined($msgctxt) ? join($gettext_context_glue, ($msgctxt, $msgid1)) : $msgid1;
133 # build translation side
134 my @trans;
136 # msgid plural side
137 my $qmsgid_plural = $po->msgid_plural;
138 my $msgid2;
139 $msgid2 = $po->dequote( $qmsgid_plural ) if $qmsgid_plural;
140 push(@trans, $msgid2);
142 # translated string
143 # this shows up different if we're plural
144 if (defined($msgid2) && length($msgid2))
146 my $plurals = $po->msgstr_n;
147 for (my $i=0; $i<$plural_form_count; $i++)
149 my $qstr = ref($plurals) ? $$plurals{$i} : undef;
150 my $str;
151 $str = $po->dequote( $qstr ) if $qstr;
152 push(@trans, $str);
155 # singular
156 } else {
157 my $qmsgstr = $po->msgstr;
158 my $msgstr;
159 $msgstr = $po->dequote( $qmsgstr ) if $qmsgstr;
160 push(@trans, $msgstr);
163 $$json{$msg_ctxt_id} = \@trans;
168 my $jsonobj = JSON->new;
169 my $basename = basename($src);
170 $basename =~ s/\.pot?$//;
171 if ($pretty)
173 print $jsonobj->pretty->encode( { $basename => $json });
174 } else {
175 print $jsonobj->encode($json);
179 __END__
181 =pod
183 =head1 NAME
185 po2json - Command line tool for converting a po file into a Gettext.js compatible json dataset
187 =head1 VERSION
189 version 0.019
191 =head1 SYNOPSIS
193 po2json /path/to/domain.po > domain.json
195 =head1 DESCRIPTION
197 This takes a PO file, as is created from GNU Gettext's xgettext, and converts it into a JSON file.
199 The output is an annonymous associative array. So, if you plan to load this via a <script> tag, more processing will be require (the output from this program must be assigned to a named javascript variable). For example:
201 echo -n "var json_locale_data = " > domain.json
202 po2json /path/to/domain.po >> domain.json
203 echo ";" >> domain.json
205 =head1 NAME
207 po2json - Convert a Uniforum format portable object file to javascript object notation.
209 =head1 OPTIONS
211 -p : pretty-print the output. Makes the output more human-readable.
213 =head1 BUGS
215 Locale::PO has a potential bug (I don't know if this actually causes a problem or not). Given a .po file with an entry like:
217 msgid ""
218 "some string"
219 msgstr ""
221 When $po->dump is run on that entry, it will output:
223 msgid "some string"
224 msgstr ""
226 The above is removing the first linebreak. I don't know if that is significant. If so, we'll have to rewrite using a different parser (or include our own parser).
228 =head1 REQUIRES
230 Locale::PO
231 JSON
233 =head1 SEE ALSO
235 Locale::PO
236 Gettext.js
238 =head1 AUTHOR
240 Copyright (C) 2008, Joshua I. Miller E<lt>unrtst@cpan.orgE<gt>, all rights reserved. See the source code for details.
242 =head1 AUTHOR
244 Torsten Raudssus <torsten@raudss.us>
246 =head1 COPYRIGHT AND LICENSE
248 This software is copyright (c) 2012 by DuckDuckGo, Inc. L<http://duckduckgo.com/>, Torsten Raudssus <torsten@raudss.us>.
250 This is free software; you can redistribute it and/or modify it under
251 the same terms as the Perl 5 programming language system itself.
253 =cut