2008-11-04 Anders Carlsson <andersca@apple.com>
[webkit/qt.git] / WebKitTools / Scripts / extract-localizable-strings
blob420624bc1de9da4dfcb129ec7c0d1375c69e5ca5
1 #!/usr/bin/perl -w
3 # Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions
7 # are met:
9 # 1. Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 # 2. Redistributions in binary form must reproduce the above copyright
12 # notice, this list of conditions and the following disclaimer in the
13 # documentation and/or other materials provided with the distribution.
14 # 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
15 # its contributors may be used to endorse or promote products derived
16 # from this software without specific prior written permission.
18 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
19 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
22 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 # This script is like the genstrings tool (minus most of the options) with these differences.
31 # 1) It uses the names UI_STRING and UI_STRING_WITH_KEY for the macros, rather than the macros
32 # from NSBundle.h, and doesn't support tables (although they would be easy to add).
33 # 2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
34 # @"" strings only reliably support ASCII since they are decoded based on the system encoding
35 # at runtime, so give different results on US and Japanese systems for example).
36 # 3) It looks for strings that are not marked for localization, using both macro names that are
37 # known to be used for debugging in Intrigue source code and an exceptions file.
38 # 4) It finds the files to work on rather than taking them as parameters, and also uses a
39 # hardcoded location for both the output file and the exceptions file.
40 # It would have been nice to use the project to find the source files, but it's too hard to
41 # locate source files after parsing a .pbxproj file.
43 # The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.
45 use strict;
# Macro and function names whose string arguments are debugging output and
# therefore never need to be localized.
my %isDebugMacro = map { $_ => 1 } qw(
    ASSERT_WITH_MESSAGE
    LOG_ERROR
    ERROR
    NSURL_ERROR
    FATAL
    LOG
    LOG_WARNING
    UI_STRING_LOCALIZE_LATER
    LPCTSTR_UI_STRING_LOCALIZE_LATER
    UNLOCALIZED_STRING
    UNLOCALIZED_LPCTSTR
    dprintf
    NSException
    NSLog
    printf
);

# Two positional arguments are mandatory (both are shifted off below), so
# insist on both up front instead of failing later with an undefined file name.
@ARGV >= 2 or die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run extract-webkit-localizable-strings instead?\n";

my $exceptionsFile = shift @ARGV;
-f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n";

my $fileToUpdate = shift @ARGV;
-f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n";

# Remaining arguments are directories to scan; an argument starting with "-"
# names a path to skip instead. With no directory arguments, scan ".".
my @directories = ();
my @directoriesToSkip = ();
if (@ARGV < 1) {
    push(@directories, ".");
} else {
    for my $dir (@ARGV) {
        if ($dir =~ /^-(.*)$/) {
            push @directoriesToSkip, $1;
        } else {
            push @directories, $dir;
        }
    }
}
# Set to 1 as soon as any ERROR is reported; checked before the output file is written.
my $sawError = 0;

# Counters reported in the summary at the end of the run.
my $localizedCount = 0;
my $keyCollisionCount = 0;
my $notLocalizedCount = 0;
my $NSLocalizeCount = 0;

# %exception maps each exception entry (exactly as written in the exceptions
# file) to the line number where it was first seen. %usedException records
# which entries actually suppressed a diagnostic.
my %exception;
my %usedException;

# An entry is one of:
#   "string"          a quoted string
#   path/file.ext     a source file name (h, m, mm, c, cpp)
#   path/file.ext:"string"
# The dot before the extension is escaped so that it only matches a literal ".".
if (open EXCEPTIONS, "<", $exceptionsFile) {
    while (<EXCEPTIONS>) {
        chomp;
        if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w.]+\.(h|m|mm|c|cpp)$/ or /^[-_\/\w.]+\.(h|m|mm|c|cpp):"([^\\"]|\\.)*"$/) {
            if ($exception{$_}) {
                print "$exceptionsFile:$.:exception for $_ appears twice\n";
                print "$exceptionsFile:$exception{$_}:first appearance\n";
            } else {
                $exception{$_} = $.;
            }
        } else {
            print "$exceptionsFile:$.:syntax error\n";
        }
    }
    close EXCEPTIONS;
} else {
    # Keep going without exceptions, but say so instead of failing silently.
    print STDERR "Couldn't open exceptions file $exceptionsFile: $!\n";
}
# Build the find(1) command line. Directory names are interpolated into a shell
# command inside double quotes, so they must not contain characters that are
# special to the shell within double quotes.
my $quotedDirectoriesString = '"' . join('" "', @directories) . '"';
for my $dir (@directoriesToSkip) {
    # "-prune -o" makes each skipped path an alternative of its own; without
    # the -o the prune tests would be AND-ed with each other and with the
    # first -name test.
    $quotedDirectoriesString .= ' -path "' . $dir . '" -prune -o';
}

# The name tests are grouped so they form a single alternative, and -print is
# explicit so that pruned directories themselves are not listed.
my @files = ( split "\n", `find $quotedDirectoriesString \\( -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp" \\) -print` );
# Main scanning loop: tokenize every source file found, hand each complete
# UI_STRING / UI_STRING_KEY use to HandleUIString, and report quoted strings
# that are neither inside a debug macro nor covered by an exception.
105 for my $file (sort @files) {
# Skip the header that defines the macros, and anything under an icu directory.
106 next if $file =~ /\/WebLocalizableStrings\.h$/;
107 next if $file =~ /\/icu\//;
# Strip a leading "./" so file names match the exceptions file entries.
# NOTE(review): the "." in this pattern is unescaped, so it matches any character.
109 $file =~ s-^./--;
# NOTE(review): two-argument open; a file name with leading/trailing whitespace
# or mode characters would be misinterpreted.
111 open SOURCE, $file or die "can't open $file\n";
# True while inside a /* ... */ comment that continues onto following lines.
113 my $inComment = 0;
# State of the UI_STRING parser: $expected is the token required next ("" when
# not inside a UI_STRING macro); $macro/$macroLine identify the macro in
# progress; $UIString, $key and $comment are its arguments collected so far.
115 my $expected = "";
116 my $macroLine;
117 my $macro;
118 my $UIString;
119 my $key;
120 my $comment;
# $string accumulates the current (possibly concatenated) string literal and
# $stringLine the line it started on. $nestingLevel is defined only while
# inside the bracketed argument list of a debug macro or NSLocalized use.
122 my $string;
123 my $stringLine;
124 my $nestingLevel;
# Last non-string token seen; used to recognize #include/#import and extern "C".
126 my $previousToken = "";
128 while (<SOURCE>) {
129 chomp;
131 # Handle continued multi-line comment.
132 if ($inComment) {
133 next unless s-.*\*/--;
134 $inComment = 0;
137 # Handle all the tokens in the line.
# Each iteration strips one token from the front of $_: a word (may contain #),
# "/*", "//", a run of other punctuation, or failing that any single character.
138 while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
139 my $token = $1;
# A double quote starts a string literal: consume up to the closing quote
# (honoring backslash escapes) and append it to $string, so that adjacent
# literals are concatenated.
141 if ($token eq "\"") {
142 if ($expected and $expected ne "a quoted string") {
143 print "$file:$.:ERROR:found a quoted string but expected $expected\n";
144 $sawError = 1;
145 $expected = "";
147 if (s-^(([^\\$token]|\\.)*?)$token--) {
148 if (!defined $string) {
149 $stringLine = $.;
150 $string = $1;
151 } else {
152 $string .= $1;
154 } else {
155 print "$file:$.:ERROR:mismatched quotes\n";
156 $sawError = 1;
157 $_ = "";
159 next;
# The first non-string token after a string literal: the literal is complete,
# so process it. The handleString label is also the target of the goto after
# the line loop, which flushes a literal still pending at end of file.
162 if (defined $string) {
163 handleString:
164 if ($expected) {
# Inside a UI_STRING macro: fill in the next missing argument. The key
# argument is only taken for macros whose name ends in UI_STRING_KEY.
165 if (!defined $UIString) {
166 # FIXME: Validate UTF-8 here?
167 $UIString = $string;
168 $expected = ",";
169 } elsif (($macro =~ /UI_STRING_KEY$/) and !defined $key) {
170 # FIXME: Validate UTF-8 here?
171 $key = $string;
172 $expected = ",";
173 } elsif (!defined $comment) {
174 # FIXME: Validate UTF-8 here?
175 $comment = $string;
176 $expected = ")";
178 } else {
# Outside a UI_STRING macro: report the literal unless one of the
# exemptions below applies. Exceptions that apply are marked as used.
179 if (defined $nestingLevel) {
180 # In a debug macro, no need to localize.
181 } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
182 # File name, no need to localize.
183 } elsif ($previousToken eq "extern" and $string eq "C") {
184 # extern "C", no need to localize.
185 } elsif ($string eq "") {
186 # Empty string can sometimes be localized, but we need not complain if not.
187 } elsif ($exception{$file}) {
188 $usedException{$file} = 1;
189 } elsif ($exception{"\"$string\""}) {
190 $usedException{"\"$string\""} = 1;
191 } elsif ($exception{"$file:\"$string\""}) {
192 $usedException{"$file:\"$string\""} = 1;
193 } else {
194 print "$file:$stringLine:\"$string\" is not marked for localization\n";
195 $notLocalizedCount++;
198 $string = undef;
# Leave the token loop when there is no current token; presumably this is the
# case when this point is reached through the end-of-file goto -- confirm.
199 last if !defined $token;
202 $previousToken = $token;
# NSLocalized* macros are reported as errors, except tokens containing
# NSLocalizedDescriptionKey or NSLocalizedStringFromTableInBundle. Their
# arguments are then treated like those of a debug macro.
204 if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) {
205 print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n";
206 $nestingLevel = 0 if !defined $nestingLevel;
207 $sawError = 1;
208 $NSLocalizeCount++;
209 } elsif ($token eq "/*") {
210 if (!s-^.*?\*/--) {
211 $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
212 $inComment = 1;
214 } elsif ($token eq "//") {
215 $_ = ""; # Discard the rest of the line
216 } elsif ($token eq "'") {
# Character literal: consume through the closing single quote.
217 if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
218 print "$file:$.:ERROR:mismatched single quote\n";
219 $sawError = 1;
220 $_ = "";
222 } else {
# Any other token: first check it against what the UI_STRING parser expects,
# then advance the parser / nesting state.
223 if ($expected and $expected ne $token) {
224 print "$file:$.:ERROR:found $token but expected $expected\n";
225 $sawError = 1;
226 $expected = "";
# A token ending in UI_STRING or UI_STRING_KEY starts a new macro use.
228 if ($token =~ /UI_STRING(_KEY)?$/) {
229 $expected = "(";
230 $macro = $token;
231 $UIString = undef;
232 $key = undef;
233 $comment = undef;
234 $macroLine = $.;
235 } elsif ($token eq "(" or $token eq "[") {
236 ++$nestingLevel if defined $nestingLevel;
237 $expected = "a quoted string" if $expected;
238 } elsif ($token eq ",") {
239 $expected = "a quoted string" if $expected;
240 } elsif ($token eq ")" or $token eq "]") {
# Closing bracket: leave debug-macro mode once the nesting count drops to
# zero, and finish the UI_STRING use if one is in progress. When no separate
# key was given, the string itself is the key.
241 $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
242 if ($expected) {
243 $key = $UIString if !defined $key;
244 HandleUIString($UIString, $key, $comment, $file, $macroLine);
245 $macro = "";
246 $expected = "";
247 $localizedCount++;
249 } elsif ($isDebugMacro{$token}) {
# Entering a debug macro: strings inside its brackets are not reported.
250 $nestingLevel = 0 if !defined $nestingLevel;
# End of file: process a string literal that was still pending.
# NOTE(review): this jumps into the body of the loops above; perl documents
# goto into a construct as deprecated -- confirm against the perl in use.
257 goto handleString if defined $string;
# A UI_STRING use that was never closed.
259 if ($expected) {
260 print "$file:ERROR:reached end of file but expected $expected\n";
261 $sawError = 1;
264 close SOURCE;
# Tables of accepted localizable strings, all indexed by key: the string to
# translate, its translator comment, and the file and line of the recorded use.
my %stringByKey;
my %commentByKey;
my %fileByKey;
my %lineByKey;

# Returns true if the text contains U+FFFD, the replacement character.
sub ContainsReplacementCharacter
{
    my ($text) = @_;
    return scalar grep { $_ == 0xFFFD } unpack "U*", $text;
}

# Records one UI_STRING / UI_STRING_KEY use.
#
#   $string   the string to translate
#   $key      the lookup key (the caller passes $string when there is no separate key)
#   $comment  the translator comment
#   $file     source file containing the use, for diagnostics
#   $line     line of the macro name, for diagnostics
#
# Reports illegal UTF-8 in any of the three texts as an error (setting
# $sawError), and reports a key collision (incrementing $keyCollisionCount)
# when the same key is seen again with a different string or a different
# comment. In either case the use is not recorded. Returns nothing useful.
sub HandleUIString
{
    my ($string, $key, $comment, $file, $line) = @_;

    my $bad = 0;
    if (ContainsReplacementCharacter($string)) {
        print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    # When the key is the string itself it has already been checked above.
    if ($string ne $key && ContainsReplacementCharacter($key)) {
        print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if (ContainsReplacementCharacter($comment)) {
        print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if ($bad) {
        $sawError = 1;
        return;
    }

    # Test for the presence of the key rather than the truth of the stored
    # value: a stored string or comment of "0" or "" is false, and must still
    # take part in collision detection.
    if (exists $stringByKey{$key} && $stringByKey{$key} ne $string) {
        print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n";
        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
        $keyCollisionCount++;
        return;
    }
    if (exists $commentByKey{$key} && $commentByKey{$key} ne $comment) {
        print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n";
        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
        $keyCollisionCount++;
        return;
    }

    $fileByKey{$key} = $file;
    $lineByKey{$key} = $line;
    $stringByKey{$key} = $string;
    $commentByKey{$key} = $comment;
}
# Separate the per-file diagnostics printed so far from the summary.
print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;

# Exceptions that never suppressed a diagnostic are reported so the
# exceptions file can be pruned.
my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
if (@unusedExceptions) {
    for my $unused (@unusedExceptions) {
        print "$exceptionsFile:$exception{$unused}:exception $unused not used\n";
    }
    print "\n";
}

print "$localizedCount localizable strings\n" if $localizedCount;
print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;

# Never overwrite the strings file with partial results.
if ($sawError) {
    print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
    exit 1;
}

# One entry per key, sorted by key: the comment, then "key" = "string";
my $localizedStrings = "";

for my $key (sort keys %commentByKey) {
    $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
}

# Write out the strings file in UTF-16 with a BOM.
# $] is the numeric perl version; utf8::decode is applied on perl 5.8 and later.
utf8::decode($localizedStrings) if $] >= 5.008;

# Convert code points to big-endian UTF-16 code units. Code points above
# U+FFFF do not fit in one 16-bit unit and are written as a surrogate pair.
my @codeUnits = (0xFEFF);
for my $codePoint (unpack "U*", $localizedStrings) {
    if ($codePoint > 0xFFFF) {
        my $offset = $codePoint - 0x10000;
        push @codeUnits, 0xD800 + ($offset >> 10), 0xDC00 + ($offset & 0x3FF);
    } else {
        push @codeUnits, $codePoint;
    }
}
my $output = pack "n*", @codeUnits;

if (-e "$fileToUpdate") {
    open STRINGS, ">", "$fileToUpdate" or die "Couldn't open $fileToUpdate for writing: $!\n";
    # The data is binary UTF-16; keep any I/O layer from translating newline bytes.
    binmode STRINGS;
    print STRINGS $output;
    # Buffered write errors only surface when the handle is closed.
    close STRINGS or die "Couldn't finish writing $fileToUpdate: $!\n";
} else {
    print "$fileToUpdate does not exist\n";
    exit 1;
}