1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 # Tool to generate the cldr-quotes.inc file, to be #include'd in Quotes.cpp
6 # to provide locale-appropriate opening and closing quote marks.
8 # To regenerate cldr-quotes.inc for a new CLDR release, download the data file
9 # "cldr-common-##.zip" from http://unicode.org/Public/cldr/latest into the
10 # current directory, run
12 # perl cldr-quotes.pl <filename> > cldr-quotes.inc
14 # (where <filename> is the downloaded cldr-common-## archive), and
15 # then use `hg diff` to check that the result looks sane.
use strict;
use warnings;

use Encode;
use IO::Uncompress::Unzip "unzip";
# Expect exactly one argument: the path of the cldr-common-## zip archive.
die "Usage: perl cldr-quotes.pl <filename>" unless @ARGV == 1;

my $filename = $ARGV[0];

# %langQuotes: locale code => array of up to four quote strings, in the order
#   [quotationStart, quotationEnd, alternateQuotationStart, alternateQuotationEnd]
# %quoteLangs: formatted list of quote codepoints => array of locale codes
#   that use that list (filled in later in the script).
my (%langQuotes, %quoteLangs);

my $zip = IO::Uncompress::Unzip->new($filename)
  or die "unzip failed: $IO::Uncompress::Unzip::UnzipError\n";

# CLDR element names, listed in the order of their slots in a %langQuotes entry.
my @quoteElements = qw(quotationStart quotationEnd
                       alternateQuotationStart alternateQuotationEnd);

# Visit every member of the archive; only common/main/<locale>.xml files are
# scanned for quotation-mark elements.
my $status = 1;
while ($status > 0) {
  my $name = $zip->getHeaderInfo()->{Name};
  if ($name =~ m@common/main/([A-Za-z0-9_]+)\.xml@) {
    my $lang = $1;
    # The inheritance pass later in the script strips subtags at "-", so the
    # underscore-separated CLDR file name is converted to a hyphenated code.
    # NOTE(review): every underscore is converted (sr_Cyrl_BA -> sr-Cyrl-BA);
    # confirm this is the form the consumer of cldr-quotes.inc expects.
    $lang =~ tr/_/-/;

    # Read the current member line by line and record each element found.
    while (my $line = <$zip>) {
      for my $i (0 .. $#quoteElements) {
        my $tag = $quoteElements[$i];
        $langQuotes{$lang}[$i] = $1 if $line =~ m!<\Q$tag\E>(.+)<!;
      }
    }
  }
  # nextStream() yields 1 for another member, 0 at the end, -1 on error.
  $status = $zip->nextStream();
}
die "error reading $filename: $IO::Uncompress::Unzip::UnzipError\n"
  if $status < 0;
$zip->close();
# Resolve inheritance for every locale, then group locales by the resulting
# set of four quote characters.
foreach my $lang (sort keys %langQuotes) {
  # We don't actually want to emit anything for the root locale
  next if $lang eq "root";

  # Inherit any missing entries from the locale's parent(s), nearest first.
  my $parent = $lang;
  while ($parent =~ m/-/) {
    # Strip off a trailing subtag to find a parent locale code
    $parent =~ s/-[^-]+$//;
    # A parent with no data contributes nothing. Skipping it also avoids
    # autovivifying an empty $langQuotes{$parent}, which would make the
    # "exists" test below meaningless.
    next unless exists $langQuotes{$parent};
    # Fill in any values available from the parent
    for my $i (0 .. 3) {
      $langQuotes{$lang}[$i] = $langQuotes{$parent}[$i]
        unless defined $langQuotes{$lang}[$i];
    }
  }

  # Anything still missing is copied from the root locale
  my $rootQuotes = exists $langQuotes{"root"} ? $langQuotes{"root"} : [];
  for my $i (0 .. 3) {
    $langQuotes{$lang}[$i] = $rootQuotes->[$i]
      unless defined $langQuotes{$lang}[$i];
  }

  # If the locale ends up the same as its parent, skip it. After the loop
  # above, $parent is the outermost ancestor (the code with no "-" left).
  next if ($parent ne $lang) && (exists $langQuotes{$parent}) &&
    (join(",", @{$langQuotes{$lang}}) eq join(",", @{$langQuotes{$parent}}));

  # Create a string with the C source form for the array of 4 quote characters.
  # ord() sees only the first decoded character of each quote string.
  my $quoteChars = join(", ",
    map { sprintf("0x%x", ord(Encode::decode("UTF-8", $_))) }
      @{$langQuotes{$lang}});

  # Record this locale in the list of those which use this particular set of
  # quotes (push creates the list on first use).
  push @{$quoteLangs{$quoteChars}}, $lang;
}
# Output each unique list of quotes, with the string of associated locales.
# The timestamp is gmtime() in scalar context, i.e. a human-readable UTC date.
my $timestamp = gmtime();

# File header for the generated .inc file; $timestamp and $filename are
# interpolated into the text.
# NOTE(review): the comment delimiters and blank lines of this header were
# reconstructed; diff the output against the checked-in cldr-quotes.inc.
print <<"END_HEADER";
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Derived from the Unicode Common Locale Data Repository by cldr-quotes.pl.
 *
 * For terms of use, see http://www.unicode.org/copyright.html.
 */

/*
 * Created on $timestamp from CLDR data file $filename.
 *
 * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
 *
 * (generated by intl/locale/cldr-quotes.pl)
 */

END_HEADER

print "static const LangQuotesRec sLangQuotes[] = {\n";
print " // clang-format off\n";

# One record per distinct quote set: the locales using it are sorted and
# joined with embedded NULs, followed by the four codepoints. The records are
# then sorted as strings so the output order is stable between runs.
my @records = map {
  my $locales = join("\\0", sort @{$quoteLangs{$_}});
  sprintf(" { \"%s\\0\", { { %s } } },\n", $locales, $_);
} keys %quoteLangs;
print sort { $a cmp $b } @records;

print " // clang-format on\n";