Update.
[libidn.git] / lib / gen-tld-tables.pl
blob62a8331194736033ac18d2b21231a0a098b281d5
1 #!/usr/bin/perl
3 # Author: Thomas Jacob, Internet24.de
5 # Copyright (C) 2004, 2005 Simon Josefsson.
6 # Copyright (C) 2004 Free Software Foundation, Inc.
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2, or (at your option)
11 # any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, you can either send email to this
20 # program's maintainer or write to: The Free Software Foundation,
21 # Inc.; 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 # I consider the output of this program to be unrestricted. Use it as
24 # you will.
26 # See doc/tld/README and draft-hoffman-idn-reg-*.txt inside Libidn for
27 # more information on the input file syntax.
29 # Use this to generate TLD tables:
30 # $ ./gen-tld-tables *.tld > tlds.c
32 use strict;
33 use warnings;
# Driver: read every TLD definition file named on the command line and
# write the generated C source (one table per TLD, then the main array
# of table pointers) to standard output.
die "Usage: $0 <TLD-FILE [TLD-FILE ...]>" if ($#ARGV == -1);

print "/* This file is automatically generated. DO NOT EDIT!\n";
print " Instead, edit gen-tld-tables.pl and re-run. */\n";
print "\n";
print "#include \"tld.h\"\n";
print "\n";

# TLD names in command-line order; used to emit the main array below.
my @tlds;

foreach my $tldfile (@ARGV)
{
    my ($tld, $version);
    my $data = process_definition ($tldfile, \$tld, \$version);

    # The TLD name is interpolated into C identifiers (_tld_NAME and
    # _tld_NAME_valid).  Without a "tld" line the name is undefined, and
    # two such files would emit the same symbols twice, so stop here
    # instead of generating a broken table.
    die "No tld line found in $tldfile\n" unless defined $tld;

    # A definition file is allowed to omit the version line; emit an
    # empty version string rather than interpolating undef.
    $version = "" unless defined $version;

    push @tlds, $tld;

    # Sort and merge the intervals in place before printing them.
    optimize($data);

    print_tld_domain($tld, $data, $version);
}

print "/* Main array */\n";
print "const Tld_table * _tld_tables[] =\n";
print "{\n";
foreach (@tlds)
{
    print " &_tld_${_},\n";
}
# The array is NULL-terminated.
print " NULL\n";
print "};\n";
# Print tld domain structure.
# void print_tld_domain($tld, \@intervals, $version);
#
#   $tld        - TLD name; used both as a C string and as part of the
#                 emitted C identifiers (_tld_NAME, _tld_NAME_valid)
#   \@intervals - reference to an array of [start, end] pairs
#   $version    - version string stored in the table
#
# Output goes to the currently selected output handle.  The array
# referenced by \@intervals is not modified.
sub print_tld_domain
{
    my $tld = shift;
    my $intervals = shift;
    my $version = shift;
    my $num_intervals = @$intervals;

    print "/* TLD $tld */\n";
    print "static const Tld_table_element _tld_${tld}_valid[] =\n";
    print "{\n";
    if (@$intervals)
    {
        # Every row except the last is followed by a comma.  Joining the
        # formatted rows produces exactly that without having to pop the
        # last element off the caller's array.
        my @rows = map { sprintf " { 0x%x, 0x%x }", $_->[0], $_->[1] }
                       @$intervals;
        print join(",\n", @rows) . "\n";
    }
    print "};\n";
    print "\n";

    print "static const Tld_table _tld_${tld} =\n";
    print "{\n";
    print " \"$tld\",\n";
    print " \"$version\",\n";
    print " $num_intervals,\n";
    print " &_tld_${tld}_valid[0]\n";
    print "};\n\n";
}
# Process a definition file.
# \@data process_definition($filename, \$tld, \$version);
#
#   $filename - path of the definition file to read
#   \$tld     - set to the argument of the file's "tld" line, if any
#   \$version - set to the argument of the file's "version" line, if any,
#               followed by " FILE (VERSION)" for every included file
#
# Returns a reference to an array of [start, end] pairs in file order
# (not sorted, not merged).  Dies if the file cannot be opened or a line
# cannot be parsed.  "include FILE" lines are processed recursively with
# FILE resolved relative to the directory of $filename; there is no
# protection against a file including itself.
sub process_definition
{
    my $filename = shift;
    my $tld = shift;
    my $version = shift;
    my $incversion = "";
    my @data;

    # The read loop below assigns to the global $_; keep that from
    # leaking into the caller (including the recursive caller).
    local $_;

    # Three-argument open so that characters in $filename are never
    # taken as an open mode; a lexical handle so that recursive calls
    # each get their own.
    open(my $fh, "<", $filename) or die "Cannot open $filename: $!";

    # Directory of $filename, used to resolve include lines.  A name
    # without any directory part lives in the current directory.
    my $path = $filename;
    $path = "." unless $path =~ s/\/[^\/]+$//;

    # Parser state.  It deliberately survives across lines:
    #   $have_num - a number has been read and not yet stored
    #   $num      - that pending number
    #   $is_int   - a "-" was seen, so the next number ends an interval
    #   $cnum     - the number read most recently
    my ($is_int,$have_num,$num,$cnum);
    my $line = 0;
    while(<$fh>)
    {
        # Count first, so that directive lines (which leave the loop
        # body early via "next") are counted as well.
        $line++;

        chomp;

        # Strip comments.
        s/#.*$//;

        if (m/^include\s+(\S+)\s*$/i)
        {
            my $incfile = $1;
            my ($junk, $ver);
            my $incdata = process_definition("$path/$incfile", \$junk, \$ver);
            # The included file may have no version line.
            $ver = "" unless defined $ver;
            $incversion = $incversion . " $incfile ($ver)";
            push @data, @$incdata;
            next;
        }

        if (m/^version\s+"(.*)"\s*$/i)
        {
            $$version = $1;
            next;
        }

        if (m/^tld\s+(\S+)\s*$/i)
        {
            $$tld = $1;
            next;
        }

        # Consume the rest of the line token by token.
        while ($_ ne "")
        {
            s/^\s*//;
            if ( (s/^(0x)([a-f0-9]+)//i) ||
                 (s/^(U\+)([a-f0-9]+)//i) ||
                 (s/^(0)(\d+)//) ||
                 (s/^(\d+)//) )
            {
                # Only the plain decimal form has no second capture.
                # Test for definedness, not truth, so that "0x0" and
                # "U+0" are converted as well.
                my ($prefix, $digits) = ($1, $2);
                if (!defined $digits)
                {
                    $cnum = $prefix;
                }
                elsif ($prefix eq "0")
                {
                    $cnum = oct($digits);
                }
                else
                {
                    $cnum = hex($digits);
                }

                if ($have_num)
                {
                    if ($is_int)
                    {
                        # Pending number plus this one form an interval.
                        push @data, [$num, $cnum];
                        $have_num = 0;
                        $is_int = 0;
                    }
                    else
                    {
                        # Pending number stands alone; this one is now
                        # the pending number.
                        push @data, [$num,$num];
                        $num = $cnum;
                    }
                }
                else
                {
                    $have_num = 1;
                    $num = $cnum;
                }
            }
            elsif (s/^\-//)
            {
                $is_int = 1;
            }
            elsif (s/^\|//)
            {
                $is_int = 0;
            }
            elsif (s/^\://)
            {
                $is_int = 0;
            }
            else
            {
                # $_ is empty here when only whitespace was left.
                die "Parser error in file $filename at line $line near $_, "
                    if $_ ne "";
            }
        }
    }
    close($fh);

    # Flush a number that is still pending at end of file.
    if ($have_num)
    {
        if ($is_int)
        {
            push @data, [$num, $cnum];
        }
        else
        {
            push @data, [$num, $num];
        }
    }

    if ($incversion ne "")
    {
        $$version = "" unless defined $$version;
        $$version = $$version . $incversion;
    }
    return \@data;
}
# Reduce a list of intervals to the smallest equivalent list.
# void optimize(\@intervals)
#
# \@intervals refers to an array of [start, end] pairs.  The array is
# rewritten in place: sorted by start, with overlapping and directly
# adjacent intervals folded into one.  An empty array is left alone.
sub optimize
{
    my ($ranges) = @_;

    return undef if !@$ranges;

    # Pull the lowest-starting interval off as the one being grown and
    # keep the remainder, in ascending order of start, to fold into it.
    my ($merged, @rest) = sort { $a->[0] <=> $b->[0] } @$ranges;
    @$ranges = ();

    for my $next (@rest)
    {
        if ($next->[0] <= $merged->[1] + 1)
        {
            # Overlaps or touches the current interval: extend its end
            # if this one reaches further.
            $merged->[1] = $next->[1] if $next->[1] > $merged->[1];
            next;
        }

        # There is a gap, so the current interval is finished.
        push @$ranges, $merged;
        $merged = $next;
    }

    push @$ranges, $merged;
}