Added phash.pl
[nasm/perl-rewrite.git] / perl / pptok.pl
blob2920e693066da33ee11cba2d630e8d67ff4401a0
1 #! /usr/bin/env perl
2 use strict;
3 use warnings;
4 use 5.010;
6 use lib qw'lib';
8 sub load_pptok(;$);
9 sub h;
10 sub c;
11 sub ph;
14 our %jump = (
15 h => \&h,
16 c => \&c,
17 ph => \&ph,
18 dump => sub{
19 use Data::Dump 'dump';
20 say dump {@_};
25 my($what, $in, $out) = @ARGV;
26 my %info = load_pptok();
28 $in //= '';
30 $info{out} = $out;
32 my $jump = $jump{lc $what};
33 given( ref $jump ){
34 when( 'CODE' ){
35 my $return = $jump->(%info);
36 print $return if defined $return;
41 sub load_pptok(;$){
42 my($filename) = @_;
43 $filename = 'pptok.dat' unless @_;
45 my @tokens_cond;
46 my @conditions;
48 my @tokens;
49 my $first_uncond;
52 use autodie qw'open close';
53 open( my $file, '<', $filename ) or die;
55 while( my $line = <$file> ){
56 $line =~ s/^\s+//; # Remove leading whitespace
57 $line =~ s/\s*(?:\#.*)?$//; # Remove comments and trailing whitespace
58 next unless $line;
60 given($line){
61 when( /^\* (.*) /x ){ push @conditions, $1; }
62 when( /^\% (.*) \*$/x ){ push @tokens_cond, $1; }
63 when( /^\% (.*) $/x ){ push @tokens, $1; }
67 close $file;
70 @conditions = sort @conditions;
71 @tokens = sort @tokens;
72 @tokens_cond = sort @tokens_cond;
75 # Generate the expanded list including conditionals. The conditionals
76 # are at the beginning, padded to a power of 2, with the inverses
77 # interspersed; this allows a simple mask to pick out the condition.
79 while ((scalar @conditions) & (scalar @conditions)-1) {
80 push(@conditions, undef);
83 $first_uncond = $tokens[0];
85 my @tokens_cond_p;
86 for my $token ( @tokens_cond ){
87 for my $cond ( @conditions ){
88 if( defined $cond){
89 push @tokens_cond_p, "${token}${cond}", "${token}n${cond}";
90 }else{
91 push @tokens_cond_p, undef, undef;
95 @tokens = ( @tokens_cond_p, @tokens );
98 my %return = (
99 tokens => [@tokens],
100 conditions => [@conditions],
101 first_uncond => $first_uncond,
102 in => $filename,
103 tokens_cond => [@tokens_cond],
104 #tokens_cond_p => \@tokens_cond_p
107 return %return if wantarray;
108 return \%return;
111 sub h{
112 my %info = @_;
114 my $output = <<END;
115 /* Automatically generated from $info{in} by $0 */
116 /* Do not edit */
118 enum preproc_token {
121 my $f = " PP_%-13s = %3d\n";
123 my $n = 0;
124 for my $token ( @{$info{tokens}} ){
125 if( defined($token) ){
126 #printf OUT " %-16s = %3d,\n", "PP_\U$token\E", $n;
127 $output .= sprintf $f, uc $token, $n;
129 $n++;
132 $output .= sprintf $f, 'INVALID', -1;
134 $output .= <<END;
137 enum pp_conditional {
140 my $n = 0;
141 for my $cc ( @{$info{conditions}} ) {
142 if (defined($cc)) {
143 $output .= sprintf " PPC_IF%-10s = %3d,\n",uc $cc, $n;
145 $n += 2;
148 my $pp_cond = (scalar(@{$info{conditions}})-1) << 1;
149 $output .= sprintf <<END, $pp_cond, uc $info{first_uncond};
152 #define PP_COND(x) ((enum pp_conditional)((x) & 0x%x))
153 #define PP_IS_COND(x) ((unsigned int)(x) < PP_%s)
154 #define PP_NEGATIVE(x) ((x) & 1)
158 use List::MoreUtils 'zip';
160 for my $token( @{$info{tokens_cond}} ){
161 my $token = uc $token;
162 $output .= "#define CASE_PP_$token \\\n";
164 my @cond = map {uc $_} grep { defined $_ } '', @{$info{conditions}};
165 my @ncond = map {"N$_"} @cond;
167 @cond = zip @cond, @ncond;
169 @cond = map { " case PP_${token}$_" } @cond;
171 $output .= join ": \\\n", @cond;
173 $output .= "\n";
176 return $output;
186 sub c{
187 my %info = @_;
189 # header
190 my $output = <<END;
191 /* Automatically generated from $info{in} by $0 */
192 /* Do not edit */
194 #include "compiler.h"
195 #include <inttypes.h>
196 #include <ctype.h>
197 #include "nasmlib.h"
198 #include "hashtbl.h"
199 #include "preproc.h"
206 # list of tokens, followed by list of the lengths of the tokens
208 my @tokens = @{$info{tokens}};
209 $output .= sprintf "const char * const pp_directives[%d] = {\n", scalar @tokens;
210 for my $d ( @tokens ){
211 if (defined($d)) {
212 $output .= " \"%$d\",\n";
213 } else {
214 $output .= " NULL,\n";
217 $output .= "};\n";
220 $output .= sprintf "const uint8_t pp_directives_len[%d] = {\n", scalar(@tokens);
221 for my $d (@tokens) {
222 $output .= sprintf " %d,\n", defined($d) ? length($d)+1 : 0;
224 $output .= "};\n";
231 my %tokens;
232 #my @tokendata;
234 my $n = 0;
235 for my $token( @{$info{tokens}} ){
236 if (defined($token)) {
237 $tokens{'%'.$token} = $n;
238 if ($token =~ /[\@\[\]\\_]/) {
239 # Fail on characters which look like upper-case letters
240 # to the quick-and-dirty downcasing in the prehash
241 # (see below)
242 die "$info{in}: invalid character in token: $token";
245 $n++;
252 use phash;
254 my @hashinfo = gen_perfect_hash(%tokens);
255 if(! @hashinfo ){
256 die "$0: no hash found\n";
259 # Paranoia...
260 # no longer needed, gen_perfect_hash now runs verify_hash_table
261 # verify_hash_table(\%tokens, \@hashinfo);
263 my ($n, $sv, $g) = @hashinfo;
264 my $sv2 = $sv+2;
266 die if ($n & ($n-1));
268 # Put a large value in unused slots. This makes it extremely unlikely
269 # that any combination that involves unused slot will pass the range test.
270 # This speeds up rejection of unrecognized tokens, i.e. identifiers.
271 $output .= <<END;
272 enum preproc_token pp_token_hash(const char *token)
274 #define UNUSED 16383
275 static const int16_t hash1[$n] = {
277 for( my $i = 0; $i < $n; $i++ ){
278 my $h = ${$g}[$i*2+0];
279 $output .= " ".(defined($h) ? $h : 'UNUSED'). ",\n";
281 $output .= " };\n";
283 $output .= " static const int16_t hash2[$n] = {\n";
284 for( my $i = 0; $i < $n; $i++ ){
285 my $h = ${$g}[$i*2+1];
286 $output .= " ".( defined($h) ? $h : 'UNUSED'). ",\n";
288 $output .= " };\n";
290 $output .= sprintf <<END, $$sv[0], $$sv[1], $n-1, $n-1, scalar @{$info{tokens}};
291 uint32_t k1, k2;
292 uint64_t crc;
293 /* For correct overflow behavior, "ix" should be unsigned of the same
294 width as the hash arrays. */
295 uint16_t ix;
297 crc = crc64i(UINT64_C(0x%08x%08x), token);
298 k1 = (uint32_t)crc;
299 k2 = (uint32_t)(crc >> 32);
301 ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];
302 if (ix >= %d)
303 return PP_INVALID;
305 if (!pp_directives[ix] || nasm_stricmp(pp_directives[ix], token))
306 return PP_INVALID;
308 return ix;
313 return $output;
319 sub ph{
320 my %info = @_;
322 my $output = <<END;
323 # Automatically generated from $info{in} by $0
324 # Do not edit
326 %pptok_hash = (
329 my $longest = 0;
330 my @tokens = @{$info{tokens}};
331 map {
332 my $len = $_ ? length $_ : 0;
333 $longest = $len if $len > $longest;
334 } @tokens;
336 my $n = 0;
337 for my $token ( @tokens ){
338 if( $token ){
339 my $pad = ' ' x ( $longest - length $token );
340 $output .= " '%$token' $pad=> $n,\n";
342 $n++;
345 $output .= <<END;
349 return $output;