Rewritten pptok.pl, now works
[nasm/perl-rewrite.git] / perl / pptok.pl
blobe315a146c2778fa417f0e45a4418b922d629b6db
1 #! /usr/bin/env perl
2 use strict;
3 use warnings;
4 use 5.010;
6 use lib qw'lib';
8 sub load_pptok(;$);
9 sub h;
10 sub c;
11 sub ph;
14 our %jump = (
15 h => \&h,
16 c => \&c,
17 ph => \&ph,
18 dump => sub{
19 use Data::Dump 'dump';
20 say dump {@_};
25 my($what, $in, $out) = @ARGV;
26 my %info = load_pptok();
28 $in //= '';
30 $info{out} = $out;
32 my $jump = $jump{lc $what};
33 given( ref $jump ){
34 when( 'CODE' ){
35 my $return = $jump->(%info);
36 print $return if defined $return;
41 sub load_pptok(;$){
42 my($filename) = @_;
43 $filename = 'pptok.dat' unless @_;
45 my @tokens_cond;
46 my @conditions;
48 my @tokens;
49 my $first_uncond;
52 use autodie qw'open close';
53 open( my $file, '<', $filename ) or die;
55 while( my $line = <$file> ){
56 $line =~ s/^\s+//; # Remove leading whitespace
57 $line =~ s/\s*(?:\#.*)?$//; # Remove comments and trailing whitespace
58 next unless $line;
60 given($line){
61 when( /^\* (.*) /x ){ push @conditions, $1; }
62 when( /^\% (.*) \*$/x ){ push @tokens_cond, $1; }
63 when( /^\% (.*) $/x ){ push @tokens, $1; }
67 close $file;
70 @conditions = sort @conditions;
71 @tokens = sort @tokens;
72 @tokens_cond = sort @tokens_cond;
75 # Generate the expanded list including conditionals. The conditionals
76 # are at the beginning, padded to a power of 2, with the inverses
77 # interspersed; this allows a simple mask to pick out the condition.
79 while ((scalar @conditions) & (scalar @conditions)-1) {
80 push(@conditions, undef);
83 $first_uncond = $tokens[0];
85 my @tokens_cond_p;
86 for my $token ( @tokens_cond ){
87 for my $cond ( @conditions ){
88 if( defined $cond){
89 push @tokens_cond_p, "${token}${cond}", "${token}n${cond}";
90 }else{
91 push @tokens_cond_p, undef, undef;
95 @tokens = ( @tokens_cond_p, @tokens );
98 my %return = (
99 tokens => [@tokens],
100 conditions => [@conditions],
101 first_uncond => $first_uncond,
102 in => $filename,
103 tokens_cond => [@tokens_cond],
104 #tokens_cond_p => \@tokens_cond_p
107 return %return if wantarray;
108 return \%return;
111 sub h{
112 my %info = @_;
114 my $output = <<END;
115 /* Automatically generated from $info{in} by $0 */
116 /* Do not edit */
118 enum preproc_token {
121 my $f = " PP_%-13s = %3d\n";
123 my $n = 0;
124 for my $token ( @{$info{tokens}} ){
125 if( defined($token) ){
126 #printf OUT " %-16s = %3d,\n", "PP_\U$token\E", $n;
127 $output .= sprintf $f, uc $token, $n;
129 $n++;
132 $output .= sprintf $f, 'INVALID', -1;
134 $output .= <<END;
137 enum pp_conditional {
140 my $n = 0;
141 for my $cc ( @{$info{conditions}} ) {
142 if (defined($cc)) {
143 $output .= sprintf " PPC_IF%-10s = %3d,\n",uc $cc, $n;
145 $n += 2;
148 my $pp_cond = (scalar(@{$info{conditions}})-1) << 1;
149 $output .= sprintf <<END, $pp_cond, uc $info{first_uncond};
152 #define PP_COND(x) ((enum pp_conditional)((x) & 0x%x))
153 #define PP_IS_COND(x) ((unsigned int)(x) < PP_%s)
154 #define PP_NEGATIVE(x) ((x) & 1)
158 use List::MoreUtils 'zip';
160 for my $token( @{$info{tokens_cond}} ){
161 my $token = uc $token;
162 $output .= "#define CASE_PP_$token \\\n";
164 my @cond = map {uc $_} grep { defined $_ } '', @{$info{conditions}};
165 my @ncond = map {"N$_"} @cond;
167 @cond = zip @cond, @ncond;
169 @cond = map { " case PP_${token}$_" } @cond;
171 $output .= join ": \\\n", @cond;
173 $output .= "\n";
176 return $output;
186 sub c{
187 my %info = @_;
189 # header
190 my $output = <<END;
191 /* Automatically generated from $info{in} by $0 */
192 /* Do not edit */
194 #include "compiler.h"
195 #include <inttypes.h>
196 #include <ctype.h>
197 #include "nasmlib.h"
198 #include "hashtbl.h"
199 #include "preproc.h"
206 # list of tokens, followed by list of the lengths of the tokens
208 my @tokens = @{$info{tokens}};
209 $output .= sprintf "const char * const pp_directives[%d] = {\n", scalar @tokens;
210 for my $d ( @tokens ){
211 if (defined($d)) {
212 $output .= " \"%$d\",\n";
213 } else {
214 $output .= " NULL,\n";
217 $output .= "};\n";
220 $output .= sprintf "const uint8_t pp_directives_len[%d] = {\n", scalar(@tokens);
221 for my $d (@tokens) {
222 $output .= sprintf " %d,\n", defined($d) ? length($d)+1 : 0;
224 $output .= "};\n";
231 my %tokens;
232 #my @tokendata;
234 my $n = 0;
235 for my $token( @{$info{tokens}} ){
236 if (defined($token)) {
237 $tokens{'%'.$token} = $n;
238 if ($token =~ /[\@\[\]\\_]/) {
239 # Fail on characters which look like upper-case letters
240 # to the quick-and-dirty downcasing in the prehash
241 # (see below)
242 die "$info{in}: invalid character in token: $token";
245 $n++;
252 use phash;
254 my @hashinfo = gen_perfect_hash(%tokens);
255 if(! @hashinfo ){
256 die "$0: no hash found\n";
259 # Paranoia...
260 verify_hash_table(\%tokens, \@hashinfo);
262 my ($n, $sv, $g) = @hashinfo;
263 my $sv2 = $sv+2;
265 die if ($n & ($n-1));
267 # Put a large value in unused slots. This makes it extremely unlikely
268 # that any combination that involves unused slot will pass the range test.
269 # This speeds up rejection of unrecognized tokens, i.e. identifiers.
270 $output .= <<END;
271 enum preproc_token pp_token_hash(const char *token)
273 #define UNUSED 16383
274 static const int16_t hash1[$n] = {
276 for( my $i = 0; $i < $n; $i++ ){
277 my $h = ${$g}[$i*2+0];
278 $output .= " ".(defined($h) ? $h : 'UNUSED'). ",\n";
280 $output .= " };\n";
282 $output .= " static const int16_t hash2[$n] = {\n";
283 for( my $i = 0; $i < $n; $i++ ){
284 my $h = ${$g}[$i*2+1];
285 $output .= " ".( defined($h) ? $h : 'UNUSED'). ",\n";
287 $output .= " };\n";
289 $output .= sprintf <<END, $$sv[0], $$sv[1], $n-1, $n-1, scalar @{$info{tokens}};
290 uint32_t k1, k2;
291 uint64_t crc;
292 /* For correct overflow behavior, "ix" should be unsigned of the same
293 width as the hash arrays. */
294 uint16_t ix;
296 crc = crc64i(UINT64_C(0x%08x%08x), token);
297 k1 = (uint32_t)crc;
298 k2 = (uint32_t)(crc >> 32);
300 ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];
301 if (ix >= %d)
302 return PP_INVALID;
304 if (!pp_directives[ix] || nasm_stricmp(pp_directives[ix], token))
305 return PP_INVALID;
307 return ix;
312 return $output;
318 sub ph{
319 my %info = @_;
321 my $output = <<END;
322 # Automatically generated from $info{in} by $0
323 # Do not edit
325 %pptok_hash = (
328 my $longest = 0;
329 my @tokens = @{$info{tokens}};
330 map {
331 my $len = $_ ? length $_ : 0;
332 $longest = $len if $len > $longest;
333 } @tokens;
335 my $n = 0;
336 for my $token ( @tokens ){
337 if( $token ){
338 my $pad = ' ' x ( $longest - length $token );
339 $output .= " '%$token' $pad=> $n,\n";
341 $n++;
344 $output .= <<END;
348 return $output;