libcpp/ucnid.pl

   1 #! /usr/bin/perl -w
   2 use strict;
   3
   4 # Convert cppucnid.tab to cppucnid.h.  We use two arrays of length
   5 # 65536 to represent the table, since this is nice and simple.  The
   6 # first array holds the tags indicating which ranges are valid in
   7 # which contexts.  The second array holds the language name associated
   8 # with each element.
   9
  # Two parallel tables with one slot per code point, 0x0000 .. 0xffff.
  # $tags[$cp] is the "|"-joined list of standards under which the
  # character is valid ("" means it is not an identifier character);
  # $names[$cp] is the language name the character was listed under.
  our @tags  = ("") x 65536;
  our @names = ("") x 65536;

  # Array mapping tag numbers to standard #defines
  # (declared here; not referenced by the code visible in this script).
  our @stds;

  # Standard and language that apply to the range lines currently
  # being read; updated by the "[std]" and "; language" header lines.
  our ($curstd, $curlang);

  # Everything up to (but not including) the first line consisting of
  # exactly '%%' is the output template; keep it for the final pass.
  our @template;

  while (defined(my $line = <>)) {
      chomp $line;
      last if $line eq '%%';
      push @template, $line;
  }
  29
  # The rest of the input is the UCN table itself.  It is laid out as:
  #
  #   [std]
  #
  #   ; language
  #   xxxx-xxxx xxxx xxxx-xxxx ....
  #
  # Lines beginning with '#' are comments; blank lines are ignored.
  # A "[std]" line selects the current standard, a "; language" line
  # selects the current language, and any other line is split on
  # whitespace and handed to process_range().

  while (defined(my $line = <>)) {
      chomp $line;

      # Comments and blank lines carry no data.
      next if $line =~ /^#/;
      next if $line =~ /^\s*$/;

      if ($line =~ /^\[(.+)\]$/) {
          $curstd = $1;
          next;
      }

      if ($line =~ /^; (.+)$/) {
          $curlang = $1;
          next;
      }

      process_range(split ' ', $line);
  }
  55
  # Replay the saved template on standard output.  A "[dne]" line is
  # replaced by the do-not-edit notice, a "[table]" line by the
  # generated table, and every other line is copied through unchanged.
  # Setting $\ makes each plain print end with a newline.
  $\ = "\n";
  for my $line (@template) {
      if ($line eq "[dne]") {
          print "/* Automatically generated from cppucnid.tab, do not edit */";
      } elsif ($line eq "[table]") {
          print_table();
      } else {
          print $line;
      }
  }
  64
  # Write the table rows to the currently selected output handle.
  #
  # @tags/@names are scanned for maximal runs of consecutive code points
  # that share both the same tag string and the same language name.  Each
  # run whose tag is non-empty becomes one row "{ first, last, tag },".
  # The language name is appended as a C comment only on rows where it
  # differs from the name on the previously printed row.
  sub print_table {
      my $last_name = "";
      my $first     = 0;

      while ($first <= $#tags) {
          # Advance $past to one beyond the end of the run starting at $first.
          my $past = $first;
          while ($past <= $#tags
                 && $tags[$past] eq $tags[$first]
                 && $names[$past] eq $names[$first]) {
              $past++;
          }

          # Runs with an empty tag are not valid idchars: no row for them.
          if ($tags[$first] ne "") {
              my $label = $tags[$first];
              my $name  = $names[$first];

              # Tags starting with C99 are pushed right by four spaces
              # (presumably to line up with longer "XXX|C99" tags).
              $label = "    " . $label if $label =~ /^C99/;

              if ($name eq $last_name) {
                  printf("  { 0x%04x, 0x%04x, %-11s },\n",
                         $first, $past - 1, $label);
              } else {
                  printf("  { 0x%04x, 0x%04x, %-11s },  /* %s */\n",
                         $first, $past - 1, $label, $name);
              }
              $last_name = $name;
          }

          $first = $past;
      }
  }
  91
  # Record the code points listed on one line of the table.
  #
  # Each argument is either a single four-digit lowercase hexadecimal
  # number ("00aa") or an inclusive pair of such numbers ("00c0-00d6").
  # Every code point covered is a valid identifier character from the
  # current language ($curlang) under the current standard ($curstd):
  #
  #   * $curstd is prepended to $tags[$i], "|"-separated from any
  #     standards already recorded there;
  #   * $names[$i] is set to $curlang, unless the code point already
  #     belongs to a different language, in which case a warning is
  #     issued and the earlier language is kept.
  #
  # An argument in any other form produces a warning and is skipped.
  # Returns nothing useful; the results are left in @tags and @names.
  sub process_range {
      for my $range (@_) {
          # Normalise both accepted spellings to an inclusive [$start, $end].
          my ($start, $end);
          if ($range =~ /^[0-9a-f]{4}$/) {
              $start = $end = hex($range);
          } elsif ($range =~ /^ ([0-9a-f]{4}) - ([0-9a-f]{4}) $/x) {
              ($start, $end) = (hex($1), hex($2));
          } else {
              warn "malformed range expression $range";
              next;
          }

          # A reversed pair ($start > $end) yields an empty list here, so
          # it is ignored without touching the tables.
          for my $i ($start .. $end) {
              $tags[$i] = $tags[$i] eq ""
                  ? $curstd
                  : $curstd . "|" . $tags[$i];

              if ($names[$i] ne "" && $names[$i] ne $curlang) {
                  # The tag is a string such as "CXX|C99", so it has to be
                  # formatted with %s; %d would print 0 and trigger a
                  # "isn't numeric" warning under -w.
                  warn sprintf("language overlap: %s/%s at %x (tag %s)",
                               $names[$i], $curlang, $i, $tags[$i]);
                  next;
              }
              $names[$i] = $curlang;
          }
      }
  }