[cage] Add some karma aliases for myself
[parrot.git] / tools / dev / gen_charset_tables.pl
blobd5a4f4d8091c59e6c2036edaf4f05d6b2fd6eca7
1 #! perl
2 use strict;
3 use warnings;
4 use POSIX qw(locale_h);
5 use locale;
6 use File::Spec;
8 =head1 NAME
10 tools/dev/gen_charset_tables.pl -- generate charset tables
12 =head1 SYNOPSIS
14 perl tools/dev/gen_charset_tables.pl
16 =head1 DESCRIPTION
18 Generate character set tables.
20 =cut
22 my ($svnid) =
23 '$Id$' =~
24 /^\$[iI][dD]:\s(.*) \$$/;
25 my $fileid = '$' . 'Id $';
26 my $charset_dir = File::Spec->catdir(qw/ src charset /);
28 my $coda = <<'EOF';
30 * Local variables:
31 * c-file-style: "parrot"
32 * End:
33 * vim: expandtab shiftwidth=4:
35 EOF
38 # charset tables to create
40 my %table = (
41 "en_US.iso88591" => "Parrot_iso_8859_1_typetable",
43 # "en_US.iso885915" => "Parrot_iso_8859_15_typetable",
44 "POSIX" => "Parrot_ascii_typetable",
47 my $header = <<"HEADER";
48 /* $fileid
49 * Copyright (C) 2005-2007, Parrot Foundation.
51 * DO NOT EDIT THIS FILE DIRECTLY!
52 * please update the $0 script instead.
54 * Created by $svnid
55 * Overview:
56 * This file contains various charset tables.
57 * Data Structure and Algorithms:
58 * History:
59 * Notes:
60 * References:
63 /* HEADERIZER HFILE: none */
65 HEADER
67 =over
69 =item B<classify>( $chr )
71 Character classification
73 =cut
75 sub classify {
76 my ($chr) = @_;
77 my $ret = 0;
79 $chr = chr($chr);
80 $ret |= 0x0001 if $chr =~ /^[[:upper:]]$/; # CCLASS_UPPERCASE
81 $ret |= 0x0002 if $chr =~ /^[[:lower:]]$/; # CCLASS_LOWERCASE
82 $ret |= 0x0004 if $chr =~ /^[[:alpha:]]$/; # CCLASS_ALPHABETIC
83 $ret |= 0x0008 if $chr =~ /^[[:digit:]]$/; # CCLASS_NUMERIC
84 $ret |= 0x0010 if $chr =~ /^[[:xdigit:]]$/; # CCLASS_HEXADECIMAL
85 $ret |= 0x0020 if $chr =~ /^[[:space:]\x85\xa0]$/; # CCLASS_WHITESPACE
86 $ret |= 0x0040 if $chr =~ /^[[:print:]]$/; # CCLASS_PRINTING
87 $ret |= 0x0080 if $chr =~ /^[[:graph:]]$/; # CCLASS_GRAPHICAL
88 $ret |= 0x0100 if $chr =~ /^[[:blank:]]$/; # CCLASS_BLANK
89 $ret |= 0x0200 if $chr =~ /^[[:cntrl:]]$/; # CCLASS_CONTROL
90 $ret |= 0x0400 if $chr =~ /^[[:punct:]]$/; # CCLASS_PUNCTUATION
91 $ret |= 0x0800 if $chr =~ /^[[:alnum:]]$/; # CCLASS_ALPHANUMERIC
92 $ret |= 0x1000 if $chr =~ /^[\n\r\f\x85]$/; # CCLASS_NEWLINE
93 $ret |= 0x2000 if $chr =~ /^[[:alnum:]_]$/; # CCLASS_WORD
95 return $ret;
98 =item B<create_table>( $name )
100 Create a whole character table
102 =back
104 =cut
106 sub create_table {
107 my ($name) = @_;
108 my $len = 8;
110 print "const INTVAL ${name}[256] = {\n";
111 foreach my $char ( 0 .. 255 ) {
112 printf "0x%.4x, ", classify($char);
113 print "/* @{[$char-$len+1]}-$char */\n" if $char % $len == $len - 1;
115 print "};\n";
119 # create 'src/charset/tables.c'
121 ###########################################################################
122 my $c_file = File::Spec->catfile( $charset_dir, 'tables.c' );
123 open STDOUT, '>', $c_file
124 or die "can not open '$c_file': $!\n";
125 print <<"END";
126 $header
127 #include "tables.h"
129 foreach my $name ( sort keys %table ) {
130 print STDERR "creating table: '$table{$name}' (charset: $name)\n";
131 setlocale( LC_CTYPE, $name );
132 create_table( $table{$name} );
134 print $coda;
135 close STDOUT;
138 # create 'src/charset/tables.h'
140 ###########################################################################
141 my $h_file = File::Spec->catfile( $charset_dir, 'tables.h' );
142 open STDOUT, '>', $h_file
143 or die "can not open '$h_file': $!\n";
144 print <<"END";
145 $header
146 #ifndef PARROT_CHARSET_TABLES_H_GUARD
147 #define PARROT_CHARSET_TABLES_H_GUARD
148 #include "parrot/cclass.h"
149 #include "parrot/parrot.h"
150 #define WHITESPACE enum_cclass_whitespace
151 #define WORDCHAR enum_cclass_word
152 #define PUNCTUATION enum_cclass_punctuation
153 #define DIGIT enum_cclass_numeric
155 foreach my $name ( sort keys %table ) {
156 print "extern const INTVAL ${table{$name}}[256];\n";
158 print <<"EOF";
159 #endif /* PARROT_CHARSET_TABLES_H_GUARD */
160 $coda
162 close STDOUT;
164 # Local Variables:
165 # mode: cperl
166 # cperl-indent-level: 4
167 # fill-column: 100
168 # End:
169 # vim: expandtab shiftwidth=4: