4 use POSIX
qw(locale_h);
10 tools/dev/gen_charset_tables.pl -- generate charset tables
14 perl tools/dev/gen_charset_tables.pl
18 Generate character set tables.
24 /^\$[iI][dD]:\s(.*) \$$/;
25 my $fileid = '$' . 'Id $';
26 my $charset_dir = File
::Spec
->catdir(qw
/ src charset /);
31 * c-file-style: "parrot"
33 * vim: expandtab shiftwidth=4:
38 # charset tables to create
41 "en_US.iso88591" => "Parrot_iso_8859_1_typetable",
43 # "en_US.iso885915" => "Parrot_iso_8859_15_typetable",
44 "POSIX" => "Parrot_ascii_typetable",
47 my $header = <<"HEADER";
49 * Copyright (C) 2005-2007, Parrot Foundation.
51 * DO NOT EDIT THIS FILE DIRECTLY!
52 * please update the $0 script instead.
56 * This file contains various charset tables.
57 * Data Structure and Algorithms:
63 /* HEADERIZER HFILE: none */
69 =item B<classify>( $chr )
71 Character classification
80 $ret |= 0x0001 if $chr =~ /^[[:upper:]]$/; # CCLASS_UPPERCASE
81 $ret |= 0x0002 if $chr =~ /^[[:lower:]]$/; # CCLASS_LOWERCASE
82 $ret |= 0x0004 if $chr =~ /^[[:alpha:]]$/; # CCLASS_ALPHABETIC
83 $ret |= 0x0008 if $chr =~ /^[[:digit:]]$/; # CCLASS_NUMERIC
84 $ret |= 0x0010 if $chr =~ /^[[:xdigit:]]$/; # CCLASS_HEXADECIMAL
85 $ret |= 0x0020 if $chr =~ /^[[:space:]\x85\xa0]$/; # CCLASS_WHITESPACE
86 $ret |= 0x0040 if $chr =~ /^[[:print:]]$/; # CCLASS_PRINTING
87 $ret |= 0x0080 if $chr =~ /^[[:graph:]]$/; # CCLASS_GRAPHICAL
88 $ret |= 0x0100 if $chr =~ /^[[:blank:]]$/; # CCLASS_BLANK
89 $ret |= 0x0200 if $chr =~ /^[[:cntrl:]]$/; # CCLASS_CONTROL
90 $ret |= 0x0400 if $chr =~ /^[[:punct:]]$/; # CCLASS_PUNCTUATION
91 $ret |= 0x0800 if $chr =~ /^[[:alnum:]]$/; # CCLASS_ALPHANUMERIC
92 $ret |= 0x1000 if $chr =~ /^[\n\r\f\x85]$/; # CCLASS_NEWLINE
93 $ret |= 0x2000 if $chr =~ /^[[:alnum:]_]$/; # CCLASS_WORD
98 =item B<create_table>( $name )
100 Create a whole character table
110 print "const INTVAL ${name}[256] = {\n";
111 foreach my $char ( 0 .. 255 ) {
112 printf "0x%.4x, ", classify
($char);
113 print "/* @{[$char-$len+1]}-$char */\n" if $char % $len == $len - 1;
119 # create 'src/charset/tables.c'
121 ###########################################################################
122 my $c_file = File
::Spec
->catfile( $charset_dir, 'tables.c' );
123 open STDOUT
, '>', $c_file
124 or die "can not open '$c_file': $!\n";
129 foreach my $name ( sort keys %table ) {
130 print STDERR
"creating table: '$table{$name}' (charset: $name)\n";
131 setlocale
( LC_CTYPE
, $name );
132 create_table
( $table{$name} );
138 # create 'src/charset/tables.h'
140 ###########################################################################
141 my $h_file = File
::Spec
->catfile( $charset_dir, 'tables.h' );
142 open STDOUT
, '>', $h_file
143 or die "can not open '$h_file': $!\n";
146 #ifndef PARROT_CHARSET_TABLES_H_GUARD
147 #define PARROT_CHARSET_TABLES_H_GUARD
148 #include "parrot/cclass.h"
149 #include "parrot/parrot.h"
150 #define WHITESPACE enum_cclass_whitespace
151 #define WORDCHAR enum_cclass_word
152 #define PUNCTUATION enum_cclass_punctuation
153 #define DIGIT enum_cclass_numeric
155 foreach my $name ( sort keys %table ) {
156 print "extern const INTVAL ${table{$name}}[256];\n";
159 #endif /* PARROT_CHARSET_TABLES_H_GUARD */
166 # cperl-indent-level: 4
169 # vim: expandtab shiftwidth=4: