[ci] Enable IRC notifications from travis
[xapian.git] / xapian-core / common / Tokeniseise.pm
blob430264aa4eab022484fbcfc6353933bb4661695e
1 package Tokeniseise;
2 # Copyright (C) 2012,2013,2016 Olly Betts
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to
6 # deal in the Software without restriction, including without limitation the
7 # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 # sell copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 # IN THE SOFTWARE.
22 use strict;
23 use warnings;
25 sub new {
26 my ($class, $header, $desc, $copyright, $guard, $type, $width) = @_;
27 my $fh;
28 open $fh, '>', "$header~" or die $!;
29 print $fh <<"EOF";
30 /** \@file $header
31 * \@brief $desc
33 /* Warning: This file is generated by $0 - do not modify directly! */
34 $copyright
35 #ifndef $guard
36 #define $guard
38 enum $type {
39 EOF
40 my $self = {
41 FH => $fh,
42 HEADER => $header,
43 M => {},
44 WIDTH => ($width || 1),
45 ENUM_VALUES => {}
47 bless($self, $class);
48 return $self;
51 sub add {
52 my ($self, $t, $enum) = @_;
53 !exists ${$self->{M}{length $t}}{$t} or die "Token $t already seen";
54 ${$self->{M}{length $t}}{$t} = $enum;
55 if (!exists $self->{ENUM_VALUES}{$enum}) {
56 $self->{ENUM_VALUES}{$enum} = scalar keys %{$self->{ENUM_VALUES}};
58 return;
61 sub append {
62 my ($self, $line) = @_;
63 push @{$self->{APPEND}}, $line;
64 return;
67 sub write {
68 my $self = shift;
69 my $fh = $self->{FH};
70 print $fh join ",\n", map { " $_ = $self->{ENUM_VALUES}{$_}" } sort {$self->{ENUM_VALUES}{$a} <=> $self->{ENUM_VALUES}{$b}} keys %{$self->{ENUM_VALUES}};
71 print $fh "\n};\n";
73 my $width = $self->{WIDTH};
74 my $max = (1 << (8 * $width)) - 1;
75 if (scalar keys %{$self->{ENUM_VALUES}} > $max + 1) {
76 die "Token value ", (scalar keys %{$self->{ENUM_VALUES}}) - 1, " > $max";
78 my $m = $self->{M};
79 my @lens = sort {$a <=> $b} keys %$m;
80 my $max_len = $lens[-1];
81 sub space_needed {
82 my ($l, $m) = @_;
83 # Add a fraction of the length to give a deterministic order.
84 return 1 + (1 + $l) * scalar(keys %{$m->{$l}}) + $l / 1000.0;
86 # Put the largest entries last so the offsets are more likely to fit into a
87 # byte.
88 @lens = sort {space_needed($a, $m) <=> space_needed($b, $m)} @lens;
89 # 1 means "no entries" since it can't be a valid offset.
90 # 2 also can't be a valid offset, but isn't currently used.
91 my @h = (1) x $max_len;
92 my @r = ();
93 my $offset = 0;
94 for my $len (@lens) {
95 push @r, undef;
96 ($offset == 1 or $offset == 2) and die "Offset $offset shouldn't be possible";
97 $offset > $max and die "Offset $offset > $max (you should specify a larger width)";
98 $h[$len - 1] = $offset;
99 my $href = $m->{$len};
100 my $tab_len = scalar(keys %$href);
101 $tab_len - 1 < 0 and die "Offset $tab_len < 0";
102 $tab_len - 1 > $max and die "Offset $tab_len > $max";
103 push @r, "($tab_len - 1),";
104 ++$offset;
105 for my $s (sort keys %$href) {
106 $offset += 1 + length($s);
107 my $v = $$href{$s};
108 push @r, "$v, " . join(",", map { my $o = ord $_; $o >= 32 && $o < 127 ? "'$_'" : $o } split //, $s) . ",";
111 print $fh "\nstatic const unsigned char tab[] = {\n";
112 print $fh " $max_len,\n";
113 my $c = 0;
114 for (@h) {
115 if ($c++ % 8 == 0) {
116 print $fh "\n ";
117 } else {
118 print $fh " ";
120 if ($width == 1) {
121 printf $fh "%3d,", $_;
122 } elsif ($width == 2) {
123 if ($_ == 1) {
124 print $fh "1,0,";
125 } else {
126 printf $fh "(%d&255),(%d>>8),", $_, $_;
128 } else {
129 die "Unhandled width==$width";
132 print $fh "\n";
134 $r[-1] =~ s/,$//;
136 for (@r) {
137 if (defined $_) {
138 print $fh " ", $_;
140 print $fh "\n";
143 print $fh <<'EOF';
146 if (exists $self->{APPEND}) {
147 print $fh "\n";
148 for (@{$self->{APPEND}}) {
149 print $fh $_, "\n";
152 print $fh <<'EOF';
154 #endif
156 close $fh or die $!;
157 rename "$self->{HEADER}~", $self->{HEADER} or die $!;
159 return;