Fix testcase unsupportedcheck1 for --disable-backend-remote
[xapian.git] / xapian-core / common / Tokeniseise.pm
blobd19d4ec77dbb5bc949b5a62bd66c6c00453803c3
1 package Tokeniseise;
2 # Copyright (C) 2012,2013,2016 Olly Betts
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to
6 # deal in the Software without restriction, including without limitation the
7 # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 # sell copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 # IN THE SOFTWARE.
22 use strict;
23 use warnings;
25 sub new {
26 my ($class, $header, $desc, $copyright, $guard, $type, $width, $need_enum) = @_;
27 my $fh;
28 open $fh, '>', "$header~" or die $!;
29 print $fh <<"EOF";
30 /** \@file
31 * \@brief $desc
33 /* Warning: This file is generated by $0 - do not modify directly! */
34 $copyright
35 #ifndef $guard
36 #define $guard
37 EOF
38 $need_enum ||= 'true';
39 if ($need_enum eq 'true') {
40 print $fh <<"EOF";
42 enum $type {
43 EOF
45 my $self = {
46 FH => $fh,
47 HEADER => $header,
48 M => {},
49 WIDTH => ($width || 1),
50 ENUM_VALUES => {}
52 bless($self, $class);
53 return $self;
56 sub add {
57 my ($self, $t, $enum) = @_;
58 !exists ${$self->{M}{length $t}}{$t} or die "Token $t already seen";
59 ${$self->{M}{length $t}}{$t} = $enum;
60 if (!exists $self->{ENUM_VALUES}{$enum}) {
61 $self->{ENUM_VALUES}{$enum} = scalar keys %{$self->{ENUM_VALUES}};
63 return;
66 sub append {
67 my ($self, $line) = @_;
68 push @{$self->{APPEND}}, $line;
69 return;
72 sub write {
73 my ($self, $need_enum, $tab_name) = @_;
74 my $fh = $self->{FH};
75 $need_enum ||= 'true';
76 if ($need_enum eq 'true') {
77 print $fh join ",\n", map { " $_ = $self->{ENUM_VALUES}{$_}" } sort {$self->{ENUM_VALUES}{$a} <=> $self->{ENUM_VALUES}{$b}} keys %{$self->{ENUM_VALUES}};
78 print $fh "\n};\n";
81 my $width = $self->{WIDTH};
82 my $max = (1 << (8 * $width)) - 1;
83 if (scalar keys %{$self->{ENUM_VALUES}} > $max + 1) {
84 die "Token value ", (scalar keys %{$self->{ENUM_VALUES}}) - 1, " > $max";
86 my $m = $self->{M};
87 my @lens = sort {$a <=> $b} keys %$m;
88 my $max_len = $lens[-1];
89 sub space_needed {
90 my ($l, $m) = @_;
91 # Add a fraction of the length to give a deterministic order.
92 return 1 + (1 + $l) * scalar(keys %{$m->{$l}}) + $l / 1000.0;
94 # Put the largest entries last so the offsets are more likely to fit into a
95 # byte.
96 @lens = sort {space_needed($a, $m) <=> space_needed($b, $m)} @lens;
97 # 1 means "no entries" since it can't be a valid offset.
98 # 2 also can't be a valid offset, but isn't currently used.
99 my @h = (1) x $max_len;
100 my @r = ();
101 my $offset = 0;
102 for my $len (@lens) {
103 push @r, undef;
104 ($offset == 1 or $offset == 2) and die "Offset $offset shouldn't be possible";
105 $offset > $max and die "Offset $offset > $max (you should specify a larger width)";
106 $h[$len - 1] = $offset;
107 my $href = $m->{$len};
108 my $tab_len = scalar(keys %$href);
109 $tab_len - 1 < 0 and die "Offset $tab_len < 0";
110 $tab_len - 1 > $max and die "Offset $tab_len > $max";
111 push @r, "($tab_len - 1),";
112 ++$offset;
113 for my $s (sort keys %$href) {
114 $offset += 1 + length($s);
115 my $v = $$href{$s};
116 push @r, "$v, " . join(",", map { my $o = ord $_; $o >= 32 && $o < 127 ? "'$_'" : $o } split //, $s) . ",";
119 $tab_name ||= 'tab';
120 print $fh "\nstatic const unsigned char ",$tab_name,"[] = {\n";
121 print $fh " $max_len,\n";
122 my $c = 0;
123 for (@h) {
124 if ($c++ % 8 == 0) {
125 print $fh "\n ";
126 } else {
127 print $fh " ";
129 if ($width == 1) {
130 printf $fh "%3d,", $_;
131 } elsif ($width == 2) {
132 if ($_ == 1) {
133 print $fh "1,0,";
134 } else {
135 printf $fh "(%d&255),(%d>>8),", $_, $_;
137 } else {
138 die "Unhandled width==$width";
141 print $fh "\n";
143 $r[-1] =~ s/,$//;
145 for (@r) {
146 if (defined $_) {
147 print $fh " ", $_;
149 print $fh "\n";
152 print $fh <<'EOF';
155 if (exists $self->{APPEND}) {
156 print $fh "\n";
157 for (@{$self->{APPEND}}) {
158 print $fh $_, "\n";
161 print $fh <<'EOF';
163 #endif
165 close $fh or die $!;
166 rename "$self->{HEADER}~", $self->{HEADER} or die $!;
168 return;