Add a paragraph to summarize the motivation for releases since 5.815
[libwww-perl-eserte.git] / t / robot / rules.t
blob6125e85fd9a9a8ca1dfc10937d47cb9e3c479ffa
1 #!/local/bin/perl
3 =head1 NAME
5 robot-rules.t
7 =head1 DESCRIPTION
9 Test a number of different A</robots.txt> files against a number
10 of different User-agents.
12 =cut
14 require WWW::RobotRules;
15 use Carp;
16 use strict;
18 print "1..50\n"; # for Test::Harness
20 # We test a number of different /robots.txt files,
23 my $content1 = <<EOM;
24 # http://foo/robots.txt
25 User-agent: *
26 Disallow: /private
27 Disallow: http://foo/also_private
29 User-agent: MOMspider
30 Disallow:
31 EOM
33 my $content2 = <<EOM;
34 # http://foo/robots.txt
35 User-agent: MOMspider
36 # comment which should be ignored
37 Disallow: /private
38 EOM
40 my $content3 = <<EOM;
41 # http://foo/robots.txt
42 EOM
44 my $content4 = <<EOM;
45 # http://foo/robots.txt
46 User-agent: *
47 Disallow: /private
48 Disallow: mailto:foo
50 User-agent: MOMspider
51 Disallow: /this
53 User-agent: Another
54 Disallow: /that
57 User-agent: SvartEnke1
58 Disallow: http://fOO
59 Disallow: http://bar
61 User-Agent: SvartEnke2
62 Disallow: ftp://foo
63 Disallow: http://foo:8080/
64 Disallow: http://bar/
65 EOM
67 my $content5 = <<EOM;
68 # I've locked myself away
69 User-agent: *
70 Disallow: /
71 # The castle is your home now, so you can go anywhere you like.
72 User-agent: Belle
73 Disallow: /west-wing/ # except the west wing!
74 # It's good to be the Prince...
75 User-agent: Beast
76 Disallow:
77 EOM
79 # same thing backwards
80 my $content6 = <<EOM;
81 # It's good to be the Prince...
82 User-agent: Beast
83 Disallow:
84 # The castle is your home now, so you can go anywhere you like.
85 User-agent: Belle
86 Disallow: /west-wing/ # except the west wing!
87 # I've locked myself away
88 User-agent: *
89 Disallow: /
90 EOM
92 # and a number of different robots:
94 my @tests1 = (
95 [$content1, 'MOMspider' =>
96 1 => 'http://foo/private' => 1,
97 2 => 'http://foo/also_private' => 1,
100 [$content1, 'Wubble' =>
101 3 => 'http://foo/private' => 0,
102 4 => 'http://foo/also_private' => 0,
103 5 => 'http://foo/other' => 1,
106 [$content2, 'MOMspider' =>
107 6 => 'http://foo/private' => 0,
108 7 => 'http://foo/other' => 1,
111 [$content2, 'Wubble' =>
112 8 => 'http://foo/private' => 1,
113 9 => 'http://foo/also_private' => 1,
114 10 => 'http://foo/other' => 1,
117 [$content3, 'MOMspider' =>
118 11 => 'http://foo/private' => 1,
119 12 => 'http://foo/other' => 1,
122 [$content3, 'Wubble' =>
123 13 => 'http://foo/private' => 1,
124 14 => 'http://foo/other' => 1,
127 [$content4, 'MOMspider' =>
128 15 => 'http://foo/private' => 1,
129 16 => 'http://foo/this' => 0,
130 17 => 'http://foo/that' => 1,
133 [$content4, 'Another' =>
134 18 => 'http://foo/private' => 1,
135 19 => 'http://foo/this' => 1,
136 20 => 'http://foo/that' => 0,
139 [$content4, 'Wubble' =>
140 21 => 'http://foo/private' => 0,
141 22 => 'http://foo/this' => 1,
142 23 => 'http://foo/that' => 1,
145 [$content4, 'Another/1.0' =>
146 24 => 'http://foo/private' => 1,
147 25 => 'http://foo/this' => 1,
148 26 => 'http://foo/that' => 0,
151 [$content4, "SvartEnke1" =>
152 27 => "http://foo/" => 0,
153 28 => "http://foo/this" => 0,
154 29 => "http://bar/" => 1,
157 [$content4, "SvartEnke2" =>
158 30 => "http://foo/" => 1,
159 31 => "http://foo/this" => 1,
160 32 => "http://bar/" => 1,
163 [$content4, "MomSpiderJr" => # should match "MomSpider"
164 33 => 'http://foo/private' => 1,
165 34 => 'http://foo/also_private' => 1,
166 35 => 'http://foo/this/' => 0,
169 [$content4, "SvartEnk" => # should match "*"
170 36 => "http://foo/" => 1,
171 37 => "http://foo/private/" => 0,
172 38 => "http://bar/" => 1,
175 [$content5, 'Villager/1.0' =>
176 39 => 'http://foo/west-wing/' => 0,
177 40 => 'http://foo/' => 0,
180 [$content5, 'Belle/2.0' =>
181 41 => 'http://foo/west-wing/' => 0,
182 42 => 'http://foo/' => 1,
185 [$content5, 'Beast/3.0' =>
186 43 => 'http://foo/west-wing/' => 1,
187 44 => 'http://foo/' => 1,
190 [$content6, 'Villager/1.0' =>
191 45 => 'http://foo/west-wing/' => 0,
192 46 => 'http://foo/' => 0,
195 [$content6, 'Belle/2.0' =>
196 47 => 'http://foo/west-wing/' => 0,
197 48 => 'http://foo/' => 1,
200 [$content6, 'Beast/3.0' =>
201 49 => 'http://foo/west-wing/' => 1,
202 50 => 'http://foo/' => 1,
205 # when adding tests, remember to increase
206 # the maximum at the top
210 my $t;
212 for $t (@tests1) {
213 my ($content, $ua) = splice(@$t, 0, 2);
215 my $robotsrules = new WWW::RobotRules($ua);
216 $robotsrules->parse('http://foo/robots.txt', $content);
218 my ($num, $path, $expected);
219 while(($num, $path, $expected) = splice(@$t, 0, 3)) {
220 my $allowed = $robotsrules->allowed($path);
221 $allowed = 1 if $allowed;
222 if($allowed != $expected) {
223 $robotsrules->dump;
224 confess "Test Failed: $ua => $path ($allowed != $expected)";
226 print "ok $num\n";