9 Test a number of different F</robots.txt> files against a number
10 of different User-agents.
14 require WWW
::RobotRules
;
18 print "1..50\n"; # for Test::Harness
20 # We test a number of different /robots.txt files,
24 # http://foo/robots.txt
27 Disallow: http://foo/also_private
34 # http://foo/robots.txt
36 # comment which should be ignored
41 # http://foo/robots.txt
45 # http://foo/robots.txt
57 User-agent: SvartEnke1
61 User-Agent: SvartEnke2
63 Disallow: http://foo:8080/
68 # I've locked myself away
71 # The castle is your home now, so you can go anywhere you like.
73 Disallow: /west-wing/ # except the west wing!
74 # It's good to be the Prince...
79 # same thing backwards
81 # It's good to be the Prince...
84 # The castle is your home now, so you can go anywhere you like.
86 Disallow: /west-wing/ # except the west wing!
87 # I've locked myself away
92 # and a number of different robots:
95 [$content1, 'MOMspider' =>
96 1 => 'http://foo/private' => 1,
97 2 => 'http://foo/also_private' => 1,
100 [$content1, 'Wubble' =>
101 3 => 'http://foo/private' => 0,
102 4 => 'http://foo/also_private' => 0,
103 5 => 'http://foo/other' => 1,
106 [$content2, 'MOMspider' =>
107 6 => 'http://foo/private' => 0,
108 7 => 'http://foo/other' => 1,
111 [$content2, 'Wubble' =>
112 8 => 'http://foo/private' => 1,
113 9 => 'http://foo/also_private' => 1,
114 10 => 'http://foo/other' => 1,
117 [$content3, 'MOMspider' =>
118 11 => 'http://foo/private' => 1,
119 12 => 'http://foo/other' => 1,
122 [$content3, 'Wubble' =>
123 13 => 'http://foo/private' => 1,
124 14 => 'http://foo/other' => 1,
127 [$content4, 'MOMspider' =>
128 15 => 'http://foo/private' => 1,
129 16 => 'http://foo/this' => 0,
130 17 => 'http://foo/that' => 1,
133 [$content4, 'Another' =>
134 18 => 'http://foo/private' => 1,
135 19 => 'http://foo/this' => 1,
136 20 => 'http://foo/that' => 0,
139 [$content4, 'Wubble' =>
140 21 => 'http://foo/private' => 0,
141 22 => 'http://foo/this' => 1,
142 23 => 'http://foo/that' => 1,
145 [$content4, 'Another/1.0' =>
146 24 => 'http://foo/private' => 1,
147 25 => 'http://foo/this' => 1,
148 26 => 'http://foo/that' => 0,
151 [$content4, "SvartEnke1" =>
152 27 => "http://foo/" => 0,
153 28 => "http://foo/this" => 0,
154 29 => "http://bar/" => 1,
157 [$content4, "SvartEnke2" =>
158 30 => "http://foo/" => 1,
159 31 => "http://foo/this" => 1,
160 32 => "http://bar/" => 1,
163 [$content4, "MomSpiderJr" => # should match "MomSpider"
164 33 => 'http://foo/private' => 1,
165 34 => 'http://foo/also_private' => 1,
166 35 => 'http://foo/this/' => 0,
169 [$content4, "SvartEnk" => # should match "*"
170 36 => "http://foo/" => 1,
171 37 => "http://foo/private/" => 0,
172 38 => "http://bar/" => 1,
175 [$content5, 'Villager/1.0' =>
176 39 => 'http://foo/west-wing/' => 0,
177 40 => 'http://foo/' => 0,
180 [$content5, 'Belle/2.0' =>
181 41 => 'http://foo/west-wing/' => 0,
182 42 => 'http://foo/' => 1,
185 [$content5, 'Beast/3.0' =>
186 43 => 'http://foo/west-wing/' => 1,
187 44 => 'http://foo/' => 1,
190 [$content6, 'Villager/1.0' =>
191 45 => 'http://foo/west-wing/' => 0,
192 46 => 'http://foo/' => 0,
195 [$content6, 'Belle/2.0' =>
196 47 => 'http://foo/west-wing/' => 0,
197 48 => 'http://foo/' => 1,
200 [$content6, 'Beast/3.0' =>
201 49 => 'http://foo/west-wing/' => 1,
202 50 => 'http://foo/' => 1,
205 # when adding tests, remember to increase
206 # the maximum at the top
213 my ($content, $ua) = splice(@
$t, 0, 2);
215 my $robotsrules = new WWW
::RobotRules
($ua);
216 $robotsrules->parse('http://foo/robots.txt', $content);
218 my ($num, $path, $expected);
219 while(($num, $path, $expected) = splice(@
$t, 0, 3)) {
220 my $allowed = $robotsrules->allowed($path);
221 $allowed = 1 if $allowed;
222 if($allowed != $expected) {
224 confess
"Test Failed: $ua => $path ($allowed != $expected)";