5 use vars qw($EXHAUSTIVE $VERBOSE);
11 $EXHAUSTIVE = $ENV{BIOPERL_TILING_EXHAUSTIVE_TESTS};
12 $VERBOSE = $ENV{BIOPERL_TILING_VERBOSE_TESTS};
13 test_begin(-tests => ($EXHAUSTIVE ? 6471 : 1093) );
16 use_ok('Bio::Search::Tiling::MapTiling');
17 use_ok('Bio::Search::Tiling::MapTileUtils');
18 use_ok('Bio::SearchIO');
19 use_ok('Bio::Search::Hit::BlastHit');
22 my ($blio, $result, $hit, $tiling, $hsp);
23 my @normal_formats = qw( blast wublast
30 my @xltd_formats = qw( blastx wublastx
35 # an exhaustive listing of search reports in
53 hsinsulin.blastcl3.blastn
62 catalase-webblast.BLASTP
67 contig-by-hand.wublastp
68 ecolitst.noseqs.wublastp
74 dnaEbsub_ecoli.wublastx
81 1ZZ19XR301R-Alignment.tblastn
84 dnaEbsub_ecoli.wutblastn
91 dnaEbsub_ecoli.wutblastx
97 ecoli_domains.rpsblast
104 # a subset of search reports for
105 # run-o-the-mill regression tests
107 my %example_files = (
115 brassica_ATH.WUBLASTN
119 catalase-webblast.BLASTP
128 dnaEbsub_ecoli.wublastx
137 dnaEbsub_ecoli.wutblastn
143 dnaEbsub_ecoli.wutblastx
150 ok( $blio = new Bio::SearchIO(
151 -file=>test_input_file('dcr1_sp.WUBLASTP'),
152 -format=>'blast'), 'parse data file');
154 $result = $blio->next_result;
155 while ( $_ = $result->next_hit ) {
156 last if $_->name =~ /ASPTN/;
158 ok($hit = $_, 'got test hit');
159 ok($tiling = Bio::Search::Tiling::MapTiling->new($hit), 'create tiling');
164 isa_ok($tiling, 'Bio::Search::Tiling::TilingI');
165 foreach ( qw( next_tiling rewind_tilings identities conserved length ) ) {
166 ok( $tiling->$_, "implements '$_'" );
169 # regression test on original calculations
171 my @orig_id_results = ( 387,388,388,381,382,389 );
172 my @orig_cn_results = ( 622,619,628,608,611,613 );
174 $tiling->identities('query', 'exact'),
175 $tiling->identities('query', 'est'),
176 $tiling->identities('query', 'max'),
177 $tiling->identities('subject', 'exact'),
178 $tiling->identities('subject', 'est'),
179 $tiling->identities('subject', 'max')
182 $tiling->conserved('query', 'exact'),
183 $tiling->conserved('query', 'est'),
184 $tiling->conserved('query', 'max'),
185 $tiling->conserved('subject', 'exact'),
186 $tiling->conserved('subject', 'est'),
187 $tiling->conserved('subject', 'max')
189 map { $_ = int($_) } @id_results, @cn_results;
191 is_deeply(\@id_results, \@orig_id_results, 'identities regression test');
192 is_deeply(\@cn_results, \@orig_cn_results, 'conserved regression test');
194 # tiling iterator regression tests
197 while ($tiling->next_tiling('query')) {$qn++};
198 while ($tiling->next_tiling('subject')) {$sn++};
199 is ($qn, 8, 'tiling iterator regression test(1)');
200 is ($sn, 128, 'tiling iterator regression test(2)');
201 $tiling->rewind('subject');
202 while ($tiling->next_tiling('subject')) {$sn++};
203 is ($sn, 256, 'tiling iterator regression test(3, rewind)');
205 diag("Old blast.t tiling tests") if $VERBOSE;
207 ok($blio = Bio::SearchIO->new(
208 '-format' => 'blast',
209 '-file' => test_input_file('ecolitst.wublastp')
210 ), "ecolitst.wublastp");
211 $result = $blio->next_result;
213 $hit = $result->next_hit;
214 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
215 # Test HSP contig data returned by SearchUtils::tile_hsps()
216 # Second hit has two hsps that overlap.
218 # compare with the contig made by hand for these two contigs
219 # in t/data/contig-by-hand.wublastp
220 # (in this made-up file, the hsps from ecolitst.wublastp
221 # were aligned and contiged, and Length, Identities, Positives
222 # were counted, by a human (maj) )
224 my $hand_hit = Bio::SearchIO->new(
226 -file=>test_input_file('contig-by-hand.wublastp')
227 )->next_result->next_hit;
228 my $hand_hsp = $hand_hit->next_hsp;
229 my @hand_qrng = $hand_hsp->range('query');
230 my @hand_srng = $hand_hsp->range('hit');
231 my @hand_matches = $hand_hit->matches;
233 is(($tiling->range('query'))[0], $hand_qrng[0]);
234 is(($tiling->range('query'))[1], $hand_qrng[1]);
235 is(sprintf("%d",$tiling->identities('query')), $hand_matches[0]);
236 is(sprintf("%d",$tiling->conserved('query')), $hand_matches[1]);
237 is(($tiling->range('hit'))[0], $hand_srng[0]);
238 is(($tiling->range('hit'))[1], $hand_srng[1]);
239 is(sprintf("%d",$tiling->identities('hit')), $hand_matches[0]);
240 is(sprintf("%d",$tiling->conserved('hit')), $hand_matches[1]);
242 ok( $blio = Bio::SearchIO->new(
243 '-format' => 'blast',
244 '-file' => test_input_file('dnaEbsub_ecoli.wublastx')
245 ), "dnaEbsub_ecoli.wublastx");
247 $hit = $blio->next_result->next_hit;
248 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
249 is(sprintf("%.3f",$tiling->frac_identical(-type=>'query',-denom=>'aligned',-context=>'p2')), '0.364');
250 is(sprintf("%.3f",$tiling->frac_identical(-type=>'hit',-denom=>'aligned',-context=>'all')), '0.366');
251 is(sprintf("%.3f",$tiling->frac_conserved(-type=>'query',-denom=>'aligned',-context=>'p2')), '0.537');
252 is(sprintf("%.3f",$tiling->frac_conserved(-type=>'hit',-denom=>'aligned',-context=>'all')), '0.540');
253 is(sprintf("%.2f",$tiling->frac_aligned_query(-context=>'p2')), '0.62');
254 is(sprintf("%.2f",$tiling->frac_aligned_hit(-context=>'all')), '0.71');
256 ok( $blio = Bio::SearchIO->new(
257 '-format' => 'blast',
258 '-file' => test_input_file('tricky.wublast')
259 ), "tricky.wublast");
261 $hit = $blio->next_result->next_hit;
262 $tiling = Bio::Search::Tiling::MapTiling->new($hit);
263 cmp_ok sprintf("%.3f",$tiling->frac_identical(-denom => 'aligned')), '>', 0.2, 'tricky.wublast(1)';
264 cmp_ok sprintf("%.3f",$tiling->frac_conserved(-denom => 'aligned')), '<=', 1, 'tricky.wublast(2)';
265 is(sprintf("%.2f",$tiling->frac_aligned_query), '0.92', 'tricky.wublast(3)');
266 is(sprintf("%.2f",$tiling->frac_aligned_hit), '0.91','tricky.wublast(4)');
268 diag("New tiling tests") if $VERBOSE;
270 # select test file set based on the environment variable
271 # BIOPERL_TILING_EXHAUSTIVE_TESTS
273 my $files = ($EXHAUSTIVE ? \%test_files : \%example_files);
275 foreach my $alg (@normal_formats, @xltd_formats) {
276 diag("*******$alg files*******") if ($files->{$alg} && $VERBOSE);
277 foreach my $tf (@{$files->{$alg}}) {
278 ok( $blio = Bio::SearchIO->new( -format=>'blast',
279 -file=>test_input_file($tf)
281 $result = $blio->next_result;
283 # compare the per-aligned-base identity avg over hsps
284 # with frac_identical (bzw, conserved)
287 while ( $hit = $result->next_hit ) {
289 # quiet the "No HSPs" warning with -verbose => -1
290 ok( $tiling = Bio::Search::Tiling::MapTiling->new(-hit=>$hit,-verbose=>-1), "tile $tf hit $hit_count #hsps ".scalar $tiling->hsps );
291 my @hsps = $tiling->hsps;
294 diag( "--no hsps for $tf hit $hit_count") if $VERBOSE;
297 my ($dpct, $est, $fast,$exact, $max);
298 my $tol = 0.10; # % difference accepted as approx. equal
300 ## loop through contexts:
301 for my $type qw( query hit ) {
302 for my $context ($tiling->contexts($type)) {
303 diag(" --- $type $context ---") if $VERBOSE;
304 if (scalar($tiling->contexts($type, $context)) == 1) {
306 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'identical','aligned', 'est', 'fast', $context);
307 is( $est,$fast, substr($type,0,1)." id: est ($est) = fast ($fast)");
308 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'conserved','aligned', 'est', 'fast', $context);
309 is( $est,$fast, substr($type,0,1)." cn: est ($est) = fast ($fast)");
313 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'identical','aligned', 'est', 'fast', $context);
314 # cmp_ok( $dpct, "<", $tol,
315 # substr($type,0,1)." id: est ($est) ~ fast ($fast)");
316 ($dpct, $exact, $max) = $tiling->cmp_frac($type,'identical','aligned', 'exact', 'max', $context);
317 cmp_ok( abs($exact-$est)/$exact, "<" , $tol,
318 substr($type,0,1)." id: exact ($exact) ~ est ($est)");
319 cmp_ok( $exact, "<=" , $max,
320 substr($type,0,1)." id: exact ($exact) <= max ($max)");
322 ($dpct, $est, $fast) = $tiling->cmp_frac($type,'conserved','aligned', 'est', 'fast', $context);
323 # cmp_ok( $dpct, "<", $tol,
324 # substr($type,0,1)." cn: est ($est) ~ fast ($fast)");
325 ($dpct, $exact, $max) = $tiling->cmp_frac($type,'conserved','aligned', 'exact', 'max', $context);
326 cmp_ok( abs($exact-$est)/$exact, "<" , $tol,
327 substr($type,0,1)." cn: exact ($exact) ~ est ($est)");
328 cmp_ok( $exact, "<=" , $max,
329 substr($type,0,1)." cn: exact ($exact) <= max ($max)");
337 package Bio::Search::Tiling::MapTiling;
340 my ($tiling, $type, $method, $denom, @actions) = @_;
342 my $context = ($actions[2] ? $actions[2] : 'all');
343 $a = $tiling->frac(-type=>$type,
346 -action=>$actions[0],
348 $b = $tiling->frac(-type=>$type,
351 -action=>$actions[1],
353 return ( abs($a-$b)/$a, f(5,$a), f(5,$b) );
356 sub f { my ($d,$val) = @_; sprintf("%.${d}f",$val) }