1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id: SearchIO_blast.t 14995 2008-11-16 06:20:00Z cjfields $
10 test_begin(-tests => 1093);
12 use_ok('Bio::SearchIO');
15 my ($searchio, $result,$iter,$hit,$hsp);
17 $searchio = Bio::SearchIO->new('-format' => 'blast',
18 '-file' => test_input_file('ecolitst.bls'));
20 $result = $searchio->next_result;
22 is($result->database_name, 'ecoli.aa', 'database_name()');
23 is($result->database_entries, 4289);
24 is($result->database_letters, 1358990);
26 is($result->algorithm, 'BLASTP');
27 like($result->algorithm_version, qr/^2\.1\.3/);
28 like($result->query_name, qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/);
29 is($result->query_accession, 'AAC73113.1');
30 is($result->query_gi, 1786183);
31 is($result->query_length, 820);
32 is($result->get_statistic('kappa'), '0.135');
33 is($result->get_statistic('kappa_gapped'), '0.0410');
34 is($result->get_statistic('lambda'), '0.319');
35 is($result->get_statistic('lambda_gapped'), '0.267');
36 is($result->get_statistic('entropy'), '0.383');
37 is($result->get_statistic('entropy_gapped'), '0.140');
39 is($result->get_statistic('dbletters'), 1358990);
40 is($result->get_statistic('dbentries'), 4289);
41 is($result->get_statistic('effective_hsplength'), 47);
42 is($result->get_statistic('effectivespace'), 894675611);
43 is($result->get_parameter('matrix'), 'BLOSUM62');
44 is($result->get_parameter('gapopen'), 11);
45 is($result->get_parameter('gapext'), 1);
46 is($result->get_statistic('S2'), '92');
47 is($result->get_statistic('S2_bits'), '40.0');
48 float_is($result->get_parameter('expect'), '1.0e-03');
49 is($result->get_statistic('num_extensions'), '82424');
52 my @valid = ( [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 1567, 4058],
53 [ 'gb|AAC76922.1|', 810, 'AAC76922', '1e-91', 332, 850],
54 [ 'gb|AAC76994.1|', 449, 'AAC76994', '3e-47', 184, 467]);
56 while( $hit = $result->next_hit ) {
59 is($hit->name, shift @$d);
60 is($hit->length, shift @$d);
61 is($hit->accession, shift @$d);
62 float_is($hit->significance, shift @$d);
63 is($hit->bits, shift @$d );
64 is($hit->raw_score, shift @$d );
68 while( my $hsp = $hit->next_hsp ) {
69 is($hsp->query->start, 1);
70 is($hsp->query->end, 820);
71 is($hsp->hit->start, 1);
72 is($hsp->hit->end, 820);
73 is($hsp->length('total'), 820);
74 is($hsp->start('hit'), $hsp->hit->start);
75 is($hsp->end('query'), $hsp->query->end);
76 is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
77 float_is($hsp->evalue, 0.0);
78 is($hsp->score, 4058);
80 is(sprintf("%.2f",$hsp->percent_identity), 98.29);
81 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.9829);
82 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.9829);
88 last if( $count++ > @valid );
92 $searchio = Bio::SearchIO->new('-format' => 'blast',
93 '-file' => test_input_file('ecolitst.wublastp'));
95 $result = $searchio->next_result;
97 is($result->database_name, 'ecoli.aa');
98 is($result->database_letters, 1358990);
99 is($result->database_entries, 4289);
100 is($result->algorithm, 'BLASTP');
101 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
102 like($result->query_name, qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/);
103 is($result->query_accession, 'AAC73113.1');
105 is($result->query_length, 820);
106 is($result->query_gi, 1786183);
107 is($result->get_statistic('kappa'), 0.136);
108 is($result->get_statistic('lambda'), 0.319);
109 is($result->get_statistic('entropy'), 0.384);
110 is($result->get_statistic('dbletters'), 1358990);
111 is($result->get_statistic('dbentries'), 4289);
112 is($result->get_parameter('matrix'), 'BLOSUM62');
113 is($result->get_statistic('Frame+0_lambda_used'), '0.319');
114 is($result->get_statistic('Frame+0_kappa_used'), '0.136');
115 is($result->get_statistic('Frame+0_entropy_used'), '0.384');
117 is($result->get_statistic('Frame+0_lambda_computed'), '0.319');
118 is($result->get_statistic('Frame+0_kappa_computed'), '0.136');
119 is($result->get_statistic('Frame+0_entropy_computed'), '0.384');
121 is($result->get_statistic('Frame+0_lambda_gapped'), '0.244');
122 is($result->get_statistic('Frame+0_kappa_gapped'), '0.0300');
123 is($result->get_statistic('Frame+0_entropy_gapped'), '0.180');
125 @valid = ( [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 4141],
126 [ 'gb|AAC76922.1|', 810, 'AAC76922', '3.1e-86', 844],
127 [ 'gb|AAC76994.1|', 449, 'AAC76994', '2.8e-47', 483]);
129 while( $hit = $result->next_hit ) {
130 my $d = shift @valid;
133 # Test HSP contig data returned by SearchUtils::tile_hsps()
134 # Second hit has two hsps that overlap.
136 # compare with the contig made by hand for these two contigs
137 # in t/data/contig-by-hand.wublastp
138 # (in this made-up file, the hsps from ecolitst.wublastp
139 # were aligned and contiged, and Length, Identities, Positives
140 # were counted, by a human (maj) )
142 my $hand_hit = Bio::SearchIO->new(
144 -file=>test_input_file('contig-by-hand.wublastp')
145 )->next_result->next_hit;
146 my $hand_hsp = $hand_hit->next_hsp;
147 my @hand_qrng = $hand_hsp->range('query');
148 my @hand_srng = $hand_hsp->range('hit');
149 my @hand_matches = $hand_hit->matches;
151 my($qcontigs, $scontigs) = Bio::Search::SearchUtils::tile_hsps($hit);
153 is($qcontigs->[0]->{'start'}, $hand_qrng[0]);
154 is($qcontigs->[0]->{'stop'}, $hand_qrng[1]);
155 is($qcontigs->[0]->{'iden'}, $hand_matches[0]);
156 is($qcontigs->[0]->{'cons'}, $hand_matches[1]);
158 is($scontigs->[0]->{'start'}, $hand_srng[0]);
159 is($scontigs->[0]->{'stop'}, $hand_srng[1]);
160 is($scontigs->[0]->{'iden'}, $hand_matches[0]);
161 is($scontigs->[0]->{'cons'}, $hand_matches[1]);
164 is($hit->name, shift @$d);
165 is($hit->length, shift @$d);
166 is($hit->accession, shift @$d);
167 float_is($hit->significance, shift @$d);
168 is($hit->raw_score, shift @$d );
172 while( my $hsp = $hit->next_hsp ) {
173 is($hsp->query->start, 1);
174 is($hsp->query->end, 820);
175 is($hsp->hit->start, 1);
176 is($hsp->hit->end, 820);
177 is($hsp->length('total'), 820);
179 float_is($hsp->evalue, 0.0);
180 float_is($hsp->pvalue, '0.0');
181 is($hsp->score, 4141);
182 is($hsp->bits,1462.8);
183 is($hsp->percent_identity, 100);
184 is($hsp->frac_identical('query'), 1.00);
185 is($hsp->frac_identical('hit'), 1.00);
191 last if( $count++ > @valid );
195 # test that add hit really works properly for BLAST objects
197 my @hits = $result->hits;
198 $result->add_hit($hits[0]);
199 is($result->num_hits, @hits + 1);
201 # test WU-BLAST -noseqs option
202 $searchio = Bio::SearchIO->new('-format' => 'blast',
203 '-file' => test_input_file('ecolitst.noseqs.wublastp'));
205 $result = $searchio->next_result;
207 is($result->database_name, 'ecoli.aa');
208 is($result->database_letters, 1358990);
209 is($result->database_entries, 4289);
210 is($result->algorithm, 'BLASTP');
211 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
212 like($result->query_name, qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/);
213 is($result->query_accession, 'AAC73113.1');
214 is($result->query_gi, 1786183);
216 is($result->query_length, 820);
217 is($result->get_statistic('kappa'), 0.135);
218 is($result->get_statistic('lambda'), 0.319);
219 is($result->get_statistic('entropy'), 0.384);
220 is($result->get_statistic('dbletters'), 1358990);
221 is($result->get_statistic('dbentries'), 4289);
222 is($result->get_parameter('matrix'), 'BLOSUM62');
223 is($result->get_statistic('Frame+0_lambda_used'), '0.319');
224 is($result->get_statistic('Frame+0_kappa_used'), '0.135');
225 is($result->get_statistic('Frame+0_entropy_used'), '0.384');
227 is($result->get_statistic('Frame+0_lambda_computed'), '0.319');
228 is($result->get_statistic('Frame+0_kappa_computed'), '0.135');
229 is($result->get_statistic('Frame+0_entropy_computed'), '0.384');
231 is($result->get_statistic('Frame+0_lambda_gapped'), '0.244');
232 is($result->get_statistic('Frame+0_kappa_gapped'), '0.0300');
233 is($result->get_statistic('Frame+0_entropy_gapped'), '0.180');
235 @valid = ( [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 4141],
236 [ 'gb|AAC76922.1|', 810, 'AAC76922', '6.6e-93', 907],
237 [ 'gb|AAC76994.1|', 449, 'AAC76994', '2.8e-47', 483]);
239 while( $hit = $result->next_hit ) {
240 my $d = shift @valid;
242 is($hit->name, shift @$d);
243 is($hit->length, shift @$d);
244 is($hit->accession, shift @$d);
245 float_is($hit->significance, shift @$d);
246 is($hit->raw_score, shift @$d );
250 while( my $hsp = $hit->next_hsp ) {
251 is($hsp->query->start, 1);
252 is($hsp->query->end, 820);
253 is($hsp->hit->start, 1);
254 is($hsp->hit->end, 820);
255 is($hsp->length('total'), 820);
257 float_is($hsp->evalue, 0.);
258 float_is($hsp->pvalue , '0.');
259 is($hsp->score, 4141);
260 is($hsp->bits,1462.8);
261 is($hsp->percent_identity, 100);
262 is($hsp->frac_identical('query'), 1.00);
263 is($hsp->frac_identical('hit'), 1.00);
269 last if( $count++ > @valid );
274 $searchio = Bio::SearchIO->new('-format' => 'blast',
275 '-file' => test_input_file('HUMBETGLOA.tblastx'));
277 $result = $searchio->next_result;
278 is($result->database_name, 'ecoli.nt');
279 is($result->database_letters, 4662239);
280 is($result->database_entries, 400);
281 is($result->algorithm, 'TBLASTX');
282 like($result->algorithm_version, qr/^2\.1\.2/);
283 is($result->query_name, 'HUMBETGLOA');
284 is($result->query_description, 'Human haplotype C4 beta-globin gene, complete cds.');
285 is($result->query_length, 3002);
286 is($result->get_statistic('kappa'), 0.135);
287 is($result->get_statistic('lambda'), 0.318);
288 is($result->get_statistic('entropy'), 0.401);
289 is($result->get_statistic('dbletters'), 4662239);
290 is($result->get_statistic('dbentries'), 400);
291 is($result->get_statistic('T'), 13);
292 is($result->get_statistic('X1'), 16);
293 is($result->get_statistic('X1_bits'), 7.3);
294 is($result->get_statistic('X2'), 0);
295 is($result->get_statistic('X2_bits'), '0.0');
296 is($result->get_statistic('S1'), 41);
297 is($result->get_statistic('S1_bits'), 21.7);
298 is($result->get_statistic('S2'), 53);
299 is($result->get_statistic('S2_bits'), 27.2);
301 is($result->get_statistic('decayconst'), 0.1);
303 is($result->get_parameter('matrix'), 'BLOSUM62');
305 @valid = ( [ 'gb|AE000479.1|AE000479', 10934, 'AE000479', '0.13', 33.6, 67],
306 [ 'gb|AE000302.1|AE000302', 10264, 'AE000302', '0.61', 31.3, 62],
307 [ 'gb|AE000277.1|AE000277', 11653, 'AE000277', '0.84', 30.8, 61]);
310 while( $hit = $result->next_hit ) {
311 my $d = shift @valid;
312 is($hit->name, shift @$d);
313 is($hit->length, shift @$d);
314 is($hit->accession, shift @$d);
315 float_is($hit->significance, shift @$d);
316 is($hit->bits, shift @$d );
317 is($hit->raw_score, shift @$d );
321 while( my $hsp = $hit->next_hsp ) {
322 is($hsp->query->start, 1057);
323 is($hsp->query->end, 1134);
324 is($hsp->query->strand, 1);
325 is($hsp->strand('query'), $hsp->query->strand);
326 is($hsp->hit->end, 5893);
327 is($hsp->hit->start, 5816);
328 is($hsp->hit->strand, -1);
329 is($hsp->strand('sbjct'), $hsp->subject->strand);
330 is($hsp->length('total'), 26);
332 float_is($hsp->evalue, 0.13);
335 is(sprintf("%.2f",$hsp->percent_identity), 42.31);
336 is(sprintf("%.4f",$hsp->frac_identical('query')), '0.4231');
337 is(sprintf("%.4f",$hsp->frac_identical('hit')), '0.4231');
338 is($hsp->query->frame(), 0);
339 is($hsp->hit->frame(), 1);
341 is($hsp->query_string, 'SAYWSIFPPLGCWWSTLGPRGSLSPL');
342 is($hsp->hit_string, 'AAVWALFPPVGSQWGCLASQWRTSPL');
343 is($hsp->homology_string, '+A W++FPP+G W L + SPL');
344 # changed to reflect positional ambiguities, note extra flag
345 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '1063-1065 1090-1095 1099-1104 1108-1113 1117-1125');
346 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '5825-5833 5837-5842 5846-5851 5855-5860 5885-5887');
347 is($hsp->ambiguous_seq_inds, 'query/subject');
352 last if( $count++ > @valid );
356 # test for MarkW bug in blastN
358 $searchio = Bio::SearchIO->new('-format' => 'blast',
359 '-file' => test_input_file('a_thaliana.blastn'));
362 $result = $searchio->next_result;
363 is($result->database_name, 'All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS, GSS,or phase 0, 1 or 2 HTGS sequences) ');
364 is($result->database_letters, 4677375331);
365 is($result->database_entries, 1083200);
366 is($result->algorithm, 'BLASTN');
367 like($result->algorithm_version, qr/^2\.2\.1/);
368 is($result->query_name, '');
369 is($result->query_length, 60);
370 is($result->get_parameter('gapopen'), 5);
371 is($result->get_parameter('gapext'), 2);
372 is($result->get_parameter('ktup'), undef);
374 is($result->get_statistic('lambda'), 1.37);
375 is($result->get_statistic('kappa'), 0.711);
376 is($result->get_statistic('entropy'),1.31 );
377 is($result->get_statistic('T'), 0);
378 is($result->get_statistic('A'), 30);
379 is($result->get_statistic('X1'), '6');
380 is($result->get_statistic('X1_bits'), 11.9);
381 is($result->get_statistic('X2'), 15);
382 is($result->get_statistic('X2_bits'), 29.7);
383 is($result->get_statistic('S1'), 12);
384 is($result->get_statistic('S1_bits'), 24.3);
385 is($result->get_statistic('S2'), 17);
386 is($result->get_statistic('S2_bits'), 34.2);
388 is($result->get_statistic('dbentries'), 1083200);
390 @valid = ( [ 'gb|AY052359.1|', 2826, 'AY052359', '3e-18', 95.6, 48, 1, 60,
392 [ 'gb|AC002329.2|AC002329', 76170, 'AC002329', '3e-18', 95.6, 48, 1, 60,
394 [ 'gb|AF132318.1|AF132318', 5383, 'AF132318', '0.04', 42.1, 21, 35, 55,
398 while( my $hit = $result->next_hit ) {
399 my $d = shift @valid;
400 is($hit->name, shift @$d);
401 is($hit->length, shift @$d);
402 is($hit->accession, shift @$d);
403 float_is($hit->significance, shift @$d);
404 is($hit->bits, shift @$d );
405 is($hit->raw_score, shift @$d );
406 is($hit->start, shift @$d);
407 is($hit->end,shift @$d);
408 is(sprintf("%.4f",$hit->frac_aligned_query), shift @$d);
411 while( my $hsp = $hit->next_hsp ) {
412 is($hsp->query->start, 1);
413 is($hsp->query->end, 60);
414 is($hsp->query->strand, 1);
415 is($hsp->hit->start, 154);
416 is($hsp->hit->end, 212);
417 is($hsp->hit->strand, 1);
418 is($hsp->length('total'), 60);
419 float_is($hsp->evalue, 3e-18);
422 is(sprintf("%.2f",$hsp->percent_identity), 96.67);
423 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.9667);
424 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.9831);
425 is($hsp->query->frame(), 0);
426 is($hsp->hit->frame(), 0);
427 is($hsp->query->seq_id, undef);
428 is($hsp->hit->seq_id, 'gb|AY052359.1|');
429 is($hsp->gaps('query'), 0);
430 is($hsp->gaps('hit'), 1);
432 is($hsp->query_string, 'aggaatgctgtttaattggaatcgtacaatggagaatttgacggaaatagaatcaacgat');
433 is($hsp->hit_string, 'aggaatgctgtttaattggaatca-acaatggagaatttgacggaaatagaatcaacgat');
434 is($hsp->homology_string, '||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||');
435 my $aln = $hsp->get_aln;
436 is(sprintf("%.2f", $aln->overall_percentage_identity), 96.67);
437 is(sprintf("%.2f",$aln->percentage_identity), 98.31);
442 last if( $count++ > @valid );
448 $searchio = Bio::SearchIO->new('-format' => 'blast',
449 '-file' => test_input_file('dnaEbsub_ecoli.wublastx'));
451 $result = $searchio->next_result;
452 is($result->database_name, 'ecoli.aa');
453 is($result->database_letters, 1358990);
454 is($result->database_entries, 4289);
455 is($result->algorithm, 'BLASTX');
456 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
457 is($result->query_name, 'gi|142864|gb|M10040.1|BACDNAE');
458 is($result->query_description, 'B.subtilis dnaE gene encoding DNA primase, complete cds');
459 is($result->query_accession, 'M10040.1');
460 is($result->query_gi, 142864);
461 is($result->query_length, 2001);
462 is($result->get_parameter('matrix'), 'blosum62');
464 is($result->get_statistic('lambda'), 0.318);
465 is($result->get_statistic('kappa'), 0.135);
466 is($result->get_statistic('entropy'),0.401 );
468 is($result->get_statistic('dbentries'), 4289);
470 @valid = ( [ 'gi|1789447|gb|AAC76102.1|', 581, 'AAC76102', '1.1e-74', 671]);
473 while( my $hit = $result->next_hit ) {
474 my $d = shift @valid;
475 is($hit->name, shift @$d);
476 is($hit->length, shift @$d);
477 is($hit->accession, shift @$d);
478 float_is($hit->significance, shift @$d);
479 is($hit->raw_score, shift @$d );
480 is(sprintf("%.4f",$hit->frac_identical('query')), '0.3640');
481 is(sprintf("%.4f",$hit->frac_identical('hit')), '0.3660');
482 is(sprintf("%.4f",$hit->frac_conserved('query')), '0.5370');
483 is(sprintf("%.4f",$hit->frac_conserved('hit')), '0.5400');
484 is(sprintf("%.4f",$hit->frac_aligned_query), '0.6200');
485 is(sprintf("%.4f",$hit->frac_aligned_hit), '0.7100');
489 while( my $hsp = $hit->next_hsp ) {
490 is($hsp->query->start, 21);
491 is($hsp->query->end, 1265);
492 is($hsp->query->strand, 1);
493 is($hsp->hit->start, 1);
494 is($hsp->hit->end, 413);
495 is($hsp->hit->strand, 0);
496 is($hsp->length('total'), 421);
497 float_is($hsp->evalue, 1.1e-74);
498 float_is($hsp->pvalue, '1.1e-74');
500 is($hsp->bits,265.8);
501 is(sprintf("%.2f",$hsp->percent_identity), 35.87);
503 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.3639);
504 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.3656);
505 is(sprintf("%.4f",$hsp->frac_conserved('query')), 0.5373);
506 is(sprintf("%.2f",$hsp->frac_conserved('hit')), 0.54);
508 is(sprintf("%.4f",$hsp->frac_identical('hsp')), 0.3587);
509 is(sprintf("%.4f",$hsp->frac_conserved('hsp')), 0.5297);
511 is($hsp->query->frame(), 2);
512 is($hsp->hit->frame(), 0);
513 is($hsp->gaps('query'), 6);
514 is($hsp->gaps('hit'), 8);
516 is($hsp->query_string, 'MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFHCFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARP---ESSGEQKMAEAHELLKKFYHHLLINTKEGQEALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHVKILRRNVEEIILCYDSDKAGYEATLKASELL---QKKGCKVRVAMIPDGLDPDDYIKKFGGEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVKQ');
517 is($hsp->hit_string, 'MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYHCFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVPFE----AGSGPSQIERHQRQTLYQLMDGLNTFYQQSL-QQPVATSARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY-DRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSPETDIFHKGRQLYGLYEAQQDNAEPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHIQLLFRATNNVICCYDGDRAGRDAAWRALETALPYMTDGRQLRFMFLPDGEDPDTLVRKEGKEAFEARM-EQAMPLSAFLFNSLMPQVDLSTPDGRARLSTLALPLISQVPGETLR-IYLRQ');
518 is($hsp->homology_string, 'M RIP ++ + DIV++I V+LKKQG+N+ CPFH E TPSF+V+ +KQ +HCFGCGA GN FL + F E+V LA + ++ P + +G+ P E Q + + + L FY L A YL RG + E+I F IG+A WD + K + + AG+L+ + G Y DRFR RVMFPI D G V+ F GR LG+ PKY+NSPET +FHK + LY Y+A+ + R ++ EG+ DV + ++A++GTS T DH+++L R +I CYD D+AG +A +A E G ++R +PDG DPD ++K G E F+ + + ++ + AF +LS R L IS + G + +Y++Q');
519 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '24-29 39-47 54-56 60-71 90-98 129-137 150-152 156-158 180-182 192-194 219-221 228-236 243-251 255-263 267-269 279-284 291-296 300-302 309-311 315-317 321-332 342-344 351-362 366-368 372-374 378-383 387-389 393-398 405-413 417-440 444-449 456-461 468-470 474-476 486-491 495-497 510-518 525-527 531-533 537-557 561-569 573-578 594-599 603-605 609-614 618-620 633-635 654-656 660-665 669-671 678-680 684-686 693-695 705-710 738-740 753-755 759-761 768-773 786-797 801-806 810-812 819-821 831-833 840-860 864-869 894-896 900-902 921-923 927-938 945-947 957-959 972-974 981-986 993-995 999-1013 1017-1019 1029-1037 1050-1052 1062-1067 1077-1079 1083-1085 1089-1091 1098-1103 1107-1109 1113-1115 1122-1124 1128-1130 1137-1163 1173-1184 1188-1208 1212-1217 1224-1226 1230-1232 1236-1244 1248-1250');
520 is(join(' ', $hsp->seq_inds('query', 'mismatch',1)), '24-29 39-47 54-56 60-71 90-98 129-137 150-152 156-158 180-182 192-194 219-221 228-236 243-251 255-263 267-269 279-284 291-296 300-302 309-311 315-317 342-344 351-362 366-368 372-374 378-383 387-389 393-398 405-413 420-440 444-449 456-461 468-470 474-476 486-491 495-497 510-518 525-527 531-533 537-557 561-569 573-578 594-599 603-605 609-614 633-635 654-656 660-665 669-671 678-680 684-686 693-695 705-710 738-740 753-755 759-761 768-773 786-797 801-806 810-812 819-821 831-833 840-860 864-869 894-896 900-902 921-923 927-938 945-947 957-959 972-974 981-986 993-995 999-1013 1017-1019 1029-1037 1050-1052 1062-1067 1077-1079 1083-1085 1089-1091 1098-1103 1113-1115 1122-1124 1128-1130 1137-1163 1173-1184 1188-1208 1212-1217 1224-1226 1230-1232 1236-1244');
521 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '2 3 7-9 12 14-17 24-26 37-39 44 46 54 58 67 70-72 75-77 79-81 83 87 88 91 92 94 97 99 104 106-108 110-113 115 117 119 120 122 124 125 128-130 132-138 140 141 144 145 148 150 154 155 157 162-164 167 169 171-177 179-181 183 184 190 191 193 195 196 202 209 211 212 214 217 219 222 226 227 237 242 244 247 248 253-256 258 259 261 264 268 271-277 279 280 289 291 298 300-303 306 310 315 318 319 322 324-331 333 337-339 344 348 349 353 355 357 360 361 364 367 369 372-380 384-387 389-395 397 398 401 403 405-407');
522 is(join(' ', $hsp->seq_inds('hit', 'mismatch',1)), '2 3 7-9 12 14-17 24-26 37-39 44 46 54 58 67 70-72 75-77 79-81 83 87 88 91 92 94 97 99 104 110-113 115 117 119 120 122 124 125 128-130 132-138 140 141 144 145 148 150 154 155 157 162-164 167 169 171-177 179-181 183 184 190 191 193 195 196 202 209 211 212 214 217 219 222 226 227 237 242 244 247 248 253-256 258 259 261 264 268 271-277 279 280 289 291 298 300-303 306 310 315 318 319 322 324 325 329-331 333 337-339 344 348 349 353 355 357 360 361 364 367 369 372-380 384-387 389-395 397 398 401 403 405-407');
523 is(join(' ', $hsp->seq_inds('query', 'gaps',1)), '347 1004');
524 is(join(' ', $hsp->seq_inds('hit', 'gaps',1)), '100 131 197 362 408');
525 is($hsp->ambiguous_seq_inds, 'query');
530 last if( $count++ > @valid );
535 $searchio = Bio::SearchIO->new('-format' => 'blast',
536 '-file' => test_input_file('tricky.wublast'));
537 $result = $searchio->next_result;
539 while (my $hit = $result->next_hit) {
540 # frac_aligned_hit used to be over 1, frac_identical & frac_conserved are still too wrong
542 local $TODO = 'frac_identical & frac_conserved are still too wrong';
543 cmp_ok sprintf("%.3f",$hit->frac_identical), '>', 0.9;
544 cmp_ok sprintf("%.3f",$hit->frac_conserved), '<=', 1;
546 is(sprintf("%.2f",$hit->frac_aligned_query), '0.92');
547 is(sprintf("%.2f",$hit->frac_aligned_hit), '0.91');
552 # More frac_ method testing, this time on ncbi blastn
553 $searchio = Bio::SearchIO->new('-format' => 'blast',
554 '-file' => test_input_file('frac_problems.blast'));
555 my @expected = ("1.000", "0.943");
556 while (my $result = $searchio->next_result) {
557 my $hit = $result->next_hit;
558 is($hit->frac_identical, shift @expected);
562 # And even more: frac_aligned_query should never be over 1!
563 $searchio = Bio::SearchIO->new('-format' => 'blast',
564 '-file' => test_input_file('frac_problems2.blast'));
565 $result = $searchio->next_result;
566 $hit = $result->next_hit;
567 is $hit->frac_aligned_query, 0.97;
569 # Also, start and end should be sane
570 $searchio = Bio::SearchIO->new('-format' => 'blast',
571 '-file' => test_input_file('frac_problems3.blast'));
572 $result = $searchio->next_result;
573 $hit = $result->next_hit;
574 is $hit->start('sbjct'), 207;
575 is $hit->end('sbjct'), 1051;
579 $searchio = Bio::SearchIO->new('-format' => 'blast',
580 '-file' => test_input_file('dnaEbsub_ecoli.wutblastn'));
582 $result = $searchio->next_result;
583 is($result->database_name, 'ecoli.nt');
584 is($result->database_letters, 4662239);
585 is($result->database_entries, 400);
586 is($result->algorithm, 'TBLASTN');
587 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
588 is($result->query_name, 'gi|142865|gb|AAA22406.1|');
589 is($result->query_description, 'DNA primase');
590 is($result->query_accession, 'AAA22406.1');
591 is($result->query_gi, 142865);
592 is($result->query_length, 603);
593 is($result->get_parameter('matrix'), 'blosum62');
595 is($result->get_statistic('lambda'), '0.320');
596 is($result->get_statistic('kappa'), 0.136);
597 is($result->get_statistic('entropy'),0.387 );
599 is($result->get_statistic('dbentries'), 400);
601 @valid = ( [ 'gi|1789441|gb|AE000388.1|AE000388', 10334, 'AE000388', '1.4e-73', 671]);
604 while( my $hit = $result->next_hit ) {
605 my $d = shift @valid;
606 is($hit->name, shift @$d);
607 is($hit->length, shift @$d);
608 is($hit->accession, shift @$d);
609 float_is($hit->significance, shift @$d);
610 is($hit->raw_score, shift @$d );
614 while( my $hsp = $hit->next_hsp ) {
615 is($hsp->query->start, 1);
616 is($hsp->query->end, 415);
617 is($hsp->query->strand, 0);
618 is($hsp->hit->start, 4778);
619 is($hsp->hit->end, 6016);
620 is($hsp->hit->strand, 1);
621 is($hsp->length('total'), 421);
622 float_is($hsp->evalue, 1.4e-73);
623 float_is($hsp->pvalue,1.4e-73);
625 is($hsp->bits,265.8);
626 is(sprintf("%.2f",$hsp->percent_identity), 35.87);
627 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.3656);
628 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.3639);
629 is(sprintf("%.4f",$hsp->frac_conserved('hsp')), 0.5297);
630 is($hsp->query->frame(), 0);
631 is($hsp->hit->frame(), 1);
632 is($hsp->gaps('query'), 6);
633 is($hsp->gaps('hit'), 8);
635 is($hsp->query_string, 'MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFHCFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARP---ESSGEQKMAEAHELLKKFYHHLLINTKEGQEALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHVKILRRNVEEIILCYDSDKAGYEATLKASELL---QKKGCKVRVAMIPDGLDPDDYIKKFGGEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVKQ');
636 is($hsp->hit_string, 'MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYHCFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVPFE----AGSGPSQIERHQRQTLYQLMDGLNTFYQQSL-QQPVATSARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY-DRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSPETDIFHKGRQLYGLYEAQQDNAEPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHIQLLFRATNNVICCYDGDRAGRDAAWRALETALPYMTDGRQLRFMFLPDGEDPDTLVRKEGKEAFEARM-EQAMPLSAFLFNSLMPQVDLSTPDGRARLSTLALPLISQVPGETLR-IYLRQ');
637 is($hsp->homology_string, 'M RIP ++ + DIV++I V+LKKQG+N+ CPFH E TPSF+V+ +KQ +HCFGCGA GN FL + F E+V LA + ++ P + +G+ P E Q + + + L FY L A YL RG + E+I F IG+A WD + K + + AG+L+ + G Y DRFR RVMFPI D G V+ F GR LG+ PKY+NSPET +FHK + LY Y+A+ + R ++ EG+ DV + ++A++GTS T DH+++L R +I CYD D+AG +A +A E G ++R +PDG DPD ++K G E F+ + + ++ + AF +LS R L IS + G + +Y++Q');
638 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '2 3 7-9 12 14-17 24-26 37-39 44 46 54 58 67 70-72 75-77 79-81 83 87 88 91 92 94 97 99 101-104 108 111-114 116 118 120 121 123 125 126 129-131 133-140 142 143 146 147 150 152 156 157 159 164-166 169 171 173-179 181-183 185 186 192 193 195 197 198 200 205 212 214 215 217 220 222 225 229 230 240 245 247 250 251 256-259 261 262 264 267 271 274-280 282 283 292 294 301 303-306 309 313 318 321 322 325 327-331 333 337-339 344 348 349 353 355 357 360 361 363 365 368 370 373-381 385-388 390-396 398 399 402 404 406-408 410');
639 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '4781-4786 4796-4804 4811-4813 4817-4828 4847-4855 4886-4894 4907-4909 4913-4915 4937-4939 4949-4951 4976-4978 4985-4993 5000-5008 5012-5020 5024-5026 5036-5041 5048-5053 5057-5059 5066-5068 5072-5074 5087-5089 5093-5101 5105-5116 5120-5122 5126-5128 5132-5137 5141-5143 5147-5152 5159-5167 5171-5191 5195-5200 5207-5212 5219-5221 5225-5227 5237-5242 5246-5248 5261-5269 5276-5278 5282-5284 5288-5308 5312-5320 5324-5329 5345-5350 5354-5356 5360-5365 5381-5383 5402-5404 5408-5413 5417-5419 5426-5428 5432-5434 5441-5443 5453-5458 5486-5488 5501-5503 5507-5509 5516-5521 5534-5545 5549-5554 5558-5560 5567-5569 5579-5581 5588-5608 5612-5617 5642-5644 5648-5650 5669-5671 5675-5686 5693-5695 5705-5707 5720-5722 5729-5734 5741-5743 5747-5770 5774-5776 5786-5794 5807-5809 5819-5824 5834-5836 5840-5842 5846-5848 5855-5860 5867-5869 5876-5878 5882-5884 5891-5917 5927-5938 5942-5962 5966-5971 5978-5980 5984-5986 5990-5998');
640 is(join(' ', $hsp->seq_inds('query', 'gaps',1)), '109 328');
641 is(join(' ', $hsp->seq_inds('hit', 'gaps',1)), '5077 5170 5368 5863 6001');
642 is($hsp->ambiguous_seq_inds, 'subject');
647 last if( $count++ > @valid );
652 $searchio = Bio::SearchIO->new('-format' => 'blast',
653 '-file' => test_input_file('dnaEbsub_ecoli.wutblastx'));
655 $result = $searchio->next_result;
656 is($result->database_name, 'ecoli.nt');
657 is($result->database_letters, 4662239);
658 is($result->database_entries, 400);
659 is($result->algorithm, 'TBLASTX');
660 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
661 is($result->query_name, 'gi|142864|gb|M10040.1|BACDNAE');
662 is($result->query_description, 'B.subtilis dnaE gene encoding DNA primase, complete cds');
663 is($result->query_accession, 'M10040.1');
664 is($result->query_gi, 142864);
665 is($result->query_length, 2001);
666 is($result->get_parameter('matrix'), 'blosum62');
668 is($result->get_statistic('lambda'), 0.318);
669 is($result->get_statistic('kappa'), 0.135);
670 is($result->get_statistic('entropy'),0.401 );
671 is($result->get_statistic('dbentries'), 400);
673 @valid = ( [ 'gi|1789441|gb|AE000388.1|AE000388', 10334, 'AE000388', '6.4e-70', 318, 148.6],
674 [ 'gi|2367383|gb|AE000509.1|AE000509', 10589, 'AE000509', 1, 59, 29.9]
678 while( my $hit = $result->next_hit ) {
679 my $d = shift @valid;
680 is($hit->name, shift @$d);
681 is($hit->length, shift @$d);
682 is($hit->accession, shift @$d);
683 # using e here to deal with 0.9992 coming out right here as well
684 float_is($hit->significance, shift @$d);
685 is($hit->raw_score, shift @$d );
686 is($hit->bits, shift @$d );
689 while( my $hsp = $hit->next_hsp ) {
691 if( $hspcounter == 3 ) {
692 # let's actually look at the 3rd HSP
693 is($hsp->query->start, 441);
694 is($hsp->query->end, 617);
695 is($hsp->query->strand, 1);
696 is($hsp->hit->start, 5192);
697 is($hsp->hit->end, 5368);
698 is($hsp->hit->strand, 1);
699 is($hsp->length('total'), 59);
700 float_is($hsp->evalue, 6.4e-70);
701 float_is($hsp->pvalue,6.4e-70);
704 is(sprintf("%.2f",$hsp->percent_identity), '32.20');
705 is(sprintf("%.3f",$hsp->frac_identical('hit')), 0.322);
706 is(sprintf("%.3f",$hsp->frac_identical('query')), 0.322);
707 is(sprintf("%.4f",$hsp->frac_conserved('hsp')), 0.4746);
708 is($hsp->query->frame(), 2);
709 is($hsp->hit->frame(), 1);
710 is($hsp->gaps('query'), 0);
711 is($hsp->gaps('hit'), 0);
713 is($hsp->query_string, 'ALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGY');
714 is($hsp->hit_string, 'ARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY');
715 is($hsp->homology_string, 'A YL RG + E+I F IG+A WD + K + + AG+L+ + G Y');
716 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '444-449 456-461 468-470 474-476 486-491 495-497 510-518 525-527 531-533 537-557 561-569 573-578 594-599 603-605 609-614');
717 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '5195-5200 5207-5212 5219-5221 5225-5227 5237-5242 5246-5248 5261-5269 5276-5278 5282-5284 5288-5308 5312-5320 5324-5329 5345-5350 5354-5356 5360-5365');
718 is($hsp->ambiguous_seq_inds, 'query/subject');
724 elsif( $count == 1 ) {
726 while( my $hsp = $hit->next_hsp ) {
727 is($hsp->query->start, 587);
728 is($hsp->query->end, 706);
729 is($hsp->query->strand, -1);
730 is($hsp->hit->start, 4108);
731 is($hsp->hit->end, 4227);
732 is($hsp->hit->strand, -1);
733 is($hsp->length('total'), 40);
734 float_is($hsp->evalue, 7.1);
735 float_is($hsp->pvalue , '1.00');
738 is(sprintf("%.2f",$hsp->percent_identity), '37.50');
739 is(sprintf("%.4f",$hsp->frac_identical('hit')), '0.3750');
740 is(sprintf("%.4f",$hsp->frac_identical('query')), '0.3750');
741 is(sprintf("%.4f",$hsp->frac_conserved('hsp')), '0.4750');
742 is($hsp->query->frame(), 2);
743 is($hsp->hit->frame(), 2);
744 is($hsp->gaps('query'), 0);
745 is($hsp->gaps('hit'), 0);
747 is($hsp->query_string, 'WLPRALPEKATTAP**SWIGNMTRFLKRSKYPLPSSRLIR');
748 is($hsp->hit_string, 'WLSRTTVGSSTVSPRTFWITRMKVKLSSSKVTLPSTKSTR');
749 is($hsp->homology_string, 'WL R +T +P WI M L SK LPS++ R');
755 last if( $count++ > @valid );
759 # WU-BLAST -echofilter option test (Bug 2388)
760 $searchio = Bio::SearchIO->new('-format' => 'blast',
761 '-file' => test_input_file('echofilter.wublastn'));
763 $result = $searchio->next_result;
765 is($result->database_name, 'NM_003201.fa');
766 is($result->database_letters, 1936);
767 is($result->database_entries, 1);
768 is($result->algorithm, 'BLASTN');
769 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
770 like($result->query_name, qr/ref|NM_003201.1| Homo sapiens transcription factor A, mitochondrial \(TFAM\), mRNA/);
771 is($result->query_accession, 'NM_003201.1');
773 is($result->query_length, 1936);
774 is($result->get_statistic('lambda'), 0.192);
775 is($result->get_statistic('kappa'), 0.182);
776 is($result->get_statistic('entropy'), 0.357);
777 is($result->get_statistic('dbletters'), 1936);
778 is($result->get_statistic('dbentries'), 1);
779 is($result->get_parameter('matrix'), '+5,-4');
781 @valid = ( [ 'ref|NM_003201.1|', 1936, 'NM_003201', '0', 9680],);
783 while( $hit = $result->next_hit ) {
784 my $d = shift @valid;
786 is($hit->name, shift @$d);
787 is($hit->length, shift @$d);
788 is($hit->accession, shift @$d);
789 float_is($hit->significance, shift @$d);
790 is($hit->raw_score, shift @$d );
794 while( my $hsp = $hit->next_hsp ) {
795 is($hsp->query->start, 1);
796 is($hsp->query->end, 1936);
797 is($hsp->hit->start, 1);
798 is($hsp->hit->end, 1936);
799 is($hsp->length('total'), 1936);
801 float_is($hsp->evalue, 0.);
802 float_is($hsp->pvalue , '0.');
803 is($hsp->score, 9680);
804 is($hsp->bits,1458.4);
805 is($hsp->percent_identity, 100);
806 is($hsp->frac_identical('query'), 1.00);
807 is($hsp->frac_identical('hit'), 1.00);
813 last if( $count++ > @valid );
818 # Do a multiblast report test
819 $searchio = Bio::SearchIO->new('-format' => 'blast',
820 '-file' => test_input_file('multi_blast.bls'));
822 @expected = qw(CATH_RAT CATL_HUMAN CATL_RAT PAPA_CARPA);
823 my $results_left = 4;
824 while( my $result = $searchio->next_result ) {
825 is($result->query_name, shift @expected, "Multiblast query test");
828 is($results_left, 0);
830 # Test GCGBlast parsing
832 $searchio = Bio::SearchIO->new('-format' => 'blast',
833 '-file' => test_input_file('test.gcgblast'));
834 $result = $searchio->next_result();
836 is($result->query_name, '/v0/people/staji002/test.gcg');
837 is($result->algorithm, 'BLASTP');
838 is($result->algorithm_version, '2.2.1 [Apr-13-2001]');
839 is($result->database_name, 'pir');
840 is($result->database_entries, 274514);
841 is($result->database_letters, 93460074);
843 $hit = $result->next_hit;
844 is($hit->description, 'F22B7.10 protein - Caenorhabditis elegans');
845 is($hit->name, 'PIR2:S44629');
846 is($hit->length, 628);
847 is($hit->accession, 'PIR2:S44629');
848 float_is($hit->significance, 2e-08);
849 is($hit->raw_score, 136 );
850 is($hit->bits, '57.0' );
851 $hsp = $hit->next_hsp;
852 float_is($hsp->evalue, 2e-08);
853 is($hsp->bits, '57.0');
854 is($hsp->score, 136);
855 is(int($hsp->percent_identity), 28);
856 is(sprintf("%.2f",$hsp->frac_identical('query')), 0.29);
857 is($hsp->frac_conserved('total'), 69/135);
858 is($hsp->gaps('total'), 8);
859 is($hsp->gaps('hit'), 6);
860 is($hsp->gaps('query'), 2);
862 is($hsp->hit->start, 342);
863 is($hsp->hit->end, 470);
864 is($hsp->query->start, 3);
865 is($hsp->query->end, 135);
867 is($hsp->query_string, 'CAAEFDFMEKETPLRYTKTXXXXXXXXXXXXXXRKIISDMWGVLAKQQTHVRKHQFDHGELVYHALQLLAYTALGILIMRLKLFLTPYMCVMASLICSRQLFGW--LFCKVHPGAIVFVILAAMSIQGSANLQTQ');
868 is($hsp->hit_string, 'CSAEFDFIQYSTIEKLCGTLLIPLALISLVTFVFNFVKNT-NLLWRNSEEIG----ENGEILYNVVQLCCSTVMAFLIMRLKLFMTPHLCIVAALFANSKLLGGDRISKTIRVSALVGVI-AILFYRGIPNIRQQ');
869 is($hsp->homology_string, 'C+AEFDF++ T + T + + +L + + ++GE++Y+ +QL T + LIMRLKLF+TP++C++A+L + +L G + + A+V VI A + +G N++ Q');
871 #test all the database accession number formats
872 $searchio = Bio::SearchIO->new(-format => 'blast',
873 -file => test_input_file('testdbaccnums.out'));
874 $result = $searchio->next_result;
876 @valid = ( ['pir||T14789','T14789','T14789','CAB53709','AAH01726'],
877 ['gb|NP_065733.1|CYT19', 'NP_065733','CYT19'],
878 ['emb|XP_053690.4|Cyt19','XP_053690'],
879 ['dbj|NP_056277.2|DKFZP586L0724','NP_056277'],
880 ['prf||XP_064862.2','XP_064862'],
881 ['pdb|BAB13968.1|1','BAB13968'],
882 ['sp|Q16478|GLK5_HUMAN','Q16478'],
883 ['pat|US|NP_002079.2','NP_002079'],
884 ['bbs|NP_079463.2|','NP_079463'],
885 ['gnl|db1|NP_002444.1','NP_002444'],
886 ['ref|XP_051877.1|','XP_051877'],
887 ['lcl|AAH16829.1|','AAH16829'],
888 ['gi|1|gb|NP_065733.1|CYT19','NP_065733'],
889 ['gi|2|emb|XP_053690.4|Cyt19','XP_053690'],
890 ['gi|3|dbj|NP_056277.2|DKFZP586L0724','NP_056277'],
891 ['gi|4|pir||T14789','T14789'],
892 ['gi|5|prf||XP_064862.2','XP_064862'],
893 ['gi|6|pdb|BAB13968.1|1','BAB13968'],
894 ['gi|7|sp|Q16478|GLK5_HUMAN','Q16478'],
895 ['gi|8|pat|US|NP_002079.2','NP_002079'],
896 ['gi|9|bbs|NP_079463.2|','NP_079463'],
897 ['gi|10|gnl|db1|NP_002444.1','NP_002444'],
898 ['gi|11|ref|XP_051877.1|','XP_051877'],
899 ['gi|12|lcl|AAH16829.1|','AAH16829'],
900 ['MY_test_ID','MY_test_ID']
903 $hit = $result->next_hit;
904 my $d = shift @valid;
905 is($hit->name, shift @$d);
906 is($hit->accession, shift @$d);
907 my @accnums = $hit->each_accession_number;
908 foreach my $a (@accnums) {
912 $hit = $result->next_hit;
913 is($hit->name, shift @$d);
914 is($hit->accession, shift @$d);
915 is($hit->locus, shift @$d);
918 while( $hit = $result->next_hit ) {
919 my $d = shift @valid;
920 is($hit->name, shift @$d);
921 is($hit->accession, shift @$d);
928 # parse the BLAST-like output
929 my $infile = test_input_file('503384.MEGABLAST.2');
930 my $in = Bio::SearchIO->new(-file => $infile,
931 -format => 'blast'); # this is megablast blast-like output
932 my $r = $in->next_result;
933 my @dcompare = ( ['Contig3700', 5631, 396, 785,'0.0', 785, '0.0', 396, 639, 12,
934 8723,9434, 1, 4083, 4794, -1],
935 ['Contig3997', 12734, 335, 664,'0.0', 664, '0.0', 335, 401, 0,
936 1282, 1704, 1, 1546, 1968,-1 ],
937 ['Contig634', 858, 245, 486,'1e-136', 486, '1e-136', 245, 304, 3,
938 7620, 7941, 1, 1, 321, -1],
939 ['Contig1853', 2314, 171, 339,'1e-91',339, '1e-91', 171, 204, 0,
940 6406, 6620, 1, 1691, 1905, 1]
943 is($r->algorithm, 'MEGABLAST');
944 is($r->query_name, '503384');
945 is($r->query_description, '11337 bp 2 contigs');
946 is($r->query_length, 11337);
947 is($r->database_name, 'cneoA.nt ');
948 is($r->database_letters, 17206226);
949 is($r->database_entries, 4935);
952 while( my $hit = $r->next_hit ) {
953 my $d = shift @dcompare;
954 is($hit->name, shift @$d);
955 is($hit->length, shift @$d);
956 is($hit->raw_score, shift @$d);
957 is($hit->bits, shift @$d);
958 float_is($hit->significance, shift @$d);
960 my $hsp = $hit->next_hsp;
961 is($hsp->bits, shift @$d);
962 float_is($hsp->evalue, shift @$d);
963 is($hsp->score, shift @$d);
964 is($hsp->num_identical, shift @$d);
965 is($hsp->gaps('total'), shift @$d);
966 is($hsp->query->start, shift @$d);
967 is($hsp->query->end, shift @$d);
968 is($hsp->query->strand, shift @$d);
969 is($hsp->hit->start, shift @$d);
970 is($hsp->hit->end, shift @$d);
971 is($hsp->hit->strand, shift @$d);
976 # Let's test RPS-BLAST
978 my $parser = Bio::SearchIO->new(-format => 'blast',
979 -file => test_input_file('ecoli_domains.rpsblast'));
981 $r = $parser->next_result;
982 is($r->query_name, 'gi|1786183|gb|AAC73113.1|');
983 is($r->query_gi, 1786183);
986 is($hit->name, 'gnl|CDD|3919');
987 float_is($hit->significance, 0.064);
988 is($hit->bits, 28.3);
989 is($hit->raw_score, 63);
990 $hsp = $hit->next_hsp;
991 is($hsp->query->start, 599);
992 is($hsp->query->end,655);
993 is($hsp->hit->start,23);
994 is($hsp->hit->end,76);
997 # Test PSI-BLAST parsing
999 $searchio = Bio::SearchIO->new('-format' => 'blast',
1000 '-file' => test_input_file('psiblastreport.out'));
1002 $result = $searchio->next_result;
1004 is($result->database_name, '/home/peter/blast/data/swissprot.pr');
1005 is($result->database_entries, 88780);
1006 is($result->database_letters, 31984247);
1008 is($result->algorithm, 'BLASTP');
1009 like($result->algorithm_version, qr/^2\.0\.14/);
1010 is($result->query_name, 'CYS1_DICDI');
1011 is($result->query_length, 343);
1012 is($result->get_statistic('kappa') , 0.0491);
1013 cmp_ok($result->get_statistic('lambda'), '==', 0.270);
1014 cmp_ok($result->get_statistic('entropy'), '==', 0.230);
1015 is($result->get_statistic('dbletters'), 31984247);
1016 is($result->get_statistic('dbentries'), 88780);
1017 is($result->get_statistic('effective_hsplength'), 49);
1018 is($result->get_statistic('effectivespace'), 8124403938);
1019 is($result->get_parameter('matrix'), 'BLOSUM62');
1020 is($result->get_parameter('gapopen'), 11);
1021 is($result->get_parameter('gapext'), 1);
1023 my @valid_hit_data = ( [ 'sp|P04988|CYS1_DICDI', 343, 'P04988', '0', 721],
1024 [ 'sp|P43295|A494_ARATH', 313, 'P43295', '1e-75', 281],
1025 [ 'sp|P25804|CYSP_PEA', 363, 'P25804', '1e-74', 278]);
1026 my @valid_iter_data = ( [ 127, 127, 0, 109, 18, 0, 0, 0, 0],
1027 [ 157, 40, 117, 2, 38, 0, 109, 3, 5]);
1029 while( $iter = $result->next_iteration ) {
1031 my $di = shift @valid_iter_data;
1032 is($iter->number, $iter_count);
1034 is($iter->num_hits, shift @$di);
1035 is($iter->num_hits_new, shift @$di);
1036 is($iter->num_hits_old, shift @$di);
1037 is(scalar($iter->newhits_below_threshold), shift @$di);
1038 is(scalar($iter->newhits_not_below_threshold), shift @$di);
1039 is(scalar($iter->newhits_unclassified), shift @$di);
1040 is(scalar($iter->oldhits_below_threshold), shift @$di);
1041 is(scalar($iter->oldhits_newly_below_threshold), shift @$di);
1042 is(scalar($iter->oldhits_not_below_threshold), shift @$di);
1045 if ($iter_count == 1) {
1046 while( $hit = $result->next_hit ) {
1047 my $d = shift @valid_hit_data;
1049 is($hit->name, shift @$d);
1050 is($hit->length, shift @$d);
1051 is($hit->accession, shift @$d);
1052 float_is($hit->significance, shift @$d);
1053 is($hit->bits, shift @$d );
1055 if( $hit_count == 1 ) {
1057 while( my $hsp = $hit->next_hsp ){
1058 is($hsp->query->start, 32);
1059 is($hsp->query->end, 340);
1060 is($hsp->hit->start, 3);
1061 is($hsp->hit->end, 307);
1062 is($hsp->length('total'), 316);
1063 is($hsp->start('hit'), $hsp->hit->start);
1064 is($hsp->end('query'), $hsp->query->end);
1065 is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
1066 float_is($hsp->evalue, 1e-75);
1067 is($hsp->score, 712);
1068 is($hsp->bits, 281);
1069 is(sprintf("%.1f",$hsp->percent_identity), 46.5);
1070 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.4757);
1071 is(sprintf("%.3f",$hsp->frac_identical('hit')), 0.482);
1077 last if( $hit_count++ > @valid_hit_data );
1081 is(@valid_hit_data, 0);
1082 is(@valid_iter_data, 0);
1086 $searchio = Bio::SearchIO->new( '-format' => 'blast',
1087 '-file' => test_input_file('ecolitst.bls'),
1088 '-signif' => 1e-100);
1090 @valid = qw(gb|AAC73113.1|);
1091 $r = $searchio->next_result;
1093 while( my $hit = $r->next_hit ) {
1094 is($hit->name, shift @valid);
1097 $searchio = Bio::SearchIO->new( '-format' => 'blast',
1098 '-file' => test_input_file('ecolitst.bls'),
1101 @valid = qw(gb|AAC73113.1| gb|AAC76922.1| gb|AAC76994.1|);
1102 $r = $searchio->next_result;
1104 while( my $hit = $r->next_hit ) {
1105 is($hit->name, shift @valid);
1109 $searchio = Bio::SearchIO->new( '-format' => 'blast',
1110 '-file' => test_input_file('ecolitst.bls'),
1113 @valid = qw(gb|AAC73113.1| gb|AAC76922.1|);
1114 $r = $searchio->next_result;
1116 while( my $hit = $r->next_hit ) {
1117 is($hit->name, shift @valid);
1122 my $filt_func = sub{ my $hit=shift;
1123 $hit->frac_identical('query') >= 0.31
1126 $searchio = Bio::SearchIO->new( '-format' => 'blast',
1127 '-file' => test_input_file('ecolitst.bls'),
1128 '-hit_filter' => $filt_func);
1130 @valid = qw(gb|AAC73113.1| gb|AAC76994.1|);
1131 $r = $searchio->next_result;
1133 while( my $hit = $r->next_hit ) {
1134 is($hit->name, shift @valid);
1141 # bl2seq parsing testing
1143 # this is blastp bl2seq
1144 $searchio = Bio::SearchIO->new(-format => 'blast',
1145 -file => test_input_file('bl2seq.out'));
1146 $result = $searchio->next_result;
1147 isa_ok($result,'Bio::Search::Result::ResultI');
1148 is($result->query_name, '');
1149 is($result->algorithm, 'BLASTP');
1150 $hit = $result->next_hit;
1151 is($hit->name, 'ALEU_HORVU');
1152 is($hit->length, 362);
1153 $hsp = $hit->next_hsp;
1154 is($hsp->score, 481);
1155 is($hsp->bits, 191);
1156 is(int $hsp->percent_identity, 34);
1157 float_is($hsp->evalue, 2e-53);
1158 is(int($hsp->frac_conserved*$hsp->length), 167);
1159 is($hsp->query->start, 28);
1160 is($hsp->query->end, 343);
1161 is($hsp->hit->start, 60);
1162 is($hsp->hit->end,360);
1165 # this is blastn bl2seq
1166 $searchio = Bio::SearchIO->new(-format => 'blast',
1167 -file => test_input_file('bl2seq.blastn.rev'));
1168 $result = $searchio->next_result;
1169 isa_ok($result,'Bio::Search::Result::ResultI');
1170 is($result->query_name, '');
1171 is($result->algorithm, 'BLASTN');
1172 is($result->query_length, 180);
1173 $hit = $result->next_hit;
1174 is($hit->length, 179);
1175 is($hit->name, 'human');
1176 $hsp = $hit->next_hsp;
1177 is($hsp->score, 27);
1178 is($hsp->bits, '54.0');
1179 is(int $hsp->percent_identity, 88);
1180 float_is($hsp->evalue, 2e-12);
1181 is(int($hsp->frac_conserved*$hsp->length), 83);
1182 is($hsp->query->start, 94);
1183 is($hsp->query->end, 180);
1184 is($hsp->query->strand, 1);
1185 is($hsp->hit->strand, -1);
1186 is($hsp->hit->start, 1);
1187 is($hsp->hit->end,94);
1190 # this is blastn bl2seq
1191 $searchio = Bio::SearchIO->new(-format => 'blast',
1192 -file => test_input_file('bl2seq.blastn'));
1193 $result = $searchio->next_result;
1194 isa_ok($result,'Bio::Search::Result::ResultI');
1195 is($result->query_name, '');
1196 is($result->query_length, 180);
1197 is($result->algorithm, 'BLASTN');
1198 $hit = $result->next_hit;
1199 is($hit->name, 'human');
1200 is($hit->length, 179);
1201 $hsp = $hit->next_hsp;
1202 is($hsp->score, 27);
1203 is($hsp->bits, '54.0');
1204 is(int $hsp->percent_identity, 88);
1205 float_is($hsp->evalue, 2e-12);
1206 is(int($hsp->frac_conserved*$hsp->length), 83);
1207 is($hsp->query->start, 94);
1208 is($hsp->query->end, 180);
1209 is($hsp->query->strand, 1);
1210 is($hsp->hit->strand, 1);
1211 is($hsp->hit->start, 86);
1212 is($hsp->hit->end,179);
1215 # this is blastp bl2seq
1216 $searchio = Bio::SearchIO->new(-format => 'blast',
1217 -file => test_input_file('bl2seq.bug940.out'));
1218 $result = $searchio->next_result;
1219 isa_ok($result,'Bio::Search::Result::ResultI');
1220 is($result->query_name, 'zinc');
1221 is($result->algorithm, 'BLASTP');
1222 is($result->query_description, 'finger protein 135 (clone pHZ-17) [Homo sapiens]. neo_id RS.ctg14243-000000.6.0');
1223 is($result->query_length, 469);
1224 $hit = $result->next_hit;
1225 is($hit->name, 'gi|4507985|');
1226 is($hit->ncbi_gi, 4507985);
1227 is($hit->description,'zinc finger protein 135 (clone pHZ-17) [Homo sapiens]. neo_id RS.ctg14243-000000.6.0');
1228 is($hit->length, 469);
1229 $hsp = $hit->next_hsp;
1230 is($hsp->score, 1626);
1231 is($hsp->bits, 637);
1232 is(int $hsp->percent_identity, 66);
1233 float_is($hsp->evalue, 0.0);
1234 is(int($hsp->frac_conserved*$hsp->length), 330);
1235 is($hsp->query->start, 121);
1236 is($hsp->query->end, 469);
1237 is($hsp->hit->start, 1);
1238 is($hsp->hit->end,469);
1239 is($hsp->gaps, 120);
1240 ok($hit->next_hsp); # there is more than one HSP here,
1241 # make sure it is parsed at least
1243 # cannot distinguish between blastx and tblastn reports
1244 # so we're only testing a blastx report for now
1246 # this is blastx bl2seq
1247 $searchio = Bio::SearchIO->new(-format => 'blast',
1248 -file => test_input_file('bl2seq.blastx.out'));
1249 $result = $searchio->next_result;
1250 isa_ok($result,'Bio::Search::Result::ResultI');
1251 is($result->query_name, 'AE000111.1');
1252 is($result->query_description, 'Escherichia coli K-12 MG1655 section 1 of 400 of the complete genome');
1253 is($result->algorithm, 'BLASTX');
1254 is($result->query_length, 720);
1255 $hit = $result->next_hit;
1256 is($hit->name, 'AK1H_ECOLI');
1257 is($hit->description,'P00561 Bifunctional aspartokinase/homoserine dehydrogenase I (AKI-HDI) [Includes: Aspartokinase I ; Homoserine dehydrogenase I ]');
1258 is($hit->length, 820);
1259 $hsp = $hit->next_hsp;
1260 is($hsp->score, 634);
1261 is($hsp->bits, 248);
1262 is(int $hsp->percent_identity, 100);
1263 float_is($hsp->evalue, 2e-70);
1264 is(int($hsp->frac_conserved*$hsp->length), 128);
1265 is($hsp->query->start, 1);
1266 is($hsp->query->end, 384);
1267 is($hsp->hit->start, 1);
1268 is($hsp->hit->end,128);
1270 is($hsp->query->frame,0);
1271 is($hsp->hit->frame,0);
1272 is($hsp->query->strand,-1);
1273 is($hsp->hit->strand,0);
1275 # this is tblastx bl2seq (self against self)
1276 $searchio = Bio::SearchIO->new(-format => 'blast',
1277 -file => test_input_file('bl2seq.tblastx.out'));
1278 $result = $searchio->next_result;
1279 isa_ok($result,'Bio::Search::Result::ResultI');
1280 is($result->query_name, 'Escherichia');
1281 is($result->algorithm, 'TBLASTX');
1282 is($result->query_description, 'coli K-12 MG1655 section 1 of 400 of the complete genome');
1283 is($result->query_length, 720);
1284 $hit = $result->next_hit;
1285 is($hit->name, 'gi|1786181|gb|AE000111.1|AE000111');
1286 is($hit->ncbi_gi, 1786181);
1287 is($hit->description,'Escherichia coli K-12 MG1655 section 1 of 400 of the complete genome');
1288 is($hit->length, 720);
1289 $hsp = $hit->next_hsp;
1290 is($hsp->score, 1118);
1291 is($hsp->bits, 515);
1292 is(int $hsp->percent_identity, 95);
1293 float_is($hsp->evalue, 1e-151);
1294 is(int($hsp->frac_conserved*$hsp->length), 229);
1295 is($hsp->query->start, 1);
1296 is($hsp->query->end, 720);
1297 is($hsp->hit->start, 1);
1298 is($hsp->hit->end,720);
1300 is($hsp->query->frame,0);
1301 is($hsp->hit->frame,0);
1302 is($hsp->query->strand,1);
1303 is($hsp->hit->strand,1);
1305 # this is NCBI tblastn
1306 $searchio = Bio::SearchIO->new(-format => 'blast',
1307 -file => test_input_file('tblastn.out'));
1308 $result = $searchio->next_result;
1309 isa_ok($result,'Bio::Search::Result::ResultI');
1310 is($result->algorithm, 'TBLASTN');
1311 $hit = $result->next_hit;
1312 is($hit->name,'gi|10040111|emb|AL390796.6|AL390796');
1314 # Test Blast parsing with B=0 (WU-BLAST)
1315 $searchio = Bio::SearchIO->new(-file => test_input_file('no_hsps.blastp'),
1316 -format => 'blast');
1317 $result = $searchio->next_result;
1318 is($result->query_name, 'mgri:MG00189.3');
1319 $hit = $result->next_hit;
1320 is($hit->name, 'mgri:MG00189.3');
1321 is($hit->description, 'hypothetical protein 6892 8867 +');
1322 is($hit->bits, 3098);
1323 float_is($hit->significance, 0.);
1325 $hit = $result->next_hit;
1326 is($hit->name, 'fgram:FG01141.1');
1327 is($hit->description, 'hypothetical protein 47007 48803 -');
1328 is($hit->bits, 2182);
1329 float_is($hit->significance, 4.2e-226);
1330 is($result->num_hits, 415);
1331 # Let's now test if _guess_format is doing its job correctly
1332 my %pair = ( 'filename.blast' => 'blast',
1333 'filename.bls' => 'blast',
1335 'f.tblx' => 'blast',
1336 'fast.bls' => 'blast',
1337 'f.fasta' => 'fasta',
1341 'f.ssearch' => 'fasta',
1342 'f.SSEARCH.m9' => 'fasta',
1344 'f.psearch' => 'fasta',
1345 'f.osearch' => 'fasta',
1346 'f.exon' => 'exonerate',
1347 'f.exonerate' => 'exonerate',
1348 'f.blastxml' => 'blastxml',
1349 'f.xml' => 'blastxml');
1350 while( my ($file,$expformat) = each %pair ) {
1351 is(Bio::SearchIO->_guess_format($file),$expformat, "$expformat for $file");
1355 # Test Wes Barris's reported bug when parsing blastcl3 output which
1356 # has integer overflow
1358 $searchio = Bio::SearchIO->new(-file => test_input_file('hsinsulin.blastcl3.blastn'),
1359 -format => 'blast');
1360 $result = $searchio->next_result;
1361 is($result->query_name, 'human');
1362 is($result->database_letters(), '-24016349');
1363 # this is of course not the right length, but is the what blastcl3
1364 # reports, the correct value is
1365 is($result->get_statistic('dbletters'),'192913178');
1366 is($result->get_statistic('dbentries'),'1867771');
1369 # test for links and groups being parsed out of WU-BLAST properly
1370 $searchio = Bio::SearchIO->new(-format => 'blast',
1371 -file => test_input_file('brassica_ATH.WUBLASTN'));
1372 ok($result = $searchio->next_result);
1373 ok($hit = $result->next_hit);
1374 ok($hsp = $hit->next_hsp);
1375 is($hsp->links,'(1)-3-2');
1376 is($hsp->query->strand, 1);
1377 is($hsp->hit->strand, 1);
1378 is($hsp->hsp_group, '1');
1380 ## Web blast result parsing
1382 $searchio = Bio::SearchIO->new(-format => 'blast',
1383 -file => test_input_file('catalase-webblast.BLASTP'));
1384 ok($result = $searchio->next_result);
1385 ok($hit = $result->next_hit);
1386 is($hit->name, 'gi|40747822|gb|EAA66978.1|', 'full hit name');
1387 is($hit->accession, 'EAA66978', 'hit accession');
1388 is($hit->ncbi_gi, 40747822);
1389 ok($hsp = $hit->next_hsp);
1390 is($hsp->query->start, 1, 'query start');
1391 is($hsp->query->end, 528, 'query start');
1393 # tests for new BLAST 2.2.13 output
1394 $searchio = Bio::SearchIO->new(-format => 'blast',
1395 -file => test_input_file('new_blastn.txt'));
1397 $result = $searchio->next_result;
1398 is($result->database_name, 'All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS,GSS,environmental samples or phase 0, 1 or 2 HTGS sequences)');
1399 is($result->database_entries, 3742891);
1400 is($result->database_letters, 16670205594);
1401 is($result->algorithm, 'BLASTN');
1402 is($result->algorithm_version, '2.2.13 [Nov-27-2005]');
1403 is($result->query_name, 'pyrR,');
1404 is($result->query_length, 558);
1405 is($result->get_statistic('kappa'), '0.711');
1406 is($result->get_statistic('kappa_gapped'), '0.711');
1407 is($result->get_statistic('lambda'), '1.37');
1408 is($result->get_statistic('lambda_gapped'), '1.37');
1409 is($result->get_statistic('entropy'), '1.31');
1410 is($result->get_statistic('entropy_gapped'), '1.31');
1411 is($result->get_statistic('dbletters'), '-509663586');
1412 is($result->get_statistic('dbentries'), 3742891);
1413 is($result->get_statistic('effective_hsplength'), undef);
1414 is($result->get_statistic('effectivespace'), undef);
1415 is($result->get_parameter('matrix'), 'blastn matrix:1 -3');
1416 is($result->get_parameter('gapopen'), 5);
1417 is($result->get_parameter('gapext'), 2);
1418 is($result->get_statistic('S2'), '60');
1419 is($result->get_statistic('S2_bits'), '119.4');
1420 float_is($result->get_parameter('expect'), '1e-23');
1421 is($result->get_statistic('num_extensions'), '117843');
1423 @valid = ( [ 'gi|41400296|gb|AE016958.1|', 4829781, 'AE016958', 41400296, '6e-059', 119, 236],
1424 [ 'gi|54013472|dbj|AP006618.1|', 6021225, 'AP006618', 54013472, '4e-026', 64, 127],
1425 [ 'gi|57546753|dbj|BA000030.2|', 9025608, 'BA000030', 57546753, '1e-023', 60, 119]);
1427 while( $hit = $result->next_hit ) {
1428 my $d = shift @valid;
1430 is($hit->name, shift @$d);
1431 is($hit->length, shift @$d);
1432 is($hit->accession, shift @$d);
1433 is($hit->ncbi_gi, shift @$d);
1434 float_is($hit->significance, shift @$d);
1435 is($hit->raw_score, shift @$d );
1436 is($hit->bits, shift @$d );
1440 while( my $hsp = $hit->next_hsp ) {
1441 is($hsp->query->start, 262);
1442 is($hsp->query->end, 552);
1443 is($hsp->hit->start, 1166897);
1444 is($hsp->hit->end, 1167187);
1445 is($hsp->length('total'), 291);
1446 is($hsp->hit_features, 'PyrR');
1447 is($hsp->start('hit'), $hsp->hit->start);
1448 is($hsp->end('query'), $hsp->query->end);
1449 is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
1450 float_is($hsp->evalue, 6e-59);
1451 is($hsp->score, 119);
1453 is(sprintf("%.2f",$hsp->percent_identity), 85.22);
1454 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.8522);
1455 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.8522);
1461 last if( $count++ > @valid );
1466 $searchio = Bio::SearchIO->new(-format => 'blast',
1467 -file => test_input_file('blastp2215.blast'));
1469 $result = $searchio->next_result;
1470 is($result->database_entries, 4460989);
1471 is($result->database_letters, 1533424333);
1472 is($result->algorithm, 'BLASTP');
1473 is($result->algorithm_version, '2.2.15 [Oct-15-2006]');
1474 is($result->query_name, 'gi|15608519|ref|NP_215895.1|');
1475 is($result->query_gi, 15608519);
1476 is($result->query_length, 193);
1477 @hits = $result->hits;
1478 is(scalar(@hits), 10);
1479 is($hits[1]->accession,'1W30');
1480 is($hits[4]->significance,'2e-72');
1481 is($hits[7]->bits,'254');
1482 $result = $searchio->next_result;
1483 is($result->database_entries, 4460989);
1484 is($result->database_letters, 1533424333);
1485 is($result->algorithm, 'BLASTP');
1486 is($result->algorithm_version, '2.2.15 [Oct-15-2006]');
1487 is($result->query_name, 'gi|15595598|ref|NP_249092.1|');
1488 is($result->query_length, 423);
1489 @hits = $result->hits;
1490 is(scalar(@hits), 10);
1491 is($hits[1]->accession,'ZP_00972546');
1492 is($hits[2]->ncbi_gi, 116054132);
1493 is($hits[4]->significance, '0.0');
1494 is($hits[7]->bits, 624);
1497 $searchio = Bio::SearchIO->new(-format => 'blast',
1499 -file => test_input_file('bug2246.blast'));
1500 $result = $searchio->next_result;
1501 $hit = $result->next_hit;
1502 is $hit->name, 'UniRef50_Q9X0H5';
1504 is $hit->accession, 'UniRef50_Q9X0H5';
1505 is $hit->description, 'Cluster: Histidyl-tRNA synthetase; n=4; Thermoto...';
1507 float_is($hit->significance, 650);
1510 $searchio = Bio::SearchIO->new(-format => 'blast',
1512 -file => test_input_file('bug1986.blastp'));
1513 $result = $searchio->next_result;
1514 $hit = $result->next_hit;
1515 is $hit->name, 'ENSP00000350182';
1516 is $hit->length, 425;
1517 is $hit->accession, 'ENSP00000350182';
1518 is $hit->description, 'pep:novel clone::BX322644.8:4905:15090:-1 gene:ENSG00000137397 transcript:ENST00000357569 ';
1519 is $hit->raw_score, 301;
1521 float_is($hit->significance, 3e-27);
1522 $hit = $result->next_hit;
1523 is $hit->name, 'ENSP00000327738';
1524 is $hit->length, 468;
1525 is $hit->accession, 'ENSP00000327738';
1526 is $hit->description, 'pep:known-ccds chromosome:NCBI36:4:189297592:189305643:1 gene:ENSG00000184108 transcript:ENST00000332517 CCDS3851.1';
1527 is $hit->raw_score, 289;
1529 float_is($hit->significance, 8e-26);
1533 # handle at least the first iteration with BLAST searches using databases
1534 # containing non-unique IDs
1536 my $file = test_input_file('bug1986.blast2');
1538 open (my $IN,$file) or die $!;
1541 last if (/^Sequences/);
1549 my ($accession) = split(/\s+/);
1550 #print "Real Hit $count = $accession\n";
1551 $unique_accs{$accession}++;
1552 #last if ($count == 10);
1558 is (scalar(keys %unique_accs), 490);
1562 $searchio = Bio::SearchIO->new(-format => 'blast',
1565 $result = $searchio->next_result;
1567 while (my $hit = $result->next_hit) {
1568 $search_accs{$hit->accession}++;
1573 is (scalar(keys %search_accs), 490);
1575 is_deeply(\%unique_accs, \%search_accs);
1577 # bug 2391 - long query names
1579 $file = test_input_file('bug2391.megablast');
1581 $searchio = Bio::SearchIO->new(-format => 'blast',
1583 $result = $searchio->next_result;
1585 # data is getting munged up with long names
1586 is($result->query_name, 'c6_COX;c6_QBL;6|31508172;31503325;31478402|rs36223351|1|dbSNP|C/G');
1587 is($result->query_description, '');
1588 is($result->algorithm, 'MEGABLAST');
1590 # bug 2399 - catching Expect(n) values
1592 $file = test_input_file('bug2399.tblastn');
1594 $searchio = Bio::SearchIO->new(-format => 'blast',
1597 while(my $query = $searchio->next_result) {
1598 while(my $subject = $query->next_hit) {
1599 $total_n += grep{$_->n} $subject->hsps;
1604 sub cmp_evalue ($$) {
1605 my ($tval, $aval) = @_;
1606 is(sprintf("%g",$tval), sprintf("%g",$aval));