1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin(-tests => 1794);
12 use_ok('Bio::SearchIO');
13 use_ok('Bio::SearchIO::Writer::HitTableWriter');
14 use_ok('Bio::SearchIO::Writer::HTMLResultWriter');
17 my ($searchio, $result,$iter,$hit,$hsp);
20 # XML encoding/decoding done within XML::SAX now, though some parsers
21 # do not work properly (XML::SAX::PurePerl, XML::LibXML::SAX)
22 test_skip(-tests => 129, -requires_module => 'XML::SAX');
25 # test with RPSBLAST data first
26 # this needs to be eval'd b/c the XML::SAX parser object is
27 # instantiated in the constructor
28 $searchio = Bio::SearchIO->new('-tempfile' => 1,
29 '-format' => 'blastxml',
30 '-file' => test_input_file('ecoli_domains.rps.xml'),
31 '-blasttype' => 'blast',
33 # PurePerl works with these BLAST reports, so removed verbose promotion
34 $result = $searchio->next_result;
35 die if !defined $result;
37 if ($@ && $@ =~ m{Handler could not resolve external entity}) {
38 skip("XML::SAX::Expat does not work with XML tests; skipping",129);
40 skip("Problem with XML::SAX setup: $@. Check ParserDetails.ini; skipping XML tests",129);
43 isa_ok($result, 'Bio::Search::Result::ResultI');
44 is($result->database_name, '/data_2/jason/db/cdd/cdd/Pfam', 'database_name()');
45 is($result->query_name,'gi|1786182|gb|AAC73112.1|','query_name()');
46 is($result->query_description, '(AE000111) thr operon leader peptide [Escherichia coli]');
47 is($result->query_accession, 'AAC73112.1');
48 is($result->query_gi, 1786182);
49 is($result->query_length, 21);
50 is($result->algorithm, 'BLASTP');
51 is($result->algorithm_version, 'blastp 2.1.3 [Apr-1-2001]');
53 is($result->available_parameters, 8);
54 is($result->get_parameter('gapext'), 1);
55 is($result->available_statistics, 5);
56 is($result->get_statistic('lambda'), 0.267);
58 # this result actually has a hit
59 $result = $searchio->next_result;
60 $hit = $result->next_hit;
61 is($hit->name, 'gnl|Pfam|pfam00742');
62 is($hit->description(), 'HomoS_dh, HomoS dehydrogenase');
63 is($hit->accession, 'pfam00742');
64 is($hit->ncbi_gi, ''); # not found
65 is($hit->length, 310);
67 $hsp = $hit->next_hsp;
68 is($hsp->query->seq_id, $result->query_name,'query name on HSP');
69 is($hsp->query->seqdesc, $result->query_description,'query desc on HSP');
70 is($hsp->hit->seq_id, $hit->name,'hitname');
71 is($hsp->hit->seqdesc, $hit->description,'hitdesc');
72 is($hsp->pvalue, undef);
73 is(sprintf("%g",$hsp->evalue), sprintf("%g",'1.46134e-90'));
75 is($hsp->bits,327.405);
76 is($hsp->query->start, 498);
77 is($hsp->query->end,815);
78 is($hsp->hit->start, 3);
79 is($hsp->hit->end, 310);
80 is($hsp->query->frame,0);
81 is($hsp->hit->frame,0);
82 is(sprintf("%.2f", $hsp->percent_identity), 37.73);
83 is(sprintf("%.4f", $hsp->frac_identical('hit')), 0.3994);
84 is(sprintf("%.4f", $hsp->frac_identical('query')), 0.3868);
85 is(sprintf("%.4f",$hsp->query->frac_identical), 0.3868);
87 is(sprintf("%.4f",$hsp->frac_conserved('total')),0.5245);
88 is(sprintf("%.4f",$hsp->frac_conserved('hit')),0.5552);
89 is(sprintf("%.4f",$hsp->frac_conserved('query')),0.5377);
90 is($hsp->gaps('total'), 26);
91 is($hsp->length('hsp'), 326);
92 is($hsp->query_string, 'LRVCGVANSKALLTNVHGLNLENWQEELAQAKEPF-NLGRLIRLVKEYHLLN----PVIVDCTSSQAVAD-QYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDE-GMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARET-GRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLS');
93 is($hsp->hit_string, 'GVVTGITDSREMLLSRIGLPLEIWKVALRDLEKPRKDLGKLDLTDDAFAVVDDPDIDVVVELTGGIEVARELYLDALEEGKHVVTANKALNASHGDEYLAL---AEKSGVDVLYEAAVAGGIPIIKTLRELLATGDRILKIEGIFNGTTNFILSEMDEKGLPFSDVLAEAQELGYTEADPRDDVEGIDAARKLAILARIAFGIELELDDVYVEGISPITAEDISSADEFGYTLKLLDEAMRQRVEDAESGGEVLRYPTLIPE-------------DHPLASVKGSDNAVAVEGEAYG--PLMFYGPGAGAEPTASAVVADIVRIAR');
94 is($hsp->homology_string, ' V G+ +S+ +L + GL LE W+ L ++P +LG+L + +++ V+V+ T VA Y D L EG HVVT NK N S D Y L AEKS LY+ V G+P+I+ L+ LL GD ++K GI +G+ ++I ++DE G+ FS+ A+E+GYTE DPRDD+ G+D ARKL ILAR G ELEL D+ +E + P F L LD+ RV A G+VLRY I E + PL VK +NA+A Y PL+ G GAG + TA+ V AD++R ');
95 is(join(' ', $hsp->seq_inds('query', 'gap',1)), '532 548 562 649 690');
96 is($hsp->ambiguous_seq_inds, '');
99 $hit = $result->next_hit;
100 isa_ok($hit,'Bio::Search::Hit::HitI');
102 my $results_left = 8;
103 while( $result = $searchio->next_result ) { ok($result); $results_left--; }
104 is($results_left, 0);
107 $searchio = Bio::SearchIO->new(-format => 'blastxml',
109 -file => test_input_file('plague_yeast.bls.xml'));
111 $result = $searchio->next_result;
113 is($result->database_name, 'yeast.aa');
114 is($result->query_name, 'gi|5763811|emb|CAB53164.1|');
115 is($result->query_description, 'putative transposase [Yersinia pestis]');
116 is($result->query_accession, 'CAB53164.1');
117 is($result->query_gi, 5763811);
118 is($result->query_length, 340);
120 $hit = $result->next_hit;
123 $searchio = Bio::SearchIO->new(-format => 'blastxml',
125 -file => test_input_file('mus.bls.xml'));
127 $result = $searchio->next_result;
129 is($result->database_name,'Hs15_up1000');
130 is($result->query_name,'NM_011441_up_1000_chr1_4505586_r');
131 is($result->query_description,'chr1:4505586-4506585');
132 is($result->query_accession,'NM_011441_up_1000_chr1_4505586_r');
133 is($result->query_gi, '');
134 is($result->query_length,'1000');
135 $hit = $result->next_hit;
136 is($hit->name,'NM_001938_up_1000_chr1_93161154_f');
137 is($hit->description,'chr1:93161154-93162153');
138 is($hit->ncbi_gi, ''); # none reported
139 is($hit->accession,'3153');
140 is($hit->length,'1000');
142 # deal with new BLAST XML changes
143 $searchio = Bio::SearchIO->new(-format => 'blastxml',
145 -file => test_input_file('newblast.xml'));
147 $result = $searchio->next_result;
149 is($result->database_name,'nr');
150 is($result->database_name,'nr');
151 is($result->database_letters,'1479795817');
152 is($result->database_entries,'4299737');
153 is($result->algorithm,'BLASTP');
154 is($result->algorithm_version,'BLASTP 2.2.15 [Oct-15-2006]');
156 # some XML::SAX parsers (PurePerl, XML::SAX::LibXML) don't decode entities
157 # properly, not fixable using decode_entities()
158 like($result->algorithm_reference, qr{Nucleic Acids Res} );
159 is($result->available_parameters,4);
160 is($result->available_statistics,5);
161 is($result->query_name,'gi|15600734|ref|NP_254228.1|');
162 is($result->query_description,'dihydroorotase [Pseudomonas aeruginosa PAO1]');
163 is($result->query_accession,'NP_254228.1');
164 is($result->query_gi, 15600734);
165 is($result->query_length,'445');
166 $hit = $result->next_hit;
167 is($hit->name,'gi|15600734|ref|NP_254228.1|');
168 is($hit->description,'dihydroorotase [Pseudomonas aeruginosa PAO1] '.
169 '>gi|107104643|ref|ZP_01368561.1| hypothetical protein PaerPA_01005722 '.
170 '[Pseudomonas aeruginosa PACS2] >gi|9951880|gb|AAG08926.1|AE004966_8 '.
171 'dihydroorotase [Pseudomonas aeruginosa PAO1]');
172 is($hit->accession,'NP_254228');
173 is($hit->length,'445');
174 $hsp = $hit->next_hsp;
175 is($hsp->query->seq_id, $result->query_name,'query name on HSP');
176 is($hsp->query->seqdesc, $result->query_description,'query desc on HSP');
177 is($hsp->hit->seq_id, $hit->name,'hitname');
178 is($hsp->hit->seqdesc, $hit->description,'hitdesc');
179 is($hsp->pvalue, undef);
180 is(sprintf("%g",$hsp->evalue), sprintf("%g",'0'));
181 is($hsp->score, 2251);
182 is($hsp->bits,871.692);
183 is($hsp->query->start, 1);
184 is($hsp->query->end,445);
185 is($hsp->hit->start, 1);
186 is($hsp->hit->end, 445);
187 is($hsp->query->frame,0);
188 is($hsp->hit->frame,0);
190 $result = $searchio->next_result;
192 is($result->database_name,'nr');
193 is($result->database_letters,'1479795817');
194 is($result->database_entries,'4299737');
195 is($result->algorithm,'BLASTP');
196 is($result->algorithm_version,'BLASTP 2.2.15 [Oct-15-2006]');
197 like($result->algorithm_reference, qr{Nucleic Acids Res} );
198 is($result->available_parameters,4);
199 is($result->available_statistics,5);
200 is($result->query_name,'gi|15598723|ref|NP_252217.1|');
201 is($result->query_description,'dihydroorotase [Pseudomonas aeruginosa PAO1]');
202 is($result->query_accession,'NP_252217.1');
203 is($result->query_gi, 15598723);
204 is($result->query_length,'348');
205 $hit = $result->next_hit;
206 is($hit->name,'gi|15598723|ref|NP_252217.1|');
207 is($hit->description,'dihydroorotase [Pseudomonas aeruginosa PAO1] '.
208 '>gi|6226683|sp|P72170|PYRC_PSEAE Dihydroorotase (DHOase) '.
209 '>gi|9949676|gb|AAG06915.1|AE004773_4 dihydroorotase [Pseudomonas aeruginosa PAO1] '.
210 '>gi|3868712|gb|AAC73109.1| dihydroorotase [Pseudomonas aeruginosa]');
211 is($hit->ncbi_gi, 15598723);
212 is($hit->accession,'NP_252217');
213 is($hit->length,'348');
214 $hsp = $hit->next_hsp;
215 is($hsp->query->seq_id, $result->query_name,'query name on HSP');
216 is($hsp->query->seqdesc, $result->query_description,'query desc on HSP');
217 is($hsp->hit->seq_id, $hit->name,'hitname');
218 is($hsp->hit->seqdesc, $hit->description,'hitdesc');
219 is($hsp->pvalue, undef);
220 is(sprintf("%g",$hsp->evalue), sprintf("%g",'0'));
221 is($hsp->score, 1780);
222 is($hsp->bits,690.263);
223 is($hsp->query->start, 1);
224 is($hsp->query->end,348);
225 is($hsp->hit->start, 1);
226 is($hsp->hit->end, 348);
227 is($hsp->query->frame,0);
228 is($hsp->hit->frame,0);
230 # PSIBLAST XML parsing
232 $searchio = Bio::SearchIO->new('-tempfile' => 1,
233 '-format' => 'blastxml',
234 '-file' => test_input_file('psiblast.xml'),
235 '-blasttype' => 'psiblast');
237 my $result = $searchio->next_result;
238 is($result->database_name, 'AL591824.faa');
239 is($result->database_entries, 2846);
240 is($result->database_letters, 870878);
241 is($result->algorithm, 'BLASTP');
242 like($result->algorithm_version, qr/2\.2\.16/);
243 is($result->query_name, 'gi|1373160|gb|AAB57770.1|');
244 is($result->query_accession, 'AAB57770.1');
245 is($result->query_gi, '1373160');
246 is($result->query_length, 173);
247 is($result->get_statistic('kappa') , 0.0475563);
248 cmp_ok($result->get_statistic('lambda'), '==', 0.267);
249 cmp_ok($result->get_statistic('entropy'), '==', 0.14);
250 #is($result->get_statistic('dbletters'), 31984247);
251 #is($result->get_statistic('dbentries'), 88780);
252 #is($result->get_statistic('effective_hsplength'), 49);
253 is($result->get_statistic('effectivespace'), '6.44279e+07');
254 is($result->get_parameter('matrix'), 'BLOSUM62');
255 is($result->get_parameter('gapopen'), 11);
256 is($result->get_parameter('gapext'), 1);
259 my @valid_hit_data = ( [ 'gi|16411294|emb|CAC99918.1|', 183, 'CAC99918', 16411294, '4.5377e-56', 209.92],
260 [ 'gi|16409584|emb|CAD00746.1|', 648, 'CAD00746', 16409584, '0.000286309', 37.7354],
261 [ 'gi|16411285|emb|CAC99909.1|', 209, 'CAC99909', 16411285, '0.107059', 29.261]);
262 my @valid_iter_data = ( [ 16, 16, 0, 2, 14, 0, 0, 0, 0],
263 [ 16, 8, 8, 0, 8, 0, 2, 0, 6]);
265 while (my $iter = $result->next_iteration) {
267 my $di = shift @valid_iter_data;
268 is($iter->number, $iter_count);
269 is($iter->num_hits, shift @$di);
270 is($iter->num_hits_new, shift @$di);
271 is($iter->num_hits_old, shift @$di);
272 is(scalar($iter->newhits_below_threshold), shift @$di);
273 is(scalar($iter->newhits_not_below_threshold), shift @$di);
274 is(scalar($iter->newhits_unclassified), shift @$di);
275 is(scalar($iter->oldhits_below_threshold), shift @$di);
276 is(scalar($iter->oldhits_newly_below_threshold), shift @$di);
277 is(scalar($iter->oldhits_not_below_threshold), shift @$di);
279 if ($iter_count == 1) {
280 while( my $hit = $result->next_hit ) {
281 my $d = shift @valid_hit_data;
282 is($hit->name, shift @$d);
283 is($hit->length, shift @$d);
284 is($hit->accession, shift @$d);
285 is($hit->ncbi_gi, shift @$d);
286 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
287 is($hit->bits, shift @$d );
288 if( $hit_count == 1 ) {
290 while( my $hsp = $hit->next_hsp ){
291 is($hsp->query->start, 4);
292 is($hsp->query->end, 155);
293 is($hsp->hit->start, 475);
294 is($hsp->hit->end, 617);
295 is($hsp->length('hsp'), 153);
296 is($hsp->start('hit'), $hsp->hit->start);
297 is($hsp->end('query'), $hsp->query->end);
298 is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
299 cmp_ok($hsp->evalue, '==', 0.000286309);
301 is($hsp->bits, 37.7354);
302 is(sprintf("%.1f",$hsp->percent_identity), 20.9);
303 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.2105);
304 is(sprintf("%.3f",$hsp->frac_identical('hit')), 0.224);
310 last if( $hit_count++ > @valid_hit_data );
314 is(@valid_hit_data, 0);
315 is(@valid_iter_data, 0);
318 $result = $searchio->next_result;
319 is($result->database_name, 'AL591824.faa');
320 is($result->database_entries, 2846);
321 is($result->database_letters, 870878);
322 is($result->algorithm, 'BLASTP');
323 like($result->algorithm_version, qr/2\.2\.16/);
324 is($result->query_name, 'gi|154350371|gb|ABS72450.1|');
325 is($result->query_accession, 'ABS72450.1');
326 is($result->query_gi, '154350371');
327 is($result->query_length, 378);
328 is($result->get_statistic('kappa') , 0.0450367);
329 cmp_ok($result->get_statistic('lambda'), '==', 0.267);
330 cmp_ok($result->get_statistic('entropy'), '==', 0.14);
331 is($result->get_statistic('effectivespace'), '1.88702e+08');
332 is($result->get_parameter('matrix'), 'BLOSUM62');
333 is($result->get_parameter('gapopen'), 11);
334 is($result->get_parameter('gapext'), 1);
338 @valid_hit_data = ( [ 'gi|16409361|emb|CAC98217.1|', 381, 'CAC98217', 16409361, '5.57178e-119', 420.239],
339 [ 'gi|16409959|emb|CAC98662.1|', 776, 'CAC98662', 16409959, '0.0242028', 32.7278],
340 [ 'gi|16410942|emb|CAC99591.1|', 382, 'CAC99591', 16410942, '0.340848', 28.8758]);
341 @valid_iter_data = ( [ 11, 11, 0, 1, 10, 0, 0, 0, 0],
342 [ 19, 11, 8, 0, 11, 0, 1, 0, 7]);
344 while (my $iter = $result->next_iteration) {
346 my $di = shift @valid_iter_data;
347 is($iter->number, $iter_count);
348 is($iter->num_hits, shift @$di);
349 is($iter->num_hits_new, shift @$di);
350 is($iter->num_hits_old, shift @$di);
351 is(scalar($iter->newhits_below_threshold), shift @$di);
352 is(scalar($iter->newhits_not_below_threshold), shift @$di);
353 is(scalar($iter->newhits_unclassified), shift @$di);
354 is(scalar($iter->oldhits_below_threshold), shift @$di);
355 is(scalar($iter->oldhits_newly_below_threshold), shift @$di);
356 is(scalar($iter->oldhits_not_below_threshold), shift @$di);
358 if ($iter_count == 1) {
359 while( my $hit = $result->next_hit ) {
360 my $d = shift @valid_hit_data;
361 is($hit->name, shift @$d);
362 is($hit->length, shift @$d);
363 is($hit->accession, shift @$d);
364 is($hit->ncbi_gi, shift @$d);
365 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
366 is($hit->bits, shift @$d );
367 if( $hit_count == 1 ) {
369 while( my $hsp = $hit->next_hsp ){
370 is($hsp->query->start, 63);
371 is($hsp->query->end, 181);
372 is($hsp->hit->start, 304);
373 is($hsp->hit->end, 432);
374 is($hsp->length('hsp'), 129);
375 is($hsp->start('hit'), $hsp->hit->start);
376 is($hsp->end('query'), $hsp->query->end);
377 is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
378 cmp_ok($hsp->evalue, '==', 0.0242028);
380 is($hsp->bits, 32.7278);
381 is(sprintf("%.1f",$hsp->percent_identity), '24.0');
382 is(sprintf("%.4f",$hsp->frac_identical('query')), '0.2605');
383 is(sprintf("%.3f",$hsp->frac_identical('hit')), '0.240');
389 last if( $hit_count++ > @valid_hit_data );
393 is(@valid_hit_data, 0);
394 is(@valid_iter_data, 0);
398 $searchio = Bio::SearchIO->new('-format' => 'blast',
399 '-file' => test_input_file('ecolitst.bls'));
401 $result = $searchio->next_result;
403 is($result->database_name, 'ecoli.aa', 'database_name()');
404 is($result->database_entries, 4289);
405 is($result->database_letters, 1358990);
407 is($result->algorithm, 'BLASTP');
408 like($result->algorithm_version, qr/^2\.1\.3/);
409 like($result->query_name, qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/);
410 is($result->query_accession, 'AAC73113.1');
411 is($result->query_gi, 1786183);
412 is($result->query_length, 820);
413 is($result->get_statistic('kappa'), '0.135');
414 is($result->get_statistic('kappa_gapped'), '0.0410');
415 is($result->get_statistic('lambda'), '0.319');
416 is($result->get_statistic('lambda_gapped'), '0.267');
417 is($result->get_statistic('entropy'), '0.383');
418 is($result->get_statistic('entropy_gapped'), '0.140');
420 is($result->get_statistic('dbletters'), 1358990);
421 is($result->get_statistic('dbentries'), 4289);
422 is($result->get_statistic('effective_hsplength'), 47);
423 is($result->get_statistic('effectivespace'), 894675611);
424 is($result->get_parameter('matrix'), 'BLOSUM62');
425 is($result->get_parameter('gapopen'), 11);
426 is($result->get_parameter('gapext'), 1);
427 is($result->get_statistic('S2'), '92');
428 is($result->get_statistic('S2_bits'), '40.0');
429 is($result->get_parameter('expect'), '1.0e-03');
430 is($result->get_statistic('num_extensions'), '82424');
433 my @valid = ( [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 1567, 4058],
434 [ 'gb|AAC76922.1|', 810, 'AAC76922', '1e-91', 332, 850],
435 [ 'gb|AAC76994.1|', 449, 'AAC76994', '3e-47', 184, 467]);
437 while( $hit = $result->next_hit ) {
438 my $d = shift @valid;
440 is($hit->name, shift @$d);
441 is($hit->length, shift @$d);
442 is($hit->accession, shift @$d);
443 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
444 is($hit->bits, shift @$d );
445 is($hit->raw_score, shift @$d );
449 while( my $hsp = $hit->next_hsp ) {
450 is($hsp->query->start, 1);
451 is($hsp->query->end, 820);
452 is($hsp->hit->start, 1);
453 is($hsp->hit->end, 820);
454 is($hsp->length('hsp'), 820);
455 is($hsp->start('hit'), $hsp->hit->start);
456 is($hsp->end('query'), $hsp->query->end);
457 is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
458 is($hsp->evalue, '0.0');
459 is($hsp->score, 4058);
461 is(sprintf("%.2f",$hsp->percent_identity), 98.29);
462 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.9829);
463 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.9829);
469 last if( $count++ > @valid );
473 $searchio = Bio::SearchIO->new('-format' => 'blast',
474 '-file' => test_input_file('ecolitst.wublastp'));
476 $result = $searchio->next_result;
478 is($result->database_name, 'ecoli.aa');
479 is($result->database_letters, 1358990);
480 is($result->database_entries, 4289);
481 is($result->algorithm, 'BLASTP');
482 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
483 like($result->query_name, qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/);
484 is($result->query_accession, 'AAC73113.1');
486 is($result->query_length, 820);
487 is($result->query_gi, 1786183);
488 is($result->get_statistic('kappa'), 0.136);
489 is($result->get_statistic('lambda'), 0.319);
490 is($result->get_statistic('entropy'), 0.384);
491 is($result->get_statistic('dbletters'), 1358990);
492 is($result->get_statistic('dbentries'), 4289);
493 is($result->get_parameter('matrix'), 'BLOSUM62');
494 is($result->get_statistic('Frame+0_lambda_used'), '0.319');
495 is($result->get_statistic('Frame+0_kappa_used'), '0.136');
496 is($result->get_statistic('Frame+0_entropy_used'), '0.384');
498 is($result->get_statistic('Frame+0_lambda_computed'), '0.319');
499 is($result->get_statistic('Frame+0_kappa_computed'), '0.136');
500 is($result->get_statistic('Frame+0_entropy_computed'), '0.384');
502 is($result->get_statistic('Frame+0_lambda_gapped'), '0.244');
503 is($result->get_statistic('Frame+0_kappa_gapped'), '0.0300');
504 is($result->get_statistic('Frame+0_entropy_gapped'), '0.180');
506 @valid = ( [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 4141],
507 [ 'gb|AAC76922.1|', 810, 'AAC76922', '3.1e-86', 844],
508 [ 'gb|AAC76994.1|', 449, 'AAC76994', '2.8e-47', 483]);
510 while( $hit = $result->next_hit ) {
511 my $d = shift @valid;
514 # Test HSP contig data returned by SearchUtils::tile_hsps()
515 # Second hit has two hsps that overlap.
516 my($qcontigs, $scontigs) = Bio::Search::SearchUtils::tile_hsps($hit);
518 is($qcontigs->[0]->{'start'}, 5);
519 is($qcontigs->[0]->{'stop'}, 812);
520 is($qcontigs->[0]->{'iden'}, 250);
521 is($qcontigs->[0]->{'cons'}, 413);
523 is($scontigs->[0]->{'start'}, 16);
524 is($scontigs->[0]->{'stop'}, 805);
525 is($scontigs->[0]->{'iden'}, 248);
526 is($scontigs->[0]->{'cons'}, 410);
529 is($hit->name, shift @$d);
530 is($hit->length, shift @$d);
531 is($hit->accession, shift @$d);
532 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
533 is($hit->raw_score, shift @$d );
537 while( my $hsp = $hit->next_hsp ) {
538 is($hsp->query->start, 1);
539 is($hsp->query->end, 820);
540 is($hsp->hit->start, 1);
541 is($hsp->hit->end, 820);
542 is($hsp->length('hsp'), 820);
544 is($hsp->evalue, '0.0');
545 is($hsp->pvalue, '0.0');
546 is($hsp->score, 4141);
547 is($hsp->bits,1462.8);
548 is($hsp->percent_identity, 100);
549 is($hsp->frac_identical('query'), 1.00);
550 is($hsp->frac_identical('hit'), 1.00);
556 last if( $count++ > @valid );
560 # test that add hit really works properly for BLAST objects
562 my @hits = $result->hits;
563 $result->add_hit($hits[0]);
564 is($result->num_hits, @hits + 1);
566 # test WU-BLAST -noseqs option
567 $searchio = Bio::SearchIO->new('-format' => 'blast',
568 '-file' => test_input_file('ecolitst.noseqs.wublastp'));
570 $result = $searchio->next_result;
572 is($result->database_name, 'ecoli.aa');
573 is($result->database_letters, 1358990);
574 is($result->database_entries, 4289);
575 is($result->algorithm, 'BLASTP');
576 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
577 like($result->query_name, qr/gi|1786183|gb|AAC73113.1| (AE000111) aspartokinase I,\s+homoserine dehydrogenase I [Escherichia coli]/);
578 is($result->query_accession, 'AAC73113.1');
579 is($result->query_gi, 1786183);
581 is($result->query_length, 820);
582 is($result->get_statistic('kappa'), 0.135);
583 is($result->get_statistic('lambda'), 0.319);
584 is($result->get_statistic('entropy'), 0.384);
585 is($result->get_statistic('dbletters'), 1358990);
586 is($result->get_statistic('dbentries'), 4289);
587 is($result->get_parameter('matrix'), 'BLOSUM62');
588 is($result->get_statistic('Frame+0_lambda_used'), '0.319');
589 is($result->get_statistic('Frame+0_kappa_used'), '0.135');
590 is($result->get_statistic('Frame+0_entropy_used'), '0.384');
592 is($result->get_statistic('Frame+0_lambda_computed'), '0.319');
593 is($result->get_statistic('Frame+0_kappa_computed'), '0.135');
594 is($result->get_statistic('Frame+0_entropy_computed'), '0.384');
596 is($result->get_statistic('Frame+0_lambda_gapped'), '0.244');
597 is($result->get_statistic('Frame+0_kappa_gapped'), '0.0300');
598 is($result->get_statistic('Frame+0_entropy_gapped'), '0.180');
600 @valid = ( [ 'gb|AAC73113.1|', 820, 'AAC73113', '0', 4141],
601 [ 'gb|AAC76922.1|', 810, 'AAC76922', '6.6e-93', 907],
602 [ 'gb|AAC76994.1|', 449, 'AAC76994', '2.8e-47', 483]);
604 while( $hit = $result->next_hit ) {
605 my $d = shift @valid;
607 is($hit->name, shift @$d);
608 is($hit->length, shift @$d);
609 is($hit->accession, shift @$d);
610 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
611 is($hit->raw_score, shift @$d );
615 while( my $hsp = $hit->next_hsp ) {
616 is($hsp->query->start, 1);
617 is($hsp->query->end, 820);
618 is($hsp->hit->start, 1);
619 is($hsp->hit->end, 820);
620 is($hsp->length('hsp'), 820);
622 is($hsp->evalue , '0.');
623 is($hsp->pvalue , '0.');
624 is($hsp->score, 4141);
625 is($hsp->bits,1462.8);
626 is($hsp->percent_identity, 100);
627 is($hsp->frac_identical('query'), 1.00);
628 is($hsp->frac_identical('hit'), 1.00);
634 last if( $count++ > @valid );
639 $searchio = Bio::SearchIO->new('-format' => 'blast',
640 '-file' => test_input_file('HUMBETGLOA.tblastx'));
642 $result = $searchio->next_result;
643 is($result->database_name, 'ecoli.nt');
644 is($result->database_letters, 4662239);
645 is($result->database_entries, 400);
646 is($result->algorithm, 'TBLASTX');
647 like($result->algorithm_version, qr/^2\.1\.2/);
648 is($result->query_name, 'HUMBETGLOA');
649 is($result->query_description, 'Human haplotype C4 beta-globin gene, complete cds.');
650 is($result->query_length, 3002);
651 is($result->get_statistic('kappa'), 0.135);
652 is($result->get_statistic('lambda'), 0.318);
653 is($result->get_statistic('entropy'), 0.401);
654 is($result->get_statistic('dbletters'), 4662239);
655 is($result->get_statistic('dbentries'), 400);
656 is($result->get_statistic('T'), 13);
657 is($result->get_statistic('X1'), 16);
658 is($result->get_statistic('X1_bits'), 7.3);
659 is($result->get_statistic('X2'), 0);
660 is($result->get_statistic('X2_bits'), '0.0');
661 is($result->get_statistic('S1'), 41);
662 is($result->get_statistic('S1_bits'), 21.7);
663 is($result->get_statistic('S2'), 53);
664 is($result->get_statistic('S2_bits'), 27.2);
666 is($result->get_statistic('decayconst'), 0.1);
668 is($result->get_parameter('matrix'), 'BLOSUM62');
670 @valid = ( [ 'gb|AE000479.1|AE000479', 10934, 'AE000479', '0.13', 33.6, 67],
671 [ 'gb|AE000302.1|AE000302', 10264, 'AE000302', '0.61', 31.3, 62],
672 [ 'gb|AE000277.1|AE000277', 11653, 'AE000277', '0.84', 30.8, 61]);
675 while( $hit = $result->next_hit ) {
676 my $d = shift @valid;
677 is($hit->name, shift @$d);
678 is($hit->length, shift @$d);
679 is($hit->accession, shift @$d);
680 is($hit->significance, shift @$d );
681 is($hit->bits, shift @$d );
682 is($hit->raw_score, shift @$d );
686 while( my $hsp = $hit->next_hsp ) {
687 is($hsp->query->start, 1057);
688 is($hsp->query->end, 1134);
689 is($hsp->query->strand, 1);
690 is($hsp->strand('query'), $hsp->query->strand);
691 is($hsp->hit->end, 5893);
692 is($hsp->hit->start, 5816);
693 is($hsp->hit->strand, -1);
694 is($hsp->strand('sbjct'), $hsp->subject->strand);
695 is($hsp->length('hsp'), 26);
697 is($hsp->evalue , 0.13);
700 is(sprintf("%.2f",$hsp->percent_identity), 42.31);
701 is(sprintf("%.4f",$hsp->frac_identical('query')), '0.4231');
702 is(sprintf("%.4f",$hsp->frac_identical('hit')), '0.4231');
703 is($hsp->query->frame(), 0);
704 is($hsp->hit->frame(), 1);
706 is($hsp->query_string, 'SAYWSIFPPLGCWWSTLGPRGSLSPL');
707 is($hsp->hit_string, 'AAVWALFPPVGSQWGCLASQWRTSPL');
708 is($hsp->homology_string, '+A W++FPP+G W L + SPL');
709 # changed to reflect positional ambiguities, note extra flag
710 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '1063-1065 1090-1095 1099-1104 1108-1113 1117-1125');
711 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '5825-5833 5837-5842 5846-5851 5855-5860 5885-5887');
712 is($hsp->ambiguous_seq_inds, 'query/subject');
717 last if( $count++ > @valid );
721 $searchio = Bio::SearchIO->new(-format => 'fasta',
722 -file => test_input_file('HUMBETGLOA.FASTA'));
723 $result = $searchio->next_result;
724 like($result->database_name, qr/dros_clones.2.5/);
725 is($result->database_letters, 112936249);
726 is($result->database_entries, 657);
727 is($result->algorithm, 'FASTN');
728 is($result->algorithm_version, '3.3t08');
729 is($result->query_name, "HUMBETGLOA");
730 is($result->query_description, "Human haplotype C4 beta-globin gene, complete cds.");
731 is($result->query_length, 3002);
732 is($result->get_parameter('gapopen'), -16);
733 is($result->get_parameter('gapext'), -4);
734 is($result->get_parameter('ktup'), 6);
736 is($result->get_statistic('lambda'), 0.0823);
737 is($result->get_statistic('dbletters'), 112936249);
738 is($result->get_statistic('dbentries'), 657);
740 @valid = ( [ 'BACR21I23', 73982, 'BACR21I23', '0.017', 44.2],
741 [ 'BACR40P19', 73982, 'BACR40P19', '0.017', 44.2],
742 [ 'BACR30L17', 32481, 'BACR30L17', '0.018', 44.1]);
745 while( my $hit = $result->next_hit ) {
746 my $d = shift @valid;
747 is($hit->name, shift @$d);
748 is($hit->length, shift @$d);
749 is($hit->accession, shift @$d);
750 is($hit->significance, shift @$d );
751 is($hit->raw_score, shift @$d );
752 is($hit->rank, $count + 1);
755 while( my $hsp = $hit->next_hsp ) {
756 is($hsp->query->start, 31);
757 is($hsp->query->end, 289);
758 is($hsp->query->strand, -1);
759 is($hsp->hit->end, 65167);
760 is($hsp->hit->start, 64902);
761 is($hsp->hit->strand, 1);
762 is($hsp->length('hsp'), 267);
763 is($hsp->evalue , 0.017);
764 is($hsp->score, 134.5);
766 is(sprintf("%.2f",$hsp->percent_identity), '57.30');
767 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.5907);
768 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.5752);
769 # these are really UNGAPPED values not CONSERVED
770 # otherwise ident and conserved would be identical for
771 # nucleotide alignments
772 is(sprintf("%.4f",$hsp->frac_conserved('total')), 0.5955);
773 is(sprintf("%.4f",$hsp->frac_conserved('query')), 0.6139);
774 is(sprintf("%.4f",$hsp->frac_conserved('hit')), 0.5977);
775 is($hsp->query->frame(), 0);
776 is($hsp->hit->frame(), 0);
778 is($hsp->gaps('query'), 8);
779 is($hsp->gaps('hit'),1);
780 is($hsp->query_string, 'GATTAAAACCTTCTGGTAAGAAAAGAAAAAATATATATATATATATATGTGTATATGTACACACATACATATACATATATATGCATTCATTTGTTGTTGTTTTTCTTAATTTGCTCATGCATGCTA----ATAAATTATGTCTAAAAATAGAAT---AAATACAAATCAATGTGCTCTGTGCATTA-GTTACTTATTAGGTTTTGGGAAACAAGAGGTAAAAAACTAGAGACCTCTTAATGCAGTCAAAAATACAAATAAATAAAAAGTCACTTACAACCCAAAGTGTGACTATCAATGGGGTAATCAGTGGTGTCAAATAGGAGGT');
781 is($hsp->hit_string, 'GATGTCCTTGGTGGATTATGGTGTTAGGGTATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATATAATATAATACAAAATATAATACAAAATATAATACAAAATATAATACAAAATATAATACAAAATATAATACAAAATATAATACAAAATATAATATAAAATATAATATAAAATATAATATAAAATAAAATATAAAATAAAATATAAAATAAAATATAAAATAAAATATAAAATAAAATAT-AATATAAAATATAAAATAAAATATAATATAAAATATAATATAAAATATAATATAAAATATAATATAAAATA');
782 is($hsp->homology_string, ' :::::::::::::::::: : ::::: :: : : ::: ::::: :::::::: :: :: : : : : : : : :: : :: :: :: : ::: ::: :::::: ::: ::::: :: ::: : : : :: ::: : :: : : : : :: : :: : : :: : : :: : : ::: ::: :: ::::: ::: : : :: :: ::: : : : ::: :: '.' 'x60);
783 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '33 37 39 41 43 47-49 52 55 56 58 60 64 70 71 74 78 82 84 86 87 90-96 98 100 103 105 107 110-112 114 117 119 121-123 125 127-129 132 134 135 139-141 143 145-148 150-153 155 156 160 161 164 170 173 180-184 188 192 194 196-198 201 204 206-209 212 213 215 217 219 221 223-225 227 229 232 233 236 237 246 252 256 258 260 263 269 271');
784 is(join(' ', $hsp->seq_inds('query', 'conserved',1)), '31 32 34-36 38 40 42 44-46 50 51 53 54 57 59 61-63 65-69 72 73 75-77 79-81 83 85 88 89 97 99 101 102 104 106 108 109 113 115 116 118 120 124 126 130 131 133 136-138 141 142 144 149 154 157-159 162 163 165-172 174-179 185-187 189-191 193-195 199 200 202 203 205 210 211 214 216 218 220 222 226 228 230 231 234 235 238-245 247-251 253-255 257 259 261 262 264-268 270 272-289');
785 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '64920 64922 64928 64931 64933 64935 64939 64945 64954 64955 64958 64959 64962 64964 64966-64968 64970 64972 64974 64976 64978 64979 64982-64985 64987 64990 64993-64995 64998-65001 65003 65007 65011-65015 65022 65026-65028 65034 65037 65038 65042 65043 65045-65048 65050-65053 65055 65058-65060 65064 65065 65067 65070-65072 65074 65076-65078 65080 65082 65085 65087-65089 65092 65094 65096 65099 65101 65103-65109 65112 65113 65115 65117 65121 65125 65128 65129 65135 65139 65141 65143 65144 65147 65150-65152 65156 65158 65159 65161 65165');
786 is(join(' ', $hsp->seq_inds('hit', 'conserved',1)), '64902-64919 64921 64923-64927 64929 64930 64932 64934 64936-64938 64940-64944 64946-64953 64956 64957 64960 64961 64963 64965 64969 64971 64973 64975 64977 64980 64981 64986 64988 64989 64991 64992 64996 64997 65002 65004-65006 65008-65010 65016-65021 65023-65025 65029-65033 65035 65036 65039-65041 65044 65049 65054 65056 65057 65061-65063 65066 65068 65069 65073 65075 65079 65081 65083 65084 65086 65090 65091 65093 65095 65097 65098 65100 65102 65110 65111 65114 65116 65118-65120 65122-65124 65126 65127 65130-65134 65136-65138 65140 65142 65145 65146 65148 65149 65153-65155 65157 65159 65160 65162-65164 65166 65167');
787 is(join(' ', $hsp->seq_inds('query', 'gap',1)), '141 170 194');
788 is($hsp->ambiguous_seq_inds, '');
789 # note: the reason this is not the same percent id above
790 # is we are calculating average percent id
791 is(sprintf("%.2f",$hsp->get_aln->percentage_identity()), '59.30');
796 last if( $count++ > @valid );
800 $searchio = Bio::SearchIO->new(-format => 'fasta',
801 -file => test_input_file('cysprot1.FASTA'));
802 $result = $searchio->next_result;
803 like($result->database_name, qr/ecoli.aa/);
804 is($result->database_letters, 1358987);
805 is($result->database_entries, 4289);
806 is($result->algorithm, 'FASTP');
807 is($result->algorithm_version, '3.3t08');
808 is($result->query_name, 'CYS1_DICDI');
809 is($result->query_length, 343);
810 is($result->get_parameter('gapopen'), -12);
811 is($result->get_parameter('gapext'), -2);
812 is($result->get_parameter('ktup'), 2);
814 is($result->get_statistic('lambda'), 0.1456);
815 is($result->get_statistic('dbletters'), 1358987);
816 is($result->get_statistic('dbentries'), 4289);
819 @valid = ( [ 'gi|1787478|gb|AAC74309.1|', 512, 'AAC74309', 1787478, 1.2, 29.2],
820 [ 'gi|1790635|gb|AAC77148.1|', 251, 'AAC77148', 1790635, 2.1, 27.4],
821 [ 'gi|1786590|gb|AAC73494.1|', 94, 'AAC73494', 1786590, 2.1, 25.9]);
824 while( my $hit = $result->next_hit ) {
825 my $d = shift @valid;
826 is($hit->name, shift @$d);
827 is($hit->length, shift @$d);
828 is($hit->accession, shift @$d);
829 is($hit->ncbi_gi, shift @$d);
830 is($hit->significance, shift @$d );
831 is($hit->raw_score, shift @$d );
835 while( my $hsp = $hit->next_hsp ) {
836 is($hsp->query->start, 125);
837 is($hsp->query->end, 305);
838 is($hsp->query->strand, 0);
839 is($hsp->hit->start, 2);
840 is($hsp->hit->end, 181);
841 is($hsp->hit->strand, 0);
842 is($hsp->length('hsp'), 188);
843 is($hsp->evalue , 1.2);
844 is($hsp->score, 109.2);
846 is(sprintf("%.2f",$hsp->percent_identity), 23.94);
847 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.2486);
848 is(sprintf("%.4f",$hsp->frac_identical('hit')), '0.2500');
849 is(sprintf("%.4f",$hsp->frac_conserved('query')), '0.2707');
850 is(sprintf("%.4f",$hsp->frac_conserved('hit')), '0.2722');
851 # there is slight rounding different here so file says 26.012%
852 # but with the rounding this ends up as 0.2606
853 is(sprintf("%.4f",$hsp->frac_conserved('total')), '0.2606');
854 is($hsp->query->frame(), 0);
855 is($hsp->hit->frame(), 0);
856 is($hsp->gaps('query'), 7);
858 is($hsp->query_string, 'NKEAIFTDDLPVADYLDDEFINSIPTAFDWRTRGAVTPVKNQGQCGSCWSFSTT-GNV----EGQHFISQNKLVSLSEQNLVDCDHECME-YEGEEACDEGCNGGLQPNAYNYIIKNGGIQTESSYPYTAETGTQCNFNSANIGAKISNFTMIPKNETVMAGYIVSTGP-LAIAADAVEWQFYIGGVFDIPCNPNSLDHGILIVGYSAKNTIFRKNMPYWIVKNSWGADWGEQGYIYLRRGKNTCGVSNFVSTSII');
859 is($hsp->hit_string, (' 'x29).'MKIRSQVGMVLNLDKCIGCHTCSVTCKNVWTSREGVEYAWFNNVETKPGQGF-PTDWENQEKYKGGWI--RKINGKLQPRMGNRAMLLGKIFANPHLPGIDDYYEPFDFDYQNLHTAPEG----SKSQPIARPRSLITGERMAKIEKGPNWEDDLGGEFDKLAKDKNFDN-IQKAMYSQFENTFMMYLPRLCEHCLNPACVATCPSGAIYKREEDGIVLIDQDKCRGWRMCITGCPYKKIYFNWKSGKSEKCIFCYPRIEAGQPTVCSETC');
860 is($hsp->homology_string, ' . :. : : : .: .: . :.: :: :: .. :.. . :.. : : .: :.: . :: ::: : . : : .. : . .:. :. . . :.. . . :: .: . .:. .:: :: . ...:. : . :: .. : .: '.' 'x60);
861 # note: the reason this is not the same percent id above
862 # is we are calculating average percent id
863 is(sprintf("%.2f",$hsp->get_aln->percentage_identity()), 26.01);
868 last if( $count++ > @valid );
872 is($result->hits, 8);
873 $searchio = Bio::SearchIO->new(-format => 'fasta',
874 -file => test_input_file('cysprot_vs_gadfly.FASTA'));
875 $result = $searchio->next_result;
876 like($result->database_name, qr/gadflypep2/);
877 is($result->database_letters, 7177762);
878 is($result->database_entries, 14334);
879 is($result->algorithm, 'FASTP');
880 is($result->algorithm_version, '3.3t08');
881 is($result->query_name, 'cysprot.fa');
882 is($result->query_length, 2385);
883 is($result->get_parameter('gapopen'), -12);
884 is($result->get_parameter('gapext'), -2);
885 is($result->get_parameter('ktup'), 2);
886 is($result->get_parameter('matrix'), 'BL50');
888 is($result->get_statistic('lambda'), 0.1397);
889 is($result->get_statistic('dbletters'), 7177762 );
890 is($result->get_statistic('dbentries'), 14334);
893 @valid = ( [ 'Cp1|FBgn0013770|pp-CT20780|FBan0006692', 341,
894 'FBan0006692', '3.1e-59', 227.8],
895 [ 'CG11459|FBgn0037396|pp-CT28891|FBan0011459', 336,
896 'FBan0011459', '6.4e-41', 166.9],
897 [ 'CG4847|FBgn0034229|pp-CT15577|FBan0004847', 390,
898 'FBan0004847', '2.5e-40', 165.2]);
901 while( my $hit = $result->next_hit ) {
902 my $d = shift @valid;
904 is($hit->name, shift @$d);
905 is($hit->length, shift @$d);
906 is($hit->accession, shift @$d);
907 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
908 is($hit->raw_score, shift @$d );
912 while( my $hsp = $hit->next_hsp ) {
913 is($hsp->query->start, 1373);
914 is($hsp->query->end, 1706);
915 is($hsp->query->strand, 0);
916 is($hsp->hit->start, 5);
917 is($hsp->hit->end, 341);
918 is($hsp->hit->strand, 0);
919 is($hsp->length('hsp'), 345);
920 is(sprintf("%g",$hsp->evalue), sprintf("%g",'3.1e-59') );
921 is($hsp->score, 1170.6);
922 is($hsp->bits,227.8);
923 is(sprintf("%.2f",$hsp->percent_identity), 53.04);
924 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.5479);
925 is(sprintf("%.4f",$hsp->frac_identical('hit')), '0.5430');
926 is($hsp->query->frame(), 0);
927 is($hsp->hit->frame(), 0);
928 is($hsp->gaps('query'), 11);
930 is($hsp->hit_string, (' 'x26).'MRTAVLLPLLAL----LAVAQA-VSFADVVMEEWHTFKLEHRKNYQDETEERFRLKIFNENKHKIAKHNQRFAEGKVSFKLAVNKYADLLHHEFRQLMNGFNYTLHKQLRAADESFKGVTFISPAHVTLPKSVDWRTKGAVTAVKDQGHCGSCWAFSSTGALEGQHFRKSGVLVSLSEQNLVDCSTKYGNNGCNGGLMDNAFRYIKDNGGIDTEKSYPYEAIDDSCHFNKGTVGATDRGFTDIPQGDEKKMAEAVATVGPVSVAIDASHESFQFYSEGVYNEPQCDAQNLDHGVLVVGFGTDESGED---YWLVKNSWGTTWGDKGFIKMLRNKENQCGIASASSYPLV');
931 is($hsp->query_string, 'SNWGNNGYFLIERGKNMCGLAACASYPIPQVMNPTLILAAFCLGIASATLTFDHSLEAQWTKWKAMHNRLY-GMNEEGWRRAVWEKNMKMIELHNQEYREGKHSFTMAMNAFGDMTSEEFRQVMNGFQ---NRKPR------KGKVFQEPLFYEAPRSVDWREKGYVTPVKNQGQCGSCWAFSATGALEGQMFRKTGRLISLSEQNLVDCSGPQGNEGCNGGLMDYAFQYVQDNGGLDSEESYPYEATEESCKYNPKYSVANDTGFVDIPK-QEKALMKAVATVGPISVAIDAGHESFLFYKEGIYFEPDCSSEDMDHGVLVVGYGFESTESDNNKYWLVKNSWGEEWGMGGYVKMAKDRRNHCGIASAASYPTVMTPLLLLAVLCLGTALATPKFDQTFNAQWHQWKSTHRRLYGTNEE');
932 # note: the reason this is not the same percent id above
933 # is we are calculating average percent id
934 is(sprintf("%.2f",$hsp->get_aln->percentage_identity()), 56.13);
939 last if( $count++ > @valid );
942 is($result->hits, 21);
945 $searchio = Bio::SearchIO->new(-format => 'fasta',
946 -file => test_input_file('5X_1895.FASTXY'));
947 $result = $searchio->next_result;
948 like($result->database_name, qr/yeast_nrpep.fasta/);
949 is($result->database_letters, 4215311);
950 is($result->database_entries, 9190);
951 is($result->algorithm, 'FASTY');
952 is($result->algorithm_version, '3.4t07');
953 is($result->query_name, '5X_1895.fa');
954 is($result->query_length, 7972);
955 is($result->get_parameter('gapopen'), -14);
956 is($result->get_parameter('gapext'), -2);
957 is($result->get_parameter('ktup'), 2);
958 is($result->get_parameter('matrix'), 'BL50');
960 is($result->get_statistic('lambda'), 0.1711);
961 is($result->get_statistic('dbletters'), 4215311);
962 is($result->get_statistic('dbentries'), 9190);
965 @valid = ( [ 'NR_SC:SW-YNN2_YEAST', 1056, 'NR_SC:SW-YNN2_YEAST','1.6e-154', '547.0'],
966 [ 'NR_SC:SW-MPCP_YEAST', 311, 'NR_SC:SW-MPCP_YEAST', '1.3e-25', 117.1],
967 [ 'NR_SC:SW-YEO3_YEAST', 300, 'NR_SC:SW-YEO3_YEAST', '5.7e-05', 48.5]);
970 while( my $hit = $result->next_hit ) {
971 my $d = shift @valid;
973 is($hit->name, shift @$d);
974 is($hit->length, shift @$d);
975 is($hit->accession, shift @$d);
976 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
977 is($hit->raw_score, shift @$d );
981 while( my $hsp = $hit->next_hsp ) {
982 is($hsp->query->start, 2180);
983 is($hsp->query->end, 5623);
984 is($hsp->query->strand, 1);
985 is($hsp->hit->start, 3);
986 is($hsp->hit->end, 1053);
987 is($hsp->hit->strand, 0);
988 is($hsp->length('hsp'), 1165);
990 is(sprintf("%g",$hsp->evalue), sprintf("%g",'1.6e-154'));
991 is($hsp->score, 2877.6);
992 is($hsp->bits,'547.0');
993 is(sprintf("%.2f",$hsp->percent_identity), 51.67);
994 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.5244);
995 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.5728);
996 is($hsp->query->frame(), 0);
997 is($hsp->hit->frame(), 0);
999 is($hsp->query_string, 'RKQLDPRIPALINNGVKANHRSFFVMVGDKGRDQVCPGMQAAMRFD*HRCR/LVNLHFLLSQARVSSRPSVLWCYKKD-LGFTT*VAASENLQQTIYFRPIATSHRKKREAKIKRDVKRGIRDANEQDPFELFVTVTDIRYTYYKDSAKILGQTFGMLVLQDYEAITPNLLARTIETVEGGGIVVLLLKTMSSLKQLYAMAM/DKL*CRDGVE*SDFS*LLI*DVHSRYRTDAHQFVQPRFNERFILSLGSNPDCLVLDDELNVLPLSKGKDIQIGKAGEEDDRGRKRKAEELKEMKENLEGVDIVGSLAKLAKTVDQAKAILTFVEAISEKNLSSTVALTAGRGRGKSAALGLAIGAALAHDYSNIFVTSPDPENLKTLFEFVFKALDALGYEEHIDYDVVQSTNPDFKKAIVRVNIFRGHRQTIQYISPEDSHVLGQAELVIIDEAAAIPLPLVRKLIGPYLVFMASTINGYEGTGRSLSIKLIQQLREQTRPSITKDSENAAASSAGSSSKAAAAGRSGAGLVRSLREIKLDEPIRYSPGDNVEKWLNNLLCLDATIVSK---SIQGCPHPSKCELYYVNRDTLFSYHPASEVFLQRMMALYVASHYKNSPNDLQMLSDAPAHHLFVLLPPIDEND-NTLPDPLVVLQVALEGNISREAILKEMAQSGMRSSGDMIPWIISTQFQDNDFATLSGARVVRIATHPDYARMGYGSRAMEALESFYNGTSYNFDDVPVDMGESFAD\VPRSDL*VTSFIPFPQNRTSTECVSQNANLQNDTIAIRDPSRMPPLLQRLSERKPETLDYLGVSFGLTRDLLRFWKKGGFTPLYASQKENALTGEYTFVMLKVLASAGGGGEWLGAFAQGMSCLLLQDEVHMGND*RL*TDFRQRFMNLLSYEAFKKFDASIALSILESTVPRNSPSPAP----KLLTNTELSSLLTPFDIKRLESYADSMLDYHVVLDLVPTIASLFFGKRLETS--LPPAQQAILLALGLQRKNVEALENELGITSTQTLALFGKVLRKMTKSLEDIRKASIASELP-----AEPTLAGRSANGSNKFVALQQTIEQDLADSAVQLNGEDDDASKKEQRELLNTLNMEEFAI-DQGGDWTEAEKQVERLASGKGGTRLSSTVSVKVDKLDD\AKRRRRRARMRVPRMRRR');
1000 is($hsp->hit_string, 'KKAIDSRIPSLIRNGVQTKQRSIFVIVGDRARNQ------------------LPNLHYLMMSADLKMNKSVLWAYKKKLLGFT--------------------SHRKKRENKIKKEIKRGTREVNEMDPFESFISNQNIRYVYYKESEKILGNTYGMCILQDFEALTPNLLARTIETVEGGGIVVILLKSMSSLKQLYTMTM-D--------------------VHARYRTEAHGDVVARFNERFILSLGSNPNCLVVDDELNVLPLSGAKNVKPLPPKEDDELPPKQL--ELQELKESLEDVQPAGSLVSLSKTVNQAHAILSFIDAISEKTLNFTVALTAGRGRGKSAALGISIAAAVSHGYSNIFVTSPSPENLKTLFEFIFKGFDALGYQEHIDYDIIQSTNPDFNKAIVRVDIKRDHRQTIQYIVPQDHQVLGQAELVVIDEAAAIPLPIVKNLLGPYLVFMASTINGYEGTGRSLSLKLIQQLRNQNNTSGRESTQTAVVSRDNKEKDSHLHSQS-----RQLREISLDEPIRYAPGDPIEKWLNKLLCLDVTLIKNPRFATRGTPHPSQCNLFVVNRDTLFSYHPVSENFLEKMMALYVSSHYKNSPNDLQLMSDAPAHKLFVLLPPIDPKDGGRIPDPLCVIQIALEGEISKESVRNSLSR-GQRAGGDLIPWLISQQFQDEEFASLSGARIVRIATNPEYASMGYGSRAIELLRDYFEGKF-------TDMSE---D-VRPKDYSI--------KRVSDKELAKT-NLLKDDVKLRDAKTLPPLLLKLSEQPPHYLHYLGVSYGLTQSLHKFWKNNSFVPVYLRQTANDLTGEHTCVMLNVLE--GRESNWLVEFAK---------------------DFRKRFLSLLSYD-FHKFTAVQALSVIESSKKAQDLSDDEKHDNKELTRTHLDDIFSPFDLKRLDSYSNNLLDYHVIGDMIPMLALLYFGDKMGDSVKLSSVQSAILLAIGLQRKNIDTIAKELNLPSNQTIAMFAKIMRKMSQYFRQLLSQSIEETLPNIKDDAIAEMDGEEIKNYNAAEALDQ-MEEDLEEAG----SEAVQAMREKQKELINSLNLDKYAINDNSEEWAESQKSLEIAAKAKGVVSLKTGKKRTTEKAED-IYRQEMKA-MKKPRKSKK');
1001 is($hsp->homology_string, '.: .: :::.:: :::....::.::.:::..:.: : :::.:. .: .. :::: ::: :::: ::::::: :::...::: :..::.:::: :.. .:::.:::.: ::::.:.:: .:::.::.:::::::::::::::::::.:::.::::::::.:.: : ::.::::.:: : ::::::::::::::.:::.:::::::::: .:... :.:. :. ::.:.::.:: :. .:::..:.:::.::.:::.:..:::::.:. :::::::::::::::::..:.::..: :::::::::.::::::::::.::..:::::.::::::..:::::::.::::::.: : :::::::: :.: .::::::::.::::::::::.:..:.::::::::::::::::::::::.:::::::.:. : .....:..: .. . . ..: :.::::.:::::::.::: .:::::.:::::.:.... . .: ::::.:.:. :::::::::::.:: ::..::::::.:::::::::::..::::::.::::::::: .: . .:::: :.:.::::.::.:.. . ... :.:..::.:::.:: ::::..::.:::::.:::::.:.:: :::::::.: :.....: .::.: : : .: . .:.: . .... :: .: . .:: . .:::: .:::. :. : :::::.:::..: .:::...:.:.: : : ::::.: :::.:: : ..:: ::. :::.::..::::. :.:: : :::..::. .. : : :: :.:.....:::.:::.::....:::::. :..: .: :.:: .. : : .:.:::::.::::::.... .::.. :.::.:.:.:..:::.. .... . :: :: : . :. .. : ::.: .:.:: ... .: .: ...:.::.:.::....:: :.. .:.:..:..: :..:: . :.. . ..: .: :.. .: :. :: ..');
1002 # note: the reason this is not the same percent id above
1003 # is we are calculating average percent id
1004 is(sprintf("%.2f",$hsp->get_aln->overall_percentage_identity()),
1006 is(sprintf("%.2f",$hsp->get_aln->average_percentage_identity()),
1008 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '2186-2188 2195-2197 2216-2218 2282-2335 2339-2341 2360-2362 2369-2371 2378-2386 2399-2401 2411-2416 2426-2485 2507-2509 2537-2539 2570-2572 2582-2587 2618-2620 2648-2650 2783-2785 2789-2848 2879-2884 2888-2893 2981-2983 2999-3013 3026-3034 3041-3049 3080-3082 3089-3091 3182-3184 3263-3265 3431-3433 3437-3439 3464-3466 3476-3478 3656-3661 3665-3670 3698-3703 3710-3712 3716-3718 3722-3730 3740-3754 3809-3811 3866-3871 3878-3880 3908-3910 3953-3955 4076-4078 4085-4090 4106-4108 4154-4156 4160-4162 4172-4174 4217-4219 4295-4297 4325-4327 4349-4375 4391-4399 4403-4405 4409-4414 4421-4426 4430-4453 4466-4468 4472-4474 4487-4489 4496-4498 4505-4507 4511-4513 4523-4525 4529-4531 4547-4549 4565-4567 4574-4576 4580-4582 4619-4621 4658-4663 4667-4672 4676-4678 4697-4699 4718-4726 4730-4735 4748-4753 4763-4825 4865-4867 4880-4882 4886-4891 4916-4924 4931-4933 4937-4951 4958-4960 5045-5047 5060-5062 5069-5071 5084-5086 5093-5098 5102-5110 5168-5170 5186-5188 5240-5242 5255-5257 5261-5263 5270-5278 5285-5296 5300-5302 5309-5314 5321-5323 5327-5335 5348-5350 5366-5368 5378-5389 5396-5401 5408-5410 5465-5467 5474-5476 5507-5512 5528-5530 5534-5536 5546-5551 5555-5560 5570-5572 5579-5587 5597-5599 5606-5608 5615-5617');
1009 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '9 12 19 40 42 49 52 55-57 62 66 67 71 79 89 100 104 105 116 126 170 171 182 183 185 186 216 222-226 231-233 236 247 250 281 308 364 366 375 379 439 440 442 443 453 454 457 459 461-463 466 485 504-506 508 511 521 536 577 580 582 588 604 606 609 624 650 660 668 669 674 675 677 678 681-683 688 690 694 697 700 702 706 708 714 720 723 725 738 751 752 754 755 757 764 771 773 774 779 780 783 796 801 803 804 813-815 818 820-826 828 831 860 865 868 873 876 877 879 880 882 883 903 909 927 932 934 937-939 942-946 948-950 952 955 956 959 961-963 967 973 976 979 980 983 1002 1006 1017 1018 1024 1026 1030 1031 1033 1034 1038 1040-1042 1046 1048 1051');
1010 is(join(' ', $hsp->seq_inds('query', 'gap',1)), '2422 3871 4090 4948 5104 5287 5467');
1011 is(join(' ', $hsp->seq_inds('hit', 'gap',1)), '40 71 170 171 236 466 609 669 674 675 683 694 771 783 796 967 976 1040 1048');
1012 is($hsp->ambiguous_seq_inds, 'query');
1017 last if( $count++ > @valid );
1020 is($result->hits, 58);
1021 # test for MarkW bug in blastN
1023 $searchio = Bio::SearchIO->new('-format' => 'blast',
1024 '-file' => test_input_file('a_thaliana.blastn'));
1027 $result = $searchio->next_result;
1028 is($result->database_name, 'All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS, GSS,or phase 0, 1 or 2 HTGS sequences) ');
1029 is($result->database_letters, 4677375331);
1030 is($result->database_entries, 1083200);
1031 is($result->algorithm, 'BLASTN');
1032 like($result->algorithm_version, qr/^2\.2\.1/);
1033 is($result->query_name, '');
1034 is($result->query_length, 60);
1035 is($result->get_parameter('gapopen'), 5);
1036 is($result->get_parameter('gapext'), 2);
1037 is($result->get_parameter('ktup'), undef);
1039 is($result->get_statistic('lambda'), 1.37);
1040 is($result->get_statistic('kappa'), 0.711);
1041 is($result->get_statistic('entropy'),1.31 );
1042 is($result->get_statistic('T'), 0);
1043 is($result->get_statistic('A'), 30);
1044 is($result->get_statistic('X1'), '6');
1045 is($result->get_statistic('X1_bits'), 11.9);
1046 is($result->get_statistic('X2'), 15);
1047 is($result->get_statistic('X2_bits'), 29.7);
1048 is($result->get_statistic('S1'), 12);
1049 is($result->get_statistic('S1_bits'), 24.3);
1050 is($result->get_statistic('S2'), 17);
1051 is($result->get_statistic('S2_bits'), 34.2);
1053 is($result->get_statistic('dbentries'), 1083200);
1055 @valid = ( [ 'gb|AY052359.1|', 2826, 'AY052359', '3e-18', 95.6, 48, 1, 60,
1057 [ 'gb|AC002329.2|AC002329', 76170, 'AC002329', '3e-18', 95.6, 48, 1, 60,
1059 [ 'gb|AF132318.1|AF132318', 5383, 'AF132318', '0.04', 42.1, 21, 35, 55,
1063 while( my $hit = $result->next_hit ) {
1064 my $d = shift @valid;
1065 is($hit->name, shift @$d);
1066 is($hit->length, shift @$d);
1067 is($hit->accession, shift @$d);
1068 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
1069 is($hit->bits, shift @$d );
1070 is($hit->raw_score, shift @$d );
1071 is($hit->start, shift @$d);
1072 is($hit->end,shift @$d);
1073 is(sprintf("%.4f",$hit->frac_aligned_query), shift @$d);
1076 while( my $hsp = $hit->next_hsp ) {
1077 is($hsp->query->start, 1);
1078 is($hsp->query->end, 60);
1079 is($hsp->query->strand, 1);
1080 is($hsp->hit->start, 154);
1081 is($hsp->hit->end, 212);
1082 is($hsp->hit->strand, 1);
1083 is($hsp->length('hsp'), 60);
1084 is(sprintf("%g",$hsp->evalue), sprintf("%g",'3e-18'));
1085 is($hsp->score, 48);
1086 is($hsp->bits,95.6);
1087 is(sprintf("%.2f",$hsp->percent_identity), 96.67);
1088 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.9667);
1089 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.9831);
1090 is($hsp->query->frame(), 0);
1091 is($hsp->hit->frame(), 0);
1092 is($hsp->gaps('query'), 0);
1093 is($hsp->gaps('hit'), 1);
1095 is($hsp->query_string, 'aggaatgctgtttaattggaatcgtacaatggagaatttgacggaaatagaatcaacgat');
1096 is($hsp->hit_string, 'aggaatgctgtttaattggaatca-acaatggagaatttgacggaaatagaatcaacgat');
1097 is($hsp->homology_string, '||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||');
1098 is(sprintf("%.2f",$hsp->get_aln->overall_percentage_identity), 96.67);
1099 is(sprintf("%.2f",$hsp->get_aln->percentage_identity), 98.31);
1104 last if( $count++ > @valid );
1110 $searchio = Bio::SearchIO->new('-format' => 'blast',
1111 '-file' => test_input_file('dnaEbsub_ecoli.wublastx'));
1113 $result = $searchio->next_result;
1114 is($result->database_name, 'ecoli.aa');
1115 is($result->database_letters, 1358990);
1116 is($result->database_entries, 4289);
1117 is($result->algorithm, 'BLASTX');
1118 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
1119 is($result->query_name, 'gi|142864|gb|M10040.1|BACDNAE');
1120 is($result->query_description, 'B.subtilis dnaE gene encoding DNA primase, complete cds');
1121 is($result->query_accession, 'M10040.1');
1122 is($result->query_gi, 142864);
1123 is($result->query_length, 2001);
1124 is($result->get_parameter('matrix'), 'blosum62');
1126 is($result->get_statistic('lambda'), 0.318);
1127 is($result->get_statistic('kappa'), 0.135);
1128 is($result->get_statistic('entropy'),0.401 );
1130 is($result->get_statistic('dbentries'), 4289);
1132 @valid = ( [ 'gi|1789447|gb|AAC76102.1|', 581, 'AAC76102', '1.1e-74', 671]);
1135 while( my $hit = $result->next_hit ) {
1136 my $d = shift @valid;
1137 is($hit->name, shift @$d);
1138 is($hit->length, shift @$d);
1139 is($hit->accession, shift @$d);
1140 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
1141 is($hit->raw_score, shift @$d );
1142 is(sprintf("%.4f",$hit->frac_identical('query')), '0.3640');
1143 is(sprintf("%.4f",$hit->frac_identical('hit')), '0.3660');
1144 is(sprintf("%.4f",$hit->frac_conserved('query')), '0.5370');
1145 is(sprintf("%.4f",$hit->frac_conserved('hit')), '0.5400');
1146 is(sprintf("%.4f",$hit->frac_aligned_query), '0.6200');
1147 is(sprintf("%.4f",$hit->frac_aligned_hit), '0.7100');
1151 while( my $hsp = $hit->next_hsp ) {
1152 is($hsp->query->start, 21);
1153 is($hsp->query->end, 1265);
1154 is($hsp->query->strand, 1);
1155 is($hsp->hit->start, 1);
1156 is($hsp->hit->end, 413);
1157 is($hsp->hit->strand, 0);
1158 is($hsp->length('hsp'), 421);
1159 is(sprintf("%g",$hsp->evalue), sprintf("%g",'1.1e-74'));
1160 is(sprintf("%g",$hsp->pvalue), sprintf("%g",'1.1e-74'));
1161 is($hsp->score,671);
1162 is($hsp->bits,265.8);
1163 is(sprintf("%.2f",$hsp->percent_identity), 35.87);
1165 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.3639);
1166 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.3656);
1167 is(sprintf("%.4f",$hsp->frac_conserved('query')), 0.5373);
1168 is(sprintf("%.2f",$hsp->frac_conserved('hit')), 0.54);
1170 is(sprintf("%.4f",$hsp->frac_identical('hsp')), 0.3587);
1171 is(sprintf("%.4f",$hsp->frac_conserved('hsp')), 0.5297);
1173 is($hsp->query->frame(), 2);
1174 is($hsp->hit->frame(), 0);
1175 is($hsp->gaps('query'), 6);
1176 is($hsp->gaps('hit'), 8);
1178 is($hsp->query_string, 'MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFHCFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARP---ESSGEQKMAEAHELLKKFYHHLLINTKEGQEALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHVKILRRNVEEIILCYDSDKAGYEATLKASELL---QKKGCKVRVAMIPDGLDPDDYIKKFGGEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVKQ');
1179 is($hsp->hit_string, 'MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYHCFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVPFE----AGSGPSQIERHQRQTLYQLMDGLNTFYQQSL-QQPVATSARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY-DRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSPETDIFHKGRQLYGLYEAQQDNAEPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHIQLLFRATNNVICCYDGDRAGRDAAWRALETALPYMTDGRQLRFMFLPDGEDPDTLVRKEGKEAFEARM-EQAMPLSAFLFNSLMPQVDLSTPDGRARLSTLALPLISQVPGETLR-IYLRQ');
1180 is($hsp->homology_string, 'M RIP ++ + DIV++I V+LKKQG+N+ CPFH E TPSF+V+ +KQ +HCFGCGA GN FL + F E+V LA + ++ P + +G+ P E Q + + + L FY L A YL RG + E+I F IG+A WD + K + + AG+L+ + G Y DRFR RVMFPI D G V+ F GR LG+ PKY+NSPET +FHK + LY Y+A+ + R ++ EG+ DV + ++A++GTS T DH+++L R +I CYD D+AG +A +A E G ++R +PDG DPD ++K G E F+ + + ++ + AF +LS R L IS + G + +Y++Q');
1181 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '24-29 39-47 54-56 60-71 90-98 129-137 150-152 156-158 180-182 192-194 219-221 228-236 243-251 255-263 267-269 279-284 291-296 300-302 309-311 315-317 321-332 342-347 351-362 366-368 372-374 378-383 387-389 393-398 405-413 417-440 444-449 456-461 468-470 474-476 486-491 495-497 510-518 525-527 531-533 537-557 561-569 573-578 594-599 603-605 609-614 618-620 633-635 654-656 660-665 669-671 678-680 684-686 693-695 705-710 738-740 753-755 759-761 768-773 786-797 801-806 810-812 819-821 831-833 840-860 864-869 894-896 900-902 921-923 927-938 945-947 957-959 972-974 981-986 993-995 999-1013 1017-1019 1029-1037 1050-1052 1062-1067 1077-1079 1083-1085 1089-1091 1098-1103 1107-1109 1113-1115 1122-1124 1128-1130 1137-1163 1173-1184 1188-1208 1212-1217 1224-1226 1230-1232 1236-1244 1248-1250');
1182 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '2 3 7-9 12 14-17 24-26 37-39 44 46 54 58 67 70-72 75-77 79-81 83 87 88 91 92 94 97 99 100 104 106-108 110-113 115 117 119 120 122 124 125 128-138 140 141 144 145 148 150 154 155 157 162-164 167 169 171-177 179-181 183 184 190 191 193 195-197 202 209 211 212 214 217 219 222 226 227 237 242 244 247 248 253-256 258 259 261 264 268 271-277 279 280 289 291 298 300-303 306 310 315 318 319 322 324-331 333 337-339 344 348 349 353 355 357 360-362 364 367 369 372-380 384-387 389-395 397 398 401 403 405-408');
1183 is($hsp->ambiguous_seq_inds, 'query');
1188 last if( $count++ > @valid );
1193 $searchio = Bio::SearchIO->new('-format' => 'blast',
1194 '-file' => test_input_file('tricky.wublast'));
1195 $result = $searchio->next_result;
1197 while (my $hit = $result->next_hit) {
1198 # frac_aligned_hit used to be over 1, frac_identical & frac_conserved are still too wrong
1200 local $TODO = 'frac_identical & frac_conserved are still too wrong';
1201 cmp_ok sprintf("%.3f",$hit->frac_identical), '>', 0.9;
1202 cmp_ok sprintf("%.3f",$hit->frac_conserved), '<=', 1;
1204 is(sprintf("%.2f",$hit->frac_aligned_query), '0.92');
1205 is(sprintf("%.2f",$hit->frac_aligned_hit), '0.91');
1210 # More frac_ method testing, this time on ncbi blastn
1211 $searchio = Bio::SearchIO->new('-format' => 'blast',
1212 '-file' => test_input_file('frac_problems.blast'));
1213 my @expected = ("1.000", "0.943");
1214 while (my $result = $searchio->next_result) {
1215 my $hit = $result->next_hit;
1216 is($hit->frac_identical, shift @expected);
1220 # And even more: frac_aligned_query should never be over 1!
1221 $searchio = Bio::SearchIO->new('-format' => 'blast',
1222 '-file' => test_input_file('frac_problems2.blast'));
1223 $result = $searchio->next_result;
1224 $hit = $result->next_hit;
1225 is $hit->frac_aligned_query, 0.97;
1227 # Also, start and end should be sane
1228 $searchio = Bio::SearchIO->new('-format' => 'blast',
1229 '-file' => test_input_file('frac_problems3.blast'));
1230 $result = $searchio->next_result;
1231 $hit = $result->next_hit;
1232 is $hit->start('sbjct'), 207;
1233 is $hit->end('sbjct'), 1051;
1237 $searchio = Bio::SearchIO->new('-format' => 'blast',
1238 '-file' => test_input_file('dnaEbsub_ecoli.wutblastn'));
1240 $result = $searchio->next_result;
1241 is($result->database_name, 'ecoli.nt');
1242 is($result->database_letters, 4662239);
1243 is($result->database_entries, 400);
1244 is($result->algorithm, 'TBLASTN');
1245 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
1246 is($result->query_name, 'gi|142865|gb|AAA22406.1|');
1247 is($result->query_description, 'DNA primase');
1248 is($result->query_accession, 'AAA22406.1');
1249 is($result->query_gi, 142865);
1250 is($result->query_length, 603);
1251 is($result->get_parameter('matrix'), 'blosum62');
1253 is($result->get_statistic('lambda'), '0.320');
1254 is($result->get_statistic('kappa'), 0.136);
1255 is($result->get_statistic('entropy'),0.387 );
1257 is($result->get_statistic('dbentries'), 400);
1259 @valid = ( [ 'gi|1789441|gb|AE000388.1|AE000388', 10334, 'AE000388', '1.4e-73', 671]);
1262 while( my $hit = $result->next_hit ) {
1263 my $d = shift @valid;
1264 is($hit->name, shift @$d);
1265 is($hit->length, shift @$d);
1266 is($hit->accession, shift @$d);
1267 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
1268 is($hit->raw_score, shift @$d );
1272 while( my $hsp = $hit->next_hsp ) {
1273 is($hsp->query->start, 1);
1274 is($hsp->query->end, 415);
1275 is($hsp->query->strand, 0);
1276 is($hsp->hit->start, 4778);
1277 is($hsp->hit->end, 6016);
1278 is($hsp->hit->strand, 1);
1279 is($hsp->length('hsp'), 421);
1280 cmp_ok($hsp->evalue,'==',1.4e-73);
1281 cmp_ok($hsp->pvalue,'==',1.4e-73);
1282 is($hsp->score,671);
1283 is($hsp->bits,265.8);
1284 is(sprintf("%.2f",$hsp->percent_identity), 35.87);
1285 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.3656);
1286 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.3639);
1287 is(sprintf("%.4f",$hsp->frac_conserved('hsp')), 0.5297);
1288 is($hsp->query->frame(), 0);
1289 is($hsp->hit->frame(), 1);
1290 is($hsp->gaps('query'), 6);
1291 is($hsp->gaps('hit'), 8);
1293 is($hsp->query_string, 'MGNRIPDEIVDQVQKSADIVEVIGDYVQLKKQGRNYFGLCPFHGESTPSFSVSPDKQIFHCFGCGAGGNVFSFLRQMEGYSFAESVSHLADKYQIDFPDDITVHSGARP---ESSGEQKMAEAHELLKKFYHHLLINTKEGQEALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGYFDRFRNRVMFPIHDHHGAVVAFSGRALGSQQPKYMNSPETPLFHKSKLLYNFYKARLHIRKQERAVLFEGFADVYTAVSSDVKESIATMGTSLTDDHVKILRRNVEEIILCYDSDKAGYEATLKASELL---QKKGCKVRVAMIPDGLDPDDYIKKFGGEKFKNDIIDASVTVMAFKMQYFRKGKNLSDEGDRLAYIKDVLKEISTLSGSLEQEVYVKQ');
1294 is($hsp->hit_string, 'MAGRIPRVFINDLLARTDIVDLIDARVKLKKQGKNFHACCPFHNEKTPSFTVNGEKQFYHCFGCGAHGNAIDFLMNYDKLEFVETVEELAAMHNLEVPFE----AGSGPSQIERHQRQTLYQLMDGLNTFYQQSL-QQPVATSARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY-DRFRERVMFPIRDKRGRVIGFGGRVLGNDTPKYLNSPETDIFHKGRQLYGLYEAQQDNAEPNRLLVVEGYMDVVALAQYGINYAVASLGTSTTADHIQLLFRATNNVICCYDGDRAGRDAAWRALETALPYMTDGRQLRFMFLPDGEDPDTLVRKEGKEAFEARM-EQAMPLSAFLFNSLMPQVDLSTPDGRARLSTLALPLISQVPGETLR-IYLRQ');
1295 is($hsp->homology_string, 'M RIP ++ + DIV++I V+LKKQG+N+ CPFH E TPSF+V+ +KQ +HCFGCGA GN FL + F E+V LA + ++ P + +G+ P E Q + + + L FY L A YL RG + E+I F IG+A WD + K + + AG+L+ + G Y DRFR RVMFPI D G V+ F GR LG+ PKY+NSPET +FHK + LY Y+A+ + R ++ EG+ DV + ++A++GTS T DH+++L R +I CYD D+AG +A +A E G ++R +PDG DPD ++K G E F+ + + ++ + AF +LS R L IS + G + +Y++Q');
1296 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '2 3 7-9 12 14-17 24-26 37-39 44 46 54 58 67 70-72 75-77 79-81 83 87 88 91 92 94 97 99 101-104 108 109 111-114 116 118 120 121 123 125 126 129-131 133-140 142 143 146 147 150 152 156 157 159 164-166 169 171 173-179 181-183 185 186 192 193 195 197 198 200 205 212 214 215 217 220 222 225 229 230 240 245 247 250 251 256-259 261 262 264 267 271 274-280 282 283 292 294 301 303-306 309 313 318 321 322 325 327-331 333 337-339 344 348 349 353 355 357 360 361 363 365 368 370 373-381 385-388 390-396 398 399 402 404 406-408 410');
1297 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '4781-4786 4796-4804 4811-4813 4817-4828 4847-4855 4886-4894 4907-4909 4913-4915 4937-4939 4949-4951 4976-4978 4985-4993 5000-5008 5012-5020 5024-5026 5036-5041 5048-5053 5057-5059 5066-5068 5072-5077 5087-5089 5093-5101 5105-5116 5120-5122 5126-5128 5132-5137 5141-5143 5147-5152 5159-5191 5195-5200 5207-5212 5219-5221 5225-5227 5237-5242 5246-5248 5261-5269 5276-5278 5282-5284 5288-5308 5312-5320 5324-5329 5345-5350 5354-5356 5360-5368 5381-5383 5402-5404 5408-5413 5417-5419 5426-5428 5432-5434 5441-5443 5453-5458 5486-5488 5501-5503 5507-5509 5516-5521 5534-5545 5549-5554 5558-5560 5567-5569 5579-5581 5588-5608 5612-5617 5642-5644 5648-5650 5669-5671 5675-5686 5693-5695 5705-5707 5720-5722 5729-5734 5741-5743 5747-5770 5774-5776 5786-5794 5807-5809 5819-5824 5834-5836 5840-5842 5846-5848 5855-5863 5867-5869 5876-5878 5882-5884 5891-5917 5927-5938 5942-5962 5966-5971 5978-5980 5984-5986 5990-6001');
1298 is($hsp->ambiguous_seq_inds, 'subject');
1303 last if( $count++ > @valid );
1308 $searchio = Bio::SearchIO->new('-format' => 'blast',
1309 '-file' => test_input_file('dnaEbsub_ecoli.wutblastx'));
1311 $result = $searchio->next_result;
1312 is($result->database_name, 'ecoli.nt');
1313 is($result->database_letters, 4662239);
1314 is($result->database_entries, 400);
1315 is($result->algorithm, 'TBLASTX');
1316 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
1317 is($result->query_name, 'gi|142864|gb|M10040.1|BACDNAE');
1318 is($result->query_description, 'B.subtilis dnaE gene encoding DNA primase, complete cds');
1319 is($result->query_accession, 'M10040.1');
1320 is($result->query_gi, 142864);
1321 is($result->query_length, 2001);
1322 is($result->get_parameter('matrix'), 'blosum62');
1324 is($result->get_statistic('lambda'), 0.318);
1325 is($result->get_statistic('kappa'), 0.135);
1326 is($result->get_statistic('entropy'),0.401 );
1327 is($result->get_statistic('dbentries'), 400);
1329 @valid = ( [ 'gi|1789441|gb|AE000388.1|AE000388', 10334, 'AE000388', '6.4e-70', 318, 148.6],
1330 [ 'gi|2367383|gb|AE000509.1|AE000509', 10589, 'AE000509', 1, 59, 29.9]
1334 while( my $hit = $result->next_hit ) {
1335 my $d = shift @valid;
1336 is($hit->name, shift @$d);
1337 is($hit->length, shift @$d);
1338 is($hit->accession, shift @$d);
1339 # using e here to deal with 0.9992 coming out right here as well
1340 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
1341 is($hit->raw_score, shift @$d );
1342 is($hit->bits, shift @$d );
1345 while( my $hsp = $hit->next_hsp ) {
1347 if( $hspcounter == 3 ) {
1348 # let's actually look at the 3rd HSP
1349 is($hsp->query->start, 441);
1350 is($hsp->query->end, 617);
1351 is($hsp->query->strand, 1);
1352 is($hsp->hit->start, 5192);
1353 is($hsp->hit->end, 5368);
1354 is($hsp->hit->strand, 1);
1355 is($hsp->length('hsp'), 59);
1356 cmp_ok($hsp->evalue,'==',6.4e-70);
1357 cmp_ok($hsp->pvalue,'==',6.4e-70);
1359 is($hsp->bits,41.8);
1360 is(sprintf("%.2f",$hsp->percent_identity), '32.20');
1361 is(sprintf("%.3f",$hsp->frac_identical('hit')), 0.322);
1362 is(sprintf("%.3f",$hsp->frac_identical('query')), 0.322);
1363 is(sprintf("%.4f",$hsp->frac_conserved('hsp')), 0.4746);
1364 is($hsp->query->frame(), 2);
1365 is($hsp->hit->frame(), 1);
1366 is($hsp->gaps('query'), 0);
1367 is($hsp->gaps('hit'), 0);
1369 is($hsp->query_string, 'ALDYLLSRGFTKELINEFQIGYALDSWDFITKFLVKRGFSEAQMEKAGLLIRREDGSGY');
1370 is($hsp->hit_string, 'ARQYLEKRGLSHEVIARFAIGFAPPGWDNVLKRFGGNPENRQSLIDAGMLVTNDQGRSY');
1371 is($hsp->homology_string, 'A YL RG + E+I F IG+A WD + K + + AG+L+ + G Y');
1372 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '444-449 456-461 468-470 474-476 486-491 495-497 510-518 525-527 531-533 537-557 561-569 573-578 594-599 603-605 609-614');
1373 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '5195-5200 5207-5212 5219-5221 5225-5227 5237-5242 5246-5248 5261-5269 5276-5278 5282-5284 5288-5308 5312-5320 5324-5329 5345-5350 5354-5356 5360-5365');
1374 is($hsp->ambiguous_seq_inds, 'query/subject');
1380 elsif( $count == 1 ) {
1382 while( my $hsp = $hit->next_hsp ) {
1383 is($hsp->query->start, 587);
1384 is($hsp->query->end, 706);
1385 is($hsp->query->strand, -1);
1386 is($hsp->hit->start, 4108);
1387 is($hsp->hit->end, 4227);
1388 is($hsp->hit->strand, -1);
1389 is($hsp->length('hsp'), 40);
1390 is($hsp->evalue , 7.1);
1391 is($hsp->pvalue , '1.00');
1393 is($hsp->bits,29.9);
1394 is(sprintf("%.2f",$hsp->percent_identity), '37.50');
1395 is(sprintf("%.4f",$hsp->frac_identical('hit')), '0.3750');
1396 is(sprintf("%.4f",$hsp->frac_identical('query')), '0.3750');
1397 is(sprintf("%.4f",$hsp->frac_conserved('hsp')), '0.4750');
1398 is($hsp->query->frame(), 2);
1399 is($hsp->hit->frame(), 2);
1400 is($hsp->gaps('query'), 0);
1401 is($hsp->gaps('hit'), 0);
1403 is($hsp->query_string, 'WLPRALPEKATTAP**SWIGNMTRFLKRSKYPLPSSRLIR');
1404 is($hsp->hit_string, 'WLSRTTVGSSTVSPRTFWITRMKVKLSSSKVTLPSTKSTR');
1405 is($hsp->homology_string, 'WL R +T +P WI M L SK LPS++ R');
1411 last if( $count++ > @valid );
1415 # WU-BLAST -echofilter option test (Bug 2388)
1416 $searchio = Bio::SearchIO->new('-format' => 'blast',
1417 '-file' => test_input_file('echofilter.wublastn'));
1419 $result = $searchio->next_result;
1421 is($result->database_name, 'NM_003201.fa');
1422 is($result->database_letters, 1936);
1423 is($result->database_entries, 1);
1424 is($result->algorithm, 'BLASTN');
1425 like($result->algorithm_version, qr/^2\.0MP\-WashU/);
1426 like($result->query_name, qr/ref|NM_003201.1| Homo sapiens transcription factor A, mitochondrial \(TFAM\), mRNA/);
1427 is($result->query_accession, 'NM_003201.1');
1429 is($result->query_length, 1936);
1430 is($result->get_statistic('lambda'), 0.192);
1431 is($result->get_statistic('kappa'), 0.182);
1432 is($result->get_statistic('entropy'), 0.357);
1433 is($result->get_statistic('dbletters'), 1936);
1434 is($result->get_statistic('dbentries'), 1);
1435 is($result->get_parameter('matrix'), '+5,-4');
1437 @valid = ( [ 'ref|NM_003201.1|', 1936, 'NM_003201', '0', 9680],);
1439 while( $hit = $result->next_hit ) {
1440 my $d = shift @valid;
1442 is($hit->name, shift @$d);
1443 is($hit->length, shift @$d);
1444 is($hit->accession, shift @$d);
1445 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
1446 is($hit->raw_score, shift @$d );
1450 while( my $hsp = $hit->next_hsp ) {
1451 is($hsp->query->start, 1);
1452 is($hsp->query->end, 1936);
1453 is($hsp->hit->start, 1);
1454 is($hsp->hit->end, 1936);
1455 is($hsp->length('hsp'), 1936);
1457 is($hsp->evalue , '0.');
1458 is($hsp->pvalue , '0.');
1459 is($hsp->score, 9680);
1460 is($hsp->bits,1458.4);
1461 is($hsp->percent_identity, 100);
1462 is($hsp->frac_identical('query'), 1.00);
1463 is($hsp->frac_identical('hit'), 1.00);
1469 last if( $count++ > @valid );
1474 # Do a multiblast report test
1475 $searchio = Bio::SearchIO->new('-format' => 'blast',
1476 '-file' => test_input_file('multi_blast.bls'));
1478 @expected = qw(CATH_RAT CATL_HUMAN CATL_RAT PAPA_CARPA);
1479 my $results_left = 4;
1480 while( my $result = $searchio->next_result ) {
1481 is($result->query_name, shift @expected, "Multiblast query test");
1484 is($results_left, 0);
1486 # Test GCGBlast parsing
1488 $searchio = Bio::SearchIO->new('-format' => 'blast',
1489 '-file' => test_input_file('test.gcgblast'));
1490 $result = $searchio->next_result();
1492 is($result->query_name, '/v0/people/staji002/test.gcg');
1493 is($result->algorithm, 'BLASTP');
1494 is($result->algorithm_version, '2.2.1 [Apr-13-2001]');
1495 is($result->database_name, 'pir');
1496 is($result->database_entries, 274514);
1497 is($result->database_letters, 93460074);
1499 $hit = $result->next_hit;
1500 is($hit->description, 'F22B7.10 protein - Caenorhabditis elegans');
1501 is($hit->name, 'PIR2:S44629');
1502 is($hit->length, 628);
1503 is($hit->accession, 'PIR2:S44629');
1504 is($hit->significance, '2e-08' );
1505 is($hit->raw_score, 136 );
1506 is($hit->bits, '57.0' );
1507 $hsp = $hit->next_hsp;
1508 cmp_ok($hsp->evalue, '==', 2e-08);
1509 is($hsp->bits, '57.0');
1510 is($hsp->score, 136);
1511 is(int($hsp->percent_identity), 28);
1512 is(sprintf("%.2f",$hsp->frac_identical('query')), 0.29);
1513 is($hsp->frac_conserved('total'), 69/135);
1514 is($hsp->gaps('total'), 8);
1515 is($hsp->gaps('hit'), 6);
1516 is($hsp->gaps('query'), 2);
1518 is($hsp->hit->start, 342);
1519 is($hsp->hit->end, 470);
1520 is($hsp->query->start, 3);
1521 is($hsp->query->end, 135);
1523 is($hsp->query_string, 'CAAEFDFMEKETPLRYTKTXXXXXXXXXXXXXXRKIISDMWGVLAKQQTHVRKHQFDHGELVYHALQLLAYTALGILIMRLKLFLTPYMCVMASLICSRQLFGW--LFCKVHPGAIVFVILAAMSIQGSANLQTQ');
1524 is($hsp->hit_string, 'CSAEFDFIQYSTIEKLCGTLLIPLALISLVTFVFNFVKNT-NLLWRNSEEIG----ENGEILYNVVQLCCSTVMAFLIMRLKLFMTPHLCIVAALFANSKLLGGDRISKTIRVSALVGVI-AILFYRGIPNIRQQ');
1525 is($hsp->homology_string, 'C+AEFDF++ T + T + + +L + + ++GE++Y+ +QL T + LIMRLKLF+TP++C++A+L + +L G + + A+V VI A + +G N++ Q');
1527 $searchio = Bio::SearchIO->new('-format' => 'blast',
1528 '-file' => test_input_file('HUMBETGLOA.tblastx'));
1530 $result = $searchio->next_result;
1532 isa_ok($result,'Bio::Search::Result::ResultI');
1533 $hit = $result->next_hit;
1534 is($hit->accession, 'AE000479');
1535 is($hit->bits, 33.6);
1536 $hsp = $hit->next_hsp;
1537 is($hit->hsp->bits,$hsp->bits);
1539 isa_ok($hsp->get_aln,'Bio::Align::AlignI');
1540 my $writer = Bio::SearchIO::Writer::HitTableWriter->new(
1548 frac_identical_query
1552 my $outfile = test_output_file();
1553 my $out = Bio::SearchIO->new(-writer => $writer,
1554 -file => ">$outfile");
1555 $out->write_result($result, 1);
1557 my $writerhtml = Bio::SearchIO::Writer::HTMLResultWriter->new();
1558 my $outhtml = Bio::SearchIO->new(-writer => $writerhtml,
1559 -file => ">$outfile");
1560 $outhtml->write_result($result, 1);
1563 #test all the database accession number formats
1564 $searchio = Bio::SearchIO->new(-format => 'blast',
1565 -file => test_input_file('testdbaccnums.out'));
1566 $result = $searchio->next_result;
1568 @valid = ( ['pir||T14789','T14789','T14789','CAB53709','AAH01726'],
1569 ['gb|NP_065733.1|CYT19', 'NP_065733','CYT19'],
1570 ['emb|XP_053690.4|Cyt19','XP_053690'],
1571 ['dbj|NP_056277.2|DKFZP586L0724','NP_056277'],
1572 ['prf||XP_064862.2','XP_064862'],
1573 ['pdb|BAB13968.1|1','BAB13968'],
1574 ['sp|Q16478|GLK5_HUMAN','Q16478'],
1575 ['pat|US|NP_002079.2','NP_002079'],
1576 ['bbs|NP_079463.2|','NP_079463'],
1577 ['gnl|db1|NP_002444.1','NP_002444'],
1578 ['ref|XP_051877.1|','XP_051877'],
1579 ['lcl|AAH16829.1|','AAH16829'],
1580 ['gi|1|gb|NP_065733.1|CYT19','NP_065733'],
1581 ['gi|2|emb|XP_053690.4|Cyt19','XP_053690'],
1582 ['gi|3|dbj|NP_056277.2|DKFZP586L0724','NP_056277'],
1583 ['gi|4|pir||T14789','T14789'],
1584 ['gi|5|prf||XP_064862.2','XP_064862'],
1585 ['gi|6|pdb|BAB13968.1|1','BAB13968'],
1586 ['gi|7|sp|Q16478|GLK5_HUMAN','Q16478'],
1587 ['gi|8|pat|US|NP_002079.2','NP_002079'],
1588 ['gi|9|bbs|NP_079463.2|','NP_079463'],
1589 ['gi|10|gnl|db1|NP_002444.1','NP_002444'],
1590 ['gi|11|ref|XP_051877.1|','XP_051877'],
1591 ['gi|12|lcl|AAH16829.1|','AAH16829'],
1592 ['MY_test_ID','MY_test_ID']
1595 $hit = $result->next_hit;
1596 my $d = shift @valid;
1597 is($hit->name, shift @$d);
1598 is($hit->accession, shift @$d);
1599 my @accnums = $hit->each_accession_number;
1600 foreach my $a (@accnums) {
1604 $hit = $result->next_hit;
1605 is($hit->name, shift @$d);
1606 is($hit->accession, shift @$d);
1607 is($hit->locus, shift @$d);
1610 while( $hit = $result->next_hit ) {
1611 my $d = shift @valid;
1612 is($hit->name, shift @$d);
1613 is($hit->accession, shift @$d);
1620 # parse the BLAST-like output
1621 my $infile = test_input_file('503384.MEGABLAST.2');
1622 my $in = Bio::SearchIO->new(-file => $infile,
1623 -format => 'blast'); # this is megablast
1625 my $r = $in->next_result;
1626 my @dcompare = ( ['Contig3700', 5631, 396, 785,'0.0', 785, '0.0', 396, 639, 12,
1627 8723,9434, 1, 4083, 4794, -1],
1628 ['Contig3997', 12734, 335, 664,'0.0', 664, '0.0', 335, 401, 0,
1629 1282, 1704, 1, 1546, 1968,-1 ],
1630 ['Contig634', 858, 245, 486,'1e-136', 486, '1e-136', 245, 304, 3,
1631 7620, 7941, 1, 1, 321, -1],
1632 ['Contig1853', 2314, 171, 339,'1e-91',339, '1e-91', 171, 204, 0,
1633 6406, 6620, 1, 1691, 1905, 1]
1636 is($r->query_name, '503384');
1637 is($r->query_description, '11337 bp 2 contigs');
1638 is($r->query_length, 11337);
1639 is($r->database_name, 'cneoA.nt ');
1640 is($r->database_letters, 17206226);
1641 is($r->database_entries, 4935);
1644 while( my $hit = $r->next_hit ) {
1645 my $d = shift @dcompare;
1646 is($hit->name, shift @$d);
1647 is($hit->length, shift @$d);
1648 is($hit->raw_score, shift @$d);
1649 is($hit->bits, shift @$d);
1650 is($hit->significance, shift @$d);
1652 my $hsp = $hit->next_hsp;
1653 is($hsp->bits, shift @$d);
1654 cmp_ok($hsp->evalue, '==', shift @$d);
1655 is($hsp->score, shift @$d);
1656 is($hsp->num_identical, shift @$d);
1657 is($hsp->gaps('total'), shift @$d);
1658 is($hsp->query->start, shift @$d);
1659 is($hsp->query->end, shift @$d);
1660 is($hsp->query->strand, shift @$d);
1661 is($hsp->hit->start, shift @$d);
1662 is($hsp->hit->end, shift @$d);
1663 is($hsp->hit->strand, shift @$d);
1668 # parse another megablast format
1670 $infile = test_input_file('503384.MEGABLAST.0');
1672 # this is megablast output type 0
1673 $in = Bio::SearchIO->new(-file => $infile,
1674 -report_format => 0,
1675 -format => 'megablast');
1676 $r = $in->next_result;
1678 ['Contig634', 7620, 7941, 1, 1, 321, -1],
1679 ['Contig1853', 6406, 6620, 1, 1691, 1905, 1],
1680 ['Contig3700', 8723,9434, 1, 4083, 4794, -1],
1681 ['Contig3997', 1282, 1704, 1, 1546, 1968,-1 ],
1684 is($r->query_name, '503384');
1686 while( my $hit = $r->next_hit ) {
1687 my $d = shift @dcompare;
1688 is($hit->name, shift @$d);
1689 my $hsp = $hit->next_hsp;
1690 is($hsp->query->start, shift @$d);
1691 is($hsp->query->end, shift @$d);
1692 is($hsp->query->strand, shift @$d);
1693 is($hsp->hit->start, shift @$d);
1694 is($hsp->hit->end, shift @$d);
1695 is($hsp->hit->strand, shift @$d);
1700 # Let's test RPS-BLAST
1702 my $parser = Bio::SearchIO->new(-format => 'blast',
1703 -file => test_input_file('ecoli_domains.rpsblast'));
1705 $r = $parser->next_result;
1706 is($r->query_name, 'gi|1786183|gb|AAC73113.1|');
1707 is($r->query_gi, 1786183);
1708 is($r->num_hits, 7);
1709 $hit = $r->next_hit;
1710 is($hit->name, 'gnl|CDD|3919');
1711 is($hit->significance, 0.064);
1712 is($hit->bits, 28.3);
1713 is($hit->raw_score, 63);
1714 $hsp = $hit->next_hsp;
1715 is($hsp->query->start, 599);
1716 is($hsp->query->end,655);
1717 is($hsp->hit->start,23);
1718 is($hsp->hit->end,76);
1721 # Test PSI-BLAST parsing
1723 $searchio = Bio::SearchIO->new('-format' => 'blast',
1724 '-file' => test_input_file('psiblastreport.out'));
1726 $result = $searchio->next_result;
1728 is($result->database_name, '/home/peter/blast/data/swissprot.pr');
1729 is($result->database_entries, 88780);
1730 is($result->database_letters, 31984247);
1732 is($result->algorithm, 'BLASTP');
1733 like($result->algorithm_version, qr/^2\.0\.14/);
1734 is($result->query_name, 'CYS1_DICDI');
1735 is($result->query_length, 343);
1736 is($result->get_statistic('kappa') , 0.0491);
1737 cmp_ok($result->get_statistic('lambda'), '==', 0.270);
1738 cmp_ok($result->get_statistic('entropy'), '==', 0.230);
1739 is($result->get_statistic('dbletters'), 31984247);
1740 is($result->get_statistic('dbentries'), 88780);
1741 is($result->get_statistic('effective_hsplength'), 49);
1742 is($result->get_statistic('effectivespace'), 8124403938);
1743 is($result->get_parameter('matrix'), 'BLOSUM62');
1744 is($result->get_parameter('gapopen'), 11);
1745 is($result->get_parameter('gapext'), 1);
1747 my @valid_hit_data = ( [ 'sp|P04988|CYS1_DICDI', 343, 'P04988', '0', 721],
1748 [ 'sp|P43295|A494_ARATH', 313, 'P43295', '1e-75', 281],
1749 [ 'sp|P25804|CYSP_PEA', 363, 'P25804', '1e-74', 278]);
1750 my @valid_iter_data = ( [ 127, 127, 0, 109, 18, 0, 0, 0, 0],
1751 [ 157, 40, 117, 2, 38, 0, 109, 3, 5]);
1753 while( $iter = $result->next_iteration ) {
1755 my $di = shift @valid_iter_data;
1756 is($iter->number, $iter_count);
1758 is($iter->num_hits, shift @$di);
1759 is($iter->num_hits_new, shift @$di);
1760 is($iter->num_hits_old, shift @$di);
1761 is(scalar($iter->newhits_below_threshold), shift @$di);
1762 is(scalar($iter->newhits_not_below_threshold), shift @$di);
1763 is(scalar($iter->newhits_unclassified), shift @$di);
1764 is(scalar($iter->oldhits_below_threshold), shift @$di);
1765 is(scalar($iter->oldhits_newly_below_threshold), shift @$di);
1766 is(scalar($iter->oldhits_not_below_threshold), shift @$di);
1769 if ($iter_count == 1) {
1770 while( $hit = $result->next_hit ) {
1771 my $d = shift @valid_hit_data;
1773 is($hit->name, shift @$d);
1774 is($hit->length, shift @$d);
1775 is($hit->accession, shift @$d);
1776 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
1777 is($hit->bits, shift @$d );
1779 if( $hit_count == 1 ) {
1781 while( my $hsp = $hit->next_hsp ){
1782 is($hsp->query->start, 32);
1783 is($hsp->query->end, 340);
1784 is($hsp->hit->start, 3);
1785 is($hsp->hit->end, 307);
1786 is($hsp->length('hsp'), 316);
1787 is($hsp->start('hit'), $hsp->hit->start);
1788 is($hsp->end('query'), $hsp->query->end);
1789 is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
1790 cmp_ok($hsp->evalue, '==', 1e-75);
1791 is($hsp->score, 712);
1792 is($hsp->bits, 281);
1793 is(sprintf("%.1f",$hsp->percent_identity), 46.5);
1794 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.4757);
1795 is(sprintf("%.3f",$hsp->frac_identical('hit')), 0.482);
1801 last if( $hit_count++ > @valid_hit_data );
1805 is(@valid_hit_data, 0);
1806 is(@valid_iter_data, 0);
1810 $searchio = Bio::SearchIO->new( '-format' => 'blast',
1811 '-file' => test_input_file('ecolitst.bls'),
1812 '-signif' => 1e-100);
1814 @valid = qw(gb|AAC73113.1|);
1815 $r = $searchio->next_result;
1817 while( my $hit = $r->next_hit ) {
1818 is($hit->name, shift @valid);
1821 $searchio = Bio::SearchIO->new( '-format' => 'blast',
1822 '-file' => test_input_file('ecolitst.bls'),
1825 @valid = qw(gb|AAC73113.1| gb|AAC76922.1| gb|AAC76994.1|);
1826 $r = $searchio->next_result;
1828 while( my $hit = $r->next_hit ) {
1829 is($hit->name, shift @valid);
1833 $searchio = Bio::SearchIO->new( '-format' => 'blast',
1834 '-file' => test_input_file('ecolitst.bls'),
1837 @valid = qw(gb|AAC73113.1| gb|AAC76922.1|);
1838 $r = $searchio->next_result;
1840 while( my $hit = $r->next_hit ) {
1841 is($hit->name, shift @valid);
1846 my $filt_func = sub{ my $hit=shift;
1847 $hit->frac_identical('query') >= 0.31
1850 $searchio = Bio::SearchIO->new( '-format' => 'blast',
1851 '-file' => test_input_file('ecolitst.bls'),
1852 '-hit_filter' => $filt_func);
1854 @valid = qw(gb|AAC73113.1| gb|AAC76994.1|);
1855 $r = $searchio->next_result;
1857 while( my $hit = $r->next_hit ) {
1858 is($hit->name, shift @valid);
1865 # bl2seq parsing testing
1867 # this is blastp bl2seq
1868 $searchio = Bio::SearchIO->new(-format => 'blast',
1869 -file => test_input_file('bl2seq.out'));
1870 $result = $searchio->next_result;
1871 isa_ok($result,'Bio::Search::Result::ResultI');
1872 is($result->query_name, '');
1873 is($result->algorithm, 'BLASTP');
1874 $hit = $result->next_hit;
1875 is($hit->name, 'ALEU_HORVU');
1876 is($hit->length, 362);
1877 $hsp = $hit->next_hsp;
1878 is($hsp->score, 481);
1879 is($hsp->bits, 191);
1880 is(int $hsp->percent_identity, 34);
1881 cmp_ok($hsp->evalue, '==', 2e-53);
1882 is(int($hsp->frac_conserved*$hsp->length), 167);
1883 is($hsp->query->start, 28);
1884 is($hsp->query->end, 343);
1885 is($hsp->hit->start, 60);
1886 is($hsp->hit->end,360);
1889 # this is blastn bl2seq
1890 $searchio = Bio::SearchIO->new(-format => 'blast',
1891 -file => test_input_file('bl2seq.blastn.rev'));
1892 $result = $searchio->next_result;
1893 isa_ok($result,'Bio::Search::Result::ResultI');
1894 is($result->query_name, '');
1895 is($result->algorithm, 'BLASTN');
1896 is($result->query_length, 180);
1897 $hit = $result->next_hit;
1898 is($hit->length, 179);
1899 is($hit->name, 'human');
1900 $hsp = $hit->next_hsp;
1901 is($hsp->score, 27);
1902 is($hsp->bits, '54.0');
1903 is(int $hsp->percent_identity, 88);
1904 cmp_ok($hsp->evalue, '==', 2e-12);
1905 is(int($hsp->frac_conserved*$hsp->length), 83);
1906 is($hsp->query->start, 94);
1907 is($hsp->query->end, 180);
1908 is($hsp->query->strand, 1);
1909 is($hsp->hit->strand, -1);
1910 is($hsp->hit->start, 1);
1911 is($hsp->hit->end,94);
1914 # this is blastn bl2seq
1915 $searchio = Bio::SearchIO->new(-format => 'blast',
1916 -file => test_input_file('bl2seq.blastn'));
1917 $result = $searchio->next_result;
1918 isa_ok($result,'Bio::Search::Result::ResultI');
1919 is($result->query_name, '');
1920 is($result->query_length, 180);
1921 is($result->algorithm, 'BLASTN');
1922 $hit = $result->next_hit;
1923 is($hit->name, 'human');
1924 is($hit->length, 179);
1925 $hsp = $hit->next_hsp;
1926 is($hsp->score, 27);
1927 is($hsp->bits, '54.0');
1928 is(int $hsp->percent_identity, 88);
1929 cmp_ok($hsp->evalue,'==', 2e-12);
1930 is(int($hsp->frac_conserved*$hsp->length), 83);
1931 is($hsp->query->start, 94);
1932 is($hsp->query->end, 180);
1933 is($hsp->query->strand, 1);
1934 is($hsp->hit->strand, 1);
1935 is($hsp->hit->start, 86);
1936 is($hsp->hit->end,179);
1939 # this is blastp bl2seq
1940 $searchio = Bio::SearchIO->new(-format => 'blast',
1941 -file => test_input_file('bl2seq.bug940.out'));
1942 $result = $searchio->next_result;
1943 isa_ok($result,'Bio::Search::Result::ResultI');
1944 is($result->query_name, 'zinc');
1945 is($result->algorithm, 'BLASTP');
1946 is($result->query_description, 'finger protein 135 (clone pHZ-17) [Homo sapiens]. neo_id RS.ctg14243-000000.6.0');
1947 is($result->query_length, 469);
1948 $hit = $result->next_hit;
1949 is($hit->name, 'gi|4507985|');
1950 is($hit->ncbi_gi, 4507985);
1951 is($hit->description,'zinc finger protein 135 (clone pHZ-17) [Homo sapiens]. neo_id RS.ctg14243-000000.6.0');
1952 is($hit->length, 469);
1953 $hsp = $hit->next_hsp;
1954 is($hsp->score, 1626);
1955 is($hsp->bits, 637);
1956 is(int $hsp->percent_identity, 66);
1957 cmp_ok($hsp->evalue, '==', 0.0);
1958 is(int($hsp->frac_conserved*$hsp->length), 330);
1959 is($hsp->query->start, 121);
1960 is($hsp->query->end, 469);
1961 is($hsp->hit->start, 1);
1962 is($hsp->hit->end,469);
1963 is($hsp->gaps, 120);
1964 ok($hit->next_hsp); # there is more than one HSP here,
1965 # make sure it is parsed at least
1967 # cannot distinguish between blastx and tblastn reports
1968 # so we're only testing a blastx report for now
1970 # this is blastx bl2seq
1971 $searchio = Bio::SearchIO->new(-format => 'blast',
1972 -file => test_input_file('bl2seq.blastx.out'));
1973 $result = $searchio->next_result;
1974 isa_ok($result,'Bio::Search::Result::ResultI');
1975 is($result->query_name, 'AE000111.1');
1976 is($result->query_description, 'Escherichia coli K-12 MG1655 section 1 of 400 of the complete genome');
1977 is($result->algorithm, 'BLASTX');
1978 is($result->query_length, 720);
1979 $hit = $result->next_hit;
1980 is($hit->name, 'AK1H_ECOLI');
1981 is($hit->description,'P00561 Bifunctional aspartokinase/homoserine dehydrogenase I (AKI-HDI) [Includes: Aspartokinase I ; Homoserine dehydrogenase I ]');
1982 is($hit->length, 820);
1983 $hsp = $hit->next_hsp;
1984 is($hsp->score, 634);
1985 is($hsp->bits, 248);
1986 is(int $hsp->percent_identity, 100);
1987 cmp_ok($hsp->evalue, '==' ,2e-70);
1988 is(int($hsp->frac_conserved*$hsp->length), 128);
1989 is($hsp->query->start, 1);
1990 is($hsp->query->end, 384);
1991 is($hsp->hit->start, 1);
1992 is($hsp->hit->end,128);
1994 is($hsp->query->frame,0);
1995 is($hsp->hit->frame,0);
1996 is($hsp->query->strand,-1);
1997 is($hsp->hit->strand,0);
1999 # this is tblastx bl2seq (self against self)
2000 $searchio = Bio::SearchIO->new(-format => 'blast',
2001 -file => test_input_file('bl2seq.tblastx.out'));
2002 $result = $searchio->next_result;
2003 isa_ok($result,'Bio::Search::Result::ResultI');
2004 is($result->query_name, 'Escherichia');
2005 is($result->algorithm, 'TBLASTX');
2006 is($result->query_description, 'coli K-12 MG1655 section 1 of 400 of the complete genome');
2007 is($result->query_length, 720);
2008 $hit = $result->next_hit;
2009 is($hit->name, 'gi|1786181|gb|AE000111.1|AE000111');
2010 is($hit->ncbi_gi, 1786181);
2011 is($hit->description,'Escherichia coli K-12 MG1655 section 1 of 400 of the complete genome');
2012 is($hit->length, 720);
2013 $hsp = $hit->next_hsp;
2014 is($hsp->score, 1118);
2015 is($hsp->bits, 515);
2016 is(int $hsp->percent_identity, 95);
2017 cmp_ok($hsp->evalue, '==', 1e-151);
2018 is(int($hsp->frac_conserved*$hsp->length), 229);
2019 is($hsp->query->start, 1);
2020 is($hsp->query->end, 720);
2021 is($hsp->hit->start, 1);
2022 is($hsp->hit->end,720);
2024 is($hsp->query->frame,0);
2025 is($hsp->hit->frame,0);
2026 is($hsp->query->strand,1);
2027 is($hsp->hit->strand,1);
2029 # this is NCBI tblastn
2030 $searchio = Bio::SearchIO->new(-format => 'blast',
2031 -file => test_input_file('tblastn.out'));
2032 $result = $searchio->next_result;
2033 isa_ok($result,'Bio::Search::Result::ResultI');
2034 is($result->algorithm, 'TBLASTN');
2035 $hit = $result->next_hit;
2036 is($hit->name,'gi|10040111|emb|AL390796.6|AL390796');
2038 # test blasttable output
2039 my @eqset = qw( c200-vs-yeast.BLASTN.m9);
2040 $searchio = Bio::SearchIO->new(-file => test_input_file('c200-vs-yeast.BLASTN'),
2041 -format => 'blast');
2042 $result = $searchio->next_result;
2043 isa_ok($result,'Bio::Search::Result::ResultI');
2044 my %ref = &result2hash($result);
2045 is( scalar keys %ref, 67);
2046 $searchio = Bio::SearchIO->new(-file => test_input_file('c200-vs-yeast.BLASTN.m8'),
2047 -program_name => 'BLASTN',
2048 -format => 'blasttable');
2049 $result = $searchio->next_result;
2050 my %tester = &result2hash($result);
2051 is( scalar keys %tester, 67);
2052 foreach my $key ( sort keys %ref ) {
2053 is($tester{$key}, $ref{$key},$key);
2056 # test WU-BLAST blasttable output
2057 $searchio = Bio::SearchIO->new(-file => test_input_file('test1.wublastp'),
2058 -format => 'blast');
2059 $result = $searchio->next_result;
2060 isa_ok($result,'Bio::Search::Result::ResultI');
2061 my %wuref = &result2hash($result);
2062 is( scalar keys %wuref, 31);
2063 $searchio = Bio::SearchIO->new(-file => test_input_file('test1.blasttab3'),
2064 -program_name => 'BLASTP',
2065 -format => 'blasttable');
2066 $result = $searchio->next_result;
2067 my %wutester = &result2hash($result);
2068 is( scalar keys %wutester, 31);
2069 foreach my $key ( sort keys %ref ) {
2070 is($wutester{$key}, $wuref{$key},$key);
2073 # Test Blast parsing with B=0 (WU-BLAST)
2074 $searchio = Bio::SearchIO->new(-file => test_input_file('no_hsps.blastp'),
2075 -format => 'blast');
2076 $result = $searchio->next_result;
2077 is($result->query_name, 'mgri:MG00189.3');
2078 $hit = $result->next_hit;
2079 is($hit->name, 'mgri:MG00189.3');
2080 is($hit->description, 'hypothetical protein 6892 8867 +');
2081 is($hit->bits, 3098);
2082 is($hit->significance, '0.');
2084 $hit = $result->next_hit;
2085 is($hit->name, 'fgram:FG01141.1');
2086 is($hit->description, 'hypothetical protein 47007 48803 -');
2087 is($hit->bits, 2182);
2088 is($hit->significance, '4.2e-226');
2089 is($result->num_hits, 415);
2090 # Let's now test if _guess_format is doing its job correctly
2091 my %pair = ( 'filename.blast' => 'blast',
2092 'filename.bls' => 'blast',
2094 'f.tblx' => 'blast',
2095 'fast.bls' => 'blast',
2096 'f.fasta' => 'fasta',
2100 'f.ssearch' => 'fasta',
2101 'f.SSEARCH.m9' => 'fasta',
2103 'f.psearch' => 'fasta',
2104 'f.osearch' => 'fasta',
2105 'f.exon' => 'exonerate',
2106 'f.exonerate' => 'exonerate',
2107 'f.blastxml' => 'blastxml',
2108 'f.xml' => 'blastxml');
2109 while( my ($file,$expformat) = each %pair ) {
2110 is(Bio::SearchIO->_guess_format($file),$expformat, "$expformat for $file");
2114 # Test Wes Barris's reported bug when parsing blastcl3 output which
2115 # has integer overflow
2117 $searchio = Bio::SearchIO->new(-file => test_input_file('hsinsulin.blastcl3.blastn'),
2118 -format => 'blast');
2119 $result = $searchio->next_result;
2120 is($result->query_name, 'human');
2121 is($result->database_letters(), '-24016349');
2122 # this is of course not the right length, but is the what blastcl3
2123 # reports, the correct value is
2124 is($result->get_statistic('dbletters'),'192913178');
2125 is($result->get_statistic('dbentries'),'1867771');
2128 # test for links and groups being parsed out of WU-BLAST properly
2129 $searchio = Bio::SearchIO->new(-format => 'blast',
2130 -file => test_input_file('brassica_ATH.WUBLASTN'));
2131 ok($result = $searchio->next_result);
2132 ok($hit = $result->next_hit);
2133 ok($hsp = $hit->next_hsp);
2134 is($hsp->links,'(1)-3-2');
2135 is($hsp->query->strand, 1);
2136 is($hsp->hit->strand, 1);
2137 is($hsp->hsp_group, '1');
2139 ## Web blast result parsing
2141 $searchio = Bio::SearchIO->new(-format => 'blast',
2142 -file => test_input_file('catalase-webblast.BLASTP'));
2143 ok($result = $searchio->next_result);
2144 ok($hit = $result->next_hit);
2145 is($hit->name, 'gi|40747822|gb|EAA66978.1|', 'full hit name');
2146 is($hit->accession, 'EAA66978', 'hit accession');
2147 is($hit->ncbi_gi, 40747822);
2148 ok($hsp = $hit->next_hsp);
2149 is($hsp->query->start, 1, 'query start');
2150 is($hsp->query->end, 528, 'query start');
2152 # tests for new BLAST 2.2.13 output
2153 $searchio = Bio::SearchIO->new(-format => 'blast',
2154 -file => test_input_file('new_blastn.txt'));
2156 $result = $searchio->next_result;
2157 is($result->database_name, 'All GenBank+EMBL+DDBJ+PDB sequences (but no EST, STS,GSS,environmental samples or phase 0, 1 or 2 HTGS sequences)');
2158 is($result->database_entries, 3742891);
2159 is($result->database_letters, 16670205594);
2160 is($result->algorithm, 'BLASTN');
2161 is($result->algorithm_version, '2.2.13 [Nov-27-2005]');
2162 is($result->query_name, 'pyrR,');
2163 is($result->query_length, 558);
2164 is($result->get_statistic('kappa'), '0.711');
2165 is($result->get_statistic('kappa_gapped'), '0.711');
2166 is($result->get_statistic('lambda'), '1.37');
2167 is($result->get_statistic('lambda_gapped'), '1.37');
2168 is($result->get_statistic('entropy'), '1.31');
2169 is($result->get_statistic('entropy_gapped'), '1.31');
2170 is($result->get_statistic('dbletters'), '-509663586');
2171 is($result->get_statistic('dbentries'), 3742891);
2172 is($result->get_statistic('effective_hsplength'), undef);
2173 is($result->get_statistic('effectivespace'), undef);
2174 is($result->get_parameter('matrix'), 'blastn matrix:1 -3');
2175 is($result->get_parameter('gapopen'), 5);
2176 is($result->get_parameter('gapext'), 2);
2177 is($result->get_statistic('S2'), '60');
2178 is($result->get_statistic('S2_bits'), '119.4');
2179 is($result->get_parameter('expect'), '1e-23');
2180 is($result->get_statistic('num_extensions'), '117843');
2183 @valid = ( [ 'gi|41400296|gb|AE016958.1|', 4829781, 'AE016958', 41400296, '6e-059', 119, 236],
2184 [ 'gi|54013472|dbj|AP006618.1|', 6021225, 'AP006618', 54013472, '4e-026', 64, 127],
2185 [ 'gi|57546753|dbj|BA000030.2|', 9025608, 'BA000030', 57546753, '1e-023', 60, 119]);
2187 while( $hit = $result->next_hit ) {
2188 my $d = shift @valid;
2190 is($hit->name, shift @$d);
2191 is($hit->length, shift @$d);
2192 is($hit->accession, shift @$d);
2193 is($hit->ncbi_gi, shift @$d);
2194 is(sprintf("%g",$hit->significance), sprintf("%g",shift @$d) );
2195 is($hit->raw_score, shift @$d );
2196 is($hit->bits, shift @$d );
2200 while( my $hsp = $hit->next_hsp ) {
2201 is($hsp->query->start, 262);
2202 is($hsp->query->end, 552);
2203 is($hsp->hit->start, 1166897);
2204 is($hsp->hit->end, 1167187);
2205 is($hsp->length('hsp'), 291);
2206 is($hsp->start('hit'), $hsp->hit->start);
2207 is($hsp->end('query'), $hsp->query->end);
2208 is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
2209 cmp_ok($hsp->evalue, '==', 6e-59);
2210 is($hsp->score, 119);
2212 is(sprintf("%.2f",$hsp->percent_identity), 85.22);
2213 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.8522);
2214 is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.8522);
2220 last if( $count++ > @valid );
2225 $searchio = Bio::SearchIO->new(-format => 'blast',
2226 -file => test_input_file('blastp2215.blast'));
2228 $result = $searchio->next_result;
2229 is($result->database_entries, 4460989);
2230 is($result->database_letters, 1533424333);
2231 is($result->algorithm, 'BLASTP');
2232 is($result->algorithm_version, '2.2.15 [Oct-15-2006]');
2233 is($result->query_name, 'gi|15608519|ref|NP_215895.1|');
2234 is($result->query_gi, 15608519);
2235 is($result->query_length, 193);
2236 @hits = $result->hits;
2237 is(scalar(@hits), 10);
2238 is($hits[1]->accession,'1W30');
2239 is($hits[4]->significance,'2e-72');
2240 is($hits[7]->bits,'254');
2241 $result = $searchio->next_result;
2242 is($result->database_entries, 4460989);
2243 is($result->database_letters, 1533424333);
2244 is($result->algorithm, 'BLASTP');
2245 is($result->algorithm_version, '2.2.15 [Oct-15-2006]');
2246 is($result->query_name, 'gi|15595598|ref|NP_249092.1|');
2247 is($result->query_length, 423);
2248 @hits = $result->hits;
2249 is(scalar(@hits), 10);
2250 is($hits[1]->accession,'ZP_00972546');
2251 is($hits[2]->ncbi_gi, 116054132);
2252 is($hits[4]->significance, '0.0');
2253 is($hits[7]->bits, 624);
2256 $searchio = Bio::SearchIO->new(-format => 'blast',
2258 -file => test_input_file('bug2246.blast'));
2259 $result = $searchio->next_result;
2260 $hit = $result->next_hit;
2261 is $hit->name, 'UniRef50_Q9X0H5';
2263 is $hit->accession, 'UniRef50_Q9X0H5';
2264 is $hit->description, 'Cluster: Histidyl-tRNA synthetase; n=4; Thermoto...';
2266 is $hit->significance, 650;
2269 $searchio = Bio::SearchIO->new(-format => 'blast',
2271 -file => test_input_file('bug1986.blastp'));
2272 $result = $searchio->next_result;
2273 $hit = $result->next_hit;
2274 is $hit->name, 'ENSP00000350182';
2275 is $hit->length, 425;
2276 is $hit->accession, 'ENSP00000350182';
2277 is $hit->description, 'pep:novel clone::BX322644.8:4905:15090:-1 gene:ENSG00000137397 transcript:ENST00000357569 ';
2278 is $hit->raw_score, 301;
2280 is $hit->significance, 3e-27;
2281 $hit = $result->next_hit;
2282 is $hit->name, 'ENSP00000327738';
2283 is $hit->length, 468;
2284 is $hit->accession, 'ENSP00000327738';
2285 is $hit->description, 'pep:known-ccds chromosome:NCBI36:4:189297592:189305643:1 gene:ENSG00000184108 transcript:ENST00000332517 CCDS3851.1';
2286 is $hit->raw_score, 289;
2288 is $hit->significance, 8e-26;
2292 # handle at least the first iteration with BLAST searches using databases
2293 # containing non-unique IDs
2295 my $file = test_input_file('bug1986.blast2');
2297 open (my $IN,$file) or die $!;
2300 last if (/^Sequences/);
2308 my ($accession) = split(/\s+/);
2309 #print "Real Hit $count = $accession\n";
2310 $unique_accs{$accession}++;
2311 #last if ($count == 10);
2317 is (scalar(keys %unique_accs), 490);
2321 $searchio = Bio::SearchIO->new(-format => 'blast',
2324 $result = $searchio->next_result;
2326 while (my $hit = $result->next_hit) {
2327 $search_accs{$hit->accession}++;
2332 is (scalar(keys %search_accs), 490);
2334 is_deeply(\%unique_accs, \%search_accs);
2336 # bug 2391 - long query names
2338 $file = test_input_file('bug2391.megablast');
2340 $searchio = Bio::SearchIO->new(-format => 'blast',
2342 $result = $searchio->next_result;
2344 # data is getting munged up with long names
2345 is($result->query_name, 'c6_COX;c6_QBL;6|31508172;31503325;31478402|rs36223351|1|dbSNP|C/G');
2346 is($result->query_description, '');
2348 # bug 2399 - catching Expect(n) values
2350 $file = test_input_file('bug2399.tblastn');
2352 $searchio = Bio::SearchIO->new(-format => 'blast',
2355 while(my $query = $searchio->next_result) {
2356 while(my $subject = $query->next_hit) {
2357 $total_n += grep{$_->n} $subject->hsps;
2362 # bug 2473 - fasta3.4 parsing with -U option
2364 $file = test_input_file('bug2473.fasta');
2366 $searchio = Bio::SearchIO->new(-format => 'fasta',
2369 while(my $res = $searchio->next_result) {
2370 is($res->query_name, 'total:39860_L:12096_-3:12346_0:617_+3:14801');
2371 is($res->query_description, '');
2372 is($res->query_length, 22);
2373 is($res->algorithm, 'FASTN');
2376 # BLAST 2.2.18+ tabular output (has 13 columns instead of 12)
2377 $file = test_input_file('2008.blasttable');
2379 $searchio = Bio::SearchIO->new(-format => 'blasttable',
2382 while(my $res = $searchio->next_result) {
2383 is($res->query_name, 'gi|1786183|gb|AAC73113.1|');
2384 is($res->algorithm, 'BLASTP');
2385 is($res->algorithm_version, '2.2.18+');
2386 my $hit = $res->next_hit;
2387 is($hit->name, 'gi|34395933|sp|P00561.2|AK1H_ECOLI');
2388 $hit = $res->next_hit;
2389 my $hsp = $hit->next_hsp;
2390 is($hsp->bits, 331);
2391 is($hsp->evalue, '2e-91');
2392 is($hsp->start('hit'), 16);
2393 is($hsp->end('hit'), 805);
2394 is($hsp->start('query'), 5);
2395 is($hsp->end('query'), 812);
2396 is($hsp->length, 821);
2402 # a utility function for comparing result objects
2406 $hash{'query_name'} = $result->query_name;
2409 foreach my $hit ( $result->hits ) {
2410 $hash{"hit$hitcount\_name"} = $hit->name;
2411 # only going to test order of magnitude
2412 # too hard as these don't always match
2413 # $hash{"hit$hitcount\_signif"} =
2414 # ( sprintf("%.0e",$hit->significance) =~ /e\-?(\d+)/ );
2415 $hash{"hit$hitcount\_bits"} = sprintf("%d",$hit->bits);
2417 foreach my $hsp ( $hit->hsps ) {
2418 $hash{"hsp$hspcount\_bits"} = sprintf("%d",$hsp->bits);
2419 # only going to test order of magnitude
2420 # too hard as these don't always match
2421 # $hash{"hsp$hspcount\_evalue"} =
2422 # ( sprintf("%.0e",$hsp->evalue) =~ /e\-?(\d+)/ );
2423 $hash{"hsp$hspcount\_qs"} = $hsp->query->start;
2424 $hash{"hsp$hspcount\_qe"} = $hsp->query->end;
2425 $hash{"hsp$hspcount\_qstr"} = $hsp->query->strand;
2426 $hash{"hsp$hspcount\_hs"} = $hsp->hit->start;
2427 $hash{"hsp$hspcount\_he"} = $hsp->hit->end;
2428 $hash{"hsp$hspcount\_hstr"} = $hsp->hit->strand;
2430 #$hash{"hsp$hspcount\_pid"} = sprintf("%d",$hsp->percent_identity);
2431 #$hash{"hsp$hspcount\_fid"} = sprintf("%.2f",$hsp->frac_identical);
2432 $hash{"hsp$hspcount\_gaps"} = $hsp->gaps('total');
2442 Useful for debugging:
2444 if ($iter_count == 3) {
2446 foreach ($iter->newhits) {
2447 print " " . $_->name . "\n";
2449 print "\nOLDHITS:\n";
2450 foreach ($iter->oldhits) {
2451 print " " . $_->name . "\n";
2453 print "\nNEWHITS BELOW:\n";
2454 foreach ($iter->newhits_below_threshold) {
2455 print " " . $_->name . "\n";
2457 print "\nNEWHITS NOT BELOW:\n";
2458 foreach ($iter->newhits_not_below_threshold) {
2459 print " " . $_->name . "\n";
2461 print "\nNEWHITS UNCLASSIFIED:\n";
2462 foreach ($iter->newhits_unclassified) {
2463 print " " . $_->name . "\n";
2465 print "\nOLDHITS BELOW:\n";
2466 foreach ($iter->oldhits_below_threshold) {
2467 print " " . $_->name . "\n";
2469 print "\nOLDHITS NEWLY BELOW:\n";
2470 foreach ($iter->oldhits_newly_below_threshold) {
2471 print " " . $_->name . "\n";
2473 print "\nOLDHITS NOT BELOW:\n";
2474 foreach ($iter->oldhits_not_below_threshold) {
2475 print " " . $_->name . "\n";