sync w/ main trunk
[bioperl-live.git] / t / SearchIO / blastxml.t
blob2998f8329329f0388252719a270c92e2e8c186d0
1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id: SearchIO.t 14995 2008-11-16 06:20:00Z cjfields $
4 use strict;
# Test bootstrap: load the BioPerl test helpers, declare the test plan and
# check that the module under test can be loaded.
6 BEGIN {
7         use lib '.';
8     use Bio::Root::Test;
9     
    # The plan is fixed at 391 tests and XML::SAX is named as a required
    # module. NOTE(review): presumably test_begin skips the whole script when
    # XML::SAX cannot be loaded -- confirm against Bio::Root::Test.
10     test_begin(-tests => 391,
11                            -requires_module => 'XML::SAX');
12         
    # NOTE(review): use_ok() is presumably counted as one of the 391 planned
    # tests -- confirm.
13         use_ok('Bio::SearchIO');
# Handles shared by the sections below: the parser, the current result, and
# the current hit and HSP.
# NOTE(review): $iter is declared here, but the iteration loops visible later
# in this file declare their own `my $iter`; it looks unused -- confirm.
16 my ($searchio, $result,$iter,$hit,$hsp);
18 # XML encoding/decoding is done within XML::SAX now, though some parsers
19 # do not work properly (XML::SAX::PurePerl, XML::LibXML::SAX)
# The first parser is built and the first result fetched inside eval, so a
# failure ends up in $@ (inspected by the SKIP block that follows) instead of
# aborting the script.
21 eval {
22         # test with RPSBLAST data first
23         # this needs to be eval'd b/c the XML::SAX parser object is
24         # instantiated in the constructor
25         $searchio = Bio::SearchIO->new('-tempfile' => 1,
26                    '-format' => 'blastxml',
27                    '-file'   => test_input_file('ecoli_domains.rps.xml'),
28                    '-blasttype' => 'blast',
29                    '-verbose' => -1);
30         # PurePerl works with these BLAST reports, so removed verbose promotion
31         $result = $searchio->next_result;
         # A missing first result is treated as a setup failure as well.
         # NOTE(review): die is called without a message, so the text that
         # reaches the later skip() reason is generic -- consider adding one.
32         die if !defined $result;
# The tests in this block depend on the parser and first result produced by
# the eval above; if that eval left an error in $@ they are skipped.
35 SKIP: {
36         # this should be fixed with newer installations of XML::SAX::Expat, but as we
37         # don't require a certain version (multiple backends can be used) we catch
38         # and skip if needed
         # Two skip reasons: a known external-entity failure, or any other error.
         # NOTE(review): both branches skip 297 tests while the plan at the top
         # of the file declares 391. If this block holds more than 297 tests the
         # counts will not add up when the skip path is taken -- confirm the
         # number against the full file.
39         if ($@ && $@ =~ m{Handler could not resolve external entity}) {
40                 skip("XML::SAX::Expat does not work with XML tests; skipping",297);
41         } elsif ($@) {
42                 skip("Problem with XML::SAX setup: $@. Check ParserDetails.ini; skipping XML tests",297);
43         }
44         is($searchio->result_count, 1);
45         
46         # basic ResultI data
47         isa_ok($result, 'Bio::Search::Result::ResultI');
48         is($result->database_name, '/data_2/jason/db/cdd/cdd/Pfam', 'database_name()');
49         is($result->query_name,'gi|1786182|gb|AAC73112.1|','query_name()');
50         is($result->query_description, '(AE000111) thr operon leader peptide [Escherichia coli]');
51         is($result->query_accession, 'AAC73112.1');
52         is($result->query_gi, 1786182);
53         is($result->query_length, 21);
54         is($result->algorithm, 'BLASTP');
55         is($result->algorithm_version, 'blastp 2.1.3 [Apr-1-2001]');
56         
57         # check parameters
58         is($result->available_parameters, 8);
59         is($result->get_parameter('matrix'), 'BLOSUM62');
60         float_is($result->get_parameter('expect'), '1e-05');
61         is($result->get_parameter('include'), 0);
62         is($result->get_parameter('match'), 0);
63         is($result->get_parameter('mismatch'), 0);
64         is($result->get_parameter('gapopen'), 11);
65         is($result->get_parameter('gapext'), 1);
66         is($result->get_parameter('filter'), 'F');
67         
68         # check statistics
69         is($result->available_statistics, 5);
70         is($result->database_entries, 0);
71         is($result->database_letters, 0);
72         is($result->get_statistic('hsplength'), 0);
73         float_is($result->get_statistic('effectivespace'), 4.16497e+11);
74         is($result->get_statistic('lambda'), 0.267);
75         is($result->get_statistic('kappa'), 0.041);
76         is($result->get_statistic('entropy'), 0.14);
77         
78         # this result actually has a hit
79         $result = $searchio->next_result;
80         
81         # does the parser catch everything in the next result?
82         is($result->database_name, '/data_2/jason/db/cdd/cdd/Pfam', 'database_name()');
83         is($result->query_name,'gi|1786183|gb|AAC73113.1|');
84         is($result->query_description, '(AE000111) aspartokinase I, homoserine dehydrogenase I [Escherichia coli]');
85         is($result->query_accession, 'AAC73113.1');
86         is($result->query_gi, 1786183);
87         is($result->query_length, 820);
88         is($result->algorithm, 'BLASTP');
89         is($result->algorithm_version, 'blastp 2.1.3 [Apr-1-2001]');    
90         
91         is($searchio->result_count, 2);
93         # check parameters
94         is($result->available_parameters, 8);
95         is($result->get_parameter('matrix'), 'BLOSUM62');
96         float_is($result->get_parameter('expect'), '1e-05');
97         is($result->get_parameter('include'), 0);
98         is($result->get_parameter('match'), 0);
99         is($result->get_parameter('mismatch'), 0);
100         is($result->get_parameter('gapopen'), 11);
101         is($result->get_parameter('gapext'), 1);
102         is($result->get_parameter('filter'), 'F');
103         
104         # check statistics
105         is($result->available_statistics, 5);
106         is($result->database_entries, 0);
107         is($result->database_letters, 0);
108         is($result->get_statistic('hsplength'), 0);
109         float_is($result->get_statistic('effectivespace'), 3.82682e+07);
110         is($result->get_statistic('lambda'), 0.267);
111         is($result->get_statistic('kappa'), 0.041);
112         is($result->get_statistic('entropy'), 0.14);
113         
114         $hit = $result->next_hit;
115         is($hit->name, 'gnl|Pfam|pfam00742');
116         is($hit->description(), 'HomoS_dh, HomoS dehydrogenase');
117         is($hit->accession, 'pfam00742');
118         is($hit->ncbi_gi, ''); # not found
119         is($hit->length, 310);
120         
121         $hsp = $hit->next_hsp;
122         is($hsp->query->seq_id, $result->query_name,'query name on HSP');
123         is($hsp->query->seqdesc, $result->query_description,'query desc on HSP');
124         is($hsp->hit->seq_id, $hit->name,'hitname');
125         is($hsp->hit->seqdesc, $hit->description,'hitdesc');
126         is($hsp->pvalue, undef);
127         float_is($hsp->evalue, 1.46134e-90);
128         is($hsp->score, 838);
129         is($hsp->bits,327.405);
130         is($hsp->query->start, 498);
131         is($hsp->query->end,815);
132         is($hsp->hit->start, 3);
133         is($hsp->hit->end, 310);
134         is($hsp->query->frame,0);
135         is($hsp->hit->frame,0);
136         is(sprintf("%.2f", $hsp->percent_identity), 37.73);
137         is(sprintf("%.4f", $hsp->frac_identical('hit')), 0.3994);
138         is(sprintf("%.4f", $hsp->frac_identical('query')), 0.3868);
139         is(sprintf("%.4f",$hsp->query->frac_identical), 0.3868);
140         
141         is(sprintf("%.4f",$hsp->frac_conserved('total')),0.5245);
142         is(sprintf("%.4f",$hsp->frac_conserved('hit')),0.5552);
143         is(sprintf("%.4f",$hsp->frac_conserved('query')),0.5377);
144         # gaps should match calculated sequence indices for gaps and vice versa
145         is($hsp->gaps('total'), $hsp->seq_inds('hit', 'gaps') + $hsp->seq_inds('query', 'gaps'));
146         is($hsp->gaps('hit'), $hsp->seq_inds('hit', 'gaps'));
147         is($hsp->gaps('query'), $hsp->seq_inds('query', 'gaps'));
148         is($hsp->length('total'), 326);
149         is($hsp->query_string, 'LRVCGVANSKALLTNVHGLNLENWQEELAQAKEPF-NLGRLIRLVKEYHLLN----PVIVDCTSSQAVAD-QYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDE-GMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARET-GRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLS');
150         is($hsp->hit_string, 'GVVTGITDSREMLLSRIGLPLEIWKVALRDLEKPRKDLGKLDLTDDAFAVVDDPDIDVVVELTGGIEVARELYLDALEEGKHVVTANKALNASHGDEYLAL---AEKSGVDVLYEAAVAGGIPIIKTLRELLATGDRILKIEGIFNGTTNFILSEMDEKGLPFSDVLAEAQELGYTEADPRDDVEGIDAARKLAILARIAFGIELELDDVYVEGISPITAEDISSADEFGYTLKLLDEAMRQRVEDAESGGEVLRYPTLIPE-------------DHPLASVKGSDNAVAVEGEAYG--PLMFYGPGAGAEPTASAVVADIVRIAR');
151         is($hsp->homology_string, '  V G+ +S+ +L +  GL LE W+  L   ++P  +LG+L      + +++     V+V+ T    VA   Y D L EG HVVT NK  N S  D Y  L   AEKS    LY+  V  G+P+I+ L+ LL  GD ++K  GI +G+ ++I  ++DE G+ FS+    A+E+GYTE DPRDD+ G+D ARKL ILAR   G ELEL D+ +E + P           F   L  LD+    RV  A   G+VLRY   I E             + PL  VK  +NA+A     Y   PL+  G GAG + TA+ V AD++R   ');
152         is(join(' ', $hsp->seq_inds('query', 'gap',1)), '532 548 562 649 690');
153         is($hsp->ambiguous_seq_inds, '');
154         
155         # one more: the second result must yield a further hit object
156         $hit = $result->next_hit;
157         isa_ok($hit,'Bio::Search::Hit::HitI');
158         
         # Drain the rest of the report. Two results have been examined so far;
         # exactly eight more are expected. Each one adds an ok() test and
         # decrements the counter, which must end at zero. $result is left
         # false once the loop finishes.
159         my $results_left = 8;
160         while( $result = $searchio->next_result ) { ok($result); $results_left--; }
161         is($results_left, 0);
162         
163         $searchio = Bio::SearchIO->new(-format => 'blastxml',
164                                                                   -verbose => -1,
165                                   -file => test_input_file('plague_yeast.bls.xml'));
166         
167         $result = $searchio->next_result;
168         is($searchio->result_count, 1);
169         is($result->database_name, 'yeast.aa');
170         is($result->query_name, 'gi|5763811|emb|CAB53164.1|');
171         is($result->query_description,  'putative transposase [Yersinia pestis]');
172         is($result->query_accession, 'CAB53164.1');
173         is($result->query_gi, 5763811);  
174         is($result->query_length, 340);
175         
176         $hit = $result->next_hit;
177         ok(! $hit);
178         
179         $searchio = Bio::SearchIO->new(-format => 'blastxml',
180                                                                   -verbose => -1,
181                                   -file => test_input_file('mus.bls.xml'));
182         
183         $result = $searchio->next_result;
184         is($searchio->result_count, 1);
185         is($result->database_name,'Hs15_up1000');
186         is($result->query_name,'NM_011441_up_1000_chr1_4505586_r');
187         is($result->query_description,'chr1:4505586-4506585');
188         is($result->query_accession,'NM_011441_up_1000_chr1_4505586_r');
189         is($result->query_gi, '');
190         is($result->query_length,'1000');
191         
192         # check parameters
193         is($result->available_parameters, 6);
194         is($result->get_parameter('matrix'), undef); # not set
195         float_is($result->get_parameter('expect'), 10);
196         is($result->get_parameter('include'), undef); # not set
197         is($result->get_parameter('match'), 1);
198         is($result->get_parameter('mismatch'), -3);
199         is($result->get_parameter('gapopen'), 5);
200         is($result->get_parameter('gapext'), 2);
201         is($result->get_parameter('filter'), 'D');
202         
203         # check statistics
204         is($result->available_statistics, 5);
205         is($result->database_entries, 17516);
206         is($result->database_letters, 17516000);
207         is($result->get_statistic('hsplength'), 0);
208         float_is($result->get_statistic('effectivespace'), 1.69255e+10);
209         is($result->get_statistic('lambda'), 1.37407);
210         is($result->get_statistic('kappa'), 0.710605);
211         is($result->get_statistic('entropy'), 1.30725);
212         
213         $hit = $result->next_hit;
214         is($hit->name,'NM_001938_up_1000_chr1_93161154_f');
215         is($hit->description,'chr1:93161154-93162153');
216         is($hit->ncbi_gi, ''); # none reported
217         is($hit->accession,'3153');
218         is($hit->length,'1000');
219         
220         # deal with new BLAST XML changes
221         $searchio = Bio::SearchIO->new(-format => 'blastxml',
222                                                                   -verbose => -1,
223                                   -file => test_input_file('newblast.xml'));
224         
225         $result = $searchio->next_result;
226         is($searchio->result_count, 1);
227         is($result->database_name,'nr');
228         is($result->algorithm,'BLASTP');
229         is($result->algorithm_version,'BLASTP 2.2.15 [Oct-15-2006]');
230         # some XML::SAX parsers (PurePerl, XML::SAX::LibXML) don't decode entities
231         # properly, not fixable using decode_entities()
232         like($result->algorithm_reference, qr{Nucleic Acids Res} ); 
233         is($result->query_name,'gi|15600734|ref|NP_254228.1|');
234         is($result->query_description,'dihydroorotase [Pseudomonas aeruginosa PAO1]');
235         is($result->query_accession,'NP_254228.1');
236         is($result->query_gi, 15600734);
237         is($result->query_length,'445');        
239         # check parameters
240         is($result->available_parameters, 4);
241         is($result->get_parameter('matrix'), 'BLOSUM62'); 
242         float_is($result->get_parameter('expect'), 10);
243         is($result->get_parameter('include'), undef); # not set
244         is($result->get_parameter('match'), undef);   # not set
245         is($result->get_parameter('mismatch'), undef);# not set
246         is($result->get_parameter('gapopen'), 11);
247         is($result->get_parameter('gapext'), 1);
248         is($result->get_parameter('filter'), undef);  # not set
249         
250         # check statistics
251         is($result->available_statistics, 5);
252         is($result->database_entries, 4299737);
253         is($result->database_letters, 1479795817);
254         is($result->get_statistic('hsplength'), 0);
255         float_is($result->get_statistic('effectivespace'), 0);
256         is($result->get_statistic('lambda'), 0.267);
257         is($result->get_statistic('kappa'), 0.041);
258         is($result->get_statistic('entropy'), 0.14);
259         
260         $hit = $result->next_hit;
261         is($hit->name,'gi|15600734|ref|NP_254228.1|');
262         is($hit->description,'dihydroorotase [Pseudomonas aeruginosa PAO1] '.
263            '>gi|107104643|ref|ZP_01368561.1| hypothetical protein PaerPA_01005722 '.
264            '[Pseudomonas aeruginosa PACS2] >gi|9951880|gb|AAG08926.1|AE004966_8 '.
265            'dihydroorotase [Pseudomonas aeruginosa PAO1]');
266         is($hit->accession,'NP_254228');
267         is($hit->length,'445');
268         $hsp = $hit->next_hsp;
269         is($hsp->query->seq_id, $result->query_name,'query name on HSP');
270         is($hsp->query->seqdesc, $result->query_description,'query desc on HSP');
271         is($hsp->hit->seq_id, $hit->name,'hitname');
272         is($hsp->hit->seqdesc, $hit->description,'hitdesc');
273         is($hsp->pvalue, undef);
274         float_is($hsp->evalue, 0);
275         is($hsp->score, 2251);
276         is($hsp->bits,871.692);
277         is($hsp->query->start, 1);
278         is($hsp->query->end,445);
279         is($hsp->hit->start, 1);
280         is($hsp->hit->end, 445);
281         is($hsp->query->frame,0);
282         is($hsp->hit->frame,0);
283         
284         $result = $searchio->next_result;
285         is($searchio->result_count, 2);
286         is($result->database_name,'nr'); 
287         is($result->algorithm,'BLASTP');
288         is($result->algorithm_version,'BLASTP 2.2.15 [Oct-15-2006]'); 
289         like($result->algorithm_reference, qr{Nucleic Acids Res} );
290         is($result->query_name,'gi|15598723|ref|NP_252217.1|');
291         is($result->query_description,'dihydroorotase [Pseudomonas aeruginosa PAO1]');
292         is($result->query_accession,'NP_252217.1');
293         is($result->query_gi, 15598723);
294         is($result->query_length,'348');
295         
296         # check parameters
297         is($result->available_parameters, 4);
298         is($result->get_parameter('matrix'), 'BLOSUM62'); 
299         float_is($result->get_parameter('expect'), 10);
300         is($result->get_parameter('include'), undef); # not set
301         is($result->get_parameter('match'), undef);   # not set
302         is($result->get_parameter('mismatch'), undef);# not set
303         is($result->get_parameter('gapopen'), 11);
304         is($result->get_parameter('gapext'), 1);
305         is($result->get_parameter('filter'), undef);  # not set
306         
307         # check statistics
308         is($result->available_statistics, 5);
309         is($result->database_entries, 4299737);
310         is($result->database_letters, 1479795817);
311         is($result->get_statistic('hsplength'), 0);
312         float_is($result->get_statistic('effectivespace'), 0);
313         is($result->get_statistic('lambda'), 0.267);
314         is($result->get_statistic('kappa'), 0.041);
315         is($result->get_statistic('entropy'), 0.14);
317         $hit = $result->next_hit;
318         is($hit->name,'gi|15598723|ref|NP_252217.1|');
319         is($hit->description,'dihydroorotase [Pseudomonas aeruginosa PAO1] '.
320            '>gi|6226683|sp|P72170|PYRC_PSEAE Dihydroorotase (DHOase) '.
321            '>gi|9949676|gb|AAG06915.1|AE004773_4 dihydroorotase [Pseudomonas aeruginosa PAO1] '.
322            '>gi|3868712|gb|AAC73109.1| dihydroorotase [Pseudomonas aeruginosa]');
323         is($hit->ncbi_gi, 15598723);
324         is($hit->accession,'NP_252217');
325         is($hit->length,'348');
326         $hsp = $hit->next_hsp;
327         is($hsp->query->seq_id, $result->query_name,'query name on HSP');
328         is($hsp->query->seqdesc, $result->query_description,'query desc on HSP');
329         is($hsp->hit->seq_id, $hit->name,'hitname');
330         is($hsp->hit->seqdesc, $hit->description,'hitdesc');
331         is($hsp->pvalue, undef);
332         float_is($hsp->evalue, 0);
333         is($hsp->score, 1780);
334         is($hsp->bits,690.263);
335         is($hsp->query->start, 1);
336         is($hsp->query->end,348);
337         is($hsp->hit->start, 1);
338         is($hsp->hit->end, 348);
339         is($hsp->query->frame,0);
340         is($hsp->hit->frame,0);
341         
342         # PSIBLAST XML parsing 
343         
344         $searchio = Bio::SearchIO->new('-tempfile' => 1,
345                    '-format' => 'blastxml',
346                    '-file'   => test_input_file('psiblast.xml'),
347                    '-blasttype' => 'psiblast');
348         
349         $result = $searchio->next_result;
350         is($searchio->result_count, 1);    
351         is($result->database_name, 'AL591824.faa');
352         is($result->algorithm, 'BLASTP');
353         like($result->algorithm_version, qr/2\.2\.16/);
354         is($result->query_name, 'gi|1373160|gb|AAB57770.1|');
355         is($result->query_accession, 'AAB57770.1');
356         is($result->query_gi, '1373160');
357         is($result->query_length, 173);
358         
359         # check parameters
360         is($result->available_parameters, 6);
361         is($result->get_parameter('matrix'), 'BLOSUM62'); 
362         float_is($result->get_parameter('expect'), 10);
363         is($result->get_parameter('include'), 0.002); 
364         is($result->get_parameter('match'), undef);   # not set
365         is($result->get_parameter('mismatch'), undef);# not set
366         is($result->get_parameter('gapopen'), 11);
367         is($result->get_parameter('gapext'), 1);
368         is($result->get_parameter('filter'), 'F');  
369         
370         # check statistics
371         is($result->available_statistics, 5);
372         is($result->database_entries, 2846);
373         is($result->database_letters, 870878);
374         is($result->get_statistic('hsplength'), 75);
375         float_is($result->get_statistic('effectivespace'), 6.44279e+07);
376         is($result->get_statistic('lambda'), 0.267);
377         is($result->get_statistic('kappa'), 0.0475563);
378         is($result->get_statistic('entropy'), 0.14);
379         
         # Walk the iterations of the first PSIBLAST result and compare them
         # against two expectation tables, consumed destructively with shift.
380         my $iter_count = 0;
         # One row per hit checked, in the order the fields are shifted below:
         # name, length, accession, ncbi_gi, significance, bits.
381         my @valid_hit_data = ( [ 'gi|16411294|emb|CAC99918.1|', 183, 'CAC99918', 16411294, '4.5377e-56', 209.92],
382                                    [ 'gi|16409584|emb|CAD00746.1|', 648, 'CAD00746', 16409584, '0.000286309', 37.7354],
383                                    [ 'gi|16411285|emb|CAC99909.1|', 209, 'CAC99909', 16411285, '0.107059', 29.261]);
         # One row per iteration, in the order the fields are shifted below:
         # num_hits, num_hits_new, num_hits_old, newhits_below_threshold,
         # newhits_not_below_threshold, newhits_unclassified,
         # oldhits_below_threshold, oldhits_newly_below_threshold,
         # oldhits_not_below_threshold.
384         my @valid_iter_data = ( [ 16, 16, 0, 2, 14, 0, 0, 0, 0],
385                                 [ 16, 8, 8, 0, 8, 0, 2, 0, 6]);
386         
387         while (my $iter = $result->next_iteration) {
388                 $iter_count++;
389                 my $di = shift @valid_iter_data;
                 # iteration numbers are expected to run 1, 2, ...
390                 is($iter->number, $iter_count);
391                 is($iter->num_hits, shift @$di);
392                 is($iter->num_hits_new, shift @$di);
393                 is($iter->num_hits_old, shift @$di);
                 # scalar() forces each hit-list accessor to be evaluated in scalar context
394                 is(scalar($iter->newhits_below_threshold), shift @$di);
395                 is(scalar($iter->newhits_not_below_threshold), shift @$di);
396                 is(scalar($iter->newhits_unclassified), shift @$di);
397                 is(scalar($iter->oldhits_below_threshold), shift @$di);
398                 is(scalar($iter->oldhits_newly_below_threshold), shift @$di);
399                 is(scalar($iter->oldhits_not_below_threshold), shift @$di);
400                 my $hit_count = 0;
                 # Hit-level checks run only during the first iteration; hits
                 # are pulled from $result, not from $iter.
401                 if ($iter_count == 1) {
402                         while( my $hit = $result->next_hit ) {
403                                 my $d = shift @valid_hit_data;
404                                 is($hit->name, shift @$d);
405                                 is($hit->length, shift @$d);
406                                 is($hit->accession, shift @$d);
407                                 is($hit->ncbi_gi, shift @$d);
408                                 float_is($hit->significance, shift @$d);
409                                 is($hit->bits, shift @$d );
                                 # $hit_count is incremented at the bottom of the
                                 # loop, so 1 selects the second hit; only that hit
                                 # has its HSP examined.
410                                 if( $hit_count == 1 ) {
                                         # exactly one HSP is expected: the counter
                                         # starts at 1 and must reach 0
411                                         my $hsps_left = 1;
412                                         while( my $hsp = $hit->next_hsp ){
413                                                 is($hsp->query->start, 4);
414                                                 is($hsp->query->end, 155);
415                                                 is($hsp->hit->start, 475);
416                                                 is($hsp->hit->end, 617);
417                                                 is($hsp->length('total'), 153);
418                                                 is($hsp->start('hit'), $hsp->hit->start);
419                                                 is($hsp->end('query'), $hsp->query->end);
420                                                 is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
421                                                 float_is($hsp->evalue, 0.000286309);
422                                                 is($hsp->score, 86);
423                                                 is($hsp->bits, 37.7354);
424                                                 is(sprintf("%.1f",$hsp->percent_identity), 20.9);
425                                                 is(sprintf("%.4f",$hsp->frac_identical('query')), 0.2105);
426                                                 is(sprintf("%.3f",$hsp->frac_identical('hit')), 0.224);
427                                                 is($hsp->gaps('total'), 11);
428                                                 $hsps_left--;
429                                         }
430                                         is($hsps_left, 0);
431                                 }
                                 # Compares the pre-increment hit count with the
                                 # number of expectation rows still unused. With
                                 # three rows this first becomes true on the third
                                 # hit (2 > 0), so the loop stops once the table
                                 # has been consumed.
432                                 last if( $hit_count++ > @valid_hit_data );
433                         }
434                 }
435         }
         # Both expectation tables must be fully consumed (the arrays are
         # compared by element count), and exactly two iterations seen.
436         is(@valid_hit_data, 0);
437         is(@valid_iter_data, 0);
438         is($iter_count, 2);
439         
440         $result = $searchio->next_result;
441         is($searchio->result_count, 2);    
442         is($result->database_name, 'AL591824.faa');
443         is($result->algorithm, 'BLASTP');
444         like($result->algorithm_version, qr/2\.2\.16/);
445         is($result->query_name, 'gi|154350371|gb|ABS72450.1|');
446         is($result->query_accession, 'ABS72450.1');
447         is($result->query_gi, '154350371');
448         is($result->query_length, 378);
450         # check parameters
451         is($result->available_parameters, 6);
452         is($result->get_parameter('matrix'), 'BLOSUM62'); 
453         float_is($result->get_parameter('expect'), 10);
454         is($result->get_parameter('include'), 0.002); 
455         is($result->get_parameter('match'), undef);   # not set
456         is($result->get_parameter('mismatch'), undef);# not set
457         is($result->get_parameter('gapopen'), 11);
458         is($result->get_parameter('gapext'), 1);
459         is($result->get_parameter('filter'), 'F');  
460         
461         # check statistics
462         is($result->available_statistics, 5);
463         is($result->database_entries, 2846);
464         is($result->database_letters, 870878);
465         is($result->get_statistic('hsplength'), 82);
466         float_is($result->get_statistic('effectivespace'), 1.88702e+08);
467         is($result->get_statistic('lambda'), 0.267);
468         is($result->get_statistic('kappa'), 0.0450367);
469         is($result->get_statistic('entropy'), 0.14);
470         
         # Same iteration walk for the second PSIBLAST result. The counter and
         # both expectation tables are reassigned, not re-declared.
471         $iter_count = 0;
472         
         # One row per hit checked, in the order the fields are shifted below:
         # name, length, accession, ncbi_gi, significance, bits.
473         @valid_hit_data = ( [ 'gi|16409361|emb|CAC98217.1|', 381, 'CAC98217', 16409361, '5.57178e-119', 420.239],
474                                    [ 'gi|16409959|emb|CAC98662.1|', 776, 'CAC98662', 16409959, '0.0242028', 32.7278],
475                                    [ 'gi|16410942|emb|CAC99591.1|', 382, 'CAC99591', 16410942, '0.340848', 28.8758]);
         # One row per iteration: num_hits, num_hits_new, num_hits_old, then the
         # six new/old hit-class counts in the order they are checked below.
476         @valid_iter_data = ( [ 11, 11, 0, 1, 10, 0, 0, 0, 0],
477                                 [ 19, 11, 8, 0, 11, 0, 1, 0, 7]);
478         
479         while (my $iter = $result->next_iteration) {
480                 $iter_count++;
481                 my $di = shift @valid_iter_data;
                 # iteration numbers are expected to run 1, 2, ...
482                 is($iter->number, $iter_count);
483                 is($iter->num_hits, shift @$di);
484                 is($iter->num_hits_new, shift @$di);
485                 is($iter->num_hits_old, shift @$di);
                 # scalar() forces each hit-list accessor to be evaluated in scalar context
486                 is(scalar($iter->newhits_below_threshold), shift @$di);
487                 is(scalar($iter->newhits_not_below_threshold), shift @$di);
488                 is(scalar($iter->newhits_unclassified), shift @$di);
489                 is(scalar($iter->oldhits_below_threshold), shift @$di);
490                 is(scalar($iter->oldhits_newly_below_threshold), shift @$di);
491                 is(scalar($iter->oldhits_not_below_threshold), shift @$di);
492                 my $hit_count = 0;
                 # Hit-level checks run only during the first iteration; hits
                 # are pulled from $result, not from $iter.
493                 if ($iter_count == 1) {
494                         while( my $hit = $result->next_hit ) {
495                                 my $d = shift @valid_hit_data;
496                                 is($hit->name, shift @$d);
497                                 is($hit->length, shift @$d);
498                                 is($hit->accession, shift @$d);
499                                 is($hit->ncbi_gi, shift @$d);
500                                 float_is($hit->significance, shift @$d);
501                                 is($hit->bits, shift @$d );
                                 # $hit_count is incremented at the bottom of the
                                 # loop, so 1 selects the second hit; only that hit
                                 # has its HSP examined.
502                                 if( $hit_count == 1 ) {
                                         # exactly one HSP is expected: the counter
                                         # starts at 1 and must reach 0
503                                         my $hsps_left = 1;
504                                         while( my $hsp = $hit->next_hsp ){
505                                                 is($hsp->query->start, 63);
506                                                 is($hsp->query->end, 181);
507                                                 is($hsp->hit->start, 304);
508                                                 is($hsp->hit->end, 432);
509                                                 is($hsp->length('total'), 129);
510                                                 is($hsp->start('hit'), $hsp->hit->start);
511                                                 is($hsp->end('query'), $hsp->query->end);
512                                                 is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit
513                                                 float_is($hsp->evalue, 0.0242028);
514                                                 is($hsp->score, 73);
515                                                 is($hsp->bits, 32.7278);
                                                 # Expected values are quoted so their
                                                 # trailing zeros survive; a bare 24.0 or
                                                 # 0.240 would stringify as 24 / 0.24.
                                                 # NOTE(review): presumably is() compares
                                                 # as strings -- confirm.
516                                                 is(sprintf("%.1f",$hsp->percent_identity), '24.0');
517                                                 is(sprintf("%.4f",$hsp->frac_identical('query')), '0.2605');
518                                                 is(sprintf("%.3f",$hsp->frac_identical('hit')), '0.240');
                                                 # NOTE(review): gaps() is called without
                                                 # a type argument here, whereas the
                                                 # earlier PSIBLAST loop passes 'total' --
                                                 # confirm the default is equivalent.
519                                                 is($hsp->gaps, 10);
520                                                 $hsps_left--;
521                                         }
522                                         is($hsps_left, 0);
523                                 }
                                 # Compares the pre-increment hit count with the
                                 # number of expectation rows still unused. With
                                 # three rows this first becomes true on the third
                                 # hit (2 > 0), so the loop stops once the table
                                 # has been consumed.
524                                 last if( $hit_count++ > @valid_hit_data );
525                         }
526                 }
527         }
         # Both expectation tables must be fully consumed (the arrays are
         # compared by element count), and exactly two iterations seen.
528         is(@valid_hit_data, 0);
529         is(@valid_iter_data, 0);
530         is($iter_count, 2);