Fix bug 253 testing for defined
[bioperl-live.git] / t / SearchIO / hmmer_pull.t
blob00b9f7d4d6188d9d1042542638186b16be4a323b
1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id: SearchIO_hmmer_pull.t 14984 2008-11-11 18:39:20Z sendu $
4 use strict;
6 BEGIN {     
7     use lib '.';
8     use Bio::Root::Test;
9     
10     test_begin(-tests => 290);
11         
12         use_ok('Bio::SearchIO');
15 my $searchio = Bio::SearchIO->new(-format => 'hmmer_pull', -file => test_input_file('hmmpfam_fake.out'), -verbose => -1);
16 my @data = ([qw(roa1_drome roa2_drome)], [2, 1], [1, 2], [2, 1]);
17 while (my $result = $searchio->next_result) {
18     is ref($result), 'Bio::Search::Result::HmmpfamResult';
19     is $result->algorithm, 'HMMPFAM';
20     is $result->algorithm_version, '2.1.1';
21     is $result->hmm_name, 'pfam';
22     is $result->hmm_file, $result->hmm_name;
23     is $result->database_name, $result->hmm_name;
24     is $result->sequence_file, '/home/birney/src/wise2/example/road.pep';
25     is $result->sequence_database, $result->sequence_file;
26     is $result->query_name, shift @{$data[0]};
27     is $result->num_hits(), shift @{$data[1]};
28     is $result->no_hits_found, 0;
29     
30         is $result->query_accession, '';
31     is $result->query_description, '';
32     ok ! $result->query_length;
33     ok ! $result->database_letters;
34     ok ! $result->database_entries;
35     is $result->algorithm_reference, '';
36     is $result->get_parameter('test'), undef;
37     is $result->available_parameters, undef;
38     is $result->get_statistic('test'), undef;
39     is $result->available_statistics, undef;
40     
41     my @orig_order = $result->hits;
42     is @orig_order, shift @{$data[3]};
43         if (@orig_order > 1) {
44                 isnt $orig_order[0]->name, $orig_order[1]->name;
45                 $result->sort_hits(sub{$Bio::Search::Result::HmmpfamResult::a->[2]
46                                                                                                         <=> 
47                                                            $Bio::Search::Result::HmmpfamResult::b->[2]});
48                 my @hits = $result->hits;
49                 is @hits, @orig_order;
50                 is $hits[0]->name, $orig_order[1]->name;
51                 $result->sort_hits(sub{$Bio::Search::Result::HmmpfamResult::b->[4]
52                                                                                                         <=> 
53                                                            $Bio::Search::Result::HmmpfamResult::a->[4]});
54         }
55     
56     my @hit_data = ([qw(SEED TEST)], [146.1, "5.0"], [6.3e-40, 7.2], [2, 1], [77, undef], [2, 0], [1, 2],
57                     ["33 34 36 38 43 45 47 48 51 53 55 57 58 65 68 71 73 74 76 88 98 99 124 125 126 127 129 132 135 140 142 145 146 148 149 151 153 154 156 157 158 159 160 161 164 165 166 167 168 169 170 178 187 189 194", ''],
58                     ["1 2 3 4 6 9 11 12 13 15 16 17 19 21 22 23 25 26 28 30 31 33 39 40 41 42 43 44 46 47 48 49 50 51 52 60 61 70 72 73 77", ''],
59                     ["1-6 8-13 15-23 25-33 39-56 58-63 67-77", '']);
60     while (defined(my $hit = $result->next_model)) {
61         is ref($hit), 'Bio::Search::Hit::HmmpfamHit';
62         is $hit->name, shift @{$hit_data[0]};
63         is $hit->raw_score, shift @{$hit_data[1]};
64         is $hit->score, $hit->raw_score;
65         float_is $hit->significance, shift @{$hit_data[2]};
66         float_is $hit->p, $hit->significance;
67         is $hit->num_hsps, shift @{$hit_data[3]};
68         is $hit->n, $hit->num_hsps;
69         is $hit->algorithm, $result->algorithm;
70         is $hit->overlap, 0;
71         is $hit->rank, shift @{$hit_data[6]};
72         is $hit->tiled_hsps, 0;
73         is $hit->strand('query'), 1;
74         is $hit->strand('hit'), 1;
75         my @strands = $hit->strand;
76         is "@strands", "1 1";
77         
78         is $hit->description, undef;
79         is $hit->accession, undef;
80         ok ! $hit->locus;
81         ok ! $hit->bits;
82         ok ! $result->logical_length('query');
83         ok ! $result->frame;
84         is $hit->each_accession_number, undef;
85         
86         is $hit->length, shift @{$hit_data[4]};
87         is $hit->logical_length('hit'), $hit->length;
88                 
89                 if ($result->query_name eq 'roa1_drome') {
90                         my @inds = $hit->seq_inds('query', 'identical');
91                         is "@inds", shift @{$hit_data[7]};
92                         @inds = $hit->seq_inds('hit', 'identical');
93                         is "@inds", shift @{$hit_data[8]};
94                         @inds = $hit->seq_inds('hit', 'conserved', 1);
95                         is "@inds", shift @{$hit_data[9]};
96                 }
97                 
98                 if ($hit->name eq 'SEED') {
99                         my $best = $hit->hsp('best');
100                         float_is($best->evalue, 1.1e-18);
101                         my $worst = $hit->hsp('worst');
102                         float_is($worst->evalue, 2.2e-17);
103                         is $hit->start('query'), 33;
104                         is $hit->start('hit'), 1;
105                         is $hit->end('query'), 194;
106                         is $hit->end('hit'), 77;
107                         my @range = $hit->range('query');
108                         is "@range", '33 194';
109                         @range = $hit->range('hit');
110                         is "@range", '1 77';
111                         
112                         if ($hit->query_name eq 'roa1_drome') {
113                                 is $hit->length_aln('query'),142;
114                                 is $hit->length_aln('hit'), 77;
115                                 is $hit->gaps('total'), 14;
116                                 is $hit->gaps('query'), 13;
117                                 is $hit->gaps('hit'), 1;
118                                 is $hit->matches('id'), 41;
119                                 is $hit->matches('cons'), 24;
120                                 is $hit->frac_identical, 0.387;
121                                 is $hit->frac_conserved, 0.169;
122                                 ok ! $hit->frac_aligned_query;
123                                 is $hit->frac_aligned_hit, '1.00';
124                                 is $hit->num_unaligned_hit, 1;
125                                 is $hit->num_unaligned_query, 13;
126                         }
127                 }
128         
129         my @hsps = $hit->hsps;
130         is @hsps, shift @{$hit_data[5]};
131         
132         my @hsp_data = ([1, 1], [77, 77], [33, 124], [103, 194], [71.2, 75.5], [2.2e-17, 1.1e-18],
133                         ['LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP',
134                          'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV'],
135                                                 [7, 6],
136                                                 ['lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG.kelggrklrv',
137                          'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv'],
138                         ['lf+g+L + +t+e Lk++F+k G iv++ +++D     + t++s+Gf+F+++  ++  + A +    +++++gr+++ ',
139                          'lfVg L  d +e+ ++d+F++fG iv+i+iv+D     ketgk +GfaFVeF++++ ++k +     ++l+g+ + v'],
140                                                 [1, 0], [8, 6], [1, 2], ['33 103', '124 194'], [78, 77], [22, 33], [33, 23],
141                                                 ['0.3099', '0.4648'], ['0.2857', '0.4286'], ['0.2821', '0.4286']);
142         
143         while (defined(my $hsp = $hit->next_domain)) {
144             is ref($hsp), 'Bio::Search::HSP::HmmpfamHSP';
145             is $hsp->hit->start, shift @{$hsp_data[0]};
146             is $hsp->hit->end, shift @{$hsp_data[1]};
147             is $hsp->query->start, shift @{$hsp_data[2]};
148             is $hsp->query->end, shift @{$hsp_data[3]};
149                         is $hsp->start('hit'), $hsp->hit->start;
150                         is $hsp->end('hit'),$hsp->hit->end;
151                         is $hsp->start('query'), $hsp->query->start;
152                         is $hsp->end('query'), $hsp->query->end;
153                         is $hsp->strand('hit'), 1;
154                         is $hsp->strand('query'), 1;
155             is $hsp->score, shift @{$hsp_data[4]};
156                         ok ! $hsp->bits;
157             float_is($hsp->evalue, shift @{$hsp_data[5]});
158                         ok ! $hsp->pvalue;
159                         float_is($hsp->significance, $hsp->evalue);
160                         is $hsp->algorithm, $result->algorithm;
161                         is $hsp->rank, shift @{$hsp_data[12]};
162                         my @range = $hsp->range;
163                         is "@range", shift @{$hsp_data[13]};
164                         is $hsp->n, $hit->num_hsps;
165                         is $hsp->length('query'), 71;
166                         is $hsp->length('hit'), 77;
167                         my $locseq = $hsp->seq('hit');
168                         
169                         if ($result->query_name eq 'roa1_drome') {
170                                 is ref($locseq), 'Bio::LocatableSeq';
171                                 my $aln = $hsp->get_aln('hit');
172                                 is ref($aln), 'Bio::SimpleAlign';
173                                 is $hsp->query_string, shift @{$hsp_data[6]};
174                                 is $hsp->gaps('query'), shift @{$hsp_data[7]};
175                                 is $hsp->gaps('hit'), shift @{$hsp_data[10]};
176                                 is $hsp->gaps('total'), shift @{$hsp_data[11]};
177                                 is $hsp->hit_string, shift @{$hsp_data[8]};
178                                 is $hsp->homology_string, shift @{$hsp_data[9]};
179                                 is $hsp->seq_str('hit'), $hsp->hit_string;
180                                 is $hsp->seq_str('query'), $hsp->query_string;
181                                 is $hsp->seq_str('homology'), $hsp->homology_string;
182                                 is length($hsp->homology_string), length($hsp->hit_string);
183                                 is length($hsp->query_string), length($hsp->homology_string);
184                                 is $hsp->length('total'), shift @{$hsp_data[14]};
185                                 is $hsp->hsp_length, $hsp->length('total');
186                                 is $hsp->num_identical, shift @{$hsp_data[15]};
187                                 is $hsp->num_conserved, shift @{$hsp_data[16]};
188                                 is $hsp->frac_identical('query'), shift @{$hsp_data[17]};
189                                 is $hsp->frac_identical('hit'), shift @{$hsp_data[18]};
190                                 is $hsp->frac_identical('total'), shift @{$hsp_data[19]};
191                         }
192         }
193     }
196 is $searchio->result_count, 2;
198 # bug revealed by bug 2632 - CS lines were already ignored, but we couldn't
199 # parse alignments when HSPs weren't in simple order!!
200 $searchio = Bio::SearchIO->new(-format => 'hmmer_pull', -file => test_input_file('hmmpfam_cs.out'), -verbose => 1);
201 my $result = $searchio->next_result;
202 my $hit = $result->next_hit;
203 my $hsp = $hit->next_hsp;
204 is $hsp->seq_str, "IPPLLAVGAVHHHLINKGLRQEASILV";
206 # and another bug revealed: we don't always know the hit length, and
207 # shouldn't complain about that with a warning
208 is $hsp->hit->seqlength, 412;
210 my $count = 0;
211 while (my $hit = $result->next_hit) {
212     $count++;
213     next if $count < 6;
214     last if $count > 6;
215         my $hsp = $hit->next_hsp;
216     ok ! $hsp->hit->seqlength;
217     #*** not sure how to test for the lack of a warning though...
218         # Maybe run an eval with verbose set to 2, then make sure $@ is undef? --cjfields