New INSTALL.WIN doc (from wiki)
[bioperl-live.git] / t / hmmer.t
blob8f2922212e4b67f6fbfd7367ca9f6c3858c852d7
1 # -*-Perl-*-
2 ## Bioperl Test Harness Script for Modules
3 ##
4 # $Id$
6 # Before `make install' is performed this script should be runnable with
7 # `make test'. After `make install' it should work as `perl test.t'
9 use strict;
10 BEGIN {     
11     # to handle systems with no installed Test module
12     # we include the t dir (where a copy of Test.pm is located)
13     # as a fallback
14     eval { require Test; };
15     if( $@ ) {
16         use lib 't';
17     }
18     use Test;
19     plan test => 136;
22 use Bio::SearchIO;
23 use Bio::Tools::HMMER::Domain;
24 use Bio::Tools::HMMER::Set;
25 use Bio::Tools::HMMER::Results;
26 use Bio::Root::IO;
29 my $searchio = new Bio::SearchIO(-format => 'hmmer',
30                                  -file   => Bio::Root::IO->catfile
31                                  ("t","data","hmmpfam.out"));
33 while( my $result = $searchio->next_result ) {
34     ok(ref($result),'Bio::Search::Result::HMMERResult');
35     ok($result->algorithm, 'HMMPFAM');
36     ok($result->algorithm_version, '2.1.1');
37     ok($result->hmm_name, 'pfam');
38     ok($result->sequence_file, '/home/birney/src/wise2/example/road.pep');
39     ok($result->query_name, 'roa1_drome');
40     ok($result->query_description, '');
41     ok($result->num_hits(), 2);
42     my ($hsp,$hit);
43     if( $hit = $result->next_model ) {
44         ok($hit->name, 'SEED');
45         ok($hit->raw_score, '146.1');
46         ok($hit->significance, '6.3e-40');
47         ok(ref($hit), 'Bio::Search::Hit::HMMERHit');
48         ok($hit->num_hsps, 1);
50         if( defined( $hsp = $hit->next_domain ) ) {
51             ok($hsp->hit->start, 1);
52             ok($hsp->hit->end, 77);
53             ok($hsp->query->start, 33);
54             ok($hsp->query->end, 103);
55             ok($hsp->score, 71.2);
56             ok($hsp->evalue, '2.2e-17');
57             ok($hsp->query_string, 'LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP');
58             ok($hsp->gaps('query'), 7);
59             ok($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG.kelggrklrv');
60             ok($hsp->homology_string, 'lf+g+L + +t+e Lk++F+k G iv++ +++D     + t++s+Gf+F+++  ++  + A +    +++++gr+++ ');
61             ok( length($hsp->homology_string), length($hsp->hit_string));
62             ok( length($hsp->query_string), length($hsp->homology_string));
63         }
64     }
65     if( defined ($hit = $result->next_model) ) {
66         if( defined($hsp = $hit->next_domain) ) {
67             ok($hsp->hit->start, 1);
68             ok($hsp->hit->end, 77);
69             ok($hsp->query->start, 124);
70             ok($hsp->query->end, 194);
71             ok($hsp->score, 75.5);
72             ok($hsp->evalue, '1.1e-18');
73             ok($hsp->query_string, 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV');
74             ok($hsp->gaps('query'), 6);
75             ok($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv');       
76             ok($hsp->homology_string, 'lfVg L  d +e+ ++d+F++fG iv+i+iv+D     ketgk +GfaFVeF++++ ++k +     ++l+g+ + v');
77             ok( length($hsp->homology_string), length($hsp->hit_string));
78             ok( length($hsp->query_string), length($hsp->homology_string));
79         }
80         last;
81     }
83 $searchio = new Bio::SearchIO(-format => 'hmmer',
84                               -file   => Bio::Root::IO->catfile
85                               ("t","data","hmmsearch.out"));
86 while( my $result = $searchio->next_result ) {
87     ok(ref($result),'Bio::Search::Result::HMMERResult');
88     ok($result->algorithm, 'HMMSEARCH');
89     ok($result->algorithm_version, '2.0');
90     ok($result->hmm_name, 'HMM [SEED]');
91     ok($result->sequence_file, 'HMM.dbtemp.29591');
92     ok($result->database_name, 'HMM.dbtemp.29591');
93     ok($result->query_name, 'SEED');
94     ok($result->query_description, '');
95     ok($result->num_hits(), 1215);
96     my $hit = $result->next_model;
97     ok($hit->name, 'Q91581');
98     ok($hit->description,'Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN');
99     ok($hit->significance, '2e-31');
100     ok($hit->raw_score, 119.7);
101     my $hsp = $hit->next_domain;
102     ok($hsp->score,119.7);
103     ok($hsp->evalue, '2e-31');
104     ok($hsp->query->start, 18);
105     ok($hsp->query->end, 89);
106     ok($hsp->hit->start, 1);
107     ok($hsp->hit->end, 77);
108     ok($hsp->query->seq_id(), 'SEED');
109     ok($hsp->hit->seq_id(), 'Q91581');   
112 $searchio = new Bio::SearchIO(-format => 'hmmer',
113                               -file   => Bio::Root::IO->catfile("t","data",
114                                                                 "L77119.hmmer"));
116 while( my $result = $searchio->next_result ) {
117     ok(ref($result),'Bio::Search::Result::HMMERResult');
118     ok($result->algorithm, 'HMMPFAM');
119     ok($result->algorithm_version, '2.2g');
120     ok($result->hmm_name, 'Pfam');
121     ok($result->sequence_file, 'L77119.faa');
122     ok($result->query_name, 'gi|1522636|gb|AAC37060.1|');
123     ok($result->query_description, 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]');
124     ok($result->num_hits(), 1);
125     my $hit = $result->next_hit;
126     ok($hit->name, 'Methylase_M');
127     ok($hit->description,'Type I restriction modification system, M');
128     ok($hit->significance, '0.0022');
129     ok($hit->raw_score, -105.2);
130     my $hsp = $hit->next_hsp;
131     ok($hsp->score,-105.2);
132     ok($hsp->evalue, '0.0022');
133     ok($hsp->query->start, 280);
134     ok($hsp->query->end, 481);
135     ok($hsp->hit->start, 1);
136     ok($hsp->hit->end, 279);
137     ok($hsp->query->seq_id(), 'gi|1522636|gb|AAC37060.1|');
138     ok($hsp->hit->seq_id(), 'Methylase_M');
139     ok($hsp->hit_string, 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG.dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn');
140     ok($hsp->query_string, 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST');
141     ok($hsp->homology_string, ' ++EL+++  av+   R              L+F K++ dk      +i+         p +   + +++y   ++   ++ ++y ++      + lF++++   e ++  ++++ + +    ++      + +       Glf ++++  ++ +s+   +ne ++e+i+ +++ +++     G++ +el   D++G +YE L+   Ae   K+ G +YTP e++  ia+ + i+  ++                  +++ ++    k+n+i +    s+');
142     
146 $searchio = new Bio::SearchIO(-format => 'hmmer',
147                               -file   => Bio::Root::IO->catfile("t","data",
148                                                                 "cysprot1b.hmmsearch"));
151 while( my $result = $searchio->next_result ) {
152     ok(ref($result),'Bio::Search::Result::HMMERResult');
153     ok($result->algorithm, 'HMMSEARCH');
154     ok($result->algorithm_version, '2.2g');
155     ok($result->hmm_name, 'Peptidase_C1.hmm [Peptidase_C1]');
156     ok($result->database_name, 'cysprot1b.fa');
157     ok($result->sequence_file, 'cysprot1b.fa');
158     ok($result->query_name, 'Peptidase_C1');
159     ok($result->query_accession, 'PF00112');
160     ok($result->query_description, 'Papain family cysteine protease');
161     ok($result->num_hits(), 4);
162     my $hit = $result->next_hit;
163     ok($hit->name, 'CATL_RAT');
164     ok($hit->description,'');
165     ok($hit->significance, '2e-135');
166     ok($hit->raw_score, 449.4);
167     my $hsp = $hit->next_hsp;
168     ok($hsp->score,449.4);
169     ok($hsp->evalue, '2e-135');
170     ok($hsp->query->start, 1);
171     ok($hsp->query->end, 337);
172     ok($hsp->hit->start, 114);
173     ok($hsp->hit->end, 332);
174     ok($hsp->query->seq_id(), 'Peptidase_C1');
175     ok($hsp->hit->seq_id(), 'CATL_RAT');
176     ok($hsp->hit_string, 'IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT------GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE-----NGGLDSEESY-----PYE----AKD-------------------GSCKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAMDASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGYGYEG-T------------------------------------DSNKDKYWLVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI');
177     ok($hsp->homology_string, '+P+++DWRe kg  VtpVK+QG qCGSCWAFSa g lEg+ ++kt      gkl+sLSEQ+LvDC++ d gn+      GCnG Glmd Af+Yik+     NgGl++E++Y     PY+    +kd                   g+Cky+  + ++     a+++g++d+p++     E+al+ka+a++GP+sVa+das+ s    q+Y+sG       +Y+++    C+++   +LdH+Vl+VGYG e+                                      ++++ +YW+VKNSWG++WG++GY++ia+++n    n+CG+a+ asypi');
178     ok($hsp->query_string, 'lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgtkawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikkeqIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgtCkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVaidasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGYGteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYWIVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi');
179     $hit = $result->next_hit;
180     ok($hit->name, 'CATL_HUMAN');
181     ok($hit->description,'');
182     ok($hit->significance, '6.1e-134');
183     ok($hit->raw_score, 444.5);
185                               
186 my ($domain,$set,$homol,$rev,$res,$dom,@doms);
187     $domain = Bio::Tools::HMMER::Domain->new(-verbose=>1);
189 ok ref($domain), 'Bio::Tools::HMMER::Domain';
191 $domain->start(50);
192 $domain->end(200);
193 $domain->hstart(10);
194 $domain->hend(100);
195 $domain->seqbits(50);
196 $domain->bits(20);
197 $domain->evalue(0.0001);
198 $domain->seq_id('silly');
201 # test that we can get out forward and reverse homol_SeqFeatures
202 $homol = $domain->feature2();
203 ok $homol->start(), 10;
205 $rev = $domain;
207 ok $rev->start(), 50;
209 $set = Bio::Tools::HMMER::Set->new();
210 $set->add_Domain($domain);
212 @doms = $set->each_Domain();
213 $dom = shift @doms;
215 ok $dom->start(), 50;
217 $set->bits(300);
218 $set->evalue(0.0001);
219 $set->name('sillyname');
220 $set->desc('a desc');
221 $set->accession('fakeaccesssion');
222 ok $set->bits(), 300;
223 ok $set->evalue(), 0.0001;
224 ok $set->name(), 'sillyname';
225 ok $set->desc, 'a desc';
226 ok $set->accession, 'fakeaccesssion';
228 $res = Bio::Tools::HMMER::Results->new( -file => Bio::Root::IO->catfile("t","data","hmmsearch.out") , -type => 'hmmsearch');
229 my $seen =0;
230 ok $res->hmmfile, "HMM";
231 ok $res->seqfile, "HMM.dbtemp.29591";
233 my $first = 0;
234 foreach $set ( $res->each_Set) {
235     foreach $domain ( $set->each_Domain ) {
236     #print STDERR "Got domain ",$domain->seq_id," start ",$domain->start," end ",$domain->end,"\n";
237     # do nothing for the moment
238       $seen = 1;
239   }
241 ok $seen, 1;
243 ok $res->number, 1215, "\nBad number of domains. Expecting 1215. Got" . $res->number;
245 $res = Bio::Tools::HMMER::Results->new( -file => 
246                                       Bio::Root::IO->catfile("t","data",
247                                                              "hmmpfam.out") , 
248                                         -type => 'hmmpfam');
250 ok ($res->number, 2);
252 # parse HMM 2.2 files
254 $res = Bio::Tools::HMMER::Results->new( -file => 
255                                       Bio::Root::IO->catfile("t","data",
256                                                              "L77119.hmmer"),
257                                         -type => 'hmmpfam');
258 $seen =0;
259 ok $res->hmmfile, 'Pfam';
260 ok $res->seqfile, 'L77119.faa';
261 foreach $set ( $res->each_Set) {
262     # only one set anyways
264     ok($set->name, 'gi|1522636|gb|AAC37060.1|');
265     ok($set->desc, 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]');
266     ok($set->accession, '[none]');
267     foreach $domain ( $set->each_Domain ) {
268         #print STDERR "Got domain ",$domain->seq_id," start ",$domain->start," end ",$domain->end,"\n";
269     # do nothing for the moment
270         ok($domain->start, 280);
271         ok($domain->end, 481);
272         ok($domain->bits, -105.2);
273         ok($domain->evalue, 0.0022 );
274     }
276 ok ($res->number, 1);
278 # test for bugs #(1189,1034,1172)
279 $res = Bio::Tools::HMMER::Results->new( -file => Bio::Root::IO->catfile
280                                         ("t","data","hmmsearch.out") , 
281                                         -type => 'hmmsearch');
282 my $res2 = $res->filter_on_cutoff(100,50);
283 ok($res2);
284 ok($res2->number, 604);
287 # now let's test the new Bio::SearchIO::hmmer