1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin(-tests => 140);
12 use_ok('Bio::SearchIO');
13 use_ok('Bio::Tools::HMMER::Domain');
14 use_ok('Bio::Tools::HMMER::Set');
15 use_ok('Bio::Tools::HMMER::Results');
18 my $searchio = Bio::SearchIO->new(-format => 'hmmer',
19 -file => test_input_file('hmmpfam.out'));
21 while( my $result = $searchio->next_result ) {
22 is(ref($result),'Bio::Search::Result::HMMERResult');
23 is($result->algorithm, 'HMMPFAM');
24 is($result->algorithm_version, '2.1.1');
25 is($result->hmm_name, 'pfam');
26 is($result->sequence_file, '/home/birney/src/wise2/example/road.pep');
27 is($result->query_name, 'roa1_drome');
28 is($result->query_description, '');
29 is($result->num_hits(), 2);
31 if( $hit = $result->next_model ) {
32 is($hit->name, 'SEED');
33 is($hit->raw_score, '146.1');
34 is($hit->significance, '6.3e-40');
35 is(ref($hit), 'Bio::Search::Hit::HMMERHit');
36 is($hit->num_hsps, 1);
38 if( defined( $hsp = $hit->next_domain ) ) {
39 is($hsp->hit->start, 1);
40 is($hsp->hit->end, 77);
41 is($hsp->query->start, 33);
42 is($hsp->query->end, 103);
43 is($hsp->score, 71.2);
44 is($hsp->evalue, '2.2e-17');
45 is($hsp->query_string, 'LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP');
46 is($hsp->gaps('query'), 7);
47 is($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG.kelggrklrv');
48 is($hsp->homology_string, 'lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ');
49 is( length($hsp->homology_string), length($hsp->hit_string));
50 is( length($hsp->query_string), length($hsp->homology_string));
53 if( defined ($hit = $result->next_model) ) {
54 if( defined($hsp = $hit->next_domain) ) {
55 is($hsp->hit->start, 1);
56 is($hsp->hit->end, 77);
57 is($hsp->query->start, 124);
58 is($hsp->query->end, 194);
59 is($hsp->score, 75.5);
60 is($hsp->evalue, '1.1e-18');
61 is($hsp->query_string, 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV');
62 is($hsp->gaps('query'), 6);
63 is($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv');
64 is($hsp->homology_string, 'lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +GfaFVeF++++ ++k + ++l+g+ + v');
65 is( length($hsp->homology_string), length($hsp->hit_string));
66 is( length($hsp->query_string), length($hsp->homology_string));
71 $searchio = Bio::SearchIO->new(-format => 'hmmer',
72 -file => test_input_file('hmmsearch.out'));
73 while( my $result = $searchio->next_result ) {
74 is(ref($result),'Bio::Search::Result::HMMERResult');
75 is($result->algorithm, 'HMMSEARCH');
76 is($result->algorithm_version, '2.0');
77 is($result->hmm_name, 'HMM [SEED]');
78 is($result->sequence_file, 'HMM.dbtemp.29591');
79 is($result->database_name, 'HMM.dbtemp.29591');
80 is($result->query_name, 'SEED');
81 is($result->query_description, '');
82 is($result->num_hits(), 1215);
83 my $hit = $result->next_model;
84 is($hit->name, 'Q91581');
85 is($hit->description,'Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN');
86 is($hit->significance, '2e-31');
87 is($hit->raw_score, 119.7);
88 my $hsp = $hit->next_domain;
89 is($hsp->score,119.7);
90 is($hsp->evalue, '2e-31');
91 is($hsp->query->start, 18);
92 is($hsp->query->end, 89);
93 is($hsp->hit->start, 1);
94 is($hsp->hit->end, 77);
95 is($hsp->query->seq_id(), 'SEED');
96 is($hsp->hit->seq_id(), 'Q91581');
99 $searchio = Bio::SearchIO->new(-format => 'hmmer',
100 -file => test_input_file('L77119.hmmer'));
102 while( my $result = $searchio->next_result ) {
103 is(ref($result),'Bio::Search::Result::HMMERResult');
104 is($result->algorithm, 'HMMPFAM');
105 is($result->algorithm_version, '2.2g');
106 is($result->hmm_name, 'Pfam');
107 is($result->sequence_file, 'L77119.faa');
108 is($result->query_name, 'gi|1522636|gb|AAC37060.1|');
109 is($result->query_description, 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]');
110 is($result->num_hits(), 1);
111 my $hit = $result->next_hit;
112 is($hit->name, 'Methylase_M');
113 is($hit->description,'Type I restriction modification system, M');
114 is($hit->significance, '0.0022');
115 is($hit->raw_score, -105.2);
116 my $hsp = $hit->next_hsp;
117 is($hsp->score,-105.2);
118 is($hsp->evalue, '0.0022');
119 is($hsp->query->start, 280);
120 is($hsp->query->end, 481);
121 is($hsp->hit->start, 1);
122 is($hsp->hit->end, 279);
123 is($hsp->query->seq_id(), 'gi|1522636|gb|AAC37060.1|');
124 is($hsp->hit->seq_id(), 'Methylase_M');
125 is($hsp->hit_string, 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG.dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn');
126 is($hsp->query_string, 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST');
127 is($hsp->homology_string, ' ++EL+++ av+ R L+F K++ dk +i+ p + + +++y ++ ++ ++y ++ + lF++++ e ++ ++++ + + ++ + + Glf ++++ ++ +s+ +ne ++e+i+ +++ +++ G++ +el D++G +YE L+ Ae K+ G +YTP e++ ia+ + i+ ++ +++ ++ k+n+i + s+');
132 $searchio = Bio::SearchIO->new(-format => 'hmmer',
133 -file => test_input_file('cysprot1b.hmmsearch'));
136 while( my $result = $searchio->next_result ) {
137 is(ref($result),'Bio::Search::Result::HMMERResult');
138 is($result->algorithm, 'HMMSEARCH');
139 is($result->algorithm_version, '2.2g');
140 is($result->hmm_name, 'Peptidase_C1.hmm [Peptidase_C1]');
141 is($result->database_name, 'cysprot1b.fa');
142 is($result->sequence_file, 'cysprot1b.fa');
143 is($result->query_name, 'Peptidase_C1');
144 is($result->query_accession, 'PF00112');
145 is($result->query_description, 'Papain family cysteine protease');
146 is($result->num_hits(), 4);
147 my $hit = $result->next_hit;
148 is($hit->name, 'CATL_RAT');
149 is($hit->description,'');
150 is($hit->significance, '2e-135');
151 is($hit->raw_score, 449.4);
152 my $hsp = $hit->next_hsp;
153 is($hsp->score,449.4);
154 is($hsp->evalue, '2e-135');
155 is($hsp->query->start, 1);
156 is($hsp->query->end, 337);
157 is($hsp->hit->start, 114);
158 is($hsp->hit->end, 332);
159 is($hsp->query->seq_id(), 'Peptidase_C1');
160 is($hsp->hit->seq_id(), 'CATL_RAT');
161 is($hsp->hit_string, 'IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT------GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE-----NGGLDSEESY-----PYE----AKD-------------------GSCKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAMDASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGYGYEG-T------------------------------------DSNKDKYWLVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI');
162 is($hsp->homology_string, '+P+++DWRe kg VtpVK+QG qCGSCWAFSa g lEg+ ++kt gkl+sLSEQ+LvDC++ d gn+ GCnG Glmd Af+Yik+ NgGl++E++Y PY+ +kd g+Cky+ + ++ a+++g++d+p++ E+al+ka+a++GP+sVa+das+ s q+Y+sG +Y+++ C+++ +LdH+Vl+VGYG e+ ++++ +YW+VKNSWG++WG++GY++ia+++n n+CG+a+ asypi');
163 is($hsp->query_string, 'lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgtkawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikkeqIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgtCkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVaidasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGYGteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYWIVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi');
164 $hit = $result->next_hit;
165 is($hit->name, 'CATL_HUMAN');
166 is($hit->description,'');
167 is($hit->significance, '6.1e-134');
168 is($hit->raw_score, 444.5);
171 my ($domain,$set,$homol,$rev,$res,$dom,@doms);
172 $domain = Bio::Tools::HMMER::Domain->new(-verbose=>1);
174 is ref($domain), 'Bio::Tools::HMMER::Domain';
180 $domain->seqbits(50);
182 $domain->evalue(0.0001);
183 $domain->seq_id('silly');
186 # test that we can get out forward and reverse homol_SeqFeatures
187 $homol = $domain->feature2();
188 is $homol->start(), 10;
192 is $rev->start(), 50;
194 $set = Bio::Tools::HMMER::Set->new();
195 $set->add_Domain($domain);
197 @doms = $set->each_Domain();
200 is $dom->start(), 50;
203 $set->evalue(0.0001);
204 $set->name('sillyname');
205 $set->desc('a desc');
206 $set->accession('fakeaccesssion');
207 is $set->bits(), 300;
208 is $set->evalue(), 0.0001;
209 is $set->name(), 'sillyname';
210 is $set->desc, 'a desc';
211 is $set->accession, 'fakeaccesssion';
213 $res = Bio::Tools::HMMER::Results->new( -file => test_input_file('hmmsearch.out') , -type => 'hmmsearch');
215 is $res->hmmfile, "HMM";
216 is $res->seqfile, "HMM.dbtemp.29591";
219 foreach $set ( $res->each_Set) {
220 foreach $domain ( $set->each_Domain ) {
221 #print STDERR "Got domain ",$domain->seq_id," start ",$domain->start," end ",$domain->end,"\n";
222 # do nothing for the moment
228 is $res->number, 1215;
230 $res = Bio::Tools::HMMER::Results->new( -file => test_input_file('hmmpfam.out') ,
233 is ($res->number, 2);
235 # parse HMM 2.2 files
237 $res = Bio::Tools::HMMER::Results->new( -file => test_input_file('L77119.hmmer'),
240 is $res->hmmfile, 'Pfam';
241 is $res->seqfile, 'L77119.faa';
242 foreach $set ( $res->each_Set) {
243 # only one set anyways
245 is($set->name, 'gi|1522636|gb|AAC37060.1|');
246 is($set->desc, 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]');
247 is($set->accession, '[none]');
248 foreach $domain ( $set->each_Domain ) {
249 #print STDERR "Got domain ",$domain->seq_id," start ",$domain->start," end ",$domain->end,"\n";
250 # do nothing for the moment
251 is($domain->start, 280);
252 is($domain->end, 481);
253 is($domain->bits, -105.2);
254 is($domain->evalue, 0.0022 );
257 is ($res->number, 1);
259 # test for bugs #(1189,1034,1172)
260 $res = Bio::Tools::HMMER::Results->new( -file => test_input_file('hmmsearch.out') ,
261 -type => 'hmmsearch');
262 my $res2 = $res->filter_on_cutoff(100,50);
264 is($res2->number, 604);
266 # now let's test the new Bio::SearchIO::hmmer (I believe this is in SearchIO.t)