1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id: SearchIO_hmmer.t 14989 2008-11-11 19:52:02Z cjfields $
# Harness setup: test_begin is called with a plan of 116 tests.
10 test_begin(-tests => 116);
# Check that Bio::SearchIO loads before any parser is built.
12 use_ok('Bio::SearchIO');
# Section: parse 'hmmpfam.out' with the 'hmmer' SearchIO format, then check
# the result metadata, two model hits, and one domain (HSP) of each hit.
15 my $searchio = Bio::SearchIO->new(-format => 'hmmer',
16 -file => test_input_file('hmmpfam.out'));
# Every result pulled from the stream is checked against the same expectations.
18 while( my $result = $searchio->next_result ) {
19 is(ref($result),'Bio::Search::Result::HMMERResult');
20 is($result->algorithm, 'HMMPFAM');
21 is($result->algorithm_version, '2.1.1');
22 is($result->hmm_name, 'pfam');
23 is($result->sequence_file, '/home/birney/src/wise2/example/road.pep');
24 is($result->query_name, 'roa1_drome');
25 is($result->query_description, '');
26 is($result->num_hits(), 2);
# First model hit.
# NOTE(review): $hit and $hsp are assigned here without a visible `my`;
# presumably they are declared on a line not shown - confirm.
28 if( $hit = $result->next_model ) {
29 is($hit->name, 'SEED');
30 is($hit->raw_score, '146.1');
31 float_is($hit->significance, 6.3e-40);
32 is(ref($hit), 'Bio::Search::Hit::HMMERHit');
33 is($hit->num_hsps, 1);
# First domain of the first hit: coordinates, score, e-value and alignment.
35 if( defined( $hsp = $hit->next_domain ) ) {
36 is($hsp->hit->start, 1);
37 is($hsp->hit->end, 77);
38 is($hsp->query->start, 33);
39 is($hsp->query->end, 103);
40 is($hsp->score, 71.2);
41 float_is($hsp->evalue, 2.2e-17);
42 is($hsp->query_string, 'LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP');
# The expected query string above contains 7 '-' characters (5 + 2).
43 is($hsp->gaps('query'), 7);
44 is($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG-kelggrklrv');
45 is($hsp->homology_string, 'lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ');
# Query, homology and hit strings must all have the same length.
46 is( length($hsp->homology_string), length($hsp->hit_string));
47 is( length($hsp->query_string), length($hsp->homology_string));
# Second model hit; only its first domain is inspected.
50 if( defined ($hit = $result->next_model) ) {
51 if( defined($hsp = $hit->next_domain) ) {
52 is($hsp->hit->start, 1);
53 is($hsp->hit->end, 77);
54 is($hsp->query->start, 124);
55 is($hsp->query->end, 194);
56 is($hsp->score, 75.5);
57 float_is($hsp->evalue, 1.1e-18);
58 is($hsp->query_string, 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV');
# The expected query string above contains 6 '-' characters (5 + 1).
59 is($hsp->gaps('query'), 6);
60 is($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv');
61 is($hsp->homology_string, 'lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +GfaFVeF++++ ++k + ++l+g+ + v');
# Same length consistency checks as for the first domain.
62 is( length($hsp->homology_string), length($hsp->hit_string));
63 is( length($hsp->query_string), length($hsp->homology_string));
# Section: parse 'hmmsearch.out'. $searchio is reassigned (no `my`) to a new
# 'hmmer' format parser for this input file.
68 $searchio = Bio::SearchIO->new(-format => 'hmmer',
69 -file => test_input_file('hmmsearch.out'));
70 while( my $result = $searchio->next_result ) {
# Result-level metadata.
71 is(ref($result),'Bio::Search::Result::HMMERResult');
72 is($result->algorithm, 'HMMSEARCH');
73 is($result->algorithm_version, '2.0');
74 is($result->hmm_name, 'HMM [SEED]');
# sequence_file and database_name are expected to report the same value.
75 is($result->sequence_file, 'HMM.dbtemp.29591');
76 is($result->database_name, 'HMM.dbtemp.29591');
77 is($result->query_name, 'SEED');
78 is($result->query_description, '');
79 is($result->num_hits(), 1215);
# Only the first of the 1215 hits is inspected.
80 my $hit = $result->next_model;
81 is($hit->name, 'Q91581');
82 is($hit->description,'Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN');
83 float_is($hit->significance, 2e-31);
84 is($hit->raw_score, 119.7);
# First domain of that hit: the expected score and e-value equal the
# hit-level raw_score and significance asserted above.
85 my $hsp = $hit->next_domain;
86 is($hsp->score,119.7);
87 float_is($hsp->evalue, 2e-31);
88 is($hsp->query->start, 18);
89 is($hsp->query->end, 89);
90 is($hsp->hit->start, 1);
91 is($hsp->hit->end, 77);
# Sequence ids on each side of the HSP should match the query and hit names.
92 is($hsp->query->seq_id(), 'SEED');
93 is($hsp->hit->seq_id(), 'Q91581');
# Section: parse 'L77119.hmmer'. This section iterates with next_hit /
# next_hsp and also checks the seq_inds position lists.
96 $searchio = Bio::SearchIO->new(-format => 'hmmer',
97 -file => test_input_file('L77119.hmmer'));
99 while( my $result = $searchio->next_result ) {
# Result-level metadata, including a non-empty query description.
100 is(ref($result),'Bio::Search::Result::HMMERResult');
101 is($result->algorithm, 'HMMPFAM');
102 is($result->algorithm_version, '2.2g');
103 is($result->hmm_name, 'Pfam');
104 is($result->sequence_file, 'L77119.faa');
105 is($result->query_name, 'gi|1522636|gb|AAC37060.1|');
106 is($result->query_description, 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]');
107 is($result->num_hits(), 1);
# The single hit; note the expected raw score is negative.
108 my $hit = $result->next_hit;
109 is($hit->name, 'Methylase_M');
110 is($hit->description,'Type I restriction modification system, M');
111 float_is($hit->significance, 0.0022);
112 is($hit->raw_score, -105.2);
# First HSP of the hit: score, e-value, coordinates and sequence ids.
113 my $hsp = $hit->next_hsp;
114 is($hsp->score,-105.2);
115 float_is($hsp->evalue, 0.0022);
116 is($hsp->query->start, 280);
117 is($hsp->query->end, 481);
118 is($hsp->hit->start, 1);
119 is($hsp->hit->end, 279);
120 is($hsp->query->seq_id(), 'gi|1522636|gb|AAC37060.1|');
121 is($hsp->hit->seq_id(), 'Methylase_M');
# Expected alignment strings for hit, query and homology lines.
122 is($hsp->hit_string, 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG-dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn');
123 is($hsp->query_string, 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST');
124 is($hsp->homology_string, ' ++EL+++ av+ R L+F K++ dk +i+ p + + +++y ++ ++ ++y ++ + lF++++ e ++ ++++ + + ++ + + Glf ++++ ++ +s+ +ne ++e+i+ +++ +++ G++ +el D++G +YE L+ Ae K+ G +YTP e++ ia+ + i+ ++ +++ ++ k+n+i + s+');
# seq_inds results are joined with single spaces before comparison. The
# expected lists show runs of consecutive positions as ranges (e.g. 293-295).
# NOTE(review): the third argument (1) presumably requests that range
# collapsing - confirm against the seq_inds documentation.
125 is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '280 288 289 293-295 300 304 311 313-315 317 324-326 332 335 337 344-346 348 355 358-361 364-366 372 379 383-385 389 396 400 404-408 412 416 417 422 426 429-431 434-436 439 441 446 450 451 455 459 460 463 464 468 471 472 478');
126 is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '1 9 10 14-16 18-31 35 39 42-47 51-59 61 63-65 67 72-74 77-79 82 86 89-94 96 103-105 107 110 111 116 118 120-123 126-131 133 135-141 145 150 151 154 158-160 164 171 175 179-183 187 191-193 198 202 205-207 210-212 215 217 222 226 227 231 233 236 237 240-257 261 264-267 273 275-278');
127 is(join(' ', $hsp->seq_inds('query', 'gap',1)), '296 306 309 321 328 334 335 350 356 366-368 376 417 456 463 470 479');
# No gap positions are expected on the hit side, so the joined list is empty.
128 is(join(' ', $hsp->seq_inds('hit', 'gap',1)), '');
# Section: parse 'cysprot1b.hmmsearch'. The first hit is checked in full;
# the second hit only at hit level.
132 $searchio = Bio::SearchIO->new(-format => 'hmmer',
133 -file => test_input_file('cysprot1b.hmmsearch'));
136 while( my $result = $searchio->next_result ) {
# Result-level metadata; this section also checks query_accession.
137 is(ref($result),'Bio::Search::Result::HMMERResult');
138 is($result->algorithm, 'HMMSEARCH');
139 is($result->algorithm_version, '2.2g');
140 is($result->hmm_name, 'Peptidase_C1.hmm [Peptidase_C1]');
# database_name and sequence_file are expected to report the same file.
141 is($result->database_name, 'cysprot1b.fa');
142 is($result->sequence_file, 'cysprot1b.fa');
143 is($result->query_name, 'Peptidase_C1');
144 is($result->query_accession, 'PF00112');
145 is($result->query_description, 'Papain family cysteine protease');
146 is($result->num_hits(), 4);
# First hit (CATL_RAT): the description is expected to be empty.
147 my $hit = $result->next_hit;
148 is($hit->name, 'CATL_RAT');
149 is($hit->description,'');
150 float_is($hit->significance, 2e-135);
151 is($hit->raw_score, 449.4);
# First HSP of the first hit: expected score and e-value equal the hit-level values.
152 my $hsp = $hit->next_hsp;
153 is($hsp->score,449.4);
154 float_is($hsp->evalue, 2e-135);
155 is($hsp->query->start, 1);
156 is($hsp->query->end, 337);
157 is($hsp->hit->start, 114);
158 is($hsp->hit->end, 332);
159 is($hsp->query->seq_id(), 'Peptidase_C1');
160 is($hsp->hit->seq_id(), 'CATL_RAT');
# Expected alignment strings for hit, homology and query lines.
161 is($hsp->hit_string, 'IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT------GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE-----NGGLDSEESY-----PYE----AKD-------------------GSCKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAMDASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGYGYEG-T------------------------------------DSNKDKYWLVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI');
162 is($hsp->homology_string, '+P+++DWRe kg VtpVK+QG qCGSCWAFSa g lEg+ ++kt gkl+sLSEQ+LvDC++ d gn+ GCnG Glmd Af+Yik+ NgGl++E++Y PY+ +kd g+Cky+ + ++ a+++g++d+p++ E+al+ka+a++GP+sVa+das+ s q+Y+sG +Y+++ C+++ +LdH+Vl+VGYG e+ ++++ +YW+VKNSWG++WG++GY++ia+++n n+CG+a+ asypi');
163 is($hsp->query_string, 'lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgtkawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikkeqIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgtCkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVaidasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGYGteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYWIVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi');
# Second hit (CATL_HUMAN): $hit is reused; only hit-level fields are checked.
164 $hit = $result->next_hit;
165 is($hit->name, 'CATL_HUMAN');
166 is($hit->description,'');
167 float_is($hit->significance, 6.1e-134);
168 is($hit->raw_score, 444.5);
171 # test for bug 2632 - CS lines should get ignored without breaking the parser
# Parse 'hmmpfam_cs.out' and take the first result, its first hit, and that
# hit's first HSP. No loop is used here, and the intermediate objects are not
# asserted on individually.
172 $searchio = Bio::SearchIO->new(-format => 'hmmer', -file => test_input_file('hmmpfam_cs.out'));
173 my $result = $searchio->next_result;
174 my $hit = $result->next_hit;
175 my $hsp = $hit->next_hsp;
# The only assertion compares the HSP's seq_str output with the expected
# gapped sequence.
176 is $hsp->seq_str, 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES';