1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin( -tests => 591 );
12 use_ok('Bio::SearchIO');
15 my $searchio = Bio::SearchIO->new(
17 -file => test_input_file('hmmpfam.out')
21 while ( $result = $searchio->next_result ) {
23 'Bio::Search::Result::HMMERResult',
24 'Check for the correct result reference type'
26 is( $result->algorithm, 'HMMPFAM', 'Check algorithm' );
27 is( $result->algorithm_version, '2.1.1', 'Check algorithm version' );
28 is( $result->hmm_name, 'pfam', 'Check hmm_name' );
29 is( $result->sequence_file,
30 '/home/birney/src/wise2/example/road.pep',
33 is( $result->query_name, 'roa1_drome', 'Check query_name' );
34 is( $result->query_description, '', 'Check query_description' );
35 is( $result->num_hits(), 2, 'Check num_hits' );
38 if ( $hit = $result->next_model ) {
39 is( $hit->name, 'SEED', 'Check hit name' );
40 is( $hit->raw_score, '146.1', 'Check hit raw_score' );
41 float_is( $hit->significance, 6.3e-40, 'Check hit significance' );
42 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
43 'Check for the correct hit reference type' );
44 is( $hit->num_hsps, 1, 'Check num_hsps' );
46 if ( defined( $hsp = $hit->next_domain ) ) {
47 is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' );
48 is( $hsp->hit->end, 77, 'Check for hit hmm to value' );
49 is( $hsp->query->start, 33, 'Check for query alifrom value' );
50 is( $hsp->query->end, 103, 'Check for query ali to value' );
51 is( $hsp->score, 71.2, 'Check for hsp score' );
52 float_is( $hsp->evalue, 2.2e-17, 'Check for hsp c-Evalue' );
54 is( $hsp->length('query'), 71, 'Check for hsp query length' );
55 is( $hsp->length('hit'), 77, 'Check for hsp hit length' );
56 is( $hsp->length('total'), 78, 'Check for hsp total length' );
57 is( $hsp->gaps('query'), 7, 'Check for hsp query gaps' );
58 is( $hsp->gaps('hit'), 1, 'Check for hsp hit gaps' );
59 is( $hsp->gaps('total'), 8, 'Check for hsp total gaps' );
62 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
63 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
64 ($result->query_length == 0) ?
65 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
66 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
68 is( $hsp->query_string,
69 'LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP',
70 'Check for query string'
72 is( $hsp->gaps('query'), 7, 'Check for number of gaps in query' );
74 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG-kelggrklrv',
75 'Check for hit string'
77 is( $hsp->homology_string,
78 'lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ',
79 'Check for homology string'
81 is( length( $hsp->homology_string ),
82 length( $hsp->hit_string ),
83 'Check if homology string and hit string have an equal length'
85 is( length( $hsp->query_string ),
86 length( $hsp->homology_string ),
87 'Check if query string and homology string have an equal length'
89 # Hmmpfam doesn't have PP or CS strings; these tests check for side effects
90 is( $hsp->posterior_string, '', 'Check for absence of posterior probability string' );
91 is( $hsp->consensus_structure, '', 'Check for absence of consensus structure string' );
94 if ( defined( $hit = $result->next_model ) ) {
95 if ( defined( $hsp = $hit->next_domain ) ) {
96 is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' );
97 is( $hsp->hit->end, 77, 'Check for hit hmm to value' );
98 is( $hsp->query->start, 124, 'Check for query alifrom value' );
99 is( $hsp->query->end, 193, 'Check for query ali to value' );
100 is( $hsp->score, 75.5, 'Check for hsp score' );
101 float_is( $hsp->evalue, 1.1e-18, 'Check for hsp c-Evalue' );
103 is( $hsp->length('query'), 70, 'Check for hsp query length' );
104 is( $hsp->length('hit'), 77, 'Check for hsp hit length' );
105 is( $hsp->length('total'), 77, 'Check for hsp total length' );
106 is( $hsp->gaps('query'), 7, 'Check for hsp query gaps' );
107 is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' );
108 is( $hsp->gaps('total'), 7, 'Check for hsp total gaps' );
110 ($hit->length == 0) ?
111 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
112 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
113 ($result->query_length == 0) ?
114 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
115 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
117 is( $hsp->query_string,
118 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL--KQHQLNGKMVDV',
119 'Check for query string'
121 is( $hsp->hit_string,
122 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv',
123 'Check for hit string'
125 is( $hsp->homology_string,
126 'lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +GfaFVeF++++ ++k + ++l+g+ + v',
127 'Check for homology string'
129 is( length( $hsp->homology_string ),
130 length( $hsp->hit_string ),
131 'Check if homology string and hit string have an equal length'
133 is( length( $hsp->query_string ),
134 length( $hsp->homology_string ),
135 'Check if query string and homology string have an equal length'
142 $searchio = Bio::SearchIO->new(
144 -file => test_input_file('hmmsearch.out')
146 while ( $result = $searchio->next_result ) {
148 'Bio::Search::Result::HMMERResult',
149 'Check for the correct result reference type'
151 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
152 is( $result->algorithm_version, '2.0', 'Check algorithm version' );
153 is( $result->hmm_name, 'HMM [SEED]', 'Check hmm_name' );
154 is( $result->sequence_file, 'HMM.dbtemp.29591', 'Check sequence_file' );
155 is( $result->database_name, 'HMM.dbtemp.29591', 'Check database_name' );
156 is( $result->query_name, 'SEED', 'Check query_name' );
157 is( $result->query_description, '', 'Check query_description' );
158 is( $result->num_hits(), 1215, 'Check num_hits' );
159 my $hit = $result->next_model;
160 is( $hit->name, 'Q91581', 'Check hit name' );
161 is( $hit->description,
162 'Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN',
163 'Check for hit description'
165 float_is( $hit->significance, 2e-31, 'Check hit significance' );
166 is( $hit->raw_score, 119.7, 'Check hit raw_score' );
167 my $hsp = $hit->next_domain;
168 is( $hsp->score, 119.7, 'Check for hsp score' );
169 float_is( $hsp->evalue, 2e-31, 'Check for hsp c-Evalue' );
170 is( $hsp->query->start, 18, 'Check for query alifrom value' );
171 is( $hsp->query->end, 89, 'Check for query ali to value' );
172 is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' );
173 is( $hsp->hit->end, 77, 'Check for hit hmm to value' );
174 is( $hsp->query->seq_id(), 'SEED', 'Check for query seq_id' );
175 is( $hsp->hit->seq_id(), 'Q91581', 'Check for hit seq_id' );
178 $searchio = Bio::SearchIO->new(
180 -file => test_input_file('L77119.hmmer')
183 while ( $result = $searchio->next_result ) {
185 'Bio::Search::Result::HMMERResult',
186 'Check for the correct result reference type'
188 is( $result->algorithm, 'HMMPFAM', 'Check algorithm' );
189 is( $result->algorithm_version, '2.2g', 'Check algorithm version' );
190 is( $result->hmm_name, 'Pfam', 'Check hmm_name' );
191 is( $result->sequence_file, 'L77119.faa', 'Check sequence_file' );
192 is( $result->query_name, 'gi|1522636|gb|AAC37060.1|',
193 'Check query_name' );
194 is( $result->query_description,
195 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]',
196 'Check query_description'
198 is( $result->num_hits(), 1, 'Check num_hits' );
199 my $hit = $result->next_hit;
200 is( $hit->name, 'Methylase_M', 'Check hit name' );
201 is( $hit->description,
202 'Type I restriction modification system, M',
203 'Check for hit description'
205 float_is( $hit->significance, 0.0022, 'Check hit significance' );
206 is( $hit->raw_score, -105.2, 'Check hit raw_score' );
207 my $hsp = $hit->next_hsp;
208 is( $hsp->score, -105.2, 'Check for hsp score' );
209 float_is( $hsp->evalue, 0.0022, 'Check for hsp evalue' );
210 is( $hsp->query->start, 280, 'Check for query alifrom value' );
211 is( $hsp->query->end, 481, 'Check for query ali to value' );
212 is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' );
213 is( $hsp->hit->end, 279, 'Check for hit hmm to value' );
214 is( $hsp->query->seq_id(),
215 'gi|1522636|gb|AAC37060.1|', 'Check for query seq_id' );
216 is( $hsp->hit->seq_id(), 'Methylase_M', 'Check for hit seq_id' );
218 ($hit->length == 0) ?
219 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
220 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
221 ($result->query_length == 0) ?
222 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
223 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
225 is( $hsp->hit_string,
226 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG-dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn',
227 'Check for hit string'
229 is( $hsp->query_string,
230 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST',
231 'Check for query string'
233 is( $hsp->homology_string,
234 ' ++EL+++ av+ R L+F K++ dk +i+ p + + +++y ++ ++ ++y ++ + lF++++ e ++ ++++ + + ++ + + Glf ++++ ++ +s+ +ne ++e+i+ +++ +++ G++ +el D++G +YE L+ Ae K+ G +YTP e++ ia+ + i+ ++ +++ ++ k+n+i + s+',
235 'Check for homology string'
237 is( join( ' ', $hsp->seq_inds( 'query', 'nomatch', 1 ) ),
238 '280 288 289 293-295 300 304 311 313-315 317 324-326 332 335 337 344-346 348 355 358-361 364-366 372 379 383-385 389 396 400 404-408 412 416 417 422 426 429-431 434-436 439 441 446 450 451 455 459 460 463 464 468 471 472 478',
239 'Check for nomatch indices in query'
241 is( join( ' ', $hsp->seq_inds( 'hit', 'nomatch', 1 ) ),
242 '1 9 10 14-16 18-31 35 39 42-47 51-59 61 63-65 67 72-74 77-79 82 86 89-94 96 103-105 107 110 111 116 118 120-123 126-131 133 135-141 145 150 151 154 158-160 164 171 175 179-183 187 191-193 198 202 205-207 210-212 215 217 222 226 227 231 233 236 237 240-257 261 264-267 273 275-278',
243 'Check for nomatch indices in hit'
245 is( join( ' ', $hsp->seq_inds( 'query', 'gap', 1 ) ),
246 '296 306 309 321 328 334 335 350 356 366-368 376 417 456 463 470 479',
247 'Check for gap indices in query'
249 is( join( ' ', $hsp->seq_inds( 'hit', 'gap', 1 ) ),
250 '', 'Check for gap indices in hit' );
253 $searchio = Bio::SearchIO->new(
255 -file => test_input_file('cysprot1b.hmmsearch')
258 while ( $result = $searchio->next_result ) {
260 'Bio::Search::Result::HMMERResult',
261 'Check for the correct result reference type'
263 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
264 is( $result->algorithm_version, '2.2g', 'Check algorithm version' );
265 is( $result->hmm_name,
266 'Peptidase_C1.hmm [Peptidase_C1]',
269 is( $result->database_name, 'cysprot1b.fa', 'Check database_name' );
270 is( $result->sequence_file, 'cysprot1b.fa', 'Check sequence_file' );
272 is( $result->query_name, 'Peptidase_C1', 'Check query_name' );
273 is( $result->query_length, 0, 'Check query_length absence' );
274 is( $result->query_accession, 'PF00112', 'Check query_accession' );
275 is( $result->query_description,
276 'Papain family cysteine protease',
277 'Check query_description'
279 is( $result->num_hits(), 4, 'Check num_hits' );
281 my $hit = $result->next_hit;
282 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
283 'Check for the correct hit reference type' );
284 is( $hit->name, 'CATL_RAT', 'Check hit name' );
285 is( $hit->description,
287 'Check for hit description'
289 is( $hit->raw_score, 449.4, 'Check hit raw_score' );
290 float_is( $hit->significance, 2e-135, 'Check hit significance' );
291 is( $hit->num_hsps, 1, 'Check num_hsps' );
293 # Query and Hit lengths are unknown by default in HMMER2,
294 # but sometimes they can be deduced from domain data '[]'
295 is( $hit->length, 0, 'Check hit length absence' );
296 is( $hit->frac_aligned_query, undef, 'Check for undefined hit frac_aligned_query' );
297 is( $hit->frac_aligned_hit, undef, 'Check for undefined hit frac_aligned_hit' );
299 my $hsp = $hit->next_hsp;
300 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
301 'Check for correct hsp reference type' );
302 is( $hsp->query->seq_id(), 'Peptidase_C1', 'Check for query seq_id' );
303 is( $hsp->hit->seq_id(), 'CATL_RAT', 'Check for hit seq_id' );
305 is( $hsp->hit->start, 114, 'Check for hit hmmfrom value' );
306 is( $hsp->hit->end, 332, 'Check for hit hmm to value' );
307 is( $hsp->query->start, 1, 'Check for query alifrom value' );
308 is( $hsp->query->end, 337, 'Check for query ali to value' );
309 is( $hsp->score, 449.4, 'Check for hsp score' );
310 float_is( $hsp->evalue, 2e-135, 'Check for hsp evalue' );
312 is( $hsp->length('query'), 337, 'Check for hsp query length' );
313 is( $hsp->length('hit'), 219, 'Check for hsp hit length' );
314 is( $hsp->length('total'), 337, 'Check for hsp total length' );
315 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
316 is( $hsp->gaps('hit'), 118, 'Check for hsp hit gaps' );
317 is( $hsp->gaps('total'), 118, 'Check for hsp total gaps' );
319 ($hit->length == 0) ?
320 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
321 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
322 ($result->query_length == 0) ?
323 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
324 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
326 is( $hsp->num_conserved, 204, 'Check for hsp num_conserved' );
327 is( $hsp->num_identical, 131, 'Check for hsp num_identical' );
328 is( sprintf( "%.2f", $hsp->percent_identity ), 38.87, 'Check for hsp percent_identity' ); # formatted to fixed precision before comparing
329 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.389, 'Check for hsp frac_identical (query)' );
330 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.598, 'Check for hsp frac_identical (hit)' );
331 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.389, 'Check for hsp frac_identical (total)' );
332 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.605, 'Check for hsp frac_conserved (query)' );
333 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.932, 'Check for hsp frac_conserved (hit)' );
334 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.605, 'Check for hsp frac_conserved (total)' );
336 is( length($hsp->homology_string), length($hsp->query_string), 'Check if homology string and query string have an equal length' );
338 is( $hsp->hit_string,
339 'IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT------GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE-----NGGLDSEESY-----PYE----AKD-------------------GSCKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAMDASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGYGYEG-T------------------------------------DSNKDKYWLVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI',
340 'Check for hit string'
342 is( $hsp->homology_string,
343 '+P+++DWRe kg VtpVK+QG qCGSCWAFSa g lEg+ ++kt gkl+sLSEQ+LvDC++ d gn+ GCnG Glmd Af+Yik+ NgGl++E++Y PY+ +kd g+Cky+ + ++ a+++g++d+p++ E+al+ka+a++GP+sVa+das+ s q+Y+sG +Y+++ C+++ +LdH+Vl+VGYG e+ ++++ +YW+VKNSWG++WG++GY++ia+++n n+CG+a+ asypi',
344 'Check for homology string'
346 is( $hsp->query_string,
347 'lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgtkawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikkeqIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgtCkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVaidasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGYGteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYWIVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi',
348 'Check for query string'
350 # Hmmsearch2 doesn't have PP or CS strings; these tests check for side effects
351 is( $hsp->posterior_string, '', 'Check for absence of posterior probability string' );
352 is( $hsp->consensus_structure, '', 'Check for absence of consensus structure string' );
354 $hit = $result->next_hit;
355 is( $hit->name, 'CATL_HUMAN', 'Check hit name' );
356 is( $hit->description, '', 'Check for hit description' );
357 float_is( $hit->significance, 6.1e-134, 'Check hit significance' );
358 is( $hit->raw_score, 444.5, 'Check hit raw_score' );
361 # test for bug 2632 - CS lines should get ignored without breaking the parser
362 $searchio = Bio::SearchIO->new(
364 -file => test_input_file('hmmpfam_cs.out')
366 $result = $searchio->next_result;
367 my $hit = $result->next_hit;
368 my $hsp = $hit->next_hsp;
370 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES',
371 'Check for hsp seq_str'
374 # Tests for hmmer3 output here
375 $searchio = Bio::SearchIO->new(
377 -file => test_input_file('hmmscan.out'),
380 is( ref($searchio), 'Bio::SearchIO::hmmer3',
381 'Check if correct searchio object is returned' );
383 while ( $result = $searchio->next_result ) {
387 'Bio::Search::Result::HMMERResult',
388 'Check for the correct result reference type'
390 is( $result->algorithm, 'HMMSCAN', 'Check algorithm' );
391 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
392 is( $result->hmm_name,
393 '/data/biodata/HMMerDB/Pfam.hmm',
396 is( $result->sequence_file,
397 'BA000019.orf1.fasta',
398 'Check sequence_file'
400 is( $result->query_name, 'BA000019.orf1', 'Check query_name' );
401 is( $result->query_length, 198, 'Check query_length' );
402 is( $result->query_accession, '', 'Check query_accession' );
403 is( $result->query_description, '', 'Check query_description' );
404 # 1 hit above and 6 below inclusion threshold
405 is( $result->num_hits(), 7, 'Check num_hits' );
408 if ( $hit = $result->next_model ) {
409 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
410 'Check for the correct hit reference type' );
411 is( $hit->name, 'Peripla_BP_2', 'Check hit name' );
412 is( $hit->description,
413 'Periplasmic binding protein',
414 'Check for hit description'
416 is( $hit->raw_score, 105.2, 'Check hit raw_score' );
417 float_is( $hit->significance, 6e-30, 'Check hit significance' );
418 is( $hit->num_hsps, 1, 'Check num_hsps' );
420 # Hit length is unknown for HMMSCAN and HMMSEARCH but not for NHMMER
421 is( $hit->length, 0, 'Check hit length absence' );
422 is( $hit->frac_aligned_query, 0.87, 'Check hit frac_aligned_query' );
423 is( $hit->frac_aligned_hit, undef, 'Check for undefined hit frac_aligned_hit' );
425 if ( defined( $hsp = $hit->next_domain ) ) {
426 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
427 'Check for correct hsp reference type' );
428 is( $hsp->hit->seq_id(), 'Peripla_BP_2', 'Check for hit seq_id' );
429 is( $hsp->query->seq_id(), 'BA000019.orf1', 'Check for query seq_id' );
431 is( $hsp->hit->start, 59, 'Check for hit hmmfrom value' );
432 is( $hsp->hit->end, 236, 'Check for hit hmm to value' );
433 is( $hsp->query->start, 2, 'Check for query alifrom value' );
434 is( $hsp->query->end, 173, 'Check for query ali to value' );
435 is( $hsp->score, '105.0', 'Check for hsp score' );
436 float_is( $hsp->evalue, 1.5e-33, 'Check for hsp c-Evalue' );
438 is( $hsp->length('query'), 172, 'Check for hsp query length' );
439 is( $hsp->length('hit'), 178, 'Check for hsp hit length' );
440 is( $hsp->length('total'), 180, 'Check for hsp total length' );
441 is( $hsp->gaps('query'), 8, 'Check for hsp query gaps' );
442 is( $hsp->gaps('hit'), 2, 'Check for hsp hit gaps' );
443 is( $hsp->gaps('total'), 10, 'Check for hsp total gaps' );
445 ($hit->length == 0) ?
446 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
447 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
448 ($result->query_length == 0) ?
449 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
450 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
452 is( $hsp->num_conserved, 140, 'Check for hsp num_conserved' );
453 is( $hsp->num_identical, 50, 'Check for hsp num_identical' );
454 is( sprintf( "%.2f", $hsp->percent_identity ), 27.78, 'Check for hsp percent_identity' ); # formatted to fixed precision before comparing
455 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.291, 'Check for hsp frac_identical (query)' );
456 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.281, 'Check for hsp frac_identical (hit)' );
457 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.278, 'Check for hsp frac_identical (total)' );
458 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.814, 'Check for hsp frac_conserved (query)' );
459 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.787, 'Check for hsp frac_conserved (hit)' );
460 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.778, 'Check for hsp frac_conserved (total)' );
462 is( length($hsp->homology_string), length($hsp->query_string), 'Check if homology string and query string have an equal length' );
464 is( $hsp->query_string,
465 'LKPDLIIGREYQ---KNIYNQLSNFAPTVLVDWGSF-TSFQDNFRYIAQVLNEEEQGKLVLQQYQKRIRDLQDRMGERlQKIEVSVIGFSGQSIKSLNR-DAVFNQVLDDAGIKRIsIQKNQQERYLEISIENLNKYDADVLFVINE---SKEQLYPDLKNPLWHHLRAVKKQQVYVVNQ',
466 'Check for query string'
468 is( $hsp->hit_string,
469 'lkPDlvivsafgalvseieellelgipvvavessstaeslleqirllgellgeedeaeelvaelesridavkaridsl-kpktvlvfgyadegikvvfgsgswvgdlldaaggeni-iaeakgseseeisaEqilaadpdviivsgrgedtktgveelkenplwaelpAvkngrvyllds',
470 'Check for hit string'
472 is( $hsp->homology_string,
473 'lkPDl+i+ +++ ++i+++l++ +p+v v+ s+ s+++ +r ++++l+ee++++ + +++++ri+++++r + ++ +v+v+g+++ +ik+++ + ++++ld+ag++ i i++++++ + eis+E+++++d+dv++v k+ + ++nplw +l+Avk+++vy++++',
474 'Check for homology string'
476 is( $hsp->posterior_string,
477 '8***********...********************9.*****************************************999999999999997777776.5678999999****99777777*************************...77777777899***************9976',
478 'Check for posterior probability string'
483 # Check for errors in HSP caused by the existence of 2 hits with the same ID
484 elsif ($counter == 2) {
485 is( $result->algorithm, 'HMMSCAN', 'Check algorithm' );
486 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
487 is( $result->hmm_name,
488 '/data/biodata/HMMerDB/Pfam.hmm',
491 is( $result->sequence_file,
492 'BA000019.orf1.fasta',
493 'Check sequence_file'
495 is( $result->query_name, 'lcl|Test_ID.1|P1', 'Check query_name' );
496 is( $result->query_length, 463, 'Check query_length' );
497 is( $result->query_description, '281521..282909', 'Check query_description' );
498 is( $result->num_hits(), 2, 'Check num_hits' );
502 while ( $hit = $result->next_model ) {
504 if ($hit_counter == 1) {
505 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
506 'Check for the correct hit reference type' );
507 is( $hit->name, 'IS4.original', 'Check hit name' );
508 is( $hit->description, '', 'Check for hit description' );
509 is( $hit->num_hsps, 1, 'Check num_hsps' );
510 if ( defined( $hsp = $hit->next_domain ) ) {
511 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
512 'Check for correct hsp reference type' );
513 is( $hsp->hit->seq_id(), 'IS4.original', 'Check for hit seq_id' );
514 is( $hsp->query->seq_id(), 'lcl|Test_ID.1|P1', 'Check for query seq_id' );
516 is( $hsp->hit->start, 315, 'Check for hit hmmfrom value' );
517 is( $hsp->hit->end, 353, 'Check for hit hmm to value' );
518 is( $hsp->query->start, 335, 'Check for query alifrom value' );
519 is( $hsp->query->end, 369, 'Check for query ali to value' );
520 is( $hsp->score, 18.9, 'Check for hsp score' );
521 float_is( $hsp->evalue, 8.9e-08, 'Check for hsp c-Evalue' );
524 elsif ($hit_counter == 2) {
525 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
526 'Check for the correct hit reference type' );
527 is( $hit->name, 'IS4.original', 'Check hit name' );
528 is( $hit->description, '', 'Check for hit description' );
529 is( $hit->num_hsps, 1, 'Check num_hsps' );
530 if ( defined( $hsp = $hit->next_domain ) ) {
531 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
532 'Check for correct hsp reference type' );
533 is( $hsp->hit->seq_id(), 'IS4.original', 'Check for hit seq_id' );
534 is( $hsp->query->seq_id(), 'lcl|Test_ID.1|P1', 'Check for query seq_id' );
536 is( $hsp->hit->start, 315, 'Check for hit hmmfrom value' );
537 is( $hsp->hit->end, 353, 'Check for hit hmm to value' );
538 is( $hsp->query->start, 335, 'Check for query alifrom value' );
539 is( $hsp->query->end, 369, 'Check for query ali to value' );
540 is( $hsp->score, 18.8, 'Check for hsp score' );
541 float_is( $hsp->evalue, 9e-08, 'Check for hsp c-Evalue' );
548 $searchio = Bio::SearchIO->new(
550 -file => test_input_file('hmmsearch3.out'),
553 while ( $result = $searchio->next_result ) {
555 'Bio::Search::Result::HMMERResult',
556 'Check for the correct result reference type'
558 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
559 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
560 is( $result->hmm_name, 'Kv9.hmm', 'Check hmm_name' );
561 is( $result->sequence_file,
562 '/home/pboutet/Desktop/databases/nr_May26',
563 'Check sequence_file'
565 is( $result->query_name, 'Kv9', 'Check query_name' );
566 is( $result->query_length, '481', 'Check query_length' );
567 is( $result->query_description, '', 'Check query_description' );
568 is( $result->num_hits(), 2, 'Check num_hits' );
570 while ( my $hit = $result->next_model ) {
574 $searchio = Bio::SearchIO->new(
576 -file => test_input_file('hmmsearch3_multi.out'),
579 is( ref($searchio), 'Bio::SearchIO::hmmer3',
580 'Check if correct searchio object is returned' );
582 while ( $result = $searchio->next_result ) {
586 'Bio::Search::Result::HMMERResult',
587 'Check for the correct result reference type'
589 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
590 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
591 is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' );
592 is( $result->sequence_file, 'test_seqs.seq_raw.txt', 'Check sequence_file' );
594 is( $result->query_name, '1-cysPrx_C', 'Check query_name' );
595 is( $result->query_length, 40, 'Check query_length' );
596 is( $result->query_accession, 'PF10417.4', 'Check query_accession' );
597 is( $result->query_description,
598 'C-terminal domain of 1-Cys peroxiredoxin',
599 'Check query_description'
601 is( $result->num_hits(), 0, 'Check num_hits' );
603 elsif ($counter == 2) {
605 'Bio::Search::Result::HMMERResult',
606 'Check for the correct result reference type'
608 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
609 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
610 is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' );
611 is( $result->sequence_file, 'test_seqs.seq_raw.txt', 'Check sequence_file' );
613 is( $result->query_name, 'DUF4229', 'Check query_name' );
614 is( $result->query_length, 69, 'Check query_length' );
615 is( $result->query_accession, 'PF14012.1', 'Check query_accession' );
616 is( $result->query_description,
617 'Protein of unknown function (DUF4229)',
618 'Check query_description'
620 is( $result->num_hits(), 1, 'Check num_hits' );
623 if ( $hit = $result->next_model ) {
624 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
625 'Check for the correct hit reference type' );
626 is( $hit->name, 'lcl|Protein_ID1.3|M3', 'Check hit name' );
627 is( $hit->description,
628 'complement(48376..51420)',
629 'Check for hit description'
631 is( $hit->raw_score, -17.8, 'Check hit raw_score' );
632 float_is( $hit->significance, 3, 'Check hit significance' );
633 is( $hit->num_hsps, 5, 'Check num_hsps' );
636 if ( defined( $hsp = $hit->next_domain ) ) {
637 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
638 'Check for correct hsp reference type' );
639 is( $hsp->hit->seq_id(), 'lcl|Protein_ID1.3|M3', 'Check for hit seq_id' );
640 is( $hsp->query->seq_id(), 'DUF4229', 'Check for query seq_id' );
642 is( $hsp->hit->start, 305, 'Check for hit alifrom value' );
643 is( $hsp->hit->end, 311, 'Check for hit ali to value' );
644 is( $hsp->query->start, 34, 'Check for query hmmfrom value' );
645 is( $hsp->query->end, 40, 'Check for query hmm to value' );
646 is( $hsp->score, -4.3, 'Check for hsp score' );
647 float_is( $hsp->evalue, 1, 'Check for hsp c-Evalue' );
649 is( $hsp->length('query'), 7, 'Check for hsp query length' );
650 is( $hsp->length('hit'), 7, 'Check for hsp hit length' );
651 is( $hsp->length('total'), 7, 'Check for hsp total length' );
652 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
653 is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' );
654 is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' );
656 ($hit->length == 0) ?
657 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
658 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
659 ($result->query_length == 0) ?
660 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
661 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
663 is( $hsp->consensus_structure,
665 'Check for consensus structure string'
667 is( $hsp->query_string,
669 'Check for query string'
671 is( $hsp->hit_string,
673 'Check for hit string'
675 is( $hsp->homology_string,
677 'Check for homology string'
679 is( $hsp->posterior_string,
681 'Check for posterior probability string'
686 elsif ($counter == 3) {
688 'Bio::Search::Result::HMMERResult',
689 'Check for the correct result reference type'
691 is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' );
692 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
693 is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' );
694 is( $result->sequence_file, 'test_seqs.seq_raw.txt', 'Check sequence_file' );
696 is( $result->query_name, 'ACR_tran', 'Check query_name' );
697 is( $result->query_length, 1021, 'Check query_length' );
698 is( $result->query_accession, 'PF00873.14', 'Check query_accession' );
699 is( $result->query_description,
700 'AcrB/AcrD/AcrF family',
701 'Check query_description'
703 is( $result->num_hits(), 1, 'Check num_hits' );
706 if ( $hit = $result->next_model ) {
707 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
708 'Check for the correct hit reference type' );
709 is( $hit->name, 'lcl|Protein_ID1.3|M3', 'Check hit name' );
710 is( $hit->description,
711 'complement(48376..51420)',
712 'Check for hit description'
714 is( $hit->raw_score, 616.9, 'Check hit raw_score' );
715 float_is( $hit->significance, 9.3e-189, 'Check hit significance' );
716 is( $hit->num_hsps, 1, 'Check num_hsps' );
718 # Hit length is unknown for HMMSCAN and HMMSEARCH but not for NHMMER
719 is( $hit->length, 0, 'Check hit length absence' );
720 is( $hit->frac_aligned_query, 0.93, 'Check hit frac_aligned_query' );
721 is( $hit->frac_aligned_hit, undef, 'Check for undefined hit frac_aligned_hit' );
723 if ( defined( $hsp = $hit->next_domain ) ) {
724 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
725 'Check for correct hsp reference type' );
726 is( $hsp->hit->seq_id(), 'lcl|Protein_ID1.3|M3', 'Check for hit seq_id' );
727 is( $hsp->query->seq_id(), 'ACR_tran', 'Check for query seq_id' );
729 is( $hsp->hit->start, 11, 'Check for hit alifrom value' );
730 is( $hsp->hit->end, 1000, 'Check for hit ali to value' );
731 is( $hsp->query->start, 71, 'Check for query hmmfrom value' );
732 is( $hsp->query->end, 1021, 'Check for query hmm to value' );
733 is( $hsp->score, 616.6, 'Check for hsp score' );
734 float_is( $hsp->evalue, 3.9e-189, 'Check for hsp c-Evalue' );
736 is( $hsp->length('query'), 951, 'Check for hsp query length' );
737 is( $hsp->length('hit'), 990, 'Check for hsp hit length' );
738 is( $hsp->length('total'), 1003, 'Check for hsp total length' );
739 is( $hsp->gaps('query'), 52, 'Check for hsp query gaps' );
740 is( $hsp->gaps('hit'), 13, 'Check for hsp hit gaps' );
741 is( $hsp->gaps('total'), 65, 'Check for hsp total gaps' );
743 ($hit->length == 0) ?
744 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
745 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
746 ($result->query_length == 0) ?
747 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
748 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
750 is( $hsp->num_conserved, 690, 'Check for hsp num_conserved' );
751 is( $hsp->num_identical, 262, 'Check for hsp num_identical' );
752 is( sprintf( "%.2f", $hsp->percent_identity ), 26.12, 'Check for hsp percent_identity' ); # formatted to fixed precision before comparing
753 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.275, 'Check for hsp frac_identical (query)' );
754 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.265, 'Check for hsp frac_identical (hit)' );
755 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.261, 'Check for hsp frac_identical (total)' );
756 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.726, 'Check for hsp frac_conserved (query)' );
757 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), 0.697, 'Check for hsp frac_conserved (hit)' );
758 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.688, 'Check for hsp frac_conserved (total)' );
760 is( length($hsp->homology_string), length($hsp->query_string), 'Check if homology string and query string have an equal length' );
762 is( $hsp->consensus_structure,
763 'S-TTEEEEEEEETTSEEEEEEEESTTS-HHHHHHHHHHHHHHHGGGS-HHHHHH-EEEEEEECCECEEEEEEESSSTS-HHHHHHHHHHCTHHHHHTSTTEEEEEESS.--EEEEEEE-HHHHHCTT--HHHHHHHHHHHSSB-EEEECTT-SB-EEEE-SB---SCCHHCT-EEEETTSEEEEHHHCEEEEEEESSSS-EEEETTCEEEEEEEEEETTSBHHHHHHHHHHHHHCCGGGSSTTEEEEEEEESHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHSSHCCCHHHHHHHHHHHHHHHHHHHHTT--EEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCSS-HHHHHHHHHHHHCCHHHHHHHHHHHHCCGGGGSBHHHHHHHHHHHHHHHHHHHHHHHHHHCCHHHHHHHCS----TT-CC..............................CHHHHHHHHHHHHHHHHHHHHHHHHHSCHHHHHHHHHHHHH.HHHHHCCS-BESS----TSEEEEEEE-STTC-HHHHHHHHHHHHHHHH...TTTTEEEEEEEESESSSS..E........CTTEEEEEEEE--CTTS-SCCCSHHHHHHHHHHHC.CTSTSSEEEEEE-SSSCCCSSSSSEEEEEEE.TSSSCHHHHHHHHHHHHHHHCCSTTEECEEESS-S-EEEEEEEE-HHHHHHCTB-HHHHHHHHHHHHT-..EEEEEEEETTE...EEEEEEEE-GGGSSSGGGGCC-EEEETTSE.EEECGGCEEEEEEEE-SEEEEETTCEEEEEEEEESTTS...-HHHHHHHHHHCCTT..SSTTEEEEEECHHHHHHHHCCCHHHHHHHHHHHHHHHHHHHCTSSSTCHHHHTTHHHHHHHHHHHHHHTT--BSHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCTTTBHHHHHHHHHHHHCHHHHHHHHHHHHHCCHHHHTT-STTHHHHHHHHHHHHHHHHHHHHCHHHHHHHHHHHHH',
764 'Check for consensus structure string'
766 is( $hsp->query_string,
767 'gldglkyvsSqSseglssitvtFedgtdidiArqqvqnrlqeaknkLPeevqepgiskiktssseilvlavtskdgsltktdlrdlaesnikdqlsrveGVgdvqliGgsekavriwldpqklaklgltltdvvsalkeqnvqvaaGqlegqqeelliraqgrlqsaediekiivksqdgskvrlrDvAkvelgaeeeriaatlngkpavllavkklpganaievvkavkekleelketlPegveivvvydttefvrasieeVvktlleaivLvvlvlflFLqnlratlipaiavPlsllgtfavlkalglsiNlltlfgLvlAiGlvvDdAiVvvEnverkleeegekpleaalksmkeiegalvaialvllavfvPilflgGveGklfrqfaltivlaillsvlvaltltPalcallLkarkeekek..............................gffrefnrlfdalerrYekllekvlrhravvllvalllvvg.slllfvripkeflPeedegvlvtsvqlppgvsleqtekvlkqvekilk...ekpevesvfavtGfafagdta........gqnsakvfisLkpekerkeeektvealierlrkel.ekikganvellapiqlreletlsgvrlelqvklfgddleaLseareqllaalkqlpeladvrseqqedepqlqvkidrekaaalGvsiadinetlstalgg..syvndfieegr...vvkvvvqleedlrsspedlkklyvrnkkgk.mvplsavakieeekgpnsierenglrsveisgevaegd...slgeaeeavekiakqvklPagvgiewtglseqeqeagnsllllvalalllvflvLaalyeslsdpllvlltvPlalvGallalllrglelsviaqvGlilliGlavkNailivefakelrekeglsleeAileaaklRLrPiLMTalaailGvlPLalstGaGselqqplgivvlGGlvtstvLtlllvPvlYvlva',
768 'Check for query string'
770 is( $hsp->hit_string,
771 'TVNDIEHIESQSLFGYGIVKIFFQPDVDIRTANAQVTAISQTVLKQMPPGITPPLILNYNAATVPILQLALSSK--VLSEDRIFDLGQNFIRPQLATVRGSAVPSPYGGKVRQIQIDLDPQAMQSKRVSPDDVARALSQQNLVLSPGTEKIGSFEYNVKINDSPDEFTLLNNLPIKNVGGVTIFIHDVAHVRDGFPPQINVVRDDGRRSVLMTILKNGATSTLDIIQGTKELIPKLKETLPNNLVLKVVGDQSIFVKSAISGVVREGTIAGILTSVMILLFLGSWRSTIIISMSIPLAILSAIIFLSLTGNTLNVMTLGGLALAVGMLVDDATVVIENINHHLEM-GKPTTKAIIDAARQIIQPALVSTLSICIVFVPMFSLTGVPRYLFIPMAEAVIFGMLSSFVLSQTFVPTVANKLLKYQTQHFKHehhtdahrpehdpnfkvhrsvkasifqffiNIQQGFEKRFTKVRLVYRSILHFALDHRKKFITLFLGFVIVsCVTLFPLLGKNFFPEVDSGDMKIHIRVQVGTRIEETAKQFDLIENTIRrlvPQNELDTIVDNIGLSVSGINTaysstgtiGPQDGDILIHLNEN------HHPTKEYMKKLRETLpRAFPGVS-FAFLPADITSQILNFGVPAPIDIRVDGPNHDNNLKFVRAILKDIRNVPGIADLRVQQATNYPQFNVDIDRSQAKNYGLTEGDITNSLVATLAGtsQVAPTFWLNNKngvSYPIVIQMPQYKINSLADLANIPITTKESSsMQVLGGLGSIERDQSDSVISHYNIKPSFDIFASLQGRDlgsISGDIETIIQHHHQE--LPKGVSVKLQGQVPIMQDSYRGLSLGLVASIILIYFLVVVNFESWLDPFVIITALPAALAGIVWMLYLTGTTLSVPALTGAIMCMGVATANSILVISFARERLA-IVKDSTQAALEAGYTRFRPVLMTASAMLIGMIPMALGLGDGGEQNAPLGRAVIGGLLLATIATLIFVPVVFSVVH',
772 'Check for hit string'
774 is( $hsp->homology_string,
775 ' ++ +++++SqS g + + F+ + di A+ qv++ q + +++P ++++p i +++ +il+la++sk l++ + dl ++ i++ql+ v G + +Gg+ ++++i ldpq++++ +++++dv++al++qn + G+ + + e+++++++ + ++++ +k+ g + ++DvA+v +g + ++++ +g vl+++ k ++++++ ke +++lketlP+++ ++vv d++ fv+++i+ Vv + +a +L ++++lFL+++r+t+i+ +++Pl++l ++++l++ g ++N++tl+gL+lA+G++vDdA Vv+En+ +le+ g+ +a++ ++++i + + ++l++++vfvP+++l+Gv lf ++a ++++ +l s +++ t++P ++ lLk + ++ ++ ++ + f++ f ++ Y+ +l++ l hr+ ++++l +v++ ++ lf+ ++k+f+Pe d g++ ++++++ g+ +e+t+k + +e++++ ++e + ++ G + +g + g++ +++ i+L ++ ++ ++ +++lr+ l ++++g++ +++ p +++ + gv + ++ + g ++++ + ++++l+ ++++p++ad+r++q ++ pq++v+idr +a+++G++ di + l + l g +++ +f +++ + +v+q+++ + +s+ dl+++++++k++ m l+ + +ie+ ++ + i+++n ++s+ i ++++ +d ++g++e+++++ +++ lP+gv+++ +g+ q ++ l+l ++++++l++++ + +es++dp+++++ +P al+G + l+l+g++lsv a+ G i+ +G+a N il+++fa+e + ++ +A+lea+ +R+rP+LMTa a+++G++P+al+ G+G e plg +v+GGl+++t+ tl +vPv++ +v+',
776 'Check for homology string'
778 is( $hsp->posterior_string,
779 '578899********************************************************************..*****************************************************************************************************************************************************************************************************************************************************************************.***************************************************************************8776544446799********************9655555578*************************999999887775899******************************************8875446889999999999888774331111111134445555555444......45688999999999945678887.7888999*999************************************************************************8877666655434556776544422279***********************998764889*******************************8876222578999999999888..********************************************************************************************************.888899*****************************************************************9997',
780 'Check for posterior probability string'
# hmmscan report with several domains per model: parse
# hmmscan_multi_domain.out and compare each result, hit and domain with
# the expected values listed below. (Only part of the constructor call
# and of the expected-value table is visible in this listing.)
787 $searchio = Bio::SearchIO->new(
789 -file => test_input_file('hmmscan_multi_domain.out'),
# Expected values, one entry per hit. Presumably these rows initialise
# @multi_hits, which the hit loop below consumes -- the declaration is
# not visible here, TODO confirm. Each inner six-element row describes
# one domain; judging by the checks in the domain loop the order is
# hit start, hit end, query start, query end, score, evalue.
795 'Bacterial pre-peptidase C-terminal domain',
797 [ [ 4, 59, 117, 183, 0.5, 0.16 ],
798 [ 12, 58, 347, 388, -0.6, 0.36 ],
799 [ 1, 69, 470, 549, 71.3, 1.3e-23 ],
800 [ 15, 25, 582, 603, -3.2, 2 ],
801 [ 13, 36, 987, 1019, -1.1, 0.5 ],
802 [ 1, 69, 1087, 1168, 54.4, 2.4e-18 ]
806 'Hemolysin-type calcium-binding repeat (2 cop',
808 [ [ 2, 13, 1214, 1225, 5.9, 0.0026 ],
809 [ 1, 18, 1231, 1248, 10.8, 6.8e-5 ],
810 [ 4, 18, 1243, 1257, 11.4, 4.3e-05 ]
# Result-level checks: result class, algorithm and version, database
# name, sequence file, query name/length/description and hit count.
815 while ( $result = $searchio->next_result ) {
817 'Bio::Search::Result::HMMERResult',
818 'Check for the correct result reference type'
820 is( $result->algorithm, 'HMMSCAN', 'Check algorithm' );
821 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
822 is( $result->hmm_name,
823 '/data/biodata/HMMerDB/Pfam-A.hmm',
826 is( $result->sequence_file, 'BA000019.orf37.fasta',
827 'Check sequence_file' );
828 is( $result->query_name, 'BA000019.orf37', 'Check query_name' );
829 is( $result->query_length, '1418', 'Check query_length' );
830 is( $result->query_description, '', 'Check query_description' );
831 is( $result->num_hits(), 2, 'Check num_hits' );
# Walk the hits in report order. One expected entry is shifted off
# @multi_hits per hit, and its fields are shifted off in the same order
# as the checks: name, description, raw score, significance (that call
# is not visible in this listing), HSP count, and finally the list of
# per-domain rows.
834 while ( $hit = $result->next_model ) {
835 my @expected = @{ shift @multi_hits };
836 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
837 'Check for the correct hit reference type' );
838 is( $hit->name, shift @expected, 'Check hit name' );
839 is( $hit->description, shift @expected, 'Check for hit description' );
840 is( $hit->raw_score, shift @expected, 'Check hit raw_score' );
844 'Check hit significance'
846 is( $hit->num_hsps, shift @expected, 'Check num_hsps' );
847 my @hsp_list = @{ shift @expected };
# Walk the domains of this hit, consuming one expected row per domain.
# NOTE(review): the labels below mention env coordinates for the hit and
# hmm coordinates for the query. With hmmscan the hits are the profile
# models (see the hit descriptions above), so the wording may be
# swapped -- confirm against the parser.
849 while ( defined( $hsp = $hit->next_domain ) ) {
850 my @hsp_exp = @{ shift @hsp_list };
851 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
852 'Check for correct hsp reference type' );
853 is( $hsp->hit->start,
855 'Check for hit envfrom value'
857 is( $hsp->hit->end, shift @hsp_exp,
858 'Check for hit env to value' );
859 is( $hsp->query->start,
861 'Check for query hmmfrom value'
863 is( $hsp->query->end,
865 'Check for query hmm to value'
867 is( $hsp->score, shift @hsp_exp, 'Check for hsp score' );
# The E-value goes through float_is rather than is, unlike the checks
# above.
868 float_is( $hsp->evalue, shift @hsp_exp,
869 'Check for hsp c-Evalue' );
# hmmscan report that carries secondary-structure lines: parse
# hmmscan_sec_struct.out and check that the hit and query alignment
# strings of every domain come out as expected. (Only part of the
# constructor call and of the expected-value table is visible in this
# listing.)
874 $searchio = Bio::SearchIO->new(
876 -file => test_input_file('hmmscan_sec_struct.out'),
# Expected values, one entry per hit. Presumably these rows initialise
# @multi_hits for the loop below -- the declaration is not visible
# here, TODO confirm. Each inner pair is consumed by the domain loop as
# hit string first, query string second.
882 'Bacterial regulatory helix-turn-helix proteins, Ara',
884 [ [ 'siadiAeevgfSpsyfsrlFkkytGvt', 'SLMELSRQVGLNDCTLKRGFRLVFDTT' ],
885 [ 'nwsiadiAeevgf-SpsyfsrlFkkytGvtPsqyr',
886 'EINISQAARRVGFsSRSYFATAFRKKFGINPKEFL'
# Visible tail of the second entry: description, raw score,
# significance and HSP count, in the order the hit loop shifts them.
891 '', '38.2', 3.8e-12, 2,
892 [ [ 'GPSvtVDTACSSSLvA', 'GPSVTVDTLCSSSLVA' ],
893 [ 'GPSvtVDTACSSSLv', 'GPNLVIDSACSSALV' ]
897 'Domain of Unknown Function (DUF746)',
899 [ [ 'rllIrlLsqplslaeaadqlgtdegiiak',
900 'EILIRNLENPPSLMELSRQVGLNDCTLKR'
902 [ 'plslaeaadqlgtdeg', 'EINISQAARRVGFSSR' ]
# Result-level checks: result class, algorithm and version, database
# name, sequence file, query name/length/description and hit count.
907 while ( $result = $searchio->next_result ) {
909 'Bio::Search::Result::HMMERResult',
910 'Check for the correct result reference type'
912 is( $result->algorithm, 'HMMSCAN', 'Check algorithm' );
913 is( $result->algorithm_version, '3.0', 'Check algorithm version' );
914 is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' );
915 is( $result->sequence_file, 'BA000019.orf8.fasta',
916 'Check sequence_file' );
917 is( $result->query_name, 'BA000019.orf8', 'Check query_name' );
918 is( $result->query_length, '348', 'Check query_length' );
919 is( $result->query_description, '', 'Check query_description' );
920 is( $result->num_hits(), 3, 'Check num_hits' );
# Walk the hits in report order, shifting one expected entry per hit and
# its fields in the same order as the checks: name, description, raw
# score, significance (that call is not visible in this listing), HSP
# count, and finally the list of per-domain string pairs.
923 while ( $hit = $result->next_model ) {
924 my @expected = @{ shift @multi_hits };
925 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
926 'Check for the correct hit reference type' );
927 is( $hit->name, shift @expected, 'Check hit name' );
928 is( $hit->description, shift @expected, 'Check for hit description' );
929 is( $hit->raw_score, shift @expected, 'Check hit raw_score' );
933 'Check hit significance'
935 is( $hit->num_hsps, shift @expected, 'Check num_hsps' );
936 my @hsp_list = @{ shift @expected };
# Per domain only the HSP class and the two alignment strings are
# checked; coordinates and scores are not compared in this test.
938 while ( defined( $hsp = $hit->next_domain ) ) {
939 my @hsp_exp = @{ shift @hsp_list };
940 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
941 'Check for correct hsp reference type' );
942 is( $hsp->hit_string, shift @hsp_exp, 'Check hit sequence' );
943 is( $hsp->query_string, shift @hsp_exp, 'Check query sequence' );
948 # Make sure that you can also directly call the hmmer2 and hmmer3 subclasses
# Four cases follow, one per sample file. Each builds a parser, checks
# the class of the object that came back, and checks that next_result
# yields an HMMERResult. The -format argument of each constructor call
# is not visible in this listing; presumably it names the hmmer2 or
# hmmer3 subclass matching the expected class -- TODO confirm.
# Case 1: hmmpfam output, expected parser class hmmer2.
949 $searchio = Bio::SearchIO->new(
951 -file => test_input_file('hmmpfam.out')
953 is( ref($searchio), 'Bio::SearchIO::hmmer2',
954 'Check if loading hmmpfam output via the hmm2 parser directly works' );
955 is( ref( $searchio->next_result ),
956 'Bio::Search::Result::HMMERResult',
957 'Check for the correct result reference type'
# Case 2: hmmsearch output, expected parser class hmmer2.
960 $searchio = Bio::SearchIO->new(
962 -file => test_input_file('hmmsearch.out')
964 is( ref($searchio), 'Bio::SearchIO::hmmer2',
965 'Check if loading hmmsearch2 output via the hmm2 parser directly works' );
966 is( ref( $searchio->next_result ),
967 'Bio::Search::Result::HMMERResult',
968 'Check for the correct result reference type'
# Case 3: hmmscan output, expected parser class hmmer3.
971 $searchio = Bio::SearchIO->new(
973 -file => test_input_file('hmmscan.out')
975 is( ref($searchio), 'Bio::SearchIO::hmmer3',
976 'Check if loading hmmscan output via the hmm3 parser directly works' );
977 is( ref( $searchio->next_result ),
978 'Bio::Search::Result::HMMERResult',
979 'Check for the correct result reference type'
# Case 4: hmmsearch3 output, expected parser class hmmer3.
982 $searchio = Bio::SearchIO->new(
984 -file => test_input_file('hmmsearch3.out')
986 is( ref($searchio), 'Bio::SearchIO::hmmer3',
987 'Check if loading hmmsearch3 output via the hmm3 parser directly works' );
988 is( ref( $searchio->next_result ),
989 'Bio::Search::Result::HMMERResult',
990 'Check for the correct result reference type'
993 # Make sure that you can also specify the -version parameter directly
# Same four sample files as the direct-subclass cases, with the same two
# checks per case: class of the parser object and class of the first
# result. The -version argument lines are not visible in this listing.
# Case 1: hmmpfam output, expected parser class hmmer2.
994 $searchio = Bio::SearchIO->new(
996 -file => test_input_file('hmmpfam.out'),
999 is( ref($searchio), 'Bio::SearchIO::hmmer2',
1000 'Check if selecting the correct hmmpfam parser using -version works' );
1001 is( ref( $searchio->next_result ),
1002 'Bio::Search::Result::HMMERResult',
1003 'Check for the correct result reference type'
# Case 2: hmmsearch output, expected parser class hmmer2.
1006 $searchio = Bio::SearchIO->new(
1008 -file => test_input_file('hmmsearch.out'),
1011 is( ref($searchio), 'Bio::SearchIO::hmmer2',
1012 'Check if selecting the correct hmmsearch2 parser using -version works' );
1013 is( ref( $searchio->next_result ),
1014 'Bio::Search::Result::HMMERResult',
1015 'Check for the correct result reference type'
# Case 3: hmmscan output, expected parser class hmmer3.
# NOTE(review): this is the only case whose -format line is visible,
# and it names the hmmer3 format explicitly. If the point of the case is
# that -version alone picks the parser, the generic format name may have
# been intended -- confirm.
1018 $searchio = Bio::SearchIO->new(
1019 -format => 'hmmer3',
1020 -file => test_input_file('hmmscan.out'),
1023 is( ref($searchio), 'Bio::SearchIO::hmmer3',
1024 'Check if selecting the correct hmmscan parser using -version works' );
1025 is( ref( $searchio->next_result ),
1026 'Bio::Search::Result::HMMERResult',
1027 'Check for the correct result reference type'
# Case 4: hmmsearch3 output, expected parser class hmmer3.
1030 $searchio = Bio::SearchIO->new(
1032 -file => test_input_file('hmmsearch3.out'),
1035 is( ref($searchio), 'Bio::SearchIO::hmmer3',
1036 'Check if selecting the correct hmmsearch3 parser using -version works' );
1037 is( ref( $searchio->next_result ),
1038 'Bio::Search::Result::HMMERResult',
1039 'Check for the correct result reference type'
# Feed hmmpfam.out to the parser through a pipe instead of a plain file.
# On Windows the file is printed with the shell builtin "type", elsewhere
# with "cat".
my $cat_command  = ( $^O =~ m/mswin/i ) ? 'type' : 'cat';
my $pipe_command = "$cat_command " . test_input_file('hmmpfam.out');

# Kept with its historical value (command plus trailing pipe marker) in
# case code outside this section still refers to it.
my $pipestr = "$pipe_command |";

# Three-argument piped open: the read-from-command mode is spelled out as
# '-|' instead of being parsed out of the command string, and a command
# that cannot be started stops the script here with the system error
# instead of letting an unopened handle reach the parser.
open( my $pipefh, '-|', $pipe_command )
    or die "Cannot open pipe from '$pipe_command': $!";
# Build a parser for the piped input and check the parser class, the
# class of the first result and its hit count. The constructor
# arguments are not visible in this listing; presumably the parser is
# handed $pipefh -- TODO confirm.
1046 $searchio = Bio::SearchIO->new(
1050 is( ref($searchio), 'Bio::SearchIO::hmmer2',
1051 'Check if reading from a pipe works' );
1052 $result = $searchio->next_result;
1054 'Bio::Search::Result::HMMERResult',
1055 'Check for the correct result reference type'
1057 is( $result->num_hits(), 2, 'Check num_hits' );
# Regression check on pfamOutput-bug3376.out: take the first HSP of the
# first hit of the first result and compare its hit string with the
# expected sequence. The test label and the end of the is() call are not
# visible in this listing.
1061 my $in = Bio::SearchIO->new(
1063 -file => test_input_file('pfamOutput-bug3376.out')
1065 my $result = $in->next_result;
1066 my $hit = $result->next_hit;
1067 my $hsp = $hit->next_hsp;
1068 is( $hsp->hit_string,
1069 'svfqqqqssksttgstvtAiAiAigYRYRYRAvtWnsGsLssGvnDnDnDqqsdgLYtiYYsvtvpssslpsqtviHHHaHkasstkiiikiePr',
1075 # bug 3421 - making sure a full line of dashes in an HSP is parsed correctly
# Reads hmmpfam_HSPdashline.txt and checks the length reported by the
# first HSP of the first hit.
1077 my $in = Bio::SearchIO->new(
1079 -file => test_input_file('hmmpfam_HSPdashline.txt')
1081 my $result = $in->next_result;
1082 my $hit = $result->next_hit;
1083 my $hsp = $hit->next_hsp;
# length is called without an argument here, unlike the length checks
# elsewhere in this file that pass query, hit or total.
1084 is( $hsp->length, '561',
1085 'bug3421 - Check if can correctly parse an HSP with line full of dashes'
# Regression check for bug 3302 on hmmpfam_multiresult.out, a report
# holding more than one result.
1092 my $in = Bio::SearchIO->new(
1094 -file => test_input_file('hmmpfam_multiresult.out')
# The first result is read and then overwritten on purpose: the check
# below is made on the first hit of the second result.
1096 my $result = $in->next_result;
1097 $result = $in->next_result;
1098 my $hit = $result->next_hit;
1099 is( $hit->name, 'IS66_ORF3.uniq', 'bug3302 - Check if can parse multiresult hmmer' );
1103 # HMMER 3.1 nhmmer output
# Parse nhmmer-3.1.out and check the result-level fields of its first
# result. The remaining constructor arguments are not visible in this
# listing.
1105 my $in = Bio::SearchIO->new(
1108 -file => test_input_file('nhmmer-3.1.out')
1110 my $result = $in->next_result;
1111 is( $result->algorithm, 'NHMMER', 'Check algorithm' );
1112 is( $result->algorithm_version, '3.1b1', 'Check nhmmer algorithm version' );
1113 is( $result->hmm_name,
1114 '../HMMs/A_HA_H7_CDS_nucleotide.hmm',
# The expected sequence file name sits on a line that is not visible in
# this listing.
1117 is( $result->sequence_file,
1119 'Check sequence_file'
1121 is( $result->query_name, 'A_HA_H7_CDS_nucleotide', 'Check query_name' );
1122 is( $result->query_length, 1683, 'Check query_length' );
1123 is( $result->query_accession, '', 'Check query_accession' );
1124 is( $result->query_description, '', 'Check query_description' );
1125 is( $result->num_hits(), 2, 'Check num_hits' );
# First nhmmer hit (seq1): hit-level fields, then its single HSP.
1127 my $hit = $result->next_hit;
1128 is( ref($hit), 'Bio::Search::Hit::HMMERHit',
1129 'Check for the correct hit reference type' );
1130 is( $hit->name, 'seq1', 'Check nhmmer hit name' );
1131 is( $hit->description, 'Description of seq1', 'Check nhmmer hit description' );
1132 is( $hit->score, 148.2, 'Check nhmmer hit score' );
1133 float_is( $hit->significance, 3.2e-48, 'Check nhmmer hit significance' );
1134 is( $hit->num_hsps, 1, 'Check num_hsps' );
1136 # Hit length is unknown for HMMSCAN and HMMSEARCH but not for NHMMER
1137 is( $hit->length, 151, 'Check nhmmer hit length' );
# These two checks carry no test label. The second expected value is
# quoted because is() compares as strings and a bare 1.00 would
# stringify to 1.
1138 is( $hit->frac_aligned_query, 0.09 );
1139 is( $hit->frac_aligned_hit, '1.00' );
# HSP-level checks: class, sequence ids, coordinates, strands, score and
# E-value.
1141 my $hsp = $hit->next_hsp;
1142 is( ref($hsp), 'Bio::Search::HSP::HMMERHSP',
1143 'Check for correct hsp reference type' );
1144 is( $hsp->hit->seq_id(), 'seq1', 'Check for nhmmer hit seq_id' );
1145 is( $hsp->query->seq_id(), 'A_HA_H7_CDS_nucleotide', 'Check for nhmmer query seq_id' );
1147 is( $hsp->start('hit'), 1, 'Check nhmmer hsp hit start' );
1148 is( $hsp->end('hit'), 151, 'Check nhmmer hsp hit end' );
1149 is( $hsp->start('query'), 258, 'Check nhmmer hsp query start' );
1150 is( $hsp->end('query'), 411, 'Check nhmmer hsp query end' );
1151 is( $hsp->strand('hit'), 1, 'Check nhmmer hsp hit strand' );
1152 is( $hsp->strand('query'), 1, 'Check nhmmer hsp query strand' );
1153 is( $hsp->score, 148.2, 'Check nhmmer hsp score' );
1154 float_is( $hsp->significance, 3.2e-48, 'Check nhmmer hsp evalue' );
# Aligned lengths and gap counts for the query side, the hit side and
# the whole alignment.
1156 is( $hsp->length('query'), 154, 'Check for hsp query length' );
1157 is( $hsp->length('hit'), 151, 'Check for hsp hit length' );
1158 is( $hsp->length('total'), 154, 'Check for hsp total length' );
1159 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
1160 is( $hsp->gaps('hit'), 3, 'Check for hsp hit gaps' );
1161 is( $hsp->gaps('total'), 3, 'Check for hsp total gaps' );
# Length consistency: the HIT_LENGTH and QUERY_LENGTH slots of the HSP
# are compared with the length reported by the hit and the result. When
# that reported length is 0, the length of the HSP hit or query feature
# is used as the reference instead. Exactly one is() runs per ternary.
1163 ($hit->length == 0) ?
1164 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
1165 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
1166 ($result->query_length == 0) ?
1167 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
1168 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
# Identity and conservation statistics (unlabelled checks). Values are
# formatted with sprintf before comparison, so the match is on the
# rounded text; 1.000 is quoted because the bare number would stringify
# to 1.
1170 is ( $hsp->num_conserved, 151 );
1171 is ( $hsp->num_identical, 146 );
1172 is( sprintf( "%.2f", $hsp->percent_identity ), 94.81 );
1173 is( sprintf( "%.3f", $hsp->frac_identical('query') ), 0.948 );
1174 is( sprintf( "%.3f", $hsp->frac_identical('hit') ), 0.967 );
1175 is( sprintf( "%.3f", $hsp->frac_identical('total') ), 0.948 );
1176 is( sprintf( "%.3f", $hsp->frac_conserved('query') ), 0.981 );
1177 is( sprintf( "%.3f", $hsp->frac_conserved('hit') ), '1.000' );
1178 is( sprintf( "%.3f", $hsp->frac_conserved('total') ), 0.981 );
# Unlabelled length comparison; the same pair is compared again, with a
# label, at the end of this section.
1180 is (length($hsp->homology_string), length($hsp->query_string));
# The expected consensus structure value sits on a line that is not
# visible in this listing.
1182 is( $hsp->consensus_structure,
1184 'Check for consensus structure string'
# Alignment rows: query, homology (midline), hit and posterior
# probability strings.
1186 is( $hsp->query_string,
1187 'attcctagaattttcagctgatttaattattgagaggcgagaaggaagtaatgatgtctgttatcctgggaaattcgtaaatgaagaagctctgaggcaaattctcagggggtcaggcggaattgacaaggagacaatgggattcacatatagc',
1188 'Check for nhmmer query string'
1190 is( $hsp->homology_string,
1191 'attcctagaattttcagc+gatttaattattgagaggcgagaaggaagt gatgtctgttatcctgggaaattcgt+aatgaagaagctctgaggcaaattctcaggg+gtcaggcggaattgacaaggagacaatgggattcac+ta+agc',
1192 'Check for nhmmer homology string'
1194 is( $hsp->hit_string,
1195 'ATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGT---GATGTCTGTTATCCTGGGAAATTCGTGAATGAAGAAGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACCTACAGC',
1196 'Check for nhmmer hit string'
1198 is( $hsp->posterior_string,
1199 '689*******************************************777...***************************************************************************************************986',
1200 'Check for nhmmer posterior probability string'
# All alignment rows must have the same width: homology against hit,
# then query against homology.
1202 is( length( $hsp->homology_string ),
1203 length( $hsp->hit_string ),
1204 'Check if nhmmer homology string and hit string have an equal length'
1206 is( length( $hsp->query_string ),
1207 length( $hsp->homology_string ),
1208 'Check if nhmmer query string and homology string have an equal length'
# Second nhmmer hit (seq2): hit-level fields, then its HSP. $hit and
# $hsp are reused from the first-hit checks.
1211 $hit = $result->next_hit;
1212 is( $hit->name, 'seq2', 'Check nhmmer hit name' );
1213 is( $hit->description, 'Description of seq2', 'Check nhmmer hit description' );
1214 is( $hit->score, 38.6, 'Check nhmmer hit score' );
1215 float_is( $hit->significance, 3.9e-15, 'Check nhmmer hit significance' );
1216 is( $hit->length, 60, 'Check nhmmer hit length' );
1218 $hsp = $hit->next_hsp;
1219 is( $hsp->hit->seq_id(), 'seq2', 'Check for nhmmer hit seq_id' );
1220 is( $hsp->query->seq_id(), 'A_HA_H7_CDS_nucleotide', 'Check for nhmmer query seq_id' );
# Coordinates and strands. This HSP is expected with hit strand -1 while
# the query strand is 1; start is still expected to be the smaller
# coordinate (1) and end the larger (59).
1222 is( $hsp->start('query'), 34, 'Check nhmmer hsp query start' );
1223 is( $hsp->end('query'), 92, 'Check nhmmer hsp query end' );
1224 is( $hsp->start('hit'), 1, 'Check nhmmer hsp hit start' );
1225 is( $hsp->end('hit'), 59, 'Check nhmmer hsp hit end' );
1226 is( $hsp->strand('hit'), -1, 'Check nhmmer hsp hit strand' );
1227 is( $hsp->strand('query'), 1, 'Check nhmmer hsp query strand' );
1228 is( $hsp->score, 38.6, 'Check nhmmer hsp score' );
1229 float_is( $hsp->significance, 3.9e-15, 'Check nhmmer hsp evalue' );
# Aligned lengths and gap counts; this alignment is expected to be
# gap-free on both sides.
1231 is( $hsp->length('query'), 59, 'Check for hsp query length' );
1232 is( $hsp->length('hit'), 59, 'Check for hsp hit length' );
1233 is( $hsp->length('total'), 59, 'Check for hsp total length' );
1234 is( $hsp->gaps('query'), 0, 'Check for hsp query gaps' );
1235 is( $hsp->gaps('hit'), 0, 'Check for hsp hit gaps' );
1236 is( $hsp->gaps('total'), 0, 'Check for hsp total gaps' );
# Length consistency: the HIT_LENGTH and QUERY_LENGTH slots of the HSP
# are compared with the length reported by the hit and the result. When
# that reported length is 0, the length of the HSP hit or query feature
# is used as the reference instead. Exactly one is() runs per ternary.
1238 ($hit->length == 0) ?
1239 is( $hsp->{HIT_LENGTH}, $hsp->hit->length, 'Check hit length consistency' )
1240 : is( $hsp->{HIT_LENGTH}, $hit->length, 'Check hit length consistency' );
1241 ($result->query_length == 0) ?
1242 is( $hsp->{QUERY_LENGTH}, $hsp->query->length, 'Check query length consistency' )
1243 : is( $hsp->{QUERY_LENGTH}, $result->query_length, 'Check query length consistency' );
# The expected consensus structure value sits on a line that is not
# visible in this listing.
1245 is( $hsp->consensus_structure,
1247 'Check for consensus structure string'
# Alignment rows: query, homology (midline), hit and posterior
# probability strings.
1249 is( $hsp->query_string,
1250 'gtgatgattgcaacaaatgcagacaaaatctgccttgggcaccatgctgtgtcaaacgg',
1251 'Check for nhmmer query string'
1253 is( $hsp->homology_string,
1254 'g+gat+att+c+acaaatgcagacaa atctgccttgggca+catgc+gtgtcaaacgg',
1255 'Check for nhmmer homology string'
1257 is( $hsp->hit_string,
1258 'GCGATCATTCCGACAAATGCAGACAAGATCTGCCTTGGGCATCATGCCGTGTCAAACGG',
1259 'Check for nhmmer hit string'
1261 is( $hsp->posterior_string,
1262 '6899****************************************************986',
1263 'Check for nhmmer posterior probability string' );
# All alignment rows must have the same width: homology against hit,
# then query against homology.
1264 is( length( $hsp->homology_string ),
1265 length( $hsp->hit_string ),
1266 'Check if nhmmer homology string and hit string have an equal length'
1268 is( length( $hsp->query_string ),
1269 length( $hsp->homology_string ),
1270 'Check if nhmmer query string and homology string have an equal length'
1273 # end HMMER 3.1 nhmmer output