From 0af95c4f0ba9fd0d954ec3bfb3ed407f24c998bf Mon Sep 17 00:00:00 2001 From: Brian Osborne Date: Mon, 15 Jul 2013 20:33:20 -0400 Subject: [PATCH] Add partial support for nhmmer (version 3.1) --- Bio/Search/HSP/HMMERHSP.pm | 4 +- Bio/Search/HSP/hmmer3HSP.pm | 1 - Bio/SearchIO/hmmer3.pm | 1901 +++++++++++++++++++++++------------------- Bio/SeqFeature/Similarity.pm | 10 +- t/SearchIO/hmmer.t | 1151 +++++++++++++++---------- t/data/nhmmer-3.1.out | 63 ++ 6 files changed, 1823 insertions(+), 1307 deletions(-) rewrite Bio/SearchIO/hmmer3.pm (68%) rewrite t/SearchIO/hmmer.t (94%) create mode 100644 t/data/nhmmer-3.1.out diff --git a/Bio/Search/HSP/HMMERHSP.pm b/Bio/Search/HSP/HMMERHSP.pm index a7cc70d15..b9557cbb1 100644 --- a/Bio/Search/HSP/HMMERHSP.pm +++ b/Bio/Search/HSP/HMMERHSP.pm @@ -65,10 +65,8 @@ Internal methods are usually preceded with a _ =cut - # Let the code begin... - package Bio::Search::HSP::HMMERHSP; use strict; @@ -82,7 +80,7 @@ use base qw(Bio::Search::HSP::GenericHSP); Returns : Bio::Search::HSP::HMMERHSP Args : -Plus Bio::Seach::HSP::GenericHSP methods +Plus Bio::Search::HSP::GenericHSP methods -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc) -evalue => evalue diff --git a/Bio/Search/HSP/hmmer3HSP.pm b/Bio/Search/HSP/hmmer3HSP.pm index e31ea2dcc..b1d7e9af8 100644 --- a/Bio/Search/HSP/hmmer3HSP.pm +++ b/Bio/Search/HSP/hmmer3HSP.pm @@ -1,4 +1,3 @@ -# $Id: bioperl.lisp 15559 2009-02-23 12:11:20Z maj $ # # BioPerl module for Bio::Search::HSP::hmmer3HSP # diff --git a/Bio/SearchIO/hmmer3.pm b/Bio/SearchIO/hmmer3.pm dissimilarity index 68% index 4292f6ac9..c31eec737 100644 --- a/Bio/SearchIO/hmmer3.pm +++ b/Bio/SearchIO/hmmer3.pm @@ -1,859 +1,1042 @@ -# $Id: bioperl.lisp 15559 2009-02-23 12:11:20Z maj $ -# -# BioPerl module for Bio::SearchIO::hmmer3 -# -# Please direct questions and support issues to -# -# Cared for by Thomas Sharpton -# -# Copyright Thomas Sharpton -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::SearchIO::hmmer3 - DESCRIPTION of Object - -=head1 SYNOPSIS - -Give standard usage here - -=head1 DESCRIPTION - -Describe the object here - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -L - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via -the web: - - https://redmine.open-bio.org/projects/bioperl/ - -=head1 AUTHOR - Thomas Sharpton - -Email thomas.sharpton@gmail.com - -Describe contact details here - -=head1 CONTRIBUTORS - -Additional contributors names and emails here - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - -package Bio::SearchIO::hmmer3; - -use strict; -use Data::Dumper; -use Bio::Factory::ObjectFactory; -use vars qw(%MAPPING %MODEMAP); -use base qw(Bio::SearchIO::hmmer); - -BEGIN { - - # mapping of HMMER items to Bioperl hash keys - %MODEMAP = ( - 'HMMER_Output' => 'result', - 'Hit' => 'hit', - 'Hsp' => 'hsp' - ); - - %MAPPING = ( - 'Hsp_bit-score' => 'HSP-bits', - 'Hsp_score' => 'HSP-score', - 'Hsp_evalue' => 'HSP-evalue', - 'Hsp_query-from' => 'HSP-query_start', - 'Hsp_query-to' => 'HSP-query_end', - 'Hsp_hit-from' => 'HSP-hit_start', - 'Hsp_hit-to' => 'HSP-hit_end', - 'Hsp_positive' => 'HSP-conserved', - 'Hsp_identity' => 'HSP-identical', - 'Hsp_gaps' => 'HSP-hsp_gaps', - 'Hsp_hitgaps' => 'HSP-hit_gaps', - 'Hsp_querygaps' => 'HSP-query_gaps', - 'Hsp_qseq' => 'HSP-query_seq', - 'Hsp_hseq' => 'HSP-hit_seq', - 'Hsp_midline' => 'HSP-homology_seq', - 'Hsp_align-len' => 'HSP-hsp_length', - 'Hsp_query-frame' => 'HSP-query_frame', - 'Hsp_hit-frame' => 'HSP-hit_frame', - - 'Hit_id' => 'HIT-name', - 'Hit_len' => 'HIT-length', - 'Hit_accession' => 'HIT-accession', - 'Hit_desc' => 'HIT-description', - 'Hit_signif' => 'HIT-significance', - 'Hit_score' => 'HIT-score', - - 'HMMER_program' => 'RESULT-algorithm_name', - 'HMMER_version' => 'RESULT-algorithm_version', - 'HMMER_query-def' => 'RESULT-query_name', - 'HMMER_query-len' => 'RESULT-query_length', - 'HMMER_query-acc' => 'RESULT-query_accession', - 'HMMER_querydesc' => 'RESULT-query_description', - 'HMMER_hmm' => 'RESULT-hmm_name', - 'HMMER_seqfile' => 'RESULT-sequence_file', - 'HMMER_db' => 'RESULT-database_name', - ); -} - -=head2 next_result - - Title : next_result - Usage : my $hit = $searchio->next_result; - Function: Returns the next Result from a search - Returns : Bio::Search::Result::ResultI object - Args : none - -=cut - -sub next_result{ - my ($self) = @_; - my $seentop = 0; #Placeholder for when we deal with multi-query reports - my $reporttype; - my ( $last, @hit_list, @hsp_list, %hspinfo, %hitinfo, %domaincounter ); - local $/ = "\n"; - local $_; - - my $verbose = $self->verbose; # cache for speed? JES's idea in hmmer.pm - $self->start_document(); - local ($_); - #This is here to ensure that next_result doesn't produce infinite loop - if(!defined( $_ = $self->_readline) ) { - return undef; - } - else{ - $self->_pushback($_); - } - #Regex goes here for HMMER3 - #Start with hmmsearch processing - while ( defined( $_ = $self->_readline ) ) { - my $lineorig = $_; - chomp; - #Grab the program name. - if ( $_ =~ m/^\#\s(\S+)\s\:\:\s/ ){ - my $prog = $1; - #TO DO LATER: customize the above regex to adapt to other - #program types!!! (hmmscan, etc) - $self->start_element( { 'Name' => 'HMMER_Output' } ); - $self->{'_result_count'}++; #Might need to move to another block - $self->element( - { - 'Name' => 'HMMER_program', - 'Data' => uc($prog) - } - ); - } - #Get the HMMER package version and release date - elsif ( $_ =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/ ) { - my $version = $1; - my $versiondate = $2; - $self->{'_hmmidline'} = $_; - $self->element( - { - 'Name' => 'HMMER_version', - 'Data' => $version - } - ); - } - #Get the query info - elsif( $_ =~ /^\#\squery \w+ file\:\s+(\S+)/ ){ - if( $self->{'_reporttype'} eq 'HMMSEARCH') { - $self->{'_hmmfileline'} = $lineorig; - $self->element( - { - 'Name' => 'HMMER_hmm', - 'Data' => $1 - } - ); - } - elsif( $self->{'_reporttype'} eq 'HMMSCAN' ) { - $self->{'_hmmseqline'} = $lineorig; - $self->element( - { - 'Name' => 'HMMER_seqfile', - 'Data' => $1 - } - ); - } - } - #If this is a report without alignments - elsif( $_ =~ m/^\#\sshow\salignments\sin\soutput/ ){ - $self->{'_alnreport'} = 0; - } - #Get the database info - elsif( $_ =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ){ -# $self->{'_hmmseqline'} = $lineorig; -# $self->element( -# { -# 'Name' => 'HMMER_seqfile', -# 'Data' => $1 -# } -# ); - if( $self->{'_reporttype'} eq 'HMMSEARCH') { - $self->{'_hmmseqline'} = $lineorig; - $self->element( - { - 'Name' => 'HMMER_seqfile', - 'Data' => $1 - } - ); - } - elsif( $self->{'_reporttype'} eq 'HMMSCAN' ) { - $self->{'_hmmfileline'} = $lineorig; - $self->element( - { - 'Name' => 'HMMER_hmm', - 'Data' => $1 - } - ); - } - } - #Get query data - elsif( $_ =~ s/^Query:\s+// ) { - #TODO Code to deal with multi query report - unless( s/\s+\[[L|M]\=(\d+)\]$// ){ - warn "Error parsing length for query, offending line $_\n"; - exit(0); - } - my $querylen = $1; - $self->element( - { - 'Name' => 'HMMER_query-len', - 'Data' => $querylen - } - ); - $self->element( - { - 'Name' => 'HMMER_query-def', - 'Data' => $_ - } - ); - } - #Get Accession data - elsif( $_ =~ s/^Accession:\s+// ){ - s/\s+$//; - $self->element( - { - 'Name' => 'HMMER_query-acc', - 'Data' => $_ - } - ); - } - #Get description data - elsif( $_ =~ s/^Description:\s+// ){ - s/\s+$//; - $self->element( - { - 'Name' => 'HMMER_querydesc', - 'Data' => $_ - } - ); - } - #PROCESS HMMSEARCH AND HMMSCAN RESULTS SPECIFIC FORMATTING HERE - elsif( defined $self->{'_reporttype'} && - ( - $self->{'_reporttype'} eq 'HMMSEARCH' || - $self->{'_reporttype'} eq 'HMMSCAN' - ) - ){ - #Complete sequence table data above inclusion threshold - if( $_ =~ m/Scores for complete sequence/){ - while (defined( $_ = $self->_readline ) ) { - if ($_ =~ m/inclusion threshold/ || m/Domain( and alignment)? annotation for each/ || - m/\[No hits detected/ || m!^//! ){ - $self->_pushback($_); - last; - } - #grab table data - next if ( m/\-\-\-/ || m/^\s+E\-value\s+score/ || m/^$/); - my ( - $eval_full, $score_full, $bias_full, - $eval_best, $score_best, $bias_best, - $exp, $n, $hitid, $desc, @hitline - ); - @hitline = split(" ", $_); - $eval_full = shift @hitline; - $score_full = shift @hitline; - $bias_full = shift @hitline; - $eval_best = shift @hitline; - $score_best = shift @hitline; - $bias_best = shift @hitline; - $exp = shift @hitline; - $n = shift @hitline; - $hitid = shift @hitline; - $desc = join " ", @hitline; - - if( !defined( $desc ) ){ - $desc = ""; - } - push @hit_list, [ $hitid, $desc, $eval_full, $score_full ]; - $hitinfo{$hitid}= $#hit_list; - } - } - #Complete sequence table data below inclusion threshold - #not currently fully implemented - elsif( $_ =~ m/inclusion threshold/ ){ - while( defined( $_ = $self->_readline ) ) { - if( $_ =~ m/Domain( and alignment)? annotation for each/ || - m/Internal pipeline statistics summary/ ){ - $self->_pushback($_); - last; - } - next if( $_ =~ m/^$/ ); - my ( - $eval_full, $score_full, $bias_full, - $eval_best, $score_best, $bias_best, - $exp, $n, $hitid, $desc, @hitline - ); - @hitline = split(" ", $_); - $eval_full = shift @hitline; - $score_full = shift @hitline; - $bias_full = shift @hitline; - $eval_best = shift @hitline; - $score_best = shift @hitline; - $bias_best = shift @hitline; - $exp = shift @hitline; - $n = shift @hitline; - $hitid = shift @hitline; - $desc = join " ", @hitline; - - $hitinfo{$hitid} = "below_inclusion"; - } - } - #Domain annotation for each sequence table data - elsif( $_ =~ m/Domain( and alignment)? annotation for each/){ - @hsp_list = (); #here for multi-query reports - my $name; - - while( defined( $_ = $self->_readline ) ) { - if ($_ =~ m/Internal pipeline statistics/ || m/\[No targets detected/ ){ - $self->_pushback($_); - last; - } - if( $_ =~ m/^\>\>\s(.*?)\s+/ ) { - $name = $1; - #skip hits below inclusion threshold - next if( $hitinfo{$name} eq "below_inclusion"); - $domaincounter{$name} = 0; - - while( defined( $_ = $self->_readline ) ) { - #grab table data for sequence - if ($_ =~ m/Internal pipeline statistics/ || - $_ =~ m/^\>\>/ ){ - $self->_pushback($_); - last; - } - if ( $_ =~ m/Alignments for each domain/ ) { - $self->_pushback($_); - last; - } - if ( $_ =~ m/^\s+\#\s+score/ || - $_ =~ m/^\s\-\-\-\s+/ || -# $_ =~ m/^\>\>/ || - $_ =~ m/^$/ ){ - next; - } - -# grab hsp data from table, push into @hsp; - if( - my ($domain_num, $score, $bias, $ceval, - $ieval, $hmmstart, $hmmstop, - $qalistart, $qalistop, $envstart, - $envstop, $envbound, $acc) = - m!^\s+(\d+)\s\!*\?*\s+ #domain num - (\S+)\s+(\S+)\s+ #score, bias - (\S+)\s+(\S+)\s+ #c-eval, i-eval - (\d+)\s+(\d+).+? #hmm start, stop - (\d+)\s+(\d+).+? #query start, stop - (\d+)\s+(\d+).+? #env start, stop - (\S+) #acc - \s*$!ox - ){ - #keeping simple for now. let's customize later - my @vals = ($hmmstart, $hmmstop, $qalistart, $qalistop, $score, $ceval, '', '', ''); - my $info = $hit_list[ $hitinfo{$name} ]; - if( !defined $info ){ - $self->warn( - "Incomplete sequence information; can't find $name, hitinfo says $hitinfo{$name}\n" - ); - next; - } - $domaincounter{$name}++; - my $hsp_key = $name . "_" . $domaincounter{$name}; - push @hsp_list, [ $name, @vals ]; - $hspinfo{$hsp_key} = $#hsp_list; - } - else{ - print "missed this line: $_\n"; - } - } - } - elsif ($_ =~ m/Alignments for each domain/ ) { - my $domain_count = 0; - #line counter - my $count = 0; - # There's an optional block, so we sometimes need to - # count to 3, and sometimes to 4. - my $max_count = 3; - my $lastdomain; - my $hsp; - my ($hline, $midline, $qline); - - while( defined( $_ = $self->_readline ) ) { - if( $_ =~ m/^\>\>/ || - $_ =~ m/Internal pipeline statistics/){ - $self->_pushback($_); - last; - } - elsif( $hitinfo{$name} eq "below_inclusion" || - $_ =~ m/^$/ ) { - next; - } - elsif( $_ =~ /\s\s\=\=\sdomain\s(\d+)\s+/){ - my $domainnum = $1; - $count = 0; - my $key = $name . "_" . $domainnum; - $hsp = $hsp_list[ $hspinfo{$key} ]; - $hline = $$hsp[-3]; - $midline = $$hsp[-2]; - $qline = $$hsp[-1]; - $lastdomain = $name; - } - # model data track, some reports don't have - elsif( $_ =~ m/\s+\S+\sCS$/ ){ - my $modeltrack = $_; - $max_count++; - $count++; - next; - } - elsif( $count == $max_count - 3 ){ - #hit sequence - my @data = split(" ", $_); - my $seq = $data[-2]; - $hline .= $seq; - $count++; - next; - } - elsif( $count == $max_count - 2 ){ - #conservation track - #storage isn't quite right - need to remove - #leading/lagging whitespace while preserving - #gap data (latter isn't done, former is) - $_ =~ s/^\s+//; - $_ =~ s/\s+$//; - $midline .= $_; - $count++; - next; - } - elsif( $count == $max_count - 1 ){ - #query track - my @data = split(" ", $_); - my $seq = $data[-2]; - $qline .= $seq; - $count++; - next; - } - elsif( $count == $max_count ){ - #pval track - my $pvals = $_; - $count = 0; - $max_count = 3; - $$hsp[-3] = $hline; - $$hsp[-2] = $midline; - $$hsp[-1] = $qline; - next; - } - else{ - print "missed $_\n"; - } - } - } - } - } - elsif( m/Internal pipeline statistics/ || m!^//! ){ -# if within hit, hsp close; - if ( $self->within_element('hit') ) { - if ( $self->within_element('hsp') ) { - $self->end_element( { 'Name' => 'Hsp' } ); - } - $self->end_element( { 'Name' => 'Hit' } ); - } - #grab summary statistics of run - while( defined( $_ = $self->_readline ) ) { - last if ( $_ =~ m/^\/\/$/ ); - } - - #Jason does a lot of processing of hits/hsps here; - while( my $hit = shift @hit_list ) { - my $hit_name = shift @$hit; - my $hit_desc = shift @$hit; - my $hit_signif = shift @$hit; - my $hit_score = shift @$hit; - my $num_domains = $domaincounter{$hit_name} || 0; - - $self->start_element( { 'Name' => 'Hit' } ); - $self->element( - { - 'Name' => 'Hit_id', - 'Data' => $hit_name - } - ); - $self->element( - { - 'Name' => 'Hit_desc', - 'Data' => $hit_desc - } - ); - $self->element( - { - 'Name' => 'Hit_signif', - 'Data' => $hit_signif - } - ); - $self->element( - { - 'Name' => 'Hit_score', - 'Data' => $hit_score - } - ); - for my $i (1..$num_domains) { - my $key = $hit_name . "_" . $i; - my $hsp = $hsp_list[ $hspinfo{$key} ]; - if(defined $hsp) { - my $hsp_name = shift @$hsp; - $self->start_element( { 'Name' => 'Hsp' } ); - $self->element( { - 'Name' => 'Hsp_identity', - 'Data' => 0 - } ); - $self->element( { - 'Name' => 'Hsp_positive', - 'Data' => 0 - } ); - $self->element( { - 'Name' => 'Hsp_hit-from', - 'Data' => shift @$hsp - } ); - $self->element( { - 'Name' => 'Hsp_hit-to', - 'Data' => shift @$hsp - } ); - $self->element( { - 'Name' => 'Hsp_query-from', - 'Data' => shift @$hsp - } ); - $self->element( { - 'Name' => 'Hsp_query-to', - 'Data' => shift @$hsp - } ); - $self->element( { - 'Name' => 'Hsp_score', - 'Data' => shift @$hsp - } ); - $self->element( { - 'Name' => 'Hsp_evalue', - 'Data' => shift @$hsp - } ); - $self->element( { - 'Name' => 'Hsp_hseq', - 'Data' => shift @$hsp - } ); - $self->element( { - 'Name' => 'Hsp_midline', - 'Data' => shift @$hsp - } ); - $self->element( { - 'Name' => 'Hsp_qseq', - 'Data' => shift @$hsp - } ); - $self->end_element( { 'Name' => 'Hsp' } ); - } - } - $self->end_element( { 'Name' => 'Hit' } ); - } - @hit_list = (); - %hitinfo = (); - last; - } - } - else{ - print "missed: $_\n"; - $self->debug($_); - } - $last = $_; - } - $self->end_element( { 'Name' => 'HMMER_Output' } ); - my $result = $self->end_document(); - return $result; -} - - - -=head2 start_element - - Title : start_element - Usage : $eventgenerator->start_element - Function: Handles a start event - Returns : none - Args : hashref with at least 2 keys 'Data' and 'Name' - -=cut - -sub start_element{ - - my ( $self, $data ) = @_; - - # we currently don't care about attributes - my $nm = $data->{'Name'}; - my $type = $MODEMAP{$nm}; - if ($type) { - if ( $self->_eventHandler->will_handle($type) ) { - my $func = sprintf( "start_%s", lc $type ); - $self->_eventHandler->$func( $data->{'Attributes'} ); - } - unshift @{ $self->{'_elements'} }, $type; - } - if ( defined $type - && $type eq 'result' ) - { - $self->{'_values'} = {}; - $self->{'_result'} = undef; - } -} - -=head2 end_element - - Title : end_element - Usage : $eventgeneartor->end_element - Function: Handles and end element event - Returns : none - Args : hashref with at least 2 keys 'Data' and 'Name' - -=cut - -sub end_element{ - - my ( $self, $data ) = @_; - my $nm = $data->{'Name'}; - my $type = $MODEMAP{$nm}; - my $rc; - - if ( $nm eq 'HMMER_program' ) { - if ( $self->{'_last_data'} =~ /(HMM\S+)/i ) { - $self->{'_reporttype'} = uc $1; - } - } - - # Hsp are sort of weird, in that they end when another - # object begins so have to detect this in end_element for now - if ( $nm eq 'Hsp' ) { - foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) { - my $data = $self->{'_last_hspdata'}->{$_}; - if ($data && $_ eq 'Hsp_hseq') { - # replace hmm '.' gap symbol by '-' - $data =~ s/\./-/g; - } - $self->element( - { - 'Name' => $_, - 'Data' => $data - } - ); - } - $self->{'_last_hspdata'} = {}; - } - if ($type) { - if ( $self->_eventHandler->will_handle($type) ) { - my $func = sprintf( "end_%s", lc $type ); - $rc = $self->_eventHandler->$func( $self->{'_reporttype'}, - $self->{'_values'} ); - } - my $lastelem = shift @{ $self->{'_elements'} }; - } - elsif ( $MAPPING{$nm} ) { - if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { - my $key = ( keys %{ $MAPPING{$nm} } )[0]; - $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } = - $self->{'_last_data'}; - } - else { - $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'}; -# print "lastdata is " . $self->{'_last_data'} . "\n"; - } - } - else { - $self->debug("unknown nm $nm, ignoring\n"); - } - $self->{'_last_data'} = ''; # remove read data if we are at - # end of an element - $self->{'_result'} = $rc if ( defined $type && $type eq 'result' ); - return $rc; -} - -=head2 element - - Title : element - Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); - Function: Convienence method that calls start_element, characters, end_element - Returns : none - Args : Hash ref with the keys 'Name' and 'Data' - -=cut - -sub element{ - my ( $self, $data ) = @_; - $self->start_element($data); - $self->characters($data); - $self->end_element($data); -} - -=head2 characters - - Title : characters - Usage : $eventgenerator->characters($str) - Function: Send a character events - Returns : none - Args : string - -=cut - -sub characters{ - my ( $self, $data ) = @_; - - if ( $self->in_element('hsp') - && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o - && defined $data->{'Data'} ) - { - $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'}; - } - return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o ); - - $self->{'_last_data'} = $data->{'Data'}; -} - -=head2 within_element - - Title : within_element - Usage : if( $eventgenerator->within_element( $element ) ) {} - Function: Test if we are within a particular element - This is different than 'in' because within can be tested for - a whole block - Returns : boolean - Args : string element name - -=cut - -sub within_element{ - my ( $self, $name ) = @_; - return 0 - if ( !defined $name - || !defined $self->{'_elements'} - || scalar @{ $self->{'_elements'} } == 0 ); - foreach ( @{ $self->{'_elements'} } ) { - return 1 if ( $_ eq $name ); - } - return 0; -} - -=head2 in_element - - Title : in_element - Usage : if( $eventgenerator->in_element( $element ) ) {} - Function: Test if we are in a particular element - This is different than 'within' because 'in' only - tests its immediate parent - Returns : boolean - Args : string element name - -=cut - -sub in_element{ - my ( $self, $name ) = @_; - return 0 if !defined $self->{'_elements'}->[0]; - return ( $self->{'_elements'}->[0] eq $name ); -} - -=head2 start_document - - Title : start_document - Usage : $eventgenerator->start_document - Function: Handle a start document event - Returns : none - Args : none - -=cut - -sub start_document{ - my ($self) = @_; - $self->{'_lasttype'} = ''; - $self->{'_values'} = {}; - $self->{'_result'} = undef; - $self->{'_elements'} = []; -} - -=head2 end_document - - Title : end_document - Usage : $eventgenerator->end_document - Function: Handles an end document event - Returns : Bio::Search::Result::ResultI object - Args : none - -=cut - -sub end_document{ - my ($self) = @_; - return $self->{'_result'}; -} - -=head2 result_count - - Title : result_count - Usage : my $count = $searchio->result_count - Function: Returns the number of results processed - Returns : interger - Args : none - -=cut - -sub result_count{ - my $self = shift; - return $self->{'_result_count'}; -} - - -1; +# +# BioPerl module for Bio::SearchIO::hmmer3 +# +# Please direct questions and support issues to +# +# Cared for by Thomas Sharpton +# +# Copyright Thomas Sharpton +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::SearchIO::hmmer3 + +=head1 SYNOPSIS + +use Bio::SearchIO; + +my $searchio = Bio::SearchIO->new( + -format => 'hmmer', + -version => 3, + -file => 'hmmsearch.out' +); + +my $result = $searchio->next_result; +my $hit = $result->next_hit; +print $hit->name, $hit->description, $hit->significance, + $hit->score, "\n"; + +my $hsp = $hit->next_hsp; +print $hsp->start('hit'), $hsp->end('hit'), $hsp->start('query'), + $hsp->end('query'), "\n"; + +=head1 DESCRIPTION + +Code to parse output from hmmsearch, hmmscan, and nhmmer, compatible with +both version 2 and version 3 of the HMMER package from L. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/wiki/Mailing_lists - About the mailing lists + +=head2 Support + +Please direct usage questions or support issues to the mailing list: + +L + +rather than to the module maintainer directly. Many experienced and +reponsive experts will be able look at the problem and quickly +address it. Please include a thorough description of the problem +with code and data examples if at all possible. + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +the web: + + https://redmine.open-bio.org/projects/bioperl/ + +=head1 AUTHOR - Thomas Sharpton + +Email thomas.sharpton@gmail.com + +Describe contact details here + +=head1 CONTRIBUTORS + +Additional contributors names and emails here + +briano at bioteam.net + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + +# Let the code begin... + +package Bio::SearchIO::hmmer3; + +use strict; +use Data::Dumper; +use Bio::Factory::ObjectFactory; +use vars qw(%MAPPING %MODEMAP); +use base qw(Bio::SearchIO::hmmer); + +BEGIN { + + # mapping of HMMER items to Bioperl hash keys + %MODEMAP = ( + 'HMMER_Output' => 'result', + 'Hit' => 'hit', + 'Hsp' => 'hsp' + ); + + %MAPPING = ( + 'Hsp_bit-score' => 'HSP-bits', + 'Hsp_score' => 'HSP-score', + 'Hsp_evalue' => 'HSP-evalue', + 'Hsp_query-from' => 'HSP-query_start', + 'Hsp_query-to' => 'HSP-query_end', + 'Hsp_query-strand' => 'HSP-query_strand', + 'Hsp_hit-from' => 'HSP-hit_start', + 'Hsp_hit-to' => 'HSP-hit_end', + 'Hsp_hit-strand' => 'HSP-hit_strand', + 'Hsp_positive' => 'HSP-conserved', + 'Hsp_identity' => 'HSP-identical', + 'Hsp_gaps' => 'HSP-hsp_gaps', + 'Hsp_hitgaps' => 'HSP-hit_gaps', + 'Hsp_querygaps' => 'HSP-query_gaps', + 'Hsp_qseq' => 'HSP-query_seq', + 'Hsp_hseq' => 'HSP-hit_seq', + 'Hsp_midline' => 'HSP-homology_seq', + 'Hsp_align-len' => 'HSP-hsp_length', + 'Hsp_query-frame' => 'HSP-query_frame', + 'Hsp_hit-frame' => 'HSP-hit_frame', + + 'Hit_id' => 'HIT-name', + 'Hit_len' => 'HIT-length', + 'Hit_accession' => 'HIT-accession', + 'Hit_desc' => 'HIT-description', + 'Hit_signif' => 'HIT-significance', + 'Hit_score' => 'HIT-score', + + 'HMMER_program' => 'RESULT-algorithm_name', + 'HMMER_version' => 'RESULT-algorithm_version', + 'HMMER_query-def' => 'RESULT-query_name', + 'HMMER_query-len' => 'RESULT-query_length', + 'HMMER_query-acc' => 'RESULT-query_accession', + 'HMMER_querydesc' => 'RESULT-query_description', + 'HMMER_hmm' => 'RESULT-hmm_name', + 'HMMER_seqfile' => 'RESULT-sequence_file', + 'HMMER_db' => 'RESULT-database_name', + ); +} + +=head2 next_result + + Title : next_result + Usage : my $hit = $searchio->next_result; + Function: Returns the next Result from a search + Returns : Bio::Search::Result::ResultI object + Args : none + +=cut + +sub next_result { + my ($self) = @_; + my $seentop = 0; # Placeholder for when we deal with multi-query reports + my $reporttype; + my ( $last, @hit_list, @hsp_list, %hspinfo, %hitinfo, %domaincounter ); + local $/ = "\n"; + local $_; + + my $verbose = $self->verbose; # cache for speed? JES's idea in hmmer.pm + $self->start_document(); + local ($_); + + # This is here to ensure that next_result doesn't produce infinite loop + if ( !defined( $_ = $self->_readline ) ) { + return undef; + } + else { + $self->_pushback($_); + } + + # Regex goes here for HMMER3 + # Start with hmmsearch processing + while ( defined( $_ = $self->_readline ) ) { + my $lineorig = $_; + chomp; + + # Grab the program name + if ( $_ =~ m/^\#\s(\S+)\s\:\:\s/ ) { + my $prog = $1; + + # TO DO: customize the above regex to adapt to other + # program types (hmmscan, etc) + $self->start_element( { 'Name' => 'HMMER_Output' } ); + $self->{'_result_count'}++; #Might need to move to another block + $self->element( + { 'Name' => 'HMMER_program', + 'Data' => uc($prog) + } + ); + } + + # Get the HMMER package version and release date + elsif ( $_ =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/ ) { + my $version = $1; + my $versiondate = $2; + $self->{'_hmmidline'} = $_; + $self->element( + { 'Name' => 'HMMER_version', + 'Data' => $version + } + ); + } + + # Get the query info + elsif ( $_ =~ /^\#\squery \w+ file\:\s+(\S+)/ ) { + if ( $self->{'_reporttype'} eq 'HMMSEARCH' + || $self->{'_reporttype'} eq 'NHMMER' ) + { + $self->{'_hmmfileline'} = $lineorig; + $self->element( + { 'Name' => 'HMMER_hmm', + 'Data' => $1 + } + ); + } + elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) { + $self->{'_hmmseqline'} = $lineorig; + $self->element( + { 'Name' => 'HMMER_seqfile', + 'Data' => $1 + } + ); + } + } + + # If this is a report without alignments + elsif ( $_ =~ m/^\#\sshow\salignments\sin\soutput/ ) { + $self->{'_alnreport'} = 0; + } + + # Get the database info + elsif ( $_ =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ) { + + if ( $self->{'_reporttype'} eq 'HMMSEARCH' + || $self->{'_reporttype'} eq 'NHMMER' ) + { + $self->{'_hmmseqline'} = $lineorig; + $self->element( + { 'Name' => 'HMMER_seqfile', + 'Data' => $1 + } + ); + } + elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) { + $self->{'_hmmfileline'} = $lineorig; + $self->element( + { 'Name' => 'HMMER_hmm', + 'Data' => $1 + } + ); + } + } + + # Get query data + elsif ( $_ =~ s/^Query:\s+// ) { + + # TO DO: code to deal with multi-query report + unless (s/\s+\[[L|M]\=(\d+)\]$//) { + warn "Error parsing length for query, offending line $_\n"; + exit(0); + } + my $querylen = $1; + $self->element( + { 'Name' => 'HMMER_query-len', + 'Data' => $querylen + } + ); + $self->element( + { 'Name' => 'HMMER_query-def', + 'Data' => $_ + } + ); + } + + # Get Accession data + elsif ( $_ =~ s/^Accession:\s+// ) { + s/\s+$//; + $self->element( + { 'Name' => 'HMMER_query-acc', + 'Data' => $_ + } + ); + } + + # Get description data + elsif ( $_ =~ s/^Description:\s+// ) { + s/\s+$//; + $self->element( + { 'Name' => 'HMMER_querydesc', + 'Data' => $_ + } + ); + } + + # hmmsearch, nhmmer, and hmmscan-specific formatting here + elsif ( + defined $self->{'_reporttype'} + && ( $self->{'_reporttype'} eq 'HMMSEARCH' + || $self->{'_reporttype'} eq 'HMMSCAN' + || $self->{'_reporttype'} eq 'NHMMER' ) + ) + { + # Complete sequence table data above inclusion threshold, + # hmmsearch or hmmscan + if ( $_ =~ m/Scores for complete sequence/ ) { + while ( defined( $_ = $self->_readline ) ) { + if ( $_ =~ m/inclusion threshold/ + || m/Domain( and alignment)? annotation for each/ + || m/\[No hits detected/ + || m!^//! ) + { + $self->_pushback($_); + last; + } + + # Grab table data + next if ( m/\-\-\-/ || m/^\s+E-value\s+score/ || m/^$/ ); + my ($eval_full, $score_full, $bias_full, $eval_best, + $score_best, $bias_best, $exp, $n, + $hitid, $desc, @hitline + ); + @hitline = split( " ", $_ ); + $eval_full = shift @hitline; + $score_full = shift @hitline; + $bias_full = shift @hitline; + $eval_best = shift @hitline; + $score_best = shift @hitline; + $bias_best = shift @hitline; + $exp = shift @hitline; + $n = shift @hitline; + $hitid = shift @hitline; + $desc = join " ", @hitline; + + if ( !defined($desc) ) { + $desc = ""; + } + push @hit_list, + [ $hitid, $desc, $eval_full, $score_full ]; + $hitinfo{$hitid} = $#hit_list; + } + } + + # nhmmer + if ( $_ =~ m/Scores for complete hits/ ) { + while ( defined( $_ = $self->_readline ) ) { + if ( $_ =~ m/inclusion threshold/ + || m/Annotation for each hit/ + || m/\[No hits detected/ + || m!^//! ) + { + $self->_pushback($_); + last; + } + + # Grab table data + next if ( m/\-\-\-/ || m/^\s+E-value\s+score/ || m/^$/ ); + my ($eval, $score, $bias, $hitid, + $start, $end, $desc, @hitline + ); + @hitline = split( " ", $_ ); + $eval = shift @hitline; + $score = shift @hitline; + $bias = shift @hitline; + $hitid = shift @hitline; + $start = shift @hitline; + $end = shift @hitline; + $desc = join ' ', @hitline; + + $desc = '' if ( !defined($desc) ); + + push @hit_list, [ $hitid, $desc, $eval, $score ]; + $hitinfo{$hitid} = $#hit_list; + } + } + + # Complete sequence table data below inclusion threshold + # not currently fully implemented + elsif ( $_ =~ m/inclusion threshold/ ) { + while ( defined( $_ = $self->_readline ) ) { + if ( $_ =~ m/Domain( and alignment)? annotation for each/ + || m/Internal pipeline statistics summary/ ) + { + $self->_pushback($_); + last; + } + next if ( $_ =~ m/^$/ ); + my ($eval_full, $score_full, $bias_full, $eval_best, + $score_best, $bias_best, $exp, $n, + $hitid, $desc, @hitline + ); + @hitline = split( " ", $_ ); + $eval_full = shift @hitline; + $score_full = shift @hitline; + $bias_full = shift @hitline; + $eval_best = shift @hitline; + $score_best = shift @hitline; + $bias_best = shift @hitline; + $exp = shift @hitline; + $n = shift @hitline; + $hitid = shift @hitline; + $desc = join " ", @hitline; + + $hitinfo{$hitid} = "below_inclusion"; + } + } + + # Domain annotation for each sequence table data, hmmscan + elsif ( $_ =~ m/Domain( and alignment)? annotation for each/ ) { + @hsp_list = (); # Here for multi-query reports + my $name; + + while ( defined( $_ = $self->_readline ) ) { + if ( $_ =~ m/Internal pipeline statistics/ + || m/\[No targets detected/ ) + { + $self->_pushback($_); + last; + } + if ( $_ =~ m/^\>\>\s(.*?)\s+/ ) { + $name = $1; + + # Skip hits below inclusion threshold + next if ( $hitinfo{$name} eq "below_inclusion" ); + $domaincounter{$name} = 0; + + while ( defined( $_ = $self->_readline ) ) { + + # Grab table data for sequence + if ( $_ =~ m/Internal pipeline statistics/ + || $_ =~ m/^\>\>/ ) + { + $self->_pushback($_); + last; + } + if ( $_ =~ m/Alignments for each domain/ ) { + $self->_pushback($_); + last; + } + if ( $_ =~ m/^\s+\#\s+score/ + || $_ =~ m/^\s\-\-\-\s+/ + || + + # $_ =~ m/^\>\>/ || + $_ =~ m/^$/ + ) + { + next; + } + + # Grab hsp data from table, push into @hsp; + if (my ($domain_num, $score, $bias, + $ceval, $ieval, $hmmstart, + $hmmstop, $qalistart, $qalistop, + $envstart, $envstop, $envbound, + $acc + ) + = m|^\s+(\d+)\s\!*\?*\s+ # domain number + (\S+)\s+(\S+)\s+ # score, bias + (\S+)\s+(\S+)\s+ # c-eval, i-eval + (\d+)\s+(\d+).+? # hmm start, stop + (\d+)\s+(\d+).+? # query start, stop + (\d+)\s+(\d+).+? # env start, stop + (\S+) # Accession + \s*$|ox + ) + { + # Keep it simple for now. let's customize later + my @vals = ( + $hmmstart, $hmmstop, + $qalistart, $qalistop, + $score, $ceval, + '', '', + '' + ); + my $info = $hit_list[ $hitinfo{$name} ]; + if ( !defined $info ) { + $self->warn( + "Incomplete sequence information; can't find $name, hitinfo says $hitinfo{$name}\n" + ); + next; + } + $domaincounter{$name}++; + my $hsp_key + = $name . "_" . $domaincounter{$name}; + push @hsp_list, [ $name, @vals ]; + $hspinfo{$hsp_key} = $#hsp_list; + } + else { + print "missed this line: $_\n"; + } + } + } + elsif ( $_ =~ m/Alignments for each domain/ ) { + my $domain_count = 0; + + #line counter + my $count = 0; + + # There's an optional block, so we sometimes need to + # count to 3, and sometimes to 4. + my $max_count = 3; + my $lastdomain; + my $hsp; + my ( $hline, $midline, $qline ); + + while ( defined( $_ = $self->_readline ) ) { + if ( $_ =~ m/^\>\>/ + || $_ =~ m/Internal pipeline statistics/ ) + { + $self->_pushback($_); + last; + } + elsif ($hitinfo{$name} eq "below_inclusion" + || $_ =~ m/^$/ ) + { + next; + } + elsif ( $_ =~ /\s\s\=\=\sdomain\s(\d+)\s+/ ) { + my $domainnum = $1; + $count = 0; + my $key = $name . "_" . $domainnum; + $hsp = $hsp_list[ $hspinfo{$key} ]; + $hline = $$hsp[-3]; + $midline = $$hsp[-2]; + $qline = $$hsp[-1]; + $lastdomain = $name; + } + + # model data track, some reports don't have + elsif ( $_ =~ m/\s+\S+\sCS$/ ) { + my $modeltrack = $_; + $max_count++; + $count++; + next; + } + elsif ( $count == $max_count - 3 ) { + + # hit sequence + my @data = split( " ", $_ ); + my $seq = $data[-2]; + $hline .= $seq; + $count++; + next; + } + elsif ( $count == $max_count - 2 ) { + + # conservation track + # storage isn't quite right - need to remove + # leading/lagging whitespace while preserving + # gap data (latter isn't done, former is) + $_ =~ s/^\s+//; + $_ =~ s/\s+$//; + $midline .= $_; + $count++; + next; + } + elsif ( $count == $max_count - 1 ) { + + # query track + my @data = split( " ", $_ ); + my $seq = $data[-2]; + $qline .= $seq; + $count++; + next; + } + elsif ( $count == $max_count ) { + + #pval track + my $pvals = $_; + $count = 0; + $max_count = 3; + $$hsp[-3] = $hline; + $$hsp[-2] = $midline; + $$hsp[-1] = $qline; + next; + } + else { + print "missed $_\n"; + } + } + } + } + } + + # Annotation for each hit, nhmmer + # This code is currently incomplete, the alignment strings + # are not being captured + elsif ( $_ =~ m/Annotation for each hit\s+\(and alignments\)/ ) { + @hsp_list = (); + my $name; + + while ( defined( $_ = $self->_readline ) ) { + if ( $_ =~ m/Internal pipeline statistics/ + || m/\[No targets detected/ ) + { + $self->_pushback($_); + last; + } + if ( $_ =~ m/^>>\s(.*?)\s+/ ) { + $name = $1; + + while ( defined( $_ = $self->_readline ) ) { + + if ( $_ =~ m/Internal pipeline statistics/ + || $_ =~ m/^>>/ ) + { + $self->_pushback($_); + last; + } + elsif ( + $_ =~ /^\s+#\s+score/ + || $_ =~ /^\s+------\s+/ + || $_ =~ /^>>/ + || $_ =~ /^$/ + || $_ =~ /^\s+Alignment:/ + || $_ =~ /^\s+score:/ + || $_ =~ /^\s+score\s+bias/ + || $_ =~ /^\s+\S+\s+\d+\s+([\s+gatc-]+)/ # Alignment, line 1 + || $_ =~ /^\s{20,}([\s+gatc-]+)/ # Alignment, line 2 + || $_ =~ /^\s+$name\s+\d+\s+([\s+GATC-]+)/ # Alignment, line 3 + || $_ =~ /^\s+[\d.\*]+/ # Alignment, line 4 + ) + { + next; + } + elsif ( + /^\s+!\s+(\S+)\s+ + (\S+)\s+(\S+)\s+ + (\d+)\s+(\d+)\s+[.\[\]]*\s+ + (\d+)\s+(\d+)\s+[.\[\]]*\s+ + (\d+)\s+(\d+)\s+[.\[\]]*\s+ + (\d+)\s+(\S+).*$/ox + ) + { + my ($score, $bias, $eval, + $hmmstart, $hmmstop, $hitstart, + $hitstop, $envstart, $envstop, + $length, $acc + ) + = ( + $1, $2, $3, $4, $5, $6, + $7, $8, $9, $10, $11 + ); + + my @vals = ( + $hitstart, $hitstop, + $hmmstart, $hmmstop, + $score, $eval, + '', '', + '' + ); + my $info = $hit_list[ $hitinfo{$name} ]; + if ( !defined $info ) { + $self->warn( + "Incomplete information: can't find HSP $name in list of hits\n" + ); + next; + } + $domaincounter{$name}++; + my $hsp_key = $name . "_" . $domaincounter{$name}; + push @hsp_list, [ $name, @vals ]; + $hspinfo{$hsp_key} = $#hsp_list; + } + else { + print "Missed this line: $_\n"; + } + } + } + } + } + + elsif ( m/Internal pipeline statistics/ || m!^//! ) { + + # If within hit, hsp close; + if ( $self->within_element('hit') ) { + if ( $self->within_element('hsp') ) { + $self->end_element( { 'Name' => 'Hsp' } ); + } + $self->end_element( { 'Name' => 'Hit' } ); + } + + # Grab summary statistics of run + while ( defined( $_ = $self->_readline ) ) { + last if ( $_ =~ m/^\/\/$/ ); + } + + # Do a lot of processing of hits and hsps here + while ( my $hit = shift @hit_list ) { + my $hit_name = shift @$hit; + my $hit_desc = shift @$hit; + my $hit_signif = shift @$hit; + my $hit_score = shift @$hit; + my $num_domains = $domaincounter{$hit_name} || 0; + + $self->start_element( { 'Name' => 'Hit' } ); + $self->element( + { 'Name' => 'Hit_id', + 'Data' => $hit_name + } + ); + $self->element( + { 'Name' => 'Hit_desc', + 'Data' => $hit_desc + } + ); + $self->element( + { 'Name' => 'Hit_signif', + 'Data' => $hit_signif + } + ); + $self->element( + { 'Name' => 'Hit_score', + 'Data' => $hit_score + } + ); + + for my $i ( 1 .. $num_domains ) { + my $key = $hit_name . "_" . $i; + my $hsp = $hsp_list[ $hspinfo{$key} ]; + if ( defined $hsp ) { + my $hsp_name = shift @$hsp; + $self->start_element( { 'Name' => 'Hsp' } ); + $self->element( + { 'Name' => 'Hsp_identity', + 'Data' => 0 + } + ); + $self->element( + { 'Name' => 'Hsp_positive', + 'Data' => 0 + } + ); + $self->element( + { 'Name' => 'Hsp_hit-from', + 'Data' => shift @$hsp + } + ); + $self->element( + { 'Name' => 'Hsp_hit-to', + 'Data' => shift @$hsp + } + ); + $self->element( + { 'Name' => 'Hsp_query-from', + 'Data' => shift @$hsp + } + ); + $self->element( + { 'Name' => 'Hsp_query-to', + 'Data' => shift @$hsp + } + ); + $self->element( + { 'Name' => 'Hsp_score', + 'Data' => shift @$hsp + } + ); + $self->element( + { 'Name' => 'Hsp_evalue', + 'Data' => shift @$hsp + } + ); + $self->element( + { 'Name' => 'Hsp_hseq', + 'Data' => shift @$hsp + } + ); + $self->element( + { 'Name' => 'Hsp_midline', + 'Data' => shift @$hsp + } + ); + $self->element( + { 'Name' => 'Hsp_qseq', + 'Data' => shift @$hsp + } + ); + $self->end_element( { 'Name' => 'Hsp' } ); + } + } + $self->end_element( { 'Name' => 'Hit' } ); + } + @hit_list = (); + %hitinfo = (); + last; + } + } + else { + print "Missed line: $_\n"; + $self->debug($_); + } + $last = $_; + } + $self->end_element( { 'Name' => 'HMMER_Output' } ); + my $result = $self->end_document(); + return $result; +} + +=head2 start_element + + Title : start_element + Usage : $eventgenerator->start_element + Function: Handles a start event + Returns : none + Args : hashref with at least 2 keys 'Data' and 'Name' + +=cut + +sub start_element { + + my ( $self, $data ) = @_; + + # we currently don't care about attributes + my $nm = $data->{'Name'}; + my $type = $MODEMAP{$nm}; + if ($type) { + if ( $self->_eventHandler->will_handle($type) ) { + my $func = sprintf( "start_%s", lc $type ); + $self->_eventHandler->$func( $data->{'Attributes'} ); + } + unshift @{ $self->{'_elements'} }, $type; + } + if ( defined $type + && $type eq 'result' ) + { + $self->{'_values'} = {}; + $self->{'_result'} = undef; + } +} + +=head2 end_element + + Title : end_element + Usage : $eventgeneartor->end_element + Function: Handles and end element event + Returns : none + Args : hashref with at least 2 keys 'Data' and 'Name' + +=cut + +sub end_element { + + my ( $self, $data ) = @_; + my $nm = $data->{'Name'}; + my $type = $MODEMAP{$nm}; + my $rc; + + if ( $nm eq 'HMMER_program' ) { + if ( $self->{'_last_data'} =~ /(N?HMM\S+)/i ) { + $self->{'_reporttype'} = uc $1; + } + } + + # Hsp are sort of weird, in that they end when another + # object begins so have to detect this in end_element for now + if ( $nm eq 'Hsp' ) { + foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) { + my $data = $self->{'_last_hspdata'}->{$_}; + if ( $data && $_ eq 'Hsp_hseq' ) { + + # replace hmm '.' gap symbol by '-' + $data =~ s/\./-/g; + } + $self->element( + { 'Name' => $_, + 'Data' => $data + } + ); + } + $self->{'_last_hspdata'} = {}; + } + if ($type) { + if ( $self->_eventHandler->will_handle($type) ) { + my $func = sprintf( "end_%s", lc $type ); + $rc = $self->_eventHandler->$func( $self->{'_reporttype'}, + $self->{'_values'} ); + } + my $lastelem = shift @{ $self->{'_elements'} }; + } + elsif ( $MAPPING{$nm} ) { + if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { + my $key = ( keys %{ $MAPPING{$nm} } )[0]; + $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } + = $self->{'_last_data'}; + } + else { + $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'}; + + # print "lastdata is " . $self->{'_last_data'} . "\n"; + } + } + else { + $self->debug("unknown nm $nm, ignoring\n"); + } + $self->{'_last_data'} = ''; # remove read data if we are at + # end of an element + $self->{'_result'} = $rc if ( defined $type && $type eq 'result' ); + return $rc; +} + +=head2 element + + Title : element + Usage : $eventhandler->element({'Name' => $name, 'Data' => $str}); + Function: Convienence method that calls start_element, characters, end_element + Returns : none + Args : Hash ref with the keys 'Name' and 'Data' + +=cut + +sub element { + my ( $self, $data ) = @_; + $self->start_element($data); + $self->characters($data); + $self->end_element($data); +} + +=head2 characters + + Title : characters + Usage : $eventgenerator->characters($str) + Function: Send a character events + Returns : none + Args : string + +=cut + +sub characters { + my ( $self, $data ) = @_; + + if ( $self->in_element('hsp') + && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o + && defined $data->{'Data'} ) + { + $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'}; + } + return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o ); + + $self->{'_last_data'} = $data->{'Data'}; +} + +=head2 within_element + + Title : within_element + Usage : if( $eventgenerator->within_element( $element ) ) {} + Function: Test if we are within a particular element + This is different than 'in' because within can be tested for + a whole block + Returns : boolean + Args : string element name + +=cut + +sub within_element { + my ( $self, $name ) = @_; + return 0 + if ( !defined $name + || !defined $self->{'_elements'} + || scalar @{ $self->{'_elements'} } == 0 ); + foreach ( @{ $self->{'_elements'} } ) { + return 1 if ( $_ eq $name ); + } + return 0; +} + +=head2 in_element + + Title : in_element + Usage : if( $eventgenerator->in_element( $element ) ) {} + Function: Test if we are in a particular element + This is different than 'within' because 'in' only + tests its immediate parent + Returns : boolean + Args : string element name + +=cut + +sub in_element { + my ( $self, $name ) = @_; + return 0 if !defined $self->{'_elements'}->[0]; + return ( $self->{'_elements'}->[0] eq $name ); +} + +=head2 start_document + + Title : start_document + Usage : $eventgenerator->start_document + Function: Handle a start document event + Returns : none + Args : none + +=cut + +sub start_document { + my ($self) = @_; + $self->{'_lasttype'} = ''; + $self->{'_values'} = {}; + $self->{'_result'} = undef; + $self->{'_elements'} = []; +} + +=head2 end_document + + Title : end_document + Usage : $eventgenerator->end_document + Function: Handles an end document event + Returns : Bio::Search::Result::ResultI object + Args : none + +=cut + +sub end_document { + my ($self) = @_; + return $self->{'_result'}; +} + +=head2 result_count + + Title : result_count + Usage : my $count = $searchio->result_count + Function: Returns the number of results processed + Returns : interger + Args : none + +=cut + +sub result_count { + my $self = shift; + return $self->{'_result_count'}; +} + +1; diff --git a/Bio/SeqFeature/Similarity.pm b/Bio/SeqFeature/Similarity.pm index 47d3488c4..edf2f654d 100644 --- a/Bio/SeqFeature/Similarity.pm +++ b/Bio/SeqFeature/Similarity.pm @@ -188,12 +188,12 @@ sub seqlength { =cut sub seqdesc { - my ($self, $value) = @_; + my ( $self, $value ) = @_; - if( defined $value ) { - my $v = Bio::Annotation::SimpleValue->new(); - $v->value($value); - $self->annotation->add_Annotation('description',$v); + if ( defined $value ) { + my $v = Bio::Annotation::SimpleValue->new(); + $v->value($value); + $self->annotation->add_Annotation( 'description', $v ); } my ($v) = $self->annotation()->get_Annotations('description'); return defined $v ? $v->value : undef; diff --git a/t/SearchIO/hmmer.t b/t/SearchIO/hmmer.t dissimilarity index 94% index 885cc40cb..31a2f8a00 100644 --- a/t/SearchIO/hmmer.t +++ b/t/SearchIO/hmmer.t @@ -1,439 +1,712 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id: SearchIO_hmmer.t 14989 2008-11-11 19:52:02Z cjfields $ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 302); - - use_ok('Bio::SearchIO'); -} - -my $searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmpfam.out')); -my $result; - -while( $result = $searchio->next_result ) { - is(ref($result),'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - is($result->algorithm, 'HMMPFAM', 'Check algorithm'); - is($result->algorithm_version, '2.1.1', 'Check algorithm version'); - is($result->hmm_name, 'pfam', 'Check hmm_name'); - is($result->sequence_file, '/home/birney/src/wise2/example/road.pep', 'Check sequence_file'); - is($result->query_name, 'roa1_drome', 'Check query_name'); - is($result->query_description, '', 'Check query_description'); - is($result->num_hits(), 2, 'Check num_hits'); - my ($hsp,$hit); - if( $hit = $result->next_model ) { - is($hit->name, 'SEED', 'Check hit name'); - is($hit->raw_score, '146.1', 'Check hit raw_score'); - float_is($hit->significance, 6.3e-40, 'Check hit significance'); - is(ref($hit), 'Bio::Search::Hit::HMMERHit', 'Check for the correct hit reference type'); - is($hit->num_hsps, 1, 'Check num_hsps'); - - if( defined( $hsp = $hit->next_domain ) ) { - is($hsp->hit->start, 1, 'Check for hit hmmfrom value'); - is($hsp->hit->end, 77, 'Check for hit hmm to value'); - is($hsp->query->start, 33, 'Check for query alifrom value'); - is($hsp->query->end, 103, 'Check for query ali to value'); - is($hsp->score, 71.2, 'Check for hsp score'); - float_is($hsp->evalue, 2.2e-17, 'Check for hsp c-Evalue'); - is($hsp->query_string, 'LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP', - 'Check for query string'); - is($hsp->gaps('query'), 7, 'Check for number of gaps in query'); - is($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG-kelggrklrv', - 'Check for hit string'); - is($hsp->homology_string, 'lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ', - 'Check for homology string'); - is( length($hsp->homology_string), length($hsp->hit_string), - 'Check if homology string and hit string have an equal lenght'); - is( length($hsp->query_string), length($hsp->homology_string), - 'Check if query string and homology string have an equal lenght'); - } - } - if( defined ($hit = $result->next_model) ) { - if( defined($hsp = $hit->next_domain) ) { - is($hsp->hit->start, 1, 'Check for hit hmmfrom value'); - is($hsp->hit->end, 77, 'Check for hit hmm to value'); - is($hsp->query->start, 124, 'Check for query alifrom value'); - is($hsp->query->end, 194, 'Check for query ali to value'); - is($hsp->score, 75.5, 'Check for hsp score'); - float_is($hsp->evalue, 1.1e-18, 'Check for hsp c-Evalue'); - is($hsp->query_string, 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV', - 'Check for query string'); - is($hsp->gaps('query'), 6, 'Check for number of gaps in query'); - is($hsp->hit_string, 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv', - 'Check for hit string'); - is($hsp->homology_string, 'lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +GfaFVeF++++ ++k + ++l+g+ + v', - 'Check for homology string'); - is( length($hsp->homology_string), length($hsp->hit_string), - 'Check if homology string and hit string have an equal lenght'); - is( length($hsp->query_string), length($hsp->homology_string), - 'Check if query string and homology string have an equal lenght'); - } - last; - } -} -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmsearch.out')); -while( $result = $searchio->next_result ) { - is(ref($result),'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - is($result->algorithm, 'HMMSEARCH', 'Check algorithm'); - is($result->algorithm_version, '2.0', 'Check algorithm version'); - is($result->hmm_name, 'HMM [SEED]', 'Check hmm_name'); - is($result->sequence_file, 'HMM.dbtemp.29591', 'Check sequence_file'); - is($result->database_name, 'HMM.dbtemp.29591', 'Check database_name'); - is($result->query_name, 'SEED', 'Check query_name'); - is($result->query_description, '', 'Check query_description'); - is($result->num_hits(), 1215, 'Check num_hits'); - my $hit = $result->next_model; - is($hit->name, 'Q91581', 'Check hit name'); - is($hit->description,'Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN', 'Check for hit description'); - float_is($hit->significance, 2e-31, 'Check hit significance'); - is($hit->raw_score, 119.7, 'Check hit raw_score'); - my $hsp = $hit->next_domain; - is($hsp->score,119.7, 'Check for hsp score'); - float_is($hsp->evalue, 2e-31, 'Check for hsp c-Evalue'); - is($hsp->query->start, 18, 'Check for query alifrom value'); - is($hsp->query->end, 89, 'Check for query ali to value'); - is($hsp->hit->start, 1, 'Check for hit hmmfrom value'); - is($hsp->hit->end, 77, 'Check for hit hmm to value'); - is($hsp->query->seq_id(), 'SEED', 'Check for query seq_id'); - is($hsp->hit->seq_id(), 'Q91581', 'Check for hit seq_id'); -} - -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('L77119.hmmer')); - -while( $result = $searchio->next_result ) { - is(ref($result),'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - is($result->algorithm, 'HMMPFAM', 'Check algorithm'); - is($result->algorithm_version, '2.2g', 'Check algorithm version'); - is($result->hmm_name, 'Pfam', 'Check hmm_name'); - is($result->sequence_file, 'L77119.faa', 'Check sequence_file'); - is($result->query_name, 'gi|1522636|gb|AAC37060.1|', 'Check query_name'); - is($result->query_description, 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]', - 'Check query_description'); - is($result->num_hits(), 1, 'Check num_hits'); - my $hit = $result->next_hit; - is($hit->name, 'Methylase_M', 'Check hit name'); - is($hit->description,'Type I restriction modification system, M', 'Check for hit description'); - float_is($hit->significance, 0.0022, 'Check hit significance'); - is($hit->raw_score, -105.2, 'Check hit raw_score'); - my $hsp = $hit->next_hsp; - is($hsp->score,-105.2, 'Check for hsp score'); - float_is($hsp->evalue, 0.0022, 'Check for hsp evalue'); - is($hsp->query->start, 280, 'Check for query alifrom value'); - is($hsp->query->end, 481, 'Check for query ali to value'); - is($hsp->hit->start, 1, 'Check for hit hmmfrom value'); - is($hsp->hit->end, 279, 'Check for hit hmm to value'); - is($hsp->query->seq_id(), 'gi|1522636|gb|AAC37060.1|', 'Check for query seq_id'); - is($hsp->hit->seq_id(), 'Methylase_M', 'Check for hit seq_id'); - is($hsp->hit_string, 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG-dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn', - 'Check for hiy string'); - is($hsp->query_string, 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST', - 'Check for query string'); - is($hsp->homology_string, ' ++EL+++ av+ R L+F K++ dk +i+ p + + +++y ++ ++ ++y ++ + lF++++ e ++ ++++ + + ++ + + Glf ++++ ++ +s+ +ne ++e+i+ +++ +++ G++ +el D++G +YE L+ Ae K+ G +YTP e++ ia+ + i+ ++ +++ ++ k+n+i + s+', - 'Check for homology string'); - is(join(' ', $hsp->seq_inds('query', 'nomatch',1)), '280 288 289 293-295 300 304 311 313-315 317 324-326 332 335 337 344-346 348 355 358-361 364-366 372 379 383-385 389 396 400 404-408 412 416 417 422 426 429-431 434-436 439 441 446 450 451 455 459 460 463 464 468 471 472 478', - 'Check for nomatch indices in query'); - is(join(' ', $hsp->seq_inds('hit', 'nomatch',1)), '1 9 10 14-16 18-31 35 39 42-47 51-59 61 63-65 67 72-74 77-79 82 86 89-94 96 103-105 107 110 111 116 118 120-123 126-131 133 135-141 145 150 151 154 158-160 164 171 175 179-183 187 191-193 198 202 205-207 210-212 215 217 222 226 227 231 233 236 237 240-257 261 264-267 273 275-278', - 'Check for nomatch indices in hit'); - is(join(' ', $hsp->seq_inds('query', 'gap',1)), '296 306 309 321 328 334 335 350 356 366-368 376 417 456 463 470 479', - 'Check for gap indices in query'); - is(join(' ', $hsp->seq_inds('hit', 'gap',1)), '', 'Check for gap indices in hit'); -} - - -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('cysprot1b.hmmsearch')); - - -while( $result = $searchio->next_result ) { - is(ref($result),'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - is($result->algorithm, 'HMMSEARCH', 'Check algorithm'); - is($result->algorithm_version, '2.2g', 'Check algorithm version'); - is($result->hmm_name, 'Peptidase_C1.hmm [Peptidase_C1]', 'Check hmm_name'); - is($result->database_name, 'cysprot1b.fa', 'Check database_name'); - is($result->sequence_file, 'cysprot1b.fa', 'Check sequence_file'); - is($result->query_name, 'Peptidase_C1', 'Check query_name'); - is($result->query_accession, 'PF00112', 'Check query_accession'); - is($result->query_description, 'Papain family cysteine protease', 'Check query_description'); - is($result->num_hits(), 4, 'Check num_hits'); - my $hit = $result->next_hit; - is($hit->name, 'CATL_RAT', 'Check hit name'); - is($hit->description,'', 'Check for hit description'); - float_is($hit->significance, 2e-135, 'Check hit significance'); - is($hit->raw_score, 449.4, 'Check hit raw_score'); - my $hsp = $hit->next_hsp; - is($hsp->score,449.4, 'Check for hsp score'); - float_is($hsp->evalue, 2e-135, 'Check for hsp evalue'); - is($hsp->query->start, 1, 'Check for query alifrom value'); - is($hsp->query->end, 337, 'Check for query ali to value'); - is($hsp->hit->start, 114, 'Check for hit hmmfrom value'); - is($hsp->hit->end, 332, 'Check for hit hmm to value'); - is($hsp->query->seq_id(), 'Peptidase_C1', 'Check for query seq_id'); - is($hsp->hit->seq_id(), 'CATL_RAT', 'Check for hit seq_id'); - is($hsp->hit_string, 'IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT------GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE-----NGGLDSEESY-----PYE----AKD-------------------GSCKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAMDASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGYGYEG-T------------------------------------DSNKDKYWLVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI', - 'Check for hiy string'); - is($hsp->homology_string, '+P+++DWRe kg VtpVK+QG qCGSCWAFSa g lEg+ ++kt gkl+sLSEQ+LvDC++ d gn+ GCnG Glmd Af+Yik+ NgGl++E++Y PY+ +kd g+Cky+ + ++ a+++g++d+p++ E+al+ka+a++GP+sVa+das+ s q+Y+sG +Y+++ C+++ +LdH+Vl+VGYG e+ ++++ +YW+VKNSWG++WG++GY++ia+++n n+CG+a+ asypi', - 'Check for homology string'); - is($hsp->query_string, 'lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgtkawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikkeqIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgtCkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVaidasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGYGteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYWIVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi', - 'Check for query string'); - $hit = $result->next_hit; - is($hit->name, 'CATL_HUMAN', 'Check hit name'); - is($hit->description,'', 'Check for hit description'); - float_is($hit->significance, 6.1e-134, 'Check hit significance'); - is($hit->raw_score, 444.5, 'Check hit raw_score'); -} - -# test for bug 2632 - CS lines should get ignored without breaking the parser -$searchio = Bio::SearchIO->new(-format => 'hmmer', -file => test_input_file('hmmpfam_cs.out')); -$result = $searchio->next_result; -my $hit = $result->next_hit; -my $hsp = $hit->next_hsp; -is($hsp->seq_str, 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES', - 'Check for hsp seq_str'); - - -# Tests for hmmer3 output here -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmscan.out'), - -verbose => 1); -is(ref($searchio), 'Bio::SearchIO::hmmer3', 'Check if correct searchio object is returned'); -while( $result = $searchio->next_result ) { - is(ref($result),'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - is($result->algorithm, 'HMMSCAN', 'Check algorithm'); - is($result->algorithm_version, '3.0', 'Check algorithm version'); - is($result->hmm_name, '/data/biodata/HMMerDB/Pfam.hmm', 'Check hmm_name'); - is($result->sequence_file, 'BA000019.orf1.fasta', 'Check sequence_file'); - is($result->query_name, 'BA000019.orf1', 'Check query_name'); - is($result->query_length, '198', 'Check query_length'); - is($result->query_description, '', 'Check query_description'); - is($result->num_hits(), 1, 'Check num_hits'); - my ($hsp,$hit); - if( $hit = $result->next_model ) { - is(ref($hit), 'Bio::Search::Hit::HMMERHit', 'Check for the correct hit reference type'); - is($hit->name, 'Peripla_BP_2', 'Check hit name'); - is($hit->description, 'Periplasmic binding protein', 'Check for hit description'); - is($hit->raw_score, '105.2', 'Check hit raw_score'); - float_is($hit->significance, 6e-30, 'Check hit significance'); - is($hit->num_hsps, 1, 'Check num_hsps'); - - if( defined( $hsp = $hit->next_domain ) ) { - is(ref($hsp), 'Bio::Search::HSP::HMMERHSP', 'Check for correct hsp reference type'); - is($hsp->hit->start, 59, 'Check for hit hmmfrom value'); - is($hsp->hit->end, 236, 'Check for hit hmm to value'); - is($hsp->query->start, 2, 'Check for query alifrom value'); - is($hsp->query->end, 173, 'Check for query ali to value'); - is($hsp->score, '105.0', 'Check for hsp score'); - float_is($hsp->evalue, 1.5e-33, 'Check for hsp c-Evalue'); - is($hsp->query_string, 'LKPDLIIGREYQ---KNIYNQLSNFAPTVLVDWGSF-TSFQDNFRYIAQVLNEEEQGKLVLQQYQKRIRDLQDRMGERlQKIEVSVIGFSGQSIKSLNR-DAVFNQVLDDAGIKRIsIQKNQQERYLEISIENLNKYDADVLFVINE---SKEQLYPDLKNPLWHHLRAVKKQQVYVVNQ', 'Check for query string'); - is($hsp->hit_string, 'lkPDlvivsafgalvseieellelgipvvavessstaeslleqirllgellgeedeaeelvaelesridavkaridsl-kpktvlvfgyadegikvvfgsgswvgdlldaaggeni-iaeakgseseeisaEqilaadpdviivsgrgedtktgveelkenplwaelpAvkngrvyllds', 'Check for hit string'); - is($hsp->homology_string, 'lkPDl+i+ +++ ++i+++l++ +p+v v+ s+ s+++ +r ++++l+ee++++ + +++++ri+++++r + ++ +v+v+g+++ +ik+++ + ++++ld+ag++ i i++++++ + eis+E+++++d+dv++v k+ + ++nplw +l+Avk+++vy++++', 'Check for homology string'); - } - } -} - -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmsearch3.out'), - -verbose => 1); -while( $result = $searchio->next_result ) { - is(ref($result),'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - is($result->algorithm, 'HMMSEARCH', 'Check algorithm'); - is($result->algorithm_version, '3.0', 'Check algorithm version'); - is($result->hmm_name, 'Kv9.hmm', 'Check hmm_name'); - is($result->sequence_file, '/home/pboutet/Desktop/databases/nr_May26', 'Check sequence_file'); - is($result->query_name, 'Kv9', 'Check query_name'); - is($result->query_length, '481', 'Check query_length'); - is($result->query_description, '', 'Check query_description'); - is($result->num_hits(), 2, 'Check num_hits'); - while( my $hit = $result->next_model ) { - } -} - -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmscan_multi_domain.out'), - -verbose => 1); - -my @multi_hits = ( - ['PPC', 'Bacterial pre-peptidase C-terminal domain', '111.0', 3.1e-32, 6, - [[4, 59, 117, 183, 0.5, 0.16], [12, 58, 347, 388, -0.6, 0.36], - [1, 69, 470, 549, 71.3, 1.3e-23], [15, 25, 582, 603, -3.2, 2], - [13, 36, 987, 1019, -1.1, 0.5], [1, 69, 1087, 1168, 54.4, 2.4e-18]]], - ['HemolysinCabind', 'Hemolysin-type calcium-binding repeat (2 cop', '47.9', 4.7e-13, 3, - [[2, 13, 1214, 1225, 5.9, 0.0026], [1, 18, 1231, 1248, 10.8, 6.8e-5], - [4, 18, 1243, 1257, 11.4, 4.3e-05]]] - ); - -while( $result = $searchio->next_result ) { - is(ref($result),'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - is($result->algorithm, 'HMMSCAN', 'Check algorithm'); - is($result->algorithm_version, '3.0', 'Check algorithm version'); - is($result->hmm_name, '/data/biodata/HMMerDB/Pfam-A.hmm', 'Check hmm_name'); - is($result->sequence_file, 'BA000019.orf37.fasta', 'Check sequence_file'); - is($result->query_name, 'BA000019.orf37', 'Check query_name'); - is($result->query_length, '1418', 'Check query_length'); - is($result->query_description, '', 'Check query_description'); - is($result->num_hits(), 2, 'Check num_hits'); - my ($hsp,$hit); - while( $hit = $result->next_model ) { - my @expected = @{shift @multi_hits}; - is(ref($hit), 'Bio::Search::Hit::HMMERHit', 'Check for the correct hit reference type'); - is($hit->name, shift @expected, 'Check hit name'); - is($hit->description, shift @expected, 'Check for hit description'); - is($hit->raw_score, shift @expected, 'Check hit raw_score'); - float_is($hit->significance, shift @expected, 'Check hit significance'); - is($hit->num_hsps, shift @expected, 'Check num_hsps'); - my @hsp_list = @{shift @expected}; - while( defined( $hsp = $hit->next_domain ) ) { - my @hsp_exp = @{shift @hsp_list}; - is(ref($hsp), 'Bio::Search::HSP::HMMERHSP', 'Check for correct hsp reference type'); - is($hsp->hit->start, shift @hsp_exp, 'Check for hit envfrom value'); - is($hsp->hit->end, shift @hsp_exp, 'Check for hit env to value'); - is($hsp->query->start, shift @hsp_exp, 'Check for query hmmfrom value'); - is($hsp->query->end, shift @hsp_exp, 'Check for query hmm to value'); - is($hsp->score, shift @hsp_exp, 'Check for hsp score'); - float_is($hsp->evalue, shift @hsp_exp, 'Check for hsp c-Evalue'); - } - } -} - -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmscan_sec_struct.out'), - -verbose => 1); - -@multi_hits = ( - ['HTH_AraC', 'Bacterial regulatory helix-turn-helix proteins, Ara', '41.3', 6.7e-11, 2, - [['siadiAeevgfSpsyfsrlFkkytGvt', 'SLMELSRQVGLNDCTLKRGFRLVFDTT'], - ['nwsiadiAeevgf-SpsyfsrlFkkytGvtPsqyr', 'EINISQAARRVGFsSRSYFATAFRKKFGINPKEFL']]], - ['PKSI-KS_m3', '', '38.2', 3.8e-12, 2, - [['GPSvtVDTACSSSLvA', 'GPSVTVDTLCSSSLVA'], - ['GPSvtVDTACSSSLv', 'GPNLVIDSACSSALV']]], - ['DUF746', 'Domain of Unknown Function (DUF746)', '13.9', 0.023, 2, - [['rllIrlLsqplslaeaadqlgtdegiiak', 'EILIRNLENPPSLMELSRQVGLNDCTLKR'], - ['plslaeaadqlgtdeg', 'EINISQAARRVGFSSR']]] - ); - -while( $result = $searchio->next_result ) { - is(ref($result),'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - is($result->algorithm, 'HMMSCAN', 'Check algorithm'); - is($result->algorithm_version, '3.0', 'Check algorithm version'); - is($result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name'); - is($result->sequence_file, 'BA000019.orf8.fasta', 'Check sequence_file'); - is($result->query_name, 'BA000019.orf8', 'Check query_name'); - is($result->query_length, '348', 'Check query_length'); - is($result->query_description, '', 'Check query_description'); - is($result->num_hits(), 3, 'Check num_hits'); - my ($hsp,$hit); - while( $hit = $result->next_model ) { - my @expected = @{shift @multi_hits}; - is(ref($hit), 'Bio::Search::Hit::HMMERHit', 'Check for the correct hit reference type'); - is($hit->name, shift @expected, 'Check hit name'); - is($hit->description, shift @expected, 'Check for hit description'); - is($hit->raw_score, shift @expected, 'Check hit raw_score'); - float_is($hit->significance, shift @expected, 'Check hit significance'); - is($hit->num_hsps, shift @expected, 'Check num_hsps'); - my @hsp_list = @{shift @expected}; - while( defined( $hsp = $hit->next_domain ) ) { - my @hsp_exp = @{shift @hsp_list}; - is(ref($hsp), 'Bio::Search::HSP::HMMERHSP', 'Check for correct hsp reference type'); - is($hsp->hit_string, shift @hsp_exp, 'Check hit sequence'); - is($hsp->query_string, shift @hsp_exp, 'Check query sequence'); - } - } -} - -# Make sure that you can also directly call the hmmer2 and hmmer3 subclasses -$searchio = Bio::SearchIO->new(-format => 'hmmer2', - -file => test_input_file('hmmpfam.out')); -is(ref($searchio), 'Bio::SearchIO::hmmer2', 'Check if loading hmmpfam output via the hmm2 parser directly works'); -is(ref($searchio->next_result), 'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - -$searchio = Bio::SearchIO->new(-format => 'hmmer2', - -file => test_input_file('hmmsearch.out')); -is(ref($searchio), 'Bio::SearchIO::hmmer2', 'Check if loading hmmsearch2 output via the hmm2 parser directly works'); -is(ref($searchio->next_result), 'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - -$searchio = Bio::SearchIO->new(-format => 'hmmer3', - -file => test_input_file('hmmscan.out')); -is(ref($searchio), 'Bio::SearchIO::hmmer3', 'Check if loading hmmscan output via the hmm3 parser directly works'); -is(ref($searchio->next_result), 'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmsearch3.out')); -is(ref($searchio), 'Bio::SearchIO::hmmer3', 'Check if loading hmmsearch3 output via the hmm3 parser directly works'); -is(ref($searchio->next_result), 'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - -# Make sure that you can also specify the -version parameter directly -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmpfam.out'), - -version => 2); -is(ref($searchio), 'Bio::SearchIO::hmmer2', 'Check if selecting the correct hmmpfam parser using -version works'); -is(ref($searchio->next_result), 'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmsearch.out'), - -version => 2); -is(ref($searchio), 'Bio::SearchIO::hmmer2', 'Check if selecting the correct hmmsearch2 parser using -version works'); -is(ref($searchio->next_result), 'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - -$searchio = Bio::SearchIO->new(-format => 'hmmer3', - -file => test_input_file('hmmscan.out'), - -version => 3); -is(ref($searchio), 'Bio::SearchIO::hmmer3', 'Check if selecting the correct hmmscan parser using -version works'); -is(ref($searchio->next_result), 'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmsearch3.out'), - -version => 3); -is(ref($searchio), 'Bio::SearchIO::hmmer3', 'Check if selecting the correct hmmsearch3 parser using -version works'); -is(ref($searchio->next_result), 'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); - -my $pipestr = "cat " . test_input_file('hmmpfam.out') . " |"; -open(my $pipefh, $pipestr); - -$searchio = Bio::SearchIO->new(-format => 'hmmer', - -fh => $pipefh); -is(ref($searchio), 'Bio::SearchIO::hmmer2', 'Check if reading from a pipe works'); -$result = $searchio->next_result; -is(ref($result), 'Bio::Search::Result::HMMERResult', 'Check for the correct result reference type'); -is($result->num_hits(), 2, 'Check num_hits'); - -# bug 3376 -{ - my $in = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('pfamOutput-bug3376.out') - ); - my $result = $in->next_result; - my $hit = $result->next_hit; - my $hsp = $hit->next_hsp; - is($hsp->hit_string,"svfqqqqssksttgstvtAiAiAigYRYRYRAvtWnsGsLssGvnDnDnDqqsdgLYtiYYsvtvpssslpsqtviHHHaHkasstkiiikiePr","bug3376"); -} -# end bug 3376 - -# bug 3421 - making sure a full line of dashes in an HSP is parsed correctly -{ - my $in = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmpfam_HSPdashline.txt') - ); - my $result = $in->next_result; - my $hit = $result->next_hit; - my $hsp = $hit->next_hsp; - is($hsp->length,"561","bug3421 - Check if can correctly parse an HSP with line full of dashes"); -} -# end bug 3421 - -# bug 3302 -{ - my $in = Bio::SearchIO->new(-format => 'hmmer', - -file => test_input_file('hmmpfam_multiresult.out') - ); - my $result = $in->next_result; - $result = $in->next_result; - my $hit = $result->next_hit; - is($hit->name, "IS66_ORF3.uniq","bug3302 - Check if can parse multiresult hmmer"); -} -# end bug 3302 +# -*-Perl-*- Test Harness script for Bioperl + +use strict; + +BEGIN { + use lib '.'; + use Bio::Root::Test; + + test_begin( -tests => 313 ); + + use_ok('Bio::SearchIO'); +} + +my $searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmpfam.out') +); +my $result; + +while ( $result = $searchio->next_result ) { + is( ref($result), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' + ); + is( $result->algorithm, 'HMMPFAM', 'Check algorithm' ); + is( $result->algorithm_version, '2.1.1', 'Check algorithm version' ); + is( $result->hmm_name, 'pfam', 'Check hmm_name' ); + is( $result->sequence_file, + '/home/birney/src/wise2/example/road.pep', + 'Check sequence_file' + ); + is( $result->query_name, 'roa1_drome', 'Check query_name' ); + is( $result->query_description, '', 'Check query_description' ); + is( $result->num_hits(), 2, 'Check num_hits' ); + my ( $hsp, $hit ); + + if ( $hit = $result->next_model ) { + is( $hit->name, 'SEED', 'Check hit name' ); + is( $hit->raw_score, '146.1', 'Check hit raw_score' ); + float_is( $hit->significance, 6.3e-40, 'Check hit significance' ); + is( ref($hit), 'Bio::Search::Hit::HMMERHit', + 'Check for the correct hit reference type' ); + is( $hit->num_hsps, 1, 'Check num_hsps' ); + + if ( defined( $hsp = $hit->next_domain ) ) { + is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' ); + is( $hsp->hit->end, 77, 'Check for hit hmm to value' ); + is( $hsp->query->start, 33, 'Check for query alifrom value' ); + is( $hsp->query->end, 103, 'Check for query ali to value' ); + is( $hsp->score, 71.2, 'Check for hsp score' ); + float_is( $hsp->evalue, 2.2e-17, 'Check for hsp c-Evalue' ); + is( $hsp->query_string, + 'LFIGGLDYRTTDENLKAHFEKWGNIVDVVVMKD-----PRTKRSRGFGFITYSHSSMIDEAQK--SRpHKIDGRVVEP', + 'Check for query string' + ); + is( $hsp->gaps('query'), 7, 'Check for number of gaps in query' ); + is( $hsp->hit_string, + 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnG-kelggrklrv', + 'Check for hit string' + ); + is( $hsp->homology_string, + 'lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ', + 'Check for homology string' + ); + is( length( $hsp->homology_string ), + length( $hsp->hit_string ), + 'Check if homology string and hit string have an equal lenght' + ); + is( length( $hsp->query_string ), + length( $hsp->homology_string ), + 'Check if query string and homology string have an equal lenght' + ); + } + } + if ( defined( $hit = $result->next_model ) ) { + if ( defined( $hsp = $hit->next_domain ) ) { + is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' ); + is( $hsp->hit->end, 77, 'Check for hit hmm to value' ); + is( $hsp->query->start, 124, 'Check for query alifrom value' ); + is( $hsp->query->end, 194, 'Check for query ali to value' ); + is( $hsp->score, 75.5, 'Check for hsp score' ); + float_is( $hsp->evalue, 1.1e-18, 'Check for hsp c-Evalue' ); + is( $hsp->query_string, + 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV', + 'Check for query string' + ); + is( $hsp->gaps('query'), 6, 'Check for number of gaps in query' ); + is( $hsp->hit_string, + 'lfVgNLppdvteedLkdlFskfGpivsikivrDiiekpketgkskGfaFVeFeseedAekAlealnGkelggrklrv', + 'Check for hit string' + ); + is( $hsp->homology_string, + 'lfVg L d +e+ ++d+F++fG iv+i+iv+D ketgk +GfaFVeF++++ ++k + ++l+g+ + v', + 'Check for homology string' + ); + is( length( $hsp->homology_string ), + length( $hsp->hit_string ), + 'Check if homology string and hit string have an equal lenght' + ); + is( length( $hsp->query_string ), + length( $hsp->homology_string ), + 'Check if query string and homology string have an equal lenght' + ); + } + last; + } +} +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmsearch.out') +); +while ( $result = $searchio->next_result ) { + is( ref($result), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' + ); + is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' ); + is( $result->algorithm_version, '2.0', 'Check algorithm version' ); + is( $result->hmm_name, 'HMM [SEED]', 'Check hmm_name' ); + is( $result->sequence_file, 'HMM.dbtemp.29591', 'Check sequence_file' ); + is( $result->database_name, 'HMM.dbtemp.29591', 'Check database_name' ); + is( $result->query_name, 'SEED', 'Check query_name' ); + is( $result->query_description, '', 'Check query_description' ); + is( $result->num_hits(), 1215, 'Check num_hits' ); + my $hit = $result->next_model; + is( $hit->name, 'Q91581', 'Check hit name' ); + is( $hit->description, + 'Q91581 POLYADENYLATION FACTOR 64 KDA SUBUN', + 'Check for hit description' + ); + float_is( $hit->significance, 2e-31, 'Check hit significance' ); + is( $hit->raw_score, 119.7, 'Check hit raw_score' ); + my $hsp = $hit->next_domain; + is( $hsp->score, 119.7, 'Check for hsp score' ); + float_is( $hsp->evalue, 2e-31, 'Check for hsp c-Evalue' ); + is( $hsp->query->start, 18, 'Check for query alifrom value' ); + is( $hsp->query->end, 89, 'Check for query ali to value' ); + is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' ); + is( $hsp->hit->end, 77, 'Check for hit hmm to value' ); + is( $hsp->query->seq_id(), 'SEED', 'Check for query seq_id' ); + is( $hsp->hit->seq_id(), 'Q91581', 'Check for hit seq_id' ); +} + +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('L77119.hmmer') +); + +while ( $result = $searchio->next_result ) { + is( ref($result), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' + ); + is( $result->algorithm, 'HMMPFAM', 'Check algorithm' ); + is( $result->algorithm_version, '2.2g', 'Check algorithm version' ); + is( $result->hmm_name, 'Pfam', 'Check hmm_name' ); + is( $result->sequence_file, 'L77119.faa', 'Check sequence_file' ); + is( $result->query_name, 'gi|1522636|gb|AAC37060.1|', + 'Check query_name' ); + is( $result->query_description, + 'M. jannaschii predicted coding region MJECS02 [Methanococcus jannaschii]', + 'Check query_description' + ); + is( $result->num_hits(), 1, 'Check num_hits' ); + my $hit = $result->next_hit; + is( $hit->name, 'Methylase_M', 'Check hit name' ); + is( $hit->description, + 'Type I restriction modification system, M', + 'Check for hit description' + ); + float_is( $hit->significance, 0.0022, 'Check hit significance' ); + is( $hit->raw_score, -105.2, 'Check hit raw_score' ); + my $hsp = $hit->next_hsp; + is( $hsp->score, -105.2, 'Check for hsp score' ); + float_is( $hsp->evalue, 0.0022, 'Check for hsp evalue' ); + is( $hsp->query->start, 280, 'Check for query alifrom value' ); + is( $hsp->query->end, 481, 'Check for query ali to value' ); + is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' ); + is( $hsp->hit->end, 279, 'Check for hit hmm to value' ); + is( $hsp->query->seq_id(), + 'gi|1522636|gb|AAC37060.1|', 'Check for query seq_id' ); + is( $hsp->hit->seq_id(), 'Methylase_M', 'Check for hit seq_id' ); + is( $hsp->hit_string, + 'lrnELentLWavADkLRGsmDaseYKdyVLGLlFlKYiSdkFlerrieieerktdtesepsldyakledqyeqlededlekedfyqkkGvFilPsqlFwdfikeaeknkldedigtdldkifseledqialgypaSeedfkGlfpdldfnsnkLgskaqarnetLtelidlfselelgtPmHNG-dfeelgikDlfGDaYEYLLgkFAeneGKsGGeFYTPqeVSkLiaeiLtigqpsegdfsIYDPAcGSGSLllqaskflgehdgkrnaisyYGQEsn', + 'Check for hiy string' + ); + is( $hsp->query_string, + 'NTSELDKKKFAVLLMNR--------------LIFIKFLEDK------GIV---------PRDLLRRTYEDY---KKSNVLI-NYYDAY-L----KPLFYEVLNTPEDER--KENIRT-NPYYKDIPYL---N-G-------GLFRSNNV--PNELSFTIKDNEIIGEVINFLERYKFTLSTSEGsEEVELNP-DILGYVYEKLINILAEKGQKGLGAYYTPDEITSYIAKNT-IEPIVVE----------------RFKEIIK--NWKINDINF----ST', + 'Check for query string' + ); + is( $hsp->homology_string, + ' ++EL+++ av+ R L+F K++ dk +i+ p + + +++y ++ ++ ++y ++ + lF++++ e ++ ++++ + + ++ + + Glf ++++ ++ +s+ +ne ++e+i+ +++ +++ G++ +el D++G +YE L+ Ae K+ G +YTP e++ ia+ + i+ ++ +++ ++ k+n+i + s+', + 'Check for homology string' + ); + is( join( ' ', $hsp->seq_inds( 'query', 'nomatch', 1 ) ), + '280 288 289 293-295 300 304 311 313-315 317 324-326 332 335 337 344-346 348 355 358-361 364-366 372 379 383-385 389 396 400 404-408 412 416 417 422 426 429-431 434-436 439 441 446 450 451 455 459 460 463 464 468 471 472 478', + 'Check for nomatch indices in query' + ); + is( join( ' ', $hsp->seq_inds( 'hit', 'nomatch', 1 ) ), + '1 9 10 14-16 18-31 35 39 42-47 51-59 61 63-65 67 72-74 77-79 82 86 89-94 96 103-105 107 110 111 116 118 120-123 126-131 133 135-141 145 150 151 154 158-160 164 171 175 179-183 187 191-193 198 202 205-207 210-212 215 217 222 226 227 231 233 236 237 240-257 261 264-267 273 275-278', + 'Check for nomatch indices in hit' + ); + is( join( ' ', $hsp->seq_inds( 'query', 'gap', 1 ) ), + '296 306 309 321 328 334 335 350 356 366-368 376 417 456 463 470 479', + 'Check for gap indices in query' + ); + is( join( ' ', $hsp->seq_inds( 'hit', 'gap', 1 ) ), + '', 'Check for gap indices in hit' ); +} + +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('cysprot1b.hmmsearch') +); + +while ( $result = $searchio->next_result ) { + is( ref($result), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' + ); + is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' ); + is( $result->algorithm_version, '2.2g', 'Check algorithm version' ); + is( $result->hmm_name, + 'Peptidase_C1.hmm [Peptidase_C1]', + 'Check hmm_name' + ); + is( $result->database_name, 'cysprot1b.fa', 'Check database_name' ); + is( $result->sequence_file, 'cysprot1b.fa', 'Check sequence_file' ); + is( $result->query_name, 'Peptidase_C1', 'Check query_name' ); + is( $result->query_accession, 'PF00112', 'Check query_accession' ); + is( $result->query_description, + 'Papain family cysteine protease', + 'Check query_description' + ); + is( $result->num_hits(), 4, 'Check num_hits' ); + my $hit = $result->next_hit; + is( $hit->name, 'CATL_RAT', 'Check hit name' ); + is( $hit->description, '', 'Check for hit description' ); + float_is( $hit->significance, 2e-135, 'Check hit significance' ); + is( $hit->raw_score, 449.4, 'Check hit raw_score' ); + my $hsp = $hit->next_hsp; + is( $hsp->score, 449.4, 'Check for hsp score' ); + float_is( $hsp->evalue, 2e-135, 'Check for hsp evalue' ); + is( $hsp->query->start, 1, 'Check for query alifrom value' ); + is( $hsp->query->end, 337, 'Check for query ali to value' ); + is( $hsp->hit->start, 114, 'Check for hit hmmfrom value' ); + is( $hsp->hit->end, 332, 'Check for hit hmm to value' ); + is( $hsp->query->seq_id(), 'Peptidase_C1', 'Check for query seq_id' ); + is( $hsp->hit->seq_id(), 'CATL_RAT', 'Check for hit seq_id' ); + is( $hsp->hit_string, + 'IPKTVDWRE-KG-CVTPVKNQG-QCGSCWAFSASGCLEGQMFLKT------GKLISLSEQNLVDCSH-DQGNQ------GCNG-GLMDFAFQYIKE-----NGGLDSEESY-----PYE----AKD-------------------GSCKYR-AEYAV-----ANDTGFVDIPQQ-----EKALMKAVATVGPISVAMDASHPS---LQFYSSG-------IYYEP---NCSSK---DLDHGVLVVGYGYEG-T------------------------------------DSNKDKYWLVKNSWGKEWGMDGYIKIAKDRN----NHCGLATAASYPI', + 'Check for hiy string' + ); + is( $hsp->homology_string, + '+P+++DWRe kg VtpVK+QG qCGSCWAFSa g lEg+ ++kt gkl+sLSEQ+LvDC++ d gn+ GCnG Glmd Af+Yik+ NgGl++E++Y PY+ +kd g+Cky+ + ++ a+++g++d+p++ E+al+ka+a++GP+sVa+das+ s q+Y+sG +Y+++ C+++ +LdH+Vl+VGYG e+ ++++ +YW+VKNSWG++WG++GY++ia+++n n+CG+a+ asypi', + 'Check for homology string' + ); + is( $hsp->query_string, + 'lPesfDWReWkggaVtpVKdQGiqCGSCWAFSavgalEgryciktgtkawggklvsLSEQqLvDCdgedygnngesCGyGCnGGGlmdnAfeYikkeqIsnNgGlvtEsdYekgCkPYtdfPCgkdggndtyypCpgkaydpndTgtCkynckknskypktyakikgygdvpynvsTydEealqkalaknGPvsVaidasedskgDFqlYksGendvgyGvYkhtsageCggtpfteLdHAVliVGYGteneggtfdetssskksesgiqvssgsngssgSSgssgapiedkgkdYWIVKNSWGtdWGEnGYfriaRgknksgkneCGIaseasypi', + 'Check for query string' + ); + $hit = $result->next_hit; + is( $hit->name, 'CATL_HUMAN', 'Check hit name' ); + is( $hit->description, '', 'Check for hit description' ); + float_is( $hit->significance, 6.1e-134, 'Check hit significance' ); + is( $hit->raw_score, 444.5, 'Check hit raw_score' ); +} + +# test for bug 2632 - CS lines should get ignored without breaking the parser +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmpfam_cs.out') +); +$result = $searchio->next_result; +my $hit = $result->next_hit; +my $hsp = $hit->next_hsp; +is( $hsp->seq_str, + 'CGV-GFIADVNNVANHKIVVQALEALTCMEHRGACSADRDSGDGAGITTAIPWNLFQKSLQNQNIKFEQnDSVGVGMLFLPAHKLKES--KLIIETVLKEENLEIIGWRLVPTVQEVLGKQAYLNKPHVEQVFCKSSNLSKDRLEQQLFLVRKKIEKYIGINGKDwaheFYICSLSCYTIVYKGMMRSAVLGQFYQDLYHSEYTSSFAIYHRRFSTNTMPKWPLAQPMR---------FVSHNGEINTLLGNLNWMQSREPLLQSKVWKDRIHELKPITNKDNSDSANLDAAVELLIASGRSPEEALMILVPEAFQNQPDFA-NNTEISDFYEYYSGLQEPWDGPALVVFTNGKV-IGATLDRNGL-RPARYVIT----KDNLVIVSSES', + 'Check for hsp seq_str' +); + +# Tests for hmmer3 output here +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmscan.out'), + -verbose => 1 +); +is( ref($searchio), 'Bio::SearchIO::hmmer3', + 'Check if correct searchio object is returned' ); +while ( $result = $searchio->next_result ) { + is( ref($result), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' + ); + is( $result->algorithm, 'HMMSCAN', 'Check algorithm' ); + is( $result->algorithm_version, '3.0', 'Check algorithm version' ); + is( $result->hmm_name, + '/data/biodata/HMMerDB/Pfam.hmm', + 'Check hmm_name' + ); + is( $result->sequence_file, 'BA000019.orf1.fasta', + 'Check sequence_file' ); + is( $result->query_name, 'BA000019.orf1', 'Check query_name' ); + is( $result->query_length, '198', 'Check query_length' ); + is( $result->query_description, '', 'Check query_description' ); + is( $result->num_hits(), 1, 'Check num_hits' ); + my ( $hsp, $hit ); + + if ( $hit = $result->next_model ) { + is( ref($hit), 'Bio::Search::Hit::HMMERHit', + 'Check for the correct hit reference type' ); + is( $hit->name, 'Peripla_BP_2', 'Check hit name' ); + is( $hit->description, + 'Periplasmic binding protein', + 'Check for hit description' + ); + is( $hit->raw_score, '105.2', 'Check hit raw_score' ); + float_is( $hit->significance, 6e-30, 'Check hit significance' ); + is( $hit->num_hsps, 1, 'Check num_hsps' ); + + if ( defined( $hsp = $hit->next_domain ) ) { + is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', + 'Check for correct hsp reference type' ); + is( $hsp->hit->start, 59, 'Check for hit hmmfrom value' ); + is( $hsp->hit->end, 236, 'Check for hit hmm to value' ); + is( $hsp->query->start, 2, 'Check for query alifrom value' ); + is( $hsp->query->end, 173, 'Check for query ali to value' ); + is( $hsp->score, '105.0', 'Check for hsp score' ); + float_is( $hsp->evalue, 1.5e-33, 'Check for hsp c-Evalue' ); + is( $hsp->query_string, + 'LKPDLIIGREYQ---KNIYNQLSNFAPTVLVDWGSF-TSFQDNFRYIAQVLNEEEQGKLVLQQYQKRIRDLQDRMGERlQKIEVSVIGFSGQSIKSLNR-DAVFNQVLDDAGIKRIsIQKNQQERYLEISIENLNKYDADVLFVINE---SKEQLYPDLKNPLWHHLRAVKKQQVYVVNQ', + 'Check for query string' + ); + is( $hsp->hit_string, + 'lkPDlvivsafgalvseieellelgipvvavessstaeslleqirllgellgeedeaeelvaelesridavkaridsl-kpktvlvfgyadegikvvfgsgswvgdlldaaggeni-iaeakgseseeisaEqilaadpdviivsgrgedtktgveelkenplwaelpAvkngrvyllds', + 'Check for hit string' + ); + is( $hsp->homology_string, + 'lkPDl+i+ +++ ++i+++l++ +p+v v+ s+ s+++ +r ++++l+ee++++ + +++++ri+++++r + ++ +v+v+g+++ +ik+++ + ++++ld+ag++ i i++++++ + eis+E+++++d+dv++v k+ + ++nplw +l+Avk+++vy++++', + 'Check for homology string' + ); + } + } +} + +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmsearch3.out'), + -verbose => 1 +); +while ( $result = $searchio->next_result ) { + is( ref($result), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' + ); + is( $result->algorithm, 'HMMSEARCH', 'Check algorithm' ); + is( $result->algorithm_version, '3.0', 'Check algorithm version' ); + is( $result->hmm_name, 'Kv9.hmm', 'Check hmm_name' ); + is( $result->sequence_file, + '/home/pboutet/Desktop/databases/nr_May26', + 'Check sequence_file' + ); + is( $result->query_name, 'Kv9', 'Check query_name' ); + is( $result->query_length, '481', 'Check query_length' ); + is( $result->query_description, '', 'Check query_description' ); + is( $result->num_hits(), 2, 'Check num_hits' ); + + while ( my $hit = $result->next_model ) { + } +} + +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmscan_multi_domain.out'), + -verbose => 1 +); + +my @multi_hits = ( + [ 'PPC', + 'Bacterial pre-peptidase C-terminal domain', + '111.0', 3.1e-32, 6, + [ [ 4, 59, 117, 183, 0.5, 0.16 ], + [ 12, 58, 347, 388, -0.6, 0.36 ], + [ 1, 69, 470, 549, 71.3, 1.3e-23 ], + [ 15, 25, 582, 603, -3.2, 2 ], + [ 13, 36, 987, 1019, -1.1, 0.5 ], + [ 1, 69, 1087, 1168, 54.4, 2.4e-18 ] + ] + ], + [ 'HemolysinCabind', + 'Hemolysin-type calcium-binding repeat (2 cop', + '47.9', 4.7e-13, 3, + [ [ 2, 13, 1214, 1225, 5.9, 0.0026 ], + [ 1, 18, 1231, 1248, 10.8, 6.8e-5 ], + [ 4, 18, 1243, 1257, 11.4, 4.3e-05 ] + ] + ] +); + +while ( $result = $searchio->next_result ) { + is( ref($result), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' + ); + is( $result->algorithm, 'HMMSCAN', 'Check algorithm' ); + is( $result->algorithm_version, '3.0', 'Check algorithm version' ); + is( $result->hmm_name, + '/data/biodata/HMMerDB/Pfam-A.hmm', + 'Check hmm_name' + ); + is( $result->sequence_file, 'BA000019.orf37.fasta', + 'Check sequence_file' ); + is( $result->query_name, 'BA000019.orf37', 'Check query_name' ); + is( $result->query_length, '1418', 'Check query_length' ); + is( $result->query_description, '', 'Check query_description' ); + is( $result->num_hits(), 2, 'Check num_hits' ); + my ( $hsp, $hit ); + + while ( $hit = $result->next_model ) { + my @expected = @{ shift @multi_hits }; + is( ref($hit), 'Bio::Search::Hit::HMMERHit', + 'Check for the correct hit reference type' ); + is( $hit->name, shift @expected, 'Check hit name' ); + is( $hit->description, shift @expected, 'Check for hit description' ); + is( $hit->raw_score, shift @expected, 'Check hit raw_score' ); + float_is( + $hit->significance, + shift @expected, + 'Check hit significance' + ); + is( $hit->num_hsps, shift @expected, 'Check num_hsps' ); + my @hsp_list = @{ shift @expected }; + + while ( defined( $hsp = $hit->next_domain ) ) { + my @hsp_exp = @{ shift @hsp_list }; + is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', + 'Check for correct hsp reference type' ); + is( $hsp->hit->start, + shift @hsp_exp, + 'Check for hit envfrom value' + ); + is( $hsp->hit->end, shift @hsp_exp, + 'Check for hit env to value' ); + is( $hsp->query->start, + shift @hsp_exp, + 'Check for query hmmfrom value' + ); + is( $hsp->query->end, + shift @hsp_exp, + 'Check for query hmm to value' + ); + is( $hsp->score, shift @hsp_exp, 'Check for hsp score' ); + float_is( $hsp->evalue, shift @hsp_exp, + 'Check for hsp c-Evalue' ); + } + } +} + +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmscan_sec_struct.out'), + -verbose => 1 +); + +@multi_hits = ( + [ 'HTH_AraC', + 'Bacterial regulatory helix-turn-helix proteins, Ara', + '41.3', 6.7e-11, 2, + [ [ 'siadiAeevgfSpsyfsrlFkkytGvt', 'SLMELSRQVGLNDCTLKRGFRLVFDTT' ], + [ 'nwsiadiAeevgf-SpsyfsrlFkkytGvtPsqyr', + 'EINISQAARRVGFsSRSYFATAFRKKFGINPKEFL' + ] + ] + ], + [ 'PKSI-KS_m3', + '', '38.2', 3.8e-12, 2, + [ [ 'GPSvtVDTACSSSLvA', 'GPSVTVDTLCSSSLVA' ], + [ 'GPSvtVDTACSSSLv', 'GPNLVIDSACSSALV' ] + ] + ], + [ 'DUF746', + 'Domain of Unknown Function (DUF746)', + '13.9', 0.023, 2, + [ [ 'rllIrlLsqplslaeaadqlgtdegiiak', + 'EILIRNLENPPSLMELSRQVGLNDCTLKR' + ], + [ 'plslaeaadqlgtdeg', 'EINISQAARRVGFSSR' ] + ] + ] +); + +while ( $result = $searchio->next_result ) { + is( ref($result), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' + ); + is( $result->algorithm, 'HMMSCAN', 'Check algorithm' ); + is( $result->algorithm_version, '3.0', 'Check algorithm version' ); + is( $result->hmm_name, 'Pfam-A.hmm', 'Check hmm_name' ); + is( $result->sequence_file, 'BA000019.orf8.fasta', + 'Check sequence_file' ); + is( $result->query_name, 'BA000019.orf8', 'Check query_name' ); + is( $result->query_length, '348', 'Check query_length' ); + is( $result->query_description, '', 'Check query_description' ); + is( $result->num_hits(), 3, 'Check num_hits' ); + my ( $hsp, $hit ); + + while ( $hit = $result->next_model ) { + my @expected = @{ shift @multi_hits }; + is( ref($hit), 'Bio::Search::Hit::HMMERHit', + 'Check for the correct hit reference type' ); + is( $hit->name, shift @expected, 'Check hit name' ); + is( $hit->description, shift @expected, 'Check for hit description' ); + is( $hit->raw_score, shift @expected, 'Check hit raw_score' ); + float_is( + $hit->significance, + shift @expected, + 'Check hit significance' + ); + is( $hit->num_hsps, shift @expected, 'Check num_hsps' ); + my @hsp_list = @{ shift @expected }; + + while ( defined( $hsp = $hit->next_domain ) ) { + my @hsp_exp = @{ shift @hsp_list }; + is( ref($hsp), 'Bio::Search::HSP::HMMERHSP', + 'Check for correct hsp reference type' ); + is( $hsp->hit_string, shift @hsp_exp, 'Check hit sequence' ); + is( $hsp->query_string, shift @hsp_exp, 'Check query sequence' ); + } + } +} + +# Make sure that you can also directly call the hmmer2 and hmmer3 subclasses +$searchio = Bio::SearchIO->new( + -format => 'hmmer2', + -file => test_input_file('hmmpfam.out') +); +is( ref($searchio), 'Bio::SearchIO::hmmer2', + 'Check if loading hmmpfam output via the hmm2 parser directly works' ); +is( ref( $searchio->next_result ), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' +); + +$searchio = Bio::SearchIO->new( + -format => 'hmmer2', + -file => test_input_file('hmmsearch.out') +); +is( ref($searchio), 'Bio::SearchIO::hmmer2', + 'Check if loading hmmsearch2 output via the hmm2 parser directly works' ); +is( ref( $searchio->next_result ), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' +); + +$searchio = Bio::SearchIO->new( + -format => 'hmmer3', + -file => test_input_file('hmmscan.out') +); +is( ref($searchio), 'Bio::SearchIO::hmmer3', + 'Check if loading hmmscan output via the hmm3 parser directly works' ); +is( ref( $searchio->next_result ), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' +); + +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmsearch3.out') +); +is( ref($searchio), 'Bio::SearchIO::hmmer3', + 'Check if loading hmmsearch3 output via the hmm3 parser directly works' ); +is( ref( $searchio->next_result ), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' +); + +# Make sure that you can also specify the -version parameter directly +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmpfam.out'), + -version => 2 +); +is( ref($searchio), 'Bio::SearchIO::hmmer2', + 'Check if selecting the correct hmmpfam parser using -version works' ); +is( ref( $searchio->next_result ), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' +); + +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmsearch.out'), + -version => 2 +); +is( ref($searchio), 'Bio::SearchIO::hmmer2', + 'Check if selecting the correct hmmsearch2 parser using -version works' ); +is( ref( $searchio->next_result ), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' +); + +$searchio = Bio::SearchIO->new( + -format => 'hmmer3', + -file => test_input_file('hmmscan.out'), + -version => 3 +); +is( ref($searchio), 'Bio::SearchIO::hmmer3', + 'Check if selecting the correct hmmscan parser using -version works' ); +is( ref( $searchio->next_result ), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' +); + +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmsearch3.out'), + -version => 3 +); +is( ref($searchio), 'Bio::SearchIO::hmmer3', + 'Check if selecting the correct hmmsearch3 parser using -version works' ); +is( ref( $searchio->next_result ), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' +); + +my $pipestr = "cat " . test_input_file('hmmpfam.out') . " |"; +open( my $pipefh, $pipestr ); + +$searchio = Bio::SearchIO->new( + -format => 'hmmer', + -fh => $pipefh +); +is( ref($searchio), 'Bio::SearchIO::hmmer2', + 'Check if reading from a pipe works' ); +$result = $searchio->next_result; +is( ref($result), + 'Bio::Search::Result::HMMERResult', + 'Check for the correct result reference type' +); +is( $result->num_hits(), 2, 'Check num_hits' ); + +# bug 3376 +{ + my $in = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('pfamOutput-bug3376.out') + ); + my $result = $in->next_result; + my $hit = $result->next_hit; + my $hsp = $hit->next_hsp; + is( $hsp->hit_string, + "svfqqqqssksttgstvtAiAiAigYRYRYRAvtWnsGsLssGvnDnDnDqqsdgLYtiYYsvtvpssslpsqtviHHHaHkasstkiiikiePr", + "bug3376" + ); +} +# end bug 3376 + +# bug 3421 - making sure a full line of dashes in an HSP is parsed correctly +{ + my $in = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmpfam_HSPdashline.txt') + ); + my $result = $in->next_result; + my $hit = $result->next_hit; + my $hsp = $hit->next_hsp; + is( $hsp->length, "561", + "bug3421 - Check if can correctly parse an HSP with line full of dashes" + ); +} +# end bug 3421 + +# bug 3302 +{ + my $in = Bio::SearchIO->new( + -format => 'hmmer', + -file => test_input_file('hmmpfam_multiresult.out') + ); + my $result = $in->next_result; + $result = $in->next_result; + my $hit = $result->next_hit; + is( $hit->name, "IS66_ORF3.uniq", "bug3302 - Check if can parse multiresult hmmer" ); +} +# end bug 3302 + +# HMMER 3.1 nhmmer output +{ + my $in = Bio::SearchIO->new( + -format => 'hmmer', + -version => 3, + -file => test_input_file('nhmmer-3.1.out') + ); + my $result = $in->next_result; + is( $result->algorithm_version, '3.1b1', 'Check nhmmer algorithm version' ); + + my $hit = $result->next_hit; + is( $hit->name, "seq1", "Check nhmmer hit name" ); + is( $hit->description, "Description of seq1", "Check nhmmer hit description" ); + is( $hit->significance, "3.2e-48", "Check nhmmer hit significance" ); + is( $hit->score, "148.2", "Check nhmmer hit score" ); + + my $hsp = $hit->next_hsp; + is( $hsp->score, "148.2", "Check nhmmer hsp score" ); + is( $hsp->significance, "3.2e-48", "Check nhmmer hsp score" ); + is( $hsp->start('hit'), "1", "Check nhmmer hsp hit start" ); + is( $hsp->end('hit'), "151", "Check nhmmer hsp hit end" ); + is( $hsp->start('query'), "258", "Check nhmmer hsp query start" ); + is( $hsp->end('query'), "411", "Check nhmmer hsp query end" ); +} +# end HMMER 3.1 nhmmer output + diff --git a/t/data/nhmmer-3.1.out b/t/data/nhmmer-3.1.out new file mode 100644 index 000000000..b799943ed --- /dev/null +++ b/t/data/nhmmer-3.1.out @@ -0,0 +1,63 @@ +# nhmmer :: search a DNA model or alignment against a DNA database +# HMMER 3.1b1 (May 2013); http://hmmer.org/ +# Copyright (C) 2013 Howard Hughes Medical Institute. +# Freely distributed under the GNU General Public License (GPLv3). +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +# query file: ../HMMs/A_HA_H7_CDS_nucleotide.hmm +# target sequence database: tmp.fa +# number of worker threads: 4 +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +Query: A_HA_H7_CDS_nucleotide [M=1683] +Scores for complete hits: + E-value score bias Sequence start end Description + ------- ------ ----- -------- ----- ----- ----------- + 3.2e-48 148.2 5.7 seq1 1 151 Description of seq1 + 3.9e-15 38.6 0.4 seq2 59 1 Description of seq2 + + +Annotation for each hit (and alignments): +>> seq1 Description of seq1 + score bias Evalue hmmfrom hmm to alifrom ali to envfrom env to sq len acc + ------ ----- --------- ------- ------- --------- --------- --------- --------- --------- ---- + ! 148.2 5.7 3.2e-48 258 411 .. 1 151 [] 1 151 [] 151 0.98 + + Alignment: + score: 148.2 bits + A_HA_H7_CDS_nucleotide 258 attcctagaattttcagctgatttaattattgagaggcgagaaggaagtaatgatgtctgttatcctgggaaattcgtaaatgaaga 344 + attcctagaattttcagc+gatttaattattgagaggcgagaaggaagt gatgtctgttatcctgggaaattcgt+aatgaaga + seq1 1 ATTCCTAGAATTTTCAGCCGATTTAATTATTGAGAGGCGAGAAGGAAGT---GATGTCTGTTATCCTGGGAAATTCGTGAATGAAGA 84 + 689*******************************************777...*********************************** PP + + A_HA_H7_CDS_nucleotide 345 agctctgaggcaaattctcagggggtcaggcggaattgacaaggagacaatgggattcacatatagc 411 + agctctgaggcaaattctcaggg+gtcaggcggaattgacaaggagacaatgggattcac+ta+agc + seq1 85 AGCTCTGAGGCAAATTCTCAGGGAGTCAGGCGGAATTGACAAGGAGACAATGGGATTCACCTACAGC 151 + ****************************************************************986 PP + +>> seq2 Description of seq2 + score bias Evalue hmmfrom hmm to alifrom ali to envfrom env to sq len acc + ------ ----- --------- ------- ------- --------- --------- --------- --------- --------- ---- + ! 38.6 0.4 3.9e-15 34 92 .. 59 1 .. 60 1 .. 60 0.97 + + Alignment: + score: 38.6 bits + A_HA_H7_CDS_nucleotide 34 gtgatgattgcaacaaatgcagacaaaatctgccttgggcaccatgctgtgtcaaacgg 92 + g+gat+att+c+acaaatgcagacaa atctgccttgggca+catgc+gtgtcaaacgg + seq2 59 GCGATCATTCCGACAAATGCAGACAAGATCTGCCTTGGGCATCATGCCGTGTCAAACGG 1 + 6899****************************************************986 PP + + + +Internal pipeline statistics summary: +------------------------------------- +Query model(s): 1 (1683 nodes) +Target sequences: 2 (422 residues searched) +Residues passing SSV filter: 211 (0.5); expected (0.02) +Residues passing bias filter: 211 (0.5); expected (0.02) +Residues passing Vit filter: 211 (0.5); expected (0.003) +Residues passing Fwd filter: 211 (0.5); expected (3e-05) +Total number of hits: 2 (0.498) +# CPU time: 0.02u 0.01s 00:00:00.03 Elapsed: 00:00:00.30 +# Mc/sec: 2.37 +// +[ok] -- 2.11.4.GIT