2 # BioPerl module for Bio::SearchIO::hmmer3
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Thomas Sharpton <thomas.sharpton@gmail.com>
8 # Copyright Thomas Sharpton
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
22 my $searchio = Bio::SearchIO->new(
25 -file => 'hmmsearch.out'
28 my $result = $searchio->next_result;
29 my $hit = $result->next_hit;
30 print $hit->name, $hit->description, $hit->significance,
33 my $hsp = $hit->next_hsp;
34 print $hsp->start('hit'), $hsp->end('hit'), $hsp->start('query'),
35 $hsp->end('query'), "\n";
39 Code to parse output from hmmsearch, hmmscan, phmmer and nhmmer, compatible with
40 both version 2 and version 3 of the HMMER package from L<http://hmmer.org>.
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to
48 the Bioperl mailing list. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
55 Please direct usage questions or support issues to the mailing list:
57 L<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 of the bugs and their resolution. Bug reports can be submitted via
70 https://github.com/bioperl/bioperl-live/issues
72 =head1 AUTHOR - Thomas Sharpton
74 Email thomas.sharpton@gmail.com
76 Describe contact details here
80 Additional contributors names and emails here
86 The rest of the documentation details each of the object methods.
87 Internal methods are usually preceded with a _
91 # Let the code begin...
93 package Bio
::SearchIO
::hmmer3
;
97 use Bio
::Factory
::ObjectFactory
;
98 use Bio
::Tools
::IUPAC
;
99 use vars
qw(%MAPPING %MODEMAP);
100 use base qw(Bio::SearchIO::hmmer);
104 # mapping of HMMER items to Bioperl hash keys
106 'HMMER_Output' => 'result',
112 'Hsp_bit-score' => 'HSP-bits',
113 'Hsp_score' => 'HSP-score',
114 'Hsp_evalue' => 'HSP-evalue',
115 'Hsp_query-from' => 'HSP-query_start',
116 'Hsp_query-to' => 'HSP-query_end',
117 'Hsp_query-strand' => 'HSP-query_strand',
118 'Hsp_hit-from' => 'HSP-hit_start',
119 'Hsp_hit-to' => 'HSP-hit_end',
120 'Hsp_hit-strand' => 'HSP-hit_strand',
121 'Hsp_positive' => 'HSP-conserved',
122 'Hsp_identity' => 'HSP-identical',
123 'Hsp_gaps' => 'HSP-hsp_gaps',
124 'Hsp_hitgaps' => 'HSP-hit_gaps',
125 'Hsp_querygaps' => 'HSP-query_gaps',
126 'Hsp_qseq' => 'HSP-query_seq',
127 'Hsp_csline' => 'HSP-cs_seq',
128 'Hsp_hseq' => 'HSP-hit_seq',
129 'Hsp_midline' => 'HSP-homology_seq',
130 'Hsp_pline' => 'HSP-pp_seq',
131 'Hsp_align-len' => 'HSP-hsp_length',
132 'Hsp_query-frame' => 'HSP-query_frame',
133 'Hsp_hit-frame' => 'HSP-hit_frame',
135 'Hit_id' => 'HIT-name',
136 'Hit_len' => 'HIT-length',
137 'Hit_accession' => 'HIT-accession',
138 'Hit_desc' => 'HIT-description',
139 'Hit_signif' => 'HIT-significance',
140 'Hit_score' => 'HIT-score',
142 'HMMER_program' => 'RESULT-algorithm_name',
143 'HMMER_version' => 'RESULT-algorithm_version',
144 'HMMER_query-def' => 'RESULT-query_name',
145 'HMMER_query-len' => 'RESULT-query_length',
146 'HMMER_query-acc' => 'RESULT-query_accession',
147 'HMMER_querydesc' => 'RESULT-query_description',
148 'HMMER_hmm' => 'RESULT-hmm_name',
149 'HMMER_seqfile' => 'RESULT-sequence_file',
150 'HMMER_db' => 'RESULT-database_name',
157 Usage : my $hit = $searchio->next_result;
158 Function: Returns the next Result from a search
159 Returns : Bio::Search::Result::ResultI object
166 my ( $buffer, $last, @hit_list, @hsp_list, %hspinfo, %hitinfo, %domaincounter );
169 my @ambiguous_nt = keys %Bio::Tools
::IUPAC
::IUB
;
170 my $ambiguous_nt = join '', @ambiguous_nt;
172 my $verbose = $self->verbose; # cache for speed? JES's idea in hmmer.pm
173 $self->start_document();
175 # This is here to ensure that next_result doesn't produce infinite loop
176 if ( !defined( $buffer = $self->_readline ) ) {
180 $self->_pushback($buffer);
183 my $hit_counter = 0; # helper variable for non-unique hit IDs
185 # Regex goes here for HMMER3
186 # Start with hmmsearch processing
187 while ( defined( $buffer = $self->_readline ) ) {
188 my $lineorig = $buffer;
191 # Grab the program name
192 if ( $buffer =~ m/^\#\s(\S+)\s\:\:\s/ ) {
195 # TO DO: customize the above regex to adapt to other
196 # program types (hmmscan, etc)
197 $self->start_element( { 'Name' => 'HMMER_Output' } );
198 $self->{'_result_count'}++; #Might need to move to another block
200 { 'Name' => 'HMMER_program',
206 # Get the HMMER package version and release date
207 elsif ( $buffer =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/ ) {
209 my $versiondate = $2;
210 $self->{'_hmmidline'} = $buffer;
212 { 'Name' => 'HMMER_version',
219 elsif ( $buffer =~ /^\#\squery (?:\w+ )?file\:\s+(\S+)/ ) {
220 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
221 || $self->{'_reporttype'} eq 'PHMMER'
222 || $self->{'_reporttype'} eq 'NHMMER' )
224 $self->{'_hmmfileline'} = $lineorig;
226 { 'Name' => 'HMMER_hmm',
231 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
232 $self->{'_hmmseqline'} = $lineorig;
234 { 'Name' => 'HMMER_seqfile',
241 # If this is a report without alignments
242 elsif ( $buffer =~ m/^\#\sshow\salignments\sin\soutput/ ) {
243 $self->{'_alnreport'} = 0;
246 # Get the database info
247 elsif ( $buffer =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ) {
249 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
250 || $self->{'_reporttype'} eq 'PHMMER'
251 || $self->{'_reporttype'} eq 'NHMMER' )
253 $self->{'_hmmseqline'} = $lineorig;
255 { 'Name' => 'HMMER_seqfile',
260 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
261 $self->{'_hmmfileline'} = $lineorig;
263 { 'Name' => 'HMMER_hmm',
271 elsif ( $buffer =~ s/^Query:\s+// ) {
272 # For multi-query reports
273 if ( ( not exists $self->{_values
}->{"RESULT-algorithm_name"}
274 or not exists $self->{_values
}->{"RESULT-algorithm_version"}
276 and exists $self->{_hmmidline
}
278 my ($version, $versiondate) = $self->{_hmmidline
} =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/;
280 { 'Name' => 'HMMER_program',
281 'Data' => $self->{_reporttype
}
285 { 'Name' => 'HMMER_version',
290 if ( ( not exists $self->{_values
}->{"RESULT-hmm_name"}
291 or not exists $self->{_values
}->{"RESULT-sequence_file"}
293 and ( exists $self->{_hmmfileline
}
294 or exists $self->{_hmmseqline
}
297 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
298 or $self->{'_reporttype'} eq 'PHMMER'
299 or $self->{'_reporttype'} eq 'NHMMER'
301 my ($qry_file) = $self->{_hmmfileline
} =~ m/^\#\squery (?:\w+ )?file\:\s+(\S+)/;
302 my ($target_file) = $self->{_hmmseqline
} =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/;
304 { 'Name' => 'HMMER_hmm',
309 { 'Name' => 'HMMER_seqfile',
310 'Data' => $target_file
314 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
315 my ($qry_file) = $self->{_hmmseqline
} =~ m/^\#\squery \w+ file\:\s+(\S+)/;
316 my ($target_file) = $self->{_hmmfileline
} =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/;
318 { 'Name' => 'HMMER_seqfile',
323 { 'Name' => 'HMMER_hmm',
324 'Data' => $target_file
330 unless ($buffer =~ s/\s+\[[L|M]\=(\d+)\]$//) {
331 warn "Error parsing length for query, offending line $buffer\n";
336 { 'Name' => 'HMMER_query-len',
341 { 'Name' => 'HMMER_query-def',
348 elsif ( $buffer =~ s/^Accession:\s+// ) {
351 { 'Name' => 'HMMER_query-acc',
357 # Get description data
358 elsif ( $buffer =~ s/^Description:\s+// ) {
361 { 'Name' => 'HMMER_querydesc',
367 # hmmsearch, nhmmer, and hmmscan-specific formatting here
369 defined $self->{'_reporttype'}
370 && ( $self->{'_reporttype'} eq 'HMMSEARCH'
371 || $self->{'_reporttype'} eq 'HMMSCAN'
372 || $self->{'_reporttype'} eq 'PHMMER'
373 || $self->{'_reporttype'} eq 'NHMMER' )
376 # Complete sequence table data above inclusion threshold,
377 # hmmsearch or hmmscan
378 if ( $buffer =~ m/Scores for complete sequence/ ) {
379 while ( defined( $buffer = $self->_readline ) ) {
380 if ( $buffer =~ m/inclusion threshold/
381 || $buffer =~ m/Domain( and alignment)? annotation for each/
382 || $buffer =~ m/\[No hits detected/
383 || $buffer =~ m!^//! )
385 $self->_pushback($buffer);
388 elsif ( $buffer =~ m/^\s+E-value\s+score/
389 || $buffer =~ m/\-\-\-/
398 my ($eval_full, $score_full, $bias_full, $eval_best,
399 $score_best, $bias_best, $exp, $n,
400 $hitid, $desc, @hitline
402 @hitline = split( " ", $buffer );
403 $eval_full = shift @hitline;
404 $score_full = shift @hitline;
405 $bias_full = shift @hitline;
406 $eval_best = shift @hitline;
407 $score_best = shift @hitline;
408 $bias_best = shift @hitline;
409 $exp = shift @hitline;
411 $hitid = shift @hitline;
412 $desc = join " ", @hitline;
414 $desc = '' if ( !defined($desc) );
417 [ $hitid, $desc, $eval_full, $score_full ];
418 $hitinfo{"$hitid.$hit_counter"} = $#hit_list;
423 elsif ( $buffer =~ /Scores for complete hits/ ) {
424 while ( defined( $buffer = $self->_readline ) ) {
425 if ( $buffer =~ /inclusion threshold/
426 || $buffer =~ /Annotation for each hit/
427 || $buffer =~ /\[No hits detected/
428 || $buffer =~ m!^//! )
430 $self->_pushback($buffer);
433 elsif ( $buffer =~ m/^\s+E-value\s+score/
434 || $buffer =~ m/\-\-\-/
443 my ($eval, $score, $bias, $hitid,
444 $start, $end, $desc, @hitline
446 @hitline = split( " ", $buffer );
447 $eval = shift @hitline;
448 $score = shift @hitline;
449 $bias = shift @hitline;
450 $hitid = shift @hitline;
451 $start = shift @hitline;
452 $end = shift @hitline;
453 $desc = join ' ', @hitline;
455 $desc = '' if ( !defined($desc) );
457 push @hit_list, [ $hitid, $desc, $eval, $score ];
458 $hitinfo{"$hitid.$hit_counter"} = $#hit_list;
462 # Complete sequence table data below inclusion threshold
463 elsif ( $buffer =~ /inclusion threshold/ ) {
464 while ( defined( $buffer = $self->_readline ) ) {
465 if ( $buffer =~ /Domain( and alignment)? annotation for each/
466 || $buffer =~ /Internal pipeline statistics summary/
467 || $buffer =~ /Annotation for each hit\s+\(and alignments\)/
470 $self->_pushback($buffer);
473 elsif ( $buffer =~ m/inclusion threshold/
482 my ($eval_full, $score_full, $bias_full, $eval_best,
483 $score_best, $bias_best, $exp, $n,
484 $hitid, $desc, @hitline
486 @hitline = split( " ", $buffer );
487 $eval_full = shift @hitline;
488 $score_full = shift @hitline;
489 $bias_full = shift @hitline;
490 $eval_best = shift @hitline;
491 $score_best = shift @hitline;
492 $bias_best = shift @hitline;
493 $exp = shift @hitline;
495 $hitid = shift @hitline;
496 $desc = join " ", @hitline;
498 $desc = '' if ( !defined($desc) );
501 [ $hitid, $desc, $eval_full, $score_full ];
502 $hitinfo{"$hitid.$hit_counter"} = $#hit_list;
506 # Domain annotation for each sequence table data,
507 # for hmmscan, hmmsearch & nhmmer
508 elsif ( $buffer =~ /Domain( and alignment)? annotation for each/
509 or $buffer =~ /Annotation for each hit\s+\(and alignments\)/
511 @hsp_list = (); # Here for multi-query reports
513 my $annot_counter = 0;
515 while ( defined( $buffer = $self->_readline ) ) {
516 if ( $buffer =~ /\[No targets detected/
517 || $buffer =~ /Internal pipeline statistics/ )
519 $self->_pushback($buffer);
523 if ( $buffer =~ m/^\>\>\s(\S*)\s+(.*)/ ) {
527 $domaincounter{"$name.$annot_counter"} = 0;
529 # The Hit Description from the Scores table can be truncated if
530 # its too long, so use the '>>' line description when its longer
531 if (length $hit_list[
532 $hitinfo{"$name.$annot_counter"}
536 $hit_list[ $hitinfo{"$name.$annot_counter"} ][1] = $desc;
539 while ( defined( $buffer = $self->_readline ) ) {
540 if ( $buffer =~ m/Internal pipeline statistics/
541 || $buffer =~ m/Alignments for each domain/
542 || $buffer =~ m/^\s+Alignment:/
543 || $buffer =~ m/^\>\>/ )
545 $self->_pushback($buffer);
548 elsif ( $buffer =~ m/^\s+score\s+bias/
549 || $buffer =~ m/^\s+\#\s+score/
550 || $buffer =~ m/^\s+------\s+/
551 || $buffer =~ m/^\s\-\-\-\s+/
558 # Grab hsp data from table, push into @hsp;
559 if ($self->{'_reporttype'} =~ m/(?:HMMSCAN|HMMSEARCH|PHMMER|NHMMER)/) {
560 my ( $domain_num, $score, $bias,
562 $hmm_start, $hmm_stop, $hmm_cov,
563 $seq_start, $seq_stop, $seq_cov,
564 $env_start, $env_stop, $env_cov,
568 if ( # HMMSCAN & HMMSEARCH
569 ( $domain_num, $score, $bias,
571 $hmm_start, $hmm_stop, $hmm_cov,
572 $seq_start, $seq_stop, $seq_cov,
573 $env_start, $env_stop, $env_cov,
576 m
|^\s
+(\d
+)\s\
!*\?*\s
+ # domain number
577 (\S
+)\s
+(\S
+)\s
+ # score, bias
578 (\S
+)\s
+(\S
+)\s
+ # c-eval, i-eval
579 (\d
+)\s
+(\d
+)\s
+(\S
+)\s
+ # hmm start, stop, coverage
580 (\d
+)\s
+(\d
+)\s
+(\S
+)\s
+ # seq start, stop, coverage
581 (\d
+)\s
+(\d
+)\s
+(\S
+)\s
+ # env start, stop, coverage
582 (\S
+) # posterior probability accuracy
586 # Values assigned when IF succeeded
588 # Try to get the Hit length from the alignment information
590 if ($self->{'_reporttype'} eq 'HMMSEARCH' || $self->{'_reporttype'} eq 'PHMMER') {
591 # For Hmmsearch, if seq coverage ends in ']' it means that the alignment
592 # runs until the end. In that case add the END coordinate to @hitinfo
593 # to use it as Hit Length
594 if ( $seq_cov =~ m/\]$/ ) {
595 $hitlength = $seq_stop;
598 elsif ($self->{'_reporttype'} eq 'HMMSCAN') {
599 # For Hmmscan, if hmm coverage ends in ']' it means that the alignment
600 # runs until the end. In that case add the END coordinate to @hitinfo
601 # to use it as Hit Length
602 if ( $hmm_cov =~ m/\]$/ ) {
603 $hitlength = $hmm_stop;
608 ( $score, $bias, $ceval,
609 $hmm_start, $hmm_stop, $hmm_cov,
610 $seq_start, $seq_stop, $seq_cov,
611 $env_start, $env_stop, $env_cov,
615 (\S
+)\s
+(\S
+)\s
+(\S
+)\s
+ # score, bias, evalue
616 (\d
+)\s
+(\d
+)\s
+(\S
+)\s
+ # hmm start, stop, coverage
617 (\d
+)\s
+(\d
+)\s
+(\S
+)\s
+ # seq start, stop, coverage
618 (\d
+)\s
+(\d
+)\s
+(\S
+)\s
+ # env start, stop, coverage
619 (\d
+)\s
+(\S
+) # target length, pp accuracy
623 # Values assigned when IF succeeded
626 print STDERR
"Missed this line: $buffer\n";
630 my $info = $hit_list[ $hitinfo{"$name.$annot_counter"} ];
631 if ( !defined $info ) {
633 "Incomplete information: can't find HSP $name in list of hits\n"
638 $domaincounter{"$name.$annot_counter"}++;
640 = $name . "_" . $domaincounter{"$name.$annot_counter"};
642 # Keep it simple for now. let's customize later
644 $hmm_start, $hmm_stop,
645 $seq_start, $seq_stop,
651 push @hsp_list, [ $name, @vals ];
652 $hspinfo{"$hsp_key.$annot_counter"} = $#hsp_list;
656 elsif ( $buffer =~ /Alignment(?:s for each domain)?:/ ) {
660 # There's an optional block, so we sometimes need to
661 # count to 3, and sometimes to 4.
665 my ( $csline, $hline, $midline, $qline, $pline );
667 # To avoid deleting whitespaces from the homology line,
668 # keep track of the position and length of the alignment
669 # in each individual hline/qline, to take them as reference
670 # and use them in the homology line
671 my $align_offset = 0;
672 my $align_length = 0;
674 while ( defined( $buffer = $self->_readline ) ) {
675 if ( $buffer =~ m/^\>\>/
676 || $buffer =~ m/Internal pipeline statistics/ )
678 $self->_pushback($buffer);
681 elsif ($buffer =~ m/^$/ )
683 # Reset these scalars on empty lines to help
684 # distinguish between the consensus structure/reference
685 # tracks (CS|RF lines) and homology lines ending in
686 # CS or RF aminoacids
692 if ( $buffer =~ /\s\s\=\=\sdomain\s(\d+)\s+/
693 or $buffer =~ /\s\sscore:\s\S+\s+/
695 my $domainnum = $1 || 1;
697 my $key = $name . "_" . $domainnum;
698 $hsp = $hsp_list[ $hspinfo{"$key.$annot_counter"} ];
701 $midline = $$hsp[-3];
706 # Consensus Structure or Reference track, some reports
707 # don't have it. Since it appears on top of the alignment,
708 # the reset of $align_length to 0 between alignment blocks
709 # avoid confusing homology lines with it.
710 elsif ( $buffer =~ m/\s+\S+\s(?:CS|RF)$/ and $align_length == 0 ) {
711 my @data = split( " ", $buffer );
712 $csline .= $data[-2];
717 # Query line and Hit line swaps positions
718 # depending of the program
719 elsif ( $count == $max_count - 3
720 or $count == $max_count - 1
722 my @data = split( " ", $buffer );
725 # Use \Q\E on match to avoid errors on alignments
726 # that include stop codons (*)
727 while ($buffer =~ m/\Q$data[-2]\E/g) {
728 $line_offset = pos $buffer;
730 if ($line_offset != 0) {
731 $align_length = length $data[-2];
732 $align_offset = $line_offset - $align_length;
735 if ($self->{'_reporttype'} eq 'HMMSCAN') {
737 $hline .= $data[-2] if ($count == $max_count - 3);
739 $qline .= $data[-2] if ($count == $max_count - 1);
741 else { # hmmsearch & nhmmer
743 $hline .= $data[-2] if ($count == $max_count - 1);
745 $qline .= $data[-2] if ($count == $max_count - 3);
752 # storage isn't quite right - need to remove
753 # leading/lagging whitespace while preserving
754 # gap data (latter isn't done, former is)
755 elsif ( $count == $max_count - 2 ) {
756 $midline .= substr $buffer, $align_offset, $align_length;
760 # posterior probability track
761 elsif ( $count == $max_count ) {
762 my @data = split(" ", $buffer);
768 $$hsp[-3] = $midline;
774 print STDERR
"Missed this line: $buffer\n";
782 elsif ( $buffer =~ m/Internal pipeline statistics/ || $buffer =~ m!^//! ) {
783 # If within hit, hsp close;
784 if ( $self->within_element('hit') ) {
785 if ( $self->within_element('hsp') ) {
786 $self->end_element( { 'Name' => 'Hsp' } );
788 $self->end_element( { 'Name' => 'Hit' } );
791 # Grab summary statistics of run
792 while ( defined( $buffer = $self->_readline ) ) {
793 last if ( $buffer =~ m/^\/\
/$/ );
796 # Do a lot of processing of hits and hsps here
798 while ( my $hit = shift @hit_list ) {
800 my $hit_name = shift @
$hit;
801 my $hit_desc = shift @
$hit;
802 my $hit_signif = shift @
$hit;
803 my $hit_score = shift @
$hit;
804 my $num_domains = $domaincounter{"$hit_name.$index"} || 0;
806 $self->start_element( { 'Name' => 'Hit' } );
808 { 'Name' => 'Hit_id',
813 { 'Name' => 'Hit_desc',
818 { 'Name' => 'Hit_signif',
819 'Data' => $hit_signif
823 { 'Name' => 'Hit_score',
828 for my $i ( 1 .. $num_domains ) {
829 my $key = $hit_name . "_" . $i;
830 my $hsp = $hsp_list[ $hspinfo{"$key.$index"} ];
831 if ( defined $hsp ) {
832 my $hsp_name = shift @
$hsp;
833 $self->start_element( { 'Name' => 'Hsp' } );
834 # Since HMMER doesn't print some data explicitly,
835 # calculate it from the homology line (midline)
836 if ($$hsp[-3] ne '') {
837 my $length = length $$hsp[-3];
838 my $identical = ($$hsp[-3] =~ tr/a-zA-Z//);
839 my $positive = ($$hsp[-3] =~ tr/+//) + $identical;
842 'Name' => 'Hsp_align-len',
847 { 'Name' => 'Hsp_identity',
852 { 'Name' => 'Hsp_positive',
859 { 'Name' => 'Hsp_identity',
864 { 'Name' => 'Hsp_positive',
869 if ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
871 { 'Name' => 'Hsp_hit-from',
872 'Data' => shift @
$hsp
876 { 'Name' => 'Hsp_hit-to',
877 'Data' => shift @
$hsp
881 { 'Name' => 'Hsp_query-from',
882 'Data' => shift @
$hsp
886 { 'Name' => 'Hsp_query-to',
887 'Data' => shift @
$hsp
891 elsif ( $self->{'_reporttype'} eq 'HMMSEARCH'
892 or $self->{'_reporttype'} eq 'NHMMER'
895 { 'Name' => 'Hsp_query-from',
896 'Data' => shift @
$hsp
900 { 'Name' => 'Hsp_query-to',
901 'Data' => shift @
$hsp
905 { 'Name' => 'Hsp_hit-from',
906 'Data' => shift @
$hsp
910 { 'Name' => 'Hsp_hit-to',
911 'Data' => shift @
$hsp
916 { 'Name' => 'Hsp_score',
917 'Data' => shift @
$hsp
921 { 'Name' => 'Hsp_evalue',
922 'Data' => shift @
$hsp
925 my $hitlength = shift @
$hsp;
926 if ( $hitlength != 0 ) {
928 { 'Name' => 'Hit_len',
934 { 'Name' => 'Hsp_csline',
935 'Data' => shift @
$hsp
939 { 'Name' => 'Hsp_hseq',
940 'Data' => shift @
$hsp
944 { 'Name' => 'Hsp_midline',
945 'Data' => shift @
$hsp
949 { 'Name' => 'Hsp_qseq',
950 'Data' => shift @
$hsp
954 { 'Name' => 'Hsp_pline',
955 'Data' => shift @
$hsp
959 # Only nhmmer output has strand information
960 if ( $self->{'_reporttype'} eq 'NHMMER' ) {
961 my $hstart = $self->get_from_element('HSP-hit_start');
962 my $hend = $self->get_from_element('HSP-hit_end');
963 my $hstrand = ( $hstart < $hend ) ?
1 : -1;
965 my $qstart = $self->get_from_element('HSP-query_start');
966 my $qend = $self->get_from_element('HSP-query_end');
967 my $qstrand = ( $qstart < $qend ) ?
1 : -1;
970 { 'Name' => 'Hsp_query-strand',
975 { 'Name' => 'Hsp_hit-strand',
981 $self->end_element( { 'Name' => 'Hsp' } );
984 $self->end_element( { 'Name' => 'Hit' } );
992 print STDERR
"Missed this line: $buffer\n";
993 $self->debug($buffer);
997 $self->end_element( { 'Name' => 'HMMER_Output' } );
998 my $result = $self->end_document();
1002 =head2 start_element
1004 Title : start_element
1005 Usage : $eventgenerator->start_element
1006 Function: Handles a start event
1008 Args : hashref with at least 2 keys 'Data' and 'Name'
1014 my ( $self, $data ) = @_;
1016 # we currently don't care about attributes
1017 my $nm = $data->{'Name'};
1018 my $type = $MODEMAP{$nm};
1020 if ( $self->_eventHandler->will_handle($type) ) {
1021 my $func = sprintf( "start_%s", lc $type );
1022 $self->_eventHandler->$func( $data->{'Attributes'} );
1024 unshift @
{ $self->{'_elements'} }, $type;
1027 && $type eq 'result' )
1029 $self->{'_values'} = {};
1030 $self->{'_result'} = undef;
1037 Usage : $eventgeneartor->end_element
1038 Function: Handles and end element event
1040 Args : hashref with at least 2 keys 'Data' and 'Name'
1046 my ( $self, $data ) = @_;
1047 my $nm = $data->{'Name'};
1048 my $type = $MODEMAP{$nm};
1051 if ( $nm eq 'HMMER_program' ) {
1052 if ( $self->{'_last_data'} =~ /([NP]?HMM\S+)/i ) {
1053 $self->{'_reporttype'} = uc $1;
1057 # Hsp are sort of weird, in that they end when another
1058 # object begins so have to detect this in end_element for now
1059 if ( $nm eq 'Hsp' ) {
1060 foreach my $line (qw(Hsp_csline Hsp_qseq Hsp_midline Hsp_hseq Hsp_pline)) {
1061 my $data = $self->{'_last_hspdata'}->{$line};
1062 if ( $data && $line eq 'Hsp_hseq' ) {
1064 # replace hmm '.' gap symbol by '-'
1073 $self->{'_last_hspdata'} = {};
1076 if ( $self->_eventHandler->will_handle($type) ) {
1077 my $func = sprintf( "end_%s", lc $type );
1078 $rc = $self->_eventHandler->$func( $self->{'_reporttype'},
1079 $self->{'_values'} );
1081 my $lastelem = shift @
{ $self->{'_elements'} };
1083 # Flush corresponding values from the {_values} buffer
1084 my $name = uc $type;
1085 foreach my $key (keys %{ $self->{_values
} }) {
1086 delete $self->{_values
}->{$key} if ($key =~ m/^$name-/);
1089 elsif ( $MAPPING{$nm} ) {
1090 if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
1091 my $key = ( keys %{ $MAPPING{$nm} } )[0];
1092 $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} }
1093 = $self->{'_last_data'};
1096 $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
1098 # print "lastdata is " . $self->{'_last_data'} . "\n";
1102 $self->debug("unknown nm $nm, ignoring\n");
1104 $self->{'_last_data'} = ''; # remove read data if we are at
1106 $self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
1113 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
1114 Function: Convenience method that calls start_element, characters, end_element
1116 Args : Hash ref with the keys 'Name' and 'Data'
1121 my ( $self, $data ) = @_;
1122 $self->start_element($data);
1123 $self->characters($data);
1124 $self->end_element($data);
1127 =head2 get_from_element
1129 Title : get_from_element
1130 Usage : $self->get_from_element('HSP-hit_start');
1131 Function: Convenience method to retrieve data from '_values' hash
1137 sub get_from_element
{
1138 my ($self,$key) = @_;
1139 my $values = $self->{_values
};
1146 Usage : $eventgenerator->characters($str)
1147 Function: Send a character events
1154 my ( $self, $data ) = @_;
1156 if ( $self->in_element('hsp')
1157 && $data->{'Name'} =~ /Hsp\_(?:qseq|hseq|csline|pline|midline)/o
1158 && defined $data->{'Data'} )
1160 $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
1162 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o );
1164 $self->{'_last_data'} = $data->{'Data'};
1167 =head2 within_element
1169 Title : within_element
1170 Usage : if( $eventgenerator->within_element( $element ) ) {}
1171 Function: Test if we are within a particular element
1172 This is different than 'in' because within can be tested for
1175 Args : string element name
1179 sub within_element
{
1180 my ( $self, $name ) = @_;
1183 || !defined $self->{'_elements'}
1184 || scalar @
{ $self->{'_elements'} } == 0 );
1185 foreach my $element ( @
{ $self->{'_elements'} } ) {
1186 return 1 if ( $element eq $name );
1194 Usage : if( $eventgenerator->in_element( $element ) ) {}
1195 Function: Test if we are in a particular element
1196 This is different than 'within' because 'in' only
1197 tests its immediate parent
1199 Args : string element name
1204 my ( $self, $name ) = @_;
1205 return 0 if !defined $self->{'_elements'}->[0];
1206 return ( $self->{'_elements'}->[0] eq $name );
1209 =head2 start_document
1211 Title : start_document
1212 Usage : $eventgenerator->start_document
1213 Function: Handle a start document event
1219 sub start_document
{
1221 $self->{'_lasttype'} = '';
1222 $self->{'_values'} = {};
1223 $self->{'_result'} = undef;
1224 $self->{'_elements'} = [];
1229 Title : end_document
1230 Usage : $eventgenerator->end_document
1231 Function: Handles an end document event
1232 Returns : Bio::Search::Result::ResultI object
1239 return $self->{'_result'};
1244 Title : result_count
1245 Usage : my $count = $searchio->result_count
1246 Function: Returns the number of results processed
1254 return $self->{'_result_count'};