bug 2549; fixed small bug in Bio::Taxon which doesn't catch -common_name
[bioperl-live.git] / Bio / SearchIO / hmmer.pm
blob2edcb0c3d477c9bdf43c3e6e0461dd5771a0accc
1 # $Id$
3 # BioPerl module for Bio::SearchIO::hmmer
5 # Cared for by Jason Stajich <jason@bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::SearchIO::hmmer - A parser for HMMER output (hmmpfam, hmmsearch)
17 =head1 SYNOPSIS
19 # do not use this class directly it is available through Bio::SearchIO
20 use Bio::SearchIO;
21 my $in = Bio::SearchIO->new(-format => 'hmmer',
22 -file => 't/data/L77119.hmmer');
23 while( my $result = $in->next_result ) {
24 # this is a Bio::Search::Result::HMMERResult object
25 print $result->query_name(), " for HMM ", $result->hmm_name(), "\n";
26 while( my $hit = $result->next_hit ) {
27 print $hit->name(), "\n";
28 while( my $hsp = $hit->next_hsp ) {
29 print "length is ", $hsp->length(), "\n";
34 =head1 DESCRIPTION
36 This object implements a parser for HMMER output.
38 =head1 FEEDBACK
40 =head2 Mailing Lists
42 User feedback is an integral part of the evolution of this and other
43 Bioperl modules. Send your comments and suggestions preferably to
44 the Bioperl mailing list. Your participation is much appreciated.
46 bioperl-l@bioperl.org - General discussion
47 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
49 =head2 Reporting Bugs
51 Report bugs to the Bioperl bug tracking system to help us keep track
52 of the bugs and their resolution. Bug reports can be submitted via the
53 web:
55 http://bugzilla.open-bio.org/
57 =head1 AUTHOR - Jason Stajich
59 Email jason-at-bioperl.org
61 =head1 APPENDIX
63 The rest of the documentation details each of the object methods.
64 Internal methods are usually preceded with a _
66 =cut
68 # Let the code begin...
70 package Bio::SearchIO::hmmer;
72 use strict;
74 use Bio::Factory::ObjectFactory;
76 use vars qw(%MAPPING %MODEMAP
79 use base qw(Bio::SearchIO);
81 BEGIN {
83 # mapping of HMMER items to Bioperl hash keys
84 %MODEMAP = (
85 'HMMER_Output' => 'result',
86 'Hit' => 'hit',
87 'Hsp' => 'hsp'
90 %MAPPING = (
91 'Hsp_bit-score' => 'HSP-bits',
92 'Hsp_score' => 'HSP-score',
93 'Hsp_evalue' => 'HSP-evalue',
94 'Hsp_query-from' => 'HSP-query_start',
95 'Hsp_query-to' => 'HSP-query_end',
96 'Hsp_hit-from' => 'HSP-hit_start',
97 'Hsp_hit-to' => 'HSP-hit_end',
98 'Hsp_positive' => 'HSP-conserved',
99 'Hsp_identity' => 'HSP-identical',
100 'Hsp_gaps' => 'HSP-hsp_gaps',
101 'Hsp_hitgaps' => 'HSP-hit_gaps',
102 'Hsp_querygaps' => 'HSP-query_gaps',
103 'Hsp_qseq' => 'HSP-query_seq',
104 'Hsp_hseq' => 'HSP-hit_seq',
105 'Hsp_midline' => 'HSP-homology_seq',
106 'Hsp_align-len' => 'HSP-hsp_length',
107 'Hsp_query-frame' => 'HSP-query_frame',
108 'Hsp_hit-frame' => 'HSP-hit_frame',
110 'Hit_id' => 'HIT-name',
111 'Hit_len' => 'HIT-length',
112 'Hit_accession' => 'HIT-accession',
113 'Hit_desc' => 'HIT-description',
114 'Hit_signif' => 'HIT-significance',
115 'Hit_score' => 'HIT-score',
117 'HMMER_program' => 'RESULT-algorithm_name',
118 'HMMER_version' => 'RESULT-algorithm_version',
119 'HMMER_query-def' => 'RESULT-query_name',
120 'HMMER_query-len' => 'RESULT-query_length',
121 'HMMER_query-acc' => 'RESULT-query_accession',
122 'HMMER_querydesc' => 'RESULT-query_description',
123 'HMMER_hmm' => 'RESULT-hmm_name',
124 'HMMER_seqfile' => 'RESULT-sequence_file',
125 'HMMER_db' => 'RESULT-database_name',
129 =head2 new
131 Title : new
132 Usage : my $obj = Bio::SearchIO::hmmer->new();
133 Function: Builds a new Bio::SearchIO::hmmer object
134 Returns : Bio::SearchIO::hmmer
135 Args : -fh/-file => HMMER filename
136 -format => 'hmmer'
138 =cut
140 sub _initialize {
141 my ( $self, @args ) = @_;
142 $self->SUPER::_initialize(@args);
143 my $handler = $self->_eventHandler;
144 $handler->register_factory(
145 'result',
146 Bio::Factory::ObjectFactory->new(
147 -type => 'Bio::Search::Result::HMMERResult',
148 -interface => 'Bio::Search::Result::ResultI'
152 $handler->register_factory(
153 'hit',
154 Bio::Factory::ObjectFactory->new(
155 -type => 'Bio::Search::Hit::HMMERHit',
156 -interface => 'Bio::Search::Hit::HitI'
160 $handler->register_factory(
161 'hsp',
162 Bio::Factory::ObjectFactory->new(
163 -type => 'Bio::Search::HSP::HMMERHSP',
164 -interface => 'Bio::Search::HSP::HSPI'
167 $self->{'_hmmidline'} = 'HMMER 2.2g (August 2001)';
170 =head2 next_result
172 Title : next_result
173 Usage : my $hit = $searchio->next_result;
174 Function: Returns the next Result from a search
175 Returns : Bio::Search::Result::ResultI object
176 Args : none
178 =cut
180 sub next_result {
181 my ($self) = @_;
182 my $seentop = 0;
183 my $reporttype;
184 my ( $last, @hitinfo, @hspinfo, %hspinfo, %hitinfo );
185 local $/ = "\n";
186 local $_;
188 my $verbose = $self->verbose; # cache for speed?
189 $self->start_document();
190 local ($_);
191 while ( defined( $_ = $self->_readline ) ) {
192 my $lineorig = $_;
193 chomp;
194 if (/^HMMER\s+(\S+)\s+\((.+)\)/o) {
195 my ( $prog, $version ) = split;
196 if ($seentop) {
197 $self->_pushback($_);
198 $self->end_element( { 'Name' => 'HMMER_Output' } );
199 return $self->end_document();
201 $self->{'_hmmidline'} = $_;
202 $self->start_element( { 'Name' => 'HMMER_Output' } );
203 $self->{'_result_count'}++;
204 $seentop = 1;
205 if ( defined $last ) {
206 ($reporttype) = split( /\s+/, $last );
207 $self->element(
209 'Name' => 'HMMER_program',
210 'Data' => uc($reporttype)
214 $self->element(
216 'Name' => 'HMMER_version',
217 'Data' => $version
221 elsif (s/^HMM file:\s+//o) {
222 $self->{'_hmmfileline'} = $lineorig;
223 $self->element(
225 'Name' => 'HMMER_hmm',
226 'Data' => $_
230 elsif (s/^Sequence\s+(file|database):\s+//o) {
231 $self->{'_hmmseqline'} = $lineorig;
232 if ( $1 eq 'database' ) {
233 $self->element(
235 'Name' => 'HMMER_db',
236 'Data' => $_
240 $self->element(
242 'Name' => 'HMMER_seqfile',
243 'Data' => $_
247 elsif (s/^Query(\s+(sequence|HMM))?(?:\s+\d+)?:\s+//o) {
248 if ( !$seentop ) {
250 # we're in a multi-query report
251 $self->_pushback( $self->{'_hmmidline'} );
252 $self->_pushback( $self->{'_hmmfileline'} );
253 $self->_pushback( $self->{'_hmmseqline'} );
254 $self->_pushback($lineorig);
255 next;
257 s/\s+$//;
258 $self->element(
260 'Name' => 'HMMER_query-def',
261 'Data' => $_
265 elsif (s/^Accession:\s+//o) {
266 s/\s+$//;
267 $self->element(
269 'Name' => 'HMMER_query-acc',
270 'Data' => $_
274 elsif (s/^Description:\s+//o) {
275 s/\s+$//;
276 $self->element(
278 'Name' => 'HMMER_querydesc',
279 'Data' => $_
283 elsif ( defined $self->{'_reporttype'}
284 && $self->{'_reporttype'} eq 'HMMSEARCH' )
287 # PROCESS HMMSEARCH RESULTS HERE
288 if (/^Scores for complete sequences/o) {
289 while ( defined( $_ = $self->_readline ) ) {
290 last if (/^\s+$/);
291 next if ( /^Sequence\s+Description/o || /^\-\-\-/o );
292 my @line = split;
293 my ( $name, $n, $evalue, $score ) =
294 ( shift @line, pop @line, pop @line, pop @line );
295 my $desc = join( ' ', @line );
296 push @hitinfo, [ $name, $desc, $evalue, $score ];
297 $hitinfo{$name} = $#hitinfo;
300 elsif (/^Parsed for domains:/o) {
301 @hspinfo = ();
303 while ( defined( $_ = $self->_readline ) ) {
304 last if (/^\s+$/);
305 if (m!^//!) {
306 $self->_pushback($_);
307 last;
309 next if ( /^(Model|Sequence)\s+Domain/ || /^\-\-\-/ );
311 chomp;
312 if (
313 my ( $n, $domainnum, $domainct, @vals ) = (
314 m!^(\S+)\s+ # host name
315 (\d+)/(\d+)\s+ # num/num (ie 1 of 2)
316 (\d+)\s+(\d+).+? # sequence start and end
317 (\d+)\s+(\d+)\s+ # hmm start and end
318 \S+\s+ # []
319 (\S+)\s+ # score
320 (\S+) # evalue
321 \s*$!ox
326 # array lookup so that we can get rid of things
327 # when they've been processed
328 my $info = $hitinfo[ $hitinfo{$n} ];
329 if ( !defined $info ) {
330 $self->warn(
331 "Incomplete Sequence information, can't find $n hitinfo says $hitinfo{$n}"
333 next;
335 push @hspinfo, [ $n, @vals ];
339 elsif (/^Alignments of top/o) {
340 my ( $prelength, $lastdomain, $count, $width );
341 $count = 0;
342 my %domaincounter;
343 my $second_tier = 0;
344 while ( defined( $_ = $self->_readline ) ) {
345 next if ( /^Align/o
346 || /^\s+RF\s+[x\s]+$/o );
347 if ( /^Histogram/o || m!^//!o ) {
348 if ( $self->in_element('hsp') ) {
349 $self->end_element( { 'Name' => 'Hsp' } );
351 if ( $self->within_element('hit') ) {
352 $self->end_element( { 'Name' => 'Hit' } );
354 last;
356 chomp;
358 if (
359 m/^\s*(.+):\s+domain\s+(\d+)\s+of\s+(\d+)\,\s+
360 from\s+(\d+)\s+to\s+(\d+)/x
363 my ( $name, $domainct, $domaintotal, $from, $to ) =
364 ( $1, $2, $3, $4, $5 );
365 $domaincounter{$name}++;
366 if ( $self->within_element('hit') ) {
367 if ( $self->within_element('hsp') ) {
368 $self->end_element( { 'Name' => 'Hsp' } );
370 $self->end_element( { 'Name' => 'Hit' } );
373 $self->start_element( { 'Name' => 'Hit' } );
374 my $info = [
376 $hitinfo[ $hitinfo{$name} ] || $self->throw(
377 "Could not find hit info for $name: Insure that your database contains only unique sequence names"
381 if ( $info->[0] ne $name ) {
382 $self->throw(
383 "Somehow the Model table order does not match the order in the domains (got "
384 . $info->[0]
385 . ", expected $name)" );
387 $self->element(
389 'Name' => 'Hit_id',
390 'Data' => shift @{$info}
393 $self->element(
395 'Name' => 'Hit_desc',
396 'Data' => shift @{$info}
399 $self->element(
401 'Name' => 'Hit_signif',
402 'Data' => shift @{$info}
405 $self->element(
407 'Name' => 'Hit_score',
408 'Data' => shift @{$info}
412 $self->start_element( { 'Name' => 'Hsp' } );
413 $self->element(
415 'Name' => 'Hsp_identity',
416 'Data' => 0
419 $self->element(
421 'Name' => 'Hsp_positive',
422 'Data' => 0
425 my $HSPinfo = shift @hspinfo;
426 my $id = shift @$HSPinfo;
428 if ( $id ne $name ) {
429 $self->throw(
430 "Somehow the domain list details do not match the table (got $id, expected $name)"
433 if ( $domaincounter{$name} == $domaintotal ) {
434 $hitinfo[ $hitinfo{$name} ] = undef;
436 $self->element(
438 'Name' => 'Hsp_hit-from',
439 'Data' => shift @$HSPinfo
442 $self->element(
444 'Name' => 'Hsp_hit-to',
445 'Data' => shift @$HSPinfo
448 $self->element(
450 'Name' => 'Hsp_query-from',
451 'Data' => shift @$HSPinfo
454 $self->element(
456 'Name' => 'Hsp_query-to',
457 'Data' => shift @$HSPinfo
460 $self->element(
462 'Name' => 'Hsp_score',
463 'Data' => shift @$HSPinfo
466 $self->element(
468 'Name' => 'Hsp_evalue',
469 'Data' => shift @$HSPinfo
472 $lastdomain = $name;
474 else {
476 # Might want to change this so that it
477 # accumulates all the of the alignment lines into
478 # three array slots and then tests for the
479 # end of the line
480 if (/^(\s+\*\-\>)(\S+)/o) { # start of domain
481 $prelength = CORE::length($1);
482 $width = 0;
484 # deal with fact that start en stop is on same line
485 my $data = $2;
486 if ($data =~ s/\<\-?\*?\s*$//)
488 $width = CORE::length($data);
491 $self->element(
493 'Name' => 'Hsp_qseq',
494 'Data' => $data
497 $count = 0;
498 $second_tier = 0;
500 elsif (/^(\s+)(\S+)\<\-\*\s*$/o) { #end of domain
501 $self->element(
503 'Name' => 'Hsp_qseq',
504 'Data' => $2
507 $width = CORE::length($2);
508 $count = 0;
510 elsif (( $count != 1 && /^\s+$/o )
511 || CORE::length($_) == 0
512 || /^\s+\-?\*\s*$/ )
514 next;
516 elsif ( $count == 0 ) {
517 $prelength -= 3 unless ( $second_tier++ );
518 unless ( defined $prelength ) {
520 # $self->warn("prelength not set");
521 next;
523 $self->element(
525 'Name' => 'Hsp_qseq',
526 'Data' => substr( $_, $prelength )
530 elsif ( $count == 1 ) {
531 if ( !defined $prelength ) {
532 $self->warn("prelength not set");
534 if ($width) {
535 $self->element(
537 'Name' => 'Hsp_midline',
538 'Data' =>
539 substr( $_, $prelength, $width )
543 else {
544 $self->debug("midline is $_\n")
545 if ( $verbose > 0
546 && CORE::length($_) <= $prelength );
547 $self->element(
549 'Name' => 'Hsp_midline',
550 'Data' => substr( $_, $prelength )
555 elsif ( $count == 2 ) {
556 if (/^\s+(\S+)\s+(\d+|\-)\s+(\S*)\s+(\d+|\-)/o) {
557 $self->element(
559 'Name' => 'Hsp_hseq',
560 'Data' => $3
564 else {
565 $self->warn("unrecognized line: $_\n");
568 $count = 0 if $count++ >= 2;
572 elsif ( /^Histogram/o || m!^//!o ) {
573 while ( my $HSPinfo = shift @hspinfo ) {
574 my $id = shift @$HSPinfo;
575 my $info = [ @{ $hitinfo[ $hitinfo{$id} ] } ];
576 next unless defined $info;
577 $self->start_element( { 'Name' => 'Hit' } );
578 $self->element(
580 'Name' => 'Hit_id',
581 'Data' => shift @{$info}
584 $self->element(
586 'Name' => 'Hit_desc',
587 'Data' => shift @{$info}
590 $self->element(
592 'Name' => 'Hit_signif',
593 'Data' => shift @{$info}
596 $self->element(
598 'Name' => 'Hit_score',
599 'Data' => shift @{$info}
602 $self->start_element( { 'Name' => 'Hsp' } );
603 $self->element(
605 'Name' => 'Hsp_query-from',
606 'Data' => shift @$HSPinfo
609 $self->element(
611 'Name' => 'Hsp_query-to',
612 'Data' => shift @$HSPinfo
615 $self->element(
617 'Name' => 'Hsp_hit-from',
618 'Data' => shift @$HSPinfo
621 $self->element(
623 'Name' => 'Hsp_hit-to',
624 'Data' => shift @$HSPinfo
627 $self->element(
629 'Name' => 'Hsp_score',
630 'Data' => shift @$HSPinfo
633 $self->element(
635 'Name' => 'Hsp_evalue',
636 'Data' => shift @$HSPinfo
639 $self->element(
641 'Name' => 'Hsp_identity',
642 'Data' => 0
645 $self->element(
647 'Name' => 'Hsp_positive',
648 'Data' => 0
651 $self->element(
653 'Name' => 'Hsp_positive',
654 'Data' => 0
657 $self->end_element( { 'Name' => 'Hsp' } );
658 $self->end_element( { 'Name' => 'Hit' } );
660 @hitinfo = ();
661 %hitinfo = ();
662 last;
665 elsif ( defined $self->{'_reporttype'}
666 && $self->{'_reporttype'} eq 'HMMPFAM' )
668 # process HMMPFAM results here
669 if (/^Scores for sequence family/o) {
670 while ( defined( $_ = $self->_readline ) ) {
671 last if (/^\s+$/);
672 next if ( /^Model\s+Description/o || /^\-\-\-/o );
673 chomp;
674 my @line = split;
675 my ( $model, $n, $evalue, $score ) =
676 ( shift @line, pop @line, pop @line, pop @line );
677 my $desc = join( ' ', @line );
678 push @hitinfo, [ $model, $desc, $score, $evalue, $n ];
679 $hitinfo{$model} = $#hitinfo;
682 elsif (/^Parsed for domains:/o) {
683 @hspinfo = ();
684 while ( defined( $_ = $self->_readline ) ) {
685 last if (/^\s+$/);
686 if (m!^//!) {
687 $self->_pushback($_);
688 last;
690 next if ( /^Model\s+Domain/o || /^\-\-\-/o );
691 chomp;
692 if (
693 my ( $n, $domainnum, $domainct, @vals ) = (
694 m!^(\S+)\s+ # domain name
695 (\d+)/(\d+)\s+ # domain num out of num
696 (\d+)\s+(\d+).+? # seq start, end
697 (\d+)\s+(\d+)\s+ # hmm start, end
698 \S+\s+ # []
699 (\S+)\s+ # score
700 (\S+) # evalue
701 \s*$!ox
705 my $hindex = $hitinfo{$n};
706 if ( !defined $hindex ) {
707 push @hitinfo,
708 [ $n, '', $vals[5], $vals[6], $domainct ];
709 $hitinfo{$n} = $#hitinfo;
710 $hindex = $#hitinfo;
712 my $info = $hitinfo[$hindex];
713 if ( !defined $info ) {
714 $self->warn(
715 "incomplete Domain information, can't find $n hitinfo says $hitinfo{$n}"
717 next;
719 push @hspinfo, [ $n, @vals ];
723 elsif (/^Alignments of top/o) {
724 my ( $prelength, $lastdomain, $count, $width );
725 $count = 0;
726 my $second_tier = 0;
727 while ( defined( $_ = $self->_readline ) ) {
728 next
729 if (
730 /^Align/o
731 || ( $count != 1
732 && /^\s+RF\s+[x\s]+$/o )
734 $self->debug("$count $_");
735 if ( /^Histogram/o || m!^//!o || /^Query sequence/o ) {
736 if ( $self->in_element('hsp') ) {
737 $self->end_element( { 'Name' => 'Hsp' } );
739 if ( $self->in_element('hit') ) {
740 $self->end_element( { 'Name' => 'Hit' } );
742 $self->_pushback($_);
743 last;
745 chomp;
746 if (m/(\S+):.*from\s+(\d+)\s+to\s+(\d+)/o) {
747 my ( $name, $from, $to ) = ( $1, $2, $3 );
749 if ( $self->within_element('hit') ) {
750 if ( $self->in_element('hsp') ) {
751 $self->end_element( { 'Name' => 'Hsp' } );
753 $self->end_element( { 'Name' => 'Hit' } );
755 my $info = [ @{ $hitinfo[ $hitinfo{$name} ] } ];
756 if ( !defined $info
757 || $info->[0] ne $name )
759 $self->warn(
760 "Somehow the Model table order does not match the order in the domains (got "
761 . $info->[0]
762 . ", expected $name). We're back loading this from the alignment information instead"
764 $info = [
765 $name, '',
766 /score\s+([^,\s]+),\s+E\s+=\s+(\S+)/ox
768 push @hitinfo, $info;
769 $hitinfo{$name} = $#hitinfo;
771 $self->start_element( { 'Name' => 'Hit' } );
773 $self->element(
775 'Name' => 'Hit_id',
776 'Data' => shift @{$info}
779 $self->element(
781 'Name' => 'Hit_desc',
782 'Data' => shift @{$info}
785 $self->element(
787 'Name' => 'Hit_score',
788 'Data' => shift @{$info}
791 $self->element(
793 'Name' => 'Hit_signif',
794 'Data' => shift @{$info}
798 $self->start_element( { 'Name' => 'Hsp' } );
799 $self->element(
801 'Name' => 'Hsp_identity',
802 'Data' => 0
805 $self->element(
807 'Name' => 'Hsp_positive',
808 'Data' => 0
811 my $HSPinfo = shift @hspinfo;
812 my $id = shift @$HSPinfo;
814 if ( $id ne $name ) {
815 $self->throw(
816 "Somehow the domain list details do not match the table (got $id, expected $name)"
819 $self->element(
821 'Name' => 'Hsp_query-from',
822 'Data' => shift @$HSPinfo
825 $self->element(
827 'Name' => 'Hsp_query-to',
828 'Data' => shift @$HSPinfo
831 $self->element(
833 'Name' => 'Hsp_hit-from',
834 'Data' => shift @$HSPinfo
837 $self->element(
839 'Name' => 'Hsp_hit-to',
840 'Data' => shift @$HSPinfo
843 $self->element(
845 'Name' => 'Hsp_score',
846 'Data' => shift @$HSPinfo
849 $self->element(
851 'Name' => 'Hsp_evalue',
852 'Data' => shift @$HSPinfo
855 $lastdomain = $name;
857 else {
858 if (/^(\s+\*\-\>)(\S+)/o) {
860 # start of domain
861 $prelength = CORE::length($1);
862 $width = 0;
864 # deal with fact that start en stop is on same line
865 my $data = $2;
866 if ($data =~ s/\<\-?\*?\s*$//)
868 $width = CORE::length($data);
871 $self->element(
873 'Name' => 'Hsp_hseq',
874 'Data' => $data
877 $count = 0;
878 $second_tier = 0;
881 elsif (/^(\s+)(\S+)\<\-?\*?\s*$/o) {
883 #end of domain
884 $prelength -= 3 unless ( $second_tier++ );
885 $self->element(
887 'Name' => 'Hsp_hseq',
888 'Data' => $2
891 $width = CORE::length($2);
892 $count = 0;
894 elsif (CORE::length($_) == 0
895 || ( $count != 1 && /^\s+$/o )
896 || /^\s+\-?\*\s*$/ )
898 next;
900 elsif ( $count == 0 ) {
901 $prelength -= 3 unless ( $second_tier++ );
902 unless ( defined $prelength ) {
904 # $self->warn("prelength not set");
905 next;
907 $self->element(
909 'Name' => 'Hsp_hseq',
910 'Data' => substr( $_, $prelength )
914 elsif ( $count == 1 ) {
915 if ( !defined $prelength ) {
916 $self->warn("prelength not set");
918 if ($width) {
919 $self->element(
921 'Name' => 'Hsp_midline',
922 'Data' =>
923 substr( $_, $prelength, $width )
927 else {
928 $self->element(
930 'Name' => 'Hsp_midline',
931 'Data' => substr( $_, $prelength )
936 elsif ( $count == 2 ) {
937 if ( /^\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)/o
938 || /^\s+(\S+)\s+(\-)\s+(\S*)\s+(\-)/o )
940 $self->element(
942 'Name' => 'Hsp_qseq',
943 'Data' => $3
947 else {
948 $self->throw(
949 "unrecognized line ($count): $_\n");
952 $count = 0 if $count++ >= 2;
956 elsif ( /^Histogram/o || m!^//!o ) {
958 while ( my $HSPinfo = shift @hspinfo ) {
959 my $id = shift @$HSPinfo;
960 my $info = [ @{ $hitinfo[ $hitinfo{$id} ] } ];
961 next unless defined $info;
962 $self->start_element( { 'Name' => 'Hit' } );
963 $self->element(
965 'Name' => 'Hit_id',
966 'Data' => shift @{$info}
969 $self->element(
971 'Name' => 'Hit_desc',
972 'Data' => shift @{$info}
975 $self->element(
977 'Name' => 'Hit_signif',
978 'Data' => shift @{$info}
981 $self->element(
983 'Name' => 'Hit_score',
984 'Data' => shift @{$info}
987 $self->start_element( { 'Name' => 'Hsp' } );
988 $self->element(
990 'Name' => 'Hsp_query-from',
991 'Data' => shift @$HSPinfo
994 $self->element(
996 'Name' => 'Hsp_query-to',
997 'Data' => shift @$HSPinfo
1000 $self->element(
1002 'Name' => 'Hsp_hit-from',
1003 'Data' => shift @$HSPinfo
1006 $self->element(
1008 'Name' => 'Hsp_hit-to',
1009 'Data' => shift @$HSPinfo
1012 $self->element(
1014 'Name' => 'Hsp_score',
1015 'Data' => shift @$HSPinfo
1018 $self->element(
1020 'Name' => 'Hsp_evalue',
1021 'Data' => shift @$HSPinfo
1024 $self->element(
1026 'Name' => 'Hsp_identity',
1027 'Data' => 0
1030 $self->element(
1032 'Name' => 'Hsp_positive',
1033 'Data' => 0
1036 $self->element(
1038 'Name' => 'Hsp_positive',
1039 'Data' => 0
1042 $self->end_element( { 'Name' => 'Hsp' } );
1043 $self->end_element( { 'Name' => 'Hit' } );
1045 @hitinfo = ();
1046 %hitinfo = ();
1047 last;
1049 else {
1050 $self->debug($_);
1053 $last = $_;
1055 $self->end_element( { 'Name' => 'HMMER_Output' } ) unless !$seentop;
1056 return $self->end_document();
1059 =head2 start_element
1061 Title : start_element
1062 Usage : $eventgenerator->start_element
1063 Function: Handles a start element event
1064 Returns : none
1065 Args : hashref with at least 2 keys 'Data' and 'Name'
1068 =cut
1070 sub start_element {
1071 my ( $self, $data ) = @_;
1073 # we currently don't care about attributes
1074 my $nm = $data->{'Name'};
1075 my $type = $MODEMAP{$nm};
1076 if ($type) {
1077 if ( $self->_eventHandler->will_handle($type) ) {
1078 my $func = sprintf( "start_%s", lc $type );
1079 $self->_eventHandler->$func( $data->{'Attributes'} );
1081 unshift @{ $self->{'_elements'} }, $type;
1083 if ( defined $type
1084 && $type eq 'result' )
1086 $self->{'_values'} = {};
1087 $self->{'_result'} = undef;
1091 =head2 end_element
1093 Title : start_element
1094 Usage : $eventgenerator->end_element
1095 Function: Handles an end element event
1096 Returns : none
1097 Args : hashref with at least 2 keys 'Data' and 'Name'
1100 =cut
1102 sub end_element {
1103 my ( $self, $data ) = @_;
1104 my $nm = $data->{'Name'};
1105 my $type = $MODEMAP{$nm};
1106 my $rc;
1108 if ( $nm eq 'HMMER_program' ) {
1109 if ( $self->{'_last_data'} =~ /(HMM\S+)/i ) {
1110 $self->{'_reporttype'} = uc $1;
1114 # Hsp are sort of weird, in that they end when another
1115 # object begins so have to detect this in end_element for now
1116 if ( $nm eq 'Hsp' ) {
1117 foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
1118 my $data = $self->{'_last_hspdata'}->{$_};
1119 if ($data && $_ eq 'Hsp_hseq') {
1120 # replace hmm '.' gap symbol by '-'
1121 $data =~ s/\./-/g;
1123 $self->element(
1125 'Name' => $_,
1126 'Data' => $data
1130 $self->{'_last_hspdata'} = {};
1132 if ($type) {
1133 if ( $self->_eventHandler->will_handle($type) ) {
1134 my $func = sprintf( "end_%s", lc $type );
1135 $rc = $self->_eventHandler->$func( $self->{'_reporttype'},
1136 $self->{'_values'} );
1138 my $lastelem = shift @{ $self->{'_elements'} };
1140 elsif ( $MAPPING{$nm} ) {
1141 if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
1142 my $key = ( keys %{ $MAPPING{$nm} } )[0];
1143 $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } =
1144 $self->{'_last_data'};
1146 else {
1147 $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
1150 else {
1151 $self->debug("unknown nm $nm, ignoring\n");
1153 $self->{'_last_data'} = ''; # remove read data if we are at
1154 # end of an element
1155 $self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
1156 return $rc;
1159 =head2 element
1161 Title : element
1162 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
1163 Function: Convience method that calls start_element, characters, end_element
1164 Returns : none
1165 Args : Hash ref with the keys 'Name' and 'Data'
1168 =cut
1170 sub element {
1171 my ( $self, $data ) = @_;
1172 $self->start_element($data);
1173 $self->characters($data);
1174 $self->end_element($data);
1177 =head2 characters
1179 Title : characters
1180 Usage : $eventgenerator->characters($str)
1181 Function: Send a character events
1182 Returns : none
1183 Args : string
1186 =cut
1188 sub characters {
1189 my ( $self, $data ) = @_;
1191 if ( $self->in_element('hsp')
1192 && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o
1193 && defined $data->{'Data'} )
1195 $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
1197 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o );
1199 $self->{'_last_data'} = $data->{'Data'};
1202 =head2 within_element
1204 Title : within_element
1205 Usage : if( $eventgenerator->within_element($element) ) {}
1206 Function: Test if we are within a particular element
1207 This is different than 'in' because within can be tested
1208 for a whole block.
1209 Returns : boolean
1210 Args : string element name
1213 =cut
1215 sub within_element {
1216 my ( $self, $name ) = @_;
1217 return 0
1218 if ( !defined $name
1219 || !defined $self->{'_elements'}
1220 || scalar @{ $self->{'_elements'} } == 0 );
1221 foreach ( @{ $self->{'_elements'} } ) {
1222 return 1 if ( $_ eq $name );
1224 return 0;
1227 =head2 in_element
1229 Title : in_element
1230 Usage : if( $eventgenerator->in_element($element) ) {}
1231 Function: Test if we are in a particular element
1232 This is different than 'within' because 'in' only
1233 tests its immediete parent.
1234 Returns : boolean
1235 Args : string element name
1238 =cut
1240 sub in_element {
1241 my ( $self, $name ) = @_;
1242 return 0 if !defined $self->{'_elements'}->[0];
1243 return ( $self->{'_elements'}->[0] eq $name );
1246 =head2 start_document
1248 Title : start_document
1249 Usage : $eventgenerator->start_document
1250 Function: Handle a start document event
1251 Returns : none
1252 Args : none
1255 =cut
1257 sub start_document {
1258 my ($self) = @_;
1259 $self->{'_lasttype'} = '';
1260 $self->{'_values'} = {};
1261 $self->{'_result'} = undef;
1262 $self->{'_elements'} = [];
1265 =head2 end_document
1267 Title : end_document
1268 Usage : $eventgenerator->end_document
1269 Function: Handles an end document event
1270 Returns : Bio::Search::Result::ResultI object
1271 Args : none
1274 =cut
1276 sub end_document {
1277 my ($self) = @_;
1278 return $self->{'_result'};
1281 =head2 result_count
1283 Title : result_count
1284 Usage : my $count = $searchio->result_count
1285 Function: Returns the number of results we have processed
1286 Returns : integer
1287 Args : none
1290 =cut
1292 sub result_count {
1293 my $self = shift;
1294 return $self->{'_result_count'};