comment out FeatureIO, let Annotated tests fail until they are fixed
[bioperl-live.git] / Bio / SearchIO / hmmer2.pm
blob771036cf07a6f0b694170cd8320c0d2853283559
2 # BioPerl module for Bio::SearchIO::hmmer2
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SearchIO::hmmer2 - A parser for HMMER output (hmmpfam, hmmsearch)
18 =head1 SYNOPSIS
20 # do not use this class directly it is available through Bio::SearchIO
21 use Bio::SearchIO;
22 my $in = Bio::SearchIO->new(-format => 'hmmer2',
23 -file => 't/data/L77119.hmmer');
24 while( my $result = $in->next_result ) {
25 # this is a Bio::Search::Result::HMMERResult object
26 print $result->query_name(), " for HMM ", $result->hmm_name(), "\n";
27 while( my $hit = $result->next_hit ) {
28 print $hit->name(), "\n";
29 while( my $hsp = $hit->next_hsp ) {
30 print "length is ", $hsp->length(), "\n";
35 =head1 DESCRIPTION
37 This object implements a parser for HMMER output.
39 =head1 FEEDBACK
41 =head2 Mailing Lists
43 User feedback is an integral part of the evolution of this and other
44 Bioperl modules. Send your comments and suggestions preferably to
45 the Bioperl mailing list. Your participation is much appreciated.
47 bioperl-l@bioperl.org - General discussion
48 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50 =head2 Support
52 Please direct usage questions or support issues to the mailing list:
54 I<bioperl-l@bioperl.org>
56 rather than to the module maintainer directly. Many experienced and
57 reponsive experts will be able look at the problem and quickly
58 address it. Please include a thorough description of the problem
59 with code and data examples if at all possible.
61 =head2 Reporting Bugs
63 Report bugs to the Bioperl bug tracking system to help us keep track
64 of the bugs and their resolution. Bug reports can be submitted via the
65 web:
67 https://redmine.open-bio.org/projects/bioperl/
69 =head1 AUTHOR - Jason Stajich
71 Email jason-at-bioperl.org
73 =head1 APPENDIX
75 The rest of the documentation details each of the object methods.
76 Internal methods are usually preceded with a _
78 =cut
80 # Let the code begin...
82 package Bio::SearchIO::hmmer2;
84 use strict;
86 use Bio::Factory::ObjectFactory;
88 use vars qw(%MAPPING %MODEMAP
91 use base qw(Bio::SearchIO::hmmer);
93 BEGIN {
95 # mapping of HMMER items to Bioperl hash keys
96 %MODEMAP = (
97 'HMMER_Output' => 'result',
98 'Hit' => 'hit',
99 'Hsp' => 'hsp'
102 %MAPPING = (
103 'Hsp_bit-score' => 'HSP-bits',
104 'Hsp_score' => 'HSP-score',
105 'Hsp_evalue' => 'HSP-evalue',
106 'Hsp_query-from' => 'HSP-query_start',
107 'Hsp_query-to' => 'HSP-query_end',
108 'Hsp_hit-from' => 'HSP-hit_start',
109 'Hsp_hit-to' => 'HSP-hit_end',
110 'Hsp_positive' => 'HSP-conserved',
111 'Hsp_identity' => 'HSP-identical',
112 'Hsp_gaps' => 'HSP-hsp_gaps',
113 'Hsp_hitgaps' => 'HSP-hit_gaps',
114 'Hsp_querygaps' => 'HSP-query_gaps',
115 'Hsp_qseq' => 'HSP-query_seq',
116 'Hsp_hseq' => 'HSP-hit_seq',
117 'Hsp_midline' => 'HSP-homology_seq',
118 'Hsp_align-len' => 'HSP-hsp_length',
119 'Hsp_query-frame' => 'HSP-query_frame',
120 'Hsp_hit-frame' => 'HSP-hit_frame',
122 'Hit_id' => 'HIT-name',
123 'Hit_len' => 'HIT-length',
124 'Hit_accession' => 'HIT-accession',
125 'Hit_desc' => 'HIT-description',
126 'Hit_signif' => 'HIT-significance',
127 'Hit_score' => 'HIT-score',
129 'HMMER_program' => 'RESULT-algorithm_name',
130 'HMMER_version' => 'RESULT-algorithm_version',
131 'HMMER_query-def' => 'RESULT-query_name',
132 'HMMER_query-len' => 'RESULT-query_length',
133 'HMMER_query-acc' => 'RESULT-query_accession',
134 'HMMER_querydesc' => 'RESULT-query_description',
135 'HMMER_hmm' => 'RESULT-hmm_name',
136 'HMMER_seqfile' => 'RESULT-sequence_file',
137 'HMMER_db' => 'RESULT-database_name',
141 =head2 next_result
143 Title : next_result
144 Usage : my $hit = $searchio->next_result;
145 Function: Returns the next Result from a search
146 Returns : Bio::Search::Result::ResultI object
147 Args : none
149 =cut
151 sub next_result {
152 my ($self) = @_;
153 my $seentop = 0;
154 my $reporttype;
155 my ( $last, @hitinfo, @hspinfo, %hspinfo, %hitinfo );
156 local $/ = "\n";
157 local $_;
159 my $verbose = $self->verbose; # cache for speed?
160 $self->start_document();
161 local ($_);
162 while ( defined( $_ = $self->_readline ) ) {
163 my $lineorig = $_;
164 chomp;
165 if (/^HMMER\s+(\S+)\s+\((.+)\)/o) {
166 my ( $prog, $version ) = split;
167 if ($seentop) {
168 $self->_pushback($_);
169 $self->end_element( { 'Name' => 'HMMER_Output' } );
170 return $self->end_document();
172 $self->{'_hmmidline'} = $_;
173 $self->start_element( { 'Name' => 'HMMER_Output' } );
174 $self->{'_result_count'}++;
175 $seentop = 1;
176 if ( defined $last ) {
177 ($reporttype) = split( /\s+/, $last );
178 $reporttype = uc($reporttype) if defined $reporttype;
179 $self->element(
181 'Name' => 'HMMER_program',
182 'Data' => $reporttype
186 $self->element(
188 'Name' => 'HMMER_version',
189 'Data' => $version
193 elsif (s/^HMM file:\s+//o) {
194 $self->{'_hmmfileline'} = $lineorig;
195 $self->element(
197 'Name' => 'HMMER_hmm',
198 'Data' => $_
202 elsif (s/^Sequence\s+(file|database):\s+//o) {
203 $self->{'_hmmseqline'} = $lineorig;
204 if ( $1 eq 'database' ) {
205 $self->element(
207 'Name' => 'HMMER_db',
208 'Data' => $_
212 $self->element(
214 'Name' => 'HMMER_seqfile',
215 'Data' => $_
219 elsif (s/^Query(\s+(sequence|HMM))?(?:\s+\d+)?:\s+//o) {
220 if ( !$seentop ) {
222 # we're in a multi-query report
223 $self->_pushback($lineorig);
224 $self->_pushback( $self->{'_hmmseqline'} );
225 $self->_pushback( $self->{'_hmmfileline'} );
226 $self->_pushback( $self->{'_hmmidline'} );
227 next;
229 s/\s+$//;
230 $self->element(
232 'Name' => 'HMMER_query-def',
233 'Data' => $_
237 elsif (s/^Accession:\s+//o) {
238 s/\s+$//;
239 $self->element(
241 'Name' => 'HMMER_query-acc',
242 'Data' => $_
246 elsif (s/^Description:\s+//o) {
247 s/\s+$//;
248 $self->element(
250 'Name' => 'HMMER_querydesc',
251 'Data' => $_
255 elsif ( defined $self->{'_reporttype'}
256 && $self->{'_reporttype'} eq 'HMMSEARCH' )
259 # PROCESS HMMSEARCH RESULTS HERE
260 if (/^Scores for complete sequences/o) {
261 while ( defined( $_ = $self->_readline ) ) {
262 last if (/^\s+$/);
263 next if ( /^Sequence\s+Description/o || /^\-\-\-/o );
264 my @line = split;
265 my ( $name, $n, $evalue, $score ) =
266 ( shift @line, pop @line, pop @line, pop @line );
267 my $desc = join( ' ', @line );
268 push @hitinfo, [ $name, $desc, $evalue, $score ];
269 $hitinfo{$name} = $#hitinfo;
272 elsif (/^Parsed for domains:/o) {
273 @hspinfo = ();
275 while ( defined( $_ = $self->_readline ) ) {
276 last if (/^\s+$/);
277 if (m!^//!) {
278 $self->_pushback($_);
279 last;
281 next if ( /^(Model|Sequence)\s+Domain/ || /^\-\-\-/ );
283 chomp;
284 if (
285 my ( $n, $domainnum, $domainct, @vals ) = (
286 m!^(\S+)\s+ # host name
287 (\d+)/(\d+)\s+ # num/num (ie 1 of 2)
288 (\d+)\s+(\d+).+? # sequence start and end
289 (\d+)\s+(\d+)\s+ # hmm start and end
290 \S+\s+ # []
291 (\S+)\s+ # score
292 (\S+) # evalue
293 \s*$!ox
298 # array lookup so that we can get rid of things
299 # when they've been processed
300 my $info = $hitinfo[ $hitinfo{$n} ];
301 if ( !defined $info ) {
302 $self->warn(
303 "Incomplete Sequence information, can't find $n hitinfo says $hitinfo{$n}"
305 next;
307 push @hspinfo, [ $n, @vals ];
311 elsif (/^Alignments of top/o) {
312 my ( $prelength, $lastdomain, $count, $width );
313 $count = 0;
314 my %domaincounter;
315 my $second_tier = 0;
316 while ( defined( $_ = $self->_readline ) ) {
317 next if ( /^Align/o
318 || /^\s+RF\s+[x\s]+$/o );
319 if ( /^Histogram/o || m!^//!o ) {
320 if ( $self->in_element('hsp') ) {
321 $self->end_element( { 'Name' => 'Hsp' } );
323 if ( $self->within_element('hit') ) {
324 $self->end_element( { 'Name' => 'Hit' } );
326 last;
328 chomp;
330 if (
331 m/^\s*(.+):\s+domain\s+(\d+)\s+of\s+(\d+)\,\s+
332 from\s+(\d+)\s+to\s+(\d+)/x
335 my ( $name, $domainct, $domaintotal, $from, $to ) =
336 ( $1, $2, $3, $4, $5 );
337 $domaincounter{$name}++;
338 if ( $self->within_element('hit') ) {
339 if ( $self->within_element('hsp') ) {
340 $self->end_element( { 'Name' => 'Hsp' } );
342 $self->end_element( { 'Name' => 'Hit' } );
345 $self->start_element( { 'Name' => 'Hit' } );
346 my $info = [
348 $hitinfo[ $hitinfo{$name} ] || $self->throw(
349 "Could not find hit info for $name: Insure that your database contains only unique sequence names"
353 if ( $info->[0] ne $name ) {
354 $self->throw(
355 "Somehow the Model table order does not match the order in the domains (got "
356 . $info->[0]
357 . ", expected $name)" );
359 $self->element(
361 'Name' => 'Hit_id',
362 'Data' => shift @{$info}
365 $self->element(
367 'Name' => 'Hit_desc',
368 'Data' => shift @{$info}
371 $self->element(
373 'Name' => 'Hit_signif',
374 'Data' => shift @{$info}
377 $self->element(
379 'Name' => 'Hit_score',
380 'Data' => shift @{$info}
384 $self->start_element( { 'Name' => 'Hsp' } );
385 $self->element(
387 'Name' => 'Hsp_identity',
388 'Data' => 0
391 $self->element(
393 'Name' => 'Hsp_positive',
394 'Data' => 0
397 my $HSPinfo = shift @hspinfo;
398 my $id = shift @$HSPinfo;
400 if ( $id ne $name ) {
401 $self->throw(
402 "Somehow the domain list details do not match the table (got $id, expected $name)"
405 if ( $domaincounter{$name} == $domaintotal ) {
406 $hitinfo[ $hitinfo{$name} ] = undef;
408 $self->element(
410 'Name' => 'Hsp_hit-from',
411 'Data' => shift @$HSPinfo
414 $self->element(
416 'Name' => 'Hsp_hit-to',
417 'Data' => shift @$HSPinfo
420 $self->element(
422 'Name' => 'Hsp_query-from',
423 'Data' => shift @$HSPinfo
426 $self->element(
428 'Name' => 'Hsp_query-to',
429 'Data' => shift @$HSPinfo
432 $self->element(
434 'Name' => 'Hsp_score',
435 'Data' => shift @$HSPinfo
438 $self->element(
440 'Name' => 'Hsp_evalue',
441 'Data' => shift @$HSPinfo
444 $lastdomain = $name;
446 else {
448 # Might want to change this so that it
449 # accumulates all the of the alignment lines into
450 # three array slots and then tests for the
451 # end of the line
452 if (/^(\s+\*\-\>)(\S+)/o) { # start of domain
453 $prelength = CORE::length($1);
454 $width = 0;
456 # deal with fact that start en stop is on same line
457 my $data = $2;
458 if ($data =~ s/\<\-?\*?\s*$//)
460 $width = CORE::length($data);
463 $self->element(
465 'Name' => 'Hsp_qseq',
466 'Data' => $data
469 $count = 0;
470 $second_tier = 0;
472 elsif (/^(\s+)(\S+)\<\-\*\s*$/o) { #end of domain
473 $self->element(
475 'Name' => 'Hsp_qseq',
476 'Data' => $2
479 $width = CORE::length($2);
480 $count = 0;
482 elsif (( $count != 1 && /^\s+$/o )
483 || CORE::length($_) == 0
484 || /^\s+\-?\*\s*$/ )
486 next;
488 elsif ( $count == 0 ) {
489 $prelength -= 3 unless ( $second_tier++ );
490 unless ( defined $prelength ) {
492 # $self->warn("prelength not set");
493 next;
495 $self->element(
497 'Name' => 'Hsp_qseq',
498 'Data' => substr( $_, $prelength )
502 elsif ( $count == 1 ) {
503 if ( !defined $prelength ) {
504 $self->warn("prelength not set");
506 if ($width) {
507 $self->element(
509 'Name' => 'Hsp_midline',
510 'Data' =>
511 substr( $_, $prelength, $width )
515 else {
516 $self->element(
518 'Name' => 'Hsp_midline',
519 'Data' => substr( $_, $prelength )
524 elsif ( $count == 2 ) {
525 if (/^\s+(\S+)\s+(\d+|\-)\s+(\S*)\s+(\d+|\-)/o) {
526 $self->element(
528 'Name' => 'Hsp_hseq',
529 'Data' => $3
533 else {
534 $self->warn("unrecognized line: $_\n");
537 $count = 0 if $count++ >= 2;
541 elsif ( /^Histogram/o || m!^//!o ) {
542 while ( my $HSPinfo = shift @hspinfo ) {
543 my $id = shift @$HSPinfo;
544 my $info = [ @{ $hitinfo[ $hitinfo{$id} ] } ];
545 next unless defined $info;
546 $self->start_element( { 'Name' => 'Hit' } );
547 $self->element(
549 'Name' => 'Hit_id',
550 'Data' => shift @{$info}
553 $self->element(
555 'Name' => 'Hit_desc',
556 'Data' => shift @{$info}
559 $self->element(
561 'Name' => 'Hit_signif',
562 'Data' => shift @{$info}
565 $self->element(
567 'Name' => 'Hit_score',
568 'Data' => shift @{$info}
571 $self->start_element( { 'Name' => 'Hsp' } );
572 $self->element(
574 'Name' => 'Hsp_query-from',
575 'Data' => shift @$HSPinfo
578 $self->element(
580 'Name' => 'Hsp_query-to',
581 'Data' => shift @$HSPinfo
584 $self->element(
586 'Name' => 'Hsp_hit-from',
587 'Data' => shift @$HSPinfo
590 $self->element(
592 'Name' => 'Hsp_hit-to',
593 'Data' => shift @$HSPinfo
596 $self->element(
598 'Name' => 'Hsp_score',
599 'Data' => shift @$HSPinfo
602 $self->element(
604 'Name' => 'Hsp_evalue',
605 'Data' => shift @$HSPinfo
608 $self->element(
610 'Name' => 'Hsp_identity',
611 'Data' => 0
614 $self->element(
616 'Name' => 'Hsp_positive',
617 'Data' => 0
620 $self->element(
622 'Name' => 'Hsp_positive',
623 'Data' => 0
626 $self->end_element( { 'Name' => 'Hsp' } );
627 $self->end_element( { 'Name' => 'Hit' } );
629 @hitinfo = ();
630 %hitinfo = ();
631 last;
634 elsif ( defined $self->{'_reporttype'}
635 && $self->{'_reporttype'} eq 'HMMPFAM' )
637 # process HMMPFAM results here
638 if (/^Scores for sequence family/o) {
639 while ( defined( $_ = $self->_readline ) ) {
640 last if (/^\s+$/);
641 next if ( /^Model\s+Description/o || /^\-\-\-/o );
642 chomp;
643 my @line = split;
644 my ( $model, $n, $evalue, $score ) =
645 ( shift @line, pop @line, pop @line, pop @line );
646 my $desc = join( ' ', @line );
647 push @hitinfo, [ $model, $desc, $score, $evalue, $n ];
648 $hitinfo{$model} = $#hitinfo;
651 elsif (/^Parsed for domains:/o) {
652 @hspinfo = ();
653 while ( defined( $_ = $self->_readline ) ) {
654 last if (/^\s+$/);
655 if (m!^//!) {
656 $self->_pushback($_);
657 last;
659 next if ( /^Model\s+Domain/o || /^\-\-\-/o );
660 chomp;
661 if (
662 my ( $n, $domainnum, $domainct, @vals ) = (
663 m!^(\S+)\s+ # domain name
664 (\d+)/(\d+)\s+ # domain num out of num
665 (\d+)\s+(\d+).+? # seq start, end
666 (\d+)\s+(\d+)\s+ # hmm start, end
667 \S+\s+ # []
668 (\S+)\s+ # score
669 (\S+) # evalue
670 \s*$!ox
674 my $hindex = $hitinfo{$n};
675 if ( !defined $hindex ) {
676 push @hitinfo,
677 [ $n, '', $vals[5], $vals[6], $domainct ];
678 $hitinfo{$n} = $#hitinfo;
679 $hindex = $#hitinfo;
681 my $info = $hitinfo[$hindex];
682 if ( !defined $info ) {
683 $self->warn(
684 "incomplete Domain information, can't find $n hitinfo says $hitinfo{$n}"
686 next;
688 push @hspinfo, [ $n, @vals ];
692 elsif (/^Alignments of top/o) {
693 my ( $prelength, $lastdomain, $count, $width );
694 $count = 0;
695 my $second_tier = 0;
696 while ( defined( $_ = $self->_readline ) ) {
697 next
698 if (
699 /^Align/o
700 || ( $count != 1
701 && /^\s+RF\s+[x\s]+$/o )
703 # fix for bug 2632
704 next if ($_ =~ m/^\s+CS\s+/o && $count == 0);
705 if ( /^Histogram/o || m!^//!o || /^Query sequence/o ) {
706 if ( $self->in_element('hsp') ) {
707 $self->end_element( { 'Name' => 'Hsp' } );
709 if ( $self->in_element('hit') ) {
710 $self->end_element( { 'Name' => 'Hit' } );
712 $self->_pushback($_);
713 last;
715 chomp;
716 if (m/(\S+):.*from\s+(\d+)\s+to\s+(\d+)/o) {
717 my ( $name, $from, $to ) = ( $1, $2, $3 );
719 if ( $self->within_element('hit') ) {
720 if ( $self->in_element('hsp') ) {
721 $self->end_element( { 'Name' => 'Hsp' } );
723 $self->end_element( { 'Name' => 'Hit' } );
725 my $info = [ @{ $hitinfo[ $hitinfo{$name} ] } ];
726 if ( !defined $info
727 || $info->[0] ne $name )
729 $self->warn(
730 "Somehow the Model table order does not match the order in the domains (got "
731 . $info->[0]
732 . ", expected $name). We're back loading this from the alignment information instead"
734 $info = [
735 $name, '',
736 /score\s+([^,\s]+),\s+E\s+=\s+(\S+)/ox
738 push @hitinfo, $info;
739 $hitinfo{$name} = $#hitinfo;
741 $self->start_element( { 'Name' => 'Hit' } );
743 $self->element(
745 'Name' => 'Hit_id',
746 'Data' => shift @{$info}
749 $self->element(
751 'Name' => 'Hit_desc',
752 'Data' => shift @{$info}
755 $self->element(
757 'Name' => 'Hit_score',
758 'Data' => shift @{$info}
761 $self->element(
763 'Name' => 'Hit_signif',
764 'Data' => shift @{$info}
768 $self->start_element( { 'Name' => 'Hsp' } );
769 $self->element(
771 'Name' => 'Hsp_identity',
772 'Data' => 0
775 $self->element(
777 'Name' => 'Hsp_positive',
778 'Data' => 0
781 my $HSPinfo = shift @hspinfo;
782 my $id = shift @$HSPinfo;
784 if ( $id ne $name ) {
785 $self->throw(
786 "Somehow the domain list details do not match the table (got $id, expected $name)"
789 $self->element(
791 'Name' => 'Hsp_query-from',
792 'Data' => shift @$HSPinfo
795 $self->element(
797 'Name' => 'Hsp_query-to',
798 'Data' => shift @$HSPinfo
801 $self->element(
803 'Name' => 'Hsp_hit-from',
804 'Data' => shift @$HSPinfo
807 $self->element(
809 'Name' => 'Hsp_hit-to',
810 'Data' => shift @$HSPinfo
813 $self->element(
815 'Name' => 'Hsp_score',
816 'Data' => shift @$HSPinfo
819 $self->element(
821 'Name' => 'Hsp_evalue',
822 'Data' => shift @$HSPinfo
825 $lastdomain = $name;
827 else {
828 if (/^(\s+\*\-\>)(\S+)/o) {
830 # start of domain
831 $prelength = CORE::length($1);
832 $width = 0;
834 # deal with fact that start en stop is on same line
835 my $data = $2;
836 if ($data =~ s/\<\-?\*?\s*$//)
838 $width = CORE::length($data);
841 $self->element(
843 'Name' => 'Hsp_hseq',
844 'Data' => $data
847 $count = 0;
848 $second_tier = 0;
851 elsif (/^(\s+)(\S+)\<\-?\*?\s*$/o) {
853 #end of domain
854 $prelength -= 3 unless ( $second_tier++ );
855 $self->element(
857 'Name' => 'Hsp_hseq',
858 'Data' => $2
861 $width = CORE::length($2);
862 $count = 0;
864 elsif (CORE::length($_) == 0
865 || ( $count != 1 && /^\s+$/o )
866 || /^\s+\-?\*\s*$/
867 || /^\s+\S+\s+\-\s+\-\s*$/ )
869 next;
871 elsif ( $count == 0 ) {
872 $prelength -= 3 unless ( $second_tier++ );
873 unless ( defined $prelength ) {
875 # $self->warn("prelength not set");
876 next;
878 $self->element(
880 'Name' => 'Hsp_hseq',
881 'Data' => substr( $_, $prelength )
885 elsif ( $count == 1 ) {
886 if ( !defined $prelength ) {
887 $self->warn("prelength not set");
889 if ($width) {
890 $self->element(
892 'Name' => 'Hsp_midline',
893 'Data' =>
894 substr( $_, $prelength, $width )
898 else {
899 $self->element(
901 'Name' => 'Hsp_midline',
902 'Data' => substr( $_, $prelength )
907 elsif ( $count == 2 ) {
908 if ( /^\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)/o
909 || /^\s+(\S+)\s+(\-)\s+(\S*)\s+(\-)/o )
911 $self->element(
913 'Name' => 'Hsp_qseq',
914 'Data' => $3
918 else {
919 $self->throw(
920 "unrecognized line ($count): $_\n");
923 $count = 0 if $count++ >= 2;
927 elsif ( /^Histogram/o || m!^//!o ) {
929 while ( my $HSPinfo = shift @hspinfo ) {
930 my $id = shift @$HSPinfo;
931 my $info = [ @{ $hitinfo[ $hitinfo{$id} ] } ];
932 next unless defined $info;
933 $self->start_element( { 'Name' => 'Hit' } );
934 $self->element(
936 'Name' => 'Hit_id',
937 'Data' => shift @{$info}
940 $self->element(
942 'Name' => 'Hit_desc',
943 'Data' => shift @{$info}
946 $self->element(
948 'Name' => 'Hit_signif',
949 'Data' => shift @{$info}
952 $self->element(
954 'Name' => 'Hit_score',
955 'Data' => shift @{$info}
958 $self->start_element( { 'Name' => 'Hsp' } );
959 $self->element(
961 'Name' => 'Hsp_query-from',
962 'Data' => shift @$HSPinfo
965 $self->element(
967 'Name' => 'Hsp_query-to',
968 'Data' => shift @$HSPinfo
971 $self->element(
973 'Name' => 'Hsp_hit-from',
974 'Data' => shift @$HSPinfo
977 $self->element(
979 'Name' => 'Hsp_hit-to',
980 'Data' => shift @$HSPinfo
983 $self->element(
985 'Name' => 'Hsp_score',
986 'Data' => shift @$HSPinfo
989 $self->element(
991 'Name' => 'Hsp_evalue',
992 'Data' => shift @$HSPinfo
995 $self->element(
997 'Name' => 'Hsp_identity',
998 'Data' => 0
1001 $self->element(
1003 'Name' => 'Hsp_positive',
1004 'Data' => 0
1007 $self->element(
1009 'Name' => 'Hsp_positive',
1010 'Data' => 0
1013 $self->end_element( { 'Name' => 'Hsp' } );
1014 $self->end_element( { 'Name' => 'Hit' } );
1016 @hitinfo = ();
1017 %hitinfo = ();
1018 last;
1020 # uncomment to see missed lines with verbose on
1021 #else {
1022 # $self->debug($_);
1025 $last = $_;
1027 $self->end_element( { 'Name' => 'HMMER_Output' } ) unless !$seentop;
1028 return $self->end_document();
1031 =head2 start_element
1033 Title : start_element
1034 Usage : $eventgenerator->start_element
1035 Function: Handles a start element event
1036 Returns : none
1037 Args : hashref with at least 2 keys 'Data' and 'Name'
1040 =cut
1042 sub start_element {
1043 my ( $self, $data ) = @_;
1045 # we currently don't care about attributes
1046 my $nm = $data->{'Name'};
1047 my $type = $MODEMAP{$nm};
1048 if ($type) {
1049 if ( $self->_eventHandler->will_handle($type) ) {
1050 my $func = sprintf( "start_%s", lc $type );
1051 $self->_eventHandler->$func( $data->{'Attributes'} );
1053 unshift @{ $self->{'_elements'} }, $type;
1055 if ( defined $type
1056 && $type eq 'result' )
1058 $self->{'_values'} = {};
1059 $self->{'_result'} = undef;
1063 =head2 end_element
1065 Title : start_element
1066 Usage : $eventgenerator->end_element
1067 Function: Handles an end element event
1068 Returns : none
1069 Args : hashref with at least 2 keys 'Data' and 'Name'
1072 =cut
1074 sub end_element {
1075 my ( $self, $data ) = @_;
1076 my $nm = $data->{'Name'};
1077 my $type = $MODEMAP{$nm};
1078 my $rc;
1080 if ( $nm eq 'HMMER_program' ) {
1081 if ( $self->{'_last_data'} =~ /(HMM\S+)/i ) {
1082 $self->{'_reporttype'} = uc $1;
1086 # Hsp are sort of weird, in that they end when another
1087 # object begins so have to detect this in end_element for now
1088 if ( $nm eq 'Hsp' ) {
1089 foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
1090 my $data = $self->{'_last_hspdata'}->{$_};
1091 if ($data && $_ eq 'Hsp_hseq') {
1092 # replace hmm '.' gap symbol by '-'
1093 $data =~ s/\./-/g;
1095 $self->element(
1097 'Name' => $_,
1098 'Data' => $data
1102 $self->{'_last_hspdata'} = {};
1104 if ($type) {
1105 if ( $self->_eventHandler->will_handle($type) ) {
1106 my $func = sprintf( "end_%s", lc $type );
1107 $rc = $self->_eventHandler->$func( $self->{'_reporttype'},
1108 $self->{'_values'} );
1110 my $lastelem = shift @{ $self->{'_elements'} };
1112 elsif ( $MAPPING{$nm} ) {
1113 if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
1114 my $key = ( keys %{ $MAPPING{$nm} } )[0];
1115 $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } =
1116 $self->{'_last_data'};
1118 else {
1119 $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
1122 else {
1123 $self->debug("unknown nm $nm, ignoring\n");
1125 $self->{'_last_data'} = ''; # remove read data if we are at
1126 # end of an element
1127 $self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
1128 return $rc;
1131 =head2 element
1133 Title : element
1134 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
1135 Function: Convience method that calls start_element, characters, end_element
1136 Returns : none
1137 Args : Hash ref with the keys 'Name' and 'Data'
1140 =cut
1142 sub element {
1143 my ( $self, $data ) = @_;
1144 $self->start_element($data);
1145 $self->characters($data);
1146 $self->end_element($data);
1149 =head2 characters
1151 Title : characters
1152 Usage : $eventgenerator->characters($str)
1153 Function: Send a character events
1154 Returns : none
1155 Args : string
1158 =cut
1160 sub characters {
1161 my ( $self, $data ) = @_;
1163 if ( $self->in_element('hsp')
1164 && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o
1165 && defined $data->{'Data'} )
1167 $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
1169 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o );
1171 $self->{'_last_data'} = $data->{'Data'};
1174 =head2 within_element
1176 Title : within_element
1177 Usage : if( $eventgenerator->within_element($element) ) {}
1178 Function: Test if we are within a particular element
1179 This is different than 'in' because within can be tested
1180 for a whole block.
1181 Returns : boolean
1182 Args : string element name
1185 =cut
1187 sub within_element {
1188 my ( $self, $name ) = @_;
1189 return 0
1190 if ( !defined $name
1191 || !defined $self->{'_elements'}
1192 || scalar @{ $self->{'_elements'} } == 0 );
1193 foreach ( @{ $self->{'_elements'} } ) {
1194 return 1 if ( $_ eq $name );
1196 return 0;
1199 =head2 in_element
1201 Title : in_element
1202 Usage : if( $eventgenerator->in_element($element) ) {}
1203 Function: Test if we are in a particular element
1204 This is different than 'within' because 'in' only
1205 tests its immediete parent.
1206 Returns : boolean
1207 Args : string element name
1210 =cut
1212 sub in_element {
1213 my ( $self, $name ) = @_;
1214 return 0 if !defined $self->{'_elements'}->[0];
1215 return ( $self->{'_elements'}->[0] eq $name );
1218 =head2 start_document
1220 Title : start_document
1221 Usage : $eventgenerator->start_document
1222 Function: Handle a start document event
1223 Returns : none
1224 Args : none
1227 =cut
1229 sub start_document {
1230 my ($self) = @_;
1231 $self->{'_lasttype'} = '';
1232 $self->{'_values'} = {};
1233 $self->{'_result'} = undef;
1234 $self->{'_elements'} = [];
1237 =head2 end_document
1239 Title : end_document
1240 Usage : $eventgenerator->end_document
1241 Function: Handles an end document event
1242 Returns : Bio::Search::Result::ResultI object
1243 Args : none
1246 =cut
1248 sub end_document {
1249 my ($self) = @_;
1250 return $self->{'_result'};
1253 =head2 result_count
1255 Title : result_count
1256 Usage : my $count = $searchio->result_count
1257 Function: Returns the number of results we have processed
1258 Returns : integer
1259 Args : none
1262 =cut
1264 sub result_count {
1265 my $self = shift;
1266 return $self->{'_result_count'};