add skips and a warning re: NeXML v0.9 support
[bioperl-live.git] / Bio / SearchIO / hmmer2.pm
blobaf1a6a064a7db61eaf9f928863e41585771305a5
2 # BioPerl module for Bio::SearchIO::hmmer2
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SearchIO::hmmer2 - A parser for HMMER output (hmmpfam, hmmsearch)
18 =head1 SYNOPSIS
20 # do not use this class directly it is available through Bio::SearchIO
21 use Bio::SearchIO;
22 my $in = Bio::SearchIO->new(-format => 'hmmer2',
23 -file => 't/data/L77119.hmmer');
24 while( my $result = $in->next_result ) {
25 # this is a Bio::Search::Result::HMMERResult object
26 print $result->query_name(), " for HMM ", $result->hmm_name(), "\n";
27 while( my $hit = $result->next_hit ) {
28 print $hit->name(), "\n";
29 while( my $hsp = $hit->next_hsp ) {
30 print "length is ", $hsp->length(), "\n";
35 =head1 DESCRIPTION
37 This object implements a parser for HMMER output.
39 =head1 FEEDBACK
41 =head2 Mailing Lists
43 User feedback is an integral part of the evolution of this and other
44 Bioperl modules. Send your comments and suggestions preferably to
45 the Bioperl mailing list. Your participation is much appreciated.
47 bioperl-l@bioperl.org - General discussion
48 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50 =head2 Support
52 Please direct usage questions or support issues to the mailing list:
54 I<bioperl-l@bioperl.org>
56 rather than to the module maintainer directly. Many experienced and
57 reponsive experts will be able look at the problem and quickly
58 address it. Please include a thorough description of the problem
59 with code and data examples if at all possible.
61 =head2 Reporting Bugs
63 Report bugs to the Bioperl bug tracking system to help us keep track
64 of the bugs and their resolution. Bug reports can be submitted via the
65 web:
67 https://redmine.open-bio.org/projects/bioperl/
69 =head1 AUTHOR - Jason Stajich
71 Email jason-at-bioperl.org
73 =head1 APPENDIX
75 The rest of the documentation details each of the object methods.
76 Internal methods are usually preceded with a _
78 =cut
80 # Let the code begin...
82 package Bio::SearchIO::hmmer2;
84 use strict;
86 use Bio::Factory::ObjectFactory;
88 use vars qw(%MAPPING %MODEMAP
91 use base qw(Bio::SearchIO::hmmer);
93 BEGIN {
95 # mapping of HMMER items to Bioperl hash keys
96 %MODEMAP = (
97 'HMMER_Output' => 'result',
98 'Hit' => 'hit',
99 'Hsp' => 'hsp'
102 %MAPPING = (
103 'Hsp_bit-score' => 'HSP-bits',
104 'Hsp_score' => 'HSP-score',
105 'Hsp_evalue' => 'HSP-evalue',
106 'Hsp_query-from' => 'HSP-query_start',
107 'Hsp_query-to' => 'HSP-query_end',
108 'Hsp_hit-from' => 'HSP-hit_start',
109 'Hsp_hit-to' => 'HSP-hit_end',
110 'Hsp_positive' => 'HSP-conserved',
111 'Hsp_identity' => 'HSP-identical',
112 'Hsp_gaps' => 'HSP-hsp_gaps',
113 'Hsp_hitgaps' => 'HSP-hit_gaps',
114 'Hsp_querygaps' => 'HSP-query_gaps',
115 'Hsp_qseq' => 'HSP-query_seq',
116 'Hsp_hseq' => 'HSP-hit_seq',
117 'Hsp_midline' => 'HSP-homology_seq',
118 'Hsp_align-len' => 'HSP-hsp_length',
119 'Hsp_query-frame' => 'HSP-query_frame',
120 'Hsp_hit-frame' => 'HSP-hit_frame',
122 'Hit_id' => 'HIT-name',
123 'Hit_len' => 'HIT-length',
124 'Hit_accession' => 'HIT-accession',
125 'Hit_desc' => 'HIT-description',
126 'Hit_signif' => 'HIT-significance',
127 'Hit_score' => 'HIT-score',
129 'HMMER_program' => 'RESULT-algorithm_name',
130 'HMMER_version' => 'RESULT-algorithm_version',
131 'HMMER_query-def' => 'RESULT-query_name',
132 'HMMER_query-len' => 'RESULT-query_length',
133 'HMMER_query-acc' => 'RESULT-query_accession',
134 'HMMER_querydesc' => 'RESULT-query_description',
135 'HMMER_hmm' => 'RESULT-hmm_name',
136 'HMMER_seqfile' => 'RESULT-sequence_file',
137 'HMMER_db' => 'RESULT-database_name',
141 =head2 new
143 Title : new
144 Usage : my $obj = Bio::SearchIO::hmmer2->new();
145 Function: Builds a new Bio::SearchIO::hmmer2 object
146 Returns : Bio::SearchIO::hmmer2
147 Args : -fh/-file => HMMER filename
148 -format => 'hmmer2'
150 =cut
152 sub _initialize {
153 my ( $self, @args ) = @_;
154 $self->SUPER::_initialize(@args);
155 $self->{'_hmmidline'} = 'HMMER 2.2g (August 2001)';
158 =head2 next_result
160 Title : next_result
161 Usage : my $hit = $searchio->next_result;
162 Function: Returns the next Result from a search
163 Returns : Bio::Search::Result::ResultI object
164 Args : none
166 =cut
168 sub next_result {
169 my ($self) = @_;
170 my $seentop = 0;
171 my $reporttype;
172 my ( $last, @hitinfo, @hspinfo, %hspinfo, %hitinfo );
173 local $/ = "\n";
174 local $_;
176 my $verbose = $self->verbose; # cache for speed?
177 $self->start_document();
178 local ($_);
179 while ( defined( $_ = $self->_readline ) ) {
180 my $lineorig = $_;
181 chomp;
182 if (/^HMMER\s+(\S+)\s+\((.+)\)/o) {
183 my ( $prog, $version ) = split;
184 if ($seentop) {
185 $self->_pushback($_);
186 $self->end_element( { 'Name' => 'HMMER_Output' } );
187 return $self->end_document();
189 $self->{'_hmmidline'} = $_;
190 $self->start_element( { 'Name' => 'HMMER_Output' } );
191 $self->{'_result_count'}++;
192 $seentop = 1;
193 if ( defined $last ) {
194 ($reporttype) = split( /\s+/, $last );
195 $self->element(
197 'Name' => 'HMMER_program',
198 'Data' => uc($reporttype)
202 $self->element(
204 'Name' => 'HMMER_version',
205 'Data' => $version
209 elsif (s/^HMM file:\s+//o) {
210 $self->{'_hmmfileline'} = $lineorig;
211 $self->element(
213 'Name' => 'HMMER_hmm',
214 'Data' => $_
218 elsif (s/^Sequence\s+(file|database):\s+//o) {
219 $self->{'_hmmseqline'} = $lineorig;
220 if ( $1 eq 'database' ) {
221 $self->element(
223 'Name' => 'HMMER_db',
224 'Data' => $_
228 $self->element(
230 'Name' => 'HMMER_seqfile',
231 'Data' => $_
235 elsif (s/^Query(\s+(sequence|HMM))?(?:\s+\d+)?:\s+//o) {
236 if ( !$seentop ) {
238 # we're in a multi-query report
239 $self->_pushback( $self->{'_hmmidline'} );
240 $self->_pushback( $self->{'_hmmfileline'} );
241 $self->_pushback( $self->{'_hmmseqline'} );
242 $self->_pushback($lineorig);
243 next;
245 s/\s+$//;
246 $self->element(
248 'Name' => 'HMMER_query-def',
249 'Data' => $_
253 elsif (s/^Accession:\s+//o) {
254 s/\s+$//;
255 $self->element(
257 'Name' => 'HMMER_query-acc',
258 'Data' => $_
262 elsif (s/^Description:\s+//o) {
263 s/\s+$//;
264 $self->element(
266 'Name' => 'HMMER_querydesc',
267 'Data' => $_
271 elsif ( defined $self->{'_reporttype'}
272 && $self->{'_reporttype'} eq 'HMMSEARCH' )
275 # PROCESS HMMSEARCH RESULTS HERE
276 if (/^Scores for complete sequences/o) {
277 while ( defined( $_ = $self->_readline ) ) {
278 last if (/^\s+$/);
279 next if ( /^Sequence\s+Description/o || /^\-\-\-/o );
280 my @line = split;
281 my ( $name, $n, $evalue, $score ) =
282 ( shift @line, pop @line, pop @line, pop @line );
283 my $desc = join( ' ', @line );
284 push @hitinfo, [ $name, $desc, $evalue, $score ];
285 $hitinfo{$name} = $#hitinfo;
288 elsif (/^Parsed for domains:/o) {
289 @hspinfo = ();
291 while ( defined( $_ = $self->_readline ) ) {
292 last if (/^\s+$/);
293 if (m!^//!) {
294 $self->_pushback($_);
295 last;
297 next if ( /^(Model|Sequence)\s+Domain/ || /^\-\-\-/ );
299 chomp;
300 if (
301 my ( $n, $domainnum, $domainct, @vals ) = (
302 m!^(\S+)\s+ # host name
303 (\d+)/(\d+)\s+ # num/num (ie 1 of 2)
304 (\d+)\s+(\d+).+? # sequence start and end
305 (\d+)\s+(\d+)\s+ # hmm start and end
306 \S+\s+ # []
307 (\S+)\s+ # score
308 (\S+) # evalue
309 \s*$!ox
314 # array lookup so that we can get rid of things
315 # when they've been processed
316 my $info = $hitinfo[ $hitinfo{$n} ];
317 if ( !defined $info ) {
318 $self->warn(
319 "Incomplete Sequence information, can't find $n hitinfo says $hitinfo{$n}"
321 next;
323 push @hspinfo, [ $n, @vals ];
327 elsif (/^Alignments of top/o) {
328 my ( $prelength, $lastdomain, $count, $width );
329 $count = 0;
330 my %domaincounter;
331 my $second_tier = 0;
332 while ( defined( $_ = $self->_readline ) ) {
333 next if ( /^Align/o
334 || /^\s+RF\s+[x\s]+$/o );
335 if ( /^Histogram/o || m!^//!o ) {
336 if ( $self->in_element('hsp') ) {
337 $self->end_element( { 'Name' => 'Hsp' } );
339 if ( $self->within_element('hit') ) {
340 $self->end_element( { 'Name' => 'Hit' } );
342 last;
344 chomp;
346 if (
347 m/^\s*(.+):\s+domain\s+(\d+)\s+of\s+(\d+)\,\s+
348 from\s+(\d+)\s+to\s+(\d+)/x
351 my ( $name, $domainct, $domaintotal, $from, $to ) =
352 ( $1, $2, $3, $4, $5 );
353 $domaincounter{$name}++;
354 if ( $self->within_element('hit') ) {
355 if ( $self->within_element('hsp') ) {
356 $self->end_element( { 'Name' => 'Hsp' } );
358 $self->end_element( { 'Name' => 'Hit' } );
361 $self->start_element( { 'Name' => 'Hit' } );
362 my $info = [
364 $hitinfo[ $hitinfo{$name} ] || $self->throw(
365 "Could not find hit info for $name: Insure that your database contains only unique sequence names"
369 if ( $info->[0] ne $name ) {
370 $self->throw(
371 "Somehow the Model table order does not match the order in the domains (got "
372 . $info->[0]
373 . ", expected $name)" );
375 $self->element(
377 'Name' => 'Hit_id',
378 'Data' => shift @{$info}
381 $self->element(
383 'Name' => 'Hit_desc',
384 'Data' => shift @{$info}
387 $self->element(
389 'Name' => 'Hit_signif',
390 'Data' => shift @{$info}
393 $self->element(
395 'Name' => 'Hit_score',
396 'Data' => shift @{$info}
400 $self->start_element( { 'Name' => 'Hsp' } );
401 $self->element(
403 'Name' => 'Hsp_identity',
404 'Data' => 0
407 $self->element(
409 'Name' => 'Hsp_positive',
410 'Data' => 0
413 my $HSPinfo = shift @hspinfo;
414 my $id = shift @$HSPinfo;
416 if ( $id ne $name ) {
417 $self->throw(
418 "Somehow the domain list details do not match the table (got $id, expected $name)"
421 if ( $domaincounter{$name} == $domaintotal ) {
422 $hitinfo[ $hitinfo{$name} ] = undef;
424 $self->element(
426 'Name' => 'Hsp_hit-from',
427 'Data' => shift @$HSPinfo
430 $self->element(
432 'Name' => 'Hsp_hit-to',
433 'Data' => shift @$HSPinfo
436 $self->element(
438 'Name' => 'Hsp_query-from',
439 'Data' => shift @$HSPinfo
442 $self->element(
444 'Name' => 'Hsp_query-to',
445 'Data' => shift @$HSPinfo
448 $self->element(
450 'Name' => 'Hsp_score',
451 'Data' => shift @$HSPinfo
454 $self->element(
456 'Name' => 'Hsp_evalue',
457 'Data' => shift @$HSPinfo
460 $lastdomain = $name;
462 else {
464 # Might want to change this so that it
465 # accumulates all the of the alignment lines into
466 # three array slots and then tests for the
467 # end of the line
468 if (/^(\s+\*\-\>)(\S+)/o) { # start of domain
469 $prelength = CORE::length($1);
470 $width = 0;
472 # deal with fact that start en stop is on same line
473 my $data = $2;
474 if ($data =~ s/\<\-?\*?\s*$//)
476 $width = CORE::length($data);
479 $self->element(
481 'Name' => 'Hsp_qseq',
482 'Data' => $data
485 $count = 0;
486 $second_tier = 0;
488 elsif (/^(\s+)(\S+)\<\-\*\s*$/o) { #end of domain
489 $self->element(
491 'Name' => 'Hsp_qseq',
492 'Data' => $2
495 $width = CORE::length($2);
496 $count = 0;
498 elsif (( $count != 1 && /^\s+$/o )
499 || CORE::length($_) == 0
500 || /^\s+\-?\*\s*$/ )
502 next;
504 elsif ( $count == 0 ) {
505 $prelength -= 3 unless ( $second_tier++ );
506 unless ( defined $prelength ) {
508 # $self->warn("prelength not set");
509 next;
511 $self->element(
513 'Name' => 'Hsp_qseq',
514 'Data' => substr( $_, $prelength )
518 elsif ( $count == 1 ) {
519 if ( !defined $prelength ) {
520 $self->warn("prelength not set");
522 if ($width) {
523 $self->element(
525 'Name' => 'Hsp_midline',
526 'Data' =>
527 substr( $_, $prelength, $width )
531 else {
532 $self->element(
534 'Name' => 'Hsp_midline',
535 'Data' => substr( $_, $prelength )
540 elsif ( $count == 2 ) {
541 if (/^\s+(\S+)\s+(\d+|\-)\s+(\S*)\s+(\d+|\-)/o) {
542 $self->element(
544 'Name' => 'Hsp_hseq',
545 'Data' => $3
549 else {
550 $self->warn("unrecognized line: $_\n");
553 $count = 0 if $count++ >= 2;
557 elsif ( /^Histogram/o || m!^//!o ) {
558 while ( my $HSPinfo = shift @hspinfo ) {
559 my $id = shift @$HSPinfo;
560 my $info = [ @{ $hitinfo[ $hitinfo{$id} ] } ];
561 next unless defined $info;
562 $self->start_element( { 'Name' => 'Hit' } );
563 $self->element(
565 'Name' => 'Hit_id',
566 'Data' => shift @{$info}
569 $self->element(
571 'Name' => 'Hit_desc',
572 'Data' => shift @{$info}
575 $self->element(
577 'Name' => 'Hit_signif',
578 'Data' => shift @{$info}
581 $self->element(
583 'Name' => 'Hit_score',
584 'Data' => shift @{$info}
587 $self->start_element( { 'Name' => 'Hsp' } );
588 $self->element(
590 'Name' => 'Hsp_query-from',
591 'Data' => shift @$HSPinfo
594 $self->element(
596 'Name' => 'Hsp_query-to',
597 'Data' => shift @$HSPinfo
600 $self->element(
602 'Name' => 'Hsp_hit-from',
603 'Data' => shift @$HSPinfo
606 $self->element(
608 'Name' => 'Hsp_hit-to',
609 'Data' => shift @$HSPinfo
612 $self->element(
614 'Name' => 'Hsp_score',
615 'Data' => shift @$HSPinfo
618 $self->element(
620 'Name' => 'Hsp_evalue',
621 'Data' => shift @$HSPinfo
624 $self->element(
626 'Name' => 'Hsp_identity',
627 'Data' => 0
630 $self->element(
632 'Name' => 'Hsp_positive',
633 'Data' => 0
636 $self->element(
638 'Name' => 'Hsp_positive',
639 'Data' => 0
642 $self->end_element( { 'Name' => 'Hsp' } );
643 $self->end_element( { 'Name' => 'Hit' } );
645 @hitinfo = ();
646 %hitinfo = ();
647 last;
650 elsif ( defined $self->{'_reporttype'}
651 && $self->{'_reporttype'} eq 'HMMPFAM' )
653 # process HMMPFAM results here
654 if (/^Scores for sequence family/o) {
655 while ( defined( $_ = $self->_readline ) ) {
656 last if (/^\s+$/);
657 next if ( /^Model\s+Description/o || /^\-\-\-/o );
658 chomp;
659 my @line = split;
660 my ( $model, $n, $evalue, $score ) =
661 ( shift @line, pop @line, pop @line, pop @line );
662 my $desc = join( ' ', @line );
663 push @hitinfo, [ $model, $desc, $score, $evalue, $n ];
664 $hitinfo{$model} = $#hitinfo;
667 elsif (/^Parsed for domains:/o) {
668 @hspinfo = ();
669 while ( defined( $_ = $self->_readline ) ) {
670 last if (/^\s+$/);
671 if (m!^//!) {
672 $self->_pushback($_);
673 last;
675 next if ( /^Model\s+Domain/o || /^\-\-\-/o );
676 chomp;
677 if (
678 my ( $n, $domainnum, $domainct, @vals ) = (
679 m!^(\S+)\s+ # domain name
680 (\d+)/(\d+)\s+ # domain num out of num
681 (\d+)\s+(\d+).+? # seq start, end
682 (\d+)\s+(\d+)\s+ # hmm start, end
683 \S+\s+ # []
684 (\S+)\s+ # score
685 (\S+) # evalue
686 \s*$!ox
690 my $hindex = $hitinfo{$n};
691 if ( !defined $hindex ) {
692 push @hitinfo,
693 [ $n, '', $vals[5], $vals[6], $domainct ];
694 $hitinfo{$n} = $#hitinfo;
695 $hindex = $#hitinfo;
697 my $info = $hitinfo[$hindex];
698 if ( !defined $info ) {
699 $self->warn(
700 "incomplete Domain information, can't find $n hitinfo says $hitinfo{$n}"
702 next;
704 push @hspinfo, [ $n, @vals ];
708 elsif (/^Alignments of top/o) {
709 my ( $prelength, $lastdomain, $count, $width );
710 $count = 0;
711 my $second_tier = 0;
712 while ( defined( $_ = $self->_readline ) ) {
713 next
714 if (
715 /^Align/o
716 || ( $count != 1
717 && /^\s+RF\s+[x\s]+$/o )
719 # fix for bug 2632
720 next if ($_ =~ m/^\s+CS\s+/o && $count == 0);
721 if ( /^Histogram/o || m!^//!o || /^Query sequence/o ) {
722 if ( $self->in_element('hsp') ) {
723 $self->end_element( { 'Name' => 'Hsp' } );
725 if ( $self->in_element('hit') ) {
726 $self->end_element( { 'Name' => 'Hit' } );
728 $self->_pushback($_);
729 last;
731 chomp;
732 if (m/(\S+):.*from\s+(\d+)\s+to\s+(\d+)/o) {
733 my ( $name, $from, $to ) = ( $1, $2, $3 );
735 if ( $self->within_element('hit') ) {
736 if ( $self->in_element('hsp') ) {
737 $self->end_element( { 'Name' => 'Hsp' } );
739 $self->end_element( { 'Name' => 'Hit' } );
741 my $info = [ @{ $hitinfo[ $hitinfo{$name} ] } ];
742 if ( !defined $info
743 || $info->[0] ne $name )
745 $self->warn(
746 "Somehow the Model table order does not match the order in the domains (got "
747 . $info->[0]
748 . ", expected $name). We're back loading this from the alignment information instead"
750 $info = [
751 $name, '',
752 /score\s+([^,\s]+),\s+E\s+=\s+(\S+)/ox
754 push @hitinfo, $info;
755 $hitinfo{$name} = $#hitinfo;
757 $self->start_element( { 'Name' => 'Hit' } );
759 $self->element(
761 'Name' => 'Hit_id',
762 'Data' => shift @{$info}
765 $self->element(
767 'Name' => 'Hit_desc',
768 'Data' => shift @{$info}
771 $self->element(
773 'Name' => 'Hit_score',
774 'Data' => shift @{$info}
777 $self->element(
779 'Name' => 'Hit_signif',
780 'Data' => shift @{$info}
784 $self->start_element( { 'Name' => 'Hsp' } );
785 $self->element(
787 'Name' => 'Hsp_identity',
788 'Data' => 0
791 $self->element(
793 'Name' => 'Hsp_positive',
794 'Data' => 0
797 my $HSPinfo = shift @hspinfo;
798 my $id = shift @$HSPinfo;
800 if ( $id ne $name ) {
801 $self->throw(
802 "Somehow the domain list details do not match the table (got $id, expected $name)"
805 $self->element(
807 'Name' => 'Hsp_query-from',
808 'Data' => shift @$HSPinfo
811 $self->element(
813 'Name' => 'Hsp_query-to',
814 'Data' => shift @$HSPinfo
817 $self->element(
819 'Name' => 'Hsp_hit-from',
820 'Data' => shift @$HSPinfo
823 $self->element(
825 'Name' => 'Hsp_hit-to',
826 'Data' => shift @$HSPinfo
829 $self->element(
831 'Name' => 'Hsp_score',
832 'Data' => shift @$HSPinfo
835 $self->element(
837 'Name' => 'Hsp_evalue',
838 'Data' => shift @$HSPinfo
841 $lastdomain = $name;
843 else {
844 if (/^(\s+\*\-\>)(\S+)/o) {
846 # start of domain
847 $prelength = CORE::length($1);
848 $width = 0;
850 # deal with fact that start en stop is on same line
851 my $data = $2;
852 if ($data =~ s/\<\-?\*?\s*$//)
854 $width = CORE::length($data);
857 $self->element(
859 'Name' => 'Hsp_hseq',
860 'Data' => $data
863 $count = 0;
864 $second_tier = 0;
867 elsif (/^(\s+)(\S+)\<\-?\*?\s*$/o) {
869 #end of domain
870 $prelength -= 3 unless ( $second_tier++ );
871 $self->element(
873 'Name' => 'Hsp_hseq',
874 'Data' => $2
877 $width = CORE::length($2);
878 $count = 0;
880 elsif (CORE::length($_) == 0
881 || ( $count != 1 && /^\s+$/o )
882 || /^\s+\-?\*\s*$/ )
884 next;
886 elsif ( $count == 0 ) {
887 $prelength -= 3 unless ( $second_tier++ );
888 unless ( defined $prelength ) {
890 # $self->warn("prelength not set");
891 next;
893 $self->element(
895 'Name' => 'Hsp_hseq',
896 'Data' => substr( $_, $prelength )
900 elsif ( $count == 1 ) {
901 if ( !defined $prelength ) {
902 $self->warn("prelength not set");
904 if ($width) {
905 $self->element(
907 'Name' => 'Hsp_midline',
908 'Data' =>
909 substr( $_, $prelength, $width )
913 else {
914 $self->element(
916 'Name' => 'Hsp_midline',
917 'Data' => substr( $_, $prelength )
922 elsif ( $count == 2 ) {
923 if ( /^\s+(\S+)\s+(\d+)\s+(\S+)\s+(\d+)/o
924 || /^\s+(\S+)\s+(\-)\s+(\S*)\s+(\-)/o )
926 $self->element(
928 'Name' => 'Hsp_qseq',
929 'Data' => $3
933 else {
934 $self->throw(
935 "unrecognized line ($count): $_\n");
938 $count = 0 if $count++ >= 2;
942 elsif ( /^Histogram/o || m!^//!o ) {
944 while ( my $HSPinfo = shift @hspinfo ) {
945 my $id = shift @$HSPinfo;
946 my $info = [ @{ $hitinfo[ $hitinfo{$id} ] } ];
947 next unless defined $info;
948 $self->start_element( { 'Name' => 'Hit' } );
949 $self->element(
951 'Name' => 'Hit_id',
952 'Data' => shift @{$info}
955 $self->element(
957 'Name' => 'Hit_desc',
958 'Data' => shift @{$info}
961 $self->element(
963 'Name' => 'Hit_signif',
964 'Data' => shift @{$info}
967 $self->element(
969 'Name' => 'Hit_score',
970 'Data' => shift @{$info}
973 $self->start_element( { 'Name' => 'Hsp' } );
974 $self->element(
976 'Name' => 'Hsp_query-from',
977 'Data' => shift @$HSPinfo
980 $self->element(
982 'Name' => 'Hsp_query-to',
983 'Data' => shift @$HSPinfo
986 $self->element(
988 'Name' => 'Hsp_hit-from',
989 'Data' => shift @$HSPinfo
992 $self->element(
994 'Name' => 'Hsp_hit-to',
995 'Data' => shift @$HSPinfo
998 $self->element(
1000 'Name' => 'Hsp_score',
1001 'Data' => shift @$HSPinfo
1004 $self->element(
1006 'Name' => 'Hsp_evalue',
1007 'Data' => shift @$HSPinfo
1010 $self->element(
1012 'Name' => 'Hsp_identity',
1013 'Data' => 0
1016 $self->element(
1018 'Name' => 'Hsp_positive',
1019 'Data' => 0
1022 $self->element(
1024 'Name' => 'Hsp_positive',
1025 'Data' => 0
1028 $self->end_element( { 'Name' => 'Hsp' } );
1029 $self->end_element( { 'Name' => 'Hit' } );
1031 @hitinfo = ();
1032 %hitinfo = ();
1033 last;
1035 # uncomment to see missed lines with verbose on
1036 #else {
1037 # $self->debug($_);
1040 $last = $_;
1042 $self->end_element( { 'Name' => 'HMMER_Output' } ) unless !$seentop;
1043 return $self->end_document();
1046 =head2 start_element
1048 Title : start_element
1049 Usage : $eventgenerator->start_element
1050 Function: Handles a start element event
1051 Returns : none
1052 Args : hashref with at least 2 keys 'Data' and 'Name'
1055 =cut
1057 sub start_element {
1058 my ( $self, $data ) = @_;
1060 # we currently don't care about attributes
1061 my $nm = $data->{'Name'};
1062 my $type = $MODEMAP{$nm};
1063 if ($type) {
1064 if ( $self->_eventHandler->will_handle($type) ) {
1065 my $func = sprintf( "start_%s", lc $type );
1066 $self->_eventHandler->$func( $data->{'Attributes'} );
1068 unshift @{ $self->{'_elements'} }, $type;
1070 if ( defined $type
1071 && $type eq 'result' )
1073 $self->{'_values'} = {};
1074 $self->{'_result'} = undef;
1078 =head2 end_element
1080 Title : start_element
1081 Usage : $eventgenerator->end_element
1082 Function: Handles an end element event
1083 Returns : none
1084 Args : hashref with at least 2 keys 'Data' and 'Name'
1087 =cut
1089 sub end_element {
1090 my ( $self, $data ) = @_;
1091 my $nm = $data->{'Name'};
1092 my $type = $MODEMAP{$nm};
1093 my $rc;
1095 if ( $nm eq 'HMMER_program' ) {
1096 if ( $self->{'_last_data'} =~ /(HMM\S+)/i ) {
1097 $self->{'_reporttype'} = uc $1;
1101 # Hsp are sort of weird, in that they end when another
1102 # object begins so have to detect this in end_element for now
1103 if ( $nm eq 'Hsp' ) {
1104 foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
1105 my $data = $self->{'_last_hspdata'}->{$_};
1106 if ($data && $_ eq 'Hsp_hseq') {
1107 # replace hmm '.' gap symbol by '-'
1108 $data =~ s/\./-/g;
1110 $self->element(
1112 'Name' => $_,
1113 'Data' => $data
1117 $self->{'_last_hspdata'} = {};
1119 if ($type) {
1120 if ( $self->_eventHandler->will_handle($type) ) {
1121 my $func = sprintf( "end_%s", lc $type );
1122 $rc = $self->_eventHandler->$func( $self->{'_reporttype'},
1123 $self->{'_values'} );
1125 my $lastelem = shift @{ $self->{'_elements'} };
1127 elsif ( $MAPPING{$nm} ) {
1128 if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
1129 my $key = ( keys %{ $MAPPING{$nm} } )[0];
1130 $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } =
1131 $self->{'_last_data'};
1133 else {
1134 $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
1137 else {
1138 $self->debug("unknown nm $nm, ignoring\n");
1140 $self->{'_last_data'} = ''; # remove read data if we are at
1141 # end of an element
1142 $self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
1143 return $rc;
1146 =head2 element
1148 Title : element
1149 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
1150 Function: Convience method that calls start_element, characters, end_element
1151 Returns : none
1152 Args : Hash ref with the keys 'Name' and 'Data'
1155 =cut
1157 sub element {
1158 my ( $self, $data ) = @_;
1159 $self->start_element($data);
1160 $self->characters($data);
1161 $self->end_element($data);
1164 =head2 characters
1166 Title : characters
1167 Usage : $eventgenerator->characters($str)
1168 Function: Send a character events
1169 Returns : none
1170 Args : string
1173 =cut
1175 sub characters {
1176 my ( $self, $data ) = @_;
1178 if ( $self->in_element('hsp')
1179 && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o
1180 && defined $data->{'Data'} )
1182 $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
1184 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o );
1186 $self->{'_last_data'} = $data->{'Data'};
1189 =head2 within_element
1191 Title : within_element
1192 Usage : if( $eventgenerator->within_element($element) ) {}
1193 Function: Test if we are within a particular element
1194 This is different than 'in' because within can be tested
1195 for a whole block.
1196 Returns : boolean
1197 Args : string element name
1200 =cut
1202 sub within_element {
1203 my ( $self, $name ) = @_;
1204 return 0
1205 if ( !defined $name
1206 || !defined $self->{'_elements'}
1207 || scalar @{ $self->{'_elements'} } == 0 );
1208 foreach ( @{ $self->{'_elements'} } ) {
1209 return 1 if ( $_ eq $name );
1211 return 0;
1214 =head2 in_element
1216 Title : in_element
1217 Usage : if( $eventgenerator->in_element($element) ) {}
1218 Function: Test if we are in a particular element
1219 This is different than 'within' because 'in' only
1220 tests its immediete parent.
1221 Returns : boolean
1222 Args : string element name
1225 =cut
1227 sub in_element {
1228 my ( $self, $name ) = @_;
1229 return 0 if !defined $self->{'_elements'}->[0];
1230 return ( $self->{'_elements'}->[0] eq $name );
1233 =head2 start_document
1235 Title : start_document
1236 Usage : $eventgenerator->start_document
1237 Function: Handle a start document event
1238 Returns : none
1239 Args : none
1242 =cut
1244 sub start_document {
1245 my ($self) = @_;
1246 $self->{'_lasttype'} = '';
1247 $self->{'_values'} = {};
1248 $self->{'_result'} = undef;
1249 $self->{'_elements'} = [];
1252 =head2 end_document
1254 Title : end_document
1255 Usage : $eventgenerator->end_document
1256 Function: Handles an end document event
1257 Returns : Bio::Search::Result::ResultI object
1258 Args : none
1261 =cut
1263 sub end_document {
1264 my ($self) = @_;
1265 return $self->{'_result'};
1268 =head2 result_count
1270 Title : result_count
1271 Usage : my $count = $searchio->result_count
1272 Function: Returns the number of results we have processed
1273 Returns : integer
1274 Args : none
1277 =cut
1279 sub result_count {
1280 my $self = shift;
1281 return $self->{'_result_count'};