Add partial support for nhmmer (version 3.1)
[bioperl-live.git] / Bio / SearchIO / hmmer3.pm
blobc31eec737f86ac37c9e750ef667981d0758219be
2 # BioPerl module for Bio::SearchIO::hmmer3
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Thomas Sharpton <thomas.sharpton@gmail.com>
8 # Copyright Thomas Sharpton
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SearchIO::hmmer3
18 =head1 SYNOPSIS
20 use Bio::SearchIO;
22 my $searchio = Bio::SearchIO->new(
23 -format => 'hmmer',
24 -version => 3,
25 -file => 'hmmsearch.out'
28 my $result = $searchio->next_result;
29 my $hit = $result->next_hit;
30 print $hit->name, $hit->description, $hit->significance,
31 $hit->score, "\n";
33 my $hsp = $hit->next_hsp;
34 print $hsp->start('hit'), $hsp->end('hit'), $hsp->start('query'),
35 $hsp->end('query'), "\n";
37 =head1 DESCRIPTION
39 Code to parse output from hmmsearch, hmmscan, and nhmmer, compatible with
40 both version 2 and version 3 of the HMMER package from L<http://hmmer.org>.
42 =head1 FEEDBACK
44 =head2 Mailing Lists
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to
48 the Bioperl mailing list. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 =head2 Support
55 Please direct usage questions or support issues to the mailing list:
57 L<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
64 =head2 Reporting Bugs
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 of the bugs and their resolution. Bug reports can be submitted via
68 the web:
70 https://redmine.open-bio.org/projects/bioperl/
72 =head1 AUTHOR - Thomas Sharpton
74 Email thomas.sharpton@gmail.com
76 Describe contact details here
78 =head1 CONTRIBUTORS
80 Additional contributors names and emails here
82 briano at bioteam.net
84 =head1 APPENDIX
86 The rest of the documentation details each of the object methods.
87 Internal methods are usually preceded with a _
89 =cut
91 # Let the code begin...
93 package Bio::SearchIO::hmmer3;
95 use strict;
96 use Data::Dumper;
97 use Bio::Factory::ObjectFactory;
98 use vars qw(%MAPPING %MODEMAP);
99 use base qw(Bio::SearchIO::hmmer);
101 BEGIN {
103 # mapping of HMMER items to Bioperl hash keys
104 %MODEMAP = (
105 'HMMER_Output' => 'result',
106 'Hit' => 'hit',
107 'Hsp' => 'hsp'
110 %MAPPING = (
111 'Hsp_bit-score' => 'HSP-bits',
112 'Hsp_score' => 'HSP-score',
113 'Hsp_evalue' => 'HSP-evalue',
114 'Hsp_query-from' => 'HSP-query_start',
115 'Hsp_query-to' => 'HSP-query_end',
116 'Hsp_query-strand' => 'HSP-query_strand',
117 'Hsp_hit-from' => 'HSP-hit_start',
118 'Hsp_hit-to' => 'HSP-hit_end',
119 'Hsp_hit-strand' => 'HSP-hit_strand',
120 'Hsp_positive' => 'HSP-conserved',
121 'Hsp_identity' => 'HSP-identical',
122 'Hsp_gaps' => 'HSP-hsp_gaps',
123 'Hsp_hitgaps' => 'HSP-hit_gaps',
124 'Hsp_querygaps' => 'HSP-query_gaps',
125 'Hsp_qseq' => 'HSP-query_seq',
126 'Hsp_hseq' => 'HSP-hit_seq',
127 'Hsp_midline' => 'HSP-homology_seq',
128 'Hsp_align-len' => 'HSP-hsp_length',
129 'Hsp_query-frame' => 'HSP-query_frame',
130 'Hsp_hit-frame' => 'HSP-hit_frame',
132 'Hit_id' => 'HIT-name',
133 'Hit_len' => 'HIT-length',
134 'Hit_accession' => 'HIT-accession',
135 'Hit_desc' => 'HIT-description',
136 'Hit_signif' => 'HIT-significance',
137 'Hit_score' => 'HIT-score',
139 'HMMER_program' => 'RESULT-algorithm_name',
140 'HMMER_version' => 'RESULT-algorithm_version',
141 'HMMER_query-def' => 'RESULT-query_name',
142 'HMMER_query-len' => 'RESULT-query_length',
143 'HMMER_query-acc' => 'RESULT-query_accession',
144 'HMMER_querydesc' => 'RESULT-query_description',
145 'HMMER_hmm' => 'RESULT-hmm_name',
146 'HMMER_seqfile' => 'RESULT-sequence_file',
147 'HMMER_db' => 'RESULT-database_name',
151 =head2 next_result
153 Title : next_result
154 Usage : my $hit = $searchio->next_result;
155 Function: Returns the next Result from a search
156 Returns : Bio::Search::Result::ResultI object
157 Args : none
159 =cut
161 sub next_result {
162 my ($self) = @_;
163 my $seentop = 0; # Placeholder for when we deal with multi-query reports
164 my $reporttype;
165 my ( $last, @hit_list, @hsp_list, %hspinfo, %hitinfo, %domaincounter );
166 local $/ = "\n";
167 local $_;
169 my $verbose = $self->verbose; # cache for speed? JES's idea in hmmer.pm
170 $self->start_document();
171 local ($_);
173 # This is here to ensure that next_result doesn't produce infinite loop
174 if ( !defined( $_ = $self->_readline ) ) {
175 return undef;
177 else {
178 $self->_pushback($_);
181 # Regex goes here for HMMER3
182 # Start with hmmsearch processing
183 while ( defined( $_ = $self->_readline ) ) {
184 my $lineorig = $_;
185 chomp;
187 # Grab the program name
188 if ( $_ =~ m/^\#\s(\S+)\s\:\:\s/ ) {
189 my $prog = $1;
191 # TO DO: customize the above regex to adapt to other
192 # program types (hmmscan, etc)
193 $self->start_element( { 'Name' => 'HMMER_Output' } );
194 $self->{'_result_count'}++; #Might need to move to another block
195 $self->element(
196 { 'Name' => 'HMMER_program',
197 'Data' => uc($prog)
202 # Get the HMMER package version and release date
203 elsif ( $_ =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/ ) {
204 my $version = $1;
205 my $versiondate = $2;
206 $self->{'_hmmidline'} = $_;
207 $self->element(
208 { 'Name' => 'HMMER_version',
209 'Data' => $version
214 # Get the query info
215 elsif ( $_ =~ /^\#\squery \w+ file\:\s+(\S+)/ ) {
216 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
217 || $self->{'_reporttype'} eq 'NHMMER' )
219 $self->{'_hmmfileline'} = $lineorig;
220 $self->element(
221 { 'Name' => 'HMMER_hmm',
222 'Data' => $1
226 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
227 $self->{'_hmmseqline'} = $lineorig;
228 $self->element(
229 { 'Name' => 'HMMER_seqfile',
230 'Data' => $1
236 # If this is a report without alignments
237 elsif ( $_ =~ m/^\#\sshow\salignments\sin\soutput/ ) {
238 $self->{'_alnreport'} = 0;
241 # Get the database info
242 elsif ( $_ =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ) {
244 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
245 || $self->{'_reporttype'} eq 'NHMMER' )
247 $self->{'_hmmseqline'} = $lineorig;
248 $self->element(
249 { 'Name' => 'HMMER_seqfile',
250 'Data' => $1
254 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
255 $self->{'_hmmfileline'} = $lineorig;
256 $self->element(
257 { 'Name' => 'HMMER_hmm',
258 'Data' => $1
264 # Get query data
265 elsif ( $_ =~ s/^Query:\s+// ) {
267 # TO DO: code to deal with multi-query report
268 unless (s/\s+\[[L|M]\=(\d+)\]$//) {
269 warn "Error parsing length for query, offending line $_\n";
270 exit(0);
272 my $querylen = $1;
273 $self->element(
274 { 'Name' => 'HMMER_query-len',
275 'Data' => $querylen
278 $self->element(
279 { 'Name' => 'HMMER_query-def',
280 'Data' => $_
285 # Get Accession data
286 elsif ( $_ =~ s/^Accession:\s+// ) {
287 s/\s+$//;
288 $self->element(
289 { 'Name' => 'HMMER_query-acc',
290 'Data' => $_
295 # Get description data
296 elsif ( $_ =~ s/^Description:\s+// ) {
297 s/\s+$//;
298 $self->element(
299 { 'Name' => 'HMMER_querydesc',
300 'Data' => $_
305 # hmmsearch, nhmmer, and hmmscan-specific formatting here
306 elsif (
307 defined $self->{'_reporttype'}
308 && ( $self->{'_reporttype'} eq 'HMMSEARCH'
309 || $self->{'_reporttype'} eq 'HMMSCAN'
310 || $self->{'_reporttype'} eq 'NHMMER' )
313 # Complete sequence table data above inclusion threshold,
314 # hmmsearch or hmmscan
315 if ( $_ =~ m/Scores for complete sequence/ ) {
316 while ( defined( $_ = $self->_readline ) ) {
317 if ( $_ =~ m/inclusion threshold/
318 || m/Domain( and alignment)? annotation for each/
319 || m/\[No hits detected/
320 || m!^//! )
322 $self->_pushback($_);
323 last;
326 # Grab table data
327 next if ( m/\-\-\-/ || m/^\s+E-value\s+score/ || m/^$/ );
328 my ($eval_full, $score_full, $bias_full, $eval_best,
329 $score_best, $bias_best, $exp, $n,
330 $hitid, $desc, @hitline
332 @hitline = split( " ", $_ );
333 $eval_full = shift @hitline;
334 $score_full = shift @hitline;
335 $bias_full = shift @hitline;
336 $eval_best = shift @hitline;
337 $score_best = shift @hitline;
338 $bias_best = shift @hitline;
339 $exp = shift @hitline;
340 $n = shift @hitline;
341 $hitid = shift @hitline;
342 $desc = join " ", @hitline;
344 if ( !defined($desc) ) {
345 $desc = "";
347 push @hit_list,
348 [ $hitid, $desc, $eval_full, $score_full ];
349 $hitinfo{$hitid} = $#hit_list;
353 # nhmmer
354 if ( $_ =~ m/Scores for complete hits/ ) {
355 while ( defined( $_ = $self->_readline ) ) {
356 if ( $_ =~ m/inclusion threshold/
357 || m/Annotation for each hit/
358 || m/\[No hits detected/
359 || m!^//! )
361 $self->_pushback($_);
362 last;
365 # Grab table data
366 next if ( m/\-\-\-/ || m/^\s+E-value\s+score/ || m/^$/ );
367 my ($eval, $score, $bias, $hitid,
368 $start, $end, $desc, @hitline
370 @hitline = split( " ", $_ );
371 $eval = shift @hitline;
372 $score = shift @hitline;
373 $bias = shift @hitline;
374 $hitid = shift @hitline;
375 $start = shift @hitline;
376 $end = shift @hitline;
377 $desc = join ' ', @hitline;
379 $desc = '' if ( !defined($desc) );
381 push @hit_list, [ $hitid, $desc, $eval, $score ];
382 $hitinfo{$hitid} = $#hit_list;
386 # Complete sequence table data below inclusion threshold
387 # not currently fully implemented
388 elsif ( $_ =~ m/inclusion threshold/ ) {
389 while ( defined( $_ = $self->_readline ) ) {
390 if ( $_ =~ m/Domain( and alignment)? annotation for each/
391 || m/Internal pipeline statistics summary/ )
393 $self->_pushback($_);
394 last;
396 next if ( $_ =~ m/^$/ );
397 my ($eval_full, $score_full, $bias_full, $eval_best,
398 $score_best, $bias_best, $exp, $n,
399 $hitid, $desc, @hitline
401 @hitline = split( " ", $_ );
402 $eval_full = shift @hitline;
403 $score_full = shift @hitline;
404 $bias_full = shift @hitline;
405 $eval_best = shift @hitline;
406 $score_best = shift @hitline;
407 $bias_best = shift @hitline;
408 $exp = shift @hitline;
409 $n = shift @hitline;
410 $hitid = shift @hitline;
411 $desc = join " ", @hitline;
413 $hitinfo{$hitid} = "below_inclusion";
417 # Domain annotation for each sequence table data, hmmscan
418 elsif ( $_ =~ m/Domain( and alignment)? annotation for each/ ) {
419 @hsp_list = (); # Here for multi-query reports
420 my $name;
422 while ( defined( $_ = $self->_readline ) ) {
423 if ( $_ =~ m/Internal pipeline statistics/
424 || m/\[No targets detected/ )
426 $self->_pushback($_);
427 last;
429 if ( $_ =~ m/^\>\>\s(.*?)\s+/ ) {
430 $name = $1;
432 # Skip hits below inclusion threshold
433 next if ( $hitinfo{$name} eq "below_inclusion" );
434 $domaincounter{$name} = 0;
436 while ( defined( $_ = $self->_readline ) ) {
438 # Grab table data for sequence
439 if ( $_ =~ m/Internal pipeline statistics/
440 || $_ =~ m/^\>\>/ )
442 $self->_pushback($_);
443 last;
445 if ( $_ =~ m/Alignments for each domain/ ) {
446 $self->_pushback($_);
447 last;
449 if ( $_ =~ m/^\s+\#\s+score/
450 || $_ =~ m/^\s\-\-\-\s+/
453 # $_ =~ m/^\>\>/ ||
454 $_ =~ m/^$/
457 next;
460 # Grab hsp data from table, push into @hsp;
461 if (my ($domain_num, $score, $bias,
462 $ceval, $ieval, $hmmstart,
463 $hmmstop, $qalistart, $qalistop,
464 $envstart, $envstop, $envbound,
465 $acc
467 = m|^\s+(\d+)\s\!*\?*\s+ # domain number
468 (\S+)\s+(\S+)\s+ # score, bias
469 (\S+)\s+(\S+)\s+ # c-eval, i-eval
470 (\d+)\s+(\d+).+? # hmm start, stop
471 (\d+)\s+(\d+).+? # query start, stop
472 (\d+)\s+(\d+).+? # env start, stop
473 (\S+) # Accession
474 \s*$|ox
477 # Keep it simple for now. let's customize later
478 my @vals = (
479 $hmmstart, $hmmstop,
480 $qalistart, $qalistop,
481 $score, $ceval,
482 '', '',
485 my $info = $hit_list[ $hitinfo{$name} ];
486 if ( !defined $info ) {
487 $self->warn(
488 "Incomplete sequence information; can't find $name, hitinfo says $hitinfo{$name}\n"
490 next;
492 $domaincounter{$name}++;
493 my $hsp_key
494 = $name . "_" . $domaincounter{$name};
495 push @hsp_list, [ $name, @vals ];
496 $hspinfo{$hsp_key} = $#hsp_list;
498 else {
499 print "missed this line: $_\n";
503 elsif ( $_ =~ m/Alignments for each domain/ ) {
504 my $domain_count = 0;
506 #line counter
507 my $count = 0;
509 # There's an optional block, so we sometimes need to
510 # count to 3, and sometimes to 4.
511 my $max_count = 3;
512 my $lastdomain;
513 my $hsp;
514 my ( $hline, $midline, $qline );
516 while ( defined( $_ = $self->_readline ) ) {
517 if ( $_ =~ m/^\>\>/
518 || $_ =~ m/Internal pipeline statistics/ )
520 $self->_pushback($_);
521 last;
523 elsif ($hitinfo{$name} eq "below_inclusion"
524 || $_ =~ m/^$/ )
526 next;
528 elsif ( $_ =~ /\s\s\=\=\sdomain\s(\d+)\s+/ ) {
529 my $domainnum = $1;
530 $count = 0;
531 my $key = $name . "_" . $domainnum;
532 $hsp = $hsp_list[ $hspinfo{$key} ];
533 $hline = $$hsp[-3];
534 $midline = $$hsp[-2];
535 $qline = $$hsp[-1];
536 $lastdomain = $name;
539 # model data track, some reports don't have
540 elsif ( $_ =~ m/\s+\S+\sCS$/ ) {
541 my $modeltrack = $_;
542 $max_count++;
543 $count++;
544 next;
546 elsif ( $count == $max_count - 3 ) {
548 # hit sequence
549 my @data = split( " ", $_ );
550 my $seq = $data[-2];
551 $hline .= $seq;
552 $count++;
553 next;
555 elsif ( $count == $max_count - 2 ) {
557 # conservation track
558 # storage isn't quite right - need to remove
559 # leading/lagging whitespace while preserving
560 # gap data (latter isn't done, former is)
561 $_ =~ s/^\s+//;
562 $_ =~ s/\s+$//;
563 $midline .= $_;
564 $count++;
565 next;
567 elsif ( $count == $max_count - 1 ) {
569 # query track
570 my @data = split( " ", $_ );
571 my $seq = $data[-2];
572 $qline .= $seq;
573 $count++;
574 next;
576 elsif ( $count == $max_count ) {
578 #pval track
579 my $pvals = $_;
580 $count = 0;
581 $max_count = 3;
582 $$hsp[-3] = $hline;
583 $$hsp[-2] = $midline;
584 $$hsp[-1] = $qline;
585 next;
587 else {
588 print "missed $_\n";
595 # Annotation for each hit, nhmmer
596 # This code is currently incomplete, the alignment strings
597 # are not being captured
598 elsif ( $_ =~ m/Annotation for each hit\s+\(and alignments\)/ ) {
599 @hsp_list = ();
600 my $name;
602 while ( defined( $_ = $self->_readline ) ) {
603 if ( $_ =~ m/Internal pipeline statistics/
604 || m/\[No targets detected/ )
606 $self->_pushback($_);
607 last;
609 if ( $_ =~ m/^>>\s(.*?)\s+/ ) {
610 $name = $1;
612 while ( defined( $_ = $self->_readline ) ) {
614 if ( $_ =~ m/Internal pipeline statistics/
615 || $_ =~ m/^>>/ )
617 $self->_pushback($_);
618 last;
620 elsif (
621 $_ =~ /^\s+#\s+score/
622 || $_ =~ /^\s+------\s+/
623 || $_ =~ /^>>/
624 || $_ =~ /^$/
625 || $_ =~ /^\s+Alignment:/
626 || $_ =~ /^\s+score:/
627 || $_ =~ /^\s+score\s+bias/
628 || $_ =~ /^\s+\S+\s+\d+\s+([\s+gatc-]+)/ # Alignment, line 1
629 || $_ =~ /^\s{20,}([\s+gatc-]+)/ # Alignment, line 2
630 || $_ =~ /^\s+$name\s+\d+\s+([\s+GATC-]+)/ # Alignment, line 3
631 || $_ =~ /^\s+[\d.\*]+/ # Alignment, line 4
634 next;
636 elsif (
637 /^\s+!\s+(\S+)\s+
638 (\S+)\s+(\S+)\s+
639 (\d+)\s+(\d+)\s+[.\[\]]*\s+
640 (\d+)\s+(\d+)\s+[.\[\]]*\s+
641 (\d+)\s+(\d+)\s+[.\[\]]*\s+
642 (\d+)\s+(\S+).*$/ox
645 my ($score, $bias, $eval,
646 $hmmstart, $hmmstop, $hitstart,
647 $hitstop, $envstart, $envstop,
648 $length, $acc
651 $1, $2, $3, $4, $5, $6,
652 $7, $8, $9, $10, $11
655 my @vals = (
656 $hitstart, $hitstop,
657 $hmmstart, $hmmstop,
658 $score, $eval,
659 '', '',
662 my $info = $hit_list[ $hitinfo{$name} ];
663 if ( !defined $info ) {
664 $self->warn(
665 "Incomplete information: can't find HSP $name in list of hits\n"
667 next;
669 $domaincounter{$name}++;
670 my $hsp_key = $name . "_" . $domaincounter{$name};
671 push @hsp_list, [ $name, @vals ];
672 $hspinfo{$hsp_key} = $#hsp_list;
674 else {
675 print "Missed this line: $_\n";
682 elsif ( m/Internal pipeline statistics/ || m!^//! ) {
684 # If within hit, hsp close;
685 if ( $self->within_element('hit') ) {
686 if ( $self->within_element('hsp') ) {
687 $self->end_element( { 'Name' => 'Hsp' } );
689 $self->end_element( { 'Name' => 'Hit' } );
692 # Grab summary statistics of run
693 while ( defined( $_ = $self->_readline ) ) {
694 last if ( $_ =~ m/^\/\/$/ );
697 # Do a lot of processing of hits and hsps here
698 while ( my $hit = shift @hit_list ) {
699 my $hit_name = shift @$hit;
700 my $hit_desc = shift @$hit;
701 my $hit_signif = shift @$hit;
702 my $hit_score = shift @$hit;
703 my $num_domains = $domaincounter{$hit_name} || 0;
705 $self->start_element( { 'Name' => 'Hit' } );
706 $self->element(
707 { 'Name' => 'Hit_id',
708 'Data' => $hit_name
711 $self->element(
712 { 'Name' => 'Hit_desc',
713 'Data' => $hit_desc
716 $self->element(
717 { 'Name' => 'Hit_signif',
718 'Data' => $hit_signif
721 $self->element(
722 { 'Name' => 'Hit_score',
723 'Data' => $hit_score
727 for my $i ( 1 .. $num_domains ) {
728 my $key = $hit_name . "_" . $i;
729 my $hsp = $hsp_list[ $hspinfo{$key} ];
730 if ( defined $hsp ) {
731 my $hsp_name = shift @$hsp;
732 $self->start_element( { 'Name' => 'Hsp' } );
733 $self->element(
734 { 'Name' => 'Hsp_identity',
735 'Data' => 0
738 $self->element(
739 { 'Name' => 'Hsp_positive',
740 'Data' => 0
743 $self->element(
744 { 'Name' => 'Hsp_hit-from',
745 'Data' => shift @$hsp
748 $self->element(
749 { 'Name' => 'Hsp_hit-to',
750 'Data' => shift @$hsp
753 $self->element(
754 { 'Name' => 'Hsp_query-from',
755 'Data' => shift @$hsp
758 $self->element(
759 { 'Name' => 'Hsp_query-to',
760 'Data' => shift @$hsp
763 $self->element(
764 { 'Name' => 'Hsp_score',
765 'Data' => shift @$hsp
768 $self->element(
769 { 'Name' => 'Hsp_evalue',
770 'Data' => shift @$hsp
773 $self->element(
774 { 'Name' => 'Hsp_hseq',
775 'Data' => shift @$hsp
778 $self->element(
779 { 'Name' => 'Hsp_midline',
780 'Data' => shift @$hsp
783 $self->element(
784 { 'Name' => 'Hsp_qseq',
785 'Data' => shift @$hsp
788 $self->end_element( { 'Name' => 'Hsp' } );
791 $self->end_element( { 'Name' => 'Hit' } );
793 @hit_list = ();
794 %hitinfo = ();
795 last;
798 else {
799 print "Missed line: $_\n";
800 $self->debug($_);
802 $last = $_;
804 $self->end_element( { 'Name' => 'HMMER_Output' } );
805 my $result = $self->end_document();
806 return $result;
809 =head2 start_element
811 Title : start_element
812 Usage : $eventgenerator->start_element
813 Function: Handles a start event
814 Returns : none
815 Args : hashref with at least 2 keys 'Data' and 'Name'
817 =cut
819 sub start_element {
821 my ( $self, $data ) = @_;
823 # we currently don't care about attributes
824 my $nm = $data->{'Name'};
825 my $type = $MODEMAP{$nm};
826 if ($type) {
827 if ( $self->_eventHandler->will_handle($type) ) {
828 my $func = sprintf( "start_%s", lc $type );
829 $self->_eventHandler->$func( $data->{'Attributes'} );
831 unshift @{ $self->{'_elements'} }, $type;
833 if ( defined $type
834 && $type eq 'result' )
836 $self->{'_values'} = {};
837 $self->{'_result'} = undef;
841 =head2 end_element
843 Title : end_element
844 Usage : $eventgeneartor->end_element
845 Function: Handles and end element event
846 Returns : none
847 Args : hashref with at least 2 keys 'Data' and 'Name'
849 =cut
851 sub end_element {
853 my ( $self, $data ) = @_;
854 my $nm = $data->{'Name'};
855 my $type = $MODEMAP{$nm};
856 my $rc;
858 if ( $nm eq 'HMMER_program' ) {
859 if ( $self->{'_last_data'} =~ /(N?HMM\S+)/i ) {
860 $self->{'_reporttype'} = uc $1;
864 # Hsp are sort of weird, in that they end when another
865 # object begins so have to detect this in end_element for now
866 if ( $nm eq 'Hsp' ) {
867 foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
868 my $data = $self->{'_last_hspdata'}->{$_};
869 if ( $data && $_ eq 'Hsp_hseq' ) {
871 # replace hmm '.' gap symbol by '-'
872 $data =~ s/\./-/g;
874 $self->element(
875 { 'Name' => $_,
876 'Data' => $data
880 $self->{'_last_hspdata'} = {};
882 if ($type) {
883 if ( $self->_eventHandler->will_handle($type) ) {
884 my $func = sprintf( "end_%s", lc $type );
885 $rc = $self->_eventHandler->$func( $self->{'_reporttype'},
886 $self->{'_values'} );
888 my $lastelem = shift @{ $self->{'_elements'} };
890 elsif ( $MAPPING{$nm} ) {
891 if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
892 my $key = ( keys %{ $MAPPING{$nm} } )[0];
893 $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} }
894 = $self->{'_last_data'};
896 else {
897 $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
899 # print "lastdata is " . $self->{'_last_data'} . "\n";
902 else {
903 $self->debug("unknown nm $nm, ignoring\n");
905 $self->{'_last_data'} = ''; # remove read data if we are at
906 # end of an element
907 $self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
908 return $rc;
911 =head2 element
913 Title : element
914 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
915 Function: Convienence method that calls start_element, characters, end_element
916 Returns : none
917 Args : Hash ref with the keys 'Name' and 'Data'
919 =cut
921 sub element {
922 my ( $self, $data ) = @_;
923 $self->start_element($data);
924 $self->characters($data);
925 $self->end_element($data);
928 =head2 characters
930 Title : characters
931 Usage : $eventgenerator->characters($str)
932 Function: Send a character events
933 Returns : none
934 Args : string
936 =cut
938 sub characters {
939 my ( $self, $data ) = @_;
941 if ( $self->in_element('hsp')
942 && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o
943 && defined $data->{'Data'} )
945 $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
947 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o );
949 $self->{'_last_data'} = $data->{'Data'};
952 =head2 within_element
954 Title : within_element
955 Usage : if( $eventgenerator->within_element( $element ) ) {}
956 Function: Test if we are within a particular element
957 This is different than 'in' because within can be tested for
958 a whole block
959 Returns : boolean
960 Args : string element name
962 =cut
964 sub within_element {
965 my ( $self, $name ) = @_;
966 return 0
967 if ( !defined $name
968 || !defined $self->{'_elements'}
969 || scalar @{ $self->{'_elements'} } == 0 );
970 foreach ( @{ $self->{'_elements'} } ) {
971 return 1 if ( $_ eq $name );
973 return 0;
976 =head2 in_element
978 Title : in_element
979 Usage : if( $eventgenerator->in_element( $element ) ) {}
980 Function: Test if we are in a particular element
981 This is different than 'within' because 'in' only
982 tests its immediate parent
983 Returns : boolean
984 Args : string element name
986 =cut
988 sub in_element {
989 my ( $self, $name ) = @_;
990 return 0 if !defined $self->{'_elements'}->[0];
991 return ( $self->{'_elements'}->[0] eq $name );
994 =head2 start_document
996 Title : start_document
997 Usage : $eventgenerator->start_document
998 Function: Handle a start document event
999 Returns : none
1000 Args : none
1002 =cut
1004 sub start_document {
1005 my ($self) = @_;
1006 $self->{'_lasttype'} = '';
1007 $self->{'_values'} = {};
1008 $self->{'_result'} = undef;
1009 $self->{'_elements'} = [];
1012 =head2 end_document
1014 Title : end_document
1015 Usage : $eventgenerator->end_document
1016 Function: Handles an end document event
1017 Returns : Bio::Search::Result::ResultI object
1018 Args : none
1020 =cut
1022 sub end_document {
1023 my ($self) = @_;
1024 return $self->{'_result'};
1027 =head2 result_count
1029 Title : result_count
1030 Usage : my $count = $searchio->result_count
1031 Function: Returns the number of results processed
1032 Returns : interger
1033 Args : none
1035 =cut
1037 sub result_count {
1038 my $self = shift;
1039 return $self->{'_result_count'};