Sorry about this, analyzing new output files is leading to minor changes in regex's
[bioperl-live.git] / Bio / SearchIO / hmmer3.pm
blob9f7fc91ddcaf78a1137bf14e3eba84bee3340312
2 # BioPerl module for Bio::SearchIO::hmmer3
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Thomas Sharpton <thomas.sharpton@gmail.com>
8 # Copyright Thomas Sharpton
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SearchIO::hmmer3
18 =head1 SYNOPSIS
20 use Bio::SearchIO;
22 my $searchio = Bio::SearchIO->new(
23 -format => 'hmmer',
24 -version => 3,
25 -file => 'hmmsearch.out'
28 my $result = $searchio->next_result;
29 my $hit = $result->next_hit;
30 print $hit->name, $hit->description, $hit->significance,
31 $hit->score, "\n";
33 my $hsp = $hit->next_hsp;
34 print $hsp->start('hit'), $hsp->end('hit'), $hsp->start('query'),
35 $hsp->end('query'), "\n";
37 =head1 DESCRIPTION
39 Code to parse output from hmmsearch, hmmscan, and nhmmer, compatible with
40 both version 2 and version 3 of the HMMER package from L<http://hmmer.org>.
42 =head1 FEEDBACK
44 =head2 Mailing Lists
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to
48 the Bioperl mailing list. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 =head2 Support
55 Please direct usage questions or support issues to the mailing list:
57 L<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
64 =head2 Reporting Bugs
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 of the bugs and their resolution. Bug reports can be submitted via
68 the web:
70 https://redmine.open-bio.org/projects/bioperl/
72 =head1 AUTHOR - Thomas Sharpton
74 Email thomas.sharpton@gmail.com
76 Describe contact details here
78 =head1 CONTRIBUTORS
80 Additional contributors names and emails here
82 briano at bioteam.net
84 =head1 APPENDIX
86 The rest of the documentation details each of the object methods.
87 Internal methods are usually preceded with a _
89 =cut
91 # Let the code begin...
93 package Bio::SearchIO::hmmer3;
95 use strict;
96 use Data::Dumper;
97 use Bio::Factory::ObjectFactory;
98 use vars qw(%MAPPING %MODEMAP);
99 use base qw(Bio::SearchIO::hmmer);
101 BEGIN {
103 # mapping of HMMER items to Bioperl hash keys
104 %MODEMAP = (
105 'HMMER_Output' => 'result',
106 'Hit' => 'hit',
107 'Hsp' => 'hsp'
110 %MAPPING = (
111 'Hsp_bit-score' => 'HSP-bits',
112 'Hsp_score' => 'HSP-score',
113 'Hsp_evalue' => 'HSP-evalue',
114 'Hsp_query-from' => 'HSP-query_start',
115 'Hsp_query-to' => 'HSP-query_end',
116 'Hsp_query-strand' => 'HSP-query_strand',
117 'Hsp_hit-from' => 'HSP-hit_start',
118 'Hsp_hit-to' => 'HSP-hit_end',
119 'Hsp_hit-strand' => 'HSP-hit_strand',
120 'Hsp_positive' => 'HSP-conserved',
121 'Hsp_identity' => 'HSP-identical',
122 'Hsp_gaps' => 'HSP-hsp_gaps',
123 'Hsp_hitgaps' => 'HSP-hit_gaps',
124 'Hsp_querygaps' => 'HSP-query_gaps',
125 'Hsp_qseq' => 'HSP-query_seq',
126 'Hsp_hseq' => 'HSP-hit_seq',
127 'Hsp_midline' => 'HSP-homology_seq',
128 'Hsp_align-len' => 'HSP-hsp_length',
129 'Hsp_query-frame' => 'HSP-query_frame',
130 'Hsp_hit-frame' => 'HSP-hit_frame',
132 'Hit_id' => 'HIT-name',
133 'Hit_len' => 'HIT-length',
134 'Hit_accession' => 'HIT-accession',
135 'Hit_desc' => 'HIT-description',
136 'Hit_signif' => 'HIT-significance',
137 'Hit_score' => 'HIT-score',
139 'HMMER_program' => 'RESULT-algorithm_name',
140 'HMMER_version' => 'RESULT-algorithm_version',
141 'HMMER_query-def' => 'RESULT-query_name',
142 'HMMER_query-len' => 'RESULT-query_length',
143 'HMMER_query-acc' => 'RESULT-query_accession',
144 'HMMER_querydesc' => 'RESULT-query_description',
145 'HMMER_hmm' => 'RESULT-hmm_name',
146 'HMMER_seqfile' => 'RESULT-sequence_file',
147 'HMMER_db' => 'RESULT-database_name',
151 =head2 next_result
153 Title : next_result
154 Usage : my $hit = $searchio->next_result;
155 Function: Returns the next Result from a search
156 Returns : Bio::Search::Result::ResultI object
157 Args : none
159 =cut
161 sub next_result {
162 my ($self) = @_;
163 my $seentop = 0; # Placeholder for when we deal with multi-query reports
164 my $reporttype;
165 my ( $last, @hit_list, @hsp_list, %hspinfo, %hitinfo, %domaincounter );
166 local $/ = "\n";
167 local $_;
169 my $verbose = $self->verbose; # cache for speed? JES's idea in hmmer.pm
170 $self->start_document();
171 local ($_);
173 # This is here to ensure that next_result doesn't produce infinite loop
174 if ( !defined( $_ = $self->_readline ) ) {
175 return undef;
177 else {
178 $self->_pushback($_);
181 # Regex goes here for HMMER3
182 # Start with hmmsearch processing
183 while ( defined( $_ = $self->_readline ) ) {
184 my $lineorig = $_;
185 chomp;
187 # Grab the program name
188 if ( $_ =~ m/^\#\s(\S+)\s\:\:\s/ ) {
189 my $prog = $1;
191 # TO DO: customize the above regex to adapt to other
192 # program types (hmmscan, etc)
193 $self->start_element( { 'Name' => 'HMMER_Output' } );
194 $self->{'_result_count'}++; #Might need to move to another block
195 $self->element(
196 { 'Name' => 'HMMER_program',
197 'Data' => uc($prog)
202 # Get the HMMER package version and release date
203 elsif ( $_ =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/ ) {
204 my $version = $1;
205 my $versiondate = $2;
206 $self->{'_hmmidline'} = $_;
207 $self->element(
208 { 'Name' => 'HMMER_version',
209 'Data' => $version
214 # Get the query info
215 elsif ( $_ =~ /^\#\squery \w+ file\:\s+(\S+)/ ) {
216 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
217 || $self->{'_reporttype'} eq 'NHMMER' )
219 $self->{'_hmmfileline'} = $lineorig;
220 $self->element(
221 { 'Name' => 'HMMER_hmm',
222 'Data' => $1
226 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
227 $self->{'_hmmseqline'} = $lineorig;
228 $self->element(
229 { 'Name' => 'HMMER_seqfile',
230 'Data' => $1
236 # If this is a report without alignments
237 elsif ( $_ =~ m/^\#\sshow\salignments\sin\soutput/ ) {
238 $self->{'_alnreport'} = 0;
241 # Get the database info
242 elsif ( $_ =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ) {
244 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
245 || $self->{'_reporttype'} eq 'NHMMER' )
247 $self->{'_hmmseqline'} = $lineorig;
248 $self->element(
249 { 'Name' => 'HMMER_seqfile',
250 'Data' => $1
254 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
255 $self->{'_hmmfileline'} = $lineorig;
256 $self->element(
257 { 'Name' => 'HMMER_hmm',
258 'Data' => $1
264 # Get query data
265 elsif ( $_ =~ s/^Query:\s+// ) {
267 # TO DO: code to deal with multi-query report
268 unless (s/\s+\[[L|M]\=(\d+)\]$//) {
269 warn "Error parsing length for query, offending line $_\n";
270 exit(0);
272 my $querylen = $1;
273 $self->element(
274 { 'Name' => 'HMMER_query-len',
275 'Data' => $querylen
278 $self->element(
279 { 'Name' => 'HMMER_query-def',
280 'Data' => $_
285 # Get Accession data
286 elsif ( $_ =~ s/^Accession:\s+// ) {
287 s/\s+$//;
288 $self->element(
289 { 'Name' => 'HMMER_query-acc',
290 'Data' => $_
295 # Get description data
296 elsif ( $_ =~ s/^Description:\s+// ) {
297 s/\s+$//;
298 $self->element(
299 { 'Name' => 'HMMER_querydesc',
300 'Data' => $_
305 # hmmsearch, nhmmer, and hmmscan-specific formatting here
306 elsif (
307 defined $self->{'_reporttype'}
308 && ( $self->{'_reporttype'} eq 'HMMSEARCH'
309 || $self->{'_reporttype'} eq 'HMMSCAN'
310 || $self->{'_reporttype'} eq 'NHMMER' )
313 # Complete sequence table data above inclusion threshold,
314 # hmmsearch or hmmscan
315 if ( $_ =~ m/Scores for complete sequence/ ) {
316 while ( defined( $_ = $self->_readline ) ) {
317 if ( $_ =~ m/inclusion threshold/
318 || m/Domain( and alignment)? annotation for each/
319 || m/\[No hits detected/
320 || m!^//! )
322 $self->_pushback($_);
323 last;
326 # Grab table data
327 next if ( m/\-\-\-/ || m/^\s+E-value\s+score/ || m/^$/ );
328 my ($eval_full, $score_full, $bias_full, $eval_best,
329 $score_best, $bias_best, $exp, $n,
330 $hitid, $desc, @hitline
332 @hitline = split( " ", $_ );
333 $eval_full = shift @hitline;
334 $score_full = shift @hitline;
335 $bias_full = shift @hitline;
336 $eval_best = shift @hitline;
337 $score_best = shift @hitline;
338 $bias_best = shift @hitline;
339 $exp = shift @hitline;
340 $n = shift @hitline;
341 $hitid = shift @hitline;
342 $desc = join " ", @hitline;
344 if ( !defined($desc) ) {
345 $desc = "";
347 push @hit_list,
348 [ $hitid, $desc, $eval_full, $score_full ];
349 $hitinfo{$hitid} = $#hit_list;
353 # nhmmer
354 if ( /Scores for complete hits/ ) {
355 while ( defined( $_ = $self->_readline ) ) {
357 if ( /inclusion threshold/
358 || /Annotation for each hit/
359 || /\[No hits detected/
360 || m!^//! )
362 $self->_pushback($_);
363 last;
366 # Grab table data
367 next if ( /\-\-\-/ || /^\s+E-value\s+score/ || /^$/ );
368 my ($eval, $score, $bias, $hitid,
369 $start, $end, $desc, @hitline
371 @hitline = split( " ", $_ );
372 $eval = shift @hitline;
373 $score = shift @hitline;
374 $bias = shift @hitline;
375 $hitid = shift @hitline;
376 $start = shift @hitline;
377 $end = shift @hitline;
378 $desc = join ' ', @hitline;
380 $desc = '' if ( !defined($desc) );
382 push @hit_list, [ $hitid, $desc, $eval, $score ];
383 $hitinfo{$hitid} = $#hit_list;
387 # Complete sequence table data below inclusion threshold
388 # not currently fully implemented
389 elsif ( /inclusion threshold/ ) {
390 while ( defined( $_ = $self->_readline ) ) {
391 if ( /Domain( and alignment)? annotation for each/
392 || /Internal pipeline statistics summary/
393 || /Annotation for each hit\s+\(and alignments\)/
396 $self->_pushback($_);
397 last;
399 next if ( $_ =~ m/^$/ );
400 my ($eval_full, $score_full, $bias_full, $eval_best,
401 $score_best, $bias_best, $exp, $n,
402 $hitid, $desc, @hitline
404 @hitline = split( " ", $_ );
405 $eval_full = shift @hitline;
406 $score_full = shift @hitline;
407 $bias_full = shift @hitline;
408 $eval_best = shift @hitline;
409 $score_best = shift @hitline;
410 $bias_best = shift @hitline;
411 $exp = shift @hitline;
412 $n = shift @hitline;
413 $hitid = shift @hitline;
414 $desc = join " ", @hitline;
416 $hitinfo{$hitid} = "below_inclusion";
420 # Domain annotation for each sequence table data, hmmscan
421 elsif ( /Domain( and alignment)? annotation for each/ ) {
422 @hsp_list = (); # Here for multi-query reports
423 my $name;
425 while ( defined( $_ = $self->_readline ) ) {
426 if ( /Internal pipeline statistics/
427 || /\[No targets detected/ )
429 $self->_pushback($_);
430 last;
432 if ( $_ =~ m/^\>\>\s(.*?)\s+/ ) {
433 $name = $1;
435 # Skip hits below inclusion threshold
436 next if ( $hitinfo{$name} eq "below_inclusion" );
437 $domaincounter{$name} = 0;
439 while ( defined( $_ = $self->_readline ) ) {
441 # Grab table data for sequence
442 if ( $_ =~ m/Internal pipeline statistics/
443 || $_ =~ m/^\>\>/ )
445 $self->_pushback($_);
446 last;
448 if ( $_ =~ m/Alignments for each domain/ ) {
449 $self->_pushback($_);
450 last;
452 if ( $_ =~ m/^\s+\#\s+score/
453 || $_ =~ m/^\s\-\-\-\s+/
456 # $_ =~ m/^\>\>/ ||
457 $_ =~ m/^$/
460 next;
463 # Grab hsp data from table, push into @hsp;
464 if (my ($domain_num, $score, $bias,
465 $ceval, $ieval, $hmmstart,
466 $hmmstop, $qalistart, $qalistop,
467 $envstart, $envstop, $envbound,
468 $acc
470 = m|^\s+(\d+)\s\!*\?*\s+ # domain number
471 (\S+)\s+(\S+)\s+ # score, bias
472 (\S+)\s+(\S+)\s+ # c-eval, i-eval
473 (\d+)\s+(\d+).+? # hmm start, stop
474 (\d+)\s+(\d+).+? # query start, stop
475 (\d+)\s+(\d+).+? # env start, stop
476 (\S+) # Accession
477 \s*$|ox
480 # Keep it simple for now. let's customize later
481 my @vals = (
482 $hmmstart, $hmmstop,
483 $qalistart, $qalistop,
484 $score, $ceval,
485 '', '',
488 my $info = $hit_list[ $hitinfo{$name} ];
489 if ( !defined $info ) {
490 $self->warn(
491 "Incomplete sequence information; can't find $name, hitinfo says $hitinfo{$name}\n"
493 next;
495 $domaincounter{$name}++;
496 my $hsp_key
497 = $name . "_" . $domaincounter{$name};
498 push @hsp_list, [ $name, @vals ];
499 $hspinfo{$hsp_key} = $#hsp_list;
501 else {
502 print "missed this line: $_\n";
506 elsif ( /Alignments for each domain/ ) {
507 my $domain_count = 0;
509 #line counter
510 my $count = 0;
512 # There's an optional block, so we sometimes need to
513 # count to 3, and sometimes to 4.
514 my $max_count = 3;
515 my $lastdomain;
516 my $hsp;
517 my ( $hline, $midline, $qline );
519 while ( defined( $_ = $self->_readline ) ) {
520 if ( $_ =~ m/^\>\>/
521 || $_ =~ m/Internal pipeline statistics/ )
523 $self->_pushback($_);
524 last;
526 elsif ($hitinfo{$name} eq "below_inclusion"
527 || $_ =~ m/^$/ )
529 next;
531 elsif ( $_ =~ /\s\s\=\=\sdomain\s(\d+)\s+/ ) {
532 my $domainnum = $1;
533 $count = 0;
534 my $key = $name . "_" . $domainnum;
535 $hsp = $hsp_list[ $hspinfo{$key} ];
536 $hline = $$hsp[-3];
537 $midline = $$hsp[-2];
538 $qline = $$hsp[-1];
539 $lastdomain = $name;
542 # model data track, some reports don't have
543 elsif ( $_ =~ m/\s+\S+\sCS$/ ) {
544 my $modeltrack = $_;
545 $max_count++;
546 $count++;
547 next;
549 elsif ( $count == $max_count - 3 ) {
551 # hit sequence
552 my @data = split( " ", $_ );
553 my $seq = $data[-2];
554 $hline .= $seq;
555 $count++;
556 next;
558 elsif ( $count == $max_count - 2 ) {
560 # conservation track
561 # storage isn't quite right - need to remove
562 # leading/lagging whitespace while preserving
563 # gap data (latter isn't done, former is)
564 $_ =~ s/^\s+//;
565 $_ =~ s/\s+$//;
566 $midline .= $_;
567 $count++;
568 next;
570 elsif ( $count == $max_count - 1 ) {
572 # query track
573 my @data = split( " ", $_ );
574 my $seq = $data[-2];
575 $qline .= $seq;
576 $count++;
577 next;
579 elsif ( $count == $max_count ) {
581 #pval track
582 my $pvals = $_;
583 $count = 0;
584 $max_count = 3;
585 $$hsp[-3] = $hline;
586 $$hsp[-2] = $midline;
587 $$hsp[-1] = $qline;
588 next;
590 else {
591 print "missed $_\n";
598 # Annotation for each hit, nhmmer
599 # This code is currently incomplete, the alignment strings
600 # are not being captured
601 elsif ( /Annotation for each hit\s+\(and alignments\)/ ) {
602 @hsp_list = ();
603 my $name;
605 while ( defined( $_ = $self->_readline ) ) {
606 if ( $_ =~ m/Internal pipeline statistics/
607 || m/\[No targets detected/ )
609 $self->_pushback($_);
610 last;
612 if ( /^>>\s+(\S+)\s+/ ) {
613 $name = $1;
615 while ( defined( $_ = $self->_readline ) ) {
617 if ( $_ =~ m/Internal pipeline statistics/
618 || $_ =~ m/^>>/ )
620 $self->_pushback($_);
621 last;
623 elsif (
624 $_ =~ /^\s+#\s+score/
625 || $_ =~ /^\s+------\s+/
626 || $_ =~ /^>>/
627 || $_ =~ /^$/
628 || $_ =~ /^\s+Alignment:/
629 || $_ =~ /^\s+score:/
630 || $_ =~ /^\s+score\s+bias/
631 || $_ =~ /^\s+\S+\s+\d+\s+([\s+.gatc-]+)/i # Alignment, line 1
632 || $_ =~ /^\s{20,}([\s+gatc-]+)/i # Alignment, line 2
633 || $_ =~ /^\s+$name\s+\d+\s+([\s+gatc-]+)/i # Alignment, line 3
634 || $_ =~ /^\s+[\d.\*]+/ # Alignment, line 4
637 next;
639 elsif (
640 /^\s+[!?]\s+(\S+)\s+
641 (\S+)\s+(\S+)\s+
642 (\d+)\s+(\d+)\s+[.\[\]]*\s+
643 (\d+)\s+(\d+)\s+[.\[\]]*\s+
644 (\d+)\s+(\d+)\s+[.\[\]]*\s+
645 (\d+)\s+(\S+).*$/ox
648 my ($score, $bias, $eval,
649 $hmmstart, $hmmstop, $hitstart,
650 $hitstop, $envstart, $envstop,
651 $length, $acc
654 $1, $2, $3, $4, $5, $6,
655 $7, $8, $9, $10, $11
658 my @vals = (
659 $hitstart, $hitstop,
660 $hmmstart, $hmmstop,
661 $score, $eval,
662 '', '',
665 my $info = $hit_list[ $hitinfo{$name} ];
666 if ( !defined $info ) {
667 $self->warn(
668 "Incomplete information: can't find HSP $name in list of hits\n"
670 next;
672 $domaincounter{$name}++;
673 my $hsp_key = $name . "_" . $domaincounter{$name};
674 push @hsp_list, [ $name, @vals ];
675 $hspinfo{$hsp_key} = $#hsp_list;
677 else {
678 print "Missed this line: $_\n";
685 elsif ( m/Internal pipeline statistics/ || m!^//! ) {
687 # If within hit, hsp close;
688 if ( $self->within_element('hit') ) {
689 if ( $self->within_element('hsp') ) {
690 $self->end_element( { 'Name' => 'Hsp' } );
692 $self->end_element( { 'Name' => 'Hit' } );
695 # Grab summary statistics of run
696 while ( defined( $_ = $self->_readline ) ) {
697 last if ( $_ =~ m/^\/\/$/ );
700 # Do a lot of processing of hits and hsps here
701 while ( my $hit = shift @hit_list ) {
702 my $hit_name = shift @$hit;
703 my $hit_desc = shift @$hit;
704 my $hit_signif = shift @$hit;
705 my $hit_score = shift @$hit;
706 my $num_domains = $domaincounter{$hit_name} || 0;
708 $self->start_element( { 'Name' => 'Hit' } );
709 $self->element(
710 { 'Name' => 'Hit_id',
711 'Data' => $hit_name
714 $self->element(
715 { 'Name' => 'Hit_desc',
716 'Data' => $hit_desc
719 $self->element(
720 { 'Name' => 'Hit_signif',
721 'Data' => $hit_signif
724 $self->element(
725 { 'Name' => 'Hit_score',
726 'Data' => $hit_score
730 for my $i ( 1 .. $num_domains ) {
731 my $key = $hit_name . "_" . $i;
732 my $hsp = $hsp_list[ $hspinfo{$key} ];
733 if ( defined $hsp ) {
734 my $hsp_name = shift @$hsp;
735 $self->start_element( { 'Name' => 'Hsp' } );
736 $self->element(
737 { 'Name' => 'Hsp_identity',
738 'Data' => 0
741 $self->element(
742 { 'Name' => 'Hsp_positive',
743 'Data' => 0
746 $self->element(
747 { 'Name' => 'Hsp_hit-from',
748 'Data' => shift @$hsp
751 $self->element(
752 { 'Name' => 'Hsp_hit-to',
753 'Data' => shift @$hsp
756 $self->element(
757 { 'Name' => 'Hsp_query-from',
758 'Data' => shift @$hsp
761 $self->element(
762 { 'Name' => 'Hsp_query-to',
763 'Data' => shift @$hsp
766 $self->element(
767 { 'Name' => 'Hsp_score',
768 'Data' => shift @$hsp
771 $self->element(
772 { 'Name' => 'Hsp_evalue',
773 'Data' => shift @$hsp
776 $self->element(
777 { 'Name' => 'Hsp_hseq',
778 'Data' => shift @$hsp
781 $self->element(
782 { 'Name' => 'Hsp_midline',
783 'Data' => shift @$hsp
786 $self->element(
787 { 'Name' => 'Hsp_qseq',
788 'Data' => shift @$hsp
792 # Only nhmmer output has strand information
793 if ( $self->{'_reporttype'} eq 'NHMMER' ) {
794 my $hstart = $self->get_from_element('HSP-hit_start');
795 my $hend = $self->get_from_element('HSP-hit_end');
796 my $hstrand = ( $hstart < $hend ) ? 1 : -1;
798 my $qstart = $self->get_from_element('HSP-query_start');
799 my $qend = $self->get_from_element('HSP-query_end');
800 my $qstrand = ( $qstart < $qend ) ? 1 : -1;
802 $self->element(
803 { 'Name' => 'Hsp_query-strand',
804 'Data' => $qstrand
807 $self->element(
808 { 'Name' => 'Hsp_hit-strand',
809 'Data' => $hstrand
814 $self->end_element( { 'Name' => 'Hsp' } );
817 $self->end_element( { 'Name' => 'Hit' } );
819 @hit_list = ();
820 %hitinfo = ();
821 last;
824 else {
825 print "Missed line: $_\n";
826 $self->debug($_);
828 $last = $_;
830 $self->end_element( { 'Name' => 'HMMER_Output' } );
831 my $result = $self->end_document();
832 return $result;
835 =head2 start_element
837 Title : start_element
838 Usage : $eventgenerator->start_element
839 Function: Handles a start event
840 Returns : none
841 Args : hashref with at least 2 keys 'Data' and 'Name'
843 =cut
845 sub start_element {
847 my ( $self, $data ) = @_;
849 # we currently don't care about attributes
850 my $nm = $data->{'Name'};
851 my $type = $MODEMAP{$nm};
852 if ($type) {
853 if ( $self->_eventHandler->will_handle($type) ) {
854 my $func = sprintf( "start_%s", lc $type );
855 $self->_eventHandler->$func( $data->{'Attributes'} );
857 unshift @{ $self->{'_elements'} }, $type;
859 if ( defined $type
860 && $type eq 'result' )
862 $self->{'_values'} = {};
863 $self->{'_result'} = undef;
867 =head2 end_element
869 Title : end_element
870 Usage : $eventgeneartor->end_element
871 Function: Handles and end element event
872 Returns : none
873 Args : hashref with at least 2 keys 'Data' and 'Name'
875 =cut
877 sub end_element {
879 my ( $self, $data ) = @_;
880 my $nm = $data->{'Name'};
881 my $type = $MODEMAP{$nm};
882 my $rc;
884 if ( $nm eq 'HMMER_program' ) {
885 if ( $self->{'_last_data'} =~ /(N?HMM\S+)/i ) {
886 $self->{'_reporttype'} = uc $1;
890 # Hsp are sort of weird, in that they end when another
891 # object begins so have to detect this in end_element for now
892 if ( $nm eq 'Hsp' ) {
893 foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
894 my $data = $self->{'_last_hspdata'}->{$_};
895 if ( $data && $_ eq 'Hsp_hseq' ) {
897 # replace hmm '.' gap symbol by '-'
898 $data =~ s/\./-/g;
900 $self->element(
901 { 'Name' => $_,
902 'Data' => $data
906 $self->{'_last_hspdata'} = {};
908 if ($type) {
909 if ( $self->_eventHandler->will_handle($type) ) {
910 my $func = sprintf( "end_%s", lc $type );
911 $rc = $self->_eventHandler->$func( $self->{'_reporttype'},
912 $self->{'_values'} );
914 my $lastelem = shift @{ $self->{'_elements'} };
916 elsif ( $MAPPING{$nm} ) {
917 if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
918 my $key = ( keys %{ $MAPPING{$nm} } )[0];
919 $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} }
920 = $self->{'_last_data'};
922 else {
923 $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
925 # print "lastdata is " . $self->{'_last_data'} . "\n";
928 else {
929 $self->debug("unknown nm $nm, ignoring\n");
931 $self->{'_last_data'} = ''; # remove read data if we are at
932 # end of an element
933 $self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
934 return $rc;
937 =head2 element
939 Title : element
940 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
941 Function: Convenience method that calls start_element, characters, end_element
942 Returns : none
943 Args : Hash ref with the keys 'Name' and 'Data'
945 =cut
947 sub element {
948 my ( $self, $data ) = @_;
949 $self->start_element($data);
950 $self->characters($data);
951 $self->end_element($data);
954 =head2 get_from_element
956 Title : get_from_element
957 Usage : $self->get_from_element('HSP-hit_start');
958 Function: Convenience method to retrieve data from '_values' hash
959 Returns : string
960 Args : key
962 =cut
964 sub get_from_element {
965 my ($self,$key) = @_;
966 my $values = $self->{_values};
967 $values->{$key};
970 =head2 characters
972 Title : characters
973 Usage : $eventgenerator->characters($str)
974 Function: Send a character events
975 Returns : none
976 Args : string
978 =cut
980 sub characters {
981 my ( $self, $data ) = @_;
983 if ( $self->in_element('hsp')
984 && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/o
985 && defined $data->{'Data'} )
987 $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
989 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o );
991 $self->{'_last_data'} = $data->{'Data'};
994 =head2 within_element
996 Title : within_element
997 Usage : if( $eventgenerator->within_element( $element ) ) {}
998 Function: Test if we are within a particular element
999 This is different than 'in' because within can be tested for
1000 a whole block
1001 Returns : boolean
1002 Args : string element name
1004 =cut
1006 sub within_element {
1007 my ( $self, $name ) = @_;
1008 return 0
1009 if ( !defined $name
1010 || !defined $self->{'_elements'}
1011 || scalar @{ $self->{'_elements'} } == 0 );
1012 foreach ( @{ $self->{'_elements'} } ) {
1013 return 1 if ( $_ eq $name );
1015 return 0;
1018 =head2 in_element
1020 Title : in_element
1021 Usage : if( $eventgenerator->in_element( $element ) ) {}
1022 Function: Test if we are in a particular element
1023 This is different than 'within' because 'in' only
1024 tests its immediate parent
1025 Returns : boolean
1026 Args : string element name
1028 =cut
1030 sub in_element {
1031 my ( $self, $name ) = @_;
1032 return 0 if !defined $self->{'_elements'}->[0];
1033 return ( $self->{'_elements'}->[0] eq $name );
1036 =head2 start_document
1038 Title : start_document
1039 Usage : $eventgenerator->start_document
1040 Function: Handle a start document event
1041 Returns : none
1042 Args : none
1044 =cut
1046 sub start_document {
1047 my ($self) = @_;
1048 $self->{'_lasttype'} = '';
1049 $self->{'_values'} = {};
1050 $self->{'_result'} = undef;
1051 $self->{'_elements'} = [];
1054 =head2 end_document
1056 Title : end_document
1057 Usage : $eventgenerator->end_document
1058 Function: Handles an end document event
1059 Returns : Bio::Search::Result::ResultI object
1060 Args : none
1062 =cut
1064 sub end_document {
1065 my ($self) = @_;
1066 return $self->{'_result'};
1069 =head2 result_count
1071 Title : result_count
1072 Usage : my $count = $searchio->result_count
1073 Function: Returns the number of results processed
1074 Returns : interger
1075 Args : none
1077 =cut
1079 sub result_count {
1080 my $self = shift;
1081 return $self->{'_result_count'};