Hmmer3: Final fix for Bug #3369. Eliminated the removal of
[bioperl-live.git] / Bio / SearchIO / hmmer3.pm
blobca53516fc1ba51919126eec5ca15da0cd6dab22b
2 # BioPerl module for Bio::SearchIO::hmmer3
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Thomas Sharpton <thomas.sharpton@gmail.com>
8 # Copyright Thomas Sharpton
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SearchIO::hmmer3
18 =head1 SYNOPSIS
20 use Bio::SearchIO;
22 my $searchio = Bio::SearchIO->new(
23 -format => 'hmmer',
24 -version => 3,
25 -file => 'hmmsearch.out'
28 my $result = $searchio->next_result;
29 my $hit = $result->next_hit;
30 print $hit->name, $hit->description, $hit->significance,
31 $hit->score, "\n";
33 my $hsp = $hit->next_hsp;
34 print $hsp->start('hit'), $hsp->end('hit'), $hsp->start('query'),
35 $hsp->end('query'), "\n";
37 =head1 DESCRIPTION
39 Code to parse output from hmmsearch, hmmscan, and nhmmer, compatible with
40 both version 2 and version 3 of the HMMER package from L<http://hmmer.org>.
42 =head1 FEEDBACK
44 =head2 Mailing Lists
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to
48 the Bioperl mailing list. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 =head2 Support
55 Please direct usage questions or support issues to the mailing list:
57 L<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
64 =head2 Reporting Bugs
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 of the bugs and their resolution. Bug reports can be submitted via
68 the web:
70 https://redmine.open-bio.org/projects/bioperl/
72 =head1 AUTHOR - Thomas Sharpton
74 Email thomas.sharpton@gmail.com
76 Describe contact details here
78 =head1 CONTRIBUTORS
80 Additional contributors names and emails here
82 briano at bioteam.net
84 =head1 APPENDIX
86 The rest of the documentation details each of the object methods.
87 Internal methods are usually preceded with a _
89 =cut
91 # Let the code begin...
93 package Bio::SearchIO::hmmer3;
95 use strict;
96 use Data::Dumper;
97 use Bio::Factory::ObjectFactory;
98 use Bio::Tools::IUPAC;
99 use vars qw(%MAPPING %MODEMAP);
100 use base qw(Bio::SearchIO::hmmer);
102 BEGIN {
104 # mapping of HMMER items to Bioperl hash keys
105 %MODEMAP = (
106 'HMMER_Output' => 'result',
107 'Hit' => 'hit',
108 'Hsp' => 'hsp'
111 %MAPPING = (
112 'Hsp_bit-score' => 'HSP-bits',
113 'Hsp_score' => 'HSP-score',
114 'Hsp_evalue' => 'HSP-evalue',
115 'Hsp_query-from' => 'HSP-query_start',
116 'Hsp_query-to' => 'HSP-query_end',
117 'Hsp_query-strand' => 'HSP-query_strand',
118 'Hsp_hit-from' => 'HSP-hit_start',
119 'Hsp_hit-to' => 'HSP-hit_end',
120 'Hsp_hit-strand' => 'HSP-hit_strand',
121 'Hsp_positive' => 'HSP-conserved',
122 'Hsp_identity' => 'HSP-identical',
123 'Hsp_gaps' => 'HSP-hsp_gaps',
124 'Hsp_hitgaps' => 'HSP-hit_gaps',
125 'Hsp_querygaps' => 'HSP-query_gaps',
126 'Hsp_qseq' => 'HSP-query_seq',
127 'Hsp_hseq' => 'HSP-hit_seq',
128 'Hsp_midline' => 'HSP-homology_seq',
129 'Hsp_pline' => 'HSP-pp_seq',
130 'Hsp_align-len' => 'HSP-hsp_length',
131 'Hsp_query-frame' => 'HSP-query_frame',
132 'Hsp_hit-frame' => 'HSP-hit_frame',
134 'Hit_id' => 'HIT-name',
135 'Hit_len' => 'HIT-length',
136 'Hit_accession' => 'HIT-accession',
137 'Hit_desc' => 'HIT-description',
138 'Hit_signif' => 'HIT-significance',
139 'Hit_score' => 'HIT-score',
141 'HMMER_program' => 'RESULT-algorithm_name',
142 'HMMER_version' => 'RESULT-algorithm_version',
143 'HMMER_query-def' => 'RESULT-query_name',
144 'HMMER_query-len' => 'RESULT-query_length',
145 'HMMER_query-acc' => 'RESULT-query_accession',
146 'HMMER_querydesc' => 'RESULT-query_description',
147 'HMMER_hmm' => 'RESULT-hmm_name',
148 'HMMER_seqfile' => 'RESULT-sequence_file',
149 'HMMER_db' => 'RESULT-database_name',
153 =head2 next_result
155 Title : next_result
156 Usage : my $hit = $searchio->next_result;
157 Function: Returns the next Result from a search
158 Returns : Bio::Search::Result::ResultI object
159 Args : none
161 =cut
163 sub next_result {
164 my ($self) = @_;
165 my $seentop = 0; # Placeholder for when we deal with multi-query reports
166 my $reporttype;
167 my ( $last, @hit_list, @hsp_list, %hspinfo, %hitinfo, %domaincounter );
168 local $/ = "\n";
169 local $_;
171 my @ambiguous_nt = keys %Bio::Tools::IUPAC::IUB;
172 my $ambiguous_nt = join '', @ambiguous_nt;
174 my $verbose = $self->verbose; # cache for speed? JES's idea in hmmer.pm
175 $self->start_document();
176 local ($_);
178 # This is here to ensure that next_result doesn't produce infinite loop
179 if ( !defined( $_ = $self->_readline ) ) {
180 return undef;
182 else {
183 $self->_pushback($_);
186 # Regex goes here for HMMER3
187 # Start with hmmsearch processing
188 while ( defined( $_ = $self->_readline ) ) {
189 my $lineorig = $_;
190 chomp;
192 # Grab the program name
193 if ( $_ =~ m/^\#\s(\S+)\s\:\:\s/ ) {
194 my $prog = $1;
196 # TO DO: customize the above regex to adapt to other
197 # program types (hmmscan, etc)
198 $self->start_element( { 'Name' => 'HMMER_Output' } );
199 $self->{'_result_count'}++; #Might need to move to another block
200 $self->element(
201 { 'Name' => 'HMMER_program',
202 'Data' => uc($prog)
207 # Get the HMMER package version and release date
208 elsif ( $_ =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/ ) {
209 my $version = $1;
210 my $versiondate = $2;
211 $self->{'_hmmidline'} = $_;
212 $self->element(
213 { 'Name' => 'HMMER_version',
214 'Data' => $version
219 # Get the query info
220 elsif ( $_ =~ /^\#\squery \w+ file\:\s+(\S+)/ ) {
221 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
222 || $self->{'_reporttype'} eq 'NHMMER' )
224 $self->{'_hmmfileline'} = $lineorig;
225 $self->element(
226 { 'Name' => 'HMMER_hmm',
227 'Data' => $1
231 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
232 $self->{'_hmmseqline'} = $lineorig;
233 $self->element(
234 { 'Name' => 'HMMER_seqfile',
235 'Data' => $1
241 # If this is a report without alignments
242 elsif ( $_ =~ m/^\#\sshow\salignments\sin\soutput/ ) {
243 $self->{'_alnreport'} = 0;
246 # Get the database info
247 elsif ( $_ =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ) {
249 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
250 || $self->{'_reporttype'} eq 'NHMMER' )
252 $self->{'_hmmseqline'} = $lineorig;
253 $self->element(
254 { 'Name' => 'HMMER_seqfile',
255 'Data' => $1
259 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
260 $self->{'_hmmfileline'} = $lineorig;
261 $self->element(
262 { 'Name' => 'HMMER_hmm',
263 'Data' => $1
269 # Get query data
270 elsif ( $_ =~ s/^Query:\s+// ) {
272 # TO DO: code to deal with multi-query report
273 unless (s/\s+\[[L|M]\=(\d+)\]$//) {
274 warn "Error parsing length for query, offending line $_\n";
275 exit(0);
277 my $querylen = $1;
278 $self->element(
279 { 'Name' => 'HMMER_query-len',
280 'Data' => $querylen
283 $self->element(
284 { 'Name' => 'HMMER_query-def',
285 'Data' => $_
290 # Get Accession data
291 elsif ( $_ =~ s/^Accession:\s+// ) {
292 s/\s+$//;
293 $self->element(
294 { 'Name' => 'HMMER_query-acc',
295 'Data' => $_
300 # Get description data
301 elsif ( $_ =~ s/^Description:\s+// ) {
302 s/\s+$//;
303 $self->element(
304 { 'Name' => 'HMMER_querydesc',
305 'Data' => $_
310 # hmmsearch, nhmmer, and hmmscan-specific formatting here
311 elsif (
312 defined $self->{'_reporttype'}
313 && ( $self->{'_reporttype'} eq 'HMMSEARCH'
314 || $self->{'_reporttype'} eq 'HMMSCAN'
315 || $self->{'_reporttype'} eq 'NHMMER' )
318 # Complete sequence table data above inclusion threshold,
319 # hmmsearch or hmmscan
320 if ( $_ =~ m/Scores for complete sequence/ ) {
321 while ( defined( $_ = $self->_readline ) ) {
322 if ( $_ =~ m/inclusion threshold/
323 || m/Domain( and alignment)? annotation for each/
324 || m/\[No hits detected/
325 || m!^//! )
327 $self->_pushback($_);
328 last;
331 # Grab table data
332 next if ( m/\-\-\-/ || m/^\s+E-value\s+score/ || m/^$/ );
333 my ($eval_full, $score_full, $bias_full, $eval_best,
334 $score_best, $bias_best, $exp, $n,
335 $hitid, $desc, @hitline
337 @hitline = split( " ", $_ );
338 $eval_full = shift @hitline;
339 $score_full = shift @hitline;
340 $bias_full = shift @hitline;
341 $eval_best = shift @hitline;
342 $score_best = shift @hitline;
343 $bias_best = shift @hitline;
344 $exp = shift @hitline;
345 $n = shift @hitline;
346 $hitid = shift @hitline;
347 $desc = join " ", @hitline;
349 $desc = '' if ( !defined($desc) );
351 push @hit_list,
352 [ $hitid, $desc, $eval_full, $score_full ];
353 $hitinfo{$hitid} = $#hit_list;
357 # nhmmer
358 if ( /Scores for complete hits/ ) {
359 while ( defined( $_ = $self->_readline ) ) {
361 if ( /inclusion threshold/
362 || /Annotation for each hit/
363 || /\[No hits detected/
364 || m!^//! )
366 $self->_pushback($_);
367 last;
370 # Grab table data
371 next if ( /\-\-\-/ || /^\s+E-value\s+score/ || /^$/ );
372 my ($eval, $score, $bias, $hitid,
373 $start, $end, $desc, @hitline
375 @hitline = split( " ", $_ );
376 $eval = shift @hitline;
377 $score = shift @hitline;
378 $bias = shift @hitline;
379 $hitid = shift @hitline;
380 $start = shift @hitline;
381 $end = shift @hitline;
382 $desc = join ' ', @hitline;
384 $desc = '' if ( !defined($desc) );
386 push @hit_list, [ $hitid, $desc, $eval, $score ];
387 $hitinfo{$hitid} = $#hit_list;
391 # Complete sequence table data below inclusion threshold
392 # not currently fully implemented -
393 # Should all these lines simply be skipped?
394 elsif ( /inclusion threshold/ ) {
395 while ( defined( $_ = $self->_readline ) ) {
396 if ( /Domain( and alignment)? annotation for each/
397 || /Internal pipeline statistics summary/
398 || /Annotation for each hit\s+\(and alignments\)/
401 $self->_pushback($_);
402 last;
404 next if ( $_ =~ m/^$/ );
405 next if ( $_ =~ m/inclusion threshold/ );
406 my ($eval_full, $score_full, $bias_full, $eval_best,
407 $score_best, $bias_best, $exp, $n,
408 $hitid, $desc, @hitline
410 @hitline = split( " ", $_ );
411 $eval_full = shift @hitline;
412 $score_full = shift @hitline;
413 $bias_full = shift @hitline;
414 $eval_best = shift @hitline;
415 $score_best = shift @hitline;
416 $bias_best = shift @hitline;
417 $exp = shift @hitline;
418 $n = shift @hitline;
419 $hitid = shift @hitline;
420 $desc = join " ", @hitline;
422 $desc = '' if ( !defined($desc) );
424 push @hit_list,
425 [ $hitid, $desc, $eval_full, $score_full ];
426 $hitinfo{$hitid} = $#hit_list;
430 # Domain annotation for each sequence table data, hmmscan
431 elsif ( /Domain( and alignment)? annotation for each/ ) {
432 @hsp_list = (); # Here for multi-query reports
433 my $name;
435 while ( defined( $_ = $self->_readline ) ) {
436 if ( /Internal pipeline statistics/
437 || /\[No targets detected/ )
439 $self->_pushback($_);
440 last;
442 if ( $_ =~ m/^\>\>\s(.*?)\s+/ ) {
443 $name = $1;
445 # Do not skip hits below inclusion threshold - leave it to the user to decide
446 #next if ( $hitinfo{$name} eq "below_inclusion" );
447 $domaincounter{$name} = 0;
449 while ( defined( $_ = $self->_readline ) ) {
451 # Grab table data for sequence
452 if ( $_ =~ m/Internal pipeline statistics/
453 || $_ =~ m/^\>\>/ )
455 $self->_pushback($_);
456 last;
458 if ( $_ =~ m/Alignments for each domain/ ) {
459 $self->_pushback($_);
460 last;
462 if ( $_ =~ m/^\s+\#\s+score/
463 || $_ =~ m/^\s\-\-\-\s+/
466 # $_ =~ m/^\>\>/ ||
467 $_ =~ m/^$/
470 next;
473 # Grab hsp data from table, push into @hsp;
474 if (my ($domain_num, $score, $bias,
475 $ceval, $ieval, $hmmstart,
476 $hmmstop, $qalistart, $qalistop,
477 $envstart, $envstop, $envbound,
478 $acc
480 = m|^\s+(\d+)\s\!*\?*\s+ # domain number
481 (\S+)\s+(\S+)\s+ # score, bias
482 (\S+)\s+(\S+)\s+ # c-eval, i-eval
483 (\d+)\s+(\d+).+? # hmm start, stop
484 (\d+)\s+(\d+).+? # query start, stop
485 (\d+)\s+(\d+).+? # env start, stop
486 (\S+) # Accession
487 \s*$|ox
490 # Keep it simple for now. let's customize later
491 my @vals = (
492 $hmmstart, $hmmstop,
493 $qalistart, $qalistop,
494 $score, $ceval,
495 '', '',
496 '', ''
498 my $info = $hit_list[ $hitinfo{$name} ];
499 if ( !defined $info ) {
500 $self->warn(
501 "Incomplete sequence information; can't find $name, hitinfo says $hitinfo{$name}\n"
503 next;
505 $domaincounter{$name}++;
506 my $hsp_key
507 = $name . "_" . $domaincounter{$name};
508 push @hsp_list, [ $name, @vals ];
509 $hspinfo{$hsp_key} = $#hsp_list;
511 else {
512 print "missed this line: $_\n";
516 elsif ( /Alignments for each domain/ ) {
517 my $domain_count = 0;
519 #line counter
520 my $count = 0;
522 # There's an optional block, so we sometimes need to
523 # count to 3, and sometimes to 4.
524 my $max_count = 3;
525 my $lastdomain;
526 my $hsp;
527 my ( $hline, $midline, $qline, $pline );
529 while ( defined( $_ = $self->_readline ) ) {
530 if ( $_ =~ m/^\>\>/
531 || $_ =~ m/Internal pipeline statistics/ )
533 $self->_pushback($_);
534 last;
536 elsif ($_ =~ m/^$/ )
538 next;
540 elsif ( $_ =~ /\s\s\=\=\sdomain\s(\d+)\s+/ ) {
541 my $domainnum = $1;
542 $count = 0;
543 my $key = $name . "_" . $domainnum;
544 $hsp = $hsp_list[ $hspinfo{$key} ];
545 $hline = $$hsp[-4];
546 $midline = $$hsp[-3];
547 $qline = $$hsp[-2];
548 $pline = $$hsp[-1];
549 $lastdomain = $name;
552 # model data track, some reports don't have
553 elsif ( $_ =~ m/\s+\S+\sCS$/ ) {
554 my $modeltrack = $_;
555 $max_count++;
556 $count++;
557 next;
560 # Query line and Hit line swaps positions
561 # depending of the program
562 elsif ( $count == $max_count - 3
563 or $count == $max_count - 1
565 my @data = split( " ", $_ );
567 if ($self->{'_reporttype'} eq 'HMMSCAN') {
568 # hit sequence
569 $hline .= $data[-2] if ($count == $max_count - 3);
570 # query sequence
571 $qline .= $data[-2] if ($count == $max_count - 1);
573 else { # hmmsearch & nhmmer
574 # hit sequence
575 $hline .= $data[-2] if ($count == $max_count - 1);
576 # query sequence
577 $qline .= $data[-2] if ($count == $max_count - 3);
580 $count++;
581 next;
584 # conservation track
585 # storage isn't quite right - need to remove
586 # leading/lagging whitespace while preserving
587 # gap data (latter isn't done, former is)
588 elsif ( $count == $max_count - 2 ) {
589 $_ =~ s/^\s+//;
590 $_ =~ s/\s+$//;
591 $midline .= $_;
592 $count++;
593 next;
596 # posterior probability track
597 elsif ( $count == $max_count ) {
598 my @data = split(" ", $_);
599 $pline .= $data[-2];
600 $count = 0;
601 $max_count = 3;
602 $$hsp[-4] = $hline;
603 $$hsp[-3] = $midline;
604 $$hsp[-2] = $qline;
605 $$hsp[-1] = $pline;
606 next;
608 else {
609 print "missed $_\n";
616 # Annotation for each hit, nhmmer
617 # This code is currently incomplete, the alignment strings
618 # are not being captured
619 elsif ( /Annotation for each hit\s+\(and alignments\)/ ) {
620 @hsp_list = ();
621 my $name;
623 while ( defined( $_ = $self->_readline ) ) {
624 if ( $_ =~ m/Internal pipeline statistics/
625 || m/\[No targets detected/ )
627 $self->_pushback($_);
628 last;
630 if ( /^>>\s+(\S+)\s+/ ) {
631 $name = $1;
633 while ( defined( $_ = $self->_readline ) ) {
635 if ( $_ =~ m/Internal pipeline statistics/
636 || $_ =~ m/^>>/ )
638 $self->_pushback($_);
639 last;
641 elsif (
642 $_ =~ /^\s+#\s+score/
643 || $_ =~ /^\s+------\s+/
644 || $_ =~ /^>>/
645 || $_ =~ /^$/
646 || $_ =~ /^\s+Alignment:/
647 || $_ =~ /^\s+score:/
648 || $_ =~ /^\s+score\s+bias/
649 || $_ =~ /^\s+\S+\s+\d+\s+([\s+.$ambiguous_nt-]+)/i # Alignment, line 1
650 || $_ =~ /^\s{20,}([\s+gatc-]+)/i # Alignment, line 2
651 || $_ =~ /^\s+$name\s+[\d-]+\s+([\s+$ambiguous_nt-]+)/i # Alignment, line 3
652 || $_ =~ /^\s+[\d.\*]+/ # Alignment, line 4
655 next;
657 elsif (
658 /^\s+[!?]\s+(\S+)\s+
659 (\S+)\s+(\S+)\s+
660 (\d+)\s+(\d+)\s+[.\[\]]*\s+
661 (\d+)\s+(\d+)\s+[.\[\]]*\s+
662 (\d+)\s+(\d+)\s+[.\[\]]*\s+
663 (\d+)\s+(\S+).*$/ox
666 my ($score, $bias, $eval,
667 $hmmstart, $hmmstop, $hitstart,
668 $hitstop, $envstart, $envstop,
669 $length, $acc
672 $1, $2, $3, $4, $5, $6,
673 $7, $8, $9, $10, $11
676 my @vals = (
677 $hitstart, $hitstop,
678 $hmmstart, $hmmstop,
679 $score, $eval,
680 '', '',
683 my $info = $hit_list[ $hitinfo{$name} ];
684 if ( !defined $info ) {
685 $self->warn(
686 "Incomplete information: can't find HSP $name in list of hits\n"
688 next;
690 $domaincounter{$name}++;
691 my $hsp_key = $name . "_" . $domaincounter{$name};
692 push @hsp_list, [ $name, @vals ];
693 $hspinfo{$hsp_key} = $#hsp_list;
695 else {
696 print "Missed this line: $_\n";
703 elsif ( m/Internal pipeline statistics/ || m!^//! ) {
705 # If within hit, hsp close;
706 if ( $self->within_element('hit') ) {
707 if ( $self->within_element('hsp') ) {
708 $self->end_element( { 'Name' => 'Hsp' } );
710 $self->end_element( { 'Name' => 'Hit' } );
713 # Grab summary statistics of run
714 while ( defined( $_ = $self->_readline ) ) {
715 last if ( $_ =~ m/^\/\/$/ );
718 # Do a lot of processing of hits and hsps here
719 while ( my $hit = shift @hit_list ) {
720 my $hit_name = shift @$hit;
721 my $hit_desc = shift @$hit;
722 my $hit_signif = shift @$hit;
723 my $hit_score = shift @$hit;
724 my $num_domains = $domaincounter{$hit_name} || 0;
726 $self->start_element( { 'Name' => 'Hit' } );
727 $self->element(
728 { 'Name' => 'Hit_id',
729 'Data' => $hit_name
732 $self->element(
733 { 'Name' => 'Hit_desc',
734 'Data' => $hit_desc
737 $self->element(
738 { 'Name' => 'Hit_signif',
739 'Data' => $hit_signif
742 $self->element(
743 { 'Name' => 'Hit_score',
744 'Data' => $hit_score
748 for my $i ( 1 .. $num_domains ) {
749 my $key = $hit_name . "_" . $i;
750 my $hsp = $hsp_list[ $hspinfo{$key} ];
751 if ( defined $hsp ) {
752 my $hsp_name = shift @$hsp;
753 $self->start_element( { 'Name' => 'Hsp' } );
754 $self->element(
755 { 'Name' => 'Hsp_identity',
756 'Data' => 0
759 $self->element(
760 { 'Name' => 'Hsp_positive',
761 'Data' => 0
764 if ( $self->{'_reporttype'} eq 'HMMSCAN'
765 or $self->{'_reporttype'} eq 'NHMMER') {
766 $self->element(
767 { 'Name' => 'Hsp_hit-from',
768 'Data' => shift @$hsp
771 $self->element(
772 { 'Name' => 'Hsp_hit-to',
773 'Data' => shift @$hsp
776 $self->element(
777 { 'Name' => 'Hsp_query-from',
778 'Data' => shift @$hsp
781 $self->element(
782 { 'Name' => 'Hsp_query-to',
783 'Data' => shift @$hsp
787 else { # hmmsearch
788 $self->element(
789 { 'Name' => 'Hsp_query-from',
790 'Data' => shift @$hsp
793 $self->element(
794 { 'Name' => 'Hsp_query-to',
795 'Data' => shift @$hsp
798 $self->element(
799 { 'Name' => 'Hsp_hit-from',
800 'Data' => shift @$hsp
803 $self->element(
804 { 'Name' => 'Hsp_hit-to',
805 'Data' => shift @$hsp
809 $self->element(
810 { 'Name' => 'Hsp_score',
811 'Data' => shift @$hsp
814 $self->element(
815 { 'Name' => 'Hsp_evalue',
816 'Data' => shift @$hsp
819 $self->element(
820 { 'Name' => 'Hsp_hseq',
821 'Data' => shift @$hsp
824 $self->element(
825 { 'Name' => 'Hsp_midline',
826 'Data' => shift @$hsp
829 $self->element(
830 { 'Name' => 'Hsp_qseq',
831 'Data' => shift @$hsp
834 $self->element(
835 { 'Name' => 'Hsp_pline',
836 'Data' => shift @$hsp
840 # Only nhmmer output has strand information
841 if ( $self->{'_reporttype'} eq 'NHMMER' ) {
842 my $hstart = $self->get_from_element('HSP-hit_start');
843 my $hend = $self->get_from_element('HSP-hit_end');
844 my $hstrand = ( $hstart < $hend ) ? 1 : -1;
846 my $qstart = $self->get_from_element('HSP-query_start');
847 my $qend = $self->get_from_element('HSP-query_end');
848 my $qstrand = ( $qstart < $qend ) ? 1 : -1;
850 $self->element(
851 { 'Name' => 'Hsp_query-strand',
852 'Data' => $qstrand
855 $self->element(
856 { 'Name' => 'Hsp_hit-strand',
857 'Data' => $hstrand
862 $self->end_element( { 'Name' => 'Hsp' } );
865 $self->end_element( { 'Name' => 'Hit' } );
867 @hit_list = ();
868 %hitinfo = ();
869 last;
872 else {
873 print "Missed line: $_\n";
874 $self->debug($_);
876 $last = $_;
878 $self->end_element( { 'Name' => 'HMMER_Output' } );
879 my $result = $self->end_document();
880 return $result;
883 =head2 start_element
885 Title : start_element
886 Usage : $eventgenerator->start_element
887 Function: Handles a start event
888 Returns : none
889 Args : hashref with at least 2 keys 'Data' and 'Name'
891 =cut
893 sub start_element {
895 my ( $self, $data ) = @_;
897 # we currently don't care about attributes
898 my $nm = $data->{'Name'};
899 my $type = $MODEMAP{$nm};
900 if ($type) {
901 if ( $self->_eventHandler->will_handle($type) ) {
902 my $func = sprintf( "start_%s", lc $type );
903 $self->_eventHandler->$func( $data->{'Attributes'} );
905 unshift @{ $self->{'_elements'} }, $type;
907 if ( defined $type
908 && $type eq 'result' )
910 $self->{'_values'} = {};
911 $self->{'_result'} = undef;
915 =head2 end_element
917 Title : end_element
918 Usage : $eventgeneartor->end_element
919 Function: Handles and end element event
920 Returns : none
921 Args : hashref with at least 2 keys 'Data' and 'Name'
923 =cut
925 sub end_element {
927 my ( $self, $data ) = @_;
928 my $nm = $data->{'Name'};
929 my $type = $MODEMAP{$nm};
930 my $rc;
932 if ( $nm eq 'HMMER_program' ) {
933 if ( $self->{'_last_data'} =~ /(N?HMM\S+)/i ) {
934 $self->{'_reporttype'} = uc $1;
938 # Hsp are sort of weird, in that they end when another
939 # object begins so have to detect this in end_element for now
940 if ( $nm eq 'Hsp' ) {
941 foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
942 my $data = $self->{'_last_hspdata'}->{$_};
943 if ( $data && $_ eq 'Hsp_hseq' ) {
945 # replace hmm '.' gap symbol by '-'
946 $data =~ s/\./-/g;
948 $self->element(
949 { 'Name' => $_,
950 'Data' => $data
954 $self->{'_last_hspdata'} = {};
956 if ($type) {
957 if ( $self->_eventHandler->will_handle($type) ) {
958 my $func = sprintf( "end_%s", lc $type );
959 $rc = $self->_eventHandler->$func( $self->{'_reporttype'},
960 $self->{'_values'} );
962 my $lastelem = shift @{ $self->{'_elements'} };
964 elsif ( $MAPPING{$nm} ) {
965 if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
966 my $key = ( keys %{ $MAPPING{$nm} } )[0];
967 $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} }
968 = $self->{'_last_data'};
970 else {
971 $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
973 # print "lastdata is " . $self->{'_last_data'} . "\n";
976 else {
977 $self->debug("unknown nm $nm, ignoring\n");
979 $self->{'_last_data'} = ''; # remove read data if we are at
980 # end of an element
981 $self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
982 return $rc;
985 =head2 element
987 Title : element
988 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
989 Function: Convenience method that calls start_element, characters, end_element
990 Returns : none
991 Args : Hash ref with the keys 'Name' and 'Data'
993 =cut
995 sub element {
996 my ( $self, $data ) = @_;
997 $self->start_element($data);
998 $self->characters($data);
999 $self->end_element($data);
1002 =head2 get_from_element
1004 Title : get_from_element
1005 Usage : $self->get_from_element('HSP-hit_start');
1006 Function: Convenience method to retrieve data from '_values' hash
1007 Returns : string
1008 Args : key
1010 =cut
1012 sub get_from_element {
1013 my ($self,$key) = @_;
1014 my $values = $self->{_values};
1015 $values->{$key};
1018 =head2 characters
1020 Title : characters
1021 Usage : $eventgenerator->characters($str)
1022 Function: Send a character events
1023 Returns : none
1024 Args : string
1026 =cut
1028 sub characters {
1029 my ( $self, $data ) = @_;
1031 if ( $self->in_element('hsp')
1032 && $data->{'Name'} =~ /Hsp\_(qseq|hseq|pline|midline)/o
1033 && defined $data->{'Data'} )
1035 $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
1037 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o );
1039 $self->{'_last_data'} = $data->{'Data'};
1042 =head2 within_element
1044 Title : within_element
1045 Usage : if( $eventgenerator->within_element( $element ) ) {}
1046 Function: Test if we are within a particular element
1047 This is different than 'in' because within can be tested for
1048 a whole block
1049 Returns : boolean
1050 Args : string element name
1052 =cut
1054 sub within_element {
1055 my ( $self, $name ) = @_;
1056 return 0
1057 if ( !defined $name
1058 || !defined $self->{'_elements'}
1059 || scalar @{ $self->{'_elements'} } == 0 );
1060 foreach ( @{ $self->{'_elements'} } ) {
1061 return 1 if ( $_ eq $name );
1063 return 0;
1066 =head2 in_element
1068 Title : in_element
1069 Usage : if( $eventgenerator->in_element( $element ) ) {}
1070 Function: Test if we are in a particular element
1071 This is different than 'within' because 'in' only
1072 tests its immediate parent
1073 Returns : boolean
1074 Args : string element name
1076 =cut
1078 sub in_element {
1079 my ( $self, $name ) = @_;
1080 return 0 if !defined $self->{'_elements'}->[0];
1081 return ( $self->{'_elements'}->[0] eq $name );
1084 =head2 start_document
1086 Title : start_document
1087 Usage : $eventgenerator->start_document
1088 Function: Handle a start document event
1089 Returns : none
1090 Args : none
1092 =cut
1094 sub start_document {
1095 my ($self) = @_;
1096 $self->{'_lasttype'} = '';
1097 $self->{'_values'} = {};
1098 $self->{'_result'} = undef;
1099 $self->{'_elements'} = [];
1102 =head2 end_document
1104 Title : end_document
1105 Usage : $eventgenerator->end_document
1106 Function: Handles an end document event
1107 Returns : Bio::Search::Result::ResultI object
1108 Args : none
1110 =cut
1112 sub end_document {
1113 my ($self) = @_;
1114 return $self->{'_result'};
1117 =head2 result_count
1119 Title : result_count
1120 Usage : my $count = $searchio->result_count
1121 Function: Returns the number of results processed
1122 Returns : interger
1123 Args : none
1125 =cut
1127 sub result_count {
1128 my $self = shift;
1129 return $self->{'_result_count'};