Hmmer3: Added support to retrieve the CS line using
[bioperl-live.git] / Bio / SearchIO / hmmer3.pm
blobefed529a6da8d44248a0f0f5aba2ad72925c1e06
2 # BioPerl module for Bio::SearchIO::hmmer3
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Thomas Sharpton <thomas.sharpton@gmail.com>
8 # Copyright Thomas Sharpton
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SearchIO::hmmer3
18 =head1 SYNOPSIS
20 use Bio::SearchIO;
22 my $searchio = Bio::SearchIO->new(
23 -format => 'hmmer',
24 -version => 3,
25 -file => 'hmmsearch.out'
28 my $result = $searchio->next_result;
29 my $hit = $result->next_hit;
30 print $hit->name, $hit->description, $hit->significance,
31 $hit->score, "\n";
33 my $hsp = $hit->next_hsp;
34 print $hsp->start('hit'), $hsp->end('hit'), $hsp->start('query'),
35 $hsp->end('query'), "\n";
37 =head1 DESCRIPTION
39 Code to parse output from hmmsearch, hmmscan, and nhmmer, compatible with
40 both version 2 and version 3 of the HMMER package from L<http://hmmer.org>.
42 =head1 FEEDBACK
44 =head2 Mailing Lists
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to
48 the Bioperl mailing list. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 =head2 Support
55 Please direct usage questions or support issues to the mailing list:
57 L<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
64 =head2 Reporting Bugs
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 of the bugs and their resolution. Bug reports can be submitted via
68 the web:
70 https://redmine.open-bio.org/projects/bioperl/
72 =head1 AUTHOR - Thomas Sharpton
74 Email thomas.sharpton@gmail.com
76 Describe contact details here
78 =head1 CONTRIBUTORS
80 Additional contributors names and emails here
82 briano at bioteam.net
84 =head1 APPENDIX
86 The rest of the documentation details each of the object methods.
87 Internal methods are usually preceded with a _
89 =cut
91 # Let the code begin...
93 package Bio::SearchIO::hmmer3;
95 use strict;
96 use Data::Dumper;
97 use Bio::Factory::ObjectFactory;
98 use Bio::Tools::IUPAC;
99 use vars qw(%MAPPING %MODEMAP);
100 use base qw(Bio::SearchIO::hmmer);
102 BEGIN {
104 # mapping of HMMER items to Bioperl hash keys
105 %MODEMAP = (
106 'HMMER_Output' => 'result',
107 'Hit' => 'hit',
108 'Hsp' => 'hsp'
111 %MAPPING = (
112 'Hsp_bit-score' => 'HSP-bits',
113 'Hsp_score' => 'HSP-score',
114 'Hsp_evalue' => 'HSP-evalue',
115 'Hsp_query-from' => 'HSP-query_start',
116 'Hsp_query-to' => 'HSP-query_end',
117 'Hsp_query-strand' => 'HSP-query_strand',
118 'Hsp_hit-from' => 'HSP-hit_start',
119 'Hsp_hit-to' => 'HSP-hit_end',
120 'Hsp_hit-strand' => 'HSP-hit_strand',
121 'Hsp_positive' => 'HSP-conserved',
122 'Hsp_identity' => 'HSP-identical',
123 'Hsp_gaps' => 'HSP-hsp_gaps',
124 'Hsp_hitgaps' => 'HSP-hit_gaps',
125 'Hsp_querygaps' => 'HSP-query_gaps',
126 'Hsp_qseq' => 'HSP-query_seq',
127 'Hsp_csline' => 'HSP-cs_seq',
128 'Hsp_hseq' => 'HSP-hit_seq',
129 'Hsp_midline' => 'HSP-homology_seq',
130 'Hsp_pline' => 'HSP-pp_seq',
131 'Hsp_align-len' => 'HSP-hsp_length',
132 'Hsp_query-frame' => 'HSP-query_frame',
133 'Hsp_hit-frame' => 'HSP-hit_frame',
135 'Hit_id' => 'HIT-name',
136 'Hit_len' => 'HIT-length',
137 'Hit_accession' => 'HIT-accession',
138 'Hit_desc' => 'HIT-description',
139 'Hit_signif' => 'HIT-significance',
140 'Hit_score' => 'HIT-score',
142 'HMMER_program' => 'RESULT-algorithm_name',
143 'HMMER_version' => 'RESULT-algorithm_version',
144 'HMMER_query-def' => 'RESULT-query_name',
145 'HMMER_query-len' => 'RESULT-query_length',
146 'HMMER_query-acc' => 'RESULT-query_accession',
147 'HMMER_querydesc' => 'RESULT-query_description',
148 'HMMER_hmm' => 'RESULT-hmm_name',
149 'HMMER_seqfile' => 'RESULT-sequence_file',
150 'HMMER_db' => 'RESULT-database_name',
154 =head2 next_result
156 Title : next_result
157 Usage : my $hit = $searchio->next_result;
158 Function: Returns the next Result from a search
159 Returns : Bio::Search::Result::ResultI object
160 Args : none
162 =cut
164 sub next_result {
165 my ($self) = @_;
166 my $seentop = 0; # Placeholder for when we deal with multi-query reports
167 my $reporttype;
168 my ( $last, @hit_list, @hsp_list, %hspinfo, %hitinfo, %domaincounter );
169 local $/ = "\n";
170 local $_;
172 my @ambiguous_nt = keys %Bio::Tools::IUPAC::IUB;
173 my $ambiguous_nt = join '', @ambiguous_nt;
175 my $verbose = $self->verbose; # cache for speed? JES's idea in hmmer.pm
176 $self->start_document();
177 local ($_);
179 # This is here to ensure that next_result doesn't produce infinite loop
180 if ( !defined( $_ = $self->_readline ) ) {
181 return undef;
183 else {
184 $self->_pushback($_);
187 # Regex goes here for HMMER3
188 # Start with hmmsearch processing
189 while ( defined( $_ = $self->_readline ) ) {
190 my $lineorig = $_;
191 chomp;
193 # Grab the program name
194 if ( $_ =~ m/^\#\s(\S+)\s\:\:\s/ ) {
195 my $prog = $1;
197 # TO DO: customize the above regex to adapt to other
198 # program types (hmmscan, etc)
199 $self->start_element( { 'Name' => 'HMMER_Output' } );
200 $self->{'_result_count'}++; #Might need to move to another block
201 $self->element(
202 { 'Name' => 'HMMER_program',
203 'Data' => uc($prog)
208 # Get the HMMER package version and release date
209 elsif ( $_ =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/ ) {
210 my $version = $1;
211 my $versiondate = $2;
212 $self->{'_hmmidline'} = $_;
213 $self->element(
214 { 'Name' => 'HMMER_version',
215 'Data' => $version
220 # Get the query info
221 elsif ( $_ =~ /^\#\squery \w+ file\:\s+(\S+)/ ) {
222 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
223 || $self->{'_reporttype'} eq 'NHMMER' )
225 $self->{'_hmmfileline'} = $lineorig;
226 $self->element(
227 { 'Name' => 'HMMER_hmm',
228 'Data' => $1
232 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
233 $self->{'_hmmseqline'} = $lineorig;
234 $self->element(
235 { 'Name' => 'HMMER_seqfile',
236 'Data' => $1
242 # If this is a report without alignments
243 elsif ( $_ =~ m/^\#\sshow\salignments\sin\soutput/ ) {
244 $self->{'_alnreport'} = 0;
247 # Get the database info
248 elsif ( $_ =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ) {
250 if ( $self->{'_reporttype'} eq 'HMMSEARCH'
251 || $self->{'_reporttype'} eq 'NHMMER' )
253 $self->{'_hmmseqline'} = $lineorig;
254 $self->element(
255 { 'Name' => 'HMMER_seqfile',
256 'Data' => $1
260 elsif ( $self->{'_reporttype'} eq 'HMMSCAN' ) {
261 $self->{'_hmmfileline'} = $lineorig;
262 $self->element(
263 { 'Name' => 'HMMER_hmm',
264 'Data' => $1
270 # Get query data
271 elsif ( $_ =~ s/^Query:\s+// ) {
272 # TO DO: code to deal with multi-query report
273 unless (s/\s+\[[L|M]\=(\d+)\]$//) {
274 warn "Error parsing length for query, offending line $_\n";
275 exit(0);
277 my $querylen = $1;
278 $self->element(
279 { 'Name' => 'HMMER_query-len',
280 'Data' => $querylen
283 $self->element(
284 { 'Name' => 'HMMER_query-def',
285 'Data' => $_
290 # Get Accession data
291 elsif ( $_ =~ s/^Accession:\s+// ) {
292 s/\s+$//;
293 $self->element(
294 { 'Name' => 'HMMER_query-acc',
295 'Data' => $_
300 # Get description data
301 elsif ( $_ =~ s/^Description:\s+// ) {
302 s/\s+$//;
303 $self->element(
304 { 'Name' => 'HMMER_querydesc',
305 'Data' => $_
310 # hmmsearch, nhmmer, and hmmscan-specific formatting here
311 elsif (
312 defined $self->{'_reporttype'}
313 && ( $self->{'_reporttype'} eq 'HMMSEARCH'
314 || $self->{'_reporttype'} eq 'HMMSCAN'
315 || $self->{'_reporttype'} eq 'NHMMER' )
318 # Complete sequence table data above inclusion threshold,
319 # hmmsearch or hmmscan
320 my $hit_counter = 0; # helper variable for non-unique hit IDs
321 if ( $_ =~ m/Scores for complete sequence/ ) {
322 while ( defined( $_ = $self->_readline ) ) {
323 if ( $_ =~ m/inclusion threshold/
324 || m/Domain( and alignment)? annotation for each/
325 || m/\[No hits detected/
326 || m!^//! )
328 $self->_pushback($_);
329 last;
332 # Grab table data
333 next if ( m/\-\-\-/ || m/^\s+E-value\s+score/ || m/^$/ );
334 $hit_counter++;
335 my ($eval_full, $score_full, $bias_full, $eval_best,
336 $score_best, $bias_best, $exp, $n,
337 $hitid, $desc, @hitline
339 @hitline = split( " ", $_ );
340 $eval_full = shift @hitline;
341 $score_full = shift @hitline;
342 $bias_full = shift @hitline;
343 $eval_best = shift @hitline;
344 $score_best = shift @hitline;
345 $bias_best = shift @hitline;
346 $exp = shift @hitline;
347 $n = shift @hitline;
348 $hitid = shift @hitline;
349 $desc = join " ", @hitline;
351 $desc = '' if ( !defined($desc) );
353 push @hit_list,
354 [ $hitid, $desc, $eval_full, $score_full ];
355 $hitinfo{"$hitid.$hit_counter"} = $#hit_list;
359 # nhmmer
360 if ( /Scores for complete hits/ ) {
361 while ( defined( $_ = $self->_readline ) ) {
363 if ( /inclusion threshold/
364 || /Annotation for each hit/
365 || /\[No hits detected/
366 || m!^//! )
368 $self->_pushback($_);
369 last;
372 # Grab table data
373 next if ( /\-\-\-/ || /^\s+E-value\s+score/ || /^$/ );
374 $hit_counter++;
375 my ($eval, $score, $bias, $hitid,
376 $start, $end, $desc, @hitline
378 @hitline = split( " ", $_ );
379 $eval = shift @hitline;
380 $score = shift @hitline;
381 $bias = shift @hitline;
382 $hitid = shift @hitline;
383 $start = shift @hitline;
384 $end = shift @hitline;
385 $desc = join ' ', @hitline;
387 $desc = '' if ( !defined($desc) );
389 push @hit_list, [ $hitid, $desc, $eval, $score ];
390 $hitinfo{"$hitid.$hit_counter"} = $#hit_list;
394 # Complete sequence table data below inclusion threshold
395 # not currently fully implemented -
396 # Should all these lines simply be skipped?
397 elsif ( /inclusion threshold/ ) {
398 while ( defined( $_ = $self->_readline ) ) {
399 if ( /Domain( and alignment)? annotation for each/
400 || /Internal pipeline statistics summary/
401 || /Annotation for each hit\s+\(and alignments\)/
404 $self->_pushback($_);
405 last;
407 next if ( $_ =~ m/^$/ );
408 next if ( $_ =~ m/inclusion threshold/ );
409 $hit_counter++;
410 my ($eval_full, $score_full, $bias_full, $eval_best,
411 $score_best, $bias_best, $exp, $n,
412 $hitid, $desc, @hitline
414 @hitline = split( " ", $_ );
415 $eval_full = shift @hitline;
416 $score_full = shift @hitline;
417 $bias_full = shift @hitline;
418 $eval_best = shift @hitline;
419 $score_best = shift @hitline;
420 $bias_best = shift @hitline;
421 $exp = shift @hitline;
422 $n = shift @hitline;
423 $hitid = shift @hitline;
424 $desc = join " ", @hitline;
426 $desc = '' if ( !defined($desc) );
428 push @hit_list,
429 [ $hitid, $desc, $eval_full, $score_full ];
430 $hitinfo{"$hitid.$hit_counter"} = $#hit_list;
434 # Domain annotation for each sequence table data, hmmscan
435 elsif ( /Domain( and alignment)? annotation for each/ ) {
436 @hsp_list = (); # Here for multi-query reports
437 my $name;
439 my $dom_counter = 0;
440 while ( defined( $_ = $self->_readline ) ) {
441 if ( /Internal pipeline statistics/
442 || /\[No targets detected/ )
444 $self->_pushback($_);
445 last;
447 if ( $_ =~ m/^\>\>\s(.*?)\s+/ ) {
448 $name = $1;
449 $dom_counter++;
450 $domaincounter{"$name.$dom_counter"} = 0;
452 while ( defined( $_ = $self->_readline ) ) {
454 # Grab table data for sequence
455 if ( $_ =~ m/Internal pipeline statistics/
456 || $_ =~ m/^\>\>/ )
458 $self->_pushback($_);
459 last;
461 if ( $_ =~ m/Alignments for each domain/ ) {
462 $self->_pushback($_);
463 last;
465 if ( $_ =~ m/^\s+\#\s+score/
466 || $_ =~ m/^\s\-\-\-\s+/
469 # $_ =~ m/^\>\>/ ||
470 $_ =~ m/^$/
473 next;
476 # Grab hsp data from table, push into @hsp;
477 if (my ($domain_num, $score, $bias,
478 $ceval, $ieval, $hmmstart,
479 $hmmstop, $qalistart, $qalistop,
480 $envstart, $envstop, $envbound,
481 $acc
483 = m|^\s+(\d+)\s\!*\?*\s+ # domain number
484 (\S+)\s+(\S+)\s+ # score, bias
485 (\S+)\s+(\S+)\s+ # c-eval, i-eval
486 (\d+)\s+(\d+).+? # hmm start, stop
487 (\d+)\s+(\d+).+? # query start, stop
488 (\d+)\s+(\d+).+? # env start, stop
489 (\S+) # Accession
490 \s*$|ox
493 # Keep it simple for now. let's customize later
494 my @vals = (
495 $hmmstart, $hmmstop,
496 $qalistart, $qalistop,
497 $score, $ceval,
498 '', '',
499 '', '',
502 my $info = $hit_list[ $hitinfo{"$name.$dom_counter"} ];
503 if ( !defined $info ) {
504 $self->warn(
505 "Incomplete sequence information; can't find $name, hitinfo says $hitinfo{$name}\n"
507 next;
509 $domaincounter{"$name.$dom_counter"}++;
510 my $hsp_key
511 = $name . "_" . $domaincounter{"$name.$dom_counter"};
512 push @hsp_list, [ $name, @vals ];
513 $hspinfo{"$hsp_key.$dom_counter"} = $#hsp_list;
515 else {
516 print "missed this line: $_\n";
520 elsif ( /Alignments for each domain/ ) {
521 my $domain_count = 0;
523 #line counter
524 my $count = 0;
526 # There's an optional block, so we sometimes need to
527 # count to 3, and sometimes to 4.
528 my $max_count = 3;
529 my $lastdomain;
530 my $hsp;
531 my ( $csline, $hline, $midline, $qline, $pline );
533 while ( defined( $_ = $self->_readline ) ) {
534 if ( $_ =~ m/^\>\>/
535 || $_ =~ m/Internal pipeline statistics/ )
537 $self->_pushback($_);
538 last;
540 elsif ($_ =~ m/^$/ )
542 next;
544 elsif ( $_ =~ /\s\s\=\=\sdomain\s(\d+)\s+/ ) {
545 my $domainnum = $1;
546 $count = 0;
547 my $key = $name . "_" . $domainnum;
548 $hsp = $hsp_list[ $hspinfo{"$key.$dom_counter"} ];
549 $csline = $$hsp[-5];
550 $hline = $$hsp[-4];
551 $midline = $$hsp[-3];
552 $qline = $$hsp[-2];
553 $pline = $$hsp[-1];
554 $lastdomain = $name;
557 # model data track, some reports don't have
558 elsif ( $_ =~ m/\s+\S+\s(?:CS|RF)$/ ) {
559 my @data = split( " ", $_ );
560 $csline .= $data[-2];
561 $max_count++;
562 $count++;
563 next;
566 # Query line and Hit line swaps positions
567 # depending of the program
568 elsif ( $count == $max_count - 3
569 or $count == $max_count - 1
571 my @data = split( " ", $_ );
573 if ($self->{'_reporttype'} eq 'HMMSCAN') {
574 # hit sequence
575 $hline .= $data[-2] if ($count == $max_count - 3);
576 # query sequence
577 $qline .= $data[-2] if ($count == $max_count - 1);
579 else { # hmmsearch & nhmmer
580 # hit sequence
581 $hline .= $data[-2] if ($count == $max_count - 1);
582 # query sequence
583 $qline .= $data[-2] if ($count == $max_count - 3);
586 $count++;
587 next;
590 # conservation track
591 # storage isn't quite right - need to remove
592 # leading/lagging whitespace while preserving
593 # gap data (latter isn't done, former is)
594 elsif ( $count == $max_count - 2 ) {
595 $_ =~ s/^\s+//;
596 $_ =~ s/\s+$//;
597 $midline .= $_;
598 $count++;
599 next;
602 # posterior probability track
603 elsif ( $count == $max_count ) {
604 my @data = split(" ", $_);
605 $pline .= $data[-2];
606 $count = 0;
607 $max_count = 3;
608 $$hsp[-5] = $csline;
609 $$hsp[-4] = $hline;
610 $$hsp[-3] = $midline;
611 $$hsp[-2] = $qline;
612 $$hsp[-1] = $pline;
613 next;
615 else {
616 print "missed $_\n";
623 # Annotation for each hit, nhmmer
624 # This code is currently incomplete, the alignment strings
625 # are not being captured
626 elsif ( /Annotation for each hit\s+\(and alignments\)/ ) {
627 @hsp_list = ();
628 my $name;
630 my $dom_counter = 0;
631 while ( defined( $_ = $self->_readline ) ) {
632 if ( $_ =~ m/Internal pipeline statistics/
633 || m/\[No targets detected/ )
635 $self->_pushback($_);
636 last;
638 if ( /^>>\s+(\S+)\s+/ ) {
639 $name = $1;
640 $dom_counter++;
642 while ( defined( $_ = $self->_readline ) ) {
644 if ( $_ =~ m/Internal pipeline statistics/
645 || $_ =~ m/^>>/ )
647 $self->_pushback($_);
648 last;
650 elsif (
651 $_ =~ /^\s+#\s+score/
652 || $_ =~ /^\s+------\s+/
653 || $_ =~ /^>>/
654 || $_ =~ /^$/
655 || $_ =~ /^\s+Alignment:/
656 || $_ =~ /^\s+score:/
657 || $_ =~ /^\s+score\s+bias/
658 || $_ =~ /^\s+\S+\s+\d+\s+([\s+.$ambiguous_nt-]+)/i # Alignment, line 1
659 || $_ =~ /^\s{20,}([\s+gatc-]+)/i # Alignment, line 2
660 || $_ =~ /^\s+$name\s+[\d-]+\s+([\s+$ambiguous_nt-]+)/i # Alignment, line 3
661 || $_ =~ /^\s+[\d.\*]+/ # Alignment, line 4
664 next;
666 elsif (
667 /^\s+[!?]\s+(\S+)\s+
668 (\S+)\s+(\S+)\s+
669 (\d+)\s+(\d+)\s+[.\[\]]*\s+
670 (\d+)\s+(\d+)\s+[.\[\]]*\s+
671 (\d+)\s+(\d+)\s+[.\[\]]*\s+
672 (\d+)\s+(\S+).*$/ox
675 my ($score, $bias, $eval,
676 $hmmstart, $hmmstop, $hitstart,
677 $hitstop, $envstart, $envstop,
678 $length, $acc
681 $1, $2, $3, $4, $5, $6,
682 $7, $8, $9, $10, $11
685 my @vals = (
686 $hitstart, $hitstop,
687 $hmmstart, $hmmstop,
688 $score, $eval,
689 '', '',
692 my $info = $hit_list[ $hitinfo{"$name.$dom_counter"} ];
693 if ( !defined $info ) {
694 $self->warn(
695 "Incomplete information: can't find HSP $name in list of hits\n"
697 next;
699 $domaincounter{"$name.$dom_counter"}++;
700 my $hsp_key = $name . "_" . $domaincounter{"$name.$dom_counter"};
701 push @hsp_list, [ $name, @vals ];
702 $hspinfo{"$hsp_key.$dom_counter"} = $#hsp_list;
704 else {
705 print "Missed this line: $_\n";
712 elsif ( m/Internal pipeline statistics/ || m!^//! ) {
714 # If within hit, hsp close;
715 if ( $self->within_element('hit') ) {
716 if ( $self->within_element('hsp') ) {
717 $self->end_element( { 'Name' => 'Hsp' } );
719 $self->end_element( { 'Name' => 'Hit' } );
722 # Grab summary statistics of run
723 while ( defined( $_ = $self->_readline ) ) {
724 last if ( $_ =~ m/^\/\/$/ );
727 # Do a lot of processing of hits and hsps here
728 my $index = 0;
729 while ( my $hit = shift @hit_list ) {
730 $index++;
731 my $hit_name = shift @$hit;
732 my $hit_desc = shift @$hit;
733 my $hit_signif = shift @$hit;
734 my $hit_score = shift @$hit;
735 my $num_domains = $domaincounter{"$hit_name.$index"} || 0;
737 $self->start_element( { 'Name' => 'Hit' } );
738 $self->element(
739 { 'Name' => 'Hit_id',
740 'Data' => $hit_name
743 $self->element(
744 { 'Name' => 'Hit_desc',
745 'Data' => $hit_desc
748 $self->element(
749 { 'Name' => 'Hit_signif',
750 'Data' => $hit_signif
753 $self->element(
754 { 'Name' => 'Hit_score',
755 'Data' => $hit_score
759 for my $i ( 1 .. $num_domains ) {
760 my $key = $hit_name . "_" . $i;
761 my $hsp = $hsp_list[ $hspinfo{"$key.$index"} ];
762 if ( defined $hsp ) {
763 my $hsp_name = shift @$hsp;
764 $self->start_element( { 'Name' => 'Hsp' } );
765 $self->element(
766 { 'Name' => 'Hsp_identity',
767 'Data' => 0
770 $self->element(
771 { 'Name' => 'Hsp_positive',
772 'Data' => 0
775 if ( $self->{'_reporttype'} eq 'HMMSCAN'
776 or $self->{'_reporttype'} eq 'NHMMER') {
777 $self->element(
778 { 'Name' => 'Hsp_hit-from',
779 'Data' => shift @$hsp
782 $self->element(
783 { 'Name' => 'Hsp_hit-to',
784 'Data' => shift @$hsp
787 $self->element(
788 { 'Name' => 'Hsp_query-from',
789 'Data' => shift @$hsp
792 $self->element(
793 { 'Name' => 'Hsp_query-to',
794 'Data' => shift @$hsp
798 else { # hmmsearch
799 $self->element(
800 { 'Name' => 'Hsp_query-from',
801 'Data' => shift @$hsp
804 $self->element(
805 { 'Name' => 'Hsp_query-to',
806 'Data' => shift @$hsp
809 $self->element(
810 { 'Name' => 'Hsp_hit-from',
811 'Data' => shift @$hsp
814 $self->element(
815 { 'Name' => 'Hsp_hit-to',
816 'Data' => shift @$hsp
820 $self->element(
821 { 'Name' => 'Hsp_score',
822 'Data' => shift @$hsp
825 $self->element(
826 { 'Name' => 'Hsp_evalue',
827 'Data' => shift @$hsp
830 $self->element(
831 { 'Name' => 'Hsp_csline',
832 'Data' => shift @$hsp
835 $self->element(
836 { 'Name' => 'Hsp_hseq',
837 'Data' => shift @$hsp
840 $self->element(
841 { 'Name' => 'Hsp_midline',
842 'Data' => shift @$hsp
845 $self->element(
846 { 'Name' => 'Hsp_qseq',
847 'Data' => shift @$hsp
850 $self->element(
851 { 'Name' => 'Hsp_pline',
852 'Data' => shift @$hsp
856 # Only nhmmer output has strand information
857 if ( $self->{'_reporttype'} eq 'NHMMER' ) {
858 my $hstart = $self->get_from_element('HSP-hit_start');
859 my $hend = $self->get_from_element('HSP-hit_end');
860 my $hstrand = ( $hstart < $hend ) ? 1 : -1;
862 my $qstart = $self->get_from_element('HSP-query_start');
863 my $qend = $self->get_from_element('HSP-query_end');
864 my $qstrand = ( $qstart < $qend ) ? 1 : -1;
866 $self->element(
867 { 'Name' => 'Hsp_query-strand',
868 'Data' => $qstrand
871 $self->element(
872 { 'Name' => 'Hsp_hit-strand',
873 'Data' => $hstrand
878 $self->end_element( { 'Name' => 'Hsp' } );
881 $self->end_element( { 'Name' => 'Hit' } );
883 @hit_list = ();
884 %hitinfo = ();
885 last;
888 else {
889 print "Missed line: $_\n";
890 $self->debug($_);
892 $last = $_;
894 $self->end_element( { 'Name' => 'HMMER_Output' } );
895 my $result = $self->end_document();
896 return $result;
899 =head2 start_element
901 Title : start_element
902 Usage : $eventgenerator->start_element
903 Function: Handles a start event
904 Returns : none
905 Args : hashref with at least 2 keys 'Data' and 'Name'
907 =cut
909 sub start_element {
911 my ( $self, $data ) = @_;
913 # we currently don't care about attributes
914 my $nm = $data->{'Name'};
915 my $type = $MODEMAP{$nm};
916 if ($type) {
917 if ( $self->_eventHandler->will_handle($type) ) {
918 my $func = sprintf( "start_%s", lc $type );
919 $self->_eventHandler->$func( $data->{'Attributes'} );
921 unshift @{ $self->{'_elements'} }, $type;
923 if ( defined $type
924 && $type eq 'result' )
926 $self->{'_values'} = {};
927 $self->{'_result'} = undef;
931 =head2 end_element
933 Title : end_element
934 Usage : $eventgeneartor->end_element
935 Function: Handles and end element event
936 Returns : none
937 Args : hashref with at least 2 keys 'Data' and 'Name'
939 =cut
941 sub end_element {
943 my ( $self, $data ) = @_;
944 my $nm = $data->{'Name'};
945 my $type = $MODEMAP{$nm};
946 my $rc;
948 if ( $nm eq 'HMMER_program' ) {
949 if ( $self->{'_last_data'} =~ /(N?HMM\S+)/i ) {
950 $self->{'_reporttype'} = uc $1;
954 # Hsp are sort of weird, in that they end when another
955 # object begins so have to detect this in end_element for now
956 if ( $nm eq 'Hsp' ) {
957 foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
958 my $data = $self->{'_last_hspdata'}->{$_};
959 if ( $data && $_ eq 'Hsp_hseq' ) {
961 # replace hmm '.' gap symbol by '-'
962 $data =~ s/\./-/g;
964 $self->element(
965 { 'Name' => $_,
966 'Data' => $data
970 $self->{'_last_hspdata'} = {};
972 if ($type) {
973 if ( $self->_eventHandler->will_handle($type) ) {
974 my $func = sprintf( "end_%s", lc $type );
975 $rc = $self->_eventHandler->$func( $self->{'_reporttype'},
976 $self->{'_values'} );
978 my $lastelem = shift @{ $self->{'_elements'} };
980 elsif ( $MAPPING{$nm} ) {
981 if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
982 my $key = ( keys %{ $MAPPING{$nm} } )[0];
983 $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} }
984 = $self->{'_last_data'};
986 else {
987 $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
989 # print "lastdata is " . $self->{'_last_data'} . "\n";
992 else {
993 $self->debug("unknown nm $nm, ignoring\n");
995 $self->{'_last_data'} = ''; # remove read data if we are at
996 # end of an element
997 $self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
998 return $rc;
1001 =head2 element
1003 Title : element
1004 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
1005 Function: Convenience method that calls start_element, characters, end_element
1006 Returns : none
1007 Args : Hash ref with the keys 'Name' and 'Data'
1009 =cut
1011 sub element {
1012 my ( $self, $data ) = @_;
1013 $self->start_element($data);
1014 $self->characters($data);
1015 $self->end_element($data);
1018 =head2 get_from_element
1020 Title : get_from_element
1021 Usage : $self->get_from_element('HSP-hit_start');
1022 Function: Convenience method to retrieve data from '_values' hash
1023 Returns : string
1024 Args : key
1026 =cut
1028 sub get_from_element {
1029 my ($self,$key) = @_;
1030 my $values = $self->{_values};
1031 $values->{$key};
1034 =head2 characters
1036 Title : characters
1037 Usage : $eventgenerator->characters($str)
1038 Function: Send a character events
1039 Returns : none
1040 Args : string
1042 =cut
1044 sub characters {
1045 my ( $self, $data ) = @_;
1047 if ( $self->in_element('hsp')
1048 && $data->{'Name'} =~ /Hsp\_(qseq|hseq|csline|pline|midline)/o
1049 && defined $data->{'Data'} )
1051 $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
1053 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o );
1055 $self->{'_last_data'} = $data->{'Data'};
1058 =head2 within_element
1060 Title : within_element
1061 Usage : if( $eventgenerator->within_element( $element ) ) {}
1062 Function: Test if we are within a particular element
1063 This is different than 'in' because within can be tested for
1064 a whole block
1065 Returns : boolean
1066 Args : string element name
1068 =cut
1070 sub within_element {
1071 my ( $self, $name ) = @_;
1072 return 0
1073 if ( !defined $name
1074 || !defined $self->{'_elements'}
1075 || scalar @{ $self->{'_elements'} } == 0 );
1076 foreach ( @{ $self->{'_elements'} } ) {
1077 return 1 if ( $_ eq $name );
1079 return 0;
1082 =head2 in_element
1084 Title : in_element
1085 Usage : if( $eventgenerator->in_element( $element ) ) {}
1086 Function: Test if we are in a particular element
1087 This is different than 'within' because 'in' only
1088 tests its immediate parent
1089 Returns : boolean
1090 Args : string element name
1092 =cut
1094 sub in_element {
1095 my ( $self, $name ) = @_;
1096 return 0 if !defined $self->{'_elements'}->[0];
1097 return ( $self->{'_elements'}->[0] eq $name );
1100 =head2 start_document
1102 Title : start_document
1103 Usage : $eventgenerator->start_document
1104 Function: Handle a start document event
1105 Returns : none
1106 Args : none
1108 =cut
1110 sub start_document {
1111 my ($self) = @_;
1112 $self->{'_lasttype'} = '';
1113 $self->{'_values'} = {};
1114 $self->{'_result'} = undef;
1115 $self->{'_elements'} = [];
1118 =head2 end_document
1120 Title : end_document
1121 Usage : $eventgenerator->end_document
1122 Function: Handles an end document event
1123 Returns : Bio::Search::Result::ResultI object
1124 Args : none
1126 =cut
1128 sub end_document {
1129 my ($self) = @_;
1130 return $self->{'_result'};
1133 =head2 result_count
1135 Title : result_count
1136 Usage : my $count = $searchio->result_count
1137 Function: Returns the number of results processed
1138 Returns : interger
1139 Args : none
1141 =cut
1143 sub result_count {
1144 my $self = shift;
1145 return $self->{'_result_count'};