branch-1-6/Bio/SearchIO/hmmer_pull.pm

   1 # $Id$
   2 #
   3 # BioPerl module for Bio::SearchIO::hmmer_pull
   4 #
   5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   6 #
   7 # Cared for by Sendu Bala <bix@sendu.me.uk>
   8 #
   9 # Copyright Sendu Bala
  10 #
  11 # You may distribute this module under the same terms as perl itself
  12
  13 # POD documentation - main docs before the code
  14
  15 =head1 NAME
  16
  17 Bio::SearchIO::hmmer_pull - A parser for HMMER output
  18
  19 =head1 SYNOPSIS
  20
  21     # do not use this class directly; it is available through Bio::SearchIO
  22     use Bio::SearchIO;
  23     my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
  24                                -file   => 't/data/hmmpfam.bigout');
  25     while (my $result = $in->next_result) {
  26         # this is a Bio::Search::Result::HmmpfamResult object
  27         print $result->query_name(), " for HMM ", $result->hmm_name(), "\n";
  28         while (my $hit = $result->next_hit) {
  29             print $hit->name(), "\n";
  30             while (my $hsp = $hit->next_hsp) {
  31                 print "length is ", $hsp->length(), "\n";
  32             }
  33         }
  34     }
  35
  36 =head1 DESCRIPTION
  37
  38 This object implements a pull-parser for HMMER output. It is fast since it
  39 only does work on request (hence 'pull').
  40
  41 =head1 FEEDBACK
  42
  43 =head2 Mailing Lists
  44
  45 User feedback is an integral part of the evolution of this and other
  46 Bioperl modules. Send your comments and suggestions preferably to
  47 the Bioperl mailing list.  Your participation is much appreciated.
  48
  49   bioperl-l@bioperl.org                  - General discussion
  50   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  51
  52 =head2 Support
  53
  54 Please direct usage questions or support issues to the mailing list:
  55
  56 I<bioperl-l@bioperl.org>
  57
  58 rather than to the module maintainer directly. Many experienced and
  59 responsive experts will be able to look at the problem and quickly
  60 address it. Please include a thorough description of the problem
  61 with code and data examples if at all possible.
  62
  63 =head2 Reporting Bugs
  64
  65 Report bugs to the Bioperl bug tracking system to help us keep track
  66 of the bugs and their resolution. Bug reports can be submitted via the
  67 web:
  68
  69   http://bugzilla.open-bio.org/
  70
  71 =head1 AUTHOR - Sendu Bala
  72
  73 Email bix@sendu.me.uk
  74
  75 =head1 APPENDIX
  76
  77 The rest of the documentation details each of the object methods.
  78 Internal methods are usually preceded with a _
  79
  80 =cut
  81
  82 # Let the code begin...
  83
  84 package Bio::SearchIO::hmmer_pull;
  85
  86 use strict;
  87
  88
  89 use base qw(Bio::SearchIO Bio::PullParserI);
  90
  91 =head2 new
  92
  93  Title   : new
  94  Usage   : my $obj = Bio::SearchIO::hmmer_pull->new();
  95  Function: Builds a new Bio::SearchIO::hmmer_pull object
  96  Returns : Bio::SearchIO::hmmer_pull
  97  Args    : -fh/-file => HMMER output filename
  98            -format   => 'hmmer_pull'
  99            -evalue   => float or scientific notation number to be used
 100                         as an evalue cutoff for hits
 101            -score    => integer or scientific notation number to be used
 102                         as a score value cutoff for hits
 103            -hsps     => integer minimum number of hsps (domains) a hit must have
 104            -piped_behaviour => 'temp_file'|'memory'|'sequential_read'
 105
 106            -piped_behaviour defines what the parser should do if the input is
 107             an unseekable filehandle (eg. piped input), see
 108             Bio::PullParserI::chunk for details. Default is 'sequential_read'.
 109
 110 =cut
 111
 112 sub _initialize {
 113     my ($self, @args) = @_;
 114
 115     # don't do normal SearchIO initialization
 116
 117     my ($writer, $file, $fh, $piped_behaviour, $evalue, $score, $hsps) =
 118                             $self->_rearrange([qw(WRITER
 119                                                   FILE FH
 120                                                   PIPED_BEHAVIOUR
 121                                                   EVALUE
 122                                                   SCORE
 123                                                   HSPS)], @args);
 124     $self->writer($writer) if $writer;
 125
 126     $self->_fields( { ( header => undef,
 127                         algorithm => undef,
 128                         algorithm_version => undef,
 129                         algorithm_reference => '',
 130                         hmm_file => undef,
 131                         hmm_name => undef,
 132                         sequence_file => undef,
 133                         sequence_database => undef,
 134                         database_name => undef,
 135                         database_letters => undef,
 136                         database_entries => undef,
 137                         next_result => undef,
 138                         evalue_cutoff => '[unset]',
 139                         score_cutoff => '[unset]',
 140                         hsps_cutoff => '[unset]' ) } );
 141
 142     $self->_fields->{evalue_cutoff} = $evalue if $evalue;
 143     $self->_fields->{score_cutoff} = $score if $score;
 144     $self->_fields->{hsps_cutoff} = $hsps if $hsps;
 145
 146     $self->_dependencies( { ( algorithm => 'header',
 147                               algorithm_version => 'header',
 148                               hmm_file => 'header',
 149                               hmm_name => 'header',
 150                               sequence_file => 'header',
 151                               sequence_database => 'header' ) } );
 152
 153     $self->chunk($file || $fh || $self->throw("-file or -fh must be supplied"),
 154                  -piped_behaviour => $piped_behaviour || 'sequential_read');
 155 }
 156
 157 sub _discover_header {
 158     my $self = shift;
 159     $self->_chunk_seek(0);
 160     my $header = $self->_get_chunk_by_nol(8);
 161     $self->{_after_header} = $self->_chunk_tell;
 162
 163     my ($algo) = $header =~ /^(hmm\S+) - search/m;
 164     $self->_fields->{algorithm} = uc $algo;
 165
 166     ($self->_fields->{algorithm_version}) = $header =~ /^HMMER\s+?(\S+)/m;
 167
 168     ($self->_fields->{hmm_file}) = $header =~ /^HMM file:\s.+?(\S+)$/m;
 169     $self->_fields->{hmm_name} = $self->_fields->{hmm_file};
 170
 171     ($self->_fields->{sequence_file}) = $header =~ /^Sequence (?:file|database):\s.+?(\S+)$/m;
 172     $self->_fields->{sequence_database} = $self->_fields->{sequence_file};
 173
 174     $self->_fields->{header} = 1;
 175 }
 176
 177 sub _discover_database_name {
 178     my $self = shift;
 179     my $type = $self->get_field('algorithm');
 180
 181     if ($type eq 'HMMPFAM') {
 182         $self->_fields->{database_name} = $self->get_field('hmm_file');
 183     }
 184     elsif ($type eq 'HMMSEARCH') {
 185         $self->_fields->{database_name} = $self->get_field('sequence_file');
 186     }
 187 }
 188
 189 sub _discover_next_result {
 190     my $self = shift;
 191     my $type = $self->get_field('algorithm'); # also sets _after_header if not set
 192
 193     if ($type eq 'HMMPFAM') {
 194         use Bio::Search::Result::HmmpfamResult;
 195
 196         unless ($self->_sequential) {
 197             $self->_chunk_seek($self->{_end_of_previous_result} || $self->{_after_header});
 198
 199             my ($start, $end) = $self->_find_chunk_by_end("//\n");
 200             return if $start == $end;
 201             $self->_fields->{next_result} = Bio::Search::Result::HmmpfamResult->new(-chunk => [($self->chunk, $start, $end)],
 202                                                                                    -parent => $self);
 203
 204             $self->{_end_of_previous_result} = $end;
 205         }
 206         else {
 207             # deliberatly don't cache these, which means rewind won't work;
 208             # if we cached we may as well have used 'memory' option to
 209             # -piped_behaviour
 210             my $chunk = $self->_get_chunk_by_end("//\n");
 211             $chunk || return;
 212             $self->_fields->{next_result} = Bio::Search::Result::HmmpfamResult->new(-chunk => [$chunk],
 213                                                                                    -parent => $self);
 214         }
 215     }
 216     elsif ($type eq 'HMMSEARCH') {
 217         $self->throw("Can't handle hmmsearch yet\n");
 218     }
 219     else {
 220         $self->throw("Unknown report type");
 221     }
 222 }
 223
 224 =head2 next_result
 225
 226  Title   : next_result
  227  Usage   : my $result = $searchio->next_result;
 228  Function: Returns the next Result from a search
 229  Returns : Bio::Search::Result::ResultI object
 230  Args    : none
 231
 232 =cut
 233
 234 sub next_result {
 235     my $self = shift;
 236     my $result = $self->get_field('next_result') || return;
 237
 238     undef $self->_fields->{next_result};
 239
 240     $self->{'_result_count'}++;
 241     return $result;
 242 }
 243
 244 =head2 result_count
 245
 246  Title   : result_count
 247  Usage   : my $count = $searchio->result_count
 248  Function: Returns the number of results we have processed.
 249  Returns : integer
 250  Args    : none
 251
 252 =cut
 253
 254 sub result_count {
 255     my $self = shift;
 256     return $self->{'_result_count'};
 257 }
 258
 259 =head2 rewind
 260
 261  Title   : rewind
 262  Usage   : $searchio->rewind;
 263  Function: Allow one to reset the Result iterator to the beginning, so that
 264            next_result() will subsequently return the first result and so on.
 265
 266            NB: result objects are not cached, so you will get new result objects
 267            each time you rewind. Also, note that result_count() counts the
  268            number of times you have called next_result(), so it will not be able
  269            to tell you how many results there were in the file if you use rewind().
 270
 271  Returns : n/a
 272  Args    : none
 273
 274 =cut
 275
 276 sub rewind {
 277         my $self = shift;
 278     if ($self->_sequential) {
 279         $self->warn("rewind has no effect on piped input when you have chosen 'sequential_read' mode");
 280     }
 281         delete $self->{_end_of_previous_result};
 282 }
 283
 284 1;