sync w/ main trunk
[bioperl-live.git] / Bio / Search / Result / HmmpfamResult.pm
blobfb97bffa5275a2e62e77970b2685c2239d2b70c6
1 # $Id$
3 # BioPerl module for Bio::Search::Result::HmmpfamResult
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Sendu Bala <bix@sendu.me.uk>
9 # Copyright Sendu Bala
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Search::Result::HmmpfamResult - A parser and result object for hmmpfam
18 results
20 =head1 SYNOPSIS
22 # generally we use Bio::SearchIO to build these objects
23 use Bio::SearchIO;
24 my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
25 -file => 'result.hmmer');
27 while (my $result = $in->next_result) {
28 print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n";
31 =head1 DESCRIPTION
33 This object implements a parser for hmmpfam result output, a program in the HMMER
34 package.
36 =head1 FEEDBACK
38 =head2 Mailing Lists
40 User feedback is an integral part of the evolution of this and other
41 Bioperl modules. Send your comments and suggestions preferably to
42 the Bioperl mailing list. Your participation is much appreciated.
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 =head2 Support
49 Please direct usage questions or support issues to the mailing list:
51 L<bioperl-l@bioperl.org>
53 rather than to the module maintainer directly. Many experienced and
54 reponsive experts will be able look at the problem and quickly
55 address it. Please include a thorough description of the problem
56 with code and data examples if at all possible.
58 =head2 Reporting Bugs
60 Report bugs to the Bioperl bug tracking system to help us keep track
61 of the bugs and their resolution. Bug reports can be submitted via the
62 web:
64 http://bugzilla.open-bio.org/
66 =head1 AUTHOR - Sendu Bala
68 Email bix@sendu.me.uk
70 =head1 APPENDIX
72 The rest of the documentation details each of the object methods.
73 Internal methods are usually preceded with a _
75 =cut
77 # Let the code begin...
79 package Bio::Search::Result::HmmpfamResult;
81 use strict;
83 use Bio::Search::Hit::HmmpfamHit;
85 use base qw(Bio::Root::Root Bio::Search::Result::PullResultI);
87 =head2 new
89 Title : new
90 Usage : my $obj = Bio::SearchIO::Result::hmmpfam->new();
91 Function: Builds a new Bio::SearchIO::Result::hmmpfam object
92 Returns : Bio::SearchIO::Result::hmmpfam
93 Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
94 -parent => Bio::PullParserI object (required if no -chunk)
95 -parameters => hash ref of search parameters (key => value), optional
96 -statistics => hash ref of search statistics (key => value), optional
98 where the array ref provided to -chunk contains an IO object
99 for a filehandle to something representing the raw data of the
100 result, and $start and $end define the tell() position within the
101 filehandle that the result data starts and ends (optional; defaults
102 to start and end of the entire thing described by the filehandle)
104 =cut
106 sub new {
107 my ($class, @args) = @_;
108 my $self = $class->SUPER::new(@args);
110 $self->_setup(@args);
112 foreach my $field (qw( header hit_table hsp_table alignments next_model models query_length )) {
113 $self->_fields->{$field} = undef;
116 $self->_dependencies( { ( query_name => 'header',
117 query_accession => 'header',
118 query_description => 'header',
119 hit_table => 'header',
120 num_hits => 'hit_table',
121 no_hits_found => 'hit_table',
122 hsp_table => 'hit_table',
123 next_alignment => 'hsp_table' ) } );
125 return $self;
129 # PullParserI discovery methods so we can answer all ResultI questions
132 sub _discover_header {
133 my $self = shift;
134 $self->_chunk_seek(0);
135 my $header = $self->_get_chunk_by_end("all domains):\n");
136 $self->{_after_header} = $self->_chunk_tell;
138 $header || $self->throw("Could not find hmmer header, is file really hmmer format?");
140 ($self->_fields->{query_name}) = $header =~ /^Query(?:\s+sequence)?:\s+(\S+)/m;
141 ($self->_fields->{query_accession}) = $header =~ /^Accession:\s+(\S+)/m;
142 ($self->_fields->{query_description}) = $header =~ /^Description:\s+(\S.+)/m;
143 $self->_fields->{query_accession} ||= '';
144 $self->_fields->{query_description} ||= '';
146 $self->_fields->{header} = 1; # stop this method being called again
149 sub _discover_hit_table {
150 my $self = shift;
152 $self->_chunk_seek($self->{_after_header});
153 my $table = $self->_get_chunk_by_end("for domains:\n");
154 $self->{_after_hit_table} = $self->_chunk_tell;
156 my $evalue_cutoff = $self->get_field('evalue_cutoff');
157 undef $evalue_cutoff if $evalue_cutoff eq '[unset]';
158 my $score_cutoff = $self->get_field('score_cutoff');
159 undef $score_cutoff if $score_cutoff eq '[unset]';
160 my $hsps_cutoff = $self->get_field('hsps_cutoff');
161 undef $hsps_cutoff if $hsps_cutoff eq '[unset]';
163 my @table;
164 my $no_hit = 1;
165 while ($table =~ /^(\S+)\s+(\S.+?)?\s+(\S+)\s+(\S+)\s+(\d+)\n/gm) {
166 $no_hit = 0;
167 my $evalue = abs($4); # consistency for tests under Windows
168 next if ($evalue_cutoff && $evalue > $evalue_cutoff);
169 next if ($score_cutoff && $3 < $score_cutoff);
170 next if ($hsps_cutoff && $5 < $hsps_cutoff);
171 push(@table, [$1, $2, $3, $evalue, $5]);
173 $self->_fields->{hit_table} = \@table;
174 $self->{_next_hit_index} = @table > 0 ? 0 : -1;
176 $self->_fields->{no_hits_found} = $no_hit;
177 $self->_fields->{num_hits} = @table;
180 sub _discover_hsp_table {
181 my $self = shift;
183 $self->_chunk_seek($self->{_after_hit_table});
184 my $table = $self->_get_chunk_by_end("top-scoring domains:\n");
185 $table ||= $self->_get_chunk_by_end("//\n"); # A0 reports
186 $self->{_after_hsp_table} = $self->_chunk_tell;
188 my %table;
189 # can't save this regex work for the hsp object because the hit object needs
190 # its length, so may as well just do all the work here
191 while ($table =~ /^(\S+)\s+(\d+)\/\d+\s+(\d+)\s+(\d+)\s+\S\S\s+(\d+)\s+(\d+)\s+\S(\S)\s+(\S+)\s+(\S+)/gm) {
192 # rank query_start query_end hit_start hit_end score evalue
193 my $evalue = abs($9); # consistency for tests under Windows
194 push(@{$table{$1}->{hsp_data}}, [$2, $3, $4, $5, $6, $8, $evalue]);
195 if ($7 eq ']') {
196 $table{$1}->{hit_length} = $6;
199 $self->_fields->{hsp_table} = \%table;
202 sub _discover_alignments {
203 my $self = shift;
204 $self->_fields->{alignments} = { };
207 sub _next_alignment {
208 my $self = shift;;
209 return if $self->{_no_more_alignments};
211 my $aligns = $self->_fields->{alignments};
213 unless (defined $self->{_after_previous_alignment}) {
214 $self->_chunk_seek($self->{_after_hsp_table});
215 my $chunk = $self->_get_chunk_by_end(": domain");
216 unless ($chunk) {
217 $self->{_no_more_alignments} = 1;
218 return;
221 $self->{_after_previous_alignment} = $self->_chunk_tell;
222 $self->{_next_alignment_start_text} = $chunk;
223 return $self->_next_alignment;
226 $self->_chunk_seek($self->{_after_previous_alignment});
227 my $chunk = $self->_get_chunk_by_end(": domain");
228 unless ($chunk) {
229 $self->_chunk_seek($self->{_after_previous_alignment});
230 $chunk = $self->_get_chunk_by_end("//");
232 unless ($chunk) {
233 $self->{_no_more_alignments} = 1;
234 return;
238 $self->{_after_previous_alignment} = $self->_chunk_tell;
240 if (defined $self->{_next_alignment_start_text}) {
241 $chunk = $self->{_next_alignment_start_text}.$chunk;
244 $chunk =~ s/(\S+: domain)$//;
245 $self->{_next_alignment_start_text} = $1;
247 my ($name, $domain) = $chunk =~ /^(\S+): domain (\d+)/;
248 $aligns->{$name.'~~~~'.$domain} = $chunk;
249 return 1;
252 sub _discover_next_hit {
253 my $self = shift;
254 my @hit_table = @{$self->get_field('hit_table')};
255 return if $self->{_next_hit_index} == -1;
257 #[name description score significance num_hsps rank]
258 my @hit_data = (@{$hit_table[$self->{_next_hit_index}++]}, $self->{_next_hit_index});
260 $self->_fields->{next_hit} = Bio::Search::Hit::HmmpfamHit->new(-parent => $self,
261 -hit_data => \@hit_data);
263 if ($self->{_next_hit_index} > $#hit_table) {
264 $self->{_next_hit_index} = -1;
268 =head2 next_hit
270 Title : next_hit
271 Usage : while( $hit = $result->next_hit()) { ... }
272 Function: Returns the next available Hit object, representing potential
273 matches between the query and various entities from the database.
274 Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
275 Args : none
277 =cut
279 sub next_hit {
280 my $self = shift;
281 my $hit = $self->get_field('next_hit');
282 undef $self->_fields->{next_hit};
283 return $hit;
286 =head2 next_model
288 Title : next_model
289 Usage : my $domain = $result->next_model
290 Function: Returns the next domain - this is an alias for next_hit()
291 Returns : L<Bio::Search::Hit::HitI> object
292 Args : none
294 =cut
296 *next_model = \&next_hit;
298 =head2 hits
300 Title : hits
301 Usage : my @hits = $result->hits
302 Function: Returns the HitI objects contained within this Result
303 Returns : Array of Bio::Search::Hit::HitI objects
304 Args : none
306 See Also: L<Bio::Search::Hit::HitI>
308 =cut
310 sub hits {
311 my $self = shift;
312 my $old = $self->{_next_hit_index} || 0;
313 $self->rewind;
314 my @hits;
315 while (defined(my $hit = $self->next_hit)) {
316 push(@hits, $hit);
318 $self->{_next_hit_index} = @hits > 0 ? $old : -1;
319 return @hits;
322 =head2 models
324 Title : models
325 Usage : my @domains = $result->models;
326 Function: Returns the list of HMM models seen - this is an alias for hits()
327 Returns : Array of L<Bio::Search::Hit::HitI> objects
328 Args : none
330 =cut
332 *models = \&hits;
334 =head2 sort_hits
336 Title : sort_hits
337 Usage : $result->sort_hits('<score')
338 Function : Sorts the hits so that they come out in the desired order when
339 hits() or next_hit() is called.
340 Returns : n/a
341 Args : A coderef for the sort function. See the documentation on the Perl
342 sort() function for guidelines on writing sort functions.
343 You will be sorting array references, not HitI objects. The
344 references contain name as element 0, description as element 1,
345 score as element 2, significance as element 3 and number of hsps
346 as element 4.
347 By default the sort order is ascending significance value (ie.
348 most significant hits first).
349 Note : To access the special variables $a and $b used by the Perl sort()
350 function the user function must access
351 Bio::Search::Result::HmmpfamResult namespace.
352 For example, use :
353 $result->sort_hits(
354 sub{$Bio::Search::Result::HmmpfamResult::a->[2]
355 <=>
356 $Bio::Search::Result::HmmpfamResult::b->[2]});
357 NOT $result->sort_hits($a->[2] <=> $b->[2]);
359 =cut
361 sub sort_hits {
362 my ($self, $code_ref) = @_;
363 $code_ref ||= sub { $a->[3] <=> $b->[3] };
365 # avoid creating hit objects just to sort, hence force user to sort on
366 # the array references in hit table
367 my $table_ref = $self->get_field('hit_table');
368 @{$table_ref} > 1 || return;
370 my @sorted = sort $code_ref @{$table_ref};
371 @sorted == @{$table_ref} || $self->throw("Your sort routine failed to give back all hits!");
372 $self->_fields->{hit_table} = \@sorted;
375 =head2 rewind
377 Title : rewind
378 Usage : $result->rewind;
379 Function: Allow one to reset the Hit iterator to the beginning, so that
380 next_hit() will subsequently return the first hit and so on.
381 Returns : n/a
382 Args : none
384 =cut
386 sub rewind {
387 my $self = shift;
388 unless ($self->_fields->{hit_table}) {
389 $self->get_field('hit_table');
391 $self->{_next_hit_index} = @{$self->_fields->{hit_table}} > 0 ? 0 : -1;