2 # BioPerl module for Bio::Search::Result::HmmpfamResult
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Search::Result::HmmpfamResult - A parser and result object for hmmpfam
21 # generally we use Bio::SearchIO to build these objects
23 my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
24 -file => 'result.hmmer');
26 while (my $result = $in->next_result) {
27 print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n";
32 This object implements a parser for hmmpfam result output, a program in the HMMER
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 of the bugs and their resolution. Bug reports can be submitted via the
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Sendu Bala
71 The rest of the documentation details each of the object methods.
72 Internal methods are usually preceded with a _
76 # Let the code begin...
78 package Bio
::Search
::Result
::HmmpfamResult
;
82 use Bio
::Search
::Hit
::HmmpfamHit
;
84 use base
qw(Bio::Root::Root Bio::Search::Result::PullResultI);
89 Usage : my $obj = Bio::SearchIO::Result::hmmpfam->new();
90 Function: Builds a new Bio::SearchIO::Result::hmmpfam object
91 Returns : Bio::SearchIO::Result::hmmpfam
92 Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
93 -parent => Bio::PullParserI object (required if no -chunk)
94 -parameters => hash ref of search parameters (key => value), optional
95 -statistics => hash ref of search statistics (key => value), optional
97 where the array ref provided to -chunk contains an IO object
98 for a filehandle to something representing the raw data of the
99 result, and $start and $end define the tell() position within the
100 filehandle that the result data starts and ends (optional; defaults
101 to start and end of the entire thing described by the filehandle)
106 my ($class, @args) = @_;
107 my $self = $class->SUPER::new
(@args);
109 $self->_setup(@args);
111 foreach my $field (qw( header hit_table hsp_table alignments next_model models query_length )) {
112 $self->_fields->{$field} = undef;
115 $self->_dependencies( { ( query_name
=> 'header',
116 query_accession
=> 'header',
117 query_description
=> 'header',
118 hit_table
=> 'header',
119 num_hits
=> 'hit_table',
120 no_hits_found
=> 'hit_table',
121 hsp_table
=> 'hit_table',
122 next_alignment
=> 'hsp_table' ) } );
128 # PullParserI discovery methods so we can answer all ResultI questions
131 sub _discover_header
{
133 $self->_chunk_seek(0);
134 my $header = $self->_get_chunk_by_end("all domains):\n");
135 $self->{_after_header
} = $self->_chunk_tell;
137 $header || $self->throw("Could not find hmmer header, is file really hmmer format?");
139 ($self->_fields->{query_name
}) = $header =~ /^Query(?:\s+sequence)?:\s+(\S+)/m;
140 ($self->_fields->{query_accession
}) = $header =~ /^Accession:\s+(\S+)/m;
141 ($self->_fields->{query_description
}) = $header =~ /^Description:\s+(\S.+)/m;
142 $self->_fields->{query_accession
} ||= '';
143 $self->_fields->{query_description
} ||= '';
145 $self->_fields->{header
} = 1; # stop this method being called again
148 sub _discover_hit_table
{
151 $self->_chunk_seek($self->{_after_header
});
152 my $table = $self->_get_chunk_by_end("for domains:\n");
153 $self->{_after_hit_table
} = $self->_chunk_tell;
155 my $evalue_cutoff = $self->get_field('evalue_cutoff');
156 undef $evalue_cutoff if $evalue_cutoff eq '[unset]';
157 my $score_cutoff = $self->get_field('score_cutoff');
158 undef $score_cutoff if $score_cutoff eq '[unset]';
159 my $hsps_cutoff = $self->get_field('hsps_cutoff');
160 undef $hsps_cutoff if $hsps_cutoff eq '[unset]';
164 while ($table =~ /^(\S+)\s+(\S.+?)?\s+(\S+)\s+(\S+)\s+(\d+)\n/gm) {
166 my $evalue = abs($4); # consistency for tests under Windows
167 next if ($evalue_cutoff && $evalue > $evalue_cutoff);
168 next if ($score_cutoff && $3 < $score_cutoff);
169 next if ($hsps_cutoff && $5 < $hsps_cutoff);
170 push(@table, [$1, $2, $3, $evalue, $5]);
172 $self->_fields->{hit_table
} = \
@table;
173 $self->{_next_hit_index
} = @table > 0 ?
0 : -1;
175 $self->_fields->{no_hits_found
} = $no_hit;
176 $self->_fields->{num_hits
} = @table;
179 sub _discover_hsp_table
{
182 $self->_chunk_seek($self->{_after_hit_table
});
183 my $table = $self->_get_chunk_by_end("top-scoring domains:\n");
184 $table ||= $self->_get_chunk_by_end("//\n"); # A0 reports
185 $self->{_after_hsp_table
} = $self->_chunk_tell;
188 # can't save this regex work for the hsp object because the hit object needs
189 # its length, so may as well just do all the work here
190 while ($table =~ /^(\S+)\s+(\d+)\/\d
+\s
+(\d
+)\s
+(\d
+)\s
+\S\S\s
+(\d
+)\s
+(\d
+)\s
+\S
(\S
)\s
+(\S
+)\s
+(\S
+)/gm
) {
191 # rank query_start query_end hit_start hit_end score evalue
192 my $evalue = abs($9); # consistency for tests under Windows
193 push(@
{$table{$1}->{hsp_data
}}, [$2, $3, $4, $5, $6, $8, $evalue]);
195 $table{$1}->{hit_length
} = $6;
198 $self->_fields->{hsp_table
} = \
%table;
201 sub _discover_alignments
{
203 $self->_fields->{alignments
} = { };
206 sub _next_alignment
{
208 return if $self->{_no_more_alignments
};
210 my $aligns = $self->_fields->{alignments
};
212 unless (defined $self->{_after_previous_alignment
}) {
213 $self->_chunk_seek($self->{_after_hsp_table
});
214 my $chunk = $self->_get_chunk_by_end(": domain");
216 $self->{_no_more_alignments
} = 1;
220 $self->{_after_previous_alignment
} = $self->_chunk_tell;
221 $self->{_next_alignment_start_text
} = $chunk;
222 return $self->_next_alignment;
225 $self->_chunk_seek($self->{_after_previous_alignment
});
226 my $chunk = $self->_get_chunk_by_end(": domain");
228 $self->_chunk_seek($self->{_after_previous_alignment
});
229 $chunk = $self->_get_chunk_by_end("//");
232 $self->{_no_more_alignments
} = 1;
237 $self->{_after_previous_alignment
} = $self->_chunk_tell;
239 if (defined $self->{_next_alignment_start_text
}) {
240 $chunk = $self->{_next_alignment_start_text
}.$chunk;
243 $chunk =~ s/(\S+: domain)$//;
244 $self->{_next_alignment_start_text
} = $1;
246 my ($name, $domain) = $chunk =~ /^(\S+): domain (\d+)/;
247 $aligns->{$name.'~~~~'.$domain} = $chunk;
251 sub _discover_next_hit
{
253 my @hit_table = @
{$self->get_field('hit_table')};
254 return if $self->{_next_hit_index
} == -1;
256 #[name description score significance num_hsps rank]
257 my @hit_data = (@
{$hit_table[$self->{_next_hit_index
}++]}, $self->{_next_hit_index
});
259 $self->_fields->{next_hit
} = Bio
::Search
::Hit
::HmmpfamHit
->new(-parent
=> $self,
260 -hit_data
=> \
@hit_data);
262 if ($self->{_next_hit_index
} > $#hit_table) {
263 $self->{_next_hit_index
} = -1;
270 Usage : while( $hit = $result->next_hit()) { ... }
271 Function: Returns the next available Hit object, representing potential
272 matches between the query and various entities from the database.
273 Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
280 my $hit = $self->get_field('next_hit');
281 undef $self->_fields->{next_hit
};
288 Usage : my $domain = $result->next_model
289 Function: Returns the next domain - this is an alias for next_hit()
290 Returns : L<Bio::Search::Hit::HitI> object
295 *next_model
= \
&next_hit
;
300 Usage : my @hits = $result->hits
301 Function: Returns the HitI objects contained within this Result
302 Returns : Array of Bio::Search::Hit::HitI objects
305 See Also: L<Bio::Search::Hit::HitI>
311 my $old = $self->{_next_hit_index
} || 0;
314 while (defined(my $hit = $self->next_hit)) {
317 $self->{_next_hit_index
} = @hits > 0 ?
$old : -1;
324 Usage : my @domains = $result->models;
325 Function: Returns the list of HMM models seen - this is an alias for hits()
326 Returns : Array of L<Bio::Search::Hit::HitI> objects
336 Usage : $result->sort_hits('<score')
337 Function : Sorts the hits so that they come out in the desired order when
338 hits() or next_hit() is called.
340 Args : A coderef for the sort function. See the documentation on the Perl
341 sort() function for guidelines on writing sort functions.
342 You will be sorting array references, not HitI objects. The
343 references contain name as element 0, description as element 1,
344 score as element 2, significance as element 3 and number of hsps
346 By default the sort order is ascending significance value (ie.
347 most significant hits first).
348 Note : To access the special variables $a and $b used by the Perl sort()
349 function the user function must access
350 Bio::Search::Result::HmmpfamResult namespace.
353 sub{$Bio::Search::Result::HmmpfamResult::a->[2]
355 $Bio::Search::Result::HmmpfamResult::b->[2]});
356 NOT $result->sort_hits($a->[2] <=> $b->[2]);
361 my ($self, $code_ref) = @_;
362 $code_ref ||= sub { $a->[3] <=> $b->[3] };
364 # avoid creating hit objects just to sort, hence force user to sort on
365 # the array references in hit table
366 my $table_ref = $self->get_field('hit_table');
367 @
{$table_ref} > 1 || return;
369 my @sorted = sort $code_ref @
{$table_ref};
370 @sorted == @
{$table_ref} || $self->throw("Your sort routine failed to give back all hits!");
371 $self->_fields->{hit_table
} = \
@sorted;
377 Usage : $result->rewind;
378 Function: Allow one to reset the Hit iterator to the beginning, so that
379 next_hit() will subsequently return the first hit and so on.
387 unless ($self->_fields->{hit_table
}) {
388 $self->get_field('hit_table');
390 $self->{_next_hit_index
} = @
{$self->_fields->{hit_table
}} > 0 ?
0 : -1;