bug 2485 and related fixes
[bioperl-live.git] / Bio / Search / HSP / HMMERHSP.pm
blobe2d36218bd983dcb9a97eb737326bdc30cf0f375
1 # $Id$
3 # BioPerl module for Bio::Search::HSP::HMMERHSP
5 # Cared for by Jason Stajich <jason@bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Search::HSP::HMMERHSP - A HSP object for HMMER results
17 =head1 SYNOPSIS
19 use Bio::Search::HSP::HMMERHSP;
20 # use it just like a Bio::Search::HSP::GenericHSP object
22 =head1 DESCRIPTION
24 This object is a specialization of L<Bio::Search::HSP::GenericHSP>.
26 =head1 FEEDBACK
28 =head2 Mailing Lists
30 User feedback is an integral part of the evolution of this and other
31 Bioperl modules. Send your comments and suggestions preferably to
32 the Bioperl mailing list. Your participation is much appreciated.
34 bioperl-l@bioperl.org - General discussion
35 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
37 =head2 Reporting Bugs
39 Report bugs to the Bioperl bug tracking system to help us keep track
40 of the bugs and their resolution. Bug reports can be submitted via the
41 web:
43 http://bugzilla.open-bio.org/
45 =head1 AUTHOR - Jason Stajich
47 Email jason-at-bioperl.org
49 =head1 APPENDIX
51 The rest of the documentation details each of the object methods.
52 Internal methods are usually preceded with a _
54 =cut
57 # Let the code begin...
60 package Bio::Search::HSP::HMMERHSP;
61 use strict;
63 use base qw(Bio::Search::HSP::GenericHSP);
65 =head2 new
67 Title : new
68 Usage : my $obj = Bio::Search::HSP::HMMERHSP->new();
69 Function: Builds a new Bio::Search::HSP::HMMERHSP object
70 Returns : Bio::Search::HSP::HMMERHSP
71 Args :
73 Plus Bio::Search::HSP::GenericHSP methods
75 -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc)
76 -evalue => evalue
77 -pvalue => pvalue
78 -bits => bit value for HSP
79 -score => score value for HSP (typically z-score but depends on
80 analysis)
81 -hsp_length=> Length of the HSP (including gaps)
82 -identical => # of residues that matched identically
83 -conserved => # of residues that matched conservatively
84 (only protein comparisons;
85 conserved == identical in nucleotide comparisons)
86 -hsp_gaps => # of gaps in the HSP
87 -query_gaps => # of gaps in the query in the alignment
88 -hit_gaps => # of gaps in the subject in the alignment
89 -query_name => HSP Query sequence name (if available)
90 -query_start => HSP Query start (in original query sequence coords)
91 -query_end => HSP Query end (in original query sequence coords)
92 -hit_name => HSP Hit sequence name (if available)
93 -hit_start => HSP Hit start (in original hit sequence coords)
94 -hit_end => HSP Hit end (in original hit sequence coords)
95 -hit_length => total length of the hit sequence
96 -query_length=> total length of the query sequence
97 -query_seq => query sequence portion of the HSP
98 -hit_seq => hit sequence portion of the HSP
99 -homology_seq=> homology sequence for the HSP
100 -hit_frame => hit frame (only if hit is translated protein)
101 -query_frame => query frame (only if query is translated protein)
103 =cut
105 =head2 Bio::Search::HSP::HSPI methods
107 Implementation of Bio::Search::HSP::HSPI methods follow
109 =head2 algorithm
111 Title : algorithm
112 Usage : my $r_type = $hsp->algorithm
113 Function: Obtain the name of the algorithm used to obtain the HSP
114 Returns : string (e.g., BLASTP)
115 Args : [optional] scalar string to set value
117 =cut
119 =head2 pvalue
121 Title : pvalue
122 Usage : my $pvalue = $hsp->pvalue();
123 Function: Returns the P-value for this HSP or undef
124 Returns : float or exponential (2e-10)
125 P-value is not defined with NCBI Blast2 reports.
126 Args : [optional] numeric to set value
128 =cut
130 =head2 evalue
132 Title : evalue
133 Usage : my $evalue = $hsp->evalue();
134 Function: Returns the e-value for this HSP
135 Returns : float or exponential (2e-10)
136 Args : [optional] numeric to set value
138 =cut
140 =head2 frac_identical
142 Title : frac_identical
143 Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
144 Function: Returns the fraction of identical positions for this HSP
145 Returns : Float in range 0.0 -> 1.0
146 Args : arg 1: 'query' = num identical / length of query seq (without gaps)
147 'hit' = num identical / length of hit seq (without gaps)
148 'total' = num identical / length of alignment (with gaps)
149 default = 'total'
150 arg 2: [optional] frac identical value to set for the type requested
152 =cut
154 =head2 frac_conserved
156 Title : frac_conserved
157 Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
158 Function : Returns the fraction of conserved positions for this HSP.
159 This is the fraction of symbols in the alignment with a
160 positive score.
161 Returns : Float in range 0.0 -> 1.0
162 Args : arg 1: 'query' = num conserved / length of query seq (without gaps)
163 'hit' = num conserved / length of hit seq (without gaps)
164 'total' = num conserved / length of alignment (with gaps)
165 default = 'total'
166 arg 2: [optional] frac conserved value to set for the type requested
168 =cut
170 =head2 gaps
172 Title : gaps
173 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
174 Function : Get the number of gaps in the query, hit, or total alignment.
175 Returns : Integer, number of gaps or 0 if none
176 Args : arg 1: 'query' = num gaps in query seq
177 'hit' = num gaps in hit seq
178 'total' = num gaps in whole alignment
179 default = 'total'
180 arg 2: [optional] integer gap value to set for the type requested
182 =cut
184 =head2 query_string
186 Title : query_string
187 Usage : my $qseq = $hsp->query_string;
188 Function: Retrieves the query sequence of this HSP as a string
189 Returns : string
190 Args : [optional] string to set for query sequence
193 =cut
195 =head2 hit_string
197 Title : hit_string
198 Usage : my $hseq = $hsp->hit_string;
199 Function: Retrieves the hit sequence of this HSP as a string
200 Returns : string
201 Args : [optional] string to set for hit sequence
204 =cut
207 =head2 homology_string
209 Title : homology_string
210 Usage : my $homo_string = $hsp->homology_string;
211 Function: Retrieves the homology sequence for this HSP as a string.
212 : The homology sequence is the string of symbols in between the
213 : query and hit sequences in the alignment indicating the degree
214 : of conservation (e.g., identical, similar, not similar).
215 Returns : string
216 Args : [optional] string to set for homology sequence
218 =cut
220 =head2 length
222 Title : length
223 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
224 Function : Returns the length of the query or hit in the alignment
225 (without gaps)
226 or the aggregate length of the HSP (including gaps;
227 this may be greater than either hit or query )
228 Returns : integer
229 Args : arg 1: 'query' = length of query seq (without gaps)
230 'hit' = length of hit seq (without gaps)
231 'total' = length of alignment (with gaps)
232 default = 'total'
233 arg 2: [optional] integer length value to set for specific type
235 =cut
237 =head2 percent_identity
239 Title : percent_identity
240 Usage : my $percentid = $hsp->percent_identity()
241 Function: Returns the calculated percent identity for an HSP
242 Returns : floating point between 0 and 100
243 Args : none
246 =cut
249 =head2 frame
251 Title : frame
252 Usage : my ($qframe, $hframe) = $hsp->frame('list',$queryframe,$subjectframe)
253 Function: Set the Frame for both query and subject and ensure that
254 they agree.
255 This overrides the frame() method implementation in
256 FeaturePair.
257 Returns : array of query and subject frame if return type wants an array
258 or query frame if defined or subject frame if not defined
259 Args : 'hit' or 'subject' or 'sbjct' to retrieve the frame of the subject (default)
260 'query' to retrieve the query frame
261 'list' or 'array' to retrieve both query and hit frames together
262 Note : Frames are stored in the GFF way (0-2) not 1-3
263 as they are in BLAST (negative frames are deduced by checking
264 the strand of the query or hit)
266 =cut
268 =head2 get_aln
270 Title : get_aln
271 Usage : my $aln = $hsp->get_aln
272 Function: Returns a Bio::SimpleAlign representing the HSP alignment
273 Returns : Bio::SimpleAlign
274 Args : none
276 =cut
278 sub get_aln {
279 my ($self) = shift;
280 $self->warn("Inappropriate to build a Bio::SimpleAlign from a HMMER HSP object");
281 return;
284 =head2 num_conserved
286 Title : num_conserved
287 Usage : $obj->num_conserved($newval)
288 Function: returns the number of conserved residues in the alignment
289 Returns : integer
290 Args : integer (optional)
293 =cut
295 =head2 num_identical
297 Title : num_identical
298 Usage : $obj->num_identical($newval)
299 Function: returns the number of identical residues in the alignment
300 Returns : integer
301 Args : integer (optional)
304 =cut
306 =head2 seq_inds
308 Title : seq_inds
309 Purpose : Get a list of residue positions (indices) for all identical
310 : or conserved residues in the query or sbjct sequence.
311 Example : @s_ind = $hsp->seq_inds('query', 'identical');
312 : @h_ind = $hsp->seq_inds('hit', 'conserved');
313 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
314 Returns : List of integers
315 : May include ranges if collapse is true.
316 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query)
317 : ('sbjct' is synonymous with 'hit')
318 : class = 'identical' or 'conserved' or 'nomatch' or 'gap'
319 : (default = identical)
320 : (can be shortened to 'id' or 'cons')
322 : collapse = boolean, if true, consecutive positions are merged
323 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
324 : collapses to "1-5 7 9-11". This is useful for
325 : consolidating long lists. Default = no collapse.
326 Throws : n/a.
327 Comments :
329 See Also : L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
331 =cut
333 =head2 Inherited from Bio::SeqFeature::SimilarityPair
335 These methods come from Bio::SeqFeature::SimilarityPair
337 =head2 query
339 Title : query
340 Usage : my $query = $hsp->query
341 Function: Returns a SeqFeature representing the query in the HSP
342 Returns : Bio::SeqFeature::Similarity
343 Args : [optional] new value to set
346 =head2 hit
348 Title : hit
349 Usage : my $hit = $hsp->hit
350 Function: Returns a SeqFeature representing the hit in the HSP
351 Returns : Bio::SeqFeature::Similarity
352 Args : [optional] new value to set
355 =head2 significance
357 Title : significance
358 Usage : $evalue = $obj->significance();
359 $obj->significance($evalue);
360 Function: Get/Set the significance value
361 Returns : numeric
362 Args : [optional] new value to set
365 =head2 score
367 Title : score
368 Usage : my $score = $hsp->score();
369 Function: Returns the score for this HSP or undef
370 Returns : numeric
371 Args : [optional] numeric to set value
373 =cut
375 =head2 bits
377 Title : bits
378 Usage : my $bits = $hsp->bits();
379 Function: Returns the bit value for this HSP or undef
380 Returns : numeric
381 Args : none
383 =cut