* seq_inds is not defined for Model-based HSPs
[bioperl-live.git] / Bio / Search / HSP / ModelHSP.pm
blobe649aa4838fe92ae3d24d5c7bc6ea2604c73c882
1 # $Id$
3 # BioPerl module for Bio::Search::HSP::ModelHSP
5 # Cared for by Chris Fields <cjfields at uiuc dot edu>
7 # Copyright Chris Fields
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Search::HSP::ModelHSP - A HSP object for model-based searches
17 =head1 SYNOPSIS
19 use Bio::Search::HSP::ModelHSP;
20 # us it just like a Bio::Search::HSP::ModelHSP object
22 =head1 DESCRIPTION
24 This object is a specialization of L<Bio::Search::HSP::ModelHSP> and is used
25 for searches which involve a query model, such as a Hidden Markov Model (HMM),
26 covariance model (CM), descriptor, or anything else besides a sequence. Note
27 that results from any HSPI class methods which rely on the query being a
28 sequence are unreliable and have thus been overridden with warnings indicating
29 they have not been implemented at this time.
31 =head1 FEEDBACK
33 =head2 Mailing Lists
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to
37 the Bioperl mailing list. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42 =head2 Reporting Bugs
44 Report bugs to the Bioperl bug tracking system to help us keep track
45 of the bugs and their resolution. Bug reports can be submitted via the
46 web:
48 http://bugzilla.open-bio.org/
50 =head1 AUTHOR - Chris Fields
52 Email cjfields at uiuc dot edu
54 =head1 APPENDIX
56 The rest of the documentation details each of the object methods.
57 Internal methods are usually preceded with a _
59 =cut
61 # Let the code begin...
63 package Bio::Search::HSP::ModelHSP;
64 use strict;
65 use Bio::Seq::Meta;
67 use base qw(Bio::Search::HSP::GenericHSP);
69 =head2 new
71 Title : new
72 Usage : my $obj = Bio::Search::HSP::ModelHSP->new();
73 Function: Builds a new Bio::Search::HSP::ModelHSP object
74 Returns : Bio::Search::HSP::ModelHSP
75 Args :
77 Plus Bio::Seach::HSP::ModelHSP methods
79 -algorithm => algorithm used (Infernal, RNAMotif, ERPIN, etc)
80 -evalue => evalue
81 -pvalue => pvalue
82 -bits => bit value for HSP
83 -score => score value for HSP (typically z-score but depends on
84 analysis)
85 -hsp_length=> Length of the HSP (including gaps)
86 -identical => # of residues that that matched identically
87 -conserved => # of residues that matched conservatively
88 (only protein comparisions;
89 conserved == identical in nucleotide comparisons)
90 -hsp_gaps => # of gaps in the HSP
91 -query_gaps => # of gaps in the query in the alignment
92 -hit_gaps => # of gaps in the subject in the alignment
93 -query_name => HSP Query sequence name (if available)
94 -query_start => HSP Query start (in original query sequence coords)
95 -query_end => HSP Query end (in original query sequence coords)
96 -hit_name => HSP Hit sequence name (if available)
97 -hit_start => HSP Hit start (in original hit sequence coords)
98 -hit_end => HSP Hit end (in original hit sequence coords)
99 -hit_length => total length of the hit sequence
100 -query_length=> total length of the query sequence
101 -query_seq => query sequence portion of the HSP
102 -hit_seq => hit sequence portion of the HSP
103 -homology_seq=> homology sequence for the HSP
104 -hit_frame => hit frame (only if hit is translated protein)
105 -query_frame => query frame (only if query is translated protein)
106 -meta => optional meta data (sec structure, markup, etc)
107 -custom_score=> custom score data
109 =cut
111 =head2 meta
113 Title : meta
114 Usage : my $meta = $hsp->meta();
115 Function: Returns meta data for this HSP or undef
116 Returns : string of meta data or undef
117 Args : [optional] string to set value
118 Note : At some point very soon this will likely be a Bio::AnnotationI.
119 Don't get used to a simple string!
121 =cut
123 sub meta {
124 my ($self,$value) = @_;
125 my $previous = $self->{'META'};
126 if( defined $value ) {
127 $self->{'META'} = $value;
129 return $previous;
132 =head2 custom_score
134 Title : custom_score
135 Usage : my $data = $hsp->custom_score();
136 Function: Returns custom_score data for this HSP, or undef
137 Returns : custom_score data or undef
138 Args : [optional] custom_score
139 Note : This is a Get/Set used to deal with odd score-like data generated
140 from RNAMotif (and other programs) where the score section
141 can be customized to include non-standard data, including sequence
142 data, user-based scores, and other values.
144 =cut
146 sub custom_score {
147 my ($self,$value) = @_;
148 my $previous = $self->{'CUSTOMSCORE'};
149 if( defined $value ) {
150 $self->{'CUSTOMSCORE'} = $value;
152 return $previous;
155 =head2 Bio::Search::HSP::HSPI methods
157 Implementation of Bio::Search::HSP::HSPI methods follow
159 =head2 algorithm
161 Title : algorithm
162 Usage : my $r_type = $hsp->algorithm
163 Function: Obtain the name of the algorithm used to obtain the HSP
164 Returns : string (e.g., BLASTP)
165 Args : [optional] scalar string to set value
167 =cut
169 =head2 strand
171 Title : strand
172 Usage : $hsp->strand('hit')
173 Function: Retrieves the strand for the HSP component requested
174 Returns : +1 or -1 (0 if unknown)
175 Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject.
176 There is no strand available for 'query', as the query is a model
177 and not a true sequence.
179 =cut
181 # overrides HSPI::seq()
183 =head2 seq
185 Usage : $hsp->seq( [seq_type] );
186 Purpose : Get the query or sbjct sequence as a Bio::Seq.pm object.
187 Example : $seqObj = $hsp->seq('sbjct');
188 Returns : Object reference for a Bio::Seq.pm object.
189 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'sbjct').
190 : ('sbjct' is synonymous with 'hit')
191 : default is 'sbjct'
192 : Note: if there is no sequence available (eg for a model-based
193 : search), this returns a LocatableSeq object w/o a sequence
194 Throws : Propagates any exception that occurs during construction
195 : of the Bio::Seq.pm object.
196 Comments : The sequence is returned in an array of strings corresponding
197 : to the strings in the original format of the Blast alignment.
198 : (i.e., same spacing).
200 See Also : L<seq_str()|seq_str>, L<Bio::Seq>
202 =cut
204 #-------
205 sub seq {
206 #-------
207 my($self,$seqType) = @_;
208 $seqType ||= 'sbjct';
209 $seqType = 'sbjct' if $seqType eq 'hit';
210 my $str = $self->seq_str($seqType);
211 if( $seqType =~ /^(m|ho)/i ) {
212 $self->throw("cannot call seq on the homology match string, it isn't really a sequence, use get_aln to convert the HSP to a Bio::AlignIO and generate a consensus from that.");
214 require Bio::LocatableSeq;
215 my $id = $seqType =~ /^q/i ? $self->query->seq_id : $self->hit->seq_id;
216 my $seq = Bio::LocatableSeq->new (-ID => $id,
217 -START => $self->start($seqType),
218 -END => $self->end($seqType),
219 -STRAND=> $self->strand($seqType),
220 -DESC => "$seqType sequence ",
222 $seq->seq($str) if $str;
223 $seq;
226 =head2 pvalue
228 Title : pvalue
229 Usage : my $pvalue = $hsp->pvalue();
230 Function: Returns the P-value for this HSP or undef
231 Returns : float or exponential (2e-10)
232 P-value is not defined with NCBI Blast2 reports.
233 Args : [optional] numeric to set value
235 =cut
237 =head2 evalue
239 Title : evalue
240 Usage : my $evalue = $hsp->evalue();
241 Function: Returns the e-value for this HSP
242 Returns : float or exponential (2e-10)
243 Args : [optional] numeric to set value
245 =cut
247 =head2 gaps
249 Title : gaps
250 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
251 Function : Get the number of gaps in the query, hit, or total alignment.
252 Returns : Integer, number of gaps or 0 if none
253 Args : arg 1: 'query' = num gaps in query seq
254 'hit' = num gaps in hit seq
255 'total' = num gaps in whole alignment
256 default = 'total'
257 arg 2: [optional] integer gap value to set for the type requested
259 =cut
261 =head2 query_string
263 Title : query_string
264 Usage : my $qseq = $hsp->query_string;
265 Function: Retrieves the query sequence of this HSP as a string
266 Returns : string
267 Args : [optional] string to set for query sequence
269 =cut
271 =head2 hit_string
273 Title : hit_string
274 Usage : my $hseq = $hsp->hit_string;
275 Function: Retrieves the hit sequence of this HSP as a string
276 Returns : string
277 Args : [optional] string to set for hit sequence
279 =cut
281 =head2 homology_string
283 Title : homology_string
284 Usage : my $homo_string = $hsp->homology_string;
285 Function: Retrieves the homology sequence for this HSP as a string.
286 : The homology sequence is the string of symbols in between the
287 : query and hit sequences in the alignment indicating the degree
288 : of conservation (e.g., identical, similar, not similar).
289 Returns : string
290 Args : [optional] string to set for homology sequence
292 =cut
294 =head2 length
296 Title : length
297 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
298 Function : Returns the length of the query or hit in the alignment
299 (without gaps)
300 or the aggregate length of the HSP (including gaps;
301 this may be greater than either hit or query )
302 Returns : integer
303 Args : arg 1: 'query' = length of query seq (without gaps)
304 'hit' = length of hit seq (without gaps)
305 'total' = length of alignment (with gaps)
306 default = 'total'
307 arg 2: [optional] integer length value to set for specific type
309 =cut
311 =head2 frame
313 Title : frame
314 Usage : my ($qframe, $hframe) = $hsp->frame('list',$queryframe,$subjectframe)
315 Function: Set the Frame for both query and subject and insure that
316 they agree.
317 This overrides the frame() method implementation in
318 FeaturePair.
319 Returns : array of query and subject frame if return type wants an array
320 or query frame if defined or subject frame if not defined
321 Args : 'hit' or 'subject' or 'sbjct' to retrieve the frame of the subject (default)
322 'query' to retrieve the query frame
323 'list' or 'array' to retrieve both query and hit frames together
324 Note : Frames are stored in the GFF way (0-2) not 1-3
325 as they are in BLAST (negative frames are deduced by checking
326 the strand of the query or hit)
328 =cut
330 =head2 get_aln
332 Title : get_aln
333 Usage : my $aln = $hsp->gel_aln
334 Function: Returns a Bio::SimpleAlign representing the HSP alignment
335 Returns : Bio::SimpleAlign
336 Args : none
338 =cut
340 sub get_aln {
341 my ($self) = @_;
342 require Bio::LocatableSeq;
343 require Bio::SimpleAlign;
344 my $aln = Bio::SimpleAlign->new;
345 my $hs = $self->hit_string();
346 my $qs = $self->query_string();
347 if (!$qs) {
348 $self->warn("Missing query string, can't build alignment");
349 return;
351 my $seqonly = $qs;
352 $seqonly =~ s/[\-\s]//g;
353 my ($q_nm,$s_nm) = ($self->query->seq_id(),
354 $self->hit->seq_id());
355 unless( defined $q_nm && CORE::length ($q_nm) ) {
356 $q_nm = 'query';
358 unless( defined $s_nm && CORE::length ($s_nm) ) {
359 $s_nm = 'hit';
361 my $query = Bio::LocatableSeq->new('-seq' => $qs,
362 '-id' => $q_nm,
363 '-start' => $self->query->start,
364 '-end' => $self->query->end,
366 $seqonly = $hs;
367 $seqonly =~ s/[\-\s]//g;
368 my $hit = Bio::LocatableSeq->new('-seq' => $hs,
369 '-id' => $s_nm,
370 '-start' => $self->hit->start,
371 '-end' => $self->hit->end,
373 $aln->add_seq($query);
374 $aln->add_seq($hit);
375 if ($self->meta) {
376 my $meta_obj = Bio::Seq::Meta->new();
377 $meta_obj->named_meta('ss_cons', $self->meta);
378 $aln->consensus_meta($meta_obj);
380 return $aln;
383 =head2 seq_inds
385 Title : seq_inds
386 Purpose : Get a list of residue positions (indices) for all identical
387 : or conserved residues in the query or sbjct sequence.
388 Example : @s_ind = $hsp->seq_inds('query', 'identical');
389 : @h_ind = $hsp->seq_inds('hit', 'conserved');
390 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
391 Returns : List of integers
392 : May include ranges if collapse is true.
393 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query)
394 : ('sbjct' is synonymous with 'hit')
395 : class = 'identical' or 'conserved' or 'nomatch' or 'gap'
396 : (default = identical)
397 : (can be shortened to 'id' or 'cons')
399 : collapse = boolean, if true, consecutive positions are merged
400 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
401 : collapses to "1-5 7 9-11". This is useful for
402 : consolidating long lists. Default = no collapse.
403 Throws : n/a.
404 Comments :
406 See Also : L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
408 =cut
410 =head2 Inherited from Bio::SeqFeature::SimilarityPair
412 These methods come from Bio::SeqFeature::SimilarityPair
414 =head2 query
416 Title : query
417 Usage : my $query = $hsp->query
418 Function: Returns a SeqFeature representing the query in the HSP
419 Returns : Bio::SeqFeature::Similarity
420 Args : [optional] new value to set
423 =head2 hit
425 Title : hit
426 Usage : my $hit = $hsp->hit
427 Function: Returns a SeqFeature representing the hit in the HSP
428 Returns : Bio::SeqFeature::Similarity
429 Args : [optional] new value to set
432 =head2 significance
434 Title : significance
435 Usage : $evalue = $obj->significance();
436 $obj->significance($evalue);
437 Function: Get/Set the significance value
438 Returns : numeric
439 Args : [optional] new value to set
442 =head2 score
444 Title : score
445 Usage : my $score = $hsp->score();
446 Function: Returns the score for this HSP or undef
447 Returns : numeric
448 Args : [optional] numeric to set value
450 =cut
452 =head2 bits
454 Title : bits
455 Usage : my $bits = $hsp->bits();
456 Function: Returns the bit value for this HSP or undef
457 Returns : numeric
458 Args : none
460 =cut
462 =head1 ModelHSP methods overridden in ModelHSP
464 The following methods have been overridden due to their current reliance on
465 sequence-based queries. They may be implemented in future versions of this class.
467 =head2 frac_identical
469 =cut
471 sub seq_inds {
472 my $self = shift;
473 $self->warn('$hsp->seq_inds not implemented for Model-based searches');
474 return;
477 =head2 frac_identical
479 =cut
481 sub frac_identical {
482 my $self = shift;
483 $self->warn('$hsp->frac_identical not implemented for Model-based searches');
484 return;
487 =head2 frac_conserved
489 =cut
491 sub frac_conserved {
492 my $self = shift;
493 $self->warn('$hsp->frac_conserved not implemented for Model-based searches');
494 return;
497 =head2 matches
499 =cut
501 sub matches {
502 my $self = shift;
503 $self->warn('$hsp->matches not implemented for Model-based searches');
504 return;
507 =head2 num_conserved
509 =cut
511 sub num_conserved {
512 my $self = shift;
513 $self->warn('$hsp->num_conserved not implemented for Model-based searches');
514 return;
517 =head2 num_identical
519 =cut
521 sub num_identical {
522 my $self = shift;
523 $self->warn('$hsp->num_identical not implemented for Model-based searches');
524 return;
527 =head2 cigar_string
529 =cut
532 sub cigar_string {
533 my $self = shift;
534 $self->warn('$hsp->cigar_string not implemented for Model-based searches');
535 return;
538 =head2 generate_cigar_string
540 =cut
542 sub generate_cigar_string {
543 my $self = shift;
544 $self->warn('$hsp->generate_cigar_string not implemented for Model-based searches');
545 return;
548 =head2 percent_identity
550 =cut
552 sub percent_identity {
553 my $self = shift;
554 $self->warn('$hsp->percent_identity not implemented for Model-based searches');
555 return;