3 # BioPerl module for Bio::Search::HSP::ModelHSP
5 # Cared for by Chris Fields <cjfields at uiuc dot edu>
7 # Copyright Chris Fields
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::Search::HSP::ModelHSP - A HSP object for model-based searches
19 use Bio::Search::HSP::ModelHSP;
20 # use it just like a Bio::Search::HSP::GenericHSP object
24 This object is a specialization of L<Bio::Search::HSP::GenericHSP> and is used
25 for searches which involve a query model, such as a Hidden Markov Model (HMM),
26 covariance model (CM), descriptor, or anything else besides a sequence. Note
27 that results from any HSPI class methods which rely on the query being a
28 sequence are unreliable and have thus been overridden with warnings indicating
29 they have not been implemented at this time.
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to
37 the Bioperl mailing list. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44 Report bugs to the Bioperl bug tracking system to help us keep track
45 of the bugs and their resolution. Bug reports can be submitted via the
48 http://bugzilla.open-bio.org/
50 =head1 AUTHOR - Chris Fields
52 Email cjfields at uiuc dot edu
56 The rest of the documentation details each of the object methods.
57 Internal methods are usually preceded with a _
61 # Let the code begin...
63 package Bio
::Search
::HSP
::ModelHSP
;
67 use base
qw(Bio::Search::HSP::GenericHSP);
72 Usage : my $obj = Bio::Search::HSP::ModelHSP->new();
73 Function: Builds a new Bio::Search::HSP::ModelHSP object
74 Returns : Bio::Search::HSP::ModelHSP
77 Plus Bio::Search::HSP::ModelHSP methods
79 -algorithm => algorithm used (Infernal, RNAMotif, ERPIN, etc)
82 -bits => bit value for HSP
83 -score => score value for HSP (typically z-score but depends on
85 -hsp_length=> Length of the HSP (including gaps)
86 -identical => # of residues that matched identically
87 -conserved => # of residues that matched conservatively
88 (only protein comparisons;
89 conserved == identical in nucleotide comparisons)
90 -hsp_gaps => # of gaps in the HSP
91 -query_gaps => # of gaps in the query in the alignment
92 -hit_gaps => # of gaps in the subject in the alignment
93 -query_name => HSP Query sequence name (if available)
94 -query_start => HSP Query start (in original query sequence coords)
95 -query_end => HSP Query end (in original query sequence coords)
96 -hit_name => HSP Hit sequence name (if available)
97 -hit_start => HSP Hit start (in original hit sequence coords)
98 -hit_end => HSP Hit end (in original hit sequence coords)
99 -hit_length => total length of the hit sequence
100 -query_length=> total length of the query sequence
101 -query_seq => query sequence portion of the HSP
102 -hit_seq => hit sequence portion of the HSP
103 -homology_seq=> homology sequence for the HSP
104 -hit_frame => hit frame (only if hit is translated protein)
105 -query_frame => query frame (only if query is translated protein)
106 -meta => optional meta data (sec structure, markup, etc)
107 -custom_score=> custom score data
114 Usage : my $meta = $hsp->meta();
115 Function: Returns meta data for this HSP or undef
116 Returns : string of meta data or undef
117 Args : [optional] string to set value
118 Note : At some point very soon this will likely be a Bio::AnnotationI.
119 Don't get used to a simple string!
124 my ($self,$value) = @_;
125 my $previous = $self->{'META'};
126 if( defined $value ) {
127 $self->{'META'} = $value;
135 Usage : my $data = $hsp->custom_score();
136 Function: Returns custom_score data for this HSP, or undef
137 Returns : custom_score data or undef
138 Args : [optional] custom_score
139 Note : This is a Get/Set used to deal with odd score-like data generated
140 from RNAMotif (and other programs) where the score section
141 can be customized to include non-standard data, including sequence
142 data, user-based scores, and other values.
147 my ($self,$value) = @_;
148 my $previous = $self->{'CUSTOMSCORE'};
149 if( defined $value ) {
150 $self->{'CUSTOMSCORE'} = $value;
155 =head2 Bio::Search::HSP::HSPI methods
157 Implementation of Bio::Search::HSP::HSPI methods follow
162 Usage : my $r_type = $hsp->algorithm
163 Function: Obtain the name of the algorithm used to obtain the HSP
164 Returns : string (e.g., BLASTP)
165 Args : [optional] scalar string to set value
172 Usage : $hsp->strand('hit')
173 Function: Retrieves the strand for the HSP component requested
174 Returns : +1 or -1 (0 if unknown)
175 Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject.
176 There is no strand available for 'query', as the query is a model
177 and not a true sequence.
181 # overrides HSPI::seq()
185 Usage : $hsp->seq( [seq_type] );
186 Purpose : Get the query or sbjct sequence as a Bio::Seq.pm object.
187 Example : $seqObj = $hsp->seq('sbjct');
188 Returns : Object reference for a Bio::Seq.pm object.
189 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'sbjct').
190 : ('sbjct' is synonymous with 'hit')
192 : Note: if there is no sequence available (eg for a model-based
193 : search), this returns a LocatableSeq object w/o a sequence
194 Throws : Propagates any exception that occurs during construction
195 : of the Bio::Seq.pm object.
196 Comments : The sequence is returned in an array of strings corresponding
197 : to the strings in the original format of the Blast alignment.
198 : (i.e., same spacing).
200 See Also : L<seq_str()|seq_str>, L<Bio::Seq>
207 my($self,$seqType) = @_;
208 $seqType ||= 'sbjct';
209 $seqType = 'sbjct' if $seqType eq 'hit';
210 my $str = $self->seq_str($seqType);
211 if( $seqType =~ /^(m|ho)/i ) {
212 $self->throw("cannot call seq on the homology match string, it isn't really a sequence, use get_aln to convert the HSP to a Bio::AlignIO and generate a consensus from that.");
214 require Bio
::LocatableSeq
;
215 my $id = $seqType =~ /^q/i ?
$self->query->seq_id : $self->hit->seq_id;
216 my $seq = Bio
::LocatableSeq
->new (-ID
=> $id,
217 -START
=> $self->start($seqType),
218 -END => $self->end($seqType),
219 -STRAND
=> $self->strand($seqType),
220 -DESC
=> "$seqType sequence ",
222 $seq->seq($str) if $str;
229 Usage : my $pvalue = $hsp->pvalue();
230 Function: Returns the P-value for this HSP or undef
231 Returns : float or exponential (2e-10)
232 P-value is not defined with NCBI Blast2 reports.
233 Args : [optional] numeric to set value
240 Usage : my $evalue = $hsp->evalue();
241 Function: Returns the e-value for this HSP
242 Returns : float or exponential (2e-10)
243 Args : [optional] numeric to set value
250 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
251 Function : Get the number of gaps in the query, hit, or total alignment.
252 Returns : Integer, number of gaps or 0 if none
253 Args : arg 1: 'query' = num gaps in query seq
254 'hit' = num gaps in hit seq
255 'total' = num gaps in whole alignment
257 arg 2: [optional] integer gap value to set for the type requested
264 Usage : my $qseq = $hsp->query_string;
265 Function: Retrieves the query sequence of this HSP as a string
267 Args : [optional] string to set for query sequence
274 Usage : my $hseq = $hsp->hit_string;
275 Function: Retrieves the hit sequence of this HSP as a string
277 Args : [optional] string to set for hit sequence
281 =head2 homology_string
283 Title : homology_string
284 Usage : my $homo_string = $hsp->homology_string;
285 Function: Retrieves the homology sequence for this HSP as a string.
286 : The homology sequence is the string of symbols in between the
287 : query and hit sequences in the alignment indicating the degree
288 : of conservation (e.g., identical, similar, not similar).
290 Args : [optional] string to set for homology sequence
297 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
298 Function : Returns the length of the query or hit in the alignment
300 or the aggregate length of the HSP (including gaps;
301 this may be greater than either hit or query )
303 Args : arg 1: 'query' = length of query seq (without gaps)
304 'hit' = length of hit seq (without gaps)
305 'total' = length of alignment (with gaps)
307 arg 2: [optional] integer length value to set for specific type
314 Usage : my ($qframe, $hframe) = $hsp->frame('list',$queryframe,$subjectframe)
315 Function: Set the Frame for both query and subject and ensure that
317 This overrides the frame() method implementation in
319 Returns : array of query and subject frame if return type wants an array
320 or query frame if defined or subject frame if not defined
321 Args : 'hit' or 'subject' or 'sbjct' to retrieve the frame of the subject (default)
322 'query' to retrieve the query frame
323 'list' or 'array' to retrieve both query and hit frames together
324 Note : Frames are stored in the GFF way (0-2) not 1-3
325 as they are in BLAST (negative frames are deduced by checking
326 the strand of the query or hit)
333 Usage : my $aln = $hsp->get_aln
334 Function: Returns a Bio::SimpleAlign representing the HSP alignment
335 Returns : Bio::SimpleAlign
342 require Bio
::LocatableSeq
;
343 require Bio
::SimpleAlign
;
344 my $aln = Bio
::SimpleAlign
->new;
345 my $hs = $self->hit_string();
346 my $qs = $self->query_string();
348 $self->warn("Missing query string, can't build alignment");
352 $seqonly =~ s/[\-\s]//g;
353 my ($q_nm,$s_nm) = ($self->query->seq_id(),
354 $self->hit->seq_id());
355 unless( defined $q_nm && CORE
::length ($q_nm) ) {
358 unless( defined $s_nm && CORE
::length ($s_nm) ) {
361 my $query = Bio
::LocatableSeq
->new('-seq' => $qs,
363 '-start' => $self->query->start,
364 '-end' => $self->query->end,
367 $seqonly =~ s/[\-\s]//g;
368 my $hit = Bio
::LocatableSeq
->new('-seq' => $hs,
370 '-start' => $self->hit->start,
371 '-end' => $self->hit->end,
373 $aln->add_seq($query);
376 my $meta_obj = Bio
::Seq
::Meta
->new();
377 $meta_obj->named_meta('ss_cons', $self->meta);
378 $aln->consensus_meta($meta_obj);
386 Purpose : Get a list of residue positions (indices) for all identical
387 : or conserved residues in the query or sbjct sequence.
388 Example : @s_ind = $hsp->seq_inds('query', 'identical');
389 : @h_ind = $hsp->seq_inds('hit', 'conserved');
390 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
391 Returns : List of integers
392 : May include ranges if collapse is true.
393 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query)
394 : ('sbjct' is synonymous with 'hit')
395 : class = 'identical' or 'conserved' or 'nomatch' or 'gap'
396 : (default = identical)
397 : (can be shortened to 'id' or 'cons')
399 : collapse = boolean, if true, consecutive positions are merged
400 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
401 : collapses to "1-5 7 9-11". This is useful for
402 : consolidating long lists. Default = no collapse.
406 See Also : L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
410 =head2 Inherited from Bio::SeqFeature::SimilarityPair
412 These methods come from Bio::SeqFeature::SimilarityPair
417 Usage : my $query = $hsp->query
418 Function: Returns a SeqFeature representing the query in the HSP
419 Returns : Bio::SeqFeature::Similarity
420 Args : [optional] new value to set
426 Usage : my $hit = $hsp->hit
427 Function: Returns a SeqFeature representing the hit in the HSP
428 Returns : Bio::SeqFeature::Similarity
429 Args : [optional] new value to set
435 Usage : $evalue = $obj->significance();
436 $obj->significance($evalue);
437 Function: Get/Set the significance value
439 Args : [optional] new value to set
445 Usage : my $score = $hsp->score();
446 Function: Returns the score for this HSP or undef
448 Args : [optional] numeric to set value
455 Usage : my $bits = $hsp->bits();
456 Function: Returns the bit value for this HSP or undef
462 =head1 GenericHSP methods overridden in ModelHSP
464 The following methods have been overridden due to their current reliance on
465 sequence-based queries. They may be implemented in future versions of this class.
467 =head2 seq_inds
473 $self->warn('$hsp->seq_inds not implemented for Model-based searches');
477 =head2 frac_identical
483 $self->warn('$hsp->frac_identical not implemented for Model-based searches');
487 =head2 frac_conserved
493 $self->warn('$hsp->frac_conserved not implemented for Model-based searches');
503 $self->warn('$hsp->matches not implemented for Model-based searches');
513 $self->warn('$hsp->num_conserved not implemented for Model-based searches');
523 $self->warn('$hsp->num_identical not implemented for Model-based searches');
534 $self->warn('$hsp->cigar_string not implemented for Model-based searches');
538 =head2 generate_cigar_string
542 sub generate_cigar_string
{
544 $self->warn('$hsp->generate_cigar_string not implemented for Model-based searches');
548 =head2 percent_identity
552 sub percent_identity
{
554 $self->warn('$hsp->percent_identity not implemented for Model-based searches');