2 # BioPerl module for Bio::Search::HSP::GenericHSP
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Search::HSP::GenericHSP - A "Generic" implementation of a High Scoring Pair
20 my $hsp = Bio::Search::HSP::GenericHSP->new( -algorithm => 'blastp',
24 $r_type = $hsp->algorithm;
28 $evalue = $hsp->evalue();
30 $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
32 $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
34 $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
36 $qseq = $hsp->query_string;
38 $hseq = $hsp->hit_string;
40 $homo_string = $hsp->homology_string;
42 $len = $hsp->length( ['query'|'hit'|'total'] );
44 $len = $hsp->length( ['query'|'hit'|'total'] );
48 # TODO: Describe how to configure a SearchIO stream so that it generates
53 This implementation is "Generic", meaning it is is suitable for
54 holding information about High Scoring pairs from most Search reports
55 such as BLAST and FastA. Specialized objects can be derived from
58 Unless you're writing a parser, you won't ever need to create a
59 GenericHSP or any other HSPI-implementing object. If you use
60 the SearchIO system, HSPI objects are created automatically from
61 a SearchIO stream which returns Bio::Search::Result::ResultI objects
62 and you get the HSPI objects via the ResultI API.
64 For documentation on what you can do with GenericHSP (and other HSPI
65 objects), please see the API documentation in
66 L<Bio::Search::HSP::HSPI|Bio::Search::HSP::HSPI>.
72 User feedback is an integral part of the evolution of this and other
73 Bioperl modules. Send your comments and suggestions preferably to
74 the Bioperl mailing list. Your participation is much appreciated.
76 bioperl-l@bioperl.org - General discussion
77 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
81 Please direct usage questions or support issues to the mailing list:
83 I<bioperl-l@bioperl.org>
85 rather than to the module maintainer directly. Many experienced and
86 reponsive experts will be able look at the problem and quickly
87 address it. Please include a thorough description of the problem
88 with code and data examples if at all possible.
92 Report bugs to the Bioperl bug tracking system to help us keep track
93 of the bugs and their resolution. Bug reports can be submitted via the
96 https://redmine.open-bio.org/projects/bioperl/
98 =head1 AUTHOR - Jason Stajich and Steve Chervitz
100 Email jason-at-bioperl.org
101 Email sac-at-bioperl.org
105 Sendu Bala, bix@sendu.me.uk
109 The rest of the documentation details each of the object methods.
110 Internal methods are usually preceded with a _
114 # Let the code begin...
116 package Bio
::Search
::HSP
::GenericHSP
;
120 use Bio
::SeqFeature
::Similarity
;
122 use base
qw(Bio::Search::HSP::HSPI);
127 Usage : my $obj = Bio::Search::HSP::GenericHSP->new();
128 Function: Builds a new Bio::Search::HSP::GenericHSP object
129 Returns : Bio::Search::HSP::GenericHSP
130 Args : -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc)
133 -bits => bit value for HSP
134 -score => score value for HSP (typically z-score but depends on
136 -hsp_length=> Length of the HSP (including gaps)
137 -identical => # of residues that that matched identically
138 -percent_identity => (optional) percent identity
139 -conserved => # of residues that matched conservatively
140 (only protein comparisions;
141 conserved == identical in nucleotide comparisons)
142 -hsp_gaps => # of gaps in the HSP
143 -query_gaps => # of gaps in the query in the alignment
144 -hit_gaps => # of gaps in the subject in the alignment
145 -query_name => HSP Query sequence name (if available)
146 -query_start => HSP Query start (in original query sequence coords)
147 -query_end => HSP Query end (in original query sequence coords)
148 -query_length=> total length of the query sequence
149 -query_seq => query sequence portion of the HSP
150 -query_desc => textual description of the query
151 -hit_name => HSP Hit sequence name (if available)
152 -hit_start => HSP Hit start (in original hit sequence coords)
153 -hit_end => HSP Hit end (in original hit sequence coords)
154 -hit_length => total length of the hit sequence
155 -hit_seq => hit sequence portion of the HSP
156 -hit_desc => textual description of the hit
157 -homology_seq=> homology sequence for the HSP
158 -hit_frame => hit frame (only if hit is translated protein)
159 -query_frame => query frame (only if query is translated protein)
161 -links => HSP links information (WU-BLAST only)
162 -hsp_group => HSP Group informat (WU-BLAST only)
163 -gap_symbol => symbol representing a gap (default = '-')
164 -hit_features=> string of features found in or near HSP hit
165 region (reported in some BLAST text output,
167 -stranded => If the algorithm isn't known (i.e. defaults to
168 'generic'), setting this will indicate start/end
169 coordinates are to be used to determine the strand
170 for 'query', 'subject', 'hit', 'both', or 'none'
176 my($class,%args) = @_;
178 # don't pass anything to SUPER; complex hierarchy results in lots of work
181 my $self = $class->SUPER::new
();
183 # for speed, don't use _rearrange and just store all input data directly
184 # with no method calls and no work done. work can be carried
185 # out just-in-time later if desired
186 while (my ($arg, $value) = each %args) {
187 $arg =~ tr/a-z\055/A-Z/d;
188 $self->{$arg} = $value;
190 my $bits = $self->{BITS
};
192 defined $self->{VERBOSE
} && $self->verbose($self->{VERBOSE
});
193 if (exists $self->{GAP_SYMBOL
}) {
194 # not checking anything else but the length (must be 1 as only one gap
195 # symbol allowed currently); can add in support for symbol checks or
196 # multiple symbols later if needed
197 $self->throw("Gap symbol must be of length 1") if
198 CORE
::length($self->{GAP_SYMBOL
}) != 1;
201 $self->{GAP_SYMBOL
} = '-';
203 $self->{ALGORITHM
} ||= 'GENERIC';
204 $self->{STRANDED
} ||= 'NONE';
206 if (! defined $self->{QUERY_LENGTH
} || ! defined $self->{HIT_LENGTH
}) {
207 $self->throw("Must define hit and query length");
210 $self->{'_sequenceschanged'} = 1;
212 $self->{_finished_new
} = 1;
216 sub _logical_length
{
217 my ($self, $type) = @_;
218 if (!defined($self->{_sbjct_offset
}) || !defined($self->{_query_offset
})) {
219 $self->_calculate_seq_offsets();
221 my $key = $type =~ /sbjct|hit|tot/i ?
'sbjct' : 'query';
223 my $offset = $self->{"_${key}_offset"};
224 return $self->length($type) / $offset ;
227 =head2 L<Bio::Search::HSP::HSPI> methods
229 Implementation of L<Bio::Search::HSP::HSPI> methods follow
234 Usage : my $r_type = $hsp->algorithm
235 Function: Obtain the name of the algorithm used to obtain the HSP
236 Returns : string (e.g., BLASTP)
237 Args : [optional] scalar string to set value
242 my ($self,$value) = @_;
243 my $previous = $self->{'ALGORITHM'};
244 if( defined $value || ! defined $previous ) {
245 $value = $previous = '' unless defined $value;
246 $self->{'ALGORITHM'} = $value;
255 Usage : my $pvalue = $hsp->pvalue();
256 Function: Returns the P-value for this HSP or undef
257 Returns : float or exponential (2e-10)
258 P-value is not defined with NCBI Blast2 reports.
259 Args : [optional] numeric to set value
264 my ($self,$value) = @_;
265 my $previous = $self->{'PVALUE'};
266 if( defined $value ) {
267 $self->{'PVALUE'} = $value;
275 Usage : my $evalue = $hsp->evalue();
276 Function: Returns the e-value for this HSP
277 Returns : float or exponential (2e-10)
278 Args : [optional] numeric to set value
283 my ($self,$value) = @_;
284 my $previous = $self->{'EVALUE'};
285 if( defined $value ) {
286 $self->{'EVALUE'} = $value;
291 =head2 frac_identical
293 Title : frac_identical
294 Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
295 Function: Returns the fraction of identitical positions for this HSP
296 Returns : Float in range 0.0 -> 1.0
297 Args : arg 1: 'query' = num identical / length of query seq (without gaps)
298 'hit' = num identical / length of hit seq (without gaps)
299 synonyms: 'sbjct', 'subject'
300 'total' = num identical / length of alignment (with gaps)
303 arg 2: [optional] frac identical value to set for the type requested
304 Note : for translated sequences, this adjusts the length accordingly
309 my ($self, $type,$value) = @_;
311 unless ($self->{_did_prefrac
}) {
315 $type = lc $type if defined $type;
316 $type = 'hit' if( defined $type &&
317 $type =~ /subject|sbjct/);
318 $type = 'total' if( ! defined $type || $type eq 'hsp' ||
319 $type !~ /query|hit|subject|sbjct|total/);
320 my $previous = $self->{'_frac_identical'}->{$type};
321 if( defined $value || ! defined $previous ) {
322 $value = $previous = '' unless defined $value;
323 if( $type eq 'hit' || $type eq 'query' ) {
324 $self->$type()->frac_identical( $value);
326 $self->{'_frac_identical'}->{$type} = $value;
332 =head2 frac_conserved
334 Title : frac_conserved
335 Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
336 Function : Returns the fraction of conserved positions for this HSP.
337 This is the fraction of symbols in the alignment with a
339 Returns : Float in range 0.0 -> 1.0
340 Args : arg 1: 'query' = num conserved / length of query seq (without gaps)
341 'hit' = num conserved / length of hit seq (without gaps)
342 synonyms: 'sbjct', 'subject'
343 'total' = num conserved / length of alignment (with gaps)
346 arg 2: [optional] frac conserved value to set for the type requested
351 my ($self, $type,$value) = @_;
353 unless ($self->{_did_prefrac
}) {
357 $type = lc $type if defined $type;
358 $type = 'hit' if( defined $type && $type =~ /subject|sbjct/);
359 $type = 'total' if( ! defined $type || $type eq 'hsp' ||
360 $type !~ /query|hit|subject|sbjct|total/);
361 my $previous = $self->{'_frac_conserved'}->{$type};
362 if( defined $value || ! defined $previous ) {
363 $value = $previous = '' unless defined $value;
364 $self->{'_frac_conserved'}->{$type} = $value;
372 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
373 Function : Get the number of gap characters in the query, hit, or total alignment.
374 Returns : Integer, number of gaps or 0 if none
375 Args : arg 1: 'query' = num gap characters in query seq
376 'hit' = num gap characters in hit seq; synonyms: 'sbjct', 'subject'
377 'total' = num gap characters in whole alignment; synonyms: 'hsp'
379 arg 2: [optional] integer gap value to set for the type requested
384 my ($self, $type, $value) = @_;
386 unless ($self->{_did_pregaps
}) {
390 $type = lc $type if defined $type;
391 $type = 'total' if( ! defined $type || $type eq 'hsp' ||
392 $type !~ /query|hit|subject|sbjct|total/);
393 $type = 'hit' if $type =~ /sbjct|subject/;
394 my $previous = $self->{'_gaps'}->{$type};
395 if( defined $value || ! defined $previous ) {
396 $value = $previous = '' unless defined $value;
397 $self->{'_gaps'}->{$type} = $value;
399 return $previous || 0;
405 Usage : my $qseq = $hsp->query_string;
406 Function: Retrieves the query sequence of this HSP as a string
408 Args : [optional] string to set for query sequence
414 my ($self,$value) = @_;
415 my $previous = $self->{QUERY_SEQ
};
416 if( defined $value || ! defined $previous ) {
417 $value = $previous = '' unless defined $value;
418 $self->{QUERY_SEQ
} = $value;
419 # do some housekeeping so we know when to
420 # re-run _calculate_seq_positions
421 $self->{'_sequenceschanged'} = 1;
429 Usage : my $hseq = $hsp->hit_string;
430 Function: Retrieves the hit sequence of this HSP as a string
432 Args : [optional] string to set for hit sequence
438 my ($self,$value) = @_;
439 my $previous = $self->{HIT_SEQ
};
440 if( defined $value || ! defined $previous ) {
441 $value = $previous = '' unless defined $value;
442 $self->{HIT_SEQ
} = $value;
443 # do some housekeeping so we know when to
444 # re-run _calculate_seq_positions
445 $self->{'_sequenceschanged'} = 1;
450 =head2 homology_string
452 Title : homology_string
453 Usage : my $homo_string = $hsp->homology_string;
454 Function: Retrieves the homology sequence for this HSP as a string.
455 : The homology sequence is the string of symbols in between the
456 : query and hit sequences in the alignment indicating the degree
457 : of conservation (e.g., identical, similar, not similar).
459 Args : [optional] string to set for homology sequence
464 my ($self,$value) = @_;
465 my $previous = $self->{HOMOLOGY_SEQ
};
466 if( defined $value || ! defined $previous ) {
467 $value = $previous = '' unless defined $value;
468 $self->{HOMOLOGY_SEQ
} = $value;
469 # do some housekeeping so we know when to
470 # re-run _calculate_seq_positions
471 $self->{'_sequenceschanged'} = 1;
479 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
480 Function : Returns the length of the query or hit in the alignment
482 or the aggregate length of the HSP (including gaps;
483 this may be greater than either hit or query )
485 Args : arg 1: 'query' = length of query seq (without gaps)
486 'hit' = length of hit seq (without gaps) (synonyms: sbjct, subject)
487 'total' = length of alignment (with gaps)
489 arg 2: [optional] integer length value to set for specific type
498 $type = 'total' unless defined $type;
501 if( $type =~ /^q/i ) {
502 return $self->query()->length(shift);
503 } elsif( $type =~ /^(hit|subject|sbjct)/ ) {
504 return $self->hit()->length(shift);
508 $self->{HSP_LENGTH
} = $v;
510 return $self->{HSP_LENGTH
};
512 return 0; # should never get here
518 Usage : my $len = $hsp->hsp_length()
519 Function: shortcut length('hsp')
520 Returns : floating point between 0 and 100
525 sub hsp_length
{ return shift->length('hsp', shift); }
527 =head2 percent_identity
529 Title : percent_identity
530 Usage : my $percentid = $hsp->percent_identity()
531 Function: Returns the calculated percent identity for an HSP
532 Returns : floating point between 0 and 100
538 sub percent_identity
{
541 unless ($self->{_did_prepi
}) {
545 return $self->SUPER::percent_identity
(@_);
551 Usage : my ($qframe, $hframe) = $hsp->frame('list',$queryframe,$subjectframe)
552 Function: Set the Frame for both query and subject and insure that
554 This overrides the frame() method implementation in
556 Returns : array of query and subject frame if return type wants an array
557 or query frame if defined or subject frame if not defined
558 Args : 'hit' or 'subject' or 'sbjct' to retrieve the frame of the subject (default)
559 'query' to retrieve the query frame
560 'list' or 'array' to retrieve both query and hit frames together
561 Note : Frames are stored in the GFF way (0-2) not 1-3
562 as they are in BLAST (negative frames are deduced by checking
563 the strand of the query or hit)
567 # Note: changed 4/19/08 - bug 2485
569 # frame() is supposed to be a getter/setter (as is implied by the Function desc
570 # above; this is also consistent with Bio::SeqFeature::SimilarityPair). Also,
571 # the API is not consistent with the other HSP/SimilarityPair methods such as
572 # strand(), start(), end(), etc.
574 # In order to make this consistent with other methods all work is now done
575 # when the features are instantiated and not delayed. We compromise by
576 # defaulting back 'to hit' w/o passed args. Setting is now allowed.
582 # unfortunately, w/o args we need to warn about API changes
583 $self->warn("API for frame() has changed.\n".
584 "Please refer to documentation for Bio::Search::HSP::GenericHSP;\n".
585 "returning query frame");
590 if( $val =~ /^q/i ) {
591 return $self->query->frame(@_);
592 } elsif( $val =~ /^hi|^s/i ) {
593 return $self->hit->frame(@_);
594 } elsif ( $val =~ /^list|array/i ) {
595 return ($self->query->frame($_[0]),
596 $self->hit->frame($_[1]) );
597 } elsif ( $val =~ /^\d+$/) {
598 # old API i.e. frame($query_frame, $hit_frame). This catches all but one
599 # case, where no arg is passed (so the hit is wanted).
600 $self->warn("API for frame() has changed.\n".
601 "Please refer to documentation for Bio::Search::HSP::GenericHSP");
603 return ($self->query->frame($val),
604 $self->hit->frame(@_) ) :
605 return $self->hit->frame($val,@_);
607 $self->warn("unrecognized component '$val' requested\n");
615 Usage : my $aln = $hsp->gel_aln
616 Function: Returns a L<Bio::SimpleAlign> object representing the HSP alignment
617 Returns : L<Bio::SimpleAlign>
624 require Bio
::LocatableSeq
;
625 require Bio
::SimpleAlign
;
627 my $aln = Bio
::SimpleAlign
->new();
628 my $hs = $self->hit_string();
629 my $qs = $self->query_string();
630 # FASTA specific stuff moved to the FastaHSP object
632 $seqonly =~ s/[\-\s]//g;
633 my ($q_nm,$s_nm) = ($self->query->seq_id(),
634 $self->hit->seq_id());
635 # Should we silently change the name of the query or hit if it isn't
636 # defined? May need revisiting... cjfields 2008-12-3 (commented out below)
638 #unless( defined $q_nm && CORE::length ($q_nm) ) {
641 #unless( defined $s_nm && CORE::length ($s_nm) ) {
645 # mapping: 1 residues for every x coordinate positions
646 my $query = Bio
::LocatableSeq
->new(
649 -start
=> $self->query->start,
650 -end
=> $self->query->end,
651 -strand
=> $self->query->strand,
652 -force_nse
=> $q_nm ?
0 : 1,
653 -mapping
=> [ 1, $self->{_query_mapping
} ]
656 $seqonly =~ s/[\-\s]//g;
657 my $hit = Bio
::LocatableSeq
->new(
660 -start
=> $self->hit->start,
661 -end
=> $self->hit->end,
662 -strand
=> $self->hit->strand,
663 -force_nse
=> $s_nm ?
0 : 1,
664 -mapping
=> [ 1, $self->{_hit_mapping
} ]
666 $aln->add_seq($query);
673 Title : num_conserved
674 Usage : $obj->num_conserved($newval)
675 Function: returns the number of conserved residues in the alignment
677 Args : integer (optional)
683 my ($self,$value) = @_;
685 unless ($self->{_did_presimilar
}) {
686 $self->_pre_similar_stats;
689 if (defined $value) {
690 $self->{CONSERVED
} = $value;
692 return $self->{CONSERVED
};
697 Title : num_identical
698 Usage : $obj->num_identical($newval)
699 Function: returns the number of identical residues in the alignment
701 Args : integer (optional)
707 my ($self,$value) = @_;
709 unless ($self->{_did_presimilar
}) {
710 $self->_pre_similar_stats;
713 if( defined $value) {
714 $self->{IDENTICAL
} = $value;
716 return $self->{IDENTICAL
};
721 Usage : $hsp->rank( [string] );
722 Purpose : Get the rank of the HSP within a given Blast hit.
723 Example : $rank = $hsp->rank;
724 Returns : Integer (1..n) corresponding to the order in which the HSP
725 appears in the BLAST report.
730 my ($self,$value) = @_;
731 if( defined $value) {
732 $self->{RANK
} = $value;
734 return $self->{RANK
};
740 Purpose : Get a list of residue positions (indices) for all identical
741 : or conserved residues in the query or sbjct sequence.
742 Example : @s_ind = $hsp->seq_inds('query', 'identical');
743 : @h_ind = $hsp->seq_inds('hit', 'conserved');
744 : @h_ind = $hsp->seq_inds('hit', 'conserved-not-identical');
745 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
746 Returns : List of integers
747 : May include ranges if collapse is true.
748 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query)
749 : ('sbjct' is synonymous with 'hit')
750 : class = 'identical' - identical positions
751 : 'conserved' - conserved positions
752 : 'nomatch' - mismatched residue or gap positions
753 : 'mismatch' - mismatched residue positions (no gaps)
754 : 'gap' - gap positions only
755 : 'frameshift'- frameshift positions only
756 : 'conserved-not-identical' - conserved positions w/o
758 : The name can be shortened to 'id' or 'cons' unless
759 : the name is . The default value is
762 : collapse = boolean, if true, consecutive positions are merged
763 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
764 : collapses to "1-5 7 9-11". This is useful for
765 : consolidating long lists. Default = no collapse.
768 Comments : For HSPs partially or completely derived from translated sequences
769 : (TBLASTN, BLASTX, TBLASTX, or similar), some positional data
770 : cannot easily be attributed to a single position (i.e. the
771 : positional data is ambiguous). In these cases all three codon
772 : positions are reported. Under these conditions you can check
773 : ambiguous_seq_inds() to determine whether the query, subject,
774 : or both are ambiguous.
776 See Also : L<Bio::Search::SearchUtils::collapse_nums()|Bio::Search::SearchUtils>,
777 L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
782 my ($self, $seqType, $class, $collapse) = @_;
784 # prepare the internal structures - this is cached so
785 # if the strings have not changed we're okay
786 $self->_calculate_seq_positions();
788 $seqType ||= 'query';
789 $class ||= 'identical';
791 $seqType = 'sbjct' if $seqType eq 'hit';
792 my $t = lc(substr($seqType,0,1));
795 } elsif ( $t eq 's' || $t eq 'h' ) {
798 $self->warn("unknown seqtype $seqType using 'query'");
802 $t = lc(substr($class,0,1));
805 if( $class =~ /conserved\-not\-identical/ ) {
806 $class = 'conserved';
808 $class = 'conservedall';
810 } elsif( $t eq 'i' ) {
811 $class = 'identical';
812 } elsif( $t eq 'n' ) {
814 } elsif( $t eq 'm' ) {
816 } elsif( $t eq 'g' ) {
818 } elsif( $t eq 'f' ) {
819 $class = 'frameshift';
821 $self->warn("unknown sequence class $class using 'identical'");
822 $class = 'identical';
825 ## Sensitive to member name changes.
826 $seqType = "_\L$seqType\E";
827 $class = "_\L$class\E";
830 if( $class eq '_gap' ) {
831 # this means that we are remapping the gap length that is stored
832 # in the hash (for example $self->{'_gapRes_query'} )
833 # so we'll return an array which has the values of the position of the
834 # of the gap (the key in the hash) + the gap length (value in the
835 # hash for this key - 1.
837 # changing this; since the index is the position prior to the insertion,
838 # repeat the position based on the number of gaps inserted
839 @ary = map { my @tmp;
840 # position holds number of gaps inserted
841 for my $g (1..$self->{seqinds
}{"${class}Res$seqType"}->{$_}) {
845 sort { $a <=> $b } keys %{ $self->{seqinds
}{"${class}Res$seqType"}};
846 } elsif( $class eq '_conservedall' ) {
847 @ary = sort { $a <=> $b }
848 keys %{ $self->{seqinds
}{"_conservedRes$seqType"}},
849 keys %{ $self->{seqinds
}{"_identicalRes$seqType"}},
851 @ary = sort { $a <=> $b } keys %{ $self->{seqinds
}{"${class}Res$seqType"}};
853 require Bio
::Search
::BlastUtils
if $collapse;
855 return $collapse ?
&Bio
::Search
::SearchUtils
::collapse_nums
(@ary) : @ary;
858 =head2 ambiguous_seq_inds
860 Title : ambiguous_seq_inds
861 Purpose : returns a string denoting whether sequence indices for query,
862 : subject, or both are ambiguous
863 Returns : String; 'query', 'subject', 'query/subject', or empty string ''
865 Comments : For HSPs partially or completely derived from translated sequences
866 : (TBLASTN, BLASTX, TBLASTX, or similar), some positional data
867 : cannot easily be attributed to a single position (i.e. the
868 : positional data is ambiguous). In these cases all three codon
869 : positions are reported when using seq_inds(). Under these
870 : conditions you can check ambiguous_seq_inds() to determine whether
871 : the query, subject, or both are ambiguous.
872 See Also : L<Bio::Search::Hit::HSPI::seq_inds()>
876 sub ambiguous_seq_inds
{
878 $self->_calculate_seq_positions();
879 my $type = ($self->{_query_offset
} == 3 && $self->{_sbjct_offset
} == 3) ?
881 ($self->{_query_offset
} == 3) ?
'query' :
882 ($self->{_sbjct_offset
} == 3) ?
'subject' : '';
886 =head2 Inherited from L<Bio::SeqFeature::SimilarityPair>
888 These methods come from L<Bio::SeqFeature::SimilarityPair>
893 Usage : my $query = $hsp->query
894 Function: Returns a SeqFeature representing the query in the HSP
895 Returns : L<Bio::SeqFeature::Similarity>
896 Args : [optional] new value to set
902 unless ($self->{_created_qff
}) {
903 $self->_query_seq_feature;
905 return $self->SUPER::query
(@_);
910 if (! $self->{_finished_new
} || $self->{_making_qff
}) {
911 return $self->{_sim1
} if $self->{_sim1
};
912 $self->{_sim1
} = Bio
::SeqFeature
::Similarity
->new();
913 return $self->{_sim1
};
915 unless ($self->{_created_qff
}) {
916 $self->_query_seq_feature;
918 return $self->SUPER::feature1
(@_);
924 Usage : my $hit = $hsp->hit
925 Function: Returns a SeqFeature representing the hit in the HSP
926 Returns : L<Bio::SeqFeature::Similarity>
927 Args : [optional] new value to set
933 unless ($self->{_created_sff
}) {
934 $self->_subject_seq_feature;
936 return $self->SUPER::hit
(@_);
941 if (! $self->{_finished_new
} || $self->{_making_sff
}) {
942 return $self->{_sim2
} if $self->{_sim2
};
943 $self->{_sim2
} = Bio
::SeqFeature
::Similarity
->new();
944 return $self->{_sim2
};
946 unless ($self->{_created_sff
}) {
947 $self->_subject_seq_feature;
949 return $self->SUPER::feature2
(@_);
955 Usage : $evalue = $obj->significance();
956 $obj->significance($evalue);
957 Function: Get/Set the significance value
959 Args : [optional] new value to set
963 # Override significance to return the e-value or, if this is
964 # not defined (WU-BLAST), return the p-value.
966 my ($self, $val) = @_;
967 if (!defined $self->{SIGNIFICANCE
} || defined $val) {
968 $self->{SIGNIFICANCE
} = defined $val ?
$val :
969 defined $self->evalue ?
$self->evalue :
970 defined $self->pvalue ?
$$self->pvalue :
972 $self->query->significance($self->{SIGNIFICANCE
});
974 return $self->{SIGNIFICANCE
};
980 Usage : $hsp->strand('query')
981 Function: Retrieves the strand for the HSP component requested
982 Returns : +1 or -1 (0 if unknown)
983 Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject,
984 'query' to retrieve the query strand (default)
989 my ($self,$type) = @_;
991 if( $type =~ /^q/i && defined $self->{'QUERY_STRAND'} ) {
992 return $self->{'QUERY_STRAND'};
993 } elsif( $type =~ /^(hit|subject|sbjct)/i && defined $self->{'HIT_STRAND'} ) {
994 return $self->{'HIT_STRAND'};
997 return $self->SUPER::strand
($type)
1003 Usage : $score = $obj->score();
1004 $obj->score($value);
1005 Function: Get/Set the score value
1007 Args : [optional] new value to set
1012 Usage : $bits = $obj->bits();
1014 Function: Get/Set the bits value
1016 Args : [optional] new value to set
1018 =head1 Private methods
1022 =head2 _calculate_seq_positions
1024 Title : _calculate_seq_positions
1025 Usage : $self->_calculate_seq_positions
1026 Function: Internal function
1033 sub _calculate_seq_positions
{
1034 my ($self,@args) = @_;
1035 return unless ( $self->{'_sequenceschanged'} );
1036 $self->{'_sequenceschanged'} = 0;
1037 my ($seqString, $qseq,$sseq) = ( $self->homology_string(),
1038 $self->query_string(),
1039 $self->hit_string() );
1040 my ($mlen, $qlen, $slen) = (CORE
::length($seqString), CORE
::length($qseq), CORE
::length($sseq));
1041 my $qdir = $self->query->strand || 1;
1042 my $sdir = $self->hit->strand || 1;
1043 my ($resCount_query, $endpoint_query) = ($qdir <=0) ?
($self->query->end, $self->query->start)
1044 : ($self->query->start, $self->query->end);
1045 my ($resCount_sbjct, $endpoint_sbjct) = ($sdir <=0) ?
($self->hit->end, $self->hit->start)
1046 : ($self->hit->start, $self->hit->end);
1048 my $prog = $self->algorithm;
1050 if( $prog =~ /FAST|SSEARCH|SMITH-WATERMAN/i ) {
1052 # we infer the end of the regional sequence where the first and last
1053 # non spaces are in the homology string
1054 # then we use the HSP->length to tell us how far to read
1055 # to cut off the end of the sequence
1057 my ($start, $rest) = (0,0);
1058 if( $seqString =~ /^(\s+)?(.*?)\s*$/ ) {
1059 ($start, $rest) = ($1 ? CORE
::length($1) : 0, CORE
::length($2));
1062 $seqString = substr($seqString, $start, $rest);
1063 $qseq = substr($qseq, $start, $rest);
1064 $sseq = substr($sseq, $start, $rest);
1066 # commented out 10/29/08
1067 # removing frameshift symbols doesn't take into account the following:
1068 # 1) does not remove the same point in the homology string (get
1069 # positional errors)
1070 # 2) adjustments to the overall position in the string due to the
1071 # frameshift must be taken into consideration (get balancing errors)
1073 # Frameshifts will be handled directly in the main loop.
1076 # fasta reports some extra 'regional' sequence information
1077 # we need to clear out first
1078 # this seemed a bit insane to me at first, but it appears to
1081 #$qseq =~ s![\\\/]!!g;
1082 #$sseq =~ s![\\\/]!!g;
1085 if (!defined($self->{_sbjct_offset
}) || !defined($self->{_query_offset
})) {
1086 $self->_calculate_seq_offsets();
1089 my ($qfs, $sfs) = (0,0);
1091 for my $pos (0..CORE
::length($seqString)-1) {
1092 my @qrange = (0 - $qfs)..($self->{_query_offset
} - 1);
1093 my @srange = (0 - $sfs)..($self->{_sbjct_offset
} - 1);
1094 # $self->debug("QRange:@qrange SRange:@srange\n") if ($qfs || $sfs);
1095 ($qfs, $sfs) = (0,0);
1096 my ($mchar, $qchar, $schar) = (
1097 unpack("x$pos A1",$seqString) || ' ',
1098 $pos < CORE
::length($qseq) ?
unpack("x$pos A1",$qseq) : '-',
1099 $pos < CORE
::length($sseq) ?
unpack("x$pos A1",$sseq) : '-'
1102 my ($qgap, $sgap) = (0,0);
1103 if( $mchar eq '+' || $mchar eq '.') { # conserved
1104 $self->{seqinds
}{_conservedRes_query
}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange;
1105 $self->{seqinds
}{_conservedRes_sbjct
}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange;
1106 $matchtype = 'conserved';
1107 } elsif( $mchar eq ':' || $mchar ne ' ' ) { # identical
1108 $self->{seqinds
}{_identicalRes_query
}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange;
1109 $self->{seqinds
}{_identicalRes_sbjct
}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange;
1110 $matchtype = 'identical';
1111 } elsif( $mchar eq ' ' ) { # possible mismatch/nomatch/frameshift
1112 $qfs = $qchar eq '/' ?
-1 : # base inserted to match frame
1113 $qchar eq '\\' ?
1 : # base deleted to match frame
1115 $sfs = $schar eq '/' ?
-1 :
1116 $schar eq '\\' ?
1 :
1119 # Frameshifts are tricky.
1121 # '/' indicates that the next residue is shifted back one
1122 # (-1) frame position and is a deletion of one base (so to
1123 # correctly align, a base is inserted). That residue should only
1124 # occupy two sequence positions instead of three.
1126 # '\' indicates that the next residue is shifted forward
1127 # one (+1) frame position and is an insertion of one base (so to
1128 # correctly align, a base is removed). That residue should
1129 # occupy four sequence positions instead of three.
1131 # Note that gaps are not counted across from frameshifts.
1132 # Frameshift indices are a range of positions starting in the
1133 # previous sequence position in which the frameshift occurs;
1134 # they are ambiguous by nature.
1135 $self->{seqinds
}{_frameshiftRes_query
}{ $resCount_query - ($_ * $qdir * $qfs) } = $qfs for @qrange;
1136 $matchtype = "$qfs frameshift-query";
1140 $self->{seqinds
}{_frameshiftRes_sbjct
}{ $resCount_sbjct - ($_ * $sdir * $sfs) } = $sfs for @srange;
1141 $matchtype = "$sfs frameshift-sbcjt";
1144 elsif ($qchar eq $self->{GAP_SYMBOL
}) {
1145 # gap are counted as being in the immediately preceeding residue
1146 # position; for translated sequences this is not the start of
1147 # the previous codon, but the third codon position
1148 $self->{seqinds
}{_gapRes_query
}{ $resCount_query - $qdir }++ for @qrange;
1149 $matchtype = 'gap-query';
1152 elsif ($schar eq $self->{GAP_SYMBOL
}) {
1153 $self->{seqinds
}{_gapRes_sbjct
}{ $resCount_sbjct - $sdir }++ for @srange;
1154 $matchtype = 'gap-sbjct';
1158 # if not a gap or frameshift in either seq, must be mismatch
1159 $self->{seqinds
}{_mismatchRes_query
}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange;
1160 $self->{seqinds
}{_mismatchRes_sbjct
}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange;
1161 $matchtype = 'mismatch';
1163 # always add a nomatch unless the current position in the seq is a gap
1165 $self->{seqinds
}{_nomatchRes_sbjct
}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange;
1168 $self->{seqinds
}{_nomatchRes_query
}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange;
1171 $self->warn("Unknown midline character: [$mchar]");
1173 # leave in and uncomment for future debugging
1174 #$self->debug(sprintf("%7d %1s[%1s]%1s %-7d Type: %-20s QOff:%-2d SOff:%-2d\n",
1181 # ($self->{_query_offset} * $qdir),
1182 # ($self->{_sbjct_offset} * $sdir)));
1183 $resCount_query += ($qdir * (scalar(@qrange) + $qfs)) if !$qgap;
1184 $resCount_sbjct += ($sdir * (scalar(@srange) + $sfs)) if !$sgap;
1189 sub _calculate_seq_offsets
{
1191 my $prog = $self->algorithm;
1192 ($self->{_sbjct_offset
}, $self->{_query_offset
}) = (1,1);
1193 if($prog =~ /^(?:PSI)?T(BLAST|FAST)(N|X|Y)/oi ) {
1194 $self->{_sbjct_offset
} = 3;
1195 if ($1 eq 'BLAST' && $2 eq 'X') { #TBLASTX
1196 $self->{_query_offset
} = 3;
1198 } elsif($prog =~ /^(BLAST|FAST)(X|Y|XY)/oi ) {
1199 $self->{_query_offset
} = 3;
1206 See documentation in L<Bio::Search::HSP::HSPI::n()|Bio::Search::HSP::HSPI>
1212 if(@_) { $self->{'N'} = shift; }
1213 # note that returning 1 is completely an assumption
1214 defined $self->{'N'} ?
$self->{'N'} : 1;
1219 See documentation in L<Bio::Search::HSP::HSPI::range()|Bio::Search::HSP::HSPI>
1224 my ($self, $seqType) = @_;
1226 $seqType ||= 'query';
1227 $seqType = 'sbjct' if $seqType eq 'hit';
1230 if( $seqType eq 'query' ) {
1231 $start = $self->query->start;
1232 $end = $self->query->end;
1235 $start = $self->hit->start;
1236 $end = $self->hit->end;
1238 return ($start, $end);
1245 Usage : $obj->links($newval)
1246 Function: Get/Set the Links value (from WU-BLAST)
1247 Indicates the placement of the alignment in the group of HSPs
1248 Returns : Value of links
1249 Args : On set, new value (a scalar or undef, optional)
1257 return $self->{LINKS
} = shift if @_;
1258 return $self->{LINKS
};
1264 Usage : $obj->hsp_group($newval)
1265 Function: Get/Set the Group value (from WU-BLAST)
1266 Indicates a grouping of HSPs
1267 Returns : Value of group
1268 Args : On set, new value (a scalar or undef, optional)
1276 return $self->{HSP_GROUP
} = shift if @_;
1277 return $self->{HSP_GROUP
};
1282 Title : hit_features
1283 Usage : $obj->hit_features($newval)
1284 Function: Get/Set the HSP hit feature string (from some BLAST 2.2.13 text
1285 output), which is a string of overlapping or nearby features in HSP
1287 Returns : Value of hit features, if present
1288 Args : On set, new value (a scalar or undef, optional)
1296 return $self->{HIT_FEATURES
} = shift if @_;
1297 return $self->{HIT_FEATURES
};
1300 # The cigar string code is written by Juguang Xiao <juguang@fugu-sg.org>
1302 =head1 Brief introduction on cigar string
1304 NOTE: the concept is originally from EnsEMBL docs at
1305 http://may2005.archive.ensembl.org/Docs/wiki/html/EnsemblDocs/CigarFormat.html
1306 Please append to these docs if you have a better definition.
1308 Sequence alignment hits can be stored in a database as ungapped alignments.
1309 This imposes 2 major constraints on alignments:
1311 a) alignments for a single hit record require multiple rows in the database,
1313 b) it is not possible to accurately retrieve the exact original alignment.
1315 Alternatively, sequence alignments can be stored as gapped alignments using
1316 the CIGAR line format (where CIGAR stands for Concise Idiosyncratic Gapped
1319 In the cigar line format alignments are stored as follows:
1325 An example of an alignment for a hypthetical protein match is shown below:
1328 Query: 42 PGPAGLP----GSVGLQGPRGLRGPLP-GPLGPPL...
1332 Sbjct: 1672 PGTP*TPLVPLGPWVPLGPSSPR--LPSGPLGPTD...
1335 protein_align_feature table as the following cigar line:
1342 Usage: $cigar_string = $hsp->cigar_string
1343 Function: Generate and return cigar string for this HSP alignment
1344 Args: No input needed
1345 Return: a cigar string
1351 my ($self, $arg) = @_;
1352 $self->warn("this is not a setter") if(defined $arg);
1354 unless(defined $self->{_cigar_string
}){ # generate cigar string
1355 my $cigar_string = $self->generate_cigar_string($self->query_string, $self->hit_string);
1356 $self->{_cigar_string
} = $cigar_string;
1359 return $self->{_cigar_string
};
1362 =head2 generate_cigar_string
1364 Name: generate_cigar_string
1365 Usage: my $cigar_string = Bio::Search::HSP::GenericHSP::generate_cigar_string ($qstr, $hstr);
1366 Function: generate cigar string from a simple sequence of alignment.
1367 Args: the string of query and subject
1368 Return: cigar string
1372 sub generate_cigar_string
{
1373 my ($self, $qstr, $hstr) = @_;
1374 my @qchars = split //, $qstr;
1375 my @hchars = split //, $hstr;
1377 unless(scalar(@qchars) == scalar(@hchars)){
1378 $self->throw("two sequences are not equal in lengths");
1381 $self->{_count_for_cigar_string
} = 0;
1382 $self->{_state_for_cigar_string
} = 'M';
1384 my $cigar_string = '';
1385 for(my $i=0; $i <= $#qchars; $i++){
1386 my $qchar = $qchars[$i];
1387 my $hchar = $hchars[$i];
1388 if($qchar ne $self->{GAP_SYMBOL
} && $hchar ne $self->{GAP_SYMBOL
}){ # Match
1389 $cigar_string .= $self->_sub_cigar_string('M');
1390 }elsif($qchar eq $self->{GAP_SYMBOL
}){ # Deletion
1391 $cigar_string .= $self->_sub_cigar_string('D');
1392 }elsif($hchar eq $self->{GAP_SYMBOL
}){ # Insertion
1393 $cigar_string .= $self->_sub_cigar_string('I');
1395 $self->throw("Impossible state that 2 gaps on each seq aligned");
1398 $cigar_string .= $self->_sub_cigar_string('X'); # not forget the tail.
1399 return $cigar_string;
1402 # an internal method to help generate cigar string
1404 sub _sub_cigar_string
{
1405 my ($self, $new_state) = @_;
1407 my $sub_cigar_string = '';
1408 if($self->{_state_for_cigar_string
} eq $new_state){
1409 $self->{_count_for_cigar_string
} += 1; # Remain the state and increase the counter
1411 $sub_cigar_string .= $self->{_count_for_cigar_string
}
1412 unless $self->{_count_for_cigar_string
} == 1;
1413 $sub_cigar_string .= $self->{_state_for_cigar_string
};
1414 $self->{_count_for_cigar_string
} = 1;
1415 $self->{_state_for_cigar_string
} = $new_state;
1417 return $sub_cigar_string;
1420 # needed before seqfeatures can be made
1421 sub _pre_seq_feature
{
1423 my $algo = $self->{ALGORITHM
};
1425 my ($queryfactor, $hitfactor) = (0,0);
1426 my ($hitmap, $querymap) = (1,1);
1427 if( $algo =~ /^(?:PSI)?T(?:BLAST|FAST|SW)[NY]/oi ) {
1431 elsif ($algo =~ /^(?:FAST|BLAST)(?:X|Y|XY)/oi || $algo =~ /^P?GENEWISE/oi ) {
1435 elsif ($algo =~ /^T(BLAST|FAST|SW)(X|Y|XY)/oi || $algo =~ /^(BLAST|FAST|SW)N/oi || $algo =~ /^WABA|AXT|BLAT|BLASTZ|PSL|MEGABLAST|EXONERATE|SW|SSEARCH|SMITH\-WATERMAN|SIM4$/){
1437 $hitmap = $querymap = 3;
1442 elsif ($algo =~ /^RPS-BLAST/) {
1443 if ($algo =~ /^RPS-BLAST\(BLASTX\)/) {
1450 my $stranded = lc substr($self->{STRANDED
}, 0,1);
1451 $queryfactor = ($stranded eq 'q' || $stranded eq 'b') ?
1 : 0;
1452 $hitfactor = ($stranded eq 'h' || $stranded eq 's' || $stranded eq 'b') ?
1 : 0;
1454 $self->{_query_factor
} = $queryfactor;
1455 $self->{_hit_factor
} = $hitfactor;
1456 $self->{_hit_mapping
} = $hitmap;
1457 $self->{_query_mapping
} = $querymap;
1460 # make query seq feature
1461 sub _query_seq_feature
{
1463 $self->{_making_qff
} = 1;
1464 my $qs = $self->{QUERY_START
};
1465 my $qe = $self->{QUERY_END
};
1466 unless (defined $self->{_query_factor
}) {
1467 $self->_pre_seq_feature;
1469 my $queryfactor = $self->{_query_factor
};
1471 unless( defined $qe && defined $qs ) { $self->throw("Did not specify a Query End or Query Begin"); }
1474 if ($qe > $qs) { # normal query: start < end
1489 ($qs,$qe) = ($qe,$qs);
1492 # Note: many of these data are not query- and hit-specific.
1493 # Only start, end, name, length are.
1494 # We could be more efficient by only storing this info once.
1495 # steve chervitz --- Sat Apr 5 00:55:07 2003
1497 my $sim1 = $self->{_sim1
} || Bio
::SeqFeature
::Similarity
->new();
1500 $sim1->significance($self->{EVALUE
});
1501 $sim1->bits($self->{BITS
});
1502 $sim1->score($self->{SCORE
});
1503 $sim1->strand($strand);
1504 $sim1->seq_id($self->{QUERY_NAME
});
1505 $sim1->seqlength($self->{QUERY_LENGTH
});
1506 $sim1->source_tag($self->{ALGORITHM
});
1507 $sim1->seqdesc($self->{QUERY_DESC
});
1508 $sim1->add_tag_value('meta', $self->{META
}) if $self->can('meta');
1509 # to determine frame from something like FASTXY which doesn't
1511 my $qframe = $self->{QUERY_FRAME
};
1513 if (defined $strand && !defined $qframe && $queryfactor) {
1514 $qframe = ( $qs % 3 ) * $strand;
1516 elsif (!defined $strand) {
1520 if( $qframe =~ /^([+-])?([0-3])/ ) {
1521 my $dir = $1 || '+';
1522 if($qframe && (($dir eq '-' && $strand >= 0) || ($dir eq '+' && $strand <= 0)) ) {
1523 $self->warn("Query frame ($qframe) did not match strand of query ($strand)");
1525 $qframe = $2 != 0 ?
$2 - 1 : $2;
1527 $self->warn("Unknown query frame ($qframe)");
1531 $sim1->frame($qframe);
1532 $self->SUPER::feature1
($sim1);
1534 $self->{_created_qff
} = 1;
1535 $self->{_making_qff
} = 0;
1538 # make subject seq feature
1539 sub _subject_seq_feature
{
1541 $self->{_making_sff
} = 1;
1542 my $hs = $self->{HIT_START
};
1543 my $he = $self->{HIT_END
};
1544 unless (defined $self->{_hit_factor
}) {
1545 $self->_pre_seq_feature;
1547 my $hitfactor = $self->{_hit_factor
};
1549 unless( defined $he && defined $hs ) { $self->throw("Did not specify a Hit End or Hit Begin"); }
1552 if ($he > $hs) { # normal subject
1567 ($hs,$he) = ( $he,$hs); # reverse subject: start bigger than end
1570 my $sim2 = $self->{_sim2
} || Bio
::SeqFeature
::Similarity
->new();
1573 $sim2->significance($self->{EVALUE
});
1574 $sim2->bits($self->{BITS
});
1575 $sim2->score($self->{SCORE
});
1576 $sim2->strand($strand);
1577 $sim2->seq_id($self->{HIT_NAME
});
1578 $sim2->seqlength($self->{HIT_LENGTH
});
1579 $sim2->source_tag($self->{ALGORITHM
});
1580 $sim2->seqdesc($self->{HIT_DESC
});
1581 $sim2->add_tag_value('meta', $self->{META
}) if $self->can('meta');
1582 my $hframe = $self->{HIT_FRAME
};
1584 if (defined $strand && !defined $hframe && $hitfactor) {
1585 $hframe = ( $hs % 3 ) * $strand;
1587 elsif (!defined $strand) {
1591 if( $hframe =~ /^([+-])?([0-3])/ ) {
1592 my $dir = $1 || '+';
1593 if($hframe && (($dir eq '-' && $strand >= 0) || ($dir eq '+' && $strand <= 0)) ) {
1594 $self->warn("Subject frame ($hframe) did not match strand of subject ($strand)");
1596 $hframe = $2 != 0 ?
$2 - 1 : $2;
1598 $self->warn("Unknown subject frame ($hframe)");
1602 $sim2->frame($hframe);
1603 $self->SUPER::feature2
($sim2);
1605 $self->{_created_sff
} = 1;
1606 $self->{_making_sff
} = 0;
1609 # before calling the num_* methods
1610 sub _pre_similar_stats
{
1612 my $identical = $self->{IDENTICAL
};
1613 my $conserved = $self->{CONSERVED
};
1614 my $percent_id = $self->{PERCENT_IDENTITY
};
1616 if (! defined $identical) {
1617 if (! defined $percent_id) {
1618 $self->warn("Did not defined the number of identical matches or overall percent identity in the HSP; assuming 0");
1622 $identical = sprintf("%.0f",$percent_id * $self->{HSP_LENGTH
});
1626 if (! defined $conserved) {
1627 $self->warn("Did not define the number of conserved matches in the HSP; assuming conserved == identical ($identical)")
1628 if( $self->{ALGORITHM
} !~ /^((FAST|BLAST)N)|EXONERATE|SIM4|AXT|PSL|BLAT|BLASTZ|WABA/oi);
1629 $conserved = $identical;
1631 $self->{IDENTICAL
} = $identical;
1632 $self->{CONSERVED
} = $conserved;
1633 $self->{_did_presimilar
} = 1;
1636 # before calling the frac_* methods
1641 my $hsp_len = $self->{HSP_LENGTH
};
1642 my $hit_len = $self->{HIT_LENGTH
};
1643 my $query_len = $self->{QUERY_LENGTH
};
1645 my $identical = $self->num_identical;
1646 my $conserved = $self->num_conserved;
1648 $self->{_did_prefrac
} = 1;
1651 $self->length('total', $hsp_len);
1652 $logical = $self->_logical_length('total');
1653 $self->frac_identical( 'total', $identical / $hsp_len);
1654 $self->frac_conserved( 'total', $conserved / $hsp_len);
1657 $logical = $self->_logical_length('hit');
1658 $self->frac_identical( 'hit', $identical / $logical);
1659 $self->frac_conserved( 'hit', $conserved / $logical);
1662 $logical = $self->_logical_length('query');
1663 $self->frac_identical( 'query', $identical / $logical) ;
1664 $self->frac_conserved( 'query', $conserved / $logical);
1668 # before calling gaps()
1669 # This relies first on passed parameters (parser-dependent), then on gaps
1670 # calculated by seq_inds() (if set), then falls back to directly checking
1671 # for '-' as a last resort
1675 my $query_gaps = $self->{QUERY_GAPS
};
1676 my $query_seq = $self->{QUERY_SEQ
};
1677 my $hit_gaps = $self->{HIT_GAPS
};
1678 my $hit_seq = $self->{HIT_SEQ
};
1679 my $gaps = $self->{HSP_GAPS
};
1681 $self->{_did_pregaps
} = 1; # well, we're in the process; avoid recursion
1682 if( defined $query_gaps ) {
1683 $self->gaps('query', $query_gaps);
1684 } elsif( defined $query_seq ) {
1685 my $qg = (defined $self->{'_query_offset'}) ?
$self->seq_inds('query','gaps') : scalar( $query_seq =~ tr/\-//);
1686 my $offset = $self->{'_query_offset'} || 1;
1687 $self->gaps('query', $qg/$offset);
1689 if( defined $hit_gaps ) {
1690 $self->gaps('hit', $hit_gaps);
1691 } elsif( defined $hit_seq ) {
1692 my $hg = (defined $self->{'_sbjct_offset'}) ?
$self->seq_inds('hit','gaps') : scalar( $hit_seq =~ tr/\-//);
1693 my $offset = $self->{'_sbjct_offset'} || 1;
1694 $self->gaps('hit', $hg/$offset);
1696 if( ! defined $gaps ) {
1697 $gaps = $self->gaps("query") + $self->gaps("hit");
1699 $self->gaps('total', $gaps);
1702 # before percent_identity
1705 $self->{_did_prepi
} = 1;
1706 $self->percent_identity($self->{PERCENT_IDENTITY
} || $self->frac_identical('total')*100) if( $self->{HSP_LENGTH
} > 0 );