3 # BioPerl module for Bio::Search::HSP::GenericHSP
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason@bioperl.org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::Search::HSP::GenericHSP - A "Generic" implementation of a High Scoring Pair
21 my $hsp = Bio::Search::HSP::GenericHSP->new( -algorithm => 'blastp',
25 $r_type = $hsp->algorithm;
29 $evalue = $hsp->evalue();
31 $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
33 $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
35 $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
37 $qseq = $hsp->query_string;
39 $hseq = $hsp->hit_string;
41 $homo_string = $hsp->homology_string;
43 $len = $hsp->length( ['query'|'hit'|'total'] );
45 $len = $hsp->length( ['query'|'hit'|'total'] );
49 # TODO: Describe how to configure a SearchIO stream so that it generates
54 This implementation is "Generic", meaning it is is suitable for
55 holding information about High Scoring pairs from most Search reports
56 such as BLAST and FastA. Specialized objects can be derived from
59 Unless you're writing a parser, you won't ever need to create a
60 GenericHSP or any other HSPI-implementing object. If you use
61 the SearchIO system, HSPI objects are created automatically from
62 a SearchIO stream which returns Bio::Search::Result::ResultI objects
63 and you get the HSPI objects via the ResultI API.
65 For documentation on what you can do with GenericHSP (and other HSPI
66 objects), please see the API documentation in
67 L<Bio::Search::HSP::HSPI|Bio::Search::HSP::HSPI>.
73 User feedback is an integral part of the evolution of this and other
74 Bioperl modules. Send your comments and suggestions preferably to
75 the Bioperl mailing list. Your participation is much appreciated.
77 bioperl-l@bioperl.org - General discussion
78 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
82 Please direct usage questions or support issues to the mailing list:
84 I<bioperl-l@bioperl.org>
86 rather than to the module maintainer directly. Many experienced and
87 reponsive experts will be able look at the problem and quickly
88 address it. Please include a thorough description of the problem
89 with code and data examples if at all possible.
93 Report bugs to the Bioperl bug tracking system to help us keep track
94 of the bugs and their resolution. Bug reports can be submitted via the
97 http://bugzilla.open-bio.org/
99 =head1 AUTHOR - Jason Stajich and Steve Chervitz
101 Email jason-at-bioperl.org
102 Email sac-at-bioperl.org
106 Sendu Bala, bix@sendu.me.uk
110 The rest of the documentation details each of the object methods.
111 Internal methods are usually preceded with a _
115 # Let the code begin...
117 package Bio
::Search
::HSP
::GenericHSP
;
121 use Bio
::SeqFeature
::Similarity
;
123 use base
qw(Bio::Search::HSP::HSPI);
128 Usage : my $obj = Bio::Search::HSP::GenericHSP->new();
129 Function: Builds a new Bio::Search::HSP::GenericHSP object
130 Returns : Bio::Search::HSP::GenericHSP
131 Args : -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc)
134 -bits => bit value for HSP
135 -score => score value for HSP (typically z-score but depends on
137 -hsp_length=> Length of the HSP (including gaps)
138 -identical => # of residues that that matched identically
139 -percent_identity => (optional) percent identity
140 -conserved => # of residues that matched conservatively
141 (only protein comparisions;
142 conserved == identical in nucleotide comparisons)
143 -hsp_gaps => # of gaps in the HSP
144 -query_gaps => # of gaps in the query in the alignment
145 -hit_gaps => # of gaps in the subject in the alignment
146 -query_name => HSP Query sequence name (if available)
147 -query_start => HSP Query start (in original query sequence coords)
148 -query_end => HSP Query end (in original query sequence coords)
149 -query_length=> total length of the query sequence
150 -query_seq => query sequence portion of the HSP
151 -query_desc => textual description of the query
152 -hit_name => HSP Hit sequence name (if available)
153 -hit_start => HSP Hit start (in original hit sequence coords)
154 -hit_end => HSP Hit end (in original hit sequence coords)
155 -hit_length => total length of the hit sequence
156 -hit_seq => hit sequence portion of the HSP
157 -hit_desc => textual description of the hit
158 -homology_seq=> homology sequence for the HSP
159 -hit_frame => hit frame (only if hit is translated protein)
160 -query_frame => query frame (only if query is translated protein)
162 -links => HSP links information (WU-BLAST only)
163 -hsp_group => HSP Group informat (WU-BLAST only)
164 -gap_symbol => symbol representing a gap (default = '-')
165 -hit_features=> string of features found in or near HSP hit
166 region (reported in some BLAST text output,
168 -stranded => If the algorithm isn't known (i.e. defaults to
169 'generic'), setting this will indicate start/end
170 coordinates are to be used to determine the strand
171 for 'query', 'subject', 'hit', 'both', or 'none'
177 my($class,%args) = @_;
179 # don't pass anything to SUPER; complex hierarchy results in lots of work
182 my $self = $class->SUPER::new
();
184 # for speed, don't use _rearrange and just store all input data directly
185 # with no method calls and no work done. work can be carried
186 # out just-in-time later if desired
187 while (my ($arg, $value) = each %args) {
188 $arg =~ tr/a-z\055/A-Z/d;
189 $self->{$arg} = $value;
191 my $bits = $self->{BITS
};
193 defined $self->{VERBOSE
} && $self->verbose($self->{VERBOSE
});
194 if (exists $self->{GAP_SYMBOL
}) {
195 # not checking anything else but the length (must be 1 as only one gap
196 # symbol allowed currently); can add in support for symbol checks or
197 # multiple symbols later if needed
198 $self->throw("Gap symbol must be of length 1") if
199 CORE
::length($self->{GAP_SYMBOL
}) != 1;
202 $self->{GAP_SYMBOL
} = '-';
204 $self->{ALGORITHM
} ||= 'GENERIC';
205 $self->{STRANDED
} ||= 'NONE';
207 if (! defined $self->{QUERY_LENGTH
} || ! defined $self->{HIT_LENGTH
}) {
208 $self->throw("Must define hit and query length");
211 $self->{'_sequenceschanged'} = 1;
213 $self->{_finished_new
} = 1;
217 sub _logical_length
{
218 my ($self, $type) = @_;
219 my $algo = $self->algorithm;
220 my $len = $self->length($type);
222 if($algo =~ /^(PSI)?T(BLAST|FAST)[NY]/oi ) {
223 $logical = $len/3 if $type =~ /sbjct
|hit
|tot
/i
;
224 } elsif($algo =~ /^(BLAST|FAST)(X|Y|XY)/oi ) {
225 $logical = $len/3 if $type =~ /query
|tot
/i
;
226 } elsif($algo =~ /^T(BLAST|FAST)(X|Y|XY)/oi ) {
232 =head2 L<Bio::Search::HSP::HSPI> methods
234 Implementation of L<Bio::Search::HSP::HSPI> methods follow
239 Usage : my $r_type = $hsp->algorithm
240 Function: Obtain the name of the algorithm used to obtain the HSP
241 Returns : string (e.g., BLASTP)
242 Args : [optional] scalar string to set value
247 my ($self,$value) = @_;
248 my $previous = $self->{'ALGORITHM'};
249 if( defined $value || ! defined $previous ) {
250 $value = $previous = '' unless defined $value;
251 $self->{'ALGORITHM'} = $value;
260 Usage : my $pvalue = $hsp->pvalue();
261 Function: Returns the P-value for this HSP or undef
262 Returns : float or exponential (2e-10)
263 P-value is not defined with NCBI Blast2 reports.
264 Args : [optional] numeric to set value
269 my ($self,$value) = @_;
270 my $previous = $self->{'PVALUE'};
271 if( defined $value ) {
272 $self->{'PVALUE'} = $value;
280 Usage : my $evalue = $hsp->evalue();
281 Function: Returns the e-value for this HSP
282 Returns : float or exponential (2e-10)
283 Args : [optional] numeric to set value
288 my ($self,$value) = @_;
289 my $previous = $self->{'EVALUE'};
290 if( defined $value ) {
291 $self->{'EVALUE'} = $value;
296 =head2 frac_identical
298 Title : frac_identical
299 Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
300 Function: Returns the fraction of identitical positions for this HSP
301 Returns : Float in range 0.0 -> 1.0
302 Args : arg 1: 'query' = num identical / length of query seq (without gaps)
303 'hit' = num identical / length of hit seq (without gaps)
304 synonyms: 'sbjct', 'subject'
305 'total' = num identical / length of alignment (with gaps)
308 arg 2: [optional] frac identical value to set for the type requested
313 my ($self, $type,$value) = @_;
315 unless ($self->{_did_prefrac
}) {
319 $type = lc $type if defined $type;
320 $type = 'hit' if( defined $type &&
321 $type =~ /subject|sbjct/);
322 $type = 'total' if( ! defined $type || $type eq 'hsp' ||
323 $type !~ /query|hit|subject|sbjct|total/);
324 my $previous = $self->{'_frac_identical'}->{$type};
325 if( defined $value || ! defined $previous ) {
326 $value = $previous = '' unless defined $value;
327 if( $type eq 'hit' || $type eq 'query' ) {
328 $self->$type()->frac_identical( $value);
330 $self->{'_frac_identical'}->{$type} = $value;
336 =head2 frac_conserved
338 Title : frac_conserved
339 Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
340 Function : Returns the fraction of conserved positions for this HSP.
341 This is the fraction of symbols in the alignment with a
343 Returns : Float in range 0.0 -> 1.0
344 Args : arg 1: 'query' = num conserved / length of query seq (without gaps)
345 'hit' = num conserved / length of hit seq (without gaps)
346 synonyms: 'sbjct', 'subject'
347 'total' = num conserved / length of alignment (with gaps)
350 arg 2: [optional] frac conserved value to set for the type requested
355 my ($self, $type,$value) = @_;
357 unless ($self->{_did_prefrac
}) {
361 $type = lc $type if defined $type;
362 $type = 'hit' if( defined $type && $type =~ /subject|sbjct/);
363 $type = 'total' if( ! defined $type || $type eq 'hsp' ||
364 $type !~ /query|hit|subject|sbjct|total/);
365 my $previous = $self->{'_frac_conserved'}->{$type};
366 if( defined $value || ! defined $previous ) {
367 $value = $previous = '' unless defined $value;
368 $self->{'_frac_conserved'}->{$type} = $value;
376 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
377 Function : Get the number of gap characters in the query, hit, or total alignment.
378 Returns : Integer, number of gaps or 0 if none
379 Args : arg 1: 'query' = num gap characters in query seq
380 'hit' = num gap characters in hit seq; synonyms: 'sbjct', 'subject'
381 'total' = num gap characters in whole alignment; synonyms: 'hsp'
383 arg 2: [optional] integer gap value to set for the type requested
388 my ($self, $type, $value) = @_;
390 unless ($self->{_did_pregaps
}) {
394 $type = lc $type if defined $type;
395 $type = 'total' if( ! defined $type || $type eq 'hsp' ||
396 $type !~ /query|hit|subject|sbjct|total/);
397 $type = 'hit' if $type =~ /sbjct|subject/;
398 my $previous = $self->{'_gaps'}->{$type};
399 if( defined $value || ! defined $previous ) {
400 $value = $previous = '' unless defined $value;
401 $self->{'_gaps'}->{$type} = $value;
403 return $previous || 0;
409 Usage : my $qseq = $hsp->query_string;
410 Function: Retrieves the query sequence of this HSP as a string
412 Args : [optional] string to set for query sequence
418 my ($self,$value) = @_;
419 my $previous = $self->{QUERY_SEQ
};
420 if( defined $value || ! defined $previous ) {
421 $value = $previous = '' unless defined $value;
422 $self->{QUERY_SEQ
} = $value;
423 # do some housekeeping so we know when to
424 # re-run _calculate_seq_positions
425 $self->{'_sequenceschanged'} = 1;
433 Usage : my $hseq = $hsp->hit_string;
434 Function: Retrieves the hit sequence of this HSP as a string
436 Args : [optional] string to set for hit sequence
442 my ($self,$value) = @_;
443 my $previous = $self->{HIT_SEQ
};
444 if( defined $value || ! defined $previous ) {
445 $value = $previous = '' unless defined $value;
446 $self->{HIT_SEQ
} = $value;
447 # do some housekeeping so we know when to
448 # re-run _calculate_seq_positions
449 $self->{'_sequenceschanged'} = 1;
454 =head2 homology_string
456 Title : homology_string
457 Usage : my $homo_string = $hsp->homology_string;
458 Function: Retrieves the homology sequence for this HSP as a string.
459 : The homology sequence is the string of symbols in between the
460 : query and hit sequences in the alignment indicating the degree
461 : of conservation (e.g., identical, similar, not similar).
463 Args : [optional] string to set for homology sequence
468 my ($self,$value) = @_;
469 my $previous = $self->{HOMOLOGY_SEQ
};
470 if( defined $value || ! defined $previous ) {
471 $value = $previous = '' unless defined $value;
472 $self->{HOMOLOGY_SEQ
} = $value;
473 # do some housekeeping so we know when to
474 # re-run _calculate_seq_positions
475 $self->{'_sequenceschanged'} = 1;
483 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
484 Function : Returns the length of the query or hit in the alignment
486 or the aggregate length of the HSP (including gaps;
487 this may be greater than either hit or query )
489 Args : arg 1: 'query' = length of query seq (without gaps)
490 'hit' = length of hit seq (without gaps) (synonyms: sbjct, subject)
491 'total' = length of alignment (with gaps)
493 arg 2: [optional] integer length value to set for specific type
502 $type = 'total' unless defined $type;
505 if( $type =~ /^q/i ) {
506 return $self->query()->length(shift);
507 } elsif( $type =~ /^(hit|subject|sbjct)/ ) {
508 return $self->hit()->length(shift);
512 $self->{HSP_LENGTH
} = $v;
514 return $self->{HSP_LENGTH
};
516 return 0; # should never get here
522 Usage : my $len = $hsp->hsp_length()
523 Function: shortcut length('hsp')
524 Returns : floating point between 0 and 100
529 sub hsp_length
{ return shift->length('hsp', shift); }
531 =head2 percent_identity
533 Title : percent_identity
534 Usage : my $percentid = $hsp->percent_identity()
535 Function: Returns the calculated percent identity for an HSP
536 Returns : floating point between 0 and 100
542 sub percent_identity
{
545 unless ($self->{_did_prepi
}) {
549 return $self->SUPER::percent_identity
(@_);
555 Usage : my ($qframe, $hframe) = $hsp->frame('list',$queryframe,$subjectframe)
556 Function: Set the Frame for both query and subject and insure that
558 This overrides the frame() method implementation in
560 Returns : array of query and subject frame if return type wants an array
561 or query frame if defined or subject frame if not defined
562 Args : 'hit' or 'subject' or 'sbjct' to retrieve the frame of the subject (default)
563 'query' to retrieve the query frame
564 'list' or 'array' to retrieve both query and hit frames together
565 Note : Frames are stored in the GFF way (0-2) not 1-3
566 as they are in BLAST (negative frames are deduced by checking
567 the strand of the query or hit)
571 # Note: changed 4/19/08 - bug 2485
573 # frame() is supposed to be a getter/setter (as is implied by the Function desc
574 # above; this is also consistent with Bio::SeqFeature::SimilarityPair). Also,
575 # the API is not consistent with the other HSP/SimilarityPair methods such as
576 # strand(), start(), end(), etc.
578 # In order to make this consistent with other methods all work is now done
579 # when the features are instantiated and not delayed. We compromise by
580 # defaulting back 'to hit' w/o passed args. Setting is now allowed.
586 # unfortunately, w/o args we need to warn about API changes
587 $self->warn("API for frame() has changed.\n".
588 "Please refer to documentation for Bio::Search::HSP::GenericHSP;\n".
589 "returning query frame");
594 if( $val =~ /^q/i ) {
595 return $self->query->frame(@_);
596 } elsif( $val =~ /^hi|^s/i ) {
597 return $self->hit->frame(@_);
598 } elsif ( $val =~ /^list|array/i ) {
599 return ($self->query->frame($_[0]),
600 $self->hit->frame($_[1]) );
601 } elsif ( $val =~ /^\d+$/) {
602 # old API i.e. frame($query_frame, $hit_frame). This catches all but one
603 # case, where no arg is passed (so the hit is wanted).
604 $self->warn("API for frame() has changed.\n".
605 "Please refer to documentation for Bio::Search::HSP::GenericHSP");
607 return ($self->query->frame($val),
608 $self->hit->frame(@_) ) :
609 return $self->hit->frame($val,@_);
611 $self->warn("unrecognized component '$val' requested\n");
619 Usage : my $aln = $hsp->gel_aln
620 Function: Returns a L<Bio::SimpleAlign> object representing the HSP alignment
621 Returns : L<Bio::SimpleAlign>
628 require Bio
::LocatableSeq
;
629 require Bio
::SimpleAlign
;
630 my $aln = Bio
::SimpleAlign
->new();
631 my $hs = $self->hit_string();
632 my $qs = $self->query_string();
633 # FASTA specific stuff moved to the FastaHSP object
635 $seqonly =~ s/[\-\s]//g;
636 my ($q_nm,$s_nm) = ($self->query->seq_id(),
637 $self->hit->seq_id());
638 # Should we silently change the name of the query or hit if it isn't
639 # defined? May need revisiting... cjfields 2008-12-3 (commented out below)
641 #unless( defined $q_nm && CORE::length ($q_nm) ) {
644 #unless( defined $s_nm && CORE::length ($s_nm) ) {
648 # mapping: 1 residues for every x coordinate positions
649 my $query = Bio
::LocatableSeq
->new('-seq' => $qs,
651 '-start' => $self->query->start,
652 '-end' => $self->query->end,
653 '-force_nse' => $q_nm ?
0 : 1,
654 '-mapping' => [1, $self->{_query_mapping
}]
657 $seqonly =~ s/[\-\s]//g;
658 my $hit = Bio
::LocatableSeq
->new('-seq' => $hs,
660 '-start' => $self->hit->start,
661 '-end' => $self->hit->end,
662 '-force_nse' => $s_nm ?
0 : 1,
663 '-mapping' => [1, $self->{_hit_mapping
}]
665 $aln->add_seq($query);
672 Title : num_conserved
673 Usage : $obj->num_conserved($newval)
674 Function: returns the number of conserved residues in the alignment
676 Args : integer (optional)
682 my ($self,$value) = @_;
684 unless ($self->{_did_presimilar
}) {
685 $self->_pre_similar_stats;
688 if (defined $value) {
689 $self->{CONSERVED
} = $value;
691 return $self->{CONSERVED
};
696 Title : num_identical
697 Usage : $obj->num_identical($newval)
698 Function: returns the number of identical residues in the alignment
700 Args : integer (optional)
706 my ($self,$value) = @_;
708 unless ($self->{_did_presimilar
}) {
709 $self->_pre_similar_stats;
712 if( defined $value) {
713 $self->{IDENTICAL
} = $value;
715 return $self->{IDENTICAL
};
720 Usage : $hsp->rank( [string] );
721 Purpose : Get the rank of the HSP within a given Blast hit.
722 Example : $rank = $hsp->rank;
723 Returns : Integer (1..n) corresponding to the order in which the HSP
724 appears in the BLAST report.
729 my ($self,$value) = @_;
730 if( defined $value) {
731 $self->{RANK
} = $value;
733 return $self->{RANK
};
739 Purpose : Get a list of residue positions (indices) for all identical
740 : or conserved residues in the query or sbjct sequence.
741 Example : @s_ind = $hsp->seq_inds('query', 'identical');
742 : @h_ind = $hsp->seq_inds('hit', 'conserved');
743 : @h_ind = $hsp->seq_inds('hit', 'conserved-not-identical');
744 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
745 Returns : List of integers
746 : May include ranges if collapse is true.
747 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query)
748 : ('sbjct' is synonymous with 'hit')
749 : class = 'identical' - identical positions
750 : 'conserved' - conserved positions
751 : 'nomatch' - mismatched residue or gap positions
752 : 'mismatch' - mismatched residue positions (no gaps)
753 : 'gap' - gap positions only
754 : 'frameshift'- frameshift positions only
755 : 'conserved-not-identical' - conserved positions w/o
757 : The name can be shortened to 'id' or 'cons' unless
758 : the name is ambiguous. The default value is
761 : collapse = boolean, if true, consecutive positions are merged
762 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
763 : collapses to "1-5 7 9-11". This is useful for
764 : consolidating long lists. Default = no collapse.
767 Comments : For HSPs partially or completely derived from translated sequences
768 : (TBLASTN, BLASTX, TBLASTX, or similar), some positional data
769 : cannot easily be attributed to a single position (i.e. the
770 : positional data is ambiguous). In these cases all three codon
771 : positions are reported. Under these conditions you can check
772 : ambiguous_seq_inds() to determine whether the query, subject,
773 : or both are ambiguous.
775 See Also : L<Bio::Search::SearchUtils::collapse_nums()|Bio::Search::SearchUtils>,
776 L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
781 my ($self, $seqType, $class, $collapse) = @_;
783 # prepare the internal structures - this is cached so
784 # if the strings have not changed we're okay
785 $self->_calculate_seq_positions();
787 $seqType ||= 'query';
788 $class ||= 'identical';
790 $seqType = 'sbjct' if $seqType eq 'hit';
791 my $t = lc(substr($seqType,0,1));
794 } elsif ( $t eq 's' || $t eq 'h' ) {
797 $self->warn("unknown seqtype $seqType using 'query'");
801 $t = lc(substr($class,0,1));
804 if( $class =~ /conserved\-not\-identical/ ) {
805 $class = 'conserved';
807 $class = 'conservedall';
809 } elsif( $t eq 'i' ) {
810 $class = 'identical';
811 } elsif( $t eq 'n' ) {
813 } elsif( $t eq 'm' ) {
815 } elsif( $t eq 'g' ) {
817 } elsif( $t eq 'f' ) {
818 $class = 'frameshift';
820 $self->warn("unknown sequence class $class using 'identical'");
821 $class = 'identical';
824 ## Sensitive to member name changes.
825 $seqType = "_\L$seqType\E";
826 $class = "_\L$class\E";
829 if( $class eq '_gap' ) {
830 # this means that we are remapping the gap length that is stored
831 # in the hash (for example $self->{'_gapRes_query'} )
832 # so we'll return an array which has the values of the position of the
833 # of the gap (the key in the hash) + the gap length (value in the
834 # hash for this key - 1.
836 # changing this; since the index is the position prior to the insertion,
837 # repeat the position based on the number of gaps inserted
838 @ary = map { my @tmp;
839 # position holds number of gaps inserted
840 for my $g (1..$self->{seqinds
}{"${class}Res$seqType"}->{$_}) {
844 sort { $a <=> $b } keys %{ $self->{seqinds
}{"${class}Res$seqType"}};
845 } elsif( $class eq '_conservedall' ) {
846 @ary = sort { $a <=> $b }
847 keys %{ $self->{seqinds
}{"_conservedRes$seqType"}},
848 keys %{ $self->{seqinds
}{"_identicalRes$seqType"}},
850 @ary = sort { $a <=> $b } keys %{ $self->{seqinds
}{"${class}Res$seqType"}};
852 require Bio
::Search
::BlastUtils
if $collapse;
854 return $collapse ?
&Bio
::Search
::SearchUtils
::collapse_nums
(@ary) : @ary;
857 =head2 ambiguous_seq_inds
859 Title : ambiguous_seq_inds
860 Purpose : returns a string denoting whether sequence indices for query,
861 : subject, or both are ambiguous
862 Returns : String; 'query', 'subject', 'query/subject', or empty string ''
864 Comments : For HSPs partially or completely derived from translated sequences
865 : (TBLASTN, BLASTX, TBLASTX, or similar), some positional data
866 : cannot easily be attributed to a single position (i.e. the
867 : positional data is ambiguous). In these cases all three codon
868 : positions are reported when using seq_inds(). Under these
869 : conditions you can check ambiguous_seq_inds() to determine whether
870 : the query, subject, or both are ambiguous.
871 See Also : L<Bio::Search::Hit::HSPI::seq_inds()>
875 sub ambiguous_seq_inds
{
877 $self->_calculate_seq_positions();
878 return $self->{seqinds
}{'_warnRes'} if exists $self->{seqinds
}{'_warnRes'};
879 return $self->{seqinds
}{'_warnRes'} = '';
882 =head2 Inherited from L<Bio::SeqFeature::SimilarityPair>
884 These methods come from L<Bio::SeqFeature::SimilarityPair>
889 Usage : my $query = $hsp->query
890 Function: Returns a SeqFeature representing the query in the HSP
891 Returns : L<Bio::SeqFeature::Similarity>
892 Args : [optional] new value to set
898 unless ($self->{_created_qff
}) {
899 $self->_query_seq_feature;
901 return $self->SUPER::query
(@_);
906 if (! $self->{_finished_new
} || $self->{_making_qff
}) {
907 return $self->{_sim1
} if $self->{_sim1
};
908 $self->{_sim1
} = Bio
::SeqFeature
::Similarity
->new();
909 return $self->{_sim1
};
911 unless ($self->{_created_qff
}) {
912 $self->_query_seq_feature;
914 return $self->SUPER::feature1
(@_);
920 Usage : my $hit = $hsp->hit
921 Function: Returns a SeqFeature representing the hit in the HSP
922 Returns : L<Bio::SeqFeature::Similarity>
923 Args : [optional] new value to set
929 unless ($self->{_created_sff
}) {
930 $self->_subject_seq_feature;
932 return $self->SUPER::hit
(@_);
937 if (! $self->{_finished_new
} || $self->{_making_sff
}) {
938 return $self->{_sim2
} if $self->{_sim2
};
939 $self->{_sim2
} = Bio
::SeqFeature
::Similarity
->new();
940 return $self->{_sim2
};
942 unless ($self->{_created_sff
}) {
943 $self->_subject_seq_feature;
945 return $self->SUPER::feature2
(@_);
951 Usage : $evalue = $obj->significance();
952 $obj->significance($evalue);
953 Function: Get/Set the significance value
955 Args : [optional] new value to set
959 # Override significance to return the e-value or, if this is
960 # not defined (WU-BLAST), return the p-value.
962 my ($self, $val) = @_;
963 if (!defined $self->{SIGNIFICANCE
} || defined $val) {
964 $self->{SIGNIFICANCE
} = defined $val ?
$val :
965 defined $self->evalue ?
$self->evalue :
966 defined $self->pvalue ?
$$self->pvalue :
968 $self->query->significance($self->{SIGNIFICANCE
});
970 return $self->{SIGNIFICANCE
};
976 Usage : $score = $obj->score();
978 Function: Get/Set the score value
980 Args : [optional] new value to set
986 Usage : $bits = $obj->bits();
988 Function: Get/Set the bits value
990 Args : [optional] new value to set
996 Usage : $hsp->strand('query')
997 Function: Retrieves the strand for the HSP component requested
998 Returns : +1 or -1 (0 if unknown)
999 Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
1000 'query' to retrieve the query strand (default)
1004 =head1 Private methods
1008 =head2 _calculate_seq_positions
1010 Title : _calculate_seq_positions
1011 Usage : $self->_calculate_seq_positions
1012 Function: Internal function
1019 sub _calculate_seq_positions
{
1020 my ($self,@args) = @_;
1021 return unless ( $self->{'_sequenceschanged'} );
1022 $self->{'_sequenceschanged'} = 0;
1023 my ($seqString, $qseq,$sseq) = ( $self->homology_string(),
1024 $self->query_string(),
1025 $self->hit_string() );
1026 my ($mlen, $qlen, $slen) = (CORE
::length($seqString), CORE
::length($qseq), CORE
::length($sseq));
1027 my $qdir = $self->query->strand || 1;
1028 my $sdir = $self->hit->strand || 1;
1029 my ($resCount_query, $endpoint_query) = ($qdir <=0) ?
($self->query->end, $self->query->start)
1030 : ($self->query->start, $self->query->end);
1031 my ($resCount_sbjct, $endpoint_sbjct) = ($sdir <=0) ?
($self->hit->end, $self->hit->start)
1032 : ($self->hit->start, $self->hit->end);
1034 my $prog = $self->algorithm;
1036 if( $prog =~ /FAST|SSEARCH|SMITH-WATERMAN/i ) {
1038 # we infer the end of the regional sequence where the first and last
1039 # non spaces are in the homology string
1040 # then we use the HSP->length to tell us how far to read
1041 # to cut off the end of the sequence
1043 my ($start, $rest) = (0,0);
1044 if( $seqString =~ /^(\s+)?(.*?)\s*$/ ) {
1045 ($start, $rest) = ($1 ? CORE
::length($1) : 0, CORE
::length($2));
1048 $seqString = substr($seqString, $start, $rest);
1049 $qseq = substr($qseq, $start, $rest);
1050 $sseq = substr($sseq, $start, $rest);
1052 # commented out 10/29/08
1053 # removing frameshift symbols doesn't take into account the following:
1054 # 1) does not remove the same point in the homology string (get
1055 # positional errors)
1056 # 2) adjustments to the overall position in the string due to the
1057 # frameshift must be taken into consideration (get balancing errors)
1059 # Frameshifts will be handled directly in the main loop.
1062 # fasta reports some extra 'regional' sequence information
1063 # we need to clear out first
1064 # this seemed a bit insane to me at first, but it appears to
1067 #$qseq =~ s![\\\/]!!g;
1068 #$sseq =~ s![\\\/]!!g;
1071 $self->{seqinds
}{'_warnRes'} = '';
1072 ($self->{_sbjct_offset
}, $self->{_query_offset
}) = (1,1);
1073 if($prog =~ /^(?:PSI)?T(BLAST|FAST)(N|X|Y)/oi ) {
1074 $self->{_sbjct_offset
} = 3;
1075 if ($1 eq 'BLAST' && $2 eq 'X') { #TBLASTX
1076 $self->{_query_offset
} = 3;
1077 $self->{seqinds
}{'_warnRes'} = 'query/subject';
1079 $self->{seqinds
}{'_warnRes'} = 'subject';
1081 } elsif($prog =~ /^(BLAST|FAST)(X|Y|XY)/oi ) {
1082 $self->{_query_offset
} = 3;
1083 $self->{seqinds
}{'_warnRes'} = 'query';
1085 my ($qfs, $sfs) = (0,0);
1087 for my $pos (0..CORE
::length($seqString)-1) {
1088 my @qrange = (0 - $qfs)..($self->{_query_offset
} - 1);
1089 my @srange = (0 - $sfs)..($self->{_sbjct_offset
} - 1);
1090 # $self->debug("QRange:@qrange SRange:@srange\n") if ($qfs || $sfs);
1091 ($qfs, $sfs) = (0,0);
1092 my ($mchar, $qchar, $schar) = (
1093 unpack("x$pos A1",$seqString) || ' ',
1094 $pos < CORE
::length($qseq) ?
unpack("x$pos A1",$qseq) : '-',
1095 $pos < CORE
::length($sseq) ?
unpack("x$pos A1",$sseq) : '-'
1098 my ($qgap, $sgap) = (0,0);
1099 if( $mchar eq '+' || $mchar eq '.') { # conserved
1100 $self->{seqinds
}{_conservedRes_query
}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange;
1101 $self->{seqinds
}{_conservedRes_sbjct
}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange;
1102 $matchtype = 'conserved';
1103 } elsif( $mchar eq ':' || $mchar ne ' ' ) { # identical
1104 $self->{seqinds
}{_identicalRes_query
}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange;
1105 $self->{seqinds
}{_identicalRes_sbjct
}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange;
1106 $matchtype = 'identical';
1107 } elsif( $mchar eq ' ' ) { # possible mismatch/nomatch/frameshift
1108 $qfs = $qchar eq '/' ?
-1 : # base inserted to match frame
1109 $qchar eq '\\' ?
1 : # base deleted to match frame
1111 $sfs = $schar eq '/' ?
-1 :
1112 $schar eq '\\' ?
1 :
1115 # Frameshifts are tricky.
1117 # '/' indicates that the next residue is shifted back one
1118 # (-1) frame position and is a deletion of one base (so to
1119 # correctly align, a base is inserted). That residue should only
1120 # occupy two sequence positions instead of three.
1122 # '\' indicates that the next residue is shifted forward
1123 # one (+1) frame position and is an insertion of one base (so to
1124 # correctly align, a base is removed). That residue should
1125 # occupy four sequence positions instead of three.
1127 # Note that gaps are not counted across from frameshifts.
1128 # Frameshift indices are a range of positions starting in the
1129 # previous sequence position in which the frameshift occurs;
1130 # they are ambiguous by nature.
1131 $self->{seqinds
}{_frameshiftRes_query
}{ $resCount_query - ($_ * $qdir * $qfs) } = $qfs for @qrange;
1132 $matchtype = "$qfs frameshift-query";
1136 $self->{seqinds
}{_frameshiftRes_sbjct
}{ $resCount_sbjct - ($_ * $sdir * $sfs) } = $sfs for @srange;
1137 $matchtype = "$sfs frameshift-sbcjt";
1140 elsif ($qchar eq $self->{GAP_SYMBOL
}) {
1141 # gap are counted as being in the immediately preceeding residue
1142 # position; for translated sequences this is not the start of
1143 # the previous codon, but the third codon position
1144 $self->{seqinds
}{_gapRes_query
}{ $resCount_query - $qdir }++ for @qrange;
1145 $matchtype = 'gap-query';
1148 elsif ($schar eq $self->{GAP_SYMBOL
}) {
1149 $self->{seqinds
}{_gapRes_sbjct
}{ $resCount_sbjct - $sdir }++ for @srange;
1150 $matchtype = 'gap-sbjct';
1154 # if not a gap or frameshift in either seq, must be mismatch
1155 $self->{seqinds
}{_mismatchRes_query
}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange;
1156 $self->{seqinds
}{_mismatchRes_sbjct
}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange;
1157 $matchtype = 'mismatch';
1159 # always add a nomatch unless the current position in the seq is a gap
1161 $self->{seqinds
}{_nomatchRes_sbjct
}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange;
1164 $self->{seqinds
}{_nomatchRes_query
}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange;
1167 $self->warn("Unknown midline character: [$mchar]");
1169 # leave in and uncomment for future debugging
1170 #$self->debug(sprintf("%7d %1s[%1s]%1s %-7d Type: %-20s QOff:%-2d SOff:%-2d\n",
1177 # ($self->{_query_offset} * $qdir),
1178 # ($self->{_sbjct_offset} * $sdir)));
1179 $resCount_query += ($qdir * (scalar(@qrange) + $qfs)) if !$qgap;
1180 $resCount_sbjct += ($sdir * (scalar(@srange) + $sfs)) if !$sgap;
1187 See documentation in L<Bio::Search::HSP::HSPI::n()|Bio::Search::HSP::HSPI>
1193 if(@_) { $self->{'N'} = shift; }
1194 defined $self->{'N'} ?
$self->{'N'} : '';
1199 See documentation in L<Bio::Search::HSP::HSPI::range()|Bio::Search::HSP::HSPI>
1204 my ($self, $seqType) = @_;
1206 $seqType ||= 'query';
1207 $seqType = 'sbjct' if $seqType eq 'hit';
1210 if( $seqType eq 'query' ) {
1211 $start = $self->query->start;
1212 $end = $self->query->end;
1215 $start = $self->hit->start;
1216 $end = $self->hit->end;
1218 return ($start, $end);
1225 Usage : $obj->links($newval)
1226 Function: Get/Set the Links value (from WU-BLAST)
1227 Indicates the placement of the alignment in the group of HSPs
1228 Returns : Value of links
1229 Args : On set, new value (a scalar or undef, optional)
1237 return $self->{LINKS
} = shift if @_;
1238 return $self->{LINKS
};
1244 Usage : $obj->hsp_group($newval)
1245 Function: Get/Set the Group value (from WU-BLAST)
1246 Indicates a grouping of HSPs
1247 Returns : Value of group
1248 Args : On set, new value (a scalar or undef, optional)
1256 return $self->{HSP_GROUP
} = shift if @_;
1257 return $self->{HSP_GROUP
};
1262 Title : hit_features
1263 Usage : $obj->hit_features($newval)
1264 Function: Get/Set the HSP hit feature string (from some BLAST 2.2.13 text
1265 output), which is a string of overlapping or nearby features in HSP
1267 Returns : Value of hit features, if present
1268 Args : On set, new value (a scalar or undef, optional)
1276 return $self->{HIT_FEATURES
} = shift if @_;
1277 return $self->{HIT_FEATURES
};
1280 # The cigar string code is written by Juguang Xiao <juguang@fugu-sg.org>
1282 =head1 Brief introduction on cigar string
1284 NOTE: the concept is originally from EnsEMBL docs at
1285 http://may2005.archive.ensembl.org/Docs/wiki/html/EnsemblDocs/CigarFormat.html
1286 Please append to these docs if you have a better definition.
1288 Sequence alignment hits can be stored in a database as ungapped alignments.
1289 This imposes 2 major constraints on alignments:
1291 a) alignments for a single hit record require multiple rows in the database,
1293 b) it is not possible to accurately retrieve the exact original alignment.
1295 Alternatively, sequence alignments can be stored as gapped alignments using
1296 the CIGAR line format (where CIGAR stands for Concise Idiosyncratic Gapped
1299 In the cigar line format alignments are stored as follows:
1305 An example of an alignment for a hypthetical protein match is shown below:
1308 Query: 42 PGPAGLP----GSVGLQGPRGLRGPLP-GPLGPPL...
1312 Sbjct: 1672 PGTP*TPLVPLGPWVPLGPSSPR--LPSGPLGPTD...
1315 protein_align_feature table as the following cigar line:
1322 Usage: $cigar_string = $hsp->cigar_string
1323 Function: Generate and return cigar string for this HSP alignment
1324 Args: No input needed
1325 Return: a cigar string
1331 my ($self, $arg) = @_;
1332 $self->warn("this is not a setter") if(defined $arg);
1334 unless(defined $self->{_cigar_string
}){ # generate cigar string
1335 my $cigar_string = $self->generate_cigar_string($self->query_string, $self->hit_string);
1336 $self->{_cigar_string
} = $cigar_string;
1339 return $self->{_cigar_string
};
1342 =head2 generate_cigar_string
1344 Name: generate_cigar_string
1345 Usage: my $cigar_string = Bio::Search::HSP::GenericHSP::generate_cigar_string ($qstr, $hstr);
1346 Function: generate cigar string from a simple sequence of alignment.
1347 Args: the string of query and subject
1348 Return: cigar string
1352 sub generate_cigar_string
{
1353 my ($self, $qstr, $hstr) = @_;
1354 my @qchars = split //, $qstr;
1355 my @hchars = split //, $hstr;
1357 unless(scalar(@qchars) == scalar(@hchars)){
1358 $self->throw("two sequences are not equal in lengths");
1361 $self->{_count_for_cigar_string
} = 0;
1362 $self->{_state_for_cigar_string
} = 'M';
1364 my $cigar_string = '';
1365 for(my $i=0; $i <= $#qchars; $i++){
1366 my $qchar = $qchars[$i];
1367 my $hchar = $hchars[$i];
1368 if($qchar ne $self->{GAP_SYMBOL
} && $hchar ne $self->{GAP_SYMBOL
}){ # Match
1369 $cigar_string .= $self->_sub_cigar_string('M');
1370 }elsif($qchar eq $self->{GAP_SYMBOL
}){ # Deletion
1371 $cigar_string .= $self->_sub_cigar_string('D');
1372 }elsif($hchar eq $self->{GAP_SYMBOL
}){ # Insertion
1373 $cigar_string .= $self->_sub_cigar_string('I');
1375 $self->throw("Impossible state that 2 gaps on each seq aligned");
1378 $cigar_string .= $self->_sub_cigar_string('X'); # not forget the tail.
1379 return $cigar_string;
1382 # an internal method to help generate cigar string
1384 sub _sub_cigar_string
{
1385 my ($self, $new_state) = @_;
1387 my $sub_cigar_string = '';
1388 if($self->{_state_for_cigar_string
} eq $new_state){
1389 $self->{_count_for_cigar_string
} += 1; # Remain the state and increase the counter
1391 $sub_cigar_string .= $self->{_count_for_cigar_string
}
1392 unless $self->{_count_for_cigar_string
} == 1;
1393 $sub_cigar_string .= $self->{_state_for_cigar_string
};
1394 $self->{_count_for_cigar_string
} = 1;
1395 $self->{_state_for_cigar_string
} = $new_state;
1397 return $sub_cigar_string;
1400 # needed before seqfeatures can be made
1401 sub _pre_seq_feature
{
1403 my $algo = $self->{ALGORITHM
};
1405 my ($queryfactor, $hitfactor) = (0,0);
1406 my ($hitmap, $querymap) = (1,1);
1407 if( $algo =~ /^(?:PSI)?T(?:BLAST|FAST|SW)[NY]/oi ) {
1411 elsif ($algo =~ /^(?:FAST|BLAST)(?:X|Y|XY)/oi || $algo =~ /^P?GENEWISE/oi ) {
1415 elsif ($algo =~ /^T(BLAST|FAST|SW)(X|Y|XY)/oi || $algo =~ /^(BLAST|FAST|SW)N/oi || $algo =~ /^WABA|AXT|BLAT|BLASTZ|PSL|MEGABLAST|EXONERATE|SW|SMITH\-WATERMAN|SIM4$/){
1417 $hitmap = $querymap = 3;
1422 elsif ($algo =~ /^RPS-BLAST/) {
1423 if ($algo =~ /^RPS-BLAST\(BLASTX\)/) {
1430 my $stranded = lc substr($self->{STRANDED
}, 0,1);
1431 $queryfactor = ($stranded eq 'q' || $stranded eq 'b') ?
1 : 0;
1432 $hitfactor = ($stranded eq 'h' || $stranded eq 's' || $stranded eq 'b') ?
1 : 0;
1434 $self->{_query_factor
} = $queryfactor;
1435 $self->{_hit_factor
} = $hitfactor;
1436 $self->{_hit_mapping
} = $hitmap;
1437 $self->{_query_mapping
} = $querymap;
1440 # make query seq feature
1441 sub _query_seq_feature
{
1443 $self->{_making_qff
} = 1;
1444 my $qs = $self->{QUERY_START
};
1445 my $qe = $self->{QUERY_END
};
1446 unless (defined $self->{_query_factor
}) {
1447 $self->_pre_seq_feature;
1449 my $queryfactor = $self->{_query_factor
};
1451 unless( defined $qe && defined $qs ) { $self->throw("Did not specify a Query End or Query Begin"); }
1454 if ($qe > $qs) { # normal query: start < end
1469 ($qs,$qe) = ($qe,$qs);
1472 # Note: many of these data are not query- and hit-specific.
1473 # Only start, end, name, length are.
1474 # We could be more efficient by only storing this info once.
1475 # steve chervitz --- Sat Apr 5 00:55:07 2003
1477 my $sim1 = $self->{_sim1
} || Bio
::SeqFeature
::Similarity
->new();
1480 $sim1->significance($self->{EVALUE
});
1481 $sim1->bits($self->{BITS
});
1482 $sim1->score($self->{SCORE
});
1483 $sim1->strand($strand);
1484 $sim1->seq_id($self->{QUERY_NAME
});
1485 $sim1->seqlength($self->{QUERY_LENGTH
});
1486 $sim1->source_tag($self->{ALGORITHM
});
1487 $sim1->seqdesc($self->{QUERY_DESC
});
1488 $sim1->add_tag_value('meta', $self->{META
}) if $self->can('meta');
1489 # to determine frame from something like FASTXY which doesn't
1491 my $qframe = $self->{QUERY_FRAME
};
1493 if (defined $strand && !defined $qframe && $queryfactor) {
1494 $qframe = ( $qs % 3 ) * $strand;
1496 elsif (!defined $strand) {
1500 if( $qframe =~ /^([+-])?([0-3])/ ) {
1501 my $dir = $1 || '+';
1502 if($qframe && (($dir eq '-' && $strand >= 0) || ($dir eq '+' && $strand <= 0)) ) {
1503 $self->warn("Query frame ($qframe) did not match strand of query ($strand)");
1505 $qframe = $2 != 0 ?
$2 - 1 : $2;
1507 $self->warn("Unknown query frame ($qframe)");
1511 $sim1->frame($qframe);
1512 $self->SUPER::feature1
($sim1);
1514 $self->{_created_qff
} = 1;
1515 $self->{_making_qff
} = 0;
1518 # make subject seq feature
1519 sub _subject_seq_feature
{
1521 $self->{_making_sff
} = 1;
1522 my $hs = $self->{HIT_START
};
1523 my $he = $self->{HIT_END
};
1524 unless (defined $self->{_hit_factor
}) {
1525 $self->_pre_seq_feature;
1527 my $hitfactor = $self->{_hit_factor
};
1529 unless( defined $he && defined $hs ) { $self->throw("Did not specify a Hit End or Hit Begin"); }
1532 if ($he > $hs) { # normal subject
1547 ($hs,$he) = ( $he,$hs); # reverse subject: start bigger than end
1550 my $sim2 = $self->{_sim2
} || Bio
::SeqFeature
::Similarity
->new();
1553 $sim2->significance($self->{EVALUE
});
1554 $sim2->bits($self->{BITS
});
1555 $sim2->score($self->{SCORE
});
1556 $sim2->strand($strand);
1557 $sim2->seq_id($self->{HIT_NAME
});
1558 $sim2->seqlength($self->{HIT_LENGTH
});
1559 $sim2->source_tag($self->{ALGORITHM
});
1560 $sim2->seqdesc($self->{HIT_DESC
});
1561 $sim2->add_tag_value('meta', $self->{META
}) if $self->can('meta');
1562 my $hframe = $self->{HIT_FRAME
};
1564 if (defined $strand && !defined $hframe && $hitfactor) {
1565 $hframe = ( $hs % 3 ) * $strand;
1567 elsif (!defined $strand) {
1571 if( $hframe =~ /^([+-])?([0-3])/ ) {
1572 my $dir = $1 || '+';
1573 if($hframe && (($dir eq '-' && $strand >= 0) || ($dir eq '+' && $strand <= 0)) ) {
1574 $self->warn("Subject frame ($hframe) did not match strand of subject ($strand)");
1576 $hframe = $2 != 0 ?
$2 - 1 : $2;
1578 $self->warn("Unknown subject frame ($hframe)");
1582 $sim2->frame($hframe);
1583 $self->SUPER::feature2
($sim2);
1585 $self->{_created_sff
} = 1;
1586 $self->{_making_sff
} = 0;
1589 # before calling the num_* methods
1590 sub _pre_similar_stats
{
1592 my $identical = $self->{IDENTICAL
};
1593 my $conserved = $self->{CONSERVED
};
1594 my $percent_id = $self->{PERCENT_IDENTITY
};
1596 if (! defined $identical) {
1597 if (! defined $percent_id) {
1598 $self->warn("Did not defined the number of identical matches or overall percent identity in the HSP; assuming 0");
1602 $identical = sprintf("%.0f",$percent_id * $self->{HSP_LENGTH
});
1606 if (! defined $conserved) {
1607 $self->warn("Did not define the number of conserved matches in the HSP; assuming conserved == identical ($identical)")
1608 if( $self->{ALGORITHM
} !~ /^((FAST|BLAST)N)|EXONERATE|SIM4|AXT|PSL|BLAT|BLASTZ|WABA/oi);
1609 $conserved = $identical;
1611 $self->{IDENTICAL
} = $identical;
1612 $self->{CONSERVED
} = $conserved;
1613 $self->{_did_presimilar
} = 1;
1616 # before calling the frac_* methods
1620 my $hsp_len = $self->{HSP_LENGTH
};
1621 my $hit_len = $self->{HIT_LENGTH
};
1622 my $query_len = $self->{QUERY_LENGTH
};
1624 my $identical = $self->num_identical;
1625 my $conserved = $self->num_conserved;
1627 $self->{_did_prefrac
} = 1;
1630 $self->length('total', $hsp_len);
1631 $logical = $self->_logical_length('total');
1632 $self->frac_identical( 'total', $identical / $hsp_len);
1633 $self->frac_conserved( 'total', $conserved / $hsp_len);
1636 $logical = $self->_logical_length('hit');
1637 $self->frac_identical( 'hit', $identical / $logical);
1638 $self->frac_conserved( 'hit', $conserved / $logical);
1641 $logical = $self->_logical_length('query');
1642 $self->frac_identical( 'query', $identical / $logical) ;
1643 $self->frac_conserved( 'query', $conserved / $logical);
1647 # before calling gaps()
1648 # This relies first on passed parameters (parser-dependent), then on gaps
1649 # calculated by seq_inds() (if set), then falls back to directly checking
1650 # for '-' as a last resort
1654 my $query_gaps = $self->{QUERY_GAPS
};
1655 my $query_seq = $self->{QUERY_SEQ
};
1656 my $hit_gaps = $self->{HIT_GAPS
};
1657 my $hit_seq = $self->{HIT_SEQ
};
1658 my $gaps = $self->{HSP_GAPS
};
1660 $self->{_did_pregaps
} = 1; # well, we're in the process; avoid recursion
1661 if( defined $query_gaps ) {
1662 $self->gaps('query', $query_gaps);
1663 } elsif( defined $query_seq ) {
1664 my $qg = (defined $self->{'_query_offset'}) ?
$self->seq_inds('query','gaps') : scalar( $query_seq =~ tr/\-//);
1665 my $offset = $self->{'_query_offset'} || 1;
1666 $self->gaps('query', $qg/$offset);
1668 if( defined $hit_gaps ) {
1669 $self->gaps('hit', $hit_gaps);
1670 } elsif( defined $hit_seq ) {
1671 my $hg = (defined $self->{'_sbjct_offset'}) ?
$self->seq_inds('hit','gaps') : scalar( $hit_seq =~ tr/\-//);
1672 my $offset = $self->{'_sbjct_offset'} || 1;
1673 $self->gaps('hit', $hg/$offset);
1675 if( ! defined $gaps ) {
1676 $gaps = $self->gaps("query") + $self->gaps("hit");
1678 $self->gaps('total', $gaps);
1681 # before percent_identity
1684 $self->{_did_prepi
} = 1;
1685 $self->percent_identity($self->{PERCENT_IDENTITY
} || $self->frac_identical('total')*100) if( $self->{HSP_LENGTH
} > 0 );