2 # BioPerl module for Bio::Search::Hit::GenericHit
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Search::Hit::GenericHit - A generic implementation of the Bio::Search::Hit::HitI interface
20 use Bio::Search::Hit::GenericHit;
21 my $hit = Bio::Search::Hit::GenericHit->new(-algorithm => 'blastp');
23 # typically one gets HitI objects from a SearchIO stream via a ResultI
25 my $parser = Bio::SearchIO->new(-format => 'blast', -file => 'result.bls');
27 my $result = $parser->next_result;
28 my $hit = $result->next_hit;
30 # TODO: Describe how to configure a SearchIO stream so that it generates
35 This object handles the hit data from a Database Sequence Search such
38 Unless you're writing a parser, you won't ever need to create a
39 GenericHit or any other HitI-implementing object. If you use
40 the SearchIO system, HitI objects are created automatically from
41 a SearchIO stream which returns Bio::Search::Hit::HitI objects.
43 For documentation on what you can do with GenericHit (and other HitI
44 objects), please see the API documentation in
45 L<Bio::Search::Hit::HitI|Bio::Search::Hit::HitI>.
51 User feedback is an integral part of the evolution of this and other
52 Bioperl modules. Send your comments and suggestions preferably to
53 the Bioperl mailing list. Your participation is much appreciated.
55 bioperl-l@bioperl.org - General discussion
56 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
60 Please direct usage questions or support issues to the mailing list:
62 I<bioperl-l@bioperl.org>
64 rather than to the module maintainer directly. Many experienced and
65 reponsive experts will be able look at the problem and quickly
66 address it. Please include a thorough description of the problem
67 with code and data examples if at all possible.
71 Report bugs to the Bioperl bug tracking system to help us keep track
72 of the bugs and their resolution. Bug reports can be submitted via the
75 https://github.com/bioperl/bioperl-live/issues
77 =head1 AUTHOR - Jason Stajich and Steve Chervitz
79 Email jason-at-bioperl-dot-org
80 Email sac-at-bioperl-dot-org
84 Sendu Bala, bix@sendu.me.uk
88 The rest of the documentation details each of the object methods.
89 Internal methods are usually preceded with a _
94 # Let the code begin...
97 package Bio
::Search
::Hit
::GenericHit
;
101 use Bio
::Search
::SearchUtils
;
103 use base
qw(Bio::Root::Root Bio::Search::Hit::HitI);
108 Usage : my $obj = Bio::Search::Hit::GenericHit->new();
109 Function: Builds a new Bio::Search::Hit::GenericHit object
110 Returns : Bio::Search::Hit::GenericHit
111 Args : -name => Name of Hit (required)
112 -description => Description (optional)
113 -accession => Accession number (optional)
114 -ncbi_gi => NCBI GI UID (optional)
115 -length => Length of the Hit (optional)
116 -score => Raw Score for the Hit (optional)
117 -bits => Bit Score for the Hit (optional)
118 -significance => Significance value for the Hit (optional)
119 -algorithm => Algorithm used (BLASTP, FASTX, etc...)
120 -hsps => Array ref of HSPs for this Hit.
121 -found_again => boolean, true if hit appears in a
122 "previously found" section of a PSI-Blast report.
123 -hsp_factory => Bio::Factory::ObjectFactoryI able to create HSPI
129 my($class,@args) = @_;
131 my $self = $class->SUPER::new
(@args);
132 my ($hsps, $name,$query_len,$desc, $acc, $locus, $length,
133 $score,$algo,$signif,$bits, $p,
134 $rank, $hsp_factory, $gi, $iter, $found) = $self->_rearrange([qw(HSPS
140 LENGTH SCORE ALGORITHM
146 FOUND_AGAIN)], @args);
148 defined $query_len && $self->query_length($query_len);
150 if( ! defined $name ) {
151 $self->throw("Must have defined a valid name for Hit");
156 defined $acc && $self->accession($acc);
157 defined $locus && $self->locus($locus);
158 defined $desc && $self->description($desc);
159 defined $length && $self->length($length);
160 defined $algo && $self->algorithm($algo);
161 defined $signif && $self->significance($signif);
162 defined $score && $self->raw_score($score);
163 defined $bits && $self->bits($bits);
164 defined $rank && $self->rank($rank);
165 defined $hsp_factory && $self->hsp_factory($hsp_factory);
166 defined $gi && $self->ncbi_gi($gi);
167 defined $iter && $self->iteration($iter);
168 defined $found && $self->found_again($found);
169 # p() has a weird interface, so this is a hack workaround
174 $self->{'_iterator'} = 0;
175 if( defined $hsps ) {
176 if( ref($hsps) !~ /array/i ) {
177 $self->warn("Did not specify a valid array ref for the param HSPS ($hsps)");
182 $self->add_hsp(shift @
{$hsps} );
184 $self->{'_hsps'} = undef if $hspcount == 0;
188 $self->{'_hsps'} = undef;
197 Usage : $hit->add_hsp($hsp)
198 Function: Add a HSP to the collection of HSPs for a Hit
199 Returns : number of HSPs in the Hit
200 Args : Bio::Search::HSP::HSPI object, OR hash ref containing data suitable
201 for creating a HSPI object (&hsp_factory must be set to get it back)
206 my ($self,$hsp) = @_;
207 if (!defined $hsp || (ref($hsp) ne 'HASH' && !$hsp->isa('Bio::Search::HSP::HSPI'))) {
208 $self->throw("Must provide a valid Bio::Search::HSP::HSPI object or hash ref to object: $self method: add_hsp value: $hsp");
212 push @
{$self->{'_hsps'}}, $hsp;
213 if (ref($hsp) eq 'HASH') {
214 $self->{_hashes
}->{$#{$self->{'_hsps'}}} = 1;
216 return scalar @
{$self->{'_hsps'}};
222 Usage : $hit->hsp_factory($hsp_factory)
223 Function: Get/set the factory used to build HSPI objects if necessary.
224 Returns : Bio::Factory::ObjectFactoryI
225 Args : Bio::Factory::ObjectFactoryI
231 if (@_) { $self->{_hsp_factory
} = shift }
232 return $self->{_hsp_factory
} || return;
235 =head2 Bio::Search::Hit::HitI methods
237 Implementation of Bio::Search::Hit::HitI methods
242 Usage : $hit_name = $hit->name();
243 Function: returns the name of the Hit sequence
244 Returns : a scalar string
245 Args : [optional] scalar string to set the name
250 my ($self,$value) = @_;
251 my $previous = $self->{'_name'};
252 if( defined $value || ! defined $previous ) {
253 $value = $previous = '' unless defined $value;
254 $self->{'_name'} = $value;
262 Usage : $acc = $hit->accession();
263 Function: Retrieve the accession (if available) for the hit
264 Returns : a scalar string (empty string if not set)
270 my ($self,$value) = @_;
271 my $previous = $self->{'_accession'};
272 if( defined $value || ! defined $previous ) {
273 $value = $previous = '' unless defined $value;
274 $self->{'_accession'} = $value;
282 Usage : $desc = $hit->description();
283 Function: Retrieve the description for the hit
284 Returns : a scalar string
285 Args : [optional] scalar string to set the description
290 my ($self,$value) = @_;
291 my $previous = $self->{'_description'};
292 if( defined $value || ! defined $previous ) {
293 $value = $previous = '' unless defined $value;
294 $self->{'_description'} = $value;
302 Usage : my $len = $hit->length
303 Function: Returns the length of the hit
305 Args : [optional] integer to set the length
310 my ($self,$value) = @_;
311 my $previous = $self->{'_length'};
312 if( defined $value || ! defined $previous ) {
313 $value = $previous = 0 unless defined $value;
314 $self->{'_length'} = $value;
323 Usage : $alg = $hit->algorithm();
324 Function: Gets the algorithm specification that was used to obtain the hit
325 For BLAST, the algorithm denotes what type of sequence was aligned
326 against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
327 dna-prt, TBLASTN prt-translated dna, TBLASTX translated
329 Returns : a scalar string
330 Args : [optional] scalar string to set the algorithm
335 my ($self,$value) = @_;
336 my $previous = $self->{'_algorithm'};
337 if( defined $value || ! defined $previous ) {
338 $value = $previous = '' unless defined $value;
339 $self->{'_algorithm'} = $value;
347 Usage : $score = $hit->raw_score();
348 Function: Gets the "raw score" generated by the algorithm. What
349 this score is exactly will vary from algorithm to algorithm,
350 returning undef if unavailable.
351 Returns : a scalar value
352 Args : [optional] scalar value to set the raw score
357 my ($self,$value) = @_;
358 my $previous = $self->{'_score'};
359 if( defined $value ) {
360 $self->{'_score'} = $value;
361 } elsif ( ! defined $previous ) {
362 # Set the bits of the Hit to that of the top HSP.
363 unless( defined $self->{'_hsps'}->[0] ) {
364 $self->warn("No HSPs for this minimal Hit (".$self->name.")\n".
365 "If using NCBI BLAST, check bits() instead");
368 # use 'score' if available
369 if ( defined( ($self->hsps)[0]->score ) ) {
370 $previous = $self->{'_score'} = ($self->hsps)[0]->score;
372 # otherwise use 'bits'
373 elsif ( defined( ($self->hsps)[0]->bits ) ) {
374 $previous = $self->{'_score'} = ($self->hsps)[0]->bits;
382 Equivalent to L<raw_score()|raw_score>
386 sub score
{ shift->raw_score(@_); }
391 Usage : $significance = $hit->significance();
392 Function: Used to obtain the E or P value of a hit, i.e. the probability that
393 this particular hit was obtained purely by random chance. If
394 information is not available (nor calculatable from other
395 information sources), return undef.
396 Returns : a scalar value or undef if unavailable
397 Args : [optional] scalar value to set the significance
402 my ($self,$value) = @_;
403 my $previous = $self->{'_significance'};
404 if( defined $value ) {
405 $self->{'_significance'} = $value;
406 } elsif ( ! defined $previous ) {
407 unless( defined $self->{'_hsps'}->[0] ) {
408 $self->warn("No HSPs for this Hit (".$self->name.")");
411 # Set the significance of the Hit to that of the top HSP.
412 $previous = $self->{'_significance'} = ($self->hsps)[0]->significance;
420 Usage : $hit_object->bits();
421 Purpose : Gets the bit score of the best HSP for the current hit.
422 Example : $bits = $hit_object->bits();
423 Returns : Integer or undef if bit score is not set
425 Comments : For BLAST1, the non-bit score is listed in the summary line.
427 See Also : L<score()|score>
432 my ($self,$value) = @_;
433 my $previous = $self->{'_bits'};
434 if( defined $value ) {
435 $self->{'_bits'} = $value;
436 } elsif ( ! defined $previous ) {
437 # Set the bits of the Hit to that of the top HSP.
438 unless( defined $self->{'_hsps'}->[0] ) {
439 $self->warn("No HSPs for this minimal Hit (".$self->name.")\n".
440 "If using WU-BLAST, check raw_score() instead");
443 $previous = $self->{'_bits'} = ($self->hsps)[0]->bits;
451 Usage : while( $hsp = $obj->next_hsp()) { ... }
452 Function : Returns the next available High Scoring Pair
454 Returns : Bio::Search::HSP::HSPI object or null if finished
461 $self->{'_iterator'} = 0 unless defined $self->{'_iterator'};
463 defined($self->{'_hsps'})
464 && $self->{'_iterator'} <= scalar @
{$self->{'_hsps'}};
466 my $iterator = $self->{'_iterator'}++;
467 my $hsp = $self->{'_hsps'}->[$iterator] || return;
468 if (ref($hsp) eq 'HASH') {
469 my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
470 $hsp = $factory->create_object(%{$hsp});
471 $self->{'_hsps'}->[$iterator] = $hsp;
472 delete $self->{_hashes
}->{$iterator};
480 Usage : $hit_object->hsps();
481 Purpose : Get a list containing all HSP objects.
482 : Get the numbers of HSPs for the current hit.
483 Example : @hsps = $hit_object->hsps();
484 : $num = $hit_object->hsps(); # alternatively, use num_hsps()
485 Returns : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
486 : Scalar context: integer (number of HSPs).
487 : (Equivalent to num_hsps()).
488 Argument : n/a. Relies on wantarray
489 Throws : Exception if the HSPs have not been collected.
491 See Also : L<hsp()|hsp>, L<num_hsps()|num_hsps>
497 foreach my $i (keys %{$self->{_hashes
} || {}}) {
498 my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
499 $self->{'_hsps'}->[$i] = $factory->create_object(%{$self->{'_hsps'}->[$i]});
500 delete $self->{_hashes
}->{$i};
503 return wantarray() ? @
{$self->{'_hsps'} || []} : scalar(@
{$self->{'_hsps'} || []});
508 Usage : $hit_object->num_hsps();
509 Purpose : Get the number of HSPs for the present hit.
510 Example : $nhsps = $hit_object->num_hsps();
511 Returns : Integer or '-' if HSPs have not been callected
514 See Also : L<hsps()|hsps>
521 unless ($self->{'_hsps'}) {
525 return scalar(@
{$self->{'_hsps'}});
531 Usage : $hit->rewind;
532 Function: Allow one to reset the HSP iterator to the beginning
533 Since this is an in-memory implementation
541 $self->{'_iterator'} = 0;
546 Usage : $ambig_code = $hit_object->ambiguous_aln();
547 Purpose : Sets/Gets ambiguity code data member.
548 Example : (see usage)
549 Returns : String = 'q', 's', 'qs', '-'
550 : 'q' = query sequence contains overlapping sub-sequences
551 : while sbjct does not.
552 : 's' = sbjct sequence contains overlapping sub-sequences
553 : while query does not.
554 : 'qs' = query and sbjct sequence contains overlapping sub-sequences
555 : relative to each other.
556 : '-' = query and sbjct sequence do not contains multiple domains
557 : relative to each other OR both contain the same distribution
558 : of similar domains.
561 Comment : Note: "sbjct" is synonymous with "hit"
567 if(@_) { $self->{'_ambiguous_aln'} = shift; }
568 $self->{'_ambiguous_aln'} || '-';
573 See documentation in L<Bio::Search::Hit::HitI::overlap()|Bio::Search::Hit::HitI>
579 if(@_) { $self->{'_overlap'} = shift; }
580 defined $self->{'_overlap'} ?
$self->{'_overlap'} : 0;
586 Usage : $hit_object->n();
587 Purpose : Gets the N number for the current hit.
588 : This is the number of HSPs in the set which was ascribed
589 : the lowest P-value (listed on the description line).
590 : This number is not the same as the total number of HSPs.
591 : To get the total number of HSPs, use num_hsps().
592 Example : $n = $hit_object->n();
595 Throws : Exception if HSPs have not been set (BLAST2 reports).
596 Comments : Note that the N parameter is not reported in gapped BLAST2.
597 : Calling n() on such reports will result in a call to num_hsps().
598 : The num_hsps() method will count the actual number of
599 : HSPs in the alignment listing, which may exceed N in
602 See Also : L<num_hsps()|num_hsps>
609 # The check for $self->{'_n'} is a remnant from the 'query' mode days
610 # in which the sbjct object would collect data from the description
614 if(not defined($self->{'_n'})) {
621 $n ||= $self->num_hsps;
628 Usage : $hit_object->p( [format] );
629 Purpose : Get the P-value for the best HSP of the given BLAST hit.
630 : (Note that P-values are not provided with NCBI Blast2 reports).
631 Example : $p = $sbjct->p;
632 : $p = $sbjct->p('exp'); # get exponent only.
633 : ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
634 Returns : Float or scientific notation number (the raw P-value, DEFAULT).
635 : Integer if format == 'exp' (the magnitude of the base 10 exponent).
636 : 2-element list (float, int) if format == 'parts' and P-value
637 : is in scientific notation (See Comments).
638 Argument : format: string of 'raw' | 'exp' | 'parts'
639 : 'raw' returns value given in report. Default. (1.2e-34)
640 : 'exp' returns exponent value only (34)
641 : 'parts' returns the decimal and exponent as a
642 : 2-element list (1.2, -34) (See Comments).
643 Throws : Warns if no P-value is defined. Uses expect instead.
644 Comments : Using the 'parts' argument is not recommended since it will not
645 : work as expected if the P-value is not in scientific notation.
646 : That is, floats are not converted into sci notation before
647 : splitting into parts.
649 See Also : L<expect()|expect>, L<significance()|significance>, L<Bio::Search::SearchUtils::get_exponent()|Bio::Search::SearchUtils>
654 # Some duplication of logic for p(), expect() and signif() for the sake of performance.
655 my ($self, $fmt) = @_;
657 my $val = $self->{'_p'};
661 # P-value not defined, must be a NCBI Blast2 report.
662 # Use expect instead.
663 $self->warn( "P-value not defined. Using significance() instead.");
664 $val = $self->significance();
667 return $val if not $fmt or $fmt =~ /^raw/i;
668 ## Special formats: exponent-only or as list.
669 return &Bio
::Search
::SearchUtils
::get_exponent
($val) if $fmt =~ /^exp/i;
670 return (split (/eE/, $val)) if $fmt =~ /^parts/i;
672 ## Default: return the raw P-value.
678 Usage : $hit_object->hsp( [string] );
679 Purpose : Get a single HSPI object for the present HitI object.
680 Example : $hspObj = $hit_object->hsp; # same as 'best'
681 : $hspObj = $hit_object->hsp('best');
682 : $hspObj = $hit_object->hsp('worst');
683 Returns : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
684 Argument : String (or no argument).
685 : No argument (default) = highest scoring HSP (same as 'best').
686 : 'best' or 'first' = highest scoring HSP.
687 : 'worst' or 'last' = lowest scoring HSP.
688 Throws : Exception if the HSPs have not been collected.
689 : Exception if an unrecognized argument is used.
691 See Also : L<hsps()|hsps>, L<num_hsps>()
696 my( $self, $option ) = @_;
699 if (not ref $self->{'_hsps'}) {
700 $self->throw("Can't get HSPs: data not collected.");
703 my @hsps = $self->hsps;
705 return $hsps[0] if $option =~ /best|first|1/i;
706 return $hsps[$#hsps] if $option =~ /worst|last/i;
708 $self->throw("Can't get HSP for: $option\n" .
709 "Valid arguments: 'best', 'worst'");
712 =head2 logical_length
714 Usage : $hit_object->logical_length( [seq_type] );
715 : (mostly intended for internal use).
716 Purpose : Get the logical length of the hit sequence.
717 : This is necessary since the number of identical/conserved residues
718 : can be in terms of peptide sequence space, yet the query and/or hit
719 : sequence are in nucleotide space.
720 Example : $len = $hit_object->logical_length();
722 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
723 ('sbjct' is synonymous with 'hit')
726 : In the case of BLAST flavors:
727 : For TBLASTN reports, the length of the aligned portion of the
728 : nucleotide hit sequence is divided by 3; for BLASTX reports,
729 : the length of the aligned portion of the nucleotide query
730 : sequence is divided by 3. For TBLASTX reports, the length of
731 : both hit and query sequence are converted.
733 : This is important for functions like frac_aligned_query()
734 : which need to operate in amino acid coordinate space when dealing
735 : with [T]BLAST[NX] type reports.
737 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>
743 my $seqType = shift || 'query';
744 $seqType = 'sbjct' if $seqType eq 'hit';
746 my ($length, $logical);
747 my $algo = $self->algorithm;
749 # For the sbjct, return logical sbjct length
750 if( $seqType eq 'sbjct' ) {
751 $length = $self->length;
753 # Otherwise, return logical query length
754 $length = $self->query_length();
757 $logical = Bio
::Search
::SearchUtils
::logical_length
($algo, $seqType, $length);
759 return int($logical);
764 Usage : $hit_object->length_aln( [seq_type] );
765 Purpose : Get the total length of the aligned region for query or sbjct seq.
766 : This number will include all HSPs
767 Example : $len = $hit_object->length_aln(); # default = query
768 : $lenAln = $hit_object->length_aln('query');
770 Argument : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
771 ('sbjct' is synonymous with 'hit')
772 Throws : Exception if the argument is not recognized.
773 Comments : This method will report the logical length of the alignment,
774 : meaning that for TBLAST[NX] reports, the length is reported
775 : using amino acid coordinate space (i.e., nucleotides / 3).
777 : This method requires that all HSPs be tiled. If they have not
778 : already been tiled, they will be tiled first automatically..
779 : If you don't want the tiled data, iterate through each HSP
780 : calling length() on each (use hsps() to get all HSPs).
782 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>, L<gaps()|gaps>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<Bio::Search::HSP::BlastHSP::length()|Bio::Search::HSP::BlastHSP>
787 my( $self, $seqType, $num ) = @_;
789 $seqType ||= 'query';
790 $seqType = 'sbjct' if $seqType eq 'hit';
794 return $self->{'_length_aln_'.$seqType} = $num;
797 unless ($self->{'_hsps'}) {
798 #return wantarray ? ('-','-') : '-';
799 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
803 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
805 my $data = $self->{'_length_aln_'.$seqType};
807 ## If we don't have data, figure out what went wrong.
809 $self->throw("Can't get length aln for sequence type \"$seqType\". " .
810 "Valid types are 'query', 'hit', 'sbjct' ('sbjct' = 'hit')");
817 Usage : $hit_object->gaps( [seq_type] );
818 Purpose : Get the number of gaps in the aligned query, hit, or both sequences.
819 : Data is summed across all HSPs.
820 Example : $qgaps = $hit_object->gaps('query');
821 : $hgaps = $hit_object->gaps('hit');
822 : $tgaps = $hit_object->gaps(); # default = total (query + hit)
823 Returns : scalar context: integer
824 : array context without args: two-element list of integers
825 : (queryGaps, hitGaps)
826 : Array context can be forced by providing an argument of 'list' or 'array'.
828 : CAUTION: Calling this method within printf or sprintf is arrray context.
829 : So this function may not give you what you expect. For example:
830 : printf "Total gaps: %d", $hit->gaps();
831 : Actually returns a two-element array, so what gets printed
832 : is the number of gaps in the query, not the total
834 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list' (default = 'total')
835 ('sbjct' is synonymous with 'hit')
837 Comments : If you need data for each HSP, use hsps() and then interate
838 : through each HSP object.
839 : This method requires that all HSPs be tiled. If they have not
840 : already been tiled, they will be tiled first automatically..
841 : Not relying on wantarray since that will fail in situations
842 : such as printf "%d", $hit->gaps() in which you might expect to
843 : be printing the total gaps, but evaluates to array context.
845 See Also : L<length_aln()|length_aln>
850 my( $self, $seqType, $num ) = @_;
852 $seqType ||= (wantarray ?
'list' : 'total');
853 $seqType = 'sbjct' if $seqType eq 'hit';
855 unless ($self->{'_hsps'}) {
856 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
857 return wantarray ?
('-','-') : '-';
861 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
863 $seqType = lc($seqType);
866 $self->throw("Can't set gaps for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
868 return $self->{'_gaps_'.$seqType} = $num;
870 elsif($seqType =~ /list|array/i) {
871 return ($self->{'_gaps_query'}, $self->{'_gaps_sbjct'});
873 elsif($seqType eq 'total') {
874 return ($self->{'_gaps_query'} + $self->{'_gaps_sbjct'}) || 0;
876 return $self->{'_gaps_'.$seqType} || 0;
883 See documentation in L<Bio::Search::Hit::HitI::matches()|Bio::Search::Hit::HitI>
888 my( $self, $arg1, $arg2) = @_;
891 unless ($self->{'_hsps'}) {
892 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
893 return wantarray ?
('-','-') : '-';
896 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
899 @data = ($self->{'_totalIdentical'}, $self->{'_totalConserved'});
904 if( defined $arg2 ) {
905 $self->{'_totalIdentical'} = $arg1;
906 $self->{'_totalConserved'} = $arg2;
907 return ( $arg1, $arg2 );
909 elsif($arg1 =~ /^id/i) {
910 $data = $self->{'_totalIdentical'};
912 $data = $self->{'_totalConserved'};
914 #print STDERR "\nmatches(): id=$self->{'_totalIdentical'}, cons=$self->{'_totalConserved'}\n\n";
918 ## If we make it to here, it is likely the case that
919 ## the parser constructed a minimal hit object from the summary line only.
920 ## It either delibrately skipped parsing the alignment section,
921 ## or was not able to because it was absent (due to blast executable parameter
922 ## setting such as -b 0 (B=0 for WU-BLAST) )
923 #$self->throw("Can't get identical or conserved data: no data.");
929 Usage : $sbjct->start( [seq_type] );
930 Purpose : Gets the start coordinate for the query, sbjct, or both sequences
931 : in the BlastHit object. If there is more than one HSP, the lowest start
932 : value of all HSPs is returned.
933 Example : $qbeg = $sbjct->start('query');
934 : $sbeg = $sbjct->start('hit');
935 : ($qbeg, $sbeg) = $sbjct->start();
936 Returns : scalar context: integer
937 : array context without args: list of two integers (queryStart, sbjctStart)
938 : Array context can be "induced" by providing an argument of 'list' or 'array'.
939 Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
940 ('sbjct' is synonymous with 'hit')
943 See Also : L<end()|end>, L<range()|range>, L<strand()|strand>,
944 L<Bio::Search::HSP::BlastHSP::start|Bio::Search::HSP::BlastHSP>
949 my ($self, $seqType, $num) = @_;
951 unless ($self->{'_hsps'}) {
952 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
953 return wantarray ?
('-','-') : '-';
956 $seqType ||= (wantarray ?
'list' : 'query');
957 $seqType = 'sbjct' if $seqType eq 'hit';
960 $seqType = "_\L$seqType\E";
961 return $self->{$seqType.'Start'} = $num;
964 # If there is only one HSP, defer this call to the solitary HSP.
965 if($self->num_hsps == 1) {
966 return $self->hsp->start($seqType);
969 # Tiling normally generates $self->{'_queryStart'} and
970 # $self->{'_sbjctStart'}, but is very slow. If we haven't tiled,
971 # find the answer quickly without tiling.
972 unless (defined $self->{'_queryStart'}) {
973 my $earliest_query_start;
974 my $earliest_sbjct_start;
975 foreach my $hsp ($self->hsps) {
976 my $this_query_start = $hsp->start('query');
977 if (! defined $earliest_query_start || $this_query_start < $earliest_query_start) {
978 $earliest_query_start = $this_query_start;
981 my $this_sbjct_start = $hsp->start('sbjct');
982 if (! defined $earliest_sbjct_start || $this_sbjct_start < $earliest_sbjct_start) {
983 $earliest_sbjct_start = $this_sbjct_start;
986 $self->{'_queryStart'} = $earliest_query_start;
987 $self->{'_sbjctStart'} = $earliest_sbjct_start;
991 if ($seqType =~ /list|array/i) {
992 return ($self->{'_queryStart'}, $self->{'_sbjctStart'});
994 ## Sensitive to member name changes.
995 $seqType = "_\L$seqType\E";
996 return $self->{$seqType.'Start'};
1004 Usage : $sbjct->end( [seq_type] );
1005 Purpose : Gets the end coordinate for the query, sbjct, or both sequences
1006 : in the BlastHit object. If there is more than one HSP,
1008 : value of all HSPs is returned.
1009 Example : $qend = $sbjct->end('query');
1010 : $send = $sbjct->end('hit');
1011 : ($qend, $send) = $sbjct->end();
1012 Returns : scalar context: integer
1013 : array context without args: list of two integers
1014 : (queryEnd, sbjctEnd)
1015 : Array context can be "induced" by providing an argument
1016 : of 'list' or 'array'.
1017 Argument : In scalar context: seq_type = 'query' or 'sbjct'
1018 : (case insensitive). If not supplied, 'query' is used.
1021 See Also : L<start()|start>, L<range()|range>, L<strand()|strand>
1026 my ($self, $seqType, $num) = @_;
1028 unless ($self->{'_hsps'}) {
1029 return wantarray ?
('-','-') : '-';
1032 $seqType ||= (wantarray ?
'list' : 'query');
1033 $seqType = 'sbjct' if $seqType eq 'hit';
1035 if( defined $num ) {
1036 $seqType = "_\L$seqType\E";
1037 return $self->{$seqType.'Stop'} = $num;
1040 # If there is only one HSP, defer this call to the solitary HSP.
1041 if($self->num_hsps == 1) {
1042 return $self->hsp->end($seqType);
1045 # Tiling normally generates $self->{'_queryStop'} and
1046 # $self->{'_sbjctStop'}, but is very slow. If we haven't tiled,
1047 # find the answer quickly without tiling.
1048 unless (defined $self->{'_queryStop'}) {
1049 my $latest_query_end;
1050 my $latest_sbjct_end;
1051 foreach my $hsp ($self->hsps) {
1052 my $this_query_end = $hsp->end('query');
1053 if (! defined $latest_query_end || $this_query_end > $latest_query_end) {
1054 $latest_query_end = $this_query_end;
1057 my $this_sbjct_end = $hsp->end('sbjct');
1058 if (! defined $latest_sbjct_end || $this_sbjct_end > $latest_sbjct_end) {
1059 $latest_sbjct_end = $this_sbjct_end;
1062 $self->{'_queryStop'} = $latest_query_end;
1063 $self->{'_sbjctStop'} = $latest_sbjct_end;
1067 if($seqType =~ /list|array/i) {
1068 return ($self->{'_queryStop'}, $self->{'_sbjctStop'});
1070 ## Sensitive to member name changes.
1071 $seqType = "_\L$seqType\E";
1072 return $self->{$seqType.'Stop'};
1079 Usage : $sbjct->range( [seq_type] );
1080 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
1081 : in the HSP alignment.
1082 Example : ($qbeg, $qend) = $sbjct->range('query');
1083 : ($sbeg, $send) = $sbjct->range('hit');
1084 Returns : Two-element array of integers
1085 Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
1086 ('sbjct' is synonymous with 'hit')
1089 See Also : L<start()|start>, L<end()|end>
1094 my ($self, $seqType) = @_;
1095 $seqType ||= 'query';
1096 $seqType = 'sbjct' if $seqType eq 'hit';
1097 return ($self->start($seqType), $self->end($seqType));
1101 =head2 frac_identical
1103 Usage : $hit_object->frac_identical( [seq_type] );
1104 Purpose : Get the overall fraction of identical positions across all HSPs.
1105 : The number refers to only the aligned regions and does not
1106 : account for unaligned regions in between the HSPs, if any.
1107 Example : $frac_iden = $hit_object->frac_identical('query');
1108 Returns : Float (2-decimal precision, e.g., 0.75).
1109 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1110 : default = 'query' (but see comments below).
1111 : ('sbjct' is synonymous with 'hit')
1114 : To compute the fraction identical, the logical length of the
1115 : aligned portion of the sequence is used, meaning that
1116 : in the case of BLAST flavors, for TBLASTN reports, the length of
1117 : the aligned portion of the
1118 : nucleotide hit sequence is divided by 3; for BLASTX reports,
1119 : the length of the aligned portion of the nucleotide query
1120 : sequence is divided by 3. For TBLASTX reports, the length of
1121 : both hit and query sequence are converted.
1122 : This is necessary since the number of identical residues is
1123 : in terms of peptide sequence space.
1125 : Different versions of Blast report different values for the total
1126 : length of the alignment. This is the number reported in the
1127 : denominators in the stats section:
1128 : "Identical = 34/120 Positives = 67/120".
1129 : NCBI BLAST uses the total length of the alignment (with gaps)
1130 : WU-BLAST uses the length of the query sequence (without gaps).
1132 : Therefore, when called with an argument of 'total',
1133 : this method will report different values depending on the
1134 : version of BLAST used. Total does NOT take into account HSP
1135 : tiling, so it should not be used.
1137 : To get the fraction identical among only the aligned residues,
1138 : ignoring the gaps, call this method without an argument or
1139 : with an argument of 'query' or 'hit'.
1141 : If you need data for each HSP, use hsps() and then iterate
1142 : through the HSP objects.
1143 : This method requires that all HSPs be tiled. If they have not
1144 : already been tiled, they will be tiled first automatically.
1146 See Also : L<frac_conserved()|frac_conserved>, L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1150 sub frac_identical
{
1151 my ($self, $seqType) = @_;
1152 $seqType ||= 'query';
1153 $seqType = 'sbjct' if $seqType eq 'hit';
1155 ## Sensitive to member name format.
1156 $seqType = lc($seqType);
1158 unless ($self->{'_hsps'}) {
1159 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
1160 #return wantarray ? ('-','-') : '-';
1164 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
1166 my $ident = $self->matches('id');
1167 my $total = $self->length_aln($seqType);
1168 my $ratio = $ident / $total;
1169 my $ratio_rounded = sprintf( "%.3f", $ratio);
1171 # Round down iff normal rounding yields 1 (just like blast)
1172 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1173 return $ratio_rounded;
1177 =head2 frac_conserved
1179 Usage : $hit_object->frac_conserved( [seq_type] );
1180 Purpose : Get the overall fraction of conserved positions across all HSPs.
1181 : The number refers to only the aligned regions and does not
1182 : account for unaligned regions in between the HSPs, if any.
1183 Example : $frac_cons = $hit_object->frac_conserved('hit');
1184 Returns : Float (2-decimal precision, e.g., 0.75).
1185 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1186 : default = 'query' (but see comments below).
1187 : ('sbjct' is synonymous with 'hit')
1190 : To compute the fraction conserved, the logical length of the
1191 : aligned portion of the sequence is used, meaning that
1192 : in the case of BLAST flavors, for TBLASTN reports, the length of
1193 : the aligned portion of the
1194 : nucleotide hit sequence is divided by 3; for BLASTX reports,
1195 : the length of the aligned portion of the nucleotide query
1196 : sequence is divided by 3. For TBLASTX reports, the length of
1197 : both hit and query sequence are converted.
1198 : This is necessary since the number of conserved residues is
1199 : in terms of peptide sequence space.
1201 : Different versions of Blast report different values for the total
1202 : length of the alignment. This is the number reported in the
1203 : denominators in the stats section:
1204 : "Positives = 34/120 Positives = 67/120".
1205 : NCBI BLAST uses the total length of the alignment (with gaps)
1206 : WU-BLAST uses the length of the query sequence (without gaps).
1208 : Therefore, when called with an argument of 'total',
1209 : this method will report different values depending on the
1210 : version of BLAST used. Total does NOT take into account HSP
1211 : tiling, so it should not be used.
1213 : To get the fraction conserved among only the aligned residues,
1214 : ignoring the gaps, call this method without an argument or
1215 : with an argument of 'query' or 'hit'.
1217 : If you need data for each HSP, use hsps() and then interate
1218 : through the HSP objects.
1219 : This method requires that all HSPs be tiled. If they have not
1220 : already been tiled, they will be tiled first automatically.
1222 See Also : L<frac_identical()|frac_identical>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1226 sub frac_conserved
{
1227 my ($self, $seqType) = @_;
1228 $seqType ||= 'query';
1229 $seqType = 'sbjct' if $seqType eq 'hit';
1231 ## Sensitive to member name format.
1232 $seqType = lc($seqType);
1234 unless ($self->{'_hsps'}) {
1235 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
1236 #return wantarray ? ('-','-') : '-';
1240 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
1242 my $consv = $self->matches('cons');
1243 my $total = $self->length_aln($seqType);
1244 my $ratio = $consv / $total;
1245 my $ratio_rounded = sprintf( "%.3f", $ratio);
1247 # Round down iff normal rounding yields 1 (just like blast)
1248 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1249 return $ratio_rounded;
1255 =head2 frac_aligned_query
1257 Usage : $hit_object->frac_aligned_query();
1258 Purpose : Get the fraction of the query sequence which has been aligned
1259 : across all HSPs (not including intervals between non-overlapping
1261 Example : $frac_alnq = $hit_object->frac_aligned_query();
1262 Returns : Float (2-decimal precision, e.g., 0.75),
1263 : or undef if query length is unknown to avoid division by 0.
1266 Comments : If you need data for each HSP, use hsps() and then interate
1267 : through the HSP objects.
1268 : This method requires that all HSPs be tiled. If they have not
1269 : already been tiled, they will be tiled first automatically.
1271 See Also : L<frac_aligned_hit()|frac_aligned_hit>, L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1275 sub frac_aligned_query
{
1278 unless ($self->{'_hsps'}) {
1279 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
1280 #return wantarray ? ('-','-') : '-';
1284 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
1286 my $qry_len = $self->logical_length('query');
1287 return undef if $qry_len == 0; # Avoid division by 0 crash
1288 sprintf( "%.2f", $self->length_aln('query') / $qry_len);
1293 =head2 frac_aligned_hit
1295 Usage : $hit_object->frac_aligned_hit();
1296 Purpose : Get the fraction of the hit (sbjct) sequence which has been aligned
1297 : across all HSPs (not including intervals between non-overlapping
1299 Example : $frac_alnq = $hit_object->frac_aligned_hit();
1300 Returns : Float (2-decimal precision, e.g., 0.75),
1301 : or undef if hit length is unknown to avoid division by 0.
1304 Comments : If you need data for each HSP, use hsps() and then interate
1305 : through the HSP objects.
1306 : This method requires that all HSPs be tiled. If they have not
1307 : already been tiled, they will be tiled first automatically.
1309 See Also : L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, , L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1313 sub frac_aligned_hit
{
1316 unless ($self->{'_hsps'}) {
1317 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
1318 #return wantarray ? ('-','-') : '-';
1322 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
1324 my $sbjct_len = $self->logical_length('sbjct');
1325 return undef if $sbjct_len == 0; # Avoid division by 0 crash
1326 sprintf( "%.2f", $self->length_aln('sbjct') / $sbjct_len);
1330 ## These methods are being maintained for backward compatibility.
1332 =head2 frac_aligned_sbjct
1334 Same as L<frac_aligned_hit()|frac_aligned_hit>
1338 *frac_aligned_sbjct
= \
&frac_aligned_hit
;
1340 =head2 num_unaligned_sbjct
1342 Same as L<num_unaligned_hit()|num_unaligned_hit>
1346 *num_unaligned_sbjct
= \
&num_unaligned_hit
;
1349 =head2 num_unaligned_hit
1351 Usage : $hit_object->num_unaligned_hit();
1352 Purpose : Get the number of the unaligned residues in the hit sequence.
1353 : Sums across all all HSPs.
1354 Example : $num_unaln = $hit_object->num_unaligned_hit();
1358 Comments : See notes regarding logical lengths in the comments for frac_aligned_hit().
1359 : They apply here as well.
1360 : If you need data for each HSP, use hsps() and then interate
1361 : through the HSP objects.
1362 : This method requires that all HSPs be tiled. If they have not
1363 : already been tiled, they will be tiled first automatically..
1365 See Also : L<num_unaligned_query()|num_unaligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<frac_aligned_hit()|frac_aligned_hit>
1369 sub num_unaligned_hit
{
1372 unless ($self->{'_hsps'}) {
1373 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
1374 #return wantarray ? ('-','-') : '-';
1378 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
1380 my $num = $self->logical_length('sbjct') - $self->length_aln('sbjct');
1381 ($num < 0 ?
0 : $num );
1385 =head2 num_unaligned_query
1387 Usage : $hit_object->num_unaligned_query();
1388 Purpose : Get the number of the unaligned residues in the query sequence.
1389 : Sums across all all HSPs.
1390 Example : $num_unaln = $hit_object->num_unaligned_query();
1394 Comments : See notes regarding logical lengths in the comments for frac_aligned_query().
1395 : They apply here as well.
1396 : If you need data for each HSP, use hsps() and then interate
1397 : through the HSP objects.
1398 : This method requires that all HSPs be tiled. If they have not
1399 : already been tiled, they will be tiled first automatically..
1401 See Also : L<num_unaligned_hit()|num_unaligned_hit>, L<frac_aligned_query()|frac_aligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1405 sub num_unaligned_query
{
1408 unless ($self->{'_hsps'}) {
1409 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
1410 #return wantarray ? ('-','-') : '-';
1414 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
1416 my $num = $self->logical_length('query') - $self->length_aln('query');
1417 ($num < 0 ?
0 : $num );
1424 Usage : $hit->seq_inds( seq_type, class, collapse );
1425 Purpose : Get a list of residue positions (indices) across all HSPs
1426 : for identical or conserved residues in the query or sbjct sequence.
1427 Example : @s_ind = $hit->seq_inds('query', 'identical');
1428 : @h_ind = $hit->seq_inds('hit', 'conserved');
1429 : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
1430 Returns : Array of integers
1431 : May include ranges if collapse is non-zero.
1432 Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
1433 : ('sbjct' is synonymous with 'hit')
1434 : [1] class = 'identical' or 'conserved' (default = 'identical')
1435 : (can be shortened to 'id' or 'cons')
1436 : (actually, anything not 'id' will evaluate to 'conserved').
1437 : [2] collapse = boolean, if non-zero, consecutive positions are merged
1438 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
1439 : collapses to "1-5 7 9-11". This is useful for
1440 : consolidating long lists. Default = no collapse.
1443 See Also : L<Bio::Search::HSP::BlastHSP::seq_inds()|Bio::Search::HSP::BlastHSP>
1448 my ($self, $seqType, $class, $collapse) = @_;
1450 $seqType ||= 'query';
1451 $class ||= 'identical';
1454 $seqType = 'sbjct' if $seqType eq 'hit';
1457 foreach $hsp ($self->hsps) {
1458 # This will merge data for all HSPs together.
1459 push @inds, $hsp->seq_inds($seqType, $class);
1462 # Need to remove duplicates and sort the merged positions.
1464 my %tmp = map { $_, 1 } @inds;
1465 @inds = sort {$a <=> $b} keys %tmp;
1468 $collapse ?
&Bio
::Search
::SearchUtils
::collapse_nums
(@inds) : @inds;
1474 See documentation in L<Bio::Search::Hit::HitI::strand()|Bio::Search::Hit::HitI>
1479 my ($self, $seqType, $strnd) = @_;
1481 unless ($self->{'_hsps'}) {
1482 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
1483 return wantarray ?
('-','-') : '-';
1487 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
1489 $seqType ||= (wantarray ?
'list' : 'query');
1490 $seqType = 'sbjct' if $seqType eq 'hit';
1492 $seqType = lc($seqType);
1494 if( defined $strnd ) {
1495 $self->throw("Can't set strand for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
1497 return $self->{'_strand_'.$seqType} = $strnd;
1501 # If there is only one HSP, defer this call to the solitary HSP.
1502 if($self->num_hsps == 1) {
1503 return $self->hsp->strand($seqType);
1505 elsif( defined $self->{'_strand_query'}) {
1506 # Get the data computed during hsp tiling.
1507 $qstr = $self->{'_strand_query'};
1508 $hstr = $self->{'_strand_sbjct'}
1511 # otherwise, iterate through all HSPs collecting strand info.
1512 # This will return the string "-1/1" if there are HSPs on different strands.
1513 # NOTE: This was the pre-10/21/02 procedure which will no longer be used,
1514 # (unless the above elsif{} is commented out).
1516 foreach my $hsp( $self->hsps ) {
1517 my ( $q, $h ) = $hsp->strand();
1521 $qstr = join( '/', sort keys %qstr);
1522 $hstr = join( '/', sort keys %hstr);
1525 if($seqType =~ /list|array/i) {
1526 return ($qstr, $hstr);
1527 } elsif( $seqType eq 'query' ) {
1536 See documentation in L<Bio::Search::Hit::HitI::frame()|Bio::Search::Hit::HitI>
1541 my( $self, $frm ) = @_;
1543 unless ($self->{'_hsps'}) {
1544 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
1545 #return wantarray ? ('-','-') : '-';
1549 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
1551 if( defined $frm ) {
1552 return $self->{'_frame'} = $frm;
1555 # The check for $self->{'_frame'} is a remnant from the 'query' mode days
1556 # in which the sbjct object would collect data from the description line only.
1559 if(not defined($self->{'_frame'})) {
1560 $frame = $self->hsp->frame('hit');
1562 $frame = $self->{'_frame'};
1570 Usage : $obj->rank($newval)
1571 Function: Get/Set the rank of this Hit in the Query search list
1572 i.e. this is the Nth hit for a specific query
1573 Returns : value of rank
1574 Args : newvalue (optional)
1581 return $self->{'_rank'} = shift if @_;
1582 return $self->{'_rank'} || 1;
1588 Usage : $locus = $hit->locus();
1589 Function: Retrieve the locus (if available) for the hit
1590 Returns : a scalar string (empty string if not set)
1596 my ($self,$value) = @_;
1597 my $previous = $self->{'_locus'};
1598 if( defined $value || ! defined $previous ) {
1599 unless (defined $value) {
1600 if ($self->{'_name'} =~/(gb|emb|dbj|ref)\|(.*)\|(.*)/) {
1601 $value = $previous = $3;
1603 $value = $previous = '';
1606 $self->{'_locus'} = $value;
1611 =head2 each_accession_number
1613 Title : each_accession_number
1614 Usage : @each_accession_number = $hit->each_accession_number();
1615 Function: Get each accession number listed in the description of the hit.
1616 If there are no alternatives, then only the primary accession will
1618 Returns : list of all accession numbers in the description
1623 sub each_accession_number
{
1624 my ($self,$value) = @_;
1625 my $desc = $self->{'_description'};
1626 #put primary accnum on the list
1628 push (@accnums,$self->{'_accession'});
1629 if( defined $desc ) {
1630 while ($desc =~ /(\b\S+\|\S*\|\S*\s?)/g) {
1632 my ($acc, $version);
1633 if ($id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|tp[gde])\|(.*)\|(.*)/) {
1634 ($acc, $version) = split /\./, $2;
1635 } elsif ($id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/) {
1636 ($acc, $version) = split /\./, $3;
1637 } elsif( $id =~ /(gim|gi|bbm|bbs|lcl)\|(\d*)/) {
1639 } elsif( $id =~ /(oth)\|(.*)\|(.*)\|(.*)/ ) { # discontinued...
1640 ($acc,$version) = ($2);
1642 #punt, not matching the db's at ftp://ftp.ncbi.nih.gov/blast/db/README
1643 #Database Name Identifier Syntax
1644 #============================ ========================
1645 #GenBank gb|accession|locus
1646 #EMBL Data Library emb|accession|locus
1647 #DDBJ, DNA Database of Japan dbj|accession|locus
1648 #NBRF PIR pir||entry
1649 #Protein Research Foundation prf||name
1650 #SWISS-PROT sp|accession|entry name
1651 #Brookhaven Protein Data Bank pdb|entry|chain
1652 #Patents pat|country|number
1653 #GenInfo Backbone Id bbs|number
1654 #General database identifier gnl|database|identifier
1655 #NCBI Reference Sequence ref|accession|locus
1656 #Local Sequence identifier lcl|identifier
1659 push(@accnums, $acc);
1667 See documentation in L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1673 return $self->{'_tiled_hsps'} = shift if @_;
1674 return $self->{'_tiled_hsps'};
1679 Title : query_length
1680 Usage : $obj->query_length($newval)
1681 Function: Get/Set the query_length
1682 Returns : value of query_length (a scalar)
1683 Args : on set, new value (a scalar or undef, optional)
1689 my ($self,$value) = @_;
1690 my $previous = $self->{'_query_length'};
1691 if( defined $value || ! defined $previous ) {
1692 $value = $previous = 0 unless defined $value;
1693 $self->{'_query_length'} = $value;
1701 Usage : $acc = $hit->ncbi_gi();
1702 Function: Retrieve the NCBI Unique ID (aka the GI #),
1703 if available, for the hit
1704 Returns : a scalar string (empty string if not set)
1706 Note : As of Sept. 2016 NCBI records will no longer have a
1707 GI; this attributue will remain in place for older
1713 my ($self,$value) = @_;
1714 if( defined $value ) {
1715 $self->{'_ncbi_gi'} = $value;
1717 $self->{'_ncbi_gi'} = $self->name =~ m{^gi\|(\d+)} ?
$1 : '';
1719 return $self->{'_ncbi_gi'};
1723 # sort method for HSPs
1728 Usage : $result->sort_hsps(\&sort_function)
1729 Function : Sorts the available HSP objects by a user-supplied function. Defaults to sort
1730 by descending score.
1732 Args : A coderef for the sort function. See the documentation on the Perl sort()
1733 function for guidelines on writing sort functions.
1734 Note : To access the special variables $a and $b used by the Perl sort() function
1735 the user function must access Bio::Search::Hit::HitI namespace.
1737 $hit->sort_hsps( sub{$Bio::Search::Result::HitI::a->length <=>
1738 $Bio::Search::Result::HitI::b->length});
1739 NOT $hit->sort_hsps($a->length <=> $b->length);
1744 my ($self, $coderef) = @_;
1748 $self->throw('sort_hsps requires a sort function passed as a subroutine reference')
1749 unless (ref($coderef) eq 'CODE');
1752 $coderef = \
&_default_sort_hsps
;
1756 my @hsps = $self->hsps();
1757 eval {@sorted_hsps = sort $coderef @hsps };
1760 $self->throw("Unable to sort hsps: $@");
1763 $self->{'_hsps'} = \
@sorted_hsps;
1770 Usage : $hit->iteration( $iteration_num );
1771 Purpose : Gets the iteration number in which the Hit was found.
1772 Example : $iteration_num = $sbjct->iteration();
1773 Returns : Integer greater than or equal to 1
1774 Non-PSI-BLAST reports may report iteration as 1, but this number
1775 is only meaningful for PSI-BLAST reports.
1776 Argument : iteration_num (optional, used when setting only)
1779 See Also : L<found_again()|found_again>
1784 my ($self,$value) = @_;
1785 if( defined $value) {
1786 $self->{'_psiblast_iteration'} = $value;
1788 return $self->{'_psiblast_iteration'};
1794 Usage : $hit->found_again;
1795 $hit->found_again(1);
1796 Purpose : Gets a boolean indicator whether or not the hit has
1797 been found in a previous iteration.
1798 This is only applicable to PSI-BLAST reports.
1800 This method indicates if the hit was reported in the
1801 "Sequences used in model and found again" section of the
1802 PSI-BLAST report or if it was reported in the
1803 "Sequences not found previously or not previously below threshold"
1804 section of the PSI-BLAST report. Only for hits in iteration > 1.
1806 Example : if( $hit->found_again()) { ... };
1807 Returns : Boolean, true (1) if the hit has been found in a
1808 previous PSI-BLAST iteration.
1809 Returns false (0 or undef) for hits that have not occurred in a
1810 previous PSI-BLAST iteration.
1811 Argument : Boolean (1 or 0). Only used for setting.
1814 See Also : L<iteration()|iteration>
1820 return $self->{'_found_again'} = shift if @_;
1821 return $self->{'_found_again'};