maint: remove Travis stuff which has been replaced with Github actions (#325)
[bioperl-live.git] / lib / Bio / Search / Hit / GenericHit.pm
blobe1d8b04d5f5fce20de2eed5e72c34207ae1cf1a0
2 # BioPerl module for Bio::Search::Hit::GenericHit
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Search::Hit::GenericHit - A generic implementation of the Bio::Search::Hit::HitI interface
18 =head1 SYNOPSIS
20 use Bio::Search::Hit::GenericHit;
21 my $hit = Bio::Search::Hit::GenericHit->new(-algorithm => 'blastp');
23 # typically one gets HitI objects from a SearchIO stream via a ResultI
24 use Bio::SearchIO;
25 my $parser = Bio::SearchIO->new(-format => 'blast', -file => 'result.bls');
27 my $result = $parser->next_result;
28 my $hit = $result->next_hit;
30 # TODO: Describe how to configure a SearchIO stream so that it generates
31 # GenericHit objects.
33 =head1 DESCRIPTION
35 This object handles the hit data from a Database Sequence Search such
36 as FASTA or BLAST.
38 Unless you're writing a parser, you won't ever need to create a
39 GenericHit or any other HitI-implementing object. If you use
40 the SearchIO system, HitI objects are created automatically from
41 a SearchIO stream which returns Bio::Search::Hit::HitI objects.
43 For documentation on what you can do with GenericHit (and other HitI
44 objects), please see the API documentation in
45 L<Bio::Search::Hit::HitI|Bio::Search::Hit::HitI>.
47 =head1 FEEDBACK
49 =head2 Mailing Lists
51 User feedback is an integral part of the evolution of this and other
52 Bioperl modules. Send your comments and suggestions preferably to
53 the Bioperl mailing list. Your participation is much appreciated.
55 bioperl-l@bioperl.org - General discussion
56 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
58 =head2 Support
60 Please direct usage questions or support issues to the mailing list:
62 I<bioperl-l@bioperl.org>
64 rather than to the module maintainer directly. Many experienced and
65 reponsive experts will be able look at the problem and quickly
66 address it. Please include a thorough description of the problem
67 with code and data examples if at all possible.
69 =head2 Reporting Bugs
71 Report bugs to the Bioperl bug tracking system to help us keep track
72 of the bugs and their resolution. Bug reports can be submitted via the
73 web:
75 https://github.com/bioperl/bioperl-live/issues
77 =head1 AUTHOR - Jason Stajich and Steve Chervitz
79 Email jason-at-bioperl-dot-org
80 Email sac-at-bioperl-dot-org
82 =head1 CONTRIBUTORS
84 Sendu Bala, bix@sendu.me.uk
86 =head1 APPENDIX
88 The rest of the documentation details each of the object methods.
89 Internal methods are usually preceded with a _
91 =cut
94 # Let the code begin...
97 package Bio::Search::Hit::GenericHit;
99 use strict;
101 use Bio::Search::SearchUtils;
103 use base qw(Bio::Root::Root Bio::Search::Hit::HitI);
105 =head2 new
107 Title : new
108 Usage : my $obj = Bio::Search::Hit::GenericHit->new();
109 Function: Builds a new Bio::Search::Hit::GenericHit object
110 Returns : Bio::Search::Hit::GenericHit
111 Args : -name => Name of Hit (required)
112 -description => Description (optional)
113 -accession => Accession number (optional)
114 -ncbi_gi => NCBI GI UID (optional)
115 -length => Length of the Hit (optional)
116 -score => Raw Score for the Hit (optional)
117 -bits => Bit Score for the Hit (optional)
118 -significance => Significance value for the Hit (optional)
119 -algorithm => Algorithm used (BLASTP, FASTX, etc...)
120 -hsps => Array ref of HSPs for this Hit.
121 -found_again => boolean, true if hit appears in a
122 "previously found" section of a PSI-Blast report.
123 -hsp_factory => Bio::Factory::ObjectFactoryI able to create HSPI
124 objects.
126 =cut
128 sub new {
129 my($class,@args) = @_;
131 my $self = $class->SUPER::new(@args);
132 my ($hsps, $name,$query_len,$desc, $acc, $locus, $length,
133 $score,$algo,$signif,$bits, $p,
134 $rank, $hsp_factory, $gi, $iter, $found) = $self->_rearrange([qw(HSPS
135 NAME
136 QUERY_LEN
137 DESCRIPTION
138 ACCESSION
139 LOCUS
140 LENGTH SCORE ALGORITHM
141 SIGNIFICANCE BITS P
142 RANK
143 HSP_FACTORY
144 NCBI_GI
145 ITERATION
146 FOUND_AGAIN)], @args);
148 defined $query_len && $self->query_length($query_len);
150 if( ! defined $name ) {
151 $self->throw("Must have defined a valid name for Hit");
152 } else {
153 $self->name($name);
156 defined $acc && $self->accession($acc);
157 defined $locus && $self->locus($locus);
158 defined $desc && $self->description($desc);
159 defined $length && $self->length($length);
160 defined $algo && $self->algorithm($algo);
161 defined $signif && $self->significance($signif);
162 defined $score && $self->raw_score($score);
163 defined $bits && $self->bits($bits);
164 defined $rank && $self->rank($rank);
165 defined $hsp_factory && $self->hsp_factory($hsp_factory);
166 defined $gi && $self->ncbi_gi($gi);
167 defined $iter && $self->iteration($iter);
168 defined $found && $self->found_again($found);
169 # p() has a weird interface, so this is a hack workaround
170 if (defined $p) {
171 $self->{_p} = $p;
174 $self->{'_iterator'} = 0;
175 if( defined $hsps ) {
176 if( ref($hsps) !~ /array/i ) {
177 $self->warn("Did not specify a valid array ref for the param HSPS ($hsps)");
178 } else {
179 my $hspcount=0;
180 while( @{$hsps} ) {
181 $hspcount++;
182 $self->add_hsp(shift @{$hsps} );
184 $self->{'_hsps'} = undef if $hspcount == 0;
187 else {
188 $self->{'_hsps'} = undef;
191 return $self;
194 =head2 add_hsp
196 Title : add_hsp
197 Usage : $hit->add_hsp($hsp)
198 Function: Add a HSP to the collection of HSPs for a Hit
199 Returns : number of HSPs in the Hit
200 Args : Bio::Search::HSP::HSPI object, OR hash ref containing data suitable
201 for creating a HSPI object (&hsp_factory must be set to get it back)
203 =cut
205 sub add_hsp {
206 my ($self,$hsp) = @_;
207 if (!defined $hsp || (ref($hsp) ne 'HASH' && !$hsp->isa('Bio::Search::HSP::HSPI'))) {
208 $self->throw("Must provide a valid Bio::Search::HSP::HSPI object or hash ref to object: $self method: add_hsp value: $hsp");
209 return;
212 push @{$self->{'_hsps'}}, $hsp;
213 if (ref($hsp) eq 'HASH') {
214 $self->{_hashes}->{$#{$self->{'_hsps'}}} = 1;
216 return scalar @{$self->{'_hsps'}};
219 =head2 hsp_factory
221 Title : hsp_factory
222 Usage : $hit->hsp_factory($hsp_factory)
223 Function: Get/set the factory used to build HSPI objects if necessary.
224 Returns : Bio::Factory::ObjectFactoryI
225 Args : Bio::Factory::ObjectFactoryI
227 =cut
229 sub hsp_factory {
230 my $self = shift;
231 if (@_) { $self->{_hsp_factory} = shift }
232 return $self->{_hsp_factory} || return;
235 =head2 Bio::Search::Hit::HitI methods
237 Implementation of Bio::Search::Hit::HitI methods
239 =head2 name
241 Title : name
242 Usage : $hit_name = $hit->name();
243 Function: returns the name of the Hit sequence
244 Returns : a scalar string
245 Args : [optional] scalar string to set the name
247 =cut
249 sub name {
250 my ($self,$value) = @_;
251 my $previous = $self->{'_name'};
252 if( defined $value || ! defined $previous ) {
253 $value = $previous = '' unless defined $value;
254 $self->{'_name'} = $value;
256 return $previous;
259 =head2 accession
261 Title : accession
262 Usage : $acc = $hit->accession();
263 Function: Retrieve the accession (if available) for the hit
264 Returns : a scalar string (empty string if not set)
265 Args : none
267 =cut
269 sub accession {
270 my ($self,$value) = @_;
271 my $previous = $self->{'_accession'};
272 if( defined $value || ! defined $previous ) {
273 $value = $previous = '' unless defined $value;
274 $self->{'_accession'} = $value;
276 return $previous;
279 =head2 description
281 Title : description
282 Usage : $desc = $hit->description();
283 Function: Retrieve the description for the hit
284 Returns : a scalar string
285 Args : [optional] scalar string to set the description
287 =cut
289 sub description {
290 my ($self,$value) = @_;
291 my $previous = $self->{'_description'};
292 if( defined $value || ! defined $previous ) {
293 $value = $previous = '' unless defined $value;
294 $self->{'_description'} = $value;
296 return $previous;
299 =head2 length
301 Title : length
302 Usage : my $len = $hit->length
303 Function: Returns the length of the hit
304 Returns : integer
305 Args : [optional] integer to set the length
307 =cut
309 sub length {
310 my ($self,$value) = @_;
311 my $previous = $self->{'_length'};
312 if( defined $value || ! defined $previous ) {
313 $value = $previous = 0 unless defined $value;
314 $self->{'_length'} = $value;
316 return $previous;
320 =head2 algorithm
322 Title : algorithm
323 Usage : $alg = $hit->algorithm();
324 Function: Gets the algorithm specification that was used to obtain the hit
325 For BLAST, the algorithm denotes what type of sequence was aligned
326 against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
327 dna-prt, TBLASTN prt-translated dna, TBLASTX translated
328 dna-translated dna).
329 Returns : a scalar string
330 Args : [optional] scalar string to set the algorithm
332 =cut
334 sub algorithm {
335 my ($self,$value) = @_;
336 my $previous = $self->{'_algorithm'};
337 if( defined $value || ! defined $previous ) {
338 $value = $previous = '' unless defined $value;
339 $self->{'_algorithm'} = $value;
341 return $previous;
344 =head2 raw_score
346 Title : raw_score
347 Usage : $score = $hit->raw_score();
348 Function: Gets the "raw score" generated by the algorithm. What
349 this score is exactly will vary from algorithm to algorithm,
350 returning undef if unavailable.
351 Returns : a scalar value
352 Args : [optional] scalar value to set the raw score
354 =cut
356 sub raw_score {
357 my ($self,$value) = @_;
358 my $previous = $self->{'_score'};
359 if( defined $value ) {
360 $self->{'_score'} = $value;
361 } elsif ( ! defined $previous ) {
362 # Set the bits of the Hit to that of the top HSP.
363 unless( defined $self->{'_hsps'}->[0] ) {
364 $self->warn("No HSPs for this minimal Hit (".$self->name.")\n".
365 "If using NCBI BLAST, check bits() instead");
366 return;
368 # use 'score' if available
369 if ( defined( ($self->hsps)[0]->score ) ) {
370 $previous = $self->{'_score'} = ($self->hsps)[0]->score;
372 # otherwise use 'bits'
373 elsif ( defined( ($self->hsps)[0]->bits ) ) {
374 $previous = $self->{'_score'} = ($self->hsps)[0]->bits;
377 return $previous;
380 =head2 score
382 Equivalent to L<raw_score()|raw_score>
384 =cut
386 sub score { shift->raw_score(@_); }
388 =head2 significance
390 Title : significance
391 Usage : $significance = $hit->significance();
392 Function: Used to obtain the E or P value of a hit, i.e. the probability that
393 this particular hit was obtained purely by random chance. If
394 information is not available (nor calculatable from other
395 information sources), return undef.
396 Returns : a scalar value or undef if unavailable
397 Args : [optional] scalar value to set the significance
399 =cut
401 sub significance {
402 my ($self,$value) = @_;
403 my $previous = $self->{'_significance'};
404 if( defined $value ) {
405 $self->{'_significance'} = $value;
406 } elsif ( ! defined $previous ) {
407 unless( defined $self->{'_hsps'}->[0] ) {
408 $self->warn("No HSPs for this Hit (".$self->name.")");
409 return;
411 # Set the significance of the Hit to that of the top HSP.
412 $previous = $self->{'_significance'} = ($self->hsps)[0]->significance;
415 return $previous;
418 =head2 bits
420 Usage : $hit_object->bits();
421 Purpose : Gets the bit score of the best HSP for the current hit.
422 Example : $bits = $hit_object->bits();
423 Returns : Integer or undef if bit score is not set
424 Argument : n/a
425 Comments : For BLAST1, the non-bit score is listed in the summary line.
427 See Also : L<score()|score>
429 =cut
431 sub bits {
432 my ($self,$value) = @_;
433 my $previous = $self->{'_bits'};
434 if( defined $value ) {
435 $self->{'_bits'} = $value;
436 } elsif ( ! defined $previous ) {
437 # Set the bits of the Hit to that of the top HSP.
438 unless( defined $self->{'_hsps'}->[0] ) {
439 $self->warn("No HSPs for this minimal Hit (".$self->name.")\n".
440 "If using WU-BLAST, check raw_score() instead");
441 return;
443 $previous = $self->{'_bits'} = ($self->hsps)[0]->bits;
445 return $previous;
448 =head2 next_hsp
450 Title : next_hsp
451 Usage : while( $hsp = $obj->next_hsp()) { ... }
452 Function : Returns the next available High Scoring Pair
453 Example :
454 Returns : Bio::Search::HSP::HSPI object or null if finished
455 Args : none
457 =cut
459 sub next_hsp {
460 my $self = shift;
461 $self->{'_iterator'} = 0 unless defined $self->{'_iterator'};
462 return unless
463 defined($self->{'_hsps'})
464 && $self->{'_iterator'} <= scalar @{$self->{'_hsps'}};
466 my $iterator = $self->{'_iterator'}++;
467 my $hsp = $self->{'_hsps'}->[$iterator] || return;
468 if (ref($hsp) eq 'HASH') {
469 my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
470 $hsp = $factory->create_object(%{$hsp});
471 $self->{'_hsps'}->[$iterator] = $hsp;
472 delete $self->{_hashes}->{$iterator};
474 return $hsp;
478 =head2 hsps
480 Usage : $hit_object->hsps();
481 Purpose : Get a list containing all HSP objects.
482 : Get the numbers of HSPs for the current hit.
483 Example : @hsps = $hit_object->hsps();
484 : $num = $hit_object->hsps(); # alternatively, use num_hsps()
485 Returns : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
486 : Scalar context: integer (number of HSPs).
487 : (Equivalent to num_hsps()).
488 Argument : n/a. Relies on wantarray
489 Throws : Exception if the HSPs have not been collected.
491 See Also : L<hsp()|hsp>, L<num_hsps()|num_hsps>
493 =cut
495 sub hsps {
496 my $self = shift;
497 foreach my $i (keys %{$self->{_hashes} || {}}) {
498 my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
499 $self->{'_hsps'}->[$i] = $factory->create_object(%{$self->{'_hsps'}->[$i]});
500 delete $self->{_hashes}->{$i};
503 return wantarray() ? @{$self->{'_hsps'} || []} : scalar(@{$self->{'_hsps'} || []});
506 =head2 num_hsps
508 Usage : $hit_object->num_hsps();
509 Purpose : Get the number of HSPs for the present hit.
510 Example : $nhsps = $hit_object->num_hsps();
511 Returns : Integer or '-' if HSPs have not been callected
512 Argument : n/a
514 See Also : L<hsps()|hsps>
516 =cut
518 sub num_hsps {
519 my $self = shift;
521 unless ($self->{'_hsps'}) {
522 return '-';
525 return scalar(@{$self->{'_hsps'}});
528 =head2 rewind
530 Title : rewind
531 Usage : $hit->rewind;
532 Function: Allow one to reset the HSP iterator to the beginning
533 Since this is an in-memory implementation
534 Returns : none
535 Args : none
537 =cut
539 sub rewind{
540 my ($self) = @_;
541 $self->{'_iterator'} = 0;
544 =head2 ambiguous_aln
546 Usage : $ambig_code = $hit_object->ambiguous_aln();
547 Purpose : Sets/Gets ambiguity code data member.
548 Example : (see usage)
549 Returns : String = 'q', 's', 'qs', '-'
550 : 'q' = query sequence contains overlapping sub-sequences
551 : while sbjct does not.
552 : 's' = sbjct sequence contains overlapping sub-sequences
553 : while query does not.
554 : 'qs' = query and sbjct sequence contains overlapping sub-sequences
555 : relative to each other.
556 : '-' = query and sbjct sequence do not contains multiple domains
557 : relative to each other OR both contain the same distribution
558 : of similar domains.
559 Argument : n/a
560 Throws : n/a
561 Comment : Note: "sbjct" is synonymous with "hit"
563 =cut
565 sub ambiguous_aln {
566 my $self = shift;
567 if(@_) { $self->{'_ambiguous_aln'} = shift; }
568 $self->{'_ambiguous_aln'} || '-';
571 =head2 overlap
573 See documentation in L<Bio::Search::Hit::HitI::overlap()|Bio::Search::Hit::HitI>
575 =cut
577 sub overlap {
578 my $self = shift;
579 if(@_) { $self->{'_overlap'} = shift; }
580 defined $self->{'_overlap'} ? $self->{'_overlap'} : 0;
584 =head2 n
586 Usage : $hit_object->n();
587 Purpose : Gets the N number for the current hit.
588 : This is the number of HSPs in the set which was ascribed
589 : the lowest P-value (listed on the description line).
590 : This number is not the same as the total number of HSPs.
591 : To get the total number of HSPs, use num_hsps().
592 Example : $n = $hit_object->n();
593 Returns : Integer
594 Argument : n/a
595 Throws : Exception if HSPs have not been set (BLAST2 reports).
596 Comments : Note that the N parameter is not reported in gapped BLAST2.
597 : Calling n() on such reports will result in a call to num_hsps().
598 : The num_hsps() method will count the actual number of
599 : HSPs in the alignment listing, which may exceed N in
600 : some cases.
602 See Also : L<num_hsps()|num_hsps>
604 =cut
606 sub n {
607 my $self = shift;
609 # The check for $self->{'_n'} is a remnant from the 'query' mode days
610 # in which the sbjct object would collect data from the description
611 # line only.
613 my ($n);
614 if(not defined($self->{'_n'})) {
615 if( $self->hsp ) {
616 $n = $self->hsp->n;
618 } else {
619 $n = $self->{'_n'};
621 $n ||= $self->num_hsps;
623 return $n;
626 =head2 p
628 Usage : $hit_object->p( [format] );
629 Purpose : Get the P-value for the best HSP of the given BLAST hit.
630 : (Note that P-values are not provided with NCBI Blast2 reports).
631 Example : $p = $sbjct->p;
632 : $p = $sbjct->p('exp'); # get exponent only.
633 : ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
634 Returns : Float or scientific notation number (the raw P-value, DEFAULT).
635 : Integer if format == 'exp' (the magnitude of the base 10 exponent).
636 : 2-element list (float, int) if format == 'parts' and P-value
637 : is in scientific notation (See Comments).
638 Argument : format: string of 'raw' | 'exp' | 'parts'
639 : 'raw' returns value given in report. Default. (1.2e-34)
640 : 'exp' returns exponent value only (34)
641 : 'parts' returns the decimal and exponent as a
642 : 2-element list (1.2, -34) (See Comments).
643 Throws : Warns if no P-value is defined. Uses expect instead.
644 Comments : Using the 'parts' argument is not recommended since it will not
645 : work as expected if the P-value is not in scientific notation.
646 : That is, floats are not converted into sci notation before
647 : splitting into parts.
649 See Also : L<expect()|expect>, L<significance()|significance>, L<Bio::Search::SearchUtils::get_exponent()|Bio::Search::SearchUtils>
651 =cut
653 sub p {
654 # Some duplication of logic for p(), expect() and signif() for the sake of performance.
655 my ($self, $fmt) = @_;
657 my $val = $self->{'_p'};
659 # $val can be zero.
660 if(!defined $val) {
661 # P-value not defined, must be a NCBI Blast2 report.
662 # Use expect instead.
663 $self->warn( "P-value not defined. Using significance() instead.");
664 $val = $self->significance();
667 return $val if not $fmt or $fmt =~ /^raw/i;
668 ## Special formats: exponent-only or as list.
669 return &Bio::Search::SearchUtils::get_exponent($val) if $fmt =~ /^exp/i;
670 return (split (/eE/, $val)) if $fmt =~ /^parts/i;
672 ## Default: return the raw P-value.
673 return $val;
676 =head2 hsp
678 Usage : $hit_object->hsp( [string] );
679 Purpose : Get a single HSPI object for the present HitI object.
680 Example : $hspObj = $hit_object->hsp; # same as 'best'
681 : $hspObj = $hit_object->hsp('best');
682 : $hspObj = $hit_object->hsp('worst');
683 Returns : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
684 Argument : String (or no argument).
685 : No argument (default) = highest scoring HSP (same as 'best').
686 : 'best' or 'first' = highest scoring HSP.
687 : 'worst' or 'last' = lowest scoring HSP.
688 Throws : Exception if the HSPs have not been collected.
689 : Exception if an unrecognized argument is used.
691 See Also : L<hsps()|hsps>, L<num_hsps>()
693 =cut
695 sub hsp {
696 my( $self, $option ) = @_;
697 $option ||= 'best';
699 if (not ref $self->{'_hsps'}) {
700 $self->throw("Can't get HSPs: data not collected.");
703 my @hsps = $self->hsps;
705 return $hsps[0] if $option =~ /best|first|1/i;
706 return $hsps[$#hsps] if $option =~ /worst|last/i;
708 $self->throw("Can't get HSP for: $option\n" .
709 "Valid arguments: 'best', 'worst'");
712 =head2 logical_length
714 Usage : $hit_object->logical_length( [seq_type] );
715 : (mostly intended for internal use).
716 Purpose : Get the logical length of the hit sequence.
717 : This is necessary since the number of identical/conserved residues
718 : can be in terms of peptide sequence space, yet the query and/or hit
719 : sequence are in nucleotide space.
720 Example : $len = $hit_object->logical_length();
721 Returns : Integer
722 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
723 ('sbjct' is synonymous with 'hit')
724 Throws : n/a
725 Comments :
726 : In the case of BLAST flavors:
727 : For TBLASTN reports, the length of the aligned portion of the
728 : nucleotide hit sequence is divided by 3; for BLASTX reports,
729 : the length of the aligned portion of the nucleotide query
730 : sequence is divided by 3. For TBLASTX reports, the length of
731 : both hit and query sequence are converted.
733 : This is important for functions like frac_aligned_query()
734 : which need to operate in amino acid coordinate space when dealing
735 : with [T]BLAST[NX] type reports.
737 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>
739 =cut
741 sub logical_length {
742 my $self = shift;
743 my $seqType = shift || 'query';
744 $seqType = 'sbjct' if $seqType eq 'hit';
746 my ($length, $logical);
747 my $algo = $self->algorithm;
749 # For the sbjct, return logical sbjct length
750 if( $seqType eq 'sbjct' ) {
751 $length = $self->length;
752 } else {
753 # Otherwise, return logical query length
754 $length = $self->query_length();
757 $logical = Bio::Search::SearchUtils::logical_length($algo, $seqType, $length);
759 return int($logical);
762 =head2 length_aln
764 Usage : $hit_object->length_aln( [seq_type] );
765 Purpose : Get the total length of the aligned region for query or sbjct seq.
766 : This number will include all HSPs
767 Example : $len = $hit_object->length_aln(); # default = query
768 : $lenAln = $hit_object->length_aln('query');
769 Returns : Integer
770 Argument : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
771 ('sbjct' is synonymous with 'hit')
772 Throws : Exception if the argument is not recognized.
773 Comments : This method will report the logical length of the alignment,
774 : meaning that for TBLAST[NX] reports, the length is reported
775 : using amino acid coordinate space (i.e., nucleotides / 3).
777 : This method requires that all HSPs be tiled. If they have not
778 : already been tiled, they will be tiled first automatically..
779 : If you don't want the tiled data, iterate through each HSP
780 : calling length() on each (use hsps() to get all HSPs).
782 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>, L<gaps()|gaps>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<Bio::Search::HSP::BlastHSP::length()|Bio::Search::HSP::BlastHSP>
784 =cut
786 sub length_aln {
787 my( $self, $seqType, $num ) = @_;
789 $seqType ||= 'query';
790 $seqType = 'sbjct' if $seqType eq 'hit';
792 # Setter:
793 if( defined $num) {
794 return $self->{'_length_aln_'.$seqType} = $num;
797 unless ($self->{'_hsps'}) {
798 #return wantarray ? ('-','-') : '-';
799 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
800 return '-';
803 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
805 my $data = $self->{'_length_aln_'.$seqType};
807 ## If we don't have data, figure out what went wrong.
808 if(!$data) {
809 $self->throw("Can't get length aln for sequence type \"$seqType\". " .
810 "Valid types are 'query', 'hit', 'sbjct' ('sbjct' = 'hit')");
812 return $data;
815 =head2 gaps
817 Usage : $hit_object->gaps( [seq_type] );
818 Purpose : Get the number of gaps in the aligned query, hit, or both sequences.
819 : Data is summed across all HSPs.
820 Example : $qgaps = $hit_object->gaps('query');
821 : $hgaps = $hit_object->gaps('hit');
822 : $tgaps = $hit_object->gaps(); # default = total (query + hit)
823 Returns : scalar context: integer
824 : array context without args: two-element list of integers
825 : (queryGaps, hitGaps)
826 : Array context can be forced by providing an argument of 'list' or 'array'.
828 : CAUTION: Calling this method within printf or sprintf is arrray context.
829 : So this function may not give you what you expect. For example:
830 : printf "Total gaps: %d", $hit->gaps();
831 : Actually returns a two-element array, so what gets printed
832 : is the number of gaps in the query, not the total
834 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list' (default = 'total')
835 ('sbjct' is synonymous with 'hit')
836 Throws : n/a
837 Comments : If you need data for each HSP, use hsps() and then interate
838 : through each HSP object.
839 : This method requires that all HSPs be tiled. If they have not
840 : already been tiled, they will be tiled first automatically..
841 : Not relying on wantarray since that will fail in situations
842 : such as printf "%d", $hit->gaps() in which you might expect to
843 : be printing the total gaps, but evaluates to array context.
845 See Also : L<length_aln()|length_aln>
847 =cut
849 sub gaps {
850 my( $self, $seqType, $num ) = @_;
852 $seqType ||= (wantarray ? 'list' : 'total');
853 $seqType = 'sbjct' if $seqType eq 'hit';
855 unless ($self->{'_hsps'}) {
856 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
857 return wantarray ? ('-','-') : '-';
858 #return '-';
861 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
863 $seqType = lc($seqType);
865 if( defined $num ) {
866 $self->throw("Can't set gaps for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
868 return $self->{'_gaps_'.$seqType} = $num;
870 elsif($seqType =~ /list|array/i) {
871 return ($self->{'_gaps_query'}, $self->{'_gaps_sbjct'});
873 elsif($seqType eq 'total') {
874 return ($self->{'_gaps_query'} + $self->{'_gaps_sbjct'}) || 0;
875 } else {
876 return $self->{'_gaps_'.$seqType} || 0;
881 =head2 matches
883 See documentation in L<Bio::Search::Hit::HitI::matches()|Bio::Search::Hit::HitI>
885 =cut
887 sub matches {
888 my( $self, $arg1, $arg2) = @_;
889 my(@data,$data);
891 unless ($self->{'_hsps'}) {
892 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
893 return wantarray ? ('-','-') : '-';
896 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
898 unless( $arg1 ) {
899 @data = ($self->{'_totalIdentical'}, $self->{'_totalConserved'});
901 return @data;
902 } else {
904 if( defined $arg2 ) {
905 $self->{'_totalIdentical'} = $arg1;
906 $self->{'_totalConserved'} = $arg2;
907 return ( $arg1, $arg2 );
909 elsif($arg1 =~ /^id/i) {
910 $data = $self->{'_totalIdentical'};
911 } else {
912 $data = $self->{'_totalConserved'};
914 #print STDERR "\nmatches(): id=$self->{'_totalIdentical'}, cons=$self->{'_totalConserved'}\n\n";
915 return $data;
918 ## If we make it to here, it is likely the case that
919 ## the parser constructed a minimal hit object from the summary line only.
920 ## It either delibrately skipped parsing the alignment section,
921 ## or was not able to because it was absent (due to blast executable parameter
922 ## setting such as -b 0 (B=0 for WU-BLAST) )
923 #$self->throw("Can't get identical or conserved data: no data.");
927 =head2 start
929 Usage : $sbjct->start( [seq_type] );
930 Purpose : Gets the start coordinate for the query, sbjct, or both sequences
931 : in the BlastHit object. If there is more than one HSP, the lowest start
932 : value of all HSPs is returned.
933 Example : $qbeg = $sbjct->start('query');
934 : $sbeg = $sbjct->start('hit');
935 : ($qbeg, $sbeg) = $sbjct->start();
936 Returns : scalar context: integer
937 : array context without args: list of two integers (queryStart, sbjctStart)
938 : Array context can be "induced" by providing an argument of 'list' or 'array'.
939 Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
940 ('sbjct' is synonymous with 'hit')
941 Throws : n/a
943 See Also : L<end()|end>, L<range()|range>, L<strand()|strand>,
944 L<Bio::Search::HSP::BlastHSP::start|Bio::Search::HSP::BlastHSP>
946 =cut
948 sub start {
949 my ($self, $seqType, $num) = @_;
951 unless ($self->{'_hsps'}) {
952 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
953 return wantarray ? ('-','-') : '-';
956 $seqType ||= (wantarray ? 'list' : 'query');
957 $seqType = 'sbjct' if $seqType eq 'hit';
959 if( defined $num ) {
960 $seqType = "_\L$seqType\E";
961 return $self->{$seqType.'Start'} = $num;
964 # If there is only one HSP, defer this call to the solitary HSP.
965 if($self->num_hsps == 1) {
966 return $self->hsp->start($seqType);
968 else {
969 # Tiling normally generates $self->{'_queryStart'} and
970 # $self->{'_sbjctStart'}, but is very slow. If we haven't tiled,
971 # find the answer quickly without tiling.
972 unless (defined $self->{'_queryStart'}) {
973 my $earliest_query_start;
974 my $earliest_sbjct_start;
975 foreach my $hsp ($self->hsps) {
976 my $this_query_start = $hsp->start('query');
977 if (! defined $earliest_query_start || $this_query_start < $earliest_query_start) {
978 $earliest_query_start = $this_query_start;
981 my $this_sbjct_start = $hsp->start('sbjct');
982 if (! defined $earliest_sbjct_start || $this_sbjct_start < $earliest_sbjct_start) {
983 $earliest_sbjct_start = $this_sbjct_start;
986 $self->{'_queryStart'} = $earliest_query_start;
987 $self->{'_sbjctStart'} = $earliest_sbjct_start;
991 if ($seqType =~ /list|array/i) {
992 return ($self->{'_queryStart'}, $self->{'_sbjctStart'});
993 } else {
994 ## Sensitive to member name changes.
995 $seqType = "_\L$seqType\E";
996 return $self->{$seqType.'Start'};
1002 =head2 end
1004 Usage : $sbjct->end( [seq_type] );
1005 Purpose : Gets the end coordinate for the query, sbjct, or both sequences
1006 : in the BlastHit object. If there is more than one HSP,
1007 the largest end
1008 : value of all HSPs is returned.
1009 Example : $qend = $sbjct->end('query');
1010 : $send = $sbjct->end('hit');
1011 : ($qend, $send) = $sbjct->end();
1012 Returns : scalar context: integer
1013 : array context without args: list of two integers
1014 : (queryEnd, sbjctEnd)
1015 : Array context can be "induced" by providing an argument
1016 : of 'list' or 'array'.
1017 Argument : In scalar context: seq_type = 'query' or 'sbjct'
1018 : (case insensitive). If not supplied, 'query' is used.
1019 Throws : n/a
1021 See Also : L<start()|start>, L<range()|range>, L<strand()|strand>
1023 =cut
1025 sub end {
1026 my ($self, $seqType, $num) = @_;
1028 unless ($self->{'_hsps'}) {
1029 return wantarray ? ('-','-') : '-';
1032 $seqType ||= (wantarray ? 'list' : 'query');
1033 $seqType = 'sbjct' if $seqType eq 'hit';
1035 if( defined $num ) {
1036 $seqType = "_\L$seqType\E";
1037 return $self->{$seqType.'Stop'} = $num;
1040 # If there is only one HSP, defer this call to the solitary HSP.
1041 if($self->num_hsps == 1) {
1042 return $self->hsp->end($seqType);
1044 else {
1045 # Tiling normally generates $self->{'_queryStop'} and
1046 # $self->{'_sbjctStop'}, but is very slow. If we haven't tiled,
1047 # find the answer quickly without tiling.
1048 unless (defined $self->{'_queryStop'}) {
1049 my $latest_query_end;
1050 my $latest_sbjct_end;
1051 foreach my $hsp ($self->hsps) {
1052 my $this_query_end = $hsp->end('query');
1053 if (! defined $latest_query_end || $this_query_end > $latest_query_end) {
1054 $latest_query_end = $this_query_end;
1057 my $this_sbjct_end = $hsp->end('sbjct');
1058 if (! defined $latest_sbjct_end || $this_sbjct_end > $latest_sbjct_end) {
1059 $latest_sbjct_end = $this_sbjct_end;
1062 $self->{'_queryStop'} = $latest_query_end;
1063 $self->{'_sbjctStop'} = $latest_sbjct_end;
1067 if($seqType =~ /list|array/i) {
1068 return ($self->{'_queryStop'}, $self->{'_sbjctStop'});
1069 } else {
1070 ## Sensitive to member name changes.
1071 $seqType = "_\L$seqType\E";
1072 return $self->{$seqType.'Stop'};
1077 =head2 range
1079 Usage : $sbjct->range( [seq_type] );
1080 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
1081 : in the HSP alignment.
1082 Example : ($qbeg, $qend) = $sbjct->range('query');
1083 : ($sbeg, $send) = $sbjct->range('hit');
1084 Returns : Two-element array of integers
1085 Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
1086 ('sbjct' is synonymous with 'hit')
1087 Throws : n/a
1089 See Also : L<start()|start>, L<end()|end>
1091 =cut
1093 sub range {
1094 my ($self, $seqType) = @_;
1095 $seqType ||= 'query';
1096 $seqType = 'sbjct' if $seqType eq 'hit';
1097 return ($self->start($seqType), $self->end($seqType));
1101 =head2 frac_identical
1103 Usage : $hit_object->frac_identical( [seq_type] );
1104 Purpose : Get the overall fraction of identical positions across all HSPs.
1105 : The number refers to only the aligned regions and does not
1106 : account for unaligned regions in between the HSPs, if any.
1107 Example : $frac_iden = $hit_object->frac_identical('query');
1108 Returns : Float (2-decimal precision, e.g., 0.75).
1109 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1110 : default = 'query' (but see comments below).
1111 : ('sbjct' is synonymous with 'hit')
1112 Throws : n/a
1113 Comments :
1114 : To compute the fraction identical, the logical length of the
1115 : aligned portion of the sequence is used, meaning that
1116 : in the case of BLAST flavors, for TBLASTN reports, the length of
1117 : the aligned portion of the
1118 : nucleotide hit sequence is divided by 3; for BLASTX reports,
1119 : the length of the aligned portion of the nucleotide query
1120 : sequence is divided by 3. For TBLASTX reports, the length of
1121 : both hit and query sequence are converted.
1122 : This is necessary since the number of identical residues is
1123 : in terms of peptide sequence space.
1125 : Different versions of Blast report different values for the total
1126 : length of the alignment. This is the number reported in the
1127 : denominators in the stats section:
1128 : "Identical = 34/120 Positives = 67/120".
1129 : NCBI BLAST uses the total length of the alignment (with gaps)
1130 : WU-BLAST uses the length of the query sequence (without gaps).
1132 : Therefore, when called with an argument of 'total',
1133 : this method will report different values depending on the
1134 : version of BLAST used. Total does NOT take into account HSP
1135 : tiling, so it should not be used.
1137 : To get the fraction identical among only the aligned residues,
1138 : ignoring the gaps, call this method without an argument or
1139 : with an argument of 'query' or 'hit'.
1141 : If you need data for each HSP, use hsps() and then iterate
1142 : through the HSP objects.
1143 : This method requires that all HSPs be tiled. If they have not
1144 : already been tiled, they will be tiled first automatically.
1146 See Also : L<frac_conserved()|frac_conserved>, L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1148 =cut
1150 sub frac_identical {
1151 my ($self, $seqType) = @_;
1152 $seqType ||= 'query';
1153 $seqType = 'sbjct' if $seqType eq 'hit';
1155 ## Sensitive to member name format.
1156 $seqType = lc($seqType);
1158 unless ($self->{'_hsps'}) {
1159 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1160 #return wantarray ? ('-','-') : '-';
1161 return '-';
1164 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1166 my $ident = $self->matches('id');
1167 my $total = $self->length_aln($seqType);
1168 my $ratio = $ident / $total;
1169 my $ratio_rounded = sprintf( "%.3f", $ratio);
1171 # Round down iff normal rounding yields 1 (just like blast)
1172 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1173 return $ratio_rounded;
1177 =head2 frac_conserved
1179 Usage : $hit_object->frac_conserved( [seq_type] );
1180 Purpose : Get the overall fraction of conserved positions across all HSPs.
1181 : The number refers to only the aligned regions and does not
1182 : account for unaligned regions in between the HSPs, if any.
1183 Example : $frac_cons = $hit_object->frac_conserved('hit');
1184 Returns : Float (2-decimal precision, e.g., 0.75).
1185 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1186 : default = 'query' (but see comments below).
1187 : ('sbjct' is synonymous with 'hit')
1188 Throws : n/a
1189 Comments :
1190 : To compute the fraction conserved, the logical length of the
1191 : aligned portion of the sequence is used, meaning that
1192 : in the case of BLAST flavors, for TBLASTN reports, the length of
1193 : the aligned portion of the
1194 : nucleotide hit sequence is divided by 3; for BLASTX reports,
1195 : the length of the aligned portion of the nucleotide query
1196 : sequence is divided by 3. For TBLASTX reports, the length of
1197 : both hit and query sequence are converted.
1198 : This is necessary since the number of conserved residues is
1199 : in terms of peptide sequence space.
1201 : Different versions of Blast report different values for the total
1202 : length of the alignment. This is the number reported in the
1203 : denominators in the stats section:
1204 : "Positives = 34/120 Positives = 67/120".
1205 : NCBI BLAST uses the total length of the alignment (with gaps)
1206 : WU-BLAST uses the length of the query sequence (without gaps).
1208 : Therefore, when called with an argument of 'total',
1209 : this method will report different values depending on the
1210 : version of BLAST used. Total does NOT take into account HSP
1211 : tiling, so it should not be used.
1213 : To get the fraction conserved among only the aligned residues,
1214 : ignoring the gaps, call this method without an argument or
1215 : with an argument of 'query' or 'hit'.
1217 : If you need data for each HSP, use hsps() and then interate
1218 : through the HSP objects.
1219 : This method requires that all HSPs be tiled. If they have not
1220 : already been tiled, they will be tiled first automatically.
1222 See Also : L<frac_identical()|frac_identical>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1224 =cut
1226 sub frac_conserved {
1227 my ($self, $seqType) = @_;
1228 $seqType ||= 'query';
1229 $seqType = 'sbjct' if $seqType eq 'hit';
1231 ## Sensitive to member name format.
1232 $seqType = lc($seqType);
1234 unless ($self->{'_hsps'}) {
1235 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1236 #return wantarray ? ('-','-') : '-';
1237 return '-';
1240 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1242 my $consv = $self->matches('cons');
1243 my $total = $self->length_aln($seqType);
1244 my $ratio = $consv / $total;
1245 my $ratio_rounded = sprintf( "%.3f", $ratio);
1247 # Round down iff normal rounding yields 1 (just like blast)
1248 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1249 return $ratio_rounded;
1255 =head2 frac_aligned_query
1257 Usage : $hit_object->frac_aligned_query();
1258 Purpose : Get the fraction of the query sequence which has been aligned
1259 : across all HSPs (not including intervals between non-overlapping
1260 : HSPs).
1261 Example : $frac_alnq = $hit_object->frac_aligned_query();
1262 Returns : Float (2-decimal precision, e.g., 0.75),
1263 : or undef if query length is unknown to avoid division by 0.
1264 Argument : n/a
1265 Throws : n/a
1266 Comments : If you need data for each HSP, use hsps() and then interate
1267 : through the HSP objects.
1268 : This method requires that all HSPs be tiled. If they have not
1269 : already been tiled, they will be tiled first automatically.
1271 See Also : L<frac_aligned_hit()|frac_aligned_hit>, L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1273 =cut
1275 sub frac_aligned_query {
1276 my $self = shift;
1278 unless ($self->{'_hsps'}) {
1279 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1280 #return wantarray ? ('-','-') : '-';
1281 return '-';
1284 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1286 my $qry_len = $self->logical_length('query');
1287 return undef if $qry_len == 0; # Avoid division by 0 crash
1288 sprintf( "%.2f", $self->length_aln('query') / $qry_len);
1293 =head2 frac_aligned_hit
1295 Usage : $hit_object->frac_aligned_hit();
1296 Purpose : Get the fraction of the hit (sbjct) sequence which has been aligned
1297 : across all HSPs (not including intervals between non-overlapping
1298 : HSPs).
1299 Example : $frac_alnq = $hit_object->frac_aligned_hit();
1300 Returns : Float (2-decimal precision, e.g., 0.75),
1301 : or undef if hit length is unknown to avoid division by 0.
1302 Argument : n/a
1303 Throws : n/a
1304 Comments : If you need data for each HSP, use hsps() and then interate
1305 : through the HSP objects.
1306 : This method requires that all HSPs be tiled. If they have not
1307 : already been tiled, they will be tiled first automatically.
1309 See Also : L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, , L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1311 =cut
1313 sub frac_aligned_hit {
1314 my $self = shift;
1316 unless ($self->{'_hsps'}) {
1317 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1318 #return wantarray ? ('-','-') : '-';
1319 return '-';
1322 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1324 my $sbjct_len = $self->logical_length('sbjct');
1325 return undef if $sbjct_len == 0; # Avoid division by 0 crash
1326 sprintf( "%.2f", $self->length_aln('sbjct') / $sbjct_len);
1330 ## These methods are being maintained for backward compatibility.
1332 =head2 frac_aligned_sbjct
1334 Same as L<frac_aligned_hit()|frac_aligned_hit>
1336 =cut
1338 *frac_aligned_sbjct = \&frac_aligned_hit;
1340 =head2 num_unaligned_sbjct
1342 Same as L<num_unaligned_hit()|num_unaligned_hit>
1344 =cut
1346 *num_unaligned_sbjct = \&num_unaligned_hit;
1349 =head2 num_unaligned_hit
1351 Usage : $hit_object->num_unaligned_hit();
1352 Purpose : Get the number of the unaligned residues in the hit sequence.
1353 : Sums across all all HSPs.
1354 Example : $num_unaln = $hit_object->num_unaligned_hit();
1355 Returns : Integer
1356 Argument : n/a
1357 Throws : n/a
1358 Comments : See notes regarding logical lengths in the comments for frac_aligned_hit().
1359 : They apply here as well.
1360 : If you need data for each HSP, use hsps() and then interate
1361 : through the HSP objects.
1362 : This method requires that all HSPs be tiled. If they have not
1363 : already been tiled, they will be tiled first automatically..
1365 See Also : L<num_unaligned_query()|num_unaligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<frac_aligned_hit()|frac_aligned_hit>
1367 =cut
1369 sub num_unaligned_hit {
1370 my $self = shift;
1372 unless ($self->{'_hsps'}) {
1373 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1374 #return wantarray ? ('-','-') : '-';
1375 return '-';
1378 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1380 my $num = $self->logical_length('sbjct') - $self->length_aln('sbjct');
1381 ($num < 0 ? 0 : $num );
1385 =head2 num_unaligned_query
1387 Usage : $hit_object->num_unaligned_query();
1388 Purpose : Get the number of the unaligned residues in the query sequence.
1389 : Sums across all all HSPs.
1390 Example : $num_unaln = $hit_object->num_unaligned_query();
1391 Returns : Integer
1392 Argument : n/a
1393 Throws : n/a
1394 Comments : See notes regarding logical lengths in the comments for frac_aligned_query().
1395 : They apply here as well.
1396 : If you need data for each HSP, use hsps() and then interate
1397 : through the HSP objects.
1398 : This method requires that all HSPs be tiled. If they have not
1399 : already been tiled, they will be tiled first automatically..
1401 See Also : L<num_unaligned_hit()|num_unaligned_hit>, L<frac_aligned_query()|frac_aligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1403 =cut
1405 sub num_unaligned_query {
1406 my $self = shift;
1408 unless ($self->{'_hsps'}) {
1409 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1410 #return wantarray ? ('-','-') : '-';
1411 return '-';
1414 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1416 my $num = $self->logical_length('query') - $self->length_aln('query');
1417 ($num < 0 ? 0 : $num );
1422 =head2 seq_inds
1424 Usage : $hit->seq_inds( seq_type, class, collapse );
1425 Purpose : Get a list of residue positions (indices) across all HSPs
1426 : for identical or conserved residues in the query or sbjct sequence.
1427 Example : @s_ind = $hit->seq_inds('query', 'identical');
1428 : @h_ind = $hit->seq_inds('hit', 'conserved');
1429 : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
1430 Returns : Array of integers
1431 : May include ranges if collapse is non-zero.
1432 Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
1433 : ('sbjct' is synonymous with 'hit')
1434 : [1] class = 'identical' or 'conserved' (default = 'identical')
1435 : (can be shortened to 'id' or 'cons')
1436 : (actually, anything not 'id' will evaluate to 'conserved').
1437 : [2] collapse = boolean, if non-zero, consecutive positions are merged
1438 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
1439 : collapses to "1-5 7 9-11". This is useful for
1440 : consolidating long lists. Default = no collapse.
1441 Throws : n/a.
1443 See Also : L<Bio::Search::HSP::BlastHSP::seq_inds()|Bio::Search::HSP::BlastHSP>
1445 =cut
1447 sub seq_inds {
1448 my ($self, $seqType, $class, $collapse) = @_;
1450 $seqType ||= 'query';
1451 $class ||= 'identical';
1452 $collapse ||= 0;
1454 $seqType = 'sbjct' if $seqType eq 'hit';
1456 my (@inds, $hsp);
1457 foreach $hsp ($self->hsps) {
1458 # This will merge data for all HSPs together.
1459 push @inds, $hsp->seq_inds($seqType, $class);
1462 # Need to remove duplicates and sort the merged positions.
1463 if(@inds) {
1464 my %tmp = map { $_, 1 } @inds;
1465 @inds = sort {$a <=> $b} keys %tmp;
1468 $collapse ? &Bio::Search::SearchUtils::collapse_nums(@inds) : @inds;
1472 =head2 strand
1474 See documentation in L<Bio::Search::Hit::HitI::strand()|Bio::Search::Hit::HitI>
1476 =cut
1478 sub strand {
1479 my ($self, $seqType, $strnd) = @_;
1481 unless ($self->{'_hsps'}) {
1482 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1483 return wantarray ? ('-','-') : '-';
1484 #return '-';
1487 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1489 $seqType ||= (wantarray ? 'list' : 'query');
1490 $seqType = 'sbjct' if $seqType eq 'hit';
1492 $seqType = lc($seqType);
1494 if( defined $strnd ) {
1495 $self->throw("Can't set strand for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
1497 return $self->{'_strand_'.$seqType} = $strnd;
1500 my ($qstr, $hstr);
1501 # If there is only one HSP, defer this call to the solitary HSP.
1502 if($self->num_hsps == 1) {
1503 return $self->hsp->strand($seqType);
1505 elsif( defined $self->{'_strand_query'}) {
1506 # Get the data computed during hsp tiling.
1507 $qstr = $self->{'_strand_query'};
1508 $hstr = $self->{'_strand_sbjct'}
1510 else {
1511 # otherwise, iterate through all HSPs collecting strand info.
1512 # This will return the string "-1/1" if there are HSPs on different strands.
1513 # NOTE: This was the pre-10/21/02 procedure which will no longer be used,
1514 # (unless the above elsif{} is commented out).
1515 my (%qstr, %hstr);
1516 foreach my $hsp( $self->hsps ) {
1517 my ( $q, $h ) = $hsp->strand();
1518 $qstr{ $q }++;
1519 $hstr{ $h }++;
1521 $qstr = join( '/', sort keys %qstr);
1522 $hstr = join( '/', sort keys %hstr);
1525 if($seqType =~ /list|array/i) {
1526 return ($qstr, $hstr);
1527 } elsif( $seqType eq 'query' ) {
1528 return $qstr;
1529 } else {
1530 return $hstr;
1534 =head2 frame
1536 See documentation in L<Bio::Search::Hit::HitI::frame()|Bio::Search::Hit::HitI>
1538 =cut
1540 sub frame {
1541 my( $self, $frm ) = @_;
1543 unless ($self->{'_hsps'}) {
1544 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1545 #return wantarray ? ('-','-') : '-';
1546 return '-';
1549 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1551 if( defined $frm ) {
1552 return $self->{'_frame'} = $frm;
1555 # The check for $self->{'_frame'} is a remnant from the 'query' mode days
1556 # in which the sbjct object would collect data from the description line only.
1558 my ($frame);
1559 if(not defined($self->{'_frame'})) {
1560 $frame = $self->hsp->frame('hit');
1561 } else {
1562 $frame = $self->{'_frame'};
1564 return $frame;
1567 =head2 rank
1569 Title : rank
1570 Usage : $obj->rank($newval)
1571 Function: Get/Set the rank of this Hit in the Query search list
1572 i.e. this is the Nth hit for a specific query
1573 Returns : value of rank
1574 Args : newvalue (optional)
1577 =cut
1579 sub rank {
1580 my $self = shift;
1581 return $self->{'_rank'} = shift if @_;
1582 return $self->{'_rank'} || 1;
1585 =head2 locus
1587 Title : locus
1588 Usage : $locus = $hit->locus();
1589 Function: Retrieve the locus (if available) for the hit
1590 Returns : a scalar string (empty string if not set)
1591 Args : none
1593 =cut
1595 sub locus {
1596 my ($self,$value) = @_;
1597 my $previous = $self->{'_locus'};
1598 if( defined $value || ! defined $previous ) {
1599 unless (defined $value) {
1600 if ($self->{'_name'} =~/(gb|emb|dbj|ref)\|(.*)\|(.*)/) {
1601 $value = $previous = $3;
1602 } else {
1603 $value = $previous = '';
1606 $self->{'_locus'} = $value;
1608 return $previous;
1611 =head2 each_accession_number
1613 Title : each_accession_number
1614 Usage : @each_accession_number = $hit->each_accession_number();
1615 Function: Get each accession number listed in the description of the hit.
1616 If there are no alternatives, then only the primary accession will
1617 be given
1618 Returns : list of all accession numbers in the description
1619 Args : none
1621 =cut
1623 sub each_accession_number {
1624 my ($self,$value) = @_;
1625 my $desc = $self->{'_description'};
1626 #put primary accnum on the list
1627 my @accnums;
1628 push (@accnums,$self->{'_accession'});
1629 if( defined $desc ) {
1630 while ($desc =~ /(\b\S+\|\S*\|\S*\s?)/g) {
1631 my $id = $1;
1632 my ($acc, $version);
1633 if ($id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|tp[gde])\|(.*)\|(.*)/) {
1634 ($acc, $version) = split /\./, $2;
1635 } elsif ($id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/) {
1636 ($acc, $version) = split /\./, $3;
1637 } elsif( $id =~ /(gim|gi|bbm|bbs|lcl)\|(\d*)/) {
1638 $acc = $id;
1639 } elsif( $id =~ /(oth)\|(.*)\|(.*)\|(.*)/ ) { # discontinued...
1640 ($acc,$version) = ($2);
1641 } else {
1642 #punt, not matching the db's at ftp://ftp.ncbi.nih.gov/blast/db/README
1643 #Database Name Identifier Syntax
1644 #============================ ========================
1645 #GenBank gb|accession|locus
1646 #EMBL Data Library emb|accession|locus
1647 #DDBJ, DNA Database of Japan dbj|accession|locus
1648 #NBRF PIR pir||entry
1649 #Protein Research Foundation prf||name
1650 #SWISS-PROT sp|accession|entry name
1651 #Brookhaven Protein Data Bank pdb|entry|chain
1652 #Patents pat|country|number
1653 #GenInfo Backbone Id bbs|number
1654 #General database identifier gnl|database|identifier
1655 #NCBI Reference Sequence ref|accession|locus
1656 #Local Sequence identifier lcl|identifier
1657 $acc=$id;
1659 push(@accnums, $acc);
1662 return @accnums;
1665 =head2 tiled_hsps
1667 See documentation in L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1669 =cut
1671 sub tiled_hsps {
1672 my $self = shift;
1673 return $self->{'_tiled_hsps'} = shift if @_;
1674 return $self->{'_tiled_hsps'};
1677 =head2 query_length
1679 Title : query_length
1680 Usage : $obj->query_length($newval)
1681 Function: Get/Set the query_length
1682 Returns : value of query_length (a scalar)
1683 Args : on set, new value (a scalar or undef, optional)
1686 =cut
1688 sub query_length {
1689 my ($self,$value) = @_;
1690 my $previous = $self->{'_query_length'};
1691 if( defined $value || ! defined $previous ) {
1692 $value = $previous = 0 unless defined $value;
1693 $self->{'_query_length'} = $value;
1695 return $previous;
1698 =head2 ncbi_gi
1700 Title : ncbi_gi
1701 Usage : $acc = $hit->ncbi_gi();
1702 Function: Retrieve the NCBI Unique ID (aka the GI #),
1703 if available, for the hit
1704 Returns : a scalar string (empty string if not set)
1705 Args : none
1706 Note : As of Sept. 2016 NCBI records will no longer have a
1707 GI; this attributue will remain in place for older
1708 records
1710 =cut
1712 sub ncbi_gi {
1713 my ($self,$value) = @_;
1714 if( defined $value ) {
1715 $self->{'_ncbi_gi'} = $value;
1716 } else {
1717 $self->{'_ncbi_gi'} = $self->name =~ m{^gi\|(\d+)} ? $1 : '';
1719 return $self->{'_ncbi_gi'};
1723 # sort method for HSPs
1725 =head2 sort_hits
1727 Title : sort_hsps
1728 Usage : $result->sort_hsps(\&sort_function)
1729 Function : Sorts the available HSP objects by a user-supplied function. Defaults to sort
1730 by descending score.
1731 Returns : n/a
1732 Args : A coderef for the sort function. See the documentation on the Perl sort()
1733 function for guidelines on writing sort functions.
1734 Note : To access the special variables $a and $b used by the Perl sort() function
1735 the user function must access Bio::Search::Hit::HitI namespace.
1736 For example, use :
1737 $hit->sort_hsps( sub{$Bio::Search::Result::HitI::a->length <=>
1738 $Bio::Search::Result::HitI::b->length});
1739 NOT $hit->sort_hsps($a->length <=> $b->length);
1741 =cut
1743 sub sort_hsps {
1744 my ($self, $coderef) = @_;
1745 my @sorted_hsps;
1747 if ($coderef) {
1748 $self->throw('sort_hsps requires a sort function passed as a subroutine reference')
1749 unless (ref($coderef) eq 'CODE');
1751 else {
1752 $coderef = \&_default_sort_hsps;
1753 # throw a warning?
1756 my @hsps = $self->hsps();
1757 eval {@sorted_hsps = sort $coderef @hsps };
1759 if ($@) {
1760 $self->throw("Unable to sort hsps: $@");
1762 else {
1763 $self->{'_hsps'} = \@sorted_hsps;
1768 =head2 iteration
1770 Usage : $hit->iteration( $iteration_num );
1771 Purpose : Gets the iteration number in which the Hit was found.
1772 Example : $iteration_num = $sbjct->iteration();
1773 Returns : Integer greater than or equal to 1
1774 Non-PSI-BLAST reports may report iteration as 1, but this number
1775 is only meaningful for PSI-BLAST reports.
1776 Argument : iteration_num (optional, used when setting only)
1777 Throws : none
1779 See Also : L<found_again()|found_again>
1781 =cut
1783 sub iteration{
1784 my ($self,$value) = @_;
1785 if( defined $value) {
1786 $self->{'_psiblast_iteration'} = $value;
1788 return $self->{'_psiblast_iteration'};
1791 =head2 found_again
1793 Title : found_again
1794 Usage : $hit->found_again;
1795 $hit->found_again(1);
1796 Purpose : Gets a boolean indicator whether or not the hit has
1797 been found in a previous iteration.
1798 This is only applicable to PSI-BLAST reports.
1800 This method indicates if the hit was reported in the
1801 "Sequences used in model and found again" section of the
1802 PSI-BLAST report or if it was reported in the
1803 "Sequences not found previously or not previously below threshold"
1804 section of the PSI-BLAST report. Only for hits in iteration > 1.
1806 Example : if( $hit->found_again()) { ... };
1807 Returns : Boolean, true (1) if the hit has been found in a
1808 previous PSI-BLAST iteration.
1809 Returns false (0 or undef) for hits that have not occurred in a
1810 previous PSI-BLAST iteration.
1811 Argument : Boolean (1 or 0). Only used for setting.
1812 Throws : none
1814 See Also : L<iteration()|iteration>
1816 =cut
1818 sub found_again {
1819 my $self = shift;
1820 return $self->{'_found_again'} = shift if @_;
1821 return $self->{'_found_again'};