2 # BioPerl module for Bio::Search::Iteration::GenericIteration
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Steve Chervitz <sac@bioperl.org>
8 # Copyright Steve Chervitz
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 # TODO: Consider calling this BlastIteration (strongly) and maybe simplifying IterationI.
18 Bio::Search::Iteration::GenericIteration - A generic implementation of the Bio::Search::Iteration::IterationI interface.
22 use Bio::Search::Iteration::GenericIteration;
23 my $it = Bio::Search::Iteration::GenericIteration->new(
26 -newhits_unclassified => [@newhits_unclass],
27 -newhits_below => [@newhits_below_threshold],
28 -newhits_not_below => [@newhits_not_below_threshold],
29 -oldhits_below => [@oldhits_below_threshold],
30 -oldhits_newly_below => [@oldhits_newly_below_threshold],
31 -oldhits_not_below => [@oldhits_not_below_threshold],
34 # TODO: Describe how to configure a SearchIO stream so that it generates
35 # GenericIteration objects.
40 This module acts as a container for Bio::Search::Hit::HitI objects,
41 allowing a Bio::Search::Result::ResultI object to partition its hits based
42 on which iteration the hit occurred in (e.g., a PSI-BLAST round).
44 Unless you're writing a parser, you won't ever need to create a
45 GenericIteration or any other IterationI-implementing object. If you use
46 the SearchIO system, IterationI objects are created automatically from
47 a SearchIO stream which returns Bio::Search::Result::ResultI objects
48 and you get the IterationI objects via the ResultI API.
50 For documentation on what you can do with GenericIteration (and other IterationI
51 objects), please see the API documentation in
52 L<Bio::Search::Iteration::IterationI|Bio::Search::Iteration::IterationI>.
54 Bio::Search::Iteration::GenericIteration is similar in spirit to the deprecated
55 Bio::Tools::BPlite::Iteration modules in bioperl releases prior to 1.6, except
56 that Bio::Search::Iteration::GenericIteration is a pure container, without any
57 parsing functionality as is in Bio::Tools::BPlite::Iteration.
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to
65 the Bioperl mailing list. Your participation is much appreciated.
67 bioperl-l@bioperl.org - General discussion
68 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
72 Please direct usage questions or support issues to the mailing list:
74 I<bioperl-l@bioperl.org>
76 rather than to the module maintainer directly. Many experienced and
77 responsive experts will be able to look at the problem and quickly
78 address it. Please include a thorough description of the problem
79 with code and data examples if at all possible.
83 Report bugs to the Bioperl bug tracking system to help us keep track
84 of the bugs and their resolution. Bug reports can be submitted via the
87 https://github.com/bioperl/bioperl-live/issues
89 =head1 AUTHOR - Steve Chervitz
95 The rest of the documentation details each of the object methods.
96 Internal methods are usually preceded with a _
101 # Let the code begin...
104 package Bio
::Search
::Iteration
::GenericIteration
;
108 use base
qw(Bio::Root::Root Bio::Search::Iteration::IterationI);
113 Usage : my $obj = Bio::Search::Iteration::GenericIteration->new(%args);
114 Function: Builds a new Bio::Search::Iteration::GenericIteration object
115 Returns : Bio::Search::Iteration::GenericIteration object
116 Args : -number => integer for the number of this iteration (required)
117 -converged => boolean value whether or not the iteration converged
118 -newhits_unclassified => array reference to hits that were not found
119 in a previous iteration for the iteration and have not been
120 classified with regard to the inclusion threshold
122 # The following are only used for PSI-BLAST reports:
124 -newhits_below => array reference to hits that were not found in a
125 previous iteration and are below the inclusion threshold.
126 -newhits_not_below => array reference to hits that were not found in a
127 previous iteration and that are not below
128 the inclusion threshold.
129 -oldhits_below => array reference to hits that were found
130 in a previous iteration below inclusion threshold and are
131 still below threshold in the current iteration.
132 -oldhits_newly_below => array reference to hits that were found
133 in a previous iteration above threshold but are below
134 threshold in the current iteration.
135 -oldhits_not_below => array reference to hits that were found in a
136 previous iteration above threshold and that are still above
137 the inclusion threshold.
139 -hit_factory => Bio::Factory::ObjectFactoryI capable of making
140 Bio::Search::Hit::HitI objects
145 my($class,@args) = @_;
147 my $self = $class->SUPER::new
(@args);
148 my ($number, $newhits_unclassified, $newhits_below, $newhits_not_below,
149 $oldhits_below, $oldhits_newly_below, $oldhits_not_below, $converged,
151 $self->_rearrange([qw(NUMBER
162 if( ! defined $number ) {
163 $self->throw(-class=>'Bio::Root::BadParameter',
164 -text
=>"Iteration number not specified.");
166 $self->number($number);
169 defined $converged && $self->converged($converged);
171 # TODO: Performance optimization test calling add_hit() vs. simple assignment:
172 # push @{$self->{'_hits_new'}}, @{$newhits};
174 # foreach(@{$newhits_below}) {$self->add_hit(-hit=>$_, -old=>0, -below=>1);}
176 if(defined $newhits_unclassified ) {
177 if( ref($newhits_unclassified) =~ /ARRAY/i) {
178 push @
{$self->{'_newhits_unclassified'}}, @
{$newhits_unclassified};
180 $self->throw(-class=>'Bio::Root::BadParameter',
181 -text
=>"Parameter NEWHITS is not an array ref: $newhits_unclassified");
184 $self->{'_newhits_unclassified'} = [];
187 if(defined $newhits_below ) {
188 if( ref($newhits_below) =~ /ARRAY/i) {
189 push @
{$self->{'_newhits_below_threshold'}}, @
{$newhits_below};
191 $self->throw(-class=>'Bio::Root::BadParameter',
192 -text
=>"Parameter NEWHITS_BELOW is not an array ref: $newhits_below");
195 $self->{'_newhits_below_threshold'} = [];
198 if(defined $newhits_not_below ) {
199 if( ref($newhits_not_below) =~ /ARRAY/i) {
200 push @
{$self->{'_newhits_not_below_threshold'}}, @
{$newhits_not_below};
202 $self->throw(-class=>'Bio::Root::BadParameter',
203 -text
=>"Parameter NEWHITS_NOT_BELOW is not an array ref: $newhits_not_below");
206 $self->{'_newhits_not_below_threshold'} = [];
209 if(defined $oldhits_below ) {
210 if( ref($oldhits_below) =~ /ARRAY/i) {
211 push @
{$self->{'_oldhits_below_threshold'}}, @
{$oldhits_below};
213 $self->throw(-class=>'Bio::Root::BadParameter',
214 -text
=>"Parameter OLDHITS_BELOW is not an array ref: $oldhits_below");
217 $self->{'_oldhits_below_threshold'} = [];
220 if(defined $oldhits_newly_below ) {
221 if( ref($oldhits_newly_below) =~ /ARRAY/i) {
222 push @
{$self->{'_oldhits_newly_below_threshold'}}, @
{$oldhits_newly_below};
224 $self->throw(-class=>'Bio::Root::BadParameter',
225 -text
=>"Parameter OLDHITS_NEWLY_BELOW is not an array ref: $oldhits_newly_below");
228 $self->{'_oldhits_newly_below_threshold'} = [];
231 if(defined $oldhits_not_below ) {
232 if( ref($oldhits_not_below) =~ /ARRAY/i) {
233 push @
{$self->{'_oldhits_not_below_threshold'}}, @
{$oldhits_not_below};
235 $self->throw(-class=>'Bio::Root::BadParameter',
236 -text
=>"Parameter OLDHITS_NOT_BELOW is not an array ref: $oldhits_not_below");
239 $self->{'_oldhits_not_below_threshold'} = [];
242 $self->hit_factory($h_f) if $h_f;
250 See documentation in Bio::Search::Iteration::IterationI.
255 my ($self,$value) = @_;
256 my $previous = $self->{'_number'};
257 if( defined $value || ! defined $previous ) {
258 $value = $previous = '' unless defined $value;
259 $self->{'_number'} = $value;
266 See documentation in Bio::Search::Iteration::IterationI.
271 my ($self,$value) = @_;
272 my $previous = $self->{'_converged'};
273 if( defined $value || ! defined $previous ) {
274 $value = $previous = '' unless defined $value;
275 $self->{'_converged'} = $value;
284 Usage : $iteration->hit_factory($hit_factory)
285 Function: Get/set the factory used to build HitI objects if necessary.
286 Returns : Bio::Factory::ObjectFactoryI
287 Args : Bio::Factory::ObjectFactoryI
293 if (@_) { $self->{_hit_factory
} = shift }
294 return $self->{_hit_factory
} || return;
299 This iterates through all old hits as returned by L</oldhits>
300 followed by all new hits as returned by L</newhits>.
302 For more documentation see L<Bio::Search::Iteration::IterationI::next_hit()|Bio::Search::Iteration::IterationI>.
309 unless($self->{'_hit_queue_started'}) {
310 $self->{'_hit_queue'} = ( [$self->oldhits(), $self->newhits()] );
311 $self->{'_hit_queue_started'} = 1;
313 return shift @
{$self->{'_hit_queue'}};
318 See documentation in L<Bio::Search::Iteration::IterationI::next_hit_new()|Bio::Search::Iteration::IterationI>.
325 unless($self->{'_hit_queue_new_started'}) {
326 $self->{'_hit_queue_new'} = [$self->newhits()];
327 $self->{'_hit_queue_new_started'} = 1;
329 return shift @
{$self->{'_hit_queue_new'}};
334 See documentation in L<Bio::Search::Iteration::IterationI::next_hit_old()|Bio::Search::Iteration::IterationI>.
339 my ($self,$found_again) = @_;
341 unless($self->{'_hit_queue_old_started'}) {
342 $self->{'_hit_queue_old'} = [$self->oldhits()];
343 $self->{'_hit_queue_old_started'} = 1;
345 return shift @
{$self->{'_hit_queue_old'}};
351 Usage : $iteration->rewind;
352 Function: Allow one to reset the Hit iterators to the beginning,
353 since this is an in-memory implementation.
361 $self->{'_hit_queue_started'} = 0;
362 $self->{'_hit_queue_new_started'} = 0;
363 $self->{'_hit_queue_old_started'} = 0;
364 foreach ($self->hits) {
372 See documentation in L<Bio::Search::Iteration::IterationI::num_hits()|Bio::Search::Iteration::IterationI>.
379 return $self->num_hits_old + $self->num_hits_new;
384 See documentation in L<Bio::Search::Iteration::IterationI::num_hits_new()|Bio::Search::Iteration::IterationI>.
391 return scalar $self->newhits();
396 See documentation in L<Bio::Search::Iteration::IterationI::num_hits_old()|Bio::Search::Iteration::IterationI>.
401 my ($self,$found_again) = @_;
403 return scalar $self->oldhits();
408 See documentation in L<Bio::Search::Iteration::IterationI::add_hit()|Bio::Search::Iteration::IterationI>.
413 my ($self,@args) = @_;
414 my( $hit, $old, $below, $newly_below ) =
415 $self->_rearrange([qw(HIT
422 unless( ref($hit) eq 'HASH' || $hit->isa('Bio::Search::Hit::HitI') ) {
423 $self->throw(-class=>'Bio::Root::BadParameter',
424 -text
=>"Passed in " .ref($hit).
425 " as a Hit which is not a Bio::Search::Hit::HitI.");
430 push @
{$self->{'_oldhits_newly_below_threshold'}}, $hit;
431 $count = scalar @
{$self->{'_oldhits_newly_below_threshold'}};
433 push @
{$self->{'_oldhits_below_threshold'}}, $hit;
434 $count = scalar @
{$self->{'_oldhits_below_threshold'}};
436 push @
{$self->{'_oldhits_not_below_threshold'}}, $hit;
437 $count = scalar @
{$self->{'_oldhits_not_below_threshold'}};
439 } elsif (defined $old) {
440 # -old is defined but false, so this is a new PSI-BLAST hit
442 push @
{$self->{'_newhits_below_threshold'}}, $hit;
443 $count = scalar @
{$self->{'_newhits_below_threshold'}};
444 } elsif (defined $below) {
445 push @
{$self->{'_newhits_not_below_threshold'}}, $hit;
446 $count = scalar @
{$self->{'_newhits_not_below_threshold'}};
448 # -below not defined, PSI-BLAST threshold may not be known
449 push @
{$self->{'_newhits_unclassified'}}, $hit;
450 $count = scalar @
{$self->{'_newhits_unclassified'}};
453 # -old not defined, so it's non-PSI-BLAST
454 push @
{$self->{'_newhits_unclassified'}}, $hit;
455 $count = scalar @
{$self->{'_newhits_unclassified'}};
462 See documentation in L<Bio::Search::Iteration::IterationI>.
468 # print STDERR "Called GenericIteration::hits()\n";
469 my @new = $self->newhits;
470 my @old = $self->oldhits;
471 return ( @new, @old );
476 Returns a list containing all newhits in this order:
478 newhits_below_threshold
479 newhits_not_below_threshold
482 See more documentation in L<Bio::Search::Iteration::IterationI>.
488 my @hits = $self->newhits_below_threshold;
489 push @hits, $self->newhits_not_below_threshold;
490 push @hits, $self->newhits_unclassified;
494 =head2 newhits_below_threshold
496 See documentation in L<Bio::Search::Iteration::IterationI::newhits_below_threshold()|Bio::Search::Iteration::IterationI>.
# Accessor for the new hits stored under '_newhits_below_threshold'.
# Returns the stored entries as a list. When a hit factory is available,
# entries that are still plain HASH references are first replaced, in place,
# by whatever the factory's create_object() returns for them.
# NOTE(review): $self is presumably the invocant, unpacked on a line that is
# not visible in this excerpt - confirm.
500 sub newhits_below_threshold
{
# Only work on the slot when it actually holds a reference.
502 if (ref $self->{'_newhits_below_threshold'} ) {
# No factory (hit_factory gave a false value): return the stored entries
# untouched.
503 my $factory = $self->hit_factory || return @
{$self->{'_newhits_below_threshold'}};
# Walk by index so each built object can overwrite its own slot.
504 for (0..$#{$self->{'_newhits_below_threshold'}}) {
# Entries whose ref() is not exactly 'HASH' are left as they are.
505 ref(${$self->{'_newhits_below_threshold'}}[$_]) eq 'HASH' || next;
# The hash is flattened into key/value pairs for create_object(); the
# result is stored back into the same array position.
506 ${$self->{'_newhits_below_threshold'}}[$_] = $factory->create_object(%{${$self->{'_newhits_below_threshold'}}[$_]});
# Hand back the (possibly converted) entries as a list.
508 return @
{$self->{'_newhits_below_threshold'}};
513 =head2 newhits_not_below_threshold
515 See documentation in L<Bio::Search::Iteration::IterationI::newhits_not_below_threshold()|Bio::Search::Iteration::IterationI>.
# Accessor for the new hits stored under '_newhits_not_below_threshold'.
# Returns the stored entries as a list. When a hit factory is available,
# entries that are still plain HASH references are first replaced, in place,
# by whatever the factory's create_object() returns for them.
# NOTE(review): $self is presumably the invocant, unpacked on a line that is
# not visible in this excerpt - confirm.
519 sub newhits_not_below_threshold
{
# Only work on the slot when it actually holds a reference.
521 if (ref $self->{'_newhits_not_below_threshold'} ) {
# No factory (hit_factory gave a false value): return the stored entries
# untouched.
522 my $factory = $self->hit_factory || return @
{$self->{'_newhits_not_below_threshold'}};
# Walk by index so each built object can overwrite its own slot.
523 for (0..$#{$self->{'_newhits_not_below_threshold'}}) {
# Entries whose ref() is not exactly 'HASH' are left as they are.
524 ref(${$self->{'_newhits_not_below_threshold'}}[$_]) eq 'HASH' || next;
# The hash is flattened into key/value pairs for create_object(); the
# result is stored back into the same array position.
525 ${$self->{'_newhits_not_below_threshold'}}[$_] = $factory->create_object(%{${$self->{'_newhits_not_below_threshold'}}[$_]});
# Hand back the (possibly converted) entries as a list.
527 return @
{$self->{'_newhits_not_below_threshold'}};
532 =head2 newhits_unclassified
534 Title : newhits_unclassified
535 Usage : foreach( $iteration->newhits_unclassified ) {...}
536 Function: Gets all newhits that have not been partitioned into
537 sets relative to the inclusion threshold.
538 Returns : Array of Bio::Search::Hit::HitI objects.
# Accessor for the new hits stored under '_newhits_unclassified'.
# Returns the stored entries as a list. When a hit factory is available,
# entries that are still plain HASH references are first replaced, in place,
# by whatever the factory's create_object() returns for them.
# NOTE(review): $self is presumably the invocant, unpacked on a line that is
# not visible in this excerpt - confirm.
543 sub newhits_unclassified
{
# Only work on the slot when it actually holds a reference.
545 if (ref $self->{'_newhits_unclassified'} ) {
# No factory (hit_factory gave a false value): return the stored entries
# untouched.
546 my $factory = $self->hit_factory || return @
{$self->{'_newhits_unclassified'}};
# Walk by index so each built object can overwrite its own slot.
547 for (0..$#{$self->{'_newhits_unclassified'}}) {
# Entries whose ref() is not exactly 'HASH' are left as they are.
548 ref(${$self->{'_newhits_unclassified'}}[$_]) eq 'HASH' || next;
# The hash is flattened into key/value pairs for create_object(); the
# result is stored back into the same array position.
549 ${$self->{'_newhits_unclassified'}}[$_] = $factory->create_object(%{${$self->{'_newhits_unclassified'}}[$_]});
# Hand back the (possibly converted) entries as a list.
551 return @
{$self->{'_newhits_unclassified'}};
558 Returns a list containing all oldhits in this order:
560 oldhits_below_threshold
561 oldhits_newly_below_threshold
562 oldhits_not_below_threshold
564 See more documentation in L<Bio::Search::Iteration::IterationI>.
570 my @hits = $self->oldhits_below_threshold;
571 push @hits, $self->oldhits_newly_below_threshold;
572 push @hits, $self->oldhits_not_below_threshold;
576 =head2 oldhits_below_threshold
578 See documentation in L<Bio::Search::Iteration::IterationI::oldhits_below_threshold()|Bio::Search::Iteration::IterationI>.
# Accessor for the old hits stored under '_oldhits_below_threshold'.
# Returns the stored entries as a list. When a hit factory is available,
# entries that are still plain HASH references are first replaced, in place,
# by whatever the factory's create_object() returns for them.
# NOTE(review): $self is presumably the invocant, unpacked on a line that is
# not visible in this excerpt - confirm.
582 sub oldhits_below_threshold
{
# Only work on the slot when it actually holds a reference.
584 if (ref $self->{'_oldhits_below_threshold'} ) {
# No factory (hit_factory gave a false value): return the stored entries
# untouched.
585 my $factory = $self->hit_factory || return @
{$self->{'_oldhits_below_threshold'}};
# Walk by index so each built object can overwrite its own slot.
586 for (0..$#{$self->{'_oldhits_below_threshold'}}) {
# Entries whose ref() is not exactly 'HASH' are left as they are.
587 ref(${$self->{'_oldhits_below_threshold'}}[$_]) eq 'HASH' || next;
# The hash is flattened into key/value pairs for create_object(); the
# result is stored back into the same array position.
588 ${$self->{'_oldhits_below_threshold'}}[$_] = $factory->create_object(%{${$self->{'_oldhits_below_threshold'}}[$_]});
# Hand back the (possibly converted) entries as a list.
590 return @
{$self->{'_oldhits_below_threshold'}};
595 =head2 oldhits_newly_below_threshold
597 See documentation in L<Bio::Search::Iteration::IterationI::oldhits_newly_below_threshold()|Bio::Search::Iteration::IterationI>.
# Accessor for the old hits stored under '_oldhits_newly_below_threshold'.
# Returns the stored entries as a list. When a hit factory is available,
# entries that are still plain HASH references are first replaced, in place,
# by whatever the factory's create_object() returns for them.
# NOTE(review): $self is presumably the invocant, unpacked on a line that is
# not visible in this excerpt - confirm.
601 sub oldhits_newly_below_threshold
{
# Only work on the slot when it actually holds a reference.
603 if (ref $self->{'_oldhits_newly_below_threshold'} ) {
# No factory (hit_factory gave a false value): return the stored entries
# untouched.
604 my $factory = $self->hit_factory || return @
{$self->{'_oldhits_newly_below_threshold'}};
# Walk by index so each built object can overwrite its own slot.
605 for (0..$#{$self->{'_oldhits_newly_below_threshold'}}) {
# Entries whose ref() is not exactly 'HASH' are left as they are.
606 ref(${$self->{'_oldhits_newly_below_threshold'}}[$_]) eq 'HASH' || next;
# The hash is flattened into key/value pairs for create_object(); the
# result is stored back into the same array position.
607 ${$self->{'_oldhits_newly_below_threshold'}}[$_] = $factory->create_object(%{${$self->{'_oldhits_newly_below_threshold'}}[$_]});
# Hand back the (possibly converted) entries as a list.
609 return @
{$self->{'_oldhits_newly_below_threshold'}};
614 =head2 oldhits_not_below_threshold
616 See documentation in L<Bio::Search::Iteration::IterationI::oldhits_not_below_threshold()|Bio::Search::Iteration::IterationI>.
# Accessor for the old hits stored under '_oldhits_not_below_threshold'.
# Returns the stored entries as a list. When a hit factory is available,
# entries that are still plain HASH references are first replaced, in place,
# by whatever the factory's create_object() returns for them.
# NOTE(review): $self is presumably the invocant, unpacked on a line that is
# not visible in this excerpt - confirm.
620 sub oldhits_not_below_threshold
{
# Only work on the slot when it actually holds a reference.
622 if (ref $self->{'_oldhits_not_below_threshold'} ) {
# No factory (hit_factory gave a false value): return the stored entries
# untouched.
623 my $factory = $self->hit_factory || return @
{$self->{'_oldhits_not_below_threshold'}};
# Walk by index so each built object can overwrite its own slot.
624 for (0..$#{$self->{'_oldhits_not_below_threshold'}}) {
# Entries whose ref() is not exactly 'HASH' are left as they are.
625 ref(${$self->{'_oldhits_not_below_threshold'}}[$_]) eq 'HASH' || next;
# The hash is flattened into key/value pairs for create_object(); the
# result is stored back into the same array position.
626 ${$self->{'_oldhits_not_below_threshold'}}[$_] = $factory->create_object(%{${$self->{'_oldhits_not_below_threshold'}}[$_]});
# Hand back the (possibly converted) entries as a list.
628 return @
{$self->{'_oldhits_not_below_threshold'}};
633 =head2 hits_below_threshold
635 See documentation in L<Bio::Search::Iteration::IterationI::hits_below_threshold()|Bio::Search::Iteration::IterationI>.
# Collects hits that are below the inclusion threshold into one list:
# first everything from newhits_below_threshold(), then everything from
# oldhits_newly_below_threshold().
# NOTE(review): oldhits_below_threshold() is not called in the visible lines,
# so old hits that were already below threshold are not part of @hits here -
# confirm this matches the IterationI contract.
# NOTE(review): $self is presumably the invocant, and @hits presumably the
# return value; neither the unpacking nor the return is visible in this
# excerpt - confirm.
639 sub hits_below_threshold
{
# Seed the list with the new hits that are below threshold.
641 my @hits = $self->newhits_below_threshold;
# Append the old hits that have only now dropped below threshold.
642 push @hits, $self->oldhits_newly_below_threshold;
648 See documentation in L<Bio::Search::Iteration::IterationI::get_hit()|Bio::Search::Iteration::IterationI>.
650 To free up the memory used by the get_hit() functionality, call free_hit_lookup().
652 This functionality might be useful at the Result level, too.
653 BlastResult::get_hit() would return a list of HitI objects for hits
654 that occur in multiple iterations.
659 my ($self,$name) = @_;
660 $self->_create_hit_lookup() unless defined $self->{'_hit_lookup'};
662 return $self->{'_hit_lookup'}->{"\U$name"};
# Internal helper: fills $self->{'_hit_lookup'} with one entry per hit
# returned by $self->hits, keyed by the hit's name converted to upper case.
# If two hits share the same upper-cased name, the one visited later
# overwrites the earlier entry.
# NOTE(review): $self is presumably the invocant, unpacked on a line that is
# not visible in this excerpt - confirm.
666 sub _create_hit_lookup
{
# $_ is the current hit; name() is called on it as a method, so every element
# must be an object at this point.
668 foreach ($self->hits) {
669 my $hname = $_->name;
# "\U" upper-cases the interpolated name, matching the "\U$name" key that the
# lookup code elsewhere in this file reads.
670 $self->{'_hit_lookup'}->{"\U$hname"} = $_;
674 =head2 free_hit_lookup
676 Purpose : Frees up the memory used by the get_hit() functionality.
677 For the memory-conscious.
681 sub free_hit_lookup
{
683 undef $self->{'_hit_lookup'};