A test to ensure Bio::PrimarySeqI->trunc() doesn't use clone() for a Bio::Seq::RichSe...
[bioperl-live.git] / Bio / Search / Iteration / GenericIteration.pm
blob7b6db55812b84a1653def42caf1f90af20dd1d43
2 # BioPerl module for Bio::Search::Iteration::GenericIteration
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Steve Chervitz <sac@bioperl.org>
8 # Copyright Steve Chervitz
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 # TODO: Consider calling this BlastIteration (strongly) and maybe simplifying IterationI.
16 =head1 NAME
18 Bio::Search::Iteration::GenericIteration - A generic implementation of the Bio::Search::Iteration::IterationI interface.
20 =head1 SYNOPSIS
22 use Bio::Search::Iteration::GenericIteration;
23 my $it = Bio::Search::Iteration::GenericIteration->new(
24 -number => 1,
25 -converged => 0,
26 -newhits_unclassified => [@newhits_unclass],
27 -newhits_below => [@newhits_below_threshold],
28 -newhits_not_below => [@newhits_not_below_threshold],
29 -oldhits_below => [@oldhits_below_threshold],
30 -oldhits_newly_below => [@oldhits_newly_below_threshold],
31 -oldhits_not_below => [@oldhits_not_below_threshold],
34 # TODO: Describe how to configure a SearchIO stream so that it generates
35 # GenericIteration objects.
38 =head1 DESCRIPTION
40 This module acts as a container for Bio::Search::Hit::HitI objects,
41 allowing a Search::Result::ResultI object to partition its hits based
42 on which iteration the hit occurred in (e.g., a PSI-BLAST round).
44 Unless you're writing a parser, you won't ever need to create a
45 GenericIteration or any other IterationI-implementing object. If you use
46 the SearchIO system, IterationI objects are created automatically from
47 a SearchIO stream which returns Bio::Search::Result::ResultI objects
48 and you get the IterationI objects via the ResultI API.
50 For documentation on what you can do with GenericIteration (and other IterationI
51 objects), please see the API documentation in
52 L<Bio::Search::Iteration::IterationI|Bio::Search::Iteration::IterationI>.
54 Bio::Search::Iteration::GenericIteration is similar in spirit to the deprecated
55 Bio::Tools::BPlite::Iteration modules in bioperl releases prior to 1.6, except
56 that Bio::Search::Iteration::GenericIteration is a pure container, without any
57 parsing functionality as is in Bio::Tools::BPlite::Iteration.
59 =head1 FEEDBACK
61 =head2 Mailing Lists
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to
65 the Bioperl mailing list. Your participation is much appreciated.
67 bioperl-l@bioperl.org - General discussion
68 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
70 =head2 Support
72 Please direct usage questions or support issues to the mailing list:
74 I<bioperl-l@bioperl.org>
76 rather than to the module maintainer directly. Many experienced and
77 responsive experts will be able to look at the problem and quickly
78 address it. Please include a thorough description of the problem
79 with code and data examples if at all possible.
81 =head2 Reporting Bugs
83 Report bugs to the Bioperl bug tracking system to help us keep track
84 of the bugs and their resolution. Bug reports can be submitted via the
85 web:
87 https://github.com/bioperl/bioperl-live/issues
89 =head1 AUTHOR - Steve Chervitz
91 Email sac@bioperl.org
93 =head1 APPENDIX
95 The rest of the documentation details each of the object methods.
96 Internal methods are usually preceded with a _
98 =cut
101 # Let the code begin...
104 package Bio::Search::Iteration::GenericIteration;
105 use strict;
108 use base qw(Bio::Root::Root Bio::Search::Iteration::IterationI);
110 =head2 new
112 Title : new
113 Usage : my $obj = Bio::Search::Iteration::GenericIteration->new(%args);
114 Function: Builds a new Bio::Search::Iteration::GenericIteration object
115 Returns : Bio::Search::Iteration::GenericIteration object
116 Args : -number => integer for the number of this iteration (required)
117 -converged => boolean value whether or not the iteration converged
118 -newhits_unclassified => array reference to hits that were not found
119 in a previous iteration for the iteration and have not been
120 classified with regard to the inclusion threshold
122 # The following are only used for PSI-BLAST reports:
124 -newhits_below => array reference to hits that were not found in a
125 previous iteration and are below the inclusion threshold.
126 -newhits_not_below => array reference to hits that were not found in a
127 previous iteration and are not below
128 the inclusion threshold.
129 -oldhits_below => array reference to hits that were found
130 in a previous iteration below inclusion threshold and are
131 still below threshold in the current iteration.
132 -oldhits_newly_below => array reference to hits that were found
133 in a previous iteration above threshold but are below
134 threshold in the current iteration.
135 -oldhits_not_below => array reference to hits that were found in a
136 previous iteration above threshold and are still above
137 the inclusion threshold.
139 -hit_factory => Bio::Factory::ObjectFactoryI capable of making
140 Bio::Search::Hit::HitI objects
142 =cut
# Constructor.
#
# Named arguments (unpacked with _rearrange):
#   -number                iteration number; required
#   -converged             passed to converged() when defined
#   -newhits_unclassified  \
#   -newhits_below          |
#   -newhits_not_below      |  array references of hits, one per partition;
#   -oldhits_below          |  a partition that is not supplied starts out
#   -oldhits_newly_below    |  as an empty list
#   -oldhits_not_below     /
#   -hit_factory           passed to hit_factory() when true
#
# Throws Bio::Root::BadParameter when -number is missing, or when one of the
# hit-list arguments is supplied but is not an array reference.
#
# TODO: Performance optimization test calling add_hit() vs. simple assignment.
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);
    my ($number, $newhits_unclassified, $newhits_below, $newhits_not_below,
        $oldhits_below, $oldhits_newly_below, $oldhits_not_below, $converged,
        $h_f) =
        $self->_rearrange([qw(NUMBER
                              NEWHITS_UNCLASSIFIED
                              NEWHITS_BELOW
                              NEWHITS_NOT_BELOW
                              OLDHITS_BELOW
                              OLDHITS_NEWLY_BELOW
                              OLDHITS_NOT_BELOW
                              CONVERGED
                              HIT_FACTORY
                             )], @args);

    if ( !defined $number ) {
        $self->throw(-class=>'Bio::Root::BadParameter',
                     -text=>"Iteration number not specified.");
    } else {
        $self->number($number);
    }

    defined $converged && $self->converged($converged);

    # One row per hit partition, validated in this order:
    #   [ storage slot, parameter name shown in the error text, supplied value ]
    my @partitions = (
        [ '_newhits_unclassified',          'NEWHITS_UNCLASSIFIED', $newhits_unclassified ],
        [ '_newhits_below_threshold',       'NEWHITS_BELOW',        $newhits_below        ],
        [ '_newhits_not_below_threshold',   'NEWHITS_NOT_BELOW',    $newhits_not_below    ],
        [ '_oldhits_below_threshold',       'OLDHITS_BELOW',        $oldhits_below        ],
        [ '_oldhits_newly_below_threshold', 'OLDHITS_NEWLY_BELOW',  $oldhits_newly_below  ],
        [ '_oldhits_not_below_threshold',   'OLDHITS_NOT_BELOW',    $oldhits_not_below    ],
    );

    foreach my $partition (@partitions) {
        my ($slot, $param, $hits) = @{$partition};

        if ( !defined $hits ) {
            $self->{$slot} = [];
            next;
        }

        # Deliberately kept as a pattern match rather than an exact 'ARRAY'
        # comparison, so every value accepted before is still accepted.
        if ( ref($hits) =~ /ARRAY/i ) {
            push @{ $self->{$slot} }, @{$hits};
        } else {
            $self->throw(-class=>'Bio::Root::BadParameter',
                         -text=>"Parameter $param is not an array ref: $hits");
        }
    }

    $self->hit_factory($h_f) if $h_f;

    return $self;
}
248 =head2 number
250 See documentation in Bio::Search::Iteration::IterationI.
252 =cut
# Get/set the iteration number.
#
# Always returns the value held BEFORE the call. With a defined argument the
# new value is stored. With no (or an undefined) argument and nothing stored
# yet, the empty string is stored and returned.
sub number {
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_number'};

    if ( defined $new ) {
        $self->{'_number'} = $new;
    }
    elsif ( !defined $old ) {
        # First read with nothing stored: initialise to the empty string.
        $old = '';
        $self->{'_number'} = '';
    }

    return $old;
}
264 =head2 converged
266 See documentation in Bio::Search::Iteration::IterationI.
268 =cut
# Get/set the convergence flag.
#
# Always returns the value held BEFORE the call. With a defined argument the
# new value is stored. With no (or an undefined) argument and nothing stored
# yet, the empty string is stored and returned.
sub converged {
    my $self = shift;
    my $new  = shift;
    my $old  = $self->{'_converged'};

    if ( defined $new ) {
        $self->{'_converged'} = $new;
    }
    elsif ( !defined $old ) {
        # First read with nothing stored: initialise to the empty string.
        $old = '';
        $self->{'_converged'} = '';
    }

    return $old;
}
281 =head2 hit_factory
283 Title : hit_factory
284 Usage : $iteration->hit_factory($hit_factory)
285 Function: Get/set the factory used to build HitI objects if necessary.
286 Returns : Bio::Factory::ObjectFactoryI
287 Args : Bio::Factory::ObjectFactoryI
289 =cut
# Get/set the factory used to build hit objects.
#
# Any argument (even a false one) is stored. Returns the stored factory when
# it is true; otherwise returns nothing (empty list / undef).
sub hit_factory {
    my ($self, @new_factory) = @_;

    if (@new_factory) {
        $self->{_hit_factory} = $new_factory[0];
    }

    my $factory = $self->{_hit_factory};
    return $factory if $factory;
    return;
}
297 =head2 next_hit
299 This iterates through all old hits as returned by L<oldhits>
300 followed by all new hits as returned by L<newhits>.
302 For more documentation see L<Bio::Search::Iteration::IterationI::next_hit()|Bio::Search::Iteration::IterationI>.
304 =cut
# Returns the next hit from the combined queue, or undef once it is empty.
#
# The queue is built on the first call (and again after the started flag has
# been cleared): all old hits first, then all new hits.
sub next_hit {
    my $self = shift;

    if ( !$self->{'_hit_queue_started'} ) {
        my @queue = ( $self->oldhits(), $self->newhits() );
        $self->{'_hit_queue'}         = \@queue;
        $self->{'_hit_queue_started'} = 1;
    }

    return shift @{ $self->{'_hit_queue'} };
}
316 =head2 next_hit_new
318 See documentation in L<Bio::Search::Iteration::IterationI::next_hit_new()|Bio::Search::Iteration::IterationI>.
320 =cut
# Returns the next hit from the queue of new hits, or undef once it is empty.
#
# The queue is filled from newhits() on the first call (and again after the
# started flag has been cleared).
sub next_hit_new {
    my $self = shift;

    if ( !$self->{'_hit_queue_new_started'} ) {
        my @queue = $self->newhits();
        $self->{'_hit_queue_new'}         = \@queue;
        $self->{'_hit_queue_new_started'} = 1;
    }

    return shift @{ $self->{'_hit_queue_new'} };
}
332 =head2 next_hit_old
334 See documentation in L<Bio::Search::Iteration::IterationI::next_hit_old()|Bio::Search::Iteration::IterationI>.
336 =cut
# Returns the next hit from the queue of old hits, or undef once it is empty.
#
# The queue is filled from oldhits() on the first call (and again after the
# started flag has been cleared). Any further argument is accepted and
# ignored.
sub next_hit_old {
    my $self = shift;

    if ( !$self->{'_hit_queue_old_started'} ) {
        my @queue = $self->oldhits();
        $self->{'_hit_queue_old'}         = \@queue;
        $self->{'_hit_queue_old_started'} = 1;
    }

    return shift @{ $self->{'_hit_queue_old'} };
}
348 =head2 rewind
350 Title : rewind
351 Usage : $iteration->rewind;
352 Function: Allow one to reset the Hit iterators to the beginning,
353 since this is an in-memory implementation
354 Returns : none
355 Args : none
357 =cut
# Resets the three hit iterators (next_hit, next_hit_new, next_hit_old) so
# that their queues are rebuilt on the next call, then rewinds every hit
# object held by this iteration.
#
# Hits still stored as plain HASH references (add_hit() accepts those, and
# they stay that way while no hit factory is set) are not objects and have
# nothing to rewind, so they are skipped instead of triggering a
# method-call-on-unblessed-reference failure.
#
# Returns nothing.
sub rewind {
    my ($self) = @_;

    $self->{$_} = 0
        for qw(_hit_queue_started _hit_queue_new_started _hit_queue_old_started);

    foreach my $hit ( $self->hits ) {
        next if ref($hit) eq 'HASH';
        $hit->rewind;
    }

    return;
}
370 =head2 num_hits
372 See documentation in L<Bio::Search::Iteration::IterationI::num_hits()|Bio::Search::Iteration::IterationI>.
374 =cut
# Total number of hits in this iteration: old hits plus new hits.
sub num_hits {
    my ($self) = @_;

    my $old_count = $self->num_hits_old;
    my $new_count = $self->num_hits_new;

    return $old_count + $new_count;
}
382 =head2 num_hits_new
384 See documentation in L<Bio::Search::Iteration::IterationI::num_hits_new()|Bio::Search::Iteration::IterationI>.
386 =cut
# Number of new hits, taken from newhits() called in scalar context.
sub num_hits_new {
    my ($self) = @_;

    # Scalar assignment keeps the call in scalar context, as before.
    my $count = $self->newhits();
    return $count;
}
394 =head2 num_hits_old
396 See documentation in L<Bio::Search::Iteration::IterationI::num_hits_old()|Bio::Search::Iteration::IterationI>.
398 =cut
# Number of old hits, taken from oldhits() called in scalar context.
# Any further argument is accepted and ignored.
sub num_hits_old {
    my $self = shift;

    # Scalar assignment keeps the call in scalar context, as before.
    my $count = $self->oldhits();
    return $count;
}
406 =head2 add_hit
408 See documentation in L<Bio::Search::Iteration::IterationI::add_hit()|Bio::Search::Iteration::IterationI>.
410 =cut
# Adds one hit to the partition selected by the classification flags and
# returns the number of hits now held in that partition.
#
# Named arguments (unpacked with _rearrange):
#   -hit              a Bio::Search::Hit::HitI object, or a plain HASH
#                     reference to be turned into a hit object later
#   -old              true: hit was seen in an earlier iteration;
#                     defined but false: new PSI-BLAST hit;
#                     undefined: non-PSI-BLAST hit (stored as unclassified)
#   -below_threshold  whether the hit is below the inclusion threshold
#   -newly_below      for old hits: whether it is below threshold now
#
# Throws Bio::Root::BadParameter when -hit is neither a HASH reference nor a
# Bio::Search::Hit::HitI (including undef and other unblessed values).
sub add_hit {
    my ($self, @args) = @_;

    # OLD has to be listed: four keys for the four variables, in this order.
    my ($hit, $old, $below, $newly_below) =
        $self->_rearrange([qw(HIT
                              OLD
                              BELOW_THRESHOLD
                              NEWLY_BELOW
                             )], @args);

    # The isa() call is wrapped in eval so that undef or an unblessed
    # reference produces the BadParameter exception below rather than a
    # bare "Can't call method" failure.
    my $acceptable = ref($hit) eq 'HASH'
        || do { local $@; eval { $hit->isa('Bio::Search::Hit::HitI') } };

    unless ($acceptable) {
        $self->throw(-class=>'Bio::Root::BadParameter',
                     -text=>"Passed in " .ref($hit).
                     " as a Hit which is not a Bio::Search::Hit::HitI.");
    }

    my $slot;
    if ($old) {
        $slot = $newly_below ? '_oldhits_newly_below_threshold'
              : $below       ? '_oldhits_below_threshold'
              :                '_oldhits_not_below_threshold';
    }
    elsif ( defined $old ) {
        # -old is defined but false, so this is a new PSI-BLAST hit.
        $slot = $below         ? '_newhits_below_threshold'
              : defined $below ? '_newhits_not_below_threshold'
              :                  '_newhits_unclassified';   # threshold may not be known
    }
    else {
        # -old not defined, so it's non-PSI-BLAST.
        $slot = '_newhits_unclassified';
    }

    push @{ $self->{$slot} }, $hit;
    return scalar @{ $self->{$slot} };
}
460 =head2 hits
462 See documentation in L<Bio::Search::Iteration::IterationI>.
464 =cut
# Returns every hit of this iteration: all new hits followed by all old hits.
sub hits {
    my ($self) = @_;

    my @fresh = $self->newhits;
    my @seen  = $self->oldhits;

    return ( @fresh, @seen );
}
474 =head2 newhits
476 Returns a list containing all newhits in this order:
478 newhits_below_threshold
479 newhits_not_below_threshold
480 newhits_unclassified
482 See more documentation in L<Bio::Search::Iteration::IterationI>.
484 =cut
# Returns all new hits as one list, in this order: below threshold,
# not below threshold, unclassified.
sub newhits {
    my ($self) = @_;

    my @hits = (
        $self->newhits_below_threshold,
        $self->newhits_not_below_threshold,
        $self->newhits_unclassified,
    );

    return @hits;
}
494 =head2 newhits_below_threshold
496 See documentation in L<Bio::Search::Iteration::IterationI::newhits_below_threshold()|Bio::Search::Iteration::IterationI>.
498 =cut
# Returns the new hits recorded as below the inclusion threshold.
#
# When a hit factory is set, every entry still stored as a plain HASH
# reference is passed (flattened) to the factory's create_object() and
# replaced in place by the result. Without a factory the stored entries are
# returned untouched. Returns nothing when the slot does not hold a reference.
sub newhits_below_threshold {
    my ($self) = @_;

    my $stored = $self->{'_newhits_below_threshold'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    if ($factory) {
        foreach my $entry ( @{$stored} ) {    # $entry aliases the array element
            next unless ref($entry) eq 'HASH';
            $entry = $factory->create_object( %{$entry} );
        }
    }

    return @{$stored};
}
513 =head2 newhits_not_below_threshold
515 See documentation in L<Bio::Search::Iteration::IterationI::newhits_not_below_threshold()|Bio::Search::Iteration::IterationI>.
517 =cut
# Returns the new hits recorded as not below the inclusion threshold.
#
# When a hit factory is set, every entry still stored as a plain HASH
# reference is passed (flattened) to the factory's create_object() and
# replaced in place by the result. Without a factory the stored entries are
# returned untouched. Returns nothing when the slot does not hold a reference.
sub newhits_not_below_threshold {
    my ($self) = @_;

    my $stored = $self->{'_newhits_not_below_threshold'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    if ($factory) {
        foreach my $entry ( @{$stored} ) {    # $entry aliases the array element
            next unless ref($entry) eq 'HASH';
            $entry = $factory->create_object( %{$entry} );
        }
    }

    return @{$stored};
}
532 =head2 newhits_unclassified
534 Title : newhits_unclassified
535 Usage : foreach( $iteration->newhits_unclassified ) {...}
536 Function: Gets all newhits that have not been partitioned into
537 sets relative to the inclusion threshold.
538 Returns : Array of Bio::Search::Hit::HitI objects.
539 Args : none
541 =cut
# Returns the new hits that have not been classified relative to the
# inclusion threshold.
#
# When a hit factory is set, every entry still stored as a plain HASH
# reference is passed (flattened) to the factory's create_object() and
# replaced in place by the result. Without a factory the stored entries are
# returned untouched. Returns nothing when the slot does not hold a reference.
sub newhits_unclassified {
    my ($self) = @_;

    my $stored = $self->{'_newhits_unclassified'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    if ($factory) {
        foreach my $entry ( @{$stored} ) {    # $entry aliases the array element
            next unless ref($entry) eq 'HASH';
            $entry = $factory->create_object( %{$entry} );
        }
    }

    return @{$stored};
}
556 =head2 oldhits
558 Returns a list containing all oldhits in this order:
560 oldhits_below_threshold
561 oldhits_newly_below_threshold
562 oldhits_not_below_threshold
564 See more documentation in L<Bio::Search::Iteration::IterationI>.
566 =cut
# Returns all old hits as one list, in this order: below threshold,
# newly below threshold, not below threshold.
sub oldhits {
    my ($self) = @_;

    my @hits = (
        $self->oldhits_below_threshold,
        $self->oldhits_newly_below_threshold,
        $self->oldhits_not_below_threshold,
    );

    return @hits;
}
576 =head2 oldhits_below_threshold
578 See documentation in L<Bio::Search::Iteration::IterationI::oldhits_below_threshold()|Bio::Search::Iteration::IterationI>.
580 =cut
# Returns the old hits recorded as below the inclusion threshold.
#
# When a hit factory is set, every entry still stored as a plain HASH
# reference is passed (flattened) to the factory's create_object() and
# replaced in place by the result. Without a factory the stored entries are
# returned untouched. Returns nothing when the slot does not hold a reference.
sub oldhits_below_threshold {
    my ($self) = @_;

    my $stored = $self->{'_oldhits_below_threshold'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    if ($factory) {
        foreach my $entry ( @{$stored} ) {    # $entry aliases the array element
            next unless ref($entry) eq 'HASH';
            $entry = $factory->create_object( %{$entry} );
        }
    }

    return @{$stored};
}
595 =head2 oldhits_newly_below_threshold
597 See documentation in L<Bio::Search::Iteration::IterationI::oldhits_newly_below_threshold()|Bio::Search::Iteration::IterationI>.
599 =cut
# Returns the old hits recorded as newly below the inclusion threshold.
#
# When a hit factory is set, every entry still stored as a plain HASH
# reference is passed (flattened) to the factory's create_object() and
# replaced in place by the result. Without a factory the stored entries are
# returned untouched. Returns nothing when the slot does not hold a reference.
sub oldhits_newly_below_threshold {
    my ($self) = @_;

    my $stored = $self->{'_oldhits_newly_below_threshold'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    if ($factory) {
        foreach my $entry ( @{$stored} ) {    # $entry aliases the array element
            next unless ref($entry) eq 'HASH';
            $entry = $factory->create_object( %{$entry} );
        }
    }

    return @{$stored};
}
614 =head2 oldhits_not_below_threshold
616 See documentation in L<Bio::Search::Iteration::IterationI::oldhits_not_below_threshold()|Bio::Search::Iteration::IterationI>.
618 =cut
# Returns the old hits recorded as not below the inclusion threshold.
#
# When a hit factory is set, every entry still stored as a plain HASH
# reference is passed (flattened) to the factory's create_object() and
# replaced in place by the result. Without a factory the stored entries are
# returned untouched. Returns nothing when the slot does not hold a reference.
sub oldhits_not_below_threshold {
    my ($self) = @_;

    my $stored = $self->{'_oldhits_not_below_threshold'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    if ($factory) {
        foreach my $entry ( @{$stored} ) {    # $entry aliases the array element
            next unless ref($entry) eq 'HASH';
            $entry = $factory->create_object( %{$entry} );
        }
    }

    return @{$stored};
}
633 =head2 hits_below_threshold
635 See documentation in L<Bio::Search::Iteration::IterationI::hits_below_threshold()|Bio::Search::Iteration::IterationI>.
637 =cut
# Returns the new hits below threshold followed by the old hits that are
# newly below threshold, as one list.
sub hits_below_threshold {
    my ($self) = @_;

    my @hits = (
        $self->newhits_below_threshold,
        $self->oldhits_newly_below_threshold,
    );

    return @hits;
}
646 =head2 get_hit
648 See documentation in L<Bio::Search::Iteration::IterationI::get_hit()|Bio::Search::Iteration::IterationI>.
650 To free up the memory used by the get_hit() functionality, call free_hit_lookup().
652 This functionality might be useful at the Result level, too.
653 BlastResult::get_hit() would return a list of HitI objects for hits
654 that occur in multiple iterations.
656 =cut
# Looks up a hit by name, case-insensitively (names are upper-cased before
# the lookup). The lookup table is built on first use via
# _create_hit_lookup(). Returns undef when no hit with that name is known.
sub get_hit {
    my $self = shift;
    my $name = shift;

    if ( !defined $self->{'_hit_lookup'} ) {
        $self->_create_hit_lookup();
    }

    my $key = uc $name;
    return $self->{'_hit_lookup'}->{$key};
}
665 # Internal method.
# Internal method: fills the name => hit table used by get_hit(), keyed by
# the upper-cased result of each hit's name() method.
#
# NOTE(review): add_hit() also accepts plain HASH hits, which stay unblessed
# while no hit factory is set; name() cannot be called on those - confirm
# callers always provide a factory before get_hit() is used.
sub _create_hit_lookup {
    my ($self) = @_;

    foreach my $hit ( $self->hits ) {
        my $key = uc $hit->name;
        $self->{'_hit_lookup'}->{$key} = $hit;
    }
}
674 =head2 free_hit_lookup
676 Purpose : Frees up the memory used by the get_hit() functionality.
677 For the memory-conscious.
679 =cut
# Discards the name lookup table built for get_hit(); the next get_hit()
# call rebuilds it.
sub free_hit_lookup {
    my ($self) = @_;
    $self->{'_hit_lookup'} = undef;
}