sync trunk with branch
[bioperl-live.git] / Bio / Search / Iteration / GenericIteration.pm
blob77481e334ac7825f0cbdab3571d996bea94583d4
1 # $Id$
3 # BioPerl module for Bio::Search::Iteration::GenericIteration
5 # Cared for by Steve Chervitz <sac@bioperl.org>
7 # Copyright Steve Chervitz
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 # TODO: Consider calling this BlastIteration (strongly) and maybe simplifying IterationI.
15 =head1 NAME
17 Bio::Search::Iteration::GenericIteration - A generic implementation of the Bio::Search::Iteration::IterationI interface.
19 =head1 SYNOPSIS
21 use Bio::Search::Iteration::GenericIteration;
22 my $it = Bio::Search::Iteration::GenericIteration->new(
23 -number => 1,
24 -converged => 0,
25 -newhits_unclassified => [@newhits_unclass],
26 -newhits_below => [@newhits_below_threshold],
27 -newhits_not_below => [@newhits_not_below_threshold],
28 -oldhits_below => [@oldhits_below_threshold],
29 -oldhits_newly_below => [@oldhits_newly_below_threshold],
30 -oldhits_not_below => [@oldhits_not_below_threshold],
33 # TODO: Describe how to configure a SearchIO stream so that it generates
34 # GenericIteration objects.
37 =head1 DESCRIPTION
39 This module acts as a container for Bio::Search::Hit::HitI objects,
40 allowing a Search::Result::ResultI object to partition its hits based
41 on which iteration the hit occurred in (e.g., a PSI-BLAST round).
43 Unless you're writing a parser, you won't ever need to create a
44 GenericIteration or any other IterationI-implementing object. If you use
45 the SearchIO system, IterationI objects are created automatically from
46 a SearchIO stream which returns Bio::Search::Result::ResultI objects
47 and you get the IterationI objects via the ResultI API.
49 For documentation on what you can do with GenericIteration (and other IterationI
50 objects), please see the API documentation in
51 L<Bio::Search::Iteration::IterationI|Bio::Search::Iteration::IterationI>.
53 Bio::Search::Iteration::GenericIteration is similar in spirit to the deprecated
54 Bio::Tools::BPlite::Iteration modules in bioperl releases prior to 1.6, except
55 that Bio::Search::Iteration::GenericIteration is a pure container, without any
56 parsing functionality as is in Bio::Tools::BPlite::Iteration.
58 =head1 FEEDBACK
60 =head2 Mailing Lists
62 User feedback is an integral part of the evolution of this and other
63 Bioperl modules. Send your comments and suggestions preferably to
64 the Bioperl mailing list. Your participation is much appreciated.
66 bioperl-l@bioperl.org - General discussion
67 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
69 =head2 Reporting Bugs
71 Report bugs to the Bioperl bug tracking system to help us keep track
72 of the bugs and their resolution. Bug reports can be submitted via the
73 web:
75 http://bugzilla.open-bio.org/
77 =head1 AUTHOR - Steve Chervitz
79 Email sac@bioperl.org
81 =head1 APPENDIX
83 The rest of the documentation details each of the object methods.
84 Internal methods are usually preceded with a _
86 =cut
89 # Let the code begin...
92 package Bio::Search::Iteration::GenericIteration;
93 use strict;
96 use base qw(Bio::Root::Root Bio::Search::Iteration::IterationI);
98 =head2 new
100 Title : new
101 Usage : my $obj = Bio::Search::Iteration::GenericIteration->new(%args);
102 Function: Builds a new Bio::Search::Iteration::GenericIteration object
103 Returns : Bio::Search::Iteration::GenericIteration object
104 Args : -number => integer for the number of this iteration (required)
105 -converged => boolean value whether or not the iteration converged
106 -newhits_unclassified => array reference to hits that were not found
107 in a previous iteration for the iteration and have not been
108 classified with regard to the inclusion threshold
110 # The following are only used for PSI-BLAST reports:
112 -newhits_below => array reference to hits that were not found in a
113 previous iteration and are below the inclusion threshold.
114 -newhits_not_below => array reference to hits that were not found in a
115 previous iteration and are not below
116 the inclusion threshold.
117 -oldhits_below => array reference to hits that were found
118 in a previous iteration below inclusion threshold and are
119 still below threshold in the current iteration.
120 -oldhits_newly_below => array reference to hits that were found
121 in a previous iteration above threshold but are below
122 threshold in the current iteration.
123 -oldhits_not_below => array reference to hits that were found in a
124 previous iteration above threshold and are still above
125 the inclusion threshold.
127 -hit_factory => Bio::Factory::ObjectFactoryI capable of making
128 Bio::Search::Hit::HitI objects
130 =cut
# Constructor.
#
# Parses the named arguments, records the iteration number and the
# convergence flag, and stores each optional hit list in its own slot.
#
# Args   : -number (required), -converged, -hit_factory, and the six
#          -newhits_* / -oldhits_* lists, each an array reference if given.
# Returns: the newly built object.
# Throws : Bio::Root::BadParameter when -number is missing or when a hit
#          list is supplied as something other than an array reference.
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    # The values are assigned positionally, so the key order below is kept
    # exactly as it has always been.
    my ($number, $newhits_unclassified, $newhits_below, $newhits_not_below,
        $oldhits_below, $oldhits_newly_below, $oldhits_not_below, $converged,
        $h_f) =
        $self->_rearrange([qw(NUMBER
                              NEWHITS_UNCLASSIFIED
                              NEWHITS_BELOW
                              NEWHITS_NOT_BELOW
                              OLDHITS_BELOW
                              OLDHITS_NEWLY_BELOW
                              OLDHITS_NOT_BELOW
                              CONVERGED
                              HIT_FACTORY
                             )], @args);

    if (!defined $number) {
        $self->throw(-class => 'Bio::Root::BadParameter',
                     -text  => "Iteration number not specified.");
    }
    else {
        $self->number($number);
    }

    $self->converged($converged) if defined $converged;

    # TODO: Performance optimization test calling add_hit() vs. simple assignment:
    #       push @{$self->{'_hits_new'}}, @{$newhits};
    #  vs.
    #       foreach(@{$newhits_below}) {$self->add_hit(-hit=>$_, -old=>0, -below=>1);}

    # Core module; loaded here so the block does not depend on the file's
    # top-level use lines.
    require Scalar::Util;

    # One row per hit list: parameter name (for messages), object slot, value.
    my @hit_lists = (
        [ 'NEWHITS_UNCLASSIFIED', '_newhits_unclassified',          $newhits_unclassified ],
        [ 'NEWHITS_BELOW',        '_newhits_below_threshold',       $newhits_below        ],
        [ 'NEWHITS_NOT_BELOW',    '_newhits_not_below_threshold',   $newhits_not_below    ],
        [ 'OLDHITS_BELOW',        '_oldhits_below_threshold',       $oldhits_below        ],
        [ 'OLDHITS_NEWLY_BELOW',  '_oldhits_newly_below_threshold', $oldhits_newly_below  ],
        [ 'OLDHITS_NOT_BELOW',    '_oldhits_not_below_threshold',   $oldhits_not_below    ],
    );

    foreach my $spec (@hit_lists) {
        my ($param, $slot, $hits) = @{$spec};

        # An omitted list becomes an empty one.
        if (!defined $hits) {
            $self->{$slot} = [];
            next;
        }

        # reftype() looks at the underlying data type, so blessed array
        # references are accepted and look-alike class names are not.
        if ((Scalar::Util::reftype($hits) || '') ne 'ARRAY') {
            $self->throw(-class => 'Bio::Root::BadParameter',
                         -text  => "Parameter $param is not an array ref: $hits");
        }
        push @{ $self->{$slot} }, @{$hits};
    }

    $self->hit_factory($h_f) if $h_f;

    return $self;
}
236 =head2 number
238 See documentation in Bio::Search::Iteration::IterationI.
240 =cut
# Get/set accessor for the iteration number.
#
# Returns the value held BEFORE this call. The first call without an
# argument initialises the slot to '' and returns ''.
sub number {
    my ($self, $value) = @_;

    my $previous = $self->{'_number'};

    # Plain read of an already initialised slot: nothing to store.
    return $previous if !defined $value && defined $previous;

    if (!defined $value) {
        $value    = '';
        $previous = '';
    }
    $self->{'_number'} = $value;

    return $previous;
}
252 =head2 converged
254 See documentation in Bio::Search::Iteration::IterationI.
256 =cut
# Get/set accessor for the convergence flag.
#
# Returns the value held BEFORE this call. The first call without an
# argument initialises the slot to '' and returns ''.
sub converged {
    my ($self, $value) = @_;

    my $previous = $self->{'_converged'};

    # Plain read of an already initialised slot: nothing to store.
    return $previous if !defined $value && defined $previous;

    if (!defined $value) {
        $value    = '';
        $previous = '';
    }
    $self->{'_converged'} = $value;

    return $previous;
}
269 =head2 hit_factory
271 Title : hit_factory
272 Usage : $iteration->hit_factory($hit_factory)
273 Function: Get/set the factory used to build HitI objects if necessary.
274 Returns : Bio::Factory::ObjectFactoryI
275 Args : Bio::Factory::ObjectFactoryI
277 =cut
# Get/set accessor for the factory used to build hit objects.
#
# Any argument (even undef) is stored. Returns the stored factory when it
# is true; otherwise returns nothing (empty list / undef).
sub hit_factory {
    my ($self, @new_value) = @_;

    $self->{_hit_factory} = $new_value[0] if @new_value;

    my $factory = $self->{_hit_factory};
    return $factory if $factory;
    return;
}
285 =head2 next_hit
287 This iterates through all old hits as returned by L<oldhits>
288 followed by all new hits as returned by L<newhits>.
290 For more documentation see L<Bio::Search::Iteration::IterationI::next_hit()|Bio::Search::Iteration::IterationI>.
292 =cut
# Iterator over all hits: old hits first, then new hits.
#
# The queue is built on the first call (or the first call after the
# started flag has been cleared). Returns the next queued hit, or undef
# once the queue is empty.
sub next_hit {
    my $self = shift;

    if (!$self->{'_hit_queue_started'}) {
        my @queue = ($self->oldhits(), $self->newhits());
        $self->{'_hit_queue'}         = \@queue;
        $self->{'_hit_queue_started'} = 1;
    }

    return shift @{ $self->{'_hit_queue'} };
}
304 =head2 next_hit_new
306 See documentation in L<Bio::Search::Iteration::IterationI::next_hit_new()|Bio::Search::Iteration::IterationI>.
308 =cut
# Iterator over the new hits only.
#
# The queue is built from newhits() on the first call (or the first call
# after the started flag has been cleared). Returns the next queued hit,
# or undef once the queue is empty.
sub next_hit_new {
    my $self = shift;

    if (!$self->{'_hit_queue_new_started'}) {
        my @queue = $self->newhits();
        $self->{'_hit_queue_new'}         = \@queue;
        $self->{'_hit_queue_new_started'} = 1;
    }

    return shift @{ $self->{'_hit_queue_new'} };
}
320 =head2 next_hit_old
322 See documentation in L<Bio::Search::Iteration::IterationI::next_hit_old()|Bio::Search::Iteration::IterationI>.
324 =cut
# Iterator over the old hits only.
#
# The queue is built from oldhits() on the first call (or the first call
# after the started flag has been cleared). Any extra argument is
# ignored. Returns the next queued hit, or undef once the queue is empty.
sub next_hit_old {
    my $self = shift;

    if (!$self->{'_hit_queue_old_started'}) {
        my @queue = $self->oldhits();
        $self->{'_hit_queue_old'}         = \@queue;
        $self->{'_hit_queue_old_started'} = 1;
    }

    return shift @{ $self->{'_hit_queue_old'} };
}
336 =head2 rewind
338 Title : rewind
339 Usage : $iteration->rewind;
340 Function: Allow one to reset the Hit iterators to the beginning
341 Since this is an in-memory implementation
342 Returns : none
343 Args : none
345 =cut
# Resets all three hit iterators (all / new / old) so the next call to a
# next_hit* method rebuilds its queue, then rewinds every hit object.
#
# Args   : none
# Returns: nothing
sub rewind {
    my $self = shift;

    $self->{$_} = 0
        for qw(_hit_queue_started _hit_queue_new_started _hit_queue_old_started);

    foreach my $hit ($self->hits) {
        # add_hit() accepts raw attribute hashes, and the accessors hand
        # them back unchanged when no hit factory is set. Such hashes have
        # no methods, so they are skipped.
        next if ref($hit) eq 'HASH';
        $hit->rewind;
    }

    return;
}
358 =head2 num_hits
360 See documentation in L<Bio::Search::Iteration::IterationI::num_hits()|Bio::Search::Iteration::IterationI>.
362 =cut
# Total number of hits: the old-hit count plus the new-hit count.
sub num_hits {
    my ($self) = @_;

    my $old_count = $self->num_hits_old;
    my $new_count = $self->num_hits_new;

    return $old_count + $new_count;
}
370 =head2 num_hits_new
372 See documentation in L<Bio::Search::Iteration::IterationI::num_hits_new()|Bio::Search::Iteration::IterationI>.
374 =cut
# Number of new hits: whatever newhits() yields when called in scalar
# context.
sub num_hits_new {
    my ($self) = @_;

    my $count = $self->newhits();    # scalar-context call

    return $count;
}
382 =head2 num_hits_old
384 See documentation in L<Bio::Search::Iteration::IterationI::num_hits_old()|Bio::Search::Iteration::IterationI>.
386 =cut
# Number of old hits: whatever oldhits() yields when called in scalar
# context. Any extra argument is ignored.
sub num_hits_old {
    my ($self) = @_;

    my $count = $self->oldhits();    # scalar-context call

    return $count;
}
394 =head2 add_hit
396 See documentation in L<Bio::Search::Iteration::IterationI::add_hit()|Bio::Search::Iteration::IterationI>.
398 =cut
# Adds one hit to the list chosen by the -old / -below_threshold /
# -newly_below flags.
#
# Args   : -hit             => hash reference or Bio::Search::Hit::HitI object
#          -old             => true: old hit; defined but false: new hit;
#                              undefined: hit goes to the unclassified list
#          -below_threshold => true / defined-but-false / undefined
#          -newly_below     => true when an old hit is newly below threshold
# Returns: number of hits now held in the list the hit was added to.
# Throws : Bio::Root::BadParameter if -hit is neither a hash reference nor
#          a Bio::Search::Hit::HitI object.
sub add_hit {
    my ($self, @args) = @_;

    # Four keys for four variables; OLD must sit second so that -old binds
    # to $old and the remaining flags are not shifted by one position.
    my ($hit, $old, $below, $newly_below) =
        $self->_rearrange([qw(HIT
                              OLD
                              BELOW_THRESHOLD
                              NEWLY_BELOW
                             )], @args);

    # blessed() guards the isa() call: invoking a method on undef or on an
    # unblessed reference would die before the check below could throw.
    require Scalar::Util;
    my $is_hit_object = Scalar::Util::blessed($hit)
        && $hit->isa('Bio::Search::Hit::HitI');

    unless (ref($hit) eq 'HASH' || $is_hit_object) {
        $self->throw(-class => 'Bio::Root::BadParameter',
                     -text  => "Passed in " . (ref($hit) || 'a non-reference value') .
                               " as a Hit which is not a Bio::Search::Hit::HitI.");
    }

    my $slot;
    if ($old) {
        $slot = $newly_below ? '_oldhits_newly_below_threshold'
              : $below       ? '_oldhits_below_threshold'
              :                '_oldhits_not_below_threshold';
    }
    elsif (defined $old && defined $below) {
        # -old is defined but false, so this is a new PSI-BLAST hit
        $slot = $below ? '_newhits_below_threshold'
              :          '_newhits_not_below_threshold';
    }
    else {
        # Either -old is not defined (non-PSI-BLAST), or -below_threshold
        # is not defined (PSI-BLAST threshold may not be known).
        $slot = '_newhits_unclassified';
    }

    push @{ $self->{$slot} }, $hit;

    return scalar @{ $self->{$slot} };
}
448 =head2 hits
450 See documentation in Bio::Search::Iteration::IterationI.
452 =cut
# Returns every hit of this iteration: all new hits followed by all old
# hits. In scalar context the result is the total number of hits.
sub hits {
    my $self = shift;

    # Collect into a single array. Returning the two-array list
    # "(@new, @old)" would, in scalar context, evaluate to the size of the
    # last array only, i.e. the old-hit count.
    my @all = $self->newhits;
    push @all, $self->oldhits;

    return @all;
}
462 =head2 newhits
464 Returns a list containing all newhits in this order:
466 newhits_below_threshold
467 newhits_not_below_threshold
468 newhits_unclassified
470 See more documentation in Bio::Search::Iteration::IterationI.
472 =cut
# Returns all new hits as one list: those below threshold, then those not
# below threshold, then the unclassified ones.
sub newhits {
    my ($self) = @_;

    my @collected = (
        $self->newhits_below_threshold,
        $self->newhits_not_below_threshold,
        $self->newhits_unclassified,
    );

    return @collected;
}
482 =head2 newhits_below_threshold
484 See documentation in L<Bio::Search::Iteration::IterationI::newhits_below_threshold()|Bio::Search::Iteration::IterationI>.
486 =cut
# Returns the new hits that are below the inclusion threshold.
#
# With a hit factory set, entries still stored as hash references are
# replaced in the stored list by the factory's create_object() result.
# Without a factory the list is returned as stored. Returns nothing when
# the slot holds no reference.
sub newhits_below_threshold {
    my ($self) = @_;

    my $stored = $self->{'_newhits_below_threshold'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    return @{$stored} unless $factory;

    # foreach aliases each element, so assigning to $entry updates the list.
    foreach my $entry (@{$stored}) {
        next unless ref($entry) eq 'HASH';
        $entry = $factory->create_object(%{$entry});
    }

    return @{$stored};
}
501 =head2 newhits_not_below_threshold
503 See documentation in L<Bio::Search::Iteration::IterationI::newhits_not_below_threshold()|Bio::Search::Iteration::IterationI>.
505 =cut
# Returns the new hits that are not below the inclusion threshold.
#
# With a hit factory set, entries still stored as hash references are
# replaced in the stored list by the factory's create_object() result.
# Without a factory the list is returned as stored. Returns nothing when
# the slot holds no reference.
sub newhits_not_below_threshold {
    my ($self) = @_;

    my $stored = $self->{'_newhits_not_below_threshold'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    return @{$stored} unless $factory;

    # foreach aliases each element, so assigning to $entry updates the list.
    foreach my $entry (@{$stored}) {
        next unless ref($entry) eq 'HASH';
        $entry = $factory->create_object(%{$entry});
    }

    return @{$stored};
}
520 =head2 newhits_unclassified
522 Title : newhits_unclassified
523 Usage : foreach( $iteration->newhits_unclassified ) {...}
524 Function: Gets all newhits that have not been partitioned into
525 sets relative to the inclusion threshold.
526 Returns : Array of Bio::Search::Hit::HitI objects.
527 Args : none
529 =cut
# Returns the new hits that have not been classified against the
# inclusion threshold.
#
# With a hit factory set, entries still stored as hash references are
# replaced in the stored list by the factory's create_object() result.
# Without a factory the list is returned as stored. Returns nothing when
# the slot holds no reference.
sub newhits_unclassified {
    my ($self) = @_;

    my $stored = $self->{'_newhits_unclassified'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    return @{$stored} unless $factory;

    # foreach aliases each element, so assigning to $entry updates the list.
    foreach my $entry (@{$stored}) {
        next unless ref($entry) eq 'HASH';
        $entry = $factory->create_object(%{$entry});
    }

    return @{$stored};
}
544 =head2 oldhits
546 Returns a list containing all oldhits in this order:
548 oldhits_below_threshold
549 oldhits_newly_below_threshold
550 oldhits_not_below_threshold
552 See more documentation in Bio::Search::Iteration::IterationI.
554 =cut
# Returns all old hits as one list: those below threshold, then those
# newly below threshold, then those not below threshold.
sub oldhits {
    my ($self) = @_;

    my @collected = (
        $self->oldhits_below_threshold,
        $self->oldhits_newly_below_threshold,
        $self->oldhits_not_below_threshold,
    );

    return @collected;
}
564 =head2 oldhits_below_threshold
566 See documentation in L<Bio::Search::Iteration::IterationI::oldhits_below_threshold()|Bio::Search::Iteration::IterationI>.
568 =cut
# Returns the old hits that are below the inclusion threshold.
#
# With a hit factory set, entries still stored as hash references are
# replaced in the stored list by the factory's create_object() result.
# Without a factory the list is returned as stored. Returns nothing when
# the slot holds no reference.
sub oldhits_below_threshold {
    my ($self) = @_;

    my $stored = $self->{'_oldhits_below_threshold'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    return @{$stored} unless $factory;

    # foreach aliases each element, so assigning to $entry updates the list.
    foreach my $entry (@{$stored}) {
        next unless ref($entry) eq 'HASH';
        $entry = $factory->create_object(%{$entry});
    }

    return @{$stored};
}
583 =head2 oldhits_newly_below_threshold
585 See documentation in L<Bio::Search::Iteration::IterationI::oldhits_newly_below_threshold()|Bio::Search::Iteration::IterationI>.
587 =cut
# Returns the old hits that are newly below the inclusion threshold.
#
# With a hit factory set, entries still stored as hash references are
# replaced in the stored list by the factory's create_object() result.
# Without a factory the list is returned as stored. Returns nothing when
# the slot holds no reference.
sub oldhits_newly_below_threshold {
    my ($self) = @_;

    my $stored = $self->{'_oldhits_newly_below_threshold'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    return @{$stored} unless $factory;

    # foreach aliases each element, so assigning to $entry updates the list.
    foreach my $entry (@{$stored}) {
        next unless ref($entry) eq 'HASH';
        $entry = $factory->create_object(%{$entry});
    }

    return @{$stored};
}
602 =head2 oldhits_not_below_threshold
604 See documentation in L<Bio::Search::Iteration::IterationI::oldhits_not_below_threshold()|Bio::Search::Iteration::IterationI>.
606 =cut
# Returns the old hits that are not below the inclusion threshold.
#
# With a hit factory set, entries still stored as hash references are
# replaced in the stored list by the factory's create_object() result.
# Without a factory the list is returned as stored. Returns nothing when
# the slot holds no reference.
sub oldhits_not_below_threshold {
    my ($self) = @_;

    my $stored = $self->{'_oldhits_not_below_threshold'};
    return unless ref $stored;

    my $factory = $self->hit_factory;
    return @{$stored} unless $factory;

    # foreach aliases each element, so assigning to $entry updates the list.
    foreach my $entry (@{$stored}) {
        next unless ref($entry) eq 'HASH';
        $entry = $factory->create_object(%{$entry});
    }

    return @{$stored};
}
621 =head2 hits_below_threshold
623 See documentation in L<Bio::Search::Iteration::IterationI::hits_below_threshold()|Bio::Search::Iteration::IterationI>.
625 =cut
# Returns, as one list, the new hits below threshold followed by the old
# hits that are newly below threshold.
sub hits_below_threshold {
    my ($self) = @_;

    my @below = (
        $self->newhits_below_threshold,
        $self->oldhits_newly_below_threshold,
    );

    return @below;
}
634 =head2 get_hit
636 See documentation in L<Bio::Search::Iteration::IterationI::get_hit()|Bio::Search::Iteration::IterationI>.
638 To free up the memory used by the get_hit() functionality, call free_hit_lookup().
640 This functionality might be useful at the Result level, too.
641 BlastResult::get_hit() would return a list of HitI objects for hits
642 that occur in multiple iterations.
644 =cut
# Looks a hit up by name, ignoring case (names are upper-cased for the
# lookup). The lookup table is built on first use via
# _create_hit_lookup(). Returns the stored hit, or undef if the name is
# not in the table.
sub get_hit {
    my ($self, $name) = @_;

    if (!defined $self->{'_hit_lookup'}) {
        $self->_create_hit_lookup();
    }

    my $key = "\U$name";
    return $self->{'_hit_lookup'}->{$key};
}
# Internal method.
# Fills the '_hit_lookup' table used by get_hit(): each hit returned by
# hits() is stored under its upper-cased name. A later hit with the same
# upper-cased name replaces an earlier one.
sub _create_hit_lookup {
    my ($self) = @_;

    for my $hit ($self->hits) {
        my $hname = $hit->name;
        $self->{'_hit_lookup'}->{"\U$hname"} = $hit;
    }
}
662 =head2 free_hit_lookup
664 Purpose : Frees up the memory used by the get_hit() functionality.
665 For the memory-conscious.
667 =cut
# Drops the lookup table used by get_hit() by setting its slot to undef.
# Because get_hit() rebuilds the table whenever the slot is undefined,
# the next get_hit() call recreates it.
sub free_hit_lookup {
    my ($self) = @_;

    $self->{'_hit_lookup'} = undef;
}