1 #------------------------------------------------------------------
3 # BioPerl module for Bio::SearchIO::IteratedSearchResultEventBuilder
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Steve Chervitz <sac@bioperl.org> and Jason Stajich <jason@bioperl.org>
9 # Copyright Steve Chervitz
11 # You may distribute this module under the same terms as perl itself
12 #------------------------------------------------------------------
14 # POD documentation - main docs before the code
18 Bio::SearchIO::IteratedSearchResultEventBuilder - Event Handler for
23 # Do not use this object directly, this object is part of the SearchIO
24 # event based parsing system.
28 This object handles Search Events generated by the SearchIO classes
29 and build appropriate Bio::Search::* objects from them.
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to
37 the Bioperl mailing list. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
55 Report bugs to the Bioperl bug tracking system to help us keep track
56 of the bugs and their resolution. Bug reports can be submitted via the
59 https://redmine.open-bio.org/projects/bioperl/
61 =head1 AUTHOR - Steve Chervitz
63 Email sac-at-bioperl.org
67 Parts of code based on SearchResultEventBuilder by Jason Stajich
70 Sendu Bala, bix@sendu.me.uk
74 The rest of the documentation details each of the object methods.
75 Internal methods are usually preceded with a _
80 # Let the code begin...
83 package Bio
::SearchIO
::IteratedSearchResultEventBuilder
;
84 use vars
qw(%KNOWNEVENTS $DEFAULT_INCLUSION_THRESHOLD
90 use Bio::Factory::ObjectFactory;
92 use base qw(Bio::SearchIO::SearchResultEventBuilder);
94 # e-value threshold for inclusion in the PSI-BLAST score matrix model (blastpgp)
95 # NOTE: Executing `blastpgp -` incorrectly reports that the default is 0.005.
96 # (version 2.2.2 [Jan-08-2002])
97 $DEFAULT_INCLUSION_THRESHOLD = 0.001;
100 $MAX_HSP_OVERLAP = 2; # Used when tiling multiple HSPs.
105 Usage : my $obj = Bio::SearchIO::IteratedSearchResultEventBuilder->new();
106 Function: Builds a new Bio::SearchIO::IteratedSearchResultEventBuilder object
107 Returns : Bio::SearchIO::IteratedSearchResultEventBuilder
108 Args : -hsp_factory => Bio::Factory::ObjectFactoryI
109 -hit_factory => Bio::Factory::ObjectFactoryI
110 -result_factory => Bio::Factory::ObjectFactoryI
111 -iteration_factory => Bio::Factory::ObjectFactoryI
112 -inclusion_threshold => e-value threshold for inclusion in the
113 PSI-BLAST score matrix model (blastpgp)
114 -signif => float or scientific notation number to be used
115 as a P- or Expect value cutoff
116 -score => integer or scientific notation number to be used
117 as a blast score value cutoff
118 -bits => integer or scientific notation number to be used
119 as a bit score value cutoff
120 -hit_filter => reference to a function to be used for
121 filtering hits based on arbitrary criteria.
124 See L<Bio::SearchIO::SearchResultEventBuilder> for more information
129 my ($class,@args) = @_;
130 my $self = $class->SUPER::new
(@args);
131 my ($hitF, $resultF, $hspF, $iterationF) =
132 $self->_rearrange([qw(
139 $self->_init_parse_params(@args);
141 # Note that we need to override the setting of result and factories here
142 # so that we can set different default factories than are set by the super class.
143 $self->register_factory('result', $resultF ||
144 Bio
::Factory
::ObjectFactory
->new(
145 -type
=> 'Bio::Search::Result::BlastResult',
146 -interface
=> 'Bio::Search::Result::ResultI'));
148 $self->register_factory('hit', $hitF ||
149 Bio
::Factory
::ObjectFactory
->new(
150 -type
=> 'Bio::Search::Hit::BlastHit',
151 -interface
=> 'Bio::Search::Hit::HitI'));
153 $self->register_factory('hsp', $hspF ||
154 Bio
::Factory
::ObjectFactory
->new(
155 -type
=> 'Bio::Search::HSP::GenericHSP',
156 -interface
=> 'Bio::Search::HSP::HSPI'));
158 # TODO: Change this to BlastIteration (maybe)
159 $self->register_factory('iteration', $iterationF ||
160 Bio
::Factory
::ObjectFactory
->new(
161 -type
=> 'Bio::Search::Iteration::GenericIteration',
162 -interface
=> 'Bio::Search::Iteration::IterationI'));
167 #Initializes parameters used during parsing of Blast reports.
168 sub _init_parse_params
{
170 my ($self, @args) = @_;
171 # -FILT_FUNC has been replaced by -HIT_FILTER.
172 # Leaving -FILT_FUNC in place for backward compatibility
173 my($ithresh, $signif, $score, $bits, $hit_filter, $filt_func) =
174 $self->_rearrange([qw(INCLUSION_THRESHOLD
175 SIGNIF SCORE BITS HIT_FILTER FILT_FUNC
178 $self->inclusion_threshold( defined($ithresh) ?
$ithresh : $DEFAULT_INCLUSION_THRESHOLD);
179 my $hit_filt = $hit_filter || $filt_func;
180 defined $hit_filter && $self->hit_filter($hit_filt);
181 defined $signif && $self->max_significance($signif);
182 defined $score && $self->min_score($score);
183 defined $bits && $self->min_bits($bits);
189 Usage : if( $handler->will_handle($event_type) ) { ... }
190 Function: Tests if this event builder knows how to process a specific event
192 Args : event type name
198 my ($self,$type) = @_;
199 # these are the events we recognize
200 return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result' || $type eq 'iteration' ||
201 $type eq 'newhits' || $type eq 'oldhits' );
211 Usage : $handler->start_result($resulttype)
212 Function: Begins a result event cycle
214 Args : Type of Report
220 #print STDERR "ISREB: start_result()\n";
221 $self->SUPER::start_result
(@_);
222 $self->{'_iterations'} = [];
223 $self->{'_iteration_count'} = 0;
224 $self->{'_old_hit_names'} = undef;
225 $self->{'_hit_names_below'} = undef;
232 Usage : my @results = $parser->end_result
233 Function: Finishes a result handler cycle
234 Returns : A Bio::Search::Result::ResultI
240 my ($self,$type,$data) = @_;
241 #print STDERR "ISREB: end_result\n";
242 ## How is runid getting set? Purpose?
243 if( defined $data->{'runid'} &&
244 $data->{'runid'} !~ /^\s+$/ ) {
246 if( $data->{'runid'} !~ /^lcl\|/) {
247 $data->{"RESULT-query_name"}= $data->{'runid'};
249 ($data->{"RESULT-query_name"},$data->{"RESULT-query_description"}) =
250 split(/\s+/,$data->{"RESULT-query_description"},2);
253 if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
254 my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
255 # this is for |123|gb|ABC1.1|
256 $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
257 $data->{"RESULT-query_accession"}= $acc;
259 delete $data->{'runid'};
261 my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); }
262 grep { /^RESULT/ } keys %{$data};
264 $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
265 $data->{'RESULT-algorithm_name'} || $type);
267 $args{'-iterations'} = $self->{'_iterations'};
269 my $result = $self->factory('result')->create_object(%args);
270 $result->hit_factory($self->factory('hit'));
271 $self->{'_iterations'} = [];
276 # Title : _add_hit (private function for internal use only)
277 # Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
278 # Argument: Bio::Search::Hit::HitI object
281 my ($self, $hit) = @_;
283 my $hit_name = uc($hit->{-name
});
284 my $hit_signif = $hit->{-significance
};
285 my $ithresh = $self->{'_inclusion_threshold'};
287 # Test significance using custom function (if supplied)
290 my $hit_filter = $self->{'_hit_filter'};
293 # since &hit_filter is out of our control and would expect a HitI object,
294 # we're forced to make one for it
295 $hit = $self->factory('hit')->create_object(%{$hit});
296 $add_hit = 0 unless &$hit_filter($hit);
298 if($self->{'_confirm_significance'}) {
299 $add_hit = 0 unless $hit_signif <= $self->{'_max_significance'};
301 if($self->{'_confirm_score'}) {
302 my $hit_score = $hit->{-score
} || $hit->{-hsps
}->[0]->{-score
};
303 $add_hit = 0 unless $hit_score >= $self->{'_min_score'};
305 if($self->{'_confirm_bits'}) {
306 my $hit_bits = $hit->{-bits
} || $hit->{-hsps
}->[0]->{-bits
};
307 $add_hit = 0 unless $hit_bits >= $self->{'_min_bits'};
311 $add_hit && $self->_store_hit($hit, $hit_name, $hit_signif);
312 # Building hit lookup hashes for determining if the hit is old/new and
313 # above/below threshold.
314 $self->{'_old_hit_names'}->{$hit_name}++;
315 $self->{'_hit_names_below'}->{$hit_name}++ if $hit_signif <= $ithresh;
318 # Title : _store_hit (private function for internal use only)
319 # Purpose : Collects hit objects into defined sets that are useful for
320 # analyzing PSI-blast results.
321 # These are ultimately added to the iteration object in end_iteration().
324 # Primary split = old vs. new
325 # Secondary split = below vs. above threshold
326 # 1. Has this hit occurred in a previous iteration?
327 # 1.1. If yes, was it below threshold?
328 # 1.1.1. If yes, ---> [oldhits_below]
329 # 1.1.2. If no, is it now below threshold?
330 # 1.1.2.1. If yes, ---> [oldhits_newly_below]
331 # 1.1.2.2. If no, ---> [oldhits_not_below]
332 # 1.2. If no, is it below threshold?
333 # 1.2.1. If yes, ---> [newhits_below]
334 # 1.2.2. If no, ---> [newhits_not_below]
335 # 1.2.3. If don't know (no inclusion threshold data), ---> [newhits_unclassified]
336 # Note: As long as there's a default inclusion threshold,
337 # there won't be an unclassified set.
339 # For the first iteration, it might be nice to detect non-PSI blast reports
340 # and put the hits in the unclassified set.
341 # However, it shouldn't matter where the hits get put for the first iteration
342 # for non-PSI blast reports since they'll get flattened out in the
343 # result and iteration search objects.
347 my ($self, $hit, $hit_name, $hit_signif) = @_;
349 my $ithresh = $self->{'_inclusion_threshold'};
351 # This is the assumption leading to Bug 1986. The assumption here is that
352 # the hit name is unique (and thus new), therefore any subsequent encounters
353 # with a hit containing the same name are filed as old hits. This isn't
354 # always true (see the bug report for a few examples). Adding an explicit
355 # check for the presence of iterations, adding to new hits otherwise.
357 if (exists $self->{'_old_hit_names'}->{$hit_name}
358 && scalar @
{$self->{_iterations
}}) {
359 if (exists $self->{'_hit_names_below'}->{$hit_name}) {
360 push @
{$self->{'_oldhits_below'}}, $hit;
361 } elsif ($hit_signif <= $ithresh) {
362 push @
{$self->{'_oldhits_newly_below'}}, $hit;
364 push @
{$self->{'_oldhits_not_below'}}, $hit;
367 if ($hit_signif <= $ithresh) {
368 push @
{$self->{'_newhits_below'}}, $hit;
370 push @
{$self->{'_newhits_not_below'}}, $hit;
373 $self->{'_hitcount'}++;
376 =head2 start_iteration
378 Title : start_iteration
379 Usage : $handler->start_iteration()
380 Function: Starts an Iteration event cycle
382 Args : type of event and associated hashref
386 sub start_iteration
{
387 my ($self,$type) = @_;
389 #print STDERR "ISREB: start_iteration()\n";
390 $self->{'_iteration_count'}++;
392 # Reset arrays for the various classes of hits.
393 # $self->{'_newhits_unclassified'} = [];
394 $self->{'_newhits_below'} = [];
395 $self->{'_newhits_not_below'} = [];
396 $self->{'_oldhits_below'} = [];
397 $self->{'_oldhits_newly_below'} = [];
398 $self->{'_oldhits_not_below'} = [];
399 $self->{'_hitcount'} = 0;
406 Title : end_iteration
407 Usage : $handler->end_iteration()
408 Function: Ends an Iteration event cycle
409 Returns : Bio::Search::Iteration object
410 Args : type of event and associated hashref
416 my ($self,$type,$data) = @_;
418 # print STDERR "ISREB: end_iteration()\n";
420 my %args = map { my $v = $data->{$_}; s/ITERATION//; ($_ => $v); }
421 grep { /^ITERATION/ } keys %{$data};
423 $args{'-number'} = $self->{'_iteration_count'};
424 $args{'-oldhits_below'} = $self->{'_oldhits_below'};
425 $args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
426 $args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
427 $args{'-newhits_below'} = $self->{'_newhits_below'};
428 $args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
429 $args{'-hit_factory'} = $self->factory('hit');
431 my $it = $self->factory('iteration')->create_object(%args);
432 push @
{$self->{'_iterations'}}, $it;
436 =head2 max_significance
438 Usage : $obj->max_significance();
439 Purpose : Set/Get the P or Expect value used as significance screening cutoff.
440 This is the value of the -signif parameter supplied to new().
441 Hits with P or E-value above this are skipped.
442 Returns : Scientific notation number with this format: 1.0e-05.
443 Argument : Number (sci notation, float, integer) (when setting)
444 Throws : Bio::Root::BadParameter exception if the supplied argument is
445 : not a valid number.
446 Comments : Screening of significant hits uses the data provided on the
447 : description line. For NCBI BLAST1 and WU-BLAST, this data
448 : is P-value. for NCBI BLAST2 it is an Expect value.
452 sub max_significance
{
456 if( $sig =~ /[^\d.e-]/ or $sig <= 0) {
457 $self->throw(-class => 'Bio::Root::BadParameter',
458 -text
=> "Invalid significance value: $sig\n".
459 "Must be a number greater than zero.",
462 $self->{'_confirm_significance'} = 1;
463 $self->{'_max_significance'} = $sig;
465 sprintf "%.1e", $self->{'_max_significance'};
471 Synonym for L<max_significance()|max_significance>
475 sub signif
{ shift->max_significance }
479 Usage : $obj->min_score();
480 Purpose : Gets the Blast score used as screening cutoff.
481 This is the value of the -score parameter supplied to new().
482 Hits with scores below this are skipped.
483 Returns : Integer (or undef if not set)
484 Argument : Integer (when setting)
485 Throws : Bio::Root::BadParameter exception if the supplied argument is
486 : not a valid number.
487 Comments : Screening of significant hits uses the data provided on the
496 if( $score =~ /[^\de+]/ or $score <= 0) {
497 $self->throw(-class => 'Bio::Root::BadParameter',
498 -text
=> "Invalid score value: $score\n".
499 "Must be an integer greater than zero.",
502 $self->{'_confirm_score'} = 1;
503 $self->{'_min_score'} = $score;
505 return $self->{'_min_score'};
511 Usage : $obj->min_bits();
512 Purpose : Gets the Blast bit score used as screening cutoff.
513 This is the value of the -bits parameter supplied to new().
514 Hits with bits score below this are skipped.
515 Returns : Integer (or undef if not set)
516 Argument : Integer (when setting)
517 Throws : Bio::Root::BadParameter exception if the supplied argument is
518 : not a valid number.
519 Comments : Screening of significant hits uses the data provided on the
528 if( $bits =~ /[^\de+]/ or $bits <= 0) {
529 $self->throw(-class => 'Bio::Root::BadParameter',
530 -text
=> "Invalid bits value: $bits\n".
531 "Must be an integer greater than zero.",
534 $self->{'_confirm_bits'} = 1;
535 $self->{'_min_bits'} = $bits;
537 return $self->{'_min_bits'};
543 Usage : $obj->hit_filter();
544 Purpose : Set/Get a function reference used for filtering out hits.
545 This is the value of the -hit_filter parameter supplied to new().
546 Hits that fail to pass the filter are skipped.
547 Returns : Function ref (or undef if not set)
548 Argument : Function ref (when setting)
549 Throws : Bio::Root::BadParameter exception if the supplied argument is
550 : not a function reference.
558 if(not ref $func eq 'CODE') {
559 $self->throw(-class=>'Bio::Root::BadParameter',
560 -text
=>"Not a function reference: $func\n".
561 "The -hit_filter parameter must be function reference.",
564 $self->{'_hit_filter'} = $func;
566 return $self->{'_hit_filter'};
569 =head2 inclusion_threshold
571 See L<Bio::SearchIO::blast::inclusion_threshold>.
575 sub inclusion_threshold
{
577 return $self->{'_inclusion_threshold'} = shift if @_;
578 return $self->{'_inclusion_threshold'};