Hmmer3: Final fix for Bug #3369. Eliminated the removal of
[bioperl-live.git] / Bio / SearchIO / SearchResultEventBuilder.pm
bloba78a7cd5cfe453c75553afeece818bb376c4aeef
2 # BioPerl module for Bio::SearchIO::SearchResultEventBuilder
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SearchIO::SearchResultEventBuilder - Event Handler for SearchIO events.
18 =head1 SYNOPSIS
20 # Do not use this object directly, this object is part of the SearchIO
21 # event based parsing system.
23 =head1 DESCRIPTION
25 This object handles Search Events generated by the SearchIO classes
26 and build appropriate Bio::Search::* objects from them.
28 =head1 FEEDBACK
30 =head2 Mailing Lists
32 User feedback is an integral part of the evolution of this and other
33 Bioperl modules. Send your comments and suggestions preferably to
34 the Bioperl mailing list. Your participation is much appreciated.
36 bioperl-l@bioperl.org - General discussion
37 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39 =head2 Support
41 Please direct usage questions or support issues to the mailing list:
43 I<bioperl-l@bioperl.org>
45 rather than to the module maintainer directly. Many experienced and
46 reponsive experts will be able look at the problem and quickly
47 address it. Please include a thorough description of the problem
48 with code and data examples if at all possible.
50 =head2 Reporting Bugs
52 Report bugs to the Bioperl bug tracking system to help us keep track
53 of the bugs and their resolution. Bug reports can be submitted via the
54 web:
56 https://redmine.open-bio.org/projects/bioperl/
58 =head1 AUTHOR - Jason Stajich
60 Email jason-at-bioperl.org
62 =head1 CONTRIBUTORS
64 Sendu Bala, bix@sendu.me.uk
66 =head1 APPENDIX
68 The rest of the documentation details each of the object methods.
69 Internal methods are usually preceded with a _
71 =cut
74 # Let the code begin...
77 package Bio::SearchIO::SearchResultEventBuilder;
78 use vars qw(%KNOWNEVENTS);
79 use strict;
81 use Bio::Factory::ObjectFactory;
83 use base qw(Bio::Root::Root Bio::SearchIO::EventHandlerI);
85 =head2 new
87 Title : new
88 Usage : my $obj = Bio::SearchIO::SearchResultEventBuilder->new();
89 Function: Builds a new Bio::SearchIO::SearchResultEventBuilder object
90 Returns : Bio::SearchIO::SearchResultEventBuilder
91 Args : -hsp_factory => Bio::Factory::ObjectFactoryI
92 -hit_factory => Bio::Factory::ObjectFactoryI
93 -result_factory => Bio::Factory::ObjectFactoryI
95 See L<Bio::Factory::ObjectFactoryI> for more information
97 =cut
99 sub new {
100 my ($class,@args) = @_;
101 my $self = $class->SUPER::new(@args);
102 my ($hspF,$hitF,$resultF) = $self->_rearrange([qw(HSP_FACTORY
103 HIT_FACTORY
104 RESULT_FACTORY)],@args);
105 $self->register_factory('hsp', $hspF ||
106 Bio::Factory::ObjectFactory->new(
107 -type => 'Bio::Search::HSP::GenericHSP',
108 -interface => 'Bio::Search::HSP::HSPI'));
110 $self->register_factory('hit', $hitF ||
111 Bio::Factory::ObjectFactory->new(
112 -type => 'Bio::Search::Hit::GenericHit',
113 -interface => 'Bio::Search::Hit::HitI'));
115 $self->register_factory('result', $resultF ||
116 Bio::Factory::ObjectFactory->new(
117 -type => 'Bio::Search::Result::GenericResult',
118 -interface => 'Bio::Search::Result::ResultI'));
120 return $self;
123 # new comes from the superclass
125 =head2 will_handle
127 Title : will_handle
128 Usage : if( $handler->will_handle($event_type) ) { ... }
129 Function: Tests if this event builder knows how to process a specific event
130 Returns : boolean
131 Args : event type name
134 =cut
136 sub will_handle{
137 my ($self,$type) = @_;
138 # these are the events we recognize
139 return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result' );
142 =head2 SAX methods
144 =cut
146 =head2 start_result
148 Title : start_result
149 Usage : $handler->start_result($resulttype)
150 Function: Begins a result event cycle
151 Returns : none
152 Args : Type of Report
154 =cut
156 sub start_result {
157 my ($self,$type) = @_;
158 $self->{'_resulttype'} = $type;
159 $self->{'_hits'} = [];
160 $self->{'_hsps'} = [];
161 $self->{'_hitcount'} = 0;
162 return;
165 =head2 end_result
167 Title : end_result
168 Usage : my @results = $parser->end_result
169 Function: Finishes a result handler cycle
170 Returns : A Bio::Search::Result::ResultI
171 Args : none
173 =cut
175 # this is overridden by IteratedSearchResultEventBuilder
176 # so keep that in mind when debugging
178 sub end_result {
179 my ($self,$type,$data) = @_;
181 if( defined $data->{'runid'} &&
182 $data->{'runid'} !~ /^\s+$/ ) {
184 if( $data->{'runid'} !~ /^lcl\|/) {
185 $data->{"RESULT-query_name"}= $data->{'runid'};
186 } else {
187 ($data->{"RESULT-query_name"},
188 $data->{"RESULT-query_description"}) =
189 split(/\s+/,$data->{"RESULT-query_description"},2);
192 if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
193 my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
194 # this is for |123|gb|ABC1.1|
195 $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
196 $data->{"RESULT-query_accession"}= $acc;
198 delete $data->{'runid'};
200 my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); }
201 grep { /^RESULT/ } keys %{$data};
203 $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
204 $data->{'RESULT-algorithm_name'} || $type);
205 $args{'-hits'} = $self->{'_hits'};
206 my $result = $self->factory('result')->create_object(%args);
207 $result->hit_factory($self->factory('hit'));
208 $self->{'_hits'} = [];
209 return $result;
212 =head2 start_hsp
214 Title : start_hsp
215 Usage : $handler->start_hsp($name,$data)
216 Function: Begins processing a HSP event
217 Returns : none
218 Args : type of element
219 associated data (hashref)
221 =cut
223 sub start_hsp {
224 my ($self,@args) = @_;
225 return;
228 =head2 end_hsp
230 Title : end_hsp
231 Usage : $handler->end_hsp()
232 Function: Finish processing a HSP event
233 Returns : none
234 Args : type of event and associated hashref
237 =cut
239 sub end_hsp {
240 my ($self,$type,$data) = @_;
242 if( defined $data->{'runid'} &&
243 $data->{'runid'} !~ /^\s+$/ ) {
245 if( $data->{'runid'} !~ /^lcl\|/) {
246 $data->{"RESULT-query_name"}= $data->{'runid'};
247 } else {
248 ($data->{"RESULT-query_name"},
249 $data->{"RESULT-query_description"}) =
250 split(/\s+/,$data->{"RESULT-query_description"},2);
253 if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
254 my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
255 # this is for |123|gb|ABC1.1|
256 $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
257 $data->{"RESULT-query_accession"}= $acc;
259 delete $data->{'runid'};
262 # this code is to deal with the fact that Blast XML data
263 # always has start < end and one has to infer strandedness
264 # from the frame which is a problem for the Search::HSP object
265 # which expect to be able to infer strand from the order of
266 # of the begin/end of the query and hit coordinates
267 if( defined $data->{'HSP-query_frame'} && # this is here to protect from undefs
268 (( $data->{'HSP-query_frame'} < 0 &&
269 $data->{'HSP-query_start'} < $data->{'HSP-query_end'} ) ||
270 $data->{'HSP-query_frame'} > 0 &&
271 ( $data->{'HSP-query_start'} > $data->{'HSP-query_end'} ) )
274 # swap
275 ($data->{'HSP-query_start'},
276 $data->{'HSP-query_end'}) = ($data->{'HSP-query_end'},
277 $data->{'HSP-query_start'});
279 if( defined $data->{'HSP-hit_frame'} && # this is here to protect from undefs
280 ((defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} < 0 &&
281 $data->{'HSP-hit_start'} < $data->{'HSP-hit_end'} ) ||
282 defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} > 0 &&
283 ( $data->{'HSP-hit_start'} > $data->{'HSP-hit_end'} ) )
286 # swap
287 ($data->{'HSP-hit_start'},
288 $data->{'HSP-hit_end'}) = ($data->{'HSP-hit_end'},
289 $data->{'HSP-hit_start'});
291 $data->{'HSP-query_frame'} ||= 0;
292 $data->{'HSP-hit_frame'} ||= 0;
293 # handle Blast 2.1.2 which did not support data member: hsp_align-len
294 $data->{'HSP-query_length'} ||= $data->{'RESULT-query_length'};
295 $data->{'HSP-query_length'} ||= length ($data->{'HSP-query_seq'} || '');
296 $data->{'HSP-hit_length'} ||= $data->{'HIT-length'};
297 $data->{'HSP-hit_length'} ||= length ($data->{'HSP-hit_seq'} || '');
299 $data->{'HSP-hsp_length'} ||= length ($data->{'HSP-homology_seq'} || '');
301 my %args = map { my $v = $data->{$_}; s/HSP//; ($_ => $v) }
302 grep { /^HSP/ } keys %{$data};
304 $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
305 $data->{'RESULT-algorithm_name'} || $type);
306 # copy this over from result
307 $args{'-query_name'} = $data->{'RESULT-query_name'};
308 $args{'-hit_name'} = $data->{'HIT-name'};
309 my ($rank) = scalar @{$self->{'_hsps'} || []} + 1;
310 $args{'-rank'} = $rank;
312 $args{'-hit_desc'} = $data->{'HIT-description'};
313 $args{'-query_desc'} = $data->{'RESULT-query_description'};
315 my $bits = $args{'-bits'};
316 my $hsp = \%args;
317 push @{$self->{'_hsps'}}, $hsp;
319 return $hsp;
323 =head2 start_hit
325 Title : start_hit
326 Usage : $handler->start_hit()
327 Function: Starts a Hit event cycle
328 Returns : none
329 Args : type of event and associated hashref
332 =cut
334 sub start_hit{
335 my ($self,$type) = @_;
336 $self->{'_hsps'} = [];
337 return;
341 =head2 end_hit
343 Title : end_hit
344 Usage : $handler->end_hit()
345 Function: Ends a Hit event cycle
346 Returns : Bio::Search::Hit::HitI object
347 Args : type of event and associated hashref
350 =cut
352 sub end_hit{
353 my ($self,$type,$data) = @_;
355 # Skip process unless there is HSP data or Hit Significance (e.g. a bl2seq with no similarity
356 # gives a hit with the subject, but shows a "no hits found" message instead
357 # of the alignment data and don't have a significance value).
358 # This way, we avoid false positives
359 my @hsp_data = grep { /^HSP/ } keys %{$data};
360 return unless (scalar @hsp_data > 0 or exists $data->{'HIT-significance'});
362 my %args = map { my $v = $data->{$_}; s/HIT//; ($_ => $v); } grep { /^HIT/ } keys %{$data};
364 # I hate special cases, but this is here because NCBI BLAST XML
365 # doesn't play nice and is undergoing mutation -jason
366 if(exists $args{'-name'} && $args{'-name'} =~ /BL_ORD_ID/ ) {
367 ($args{'-name'}, $args{'-description'}) = split(/\s+/,$args{'-description'},2);
369 $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
370 $data->{'RESULT-algorithm_name'} || $type);
371 $args{'-hsps'} = $self->{'_hsps'};
372 $args{'-query_len'} = $data->{'RESULT-query_length'};
373 $args{'-rank'} = $self->{'_hitcount'} + 1;
374 unless( defined $args{'-significance'} ) {
375 if( defined $args{'-hsps'} &&
376 $args{'-hsps'}->[0] ) {
377 # use pvalue if present (WU-BLAST), otherwise evalue (NCBI BLAST)
378 $args{'-significance'} = $args{'-hsps'}->[0]->{'-pvalue'} || $args{'-hsps'}->[0]->{'-evalue'};
381 my $hit = \%args;
382 $hit->{'-hsp_factory'} = $self->factory('hsp');
383 $self->_add_hit($hit);
384 $self->{'_hsps'} = [];
385 return $hit;
388 # TODO: Optionally impose hit filtering here
389 sub _add_hit {
390 my ($self, $hit) = @_;
391 push @{$self->{'_hits'}}, $hit;
392 $self->{'_hitcount'} = scalar @{$self->{'_hits'}};
395 =head2 Factory methods
397 =cut
399 =head2 register_factory
401 Title : register_factory
402 Usage : $handler->register_factory('TYPE',$factory);
403 Function: Register a specific factory for a object type class
404 Returns : none
405 Args : string representing the class and
406 Bio::Factory::ObjectFactoryI
408 See L<Bio::Factory::ObjectFactoryI> for more information
410 =cut
412 sub register_factory{
413 my ($self, $type,$f) = @_;
414 if( ! defined $f || ! ref($f) ||
415 ! $f->isa('Bio::Factory::ObjectFactoryI') ) {
416 $self->throw("Cannot set factory to value $f".ref($f)."\n");
418 $self->{'_factories'}->{lc($type)} = $f;
422 =head2 factory
424 Title : factory
425 Usage : my $f = $handler->factory('TYPE');
426 Function: Retrieves the associated factory for requested 'TYPE'
427 Returns : a Bio::Factory::ObjectFactoryI
428 Throws : Bio::Root::BadParameter if none registered for the supplied type
429 Args : name of factory class to retrieve
431 See L<Bio::Factory::ObjectFactoryI> for more information
433 =cut
435 sub factory{
436 my ($self,$type) = @_;
437 return $self->{'_factories'}->{lc($type)} ||
438 $self->throw(-class=>'Bio::Root::BadParameter',
439 -text=>"No factory registered for $type");
442 =head2 inclusion_threshold
444 See L<Bio::SearchIO::blast::inclusion_threshold>.
446 =cut
448 sub inclusion_threshold {
449 my $self = shift;
450 return $self->{'_inclusion_threshold'} = shift if @_;
451 return $self->{'_inclusion_threshold'};