[bug 2714]
[bioperl-live.git] / Bio / SearchIO / SearchResultEventBuilder.pm
blob5419b53274cd30bae94b9b71628060627fa0717e
1 # $Id$
3 # BioPerl module for Bio::SearchIO::SearchResultEventBuilder
5 # Cared for by Jason Stajich <jason@bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::SearchIO::SearchResultEventBuilder - Event Handler for SearchIO events.
17 =head1 SYNOPSIS
19 # Do not use this object directly, this object is part of the SearchIO
20 # event based parsing system.
22 =head1 DESCRIPTION
24 This object handles Search Events generated by the SearchIO classes
25 and build appropriate Bio::Search::* objects from them.
27 =head1 FEEDBACK
29 =head2 Mailing Lists
31 User feedback is an integral part of the evolution of this and other
32 Bioperl modules. Send your comments and suggestions preferably to
33 the Bioperl mailing list. Your participation is much appreciated.
35 bioperl-l@bioperl.org - General discussion
36 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
38 =head2 Reporting Bugs
40 Report bugs to the Bioperl bug tracking system to help us keep track
41 of the bugs and their resolution. Bug reports can be submitted via the
42 web:
44 http://bugzilla.open-bio.org/
46 =head1 AUTHOR - Jason Stajich
48 Email jason-at-bioperl.org
50 =head1 CONTRIBUTORS
52 Sendu Bala, bix@sendu.me.uk
54 =head1 APPENDIX
56 The rest of the documentation details each of the object methods.
57 Internal methods are usually preceded with a _
59 =cut
62 # Let the code begin...
65 package Bio::SearchIO::SearchResultEventBuilder;
66 use vars qw(%KNOWNEVENTS);
67 use strict;
69 use Bio::Factory::ObjectFactory;
71 use base qw(Bio::Root::Root Bio::SearchIO::EventHandlerI);
73 =head2 new
75 Title : new
76 Usage : my $obj = Bio::SearchIO::SearchResultEventBuilder->new();
77 Function: Builds a new Bio::SearchIO::SearchResultEventBuilder object
78 Returns : Bio::SearchIO::SearchResultEventBuilder
79 Args : -hsp_factory => Bio::Factory::ObjectFactoryI
80 -hit_factory => Bio::Factory::ObjectFactoryI
81 -result_factory => Bio::Factory::ObjectFactoryI
83 See L<Bio::Factory::ObjectFactoryI> for more information
85 =cut
87 sub new {
88 my ($class,@args) = @_;
89 my $self = $class->SUPER::new(@args);
90 my ($hspF,$hitF,$resultF) = $self->_rearrange([qw(HSP_FACTORY
91 HIT_FACTORY
92 RESULT_FACTORY)],@args);
93 $self->register_factory('hsp', $hspF ||
94 Bio::Factory::ObjectFactory->new(
95 -type => 'Bio::Search::HSP::GenericHSP',
96 -interface => 'Bio::Search::HSP::HSPI'));
98 $self->register_factory('hit', $hitF ||
99 Bio::Factory::ObjectFactory->new(
100 -type => 'Bio::Search::Hit::GenericHit',
101 -interface => 'Bio::Search::Hit::HitI'));
103 $self->register_factory('result', $resultF ||
104 Bio::Factory::ObjectFactory->new(
105 -type => 'Bio::Search::Result::GenericResult',
106 -interface => 'Bio::Search::Result::ResultI'));
108 return $self;
111 # new comes from the superclass
113 =head2 will_handle
115 Title : will_handle
116 Usage : if( $handler->will_handle($event_type) ) { ... }
117 Function: Tests if this event builder knows how to process a specific event
118 Returns : boolean
119 Args : event type name
122 =cut
124 sub will_handle{
125 my ($self,$type) = @_;
126 # these are the events we recognize
127 return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result' );
130 =head2 SAX methods
132 =cut
134 =head2 start_result
136 Title : start_result
137 Usage : $handler->start_result($resulttype)
138 Function: Begins a result event cycle
139 Returns : none
140 Args : Type of Report
142 =cut
144 sub start_result {
145 my ($self,$type) = @_;
146 $self->{'_resulttype'} = $type;
147 $self->{'_hits'} = [];
148 $self->{'_hsps'} = [];
149 $self->{'_hitcount'} = 0;
150 return;
153 =head2 end_result
155 Title : end_result
156 Usage : my @results = $parser->end_result
157 Function: Finishes a result handler cycle
158 Returns : A Bio::Search::Result::ResultI
159 Args : none
161 =cut
163 # this is overridden by IteratedSearchResultEventBuilder
164 # so keep that in mind when debugging
166 sub end_result {
167 my ($self,$type,$data) = @_;
169 if( defined $data->{'runid'} &&
170 $data->{'runid'} !~ /^\s+$/ ) {
172 if( $data->{'runid'} !~ /^lcl\|/) {
173 $data->{"RESULT-query_name"}= $data->{'runid'};
174 } else {
175 ($data->{"RESULT-query_name"},
176 $data->{"RESULT-query_description"}) =
177 split(/\s+/,$data->{"RESULT-query_description"},2);
180 if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
181 my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
182 # this is for |123|gb|ABC1.1|
183 $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
184 $data->{"RESULT-query_accession"}= $acc;
186 delete $data->{'runid'};
188 my %args = map { my $v = $data->{$_}; s/RESULT//; ($_ => $v); }
189 grep { /^RESULT/ } keys %{$data};
191 $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
192 $data->{'RESULT-algorithm_name'} || $type);
193 $args{'-hits'} = $self->{'_hits'};
194 my $result = $self->factory('result')->create_object(%args);
195 $result->hit_factory($self->factory('hit'));
196 $self->{'_hits'} = [];
197 return $result;
200 =head2 start_hsp
202 Title : start_hsp
203 Usage : $handler->start_hsp($name,$data)
204 Function: Begins processing a HSP event
205 Returns : none
206 Args : type of element
207 associated data (hashref)
209 =cut
211 sub start_hsp {
212 my ($self,@args) = @_;
213 return;
216 =head2 end_hsp
218 Title : end_hsp
219 Usage : $handler->end_hsp()
220 Function: Finish processing a HSP event
221 Returns : none
222 Args : type of event and associated hashref
225 =cut
227 sub end_hsp {
228 my ($self,$type,$data) = @_;
230 if( defined $data->{'runid'} &&
231 $data->{'runid'} !~ /^\s+$/ ) {
233 if( $data->{'runid'} !~ /^lcl\|/) {
234 $data->{"RESULT-query_name"}= $data->{'runid'};
235 } else {
236 ($data->{"RESULT-query_name"},
237 $data->{"RESULT-query_description"}) =
238 split(/\s+/,$data->{"RESULT-query_description"},2);
241 if( my @a = split(/\|/,$data->{'RESULT-query_name'}) ) {
242 my $acc = pop @a ; # this is for accession |1234|gb|AAABB1.1|AAABB1
243 # this is for |123|gb|ABC1.1|
244 $acc = pop @a if( ! defined $acc || $acc =~ /^\s+$/);
245 $data->{"RESULT-query_accession"}= $acc;
247 delete $data->{'runid'};
250 # this code is to deal with the fact that Blast XML data
251 # always has start < end and one has to infer strandedness
252 # from the frame which is a problem for the Search::HSP object
253 # which expect to be able to infer strand from the order of
254 # of the begin/end of the query and hit coordinates
255 if( defined $data->{'HSP-query_frame'} && # this is here to protect from undefs
256 (( $data->{'HSP-query_frame'} < 0 &&
257 $data->{'HSP-query_start'} < $data->{'HSP-query_end'} ) ||
258 $data->{'HSP-query_frame'} > 0 &&
259 ( $data->{'HSP-query_start'} > $data->{'HSP-query_end'} ) )
262 # swap
263 ($data->{'HSP-query_start'},
264 $data->{'HSP-query_end'}) = ($data->{'HSP-query_end'},
265 $data->{'HSP-query_start'});
267 if( defined $data->{'HSP-hit_frame'} && # this is here to protect from undefs
268 ((defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} < 0 &&
269 $data->{'HSP-hit_start'} < $data->{'HSP-hit_end'} ) ||
270 defined $data->{'HSP-hit_frame'} && $data->{'HSP-hit_frame'} > 0 &&
271 ( $data->{'HSP-hit_start'} > $data->{'HSP-hit_end'} ) )
274 # swap
275 ($data->{'HSP-hit_start'},
276 $data->{'HSP-hit_end'}) = ($data->{'HSP-hit_end'},
277 $data->{'HSP-hit_start'});
279 $data->{'HSP-query_frame'} ||= 0;
280 $data->{'HSP-hit_frame'} ||= 0;
281 # handle Blast 2.1.2 which did not support data member: hsp_align-len
282 $data->{'HSP-query_length'} ||= $data->{'RESULT-query_length'};
283 $data->{'HSP-query_length'} ||= length ($data->{'HSP-query_seq'} || '');
284 $data->{'HSP-hit_length'} ||= $data->{'HIT-length'};
285 $data->{'HSP-hit_length'} ||= length ($data->{'HSP-hit_seq'} || '');
287 $data->{'HSP-hsp_length'} ||= length ($data->{'HSP-homology_seq'} || '');
289 my %args = map { my $v = $data->{$_}; s/HSP//; ($_ => $v) }
290 grep { /^HSP/ } keys %{$data};
292 $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
293 $data->{'RESULT-algorithm_name'} || $type);
294 # copy this over from result
295 $args{'-query_name'} = $data->{'RESULT-query_name'};
296 $args{'-hit_name'} = $data->{'HIT-name'};
297 my ($rank) = scalar @{$self->{'_hsps'} || []} + 1;
298 $args{'-rank'} = $rank;
300 $args{'-hit_desc'} = $data->{'HIT-description'};
301 $args{'-query_desc'} = $data->{'RESULT-query_description'};
303 my $bits = $args{'-bits'};
304 my $hsp = \%args;
305 push @{$self->{'_hsps'}}, $hsp;
307 return $hsp;
311 =head2 start_hit
313 Title : start_hit
314 Usage : $handler->start_hit()
315 Function: Starts a Hit event cycle
316 Returns : none
317 Args : type of event and associated hashref
320 =cut
322 sub start_hit{
323 my ($self,$type) = @_;
324 $self->{'_hsps'} = [];
325 return;
329 =head2 end_hit
331 Title : end_hit
332 Usage : $handler->end_hit()
333 Function: Ends a Hit event cycle
334 Returns : Bio::Search::Hit::HitI object
335 Args : type of event and associated hashref
338 =cut
340 sub end_hit{
341 my ($self,$type,$data) = @_;
342 my %args = map { my $v = $data->{$_}; s/HIT//; ($_ => $v); } grep { /^HIT/ } keys %{$data};
344 # I hate special cases, but this is here because NCBI BLAST XML
345 # doesn't play nice and is undergoing mutation -jason
346 if(exists $args{'-name'} && $args{'-name'} =~ /BL_ORD_ID/ ) {
347 ($args{'-name'}, $args{'-description'}) = split(/\s+/,$args{'-description'},2);
349 $args{'-algorithm'} = uc( $args{'-algorithm_name'} ||
350 $data->{'RESULT-algorithm_name'} || $type);
351 $args{'-hsps'} = $self->{'_hsps'};
352 $args{'-query_len'} = $data->{'RESULT-query_length'};
353 $args{'-rank'} = $self->{'_hitcount'} + 1;
354 unless( defined $args{'-significance'} ) {
355 if( defined $args{'-hsps'} &&
356 $args{'-hsps'}->[0] ) {
357 # use pvalue if present (WU-BLAST), otherwise evalue (NCBI BLAST)
358 $args{'-significance'} = $args{'-hsps'}->[0]->{'-pvalue'} || $args{'-hsps'}->[0]->{'-evalue'};
361 my $hit = \%args;
362 $hit->{'-hsp_factory'} = $self->factory('hsp');
363 $self->_add_hit($hit);
364 $self->{'_hsps'} = [];
365 return $hit;
368 # TODO: Optionally impose hit filtering here
369 sub _add_hit {
370 my ($self, $hit) = @_;
371 push @{$self->{'_hits'}}, $hit;
372 $self->{'_hitcount'} = scalar @{$self->{'_hits'}};
375 =head2 Factory methods
377 =cut
379 =head2 register_factory
381 Title : register_factory
382 Usage : $handler->register_factory('TYPE',$factory);
383 Function: Register a specific factory for a object type class
384 Returns : none
385 Args : string representing the class and
386 Bio::Factory::ObjectFactoryI
388 See L<Bio::Factory::ObjectFactoryI> for more information
390 =cut
392 sub register_factory{
393 my ($self, $type,$f) = @_;
394 if( ! defined $f || ! ref($f) ||
395 ! $f->isa('Bio::Factory::ObjectFactoryI') ) {
396 $self->throw("Cannot set factory to value $f".ref($f)."\n");
398 $self->{'_factories'}->{lc($type)} = $f;
402 =head2 factory
404 Title : factory
405 Usage : my $f = $handler->factory('TYPE');
406 Function: Retrieves the associated factory for requested 'TYPE'
407 Returns : a Bio::Factory::ObjectFactoryI
408 Throws : Bio::Root::BadParameter if none registered for the supplied type
409 Args : name of factory class to retrieve
411 See L<Bio::Factory::ObjectFactoryI> for more information
413 =cut
415 sub factory{
416 my ($self,$type) = @_;
417 return $self->{'_factories'}->{lc($type)} ||
418 $self->throw(-class=>'Bio::Root::BadParameter',
419 -text=>"No factory registered for $type");
422 =head2 inclusion_threshold
424 See L<Bio::SearchIO::blast::inclusion_threshold>.
426 =cut
428 sub inclusion_threshold {
429 my $self = shift;
430 return $self->{'_inclusion_threshold'} = shift if @_;
431 return $self->{'_inclusion_threshold'};