sync w/ main trunk
[bioperl-live.git] / Bio / SearchIO / XML / BlastHandler.pm
blob070e273f0553096faa09c87d35a09f3a467cb111
1 # $Id$
3 # BioPerl module for Bio::SearchIO::XML::BlastHandler
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich, Chris Fields
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::SearchIO::XML::BlastHandler - XML Handler for NCBI Blast XML parsing.
19 =head1 SYNOPSIS
21 # This is not to be used directly.
23 =head1 DESCRIPTION
25 This is the XML handler for BLAST XML parsing. Currently it passes elements off
26 to the event handler, which is ultimately responsible for Bio::Search object
27 generation.
29 This was recently split off from the original code for Bio::SearchIO::blastxml
30 primarily for maintenance purposes.
32 =head1 DEPENDENCIES
34 In addition to parts of the Bio:: hierarchy, this module uses:
36 XML::SAX::Base
38 which comes with the XML::SAX distribution.
40 =head1 FEEDBACK
42 =head2 Mailing Lists
44 User feedback is an integral part of the evolution of this and other
45 Bioperl modules. Send your comments and suggestions preferably to
46 the Bioperl mailing list. Your participation is much appreciated.
48 bioperl-l@bioperl.org - General discussion
49 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51 =head2 Support
53 Please direct usage questions or support issues to the mailing list:
55 L<bioperl-l@bioperl.org>
57 rather than to the module maintainer directly. Many experienced and
58 responsive experts will be able to look at the problem and quickly
59 address it. Please include a thorough description of the problem
60 with code and data examples if at all possible.
62 =head2 Reporting Bugs
64 Report bugs to the Bioperl bug tracking system to help us keep track
65 of the bugs and their resolution. Bug reports can be submitted via the
66 web:
68 http://bugzilla.open-bio.org/
70 =head1 AUTHOR - Jason Stajich, Chris Fields
72 Email jason-at-bioperl.org
73 Email cjfields-at-uiuc dot edu
75 =head1 APPENDIX
77 The rest of the documentation details each of the object methods.
78 Internal methods are usually preceded with a _
80 =cut
82 # Let the code begin...
83 package Bio::SearchIO::XML::BlastHandler;
84 use base qw(Bio::Root::Root XML::SAX::Base);
# XML element names that trigger start_*/end_* events on the event handler,
# mapped to the event type used to build the handler method name.
my %MODEMAP = (
    Iteration => 'result',
    Hit       => 'hit',
    Hsp       => 'hsp',
);
92 # major post 2.2.12 BLAST XML changes
93 # 1) moved XML Handler to its own class
94 # 2) reconfigure blastxml to deal with old and new BLAST XML output
# Translation table from BLAST XML element names to the keys under which
# end_element stores the element text in the handler's value hash.
#   * a plain string is used directly as the key;
#   * a one-pair hash ref { GROUP => FIELD } stores the text one level down,
#     under GROUP and then FIELD.
my %MAPPING = (

    # --- fields describing the result as a whole ---
    'BlastOutput_program'    => 'RESULT-algorithm_name',
    'BlastOutput_version'    => 'RESULT-algorithm_version',
    'BlastOutput_db'         => 'RESULT-database_name',
    'BlastOutput_reference'  => 'RESULT-program_reference',
    'BlastOutput_query-def'  => 'RESULT-query_description',
    'BlastOutput_query-len'  => 'RESULT-query_length',
    'BlastOutput_query-ID'   => 'runid',
    'Parameters_matrix'      => { 'RESULT-parameters' => 'matrix' },
    'Parameters_expect'      => { 'RESULT-parameters' => 'expect' },
    'Parameters_include'     => { 'RESULT-parameters' => 'include' },
    'Parameters_sc-match'    => { 'RESULT-parameters' => 'match' },
    'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch' },
    'Parameters_gap-open'    => { 'RESULT-parameters' => 'gapopen' },
    'Parameters_gap-extend'  => { 'RESULT-parameters' => 'gapext' },
    'Parameters_filter'      => { 'RESULT-parameters' => 'filter' },
    'Statistics_db-num'      => 'RESULT-database_entries',
    'Statistics_db-len'      => 'RESULT-database_letters',
    'Statistics_hsp-len'     => { 'RESULT-statistics' => 'hsplength' },
    'Statistics_eff-space'   => { 'RESULT-statistics' => 'effectivespace' },
    'Statistics_kappa'       => { 'RESULT-statistics' => 'kappa' },
    'Statistics_lambda'      => { 'RESULT-statistics' => 'lambda' },
    'Statistics_entropy'     => { 'RESULT-statistics' => 'entropy' },

    # --- fields describing a single HSP ---
    'Hsp_bit-score'   => 'HSP-bits',
    'Hsp_score'       => 'HSP-score',
    'Hsp_evalue'      => 'HSP-evalue',
    'Hsp_query-from'  => 'HSP-query_start',
    'Hsp_query-to'    => 'HSP-query_end',
    'Hsp_hit-from'    => 'HSP-hit_start',
    'Hsp_hit-to'      => 'HSP-hit_end',
    'Hsp_positive'    => 'HSP-conserved',
    'Hsp_identity'    => 'HSP-identical',
    'Hsp_gaps'        => 'HSP-gaps',
    'Hsp_hitgaps'     => 'HSP-hit_gaps',
    'Hsp_querygaps'   => 'HSP-query_gaps',
    'Hsp_qseq'        => 'HSP-query_seq',
    'Hsp_hseq'        => 'HSP-hit_seq',
    'Hsp_midline'     => 'HSP-homology_seq',
    'Hsp_align-len'   => 'HSP-hsp_length',
    'Hsp_query-frame' => 'HSP-query_frame',
    'Hsp_hit-frame'   => 'HSP-hit_frame',

    # --- fields describing a single hit ---
    'Hit_id'             => 'HIT-name',
    'Hit_len'            => 'HIT-length',
    'Hit_accession'      => 'HIT-accession',
    'Hit_def'            => 'HIT-description',
    'Hit_num'            => 'HIT-order',
    'Iteration_iter-num' => 'HIT-iteration',
    'Iteration_stat'     => 'HIT-iteration_statistic',

    # Per-iteration query tags share their target keys with the
    # BlastOutput_query-* tags above, so when present they overwrite those
    # values with more current data (i.e. multiquery hits).
    'Iteration_query-def' => 'RESULT-query_description',
    'Iteration_query-len' => 'RESULT-query_length',
    'Iteration_query-ID'  => 'runid',
);
# XML tags that end_element recognizes but deliberately skips for now.
# Every listed tag is stored with the value 1.
my %IGNOREDTAGS = map { $_ => 1 } (
    'Hsp_num',             # noted as 'HSP-order'
    'Hsp_pattern-from',    # noted as 'patternend'
    'Hsp_pattern-to',      # noted as 'patternstart'
    'Hsp_density',         # noted as 'hspdensity'
    'Iteration_message',
    'Hit_hsps',
    'BlastOutput_param',
    'Iteration_hits',
    'Statistics',
    'Parameters',
    'BlastOutput',
    'BlastOutput_iterations',
);
173 =head2 SAX methods
175 =cut
177 =head2 start_document
179 Title : start_document
180 Usage : $parser->start_document;
181 Function: SAX method to indicate starting to parse a new document
182 Returns : none
183 Args : none
185 =cut
# Reset the per-document parse state: no remembered element type, an empty
# value hash and an empty result list.
sub start_document {
    my $self = shift;

    @{$self}{ '_lasttype', '_values' } = ( '', {} );

    # kept as the final statement so the sub's value is the fresh result list
    $self->{'_result'} = [];
}
194 =head2 end_document
196 Title : end_document
197 Usage : $parser->end_document;
198 Function: SAX method to indicate finishing parsing a new document
199 Returns : Bio::Search::Result::ResultI object
200 Args : none
202 =cut
# Finish a document: drop the data carried throughout the parse and hand back
# whatever is stored in the handler's result slot.  The slot itself is left
# untouched, so the caller must reset it.
sub end_document {
    my $self = shift;

    undef $self->{'_resultdata'};

    return $self->{'_result'};
}
214 =head2 start_element
216 Title : start_element
217 Usage : $parser->start_element($data)
218 Function: SAX method to indicate starting a new element
219 Returns : none
220 Args : hash ref for data
222 =cut
# Called at the opening tag of an element.  Only element names listed in
# %MODEMAP matter here: for those, the matching start_<type> method is invoked
# on the event handler, provided the handler says it will handle that type.
# The element's attributes are passed through but not otherwise inspected.
sub start_element {
    my ( $self, $data ) = @_;

    my $mode = $MODEMAP{ $data->{'Name'} };

    if ( $mode && $self->eventHandler->will_handle($mode) ) {
        my $method = sprintf( "start_%s", lc $mode );
        $self->eventHandler->$method( $data->{'Attributes'} );
    }
}
237 =head2 end_element
239 Title : end_element
240 Usage : $parser->end_element($data)
241 Function: Signals finishing an element
242 Returns : Bio::Search object depending on what type of element
243 Args : hash ref for data
245 =cut
# Called at the closing tag of an element.
#
# Depending on the element name this either
#   * fires the matching end_<type> event on the event handler
#     (names listed in %MODEMAP),
#   * stores the text accumulated by characters() under the key given in
#     %MAPPING,
#   * silently skips the element (names listed in %IGNOREDTAGS), or
#   * emits a debug message for an unknown element.
#
# Args    : $data - SAX element hash ref; only the 'Name' entry is read
# Returns : the value returned by the end_<type> event handler for
#           Iteration/Hit/Hsp elements, undef for every other element
sub end_element {
    my ( $self, $data ) = @_;

    my $nm = $data->{'Name'};
    my $rc;

    # Remember the BLAST program so it can be passed to the end_* events.
    if (   $nm eq 'BlastOutput_program'
        && $self->{'_last_data'} =~ /(t?blast[npx])/i )
    {
        $self->{'_type'} = uc $1;
    }

    # Copy any data carried in _resultdata into this result's values before
    # the result event fires.
    if ( $nm eq 'Iteration' ) {
        for my $key ( keys %{ $self->{'_resultdata'} } ) {
            $self->{'_values'}->{$key} = $self->{'_resultdata'}->{$key};
        }
    }

    if ( my $type = $MODEMAP{$nm} ) {
        if ( $self->eventHandler->will_handle($type) ) {
            my $func = sprintf( "end_%s", lc $type );
            $rc = $self->eventHandler->$func( $self->{'_type'},
                $self->{'_values'} );
        }
    }
    elsif ( exists $MAPPING{$nm} ) {
        my $target = $MAPPING{$nm};
        if ( ref($target) eq 'HASH' ) {

            # { GROUP => FIELD }: store the text one level down
            my ($group) = keys %{$target};
            $self->{'_values'}->{$group}->{ $target->{$group} } =
              $self->{'_last_data'};
        }
        else {
            $self->{'_values'}->{$target} = $self->{'_last_data'};
        }
    }
    elsif ( exists $IGNOREDTAGS{$nm} ) {

        # ignores these elements for now
    }
    else {
        $self->debug("ignoring unrecognized element type $nm\n");
    }

    # the element is finished, so discard the text read for it
    $self->{'_last_data'} = '';

    if ( $nm eq 'Iteration' ) {

        # keep the newest result (this replaces whatever was stored before)
        # and start the next result round with a clean value hash
        $self->{'_result'} = $rc;
        $self->{'_values'} = {};
    }

    return $rc;
}
293 =head2 characters
295 Title : characters
296 Usage : $parser->characters($data)
297 Function: Signals new characters to be processed
298 Returns : characters read
299 Args : hash ref with the key 'Data'
302 =cut
# Append a chunk of character data to the text buffer of the element currently
# being read.  Chunks that are undefined or consist solely of whitespace are
# skipped.
sub characters {
    my ( $self, $data ) = @_;

    my $chunk = $data->{'Data'};
    return if !defined $chunk;
    return if $chunk =~ /^\s+$/;

    $self->{'_last_data'} .= $chunk;
}
# Combined getter/setter for the event handler object.  When called with an
# argument, that argument is stored as the handler; the stored handler is
# returned in either case.
sub eventHandler {
    my ( $self, @new ) = @_;

    if (@new) {
        $self->{'_handler'} = $new[0];
    }

    return $self->{'_handler'};
}