3 # BioPerl module for Bio::SearchIO::waba
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason@bioperl.org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::SearchIO::waba - SearchIO parser for Jim Kent WABA program
22 # do not use this object directly, rather through Bio::SearchIO
25 my $in = Bio::SearchIO->new(-format => 'waba',
26 -file => 'output.wab');
27 while( my $result = $in->next_result ) {
28 while( my $hit = $result->next_hit ) {
29 while( my $hsp = $result->next_hsp ) {
37 This parser will process the waba output (NOT the human readable format).
43 User feedback is an integral part of the evolution of this and other
44 Bioperl modules. Send your comments and suggestions preferably to
45 the Bioperl mailing list. Your participation is much appreciated.
47 bioperl-l@bioperl.org - General discussion
48 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52 Please direct usage questions or support issues to the mailing list:
54 L<bioperl-l@bioperl.org>
56 rather than to the module maintainer directly. Many experienced and
57 reponsive experts will be able look at the problem and quickly
58 address it. Please include a thorough description of the problem
59 with code and data examples if at all possible.
63 Report bugs to the Bioperl bug tracking system to help us keep track
64 of the bugs and their resolution. Bug reports can be submitted via the
67 http://bugzilla.open-bio.org/
69 =head1 AUTHOR - Jason Stajich
71 Email jason-at-bioperl.org
75 The rest of the documentation details each of the object methods.
76 Internal methods are usually preceded with a _
81 # Let the code begin...
84 package Bio
::SearchIO
::waba
;
85 use vars
qw(%MODEMAP %MAPPING @STATES);
88 # Object preamble - inherits from Bio::Root::Root
90 use Bio::Search::Result::ResultFactory;
91 use Bio::Search::HSP::HSPFactory;
96 # mapping of NCBI Blast terms to Bioperl hash keys
97 %MODEMAP = ('WABAOutput' => 'result',
101 @STATES = qw(Hsp_qseq Hsp_hseq Hsp_stateseq);
104 'Hsp_query-from'=> 'HSP-query_start',
105 'Hsp_query-to' => 'HSP-query_end',
106 'Hsp_hit-from' => 'HSP-hit_start',
107 'Hsp_hit-to' => 'HSP-hit_end',
108 'Hsp_qseq' => 'HSP-query_seq',
109 'Hsp_hseq' => 'HSP-hit_seq',
110 'Hsp_midline' => 'HSP-homology_seq',
111 'Hsp_stateseq' => 'HSP-hmmstate_seq',
112 'Hsp_align-len' => 'HSP-hsp_length',
114 'Hit_id' => 'HIT-name',
115 'Hit_accession' => 'HIT-accession',
117 'WABAOutput_program' => 'RESULT-algorithm_name',
118 'WABAOutput_version' => 'RESULT-algorithm_version',
119 'WABAOutput_query-def'=> 'RESULT-query_name',
120 'WABAOutput_query-db' => 'RESULT-query_database',
121 'WABAOutput_db' => 'RESULT-database_name',
126 use base
qw(Bio::SearchIO);
131 Usage : my $obj = Bio::SearchIO::waba->new();
132 Function: Builds a new Bio::SearchIO::waba object
133 Returns : Bio::SearchIO::waba
134 Args : see Bio::SearchIO
139 my ($self,@args) = @_;
140 $self->SUPER::_initialize
(@args);
141 $self->_eventHandler->register_factory('result', Bio
::Search
::Result
::ResultFactory
->new(-type
=> 'Bio::Search::Result::WABAResult'));
143 $self->_eventHandler->register_factory('hsp', Bio
::Search
::HSP
::HSPFactory
->new(-type
=> 'Bio::Search::HSP::WABAHSP'));
150 Usage : my $hit = $searchio->next_result;
151 Function: Returns the next Result from a search
152 Returns : Bio::Search::Result::ResultI object
162 my ($curquery,$curhit);
164 $self->start_document();
166 while( defined ($_ = $self->_readline )) {
169 my ($qid, $qhspid,$qpercent, $junk,
170 $alnlen,$qdb,$qacc,$qstart,$qend,$qstrand,
171 $hitdb,$hacc,$hstart,$hend,
173 ( /^(\S
+)\
.(\S
+)\s
+align\s
+ # get the queryid
174 (\d
+(\
.\d
+)?
)\
%\s
+ # get the percentage
175 of\s
+(\d
+)\s
+ # get the length of the alignment
176 (\S
+)\s
+ # this is the query database
177 (\S
+):(\
-?\d
+)\
-(\
-?\d
+) # The accession:start-end for query
178 \s
+([\
-\
+]) # query strand
180 (\S
+):(\
-?\d
+)\
-(\
-?\d
+) # The accession:start-end for hit
181 \s
+([\
-\
+])\s
*$ # hit strand
184 # Curses. Jim's code is 0 based, the following is to readjust
185 if( $hstart < 0 ) { $hstart *= -1}
186 if( $hend < 0 ) { $hend *= -1}
187 if( $qstart < 0 ) { $qstart *= -1}
188 if( $qend < 0 ) { $qend *= -1}
189 $hstart++; $hend++; $qstart++; $qend++;
190 if( ! defined $alnlen ) {
191 $self->warn("Unable to parse the rest of the WABA alignment info for: '$_'");
194 $self->{'_reporttype'} = 'WABA'; # hardcoded - only
195 # one type of WABA AFAIK
196 if( defined $curquery &&
197 $curquery ne $qid ) {
198 $self->end_element({'Name' => 'Hit'});
199 $self->_pushback($_);
200 $self->end_element({'Name' => 'WABAOutput'});
201 return $self->end_document();
204 if( defined $curhit &&
206 # slight duplication here -- keep these in SYNC
207 $self->end_element({'Name' => 'Hit'});
208 $self->start_element({'Name' => 'Hit'});
209 $self->element({'Name' => 'Hit_id',
211 $self->element({'Name' => 'Hit_accession',
214 } elsif ( ! defined $curquery ) {
215 $self->start_element({'Name' => 'WABAOutput'});
216 $self->{'_result_count'}++;
217 $self->element({'Name' => 'WABAOutput_query-def',
219 $self->element({'Name' => 'WABAOutput_program',
221 $self->element({'Name' => 'WABAOutput_query-db',
223 $self->element({'Name' => 'WABAOutput_db',
226 # slight duplication here -- keep these N'SYNC ;-)
227 $self->start_element({'Name' => 'Hit'});
228 $self->element({'Name' => 'Hit_id',
230 $self->element({'Name' => 'Hit_accession',
235 # strand is inferred by start,end values
236 # in the Result Builder
237 if( $qstrand eq '-' ) {
238 ($qstart,$qend) = ($qend,$qstart);
240 if( $hstrand eq '-' ) {
241 ($hstart,$hend) = ($hend,$hstart);
244 $self->start_element({'Name' => 'Hsp'});
245 $self->element({'Name' => 'Hsp_query-from',
247 $self->element({'Name' => 'Hsp_query-to',
249 $self->element({'Name' => 'Hsp_hit-from',
251 $self->element({'Name' => 'Hsp_hit-to',
253 $self->element({'Name' => 'Hsp_align-len',
259 } elsif( ! defined $curquery ) {
260 $self->warn("skipping because no Hit begin line was recognized\n$_") if( $_ !~ /^\s+$/ );
264 $self->element({'Name' => $STATES[$state++],
266 if( $state >= scalar @STATES ) {
268 $self->end_element({'Name' => 'Hsp'});
272 if( defined $curquery ) {
273 $self->end_element({'Name' => 'Hit'});
274 $self->end_element({'Name' => 'WABAOutput'});
275 return $self->end_document();
282 Title : start_element
283 Usage : $eventgenerator->start_element
284 Function: Handles a start element event
286 Args : hashref with at least 2 keys 'Data' and 'Name'
292 my ($self,$data) = @_;
293 # we currently don't care about attributes
294 my $nm = $data->{'Name'};
295 if( my $type = $MODEMAP{$nm} ) {
297 if( $self->_eventHandler->will_handle($type) ) {
298 my $func = sprintf("start_%s",lc $type);
299 $self->_eventHandler->$func($data->{'Attributes'});
301 unshift @
{$self->{'_elements'}}, $type;
303 if($nm eq 'WABAOutput') {
304 $self->{'_values'} = {};
305 $self->{'_result'}= undef;
306 $self->{'_mode'} = '';
313 Title : start_element
314 Usage : $eventgenerator->end_element
315 Function: Handles an end element event
317 Args : hashref with at least 2 keys 'Data' and 'Name'
323 my ($self,$data) = @_;
324 my $nm = $data->{'Name'};
326 # Hsp are sort of weird, in that they end when another
327 # object begins so have to detect this in end_element for now
329 foreach ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) {
330 $self->element({'Name' => $_,
331 'Data' => $self->{'_last_hspdata'}->{$_}});
333 $self->{'_last_hspdata'} = {}
336 if( my $type = $MODEMAP{$nm} ) {
337 if( $self->_eventHandler->will_handle($type) ) {
338 my $func = sprintf("end_%s",lc $type);
339 $rc = $self->_eventHandler->$func($self->{'_reporttype'},
342 shift @
{$self->{'_elements'}};
344 } elsif( $MAPPING{$nm} ) {
345 if ( ref($MAPPING{$nm}) =~ /hash/i ) {
346 my $key = (keys %{$MAPPING{$nm}})[0];
347 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
349 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
352 $self->warn( "unknown nm $nm ignoring\n");
354 $self->{'_last_data'} = ''; # remove read data if we are at
356 $self->{'_result'} = $rc if( $nm eq 'WABAOutput' );
364 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
365 Function: Convience method that calls start_element, characters, end_element
367 Args : Hash ref with the keys 'Name' and 'Data'
373 my ($self,$data) = @_;
374 $self->start_element($data);
375 $self->characters($data);
376 $self->end_element($data);
383 Usage : $eventgenerator->characters($str)
384 Function: Send a character events
392 my ($self,$data) = @_;
394 return unless ( defined $data->{'Data'} );
395 if( $data->{'Data'} =~ /^\s+$/ ) {
396 return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/;
399 if( $self->in_element('hsp') &&
400 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) {
402 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'};
405 $self->{'_last_data'} = $data->{'Data'};
411 Usage : $obj->_mode($newval)
414 Returns : value of _mode
415 Args : newvalue (optional)
421 my ($self,$value) = @_;
422 if( defined $value) {
423 $self->{'_mode'} = $value;
425 return $self->{'_mode'};
428 =head2 within_element
430 Title : within_element
431 Usage : if( $eventgenerator->within_element($element) ) {}
432 Function: Test if we are within a particular element
433 This is different than 'in' because within can be tested
436 Args : string element name
442 my ($self,$name) = @_;
443 return 0 if ( ! defined $name &&
444 ! defined $self->{'_elements'} ||
445 scalar @
{$self->{'_elements'}} == 0) ;
446 foreach ( @
{$self->{'_elements'}} ) {
457 Usage : if( $eventgenerator->in_element($element) ) {}
458 Function: Test if we are in a particular element
459 This is different than 'in' because within can be tested
462 Args : string element name
468 my ($self,$name) = @_;
469 return 0 if ! defined $self->{'_elements'}->[0];
470 return ( $self->{'_elements'}->[0] eq $name)
474 =head2 start_document
476 Title : start_document
477 Usage : $eventgenerator->start_document
478 Function: Handles a start document event
487 $self->{'_lasttype'} = '';
488 $self->{'_values'} = {};
489 $self->{'_result'}= undef;
490 $self->{'_mode'} = '';
491 $self->{'_elements'} = [];
498 Usage : $eventgenerator->end_document
499 Function: Handles an end document event
500 Returns : Bio::Search::Result::ResultI object
507 my ($self,@args) = @_;
508 return $self->{'_result'};
514 Usage : my $count = $searchio->result_count
515 Function: Returns the number of results we have processed
524 return $self->{'_result_count'};
527 sub report_count
{ shift->result_count }