3 # BioPerl module for Bio::SearchIO::waba
5 # Cared for by Jason Stajich <jason@bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::SearchIO::waba - SearchIO parser for Jim Kent WABA program
20 # do not use this object directly, rather through Bio::SearchIO
23 my $in = Bio::SearchIO->new(-format => 'waba',
24 -file => 'output.wab');
25 while( my $result = $in->next_result ) {
26 while( my $hit = $result->next_hit ) {
27 while( my $hsp = $result->next_hsp ) {
35 This parser will process the waba output (NOT the human readable format).
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50 Report bugs to the Bioperl bug tracking system to help us keep track
51 of the bugs and their resolution. Bug reports can be submitted via the
54 http://bugzilla.open-bio.org/
56 =head1 AUTHOR - Jason Stajich
58 Email jason-at-bioperl.org
62 The rest of the documentation details each of the object methods.
63 Internal methods are usually preceded with a _
68 # Let the code begin...
71 package Bio
::SearchIO
::waba
;
72 use vars
qw(%MODEMAP %MAPPING @STATES);
75 # Object preamble - inherits from Bio::Root::Root
77 use Bio::Search::Result::ResultFactory;
78 use Bio::Search::HSP::HSPFactory;
83 # mapping of NCBI Blast terms to Bioperl hash keys
84 %MODEMAP = ('WABAOutput' => 'result',
88 @STATES = qw(Hsp_qseq Hsp_hseq Hsp_stateseq);
91 'Hsp_query-from'=> 'HSP-query_start',
92 'Hsp_query-to' => 'HSP-query_end',
93 'Hsp_hit-from' => 'HSP-hit_start',
94 'Hsp_hit-to' => 'HSP-hit_end',
95 'Hsp_qseq' => 'HSP-query_seq',
96 'Hsp_hseq' => 'HSP-hit_seq',
97 'Hsp_midline' => 'HSP-homology_seq',
98 'Hsp_stateseq' => 'HSP-hmmstate_seq',
99 'Hsp_align-len' => 'HSP-hsp_length',
101 'Hit_id' => 'HIT-name',
102 'Hit_accession' => 'HIT-accession',
104 'WABAOutput_program' => 'RESULT-algorithm_name',
105 'WABAOutput_version' => 'RESULT-algorithm_version',
106 'WABAOutput_query-def'=> 'RESULT-query_name',
107 'WABAOutput_query-db' => 'RESULT-query_database',
108 'WABAOutput_db' => 'RESULT-database_name',
113 use base
qw(Bio::SearchIO);
118 Usage : my $obj = Bio::SearchIO::waba->new();
119 Function: Builds a new Bio::SearchIO::waba object
120 Returns : Bio::SearchIO::waba
121 Args : see Bio::SearchIO
126 my ($self,@args) = @_;
127 $self->SUPER::_initialize
(@args);
128 $self->_eventHandler->register_factory('result', Bio
::Search
::Result
::ResultFactory
->new(-type
=> 'Bio::Search::Result::WABAResult'));
130 $self->_eventHandler->register_factory('hsp', Bio
::Search
::HSP
::HSPFactory
->new(-type
=> 'Bio::Search::HSP::WABAHSP'));
137 Usage : my $hit = $searchio->next_result;
138 Function: Returns the next Result from a search
139 Returns : Bio::Search::Result::ResultI object
149 my ($curquery,$curhit);
151 $self->start_document();
153 while( defined ($_ = $self->_readline )) {
156 my ($qid, $qhspid,$qpercent, $junk,
157 $alnlen,$qdb,$qacc,$qstart,$qend,$qstrand,
158 $hitdb,$hacc,$hstart,$hend,
160 ( /^(\S
+)\
.(\S
+)\s
+align\s
+ # get the queryid
161 (\d
+(\
.\d
+)?
)\
%\s
+ # get the percentage
162 of\s
+(\d
+)\s
+ # get the length of the alignment
163 (\S
+)\s
+ # this is the query database
164 (\S
+):(\
-?\d
+)\
-(\
-?\d
+) # The accession:start-end for query
165 \s
+([\
-\
+]) # query strand
167 (\S
+):(\
-?\d
+)\
-(\
-?\d
+) # The accession:start-end for hit
168 \s
+([\
-\
+])\s
*$ # hit strand
171 # Curses. Jim's code is 0 based, the following is to readjust
172 if( $hstart < 0 ) { $hstart *= -1}
173 if( $hend < 0 ) { $hend *= -1}
174 if( $qstart < 0 ) { $qstart *= -1}
175 if( $qend < 0 ) { $qend *= -1}
176 $hstart++; $hend++; $qstart++; $qend++;
177 if( ! defined $alnlen ) {
178 $self->warn("Unable to parse the rest of the WABA alignment info for: '$_'");
181 $self->{'_reporttype'} = 'WABA'; # hardcoded - only
182 # one type of WABA AFAIK
183 if( defined $curquery &&
184 $curquery ne $qid ) {
185 $self->end_element({'Name' => 'Hit'});
186 $self->_pushback($_);
187 $self->end_element({'Name' => 'WABAOutput'});
188 return $self->end_document();
191 if( defined $curhit &&
193 # slight duplication here -- keep these in SYNC
194 $self->end_element({'Name' => 'Hit'});
195 $self->start_element({'Name' => 'Hit'});
196 $self->element({'Name' => 'Hit_id',
198 $self->element({'Name' => 'Hit_accession',
201 } elsif ( ! defined $curquery ) {
202 $self->start_element({'Name' => 'WABAOutput'});
203 $self->{'_result_count'}++;
204 $self->element({'Name' => 'WABAOutput_query-def',
206 $self->element({'Name' => 'WABAOutput_program',
208 $self->element({'Name' => 'WABAOutput_query-db',
210 $self->element({'Name' => 'WABAOutput_db',
213 # slight duplication here -- keep these N'SYNC ;-)
214 $self->start_element({'Name' => 'Hit'});
215 $self->element({'Name' => 'Hit_id',
217 $self->element({'Name' => 'Hit_accession',
222 # strand is inferred by start,end values
223 # in the Result Builder
224 if( $qstrand eq '-' ) {
225 ($qstart,$qend) = ($qend,$qstart);
227 if( $hstrand eq '-' ) {
228 ($hstart,$hend) = ($hend,$hstart);
231 $self->start_element({'Name' => 'Hsp'});
232 $self->element({'Name' => 'Hsp_query-from',
234 $self->element({'Name' => 'Hsp_query-to',
236 $self->element({'Name' => 'Hsp_hit-from',
238 $self->element({'Name' => 'Hsp_hit-to',
240 $self->element({'Name' => 'Hsp_align-len',
246 } elsif( ! defined $curquery ) {
247 $self->warn("skipping because no Hit begin line was recognized\n$_") if( $_ !~ /^\s+$/ );
251 $self->element({'Name' => $STATES[$state++],
253 if( $state >= scalar @STATES ) {
255 $self->end_element({'Name' => 'Hsp'});
259 if( defined $curquery ) {
260 $self->end_element({'Name' => 'Hit'});
261 $self->end_element({'Name' => 'WABAOutput'});
262 return $self->end_document();
269 Title : start_element
270 Usage : $eventgenerator->start_element
271 Function: Handles a start element event
273 Args : hashref with at least 2 keys 'Data' and 'Name'
279 my ($self,$data) = @_;
280 # we currently don't care about attributes
281 my $nm = $data->{'Name'};
282 if( my $type = $MODEMAP{$nm} ) {
284 if( $self->_eventHandler->will_handle($type) ) {
285 my $func = sprintf("start_%s",lc $type);
286 $self->_eventHandler->$func($data->{'Attributes'});
288 unshift @
{$self->{'_elements'}}, $type;
290 if($nm eq 'WABAOutput') {
291 $self->{'_values'} = {};
292 $self->{'_result'}= undef;
293 $self->{'_mode'} = '';
300 Title : start_element
301 Usage : $eventgenerator->end_element
302 Function: Handles an end element event
304 Args : hashref with at least 2 keys 'Data' and 'Name'
310 my ($self,$data) = @_;
311 my $nm = $data->{'Name'};
313 # Hsp are sort of weird, in that they end when another
314 # object begins so have to detect this in end_element for now
316 foreach ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) {
317 $self->element({'Name' => $_,
318 'Data' => $self->{'_last_hspdata'}->{$_}});
320 $self->{'_last_hspdata'} = {}
323 if( my $type = $MODEMAP{$nm} ) {
324 if( $self->_eventHandler->will_handle($type) ) {
325 my $func = sprintf("end_%s",lc $type);
326 $rc = $self->_eventHandler->$func($self->{'_reporttype'},
329 shift @
{$self->{'_elements'}};
331 } elsif( $MAPPING{$nm} ) {
332 if ( ref($MAPPING{$nm}) =~ /hash/i ) {
333 my $key = (keys %{$MAPPING{$nm}})[0];
334 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
336 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
339 $self->warn( "unknown nm $nm ignoring\n");
341 $self->{'_last_data'} = ''; # remove read data if we are at
343 $self->{'_result'} = $rc if( $nm eq 'WABAOutput' );
351 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
352 Function: Convience method that calls start_element, characters, end_element
354 Args : Hash ref with the keys 'Name' and 'Data'
360 my ($self,$data) = @_;
361 $self->start_element($data);
362 $self->characters($data);
363 $self->end_element($data);
370 Usage : $eventgenerator->characters($str)
371 Function: Send a character events
379 my ($self,$data) = @_;
381 return unless ( defined $data->{'Data'} );
382 if( $data->{'Data'} =~ /^\s+$/ ) {
383 return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/;
386 if( $self->in_element('hsp') &&
387 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) {
389 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'};
392 $self->{'_last_data'} = $data->{'Data'};
398 Usage : $obj->_mode($newval)
401 Returns : value of _mode
402 Args : newvalue (optional)
408 my ($self,$value) = @_;
409 if( defined $value) {
410 $self->{'_mode'} = $value;
412 return $self->{'_mode'};
415 =head2 within_element
417 Title : within_element
418 Usage : if( $eventgenerator->within_element($element) ) {}
419 Function: Test if we are within a particular element
420 This is different than 'in' because within can be tested
423 Args : string element name
429 my ($self,$name) = @_;
430 return 0 if ( ! defined $name &&
431 ! defined $self->{'_elements'} ||
432 scalar @
{$self->{'_elements'}} == 0) ;
433 foreach ( @
{$self->{'_elements'}} ) {
444 Usage : if( $eventgenerator->in_element($element) ) {}
445 Function: Test if we are in a particular element
446 This is different than 'in' because within can be tested
449 Args : string element name
455 my ($self,$name) = @_;
456 return 0 if ! defined $self->{'_elements'}->[0];
457 return ( $self->{'_elements'}->[0] eq $name)
461 =head2 start_document
463 Title : start_document
464 Usage : $eventgenerator->start_document
465 Function: Handles a start document event
474 $self->{'_lasttype'} = '';
475 $self->{'_values'} = {};
476 $self->{'_result'}= undef;
477 $self->{'_mode'} = '';
478 $self->{'_elements'} = [];
485 Usage : $eventgenerator->end_document
486 Function: Handles an end document event
487 Returns : Bio::Search::Result::ResultI object
494 my ($self,@args) = @_;
495 return $self->{'_result'};
501 Usage : my $count = $searchio->result_count
502 Function: Returns the number of results we have processed
511 return $self->{'_result_count'};
514 sub report_count
{ shift->result_count }