3 # BioPerl module for Bio::SearchIO::axt
5 # Cared for by Jason Stajich <jason-at-bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::SearchIO::axt - a parser for axt format reports
20 my $parser = Bio::SearchIO->new(-format => 'axt',
21 -file => 't/data/report.blastz');
22 while( my $result = $parser->next_result ) {
23 while( my $hit = $result->next_hit ) {
24 while( my $hsp = $hit->next_hsp ) {
31 This is a parser and event-generator for AXT format reports. BLASTZ
32 reports (Schwartz et al,(2003) Genome Research, 13:103-107) are normally
33 in LAV format but are commonly post-processed to AXT format; many precomputed
34 BLASTZ reports, such as those found in the UCSC Genome
35 Browser, are in AXT format. This parser will also parse any
36 AXT format produced from any lav report and directly out of BLAT.
42 User feedback is an integral part of the evolution of this and other
43 Bioperl modules. Send your comments and suggestions preferably to
44 the Bioperl mailing list. Your participation is much appreciated.
46 bioperl-l@bioperl.org - General discussion
47 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51 Report bugs to the Bioperl bug tracking system to help us keep track
52 of the bugs and their resolution. Bug reports can be submitted via the
55 http://bugzilla.open-bio.org/
57 =head1 AUTHOR - Jason Stajich
59 Email jason-at-bioperl.org
63 The rest of the documentation details each of the object methods.
64 Internal methods are usually preceded with a _
69 # Let the code begin...
72 package Bio
::SearchIO
::axt
;
73 use vars
qw(%MODEMAP %MAPPING @STATES $GAPCHAR);
76 use Bio::Search::Result::ResultFactory;
77 use Bio::Search::HSP::HSPFactory;
78 use base qw(Bio::SearchIO);
83 # mapping of NCBI Blast terms to Bioperl hash keys
84 %MODEMAP = ('AXTOutput' => 'result',
91 'Hsp_score' => 'HSP-score',
92 'Hsp_query-from' => 'HSP-query_start',
93 'Hsp_query-to' => 'HSP-query_end',
94 'Hsp_hit-from' => 'HSP-hit_start',
95 'Hsp_hit-to' => 'HSP-hit_end',
96 'Hsp_positive' => 'HSP-conserved',
97 'Hsp_identity' => 'HSP-identical',
98 'Hsp_gaps' => 'HSP-hsp_gaps',
99 'Hsp_hitgaps' => 'HSP-hit_gaps',
100 'Hsp_querygaps' => 'HSP-query_gaps',
101 'Hsp_qseq' => 'HSP-query_seq',
102 'Hsp_hseq' => 'HSP-hit_seq',
103 'Hsp_midline' => 'HSP-homology_seq', # ignoring this for now
104 'Hsp_align-len' => 'HSP-hsp_length',
106 'Hit_id' => 'HIT-name',
107 'AXTOutput_query-def'=> 'RESULT-query_name',
114 Usage : my $obj = Bio::SearchIO::axt->new();
115 Function: Builds a new Bio::SearchIO::axt object
116 Returns : an instance of Bio::SearchIO::axt
123 my($class,@args) = @_;
124 my $self = $class->SUPER::new
(@args);
131 Usage : my $hit = $searchio->next_result;
132 Function: Returns the next Result from a search
133 Returns : Bio::Search::Result::ResultI object
143 my ($curquery,$curhit);
144 $self->start_document();
146 while( defined ($_ = $self->_readline )) {
148 if( m
/^(\d
+)\s
+ # alignment number - we'll throw this away anyways
149 (\S
+)\s
+ # Query name
150 (\d
+)\s
+(\d
+)\s
+ # Query start Query end (always + strand, 0 based)
152 (\d
+)\s
+(\d
+)\s
+ # Hit start Hit end (0 based)
153 ([\
-\
+])\s
+ # Hit strand
154 ([\d\
.\
-]+)\s
+ # Score
156 my ($alnnum, $qname,$qstart,$qend, $hname,
157 $hstart,$hend,$hstrand, $score) = ($1,$2,$3,$4,$5,
159 $self->{'_reporttype'} = 'AXT';
160 # Jim's code is 0 based
161 $qstart++; $qend++; $hstart++; $hend++;
162 if( defined $curquery &&
163 $curquery ne $qname ) {
164 $self->end_element({'Name' => 'Hit'});
165 $self->_pushback($_);
166 $self->end_element({'Name' => 'AXTOutput'});
167 return $self->end_document();
170 if( defined $curhit &&
172 # slight duplication here -- keep these in SYNC
173 $self->end_element({'Name' => 'Hit'});
174 $self->start_element({'Name' => 'Hit'});
175 $self->element({'Name' => 'Hit_id',
177 } elsif ( ! defined $curquery ) {
178 $self->start_element({'Name' => 'AXTOutput'});
179 $self->{'_result_count'}++;
180 $self->element({'Name' => 'AXTOutput_query-def',
183 $self->start_element({'Name' => 'Hit'});
184 $self->element({'Name' => 'Hit_id',
187 $self->start_element({'Name' => 'Hsp'});
188 my $queryalign = $self->_readline;
189 my $hitalign = $self->_readline;
192 my $alnlen = length($queryalign);
193 my $qgapnum = ( $queryalign =~ s/\Q$GAPCHAR/$GAPCHAR/g);
194 my $hgapnum = ( $hitalign =~ s/\Q$GAPCHAR/$GAPCHAR/g);
195 my $totalgaps = ($qgapnum + $hgapnum);
197 if( $hstrand eq '-' ) { # strand gets inferred by start/end
198 ($hstart,$hend) = ($hend,$hstart);
200 $self->element({'Name' => 'Hsp_score',
202 $self->element({'Name' => 'Hsp_query-from',
204 $self->element({'Name' => 'Hsp_query-to',
206 $self->element({'Name' => 'Hsp_hit-from',
208 $self->element({'Name' => 'Hsp_hit-to',
210 $self->element({'Name' => 'Hsp_gaps',
211 'Data' => $qgapnum + $hgapnum});
212 $self->element({'Name' => 'Hsp_querygaps',
213 'Data' => $qgapnum});
214 $self->element({'Name' => 'Hsp_hitgaps',
215 'Data' => $hgapnum});
217 $self->element({'Name' => 'Hsp_identity',
218 'Data' => $alnlen - $totalgaps});
219 $self->element({'Name' => 'Hsp_positive',
220 'Data' => $alnlen - $totalgaps});
221 $self->element({'Name' => 'Hsp_qseq',
222 'Data' => $queryalign});
223 $self->element({'Name' => 'Hsp_hseq',
224 'Data' => $hitalign});
226 $self->end_element({'Name' => 'Hsp'});
232 if( defined $curquery ) {
233 $self->end_element({'Name' => 'Hit'});
234 $self->end_element({'Name' => 'AXTOutput'});
235 return $self->end_document();
241 my ($self,@args) = @_;
242 $self->SUPER::_initialize
(@args);
243 $self->_eventHandler->register_factory('result', Bio
::Search
::Result
::ResultFactory
->new(-type
=> 'Bio::Search::Result::GenericResult'));
245 $self->_eventHandler->register_factory('hsp', Bio
::Search
::HSP
::HSPFactory
->new(-type
=> 'Bio::Search::HSP::GenericHSP'));
251 Title : start_element
252 Usage : $eventgenerator->start_element
253 Function: Handles a start element event
255 Args : hashref with at least 2 keys 'Data' and 'Name'
261 my ($self,$data) = @_;
262 # we currently don't care about attributes
263 my $nm = $data->{'Name'};
264 if( my $type = $MODEMAP{$nm} ) {
266 if( $self->_eventHandler->will_handle($type) ) {
267 my $func = sprintf("start_%s",lc $type);
268 $self->_eventHandler->$func($data->{'Attributes'});
270 unshift @
{$self->{'_elements'}}, $type;
272 if($nm eq 'AXTOutput') {
273 $self->{'_values'} = {};
274 $self->{'_result'}= undef;
275 $self->{'_mode'} = '';
282 Title : start_element
283 Usage : $eventgenerator->end_element
284 Function: Handles an end element event
286 Args : hashref with at least 2 keys 'Data' and 'Name'
292 my ($self,$data) = @_;
293 my $nm = $data->{'Name'};
295 # Hsp are sort of weird, in that they end when another
296 # object begins so have to detect this in end_element for now
298 if( my $type = $MODEMAP{$nm} ) {
299 if( $self->_eventHandler->will_handle($type) ) {
300 my $func = sprintf("end_%s",lc $type);
301 $rc = $self->_eventHandler->$func($self->{'_reporttype'},
304 shift @
{$self->{'_elements'}};
306 } elsif( $MAPPING{$nm} ) {
307 if ( ref($MAPPING{$nm}) =~ /hash/i ) {
308 my $key = (keys %{$MAPPING{$nm}})[0];
309 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
311 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
314 $self->warn( "unknown nm $nm ignoring\n");
316 $self->{'_last_data'} = ''; # remove read data if we are at
318 $self->{'_result'} = $rc if( $nm eq 'AXTOutput' );
326 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
327 Function: Convience method that calls start_element, characters, end_element
329 Args : Hash ref with the keys 'Name' and 'Data'
335 my ($self,$data) = @_;
336 $self->start_element($data);
337 $self->characters($data);
338 $self->end_element($data);
345 Usage : $eventgenerator->characters($str)
346 Function: Send a character events
354 my ($self,$data) = @_;
356 return unless ( defined $data->{'Data'} );
357 if( $data->{'Data'} =~ /^\s+$/ ) {
358 return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/;
361 if( $self->in_element('hsp') &&
362 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) {
364 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'};
367 $self->{'_last_data'} = $data->{'Data'};
373 Usage : $obj->_mode($newval)
376 Returns : value of _mode
377 Args : newvalue (optional)
383 my ($self,$value) = @_;
384 if( defined $value) {
385 $self->{'_mode'} = $value;
387 return $self->{'_mode'};
390 =head2 within_element
392 Title : within_element
393 Usage : if( $eventgenerator->within_element($element) ) {}
394 Function: Test if we are within a particular element
395 This is different than 'in' because within can be tested
398 Args : string element name
404 my ($self,$name) = @_;
405 return 0 if ( ! defined $name &&
406 ! defined $self->{'_elements'} ||
407 scalar @
{$self->{'_elements'}} == 0) ;
408 foreach ( @
{$self->{'_elements'}} ) {
419 Usage : if( $eventgenerator->in_element($element) ) {}
420 Function: Test if we are in a particular element
421 This is different than 'in' because within can be tested
424 Args : string element name
430 my ($self,$name) = @_;
431 return 0 if ! defined $self->{'_elements'}->[0];
432 return ( $self->{'_elements'}->[0] eq $name)
436 =head2 start_document
438 Title : start_document
439 Usage : $eventgenerator->start_document
440 Function: Handles a start document event
449 $self->{'_lasttype'} = '';
450 $self->{'_values'} = {};
451 $self->{'_result'}= undef;
452 $self->{'_mode'} = '';
453 $self->{'_elements'} = [];
460 Usage : $eventgenerator->end_document
461 Function: Handles an end document event
462 Returns : Bio::Search::Result::ResultI object
469 my ($self,@args) = @_;
470 return $self->{'_result'};
476 Usage : my $count = $searchio->result_count
477 Function: Returns the number of results we have processed
486 return $self->{'_result_count'};
489 sub report_count
{ shift->result_count }