3 # BioPerl module for Bio::SearchIO::axt
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason-at-bioperl.org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::SearchIO::axt - a parser for axt format reports
22 my $parser = Bio::SearchIO->new(-format => 'axt',
23 -file => 't/data/report.blastz');
24 while( my $result = $parser->next_result ) {
25 while( my $hit = $result->next_hit ) {
26 while( my $hsp = $hit->next_hsp ) {
33 This is a parser and event-generator for AXT format reports. BLASTZ
34 reports (Schwartz et al,(2003) Genome Research, 13:103-107) are normally
35 in LAV format but are commonly post-processed to AXT format; many precomputed
36 BLASTZ reports, such as those found in the UCSC Genome
37 Browser, are in AXT format. This parser will also parse any
38 AXT format produced from any lav report and directly out of BLAT.
44 User feedback is an integral part of the evolution of this and other
45 Bioperl modules. Send your comments and suggestions preferably to
46 the Bioperl mailing list. Your participation is much appreciated.
48 bioperl-l@bioperl.org - General discussion
49 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 Please direct usage questions or support issues to the mailing list:
55 I<bioperl-l@bioperl.org>
57 rather than to the module maintainer directly. Many experienced and
58 reponsive experts will be able look at the problem and quickly
59 address it. Please include a thorough description of the problem
60 with code and data examples if at all possible.
64 Report bugs to the Bioperl bug tracking system to help us keep track
65 of the bugs and their resolution. Bug reports can be submitted via the
68 http://bugzilla.open-bio.org/
70 =head1 AUTHOR - Jason Stajich
72 Email jason-at-bioperl.org
76 The rest of the documentation details each of the object methods.
77 Internal methods are usually preceded with a _
82 # Let the code begin...
85 package Bio
::SearchIO
::axt
;
86 use vars
qw(%MODEMAP %MAPPING @STATES $GAPCHAR);
89 use Bio::Search::Result::ResultFactory;
90 use Bio::Search::HSP::HSPFactory;
91 use base qw(Bio::SearchIO);
96 # mapping of NCBI Blast terms to Bioperl hash keys
97 %MODEMAP = ('AXTOutput' => 'result',
104 'Hsp_score' => 'HSP-score',
105 'Hsp_query-from' => 'HSP-query_start',
106 'Hsp_query-to' => 'HSP-query_end',
107 'Hsp_hit-from' => 'HSP-hit_start',
108 'Hsp_hit-to' => 'HSP-hit_end',
109 'Hsp_positive' => 'HSP-conserved',
110 'Hsp_identity' => 'HSP-identical',
111 'Hsp_gaps' => 'HSP-hsp_gaps',
112 'Hsp_hitgaps' => 'HSP-hit_gaps',
113 'Hsp_querygaps' => 'HSP-query_gaps',
114 'Hsp_qseq' => 'HSP-query_seq',
115 'Hsp_hseq' => 'HSP-hit_seq',
116 'Hsp_midline' => 'HSP-homology_seq', # ignoring this for now
117 'Hsp_align-len' => 'HSP-hsp_length',
119 'Hit_id' => 'HIT-name',
120 'AXTOutput_query-def'=> 'RESULT-query_name',
127 Usage : my $obj = Bio::SearchIO::axt->new();
128 Function: Builds a new Bio::SearchIO::axt object
129 Returns : an instance of Bio::SearchIO::axt
136 my($class,@args) = @_;
137 my $self = $class->SUPER::new
(@args);
144 Usage : my $hit = $searchio->next_result;
145 Function: Returns the next Result from a search
146 Returns : Bio::Search::Result::ResultI object
156 my ($curquery,$curhit);
157 $self->start_document();
159 while( defined ($_ = $self->_readline )) {
161 if( m
/^(\d
+)\s
+ # alignment number - we'll throw this away anyways
162 (\S
+)\s
+ # Query name
163 (\d
+)\s
+(\d
+)\s
+ # Query start Query end (always + strand, 0 based)
165 (\d
+)\s
+(\d
+)\s
+ # Hit start Hit end (0 based)
166 ([\
-\
+])\s
+ # Hit strand
167 ([\d\
.\
-]+)\s
+ # Score
169 my ($alnnum, $qname,$qstart,$qend, $hname,
170 $hstart,$hend,$hstrand, $score) = ($1,$2,$3,$4,$5,
172 $self->{'_reporttype'} = 'AXT';
173 # Jim's code is 0 based
174 $qstart++; $qend++; $hstart++; $hend++;
175 if( defined $curquery &&
176 $curquery ne $qname ) {
177 $self->end_element({'Name' => 'Hit'});
178 $self->_pushback($_);
179 $self->end_element({'Name' => 'AXTOutput'});
180 return $self->end_document();
183 if( defined $curhit &&
185 # slight duplication here -- keep these in SYNC
186 $self->end_element({'Name' => 'Hit'});
187 $self->start_element({'Name' => 'Hit'});
188 $self->element({'Name' => 'Hit_id',
190 } elsif ( ! defined $curquery ) {
191 $self->start_element({'Name' => 'AXTOutput'});
192 $self->{'_result_count'}++;
193 $self->element({'Name' => 'AXTOutput_query-def',
196 $self->start_element({'Name' => 'Hit'});
197 $self->element({'Name' => 'Hit_id',
200 $self->start_element({'Name' => 'Hsp'});
201 my $queryalign = $self->_readline;
202 my $hitalign = $self->_readline;
205 my $alnlen = length($queryalign);
206 my $qgapnum = ( $queryalign =~ s/\Q$GAPCHAR/$GAPCHAR/g);
207 my $hgapnum = ( $hitalign =~ s/\Q$GAPCHAR/$GAPCHAR/g);
208 my $totalgaps = ($qgapnum + $hgapnum);
210 if( $hstrand eq '-' ) { # strand gets inferred by start/end
211 ($hstart,$hend) = ($hend,$hstart);
213 $self->element({'Name' => 'Hsp_score',
215 $self->element({'Name' => 'Hsp_query-from',
217 $self->element({'Name' => 'Hsp_query-to',
219 $self->element({'Name' => 'Hsp_hit-from',
221 $self->element({'Name' => 'Hsp_hit-to',
223 $self->element({'Name' => 'Hsp_gaps',
224 'Data' => $qgapnum + $hgapnum});
225 $self->element({'Name' => 'Hsp_querygaps',
226 'Data' => $qgapnum});
227 $self->element({'Name' => 'Hsp_hitgaps',
228 'Data' => $hgapnum});
230 $self->element({'Name' => 'Hsp_identity',
231 'Data' => $alnlen - $totalgaps});
232 $self->element({'Name' => 'Hsp_positive',
233 'Data' => $alnlen - $totalgaps});
234 $self->element({'Name' => 'Hsp_qseq',
235 'Data' => $queryalign});
236 $self->element({'Name' => 'Hsp_hseq',
237 'Data' => $hitalign});
239 $self->end_element({'Name' => 'Hsp'});
245 if( defined $curquery ) {
246 $self->end_element({'Name' => 'Hit'});
247 $self->end_element({'Name' => 'AXTOutput'});
248 return $self->end_document();
254 my ($self,@args) = @_;
255 $self->SUPER::_initialize
(@args);
256 $self->_eventHandler->register_factory('result', Bio
::Search
::Result
::ResultFactory
->new(-type
=> 'Bio::Search::Result::GenericResult'));
258 $self->_eventHandler->register_factory('hsp', Bio
::Search
::HSP
::HSPFactory
->new(-type
=> 'Bio::Search::HSP::GenericHSP'));
264 Title : start_element
265 Usage : $eventgenerator->start_element
266 Function: Handles a start element event
268 Args : hashref with at least 2 keys 'Data' and 'Name'
274 my ($self,$data) = @_;
275 # we currently don't care about attributes
276 my $nm = $data->{'Name'};
277 if( my $type = $MODEMAP{$nm} ) {
279 if( $self->_eventHandler->will_handle($type) ) {
280 my $func = sprintf("start_%s",lc $type);
281 $self->_eventHandler->$func($data->{'Attributes'});
283 unshift @
{$self->{'_elements'}}, $type;
285 if($nm eq 'AXTOutput') {
286 $self->{'_values'} = {};
287 $self->{'_result'}= undef;
288 $self->{'_mode'} = '';
295 Title : start_element
296 Usage : $eventgenerator->end_element
297 Function: Handles an end element event
299 Args : hashref with at least 2 keys 'Data' and 'Name'
305 my ($self,$data) = @_;
306 my $nm = $data->{'Name'};
308 # Hsp are sort of weird, in that they end when another
309 # object begins so have to detect this in end_element for now
311 if( my $type = $MODEMAP{$nm} ) {
312 if( $self->_eventHandler->will_handle($type) ) {
313 my $func = sprintf("end_%s",lc $type);
314 $rc = $self->_eventHandler->$func($self->{'_reporttype'},
317 shift @
{$self->{'_elements'}};
319 } elsif( $MAPPING{$nm} ) {
320 if ( ref($MAPPING{$nm}) =~ /hash/i ) {
321 my $key = (keys %{$MAPPING{$nm}})[0];
322 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
324 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
327 $self->warn( "unknown nm $nm ignoring\n");
329 $self->{'_last_data'} = ''; # remove read data if we are at
331 $self->{'_result'} = $rc if( $nm eq 'AXTOutput' );
339 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
340 Function: Convience method that calls start_element, characters, end_element
342 Args : Hash ref with the keys 'Name' and 'Data'
348 my ($self,$data) = @_;
349 $self->start_element($data);
350 $self->characters($data);
351 $self->end_element($data);
358 Usage : $eventgenerator->characters($str)
359 Function: Send a character events
367 my ($self,$data) = @_;
369 return unless ( defined $data->{'Data'} );
370 if( $data->{'Data'} =~ /^\s+$/ ) {
371 return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/;
374 if( $self->in_element('hsp') &&
375 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) {
377 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'};
380 $self->{'_last_data'} = $data->{'Data'};
386 Usage : $obj->_mode($newval)
389 Returns : value of _mode
390 Args : newvalue (optional)
396 my ($self,$value) = @_;
397 if( defined $value) {
398 $self->{'_mode'} = $value;
400 return $self->{'_mode'};
403 =head2 within_element
405 Title : within_element
406 Usage : if( $eventgenerator->within_element($element) ) {}
407 Function: Test if we are within a particular element
408 This is different than 'in' because within can be tested
411 Args : string element name
417 my ($self,$name) = @_;
418 return 0 if ( ! defined $name &&
419 ! defined $self->{'_elements'} ||
420 scalar @
{$self->{'_elements'}} == 0) ;
421 foreach ( @
{$self->{'_elements'}} ) {
432 Usage : if( $eventgenerator->in_element($element) ) {}
433 Function: Test if we are in a particular element
434 This is different than 'in' because within can be tested
437 Args : string element name
443 my ($self,$name) = @_;
444 return 0 if ! defined $self->{'_elements'}->[0];
445 return ( $self->{'_elements'}->[0] eq $name)
449 =head2 start_document
451 Title : start_document
452 Usage : $eventgenerator->start_document
453 Function: Handles a start document event
462 $self->{'_lasttype'} = '';
463 $self->{'_values'} = {};
464 $self->{'_result'}= undef;
465 $self->{'_mode'} = '';
466 $self->{'_elements'} = [];
473 Usage : $eventgenerator->end_document
474 Function: Handles an end document event
475 Returns : Bio::Search::Result::ResultI object
482 my ($self,@args) = @_;
483 return $self->{'_result'};
489 Usage : my $count = $searchio->result_count
490 Function: Returns the number of results we have processed
499 return $self->{'_result_count'};
502 sub report_count
{ shift->result_count }