2 # BioPerl module for Bio::SearchIO::axt
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SearchIO::axt - a parser for axt format reports
21 my $parser = Bio::SearchIO->new(-format => 'axt',
22 -file => 't/data/report.blastz');
23 while( my $result = $parser->next_result ) {
24 while( my $hit = $result->next_hit ) {
25 while( my $hsp = $hit->next_hsp ) {
32 This is a parser and event-generator for AXT format reports. BLASTZ
33 reports (Schwartz et al,(2003) Genome Research, 13:103-107) are normally
34 in LAV format but are commonly post-processed to AXT format; many precomputed
35 BLASTZ reports, such as those found in the UCSC Genome
36 Browser, are in AXT format. This parser will also parse any
37 AXT format produced from any lav report and directly out of BLAT.
43 User feedback is an integral part of the evolution of this and other
44 Bioperl modules. Send your comments and suggestions preferably to
45 the Bioperl mailing list. Your participation is much appreciated.
47 bioperl-l@bioperl.org - General discussion
48 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52 Please direct usage questions or support issues to the mailing list:
54 I<bioperl-l@bioperl.org>
56 rather than to the module maintainer directly. Many experienced and
57 reponsive experts will be able look at the problem and quickly
58 address it. Please include a thorough description of the problem
59 with code and data examples if at all possible.
63 Report bugs to the Bioperl bug tracking system to help us keep track
64 of the bugs and their resolution. Bug reports can be submitted via the
67 https://github.com/bioperl/bioperl-live/issues
69 =head1 AUTHOR - Jason Stajich
71 Email jason-at-bioperl.org
75 The rest of the documentation details each of the object methods.
76 Internal methods are usually preceded with a _
81 # Let the code begin...
84 package Bio
::SearchIO
::axt
;
85 use vars
qw(%MODEMAP %MAPPING @STATES $GAPCHAR);
88 use Bio::Search::Result::ResultFactory;
89 use Bio::Search::HSP::HSPFactory;
90 use base qw(Bio::SearchIO);
95 # mapping of NCBI Blast terms to Bioperl hash keys
96 %MODEMAP = ('AXTOutput' => 'result',
103 'Hsp_score' => 'HSP-score',
104 'Hsp_query-from' => 'HSP-query_start',
105 'Hsp_query-to' => 'HSP-query_end',
106 'Hsp_hit-from' => 'HSP-hit_start',
107 'Hsp_hit-to' => 'HSP-hit_end',
108 'Hsp_positive' => 'HSP-conserved',
109 'Hsp_identity' => 'HSP-identical',
110 'Hsp_gaps' => 'HSP-hsp_gaps',
111 'Hsp_hitgaps' => 'HSP-hit_gaps',
112 'Hsp_querygaps' => 'HSP-query_gaps',
113 'Hsp_qseq' => 'HSP-query_seq',
114 'Hsp_hseq' => 'HSP-hit_seq',
115 'Hsp_midline' => 'HSP-homology_seq', # ignoring this for now
116 'Hsp_align-len' => 'HSP-hsp_length',
118 'Hit_id' => 'HIT-name',
119 'AXTOutput_query-def'=> 'RESULT-query_name',
126 Usage : my $obj = Bio::SearchIO::axt->new();
127 Function: Builds a new Bio::SearchIO::axt object
128 Returns : an instance of Bio::SearchIO::axt
135 my($class,@args) = @_;
136 my $self = $class->SUPER::new
(@args);
143 Usage : my $hit = $searchio->next_result;
144 Function: Returns the next Result from a search
145 Returns : Bio::Search::Result::ResultI object
155 my ($curquery,$curhit);
156 $self->start_document();
158 while( defined ($_ = $self->_readline )) {
160 if( m
/^(\d
+)\s
+ # alignment number - we'll throw this away anyways
161 (\S
+)\s
+ # Query name
162 (\d
+)\s
+(\d
+)\s
+ # Query start Query end (always + strand, 0 based)
164 (\d
+)\s
+(\d
+)\s
+ # Hit start Hit end (0 based)
165 ([\
-\
+])\s
+ # Hit strand
166 ([\d\
.\
-]+)\s
+ # Score
168 my ($alnnum, $qname,$qstart,$qend, $hname,
169 $hstart,$hend,$hstrand, $score) = ($1,$2,$3,$4,$5,
171 $self->{'_reporttype'} = 'AXT';
172 # Jim's code is 0 based
173 # yes, but axt format is one-based, see bug 3145 - cjfields 10-11-10
174 #$qstart++; $qend++; $hstart++; $hend++;
175 if( defined $curquery && $curquery ne $qname ) {
176 $self->end_element({'Name' => 'Hit'});
177 $self->_pushback($_);
178 $self->end_element({'Name' => 'AXTOutput'});
179 return $self->end_document();
182 if( defined $curhit &&
184 # slight duplication here -- keep these in SYNC
185 $self->end_element({'Name' => 'Hit'});
186 $self->start_element({'Name' => 'Hit'});
187 $self->element({'Name' => 'Hit_id',
189 } elsif ( ! defined $curquery ) {
190 $self->start_element({'Name' => 'AXTOutput'});
191 $self->{'_result_count'}++;
192 $self->element({'Name' => 'AXTOutput_query-def',
195 $self->start_element({'Name' => 'Hit'});
196 $self->element({'Name' => 'Hit_id',
199 $self->start_element({'Name' => 'Hsp'});
200 my $queryalign = $self->_readline;
201 my $hitalign = $self->_readline;
204 my $alnlen = length($queryalign);
205 my $qgapnum = ( $queryalign =~ s/\Q$GAPCHAR/$GAPCHAR/g);
206 my $hgapnum = ( $hitalign =~ s/\Q$GAPCHAR/$GAPCHAR/g);
207 my $totalgaps = ($qgapnum + $hgapnum);
209 if( $hstrand eq '-' ) { # strand gets inferred by start/end
210 ($hstart,$hend) = ($hend,$hstart);
212 $self->element({'Name' => 'Hsp_score',
214 $self->element({'Name' => 'Hsp_query-from',
216 $self->element({'Name' => 'Hsp_query-to',
218 $self->element({'Name' => 'Hsp_hit-from',
220 $self->element({'Name' => 'Hsp_hit-to',
222 $self->element({'Name' => 'Hsp_gaps',
223 'Data' => $qgapnum + $hgapnum});
224 $self->element({'Name' => 'Hsp_querygaps',
225 'Data' => $qgapnum});
226 $self->element({'Name' => 'Hsp_hitgaps',
227 'Data' => $hgapnum});
229 $self->element({'Name' => 'Hsp_identity',
230 'Data' => $alnlen - $totalgaps});
231 $self->element({'Name' => 'Hsp_positive',
232 'Data' => $alnlen - $totalgaps});
233 $self->element({'Name' => 'Hsp_qseq',
234 'Data' => $queryalign});
235 $self->element({'Name' => 'Hsp_hseq',
236 'Data' => $hitalign});
238 $self->end_element({'Name' => 'Hsp'});
244 if( defined $curquery ) {
245 $self->end_element({'Name' => 'Hit'});
246 $self->end_element({'Name' => 'AXTOutput'});
247 return $self->end_document();
253 my ($self,@args) = @_;
254 $self->SUPER::_initialize
(@args);
255 $self->_eventHandler->register_factory('result', Bio
::Search
::Result
::ResultFactory
->new(-type
=> 'Bio::Search::Result::GenericResult'));
257 $self->_eventHandler->register_factory('hsp', Bio
::Search
::HSP
::HSPFactory
->new(-type
=> 'Bio::Search::HSP::GenericHSP'));
263 Title : start_element
264 Usage : $eventgenerator->start_element
265 Function: Handles a start element event
267 Args : hashref with at least 2 keys 'Data' and 'Name'
273 my ($self,$data) = @_;
274 # we currently don't care about attributes
275 my $nm = $data->{'Name'};
276 if( my $type = $MODEMAP{$nm} ) {
278 if( $self->_eventHandler->will_handle($type) ) {
279 my $func = sprintf("start_%s",lc $type);
280 $self->_eventHandler->$func($data->{'Attributes'});
282 unshift @
{$self->{'_elements'}}, $type;
284 if($nm eq 'AXTOutput') {
285 $self->{'_values'} = {};
286 $self->{'_result'}= undef;
287 $self->{'_mode'} = '';
294 Title : start_element
295 Usage : $eventgenerator->end_element
296 Function: Handles an end element event
298 Args : hashref with at least 2 keys 'Data' and 'Name'
304 my ($self,$data) = @_;
305 my $nm = $data->{'Name'};
307 # Hsp are sort of weird, in that they end when another
308 # object begins so have to detect this in end_element for now
310 if( my $type = $MODEMAP{$nm} ) {
311 if( $self->_eventHandler->will_handle($type) ) {
312 my $func = sprintf("end_%s",lc $type);
313 $rc = $self->_eventHandler->$func($self->{'_reporttype'},
316 shift @
{$self->{'_elements'}};
318 } elsif( $MAPPING{$nm} ) {
319 if ( ref($MAPPING{$nm}) =~ /hash/i ) {
320 my $key = (keys %{$MAPPING{$nm}})[0];
321 $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
323 $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
326 $self->warn( "unknown nm $nm ignoring\n");
328 $self->{'_last_data'} = ''; # remove read data if we are at
330 $self->{'_result'} = $rc if( $nm eq 'AXTOutput' );
338 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
339 Function: Convience method that calls start_element, characters, end_element
341 Args : Hash ref with the keys 'Name' and 'Data'
347 my ($self,$data) = @_;
348 $self->start_element($data);
349 $self->characters($data);
350 $self->end_element($data);
357 Usage : $eventgenerator->characters($str)
358 Function: Send a character events
366 my ($self,$data) = @_;
368 return unless ( defined $data->{'Data'} );
369 if( $data->{'Data'} =~ /^\s+$/ ) {
370 return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/;
373 if( $self->in_element('hsp') &&
374 $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) {
376 $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'};
379 $self->{'_last_data'} = $data->{'Data'};
385 Usage : $obj->_mode($newval)
388 Returns : value of _mode
389 Args : newvalue (optional)
395 my ($self,$value) = @_;
396 if( defined $value) {
397 $self->{'_mode'} = $value;
399 return $self->{'_mode'};
402 =head2 within_element
404 Title : within_element
405 Usage : if( $eventgenerator->within_element($element) ) {}
406 Function: Test if we are within a particular element
407 This is different than 'in' because within can be tested
410 Args : string element name
416 my ($self,$name) = @_;
417 return 0 if ( ! defined $name &&
418 ! defined $self->{'_elements'} ||
419 scalar @
{$self->{'_elements'}} == 0) ;
420 foreach ( @
{$self->{'_elements'}} ) {
431 Usage : if( $eventgenerator->in_element($element) ) {}
432 Function: Test if we are in a particular element
433 This is different than 'in' because within can be tested
436 Args : string element name
442 my ($self,$name) = @_;
443 return 0 if ! defined $self->{'_elements'}->[0];
444 return ( $self->{'_elements'}->[0] eq $name)
448 =head2 start_document
450 Title : start_document
451 Usage : $eventgenerator->start_document
452 Function: Handles a start document event
461 $self->{'_lasttype'} = '';
462 $self->{'_values'} = {};
463 $self->{'_result'}= undef;
464 $self->{'_mode'} = '';
465 $self->{'_elements'} = [];
472 Usage : $eventgenerator->end_document
473 Function: Handles an end document event
474 Returns : Bio::Search::Result::ResultI object
481 my ($self,@args) = @_;
482 return $self->{'_result'};
488 Usage : my $count = $searchio->result_count
489 Function: Returns the number of results we have processed
498 return $self->{'_result_count'};
501 sub report_count
{ shift->result_count }