2 # BioPerl module for Bio::Tools::Spidey::Results
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Ryan Golhar <golharam@umdnj.edu>
8 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
14 Bio::Tools::Spidey::Results - Results of a Spidey run
18 use Bio::Tools::Spidey::Results;
19 my $spidey = Bio::Tools::Spidey::Results->new(-file => 'result.spidey' );
23 my $spidey = Bio::Tools::Spidey::Results->new( -fh => \*INPUT );
25 # get the exons before doing anything else
26 my $exonset = $spidey->next_exonset();
29 my @exons = $exonset->sub_SeqFeature();
30 print "Total no of Exons: ", scalar(@exons), "\n";
32 print "Genomic sequence length: ", $spidey->genomic_dna_length(), "\n";
34 # $exonset is-a Bio::SeqFeature::Generic with Bio::Tools::Spidey::Exons
36 print "Delimited on sequence ", $exonset->seq_id(), " from ",
37 $exonset->start(), " to ", $exonset->end(), "\n";
39 foreach my $exon ( $exonset->sub_SeqFeature() ) {
40 # $exon is-a Bio::SeqFeature::FeaturePair
41 print "Exon from ", $exon->start, " to ", $exon->end,
42 " on strand ", $exon->strand(), "\n";
43 # you can get out what it matched using the est_hit attribute
44 my $homol = $exon->est_hit();
45 print "Matched to sequence ", $homol->seq_id,
46 " at ", $homol->start," to ", $homol->end, "\n";
49 # essential if you gave a filename at initialization (otherwise
50 # the file stays open)
55 The spidey module provides a parser and results object for spidey
56 output. The spidey results are specialised types of SeqFeatures,
57 meaning you can add them to AnnSeq objects fine, and manipulate them
58 in the "normal" seqfeature manner.
60 The spidey Exon objects are Bio::SeqFeature::FeaturePair inherited
61 objects. The $esthit = $exon-E<gt>est_hit() is the alignment as a
62 feature on the matching object (normally, a cDNA), in which the
63 start/end points are where the hit lies.
65 To make this module work sensibly you need to run
67 spidey -i genomic.fasta -m cDNA.fasta
73 User feedback is an integral part of the evolution of this and other
74 Bioperl modules. Send your comments and suggestions preferably to one
75 of the Bioperl mailing lists. Your participation is much appreciated.
77 bioperl-l@bioperl.org - General discussion
78 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
82 Please direct usage questions or support issues to the mailing list:
84 I<bioperl-l@bioperl.org>
86 rather than to the module maintainer directly. Many experienced and
87 reponsive experts will be able look at the problem and quickly
88 address it. Please include a thorough description of the problem
89 with code and data examples if at all possible.
93 Report bugs to the Bioperl bug tracking system to help us keep track
94 the bugs and their resolution. Bug reports can be submitted via the
97 https://github.com/bioperl/bioperl-live/issues
99 =head1 AUTHOR - Ryan Golhar
101 Email golharam@umdnj.edu
105 The rest of the documentation details each of the object methods.
106 Internal methods are usually preceded with a _
111 # Let the code begin...
114 package Bio
::Tools
::Spidey
::Results
;
119 use Bio
::Tools
::Spidey
::Exon
;
121 use base
qw(Bio::Tools::AnalysisResult);
123 sub _initialize_state
{
124 my($self,@args) = @_;
126 # call the inherited method first
127 my $make = $self->SUPER::_initialize_state
(@args);
129 # my ($est_is_first) = $self->_rearrange([qw(ESTFIRST)], @args);
131 # delete($self->{'_est_is_first'});
132 # $self->{'_est_is_first'} = $est_is_first if(defined($est_is_first));
133 $self->analysis_method("Spidey");
136 =head2 analysis_method
138 Usage : $spidey->analysis_method();
139 Purpose : Inherited method. Overridden to ensure that the name matches
147 sub analysis_method
{
149 my ($self, $method) = @_;
150 if($method && ($method !~ /Spidey/i)) {
151 $self->throw("method $method not supported in " . ref($self));
153 return $self->SUPER::analysis_method
($method);
156 =head2 parse_next_alignment
158 Title : parse_next_alignment
159 Usage : @exons = $spidey_result->parse_next_alignment;
160 foreach $exon (@exons) {
163 Function: Parses the next alignment of the Spidey result file and returns the
164 found exons as an array of Bio::Tools::Spidey::Exon objects. Call
165 this method repeatedly until an empty array is returned to get the
166 results for all alignments.
168 Returns : An array of Bio::Tools::Spidey::Exon objects
174 sub parse_next_alignment
{
176 # for strand 1 = plus, -1 = minus
177 my ($started,$version,$strand, $exoncount) = (0,0,0,-1);
178 my (%seq1props,%seq2props,@exons);
180 # we refer to the properties of each seq by reference
182 while(defined($_ = $self->_readline())) {
185 # bascially, parse a Spidey result...
187 # matches: --SPIDEY version 1.40--
188 if( /^--SPIDEY\s+version\s+(\d+\.\d+)--/) {
190 $self->_pushback($_);
194 if ($version != 1.40) {
195 $self->throw("Spidey parser only designed to work with Spidey v1.40\n");
198 } elsif (/^Genomic:\s+(\S+)\s.*,\s+(\d+)\sbp$/ ) {
199 # matches: Genomic: lcl|some_name other information, 1234 bp
200 # $seq1props{'filename'} = $1;
201 $seq1props{'seqname'} = $1;
202 $seq1props{'length'} = $2;
203 $self->genomic_dna_length($seq1props{'length'});
205 } elsif( /^mRNA:\s+(\S+)\s.*,(?:\s+mRNA\s+sequence,)?\s(\d+)\sbp$/ ) {
207 # $seq2props{'filename'} = $1;
208 $seq2props{'seqname'} = $1;
209 $seq2props{'length'} = $2;
211 } elsif( /^Strand:/ ) {
217 } elsif( /^Number of exons: (\d+)/ ) {
220 my ($genomic_start, $genomic_stop, $cdna_start, $cdna_stop,
221 $id, $mismatches, $gaps, $splice_donor,
222 $splice_acceptor, $uncertain);
224 # the next $exoncount lines contains information
225 # about the matches of each exon. we should parse
226 # this information here
228 for (my $ec = 1; $ec <= $exoncount; $ec++) {
229 if (defined($_ = $self->_readline())) {
232 if (/^Exon\s$ec[\(\)-]*:\s(\d+)-(\d+)\s\(gen\)\s+(\d+)-(\d+)\s\(mRNA\)\s+id\s([\d\.inf-]+)%\s+mismatches\s(\d+)\s+gaps\s(\d+)\s+splice\ssite\s\(d\s+a\):\s(\d+)\s+(\d+)\s*(\w*)/) {
241 $splice_acceptor = $9;
244 $self->throw( "Failed to match anything:\n$_\n");
247 my $exon = Bio
::Tools
::Spidey
::Exon
->new
248 (-start
=> $genomic_start,
249 -end
=> $genomic_stop,
251 $exon->seq_id($seq1props{'seqname'});
253 # feature1 is supposed to be initialized to a Similarity object, but we provide a safety net
254 if ($exon->feature1->can('seqlength')) {
255 $exon->feature1->seqlength($seq1props{'length'});
257 $exon->feature1->add_tag_value('seqlength', $seq1props{'length'});
260 # create and initialize the feature wrapping the 'hit' (the cDNA)
261 my $fea2 = Bio
::SeqFeature
::Similarity
->new
262 (-start
=> $cdna_start,
265 -seq_id
=> $seq2props{'seqname'},
266 -primary
=> "aligning_cDNA");
267 $fea2->seqlength($seq2props{'length'});
269 $exon->est_hit($fea2);
272 $exon->source_tag($self->analysis_method());
273 $exon->percentage_id($5);
274 $exon->mismatches($6);
282 $self->throw("Unexpected end of file reached\n");
285 } elsif( /^Number of splice sites:\s+(\d+)/ ) {
286 $self->splicesites($1);
287 } elsif( /^mRNA coverage:\s+(\d+)%/ ) {
288 $self->est_coverage($1);
289 } elsif(/^overall percent identity:\s+([\d\.]+)%/ ) {
290 $self->overall_percentage_id($1);
291 } elsif(/^Missing mRNA ends:\s+(\w+)/ ) {
292 $self->missing_mrna_ends($1);
293 } elsif( /^Exon (\d+): (\d+)-(\d+) \(gen\)\s+(\d+)-(\d+) \(mRNA\)/ ) {
294 my ($exon_num, $gen_start, $gen_stop, $cdna_start, $cdna_stop);
300 } elsif( /No alignment found/ ) {
303 #$self->debug("unmatched $_\n");
307 # Exon 1: 36375798-36375691 (gen) 1-108 (mRNA)
310 # CCTCTTTTTCTTTGCAGGGTATATACCCAGTTACTTAGACAAGGATGAGCTATGTGTAGT
311 # | ||||||||||||||||||||||||||||||||||||||||||||||
312 # ATGTCAGGGTATATACCCAGTTACTTAGACAAGGATGAGCTATGTGTAGT
313 # M S G Y I P S Y L D K D E L C V V
316 # ATGTGGGGACAAAGCCACCGGATATCATTATCGCTGCATCACTTGTGAAGGTTGCAAGGT
317 # ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
318 # ATGTGGGGACAAAGCCACCGGATATCATTATCGCTGCATCACTTGTGAAGGTTGCAAG
319 # C G D K A T G Y H Y R C I T C E G C K
324 @exons ?
return \
@exons : return ;
330 Usage : $exonset = $spidey_result->parse_next_exonset;
331 print "Exons start at ", $exonset->start(),
332 "and end at ", $exonset->end(), "\n";
333 for $exon ($exonset->sub_SeqFeature()) {
336 Function: Parses the next alignment of the Spidey result file and returns the
337 set of exons as a container of features. The container is itself
338 a Bio::SeqFeature::Generic object, with the Bio::Tools::Spidey::Exon
339 objects as sub features. Start, end, and strand of the container
340 will represent the total region covered by the exons of this set.
342 See the documentation of parse_next_alignment() for further
343 reference about parsing and how the information is stored.
345 Returns : An Bio::SeqFeature::Generic object holding Bio::Tools::Spidey::Exon
346 objects as sub features.
355 # get the next array of exons
356 my $exons = $self->parse_next_alignment();
357 if( ! defined $exons ) {
358 $self->warn("No exons returned");
362 return Bio
::SeqFeature
::Generic
->new();
364 # create the container of exons as a feature object itself, with the
365 # data of the first exon for initialization
366 $exonset = Bio
::SeqFeature
::Generic
->new('-start' => $exons->[0]->start(),
367 '-end' => $exons->[-1]->end(),
368 '-strand' => $exons->[0]->strand(),
369 '-primary' => "ExonSet");
370 $exonset->source_tag($exons->[0]->source_tag());
371 $exonset->seq_id($exons->[0]->seq_id());
372 # now add all exons as sub features, with enabling EXPANsion of the region
374 foreach my $exon (@
$exons) {
375 $exonset->add_sub_SeqFeature($exon, 'EXPAND');
383 Usage : while($exonset = $spidey->next_feature()) {
386 Function: Does the same as L<next_exonset()>. See there for documentation of
387 the functionality. Call this method repeatedly until FALSE is
390 The returned object is actually a SeqFeatureI implementing object.
391 This method is required for classes implementing the
392 SeqAnalysisParserI interface, and is merely an alias for
393 next_exonset() at present.
396 Returns : A Bio::SeqFeature::Generic object.
402 my ($self,@args) = @_;
403 # even though next_exonset doesn't expect any args (and this method
404 # does neither), we pass on args in order to be prepared if this changes
406 return $self->next_exonset(@args);
409 =head2 genomic_dna_length
411 Title : genomic_dna_length
412 Usage : $spidey->genomic_dna_length();
413 Function: Returns the length of the genomic DNA used in this Spidey result
415 Returns : An integer value.
420 sub genomic_dna_length
{
421 my ($self, @args) = @_;
426 $self->{'genomic_dna_length'} = $val;
428 $val = $self->{'genomic_dna_length'};
436 Usage : $spidey->splicesites();
437 Function: Returns the number of splice sites found in this Spidey result
439 Returns : An integer value.
445 my ($self, @args) = @_;
450 $self->{'splicesites'} = $val;
452 $val = $self->{'splicesites'};
460 Usage : $spidey->est_coverage();
461 Function: Returns the percent of est coverage in this Spidey result
463 Returns : An integer value.
469 my ($self, @args) = @_;
474 $self->{'est_coverage'} = $val;
476 $val = $self->{'est_coverage'};
481 =head2 overall_percentage_id
483 Title : overall_percentage_id
484 Usage : $spidey->overall_percentage_id();
485 Function: Returns the overall percent id in this Spidey result
487 Returns : An float value.
492 sub overall_percentage_id
{
493 my ($self, @args) = @_;
498 $self->{'overall_percentage_id'} = $val;
500 $val = $self->{'overall_percentage_id'};
505 =head2 missing_mrna_ends
507 Title : missing_mrna_ends
508 Usage : $spidey->missing_mrna_ends();
509 Function: Returns left/right/neither from Spidey
511 Returns : A string value.
516 sub missing_mrna_ends
{
517 my ($self, @args) = @_;
522 $self->{'missing_mrna_ends'} = $val;
524 $val = $self->{'missing_mrna_ends'};