tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / SeqFeature / Gene / Intron.pm
blobe7bdc2e4195d102fcdea0cd4c43b2fbc3f3eb171
1 # $Id$
3 # BioPerl module for Bio::SeqFeature::Gene::Intron
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by David Block <dblock@gene.pbi.nrc.ca>
9 # Copyright David Block
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::SeqFeature::Gene::Intron - An intron feature
19 =head1 SYNOPSIS
21 Give standard usage here
23 =head1 DESCRIPTION
25 Describe the object here
27 =head1 FEEDBACK
29 =head2 Mailing Lists
31 User feedback is an integral part of the evolution of this and other
32 Bioperl modules. Send your comments and suggestions preferably to
33 the Bioperl mailing list. Your participation is much appreciated.
35 bioperl-l@bioperl.org - General discussion
36 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
38 =head2 Support
40 Please direct usage questions or support issues to the mailing list:
42 I<bioperl-l@bioperl.org>
44 rather than to the module maintainer directly. Many experienced and
45 reponsive experts will be able look at the problem and quickly
46 address it. Please include a thorough description of the problem
47 with code and data examples if at all possible.
49 =head2 Reporting Bugs
51 Report bugs to the Bioperl bug tracking system to help us keep track
52 of the bugs and their resolution. Bug reports can be submitted via the
53 web:
55 http://bugzilla.open-bio.org/
57 =head1 AUTHOR - David Block
59 Email dblock@gene.pbi.nrc.ca
61 =head1 APPENDIX
63 The rest of the documentation details each of the object methods.
64 Internal methods are usually preceded with a _
66 =cut
69 # Let the code begin...
72 package Bio::SeqFeature::Gene::Intron;
73 use strict;
75 use Bio::SeqFeature::Gene::Exon;
77 use base qw(Bio::SeqFeature::Gene::NC_Feature);
79 sub new {
80 my($class,@args) = @_;
82 # introns are non-coding by default
83 if(! grep { lc($_) eq '-is_coding'; } @args) {
84 push(@args, '-is_coding', 0);
86 my $self = $class->SUPER::new(@args);
88 my ($primary, $prim) =
89 $self->_rearrange([qw(PRIMARY PRIMARY_TAG)],@args);
90 $self->primary_tag('intron') unless $primary || $prim;
92 return $self;
95 =head2 upstream_Exon
97 Title : upstream_Exon
98 Usage : $intron->upstream_Exon()
99 Function: exon upstream of the intron
100 Returns : Bio::EnsEMBL::Exon
101 Args :
103 =cut
105 sub upstream_Exon {
106 my( $self, $exon ) = @_;
108 if ($exon) {
109 $self->{'_intron_location'} = undef;
110 $self->throw("'$exon' is not a Bio::SeqFeature::Gene::ExonI")
111 unless $exon->isa('Bio::SeqFeature::Gene::ExonI');
112 $self->{'_upstream_exon'} = $exon;
114 return $self->{'_upstream_exon'};
118 =head2 downstream_Exon
120 Title : downstream_Exon
121 Usage : $intron->downstream_Exon()
122 Function: exon downstream of the intron
123 Returns : Bio::EnsEMBL::Exon
124 Args :
126 =cut
128 sub downstream_Exon {
129 my( $self, $exon ) = @_;
131 if ($exon) {
132 $self->{'_intron_location'} = undef;
133 $self->throw("'$exon' is not a Bio::SeqFeature::Gene::ExonI")
134 unless $exon->isa('Bio::SeqFeature::Gene::ExonI');
135 $self->{'_downstream_exon'} = $exon;
137 return $self->{'_downstream_exon'};
140 =head2 phase
142 Title : phase
143 Usage : $intron->phase()
144 Function: returns the phase of the intron(where it interrupts the codon)
145 Returns : int(0,1,2)
146 Args :
148 =cut
150 sub phase {
151 my ($self) = @_;
152 return $self->downstream_Exon->phase;
156 =head2 acceptor_splice_site
158 Title : acceptor_splice_site
159 Usage : $intron->acceptor_splice_site(21,3)
160 Function: returns the sequence corresponding to the
161 consensus acceptor splice site. If start and
162 end are provided, it will number of base pairs
163 left and right of the canonical AG. Here 21 means
164 21 bp into intron and 3 means 3 bp into the exon.
165 --Intron--21----|AG|-3-----Exon
166 Defaults to 21,3
168 Returns : Bio::Seq
169 Args : start and end
171 =cut
173 sub acceptor_splice_site {
174 my ($self,$ss_start,$ss_end) = @_;
175 $ss_start = 21 unless defined $ss_start;
176 $ss_end = 3 unless defined $ss_end;
177 if($self->strand < 0){
178 my $tmp= $ss_start;
179 $ss_start = $ss_end;
180 $ss_end = $tmp;
182 my $intron_end= $self->location->end;
183 my $down_exon = $self->downstream_Exon;
184 my $acceptor;
185 if($self->strand < 0){
186 $ss_start= $ss_start > $down_exon->length ? $down_exon->length: $ss_start;
187 $ss_end= $ss_end > $self->length-2 ? $self->length-2 : $ss_end;
188 $acceptor = Bio::SeqFeature::Generic->new(-start=>$self->start - ($ss_start) ,
189 -end=>$self->start + ($ss_end+1),
190 -strand=>$self->strand,
191 -primary_tag=>"donor splice site");
193 else {
194 $ss_start = $ss_start > $self->length-2 ? $self->length-2 : $ss_start;
195 $ss_end = $ss_end > $down_exon->length ? $down_exon->length : $ss_end;
198 $acceptor = Bio::SeqFeature::Generic->new(-start=>$self->end - ($ss_start + 1),
199 -end=>$self->end + $ss_end,
200 -strand=>$self->strand,
201 -primary_tag=>"donor splice site");
203 $acceptor->attach_seq($self->entire_seq);
205 return $acceptor;
209 =head2 donor_splice_site
211 Title : donor_splice_site
212 Usage : $intron->donor_splice_site(3,6)
213 Function: returns the sequence corresponding to the
214 consensus donor splice site. If start and
215 end are provided, it will number of base pairs
216 left and right of the canonical GT. Here 3 means
217 3 bp into exon and 6 means 6 bp into the intron.
218 --Exon-3--|GT|-6----Intron-
219 Defaults to 3,6
221 Returns : Bio::Seq
222 Args : start and end
224 =cut
226 sub donor_splice_site {
227 my ($self,$ss_start,$ss_end) = @_;
228 $ss_start = 3 unless defined $ss_start;
229 $ss_end = 10 unless defined $ss_end;
230 if($self->strand < 0){
231 my $tmp= $ss_start;
232 $ss_start = $ss_end;
233 $ss_end = $tmp;
235 my $up_exon = $self->upstream_Exon;
236 my $donor;
237 if($self->strand < 0){
238 $ss_end = $ss_end > $up_exon->length ? $up_exon->length : $ss_end;
239 $ss_start = $ss_start> $self->length -2 ? $self->length -2 : $ss_start;
240 $donor = Bio::SeqFeature::Generic->new(-start=>$self->end - ($ss_start+1),
241 -end => $self->end + ($ss_end),
242 -strand=>$self->strand,
243 -primary_tag=>"acceptor splice site");
245 else {
246 $ss_start = $ss_start > $up_exon->length ? $up_exon->length : $ss_start;
247 $ss_end = $ss_end > $self->length -2 ? $self->length -2 : $ss_end;
248 $donor = Bio::SeqFeature::Generic->new(-start=>$self->start - $ss_start,
249 -end => $self->start +($ss_end+1),
250 -strand=>$self->strand,
251 -primary_tag=>"acceptor splice site");
253 $donor->attach_seq($self->entire_seq);
254 return $donor;
257 sub location {
258 my( $self ) = @_;
260 unless ($self->{'_intron_location'}) {
261 my $loc = Bio::Location::Simple->new;
263 my $up_exon = $self->upstream_Exon;
264 my $down_exon = $self->downstream_Exon;
266 # Get the PrimarySeqs attached to both and check it is the same sequence
267 my $up_seq = $up_exon ->entire_seq;
268 my $down_seq = $down_exon->entire_seq;
269 unless (ref($up_seq) eq ref($down_seq) ) {
270 $self->throw("upstream and downstream exons are attached to different sequences\n'$up_seq' and '$down_seq'");
273 # Check that the exons are on the same strand. (Do I need to bother?)
274 my $up_strand = $up_exon ->strand;
275 my $down_strand = $down_exon->strand;
276 unless ($up_strand == $down_strand) {
277 $self->throw("upstream and downstream exons are on different strands "
278 . "('$up_strand' and '$down_strand')");
280 $loc->strand($up_strand);
282 # $exon_end is the end of the exon which is 5' of the intron on the genomic sequence.
283 # $exon_start is the start of the exon which is 3' of the intron on the genomic sequence.
284 my( $exon_end, $exon_start );
285 if ($up_strand == 1) {
286 $exon_end = $up_exon ->end;
287 $exon_start = $down_exon->start;
288 } else {
289 $exon_end = $down_exon->end;
290 $exon_start = $up_exon ->start;
292 unless ($exon_end < $exon_start) {
293 $self->throw("Intron gap begins after '$exon_end' and ends before '$exon_start'");
295 $loc->start($exon_end + 1);
296 $loc->end ($exon_start - 1);
298 # Attach the sequence and location objects to the intron
299 $self->{'_intron_location'} = $loc;
302 return $self->{'_intron_location'};