sync w/ main trunk
[bioperl-live.git] / Bio / SeqFeature / Gene / GeneStructure.pm
blob1bf1c5113a29d10027e38eed155eb10d1c93d733
1 # $Id$
3 # BioPerl module for Bio::SeqFeature::Gene::GeneStructure
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Hilmar Lapp <hlapp@gmx.net>
9 # Copyright Hilmar Lapp
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::SeqFeature::Gene::GeneStructure - A feature representing an arbitrarily complex structure of a gene
19 =head1 SYNOPSIS
21 # See documentation of methods.
23 =head1 DESCRIPTION
25 A feature representing a gene structure. As of now, a gene structure
26 really is only a collection of transcripts. See
27 L<Bio::SeqFeature::Gene::TranscriptI> (interface) and
28 L<Bio::SeqFeature::Gene::Transcript> (implementation) for the features
29 of such objects.
31 =head1 FEEDBACK
33 =head2 Mailing Lists
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to one
37 of the Bioperl mailing lists. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42 =head2 Support
44 Please direct usage questions or support issues to the mailing list:
46 L<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
53 =head2 Reporting Bugs
55 Report bugs to the Bioperl bug tracking system to help us keep track
56 the bugs and their resolution. Bug reports can be submitted via the
57 web:
59 http://bugzilla.open-bio.org/
61 =head1 AUTHOR - Hilmar Lapp
63 Email hlapp-at-gmx.net
65 =head1 APPENDIX
67 The rest of the documentation details each of the object
68 methods. Internal methods are usually preceded with a _
70 =cut
73 # Let the code begin...
76 package Bio::SeqFeature::Gene::GeneStructure;
77 use vars qw($WeakRefs);
78 use strict;
80 BEGIN {
81 eval "use Scalar::Util qw(weaken);";
82 if ($@) {
83 $Bio::SeqFeature::Gene::GeneStructure::WeakRefs = 0;
84 } else { $Bio::SeqFeature::Gene::GeneStructure::WeakRefs = 1; }
88 use base qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::GeneStructureI);
91 sub new {
92 my ($caller, @args) = @_;
93 my $self = $caller->SUPER::new(@args);
94 $self->_register_for_cleanup(\&gene_cleanup);
95 my ($primary) =
96 $self->_rearrange([qw(PRIMARY
97 )],@args);
99 $primary = 'genestructure' unless $primary;
100 $self->primary_tag($primary);
101 $self->strand(0) if(! defined($self->strand()));
102 return $self;
105 =head2 transcripts
107 Title : transcripts
108 Usage : @transcripts = $gene->transcripts();
109 Function: Get the transcripts of this gene structure. Many gene structures
110 will have only one transcript.
112 Returns : An array of Bio::SeqFeature::Gene::TranscriptI implementing objects.
113 Args :
116 =cut
118 sub transcripts {
119 return @{shift->{'_transcripts'} || []};
122 =head2 add_transcript
124 Title : add_transcript()
125 Usage : $gene->add_transcript($transcript);
126 Function: Add a transcript to this gene structure.
127 Returns :
128 Args : A Bio::SeqFeature::Gene::TranscriptI implementing object.
131 =cut
133 sub add_transcript {
134 my ($self, $fea) = @_;
136 if(!$fea || ! $fea->isa('Bio::SeqFeature::Gene::TranscriptI') ) {
137 $self->throw("$fea does not implement Bio::SeqFeature::Gene::TranscriptI");
139 unless( exists $self->{'_transcripts'} ) {
140 $self->{'_transcripts'} = [];
142 $self->_expand_region($fea);
143 if( $Bio::SeqFeature::Gene::GeneStructure::WeakRefs ) {
144 $fea->parent(weaken $self);
145 } else {
146 $fea->parent($self);
148 push(@{$self->{'_transcripts'}}, $fea);
151 =head2 flush_transcripts
153 Title : flush_transcripts()
154 Usage : $gene->flush_transcripts();
155 Function: Remove all transcripts from this gene structure.
156 Returns :
157 Args :
160 =cut
162 sub flush_transcripts {
163 my ($self) = @_;
164 if( defined $self->{'_transcripts'} ) {
165 foreach my $t ( grep {defined} @{$self->{'_transcripts'} || []} ) {
166 $t->parent(undef); # remove bkwds pointers
167 $t = undef;
169 delete($self->{'_transcripts'});
173 =head2 add_transcript_as_features
175 Title : add_transcript_as_features
176 Usage : $gene->add_transcript_as_features(@featurelist);
177 Function: take a list of Bio::SeqFeatureI objects and turn them into a
178 Bio::SeqFeature::Gene::Transcript object. Add that transcript to the gene.
179 Returns : nothing
180 Args : a list of Bio::SeqFeatureI compliant objects
183 =cut
185 sub add_transcript_as_features {
186 my ($self,@features) = @_;
187 my $transcript=Bio::SeqFeature::Gene::Transcript->new;
188 foreach my $fea (@features) {
189 if ($fea->primary_tag =~ /utr/i) { #UTR / utr/ 3' utr / utr5 etc.
190 $transcript->add_utr($fea);
191 } elsif ($fea->primary_tag =~ /promot/i) { #allow for spelling differences
192 $transcript->add_promoter($fea);
193 } elsif ($fea->primary_tag =~ /poly.*A/i) { #polyA, POLY_A, etc.
194 $transcript->poly_A_site($fea);
195 } else { #assume the rest are exons
196 $transcript->add_exon($fea);
199 $self->add_transcript($transcript);
203 =head2 promoters
205 Title : promoters
206 Usage : @prom_sites = $gene->promoters();
207 Function: Get the promoter features of this gene structure.
209 This method basically merges the promoters returned by transcripts.
211 Note that OO-modeling of regulatory elements is not stable yet.
212 This means that this method might change or even disappear in a
213 future release. Be aware of this if you use it.
215 Returns : An array of Bio::SeqFeatureI implementing objects.
216 Args :
219 =cut
221 sub promoters {
222 my ($self) = @_;
223 my @transcripts = $self->transcripts();
224 my @feas = ();
226 foreach my $tr (@transcripts) {
227 push(@feas, $tr->promoters());
229 return @feas;
233 =head2 exons
235 Title : exons()
236 Usage : @exons = $gene->exons();
237 @inital_exons = $gene->exons('Initial');
238 Function: Get all exon features or all exons of a specified type of this gene
239 structure.
241 Exon type is treated as a case-insensitive regular expression and
242 optional. For consistency, use only the following types:
243 initial, internal, terminal, utr, utr5prime, and utr3prime.
244 A special and virtual type is 'coding', which refers to all types
245 except utr.
247 This method basically merges the exons returned by transcripts.
249 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
250 Args : An optional string specifying the type of exon.
253 =cut
255 sub exons {
256 my ($self, @args) = @_;
257 my @transcripts = $self->transcripts();
258 my @feas = ();
260 foreach my $tr (@transcripts) {
261 push(@feas, $tr->exons(@args));
263 return @feas;
266 =head2 introns
268 Title : introns()
269 Usage : @introns = $gene->introns();
270 Function: Get all introns of this gene structure.
272 Note that this class currently generates these features on-the-fly,
273 that is, it simply treats all regions between exons as introns.
274 It assumes that the exons in the transcripts do not overlap.
276 This method basically merges the introns returned by transcripts.
278 Returns : An array of Bio::SeqFeatureI implementing objects.
279 Args :
282 =cut
284 sub introns {
285 my ($self) = @_;
286 my @transcripts = $self->transcripts();
287 my @feas = ();
289 foreach my $tr (@transcripts) {
290 push(@feas, $tr->introns());
292 return @feas;
295 =head2 poly_A_sites
297 Title : poly_A_sites()
298 Usage : @polyAsites = $gene->poly_A_sites();
299 Function: Get the poly-adenylation sites of this gene structure.
301 This method basically merges the poly-adenylation sites returned by
302 transcripts.
304 Returns : An array of Bio::SeqFeatureI implementing objects.
305 Args :
308 =cut
310 sub poly_A_sites {
311 my ($self) = @_;
312 my @transcripts = $self->transcripts();
313 my @feas = ();
315 foreach my $tr (@transcripts) {
316 push(@feas, $tr->poly_A_site());
318 return @feas;
321 =head2 utrs
323 Title : utrs()
324 Usage : @utr_sites = $gene->utrs('3prime');
325 @utr_sites = $gene->utrs('5prime');
326 @utr_sites = $gene->utrs();
327 Function: Get the features representing untranslated regions (UTR) of this
328 gene structure.
330 You may provide an argument specifying the type of UTR. Currently
331 the following types are recognized: 5prime 3prime for UTR on the
332 5' and 3' end of the CDS, respectively.
334 This method basically merges the UTRs returned by transcripts.
336 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects
337 representing the UTR regions or sites.
338 Args : Optionally, either 3prime, or 5prime for the the type of UTR
339 feature.
342 =cut
344 sub utrs {
345 my ($self,@args) = @_;
346 my @transcripts = $self->transcripts();
347 my @feas = ();
349 foreach my $tr (@transcripts) {
350 push(@feas, $tr->utrs(@args));
352 return @feas;
355 =head2 sub_SeqFeature
357 Title : sub_SeqFeature
358 Usage : @feats = $gene->sub_SeqFeature();
359 Function: Returns an array of all subfeatures.
361 This method is defined in Bio::SeqFeatureI. We override this here
362 to include the transcripts.
364 Returns : An array Bio::SeqFeatureI implementing objects.
365 Args : none
368 =cut
370 sub sub_SeqFeature {
371 my ($self) = @_;
372 my @feas = ();
374 # get what the parent already has
375 @feas = $self->SUPER::sub_SeqFeature();
376 push(@feas, $self->transcripts());
377 return @feas;
380 =head2 flush_sub_SeqFeature
382 Title : flush_sub_SeqFeature
383 Usage : $gene->flush_sub_SeqFeature();
384 $gene->flush_sub_SeqFeature(1);
385 Function: Removes all subfeatures.
387 This method is overridden from Bio::SeqFeature::Generic to flush
388 all additional subfeatures, i.e., transcripts, which is
389 almost certainly not what you want. To remove only features added
390 through $gene->add_sub_SeqFeature($feature) pass any
391 argument evaluating to TRUE.
393 Example :
394 Returns : none
395 Args : Optionally, an argument evaluating to TRUE will suppress flushing
396 of all gene structure-specific subfeatures (transcripts).
399 =cut
401 sub flush_sub_SeqFeature {
402 my ($self,$fea_only) = @_;
404 $self->SUPER::flush_sub_SeqFeature();
405 if(! $fea_only) {
406 $self->flush_transcripts();
410 sub gene_cleanup {
411 my $self = shift;
412 $self->flush_transcripts;