3 # BioPerl module for Bio::SeqFeature::Gene::Transcript
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Hilmar Lapp <hlapp@gmx.net>
9 # Copyright Hilmar Lapp
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::SeqFeature::Gene::Transcript - A feature representing a transcript
21 # See documentation of methods.
25 A feature representing a transcript.
31 User feedback is an integral part of the evolution of this and other
32 Bioperl modules. Send your comments and suggestions preferably to one
33 of the Bioperl mailing lists. Your participation is much appreciated.
35 bioperl-l@bioperl.org - General discussion
36 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
40 Please direct usage questions or support issues to the mailing list:
42 I<bioperl-l@bioperl.org>
44 rather than to the module maintainer directly. Many experienced and
45 reponsive experts will be able look at the problem and quickly
46 address it. Please include a thorough description of the problem
47 with code and data examples if at all possible.
51 Report bugs to the Bioperl bug tracking system to help us keep track
52 the bugs and their resolution. Bug reports can be submitted via the
55 http://bugzilla.open-bio.org/
57 =head1 AUTHOR - Hilmar Lapp
63 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
68 # Let the code begin...
70 package Bio
::SeqFeature
::Gene
::Transcript
;
76 use base
qw(Bio::SeqFeature::Generic Bio::SeqFeature::Gene::TranscriptI);
79 my ($caller, @args) = @_;
80 my $self = $caller->SUPER::new
(@args);
81 $self->_register_for_cleanup(\
&transcript_destroy
);
82 my ($primary) = $self->_rearrange([qw(PRIMARY)],@args);
84 $primary = 'transcript' unless $primary;
85 $self->primary_tag($primary);
86 $self->strand(0) if(! defined($self->strand()));
94 Usage : @proms = $transcript->promoters();
95 Function: Get the promoter features/sites of this transcript.
97 Note that OO-modeling of regulatory elements is not stable yet.
98 This means that this method might change or even disappear in a
99 future release. Be aware of this if you use it.
101 Returns : An array of Bio::SeqFeatureI implementing objects representing the
102 promoter regions or sites.
110 return $self->get_feature_type('Bio::SeqFeature::Gene::Promoter');
115 Title : add_promoter()
116 Usage : $transcript->add_promoter($feature);
117 Function: Add a promoter feature/site to this transcript.
120 Note that OO-modeling of regulatory elements is not stable yet.
121 This means that this method might change or even disappear in a
122 future release. Be aware of this if you use it.
125 Args : A Bio::SeqFeatureI implementing object.
131 my ($self, $fea) = @_;
132 $self->_add($fea,'Bio::SeqFeature::Gene::Promoter');
135 =head2 flush_promoters
137 Title : flush_promoters()
138 Usage : $transcript->flush_promoters();
139 Function: Remove all promoter features/sites from this transcript.
141 Note that OO-modeling of regulatory elements is not stable yet.
142 This means that this method might change or even disappear in a
143 future release. Be aware of this if you use it.
145 Returns : the removed features as a list
151 sub flush_promoters
{
153 return $self->_flush('Bio::SeqFeature::Gene::Promoter');
159 Usage : @exons = $gene->exons();
160 ($inital_exon) = $gene->exons('Initial');
161 Function: Get all exon features or all exons of specified type of this
164 Exon type is treated as a case-insensitive regular expression and
165 is optional. For consistency, use only the following types:
166 initial, internal, terminal.
168 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
169 Args : An optional string specifying the primary_tag of the feature.
175 my ($self, $type) = @_;
176 return $self->get_unordered_feature_type('Bio::SeqFeature::Gene::ExonI',
182 Title : exons_ordered
183 Usage : @exons = $gene->exons_ordered();
184 @exons = $gene->exons_ordered("Internal");
185 Function: Get an ordered list of all exon features or all exons of specified
186 type of this transcript.
188 Exon type is treated as a case-insensitive regular expression and
189 is optional. For consistency, use only the following types:
191 Returns : An array of Bio::SeqFeature::Gene::ExonI implementing objects.
192 Args : An optional string specifying the primary_tag of the feature.
197 my ($self,$type) = @_;
198 return $self->get_feature_type('Bio::SeqFeature::Gene::ExonI', $type);
204 Usage : $transcript->add_exon($exon,'initial');
205 Function: Add a exon feature to this transcript.
207 The second argument denotes the type of exon. Mixing exons with and
208 without a type is likely to cause trouble in exons(). Either
209 leave out the type for all exons or for none.
211 Presently, the following types are known: initial, internal,
212 terminal, utr, utr5prime, and utr3prime (all case-insensitive).
213 UTR should better be added through utrs()/add_utr().
215 If you wish to use other or additional types, you will almost
216 certainly have to call exon_type_sortorder() in order to replace
217 the default sort order, or mrna(), cds(), protein(), and exons()
218 may yield unexpected results.
221 Args : A Bio::SeqFeature::Gene::ExonI implementing object.
222 A string indicating the type of the exon (optional).
228 my ($self, $fea, $type) = @_;
229 if(! $fea->isa('Bio::SeqFeature::Gene::ExonI') ) {
230 $self->throw("$fea does not implement Bio::SeqFeature::Gene::ExonI");
232 $self->_add($fea,'Bio::SeqFeature::Gene::Exon', $type);
237 Title : flush_exons()
238 Usage : $transcript->flush_exons();
239 $transcript->flush_exons('terminal');
240 Function: Remove all or a certain type of exon features from this transcript.
242 See add_exon() for documentation about types.
244 Calling without a type will not flush UTRs. Call flush_utrs() for
246 Returns : the deleted features as a list
247 Args : A string indicating the type of the exon (optional).
253 my ($self, $type) = @_;
254 return $self->_flush('Bio::SeqFeature::Gene::Exon',$type);
260 Usage : @introns = $gene->introns();
261 Function: Get all intron features this gene structure.
263 Note that this implementation generates these features
264 on-the-fly, that is, it simply treats all regions between
265 exons as introns, assuming that exons do not overlap. A
266 consequence is that a consistent correspondence between the
267 elements in the returned array and the array that exons()
268 returns will exist only if the exons are properly sorted
269 within their types (forward for plus- strand and reverse
270 for minus-strand transcripts). To ensure correctness the
271 elements in the array returned will always be sorted.
273 Returns : An array of Bio::SeqFeature::Gene::Intron objects representing
283 my @exons = $self->exons();
284 my ($strand, $rev_order);
286 # if there's 1 or less exons we're done
287 return () unless($#exons > 0);
288 # record strand and order (a minus-strand transcript is likely to have
289 # the exons stacked in reverse order)
290 foreach my $exon (@exons) {
291 $strand = $exon->strand();
292 last if $strand; # we're done if we've got 1 or -1
294 $rev_order = ($exons[0]->end() < $exons[1]->start() ?
0 : 1);
296 # Make sure exons are sorted. Because we assume they don't overlap, we
297 # simply sort by start position.
298 if((! defined($strand)) || ($strand != -1) || (! $rev_order)) {
299 # always sort forward for plus-strand transcripts, and for negative-
300 # strand transcripts that appear to be unsorted or forward sorted
301 @exons = map { $_->[0] } sort { $a->[1] <=> $b->[1] }
302 map { [ $_, $_->start * ($_->strand || 1)] } @exons;
304 # sort in reverse order for transcripts on the negative strand and
305 # found to be in reverse order
306 @exons = map { $_->[0] } sort { $b->[1] <=> $a->[1] } map { [ $_, $_->start()] } @exons;
308 # loop over all intervening gaps
309 while ((my $exonA = shift (@exons)) &&(my $exonB = shift(@exons))){
310 my $intron = Bio
::SeqFeature
::Gene
::Intron
->new(-primary
=>'intron');
311 $intron->upstream_Exon($exonA);
312 $intron->downstream_Exon($exonB);
313 $intron->attach_seq($self->entire_seq) if $self->entire_seq;
314 unshift(@exons,$exonB);
315 push @introns,$intron;
322 Title : poly_A_site()
323 Usage : $polyAsite = $transcript->poly_A_site();
324 Function: Get/set the poly-adenylation feature/site of this transcript.
325 Returns : A Bio::SeqFeatureI implementing object representing the
326 poly-adenylation region.
327 Args : A Bio::SeqFeatureI implementing object on set, or FALSE to flush
328 a previously set object.
334 my ($self, $fea) = @_;
336 $self->_add($fea,'Bio::SeqFeature::Gene::Poly_A_site');
338 return ($self->get_feature_type('Bio::SeqFeature::Gene::Poly_A_site'))[0];
344 Usage : @utr_sites = $transcript->utrs('utr3prime');
345 @utr_sites = $transcript->utrs('utr5prime');
346 @utr_sites = $transcript->utrs();
347 Function: Get the features representing untranslated regions (UTR) of this
350 You may provide an argument specifying the type of UTR. Currently
351 the following types are recognized: utr5prime utr3prime for UTR on the
352 5' and 3' end of the CDS, respectively.
354 Returns : An array of Bio::SeqFeature::Gene::UTR objects
355 representing the UTR regions or sites.
356 Args : Optionally, either utr3prime, or utr5prime for the the type of UTR
363 my ($self, $type) = @_;
364 return $self->get_feature_type('Bio::SeqFeature::Gene::UTR',$type);
371 Usage : $transcript->add_utr($utrobj, 'utr3prime');
372 $transcript->add_utr($utrobj);
373 Function: Add a UTR feature/site to this transcript.
375 The second parameter is optional and denotes the type of the UTR
376 feature. Presently recognized types include 'utr5prime' and 'utr3prime'
377 for UTR on the 5' and 3' end of a gene, respectively.
379 Calling this method is the same as calling
380 add_exon($utrobj, 'utr'.$type). In this sense a UTR object is a
381 special exon object, which is transcribed, not spliced out, but
384 Note that the object supplied should return FALSE for is_coding().
385 Otherwise cds() and friends will become confused.
388 Args : A Bio::SeqFeature::Gene::UTR implementing object.
394 my ($self, $fea, $type) = @_;
395 $self->_add($fea,'Bio::SeqFeature::Gene::UTR',$type);
401 Usage : $transcript->flush_utrs();
402 $transcript->flush_utrs('utr3prime');
403 Function: Remove all or a specific type of UTR features/sites from this
406 Cf. add_utr() for documentation about recognized types.
407 Returns : a list of the removed features
408 Args : Optionally a string denoting the type of UTR feature.
414 my ($self, $type) = @_;
415 return $self->_flush('Bio::SeqFeature::Gene::UTR',$type);
418 =head2 sub_SeqFeature
420 Title : sub_SeqFeature
421 Usage : @feats = $transcript->sub_SeqFeature();
422 Function: Returns an array of all subfeatures.
424 This method is defined in Bio::SeqFeatureI. We override this here
425 to include the exon etc features.
427 Returns : An array Bio::SeqFeatureI implementing objects.
437 # get what the parent already has
438 @feas = $self->SUPER::sub_SeqFeature
();
439 # add the features we have in addition
440 push(@feas, $self->exons()); # this includes UTR features
441 push(@feas, $self->promoters());
442 push(@feas, $self->poly_A_site()) if($self->poly_A_site());
446 =head2 flush_sub_SeqFeature
448 Title : flush_sub_SeqFeature
449 Usage : $transcript->flush_sub_SeqFeature();
450 $transcript->flush_sub_SeqFeature(1);
451 Function: Removes all subfeatures.
453 This method is overridden from Bio::SeqFeature::Generic to flush
454 all additional subfeatures like exons, promoters, etc., which is
455 almost certainly not what you want. To remove only features added
456 through $transcript->add_sub_SeqFeature($feature) pass any
457 argument evaluating to TRUE.
461 Args : Optionally, an argument evaluating to TRUE will suppress flushing
462 of all transcript-specific subfeatures (exons etc.).
467 sub flush_sub_SeqFeature
{
468 my ($self,$fea_only) = @_;
470 $self->SUPER::flush_sub_SeqFeature
();
472 $self->flush_promoters();
473 $self->flush_exons();
475 $self->poly_A_site(0);
483 Usage : $seq = $transcript->cds();
484 Function: Returns the CDS (coding sequence) as defined by the exons
485 of this transcript and the attached sequence.
487 If no sequence is attached this method will return false.
489 Note that the implementation provided here returns a
490 concatenation of all coding exons, thereby assuming that
491 exons do not overlap.
493 Note also that you cannot set the CDS via this method. Set
494 a single CDS feature as a single exon, or derive your own
495 class if you want to store a predicted CDS.
498 Returns : A Bio::PrimarySeqI implementing object.
505 my @exons = $self->exons_ordered(); #this is always sorted properly according to strand
508 return unless(@exons);
509 # record strand (a minus-strand transcript must have the exons sorted in
511 foreach my $exon (@exons) {
512 if(defined($exon->strand()) && (! $strand)) {
513 $strand = $exon->strand();
515 if($exon->strand() && (($exon->strand() * $strand) < 0)) {
516 $self->throw("Transcript mixes coding exons on plus and minus ".
517 "strand. This makes no sense.");
520 my $cds = $self->_make_cds(@exons);
522 return Bio
::PrimarySeq
->new('-id' => $self->seq_id(),
524 '-alphabet' => "dna");
530 Usage : $protein = $transcript->protein();
531 Function: Get the protein encoded by the transcript as a sequence object.
533 The implementation provided here simply calls translate() on the
534 object returned by cds().
536 Returns : A Bio::PrimarySeqI implementing object.
547 return $seq->translate() if $seq;
554 Usage : $mrna = $transcript->mrna();
555 Function: Get the mRNA of the transcript as a sequence object.
557 The difference to cds() is that the sequence object returned by
558 this methods will also include UTR and the poly-adenylation site,
559 but not promoter sequence (TBD).
561 HL: do we really need this method?
563 Returns : A Bio::PrimarySeqI implementing object.
571 my ($seq, $mrna, $elem);
573 # get the coding part
576 $seq = Bio
::PrimarySeq
->new('-id' => $self->seq_id(),
577 '-alphabet' => "rna",
580 # get and add UTR sequences
582 foreach $elem ($self->utrs('utr5prime')) {
583 $mrna .= $elem->seq()->seq();
585 $seq->seq($mrna . $seq->seq());
587 foreach $elem ($self->utrs('utr3prime')) {
588 $mrna .= $elem->seq()->seq();
590 $seq->seq($seq->seq() . $mrna);
591 if($self->poly_A_site()) {
592 $seq->seq($seq->seq() . $self->poly_A_site()->seq()->seq());
594 return if($seq->length() == 0);
598 sub _get_typed_keys
{
599 my ($self, $keyprefix, $type) = @_;
603 # make case-insensitive
604 $type = ($type ?
lc($type) : "");
605 # pull out all feature types that exist and match
606 @keys = grep { /^_$keyprefix$type/i; } (keys(%{$self}));
611 my ($self,@exons) = @_;
614 foreach my $exon (@exons) {
615 next if((! defined($exon->seq())) || (! $exon->is_coding()));
616 my $phase = length($cds) % 3;
617 # let's check the simple case
618 if((! defined($exon->frame())) || ($phase == $exon->frame())) {
619 # this one fits exactly, or frame of the exon is undefined (should
620 # we warn about that?); we bypass the $exon->cds() here (hmm,
621 # not very clean style, but I don't see where this screws up)
622 $cds .= $exon->seq()->seq();
624 # this one is probably from exon shuffling and needs some work
625 my $seq = $exon->cds(); # now $seq is guaranteed to be in frame 0
630 # how many Ns can we chop off the piece to be added?
632 if($seq =~ /^(n+)/i) {
633 $n_crop = length($1);
635 if($n_crop >= $phase) {
636 # chop off to match the phase
637 $seq = substr($seq, $phase);
640 $seq = ("n" x
(3-$phase)) . $seq;
652 Usage : my @features=$transcript->features;
653 Function: returns all the features associated with this transcript
654 Returns : a list of SeqFeatureI implementing objects
663 return grep { defined } @
{$self->{'_features'} || []};
666 =head2 features_ordered
668 Title : features_ordered
669 Usage : my @features=$transcript->features_ordered;
670 Function: returns all the features associated with this transcript,
671 in order by feature start, according to strand
672 Returns : a list of SeqFeatureI implementing objects
678 sub features_ordered
{
680 return $self->_stranded_sort(@
{$self->{'_features'} || []});
684 sub get_unordered_feature_type
{
685 my ($self, $type, $pri)=@_;
687 foreach ( $self->features) {
688 if ($_->isa($type)) {
689 if ($pri && $_->primary_tag !~ /$pri/i) {
699 sub get_feature_type
{
701 return $self->_stranded_sort($self->get_unordered_feature_type(@_));
704 #This was fixed by Gene Cutler - the indexing on the list being reversed
705 #fixed a bad bug. Thanks Gene!
707 my ($self, $type, $pri)=@_;
708 my @list=$self->features;
710 for (reverse (0..$#list)) {
711 if (defined $list[$_] &&
712 $list[$_]->isa($type)) {
713 if ($pri && $list[$_]->primary_tag !~ /$pri/i) {
716 push @cut, splice @list, $_, 1; #remove the element of $type from @list
717 #and return each of them in @cut
720 $self->{'_features'}=\
@list;
725 my ($self, $fea, $type, $pri)=@_;
726 require Bio
::SeqFeature
::Gene
::Promoter
;
727 require Bio
::SeqFeature
::Gene
::UTR
;
728 require Bio
::SeqFeature
::Gene
::Exon
;
729 require Bio
::SeqFeature
::Gene
::Intron
;
730 require Bio
::SeqFeature
::Gene
::Poly_A_site
;
732 if(! $fea->isa('Bio::SeqFeatureI') ) {
733 $self->throw("$fea does not implement Bio::SeqFeatureI");
735 if(! $fea->isa($type) || $pri) {
736 $fea=$self->_new_of_type($fea,$type,$pri);
738 if (! $self->strand) {
739 $self->strand($fea->strand);
741 if ($self->strand * $fea->strand == -1) {
742 $self->throw("$fea is on opposite strand from $self");
746 $self->_expand_region($fea);
747 if(defined($self->entire_seq()) && (! defined($fea->entire_seq())) &&
748 $fea->can('attach_seq')) {
749 $fea->attach_seq($self->entire_seq());
751 if (defined $self->parent) {
752 $self->parent->_expand_region($fea);
754 push(@
{$self->{'_features'}}, $fea);
761 foreach my $fea (@list) {
764 $strand = $fea->strand() if(! $strand);
765 if(($fea->strand() * $strand) < 0) {
771 if (defined $strand && $strand == - 1) { #reverse strand
772 return map { $_->[0] } sort {$b->[1] <=> $a->[1]} map { [$_, $_->start] } @list;
773 } else { #undef or forward strand
774 return map { $_->[0] } sort {$a->[1] <=> $b->[1]} map { [$_, $_->start] } @list;
779 my ($self, $fea, $type, $pri)= @_;
782 $primary = $pri; #can set new primary tag if desired
784 ($primary) = $type =~ /.*::(.+)/; #or else primary is just end of type string
787 $fea->primary_tag($primary);
791 sub transcript_destroy
{
793 # We're going to be really explicit to insure memory leaks
795 foreach my $f ( $self->features ) {
798 $self->parent(undef);