sync w/ main trunk
[bioperl-live.git] / Bio / LiveSeq / Translation.pm
blobd386319cbd5c87f403c349a3df4a1352326bc049
1 # $Id$
3 # bioperl module for Bio::LiveSeq::Translation
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
9 # Copyright Joseph Insana
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::LiveSeq::Translation - Translation class for LiveSeq
19 =head1 SYNOPSIS
21 #documentation needed
23 =head1 DESCRIPTION
25 This stores informations about aminoacids translations of transcripts.
26 The implementation is that a Translation object is the translation of
27 a Transcript object, with different possibilities of manipulation,
28 different coordinate system and eventually its own ranges (protein domains).
30 =head1 AUTHOR - Joseph A.L. Insana
32 Email: Insana@ebi.ac.uk, jinsana@gmx.net
34 =head1 APPENDIX
36 The rest of the documentation details each of the object
37 methods. Internal methods are usually preceded with a _
39 =cut
41 # Let the code begin...
43 package Bio::LiveSeq::Translation;
45 use strict;
46 #use Carp qw(croak carp cluck);
47 use Bio::LiveSeq::SeqI; # uses SeqI, inherits from it
48 use Bio::PrimarySeq;
49 use base qw(Bio::LiveSeq::Transcript);
52 =head2 new
54 Title : new
55 Usage : $protein = Bio::LiveSeq::Translation->new(-transcript => $transcr);
57 Function: generates a new Bio::LiveSeq::Translation
58 Returns : reference to a new object of class Translation
59 Errorcode -1
60 Args : reference to an object of class Transcript
62 =cut
64 sub new {
65 my ($thing, %args) = @_;
66 my $class = ref($thing) || $thing;
67 my ($obj,%translation);
69 my $transcript=$args{-transcript};
71 $obj = \%translation;
72 $obj = bless $obj, $class;
74 unless ($transcript) {
75 $obj->throw("$class not initialised because no -transcript given");
77 unless (ref($transcript) eq "Bio::LiveSeq::Transcript") {
78 $obj->throw("$class not initialised because no object of class Transcript given");
81 #my $startbase = $transcript->start;
82 #my $endbase = $transcript->end;
83 my $strand = $transcript->strand;
84 my $seq = $transcript->{'seq'};
86 $obj->{'strand'}=$strand;
87 $obj->{'seq'}=$seq;
88 $obj->{'transcript'}=$transcript;
89 $obj->{'alphabet'}="protein";
91 $transcript->{'translation'}=$obj;# set the Translation ref into its Transcript
92 return $obj;
95 =head2 get_Transcript
97 Title : valid
98 Usage : $transcript = $obj->get_Transcript()
99 Function: retrieves the reference to the object of class Transcript (if any)
100 attached to a LiveSeq object
101 Returns : object reference
102 Args : none
104 =cut
106 sub get_Transcript {
107 my $self=shift;
108 return ($self->{'transcript'});
111 # These get redefined here, overriding the SeqI ones
113 sub change {
114 my ($self)=@_;
115 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
116 return (-1);
118 sub positionchange {
119 my ($self)=@_;
120 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
121 return (-1);
123 sub labelchange {
124 my ($self)=@_;
125 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
126 return (-1);
129 # this just returns the translation of the transcript, without checking for
130 # stop codons
131 sub transl_seq {
132 my $self=shift;
133 my $transcript=$self->get_Transcript;
134 my $translation=$transcript->translate(undef, undef, undef,
135 $self->translation_table)->seq;
136 return $translation;
139 # version 1.74 -> now the "*" is printed
140 sub seq {
141 my $self=shift;
142 my $proteinseq;
143 my $transcript=$self->get_Transcript;
144 my $translation=$transcript->translate(undef, undef, undef,
145 $self->translation_table)->seq;
146 my $stop_pos=index($translation,"*");
147 if ($stop_pos == -1) { # no stop present, continue downstream
148 my $downstreamseq=$transcript->downstream_seq();
149 #carp "the downstream is: $downstreamseq"; # debug
150 my $cdnaseq=$transcript->seq();
151 my $extendedseq = Bio::PrimarySeq->new(-seq => "$cdnaseq$downstreamseq",
152 -alphabet => 'dna'
155 $translation=$extendedseq->translate(undef, undef, undef,
156 $self->translation_table)->seq;
157 #carp "the new translation is: $translation"; # debug
158 $stop_pos=index($translation,"*");
159 if ($stop_pos == -1) { # still no stop present, return warning
160 $self->warn("Warning: no stop codon found in the retrieved sequence downstream of Transcript ",1);
161 undef $stop_pos;
162 $proteinseq=$translation;
163 } else {
164 $proteinseq=substr($translation,0,$stop_pos+1);
165 #carp "the new stopped translation is: $proteinseq, because the stop is at position $stop_pos"; # debug
167 } else {
168 $proteinseq=substr($translation,0,$stop_pos+1);
170 return $proteinseq;
173 sub length {
174 my $self=shift;
175 my $seq=$self->seq;
176 my $length=length($seq);
177 return $length;
180 sub all_labels {
181 my $self=shift;
182 return $self->get_Transcript->all_labels;
185 # counts in triplet. Only a label matching the beginning of a triplet coding
186 # for an aminoacid is considered valid when setting coordinate_start
187 # (i.e. only in frame!)
188 sub valid {
189 my ($self,$label)=@_;
190 my $i;
191 my @labels=$self->get_Transcript->all_labels;
192 my $length=$#labels;
193 while ($i <= $length) {
194 if ($label == $labels[$i]) {
195 return (1); # found
197 $i=$i+3;
199 return (0); # not found
202 # returns the label to the first nucleotide of the triplet coding for $position aminoacid
203 sub label {
204 my ($self,$position)=@_;
205 my $firstlabel=$self->coordinate_start; # this is in_frame checked
206 if ($position > 0) {
207 $position=$position*3-2;
208 } else { # if position = 0 this will be caught by Transcript, error thrown
209 $position=$position*3;
211 return $self->get_Transcript->label($position,$firstlabel);
212 # check for coord_start different
215 # returns position (aminoacids numbering) of a particular label
216 # used to return 0 for not in frame labels
217 # now returns the position anyway (after version 1.66)
218 sub position {
219 my ($self,$label)=@_;
220 my $firstlabel=$self->coordinate_start; # this is in_frame checked
221 my $position=$self->get_Transcript->position($label,$firstlabel);
222 use integer;
223 my $modulus=$position % 3;
224 if ($position == 0) {
225 return (0);
226 } elsif ($position > 0) {
227 if ($modulus != 1) {
228 $self->warn("Attention! Label $label is not in frame ".
229 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
230 if ($modulus == 2) {
231 return ($position / 3 + 1);
232 } else { # i.e. modulus == 0
233 return ($position / 3);
236 return ($position / 3 + 1);
237 } else { # pos < 0
238 if ($modulus != 0) {
239 $self->warn("Attention! Label $label is not in frame ".
240 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
241 return ($position / 3 - 1); # ok for both other positions
243 return ($position / 3);
245 $self->throw( "WEIRD: execution shouldn't have reached here");
246 return (0); # this should never happen, but just in case
249 # note: it inherits subseq and labelsubseq from Transcript!
251 sub start {
252 my $self=shift;
253 return ($self->{'transcript'}->start);
256 sub end {
257 my $self=shift;
258 return ($self->{'transcript'}->end);
261 =head2 aa_ranges
263 Title : aa_ranges
264 Usage : @proteinfeatures = $translation->aa_ranges()
265 Function: to retrieve all the LiveSeq AARange objects attached to a
266 Translation, usually created out of a SwissProt database entry
267 crossreferenced from an EMBL CDS feature.
268 Returns : an array
269 Args : none
271 =cut
273 # returns an array of obj_ref of AARange objects attached to the Translation
274 sub aa_ranges {
275 my $self=shift;
276 return ($self->{'aa_ranges'});
279 sub translation_table {
280 my $self=shift;
281 $self->get_Transcript->translation_table(@_);
284 # returns all aminoacids "affected" i.e. all aminoacids coded by any codon
285 # "touched" by the range selected between the labels, even if only partially.
287 # it's not optimized for performance but it's useful
289 sub labelsubseq {
290 my ($self,$start,$length,$end)=@_;
291 my ($pos1,$pos2);
292 my $transcript=$self->get_Transcript;
293 if ($start) {
294 unless ($transcript->valid($start)) {
295 $self->warn("Start label not valid"); return (-1);
297 $pos1=$self->position($start);
299 if ($end) {
300 if ($end == $start) {
301 $length=1;
302 } else {
303 unless ($transcript->valid($end)) {
304 $self->warn("End label not valid"); return (-1);
306 unless ($transcript->follows($start,$end) == 1) {
307 $self->warn("End label does not follow Start label!"); return (-1);
309 $pos2=$self->position($end);
310 $length=$pos2-$pos1+1;
313 my $sequence=$self->seq;
314 return (substr($sequence,$pos1-1,$length));
317 # return the offset in aminoacids from LiveSeq protein sequence and SwissProt
318 # sequence (usually as a result of an INIT_MET or a gap)
319 sub offset {
320 my $self=shift;
321 return ($self->{'offset'});