* sync with trunk
[bioperl-live.git] / Bio / LiveSeq / Translation.pm
blobf77e373a753fd8b37a2fb6ed578543918deb58c1
1 # $Id$
3 # bioperl module for Bio::LiveSeq::Translation
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
7 # Copyright Joseph Insana
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::LiveSeq::Translation - Translation class for LiveSeq
17 =head1 SYNOPSIS
19 #documentation needed
21 =head1 DESCRIPTION
23 This stores informations about aminoacids translations of transcripts.
24 The implementation is that a Translation object is the translation of
25 a Transcript object, with different possibilities of manipulation,
26 different coordinate system and eventually its own ranges (protein domains).
28 =head1 AUTHOR - Joseph A.L. Insana
30 Email: Insana@ebi.ac.uk, jinsana@gmx.net
32 =head1 APPENDIX
34 The rest of the documentation details each of the object
35 methods. Internal methods are usually preceded with a _
37 =cut
39 # Let the code begin...
41 package Bio::LiveSeq::Translation;
43 use strict;
44 #use Carp qw(croak carp cluck);
45 use Bio::LiveSeq::SeqI; # uses SeqI, inherits from it
46 use Bio::PrimarySeq;
47 use base qw(Bio::LiveSeq::Transcript);
50 =head2 new
52 Title : new
53 Usage : $protein = Bio::LiveSeq::Translation->new(-transcript => $transcr);
55 Function: generates a new Bio::LiveSeq::Translation
56 Returns : reference to a new object of class Translation
57 Errorcode -1
58 Args : reference to an object of class Transcript
60 =cut
62 sub new {
63 my ($thing, %args) = @_;
64 my $class = ref($thing) || $thing;
65 my ($obj,%translation);
67 my $transcript=$args{-transcript};
69 $obj = \%translation;
70 $obj = bless $obj, $class;
72 unless ($transcript) {
73 $obj->throw("$class not initialised because no -transcript given");
75 unless (ref($transcript) eq "Bio::LiveSeq::Transcript") {
76 $obj->throw("$class not initialised because no object of class Transcript given");
79 #my $startbase = $transcript->start;
80 #my $endbase = $transcript->end;
81 my $strand = $transcript->strand;
82 my $seq = $transcript->{'seq'};
84 $obj->{'strand'}=$strand;
85 $obj->{'seq'}=$seq;
86 $obj->{'transcript'}=$transcript;
87 $obj->{'alphabet'}="protein";
89 $transcript->{'translation'}=$obj;# set the Translation ref into its Transcript
90 return $obj;
93 =head2 get_Transcript
95 Title : valid
96 Usage : $transcript = $obj->get_Transcript()
97 Function: retrieves the reference to the object of class Transcript (if any)
98 attached to a LiveSeq object
99 Returns : object reference
100 Args : none
102 =cut
104 sub get_Transcript {
105 my $self=shift;
106 return ($self->{'transcript'});
109 # These get redefined here, overriding the SeqI ones
111 sub change {
112 my ($self)=@_;
113 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
114 return (-1);
116 sub positionchange {
117 my ($self)=@_;
118 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
119 return (-1);
121 sub labelchange {
122 my ($self)=@_;
123 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
124 return (-1);
127 # this just returns the translation of the transcript, without checking for
128 # stop codons
129 sub transl_seq {
130 my $self=shift;
131 my $transcript=$self->get_Transcript;
132 my $translation=$transcript->translate(undef, undef, undef,
133 $self->translation_table)->seq;
134 return $translation;
137 # version 1.74 -> now the "*" is printed
138 sub seq {
139 my $self=shift;
140 my $proteinseq;
141 my $transcript=$self->get_Transcript;
142 my $translation=$transcript->translate(undef, undef, undef,
143 $self->translation_table)->seq;
144 my $stop_pos=index($translation,"*");
145 if ($stop_pos == -1) { # no stop present, continue downstream
146 my $downstreamseq=$transcript->downstream_seq();
147 #carp "the downstream is: $downstreamseq"; # debug
148 my $cdnaseq=$transcript->seq();
149 my $extendedseq = Bio::PrimarySeq->new(-seq => "$cdnaseq$downstreamseq",
150 -alphabet => 'dna'
153 $translation=$extendedseq->translate(undef, undef, undef,
154 $self->translation_table)->seq;
155 #carp "the new translation is: $translation"; # debug
156 $stop_pos=index($translation,"*");
157 if ($stop_pos == -1) { # still no stop present, return warning
158 $self->warn("Warning: no stop codon found in the retrieved sequence downstream of Transcript ",1);
159 undef $stop_pos;
160 $proteinseq=$translation;
161 } else {
162 $proteinseq=substr($translation,0,$stop_pos+1);
163 #carp "the new stopped translation is: $proteinseq, because the stop is at position $stop_pos"; # debug
165 } else {
166 $proteinseq=substr($translation,0,$stop_pos+1);
168 return $proteinseq;
171 sub length {
172 my $self=shift;
173 my $seq=$self->seq;
174 my $length=length($seq);
175 return $length;
178 sub all_labels {
179 my $self=shift;
180 return $self->get_Transcript->all_labels;
183 # counts in triplet. Only a label matching the beginning of a triplet coding
184 # for an aminoacid is considered valid when setting coordinate_start
185 # (i.e. only in frame!)
186 sub valid {
187 my ($self,$label)=@_;
188 my $i;
189 my @labels=$self->get_Transcript->all_labels;
190 my $length=$#labels;
191 while ($i <= $length) {
192 if ($label == $labels[$i]) {
193 return (1); # found
195 $i=$i+3;
197 return (0); # not found
200 # returns the label to the first nucleotide of the triplet coding for $position aminoacid
201 sub label {
202 my ($self,$position)=@_;
203 my $firstlabel=$self->coordinate_start; # this is in_frame checked
204 if ($position > 0) {
205 $position=$position*3-2;
206 } else { # if position = 0 this will be caught by Transcript, error thrown
207 $position=$position*3;
209 return $self->get_Transcript->label($position,$firstlabel);
210 # check for coord_start different
213 # returns position (aminoacids numbering) of a particular label
214 # used to return 0 for not in frame labels
215 # now returns the position anyway (after version 1.66)
216 sub position {
217 my ($self,$label)=@_;
218 my $firstlabel=$self->coordinate_start; # this is in_frame checked
219 my $position=$self->get_Transcript->position($label,$firstlabel);
220 use integer;
221 my $modulus=$position % 3;
222 if ($position == 0) {
223 return (0);
224 } elsif ($position > 0) {
225 if ($modulus != 1) {
226 $self->warn("Attention! Label $label is not in frame ".
227 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
228 if ($modulus == 2) {
229 return ($position / 3 + 1);
230 } else { # i.e. modulus == 0
231 return ($position / 3);
234 return ($position / 3 + 1);
235 } else { # pos < 0
236 if ($modulus != 0) {
237 $self->warn("Attention! Label $label is not in frame ".
238 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
239 return ($position / 3 - 1); # ok for both other positions
241 return ($position / 3);
243 $self->throw( "WEIRD: execution shouldn't have reached here");
244 return (0); # this should never happen, but just in case
247 # note: it inherits subseq and labelsubseq from Transcript!
249 sub start {
250 my $self=shift;
251 return ($self->{'transcript'}->start);
254 sub end {
255 my $self=shift;
256 return ($self->{'transcript'}->end);
259 =head2 aa_ranges
261 Title : aa_ranges
262 Usage : @proteinfeatures = $translation->aa_ranges()
263 Function: to retrieve all the LiveSeq AARange objects attached to a
264 Translation, usually created out of a SwissProt database entry
265 crossreferenced from an EMBL CDS feature.
266 Returns : an array
267 Args : none
269 =cut
271 # returns an array of obj_ref of AARange objects attached to the Translation
272 sub aa_ranges {
273 my $self=shift;
274 return ($self->{'aa_ranges'});
277 sub translation_table {
278 my $self=shift;
279 $self->get_Transcript->translation_table(@_);
282 # returns all aminoacids "affected" i.e. all aminoacids coded by any codon
283 # "touched" by the range selected between the labels, even if only partially.
285 # it's not optimized for performance but it's useful
287 sub labelsubseq {
288 my ($self,$start,$length,$end)=@_;
289 my ($pos1,$pos2);
290 my $transcript=$self->get_Transcript;
291 if ($start) {
292 unless ($transcript->valid($start)) {
293 $self->warn("Start label not valid"); return (-1);
295 $pos1=$self->position($start);
297 if ($end) {
298 if ($end == $start) {
299 $length=1;
300 } else {
301 unless ($transcript->valid($end)) {
302 $self->warn("End label not valid"); return (-1);
304 unless ($transcript->follows($start,$end) == 1) {
305 $self->warn("End label does not follow Start label!"); return (-1);
307 $pos2=$self->position($end);
308 $length=$pos2-$pos1+1;
311 my $sequence=$self->seq;
312 return (substr($sequence,$pos1-1,$length));
315 # return the offset in aminoacids from LiveSeq protein sequence and SwissProt
316 # sequence (usually as a result of an INIT_MET or a gap)
317 sub offset {
318 my $self=shift;
319 return ($self->{'offset'});