fix spelling errors, fixes #3228
[bioperl-live.git] / Bio / LiveSeq / Translation.pm
bloba54d30fb2c1b28ccb4e68383d3c2431e687eee9e
2 # bioperl module for Bio::LiveSeq::Translation
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
8 # Copyright Joseph Insana
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::LiveSeq::Translation - Translation class for LiveSeq
18 =head1 SYNOPSIS
20 #documentation needed
22 =head1 DESCRIPTION
24 This stores information about aminoacids translations of transcripts.
25 The implementation is that a Translation object is the translation of
26 a Transcript object, with different possibilities of manipulation,
27 different coordinate system and eventually its own ranges (protein domains).
29 =head1 AUTHOR - Joseph A.L. Insana
31 Email: Insana@ebi.ac.uk, jinsana@gmx.net
33 =head1 APPENDIX
35 The rest of the documentation details each of the object
36 methods. Internal methods are usually preceded with a _
38 =cut
40 # Let the code begin...
42 package Bio::LiveSeq::Translation;
44 use strict;
45 #use Carp qw(croak carp cluck);
46 use Bio::LiveSeq::SeqI; # uses SeqI, inherits from it
47 use Bio::PrimarySeq;
48 use base qw(Bio::LiveSeq::Transcript);
51 =head2 new
53 Title : new
54 Usage : $protein = Bio::LiveSeq::Translation->new(-transcript => $transcr);
56 Function: generates a new Bio::LiveSeq::Translation
57 Returns : reference to a new object of class Translation
58 Errorcode -1
59 Args : reference to an object of class Transcript
61 =cut
63 sub new {
64 my ($thing, %args) = @_;
65 my $class = ref($thing) || $thing;
66 my ($obj,%translation);
68 my $transcript=$args{-transcript};
70 $obj = \%translation;
71 $obj = bless $obj, $class;
73 unless ($transcript) {
74 $obj->throw("$class not initialised because no -transcript given");
76 unless (ref($transcript) eq "Bio::LiveSeq::Transcript") {
77 $obj->throw("$class not initialised because no object of class Transcript given");
80 #my $startbase = $transcript->start;
81 #my $endbase = $transcript->end;
82 my $strand = $transcript->strand;
83 my $seq = $transcript->{'seq'};
85 $obj->{'strand'}=$strand;
86 $obj->{'seq'}=$seq;
87 $obj->{'transcript'}=$transcript;
88 $obj->{'alphabet'}="protein";
90 $transcript->{'translation'}=$obj;# set the Translation ref into its Transcript
91 return $obj;
94 =head2 get_Transcript
96 Title : valid
97 Usage : $transcript = $obj->get_Transcript()
98 Function: retrieves the reference to the object of class Transcript (if any)
99 attached to a LiveSeq object
100 Returns : object reference
101 Args : none
103 =cut
105 sub get_Transcript {
106 my $self=shift;
107 return ($self->{'transcript'});
110 # These get redefined here, overriding the SeqI ones
112 sub change {
113 my ($self)=@_;
114 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
115 return (-1);
117 sub positionchange {
118 my ($self)=@_;
119 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
120 return (-1);
122 sub labelchange {
123 my ($self)=@_;
124 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
125 return (-1);
128 # this just returns the translation of the transcript, without checking for
129 # stop codons
130 sub transl_seq {
131 my $self=shift;
132 my $transcript=$self->get_Transcript;
133 my $translation=$transcript->translate(undef, undef, undef,
134 $self->translation_table)->seq;
135 return $translation;
138 # version 1.74 -> now the "*" is printed
139 sub seq {
140 my $self=shift;
141 my $proteinseq;
142 my $transcript=$self->get_Transcript;
143 my $translation=$transcript->translate(undef, undef, undef,
144 $self->translation_table)->seq;
145 my $stop_pos=index($translation,"*");
146 if ($stop_pos == -1) { # no stop present, continue downstream
147 my $downstreamseq=$transcript->downstream_seq();
148 #carp "the downstream is: $downstreamseq"; # debug
149 my $cdnaseq=$transcript->seq();
150 my $extendedseq = Bio::PrimarySeq->new(-seq => "$cdnaseq$downstreamseq",
151 -alphabet => 'dna'
154 $translation=$extendedseq->translate(undef, undef, undef,
155 $self->translation_table)->seq;
156 #carp "the new translation is: $translation"; # debug
157 $stop_pos=index($translation,"*");
158 if ($stop_pos == -1) { # still no stop present, return warning
159 $self->warn("Warning: no stop codon found in the retrieved sequence downstream of Transcript ",1);
160 undef $stop_pos;
161 $proteinseq=$translation;
162 } else {
163 $proteinseq=substr($translation,0,$stop_pos+1);
164 #carp "the new stopped translation is: $proteinseq, because the stop is at position $stop_pos"; # debug
166 } else {
167 $proteinseq=substr($translation,0,$stop_pos+1);
169 return $proteinseq;
172 sub length {
173 my $self=shift;
174 my $seq=$self->seq;
175 my $length=length($seq);
176 return $length;
179 sub all_labels {
180 my $self=shift;
181 return $self->get_Transcript->all_labels;
184 # counts in triplet. Only a label matching the beginning of a triplet coding
185 # for an aminoacid is considered valid when setting coordinate_start
186 # (i.e. only in frame!)
187 sub valid {
188 my ($self,$label)=@_;
189 my $i;
190 my @labels=$self->get_Transcript->all_labels;
191 my $length=$#labels;
192 while ($i <= $length) {
193 if ($label == $labels[$i]) {
194 return (1); # found
196 $i=$i+3;
198 return (0); # not found
201 # returns the label to the first nucleotide of the triplet coding for $position aminoacid
202 sub label {
203 my ($self,$position)=@_;
204 my $firstlabel=$self->coordinate_start; # this is in_frame checked
205 if ($position > 0) {
206 $position=$position*3-2;
207 } else { # if position = 0 this will be caught by Transcript, error thrown
208 $position=$position*3;
210 return $self->get_Transcript->label($position,$firstlabel);
211 # check for coord_start different
214 # returns position (aminoacids numbering) of a particular label
215 # used to return 0 for not in frame labels
216 # now returns the position anyway (after version 1.66)
217 sub position {
218 my ($self,$label)=@_;
219 my $firstlabel=$self->coordinate_start; # this is in_frame checked
220 my $position=$self->get_Transcript->position($label,$firstlabel);
221 use integer;
222 my $modulus=$position % 3;
223 if ($position == 0) {
224 return (0);
225 } elsif ($position > 0) {
226 if ($modulus != 1) {
227 $self->warn("Attention! Label $label is not in frame ".
228 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
229 if ($modulus == 2) {
230 return ($position / 3 + 1);
231 } else { # i.e. modulus == 0
232 return ($position / 3);
235 return ($position / 3 + 1);
236 } else { # pos < 0
237 if ($modulus != 0) {
238 $self->warn("Attention! Label $label is not in frame ".
239 "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
240 return ($position / 3 - 1); # ok for both other positions
242 return ($position / 3);
244 $self->throw( "WEIRD: execution shouldn't have reached here");
245 return (0); # this should never happen, but just in case
248 # note: it inherits subseq and labelsubseq from Transcript!
250 sub start {
251 my $self=shift;
252 return ($self->{'transcript'}->start);
255 sub end {
256 my $self=shift;
257 return ($self->{'transcript'}->end);
260 =head2 aa_ranges
262 Title : aa_ranges
263 Usage : @proteinfeatures = $translation->aa_ranges()
264 Function: to retrieve all the LiveSeq AARange objects attached to a
265 Translation, usually created out of a SwissProt database entry
266 crossreferenced from an EMBL CDS feature.
267 Returns : an array
268 Args : none
270 =cut
272 # returns an array of obj_ref of AARange objects attached to the Translation
273 sub aa_ranges {
274 my $self=shift;
275 return ($self->{'aa_ranges'});
278 sub translation_table {
279 my $self=shift;
280 $self->get_Transcript->translation_table(@_);
283 # returns all aminoacids "affected" i.e. all aminoacids coded by any codon
284 # "touched" by the range selected between the labels, even if only partially.
286 # it's not optimized for performance but it's useful
288 sub labelsubseq {
289 my ($self,$start,$length,$end)=@_;
290 my ($pos1,$pos2);
291 my $transcript=$self->get_Transcript;
292 if ($start) {
293 unless ($transcript->valid($start)) {
294 $self->warn("Start label not valid"); return (-1);
296 $pos1=$self->position($start);
298 if ($end) {
299 if ($end == $start) {
300 $length=1;
301 } else {
302 unless ($transcript->valid($end)) {
303 $self->warn("End label not valid"); return (-1);
305 unless ($transcript->follows($start,$end) == 1) {
306 $self->warn("End label does not follow Start label!"); return (-1);
308 $pos2=$self->position($end);
309 $length=$pos2-$pos1+1;
312 my $sequence=$self->seq;
313 return (substr($sequence,$pos1-1,$length));
316 # return the offset in aminoacids from LiveSeq protein sequence and SwissProt
317 # sequence (usually as a result of an INIT_MET or a gap)
318 sub offset {
319 my $self=shift;
320 return ($self->{'offset'});