sync w/ main trunk
[bioperl-live.git] / Bio / Tools / CodonTable.pm
blobc0da2daec50a8dea12a17990fc3c758e3e98214a
1 # $Id$
3 # bioperl module for Bio::Tools::CodonTable
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
9 # Copyright Heikki Lehvaslaiho
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tools::CodonTable - Codon table object
19 =head1 SYNOPSIS
21 # This is a read-only class for all known codon tables. The IDs are
22 # the ones used by nucleotide sequence databases. All common IUPAC
23 # ambiguity codes for DNA, RNA and amino acids are recognized.
25 use Bio::Tools::CodonTable;
27 # defaults to ID 1 "Standard"
28 $myCodonTable = Bio::Tools::CodonTable->new();
29 $myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 );
31 # change codon table
32 $myCodonTable->id(5);
34 # examine codon table
35 print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
36 "is:", $myCodonTable->name(), "\n");
38 # print possible codon tables
39 $tables = Bio::Tools::CodonTable->tables;
40 while ( ($id,$name) = each %{$tables} ) {
41 print "$id = $name\n";
44 # translate a codon
45 $aa = $myCodonTable->translate('ACU');
46 $aa = $myCodonTable->translate('act');
47 $aa = $myCodonTable->translate('ytr');
49 # reverse translate an amino acid
50 @codons = $myCodonTable->revtranslate('A');
51 @codons = $myCodonTable->revtranslate('Ser');
52 @codons = $myCodonTable->revtranslate('Glx');
53 @codons = $myCodonTable->revtranslate('cYS', 'rna');
55 # reverse translate an entire amino acid sequence into a IUPAC
56 # nucleotide string
58 my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
59 my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
61 # boolean tests
62 print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
63 print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
64 print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
66 =head1 DESCRIPTION
68 Codon tables are also called translation tables or genetic codes
69 since that is what they represent. A bit more complete picture
70 of the full complexity of codon usage in various taxonomic groups
71 is presented at the NCBI Genetic Codes Home page.
73 CodonTable is a BioPerl class that knows all current translation
74 tables that are used by primary nucleotide sequence databases
75 (GenBank, EMBL and DDBJ). It provides methods to output information
76 about tables and relationships between codons and amino acids.
78 This class and its methods recognized all common IUPAC ambiguity codes
79 for DNA, RNA and animo acids. The translation method follows the
80 conventions in EMBL and TREMBL databases.
82 It is a nuisance to separate RNA and cDNA representations of nucleic
83 acid transcripts. The CodonTable object accepts codons of both type as
84 input and allows the user to set the mode for output when reverse
85 translating. Its default for output is DNA.
87 Note:
89 This class deals primarily with individual codons and amino
90 acids. However in the interest of speed you can L<translate>
91 longer sequence, too. The full complexity of protein translation
92 is tackled by L<Bio::PrimarySeqI::translate>.
95 The amino acid codes are IUPAC recommendations for common amino acids:
97 A Ala Alanine
98 R Arg Arginine
99 N Asn Asparagine
100 D Asp Aspartic acid
101 C Cys Cysteine
102 Q Gln Glutamine
103 E Glu Glutamic acid
104 G Gly Glycine
105 H His Histidine
106 I Ile Isoleucine
107 L Leu Leucine
108 K Lys Lysine
109 M Met Methionine
110 F Phe Phenylalanine
111 P Pro Proline
112 O Pyl Pyrrolysine (22nd amino acid)
113 U Sec Selenocysteine (21st amino acid)
114 S Ser Serine
115 T Thr Threonine
116 W Trp Tryptophan
117 Y Tyr Tyrosine
118 V Val Valine
119 B Asx Aspartic acid or Asparagine
120 Z Glx Glutamine or Glutamic acid
121 J Xle Isoleucine or Valine (mass spec ambiguity)
122 X Xaa Any or unknown amino acid
125 It is worth noting that, "Bacterial" codon table no. 11 produces an
126 polypeptide that is, confusingly, identical to the standard one. The
127 only differences are in available initiator codons.
130 NCBI Genetic Codes home page:
131 http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
133 EBI Translation Table Viewer:
134 http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi
136 Amended ASN.1 version with ids 16 and 21 is at:
137 ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/
139 Thanks to Matteo diTomasso for the original Perl implementation
140 of these tables.
142 =head1 FEEDBACK
144 =head2 Mailing Lists
146 User feedback is an integral part of the evolution of this and other
147 Bioperl modules. Send your comments and suggestions preferably to the
148 Bioperl mailing lists Your participation is much appreciated.
150 bioperl-l@bioperl.org - General discussion
151 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
153 =head2 Support
155 Please direct usage questions or support issues to the mailing list:
157 L<bioperl-l@bioperl.org>
159 rather than to the module maintainer directly. Many experienced and
160 reponsive experts will be able look at the problem and quickly
161 address it. Please include a thorough description of the problem
162 with code and data examples if at all possible.
164 =head2 Reporting Bugs
166 Report bugs to the Bioperl bug tracking system to help us keep track
167 the bugs and their resolution. Bug reports can be submitted via the
168 web:
170 http://bugzilla.open-bio.org/
172 =head1 AUTHOR - Heikki Lehvaslaiho
174 Email: heikki-at-bioperl-dot-org
176 =head1 APPENDIX
178 The rest of the documentation details each of the object
179 methods. Internal methods are usually preceded with a _
181 =cut
184 # Let the code begin...
186 package Bio::Tools::CodonTable;
187 use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP
188 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
189 use strict;
191 # Object preamble - inherits from Bio::Root::Root
192 use Bio::Tools::IUPAC;
193 use Bio::SeqUtils;
195 use base qw(Bio::Root::Root);
198 # first set internal values for all translation tables
200 BEGIN {
201 use constant CODONSIZE => 3;
202 $GAP = '-';
203 $CODONGAP = $GAP x CODONSIZE;
205 @NAMES = #id
207 'Standard', #1
208 'Vertebrate Mitochondrial',#2
209 'Yeast Mitochondrial',# 3
210 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4
211 'Invertebrate Mitochondrial',#5
212 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
213 '', '',
214 'Echinoderm Mitochondrial',#9
215 'Euplotid Nuclear',#10
216 '"Bacterial"',# 11
217 'Alternative Yeast Nuclear',# 12
218 'Ascidian Mitochondrial',# 13
219 'Flatworm Mitochondrial',# 14
220 'Blepharisma Nuclear',# 15
221 'Chlorophycean Mitochondrial',# 16
222 '', '', '', '',
223 'Trematode Mitochondrial',# 21
224 'Scenedesmus obliquus Mitochondrial', #22
225 'Thraustochytrium Mitochondrial' #23
228 @TABLES =
230 FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
231 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
232 FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
233 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
234 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
235 FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
236 '' ''
237 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
238 FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
239 FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
240 FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
241 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
242 FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
243 FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
244 FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
245 '' '' '' ''
246 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
247 FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
248 FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
252 @STARTS =
254 ---M---------------M---------------M----------------------------
255 --------------------------------MMMM---------------M------------
256 ----------------------------------MM----------------------------
257 --MM---------------M------------MMMM---------------M------------
258 ---M----------------------------MMMM---------------M------------
259 -----------------------------------M----------------------------
260 '' ''
261 -----------------------------------M----------------------------
262 -----------------------------------M----------------------------
263 ---M---------------M------------MMMM---------------M------------
264 -------------------M---------------M----------------------------
265 -----------------------------------M----------------------------
266 -----------------------------------M----------------------------
267 -----------------------------------M----------------------------
268 -----------------------------------M----------------------------
269 '' '' '' ''
270 -----------------------------------M---------------M------------
271 -----------------------------------M----------------------------
272 --------------------------------M--M---------------M------------
275 my @nucs = qw(t c a g);
276 my $x = 0;
277 ($CODONS, $TRCOL) = ({}, {});
278 for my $i (@nucs) {
279 for my $j (@nucs) {
280 for my $k (@nucs) {
281 my $codon = "$i$j$k";
282 $CODONS->{$codon} = $x;
283 $TRCOL->{$x} = $codon;
284 $x++;
288 %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
289 %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
290 %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
291 $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
292 $TERMINATOR = '*';
295 sub new {
296 my($class,@args) = @_;
297 my $self = $class->SUPER::new(@args);
299 my($id) =
300 $self->_rearrange([qw(ID
302 @args);
304 $id = 1 if ( ! $id );
305 $id && $self->id($id);
306 return $self; # success - we hope!
309 =head2 id
311 Title : id
312 Usage : $obj->id(3); $id_integer = $obj->id();
313 Function: Sets or returns the id of the translation table. IDs are
314 integers from 1 to 15, excluding 7 and 8 which have been
315 removed as redundant. If an invalid ID is given the method
316 returns 0, false.
317 Example :
318 Returns : value of id, a scalar, 0 if not a valid
319 Args : newvalue (optional)
321 =cut
323 sub id{
324 my ($self,$value) = @_;
325 if( defined $value) {
326 if ( !(defined $TABLES[$value-1]) or $TABLES[$value-1] eq '') {
327 $self->warn("Not a valid codon table ID [$value] ");
328 $value = 0;
330 $self->{'id'} = $value;
332 return $self->{'id'};
335 =head2 name
337 Title : name
338 Usage : $obj->name()
339 Function: returns the descriptive name of the translation table
340 Example :
341 Returns : A string
342 Args : None
345 =cut
347 sub name{
348 my ($self) = @_;
350 my ($id) = $self->{'id'};
351 return $NAMES[$id-1];
354 =head2 tables
356 Title : tables
357 Usage : $obj->tables() or Bio::Tools::CodonTable->tables()
358 Function: returns a hash reference where each key is a valid codon
359 table id() number, and each value is the corresponding
360 codon table name() string
361 Example :
362 Returns : A hashref
363 Args : None
366 =cut
368 sub tables{
369 my %tables;
370 for my $id (1 .. @NAMES) {
371 my $name = $NAMES[$id-1];
372 $tables{$id} = $name if $name;
374 return \%tables;
377 =head2 translate
379 Title : translate
380 Usage : $obj->translate('YTR')
381 Function: Returns a string of one letter amino acid codes from
382 nucleotide sequence input. The imput can be of any length.
384 Returns 'X' for unknown codons and codons that code for
385 more than one amino acid. Returns an empty string if input
386 is not three characters long. Exceptions for these are:
388 - IUPAC amino acid code B for Aspartic Acid and
389 Asparagine, is used.
390 - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
391 used.
392 - if the codon is two nucleotides long and if by adding
393 an a third character 'N', it codes for a single amino
394 acid (with exceptions above), return that, otherwise
395 return empty string.
397 Returns empty string for other input strings that are not
398 three characters long.
400 Example :
401 Returns : a string of one letter ambiguous IUPAC amino acid codes
402 Args : ambiguous IUPAC nucleotide string
405 =cut
407 sub translate {
408 my ($self, $seq) = @_;
409 $self->throw("Calling translate without a seq argument!") unless defined $seq;
410 return '' unless $seq;
412 my $id = $self->id;
413 my ($partial) = 0;
414 $partial = 2 if length($seq) % CODONSIZE == 2;
416 $seq = lc $seq;
417 $seq =~ tr/u/t/;
418 my $protein = "";
419 if ($seq =~ /[^actg]/ ) { #ambiguous chars
420 for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
421 my $triplet = substr($seq, $i, CODONSIZE);
422 if( $triplet eq $CODONGAP ) {
423 $protein .= $GAP;
424 } elsif (exists $CODONS->{$triplet}) {
425 $protein .= substr($TABLES[$id-1],
426 $CODONS->{$triplet},1);
427 } else {
428 $protein .= $self->_translate_ambiguous_codon($triplet);
431 } else { # simple, strict translation
432 for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
433 my $triplet = substr($seq, $i, CODONSIZE);
434 if( $triplet eq $CODONGAP ) {
435 $protein .= $GAP;
436 } if (exists $CODONS->{$triplet}) {
437 $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1);
438 } else {
439 $protein .= 'X';
443 if ($partial == 2) { # 2 overhanging nucleotides
444 my $triplet = substr($seq, ($partial -4)). "n";
445 if( $triplet eq $CODONGAP ) {
446 $protein .= $GAP;
447 } elsif (exists $CODONS->{$triplet}) {
448 my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1);
449 $protein .= $aa;
450 } else {
451 $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
454 return $protein;
457 sub _translate_ambiguous_codon {
458 my ($self, $triplet, $partial) = @_;
459 $partial ||= 0;
460 my $id = $self->id;
461 my $aa;
462 my @codons = _unambiquous_codons($triplet);
463 my %aas =();
464 foreach my $codon (@codons) {
465 $aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1;
467 my $count = scalar keys %aas;
468 if ( $count == 1 ) {
469 $aa = (keys %aas)[0];
471 elsif ( $count == 2 ) {
472 if ($aas{'D'} and $aas{'N'}) {
473 $aa = 'B';
475 elsif ($aas{'E'} and $aas{'Q'}) {
476 $aa = 'Z';
477 } else {
478 $partial ? ($aa = '') : ($aa = 'X');
480 } else {
481 $partial ? ($aa = '') : ($aa = 'X');
483 return $aa;
486 =head2 translate_strict
488 Title : translate_strict
489 Usage : $obj->translate_strict('ACT')
490 Function: returns one letter amino acid code for a codon input
492 Fast and simple translation. User is responsible to resolve
493 ambiguous nucleotide codes before calling this
494 method. Returns 'X' for unknown codons and an empty string
495 for input strings that are not three characters long.
497 It is not recommended to use this method in a production
498 environment. Use method translate, instead.
500 Example :
501 Returns : A string
502 Args : a codon = a three nucleotide character string
505 =cut
507 sub translate_strict{
508 my ($self, $value) = @_;
509 my ($id) = $self->{'id'};
511 $value = lc $value;
512 $value =~ tr/u/t/;
514 if (length $value != 3 ) {
515 return '';
517 elsif (!(defined $CODONS->{$value})) {
518 return 'X';
520 else {
521 return substr($TABLES[$id-1],$CODONS->{$value},1);
525 =head2 revtranslate
527 Title : revtranslate
528 Usage : $obj->revtranslate('G')
529 Function: returns codons for an amino acid
531 Returns an empty string for unknown amino acid
532 codes. Ambiquous IUPAC codes Asx,B, (Asp,D; Asn,N) and
533 Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
534 letter amino acid codes are accepted. '*' and 'Ter' are
535 used for terminator.
537 By default, the output codons are shown in DNA. If the
538 output is needed in RNA (tr/t/u/), add a second argument
539 'RNA'.
541 Example : $obj->revtranslate('Gly', 'RNA')
542 Returns : An array of three lower case letter strings i.e. codons
543 Args : amino acid, 'RNA'
545 =cut
547 sub revtranslate {
548 my ($self, $value, $coding) = @_;
549 my ($id) = $self->{'id'};
550 my (@aas, $p);
551 my (@codons) = ();
553 if (length($value) == 3 ) {
554 $value = lc $value;
555 $value = ucfirst $value;
556 $value = $THREELETTERSYMBOLS{$value};
558 if ( defined $value and $value =~ /$VALID_PROTEIN/
559 and length($value) == 1 ) {
560 $value = uc $value;
561 @aas = @{$IUPAC_AA{$value}};
562 foreach my $aa (@aas) {
563 #print $aa, " -2\n";
564 $aa = '\*' if $aa eq '*';
565 while ($TABLES[$id-1] =~ m/$aa/g) {
566 $p = pos $TABLES[$id-1];
567 push (@codons, $TRCOL->{--$p});
572 if ($coding and uc ($coding) eq 'RNA') {
573 for my $i (0..$#codons) {
574 $codons[$i] =~ tr/t/u/;
578 return @codons;
581 =head2 reverse_translate_all
583 Title : reverse_translate_all
584 Usage : my $iup_str = $cttable->reverse_translate_all($seq_object)
585 my $iup_str = $cttable->reverse_translate_all($seq_object,
586 $cutable,
587 15);
588 Function: reverse translates a protein sequence into IUPAC nucleotide
589 sequence. An 'X' in the protein sequence is converted to 'NNN'
590 in the nucleotide sequence.
591 Returns : a string
592 Args : a Bio::PrimarySeqI compatible object (mandatory)
593 a Bio::CodonUsage::Table object and a threshold if only
594 codons with a relative frequency above the threshold are
595 to be considered.
596 =cut
598 sub reverse_translate_all {
600 my ($self, $obj, $cut, $threshold) = @_;
602 ## check args are OK
604 if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
605 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
606 ref($obj) . "]");
608 if($obj->alphabet ne 'protein') {
609 $self->throw("Cannot reverse translate, need an amino acid sequence .".
610 "This sequence is of type [" . $obj->alphabet ."]");
612 my @data;
613 my @seq = split '', $obj->seq;
615 ## if we're not supplying a codon usage table...
616 if( !$cut && !$threshold) {
617 ## get lists of possible codons for each aa.
618 for my $aa (@seq) {
619 if ($aa =~ /x/i) {
620 push @data, (['NNN']);
621 }else {
622 my @cods = $self->revtranslate($aa);
623 push @data, \@cods;
626 }else{
627 #else we are supplying a codon usage table, we just want common codons
628 #check args first.
629 if(!$cut->isa('Bio::CodonUsage::Table')) {
630 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
631 ref($cut). "].");
633 my $cod_ref = $cut->probable_codons($threshold);
634 for my $aa (@seq) {
635 if ($aa =~ /x/i) {
636 push @data, (['NNN']);
637 next;
639 push @data, $cod_ref->{$aa};
643 return $self->_make_iupac_string(\@data);
647 =head2 reverse_translate_best
649 Title : reverse_translate_best
650 Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
651 Function: Reverse translates a protein sequence into plain nucleotide
652 sequence (GATC), uses the most common codon for each amino acid
653 Returns : A string
654 Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
656 =cut
658 sub reverse_translate_best {
660 my ($self, $obj, $cut) = @_;
662 if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
663 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
664 ref($obj) . "]");
666 if ($obj->alphabet ne 'protein') {
667 $self->throw("Cannot reverse translate, need an amino acid sequence .".
668 "This sequence is of type [" . $obj->alphabet ."]");
670 if ( !$cut | !$cut->isa('Bio::CodonUsage::Table')) {
671 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
672 ref($cut). "].");
675 my $str = '';
676 my @seq = split '', $obj->seq;
678 my $cod_ref = $cut->most_common_codons();
680 for my $aa ( @seq ) {
681 if ($aa =~ /x/i) {
682 $str .= 'NNN';
683 next;
685 if ( defined $cod_ref->{$aa} ) {
686 $str .= $cod_ref->{$aa};
687 } else {
688 $self->throw("Input sequence contains invalid character: $aa");
691 $str;
694 =head2 is_start_codon
696 Title : is_start_codon
697 Usage : $obj->is_start_codon('ATG')
698 Function: returns true (1) for all codons that can be used as a
699 translation start, false (0) for others.
700 Example : $myCodonTable->is_start_codon('ATG')
701 Returns : boolean
702 Args : codon
704 =cut
706 sub is_start_codon{
707 my ($self, $value) = @_;
708 my ($id) = $self->{'id'};
710 $value = lc $value;
711 $value =~ tr/u/t/;
713 if (length $value != 3 ) {
714 return 0;
716 else {
717 my $result = 1;
718 my @ms = map { substr($STARTS[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
719 foreach my $c (@ms) {
720 $result = 0 if $c ne 'M';
722 return $result;
728 =head2 is_ter_codon
730 Title : is_ter_codon
731 Usage : $obj->is_ter_codon('GAA')
732 Function: returns true (1) for all codons that can be used as a
733 translation tarminator, false (0) for others.
734 Example : $myCodonTable->is_ter_codon('ATG')
735 Returns : boolean
736 Args : codon
738 =cut
740 sub is_ter_codon{
741 my ($self, $value) = @_;
742 my ($id) = $self->{'id'};
744 $value = lc $value;
745 $value =~ tr/u/t/;
747 if (length $value != 3 ) {
748 return 0;
750 else {
751 my $result = 1;
752 my @ms = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
753 foreach my $c (@ms) {
754 $result = 0 if $c ne $TERMINATOR;
756 return $result;
760 =head2 is_unknown_codon
762 Title : is_unknown_codon
763 Usage : $obj->is_unknown_codon('GAJ')
764 Function: returns false (0) for all codons that are valid,
765 true (1) for others.
766 Example : $myCodonTable->is_unknown_codon('NTG')
767 Returns : boolean
768 Args : codon
771 =cut
773 sub is_unknown_codon{
774 my ($self, $value) = @_;
775 my ($id) = $self->{'id'};
777 $value = lc $value;
778 $value =~ tr/u/t/;
780 if (length $value != 3 ) {
781 return 1;
783 else {
784 my $result = 0;
785 my @cs = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
786 $result = 1 if scalar @cs == 0;
787 return $result;
791 =head2 _unambiquous_codons
793 Title : _unambiquous_codons
794 Usage : @codons = _unambiquous_codons('ACN')
795 Function:
796 Example :
797 Returns : array of strings (one letter unambiguous amino acid codes)
798 Args : a codon = a three IUPAC nucleotide character string
800 =cut
802 sub _unambiquous_codons{
803 my ($value) = @_;
804 my @nts = ();
805 my @codons = ();
806 my ($i, $j, $k);
807 @nts = map { $IUPAC_DNA{uc $_} } split(//, $value);
808 for my $i (@{$nts[0]}) {
809 for my $j (@{$nts[1]}) {
810 for my $k (@{$nts[2]}) {
811 push @codons, lc "$i$j$k";
815 return @codons;
818 =head2 add_table
820 Title : add_table
821 Usage : $newid = $ct->add_table($name, $table, $starts)
822 Function: Add a custom Codon Table into the object.
823 Know what you are doing, only the length of
824 the argument strings is checked!
825 Returns : the id of the new codon table
826 Args : name, a string, optional (can be empty)
827 table, a string of 64 characters
828 startcodons, a string of 64 characters, defaults to standard
830 =cut
832 sub add_table {
833 my ($self, $name, $table, $starts) = @_;
835 $name ||= 'Custom'. scalar @NAMES + 1;
836 $starts ||= $STARTS[0];
837 $self->throw('Suspect input!')
838 unless length($table) == 64 and length($starts) == 64;
840 push @NAMES, $name;
841 push @TABLES, $table;
842 push @STARTS, $starts;
844 return scalar @NAMES;
848 sub _make_iupac_string {
850 my ($self, $cod_ref) = @_;
851 if(ref($cod_ref) ne 'ARRAY') {
852 $self->throw(" I need a reference to a list of references to codons, ".
853 " not a [". ref($cod_ref) . "].");
855 my %iupac_hash = Bio::Tools::IUPAC->iupac_rev_iub();
856 my $iupac_string = ''; ## the string to be returned
857 for my $aa (@$cod_ref) {
859 ## scan through codon positions, record the differing values,
860 # then look up in the iub hash
861 for my $index(0..2) {
862 my %h;
863 map { my $k = substr($_,$index,1);
864 $h{$k} = undef;} @$aa;
865 my $lookup_key = join '', sort{$a cmp $b}keys %h;
867 ## extend string
868 $iupac_string .= $iupac_hash{uc$lookup_key};
871 return $iupac_string;