[bug 2714]
[bioperl-live.git] / Bio / Tools / CodonTable.pm
blob82358b83fde2cc49c82342278f451b08c0d586e9
1 # $Id$
3 # bioperl module for Bio::Tools::CodonTable
5 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Tools::CodonTable - Codon table object
17 =head1 SYNOPSIS
19 # This is a read-only class for all known codon tables. The IDs are
20 # the ones used by nucleotide sequence databases. All common IUPAC
21 # ambiguity codes for DNA, RNA and amino acids are recognized.
23 use Bio::Tools::CodonTable;
25 # defaults to ID 1 "Standard"
26 $myCodonTable = Bio::Tools::CodonTable->new();
27 $myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 );
29 # change codon table
30 $myCodonTable->id(5);
32 # examine codon table
33 print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
34 "is:", $myCodonTable->name(), "\n");
36 # print possible codon tables
37 $tables = Bio::Tools::CodonTable->tables;
38 while ( ($id,$name) = each %{$tables} ) {
39 print "$id = $name\n";
42 # translate a codon
43 $aa = $myCodonTable->translate('ACU');
44 $aa = $myCodonTable->translate('act');
45 $aa = $myCodonTable->translate('ytr');
47 # reverse translate an amino acid
48 @codons = $myCodonTable->revtranslate('A');
49 @codons = $myCodonTable->revtranslate('Ser');
50 @codons = $myCodonTable->revtranslate('Glx');
51 @codons = $myCodonTable->revtranslate('cYS', 'rna');
53 # reverse translate an entire amino acid sequence into a IUPAC
54 # nucleotide string
56 my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
57 my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
59 # boolean tests
60 print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
61 print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
62 print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
64 =head1 DESCRIPTION
66 Codon tables are also called translation tables or genetic codes
67 since that is what they represent. A bit more complete picture
68 of the full complexity of codon usage in various taxonomic groups
69 is presented at the NCBI Genetic Codes Home page.
71 CodonTable is a BioPerl class that knows all current translation
72 tables that are used by primary nucleotide sequence databases
73 (GenBank, EMBL and DDBJ). It provides methods to output information
74 about tables and relationships between codons and amino acids.
76 This class and its methods recognized all common IUPAC ambiguity codes
77 for DNA, RNA and animo acids. The translation method follows the
78 conventions in EMBL and TREMBL databases.
80 It is a nuisance to separate RNA and cDNA representations of nucleic
81 acid transcripts. The CodonTable object accepts codons of both type as
82 input and allows the user to set the mode for output when reverse
83 translating. Its default for output is DNA.
85 Note:
87 This class deals primarily with individual codons and amino
88 acids. However in the interest of speed you can L<translate>
89 longer sequence, too. The full complexity of protein translation
90 is tackled by L<Bio::PrimarySeqI::translate>.
93 The amino acid codes are IUPAC recommendations for common amino acids:
95 A Ala Alanine
96 R Arg Arginine
97 N Asn Asparagine
98 D Asp Aspartic acid
99 C Cys Cysteine
100 Q Gln Glutamine
101 E Glu Glutamic acid
102 G Gly Glycine
103 H His Histidine
104 I Ile Isoleucine
105 L Leu Leucine
106 K Lys Lysine
107 M Met Methionine
108 F Phe Phenylalanine
109 P Pro Proline
110 O Pyl Pyrrolysine (22nd amino acid)
111 U Sec Selenocysteine (21st amino acid)
112 S Ser Serine
113 T Thr Threonine
114 W Trp Tryptophan
115 Y Tyr Tyrosine
116 V Val Valine
117 B Asx Aspartic acid or Asparagine
118 Z Glx Glutamine or Glutamic acid
119 J Xle Isoleucine or Valine (mass spec ambiguity)
120 X Xaa Any or unknown amino acid
123 It is worth noting that, "Bacterial" codon table no. 11 produces an
124 polypeptide that is, confusingly, identical to the standard one. The
125 only differences are in available initiator codons.
128 NCBI Genetic Codes home page:
129 http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
131 EBI Translation Table Viewer:
132 http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi
134 Amended ASN.1 version with ids 16 and 21 is at:
135 ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/
137 Thanks to Matteo diTomasso for the original Perl implementation
138 of these tables.
140 =head1 FEEDBACK
142 =head2 Mailing Lists
144 User feedback is an integral part of the evolution of this and other
145 Bioperl modules. Send your comments and suggestions preferably to the
146 Bioperl mailing lists Your participation is much appreciated.
148 bioperl-l@bioperl.org - General discussion
149 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
151 =head2 Reporting Bugs
153 Report bugs to the Bioperl bug tracking system to help us keep track
154 the bugs and their resolution. Bug reports can be submitted via the
155 web:
157 http://bugzilla.open-bio.org/
159 =head1 AUTHOR - Heikki Lehvaslaiho
161 Email: heikki-at-bioperl-dot-org
163 =head1 APPENDIX
165 The rest of the documentation details each of the object
166 methods. Internal methods are usually preceded with a _
168 =cut
171 # Let the code begin...
173 package Bio::Tools::CodonTable;
174 use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP
175 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
176 use strict;
178 # Object preamble - inherits from Bio::Root::Root
179 use Bio::Tools::IUPAC;
180 use Bio::SeqUtils;
182 use base qw(Bio::Root::Root);
185 # first set internal values for all translation tables
187 BEGIN {
188 use constant CODONSIZE => 3;
189 $GAP = '-';
190 $CODONGAP = $GAP x CODONSIZE;
192 @NAMES = #id
194 'Standard', #1
195 'Vertebrate Mitochondrial',#2
196 'Yeast Mitochondrial',# 3
197 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4
198 'Invertebrate Mitochondrial',#5
199 'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
200 '', '',
201 'Echinoderm Mitochondrial',#9
202 'Euplotid Nuclear',#10
203 '"Bacterial"',# 11
204 'Alternative Yeast Nuclear',# 12
205 'Ascidian Mitochondrial',# 13
206 'Flatworm Mitochondrial',# 14
207 'Blepharisma Nuclear',# 15
208 'Chlorophycean Mitochondrial',# 16
209 '', '', '', '',
210 'Trematode Mitochondrial',# 21
211 'Scenedesmus obliquus Mitochondrial', #22
212 'Thraustochytrium Mitochondrial' #23
215 @TABLES =
217 FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
218 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
219 FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
220 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
221 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
222 FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
223 '' ''
224 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
225 FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
226 FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
227 FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
228 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
229 FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
230 FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
231 FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
232 '' '' '' ''
233 FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
234 FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
235 FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
239 @STARTS =
241 ---M---------------M---------------M----------------------------
242 --------------------------------MMMM---------------M------------
243 ----------------------------------MM----------------------------
244 --MM---------------M------------MMMM---------------M------------
245 ---M----------------------------MMMM---------------M------------
246 -----------------------------------M----------------------------
247 '' ''
248 -----------------------------------M----------------------------
249 -----------------------------------M----------------------------
250 ---M---------------M------------MMMM---------------M------------
251 -------------------M---------------M----------------------------
252 -----------------------------------M----------------------------
253 -----------------------------------M----------------------------
254 -----------------------------------M----------------------------
255 -----------------------------------M----------------------------
256 '' '' '' ''
257 -----------------------------------M---------------M------------
258 -----------------------------------M----------------------------
259 --------------------------------M--M---------------M------------
262 my @nucs = qw(t c a g);
263 my $x = 0;
264 ($CODONS, $TRCOL) = ({}, {});
265 for my $i (@nucs) {
266 for my $j (@nucs) {
267 for my $k (@nucs) {
268 my $codon = "$i$j$k";
269 $CODONS->{$codon} = $x;
270 $TRCOL->{$x} = $codon;
271 $x++;
275 %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
276 %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
277 %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
278 $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
279 $TERMINATOR = '*';
282 sub new {
283 my($class,@args) = @_;
284 my $self = $class->SUPER::new(@args);
286 my($id) =
287 $self->_rearrange([qw(ID
289 @args);
291 $id = 1 if ( ! $id );
292 $id && $self->id($id);
293 return $self; # success - we hope!
296 =head2 id
298 Title : id
299 Usage : $obj->id(3); $id_integer = $obj->id();
300 Function: Sets or returns the id of the translation table. IDs are
301 integers from 1 to 15, excluding 7 and 8 which have been
302 removed as redundant. If an invalid ID is given the method
303 returns 0, false.
304 Example :
305 Returns : value of id, a scalar, 0 if not a valid
306 Args : newvalue (optional)
308 =cut
310 sub id{
311 my ($self,$value) = @_;
312 if( defined $value) {
313 if ( !(defined $TABLES[$value-1]) or $TABLES[$value-1] eq '') {
314 $self->warn("Not a valid codon table ID [$value] ");
315 $value = 0;
317 $self->{'id'} = $value;
319 return $self->{'id'};
322 =head2 name
324 Title : name
325 Usage : $obj->name()
326 Function: returns the descriptive name of the translation table
327 Example :
328 Returns : A string
329 Args : None
332 =cut
334 sub name{
335 my ($self) = @_;
337 my ($id) = $self->{'id'};
338 return $NAMES[$id-1];
341 =head2 tables
343 Title : tables
344 Usage : $obj->tables() or Bio::Tools::CodonTable->tables()
345 Function: returns a hash reference where each key is a valid codon
346 table id() number, and each value is the corresponding
347 codon table name() string
348 Example :
349 Returns : A hashref
350 Args : None
353 =cut
355 sub tables{
356 my %tables;
357 for my $id (1 .. @NAMES) {
358 my $name = $NAMES[$id-1];
359 $tables{$id} = $name if $name;
361 return \%tables;
364 =head2 translate
366 Title : translate
367 Usage : $obj->translate('YTR')
368 Function: Returns a string of one letter amino acid codes from
369 nucleotide sequence input. The imput can be of any length.
371 Returns 'X' for unknown codons and codons that code for
372 more than one amino acid. Returns an empty string if input
373 is not three characters long. Exceptions for these are:
375 - IUPAC amino acid code B for Aspartic Acid and
376 Asparagine, is used.
377 - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
378 used.
379 - if the codon is two nucleotides long and if by adding
380 an a third character 'N', it codes for a single amino
381 acid (with exceptions above), return that, otherwise
382 return empty string.
384 Returns empty string for other input strings that are not
385 three characters long.
387 Example :
388 Returns : a string of one letter ambiguous IUPAC amino acid codes
389 Args : ambiguous IUPAC nucleotide string
392 =cut
394 sub translate {
395 my ($self, $seq) = @_;
396 $self->throw("Calling translate without a seq argument!") unless defined $seq;
397 return '' unless $seq;
399 my $id = $self->id;
400 my ($partial) = 0;
401 $partial = 2 if length($seq) % CODONSIZE == 2;
403 $seq = lc $seq;
404 $seq =~ tr/u/t/;
405 my $protein = "";
406 if ($seq =~ /[^actg]/ ) { #ambiguous chars
407 for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
408 my $triplet = substr($seq, $i, CODONSIZE);
409 if( $triplet eq $CODONGAP ) {
410 $protein .= $GAP;
411 } elsif (exists $CODONS->{$triplet}) {
412 $protein .= substr($TABLES[$id-1],
413 $CODONS->{$triplet},1);
414 } else {
415 $protein .= $self->_translate_ambiguous_codon($triplet);
418 } else { # simple, strict translation
419 for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
420 my $triplet = substr($seq, $i, CODONSIZE);
421 if( $triplet eq $CODONGAP ) {
422 $protein .= $GAP;
423 } if (exists $CODONS->{$triplet}) {
424 $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1);
425 } else {
426 $protein .= 'X';
430 if ($partial == 2) { # 2 overhanging nucleotides
431 my $triplet = substr($seq, ($partial -4)). "n";
432 if( $triplet eq $CODONGAP ) {
433 $protein .= $GAP;
434 } elsif (exists $CODONS->{$triplet}) {
435 my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1);
436 $protein .= $aa;
437 } else {
438 $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
441 return $protein;
444 sub _translate_ambiguous_codon {
445 my ($self, $triplet, $partial) = @_;
446 $partial ||= 0;
447 my $id = $self->id;
448 my $aa;
449 my @codons = _unambiquous_codons($triplet);
450 my %aas =();
451 foreach my $codon (@codons) {
452 $aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1;
454 my $count = scalar keys %aas;
455 if ( $count == 1 ) {
456 $aa = (keys %aas)[0];
458 elsif ( $count == 2 ) {
459 if ($aas{'D'} and $aas{'N'}) {
460 $aa = 'B';
462 elsif ($aas{'E'} and $aas{'Q'}) {
463 $aa = 'Z';
464 } else {
465 $partial ? ($aa = '') : ($aa = 'X');
467 } else {
468 $partial ? ($aa = '') : ($aa = 'X');
470 return $aa;
473 =head2 translate_strict
475 Title : translate_strict
476 Usage : $obj->translate_strict('ACT')
477 Function: returns one letter amino acid code for a codon input
479 Fast and simple translation. User is responsible to resolve
480 ambiguous nucleotide codes before calling this
481 method. Returns 'X' for unknown codons and an empty string
482 for input strings that are not three characters long.
484 It is not recommended to use this method in a production
485 environment. Use method translate, instead.
487 Example :
488 Returns : A string
489 Args : a codon = a three nucleotide character string
492 =cut
494 sub translate_strict{
495 my ($self, $value) = @_;
496 my ($id) = $self->{'id'};
498 $value = lc $value;
499 $value =~ tr/u/t/;
501 if (length $value != 3 ) {
502 return '';
504 elsif (!(defined $CODONS->{$value})) {
505 return 'X';
507 else {
508 return substr($TABLES[$id-1],$CODONS->{$value},1);
512 =head2 revtranslate
514 Title : revtranslate
515 Usage : $obj->revtranslate('G')
516 Function: returns codons for an amino acid
518 Returns an empty string for unknown amino acid
519 codes. Ambiquous IUPAC codes Asx,B, (Asp,D; Asn,N) and
520 Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
521 letter amino acid codes are accepted. '*' and 'Ter' are
522 used for terminator.
524 By default, the output codons are shown in DNA. If the
525 output is needed in RNA (tr/t/u/), add a second argument
526 'RNA'.
528 Example : $obj->revtranslate('Gly', 'RNA')
529 Returns : An array of three lower case letter strings i.e. codons
530 Args : amino acid, 'RNA'
532 =cut
534 sub revtranslate {
535 my ($self, $value, $coding) = @_;
536 my ($id) = $self->{'id'};
537 my (@aas, $p);
538 my (@codons) = ();
540 if (length($value) == 3 ) {
541 $value = lc $value;
542 $value = ucfirst $value;
543 $value = $THREELETTERSYMBOLS{$value};
545 if ( defined $value and $value =~ /$VALID_PROTEIN/
546 and length($value) == 1 ) {
547 $value = uc $value;
548 @aas = @{$IUPAC_AA{$value}};
549 foreach my $aa (@aas) {
550 #print $aa, " -2\n";
551 $aa = '\*' if $aa eq '*';
552 while ($TABLES[$id-1] =~ m/$aa/g) {
553 $p = pos $TABLES[$id-1];
554 push (@codons, $TRCOL->{--$p});
559 if ($coding and uc ($coding) eq 'RNA') {
560 for my $i (0..$#codons) {
561 $codons[$i] =~ tr/t/u/;
565 return @codons;
568 =head2 reverse_translate_all
570 Title : reverse_translate_all
571 Usage : my $iup_str = $cttable->reverse_translate_all($seq_object)
572 my $iup_str = $cttable->reverse_translate_all($seq_object,
573 $cutable,
574 15);
575 Function: reverse translates a protein sequence into IUPAC nucleotide
576 sequence. An 'X' in the protein sequence is converted to 'NNN'
577 in the nucleotide sequence.
578 Returns : a string
579 Args : a Bio::PrimarySeqI compatible object (mandatory)
580 a Bio::CodonUsage::Table object and a threshold if only
581 codons with a relative frequency above the threshold are
582 to be considered.
583 =cut
585 sub reverse_translate_all {
587 my ($self, $obj, $cut, $threshold) = @_;
589 ## check args are OK
591 if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
592 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
593 ref($obj) . "]");
595 if($obj->alphabet ne 'protein') {
596 $self->throw("Cannot reverse translate, need an amino acid sequence .".
597 "This sequence is of type [" . $obj->alphabet ."]");
599 my @data;
600 my @seq = split '', $obj->seq;
602 ## if we're not supplying a codon usage table...
603 if( !$cut && !$threshold) {
604 ## get lists of possible codons for each aa.
605 for my $aa (@seq) {
606 if ($aa =~ /x/i) {
607 push @data, (['NNN']);
608 }else {
609 my @cods = $self->revtranslate($aa);
610 push @data, \@cods;
613 }else{
614 #else we are supplying a codon usage table, we just want common codons
615 #check args first.
616 if(!$cut->isa('Bio::CodonUsage::Table')) {
617 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
618 ref($cut). "].");
620 my $cod_ref = $cut->probable_codons($threshold);
621 for my $aa (@seq) {
622 if ($aa =~ /x/i) {
623 push @data, (['NNN']);
624 next;
626 push @data, $cod_ref->{$aa};
630 return $self->_make_iupac_string(\@data);
634 =head2 reverse_translate_best
636 Title : reverse_translate_best
637 Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
638 Function: Reverse translates a protein sequence into plain nucleotide
639 sequence (GATC), uses the most common codon for each amino acid
640 Returns : A string
641 Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
643 =cut
645 sub reverse_translate_best {
647 my ($self, $obj, $cut) = @_;
649 if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
650 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
651 ref($obj) . "]");
653 if ($obj->alphabet ne 'protein') {
654 $self->throw("Cannot reverse translate, need an amino acid sequence .".
655 "This sequence is of type [" . $obj->alphabet ."]");
657 if ( !$cut | !$cut->isa('Bio::CodonUsage::Table')) {
658 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
659 ref($cut). "].");
662 my $str = '';
663 my @seq = split '', $obj->seq;
665 my $cod_ref = $cut->most_common_codons();
667 for my $aa ( @seq ) {
668 if ($aa =~ /x/i) {
669 $str .= 'NNN';
670 next;
672 if ( defined $cod_ref->{$aa} ) {
673 $str .= $cod_ref->{$aa};
674 } else {
675 $self->throw("Input sequence contains invalid character: $aa");
678 $str;
681 =head2 is_start_codon
683 Title : is_start_codon
684 Usage : $obj->is_start_codon('ATG')
685 Function: returns true (1) for all codons that can be used as a
686 translation start, false (0) for others.
687 Example : $myCodonTable->is_start_codon('ATG')
688 Returns : boolean
689 Args : codon
691 =cut
693 sub is_start_codon{
694 my ($self, $value) = @_;
695 my ($id) = $self->{'id'};
697 $value = lc $value;
698 $value =~ tr/u/t/;
700 if (length $value != 3 ) {
701 return 0;
703 else {
704 my $result = 1;
705 my @ms = map { substr($STARTS[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
706 foreach my $c (@ms) {
707 $result = 0 if $c ne 'M';
709 return $result;
715 =head2 is_ter_codon
717 Title : is_ter_codon
718 Usage : $obj->is_ter_codon('GAA')
719 Function: returns true (1) for all codons that can be used as a
720 translation tarminator, false (0) for others.
721 Example : $myCodonTable->is_ter_codon('ATG')
722 Returns : boolean
723 Args : codon
725 =cut
727 sub is_ter_codon{
728 my ($self, $value) = @_;
729 my ($id) = $self->{'id'};
731 $value = lc $value;
732 $value =~ tr/u/t/;
734 if (length $value != 3 ) {
735 return 0;
737 else {
738 my $result = 1;
739 my @ms = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
740 foreach my $c (@ms) {
741 $result = 0 if $c ne $TERMINATOR;
743 return $result;
747 =head2 is_unknown_codon
749 Title : is_unknown_codon
750 Usage : $obj->is_unknown_codon('GAJ')
751 Function: returns false (0) for all codons that are valid,
752 true (1) for others.
753 Example : $myCodonTable->is_unknown_codon('NTG')
754 Returns : boolean
755 Args : codon
758 =cut
760 sub is_unknown_codon{
761 my ($self, $value) = @_;
762 my ($id) = $self->{'id'};
764 $value = lc $value;
765 $value =~ tr/u/t/;
767 if (length $value != 3 ) {
768 return 1;
770 else {
771 my $result = 0;
772 my @cs = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
773 $result = 1 if scalar @cs == 0;
774 return $result;
778 =head2 _unambiquous_codons
780 Title : _unambiquous_codons
781 Usage : @codons = _unambiquous_codons('ACN')
782 Function:
783 Example :
784 Returns : array of strings (one letter unambiguous amino acid codes)
785 Args : a codon = a three IUPAC nucleotide character string
787 =cut
789 sub _unambiquous_codons{
790 my ($value) = @_;
791 my @nts = ();
792 my @codons = ();
793 my ($i, $j, $k);
794 @nts = map { $IUPAC_DNA{uc $_} } split(//, $value);
795 for my $i (@{$nts[0]}) {
796 for my $j (@{$nts[1]}) {
797 for my $k (@{$nts[2]}) {
798 push @codons, lc "$i$j$k";
802 return @codons;
805 =head2 add_table
807 Title : add_table
808 Usage : $newid = $ct->add_table($name, $table, $starts)
809 Function: Add a custom Codon Table into the object.
810 Know what you are doing, only the length of
811 the argument strings is checked!
812 Returns : the id of the new codon table
813 Args : name, a string, optional (can be empty)
814 table, a string of 64 characters
815 startcodons, a string of 64 characters, defaults to standard
817 =cut
819 sub add_table {
820 my ($self, $name, $table, $starts) = @_;
822 $name ||= 'Custom'. scalar @NAMES + 1;
823 $starts ||= $STARTS[0];
824 $self->throw('Suspect input!')
825 unless length($table) == 64 and length($starts) == 64;
827 push @NAMES, $name;
828 push @TABLES, $table;
829 push @STARTS, $starts;
831 return scalar @NAMES;
835 sub _make_iupac_string {
837 my ($self, $cod_ref) = @_;
838 if(ref($cod_ref) ne 'ARRAY') {
839 $self->throw(" I need a reference to a list of references to codons, ".
840 " not a [". ref($cod_ref) . "].");
842 my %iupac_hash = Bio::Tools::IUPAC->iupac_rev_iub();
843 my $iupac_string = ''; ## the string to be returned
844 for my $aa (@$cod_ref) {
846 ## scan through codon positions, record the differing values,
847 # then look up in the iub hash
848 for my $index(0..2) {
849 my %h;
850 map { my $k = substr($_,$index,1);
851 $h{$k} = undef;} @$aa;
852 my $lookup_key = join '', sort{$a cmp $b}keys %h;
854 ## extend string
855 $iupac_string .= $iupac_hash{uc$lookup_key};
858 return $iupac_string;