Bio/Tools/CodonTable.pm

   1 # $Id$
   2 #
   3 # bioperl module for Bio::Tools::CodonTable
   4 #
   5 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
   6 #
   7 # Copyright Heikki Lehvaslaiho
   8 #
   9 # You may distribute this module under the same terms as perl itself
  10
  11 # POD documentation - main docs before the code
  12
  13 =head1 NAME
  14
  15 Bio::Tools::CodonTable - Codon table object
  16
  17 =head1 SYNOPSIS
  18
  19   # This is a read-only class for all known codon tables.  The IDs are
  20   # the ones used by nucleotide sequence databases.  All common IUPAC
  21   # ambiguity codes for DNA, RNA and amino acids are recognized.
  22
  23   use Bio::Tools::CodonTable;
  24
  25   # defaults to ID 1 "Standard"
  26   $myCodonTable   = Bio::Tools::CodonTable->new();
  27   $myCodonTable2  = Bio::Tools::CodonTable->new( -id => 3 );
  28
  29   # change codon table
  30   $myCodonTable->id(5);
  31
  32   # examine codon table
  33   print  join (' ', "The name of the codon table no.", $myCodonTable->id(4),
  34                "is:", $myCodonTable->name(), "\n");
  35
  36   # print possible codon tables
  37   $tables = Bio::Tools::CodonTable->tables;
  38   while ( ($id,$name) = each %{$tables} ) {
  39     print "$id = $name\n";
  40   }
  41
  42   # translate a codon
  43   $aa = $myCodonTable->translate('ACU');
  44   $aa = $myCodonTable->translate('act');
  45   $aa = $myCodonTable->translate('ytr');
  46
  47   # reverse translate an amino acid
  48   @codons = $myCodonTable->revtranslate('A');
  49   @codons = $myCodonTable->revtranslate('Ser');
  50   @codons = $myCodonTable->revtranslate('Glx');
  51   @codons = $myCodonTable->revtranslate('cYS', 'rna');
  52
  53   # reverse translate an entire amino acid sequence into a IUPAC
  54   # nucleotide string
  55
  56   my $seqobj    = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
  57   my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
  58
  59   # boolean tests
  60   print "Is a start\n"       if $myCodonTable->is_start_codon('ATG');
  61   print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
  62   print "Is a unknown\n"     if $myCodonTable->is_unknown_codon('JTG');
  63
  64 =head1 DESCRIPTION
  65
  66 Codon tables are also called translation tables or genetic codes
  67 since that is what they represent. A bit more complete picture
  68 of the full complexity of codon usage in various taxonomic groups
  69 is presented at the NCBI Genetic Codes Home page.
  70
  71 CodonTable is a BioPerl class that knows all current translation
  72 tables that are used by primary nucleotide sequence databases
  73 (GenBank, EMBL and DDBJ). It provides methods to output information
  74 about tables and relationships between codons and amino acids.
  75
  76 This class and its methods recognized all common IUPAC ambiguity codes
  77 for DNA, RNA and animo acids. The translation method follows the
  78 conventions in EMBL and TREMBL databases.
  79
  80 It is a nuisance to separate RNA and cDNA representations of nucleic
  81 acid transcripts. The CodonTable object accepts codons of both type as
  82 input and allows the user to set the mode for output when reverse
  83 translating. Its default for output is DNA.
  84
  85 Note:
  86
  87 This class deals primarily with individual codons and amino
  88 acids. However in the interest of speed you can L<translate>
  89 longer sequence, too. The full complexity of protein translation
  90 is tackled by L<Bio::PrimarySeqI::translate>.
  91
  92
  93 The amino acid codes are IUPAC recommendations for common amino acids:
  94
  95           A           Ala            Alanine
  96           R           Arg            Arginine
  97           N           Asn            Asparagine
  98           D           Asp            Aspartic acid
  99           C           Cys            Cysteine
 100           Q           Gln            Glutamine
 101           E           Glu            Glutamic acid
 102           G           Gly            Glycine
 103           H           His            Histidine
 104           I           Ile            Isoleucine
 105           L           Leu            Leucine
 106           K           Lys            Lysine
 107           M           Met            Methionine
 108           F           Phe            Phenylalanine
 109           P           Pro            Proline
 110           O           Pyl            Pyrrolysine (22nd amino acid)
 111           U           Sec            Selenocysteine (21st amino acid)
 112           S           Ser            Serine
 113           T           Thr            Threonine
 114           W           Trp            Tryptophan
 115           Y           Tyr            Tyrosine
 116           V           Val            Valine
 117           B           Asx            Aspartic acid or Asparagine
 118           Z           Glx            Glutamine or Glutamic acid
 119           J           Xle            Isoleucine or Valine (mass spec ambiguity)
 120           X           Xaa            Any or unknown amino acid
 121
 122
 123 It is worth noting that, "Bacterial" codon table no. 11 produces an
 124 polypeptide that is, confusingly, identical to the standard one. The
 125 only differences are in available initiator codons.
 126
 127
 128 NCBI Genetic Codes home page:
 129      http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
 130
 131 EBI Translation Table Viewer:
 132      http://www.ebi.ac.uk/cgi-bin/mutations/trtables.cgi
 133
 134 Amended ASN.1 version with ids 16 and 21 is at:
 135      ftp://ftp.ebi.ac.uk/pub/databases/geneticcode/
 136
 137 Thanks to Matteo diTomasso for the original Perl implementation
 138 of these tables.
 139
 140 =head1 FEEDBACK
 141
 142 =head2 Mailing Lists
 143
 144 User feedback is an integral part of the evolution of this and other
 145 Bioperl modules. Send your comments and suggestions preferably to the
 146 Bioperl mailing lists  Your participation is much appreciated.
 147
 148   bioperl-l@bioperl.org                  - General discussion
 149   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
 150
 151 =head2 Reporting Bugs
 152
 153 Report bugs to the Bioperl bug tracking system to help us keep track
 154 the bugs and their resolution.  Bug reports can be submitted via the
 155 web:
 156
 157   http://bugzilla.open-bio.org/
 158
 159 =head1 AUTHOR - Heikki Lehvaslaiho
 160
 161 Email:  heikki-at-bioperl-dot-org
 162
 163 =head1 APPENDIX
 164
 165 The rest of the documentation details each of the object
 166 methods. Internal methods are usually preceded with a _
 167
 168 =cut
 169
 170
 171 # Let the code begin...
 172
 173 package Bio::Tools::CodonTable;
 174 use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP
 175                                 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
 176 use strict;
 177
 178 # Object preamble - inherits from Bio::Root::Root
 179 use Bio::Tools::IUPAC;
 180 use Bio::SeqUtils;
 181
 182 use base qw(Bio::Root::Root);
 183
 184
 185 # first set internal values for all translation tables
 186
 187 BEGIN {
 188     use constant CODONSIZE => 3;
 189     $GAP = '-';
 190     $CODONGAP = $GAP x CODONSIZE;
 191
 192     @NAMES =                    #id
 193         (
 194          'Standard',            #1
 195          'Vertebrate Mitochondrial',#2
 196          'Yeast Mitochondrial',# 3
 197          'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4
 198          'Invertebrate Mitochondrial',#5
 199          'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
 200          '', '',
 201          'Echinoderm Mitochondrial',#9
 202          'Euplotid Nuclear',#10
 203          '"Bacterial"',# 11
 204          'Alternative Yeast Nuclear',# 12
 205          'Ascidian Mitochondrial',# 13
 206          'Flatworm Mitochondrial',# 14
 207          'Blepharisma Nuclear',# 15
 208          'Chlorophycean Mitochondrial',# 16
 209          '', '',  '', '',
 210          'Trematode Mitochondrial',# 21
 211          'Scenedesmus obliquus Mitochondrial', #22
 212          'Thraustochytrium Mitochondrial' #23
 213          );
 214
 215     @TABLES =
 216         qw(
 217            FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 218            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
 219            FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 220            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 221            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
 222            FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 223            '' ''
 224            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
 225            FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 226            FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 227            FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 228            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
 229            FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
 230            FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 231            FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 232            '' '' '' ''
 233            FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
 234            FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 235            FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
 236            );
 237
 238
 239     @STARTS =
 240         qw(
 241            ---M---------------M---------------M----------------------------
 242            --------------------------------MMMM---------------M------------
 243            ----------------------------------MM----------------------------
 244            --MM---------------M------------MMMM---------------M------------
 245            ---M----------------------------MMMM---------------M------------
 246            -----------------------------------M----------------------------
 247            '' ''
 248            -----------------------------------M----------------------------
 249            -----------------------------------M----------------------------
 250            ---M---------------M------------MMMM---------------M------------
 251            -------------------M---------------M----------------------------
 252            -----------------------------------M----------------------------
 253            -----------------------------------M----------------------------
 254            -----------------------------------M----------------------------
 255            -----------------------------------M----------------------------
 256            '' ''  '' ''
 257            -----------------------------------M---------------M------------
 258            -----------------------------------M----------------------------
 259            --------------------------------M--M---------------M------------
 260            );
 261
 262     my @nucs = qw(t c a g);
 263     my $x = 0;
 264     ($CODONS, $TRCOL) = ({}, {});
 265     for my $i (@nucs) {
 266         for my $j (@nucs) {
 267             for my $k (@nucs) {
 268                 my $codon = "$i$j$k";
 269                 $CODONS->{$codon} = $x;
 270                 $TRCOL->{$x} = $codon;
 271                 $x++;
 272             }
 273         }
 274     }
 275     %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
 276     %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
 277     %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
 278     $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
 279     $TERMINATOR = '*';
 280 }
 281
 282 sub new {
 283     my($class,@args) = @_;
 284     my $self = $class->SUPER::new(@args);
 285
 286     my($id) =
 287         $self->_rearrange([qw(ID
 288                              )],
 289                          @args);
 290
 291     $id = 1 if ( ! $id );
 292     $id  && $self->id($id);
 293     return $self; # success - we hope!
 294 }
 295
 296 =head2 id
 297
 298  Title   : id
 299  Usage   : $obj->id(3); $id_integer = $obj->id();
 300  Function: Sets or returns the id of the translation table.  IDs are
 301            integers from 1 to 15, excluding 7 and 8 which have been
 302            removed as redundant. If an invalid ID is given the method
 303            returns 0, false.
 304  Example :
 305  Returns : value of id, a scalar, 0 if not a valid
 306  Args    : newvalue (optional)
 307
 308 =cut
 309
 310 sub id{
 311    my ($self,$value) = @_;
 312    if( defined $value) {
 313        if (  !(defined $TABLES[$value-1]) or $TABLES[$value-1] eq '') {
 314            $self->warn("Not a valid codon table ID [$value] ");
 315            $value = 0;
 316        }
 317        $self->{'id'} = $value;
 318    }
 319    return $self->{'id'};
 320 }
 321
 322 =head2 name
 323
 324  Title   : name
 325  Usage   : $obj->name()
 326  Function: returns the descriptive name of the translation table
 327  Example :
 328  Returns : A string
 329  Args    : None
 330
 331
 332 =cut
 333
 334 sub name{
 335    my ($self) = @_;
 336
 337    my ($id) = $self->{'id'};
 338    return $NAMES[$id-1];
 339 }
 340
 341 =head2 tables
 342
 343  Title   : tables
 344  Usage   : $obj->tables()  or  Bio::Tools::CodonTable->tables()
 345  Function: returns a hash reference where each key is a valid codon
 346            table id() number, and each value is the corresponding
 347            codon table name() string
 348  Example :
 349  Returns : A hashref
 350  Args    : None
 351
 352
 353 =cut
 354
 355 sub tables{
 356   my %tables;
 357   for my $id (1 .. @NAMES) {
 358     my $name = $NAMES[$id-1];
 359     $tables{$id} = $name if $name;
 360   }
 361   return \%tables;
 362 }
 363
 364 =head2 translate
 365
 366  Title   : translate
 367  Usage   : $obj->translate('YTR')
 368  Function: Returns a string of one letter amino acid codes from
 369            nucleotide sequence input. The imput can be of any length.
 370
 371            Returns 'X' for unknown codons and codons that code for
 372            more than one amino acid. Returns an empty string if input
 373            is not three characters long. Exceptions for these are:
 374
 375              - IUPAC amino acid code B for Aspartic Acid and
 376                Asparagine, is used.
 377              - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
 378                used.
 379              - if the codon is two nucleotides long and if by adding
 380                an a third character 'N', it codes for a single amino
 381                acid (with exceptions above), return that, otherwise
 382                return empty string.
 383
 384            Returns empty string for other input strings that are not
 385            three characters long.
 386
 387  Example :
 388  Returns : a string of one letter ambiguous IUPAC amino acid codes
 389  Args    : ambiguous IUPAC nucleotide string
 390
 391
 392 =cut
 393
 394 sub translate {
 395     my ($self, $seq) = @_;
 396     $self->throw("Calling translate without a seq argument!") unless defined $seq;
 397     return '' unless $seq;
 398
 399     my $id = $self->id;
 400     my ($partial) = 0;
 401     $partial = 2 if length($seq) % CODONSIZE == 2;
 402
 403     $seq = lc $seq;
 404     $seq =~ tr/u/t/;
 405     my $protein = "";
 406     if ($seq =~ /[^actg]/ ) { #ambiguous chars
 407         for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
 408             my $triplet = substr($seq, $i, CODONSIZE);
 409             if( $triplet eq $CODONGAP ) {
 410                 $protein .= $GAP;
 411             } elsif (exists $CODONS->{$triplet}) {
 412                 $protein .= substr($TABLES[$id-1],
 413                                    $CODONS->{$triplet},1);
 414             } else {
 415                 $protein .= $self->_translate_ambiguous_codon($triplet);
 416             }
 417         }
 418     } else { # simple, strict translation
 419         for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
 420             my $triplet = substr($seq, $i, CODONSIZE);
 421             if( $triplet eq $CODONGAP ) {
 422                 $protein .= $GAP;
 423             } if (exists $CODONS->{$triplet}) {
 424                 $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1);
 425             } else {
 426                 $protein .= 'X';
 427             }
 428         }
 429     }
 430     if ($partial == 2) { # 2 overhanging nucleotides
 431         my $triplet = substr($seq, ($partial -4)). "n";
 432         if( $triplet eq $CODONGAP ) {
 433             $protein .= $GAP;
 434         } elsif (exists $CODONS->{$triplet}) {
 435             my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1);
 436             $protein .= $aa;
 437         } else {
 438             $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
 439         }
 440     }
 441     return $protein;
 442 }
 443
 444 sub _translate_ambiguous_codon {
 445     my ($self, $triplet, $partial) = @_;
 446     $partial ||= 0;
 447     my $id = $self->id;
 448     my $aa;
 449     my @codons = _unambiquous_codons($triplet);
 450     my %aas =();
 451     foreach my $codon (@codons) {
 452         $aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1;
 453     }
 454     my $count = scalar keys %aas;
 455     if ( $count == 1 ) {
 456         $aa = (keys %aas)[0];
 457     }
 458     elsif ( $count == 2 ) {
 459         if ($aas{'D'} and $aas{'N'}) {
 460             $aa = 'B';
 461         }
 462         elsif ($aas{'E'} and $aas{'Q'}) {
 463             $aa = 'Z';
 464         } else {
 465             $partial ? ($aa = '') : ($aa = 'X');
 466         }
 467     } else {
 468         $partial ? ($aa = '') :  ($aa = 'X');
 469     }
 470     return $aa;
 471 }
 472
 473 =head2 translate_strict
 474
 475  Title   : translate_strict
 476  Usage   : $obj->translate_strict('ACT')
 477  Function: returns one letter amino acid code for a codon input
 478
 479            Fast and simple translation. User is responsible to resolve
 480            ambiguous nucleotide codes before calling this
 481            method. Returns 'X' for unknown codons and an empty string
 482            for input strings that are not three characters long.
 483
 484            It is not recommended to use this method in a production
 485            environment. Use method translate, instead.
 486
 487  Example :
 488  Returns : A string
 489  Args    : a codon = a three nucleotide character string
 490
 491
 492 =cut
 493
 494 sub translate_strict{
 495    my ($self, $value) = @_;
 496    my ($id) = $self->{'id'};
 497
 498    $value  = lc $value;
 499    $value  =~ tr/u/t/;
 500
 501    if (length $value != 3 ) {
 502        return '';
 503    }
 504    elsif (!(defined $CODONS->{$value}))  {
 505        return 'X';
 506    }
 507    else {
 508        return substr($TABLES[$id-1],$CODONS->{$value},1);
 509    }
 510 }
 511
 512 =head2 revtranslate
 513
 514  Title   : revtranslate
 515  Usage   : $obj->revtranslate('G')
 516  Function: returns codons for an amino acid
 517
 518            Returns an empty string for unknown amino acid
 519            codes. Ambiquous IUPAC codes Asx,B, (Asp,D; Asn,N) and
 520            Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
 521            letter amino acid codes are accepted. '*' and 'Ter' are
 522            used for terminator.
 523
 524            By default, the output codons are shown in DNA.  If the
 525            output is needed in RNA (tr/t/u/), add a second argument
 526            'RNA'.
 527
 528  Example : $obj->revtranslate('Gly', 'RNA')
 529  Returns : An array of three lower case letter strings i.e. codons
 530  Args    : amino acid, 'RNA'
 531
 532 =cut
 533
 534 sub revtranslate {
 535         my ($self, $value, $coding) = @_;
 536         my ($id) = $self->{'id'};
 537         my (@aas,  $p);
 538         my (@codons) = ();
 539
 540         if (length($value) == 3 ) {
 541                 $value = lc $value;
 542                 $value = ucfirst $value;
 543                 $value = $THREELETTERSYMBOLS{$value};
 544         }
 545         if ( defined $value and $value =~ /$VALID_PROTEIN/
 546                   and length($value) == 1 ) {
 547                 $value = uc $value;
 548                 @aas = @{$IUPAC_AA{$value}};
 549                 foreach my $aa (@aas) {
 550                         #print $aa, " -2\n";
 551                         $aa = '\*' if $aa eq '*';
 552               while ($TABLES[$id-1] =~ m/$aa/g) {
 553                       $p = pos $TABLES[$id-1];
 554                       push (@codons, $TRCOL->{--$p});
 555               }
 556             }
 557     }
 558
 559    if ($coding and uc ($coding) eq 'RNA') {
 560            for my $i (0..$#codons)  {
 561               $codons[$i] =~ tr/t/u/;
 562            }
 563    }
 564
 565    return @codons;
 566 }
 567
 568 =head2 reverse_translate_all
 569
 570  Title   : reverse_translate_all
 571  Usage   : my $iup_str = $cttable->reverse_translate_all($seq_object)
 572            my $iup_str = $cttable->reverse_translate_all($seq_object,
 573                                                          $cutable,
 574                                                          15);
 575  Function: reverse translates a protein sequence into IUPAC nucleotide
 576            sequence. An 'X' in the protein sequence is converted to 'NNN'
 577            in the nucleotide sequence.
 578  Returns : a string
 579  Args    : a Bio::PrimarySeqI compatible object (mandatory)
 580            a Bio::CodonUsage::Table object and a threshold if only
 581              codons with a relative frequency above the threshold are
 582              to be considered.
 583 =cut
 584
 585 sub reverse_translate_all {
 586
 587         my ($self, $obj, $cut, $threshold) = @_;
 588
 589         ## check args are OK
 590
 591         if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
 592                 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
 593                                                 ref($obj) . "]");
 594                 }
 595         if($obj->alphabet ne 'protein') {
 596                 $self->throw("Cannot reverse translate, need an amino acid sequence .".
 597                      "This sequence is of type [" . $obj->alphabet ."]");
 598                 }
 599         my @data;
 600         my @seq = split '', $obj->seq;
 601
 602         ## if we're not supplying a codon usage table...
 603         if( !$cut && !$threshold) {
 604                 ## get lists of possible codons for each aa.
 605                 for my $aa (@seq) {
 606                         if ($aa =~ /x/i) {
 607                                 push @data, (['NNN']);
 608                         }else {
 609                                 my @cods = $self->revtranslate($aa);
 610                                 push @data, \@cods;
 611                         }
 612                 }
 613         }else{
 614         #else we are supplying a codon usage table, we just want common codons
 615         #check args first.
 616                 if(!$cut->isa('Bio::CodonUsage::Table'))        {
 617                         $self->throw("I need a Bio::CodonUsage::Table object, not a [".
 618                      ref($cut). "].");
 619                         }
 620                 my $cod_ref = $cut->probable_codons($threshold);
 621                 for my $aa (@seq) {
 622                         if ($aa =~ /x/i) {
 623                                 push @data, (['NNN']);
 624                                 next;
 625                                 }
 626                         push @data, $cod_ref->{$aa};
 627                 }
 628         }
 629
 630         return $self->_make_iupac_string(\@data);
 631
 632 }
 633
 634 =head2 reverse_translate_best
 635
 636  Title   : reverse_translate_best
 637  Usage   : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
 638  Function: Reverse translates a protein sequence into plain nucleotide
 639            sequence (GATC), uses the most common codon for each amino acid
 640  Returns : A string
 641  Args    : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
 642
 643 =cut
 644
 645 sub reverse_translate_best {
 646
 647         my ($self, $obj, $cut) = @_;
 648
 649         if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
 650                 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
 651                                                  ref($obj) . "]");
 652         }
 653         if ($obj->alphabet ne 'protein')        {
 654                 $self->throw("Cannot reverse translate, need an amino acid sequence .".
 655                                                  "This sequence is of type [" . $obj->alphabet ."]");
 656         }
 657         if ( !$cut | !$cut->isa('Bio::CodonUsage::Table'))      {
 658                 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
 659                                                  ref($cut). "].");
 660         }
 661
 662         my $str = '';
 663         my @seq = split '', $obj->seq;
 664
 665         my $cod_ref = $cut->most_common_codons();
 666
 667         for my $aa ( @seq ) {
 668                 if ($aa =~ /x/i) {
 669                         $str .= 'NNN';
 670                         next;
 671                 }
 672                 if ( defined $cod_ref->{$aa} ) {
 673                         $str .= $cod_ref->{$aa};
 674                 } else {
 675                         $self->throw("Input sequence contains invalid character: $aa");
 676                 }
 677         }
 678    $str;
 679 }
 680
 681 =head2 is_start_codon
 682
 683  Title   : is_start_codon
 684  Usage   : $obj->is_start_codon('ATG')
 685  Function: returns true (1) for all codons that can be used as a
 686            translation start, false (0) for others.
 687  Example : $myCodonTable->is_start_codon('ATG')
 688  Returns : boolean
 689  Args    : codon
 690
 691 =cut
 692
 693 sub is_start_codon{
 694    my ($self, $value) = @_;
 695    my ($id) = $self->{'id'};
 696
 697    $value  = lc $value;
 698    $value  =~ tr/u/t/;
 699
 700    if (length $value != 3  )  {
 701        return 0;
 702    }
 703    else {
 704        my $result = 1;
 705        my @ms = map { substr($STARTS[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
 706        foreach my $c (@ms) {
 707            $result = 0 if $c ne 'M';
 708        }
 709        return $result;
 710    }
 711 }
 712
 713
 714
 715 =head2 is_ter_codon
 716
 717  Title   : is_ter_codon
 718  Usage   : $obj->is_ter_codon('GAA')
 719  Function: returns true (1) for all codons that can be used as a
 720            translation tarminator, false (0) for others.
 721  Example : $myCodonTable->is_ter_codon('ATG')
 722  Returns : boolean
 723  Args    : codon
 724
 725 =cut
 726
 727 sub is_ter_codon{
 728    my ($self, $value) = @_;
 729    my ($id) = $self->{'id'};
 730
 731    $value  = lc $value;
 732    $value  =~ tr/u/t/;
 733
 734    if (length $value != 3  )  {
 735        return 0;
 736    }
 737    else {
 738        my $result = 1;
 739        my @ms = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
 740        foreach my $c (@ms) {
 741            $result = 0 if $c ne $TERMINATOR;
 742        }
 743        return $result;
 744    }
 745 }
 746
 747 =head2 is_unknown_codon
 748
 749  Title   : is_unknown_codon
 750  Usage   : $obj->is_unknown_codon('GAJ')
 751  Function: returns false (0) for all codons that are valid,
 752             true (1) for others.
 753  Example : $myCodonTable->is_unknown_codon('NTG')
 754  Returns : boolean
 755  Args    : codon
 756
 757
 758 =cut
 759
 760 sub is_unknown_codon{
 761    my ($self, $value) = @_;
 762    my ($id) = $self->{'id'};
 763
 764    $value  = lc $value;
 765    $value  =~ tr/u/t/;
 766
 767    if (length $value != 3  )  {
 768        return 1;
 769    }
 770    else {
 771        my $result = 0;
 772        my @cs = map { substr($TABLES[$id-1],$CODONS->{$_},1) } _unambiquous_codons($value);
 773        $result = 1 if scalar @cs == 0;
 774        return $result;
 775    }
 776 }
 777
 778 =head2 _unambiquous_codons
 779
 780  Title   : _unambiquous_codons
 781  Usage   : @codons = _unambiquous_codons('ACN')
 782  Function:
 783  Example :
 784  Returns : array of strings (one letter unambiguous amino acid codes)
 785  Args    : a codon = a three IUPAC nucleotide character string
 786
 787 =cut
 788
 789 sub _unambiquous_codons{
 790     my ($value) = @_;
 791     my @nts = ();
 792     my @codons = ();
 793     my ($i, $j, $k);
 794     @nts = map { $IUPAC_DNA{uc $_} }  split(//, $value);
 795     for my $i (@{$nts[0]}) {
 796         for my $j (@{$nts[1]}) {
 797             for my $k (@{$nts[2]}) {
 798                 push @codons, lc "$i$j$k";
 799             }
 800         }
 801     }
 802     return @codons;
 803 }
 804
 805 =head2 add_table
 806
 807  Title   : add_table
 808  Usage   : $newid = $ct->add_table($name, $table, $starts)
 809  Function: Add a custom Codon Table into the object.
 810            Know what you are doing, only the length of
 811            the argument strings is checked!
 812  Returns : the id of the new codon table
 813  Args    : name, a string, optional (can be empty)
 814            table, a string of 64 characters
 815            startcodons, a string of 64 characters, defaults to standard
 816
 817 =cut
 818
 819 sub add_table {
 820     my ($self, $name, $table, $starts) = @_;
 821
 822     $name ||= 'Custom'. scalar @NAMES + 1;
 823     $starts ||= $STARTS[0];
 824     $self->throw('Suspect input!')
 825         unless length($table) == 64 and length($starts) == 64;
 826
 827     push @NAMES, $name;
 828     push @TABLES, $table;
 829     push @STARTS, $starts;
 830
 831     return scalar @NAMES;
 832
 833 }
 834
 835 sub _make_iupac_string {
 836
 837         my ($self, $cod_ref) = @_;
 838         if(ref($cod_ref) ne 'ARRAY') {
 839                 $self->throw(" I need a reference to a list of references to codons, ".
 840                                          " not a [". ref($cod_ref) . "].");
 841                 }
 842     my %iupac_hash   = Bio::Tools::IUPAC->iupac_rev_iub();
 843         my $iupac_string = ''; ## the string to be returned
 844         for my $aa (@$cod_ref) {
 845
 846                 ## scan through codon positions, record the differing values,
 847                 # then look up in the iub hash
 848                 for my $index(0..2) {
 849                         my %h;
 850                         map { my $k = substr($_,$index,1);
 851                                 $h{$k}  = undef;} @$aa;
 852                         my $lookup_key = join '', sort{$a cmp $b}keys %h;
 853
 854             ## extend string
 855                         $iupac_string .= $iupac_hash{uc$lookup_key};
 856                 }
 857         }
 858     return $iupac_string;
 859
 860 }
 861
 862
 863 1;