lib/Bio/Tools/CodonTable.pm

   1 #
   2 # bioperl module for Bio::Tools::CodonTable
   3 #
   4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   5 #
   6 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
   7 #
   8 # Copyright Heikki Lehvaslaiho
   9 #
  10 # You may distribute this module under the same terms as perl itself
  11
  12 # POD documentation - main docs before the code
  13
  14 =head1 NAME
  15
  16 Bio::Tools::CodonTable - Codon table object
  17
  18 =head1 SYNOPSIS
  19
  20   # This is a read-only class for all known codon tables.  The IDs are
  21   # the ones used by nucleotide sequence databases.  All common IUPAC
  22   # ambiguity codes for DNA, RNA and amino acids are recognized.
  23
  24   use Bio::Tools::CodonTable;
  25
  26   # defaults to ID 1 "Standard"
  27   $myCodonTable   = Bio::Tools::CodonTable->new();
  28   $myCodonTable2  = Bio::Tools::CodonTable->new( -id => 3 );
  29
  30   # change codon table
  31   $myCodonTable->id(5);
  32
  33   # examine codon table
  34   print  join (' ', "The name of the codon table no.", $myCodonTable->id(4),
  35            "is:", $myCodonTable->name(), "\n");
  36
  37   # print possible codon tables
  38   $tables = Bio::Tools::CodonTable->tables;
  39   while ( ($id,$name) = each %{$tables} ) {
  40     print "$id = $name\n";
  41   }
  42
  43   # translate a codon
  44   $aa = $myCodonTable->translate('ACU');
  45   $aa = $myCodonTable->translate('act');
  46   $aa = $myCodonTable->translate('ytr');
  47
  48   # reverse translate an amino acid
  49   @codons = $myCodonTable->revtranslate('A');
  50   @codons = $myCodonTable->revtranslate('Ser');
  51   @codons = $myCodonTable->revtranslate('Glx');
  52   @codons = $myCodonTable->revtranslate('cYS', 'rna');
  53
  54   # reverse translate an entire amino acid sequence into a IUPAC
  55   # nucleotide string
  56
  57   my $seqobj    = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
  58   my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
  59
  60   # boolean tests
  61   print "Is a start\n"       if $myCodonTable->is_start_codon('ATG');
  62   print "Is a terminator\n"  if $myCodonTable->is_ter_codon('tar');
  63   print "Is a unknown\n"     if $myCodonTable->is_unknown_codon('JTG');
  64
  65 =head1 DESCRIPTION
  66
  67 Codon tables are also called translation tables or genetic codes
  68 since that is what they represent. A bit more complete picture
  69 of the full complexity of codon usage in various taxonomic groups
  70 is presented at the NCBI Genetic Codes Home page.
  71
  72 CodonTable is a BioPerl class that knows all current translation
  73 tables that are used by primary nucleotide sequence databases
  74 (GenBank, EMBL and DDBJ). It provides methods to output information
  75 about tables and relationships between codons and amino acids.
  76
   77 This class and its methods recognize all common IUPAC ambiguity codes
   78 for DNA, RNA and amino acids. The translation method follows the
  79 conventions in EMBL and TREMBL databases.
  80
  81 It is a nuisance to separate RNA and cDNA representations of nucleic
   82 acid transcripts. The CodonTable object accepts codons of both types as
  83 input and allows the user to set the mode for output when reverse
  84 translating. Its default for output is DNA.
  85
  86 Note:
  87
  88 This class deals primarily with individual codons and amino
  89 acids. However in the interest of speed you can L<translate>
  90 longer sequence, too. The full complexity of protein translation
  91 is tackled by L<Bio::PrimarySeqI::translate>.
  92
  93
  94 The amino acid codes are IUPAC recommendations for common amino acids:
  95
  96           A           Ala            Alanine
  97           R           Arg            Arginine
  98           N           Asn            Asparagine
  99           D           Asp            Aspartic acid
 100           C           Cys            Cysteine
 101           Q           Gln            Glutamine
 102           E           Glu            Glutamic acid
 103           G           Gly            Glycine
 104           H           His            Histidine
 105           I           Ile            Isoleucine
 106           L           Leu            Leucine
 107           K           Lys            Lysine
 108           M           Met            Methionine
 109           F           Phe            Phenylalanine
 110           P           Pro            Proline
 111           O           Pyl            Pyrrolysine (22nd amino acid)
 112           U           Sec            Selenocysteine (21st amino acid)
 113           S           Ser            Serine
 114           T           Thr            Threonine
 115           W           Trp            Tryptophan
 116           Y           Tyr            Tyrosine
 117           V           Val            Valine
 118           B           Asx            Aspartic acid or Asparagine
 119           Z           Glx            Glutamine or Glutamic acid
  120           J           Xle            Leucine or Isoleucine (mass spec ambiguity)
 121           X           Xaa            Any or unknown amino acid
 122
 123
  124 It is worth noting that, "Bacterial" codon table no. 11 produces a
 125 polypeptide that is, confusingly, identical to the standard one. The
 126 only differences are in available initiator codons.
 127
 128
 129 NCBI Genetic Codes home page:
 130      (Last update of the Genetic Codes: Apr. 25, 2024)
 131      https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
 132
 133 The "value notation" / "print form" ASN.1 version is at:
 134      ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
 135
 136 Thanks to Matteo diTomasso for the original Perl implementation
 137 of these tables.
 138
 139 =head1 FEEDBACK
 140
 141 =head2 Mailing Lists
 142
 143 User feedback is an integral part of the evolution of this and other
 144 Bioperl modules. Send your comments and suggestions preferably to the
  145 Bioperl mailing lists.  Your participation is much appreciated.
 146
 147   bioperl-l@bioperl.org                  - General discussion
 148   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
 149
 150 =head2 Support
 151
 152 Please direct usage questions or support issues to the mailing list:
 153
 154 I<bioperl-l@bioperl.org>
 155
 156 rather than to the module maintainer directly. Many experienced and
  157 responsive experts will be able to look at the problem and quickly
 158 address it. Please include a thorough description of the problem
 159 with code and data examples if at all possible.
 160
 161 =head2 Reporting Bugs
 162
 163 Report bugs to the Bioperl bug tracking system to help us keep track
  164 of the bugs and their resolution.  Bug reports can be submitted via the
 165 web:
 166
 167   https://github.com/bioperl/bioperl-live/issues
 168
 169 =head1 AUTHOR - Heikki Lehvaslaiho
 170
 171 Email:  heikki-at-bioperl-dot-org
 172
 173 =head1 APPENDIX
 174
 175 The rest of the documentation details each of the object
 176 methods. Internal methods are usually preceded with a _
 177
 178 =cut
 179
 180 # Let the code begin...
 181
 182 package Bio::Tools::CodonTable;
 183
 184 use strict;
 185
 186 # Object preamble - inherits from Bio::Root::Root
 187 use Bio::Tools::IUPAC;
 188 use Bio::SeqUtils;
 189
 190 use base qw(Bio::Root::Root);
 191 our (@NAMES, @TABLES, @STARTS, $TRCOL, $CODONS, %IUPAC_DNA, $CODONGAP, $GAP,
 192      %IUPAC_AA, %THREELETTERSYMBOLS, $VALID_PROTEIN, $TERMINATOR);
 193
 194
 195 # set internal values for all translation tables
 196 {
 197     use constant CODONSIZE => 3;
 198     $GAP = '-';
 199     $CODONGAP = $GAP x CODONSIZE;
 200
 201     # Helper private function to parse the
 202     # ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt file which is
 203     # below __DATA__ in this module (see the end of the file).  This
 204     # fills the @NAMES, @TABLES, and @STARTS variables.  To update to
 205     # a new release of gc.prt, replace the content below __DATA__.
 206     sub parse_gc_prt {
 207
 208         # Init tables has with special option (id=0) for ATG-only start
 209         my %tables = (
 210             0 => {
 211                 name => "Strict",
 212                 ncbieaa => "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
 213                 sncbieaa => "----------**--*--------------------M----------------------------",
 214             },
 215             );
 216
 217         while (defined(my $line = <DATA>)) {
 218             next if $line =~ /^\s*--/;  # skip comment lines
 219             if ($line =~ /^\s*\{\s*$/) {  # start of a table description
 220                 my $name = "";
 221                 my $id = 0;
 222                 my $ncbieaa = "";
 223                 my $sncbieaa = "";
 224                 do {
 225                     if ($line =~ /^\s*(name|id|ncbieaa|sncbieaa)\s+(.+)/) {
 226                         my $key = $1;
 227                         my $rem = $2;
 228                         if ($key eq "id") {
 229                             $rem =~ /^(\d+)/;
 230                             $id = int $1;
 231                         } else {
 232                             # The remaining keys --- name, ncbieaa,
 233                             # and sncbieaa --- are strings which may
 234                             # be multi-line (e.g., name for table with
 235                             # id 4).  We are assuming that there is no
 236                             # " character inside the value so we keep
 237                             # appending lines until we find an end ".
 238                             while ($rem !~ /^"(.*)"/ && ! eof DATA) {
 239                                 $rem .= <DATA>;
 240                             }
 241                             $rem =~ s/\n//g;
 242                             $rem =~ /^"(.*)"/;
 243                             my $str = $1;
 244                             if ($key eq "name" && ! $name) {
 245                                 # ignore alternative names, e.g. SGC0,
 246                                 # only keep the first name listed.
 247                                 $name = $str;
 248                             } elsif ($key eq "ncbieaa") {
 249                                 $ncbieaa = $str;
 250                             } elsif ($key eq "sncbieaa") {
 251                                 $sncbieaa = $str;
 252                             }
 253                         }
 254                     }
 255                 } until (($line = <DATA>) =~ /^\s*}\s*,?$/);  # we reached the end of table description
 256                 $tables{$id} = {
 257                     name => $name,
 258                     ncbieaa => $ncbieaa,
 259                     sncbieaa => $sncbieaa
 260                 };
 261             }
 262         }
 263         close DATA;
 264         # use Data::Dumper;
 265         # print Dumper %tables;
 266
 267         my $highest_id = (sort {$a <=> $b} keys %tables)[-1];
 268         for (my $i = 0; $i < $highest_id; $i++) {
 269             if (defined $tables{$i}) {
 270                 push @NAMES, $tables{$i}->{name};
 271                 push @TABLES, $tables{$i}->{ncbieaa};
 272                 push @STARTS, $tables{$i}->{sncbieaa};
 273             } else {
 274                 push @NAMES, '';
 275                 push @TABLES, '';
 276                 push @STARTS, '';
 277             }
 278         }
 279     }
 280     parse_gc_prt();
 281     undef &parse_gc_prt;
 282
 283
 284     my @nucs = qw(t c a g);
 285     my $x = 0;
 286     ($CODONS, $TRCOL) = ({}, {});
 287     for my $i (@nucs) {
 288         for my $j (@nucs) {
 289             for my $k (@nucs) {
 290                 my $codon = "$i$j$k";
 291                 $CODONS->{$codon} = $x;
 292                 $TRCOL->{$x} = $codon;
 293                 $x++;
 294             }
 295         }
 296     }
 297     %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
 298     %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
 299     %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
 300     $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
 301     $TERMINATOR = '*';
 302 }
 303
 304 sub new {
 305     my($class,@args) = @_;
 306     my $self = $class->SUPER::new(@args);
 307
 308     my($id) =
 309         $self->_rearrange([qw(ID
 310                  )],
 311              @args);
 312
 313     $id = 1 if ( ! defined ( $id ) );
 314     $id  && $self->id($id);
 315     return $self; # success - we hope!
 316 }
 317
 318 =head2 id
 319
 320  Title   : id
 321  Usage   : $obj->id(3); $id_integer = $obj->id();
 322  Function: Sets or returns the id of the translation table.  IDs are
 323            integers from 0 (special ATG-only start) to 25, excluding
 324            7-8 and 17-20 which have been removed. If an invalid ID is
 325            given the method returns 1, the standard table.
 326  Example :
 327  Returns : value of id, a scalar, warn and fall back to 1 (standard table)
 328            if specified id is not valid
 329  Args    : newvalue (optional)
 330
 331 =cut
 332
 333 sub id{
 334     my ($self,$value) = @_;
 335     if( defined $value) {
 336         if (  not defined $TABLES[$value] or $TABLES[$value] eq '') {
 337             $self->warn("Not a valid codon table ID [$value], using [1] instead ");
 338             $value = 1;
 339         }
 340         $self->{'id'} = $value;
 341     }
 342     return $self->{'id'};
 343 }
 344
 345 =head2 name
 346
 347  Title   : name
 348  Usage   : $obj->name()
 349  Function: returns the descriptive name of the translation table
 350  Example :
 351  Returns : A string
 352  Args    : None
 353
 354
 355 =cut
 356
 357 sub name{
 358    my ($self) = @_;
 359
 360    my ($id) = $self->{'id'};
 361    return $NAMES[$id];
 362 }
 363
 364 =head2 tables
 365
 366  Title   : tables
 367  Usage   : $obj->tables()  or  Bio::Tools::CodonTable->tables()
 368  Function: returns a hash reference where each key is a valid codon
 369            table id() number, and each value is the corresponding
 370            codon table name() string
 371  Example :
 372  Returns : A hashref
 373  Args    : None
 374
 375
 376 =cut
 377
 378 sub tables{
 379   my %tables;
 380   for my $id (0 .. $#NAMES) {
 381     my $name = $NAMES[$id];
 382     $tables{$id} = $name if $name;
 383   }
 384   return \%tables;
 385 }
 386
 387 =head2 translate
 388
 389  Title   : translate
 390  Usage   : $obj->translate('YTR')
 391  Function: Returns a string of one letter amino acid codes from
 392            nucleotide sequence input. The imput can be of any length.
 393
 394            Returns 'X' for unknown codons and codons that code for
 395            more than one amino acid. Returns an empty string if input
 396            is not three characters long. Exceptions for these are:
 397
 398              - IUPAC amino acid code B for Aspartic Acid and
 399                Asparagine, is used.
 400              - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
 401                used.
 402              - if the codon is two nucleotides long and if by adding
 403                an a third character 'N', it codes for a single amino
 404                acid (with exceptions above), return that, otherwise
 405                return empty string.
 406
 407            Returns empty string for other input strings that are not
 408            three characters long.
 409
 410  Example :
 411  Returns : a string of one letter ambiguous IUPAC amino acid codes
 412  Args    : ambiguous IUPAC nucleotide string
 413
 414
 415 =cut
 416
 417 sub translate {
 418     my ($self, $seq, $complete_codon) = @_;
 419     $self->throw("Calling translate without a seq argument!") unless defined $seq;
 420     return '' unless $seq;
 421
 422     my $id = $self->id;
 423     my ($partial) = 0;
 424     $partial = 2 if length($seq) % CODONSIZE == 2;
 425
 426     $seq = lc $seq;
 427     $seq =~ tr/u/t/;
 428     my $protein = "";
 429     if ($seq =~ /[^actg]/ ) { #ambiguous chars
 430         for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
 431             my $triplet = substr($seq, $i, CODONSIZE);
 432             if( $triplet eq $CODONGAP ) {
 433                 $protein .= $GAP;
 434             } elsif (exists $CODONS->{$triplet}) {
 435                 $protein .= substr($TABLES[$id],
 436                            $CODONS->{$triplet},1);
 437             } else {
 438                 $protein .= $self->_translate_ambiguous_codon($triplet);
 439             }
 440         }
 441     } else { # simple, strict translation
 442         for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
 443             my $triplet = substr($seq, $i, CODONSIZE);
 444             if( $triplet eq $CODONGAP ) {
 445                 $protein .= $GAP;
 446             }
 447             if (exists $CODONS->{$triplet}) {
 448                 $protein .= substr($TABLES[$id], $CODONS->{$triplet}, 1);
 449             } else {
 450                 $protein .= 'X';
 451             }
 452         }
 453     }
 454     if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides
 455         my $triplet = substr($seq, ($partial -4)). "n";
 456         if( $triplet eq $CODONGAP ) {
 457             $protein .= $GAP;
 458         } elsif (exists $CODONS->{$triplet}) {
 459             my $aa = substr($TABLES[$id], $CODONS->{$triplet},1);
 460             $protein .= $aa;
 461         } else {
 462             $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
 463         }
 464     }
 465     return $protein;
 466 }
 467
 468 sub _translate_ambiguous_codon {
 469     my ($self, $triplet, $partial) = @_;
 470     $partial ||= 0;
 471     my $id = $self->id;
 472     my $aa;
 473     my @codons = $self->unambiguous_codons($triplet);
 474     my %aas =();
 475     foreach my $codon (@codons) {
 476         $aas{substr($TABLES[$id],$CODONS->{$codon},1)} = 1;
 477     }
 478     my $count = scalar keys %aas;
 479     if ( $count == 1 ) {
 480         $aa = (keys %aas)[0];
 481     }
 482     elsif ( $count == 2 ) {
 483         if ($aas{'D'} and $aas{'N'}) {
 484             $aa = 'B';
 485         }
 486         elsif ($aas{'E'} and $aas{'Q'}) {
 487             $aa = 'Z';
 488         } else {
 489             $partial ? ($aa = '') : ($aa = 'X');
 490         }
 491     } else {
 492         $partial ? ($aa = '') :  ($aa = 'X');
 493     }
 494     return $aa;
 495 }
 496
 497 =head2 translate_strict
 498
 499  Title   : translate_strict
 500  Usage   : $obj->translate_strict('ACT')
 501  Function: returns one letter amino acid code for a codon input
 502
 503            Fast and simple translation. User is responsible to resolve
 504            ambiguous nucleotide codes before calling this
 505            method. Returns 'X' for unknown codons and an empty string
 506            for input strings that are not three characters long.
 507
 508            It is not recommended to use this method in a production
 509            environment. Use method translate, instead.
 510
 511  Example :
 512  Returns : A string
 513  Args    : a codon = a three nucleotide character string
 514
 515
 516 =cut
 517
 518 sub translate_strict{
 519    my ($self, $value) = @_;
 520    my $id = $self->{'id'};
 521
 522    $value  = lc $value;
 523    $value  =~ tr/u/t/;
 524
 525    return '' unless length $value == 3;
 526
 527    return 'X' unless defined $CODONS->{$value};
 528
 529    return substr( $TABLES[$id], $CODONS->{$value}, 1 );
 530 }
 531
 532 =head2 revtranslate
 533
 534  Title   : revtranslate
 535  Usage   : $obj->revtranslate('G')
 536  Function: returns codons for an amino acid
 537
  538            Returns an empty list for unknown amino acid
 539            codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
 540            Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
 541            letter amino acid codes are accepted. '*' and 'Ter' are
 542            used for terminator.
 543
 544            By default, the output codons are shown in DNA.  If the
 545            output is needed in RNA (tr/t/u/), add a second argument
 546            'RNA'.
 547
 548  Example : $obj->revtranslate('Gly', 'RNA')
 549  Returns : An array of three lower case letter strings i.e. codons
 550  Args    : amino acid, 'RNA'
 551
 552 =cut
 553
 554 sub revtranslate {
 555     my ($self, $value, $coding) = @_;
 556     my @codons;
 557
 558     if (length($value) == 3 ) {
 559         $value = lc $value;
 560         $value = ucfirst $value;
 561         $value = $THREELETTERSYMBOLS{$value};
 562     }
 563     if (    defined $value and $value =~ /$VALID_PROTEIN/
 564         and length($value) == 1
 565         ) {
 566         my $id = $self->{'id'};
 567
 568         $value = uc $value;
 569         my @aas = @{$IUPAC_AA{$value}};
 570         foreach my $aa (@aas) {
 571             #print $aa, " -2\n";
 572             $aa = '\*' if $aa eq '*';
 573             while ($TABLES[$id] =~ m/$aa/g) {
 574                 my $p = pos $TABLES[$id];
 575                 push (@codons, $TRCOL->{--$p});
 576             }
 577         }
 578     }
 579
 580     if ($coding and uc ($coding) eq 'RNA') {
 581         for my $i (0..$#codons)  {
 582             $codons[$i] =~ tr/t/u/;
 583         }
 584     }
 585
 586    return @codons;
 587 }
 588
 589 =head2 reverse_translate_all
 590
 591  Title   : reverse_translate_all
 592  Usage   : my $iup_str = $cttable->reverse_translate_all($seq_object)
 593            my $iup_str = $cttable->reverse_translate_all($seq_object,
 594                                                          $cutable,
 595                                                          15);
 596  Function: reverse translates a protein sequence into IUPAC nucleotide
 597            sequence. An 'X' in the protein sequence is converted to 'NNN'
 598            in the nucleotide sequence.
 599  Returns : a string
 600  Args    : a Bio::PrimarySeqI compatible object (mandatory)
 601            a Bio::CodonUsage::Table object and a threshold if only
 602              codons with a relative frequency above the threshold are
 603              to be considered.
 604 =cut
 605
 606 sub reverse_translate_all {
 607     my ($self, $obj, $cut, $threshold) = @_;
 608
 609     ## check args are OK
 610
 611     if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
 612         $self->throw(" I need a Bio::PrimarySeqI object, not a [".
 613                         ref($obj) . "]");
 614         }
 615     if($obj->alphabet ne 'protein') {
 616         $self->throw("Cannot reverse translate, need an amino acid sequence .".
 617                      "This sequence is of type [" . $obj->alphabet ."]");
 618         }
 619     my @data;
 620     my @seq = split '', $obj->seq;
 621
 622     ## if we're not supplying a codon usage table...
 623     if( !$cut && !$threshold) {
 624         ## get lists of possible codons for each aa.
 625         for my $aa (@seq) {
 626             if ($aa =~ /x/i) {
 627                 push @data, (['NNN']);
 628             }else {
 629                 my @cods = $self->revtranslate($aa);
 630                 push @data, \@cods;
 631             }
 632         }
 633     }else{
 634     #else we are supplying a codon usage table, we just want common codons
 635     #check args first.
 636         if(!$cut->isa('Bio::CodonUsage::Table'))    {
 637             $self->throw("I need a Bio::CodonUsage::Table object, not a [".
 638                      ref($cut). "].");
 639             }
 640         my $cod_ref = $cut->probable_codons($threshold);
 641         for my $aa (@seq) {
 642             if ($aa =~ /x/i) {
 643                 push @data, (['NNN']);
 644                 next;
 645                 }
 646             push @data, $cod_ref->{$aa};
 647         }
 648     }
 649
 650     return $self->_make_iupac_string(\@data);
 651 }
 652
 653 =head2 reverse_translate_best
 654
 655  Title   : reverse_translate_best
 656  Usage   : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
 657  Function: Reverse translates a protein sequence into plain nucleotide
 658            sequence (GATC), uses the most common codon for each amino acid
 659  Returns : A string
 660  Args    : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
 661
 662 =cut
 663
 664 sub reverse_translate_best {
 665
 666     my ($self, $obj, $cut) = @_;
 667
 668     if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
 669         $self->throw(" I need a Bio::PrimarySeqI object, not a [".
 670                          ref($obj) . "]");
 671     }
 672     if ($obj->alphabet ne 'protein')    {
 673         $self->throw("Cannot reverse translate, need an amino acid sequence .".
 674                          "This sequence is of type [" . $obj->alphabet ."]");
 675     }
 676     if ( !$cut | !$cut->isa('Bio::CodonUsage::Table'))  {
 677         $self->throw("I need a Bio::CodonUsage::Table object, not a [".
 678                          ref($cut). "].");
 679     }
 680
 681     my $str = '';
 682     my @seq = split '', $obj->seq;
 683
 684     my $cod_ref = $cut->most_common_codons();
 685
 686     for my $aa ( @seq ) {
 687         if ($aa =~ /x/i) {
 688             $str .= 'NNN';
 689             next;
 690         }
 691         if ( defined $cod_ref->{$aa} ) {
 692             $str .= $cod_ref->{$aa};
 693         } else {
 694             $self->throw("Input sequence contains invalid character: $aa");
 695         }
 696     }
 697    return $str;
 698 }
 699
 700 =head2 is_start_codon
 701
 702  Title   : is_start_codon
 703  Usage   : $obj->is_start_codon('ATG')
 704  Function: returns true (1) for all codons that can be used as a
 705            translation start, false (0) for others.
 706  Example : $myCodonTable->is_start_codon('ATG')
 707  Returns : boolean
 708  Args    : codon
 709
 710 =cut
 711
 712 sub is_start_codon{
 713    shift->_codon_is( shift, \@STARTS, 'M' );
 714 }
 715
 716 =head2 is_ter_codon
 717
 718  Title   : is_ter_codon
 719  Usage   : $obj->is_ter_codon('GAA')
 720  Function: returns true (1) for all codons that can be used as a
  721            translation terminator, false (0) for others.
 722  Example : $myCodonTable->is_ter_codon('ATG')
 723  Returns : boolean
 724  Args    : codon
 725
 726 =cut
 727
 728 sub is_ter_codon{
 729    my ($self, $value) = @_;
 730    my $id = $self->{'id'};
 731
 732    # We need to ensure U is mapped to T (ie. UAG)
 733    $value = uc $value;
 734    $value =~ tr/U/T/;
 735
 736    if (length $value != 3  )  {
 737        # Incomplete codons are not stop codons
 738        return 0;
 739    } else {
 740        my $result = 0;
 741
 742        # For all the possible codons, if any are not a stop
 743        # codon, fail immediately
 744        for my $c ( $self->unambiguous_codons($value) ) {
 745            my $m = substr( $TABLES[$id], $CODONS->{$c}, 1 );
 746            if($m eq $TERMINATOR) {
 747                $result = 1;
 748            } else {
 749                return 0;
 750            }
 751        }
 752        return $result;
 753    }
 754 }
 755
 756 # desc: compares the passed value with a single entry in the given
 757 #       codon table
 758 # args: a value (typically a three-char string like 'atg'),
 759 #       a reference to the appropriate set of codon tables,
 760 #       a single-character value to check for at the position in the
 761 #       given codon table
 762 # ret:  boolean, true if the given codon table contains the $key at the
 763 #       position corresponding to $value
 764 sub _codon_is {
 765    my ($self, $value, $table, $key ) = @_;
 766
 767    return 0 unless length $value == 3;
 768
 769    $value  = lc $value;
 770    $value  =~ tr/u/t/;
 771
 772    my $id = $self->{'id'};
 773    for my $c ( $self->unambiguous_codons($value) ) {
 774        my $m = substr( $table->[$id], $CODONS->{$c}, 1 );
 775        if ($m eq $key) { return 1; }
 776    }
 777    return 0;
 778 }
 779
 780 =head2 is_unknown_codon
 781
 782  Title   : is_unknown_codon
 783  Usage   : $obj->is_unknown_codon('GAJ')
 784  Function: returns false (0) for all codons that are valid,
 785         true (1) for others.
 786  Example : $myCodonTable->is_unknown_codon('NTG')
 787  Returns : boolean
 788  Args    : codon
 789
 790
 791 =cut
 792
 793 sub is_unknown_codon{
 794    my ($self, $value) = @_;
 795    $value  = lc $value;
 796    $value  =~ tr/u/t/;
 797    return 1 unless $self->unambiguous_codons($value);
 798    return 0;
 799 }
 800
 801 =head2 unambiguous_codons
 802
 803  Title   : unambiguous_codons
 804  Usage   : @codons = $self->unambiguous_codons('ACN')
  805  Returns : array of strings (lower case unambiguous three-nucleotide codons)
 806  Args    : a codon = a three IUPAC nucleotide character string
 807
 808 =cut
 809
 810 sub unambiguous_codons{
 811     my ($self,$value) = @_;
 812     my @nts = map { $IUPAC_DNA{uc $_} }  split(//, $value);
 813
 814     my @codons;
 815     for my $i ( @{$nts[0]} ) {
 816     for my $j ( @{$nts[1]} ) {
 817     for my $k ( @{$nts[2]} ) {
 818         push @codons, lc "$i$j$k";
 819     }}}
 820     return @codons;
 821 }
 822
 823 =head2 _unambiquous_codons
 824
 825 deprecated, now an alias for unambiguous_codons
 826
 827 =cut
 828
 829 sub _unambiquous_codons {
 830     unambiguous_codons( undef, @_ );
 831 }
 832
 833 =head2 add_table
 834
 835  Title   : add_table
 836  Usage   : $newid = $ct->add_table($name, $table, $starts)
 837  Function: Add a custom Codon Table into the object.
 838            Know what you are doing, only the length of
 839            the argument strings is checked!
 840  Returns : the id of the new codon table
 841  Args    : name, a string, optional (can be empty)
 842            table, a string of 64 characters
 843            startcodons, a string of 64 characters, defaults to standard
 844
 845 =cut
 846
 847 sub add_table {
 848     my ($self, $name, $table, $starts) = @_;
 849
 850     $name   ||= 'Custom' . $#NAMES + 1;
 851     $starts ||= $STARTS[1];
 852     $self->throw('Suspect input!')
 853         unless length($table) == 64 and length($starts) == 64;
 854
 855     push @NAMES,  $name;
 856     push @TABLES, $table;
 857     push @STARTS, $starts;
 858
 859     return $#NAMES;
 860 }
 861
 862 sub _make_iupac_string {
 863     my ($self, $cod_ref) = @_;
 864     if(ref($cod_ref) ne 'ARRAY') {
 865         $self->throw(" I need a reference to a list of references to codons, ".
 866                      " not a [". ref($cod_ref) . "].");
 867         }
 868     my %iupac_hash   = Bio::Tools::IUPAC->iupac_rev_iub();
 869     my $iupac_string = ''; ## the string to be returned
 870     for my $aa (@$cod_ref) {
 871
 872         ## scan through codon positions, record the differing values,
 873         # then look up in the iub hash
 874         for my $index(0..2) {
 875             my %h;
 876             map { my $k = substr($_,$index,1);
 877                 $h{$k}  = undef;} @$aa;
 878             my $lookup_key = join '', sort{$a cmp $b}keys %h;
 879
 880             ## extend string
 881             $iupac_string .= $iupac_hash{uc$lookup_key};
 882         }
 883     }
 884     return $iupac_string;
 885 }
 886
 887
 888 1;
 889
 890 # Follows the content of
 891 # ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt, which is the NCBI
 892 # genetic codon table in ASN.1 value notation / print format.  We do
 893 # not have a ASN.1 decoder for value notation but it's easy enough to
 894 # parse.
 895
 896 __DATA__
 897 --**************************************************************************
 898 --  This is the NCBI genetic code table
 899 --  Initial base data set from Andrzej Elzanowski while at PIR International
 900 --  Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI
 901 --  Base 1-3 of each codon have been added as comments to facilitate
 902 --    readability at the suggestion of Peter Rice, EMBL
 903 --  Later additions by Taxonomy Group staff at NCBI
 904 --
 905 --  Version 4.6
 906 --     Renamed genetic code 24 to Rhabdopleuridae Mitochondrial
 907 --
 908 --  Version 4.5
 909 --     Added Cephalodiscidae mitochondrial genetic code 33
 910 --
 911 --  Version 4.4
 912 --     Added GTG as start codon for genetic code 3
 913 --     Added Balanophoraceae plastid genetic code 32
 914 --
 915 --  Version 4.3
 916 --     Change to CTG -> Leu in genetic codes 27, 28, 29, 30
 917 --
 918 --  Version 4.2
 919 --     Added Karyorelict nuclear genetic code 27
 920 --     Added Condylostoma nuclear genetic code 28
 921 --     Added Mesodinium nuclear genetic code 29
 922 --     Added Peritrich nuclear genetic code 30
 923 --     Added Blastocrithidia nuclear genetic code 31
 924 --
 925 --  Version 4.1
 926 --     Added Pachysolen tannophilus nuclear genetic code 26
 927 --
 928 --  Version 4.0
 929 --     Updated version to reflect numerous undocumented changes:
 930 --     Corrected start codons for genetic code 25
 931 --     Name of new genetic code is Candidate Division SR1 and Gracilibacteria
 932 --     Added candidate division SR1 nuclear genetic code 25
 933 --     Added GTG as start codon for genetic code 24
 934 --     Corrected Pterobranchia Mitochondrial genetic code (24)
 935 --     Added genetic code 24, Pterobranchia Mitochondrial
 936 --     Genetic code 11 is now Bacterial, Archaeal and Plant Plastid
 937 --     Fixed capitalization of mitochondrial in codes 22 and 23
 938 --     Added GTG, ATA, and TTG as alternative start codons to code 13
 939 --
 940 --  Version 3.9
 941 --     Code 14 differs from code 9 only by translating UAA to Tyr rather than
 942 --     STOP.  A recent study (Telford et al, 2000) has found no evidence that
 943 --     the codon UAA codes for Tyr in the flatworms, but other opinions exist.
 944 --     There are very few GenBank records that are translated with code 14,
 945 --     but a test translation shows that retranslating these records with code
 946 --     9 can cause premature terminations.  Therefore, GenBank will maintain
 947 --     code 14 until further information becomes available.
 948 --
 949 --  Version 3.8
 950 --     Added GTG start to Echinoderm mitochondrial code, code 9
 951 --
 952 --  Version 3.7
 953 --     Added code 23 Thraustochytrium mitochondrial code
 954 --        formerly OGMP code 93
 955 --        submitted by Gertraude Berger, Ph.D.
 956 --
 957 --  Version 3.6
 958 --     Added code 22 TAG-Leu, TCA-stop
 959 --        found in mitochondrial DNA of Scenedesmus obliquus
 960 --        submitted by Gertraude Berger, Ph.D.
 961 --        Organelle Genome Megasequencing Program, Univ Montreal
 962 --
 963 --  Version 3.5
 964 --     Added code 21, Trematode Mitochondrial
 965 --       (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)
 966 --     Added code 16, Chlorophycean Mitochondrial
 967 --       (TAG can translated to Leucine instaed to STOP in chlorophyceans
 968 --        and fungi)
 969 --
 970 --  Version 3.4
 971 --     Added CTG,TTG as allowed alternate start codons in Standard code.
 972 --        Prats et al. 1989, Hann et al. 1992
 973 --
 974 --  Version 3.3 - 10/13/95
 975 --     Added alternate intiation codon ATC to code 5
 976 --        based on complete mitochondrial genome of honeybee
 977 --        Crozier and Crozier (1993)
 978 --
 979 --  Version 3.2 - 6/24/95
 980 --  Code       Comments
 981 --   10        Alternative Ciliate Macronuclear renamed to Euplotid Macro...
 982 --   15        Blepharisma Macro.. code added
 983 --    5        Invertebrate Mito.. GTG allowed as alternate initiator
 984 --   11        Eubacterial renamed to Bacterial as most alternate starts
 985 --               have been found in Archea
 986 --
 987 --
 988 --  Version 3.1 - 1995
 989 --  Updated as per Andrzej Elzanowski at NCBI
 990 --     Complete documentation in NCBI toolkit documentation
 991 --  Note: 2 genetic codes have been deleted
 992 --
 993 --   Old id   Use id     - Notes
 994 --
 995 --   id 7      id 4      - Kinetoplast code now merged in code id 4
 996 --   id 8      id 1      - all plant chloroplast differences due to RNA edit
 997 --
 998 --
 999 --*************************************************************************
1000
1001 Genetic-code-table ::= {
1002  {
1003   name "Standard" ,
1004   name "SGC0" ,
1005   id 1 ,
1006   ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1007   sncbieaa "---M------**--*----M---------------M----------------------------"
1008   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1009   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1010   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1011  },
1012  {
1013   name "Vertebrate Mitochondrial" ,
1014   name "SGC1" ,
1015   id 2 ,
1016   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
1017   sncbieaa "----------**--------------------MMMM----------**---M------------"
1018   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1019   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1020   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1021  },
1022  {
1023   name "Yeast Mitochondrial" ,
1024   name "SGC2" ,
1025   id 3 ,
1026   ncbieaa  "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1027   sncbieaa "----------**----------------------MM---------------M------------"
1028   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1029   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1030   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1031  },
1032  {
1033     name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate
1034  Mitochondrial; Mycoplasma; Spiroplasma" ,
1035   name "SGC3" ,
1036   id 4 ,
1037   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1038   sncbieaa "--MM------**-------M------------MMMM---------------M------------"
1039   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1040   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1041   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1042  },
1043  {
1044   name "Invertebrate Mitochondrial" ,
1045   name "SGC4" ,
1046   id 5 ,
1047   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
1048   sncbieaa "---M------**--------------------MMMM---------------M------------"
1049   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1050   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1051   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1052  },
1053  {
1054   name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
1055   name "SGC5" ,
1056   id 6 ,
1057   ncbieaa  "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1058   sncbieaa "--------------*--------------------M----------------------------"
1059   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1060   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1061   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1062  },
1063  {
1064   name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
1065   name "SGC8" ,
1066   id 9 ,
1067   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
1068   sncbieaa "----------**-----------------------M---------------M------------"
1069   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1070   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1071   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1072  },
1073  {
1074   name "Euplotid Nuclear" ,
1075   name "SGC9" ,
1076   id 10 ,
1077   ncbieaa  "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1078   sncbieaa "----------**-----------------------M----------------------------"
1079   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1080   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1081   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1082  },
1083  {
1084   name "Bacterial, Archaeal and Plant Plastid" ,
1085   id 11 ,
1086   ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1087   sncbieaa "---M------**--*----M------------MMMM---------------M------------"
1088   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1089   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1090   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1091  },
1092  {
1093   name "Alternative Yeast Nuclear" ,
1094   id 12 ,
1095   ncbieaa  "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1096   sncbieaa "----------**--*----M---------------M----------------------------"
1097   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1098   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1099   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1100  },
1101  {
1102   name "Ascidian Mitochondrial" ,
1103   id 13 ,
1104   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
1105   sncbieaa "---M------**----------------------MM---------------M------------"
1106   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1107   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1108   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1109  },
1110  {
1111   name "Alternative Flatworm Mitochondrial" ,
1112   id 14 ,
1113   ncbieaa  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
1114   sncbieaa "-----------*-----------------------M----------------------------"
1115   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1116   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1117   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1118  } ,
1119  {
1120   name "Blepharisma Macronuclear" ,
1121   id 15 ,
1122   ncbieaa  "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1123   sncbieaa "----------*---*--------------------M----------------------------"
1124   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1125   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1126   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1127  } ,
1128  {
1129   name "Chlorophycean Mitochondrial" ,
1130   id 16 ,
1131   ncbieaa  "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1132   sncbieaa "----------*---*--------------------M----------------------------"
1133   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1134   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1135   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1136  } ,
1137  {
1138   name "Trematode Mitochondrial" ,
1139   id 21 ,
1140   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
1141   sncbieaa "----------**-----------------------M---------------M------------"
1142   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1143   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1144   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1145  } ,
1146  {
1147   name "Scenedesmus obliquus Mitochondrial" ,
1148   id 22 ,
1149   ncbieaa  "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1150   sncbieaa "------*---*---*--------------------M----------------------------"
1151   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1152   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1153   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1154  } ,
1155  {
1156   name "Thraustochytrium Mitochondrial" ,
1157   id 23 ,
1158   ncbieaa  "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1159   sncbieaa "--*-------**--*-----------------M--M---------------M------------"
1160   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1161   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1162   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1163  } ,
1164  {
1165   name "Rhabdopleuridae Mitochondrial" ,
1166   id 24 ,
1167   ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
1168   sncbieaa "---M------**-------M---------------M---------------M------------"
1169   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1170   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1171   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1172  } ,
1173  {
1174   name "Candidate Division SR1 and Gracilibacteria" ,
1175   id 25 ,
1176   ncbieaa  "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1177   sncbieaa "---M------**-----------------------M---------------M------------"
1178   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1179   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1180   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1181  } ,
1182  {
1183   name "Pachysolen tannophilus Nuclear" ,
1184   id 26 ,
1185   ncbieaa  "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1186   sncbieaa "----------**--*----M---------------M----------------------------"
1187   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1188   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1189   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1190  } ,
1191  {
1192   name "Karyorelict Nuclear" ,
1193   id 27 ,
1194   ncbieaa  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1195   sncbieaa "--------------*--------------------M----------------------------"
1196   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1197   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1198   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1199  } ,
1200  {
1201   name "Condylostoma Nuclear" ,
1202   id 28 ,
1203   ncbieaa  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1204   sncbieaa "----------**--*--------------------M----------------------------"
1205   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1206   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1207   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1208  } ,
1209  {
1210   name "Mesodinium Nuclear" ,
1211   id 29 ,
1212   ncbieaa  "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1213   sncbieaa "--------------*--------------------M----------------------------"
1214   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1215   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1216   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1217  } ,
1218  {
1219   name "Peritrich Nuclear" ,
1220   id 30 ,
1221   ncbieaa  "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1222   sncbieaa "--------------*--------------------M----------------------------"
1223   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1224   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1225   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1226  } ,
1227  {
1228   name "Blastocrithidia Nuclear" ,
1229   id 31 ,
1230   ncbieaa  "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1231   sncbieaa "----------**-----------------------M----------------------------"
1232   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1233   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1234   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1235  } ,
1236  {
1237   name "Balanophoraceae Plastid" ,
1238   id 32 ,
1239   ncbieaa  "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
1240   sncbieaa "---M------*---*----M------------MMMM---------------M------------"
1241   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1242   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1243   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1244  } ,
1245  {
1246   name "Cephalodiscidae Mitochondrial" ,
1247   id 33 ,
1248   ncbieaa  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
1249   sncbieaa "---M-------*-------M---------------M---------------M------------"
1250   -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
1251   -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
1252   -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
1253  }
1254 }