From a73febe952c50b92f44a4693a7f13e12589c6b10 Mon Sep 17 00:00:00 2001 From: David Miguel Susano Pinto Date: Fri, 26 Apr 2024 01:50:04 +0100 Subject: [PATCH] Bio::Tools::CodonTable: include and parse the gc.prt file (#391) To make it simpler to keep things up to date, just include the gc.prt (the NCBI data file of codon tables in ASN.1 print format) below __DATA__ and parse it. The file is very small and parsing it is very quick. We can't read DATA in a BEGIN block so this also changes the BEGIN into a INIT block. I'm not sure why the code there needs to be inside them though. --- lib/Bio/Tools/CodonTable.pm | 563 +++++++++++++++++++++++++++++++++++--------- t/SeqTools/CodonTable.t | 8 +- 2 files changed, 456 insertions(+), 115 deletions(-) diff --git a/lib/Bio/Tools/CodonTable.pm b/lib/Bio/Tools/CodonTable.pm index 081d9ae72..2d702733d 100644 --- a/lib/Bio/Tools/CodonTable.pm +++ b/lib/Bio/Tools/CodonTable.pm @@ -193,124 +193,93 @@ use base qw(Bio::Root::Root); # first set internal values for all translation tables - -BEGIN { +# XXX: I'm not sure why it needs to be on an INIT block :/ +INIT { use constant CODONSIZE => 3; $GAP = '-'; $CODONGAP = $GAP x CODONSIZE; - @NAMES = ( - 'Strict', # 0 (special option for ATG-only start) - 'Standard', # 1 - 'Vertebrate Mitochondrial', # 2 - 'Yeast Mitochondrial', # 3 - 'Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma', # 4 - 'Invertebrate Mitochondrial', # 5 - 'Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear', # 6 - '', - '', - 'Echinoderm Mitochondrial; Flatworm Mitochondrial', # 9 - 'Euplotid Nuclear', # 10 - 'Bacterial, Archaeal and Plant Plastid', # 11 - 'Alternative Yeast Nuclear', # 12 - 'Ascidian Mitochondrial', # 13 - 'Alternative Flatworm Mitochondrial', # 14 - 'Blepharisma Macronuclear', # 15 - 'Chlorophycean Mitochondrial', # 16 - '', - '', - '', - '', - 'Trematode Mitochondrial', # 21 - 'Scenedesmus obliquus Mitochondrial', # 22 - 'Thraustochytrium 
Mitochondrial', # 23 - 'Rhabdopleuridae Mitochondrial', # 24 - 'Candidate Division SR1 and Gracilibacteria', # 25 - 'Pachysolen tannophilus Nuclear', # 26 - 'Karyorelict Nuclear', # 27 - 'Condylostoma Nuclear', # 28 - 'Mesodinium Nuclear', # 29 - 'Peritrich Nuclear', # 30 - 'Blastocrithidia Nuclear', # 31 - 'Balanophoraceae Plastid', # 32 - ); - - @TABLES = qw( - FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG - FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - '' - '' - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG - FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG - FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG - FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - '' - '' - '' - '' - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG - FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG - FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - 
FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG - ); - - # (bases used for these tables, for reference) - # 1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG - # 2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG - # 3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG - - @STARTS = qw( - ----------**--*--------------------M---------------------------- - ---M------**--*----M---------------M---------------------------- - ----------**--------------------MMMM----------**---M------------ - ----------**----------------------MM---------------M------------ - --MM------**-------M------------MMMM---------------M------------ - ---M------**--------------------MMMM---------------M------------ - --------------*--------------------M---------------------------- - '' - '' - ----------**-----------------------M---------------M------------ - ----------**-----------------------M---------------------------- - ---M------**--*----M------------MMMM---------------M------------ - ----------**--*----M---------------M---------------------------- - ---M------**----------------------MM---------------M------------ - -----------*-----------------------M---------------------------- - ----------*---*--------------------M---------------------------- - ----------*---*--------------------M---------------------------- - '' - '' - '' - '' - ----------**-----------------------M---------------M------------ - ------*---*---*--------------------M---------------------------- - --*-------**--*-----------------M--M---------------M------------ - ---M------**-------M---------------M---------------M------------ - ---M------**-----------------------M---------------M------------ - ----------**--*----M---------------M---------------------------- - 
--------------*--------------------M----------------------------
-        ----------**--*--------------------M----------------------------
-        --------------*--------------------M----------------------------
-        --------------*--------------------M----------------------------
-        ----------**-----------------------M----------------------------
-        ---M------*---*----M------------MMMM---------------M------------
-    );
+    # Private helper that parses the NCBI genetic code file
+    # ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt, whose content is
+    # stored below __DATA__ in this module (see the end of the file),
+    # and fills the @NAMES, @TABLES, and @STARTS variables.
+    #
+    # Index N of each of those arrays describes the table with id N.
+    # Ids that gc.prt does not define (e.g., 7 and 8) get '' as
+    # placeholder in all three arrays.
+    #
+    # Takes no arguments and returns nothing useful.  Dies if a table
+    # description has no numeric id or is not closed before the end of
+    # the data, since either means the content below __DATA__ is
+    # broken.
+    #
+    # To update to a new release of gc.prt, replace the content below
+    # __DATA__.
+    sub parse_gc_prt {
+
+        # Table 0 is not in gc.prt: it is the special "Strict" option
+        # (ATG-only start), so seed the tables hash with it.
+        my %tables = (
+            0 => {
+                name     => "Strict",
+                ncbieaa  => "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+                sncbieaa => "----------**--*--------------------M----------------------------",
+            },
+        );
+
+        while (defined(my $line = <DATA>)) {
+            next if $line =~ /^\s*--/;           # skip comment lines
+            next unless $line =~ /^\s*\{\s*$/;   # start of a table description
+
+            my %table = (name => "", ncbieaa => "", sncbieaa => "");
+            my $id;
+
+            while (1) {
+                $line = <DATA>;
+                # Without this check a truncated file would make us
+                # loop forever, because undef never matches the end of
+                # table pattern.
+                die "Bio::Tools::CodonTable: gc.prt data ended inside a table description\n"
+                    unless defined $line;
+                last if $line =~ /^\s*\}\s*,?$/; # we reached the end of table description
+                next unless $line =~ /^\s*(name|id|ncbieaa|sncbieaa)\s+(.+)/;
+                my ($key, $rem) = ($1, $2);
+
+                if ($key eq "id") {
+                    # Only trust the capture when the match succeeded.
+                    $id = $1 + 0 if $rem =~ /^(\d+)/;
+                    next;
+                }
+
+                # The remaining keys --- name, ncbieaa, and sncbieaa
+                # --- are strings which may be multi-line (e.g., name
+                # for table with id 4).  We are assuming that there is
+                # no " character inside the value so we keep appending
+                # lines until we find an end ".  The newline of each
+                # appended line is dropped right away, otherwise a
+                # value spanning three or more lines could never
+                # match.
+                until ($rem =~ /^"[^"]*"/ || eof DATA) {
+                    my $more = <DATA>;
+                    $more =~ s/\n//g;
+                    $rem .= $more;
+                }
+                # No closing " means we hit the end of the data; the
+                # next read reports that.
+                next unless $rem =~ /^"([^"]*)"/;
+                my $str = $1;
+
+                if ($key eq "name") {
+                    # ignore alternative names, e.g. SGC0, only keep
+                    # the first name listed.
+                    $table{name} = $str if $table{name} eq "";
+                } else {
+                    $table{$key} = $str;
+                }
+            }
+
+            # Defaulting to 0 here would silently replace the "Strict"
+            # table.
+            die "Bio::Tools::CodonTable: table '$table{name}' in gc.prt data has no numeric id\n"
+                unless defined $id;
+            $tables{$id} = \%table;
+        }
+        close DATA;
+
+        # Fill every index from 0 up to and including the highest id.
+        # The range must be inclusive or the last table of gc.prt is
+        # never stored.
+        my $highest_id = (sort { $a <=> $b } keys %tables)[-1];
+        for my $i (0 .. $highest_id) {
+            my $table = $tables{$i}
+                || { name => '', ncbieaa => '', sncbieaa => '' };
+            push @NAMES,  $table->{name};
+            push @TABLES, $table->{ncbieaa};
+            push @STARTS, $table->{sncbieaa};
+        }
+    }
+    parse_gc_prt();
+    undef &parse_gc_prt;
     my @nucs = qw(t c a g);
     my $x = 0;
@@ -917,3 +886,369 @@ sub _make_iupac_string {
 1;
+
+# Follows the content of
+# ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt, which is the NCBI
+# genetic codon table in ASN.1 value notation / print format.  We do
+# not have an ASN.1 decoder for value notation but it's easy enough to
+# parse.
+ +__DATA__ +--************************************************************************** +-- This is the NCBI genetic code table +-- Initial base data set from Andrzej Elzanowski while at PIR International +-- Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI +-- Base 1-3 of each codon have been added as comments to facilitate +-- readability at the suggestion of Peter Rice, EMBL +-- Later additions by Taxonomy Group staff at NCBI +-- +-- Version 4.6 +-- Renamed genetic code 24 to Rhabdopleuridae Mitochondrial +-- +-- Version 4.5 +-- Added Cephalodiscidae mitochondrial genetic code 33 +-- +-- Version 4.4 +-- Added GTG as start codon for genetic code 3 +-- Added Balanophoraceae plastid genetic code 32 +-- +-- Version 4.3 +-- Change to CTG -> Leu in genetic codes 27, 28, 29, 30 +-- +-- Version 4.2 +-- Added Karyorelict nuclear genetic code 27 +-- Added Condylostoma nuclear genetic code 28 +-- Added Mesodinium nuclear genetic code 29 +-- Added Peritrich nuclear genetic code 30 +-- Added Blastocrithidia nuclear genetic code 31 +-- +-- Version 4.1 +-- Added Pachysolen tannophilus nuclear genetic code 26 +-- +-- Version 4.0 +-- Updated version to reflect numerous undocumented changes: +-- Corrected start codons for genetic code 25 +-- Name of new genetic code is Candidate Division SR1 and Gracilibacteria +-- Added candidate division SR1 nuclear genetic code 25 +-- Added GTG as start codon for genetic code 24 +-- Corrected Pterobranchia Mitochondrial genetic code (24) +-- Added genetic code 24, Pterobranchia Mitochondrial +-- Genetic code 11 is now Bacterial, Archaeal and Plant Plastid +-- Fixed capitalization of mitochondrial in codes 22 and 23 +-- Added GTG, ATA, and TTG as alternative start codons to code 13 +-- +-- Version 3.9 +-- Code 14 differs from code 9 only by translating UAA to Tyr rather than +-- STOP. A recent study (Telford et al, 2000) has found no evidence that +-- the codon UAA codes for Tyr in the flatworms, but other opinions exist. 
+-- There are very few GenBank records that are translated with code 14, +-- but a test translation shows that retranslating these records with code +-- 9 can cause premature terminations. Therefore, GenBank will maintain +-- code 14 until further information becomes available. +-- +-- Version 3.8 +-- Added GTG start to Echinoderm mitochondrial code, code 9 +-- +-- Version 3.7 +-- Added code 23 Thraustochytrium mitochondrial code +-- formerly OGMP code 93 +-- submitted by Gertraude Berger, Ph.D. +-- +-- Version 3.6 +-- Added code 22 TAG-Leu, TCA-stop +-- found in mitochondrial DNA of Scenedesmus obliquus +-- submitted by Gertraude Berger, Ph.D. +-- Organelle Genome Megasequencing Program, Univ Montreal +-- +-- Version 3.5 +-- Added code 21, Trematode Mitochondrial +-- (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990) +-- Added code 16, Chlorophycean Mitochondrial +-- (TAG can translated to Leucine instaed to STOP in chlorophyceans +-- and fungi) +-- +-- Version 3.4 +-- Added CTG,TTG as allowed alternate start codons in Standard code. +-- Prats et al. 1989, Hann et al. 1992 +-- +-- Version 3.3 - 10/13/95 +-- Added alternate intiation codon ATC to code 5 +-- based on complete mitochondrial genome of honeybee +-- Crozier and Crozier (1993) +-- +-- Version 3.2 - 6/24/95 +-- Code Comments +-- 10 Alternative Ciliate Macronuclear renamed to Euplotid Macro... +-- 15 Blepharisma Macro.. code added +-- 5 Invertebrate Mito.. 
GTG allowed as alternate initiator +-- 11 Eubacterial renamed to Bacterial as most alternate starts +-- have been found in Archea +-- +-- +-- Version 3.1 - 1995 +-- Updated as per Andrzej Elzanowski at NCBI +-- Complete documentation in NCBI toolkit documentation +-- Note: 2 genetic codes have been deleted +-- +-- Old id Use id - Notes +-- +-- id 7 id 4 - Kinetoplast code now merged in code id 4 +-- id 8 id 1 - all plant chloroplast differences due to RNA edit +-- +-- +--************************************************************************* + +Genetic-code-table ::= { + { + name "Standard" , + name "SGC0" , + id 1 , + ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "---M------**--*----M---------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Vertebrate Mitochondrial" , + name "SGC1" , + id 2 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", + sncbieaa "----------**--------------------MMMM----------**---M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Yeast Mitochondrial" , + name "SGC2" , + id 3 , + ncbieaa "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**----------------------MM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate + 
Mitochondrial; Mycoplasma; Spiroplasma" , + name "SGC3" , + id 4 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--MM------**-------M------------MMMM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Invertebrate Mitochondrial" , + name "SGC4" , + id 5 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", + sncbieaa "---M------**--------------------MMMM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" , + name "SGC5" , + id 6 , + ncbieaa "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--------------*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Echinoderm Mitochondrial; Flatworm Mitochondrial" , + name "SGC8" , + id 9 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", + sncbieaa "----------**-----------------------M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Euplotid Nuclear" , + name "SGC9" , + id 10 , + ncbieaa 
"FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**-----------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Bacterial, Archaeal and Plant Plastid" , + id 11 , + ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "---M------**--*----M------------MMMM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Alternative Yeast Nuclear" , + id 12 , + ncbieaa "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**--*----M---------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Ascidian Mitochondrial" , + id 13 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", + sncbieaa "---M------**----------------------MM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + }, + { + name "Alternative Flatworm Mitochondrial" , + id 14 , + ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", + sncbieaa "-----------*-----------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG 
+ -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Blepharisma Macronuclear" , + id 15 , + ncbieaa "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------*---*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Chlorophycean Mitochondrial" , + id 16 , + ncbieaa "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------*---*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Trematode Mitochondrial" , + id 21 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", + sncbieaa "----------**-----------------------M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Scenedesmus obliquus Mitochondrial" , + id 22 , + ncbieaa "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "------*---*---*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Thraustochytrium Mitochondrial" , + id 23 , + ncbieaa 
"FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--*-------**--*-----------------M--M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Rhabdopleuridae Mitochondrial" , + id 24 , + ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", + sncbieaa "---M------**-------M---------------M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Candidate Division SR1 and Gracilibacteria" , + id 25 , + ncbieaa "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "---M------**-----------------------M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Pachysolen tannophilus Nuclear" , + id 26 , + ncbieaa "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**--*----M---------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Karyorelict Nuclear" , + id 27 , + ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--------------*--------------------M----------------------------" + -- Base1 
TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Condylostoma Nuclear" , + id 28 , + ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**--*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Mesodinium Nuclear" , + id 29 , + ncbieaa "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--------------*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Peritrich Nuclear" , + id 30 , + ncbieaa "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "--------------*--------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Blastocrithidia Nuclear" , + id 31 , + ncbieaa "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "----------**-----------------------M----------------------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Balanophoraceae 
Plastid" , + id 32 , + ncbieaa "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + sncbieaa "---M------*---*----M------------MMMM---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } , + { + name "Cephalodiscidae Mitochondrial" , + id 33 , + ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", + sncbieaa "---M-------*-------M---------------M---------------M------------" + -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG + -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG + -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG + } +} diff --git a/t/SeqTools/CodonTable.t b/t/SeqTools/CodonTable.t index 5fa5e0344..f1e443e9f 100644 --- a/t/SeqTools/CodonTable.t +++ b/t/SeqTools/CodonTable.t @@ -6,7 +6,7 @@ use strict; BEGIN { use Bio::Root::Test; - test_begin(-tests => 84); + test_begin(-tests => 86); use_ok('Bio::Tools::CodonTable'); use_ok('Bio::CodonUsage::IO'); @@ -21,6 +21,7 @@ isa_ok $myCodonTable, 'Bio::Tools::CodonTable'; # defaults to ID 1 "Standard" $myCodonTable = Bio::Tools::CodonTable->new(); is $myCodonTable->id(), 1; +is $myCodonTable->name(), "Standard"; # invalid table should produce a warn and set default table (1) my $stderr = ''; @@ -256,3 +257,8 @@ $myCodonTable->id(25); is $myCodonTable->is_start_codon('GTG'), 1; is $myCodonTable->is_start_codon('CTG'), 0; is $myCodonTable->translate_strict('TGA'), 'G'; + +# The name for this is pretty long and spans mutliple lines. Confirm +# it was parsed correctly from the ASN.1 file. +$myCodonTable->id(4); +is $myCodonTable->name(), "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma"; -- 2.11.4.GIT