Bio/DB/Taxonomy/sqlite.pm

   1 #
   2 # BioPerl module for Bio::DB::Taxonomy::flatfile
   3 #
   4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   5 #
   6 # Cared for by Chris Fields <cjfields-at-cpan-dot-org>
   7 #
   8 # Copyright Chris Fields
   9 #
  10 # You may distribute this module under the same terms as perl itself
  11
  12 # POD documentation - main docs before the code
  13
  14 =head1 NAME
  15
  16 Bio::DB::Taxonomy::sqlite - SQLite-based implementation of Bio::DB::Taxonomy::flatfile
  17
  18 =head1 SYNOPSIS
  19
  20   use Bio::DB::Taxonomy;
  21
  22   my $db = Bio::DB::Taxonomy->new(-source    => 'sqlite',
  23                                   -db        => 'mytax.db'  # default 'taxonomy.sqlite'
  24                                   -nodesfile => 'nodes.dmp',
  25                                   -namesfile => 'names.dmp');
  26
  27 =head1 DESCRIPTION
  28
  29 This is an implementation of Bio::DB::Taxonomy which stores and accesses the
  30 NCBI taxonomy using a simple SQLite3 database stored locally on disk.
  31
  32 With this implementation, one can do the same basic searches as with the 'flatfile'
  33 database.  A test lookup of 1000 NCBI TaxIDs with full lineage information took
  34 about 2 seconds on my older MacBook Pro laptop with an on-disk implementation.
  35
  36 A few key differences:
  37
  38 =over 4
  39
  40 =item * You can use typical SQL syntax to run a query search; for instance, if you want you can run:
  41
  42    @ids = sort $db->get_taxonids('Chloroflexi%');
  43
  44 =item * In-memory database is allowed
  45
  46   my $db = Bio::DB::Taxonomy->new(-source    => 'sqlite',
  47                                   -db        => ':memory:',
  48                                   -nodesfile => 'nodes.dmp',
  49                                   -namesfile => 'names.dmp');
  50
  51 =back
  52
  53 The required database files, nodes.dmp and names.dmp can be obtained from
  54 ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
  55
  56 =head1 TODO
  57
  58 =over 4
  59
  60 =item * Small optimizations, such as optimizing name lookups
  61
  62 =item * Possibly use L<recursive CTE|http://www.sqlite.org/lang_with.html> to do lineage lookups
  63
  64 =item * Clean up SQL (still kind of a mess right now)
  65
  66 =item * Check compat. with other NCBI-specific L<Bio::DB::Taxonomy> implementations
  67
  68 =item * Plan out feasibility of allowing other backends (Neo4J, other DBI, etc)
  69
  70 =item * Optionally calculate left/right ID values for TaxID nodes
  71
  72 =back
  73
  74 Beyond completing the implementation and optimization, this will
  75 likely be rolled into a more flexible backend at some future point.
  76
  77 =head1 FEEDBACK
  78
  79 =head2 Mailing Lists
  80
  81 User feedback is an integral part of the evolution of this and other
  82 Bioperl modules. Send your comments and suggestions preferably to
  83 the Bioperl mailing list.  Your participation is much appreciated.
  84
  85   bioperl-l@bioperl.org                  - General discussion
  86   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  87
  88 =head2 Support
  89
  90 Please direct usage questions or support issues to the mailing list:
  91
  92 I<bioperl-l@bioperl.org>
  93
  94 rather than to the module maintainer directly. Many experienced and
  95 reponsive experts will be able look at the problem and quickly
  96 address it. Please include a thorough description of the problem
  97 with code and data examples if at all possible.
  98
  99 =head2 Reporting Bugs
 100
 101 Report bugs to the Bioperl bug tracking system to help us keep track
 102 of the bugs and their resolution. Bug reports can be submitted via
 103 the web:
 104
 105   https://github.com/bioperl/bioperl-live/issues
 106
 107 =head1 AUTHOR - Chris Fields
 108
 109 Email cjfields-at-cpan-dot-org
 110
 111 =head1 APPENDIX
 112
 113 The rest of the documentation details each of the object methods.
 114 Internal methods are usually preceded with a _
 115
 116 =cut
 117
 118 # Let the code begin...
 119
 120 package Bio::DB::Taxonomy::sqlite;
 121
 122 use 5.010;
 123 use strict;
 124 use DB_File;
 125 use Bio::Taxon;
 126 use File::Spec::Functions;
 127 use Data::Dumper;
 128 use DBI;
 129
 130 use constant SEPARATOR => ':';
 131
 132 our $DEFAULT_INDEX_DIR     = $Bio::Root::IO::TEMPDIR;    # /tmp
 133 our $DEFAULT_CACHE_SIZE    = 0;    # /tmp
 134 our $DEFAULT_DB_NAME       = 'taxonomy.sqlite';
 135
 136 our @DIVISIONS = (
 137     [qw(BCT Bacteria)],
 138     [qw(INV Invertebrates)],
 139     [qw(MAM Mammals)],
 140     [qw(PHG Phages)],
 141     [qw(PLN Plants)],                                    # (and fungi)
 142     [qw(PRI Primates)],
 143     [qw(ROD Rodents)],
 144     [qw(SYN Synthetic)],
 145     [qw(UNA Unassigned)],
 146     [qw(VRL Viruses)],
 147     [qw(VRT Vertebrates)],
 148     [qw(ENV 'Environmental samples')]
 149 );
 150
 151 use base qw(Bio::DB::Taxonomy);
 152
 153 =head2 new
 154
 155  Title   : new
 156  Usage   : my $obj = Bio::DB::Taxonomy::flatfile->new();
 157  Function: Builds a new Bio::DB::Taxonomy::flatfile object
 158  Returns : an instance of Bio::DB::Taxonomy::flatfile
 159  Args    : -directory => name of directory where index files should be created
 160            -nodesfile => name of file containing nodes (nodes.dmp from NCBI)
 161            -namesfile => name of the file containing names(names.dmp from NCBI)
 162            -force     => 1 to replace current indexes even if they exist
 163
 164 =cut
 165
 166 # TODO: get rid of globals!
 167 sub new {
 168     my ( $class, @args ) = @_;
 169
 170     my $self = $class->SUPER::new(@args);
 171
 172     my ( $dir, $nodesfile, $namesfile, $db, $force, $cs ) =
 173       $self->_rearrange( [qw(DIRECTORY NODESFILE NAMESFILE DB FORCE CACHE_SIZE)], @args );
 174
 175     $self->index_directory( $dir || $DEFAULT_INDEX_DIR );
 176
 177     $self->db_name( $db || $DEFAULT_DB_NAME );
 178
 179     $self->cache_size($cs // $DEFAULT_CACHE_SIZE);
 180
 181     if ($nodesfile) {
 182         $self->_build_index( $nodesfile, $namesfile, $force );
 183     }
 184
 185     $self->_db_connect;
 186     return $self;
 187 }
 188
 189 =head2 Bio::DB::Taxonomy interface implementation
 190
 191 =head2 get_num_taxa
 192
 193  Title   : get_num_taxa
 194  Usage   : my $num = $db->get_num_taxa();
 195  Function: Get the number of taxa stored in the database.
 196  Returns : A number
 197  Args    : None
 198
 199 =cut
 200
 201 sub get_num_taxa {
 202     my ($self) = @_;
 203
 204     my $ct = $self->_dbh_fetch(<<SQL);
 205     SELECT COUNT(*) FROM taxon
 206 SQL
 207
 208     return @{$ct}[0];
 209 }
 210
 211 =head2 get_taxon
 212
 213  Title   : get_taxon
 214  Usage   : my $taxon = $db->get_taxon(-taxonid => $taxonid)
 215  Function: Get a Bio::Taxon object from the database.
 216  Returns : Bio::Taxon object
 217  Args    : just a single value which is the database id, OR named args:
 218            -taxonid => taxonomy id (to query by taxonid)
 219             OR
 220            -name    => string (to query by a taxonomy name: common name,
 221                                scientific name, etc)
 222
 223 =cut
 224
 225 sub get_taxon {
 226     my ($self) = shift;
 227     my ( $taxonid, $name );
 228
 229     if ( @_ > 1 ) {
 230         ( $taxonid, $name ) = $self->_rearrange( [qw(TAXONID NAME)], @_ );
 231         if ($name) {
 232             ( $taxonid, my @others ) = $self->get_taxonids($name);
 233             $self->warn(
 234 "There were multiple ids ($taxonid @others) matching '$name', using '$taxonid'"
 235             ) if @others > 0;
 236         }
 237     }
 238     else {
 239         $taxonid = shift;
 240     }
 241
 242     return unless $taxonid;
 243
 244     $taxonid =~ /^\d+$/ || $self->throw("TaxID must be integer, got [$taxonid]");
 245
 246     my ( $parent_id, $rank, $code, $divid, $gen_code, $mito, $nm, $uniq, $class );
 247     # single join or two calls?
 248     my $sth = $self->_prepare_cached(<<SQL);
 249     SELECT tax.parent_id, tax.rank, tax.code, tax.division_id, tax.gencode_id, tax.mito_id, names.name, names.uniq_name, names.class
 250     FROM taxon as tax, names
 251     WHERE
 252         tax.taxon_id = ?
 253     AND
 254         names.taxon_id = tax.taxon_id
 255 SQL
 256
 257     $sth->bind_columns(\$parent_id, \$rank, \$code, \$divid, \$gen_code, \$mito, \$nm, \$uniq, \$class);
 258
 259     $sth->execute($taxonid) or $self->throw($sth->errstr);
 260
 261     my ($sci_name, @common_names);
 262
 263     while ($sth->fetch) {
 264         if ($class eq 'scientific name') {
 265             $sci_name = $nm;
 266         } else {
 267             push @common_names, $nm;
 268         }
 269     }
 270
 271     my $taxon = Bio::Taxon->new(
 272         -name         => $sci_name,
 273         -common_names => [@common_names],
 274         -ncbi_taxid   => $taxonid,
 275         -parent_id    => $parent_id,
 276         -rank              => $rank,
 277         -division          => $DIVISIONS[$divid]->[1],
 278         -genetic_code      => $gen_code,
 279         -mito_genetic_code => $mito
 280     );
 281
 282     # we can't use -dbh or the db_handle() method ourselves or we'll go
 283     # infinite on the merge attempt
 284     $taxon->{'db_handle'} = $self;
 285
 286     $self->_handle_internal_id($taxon);
 287
 288     return $taxon;
 289 }
 290
 291 *get_Taxonomy_Node = \&get_taxon;
 292
 293 =head2 get_taxonids
 294
 295  Title   : get_taxonids
 296  Usage   : my @taxonids = $db->get_taxonids('Homo sapiens');
 297  Function: Searches for a taxonid (typically ncbi_taxon_id) based on a query
 298            string. Note that multiple taxonids can match to the same supplied
 299            name.
 300  Returns : array of integer ids in list context, one of these in scalar context
 301  Args    : string representing taxon's name
 302
 303 =cut
 304
 305 sub get_taxonids {
 306     my ( $self, $query ) = @_;
 307
 308     # TODO: note we're not cleaning the query here, so you could technically
 309     # have a fuzzy match (or Bobby Tables someone)
 310
 311     # TODO: OR'd match seems poor optimally
 312     my $taxids = $self->{dbh}->selectcol_arrayref(<<SQL);
 313     SELECT DISTINCT taxon_id FROM names
 314     WHERE
 315         name LIKE "$query"
 316     OR
 317         uniq_name LIKE "$query"
 318 SQL
 319
 320     return wantarray() ? @{$taxids} : @{$taxids}[0];
 321 }
 322
 323 *get_taxonid = \&get_taxonids;
 324
 325 =head2 get_Children_Taxids
 326
 327  Title   : get_Children_Taxids
 328  Usage   : my @childrenids = $db->get_Children_Taxids
 329  Function: Get the ids of the children of a node in the taxonomy
 330  Returns : Array of Ids
 331  Args    : Bio::Taxon or a taxon_id
 332  Status  : deprecated (use each_Descendent())
 333
 334 =cut
 335
 336 sub get_Children_Taxids {
 337     my ( $self, $node ) = @_;
 338     $self->deprecated(); # ?
 339     #$self->warn(
 340     #    "get_Children_Taxids is deprecated, use each_Descendent instead");
 341     #my $id;
 342     #if ( ref($node) ) {
 343     #    if ( $node->can('object_id') ) {
 344     #        $id = $node->object_id;
 345     #    }
 346     #    elsif ( $node->can('ncbi_taxid') ) {
 347     #        $id = $node->ncbi_taxid;
 348     #    }
 349     #    else {
 350     #        $self->warn(
 351     #            "Don't know how to extract a taxon id from the object of type "
 352     #              . ref($node)
 353     #              . "\n" );
 354     #        return;
 355     #    }
 356     #}
 357     #else { $id = $node }
 358     #my @vals = $self->{'_parentbtree'}->get_dup($id);
 359     #return @vals;
 360 }
 361
 362 =head2 ancestor
 363
 364  Title   : ancestor
 365  Usage   : my $ancestor_taxon = $db->ancestor($taxon)
 366  Function: Retrieve the full ancestor taxon of a supplied Taxon from the
 367            database.
 368  Returns : Bio::Taxon
 369  Args    : Bio::Taxon (that was retrieved from this database)
 370
 371 =cut
 372
 373 sub ancestor {
 374     my ( $self, $taxon ) = @_;
 375     $self->throw("Must supply a Bio::Taxon")
 376       unless ref($taxon) && $taxon->isa('Bio::Taxon');
 377     $self->throw("The supplied Taxon must belong to this database")
 378       unless $taxon->db_handle && $taxon->db_handle eq $self;
 379     my $id =
 380       $taxon->id || $self->throw("The supplied Taxon is missing its id!");
 381
 382     # TODO:
 383     # Note here we explicitly set the parent ID, but use a separate method to
 384     # check whether it is defined. Mixing back-end databases, even if from the
 385     # same source, should still work (since a different backend wouldn't
 386     # explicitly set the parent_id)
 387
 388     if ($taxon->trusted_parent_id) {
 389         # this is the failsafe when we hit the root node
 390         if ($taxon->parent_id eq $id) {
 391             return;
 392         }
 393         return $self->get_taxon(-taxonid => $taxon->parent_id);
 394     } else {
 395         # TODO: would there be any other option?
 396         return;
 397     }
 398 }
 399
 400 # TODO: this may act as a drop-in for a recursive CTE lookup
 401
 402 #=head2 ancestors
 403 #
 404 # Title   : ancestors
 405 # Usage   : my @ancestor_taxa = $db->ancestors($taxon)
 406 # Function: Retrieve the full ancestor taxon of a supplied Taxon from the
 407 #           database.
 408 # Returns : List of Bio::Taxon
 409 # Args    : Bio::Taxon (that was retrieved from this database)
 410 #
 411 #=cut
 412
 413 #sub ancestors { ... }
 414
 415 =head2 each_Descendent
 416
 417  Title   : each_Descendent
 418  Usage   : my @taxa = $db->each_Descendent($taxon);
 419  Function: Get all the descendents of the supplied Taxon (but not their
 420            descendents, ie. not a recursive fetchall).
 421  Returns : Array of Bio::Taxon objects
 422  Args    : Bio::Taxon (that was retrieved from this database)
 423
 424 =cut
 425
 426 sub each_Descendent {
 427     my ( $self, $taxon ) = @_;
 428     $self->throw("Must supply a Bio::Taxon")
 429       unless ref($taxon) && $taxon->isa('Bio::Taxon');
 430     $self->throw("The supplied Taxon must belong to this database")
 431       unless $taxon->db_handle && $taxon->db_handle eq $self;  # yikes
 432
 433     my $id =
 434       $taxon->id || $self->throw("The supplied Taxon is missing its id!");
 435
 436     #my ( $parent_id, $rank, $code, $divid, $gen_code, $mito, $nm, $uniq, $class );
 437     # single join or two calls?
 438
 439     # probably not optimal, maybe set up as a cached statement with bindings?
 440     my $desc_ids = $self->{dbh}->selectcol_arrayref(<<SQL) or $self->throw($self->{dbh}->errstr);
 441     SELECT tax.taxon_id
 442     FROM taxon as tax
 443     WHERE
 444         tax.parent_id = $id
 445 SQL
 446
 447     return unless ref $desc_ids eq 'ARRAY';
 448
 449     my @descs;
 450     foreach my $desc_id (@$desc_ids) {
 451         push( @descs, $self->get_taxon($desc_id) || next );
 452     }
 453     return @descs;
 454 }
 455
 456 =head2 Helper methods
 457
 458 =cut
 459
 460 =head2 index_directory
 461
 462  Title   : index_directory
 463  Funtion : Get/set the location that index files are stored. (this module
 464            will index the supplied database)
 465  Usage   : $obj->index_directory($newval)
 466  Returns : value of index_directory (a scalar)
 467  Args    : on set, new value (a scalar or undef, optional)
 468  Note    : kept for backwards compatibility with older DB_File implementation
 469
 470 =cut
 471
 472 sub index_directory {
 473     my $self = shift;
 474     return $self->{'index_directory'} = shift if @_;
 475     return $self->{'index_directory'};
 476 }
 477
 478 =head2 db_name
 479
 480  Title   : db_name
 481  Funtion : Get/set the name of the SQLite3 database where data is stored
 482  Usage   : $obj->db_name($newval)
 483  Returns : value of db_name (a scalar)
 484  Args    : on set, new value (a scalar or undef, optional)
 485
 486 =cut
 487
 488 # TODO: this may need some disambiguation w/ index_directory above; for now we
 489 # assume this doesn't have a full path name (though I see no reason why this
 490 # shouldn't allow that)
 491
 492 sub db_name {
 493     my $self = shift;
 494     return $self->{'db_name'} = shift if @_;
 495     return $self->{'db_name'};
 496 }
 497
 498 =head2 cache_size
 499
 500  Title   : cache_size
 501  Funtion : Get/set the cachesize used for loading the SQLite3 database
 502  Usage   : $obj->cache_size($newval)
 503  Returns : value of cache_size (a scalar)
 504  Args    : on set, new value (a scalar or undef, optional)
 505  Note    : we do no checking on whether this value is an integer (SQLite does this for use)
 506
 507 =cut
 508
 509 sub cache_size {
 510     my $self = shift;
 511     return $self->{'cache_size'} = shift if defined($_[0]);
 512     return $self->{'cache_size'};
 513 }
 514
 515 # internal method which does the indexing
 516 sub _build_index {
 517     my ( $self, $nodesfile, $namesfile, $force ) = @_;
 518
 519     # TODO: need to disambiguate using index_directory here since we only have
 520     # one file.  Mayeb ignore it in favor of having full path for db_name?
 521     my ($dir, $db_name) = ($self->index_directory, $self->db_name);
 522     if (! -e $db_name || $force) {
 523
 524         # TODO: we're ignoring index_directory for now, may add support for this
 525         # down the way
 526         my $dbh = DBI->connect("dbi:SQLite:dbname=$db_name","","") or die $!;
 527
 528         $self->debug("Running SQLite version:".$dbh->{sqlite_version}."\n");
 529
 530         #$dbh->do('PRAGMA synchronous = 0');      # Non transaction safe!!!
 531
 532         if ($self->cache_size) {
 533             my $cs = $self->cache_size;
 534             $self->debug("Setting cache size $cs\n");
 535             $dbh->do("PRAGMA cache_size = $cs")
 536         }
 537
 538         $self->debug("Loading taxon table data\n");
 539         $self->_init_db($dbh);
 540         open my $NODES, '<', $nodesfile
 541             or $self->throw("Could not read node file '$nodesfile': $!");
 542
 543         # TODO: this has the really unnecessary 'OR IGNORE' option added,
 544         # apparently b.c the test data expects to handle cases where the TaxID
 545         # is repeated in this table (which should never happen in this table). I
 546         # will likely change this to throw under those circumstances
 547
 548         my $sth = $dbh->prepare_cached(<<SQL);
 549     INSERT OR IGNORE INTO taxon (taxon_id, parent_id, rank, code, division_id, gencode_id, mito_id) VALUES (?,?,?,?,?,?,?)
 550 SQL
 551         $dbh->do("BEGIN");
 552         while (<$NODES>) {
 553             next if /^\s*$/;
 554             chomp;
 555             my ($taxid,$parent,$rank,$code,$divid,undef,$gen_code,undef,$mito) = split(/\t\|\t/,$_);
 556             next if $taxid == 1;
 557             if ($parent == 1) {
 558                 $parent = undef;
 559             }
 560
 561             $sth->execute($taxid, $parent, $rank, $code, $divid, $gen_code, $mito) or die $sth->errstr.": TaxID $taxid";
 562         }
 563         $dbh->do("COMMIT") or $self->throw($dbh->errstr);
 564
 565         close $NODES;
 566
 567         $self->debug("Loading name table data\n");
 568         open my $NAMES, '<', $namesfile
 569             or $self->throw("Could not read names file '$namesfile': $!");
 570
 571         my $sth = $dbh->prepare_cached(<<SQL) or $self->throw($dbh->errstr);
 572     INSERT INTO names (taxon_id, name, uniq_name, class) VALUES (?,?,?,?)
 573 SQL
 574         $dbh->do("BEGIN");
 575         while (<$NAMES>) {
 576             next if /^$/;
 577             chomp;
 578             my ($taxid, $name, $unique_name, $class) = split(/\t\|\t/,$_);
 579
 580             # don't include the fake root node 'root' or 'all' with id 1
 581             next if $taxid == 1;
 582
 583             $class =~ s/\s+\|\s*$//;
 584
 585             #if ($name =~ /\(class\)$/) { # it seems that only rank of class is ever used in this situation
 586             #    $name =~ s/\s+\(class\)$//;
 587             #}
 588
 589             $sth->execute($taxid, $name, $unique_name, $class) or $self->throw($sth->errstr);
 590         }
 591         close $NAMES;
 592
 593         $dbh->do("COMMIT");
 594
 595         $self->debug("Creating taxon index\n");
 596         $dbh->do("CREATE INDEX parent_idx ON taxon (parent_id)") or $self->throw($dbh->errstr);
 597         $self->debug("Creating name index\n");
 598         $dbh->do("CREATE INDEX name_idx ON names (name)") or $self->throw($dbh->errstr);
 599         $self->debug("Creating taxon name table index\n");
 600         $dbh->do("CREATE INDEX taxon_name_idx ON names (taxon_id)") or $self->throw($dbh->errstr);
 601
 602         $dbh->do("PRAGMA foreign_keys = ON");
 603
 604         #$dbh->do('PRAGMA synchronous = 1');
 605         $self->{dbh} = $dbh;
 606         $self->{'_initialized'} = 1;
 607     }
 608     1;
 609 }
 610
 611 # connect the internal db handle
 612 sub _db_connect {
 613     my $self = shift;
 614     return if $self->{'_initialized'};
 615
 616     my ($dir, $db_name) = ($self->index_directory, $self->db_name);
 617
 618     # TODO: we're ignoring index_directory for now, may add support for this
 619     # down the way
 620     my $dbh = DBI->connect("dbi:SQLite:dbname=$db_name","","") or die $!;
 621     $dbh->do("PRAGMA foreign_keys = ON");
 622     if ($self->cache_size) {
 623         my $cs = $self->cache_size;
 624         $self->debug("Setting cache size $cs\n");
 625         $dbh->do("PRAGMA cache_size = $cs")
 626     }
 627     $self->{dbh} = $dbh;
 628
 629     $self->{'_initialized'} = 1;
 630 }
 631
 632 sub _init_db {
 633     my ($self, $dbh) = @_;
 634     my $schema = $self->taxon_schema();
 635     # TODO: set up handler parameters here
 636     for my $table (sort keys %$schema) {
 637         $dbh->do("DROP TABLE IF EXISTS $table") or $self->throw($dbh->errstr);
 638         $dbh->do("CREATE TABLE $table ".$schema->{$table}) or $self->throw($dbh->errstr);
 639     }
 640     1;
 641 }
 642
 643 sub _dbh_fetch {
 644     my ($self, $sql) = @_;
 645     # TODO: more sanity checks
 646     my $rows = $self->{dbh}->selectrow_arrayref($sql) or $self->throw( $self->{dbh}->errstr );
 647     return $rows;
 648 }
 649
 650 sub _prepare_cached {
 651     my ($self, $sql) = @_;
 652     # TODO: more sanity checks
 653     my $sth = $self->{dbh}->prepare_cached($sql) or $self->throw( $self->{dbh}->errstr );
 654     $sth;
 655 }
 656
 657
 658 # TODO: check data size, this is a ballpark estimate (could be reduced)
 659 sub taxon_schema {
 660     my $self   = shift;
 661     return {
 662     taxon   => <<SCHEMA,
 663     (
 664         taxon_id        INTEGER UNIQUE PRIMARY KEY NOT NULL,
 665         parent_id       INTEGER,
 666         left_id         INTEGER,
 667         right_id        INTEGER,
 668         rank            VARCHAR(25),
 669         code            VARCHAR(5),
 670         division_id     INTEGER,
 671         gencode_id      INTEGER,
 672         mito_id         INTEGER,
 673         FOREIGN KEY(parent_id) REFERENCES taxon(taxon_id)
 674     )
 675 SCHEMA
 676
 677     names   => <<SCHEMA,
 678     (
 679         name_id         INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
 680         taxon_id        INTEGER,
 681         name            VARCHAR(50),
 682         uniq_name       VARCHAR(50),
 683         class           VARCHAR(25),
 684         FOREIGN KEY(taxon_id) REFERENCES taxon(taxon_id)
 685     )
 686 SCHEMA
 687     };
 688 }
 689
 690 sub DESTROY {
 691     my $self = shift;
 692     undef $self->{dbh};
 693 }
 694
 695 1;
 696