lib/Bio/DB/BioSQL/mysql/SpeciesAdaptorDriver.pm

   1 # $Id$
   2 #
   3 # BioPerl module for Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver
   4 #
   5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   6 #
   7 # Cared for by Hilmar Lapp <hlapp at gmx.net>
   8 #
   9 # Copyright Hilmar Lapp
  10 #
  11 # You may distribute this module under the same terms as perl itself
  12
  13 #
  14 # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
  15 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
  16 #
  17 # You may distribute this module under the same terms as perl itself.
  18 # Refer to the Perl Artistic License (see the license accompanying this
  19 # software package, or see http://www.perl.com/language/misc/Artistic.html)
  20 # for the terms under which you may use, modify, and redistribute this module.
  21 #
  22 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
  23 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  24 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  25 #
  26
  27 # POD documentation - main docs before the code
  28
  29 =head1 NAME
  30
  31 Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver - DESCRIPTION of Object
  32
  33 =head1 SYNOPSIS
  34
  35 Give standard usage here
  36
  37 =head1 DESCRIPTION
  38
  39 Describe the object here
  40
  41 =head1 FEEDBACK
  42
  43 =head2 Mailing Lists
  44
  45 User feedback is an integral part of the evolution of this and other
  46 Bioperl modules. Send your comments and suggestions preferably to
  47 the Bioperl mailing list.  Your participation is much appreciated.
  48
  49   bioperl-l@bioperl.org                  - General discussion
  50   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  51
  52 =head2 Support
  53
  54 Please direct usage questions or support issues to the mailing list:
  55
  56 I<bioperl-l@bioperl.org>
  57
  58 rather than to the module maintainer directly. Many experienced and
  59 reponsive experts will be able look at the problem and quickly
  60 address it. Please include a thorough description of the problem
  61 with code and data examples if at all possible.
  62
  63 =head2 Reporting Bugs
  64
  65 Report bugs to the Bioperl bug tracking system to help us keep track
  66 of the bugs and their resolution. Bug reports can be submitted via
  67 email or the web:
  68
  69   bioperl-bugs@bioperl.org
  70   http://redmine.open-bio.org/projects/bioperl/
  71
  72 =head1 AUTHOR - Hilmar Lapp
  73
  74 Email hlapp at gmx.net
  75
  76 =head1 CONTRIBUTORS
  77
  78 Additional contributors names and emails here
  79
  80 =head1 APPENDIX
  81
  82 The rest of the documentation details each of the object methods.
  83 Internal methods are usually preceded with a _
  84
  85 =cut
  86
  87
  88 # Let the code begin...
  89
  90
  91 package Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver;
  92 use vars qw(@ISA);
  93 use strict;
  94
  95 # Object preamble - inherits from BasePersistenceAdaptorDriver
  96
  97 use Bio::DB::BioSQL::mysql::BasePersistenceAdaptorDriver;
  98
  99 @ISA = qw(Bio::DB::BioSQL::mysql::BasePersistenceAdaptorDriver);
 100
 101
 102 =head2 new
 103
 104  Title   : new
 105  Usage   : my $obj = Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver->new();
 106  Function: Builds a new Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver object
 107  Returns : an instance of Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver
 108  Args    :
 109
 110
 111 =cut
 112
 113 sub new {
 114     my($class,@args) = @_;
 115
 116     my $self = $class->SUPER::new(@args);
 117
 118     return $self;
 119 }
 120
 121 =head2 prepare_findbypk_sth
 122
 123  Title   : prepare_findbypk_sth
 124  Usage   :
 125  Function: Prepares and returns a DBI statement handle with one placeholder for
 126            the primary key. The statement is expected to return the primary key
 127            as the first and then as many columns as
 128            $adp->get_persistent_slots() returns, and in that order.
 129
 130  Example :
 131  Returns : A DBI prepared statement handle with one placeholder
 132  Args    : The Bio::DB::BioSQL::BasePersistenceAdaptor derived object
 133            (basically, it needs to implement dbh() and get_persistent_slots()).
 134            A reference to an array of foreign key slots (class names).
 135            The name class for the taxon name table (default is
 136            'scientific name').
 137
 138
 139 =cut
 140
 141 sub prepare_findbypk_sth{
 142     my ($self,$adp,$fkslots,$nameclass) = @_;
 143
 144     # defaults
 145     $nameclass = "scientific name" unless $nameclass;
 146     # get table name and the primary key name
 147     my $table = $self->table_name($adp);
 148     my $node_table = $self->table_name("TaxonNode");
 149     my $pkname = $self->primary_key_name($table);
 150     my $fkname = $self->foreign_key_name("TaxonNode");
 151     my $slotmap = $self->slot_attribute_map($table);
 152     # gather attributes
 153     my @attrs = $self->_build_select_list($adp,$fkslots);
 154     # create the sql statement
 155     my $sql = "SELECT " .
 156         join(", ", @attrs) .
 157         " FROM $node_table, $table".
 158         " WHERE".
 159         " $node_table.$pkname = $table.$fkname".
 160         " AND $table.".$slotmap->{"name_class"}." = '$nameclass'".
 161         " AND $node_table.$pkname = ?";
 162     $adp->debug("preparing PK select statement: $sql\n");
 163     # prepare statement and return
 164     return $adp->dbh()->prepare($sql);
 165 }
 166
 167 =head2 prepare_findbyuk_sth
 168
 169  Title   : prepare_findbyuk_sth
 170  Usage   :
 171  Function: Prepares and returns a DBI SELECT statement handle with as many
 172            placeholders as necessary for the given unique key.
 173
 174            The statement is expected to return the primary key as the first and
 175            then as many columns as $adp->get_persistent_slots() returns, and in
 176            that order.
 177  Example :
 178  Returns : A DBI prepared statement handle with as many placeholders as
 179            necessary for the given unique key
 180  Args    : The calling Bio::DB::BioSQL::BasePersistenceAdaptor derived object
 181            (basically, it needs to implement dbh() and get_persistent_slots()).
 182            A reference to a hash with the names of the object''s slots in the
 183            unique key as keys and their values as values.
 184            A reference to an array of foreign key objects or slots
 185            (class names if slot).
 186
 187
 188 =cut
 189
 190 sub prepare_findbyuk_sth{
 191     my ($self,$adp,$ukval_h,$fkslots) = @_;
 192
 193     # get the slot/attribute map
 194     my $table = $self->table_name($adp);
 195     my $node_table = $self->table_name("TaxonNode");
 196     my $pkname = $self->primary_key_name($node_table);
 197     my $fkname = $self->foreign_key_name("TaxonNode");
 198     my $slotmap = $self->slot_attribute_map($table);
 199     # SELECT columns
 200     my @attrs = $self->_build_select_list($adp,$fkslots);
 201     # WHERE clause constraints
 202     my @cattrs = ();
 203     foreach (sort keys %$ukval_h) {
 204         my $col;
 205         if(exists($slotmap->{$_})) {
 206             $col = $slotmap->{$_};
 207         }
 208         push(@cattrs, $col || "NULL");
 209         $self->warn("slot $_ is in unique key, but can't be mapped to ".
 210                     "an entity column: you won't find anything")
 211             unless $col;
 212     }
 213     # create the sql statement
 214     my $sql = "SELECT " . join(", ", @attrs) .
 215         " FROM $node_table, $table".
 216         " WHERE $node_table.$pkname = $table.$fkname AND ".
 217         join(" AND ", map { "$_ = ?"; } @cattrs);
 218     $adp->debug("preparing UK select statement: $sql\n");
 219     # prepare statement and return
 220     return $adp->dbh()->prepare($sql);
 221 }
 222
 223 =head2 prepare_delete_sth
 224
 225  Title   : prepare_delete_sth
 226  Usage   :
 227  Function: Creates a prepared statement with one placeholder variable suitable
 228            to delete one row from the respective table the given class maps to.
 229
 230            We override this here in order to delete from the taxon
 231            node table, not the taxon name table. The node table will
 232            cascade to the name table.
 233
 234  Example :
 235  Returns : A DBI statement handle for a prepared statement with one placeholder
 236  Args    : The calling adaptor (basically, it needs to implement dbh()).
 237            Optionally, additional arguments.
 238
 239
 240 =cut
 241
 242 sub prepare_delete_sth{
 243     my ($self, $adp) = @_;
 244
 245     # default is a simple DELETE statement
 246     #
 247     # we need the table name and the name of the primary key
 248     my $tbl = $self->table_name("TaxonNode");
 249     my $pkname = $self->primary_key_name($tbl);
 250     # straightforward SQL:
 251     my $sql = "DELETE FROM $tbl WHERE $pkname = ?";
 252     $adp->debug("preparing DELETE statement: $sql\n");
 253     my $sth = $adp->dbh()->prepare($sql);
 254     # done
 255     return $sth;
 256 }
 257
 258 =head2 insert_object
 259
 260  Title   : insert_object
 261  Usage   :
 262  Function:
 263  Example :
 264  Returns : The primary key of the newly inserted record.
 265  Args    : A Bio::DB::BioSQL::BasePersistenceAdaptor derived object
 266            (basically, it needs to implement dbh(), sth($key, $sth),
 267             dbcontext(), and get_persistent_slots()).
 268            The object to be inserted.
 269            A reference to an array of foreign key objects; if any of those
 270            foreign key values is NULL (some foreign keys may be nullable),
 271            then give the class name.
 272
 273
 274 =cut
 275
 276 sub insert_object{
 277     my ($self,$adp,$obj,$fkobjs) = @_;
 278
 279     # get the INSERT statements: we need one for the taxon node and one for
 280     # the taxon name table
 281     my $cache_key_t = 'INSERT taxon '.ref($obj);
 282     my $cache_key_tn = 'INSERT taxname '.ref($obj);
 283     my $sth_t = $adp->sth($cache_key_t);
 284     my $sth_tn = $adp->sth($cache_key_tn);
 285     my $sth_max = $adp->sth("SELECT MAX TAXON SETID");
 286     # we need the slot map regardless of whether we need to construct the
 287     # SQL or not, because we need to know which slots do not map to a column
 288     # (indicated by them being mapped to undef)
 289     my $table = $self->table_name($adp);
 290     my $node_table = $self->table_name("TaxonNode");
 291     my $fkname = $self->foreign_key_name("TaxonNode");
 292     my $slotmap = $self->slot_attribute_map($table);
 293     $self->throw("no slot/attribute map for table $table") unless $slotmap;
 294     # we'll need the db handle in any case
 295     my $dbh = $adp->dbh();
 296     # if not cached, create SQL and prepare statement
 297     if(! $sth_tn) {
 298         # Prepare the taxon insert statement first. There is really not a
 299         # lot room for being generic here. Also, I'm afraid we need to mandate
 300         # that there is a column mapping to ncbi_taxid, parent, and node rank.
 301         my $sql = "INSERT INTO $node_table (".
 302             join(", ",($slotmap->{"parent_taxon"},
 303                        $slotmap->{"ncbi_taxid"},
 304                        $slotmap->{"node_rank"},
 305                        "left_value","right_value")).
 306             ") VALUES (?, ?, ?, ?, ?)";
 307         $adp->debug("preparing INSERT taxon: $sql\n");
 308         $sth_t = $dbh->prepare($sql);
 309         $adp->sth($cache_key_t, $sth_t);
 310         # now prepare the taxon_name insert statement
 311         my @attrs = ($fkname,
 312                      $slotmap->{"binomial"},
 313                      $slotmap->{"name_class"});
 314         $sql = "INSERT INTO " . $table . " (" . join(", ", @attrs) .
 315             ") VALUES (?, ?, ?)";
 316         $adp->debug("preparing INSERT taxon_name statement: $sql\n");
 317         $sth_tn = $dbh->prepare($sql);
 318         # and cache
 319         $adp->sth($cache_key_tn, $sth_tn);
 320     }
 321     # prepare the classification tree: we may have subspecies and variant
 322     my @clf = map { [$_,undef]; } $obj->classification();
 323     # the only thing that's hopefully relatively reliable is that species
 324     # and genus are the first two elements
 325     $clf[0]->[1] = "species";
 326     $clf[1]->[1] = "genus";
 327     # also, convention is species to equal the binomial, not just first name
 328     $clf[0]->[0] = $obj->binomial();
 329     # sub-species and variant are to be prepended, also as full names
 330     if($obj->sub_species) {
 331         unshift(@clf, [$obj->binomial() ." ". $obj->sub_species(),
 332                        "subspecies"]);
 333     }
 334     if($obj->variant) {
 335         # note that this is not guaranteed to be the "varietas" rank: it
 336         # might also be a strain for instance
 337         unshift(@clf, [$obj->binomial() ." ". $obj->variant(), "no rank"]);
 338     }
 339     # the most specific rank gets the NCBI taxon ID assigned (if provided)
 340     my $taxid_rank = $clf[0]->[1];
 341     # reverse the whole thing before proceeding (Bio::Species stores the
 342     # classification array in reverse order)
 343     @clf = reverse(@clf);
 344     # to avoid unique key clashes, we need to know the largest existing
 345     # number
 346     my $sth = $dbh->prepare("SELECT max(right_value) FROM $node_table");
 347     $sth->execute() || return undef;
 348     my ($maxsetid) = $sth->fetchrow_array() || (0);
 349     my $setid = $maxsetid+1;
 350     # for each element in the array store node and name
 351     my ($pk,$rv);
 352     foreach my $node (@clf) {
 353         # set ncbi taxon id
 354         my $ncbi_taxid = defined($node->[1]) && ($node->[1] eq $taxid_rank) ?
 355             $obj->ncbi_taxid : undef;
 356         # log and insert
 357         if($adp->verbose > 0) {
 358             $adp->debug(substr(ref($adp),rindex(ref($adp),"::")+2).
 359                         "::insert: ".
 360                         "binding columns 1;2;3;4;5 to \"",
 361                         join(";",
 362                              $pk || "<NULL>",
 363                              $ncbi_taxid || "<NULL>", $node->[1] || "<NULL>",
 364                              $setid,
 365                              2*($maxsetid+scalar(@clf))-$setid+1),
 366                         "\" (parent_taxon,ncbi_taxid,node_rank,left,right)\n");
 367         }
 368         $rv = $sth_t->execute($pk,$ncbi_taxid,$node->[1],
 369                               $setid, 2*($maxsetid+scalar(@clf))-$setid+1);
 370         $setid++;
 371         last unless $rv;
 372         # we need the newly assigned primary key
 373         $pk = $adp->dbcontext->dbi->last_id_value($dbh,
 374                                             $self->sequence_name($node_table));
 375         # now insert name of node into the taxon name table
 376         if($adp->verbose > 0) {
 377             $adp->debug(substr(ref($adp),rindex(ref($adp),"::")+2).
 378                         "::insert: ".
 379                         "binding columns 1;2;3 to \"",
 380                         join(";",$pk,$node->[0],"scientific name"),
 381                         "\" ($fkname, name, name_class)\n");
 382         }
 383         $rv = $sth_tn->execute($pk, $node->[0], "scientific name");
 384         last unless $rv;
 385     }
 386     # upon exit the value of $pk is the primary key for the node that got
 387     # the NCBI taxon ID assigned - which is exactly what we need as the
 388     # foreign key of the species for subsequent reference
 389
 390     # if defined insert common_name into the taxon name table
 391     if($rv && $obj->common_name) {
 392         if($adp->verbose > 0) {
 393             $adp->debug(substr(ref($adp),rindex(ref($adp),"::")+2).
 394                         "::insert: ".
 395                         "binding columns 1;2;3 to \"",
 396                         join(";",$pk,$obj->common_name,"common name"),
 397                         "\" ($fkname, name, name_class)\n");
 398         }
 399         $rv = $sth_tn->execute($pk, $obj->common_name(), "common name");
 400     }
 401     # done, return
 402     return $rv ? $pk : undef;
 403 }
 404
 405 =head2 update_object
 406
 407  Title   : update_object
 408  Usage   :
 409  Function:
 410  Example :
 411  Returns : The number of updated rows
 412  Args    : A Bio::DB::BioSQL::BasePersistenceAdaptor derived object
 413            (basically, it needs to implement dbh(), sth($key, $sth),
 414             dbcontext(), and get_persistent_slots()).
 415            The object to be updated.
 416            A reference to an array of foreign key objects; if any of those
 417            foreign key values is NULL (some foreign keys may be nullable),
 418            then give the class name.
 419
 420
 421 =cut
 422
 423 sub update_object{
 424     my ($self,$adp,$obj,$fkobjs) = @_;
 425
 426     $self->throw_not_implemented();
 427
 428 }
 429
 430 =head2 _build_select_list
 431
 432  Title   : _build_select_list
 433  Usage   :
 434  Function: Builds and returns the select list for an object query. The list
 435            contains those columns, in the right order, that are necessary to
 436            populate the object.
 437  Example :
 438  Returns : An array of strings (column names, not prefixed)
 439  Args    : The calling persistence adaptor.
 440            A reference to an array of foreign key entities (objects, class
 441            names, or adaptors) the object must attach.
 442            A reference to a hash table mapping entity names to aliases (if
 443            omitted, aliases will not be used, and SELECT columns can only be
 444            from one table)
 445
 446
 447 =cut
 448
 449 sub _build_select_list{
 450     my ($self,$adp,$fkobjs,$entitymap) = @_;
 451
 452     my @attrs = $self->SUPER::_build_select_list($adp,$fkobjs,$entitymap);
 453     # we need to massage the attribute list ...
 454     for(my $i = 0; $i < @attrs; $i++) {
 455         if($attrs[$i] =~ /ncbi_taxon_id/i) {
 456             my $name_table = $self->table_name("Bio::Species");
 457             my $node_table = $self->table_name("TaxonNode");
 458             $attrs[$i] =~ s/$name_table/$node_table/;
 459         }
 460     }
 461     return @attrs;
 462 }
 463
 464 =head2 get_classification
 465
 466  Title   : get_classification
 467  Usage   :
 468  Function: Returns the classification array for a taxon as identified by
 469            its primary key.
 470  Example :
 471  Returns : a reference to an array of two-element arrays, where the first
 472            element contains the name of the node and the second element
 473            denotes its rank
 474  Args    : the calling adaptor, the primary key of the taxon
 475
 476
 477 =cut
 478
 479 sub get_classification{
 480     my ($self,$adp,$pk) = @_;
 481     my @clf = ();
 482
 483     # try to obtain statement handle from cache
 484     my $cache_key = "SELECT taxon classification";
 485     my $sth = $adp->sth($cache_key);
 486     if(! $sth) {
 487         # we need to build this one
 488
 489         # get table names, primary and foreign key names, slot/attribute map
 490         my $name_table = $self->table_name($adp);
 491         my $node_table = $self->table_name("TaxonNode");
 492         my $pkname = $self->primary_key_name($node_table);
 493         my $fkname = $self->foreign_key_name("TaxonNode");
 494         my $slotmap = $self->slot_attribute_map($name_table);
 495         # we set up the sql without any fancy:
 496         my $sql =
 497             "SELECT name.".$slotmap->{"binomial"}.
 498             ", node.".$slotmap->{"node_rank"}.
 499             " FROM $node_table node, $node_table taxon, $name_table name".
 500             " WHERE name.$fkname = node.$pkname AND".
 501             " taxon.left_value >= node.left_value AND taxon.left_value <= node.right_value".
 502             " AND taxon.$pkname = ?".
 503             " AND name.".$slotmap->{"name_class"}." = 'scientific name'".
 504             " ORDER BY node.left_value";
 505         $adp->debug("prepare SELECT CLASSIFICATION: $sql\n");
 506         # prepare the query
 507         $sth = $adp->dbh->prepare($sql);
 508         # and cache it
 509         $adp->sth($cache_key, $sth);
 510     }
 511     # execute with the given primary key
 512     my $rv = $sth->execute($pk);
 513     if($rv) {
 514         while(my $row = $sth->fetchrow_arrayref()) {
 515             push(@clf, [@$row]);
 516         }
 517     }
 518     return \@clf;
 519 }
 520
 521 =head2 get_common_name
 522
 523  Title   : get_common_name
 524  Usage   :
 525  Function: Get the common name for a taxon as identified by its primary
 526            key.
 527  Example :
 528  Returns : a string denoting the common name
 529  Args    : the calling adaptor, and the primary key of the taxon
 530
 531
 532 =cut
 533
 534 sub get_common_name{
 535     my ($self,$adp,$pk) = @_;
 536
 537     # statement cached?
 538     my $cache_key = "SELECT COMMON_NAME ".ref($adp);
 539     my $sth = $adp->sth($cache_key);
 540     # if not cached we have to build it
 541     if(! $sth) {
 542         # get table names, primary and foreign key names, slot/attribute map
 543         my $name_table = $self->table_name($adp);
 544         my $fkname = $self->foreign_key_name("TaxonNode");
 545         my $slotmap = $self->slot_attribute_map($name_table);
 546         # prepare sql
 547         my $sql =
 548             "SELECT $name_table.".$slotmap->{"binomial"}.
 549             " FROM $name_table".
 550             " WHERE $name_table.$fkname = ?".
 551             " AND $name_table.".$slotmap->{"name_class"}." = 'common_name'";
 552         $adp->debug("preparing SELECT COMMON_NAME: ",$sql,"\n");
 553         $sth = $adp->dbh->prepare($sql);
 554         # and cache
 555         $adp->sth($cache_key, $sth);
 556     }
 557     my $rv = $sth->execute($pk);
 558     my $cname;
 559     if($rv) {
 560         while(my $row = $sth->fetchrow_arrayref()) {
 561             # the last one overwrites
 562             $cname = $row->[0];
 563         }
 564     }
 565     return $cname;
 566 }
 567
 568 1;