3 # BioPerl module for Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Hilmar Lapp <hlapp at gmx.net>
9 # Copyright Hilmar Lapp
11 # You may distribute this module under the same terms as perl itself
14 # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
15 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
17 # You may distribute this module under the same terms as perl itself.
18 # Refer to the Perl Artistic License (see the license accompanying this
19 # software package, or see http://www.perl.com/language/misc/Artistic.html)
20 # for the terms under which you may use, modify, and redistribute this module.
22 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
23 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
24 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27 # POD documentation - main docs before the code
31 Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver - DESCRIPTION of Object
35 Give standard usage here
39 Describe the object here
45 User feedback is an integral part of the evolution of this and other
46 Bioperl modules. Send your comments and suggestions preferably to
47 the Bioperl mailing list. Your participation is much appreciated.
49 bioperl-l@bioperl.org - General discussion
50 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
54 Please direct usage questions or support issues to the mailing list:
56 I<bioperl-l@bioperl.org>
58 rather than to the module maintainer directly. Many experienced and
59 reponsive experts will be able look at the problem and quickly
60 address it. Please include a thorough description of the problem
61 with code and data examples if at all possible.
65 Report bugs to the Bioperl bug tracking system to help us keep track
66 of the bugs and their resolution. Bug reports can be submitted via
69 bioperl-bugs@bioperl.org
70 http://redmine.open-bio.org/projects/bioperl/
72 =head1 AUTHOR - Hilmar Lapp
74 Email hlapp at gmx.net
78 Additional contributors names and emails here
82 The rest of the documentation details each of the object methods.
83 Internal methods are usually preceded with a _
88 # Let the code begin...
91 package Bio
::DB
::BioSQL
::mysql
::SpeciesAdaptorDriver
;
95 # Object preamble - inherits from BasePersistenceAdaptorDriver
97 use Bio::DB::BioSQL::mysql::BasePersistenceAdaptorDriver;
99 @ISA = qw(Bio::DB::BioSQL::mysql::BasePersistenceAdaptorDriver);
105 Usage : my $obj = Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver->new();
106 Function: Builds a new Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver object
107 Returns : an instance of Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver
114 my($class,@args) = @_;
116 my $self = $class->SUPER::new
(@args);
121 =head2 prepare_findbypk_sth
123 Title : prepare_findbypk_sth
125 Function: Prepares and returns a DBI statement handle with one placeholder for
126 the primary key. The statement is expected to return the primary key
127 as the first and then as many columns as
128 $adp->get_persistent_slots() returns, and in that order.
131 Returns : A DBI prepared statement handle with one placeholder
132 Args : The Bio::DB::BioSQL::BasePersistenceAdaptor derived object
133 (basically, it needs to implement dbh() and get_persistent_slots()).
134 A reference to an array of foreign key slots (class names).
135 The name class for the taxon name table (default is
141 sub prepare_findbypk_sth
{
142 my ($self,$adp,$fkslots,$nameclass) = @_;
145 $nameclass = "scientific name" unless $nameclass;
146 # get table name and the primary key name
147 my $table = $self->table_name($adp);
148 my $node_table = $self->table_name("TaxonNode");
149 my $pkname = $self->primary_key_name($table);
150 my $fkname = $self->foreign_key_name("TaxonNode");
151 my $slotmap = $self->slot_attribute_map($table);
153 my @attrs = $self->_build_select_list($adp,$fkslots);
154 # create the sql statement
155 my $sql = "SELECT " .
157 " FROM $node_table, $table".
159 " $node_table.$pkname = $table.$fkname".
160 " AND $table.".$slotmap->{"name_class"}." = '$nameclass'".
161 " AND $node_table.$pkname = ?";
162 $adp->debug("preparing PK select statement: $sql\n");
163 # prepare statement and return
164 return $adp->dbh()->prepare($sql);
167 =head2 prepare_findbyuk_sth
169 Title : prepare_findbyuk_sth
171 Function: Prepares and returns a DBI SELECT statement handle with as many
172 placeholders as necessary for the given unique key.
174 The statement is expected to return the primary key as the first and
175 then as many columns as $adp->get_persistent_slots() returns, and in
178 Returns : A DBI prepared statement handle with as many placeholders as
179 necessary for the given unique key
180 Args : The calling Bio::DB::BioSQL::BasePersistenceAdaptor derived object
181 (basically, it needs to implement dbh() and get_persistent_slots()).
182 A reference to a hash with the names of the object''s slots in the
183 unique key as keys and their values as values.
184 A reference to an array of foreign key objects or slots
185 (class names if slot).
190 sub prepare_findbyuk_sth
{
191 my ($self,$adp,$ukval_h,$fkslots) = @_;
193 # get the slot/attribute map
194 my $table = $self->table_name($adp);
195 my $node_table = $self->table_name("TaxonNode");
196 my $pkname = $self->primary_key_name($node_table);
197 my $fkname = $self->foreign_key_name("TaxonNode");
198 my $slotmap = $self->slot_attribute_map($table);
200 my @attrs = $self->_build_select_list($adp,$fkslots);
201 # WHERE clause constraints
203 foreach (sort keys %$ukval_h) {
205 if(exists($slotmap->{$_})) {
206 $col = $slotmap->{$_};
208 push(@cattrs, $col || "NULL");
209 $self->warn("slot $_ is in unique key, but can't be mapped to ".
210 "an entity column: you won't find anything")
213 # create the sql statement
214 my $sql = "SELECT " . join(", ", @attrs) .
215 " FROM $node_table, $table".
216 " WHERE $node_table.$pkname = $table.$fkname AND ".
217 join(" AND ", map { "$_ = ?"; } @cattrs);
218 $adp->debug("preparing UK select statement: $sql\n");
219 # prepare statement and return
220 return $adp->dbh()->prepare($sql);
223 =head2 prepare_delete_sth
225 Title : prepare_delete_sth
227 Function: Creates a prepared statement with one placeholder variable suitable
228 to delete one row from the respective table the given class maps to.
230 We override this here in order to delete from the taxon
231 node table, not the taxon name table. The node table will
232 cascade to the name table.
235 Returns : A DBI statement handle for a prepared statement with one placeholder
236 Args : The calling adaptor (basically, it needs to implement dbh()).
237 Optionally, additional arguments.
242 sub prepare_delete_sth
{
243 my ($self, $adp) = @_;
245 # default is a simple DELETE statement
247 # we need the table name and the name of the primary key
248 my $tbl = $self->table_name("TaxonNode");
249 my $pkname = $self->primary_key_name($tbl);
250 # straightforward SQL:
251 my $sql = "DELETE FROM $tbl WHERE $pkname = ?";
252 $adp->debug("preparing DELETE statement: $sql\n");
253 my $sth = $adp->dbh()->prepare($sql);
260 Title : insert_object
264 Returns : The primary key of the newly inserted record.
265 Args : A Bio::DB::BioSQL::BasePersistenceAdaptor derived object
266 (basically, it needs to implement dbh(), sth($key, $sth),
267 dbcontext(), and get_persistent_slots()).
268 The object to be inserted.
269 A reference to an array of foreign key objects; if any of those
270 foreign key values is NULL (some foreign keys may be nullable),
271 then give the class name.
277 my ($self,$adp,$obj,$fkobjs) = @_;
279 # get the INSERT statements: we need one for the taxon node and one for
280 # the taxon name table
281 my $cache_key_t = 'INSERT taxon '.ref($obj);
282 my $cache_key_tn = 'INSERT taxname '.ref($obj);
283 my $sth_t = $adp->sth($cache_key_t);
284 my $sth_tn = $adp->sth($cache_key_tn);
285 my $sth_max = $adp->sth("SELECT MAX TAXON SETID");
286 # we need the slot map regardless of whether we need to construct the
287 # SQL or not, because we need to know which slots do not map to a column
288 # (indicated by them being mapped to undef)
289 my $table = $self->table_name($adp);
290 my $node_table = $self->table_name("TaxonNode");
291 my $fkname = $self->foreign_key_name("TaxonNode");
292 my $slotmap = $self->slot_attribute_map($table);
293 $self->throw("no slot/attribute map for table $table") unless $slotmap;
294 # we'll need the db handle in any case
295 my $dbh = $adp->dbh();
296 # if not cached, create SQL and prepare statement
298 # Prepare the taxon insert statement first. There is really not a
299 # lot room for being generic here. Also, I'm afraid we need to mandate
300 # that there is a column mapping to ncbi_taxid, parent, and node rank.
301 my $sql = "INSERT INTO $node_table (".
302 join(", ",($slotmap->{"parent_taxon"},
303 $slotmap->{"ncbi_taxid"},
304 $slotmap->{"node_rank"},
305 "left_value","right_value")).
306 ") VALUES (?, ?, ?, ?, ?)";
307 $adp->debug("preparing INSERT taxon: $sql\n");
308 $sth_t = $dbh->prepare($sql);
309 $adp->sth($cache_key_t, $sth_t);
310 # now prepare the taxon_name insert statement
311 my @attrs = ($fkname,
312 $slotmap->{"binomial"},
313 $slotmap->{"name_class"});
314 $sql = "INSERT INTO " . $table . " (" . join(", ", @attrs) .
315 ") VALUES (?, ?, ?)";
316 $adp->debug("preparing INSERT taxon_name statement: $sql\n");
317 $sth_tn = $dbh->prepare($sql);
319 $adp->sth($cache_key_tn, $sth_tn);
321 # prepare the classification tree: we may have subspecies and variant
322 my @clf = map { [$_,undef]; } $obj->classification();
323 # the only thing that's hopefully relatively reliable is that species
324 # and genus are the first two elements
325 $clf[0]->[1] = "species";
326 $clf[1]->[1] = "genus";
327 # also, convention is species to equal the binomial, not just first name
328 $clf[0]->[0] = $obj->binomial();
329 # sub-species and variant are to be prepended, also as full names
330 if($obj->sub_species) {
331 unshift(@clf, [$obj->binomial() ." ". $obj->sub_species(),
335 # note that this is not guaranteed to be the "varietas" rank: it
336 # might also be a strain for instance
337 unshift(@clf, [$obj->binomial() ." ". $obj->variant(), "no rank"]);
339 # the most specific rank gets the NCBI taxon ID assigned (if provided)
340 my $taxid_rank = $clf[0]->[1];
341 # reverse the whole thing before proceeding (Bio::Species stores the
342 # classification array in reverse order)
343 @clf = reverse(@clf);
344 # to avoid unique key clashes, we need to know the largest existing
346 my $sth = $dbh->prepare("SELECT max(right_value) FROM $node_table");
347 $sth->execute() || return undef;
348 my ($maxsetid) = $sth->fetchrow_array() || (0);
349 my $setid = $maxsetid+1;
350 # for each element in the array store node and name
352 foreach my $node (@clf) {
354 my $ncbi_taxid = defined($node->[1]) && ($node->[1] eq $taxid_rank) ?
355 $obj->ncbi_taxid : undef;
357 if($adp->verbose > 0) {
358 $adp->debug(substr(ref($adp),rindex(ref($adp),"::")+2).
360 "binding columns 1;2;3;4;5 to \"",
363 $ncbi_taxid || "<NULL>", $node->[1] || "<NULL>",
365 2*($maxsetid+scalar(@clf))-$setid+1),
366 "\" (parent_taxon,ncbi_taxid,node_rank,left,right)\n");
368 $rv = $sth_t->execute($pk,$ncbi_taxid,$node->[1],
369 $setid, 2*($maxsetid+scalar(@clf))-$setid+1);
372 # we need the newly assigned primary key
373 $pk = $adp->dbcontext->dbi->last_id_value($dbh,
374 $self->sequence_name($node_table));
375 # now insert name of node into the taxon name table
376 if($adp->verbose > 0) {
377 $adp->debug(substr(ref($adp),rindex(ref($adp),"::")+2).
379 "binding columns 1;2;3 to \"",
380 join(";",$pk,$node->[0],"scientific name"),
381 "\" ($fkname, name, name_class)\n");
383 $rv = $sth_tn->execute($pk, $node->[0], "scientific name");
386 # upon exit the value of $pk is the primary key for the node that got
387 # the NCBI taxon ID assigned - which is exactly what we need as the
388 # foreign key of the species for subsequent reference
390 # if defined insert common_name into the taxon name table
391 if($rv && $obj->common_name) {
392 if($adp->verbose > 0) {
393 $adp->debug(substr(ref($adp),rindex(ref($adp),"::")+2).
395 "binding columns 1;2;3 to \"",
396 join(";",$pk,$obj->common_name,"common name"),
397 "\" ($fkname, name, name_class)\n");
399 $rv = $sth_tn->execute($pk, $obj->common_name(), "common name");
402 return $rv ?
$pk : undef;
407 Title : update_object
411 Returns : The number of updated rows
412 Args : A Bio::DB::BioSQL::BasePersistenceAdaptor derived object
413 (basically, it needs to implement dbh(), sth($key, $sth),
414 dbcontext(), and get_persistent_slots()).
415 The object to be updated.
416 A reference to an array of foreign key objects; if any of those
417 foreign key values is NULL (some foreign keys may be nullable),
418 then give the class name.
424 my ($self,$adp,$obj,$fkobjs) = @_;
426 $self->throw_not_implemented();
430 =head2 _build_select_list
432 Title : _build_select_list
434 Function: Builds and returns the select list for an object query. The list
435 contains those columns, in the right order, that are necessary to
438 Returns : An array of strings (column names, not prefixed)
439 Args : The calling persistence adaptor.
440 A reference to an array of foreign key entities (objects, class
441 names, or adaptors) the object must attach.
442 A reference to a hash table mapping entity names to aliases (if
443 omitted, aliases will not be used, and SELECT columns can only be
449 sub _build_select_list
{
450 my ($self,$adp,$fkobjs,$entitymap) = @_;
452 my @attrs = $self->SUPER::_build_select_list
($adp,$fkobjs,$entitymap);
453 # we need to massage the attribute list ...
454 for(my $i = 0; $i < @attrs; $i++) {
455 if($attrs[$i] =~ /ncbi_taxon_id/i) {
456 my $name_table = $self->table_name("Bio::Species");
457 my $node_table = $self->table_name("TaxonNode");
458 $attrs[$i] =~ s/$name_table/$node_table/;
464 =head2 get_classification
466 Title : get_classification
468 Function: Returns the classification array for a taxon as identified by
471 Returns : a reference to an array of two-element arrays, where the first
472 element contains the name of the node and the second element
474 Args : the calling adaptor, the primary key of the taxon
479 sub get_classification
{
480 my ($self,$adp,$pk) = @_;
483 # try to obtain statement handle from cache
484 my $cache_key = "SELECT taxon classification";
485 my $sth = $adp->sth($cache_key);
487 # we need to build this one
489 # get table names, primary and foreign key names, slot/attribute map
490 my $name_table = $self->table_name($adp);
491 my $node_table = $self->table_name("TaxonNode");
492 my $pkname = $self->primary_key_name($node_table);
493 my $fkname = $self->foreign_key_name("TaxonNode");
494 my $slotmap = $self->slot_attribute_map($name_table);
495 # we set up the sql without any fancy:
497 "SELECT name.".$slotmap->{"binomial"}.
498 ", node.".$slotmap->{"node_rank"}.
499 " FROM $node_table node, $node_table taxon, $name_table name".
500 " WHERE name.$fkname = node.$pkname AND".
501 " taxon.left_value >= node.left_value AND taxon.left_value <= node.right_value".
502 " AND taxon.$pkname = ?".
503 " AND name.".$slotmap->{"name_class"}." = 'scientific name'".
504 " ORDER BY node.left_value";
505 $adp->debug("prepare SELECT CLASSIFICATION: $sql\n");
507 $sth = $adp->dbh->prepare($sql);
509 $adp->sth($cache_key, $sth);
511 # execute with the given primary key
512 my $rv = $sth->execute($pk);
514 while(my $row = $sth->fetchrow_arrayref()) {
521 =head2 get_common_name
523 Title : get_common_name
525 Function: Get the common name for a taxon as identified by its primary
528 Returns : a string denoting the common name
529 Args : the calling adaptor, and the primary key of the taxon
535 my ($self,$adp,$pk) = @_;
538 my $cache_key = "SELECT COMMON_NAME ".ref($adp);
539 my $sth = $adp->sth($cache_key);
540 # if not cached we have to build it
542 # get table names, primary and foreign key names, slot/attribute map
543 my $name_table = $self->table_name($adp);
544 my $fkname = $self->foreign_key_name("TaxonNode");
545 my $slotmap = $self->slot_attribute_map($name_table);
548 "SELECT $name_table.".$slotmap->{"binomial"}.
550 " WHERE $name_table.$fkname = ?".
551 " AND $name_table.".$slotmap->{"name_class"}." = 'common_name'";
552 $adp->debug("preparing SELECT COMMON_NAME: ",$sql,"\n");
553 $sth = $adp->dbh->prepare($sql);
555 $adp->sth($cache_key, $sth);
557 my $rv = $sth->execute($pk);
560 while(my $row = $sth->fetchrow_arrayref()) {
561 # the last one overwrites