_find_by_unique_key - bugfix
[bioperl-db.git] / lib / Bio / DB / BioSQL / mysql / SpeciesAdaptorDriver.pm
blobb0559ac23131a8d559dcd583084633b59bd6412b
1 # $Id$
3 # BioPerl module for Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Hilmar Lapp <hlapp at gmx.net>
9 # Copyright Hilmar Lapp
11 # You may distribute this module under the same terms as perl itself
14 # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
15 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
17 # You may distribute this module under the same terms as perl itself.
18 # Refer to the Perl Artistic License (see the license accompanying this
19 # software package, or see http://www.perl.com/language/misc/Artistic.html)
20 # for the terms under which you may use, modify, and redistribute this module.
22 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
23 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
24 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27 # POD documentation - main docs before the code
29 =head1 NAME
31 Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver - DESCRIPTION of Object
33 =head1 SYNOPSIS
35 Give standard usage here
37 =head1 DESCRIPTION
39 Describe the object here
41 =head1 FEEDBACK
43 =head2 Mailing Lists
45 User feedback is an integral part of the evolution of this and other
46 Bioperl modules. Send your comments and suggestions preferably to
47 the Bioperl mailing list. Your participation is much appreciated.
49 bioperl-l@bioperl.org - General discussion
50 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52 =head2 Support
54 Please direct usage questions or support issues to the mailing list:
56 I<bioperl-l@bioperl.org>
58 rather than to the module maintainer directly. Many experienced and
59 reponsive experts will be able look at the problem and quickly
60 address it. Please include a thorough description of the problem
61 with code and data examples if at all possible.
63 =head2 Reporting Bugs
65 Report bugs to the Bioperl bug tracking system to help us keep track
66 of the bugs and their resolution. Bug reports can be submitted via
67 email or the web:
69 bioperl-bugs@bioperl.org
70 http://redmine.open-bio.org/projects/bioperl/
72 =head1 AUTHOR - Hilmar Lapp
74 Email hlapp at gmx.net
76 =head1 CONTRIBUTORS
78 Additional contributors names and emails here
80 =head1 APPENDIX
82 The rest of the documentation details each of the object methods.
83 Internal methods are usually preceded with a _
85 =cut
88 # Let the code begin...
91 package Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver;
92 use vars qw(@ISA);
93 use strict;
95 # Object preamble - inherits from BasePersistenceAdaptorDriver
97 use Bio::DB::BioSQL::mysql::BasePersistenceAdaptorDriver;
99 @ISA = qw(Bio::DB::BioSQL::mysql::BasePersistenceAdaptorDriver);
102 =head2 new
104 Title : new
105 Usage : my $obj = Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver->new();
106 Function: Builds a new Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver object
107 Returns : an instance of Bio::DB::BioSQL::mysql::SpeciesAdaptorDriver
108 Args :
111 =cut
113 sub new {
114 my($class,@args) = @_;
116 my $self = $class->SUPER::new(@args);
118 return $self;
121 =head2 prepare_findbypk_sth
123 Title : prepare_findbypk_sth
124 Usage :
125 Function: Prepares and returns a DBI statement handle with one placeholder for
126 the primary key. The statement is expected to return the primary key
127 as the first and then as many columns as
128 $adp->get_persistent_slots() returns, and in that order.
130 Example :
131 Returns : A DBI prepared statement handle with one placeholder
132 Args : The Bio::DB::BioSQL::BasePersistenceAdaptor derived object
133 (basically, it needs to implement dbh() and get_persistent_slots()).
134 A reference to an array of foreign key slots (class names).
135 The name class for the taxon name table (default is
136 'scientific name').
139 =cut
141 sub prepare_findbypk_sth{
142 my ($self,$adp,$fkslots,$nameclass) = @_;
144 # defaults
145 $nameclass = "scientific name" unless $nameclass;
146 # get table name and the primary key name
147 my $table = $self->table_name($adp);
148 my $node_table = $self->table_name("TaxonNode");
149 my $pkname = $self->primary_key_name($table);
150 my $fkname = $self->foreign_key_name("TaxonNode");
151 my $slotmap = $self->slot_attribute_map($table);
152 # gather attributes
153 my @attrs = $self->_build_select_list($adp,$fkslots);
154 # create the sql statement
155 my $sql = "SELECT " .
156 join(", ", @attrs) .
157 " FROM $node_table, $table".
158 " WHERE".
159 " $node_table.$pkname = $table.$fkname".
160 " AND $table.".$slotmap->{"name_class"}." = '$nameclass'".
161 " AND $node_table.$pkname = ?";
162 $adp->debug("preparing PK select statement: $sql\n");
163 # prepare statement and return
164 return $adp->dbh()->prepare($sql);
167 =head2 prepare_findbyuk_sth
169 Title : prepare_findbyuk_sth
170 Usage :
171 Function: Prepares and returns a DBI SELECT statement handle with as many
172 placeholders as necessary for the given unique key.
174 The statement is expected to return the primary key as the first and
175 then as many columns as $adp->get_persistent_slots() returns, and in
176 that order.
177 Example :
178 Returns : A DBI prepared statement handle with as many placeholders as
179 necessary for the given unique key
180 Args : The calling Bio::DB::BioSQL::BasePersistenceAdaptor derived object
181 (basically, it needs to implement dbh() and get_persistent_slots()).
182 A reference to a hash with the names of the object''s slots in the
183 unique key as keys and their values as values.
184 A reference to an array of foreign key objects or slots
185 (class names if slot).
188 =cut
190 sub prepare_findbyuk_sth{
191 my ($self,$adp,$ukval_h,$fkslots) = @_;
193 # get the slot/attribute map
194 my $table = $self->table_name($adp);
195 my $node_table = $self->table_name("TaxonNode");
196 my $pkname = $self->primary_key_name($node_table);
197 my $fkname = $self->foreign_key_name("TaxonNode");
198 my $slotmap = $self->slot_attribute_map($table);
199 # SELECT columns
200 my @attrs = $self->_build_select_list($adp,$fkslots);
201 # WHERE clause constraints
202 my @cattrs = ();
203 foreach (sort keys %$ukval_h) {
204 my $col;
205 if(exists($slotmap->{$_})) {
206 $col = $slotmap->{$_};
208 push(@cattrs, $col || "NULL");
209 $self->warn("slot $_ is in unique key, but can't be mapped to ".
210 "an entity column: you won't find anything")
211 unless $col;
213 # create the sql statement
214 my $sql = "SELECT " . join(", ", @attrs) .
215 " FROM $node_table, $table".
216 " WHERE $node_table.$pkname = $table.$fkname AND ".
217 join(" AND ", map { "$_ = ?"; } @cattrs);
218 $adp->debug("preparing UK select statement: $sql\n");
219 # prepare statement and return
220 return $adp->dbh()->prepare($sql);
223 =head2 prepare_delete_sth
225 Title : prepare_delete_sth
226 Usage :
227 Function: Creates a prepared statement with one placeholder variable suitable
228 to delete one row from the respective table the given class maps to.
230 We override this here in order to delete from the taxon
231 node table, not the taxon name table. The node table will
232 cascade to the name table.
234 Example :
235 Returns : A DBI statement handle for a prepared statement with one placeholder
236 Args : The calling adaptor (basically, it needs to implement dbh()).
237 Optionally, additional arguments.
240 =cut
242 sub prepare_delete_sth{
243 my ($self, $adp) = @_;
245 # default is a simple DELETE statement
247 # we need the table name and the name of the primary key
248 my $tbl = $self->table_name("TaxonNode");
249 my $pkname = $self->primary_key_name($tbl);
250 # straightforward SQL:
251 my $sql = "DELETE FROM $tbl WHERE $pkname = ?";
252 $adp->debug("preparing DELETE statement: $sql\n");
253 my $sth = $adp->dbh()->prepare($sql);
254 # done
255 return $sth;
258 =head2 insert_object
260 Title : insert_object
261 Usage :
262 Function:
263 Example :
264 Returns : The primary key of the newly inserted record.
265 Args : A Bio::DB::BioSQL::BasePersistenceAdaptor derived object
266 (basically, it needs to implement dbh(), sth($key, $sth),
267 dbcontext(), and get_persistent_slots()).
268 The object to be inserted.
269 A reference to an array of foreign key objects; if any of those
270 foreign key values is NULL (some foreign keys may be nullable),
271 then give the class name.
274 =cut
276 sub insert_object{
277 my ($self,$adp,$obj,$fkobjs) = @_;
279 # get the INSERT statements: we need one for the taxon node and one for
280 # the taxon name table
281 my $cache_key_t = 'INSERT taxon '.ref($obj);
282 my $cache_key_tn = 'INSERT taxname '.ref($obj);
283 my $sth_t = $adp->sth($cache_key_t);
284 my $sth_tn = $adp->sth($cache_key_tn);
285 my $sth_max = $adp->sth("SELECT MAX TAXON SETID");
286 # we need the slot map regardless of whether we need to construct the
287 # SQL or not, because we need to know which slots do not map to a column
288 # (indicated by them being mapped to undef)
289 my $table = $self->table_name($adp);
290 my $node_table = $self->table_name("TaxonNode");
291 my $fkname = $self->foreign_key_name("TaxonNode");
292 my $slotmap = $self->slot_attribute_map($table);
293 $self->throw("no slot/attribute map for table $table") unless $slotmap;
294 # we'll need the db handle in any case
295 my $dbh = $adp->dbh();
296 # if not cached, create SQL and prepare statement
297 if(! $sth_tn) {
298 # Prepare the taxon insert statement first. There is really not a
299 # lot room for being generic here. Also, I'm afraid we need to mandate
300 # that there is a column mapping to ncbi_taxid, parent, and node rank.
301 my $sql = "INSERT INTO $node_table (".
302 join(", ",($slotmap->{"parent_taxon"},
303 $slotmap->{"ncbi_taxid"},
304 $slotmap->{"node_rank"},
305 "left_value","right_value")).
306 ") VALUES (?, ?, ?, ?, ?)";
307 $adp->debug("preparing INSERT taxon: $sql\n");
308 $sth_t = $dbh->prepare($sql);
309 $adp->sth($cache_key_t, $sth_t);
310 # now prepare the taxon_name insert statement
311 my @attrs = ($fkname,
312 $slotmap->{"binomial"},
313 $slotmap->{"name_class"});
314 $sql = "INSERT INTO " . $table . " (" . join(", ", @attrs) .
315 ") VALUES (?, ?, ?)";
316 $adp->debug("preparing INSERT taxon_name statement: $sql\n");
317 $sth_tn = $dbh->prepare($sql);
318 # and cache
319 $adp->sth($cache_key_tn, $sth_tn);
321 # prepare the classification tree: we may have subspecies and variant
322 my @clf = map { [$_,undef]; } $obj->classification();
323 # the only thing that's hopefully relatively reliable is that species
324 # and genus are the first two elements
325 $clf[0]->[1] = "species";
326 $clf[1]->[1] = "genus";
327 # also, convention is species to equal the binomial, not just first name
328 $clf[0]->[0] = $obj->binomial();
329 # sub-species and variant are to be prepended, also as full names
330 if($obj->sub_species) {
331 unshift(@clf, [$obj->binomial() ." ". $obj->sub_species(),
332 "subspecies"]);
334 if($obj->variant) {
335 # note that this is not guaranteed to be the "varietas" rank: it
336 # might also be a strain for instance
337 unshift(@clf, [$obj->binomial() ." ". $obj->variant(), "no rank"]);
339 # the most specific rank gets the NCBI taxon ID assigned (if provided)
340 my $taxid_rank = $clf[0]->[1];
341 # reverse the whole thing before proceeding (Bio::Species stores the
342 # classification array in reverse order)
343 @clf = reverse(@clf);
344 # to avoid unique key clashes, we need to know the largest existing
345 # number
346 my $sth = $dbh->prepare("SELECT max(right_value) FROM $node_table");
347 $sth->execute() || return undef;
348 my ($maxsetid) = $sth->fetchrow_array() || (0);
349 my $setid = $maxsetid+1;
350 # for each element in the array store node and name
351 my ($pk,$rv);
352 foreach my $node (@clf) {
353 # set ncbi taxon id
354 my $ncbi_taxid = defined($node->[1]) && ($node->[1] eq $taxid_rank) ?
355 $obj->ncbi_taxid : undef;
356 # log and insert
357 if($adp->verbose > 0) {
358 $adp->debug(substr(ref($adp),rindex(ref($adp),"::")+2).
359 "::insert: ".
360 "binding columns 1;2;3;4;5 to \"",
361 join(";",
362 $pk || "<NULL>",
363 $ncbi_taxid || "<NULL>", $node->[1] || "<NULL>",
364 $setid,
365 2*($maxsetid+scalar(@clf))-$setid+1),
366 "\" (parent_taxon,ncbi_taxid,node_rank,left,right)\n");
368 $rv = $sth_t->execute($pk,$ncbi_taxid,$node->[1],
369 $setid, 2*($maxsetid+scalar(@clf))-$setid+1);
370 $setid++;
371 last unless $rv;
372 # we need the newly assigned primary key
373 $pk = $adp->dbcontext->dbi->last_id_value($dbh,
374 $self->sequence_name($node_table));
375 # now insert name of node into the taxon name table
376 if($adp->verbose > 0) {
377 $adp->debug(substr(ref($adp),rindex(ref($adp),"::")+2).
378 "::insert: ".
379 "binding columns 1;2;3 to \"",
380 join(";",$pk,$node->[0],"scientific name"),
381 "\" ($fkname, name, name_class)\n");
383 $rv = $sth_tn->execute($pk, $node->[0], "scientific name");
384 last unless $rv;
386 # upon exit the value of $pk is the primary key for the node that got
387 # the NCBI taxon ID assigned - which is exactly what we need as the
388 # foreign key of the species for subsequent reference
390 # if defined insert common_name into the taxon name table
391 if($rv && $obj->common_name) {
392 if($adp->verbose > 0) {
393 $adp->debug(substr(ref($adp),rindex(ref($adp),"::")+2).
394 "::insert: ".
395 "binding columns 1;2;3 to \"",
396 join(";",$pk,$obj->common_name,"common name"),
397 "\" ($fkname, name, name_class)\n");
399 $rv = $sth_tn->execute($pk, $obj->common_name(), "common name");
401 # done, return
402 return $rv ? $pk : undef;
405 =head2 update_object
407 Title : update_object
408 Usage :
409 Function:
410 Example :
411 Returns : The number of updated rows
412 Args : A Bio::DB::BioSQL::BasePersistenceAdaptor derived object
413 (basically, it needs to implement dbh(), sth($key, $sth),
414 dbcontext(), and get_persistent_slots()).
415 The object to be updated.
416 A reference to an array of foreign key objects; if any of those
417 foreign key values is NULL (some foreign keys may be nullable),
418 then give the class name.
421 =cut
423 sub update_object{
424 my ($self,$adp,$obj,$fkobjs) = @_;
426 $self->throw_not_implemented();
430 =head2 _build_select_list
432 Title : _build_select_list
433 Usage :
434 Function: Builds and returns the select list for an object query. The list
435 contains those columns, in the right order, that are necessary to
436 populate the object.
437 Example :
438 Returns : An array of strings (column names, not prefixed)
439 Args : The calling persistence adaptor.
440 A reference to an array of foreign key entities (objects, class
441 names, or adaptors) the object must attach.
442 A reference to a hash table mapping entity names to aliases (if
443 omitted, aliases will not be used, and SELECT columns can only be
444 from one table)
447 =cut
449 sub _build_select_list{
450 my ($self,$adp,$fkobjs,$entitymap) = @_;
452 my @attrs = $self->SUPER::_build_select_list($adp,$fkobjs,$entitymap);
453 # we need to massage the attribute list ...
454 for(my $i = 0; $i < @attrs; $i++) {
455 if($attrs[$i] =~ /ncbi_taxon_id/i) {
456 my $name_table = $self->table_name("Bio::Species");
457 my $node_table = $self->table_name("TaxonNode");
458 $attrs[$i] =~ s/$name_table/$node_table/;
461 return @attrs;
464 =head2 get_classification
466 Title : get_classification
467 Usage :
468 Function: Returns the classification array for a taxon as identified by
469 its primary key.
470 Example :
471 Returns : a reference to an array of two-element arrays, where the first
472 element contains the name of the node and the second element
473 denotes its rank
474 Args : the calling adaptor, the primary key of the taxon
477 =cut
479 sub get_classification{
480 my ($self,$adp,$pk) = @_;
481 my @clf = ();
483 # try to obtain statement handle from cache
484 my $cache_key = "SELECT taxon classification";
485 my $sth = $adp->sth($cache_key);
486 if(! $sth) {
487 # we need to build this one
489 # get table names, primary and foreign key names, slot/attribute map
490 my $name_table = $self->table_name($adp);
491 my $node_table = $self->table_name("TaxonNode");
492 my $pkname = $self->primary_key_name($node_table);
493 my $fkname = $self->foreign_key_name("TaxonNode");
494 my $slotmap = $self->slot_attribute_map($name_table);
495 # we set up the sql without any fancy:
496 my $sql =
497 "SELECT name.".$slotmap->{"binomial"}.
498 ", node.".$slotmap->{"node_rank"}.
499 " FROM $node_table node, $node_table taxon, $name_table name".
500 " WHERE name.$fkname = node.$pkname AND".
501 " taxon.left_value >= node.left_value AND taxon.left_value <= node.right_value".
502 " AND taxon.$pkname = ?".
503 " AND name.".$slotmap->{"name_class"}." = 'scientific name'".
504 " ORDER BY node.left_value";
505 $adp->debug("prepare SELECT CLASSIFICATION: $sql\n");
506 # prepare the query
507 $sth = $adp->dbh->prepare($sql);
508 # and cache it
509 $adp->sth($cache_key, $sth);
511 # execute with the given primary key
512 my $rv = $sth->execute($pk);
513 if($rv) {
514 while(my $row = $sth->fetchrow_arrayref()) {
515 push(@clf, [@$row]);
518 return \@clf;
521 =head2 get_common_name
523 Title : get_common_name
524 Usage :
525 Function: Get the common name for a taxon as identified by its primary
526 key.
527 Example :
528 Returns : a string denoting the common name
529 Args : the calling adaptor, and the primary key of the taxon
532 =cut
534 sub get_common_name{
535 my ($self,$adp,$pk) = @_;
537 # statement cached?
538 my $cache_key = "SELECT COMMON_NAME ".ref($adp);
539 my $sth = $adp->sth($cache_key);
540 # if not cached we have to build it
541 if(! $sth) {
542 # get table names, primary and foreign key names, slot/attribute map
543 my $name_table = $self->table_name($adp);
544 my $fkname = $self->foreign_key_name("TaxonNode");
545 my $slotmap = $self->slot_attribute_map($name_table);
546 # prepare sql
547 my $sql =
548 "SELECT $name_table.".$slotmap->{"binomial"}.
549 " FROM $name_table".
550 " WHERE $name_table.$fkname = ?".
551 " AND $name_table.".$slotmap->{"name_class"}." = 'common_name'";
552 $adp->debug("preparing SELECT COMMON_NAME: ",$sql,"\n");
553 $sth = $adp->dbh->prepare($sql);
554 # and cache
555 $adp->sth($cache_key, $sth);
557 my $rv = $sth->execute($pk);
558 my $cname;
559 if($rv) {
560 while(my $row = $sth->fetchrow_arrayref()) {
561 # the last one overwrites
562 $cname = $row->[0];
565 return $cname;