1 package Bio
::DB
::SeqFeature
::Store
::DBI
::Pg
;
2 use DBD
::Pg
qw(:pg_types);
3 # $Id: Pg.pm 14656 2008-04-14 15:05:37Z lstein $
7 Bio::DB::SeqFeature::Store::DBI::Pg -- PostgreSQL implementation of Bio::DB::SeqFeature::Store
11 use Bio::DB::SeqFeature::Store;
13 # Open the sequence database
14 my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::Pg',
15 -dsn => 'dbi:Pg:test');
17 # get a feature from somewhere
18 my $feature = Bio::SeqFeature::Generic->new(...);
21 $db->store($feature) or die "Couldn't store!";
23 # the feature's primary_id attribute is set to its primary ID in the database
25 my $id = $feature->primary_id;
27 # get the feature back out
28 my $f = $db->fetch($id);
30 # change the feature and update it
32 $db->update($f) or die "Couldn't update!";
36 my @features = $db->fetch_many(@list_of_ids);
39 @features = $db->get_features_by_name('ZK909');
42 @features = $db->get_features_by_alias('sma-3');
45 @features = $db->get_features_by_name('gene');
48 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
51 @features = $db->get_features_by_attribute({description => 'protein kinase'})
53 # ...by the GFF "Note" field
54 @result_list = $db->search_notes('kinase');
56 # ...by arbitrary combinations of selectors
57 @features = $db->features(-name => $name,
62 -attributes => $attributes);
64 # ...using an iterator
65 my $iterator = $db->get_seq_stream(-name => $name,
70 -attributes => $attributes);
72 while (my $feature = $iterator->next_seq) {
73 # do something with the feature
76 # ...limiting the search to a particular region
77 my $segment = $db->segment('Chr1',5000=>6000);
78 my @features = $segment->features(-type=>['mRNA','match']);
80 # getting & storing sequence information
81 # Warning: this returns a string, and not a PrimarySeq object
82 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
83 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
85 # what feature types are defined in the database?
86 my @types = $db->types;
88 # create a new feature in the database
89 my $feature = $db->new_feature(-primary_tag => 'mRNA',
96 Bio::DB::SeqFeature::Store::DBI::Pg is the PostgreSQL adaptor for
97 Bio::DB::SeqFeature::Store. You will not create it directly, but
98 instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
100 See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
102 =head2 Using the Pg adaptor
104 Before you can use the adaptor, you must use the pgAdmin tool to
105 create a database and establish a user account with write
106 permission. In order to use "fast" loading, the user account must have
109 To establish a connection to the database, call
110 Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::Pg',@more_args). The
111 additional arguments are as follows:
113 Argument name Description
114 ------------- -----------
116 -dsn The database name. You can abbreviate
117 "dbi:Pg:foo" as "foo" if you wish.
119 -user Username for authentication.
121 -pass Password for authentication.
123 -namespace A prefix to attach to each table. This allows you
124 to have several virtual databases in the same
127 -temp Boolean flag. If true, a temporary database
128 will be created and destroyed as soon as
129 the Store object goes out of scope. (synonym -temporary)
131 -autoindex Boolean flag. If true, features in the database will be
132 reindexed every time they change. This is the default.
135 -tmpdir Directory in which to place temporary files during "fast" loading.
136 Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
138 -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
139 (synonyms -options, -dbi_attr)
141 -write Pass true to open database for writing or updating.
143 If successful, a new instance of
144 Bio::DB::SeqFeature::Store::DBI::Pg will be returned.
146 In addition to the standard methods supported by all well-behaved
147 Bio::DB::SeqFeature::Store databases, the following
148 adaptor-specific methods are provided. These are described in the next
155 use base
'Bio::DB::SeqFeature::Store';
156 use Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
;
160 use Bio
::DB
::GFF
::Util
::Rearrange
'rearrange';
# Query-tracing switch; set to a true value to print SQL as it runs.
use constant DEBUG => 0;

# Sentinel coordinate bounds (32-bit signed integer limits, per the
# note inherited from the MySQL adaptor's documentation).
# WARNING: if your sequence uses coordinates greater than 2 GB, you are out of luck!
use constant MAX_INT => 2_147_483_647;
use constant MIN_INT => -2_147_483_648;

# Binning parameters for the spatial index.
use constant MAX_BIN => 1_000_000_000; # size of largest feature = 1 Gb
use constant MIN_BIN => 1000; # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these
# Cache the results of these lookup helpers so repeated calls with the
# same arguments are served from memory instead of re-querying the
# database (or recomputing the dump-file path).
memoize($_) for qw(_locationid _attributeid dump_path);
177 # object initialization
192 ) = rearrange
(['DSN',
193 ['TEMP','TEMPORARY'],
196 ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
198 ['PASS','PASSWD','PASSWORD'],
199 ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
200 ['WRITE','WRITEABLE'],
207 $writeable = 1 if $is_temporary or $dump_dir;
209 $dsn or $self->throw("Usage: ".__PACKAGE__
."->init(-dsn => \$dbh || \$dsn)");
215 $dsn = "dbi:Pg:$dsn" unless $dsn =~ /^dbi:/;
216 $dbh = DBI
->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr
);
218 $self->{'original_arguments'} = {
222 'dbh_options' => $dbi_options,
225 $self->{dbh
}->{InactiveDestroy
} = 1;
226 $self->{is_temp
} = $is_temporary;
227 $self->{namespace
} = $namespace;
228 $self->{writeable
} = $writeable;
229 $self->schema($schema) if ($schema);
231 $self->default_settings;
232 $self->autoindex($autoindex) if defined $autoindex;
233 $self->dumpdir($dump_dir) if $dump_dir;
234 if ($self->is_temp) {
235 $self->init_tmp_database();
237 $self->init_database('erase');
# Accessor: true if the store was opened in a writeable/updatable mode
# (set during init from the -write/-writeable argument).
sub writeable {
    my $self = shift;
    return $self->{writeable};
}
# Capability flag: this adaptor records parent/child feature
# relationships (via the parent2child table), so always true.
sub can_store_parentage {
    return 1;
}
245 sub table_definitions
{
250 id serial primary key,
255 strand int default 0,
258 indexed int default 1,
259 object bytea not null
261 CREATE INDEX feature_stuff ON feature(seqid,tier,bin,typeid);
262 CREATE INDEX feature_typeid ON feature(typeid);
265 locationlist
=> <<END,
267 id serial primary key,
268 seqname varchar(50) not null
269 ); CREATE INDEX locationlist_seqname ON locationlist(seqname);
274 id serial primary key,
275 tag varchar(100) not null
276 ); CREATE INDEX typelist_tab ON typelist(tag);
281 name varchar(128) not null,
282 display_name int default 0
284 CREATE INDEX name_id ON name(id);
285 CREATE INDEX name_name ON name(name);
291 attribute_id int not null,
294 CREATE INDEX attribute_id ON attribute(id);
295 CREATE INDEX attribute_id_val ON attribute(attribute_id,SUBSTR(attribute_value, 1, 10));
298 attributelist
=> <<END,
300 id serial primary key,
301 tag varchar(50) not null
303 CREATE INDEX attributelist_tag ON attributelist(tag);
305 parent2child
=> <<END,
310 CREATE INDEX parent2child_id_child ON parent2child(id,child);
315 name varchar(128) primary key,
316 value varchar(128) not null
322 "offset" int not null,
324 primary key(id,"offset")
331 # default settings -- will create and populate meta table if needed
333 sub default_settings
{
335 $self->maybe_create_meta();
336 $self->SUPER::default_settings
;
338 $self->dumpdir(File
::Spec
->tmpdir);
343 # retrieve database handle
347 my $d = $self->{dbh
};
348 $self->{dbh
} = shift if @_;
353 # Postgres DBI doesn't implement proper cloning, so do it the hard way
356 my $dsn = $self->{'original_arguments'}->{'dsn'};
357 my $user = $self->{'original_arguments'}->{'user'};
358 my $pass = $self->{'original_arguments'}->{'pass'};
359 my $dbi_options = $self->{'original_arguments'}->{'dbi_options'};
360 $self->dbh()->{InactiveDestroy
} = 1;
361 my $new_dbh = DBI
->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr
);
362 $new_dbh->{InactiveDestroy
} = 1;
363 $self->{dbh
} = $new_dbh; # unless $self->is_temp;
364 if ($self->schema()) {
365 $self->schema($self->schema()); # Reset the DBH's schema
370 # get/set directory for bulk load tables
374 my $d = $self->{dumpdir
};
375 $self->{dumpdir
} = abs_path
(shift) if @_;
380 # table namespace (multiple dbs in one Pg db)
384 my $d = $self->{namespace
};
385 $self->{namespace
} = shift if @_;
390 # find a path that corresponds to a dump table
394 my $table = $self->_qualify(shift);
395 return "$self->{dumpdir}/$table.$$";
399 # make a filehandle (writeable) that corresponds to a dump table
401 sub dump_filehandle
{
404 eval "require IO::File" unless IO
::File
->can('new');
405 my $path = $self->dump_path($table);
406 my $fh = $self->{filehandles
}{$path} ||= IO
::File
->new(">$path");
411 # find the next ID for a feature (used only during bulk loading)
415 $self->{max_id
} ||= $self->max_id;
416 return ++$self->{max_id
};
420 # find the maximum ID for a feature (used only during bulk loading)
424 my $sth = $self->_prepare("SELECT max(id) from feature");
425 $sth->execute or $self->throw($sth->errstr);
426 my ($id) = $sth->fetchrow_array;
431 my ($self, $schema) = @_;
432 $self->{'schema'} = $schema if defined($schema);
434 $self->dbh->do("SET search_path TO " . $self->{'schema'});
436 $self->dbh->do("SET search_path TO public");
438 return $self->{'schema'};
441 # wipe database clean and reinstall schema
447 my $dbh = $self->dbh;
448 my $tables = $self->table_definitions;
449 foreach (keys %$tables) {
450 next if $_ eq 'meta'; # don't get rid of meta data!
451 my $table = $self->_qualify($_);
452 $dbh->do("DROP TABLE IF EXISTS $table") if $erase;
453 my @table_exists = $dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
454 if (!scalar(@table_exists)) {
455 my $query = "CREATE TABLE $table $tables->{$_}";
456 $dbh->do($query) or $self->throw($dbh->errstr);
459 $self->subfeatures_are_indexed(1) if $erase;
463 sub maybe_create_meta
{
465 return unless $self->writeable;
466 my $table = $self->_qualify('meta');
467 my $tables = $self->table_definitions;
468 my @table_exists = $self->dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
469 if (!scalar(@table_exists)) {
470 $self->dbh->do("CREATE TABLE $table $tables->{meta}");
474 sub init_tmp_database
{
476 my $dbh = $self->dbh;
477 my $tables = $self->table_definitions;
478 for my $t (keys %$tables) {
479 my $table = $self->_qualify($t);
480 my @table_exists = $dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
481 if (!scalar(@table_exists)) {
482 my $query = "CREATE TEMPORARY TABLE $table $tables->{$t}";
483 $dbh->do($query) or $self->throw($dbh->errstr);
490 # use temporary tables
499 # special case for bulk updates
500 return $self->_dump_store(@_) if $self->{bulk_update_in_progress
};
505 my $autoindex = $self->autoindex;
507 my $dbh = $self->dbh;
508 local $dbh->{RaiseError
} = 1;
512 $self->replace($obj,$indexed);
513 $self->_update_indexes($obj) if $indexed && $autoindex;
519 warn "Transaction aborted because $@";
526 # remember whether we have ever stored a non-indexed feature
527 unless ($indexed or $self->{indexed_flag
}++) {
528 $self->subfeatures_are_indexed(0);
533 # we memoize this in order to avoid making zillions of calls
537 # special case for bulk update -- need to build the indexes
538 # at the same time we build the main feature table
539 return 1 if $self->{bulk_update_in_progress
};
540 my $d = $self->setting('autoindex');
541 $self->setting(autoindex
=>shift) if @_;
545 sub _start_bulk_update
{
547 my $dbh = $self->dbh;
548 $self->{bulk_update_in_progress
}++;
551 sub _finish_bulk_update
{
553 my $dbh = $self->dbh;
554 my $dir = $self->{dumpdir
} || '.';
555 for my $table ('feature',$self->index_tables) {
556 my $fh = $self->dump_filehandle($table);
557 my $path = $self->dump_path($table);
559 my $qualified_table = $self->_qualify($table);
560 `cp $path $path.bak`;
561 # Get stuff from file into STDIN so we don't have to be superuser
563 print STDERR
"Loading file $path\n";
564 $dbh->do("COPY $qualified_table FROM STDIN CSV QUOTE '''' DELIMITER '\t'") or $self->throw($dbh->errstr);
565 while (my $line = <FH
>) {
566 $dbh->pg_putline($line);
568 $dbh->pg_endcopy() or $self->throw($dbh->errstr);
572 delete $self->{bulk_update_in_progress
};
573 delete $self->{filehandles
};
578 # Add subparts to a feature. Both the feature and all subparts must already be in the database.
580 sub _add_SeqFeature
{
583 # special purpose method for case when we are doing a bulk update
584 return $self->_dump_add_SeqFeature(@_) if $self->{bulk_update_in_progress
};
589 my $dbh = $self->dbh;
590 local $dbh->{RaiseError
} = 1;
592 my $child_table = $self->_parent2child_table();
595 my $exist_query = $self->_prepare("SELECT count(*) FROM $child_table WHERE id = ? AND child = ?" );
597 my $insert_query = $self->_prepare("INSERT INTO $child_table (id,child) VALUES (?, ?)");
599 # my $sth = $self->_prepare(<<END);
600 #REPLACE INTO $child_table (id,child) VALUES (?,?)
603 my $parent_id = (ref $parent ?
$parent->primary_id : $parent)
604 or $self->throw("$parent should have a primary_id");
607 $dbh->begin_work or $self->throw($dbh->errstr);
609 for my $child (@children) {
610 my $child_id = ref $child ?
$child->primary_id : $child;
611 defined $child_id or die "no primary ID known for $child";
613 $exist_query->($parent_id, $child_id);
614 my ($exist) = $exist_query->fetchrow_array;
617 $insert_query->execute($parent_id,$child_id);
624 warn "Transaction aborted because $@";
630 $insert_query->finish;
634 sub _fetch_SeqFeatures
{
639 my $parent_id = $parent->primary_id or $self->throw("$parent should have a primary_id");
640 my $feature_table = $self->_feature_table;
641 my $child_table = $self->_parent2child_table();
643 my @from = ("$feature_table as f","$child_table as c");
644 my @where = ('f.id=c.child','c.id=?');
645 my @args = $parent_id;
648 my ($from,$where,undef,@a) = $self->_types_sql(\
@types,'f');
649 push @from,$from if $from;
650 push @where,$where if $where;
654 my $from = join ', ',@from;
655 my $where = join ' AND ',@where;
663 $self->_print_query($query,@args) if DEBUG
|| $self->debug;
665 my $sth = $self->_prepare($query) or $self->throw($self->dbh->errstr);
666 $sth->execute(@args) or $self->throw($sth->errstr);
668 return $self->_sth2objs($sth);
672 # get primary sequence between start and end
674 sub _fetch_sequence
{
676 my ($seqid,$start,$end) = @_;
678 # backward compatibility to the old days when I liked reverse complementing
679 # dna by specifying $start > $end
681 if (defined $start && defined $end && $start > $end) {
683 ($start,$end) = ($end,$start);
685 $start-- if defined $start;
686 $end-- if defined $end;
688 my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
689 my $offset2 = $self->_offset_boundary($seqid,$end || 'right');
690 my $sequence_table = $self->_sequence_table;
691 my $locationlist_table = $self->_locationlist_table;
693 my $sth = $self->_prepare(<<END);
694 SELECT sequence,offset
695 FROM $sequence_table as s,$locationlist_table as ll
704 $sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
706 while (my($frag,$offset) = $sth->fetchrow_array) {
707 substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
710 substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
713 $seq =~ tr/gatcGATC/ctagCTAG/;
719 sub _offset_boundary
{
721 my ($seqid,$position) = @_;
723 my $sequence_table = $self->_sequence_table;
724 my $locationlist_table = $self->_locationlist_table;
727 $sql = $position eq 'left' ?
"SELECT min(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
728 :$position eq 'right' ?
"SELECT max(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
729 :"SELECT max(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=? AND offset<=?";
730 my $sth = $self->_prepare($sql);
731 my @args = $position =~ /^-?\d+$/ ?
($seqid,$position) : ($seqid);
732 $sth->execute(@args) or $self->throw($sth->errstr);
733 my $boundary = $sth->fetchall_arrayref->[0][0];
740 # add namespace to tablename
744 my $table_name = shift;
745 my $namespace = $self->namespace;
746 return $table_name unless defined $namespace;
747 return "${namespace}_${table_name}";
751 # Fetch a Bio::SeqFeatureI from database using its primary_id
755 @_ or $self->throw("usage: fetch(\$primary_id)");
756 my $primary_id = shift;
757 my $features = $self->_feature_table;
758 my $sth = $self->_prepare(<<END);
759 SELECT id,object FROM $features WHERE id=?
761 $sth->execute($primary_id) or $self->throw($sth->errstr);
762 my $obj = $self->_sth2obj($sth);
768 # Efficiently fetch a series of IDs from the database
769 # Can pass an array or an array ref
773 @_ or $self->throw('usage: fetch_many($id1,$id2,$id3...)');
774 my $ids = join ',',map {ref($_) ? @
$_ : $_} @_ or return;
775 my $features = $self->_feature_table;
777 my $sth = $self->_prepare(<<END);
778 SELECT id,object FROM $features WHERE id IN ($ids)
780 $sth->execute() or $self->throw($sth->errstr);
781 return $self->_sth2objs($sth);
786 my ($seq_id,$start,$end,$strand,
787 $name,$class,$allow_aliases,
793 ) = rearrange
([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
794 'NAME','CLASS','ALIASES',
795 ['TYPES','TYPE','PRIMARY_TAG'],
796 ['ATTRIBUTES','ATTRIBUTE'],
802 my (@from,@where,@args,@group);
803 $range_type ||= 'overlaps';
805 my $feature_table = $self->_feature_table;
806 @from = "$feature_table as f";
809 # hacky backward compatibility workaround
810 undef $class if $class && $class eq 'Sequence';
811 $name = "$class:$name" if defined $class && length $class > 0;
812 # last argument is the join field
813 my ($from,$where,$group,@a) = $self->_name_sql($name,$allow_aliases,'f.id');
814 push @from,$from if $from;
815 push @where,$where if $where;
816 push @group,$group if $group;
820 if (defined $seq_id) {
821 # last argument is the name of the features table
822 my ($from,$where,$group,@a) = $self->_location_sql($seq_id,$start,$end,$range_type,$strand,'f');
823 push @from,$from if $from;
824 push @where,$where if $where;
825 push @group,$group if $group;
829 if (defined($types)) {
830 # last argument is the name of the features table
831 my ($from,$where,$group,@a) = $self->_types_sql($types,'f');
832 push @from,$from if $from;
833 push @where,$where if $where;
834 push @group,$group if $group;
838 if (defined $attributes) {
839 # last argument is the join field
840 my ($from,$where,$group,@a) = $self->_attributes_sql($attributes,'f.id');
841 push @from,$from if $from;
842 push @where,$where if $where;
843 push @group,$group if $group;
847 if (defined $fromtable) {
848 # last argument is the join field
849 my ($from,$where,$group,@a) = $self->_from_table_sql($fromtable,'f.id');
850 push @from,$from if $from;
851 push @where,$where if $where;
852 push @group,$group if $group;
856 # if no other criteria are specified, then
857 # only fetch indexed (i.e. top level objects)
858 @where = 'indexed=1' unless @where;
860 my $from = join ', ',@from;
861 my $where = join ' AND ',map {"($_)"} @where;
862 my $group = join ', ',@group;
863 $group = "GROUP BY $group" if @group;
872 $self->_print_query($query,@args) if DEBUG
|| $self->debug;
874 my $sth = $self->_prepare($query);
875 $sth->execute(@args) or $self->throw($$ . "\n" . $sth->errstr);
878 # print STDERR "I run the query\n SELECT f.id, f.object FROM $from WHERE $where $group\n with args @args\n";
879 # print STDERR "I should make an object out of " . Dumper($sth->fetchrow_hashref()) . "\n";
880 # $sth->execute(@args) or $self->throw($sth->errstr);
881 # print STDERR "I am making objects: " . Dumper($self->_sth2objs($sth)) . "\n";
882 # $sth->execute(@args) or $self->throw($sth->errstr);
884 return $iterator ? Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
->new($sth,$self) : $self->_sth2objs($sth);
889 my ($name,$allow_aliases,$join) = @_;
890 my $name_table = $self->_name_table;
892 my $from = "$name_table as n";
893 my ($match,$string) = $self->_match_sql($name);
895 my $where = "n.id=$join AND n.name $match";
896 $where .= " AND n.display_name>0" unless $allow_aliases;
897 return ($from,$where,'',$string);
900 sub _search_attributes
{
902 my ($search_string,$attribute_names,$limit) = @_;
903 my @words = map {quotemeta($_)} split /\s+/,$search_string;
904 my $name_table = $self->_name_table;
905 my $attribute_table = $self->_attribute_table;
906 my $attributelist_table = $self->_attributelist_table;
907 my $type_table = $self->_type_table;
908 my $typelist_table = $self->_typelist_table;
910 my @tags = @
$attribute_names;
911 my $tag_sql = join ' OR ',("al.tag=?") x
@tags;
913 my $perl_regexp = join '|',@words;
915 my $sql_regexp = join ' AND ',("a.attribute_value SIMILAR TO ?") x
@words;
917 SELECT name,attribute_value,tl.tag,n.id
918 FROM $name_table as n,$attribute_table as a,$attributelist_table as al,$type_table as t,$typelist_table as tl
920 AND al.id=a.attribute_id
927 $sql .= "LIMIT $limit" if defined $limit;
928 $self->_print_query($sql,@tags,@words) if DEBUG
|| $self->debug;
929 my $sth = $self->_prepare($sql);
930 $sth->execute(@tags,@words) or $self->throw($sth->errstr);
933 while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
934 my (@hits) = $value =~ /$perl_regexp/ig;
935 my @words_in_row = split /\b/,$value;
936 my $score = int(@hits*100/@words/@words_in_row);
937 push @results,[$name,$value,$score,$type,$id];
940 @results = sort {$b->[2]<=>$a->[2]} @results;
949 if ($name =~ /(?:^|[^\\])[*?]/) {
950 $name =~ s/(^|[^\\])([%_])/$1\\$2/g;
951 $name =~ s/(^|[^\\])\*/$1%/g;
952 $name =~ s/(^|[^\\])\?/$1_/g;
959 return ($match,$string);
962 sub _from_table_sql
{
964 my ($from_table,$join) = @_;
965 my $from = "$from_table as ft";
966 my $where = "ft.id=$join";
967 return ($from,$where,'');
970 sub _attributes_sql
{
972 my ($attributes,$join) = @_;
974 my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
975 my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
977 my $attribute_table = $self->_attribute_table;
978 my $attributelist_table = $self->_attributelist_table;
980 my $from = "$attribute_table as a use index(attribute_id), $attributelist_table as al";
984 AND a.attribute_id=al.id
988 my $group = $group_by;
990 my @args = (@bind_args,@group_args);
991 return ($from,$where,$group,@args);
# Capability flag: subfeature types are recorded in the type index,
# so type-based queries will also find subfeatures. Always true here.
sub subfeature_types_are_indexed {
    return 1;
}
# Capability flag: subfeature locations participate in the spatial
# (bin) index, so range queries will also find subfeatures. Always true.
sub subfeature_locations_are_indexed {
    return 1;
}
999 my ($types,$type_table) = @_;
1000 my ($primary_tag,$source_tag);
1002 my @types = ref $types eq 'ARRAY' ? @
$types : $types;
1004 my $typelist = $self->_typelist_table;
1005 my $from = "$typelist AS tl";
1007 my (@matches,@args);
1009 for my $type (@types) {
1011 if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
1012 $primary_tag = $type->method;
1013 $source_tag = $type->source;
1015 ($primary_tag,$source_tag) = split ':',$type,2;
1018 if (defined $source_tag) {
1019 push @matches,"tl.tag=?";
1020 push @args,"$primary_tag:$source_tag";
1022 push @matches,"tl.tag LIKE ?";
1023 push @args,"$primary_tag:%";
1026 my $matches = join ' OR ',@matches;
1029 tl.id=$type_table.typeid
1033 return ($from,$where,'',@args);
1038 my ($seq_id,$start,$end,$range_type,$strand,$location) = @_;
1040 # the additional join on the location_list table badly impacts performance
1041 # so we build a copy of the table in memory
1042 my $seqid = $self->_locationid($seq_id) || 0; # zero is an invalid primary ID, so will return empty
1044 $start = MIN_INT
unless defined $start;
1045 $end = MAX_INT
unless defined $end;
1047 my ($bin_where,@bin_args) = $self->bin_where($start,$end,$location);
1049 my ($range,@range_args);
1050 if ($range_type eq 'overlaps') {
1051 $range = "$location.end>=? AND $location.start<=? AND ($bin_where)";
1052 @range_args = ($start,$end,@bin_args);
1053 } elsif ($range_type eq 'contains') {
1054 $range = "$location.start>=? AND $location.end<=? AND ($bin_where)";
1055 @range_args = ($start,$end,@bin_args);
1056 } elsif ($range_type eq 'contained_in') {
1057 $range = "$location.start<=? AND $location.end>=?";
1058 @range_args = ($start,$end);
1060 $self->throw("range_type must be one of 'overlaps', 'contains' or 'contained_in'");
1063 if (defined $strand) {
1064 $range .= " AND strand=?";
1065 push @range_args,$strand;
1076 my @args = ($seqid,@range_args);
1077 return ($from,$where,$group,@args);
1085 my $from_update_table = shift; # if present, will take ids from "update_table"
1087 my $dbh = $self->dbh;
1091 # try to bring in highres time() function
1092 eval "require Time::HiRes";
1094 my $last_time = $self->time();
1096 # tell _delete_index() not to bother removing the index rows corresponding
1097 # to each individual feature
1098 local $self->{reindexing
} = 1;
1102 my $update = $from_update_table;
1103 for my $table ($self->index_tables) {
1104 my $query = $from_update_table ?
"DELETE $table FROM $table,$update WHERE $table.id=$update.id"
1105 : "DELETE FROM $table";
1107 $dbh->do("ALTER TABLE $table DISABLE KEYS");
1109 my $iterator = $self->get_seq_stream(-from_table
=>$from_update_table ?
$update : undef);
1110 while (my $f = $iterator->next_seq) {
1111 if (++$count %1000 == 0) {
1112 $now = $self->time();
1113 my $elapsed = sprintf(" in %5.2fs",$now - $last_time);
1115 print STDERR
"$count features indexed$elapsed...",' 'x60
;
1116 print STDERR
-t STDOUT
&& !$ENV{EMACS
} ?
"\r" : "\n";
1118 $self->_update_indexes($f);
1121 for my $table ($self->index_tables) {
1122 $dbh->do("ALTER TABLE $table ENABLE KEYS");
1125 warn "Couldn't complete transaction: $@";
1136 $self->dbh->do("ANALYZE TABLE $_") foreach $self->index_tables;
1141 my @index_tables = $self->index_tables;
1142 my $feature_table = $self->_feature_table;
1143 return ($feature_table,@index_tables);
1148 return map {$self->_qualify($_)} qw(name attribute parent2child)
1153 my $features = $self->_feature_table;
1155 SELECT min(id) FROM $features
1157 my $sth=$self->_prepare($query);
1159 my ($first) = $sth->fetchrow_array;
1166 my $lastkey = shift;
1167 my $features = $self->_feature_table;
1169 SELECT min(id) FROM $features WHERE id>?
1171 my $sth=$self->_prepare($query);
1172 $sth->execute($lastkey);
1173 my ($next) = $sth->fetchrow_array;
1181 my $features = $self->_feature_table;
1183 SELECT count(*) FROM $features WHERE id=?
1185 my $sth=$self->_prepare($query);
1186 $sth->execute($key);
1187 my ($count) = $sth->fetchrow_array;
1195 my $dbh = $self->dbh;
1196 my $child_table = $self->_parent2child_table;
1197 my $query = "SELECT child FROM $child_table WHERE id=?";
1198 my $sth=$self->_prepare($query);
1199 $sth->execute($key);
1201 while (my ($cid) = $sth->fetchrow_array) {
1202 # Backcheck looking for multiple parents, delete only if one is present. I'm
1203 # sure there is a nice way to left join the parent2child table onto itself
1204 # to get this in one query above, just haven't worked it out yet...
1205 my $sth2 = $self->_prepare("SELECT count(id) FROM $child_table WHERE child=?");
1206 $sth2->execute($cid);
1207 my ($count) = $sth2->fetchrow_array;
1209 $self->_deleteid($cid) || $self->throw("Couldn't remove subfeature!");
1212 for my $table ($self->all_tables) {
1213 $success += $dbh->do("DELETE FROM $table WHERE id=$key") || 0;
1220 my $dbh = $self->dbh;
1221 for my $table ($self->all_tables) {
1222 $dbh->do("DELETE FROM $table");
1228 my $dbh = $self->dbh;
1229 my $features = $self->_feature_table;
1231 SELECT count(*) FROM $features
1233 my $sth=$self->_prepare($query);
1235 my ($count) = $sth->fetchrow_array;
1242 my $dbh = $self->dbh;
1243 my $location = $self->_locationlist_table;
1244 my $sth = $self->_prepare("SELECT DISTINCT seqname FROM $location");
1245 $sth->execute() or $self->throw($sth->errstr);
1247 while (my ($id) = $sth->fetchrow_array) {
1255 my ($variable_name,$value) = @_;
1256 my $meta = $self->_meta_table;
1258 if (defined $value && $self->writeable) {
1259 my $querydel = "DELETE FROM $meta WHERE name = ?";
1260 my $query = "INSERT INTO $meta (name,value) VALUES (?,?)";
1261 my $sthdel = $self->_prepare($querydel);
1262 my $sth = $self->_prepare($query);
1263 $sthdel->execute($variable_name);
1264 $sth->execute($variable_name,$value) or $self->throw($sth->errstr);
1266 $self->{settings_cache
}{$variable_name} = $value;
1269 return $self->{settings_cache
}{$variable_name} if exists $self->{settings_cache
}{$variable_name};
1271 SELECT value FROM $meta as m WHERE m.name=?
1273 my $sth = $self->_prepare($query);
1274 # $sth->execute($variable_name) or $self->throw($sth->errstr);
1275 unless ($sth->execute($variable_name)) {
1276 my $errstr = $sth->errstr;
1277 $sth = $self->_prepare("SHOW search_path");
1279 $errstr .= "With search_path " . $sth->fetchrow_arrayref->[0] . "\n";
1280 $self->throw($errstr);
1283 my ($value) = $sth->fetchrow_array;
1285 return $self->{settings_cache
}{$variable_name} = $value;
1290 # Replace Bio::SeqFeatureI into database.
1295 my $index_flag = shift || undef;
1297 # ?? shouldn't need to do this
1298 # $self->_load_class($object);
1299 my $id = $object->primary_id;
1300 my $features = $self->_feature_table;
1302 my $exist_query = defined($id)
1303 ?
$self->_prepare("select count(*) from $features where id = ?")
1305 my $update_query = $self->_prepare("update $features object = ?,
1315 my $insert_query = $self->_prepare("INSERT INTO $features (object,indexed,seqid,start,\"end\",strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?)");
1317 my @location = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x6
;
1319 my $primary_tag = $object->primary_tag;
1320 my $source_tag = $object->source_tag || '';
1321 $primary_tag .= ":$source_tag";
1322 my $typeid = $self->_typeid($primary_tag,1);
1325 $exist_query->execute($id) if $exist_query;
1327 ($exists) = $exist_query->fetchrow_array if $exist_query;
1334 $update_query->execute($self->freeze($object),$index_flag||0,@location,$typeid,$id) or $self->throw($update_query->errstr);
1337 $insert_query->execute($self->freeze($object),$index_flag||0,@location,$typeid) or $self->throw($insert_query->errstr);
1340 #old syntax with with replace into
1341 #$sth->execute($id,$self->freeze($object),$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
1343 my $dbh = $self->dbh;
1344 $object->primary_id($dbh->{pg_insertid
}) unless defined $id;
1346 $self->flag_for_indexing($dbh->{pg_insertid
}) if $self->{bulk_update_in_progress
};
1350 # Insert one Bio::SeqFeatureI into database. primary_id must be undef
1355 my $index_flag = shift || 0;
1357 $self->_load_class($object);
1358 defined $object->primary_id and $self->throw("$object already has a primary id");
1360 my $features = $self->_feature_table;
1361 my $sth = $self->_prepare(<<END);
1362 INSERT INTO $features (id,object,indexed) VALUES (?,?,?)
1364 $sth->execute(undef,$self->freeze($object),$index_flag) or $self->throw($sth->errstr);
1365 my $dbh = $self->dbh;
1366 $object->primary_id($dbh->{pg_insertid
});
1367 $self->flag_for_indexing($dbh->{pg_insertid
}) if $self->{bulk_update_in_progress
};
1373 Usage : @type_list = $db->types
1374 Function: Get all the types in the database
1375 Returns : array of Bio::DB::GFF::Typename objects
1383 eval "require Bio::DB::GFF::Typename"
1384 unless Bio
::DB
::GFF
::Typename
->can('new');
1385 my $typelist_table = $self->_typelist_table;
1387 SELECT tag from $typelist_table
1390 $self->_print_query($sql) if DEBUG
|| $self->debug;
1391 my $sth = $self->_prepare($sql);
1392 $sth->execute() or $self->throw($sth->errstr);
1395 while (my($tag) = $sth->fetchrow_array) {
1396 push @results,Bio
::DB
::GFF
::Typename
->new($tag);
# Insert a bit of DNA or protein into the database.
# PostgreSQL has no REPLACE INTO, so an upsert is emulated by deleting any
# existing chunk at this (id,"offset") before inserting the new one.
sub _insert_sequence {
  my $self = shift;
  my ($seqid,$seq,$offset) = @_;
  my $id       = $self->_locationid($seqid);
  my $seqtable = $self->_sequence_table;
  my $sthdel = $self->_prepare(qq{DELETE FROM $seqtable WHERE id = ? AND "offset" = ?});
  my $sthins = $self->_prepare(<<END);
INSERT INTO $seqtable (id,"offset",sequence) VALUES (?,?,?)
END
  $sthdel->execute($id,$offset);
  $sthins->execute($id,$offset,$seq) or $self->throw($sthins->errstr);
}
# This subroutine flags the given primary ID for later reindexing
sub flag_for_indexing {
  my $self = shift;
  my $id   = shift;
  my $needs_updating = $self->_update_table;
  # BUG FIX: "REPLACE INTO" is MySQL-only syntax and is a syntax error under
  # PostgreSQL, so this statement could never execute. Emulate it with
  # delete-then-insert, the same pattern _insert_sequence() uses.
  my $sthdel = $self->_prepare("DELETE FROM $needs_updating WHERE id = ?");
  $sthdel->execute($id);
  my $sth = $self->_prepare("INSERT INTO $needs_updating VALUES (?)");
  $sth->execute($id) or $self->throw($self->dbh->errstr);
}
# Update indexes for given object. Features without a primary_id are ignored.
sub _update_indexes {
  my $self = shift;
  my $obj  = shift;
  defined (my $id = $obj->primary_id) or return;

  if ($self->{bulk_update_in_progress}) {
    # during a bulk load, index rows are appended to dump files
    $self->_dump_update_name_index($obj,$id);
    $self->_dump_update_attribute_index($obj,$id);
  }
  else {
    $self->_update_name_index($obj,$id);
    $self->_update_attribute_index($obj,$id);
  }
}
# Rebuild the name-index rows for one feature: drop the old rows, then
# insert its display names (display_name=1) and aliases (display_name=0).
sub _update_name_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $name = $self->_name_table;
  # NOTE: removed an unused local that fetched $obj->primary_id and never
  # used it ($id is the authoritative key here).
  $self->_delete_index($name,$id);
  my ($names,$aliases) = $self->feature_names($obj);

  my $sth = $self->_prepare("INSERT INTO $name (id,name,display_name) VALUES (?,?,?)");
  $sth->execute($id,$_,1) or $self->throw($sth->errstr) foreach @$names;
  $sth->execute($id,$_,0) or $self->throw($sth->errstr) foreach @$aliases;
}
# Rebuild the attribute-index rows for one feature: drop old rows, then
# insert one row per (tag,value) pair the feature carries.
sub _update_attribute_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $attribute = $self->_attribute_table;
  $self->_delete_index($attribute,$id);

  my $sth = $self->_prepare("INSERT INTO $attribute (id,attribute_id,attribute_value) VALUES (?,?,?)");
  foreach my $tag ($obj->get_all_tags) {
    my $tag_id = $self->_attributeid($tag);
    foreach my $value ($obj->get_tag_values($tag)) {
      $sth->execute($id,$tag_id,$value) or $self->throw($sth->errstr);
    }
  }
}
# Look up the numeric id for $name in one of the small lookup tables
# (typelist/locationlist/attributelist); optionally create the row when
# missing and return the newly assigned serial id.
sub _genericid {
  my $self = shift;
  my ($table,$namefield,$name,$add_if_missing) = @_;
  my $qualified_table = $self->_qualify($table);
  my $sth = $self->_prepare(<<END);
SELECT id FROM $qualified_table WHERE $namefield=?
END
  $sth->execute($name) or die $sth->errstr;
  my ($id) = $sth->fetchrow_array;
  return $id if defined $id;
  return unless $add_if_missing;

  $sth = $self->_prepare(<<END);
INSERT INTO $qualified_table ($namefield) VALUES (?)
END
  # BUG FIX: removed a leftover debug print to STDOUT
  # ("Inserting into $qualified_table ...") that polluted the output stream
  # of every program that loads features through this adaptor.
  $sth->execute($name) or die $sth->errstr;
  my $dbh = $self->dbh;
  return $dbh->last_insert_id(undef, undef, $qualified_table, undef);
}
# Numeric-id lookups for the three controlled-vocabulary tables; each one
# creates the entry on first use (add_if_missing => 1).
sub _typeid      { shift->_genericid('typelist','tag',shift,1) }
sub _locationid  { shift->_genericid('locationlist','seqname',shift,1) }
sub _attributeid { shift->_genericid('attributelist','tag',shift,1) }
# Return the indexable location tuple for a feature:
# (numeric seqid, start, end, strand, bin tier, bin number).
sub _get_location_and_bin {
  my $self    = shift;
  my $feature = shift;
  my $seqid  = $self->_locationid($feature->seq_id);
  my $start  = $feature->start;
  my $end    = $feature->end;
  my $strand = $feature->strand || 0;   # undef strand is stored as 0
  my ($tier,$bin) = $self->get_bin($start,$end);
  return ($seqid,$start,$end,$strand,$tier,$bin);
}
# Find the smallest bin that wholly contains [start,end], returning the
# bin's tier and its number within that tier. Tiers grow from MIN_BIN,
# widening by a factor of 10 per tier (assumed scale factor -- TODO confirm
# against the MIN_BIN/MAX_BIN definitions and bin_where()).
sub get_bin {
  my $self = shift;
  my ($start,$end) = @_;
  my $binsize = MIN_BIN;
  my ($bin_start,$bin_end);
  my $tier = 0;
  while (1) {
    $bin_start = int $start/$binsize;
    $bin_end   = int $end/$binsize;
    last if $bin_start == $bin_end;   # the feature fits in one bin at this tier
    $binsize *= 10;
    $tier++;
  }
  return ($tier,$bin_start);
}
# Build the OR'd SQL fragment (plus bind values) selecting every
# (tier, bin-range) pair that can overlap [start,end]; $f is the table
# alias providing the tier/bin columns.
sub bin_where {
  my $self = shift;
  my ($start,$end,$f) = @_;
  my (@bins,@args);
  my $tier    = 0;
  my $binsize = MIN_BIN;
  while ($binsize <= MAX_BIN) {
    my $bin_start = int($start/$binsize);
    my $bin_end   = int($end/$binsize);
    push @bins,"($f.tier=? AND $f.bin between ? AND ?)";
    push @args,($tier,$bin_start,$bin_end);
    $binsize *= 10;   # assumed per-tier scale factor -- TODO confirm vs get_bin()
    $tier++;
  }
  my $query = join("\n\t OR ",@bins);
  # NOTE(review): in scalar context this calls substitute($query,@args)
  # as a plain function, while substitute() below shifts off $self first --
  # verify the intended calling convention.
  return wantarray ? ($query,@args) : substitute($query,@args);
}
# Remove every index row for primary id $id from $table_name.
# A no-op while a reindexing pass is running (the {reindexing} flag is set).
sub _delete_index {
  my $self = shift;
  my ($table_name,$id) = @_;
  return if $self->{reindexing};
  my $sth = $self->_prepare("DELETE FROM $table_name WHERE id=?")
    or $self->throw($self->dbh->errstr);
  $sth->execute($id);
}
# given a statement handle that is expected to return rows of (id,object),
# thaw each object and return a list of 'em
sub _sth2objs {
  my $self = shift;
  my $sth  = shift;
  my @result;
  my ($id,$o);
  $sth->bind_col(1, \$id);
  # bind the object column as BYTEA so DBD::Pg hands back the raw frozen bytes
  $sth->bind_col(2, \$o, { pg_type => PG_BYTEA });
  while ($sth->fetch) {
    push @result, $self->thaw($o,$id);
  }
  $sth->finish;
  return @result;
}
# given a statement handle that is expected to return rows of (id,object),
# thaw and return the single next object (undef/empty when exhausted)
sub _sth2obj {
  my $self = shift;
  my $sth  = shift;
  my ($id,$o) = $sth->fetchrow_array;
  return unless defined $o;
  my $obj = $self->thaw($o,$id);
  return $obj;
}
# Prepare a statement with DBI handle-caching. The $if_active value of 3
# tells prepare_cached to hand back a fresh handle rather than reusing a
# still-active cached one (see DBI's prepare_cached documentation).
sub _prepare {
  my $self  = shift;
  my $query = shift;
  my $dbh   = $self->dbh;
  my $sth   = $dbh->prepare_cached($query, {}, 3) or $self->throw($dbh->errstr);
  return $sth;
}
####################################################################################################
# SQL Fragment generators
####################################################################################################

# Fully qualified (schema-prefixed) names of the adaptor's tables.
sub _feature_table       { shift->_qualify('feature') }
sub _location_table      { shift->_qualify('location') }
sub _locationlist_table  { shift->_qualify('locationlist') }
sub _type_table          { shift->_qualify('feature') }   # type info lives in the feature table
sub _typelist_table      { shift->_qualify('typelist') }
sub _name_table          { shift->_qualify('name') }
sub _attribute_table     { shift->_qualify('attribute') }
sub _attributelist_table { shift->_qualify('attributelist') }
sub _parent2child_table  { shift->_qualify('parent2child') }
sub _meta_table          { shift->_qualify('meta') }
sub _update_table        { shift->_qualify('update_table') }
sub _sequence_table      { shift->_qualify('sequence') }
# Build the WHERE fragment (and its bind values) that matches features by
# attribute tag/value. '*' in a value is converted to the SQL '%' wildcard,
# which switches that comparison from '=' to LIKE.
sub _make_attribute_where {
  my $self = shift;
  my ($attributetable,$attributenametable,$attributes) = @_;
  my (@sql,@args);
  # NOTE: removed an unused "my $dbh = $self->dbh" local.
  foreach my $tag (keys %$attributes) {
    my @values = ref($attributes->{$tag}) && ref($attributes->{$tag}) eq 'ARRAY'
                 ? @{$attributes->{$tag}}
                 : $attributes->{$tag};
    foreach (@values) { # convert * into % for wildcard matches
      s/\*/%/g;
    }
    my $match = join ' OR ', map {
      /%/ ? "$attributetable.attribute_value LIKE ?"
          : "$attributetable.attribute_value=?"
    } @values;
    push @sql,  "($attributenametable.tag=? AND ($match))";
    push @args, ($tag,@values);
  }
  return (join(' OR ',@sql),@args);
}
# GROUP BY / HAVING fragment ensuring a feature matched every requested
# attribute: count(f.id) must exceed key_count-1, i.e. reach the number of
# attribute keys. Returns nothing when there are no attributes.
sub _make_attribute_group {
  my $self = shift;
  my ($table_name,$attributes) = @_;
  my $key_count = keys %$attributes
    or return;
  return ("f.id HAVING count(f.id)>?", $key_count - 1);
}
# Interpolate bind values into a query string (for display/debugging),
# quoting each argument through DBI's quote().
sub substitute {
  my $self = shift;
  my ($query,@args) = @_;
  while ($query =~ /\?/) {
    my $quoted = $self->dbh->quote(shift @args);
    $query =~ s/\?/$quoted/;
  }
  return $query;
}
# special-purpose store for bulk loading - write to a file rather than to the db
sub _dump_store {
  my $self    = shift;
  my $indexed = shift;

  my $count    = 0;
  my $store_fh = $self->dump_filehandle('feature');
  my $dbh      = $self->dbh;

  my $autoindex = $self->autoindex;

  for my $obj (@_) {
    my $id = $self->next_id;
    my ($seqid,$start,$end,$strand,$tier,$bin) =
      $indexed ? $self->_get_location_and_bin($obj) : (undef) x 6;
    my $primary_tag = $obj->primary_tag;
    my $source_tag  = $obj->source_tag || '';
    $primary_tag   .= ":$source_tag";
    my $typeid = $self->_typeid($primary_tag,1);

    # quote as BYTEA, then strip DBI's outer quoting and collapse doubled
    # backslashes so the value survives tab-delimited bulk loading
    my $frozen_object = $dbh->quote($self->freeze($obj), { pg_type => PG_BYTEA });
    $frozen_object =~ s/^E?'|'$//g;
    $frozen_object =~ s/\\\\/\\/g;
    # TODO: Fix this, why does frozen object start with quote but not end with one
    print $store_fh join("\t",$id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,$frozen_object),"\n";
    $obj->primary_id($id);
    $self->_update_indexes($obj) if $indexed && $autoindex;
    $count++;
  }

  # remember whether we have ever stored a non-indexed feature
  unless ($indexed or $self->{indexed_flag}++) {
    $self->subfeatures_are_indexed(0);
  }
  return $count;
}
# Bulk-load variant of add_SeqFeature: append (parent_id, child_id) pairs
# to the parent2child dump file instead of inserting rows. Returns the
# number of pairs written.
sub _dump_add_SeqFeature {
  my $self     = shift;
  my $parent   = shift;
  my @children = @_;

  # NOTE: removed an unused "my $dbh = $self->dbh" local.
  my $fh = $self->dump_filehandle('parent2child');
  my $parent_id = (ref $parent ? $parent->primary_id : $parent)
    or $self->throw("$parent should have a primary_id");

  my $count = 0;
  for my $child_id (@children) {
    print $fh join("\t",$parent_id,$child_id),"\n";
    $count++;
  }
  return $count;
}
# Bulk-load variant of _update_name_index: append name rows to the 'name'
# dump file. display_name column is 1 for primary names, 0 for aliases.
sub _dump_update_name_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $fh  = $self->dump_filehandle('name');
  my $dbh = $self->dbh;
  my ($names,$aliases) = $self->feature_names($obj);
  print $fh join("\t",$id,$dbh->quote($_),1),"\n" foreach @$names;
  print $fh join("\t",$id,$dbh->quote($_),0),"\n" foreach @$aliases;
}
# Bulk-load variant of _update_attribute_index: append attribute rows to
# the 'attribute' dump file, one row per (tag,value) pair.
sub _dump_update_attribute_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $fh  = $self->dump_filehandle('attribute');
  my $dbh = $self->dbh;
  # CONSISTENCY: use get_all_tags/get_tag_values to match
  # _update_attribute_index (all_tags/each_tag_value are the older
  # Bio::SeqFeatureI aliases for the same methods).
  for my $tag ($obj->get_all_tags) {
    my $tagid = $self->_attributeid($tag);
    for my $value ($obj->get_tag_values($tag)) {
      print $fh join("\t",$id,$tagid,$dbh->quote($value)),"\n";
    }
  }
}
# High-resolution wall-clock time when Time::HiRes is loaded; otherwise the
# one-second built-in. CORE::time makes the built-in call unambiguous since
# this sub shares the builtin's name.
sub time {
  return Time::HiRes::time() if Time::HiRes->can('time');
  return CORE::time();
}
1745 if ($self->{bulk_update_in_progress
}) { # be sure to remove temp files
1746 for my $table ('feature',$self->index_tables) {
1747 my $path = $self->dump_path($table);