1 package Bio::DB::SeqFeature::Store::DBI::Pg;
2 use DBD::Pg qw(:pg_types);
3 # $Id: Pg.pm 14656 2008-04-14 15:05:37Z lstein $
5 =head1 NAME
7 Bio::DB::SeqFeature::Store::DBI::Pg -- PostgreSQL implementation of Bio::DB::SeqFeature::Store
9 =head1 SYNOPSIS
11 use Bio::DB::SeqFeature::Store;
13 # Open the sequence database
14 my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::Pg',
15 -dsn => 'dbi:Pg:test');
17 # get a feature from somewhere
18 my $feature = Bio::SeqFeature::Generic->new(...);
20 # store it
21 $db->store($feature) or die "Couldn't store!";
23 # after storing, the feature's primary_id is set to its primary ID
24 # in the database...
25 my $id = $feature->primary_id;
27 # get the feature back out
28 my $f = $db->fetch($id);
30 # change the feature and update it
31 $f->start(100);
32 $db->update($f) or die "Couldn't update!";
34 # searching...
35 # ...by id
36 my @features = $db->fetch_many(@list_of_ids);
38 # ...by name
39 @features = $db->get_features_by_name('ZK909');
41 # ...by alias
42 @features = $db->get_features_by_alias('sma-3');
44 # ...by type
45 @features = $db->get_features_by_type('gene');
47 # ...by location
48 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
50 # ...by attribute
51 @features = $db->get_features_by_attribute({description => 'protein kinase'});
53 # ...by the GFF "Note" field
54 @result_list = $db->search_notes('kinase');
56 # ...by arbitrary combinations of selectors
57 @features = $db->features(-name => $name,
58 -type => $types,
59 -seq_id => $seqid,
60 -start => $start,
61 -end => $end,
62 -attributes => $attributes);
64 # ...using an iterator
65 my $iterator = $db->get_seq_stream(-name => $name,
66 -type => $types,
67 -seq_id => $seqid,
68 -start => $start,
69 -end => $end,
70 -attributes => $attributes);
72 while (my $feature = $iterator->next_seq) {
73 # do something with the feature
74 }
76 # ...limiting the search to a particular region
77 my $segment = $db->segment('Chr1',5000=>6000);
78 my @features = $segment->features(-type=>['mRNA','match']);
80 # getting & storing sequence information
81 # Warning: this returns a string, and not a PrimarySeq object
82 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
83 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
85 # what feature types are defined in the database?
86 my @types = $db->types;
88 # create a new feature in the database
89 my $feature = $db->new_feature(-primary_tag => 'mRNA',
90 -seq_id => 'chr3',
91 -start => 10000,
92 -end => 11000);
94 =head1 DESCRIPTION
96 Bio::DB::SeqFeature::Store::DBI::Pg is the PostgreSQL adaptor for
97 Bio::DB::SeqFeature::Store. You will not create it directly, but
98 instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
100 See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
102 =head2 Using the Pg adaptor
104 Before you can use the adaptor, you must create a database and establish
105 a user account with write permission, for example with the pgAdmin tool or
106 the createdb/psql command-line utilities. "Fast" loading streams data
107 through COPY ... FROM STDIN, so the account needs no special file privileges.
109 To establish a connection to the database, call
110 Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::Pg',@more_args). The
111 additional arguments are as follows:
113 Argument name Description
114 ------------- -----------
116 -dsn The database name. You can abbreviate
117 "dbi:Pg:foo" as "foo" if you wish.
119 -user Username for authentication.
121 -pass Password for authentication.
123 -namespace A prefix to attach to each table. This allows you
124 to have several virtual databases in the same
125 physical database.
127 -temp Boolean flag. If true, a temporary database
128 will be created and destroyed as soon as
129 the Store object goes out of scope. (synonym -temporary)
131 -autoindex Boolean flag. If true, features in the database will be
132 reindexed every time they change. This is the default.
135 -tmpdir Directory in which to place temporary files during "fast" loading.
136 Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
138 -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
139 (synonyms -options, -dbi_attr)
141 -write Pass true to open database for writing or updating.
143 If successful, a new instance of
144 Bio::DB::SeqFeature::Store::DBI::Pg will be returned.
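For example (a minimal sketch; the database name, user, password and
namespace below are placeholders), a read/write connection that keeps its
tables under their own namespace and initializes the schema on first use
might look like:

  my $db = Bio::DB::SeqFeature::Store->new(
      -adaptor   => 'DBI::Pg',
      -dsn       => 'dbi:Pg:dbname=volvox',
      -user      => 'gbrowse',
      -pass      => 'secret',
      -namespace => 'volvox1',
      -write     => 1,
      -create    => 1,   # accepted by init(); erases and recreates the tables
  );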
146 In addition to the standard methods supported by all well-behaved
147 Bio::DB::SeqFeature::Store databases, several adaptor-specific
148 methods are provided. These are described in the sections that
149 follow.
151 =cut
153 use strict;
155 use base 'Bio::DB::SeqFeature::Store';
156 use Bio::DB::SeqFeature::Store::DBI::Iterator;
157 use DBI;
158 use Memoize;
159 use Cwd 'abs_path';
160 use Bio::DB::GFF::Util::Rearrange 'rearrange';
161 use File::Spec;
162 use constant DEBUG=>0;
164 # from the MySQL documentation...
165 # WARNING: if your sequence uses coordinates greater than 2 GB, you are out of luck!
166 use constant MAX_INT => 2_147_483_647;
167 use constant MIN_INT => -2_147_483_648;
168 use constant MAX_BIN => 1_000_000_000; # size of largest feature = 1 Gb
169 use constant MIN_BIN => 1000; # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these
171 memoize('_typeid');
172 memoize('_locationid');
173 memoize('_attributeid');
174 memoize('dump_path');
177 # object initialization
179 sub init {
180 my $self = shift;
181 my ($dsn,
182 $is_temporary,
183 $autoindex,
184 $namespace,
185 $dump_dir,
186 $user,
187 $pass,
188 $dbi_options,
189 $writeable,
190 $create,
191 $schema,
192 ) = rearrange(['DSN',
193 ['TEMP','TEMPORARY'],
194 'AUTOINDEX',
195 'NAMESPACE',
196 ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
197 'USER',
198 ['PASS','PASSWD','PASSWORD'],
199 ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
200 ['WRITE','WRITEABLE'],
201 'CREATE',
202 'SCHEMA'
203 ],@_);
206 $dbi_options ||= {};
207 $writeable = 1 if $is_temporary or $dump_dir;
209 $dsn or $self->throw("Usage: ".__PACKAGE__."->init(-dsn => \$dbh || \$dsn)");
211 my $dbh;
212 if (ref $dsn) {
213 $dbh = $dsn;
214 } else {
215 $dsn = "dbi:Pg:$dsn" unless $dsn =~ /^dbi:/;
216 $dbh = DBI->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr);
218 $self->{'original_arguments'} = {
219 'dsn' => $dsn,
220 'user' => $user,
221 'pass' => $pass,
222 'dbh_options' => $dbi_options,
224 $self->{dbh} = $dbh;
225 $self->{dbh}->{InactiveDestroy} = 1;
226 $self->{is_temp} = $is_temporary;
227 $self->{namespace} = $namespace;
228 $self->{writeable} = $writeable;
229 $self->schema($schema) if ($schema);
231 $self->default_settings;
232 $self->autoindex($autoindex) if defined $autoindex;
233 $self->dumpdir($dump_dir) if $dump_dir;
234 if ($self->is_temp) {
235 $self->init_tmp_database();
236 } elsif ($create) {
237 $self->init_database('erase');
241 sub writeable { shift->{writeable} }
243 sub can_store_parentage { 1 }
245 sub table_definitions {
246 my $self = shift;
247 return {
248 feature => <<END,
250 id serial primary key,
251 typeid int not null,
252 seqid int,
253 start int,
254 "end" int,
255 strand int default 0,
256 tier int,
257 bin int,
258 indexed int default 1,
259 object bytea not null
261 CREATE INDEX feature_stuff ON feature(seqid,tier,bin,typeid);
262 CREATE INDEX feature_typeid ON feature(typeid);
265 locationlist => <<END,
267 id serial primary key,
268 seqname varchar(50) not null
269 ); CREATE INDEX locationlist_seqname ON locationlist(seqname);
272 typelist => <<END,
274 id serial primary key,
275 tag varchar(100) not null
276 ); CREATE INDEX typelist_tab ON typelist(tag);
278 name => <<END,
280 id int not null,
281 name varchar(128) not null,
282 display_name int default 0
284 CREATE INDEX name_id ON name(id);
285 CREATE INDEX name_name ON name(name);
288 attribute => <<END,
290 id int not null,
291 attribute_id int not null,
292 attribute_value text
294 CREATE INDEX attribute_id ON attribute(id);
295 CREATE INDEX attribute_id_val ON attribute(attribute_id,SUBSTR(attribute_value, 1, 10));
298 attributelist => <<END,
300 id serial primary key,
301 tag varchar(50) not null
303 CREATE INDEX attributelist_tag ON attributelist(tag);
305 parent2child => <<END,
307 id int not null,
308 child int not null
310 CREATE INDEX parent2child_id_child ON parent2child(id,child);
313 meta => <<END,
315 name varchar(128) primary key,
316 value varchar(128) not null
319 sequence => <<END,
321 id int not null,
322 "offset" int not null,
323 sequence text,
324 primary key(id,"offset")
331 # default settings -- will create and populate meta table if needed
333 sub default_settings {
334 my $self = shift;
335 $self->maybe_create_meta();
336 $self->SUPER::default_settings;
337 $self->autoindex(1);
338 $self->dumpdir(File::Spec->tmpdir);
343 # retrieve database handle
345 sub dbh {
346 my $self = shift;
347 my $d = $self->{dbh};
348 $self->{dbh} = shift if @_;
352 sub clone {
353 # Postgres DBI doesn't implement proper cloning, so do it the hard way
354 my $self = shift;
356 my $dsn = $self->{'original_arguments'}->{'dsn'};
357 my $user = $self->{'original_arguments'}->{'user'};
358 my $pass = $self->{'original_arguments'}->{'pass'};
359 my $dbi_options = $self->{'original_arguments'}->{'dbi_options'};
360 $self->dbh()->{InactiveDestroy} = 1;
361 my $new_dbh = DBI->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr);
362 $new_dbh->{InactiveDestroy} = 1;
363 $self->{dbh} = $new_dbh; # unless $self->is_temp;
364 if ($self->schema()) {
365 $self->schema($self->schema()); # Reset the DBH's schema
370 # get/set directory for bulk load tables
372 sub dumpdir {
373 my $self = shift;
374 my $d = $self->{dumpdir};
375 $self->{dumpdir} = abs_path(shift) if @_;
380 # table namespace (multiple dbs in one Pg db)
382 sub namespace {
383 my $self = shift;
384 my $d = $self->{namespace};
385 $self->{namespace} = shift if @_;
390 # find a path that corresponds to a dump table
392 sub dump_path {
393 my $self = shift;
394 my $table = $self->_qualify(shift);
395 return "$self->{dumpdir}/$table.$$";
399 # make a filehandle (writeable) that corresponds to a dump table
401 sub dump_filehandle {
402 my $self = shift;
403 my $table = shift;
404 eval "require IO::File" unless IO::File->can('new');
405 my $path = $self->dump_path($table);
406 my $fh = $self->{filehandles}{$path} ||= IO::File->new(">$path");
407 $fh;
411 # find the next ID for a feature (used only during bulk loading)
413 sub next_id {
414 my $self = shift;
415 $self->{max_id} ||= $self->max_id;
416 return ++$self->{max_id};
420 # find the maximum ID for a feature (used only during bulk loading)
422 sub max_id {
423 my $self = shift;
424 my $sth = $self->_prepare("SELECT max(id) FROM " . $self->_feature_table);
425 $sth->execute or $self->throw($sth->errstr);
426 my ($id) = $sth->fetchrow_array;
427 $id;
430 sub schema {
431 my ($self, $schema) = @_;
432 $self->{'schema'} = $schema if defined($schema);
433 if ($schema) {
434 $self->dbh->do("SET search_path TO " . $self->{'schema'});
435 } else {
436 $self->dbh->do("SET search_path TO public");
438 return $self->{'schema'};
441 # wipe database clean and reinstall schema
443 sub _init_database {
444 my $self = shift;
445 my $erase = shift;
447 my $dbh = $self->dbh;
448 my $tables = $self->table_definitions;
449 foreach (keys %$tables) {
450 next if $_ eq 'meta'; # don't get rid of meta data!
451 my $table = $self->_qualify($_);
452 $dbh->do("DROP TABLE IF EXISTS $table") if $erase;
453 my @table_exists = $dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
454 if (!scalar(@table_exists)) {
455 my $query = "CREATE TABLE $table $tables->{$_}";
456 $dbh->do($query) or $self->throw($dbh->errstr);
459 $self->subfeatures_are_indexed(1) if $erase;
463 sub maybe_create_meta {
464 my $self = shift;
465 return unless $self->writeable;
466 my $table = $self->_qualify('meta');
467 my $tables = $self->table_definitions;
468 my @table_exists = $self->dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
469 if (!scalar(@table_exists)) {
470 $self->dbh->do("CREATE TABLE $table $tables->{meta}");
474 sub init_tmp_database {
475 my $self = shift;
476 my $dbh = $self->dbh;
477 my $tables = $self->table_definitions;
478 for my $t (keys %$tables) {
479 my $table = $self->_qualify($t);
480 my @table_exists = $dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
481 if (!scalar(@table_exists)) {
482 my $query = "CREATE TEMPORARY TABLE $table $tables->{$t}";
483 $dbh->do($query) or $self->throw($dbh->errstr);
490 # use temporary tables
492 sub is_temp {
493 shift->{is_temp};
496 sub _store {
497 my $self = shift;
499 # special case for bulk updates
500 return $self->_dump_store(@_) if $self->{bulk_update_in_progress};
502 my $indexed = shift;
503 my $count = 0;
505 my $autoindex = $self->autoindex;
507 my $dbh = $self->dbh;
508 local $dbh->{RaiseError} = 1;
509 $dbh->begin_work;
510 eval {
511 for my $obj (@_) {
512 $self->replace($obj,$indexed);
513 $self->_update_indexes($obj) if $indexed && $autoindex;
514 $count++;
518 if ($@) {
519 warn "Transaction aborted because $@";
520 $dbh->rollback;
522 else {
523 $dbh->commit;
526 # remember whether we have ever stored a non-indexed feature
527 unless ($indexed or $self->{indexed_flag}++) {
528 $self->subfeatures_are_indexed(0);
530 $count;
533 # we memoize this in order to avoid making zillions of calls
534 sub autoindex {
535 my $self = shift;
537 # special case for bulk update -- need to build the indexes
538 # at the same time we build the main feature table
539 return 1 if $self->{bulk_update_in_progress};
540 my $d = $self->setting('autoindex');
541 $self->setting(autoindex=>shift) if @_;
545 sub _start_bulk_update {
546 my $self = shift;
547 my $dbh = $self->dbh;
548 $self->{bulk_update_in_progress}++;
551 sub _finish_bulk_update {
552 my $self = shift;
553 my $dbh = $self->dbh;
554 my $dir = $self->{dumpdir} || '.';
555 for my $table ('feature',$self->index_tables) {
556 my $fh = $self->dump_filehandle($table);
557 my $path = $self->dump_path($table);
558 $fh->close;
559 my $qualified_table = $self->_qualify($table);
560 `cp $path $path.bak`;
561 # Get stuff from file into STDIN so we don't have to be superuser
562 open FH, '<', $path or $self->throw("Couldn't open $path: $!");
563 print STDERR "Loading file $path\n";
564 $dbh->do("COPY $qualified_table FROM STDIN CSV QUOTE '''' DELIMITER '\t'") or $self->throw($dbh->errstr);
565 while (my $line = <FH>) {
566 $dbh->pg_putline($line);
568 $dbh->pg_endcopy() or $self->throw($dbh->errstr);
569 close FH;
570 #unlink $path;
572 delete $self->{bulk_update_in_progress};
573 delete $self->{filehandles};
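# Bulk-load pipeline in brief: _start_bulk_update() sets the
# bulk_update_in_progress flag, which reroutes _store() and _add_SeqFeature()
# to their _dump_* counterparts; those append tab-delimited rows to per-table
# files under dumpdir, and _finish_bulk_update() then streams each file back
# into the database via COPY ... FROM STDIN and clears the flag.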
578 # Add subparts to a feature. Both the feature and all subparts must already be in the database.
580 sub _add_SeqFeature {
581 my $self = shift;
583 # special-purpose method for the case when we are doing a bulk update
584 return $self->_dump_add_SeqFeature(@_) if $self->{bulk_update_in_progress};
586 my $parent = shift;
587 my @children = @_;
589 my $dbh = $self->dbh;
590 local $dbh->{RaiseError} = 1;
592 my $child_table = $self->_parent2child_table();
593 my $count = 0;
595 my $exist_query = $self->_prepare("SELECT count(*) FROM $child_table WHERE id = ? AND child = ?" );
597 my $insert_query = $self->_prepare("INSERT INTO $child_table (id,child) VALUES (?, ?)");
599 # my $sth = $self->_prepare(<<END);
600 #REPLACE INTO $child_table (id,child) VALUES (?,?)
601 #END
603 my $parent_id = (ref $parent ? $parent->primary_id : $parent)
604 or $self->throw("$parent should have a primary_id");
607 $dbh->begin_work or $self->throw($dbh->errstr);
608 eval {
609 for my $child (@children) {
610 my $child_id = ref $child ? $child->primary_id : $child;
611 defined $child_id or die "no primary ID known for $child";
613 $exist_query->execute($parent_id, $child_id);
614 my ($exist) = $exist_query->fetchrow_array;
616 if (!$exist) {
617 $insert_query->execute($parent_id,$child_id);
618 $count++;
623 if ($@) {
624 warn "Transaction aborted because $@";
625 $dbh->rollback;
627 else {
628 $dbh->commit;
630 $insert_query->finish;
631 $count;
634 sub _fetch_SeqFeatures {
635 my $self = shift;
636 my $parent = shift;
637 my @types = @_;
639 my $parent_id = $parent->primary_id or $self->throw("$parent should have a primary_id");
640 my $feature_table = $self->_feature_table;
641 my $child_table = $self->_parent2child_table();
643 my @from = ("$feature_table as f","$child_table as c");
644 my @where = ('f.id=c.child','c.id=?');
645 my @args = $parent_id;
647 if (@types) {
648 my ($from,$where,undef,@a) = $self->_types_sql(\@types,'f');
649 push @from,$from if $from;
650 push @where,$where if $where;
651 push @args,@a;
654 my $from = join ', ',@from;
655 my $where = join ' AND ',@where;
657 my $query = <<END;
658 SELECT f.id,f.object
659 FROM $from
660 WHERE $where
663 $self->_print_query($query,@args) if DEBUG || $self->debug;
665 my $sth = $self->_prepare($query) or $self->throw($self->dbh->errstr);
666 $sth->execute(@args) or $self->throw($sth->errstr);
668 return $self->_sth2objs($sth);
672 # get primary sequence between start and end
674 sub _fetch_sequence {
675 my $self = shift;
676 my ($seqid,$start,$end) = @_;
678 # backward compatibility to the old days when I liked reverse complementing
679 # dna by specifying $start > $end
680 my $reversed;
681 if (defined $start && defined $end && $start > $end) {
682 $reversed++;
683 ($start,$end) = ($end,$start);
685 $start-- if defined $start;
686 $end-- if defined $end;
688 my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
689 my $offset2 = $self->_offset_boundary($seqid,$end || 'right');
690 my $sequence_table = $self->_sequence_table;
691 my $locationlist_table = $self->_locationlist_table;
693 my $sth = $self->_prepare(<<END);
694 SELECT sequence,"offset"
695 FROM $sequence_table as s,$locationlist_table as ll
696 WHERE s.id=ll.id
697 AND ll.seqname= ?
698 AND "offset" >= ?
699 AND "offset" <= ?
700 ORDER BY "offset"
703 my $seq = '';
704 $sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
706 while (my($frag,$offset) = $sth->fetchrow_array) {
707 substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
708 $seq .= $frag;
710 substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
711 if ($reversed) {
712 $seq = reverse $seq;
713 $seq =~ tr/gatcGATC/ctagCTAG/;
715 $sth->finish;
716 $seq;
719 sub _offset_boundary {
720 my $self = shift;
721 my ($seqid,$position) = @_;
723 my $sequence_table = $self->_sequence_table;
724 my $locationlist_table = $self->_locationlist_table;
726 my $sql;
727 $sql = $position eq 'left' ? "SELECT min(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
728 :$position eq 'right' ? "SELECT max(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
729 :"SELECT max(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=? AND \"offset\"<=?";
730 my $sth = $self->_prepare($sql);
731 my @args = $position =~ /^-?\d+$/ ? ($seqid,$position) : ($seqid);
732 $sth->execute(@args) or $self->throw($sth->errstr);
733 my $boundary = $sth->fetchall_arrayref->[0][0];
734 $sth->finish;
735 return $boundary;
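# Note: sequence data is stored in chunks keyed by (id,"offset") in the
# sequence table, so _offset_boundary() simply finds the offsets of the chunks
# that bracket the requested range and _fetch_sequence() pulls only those rows.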
740 # add namespace to tablename
742 sub _qualify {
743 my $self = shift;
744 my $table_name = shift;
745 my $namespace = $self->namespace;
746 return $table_name unless defined $namespace;
747 return "${namespace}_${table_name}";
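# Example: with -namespace => 'volvox1' (a hypothetical namespace),
# _qualify('feature') returns 'volvox1_feature'; with no namespace set,
# the table name is returned unchanged.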
751 # Fetch a Bio::SeqFeatureI from database using its primary_id
753 sub _fetch {
754 my $self = shift;
755 @_ or $self->throw("usage: fetch(\$primary_id)");
756 my $primary_id = shift;
757 my $features = $self->_feature_table;
758 my $sth = $self->_prepare(<<END);
759 SELECT id,object FROM $features WHERE id=?
761 $sth->execute($primary_id) or $self->throw($sth->errstr);
762 my $obj = $self->_sth2obj($sth);
763 $sth->finish;
764 $obj;
768 # Efficiently fetch a series of IDs from the database
769 # Can pass an array or an array ref
771 sub _fetch_many {
772 my $self = shift;
773 @_ or $self->throw('usage: fetch_many($id1,$id2,$id3...)');
774 my $ids = join ',',map {ref($_) ? @$_ : $_} @_ or return;
775 my $features = $self->_feature_table;
777 my $sth = $self->_prepare(<<END);
778 SELECT id,object FROM $features WHERE id IN ($ids)
780 $sth->execute() or $self->throw($sth->errstr);
781 return $self->_sth2objs($sth);
784 sub _features {
785 my $self = shift;
786 my ($seq_id,$start,$end,$strand,
787 $name,$class,$allow_aliases,
788 $types,
789 $attributes,
790 $range_type,
791 $fromtable,
792 $iterator
793 ) = rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
794 'NAME','CLASS','ALIASES',
795 ['TYPES','TYPE','PRIMARY_TAG'],
796 ['ATTRIBUTES','ATTRIBUTE'],
797 'RANGE_TYPE',
798 'FROM_TABLE',
799 'ITERATOR',
800 ],@_);
802 my (@from,@where,@args,@group);
803 $range_type ||= 'overlaps';
805 my $feature_table = $self->_feature_table;
806 @from = "$feature_table as f";
808 if (defined $name) {
809 # hacky backward compatibility workaround
810 undef $class if $class && $class eq 'Sequence';
811 $name = "$class:$name" if defined $class && length $class > 0;
812 # last argument is the join field
813 my ($from,$where,$group,@a) = $self->_name_sql($name,$allow_aliases,'f.id');
814 push @from,$from if $from;
815 push @where,$where if $where;
816 push @group,$group if $group;
817 push @args,@a;
820 if (defined $seq_id) {
821 # last argument is the name of the features table
822 my ($from,$where,$group,@a) = $self->_location_sql($seq_id,$start,$end,$range_type,$strand,'f');
823 push @from,$from if $from;
824 push @where,$where if $where;
825 push @group,$group if $group;
826 push @args,@a;
829 if (defined($types)) {
830 # last argument is the name of the features table
831 my ($from,$where,$group,@a) = $self->_types_sql($types,'f');
832 push @from,$from if $from;
833 push @where,$where if $where;
834 push @group,$group if $group;
835 push @args,@a;
838 if (defined $attributes) {
839 # last argument is the join field
840 my ($from,$where,$group,@a) = $self->_attributes_sql($attributes,'f.id');
841 push @from,$from if $from;
842 push @where,$where if $where;
843 push @group,$group if $group;
844 push @args,@a;
847 if (defined $fromtable) {
848 # last argument is the join field
849 my ($from,$where,$group,@a) = $self->_from_table_sql($fromtable,'f.id');
850 push @from,$from if $from;
851 push @where,$where if $where;
852 push @group,$group if $group;
853 push @args,@a;
856 # if no other criteria are specified, then
857 # only fetch indexed (i.e. top level objects)
858 @where = 'indexed=1' unless @where;
860 my $from = join ', ',@from;
861 my $where = join ' AND ',map {"($_)"} @where;
862 my $group = join ', ',@group;
863 $group = "GROUP BY $group" if @group;
865 my $query = <<END;
866 SELECT f.id,f.object
867 FROM $from
868 WHERE $where
869 $group
872 $self->_print_query($query,@args) if DEBUG || $self->debug;
874 my $sth = $self->_prepare($query);
875 $sth->execute(@args) or $self->throw($$ . "\n" . $sth->errstr);
877 # use Data::Dumper;
878 # print STDERR "I run the query\n SELECT f.id, f.object FROM $from WHERE $where $group\n with args @args\n";
879 # print STDERR "I should make an object out of " . Dumper($sth->fetchrow_hashref()) . "\n";
880 # $sth->execute(@args) or $self->throw($sth->errstr);
881 # print STDERR "I am making objects: " . Dumper($self->_sth2objs($sth)) . "\n";
882 # $sth->execute(@args) or $self->throw($sth->errstr);
884 return $iterator ? Bio::DB::SeqFeature::Store::DBI::Iterator->new($sth,$self) : $self->_sth2objs($sth);
887 sub _name_sql {
888 my $self = shift;
889 my ($name,$allow_aliases,$join) = @_;
890 my $name_table = $self->_name_table;
892 my $from = "$name_table as n";
893 my ($match,$string) = $self->_match_sql($name);
895 my $where = "n.id=$join AND n.name $match";
896 $where .= " AND n.display_name>0" unless $allow_aliases;
897 return ($from,$where,'',$string);
900 sub _search_attributes {
901 my $self = shift;
902 my ($search_string,$attribute_names,$limit) = @_;
903 my @words = map {quotemeta($_)} split /\s+/,$search_string;
904 my $name_table = $self->_name_table;
905 my $attribute_table = $self->_attribute_table;
906 my $attributelist_table = $self->_attributelist_table;
907 my $type_table = $self->_type_table;
908 my $typelist_table = $self->_typelist_table;
910 my @tags = @$attribute_names;
911 my $tag_sql = join ' OR ',("al.tag=?") x @tags;
913 my $perl_regexp = join '|',@words;
915 my $sql_regexp = join ' AND ',("a.attribute_value ~* ?") x @words; # ~* gives a case-insensitive substring/regexp match; SIMILAR TO is anchored to the whole value
916 my $sql = <<END;
917 SELECT name,attribute_value,tl.tag,n.id
918 FROM $name_table as n,$attribute_table as a,$attributelist_table as al,$type_table as t,$typelist_table as tl
919 WHERE n.id=a.id
920 AND al.id=a.attribute_id
921 AND n.id=t.id
922 AND t.typeid=tl.id
923 AND n.display_name=1
924 AND ($tag_sql)
925 AND ($sql_regexp)
927 $sql .= "LIMIT $limit" if defined $limit;
928 $self->_print_query($sql,@tags,@words) if DEBUG || $self->debug;
929 my $sth = $self->_prepare($sql);
930 $sth->execute(@tags,@words) or $self->throw($sth->errstr);
932 my @results;
933 while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
934 my (@hits) = $value =~ /$perl_regexp/ig;
935 my @words_in_row = split /\b/,$value;
936 my $score = int(@hits*100/@words/@words_in_row);
937 push @results,[$name,$value,$score,$type,$id];
939 $sth->finish;
940 @results = sort {$b->[2]<=>$a->[2]} @results;
941 return @results;
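# Scoring sketch: the score is the number of regexp hits scaled by 100 and
# divided by both the number of query words and the number of \b-separated
# tokens in the attribute value, so short values containing more of the query
# words rank higher; results are returned best-score first.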
944 sub _match_sql {
945 my $self = shift;
946 my $name = shift;
948 my ($match,$string);
949 if ($name =~ /(?:^|[^\\])[*?]/) {
950 $name =~ s/(^|[^\\])([%_])/$1\\$2/g;
951 $name =~ s/(^|[^\\])\*/$1%/g;
952 $name =~ s/(^|[^\\])\?/$1_/g;
953 $match = "LIKE ?";
954 $string = $name;
955 } else {
956 $match = "= ?";
957 $string = $name;
959 return ($match,$string);
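# For example, the name 'ZK909*' is translated to the pair ("LIKE ?", 'ZK909%'),
# while a literal name such as 'sma-3' becomes ("= ?", 'sma-3'); when wildcards
# are present, bare % and _ are escaped first so they are not treated as SQL
# wildcards.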
962 sub _from_table_sql {
963 my $self = shift;
964 my ($from_table,$join) = @_;
965 my $from = "$from_table as ft";
966 my $where = "ft.id=$join";
967 return ($from,$where,'');
970 sub _attributes_sql {
971 my $self = shift;
972 my ($attributes,$join) = @_;
974 my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
975 my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
977 my $attribute_table = $self->_attribute_table;
978 my $attributelist_table = $self->_attributelist_table;
980 my $from = "$attribute_table as a, $attributelist_table as al"; # "USE INDEX(...)" is MySQL-only syntax; let the Pg planner choose the index
982 my $where = <<END;
983 a.id=$join
984 AND a.attribute_id=al.id
985 AND ($wf)
988 my $group = $group_by;
990 my @args = (@bind_args,@group_args);
991 return ($from,$where,$group,@args);
994 sub subfeature_types_are_indexed { 1 }
995 sub subfeature_locations_are_indexed { 1 }
997 sub _types_sql {
998 my $self = shift;
999 my ($types,$type_table) = @_;
1000 my ($primary_tag,$source_tag);
1002 my @types = ref $types eq 'ARRAY' ? @$types : $types;
1004 my $typelist = $self->_typelist_table;
1005 my $from = "$typelist AS tl";
1007 my (@matches,@args);
1009 for my $type (@types) {
1011 if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
1012 $primary_tag = $type->method;
1013 $source_tag = $type->source;
1014 } else {
1015 ($primary_tag,$source_tag) = split ':',$type,2;
1018 if (defined $source_tag) {
1019 push @matches,"tl.tag=?";
1020 push @args,"$primary_tag:$source_tag";
1021 } else {
1022 push @matches,"tl.tag LIKE ?";
1023 push @args,"$primary_tag:%";
1026 my $matches = join ' OR ',@matches;
1028 my $where = <<END;
1029 tl.id=$type_table.typeid
1030 AND ($matches)
1033 return ($from,$where,'',@args);
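# For example, the type 'mRNA:curated' produces "tl.tag=?" with the bound value
# 'mRNA:curated', while a bare 'mRNA' matches any source tag via "tl.tag LIKE ?"
# with the bound value 'mRNA:%'.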
1036 sub _location_sql {
1037 my $self = shift;
1038 my ($seq_id,$start,$end,$range_type,$strand,$location) = @_;
1040 # the additional join on the location_list table badly impacts performance
1041 # so we build a copy of the table in memory
1042 my $seqid = $self->_locationid($seq_id) || 0; # zero is an invalid primary ID, so will return empty
1044 $start = MIN_INT unless defined $start;
1045 $end = MAX_INT unless defined $end;
1047 my ($bin_where,@bin_args) = $self->bin_where($start,$end,$location);
1049 my ($range,@range_args);
1050 if ($range_type eq 'overlaps') {
1051 $range = "$location.end>=? AND $location.start<=? AND ($bin_where)";
1052 @range_args = ($start,$end,@bin_args);
1053 } elsif ($range_type eq 'contains') {
1054 $range = "$location.start>=? AND $location.end<=? AND ($bin_where)";
1055 @range_args = ($start,$end,@bin_args);
1056 } elsif ($range_type eq 'contained_in') {
1057 $range = "$location.start<=? AND $location.end>=?";
1058 @range_args = ($start,$end);
1059 } else {
1060 $self->throw("range_type must be one of 'overlaps', 'contains' or 'contained_in'");
1063 if (defined $strand) {
1064 $range .= " AND strand=?";
1065 push @range_args,$strand;
1068 my $where = <<END;
1069 $location.seqid=?
1070 AND $range
1073 my $from = '';
1074 my $group = '';
1076 my @args = ($seqid,@range_args);
1077 return ($from,$where,$group,@args);
1081 # force reindexing
1083 sub reindex {
1084 my $self = shift;
1085 my $from_update_table = shift; # if present, will take ids from "update_table"
1087 my $dbh = $self->dbh;
1088 my $count = 0;
1089 my $now;
1091 # try to bring in highres time() function
1092 eval "require Time::HiRes";
1094 my $last_time = $self->time();
1096 # tell _delete_index() not to bother removing the index rows corresponding
1097 # to each individual feature
1098 local $self->{reindexing} = 1;
1100 $dbh->begin_work;
1101 eval {
1102 my $update = $from_update_table;
1103 for my $table ($self->index_tables) {
1104 my $query = $from_update_table ? "DELETE FROM $table USING $update WHERE $table.id=$update.id" # Pg form of the multi-table delete
1105 : "DELETE FROM $table";
1106 $dbh->do($query);
1107 $dbh->do("ALTER TABLE $table DISABLE KEYS"); # MySQL-only statement; Pg has no equivalent
1109 my $iterator = $self->get_seq_stream(-from_table=>$from_update_table ? $update : undef);
1110 while (my $f = $iterator->next_seq) {
1111 if (++$count %1000 == 0) {
1112 $now = $self->time();
1113 my $elapsed = sprintf(" in %5.2fs",$now - $last_time);
1114 $last_time = $now;
1115 print STDERR "$count features indexed$elapsed...",' 'x60;
1116 print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
1118 $self->_update_indexes($f);
1121 for my $table ($self->index_tables) {
1122 $dbh->do("ALTER TABLE $table ENABLE KEYS"); # MySQL-only statement; Pg has no equivalent
1124 if ($@) {
1125 warn "Couldn't complete transaction: $@";
1126 $dbh->rollback;
1127 return;
1128 } else {
1129 $dbh->commit;
1130 return 1;
1134 sub optimize {
1135 my $self = shift;
1136 $self->dbh->do("ANALYZE $_") foreach $self->index_tables; # Pg syntax is ANALYZE <table>, not MySQL's ANALYZE TABLE
1139 sub all_tables {
1140 my $self = shift;
1141 my @index_tables = $self->index_tables;
1142 my $feature_table = $self->_feature_table;
1143 return ($feature_table,@index_tables);
1146 sub index_tables {
1147 my $self = shift;
1148 return map {$self->_qualify($_)} qw(name attribute parent2child)
1151 sub _firstid {
1152 my $self = shift;
1153 my $features = $self->_feature_table;
1154 my $query = <<END;
1155 SELECT min(id) FROM $features
1157 my $sth=$self->_prepare($query);
1158 $sth->execute();
1159 my ($first) = $sth->fetchrow_array;
1160 $sth->finish;
1161 $first;
1164 sub _nextid {
1165 my $self = shift;
1166 my $lastkey = shift;
1167 my $features = $self->_feature_table;
1168 my $query = <<END;
1169 SELECT min(id) FROM $features WHERE id>?
1171 my $sth=$self->_prepare($query);
1172 $sth->execute($lastkey);
1173 my ($next) = $sth->fetchrow_array;
1174 $sth->finish;
1175 $next;
1178 sub _existsid {
1179 my $self = shift;
1180 my $key = shift;
1181 my $features = $self->_feature_table;
1182 my $query = <<END;
1183 SELECT count(*) FROM $features WHERE id=?
1185 my $sth=$self->_prepare($query);
1186 $sth->execute($key);
1187 my ($count) = $sth->fetchrow_array;
1188 $sth->finish;
1189 $count > 0;
1192 sub _deleteid {
1193 my $self = shift;
1194 my $key = shift;
1195 my $dbh = $self->dbh;
1196 my $child_table = $self->_parent2child_table;
1197 my $query = "SELECT child FROM $child_table WHERE id=?";
1198 my $sth=$self->_prepare($query);
1199 $sth->execute($key);
1200 my $success = 0;
1201 while (my ($cid) = $sth->fetchrow_array) {
1202 # Backcheck looking for multiple parents, delete only if one is present. I'm
1203 # sure there is a nice way to left join the parent2child table onto itself
1204 # to get this in one query above, just haven't worked it out yet...
1205 my $sth2 = $self->_prepare("SELECT count(id) FROM $child_table WHERE child=?");
1206 $sth2->execute($cid);
1207 my ($count) = $sth2->fetchrow_array;
1208 if ($count == 1) {
1209 $self->_deleteid($cid) || $self->throw("Couldn't remove subfeature!");
1212 for my $table ($self->all_tables) {
1213 $success += $dbh->do("DELETE FROM $table WHERE id=$key") || 0;
1215 return $success;
1218 sub _clearall {
1219 my $self = shift;
1220 my $dbh = $self->dbh;
1221 for my $table ($self->all_tables) {
1222 $dbh->do("DELETE FROM $table");
1226 sub _featurecount {
1227 my $self = shift;
1228 my $dbh = $self->dbh;
1229 my $features = $self->_feature_table;
1230 my $query = <<END;
1231 SELECT count(*) FROM $features
1233 my $sth=$self->_prepare($query);
1234 $sth->execute();
1235 my ($count) = $sth->fetchrow_array;
1236 $sth->finish;
1237 $count;
1240 sub _seq_ids {
1241 my $self = shift;
1242 my $dbh = $self->dbh;
1243 my $location = $self->_locationlist_table;
1244 my $sth = $self->_prepare("SELECT DISTINCT seqname FROM $location");
1245 $sth->execute() or $self->throw($sth->errstr);
1246 my @result;
1247 while (my ($id) = $sth->fetchrow_array) {
1248 push @result,$id;
1250 return @result;
1253 sub setting {
1254 my $self = shift;
1255 my ($variable_name,$value) = @_;
1256 my $meta = $self->_meta_table;
1258 if (defined $value && $self->writeable) {
1259 my $querydel = "DELETE FROM $meta WHERE name = ?";
1260 my $query = "INSERT INTO $meta (name,value) VALUES (?,?)";
1261 my $sthdel = $self->_prepare($querydel);
1262 my $sth = $self->_prepare($query);
1263 $sthdel->execute($variable_name);
1264 $sth->execute($variable_name,$value) or $self->throw($sth->errstr);
1265 $sth->finish;
1266 $self->{settings_cache}{$variable_name} = $value;
1268 else {
1269 return $self->{settings_cache}{$variable_name} if exists $self->{settings_cache}{$variable_name};
1270 my $query = <<END;
1271 SELECT value FROM $meta as m WHERE m.name=?
1273 my $sth = $self->_prepare($query);
1274 # $sth->execute($variable_name) or $self->throw($sth->errstr);
1275 unless ($sth->execute($variable_name)) {
1276 my $errstr = $sth->errstr;
1277 $sth = $self->_prepare("SHOW search_path");
1278 $sth->execute();
1279 $errstr .= "With search_path " . $sth->fetchrow_arrayref->[0] . "\n";
1280 $self->throw($errstr);
1283 my ($value) = $sth->fetchrow_array;
1284 $sth->finish;
1285 return $self->{settings_cache}{$variable_name} = $value;
1290 # Replace Bio::SeqFeatureI into database.
1292 sub replace {
1293 my $self = shift;
1294 my $object = shift;
1295 my $index_flag = shift || undef;
1297 # ?? shouldn't need to do this
1298 # $self->_load_class($object);
1299 my $id = $object->primary_id;
1300 my $features = $self->_feature_table;
1302 my $exist_query = defined($id)
1303 ? $self->_prepare("select count(*) from $features where id = ?")
1304 : 0;
1305 my $update_query = $self->_prepare("update $features set object = ?,
1306 indexed = ?,
1307 seqid = ?,
1308 start = ?,
1309 \"end\" = ?,
1310 strand = ?,
1311 tier = ?,
1312 bin = ?,
1313 typeid = ?
1314 where id = ?");
1315 my $insert_query = $self->_prepare("INSERT INTO $features (object,indexed,seqid,start,\"end\",strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?)");
1317 my @location = $index_flag ? $self->_get_location_and_bin($object) : (undef)x6;
1319 my $primary_tag = $object->primary_tag;
1320 my $source_tag = $object->source_tag || '';
1321 $primary_tag .= ":$source_tag";
1322 my $typeid = $self->_typeid($primary_tag,1);
1324 my $exists;
1325 $exist_query->execute($id) if $exist_query;
1326 if ($exist_query) {
1327 ($exists) = $exist_query->fetchrow_array if $exist_query;
1329 else {
1330 $exists = 0;
1333 if ($exists) {
1334 $update_query->execute($self->freeze($object),$index_flag||0,@location,$typeid,$id) or $self->throw($update_query->errstr);
1336 else {
1337 $insert_query->execute($self->freeze($object),$index_flag||0,@location,$typeid) or $self->throw($insert_query->errstr);
1340 # old syntax with REPLACE INTO
1341 #$sth->execute($id,$self->freeze($object),$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
1343 my $dbh = $self->dbh;
1344 $object->primary_id($dbh->last_insert_id(undef,undef,$features,undef)) unless defined $id; # DBD::Pg has no pg_insertid; use last_insert_id()
1346 $self->flag_for_indexing($object->primary_id) if $self->{bulk_update_in_progress};
1350 # Insert one Bio::SeqFeatureI into database. primary_id must be undef
1352 sub insert {
1353 my $self = shift;
1354 my $object = shift;
1355 my $index_flag = shift || 0;
1357 $self->_load_class($object);
1358 defined $object->primary_id and $self->throw("$object already has a primary id");
1360 my $features = $self->_feature_table;
1361 my $sth = $self->_prepare(<<END);
1362 INSERT INTO $features (id,object,indexed) VALUES (?,?,?)
1364 $sth->execute(undef,$self->freeze($object),$index_flag) or $self->throw($sth->errstr);
1365 my $dbh = $self->dbh;
1366 $object->primary_id($dbh->last_insert_id(undef,undef,$features,undef)); # DBD::Pg has no pg_insertid; use last_insert_id()
1367 $self->flag_for_indexing($object->primary_id) if $self->{bulk_update_in_progress};
1370 =head2 types
1372 Title : types
1373 Usage : @type_list = $db->types
1374 Function: Get all the types in the database
1375 Returns : array of Bio::DB::GFF::Typename objects
1376 Args : none
1377 Status : public
1379 =cut
1381 sub types {
1382 my $self = shift;
1383 eval "require Bio::DB::GFF::Typename"
1384 unless Bio::DB::GFF::Typename->can('new');
1385 my $typelist_table = $self->_typelist_table;
1386 my $sql = <<END;
1387 SELECT tag from $typelist_table
1390 $self->_print_query($sql) if DEBUG || $self->debug;
1391 my $sth = $self->_prepare($sql);
1392 $sth->execute() or $self->throw($sth->errstr);
1394 my @results;
1395 while (my($tag) = $sth->fetchrow_array) {
1396 push @results,Bio::DB::GFF::Typename->new($tag);
1398 $sth->finish;
1399 return @results;
1403 # Insert a bit of DNA or protein into the database
1405 sub _insert_sequence {
1406 my $self = shift;
1407 my ($seqid,$seq,$offset) = @_;
1408 my $id = $self->_locationid($seqid);
1409 my $seqtable = $self->_sequence_table;
1410 my $sthdel = $self->_prepare("DELETE FROM $seqtable WHERE id = ? AND \"offset\" = ?");
1411 my $sth = $self->_prepare(<<END);
1412 INSERT INTO $seqtable (id,"offset",sequence) VALUES (?,?,?)
1414 $sthdel->execute($id,$offset);
1415 $sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
1419 # This subroutine flags the given primary ID for later reindexing
1421 sub flag_for_indexing {
1422 my $self = shift;
1423 my $id = shift;
1424 my $needs_updating = $self->_update_table;
1425 my $sth = $self->_prepare("REPLACE INTO $needs_updating VALUES (?)");
1426 $sth->execute($id) or $self->throw($self->dbh->errstr);
1430 # Update indexes for given object
1432 sub _update_indexes {
1433 my $self = shift;
1434 my $obj = shift;
1435 defined (my $id = $obj->primary_id) or return;
1437 if ($self->{bulk_update_in_progress}) {
1438 $self->_dump_update_name_index($obj,$id);
1439 $self->_dump_update_attribute_index($obj,$id);
1440 } else {
1441 $self->_update_name_index($obj,$id);
1442 $self->_update_attribute_index($obj,$id);
1446 sub _update_name_index {
1447 my $self = shift;
1448 my ($obj,$id) = @_;
1449 my $name = $self->_name_table;
1450 my $primary_id = $obj->primary_id;
1452 $self->_delete_index($name,$id);
1453 my ($names,$aliases) = $self->feature_names($obj);
1455 my $sth = $self->_prepare("INSERT INTO $name (id,name,display_name) VALUES (?,?,?)");
1457 $sth->execute($id,$_,1) or $self->throw($sth->errstr) foreach @$names;
1458 $sth->execute($id,$_,0) or $self->throw($sth->errstr) foreach @$aliases;
1459 $sth->finish;
1462 sub _update_attribute_index {
1463 my $self = shift;
1464 my ($obj,$id) = @_;
1465 my $attribute = $self->_attribute_table;
1466 $self->_delete_index($attribute,$id);
1468 my $sth = $self->_prepare("INSERT INTO $attribute (id,attribute_id,attribute_value) VALUES (?,?,?)");
1469 for my $tag ($obj->get_all_tags) {
1470 my $tagid = $self->_attributeid($tag);
1471 for my $value ($obj->get_tag_values($tag)) {
1472 $sth->execute($id,$tagid,$value) or $self->throw($sth->errstr);
1475 $sth->finish;
1478 sub _genericid {
1479 my $self = shift;
1480 my ($table,$namefield,$name,$add_if_missing) = @_;
1481 my $qualified_table = $self->_qualify($table);
1482 my $sth = $self->_prepare(<<END);
1483 SELECT id FROM $qualified_table WHERE $namefield=?
1485 $sth->execute($name) or die $sth->errstr;
1486 my ($id) = $sth->fetchrow_array;
1487 $sth->finish;
1488 return $id if defined $id;
1489 return unless $add_if_missing;
1491 $sth = $self->_prepare(<<END);
1492 INSERT INTO $qualified_table ($namefield) VALUES (?)
1494 print STDERR "Inserting into $qualified_table $namefield $name\n" if DEBUG || $self->debug;
1495 $sth->execute($name) or die $sth->errstr;
1496 my $dbh = $self->dbh;
1497 return $dbh->last_insert_id(undef, undef, $qualified_table, undef);
1500 sub _typeid {
1501 shift->_genericid('typelist','tag',shift,1);
1503 sub _locationid {
1504 shift->_genericid('locationlist','seqname',shift,1);
1506 sub _attributeid {
1507 shift->_genericid('attributelist','tag',shift,1);
1510 sub _get_location_and_bin {
1511 my $self = shift;
1512 my $feature = shift;
1513 my $seqid = $self->_locationid($feature->seq_id);
1514 my $start = $feature->start;
1515 my $end = $feature->end;
1516 my $strand = $feature->strand || 0;
1517 my ($tier,$bin) = $self->get_bin($start,$end);
1518 return ($seqid,$start,$end,$strand,$tier,$bin);
1521 sub get_bin {
1522 my $self = shift;
1523 my ($start,$end) = @_;
1524 my $binsize = MIN_BIN;
1525 my ($bin_start,$bin_end,$tier);
1526 $tier = 0;
1527 while (1) {
1528 $bin_start = int $start/$binsize;
1529 $bin_end = int $end/$binsize;
1530 last if $bin_start == $bin_end;
1531 $binsize *= 10;
1532 $tier++;
1534 return ($tier,$bin_start);
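# Worked example with hypothetical coordinates: for a feature spanning
# 1,500,000..1,502,000, the 1 kb bin gives bin_start=1500, bin_end=1502 (no
# single bin holds it), while the 10 kb bin gives 150 for both ends, so
# get_bin() returns (tier => 1, bin => 150).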
1537 sub bin_where {
1538 my $self = shift;
1539 my ($start,$end,$f) = @_;
1540 my (@bins,@args);
1542 my $tier = 0;
1543 my $binsize = MIN_BIN;
1544 while ($binsize <= MAX_BIN) {
1545 my $bin_start = int($start/$binsize);
1546 my $bin_end = int($end/$binsize);
1547 push @bins,"($f.tier=? AND $f.bin between ? AND ?)";
1548 push @args,($tier,$bin_start,$bin_end);
1549 $binsize *= 10;
1550 $tier++;
1552 my $query = join ("\n\t OR ",@bins);
1553 return wantarray ? ($query,@args) : substitute($query,@args);
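# bin_where() generates one clause per tier up to MAX_BIN; for the example
# feature above the fragment looks roughly like
#   (f.tier=? AND f.bin between ? AND ?) OR (f.tier=? AND f.bin between ? AND ?) OR ...
# with (0,1500,1502), (1,150,150), (2,15,15), ... supplied as bind arguments.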
1557 sub _delete_index {
1558 my $self = shift;
1559 my ($table_name,$id) = @_;
1560 return if $self->{reindexing};
1561 my $sth = $self->_prepare("DELETE FROM $table_name WHERE id=?") or $self->throw($self->dbh->errstr);
1562 $sth->execute($id);
1565 # given a statement handler that is expected to return rows of (id,object)
1566 # unthaw each object and return a list of 'em
1567 sub _sth2objs {
1568 my $self = shift;
1569 my $sth = shift;
1570 my @result;
1571 my ($id, $o);
1572 $sth->bind_col(1, \$id);
1573 $sth->bind_col(2, \$o, { pg_type => PG_BYTEA});
1574 #while (my ($id,$o) = $sth->fetchrow_array) {
1575 while ($sth->fetch) {
1576 my $obj = $self->thaw($o,$id);
1577 push @result,$obj;
1579 $sth->finish;
1580 return @result;
1583 # given a statement handler that is expected to return a single row of (id,object)
1584 # unthaw the object and return it
1585 sub _sth2obj {
1586 my $self = shift;
1587 my $sth = shift;
1588 my ($id,$o) = $sth->fetchrow_array;
1589 return unless $o;
1590 my $obj = $self->thaw($o,$id);
1591 $obj;
1594 sub _prepare {
1595 my $self = shift;
1596 my $query = shift;
1597 my $dbh = $self->dbh;
1598 my $sth = $dbh->prepare_cached($query, {}, 3) or $self->throw($dbh->errstr);
1599 $sth;
1603 ####################################################################################################
1604 # SQL Fragment generators
1605 ####################################################################################################
1607 sub _feature_table { shift->_qualify('feature') }
1608 sub _location_table { shift->_qualify('location') }
1609 sub _locationlist_table { shift->_qualify('locationlist') }
1610 sub _type_table { shift->_qualify('feature') }
1611 sub _typelist_table { shift->_qualify('typelist') }
1612 sub _name_table { shift->_qualify('name') }
1613 sub _attribute_table { shift->_qualify('attribute')}
1614 sub _attributelist_table { shift->_qualify('attributelist')}
1615 sub _parent2child_table { shift->_qualify('parent2child')}
1616 sub _meta_table { shift->_qualify('meta')}
1617 sub _update_table { shift->_qualify('update_table')}
1618 sub _sequence_table { shift->_qualify('sequence')}
1620 sub _make_attribute_where {
1621 my $self = shift;
1622 my ($attributetable,$attributenametable,$attributes) = @_;
1623 my @args;
1624 my @sql;
1625 my $dbh = $self->dbh;
1626 foreach (keys %$attributes) {
1627 my @match_values;
1628 my @values = ref($attributes->{$_}) && ref($attributes->{$_}) eq 'ARRAY' ? @{$attributes->{$_}} : $attributes->{$_};
1629 foreach (@values) { # convert * into % for wildcard matches
1630 s/\*/%/g;
1632 my $match = join ' OR ',map {
1633 /%/ ? "$attributetable.attribute_value LIKE ?"
1634 : "$attributetable.attribute_value=?"
1635 } @values;
1636 push @sql,"($attributenametable.tag=? AND ($match))";
1637 push @args,($_,@values);
1639 return (join(' OR ',@sql),@args);
1642 sub _make_attribute_group {
1643 my $self = shift;
1644 my ($table_name,$attributes) = @_;
1645 my $key_count = keys %$attributes or return;
1646 return "f.id HAVING count(f.id)>?",$key_count-1;
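# The GROUP BY fragment returned here pairs with _make_attribute_where(): a
# feature must match every requested attribute, so for N attributes the clause
# is "f.id HAVING count(f.id)>?" with the placeholder bound to N-1, keeping
# only ids that matched at least N attribute rows.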
1649 sub _print_query {
1650 my $self = shift;
1651 my ($query,@args) = @_;
1652 while ($query =~ /\?/) {
1653 my $arg = $self->dbh->quote(shift @args);
1654 $query =~ s/\?/$arg/;
1656 warn $query,"\n";
1660 # special-purpose store for bulk loading - write to a file rather than to the db
1662 sub _dump_store {
1663 my $self = shift;
1664 my $indexed = shift;
1666 my $count = 0;
1667 my $store_fh = $self->dump_filehandle('feature');
1668 my $dbh = $self->dbh;
1670 my $autoindex = $self->autoindex;
1672 for my $obj (@_) {
1673 my $id = $self->next_id;
1674 my ($seqid,$start,$end,$strand,$tier,$bin) = $indexed ? $self->_get_location_and_bin($obj) : (undef)x6;
1675 my $primary_tag = $obj->primary_tag;
1676 my $source_tag = $obj->source_tag || '';
1677 $primary_tag .= ":$source_tag";
1678 my $typeid = $self->_typeid($primary_tag,1);
1680 my $frozen_object = $dbh->quote($self->freeze($obj), { pg_type => PG_BYTEA});
1681 $frozen_object =~ s/^E?'|'$//g;
1682 $frozen_object =~ s/\\\\/\\/g;
1683 # TODO: Fix this, why does frozen object start with quote but not end with one
1684 print $store_fh join("\t",$id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,$frozen_object),"\n";
1685 $obj->primary_id($id);
1686 $self->_update_indexes($obj) if $indexed && $autoindex;
1687 $count++;
1690 # remember whether we have ever stored a non-indexed feature
1691 unless ($indexed or $self->{indexed_flag}++) {
1692 $self->subfeatures_are_indexed(0);
1694 $count;
1697 sub _dump_add_SeqFeature {
1698 my $self = shift;
1699 my $parent = shift;
1700 my @children = @_;
1702 my $dbh = $self->dbh;
1703 my $fh = $self->dump_filehandle('parent2child');
1704 my $parent_id = (ref $parent ? $parent->primary_id : $parent)
1705 or $self->throw("$parent should have a primary_id");
1706 my $count = 0;
1708 for my $child_id (@children) {
1709 print $fh join("\t",$parent_id,$child_id),"\n";
1710 $count++;
1712 $count;
1715 sub _dump_update_name_index {
1716 my $self = shift;
1717 my ($obj,$id) = @_;
1718 my $fh = $self->dump_filehandle('name');
1719 my $dbh = $self->dbh;
1720 my ($names,$aliases) = $self->feature_names($obj);
1721 print $fh join("\t",$id,$dbh->quote($_),1),"\n" foreach @$names;
1722 print $fh join("\t",$id,$dbh->quote($_),0),"\n" foreach @$aliases;
1725 sub _dump_update_attribute_index {
1726 my $self = shift;
1727 my ($obj,$id) = @_;
1728 my $fh = $self->dump_filehandle('attribute');
1729 my $dbh = $self->dbh;
1730 for my $tag ($obj->all_tags) {
1731 my $tagid = $self->_attributeid($tag);
1732 for my $value ($obj->each_tag_value($tag)) {
1733 print $fh join("\t",$id,$tagid,$dbh->quote($value)),"\n";
1738 sub time {
1739 return Time::HiRes::time() if Time::HiRes->can('time');
1740 return time();
1743 sub DESTROY {
1744 my $self = shift;
1745 if ($self->{bulk_update_in_progress}) { # be sure to remove temp files
1746 for my $table ('feature',$self->index_tables) {
1747 my $path = $self->dump_path($table);
1748 unlink $path;