1 package Bio
::DB
::SeqFeature
::Store
::DBI
::Pg
;
2 use DBD
::Pg
qw(:pg_types);
3 # $Id: Pg.pm 14656 2008-04-14 15:05:37Z lstein $
7 Bio::DB::SeqFeature::Store::DBI::Pg -- PostgreSQL implementation of Bio::DB::SeqFeature::Store
11 use Bio::DB::SeqFeature::Store;
13 # Open the sequence database
14 my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::Pg',
15 -dsn => 'dbi:Pg:test');
17 # get a feature from somewhere
18 my $feature = Bio::SeqFeature::Generic->new(...);
21 $db->store($feature) or die "Couldn't store!";
23 # the feature's primary_id attribute is set to its primary ID in the database
25 my $id = $feature->primary_id;
27 # get the feature back out
28 my $f = $db->fetch($id);
30 # change the feature and update it
32 $db->update($f) or die "Couldn't update!";
36 my @features = $db->fetch_many(@list_of_ids);
39 @features = $db->get_features_by_name('ZK909');
42 @features = $db->get_features_by_alias('sma-3');
45 @features = $db->get_features_by_name('gene');
48 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
51 @features = $db->get_features_by_attribute({description => 'protein kinase'})
53 # ...by the GFF "Note" field
54 @result_list = $db->search_notes('kinase');
56 # ...by arbitrary combinations of selectors
57 @features = $db->features(-name => $name,
62 -attributes => $attributes);
64 # ...using an iterator
65 my $iterator = $db->get_seq_stream(-name => $name,
70 -attributes => $attributes);
72 while (my $feature = $iterator->next_seq) {
73 # do something with the feature
76 # ...limiting the search to a particular region
77 my $segment = $db->segment('Chr1',5000=>6000);
78 my @features = $segment->features(-type=>['mRNA','match']);
80 # getting & storing sequence information
81 # Warning: this returns a string, and not a PrimarySeq object
82 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
83 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
85 # what feature types are defined in the database?
86 my @types = $db->types;
88 # create a new feature in the database
89 my $feature = $db->new_feature(-primary_tag => 'mRNA',
96 Bio::DB::SeqFeature::Store::DBI::Pg is the PostgreSQL adaptor for
97 Bio::DB::SeqFeature::Store. You will not create it directly, but
98 instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
100 See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
102 =head2 Using the Pg adaptor
104 Before you can use the adaptor, you must use the pgAdmin tool to
105 create a database and establish a user account with write
106 permission. In order to use "fast" loading, the user account must have
109 To establish a connection to the database, call
110 Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::Pg',@more_args). The
111 additional arguments are as follows:
113 Argument name Description
114 ------------- -----------
116 -dsn The database name. You can abbreviate
117 "dbi:Pg:foo" as "foo" if you wish.
119 -user Username for authentication.
121 -pass Password for authentication.
123 -namespace A prefix to attach to each table. This allows you
124 to have several virtual databases in the same
127 -temp Boolean flag. If true, a temporary database
128 will be created and destroyed as soon as
129 the Store object goes out of scope. (synonym -temporary)
131 -autoindex Boolean flag. If true, features in the database will be
132 reindexed every time they change. This is the default.
135 -tmpdir Directory in which to place temporary files during "fast" loading.
136 Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
138 -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
139 (synonyms -options, -dbi_attr)
141 -write Pass true to open database for writing or updating.
143 If successful, a new instance of
144 Bio::DB::SeqFeature::Store::DBI::Pg will be returned.
146 In addition to the standard methods supported by all well-behaved
147 Bio::DB::SeqFeature::Store databases, the following
148 adaptor-specific methods are provided. These are described in the next
155 use base
'Bio::DB::SeqFeature::Store';
156 use Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
;
160 use Bio
::DB
::GFF
::Util
::Rearrange
'rearrange';
# Query-tracing switch; set to a true value to print SQL as it runs.
use constant DEBUG => 0;

# Sentinel coordinate bounds (32-bit signed integer limits, per the
# note inherited from the MySQL adaptor's documentation).
# WARNING: if your sequence uses coordinates greater than 2 GB, you are out of luck!
use constant MAX_INT => 2_147_483_647;
use constant MIN_INT => -2_147_483_648;

# Binning parameters for the spatial index.
use constant MAX_BIN => 1_000_000_000; # size of largest feature = 1 Gb
use constant MIN_BIN => 1000; # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these
# Cache the results of these lookup helpers so repeated calls with the
# same arguments are served from memory instead of re-querying the
# database (or recomputing the dump-file path).
memoize($_) for qw(_locationid _attributeid dump_path);
177 # object initialization
192 ) = rearrange
(['DSN',
193 ['TEMP','TEMPORARY'],
196 ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
198 ['PASS','PASSWD','PASSWORD'],
199 ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
200 ['WRITE','WRITEABLE'],
207 $writeable = 1 if $is_temporary or $dump_dir;
209 $dsn or $self->throw("Usage: ".__PACKAGE__
."->init(-dsn => \$dbh || \$dsn)");
215 $dsn = "dbi:Pg:$dsn" unless $dsn =~ /^dbi:/;
216 $dbh = DBI
->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr
);
218 $self->{'original_arguments'} = {
222 'dbh_options' => $dbi_options,
225 $self->{dbh
}->{InactiveDestroy
} = 1;
226 $self->{is_temp
} = $is_temporary;
227 $self->{namespace
} = $namespace;
228 $self->{writeable
} = $writeable;
229 $self->schema($schema) if ($schema);
231 $self->default_settings;
232 $self->autoindex($autoindex) if defined $autoindex;
233 $self->dumpdir($dump_dir) if $dump_dir;
234 if ($self->is_temp) {
235 $self->init_tmp_database();
237 $self->init_database('erase');
# Accessor: true if the store was opened in a writeable/updatable mode
# (set during init from the -write/-writeable argument).
sub writeable {
    my $self = shift;
    return $self->{writeable};
}
# Capability flag: this adaptor records parent/child feature
# relationships (via the parent2child table), so always true.
sub can_store_parentage {
    return 1;
}
245 sub table_definitions
{
250 id serial primary key,
255 strand int default 0,
258 indexed int default 1,
259 object bytea not null
261 CREATE INDEX feature_stuff ON feature(seqid,tier,bin,typeid);
262 CREATE INDEX feature_typeid ON feature(typeid);
265 locationlist
=> <<END,
267 id serial primary key,
268 seqname varchar(50) not null
269 ); CREATE INDEX locationlist_seqname ON locationlist(seqname);
274 id serial primary key,
275 tag varchar(100) not null
276 ); CREATE INDEX typelist_tab ON typelist(tag);
281 name varchar(128) not null,
282 display_name int default 0
284 CREATE INDEX name_id ON name(id);
285 CREATE INDEX name_name ON name(name);
291 attribute_id int not null,
294 CREATE INDEX attribute_id ON attribute(id);
295 CREATE INDEX attribute_id_val ON attribute(attribute_id,SUBSTR(attribute_value, 1, 10));
298 attributelist
=> <<END,
300 id serial primary key,
301 tag varchar(50) not null
303 CREATE INDEX attributelist_tag ON attributelist(tag);
305 parent2child
=> <<END,
310 CREATE INDEX parent2child_id_child ON parent2child(id,child);
315 name varchar(128) primary key,
316 value varchar(128) not null
322 "offset" int not null,
324 primary key(id,"offset")
331 # default settings -- will create and populate meta table if needed
333 sub default_settings
{
335 $self->maybe_create_meta();
336 $self->SUPER::default_settings
;
338 $self->dumpdir(File
::Spec
->tmpdir);
343 # retrieve database handle
347 my $d = $self->{dbh
};
348 $self->{dbh
} = shift if @_;
353 # Postgres DBI doesn't implement proper cloning, so do it the hard way
356 my $dsn = $self->{'original_arguments'}->{'dsn'};
357 my $user = $self->{'original_arguments'}->{'user'};
358 my $pass = $self->{'original_arguments'}->{'pass'};
359 my $dbi_options = $self->{'original_arguments'}->{'dbi_options'};
360 $self->dbh()->{InactiveDestroy
} = 1;
361 my $new_dbh = DBI
->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr
);
362 $new_dbh->{InactiveDestroy
} = 1;
363 $self->{dbh
} = $new_dbh; # unless $self->is_temp;
364 if ($self->schema()) {
365 $self->schema($self->schema()); # Reset the DBH's schema
370 # get/set directory for bulk load tables
374 my $d = $self->{dumpdir
};
375 $self->{dumpdir
} = abs_path
(shift) if @_;
380 # table namespace (multiple dbs in one Pg db)
384 my $d = $self->{namespace
};
385 $self->{namespace
} = shift if @_;
390 # find a path that corresponds to a dump table
394 my $table = $self->_qualify(shift);
395 return "$self->{dumpdir}/$table.$$";
399 # make a filehandle (writeable) that corresponds to a dump table
401 sub dump_filehandle
{
404 eval "require IO::File" unless IO
::File
->can('new');
405 my $path = $self->dump_path($table);
406 my $fh = $self->{filehandles
}{$path} ||= IO
::File
->new(">$path");
411 # find the next ID for a feature (used only during bulk loading)
415 $self->{max_id
} ||= $self->max_id;
416 return ++$self->{max_id
};
420 # find the maximum ID for a feature (used only during bulk loading)
424 my $sth = $self->_prepare("SELECT max(id) from feature");
425 $sth->execute or $self->throw($sth->errstr);
426 my ($id) = $sth->fetchrow_array;
431 my ($self, $schema) = @_;
432 $self->{'schema'} = $schema if defined($schema);
434 $self->dbh->do("SET search_path TO " . $self->{'schema'});
436 $self->dbh->do("SET search_path TO public");
438 return $self->{'schema'};
441 # wipe database clean and reinstall schema
447 my $dbh = $self->dbh;
448 my $tables = $self->table_definitions;
449 foreach (keys %$tables) {
450 next if $_ eq 'meta'; # don't get rid of meta data!
451 my $table = $self->_qualify($_);
452 $dbh->do("DROP TABLE IF EXISTS $table") if $erase;
453 my @table_exists = $dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
454 if (!scalar(@table_exists)) {
455 my $query = "CREATE TABLE $table $tables->{$_}";
456 $dbh->do($query) or $self->throw($dbh->errstr);
459 $self->subfeatures_are_indexed(1) if $erase;
463 sub maybe_create_meta
{
465 return unless $self->writeable;
466 my $table = $self->_qualify('meta');
467 my $tables = $self->table_definitions;
468 my @table_exists = $self->dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
469 if (!scalar(@table_exists)) {
470 $self->dbh->do("CREATE TABLE $table $tables->{meta}");
474 sub init_tmp_database
{
476 my $dbh = $self->dbh;
477 my $tables = $self->table_definitions;
478 for my $t (keys %$tables) {
479 my $table = $self->_qualify($t);
480 my @table_exists = $dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
481 if (!scalar(@table_exists)) {
482 my $query = "CREATE TEMPORARY TABLE $table $tables->{$t}";
483 $dbh->do($query) or $self->throw($dbh->errstr);
490 # use temporary tables
499 # special case for bulk updates
500 return $self->_dump_store(@_) if $self->{bulk_update_in_progress
};
505 my $autoindex = $self->autoindex;
507 my $dbh = $self->dbh;
508 local $dbh->{RaiseError
} = 1;
512 $self->replace($obj,$indexed);
513 $self->_update_indexes($obj) if $indexed && $autoindex;
519 warn "Transaction aborted because $@";
526 # remember whether we have ever stored a non-indexed feature
527 unless ($indexed or $self->{indexed_flag
}++) {
528 $self->subfeatures_are_indexed(0);
533 # we memoize this in order to avoid making zillions of calls
537 # special case for bulk update -- need to build the indexes
538 # at the same time we build the main feature table
539 return 1 if $self->{bulk_update_in_progress
};
540 my $d = $self->setting('autoindex');
541 $self->setting(autoindex
=>shift) if @_;
545 sub _start_bulk_update
{
547 my $dbh = $self->dbh;
548 $self->{bulk_update_in_progress
}++;
551 sub _finish_bulk_update
{
553 my $dbh = $self->dbh;
554 my $dir = $self->{dumpdir
} || '.';
555 for my $table ('feature',$self->index_tables) {
556 my $fh = $self->dump_filehandle($table);
557 my $path = $self->dump_path($table);
559 my $qualified_table = $self->_qualify($table);
560 `cp $path $path.bak`;
561 # Get stuff from file into STDIN so we don't have to be superuser
563 print STDERR
"Loading file $path\n";
564 $dbh->do("COPY $qualified_table FROM STDIN CSV QUOTE '''' DELIMITER '\t'") or $self->throw($dbh->errstr);
565 while (my $line = <FH
>) {
566 $dbh->pg_putline($line);
568 $dbh->pg_endcopy() or $self->throw($dbh->errstr);
572 delete $self->{bulk_update_in_progress
};
573 delete $self->{filehandles
};
578 # Add subparts to a feature. Both the feature and all subparts must already be in the database.
580 sub _add_SeqFeature
{
583 # special purpose method for case when we are doing a bulk update
584 return $self->_dump_add_SeqFeature(@_) if $self->{bulk_update_in_progress
};
589 my $dbh = $self->dbh;
590 local $dbh->{RaiseError
} = 1;
592 my $child_table = $self->_parent2child_table();
595 my $exist_query = $self->_prepare("SELECT count(*) FROM $child_table WHERE id = ? AND child = ?" );
597 my $insert_query = $self->_prepare("INSERT INTO $child_table (id,child) VALUES (?, ?)");
599 # my $sth = $self->_prepare(<<END);
600 #REPLACE INTO $child_table (id,child) VALUES (?,?)
603 my $parent_id = (ref $parent ?
$parent->primary_id : $parent)
604 or $self->throw("$parent should have a primary_id");
607 $dbh->begin_work or $self->throw($dbh->errstr);
609 for my $child (@children) {
610 my $child_id = ref $child ?
$child->primary_id : $child;
611 defined $child_id or die "no primary ID known for $child";
613 $exist_query->($parent_id, $child_id);
614 my ($exist) = $exist_query->fetchrow_array;
617 $insert_query->execute($parent_id,$child_id);
624 warn "Transaction aborted because $@";
630 $insert_query->finish;
634 sub _fetch_SeqFeatures
{
639 my $parent_id = $parent->primary_id or $self->throw("$parent should have a primary_id");
640 my $feature_table = $self->_feature_table;
641 my $child_table = $self->_parent2child_table();
643 my @from = ("$feature_table as f","$child_table as c");
644 my @where = ('f.id=c.child','c.id=?');
645 my @args = $parent_id;
648 my ($from,$where,undef,@a) = $self->_types_sql(\
@types,'f');
649 push @from,$from if $from;
650 push @where,$where if $where;
654 my $from = join ', ',@from;
655 my $where = join ' AND ',@where;
663 $self->_print_query($query,@args) if DEBUG
|| $self->debug;
665 my $sth = $self->_prepare($query) or $self->throw($self->dbh->errstr);
666 $sth->execute(@args) or $self->throw($sth->errstr);
668 return $self->_sth2objs($sth);
672 # get primary sequence between start and end
674 sub _fetch_sequence
{
676 my ($seqid,$start,$end) = @_;
678 # backward compatibility to the old days when I liked reverse complementing
679 # dna by specifying $start > $end
681 if (defined $start && defined $end && $start > $end) {
683 ($start,$end) = ($end,$start);
685 $start-- if defined $start;
686 $end-- if defined $end;
688 my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
689 my $offset2 = $self->_offset_boundary($seqid,$end || 'right');
690 my $sequence_table = $self->_sequence_table;
691 my $locationlist_table = $self->_locationlist_table;
693 my $sth = $self->_prepare(<<END);
694 SELECT sequence,offset
695 FROM $sequence_table as s,$locationlist_table as ll
704 $sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
706 while (my($frag,$offset) = $sth->fetchrow_array) {
707 substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
710 substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
713 $seq =~ tr/gatcGATC/ctagCTAG/;
719 sub _offset_boundary
{
721 my ($seqid,$position) = @_;
723 my $sequence_table = $self->_sequence_table;
724 my $locationlist_table = $self->_locationlist_table;
727 $sql = $position eq 'left' ?
"SELECT min(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
728 :$position eq 'right' ?
"SELECT max(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
729 :"SELECT max(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=? AND offset<=?";
730 my $sth = $self->_prepare($sql);
731 my @args = $position =~ /^-?\d+$/ ?
($seqid,$position) : ($seqid);
732 $sth->execute(@args) or $self->throw($sth->errstr);
733 my $boundary = $sth->fetchall_arrayref->[0][0];
740 # add namespace to tablename
744 my $table_name = shift;
745 my $namespace = $self->namespace;
746 return $table_name unless defined $namespace;
747 return "${namespace}_${table_name}";
751 # Fetch a Bio::SeqFeatureI from database using its primary_id
755 @_ or $self->throw("usage: fetch(\$primary_id)");
756 my $primary_id = shift;
757 my $features = $self->_feature_table;
758 my $sth = $self->_prepare(<<END);
759 SELECT id,object FROM $features WHERE id=?
761 $sth->execute($primary_id) or $self->throw($sth->errstr);
762 my $obj = $self->_sth2obj($sth);
768 # Efficiently fetch a series of IDs from the database
769 # Can pass an array or an array ref
773 @_ or $self->throw('usage: fetch_many($id1,$id2,$id3...)');
774 my $ids = join ',',map {ref($_) ? @
$_ : $_} @_ or return;
775 my $features = $self->_feature_table;
777 my $sth = $self->_prepare(<<END);
778 SELECT id,object FROM $features WHERE id IN ($ids)
780 $sth->execute() or $self->throw($sth->errstr);
781 return $self->_sth2objs($sth);
786 my ($seq_id,$start,$end,$strand,
787 $name,$class,$allow_aliases,
793 ) = rearrange
([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
794 'NAME','CLASS','ALIASES',
795 ['TYPES','TYPE','PRIMARY_TAG'],
796 ['ATTRIBUTES','ATTRIBUTE'],
802 my (@from,@where,@args,@group);
803 $range_type ||= 'overlaps';
805 my $feature_table = $self->_feature_table;
806 @from = "$feature_table as f";
809 # hacky backward compatibility workaround
810 undef $class if $class && $class eq 'Sequence';
811 $name = "$class:$name" if defined $class && length $class > 0;
812 # last argument is the join field
813 my ($from,$where,$group,@a) = $self->_name_sql($name,$allow_aliases,'f.id');
814 push @from,$from if $from;
815 push @where,$where if $where;
816 push @group,$group if $group;
820 if (defined $seq_id) {
821 # last argument is the name of the features table
822 my ($from,$where,$group,@a) = $self->_location_sql($seq_id,$start,$end,$range_type,$strand,'f');
823 push @from,$from if $from;
824 push @where,$where if $where;
825 push @group,$group if $group;
829 if (defined($types)) {
830 # last argument is the name of the features table
831 my ($from,$where,$group,@a) = $self->_types_sql($types,'f');
832 push @from,$from if $from;
833 push @where,$where if $where;
834 push @group,$group if $group;
838 if (defined $attributes) {
839 # last argument is the join field
840 my ($from,$where,$group,@a) = $self->_attributes_sql($attributes,'f.id');
841 push @from,$from if $from;
842 push @where,$where if $where;
843 push @group,$group if $group;
847 if (defined $fromtable) {
848 # last argument is the join field
849 my ($from,$where,$group,@a) = $self->_from_table_sql($fromtable,'f.id');
850 push @from,$from if $from;
851 push @where,$where if $where;
852 push @group,$group if $group;
856 # if no other criteria are specified, then
857 # only fetch indexed (i.e. top level objects)
858 @where = 'indexed=1' unless @where;
860 my $from = join ', ',@from;
861 my $where = join ' AND ',map {"($_)"} @where;
862 my $group = join ', ',@group;
863 $group = "GROUP BY $group" if @group;
872 $self->_print_query($query,@args) if DEBUG
|| $self->debug;
874 my $sth = $self->_prepare($query);
875 $sth->execute(@args) or $self->throw($$ . "\n" . $sth->errstr);
878 # print STDERR "I run the query\n SELECT f.id, f.object FROM $from WHERE $where $group\n with args @args\n";
879 # print STDERR "I should make an object out of " . Dumper($sth->fetchrow_hashref()) . "\n";
880 # $sth->execute(@args) or $self->throw($sth->errstr);
881 # print STDERR "I am making objects: " . Dumper($self->_sth2objs($sth)) . "\n";
882 # $sth->execute(@args) or $self->throw($sth->errstr);
884 return $iterator ? Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
->new($sth,$self) : $self->_sth2objs($sth);
889 my ($name,$allow_aliases,$join) = @_;
890 my $name_table = $self->_name_table;
892 my $from = "$name_table as n";
893 my ($match,$string) = $self->_match_sql($name);
895 my $where = "n.id=$join AND n.name $match";
896 $where .= " AND n.display_name>0" unless $allow_aliases;
897 return ($from,$where,'',$string);
900 sub _search_attributes
{
902 my ($search_string,$attribute_names,$limit) = @_;
903 my @words = map {quotemeta($_)} split /\s+/,$search_string;
904 my $name_table = $self->_name_table;
905 my $attribute_table = $self->_attribute_table;
906 my $attributelist_table = $self->_attributelist_table;
907 my $type_table = $self->_type_table;
908 my $typelist_table = $self->_typelist_table;
910 my @tags = @
$attribute_names;
911 my $tag_sql = join ' OR ',("al.tag=?") x
@tags;
913 my $perl_regexp = join '|',@words;
915 my $sql_regexp = join ' AND ',("a.attribute_value SIMILAR TO ?") x
@words;
917 SELECT name,attribute_value,tl.tag,n.id
918 FROM $name_table as n,$attribute_table as a,$attributelist_table as al,$type_table as t,$typelist_table as tl
920 AND al.id=a.attribute_id
927 $sql .= "LIMIT $limit" if defined $limit;
928 $self->_print_query($sql,@tags,@words) if DEBUG
|| $self->debug;
929 my $sth = $self->_prepare($sql);
930 $sth->execute(@tags,@words) or $self->throw($sth->errstr);
933 while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
934 my (@hits) = $value =~ /$perl_regexp/ig;
935 my @words_in_row = split /\b/,$value;
936 my $score = int(@hits*100/@words/@words_in_row);
937 push @results,[$name,$value,$score,$type,$id];
940 @results = sort {$b->[2]<=>$a->[2]} @results;
949 if ($name =~ /(?:^|[^\\])[*?]/) {
950 $name =~ s/(^|[^\\])([%_])/$1\\$2/g;
951 $name =~ s/(^|[^\\])\*/$1%/g;
952 $name =~ s/(^|[^\\])\?/$1_/g;
959 return ($match,$string);
962 sub _from_table_sql
{
964 my ($from_table,$join) = @_;
965 my $from = "$from_table as ft";
966 my $where = "ft.id=$join";
967 return ($from,$where,'');
970 sub _attributes_sql
{
972 my ($attributes,$join) = @_;
974 my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
975 my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
977 my $attribute_table = $self->_attribute_table;
978 my $attributelist_table = $self->_attributelist_table;
980 my $from = "$attribute_table as a use index(attribute_id), $attributelist_table as al";
984 AND a.attribute_id=al.id
988 my $group = $group_by;
990 my @args = (@bind_args,@group_args);
991 return ($from,$where,$group,@args);
# Capability flag: subfeature types are recorded in the type index,
# so type-based queries will also find subfeatures. Always true here.
sub subfeature_types_are_indexed {
    return 1;
}
# Capability flag: subfeature locations participate in the spatial
# (bin) index, so range queries will also find subfeatures. Always true.
sub subfeature_locations_are_indexed {
    return 1;
}
999 my ($types,$type_table) = @_;
1000 my ($primary_tag,$source_tag);
1002 my @types = ref $types eq 'ARRAY' ? @
$types : $types;
1004 my $typelist = $self->_typelist_table;
1005 my $from = "$typelist AS tl";
1007 my (@matches,@args);
1009 for my $type (@types) {
1011 if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
1012 $primary_tag = $type->method;
1013 $source_tag = $type->source;
1015 ($primary_tag,$source_tag) = split ':',$type,2;
1018 if (defined $source_tag) {
1019 push @matches,"tl.tag=?";
1020 push @args,"$primary_tag:$source_tag";
1022 push @matches,"tl.tag LIKE ?";
1023 push @args,"$primary_tag:%";
1026 my $matches = join ' OR ',@matches;
1029 tl.id=$type_table.typeid
1033 return ($from,$where,'',@args);
1038 my ($seq_id,$start,$end,$range_type,$strand,$location) = @_;
1040 # the additional join on the location_list table badly impacts performance
1041 # so we build a copy of the table in memory
1042 my $seqid = $self->_locationid($seq_id) || 0; # zero is an invalid primary ID, so will return empty
1044 $start = MIN_INT
unless defined $start;
1045 $end = MAX_INT
unless defined $end;
1047 my ($bin_where,@bin_args) = $self->bin_where($start,$end,$location);
1049 my ($range,@range_args);
1050 if ($range_type eq 'overlaps') {
1051 $range = "$location.end>=? AND $location.start<=? AND ($bin_where)";
1052 @range_args = ($start,$end,@bin_args);
1053 } elsif ($range_type eq 'contains') {
1054 $range = "$location.start>=? AND $location.end<=? AND ($bin_where)";
1055 @range_args = ($start,$end,@bin_args);
1056 } elsif ($range_type eq 'contained_in') {
1057 $range = "$location.start<=? AND $location.end>=?";
1058 @range_args = ($start,$end);
1060 $self->throw("range_type must be one of 'overlaps', 'contains' or 'contained_in'");
1063 if (defined $strand) {
1064 $range .= " AND strand=?";
1065 push @range_args,$strand;
1076 my @args = ($seqid,@range_args);
1077 return ($from,$where,$group,@args);
1085 my $from_update_table = shift; # if present, will take ids from "update_table"
1087 my $dbh = $self->dbh;
1091 # try to bring in highres time() function
1092 eval "require Time::HiRes";
1094 my $last_time = $self->time();
1096 # tell _delete_index() not to bother removing the index rows corresponding
1097 # to each individual feature
1098 local $self->{reindexing
} = 1;
1102 my $update = $from_update_table;
1103 for my $table ($self->index_tables) {
1104 my $query = $from_update_table ?
"DELETE $table FROM $table,$update WHERE $table.id=$update.id"
1105 : "DELETE FROM $table";
1107 $dbh->do("ALTER TABLE $table DISABLE KEYS");
1109 my $iterator = $self->get_seq_stream(-from_table
=>$from_update_table ?
$update : undef);
1110 while (my $f = $iterator->next_seq) {
1111 if (++$count %1000 == 0) {
1112 $now = $self->time();
1113 my $elapsed = sprintf(" in %5.2fs",$now - $last_time);
1115 print STDERR
"$count features indexed$elapsed...",' 'x60
;
1116 print STDERR
-t STDOUT
&& !$ENV{EMACS
} ?
"\r" : "\n";
1118 $self->_update_indexes($f);
1121 for my $table ($self->index_tables) {
1122 $dbh->do("ALTER TABLE $table ENABLE KEYS");
1125 warn "Couldn't complete transaction: $@";
1136 $self->dbh->do("ANALYZE TABLE $_") foreach $self->index_tables;
1141 my @index_tables = $self->index_tables;
1142 my $feature_table = $self->_feature_table;
1143 return ($feature_table,@index_tables);
1148 return map {$self->_qualify($_)} qw(name attribute parent2child)
1153 my $features = $self->_feature_table;
1155 SELECT min(id) FROM $features
1157 my $sth=$self->_prepare($query);
1159 my ($first) = $sth->fetchrow_array;
1166 my $lastkey = shift;
1167 my $features = $self->_feature_table;
1169 SELECT min(id) FROM $features WHERE id>?
1171 my $sth=$self->_prepare($query);
1172 $sth->execute($lastkey);
1173 my ($next) = $sth->fetchrow_array;
1181 my $features = $self->_feature_table;
1183 SELECT count(*) FROM $features WHERE id=?
1185 my $sth=$self->_prepare($query);
1186 $sth->execute($key);
1187 my ($count) = $sth->fetchrow_array;
1195 my $dbh = $self->dbh;
1196 my $child_table = $self->_parent2child_table;
1197 my $query = "SELECT child FROM $child_table WHERE id=?";
1198 my $sth=$self->_prepare($query);
1199 $sth->execute($key);
1201 while (my ($cid) = $sth->fetchrow_array) {
1202 # Backcheck looking for multiple parents, delete only if one is present. I'm
1203 # sure there is a nice way to left join the parent2child table onto itself
1204 # to get this in one query above, just haven't worked it out yet...
1205 my $sth2 = $self->_prepare("SELECT count(id) FROM $child_table WHERE child=?");
1206 $sth2->execute($cid);
1207 my ($count) = $sth2->fetchrow_array;
1209 $self->_deleteid($cid) || $self->throw("Couldn't remove subfeature!");
1212 for my $table ($self->all_tables) {
1213 $success += $dbh->do("DELETE FROM $table WHERE id=$key") || 0;
1220 my $dbh = $self->dbh;
1221 for my $table ($self->all_tables) {
1222 $dbh->do("DELETE FROM $table");
1228 my $dbh = $self->dbh;
1229 my $features = $self->_feature_table;
1231 SELECT count(*) FROM $features
1233 my $sth=$self->_prepare($query);
1235 my ($count) = $sth->fetchrow_array;
1242 my $dbh = $self->dbh;
1243 my $location = $self->_locationlist_table;
1244 my $sth = $self->_prepare("SELECT DISTINCT seqname FROM $location");
1245 $sth->execute() or $self->throw($sth->errstr);
1247 while (my ($id) = $sth->fetchrow_array) {
1255 my ($variable_name,$value) = @_;
1256 my $meta = $self->_meta_table;
1258 if (defined $value && $self->writeable) {
1259 my $querydel = "DELETE FROM $meta WHERE name = ?";
1260 my $query = "INSERT INTO $meta (name,value) VALUES (?,?)";
1261 my $sthdel = $self->_prepare($querydel);
1262 my $sth = $self->_prepare($query);
1263 $sthdel->execute($variable_name);
1264 $sth->execute($variable_name,$value) or $self->throw($sth->errstr);
1266 $self->{settings_cache
}{$variable_name} = $value;
1269 return $self->{settings_cache
}{$variable_name} if exists $self->{settings_cache
}{$variable_name};
1271 SELECT value FROM $meta as m WHERE m.name=?
1273 my $sth = $self->_prepare($query);
1274 # $sth->execute($variable_name) or $self->throw($sth->errstr);
1275 unless ($sth->execute($variable_name)) {
1276 my $errstr = $sth->errstr;
1277 $sth = $self->_prepare("SHOW search_path");
1279 $errstr .= "With search_path " . $sth->fetchrow_arrayref->[0] . "\n";
1280 $self->throw($errstr);
1283 my ($value) = $sth->fetchrow_array;
1285 return $self->{settings_cache
}{$variable_name} = $value;
1290 # Replace Bio::SeqFeatureI into database.
1295 my $index_flag = shift || undef;
1297 # ?? shouldn't need to do this
1298 # $self->_load_class($object);
1299 my $id = $object->primary_id;
1300 my $features = $self->_feature_table;
1302 my $exist_query = defined($id)
1303 ?
$self->_prepare("select count(*) from $features where id = ?")
1305 my $update_query = $self->_prepare("update $features object = ?,
1315 my $insert_query = $self->_prepare("INSERT INTO $features (object,indexed,seqid,start,\"end\",strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?)");
1317 my @location = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x6
;
1319 my $primary_tag = $object->primary_tag;
1320 my $source_tag = $object->source_tag || '';
1321 $primary_tag .= ":$source_tag";
1322 my $typeid = $self->_typeid($primary_tag,1);
1325 $exist_query->execute($id) if $exist_query;
1327 ($exists) = $exist_query->fetchrow_array if $exist_query;
1334 $update_query->execute($self->freeze($object),$index_flag||0,@location,$typeid,$id) or $self->throw($update_query->errstr);
1337 $insert_query->execute($self->freeze($object),$index_flag||0,@location,$typeid) or $self->throw($insert_query->errstr);
1340 #old syntax with with replace into
1341 #$sth->execute($id,$self->freeze($object),$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
1343 my $dbh = $self->dbh;
1344 $object->primary_id($dbh->{pg_insertid
}) unless defined $id;
1346 $self->flag_for_indexing($dbh->{pg_insertid
}) if $self->{bulk_update_in_progress
};
1350 # Insert one Bio::SeqFeatureI into database. primary_id must be undef
1355 my $index_flag = shift || 0;
1357 $self->_load_class($object);
1358 defined $object->primary_id and $self->throw("$object already has a primary id");
1360 my $features = $self->_feature_table;
1361 my $sth = $self->_prepare(<<END);
1362 INSERT INTO $features (id,object,indexed) VALUES (?,?,?)
1364 $sth->execute(undef,$self->freeze($object),$index_flag) or $self->throw($sth->errstr);
1365 my $dbh = $self->dbh;
1366 $object->primary_id($dbh->{pg_insertid
});
1367 $self->flag_for_indexing($dbh->{pg_insertid
}) if $self->{bulk_update_in_progress
};
1373 Usage : @type_list = $db->types
1374 Function: Get all the types in the database
1375 Returns : array of Bio::DB::GFF::Typename objects
1383 eval "require Bio::DB::GFF::Typename"
1384 unless Bio
::DB
::GFF
::Typename
->can('new');
1385 my $typelist_table = $self->_typelist_table;
1387 SELECT tag from $typelist_table
1390 $self->_print_query($sql) if DEBUG
|| $self->debug;
1391 my $sth = $self->_prepare($sql);
1392 $sth->execute() or $self->throw($sth->errstr);
1395 while (my($tag) = $sth->fetchrow_array) {
1396 push @results,Bio
::DB
::GFF
::Typename
->new($tag);
# Insert a bit of DNA or protein into the database.
# PostgreSQL has no REPLACE INTO, so an upsert is emulated by deleting any
# existing chunk at this (id,"offset") before inserting the new one.
sub _insert_sequence {
  my $self = shift;
  my ($seqid,$seq,$offset) = @_;
  my $id       = $self->_locationid($seqid);
  my $seqtable = $self->_sequence_table;
  my $sthdel = $self->_prepare(qq{DELETE FROM $seqtable WHERE id = ? AND "offset" = ?});
  my $sthins = $self->_prepare(<<END);
INSERT INTO $seqtable (id,"offset",sequence) VALUES (?,?,?)
END
  $sthdel->execute($id,$offset);
  $sthins->execute($id,$offset,$seq) or $self->throw($sthins->errstr);
}
# This subroutine flags the given primary ID for later reindexing
sub flag_for_indexing {
  my $self = shift;
  my $id   = shift;
  my $needs_updating = $self->_update_table;
  # BUG FIX: "REPLACE INTO" is MySQL-only syntax and is a syntax error under
  # PostgreSQL, so this statement could never execute. Emulate it with
  # delete-then-insert, the same pattern _insert_sequence() uses.
  my $sthdel = $self->_prepare("DELETE FROM $needs_updating WHERE id = ?");
  $sthdel->execute($id);
  my $sth = $self->_prepare("INSERT INTO $needs_updating VALUES (?)");
  $sth->execute($id) or $self->throw($self->dbh->errstr);
}
# Update indexes for given object. Features without a primary_id are ignored.
sub _update_indexes {
  my $self = shift;
  my $obj  = shift;
  defined (my $id = $obj->primary_id) or return;

  if ($self->{bulk_update_in_progress}) {
    # during a bulk load, index rows are appended to dump files
    $self->_dump_update_name_index($obj,$id);
    $self->_dump_update_attribute_index($obj,$id);
  }
  else {
    $self->_update_name_index($obj,$id);
    $self->_update_attribute_index($obj,$id);
  }
}
# Rebuild the name-index rows for one feature: drop the old rows, then
# insert its display names (display_name=1) and aliases (display_name=0).
sub _update_name_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $name = $self->_name_table;
  # NOTE: removed an unused local that fetched $obj->primary_id and never
  # used it ($id is the authoritative key here).
  $self->_delete_index($name,$id);
  my ($names,$aliases) = $self->feature_names($obj);

  my $sth = $self->_prepare("INSERT INTO $name (id,name,display_name) VALUES (?,?,?)");
  $sth->execute($id,$_,1) or $self->throw($sth->errstr) foreach @$names;
  $sth->execute($id,$_,0) or $self->throw($sth->errstr) foreach @$aliases;
}
# Rebuild the attribute-index rows for one feature: drop old rows, then
# insert one row per (tag,value) pair the feature carries.
sub _update_attribute_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $attribute = $self->_attribute_table;
  $self->_delete_index($attribute,$id);

  my $sth = $self->_prepare("INSERT INTO $attribute (id,attribute_id,attribute_value) VALUES (?,?,?)");
  foreach my $tag ($obj->get_all_tags) {
    my $tag_id = $self->_attributeid($tag);
    foreach my $value ($obj->get_tag_values($tag)) {
      $sth->execute($id,$tag_id,$value) or $self->throw($sth->errstr);
    }
  }
}
# Look up the numeric id for $name in one of the small lookup tables
# (typelist/locationlist/attributelist); optionally create the row when
# missing and return the newly assigned serial id.
sub _genericid {
  my $self = shift;
  my ($table,$namefield,$name,$add_if_missing) = @_;
  my $qualified_table = $self->_qualify($table);
  my $sth = $self->_prepare(<<END);
SELECT id FROM $qualified_table WHERE $namefield=?
END
  $sth->execute($name) or die $sth->errstr;
  my ($id) = $sth->fetchrow_array;
  return $id if defined $id;
  return unless $add_if_missing;

  $sth = $self->_prepare(<<END);
INSERT INTO $qualified_table ($namefield) VALUES (?)
END
  # BUG FIX: removed a leftover debug print to STDOUT
  # ("Inserting into $qualified_table ...") that polluted the output stream
  # of every program that loads features through this adaptor.
  $sth->execute($name) or die $sth->errstr;
  my $dbh = $self->dbh;
  return $dbh->last_insert_id(undef, undef, $qualified_table, undef);
}
# Numeric-id lookups for the three controlled-vocabulary tables; each one
# creates the entry on first use (add_if_missing => 1).
sub _typeid      { shift->_genericid('typelist','tag',shift,1) }
sub _locationid  { shift->_genericid('locationlist','seqname',shift,1) }
sub _attributeid { shift->_genericid('attributelist','tag',shift,1) }
# Return the indexable location tuple for a feature:
# (numeric seqid, start, end, strand, bin tier, bin number).
sub _get_location_and_bin {
  my $self    = shift;
  my $feature = shift;
  my $seqid  = $self->_locationid($feature->seq_id);
  my $start  = $feature->start;
  my $end    = $feature->end;
  my $strand = $feature->strand || 0;   # undef strand is stored as 0
  my ($tier,$bin) = $self->get_bin($start,$end);
  return ($seqid,$start,$end,$strand,$tier,$bin);
}
# Find the smallest bin that wholly contains [start,end], returning the
# bin's tier and its number within that tier. Tiers grow from MIN_BIN,
# widening by a factor of 10 per tier (assumed scale factor -- TODO confirm
# against the MIN_BIN/MAX_BIN definitions and bin_where()).
sub get_bin {
  my $self = shift;
  my ($start,$end) = @_;
  my $binsize = MIN_BIN;
  my ($bin_start,$bin_end);
  my $tier = 0;
  while (1) {
    $bin_start = int $start/$binsize;
    $bin_end   = int $end/$binsize;
    last if $bin_start == $bin_end;   # the feature fits in one bin at this tier
    $binsize *= 10;
    $tier++;
  }
  return ($tier,$bin_start);
}
# Build the OR'd SQL fragment (plus bind values) selecting every
# (tier, bin-range) pair that can overlap [start,end]; $f is the table
# alias providing the tier/bin columns.
sub bin_where {
  my $self = shift;
  my ($start,$end,$f) = @_;
  my (@bins,@args);
  my $tier    = 0;
  my $binsize = MIN_BIN;
  while ($binsize <= MAX_BIN) {
    my $bin_start = int($start/$binsize);
    my $bin_end   = int($end/$binsize);
    push @bins,"($f.tier=? AND $f.bin between ? AND ?)";
    push @args,($tier,$bin_start,$bin_end);
    $binsize *= 10;   # assumed per-tier scale factor -- TODO confirm vs get_bin()
    $tier++;
  }
  my $query = join("\n\t OR ",@bins);
  # NOTE(review): in scalar context this calls substitute($query,@args)
  # as a plain function, while substitute() below shifts off $self first --
  # verify the intended calling convention.
  return wantarray ? ($query,@args) : substitute($query,@args);
}
# Remove every index row for primary id $id from $table_name.
# A no-op while a reindexing pass is running (the {reindexing} flag is set).
sub _delete_index {
  my $self = shift;
  my ($table_name,$id) = @_;
  return if $self->{reindexing};
  my $sth = $self->_prepare("DELETE FROM $table_name WHERE id=?")
    or $self->throw($self->dbh->errstr);
  $sth->execute($id);
}
# given a statement handle that is expected to return rows of (id,object),
# thaw each object and return a list of 'em
sub _sth2objs {
  my $self = shift;
  my $sth  = shift;
  my @result;
  my ($id,$o);
  $sth->bind_col(1, \$id);
  # bind the object column as BYTEA so DBD::Pg hands back the raw frozen bytes
  $sth->bind_col(2, \$o, { pg_type => PG_BYTEA });
  while ($sth->fetch) {
    push @result, $self->thaw($o,$id);
  }
  $sth->finish;
  return @result;
}
# given a statement handle that is expected to return rows of (id,object),
# thaw and return the single next object (undef/empty when exhausted)
sub _sth2obj {
  my $self = shift;
  my $sth  = shift;
  my ($id,$o) = $sth->fetchrow_array;
  return unless defined $o;
  my $obj = $self->thaw($o,$id);
  return $obj;
}
# Prepare a statement with DBI handle-caching. The $if_active value of 3
# tells prepare_cached to hand back a fresh handle rather than reusing a
# still-active cached one (see DBI's prepare_cached documentation).
sub _prepare {
  my $self  = shift;
  my $query = shift;
  my $dbh   = $self->dbh;
  my $sth   = $dbh->prepare_cached($query, {}, 3) or $self->throw($dbh->errstr);
  return $sth;
}
####################################################################################################
# SQL Fragment generators
####################################################################################################

# Fully qualified (schema-prefixed) names of the adaptor's tables.
sub _feature_table       { shift->_qualify('feature') }
sub _location_table      { shift->_qualify('location') }
sub _locationlist_table  { shift->_qualify('locationlist') }
sub _type_table          { shift->_qualify('feature') }   # type info lives in the feature table
sub _typelist_table      { shift->_qualify('typelist') }
sub _name_table          { shift->_qualify('name') }
sub _attribute_table     { shift->_qualify('attribute') }
sub _attributelist_table { shift->_qualify('attributelist') }
sub _parent2child_table  { shift->_qualify('parent2child') }
sub _meta_table          { shift->_qualify('meta') }
sub _update_table        { shift->_qualify('update_table') }
sub _sequence_table      { shift->_qualify('sequence') }
# Build the WHERE fragment (and its bind values) that matches features by
# attribute tag/value. '*' in a value is converted to the SQL '%' wildcard,
# which switches that comparison from '=' to LIKE.
sub _make_attribute_where {
  my $self = shift;
  my ($attributetable,$attributenametable,$attributes) = @_;
  my (@sql,@args);
  # NOTE: removed an unused "my $dbh = $self->dbh" local.
  foreach my $tag (keys %$attributes) {
    my @values = ref($attributes->{$tag}) && ref($attributes->{$tag}) eq 'ARRAY'
                 ? @{$attributes->{$tag}}
                 : $attributes->{$tag};
    foreach (@values) { # convert * into % for wildcard matches
      s/\*/%/g;
    }
    my $match = join ' OR ', map {
      /%/ ? "$attributetable.attribute_value LIKE ?"
          : "$attributetable.attribute_value=?"
    } @values;
    push @sql,  "($attributenametable.tag=? AND ($match))";
    push @args, ($tag,@values);
  }
  return (join(' OR ',@sql),@args);
}
# GROUP BY / HAVING fragment ensuring a feature matched every requested
# attribute: count(f.id) must exceed key_count-1, i.e. reach the number of
# attribute keys. Returns nothing when there are no attributes.
sub _make_attribute_group {
  my $self = shift;
  my ($table_name,$attributes) = @_;
  my $key_count = keys %$attributes
    or return;
  return ("f.id HAVING count(f.id)>?", $key_count - 1);
}
# Interpolate bind values into a query string (for display/debugging),
# quoting each argument through DBI's quote().
sub substitute {
  my $self = shift;
  my ($query,@args) = @_;
  while ($query =~ /\?/) {
    my $quoted = $self->dbh->quote(shift @args);
    $query =~ s/\?/$quoted/;
  }
  return $query;
}
# special-purpose store for bulk loading - write to a file rather than to the db
sub _dump_store {
  my $self    = shift;
  my $indexed = shift;

  my $count    = 0;
  my $store_fh = $self->dump_filehandle('feature');
  my $dbh      = $self->dbh;

  my $autoindex = $self->autoindex;

  for my $obj (@_) {
    my $id = $self->next_id;
    my ($seqid,$start,$end,$strand,$tier,$bin) =
      $indexed ? $self->_get_location_and_bin($obj) : (undef) x 6;
    my $primary_tag = $obj->primary_tag;
    my $source_tag  = $obj->source_tag || '';
    $primary_tag   .= ":$source_tag";
    my $typeid = $self->_typeid($primary_tag,1);

    # quote as BYTEA, then strip DBI's outer quoting and collapse doubled
    # backslashes so the value survives tab-delimited bulk loading
    my $frozen_object = $dbh->quote($self->freeze($obj), { pg_type => PG_BYTEA });
    $frozen_object =~ s/^E?'|'$//g;
    $frozen_object =~ s/\\\\/\\/g;
    # TODO: Fix this, why does frozen object start with quote but not end with one
    print $store_fh join("\t",$id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,$frozen_object),"\n";
    $obj->primary_id($id);
    $self->_update_indexes($obj) if $indexed && $autoindex;
    $count++;
  }

  # remember whether we have ever stored a non-indexed feature
  unless ($indexed or $self->{indexed_flag}++) {
    $self->subfeatures_are_indexed(0);
  }
  return $count;
}
# Bulk-load variant of add_SeqFeature: append (parent_id, child_id) pairs
# to the parent2child dump file instead of inserting rows. Returns the
# number of pairs written.
sub _dump_add_SeqFeature {
  my $self     = shift;
  my $parent   = shift;
  my @children = @_;

  # NOTE: removed an unused "my $dbh = $self->dbh" local.
  my $fh = $self->dump_filehandle('parent2child');
  my $parent_id = (ref $parent ? $parent->primary_id : $parent)
    or $self->throw("$parent should have a primary_id");

  my $count = 0;
  for my $child_id (@children) {
    print $fh join("\t",$parent_id,$child_id),"\n";
    $count++;
  }
  return $count;
}
# Bulk-load variant of _update_name_index: append name rows to the 'name'
# dump file. display_name column is 1 for primary names, 0 for aliases.
sub _dump_update_name_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $fh  = $self->dump_filehandle('name');
  my $dbh = $self->dbh;
  my ($names,$aliases) = $self->feature_names($obj);
  print $fh join("\t",$id,$dbh->quote($_),1),"\n" foreach @$names;
  print $fh join("\t",$id,$dbh->quote($_),0),"\n" foreach @$aliases;
}
# Bulk-load variant of _update_attribute_index: append attribute rows to
# the 'attribute' dump file, one row per (tag,value) pair.
sub _dump_update_attribute_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $fh  = $self->dump_filehandle('attribute');
  my $dbh = $self->dbh;
  # CONSISTENCY: use get_all_tags/get_tag_values to match
  # _update_attribute_index (all_tags/each_tag_value are the older
  # Bio::SeqFeatureI aliases for the same methods).
  for my $tag ($obj->get_all_tags) {
    my $tagid = $self->_attributeid($tag);
    for my $value ($obj->get_tag_values($tag)) {
      print $fh join("\t",$id,$tagid,$dbh->quote($value)),"\n";
    }
  }
}
# High-resolution wall-clock time when Time::HiRes is loaded; otherwise the
# one-second built-in. CORE::time makes the built-in call unambiguous since
# this sub shares the builtin's name.
sub time {
  return Time::HiRes::time() if Time::HiRes->can('time');
  return CORE::time();
}
1745 if ($self->{bulk_update_in_progress
}) { # be sure to remove temp files
1746 for my $table ('feature',$self->index_tables) {
1747 my $path = $self->dump_path($table);