2 package Bio
::DB
::SeqFeature
::Store
::DBI
::Pg
;
3 use DBD
::Pg
qw(:pg_types);
5 # $Id: Pg.pm 14656 2008-04-14 15:05:37Z lstein $
9 Bio::DB::SeqFeature::Store::DBI::Pg -- Mysql implementation of Bio::DB::SeqFeature::Store
13 use Bio::DB::SeqFeature::Store;
15 # Open the sequence database
16 my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::Pg',
17 -dsn => 'dbi:Pg:test');
19 # get a feature from somewhere
20 my $feature = Bio::SeqFeature::Generic->new(...);
23 $db->store($feature) or die "Couldn't store!";
25 # primary ID of the feature is changed to indicate its primary ID
27 my $id = $feature->primary_id;
29 # get the feature back out
30 my $f = $db->fetch($id);
32 # change the feature and update it
34 $db->update($f) or die "Couldn't update!";
38 my @features = $db->fetch_many(@list_of_ids);
41 @features = $db->get_features_by_name('ZK909');
44 @features = $db->get_features_by_alias('sma-3');
47 @features = $db->get_features_by_name('gene');
50 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
53 @features = $db->get_features_by_attribute({description => 'protein kinase'})
55 # ...by the GFF "Note" field
56 @result_list = $db->search_notes('kinase');
58 # ...by arbitrary combinations of selectors
59 @features = $db->features(-name => $name,
64 -attributes => $attributes);
66 # ...using an iterator
67 my $iterator = $db->get_seq_stream(-name => $name,
72 -attributes => $attributes);
74 while (my $feature = $iterator->next_seq) {
75 # do something with the feature
78 # ...limiting the search to a particular region
79 my $segment = $db->segment('Chr1',5000=>6000);
80 my @features = $segment->features(-type=>['mRNA','match']);
82 # getting & storing sequence information
83 # Warning: this returns a string, and not a PrimarySeq object
84 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
85 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
87 # what feature types are defined in the database?
88 my @types = $db->types;
90 # create a new feature in the database
91 my $feature = $db->new_feature(-primary_tag => 'mRNA',
98 Bio::DB::SeqFeature::Store::Pg is the Mysql adaptor for
99 Bio::DB::SeqFeature::Store. You will not create it directly, but
100 instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
102 See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
104 =head2 Using the Mysql adaptor
106 Before you can use the adaptor, you must use the Pgadmin tool to
107 create a database and establish a user account with write
108 permission. In order to use "fast" loading, the user account must have
111 To establish a connection to the database, call
112 Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::Pg',@more_args). The
113 additional arguments are as follows:
115 Argument name Description
116 ------------- -----------
118 -dsn The database name. You can abbreviate
119 "dbi:Pg:foo" as "foo" if you wish.
121 -user Username for authentication.
123 -pass Password for authentication.
125 -namespace A prefix to attach to each table. This allows you
126 to have several virtual databases in the same
129 -temp Boolean flag. If true, a temporary database
130 will be created and destroyed as soon as
131 the Store object goes out of scope. (synonym -temporary)
133 -autoindex Boolean flag. If true, features in the database will be
134 reindexed every time they change. This is the default.
137 -tmpdir Directory in which to place temporary files during "fast" loading.
138 Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
140 -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
141 (synonyms -options, -dbi_attr)
143 -write Pass true to open database for writing or updating.
145 If successful, a new instance of
146 Bio::DB::SeqFeature::Store::DBI::Pg will be returned.
148 In addition to the standard methods supported by all well-behaved
149 Bio::DB::SeqFeature::Store databases, several following
150 adaptor-specific methods are provided. These are described in the next
157 use base
'Bio::DB::SeqFeature::Store::DBI::mysql';
158 use Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
;
162 use Bio
::DB
::GFF
::Util
::Rearrange
'rearrange';
164 use constant DEBUG
=>0;
166 # from the MySQL documentation...
167 # WARNING: if your sequence uses coordinates greater than 2 GB, you are out of luck!
168 use constant MAX_INT
=> 2_147_483_647
;
169 use constant MIN_INT
=> -2_147_483_648
;
170 use constant MAX_BIN
=> 1_000_000_000
; # size of largest feature = 1 Gb
171 use constant MIN_BIN
=> 1000; # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these
174 # object initialization
176 # NOTE: most of this code can be refactored and inherited from DBI or DBI::mysql adapter
191 ) = rearrange
(['DSN',
192 ['TEMP','TEMPORARY'],
195 ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
197 ['PASS','PASSWD','PASSWORD'],
198 ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
199 ['WRITE','WRITEABLE'],
206 $writeable = 1 if $is_temporary or $dump_dir;
208 $dsn or $self->throw("Usage: ".__PACKAGE__
."->init(-dsn => \$dbh || \$dsn)");
214 $dsn = "dbi:Pg:$dsn" unless $dsn =~ /^dbi:/;
215 $dbh = DBI
->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr
);
217 $dbh->do('set client_min_messages=warning') if $dbh;
218 $self->{'original_arguments'} = {
222 'dbh_options' => $dbi_options,
225 $self->{dbh
}->{InactiveDestroy
} = 1;
226 $self->{is_temp
} = $is_temporary;
227 $self->{namespace
} = $namespace;
228 $self->{writeable
} = $writeable;
229 $self->schema($schema) if ($schema);
231 $self->default_settings;
232 $self->autoindex($autoindex) if defined $autoindex;
233 $self->dumpdir($dump_dir) if $dump_dir;
234 if ($self->is_temp) {
235 # warn "creating a temp database isn't supported";
236 #$self->init_tmp_database();
237 $self->init_database('erase');
239 $self->init_database('erase');
243 sub table_definitions
{
248 id serial primary key,
253 strand int default 0,
256 indexed int default 1,
257 object bytea not null
259 CREATE INDEX feature_stuff ON feature(seqid,tier,bin,typeid);
260 CREATE INDEX feature_typeid ON feature(typeid);
263 locationlist
=> <<END,
265 id serial primary key,
266 seqname varchar(256) not null
267 ); CREATE INDEX locationlist_seqname ON locationlist(seqname);
272 id serial primary key,
273 tag varchar(256) not null
274 ); CREATE INDEX typelist_tab ON typelist(tag);
279 name varchar(256) not null,
280 display_name int default 0
282 CREATE INDEX name_id ON name(id);
283 CREATE INDEX name_name ON name(name);
289 attribute_id int not null,
292 CREATE INDEX attribute_id ON attribute(id);
293 CREATE INDEX attribute_id_val ON attribute(attribute_id,SUBSTR(attribute_value, 1, 10));
296 attributelist
=> <<END,
298 id serial primary key,
299 tag varchar(256) not null
301 CREATE INDEX attributelist_tag ON attributelist(tag);
303 parent2child
=> <<END,
308 CREATE INDEX parent2child_id_child ON parent2child(id,child);
313 name varchar(128) primary key,
314 value varchar(128) not null
320 "offset" int not null,
322 primary key(id,"offset")
329 my ($self, $schema) = @_;
330 $self->{'schema'} = $schema if defined($schema);
332 $self->dbh->do("SET search_path TO " . $self->{'schema'} . ", public");
334 $self->dbh->do("SET search_path TO public");
336 return $self->{'schema'};
339 # wipe database clean and reinstall schema
345 my $dbh = $self->dbh;
346 my $tables = $self->table_definitions;
347 foreach (keys %$tables) {
348 next if $_ eq 'meta'; # don't get rid of meta data!
349 my $table = $self->_qualify($_);
350 $dbh->do("DROP TABLE IF EXISTS $table") if $erase;
351 my @table_exists = $dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
352 if (!scalar(@table_exists)) {
353 my $query = "CREATE TABLE $table $tables->{$_}";
354 $dbh->do($query) or $self->throw($dbh->errstr);
357 $self->subfeatures_are_indexed(1) if $erase;
361 sub maybe_create_meta
{
363 return unless $self->writeable;
364 my $table = $self->_qualify('meta');
365 my $tables = $self->table_definitions;
366 my $temporary = $self->is_temp ?
'TEMPORARY' : '';
367 my @table_exists = $self->dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
368 $self->dbh->do("CREATE $temporary TABLE $table $tables->{meta}")
369 unless @table_exists || $temporary;
372 sub _finish_bulk_update
{
374 my $dbh = $self->dbh;
375 my $dir = $self->{dumpdir
} || '.';
376 for my $table ('feature',$self->index_tables) {
377 my $fh = $self->dump_filehandle($table);
378 my $path = $self->dump_path($table);
380 my $qualified_table = $self->_qualify($table);
381 system "cp $path $path.bak";
382 # Get stuff from file into STDIN so we don't have to be superuser
384 print STDERR
"Loading file $path\n";
385 $dbh->do("COPY $qualified_table FROM STDIN CSV QUOTE '''' DELIMITER '\t'") or $self->throw($dbh->errstr);
386 while (my $line = <FH
>) {
387 $dbh->pg_putline($line);
389 $dbh->pg_endcopy() or $self->throw($dbh->errstr);
393 delete $self->{bulk_update_in_progress
};
394 delete $self->{filehandles
};
399 # Add a subparts to a feature. Both feature and all subparts must already be in database.
401 sub _add_SeqFeature
{
404 # special purpose method for case when we are doing a bulk update
405 return $self->_dump_add_SeqFeature(@_) if $self->{bulk_update_in_progress
};
410 my $dbh = $self->dbh;
411 local $dbh->{RaiseError
} = 1;
413 my $child_table = $self->_parent2child_table();
416 my $querydel = "DELETE FROM $child_table WHERE id = ? AND child = ?";
417 my $query = "INSERT INTO $child_table (id,child) VALUES (?,?)";
418 my $sthdel = $self->_prepare($querydel);
419 my $sth = $self->_prepare($query);
421 my $parent_id = (ref $parent ?
$parent->primary_id : $parent)
422 or $self->throw("$parent should have a primary_id");
424 $dbh->begin_work or $self->throw($dbh->errstr);
426 for my $child (@children) {
427 my $child_id = ref $child ?
$child->primary_id : $child;
428 defined $child_id or die "no primary ID known for $child";
429 $sthdel->execute($parent_id, $child_id);
430 $sth->execute($parent_id,$child_id);
436 warn "Transaction aborted because $@";
446 # because this is a reserved word in postgresql
448 # get primary sequence between start and end
450 sub _fetch_sequence
{
452 my ($seqid,$start,$end) = @_;
454 # backward compatibility to the old days when I liked reverse complementing
455 # dna by specifying $start > $end
457 if (defined $start && defined $end && $start > $end) {
459 ($start,$end) = ($end,$start);
461 $start-- if defined $start;
462 $end-- if defined $end;
464 my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
465 my $offset2 = $self->_offset_boundary($seqid,$end || 'right');
466 my $sequence_table = $self->_sequence_table;
467 my $locationlist_table = $self->_locationlist_table;
469 my $sth = $self->_prepare(<<END);
470 SELECT sequence,"offset"
471 FROM $sequence_table as s,$locationlist_table as ll
480 $sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
482 while (my($frag,$offset) = $sth->fetchrow_array) {
483 substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
486 substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
489 $seq =~ tr/gatcGATC/ctagCTAG/;
495 sub _offset_boundary
{
497 my ($seqid,$position) = @_;
499 my $sequence_table = $self->_sequence_table;
500 my $locationlist_table = $self->_locationlist_table;
503 $sql = $position eq 'left' ?
"SELECT min(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
504 :$position eq 'right' ?
"SELECT max(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
505 :"SELECT max(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=? AND \"offset\"<=?";
507 my $sth = $self->_prepare($sql);
508 my @args = $position =~ /^-?\d+$/ ?
($seqid,$position) : ($seqid);
509 $sth->execute(@args) or $self->throw($sth->errstr);
510 my $boundary = $sth->fetchall_arrayref->[0][0];
517 my ($name,$allow_aliases,$join) = @_;
518 my $name_table = $self->_name_table;
520 my $from = "$name_table as n";
521 my ($match,$string) = $self->_match_sql($name);
523 my $where = "n.id=$join AND lower(n.name) $match";
524 $where .= " AND n.display_name>0" unless $allow_aliases;
525 return ($from,$where,'',$string);
528 sub _search_attributes
{
530 my ($search_string,$attribute_names,$limit) = @_;
531 my @words = map {quotemeta($_)} split /\s+/,$search_string;
532 my $name_table = $self->_name_table;
533 my $attribute_table = $self->_attribute_table;
534 my $attributelist_table = $self->_attributelist_table;
535 my $type_table = $self->_type_table;
536 my $typelist_table = $self->_typelist_table;
538 my @tags = @
$attribute_names;
539 my $tag_sql = join ' OR ',("al.tag=?") x
@tags;
541 my $perl_regexp = join '|',@words;
543 my @wild_card_words = map { "%$_%" } @words;
544 my $sql_regexp = join ' OR ',("a.attribute_value SIMILAR TO ?") x
@words;
546 SELECT name,attribute_value,tl.tag,n.id
547 FROM $name_table as n,$attribute_table as a,$attributelist_table as al,$type_table as t,$typelist_table as tl
549 AND al.id=a.attribute_id
556 $sql .= "LIMIT $limit" if defined $limit;
557 $self->_print_query($sql,@tags,@wild_card_words) if DEBUG
|| $self->debug;
558 my $sth = $self->_prepare($sql);
559 $sth->execute(@tags,@wild_card_words) or $self->throw($sth->errstr);
562 while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
563 my (@hits) = $value =~ /$perl_regexp/ig;
564 my @words_in_row = split /\b/,$value;
565 my $score = int(@hits*100/@words/@words_in_row);
566 push @results,[$name,$value,$score,$type,$id];
569 @results = sort {$b->[2]<=>$a->[2]} @results;
573 # overridden here because the mysql adapter uses
574 # a non-standard query hint
575 sub _attributes_sql
{
577 my ($attributes,$join) = @_;
579 my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
580 my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
582 my $attribute_table = $self->_attribute_table;
583 my $attributelist_table = $self->_attributelist_table;
585 my $from = "$attribute_table as a, $attributelist_table as al";
589 AND a.attribute_id=al.id
593 my $group = $group_by;
595 my @args = (@bind_args,@group_args);
596 return ($from,$where,$group,@args);
604 if ($name =~ /(?:^|[^\\])[*?]/) {
605 $name =~ s/(^|[^\\])([%_])/$1\\$2/g;
606 $name =~ s/(^|[^\\])\*/$1%/g;
607 $name =~ s/(^|[^\\])\?/$1_/g;
611 $match = "= lower(?)";
614 return ($match,$string);
617 # overridden because of differences between LIKE behavior in mysql and postgres
618 # as well as case-sensitivity of matches
621 my ($types,$type_table) = @_;
622 my ($primary_tag,$source_tag);
624 my @types = ref $types eq 'ARRAY' ? @
$types : $types;
626 my $typelist = $self->_typelist_table;
627 my $from = "$typelist AS tl";
631 for my $type (@types) {
633 if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
634 $primary_tag = $type->method;
635 $source_tag = $type->source;
637 ($primary_tag,$source_tag) = split ':',$type,2;
640 if (defined $source_tag) {
641 push @matches,"lower(tl.tag)=lower(?)";
642 push @args,"$primary_tag:$source_tag";
644 push @matches,"tl.tag ILIKE ?";
645 push @args,"$primary_tag:%";
648 my $matches = join ' OR ',@matches;
651 tl.id=$type_table.typeid
655 return ($from,$where,'',@args);
658 # overridden because mysql adapter uses the non-standard REPLACE syntax
661 my ($variable_name,$value) = @_;
662 my $meta = $self->_meta_table;
664 if (defined $value && $self->writeable) {
665 my $querydel = "DELETE FROM $meta WHERE name = ?";
666 my $query = "INSERT INTO $meta (name,value) VALUES (?,?)";
667 my $sthdel = $self->_prepare($querydel);
668 my $sth = $self->_prepare($query);
669 $sthdel->execute($variable_name);
670 $sth->execute($variable_name,$value) or $self->throw($sth->errstr);
672 $self->{settings_cache
}{$variable_name} = $value;
675 return $self->{settings_cache
}{$variable_name} if exists $self->{settings_cache
}{$variable_name};
676 my $query = "SELECT value FROM $meta as m WHERE m.name=?";
677 my $sth = $self->_prepare($query);
678 # $sth->execute($variable_name) or $self->throw($sth->errstr);
679 unless ($sth->execute($variable_name)) {
680 my $errstr = $sth->errstr;
681 $sth = $self->_prepare("SHOW search_path");
683 $errstr .= "With search_path " . $sth->fetchrow_arrayref->[0] . "\n";
684 $self->throw($errstr);
687 my ($value) = $sth->fetchrow_array;
689 return $self->{settings_cache
}{$variable_name} = $value;
693 # overridden because of use of REPLACE in mysql adapter
695 # Replace Bio::SeqFeatureI into database.
700 my $index_flag = shift || undef;
702 # ?? shouldn't need to do this
703 # $self->_load_class($object);
704 my $id = $object->primary_id;
705 my $features = $self->_feature_table;
707 my $query = "INSERT INTO $features (id,object,indexed,seqid,start,\"end\",strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?,?)";
708 my $query_noid = "INSERT INTO $features (object,indexed,seqid,start,\"end\",strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?)";
709 my $querydel = "DELETE FROM $features WHERE id = ?";
711 my $sthdel = $self->_prepare($querydel);
712 my $sth = $self->_prepare($query);
713 my $sth_noid = $self->_prepare($query_noid);
715 my @location = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x6
;
717 my $primary_tag = $object->primary_tag;
718 my $source_tag = $object->source_tag || '';
719 $primary_tag .= ":$source_tag";
720 my $typeid = $self->_typeid($primary_tag,1);
723 $sthdel->execute($id);
724 $sth->execute($id,encode_base64
($self->freeze($object), ''),$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
726 $sth_noid->execute(encode_base64
($self->freeze($object), ''),$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
729 my $dbh = $self->dbh;
731 $object->primary_id($dbh->last_insert_id(undef, undef, undef, undef, {sequence
=>$features."_id_seq"})) unless defined $id;
733 $self->flag_for_indexing($dbh->last_insert_id(undef, undef, undef, undef, {sequence
=>$features."_id_seq"})) if $self->{bulk_update_in_progress
};
739 Usage : @type_list = $db->types
740 Function: Get all the types in the database
741 Returns : array of Bio::DB::GFF::Typename objects
747 # overridden because "offset" is reserved in postgres
749 # Insert a bit of DNA or protein into the database
751 sub _insert_sequence
{
753 my ($seqid,$seq,$offset) = @_;
754 my $id = $self->_locationid($seqid);
755 my $seqtable = $self->_sequence_table;
756 my $sthdel = $self->_prepare("DELETE FROM $seqtable WHERE id = ? AND \"offset\" = ?");
757 my $sth = $self->_prepare(<<END);
758 INSERT INTO $seqtable (id,"offset",sequence) VALUES (?,?,?)
760 $sthdel->execute($id,$offset);
761 $sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
764 # overridden because of mysql adapter's use of REPLACE
766 # This subroutine flags the given primary ID for later reindexing
768 sub flag_for_indexing
{
771 my $needs_updating = $self->_update_table;
773 my $querydel = "DELETE FROM $needs_updating WHERE id = ?";
774 my $query = "INSERT INTO $needs_updating VALUES (?)";
775 my $sthdel = $self->_prepare($querydel);
776 my $sth = $self->_prepare($query);
778 $sthdel->execute($id);
779 $sth->execute($id) or $self->throw($self->dbh->errstr);
782 # overridden because of the different ways that mysql and postgres
783 # handle id sequences
786 my ($table,$namefield,$name,$add_if_missing) = @_;
787 my $qualified_table = $self->_qualify($table);
788 my $sth = $self->_prepare(<<END);
789 SELECT id FROM $qualified_table WHERE $namefield=?
791 $sth->execute($name) or die $sth->errstr;
792 my ($id) = $sth->fetchrow_array;
794 return $id if defined $id;
795 return unless $add_if_missing;
797 $sth = $self->_prepare(<<END);
798 INSERT INTO $qualified_table ($namefield) VALUES (?)
800 $sth->execute($name) or die $sth->errstr;
801 my $dbh = $self->dbh;
802 return $dbh->last_insert_id(undef, undef, undef, undef, {sequence
=>$qualified_table."_id_seq"});
805 # overridden because of differences in binding between mysql and postgres adapters
806 # given a statement handler that is expected to return rows of (id,object)
807 # unthaw each object and return a list of 'em
813 $sth->bind_col(1, \
$id);
814 $sth->bind_col(2, \
$o, { pg_type
=> PG_BYTEA
});
815 #while (my ($id,$o) = $sth->fetchrow_array) {
816 while ($sth->fetch) {
817 my $obj = $self->thaw(decode_base64
($o) ,$id);
824 # given a statement handler that is expected to return rows of (id,object)
825 # unthaw each object and return a list of 'em
829 my ($id,$o) = $sth->fetchrow_array;
831 my $obj = $self->thaw(decode_base64
($o) ,$id);
835 ####################################################################################################
836 # SQL Fragment generators
837 ####################################################################################################
839 # overridden because of base64 encoding needed here
841 # special-purpose store for bulk loading - write to a file rather than to the db
848 my $store_fh = $self->dump_filehandle('feature');
849 my $dbh = $self->dbh;
851 my $autoindex = $self->autoindex;
854 my $id = $self->next_id;
855 my ($seqid,$start,$end,$strand,$tier,$bin) = $indexed ?
$self->_get_location_and_bin($obj) : (undef)x6
;
856 my $primary_tag = $obj->primary_tag;
857 my $source_tag = $obj->source_tag || '';
858 $primary_tag .= ":$source_tag";
859 my $typeid = $self->_typeid($primary_tag,1);
861 my $frozen_object = encode_base64
($self->freeze($obj), '');
862 # TODO: Fix this, why does frozen object start with quote but not end with one
863 print $store_fh join("\t",$id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,$frozen_object),"\n";
864 $obj->primary_id($id);
865 $self->_update_indexes($obj) if $indexed && $autoindex;
869 # remember whether we are have ever stored a non-indexed feature
870 unless ($indexed or $self->{indexed_flag
}++) {
871 $self->subfeatures_are_indexed(0);