2 package Bio
::DB
::SeqFeature
::Store
::DBI
::Pg
;
3 use DBD
::Pg
qw(:pg_types);
5 # $Id: Pg.pm 14656 2008-04-14 15:05:37Z lstein $
9 Bio::DB::SeqFeature::Store::DBI::Pg -- Mysql implementation of Bio::DB::SeqFeature::Store
13 use Bio::DB::SeqFeature::Store;
15 # Open the sequence database
16 my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::Pg',
17 -dsn => 'dbi:Pg:test');
19 # get a feature from somewhere
20 my $feature = Bio::SeqFeature::Generic->new(...);
23 $db->store($feature) or die "Couldn't store!";
25 # primary ID of the feature is changed to indicate its primary ID
27 my $id = $feature->primary_id;
29 # get the feature back out
30 my $f = $db->fetch($id);
32 # change the feature and update it
34 $db->update($f) or die "Couldn't update!";
38 my @features = $db->fetch_many(@list_of_ids);
41 @features = $db->get_features_by_name('ZK909');
44 @features = $db->get_features_by_alias('sma-3');
47 @features = $db->get_features_by_name('gene');
50 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
53 @features = $db->get_features_by_attribute({description => 'protein kinase'})
55 # ...by the GFF "Note" field
56 @result_list = $db->search_notes('kinase');
58 # ...by arbitrary combinations of selectors
59 @features = $db->features(-name => $name,
64 -attributes => $attributes);
66 # ...using an iterator
67 my $iterator = $db->get_seq_stream(-name => $name,
72 -attributes => $attributes);
74 while (my $feature = $iterator->next_seq) {
75 # do something with the feature
78 # ...limiting the search to a particular region
79 my $segment = $db->segment('Chr1',5000=>6000);
80 my @features = $segment->features(-type=>['mRNA','match']);
82 # getting & storing sequence information
83 # Warning: this returns a string, and not a PrimarySeq object
84 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
85 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
87 # what feature types are defined in the database?
88 my @types = $db->types;
90 # create a new feature in the database
91 my $feature = $db->new_feature(-primary_tag => 'mRNA',
98 Bio::DB::SeqFeature::Store::Pg is the Mysql adaptor for
99 Bio::DB::SeqFeature::Store. You will not create it directly, but
100 instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
102 See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
104 =head2 Using the Mysql adaptor
106 Before you can use the adaptor, you must use the Pgadmin tool to
107 create a database and establish a user account with write
108 permission. In order to use "fast" loading, the user account must have
111 To establish a connection to the database, call
112 Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::Pg',@more_args). The
113 additional arguments are as follows:
115 Argument name Description
116 ------------- -----------
118 -dsn The database name. You can abbreviate
119 "dbi:Pg:foo" as "foo" if you wish.
121 -user Username for authentication.
123 -pass Password for authentication.
125 -namespace A prefix to attach to each table. This allows you
126 to have several virtual databases in the same
129 -temp Boolean flag. If true, a temporary database
130 will be created and destroyed as soon as
131 the Store object goes out of scope. (synonym -temporary)
133 -autoindex Boolean flag. If true, features in the database will be
134 reindexed every time they change. This is the default.
137 -tmpdir Directory in which to place temporary files during "fast" loading.
138 Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
140 -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
141 (synonyms -options, -dbi_attr)
143 -write Pass true to open database for writing or updating.
145 If successful, a new instance of
146 Bio::DB::SeqFeature::Store::DBI::Pg will be returned.
148 In addition to the standard methods supported by all well-behaved
149 Bio::DB::SeqFeature::Store databases, several following
150 adaptor-specific methods are provided. These are described in the next
157 use base
'Bio::DB::SeqFeature::Store::DBI::mysql';
158 use Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
;
162 use Bio
::DB
::GFF
::Util
::Rearrange
'rearrange';
164 use constant DEBUG
=>0;
166 # from the MySQL documentation...
167 # WARNING: if your sequence uses coordinates greater than 2 GB, you are out of luck!
168 use constant MAX_INT
=> 2_147_483_647
;
169 use constant MIN_INT
=> -2_147_483_648
;
170 use constant MAX_BIN
=> 1_000_000_000
; # size of largest feature = 1 Gb
171 use constant MIN_BIN
=> 1000; # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these
174 # object initialization
176 # NOTE: most of this code can be refactored and inherited from DBI or DBI::mysql adapter
191 ) = rearrange
(['DSN',
192 ['TEMP','TEMPORARY'],
195 ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
197 ['PASS','PASSWD','PASSWORD'],
198 ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
199 ['WRITE','WRITEABLE'],
206 $writeable = 1 if $is_temporary or $dump_dir;
208 $dsn or $self->throw("Usage: ".__PACKAGE__
."->init(-dsn => \$dbh || \$dsn)");
214 $dsn = "dbi:Pg:$dsn" unless $dsn =~ /^dbi:/;
215 $dbh = DBI
->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr
);
217 $dbh->do('set client_min_messages=warning') if $dbh;
218 $self->{'original_arguments'} = {
222 'dbh_options' => $dbi_options,
225 $self->{dbh
}->{InactiveDestroy
} = 1;
226 $self->{is_temp
} = $is_temporary;
227 $self->{namespace
} = $namespace;
228 $self->{writeable
} = $writeable;
229 $self->schema($schema) if ($schema);
231 $self->default_settings;
232 $self->autoindex($autoindex) if defined $autoindex;
233 $self->dumpdir($dump_dir) if $dump_dir;
234 if ($self->is_temp) {
235 $self->init_tmp_database();
237 $self->init_database('erase');
241 sub table_definitions
{
246 id serial primary key,
251 strand int default 0,
254 indexed int default 1,
255 object bytea not null
257 CREATE INDEX feature_stuff ON feature(seqid,tier,bin,typeid);
258 CREATE INDEX feature_typeid ON feature(typeid);
261 locationlist
=> <<END,
263 id serial primary key,
264 seqname varchar(256) not null
265 ); CREATE INDEX locationlist_seqname ON locationlist(seqname);
270 id serial primary key,
271 tag varchar(256) not null
272 ); CREATE INDEX typelist_tab ON typelist(tag);
277 name varchar(256) not null,
278 display_name int default 0
280 CREATE INDEX name_id ON name(id);
281 CREATE INDEX name_name ON name(name);
287 attribute_id int not null,
290 CREATE INDEX attribute_id ON attribute(id);
291 CREATE INDEX attribute_id_val ON attribute(attribute_id,SUBSTR(attribute_value, 1, 10));
294 attributelist
=> <<END,
296 id serial primary key,
297 tag varchar(256) not null
299 CREATE INDEX attributelist_tag ON attributelist(tag);
301 parent2child
=> <<END,
306 CREATE INDEX parent2child_id_child ON parent2child(id,child);
311 name varchar(128) primary key,
312 value varchar(128) not null
318 "offset" int not null,
320 primary key(id,"offset")
327 my ($self, $schema) = @_;
328 $self->{'schema'} = $schema if defined($schema);
330 $self->dbh->do("SET search_path TO " . $self->{'schema'} . ", public");
332 $self->dbh->do("SET search_path TO public");
334 return $self->{'schema'};
337 # wipe database clean and reinstall schema
343 my $dbh = $self->dbh;
344 my $tables = $self->table_definitions;
345 foreach (keys %$tables) {
346 next if $_ eq 'meta'; # don't get rid of meta data!
347 my $table = $self->_qualify($_);
348 $dbh->do("DROP TABLE IF EXISTS $table") if $erase;
349 my @table_exists = $dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
350 if (!scalar(@table_exists)) {
351 my $query = "CREATE TABLE $table $tables->{$_}";
352 $dbh->do($query) or $self->throw($dbh->errstr);
355 $self->subfeatures_are_indexed(1) if $erase;
359 sub maybe_create_meta
{
361 return unless $self->writeable;
362 my $table = $self->_qualify('meta');
363 my $tables = $self->table_definitions;
364 my $temporary = $self->is_temp ?
'TEMPORARY' : '';
365 my @table_exists = $self->dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table'");
366 $self->dbh->do("CREATE $temporary TABLE $table $tables->{meta}")
367 unless @table_exists || $temporary;
370 sub _finish_bulk_update
{
372 my $dbh = $self->dbh;
373 my $dir = $self->{dumpdir
} || '.';
374 for my $table ('feature',$self->index_tables) {
375 my $fh = $self->dump_filehandle($table);
376 my $path = $self->dump_path($table);
378 my $qualified_table = $self->_qualify($table);
379 system "cp $path $path.bak";
380 # Get stuff from file into STDIN so we don't have to be superuser
382 print STDERR
"Loading file $path\n";
383 $dbh->do("COPY $qualified_table FROM STDIN CSV QUOTE '''' DELIMITER '\t'") or $self->throw($dbh->errstr);
384 while (my $line = <FH
>) {
385 $dbh->pg_putline($line);
387 $dbh->pg_endcopy() or $self->throw($dbh->errstr);
391 delete $self->{bulk_update_in_progress
};
392 delete $self->{filehandles
};
397 # Add a subparts to a feature. Both feature and all subparts must already be in database.
399 sub _add_SeqFeature
{
402 # special purpose method for case when we are doing a bulk update
403 return $self->_dump_add_SeqFeature(@_) if $self->{bulk_update_in_progress
};
408 my $dbh = $self->dbh;
409 local $dbh->{RaiseError
} = 1;
411 my $child_table = $self->_parent2child_table();
414 my $querydel = "DELETE FROM $child_table WHERE id = ? AND child = ?";
415 my $query = "INSERT INTO $child_table (id,child) VALUES (?,?)";
416 my $sthdel = $self->_prepare($querydel);
417 my $sth = $self->_prepare($query);
419 my $parent_id = (ref $parent ?
$parent->primary_id : $parent)
420 or $self->throw("$parent should have a primary_id");
422 $dbh->begin_work or $self->throw($dbh->errstr);
424 for my $child (@children) {
425 my $child_id = ref $child ?
$child->primary_id : $child;
426 defined $child_id or die "no primary ID known for $child";
427 $sthdel->execute($parent_id, $child_id);
428 $sth->execute($parent_id,$child_id);
434 warn "Transaction aborted because $@";
444 # because this is a reserved word in postgresql
446 # get primary sequence between start and end
448 sub _fetch_sequence
{
450 my ($seqid,$start,$end) = @_;
452 # backward compatibility to the old days when I liked reverse complementing
453 # dna by specifying $start > $end
455 if (defined $start && defined $end && $start > $end) {
457 ($start,$end) = ($end,$start);
459 $start-- if defined $start;
460 $end-- if defined $end;
462 my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
463 my $offset2 = $self->_offset_boundary($seqid,$end || 'right');
464 my $sequence_table = $self->_sequence_table;
465 my $locationlist_table = $self->_locationlist_table;
467 my $sth = $self->_prepare(<<END);
468 SELECT sequence,"offset"
469 FROM $sequence_table as s,$locationlist_table as ll
478 $sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
480 while (my($frag,$offset) = $sth->fetchrow_array) {
481 substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
484 substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
487 $seq =~ tr/gatcGATC/ctagCTAG/;
493 sub _offset_boundary
{
495 my ($seqid,$position) = @_;
497 my $sequence_table = $self->_sequence_table;
498 my $locationlist_table = $self->_locationlist_table;
501 $sql = $position eq 'left' ?
"SELECT min(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
502 :$position eq 'right' ?
"SELECT max(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
503 :"SELECT max(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=? AND \"offset\"<=?";
505 my $sth = $self->_prepare($sql);
506 my @args = $position =~ /^-?\d+$/ ?
($seqid,$position) : ($seqid);
507 $sth->execute(@args) or $self->throw($sth->errstr);
508 my $boundary = $sth->fetchall_arrayref->[0][0];
515 my ($name,$allow_aliases,$join) = @_;
516 my $name_table = $self->_name_table;
518 my $from = "$name_table as n";
519 my ($match,$string) = $self->_match_sql($name);
521 my $where = "n.id=$join AND lower(n.name) $match";
522 $where .= " AND n.display_name>0" unless $allow_aliases;
523 return ($from,$where,'',$string);
526 sub _search_attributes
{
528 my ($search_string,$attribute_names,$limit) = @_;
529 my @words = map {quotemeta($_)} split /\s+/,$search_string;
530 my $name_table = $self->_name_table;
531 my $attribute_table = $self->_attribute_table;
532 my $attributelist_table = $self->_attributelist_table;
533 my $type_table = $self->_type_table;
534 my $typelist_table = $self->_typelist_table;
536 my @tags = @
$attribute_names;
537 my $tag_sql = join ' OR ',("al.tag=?") x
@tags;
539 my $perl_regexp = join '|',@words;
541 my @wild_card_words = map { "%$_%" } @words;
542 my $sql_regexp = join ' OR ',("a.attribute_value SIMILAR TO ?") x
@words;
544 SELECT name,attribute_value,tl.tag,n.id
545 FROM $name_table as n,$attribute_table as a,$attributelist_table as al,$type_table as t,$typelist_table as tl
547 AND al.id=a.attribute_id
554 $sql .= "LIMIT $limit" if defined $limit;
555 $self->_print_query($sql,@tags,@wild_card_words) if DEBUG
|| $self->debug;
556 my $sth = $self->_prepare($sql);
557 $sth->execute(@tags,@wild_card_words) or $self->throw($sth->errstr);
560 while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
561 my (@hits) = $value =~ /$perl_regexp/ig;
562 my @words_in_row = split /\b/,$value;
563 my $score = int(@hits*100/@words/@words_in_row);
564 push @results,[$name,$value,$score,$type,$id];
567 @results = sort {$b->[2]<=>$a->[2]} @results;
571 # overridden here because the mysql adapter uses
572 # a non-standard query hint
573 sub _attributes_sql
{
575 my ($attributes,$join) = @_;
577 my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
578 my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
580 my $attribute_table = $self->_attribute_table;
581 my $attributelist_table = $self->_attributelist_table;
583 my $from = "$attribute_table as a, $attributelist_table as al";
587 AND a.attribute_id=al.id
591 my $group = $group_by;
593 my @args = (@bind_args,@group_args);
594 return ($from,$where,$group,@args);
602 if ($name =~ /(?:^|[^\\])[*?]/) {
603 $name =~ s/(^|[^\\])([%_])/$1\\$2/g;
604 $name =~ s/(^|[^\\])\*/$1%/g;
605 $name =~ s/(^|[^\\])\?/$1_/g;
609 $match = "= lower(?)";
612 return ($match,$string);
615 # overridden because of differences between LIKE behavior in mysql and postgres
616 # as well as case-sensitivity of matches
619 my ($types,$type_table) = @_;
620 my ($primary_tag,$source_tag);
622 my @types = ref $types eq 'ARRAY' ? @
$types : $types;
624 my $typelist = $self->_typelist_table;
625 my $from = "$typelist AS tl";
629 for my $type (@types) {
631 if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
632 $primary_tag = $type->method;
633 $source_tag = $type->source;
635 ($primary_tag,$source_tag) = split ':',$type,2;
638 if (defined $source_tag) {
639 push @matches,"lower(tl.tag)=lower(?)";
640 push @args,"$primary_tag:$source_tag";
642 push @matches,"tl.tag ILIKE ?";
643 push @args,"$primary_tag:%";
646 my $matches = join ' OR ',@matches;
649 tl.id=$type_table.typeid
653 return ($from,$where,'',@args);
656 # overridden because mysql adapter uses the non-standard REPLACE syntax
659 my ($variable_name,$value) = @_;
660 my $meta = $self->_meta_table;
662 if (defined $value && $self->writeable) {
663 my $querydel = "DELETE FROM $meta WHERE name = ?";
664 my $query = "INSERT INTO $meta (name,value) VALUES (?,?)";
665 my $sthdel = $self->_prepare($querydel);
666 my $sth = $self->_prepare($query);
667 $sthdel->execute($variable_name);
668 $sth->execute($variable_name,$value) or $self->throw($sth->errstr);
670 $self->{settings_cache
}{$variable_name} = $value;
673 return $self->{settings_cache
}{$variable_name} if exists $self->{settings_cache
}{$variable_name};
674 my $query = "SELECT value FROM $meta as m WHERE m.name=?";
675 my $sth = $self->_prepare($query);
676 # $sth->execute($variable_name) or $self->throw($sth->errstr);
677 unless ($sth->execute($variable_name)) {
678 my $errstr = $sth->errstr;
679 $sth = $self->_prepare("SHOW search_path");
681 $errstr .= "With search_path " . $sth->fetchrow_arrayref->[0] . "\n";
682 $self->throw($errstr);
685 my ($value) = $sth->fetchrow_array;
687 return $self->{settings_cache
}{$variable_name} = $value;
691 # overridden because of use of REPLACE in mysql adapter
693 # Replace Bio::SeqFeatureI into database.
698 my $index_flag = shift || undef;
700 # ?? shouldn't need to do this
701 # $self->_load_class($object);
702 my $id = $object->primary_id;
703 my $features = $self->_feature_table;
705 my $query = "INSERT INTO $features (id,object,indexed,seqid,start,\"end\",strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?,?)";
706 my $query_noid = "INSERT INTO $features (object,indexed,seqid,start,\"end\",strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?)";
707 my $querydel = "DELETE FROM $features WHERE id = ?";
709 my $sthdel = $self->_prepare($querydel);
710 my $sth = $self->_prepare($query);
711 my $sth_noid = $self->_prepare($query_noid);
713 my @location = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x6
;
715 my $primary_tag = $object->primary_tag;
716 my $source_tag = $object->source_tag || '';
717 $primary_tag .= ":$source_tag";
718 my $typeid = $self->_typeid($primary_tag,1);
721 $sthdel->execute($id);
722 $sth->execute($id,encode_base64
($self->freeze($object), ''),$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
724 $sth_noid->execute(encode_base64
($self->freeze($object), ''),$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
727 my $dbh = $self->dbh;
729 $object->primary_id($dbh->last_insert_id(undef, undef, undef, undef, {sequence
=>$features."_id_seq"})) unless defined $id;
731 $self->flag_for_indexing($dbh->last_insert_id(undef, undef, undef, undef, {sequence
=>$features."_id_seq"})) if $self->{bulk_update_in_progress
};
737 Usage : @type_list = $db->types
738 Function: Get all the types in the database
739 Returns : array of Bio::DB::GFF::Typename objects
745 # overridden because "offset" is reserved in postgres
747 # Insert a bit of DNA or protein into the database
749 sub _insert_sequence
{
751 my ($seqid,$seq,$offset) = @_;
752 my $id = $self->_locationid($seqid);
753 my $seqtable = $self->_sequence_table;
754 my $sthdel = $self->_prepare("DELETE FROM $seqtable WHERE id = ? AND \"offset\" = ?");
755 my $sth = $self->_prepare(<<END);
756 INSERT INTO $seqtable (id,"offset",sequence) VALUES (?,?,?)
758 $sthdel->execute($id,$offset);
759 $sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
762 # overridden because of mysql adapter's use of REPLACE
764 # This subroutine flags the given primary ID for later reindexing
766 sub flag_for_indexing
{
769 my $needs_updating = $self->_update_table;
771 my $querydel = "DELETE FROM $needs_updating WHERE id = ?";
772 my $query = "INSERT INTO $needs_updating VALUES (?)";
773 my $sthdel = $self->_prepare($querydel);
774 my $sth = $self->_prepare($query);
776 $sthdel->execute($id);
777 $sth->execute($id) or $self->throw($self->dbh->errstr);
780 # overridden because of the different ways that mysql and postgres
781 # handle id sequences
784 my ($table,$namefield,$name,$add_if_missing) = @_;
785 my $qualified_table = $self->_qualify($table);
786 my $sth = $self->_prepare(<<END);
787 SELECT id FROM $qualified_table WHERE $namefield=?
789 $sth->execute($name) or die $sth->errstr;
790 my ($id) = $sth->fetchrow_array;
792 return $id if defined $id;
793 return unless $add_if_missing;
795 $sth = $self->_prepare(<<END);
796 INSERT INTO $qualified_table ($namefield) VALUES (?)
798 $sth->execute($name) or die $sth->errstr;
799 my $dbh = $self->dbh;
800 return $dbh->last_insert_id(undef, undef, undef, undef, {sequence
=>$qualified_table."_id_seq"});
803 # overridden because of differences in binding between mysql and postgres adapters
804 # given a statement handler that is expected to return rows of (id,object)
805 # unthaw each object and return a list of 'em
811 $sth->bind_col(1, \
$id);
812 $sth->bind_col(2, \
$o, { pg_type
=> PG_BYTEA
});
813 #while (my ($id,$o) = $sth->fetchrow_array) {
814 while ($sth->fetch) {
815 my $obj = $self->thaw(decode_base64
($o) ,$id);
822 # given a statement handler that is expected to return rows of (id,object)
823 # unthaw each object and return a list of 'em
827 my ($id,$o) = $sth->fetchrow_array;
829 my $obj = $self->thaw(decode_base64
($o) ,$id);
833 ####################################################################################################
834 # SQL Fragment generators
835 ####################################################################################################
837 # overridden because of base64 encoding needed here
839 # special-purpose store for bulk loading - write to a file rather than to the db
846 my $store_fh = $self->dump_filehandle('feature');
847 my $dbh = $self->dbh;
849 my $autoindex = $self->autoindex;
852 my $id = $self->next_id;
853 my ($seqid,$start,$end,$strand,$tier,$bin) = $indexed ?
$self->_get_location_and_bin($obj) : (undef)x6
;
854 my $primary_tag = $obj->primary_tag;
855 my $source_tag = $obj->source_tag || '';
856 $primary_tag .= ":$source_tag";
857 my $typeid = $self->_typeid($primary_tag,1);
859 my $frozen_object = encode_base64
($self->freeze($obj), '');
860 # TODO: Fix this, why does frozen object start with quote but not end with one
861 print $store_fh join("\t",$id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,$frozen_object),"\n";
862 $obj->primary_id($id);
863 $self->_update_indexes($obj) if $indexed && $autoindex;
867 # remember whether we are have ever stored a non-indexed feature
868 unless ($indexed or $self->{indexed_flag
}++) {
869 $self->subfeatures_are_indexed(0);