1 package Bio
::DB
::SeqFeature
::Store
::DBI
::mysql
;
6 Bio::DB::SeqFeature::Store::DBI::mysql -- Mysql implementation of Bio::DB::SeqFeature::Store
10 use Bio::DB::SeqFeature::Store;
12 # Open the sequence database
13 my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::mysql',
14 -dsn => 'dbi:mysql:test');
16 # get a feature from somewhere
17 my $feature = Bio::SeqFeature::Generic->new(...);
20 $db->store($feature) or die "Couldn't store!";
22 # primary ID of the feature is changed to indicate its primary ID in the database
24 my $id = $feature->primary_id;
26 # get the feature back out
27 my $f = $db->fetch($id);
29 # change the feature and update it
31 $db->update($f) or die "Couldn't update!";
35 my @features = $db->fetch_many(@list_of_ids);
38 @features = $db->get_features_by_name('ZK909');
41 @features = $db->get_features_by_alias('sma-3');
44 @features = $db->get_features_by_name('gene');
47 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
50 @features = $db->get_features_by_attribute({description => 'protein kinase'})
52 # ...by the GFF "Note" field
53 @result_list = $db->search_notes('kinase');
55 # ...by arbitrary combinations of selectors
56 @features = $db->features(-name => $name,
61 -attributes => $attributes);
63 # ...using an iterator
64 my $iterator = $db->get_seq_stream(-name => $name,
69 -attributes => $attributes);
71 while (my $feature = $iterator->next_seq) {
72 # do something with the feature
75 # ...limiting the search to a particular region
76 my $segment = $db->segment('Chr1',5000=>6000);
77 my @features = $segment->features(-type=>['mRNA','match']);
79 # getting & storing sequence information
80 # Warning: this returns a string, and not a PrimarySeq object
81 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
82 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
84 # what feature types are defined in the database?
85 my @types = $db->types;
87 # create a new feature in the database
88 my $feature = $db->new_feature(-primary_tag => 'mRNA',
95 Bio::DB::SeqFeature::Store::DBI::mysql is the Mysql adaptor for
96 Bio::DB::SeqFeature::Store. You will not create it directly, but
97 instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
99 See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
101 =head2 Using the Mysql adaptor
103 Before you can use the adaptor, you must use the mysqladmin tool to
104 create a database and establish a user account with write
105 permission. In order to use "fast" loading, the user account must have
108 To establish a connection to the database, call
109 Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::mysql',@more_args). The
110 additional arguments are as follows:
112 Argument name Description
113 ------------- -----------
115 -dsn The database name. You can abbreviate
116 "dbi:mysql:foo" as "foo" if you wish.
118 -user Username for authentication.
120 -pass Password for authentication.
122 -namespace A prefix to attach to each table. This allows you
123 to have several virtual databases in the same
126 -temp Boolean flag. If true, a temporary database
127 will be created and destroyed as soon as
128 the Store object goes out of scope. (synonym -temporary)
130 -autoindex Boolean flag. If true, features in the database will be
131 reindexed every time they change. This is the default.
134 -tmpdir Directory in which to place temporary files during "fast" loading.
135 Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
137 -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
138 (synonyms -options, -dbi_attr)
140 -write Pass true to open database for writing or updating.
142 If successful, a new instance of
143 Bio::DB::SeqFeature::Store::DBI::mysql will be returned.
145 In addition to the standard methods supported by all well-behaved
146 Bio::DB::SeqFeature::Store databases, the following
147 adaptor-specific methods are provided. These are described in the next
154 use base
'Bio::DB::SeqFeature::Store';
155 use Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
;
159 use Bio
::DB
::GFF
::Util
::Rearrange
'rearrange';
160 use Bio
::SeqFeature
::Lite
;
162 use constant DEBUG
=>0;
164 # from the MySQL documentation...
165 # WARNING: if your sequence uses coordinates greater than 2 GB, you are out of luck!
166 use constant MAX_INT
=> 2_147_483_647
;
167 use constant MIN_INT
=> -2_147_483_648
;
168 use constant MAX_BIN
=> 1_000_000_000
; # size of largest feature = 1 Gb
169 use constant MIN_BIN
=> 1000; # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these
172 memoize
('_locationid');
173 memoize
('_attributeid');
174 memoize
('dump_path');
177 # object initialization
191 ) = rearrange
(['DSN',
192 ['TEMP','TEMPORARY'],
195 ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
197 ['PASS','PASSWD','PASSWORD'],
198 ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
199 ['WRITE','WRITEABLE'],
203 $writeable = 1 if $is_temporary or $dump_dir;
205 $dsn or $self->throw("Usage: ".__PACKAGE__
."->init(-dsn => \$dbh || \$dsn)");
211 $dsn = "dbi:mysql:$dsn" unless $dsn =~ /^dbi:/;
212 $dbh = DBI
->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr
);
213 $dbh->{mysql_auto_reconnect
} = 1;
216 $self->{is_temp
} = $is_temporary;
217 $self->{namespace
} = $namespace;
218 $self->{writeable
} = $writeable;
220 $self->default_settings;
221 $self->autoindex($autoindex) if defined $autoindex;
222 $self->dumpdir($dump_dir) if $dump_dir;
223 if ($self->is_temp) {
224 $self->init_tmp_database();
226 $self->init_database('erase');
230 # Accessor: true if the store was opened with write/update permission.
sub writeable {
    my $self = shift;
    return $self->{writeable};
}
232 # Capability flag: this adaptor records parent/child feature
# relationships (via the parent2child table), so always true.
sub can_store_parentage { return 1 }
234 sub table_definitions
{
239 id int(10) auto_increment primary key,
240 typeid int(10) not null,
244 strand tinyint default 0,
247 indexed tinyint default 1,
248 object MEDIUMBLOB not null,
249 index(seqid,tier,bin,typeid),
254 locationlist
=> <<END,
256 id int(10) auto_increment primary key,
257 seqname varchar(256) not null,
264 id int(10) auto_increment primary key,
265 tag varchar(256) not null,
272 name varchar(256) not null,
273 display_name tinyint default 0,
282 attribute_id int(10) not null,
283 attribute_value text,
285 index(attribute_id,attribute_value(10))
289 attributelist
=> <<END,
291 id int(10) auto_increment primary key,
292 tag varchar(256) not null,
296 parent2child
=> <<END,
299 child int(10) not null,
306 name varchar(128) primary key,
307 value varchar(128) not null
313 offset int(10) unsigned not null,
315 primary key(id,offset)
322 # default settings -- will create and populate meta table if needed
324 sub default_settings
{
326 $self->maybe_create_meta();
327 $self->SUPER::default_settings
;
329 $self->dumpdir(File
::Spec
->tmpdir);
334 # retrieve database handle
338 my $d = $self->{dbh
};
339 $self->{dbh
} = shift if @_;
345 $self->{dbh
}{InactiveDestroy
} = 1;
346 $self->{dbh
} = $self->{dbh
}->clone
347 unless $self->is_temp;
351 # get/set directory for bulk load tables
355 my $d = $self->{dumpdir
};
356 $self->{dumpdir
} = abs_path
(shift) if @_;
361 # table namespace (multiple dbs in one mysql db)
365 my $d = $self->{namespace
};
366 $self->{namespace
} = shift if @_;
371 # find a path that corresponds to a dump table
375 my $table = $self->_qualify(shift);
376 return "$self->{dumpdir}/$table.$$";
380 # make a filehandle (writeable) that corresponds to a dump table
382 sub dump_filehandle
{
385 eval "require IO::File" unless IO
::File
->can('new');
386 my $path = $self->dump_path($table);
387 my $fh = $self->{filehandles
}{$path} ||= IO
::File
->new(">$path");
392 # find the next ID for a feature (used only during bulk loading)
396 $self->{max_id
} ||= $self->max_id;
397 return ++$self->{max_id
};
401 # find the maximum ID for a feature (used only during bulk loading)
405 my $sth = $self->_prepare("SELECT max(id) from feature");
406 $sth->execute or $self->throw($sth->errstr);
407 my ($id) = $sth->fetchrow_array;
412 # wipe database clean and reinstall schema
418 my $dbh = $self->dbh;
419 my $tables = $self->table_definitions;
420 foreach (keys %$tables) {
421 next if $_ eq 'meta'; # don't get rid of meta data!
422 my $table = $self->_qualify($_);
423 $dbh->do("DROP table IF EXISTS $table") if $erase;
424 my $query = "CREATE TABLE IF NOT EXISTS $table $tables->{$_}";
425 $self->_create_table($dbh,$query);
427 $self->subfeatures_are_indexed(1) if $erase;
431 sub init_tmp_database
{
433 my $dbh = $self->dbh;
434 my $tables = $self->table_definitions;
435 for my $t (keys %$tables) {
436 next if $t eq 'meta'; # done earlier
437 my $table = $self->_qualify($t);
438 my $query = "CREATE TEMPORARY TABLE $table $tables->{$t}";
439 $self->_create_table($dbh,$query);
446 my ($dbh,$query) = @_;
447 for my $q (split ';',$query) {
449 next unless $q =~ /\S/;
450 $dbh->do("$q;\n") or $self->throw($dbh->errstr);
454 sub maybe_create_meta
{
456 return unless $self->writeable;
457 my $table = $self->_qualify('meta');
458 my $tables = $self->table_definitions;
459 my $temporary = $self->is_temp ?
'TEMPORARY' : '';
460 $self->dbh->do("CREATE $temporary TABLE IF NOT EXISTS $table $tables->{meta}");
464 # use temporary tables
472 my $dbh = $self->dbh;
473 my $attributelist_table = $self->_attributelist_table;
475 my $a = $dbh->selectcol_arrayref("SELECT tag FROM $attributelist_table")
476 or $self->throw($dbh->errstr);
483 # special case for bulk updates
484 return $self->_dump_store(@_) if $self->{bulk_update_in_progress
};
489 my $autoindex = $self->autoindex;
491 my $dbh = $self->dbh;
492 local $dbh->{RaiseError
} = 1;
496 $self->replace($obj,$indexed);
497 $self->_update_indexes($obj) if $indexed && $autoindex;
503 warn "Transaction aborted because $@";
510 # remember whether we have ever stored a non-indexed feature
511 unless ($indexed or $self->{indexed_flag
}++) {
512 $self->subfeatures_are_indexed(0);
517 # we memoize this in order to avoid making zillions of calls
521 # special case for bulk update -- need to build the indexes
522 # at the same time we build the main feature table
523 return 1 if $self->{bulk_update_in_progress
};
524 my $d = $self->setting('autoindex');
525 $self->setting(autoindex
=>shift) if @_;
529 sub _start_bulk_update
{
531 my $dbh = $self->dbh;
532 $self->{bulk_update_in_progress
}++;
535 sub _finish_bulk_update
{
537 my $dbh = $self->dbh;
538 my $dir = $self->{dumpdir
} || '.';
539 for my $table ('feature',$self->index_tables) {
540 my $fh = $self->dump_filehandle($table);
541 my $path = $self->dump_path($table);
543 my $qualified_table = $self->_qualify($table);
544 $dbh->do("LOAD DATA LOCAL INFILE '$path' REPLACE INTO TABLE $qualified_table FIELDS OPTIONALLY ENCLOSED BY '\\''")
545 or $self->throw($dbh->errstr);
548 delete $self->{bulk_update_in_progress
};
549 delete $self->{filehandles
};
554 # Add subparts to a feature. Both feature and all subparts must already be in database.
556 sub _add_SeqFeature
{
559 # special purpose method for case when we are doing a bulk update
560 return $self->_dump_add_SeqFeature(@_) if $self->{bulk_update_in_progress
};
565 my $dbh = $self->dbh;
566 local $dbh->{RaiseError
} = 1;
568 my $child_table = $self->_parent2child_table();
571 my $sth = $self->_prepare(<<END);
572 REPLACE INTO $child_table (id,child) VALUES (?,?)
575 my $parent_id = (ref $parent ?
$parent->primary_id : $parent)
576 or $self->throw("$parent should have a primary_id");
578 $dbh->begin_work or $self->throw($dbh->errstr);
580 for my $child (@children) {
581 my $child_id = ref $child ?
$child->primary_id : $child;
582 defined $child_id or die "no primary ID known for $child";
583 $sth->execute($parent_id,$child_id);
589 warn "Transaction aborted because $@";
599 sub _fetch_SeqFeatures
{
604 my $parent_id = $parent->primary_id or $self->throw("$parent should have a primary_id");
605 my $feature_table = $self->_feature_table;
606 my $child_table = $self->_parent2child_table();
608 my @from = ("$feature_table as f","$child_table as c");
609 my @where = ('f.id=c.child','c.id=?');
610 my @args = $parent_id;
613 my ($from,$where,undef,@a) = $self->_types_sql(\
@types,'f');
614 push @from,$from if $from;
615 push @where,$where if $where;
619 my $from = join ', ',@from;
620 my $where = join ' AND ',@where;
628 $self->_print_query($query,@args) if DEBUG
|| $self->debug;
630 my $sth = $self->_prepare($query) or $self->throw($self->dbh->errstr);
632 $sth->execute(@args) or $self->throw($sth->errstr);
633 return $self->_sth2objs($sth);
637 # get primary sequence between start and end
639 sub _fetch_sequence
{
641 my ($seqid,$start,$end) = @_;
643 # backward compatibility to the old days when I liked reverse complementing
644 # dna by specifying $start > $end
646 if (defined $start && defined $end && $start > $end) {
648 ($start,$end) = ($end,$start);
650 $start-- if defined $start;
651 $end-- if defined $end;
653 my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
654 my $offset2 = $self->_offset_boundary($seqid,$end || 'right');
655 my $sequence_table = $self->_sequence_table;
656 my $locationlist_table = $self->_locationlist_table;
658 my $sth = $self->_prepare(<<END);
659 SELECT sequence,offset
660 FROM $sequence_table as s,$locationlist_table as ll
669 $sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
671 while (my($frag,$offset) = $sth->fetchrow_array) {
672 substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
675 substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
678 $seq =~ tr/gatcGATC/ctagCTAG/;
684 sub _offset_boundary
{
686 my ($seqid,$position) = @_;
688 my $sequence_table = $self->_sequence_table;
689 my $locationlist_table = $self->_locationlist_table;
692 $sql = $position eq 'left' ?
"SELECT min(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
693 :$position eq 'right' ?
"SELECT max(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
694 :"SELECT max(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=? AND offset<=?";
695 my $sth = $self->_prepare($sql);
696 my @args = $position =~ /^-?\d+$/ ?
($seqid,$position) : ($seqid);
697 $sth->execute(@args) or $self->throw($sth->errstr);
698 my $boundary = $sth->fetchall_arrayref->[0][0];
705 # add namespace to tablename
709 my $table_name = shift;
710 my $namespace = $self->namespace;
711 return $table_name unless defined $namespace;
712 return "${namespace}_${table_name}";
716 # Fetch a Bio::SeqFeatureI from database using its primary_id
720 @_ or $self->throw("usage: fetch(\$primary_id)");
721 my $primary_id = shift;
722 my $features = $self->_feature_table;
723 my $sth = $self->_prepare(<<END);
724 SELECT id,object FROM $features WHERE id=?
726 $sth->execute($primary_id) or $self->throw($sth->errstr);
727 my $obj = $self->_sth2obj($sth);
733 # Efficiently fetch a series of IDs from the database
734 # Can pass an array or an array ref
738 @_ or $self->throw('usage: fetch_many($id1,$id2,$id3...)');
739 my $ids = join ',',map {ref($_) ? @
$_ : $_} @_ or return;
740 my $features = $self->_feature_table;
742 my $sth = $self->_prepare(<<END);
743 SELECT id,object FROM $features WHERE id IN ($ids)
745 $sth->execute() or $self->throw($sth->errstr);
746 return $self->_sth2objs($sth);
751 my ($seq_id,$start,$end,$strand,
752 $name,$class,$allow_aliases,
759 ) = rearrange
([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
760 'NAME','CLASS','ALIASES',
761 ['TYPES','TYPE','PRIMARY_TAG'],
762 ['ATTRIBUTES','ATTRIBUTE'],
769 my (@from,@where,@args,@group);
770 $range_type ||= 'overlaps';
772 my $feature_table = $self->_feature_table;
773 @from = "$feature_table as f";
776 # hacky backward compatibility workaround
777 undef $class if $class && $class eq 'Sequence';
778 $name = "$class:$name" if defined $class && length $class > 0;
779 # last argument is the join field
780 my ($from,$where,$group,@a) = $self->_name_sql($name,$allow_aliases,'f.id');
781 push @from,$from if $from;
782 push @where,$where if $where;
783 push @group,$group if $group;
787 if (defined $seq_id) {
788 # last argument is the name of the features table
789 my ($from,$where,$group,@a) = $self->_location_sql($seq_id,$start,$end,$range_type,$strand,'f');
790 push @from,$from if $from;
791 push @where,$where if $where;
792 push @group,$group if $group;
796 if (defined($sources)) {
797 my @sources = ref($sources) eq 'ARRAY' ? @
{$sources} : ($sources);
798 if (defined($types)) {
799 my @types = ref($types) eq 'ARRAY' ? @
{$types} : ($types);
801 foreach my $type (@types) {
802 # *** not sure what to do if user supplies both -source and -type
803 # where the type includes a source!
805 push(@final_types, $type);
808 foreach my $source (@sources) {
809 push(@final_types, $type.':'.$source);
813 $types = \
@final_types;
816 $types = [map { ':'.$_ } @sources];
819 if (defined($types)) {
820 # last argument is the name of the features table
821 my ($from,$where,$group,@a) = $self->_types_sql($types,'f');
822 push @from,$from if $from;
823 push @where,$where if $where;
824 push @group,$group if $group;
828 if (defined $attributes) {
829 # last argument is the join field
830 my ($from,$where,$group,@a) = $self->_attributes_sql($attributes,'f.id');
831 push @from,$from if $from;
832 push @where,$where if $where;
833 push @group,$group if $group;
837 if (defined $fromtable) {
838 # last argument is the join field
839 my ($from,$where,$group,@a) = $self->_from_table_sql($fromtable,'f.id');
840 push @from,$from if $from;
841 push @where,$where if $where;
842 push @group,$group if $group;
846 # if no other criteria are specified, then
847 # only fetch indexed (i.e. top level objects)
848 @where = 'indexed=1' unless @where;
850 my $from = join ', ',@from;
851 my $where = join ' AND ',map {"($_)"} @where;
852 my $group = join ', ',@group;
853 $group = "GROUP BY $group" if @group;
856 SELECT f.id,f.object,f.typeid,f.seqid,f.start,f.end,f.strand
862 $self->_print_query($query,@args) if DEBUG
|| $self->debug;
864 my $sth = $self->_prepare($query) or $self->throw($self->dbh->errstr);
865 $sth->execute(@args) or $self->throw($sth->errstr);
866 return $iterator ? Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
->new($sth,$self) : $self->_sth2objs($sth);
871 my ($name,$allow_aliases,$join) = @_;
872 my $name_table = $self->_name_table;
874 my $from = "$name_table as n";
875 my ($match,$string) = $self->_match_sql($name);
877 my $where = "n.id=$join AND n.name $match";
878 $where .= " AND n.display_name>0" unless $allow_aliases;
879 return ($from,$where,'',$string);
882 sub _search_attributes
{
884 my ($search_string,$attribute_names,$limit) = @_;
885 my @words = map {quotemeta($_)} split /\s+/,$search_string;
887 my $name_table = $self->_name_table;
888 my $attribute_table = $self->_attribute_table;
889 my $attributelist_table = $self->_attributelist_table;
890 my $type_table = $self->_type_table;
891 my $typelist_table = $self->_typelist_table;
893 my @tags = @
$attribute_names;
894 my $tag_sql = join ' OR ',("al.tag=?") x
@tags;
896 my $perl_regexp = join '|',@words;
898 my $sql_regexp = join ' OR ',("a.attribute_value REGEXP ?") x
@words;
900 SELECT name,attribute_value,tl.tag,n.id
901 FROM $name_table as n,$attribute_table as a,$attributelist_table as al,$type_table as t,$typelist_table as tl
903 AND al.id=a.attribute_id
910 $sql .= "LIMIT $limit" if defined $limit;
911 $self->_print_query($sql,@tags,@words) if DEBUG
|| $self->debug;
912 my $sth = $self->_prepare($sql);
913 $sth->execute(@tags,@words) or $self->throw($sth->errstr);
916 while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
917 my (@hits) = $value =~ /$perl_regexp/ig;
918 my @words_in_row = split /\b/,$value;
919 my $score = int(@hits * 10);
920 push @results,[$name,$value,$score,$type,$id];
923 @results = sort {$b->[2]<=>$a->[2]} @results;
932 if ($name =~ /(?:^|[^\\])[*?]/) {
933 $name =~ s/(^|[^\\])([%_])/$1\\$2/g;
934 $name =~ s/(^|[^\\])\*/$1%/g;
935 $name =~ s/(^|[^\\])\?/$1_/g;
942 return ($match,$string);
945 sub _from_table_sql
{
947 my ($from_table,$join) = @_;
948 my $from = "$from_table as ft";
949 my $where = "ft.id=$join";
950 return ($from,$where,'');
953 sub _attributes_sql
{
955 my ($attributes,$join) = @_;
957 my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
958 my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
960 my $attribute_table = $self->_attribute_table;
961 my $attributelist_table = $self->_attributelist_table;
963 my $from = "$attribute_table as a use index(attribute_id), $attributelist_table as al";
967 AND a.attribute_id=al.id
971 my $group = $group_by;
973 my @args = (@bind_args,@group_args);
974 return ($from,$where,$group,@args);
977 # Capability flag: subfeature types are always indexed by this adaptor.
sub subfeature_types_are_indexed { return 1 }
978 # Capability flag: subfeature locations are always indexed by this adaptor.
sub subfeature_locations_are_indexed { return 1 }
982 my ($types,$type_table) = @_;
983 my ($primary_tag,$source_tag);
985 my @types = ref $types eq 'ARRAY' ? @
$types : $types;
987 my $typelist = $self->_typelist_table;
988 my $from = "$typelist AS tl";
992 for my $type (@types) {
994 if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
995 $primary_tag = $type->method;
996 $source_tag = $type->source;
998 ($primary_tag,$source_tag) = split ':',$type,2;
1001 if (defined $source_tag) {
1002 if (length($primary_tag)) {
1003 push @matches,"tl.tag=?";
1004 push @args,"$primary_tag:$source_tag";
1007 push @matches,"tl.tag LIKE ?";
1008 push @args,"%:$source_tag";
1011 push @matches,"tl.tag LIKE ?";
1012 push @args,"$primary_tag:%";
1015 my $matches = join ' OR ',@matches;
1018 tl.id=$type_table.typeid
1022 return ($from,$where,'',@args);
1027 my ($seq_id,$start,$end,$range_type,$strand,$location) = @_;
1029 # the additional join on the location_list table badly impacts performance
1030 # so we build a copy of the table in memory
1031 my $seqid = $self->_locationid($seq_id) || 0; # zero is an invalid primary ID, so will return empty
1033 $start = MIN_INT
unless defined $start;
1034 $end = MAX_INT
unless defined $end;
1036 my ($bin_where,@bin_args) = $self->bin_where($start,$end,$location);
1038 my ($range,@range_args);
1039 if ($range_type eq 'overlaps') {
1040 $range = "$location.end>=? AND $location.start<=? AND ($bin_where)";
1041 @range_args = ($start,$end,@bin_args);
1042 } elsif ($range_type eq 'contains') {
1043 $range = "$location.start>=? AND $location.end<=? AND ($bin_where)";
1044 @range_args = ($start,$end,@bin_args);
1045 } elsif ($range_type eq 'contained_in') {
1046 $range = "$location.start<=? AND $location.end>=?";
1047 @range_args = ($start,$end);
1049 $self->throw("range_type must be one of 'overlaps', 'contains' or 'contained_in'");
1052 if (defined $strand) {
1053 $range .= " AND strand=?";
1054 push @range_args,$strand;
1065 my @args = ($seqid,@range_args);
1066 return ($from,$where,$group,@args);
1074 my $from_update_table = shift; # if present, will take ids from "update_table"
1076 my $dbh = $self->dbh;
1080 # try to bring in highres time() function
1081 eval "require Time::HiRes";
1083 my $last_time = $self->time();
1085 # tell _delete_index() not to bother removing the index rows corresponding
1086 # to each individual feature
1087 local $self->{reindexing
} = 1;
1091 my $update = $from_update_table;
1092 for my $table ($self->index_tables) {
1093 my $query = $from_update_table ?
"DELETE $table FROM $table,$update WHERE $table.id=$update.id"
1094 : "DELETE FROM $table";
1096 $dbh->do("ALTER TABLE $table DISABLE KEYS");
1098 my $iterator = $self->get_seq_stream(-from_table
=>$from_update_table ?
$update : undef);
1099 while (my $f = $iterator->next_seq) {
1100 if (++$count %1000 == 0) {
1101 $now = $self->time();
1102 my $elapsed = sprintf(" in %5.2fs",$now - $last_time);
1104 print STDERR
"$count features indexed$elapsed...",' 'x60
;
1105 print STDERR
-t STDOUT
&& !$ENV{EMACS
} ?
"\r" : "\n";
1107 $self->_update_indexes($f);
1110 for my $table ($self->index_tables) {
1111 $dbh->do("ALTER TABLE $table ENABLE KEYS");
1114 warn "Couldn't complete transaction: $@";
1125 $self->dbh->do("ANALYZE TABLE $_") foreach $self->index_tables;
1130 my @index_tables = $self->index_tables;
1131 my $feature_table = $self->_feature_table;
1132 return ($feature_table,@index_tables);
1137 return map {$self->_qualify($_)} qw(name attribute parent2child)
1142 my $features = $self->_feature_table;
1144 SELECT min(id) FROM $features
1146 my $sth=$self->_prepare($query);
1148 my ($first) = $sth->fetchrow_array;
1155 my $lastkey = shift;
1156 my $features = $self->_feature_table;
1158 SELECT min(id) FROM $features WHERE id>?
1160 my $sth=$self->_prepare($query);
1161 $sth->execute($lastkey);
1162 my ($next) = $sth->fetchrow_array;
1170 my $features = $self->_feature_table;
1172 SELECT count(*) FROM $features WHERE id=?
1174 my $sth=$self->_prepare($query);
1175 $sth->execute($key);
1176 my ($count) = $sth->fetchrow_array;
1184 my $dbh = $self->dbh;
1185 my $child_table = $self->_parent2child_table;
1186 my $query = "SELECT child FROM $child_table WHERE id=?";
1187 my $sth=$self->_prepare($query);
1188 $sth->execute($key);
1190 while (my ($cid) = $sth->fetchrow_array) {
1191 # Backcheck looking for multiple parents, delete only if one is present. I'm
1192 # sure there is a nice way to left join the parent2child table onto itself
1193 # to get this in one query above, just haven't worked it out yet...
1194 my $sth2 = $self->_prepare("SELECT count(id) FROM $child_table WHERE child=?");
1195 $sth2->execute($cid);
1196 my ($count) = $sth2->fetchrow_array;
1198 $self->_deleteid($cid) || warn "An error occurred while removing subfeature id=$cid. Perhaps it was previously deleted?\n";
1201 for my $table ($self->all_tables) {
1202 $success += $dbh->do("DELETE FROM $table WHERE id=$key") || 0;
1209 my $dbh = $self->dbh;
1210 for my $table ($self->all_tables) {
1211 $dbh->do("DELETE FROM $table");
1217 my $dbh = $self->dbh;
1218 my $features = $self->_feature_table;
1220 SELECT count(*) FROM $features
1222 my $sth=$self->_prepare($query);
1224 my ($count) = $sth->fetchrow_array;
1231 my $dbh = $self->dbh;
1232 my $location = $self->_locationlist_table;
1233 my $sth = $self->_prepare("SELECT DISTINCT seqname FROM $location");
1234 $sth->execute() or $self->throw($sth->errstr);
1236 while (my ($id) = $sth->fetchrow_array) {
1244 my ($variable_name,$value) = @_;
1245 my $meta = $self->_meta_table;
1247 if (defined $value && $self->writeable) {
1249 REPLACE INTO $meta (name,value) VALUES (?,?)
1251 my $sth = $self->_prepare($query);
1252 $sth->execute($variable_name,$value) or $self->throw($sth->errstr);
1254 $self->{settings_cache
}{$variable_name} = $value;
1257 return $self->{settings_cache
}{$variable_name} if exists $self->{settings_cache
}{$variable_name};
1259 SELECT value FROM $meta as m WHERE m.name=?
1261 my $sth = $self->_prepare($query);
1262 $sth->execute($variable_name) or $self->throw($sth->errstr);
1263 my ($value) = $sth->fetchrow_array;
1265 return $self->{settings_cache
}{$variable_name} = $value;
1270 # Replace Bio::SeqFeatureI into database.
1275 my $index_flag = shift || undef;
1277 # ?? shouldn't need to do this
1278 # $self->_load_class($object);
1279 my $id = $object->primary_id;
1280 my $features = $self->_feature_table;
1282 my $sth = $self->_prepare(<<END);
1283 REPLACE INTO $features (id,object,indexed,seqid,start,end,strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?,?)
1286 my @location = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x6
;
1288 my $primary_tag = $object->primary_tag;
1289 my $source_tag = $object->source_tag || '';
1290 $primary_tag .= ":$source_tag";
1291 my $typeid = $self->_typeid($primary_tag,1);
1293 my $frozen = $self->no_blobs() ?
0 : $self->freeze($object);
1295 $sth->execute($id,$frozen,$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
1297 my $dbh = $self->dbh;
1298 $object->primary_id($dbh->{mysql_insertid
}) unless defined $id;
1300 $self->flag_for_indexing($dbh->{mysql_insertid
}) if $self->{bulk_update_in_progress
};
1303 # doesn't work with this schema, since we have to update name and attribute
1304 # tables which need object ids, which we can only know by replacing feats in
1305 # the feature table one by one
1308 my $index_flag = shift || undef;
1311 my $features = $self->_feature_table;
1314 foreach my $object (@objects) {
1315 my $id = $object->primary_id;
1316 my @location = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x6
;
1317 my $primary_tag = $object->primary_tag;
1318 my $source_tag = $object->source_tag || '';
1319 $primary_tag .= ":$source_tag";
1320 my $typeid = $self->_typeid($primary_tag,1);
1322 push(@insert_values, ($id,0,$index_flag||0,@location,$typeid));
1327 push(@value_blocks, '(?,?,?,?,?,?,?,?,?,?)');
1329 my $value_blocks = join(',', @value_blocks);
1330 my $sql = qq{REPLACE INTO
$features (id
,object
,indexed
,seqid
,start
,end
,strand
,tier
,bin
,typeid
) VALUES
$value_blocks};
1332 my $sth = $self->_prepare($sql);
1333 $sth->execute(@insert_values) or $self->throw($sth->errstr);
1337 # Insert one Bio::SeqFeatureI into database. primary_id must be undef
1342 my $index_flag = shift || 0;
1344 $self->_load_class($object);
1345 defined $object->primary_id and $self->throw("$object already has a primary id");
1347 my $features = $self->_feature_table;
1348 my $sth = $self->_prepare(<<END);
1349 INSERT INTO $features (id,object,indexed) VALUES (?,?,?)
1351 $sth->execute(undef,$self->freeze($object),$index_flag) or $self->throw($sth->errstr);
1352 my $dbh = $self->dbh;
1353 $object->primary_id($dbh->{mysql_insertid
});
1354 $self->flag_for_indexing($dbh->{mysql_insertid
}) if $self->{bulk_update_in_progress
};
1360 Usage : @type_list = $db->types
1361 Function: Get all the types in the database
1362 Returns : array of Bio::DB::GFF::Typename objects
1370 eval "require Bio::DB::GFF::Typename"
1371 unless Bio
::DB
::GFF
::Typename
->can('new');
1372 my $typelist_table = $self->_typelist_table;
1374 SELECT tag from $typelist_table
1377 $self->_print_query($sql) if DEBUG
|| $self->debug;
1378 my $sth = $self->_prepare($sql);
1379 $sth->execute() or $self->throw($sth->errstr);
1382 while (my($tag) = $sth->fetchrow_array) {
1383 push @results,Bio
::DB
::GFF
::Typename
->new($tag);
1390 # Insert a bit of DNA or protein into the database
1392 sub _insert_sequence
{
1394 my ($seqid,$seq,$offset) = @_;
1395 my $id = $self->_locationid($seqid);
1396 my $seqtable = $self->_sequence_table;
1397 my $sth = $self->_prepare(<<END);
1398 REPLACE INTO $seqtable (id,offset,sequence) VALUES (?,?,?)
1400 $sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
1404 # This subroutine flags the given primary ID for later reindexing
1406 sub flag_for_indexing
{
1409 my $needs_updating = $self->_update_table;
1410 my $sth = $self->_prepare("REPLACE INTO $needs_updating VALUES (?)");
1411 $sth->execute($id) or $self->throw($self->dbh->errstr);
1415 # Update indexes for given object
1417 sub _update_indexes
{
1420 defined (my $id = $obj->primary_id) or return;
1422 if ($self->{bulk_update_in_progress
}) {
1423 $self->_dump_update_name_index($obj,$id);
1424 $self->_dump_update_attribute_index($obj,$id);
1426 $self->_update_name_index($obj,$id);
1427 $self->_update_attribute_index($obj,$id);
1431 sub _update_name_index
{
1434 my $name = $self->_name_table;
1435 my $primary_id = $obj->primary_id;
1437 $self->_delete_index($name,$id);
1438 my ($names,$aliases) = $self->feature_names($obj);
1440 my $sth = $self->_prepare("INSERT INTO $name (id,name,display_name) VALUES (?,?,?)");
1442 $sth->execute($id,$_,1) or $self->throw($sth->errstr) foreach @
$names;
1443 $sth->execute($id,$_,0) or $self->throw($sth->errstr) foreach @
$aliases;
# Refresh the attribute index for one object: delete stale rows, then insert
# one (id, attribute_id, value) row for every tag/value pair the object
# carries.
sub _update_attribute_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $attribute = $self->_attribute_table;
  $self->_delete_index($attribute,$id);

  my $sth = $self->_prepare("INSERT INTO $attribute (id,attribute_id,attribute_value) VALUES (?,?,?)");
  for my $tag ($obj->get_all_tags) {
    my $tagid = $self->_attributeid($tag);   # numeric id, autocreated if new
    for my $value ($obj->get_tag_values($tag)) {
      $sth->execute($id,$tagid,$value) or $self->throw($sth->errstr);
    }
  }
}
# Look up (and optionally create) the numeric id for $name in one of the
# small lookup tables (typelist / locationlist / attributelist).  Returns the
# id, or nothing when the name is absent and $add_if_missing is false.
sub _genericid {
  my $self = shift;
  my ($table,$namefield,$name,$add_if_missing) = @_;
  my $qualified_table = $self->_qualify($table);

  my $sth = $self->_prepare(<<END);
SELECT id FROM $qualified_table WHERE $namefield=?
END
  $sth->execute($name) or die $sth->errstr;
  my ($id) = $sth->fetchrow_array;
  $sth->finish;
  return $id if defined $id;
  return unless $add_if_missing;

  $sth = $self->_prepare(<<END);
INSERT INTO $qualified_table ($namefield) VALUES (?)
END
  $sth->execute($name) or die $sth->errstr;
  my $dbh = $self->dbh;
  # id assigned by AUTO_INCREMENT for the row we just inserted
  return $dbh->{mysql_insertid};
}
# Thin wrappers over _genericid for the three lookup tables; each one
# autocreates an entry when the name is not yet present.
sub _typeid      { shift->_genericid('typelist','tag',shift,1)         }
sub _locationid  { shift->_genericid('locationlist','seqname',shift,1) }
sub _attributeid { shift->_genericid('attributelist','tag',shift,1)    }
# Derive the indexable location columns for a feature: the numeric seqid,
# start/end coordinates, strand (0 when unknown), and the (tier,bin) pair of
# the binning scheme.
sub _get_location_and_bin {
  my $self    = shift;
  my $feature = shift;
  my $seqid   = $self->_locationid($feature->seq_id);
  my $start   = $feature->start;
  my $end     = $feature->end;
  my $strand  = $feature->strand || 0;
  my ($tier,$bin) = $self->get_bin($start,$end);
  return ($seqid,$start,$end,$strand,$tier,$bin);
}
# Compute the smallest bin (and its tier) that wholly contains [$start,$end].
# Starting from MIN_BIN, the bin size grows by a factor of ten per tier until
# both endpoints land in the same bin.
sub get_bin {
  my $self = shift;
  my ($start,$end) = @_;
  my $binsize = MIN_BIN;
  my ($bin_start,$bin_end,$tier);
  $tier = 0;
  while (1) {
    $bin_start = int $start/$binsize;
    $bin_end   = int $end/$binsize;
    last if $bin_start == $bin_end;   # both ends inside one bin: done
    $binsize *= 10;
    $tier++;
  }
  return ($tier,$bin_start);
}
# Build the SQL fragment restricting table alias $f to every (tier,bin) pair
# that could overlap [$start,$end].  In list context returns
# ($sql_fragment, @bind_args); in scalar context returns the fragment with
# the placeholders substituted in.
# Fix: the scalar-context branch called substitute() as a plain function, so
# $query was consumed as $self inside substitute(); it is a method and must
# be invoked as one.
sub bin_where {
  my $self = shift;
  my ($start,$end,$f) = @_;
  my (@bins,@args);
  my $tier    = 0;
  my $binsize = MIN_BIN;
  while ($binsize <= MAX_BIN) {
    my $bin_start = int($start/$binsize);
    my $bin_end   = int($end/$binsize);
    push @bins,"($f.tier=? AND $f.bin between ? AND ?)";
    push @args,($tier,$bin_start,$bin_end);
    $binsize *= 10;
    $tier++;
  }
  my $query = join("\n\t OR ",@bins);
  return wantarray ? ($query,@args) : $self->substitute($query,@args);
}
# Remove every index row for $id from $table_name.  Skipped entirely while a
# full reindex is running, because the index tables are rebuilt from scratch.
sub _delete_index {
  my $self = shift;
  my ($table_name,$id) = @_;
  return if $self->{reindexing};
  my $sth = $self->_prepare("DELETE FROM $table_name WHERE id=?") or $self->throw($self->dbh->errstr);
  $sth->execute($id);
}
# Given a statement handle expected to return rows of
# (id, object, typeid, seqid, start, end, strand), inflate every row into a
# feature object and return the whole list.
sub _sth2objs {
  my $self = shift;
  my $sth  = shift;
  my @result;
  while (my ($id,$o,$typeid,$seqid,$start,$end,$strand) = $sth->fetchrow_array) {
    my $obj;
    if ($o eq '0') {
      # no serialized copy stored; rebuild the feature from indexed columns
      $obj = $self->_rebuild_obj($id,$typeid,$seqid,$start,$end,$strand);
    }
    else {
      $obj = $self->thaw($o,$id);
    }
    push @result,$obj;
  }
  $sth->finish;
  return @result;
}
# Single-row variant of _sth2objs: fetch one
# (id, object, typeid, seqid, start, end, strand) row and inflate it into a
# feature object, or return nothing when no row is available.
sub _sth2obj {
  my $self = shift;
  my $sth  = shift;
  my ($id,$o,$typeid,$seqid,$start,$end,$strand) = $sth->fetchrow_array;
  return unless defined $o;
  my $obj;
  if ($o eq '0') {
    # no serialized copy stored; rebuild from the indexed columns
    # (unclear why this path exists — kept for parity with _sth2objs)
    $obj = $self->_rebuild_obj($id,$typeid,$seqid,$start,$end,$strand);
  }
  else {
    $obj = $self->thaw($o,$id);
  }
  return $obj;
}
# Reconstitute a Bio::SeqFeature::Lite from the indexed columns when no
# serialized object was stored.  Type, sequence-name and attribute-tag
# lookups are memoized in per-store caches to avoid repeating the SELECTs.
sub _rebuild_obj {
    my ($self, $id, $typeid, $db_seqid, $start, $end, $strand) = @_;
    my ($type, $source, $seqid);

    # convert typeid into type and source
    if (exists $self->{_type_cache}->{$typeid}) {
        ($type, $source) = @{ $self->{_type_cache}->{$typeid} };
    }
    else {
        my $sql = qq{ SELECT `tag` FROM typelist WHERE `id` = ? };
        my $sth = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
        $sth->execute($typeid);
        my $result;
        $sth->bind_columns(\$result);
        while ($sth->fetch()) {
            # a single row is expected; drain the handle regardless
        }
        ($type, $source) = split(':', $result);
        $self->{_type_cache}->{$typeid} = [$type, $source];
    }

    # convert the db seqid into the sequence name
    if (exists $self->{_seqid_cache}->{$db_seqid}) {
        $seqid = $self->{_seqid_cache}->{$db_seqid};
    }
    else {
        my $sql = qq{ SELECT `seqname` FROM locationlist WHERE `id` = ? };
        my $sth = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
        $sth->execute($db_seqid);
        $sth->bind_columns(\$seqid);
        while ($sth->fetch()) {
            # a single row is expected; drain the handle regardless
        }
        $self->{_seqid_cache}->{$db_seqid} = $seqid;
    }

    # NOTE(review): names from the name table are not restored here, only
    # attributes — confirm that is intentional.

    # fetch the attributes and carry them over into the new object
    my %attribs;
    my $sql = qq{ SELECT attribute_id,attribute_value FROM attribute WHERE `id` = ? };
    my $sth = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
    $sth->execute($id);
    my ($attribute_id, $attribute_value);
    $sth->bind_columns(\($attribute_id, $attribute_value));
    while ($sth->fetch()) {
        # translate the attribute_id into its tag name
        my $attribute;
        if (exists $self->{_attribute_cache}->{$attribute_id}) {
            $attribute = $self->{_attribute_cache}->{$attribute_id};
        }
        else {
            my $tag_sql = qq{ SELECT `tag` FROM attributelist WHERE `id` = ? };
            my $sth2 = $self->_prepare($tag_sql) or $self->throw($self->dbh->errstr);
            $sth2->execute($attribute_id);
            $sth2->bind_columns(\$attribute);
            while ($sth2->fetch()) {
                # a single row is expected; drain the handle regardless
            }
            $self->{_attribute_cache}->{$attribute_id} = $attribute;
        }

        # the source tag already travels in -source; don't duplicate it
        if ($source && $attribute eq 'source' && $attribute_value eq $source) {
            next;
        }
        $attribs{$attribute} = $attribute_value;
    }

    my $obj = Bio::SeqFeature::Lite->new(
        -primary_id => $id,
        $type   ? (-type   => $type)   : (),
        $source ? (-source => $source) : (),
        $seqid  ? (-seq_id => $seqid)  : (),
        defined $start  ? (-start  => $start)  : (),
        defined $end    ? (-end    => $end)    : (),
        defined $strand ? (-strand => $strand) : (),
        keys %attribs ? (-attributes => \%attribs) : (),
    );

    return $obj;
}
# Prepare a statement through DBI's statement cache.  The if_active value of
# 3 tells prepare_cached to hand back a brand-new handle whenever the cached
# one is still active, avoiding "statement handle still active" errors.
sub _prepare {
  my $self  = shift;
  my $query = shift;
  my $dbh   = $self->dbh;
  my $sth   = $dbh->prepare_cached($query, {}, 3) or $self->throw($dbh->errstr);
  return $sth;
}
1683 ####################################################################################################
1684 # SQL Fragment generators
1685 ####################################################################################################
# Table-name accessors.  Each returns the (optionally namespace-qualified)
# name of one of the schema's tables.  Note that _feature_table and
# _type_table both resolve to 'feature': the feature table's typeid column
# doubles as the type index.
sub _feature_table       { shift->_qualify('feature')       }
sub _location_table      { shift->_qualify('location')      }
sub _locationlist_table  { shift->_qualify('locationlist')  }
sub _type_table          { shift->_qualify('feature')       }
sub _typelist_table      { shift->_qualify('typelist')      }
sub _name_table          { shift->_qualify('name')          }
sub _attribute_table     { shift->_qualify('attribute')     }
sub _attributelist_table { shift->_qualify('attributelist') }
sub _parent2child_table  { shift->_qualify('parent2child')  }
sub _meta_table          { shift->_qualify('meta')          }
sub _update_table        { shift->_qualify('update_table')  }
sub _sequence_table      { shift->_qualify('sequence')      }
# Build the WHERE fragment matching features that carry the requested
# attributes.  $attributes maps tag names to a single value or an arrayref
# of values; '*' wildcards are converted to SQL '%' and switch the
# comparison from '=' to LIKE.  Returns ($sql_fragment, @bind_args).
# Fix: removed the unused local `$dbh` — only placeholders are generated
# here, so the database handle was never needed.
sub _make_attribute_where {
  my $self = shift;
  my ($attributetable,$attributenametable,$attributes) = @_;
  my (@sql,@args);
  foreach (keys %$attributes) {
    my @values = ref($attributes->{$_}) && ref($attributes->{$_}) eq 'ARRAY' ? @{$attributes->{$_}} : $attributes->{$_};
    foreach (@values) {  # convert * into % for wildcard matches
      s/\*/%/g;
    }
    my $match = join ' OR ',map {
      /%/ ? "$attributetable.attribute_value LIKE ?"
          : "$attributetable.attribute_value=?"
    } @values;
    push @sql,"($attributenametable.tag=? AND ($match))";
    push @args,($_,@values);
  }
  return (join(' OR ',@sql),@args);
}
# Build the GROUP BY column list plus HAVING clause used when a query must
# match several attributes at once.  Returns the fragment and the count
# threshold (number of attributes minus one), or an empty list when there
# are no attributes at all.
sub _make_attribute_group {
  my $self = shift;
  my ($table_name,$attributes) = @_;
  my $n_attributes = scalar keys %$attributes
      or return;
  my $columns = "f.id,f.object,f.typeid,f.seqid,f.start,f.end,f.strand";
  return ("$columns HAVING count(f.id)>?", $n_attributes - 1);
}
# Replace each '?' placeholder in $query with the next argument, quoted via
# the database handle.  Used to render a fully literal SQL string.
# Fix: the old loop (`while ($query =~ /\?/) { s/\?/$arg/ }`) re-scanned the
# already-substituted text, so a quoted value containing '?' was itself
# treated as a placeholder on the next pass.  Splitting on the original
# placeholders substitutes each one exactly once.
sub substitute {
  my $self = shift;
  my ($query,@args) = @_;
  my @pieces = split /\?/, $query, -1;   # -1 keeps a trailing empty field
  my $result = shift @pieces;
  for my $piece (@pieces) {
    $result .= $self->dbh->quote(shift @args);
    $result .= $piece;
  }
  return $result;
}
# Special-purpose store for bulk loading: features are appended to a
# tab-delimited dump file (later loaded with LOAD DATA) instead of being
# written through the database handle.  Returns the number of objects stored.
sub _dump_store {
  my $self    = shift;
  my $indexed = shift;

  my $count    = 0;
  my $store_fh = $self->dump_filehandle('feature');
  my $dbh      = $self->dbh;

  my $autoindex = $self->autoindex;

  for my $obj (@_) {
    my $id = $self->next_id;
    # location columns are only meaningful for indexed features
    my ($seqid,$start,$end,$strand,$tier,$bin) =
        $indexed ? $self->_get_location_and_bin($obj) : (undef) x 6;
    my $primary_tag = $obj->primary_tag;
    my $source_tag  = $obj->source_tag || '';
    $primary_tag   .= ":$source_tag";
    my $typeid = $self->_typeid($primary_tag,1);

    print $store_fh join("\t",$id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,
                         $dbh->quote($self->freeze($obj))),"\n";
    $obj->primary_id($id);
    $self->_update_indexes($obj) if $indexed && $autoindex;
    $count++;
  }

  # remember whether we have ever stored a non-indexed feature
  unless ($indexed or $self->{indexed_flag}++) {
    $self->subfeatures_are_indexed(0);
  }
  return $count;
}
# Bulk-load version of add_SeqFeature: append (parent_id, child_id) rows to
# the parent2child dump file instead of writing through DBI.  Returns the
# number of children recorded.
# Fix: removed the unused local `$dbh` — nothing here touches the database
# handle; rows go straight to the dump filehandle.
sub _dump_add_SeqFeature {
  my $self     = shift;
  my $parent   = shift;
  my @children = @_;

  my $fh = $self->dump_filehandle('parent2child');
  my $parent_id = (ref $parent ? $parent->primary_id : $parent)
    or $self->throw("$parent should have a primary_id");

  my $count = 0;
  for my $child_id (@children) {
    print $fh join("\t",$parent_id,$child_id),"\n";
    $count++;
  }
  return $count;
}
# Bulk-load version of _update_name_index: append rows to the name dump
# file.  display_name is 1 for canonical names and 0 for aliases; values are
# quoted here because the dump file is later ingested verbatim.
sub _dump_update_name_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $fh  = $self->dump_filehandle('name');
  my $dbh = $self->dbh;
  my ($names,$aliases) = $self->feature_names($obj);
  print $fh join("\t",$id,$dbh->quote($_),1),"\n" foreach @$names;
  print $fh join("\t",$id,$dbh->quote($_),0),"\n" foreach @$aliases;
}
# Bulk-load version of _update_attribute_index: append one
# (id, attribute_id, quoted value) row per tag/value pair to the attribute
# dump file.
sub _dump_update_attribute_index {
  my $self = shift;
  my ($obj,$id) = @_;
  my $fh  = $self->dump_filehandle('attribute');
  my $dbh = $self->dbh;
  for my $tag ($obj->all_tags) {
    my $tagid = $self->_attributeid($tag);
    for my $value ($obj->each_tag_value($tag)) {
      print $fh join("\t",$id,$tagid,$dbh->quote($value)),"\n";
    }
  }
}
# High-resolution wall-clock time when Time::HiRes has been loaded, falling
# back to the one-second core time() otherwise.
# Fix: qualify the fallback as CORE::time() — inside a sub named `time` a
# bare time() call is ambiguous between this sub and the builtin.
sub time {
  return Time::HiRes::time() if Time::HiRes->can('time');
  return CORE::time();
}
1821 if ($self->{bulk_update_in_progress
}) { # be sure to remove temp files
1822 for my $table ('feature',$self->index_tables) {
1823 my $path = $self->dump_path($table);