1 package Bio
::DB
::SeqFeature
::Store
::DBI
::SQLite
;
7 Bio::DB::SeqFeature::Store::DBI::SQLite -- SQLite implementation of Bio::DB::SeqFeature::Store
11 use Bio::DB::SeqFeature::Store;
13 # Open the sequence database
14 my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::SQLite',
15 -dsn => '/path/to/database.db');
17 # get a feature from somewhere
18 my $feature = Bio::SeqFeature::Generic->new(...);
21 $db->store($feature) or die "Couldn't store!";
23 # primary ID of the feature is changed to indicate its primary ID
25 my $id = $feature->primary_id;
27 # get the feature back out
28 my $f = $db->fetch($id);
30 # change the feature and update it
32 $db->update($f) or die "Couldn't update!";
36 my @features = $db->fetch_many(@list_of_ids);
39 @features = $db->get_features_by_name('ZK909');
42 @features = $db->get_features_by_alias('sma-3');
45 @features = $db->get_features_by_name('gene');
48 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
51 @features = $db->get_features_by_attribute({description => 'protein kinase'})
53 # ...by the GFF "Note" field
54 @result_list = $db->search_notes('kinase');
56 # ...by arbitrary combinations of selectors
57 @features = $db->features(-name => $name,
62 -attributes => $attributes);
64 # ...using an iterator
65 my $iterator = $db->get_seq_stream(-name => $name,
70 -attributes => $attributes);
72 while (my $feature = $iterator->next_seq) {
73 # do something with the feature
76 # ...limiting the search to a particular region
77 my $segment = $db->segment('Chr1',5000=>6000);
78 my @features = $segment->features(-type=>['mRNA','match']);
80 # getting & storing sequence information
81 # Warning: this returns a string, and not a PrimarySeq object
82 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
83 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
85 # what feature types are defined in the database?
86 my @types = $db->types;
88 # create a new feature in the database
89 my $feature = $db->new_feature(-primary_tag => 'mRNA',
96 Bio::DB::SeqFeature::Store::SQLite is the SQLite adaptor for
97 Bio::DB::SeqFeature::Store. You will not create it directly, but
98 instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
100 See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
102 =head2 Using the SQLite adaptor
104 To establish a connection to the database, call
105 Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::SQLite',@more_args). The
106 additional arguments are as follows:
108 Argument name Description
109 ------------- -----------
111 -dsn The path to the SQLite database file.
113 -namespace A prefix to attach to each table. This allows you
114 to have several virtual databases in the same
117 -temp Boolean flag. If true, a temporary database
118 will be created and destroyed as soon as
119 the Store object goes out of scope. (synonym -temporary)
121 -autoindex Boolean flag. If true, features in the database will be
122 reindexed every time they change. This is the default.
125 -tmpdir Directory in which to place temporary files during "fast" loading.
126 Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
128 -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
129 (synonyms -options, -dbi_attr)
131 -write Pass true to open database for writing or updating.
133 If successful, a new instance of
134 Bio::DB::SeqFeature::Store::DBI::SQLite will be returned.
136 In addition to the standard methods supported by all well-behaved
137 Bio::DB::SeqFeature::Store databases, several following
138 adaptor-specific methods are provided. These are described in the next
145 use base
'Bio::DB::SeqFeature::Store::DBI::mysql';
146 use Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
;
147 use DBI
qw(:sql_types);
149 use Cwd
qw(abs_path getcwd);
150 use Bio
::DB
::GFF
::Util
::Rearrange
'rearrange';
151 use Bio
::SeqFeature
::Lite
;
153 use constant DEBUG
=>0;
154 use constant EXPERIMENTAL_COVERAGE
=>1;
156 # Using same limits as MySQL adaptor so I don't have to make something up.
157 use constant MAX_INT
=> 2_147_483_647
;
158 use constant MIN_INT
=> -2_147_483_648
;
159 use constant SUMMARY_BIN_SIZE
=> 1000; # we checkpoint coverage this often, about 20 meg overhead per feature type on hg
160 use constant USE_SPATIAL
=>0;
162 # The binning scheme places each feature into a bin.
163 # Bins are variably sized as powers of two. For example,
164 # there are 585 bins of size 2**17 (131072 bases)
167 @BINS = map {2**$_} (17, 20, 23, 26, 29); # TO DO: experiment with different bin sizes
169 for my $b (sort {$b<=>$a} @BINS) {
171 $start += $BINS[-1]/$b;
185 # my @BINS = sort {$a<=>$b} keys %BINS;
189 my ($start,$end) = @_;
191 my $len = $end - $start;
192 for my $bin (@BINS) {
195 my $binstart = int $start/$bin;
196 my $binend = int $end/$bin;
197 return $binstart+$BINS{$bin} if $binstart == $binend;
200 die "unreasonable coordinates ",$start+1,"..$end";
205 my ($start,$end) = @_;
208 for my $bin (@BINS) {
209 my $binstart = int $start/$bin;
210 my $binend = int $end/$bin;
211 push @results,$binstart+$BINS{$bin}..$binend+$BINS{$bin};
218 # object initialization
232 ) = rearrange
(['DSN',
233 ['TEMP','TEMPORARY'],
236 ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
238 ['PASS','PASSWD','PASSWORD'],
239 ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
240 ['WRITE','WRITEABLE'],
244 $writeable = 1 if $is_temporary or $dump_dir;
246 $dsn or $self->throw("Usage: ".__PACKAGE__
."->init(-dsn => \$dbh || \$dsn)");
252 $dsn = "dbi:SQLite:$dsn" unless $dsn =~ /^dbi:/;
253 $dbh = DBI
->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr
);
254 $dbh->do("PRAGMA synchronous = OFF;"); # makes writes much faster
255 $dbh->do("PRAGMA temp_store = MEMORY;"); # less disk I/O; some speedup
256 $dbh->do("PRAGMA cache_size = 20000;"); # less disk I/O; some speedup
257 # Keep track of database file location
259 my ($db_file) = ($dsn =~ m/(?:db(?:name)?|database)=(.+)$/);
260 $self->{dbh_file
} = "$cwd/$db_file";
263 $self->{is_temp
} = $is_temporary;
264 $self->{namespace
} = $namespace;
265 $self->{writeable
} = $writeable;
267 $self->default_settings;
268 $self->autoindex($autoindex) if defined $autoindex;
269 $self->dumpdir($dump_dir) if $dump_dir;
270 if ($self->is_temp) {
271 $self->init_tmp_database();
273 $self->init_database('erase');
277 sub table_definitions
{
283 id integer primary key autoincrement,
284 typeid integer not null,
285 strand integer default 0,
286 "indexed" integer default 1,
291 locationlist
=> <<END,
293 id integer primary key autoincrement,
294 seqname text not null
296 create index index_locationlist on locationlist (seqname);
301 id integer primary key autoincrement,
304 create index index_typelist on typelist (tag);
310 display_name integer default 0
312 create index index_name_id on name(id);
313 create index index_name_name on name(name);
319 attribute_id integer not null,
322 create index index_attribute_id on attribute(attribute_id);
323 create index index_attribute_value on attribute(attribute_value);
326 attributelist
=> <<END,
328 id integer primary key autoincrement,
331 create index index_attributelist_id on attributelist(id);
332 create index index_attributelist_tag on attributelist(tag);
334 parent2child
=> <<END,
337 child integer not null
339 create unique index index_parent2child_id_child on parent2child(id,child);
344 name text primary key,
351 offset integer not null,
353 primary key(id,offset)
358 unless ($self->_has_spatial_index) {
359 $defs->{feature_location
} = <<END;
361 id int(10) primary key,
367 create index index_feature_location on feature_location(seqid,bin,start,end);
372 if (EXPERIMENTAL_COVERAGE
) {
373 $defs->{interval_stats
} = <<END;
375 typeid integer not null,
376 seqid integer not null,
377 bin integer not null,
378 cum_count integer not null,
379 unique(typeid,seqid,bin)
389 # must do this first before calling table_definitions
390 $self->_create_spatial_index;
391 $self->SUPER::_init_database
(@_);
394 sub init_tmp_database
{
397 $self->_create_spatial_index;
398 $self->SUPER::init_tmp_database
(@_);
401 sub _create_spatial_index
{
403 my $dbh = $self->dbh;
404 local $dbh->{PrintError
} = 0;
405 $dbh->do("DROP TABLE IF EXISTS feature_index"); # spatial index
407 $dbh->do("CREATE VIRTUAL TABLE feature_index USING RTREE(id,seqid,bin,start,end)");
411 sub _has_spatial_index
{
413 return $self->{'_has_spatial_index'} if exists $self->{'_has_spatial_index'};
414 my $dbh = $self->dbh;
415 my ($count) = $dbh->selectrow_array("select count(*) from sqlite_master where name='feature_index'");
416 return $self->{'_has_spatial_index'} = $count;
419 sub _finish_bulk_update
{
421 my $dbh = $self->dbh;
422 my $dir = $self->{dumpdir
} || '.';
424 $self->begin_work; # making this a transaction greatly improves performance
426 for my $table ('feature', $self->index_tables) {
427 my $fh = $self->dump_filehandle($table);
428 my $path = $self->dump_path($table);
431 open $fh, '<', $path or $self->throw("Could not read file '$path': $!");
432 my $qualified_table = $self->_qualify($table);
435 if ($table =~ /feature$/) {
436 $sth = $dbh->prepare("REPLACE INTO $qualified_table VALUES (?,?,?,?,?)");
440 my ($id,$typeid,$strand,$indexed,$obj) = split(/\t/);
441 $sth->bind_param(1, $id);
442 $sth->bind_param(2, $typeid);
443 $sth->bind_param(3, $strand);
444 $sth->bind_param(4, $indexed);
445 $sth->bind_param(5, pack('H*',$obj), {TYPE
=> SQL_BLOB
});
449 my $feature_index = $self->_feature_index_table;
450 if ($table =~ /parent2child$/) {
451 $sth = $dbh->prepare("REPLACE INTO $qualified_table VALUES (?,?)");
452 } elsif ($table =~ /$feature_index$/) {
453 $sth = $dbh->prepare(
454 $self->_has_spatial_index ?
"REPLACE INTO $qualified_table VALUES (?,?,?,?,?)"
455 :"REPLACE INTO $qualified_table (id,seqid,bin,start,end) VALUES (?,?,?,?,?)"
457 } else { # attribute or name
458 $sth = $dbh->prepare("REPLACE INTO $qualified_table VALUES (?,?,?)");
463 $sth->execute(split(/\t/));
469 $self->commit; # commit the transaction
470 delete $self->{bulk_update_in_progress
};
471 delete $self->{filehandles
};
476 my @t = $self->SUPER::index_tables
;
477 return (@t,$self->_feature_index_table);
480 sub _enable_keys
{ } # nullop
481 sub _disable_keys
{ } # nullop
483 sub _fetch_indexed_features_sql
{
485 my $location_table = $self->_qualify('feature_location');
486 my $feature_table = $self->_qualify('feature');
488 SELECT typeid,seqid,start-1,end
489 FROM $location_table as l,$feature_table as f
490 WHERE l.id=f.id AND f.\"indexed\"=1
491 ORDER BY typeid,seqid,start
496 # get primary sequence between start and end
498 sub _fetch_sequence
{
500 my ($seqid,$start,$end) = @_;
502 # backward compatibility to the old days when I liked reverse complementing
503 # dna by specifying $start > $end
505 if (defined $start && defined $end && $start > $end) {
507 ($start,$end) = ($end,$start);
509 $start-- if defined $start;
510 $end-- if defined $end;
512 my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
513 my $offset2 = $self->_offset_boundary($seqid,$end || 'right');
514 my $sequence_table = $self->_sequence_table;
515 my $locationlist_table = $self->_locationlist_table;
517 # CROSS JOIN gives a hint to the SQLite query optimizer -- mucho speedup!
518 my $sth = $self->_prepare(<<END);
519 SELECT sequence,offset
520 FROM $locationlist_table as ll CROSS JOIN $sequence_table as s
529 $sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
531 while (my($frag,$offset) = $sth->fetchrow_array) {
532 substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
535 substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
538 $seq =~ tr/gatcGATC/ctagCTAG/;
544 sub _offset_boundary
{
546 my ($seqid,$position) = @_;
548 my $sequence_table = $self->_sequence_table;
549 my $locationlist_table = $self->_locationlist_table;
552 # use "CROSS JOIN" to give a hint to the SQLite query optimizer.
553 $sql = $position eq 'left' ?
"SELECT min(offset) FROM $locationlist_table as ll CROSS JOIN $sequence_table as s ON ll.id=s.id WHERE ll.seqname=?"
554 :$position eq 'right' ?
"SELECT max(offset) FROM $locationlist_table as ll CROSS JOIN $sequence_table as s ON ll.id=s.id WHERE ll.seqname=?"
555 :"SELECT max(offset) FROM $locationlist_table as ll CROSS JOIN $sequence_table as s ON ll.id=s.id WHERE ll.seqname=? AND offset<=?";
556 my $sth = $self->_prepare($sql);
557 my @args = $position =~ /^-?\d+$/ ?
($seqid,$position) : ($seqid);
558 $sth->execute(@args) or $self->throw($sth->errstr);
559 my $boundary = $sth->fetchall_arrayref->[0][0];
565 # Efficiently fetch a series of IDs from the database
566 # Can pass an array or an array ref
570 @_ or $self->throw('usage: fetch_many($id1,$id2,$id3...)');
571 my $ids = join ',',map {ref($_) ? @
$_ : $_} @_ or return;
572 my $features = $self->_feature_table;
574 my $sth = $self->_prepare(<<END);
575 SELECT id,object FROM $features WHERE id IN ($ids)
577 $sth->execute() or $self->throw($sth->errstr);
578 return $self->_sth2objs($sth);
583 my ($seq_id,$start,$end,$strand,
584 $name,$class,$allow_aliases,
591 ) = rearrange
([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
592 'NAME','CLASS','ALIASES',
593 ['TYPES','TYPE','PRIMARY_TAG'],
594 ['ATTRIBUTES','ATTRIBUTE'],
601 my (@from,@where,@args,@group);
602 $range_type ||= 'overlaps';
604 my $feature_table = $self->_feature_table;
605 @from = "$feature_table as f";
608 # hacky backward compatibility workaround
609 undef $class if $class && $class eq 'Sequence';
610 $name = "$class:$name" if defined $class && length $class > 0;
611 # last argument is the join field
612 my ($from,$where,$group,@a) = $self->_name_sql($name,$allow_aliases,'f.id');
613 push @from,$from if $from;
614 push @where,$where if $where;
615 push @group,$group if $group;
619 if (defined $seq_id) {
620 # last argument is the name of the features table
621 my ($from,$where,$group,@a) = $self->_location_sql($seq_id,$start,$end,$range_type,$strand,'f');
622 push @from,$from if $from;
623 push @where,$where if $where;
624 push @group,$group if $group;
628 if (defined($sources)) {
629 my @sources = ref($sources) eq 'ARRAY' ? @
{$sources} : ($sources);
630 if (defined($types)) {
631 my @types = ref($types) eq 'ARRAY' ? @
{$types} : ($types);
633 foreach my $type (@types) {
634 # *** not sure what to do if user supplies both -source and -type
635 # where the type includes a source!
637 push(@final_types, $type);
640 foreach my $source (@sources) {
641 push(@final_types, $type.':'.$source);
645 $types = \
@final_types;
648 $types = [map { ':'.$_ } @sources];
651 if (defined($types)) {
652 # last argument is the name of the features table
653 my ($from,$where,$group,@a) = $self->_types_sql($types,'f');
654 push @from,$from if $from;
655 push @where,$where if $where;
656 push @group,$group if $group;
660 if (defined $attributes) {
661 # last argument is the join field
662 my ($from,$where,$group,@a) = $self->_attributes_sql($attributes,'f.id');
663 push @from,$from if $from;
664 push @where,$where if $where;
665 push @group,$group if $group;
669 if (defined $fromtable) {
670 # last argument is the join field
671 my ($from,$where,$group,@a) = $self->_from_table_sql($fromtable,'f.id');
672 push @from,$from if $from;
673 push @where,$where if $where;
674 push @group,$group if $group;
678 # if no other criteria are specified, then
679 # only fetch indexed (i.e. top level objects)
680 @where = '"indexed"=1' unless @where;
682 my $from = join ', ',@from;
683 my $where = join ' AND ',map {"($_)"} @where;
684 my $group = join ', ',@group;
685 $group = "GROUP BY $group" if @group;
694 $self->_print_query($query,@args) if DEBUG
|| $self->debug;
696 my $sth = $self->_prepare($query);
697 $sth->execute(@args) or $self->throw($sth->errstr);
698 return $iterator ? Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
->new($sth,$self) : $self->_sth2objs($sth);
701 sub _make_attribute_group
{
703 my ($table_name,$attributes) = @_;
704 my $key_count = keys %$attributes or return;
705 my $count = $key_count-1;
706 return "f.id HAVING count(f.id)>$count";
711 my ($seq_id,$start,$end,$range_type,$strand,$location) = @_;
713 # the additional join on the location_list table badly impacts performance
714 # so we build a copy of the table in memory
715 my $seqid = $self->_locationid_nocreate($seq_id) || 0; # zero is an invalid primary ID, so will return empty
717 my $feature_index = $self->_feature_index_table;
718 my $from = "$feature_index as fi";
720 my ($bin_where,@bin_args);
721 if (defined $start && defined $end && !$self->_has_spatial_index) {
722 my @bins = $self->search_bins($start,$end);
723 $bin_where = ' AND bin in ('.join(',',@bins).')';
726 $start = MIN_INT
unless defined $start;
727 $end = MAX_INT
unless defined $end;
729 my ($range,@range_args);
730 if ($range_type eq 'overlaps') {
731 $range = "fi.end>=? AND fi.start<=?".$bin_where;
732 @range_args = ($start,$end,@bin_args);
733 } elsif ($range_type eq 'contains') {
734 $range = "fi.start>=? AND fi.end<=?".$bin_where;
735 @range_args = ($start,$end,@bin_args);
736 } elsif ($range_type eq 'contained_in') {
737 $range = "fi.start<=? AND fi.end>=?";
738 @range_args = ($start,$end);
740 $self->throw("range_type must be one of 'overlaps', 'contains' or 'contained_in'");
743 if (defined $strand) {
744 $range .= " AND strand=?";
745 push @range_args,$strand;
750 AND $location.id=fi.id
756 my @args = ($seqid,@range_args);
757 return ($from,$where,$group,@args);
760 sub _feature_index_table
{
762 return $self->_has_spatial_index ?
$self->_qualify('feature_index')
763 : $self->_qualify('feature_location') }
765 # Do a case-insensitive search a la the PostgreSQL adaptor
768 my ($name,$allow_aliases,$join) = @_;
769 my $name_table = $self->_name_table;
771 my $from = "$name_table as n";
772 my ($match,$string) = $self->_match_sql($name);
774 my $where = "n.id=$join AND lower(n.name) $match";
775 $where .= " AND n.display_name>0" unless $allow_aliases;
776 return ($from,$where,'',$string);
779 sub _search_attributes
{
781 my ($search_string,$attribute_names,$limit) = @_;
782 my @words = map {quotemeta($_)} split /\s+/,$search_string;
784 my $name_table = $self->_name_table;
785 my $attribute_table = $self->_attribute_table;
786 my $attributelist_table = $self->_attributelist_table;
787 my $type_table = $self->_type_table;
788 my $typelist_table = $self->_typelist_table;
790 my @tags = @
$attribute_names;
791 my $tag_sql = join ' OR ',("al.tag=?") x
@tags;
793 my $perl_regexp = join '|',@words;
795 my @wild_card_words = map { "%$_%" } @words;
796 my $sql_regexp = join ' OR ',("a.attribute_value LIKE ?") x
@words;
797 # CROSS JOIN disables SQLite's table reordering optimization
799 SELECT name,attribute_value,tl.tag,n.id
800 FROM $attributelist_table AS al
801 CROSS JOIN $attribute_table AS a ON al.id = a.attribute_id
802 CROSS JOIN $name_table AS n ON n.id = a.id
803 CROSS JOIN $type_table AS t ON t.id = n.id
804 CROSS JOIN $typelist_table AS tl ON tl.id = t.typeid
809 $sql .= "LIMIT $limit" if defined $limit;
810 $self->_print_query($sql,@tags,@words) if DEBUG
|| $self->debug;
811 my $sth = $self->_prepare($sql);
812 $sth->execute(@tags,@wild_card_words) or $self->throw($sth->errstr);
815 while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
816 my (@hits) = $value =~ /$perl_regexp/ig;
817 my @words_in_row = split /\b/,$value;
818 my $score = int(@hits*100/@words/@words_in_row);
819 push @results,[$name,$value,$score,$type,$id];
822 @results = sort {$b->[2]<=>$a->[2]} @results;
831 if ($name =~ /(?:^|[^\\])[*?]/) {
832 $name =~ s/(^|[^\\])([%_])/$1\\$2/g;
833 $name =~ s/(^|[^\\])\*/$1%/g;
834 $name =~ s/(^|[^\\])\?/$1_/g;
838 $match = "= lower(?)";
841 return ($match,$string);
844 sub _attributes_sql
{
846 my ($attributes,$join) = @_;
848 my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
849 my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
851 my $attribute_table = $self->_attribute_table;
852 my $attributelist_table = $self->_attributelist_table;
854 my $from = "$attribute_table AS a INDEXED BY index_attribute_id, $attributelist_table AS al";
858 AND a.attribute_id=al.id
862 my $group = $group_by;
864 my @args = (@bind_args,@group_args);
865 return ($from,$where,$group,@args);
868 # overridden because of case-sensitivity of matches
871 my ($types,$type_table) = @_;
872 my ($primary_tag,$source_tag);
874 my @types = ref $types eq 'ARRAY' ? @
$types : $types;
876 my $typelist = $self->_typelist_table;
877 my $from = "$typelist AS tl";
881 for my $type (@types) {
883 if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
884 $primary_tag = $type->method;
885 $source_tag = $type->source;
887 ($primary_tag,$source_tag) = split ':',$type,2;
890 if (length $source_tag) {
891 push @matches,"lower(tl.tag)=lower(?)";
892 push @args,"$primary_tag:$source_tag";
894 push @matches,"tl.tag LIKE ?";
895 push @args,"$primary_tag:%";
898 my $matches = join ' OR ',@matches;
901 tl.id=$type_table.typeid
905 return ($from,$where,'',@args);
910 $self->dbh->do("ANALYZE $_") foreach $self->index_tables;
914 # Replace Bio::SeqFeatureI into database.
919 my $index_flag = shift || undef;
921 # ?? shouldn't need to do this
922 # $self->_load_class($object);
923 my $id = $object->primary_id;
924 my $features = $self->_feature_table;
926 my $sth = $self->_prepare(<<END);
927 REPLACE INTO $features (id,object,"indexed",strand,typeid) VALUES (?,?,?,?,?)
930 my ($seqid,$start,$end,$strand,$bin) = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x6
;
932 my $primary_tag = $object->primary_tag;
933 my $source_tag = $object->source_tag || '';
934 $primary_tag .= ":$source_tag";
935 my $typeid = $self->_typeid($primary_tag,1);
937 my $frozen = $self->no_blobs() ?
0 : $self->freeze($object);
939 $sth->bind_param(1, $id);
940 $sth->bind_param(2, $frozen, {TYPE
=> SQL_BLOB
});
941 $sth->bind_param(3, $index_flag||0);
942 $sth->bind_param(4, $strand);
943 $sth->bind_param(5, $typeid);
945 $sth->execute() or $self->throw($sth->errstr);
947 my $dbh = $self->dbh;
948 $object->primary_id($dbh->func('last_insert_rowid')) unless defined $id;
950 $self->flag_for_indexing($dbh->func('last_insert_rowid')) if $self->{bulk_update_in_progress
};
953 # doesn't work with this schema, since we have to update name and attribute
954 # tables which need object ids, which we can only know by replacing feats in
955 # the feature table one by one
958 my $index_flag = shift || undef;
961 my $features = $self->_feature_table;
964 foreach my $object (@objects) {
965 my $id = $object->primary_id;
966 my (undef,undef,undef,$strand) = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x4
;
967 my $primary_tag = $object->primary_tag;
968 my $source_tag = $object->source_tag || '';
969 $primary_tag .= ":$source_tag";
970 my $typeid = $self->_typeid($primary_tag,1);
972 push(@insert_values, ($id,0,$index_flag||0,$strand,$typeid));
977 push(@value_blocks, '(?,?,?,?,?)');
979 my $value_blocks = join(',', @value_blocks);
980 my $sql = qq{REPLACE INTO
$features (id
,object
,"indexed",strand
,typeid
) VALUES
$value_blocks};
982 my $sth = $self->_prepare($sql);
983 $sth->execute(@insert_values) or $self->throw($sth->errstr);
986 sub _get_location_and_bin
{
989 my $seqid = $self->_locationid($obj->seq_id||'');
990 my $start = $obj->start;
992 my $strand = $obj->strand;
993 return ($seqid,$start,$end,$strand,$self->calculate_bin($start,$end));
999 # Insert one Bio::SeqFeatureI into database. primary_id must be undef
1004 my $index_flag = shift || 0;
1006 $self->_load_class($object);
1007 defined $object->primary_id and $self->throw("$object already has a primary id");
1009 my $features = $self->_feature_table;
1010 my $sth = $self->_prepare(<<END);
1011 INSERT INTO $features (id,object,"indexed") VALUES (?,?,?)
1013 $sth->execute(undef,$self->freeze($object),$index_flag) or $self->throw($sth->errstr);
1014 my $dbh = $self->dbh;
1015 $object->primary_id($dbh->func('last_insert_rowid'));
1016 $self->flag_for_indexing($dbh->func('last_insert_rowid')) if $self->{bulk_update_in_progress
};
1019 =head2 toplevel_types
1021 Title : toplevel_types
1022 Usage : @type_list = $db->toplevel_types
1023 Function: Get the toplevel types in the database
1024 Returns : array of Bio::DB::GFF::Typename objects
1028 This is similar to types() but only returns the types of
1029 INDEXED (toplevel) features.
1033 sub toplevel_types
{
1035 eval "require Bio::DB::GFF::Typename"
1036 unless Bio
::DB
::GFF
::Typename
->can('new');
1037 my $typelist_table = $self->_typelist_table;
1038 my $feature_table = $self->_feature_table;
1040 SELECT distinct(tag) from $typelist_table as tl,$feature_table as f
1041 WHERE tl.id=f.typeid
1045 $self->_print_query($sql) if DEBUG
|| $self->debug;
1046 my $sth = $self->_prepare($sql);
1047 $sth->execute() or $self->throw($sth->errstr);
1050 while (my($tag) = $sth->fetchrow_array) {
1051 push @results,Bio
::DB
::GFF
::Typename
->new($tag);
1059 my ($table,$namefield,$name,$add_if_missing) = @_;
1060 my $qualified_table = $self->_qualify($table);
1061 my $sth = $self->_prepare(<<END);
1062 SELECT id FROM $qualified_table WHERE lower($namefield)=lower(?)
1064 $sth->execute($name) or die $sth->errstr;
1065 my ($id) = $sth->fetchrow_array;
1067 return $id if defined $id;
1068 return unless $add_if_missing;
1070 $sth = $self->_prepare(<<END);
1071 INSERT INTO $qualified_table ($namefield) VALUES (?)
1073 $sth->execute($name) or die $sth->errstr;
1074 my $dbh = $self->dbh;
1075 return $dbh->func('last_insert_rowid');
1078 # special-purpose store for bulk loading - write to a file rather than to the db
1082 my $indexed = shift;
1085 my $store_fh = $self->dump_filehandle('feature');
1086 my $dbh = $self->dbh;
1088 my $autoindex = $self->autoindex;
1091 my $id = $self->next_id;
1092 my ($seqid,$start,$end,$strand) = $indexed ?
$self->_get_location_and_bin($obj) : (undef)x4
;
1093 my $primary_tag = $obj->primary_tag;
1094 my $source_tag = $obj->source_tag || '';
1095 $primary_tag .= ":$source_tag";
1096 my $typeid = $self->_typeid($primary_tag,1);
1098 # Encode BLOB in hex so we can more easily import it into SQLite
1100 join("\t",$id,$typeid,$strand,$indexed,
1101 unpack('H*', $self->freeze($obj))),"\n";
1102 $obj->primary_id($id);
1103 $self->_update_indexes($obj) if $indexed && $autoindex;
1107 # remember whether we are have ever stored a non-indexed feature
1108 unless ($indexed or $self->{indexed_flag
}++) {
1109 $self->subfeatures_are_indexed(0);
1114 sub _dump_update_name_index
{
1117 my $fh = $self->dump_filehandle('name');
1118 my $dbh = $self->dbh;
1119 my ($names,$aliases) = $self->feature_names($obj);
1120 # unlike DBI::mysql, don't quote, as quotes will be quoted when loaded
1121 print $fh join("\t",$id,lc($_),1),"\n" foreach @
$names;
1122 print $fh join("\t",$id,lc($_),0),"\n" foreach @
$aliases;
1125 sub _update_name_index
{
1128 my $name = $self->_name_table;
1129 my $primary_id = $obj->primary_id;
1131 $self->_delete_index($name,$id);
1132 my ($names,$aliases) = $self->feature_names($obj);
1134 my $sth = $self->_prepare("INSERT INTO $name (id,name,display_name) VALUES (?,?,?)");
1136 $sth->execute($id,lc $_,1) or $self->throw($sth->errstr) foreach @
$names;
1137 $sth->execute($id,lc $_,0) or $self->throw($sth->errstr) foreach @
$aliases;
1142 sub _dump_update_attribute_index
{
1145 my $fh = $self->dump_filehandle('attribute');
1146 my $dbh = $self->dbh;
1147 for my $tag ($obj->all_tags) {
1148 my $tagid = $self->_attributeid($tag);
1149 for my $value ($obj->each_tag_value($tag)) {
1150 # unlike DBI::mysql, don't quote, as quotes will be quoted when loaded
1151 print $fh join("\t",$id,$tagid,$value),"\n";
1156 sub _update_indexes
{
1159 defined (my $id = $obj->primary_id) or return;
1160 $self->SUPER::_update_indexes
($obj);
1162 if ($self->{bulk_update_in_progress
}) {
1163 $self->_dump_update_location_index($obj,$id);
1165 $self->_update_location_index($obj,$id);
1169 sub _update_location_index
{
1172 my ($seqid,$start,$end,$strand,$bin) = $self->_get_location_and_bin($obj);
1174 my $table = $self->_feature_index_table;
1175 $self->_delete_index($table,$id);
1179 if ($self->_has_spatial_index) {
1180 $sql = "INSERT INTO $table (id,seqid,bin,start,end) values (?,?,?,?,?)";
1181 @args = ($id,$seqid,$bin,$start,$end);
1183 $sql = "INSERT INTO $table (id,seqid,bin,start,end) values (?,?,?,?,?)";
1184 @args = ($id,$seqid,$bin,$start,$end);
1187 my $sth = $self->_prepare($sql);
1188 $sth->execute(@args);
1192 sub _dump_update_location_index
{
1195 my $table = $self->_feature_index_table;
1196 my $fh = $self->dump_filehandle($table);
1197 my $dbh = $self->dbh;
1198 my ($seqid,$start,$end,$strand,$bin) = $self->_get_location_and_bin($obj);
1199 my @args = $self->_has_spatial_index ?
($id,$seqid,$bin,$start,$end)
1200 : ($id,$seqid,$bin,$start,$end);
1201 print $fh join("\t",@args),"\n";
1206 # Remove filehandles, so temporal files can be properly deleted
1207 if (%DBI::installed_drh
) {
1208 DBI
->disconnect_all;
1209 %DBI::installed_drh
= ();
1218 Nathan Weeks - Nathan.Weeks@ars.usda.gov
1220 Copyright (c) 2009 Nathan Weeks
1222 Modified 2010 to support cumulative statistics by Lincoln Stein
1223 <lincoln.stein@gmail.com>.
1225 This library is free software; you can redistribute it and/or modify
1226 it under the same terms as Perl itself. See the Bioperl license for