tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / DB / SeqFeature / Store / DBI / mysql.pm
blob537b0bc313d16b4b3ccabcc95699f3478d1f911e
1 package Bio::DB::SeqFeature::Store::DBI::mysql;
2 # $Id$
4 =head1 NAME
6 Bio::DB::SeqFeature::Store::DBI::mysql -- Mysql implementation of Bio::DB::SeqFeature::Store
8 =head1 SYNOPSIS
10 use Bio::DB::SeqFeature::Store;
12 # Open the sequence database
13 my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::mysql',
14 -dsn => 'dbi:mysql:test');
16 # get a feature from somewhere
17 my $feature = Bio::SeqFeature::Generic->new(...);
19 # store it
20 $db->store($feature) or die "Couldn't store!";
22 # primary ID of the feature is changed to indicate its primary ID
23 # in the database...
24 my $id = $feature->primary_id;
26 # get the feature back out
27 my $f = $db->fetch($id);
29 # change the feature and update it
30 $f->start(100);
31 $db->update($f) or die "Couldn't update!";
33 # searching...
34 # ...by id
35 my @features = $db->fetch_many(@list_of_ids);
37 # ...by name
38 @features = $db->get_features_by_name('ZK909');
40 # ...by alias
41 @features = $db->get_features_by_alias('sma-3');
43 # ...by type
44 @features = $db->get_features_by_name('gene');
46 # ...by location
47 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
49 # ...by attribute
50 @features = $db->get_features_by_attribute({description => 'protein kinase'})
52 # ...by the GFF "Note" field
53 @result_list = $db->search_notes('kinase');
55 # ...by arbitrary combinations of selectors
56 @features = $db->features(-name => $name,
57 -type => $types,
58 -seq_id => $seqid,
59 -start => $start,
60 -end => $end,
61 -attributes => $attributes);
63 # ...using an iterator
64 my $iterator = $db->get_seq_stream(-name => $name,
65 -type => $types,
66 -seq_id => $seqid,
67 -start => $start,
68 -end => $end,
69 -attributes => $attributes);
71 while (my $feature = $iterator->next_seq) {
72 # do something with the feature
75 # ...limiting the search to a particular region
76 my $segment = $db->segment('Chr1',5000=>6000);
77 my @features = $segment->features(-type=>['mRNA','match']);
79 # getting & storing sequence information
80 # Warning: this returns a string, and not a PrimarySeq object
81 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
82 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
84 # what feature types are defined in the database?
85 my @types = $db->types;
87 # create a new feature in the database
88 my $feature = $db->new_feature(-primary_tag => 'mRNA',
89 -seq_id => 'chr3',
90 -start => 10000,
91 -end => 11000);
93 =head1 DESCRIPTION
95 Bio::DB::SeqFeature::Store::mysql is the Mysql adaptor for
96 Bio::DB::SeqFeature::Store. You will not create it directly, but
97 instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
99 See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
101 =head2 Using the Mysql adaptor
103 Before you can use the adaptor, you must use the mysqladmin tool to
104 create a database and establish a user account with write
105 permission. In order to use "fast" loading, the user account must have
106 "file" privileges.
108 To establish a connection to the database, call
109 Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::mysql',@more_args). The
110 additional arguments are as follows:
112 Argument name Description
113 ------------- -----------
115 -dsn The database name. You can abbreviate
116 "dbi:mysql:foo" as "foo" if you wish.
118 -user Username for authentication.
120 -pass Password for authentication.
122 -namespace A prefix to attach to each table. This allows you
123 to have several virtual databases in the same
124 physical database.
126 -temp Boolean flag. If true, a temporary database
127 will be created and destroyed as soon as
128 the Store object goes out of scope. (synonym -temporary)
130 -autoindex Boolean flag. If true, features in the database will be
131 reindexed every time they change. This is the default.
134 -tmpdir Directory in which to place temporary files during "fast" loading.
135 Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
137 -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
138 (synonyms -options, -dbi_attr)
140 -write Pass true to open database for writing or updating.
142 If successful, a new instance of
143 Bio::DB::SeqFeature::Store::DBI::mysql will be returned.
145 In addition to the standard methods supported by all well-behaved
146 Bio::DB::SeqFeature::Store databases, several following
147 adaptor-specific methods are provided. These are described in the next
148 sections.
150 =cut
152 use strict;
154 use base 'Bio::DB::SeqFeature::Store';
155 use Bio::DB::SeqFeature::Store::DBI::Iterator;
156 use DBI;
157 use Memoize;
158 use Cwd 'abs_path';
159 use Bio::DB::GFF::Util::Rearrange 'rearrange';
160 use Bio::SeqFeature::Lite;
161 use File::Spec;
162 use constant DEBUG=>0;
164 # from the MySQL documentation...
165 # WARNING: if your sequence uses coordinates greater than 2 GB, you are out of luck!
166 use constant MAX_INT => 2_147_483_647;
167 use constant MIN_INT => -2_147_483_648;
168 use constant MAX_BIN => 1_000_000_000; # size of largest feature = 1 Gb
169 use constant MIN_BIN => 1000; # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these
171 memoize('_typeid');
172 memoize('_locationid');
173 memoize('_attributeid');
174 memoize('dump_path');
177 # object initialization
179 sub init {
180 my $self = shift;
181 my ($dsn,
182 $is_temporary,
183 $autoindex,
184 $namespace,
185 $dump_dir,
186 $user,
187 $pass,
188 $dbi_options,
189 $writeable,
190 $create,
191 ) = rearrange(['DSN',
192 ['TEMP','TEMPORARY'],
193 'AUTOINDEX',
194 'NAMESPACE',
195 ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
196 'USER',
197 ['PASS','PASSWD','PASSWORD'],
198 ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
199 ['WRITE','WRITEABLE'],
200 'CREATE',
201 ],@_);
202 $dbi_options ||= {};
203 $writeable = 1 if $is_temporary or $dump_dir;
205 $dsn or $self->throw("Usage: ".__PACKAGE__."->init(-dsn => \$dbh || \$dsn)");
207 my $dbh;
208 if (ref $dsn) {
209 $dbh = $dsn;
210 } else {
211 $dsn = "dbi:mysql:$dsn" unless $dsn =~ /^dbi:/;
212 $dbh = DBI->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr);
213 $dbh->{mysql_auto_reconnect} = 1;
215 $self->{dbh} = $dbh;
216 $self->{is_temp} = $is_temporary;
217 $self->{namespace} = $namespace;
218 $self->{writeable} = $writeable;
220 $self->default_settings;
221 $self->autoindex($autoindex) if defined $autoindex;
222 $self->dumpdir($dump_dir) if $dump_dir;
223 if ($self->is_temp) {
224 $self->init_tmp_database();
225 } elsif ($create) {
226 $self->init_database('erase');
230 sub writeable { shift->{writeable} }
232 sub can_store_parentage { 1 }
234 sub table_definitions {
235 my $self = shift;
236 return {
237 feature => <<END,
239 id int(10) auto_increment primary key,
240 typeid int(10) not null,
241 seqid int(10),
242 start int,
243 end int,
244 strand tinyint default 0,
245 tier tinyint,
246 bin int,
247 indexed tinyint default 1,
248 object MEDIUMBLOB not null,
249 index(seqid,tier,bin,typeid),
250 index(typeid)
254 locationlist => <<END,
256 id int(10) auto_increment primary key,
257 seqname varchar(256) not null,
258 index(seqname)
262 typelist => <<END,
264 id int(10) auto_increment primary key,
265 tag varchar(256) not null,
266 index(tag)
269 name => <<END,
271 id int(10) not null,
272 name varchar(256) not null,
273 display_name tinyint default 0,
274 index(id),
275 index(name)
279 attribute => <<END,
281 id int(10) not null,
282 attribute_id int(10) not null,
283 attribute_value text,
284 index(id),
285 index(attribute_id,attribute_value(10))
289 attributelist => <<END,
291 id int(10) auto_increment primary key,
292 tag varchar(256) not null,
293 index(tag)
296 parent2child => <<END,
298 id int(10) not null,
299 child int(10) not null,
300 index(id,child)
304 meta => <<END,
306 name varchar(128) primary key,
307 value varchar(128) not null
310 sequence => <<END,
312 id int(10) not null,
313 offset int(10) unsigned not null,
314 sequence longblob,
315 primary key(id,offset)
322 # default settings -- will create and populate meta table if needed
324 sub default_settings {
325 my $self = shift;
326 $self->maybe_create_meta();
327 $self->SUPER::default_settings;
328 $self->autoindex(1);
329 $self->dumpdir(File::Spec->tmpdir);
334 # retrieve database handle
336 sub dbh {
337 my $self = shift;
338 my $d = $self->{dbh};
339 $self->{dbh} = shift if @_;
343 sub clone {
344 my $self = shift;
345 $self->{dbh}{InactiveDestroy} = 1;
346 $self->{dbh} = $self->{dbh}->clone
347 unless $self->is_temp;
351 # get/set directory for bulk load tables
353 sub dumpdir {
354 my $self = shift;
355 my $d = $self->{dumpdir};
356 $self->{dumpdir} = abs_path(shift) if @_;
361 # table namespace (multiple dbs in one mysql db)
363 sub namespace {
364 my $self = shift;
365 my $d = $self->{namespace};
366 $self->{namespace} = shift if @_;
371 # find a path that corresponds to a dump table
373 sub dump_path {
374 my $self = shift;
375 my $table = $self->_qualify(shift);
376 return "$self->{dumpdir}/$table.$$";
380 # make a filehandle (writeable) that corresponds to a dump table
382 sub dump_filehandle {
383 my $self = shift;
384 my $table = shift;
385 eval "require IO::File" unless IO::File->can('new');
386 my $path = $self->dump_path($table);
387 my $fh = $self->{filehandles}{$path} ||= IO::File->new(">$path");
388 $fh;
392 # find the next ID for a feature (used only during bulk loading)
394 sub next_id {
395 my $self = shift;
396 $self->{max_id} ||= $self->max_id;
397 return ++$self->{max_id};
401 # find the maximum ID for a feature (used only during bulk loading)
403 sub max_id {
404 my $self = shift;
405 my $sth = $self->_prepare("SELECT max(id) from feature");
406 $sth->execute or $self->throw($sth->errstr);
407 my ($id) = $sth->fetchrow_array;
408 $id;
412 # wipe database clean and reinstall schema
414 sub _init_database {
415 my $self = shift;
416 my $erase = shift;
418 my $dbh = $self->dbh;
419 my $tables = $self->table_definitions;
420 foreach (keys %$tables) {
421 next if $_ eq 'meta'; # don't get rid of meta data!
422 my $table = $self->_qualify($_);
423 $dbh->do("DROP table IF EXISTS $table") if $erase;
424 my $query = "CREATE TABLE IF NOT EXISTS $table $tables->{$_}";
425 $self->_create_table($dbh,$query);
427 $self->subfeatures_are_indexed(1) if $erase;
431 sub init_tmp_database {
432 my $self = shift;
433 my $dbh = $self->dbh;
434 my $tables = $self->table_definitions;
435 for my $t (keys %$tables) {
436 next if $t eq 'meta'; # done earlier
437 my $table = $self->_qualify($t);
438 my $query = "CREATE TEMPORARY TABLE $table $tables->{$t}";
439 $self->_create_table($dbh,$query);
444 sub _create_table {
445 my $self = shift;
446 my ($dbh,$query) = @_;
447 for my $q (split ';',$query) {
448 chomp($q);
449 next unless $q =~ /\S/;
450 $dbh->do("$q;\n") or $self->throw($dbh->errstr);
454 sub maybe_create_meta {
455 my $self = shift;
456 return unless $self->writeable;
457 my $table = $self->_qualify('meta');
458 my $tables = $self->table_definitions;
459 my $temporary = $self->is_temp ? 'TEMPORARY' : '';
460 $self->dbh->do("CREATE $temporary TABLE IF NOT EXISTS $table $tables->{meta}");
464 # use temporary tables
466 sub is_temp {
467 shift->{is_temp};
470 sub attributes {
471 my $self = shift;
472 my $dbh = $self->dbh;
473 my $attributelist_table = $self->_attributelist_table;
475 my $a = $dbh->selectcol_arrayref("SELECT tag FROM $attributelist_table")
476 or $self->throw($dbh->errstr);
477 return @$a;
480 sub _store {
481 my $self = shift;
483 # special case for bulk updates
484 return $self->_dump_store(@_) if $self->{bulk_update_in_progress};
486 my $indexed = shift;
487 my $count = 0;
489 my $autoindex = $self->autoindex;
491 my $dbh = $self->dbh;
492 local $dbh->{RaiseError} = 1;
493 $dbh->begin_work;
494 eval {
495 for my $obj (@_) {
496 $self->replace($obj,$indexed);
497 $self->_update_indexes($obj) if $indexed && $autoindex;
498 $count++;
502 if ($@) {
503 warn "Transaction aborted because $@";
504 $dbh->rollback;
506 else {
507 $dbh->commit;
510 # remember whether we are have ever stored a non-indexed feature
511 unless ($indexed or $self->{indexed_flag}++) {
512 $self->subfeatures_are_indexed(0);
514 $count;
517 # we memoize this in order to avoid making zillions of calls
518 sub autoindex {
519 my $self = shift;
521 # special case for bulk update -- need to build the indexes
522 # at the same time we build the main feature table
523 return 1 if $self->{bulk_update_in_progress};
524 my $d = $self->setting('autoindex');
525 $self->setting(autoindex=>shift) if @_;
529 sub _start_bulk_update {
530 my $self = shift;
531 my $dbh = $self->dbh;
532 $self->{bulk_update_in_progress}++;
535 sub _finish_bulk_update {
536 my $self = shift;
537 my $dbh = $self->dbh;
538 my $dir = $self->{dumpdir} || '.';
539 for my $table ('feature',$self->index_tables) {
540 my $fh = $self->dump_filehandle($table);
541 my $path = $self->dump_path($table);
542 $fh->close;
543 my $qualified_table = $self->_qualify($table);
544 $dbh->do("LOAD DATA LOCAL INFILE '$path' REPLACE INTO TABLE $qualified_table FIELDS OPTIONALLY ENCLOSED BY '\\''")
545 or $self->throw($dbh->errstr);
546 unlink $path;
548 delete $self->{bulk_update_in_progress};
549 delete $self->{filehandles};
554 # Add a subparts to a feature. Both feature and all subparts must already be in database.
556 sub _add_SeqFeature {
557 my $self = shift;
559 # special purpose method for case when we are doing a bulk update
560 return $self->_dump_add_SeqFeature(@_) if $self->{bulk_update_in_progress};
562 my $parent = shift;
563 my @children = @_;
565 my $dbh = $self->dbh;
566 local $dbh->{RaiseError} = 1;
568 my $child_table = $self->_parent2child_table();
569 my $count = 0;
571 my $sth = $self->_prepare(<<END);
572 REPLACE INTO $child_table (id,child) VALUES (?,?)
575 my $parent_id = (ref $parent ? $parent->primary_id : $parent)
576 or $self->throw("$parent should have a primary_id");
578 $dbh->begin_work or $self->throw($dbh->errstr);
579 eval {
580 for my $child (@children) {
581 my $child_id = ref $child ? $child->primary_id : $child;
582 defined $child_id or die "no primary ID known for $child";
583 $sth->execute($parent_id,$child_id);
584 $count++;
588 if ($@) {
589 warn "Transaction aborted because $@";
590 $dbh->rollback;
592 else {
593 $dbh->commit;
595 $sth->finish;
596 $count;
599 sub _fetch_SeqFeatures {
600 my $self = shift;
601 my $parent = shift;
602 my @types = @_;
604 my $parent_id = $parent->primary_id or $self->throw("$parent should have a primary_id");
605 my $feature_table = $self->_feature_table;
606 my $child_table = $self->_parent2child_table();
608 my @from = ("$feature_table as f","$child_table as c");
609 my @where = ('f.id=c.child','c.id=?');
610 my @args = $parent_id;
612 if (@types) {
613 my ($from,$where,undef,@a) = $self->_types_sql(\@types,'f');
614 push @from,$from if $from;
615 push @where,$where if $where;
616 push @args,@a;
619 my $from = join ', ',@from;
620 my $where = join ' AND ',@where;
622 my $query = <<END;
623 SELECT f.id,f.object
624 FROM $from
625 WHERE $where
628 $self->_print_query($query,@args) if DEBUG || $self->debug;
630 my $sth = $self->_prepare($query) or $self->throw($self->dbh->errstr);
632 $sth->execute(@args) or $self->throw($sth->errstr);
633 return $self->_sth2objs($sth);
637 # get primary sequence between start and end
639 sub _fetch_sequence {
640 my $self = shift;
641 my ($seqid,$start,$end) = @_;
643 # backward compatibility to the old days when I liked reverse complementing
644 # dna by specifying $start > $end
645 my $reversed;
646 if (defined $start && defined $end && $start > $end) {
647 $reversed++;
648 ($start,$end) = ($end,$start);
650 $start-- if defined $start;
651 $end-- if defined $end;
653 my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
654 my $offset2 = $self->_offset_boundary($seqid,$end || 'right');
655 my $sequence_table = $self->_sequence_table;
656 my $locationlist_table = $self->_locationlist_table;
658 my $sth = $self->_prepare(<<END);
659 SELECT sequence,offset
660 FROM $sequence_table as s,$locationlist_table as ll
661 WHERE s.id=ll.id
662 AND ll.seqname= ?
663 AND offset >= ?
664 AND offset <= ?
665 ORDER BY offset
668 my $seq = '';
669 $sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
671 while (my($frag,$offset) = $sth->fetchrow_array) {
672 substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
673 $seq .= $frag;
675 substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
676 if ($reversed) {
677 $seq = reverse $seq;
678 $seq =~ tr/gatcGATC/ctagCTAG/;
680 $sth->finish;
681 $seq;
684 sub _offset_boundary {
685 my $self = shift;
686 my ($seqid,$position) = @_;
688 my $sequence_table = $self->_sequence_table;
689 my $locationlist_table = $self->_locationlist_table;
691 my $sql;
692 $sql = $position eq 'left' ? "SELECT min(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
693 :$position eq 'right' ? "SELECT max(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
694 :"SELECT max(offset) FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=? AND offset<=?";
695 my $sth = $self->_prepare($sql);
696 my @args = $position =~ /^-?\d+$/ ? ($seqid,$position) : ($seqid);
697 $sth->execute(@args) or $self->throw($sth->errstr);
698 my $boundary = $sth->fetchall_arrayref->[0][0];
699 $sth->finish;
700 return $boundary;
705 # add namespace to tablename
707 sub _qualify {
708 my $self = shift;
709 my $table_name = shift;
710 my $namespace = $self->namespace;
711 return $table_name unless defined $namespace;
712 return "${namespace}_${table_name}";
716 # Fetch a Bio::SeqFeatureI from database using its primary_id
718 sub _fetch {
719 my $self = shift;
720 @_ or $self->throw("usage: fetch(\$primary_id)");
721 my $primary_id = shift;
722 my $features = $self->_feature_table;
723 my $sth = $self->_prepare(<<END);
724 SELECT id,object FROM $features WHERE id=?
726 $sth->execute($primary_id) or $self->throw($sth->errstr);
727 my $obj = $self->_sth2obj($sth);
728 $sth->finish;
729 $obj;
733 # Efficiently fetch a series of IDs from the database
734 # Can pass an array or an array ref
736 sub _fetch_many {
737 my $self = shift;
738 @_ or $self->throw('usage: fetch_many($id1,$id2,$id3...)');
739 my $ids = join ',',map {ref($_) ? @$_ : $_} @_ or return;
740 my $features = $self->_feature_table;
742 my $sth = $self->_prepare(<<END);
743 SELECT id,object FROM $features WHERE id IN ($ids)
745 $sth->execute() or $self->throw($sth->errstr);
746 return $self->_sth2objs($sth);
749 sub _features {
750 my $self = shift;
751 my ($seq_id,$start,$end,$strand,
752 $name,$class,$allow_aliases,
753 $types,
754 $attributes,
755 $range_type,
756 $fromtable,
757 $iterator,
758 $sources
759 ) = rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
760 'NAME','CLASS','ALIASES',
761 ['TYPES','TYPE','PRIMARY_TAG'],
762 ['ATTRIBUTES','ATTRIBUTE'],
763 'RANGE_TYPE',
764 'FROM_TABLE',
765 'ITERATOR',
766 ['SOURCE','SOURCES']
767 ],@_);
769 my (@from,@where,@args,@group);
770 $range_type ||= 'overlaps';
772 my $feature_table = $self->_feature_table;
773 @from = "$feature_table as f";
775 if (defined $name) {
776 # hacky backward compatibility workaround
777 undef $class if $class && $class eq 'Sequence';
778 $name = "$class:$name" if defined $class && length $class > 0;
779 # last argument is the join field
780 my ($from,$where,$group,@a) = $self->_name_sql($name,$allow_aliases,'f.id');
781 push @from,$from if $from;
782 push @where,$where if $where;
783 push @group,$group if $group;
784 push @args,@a;
787 if (defined $seq_id) {
788 # last argument is the name of the features table
789 my ($from,$where,$group,@a) = $self->_location_sql($seq_id,$start,$end,$range_type,$strand,'f');
790 push @from,$from if $from;
791 push @where,$where if $where;
792 push @group,$group if $group;
793 push @args,@a;
796 if (defined($sources)) {
797 my @sources = ref($sources) eq 'ARRAY' ? @{$sources} : ($sources);
798 if (defined($types)) {
799 my @types = ref($types) eq 'ARRAY' ? @{$types} : ($types);
800 my @final_types;
801 foreach my $type (@types) {
802 # *** not sure what to do if user supplies both -source and -type
803 # where the type includes a source!
804 if ($type =~ /:/) {
805 push(@final_types, $type);
807 else {
808 foreach my $source (@sources) {
809 push(@final_types, $type.':'.$source);
813 $types = \@final_types;
815 else {
816 $types = [map { ':'.$_ } @sources];
819 if (defined($types)) {
820 # last argument is the name of the features table
821 my ($from,$where,$group,@a) = $self->_types_sql($types,'f');
822 push @from,$from if $from;
823 push @where,$where if $where;
824 push @group,$group if $group;
825 push @args,@a;
828 if (defined $attributes) {
829 # last argument is the join field
830 my ($from,$where,$group,@a) = $self->_attributes_sql($attributes,'f.id');
831 push @from,$from if $from;
832 push @where,$where if $where;
833 push @group,$group if $group;
834 push @args,@a;
837 if (defined $fromtable) {
838 # last argument is the join field
839 my ($from,$where,$group,@a) = $self->_from_table_sql($fromtable,'f.id');
840 push @from,$from if $from;
841 push @where,$where if $where;
842 push @group,$group if $group;
843 push @args,@a;
846 # if no other criteria are specified, then
847 # only fetch indexed (i.e. top level objects)
848 @where = 'indexed=1' unless @where;
850 my $from = join ', ',@from;
851 my $where = join ' AND ',map {"($_)"} @where;
852 my $group = join ', ',@group;
853 $group = "GROUP BY $group" if @group;
855 my $query = <<END;
856 SELECT f.id,f.object,f.typeid,f.seqid,f.start,f.end,f.strand
857 FROM $from
858 WHERE $where
859 $group
862 $self->_print_query($query,@args) if DEBUG || $self->debug;
864 my $sth = $self->_prepare($query) or $self->throw($self->dbh->errstr);
865 $sth->execute(@args) or $self->throw($sth->errstr);
866 return $iterator ? Bio::DB::SeqFeature::Store::DBI::Iterator->new($sth,$self) : $self->_sth2objs($sth);
869 sub _name_sql {
870 my $self = shift;
871 my ($name,$allow_aliases,$join) = @_;
872 my $name_table = $self->_name_table;
874 my $from = "$name_table as n";
875 my ($match,$string) = $self->_match_sql($name);
877 my $where = "n.id=$join AND n.name $match";
878 $where .= " AND n.display_name>0" unless $allow_aliases;
879 return ($from,$where,'',$string);
882 sub _search_attributes {
883 my $self = shift;
884 my ($search_string,$attribute_names,$limit) = @_;
885 my @words = map {quotemeta($_)} split /\s+/,$search_string;
887 my $name_table = $self->_name_table;
888 my $attribute_table = $self->_attribute_table;
889 my $attributelist_table = $self->_attributelist_table;
890 my $type_table = $self->_type_table;
891 my $typelist_table = $self->_typelist_table;
893 my @tags = @$attribute_names;
894 my $tag_sql = join ' OR ',("al.tag=?") x @tags;
896 my $perl_regexp = join '|',@words;
898 my $sql_regexp = join ' OR ',("a.attribute_value REGEXP ?") x @words;
899 my $sql = <<END;
900 SELECT name,attribute_value,tl.tag,n.id
901 FROM $name_table as n,$attribute_table as a,$attributelist_table as al,$type_table as t,$typelist_table as tl
902 WHERE n.id=a.id
903 AND al.id=a.attribute_id
904 AND n.id=t.id
905 AND t.typeid=tl.id
906 AND n.display_name=1
907 AND ($tag_sql)
908 AND ($sql_regexp)
910 $sql .= "LIMIT $limit" if defined $limit;
911 $self->_print_query($sql,@tags,@words) if DEBUG || $self->debug;
912 my $sth = $self->_prepare($sql);
913 $sth->execute(@tags,@words) or $self->throw($sth->errstr);
915 my @results;
916 while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
917 my (@hits) = $value =~ /$perl_regexp/ig;
918 my @words_in_row = split /\b/,$value;
919 my $score = int(@hits * 10);
920 push @results,[$name,$value,$score,$type,$id];
922 $sth->finish;
923 @results = sort {$b->[2]<=>$a->[2]} @results;
924 return @results;
927 sub _match_sql {
928 my $self = shift;
929 my $name = shift;
931 my ($match,$string);
932 if ($name =~ /(?:^|[^\\])[*?]/) {
933 $name =~ s/(^|[^\\])([%_])/$1\\$2/g;
934 $name =~ s/(^|[^\\])\*/$1%/g;
935 $name =~ s/(^|[^\\])\?/$1_/g;
936 $match = "LIKE ?";
937 $string = $name;
938 } else {
939 $match = "= ?";
940 $string = $name;
942 return ($match,$string);
945 sub _from_table_sql {
946 my $self = shift;
947 my ($from_table,$join) = @_;
948 my $from = "$from_table as ft";
949 my $where = "ft.id=$join";
950 return ($from,$where,'');
953 sub _attributes_sql {
954 my $self = shift;
955 my ($attributes,$join) = @_;
957 my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
958 my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
960 my $attribute_table = $self->_attribute_table;
961 my $attributelist_table = $self->_attributelist_table;
963 my $from = "$attribute_table as a use index(attribute_id), $attributelist_table as al";
965 my $where = <<END;
966 a.id=$join
967 AND a.attribute_id=al.id
968 AND ($wf)
971 my $group = $group_by;
973 my @args = (@bind_args,@group_args);
974 return ($from,$where,$group,@args);
977 sub subfeature_types_are_indexed { 1 }
978 sub subfeature_locations_are_indexed { 1 }
980 sub _types_sql {
981 my $self = shift;
982 my ($types,$type_table) = @_;
983 my ($primary_tag,$source_tag);
985 my @types = ref $types eq 'ARRAY' ? @$types : $types;
987 my $typelist = $self->_typelist_table;
988 my $from = "$typelist AS tl";
990 my (@matches,@args);
992 for my $type (@types) {
994 if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
995 $primary_tag = $type->method;
996 $source_tag = $type->source;
997 } else {
998 ($primary_tag,$source_tag) = split ':',$type,2;
1001 if (defined $source_tag) {
1002 if (length($primary_tag)) {
1003 push @matches,"tl.tag=?";
1004 push @args,"$primary_tag:$source_tag";
1006 else {
1007 push @matches,"tl.tag LIKE ?";
1008 push @args,"%:$source_tag";
1010 } else {
1011 push @matches,"tl.tag LIKE ?";
1012 push @args,"$primary_tag:%";
1015 my $matches = join ' OR ',@matches;
1017 my $where = <<END;
1018 tl.id=$type_table.typeid
1019 AND ($matches)
1022 return ($from,$where,'',@args);
1025 sub _location_sql {
1026 my $self = shift;
1027 my ($seq_id,$start,$end,$range_type,$strand,$location) = @_;
1029 # the additional join on the location_list table badly impacts performance
1030 # so we build a copy of the table in memory
1031 my $seqid = $self->_locationid($seq_id) || 0; # zero is an invalid primary ID, so will return empty
1033 $start = MIN_INT unless defined $start;
1034 $end = MAX_INT unless defined $end;
1036 my ($bin_where,@bin_args) = $self->bin_where($start,$end,$location);
1038 my ($range,@range_args);
1039 if ($range_type eq 'overlaps') {
1040 $range = "$location.end>=? AND $location.start<=? AND ($bin_where)";
1041 @range_args = ($start,$end,@bin_args);
1042 } elsif ($range_type eq 'contains') {
1043 $range = "$location.start>=? AND $location.end<=? AND ($bin_where)";
1044 @range_args = ($start,$end,@bin_args);
1045 } elsif ($range_type eq 'contained_in') {
1046 $range = "$location.start<=? AND $location.end>=?";
1047 @range_args = ($start,$end);
1048 } else {
1049 $self->throw("range_type must be one of 'overlaps', 'contains' or 'contained_in'");
1052 if (defined $strand) {
1053 $range .= " AND strand=?";
1054 push @range_args,$strand;
1057 my $where = <<END;
1058 $location.seqid=?
1059 AND $range
1062 my $from = '';
1063 my $group = '';
1065 my @args = ($seqid,@range_args);
1066 return ($from,$where,$group,@args);
1070 # force reindexing
1072 sub reindex {
1073 my $self = shift;
1074 my $from_update_table = shift; # if present, will take ids from "update_table"
1076 my $dbh = $self->dbh;
1077 my $count = 0;
1078 my $now;
1080 # try to bring in highres time() function
1081 eval "require Time::HiRes";
1083 my $last_time = $self->time();
1085 # tell _delete_index() not to bother removing the index rows corresponding
1086 # to each individual feature
1087 local $self->{reindexing} = 1;
1089 $dbh->begin_work;
1090 eval {
1091 my $update = $from_update_table;
1092 for my $table ($self->index_tables) {
1093 my $query = $from_update_table ? "DELETE $table FROM $table,$update WHERE $table.id=$update.id"
1094 : "DELETE FROM $table";
1095 $dbh->do($query);
1096 $dbh->do("ALTER TABLE $table DISABLE KEYS");
1098 my $iterator = $self->get_seq_stream(-from_table=>$from_update_table ? $update : undef);
1099 while (my $f = $iterator->next_seq) {
1100 if (++$count %1000 == 0) {
1101 $now = $self->time();
1102 my $elapsed = sprintf(" in %5.2fs",$now - $last_time);
1103 $last_time = $now;
1104 print STDERR "$count features indexed$elapsed...",' 'x60;
1105 print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
1107 $self->_update_indexes($f);
1110 for my $table ($self->index_tables) {
1111 $dbh->do("ALTER TABLE $table ENABLE KEYS");
1113 if (@_) {
1114 warn "Couldn't complete transaction: $@";
1115 $dbh->rollback;
1116 return;
1117 } else {
1118 $dbh->commit;
1119 return 1;
1123 sub optimize {
1124 my $self = shift;
1125 $self->dbh->do("ANALYZE TABLE $_") foreach $self->index_tables;
1128 sub all_tables {
1129 my $self = shift;
1130 my @index_tables = $self->index_tables;
1131 my $feature_table = $self->_feature_table;
1132 return ($feature_table,@index_tables);
1135 sub index_tables {
1136 my $self = shift;
1137 return map {$self->_qualify($_)} qw(name attribute parent2child)
1140 sub _firstid {
1141 my $self = shift;
1142 my $features = $self->_feature_table;
1143 my $query = <<END;
1144 SELECT min(id) FROM $features
1146 my $sth=$self->_prepare($query);
1147 $sth->execute();
1148 my ($first) = $sth->fetchrow_array;
1149 $sth->finish;
1150 $first;
1153 sub _nextid {
1154 my $self = shift;
1155 my $lastkey = shift;
1156 my $features = $self->_feature_table;
1157 my $query = <<END;
1158 SELECT min(id) FROM $features WHERE id>?
1160 my $sth=$self->_prepare($query);
1161 $sth->execute($lastkey);
1162 my ($next) = $sth->fetchrow_array;
1163 $sth->finish;
1164 $next;
1167 sub _existsid {
1168 my $self = shift;
1169 my $key = shift;
1170 my $features = $self->_feature_table;
1171 my $query = <<END;
1172 SELECT count(*) FROM $features WHERE id=?
1174 my $sth=$self->_prepare($query);
1175 $sth->execute($key);
1176 my ($count) = $sth->fetchrow_array;
1177 $sth->finish;
1178 $count > 0;
1181 sub _deleteid {
1182 my $self = shift;
1183 my $key = shift;
1184 my $dbh = $self->dbh;
1185 my $child_table = $self->_parent2child_table;
1186 my $query = "SELECT child FROM $child_table WHERE id=?";
1187 my $sth=$self->_prepare($query);
1188 $sth->execute($key);
1189 my $success = 0;
1190 while (my ($cid) = $sth->fetchrow_array) {
1191 # Backcheck looking for multiple parents, delete only if one is present. I'm
1192 # sure there is a nice way to left join the parent2child table onto itself
1193 # to get this in one query above, just haven't worked it out yet...
1194 my $sth2 = $self->_prepare("SELECT count(id) FROM $child_table WHERE child=?");
1195 $sth2->execute($cid);
1196 my ($count) = $sth2->fetchrow_array;
1197 if ($count == 1) {
1198 $self->_deleteid($cid) || warn "An error occurred while removing subfeature id=$cid. Perhaps it was previously deleted?\n";
1201 for my $table ($self->all_tables) {
1202 $success += $dbh->do("DELETE FROM $table WHERE id=$key") || 0;
1204 return $success;
1207 sub _clearall {
1208 my $self = shift;
1209 my $dbh = $self->dbh;
1210 for my $table ($self->all_tables) {
1211 $dbh->do("DELETE FROM $table");
1215 sub _featurecount {
1216 my $self = shift;
1217 my $dbh = $self->dbh;
1218 my $features = $self->_feature_table;
1219 my $query = <<END;
1220 SELECT count(*) FROM $features
1222 my $sth=$self->_prepare($query);
1223 $sth->execute();
1224 my ($count) = $sth->fetchrow_array;
1225 $sth->finish;
1226 $count;
1229 sub _seq_ids {
1230 my $self = shift;
1231 my $dbh = $self->dbh;
1232 my $location = $self->_locationlist_table;
1233 my $sth = $self->_prepare("SELECT DISTINCT seqname FROM $location");
1234 $sth->execute() or $self->throw($sth->errstr);
1235 my @result;
1236 while (my ($id) = $sth->fetchrow_array) {
1237 push @result,$id;
1239 return @result;
1242 sub setting {
1243 my $self = shift;
1244 my ($variable_name,$value) = @_;
1245 my $meta = $self->_meta_table;
1247 if (defined $value && $self->writeable) {
1248 my $query = <<END;
1249 REPLACE INTO $meta (name,value) VALUES (?,?)
1251 my $sth = $self->_prepare($query);
1252 $sth->execute($variable_name,$value) or $self->throw($sth->errstr);
1253 $sth->finish;
1254 $self->{settings_cache}{$variable_name} = $value;
1256 else {
1257 return $self->{settings_cache}{$variable_name} if exists $self->{settings_cache}{$variable_name};
1258 my $query = <<END;
1259 SELECT value FROM $meta as m WHERE m.name=?
1261 my $sth = $self->_prepare($query);
1262 $sth->execute($variable_name) or $self->throw($sth->errstr);
1263 my ($value) = $sth->fetchrow_array;
1264 $sth->finish;
1265 return $self->{settings_cache}{$variable_name} = $value;
1270 # Replace Bio::SeqFeatureI into database.
1272 sub replace {
1273 my $self = shift;
1274 my $object = shift;
1275 my $index_flag = shift || undef;
1277 # ?? shouldn't need to do this
1278 # $self->_load_class($object);
1279 my $id = $object->primary_id;
1280 my $features = $self->_feature_table;
1282 my $sth = $self->_prepare(<<END);
1283 REPLACE INTO $features (id,object,indexed,seqid,start,end,strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?,?)
1286 my @location = $index_flag ? $self->_get_location_and_bin($object) : (undef)x6;
1288 my $primary_tag = $object->primary_tag;
1289 my $source_tag = $object->source_tag || '';
1290 $primary_tag .= ":$source_tag";
1291 my $typeid = $self->_typeid($primary_tag,1);
1293 my $frozen = $self->no_blobs() ? 0 : $self->freeze($object);
1295 $sth->execute($id,$frozen,$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
1297 my $dbh = $self->dbh;
1298 $object->primary_id($dbh->{mysql_insertid}) unless defined $id;
1300 $self->flag_for_indexing($dbh->{mysql_insertid}) if $self->{bulk_update_in_progress};
1303 # doesn't work with this schema, since we have to update name and attribute
1304 # tables which need object ids, which we can only know by replacing feats in
1305 # the feature table one by one
1306 sub bulk_replace {
1307 my $self = shift;
1308 my $index_flag = shift || undef;
1309 my @objects = @_;
1311 my $features = $self->_feature_table;
1313 my @insert_values;
1314 foreach my $object (@objects) {
1315 my $id = $object->primary_id;
1316 my @location = $index_flag ? $self->_get_location_and_bin($object) : (undef)x6;
1317 my $primary_tag = $object->primary_tag;
1318 my $source_tag = $object->source_tag || '';
1319 $primary_tag .= ":$source_tag";
1320 my $typeid = $self->_typeid($primary_tag,1);
1322 push(@insert_values, ($id,0,$index_flag||0,@location,$typeid));
1325 my @value_blocks;
1326 for (1..@objects) {
1327 push(@value_blocks, '(?,?,?,?,?,?,?,?,?,?)');
1329 my $value_blocks = join(',', @value_blocks);
1330 my $sql = qq{REPLACE INTO $features (id,object,indexed,seqid,start,end,strand,tier,bin,typeid) VALUES $value_blocks};
1332 my $sth = $self->_prepare($sql);
1333 $sth->execute(@insert_values) or $self->throw($sth->errstr);
1337 # Insert one Bio::SeqFeatureI into database. primary_id must be undef
1339 sub insert {
1340 my $self = shift;
1341 my $object = shift;
1342 my $index_flag = shift || 0;
1344 $self->_load_class($object);
1345 defined $object->primary_id and $self->throw("$object already has a primary id");
1347 my $features = $self->_feature_table;
1348 my $sth = $self->_prepare(<<END);
1349 INSERT INTO $features (id,object,indexed) VALUES (?,?,?)
1351 $sth->execute(undef,$self->freeze($object),$index_flag) or $self->throw($sth->errstr);
1352 my $dbh = $self->dbh;
1353 $object->primary_id($dbh->{mysql_insertid});
1354 $self->flag_for_indexing($dbh->{mysql_insertid}) if $self->{bulk_update_in_progress};
1357 =head2 types
1359 Title : types
1360 Usage : @type_list = $db->types
1361 Function: Get all the types in the database
1362 Returns : array of Bio::DB::GFF::Typename objects
1363 Args : none
1364 Status : public
1366 =cut
1368 sub types {
1369 my $self = shift;
1370 eval "require Bio::DB::GFF::Typename"
1371 unless Bio::DB::GFF::Typename->can('new');
1372 my $typelist_table = $self->_typelist_table;
1373 my $sql = <<END;
1374 SELECT tag from $typelist_table
1377 $self->_print_query($sql) if DEBUG || $self->debug;
1378 my $sth = $self->_prepare($sql);
1379 $sth->execute() or $self->throw($sth->errstr);
1381 my @results;
1382 while (my($tag) = $sth->fetchrow_array) {
1383 push @results,Bio::DB::GFF::Typename->new($tag);
1385 $sth->finish;
1386 return @results;
1390 # Insert a bit of DNA or protein into the database
1392 sub _insert_sequence {
1393 my $self = shift;
1394 my ($seqid,$seq,$offset) = @_;
1395 my $id = $self->_locationid($seqid);
1396 my $seqtable = $self->_sequence_table;
1397 my $sth = $self->_prepare(<<END);
1398 REPLACE INTO $seqtable (id,offset,sequence) VALUES (?,?,?)
1400 $sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
1404 # This subroutine flags the given primary ID for later reindexing
1406 sub flag_for_indexing {
1407 my $self = shift;
1408 my $id = shift;
1409 my $needs_updating = $self->_update_table;
1410 my $sth = $self->_prepare("REPLACE INTO $needs_updating VALUES (?)");
1411 $sth->execute($id) or $self->throw($self->dbh->errstr);
1415 # Update indexes for given object
1417 sub _update_indexes {
1418 my $self = shift;
1419 my $obj = shift;
1420 defined (my $id = $obj->primary_id) or return;
1422 if ($self->{bulk_update_in_progress}) {
1423 $self->_dump_update_name_index($obj,$id);
1424 $self->_dump_update_attribute_index($obj,$id);
1425 } else {
1426 $self->_update_name_index($obj,$id);
1427 $self->_update_attribute_index($obj,$id);
1431 sub _update_name_index {
1432 my $self = shift;
1433 my ($obj,$id) = @_;
1434 my $name = $self->_name_table;
1435 my $primary_id = $obj->primary_id;
1437 $self->_delete_index($name,$id);
1438 my ($names,$aliases) = $self->feature_names($obj);
1440 my $sth = $self->_prepare("INSERT INTO $name (id,name,display_name) VALUES (?,?,?)");
1442 $sth->execute($id,$_,1) or $self->throw($sth->errstr) foreach @$names;
1443 $sth->execute($id,$_,0) or $self->throw($sth->errstr) foreach @$aliases;
1444 $sth->finish;
1447 sub _update_attribute_index {
1448 my $self = shift;
1449 my ($obj,$id) = @_;
1450 my $attribute = $self->_attribute_table;
1451 $self->_delete_index($attribute,$id);
1453 my $sth = $self->_prepare("INSERT INTO $attribute (id,attribute_id,attribute_value) VALUES (?,?,?)");
1454 for my $tag ($obj->get_all_tags) {
1455 my $tagid = $self->_attributeid($tag);
1456 for my $value ($obj->get_tag_values($tag)) {
1457 $sth->execute($id,$tagid,$value) or $self->throw($sth->errstr);
1460 $sth->finish;
1463 sub _genericid {
1464 my $self = shift;
1465 my ($table,$namefield,$name,$add_if_missing) = @_;
1466 my $qualified_table = $self->_qualify($table);
1467 my $sth = $self->_prepare(<<END);
1468 SELECT id FROM $qualified_table WHERE $namefield=?
1470 $sth->execute($name) or die $sth->errstr;
1471 my ($id) = $sth->fetchrow_array;
1472 $sth->finish;
1473 return $id if defined $id;
1474 return unless $add_if_missing;
1476 $sth = $self->_prepare(<<END);
1477 INSERT INTO $qualified_table ($namefield) VALUES (?)
1479 $sth->execute($name) or die $sth->errstr;
1480 my $dbh = $self->dbh;
1481 return $dbh->{mysql_insertid};
1484 sub _typeid {
1485 shift->_genericid('typelist','tag',shift,1);
1487 sub _locationid {
1488 shift->_genericid('locationlist','seqname',shift,1);
1490 sub _attributeid {
1491 shift->_genericid('attributelist','tag',shift,1);
1494 sub _get_location_and_bin {
1495 my $self = shift;
1496 my $feature = shift;
1497 my $seqid = $self->_locationid($feature->seq_id);
1498 my $start = $feature->start;
1499 my $end = $feature->end;
1500 my $strand = $feature->strand || 0;
1501 my ($tier,$bin) = $self->get_bin($start,$end);
1502 return ($seqid,$start,$end,$strand,$tier,$bin);
1505 sub get_bin {
1506 my $self = shift;
1507 my ($start,$end) = @_;
1508 my $binsize = MIN_BIN;
1509 my ($bin_start,$bin_end,$tier);
1510 $tier = 0;
1511 while (1) {
1512 $bin_start = int $start/$binsize;
1513 $bin_end = int $end/$binsize;
1514 last if $bin_start == $bin_end;
1515 $binsize *= 10;
1516 $tier++;
1518 return ($tier,$bin_start);
1521 sub bin_where {
1522 my $self = shift;
1523 my ($start,$end,$f) = @_;
1524 my (@bins,@args);
1526 my $tier = 0;
1527 my $binsize = MIN_BIN;
1528 while ($binsize <= MAX_BIN) {
1529 my $bin_start = int($start/$binsize);
1530 my $bin_end = int($end/$binsize);
1531 push @bins,"($f.tier=? AND $f.bin between ? AND ?)";
1532 push @args,($tier,$bin_start,$bin_end);
1533 $binsize *= 10;
1534 $tier++;
1536 my $query = join ("\n\t OR ",@bins);
1537 return wantarray ? ($query,@args) : substitute($query,@args);
1541 sub _delete_index {
1542 my $self = shift;
1543 my ($table_name,$id) = @_;
1544 return if $self->{reindexing};
1545 my $sth = $self->_prepare("DELETE FROM $table_name WHERE id=?") or $self->throw($self->dbh->errstr);
1546 $sth->execute($id);
1549 # given a statement handler that is expected to return rows of (id,object)
1550 # unthaw each object and return a list of 'em
1551 sub _sth2objs {
1552 my $self = shift;
1553 my $sth = shift;
1554 my @result;
1555 while (my ($id,$o,$typeid,$seqid,$start,$end,$strand) = $sth->fetchrow_array) {
1556 my $obj;
1557 if ($o eq '0') {
1558 # rebuild a new feat object from the data stored in the db
1559 $obj = $self->_rebuild_obj($id,$typeid,$seqid,$start,$end,$strand);
1561 else {
1562 $obj = $self->thaw($o,$id);
1565 push @result,$obj;
1567 $sth->finish;
1568 return @result;
1571 # given a statement handler that is expected to return rows of (id,object)
1572 # unthaw each object and return a list of 'em
1573 sub _sth2obj {
1574 my $self = shift;
1575 my $sth = shift;
1576 my ($id,$o,$typeid,$seqid,$start,$end,$strand) = $sth->fetchrow_array;
1577 return unless defined $o;
1578 my $obj;
1579 if ($o eq '0') { # I don't understand why an object ever needs to be rebuilt!
1580 # rebuild a new feat object from the data stored in the db
1581 $obj = $self->_rebuild_obj($id,$typeid,$seqid,$start,$end,$strand);
1583 else {
1584 $obj = $self->thaw($o,$id);
1587 $obj;
1590 sub _rebuild_obj {
1591 my ($self, $id, $typeid, $db_seqid, $start, $end, $strand) = @_;
1592 my ($type, $source, $seqid);
1594 # convert typeid to type and source
1595 if (exists $self->{_type_cache}->{$typeid}) {
1596 ($type, $source) = @{$self->{_type_cache}->{$typeid}};
1598 else {
1599 my $sql = qq{ SELECT `tag` FROM typelist WHERE `id` = ? };
1600 my $sth = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
1601 $sth->execute($typeid);
1602 my $result;
1603 $sth->bind_columns(\$result);
1604 while ($sth->fetch()) {
1605 # there should be only one row returned, but we ensure to get all rows
1608 ($type, $source) = split(':', $result);
1609 $self->{_type_cache}->{$typeid} = [$type, $source];
1612 # convert the db seqid to the sequence name
1613 if (exists $self->{_seqid_cache}->{$db_seqid}) {
1614 $seqid = $self->{_seqid_cache}->{$db_seqid};
1616 else {
1617 my $sql = qq{ SELECT `seqname` FROM locationlist WHERE `id` = ? };
1618 my $sth = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
1619 $sth->execute($db_seqid);
1620 $sth->bind_columns(\$seqid);
1621 while ($sth->fetch()) {
1622 # there should be only one row returned, but we ensure to get all rows
1625 $self->{_seqid_cache}->{$db_seqid} = $seqid;
1628 # get the names from name table?
1630 # get the attributes and store those in obj
1631 my $sql = qq{ SELECT attribute_id,attribute_value FROM attribute WHERE `id` = ? };
1632 my $sth = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
1633 $sth->execute($id);
1634 my ($attribute_id, $attribute_value);
1635 $sth->bind_columns(\($attribute_id, $attribute_value));
1636 my %attribs;
1637 while ($sth->fetch()) {
1638 # convert the attribute_id to its real name
1639 my $attribute;
1640 if (exists $self->{_attribute_cache}->{$attribute_id}) {
1641 $attribute = $self->{_attribute_cache}->{$attribute_id};
1643 else {
1644 my $sql = qq{ SELECT `tag` FROM attributelist WHERE `id` = ? };
1645 my $sth2 = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
1646 $sth2->execute($attribute_id);
1647 $sth2->bind_columns(\$attribute);
1648 while ($sth2->fetch()) {
1649 # there should be only one row returned, but we ensure to get all rows
1652 $self->{_attribute_cache}->{$attribute_id} = $attribute;
1655 if ($source && $attribute eq 'source' && $attribute_value eq $source) {
1656 next;
1659 $attribs{$attribute} = $attribute_value;
1662 my $obj = Bio::SeqFeature::Lite->new(-primary_id => $id,
1663 $type ? (-type => $type) : (),
1664 $source ? (-source => $source) : (),
1665 $seqid ? (-seq_id => $seqid) : (),
1666 defined $start ? (-start => $start) : (),
1667 defined $end ? (-end => $end) : (),
1668 defined $strand ? (-strand => $strand) : (),
1669 keys %attribs ? (-attributes => \%attribs) : ());
1671 return $obj;
1674 sub _prepare {
1675 my $self = shift;
1676 my $query = shift;
1677 my $dbh = $self->dbh;
1678 my $sth = $dbh->prepare_cached($query, {}, 3) or $self->throw($dbh->errstr);
1679 $sth;
1683 ####################################################################################################
1684 # SQL Fragment generators
1685 ####################################################################################################
1687 sub _feature_table { shift->_qualify('feature') }
1688 sub _location_table { shift->_qualify('location') }
1689 sub _locationlist_table { shift->_qualify('locationlist') }
1690 sub _type_table { shift->_qualify('feature') }
1691 sub _typelist_table { shift->_qualify('typelist') }
1692 sub _name_table { shift->_qualify('name') }
1693 sub _attribute_table { shift->_qualify('attribute')}
1694 sub _attributelist_table { shift->_qualify('attributelist')}
1695 sub _parent2child_table { shift->_qualify('parent2child')}
1696 sub _meta_table { shift->_qualify('meta')}
1697 sub _update_table { shift->_qualify('update_table')}
1698 sub _sequence_table { shift->_qualify('sequence')}
1700 sub _make_attribute_where {
1701 my $self = shift;
1702 my ($attributetable,$attributenametable,$attributes) = @_;
1703 my @args;
1704 my @sql;
1705 my $dbh = $self->dbh;
1706 foreach (keys %$attributes) {
1707 my @match_values;
1708 my @values = ref($attributes->{$_}) && ref($attributes->{$_}) eq 'ARRAY' ? @{$attributes->{$_}} : $attributes->{$_};
1709 foreach (@values) { # convert * into % for wildcard matches
1710 s/\*/%/g;
1712 my $match = join ' OR ',map {
1713 /%/ ? "$attributetable.attribute_value LIKE ?"
1714 : "$attributetable.attribute_value=?"
1715 } @values;
1716 push @sql,"($attributenametable.tag=? AND ($match))";
1717 push @args,($_,@values);
1719 return (join(' OR ',@sql),@args);
1722 sub _make_attribute_group {
1723 my $self = shift;
1724 my ($table_name,$attributes) = @_;
1725 my $key_count = keys %$attributes or return;
1726 return "f.id,f.object,f.typeid,f.seqid,f.start,f.end,f.strand HAVING count(f.id)>?",$key_count-1;
1729 sub _print_query {
1730 my $self = shift;
1731 my ($query,@args) = @_;
1732 while ($query =~ /\?/) {
1733 my $arg = $self->dbh->quote(shift @args);
1734 $query =~ s/\?/$arg/;
1736 warn $query,"\n";
1740 # special-purpose store for bulk loading - write to a file rather than to the db
1742 sub _dump_store {
1743 my $self = shift;
1744 my $indexed = shift;
1746 my $count = 0;
1747 my $store_fh = $self->dump_filehandle('feature');
1748 my $dbh = $self->dbh;
1750 my $autoindex = $self->autoindex;
1752 for my $obj (@_) {
1753 my $id = $self->next_id;
1754 my ($seqid,$start,$end,$strand,$tier,$bin) = $indexed ? $self->_get_location_and_bin($obj) : (undef)x6;
1755 my $primary_tag = $obj->primary_tag;
1756 my $source_tag = $obj->source_tag || '';
1757 $primary_tag .= ":$source_tag";
1758 my $typeid = $self->_typeid($primary_tag,1);
1760 print $store_fh join("\t",$id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,$dbh->quote($self->freeze($obj))),"\n";
1761 $obj->primary_id($id);
1762 $self->_update_indexes($obj) if $indexed && $autoindex;
1763 $count++;
1766 # remember whether we are have ever stored a non-indexed feature
1767 unless ($indexed or $self->{indexed_flag}++) {
1768 $self->subfeatures_are_indexed(0);
1770 $count;
1773 sub _dump_add_SeqFeature {
1774 my $self = shift;
1775 my $parent = shift;
1776 my @children = @_;
1778 my $dbh = $self->dbh;
1779 my $fh = $self->dump_filehandle('parent2child');
1780 my $parent_id = (ref $parent ? $parent->primary_id : $parent)
1781 or $self->throw("$parent should have a primary_id");
1782 my $count = 0;
1784 for my $child_id (@children) {
1785 print $fh join("\t",$parent_id,$child_id),"\n";
1786 $count++;
1788 $count;
1791 sub _dump_update_name_index {
1792 my $self = shift;
1793 my ($obj,$id) = @_;
1794 my $fh = $self->dump_filehandle('name');
1795 my $dbh = $self->dbh;
1796 my ($names,$aliases) = $self->feature_names($obj);
1797 print $fh join("\t",$id,$dbh->quote($_),1),"\n" foreach @$names;
1798 print $fh join("\t",$id,$dbh->quote($_),0),"\n" foreach @$aliases;
1801 sub _dump_update_attribute_index {
1802 my $self = shift;
1803 my ($obj,$id) = @_;
1804 my $fh = $self->dump_filehandle('attribute');
1805 my $dbh = $self->dbh;
1806 for my $tag ($obj->all_tags) {
1807 my $tagid = $self->_attributeid($tag);
1808 for my $value ($obj->each_tag_value($tag)) {
1809 print $fh join("\t",$id,$tagid,$dbh->quote($value)),"\n";
1814 sub time {
1815 return Time::HiRes::time() if Time::HiRes->can('time');
1816 return time();
1819 sub DESTROY {
1820 my $self = shift;
1821 if ($self->{bulk_update_in_progress}) { # be sure to remove temp files
1822 for my $table ('feature',$self->index_tables) {
1823 my $path = $self->dump_path($table);
1824 unlink $path;