1 package Bio
::DB
::SeqFeature
::Store
::DBI
::mysql
;
5 Bio::DB::SeqFeature::Store::DBI::mysql -- Mysql implementation of Bio::DB::SeqFeature::Store
9 use Bio::DB::SeqFeature::Store;
11 # Open the sequence database
12 my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::mysql',
13 -dsn => 'dbi:mysql:test');
15 # get a feature from somewhere
16 my $feature = Bio::SeqFeature::Generic->new(...);
19 $db->store($feature) or die "Couldn't store!";
21 # primary ID of the feature is changed to indicate its primary ID
23 my $id = $feature->primary_id;
25 # get the feature back out
26 my $f = $db->fetch($id);
28 # change the feature and update it
30 $f->update($f) or die "Couldn't update!";
34 my @features = $db->fetch_many(@list_of_ids);
37 @features = $db->get_features_by_name('ZK909');
40 @features = $db->get_features_by_alias('sma-3');
43 @features = $db->get_features_by_name('gene');
46 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
49 @features = $db->get_features_by_attribute({description => 'protein kinase'})
51 # ...by the GFF "Note" field
52 @result_list = $db->search_notes('kinase');
54 # ...by arbitrary combinations of selectors
55 @features = $db->features(-name => $name,
60 -attributes => $attributes);
62 # ...using an iterator
63 my $iterator = $db->get_seq_stream(-name => $name,
68 -attributes => $attributes);
70 while (my $feature = $iterator->next_seq) {
71 # do something with the feature
74 # ...limiting the search to a particular region
75 my $segment = $db->segment('Chr1',5000=>6000);
76 my @features = $segment->features(-type=>['mRNA','match']);
78 # getting & storing sequence information
79 # Warning: this returns a string, and not a PrimarySeq object
80 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
81 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
83 # what feature types are defined in the database?
84 my @types = $db->types;
86 # create a new feature in the database
87 my $feature = $db->new_feature(-primary_tag => 'mRNA',
94 Bio::DB::SeqFeature::Store::mysql is the Mysql adaptor for
95 Bio::DB::SeqFeature::Store. You will not create it directly, but
96 instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
98 See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
100 =head2 Using the Mysql adaptor
102 Before you can use the adaptor, you must use the mysqladmin tool to
103 create a database and establish a user account with write
104 permission. In order to use "fast" loading, the user account must have
107 To establish a connection to the database, call
108 Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::mysql',@more_args). The
109 additional arguments are as follows:
111 Argument name Description
112 ------------- -----------
114 -dsn The database name. You can abbreviate
115 "dbi:mysql:foo" as "foo" if you wish.
117 -user Username for authentication.
119 -pass Password for authentication.
121 -namespace A prefix to attach to each table. This allows you
122 to have several virtual databases in the same
125 -temp Boolean flag. If true, a temporary database
126 will be created and destroyed as soon as
127 the Store object goes out of scope. (synonym -temporary)
129 -autoindex Boolean flag. If true, features in the database will be
130 reindexed every time they change. This is the default.
133 -tmpdir Directory in which to place temporary files during "fast" loading.
134 Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
136 -dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
137 (synonyms -options, -dbi_attr)
139 -write Pass true to open database for writing or updating.
141 If successful, a new instance of
142 Bio::DB::SeqFeature::Store::DBI::mysql will be returned.
144 In addition to the standard methods supported by all well-behaved
145 Bio::DB::SeqFeature::Store databases, several following
146 adaptor-specific methods are provided. These are described in the next
153 use base
'Bio::DB::SeqFeature::Store';
154 use Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
;
158 use Bio
::DB
::GFF
::Util
::Rearrange
'rearrange';
159 use Bio
::SeqFeature
::Lite
;
161 use Carp
'carp','cluck','croak';
162 use constant DEBUG
=>0;
164 # from the MySQL documentation...
165 # WARNING: if your sequence uses coordinates greater than 2 GB, you are out of luck!
166 use constant MAX_INT
=> 2_147_483_647
;
167 use constant MIN_INT
=> -2_147_483_648
;
168 use constant MAX_BIN
=> 1_000_000_000
; # size of largest feature = 1 Gb
169 use constant MIN_BIN
=> 1000; # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these
170 use constant SUMMARY_BIN_SIZE
=> 1000;
172 # tier 0 == 1000 bp bins
173 # tier 1 == 10,000 bp bins
177 memoize
('_locationid');
178 memoize
('_attributeid');
179 memoize
('dump_path');
182 # object initialization
196 ) = rearrange
(['DSN',
197 ['TEMP','TEMPORARY'],
200 ['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
202 ['PASS','PASSWD','PASSWORD'],
203 ['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
204 ['WRITE','WRITEABLE'],
208 $writeable = 1 if $is_temporary or $dump_dir;
210 $dsn or $self->throw("Usage: ".__PACKAGE__
."->init(-dsn => \$dbh || \$dsn)");
216 $dsn = "dbi:mysql:$dsn" unless $dsn =~ /^dbi:/;
217 $dbh = DBI
->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr
);
218 $dbh->{mysql_auto_reconnect
} = 1;
221 $self->{is_temp
} = $is_temporary;
222 $self->{namespace
} = $namespace;
223 $self->{writeable
} = $writeable;
225 $self->default_settings;
226 $self->autoindex($autoindex) if defined $autoindex;
227 $self->dumpdir($dump_dir) if $dump_dir;
228 if ($self->is_temp) {
229 $self->init_tmp_database();
231 $self->init_database('erase');
235 sub writeable
{ shift->{writeable
} }
237 sub can_store_parentage
{ 1 }
239 sub table_definitions
{
244 id int(10) auto_increment primary key,
245 typeid int(10) not null,
249 strand tinyint default 0,
252 indexed tinyint default 1,
253 object MEDIUMBLOB not null,
254 index(seqid,tier,bin,typeid),
259 locationlist
=> <<END,
261 id int(10) auto_increment primary key,
262 seqname varchar(255) not null,
269 id int(10) auto_increment primary key,
270 tag varchar(255) not null,
277 name varchar(255) not null,
278 display_name tinyint default 0,
287 attribute_id int(10) not null,
288 attribute_value text,
290 index(attribute_id,attribute_value(10))
294 attributelist
=> <<END,
296 id int(10) auto_increment primary key,
297 tag varchar(255) not null,
301 parent2child
=> <<END,
304 child int(10) not null,
305 unique index(id,child)
311 name varchar(128) primary key,
312 value varchar(128) not null
318 offset int(10) unsigned not null,
320 primary key(id,offset)
323 interval_stats
=> <<END,
325 typeid integer not null,
326 seqid integer not null,
327 bin integer not null,
328 cum_count integer not null,
329 primary key(typeid,seqid,bin)
336 # default settings -- will create and populate meta table if needed
338 sub default_settings
{
340 $self->maybe_create_meta();
341 $self->SUPER::default_settings
;
343 $self->dumpdir(File
::Spec
->tmpdir);
348 # retrieve database handle
352 my $d = $self->{dbh
};
353 $self->{dbh
} = shift if @_;
359 $self->{dbh
}{InactiveDestroy
} = 1;
360 $self->{dbh
} = $self->{dbh
}->clone
361 unless $self->is_temp;
365 # get/set directory for bulk load tables
369 my $d = $self->{dumpdir
};
370 $self->{dumpdir
} = abs_path
(shift) if @_;
375 # table namespace (multiple dbs in one mysql db)
379 my $d = $self->{namespace
};
380 $self->{namespace
} = shift if @_;
385 # Required for Pg not mysql
387 sub remove_namespace
{
392 # find a path that corresponds to a dump table
396 my $table = $self->_qualify(shift);
397 return "$self->{dumpdir}/$table.$$";
401 # make a filehandle (writeable) that corresponds to a dump table
403 sub dump_filehandle
{
406 eval "require IO::File" unless IO
::File
->can('new');
407 my $path = $self->dump_path($table);
408 my $fh = $self->{filehandles
}{$path} ||= IO
::File
->new(">$path");
413 # find the next ID for a feature (used only during bulk loading)
417 $self->{max_id
} ||= $self->max_id;
418 return ++$self->{max_id
};
422 # find the maximum ID for a feature (used only during bulk loading)
426 my $features = $self->_feature_table;
427 my $sth = $self->_prepare("SELECT max(id) from $features");
428 $sth->execute or $self->throw($sth->errstr);
429 my ($id) = $sth->fetchrow_array;
434 # wipe database clean and reinstall schema
440 my $dbh = $self->dbh;
441 my $tables = $self->table_definitions;
443 for my $t (keys %$tables) {
444 next if $t eq 'meta'; # don't get rid of meta data!
445 my $table = $self->_qualify($t);
446 $dbh->do("DROP table IF EXISTS $table") if $erase;
447 my $query = "CREATE TABLE IF NOT EXISTS $table $tables->{$t}";
448 $self->_create_table($dbh,$query);
450 $self->subfeatures_are_indexed(1) if $erase;
454 sub init_tmp_database
{
457 my $dbh = $self->dbh;
458 my $tables = $self->table_definitions;
460 for my $t (keys %$tables) {
461 next if $t eq 'meta'; # done earlier
462 my $table = $self->_qualify($t);
463 my $query = "CREATE TEMPORARY TABLE $table $tables->{$t}";
464 $self->_create_table($dbh,$query);
471 my ($dbh,$query) = @_;
472 for my $q (split ';',$query) {
474 next unless $q =~ /\S/;
475 $dbh->do("$q;\n") or $self->throw($dbh->errstr);
479 sub maybe_create_meta
{
481 return unless $self->writeable;
482 my $meta = $self->_meta_table;
483 my $tables = $self->table_definitions;
484 my $temporary = $self->is_temp ?
'TEMPORARY' : '';
485 $self->dbh->do("CREATE $temporary TABLE IF NOT EXISTS $meta $tables->{meta}");
489 # use temporary tables
497 my $dbh = $self->dbh;
498 my $attributelist_table = $self->_attributelist_table;
500 my $a = $dbh->selectcol_arrayref("SELECT tag FROM $attributelist_table")
501 or $self->throw($dbh->errstr);
508 # special case for bulk updates
509 return $self->_dump_store(@_) if $self->{bulk_update_in_progress
};
514 my $autoindex = $self->autoindex;
516 my $dbh = $self->dbh;
517 local $dbh->{RaiseError
} = 1;
521 $self->replace($obj,$indexed);
522 $self->_update_indexes($obj) if $indexed && $autoindex;
528 warn "Transaction aborted because $@";
535 # remember whether we are have ever stored a non-indexed feature
536 unless ($indexed or $self->{indexed_flag
}++) {
537 $self->subfeatures_are_indexed(0);
542 # we memoize this in order to avoid making zillions of calls
546 # special case for bulk update -- need to build the indexes
547 # at the same time we build the main feature table
548 return 1 if $self->{bulk_update_in_progress
};
549 my $d = $self->setting('autoindex');
550 $self->setting(autoindex
=>shift) if @_;
554 sub _start_bulk_update
{
556 my $dbh = $self->dbh;
558 $self->{bulk_update_in_progress
}++;
561 sub _finish_bulk_update
{
563 my $dbh = $self->dbh;
564 my $dir = $self->{dumpdir
} || '.';
565 for my $table ($self->_feature_table,$self->index_tables) {
566 my $fh = $self->dump_filehandle($table);
567 my $path = $self->dump_path($table);
569 #print STDERR "$path\n";
571 $dbh->do("LOAD DATA LOCAL INFILE '$path' REPLACE INTO TABLE $table FIELDS OPTIONALLY ENCLOSED BY '\\''")
572 or $self->throw($dbh->errstr);
575 delete $self->{bulk_update_in_progress
};
576 delete $self->{ filehandles
};
582 # Add a subparts to a feature. Both feature and all subparts must already be in database.
584 sub _add_SeqFeature
{
587 # special purpose method for case when we are doing a bulk update
588 return $self->_dump_add_SeqFeature(@_) if $self->{bulk_update_in_progress
};
593 my $dbh = $self->dbh;
594 local $dbh->{RaiseError
} = 1;
596 my $parent2child = $self->_parent2child_table();
599 my $sth = $self->_prepare(<<END);
600 REPLACE INTO $parent2child (id,child) VALUES (?,?)
603 my $parent_id = (ref $parent ?
$parent->primary_id : $parent)
604 or $self->throw("$parent should have a primary_id");
606 $self->begin_work or $self->throw($dbh->errstr);
608 for my $child (@children) {
609 my $child_id = ref $child ?
$child->primary_id : $child;
610 defined $child_id or die "no primary ID known for $child";
611 $sth->execute($parent_id,$child_id);
617 warn "Transaction aborted because $@";
627 sub _fetch_SeqFeatures
{
632 my $parent_id = $parent->primary_id or $self->throw("$parent should have a primary_id");
633 my $features = $self->_feature_table;
634 my $parent2child = $self->_parent2child_table();
636 my @from = ("$features as f","$parent2child as c");
637 my @where = ('f.id=c.child','c.id=?');
638 my @args = $parent_id;
641 my ($from,$where,undef,@a) = $self->_types_sql(\
@types,'f');
642 push @from,$from if $from;
643 push @where,$where if $where;
647 my $from = join ', ',@from;
648 my $where = join ' AND ',@where;
656 $self->_print_query($query,@args) if DEBUG
|| $self->debug;
658 my $sth = $self->_prepare($query) or $self->throw($self->dbh->errstr);
660 $sth->execute(@args) or $self->throw($sth->errstr);
661 return $self->_sth2objs($sth);
665 # get primary sequence between start and end
667 sub _fetch_sequence
{
669 my ($seqid,$start,$end) = @_;
671 # backward compatibility to the old days when I liked reverse complementing
672 # dna by specifying $start > $end
674 if (defined $start && defined $end && $start > $end) {
676 ($start,$end) = ($end,$start);
678 $start-- if defined $start;
679 $end-- if defined $end;
681 my $id = $self->_locationid($seqid);
682 my $offset1 = $self->_offset_boundary($id,$start || 'left');
683 my $offset2 = $self->_offset_boundary($id,$end || 'right');
684 my $sequence_table = $self->_sequence_table;
687 SELECT sequence,offset
688 FROM $sequence_table as s
695 my $sth = $self->_prepare($sql);
697 $self->_print_query($sql,$id,$offset1,$offset2) if DEBUG
|| $self->debug;
698 $sth->execute($id,$offset1,$offset2) or $self->throw($sth->errstr);
700 while (my($frag,$offset) = $sth->fetchrow_array) {
701 substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
704 substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
707 $seq =~ tr/gatcGATC/ctagCTAG/;
713 sub _offset_boundary
{
715 my ($seqid,$position) = @_;
717 my $sequence_table = $self->_sequence_table;
718 my $locationlist_table = $self->_locationlist_table;
721 $sql = $position eq 'left' ?
"SELECT min(offset) FROM $sequence_table as s WHERE s.id=?"
722 :$position eq 'right' ?
"SELECT max(offset) FROM $sequence_table as s WHERE s.id=?"
723 :"SELECT max(offset) FROM $sequence_table as s WHERE s.id=? AND offset<=?";
724 my $sth = $self->_prepare($sql);
725 my @args = $position =~ /^-?\d+$/ ?
($seqid,$position) : ($seqid);
726 $self->_print_query($sql,@args) if DEBUG
|| $self->debug;
727 $sth->execute(@args) or $self->throw($sth->errstr);
728 my $boundary = $sth->fetchall_arrayref->[0][0];
735 # add namespace to tablename
739 my $table_name = shift;
740 my $namespace = $self->namespace;
741 return $table_name if (!defined $namespace ||
742 # is namespace already present in table name?
743 index($table_name, $namespace) == 0);
744 return "${namespace}_${table_name}";
748 # Fetch a Bio::SeqFeatureI from database using its primary_id
752 @_ or $self->throw("usage: fetch(\$primary_id)");
753 my $primary_id = shift;
754 my $features = $self->_feature_table;
755 my $sth = $self->_prepare(<<END);
756 SELECT id,object FROM $features WHERE id=?
758 $sth->execute($primary_id) or $self->throw($sth->errstr);
759 my $obj = $self->_sth2obj($sth);
765 # Efficiently fetch a series of IDs from the database
766 # Can pass an array or an array ref
770 @_ or $self->throw('usage: fetch_many($id1,$id2,$id3...)');
771 my $ids = join ',',map {ref($_) ? @
$_ : $_} @_ or return;
772 my $features = $self->_feature_table;
774 my $sth = $self->_prepare(<<END);
775 SELECT id,object FROM $features WHERE id IN ($ids)
777 $sth->execute() or $self->throw($sth->errstr);
778 return $self->_sth2objs($sth);
783 my ($seq_id,$start,$end,$strand,
784 $name,$class,$allow_aliases,
791 ) = rearrange
([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
792 'NAME','CLASS','ALIASES',
793 ['TYPES','TYPE','PRIMARY_TAG'],
794 ['ATTRIBUTES','ATTRIBUTE'],
798 ['SOURCE','SOURCES'],
801 my (@from,@where,@args,@group);
802 $range_type ||= 'overlaps';
804 my $features = $self->_feature_table;
805 @from = "$features as f";
808 # hacky backward compatibility workaround
809 undef $class if $class && $class eq 'Sequence';
810 $name = "$class:$name" if defined $class && length $class > 0;
811 # last argument is the join field
812 my ($from,$where,$group,@a) = $self->_name_sql($name,$allow_aliases,'f.id');
813 push @from,$from if $from;
814 push @where,$where if $where;
815 push @group,$group if $group;
819 if (defined $seq_id) {
820 # last argument is the name of the features table
821 my ($from,$where,$group,@a) = $self->_location_sql($seq_id,$start,$end,$range_type,$strand,'f');
822 push @from,$from if $from;
823 push @where,$where if $where;
824 push @group,$group if $group;
828 if (defined($sources)) {
829 my @sources = ref($sources) eq 'ARRAY' ? @
{$sources} : ($sources);
830 if (defined($types)) {
831 my @types = ref($types) eq 'ARRAY' ? @
{$types} : ($types);
833 foreach my $type (@types) {
834 # *** not sure what to do if user supplies both -source and -type
835 # where the type includes a source!
837 push(@final_types, $type);
840 foreach my $source (@sources) {
841 push(@final_types, $type.':'.$source);
845 $types = \
@final_types;
848 $types = [map { ':'.$_ } @sources];
852 if (defined($types)) {
853 # last argument is the name of the features table
854 my ($from,$where,$group,@a) = $self->_types_sql($types,'f');
855 push @from,$from if $from;
856 push @where,$where if $where;
857 push @group,$group if $group;
861 if (defined $attributes) {
862 # last argument is the join field
863 my ($from,$where,$group,@a) = $self->_attributes_sql($attributes,'f.id');
864 push @from,$from if $from;
865 push @where,$where if $where;
866 push @group,$group if $group;
870 if (defined $fromtable) {
871 # last argument is the join field
872 my ($from,$where,$group,@a) = $self->_from_table_sql($fromtable,'f.id');
873 push @from,$from if $from;
874 push @where,$where if $where;
875 push @group,$group if $group;
879 # if no other criteria are specified, then
880 # only fetch indexed (i.e. top level objects)
881 @where = 'indexed=1' unless @where;
883 my $from = join ', ',@from;
884 my $where = join ' AND ',map {"($_)"} @where;
885 my $group = join ', ',@group;
886 $group = "GROUP BY $group" if @group;
889 SELECT f.id,f.object,f.typeid,f.seqid,f.start,f.end,f.strand
895 $self->_print_query($query,@args) if DEBUG
|| $self->debug;
897 my $sth = $self->_prepare($query) or $self->throw($self->dbh->errstr);
898 $sth->execute(@args) or $self->throw($sth->errstr);
899 return $iterator ? Bio
::DB
::SeqFeature
::Store
::DBI
::Iterator
->new($sth,$self) : $self->_sth2objs($sth);
902 sub _aggregate_bins
{
905 my (%types,$binsize,$binstart);
906 while (my ($type,$seqname,$bin,$count,$bins,$start,$end) = $sth->fetchrow_array) {
907 $binsize ||= ($end-$start+1)/$bins;
908 $binstart ||= int($start/$binsize);
909 $types{$type}{seqname
} ||= $seqname;
910 $types{$type}{min
} ||= $start;
911 $types{$type}{max
} ||= $end;
912 $types{$type}{bins
} ||= [(0) x
$bins];
913 $types{$type}{bins
}[$bin-$binstart] = $count;
914 $types{$type}{count
} += $count;
917 for my $type (keys %types) {
918 my $min = $types{$type}{min
};
919 my $max = $types{$type}{max
};
920 my $seqid= $types{$type}{seqname
};
921 my $f = Bio
::SeqFeature
::Lite
->new(-seq_id
=> $seqid,
924 -type
=> "$type:bins",
925 -score
=> $types{$type}{count
},
926 -attributes
=> {coverage
=> join ',',@
{$types{$type}{bins
}}});
934 my ($name,$allow_aliases,$join) = @_;
935 my $name_table = $self->_name_table;
937 my $from = "$name_table as n";
938 my ($match,$string) = $self->_match_sql($name);
940 my $where = "n.id=$join AND n.name $match";
941 $where .= " AND n.display_name>0" unless $allow_aliases;
942 return ($from,$where,'',$string);
945 sub _search_attributes
{
947 my ($search_string,$attribute_names,$limit) = @_;
948 my @words = map {quotemeta($_)} split /\s+/,$search_string;
950 my $name_table = $self->_name_table;
951 my $attribute_table = $self->_attribute_table;
952 my $attributelist_table = $self->_attributelist_table;
953 my $type_table = $self->_type_table;
954 my $typelist_table = $self->_typelist_table;
956 my @tags = @
$attribute_names;
957 my $tag_sql = join ' OR ',("al.tag=?") x
@tags;
959 my $perl_regexp = join '|',@words;
961 my $sql_regexp = join ' OR ',("a.attribute_value REGEXP ?") x
@words;
963 SELECT name,attribute_value,tl.tag,n.id
964 FROM $name_table as n,$attribute_table as a,$attributelist_table as al,$type_table as t,$typelist_table as tl
966 AND al.id=a.attribute_id
973 $sql .= "LIMIT $limit" if defined $limit;
974 $self->_print_query($sql,@tags,@words) if DEBUG
|| $self->debug;
975 my $sth = $self->_prepare($sql);
976 $sth->execute(@tags,@words) or $self->throw($sth->errstr);
979 while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
980 my (@hits) = $value =~ /$perl_regexp/ig;
981 my @words_in_row = split /\b/,$value;
982 my $score = int(@hits * 10);
983 push @results,[$name,$value,$score,$type,$id];
986 @results = sort {$b->[2]<=>$a->[2]} @results;
995 if ($name =~ /(?:^|[^\\])[*?]/) {
996 $name =~ s/(^|[^\\])([%_])/$1\\$2/g;
997 $name =~ s/(^|[^\\])\*/$1%/g;
998 $name =~ s/(^|[^\\])\?/$1_/g;
1005 return ($match,$string);
1008 sub _from_table_sql
{
1010 my ($from_table,$join) = @_;
1011 my $from = "$from_table as ft";
1012 my $where = "ft.id=$join";
1013 return ($from,$where,'');
1016 sub _attributes_sql
{
1018 my ($attributes,$join) = @_;
1020 my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
1021 my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
1023 my $attribute_table = $self->_attribute_table;
1024 my $attributelist_table = $self->_attributelist_table;
1026 my $from = "$attribute_table as a use index(attribute_id), $attributelist_table as al";
1030 AND a.attribute_id=al.id
1034 my $group = $group_by;
1036 my @args = (@bind_args,@group_args);
1037 return ($from,$where,$group,@args);
1040 sub subfeature_types_are_indexed
{ 1 }
1041 sub subfeature_locations_are_indexed
{ 1 }
1045 my ($types,$type_table) = @_;
1046 my ($primary_tag,$source_tag);
1048 my @types = ref $types eq 'ARRAY' ? @
$types : $types;
1050 my $typelist = $self->_typelist_table;
1051 my $from = "$typelist AS tl";
1053 my (@matches,@args);
1055 for my $type (@types) {
1057 if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
1058 $primary_tag = $type->method;
1059 $source_tag = $type->source;
1061 ($primary_tag,$source_tag) = split ':',$type,2;
1064 if (defined $source_tag && length $source_tag) {
1065 if (defined $primary_tag && length($primary_tag)) {
1066 push @matches,"tl.tag=?";
1067 push @args,"$primary_tag:$source_tag";
1070 push @matches,"tl.tag LIKE ?";
1071 push @args,"%:$source_tag";
1074 push @matches,"tl.tag LIKE ?";
1075 push @args,"$primary_tag:%";
1078 my $matches = join ' OR ',@matches;
1081 tl.id=$type_table.typeid
1085 return ($from,$where,'',@args);
1090 my ($seq_id,$start,$end,$range_type,$strand,$location) = @_;
1092 # the additional join on the location_list table badly impacts performance
1093 # so we build a copy of the table in memory
1094 my $seqid = $self->_locationid_nocreate($seq_id) || 0; # zero is an invalid primary ID, so will return empty
1096 $start = MIN_INT
unless defined $start;
1097 $end = MAX_INT
unless defined $end;
1099 my ($bin_where,@bin_args) = $self->bin_where($start,$end,$location);
1101 my ($range,@range_args);
1102 if ($range_type eq 'overlaps') {
1103 $range = "$location.end>=? AND $location.start<=? AND ($bin_where)";
1104 @range_args = ($start,$end,@bin_args);
1105 } elsif ($range_type eq 'contains') {
1106 $range = "$location.start>=? AND $location.end<=? AND ($bin_where)";
1107 @range_args = ($start,$end,@bin_args);
1108 } elsif ($range_type eq 'contained_in') {
1109 $range = "$location.start<=? AND $location.end>=?";
1110 @range_args = ($start,$end);
1112 $self->throw("range_type must be one of 'overlaps', 'contains' or 'contained_in'");
1115 if (defined $strand) {
1116 $range .= " AND strand=?";
1117 push @range_args,$strand;
1128 my @args = ($seqid,@range_args);
1129 return ($from,$where,$group,@args);
1137 my $from_update_table = shift; # if present, will take ids from "update_table"
1139 my $dbh = $self->dbh;
1143 # try to bring in highres time() function
1144 eval "require Time::HiRes";
1146 my $last_time = $self->time();
1148 # tell _delete_index() not to bother removing the index rows corresponding
1149 # to each individual feature
1150 local $self->{reindexing
} = 1;
1154 my $update = $from_update_table;
1155 for my $table ($self->index_tables) {
1156 my $query = $from_update_table ?
"DELETE $table FROM $table,$update WHERE $table.id=$update.id"
1157 : "DELETE FROM $table";
1159 $self->_disable_keys($dbh,$table);
1161 my $iterator = $self->get_seq_stream(-from_table
=>$from_update_table ?
$update : undef);
1162 while (my $f = $iterator->next_seq) {
1163 if (++$count %1000 == 0) {
1164 $now = $self->time();
1165 my $elapsed = sprintf(" in %5.2fs",$now - $last_time);
1167 print STDERR
"$count features indexed$elapsed...",' 'x60
;
1168 print STDERR
-t STDOUT
&& !$ENV{EMACS
} ?
"\r" : "\n";
1170 $self->_update_indexes($f);
1173 for my $table ($self->index_tables) {
1174 $self->_enable_keys($dbh,$table);
1177 warn "Couldn't complete transaction: $@";
1188 $self->dbh->do("ANALYZE TABLE $_") foreach $self->index_tables;
1193 my @index_tables = $self->index_tables;
1194 my $features = $self->_feature_table;
1195 return ($features,@index_tables);
1200 return map {$self->_qualify($_)} qw(name attribute parent2child)
1205 my $features = $self->_feature_table;
1207 SELECT min(id) FROM $features
1209 my $sth=$self->_prepare($query);
1211 my ($first) = $sth->fetchrow_array;
1218 my $lastkey = shift;
1219 my $features = $self->_feature_table;
1221 SELECT min(id) FROM $features WHERE id>?
1223 my $sth=$self->_prepare($query);
1224 $sth->execute($lastkey);
1225 my ($next) = $sth->fetchrow_array;
1233 my $features = $self->_feature_table;
1235 SELECT count(*) FROM $features WHERE id=?
1237 my $sth=$self->_prepare($query);
1238 $sth->execute($key);
1239 my ($count) = $sth->fetchrow_array;
1247 my $dbh = $self->dbh;
1248 my $parent2child = $self->_parent2child_table;
1249 my $query = "SELECT child FROM $parent2child WHERE id=?";
1250 my $sth=$self->_prepare($query);
1251 $sth->execute($key);
1253 while (my ($cid) = $sth->fetchrow_array) {
1254 # Backcheck looking for multiple parents, delete only if one is present. I'm
1255 # sure there is a nice way to left join the parent2child table onto itself
1256 # to get this in one query above, just haven't worked it out yet...
1257 my $sth2 = $self->_prepare("SELECT count(id) FROM $parent2child WHERE child=?");
1258 $sth2->execute($cid);
1259 my ($count) = $sth2->fetchrow_array;
1261 $self->_deleteid($cid) || warn "An error occurred while removing subfeature id=$cid. Perhaps it was previously deleted?\n";
1264 for my $table ($self->all_tables) {
1265 $success += $dbh->do("DELETE FROM $table WHERE id=$key") || 0;
1272 my $dbh = $self->dbh;
1273 for my $table ($self->all_tables) {
1274 $dbh->do("DELETE FROM $table");
1280 my $dbh = $self->dbh;
1281 my $features = $self->_feature_table;
1283 SELECT count(*) FROM $features
1285 my $sth=$self->_prepare($query);
1287 my ($count) = $sth->fetchrow_array;
1294 my $dbh = $self->dbh;
1295 my $location = $self->_locationlist_table;
1296 my $sth = $self->_prepare("SELECT DISTINCT seqname FROM $location");
1297 $sth->execute() or $self->throw($sth->errstr);
1299 while (my ($id) = $sth->fetchrow_array) {
1307 my ($variable_name,$value) = @_;
1308 my $meta = $self->_meta_table;
1310 if (defined $value && $self->writeable) {
1312 REPLACE INTO $meta (name,value) VALUES (?,?)
1314 my $sth = $self->_prepare($query);
1315 $sth->execute($variable_name,$value) or $self->throw($sth->errstr);
1317 $self->{settings_cache
}{$variable_name} = $value;
1320 return $self->{settings_cache
}{$variable_name} if exists $self->{settings_cache
}{$variable_name};
1322 SELECT value FROM $meta as m WHERE m.name=?
1324 my $sth = $self->_prepare($query);
1325 $sth->execute($variable_name) or $self->throw($sth->errstr);
1326 my ($value) = $sth->fetchrow_array;
1328 return $self->{settings_cache
}{$variable_name} = $value;
1333 # Replace Bio::SeqFeatureI into database.
1338 my $index_flag = shift || undef;
1340 # ?? shouldn't need to do this
1341 # $self->_load_class($object);
1342 my $id = $object->primary_id;
1343 my $features = $self->_feature_table;
1345 my $sth = $self->_prepare(<<END);
1346 REPLACE INTO $features (id,object,indexed,seqid,start,end,strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?,?)
1349 my @location = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x6
;
1351 my $primary_tag = $object->primary_tag;
1352 my $source_tag = $object->source_tag || '';
1353 $primary_tag .= ":$source_tag";
1354 my $typeid = $self->_typeid($primary_tag,1);
1356 my $frozen = $self->no_blobs() ?
0 : $self->freeze($object);
1358 $sth->execute($id,$frozen,$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
1360 my $dbh = $self->dbh;
1361 $object->primary_id($dbh->{mysql_insertid
}) unless defined $id;
1363 $self->flag_for_indexing($dbh->{mysql_insertid
}) if $self->{bulk_update_in_progress
};
1366 # doesn't work with this schema, since we have to update name and attribute
1367 # tables which need object ids, which we can only know by replacing feats in
1368 # the feature table one by one
1371 my $index_flag = shift || undef;
1374 my $features = $self->_feature_table;
1377 foreach my $object (@objects) {
1378 my $id = $object->primary_id;
1379 my @location = $index_flag ?
$self->_get_location_and_bin($object) : (undef)x6
;
1380 my $primary_tag = $object->primary_tag;
1381 my $source_tag = $object->source_tag || '';
1382 $primary_tag .= ":$source_tag";
1383 my $typeid = $self->_typeid($primary_tag,1);
1385 push(@insert_values, ($id,0,$index_flag||0,@location,$typeid));
1390 push(@value_blocks, '(?,?,?,?,?,?,?,?,?,?)');
1392 my $value_blocks = join(',', @value_blocks);
1393 my $sql = qq{REPLACE INTO
$features (id
,object
,indexed
,seqid
,start
,end
,strand
,tier
,bin
,typeid
) VALUES
$value_blocks};
1395 my $sth = $self->_prepare($sql);
1396 $sth->execute(@insert_values) or $self->throw($sth->errstr);
1400 # Insert one Bio::SeqFeatureI into database. primary_id must be undef
1405 my $index_flag = shift || 0;
1407 $self->_load_class($object);
1408 defined $object->primary_id and $self->throw("$object already has a primary id");
1410 my $features = $self->_feature_table;
1411 my $sth = $self->_prepare(<<END);
1412 INSERT INTO $features (id,object,indexed) VALUES (?,?,?)
1414 $sth->execute(undef,$self->freeze($object),$index_flag) or $self->throw($sth->errstr);
1415 my $dbh = $self->dbh;
1416 $object->primary_id($dbh->{mysql_insertid
});
1417 $self->flag_for_indexing($dbh->{mysql_insertid
}) if $self->{bulk_update_in_progress
};
1423 Usage : @type_list = $db->types
1424 Function: Get all the types in the database
1425 Returns : array of Bio::DB::GFF::Typename objects
1433 eval "require Bio::DB::GFF::Typename"
1434 unless Bio
::DB
::GFF
::Typename
->can('new');
1435 my $typelist = $self->_typelist_table;
1437 SELECT tag from $typelist
1440 $self->_print_query($sql) if DEBUG
|| $self->debug;
1441 my $sth = $self->_prepare($sql);
1442 $sth->execute() or $self->throw($sth->errstr);
1445 while (my($tag) = $sth->fetchrow_array) {
1446 push @results,Bio
::DB
::GFF
::Typename
->new($tag);
1452 =head2 toplevel_types
1454 Title : toplevel_types
1455 Usage : @type_list = $db->toplevel_types
1456 Function: Get the toplevel types in the database
1457 Returns : array of Bio::DB::GFF::Typename objects
1461 This is similar to types() but only returns the types of
1462 INDEXED (toplevel) features.
1466 sub toplevel_types
{
1468 eval "require Bio::DB::GFF::Typename"
1469 unless Bio
::DB
::GFF
::Typename
->can('new');
1470 my $typelist = $self->_typelist_table;
1471 my $features = $self->_feature_table;
1473 SELECT distinct(tag) from $typelist as tl,$features as f
1474 WHERE tl.id=f.typeid
1478 $self->_print_query($sql) if DEBUG
|| $self->debug;
1479 my $sth = $self->_prepare($sql);
1480 $sth->execute() or $self->throw($sth->errstr);
1483 while (my($tag) = $sth->fetchrow_array) {
1484 push @results,Bio
::DB
::GFF
::Typename
->new($tag);
1491 # Insert a bit of DNA or protein into the database
1493 sub _insert_sequence
{
1495 my ($seqid,$seq,$offset) = @_;
1496 my $id = $self->_locationid($seqid);
1497 my $sequence = $self->_sequence_table;
1498 my $sth = $self->_prepare(<<END);
1499 REPLACE INTO $sequence (id,offset,sequence) VALUES (?,?,?)
1501 $sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
1505 # This subroutine flags the given primary ID for later reindexing
1507 sub flag_for_indexing
{
1510 my $needs_updating = $self->_update_table;
1511 my $sth = $self->_prepare("REPLACE INTO $needs_updating VALUES (?)");
1512 $sth->execute($id) or $self->throw($self->dbh->errstr);
1516 # Update indexes for given object
1518 sub _update_indexes
{
1521 defined (my $id = $obj->primary_id) or return;
1523 if ($self->{bulk_update_in_progress
}) {
1524 $self->_dump_update_name_index($obj,$id);
1525 $self->_dump_update_attribute_index($obj,$id);
1527 $self->_update_name_index($obj,$id);
1528 $self->_update_attribute_index($obj,$id);
1532 sub _update_name_index
{
1535 my $name = $self->_name_table;
1536 my $primary_id = $obj->primary_id;
1538 $self->_delete_index($name,$id);
1539 my ($names,$aliases) = $self->feature_names($obj);
1541 my $sth = $self->_prepare("INSERT INTO $name (id,name,display_name) VALUES (?,?,?)");
1543 $sth->execute($id,$_,1) or $self->throw($sth->errstr) foreach @
$names;
1544 $sth->execute($id,$_,0) or $self->throw($sth->errstr) foreach @
$aliases;
1548 sub _update_attribute_index
{
1551 my $attribute = $self->_attribute_table;
1552 $self->_delete_index($attribute,$id);
1554 my $sth = $self->_prepare("INSERT INTO $attribute (id,attribute_id,attribute_value) VALUES (?,?,?)");
1555 for my $tag ($obj->get_all_tags) {
1556 my $tagid = $self->_attributeid($tag);
1557 for my $value ($obj->get_tag_values($tag)) {
1558 $sth->execute($id,$tagid,$value) or $self->throw($sth->errstr);
1566 my ($table,$namefield,$name,$add_if_missing) = @_;
1567 my $qualified_table = $self->_qualify($table);
1568 my $sth = $self->_prepare(<<END);
1569 SELECT id FROM $qualified_table WHERE $namefield=?
1571 $sth->execute($name) or die $sth->errstr;
1572 my ($id) = $sth->fetchrow_array;
1574 return $id if defined $id;
1575 return unless $add_if_missing;
1577 $sth = $self->_prepare(<<END);
1578 INSERT INTO $qualified_table ($namefield) VALUES (?)
1580 $sth->execute($name) or die $sth->errstr;
1581 my $dbh = $self->dbh;
1582 return $dbh->{mysql_insertid
};
1586 shift->_genericid('typelist','tag',shift,1);
1589 shift->_genericid('locationlist','seqname',shift,1);
1591 sub _locationid_nocreate
{
1592 shift->_genericid('locationlist','seqname',shift,0);
1595 shift->_genericid('attributelist','tag',shift,1);
1598 sub _get_location_and_bin
{
1600 my $feature = shift;
1601 my $seqid = $self->_locationid($feature->seq_id||'');
1602 my $start = $feature->start;
1603 my $end = $feature->end;
1604 my $strand = $feature->strand || 0;
1605 my ($tier,$bin) = $self->get_bin($start,$end);
1606 return ($seqid,$start,$end,$strand,$tier,$bin);
1611 my ($start,$end) = @_;
1612 my $binsize = MIN_BIN
;
1613 my ($bin_start,$bin_end,$tier);
1616 $bin_start = int $start/$binsize;
1617 $bin_end = int $end/$binsize;
1618 last if $bin_start == $bin_end;
1622 return ($tier,$bin_start);
1627 my ($start,$end,$f) = @_;
1631 my $binsize = MIN_BIN
;
1632 while ($binsize <= MAX_BIN
) {
1633 my $bin_start = int($start/$binsize);
1634 my $bin_end = int($end/$binsize);
1635 push @bins,"($f.tier=? AND $f.bin between ? AND ?)";
1636 push @args,($tier,$bin_start,$bin_end);
1640 my $query = join ("\n\t OR ",@bins);
1641 return wantarray ?
($query,@args) : substitute
($query,@args);
1646 my ($table_name,$id) = @_;
1647 return if $self->{reindexing
};
1648 my $sth = $self->_prepare("DELETE FROM $table_name WHERE id=?") or $self->throw($self->dbh->errstr);
1652 # given a statement handler that is expected to return rows of (id,object)
1653 # unthaw each object and return a list of 'em
1658 while (my ($id,$o,$typeid,$seqid,$start,$end,$strand) = $sth->fetchrow_array) {
1661 # rebuild a new feat object from the data stored in the db
1662 $obj = $self->_rebuild_obj($id,$typeid,$seqid,$start,$end,$strand);
1665 $obj = $self->thaw($o,$id);
1674 # given a statement handler that is expected to return rows of (id,object)
1675 # unthaw each object and return a list of 'em
1679 my ($id,$o,$typeid,$seqid,$start,$end,$strand) = $sth->fetchrow_array;
1680 return unless defined $o;
1682 if ($o eq '0') { # I don't understand why an object ever needs to be rebuilt!
1683 # rebuild a new feat object from the data stored in the db
1684 $obj = $self->_rebuild_obj($id,$typeid,$seqid,$start,$end,$strand);
1687 $obj = $self->thaw($o,$id);
1694 my ($self, $id, $typeid, $db_seqid, $start, $end, $strand) = @_;
1695 my ($type, $source, $seqid);
1697 # convert typeid to type and source
1698 if (exists $self->{_type_cache
}->{$typeid}) {
1699 ($type, $source) = @
{$self->{_type_cache
}->{$typeid}};
1702 my $sql = qq{ SELECT
`tag` FROM typelist WHERE
`id` = ?
};
1703 my $sth = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
1704 $sth->execute($typeid);
1706 $sth->bind_columns(\
$result);
1707 while ($sth->fetch()) {
1708 # there should be only one row returned, but we ensure to get all rows
1711 ($type, $source) = split(':', $result);
1712 $self->{_type_cache
}->{$typeid} = [$type, $source];
1715 # convert the db seqid to the sequence name
1716 if (exists $self->{_seqid_cache
}->{$db_seqid}) {
1717 $seqid = $self->{_seqid_cache
}->{$db_seqid};
1720 my $sql = qq{ SELECT
`seqname` FROM locationlist WHERE
`id` = ?
};
1721 my $sth = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
1722 $sth->execute($db_seqid);
1723 $sth->bind_columns(\
$seqid);
1724 while ($sth->fetch()) {
1725 # there should be only one row returned, but we ensure to get all rows
1728 $self->{_seqid_cache
}->{$db_seqid} = $seqid;
1731 # get the names from name table?
1733 # get the attributes and store those in obj
1734 my $sql = qq{ SELECT attribute_id
,attribute_value FROM attribute WHERE
`id` = ?
};
1735 my $sth = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
1737 my ($attribute_id, $attribute_value);
1738 $sth->bind_columns(\
($attribute_id, $attribute_value));
1740 while ($sth->fetch()) {
1741 # convert the attribute_id to its real name
1743 if (exists $self->{_attribute_cache
}->{$attribute_id}) {
1744 $attribute = $self->{_attribute_cache
}->{$attribute_id};
1747 my $sql = qq{ SELECT
`tag` FROM attributelist WHERE
`id` = ?
};
1748 my $sth2 = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
1749 $sth2->execute($attribute_id);
1750 $sth2->bind_columns(\
$attribute);
1751 while ($sth2->fetch()) {
1752 # there should be only one row returned, but we ensure to get all rows
1755 $self->{_attribute_cache
}->{$attribute_id} = $attribute;
1758 if ($source && $attribute eq 'source' && $attribute_value eq $source) {
1762 $attribs{$attribute} = $attribute_value;
1765 # if we weren't called with all the params, pull those out of the database too
1766 if ( not ( grep { defined($_) } ( $typeid, $db_seqid, $start, $end, $strand ))) {
1767 my $sql = qq{ SELECT start
,end
,tag
,strand
,seqname
1768 FROM feature
,feature_location
,typelist
,locationlist
1769 WHERE feature
.id
=feature_location
.id AND feature
.typeid
=typelist
.id
1770 AND seqid
=locationlist
.id AND feature
.id
= ?
};
1771 my $sth = $self->_prepare($sql) or $self->throw($self->dbh->errstr);
1773 my ($feature_start, $feature_end, $feature_type, $feature_strand,$feature_seqname);
1774 $sth->bind_columns(\
($feature_start, $feature_end, $feature_type, $feature_strand, $feature_seqname));
1775 while ($sth->fetch()) {
1776 # there should be only one row returned, but we call like this to
1777 # ensure we get all rows
1779 $start ||= $feature_start;
1780 $end ||= $feature_end;
1781 $strand ||= $feature_strand;
1782 $seqid ||= $feature_seqname;
1784 my( $feature_typename , $feature_typesource ) = split /:/ , $feature_type;
1785 $type ||= $feature_typename;
1786 $source ||= $feature_typesource;
1789 my $obj = Bio
::SeqFeature
::Lite
->new(-primary_id
=> $id,
1790 $type ?
(-type
=> $type) : (),
1791 $source ?
(-source
=> $source) : (),
1792 $seqid ?
(-seq_id
=> $seqid) : (),
1793 defined $start ?
(-start
=> $start) : (),
1794 defined $end ?
(-end
=> $end) : (),
1795 defined $strand ?
(-strand
=> $strand) : (),
1796 keys %attribs ?
(-attributes
=> \
%attribs) : ());
1804 my $dbh = $self->dbh;
1805 my $sth = $dbh->prepare_cached($query, {}, 3) or $self->throw($dbh->errstr);
1810 ####################################################################################################
1811 # SQL Fragment generators
1812 ####################################################################################################
1814 sub _attribute_table
{ shift->_qualify('attribute') }
1815 sub _attributelist_table
{ shift->_qualify('attributelist') }
1816 sub _feature_table
{ shift->_qualify('feature') }
1817 sub _interval_stats_table
{ shift->_qualify('interval_stats') }
1818 sub _location_table
{ shift->_qualify('location') }
1819 sub _locationlist_table
{ shift->_qualify('locationlist') }
1820 sub _meta_table
{ shift->_qualify('meta') }
1821 sub _name_table
{ shift->_qualify('name') }
1822 sub _parent2child_table
{ shift->_qualify('parent2child') }
1823 sub _sequence_table
{ shift->_qualify('sequence') }
1824 sub _type_table
{ shift->_qualify('feature') }
1825 sub _typelist_table
{ shift->_qualify('typelist') }
1826 sub _update_table
{ shift->_qualify('update_table') }
1828 sub _make_attribute_where
{
1830 my ($attributetable,$attributenametable,$attributes) = @_;
1833 my $dbh = $self->dbh;
1834 foreach (keys %$attributes) {
1836 my @values = ref($attributes->{$_}) && ref($attributes->{$_}) eq 'ARRAY' ? @
{$attributes->{$_}} : $attributes->{$_};
1837 foreach (@values) { # convert * into % for wildcard matches
1840 my $match = join ' OR ',map {
1841 /%/ ?
"$attributetable.attribute_value LIKE ?"
1842 : "$attributetable.attribute_value=?"
1844 push @sql,"($attributenametable.tag=? AND ($match))";
1845 push @args,($_,@values);
1847 return (join(' OR ',@sql),@args);
1850 sub _make_attribute_group
{
1852 my ($table_name,$attributes) = @_;
1853 my $key_count = keys %$attributes or return;
1854 return "f.id,f.object,f.typeid,f.seqid,f.start,f.end,f.strand HAVING count(f.id)>?",$key_count-1;
1859 my ($query,@args) = @_;
1860 while ($query =~ /\?/) {
1861 my $arg = $self->dbh->quote(shift @args);
1862 $query =~ s/\?/$arg/;
1868 # special-purpose store for bulk loading - write to a file rather than to the db
1872 my $indexed = shift;
1875 my $store_fh = $self->dump_filehandle('feature');
1876 my $dbh = $self->dbh;
1878 my $autoindex = $self->autoindex;
1881 my $id = $self->next_id;
1882 my ($seqid,$start,$end,$strand,$tier,$bin) = $indexed ?
$self->_get_location_and_bin($obj) : (undef)x6
;
1883 my $primary_tag = $obj->primary_tag;
1884 my $source_tag = $obj->source_tag || '';
1885 $primary_tag .= ":$source_tag";
1886 my $typeid = $self->_typeid($primary_tag,1);
1888 print $store_fh join("\t",$id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,$dbh->quote($self->freeze($obj))),"\n";
1889 $obj->primary_id($id);
1890 $self->_update_indexes($obj) if $indexed && $autoindex;
1894 # remember whether we are have ever stored a non-indexed feature
1895 unless ($indexed or $self->{indexed_flag
}++) {
1896 $self->subfeatures_are_indexed(0);
1901 sub _dump_add_SeqFeature
{
1906 my $dbh = $self->dbh;
1907 my $fh = $self->dump_filehandle('parent2child');
1908 my $parent_id = (ref $parent ?
$parent->primary_id : $parent)
1909 or $self->throw("$parent should have a primary_id");
1912 for my $child_id (@children) {
1913 print $fh join("\t",$parent_id,$child_id),"\n";
1919 sub _dump_update_name_index
{
1922 my $fh = $self->dump_filehandle('name');
1923 my $dbh = $self->dbh;
1924 my ($names,$aliases) = $self->feature_names($obj);
1925 print $fh join("\t",$id,$dbh->quote($_),1),"\n" foreach @
$names;
1926 print $fh join("\t",$id,$dbh->quote($_),0),"\n" foreach @
$aliases;
1929 sub _dump_update_attribute_index
{
1932 my $fh = $self->dump_filehandle('attribute');
1933 my $dbh = $self->dbh;
1934 for my $tag ($obj->all_tags) {
1935 my $tagid = $self->_attributeid($tag);
1936 for my $value ($obj->each_tag_value($tag)) {
1937 print $fh join("\t",$id,$tagid,$dbh->quote($value)),"\n";
1942 sub coverage_array
{
1944 my ($seq_name,$start,$end,$types,$bins) =
1945 rearrange
([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],
1946 ['TYPES','TYPE','PRIMARY_TAG'],'BINS'],@_);
1951 my $segment = $self->segment($seq_name) or $self->throw("unknown seq_id $seq_name");
1952 $end = $segment->end;
1955 my $binsize = ($end-$start+1)/$bins;
1956 my $seqid = $self->_locationid_nocreate($seq_name) || 0;
1958 return [] unless $seqid;
1960 # where each bin starts
1961 my @his_bin_array = map {$start + $binsize * $_} (0..$bins-1);
1962 my @sum_bin_array = map {int(($_-1)/SUMMARY_BIN_SIZE
)} @his_bin_array;
1964 my $interval_stats = $self->_interval_stats_table;
1968 # pick up the type ids
1969 my ($from,$where,$group);
1970 ($from,$where,$group,@a) = $self->_types_sql($types,'b');
1971 $where =~ s/.+AND//s;
1972 $sth = $self->_prepare(<<END);
1973 SELECT id,tag FROM $from
1978 $sth = $self->_prepare(<<END);
1979 SELECT id,tag FROM typelist
1982 my (@t,$report_tag);
1984 while (my ($t,$tag) = $sth->fetchrow_array) {
1985 $report_tag ||= $tag;
1991 SELECT bin,cum_count
1992 FROM $interval_stats
1994 AND seqid=? AND bin >= ?
1998 $sth = $self->_prepare($sql);
2001 for my $typeid (@t) {
2003 for (my $i=0;$i<@sum_bin_array;$i++) {
2005 my @args = ($typeid,$seqid,$sum_bin_array[$i]);
2006 $self->_print_query($sql,@args) if $self->debug;
2008 $sth->execute(@args) or $self->throw($sth->errstr);
2009 my ($bin,$cum_count) = $sth->fetchrow_array;
2010 push @
{$bins{$typeid}},[$bin,$cum_count];
2014 return unless %bins;
2017 my $firstbin = int(($start-1)/$binsize);
2018 for my $type (keys %bins) {
2019 my $arry = $bins{$type};
2020 my $last_count = $arry->[0][1];
2024 for my $b (@
$arry) {
2025 my ($bin,$count) = @
$b;
2026 $delta = $count - $last_count if $bin > $last_bin;
2027 $merged_bins[$i++] += $delta;
2028 $last_count = $count;
2033 return wantarray ?
(\
@merged_bins,$report_tag) : \
@merged_bins;
2036 sub build_summary_statistics
{
2038 my $interval_stats = $self->_interval_stats_table;
2039 my $dbh = $self->dbh;
2042 my $sbs = SUMMARY_BIN_SIZE
;
2045 $self->_add_interval_stats_table;
2046 $self->_disable_keys($dbh,$interval_stats);
2047 $dbh->do("DELETE FROM $interval_stats");
2049 my $insert = $dbh->prepare(<<END) or $self->throw($dbh->errstr);
2050 INSERT INTO $interval_stats
2051 (typeid,seqid,bin,cum_count)
2055 my $sql = $self->_fetch_indexed_features_sql;
2056 my $select = $dbh->prepare($sql) or $self->throw($dbh->errstr);
2058 my $current_bin = -1;
2059 my ($current_type,$current_seqid,$count);
2061 my (%residuals,$last_bin);
2063 my $le = -t \
*STDERR ?
"\r" : "\n";
2068 while (my($typeid,$seqid,$start,$end) = $select->fetchrow_array) {
2069 print STDERR
$count," features processed$le" if ++$count % 1000 == 0;
2071 my $bin = int($start/$sbs);
2072 $current_type ||= $typeid;
2073 $current_seqid ||= $seqid;
2075 # because the input is sorted by start, no more features will contribute to the
2076 # current bin so we can dispose of it
2077 if ($bin != $current_bin) {
2078 if ($seqid != $current_seqid or $typeid != $current_type) {
2079 # load all bins left over
2080 $self->_load_bins($insert,\
%residuals,\
$cum_count,$current_type,$current_seqid);
2084 # load all up to current one
2085 $self->_load_bins($insert,\
%residuals,\
$cum_count,$current_type,$current_seqid,$current_bin);
2089 $last_bin = $current_bin;
2090 ($current_seqid,$current_type,$current_bin) = ($seqid,$typeid,$bin);
2092 # summarize across entire spanned region
2093 my $last_bin = int(($end-1)/$sbs);
2094 for (my $b=$bin;$b<=$last_bin;$b++) {
2099 # load all bins left over
2100 $self->_load_bins($insert,\
%residuals,\
$cum_count,$current_type,$current_seqid);
2101 $self->_enable_keys($dbh,$interval_stats);
2105 if ($result) { $self->commit } else { warn "Can't build summary statistics: $@"; $self->rollback };
2111 my ($insert,$residuals,$cum_count,$type,$seqid,$stop_after) = @_;
2112 for my $b (sort {$a<=>$b} keys %$residuals) {
2113 last if defined $stop_after and $b > $stop_after;
2114 $$cum_count += $residuals->{$b};
2115 my @args = ($type,$seqid,$b,$$cum_count);
2116 $insert->execute(@args);
2117 delete $residuals->{$b}; # no longer needed
2121 sub _add_interval_stats_table
{
2123 my $tables = $self->table_definitions;
2124 my $interval_stats = $self->_interval_stats_table;
2125 $self->dbh->do("CREATE TABLE IF NOT EXISTS $interval_stats $tables->{interval_stats}");
2128 sub _fetch_indexed_features_sql
{
2130 my $features = $self->_feature_table;
2132 SELECT typeid,seqid,start-1,end
2135 ORDER BY typeid,seqid,start
2141 my ($dbh,$table) = @_;
2142 $dbh->do("ALTER TABLE $table DISABLE KEYS");
2146 my ($dbh,$table) = @_;
2147 $dbh->do("ALTER TABLE $table ENABLE KEYS");
2151 return Time
::HiRes
::time() if Time
::HiRes
->can('time');
2157 if ($self->{bulk_update_in_progress
}) { # be sure to remove temp files
2158 for my $table ($self->_feature_table,$self->index_tables) {
2159 my $path = $self->dump_path($table);
2167 return if $self->{_in_transaction
}++;
2168 my $dbh = $self->dbh;
2169 return unless $dbh->{AutoCommit
};
2175 return unless $self->{_in_transaction
};
2176 delete $self->{_in_transaction
};
2182 return unless $self->{_in_transaction
};
2183 delete $self->{_in_transaction
};
2184 $self->dbh->rollback;