ported interval statistics to oracle adaptor, but not testedperl -cw Bio/DB/GFF/Adapt...
[bioperl-live.git] / Bio / DB / GFF / Adaptor / dbi / oracle.pm
blobd9a8c47f81c6fc2ff48b77f5c3ed3c8ef1a28802
1 package Bio::DB::GFF::Adaptor::dbi::oracle;
3 =head1 NAME
5 Bio::DB::GFF::Adaptor::dbi::oracle -- Database adaptor for a specific oracle schema
7 =head1 SYNOPSIS
9 See L<Bio::DB::GFF>
11 =cut
13 # a simple oracle adaptor
14 use strict;
15 #use Bio::DB::GFF::Adaptor::dbi::mysql;
16 #use Bio::DB::GFF::Adaptor::dbi::mysqlopt;
17 use Bio::DB::GFF::Util::Binning;
18 use Bio::DB::GFF::Util::Rearrange; # for rearrange()
19 use base qw(Bio::DB::GFF::Adaptor::dbi);
21 use constant MAX_SEGMENT => 100_000_000; # the largest a segment can get
22 use constant DEFAULT_CHUNK => 2000;
24 use constant GETSEQCOORDS =><<END;
25 SELECT fref,
26 NVL(gclass,'Sequence'),
27 min(fstart),
28 max(fstop),
29 fstrand,
30 gname
31 FROM fdata,fgroup
32 WHERE fgroup.gname=?
33 AND fgroup.gclass=?
34 AND fgroup.gid=fdata.gid
35 GROUP BY fref,fstrand,gclass,gname
36 END
39 use constant GETALIASCOORDS =><<END;
40 SELECT fref,
41 NVL(gclass,'Sequence'),
42 min(fstart),
43 max(fstop),
44 fstrand,
45 gname
46 FROM fdata,fgroup,fattribute,fattribute_to_feature
47 WHERE fattribute_to_feature.fattribute_value=?
48 AND fgroup.gclass=?
49 AND fgroup.gid=fdata.gid
50 AND fattribute.fattribute_name='Alias'
51 AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
52 AND fattribute_to_feature.fid=fdata.fid
53 GROUP BY fref,fstrand,gclass,gname
54 END
57 use constant GETALIASLIKE =><<END;
58 SELECT fref,
59 NVL(gclass,'Sequence'),
60 min(fstart),
61 max(fstop),
62 fstrand,
63 gname
64 FROM fdata,fgroup,fattribute,fattribute_to_feature
65 WHERE fattribute_to_feature.fattribute_value LIKE ?
66 AND fgroup.gclass=?
67 AND fgroup.gid=fdata.gid
68 AND fattribute.fattribute_name='Alias'
69 AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
70 AND fattribute_to_feature.fid=fdata.fid
71 GROUP BY fref,fstrand,gname
72 END
76 use constant GETFORCEDSEQCOORDS =><<END;
77 SELECT fref,
78 NVL(gclass,'Sequence'),
79 min(fstart),
80 max(fstop),
81 fstrand
82 FROM fdata,fgroup
83 WHERE fgroup.gname=?
84 AND fgroup.gclass=?
85 AND fdata.fref=?
86 AND fgroup.gid=fdata.gid
87 GROUP BY fref,fstrand,gclass
88 END
91 ########################
92 # moved from mysqlopt.pm
93 ########################
95 # this is the largest that any reference sequence can be (100 megabases)
96 use constant MAX_BIN => 100_000_000;
98 # this is the smallest bin (1 K)
99 use constant MIN_BIN => 1000;
101 # size of range over which it is faster to force mysql to use the range for indexing
102 use constant STRAIGHT_JOIN_LIMIT => 200_000;
104 ##############################################################################
106 =head1 DESCRIPTION
108 This adaptor implements a specific oracle database schema that is
109 compatible with Bio::DB::GFF. It inherits from
110 Bio::DB::GFF::Adaptor::dbi, which itself inherits from Bio::DB::GFF.
112 The schema uses several tables:
114 =over 4
116 =item fdata
118 This is the feature data table. Its columns are:
120 fid feature ID (integer)
121 fref reference sequence name (string)
122 fstart start position relative to reference (integer)
123 fstop stop postion relative to reference (integer)
124 ftypeid feature type ID (integer)
125 fscore feature score (float); may be null
126 fstrand strand; one of "+" or "-"; may be null
127 fphase phase; one of 0, 1 or 2; may be null
128 gid group ID (integer)
129 ftarget_start for similarity features, the target start position (integer)
130 ftarget_stop for similarity features, the target stop position (integer)
132 Note that it would be desirable to normalize the reference sequence
133 name, since there are usually many features that share the same
134 reference feature. However, in the current schema, query performance
135 suffers dramatically when this additional join is added.
137 =item fgroup
139 This is the group table. There is one row for each group. Columns:
141 gid the group ID (integer)
142 gclass the class of the group (string)
143 gname the name of the group (string)
145 The group table serves multiple purposes. As you might expect, it is
146 used to cluster features that logically belong together, such as the
147 multiple exons of the same transcript. It is also used to assign a
148 name and class to a singleton feature. Finally, the group table is
149 used to identify the target of a similarity hit. This is consistent
150 with the way in which the group field is used in the GFF version 2
151 format.
153 The fgroup.gid field joins with the fdata.gid field.
155 Examples:
157 sql> select * from fgroup where gname='sjj_2L52.1';
158 +-------+-------------+------------+
159 | gid | gclass | gname |
160 +-------+-------------+------------+
161 | 69736 | PCR_product | sjj_2L52.1 |
162 +-------+-------------+------------+
163 1 row in set (0.70 sec)
165 sql> select fref,fstart,fstop from fdata,fgroup
166 where gclass='PCR_product' and gname = 'sjj_2L52.1'
167 and fdata.gid=fgroup.gid;
168 +---------------+--------+-------+
169 | fref | fstart | fstop |
170 +---------------+--------+-------+
171 | CHROMOSOME_II | 1586 | 2355 |
172 +---------------+--------+-------+
173 1 row in set (0.03 sec)
175 =item ftype
177 This table contains the feature types, one per row. Columns are:
179 ftypeid the feature type ID (integer)
180 fmethod the feature type method name (string)
181 fsource the feature type source name (string)
183 The ftype.ftypeid field joins with the fdata.ftypeid field. Example:
185 sql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype
186 where gclass='PCR_product'
187 and gname = 'sjj_2L52.1'
188 and fdata.gid=fgroup.gid
189 and fdata.ftypeid=ftype.ftypeid;
190 +---------------+--------+-------+-------------+-----------+
191 | fref | fstart | fstop | fmethod | fsource |
192 +---------------+--------+-------+-------------+-----------+
193 | CHROMOSOME_II | 1586 | 2355 | PCR_product | GenePairs |
194 +---------------+--------+-------+-------------+-----------+
195 1 row in set (0.08 sec)
197 =item fdna
199 This table holds the raw DNA of the reference sequences. It has three
200 columns:
202 fref reference sequence name (string)
203 foffset offset of this sequence
204 fdna the DNA sequence (longblob)
206 To overcome problems loading large blobs, DNA is automatically
207 fragmented into multiple segments when loading, and the position of
208 each segment is stored in foffset. The fragment size is controlled by
209 the -clump_size argument during initialization.
211 =item fattribute_to_feature
213 This table holds "attributes", which are tag/value pairs stuffed into
214 the GFF line. The first tag/value pair is treated as the group, and
215 anything else is treated as an attribute (weird, huh?).
217 CHR_I assembly_tag Finished 2032 2036 . + . Note "Right: cTel33B"
218 CHR_I assembly_tag Polymorphism 668 668 . + . Note "A->C in cTel33B"
220 The columns of this table are:
222 fid feature ID (integer)
223 fattribute_id ID of the attribute (integer)
224 fattribute_value text of the attribute (text)
226 The fdata.fid column joins with fattribute_to_feature.fid.
228 =item fattribute
230 This table holds the normalized names of the attributes. Fields are:
232 fattribute_id ID of the attribute (integer)
233 fattribute_name Name of the attribute (varchar)
235 =back
237 =head2 Data Loading Methods
239 In addition to implementing the abstract SQL-generating methods of
240 Bio::DB::GFF::Adaptor::dbi, this module also implements the data
241 loading functionality of Bio::DB::GFF.
243 =cut
246 =head2 new
248 Title : new
249 Usage : $db = Bio::DB::GFF->new(@args)
250 Function: create a new adaptor
251 Returns : a Bio::DB::GFF object
252 Args : see below
253 Status : Public
255 The new constructor is identical to the "dbi" adaptor's new() method,
256 except that the prefix "dbi:oracle" is added to the database DSN identifier
257 automatically if it is not there already.
259 Argument Description
260 -------- -----------
262 -dsn the DBI data source, e.g. 'dbi:mysql:ens0040' or "ens0040"
264 -user username for authentication
266 -pass the password for authentication
268 =cut
272 sub new {
273 my $class = shift;
274 my ($dsn,$other) = rearrange([
275 [qw(FEATUREDB DB DSN)],
276 ],@_);
277 $dsn = "dbi:Oracle:$dsn" if !ref($dsn) && $dsn !~ /^(dbi|DBI):/;
278 my $self = $class->SUPER::new(-dsn=>$dsn,%$other);
279 $self;
282 =head2 schema
284 Title : schema
285 Usage : $schema = $db->schema
286 Function: return the CREATE script for the schema
287 Returns : a list of CREATE statemetns
288 Args : none
289 Status : protected
291 This method returns a list containing the various CREATE statements
292 needed to initialize the database tables.
294 =cut
296 sub schema {
297 my %schema = (
298 fdata =>{
299 table=> q{
300 create table fdata (
301 fid INTEGER NOT NULL,
302 fref VARCHAR(100) DEFAULT '' NOT NULL,
303 fstart INTEGER DEFAULT '0' NOT NULL,
304 fstop INTEGER DEFAULT '0' NOT NULL,
305 fbin NUMBER DEFAULT '0.000000' NOT NULL,
306 ftypeid INTEGER DEFAULT '0' NOT NULL,
307 fscore NUMBER ,
308 fstrand VARCHAR2(3) CHECK (fstrand IN ('+','-')),
309 fphase VARCHAR2(3) CHECK (fphase IN ('0','1','2')),
310 gid INTEGER DEFAULT '0' NOT NULL,
311 ftarget_start INTEGER ,
312 ftarget_stop INTEGER ,
313 CONSTRAINT fdata_pk PRIMARY KEY (fid)
315 }, # fdata table
317 index=>{
318 fdata_fref_idx => q{
319 CREATE UNIQUE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)
322 fdata_ftypeid_idx => q{
323 CREATE INDEX fdata_ftypeid_idx ON fdata (ftypeid)
326 fdata_gid_idx => q{
327 CREATE INDEX fdata_gid_idx ON fdata (gid)
329 }, # fdata indexes
331 sequence=> {
332 fdata_fid_sq => q{
333 CREATE SEQUENCE fdata_fid_sq START WITH 1
335 }, # fdata sequences
337 trigger=> {
338 fdata_fid_ai => q{
339 CREATE OR REPLACE TRIGGER fdata_fid_ai
340 BEFORE INSERT ON fdata
341 FOR EACH ROW WHEN (new.fid IS NULL OR new.fid = 0)
342 BEGIN
343 SELECT fdata_fid_sq.nextval INTO :new.fid FROM dual;
344 END;
346 }# fdata triggers
348 }, # fdata
352 fgroup => {
353 table => q{
354 CREATE TABLE fgroup (
355 gid INTEGER NOT NULL,
356 gclass VARCHAR(100) ,
357 gname VARCHAR(100) ,
358 CONSTRAINT fgroup_pk PRIMARY KEY (gid)
360 }, # fgroup table
362 index => {
363 fgroup_gclass_idx => q{
364 CREATE UNIQUE INDEX fgroup_gclass_idx ON fgroup (gclass,gname)
366 }, # fgroup indexes
368 sequence => {
370 fgroup_gid_sq => q{
371 CREATE SEQUENCE fgroup_gid_sq START WITH 1
373 }, # fgroup sequences
376 trigger => {
377 fgroup_gid_ai => q{
378 CREATE OR REPLACE TRIGGER fgroup_gid_ai
379 BEFORE INSERT ON fgroup
380 FOR EACH ROW WHEN (new.gid IS NULL OR new.gid = 0)
381 BEGIN
382 SELECT fgroup_gid_sq.nextval INTO :new.gid FROM dual;
383 END;
385 } # fgroup triggers
387 }, # fgroup
389 ftype => {
390 table => q{
391 CREATE TABLE ftype (
392 ftypeid INTEGER NOT NULL,
393 fmethod VARCHAR(100) DEFAULT '' NOT NULL,
394 fsource VARCHAR(100),
395 CONSTRAINT ftype_pk PRIMARY KEY (ftypeid)
397 }, # ftype table
399 index => {
400 ftype_fmethod_idx => q{
401 CREATE INDEX ftype_fmethod_idx ON ftype (fmethod)
404 ftype_fsource_idx => q{
405 CREATE INDEX ftype_fsource_idx ON ftype (fsource)
408 ftype_ftype_idx => q{
409 CREATE UNIQUE INDEX ftype_ftype_idx ON ftype (fmethod,fsource)
411 }, # ftype indexes
413 sequence => {
414 ftype_ftypeid_sq => q{
415 CREATE SEQUENCE ftype_ftypeid_sq START WITH 1
417 }, #ftype sequences
419 trigger => {
420 ftype_ftypeid_ai => q{
421 CREATE OR REPLACE TRIGGER ftype_ftypeid_ai
422 BEFORE INSERT ON ftype
423 FOR EACH ROW WHEN (new.ftypeid IS NULL OR new.ftypeid = 0)
424 BEGIN
425 SELECT ftype_ftypeid_sq.nextval INTO :new.ftypeid FROM dual;
426 END;
428 } #ftype triggers
429 }, # ftype
432 fdna => {
433 table => q{
434 CREATE TABLE fdna (
435 fref VARCHAR(100) DEFAULT '' NOT NULL,
436 foffset INTEGER DEFAULT '0' NOT NULL,
437 fdna LONG /* LONGBLOB */ ,
438 CONSTRAINT fdna_pk PRIMARY KEY (fref,foffset)
440 } #fdna table
441 }, #fdna
443 fmeta => {
444 table => q{
445 CREATE TABLE fmeta (
446 fname VARCHAR(255) DEFAULT '' NOT NULL,
447 fvalue VARCHAR(255) DEFAULT '' NOT NULL,
448 CONSTRAINT fmeta_pk PRIMARY KEY (fname)
450 } # fmeta table
451 }, # fmeta
454 fattribute => {
455 table => q{
456 CREATE TABLE fattribute (
457 fattribute_id INTEGER NOT NULL,
458 fattribute_name VARCHAR(255) DEFAULT '' NOT NULL,
459 CONSTRAINT fattribute_pk PRIMARY KEY (fattribute_id)
461 }, # fattribute table
463 sequence=> {
464 fattribute_fattribute_id_sq => q{
465 CREATE SEQUENCE fattribute_fattribute_id_sq START WITH 1
467 }, # fattribute sequences
469 trigger => {
470 fattribute_fattribute_id_ai => q{
471 CREATE OR REPLACE TRIGGER fattribute_fattribute_id_ai
472 BEFORE INSERT ON fattribute
473 FOR EACH ROW WHEN (new.fattribute_id IS NULL OR new.fattribute_id = 0)
474 BEGIN
475 SELECT fattribute_fattribute_id_sq.nextval INTO :new.fattribute_id FROM dual;
476 END;
478 } # fattribute triggers
479 }, # fattribute
481 fattribute_to_feature => {
482 table => q{
483 CREATE TABLE fattribute_to_feature (
484 fid INTEGER DEFAULT '0' NOT NULL,
485 fattribute_id INTEGER DEFAULT '0' NOT NULL,
486 fattribute_value VARCHAR2(255) /* TEXT */
488 }, # fattribute_to_feature table
490 index => {
491 fattribute_to_feature_fid => q{
492 CREATE INDEX fattribute_to_feature_fid ON fattribute_to_feature (fid,fattribute_id)
494 } # fattribute_to_feature indexes
495 }, # fattribute_to_feature
497 finterval_stats => {
498 table=> q{
499 CREATE TABLE "finterval_stats" (
500 "ftypeid" integer DEFAULT '0' NOT NULL,
501 "fref" VARCHAR(100) DEFAULT '' NOT NULL,
502 "fbin" integer DEFAULT '0' NOT NULL,
503 "fcum_count" integer DEFAULT '0' NOT NULL,
504 CONSTRAINT finterval_stats_pk PRIMARY KEY (ftypeid,fref,fbin)
506 } # finterval_stats table
507 },# finterval_stats
510 return \%schema;
514 =head2 do_initialize
516 Title : do_initialize
517 Usage : $success = $db->do_initialize($drop_all)
518 Function: initialize the database
519 Returns : a boolean indicating the success of the operation
520 Args : a boolean indicating whether to delete existing data
521 Status : protected
523 This method will load the schema into the database. If $drop_all is
524 true, then any existing data in the tables known to the schema will be
525 deleted.
527 Internally, this method calls schema() to get the schema data.
529 =cut
531 # Create the schema from scratch.
532 # You will need create privileges for this.
533 #sub do_initialize {
534 # my $self = shift;
535 # my $erase = shift;
536 # $self->drop_all if $erase;
538 # my $dbh = $self->features_db;
539 # my $schema = $self->schema;
541 # foreach my $table_name(keys %$schema) {
542 # my $create_table_stmt = $$schema{$table_name}{table} ;
543 # $dbh->do($create_table_stmt) || warn $dbh->errstr;
545 # 1;
550 =head2 drop_all
552 Title : drop_all
553 Usage : $db->drop_all
554 Function: empty the database
555 Returns : void
556 Args : none
557 Status : protected
559 This method drops the tables known to this module. Internally it
560 calls the abstract tables() method.
562 =cut
564 # Drop all the GFF tables -- dangerous!
565 #sub drop_all {
566 # my $self = shift;
567 # my $dbh = $self->features_db;
568 # local $dbh->{PrintError} = 0;
569 # foreach ($self->tables) {
570 # $dbh->do("drop table $_");
579 =head2 setup_load
581 Title : setup_load
582 Usage : $db->setup_load
583 Function: called before load_gff_line()
584 Returns : void
585 Args : none
586 Status : protected
588 This method performs schema-specific initialization prior to loading a
589 set of GFF records. It prepares a set of DBI statement handlers to be
590 used in loading the data.
592 =cut
594 sub setup_load {
595 my $self = shift;
596 my $schema = $self->schema;
598 my $dbh = $self->features_db;
600 if ($self->lock_on_load) {
601 my @tables = map { "$_ WRITE"} $self->tables;
602 my $tables = join ', ',@tables;
603 $dbh->do("LOCK TABLES $tables");
606 my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
607 my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
608 my $sequence_type = (keys %{$schema->{ftype}{sequence}})[0];
609 my $insertid_type = $dbh->prepare_delayed("SELECT $sequence_type.CURRVAL FROM dual");
611 my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE gname=? AND gclass=?');
612 my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');
613 my $sequence_group = (keys %{$schema->{fgroup}{sequence}})[0];
614 my $insertid_group = $dbh->prepare_delayed("SELECT $sequence_group.CURRVAL FROM dual");
616 my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
617 my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
618 my $sequence_attribute = (keys %{$schema->{fattribute}{sequence}})[0];
619 my $insertid_attribute = $dbh->prepare_delayed("SELECT $sequence_attribute.CURRVAL FROM dual");
621 my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');
623 my $insert_data = $dbh->prepare_delayed(<<END);
624 INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
625 fstrand,fphase,gid,ftarget_start,ftarget_stop)
626 VALUES(?,?,?,?,?,?,?,?,?,?,?)
629 my $delete_existing_data = $dbh->prepare_delayed('DELETE FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND fbin=? AND ftypeid=? AND GID=?');
630 my $sequence_data = (keys %{$schema->{fdata}{sequence}})[0];
631 my $insertid_data = $dbh->prepare_delayed("SELECT $sequence_data.CURRVAL FROM dual");
635 $self->{load_stuff}{sth}{lookup_ftype} = $lookup_type;
636 $self->{load_stuff}{sth}{insert_ftype} = $insert_type;
637 $self->{load_stuff}{sth}{insertid_ftype} = $insertid_type;
638 $self->{load_stuff}{sth}{lookup_fgroup} = $lookup_group;
639 $self->{load_stuff}{sth}{insert_fgroup} = $insert_group;
640 $self->{load_stuff}{sth}{insertid_fgroup} = $insertid_group;
641 $self->{load_stuff}{sth}{insert_fdata} = $insert_data;
642 $self->{load_stuff}{sth}{insertid_fdata} = $insertid_data;
643 $self->{load_stuff}{sth}{delete_existing_fdata} = $delete_existing_data;
644 $self->{load_stuff}{sth}{lookup_fattribute} = $lookup_attribute;
645 $self->{load_stuff}{sth}{insert_fattribute} = $insert_attribute;
646 $self->{load_stuff}{sth}{insertid_fattribute} = $insertid_attribute;
647 $self->{load_stuff}{sth}{insert_fattribute_value} = $insert_attribute_value;
648 $self->{load_stuff}{types} = {};
649 $self->{load_stuff}{groups} = {};
650 $self->{load_stuff}{counter} = 0;
653 =head2 load_gff_line
655 Title : load_gff_line
656 Usage : $db->load_gff_line($fields)
657 Function: called to load one parsed line of GFF
658 Returns : true if successfully inserted
659 Args : hashref containing GFF fields
660 Status : protected
662 This method is called once per line of the GFF and passed a series of
663 parsed data items that are stored into the hashref $fields. The keys are:
665 ref reference sequence
666 source annotation source
667 method annotation method
668 start annotation start
669 stop annotation stop
670 score annotation score (may be undef)
671 strand annotation strand (may be undef)
672 phase annotation phase (may be undef)
673 group_class class of annotation's group (may be undef)
674 group_name ID of annotation's group (may be undef)
675 target_start start of target of a similarity hit
676 target_stop stop of target of a similarity hit
677 attributes array reference of attributes, each of which is a [tag=>value] array ref
679 =cut
681 sub load_gff_line {
682 my $self = shift;
683 my $gff = shift;
685 if (defined $gff->{phase}){
686 chomp($gff->{phase});
687 undef($gff->{phase}) if $gff->{phase} eq '.';
690 if (defined $gff->{strand} && $gff->{strand} eq '.'){undef($gff->{strand})};
691 if (defined $gff->{score} && $gff->{score} eq '.'){undef($gff->{score})};
693 my $s = $self->{load_stuff};
694 my $dbh = $self->features_db;
695 local $dbh->{PrintError} = 0;
697 defined(my $typeid = $self->get_table_id('ftype', $gff->{method} => $gff->{source})) or return;
698 defined(my $groupid = $self->get_table_id('fgroup',$gff->{gname} => $gff->{gclass})) or return;
700 my $bin = bin($gff->{start},$gff->{stop},$self->min_bin);
701 my $result = $s->{sth}{insert_fdata}->execute($gff->{ref},
702 $gff->{start},$gff->{stop},$bin,
703 $typeid,
704 $gff->{score},$gff->{strand},$gff->{phase},
705 $groupid,
706 $gff->{tstart},$gff->{tstop});
707 if (defined ($dbh->errstr)){
708 print $dbh->errstr,"\n" ,%$gff,"\n";
709 if ($dbh->errstr =~ /ORA-02290: check constraint/){
710 print "PHASE=$gff->{phase}"."===","\n";
713 if ($dbh->errstr =~ /ORA-00001: unique constraint/){
714 $result = $s->{sth}{delete_existing_fdata}->execute($gff->{ref},
715 $gff->{start},$gff->{stop},$bin,
716 $typeid,
717 $groupid);
719 print "delete row result=$result\n";
720 $result = $s->{sth}{insert_fdata}->execute($gff->{ref},
721 $gff->{start},$gff->{stop},$bin,
722 $typeid,
723 $gff->{score},$gff->{strand},$gff->{phase},
724 $groupid,
725 $gff->{tstart},$gff->{tstop});
727 print "insert row result=$result\n";
730 warn $dbh->errstr,"\n" and print "ref=",$gff->{ref}," start=",$gff->{start}," stop=",$gff->{stop}," bin=",$bin," typeid=",$typeid," groupid=",$groupid,"\n"
731 and return unless $result;
733 my $fid = $self->insertid($s->{sth},'fdata')
734 || $self->get_feature_id($gff->{ref},$gff->{start},$gff->{stop},$typeid,$groupid);
737 # insert attributes
739 # print STDERR map {"$fid attribute:". $_->[0]."=".$_->[1]."\n"} @{$gff->{attributes}};
741 foreach (@{$gff->{attributes}}) {
742 defined(my $attribute_id = $self->get_table_id('fattribute',$_->[0])) or return;
743 $s->{sth}{insert_fattribute_value}->execute($fid,$attribute_id,$_->[1]);
746 if ( (++$s->{counter} % 1000) == 0) {
747 print STDERR "$s->{counter} records loaded...";
748 print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
751 $fid;
757 =head2 get_table_id
759 Title : get_table_id
760 Usage : $integer = $db->get_table_id($table,@ids)
761 Function: get the ID of a group or type
762 Returns : an integer ID or undef
763 Args : none
764 Status : private
766 This internal method is called by load_gff_line to look up the integer
767 ID of an existing feature type or group. The arguments are the name
768 of the table, and two string identifiers. For feature types, the
769 identifiers are the method and source. For groups, the identifiers
770 are group name and class.
772 This method requires that a statement handler named I<lookup_$table>,
773 have been created previously by setup_load(). It is here to overcome
774 deficiencies in mysql's INSERT syntax.
776 =cut
779 # get the object ID from a named table
780 sub get_table_id {
781 my $self = shift;
782 my $table = shift;
783 my @ids = @_;
785 # irritating warning for null id
786 my $id_key;
788 local $^W=0;
789 $id_key = join ':',@ids;
792 my $s = $self->{load_stuff};
793 my $sth = $s->{sth};
794 my $dbh = $self->features_db;
796 unless (defined($s->{$table}{$id_key})) {
797 $sth->{"lookup_$table"}->execute(@ids);
798 my @result = $sth->{"lookup_$table"}->fetchrow_array;
799 if (@result > 0) {
800 $s->{$table}{$id_key} = $result[0];
801 } else {
802 $sth->{"insert_$table"}->execute(@ids)
803 && ($s->{$table}{$id_key} = $self->insertid($sth,$table));
804 #&& ($s->{$table}{$id_key} = $self->insertid($sth->{"insertid_$table"}));
805 #&& ($s->{$table}{$id_key} = $sth->{"insert_$table"}->insertid);
809 my $id = $s->{$table}{$id_key};
810 unless (defined $id) {
811 warn "No $table id for $id_key ",$dbh->errstr," Record skipped.\n";
812 return;
814 $id;
817 sub insertid {
818 my $self = shift;
819 my $sth = shift ;
820 my $table = shift;
822 my $insert_id;
823 if ($sth->{"insertid_$table"}->execute()){
824 $insert_id = ($sth->{"insertid_$table"}->fetchrow_array)[0];
826 else{
827 warn "No CURRVAL for SEQUENCE of table $table ",$sth->errstr,"\n";
828 return;
830 return $insert_id;
834 #sub insertid {
835 # my $self = shift;
836 # my $insertid_sth = shift ;
837 # my $insert_id;
838 # if ($insertid_sth->execute){
839 # $insert_id = ($insertid_sth->fetchrow_array)[0];
841 # else{
842 # warn "No CURRVAL for SEQUENCE ",$insertid_sth->errstr,"\n";
843 # return;
845 # return $insert_id;
848 sub insert_sequence {
849 my $self = shift;
850 my($id,$offset,$seq) = @_;
851 my $sth = $self->{_insert_sequence}
852 ||= $self->dbh->prepare_delayed('insert into fdna values (?,?,?)');
853 $sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
856 =head2 search_notes
858 Title : search_notes
859 Usage : @search_results = $db->search_notes("full text search string",$limit)
860 Function: Search the notes for a text string, using mysql full-text search
861 Returns : array of results
862 Args : full text search string, and an optional row limit
863 Status : public
865 This is a mysql-specific method. Given a search string, it performs a
866 full-text search of the notes table and returns an array of results.
867 Each row of the returned array is a arrayref containing the following fields:
869 column 1 A Bio::DB::GFF::Featname object, suitable for passing to segment()
870 column 2 The text of the note
871 column 3 A relevance score.
872 column 4 A Bio::DB::GFF::Typename object
874 =cut
876 sub search_notes {
877 my $self = shift;
878 my ($search_string,$limit) = @_;
880 $search_string =~ tr/*?//d;
882 my @words = $search_string =~ /(\w+)/g;
883 my $regex = join '|',@words;
884 my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
885 my $search = join(' OR ',@searches);
887 my $query = <<END;
888 SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
889 FROM fgroup,fattribute_to_feature,fdata,ftype
890 WHERE fgroup.gid=fdata.gid
891 AND fdata.fid=fattribute_to_feature.fid
892 AND fdata.ftypeid=ftype.ftypeid
893 AND ($search)
897 my $sth = $self->dbh->do_query($query);
898 my @results;
899 while (my ($class,$name,$note,$method,$source) = $sth->fetchrow_array) {
900 next unless $class && $name; # sorry, ignore NULL objects
901 my @matches = $note =~ /($regex)/g;
902 my $relevance = 10*@matches;
903 my $featname = Bio::DB::GFF::Featname->new($class=>$name);
904 my $type = Bio::DB::GFF::Typename->new($method,$source);
905 push @results,[$featname,$note,$relevance,$type];
906 last if $limit && @results >= $limit;
908 @results;
911 =head2 make_meta_set_query
913 Title : make_meta_set_query
914 Usage : $sql = $db->make_meta_set_query
915 Function: return SQL fragment for setting a meta parameter
916 Returns : SQL fragment
917 Args : none
918 Status : public
920 By default this does nothing; meta parameters are not stored or
921 retrieved.
923 =cut
925 sub make_meta_set_query {
926 return 'INSERT INTO fmeta VALUES (?,?)';
929 sub make_classes_query {
930 my $self = shift;
931 return 'SELECT DISTINCT gclass FROM fgroup WHERE NOT gclass IS NULL';
935 sub chunk_size {
936 my $self = shift;
937 $self->meta('chunk_size') || DEFAULT_CHUNK;
940 sub getseqcoords_query {
941 my $self = shift;
942 return GETSEQCOORDS ;
945 sub getaliascoords_query{
946 my $self = shift;
947 return GETALIASCOORDS ;
951 sub getforcedseqcoords_query{
952 my $self = shift;
953 return GETFORCEDSEQCOORDS ;
957 sub getaliaslike_query{
958 my $self = shift;
959 return GETALIASLIKE ;
963 sub make_features_select_part {
964 my $self = shift;
965 my $options = shift || {};
966 my $s;
967 if (my $b = $options->{bin_width}) {
969 $s = <<END;
970 fref,
971 1+$b*floor(fstart/$b) as fstart,
972 $b*(1+floor(fstart/$b)) as fstop,
973 NVL2(fsource,fmethod||':'||fsource,fmethod),'bin',
974 count(*) as fscore,
975 '.','.','bin',
976 NVL2(fsource , fref||':'||fmethod||':'||fsource , fref||':'||fmethod),
977 NULL,NULL,NULL,NULL
980 } else {
981 $s = <<END;
982 fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
986 $s .= ",count(fdata.fid)" if $options->{attributes} && keys %{$options->{attributes}}>1;
990 sub make_features_from_part_bkup {
991 my $self = shift;
992 my $sparse = shift;
993 my $options = shift || {};
994 #my $index = $sparse ? ' USE INDEX(ftypeid)': '';
995 my $index = '';
996 return $options->{attributes} ? "fdata${index},ftype,fgroup,fattribute,fattribute_to_feature\n"
997 : "fdata${index},ftype,fgroup\n";
1001 ####################################
1002 # moved from mysqlopt.pm
1003 ###################################
1004 # meta values
1005 sub default_meta_values {
1006 my $self = shift;
1007 my @values = $self->SUPER::default_meta_values;
1008 return (
1009 @values,
1010 max_bin => MAX_BIN,
1011 min_bin => MIN_BIN,
1012 straight_join_limit => STRAIGHT_JOIN_LIMIT,
1016 sub min_bin {
1017 my $self = shift;
1018 return $self->meta('min_bin') || MIN_BIN;
1020 sub max_bin {
1021 my $self = shift;
1022 return $self->meta('max_bin') || MAX_BIN;
1024 sub straight_join_limit {
1025 my $self = shift;
1026 return $self->meta('straight_join_limit') || STRAIGHT_JOIN_LIMIT;