1 package Bio
::DB
::GFF
::Adaptor
::dbi
::oracle
;
5 Bio::DB::GFF::Adaptor::dbi::oracle -- Database adaptor for a specific oracle schema
13 # a simple oracle adaptor
15 #use Bio::DB::GFF::Adaptor::dbi::mysql;
16 #use Bio::DB::GFF::Adaptor::dbi::mysqlopt;
17 use Bio
::DB
::GFF
::Util
::Binning
;
18 use Bio
::DB
::GFF
::Util
::Rearrange
; # for rearrange()
19 use base
qw(Bio::DB::GFF::Adaptor::dbi);
21 use constant MAX_SEGMENT
=> 100_000_000
; # the largest a segment can get
22 use constant DEFAULT_CHUNK
=> 2000;
24 use constant GETSEQCOORDS
=><<END;
26 NVL(gclass,'Sequence'),
34 AND fgroup.gid=fdata.gid
35 GROUP BY fref,fstrand,gclass,gname
39 use constant GETALIASCOORDS
=><<END;
41 NVL(gclass,'Sequence'),
46 FROM fdata,fgroup,fattribute,fattribute_to_feature
47 WHERE fattribute_to_feature.fattribute_value=?
49 AND fgroup.gid=fdata.gid
50 AND fattribute.fattribute_name='Alias'
51 AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
52 AND fattribute_to_feature.fid=fdata.fid
53 GROUP BY fref,fstrand,gclass,gname
57 use constant GETALIASLIKE
=><<END;
59 NVL(gclass,'Sequence'),
64 FROM fdata,fgroup,fattribute,fattribute_to_feature
65 WHERE fattribute_to_feature.fattribute_value LIKE ?
67 AND fgroup.gid=fdata.gid
68 AND fattribute.fattribute_name='Alias'
69 AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
70 AND fattribute_to_feature.fid=fdata.fid
71 GROUP BY fref,fstrand,gname
76 use constant GETFORCEDSEQCOORDS
=><<END;
78 NVL(gclass,'Sequence'),
86 AND fgroup.gid=fdata.gid
87 GROUP BY fref,fstrand,gclass
91 ########################
92 # moved from mysqlopt.pm
93 ########################
95 # this is the largest that any reference sequence can be (100 megabases)
96 use constant MAX_BIN
=> 100_000_000
;
98 # this is the smallest bin (1 K)
99 use constant MIN_BIN
=> 1000;
101 # size of range over which it is faster to force mysql to use the range for indexing
102 use constant STRAIGHT_JOIN_LIMIT
=> 200_000
;
104 ##############################################################################
108 This adaptor implements a specific oracle database schema that is
109 compatible with Bio::DB::GFF. It inherits from
110 Bio::DB::GFF::Adaptor::dbi, which itself inherits from Bio::DB::GFF.
112 The schema uses several tables:
118 This is the feature data table. Its columns are:
120 fid feature ID (integer)
121 fref reference sequence name (string)
122 fstart start position relative to reference (integer)
123 fstop stop postion relative to reference (integer)
124 ftypeid feature type ID (integer)
125 fscore feature score (float); may be null
126 fstrand strand; one of "+" or "-"; may be null
127 fphase phase; one of 0, 1 or 2; may be null
128 gid group ID (integer)
129 ftarget_start for similarity features, the target start position (integer)
130 ftarget_stop for similarity features, the target stop position (integer)
132 Note that it would be desirable to normalize the reference sequence
133 name, since there are usually many features that share the same
134 reference feature. However, in the current schema, query performance
135 suffers dramatically when this additional join is added.
139 This is the group table. There is one row for each group. Columns:
141 gid the group ID (integer)
142 gclass the class of the group (string)
143 gname the name of the group (string)
145 The group table serves multiple purposes. As you might expect, it is
146 used to cluster features that logically belong together, such as the
147 multiple exons of the same transcript. It is also used to assign a
148 name and class to a singleton feature. Finally, the group table is
149 used to identify the target of a similarity hit. This is consistent
150 with the way in which the group field is used in the GFF version 2
153 The fgroup.gid field joins with the fdata.gid field.
157 sql> select * from fgroup where gname='sjj_2L52.1';
158 +-------+-------------+------------+
159 | gid | gclass | gname |
160 +-------+-------------+------------+
161 | 69736 | PCR_product | sjj_2L52.1 |
162 +-------+-------------+------------+
163 1 row in set (0.70 sec)
165 sql> select fref,fstart,fstop from fdata,fgroup
166 where gclass='PCR_product' and gname = 'sjj_2L52.1'
167 and fdata.gid=fgroup.gid;
168 +---------------+--------+-------+
169 | fref | fstart | fstop |
170 +---------------+--------+-------+
171 | CHROMOSOME_II | 1586 | 2355 |
172 +---------------+--------+-------+
173 1 row in set (0.03 sec)
177 This table contains the feature types, one per row. Columns are:
179 ftypeid the feature type ID (integer)
180 fmethod the feature type method name (string)
181 fsource the feature type source name (string)
183 The ftype.ftypeid field joins with the fdata.ftypeid field. Example:
185 sql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype
186 where gclass='PCR_product'
187 and gname = 'sjj_2L52.1'
188 and fdata.gid=fgroup.gid
189 and fdata.ftypeid=ftype.ftypeid;
190 +---------------+--------+-------+-------------+-----------+
191 | fref | fstart | fstop | fmethod | fsource |
192 +---------------+--------+-------+-------------+-----------+
193 | CHROMOSOME_II | 1586 | 2355 | PCR_product | GenePairs |
194 +---------------+--------+-------+-------------+-----------+
195 1 row in set (0.08 sec)
199 This table holds the raw DNA of the reference sequences. It has three
202 fref reference sequence name (string)
203 foffset offset of this sequence
204 fdna the DNA sequence (longblob)
206 To overcome problems loading large blobs, DNA is automatically
207 fragmented into multiple segments when loading, and the position of
208 each segment is stored in foffset. The fragment size is controlled by
209 the -clump_size argument during initialization.
211 =item fattribute_to_feature
213 This table holds "attributes", which are tag/value pairs stuffed into
214 the GFF line. The first tag/value pair is treated as the group, and
215 anything else is treated as an attribute (weird, huh?).
217 CHR_I assembly_tag Finished 2032 2036 . + . Note "Right: cTel33B"
218 CHR_I assembly_tag Polymorphism 668 668 . + . Note "A->C in cTel33B"
220 The columns of this table are:
222 fid feature ID (integer)
223 fattribute_id ID of the attribute (integer)
224 fattribute_value text of the attribute (text)
226 The fdata.fid column joins with fattribute_to_feature.fid.
230 This table holds the normalized names of the attributes. Fields are:
232 fattribute_id ID of the attribute (integer)
233 fattribute_name Name of the attribute (varchar)
237 =head2 Data Loading Methods
239 In addition to implementing the abstract SQL-generating methods of
240 Bio::DB::GFF::Adaptor::dbi, this module also implements the data
241 loading functionality of Bio::DB::GFF.
249 Usage : $db = Bio::DB::GFF->new(@args)
250 Function: create a new adaptor
251 Returns : a Bio::DB::GFF object
255 The new constructor is identical to the "dbi" adaptor's new() method,
256 except that the prefix "dbi:oracle" is added to the database DSN identifier
257 automatically if it is not there already.
262 -dsn the DBI data source, e.g. 'dbi:mysql:ens0040' or "ens0040"
264 -user username for authentication
266 -pass the password for authentication
274 my ($dsn,$other) = rearrange
([
275 [qw(FEATUREDB DB DSN)],
277 $dsn = "dbi:Oracle:$dsn" if !ref($dsn) && $dsn !~ /^(dbi|DBI):/;
278 my $self = $class->SUPER::new
(-dsn
=>$dsn,%$other);
285 Usage : $schema = $db->schema
286 Function: return the CREATE script for the schema
287 Returns : a list of CREATE statemetns
291 This method returns a list containing the various CREATE statements
292 needed to initialize the database tables.
301 fid INTEGER NOT NULL,
302 fref VARCHAR(100) DEFAULT '' NOT NULL,
303 fstart INTEGER DEFAULT '0' NOT NULL,
304 fstop INTEGER DEFAULT '0' NOT NULL,
305 fbin NUMBER DEFAULT '0.000000' NOT NULL,
306 ftypeid INTEGER DEFAULT '0' NOT NULL,
308 fstrand VARCHAR2(3) CHECK (fstrand IN ('+','-')),
309 fphase VARCHAR2(3) CHECK (fphase IN ('0','1','2')),
310 gid INTEGER DEFAULT '0' NOT NULL,
311 ftarget_start INTEGER ,
312 ftarget_stop INTEGER ,
313 CONSTRAINT fdata_pk PRIMARY KEY (fid)
319 CREATE UNIQUE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)
322 fdata_ftypeid_idx
=> q{
323 CREATE INDEX fdata_ftypeid_idx ON fdata (ftypeid)
327 CREATE INDEX fdata_gid_idx ON fdata (gid)
333 CREATE SEQUENCE fdata_fid_sq START WITH 1
339 CREATE OR REPLACE TRIGGER fdata_fid_ai
340 BEFORE INSERT ON fdata
341 FOR EACH ROW WHEN (new.fid IS NULL OR new.fid = 0)
343 SELECT fdata_fid_sq.nextval INTO :new.fid FROM dual;
354 CREATE TABLE fgroup (
355 gid INTEGER NOT NULL,
356 gclass VARCHAR(100) ,
358 CONSTRAINT fgroup_pk PRIMARY KEY (gid)
363 fgroup_gclass_idx
=> q{
364 CREATE UNIQUE INDEX fgroup_gclass_idx ON fgroup (gclass,gname)
371 CREATE SEQUENCE fgroup_gid_sq START WITH 1
373 }, # fgroup sequences
378 CREATE OR REPLACE TRIGGER fgroup_gid_ai
379 BEFORE INSERT ON fgroup
380 FOR EACH ROW WHEN (new.gid IS NULL OR new.gid = 0)
382 SELECT fgroup_gid_sq.nextval INTO :new.gid FROM dual;
392 ftypeid INTEGER NOT NULL,
393 fmethod VARCHAR(100) DEFAULT '' NOT NULL,
394 fsource VARCHAR(100),
395 CONSTRAINT ftype_pk PRIMARY KEY (ftypeid)
400 ftype_fmethod_idx
=> q{
401 CREATE INDEX ftype_fmethod_idx ON ftype (fmethod)
404 ftype_fsource_idx
=> q{
405 CREATE INDEX ftype_fsource_idx ON ftype (fsource)
408 ftype_ftype_idx
=> q{
409 CREATE UNIQUE INDEX ftype_ftype_idx ON ftype (fmethod,fsource)
414 ftype_ftypeid_sq
=> q{
415 CREATE SEQUENCE ftype_ftypeid_sq START WITH 1
420 ftype_ftypeid_ai
=> q{
421 CREATE OR REPLACE TRIGGER ftype_ftypeid_ai
422 BEFORE INSERT ON ftype
423 FOR EACH ROW WHEN (new.ftypeid IS NULL OR new.ftypeid = 0)
425 SELECT ftype_ftypeid_sq.nextval INTO :new.ftypeid FROM dual;
435 fref VARCHAR(100) DEFAULT '' NOT NULL,
436 foffset INTEGER DEFAULT '0' NOT NULL,
437 fdna LONG /* LONGBLOB */ ,
438 CONSTRAINT fdna_pk PRIMARY KEY (fref,foffset)
446 fname VARCHAR(255) DEFAULT '' NOT NULL,
447 fvalue VARCHAR(255) DEFAULT '' NOT NULL,
448 CONSTRAINT fmeta_pk PRIMARY KEY (fname)
456 CREATE TABLE fattribute (
457 fattribute_id INTEGER NOT NULL,
458 fattribute_name VARCHAR(255) DEFAULT '' NOT NULL,
459 CONSTRAINT fattribute_pk PRIMARY KEY (fattribute_id)
461 }, # fattribute table
464 fattribute_fattribute_id_sq
=> q{
465 CREATE SEQUENCE fattribute_fattribute_id_sq START WITH 1
467 }, # fattribute sequences
470 fattribute_fattribute_id_ai
=> q{
471 CREATE OR REPLACE TRIGGER fattribute_fattribute_id_ai
472 BEFORE INSERT ON fattribute
473 FOR EACH ROW WHEN (new.fattribute_id IS NULL OR new.fattribute_id = 0)
475 SELECT fattribute_fattribute_id_sq.nextval INTO :new.fattribute_id FROM dual;
478 } # fattribute triggers
481 fattribute_to_feature
=> {
483 CREATE TABLE fattribute_to_feature (
484 fid INTEGER DEFAULT '0' NOT NULL,
485 fattribute_id INTEGER DEFAULT '0' NOT NULL,
486 fattribute_value VARCHAR2(255) /* TEXT */
488 }, # fattribute_to_feature table
491 fattribute_to_feature_fid
=> q{
492 CREATE INDEX fattribute_to_feature_fid ON fattribute_to_feature (fid,fattribute_id)
494 } # fattribute_to_feature indexes
495 }, # fattribute_to_feature
499 CREATE TABLE "finterval_stats" (
500 "ftypeid" integer DEFAULT '0' NOT NULL,
501 "fref" VARCHAR(100) DEFAULT '' NOT NULL,
502 "fbin" integer DEFAULT '0' NOT NULL,
503 "fcum_count" integer DEFAULT '0' NOT NULL,
504 CONSTRAINT finterval_stats_pk PRIMARY KEY (ftypeid,fref,fbin)
506 } # finterval_stats table
516 Title : do_initialize
517 Usage : $success = $db->do_initialize($drop_all)
518 Function: initialize the database
519 Returns : a boolean indicating the success of the operation
520 Args : a boolean indicating whether to delete existing data
523 This method will load the schema into the database. If $drop_all is
524 true, then any existing data in the tables known to the schema will be
527 Internally, this method calls schema() to get the schema data.
531 # Create the schema from scratch.
532 # You will need create privileges for this.
536 # $self->drop_all if $erase;
538 # my $dbh = $self->features_db;
539 # my $schema = $self->schema;
541 # foreach my $table_name(keys %$schema) {
542 # my $create_table_stmt = $$schema{$table_name}{table} ;
543 # $dbh->do($create_table_stmt) || warn $dbh->errstr;
553 Usage : $db->drop_all
554 Function: empty the database
559 This method drops the tables known to this module. Internally it
560 calls the abstract tables() method.
564 # Drop all the GFF tables -- dangerous!
567 # my $dbh = $self->features_db;
568 # local $dbh->{PrintError} = 0;
569 # foreach ($self->tables) {
570 # $dbh->do("drop table $_");
582 Usage : $db->setup_load
583 Function: called before load_gff_line()
588 This method performs schema-specific initialization prior to loading a
589 set of GFF records. It prepares a set of DBI statement handlers to be
590 used in loading the data.
596 my $schema = $self->schema;
598 my $dbh = $self->features_db;
600 if ($self->lock_on_load) {
601 my @tables = map { "$_ WRITE"} $self->tables;
602 my $tables = join ', ',@tables;
603 $dbh->do("LOCK TABLES $tables");
606 my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
607 my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
608 my $sequence_type = (keys %{$schema->{ftype
}{sequence
}})[0];
609 my $insertid_type = $dbh->prepare_delayed("SELECT $sequence_type.CURRVAL FROM dual");
611 my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE gname=? AND gclass=?');
612 my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');
613 my $sequence_group = (keys %{$schema->{fgroup
}{sequence
}})[0];
614 my $insertid_group = $dbh->prepare_delayed("SELECT $sequence_group.CURRVAL FROM dual");
616 my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
617 my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
618 my $sequence_attribute = (keys %{$schema->{fattribute
}{sequence
}})[0];
619 my $insertid_attribute = $dbh->prepare_delayed("SELECT $sequence_attribute.CURRVAL FROM dual");
621 my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');
623 my $insert_data = $dbh->prepare_delayed(<<END);
624 INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
625 fstrand,fphase,gid,ftarget_start,ftarget_stop)
626 VALUES(?,?,?,?,?,?,?,?,?,?,?)
629 my $delete_existing_data = $dbh->prepare_delayed('DELETE FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND fbin=? AND ftypeid=? AND GID=?');
630 my $sequence_data = (keys %{$schema->{fdata
}{sequence
}})[0];
631 my $insertid_data = $dbh->prepare_delayed("SELECT $sequence_data.CURRVAL FROM dual");
635 $self->{load_stuff
}{sth
}{lookup_ftype
} = $lookup_type;
636 $self->{load_stuff
}{sth
}{insert_ftype
} = $insert_type;
637 $self->{load_stuff
}{sth
}{insertid_ftype
} = $insertid_type;
638 $self->{load_stuff
}{sth
}{lookup_fgroup
} = $lookup_group;
639 $self->{load_stuff
}{sth
}{insert_fgroup
} = $insert_group;
640 $self->{load_stuff
}{sth
}{insertid_fgroup
} = $insertid_group;
641 $self->{load_stuff
}{sth
}{insert_fdata
} = $insert_data;
642 $self->{load_stuff
}{sth
}{insertid_fdata
} = $insertid_data;
643 $self->{load_stuff
}{sth
}{delete_existing_fdata
} = $delete_existing_data;
644 $self->{load_stuff
}{sth
}{lookup_fattribute
} = $lookup_attribute;
645 $self->{load_stuff
}{sth
}{insert_fattribute
} = $insert_attribute;
646 $self->{load_stuff
}{sth
}{insertid_fattribute
} = $insertid_attribute;
647 $self->{load_stuff
}{sth
}{insert_fattribute_value
} = $insert_attribute_value;
648 $self->{load_stuff
}{types
} = {};
649 $self->{load_stuff
}{groups
} = {};
650 $self->{load_stuff
}{counter
} = 0;
655 Title : load_gff_line
656 Usage : $db->load_gff_line($fields)
657 Function: called to load one parsed line of GFF
658 Returns : true if successfully inserted
659 Args : hashref containing GFF fields
662 This method is called once per line of the GFF and passed a series of
663 parsed data items that are stored into the hashref $fields. The keys are:
665 ref reference sequence
666 source annotation source
667 method annotation method
668 start annotation start
670 score annotation score (may be undef)
671 strand annotation strand (may be undef)
672 phase annotation phase (may be undef)
673 group_class class of annotation's group (may be undef)
674 group_name ID of annotation's group (may be undef)
675 target_start start of target of a similarity hit
676 target_stop stop of target of a similarity hit
677 attributes array reference of attributes, each of which is a [tag=>value] array ref
685 if (defined $gff->{phase
}){
686 chomp($gff->{phase
});
687 undef($gff->{phase
}) if $gff->{phase
} eq '.';
690 if (defined $gff->{strand
} && $gff->{strand
} eq '.'){undef($gff->{strand
})};
691 if (defined $gff->{score
} && $gff->{score
} eq '.'){undef($gff->{score
})};
693 my $s = $self->{load_stuff
};
694 my $dbh = $self->features_db;
695 local $dbh->{PrintError
} = 0;
697 defined(my $typeid = $self->get_table_id('ftype', $gff->{method
} => $gff->{source
})) or return;
698 defined(my $groupid = $self->get_table_id('fgroup',$gff->{gname
} => $gff->{gclass
})) or return;
700 my $bin = bin
($gff->{start
},$gff->{stop
},$self->min_bin);
701 my $result = $s->{sth
}{insert_fdata
}->execute($gff->{ref},
702 $gff->{start
},$gff->{stop
},$bin,
704 $gff->{score
},$gff->{strand
},$gff->{phase
},
706 $gff->{tstart
},$gff->{tstop
});
707 if (defined ($dbh->errstr)){
708 print $dbh->errstr,"\n" ,%$gff,"\n";
709 if ($dbh->errstr =~ /ORA-02290: check constraint/){
710 print "PHASE=$gff->{phase}"."===","\n";
713 if ($dbh->errstr =~ /ORA-00001: unique constraint/){
714 $result = $s->{sth
}{delete_existing_fdata
}->execute($gff->{ref},
715 $gff->{start
},$gff->{stop
},$bin,
719 print "delete row result=$result\n";
720 $result = $s->{sth
}{insert_fdata
}->execute($gff->{ref},
721 $gff->{start
},$gff->{stop
},$bin,
723 $gff->{score
},$gff->{strand
},$gff->{phase
},
725 $gff->{tstart
},$gff->{tstop
});
727 print "insert row result=$result\n";
730 warn $dbh->errstr,"\n" and print "ref=",$gff->{ref}," start=",$gff->{start
}," stop=",$gff->{stop
}," bin=",$bin," typeid=",$typeid," groupid=",$groupid,"\n"
731 and return unless $result;
733 my $fid = $self->insertid($s->{sth
},'fdata')
734 || $self->get_feature_id($gff->{ref},$gff->{start
},$gff->{stop
},$typeid,$groupid);
739 # print STDERR map {"$fid attribute:". $_->[0]."=".$_->[1]."\n"} @{$gff->{attributes}};
741 foreach (@
{$gff->{attributes
}}) {
742 defined(my $attribute_id = $self->get_table_id('fattribute',$_->[0])) or return;
743 $s->{sth
}{insert_fattribute_value
}->execute($fid,$attribute_id,$_->[1]);
746 if ( (++$s->{counter
} % 1000) == 0) {
747 print STDERR
"$s->{counter} records loaded...";
748 print STDERR
-t STDOUT
&& !$ENV{EMACS
} ?
"\r" : "\n";
760 Usage : $integer = $db->get_table_id($table,@ids)
761 Function: get the ID of a group or type
762 Returns : an integer ID or undef
766 This internal method is called by load_gff_line to look up the integer
767 ID of an existing feature type or group. The arguments are the name
768 of the table, and two string identifiers. For feature types, the
769 identifiers are the method and source. For groups, the identifiers
770 are group name and class.
772 This method requires that a statement handler named I<lookup_$table>,
773 have been created previously by setup_load(). It is here to overcome
774 deficiencies in mysql's INSERT syntax.
779 # get the object ID from a named table
785 # irritating warning for null id
789 $id_key = join ':',@ids;
792 my $s = $self->{load_stuff
};
794 my $dbh = $self->features_db;
796 unless (defined($s->{$table}{$id_key})) {
797 $sth->{"lookup_$table"}->execute(@ids);
798 my @result = $sth->{"lookup_$table"}->fetchrow_array;
800 $s->{$table}{$id_key} = $result[0];
802 $sth->{"insert_$table"}->execute(@ids)
803 && ($s->{$table}{$id_key} = $self->insertid($sth,$table));
804 #&& ($s->{$table}{$id_key} = $self->insertid($sth->{"insertid_$table"}));
805 #&& ($s->{$table}{$id_key} = $sth->{"insert_$table"}->insertid);
809 my $id = $s->{$table}{$id_key};
810 unless (defined $id) {
811 warn "No $table id for $id_key ",$dbh->errstr," Record skipped.\n";
823 if ($sth->{"insertid_$table"}->execute()){
824 $insert_id = ($sth->{"insertid_$table"}->fetchrow_array)[0];
827 warn "No CURRVAL for SEQUENCE of table $table ",$sth->errstr,"\n";
836 # my $insertid_sth = shift ;
838 # if ($insertid_sth->execute){
839 # $insert_id = ($insertid_sth->fetchrow_array)[0];
842 # warn "No CURRVAL for SEQUENCE ",$insertid_sth->errstr,"\n";
848 sub insert_sequence
{
850 my($id,$offset,$seq) = @_;
851 my $sth = $self->{_insert_sequence
}
852 ||= $self->dbh->prepare_delayed('insert into fdna values (?,?,?)');
853 $sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
859 Usage : @search_results = $db->search_notes("full text search string",$limit)
860 Function: Search the notes for a text string, using mysql full-text search
861 Returns : array of results
862 Args : full text search string, and an optional row limit
865 This is a mysql-specific method. Given a search string, it performs a
866 full-text search of the notes table and returns an array of results.
867 Each row of the returned array is a arrayref containing the following fields:
869 column 1 A Bio::DB::GFF::Featname object, suitable for passing to segment()
870 column 2 The text of the note
871 column 3 A relevance score.
872 column 4 A Bio::DB::GFF::Typename object
878 my ($search_string,$limit) = @_;
880 $search_string =~ tr/*?//d;
882 my @words = $search_string =~ /(\w+)/g;
883 my $regex = join '|',@words;
884 my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
885 my $search = join(' OR ',@searches);
888 SELECT distinct gclass,gname,fattribute_value,fmethod,fsource
889 FROM fgroup,fattribute_to_feature,fdata,ftype
890 WHERE fgroup.gid=fdata.gid
891 AND fdata.fid=fattribute_to_feature.fid
892 AND fdata.ftypeid=ftype.ftypeid
897 my $sth = $self->dbh->do_query($query);
899 while (my ($class,$name,$note,$method,$source) = $sth->fetchrow_array) {
900 next unless $class && $name; # sorry, ignore NULL objects
901 my @matches = $note =~ /($regex)/g;
902 my $relevance = 10*@matches;
903 my $featname = Bio
::DB
::GFF
::Featname
->new($class=>$name);
904 my $type = Bio
::DB
::GFF
::Typename
->new($method,$source);
905 push @results,[$featname,$note,$relevance,$type];
906 last if $limit && @results >= $limit;
911 =head2 make_meta_set_query
913 Title : make_meta_set_query
914 Usage : $sql = $db->make_meta_set_query
915 Function: return SQL fragment for setting a meta parameter
916 Returns : SQL fragment
920 By default this does nothing; meta parameters are not stored or
925 sub make_meta_set_query
{
926 return 'INSERT INTO fmeta VALUES (?,?)';
929 sub make_classes_query
{
931 return 'SELECT DISTINCT gclass FROM fgroup WHERE NOT gclass IS NULL';
937 $self->meta('chunk_size') || DEFAULT_CHUNK
;
940 sub getseqcoords_query
{
942 return GETSEQCOORDS
;
945 sub getaliascoords_query
{
947 return GETALIASCOORDS
;
951 sub getforcedseqcoords_query
{
953 return GETFORCEDSEQCOORDS
;
957 sub getaliaslike_query
{
959 return GETALIASLIKE
;
963 sub make_features_select_part
{
965 my $options = shift || {};
967 if (my $b = $options->{bin_width
}) {
971 1+$b*floor(fstart/$b) as fstart,
972 $b*(1+floor(fstart/$b)) as fstop,
973 NVL2(fsource,fmethod||':'||fsource,fmethod),'bin',
976 NVL2(fsource , fref||':'||fmethod||':'||fsource , fref||':'||fmethod),
982 fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
986 $s .= ",count(fdata.fid)" if $options->{attributes
} && keys %{$options->{attributes
}}>1;
990 sub make_features_from_part_bkup
{
993 my $options = shift || {};
994 #my $index = $sparse ? ' USE INDEX(ftypeid)': '';
996 return $options->{attributes
} ?
"fdata${index},ftype,fgroup,fattribute,fattribute_to_feature\n"
997 : "fdata${index},ftype,fgroup\n";
1001 ####################################
1002 # moved from mysqlopt.pm
1003 ###################################
1005 sub default_meta_values
{
1007 my @values = $self->SUPER::default_meta_values
;
1012 straight_join_limit
=> STRAIGHT_JOIN_LIMIT
,
1018 return $self->meta('min_bin') || MIN_BIN
;
1022 return $self->meta('max_bin') || MAX_BIN
;
1024 sub straight_join_limit
{
1026 return $self->meta('straight_join_limit') || STRAIGHT_JOIN_LIMIT
;