maint: restructure to use Dist::Zilla
[bioperl-live.git] / lib / Bio / DB / GFF / Adaptor / dbi / mysqlcmap.pm
bloba977b6d960d402ece13de7ae26265b8957a5da73
1 package Bio::DB::GFF::Adaptor::dbi::mysqlcmap;
3 =head1 NAME
5 Bio::DB::GFF::Adaptor::dbi::mysqlcmap -- Database adaptor for an integraded
6 CMap/GBrowse mysql schema
8 =head1 SYNOPSIS
10 See L<Bio::DB::GFF>
12 =cut
14 # a simple mysql adaptor
15 use strict;
16 use Data::Dumper;
17 use Bio::DB::GFF::Adaptor::dbi;
18 use Bio::DB::GFF::Util::Rearrange; # for rearrange()
19 use Bio::DB::GFF::Util::Binning;
20 use base qw(Bio::DB::GFF::Adaptor::dbi::mysql);
21 require Bio::DB::GFF::Adaptor::dbi::mysql;
23 use constant GETSEQCOORDS =><<END;
24 SELECT fref,
25 IF(ISNULL(gclass),'Sequence',gclass),
26 min(fstart),
27 max(fstop),
28 fstrand,
29 feature_name as gname
30 FROM fdata,cmap_feature
31 WHERE cmap_feature.feature_name=?
32 AND cmap_feature.gclass=?
33 AND cmap_feature.feature_id=fdata.feature_id
34 GROUP BY fref,fstrand,feature_name
35 END
38 use constant GETALIASCOORDS =><<END;
39 SELECT fref,
40 IF(ISNULL(gclass),'Sequence',gclass),
41 min(fstart),
42 max(fstop),
43 fstrand,
44 feature_name as gname
45 FROM fdata,cmap_feature,fattribute,fattribute_to_feature
46 WHERE fattribute_to_feature.fattribute_value=?
47 AND cmap_feature.gclass=?
48 AND cmap_feature.feature_id=fdata.feature_id
49 AND fattribute.fattribute_name='Alias'
50 AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
51 AND fattribute_to_feature.fid=fdata.fid
52 GROUP BY fref,fstrand,feature_name
53 END
56 use constant GETALIASLIKE =><<END;
57 SELECT fref,
58 IF(ISNULL(gclass),'Sequence',gclass),
59 min(fstart),
60 max(fstop),
61 fstrand,
62 feature_name as gname
63 FROM fdata,cmap_feature,fattribute,fattribute_to_feature
64 WHERE fattribute_to_feature.fattribute_value LIKE ?
65 AND cmap_feature.gclass=?
66 AND cmap_feature.feature_id=fdata.feature_id
67 AND fattribute.fattribute_name='Alias'
68 AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
69 AND fattribute_to_feature.fid=fdata.fid
70 GROUP BY fref,fstrand,feature_name
71 END
74 use constant GETFORCEDSEQCOORDS =><<END;
75 SELECT fref,
76 IF(ISNULL(gclass),'Sequence',gclass),
77 min(fstart),
78 max(fstop),
79 fstrand
80 FROM fdata,cmap_feature
81 WHERE cmap_feature.feature_name=?
82 AND cmap_feature.gclass=?
83 AND fdata.fref=?
84 AND cmap_feature.feature_id=fdata.feature_id
85 GROUP BY fref,fstrand
86 END
89 use constant FULLTEXTSEARCH => <<END;
90 SELECT distinct gclass,feature_name,fattribute_value,MATCH(fattribute_value) AGAINST (?) as score
91 FROM cmap_feature,fattribute_to_feature,fdata
92 WHERE cmap_feature.feature_id=fdata.feature_id
93 AND fdata.fid=fattribute_to_feature.fid
94 AND MATCH(fattribute_value) AGAINST (?)
95 END
98 =head1 DESCRIPTION
100 This adaptor implements a specific mysql database schema that is
101 compatible with Bio::DB::GFF. It inherits from
102 Bio::DB::GFF::Adaptor::dbi, which itself inherits from Bio::DB::GFF.
104 The schema uses several tables:
106 =over 4
108 =item fdata
110 This is the feature data table. Its columns are:
112 fid feature ID (integer)
113 fref reference sequence name (string)
114 fstart start position relative to reference (integer)
115 fstop stop position relative to reference (integer)
116 ftypeid feature type ID (integer)
117 fscore feature score (float); may be null
118 fstrand strand; one of "+" or "-"; may be null
119 fphase phase; one of 0, 1 or 2; may be null
120 feature_id group ID used to be 'gid' (integer)
121 ftarget_start for similarity features, the target start position (integer)
122 ftarget_stop for similarity features, the target stop position (integer)
124 Note that it would be desirable to normalize the reference sequence
125 name, since there are usually many features that share the same
126 reference feature. However, in the current schema, query performance
127 suffers dramatically when this additional join is added.
129 =item cmap_feature (replaces fgroup)
131 This is the group table. There is one row for each group. This is the
132 shared table between CMap and GBrowse. There are many CMap related
133 columns but only a few that GBrowse uses.
135 GBrowse Columns:
137 feature_id the group ID (integer)
138 gclass the class of the group (string)
139 feature_name the name of the group (string)
141 The group table serves multiple purposes. As you might expect, it is
142 used to cluster features that logically belong together, such as the
143 multiple exons of the same transcript. It is also used to assign a
144 name and class to a singleton feature. Finally, the group table is
145 used to identify the target of a similarity hit. This is consistent
146 with the way in which the group field is used in the GFF version 2
147 format.
149 The cmap_feature.feature_id field joins with the fdata.feature_id field.
151 Examples:
153 mysql> select * from cmap_feature where feature_name='sjj_2L52.1';
154 +--------------+-------------+--------------+
155 | feature_id | gclass | feature_name |
156 +--------------+-------------+--------------+
157 | 69736 | PCR_product | sjj_2L52.1 |
158 +--------------+-------------+--------------+
159 1 row in set (0.70 sec)
161 mysql> select fref,fstart,fstop from fdata,cmap_feature
162 where gclass='PCR_product' and feature_name = 'sjj_2L52.1'
163 and fdata.feature_id=cmap_feature.feature_id;
164 +---------------+--------+-------+
165 | fref | fstart | fstop |
166 +---------------+--------+-------+
167 | CHROMOSOME_II | 1586 | 2355 |
168 +---------------+--------+-------+
169 1 row in set (0.03 sec)
171 =item ftype
173 This table contains the feature types, one per row. Columns are:
175 ftypeid the feature type ID (integer)
176 fmethod the feature type method name (string)
177 fsource the feature type source name (string)
179 The ftype.ftypeid field joins with the fdata.ftypeid field. Example:
181 mysql> select fref,fstart,fstop,fmethod,fsource from fdata,cmap_feature,ftype
182 where gclass='PCR_product'
183 and feature_name = 'sjj_2L52.1'
184 and fdata.feature_id=cmap_feature.feature_id
185 and fdata.ftypeid=ftype.ftypeid;
186 +---------------+--------+-------+-------------+-----------+
187 | fref | fstart | fstop | fmethod | fsource |
188 +---------------+--------+-------+-------------+-----------+
189 | CHROMOSOME_II | 1586 | 2355 | PCR_product | GenePairs |
190 +---------------+--------+-------+-------------+-----------+
191 1 row in set (0.08 sec)
193 =item fdna
195 This table holds the raw DNA of the reference sequences. It has three
196 columns:
198 fref reference sequence name (string)
199 foffset offset of this sequence
200 fdna the DNA sequence (longblob)
202 To overcome problems loading large blobs, DNA is automatically
203 fragmented into multiple segments when loading, and the position of
204 each segment is stored in foffset. The fragment size is controlled by
205 the -clump_size argument during initialization.
207 =item fattribute_to_feature
209 This table holds "attributes", which are tag/value pairs stuffed into
210 the GFF line. The first tag/value pair is treated as the group, and
211 anything else is treated as an attribute (weird, huh?).
213 CHR_I assembly_tag Finished 2032 2036 . + . Note "Right: cTel33B"
214 CHR_I assembly_tag Polymorphism 668 668 . + . Note "A->C in cTel33B"
216 The columns of this table are:
218 fid feature ID (integer)
219 fattribute_id ID of the attribute (integer)
220 fattribute_value text of the attribute (text)
222 The fdata.fid column joins with fattribute_to_feature.fid.
224 =item fattribute
226 This table holds the normalized names of the attributes. Fields are:
228 fattribute_id ID of the attribute (integer)
229 fattribute_name Name of the attribute (varchar)
231 =back
233 =head2 Data Loading Methods
235 In addition to implementing the abstract SQL-generating methods of
236 Bio::DB::GFF::Adaptor::dbi, this module also implements the data
237 loading functionality of Bio::DB::GFF.
239 =cut
242 =head2 new
244 Title : new
245 Usage : $db = Bio::DB::GFF->new(@args)
246 Function: create a new adaptor
247 Returns : a Bio::DB::GFF object
248 Args : see below
249 Status : Public
251 The new constructor is identical to the "dbi" adaptor's new() method,
252 except that the prefix "dbi:mysql" is added to the database DSN identifier
253 automatically if it is not there already.
255 Argument Description
256 -------- -----------
258 -dsn the DBI data source, e.g. 'dbi:mysql:ens0040' or "ens0040"
260 -user username for authentication
262 -pass the password for authentication
264 =cut
268 #Defined in mysql.pm
270 =head2 get_dna
272 Title : get_dna
273 Usage : $string = $db->get_dna($name,$start,$stop,$class)
274 Function: get DNA string
275 Returns : a string
276 Args : name, class, start and stop of desired segment
277 Status : Public
279 This method performs the low-level fetch of a DNA substring given its
280 name, class and the desired range. This should probably be moved to
281 the parent class.
283 =cut
285 sub make_features_select_part {
286 my $self = shift;
287 my $options = shift || {};
288 my $s;
289 if (my $b = $options->{bin_width}) {
291 $s = <<END;
292 fref,
293 1+$b*floor(fstart/$b) as fstart,
294 $b*(1+floor(fstart/$b)) as fstop,
295 IF(ISNULL(fsource),fmethod,concat(fmethod,':',fsource)),'bin',
296 count(*) as fscore,
297 '.','.','bin',
298 IF(ISNULL(fsource),concat(fref,':',fmethod),concat(fref,':',fmethod,':',fsource)),
299 NULL,NULL,NULL,NULL
302 } else {
303 $s = <<END;
304 fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,feature_name as gname,ftarget_start,ftarget_stop,fdata.fid,fdata.feature_id
308 $s .= ",count(fdata.fid)" if $options->{attributes} && keys %{$options->{attributes}}>1;
313 # IMPORTANT NOTE:
314 # WHETHER OR NOT THIS WORKS IS CRITICALLY DEPENDENT ON THE RELATIVE MAGNITUDE OF THE
315 sub make_features_from_part {
316 my $self = shift;
317 my $sparse_types = shift;
318 my $options = shift || {};
319 my $sparse_groups = $options->{sparse_groups};
320 my $index = $sparse_groups ? ' USE INDEX(feature_id)'
321 : $sparse_types ? ' USE INDEX(ftypeid)'
322 : '';
323 return $options->{attributes} ? "fdata${index},ftype,cmap_feature,fattribute,fattribute_to_feature\n"
324 : "fdata${index},ftype,cmap_feature\n";
327 ################################ loading and initialization ##################################
329 =head2 schema
331 Title : schema
332 Usage : $schema = $db->schema
333 Function: return the CREATE script for the schema
334 Returns : a list of CREATE statemetns
335 Args : none
336 Status : protected
338 This method returns a list containing the various CREATE statements
339 needed to initialize the database tables.
341 =cut
343 sub schema {
344 my %schema = (
345 fdata =>{
346 table=> q{
347 #create table fdata (
348 # fid int not null auto_increment,
349 # fref varchar(100) not null,
350 # fstart int unsigned not null,
351 # fstop int unsigned not null,
352 # ftypeid int not null,
353 # fscore float,
354 # fstrand enum('+','-'),
355 # fphase enum('0','1','2'),
356 # feature_id int not null,
357 # ftarget_start int unsigned,
358 # ftarget_stop int unsigned,
359 # primary key(fid),
360 # unique index(fref,fstart,fstop,ftypeid,feature_id),
361 # index(ftypeid),
362 # index(feature_id)
363 #) type=MyISAM
366 create table fdata (
367 fid int not null auto_increment,
368 fref varchar(100) not null,
369 fstart int unsigned not null,
370 fstop int unsigned not null,
371 fbin double(20,6) not null,
372 ftypeid int not null,
373 fscore float,
374 fstrand enum('+','-'),
375 fphase enum('0','1','2'),
376 feature_id int not null,
377 ftarget_start int unsigned,
378 ftarget_stop int unsigned,
379 primary key(fid),
380 unique index(fref,fbin,fstart,fstop,ftypeid,feature_id),
381 index(ftypeid),
382 index(feature_id)
383 ) type=MyISAM
384 } # fdata table
385 }, # fdata
387 ftype => {
388 table=> q{
389 create table ftype (
390 ftypeid int not null auto_increment,
391 fmethod varchar(100) not null,
392 fsource varchar(100),
393 primary key(ftypeid),
394 index(fmethod),
395 index(fsource),
396 unique ftype (fmethod,fsource)
397 )type=MyISAM
398 } #ftype table
399 }, #ftype
401 fdna => {
402 table=> q{
403 create table fdna (
404 fref varchar(100) not null,
405 foffset int(10) unsigned not null,
406 fdna longblob,
407 primary key(fref,foffset)
408 )type=MyISAM
409 } # fdna table
410 },#fdna
412 fmeta => {
413 table=> q{
414 create table fmeta (
415 fname varchar(255) not null,
416 fvalue varchar(255) not null,
417 primary key(fname)
418 )type=MyISAM
419 } # fmeta table
420 },#fmeta
422 fattribute => {
423 table=> q{
424 create table fattribute (
425 fattribute_id int(10) unsigned not null auto_increment,
426 fattribute_name varchar(255) not null,
427 primary key(fattribute_id)
428 )type=MyISAM
429 } #fattribute table
430 },#fattribute
432 fattribute_to_feature => {
433 table=> q{
434 create table fattribute_to_feature (
435 fid int(10) not null,
436 fattribute_id int(10) not null,
437 fattribute_value text,
438 key(fid,fattribute_id),
439 key(fattribute_value(48)),
440 fulltext(fattribute_value)
441 )type=MyISAM
442 } # fattribute_to_feature table
443 }, # fattribute_to_feature
446 cmap_attribute => {
447 table=>q{
448 create table cmap_attribute (
449 attribute_id int(11) NOT NULL default '0',
450 table_name varchar(30) NOT NULL default '',
451 object_id int(11) NOT NULL default '0',
452 display_order int(11) NOT NULL default '1',
453 is_public tinyint(4) NOT NULL default '1',
454 attribute_name varchar(200) NOT NULL default '',
455 attribute_value text NOT NULL,
456 PRIMARY KEY (attribute_id),
457 KEY table_name (table_name,object_id,display_order,attribute_name)
458 ) TYPE=MyISAM;
459 } # table
462 cmap_correspondence_evidence => {
463 table=>q{
464 create table cmap_correspondence_evidence (
465 correspondence_evidence_id int(11) NOT NULL default '0',
466 accession_id varchar(20) NOT NULL default '',
467 feature_correspondence_id int(11) NOT NULL default '0',
468 evidence_type_accession varchar(20) NOT NULL default '0',
469 score double(8,2) default NULL,
470 rank int(11) NOT NULL default '0',
471 PRIMARY KEY (correspondence_evidence_id),
472 UNIQUE KEY accession_id (accession_id),
473 KEY feature_correspondence_id (feature_correspondence_id)
474 ) TYPE=MyISAM;
475 } # table
479 cmap_correspondence_lookup => {
480 table=>q{
481 create table cmap_correspondence_lookup (
482 feature_id1 int(11) default NULL,
483 feature_id2 int(11) default NULL,
484 feature_correspondence_id int(11) default NULL,
485 start_position1 double(11,2) default NULL,
486 start_position2 double(11,2) default NULL,
487 stop_position1 double(11,2) default NULL,
488 stop_position2 double(11,2) default NULL,
489 map_id1 int(11) default NULL,
490 map_id2 int(11) default NULL,
491 feature_type_accession1 varchar(20) default NULL,
492 feature_type_accession2 varchar(20) default NULL,
493 KEY feature_id1 (feature_id1),
494 KEY corr_id (feature_correspondence_id),
495 KEY cl_map_id1 (map_id1),
496 KEY cl_map_id2 (map_id2),
497 KEY cl_map_id1_map_id2 (map_id1,map_id2),
498 KEY cl_map_id2_map_id1 (map_id2,map_id1)
499 ) TYPE=MyISAM;
500 } # table
504 cmap_correspondence_matrix => {
505 table=>q{
506 create table cmap_correspondence_matrix (
507 reference_map_aid varchar(20) NOT NULL default '0',
508 reference_map_name varchar(32) NOT NULL default '',
509 reference_map_set_aid varchar(20) NOT NULL default '0',
510 reference_species_aid varchar(20) NOT NULL default '0',
511 link_map_aid varchar(20) default NULL,
512 link_map_name varchar(32) default NULL,
513 link_map_set_aid varchar(20) NOT NULL default '0',
514 link_species_aid varchar(20) NOT NULL default '0',
515 no_correspondences int(11) NOT NULL default '0'
516 ) TYPE=MyISAM;
517 } # table
521 cmap_feature => {
522 table=>q{
523 create table cmap_feature (
524 feature_id int(11) NOT NULL default '0',
525 accession_id varchar(20) NOT NULL default '',
526 map_id int(11) default NULL,
527 feature_type_accession varchar(20) NOT NULL default '0',
528 feature_name varchar(32) NOT NULL default '',
529 is_landmark tinyint(4) NOT NULL default '0',
530 start_position double(11,2) NOT NULL default '0.00',
531 stop_position double(11,2) default NULL,
532 default_rank int(11) NOT NULL default '1',
533 direction tinyint(4) NOT NULL default '1',
534 gclass varchar(100) default NULL,
535 PRIMARY KEY (feature_id),
536 UNIQUE KEY gclass (gclass,feature_name),
537 UNIQUE KEY accession_id (accession_id),
538 KEY feature_name (feature_name),
539 KEY feature_id_map_id (feature_id,map_id),
540 KEY feature_id_map_id_start (feature_id,map_id,start_position),
541 KEY map_id (map_id),
542 KEY map_id_feature_id (map_id,feature_id)
543 ) TYPE=MyISAM;
544 } # table
548 cmap_feature_alias => {
549 table=>q{
550 create table cmap_feature_alias (
551 feature_alias_id int(11) NOT NULL default '0',
552 feature_id int(11) NOT NULL default '0',
553 alias varchar(255) default NULL,
554 PRIMARY KEY (feature_alias_id),
555 UNIQUE KEY feature_id_2 (feature_id,alias),
556 KEY feature_id (feature_id),
557 KEY alias (alias)
558 ) TYPE=MyISAM;
559 } # table
563 cmap_feature_correspondence => {
564 table=>q{
565 create table cmap_feature_correspondence (
566 feature_correspondence_id int(11) NOT NULL default '0',
567 accession_id varchar(20) NOT NULL default '',
568 feature_id1 int(11) NOT NULL default '0',
569 feature_id2 int(11) NOT NULL default '0',
570 is_enabled tinyint(4) NOT NULL default '1',
571 PRIMARY KEY (feature_correspondence_id),
572 UNIQUE KEY accession_id (accession_id),
573 KEY feature_id1 (feature_id1),
574 KEY cmap_feature_corresp_idx (is_enabled,feature_correspondence_id)
575 ) TYPE=MyISAM;
576 } # table
580 cmap_map => {
581 table=>q{
582 create table cmap_map (
583 map_id int(11) NOT NULL default '0',
584 accession_id varchar(20) NOT NULL default '',
585 map_set_id int(11) NOT NULL default '0',
586 map_name varchar(32) NOT NULL default '',
587 display_order int(11) NOT NULL default '1',
588 start_position double(11,2) default NULL,
589 stop_position double(11,2) default NULL,
590 PRIMARY KEY (map_id),
591 UNIQUE KEY accession_id (accession_id),
592 UNIQUE KEY map_id (map_id,map_set_id,map_name,accession_id),
593 KEY map_set_id_index (map_set_id)
594 ) TYPE=MyISAM;
595 } # table
599 cmap_map_set => {
600 table=>q{
601 create table cmap_map_set (
602 map_set_id int(11) NOT NULL default '0',
603 accession_id varchar(20) NOT NULL default '',
604 map_set_name varchar(64) NOT NULL default '',
605 short_name varchar(30) NOT NULL default '',
606 map_type_accession varchar(20) NOT NULL default '0',
607 species_id int(11) NOT NULL default '0',
608 published_on date default NULL,
609 can_be_reference_map tinyint(4) NOT NULL default '1',
610 display_order int(11) NOT NULL default '1',
611 is_enabled tinyint(4) NOT NULL default '1',
612 shape varchar(12) default NULL,
613 color varchar(20) default NULL,
614 width int(11) default NULL,
615 map_units varchar(12) NOT NULL default '',
616 is_relational_map tinyint(11) NOT NULL default '0',
617 PRIMARY KEY (map_set_id),
618 UNIQUE KEY accession_id (accession_id),
619 UNIQUE KEY map_set_id (map_set_id,species_id,short_name,accession_id),
620 KEY cmap_map_set_idx (can_be_reference_map,is_enabled,species_id,display_order,published_on,short_name)
621 ) TYPE=MyISAM;
622 } # table
626 cmap_next_number => {
627 table=>q{
628 create table cmap_next_number (
629 table_name varchar(40) NOT NULL default '',
630 next_number int(11) NOT NULL default '0',
631 PRIMARY KEY (table_name)
632 ) TYPE=MyISAM;
633 }, # table
634 insert=>{next_num=>q[ insert into cmap_next_number (table_name,next_number) VALUES ('cmap_feature',82);]}
638 cmap_species => {
639 table=>q{
640 create table cmap_species (
641 species_id int(11) NOT NULL default '0',
642 accession_id varchar(20) NOT NULL default '',
643 common_name varchar(64) NOT NULL default '',
644 full_name varchar(64) NOT NULL default '',
645 display_order int(11) NOT NULL default '1',
646 PRIMARY KEY (species_id),
647 KEY acc_id_species_id (accession_id,species_id)
648 ) TYPE=MyISAM;
649 } # table
653 cmap_xref => {
654 table=>q{
655 create table cmap_xref (
656 xref_id int(11) NOT NULL default '0',
657 table_name varchar(30) NOT NULL default '',
658 object_id int(11) default NULL,
659 display_order int(11) NOT NULL default '1',
660 xref_name varchar(200) NOT NULL default '',
661 xref_url text NOT NULL,
662 PRIMARY KEY (xref_id),
663 KEY table_name (table_name,object_id,display_order)
664 ) TYPE=MyISAM;
665 } # table
670 return \%schema;
675 =head2 make_classes_query
677 Title : make_classes_query
678 Usage : ($query,@args) = $db->make_classes_query
679 Function: return query fragment for generating list of reference classes
680 Returns : a query and args
681 Args : none
682 Status : public
684 =cut
686 sub make_classes_query {
687 my $self = shift;
688 return 'SELECT DISTINCT gclass FROM cmap_feature WHERE NOT ISNULL(gclass)';
692 =head2 setup_load
694 Title : setup_load
695 Usage : $db->setup_load
696 Function: called before load_gff_line()
697 Returns : void
698 Args : none
699 Status : protected
701 This method performs schema-specific initialization prior to loading a
702 set of GFF records. It prepares a set of DBI statement handlers to be
703 used in loading the data.
705 =cut
707 sub setup_load {
708 my $self = shift;
710 my $dbh = $self->features_db;
712 if ($self->lock_on_load) {
713 my @tables = map { "$_ WRITE"} $self->tables;
714 my $tables = join ', ',@tables;
715 $dbh->do("LOCK TABLES $tables");
718 #xx1
719 my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
720 my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
722 my $lookup_group = $dbh->prepare_delayed('SELECT feature_id FROM cmap_feature WHERE feature_name=? AND gclass=?');
723 my $insert_group = $dbh->prepare_delayed(' INSERT into cmap_feature (feature_id, accession_id,feature_name, gclass ) VALUES (?,feature_id,?,?)');
724 my $aux_insert_group = $dbh->prepare_delayed(' update cmap_next_number set next_number = next_number +1 where table_name=\'cmap_feature\'');
725 my $next_id_group = $dbh->prepare_delayed('select next_number from cmap_next_number where table_name=\'cmap_feature\'');
727 my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
728 my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
729 my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');
731 my $insert_data = $dbh->prepare_delayed(<<END);
732 INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
733 fstrand,fphase,feature_id,ftarget_start,ftarget_stop)
734 VALUES(?,?,?,?,?,?,?,?,?,?,?)
739 $self->{load_stuff}{sth}{lookup_ftype} = $lookup_type;
740 $self->{load_stuff}{sth}{insert_ftype} = $insert_type;
741 #$self->{load_stuff}{sth}{lookup_fgroup} = $lookup_group;
742 #$self->{load_stuff}{sth}{insert_fgroup} = $insert_group;
743 $self->{load_stuff}{sth}{lookup_cmap_feature} = $lookup_group;
744 $self->{load_stuff}{sth}{insert_cmap_feature} = $insert_group;
745 $self->{load_stuff}{sth}{aux_insert_cmap_feature} = $aux_insert_group;
746 $self->{load_stuff}{sth}{next_id_cmap_feature} = $next_id_group;
747 $self->{load_stuff}{sth}{insert_fdata} = $insert_data;
748 $self->{load_stuff}{sth}{lookup_fattribute} = $lookup_attribute;
749 $self->{load_stuff}{sth}{insert_fattribute} = $insert_attribute;
750 $self->{load_stuff}{sth}{insert_fattribute_value} = $insert_attribute_value;
751 $self->{load_stuff}{types} = {};
752 $self->{load_stuff}{groups} = {};
753 $self->{load_stuff}{counter} = 0;
756 =head2 load_gff_line
758 Title : load_gff_line
759 Usage : $db->load_gff_line($fields)
760 Function: called to load one parsed line of GFF
761 Returns : true if successfully inserted
762 Args : hashref containing GFF fields
763 Status : protected
765 This method is called once per line of the GFF and passed a series of
766 parsed data items that are stored into the hashref $fields. The keys are:
768 ref reference sequence
769 source annotation source
770 method annotation method
771 start annotation start
772 stop annotation stop
773 score annotation score (may be undef)
774 strand annotation strand (may be undef)
775 phase annotation phase (may be undef)
776 group_class class of annotation's group (may be undef)
777 group_name ID of annotation's group (may be undef)
778 target_start start of target of a similarity hit
779 target_stop stop of target of a similarity hit
780 attributes array reference of attributes, each of which is a [tag=>value] array ref
782 =cut
784 sub load_gff_line {
785 my $self = shift;
786 my $gff = shift;
788 my $s = $self->{load_stuff};
789 my $dbh = $self->features_db;
790 local $dbh->{PrintError} = 0;
792 defined(my $typeid = $self->get_table_id('ftype', $gff->{method} => $gff->{source})) or return;
793 defined(my $groupid = $self->get_table_id('cmap_feature',$gff->{gname} => $gff->{gclass})) or return;
795 if ($gff->{stop}-$gff->{start}+1 > $self->max_bin) {
796 warn "$gff->{gclass}:$gff->{gname} is longer than ",$self->maxbin,".\n";
797 warn "Please set the maxbin value to a larger length than the largest feature you wish to store.\n";
798 warn "With the command-line tools you do with this with --maxfeature option.\n";
801 my $bin = bin($gff->{start},$gff->{stop},$self->min_bin);
802 my $result = $s->{sth}{insert_fdata}->execute($gff->{ref},
803 $gff->{start},$gff->{stop},$bin,
804 $typeid,
805 $gff->{score},$gff->{strand},$gff->{phase},
806 $groupid,
807 $gff->{tstart},$gff->{tstop});
809 warn $dbh->errstr,"\n" && return unless $result;
811 my $fid = $dbh->{mysql_insertid}
812 || $self->get_feature_id($gff->{ref},$gff->{start},$gff->{stop},$typeid,$groupid);
815 # insert attributes
816 foreach (@{$gff->{attributes}}) {
817 defined(my $attribute_id = $self->get_table_id('fattribute',$_->[0])) or return;
818 $s->{sth}{insert_fattribute_value}->execute($fid,$attribute_id,$_->[1]);
821 if ( (++$s->{counter} % 1000) == 0) {
822 print STDERR "$s->{counter} records loaded...";
823 print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
826 $fid;
829 =head2 get_feature_id
831 Title : get_feature_id
832 Usage : $integer = $db->get_feature_id($ref,$start,$stop,$typeid,$groupid)
833 Function: get the ID of a feature
834 Returns : an integer ID or undef
835 Args : none
836 Status : private
838 This internal method is called by load_gff_line to look up the integer
839 ID of an existing feature. It is ony needed when replacing a feature
840 with new information.
842 =cut
844 # this method is called when needed to look up a feature's ID
845 sub get_feature_id {
846 my $self = shift;
847 my ($ref,$start,$stop,$typeid,$groupid) = @_;
848 my $s = $self->{load_stuff};
849 unless ($s->{get_feature_id}) {
850 my $dbh = $self->features_db;
851 $s->{get_feature_id} =
852 $dbh->prepare_delayed('SELECT fid FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND ftypeid=? AND feature_id=?');
854 my $sth = $s->{get_feature_id} or return;
855 $sth->execute($ref,$start,$stop,$typeid,$groupid) or return;
856 my ($fid) = $sth->fetchrow_array;
857 return $fid;
860 =head2 get_table_id
862 Title : get_table_id
863 Usage : $integer = $db->get_table_id($table,@ids)
864 Function: get the ID of a group or type
865 Returns : an integer ID or undef
866 Args : none
867 Status : private
869 This internal method is called by load_gff_line to look up the integer
870 ID of an existing feature type or group. The arguments are the name
871 of the table, and two string identifiers. For feature types, the
872 identifiers are the method and source. For groups, the identifiers
873 are group name and class.
875 This method requires that a statement handler named I<lookup_$table>,
876 have been created previously by setup_load(). It is here to overcome
877 deficiencies in mysql's INSERT syntax.
879 =cut
882 # get the object ID from a named table
883 sub get_table_id {
884 my $self = shift;
885 my $table = shift;
886 my @ids = @_;
888 # irritating warning for null id
889 my $id_key;
891 local $^W=0;
892 $id_key = join ':',@ids;
895 my $s = $self->{load_stuff};
896 my $sth = $s->{sth};
897 my $dbh = $self->features_db;
899 unless (defined($s->{$table}{$id_key})) {
901 #########################################
902 # retrieval of the last inserted id is now located at the adaptor and not in caching_handle
903 #######################################
904 if ( (my $result = $sth->{"lookup_$table"}->execute(@ids)) > 0) {
905 $s->{$table}{$id_key} = ($sth->{"lookup_$table"}->fetchrow_array)[0];
906 } else {
907 if (defined($sth->{"next_id_$table"})){
909 $sth->{"insert_$table"}->execute(3,'string1','string2');
910 # Can't use auto incrementing
911 $sth->{"next_id_$table"}->execute();
912 $s->{$table}{$id_key} = ($sth->{"next_id_$table"}->fetchrow_array)[0];
913 if ($s->{$table}{$id_key}){
914 $sth->{"insert_$table"}->execute($s->{$table}{$id_key},@ids);
915 $sth->{"aux_insert_$table"}->execute() if $sth->{"aux_insert_$table"};
918 else{
919 $sth->{"insert_$table"}->execute(@ids);
920 $s->{$table}{$id_key} = $self->insertid($sth->{"insert_$table"}) unless $s->{$table}{$id_key};
921 $sth->{"aux_insert_$table"}->execute() if $sth->{"aux_insert_$table"};
926 my $id = $s->{$table}{$id_key};
927 unless (defined $id) {
928 warn "No $table id for $id_key ",$dbh->errstr," Record skipped.\n";
929 return;
931 $id;
936 #-----------------------------------
938 =head2 make_features_by_name_where_part
940 Title : make_features_by_name_where_part
941 Usage : $db->make_features_by_name_where_part
942 Function: create the SQL fragment needed to select a feature by its group name & class
943 Returns : a SQL fragment and bind arguments
944 Args : see below
945 Status : Protected
947 =cut
949 sub make_features_by_name_where_part {
950 my $self = shift;
951 my ($class,$name) = @_;
952 if ($name =~ /\*/) {
953 $name =~ tr/*/%/;
954 return ("cmap_feature.gclass=? AND cmap_feature.feature_name LIKE ?",$class,$name);
955 } else {
956 return ("cmap_feature.gclass=? AND cmap_feature.feature_name=?",$class,$name);
960 =head2 make_features_join_part
962 Title : make_features_join_part
963 Usage : $string = $db->make_features_join_part()
964 Function: make join part of the features query
965 Returns : a string
966 Args : none
967 Status : protected
969 This method creates the part of the features query that immediately
970 follows the WHERE keyword.
972 =cut
974 sub make_features_join_part {
975 my $self = shift;
976 my $options = shift || {};
977 return !$options->{attributes} ? <<END1 : <<END2;
978 cmap_feature.feature_id = fdata.feature_id
979 AND ftype.ftypeid = fdata.ftypeid
980 END1
981 cmap_feature.feature_id = fdata.feature_id
982 AND ftype.ftypeid = fdata.ftypeid
983 AND fattribute.fattribute_id=fattribute_to_feature.fattribute_id
984 AND fdata.fid=fattribute_to_feature.fid
985 END2
988 sub getseqcoords_query {
989 my $self = shift;
990 return GETSEQCOORDS ;
993 sub getaliascoords_query{
994 my $self = shift;
995 return GETALIASCOORDS ;
998 sub getforcedseqcoords_query{
999 my $self = shift;
1000 return GETFORCEDSEQCOORDS ;
1003 sub getaliaslike_query{
1004 my $self = shift;
1005 return GETALIASLIKE ;
1008 =head2 search_notes
1010 Title : search_notes
1011 Usage : @search_results = $db->search_notes("full text search string",$limit)
1012 Function: Search the notes for a text string, using mysql full-text search
1013 Returns : array of results
1014 Args : full text search string, and an optional row limit
1015 Status : public
1017 This is a mysql-specific method. Given a search string, it performs a
1018 full-text search of the notes table and returns an array of results.
1019 Each row of the returned array is a arrayref containing the following fields:
1021 column 1 A Bio::DB::GFF::Featname object, suitable for passing to segment()
1022 column 2 The text of the note
1023 column 3 A relevance score.
1025 =cut
1027 sub search_notes {
1028 my $self = shift;
1029 my ($search_string,$limit) = @_;
1031 $search_string =~ tr/*?//d;
1033 my @words = $search_string =~ /(\w+)/g;
1034 my $regex = join '|',@words;
1035 my @searches = map {"fattribute_value LIKE '%${_}%'"} @words;
1036 my $search = join(' OR ',@searches);
1038 my $query = <<END;
1039 SELECT distinct gclass,feature_name as gname,fattribute_value
1040 FROM cmap_feature,fattribute_to_feature,fdata
1041 WHERE cmap_feature.feature_id=fdata.feature_id
1042 AND fdata.fid=fattribute_to_feature.fid
1045 $query .= " AND ($search) " if ($search);
1047 my $sth = $self->dbh->do_query($query);
1048 my @results;
1049 while (my ($class,$name,$note) = $sth->fetchrow_array) {
1050 next unless $class && $name; # sorry, ignore NULL objects
1051 my @matches = $note =~ /($regex)/g;
1052 my $relevance = 10*@matches;
1053 my $featname = Bio::DB::GFF::Featname->new($class=>$name);
1054 push @results,[$featname,$note,$relevance];
1055 last if $limit && @results >= $limit;
1057 @results;
1060 # sub search_notes {
1061 # my $self = shift;
1062 # my ($search_string,$limit) = @_;
1063 # my $query = FULLTEXTSEARCH;
1064 # $query .= " limit $limit" if defined $limit;
1065 # my $sth = $self->dbh->do_query($query,$search_string,$search_string);
1066 # my @results;
1067 # while (my ($class,$name,$note,$relevance) = $sth->fetchrow_array) {
1068 # next unless $class && $name; # sorry, ignore NULL objects
1069 # $relevance = sprintf("%.2f",$relevance); # trim long floats
1070 # my $featname = Bio::DB::GFF::Featname->new($class=>$name);
1071 # push @results,[$featname,$note,$relevance];
1073 # @results;
1076 =head2 make_features_order_by_part
1078 Title : make_features_order_by_part
1079 Usage : ($query,@args) = $db->make_features_order_by_part()
1080 Function: make the ORDER BY part of the features() query
1081 Returns : a SQL fragment and bind arguments, if any
1082 Args : none
1083 Status : protected
1085 This method creates the part of the features query that immediately
1086 follows the ORDER BY part of the query issued by features() and
1087 related methods.
1089 =cut
1091 sub make_features_order_by_part {
1092 my $self = shift;
1093 my $options = shift || {};
1094 return "cmap_feature.feature_name";
1097 =head2 create_cmap_viewer_link
1099 Title : create_cmap_viewer_link
1100 Usage : $link_str = $db->create_cmap_viewer_link(data_source=>$ds,group_id=>$gid)
1101 Function:
1102 Returns :
1103 Args :
1104 Status :
1107 =cut
1109 sub create_cmap_viewer_link {
1110 my $self = shift;
1111 my %args = @_;
1112 my $data_source = $args{'data_source'};
1113 my $gid = $args{'group_id'};
1114 my $link_str = undef;
1116 my $db = $self->features_db;
1117 my $sql_str = qq[
1118 select f.feature_name,
1119 f.feature_type_accession feature_type_aid,
1120 m.accession_id as map_aid,
1121 ms.accession_id as map_set_aid
1122 from cmap_feature f,
1123 cmap_map m,
1124 cmap_map_set ms
1125 where f.map_id=m.map_id
1126 and ms.map_set_id=m.map_set_id
1127 and f.feature_id=$gid
1130 my $result_ref = $db->selectrow_hashref($sql_str,{ Columns => {} });
1132 if ( $result_ref ) {
1133 $link_str='/cgi-bin/cmap/viewer?ref_map_set_aid='
1134 . $result_ref->{'map_set_aid'}
1135 . '&ref_map_aids='
1136 . $result_ref->{'map_aid'}
1137 . '&data_source='
1138 . $data_source
1139 . '&highlight='
1140 .$result_ref->{'feature_name'}
1141 . '&feature_type_'
1142 .$result_ref->{'feature_type_aid'}
1143 . '=2';
1146 return $link_str;
1152 __END__
1154 =head1 BUGS
1156 none ;-)
1158 =head1 SEE ALSO
1160 L<Bio::DB::GFF>, L<bioperl>
1162 =head1 AUTHOR
1164 Ben Faga E<lt>faga@cshl.orgE<gt>.
1166 Modified from mysql.pm by:
1168 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
1170 Copyright (c) 2002 Cold Spring Harbor Laboratory.
1172 This library is free software; you can redistribute it and/or modify
1173 it under the same terms as Perl itself.
1175 =cut