lib/Bio/DB/GFF/Adaptor/dbi/pg.pm

   1 package Bio::DB::GFF::Adaptor::dbi::pg;
   2
   3 =head1 NAME
   4
   5 Bio::DB::GFF::Adaptor::dbi::pg -- Database adaptor for a specific postgres schema
   6
   7 =head1 NOTES
   8
   9 SQL commands that need to be executed before this adaptor will work:
  10
  11   CREATE DATABASE <dbname>;
  12
  13 Also, select permission needs to be granted for each table in the
  14 database to the owner of the httpd process (usually 'nobody', but
  15 for some RedHat systems it is 'apache') if this adaptor is to be used
  16 with the Generic Genome Browser (gbrowse):
  17
  18   CREATE USER nobody;
  19   GRANT SELECT ON TABLE fmeta                 TO nobody;
  20   GRANT SELECT ON TABLE fgroup                TO nobody;
  21   GRANT SELECT ON TABLE fdata                 TO nobody;
  22   GRANT SELECT ON TABLE fattribute_to_feature TO nobody;
  23   GRANT SELECT ON TABLE fdna                  TO nobody;
  24   GRANT SELECT ON TABLE fattribute            TO nobody;
  25   GRANT SELECT ON TABLE ftype                 TO nobody;
  26
  27 =head2 Optimizing the database
  28
  29 PostgreSQL generally requires some tuning before you get very good
  30 performance for large databases.  For general information on tuning
  31 a PostgreSQL server, see http://www.varlena.com/GeneralBits/Tidbits/perf.html
  32 Of particular importance is executing VACUUM FULL ANALYZE whenever
  33 you change the database.
  34
  35 Additionally, for a GFF database, there are a few items you can tune.
  36 For each automatic class in your GBrowse conf file, there will be one
  37 or two searches done when searching for a feature.  If there are lots
  38 of features, these search can take several seconds.  To speed these searches,
  39 do two things:
  40
  41 =over
  42
  43 =item 1
  44
  45 Set 'enable_seqscan = false' in your postgresql.conf file (and restart
  46 your server).
  47
  48 =item 2
  49
  50 Create 'partial' indexes for each automatic class, doing this for the
  51 example class 'Allele':
  52
  53   CREATE INDEX partial_allele_gclass ON
  54     fgroup (lower('gname')) WHERE gclass='Allele';
  55
  56 And be sure to run VACUUM FULL ANALYZE after creating the indexes.
  57
  58 =back
  59
  60 =cut
  61
  62 # a simple postgres adaptor
  63 use strict;
  64 use Bio::DB::GFF::Util::Binning;
  65 use Bio::DB::GFF::Util::Rearrange; # for rearrange()
  66 use base qw(Bio::DB::GFF::Adaptor::dbi);
  67
  68 use constant MAX_SEGMENT => 100_000_000;  # the largest a segment can get
  69 use constant DEFAULT_CHUNK => 2000;
  70
  71 use constant GETSEQCOORDS =><<END;
  72 SELECT fref,
  73        COALESCE(gclass,'Sequence'),
  74        min(fstart),
  75        max(fstop),
  76        fstrand,
  77        gname
  78   FROM fdata,fgroup
  79   WHERE lower(fgroup.gname) = lower(?)
  80     AND fgroup.gclass=?
  81     AND fgroup.gid=fdata.gid
  82     GROUP BY fref,fstrand,gclass,gname
  83 END
  84 ;
  85
  86 use constant GETALIASCOORDS =><<END;
  87 SELECT fref,
  88        COALESCE(gclass,'Sequence'),
  89        min(fstart),
  90        max(fstop),
  91        fstrand,
  92        gname
  93   FROM fdata,fgroup,fattribute,fattribute_to_feature
  94   WHERE lower(fattribute_to_feature.fattribute_value)=lower(?)
  95     AND fgroup.gclass=?
  96     AND fgroup.gid=fdata.gid
  97     AND fattribute.fattribute_name='Alias'
  98     AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
  99     AND fattribute_to_feature.fid=fdata.fid
 100     GROUP BY fref,fstrand,gclass,gname
 101 END
 102 ;
 103
 104 use constant GETALIASLIKE =><<END;
 105 SELECT fref,
 106        COALESCE(gclass,'Sequence'),
 107        min(fstart),
 108        max(fstop),
 109        fstrand,
 110        gname
 111   FROM fdata,fgroup,fattribute,fattribute_to_feature
 112   WHERE lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
 113     AND fgroup.gclass=?
 114     AND fgroup.gid=fdata.gid
 115     AND fattribute.fattribute_name='Alias'
 116     AND fattribute_to_feature.fattribute_id=fattribute.fattribute_id
 117     AND fattribute_to_feature.fid=fdata.fid
 118     GROUP BY fref,fstrand,gname
 119 END
 120 ;
 121
 122
 123 use constant GETFORCEDSEQCOORDS =><<END;
 124 SELECT fref,
 125        COALESCE(gclass,'Sequence'),
 126        min(fstart),
 127        max(fstop),
 128        fstrand
 129   FROM fdata,fgroup
 130   WHERE lower(fgroup.gname) = lower(?)
 131     AND fgroup.gclass=?
 132     AND lower(fdata.fref) = lower(?)
 133     AND fgroup.gid=fdata.gid
 134     GROUP BY fref,fstrand,gclass
 135 END
 136 ;
 137
 138 use constant FULLTEXTWILDCARD => <<END;
 139 SELECT distinct gclass,gname,fattribute_value
 140     FROM fgroup,fattribute_to_feature,fdata
 141      WHERE fgroup.gid=fdata.gid
 142        AND fdata.fid=fattribute_to_feature.fid
 143        AND lower(fattribute_to_feature.fattribute_value) LIKE lower(?)
 144 END
 145 ;
 146
 147 ########################
 148 # moved from mysqlopt.pm
 149 ########################
 150
 151 # this is the largest that any reference sequence can be (100 megabases)
 152 use constant MAX_BIN    => 100_000_000;
 153
 154 # this is the smallest bin (1 K)
 155 use constant MIN_BIN    => 1000;
 156
 157 # size of range over which it is faster to force mysql to use the range for indexing
 158 use constant STRAIGHT_JOIN_LIMIT => 200_000;
 159
 160 ##############################################################################
 161
 162 =head1 DESCRIPTION
 163
 164 This adaptor implements a specific postgres database schema that is
 165 compatible with Bio::DB::GFF.  It inherits from
 166 Bio::DB::GFF::Adaptor::dbi, which itself inherits from Bio::DB::GFF.
 167
 168 The schema uses several tables:
 169
 170 =over 4
 171
 172 =item fdata
 173
 174 This is the feature data table.  Its columns are:
 175
 176     fid            feature ID (integer)
 177     fref           reference sequence name (string)
 178     fstart         start position relative to reference (integer)
 179     fstop          stop position relative to reference (integer)
 180     ftypeid        feature type ID (integer)
 181     fscore         feature score (float); may be null
 182     fstrand        strand; one of "+" or "-"; may be null
 183     fphase         phase; one of 0, 1 or 2; may be null
 184     gid            group ID (integer)
 185     ftarget_start  for similarity features, the target start position (integer)
 186     ftarget_stop   for similarity features, the target stop position (integer)
 187
 188 Note that it would be desirable to normalize the reference sequence
 189 name, since there are usually many features that share the same
 190 reference feature.  However, in the current schema, query performance
 191 suffers dramatically when this additional join is added.
 192
 193 =item fgroup
 194
 195 This is the group table. There is one row for each group.  Columns:
 196
 197     gid       the group ID (integer)
 198     gclass    the class of the group (string)
 199     gname     the name of the group (string)
 200
 201 The group table serves multiple purposes.  As you might expect, it is
 202 used to cluster features that logically belong together, such as the
 203 multiple exons of the same transcript.  It is also used to assign a
 204 name and class to a singleton feature.  Finally, the group table is
 205 used to identify the target of a similarity hit.  This is consistent
 206 with the way in which the group field is used in the GFF version 2
 207 format.
 208
 209 The fgroup.gid field joins with the fdata.gid field.
 210
 211 Examples:
 212
 213   sql> select * from fgroup where gname='sjj_2L52.1';
 214   +-------+-------------+------------+
 215   | gid   | gclass      | gname      |
 216   +-------+-------------+------------+
 217   | 69736 | PCR_product | sjj_2L52.1 |
 218   +-------+-------------+------------+
 219   1 row in set (0.70 sec)
 220
 221   sql> select fref,fstart,fstop from fdata,fgroup
 222             where gclass='PCR_product' and gname = 'sjj_2L52.1'
 223                   and fdata.gid=fgroup.gid;
 224   +---------------+--------+-------+
 225   | fref          | fstart | fstop |
 226   +---------------+--------+-------+
 227   | CHROMOSOME_II |   1586 |  2355 |
 228   +---------------+--------+-------+
 229   1 row in set (0.03 sec)
 230
 231 =item ftype
 232
 233 This table contains the feature types, one per row.  Columns are:
 234
 235     ftypeid      the feature type ID (integer)
 236     fmethod      the feature type method name (string)
 237     fsource      the feature type source name (string)
 238
 239 The ftype.ftypeid field joins with the fdata.ftypeid field.  Example:
 240
 241   sql> select fref,fstart,fstop,fmethod,fsource from fdata,fgroup,ftype
 242          where gclass='PCR_product'
 243                and gname = 'sjj_2L52.1'
 244                and fdata.gid=fgroup.gid
 245                and fdata.ftypeid=ftype.ftypeid;
 246   +---------------+--------+-------+-------------+-----------+
 247   | fref          | fstart | fstop | fmethod     | fsource   |
 248   +---------------+--------+-------+-------------+-----------+
 249   | CHROMOSOME_II |   1586 |  2355 | PCR_product | GenePairs |
 250   +---------------+--------+-------+-------------+-----------+
 251   1 row in set (0.08 sec)
 252
 253 =item fdna
 254
 255 This table holds the raw DNA of the reference sequences.  It has three
 256 columns:
 257
 258     fref          reference sequence name (string)
 259     foffset       offset of this sequence
 260     fdna          the DNA sequence (longblob)
 261
 262 To overcome problems loading large blobs, DNA is automatically
 263 fragmented into multiple segments when loading, and the position of
 264 each segment is stored in foffset.  The fragment size is controlled by
 265 the -clump_size argument during initialization.
 266
 267 =item fattribute_to_feature
 268
 269 This table holds "attributes", which are tag/value pairs stuffed into
 270 the GFF line.  The first tag/value pair is treated as the group, and
 271 anything else is treated as an attribute (weird, huh?).
 272
 273  CHR_I assembly_tag Finished     2032 2036 . + . Note "Right: cTel33B"
 274  CHR_I assembly_tag Polymorphism 668  668  . + . Note "A->C in cTel33B"
 275
 276 The columns of this table are:
 277
 278     fid                 feature ID (integer)
 279     fattribute_id       ID of the attribute (integer)
 280     fattribute_value    text of the attribute (text)
 281
 282 The fdata.fid column joins with fattribute_to_feature.fid.
 283
 284 =item fattribute
 285
 286 This table holds the normalized names of the attributes.  Fields are:
 287
 288   fattribute_id      ID of the attribute (integer)
 289   fattribute_name    Name of the attribute (varchar)
 290
 291 =back
 292
 293 =head2 Data Loading Methods
 294
 295 In addition to implementing the abstract SQL-generating methods of
 296 Bio::DB::GFF::Adaptor::dbi, this module also implements the data
 297 loading functionality of Bio::DB::GFF.
 298
 299 =cut
 300
 301
 302 =head2 new
 303
 304  Title   : new
 305  Usage   : $db = Bio::DB::GFF->new(@args)
 306  Function: create a new adaptor
 307  Returns : a Bio::DB::GFF object
 308  Args    : see below
 309  Status  : Public
 310
 311 The new constructor is identical to the "dbi" adaptor's new() method,
 312 except that the prefix "dbi:pg" is added to the database DSN identifier
 313 automatically if it is not there already.
 314
 315   Argument       Description
 316   --------       -----------
 317
 318   -dsn           the DBI data source, e.g. 'dbi:Pg:dbname=:ens0040' or "ens0040"
 319
 320   -user          username for authentication
 321
 322   -pass          the password for authentication
 323
 324 =cut
 325
 326 #'
 327
 328 sub new {
 329   my $class = shift;
 330   my ($dsn,$other) = rearrange([
 331                                 [qw(FEATUREDB DB DSN)],
 332                                ],@_);
 333   $dsn = "dbi:Pg:dbname=$dsn" if !ref($dsn) && $dsn !~ /^(dbi|DBI):/;
 334   my $self = $class->SUPER::new(-dsn=>$dsn,%$other);
 335   $self;
 336 }
 337
 338 =head2 schema
 339
 340  Title   : schema
 341  Usage   : $schema = $db->schema
 342  Function: return the CREATE script for the schema
 343  Returns : a list of CREATE statemetns
 344  Args    : none
 345  Status  : protected
 346
 347 This method returns a list containing the various CREATE statements
 348 needed to initialize the database tables.
 349
 350 =cut
 351
 352 sub schema {
 353   my %schema = (
 354                 fdata =>{
 355 table=> q{
 356 CREATE TABLE "fdata" (
 357   "fid" serial NOT NULL,
 358   "fref" character varying(100) DEFAULT '' NOT NULL,
 359   "fstart" integer DEFAULT '0' NOT NULL,
 360   "fstop" integer DEFAULT '0' NOT NULL,
 361   "fbin" double precision DEFAULT '0.000000' NOT NULL,
 362   "ftypeid" integer DEFAULT '0' NOT NULL,
 363   "fscore" double precision DEFAULT NULL,
 364   "fstrand" character varying(3) DEFAULT NULL,
 365   "fphase" character varying(3) DEFAULT NULL,
 366   "gid" integer DEFAULT '0' NOT NULL,
 367   "ftarget_start" integer DEFAULT NULL,
 368   "ftarget_stop" integer DEFAULT NULL,
 369   CONSTRAINT chk_fdata_fstrand CHECK (fstrand IN ('+','-')),
 370   CONSTRAINT chk_fdata_fphase CHECK (fphase IN ('0','1','2')),
 371   CONSTRAINT pk_fdata PRIMARY KEY (fid)
 372 )
 373 }, # fdata table
 374
 375 #CONSTRAINT fref_fdata UNIQUE (fref, fbin, fstart, fstop, ftypeid, gid)
 376 # fdata_fref_idx => q{ CREATE UNIQUE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)},
 377
 378 index=>{
 379                 fdata_fref_idx => q{
 380 CREATE INDEX fdata_fref_idx ON fdata (fref,fbin,fstart,fstop,ftypeid,gid)
 381 },
 382
 383                 fdata_ftypeid_idx => q{
 384 CREATE INDEX fdata_ftypeid_idx ON fdata (ftypeid)
 385 },
 386
 387                 fdata_gid_idx => q{
 388 CREATE INDEX fdata_gid_idx ON fdata (gid)
 389 }
 390          }, # fdata indexes
 391
 392 }, # fdata
 393
 394
 395
 396                 fgroup => {
 397 table => q{
 398 CREATE TABLE "fgroup" (
 399   "gid" serial NOT NULL,
 400   "gclass" character varying(100) DEFAULT NULL,
 401   "gname" character varying(100) DEFAULT NULL,
 402   CONSTRAINT pk_fgroup PRIMARY KEY (gid)
 403 )
 404 }, # fgroup table
 405
 406 index => {
 407                 fgroup_gclass_idx => q{
 408 CREATE UNIQUE INDEX fgroup_gclass_idx ON fgroup (gclass,gname)
 409 },
 410                 fgroup_gname_idx => q{
 411 CREATE INDEX fgroup_gname_idx ON fgroup(gname)
 412 },
 413                 fgroup_lower_gname_idx => q{
 414 CREATE INDEX fgroup_lower_gname_idx ON fgroup (lower(gname))
 415 },
 416            }, # fgroup indexes
 417
 418 }, # fgroup
 419
 420                 ftype => {
 421 table => q{
 422 CREATE TABLE "ftype" (
 423   "ftypeid" serial NOT NULL,
 424   "fmethod" character varying(100) DEFAULT '' NOT NULL,
 425   "fsource" character varying(100) DEFAULT NULL,
 426   CONSTRAINT pk_ftype PRIMARY KEY (ftypeid),
 427   CONSTRAINT ftype_ftype UNIQUE (fmethod, fsource)
 428 )
 429 }, # ftype table
 430
 431 index => {
 432                 ftype_fmethod_idx => q{
 433 CREATE  INDEX ftype_fmethod_idx ON ftype (fmethod)
 434 },
 435
 436                 ftype_fsource_idx => q{
 437 CREATE  INDEX ftype_fsource_idx ON ftype (fsource)
 438 },
 439
 440                 ftype_ftype_idx => q{
 441 CREATE UNIQUE INDEX ftype_ftype_idx ON ftype (fmethod,fsource)
 442 }
 443            }, # ftype indexes
 444
 445 }, # ftype
 446
 447
 448          fdna => {
 449 table => q{
 450 CREATE TABLE "fdna" (
 451   "fref" character varying(100) DEFAULT '' NOT NULL,
 452   "foffset" integer DEFAULT '0' NOT NULL,
 453   "fdna" bytea,
 454   CONSTRAINT pk_fdna PRIMARY KEY (fref, foffset)
 455 )
 456 } #fdna table
 457                  }, #fdna
 458
 459         fmeta => {
 460 table => q{
 461 CREATE TABLE "fmeta" (
 462   "fname" character varying(255) DEFAULT '' NOT NULL,
 463   "fvalue" character varying(255) DEFAULT '' NOT NULL,
 464   CONSTRAINT pk_fmeta PRIMARY KEY (fname)
 465 )
 466 } # fmeta table
 467                  }, # fmeta
 468
 469
 470        fattribute => {
 471 table => q{
 472 CREATE TABLE "fattribute" (
 473   "fattribute_id" serial NOT NULL,
 474   "fattribute_name" character varying(255) DEFAULT '' NOT NULL,
 475   CONSTRAINT pk_fattribute PRIMARY KEY (fattribute_id)
 476 )
 477 }, # fattribute table
 478
 479 }, # fattribute
 480
 481        fattribute_to_feature => {
 482 table => q{
 483 CREATE TABLE "fattribute_to_feature" (
 484   "fid" integer DEFAULT '0' NOT NULL,
 485   "fattribute_id" integer DEFAULT '0' NOT NULL,
 486   "fattribute_value" text
 487 )
 488 }, # fattribute_to_feature table
 489
 490 index => {
 491        fattribute_to_feature_fid => q{
 492 CREATE  INDEX fattribute_to_feature_fid ON fattribute_to_feature (fid,fattribute_id)
 493 },
 494        fattribute_txt_idx => q{
 495 CREATE INDEX fattribute_txt_idx ON fattribute_to_feature (fattribute_value)
 496 },
 497        fattribute_lower_idx => q{
 498 CREATE INDEX fattribute_lower_idx ON fattribute_to_feature (lower(fattribute_value))
 499 },
 500            } # fattribute_to_feature indexes
 501 }, # fattribute_to_feature
 502
 503        finterval_stats => {
 504 table=> q{
 505 CREATE TABLE "finterval_stats" (
 506    "ftypeid"          integer DEFAULT '0' NOT NULL,
 507    "fref"             character varying(100) DEFAULT '' NOT NULL,
 508    "fbin"             integer DEFAULT '0' NOT NULL,
 509    "fcum_count"       integer DEFAULT '0' NOT NULL,
 510    CONSTRAINT pk_finterval_stats PRIMARY KEY (ftypeid,fref,fbin)
 511 )
 512 } # finterval_stats table
 513 },# finterval_stats
 514
 515
 516
 517
 518 );
 519   return \%schema;
 520 }
 521
 522
 523 =head2 setup_load
 524
 525  Title   : setup_load
 526  Usage   : $db->setup_load
 527  Function: called before load_gff_line()
 528  Returns : void
 529  Args    : none
 530  Status  : protected
 531
 532 This method performs schema-specific initialization prior to loading a
 533 set of GFF records.  It prepares a set of DBI statement handlers to be
 534 used in loading the data.
 535
 536 =cut
 537
 538 sub setup_load {
 539   my $self      = shift;
 540   my $schema = $self->schema;
 541
 542   my $dbh = $self->features_db;
 543
 544   if ($self->lock_on_load) {
 545     my @tables = map { "$_ WRITE"} $self->tables;
 546     my $tables = join ', ',@tables;
 547     $dbh->do("LOCK TABLES $tables");
 548   }
 549
 550   my $lookup_type = $dbh->prepare_delayed('SELECT ftypeid FROM ftype WHERE fmethod=? AND fsource=?');
 551   my $insert_type = $dbh->prepare_delayed('INSERT INTO ftype (fmethod,fsource) VALUES (?,?)');
 552   my $insertid_type = $dbh->prepare_delayed("SELECT currval('ftype_ftypeid_seq')");
 553
 554   my $lookup_group = $dbh->prepare_delayed('SELECT gid FROM fgroup WHERE lower(gname)=lower(?) AND gclass=?');
 555   my $insert_group = $dbh->prepare_delayed('INSERT INTO fgroup (gname,gclass) VALUES (?,?)');
 556   my $insertid_group = $dbh->prepare_delayed("SELECT currval('fgroup_gid_seq')");
 557
 558   my $lookup_attribute = $dbh->prepare_delayed('SELECT fattribute_id FROM fattribute WHERE fattribute_name=?');
 559   my $insert_attribute = $dbh->prepare_delayed('INSERT INTO fattribute (fattribute_name) VALUES (?)');
 560   my $insertid_attribute = $dbh->prepare_delayed("SELECT currval('fattribute_fattribute_id_seq')");
 561
 562   my $insert_attribute_value = $dbh->prepare_delayed('INSERT INTO fattribute_to_feature (fid,fattribute_id,fattribute_value) VALUES (?,?,?)');
 563
 564   my $insert_data  = $dbh->prepare_delayed(<<END);
 565 INSERT INTO fdata (fref,fstart,fstop,fbin,ftypeid,fscore,
 566                    fstrand,fphase,gid,ftarget_start,ftarget_stop)
 567        VALUES(?,?,?,?,?,?,?,?,?,?,?)
 568 END
 569 ;
 570   my $delete_existing_data = $dbh->prepare_delayed('DELETE FROM fdata WHERE fref=? AND fstart=? AND fstop=? AND fbin=? AND ftypeid=? AND GID=?');
 571   my $insertid_data = $dbh->prepare_delayed("SELECT currval('fdata_fid_seq')");
 572
 573   $self->{load_stuff}{sth}{lookup_ftype}     = $lookup_type;
 574   $self->{load_stuff}{sth}{insert_ftype}     = $insert_type;
 575   $self->{load_stuff}{sth}{insertid_ftype}   = $insertid_type;
 576   $self->{load_stuff}{sth}{lookup_fgroup}    = $lookup_group;
 577   $self->{load_stuff}{sth}{insert_fgroup}    = $insert_group;
 578   $self->{load_stuff}{sth}{insertid_fgroup}    = $insertid_group;
 579   $self->{load_stuff}{sth}{insertid_fdata}   = $insertid_data;
 580   $self->{load_stuff}{sth}{insert_fdata}     = $insert_data;
 581   $self->{load_stuff}{sth}{delete_existing_fdata} = $delete_existing_data;
 582   $self->{load_stuff}{sth}{lookup_fattribute} = $lookup_attribute;
 583   $self->{load_stuff}{sth}{insert_fattribute} = $insert_attribute;
 584   $self->{load_stuff}{sth}{insertid_fattribute} = $insertid_attribute;
 585   $self->{load_stuff}{sth}{insert_fattribute_value} = $insert_attribute_value;
 586   $self->{load_stuff}{types}  = {};
 587   $self->{load_stuff}{groups} = {};
 588   $self->{load_stuff}{counter} = 0;
 589 }
 590
 591 =head2 load_gff_line
 592
 593  Title   : load_gff_line
 594  Usage   : $db->load_gff_line($fields)
 595  Function: called to load one parsed line of GFF
 596  Returns : true if successfully inserted
 597  Args    : hashref containing GFF fields
 598  Status  : protected
 599
 600 This method is called once per line of the GFF and passed a series of
 601 parsed data items that are stored into the hashref $fields.  The keys are:
 602
 603  ref          reference sequence
 604  source       annotation source
 605  method       annotation method
 606  start        annotation start
 607  stop         annotation stop
 608  score        annotation score (may be undef)
 609  strand       annotation strand (may be undef)
 610  phase        annotation phase (may be undef)
 611  group_class  class of annotation's group (may be undef)
 612  group_name   ID of annotation's group (may be undef)
 613  target_start start of target of a similarity hit
 614  target_stop  stop of target of a similarity hit
 615  attributes   array reference of attributes, each of which is a [tag=>value] array ref
 616
 617 =cut
 618
 619 sub load_gff_line {
 620   my $self = shift;
 621   my $gff = shift;
 622
 623   if (defined $gff->{phase}){
 624      chomp($gff->{phase});
 625      undef($gff->{phase}) if $gff->{phase} eq '.';
 626    }
 627
 628   if (defined $gff->{strand} && $gff->{strand} eq '.'){undef($gff->{strand})};
 629   if (defined $gff->{score}  && $gff->{score} eq '.'){undef($gff->{score})};
 630
 631   my $s    = $self->{load_stuff};
 632   my $dbh  = $self->features_db;
 633   local $dbh->{PrintError} = 0;
 634
 635   defined(my $typeid  = $self->get_table_id('ftype', $gff->{method} => $gff->{source})) or return;
 636   defined(my $groupid = $self->get_table_id('fgroup',$gff->{gname}  => $gff->{gclass})) or return;
 637
 638   my $bin =  bin($gff->{start},$gff->{stop},$self->min_bin);
 639   my $result = $s->{sth}{insert_fdata}->execute($gff->{ref},
 640                                                $gff->{start},$gff->{stop},$bin,
 641                                                $typeid,
 642                                                $gff->{score},$gff->{strand},$gff->{phase},
 643                                                $groupid,
 644                                                $gff->{tstart},$gff->{tstop});
 645
 646   warn $dbh->errstr,"\n" and print "ref=",$gff->{ref}," start=",$gff->{start}," stop=",$gff->{stop}," bin=",$bin," typeid=",$typeid," groupid=",$groupid,"\n"
 647     and return unless $result;
 648
 649   my $fid = $self->insertid($s->{sth},'fdata')
 650     || $self->get_feature_id($gff->{ref},$gff->{start},$gff->{stop},$typeid,$groupid);
 651
 652   # insert attributes
 653   foreach (@{$gff->{attributes}}) {
 654     defined(my $attribute_id = $self->get_table_id('fattribute',$_->[0])) or return;
 655     $s->{sth}{insert_fattribute_value}->execute($fid,$attribute_id,$_->[1]);
 656   }
 657
 658   if ( (++$s->{counter} % 1000) == 0) {
 659     print STDERR "$s->{counter} records loaded...";
 660     print STDERR -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
 661   }
 662
 663   $fid;
 664 }
 665
 666
 667 sub insertid {
 668   my $self = shift;
 669   my $sth = shift ;
 670   my $table = shift;
 671
 672   my $insert_id;
 673   if ($sth->{"insertid_$table"}->execute()){
 674      $insert_id = ($sth->{"insertid_$table"}->fetchrow_array)[0];
 675   }
 676   else{
 677     warn "No CURRVAL for SEQUENCE of table $table ",$sth->errstr,"\n";
 678     return;
 679   }
 680   return $insert_id;
 681 }
 682
 683
 684 =head2 get_table_id
 685
 686  Title   : get_table_id
 687  Usage   : $integer = $db->get_table_id($table,@ids)
 688  Function: get the ID of a group or type
 689  Returns : an integer ID or undef
 690  Args    : none
 691  Status  : private
 692
 693 This internal method is called by load_gff_line to look up the integer
 694 ID of an existing feature type or group.  The arguments are the name
 695 of the table, and two string identifiers.  For feature types, the
 696 identifiers are the method and source.  For groups, the identifiers
 697 are group name and class.
 698
 699 This method requires that a statement handler named I<lookup_$table>,
 700 have been created previously by setup_load().  It is here to overcome
 701 deficiencies in mysql's INSERT syntax.
 702
 703 =cut
 704
 705 #'
 706 # get the object ID from a named table
 707 sub get_table_id {
 708   my $self   = shift;
 709   my $table  = shift;
 710   my @ids    = @_;
 711
 712   # irritating warning for null id
 713   my $id_key;
 714   {
 715     local $^W=0;
 716     $id_key = join ':',@ids;
 717   }
 718
 719   my $s   = $self->{load_stuff};
 720   my $sth = $s->{sth};
 721   my $dbh = $self->features_db;
 722
 723   unless (defined($s->{$table}{$id_key})) {
 724     $sth->{"lookup_$table"}->execute(@ids);
 725     my @result = $sth->{"lookup_$table"}->fetchrow_array;
 726     if (@result > 0) {
 727       $s->{$table}{$id_key} = $result[0];
 728     } else {
 729       $sth->{"insert_$table"}->execute(@ids)
 730         && ($s->{$table}{$id_key} = $self->insertid($sth,$table));
 731         #&& ($s->{$table}{$id_key} = $self->insertid($sth->{"insertid_$table"}));
 732         #&& ($s->{$table}{$id_key} = $sth->{"insert_$table"}->insertid);
 733     }
 734   }
 735
 736   my $id = $s->{$table}{$id_key};
 737   unless (defined $id) {
 738     warn "No $table id for $id_key ",$dbh->errstr," Record skipped.\n";
 739     return;
 740   }
 741   $id;
 742 }
 743
 744
 745 #sub insertid {
 746 #  my $self = shift;
 747 #  my $insertid_sth = shift ;
 748 #  my $insert_id;
 749 #  if ($insertid_sth->execute){
 750 #     $insert_id = ($insertid_sth->fetchrow_array)[0];
 751 #  }
 752 #  else{
 753 #    warn "No CURRVAL for SEQUENCE  ",$insertid_sth->errstr,"\n";
 754 #    return;
 755 #  }
 756 #  return $insert_id;
 757 #}
 758
 759 sub insert_sequence {
 760   my $self = shift;
 761   my($id,$offset,$seq) = @_;
 762   my $sth = $self->{_insert_sequence}
 763     ||= $self->dbh->prepare_delayed('insert into fdna values (?,?,?)');
 764   $sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
 765 }
 766
 767 =head2 range_query
 768
 769  Title   : range_query
 770  Usage   : $db->range_query($range_type,$refseq,$refclass,$start,$stop,$types,$order_by_group,$attributes,$binsize)
 771  Function: create statement handle for range/overlap queries
 772  Returns : a DBI statement handle
 773  Args    : see below
 774  Status  : Protected
 775
 776 This method constructs the statement handle for this module's central
 777 query: given a range and/or a list of feature types, fetch their GFF
 778 records.  It overrides a method in dbi.pm so that the overlaps query
 779 can write SQL optimized for Postgres.  Specifically, instead of writing
 780 the bin related section as a set of ORs, each bin piece is place in
 781 a separate select and then they are UNIONed together.  This subroutine
 782 requires several replacements for other subroutines in dbi.pm.  In this
 783 module, they are named the same as those in dbi.pm but prefixed with
 784 "pg_".
 785
 786 The positional arguments are as follows:
 787
 788   Argument               Description
 789
 790   $isrange               A flag indicating that this is a range.
 791                          query.  Otherwise an overlap query is
 792                          assumed.
 793
 794   $refseq                The reference sequence name (undef if no range).
 795
 796   $refclass              The reference sequence class (undef if no range).
 797
 798   $start                 The start of the range (undef if none).
 799
 800   $stop                  The stop of the range (undef if none).
 801
 802   $types                 Array ref containing zero or feature types in the
 803                          format [method,source].
 804
 805   $order_by_group        A flag indicating that statement handler should group
 806                          the features by group id (handy for iterative fetches)
 807
 808   $attributes            A hash containing select attributes.
 809
 810   $binsize               A bin size for generating tables of feature density.
 811
 812 =cut
 813
 814 sub range_query {
 815   my $self = shift;
 816   my($rangetype,$refseq,$class,$start,$stop,$types,$sparse,$order_by_group,$attributes,$bin) = @_;
 817
 818   my $dbh = $self->features_db;
 819
 820   #  my @bin_parts = split /\n\s+OR/, $self->bin_query($start,$stop);
 821   #  warn "bin_part: @bin_parts\n";
 822
 823   my %a             = (refseq=>$refseq,class=>$class,start=>$start,stop=>$stop,types=>$types,attributes=>$attributes,bin_width=>$bin);
 824   my ($query, @args, $order_by);
 825
 826   if ($rangetype ne 'overlaps') {
 827
 828     my $select        = $self->make_features_select_part(\%a);
 829     my $from          = $self->make_features_from_part($sparse,\%a);
 830     my $join          = $self->make_features_join_part(\%a);
 831     my $where;
 832        ($where,@args) = $self->make_features_by_range_where_part($rangetype,\%a);
 833     my ($group_by,@more_args) = $self->make_features_group_by_part(\%a);
 834        $order_by      = $self->make_features_order_by_part(\%a) if $order_by_group;
 835
 836     $query         = "SELECT $select FROM $from WHERE $join";
 837     $query           .= " AND $where" if $where;
 838
 839     if ($group_by) {
 840       $query           .= " GROUP BY $group_by";
 841       push @args,@more_args;
 842     }
 843
 844   } else {  # most common case: overlaps query
 845
 846     my @bin_parts            = split /\s*OR/, $self->bin_query($start,$stop);
 847     my $select               = $self->make_features_select_part(\%a);
 848     my $from                 = $self->make_features_from_part($sparse,\%a);
 849     my $join                 = $self->make_features_join_part(\%a);
 850     my $where;
 851     ($where,@args)           = $self->pg_make_features_by_range_where_part($rangetype,\%a);
 852     my ($group_by,@more_args)= $self->make_features_group_by_part(\%a);
 853     $order_by                = $self->pg_make_features_order_by_part(\%a) if $order_by_group;
 854
 855     my @temp_args;
 856     my @query_pieces;
 857     foreach my $bin (@bin_parts) {
 858       my $temp_query = "SELECT $select FROM $from WHERE $join AND $where AND $bin\n";
 859       push @temp_args, @args;
 860
 861       if ($group_by) {
 862         $temp_query    .= " GROUP BY $group_by";
 863         push @temp_args,@more_args;
 864       }
 865
 866       push @query_pieces, $temp_query;
 867     }
 868
 869     @args             = @temp_args;
 870     $query            = join("UNION\n", @query_pieces);
 871
 872   }
 873
 874   $query           .= " ORDER BY $order_by" if $order_by;
 875
 876   $self->dbh->do('set enable_seqscan=off');
 877   my $sth = $self->dbh->do_query($query,@args);
 878   $sth;
 879 }
 880
 881 sub pg_make_features_by_range_where_part {
 882   my $self = shift;
 883   my ($rangetype,$options) = @_;
 884
 885   return unless $rangetype eq 'overlaps';
 886
 887   $options ||= {};
 888   my ($refseq,$class,$start,$stop,$types,$attributes) =
 889     @{$options}{qw(refseq class start stop types attributes)};
 890
 891   my (@query,@args);
 892
 893   if ($refseq) {
 894     my ($q,@a) = $self->refseq_query($refseq,$class);
 895     push @query,$q;
 896     push @args,@a;
 897   }
 898
 899   if (defined $start or defined $stop) {
 900     $start = 0               unless defined($start);
 901     $stop  = MAX_SEGMENT     unless defined($stop);
 902
 903     my ($range_query,@range_args) = $self->pg_overlap_query($start,$stop);
 904
 905     push @query,$range_query;
 906     push @args,@range_args;
 907   }
 908
 909   if (defined $types && @$types) {
 910     my ($type_query,@type_args) = $self->types_query($types);
 911     push @query,$type_query;
 912     push @args,@type_args;
 913   }
 914
 915   if ($attributes) {
 916     my ($attribute_query,@attribute_args) = $self->make_features_by_attribute_where_part($attributes);
 917     push @query,"($attribute_query)";
 918     push @args,@attribute_args;
 919   }
 920
 921   my $query = join "AND",@query;
 922   return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
 923 }
 924
 925 sub pg_overlap_query {
 926   my $self = shift;
 927   my ($start,$stop) = @_;
 928
 929   my ($iq,@iargs) = $self->overlap_query_nobin($start,$stop);
 930   my $query = "\n$iq\n";
 931   my @args = @iargs;
 932
 933   return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
 934 }
 935
 936 sub pg_make_features_order_by_part {
 937   my $self = shift;
 938   my $options = shift || {};
 939   return "gname";
 940 }
 941
 942 =head2 search_notes
 943
 944 This PostgreSQL adaptor does not implement the search notes method
 945 because it can be very slow (although the code for the method is
 946 contained in this method but commented out).
 947 There is, however, a PostgreSQL adaptor that does implement it in
 948 a more efficient way: L<Bio::DB::GFF::Adaptor::dbi::pg_fts>,
 949 which inherits from this adaptor and uses the optional PostgreSQL
 950 module TSearch2 for full text indexing.  See that adaptor's
 951 documentation for more information.
 952
 953 See also L<Bio::DB::GFF>
 954
 955  Title   : search_notes
 956  Usage   : @search_results = $db->search_notes("full text search string",$limit)
 957  Function: Search the notes for a text string, using mysql full-text search
 958  Returns : array of results
 959  Args    : full text search string, and an optional row limit
 960  Status  : public
 961
 962 This is a replacement for the mysql-specific method.  Given a search string, it
 963 performs a ILIKE search of the notes table and returns an array of results.
 964 Each row of the returned array is a arrayref containing the following fields:
 965
 966   column 1     A Bio::DB::GFF::Featname object, suitable for passing to segment()
 967   column 2     The text of the note
 968   column 3     A relevance score.
 969
 970 Note that for large databases this can be very slow and may result in
 971 time out or 500-cgi errors.  If this is happening on a regular basis,
 972 you should look into using L<Bio::DB::GFF::Adaptor::dbi::pg_fts> which
 973 implements the TSearch2 full text indexing scheme.
 974
 975 =cut
 976
 977 sub search_notes{
 978 #  my $self = shift;
 979 #  my ($search_string,$limit) = @_;
 980 #
 981 #  $search_string =~ tr/*/%/s;
 982 #  $search_string =  '%'.$search_string unless $search_string =~ /^\%/;
 983 #  $search_string =  $search_string.'%' unless $search_string =~ /\%$/;
 984 #  warn "search_string:$search_string";
 985 #  my $query = FULLTEXTWILDCARD;
 986 #  $query   .= " limit $limit" if defined $limit;
 987 #  my $sth   = $self->dbh->do_query($query,$search_string);
 988 #
 989 #  my @results;
 990 #  while (my ($class,$name,$note) = $sth->fetchrow_array) {
 991 #
 992 #     next unless $class && $name;    # sorry, ignore NULL objects
 993 #     my $featname = Bio::DB::GFF::Featname->new($class=>$name);
 994 #
 995 #     push @results,[$featname,$note,0]; #gbrowse expects a score, but
 996 #                                        #pg doesn't give one, thus the 0
 997 #  }
 998 #  warn @results;
 999 #
1000 #  return @results;
1001 }
1002
1003
1004 =head2 make_meta_set_query
1005
1006  Title   : make_meta_set_query
1007  Usage   : $sql = $db->make_meta_set_query
1008  Function: return SQL fragment for setting a meta parameter
1009  Returns : SQL fragment
1010  Args    : none
1011  Status  : public
1012
1013 By default this does nothing; meta parameters are not stored or
1014 retrieved.
1015
1016 =cut
1017
1018 sub make_meta_set_query {
1019    return 'INSERT INTO fmeta VALUES (?,?)';
1020 }
1021
1022 sub make_classes_query {
1023   my $self = shift;
1024   return 'SELECT DISTINCT gclass FROM fgroup WHERE NOT gclass IS NULL';
1025 }
1026
1027
1028 sub chunk_size {
1029   my $self = shift;
1030   $self->meta('chunk_size') || DEFAULT_CHUNK;
1031 }
1032
1033 sub getseqcoords_query {
1034    my $self = shift;
1035    return GETSEQCOORDS ;
1036 }
1037
1038 sub getaliascoords_query{
1039   my $self = shift;
1040   return GETALIASCOORDS ;
1041 }
1042
1043
1044 sub getforcedseqcoords_query{
1045   my $self = shift;
1046   return GETFORCEDSEQCOORDS ;
1047 }
1048
1049
1050 sub getaliaslike_query{
1051   my $self = shift;
1052   return GETALIASLIKE ;
1053 }
1054
1055
1056 sub make_features_select_part {
1057   my $self = shift;
1058   my $options = shift || {};
1059   my $s;
1060   if (my $b = $options->{bin_width}) {
1061
1062     $s = <<END;
1063 fref,
1064   1+$b*floor(fstart/$b)   as fstart,
1065   $b*(1+floor(fstart/$b)) as fstop,
1066   CASE WHEN fsource IS NULL THEN fmethod
1067        ELSE fmethod||':'||fsource,
1068   'bin',
1069   count(*) as fscore,
1070   '.','.','bin',
1071   CASE WHEN fsource IS NULL THEN fref||':'||fmethod
1072        ELSE fref||':'||fmethod||':'||fsource,
1073   NULL,NULL,NULL,NULL
1074 END
1075 ;
1076   } else {
1077     $s = <<END;
1078 fref,fstart,fstop,fsource,fmethod,fscore,fstrand,fphase,gclass,fgroup.gname,ftarget_start,ftarget_stop,fdata.fid,fdata.gid
1079 END
1080 ;
1081 }
1082   $s .= ",count(fdata.fid)" if $options->{attributes} && keys %{$options->{attributes}}>1;
1083   $s;
1084 }
1085
1086 sub make_features_from_part_bkup {
1087   my $self = shift;
1088   my $sparse = shift;
1089   my $options = shift || {};
1090   #my $index = $sparse ? ' USE INDEX(ftypeid)': '';
1091   my $index =  '';
1092   return $options->{attributes} ? "fdata${index},ftype,fgroup,fattribute,fattribute_to_feature\n"
1093                                 : "fdata${index},ftype,fgroup\n";
1094 }
1095
1096
1097 ####################################
1098 # moved from mysqlopt.pm
1099 ###################################
1100 # meta values
1101 sub default_meta_values {
1102   my $self = shift;
1103   my @values = $self->SUPER::default_meta_values;
1104   return (
1105           @values,
1106           max_bin => MAX_BIN,
1107           min_bin => MIN_BIN,
1108           straight_join_limit => STRAIGHT_JOIN_LIMIT,
1109          );
1110 }
1111
1112 sub min_bin {
1113   my $self = shift;
1114   return $self->meta('min_bin') || MIN_BIN;
1115 }
1116 sub max_bin {
1117   my $self = shift;
1118   return $self->meta('max_bin') || MAX_BIN;
1119 }
1120 sub straight_join_limit {
1121   my $self = shift;
1122   return $self->meta('straight_join_limit') || STRAIGHT_JOIN_LIMIT;
1123 }
1124
1125
1126 sub _feature_by_name {
1127   my $self = shift;
1128   my ($class,$name,$location,$callback) = @_;
1129   $callback || $self->throw('must provide a callback argument');
1130
1131   my @bin_parts      = split /\s*OR/, $self->bin_query($location->[1],$location->[2]) if $location;
1132   my $select         = $self->make_features_select_part;
1133   my $from           = $self->make_features_from_part(undef,{sparse_groups=>1});
1134   my ($where,@args)  = $self->make_features_by_name_where_part($class,$name);
1135   my $join           = $self->make_features_join_part;
1136   my $range          = $self->pg_make_features_by_range_where_part('overlaps',
1137                                                                 {refseq=>$location->[0],
1138                                                                  class =>'',
1139                                                                  start=>$location->[1],
1140                                                                  stop =>$location->[2]}) if $location;
1141
1142   my @temp_args;
1143   my @query_pieces;
1144   my $query;
1145   if (@bin_parts) {
1146     foreach my $bin (@bin_parts) {
1147       my $temp_query = "SELECT $select FROM $from WHERE $join AND $where AND $range AND $bin\n";
1148       push @temp_args, @args;
1149       push @query_pieces, $temp_query;
1150     }
1151
1152     @args  = @temp_args;
1153     $query = join("UNION\n", @query_pieces);
1154
1155   } else {
1156     $query  = "SELECT $select FROM $from WHERE $where AND $join";
1157   }
1158
1159   my $sth    = $self->dbh->do_query($query,@args);
1160
1161   my $count = 0;
1162   while (my @row = $sth->fetchrow_array) {
1163     $callback->(@row);
1164     $count++;
1165   }
1166   $sth->finish;
1167   return $count;
1168 }
1169
1170 sub update_sequences {
1171   my $self = shift;
1172   my $dbh  = $self->features_db;
1173
1174   $dbh->do("SELECT setval('public.fdata_fid_seq', max(fid)+1) FROM fdata");
1175   $dbh->do("SELECT setval('public.fattribute_fattribute_id_seq', max(fattribute_id)+1) FROM fattribute");
1176   $dbh->do("SELECT setval('public.fgroup_gid_seq', max(gid)+1) FROM fgroup");
1177   $dbh->do("SELECT setval('public.ftype_ftypeid_seq', max(ftypeid)+1) FROM ftype");
1178
1179   1;
1180 }
1181
1182 =head2 make_features_by_name_where_part
1183
1184  Title   : make_features_by_name_where_part
1185  Usage   : $db->make_features_by_name_where_part
1186  Function: Overrides a function in Bio::DB::GFF::Adaptor::dbi to insure
1187            that searches will be case insensitive. It creates the SQL
1188            fragment needed to select a feature by its group name & class
1189  Returns : a SQL fragment and bind arguments
1190  Args    : see below
1191  Status  : Protected
1192
1193 =cut
1194
1195 sub make_features_by_name_where_part {
1196   my $self = shift;
1197   my ($class,$name) = @_;
1198
1199   if ($name !~ /\*/) {
1200     #allows utilization of an index on lower(gname)
1201     return ("fgroup.gclass=? AND lower(fgroup.gname) = lower(?)",$class,$name);
1202   }
1203   else {
1204     $name =~ tr/*/%/;
1205     return ("fgroup.gclass=? AND lower(fgroup.gname) LIKE lower(?)",$class,$name);
1206   }
1207 }
1208
1209 #
1210 # Methods from dbi.pm that need to be overridden to make
1211 # searching for fref case insensitive
1212 #
1213 #
1214 sub get_dna {
1215   my $self = shift;
1216   my ($ref,$start,$stop,$class) = @_;
1217
1218   my ($offset_start,$offset_stop);
1219
1220   my $has_start = defined $start;
1221   my $has_stop  = defined $stop;
1222
1223   my $reversed;
1224   if ($has_start && $has_stop && $start > $stop) {
1225     $reversed++;
1226     ($start,$stop) = ($stop,$start);
1227   }
1228
1229   # turn start and stop into 0-based offsets
1230   my $cs = $self->dna_chunk_size;
1231   $start -= 1;  $stop -= 1;
1232   $offset_start = int($start/$cs)*$cs;
1233   $offset_stop  = int($stop/$cs)*$cs;
1234
1235   my $sth;
1236   # special case, get it all
1237   if (!($has_start || $has_stop)) {
1238     $sth = $self->dbh->do_query('select fdna,foffset from fdna where lower(fref)=lower(?) order by foffset',$ref);
1239   }
1240
1241   elsif (!$has_stop) {
1242     $sth = $self->dbh->do_query('select fdna,foffset from fdna where lower(fref)=lower(?) and foffset>=? order by foffset',
1243                                 $ref,$offset_start);
1244   }
1245
1246   else {  # both start and stop defined
1247     $sth = $self->dbh->do_query('select fdna,foffset from fdna where lower(fref)=lower(?) and foffset>=? and foffset<=? order by foffset',
1248                                 $ref,$offset_start,$offset_stop);
1249   }
1250
1251   my $dna = '';
1252   while (my($frag,$offset) = $sth->fetchrow_array) {
1253       substr($frag,0,$start-$offset) = '' if $has_start && $start > $offset;
1254       $dna .= $frag;
1255   }
1256   substr($dna,$stop-$start+1) = '' if $has_stop && $stop-$start+1 < length($dna);
1257   if ($reversed) {
1258     $dna = reverse $dna;
1259     $dna =~ tr/gatcGATC/ctagCTAG/;
1260   }
1261
1262   $sth->finish;
1263   $dna;
1264 }
1265
1266
1267 sub refseq_query {
1268   my $self = shift;
1269   my ($refseq,$refclass) = @_;
1270   my $query = "lower(fdata.fref)=lower(?)";
1271   return wantarray ? ($query,$refseq) : $self->dbh->dbi_quote($query,$refseq);
1272 }
1273
1274 sub make_types_where_part {
1275   my $self = shift;
1276   my ($srcseq,$start,$stop,$want_count,$typelist) = @_;
1277   my (@query,@args);
1278   if (defined($srcseq)) {
1279     push @query,'lower(fdata.fref)=lower(?)';
1280     push @args,$srcseq;
1281     if (defined $start or defined $stop) {
1282       $start = 1           unless defined $start;
1283       $stop  = MAX_SEGMENT unless defined $stop;
1284       my ($q,@a) = $self->overlap_query($start,$stop);
1285       push @query,"($q)";
1286       push @args,@a;
1287     }
1288   }
1289   if (defined $typelist && @$typelist) {
1290     my ($q,@a) = $self->types_query($typelist);
1291     push @query,($q);
1292     push @args,@a;
1293   }
1294   my $query = @query ? join(' AND ',@query) : '1=1';
1295   return wantarray ? ($query,@args) : $self->dbh->dbi_quote($query,@args);
1296 }
1297
1298 sub get_feature_id {
1299   my $self = shift;
1300   my ($ref,$start,$stop,$typeid,$groupid) = @_;
1301   my $s = $self->{load_stuff};
1302   unless ($s->{get_feature_id}) {
1303     my $dbh = $self->features_db;
1304     $s->{get_feature_id} =
1305       $dbh->prepare_delayed('SELECT fid FROM fdata WHERE lower(fref)=lower(?) AND fstart=? AND fstop=? AND ftypeid=? AND gid=?');
1306   }
1307   my $sth = $s->{get_feature_id} or return;
1308   $sth->execute($ref,$start,$stop,$typeid,$groupid) or return;
1309   my ($fid) = $sth->fetchrow_array;
1310   return $fid;
1311 }
1312
1313 sub _delete {
1314   my $self = shift;
1315   my $delete_spec = shift;
1316   my $ranges      = $delete_spec->{segments} || [];
1317   my $types       = $delete_spec->{types}    || [];
1318   my $force       = $delete_spec->{force};
1319   my $range_type  = $delete_spec->{range_type};
1320   my $dbh         = $self->features_db;
1321
1322   my $query = 'delete from fdata';
1323   my @where;
1324
1325   my @range_part;
1326   for my $segment (@$ranges) {
1327     my $ref   = $dbh->quote($segment->abs_ref);
1328     my $start = $segment->abs_start;
1329     my $stop  = $segment->abs_stop;
1330     my $range =  $range_type eq 'overlaps'     ? $self->overlap_query($start,$stop)
1331                : $range_type eq 'contains'     ? $self->contains_query($start,$stop)
1332                : $range_type eq 'contained_in' ? $self->contained_in_query($start,$stop)
1333                : $self->throw("Invalid range type '$range_type'");
1334     push @range_part,"(lower(fref)=lower($ref) AND $range)";
1335   }
1336   push @where,'('. join(' OR ',@range_part).')' if @range_part;
1337
1338   # get all the types
1339   if (@$types) {
1340     my $types_where = $self->types_query($types);
1341     my $types_query = "select ftypeid from ftype where $types_where";
1342     my $result      = $dbh->selectall_arrayref($types_query);
1343     my @typeids     = map {$_->[0]} @$result;
1344     my $typelist    = join ',',map{$dbh->quote($_)} @typeids;
1345     $typelist ||= "0"; # don't cause DBI to die with invalid SQL when
1346                        # unknown feature types were requested.
1347     push @where,"(ftypeid in ($typelist))";
1348   }
1349   $self->throw("This operation would delete all feature data and -force not specified")
1350     unless @where || $force;
1351   $query .= " where ".join(' and ',@where) if @where;
1352   warn "$query\n" if $self->debug;
1353   my $result = $dbh->do($query);
1354   defined $result or $self->throw($dbh->errstr);
1355   $result;
1356 }
1357
1358 sub make_abscoord_query {
1359   my $self = shift;
1360   my ($name,$class,$refseq) = @_;
1361   #my $query = GETSEQCOORDS;
1362   my $query = $self->getseqcoords_query();
1363   my $getforcedseqcoords = $self->getforcedseqcoords_query() ;
1364   if ($name =~ /\*/) {
1365     $name =~ s/%/\\%/g;
1366     $name =~ s/_/\\_/g;
1367     $name =~ tr/*/%/;
1368     $query =~ s/gname\) = lower/gname) LIKE lower/;
1369   }
1370   defined $refseq
1371     ? $self->dbh->do_query($getforcedseqcoords,$name,$class,$refseq)
1372     : $self->dbh->do_query($query,$name,$class);
1373 }
1374
1375 sub make_aliasabscoord_query {
1376   my $self = shift;
1377   my ($name,$class) = @_;
1378   #my $query = GETALIASCOORDS;
1379   my $query = $self->getaliascoords_query();
1380   if ($name =~ /\*/) {
1381     $name =~ s/%/\\%/g;
1382     $name =~ s/_/\\_/g;
1383     $name =~ tr/*/%/;
1384     $query =~ s/gname\) = lower/gname) LIKE lower/;
1385   }
1386   $self->dbh->do_query($query,$name,$class);
1387 }
1388
1389
1390 1;