tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / scripts / DB / bioflat_index.PLS
blobfda4fb46a887cf127fa7ca8b5c57dece043f582b
1 #!/usr/bin/perl -w
2 #$Id$
4 =head1 NAME
6 bioflat_index.pl - index sequence files using Bio::DB::Flat
8 =head1 DESCRIPTION
10 Create or update a biological sequence database indexed with the
11 Bio::DB::Flat indexing scheme. The arguments are a list of flat files
12 containing the sequence information to be indexed.
14 =head1 USAGE
16 bioflat_index.pl <options> file1 file2 file3...
18 Options:
20 --create Create or reinitialize the index. If not specified,
21 the index must already exist.
23 --format <format> The format of the sequence files. Must be one
24 of "genbank", "swissprot", "embl" or "fasta".
26 --location <path> Path to the directory in which the index files
27 are stored.
29 --dbname <name> The symbolic name of the database to be created.
31 --indextype <type> Type of index to create. Either "bdb" or "flat".
32 "binarysearch" is the same as "flat".
34 Options can be abbreviated. For example, use -i for --indextype.
36 The following environment variables will be used as defaults if the
37 corresponding options are not provided:
39 OBDA_FORMAT format of sequence file
40 OBDA_LOCATION path to directory in which index files are stored
41 OBDA_DBNAME name of database
42 OBDA_INDEXTYPE type of index to create
44 =cut
46 use strict;
47 use Bio::Root::Root;
48 use Bio::Root::IO;
49 use Bio::DB::Flat;
50 use Getopt::Long;
51 use File::Path qw(mkpath rmtree);
# Main script body: parse options, validate them, then create or update a
# Bio::DB::Flat index from the sequence files named in @ARGV.

# Class names used for throw()/warn() and for portable path joining.
my $root = 'Bio::Root::Root';
my $io   = 'Bio::Root::IO';

# Option values; anything still undefined after command-line parsing falls
# back to the matching OBDA_* environment variable below.
my ($CREATE,$FORMAT,$LOCATION,$DBNAME,$INDEXTYPE);

# GetOptions returns false (after printing its own diagnostics) when it sees
# an unknown or malformed option; stop instead of indexing with a
# half-parsed command line.
GetOptions( 'create'      => \$CREATE,
            'format:s'    => \$FORMAT,
            'location:s'  => \$LOCATION,
            'dbname:s'    => \$DBNAME,
            'indextype:s' => \$INDEXTYPE )
  or $root->throw("invalid command-line options; run 'perldoc $0' for usage");

$FORMAT   = $ENV{OBDA_FORMAT}   unless defined $FORMAT;
$LOCATION = $ENV{OBDA_LOCATION} unless defined $LOCATION;
$DBNAME   = $ENV{OBDA_DBNAME}   unless defined $DBNAME;

# Both OBDA_INDEXTYPE (the name this script has always read) and OBDA_INDEX
# (the name historically given in the usage text) are honoured;
# OBDA_INDEXTYPE wins when both are set.
unless (defined $INDEXTYPE) {
  $INDEXTYPE = defined $ENV{OBDA_INDEXTYPE} ? $ENV{OBDA_INDEXTYPE}
                                            : $ENV{OBDA_INDEX};
}

# confirm that database directory is there
defined $LOCATION or
  $root->throw("please provide a base directory with the --location option");

unless (-d $LOCATION) {
  # Without --create the base directory must already exist.
  $CREATE or
    $root->throw("$LOCATION is not a valid directory; use --create to create a new index");

  # With --create, build the base directory (and any missing parents).
  # mkpath() croaks on failure, so trap that and report it through throw().
  eval { mkpath($LOCATION) };
  -d $LOCATION or
    $root->throw("cannot create directory $LOCATION: " . ($@ || $!));
}

defined $DBNAME or
  $root->throw("please provide a database name with the --dbname option");

defined $FORMAT or
  $root->throw("please specify the format for the input files with the --format option");

unless (defined $INDEXTYPE) {
  $INDEXTYPE = 'flat';
  $root->warn('setting index type to "flat", use the --indextype option to override');
}

# Confirm that database is there and that --create flag is sensible.
my $path = $io->catfile($LOCATION,$DBNAME,'config.dat');
if (-e $path) {
  if ($CREATE) {
    # Reinitialize: remove the old index directory before rebuilding it.
    $root->warn("existing index detected; deleting.");
    rmtree($io->catfile($LOCATION,$DBNAME),1,1);
  } else {
    # Updating an existing index: do not pass an -index argument below.
    $root->warn("existing index detected; ignoring --indextype and --format options.");
    undef $INDEXTYPE;
  }
}
elsif (!$CREATE) {
  $root->throw("Cannot find database config file at location $path; use --create to create a new index");
}

# open for writing/updating
my $db = Bio::DB::Flat->new(-directory  => $LOCATION,
                            -dbname     => $DBNAME,
                            # -index is only supplied when an index type is set
                            $INDEXTYPE ? (
                                          -index => $INDEXTYPE
                                         )
                                       : (),
                            -write_flag => 1,
                            -format     => $FORMAT) or
  $root->throw("can't create Bio::DB::Flat object");

# Index every file named on the command line and report the entry count.
my $entries = $db->build_index(@ARGV);

print STDERR "(Re)indexed $entries entries.\n";
117 __END__