6 ## Used to output the 'usage' message
9 ## Used to parse command line options
12 ## Used to create temporary files, if necessary
16 use Bio
::DB
::SeqFeature
::Store
;
17 use Bio
::DB
::SeqFeature
::Store
::GFF3Loader
;
21 ## The available options. Note, these defaults are 'hard coded' into
22 ## the USAGE POD, so if you change one of the defaults (you shouldn't),
23 ## you should update the USAGE.
## DBI data source (-d, --dsn).
25 my $DSN = 'dbi:mysql:test';
## Class of the SeqFeature objects, passed to the loader as -sf_class (-s, --seqfeature).
26 my $SFCLASS = 'Bio::DB::SeqFeature';
## Storage adaptor class, passed to the store as -adaptor (-a, --adaptor).
27 my $ADAPTOR = 'DBI::mysql';
## Temporary directory for fast loading (-T, --temporary-directory).
31 my $TMP = File
::Spec
->tmpdir();
## When true, ##sequence-region directives in the GFF3 input are ignored (-i, --ignore-seqregion).
32 my $IGNORE_SEQREGION = 0;
## When true, no Alias attribute is created from a Target attribute (--noalias-target).
38 my $NOALIAS_TARGET = 0;
## Summary statistics requested (--summary); also incremented further down when --create is given.
39 my $SUMMARY_STATS = 0;
## Summary statistics refused (-N, --nosummary); overrides $SUMMARY_STATS when the loader is built.
40 my $NOSUMMARY_STATS = 0;
42 ## Two flags based on http://stackoverflow.com/questions/1232116
43 ## how-to-create-pod-and-use-pod2usage-in-perl
## Parse the command line into the option variables. A spec ending in "=s"
## takes a mandatory string value, a spec ending in "!" is negatable
## (for example --noverbose or --nosubfeatures), and a bare spec is a plain flag.
47 GetOptions
( 'd|dsn=s' => \
$DSN,
48 's|seqfeature=s' => \
$SFCLASS,
49 'n|namespace=s' => \
$NAMESPACE,
50 'a|adaptor=s' => \
$ADAPTOR,
51 'v|verbose!' => \
$VERBOSE,
53 'T|temporary-directory=s' => \
$TMP,
54 'i|ignore-seqregion' => \
$IGNORE_SEQREGION,
55 'c|create' => \
$CREATE,
57 'p|password=s' => \
$PASS,
58 'z|zip' => \
$COMPRESS,
59 'S|subfeatures!' => \
$INDEX_SUB,
61 ## Any good single letter choices here?
62 'noalias-target' => \
$NOALIAS_TARGET,
63 'summary' => \
$SUMMARY_STATS,
64 'N|nosummary' => \
$NOSUMMARY_STATS,
66 ## I miss '--help' when it isn't there!
67 'h|help!' => \
$opt_help,
68 'm|man!' => \
$opt_man,
## GetOptions returns false when the command line could not be parsed;
## in that case hand over to pod2usage with a pointer to --help.
70 or pod2usage
( -message
=>
71 "\nTry 'bp_seqfeature_load.pl --help' for more information\n",
76 ## Should we output usage information?
## --help: verbosity 1, for which Pod::Usage prints the SYNOPSIS plus the
## OPTIONS / ARGUMENTS sections of the POD.
77 pod2usage
( -verbose
=> 1 ) if $opt_help;
## --man: verbosity 2, for which Pod::Usage prints the entire manual page.
78 pod2usage
( -verbose
=> 2 ) if $opt_man;
80 ## Did we get any files to process?
82 or pod2usage
( -message
=>
83 "\nYou need to pass some GFF or fasta files to load\n",
94 bp_seqfeature_load.pl - Load GFF into a SeqFeature database
98 Pass any number of GFF or fasta format files (or GFF with embedded
99 fasta) to load the features and sequences into a SeqFeature
100 database. The database (and adaptor) to use is specified on the
101 command line. Use the --create flag to create a new SeqFeature
106 bp_seqfeature_load.pl [options] gff_or_fasta_file1 [gff_or_fasta_file2 [...]]
108 Try 'bp_seqfeature_load.pl --help' or '--man' for more information.
116 DBI data source (default dbi:mysql:test)
118 =item -n, --namespace
120 The table prefix to use (default undef). Allows several independent
121 sequence feature databases to be stored in a single database.
123 =item -s, --seqfeature
125 The type of SeqFeature to create... RTSC (default Bio::DB::SeqFeature)
129 The storage adaptor (class) to use (default DBI::mysql)
133 Turn on verbose progress reporting (default true). Use --noverbose to
138 Activate fast loading (default 0). Only available for some adaptors.
140 =item -T, --temporary-directory
142 Specify temporary directory for fast loading (default
143 File::Spec->tmpdir())
145 =item -i, --ignore-seqregion
147 If true, then ignore ##sequence-region directives in the GFF3 file
148 (default, create a feature for each region)
152 Create the database and reinitialize it (default false). Note that this
153 will erase previous database contents, if any.
157 User to connect to database as
161 Password to use to connect to database
165 Compress database tables to save space (default false)
167 =item -S, --subfeatures
169 Turn on indexing of subfeatures (default true). Use --nosubfeatures to
174 Generate summary statistics for coverage graphs (default false). This
175 can be run on a previously loaded database or during the load. It will
176 default to true if --create is used.
178 =item -N, --nosummary
180 Do not generate summary statistics, saving some space and load time (this is the
181 default when --create is not specified; use this option to explicitly turn off
182 summary statistics when --create is specified).
184 =item --noalias-target
186 Don't create an Alias attribute whose value is the target_id in a
187 Target attribute (if the feature contains a Target attribute, the
188 default is to create an Alias attribute whose value is the target_id
189 in the Target attribute)
193 Please see http://www.sequenceontology.org/gff3.shtml for information
194 about the GFF3 format. BioPerl extends the format slightly by adding a
195 ##index-subfeatures directive. Set this to a true value if you wish
196 the database to be able to retrieve a feature's individual parts (such
197 as the exons of a transcript) independently of the top level feature:
199 ##index-subfeatures 1
201 It is also possible to control the indexing of subfeatures on a
202 case-by-case basis by adding "index=1" or "index=0" to the feature's
203 attribute list. This should only be used for subfeatures.
205 Subfeature indexing is true by default. Set to false (0) to save lots
206 of database space and improve performance. You may use --nosubfeatures
217 or die "Fast loading is requested, but I cannot write into the directory $TMP";
## For any adaptor whose name matches /mysql/i, append the mysql_local_infile=1
## attribute to the DSN unless the DSN already mentions it (this is the
## DBD::mysql switch that permits LOAD DATA LOCAL INFILE).
218 $DSN .= ";mysql_local_infile=1" if $ADAPTOR =~ /mysql/i && $DSN !~ /mysql_local_infile/;
## Pass credentials on only when at least one of them was supplied.
## NOTE(review): this is a truthiness test, so a user name or password of "0"
## or "" counts as not supplied.
222 @options = ($USER,$PASS) if $USER || $PASS;
## Open the SeqFeature store with the selected table namespace, adaptor and
## compression setting. The script aborts when the constructor yields a
## false value.
224 my $store = Bio
::DB
::SeqFeature
::Store
->new
227 -namespace
=> $NAMESPACE,
228 -adaptor
=> $ADAPTOR,
234 -compress
=> $COMPRESS,
236 or die "Couldn't create connection to the database";
## --create: (re)initialise the database, asking init_database to erase
## whatever it held before.
238 $store->init_database('erase') if $CREATE;
## A freshly created database gets summary statistics by default; --nosummary
## can still switch them off when the loader is built.
239 $SUMMARY_STATS++ if $CREATE; # --create implies --summary
## Build the GFF3 loader from the parsed options. The script aborts when the
## constructor yields a false value.
241 my $loader = Bio
::DB
::SeqFeature
::Store
::GFF3Loader
->new
244 -sf_class
=> $SFCLASS,
245 -verbose
=> $VERBOSE,
248 -ignore_seqregion
=> $IGNORE_SEQREGION,
249 -index_subfeatures
=> $INDEX_SUB,
250 -noalias_target
=> $NOALIAS_TARGET,
## --nosummary always wins: it forces 0 even when --summary was given or
## --create bumped $SUMMARY_STATS.
251 -summary_stats
=> $NOSUMMARY_STATS ?
0 : $SUMMARY_STATS,
253 or die "Couldn't create GFF3 loader";
255 # on signals, give objects a chance to call their DESTROY methods
# TERM and INT share one handler: it drops the loader reference, then the
# store reference, and only then dies.
256 $SIG{TERM
} = $SIG{INT
} = sub { undef $loader; undef $store; die "Aborted..."; };
# Load every file named on the command line (what option parsing left in @ARGV).
258 $loader->load(@ARGV);