Fix perl dep
[gff3_validator.git] / validate_gff3.pl
blobbe79a3c6d57dd10700377b2ac77f23a08899961b
1 #!/usr/bin/perl
3 =head1 NAME
5 validate_gff3.pl
7 =head1 SYNOPSIS
9 validate_gff3.pl -gff3_file <gff3_file> [-ontology_file <ontology_file1> -ontology_file <ontology_file2> ...]
10 -out <out_file_prefix> [-config <config_file>]
11 [-db_type <db_type>] [-db_dir <db_dir>] [-dbname <dbname>] [-username <username>] [-password <password>]
12 [-verbose <0|1|2>] [-silent <0|1>]
14 =head1 DESCRIPTION
16 This script analyzes a gff3 file and validates a number of points. It uses the GFF3::Validator module for analysis. For
17 further information on analysis steps, please refer to validate_gff3.pod.
19 =head1 USAGE
21 The script uses a MySQL or SQLite database to analyze the gff3 file. The gff3 file is parsed and
22 content relevant to the analysis is loaded into the database. Use of database (as opposed to
23 performing analysis in memory) makes processing of large files feasible and significantly
24 increases overall processing speed. At the end of the analysis a
25 report is generated that lists errors and warnings ordered by line numbers. The report file
26 can be easily processed using grep and other Unix text processing tools.
28 The usage of the script follows with descriptions of command-line parameters:
30 validate_gff3.pl -gff3_file <gff3_file> [-ontology_file <ontology_file1> -ontology_file <ontology_file2> ...]
31 -out <out_file_prefix> [-config <config_file>]
32 [-db_type <db_type>] [-db_dir <db_dir>]
33 [-dbname <dbname>] [-username <username>] [-password <password>]
34 [-verbose <0|1|2>] [-silent <0|1>]
36 -gff3_file : (Required) Name of gff3 file to process.
37 -ontology_file : (Optional) Name of ontology file, multiple files can be specified.
38 Command-line ontology files and ontology files provided as directives
39 are merged and used for analysis. If neither is provided, or none is accessible, the
40 default ontology file is retrieved and used.
41 -out : (Required) Prefix to name log and report files, these become <out>.log and <out>.report
42 -config : (Optional) Name of config file (see documentation in validate_gff3.cfg provided in the package
43 for further details). If not available, defaults to validate_gff3.cfg in the script's directory.
44 -db_type : (Optional) Type of database ('mysql' or 'sqlite').
45 Defaults to 'mysql'.
46 -db_dir : (Optional) Directory to store temp sqlite database files
47 If not available, retrieved from config file (temp_dir param)
48 -dbname : (Optional) Name of MySQL database/SQLite db file to use for analysis.
49 If not available, retrieved from config file.
50 If db_type is 'sqlite' and no dbname is specified and none available in config file, a temp db is used
51 -username : (Optional) Username for analysis database (must have write privileges).
52 If not available, retrieved from config file.
53 If not available, defaults to "".
54 -password : (Optional) Password for analysis database.
55 If not available, retrieved from config file.
56 If not available, defaults to "".
57 -verbose : (Optional) Verbosity of logging.
58 Values:
59 1: Initialization information
60 2: + Progress information
61 3: + Error messages
62 If not available, defaults to 2.
63 -silent : (Optional) Whether to suppress logging to screen
64 Values:
65 0: Log to screen
66 1: Don't log to screen
67 If not available, defaults to 0.
68 -max_messages : (Optional) Whether to report all errors/warnings
69 Values:
70 0: Report all messages
71 <number>: Exit and report after <number> messages
72 If not available, defaults to 0.
74 =cut
use strict;
use warnings;

use FindBin;
use File::Spec;
use File::Temp;
use lib "$FindBin::RealBin/lib";

use GFF3::Validator;
use Config::General;    # called directly below to parse the config file
use Carp;
use Getopt::Long;

# Usage message shown when the command line is invalid or incomplete.
my $usage = qq[$FindBin::Script -gff3_file <gff3_file> [-ontology_file <ontology_file1> -ontology_file <ontology_file2> ...]
    -out <out_file_prefix>
    [-config <config_file>]
    [-db_type <db_type>] [-db_dir <db_dir>]
    [-dbname <dbname>] [-username <username>] [-password <password>]
    [-verbose <0|1|2>] [-silent <0|1>] [-max_messages <number>]];

# Parse command-line params
my $gff3_file;
my @ontology_files;
my $out;
my $config;
my $db_type;
my $db_dir;
my $dbname;
my $username;
my $password;
my $verbose;
my $silent;
my $max_messages;

GetOptions(
    "gff3_file=s" => \$gff3_file,

    # The documented spelling is -ontology_file; it used to work only through
    # Getopt::Long auto-abbreviation of "ontology_files". Both names are now
    # registered explicitly, so either spelling keeps working.
    "ontology_file|ontology_files=s" => \@ontology_files,
    "out=s"                          => \$out,
    "config=s"                       => \$config,
    "db_type=s"                      => \$db_type,
    "db_dir=s"                       => \$db_dir,
    "dbname=s"                       => \$dbname,
    "username=s"                     => \$username,
    "password=s"                     => \$password,
    "verbose=s"                      => \$verbose,
    "silent=s"                       => \$silent,
    "max_messages=s"                 => \$max_messages,
) or die("Usage: $usage\n");

# Check command-line params: the input file and the output prefix are mandatory.
if (!$gff3_file or !$out) {
    die("Usage: $usage\n");
}

# Parse config file (defaults to validate_gff3.cfg next to the script).
$config ||= File::Spec->catfile($FindBin::RealBin, 'validate_gff3.cfg');
my $config_obj = Config::General->new(-ConfigFile => $config, -CComments => 0);
my %config     = $config_obj->getall;

# Populate defaults from config
$db_type = lc(defined $db_type ? $db_type : '') || 'mysql';
croak("Unrecognized database type ($db_type)!")
    unless $db_type =~ /\A(?:mysql|sqlite)\z/;

# Resolve the db dir BEFORE touching the filesystem, so that a missing value
# is reported as such rather than as a failed mkdir on an undefined path.
$db_dir ||= $config{temp_dir};
croak("Cannot determine db dir!") unless $db_dir;

unless (-d $db_dir) {
    mkdir $db_dir
        or die "$db_dir does not exist, and can't create it: $!\n";
    chmod 0777, $db_dir
        or warn "WARNING: could not set global temp dir $db_dir world-writable\n";
}

# Work out the DBI datasource. $db_type is known to be 'mysql' or 'sqlite'
# at this point, so these two branches are exhaustive.
my $datasource;
if ($db_type eq 'mysql') {
    $datasource = $dbname
        ? "DBI:mysql:dbname=$dbname"
        : $config{datasource};
}
else {
    if ($dbname) {
        $datasource = "DBI:SQLite:dbname=$dbname";
    }
    else {
        # No db name given: use a temp SQLite file inside $db_dir; File::Temp
        # removes it at program exit (UNLINK => 1).
        my ($temp_fh, $temp_file) = File::Temp::tempfile(
            "validate_gff3_sqlite_XXXXX",
            DIR    => $db_dir,
            SUFFIX => '.db',
            UNLINK => 1,
        );

        $datasource = "DBI:SQLite:dbname=$temp_file";
    }
}
croak("Cannot determine database name!") unless $datasource;

# Prepare params
my $log_file    = "$out.log";
my $report_file = "$out.report";

# Create validator object
my $validator = GFF3::Validator->new(
    -config         => $config,
    -gff3_file      => $gff3_file,
    -datasource     => $datasource,
    -username       => $username,
    -password       => $password,
    -verbose        => $verbose,
    -silent         => $silent,
    -max_messages   => $max_messages,
    -log_file       => $log_file,
    -report_file    => $report_file,
    -ontology_files => \@ontology_files,
    -table_id       => "",    # Currently do not use table id feature within the command-line version
);

# Create/Reset tables to store the data
$validator->create_tables;

# Load gff3 analysis database
$validator->load_analysis_dbs;

# Validate unique ids
$validator->validate_unique_ids;

# Load ontology(s) into memory
$validator->load_ontology;

# Validate ontology terms
$validator->validate_ontology_terms;

# Validate parentage
$validator->validate_parentage;

# Validate derives_from
$validator->validate_derives_from;

# Dump an error report
$validator->dump_report;

# Cleanup
# $validator->cleanup; # Currently, do not clean up within the command-line version

$validator->log("# [END]");
215 =head1 SEE ALSO
217 =head1 AUTHOR
219 Payan Canaran <canaran@cshl.edu>
221 =head1 VERSION
223 $Id: validate_gff3.pl,v 1.1 2007/12/03 14:20:23 canaran Exp $
225 =head1 CREDITS
227 - SQLite support adapted from patch contributed by Robert Buels <rmb32@cornell.edu>.
229 =head1 COPYRIGHT AND LICENSE
231 Copyright (c) 2006-2007 Cold Spring Harbor Laboratory
233 This program is free software; you can redistribute it and/or modify it
234 under the same terms as Perl itself. See DISCLAIMER.txt for
235 disclaimers of warranty.
237 =cut