more tweaking of image loading script.
[phenome.git] / bin / loading_scripts / load_bcs_images.pl
blob5763f5dca90f35f5c99dbec69b3be4d2c9cbba7c
1 #!/usr/bin/perl
=head1 NAME

load_bcs_images.pl

=head1 SYNOPSIS

load_bcs_images.pl -D [ sandbox | cxgn | trial ] -H hostname -i dirname -u username -b image directory -r chado table name [script will load image ids into ChadoTableprop ]

=head1 DESCRIPTION

Loads images into the SGN database, using the SGN::Image framework.
Then links each loaded image with the user-supplied chado objects (e.g. stock, nd_experiment).

Requires the following parameters:
=over 8

=item -D

a database parameter, which can either be "cxgn", "sandbox", or "trial". "cxgn" and "sandbox" will cause the script to connect to the respective databases; "trial" will connect to sandbox, but not perform any of the database modifications.

=item -H

host name

=item -m

map file. If provided, the links between image file names and stock names are read from this mapping file.
Row labels are expected to be unique file names; the column header for the associated stocks is 'name'.

=item -i

a dirname that contains image filenames or subdirectories named after database accessions, containing one or more images (see option -d).

=item -u

user name - from sgn_people.sp_person.

=item -b

the dir where the database stores the images (the concatenated values from image_path and image_dir from sgn_local.conf or sgn.conf)

=item -r

the name of the chado table whose objects the images are linked to (e.g. stock)

=item -d

files are stored in sub directories named after database accessions

=item -e

image file extension. Defaults to 'jpg'.

=item -t

trial mode. Nothing will be stored.

=back
The script writes a log of its progress and errors to the file load_bcs_images.err in the current working directory.

=head1 AUTHOR(S)

Naama Menda (nm249@cornell.edu) October 2010.

=cut
use strict;
use warnings;

use Carp qw /croak/;
use Data::Dumper qw / Dumper /;
use File::Basename;
use Getopt::Std;

use CXGN::Metadata::Schema;
use CXGN::Metadata::Metadbdata;
use CXGN::DB::InsertDBH;
use CXGN::Image;
use Bio::Chado::Schema;
use CXGN::People::Person;
use SGN::Context;
use CXGN::Tools::File::Spreadsheet;
# Command-line options. Getopt::Std fills the $opt_* package variables:
#   -H db host          -D db name             -i image directory
#   -u user name        -b database image dir  -r chado table name
#   -e image extension  -m map file            -d sub-directory mode
#   -t trial mode
# "f:" is still accepted by getopts but no $opt_f is read anywhere in this
# script.
our ($opt_H, $opt_D, $opt_t, $opt_i, $opt_u, $opt_r, $opt_d, $opt_e, $opt_m, $opt_b);
getopts('H:D:u:i:e:f:tdr:m:b:');

my $dbhost       = $opt_H;
my $dbname       = $opt_D;
my $dirname      = $opt_i;
my $sp_person    = $opt_u;
my $db_image_dir = $opt_b;
my $chado_table  = $opt_r;
my $ext          = $opt_e || 'jpg';

# Host AND database name are both needed to connect, so stop when EITHER
# one is missing (the previous "&&" only fired when both were missing).
if (!$dbhost || !$dbname) {
    no warnings 'uninitialized';    # unset options are simply shown as empty
    print "dbhost = $dbhost , dbname = $dbname\n";
    print "opt_t = $opt_t, opt_u = $opt_u, opt_r = $chado_table, opt_i = $dirname\n";
    usage();
}

# Without an input directory there is nothing to load.
if (!$dirname) {
    no warnings 'uninitialized';
    print "dirname = $dirname\n";
    usage();
}
# Database handle used for all direct SQL in this script.
my $dbh = CXGN::DB::InsertDBH->new({
    dbhost => $dbhost,
    dbname => $dbname,
});

# Chado schema built on the handle returned by $dbh->get_actual_dbh().
my $schema = Bio::Chado::Schema->connect(
    sub { $dbh->get_actual_dbh() },
    { on_connect_do => ['SET search_path TO public;'] },
);

print STDERR "Generate metadata_id... ";

# The metadata schema opens its own connection, as user "postgres", with the
# host and password taken from $dbh.
my $metadata_dsn    = "dbi:Pg:database=$dbname;host=" . $dbh->dbhost();
my $metadata_schema = CXGN::Metadata::Schema->connect(
    $metadata_dsn,
    "postgres",
    $dbh->dbpass(),
    { on_connect_do => "SET search_path TO 'metadata', 'public'" },
);

# Person id of the user given with -u; used as the owner of new images.
my $sp_person_id = CXGN::People::Person->get_person_by_username($dbh, $sp_person);

# lower-cased stock name => stock_id, filled when the stock table is cached.
my %name2id = ();
#my $ch = SGN::Context->new();

# Show the operator which database and image directory are about to be
# used and require an explicit confirmation before going any further.
print "PLEASE VERIFY:\n";
print "Using dbhost: $dbhost. DB name: $dbname. \n";
print "Path to image is: $db_image_dir\n";
print "CONTINUE? ";
my $answer = <STDIN>;

# Only an answer that starts with y/Y counts as consent. The old test
# (/[yY]/ anywhere in the line) also accepted answers such as "no way".
# End-of-file on STDIN is treated as "no".
if (!defined $answer || $answer !~ /^\s*y/i) { exit(); }

# Guard against mixing the sandbox database with the production image
# directory and vice versa.
my $image_dir_is_sandbox = defined $db_image_dir && $db_image_dir =~ /sandbox/;

if (($dbname eq "sandbox") && !$image_dir_is_sandbox) {
    die "The image directory needs to be set to image_files_sandbox if running on rubisco/sandbox. Please change the image_dir parameter in SGN.conf\n\n";
}
if (($dbname eq "cxgn") && $image_dir_is_sandbox) {
    warn "The image directory needs to be set to image_files when the script is running on the production database. Please change the image_dir parameter in SGN.conf\n\n";
}
my %image_hash  = ();   # original filename => CXGN::Image for images already loaded
my %connections = ();   # "image_id-stock_id" => count of links already made

# Build the lower-cased stock name => stock_id lookup from the whole stock
# table.
print STDERR "Caching stock table...\n";
my $object_rs = $schema->resultset("Stock::Stock")->search( { } );
while (my $stock_row = $object_rs->next) {
    $name2id{ lc($stock_row->name) } = $stock_row->stock_id;
}

# Cache the existing image <-> stock links so the same data is not loaded
# twice.
print "Caching image $chado_table links...\n";

my $q   = "SELECT * FROM phenome.stock_image";
my $sth = $dbh->prepare($q);
$sth->execute();
while (my $link = $sth->fetchrow_hashref()) {
    my ($image_id, $chado_table_id) = @{$link}{qw(image_id stock_id)};   # stock_id is table specific

    print STDERR "\n\nCHADO TABLE ID = $chado_table_id\n\n";

    my $loaded_image = CXGN::Image->new(
        dbh       => $dbh,
        image_id  => $image_id,
        image_dir => $db_image_dir,
    );

    # NOTE(review): per the original comment this key has no file extension,
    # while the loading loop looks images up by full path - confirm the two
    # can ever match.
    $image_hash{ $loaded_image->get_original_filename() } = $loaded_image;
    $connections{ join("-", $image_id, $chado_table_id) }++;
}
# Progress/error log. The bareword handle ERR is kept on purpose: message()
# and the loading loop print to it by name.
open(ERR, '>', "load_bcs_images.err")
    || die "Can't open error file load_bcs_images.err: $!\n";

# With -d the input directory holds one sub directory per accession;
# otherwise it holds the image files themselves.
my @files = $opt_d ? glob("$dirname/*") : glob("$dirname/*.$ext");

print STDERR "DIRS = ".(join("\n", @files))."\n";

my @sub_files;              # images belonging to the entry currently processed

my $new_image_count = 0;    # number of new images handled in this run

my $metadata = CXGN::Metadata::Metadbdata->new($metadata_schema, $sp_person);

# Trial mode (-t) is documented as storing nothing, so the metadata row is
# only created for a real run.
# NOTE(review): assumes store() writes the row - confirm.
my $metadata_id = $opt_t ? undef : $metadata->store()->get_metadata_id();
# Optional image-file-name => stock-name map, read from the spreadsheet
# given with -m. Row labels are the file names; the stock name is taken
# from the 'name' column.
my $map_file = $opt_m;
my %name_map;

if ($opt_m) {
    my $sheet = CXGN::Tools::File::Spreadsheet->new($map_file);
    $name_map{$_} = $sheet->value_at($_, 'name') for $sheet->row_labels();
}
# Main loading loop.
#
# Every entry of @files is handled inside its own eval: on any error the
# work done for that entry is rolled back, otherwise it is committed.
# For each image the loop
#   1. resolves the stock the image belongs to,
#   2. stores the image (unless it is already known),
#   3. links image and stock in phenome.stock_image.
# In trial mode (-t) steps 2 and 3 are only reported, never executed.

# The link statement does not change between images, so prepare it once.
my $link_sql = "INSERT INTO phenome.stock_image (stock_id, image_id, metadata_id) VALUES (?,?,?)";
my $link_sth = $dbh->prepare($link_sql);

foreach my $file (@files) {
    eval {
        chomp($file);

        # With -d, $file is an accession directory holding the images;
        # without it, $file is itself the single image to load. (Globbing
        # "$file/*" unconditionally returned nothing for plain image files.)
        @sub_files = $opt_d ? glob("$file/*") : ($file);

        print STDERR "FILES FOR $file: ".Dumper(\@sub_files)."\n";

        my $object = basename($file, ".$ext");

        # With a map file (-m) the stock name comes from the map, keyed by
        # "<name>.<ext>". Entries missing from the map fall back to the
        # name derived from the file or directory.
        if ($opt_m) {
            my $mapped_name = $name_map{ $object . "." . $ext };
            $object = $mapped_name if defined $mapped_name && length $mapped_name;
        }

        # if (!$plot) { die "File $file has no object name in it!"; }
        my $stock_id = $name2id{ lc($object) };
        my $stock    = defined $stock_id
            ? $schema->resultset("Stock::Stock")->find( { stock_id => $stock_id } )
            : undef;

        foreach my $filename (@sub_files) {
            chomp $filename;

            my $image_base = basename($filename);
            my ($object_name, $description, $extension);

            print STDERR "OBJECT = $object...\n";

            # File names of the form <object>_<description>[.<extension>]
            # supply the image description; anything else has none.
            if ($image_base =~ /(.*?)\_(.*?)(\..*?)?$/) {
                ($object_name, $description, $extension) = ($1, $2, $3);
                print STDERR "OBJECT NAME: $object_name DESCRPTION: $description EXTENSIO: "
                    . (defined $extension ? $extension : '') . "\n";
            }
            else {
                $object_name = $object;
            }
            print "object_name = '".$object_name."' \n";

            print STDOUT "Processing file $file...\n";
            print STDOUT "Loading $object_name, image $filename\n";
            print ERR "Loading $object_name, image $filename\n";

            my $image_id;    # set below, depending on whether the image is new
            if (! -e $filename) {
                warn "The specified file $filename does not exist! Skipping...\n";
                next;
            }

            # Without a stock there is nothing to link to. Skip the image
            # instead of dying on an undefined $stock further down.
            if (!$stock) {
                message("$object does not exist in the database...\n");
                next;
            }

            print ERR "Adding $filename...\n";
            if (exists($image_hash{$filename})) {
                print ERR "$filename is already loaded into the database...\n";
                $image_id = $image_hash{$filename}->get_image_id();

                # Really skip when the link exists; previously the message
                # was printed but the INSERT below still ran.
                if ($connections{ $image_id . "-" . $stock_id }) {
                    print ERR "The connection between $object and image $filename has already been made. Skipping...\n";
                    next;
                }
                print ERR qq { Associating $chado_table $stock_id with already loaded image $filename...\n };
            }
            else {
                print ERR qq { Generating new image object for image $filename and associating it with $chado_table $object, id $stock_id ...\n };

                if ($opt_t) {
                    print STDOUT qq { Would associate file $filename to $chado_table $object_name, id $stock_id\n };
                    $new_image_count++;
                }
                else {
                    my $image = CXGN::Image->new(dbh=>$dbh, image_dir=>$db_image_dir);
                    $image_hash{$filename} = $image;

                    $image->process_image("$filename", $chado_table, $stock_id);
                    $image->set_description(defined $description ? $description : '');
                    $image->set_name(basename($filename, ".$ext"));
                    $image->set_sp_person_id($sp_person_id);
                    $image->set_obsolete("f");
                    $image_id = $image->store();
                    $new_image_count++;

                    my $image_subpath = $image->image_subpath();
                    print STDERR "FINAL IMAGE PATH = $db_image_dir/$image_subpath\n";
                }
            }

            # Trial mode stops here: no link is written.
            next if $opt_t;

            # Store the image_id - stock_id link and remember it, so the
            # same pair is not linked twice within this run.
            print STDERR "Connecting image $filename and id $image_id with stock ".$stock->stock_id()."\n";
            $link_sth->execute($stock->stock_id, $image_id, $metadata_id);
            $connections{ $image_id . "-" . $stock_id }++;
        }
    };
    if ($@) {
        # Report on STDOUT and in the error file, then undo this entry.
        message("ERROR OCCURRED WHILE SAVING NEW INFORMATION. $@\n");
        $dbh->rollback();
    }
    elsif ($opt_t) {
        # Nothing should have been written in trial mode; roll back anyway
        # so that nothing can be committed by accident.
        $dbh->rollback();
    }
    else {
        $dbh->commit();
    }
}
# Close the error log. Buffered write errors only surface here, so report a
# failed close. (The former close(F) acted on a handle this script never
# opens and has been dropped.)
close(ERR) || warn "Could not close load_bcs_images.err: $!\n";

print STDOUT "Inserted $new_image_count images.\n";
print STDOUT "Done. \n";
# Print a summary of the command-line options and terminate the script.
# Called when a required option is missing; never returns. Exits with a
# non-zero status so a calling shell can tell the run did not happen.
sub usage {
    print "Usage: load_bcs_images.pl -D dbname [ cxgn | sandbox | trial ] -H dbhost -i input dir -u user name -b database image dir -r chado table name for the object to link with the image [ -m map file ] [ -e image file extension (default jpg) ] [ -d images are in sub directories named after accessions ] [ -t trial mode ] \n";
    exit(1);
}
# Write the given text both to STDOUT and to the ERR log handle.
# Expects ERR to have been opened by the caller. Returns the result of the
# last print (the one to ERR).
sub message {
    my ($text) = @_;

    my $printed;
    $printed = print {$_} $text for (\*STDOUT, \*ERR);
    return $printed;
}