checking correctly if an image is loaded and if was associated with a stock
[phenome.git] / bin / loading_scripts / solcap / load_solcap_images.pl
blob2ddfb65fb324417002a539616dbb60c1f26199f8
1 #!/usr/bin/perl
3 =head1 NAME
5 load_solcap_images.pl
7 =head1 SYNOPSIS
9 load_solcap_images.pl -D [ sandbox | cxgn | trial ] -H hostname -i dirname
11 =head1 DESCRIPTION
13 Loads images into the SGN database, using the SGN::Image framework.
14 Each loaded image is then linked to the relevant stock (see the phenome.stock_image table).
16 Requires the following parameters:
18 =over 8
20 =item -D
22 a database parameter, which can either be "cxgn", "sandbox", or "trial". "cxgn" and "sandbox" will cause the script to connect to the respective databases; "trial" will connect to sandbox, but not perform any of the database modifications.
24 =item -H
26 host name
28 =item -i
30 a dirname that contains image filenames or subdirectories named after database accessions, containing one or more images (see option -d) .
32 =item -u
34 user_name - from sgn_people.sp_person.
36 =item -l
38 location, as stored in stockprop (when plots are loaded with load_solcap_plots.pl)
40 =item -d
42 files are stored in sub directories named after database accessions
44 =item -e
46 image file extension . Defaults to 'jpg'
49 =item -t
51 trial mode . Nothing will be stored.
54 =back
56 The script writes a copy of its progress and error messages to a file named load_solcap_images.err in the current working directory.
58 =head1 AUTHOR(S)
60 Naama Menda (nm249@cornell.edu) September 2011.
62 =cut
64 use strict;
66 use CXGN::Metadata::Schema;
67 use CXGN::Metadata::Metadbdata;
68 use CXGN::DB::InsertDBH;
69 use SGN::Image;
70 use Bio::Chado::Schema;
71 use CXGN::People::Person;
72 use Carp qw /croak/;
74 use File::Basename;
75 use SGN::Context;
76 use Getopt::Std;
# Command-line options (filled in by Getopt::Std):
#   -H  database host (required)
#   -D  database name (required)
#   -i  directory holding the images (required)
#   -u  sp_person username recorded as the owner of the images
#   -l  plot location, used when looking up the stock
#   -e  image file extension, defaults to 'jpg'
#   -f  image directory, overrides the configured image_dir
#   -d  flag: images live in one subdirectory per accession
#   -t  flag: trial mode
#   -y  year
our ($opt_H, $opt_D, $opt_t, $opt_i, $opt_u, $opt_d, $opt_e, $opt_f, $opt_l, $opt_y);
getopts('H:D:u:i:e:f:tdl:y:');

my $dbhost    = $opt_H;
my $dbname    = $opt_D;
my $dirname   = $opt_i;
my $sp_person = $opt_u;
my $location  = $opt_l;

# NOTE(review): -y is accepted here, but the loading loop takes the year
# from each image file name instead - confirm whether -y should override it.
my $year = $opt_y;

my $ext = $opt_e || 'jpg';

# The host and the database name are BOTH required, so bail out when either
# one is missing (the previous '&&' only caught the case where both were).
if (!$dbhost || !$dbname) {
    print "dbhost = $dbhost , dbname = $dbname\n";
    print "opt_t = $opt_t, opt_u = $opt_u, opt_d= $opt_d opt_i = $dirname\n";
    usage();
}

# Nothing to load without an input directory.
if (!$dirname) {
    print "dirname = $dirname\n";
    usage();
}
# One database handle is shared by the raw SQL statements and by both
# DBIx::Class schemas below, so everything runs in the same transaction.
my $dbh = CXGN::DB::InsertDBH->new( { dbhost => $dbhost,
                                      dbname => $dbname,
                                    } );

# Chado schema, used to look up stocks.
my $schema = Bio::Chado::Schema->connect(
    sub { $dbh->get_actual_dbh() },
    { on_connect_do => ['SET search_path TO public;'] },
);

# Metadata schema, used to create the metadata row of each stock-image link.
my $metadata_schema = CXGN::Metadata::Schema->connect(
    sub { $dbh->get_actual_dbh },
    { on_connect_do => ['SET search_path TO metadata;'] },
);

# Owner of the loaded images, resolved from the -u username.
my $sp_person_id = CXGN::People::Person->get_person_by_username($dbh, $sp_person);

# lower-cased stock name => stock_id
my %name2id = ();

# -f overrides the image directory configured for the site.
my $ch = SGN::Context->new();
my $image_dir = $opt_f || $ch->get_conf("image_dir");

print "PLEASE VERIFY:\n";
print "Using dbhost: $dbhost. DB name: $dbname. \n";
print "Path to image is: $image_dir\n";
print "CONTINUE? ";

# Only an answer that starts with 'y' or 'Y' continues.  The previous test
# (/[yY]/ anywhere in the line) also accepted answers such as "nay" or
# "sorry".  End of input counts as "no".
my $answer = <STDIN>;
if (!defined $answer || $answer !~ /^\s*y/i) { exit(); }

# Guard against mixing up the sandbox and production image directories.
if (($dbname eq "sandbox") && ($image_dir !~ /sandbox/)) {
    die "The image directory needs to be set to image_files_sandbox if running on rubisco/sandbox. Please change the image_dir parameter in SGN.conf\n\n";
}

# The reverse mismatch is reported but is not fatal.
if (($dbname eq "cxgn") && ($image_dir =~ /sandbox/)) {
    warn "The image directory needs to be set to image_files when the script is running on the production database. Please change the image_dir parameter in SGN.conf\n\n";
}
# original file name (without extension) => SGN::Image object, for every
# image that is already linked to a stock; used to avoid reloading a file.
my %image_hash = ();

# "<image_id>-<stock_id>" => count, for every stock-image link that already
# exists; used to avoid linking the same pair twice.
my %connections = ();

# Cache the id of every stock under its lower-cased name.  When several
# stocks share a name, the one fetched last wins.
my $object_rs = $schema->resultset("Stock::Stock")->search( { } );
while (my $object = $object_rs->next) {
    $name2id{ lc($object->name) } = $object->stock_id;
}

# cache image chado object - image links to prevent reloading of the
# same data

print "Caching image-stock links...\n";

# Only these two columns are read, so do not fetch the whole row.
my $q = "SELECT image_id, stock_id FROM phenome.stock_image";
my $sth = $dbh->prepare($q);
$sth->execute();
while ( my $hashref = $sth->fetchrow_hashref() ) {
    my $image_id       = $hashref->{image_id};
    my $chado_table_id = $hashref->{stock_id};    ##### table specific

    my $cached_image      = SGN::Image->new($dbh, $image_id);
    my $original_filename = $cached_image->get_original_filename();

    # The stored original file name carries no file extension.  Skip the
    # file-name cache when no name is recorded, rather than filing the
    # image under an empty key.
    if (defined $original_filename) {
        $image_hash{$original_filename} = $cached_image;
    }

    $connections{ $image_id . "-" . $chado_table_id }++;
}
# ----------------------------------------------------------------------
# Main loop: load every image and link it to its stock.
#
# Each entry of @files is one image file or, with -d, one subdirectory of
# image files.  An entry is processed inside an eval block and forms one
# database transaction: it is committed when everything succeeded and
# rolled back when anything died.  The in-memory caches (%image_hash,
# %connections) and the image counter are only updated after a successful
# commit, so a rollback cannot leave them claiming work that was undone.
# ----------------------------------------------------------------------

# message() echoes everything it prints into this file.
open(ERR, '>', 'load_solcap_images.err') or die "Can't open error file: $!\n";

my @files = $opt_d ? glob("$dirname/*") : glob("$dirname/*.$ext");

my $new_image_count = 0;

# The same INSERT is used for every link, so prepare it once.
my $link_sth = $dbh->prepare(
    "INSERT INTO phenome.stock_image (stock_id, image_id, metadata_id) VALUES (?,?,?)"
);

# -l is optional; without it the uniquename pattern matches any location.
my $location_pattern = defined $location ? $location : '';

foreach my $file (@files) {
    my @stored_images;      # [ file basename, SGN::Image ] created for this entry
    my @stored_links;       # "<image_id>-<stock_id>" keys inserted for this entry
    my $entry_image_count = 0;

    eval {
        my @sub_files = $opt_d ? glob("$file/*.$ext") : ($file);

        my $object_name = basename($file, ".$ext");
        print "object_name = '" . $object_name . "' \n";

        # solcap image names look like 2009_ca_2138_fruit_c.jpg :
        # <year>_<place>_<plot name>_<...>.  The year is kept in its own
        # variable so that it does not mask the file-scope $year (-y).
        my ($file_year, undef, $name) = split /_/, $object_name;

        if (!defined $file_year || !defined $name) {
            warn "Cannot parse year and plot name from '$object_name' ! Skipping !!\n\n";
            return;    # leaves the eval block only
        }

        # Plot names repeat across years and locations, so the stock is
        # identified by its name plus a uniquename containing the year and
        # the location.
        my ($stock) = $schema->resultset("Stock::Stock")->search(
            {
                name       => $name,
                uniquename => { 'ilike' => $name . '%' . $file_year . '%' . $location_pattern . '%' },
            } );

        if (!$stock) {
            warn "no stock found for plot # $name ! Skipping !!\n\n";
            return;    # leaves the eval block only (was a next() out of the eval)
        }

        # Use the id of the stock that was actually found, both for the
        # duplicate check and for the insert, so the two cannot disagree.
        my $stock_id = $stock->stock_id;

        foreach my $filename (@sub_files) {
            my $file_basename = basename($filename, ".$ext");
            print STDOUT "Processing file $filename...\n";
            message("Loading $name, image $filename\n");

            if (! -e $filename) {
                warn "The specified file $filename does not exist! Skipping...\n";
                next;
            }

            message("Adding $filename...\n");

            my $image_id;    # set below, depending on whether the image is new
            my $loaded_image = $image_hash{$file_basename};

            if ($loaded_image) {
                message("$filename is already loaded into the database...\n");
                $image_id = $loaded_image->get_image_id();
                print "name = $name , stock id = " . $stock_id . "\n\n";

                if ($connections{ $image_id . "-" . $stock_id }) {
                    message("The connection between $name and image $filename has already been made. Skipping...\n");
                    next;
                }
                message("Associating stock " . $stock_id . " with already loaded image $filename...\n");
            }
            else {
                message(" Generating new image object for image $filename and associating it with stock $name, id " . $stock_id . " ...\n");
            }

            # Trial mode: report what would happen, store nothing.
            if ($opt_t) {
                print STDOUT qq{ Would associate file $filename to stock $name, id $stock_id\n };
                $entry_image_count++ if !$loaded_image;
                next;
            }

            if (!$loaded_image) {
                my $image = SGN::Image->new($dbh);
                $image->process_image("$filename", undef, undef);
                $image->set_description($name);    # the caption is the plot name
                $image->set_name($file_basename);
                $image->set_sp_person_id($sp_person_id);
                $image->set_obsolete("f");
                $image_id = $image->store();

                push @stored_images, [ $file_basename, $image ];
                $entry_image_count++;
            }

            # Store the image_id - stock_id link.  This runs for new images
            # and for already loaded images that are not linked to this
            # stock yet (the latter previously only printed a message).
            my $metadata    = CXGN::Metadata::Metadbdata->new($metadata_schema, $sp_person);
            my $metadata_id = $metadata->store()->get_metadata_id();
            die "NO METADATA ID FOUND\n" if !$metadata_id;

            $link_sth->execute($stock_id, $image_id, $metadata_id);
            push @stored_links, $image_id . "-" . $stock_id;
        }
    };
    my $error = $@;    # copy at once, $@ is a global

    if ($error) {
        print STDOUT "ERROR OCCURRED WHILE SAVING NEW INFORMATION. $error\n";
        $dbh->rollback();
    }
    else {
        $dbh->commit();

        # The work is permanent now, so it is safe to remember it.
        foreach my $stored (@stored_images) {
            $image_hash{ $stored->[0] } = $stored->[1];
        }
        foreach my $link_key (@stored_links) {
            $connections{$link_key}++;
        }
        $new_image_count += $entry_image_count;
    }
}

close(ERR);

print STDOUT "Inserted $new_image_count images.\n";
print STDOUT "Done. \n";
# usage()
#
# Prints a one-line summary of the command-line options to STDOUT and
# terminates the script.  Called by the option checks at the top of the
# script when a required option is missing.  Does not return.
#
# NOTE(review): the script exits with status 0 here even though the
# invocation was rejected; consider a non-zero status if no caller depends
# on the current behaviour.
sub usage {
    print "Usage: load_solcap_images.pl -D dbname [ cxgn | sandbox ] -H dbhost -i input dir [ -u username ] [ -l location of the plots (as loaded into stockprop) ] [ -e image file extension ] [ -f image dir ] [ -d images are in subdirectories ] [ -t trial mode ]. \n Images will be stored in phenome.stock_image \n";
    exit();
}
# message($text)
#
# Prints $text to STDOUT and writes the same text to the ERR file handle,
# which the main script opens on its error/log file before the first call.
# The text is printed as given; no newline is appended.
sub message {
    my ($message) = @_;
    print STDOUT $message;
    print ERR $message;
}