add project name and year options
[phenome.git] / db / LoadPhenomeInStock.pm
blobd049810ff3bc50791219d824b04155b9bf193a71
1 #!/usr/bin/env perl
4 =head1 NAME
6 LoadPhenomeInStock.pm
8 =head1 SYNOPSIS
10 mx-run ThisPackageName [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
15 =head1 DESCRIPTION
17 This is a patch for loading data in phenome.population and phenome.individual in the stock module, which will eventually replace these 2 tables.
19 This subclass uses L<Moose>. The parent class uses L<MooseX::Runnable>
21 =head1 AUTHOR
23 Naama Menda<nm249@cornell.edu>
25 =head1 COPYRIGHT & LICENSE
27 Copyright 2010 Boyce Thompson Institute for Plant Research
29 This program is free software; you can redistribute it and/or modify
30 it under the same terms as Perl itself.
32 =cut
35 package LoadPhenomeInStock;
37 use Try::Tiny;
38 use Moose;
39 extends 'CXGN::Metadata::Dbpatch';
41 use Bio::Chado::Schema;
43 use CXGN::Phenome::Population;
44 use CXGN::Phenome::Individual;
45 use CXGN::Chado::Dbxref;
46 use CXGN::People::Person;
48 sub init_patch {
49 my $self=shift;
50 my $name = __PACKAGE__;
51 print "dbpatch name is : '" . $name . "'\n\n";
52 my $description = 'Loading the phenome individual data into the stock module';
53 my @previous_requested_patches = ('CopyAccessionToStock', 'AddStockLinks'); #ADD HERE
55 $self->name($name);
56 $self->description($description);
57 $self->prereq(\@previous_requested_patches);
61 sub patch {
62 my $self=shift;
64 print STDOUT "Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
66 print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
68 my $schema = Bio::Chado::Schema->connect( sub { $self->dbh->clone } , { on_connect_do => ['SET search_path TO public;'], autocommit => 1 });
70 my %names_hash = ( 'Tomato' => 'Solanum lycopersicum' ,
71 'Potato' => 'Solanum tuberosum',
72 'Eggplant' => 'Solanum melongena',
73 'Pepper' => 'Capsicum annuum',
74 'Coffee' => 'Coffea arabica',
75 'Petunia' => 'Petunia x hybrida',
77 my $coderef = sub {
78 print "Finding/creating cvterm for population\n";
79 my $population_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
80 { name => 'population',
81 cv => 'stock type',
82 db => 'null',
83 dbxref => 'population',
84 });
86 print "Finding/creating cvterm for accession (individual)\n";
87 my $accession_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
88 { name => 'accession',
89 cv => 'stock type',
90 db => 'null',
91 dbxref => 'accession',
92 });
93 print "Finding/creating cvtem for stock relationship 'is_member_of' \n";
95 my $member_of = $schema->resultset("Cv::Cvterm")->create_with(
96 { name => 'is_member_of',
97 cv => 'stock relationship',
98 db => 'null',
99 dbxref => 'is_member_of',
101 #find the cvterm for sgn person_id
102 my $person_id_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
103 { name => 'sp_person_id',
104 cv => 'local',
105 db => 'null',
106 dbxref => 'autocreated:sp_person_id',
109 #set the searchpath
110 $self->dbh->do('set search_path to phenome, public');
111 # first load the populations
112 my $q = "SELECT population_id FROM phenome.population";
113 my $sth = $self->dbh->prepare($q);
114 $sth->execute();
115 while (my $pop_id = $sth->fetchrow_array() ) {
116 my $population = CXGN::Phenome::Population->new($self->dbh, $pop_id);
117 my $name = $population->get_name;
118 my $desc = $population->get_description;
119 my $sp_person_id = $population->get_sp_person_id;
120 my $create_date = $population->get_create_date;
122 my $organism = $schema->resultset("Organism::Organism")->find_or_create( {
123 species => 'any' } );
124 my $organism_id = $organism->organism_id();
125 # background_accession_id
126 if ($population->get_background_accession_id) {
127 my $aq = "SELECT chado_organism_id FROM sgn.accession where accession_id = ? ";
128 my $a_sth = $self->dbh->prepare($aq);
129 $a_sth->execute($population->get_background_accession_id);
130 ($organism_id) = $a_sth->fetchrow_array() ;
132 #############
133 print "creating new stock for population $name\n";
134 my $stock_population = $schema->resultset("Stock::Stock")->find_or_create(
135 { organism_id => $organism_id,
136 name => $name,
137 uniquename => $name,
138 description => $desc,
139 type_id => $population_cvterm->cvterm_id(),
140 } );
141 #now load the stock in the population table
142 $population->set_stock_id($stock_population->stock_id);
143 $population->store();
145 #store the properties for the population
146 $stock_population->create_stockprops({ sp_person_id => $sp_person_id},
147 { autocreate => 1 , cv_name => 'local'});
148 $stock_population->create_stockprops({ create_date => $create_date},
149 { autocreate => 1 , cv_name => 'local' });
150 print "Fetching population dbxrefs \n";
151 my @pop_dbxrefs = $population->get_all_population_dbxrefs;
152 foreach my $pdxref (@pop_dbxrefs) {
153 my $pd = $population->get_population_dbxref($pdxref);
154 my $dbxref_id = $pd->get_dbxref_id;
155 my $d_person_id = $pd->get_sp_person_id;
156 my $d_create_date = $pd->get_create_date;
157 my $d_obsolete = $pd->get_obsolete;
158 my $stock_dbxref = $stock_population->find_or_create_related('stock_dbxrefs', { dbxref_id => $dbxref_id, } );
159 $stock_dbxref->create_stock_dbxrefprops( { sp_person_id => $d_person_id } , { autocreate => 1 , cv_name => 'local' } );
160 $stock_dbxref->create_stock_dbxrefprops( { create_date => $d_create_date } , { autocreate => 1 , cv_name => 'local' } );
161 $stock_dbxref->create_stock_dbxrefprops( { obsolete => $d_obsolete } , { autocreate => 1 , cv_name => 'local' } );
164 print "Loading individuals: \n";
165 my @individuals = $population->get_individuals();
166 foreach my $ind (@individuals) {
167 my $iname = $ind->get_name;
168 my $idesc = $ind->get_description;
169 my $iperson_id = $ind->get_sp_person_id;
170 my $icreate_date = $ind->get_create_date;
171 my $imodified_date = $ind->get_modification_date;
172 my $i_obsolete = $ind->get_obsolete;
173 my $updated_by = $ind->get_updated_by;
174 my $common_name = $ind->get_common_name;
175 my $i_organism_id = $organism_id ;
176 if ($common_name) {
177 print "Finding organism_id for common_name $common_name (" . $names_hash{$common_name} . ")\n";
178 ($i_organism_id) = $schema->resultset("Organism::Organism")->search( {
179 species => $names_hash{$common_name} } )->first->organism_id;
181 print "creating new stock for individual $iname\n";
182 my $stock_individual = $schema->resultset("Stock::Stock")->find(
183 { name => $iname,
184 uniquename => $iname,
185 type_id => $accession_cvterm->cvterm_id(),
186 } );
187 if ($stock_individual) {
188 print "stock exists! Updating description ... \n";
189 $stock_individual->description($idesc);
190 $stock_individual->update;
191 } else {
192 $stock_individual = $schema->resultset("Stock::Stock")->create(
193 { organism_id => $i_organism_id,
194 name => $iname,
195 uniquename => $iname,
196 description => $idesc,
197 type_id => $accession_cvterm->cvterm_id(),
198 is_obsolete => $i_obsolete,
199 } );
201 #load the new stock_id in the individual table
202 $ind->set_stock_id( $stock_individual->stock_id );
203 $ind->store();
205 #load the population relationship
206 $stock_population->find_or_create_related('stock_relationship_objects', {
207 type_id => $member_of->cvterm_id(),
208 subject_id => $stock_individual->stock_id(),
209 } );
210 #load the stock properties
211 print "loading stock properties\n";
212 $stock_individual->create_stockprops({ sp_person_id => $iperson_id},
213 { autocreate => 1 , cv_name => 'local'});
214 $stock_individual->create_stockprops({ create_date => $icreate_date},
215 { autocreate => 1, cv_name => 'local' }) if $icreate_date;
216 $stock_individual->create_stockprops({ modified_date => $imodified_date},
217 { autocreate => 1 , cv_name => 'local'}) if $imodified_date;
218 $stock_individual->create_stockprops({ common_name => $common_name},
219 { autocreate => 1 , cv_name => 'stock_property' , db_name => 'SGN' }) if $common_name;
221 $stock_individual->create_stockprops({ updated_by => $updated_by},
222 { autocreate => 1 , cv_name => 'local'}) if $updated_by;
224 #each individual has dbxrefs, alleles, images, phenotypes
225 print "Storing stock_dbxrefs\n";
226 my @i_dbxrefs = $ind->get_dbxrefs;
227 foreach my $d (@i_dbxrefs) {
228 #store the stock_dbxref
229 my $stock_dbxref = $stock_individual->find_or_create_related('stock_dbxrefs' , { dbxref_id => $d->get_dbxref_id, } );
230 #get the metadata from individual_dbxref
231 my $idx = $ind->get_individual_dbxref($d);
232 my $idx_person_id = $idx->get_sp_person_id;
233 my $idx_create = $idx->get_create_date;
234 my $idx_modified = $idx->get_modification_date;
235 my $idx_obsolete = $idx->get_obsolete;
237 #each dbxref may have individual_dbxref_evidence
238 #(in case of ontology annotations)
239 my @idxe = $idx->get_object_dbxref_evidence;
240 if (!@idxe) { # if this is not an ontology annotation
241 #store the metadata in stock_dbxrefprop
242 print "Storing stock_dbxrefprops\n";
243 $stock_dbxref->create_stock_dbxrefprops( { sp_person_id => $idx_person_id},
244 { autocreate => 1 , cv_name => 'local'}) if $idx_person_id;
245 $stock_dbxref->create_stock_dbxrefprops({ create_date => $idx_create},
246 { autocreate => 1, cv_name => 'local' }) if $idx_create;
248 $stock_dbxref->create_stock_dbxrefprops({ modified_date => $idx_modified },
249 { autocreate => 1 , cv_name => 'local'}) if $idx_modified;
250 $stock_dbxref->create_stock_dbxrefprops({ obsolete => '1'},
251 { autocreate => 1 , cv_name => 'local'}) if $idx_obsolete eq 't' ;
253 else { # evidence matadata overrides individual_dbxref metadata
254 foreach my $ev (@idxe) {
255 print "Storing stock_dbxrefprops for ontology evidence codes \n";
257 my $e_person_id = $ev->get_sp_person_id || $idx_person_id;
258 $stock_dbxref->create_stock_dbxrefprops( { sp_person_id => $e_person_id},
259 { autocreate => 1 , cv_name => 'local'}) if $e_person_id;
260 my $e_create = $ev->get_create_date || $idx_create;
261 $stock_dbxref->create_stock_dbxrefprops({ create_date => $e_create},
262 { autocreate => 1, cv_name => 'local' }) if $e_create;
263 my $e_modified = $ev->get_modification_date || $idx_modified;
264 $stock_dbxref->create_stock_dbxrefprops({ modified_date => $e_modified },
265 { autocreate => 1 , cv_name => 'local'}) if $e_modified;
266 my $e_obsolete = $ev->get_obsolete;
267 $stock_dbxref->create_stock_dbxrefprops({ obsolete => '1'},
268 { autocreate => 1 , cv_name => 'local'}) if $e_obsolete eq 't';
269 my $e_rel = CXGN::Chado::Dbxref->new($self->dbh, $ev->get_relationship_type_id)->get_cvterm_name;
270 $stock_dbxref->create_stock_dbxrefprops({ $e_rel => '1'},
271 { cv_name => 'relationship'}) if $e_rel;
272 my $e_code = CXGN::Chado::Dbxref->new($self->dbh, $ev->get_evidence_code_id)->get_cvterm_name;
273 $stock_dbxref->create_stock_dbxrefprops({ $e_code => '1'},
274 { cv_name => 'evidence_code'}) if $e_code;
275 my $e_desc = CXGN::Chado::Dbxref->new($self->dbh, $ev->get_evidence_description_id)->get_cvterm_name;
276 $stock_dbxref->create_stock_dbxrefprops({ $e_desc => '1'},
277 { cv_name => 'evidence_code'}) if $e_desc;
278 my $e_with = CXGN::Chado::Dbxref->new($self->dbh, $ev->get_evidence_with)->get_dbxref_id;
279 $stock_dbxref->create_stock_dbxrefprops({ evidence_with => $e_with},
280 { autocreate =>1 , cv_name => 'local'}) if $e_with;
281 my $e_ref = CXGN::Chado::Dbxref->new($self->dbh, $ev->get_reference_id)->get_dbxref_id;
282 $stock_dbxref->create_stock_dbxrefprops({ reference => $e_ref},
283 { autocreate =>1 , cv_name => 'local'}) if $e_ref;
287 #individual aliases
288 my @synonyms = $ind->get_aliases;
289 print "Storing synonyms\n";
290 foreach my $s (@synonyms) {
291 $stock_individual->create_stockprops({ synonym => $s},
292 {autocreate => 1,
293 cv_name => 'null'
296 #########find linked alleles
297 my @alleles = $ind->get_alleles;
298 foreach my $a (@alleles) {
299 print "Adding allele ... \n";
300 my $a_id = $a->get_allele_id;
301 $stock_individual->create_stockprops({ 'sgn allele_id' => $a_id},
302 {autocreate => 1,
303 cv_name => 'local'
306 ##find linked images
307 my @images = $ind->get_image_ids;
308 foreach my $i_id (@images) {
309 print "Adding image $i_id\n";
310 $stock_individual->create_stockprops({ 'sgn image_id' => $i_id},
311 {autocreate => 1,
312 cv_name => 'local'
315 ###########################################
316 ## find linked phenotypes
318 # create a new nd_experiment and store the phenotype in the natural div module
319 if ($ind->has_phenotype) {
320 print "Found phenotypes! Storing in Natural Diversity module \n";
321 # get the project
322 my $project_name = $population->get_name;
323 my @pop_owners = $population->get_owners;
324 my $project_desc = "Phenotypes recorded for population $project_name";
325 if ( defined($pop_owners[0]) ) {
326 print "owner is " . $pop_owners[0] . "\n" ;
327 my $owner = CXGN::People::Person->new($self->dbh, $pop_owners[0]);
328 $project_desc .= ' by ' . $owner->get_first_name . ' ' . $owner->get_last_name ;
330 my $project = $schema->resultset("Project::Project")->find_or_create( {
331 name => $project_name,
332 description => $project_desc , } );
333 # get the geolocation
334 my $geo_description = 'unknown';
335 my $geolocation = $schema->resultset("NaturalDiversity::NdGeolocation")->find_or_create( {
336 description => $geo_description , } );
338 # find the cvterm for a phenotyping experiment
339 my $pheno_cvterm = $schema->resultset('Cv::Cvterm')->create_with(
340 { name => 'phenotyping experiment',
341 cv => 'experiment type',
342 db => 'null',
343 dbxref => 'phenotyping experiment',
345 ###store a new nd_experiment. Each phenotyped population is going to get a new experiment_id
346 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create( {
347 nd_geolocation_id => $geolocation->nd_geolocation_id(),
348 type_id => $pheno_cvterm->cvterm_id(), } );
350 #link to the project
351 $experiment->find_or_create_related('nd_experiment_projects', {
352 project_id => $project->project_id } );
354 #link to the stock
355 $experiment->find_or_create_related('nd_experiment_stocks' , {
356 stock_id => $stock_individual->stock_id(),
357 type_id => $pheno_cvterm->cvterm_id(),
360 my $pq = "SELECT sp_person_id, phenotype_id FROM public.phenotype WHERE individual_id = ?";
361 my $p_sth = $self->dbh->prepare($pq);
362 $p_sth->execute($ind->get_individual_id);
363 while ( my ($p_person_id, $phenotype_id ) = $p_sth->fetchrow_array ) {
365 #create experimentprop for the person_id
366 if ($p_person_id) {
367 $experiment->find_or_create_related('nd_experimentprops', {
368 value => $p_person_id,
369 type_id => $person_id_cvterm->cvterm_id,
372 my ($phenotype) = $schema->resultset("Phenotype::Phenotype")->find( {
373 phenotype_id => $phenotype_id, });
375 if ( $phenotype->find_related("nd_experiment_phenotypes", {} ) ) {
376 warn "This experiment has been stored before (phenotype_id = $phenotype_id) ! Skipping! \n";
377 next();
379 ########################################################
380 # link the phenotype with the experiment
381 my $nd_experiment_phenotype = $experiment->find_or_create_related('nd_experiment_phenotypes', { phenotype_id => $phenotype_id } );
382 print "Individual stock " . $stock_individual->name . " has phenotype " . $phenotype->uniquename . " linked to experiment " . $experiment->nd_experiment_id . "\n";
385 if ($ind->has_genotype) {
386 #store genotyping experiment
387 # get data from phenome.genotype and genotype_experiment
388 print "Found genotypes! Storing new experiment and linking to phenome.genotype_region\n";
389 # get the project
390 my $project_name = $population->get_name;
391 my @pop_owners = $population->get_owners;
392 my $project_desc = "genotypes recorded for population $project_name";
393 if ( defined($pop_owners[0]) ) {
394 print "owner is " . $pop_owners[0] . "\n" ;
395 my $owner = CXGN::People::Person->new($self->dbh, $pop_owners[0]);
396 $project_desc .= ' by ' . $owner->get_first_name . ' ' . $owner->get_last_name ;
398 my $project = $schema->resultset("Project::Project")->find_or_create( {
399 name => $project_name,
400 description => $project_desc , } );
401 # get the geolocation
402 my $geo_description = 'unknown';
403 my $geolocation = $schema->resultset("NaturalDiversity::NdGeolocation")->find_or_create( {
404 description => $geo_description , } );
406 # find the cvterm for a phenotyping experiment
407 my $geno_cvterm = $schema->resultset('Cv::Cvterm')->create_with(
408 { name => 'genotyping experiment',
409 cv => 'experiment type',
410 db => 'null',
411 dbxref => 'genotyping experiment',
413 ###store a new nd_experiment. Each genotyped population is going to get a new experiment_id
414 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create( {
415 nd_geolocation_id => $geolocation->nd_geolocation_id(),
416 type_id => $geno_cvterm->cvterm_id(), } );
418 #link to the project
419 $experiment->find_or_create_related('nd_experiment_projects', {
420 project_id => $project->project_id } );
422 #link to the stock
423 $experiment->find_or_create_related('nd_experiment_stocks' , {
424 stock_id => $stock_individual->stock_id(),
425 type_id => $geno_cvterm->cvterm_id(),
427 ##########################
428 #######################
429 $experiment->find_or_create_related('nd_experimentprops', {
430 value => $pop_owners[0],
431 type_id => $person_id_cvterm->cvterm_id,
432 }) if $pop_owners[0];
434 ########################################################
435 # link the genotype_region with the experiment
436 my @genotypes = $ind->get_genotypes();
437 foreach my $g (@genotypes) {
438 my @regions = $g->get_genotype_regions;
439 foreach my $gr (@regions) {
440 my $gr_id = $gr->get_genotype_region_id;
441 $experiment->create_nd_experimentprops(
442 { 'sgn genotype_region_id' => $gr_id },
443 { autocreate => 1 , cv_name => 'local' } );
444 print "Individual stock " . $stock_individual->name . " has genotype region ($gr_id) linked to experiment " . $experiment->nd_experiment_id . "\n";
450 print "You're done!\n";
451 if ($self->trial) {
452 print "Trail mode! Rolling back transaction\n\n";
453 $schema->txn_rollback;
455 return 1;
458 try {
459 $schema->txn_do($coderef);
460 print "Data committed! \n";
461 } catch {
462 die "Load failed! " . $_ . "\n" ;
466 return 1;