10 mx-run ThisPackageName [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
17 This is a patch for loading data in phenome.population and phenome.individual in the stock module, which will eventually replace these 2 tables.
19 This subclass uses L<Moose>. The parent class uses L<MooseX::Runnable>
23 Naama Menda<nm249@cornell.edu>
25 =head1 COPYRIGHT & LICENSE
27 Copyright 2010 Boyce Thompson Institute for Plant Research
29 This program is free software; you can redistribute it and/or modify
30 it under the same terms as Perl itself.
35 package LoadPhenomeInStock
;
39 extends
'CXGN::Metadata::Dbpatch';
41 use Bio
::Chado
::Schema
;
43 use CXGN
::Phenome
::Population
;
44 use CXGN
::Phenome
::Individual
;
45 use CXGN
::Chado
::Dbxref
;
46 use CXGN
::People
::Person
;
50 my $name = __PACKAGE__
;
51 print "dbpatch name is : '" . $name . "'\n\n";
52 my $description = 'Loading the phenome individual data into the stock module';
53 my @previous_requested_patches = ('CopyAccessionToStock', 'AddStockLinks'); #ADD HERE
56 $self->description($description);
57 $self->prereq(\
@previous_requested_patches);
64 print STDOUT
"Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
66 print STDOUT
"\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
68 my $schema = Bio
::Chado
::Schema
->connect( sub { $self->dbh->clone } , { on_connect_do
=> ['SET search_path TO public;'], autocommit
=> 1 });
70 my %names_hash = ( 'Tomato' => 'Solanum lycopersicum' ,
71 'Potato' => 'Solanum tuberosum',
72 'Eggplant' => 'Solanum melongena',
73 'Pepper' => 'Capsicum annuum',
74 'Coffee' => 'Coffea arabica',
75 'Petunia' => 'Petunia x hybrida',
78 print "Finding/creating cvterm for population\n";
79 my $population_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
80 { name
=> 'population',
83 dbxref
=> 'population',
86 print "Finding/creating cvterm for accession (individual)\n";
87 my $accession_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
88 { name
=> 'accession',
91 dbxref
=> 'accession',
93 print "Finding/creating cvtem for stock relationship 'is_member_of' \n";
95 my $member_of = $schema->resultset("Cv::Cvterm")->create_with(
96 { name
=> 'is_member_of',
97 cv
=> 'stock_relationship',
99 dbxref
=> 'is_member_of',
101 #find the cvterm for sgn person_id
102 my $person_id_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
103 { name
=> 'sp_person_id',
106 dbxref
=> 'autocreated:sp_person_id',
110 $self->dbh->do('set search_path to phenome, public');
111 # first load the populations
112 my $q = "SELECT population_id FROM phenome.population";
113 my $sth = $self->dbh->prepare($q);
115 while (my $pop_id = $sth->fetchrow_array() ) {
116 my $population = CXGN
::Phenome
::Population
->new($self->dbh, $pop_id);
117 my $name = $population->get_name;
118 my $desc = $population->get_description;
119 my $sp_person_id = $population->get_sp_person_id;
120 my $create_date = $population->get_create_date;
122 my $organism = $schema->resultset("Organism::Organism")->find_or_create( {
123 species
=> 'any' } );
124 my $organism_id = $organism->organism_id();
125 # background_accession_id
126 if ($population->get_background_accession_id) {
127 my $aq = "SELECT chado_organism_id FROM sgn.accession where accession_id = ? ";
128 my $a_sth = $self->dbh->prepare($aq);
129 $a_sth->execute($population->get_background_accession_id);
130 ($organism_id) = $a_sth->fetchrow_array() ;
133 print "creating new stock for population $name\n";
134 my $stock_population = $schema->resultset("Stock::Stock")->find_or_create(
135 { organism_id
=> $organism_id,
138 description
=> $desc,
139 type_id
=> $population_cvterm->cvterm_id(),
141 #now load the stock in the population table
142 $population->set_stock_id($stock_population->stock_id);
143 $population->store();
145 #store the properties for the population
146 $stock_population->create_stockprops({ sp_person_id
=> $sp_person_id},
147 { autocreate
=> 1 , cv_name
=> 'local'});
148 $stock_population->create_stockprops({ create_date
=> $create_date},
149 { autocreate
=> 1 , cv_name
=> 'local' });
150 print "Fetching population dbxrefs \n";
151 my @pop_dbxrefs = $population->get_all_population_dbxrefs;
152 foreach my $pdxref (@pop_dbxrefs) {
153 my $pd = $population->get_population_dbxref($pdxref);
154 my $dbxref_id = $pd->get_dbxref_id;
155 my $d_person_id = $pd->get_sp_person_id;
156 my $d_create_date = $pd->get_create_date;
157 my $d_obsolete = $pd->get_obsolete;
158 my $stock_dbxref = $stock_population->find_or_create_related('stock_dbxrefs', { dbxref_id
=> $dbxref_id, } );
159 $stock_dbxref->create_stock_dbxrefprops( { sp_person_id
=> $d_person_id } , { autocreate
=> 1 , cv_name
=> 'local' } );
160 $stock_dbxref->create_stock_dbxrefprops( { create_date
=> $d_create_date } , { autocreate
=> 1 , cv_name
=> 'local' } );
161 $stock_dbxref->create_stock_dbxrefprops( { obsolete
=> $d_obsolete } , { autocreate
=> 1 , cv_name
=> 'local' } );
164 print "Loading individuals: \n";
165 my @individuals = $population->get_individuals();
166 foreach my $ind (@individuals) {
167 my $iname = $ind->get_name;
168 my $idesc = $ind->get_description;
169 my $iperson_id = $ind->get_sp_person_id;
170 my $icreate_date = $ind->get_create_date;
171 my $imodified_date = $ind->get_modification_date;
172 my $i_obsolete = $ind->get_obsolete;
173 my $updated_by = $ind->get_updated_by;
174 my $common_name = $ind->get_common_name;
175 my $i_organism_id = $organism_id ;
177 print "Finding organism_id for common_name $common_name (" . $names_hash{$common_name} . ")\n";
178 ($i_organism_id) = $schema->resultset("Organism::Organism")->search( {
179 species
=> $names_hash{$common_name} } )->first->organism_id;
181 print "creating new stock for individual $iname\n";
182 my $stock_individual = $schema->resultset("Stock::Stock")->find(
184 uniquename
=> $iname,
185 type_id
=> $accession_cvterm->cvterm_id(),
187 if ($stock_individual) {
188 print "stock exists! Updating description ... \n";
189 $stock_individual->description($idesc);
190 $stock_individual->update;
192 $stock_individual = $schema->resultset("Stock::Stock")->create(
193 { organism_id
=> $i_organism_id,
195 uniquename
=> $iname,
196 description
=> $idesc,
197 type_id
=> $accession_cvterm->cvterm_id(),
198 is_obsolete
=> $i_obsolete,
201 #load the new stock_id in the individual table
202 $ind->set_stock_id( $stock_individual->stock_id );
205 #load the population relationship
206 $stock_population->find_or_create_related('stock_relationship_objects', {
207 type_id
=> $member_of->cvterm_id(),
208 subject_id
=> $stock_individual->stock_id(),
210 #load the stock properties
211 print "loading stock properties\n";
212 $stock_individual->create_stockprops({ sp_person_id
=> $iperson_id},
213 { autocreate
=> 1 , cv_name
=> 'local'});
214 $stock_individual->create_stockprops({ create_date
=> $icreate_date},
215 { autocreate
=> 1, cv_name
=> 'local' }) if $icreate_date;
216 $stock_individual->create_stockprops({ modified_date
=> $imodified_date},
217 { autocreate
=> 1 , cv_name
=> 'local'}) if $imodified_date;
218 $stock_individual->create_stockprops({ common_name
=> $common_name},
219 { autocreate
=> 1 , cv_name
=> 'stock_property' , db_name
=> 'SGN' }) if $common_name;
221 $stock_individual->create_stockprops({ updated_by
=> $updated_by},
222 { autocreate
=> 1 , cv_name
=> 'local'}) if $updated_by;
224 #each individual has dbxrefs, alleles, images, phenotypes
225 print "Storing stock_dbxrefs\n";
226 my @i_dbxrefs = $ind->get_dbxrefs;
227 foreach my $d (@i_dbxrefs) {
228 #store the stock_dbxref
229 my $stock_dbxref = $stock_individual->find_or_create_related('stock_dbxrefs' , { dbxref_id
=> $d->get_dbxref_id, } );
230 #get the metadata from individual_dbxref
231 my $idx = $ind->get_individual_dbxref($d);
232 my $idx_person_id = $idx->get_sp_person_id;
233 my $idx_create = $idx->get_create_date;
234 my $idx_modified = $idx->get_modification_date;
235 my $idx_obsolete = $idx->get_obsolete;
237 #each dbxref may have individual_dbxref_evidence
238 #(in case of ontology annotations)
239 my @idxe = $idx->get_object_dbxref_evidence;
240 if (!@idxe) { # if this is not an ontology annotation
241 #store the metadata in stock_dbxrefprop
242 print "Storing stock_dbxrefprops\n";
243 $stock_dbxref->create_stock_dbxrefprops( { sp_person_id
=> $idx_person_id},
244 { autocreate
=> 1 , cv_name
=> 'local'}) if $idx_person_id;
245 $stock_dbxref->create_stock_dbxrefprops({ create_date
=> $idx_create},
246 { autocreate
=> 1, cv_name
=> 'local' }) if $idx_create;
248 $stock_dbxref->create_stock_dbxrefprops({ modified_date
=> $idx_modified },
249 { autocreate
=> 1 , cv_name
=> 'local'}) if $idx_modified;
250 $stock_dbxref->create_stock_dbxrefprops({ obsolete
=> '1'},
251 { autocreate
=> 1 , cv_name
=> 'local'}) if $idx_obsolete eq 't' ;
253 else { # evidence matadata overrides individual_dbxref metadata
254 foreach my $ev (@idxe) {
255 print "Storing stock_dbxrefprops for ontology evidence codes \n";
257 my $e_person_id = $ev->get_sp_person_id || $idx_person_id;
258 $stock_dbxref->create_stock_dbxrefprops( { sp_person_id
=> $e_person_id},
259 { autocreate
=> 1 , cv_name
=> 'local'}) if $e_person_id;
260 my $e_create = $ev->get_create_date || $idx_create;
261 $stock_dbxref->create_stock_dbxrefprops({ create_date
=> $e_create},
262 { autocreate
=> 1, cv_name
=> 'local' }) if $e_create;
263 my $e_modified = $ev->get_modification_date || $idx_modified;
264 $stock_dbxref->create_stock_dbxrefprops({ modified_date
=> $e_modified },
265 { autocreate
=> 1 , cv_name
=> 'local'}) if $e_modified;
266 my $e_obsolete = $ev->get_obsolete;
267 $stock_dbxref->create_stock_dbxrefprops({ obsolete
=> '1'},
268 { autocreate
=> 1 , cv_name
=> 'local'}) if $e_obsolete eq 't';
269 my $e_rel = CXGN
::Chado
::Dbxref
->new($self->dbh, $ev->get_relationship_type_id)->get_cvterm_name;
270 $stock_dbxref->create_stock_dbxrefprops({ $e_rel => '1'},
271 { cv_name
=> 'relationship'}) if $e_rel;
272 my $e_code = CXGN
::Chado
::Dbxref
->new($self->dbh, $ev->get_evidence_code_id)->get_cvterm_name;
273 $stock_dbxref->create_stock_dbxrefprops({ $e_code => '1'},
274 { cv_name
=> 'evidence_code'}) if $e_code;
275 my $e_desc = CXGN
::Chado
::Dbxref
->new($self->dbh, $ev->get_evidence_description_id)->get_cvterm_name;
276 $stock_dbxref->create_stock_dbxrefprops({ $e_desc => '1'},
277 { cv_name
=> 'evidence_code'}) if $e_desc;
278 my $e_with = CXGN
::Chado
::Dbxref
->new($self->dbh, $ev->get_evidence_with)->get_dbxref_id;
279 $stock_dbxref->create_stock_dbxrefprops({ evidence_with
=> $e_with},
280 { autocreate
=>1 , cv_name
=> 'local'}) if $e_with;
281 my $e_ref = CXGN
::Chado
::Dbxref
->new($self->dbh, $ev->get_reference_id)->get_dbxref_id;
282 $stock_dbxref->create_stock_dbxrefprops({ reference
=> $e_ref},
283 { autocreate
=>1 , cv_name
=> 'local'}) if $e_ref;
288 my @synonyms = $ind->get_aliases;
289 print "Storing synonyms\n";
290 foreach my $s (@synonyms) {
291 $stock_individual->create_stockprops({ stock_synonym
=> $s},
293 #cv_name => 'null' defaults to stock_property
296 #########find linked alleles
297 my @alleles = $ind->get_alleles;
298 foreach my $a (@alleles) {
299 print "Adding allele ... \n";
300 my $a_id = $a->get_allele_id;
301 $stock_individual->create_stockprops({ 'sgn allele_id' => $a_id},
307 my @images = $ind->get_image_ids;
308 foreach my $i_id (@images) {
309 print "Adding image $i_id\n";
310 $stock_individual->create_stockprops({ 'sgn image_id' => $i_id},
315 ###########################################
316 ## find linked phenotypes
318 # create a new nd_experiment and store the phenotype in the natural div module
319 if ($ind->has_phenotype) {
320 print "Found phenotypes! Storing in Natural Diversity module \n";
322 my $project_name = $population->get_name;
323 my @pop_owners = $population->get_owners;
324 my $project_desc = "Phenotypes recorded for population $project_name";
325 if ( defined($pop_owners[0]) ) {
326 print "owner is " . $pop_owners[0] . "\n" ;
327 my $owner = CXGN
::People
::Person
->new($self->dbh, $pop_owners[0]);
328 $project_desc .= ' by ' . $owner->get_first_name . ' ' . $owner->get_last_name ;
330 my $project = $schema->resultset("Project::Project")->find_or_create( {
331 name
=> $project_name,
332 description
=> $project_desc , } );
333 # get the geolocation
334 my $geo_description = 'unknown';
335 my $geolocation = $schema->resultset("NaturalDiversity::NdGeolocation")->find_or_create( {
336 description
=> $geo_description , } );
338 # find the cvterm for a phenotyping experiment
339 my $pheno_cvterm = $schema->resultset('Cv::Cvterm')->create_with(
340 { name
=> 'phenotyping experiment',
341 cv
=> 'experiment_type',
343 dbxref
=> 'phenotyping experiment',
345 ###store a new nd_experiment. Each phenotyped population is going to get a new experiment_id
346 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create( {
347 nd_geolocation_id
=> $geolocation->nd_geolocation_id(),
348 type_id
=> $pheno_cvterm->cvterm_id(), } );
351 $experiment->find_or_create_related('nd_experiment_projects', {
352 project_id
=> $project->project_id } );
355 $experiment->find_or_create_related('nd_experiment_stocks' , {
356 stock_id
=> $stock_individual->stock_id(),
357 type_id
=> $pheno_cvterm->cvterm_id(),
360 my $pq = "SELECT sp_person_id, phenotype_id FROM public.phenotype WHERE individual_id = ?";
361 my $p_sth = $self->dbh->prepare($pq);
362 $p_sth->execute($ind->get_individual_id);
363 while ( my ($p_person_id, $phenotype_id ) = $p_sth->fetchrow_array ) {
365 #create experimentprop for the person_id
367 $experiment->find_or_create_related('nd_experimentprops', {
368 value
=> $p_person_id,
369 type_id
=> $person_id_cvterm->cvterm_id,
372 my ($phenotype) = $schema->resultset("Phenotype::Phenotype")->find( {
373 phenotype_id
=> $phenotype_id, });
375 if ( $phenotype->find_related("nd_experiment_phenotypes", {} ) ) {
376 warn "This experiment has been stored before (phenotype_id = $phenotype_id) ! Skipping! \n";
379 ########################################################
380 # link the phenotype with the experiment
381 my $nd_experiment_phenotype = $experiment->find_or_create_related('nd_experiment_phenotypes', { phenotype_id
=> $phenotype_id } );
382 print "Individual stock " . $stock_individual->name . " has phenotype " . $phenotype->uniquename . " linked to experiment " . $experiment->nd_experiment_id . "\n";
385 if ($ind->has_genotype) {
386 #store genotyping experiment
387 # get data from phenome.genotype and genotype_experiment
388 print "Found genotypes! Storing new experiment and linking to phenome.genotype_region\n";
390 my $project_name = $population->get_name;
391 my @pop_owners = $population->get_owners;
392 my $project_desc = "genotypes recorded for population $project_name";
393 if ( defined($pop_owners[0]) ) {
394 print "owner is " . $pop_owners[0] . "\n" ;
395 my $owner = CXGN
::People
::Person
->new($self->dbh, $pop_owners[0]);
396 $project_desc .= ' by ' . $owner->get_first_name . ' ' . $owner->get_last_name ;
398 my $project = $schema->resultset("Project::Project")->find_or_create( {
399 name
=> $project_name,
400 description
=> $project_desc , } );
401 # get the geolocation
402 my $geo_description = 'unknown';
403 my $geolocation = $schema->resultset("NaturalDiversity::NdGeolocation")->find_or_create( {
404 description
=> $geo_description , } );
406 # find the cvterm for a phenotyping experiment
407 my $geno_cvterm = $schema->resultset('Cv::Cvterm')->create_with(
408 { name
=> 'genotyping experiment',
409 cv
=> 'experiment_type',
411 dbxref
=> 'genotyping experiment',
413 ###store a new nd_experiment. Each genotyped population is going to get a new experiment_id
414 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create( {
415 nd_geolocation_id
=> $geolocation->nd_geolocation_id(),
416 type_id
=> $geno_cvterm->cvterm_id(), } );
419 $experiment->find_or_create_related('nd_experiment_projects', {
420 project_id
=> $project->project_id } );
423 $experiment->find_or_create_related('nd_experiment_stocks' , {
424 stock_id
=> $stock_individual->stock_id(),
425 type_id
=> $geno_cvterm->cvterm_id(),
427 ##########################
428 #######################
429 $experiment->find_or_create_related('nd_experimentprops', {
430 value
=> $pop_owners[0],
431 type_id
=> $person_id_cvterm->cvterm_id,
432 }) if $pop_owners[0];
434 ########################################################
435 # link the genotype_region with the experiment
436 my @genotypes = $ind->get_genotypes();
437 foreach my $g (@genotypes) {
438 my @regions = $g->get_genotype_regions;
439 foreach my $gr (@regions) {
440 my $gr_id = $gr->get_genotype_region_id;
441 $experiment->create_nd_experimentprops(
442 { 'sgn genotype_region_id' => $gr_id },
443 { autocreate
=> 1 , cv_name
=> 'local' } );
444 print "Individual stock " . $stock_individual->name . " has genotype region ($gr_id) linked to experiment " . $experiment->nd_experiment_id . "\n";
450 print "You're done!\n";
452 print "Trail mode! Rolling back transaction\n\n";
453 $schema->txn_rollback;
459 $schema->txn_do($coderef);
460 print "Data committed! \n";
462 die "Load failed! " . $_ . "\n" ;