8 $this_script.pl -H [dbhost] -D [dbname] [-t]
10 =head1 COMMAND-LINE OPTIONS
16 -t Test run . Rolling back at the end.
25 Naama Menda (nm249@cornell.edu)
35 use CXGN
::Tools
::File
::Spreadsheet
;
36 use CXGN
::People
::Person
;
38 use Bio
::Chado
::Schema
;
39 use CXGN
::DB
::InsertDBH
;
44 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_u);
52 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
54 dbargs
=> {AutoCommit
=> 0,
58 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
59 $dbh->do('SET search_path TO public');
61 #getting the last database ids for resetting at the end in case of rolling back
63 my $last_nd_experiment_id = $schema->resultset('NaturalDiversity::NdExperiment')->get_column('nd_experiment_id')->max;
64 my $last_cvterm_id = $schema->resultset('Cv::Cvterm')->get_column('cvterm_id')->max;
66 my $last_nd_experiment_project_id = $schema->resultset('NaturalDiversity::NdExperimentProject')->get_column('nd_experiment_project_id')->max;
67 my $last_nd_experiment_stock_id = $schema->resultset('NaturalDiversity::NdExperimentStock')->get_column('nd_experiment_stock_id')->max;
68 my $last_nd_experiment_phenotype_id = $schema->resultset('NaturalDiversity::NdExperimentPhenotype')->get_column('nd_experiment_phenotype_id')->max;
69 my $last_phenotype_id = $schema->resultset('Phenotype::Phenotype')->get_column('phenotype_id')->max;
70 my $last_stock_id = $schema->resultset('Stock::Stock')->get_column('stock_id')->max;
71 my $last_stock_relationship_id = $schema->resultset('Stock::StockRelationship')->get_column('stock_relationship_id')->max;
72 my $last_project_id = $schema->resultset('Project::Project')->get_column('project_id')->max;
73 my $last_nd_geolocation_id = $schema->resultset('NaturalDiversity::NdGeolocation')->get_column('nd_geolocation_id')->max;
74 my $last_geoprop_id = $schema->resultset('NaturalDiversity::NdGeolocationprop')->get_column('nd_geolocationprop_id')->max;
75 my $last_projectprop_id = $schema->resultset('Project::Projectprop')->get_column('projectprop_id')->max;
78 'nd_experiment_nd_experiment_id_seq' => $last_nd_experiment_id,
79 'cvterm_cvterm_id_seq' => $last_cvterm_id,
80 'nd_experiment_project_nd_experiment_project_id_seq' => $last_nd_experiment_project_id,
81 'nd_experiment_stock_nd_experiment_stock_id_seq' => $last_nd_experiment_stock_id,
82 'nd_experiment_phenotype_nd_experiment_phenotype_id_seq' => $last_nd_experiment_phenotype_id,
83 'phenotype_phenotype_id_seq' => $last_phenotype_id,
84 'stock_stock_id_seq' => $last_stock_id,
85 'stock_relationship_stock_relationship_id_seq' => $last_stock_relationship_id,
86 'project_project_id_seq' => $last_project_id,
87 'nd_geolocation_nd_geolocation_id_seq' => $last_nd_geolocation_id,
88 'nd_geolocationprop_nd_geolocationprop_id_seq' => $last_geoprop_id,
89 'projectprop_projectprop_id_seq' => $last_projectprop_id,
92 # find the cvterm for a phenotyping experiment
93 my $pheno_cvterm = $schema->resultset('Cv::Cvterm')->create_with(
94 { name
=> 'phenotyping experiment',
95 cv
=> 'experiment type',
97 dbxref
=> 'phenotyping experiment',
100 my $username = $opt_u || 'barley_test' ; #'cassavabase';
101 my $sp_person_id= CXGN
::People
::Person
->get_person_by_username($dbh, $username);
103 die "User $username for cassavabase must be pre-loaded in the database! \n" if !$sp_person_id ;
106 my $accession_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
107 { name
=> 'accession',
110 dbxref
=> 'accession',
112 my $plot_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
118 my $plot_of = $schema->resultset("Cv::Cvterm")->create_with(
120 cv
=> 'stock relationship',
124 my $population_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
125 { name
=> 'population',
128 dbxref
=> 'population',
130 my $member_of = $schema->resultset("Cv::Cvterm")->create_with(
131 { name
=> 'member_of',
132 cv
=> 'stock relationship',
134 dbxref
=> 'member_of',
136 ########################
138 #new spreadsheet, don't skip first column
139 my $spreadsheet=CXGN
::Tools
::File
::Spreadsheet
->new($file);
141 # sp_term scale_name value name_string
142 my $scale_cv_name= "breeders scale";
143 # for this Unit Ontology has to be loaded!
144 my $unit_cv = $schema->resultset("Cv::Cv")->find(
145 { name
=> 'unit.ontology' } );
148 my $organism = $schema->resultset("Organism::Organism")->find_or_create(
151 species
=> 'Hordeum vulgare'
153 my $organism_id = $organism->organism_id();
154 my $location = 'test';
156 my $geolocation = $schema->resultset("NaturalDiversity::NdGeolocation")->find_or_create(
158 description
=> $location,
160 my $project = $schema->resultset("Project::Project")->find_or_create(
162 name
=> "Barley training set data",
163 description
=> "test barley data for genomic selection",
165 my @rows = $spreadsheet->row_labels();
166 my @columns = $spreadsheet->column_labels();
168 my $population_name = 'Barley GS training population';
169 my $population = $schema->resultset("Stock::Stock")->find_or_create(
170 { organism_id
=> $organism_id,
171 name
=> $population_name,
172 uniquename
=> $population_name,
173 type_id
=> $population_cvterm->cvterm_id,
176 foreach my $name (@rows ) {
177 my $stock_name = $name;
178 print "Looking at stock $name \n";
179 #store the plant accession in the plot table
180 my $parent_stock = $schema->resultset("Stock::Stock")->find_or_create(
181 { organism_id
=> $organism_id,
183 uniquename
=> $stock_name,
184 type_id
=> $accession_cvterm->cvterm_id,
186 ##and create the stock_relationship with the population
187 $population->find_or_create_related('stock_relationship_objects', {
188 type_id
=> $member_of->cvterm_id(),
189 subject_id
=> $parent_stock->stock_id(),
191 #store the plot in stock
192 my $plot_stock = $schema->resultset("Stock::Stock")->find_or_create(
193 { organism_id
=> $organism_id,
194 name
=> $stock_name . " plot",
195 uniquename
=> $stock_name . " plot",
196 type_id
=> $plot_cvterm->cvterm_id()
198 ##and create the stock_relationship with the accession
199 $parent_stock->find_or_create_related('stock_relationship_objects', {
200 type_id
=> $plot_of->cvterm_id(),
201 subject_id
=> $plot_stock->stock_id(),
203 #add the owner for this stock
204 #check first if it exists
205 my $owner_insert = "INSERT INTO phenome.stock_owner (sp_person_id, stock_id) VALUES (?,?)";
206 my $sth = $dbh->prepare($owner_insert);
207 my $check_query = "SELECT sp_person_id FROM phenome.stock_owner WHERE ( sp_person_id = ? AND stock_id = ? )";
208 my $person_ids = $dbh->selectcol_arrayref($check_query, undef, ($sp_person_id, $parent_stock->stock_id) );
210 $sth->execute($sp_person_id, $parent_stock->stock_id);
212 $person_ids = $dbh->selectcol_arrayref($check_query, undef, ($sp_person_id, $plot_stock->stock_id) );
214 $sth->execute($sp_person_id, $plot_stock->stock_id);
217 ###store a new nd_experiment. One experiment per stock
218 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create(
220 nd_geolocation_id
=> $geolocation->nd_geolocation_id(),
221 type_id
=> $pheno_cvterm->cvterm_id(),
224 $experiment->find_or_create_related('nd_experiment_projects', {
225 project_id
=> $project->project_id()
227 #link the experiment to the stock
228 $experiment->find_or_create_related('nd_experiment_stocks' , {
229 stock_id
=> $plot_stock->stock_id(),
230 type_id
=> $pheno_cvterm->cvterm_id(),
233 LABEL
: foreach my $label (@columns) {
234 my $value = $spreadsheet->value_at($name, $label);
235 next() if $label eq 'Entry' ; #$value !~ /^\d/;
236 print "Storing phenotypes for label $label! \n";
237 ($value, undef) = split (/\s/, $value) ;
238 #print "Value $value \n";
239 #my ($db_name, $co_accession) = split (/\:/ , $label);
240 #print STDERR "db_name = '$db_name' sp_accession = '$sp_accession'\n";
241 #next() if (!$co_accession);
242 my $cvterm = $schema->resultset('Cv::Cvterm')->create_with(
249 ##make sure this rule is valid for all CO terms in the data file!!
250 my $observable_term = $cvterm ;
252 ##observable_id is the same as cvalue_id for scale and qscale, and the parent term for unit.
253 my $phenotype = $cvterm->find_or_create_related(
254 "phenotype_cvalues", {
255 observable_id
=> $cvterm->cvterm_id,
257 uniquename
=> "Stock: " . $plot_stock->stock_id . ", Term: " . $cvterm->name() ,
259 print "Stored phenotype " . $phenotype->phenotype_id() . " (observable = " . $observable_term->name . ") with cvalue " . $cvterm->name . " value = $value \n\n" ;
260 #link the phenotype to nd_experiment
261 my $nd_experiment_phenotype = $experiment->find_or_create_related('nd_experiment_phenotypes', { phenotype_id
=> $phenotype->phenotype_id() } );
267 if ($@
) { print "An error occured! Rolling backl!\n\n $@ \n\n "; }
269 print "TEST RUN. Rolling back and reseting database sequences!!\n\n";
270 foreach my $value ( keys %seq ) {
271 my $maxval= $seq{$value} || 0;
272 if ($maxval) { $dbh->do("SELECT setval ('$value', $maxval, true)") ; }
273 else { $dbh->do("SELECT setval ('$value', 1, false)"); }
276 print "Transaction succeeded! Commiting ! \n\n";