cassava and barley loaders
[phenome.git] / bin / loading_scripts / load_barley_test_data.pl
blob3652f9f96bf7a57095fa4e65dd96f28dc3bd9fb4
2 =head1
4 load_cassava_data.pl
6 =head1 SYNOPSIS
8 $this_script.pl -H [dbhost] -D [dbname] [-t]
10 =head1 COMMAND-LINE OPTIONS
12 -H host name
13 -D database name
14 -i infile
15 -u sgn user name
16 -t Test run . Rolling back at the end.
19 =head2 DESCRIPTION
23 =head2 AUTHOR
25 Naama Menda (nm249@cornell.edu)
27 December 2011
29 =cut
32 #!/usr/bin/perl
33 use strict;
34 use Getopt::Std;
35 use CXGN::Tools::File::Spreadsheet;
36 use CXGN::People::Person;
38 use Bio::Chado::Schema;
39 use CXGN::DB::InsertDBH;
40 use Carp qw /croak/ ;
44 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_u);
46 getopts('H:i:tD:u:');
48 my $dbhost = $opt_H;
49 my $dbname = $opt_D;
50 my $file = $opt_i;
52 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
53 dbname=>$dbname,
54 dbargs => {AutoCommit => 0,
55 RaiseError => 1}
58 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
59 $dbh->do('SET search_path TO public');
61 #getting the last database ids for resetting at the end in case of rolling back
62 ###############
63 my $last_nd_experiment_id = $schema->resultset('NaturalDiversity::NdExperiment')->get_column('nd_experiment_id')->max;
64 my $last_cvterm_id = $schema->resultset('Cv::Cvterm')->get_column('cvterm_id')->max;
66 my $last_nd_experiment_project_id = $schema->resultset('NaturalDiversity::NdExperimentProject')->get_column('nd_experiment_project_id')->max;
67 my $last_nd_experiment_stock_id = $schema->resultset('NaturalDiversity::NdExperimentStock')->get_column('nd_experiment_stock_id')->max;
68 my $last_nd_experiment_phenotype_id = $schema->resultset('NaturalDiversity::NdExperimentPhenotype')->get_column('nd_experiment_phenotype_id')->max;
69 my $last_phenotype_id = $schema->resultset('Phenotype::Phenotype')->get_column('phenotype_id')->max;
70 my $last_stock_id = $schema->resultset('Stock::Stock')->get_column('stock_id')->max;
71 my $last_stock_relationship_id = $schema->resultset('Stock::StockRelationship')->get_column('stock_relationship_id')->max;
72 my $last_project_id = $schema->resultset('Project::Project')->get_column('project_id')->max;
73 my $last_nd_geolocation_id = $schema->resultset('NaturalDiversity::NdGeolocation')->get_column('nd_geolocation_id')->max;
74 my $last_geoprop_id = $schema->resultset('NaturalDiversity::NdGeolocationprop')->get_column('nd_geolocationprop_id')->max;
75 my $last_projectprop_id = $schema->resultset('Project::Projectprop')->get_column('projectprop_id')->max;
77 my %seq = (
78 'nd_experiment_nd_experiment_id_seq' => $last_nd_experiment_id,
79 'cvterm_cvterm_id_seq' => $last_cvterm_id,
80 'nd_experiment_project_nd_experiment_project_id_seq' => $last_nd_experiment_project_id,
81 'nd_experiment_stock_nd_experiment_stock_id_seq' => $last_nd_experiment_stock_id,
82 'nd_experiment_phenotype_nd_experiment_phenotype_id_seq' => $last_nd_experiment_phenotype_id,
83 'phenotype_phenotype_id_seq' => $last_phenotype_id,
84 'stock_stock_id_seq' => $last_stock_id,
85 'stock_relationship_stock_relationship_id_seq' => $last_stock_relationship_id,
86 'project_project_id_seq' => $last_project_id,
87 'nd_geolocation_nd_geolocation_id_seq' => $last_nd_geolocation_id,
88 'nd_geolocationprop_nd_geolocationprop_id_seq' => $last_geoprop_id,
89 'projectprop_projectprop_id_seq' => $last_projectprop_id,
92 # find the cvterm for a phenotyping experiment
93 my $pheno_cvterm = $schema->resultset('Cv::Cvterm')->create_with(
94 { name => 'phenotyping experiment',
95 cv => 'experiment type',
96 db => 'null',
97 dbxref => 'phenotyping experiment',
98 });
100 my $username = $opt_u || 'barley_test' ; #'cassavabase';
101 my $sp_person_id= CXGN::People::Person->get_person_by_username($dbh, $username);
103 die "User $username for cassavabase must be pre-loaded in the database! \n" if !$sp_person_id ;
106 my $accession_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
107 { name => 'accession',
108 cv => 'stock type',
109 db => 'null',
110 dbxref => 'accession',
112 my $plot_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
113 { name => 'plot',
114 cv => 'stock type',
115 db => 'null',
116 dbxref => 'plot',
118 my $plot_of = $schema->resultset("Cv::Cvterm")->create_with(
119 { name => 'plot_of',
120 cv => 'stock relationship',
121 db => 'null',
122 dbxref => 'plot_of',
124 my $population_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
125 { name => 'population',
126 cv => 'stock type',
127 db => 'null',
128 dbxref => 'population',
130 my $member_of = $schema->resultset("Cv::Cvterm")->create_with(
131 { name => 'member_of',
132 cv => 'stock relationship',
133 db => 'null',
134 dbxref => 'member_of',
136 ########################
138 #new spreadsheet, don't skip first column
139 my $spreadsheet=CXGN::Tools::File::Spreadsheet->new($file);
141 # sp_term scale_name value name_string
142 my $scale_cv_name= "breeders scale";
143 # for this Unit Ontology has to be loaded!
144 my $unit_cv = $schema->resultset("Cv::Cv")->find(
145 { name => 'unit.ontology' } );
148 my $organism = $schema->resultset("Organism::Organism")->find_or_create(
150 genus => 'Hordeum',
151 species => 'Hordeum vulgare'
152 } );
153 my $organism_id = $organism->organism_id();
154 my $location = 'test';
155 #store a geolocation
156 my $geolocation = $schema->resultset("NaturalDiversity::NdGeolocation")->find_or_create(
158 description => $location,
159 } ) ;
160 my $project = $schema->resultset("Project::Project")->find_or_create(
162 name => "Barley training set data",
163 description => "test barley data for genomic selection",
164 } ) ;
165 my @rows = $spreadsheet->row_labels();
166 my @columns = $spreadsheet->column_labels();
168 my $population_name = 'Barley GS training population';
169 my $population = $schema->resultset("Stock::Stock")->find_or_create(
170 { organism_id => $organism_id,
171 name => $population_name,
172 uniquename => $population_name,
173 type_id => $population_cvterm->cvterm_id,
174 } );
175 eval {
176 foreach my $name (@rows ) {
177 my $stock_name = $name;
178 print "Looking at stock $name \n";
179 #store the plant accession in the plot table
180 my $parent_stock = $schema->resultset("Stock::Stock")->find_or_create(
181 { organism_id => $organism_id,
182 name => $stock_name,
183 uniquename => $stock_name,
184 type_id => $accession_cvterm->cvterm_id,
185 } );
186 ##and create the stock_relationship with the population
187 $population->find_or_create_related('stock_relationship_objects', {
188 type_id => $member_of->cvterm_id(),
189 subject_id => $parent_stock->stock_id(),
190 } );
191 #store the plot in stock
192 my $plot_stock = $schema->resultset("Stock::Stock")->find_or_create(
193 { organism_id => $organism_id,
194 name => $stock_name . " plot",
195 uniquename => $stock_name . " plot",
196 type_id => $plot_cvterm->cvterm_id()
198 ##and create the stock_relationship with the accession
199 $parent_stock->find_or_create_related('stock_relationship_objects', {
200 type_id => $plot_of->cvterm_id(),
201 subject_id => $plot_stock->stock_id(),
202 } );
203 #add the owner for this stock
204 #check first if it exists
205 my $owner_insert = "INSERT INTO phenome.stock_owner (sp_person_id, stock_id) VALUES (?,?)";
206 my $sth = $dbh->prepare($owner_insert);
207 my $check_query = "SELECT sp_person_id FROM phenome.stock_owner WHERE ( sp_person_id = ? AND stock_id = ? )";
208 my $person_ids = $dbh->selectcol_arrayref($check_query, undef, ($sp_person_id, $parent_stock->stock_id) );
209 if (!@$person_ids) {
210 $sth->execute($sp_person_id, $parent_stock->stock_id);
212 $person_ids = $dbh->selectcol_arrayref($check_query, undef, ($sp_person_id, $plot_stock->stock_id) );
213 if (!@$person_ids) {
214 $sth->execute($sp_person_id, $plot_stock->stock_id);
216 #################
217 ###store a new nd_experiment. One experiment per stock
218 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create(
220 nd_geolocation_id => $geolocation->nd_geolocation_id(),
221 type_id => $pheno_cvterm->cvterm_id(),
222 } );
223 #link to the project
224 $experiment->find_or_create_related('nd_experiment_projects', {
225 project_id => $project->project_id()
226 } );
227 #link the experiment to the stock
228 $experiment->find_or_create_related('nd_experiment_stocks' , {
229 stock_id => $plot_stock->stock_id(),
230 type_id => $pheno_cvterm->cvterm_id(),
232 ##################
233 LABEL: foreach my $label (@columns) {
234 my $value = $spreadsheet->value_at($name, $label);
235 next() if $label eq 'Entry' ; #$value !~ /^\d/;
236 print "Storing phenotypes for label $label! \n";
237 ($value, undef) = split (/\s/, $value) ;
238 #print "Value $value \n";
239 #my ($db_name, $co_accession) = split (/\:/ , $label);
240 #print STDERR "db_name = '$db_name' sp_accession = '$sp_accession'\n";
241 #next() if (!$co_accession);
242 my $cvterm = $schema->resultset('Cv::Cvterm')->create_with(
243 { name => $label,
244 cv => 'barley_test',
245 db => 'barley',
246 dbxref => $label,
248 ####################
249 ##make sure this rule is valid for all CO terms in the data file!!
250 my $observable_term = $cvterm ;
251 #############
252 ##observable_id is the same as cvalue_id for scale and qscale, and the parent term for unit.
253 my $phenotype = $cvterm->find_or_create_related(
254 "phenotype_cvalues", {
255 observable_id => $cvterm->cvterm_id,
256 value => $value ,
257 uniquename => "Stock: " . $plot_stock->stock_id . ", Term: " . $cvterm->name() ,
259 print "Stored phenotype " . $phenotype->phenotype_id() . " (observable = " . $observable_term->name . ") with cvalue " . $cvterm->name . " value = $value \n\n" ;
260 #link the phenotype to nd_experiment
261 my $nd_experiment_phenotype = $experiment->find_or_create_related('nd_experiment_phenotypes', { phenotype_id => $phenotype->phenotype_id() } );
267 if ($@) { print "An error occured! Rolling backl!\n\n $@ \n\n "; }
268 elsif ($opt_t) {
269 print "TEST RUN. Rolling back and reseting database sequences!!\n\n";
270 foreach my $value ( keys %seq ) {
271 my $maxval= $seq{$value} || 0;
272 if ($maxval) { $dbh->do("SELECT setval ('$value', $maxval, true)") ; }
273 else { $dbh->do("SELECT setval ('$value', 1, false)"); }
275 }else {
276 print "Transaction succeeded! Commiting ! \n\n";
277 $dbh->commit();