8 $this_script.pl -H [dbhost] -D [dbname] [-t]
10 =head1 COMMAND-LINE OPTIONS
15 -t Test run . Rolling back at the end.
24 Naama Menda (nm249@cornell.edu)
34 use CXGN
::Tools
::File
::Spreadsheet
;
35 use CXGN
::People
::Person
;
37 use Bio
::Chado
::Schema
;
38 use CXGN
::DB
::InsertDBH
;
43 our ($opt_H, $opt_D, $opt_i, $opt_t);
51 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
53 dbargs
=> {AutoCommit
=> 0,
57 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
58 $dbh->do('SET search_path TO public');
60 #getting the last database ids for resetting at the end in case of rolling back
62 my $last_nd_experiment_id = $schema->resultset('NaturalDiversity::NdExperiment')->get_column('nd_experiment_id')->max;
63 my $last_cvterm_id = $schema->resultset('Cv::Cvterm')->get_column('cvterm_id')->max;
65 my $last_nd_experiment_project_id = $schema->resultset('NaturalDiversity::NdExperimentProject')->get_column('nd_experiment_project_id')->max;
66 my $last_nd_experiment_stock_id = $schema->resultset('NaturalDiversity::NdExperimentStock')->get_column('nd_experiment_stock_id')->max;
67 my $last_nd_experiment_phenotype_id = $schema->resultset('NaturalDiversity::NdExperimentPhenotype')->get_column('nd_experiment_phenotype_id')->max;
68 my $last_phenotype_id = $schema->resultset('Phenotype::Phenotype')->get_column('phenotype_id')->max;
69 my $last_stock_id = $schema->resultset('Stock::Stock')->get_column('stock_id')->max;
70 my $last_stock_relationship_id = $schema->resultset('Stock::StockRelationship')->get_column('stock_relationship_id')->max;
71 my $last_project_id = $schema->resultset('Project::Project')->get_column('project_id')->max;
72 my $last_nd_geolocation_id = $schema->resultset('NaturalDiversity::NdGeolocation')->get_column('nd_geolocation_id')->max;
73 my $last_geoprop_id = $schema->resultset('NaturalDiversity::NdGeolocationprop')->get_column('nd_geolocationprop_id')->max;
74 my $last_projectprop_id = $schema->resultset('Project::Projectprop')->get_column('projectprop_id')->max;
77 'nd_experiment_nd_experiment_id_seq' => $last_nd_experiment_id,
78 'cvterm_cvterm_id_seq' => $last_cvterm_id,
79 'nd_experiment_project_nd_experiment_project_id_seq' => $last_nd_experiment_project_id,
80 'nd_experiment_stock_nd_experiment_stock_id_seq' => $last_nd_experiment_stock_id,
81 'nd_experiment_phenotype_nd_experiment_phenotype_id_seq' => $last_nd_experiment_phenotype_id,
82 'phenotype_phenotype_id_seq' => $last_phenotype_id,
83 'stock_stock_id_seq' => $last_stock_id,
84 'stock_relationship_stock_relationship_id_seq' => $last_stock_relationship_id,
85 'project_project_id_seq' => $last_project_id,
86 'nd_geolocation_nd_geolocation_id_seq' => $last_nd_geolocation_id,
87 'nd_geolocationprop_nd_geolocationprop_id_seq' => $last_geoprop_id,
88 'projectprop_projectprop_id_seq' => $last_projectprop_id,
91 # find the cvterm for a phenotyping experiment
92 my $pheno_cvterm = $schema->resultset('Cv::Cvterm')->create_with(
93 { name
=> 'phenotyping experiment',
94 cv
=> 'experiment type',
96 dbxref
=> 'phenotyping experiment',
99 my $username = 'kulakow' ; #'cassavabase';
100 my $sp_person_id= CXGN
::People
::Person
->get_person_by_username($dbh, $username);
102 die "User $username for cassavabase must be pre-loaded in the database! \n" if !$sp_person_id ;
105 my $accession_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
106 { name
=> 'accession',
109 dbxref
=> 'accession',
111 my $plot_cvterm = $schema->resultset("Cv::Cvterm")->create_with(
117 my $plot_of = $schema->resultset("Cv::Cvterm")->create_with(
119 cv
=> 'stock relationship',
123 ########################
125 #new spreadsheet, skip first column
126 my $spreadsheet=CXGN
::Tools
::File
::Spreadsheet
->new($file, 1);
128 # sp_term scale_name value name_string
129 my $scale_cv_name= "breeders scale";
130 # for this Unit Ontology has to be loaded!
131 my $unit_cv = $schema->resultset("Cv::Cv")->find(
132 { name
=> 'unit.ontology' } );
135 my $organism = $schema->resultset("Organism::Organism")->find_or_create(
138 species
=> 'Manihot esculenta',
140 my $organism_id = $organism->organism_id();
142 my @rows = $spreadsheet->row_labels();
143 my @columns = $spreadsheet->column_labels();
146 ##order_ylrg pegno environ location year rep ploidy entry DNA Samp ICASS Year cloned CO_334:0000008
147 foreach my $num (@rows ) {
148 my $plot = $spreadsheet->value_at($num, "pegno");
149 my $location = $spreadsheet->value_at($num, "location");
150 my $year = $spreadsheet->value_at($num, "year");
151 my $stock_name = $spreadsheet->value_at($num , "ICASS");
152 my $replicate = $spreadsheet->value_at($num, "rep");
153 my $uniquename = $stock_name ."_plot_".$plot."_".$replicate."_".$year."_".$location;
155 my $geolocation = $schema->resultset("NaturalDiversity::NdGeolocation")->find_or_create(
157 description
=> $location,
158 ##see if Peter has more data
159 #latitude => $latitude,
160 #longitude => $longitude,
161 #geodetic_datum => $datum,
162 #altitude => $altitude,
164 #store a project- combination of location and year
165 my $project = $schema->resultset("Project::Project")->find_or_create(
167 name
=> "Cassava $location $year",
168 description
=> "Plants assayed at $location in $year",
170 $project->create_projectprops( { 'project year' => $year }, { autocreate
=> 1 } );
171 #store the plant accession in the plot table
172 my $parent_stock = $schema->resultset("Stock::Stock")->find_or_create(
173 { organism_id
=> $organism_id,
175 uniquename
=> $stock_name,
176 type_id
=> $accession_cvterm->cvterm_id,
178 #store the plot in stock
179 my $plot_stock = $schema->resultset("Stock::Stock")->find_or_create(
180 { organism_id
=> $organism_id,
182 uniquename
=> $uniquename,
183 type_id
=> $plot_cvterm->cvterm_id()
185 ##and create the stock_relationship with the accession
186 $parent_stock->find_or_create_related('stock_relationship_objects', {
187 type_id
=> $plot_of->cvterm_id(),
188 subject_id
=> $plot_stock->stock_id(),
190 #add the owner for this stock
191 #check first if it exists
192 my $owner_insert = "INSERT INTO phenome.stock_owner (sp_person_id, stock_id) VALUES (?,?)";
193 my $sth = $dbh->prepare($owner_insert);
194 my $check_query = "SELECT sp_person_id FROM phenome.stock_owner WHERE ( sp_person_id = ? AND stock_id = ? ";
195 my $person_ids = $dbh->selectcol_arrayref($check_query, undef, ($sp_person_id, $plot_stock->stock_id) );
197 $sth->execute($sp_person_id, $plot_stock->stock_id);
199 $person_ids = $dbh->selectcol_arrayref($check_query, undef, ( $sp_person_id, $parent_stock->stock_id) );
201 $sth->execute($sp_person_id, $parent_stock->stock_id);
204 ###store a new nd_experiment. One experiment per stock
205 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create(
207 nd_geolocation_id
=> $geolocation->nd_geolocation_id(),
208 type_id
=> $pheno_cvterm->cvterm_id(),
211 $experiment->find_or_create_related('nd_experiment_projects', {
212 project_id
=> $project->project_id()
214 #link the experiment to the stock
215 $experiment->find_or_create_related('nd_experiment_stocks' , {
216 stock_id
=> $plot_stock->stock_id(),
217 type_id
=> $pheno_cvterm->cvterm_id(),
220 LABEL
: foreach my $label (@columns) {
221 my $value = $spreadsheet->value_at($num, $label);
222 ($value, undef) = split (/\s/, $value) ;
223 #print "Value $value \n";
224 next() if $value !~ /^\d/;
226 #db_name = CO , accession = 0000NNN
227 my ($db_name, $co_accession) = split (/\:/ , $label);
228 #print STDERR "db_name = '$db_name' sp_accession = '$sp_accession'\n";
229 next() if (!$co_accession);
230 my $co_term = $schema->resultset("General::Db")->find(
231 { name
=> 'CO' } )->find_related(
232 "dbxrefs", { accession
=>$co_accession , } )->find_related("cvterm" , {});
234 ######################
235 # this is valid for scale and qscale value types
236 #my $parent_cvterm = $schema->resultset("General::Db")->find(
237 # { name => $db_name } )->find_related(
238 #"dbxrefs", { accession=>$sp_accession , } )->find_related("cvterm" , {});
239 # if the value type is 'unit' we use the actual parent term for
240 # the annotation. If it's a 'scale' we need to find out the appropriate
241 #child term, as stored in cvtermprop.
242 # cvtermprops need to be pre-loaded using load_scale_cvtermprops.pl
243 ################################
244 ##make sure this rule is valid for all CO terms in the data file!!
245 my $observable_term = $co_term ;
247 #the term should have PO and PATO mapping maybe TO?
249 #make sure phenotype is loaded correctly for scale, qscale, unit.
250 #also store the unit in phenotype_cvterm
251 #############################3
252 ##observable_id is the same as cvalue_id for scale and qscale, and the parent term for unit.
253 my $phenotype = $co_term->find_or_create_related(
254 "phenotype_cvalues", {
255 observable_id
=> $observable_term->cvterm_id, #co_term
256 #attr_id => $pato_id,
258 uniquename
=> "Stock: " . $plot_stock->stock_id . ", Replicate: $replicate, plot: $plot," . ", Term: " . $co_term->name() ,
260 print "Stored phenotype " . $phenotype->phenotype_id() . " (observable = " . $observable_term->name . ") with cvalue " . $co_term->name . " value = $value \n\n" ;
261 # store the unit for the measurement (if exists) in phenotype_cvterm
262 #$phenotype->find_or_create_related("phenotype_cvterms" , {
263 #cvterm_id => $unit_cvterm->cvterm_id() } ) if $unit_cvterm;
264 #print "Loaded phenotype_cvterm with cvterm '" . $unit_cvterm->name() . " '\n" if $unit_cvterm ;
266 #link the phenotype to nd_experiment
267 my $nd_experiment_phenotype = $experiment->find_or_create_related('nd_experiment_phenotypes', { phenotype_id
=> $phenotype->phenotype_id() } );
273 if ($@
) { print "An error occured! Rolling backl!\n\n $@ \n\n "; }
275 print "TEST RUN. Rolling back and reseting database sequences!!\n\n";
276 foreach my $value ( keys %seq ) {
277 my $maxval= $seq{$value} || 0;
278 if ($maxval) { $dbh->do("SELECT setval ('$value', $maxval, true)") ; }
279 else { $dbh->do("SELECT setval ('$value', 1, false)"); }
282 print "Transaction succeeded! Commiting ! \n\n";