5 update_locus_annotations.pl
9 update_locus_annotationss.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "sandbox"
15 -i path to infile (required)
19 This script updates locus annotations associated with obsolete cvterms to the current ones.
20 The infile provided has two columns, in the first column is the cvterm accession as it is in the database (e.g. GO:0075177),
21 and in the second column is the new cvterm accession ( e.g. GO:0075178) .
22 There is no header on the infile and the infile is .xls or .xlsx.
27 Naama Menda (nm249@cornell.edu)
37 use Spreadsheet
::ParseExcel
;
38 use Spreadsheet
::ParseXLSX
;
39 use Bio
::Chado
::Schema
;
40 use CXGN
::Phenome
::Schema
;
41 use CXGN
::DB
::InsertDBH
;
44 our ($opt_H, $opt_D, $opt_i, $opt_t);
48 if (!$opt_H || !$opt_D || !$opt_i ) {
49 pod2usage
(-verbose
=> 2, -message
=> "Must provide options -H (hostname), -D (database name), -i (input file) \n");
55 # Match a dot, extension .xls / .xlsx
56 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
59 if ($extension eq '.xlsx') {
60 $parser = Spreadsheet
::ParseXLSX
->new();
63 $parser = Spreadsheet
::ParseExcel
->new();
66 my $excel_obj = $parser->parse($opt_i);
68 my $dbh = CXGN
::DB
::InsertDBH
->new({
71 dbargs
=> {AutoCommit
=> 1, RaiseError
=> 1}
74 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
76 my $phenome_schema= CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh } , { on_connect_do
=> ['set search_path to public,phenome;'] } );
78 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
79 my ( $row_min, $row_max ) = $worksheet->row_range();
80 my ( $col_min, $col_max ) = $worksheet->col_range();
83 for my $row ( 0 .. $row_max ) {
85 my $db_cvterm = $worksheet->get_cell($row,0)->value();
86 my $file_cvterm = $worksheet->get_cell($row,1)->value();
88 my ($old_db_name, $old_accession ) = split ":", $db_cvterm ;
89 my ($new_db_name, $new_accession ) = split ":" , $file_cvterm;
92 my $old_dbxref = $schema->resultset('General::Dbxref')->find(
94 'db.name' => $old_db_name,
95 'accession' => $old_accession,
99 if ( !defined $old_dbxref ) {
100 print STDERR
"Cannot find cvterm $db_cvterm in the database! skipping\n";
104 if ($new_db_name ne 'DELETE' ) {
105 $new_dbxref = $schema->resultset('General::Dbxref')->find(
107 'db.name' => $new_db_name,
108 'accession' => $new_accession,
112 if ( !defined $new_dbxref ) {
113 print STDERR
"Cannot find cvterm $file_cvterm in the database! skipping\n";
117 my $locus_dbxref = $phenome_schema->resultset('LocusDbxref')->search(
119 dbxref_id
=> $old_dbxref->dbxref_id,
121 my $count = $locus_dbxref->count();
122 print STDERR
"Found $count locus annotations with obsolete cvterm $db_cvterm\n";
123 if ($new_dbxref && $count>0) {
124 print STDERR
"Updating cvterm $db_cvterm to $file_cvterm\n";
126 $locus_dbxref->update( { dbxref_id
=> $new_dbxref->dbxref_id } );
127 } elsif (!$new_dbxref && $count>0) {
128 print STDERR
"Deleting cvterm $db_cvterm from locus_dbxref\n";
129 $locus_dbxref->delete();
134 my $transaction_error;
136 $schema->txn_do($coderef);
138 $transaction_error = $_;
141 if ($transaction_error || $opt_t) {
143 print STDERR
"Transaction error storing terms: $transaction_error\n";
145 print STDERR
"Committing updates.\n";