check if new annotation is already in the db
[phenome.git] / bin / update_locus_annotations.pl
blobfe910040ed3ed49ff9b6e494acbf946d1fd2eeee
1 #!/usr/bin/perl
3 =head1
5 update_locus_annotations.pl
7 =head1 SYNOPSIS
9 update_locus_annotationss.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "sandbox"
15 -i path to infile (required)
17 =head1 DESCRIPTION
19 This script updates locus annotations associated with obsolete cvterms to the current ones.
20 The infile provided has two columns, in the first column is the cvterm accession as it is in the database (e.g. GO:0075177),
21 and in the second column is the new cvterm accession ( e.g. GO:0075178) .
22 There is no header on the infile and the infile is .xls or .xlsx.
25 =head1 AUTHOR
27 Naama Menda (nm249@cornell.edu)
29 =cut
31 use strict;
33 use Getopt::Std;
34 use Data::Dumper;
35 use Carp qw /croak/ ;
36 use Pod::Usage;
37 use Spreadsheet::ParseExcel;
38 use Spreadsheet::ParseXLSX;
39 use Bio::Chado::Schema;
40 use CXGN::Phenome::Schema;
41 use CXGN::DB::InsertDBH;
42 use Try::Tiny;
44 our ($opt_H, $opt_D, $opt_i, $opt_t);
46 getopts('H:D:ti:');
48 if (!$opt_H || !$opt_D || !$opt_i ) {
49 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file) \n");
52 my $dbhost = $opt_H;
53 my $dbname = $opt_D;
55 # Match a dot, extension .xls / .xlsx
56 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
57 my $parser;
59 if ($extension eq '.xlsx') {
60 $parser = Spreadsheet::ParseXLSX->new();
62 else {
63 $parser = Spreadsheet::ParseExcel->new();
66 my $excel_obj = $parser->parse($opt_i);
68 my $dbh = CXGN::DB::InsertDBH->new({
69 dbhost=>$dbhost,
70 dbname=>$dbname,
71 dbargs => {AutoCommit => 1, RaiseError => 1}
72 });
74 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
76 my $phenome_schema= CXGN::Phenome::Schema->connect( sub { $dbh->get_actual_dbh } , { on_connect_do => ['set search_path to public,phenome;'] } );
78 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
79 my ( $row_min, $row_max ) = $worksheet->row_range();
80 my ( $col_min, $col_max ) = $worksheet->col_range();
82 my $coderef = sub {
83 for my $row ( 0 .. $row_max ) {
85 my $db_cvterm = $worksheet->get_cell($row,0)->value();
86 my $file_cvterm = $worksheet->get_cell($row,1)->value();
88 my ($old_db_name, $old_accession ) = split ":", $db_cvterm ;
89 my ($new_db_name, $new_accession ) = split ":" , $file_cvterm;
92 my $old_dbxref = $schema->resultset('General::Dbxref')->find(
94 'db.name' => $old_db_name,
95 'accession' => $old_accession,
97 { join => 'db' }
98 ) ;
99 if ( !defined $old_dbxref ) {
100 print STDERR "Cannot find cvterm $db_cvterm in the database! skipping\n";
101 next();
103 my $new_dbxref;
104 if ($new_db_name ne 'DELETE' ) {
105 $new_dbxref = $schema->resultset('General::Dbxref')->find(
107 'db.name' => $new_db_name,
108 'accession' => $new_accession,
110 { join => 'db'}
112 if ( !defined $new_dbxref ) {
113 print STDERR "Cannot find cvterm $file_cvterm in the database! skipping\n";
114 next();
117 my $locus_dbxref = $phenome_schema->resultset('LocusDbxref')->search(
119 dbxref_id => $old_dbxref->dbxref_id,
120 } ) ;
121 my $count = $locus_dbxref->count();
122 print STDERR "Found $count locus annotations with obsolete cvterm $db_cvterm\n";
123 if ($new_dbxref && $count>0) {
124 #check if new annotation already exists
125 for my $ld ($locus_dbxref->all) {
126 my $exists = $phenome_schema->resultset('LocusDbxref')->find(
128 locus_id =>$ld->locus_id,
129 dbxref_id => $new_dbxref->dbxref_id
132 if ($exists) {
133 print STDERR "cvterm $file_cvterm is already associated with locus ". $ld->locus_id . " Deleting old annotation without an update\n";
134 $ld->delete();
135 } else {
136 print STDERR "Updating cvterm $db_cvterm to $file_cvterm\n";
138 $ld->update( { dbxref_id => $new_dbxref->dbxref_id } );
141 } elsif (!$new_dbxref && $count>0) {
142 print STDERR "Deleting cvterm $db_cvterm from locus_dbxref\n";
143 $locus_dbxref->delete();
148 my $transaction_error;
149 try {
150 $schema->txn_do($coderef);
151 } catch {
152 $transaction_error = $_;
155 if ($transaction_error || $opt_t) {
156 $dbh->rollback;
157 print STDERR "Transaction error storing terms: $transaction_error\n";
158 } else {
159 print STDERR "Committing updates.\n";