update obsolete go and po locus annotations
[phenome.git] / bin / flag_obsolet_annot.pl
blobb2b512c1d79f86933e936bcd099e48afac3e8684
2 =head1 NAME
4 flag_obsolete_annot.pl
6 =head1 DESCRIPTION
8 Usage: perl flag_obsolete_annot.pl -H dbhost -D dbname -d db -o outfile [-v] [-t]
10 parameters
12 =over 6
14 =item -H
16 hostname for database [required]
18 =item -D
20 database name [required]
22 =item -v
24 verbose output
26 =item -d
28 db name of the controlled vocabulary, PO or GO [required]
30 =item -o
32 output file [required]
34 =item -t
36 test mode (all database changes are rolled back instead of committed)
38 =back
41 The script looks for locus and individual ontology annotations in the database and prints out a
42 list of annotations that require updating since the cvterms were obsoleted.
43 Usually, running this script is a good idea after updating cvterms in the database (see sgn-tools/phenome/load_cvterms.pl)
44 and prior to submitting an ontology association file to PO/GO.
46 =head1 AUTHOR
48 Naama Menda <nm249@cornell.edu>
50 =head1 VERSION AND DATE
52 Version 1.0, July 2013.
54 =cut
57 #! /usr/bin/perl
58 use strict;
60 use Getopt::Std;
62 use CXGN::Phenome::Locus;
63 use Bio::Chado::Schema;
65 use CXGN::Chado::Organism;
67 use CXGN::DB::InsertDBH;
68 use CXGN::Chado::Dbxref;
69 use CXGN::Chado::Cvterm;
70 use CXGN::Chado::Ontology;
71 use CXGN::Chado::Relationship;
# Command-line switches, filled in by Getopt::Std::getopts() below.
our ($opt_H, $opt_D, $opt_v, $opt_o, $opt_d, $opt_t);

# -H, -D, -d and -o take a value; -v and -t are boolean flags.
getopts('H:o:d:vD:t');

my $dbhost = $opt_H;
my $dbname = $opt_D;
my $file   = $opt_o;

# Keep track of input errors (in command line switches). Every required
# switch is checked before aborting so that the user sees all the missing
# ones in a single run.
my $error = 0;

if (!$dbhost) {
    print STDERR "Option -H required. Must be a valid database host name.\n";
    $error = 1;
}
if (!$dbname) {
    print STDERR "Option -D required. Must be a valid database name.\n";
    $error = 1;
}
if (!$opt_d) {
    print STDERR "Option -d required with a db name (PO or GO).\n";
    $error = 1;
}
if (!$file) {
    print STDERR "A file is required as a command line argument (option -o) .\n";
    $error = 1;
}

die "Some required command lines parameters not set. Aborting.\n" if $error;

# Three-argument open: the file name is never parsed as part of the mode,
# and $! tells the user why the open failed. The handle keeps the name OUT
# because the rest of the script prints to it.
open(OUT, '>', $file)
    or die "can't open output file $file for writing: $!\n";
# Database handle used for every statement issued by this script.
my $dbh = CXGN::DB::InsertDBH->new(
    {
        dbname => $dbname,
        dbhost => $dbhost,
    }
);

# The Bio::Chado::Schema connection is given a callback that hands back the
# handle underlying $dbh.
my $schema = Bio::Chado::Schema->connect(
    sub {
        return $dbh->get_actual_dbh();
    }
);

$dbh->do('SET search_path to public');

print STDERR "Connected to database $dbname on host $dbhost.\n";

# Locus annotations for the db named by -d; examined one by one below.
my @locus_annot = CXGN::Phenome::Locus->get_annotations_by_db( $dbh, $opt_d );

# $count   - obsolete annotations seen in the current pass
# $u_count - annotations updated in the current pass
my ( $count, $u_count ) = ( 0, 0 );

print STDERR "Reading annotations from database..\n";
# Both passes (locus annotations, then stock annotations) run inside one
# eval, so that a failure anywhere leaves its message in $@ for the code
# that follows this block to inspect.
eval {

    # ---- Pass 1: locus annotations ---------------------------------------
    foreach my $annot (@locus_annot) {
        my $locus_id = $annot->get_locus_id();

        # Progress dots by default, one trace line per annotation with -v.
        if ($opt_v) {
            print STDERR "locus_id = $locus_id\n";
        }
        else {
            print STDERR ".";
        }

        # Fetched into a scalar first so the hash passed to find() always
        # has exactly one key/value pair.
        my $dbxref_id = $annot->get_dbxref_id;
        my $dbxref    = $schema->resultset("General::Dbxref")
            ->find( { dbxref_id => $dbxref_id } );
        if ( !$dbxref ) {
            print STDERR "!no dbxref found for an annotation of locus_id $locus_id, skipping\n";
            next;
        }

        my $cvterm = $dbxref->search_related('cvterm')->single;
        if ( !$cvterm ) {
            print STDERR "!no cvterm found for dbxref " . $dbxref->accession . " (locus_id $locus_id), skipping\n";
            next;
        }

        # cvterm_dbxref rows of this term whose dbxref belongs to the db
        # named by -d. Building the resultset does not query the database.
        my $alt_ids_rs = $cvterm->search_related(
            'cvterm_dbxrefs',
            { "db.name" => $opt_d },
            { join      => { "dbxref" => "db" } }
        );
        if ($opt_v) {
            print STDERR "found " . $alt_ids_rs->count . " alternative ids\n\n";
        }

        next if !$cvterm->is_obsolete;

        $count++;

        # The locus object is only needed for reporting, so it is built
        # only for obsolete annotations.
        my $locus       = CXGN::Phenome::Locus->new( $dbh, $locus_id );
        my $locus_name  = $locus->get_locus_name();
        my $cvterm_name = $cvterm->name;
        my $accession   = $dbxref->accession;

        print STDERR "Locus $locus_name (id=$locus_id) has obsolete annotation: $accession:$cvterm_name\n";
        print OUT "Locus_id $locus_id (name = $locus_name) has obsolete annotation: $accession:$cvterm_name\n";

        # A resultset object is always true in boolean context, so the only
        # meaningful test is whether a first row actually exists.
        my $first_alt_id = $alt_ids_rs->next;
        if ($first_alt_id) {
            my $alt_dbxref = $first_alt_id->dbxref;

            # update locus_dbxref with the alternative dbxref_id
            $annot->update_annotation( $alt_dbxref->dbxref_id );
            $u_count++;
            print STDERR "*Updated annotation to " . $alt_dbxref->accession . "!\n";
            print OUT "*Updated annotation to " . $alt_dbxref->accession . " \n";
        }
        else {
            print STDERR "!did not find alternative cvterm for this obsolete annotation! $cvterm_name\n";
            print OUT "$locus_id : \t $opt_d : $accession: !did not find alternative cvterm for this obsolete annotation! $cvterm_name\n";
        }
    }

    print STDERR "Found $count obsolete annotations for SGN loci, $u_count annotations were updated.\n printed out file $file... Done.\n";

    # ---- Pass 2: stock annotations ---------------------------------------
    # stock_cvterm rows whose cvterm has a dbxref in the db named by -d.
    my $pheno_rs = $schema->resultset("Stock::StockCvterm")->search(
        { 'db.name' => $opt_d },
        { join      => { 'cvterm' => { 'dbxref' => 'db' } } },
    );

    $count   = 0;
    $u_count = 0;
    print STDERR "Reading annotations from SGN individual database..\n";

    while ( my $annot = $pheno_rs->next ) {
        print STDERR "." if !$opt_v;

        my $cvterm = $annot->search_related('cvterm')->single;
        next if !$cvterm->is_obsolete();

        $count++;

        my $stock_id    = $annot->stock_id;
        my $stock       = $annot->search_related('stock')->single;
        my $stock_name  = $stock->name();
        my $dbxref      = $cvterm->search_related('dbxref')->single;
        my $cvterm_name = $cvterm->name();
        my $accession   = $dbxref->accession();

        print STDERR "Stock $stock_name (id=$stock_id) has obsolete annotation: $accession:$cvterm_name\n";
        print OUT "Stock_id $stock_id (name = $stock_name) has obsolete annotation: $accession:$cvterm_name\n";

        # Same db filter as in the locus pass.
        my $alt_ids_rs = $cvterm->search_related(
            'cvterm_dbxrefs',
            { "db.name" => $opt_d },
            { join      => { "dbxref" => "db" } }
        );

        # Test the fetched row, not the resultset: calling a method on the
        # undef returned by an empty resultset would abort the eval and
        # thereby undo the locus updates made above as well.
        my $first_alt_id = $alt_ids_rs->next;
        if ($first_alt_id) {

            # The stock_cvterm update is deliberately not performed (it was
            # already disabled in this script), so $u_count is not
            # incremented; the alternative id is only reported.
            # NOTE(review): the disabled update took its target from
            # $first_alt_id->cvterm, which presumably is the same obsolete
            # term this row was reached from - confirm before re-enabling.
            my $alt_accession = $first_alt_id->dbxref->accession;
            print STDERR "*Found alternative id $alt_accession, stock annotation was NOT updated\n";
            print OUT "*Found alternative id $alt_accession, stock annotation was NOT updated\n";
        }
        else {
            print STDERR "!did not find alternative cvterm for this obsolete annotation! $cvterm_name\n";
            print OUT "!did not find alternative cvterm for this obsolete annotation! $cvterm_name\n";
        }
    }
};
# Copy the eval error right away: $@ is a global that any later eval
# (including one inside a called module) may overwrite.
my $run_error = $@;

if ($run_error) {

    # A failure undoes every change and is reported with a non-zero exit
    # status, so that calling scripts can tell the run did not succeed.
    print STDERR "AN ERROR OCCURRED: $run_error\n";
    print OUT "AN ERROR OCCURRED: $run_error\n";
    $dbh->rollback();
    close(OUT);
    exit(1);
}
elsif ($opt_t) {

    # Trial mode: the report has been produced, but nothing is kept.
    print STDERR "Running in trial mode (-t): all database changes are rolled back.\n";
    print OUT "Running in trial mode (-t): all database changes are rolled back.\n";
    $dbh->rollback();
}
else {
    $dbh->commit();
}

# Buffered write errors only show up when the handle is closed.
close(OUT) or print STDERR "Could not close output file $file: $!\n";

print STDERR "Found $count obsolete annotations for SGN individuals, $u_count annotations were updated.\n printed into out file $file... Done.\n";