adding disconnect for dbh without autocommit
[phenome.git] / bin / po_annotations.pl
blobed183343bc2a6c898dc938c7e28abbf9e8c9115c
2 =head1 NAME
4 po_annotations.pl
6 =head1 DESCRIPTION
8 Usage: perl po_annotations.pl -H dbhost -D dbname -o outfile [-vdnF]
10 parameters
12 =over 6
14 =item -H
16 hostname for database [required]
18 =item -D
20 database name [required]
22 =item -v
24 verbose output
26 =item -d
28 database name for linking (must be in Db table) Default: PO
30 =item -n
32 controlled vocabulary name. Defaults to "plant_structure".
34 =item -o
36 output file
38 =back
40 The script looks for locus and individual plant ontology annotations in the database and prints them out in a file
41 formatted as listed here : http://plantontology.org/docs/otherdocs/assoc-file-format.html
42 The generated file should be submitted to POC (po@plantontology.org)
44 =head1 AUTHOR
46 Naama Menda <nm249@cornell.edu>
48 =head1 VERSION AND DATE
50 Version 0.1, January 2008.
52 =cut
55 use strict;
57 use Getopt::Std;
59 use CXGN::Phenome::Locus;
60 use CXGN::Phenome::Individual;
61 use CXGN::Chado::Organism;
63 use CXGN::DB::InsertDBH;
64 use CXGN::Chado::Dbxref;
65 use CXGN::Chado::Cvterm;
66 use CXGN::Chado::Ontology;
67 use CXGN::Chado::Relationship;
69 our ($opt_H, $opt_D, $opt_v, $opt_d, $opt_n, $opt_o);
71 #getopts('F:d:H:o:n:vD:t');
72 getopts('H:o:n:d:vD:t');
73 my $dbhost = $opt_H;
74 my $dbname = $opt_D;
76 if (!$dbhost && !$dbname) { die "Need -D dbname and -H hostname arguments.\n"; }
78 my $error = 0; # keep track of input errors (in command line switches).
79 if (!$opt_D) {
80 print STDERR "Option -D required. Must be a valid database name.\n";
81 $error=1;
84 if (!$opt_d) { $opt_d="PO"; } # the database name that Dbxrefs should refer to
85 print STDERR "Default for -d: $opt_d (specifies the database names for Dbxref objects)\n";
88 if (!$opt_n) {$opt_n = "plant_structure"; }
89 print STDERR "Default for -n $opt_n (specifies the ontology name for CV objects)\n";
90 my $aspect;
91 if ($opt_n eq 'plant_structure' ) { $aspect = "A";}
92 elsif ($opt_n eq 'plant_growth_and_development_stage') { $aspect = "G"; }
94 my $file = $opt_o;
96 if (!$file) {
97 print STDERR "A file is required as a command line argument.\n";
98 $error=1;
102 die "Some required command lines parameters not set. Aborting.\n" if $error;
105 open (OUT, ">$opt_o") ||die "can't open error file $file for writting.\n" ;
108 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
109 dbname=>$dbname,
110 } );
113 print STDERR "Connected to database $dbname on host $dbhost.\n";
114 my @locus_annot= CXGN::Phenome::Locus->get_locus_annotations($dbh, $opt_n);
116 my ($count, $count_ev)= (0 x 2);
117 print STDERR "Reading annotations from database..\n";
118 foreach my $annot(@locus_annot) {
119 $count++;
120 print STDERR "." if !$opt_v;
121 my $locus= CXGN::Phenome::Locus->new($dbh,$annot->get_locus_id);
122 my $dbxref=CXGN::Chado::Dbxref->new($dbh, $annot->get_dbxref_id);
123 my $dbxref_ev= $annot->get_locus_dbxref_evidence();
125 my $object_id= $locus->get_locus_id();
126 my $symbol = $locus->get_locus_symbol();
127 my $ontology_id= $opt_d. ":" . $dbxref->get_accession();
128 my $ref_object= CXGN::Chado::Dbxref->new($dbh, $dbxref_ev->get_reference_id() );
130 my $ev_object= CXGN::Chado::Dbxref->new($dbh, $dbxref_ev->get_evidence_code_id() );
131 my $r_synonyms= $ev_object->get_cvterm()->get_synonyms();
132 my @synonyms=@$r_synonyms;
133 my $ev_code= $synonyms[1];
135 #skip if no evidence code provided or if inferred from electronic annotation
136 if (!$ev_code || $ev_code eq 'IEA') {
137 next ;
138 print STDERR "no evidence code or electronic annotation. Skipping...\n" if $opt_v;
139 }else { print STDERR "Found annotation for locus $object_id ($symbol) $ontology_id evidence code: $ev_code\n"; }
140 my $db_reference= $ref_object->get_db_name() || warn "!!!No reference found for annotation $ontology_id locus $object_id ($symbol)\n";
141 if ($db_reference eq 'SGN_ref') {
142 $db_reference .= ":" . $ref_object->get_publication()->get_pub_id();
143 }elsif ($db_reference) {
144 $db_reference .= ":" . $ref_object->get_accession();
145 } else { $db_reference = undef; }
147 $count_ev++;
148 my $ev_with_object= CXGN::Chado::Dbxref->new($dbh, $dbxref_ev->get_evidence_with() );
149 my $ev_with_db= $ev_with_object->get_db_name();
150 if ($ev_with_db eq 'DB:GenBank_GI') { $ev_with_db = "NCBI_gi:";} #the db abbreviation in PO/GO
151 my $ev_with= $ev_with_db . $ev_with_object->get_accession();
152 if ($ev_with eq ':') {$ev_with = undef;}
153 my $object_name = $locus->get_locus_name();
154 my @locus_synonyms= $locus->get_locus_aliases(); #an array of LocusSynonym objects..
155 my $locus_s;
156 foreach my $ls(@locus_synonyms) {
157 my $alias= $ls->get_locus_alias();
158 $locus_s .=$alias ."|";
160 chop $locus_s; #remove last "|"
162 my $object_type = "gene";
163 my $organism = CXGN::Chado::Organism->new_with_common_name($dbh, $locus->get_common_name() );
164 my $taxon= "taxon:" . $organism->get_genbank_taxon_id();
165 my $date= $annot->get_modification_date();
166 $date = $annot->get_create_date() if (!$date) ;
167 if (!$date) { warn "!!!No date found for annotation $ontology_id locus $object_id ($symbol)\n" ; }
168 $date = substr $date, 0, 10;
169 $date =~ s/-//g;
171 print OUT "SGN_gene\t$object_id\t$symbol\t\t$ontology_id\t$db_reference\t$ev_code\t$ev_with\t$aspect\t$object_name\t$locus_s\t$object_type\t$taxon\t$date\tSGN\n";
174 print STDERR "Found $count annotations for SGN loci, printed $count_ev into out file $file... Done.\n";
176 my @pheno_annot= CXGN::Phenome::Individual->get_individual_annotations($dbh, $opt_n);
178 my ($count, $count_ev)= (0 x 2);
179 print STDERR "Reading annotations from SGN individual database..\n";
180 foreach my $annot(@pheno_annot) {
181 $count++;
182 print STDERR "." if !$opt_v;
183 my $ind= CXGN::Phenome::Individual->new($dbh,$annot->get_individual_id);
184 my $dbxref=CXGN::Chado::Dbxref->new($dbh, $annot->get_dbxref_id);
185 my $dbxref_ev= $annot->get_individual_dbxref_evidence();
187 my $object_id= $ind->get_individual_id();
188 my $symbol = $ind->get_name();
189 my $object_name= $ind->get_description();
190 my $ontology_id= $opt_d. ":" . $dbxref->get_accession();
191 my $ref_object= CXGN::Chado::Dbxref->new($dbh, $dbxref_ev->get_reference_id() );
193 my $ev_object= CXGN::Chado::Dbxref->new($dbh, $dbxref_ev->get_evidence_code_id() );
194 my $r_synonyms= $ev_object->get_cvterm()->get_synonyms();
195 my @synonyms=@$r_synonyms;
196 my $ev_code= $synonyms[1];
198 #skip if no evidence code provided or if inferred from electronic annotation
199 if (!$ev_code || $ev_code eq 'IEA') {
200 next ;
201 print STDERR "no evidence code or electronic annotation. Skipping...\n" if $opt_v;
202 }else { print STDERR "Found annotation for individual $object_id ($symbol) $ontology_id evidence code: $ev_code\n"; }
203 my $db_reference= $ref_object->get_db_name() || warn "!!!No reference found for annotation $ontology_id individual $object_id ($symbol)\n";
204 if ($db_reference eq 'SGN_ref') {
205 $db_reference .= ":" . $ref_object->get_publication()->get_pub_id();
206 }elsif ($db_reference) {
207 $db_reference .= ":" . $ref_object->get_accession();
208 } else { $db_reference = undef; }
210 $count_ev++;
211 my $ev_with_object= CXGN::Chado::Dbxref->new($dbh, $dbxref_ev->get_evidence_with() );
212 my $ev_with_db= $ev_with_object->get_db_name();
213 if ($ev_with_db eq 'DB:GenBank_GI') { $ev_with_db = "NCBI_gi:";} #the db abbreviation in PO/GO
214 my $ev_with= $ev_with_db . $ev_with_object->get_accession();
215 if ($ev_with eq ':') {$ev_with = undef;}
217 my $ind_s;
219 my $object_type = "phenotype";
220 my $organism = CXGN::Chado::Organism->new_with_common_name($dbh, $ind->get_common_name() );
221 my $taxon= "taxon:" . $organism->get_genbank_taxon_id();
222 my $date= $annot->get_modification_date();
223 $date = $annot->get_create_date() if (!$date) ;
224 if (!$date) { warn "!!!No date found for annotation $ontology_id individual $object_id ($symbol)\n" ; }
225 $date = substr $date, 0, 10;
226 $date =~ s/-//g;
228 print OUT "SGN_phenotype\t$object_id\t$symbol\t\t$ontology_id\t$db_reference\t$ev_code\t$ev_with\t$aspect\t$object_name\t$ind_s\t$object_type\t$taxon\t$date\tSGN\n";
231 close OUT;
233 print STDERR "Found $count annotations for SGN individuals, printed $count_ev into out file $file... Done.\n";