8 Usage: perl po_annotations.pl -H dbhost -D dbname -o outfile [-v] [-d db_name] [-n cv_name]
16 hostname for database [required]
20 database name [required]
28 database name for linking (must be in Db table) Default: PO
32 controlled vocabulary name. Defaults to "plant_structure".
40 The script looks for locus and individual plant ontology annotations in the database and prints them to a file
41 formatted as described here: http://plantontology.org/docs/otherdocs/assoc-file-format.html
42 The generated file should be submitted to POC (po@plantontology.org)
46 Naama Menda <nm249@cornell.edu>
48 =head1 VERSION AND DATE
50 Version 0.1, January 2008.
59 use CXGN
::Phenome
::Locus
;
60 use CXGN
::Phenome
::Individual
;
61 use CXGN
::Chado
::Organism
;
63 use CXGN
::DB
::InsertDBH
;
64 use CXGN
::Chado
::Dbxref
;
65 use CXGN
::Chado
::Cvterm
;
66 use CXGN
::Chado
::Ontology
;
67 use CXGN
::Chado
::Relationship
;
# Package variables that getopts() fills in from the command line.
our ( $opt_H, $opt_D );    # -H <dbhost>, -D <dbname>
our ( $opt_o );            # -o <outfile>
our ( $opt_d, $opt_n );    # -d <db name for Dbxrefs>, -n <cv name>
our ( $opt_v );            # -v: print per-annotation messages instead of progress dots

# Switches followed by ':' take a value; -v and -t are plain flags.
getopts('H:o:n:d:vD:t');
# Both the database host and the database name are mandatory, so abort as
# soon as EITHER one is missing (the previous '&&' only aborted when both
# were missing, letting a half-configured run continue).
if (!$dbhost || !$dbname) { die "Need -D dbname and -H hostname arguments.\n"; }
# Validate the remaining switches and fill in defaults. $error is checked
# by the final die in this section.
78 my $error = 0; # keep track of input errors (in command line switches).
# NOTE(review): the condition guarding this -D message is not visible here.
80 print STDERR
"Option -D required. Must be a valid database name.\n";
# -d falls back to "PO" when not given; the value is reported on STDERR.
84 if (!$opt_d) { $opt_d="PO"; } # the database name that Dbxrefs should refer to
85 print STDERR
"Default for -d: $opt_d (specifies the database names for Dbxref objects)\n";
# -n falls back to "plant_structure" when not given.
88 if (!$opt_n) {$opt_n = "plant_structure"; }
89 print STDERR
"Default for -n $opt_n (specifies the ontology name for CV objects)\n";
# Map the cv name to the one-letter aspect code written into the output file.
# Any other cv name leaves $aspect untouched by these two branches.
91 if ($opt_n eq 'plant_structure' ) { $aspect = "A";}
92 elsif ($opt_n eq 'plant_growth_and_development_stage') { $aspect = "G"; }
# NOTE(review): the condition guarding this output-file message is not visible here.
97 print STDERR
"A file is required as a command line argument.\n";
# Abort if any required switch was flagged as missing.
102 die "Some required command lines parameters not set. Aborting.\n" if $error;
# Open the output file given with -o for writing (truncating any existing
# file). Three-argument open keeps the mode separate from the file name, so
# a name starting with '>' or '|' cannot change how the file is opened.
# The bareword handle OUT is kept because the rest of the script prints to it.
# Dies with the file name and the system error ($!) when the open fails.
open(OUT, '>', $opt_o) || die "can't open output file $opt_o for writing: $!\n";
# Create the database handle used for every query below.
108 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
# NOTE(review): the remaining constructor arguments and the end of this call
# are not visible here.
113 print STDERR
"Connected to database $dbname on host $dbhost.\n";
# Fetch the locus annotations for the cv named by -n ($opt_n).
114 my @locus_annot= CXGN
::Phenome
::Locus
->get_locus_annotations($dbh, $opt_n);
# Counters for the locus pass, both starting at numeric 0.
# The list-repetition form "(0) x 2" is required: "(0 x 2)" is string
# repetition and yields the single string "00", which left $count_ev
# undefined and made $count++ produce "01", "02", ... via string increment.
my ($count, $count_ev) = (0) x 2;
# First pass: walk over every locus annotation and collect the values for
# one output row.
117 print STDERR
"Reading annotations from database..\n";
118 foreach my $annot(@locus_annot) {
# Progress dot on STDERR unless -v was given.
120 print STDERR
"." if !$opt_v;
# Load the annotated locus and the ontology term (dbxref) it is linked to.
121 my $locus= CXGN
::Phenome
::Locus
->new($dbh,$annot->get_locus_id);
122 my $dbxref=CXGN
::Chado
::Dbxref
->new($dbh, $annot->get_dbxref_id);
# Evidence record of this annotation; it supplies the reference, evidence-code
# and evidence-with ids used below.
123 my $dbxref_ev= $annot->get_locus_dbxref_evidence();
125 my $object_id= $locus->get_locus_id();
126 my $symbol = $locus->get_locus_symbol();
# Ontology id is "<-d value>:<accession>".
127 my $ontology_id= $opt_d. ":" . $dbxref->get_accession();
128 my $ref_object= CXGN
::Chado
::Dbxref
->new($dbh, $dbxref_ev->get_reference_id() );
130 my $ev_object= CXGN
::Chado
::Dbxref
->new($dbh, $dbxref_ev->get_evidence_code_id() );
# The evidence code is taken from the second entry of the cvterm's synonym list.
# NOTE(review): presumably that entry holds the short code (e.g. 'IEA') - confirm.
131 my $r_synonyms= $ev_object->get_cvterm()->get_synonyms();
132 my @synonyms=@
$r_synonyms;
133 my $ev_code= $synonyms[1];
135 #skip if no evidence code provided or if inferred from electronic annotation
# NOTE(review): no statement that actually skips the row is visible in this
# branch; only the STDERR message is.
136 if (!$ev_code || $ev_code eq 'IEA') {
138 print STDERR
"no evidence code or electronic annotation. Skipping...\n" if $opt_v;
139 }else { print STDERR
"Found annotation for locus $object_id ($symbol) $ontology_id evidence code: $ev_code\n"; }
# Build the reference column as "<db name>:<id>".
# The db name is fetched first and tested separately: the previous
# "get_db_name() || warn ..." stored warn's return value (1) in
# $db_reference, so a missing reference was written out as "1:<accession>"
# and the branch meant to clear it could never run.
my $db_reference = $ref_object->get_db_name();
if (!$db_reference) {
    warn "!!!No reference found for annotation $ontology_id locus $object_id ($symbol)\n";
    $db_reference = undef;
}
elsif ($db_reference eq 'SGN_ref') {
    # SGN references are identified by their publication id.
    $db_reference .= ":" . $ref_object->get_publication()->get_pub_id();
}
else {
    # Every other database is identified by the dbxref accession.
    $db_reference .= ":" . $ref_object->get_accession();
}
# "Evidence with" column: db name followed by the accession of the dbxref
# returned by get_evidence_with().
148 my $ev_with_object= CXGN
::Chado
::Dbxref
->new($dbh, $dbxref_ev->get_evidence_with() );
149 my $ev_with_db= $ev_with_object->get_db_name();
150 if ($ev_with_db eq 'DB:GenBank_GI') { $ev_with_db = "NCBI_gi:";} #the db abbreviation in PO/GO
# NOTE(review): only the GenBank replacement above ends in ':'; any other db
# name is joined to the accession without a separator - confirm this is intended.
151 my $ev_with= $ev_with_db . $ev_with_object->get_accession();
152 if ($ev_with eq ':') {$ev_with = undef;}
153 my $object_name = $locus->get_locus_name();
154 my @locus_synonyms= $locus->get_locus_aliases(); #an array of LocusSynonym objects..
# Join all aliases into one "|"-separated string.
# NOTE(review): the declaration/reset of $locus_s and the end of this inner
# loop are not visible here.
156 foreach my $ls(@locus_synonyms) {
157 my $alias= $ls->get_locus_alias();
158 $locus_s .=$alias ."|";
160 chop $locus_s; #remove last "|"
162 my $object_type = "gene";
# Taxon column is "taxon:<genbank taxon id>" of the locus' organism.
163 my $organism = CXGN
::Chado
::Organism
->new_with_common_name($dbh, $locus->get_common_name() );
164 my $taxon= "taxon:" . $organism->get_genbank_taxon_id();
# Prefer the modification date, fall back to the creation date, and keep only
# the first 10 characters of the value.
165 my $date= $annot->get_modification_date();
166 $date = $annot->get_create_date() if (!$date) ;
167 if (!$date) { warn "!!!No date found for annotation $ontology_id locus $object_id ($symbol)\n" ; }
168 $date = substr $date, 0, 10;
# One tab-separated row per annotation (15 columns; the 4th is always empty).
171 print OUT
"SGN_gene\t$object_id\t$symbol\t\t$ontology_id\t$db_reference\t$ev_code\t$ev_with\t$aspect\t$object_name\t$locus_s\t$object_type\t$taxon\t$date\tSGN\n";
# Summary of the locus pass.
# NOTE(review): the end of the foreach loop, the counter increments and the
# assignment of $file are not visible here.
174 print STDERR
"Found $count annotations for SGN loci, printed $count_ev into out file $file... Done.\n";
# Fetch the individual annotations for the cv named by -n ($opt_n).
176 my @pheno_annot= CXGN
::Phenome
::Individual
->get_individual_annotations($dbh, $opt_n);
# Fresh counters for the individual pass, both starting at numeric 0.
# "(0) x 2" is list repetition; the former "(0 x 2)" was string repetition,
# which set $count to the string "00" and left $count_ev undefined.
# The 'my' is kept as in the original statement, so these variables shadow
# the counters declared for the locus pass.
my ($count, $count_ev) = (0) x 2;
# Second pass: walk over every individual annotation and collect the values
# for one output row.
179 print STDERR
"Reading annotations from SGN individual database..\n";
180 foreach my $annot(@pheno_annot) {
# Progress dot on STDERR unless -v was given.
182 print STDERR
"." if !$opt_v;
# Load the annotated individual and the ontology term (dbxref) it is linked to.
183 my $ind= CXGN
::Phenome
::Individual
->new($dbh,$annot->get_individual_id);
184 my $dbxref=CXGN
::Chado
::Dbxref
->new($dbh, $annot->get_dbxref_id);
# Evidence record of this annotation; it supplies the reference, evidence-code
# and evidence-with ids used below.
185 my $dbxref_ev= $annot->get_individual_dbxref_evidence();
187 my $object_id= $ind->get_individual_id();
188 my $symbol = $ind->get_name();
189 my $object_name= $ind->get_description();
# Ontology id is "<-d value>:<accession>".
190 my $ontology_id= $opt_d. ":" . $dbxref->get_accession();
191 my $ref_object= CXGN
::Chado
::Dbxref
->new($dbh, $dbxref_ev->get_reference_id() );
193 my $ev_object= CXGN
::Chado
::Dbxref
->new($dbh, $dbxref_ev->get_evidence_code_id() );
# The evidence code is taken from the second entry of the cvterm's synonym list.
# NOTE(review): presumably that entry holds the short code (e.g. 'IEA') - confirm.
194 my $r_synonyms= $ev_object->get_cvterm()->get_synonyms();
195 my @synonyms=@
$r_synonyms;
196 my $ev_code= $synonyms[1];
198 #skip if no evidence code provided or if inferred from electronic annotation
# NOTE(review): no statement that actually skips the row is visible in this
# branch; only the STDERR message is.
199 if (!$ev_code || $ev_code eq 'IEA') {
201 print STDERR
"no evidence code or electronic annotation. Skipping...\n" if $opt_v;
202 }else { print STDERR
"Found annotation for individual $object_id ($symbol) $ontology_id evidence code: $ev_code\n"; }
# Build the reference column as "<db name>:<id>".
# The db name is fetched first and tested separately: the previous
# "get_db_name() || warn ..." stored warn's return value (1) in
# $db_reference, so a missing reference was written out as "1:<accession>"
# and the branch meant to clear it could never run.
my $db_reference = $ref_object->get_db_name();
if (!$db_reference) {
    warn "!!!No reference found for annotation $ontology_id individual $object_id ($symbol)\n";
    $db_reference = undef;
}
elsif ($db_reference eq 'SGN_ref') {
    # SGN references are identified by their publication id.
    $db_reference .= ":" . $ref_object->get_publication()->get_pub_id();
}
else {
    # Every other database is identified by the dbxref accession.
    $db_reference .= ":" . $ref_object->get_accession();
}
# "Evidence with" column: db name followed by the accession of the dbxref
# returned by get_evidence_with().
211 my $ev_with_object= CXGN
::Chado
::Dbxref
->new($dbh, $dbxref_ev->get_evidence_with() );
212 my $ev_with_db= $ev_with_object->get_db_name();
213 if ($ev_with_db eq 'DB:GenBank_GI') { $ev_with_db = "NCBI_gi:";} #the db abbreviation in PO/GO
# NOTE(review): only the GenBank replacement above ends in ':'; any other db
# name is joined to the accession without a separator - confirm this is intended.
214 my $ev_with= $ev_with_db . $ev_with_object->get_accession();
215 if ($ev_with eq ':') {$ev_with = undef;}
219 my $object_type = "phenotype";
# Taxon column is "taxon:<genbank taxon id>" of the individual's organism.
220 my $organism = CXGN
::Chado
::Organism
->new_with_common_name($dbh, $ind->get_common_name() );
221 my $taxon= "taxon:" . $organism->get_genbank_taxon_id();
# Prefer the modification date, fall back to the creation date, and keep only
# the first 10 characters of the value.
222 my $date= $annot->get_modification_date();
223 $date = $annot->get_create_date() if (!$date) ;
224 if (!$date) { warn "!!!No date found for annotation $ontology_id individual $object_id ($symbol)\n" ; }
225 $date = substr $date, 0, 10;
# One tab-separated row per annotation (15 columns; the 4th is always empty).
# NOTE(review): no assignment to $ind_s is visible in this loop - confirm where
# the synonym column gets its value.
228 print OUT
"SGN_phenotype\t$object_id\t$symbol\t\t$ontology_id\t$db_reference\t$ev_code\t$ev_with\t$aspect\t$object_name\t$ind_s\t$object_type\t$taxon\t$date\tSGN\n";
# Summary of the individual pass.
# NOTE(review): the end of the foreach loop, the counter increments and the
# assignment of $file are not visible here.
233 print STDERR
"Found $count annotations for SGN individuals, printed $count_ev into out file $file... Done.\n";