8 Usage: perl dump_sgn_loci.pl -H dbhost -D dbname -o outfile [-n common_name] [-v]
16 hostname for database [required]
20 database name [required]
28 Optional: common_name. Limits the results to one organism (e.g. tomato).
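38 The script dumps SGN loci into a tab-delimited file, one locus per line.
39 Columns: common_name, locus_id, locus_name, locus_symbol, gene_model, SGN-unigene ids (SGN-U...), GenBank accessions, GO annotations, PO annotations, SP annotations. Multiple values within one column are separated by '|'.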
44 Naama Menda <nm249@cornell.edu>
46 =head1 VERSION AND DATE
48 Version 0.2, March 2012.
58 use CXGN
::Phenome
::Locus
;
59 use CXGN
::Chado
::Organism
;
61 use CXGN
::DB
::InsertDBH
;
62 use CXGN
::Chado
::Dbxref
;
65 use Bio
::Chado
::Schema
;
67 our ($opt_H, $opt_D, $opt_v, $opt_o, $opt_n);
# Both connection parameters are mandatory, as the message states, so abort
# when either one is missing. (Testing with && only aborted when both were
# absent and let a run with a single missing value continue.)
if (!$dbhost || !$dbname) { die "Need -D dbname and -H hostname arguments.\n"; }
75 my $error = 0; # keep track of input errors (in command line switches).
77 print STDERR
"Option -D required. Must be a valid database name.\n";
81 print STDERR
"$opt_D, $opt_H, $opt_n, $opt_o\n";
85 print STDERR
"A file is required as a command line argument.\n";
# Abort if an input error was recorded in $error while checking the
# command line switches.
die "Some required command lines parameters not set. Aborting.\n" if $error;

# Open the file named by -o for writing. The three-argument form keeps
# characters in the file name from being interpreted as part of the open
# mode, and $! reports why the open failed. The OUT handle is the one the
# later "print OUT" statements write to.
open(OUT, '>', $opt_o)
    or die "Can't open output file $opt_o for writing: $!\n";
94 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
print STDERR "Connected to database $dbname on host $dbhost.\n";

# Build the query that selects the ids of the loci to dump, ordered by
# locus_id. Without -n every locus is selected; with -n the selection is
# restricted to the common name that matches the given value via ILIKE.
my $query = "SELECT locus_id FROM phenome.locus";
$query .= " WHERE common_name_id = (SELECT common_name_id FROM sgn.common_name where common_name ilike ?) " if $opt_n;
$query .= " ORDER BY locus_id";
my $common_name = $opt_n || undef;

# The statement contains a placeholder only when -n was given, so supply a
# bind value only in that case. Passing a value to a statement without
# placeholders is a bind-count mismatch and makes execute() fail.
my @bind_values = $opt_n ? ($common_name) : ();

my $sth = $dbh->prepare($query);
$sth->execute(@bind_values);
# Write the header row of the tab-delimited output. There is exactly one
# heading per field of the data rows printed for each locus; the doubled tab
# that used to follow "GenBank accessions" created an empty eleventh column
# and shifted the GO/PO/SP headings away from their data.
my @header_columns = (
    'common_name',
    'locus_id',
    'locus_name',
    'locus_symbol',
    'gene_model',
    'SGN-unigenes',
    'GenBank accessions',
    'GO_annotations',
    'PO_annotations',
    'SP_annotations',
);
print OUT join("\t", @header_columns) . "\n";
108 while (my ($locus_id) = $sth->fetchrow_array()) {
# Body of the per-locus loop: load the locus for the current $locus_id,
# collect its identifiers and annotations, and write one row to OUT.
my $locus = CXGN::Phenome::Locus->new($dbh, $locus_id);
my $symbol = $locus->get_locus_symbol();
my $name = $locus->get_locus_name();
my $common_name = $locus->get_common_name();

# Unigenes requested with current => 1, rendered as SGN-U<id> joined by '|'.
my @u_objects = $locus->get_unigenes( { current => 1 } );
my @unigenes = map { 'SGN-U' . $_->get_unigene_id() } @u_objects;
my $unigene_string = join '|', @unigenes;

# Dbxref lists keyed by database name. Each list entry is an array ref whose
# first element is the dbxref object. A locus may have no entry for a given
# database (NOTE(review): assumed - confirm against get_dbxref_lists), so
# every lookup falls back to an empty list instead of dereferencing undef.
my %dbxrefs = $locus->get_dbxref_lists();

my @gb_accs = map { $_->[0]->get_feature->get_uniquename() }
    @{ $dbxrefs{'DB:GenBank_GI'} || [] };
my $gb_string = join '|', @gb_accs;

my $genome_locus = $locus->get_genome_locus;

# Ontology annotations, each accession prefixed with its ontology name.
my @go = map { 'GO:' . $_->[0]->get_accession } @{ $dbxrefs{'GO'} || [] };
my $go_string = join '|', @go;
my @po = map { 'PO:' . $_->[0]->get_accession } @{ $dbxrefs{'PO'} || [] };
my $po_string = join '|', @po;
my @sp = map { 'SP:' . $_->[0]->get_accession } @{ $dbxrefs{'SP'} || [] };
my $sp_string = join '|', @sp;

# Data row: ten tab-separated fields. No padding before the newline, so the
# last field does not pick up a trailing space.
print OUT "$common_name\t$locus_id\t$name\t$symbol\t$genome_locus\t$unigene_string\t$gb_string\t$go_string\t$po_string\t$sp_string\n";

# Progress line on STDERR (without the ontology annotations).
print STDERR "$common_name \t $locus_id \t $name \t $symbol \t $genome_locus\t $unigene_string \t $gb_string \n";
# Final report on STDERR: the number of loci found and the name of the file
# they were printed into.
my $summary = "Found $count loci.\n printed into out file $file... Done.\n";
print STDERR $summary;