8 Usage: perl dump_sgn_loci.pl -H dbhost -D dbname -o outfile [-n common_name] [-v]
16 hostname for database [required]
20 database name [required]
28 Optional: common_name. Limits the results to one organism (e.g. tomato).
38 The script dumps SGN loci into a tab-delimited file with one row per locus and the columns:
39 common_name locus_id locus_name locus_symbol list of SGN-unigene ids (SGN-U prefixed, '|'-separated) list of GenBank accessions ('|'-separated)
44 Naama Menda <nm249@cornell.edu>
46 =head1 VERSION AND DATE
48 Version 0.1, March 2009.
58 use CXGN
::Phenome
::Locus
;
59 use CXGN
::Chado
::Organism
;
61 use CXGN
::DB
::InsertDBH
;
62 use CXGN
::Chado
::Dbxref
;
64 #use CXGN::Chado::Cvterm;
65 #use CXGN::Chado::Ontology;
66 #use CXGN::Chado::Relationship;
68 our ($opt_H, $opt_D, $opt_v, $opt_o, $opt_n);
# Both connection parameters are mandatory: abort as soon as either the
# database host or the database name is missing. (With `&&` the script only
# stopped when both were absent and otherwise went on with an undefined value.)
if (!$dbhost || !$dbname) {
    die "Need -D dbname and -H hostname arguments.\n";
}
76 my $error = 0; # keep track of input errors (in command line switches).
78 print STDERR
"Option -D required. Must be a valid database name.\n";
82 print STDERR
"$opt_D, $opt_H, $opt_n, $opt_o\n";
86 print STDERR
"A file is required as a command line argument.\n";
90 die "Some required command lines parameters not set. Aborting.\n" if $error;
# Open the output file named by -o for writing. The bareword handle OUT is
# kept because the print statements elsewhere in the script write to it.
# Three-argument open keeps the mode separate from the user-supplied file
# name, and the message reports the file actually opened plus the OS error.
open(OUT, '>', $opt_o)
    or die "can't open output file $opt_o for writing: $!\n";
95 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
100 print STDERR
"Connected to database $dbname on host $dbhost.\n";
# Select the ids of all loci, optionally restricted to the organism whose
# common name was given with -n (case-insensitive match via ILIKE).
# NOTE(review): the subquery is compared with '='; if several common_name
# rows can match the ILIKE pattern the database will reject it -- confirm
# that common names are unique.
my $query = "SELECT locus_id FROM phenome.locus";
$query .= " WHERE common_name_id = (SELECT common_name_id FROM sgn.common_name where common_name ilike ?) " if $opt_n;
$query .= " ORDER BY locus_id";
my $common_name = $opt_n || undef;

my $sth = $dbh->prepare($query);
# The placeholder only exists when -n was supplied, so a bind value is passed
# only in that case. Passing a lone undef to a statement without placeholders
# makes the number of bind values disagree with the number of placeholders.
$sth->execute( defined $common_name ? ($common_name) : () );
# Header row of the dump: one label per output column, tab-separated.
my @column_titles = (
    'common_name',
    'locus_id',
    'locus_name',
    'locus_symbol',
    'SGN-unigenes',
    'GenBank accessions',
);
print OUT join("\t", @column_titles) . "\n";
109 while (my ($locus_id) = $sth->fetchrow_array()) {
# Load the locus for the current id and read the scalar fields of its row.
my $locus       = CXGN::Phenome::Locus->new($dbh, $locus_id);
my $symbol      = $locus->get_locus_symbol();
my $name        = $locus->get_locus_name();
my $common_name = $locus->get_common_name();

# Unigene column: every associated unigene id prefixed with 'SGN-U',
# joined with '|'.
my @u_objects      = $locus->get_unigenes();
my @unigenes       = map { 'SGN-U' . $_->get_unigene_id() } @u_objects;
my $unigene_string = join '|', @unigenes;

# GenBank column: uniquename of the feature behind each 'genbank' dbxref,
# joined with '|'.
# NOTE(review): assumes get_feature() always returns an object -- confirm.
my @dbxrefs   = $locus->get_dbxrefs_by_type('genbank');
my @gb_accs   = map { $_->get_feature()->get_uniquename() } @dbxrefs;
my $gb_string = join '|', @gb_accs;

# Separate fields with a bare tab, matching the header row. The previous
# " \t " separator padded every value with spaces, so fields no longer
# compared equal to the stored values. Undefined fields become empty strings,
# as they did under interpolation.
my $row = join(
    "\t",
    map { defined $_ ? $_ : '' }
        ($common_name, $locus_id, $name, $symbol, $unigene_string, $gb_string)
) . "\n";

print OUT $row;
print STDERR $row;    # echo each row to STDERR, as before
130 print STDERR
"Found $count loci.\n printed into out file $file... Done.\n";