adding date prop to experiments
[phenome.git] / bin / dump_sgn_loci.pl
blobd078d93ff4b608def26082bf87f6aa3727ac79cf
=head1 NAME

dump_sgn_loci.pl

=head1 DESCRIPTION

Usage: perl dump_sgn_loci.pl -H dbhost -D dbname -o outfile [-n common_name] [-v]

Parameters:

=over 5

=item -H

hostname for database [required]

=item -D

database name [required]

=item -v

verbose output

=item -n

optional common_name. Limits results to one organism (e.g. tomato)

=item -o

output file [required]

=back

The script dumps SGN loci into a tab-delimited file with the following columns:

 common_name  locus_id  locus_name  locus_symbol  SGN-unigenes  GenBank accessions

When a locus has several SGN-unigene ids (SGN-U...) or several GenBank
accessions, the values within that column are joined with '|'.

=head1 AUTHOR

Naama Menda <nm249@cornell.edu>

=head1 VERSION AND DATE

Version 0.1, March 2009.

=cut
#!/usr/bin/perl
use strict;
use warnings;

use Getopt::Std;

use CXGN::Phenome::Locus;
use CXGN::Chado::Organism;

use CXGN::DB::InsertDBH;
use CXGN::Chado::Dbxref;

#use CXGN::Chado::Cvterm;
#use CXGN::Chado::Ontology;
#use CXGN::Chado::Relationship;

# Dump every locus (optionally restricted to one organism with -n) from
# phenome.locus into the file given with -o, one locus per line.

our ($opt_H, $opt_D, $opt_v, $opt_o, $opt_n);

# -D, -H, -n and -o take a value; -v is a plain flag.
getopts('D:H:n:o:v');

my $dbhost = $opt_H;
my $dbname = $opt_D;
my $file   = $opt_o;

# Report every missing required switch at once, then abort.
my @problems;
push @problems, "Option -H required. Must be a valid database host name.\n"
    unless $dbhost;
push @problems, "Option -D required. Must be a valid database name.\n"
    unless $dbname;
push @problems, "A file is required as a command line argument.\n"
    unless $file;
if (@problems) {
    print STDERR @problems;
    die "Some required command lines parameters not set. Aborting.\n";
}

# Echo the effective switches only when verbose output was requested.
if ($opt_v) {
    print STDERR join( ', ', map { _text($_) } $opt_D, $opt_H, $opt_n, $opt_o ), "\n";
}

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode.
open( my $out, '>', $file )
    or die "can't open output file $file for writing: $!\n";

my $dbh = CXGN::DB::InsertDBH->new(
    {
        dbhost => $dbhost,
        dbname => $dbname,
    }
);

print STDERR "Connected to database $dbname on host $dbhost.\n";

# The organism filter and its bind value are added together, so the number
# of values handed to execute() always matches the number of placeholders.
my $query = "SELECT locus_id FROM phenome.locus";
my @bind_values;
if ($opt_n) {
    $query .= " WHERE common_name_id = (SELECT common_name_id FROM sgn.common_name where common_name ilike ?) ";
    push @bind_values, $opt_n;
}
$query .= " ORDER BY locus_id";

my $count = 0;
my $sth   = $dbh->prepare($query);
$sth->execute(@bind_values);

print {$out} "common_name\tlocus_id\tlocus_name\tlocus_symbol\tSGN-unigenes\tGenBank accessions\n";

while ( my ($locus_id) = $sth->fetchrow_array() ) {
    # Increment the counter declared outside the loop so the final
    # summary reports the real number of loci.
    $count++;

    my $locus       = CXGN::Phenome::Locus->new( $dbh, $locus_id );
    my $symbol      = _text( $locus->get_locus_symbol() );
    my $name        = _text( $locus->get_locus_name() );
    my $common_name = _text( $locus->get_common_name() );

    # Objects returned by get_unigenes(); each is asked for its unigene id.
    my @u_objects      = $locus->get_unigenes();
    my @unigenes       = map { 'SGN-U' . $_->get_unigene_id() } @u_objects;
    my $unigene_string = join '|', @unigenes;

    # NOTE(review): assumes every returned object has a feature attached;
    # get_feature() returning undef would be fatal here - confirm.
    my @dbxrefs   = $locus->get_dbxrefs_by_type('genbank');
    my @gb_accs   = map { $_->get_feature()->get_uniquename() } @dbxrefs;
    my $gb_string = join '|', @gb_accs;

    # The spaces around each tab are kept exactly as before so existing
    # consumers of the dump see the same row format.
    my $row = "$common_name \t $locus_id \t $name \t $symbol \t $unigene_string \t $gb_string \n";
    print {$out} $row;
    print STDERR $row if $opt_v;
}

# Buffered write errors only surface at close, so check it.
close($out) or die "can't close output file $file: $!\n";

print STDERR "Found $count loci.\n printed into out file $file... Done.\n";

# Return the argument, or an empty string when it is undefined, so that
# optional values can be interpolated without "uninitialized" warnings.
sub _text {
    my ($value) = @_;
    return defined $value ? $value : '';
}