parse plots correctly
[phenome.git] / bin / create-loci-blastdb.pl
blob214fc89c8373a40992af587037e6f9723bf876c8
1 #!/usr/bin/perl -w
2 use strict;
3 use Getopt::Long;
4 use CXGN::Phenome::Locus;
5 use CXGN::Transcript::Unigene;
6 use Bio::Chado::Schema;
7 use SGN::View::Feature;
8 use File::Slurp ;
10 my ( $help, $dbname, $dbhost, $outfile );
12 #GetOptions(
14 # 'h' => \$help,
15 # 'dbname=s' => \$dbname,
16 # 'dbhost=s' => \$dbhost,
17 # 'o=s' => \$outfile,
18 # );
20 if ($help) {
21 print <<EOF;
23 Program to create a FASTA file with loci-related sequnces.
24 Currently from SGN unigenes and GenBank accessions (stored in public.feature).
25 FASTA .seq file should be saved in the ftp site where a nightly cron job runs formatdb
26 on updated files.
28 Usage:
30 -o <output filename> - output FASTA file.
31 -dbhost <server hostname> - host running the database to be queried
32 -dbname - name of the database (sandbox, cxgn...)
34 -t <title> - BLAST database title for formatdb
36 EOF
37 exit -1;
40 #print "enter your password\n";
42 #my $pass= <STDIN>;
43 #chomp $pass;
45 my $dbh = CXGN::DB::Connection->new();
46 $dbh->add_search_path(qw/ sgn phenome /);
48 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh } , { on_connect_do => ['SET search_path TO public, phenome;'] } );
50 my $filename = "/data/prod/ftpsite/loci/loci_sequences.fasta";
52 write_file( $filename, '');
53 # or die "Can't open output file ($!)";
54 my @lines ;
56 my $loci_query = "SELECT locus_id FROM phenome.locus WHERE obsolete = 'f' ";
57 my $sth = $dbh->prepare($loci_query);
58 $sth->execute();
59 while ( my ($locus_id) = $sth->fetchrow_array() ) {
60 my $locus = CXGN::Phenome::Locus->new( $dbh, $locus_id );
61 my $common_name = $locus->get_common_name();
62 my @unigenes = $locus->get_unigenes( { full=>1, current=>1} );
63 #get the sequences of the unigenes
64 foreach my $unigene_obj (@unigenes) {
65 my $sgn_id = $unigene_obj->get_sgn_id();
66 my $unigene_seq = $unigene_obj->get_sequence();
67 my $header = $common_name . "_SGNlocusID_" . $locus_id . "_" . $sgn_id;
68 if ( $unigene_seq && length($unigene_seq) < 20000 ) {
69 write_file( $filename, {append => 1 }, ">$header\n$unigene_seq\n" );
70 $| = 1;
71 #push @lines , ">$header\n$unigene_seq\n";
74 #get the sequences of the linked genbank accessions
75 my @locus_dbxrefs = $locus->get_dbxrefs();
76 foreach my $dbxref (@locus_dbxrefs) {
78 eval {
79 my $feature = $dbxref->get_feature();
80 my $accession = $feature->get_uniquename();
81 my $seq = $feature->get_residues();
82 my $length = $feature->get_seqlen();
83 if ( $seq && ( length($seq) < 20000 ) ) {
84 my $header =
85 $common_name . "_SGNlocusID_" . $locus_id . "_" . $accession;
86 write_file( $filename, {append => 1 }, ">$header\n$seq\n" );
87 #push @lines , ">$header\n$seq\n";
90 if ($@) { print $@; }
93 #get the ITAG gene model sequences
94 my $genome_locus = $locus->get_genome_locus;
95 if ($genome_locus) {
96 my ($feature) = $schema->resultset("Sequence::Feature")->search(
98 'me.name' => { 'like' => $genome_locus . '%' },
99 'type.name'=> 'mRNA',
101 { prefetch => 'type' },
103 if ($feature) {
104 my $header = $common_name . "_SGNlocusID_" . $locus_id . "_" . $genome_locus;
105 #for my $seq_ref ( SGN::View::Feature::mrna_cds_protein_sequence($feature) ) {
106 #my $mrna_seq = $seq_ref->[0];
107 my $mrna_seq = $feature->residues;
108 write_file( $filename, {append => 1 }, ">$header\n$mrna_seq\n" );
109 # push @lines , ">$header\n$mrna_seq\n";
114 #close OF;
116 #system("formatdb -p F -i ${output_fname}.seq -n $output_fname -t \"$title\"");