adding disconnect for dbh without autocommit
[phenome.git] / bin / create-loci-blastdb.pl
blobc60f3c8578b4b0885bf3bda3cee3c5c82eae5353
#!/usr/bin/perl -w

# create-loci-blastdb.pl
#
# Writes a FASTA file with the sequences attached to every locus in
# phenome.locus whose obsolete flag is 'f'. Two sources are used per locus:
#   * the unigenes returned by $locus->get_unigenes(), and
#   * the features reachable through $locus->get_dbxrefs().
# Empty sequences and sequences of $MAX_SEQ_LENGTH characters or more are
# skipped. Each record header has the form
#   <common name>_SGNlocusID_<locus id>_<unigene SGN id | feature uniquename>

use strict;
use Getopt::Long;
use CXGN::DB::Connection;    # loaded explicitly; it is instantiated below
use CXGN::Phenome::Locus;
use CXGN::Transcript::Unigene;

# Destination of the FASTA dump.
my $OUTPUT_FILE = '/data/prod/ftpsite/loci/loci_sequences.fasta';

# Sequences must be strictly shorter than this to be written.
my $MAX_SEQ_LENGTH = 20000;

my ( $help, $dbname, $dbhost, $outfile );

# NOTE(review): option parsing is disabled, so $help is never set and the
# usage block below is currently unreachable. The options it advertises are
# not honoured; the output path is the fixed $OUTPUT_FILE above.
#GetOptions(
#    'h'        => \$help,
#    'dbname=s' => \$dbname,
#    'dbhost=s' => \$dbhost,
#    'o=s'      => \$outfile,
#    );

if ($help) {
    print <<EOF;

Program to create a FASTA file with loci-related sequnces.
Currently from SGN unigenes and GenBank accessions (stored in public.feature).
FASTA .seq file should be saved in the ftp site where a nightly cron job runs formatdb
on updated files.

Usage:

-o <output filename> - output FASTA file.
-dbhost <server hostname> - host running the database to be queried
-dbname - name of the database (sandbox, cxgn...)

-t <title> - BLAST database title for formatdb

EOF
    exit -1;
}

#print "enter your password\n";

#my $pass= <STDIN>;
#chomp $pass;

my $dbh = CXGN::DB::Connection->new();
$dbh->add_search_path(qw/ sgn phenome /);

# Three-argument open with a lexical handle: the mode can never be taken
# from the file name, and the handle is not a package global.
open my $out_fh, '>', $OUTPUT_FILE
  or die "Can't open output file $OUTPUT_FILE ($!)";

my $loci_query = "SELECT locus_id FROM phenome.locus WHERE obsolete = 'f' ";
my $sth        = $dbh->prepare($loci_query);
$sth->execute();

while ( my ($locus_id) = $sth->fetchrow_array() ) {
    my $locus       = CXGN::Phenome::Locus->new( $dbh, $locus_id );
    my $common_name = $locus->get_common_name();

    # Unigene sequences of this locus.
    my @unigenes = $locus->get_unigenes( { full => 1, current => 1 } );
    foreach my $unigene_obj (@unigenes) {
        my $sgn_id      = $unigene_obj->get_sgn_id();
        my $unigene_seq = $unigene_obj->get_sequence();
        write_fasta_record( $out_fh, $common_name, $locus_id, $sgn_id,
            $unigene_seq );
    }

    # Sequences of the features linked to this locus through its dbxrefs.
    my @locus_dbxrefs = $locus->get_dbxrefs();
    foreach my $dbxref (@locus_dbxrefs) {

        # A failure on one dbxref must not abort the whole dump, so each
        # one is processed in its own eval. The error text is printed to
        # STDOUT, as this script has always done.
        eval {
            my $feature   = $dbxref->get_feature();
            my $accession = $feature->get_uniquename();
            my $seq       = $feature->get_residues();
            write_fasta_record( $out_fh, $common_name, $locus_id,
                $accession, $seq );
        };
        if ($@) { print $@; }
    }
}

# Buffered write errors (e.g. a full disk) only surface at close time, so a
# failed close means the FASTA file is incomplete.
close $out_fh
  or die "Can't close output file $OUTPUT_FILE ($!)";

#system("formatdb -p F -i ${output_fname}.seq -n $output_fname -t \"$title\"");

# write_fasta_record( $fh, $common_name, $locus_id, $seq_id, $seq )
#
# Prints one FASTA record to $fh with the header
# "<common_name>_SGNlocusID_<locus_id>_<seq_id>". Does nothing when $seq is
# empty/false or is $MAX_SEQ_LENGTH characters or longer.
sub write_fasta_record {
    my ( $fh, $common_name, $locus_id, $seq_id, $seq ) = @_;

    return unless $seq && length($seq) < $MAX_SEQ_LENGTH;

    my $header = $common_name . "_SGNlocusID_" . $locus_id . "_" . $seq_id;
    print {$fh} ">$header\n$seq\n";
    return;
}