4 use CXGN
::Phenome
::Locus
;
5 use CXGN
::Transcript
::Unigene
;
# Script-level variables; presumably they receive command-line option values.
# NOTE(review): the option-parsing call is not visible in this excerpt, and the
# dbname and dbhost option entries below are commented out - confirm whether
# $dbname and $dbhost are still used anywhere.
7 my ( $help, $dbname, $dbhost, $outfile );
12 # 'dbname=s' => \$dbname,
13 # 'dbhost=s' => \$dbhost,
20 Program to create a FASTA file with loci-related sequences.
21 Currently from SGN unigenes and GenBank accessions (stored in public.feature).
22 The FASTA .seq file should be saved on the FTP site, where a nightly cron job runs formatdb.
28 -o <output filename> - output FASTA file.
29 -dbhost <server hostname> - host running the database to be queried
30 -dbname - name of the database (sandbox, cxgn...)
32 -t <title> - BLAST database title for formatdb
38 #print "enter your password\n";
# Connect to the database and add the sgn and phenome schemas to the
# connection's search path.
my $dbh = CXGN::DB::Connection->new();
$dbh->add_search_path(qw/ sgn phenome /);

# Open the FASTA output file for writing (truncating any existing file).
# The three-argument form of open keeps the mode separate from the path.
# The bareword handle OF is kept because the print statements later in the
# script write to it.
# NOTE(review): the output path is hard-coded; the $outfile variable declared
# above is not consulted here - confirm whether that is intended.
open OF, '>', "/data/prod/ftpsite/loci/loci_sequences.fasta"
    or die "Can't open output file ($!)";
# Select the ids of all loci whose obsolete flag is 'f'.
49 my $loci_query = "SELECT locus_id FROM phenome.locus WHERE obsolete = 'f' ";
50 my $sth = $dbh->prepare($loci_query);
# Visit each locus id returned by the statement handle.
# NOTE(review): no $sth->execute() call is visible between prepare and this
# fetch loop in this excerpt - confirm it exists in the full file.
52 while ( my ($locus_id) = $sth->fetchrow_array() ) {
53 my $locus = CXGN
::Phenome
::Locus
->new( $dbh, $locus_id );
54 my $common_name = $locus->get_common_name();
# Fetch the locus' unigenes, passing the 'full' and 'current' flags.
55 my @unigenes = $locus->get_unigenes( { full
=>1, current
=>1} );
56 foreach my $unigene_obj (@unigenes) {
57 my $sgn_id = $unigene_obj->get_sgn_id();
58 my $unigene_seq = $unigene_obj->get_sequence();
# FASTA header: <common name>_SGNlocusID_<locus id>_<unigene SGN id>.
59 my $header = $common_name . "_SGNlocusID_" . $locus_id . "_" . $sgn_id;
# Write only non-empty sequences shorter than 20000 characters.
60 if ( $unigene_seq && length($unigene_seq) < 20000 ) {
61 print OF
">$header\n$unigene_seq\n";
# Second source: sequences of the features attached to the locus' dbxrefs.
64 my @locus_dbxrefs = $locus->get_dbxrefs();
65 foreach my $dbxref (@locus_dbxrefs) {
68 my $feature = $dbxref->get_feature();
69 my $accession = $feature->get_uniquename();
70 my $seq = $feature->get_residues();
# NOTE(review): $length is assigned but not used in the visible lines.
71 my $length = $feature->get_seqlen();
# Same filter as for unigenes: non-empty and shorter than 20000 characters.
72 if ( $seq && ( length($seq) < 20000 ) ) {
# NOTE(review): the start of the next statement is not visible in this excerpt;
# presumably it assigns the header (<common name>_SGNlocusID_<locus id>_<accession>)
# that the following print uses - confirm against the full file.
74 $common_name . "_SGNlocusID_" . $locus_id . "_" . $accession;
75 print OF
">$header\n$seq\n";
# Disabled formatdb invocation, kept for reference.
85 #system("formatdb -p F -i ${output_fname}.seq -n $output_fname -t \"$title\"");