tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / examples / tools / run_genscan.pl
blob1290ffe43be391a02a0567a34f87bf99831dc3c5
1 #!/usr/bin/perl
2 # Brian Osborne
3 # script to run genscan on all nucleotide sequences in a fasta file
4 # and save results as the fasta files <file>.gs.pept and <file>.gs.cds,
5 # and <file>.gs.exons
7 use Bio::SeqIO;
8 use Bio::Seq;
9 use Getopt::Long;
10 use Bio::Tools::Genscan;
11 use strict;
13 # GENSCAN matrix
14 my $matrix = "/home/bosborne/src/genscan/HumanIso.smat";
16 my ($file,$i);
18 GetOptions( "f|file=s" => \$file );
19 usage() if ( !$file );
21 my $pept_out = Bio::SeqIO->new(-file => ">$file.gs.pept",
22 -format => "fasta");
23 my $cds_out = Bio::SeqIO->new(-file => ">$file.gs.cds",
24 -format => "fasta");
25 my $exons_out = Bio::SeqIO->new(-file => ">$file.gs.exons",
26 -format => "fasta");
28 my $in = Bio::SeqIO->new(-file => $file , -format => 'Fasta');
30 while ( my $seq = $in->next_seq() ) {
31 die "Input sequence is protein\n" if ( $seq->alphabet eq 'protein' );
33 # create temp file, input to GENSCAN
34 my $temp_out = Bio::SeqIO->new(-file => ">temp.fa",
35 -format => "fasta");
36 $temp_out->write_seq($seq);
38 my $file_id = $seq->display_id;
39 $file_id =~ s/\|/-/g;
41 system "genscan $matrix temp.fa -cds > $file_id.gs.raw";
42 unlink "temp.fa";
44 my $genscan = Bio::Tools::Genscan->new( -file => "$file_id.gs.raw");
45 while ( my $gene = $genscan->next_prediction() ) {
46 $i++;
47 my $pept = $gene->predicted_protein;
48 my $cds = $gene->predicted_cds;
49 my @exon_arr = $gene->exons;
51 if ( defined $cds ) {
52 my $cds_seq = Bio::Seq->new(-seq => $cds->seq,
53 -display_id => $cds->display_id);
54 $cds_out->write_seq($cds_seq);
57 if ( defined $pept ) {
58 my $pept_seq = Bio::Seq->new(-seq => $pept->seq,
59 -display_id => $pept->display_id);
60 $pept_out->write_seq($pept_seq);
63 for my $exon (@exon_arr) {
64 my $desc = $exon->strand . " " . $exon->start . "-" . $exon->end .
65 " " . $exon->primary_tag . " " . "GENSCAN_predicted_$i";
66 my $exon_seq = Bio::Seq->new(-seq => $seq->subseq($exon->start,
67 $exon->end),
68 -display_id => $seq->display_id,
69 -desc => $desc );
70 $exons_out->write_seq($exon_seq);
73 $genscan->close();
74 unlink "$file_id.gs.raw";
77 sub usage {
78 print "
79 Usage : $0 -f <file>
80 Function : run genscan on all nucleotide sequences in a multiple fasta file
81 Output : <file>.gs.pept, <file>.gs.cds, <file>.gs.exons
84 exit;