3 # script to run genscan on all nucleotide sequences in a fasta file
4 # and save results as the fasta files <file>.gs.pept and <file>.gs.cds,
10 use Bio
::Tools
::Genscan
;
14 my $matrix = "/home/bosborne/src/genscan/HumanIso.smat";
18 GetOptions
( "f|file=s" => \
$file );
19 usage
() if ( !$file );
21 my $pept_out = Bio
::SeqIO
->new(-file
=> ">$file.gs.pept",
23 my $cds_out = Bio
::SeqIO
->new(-file
=> ">$file.gs.cds",
25 my $exons_out = Bio
::SeqIO
->new(-file
=> ">$file.gs.exons",
28 my $in = Bio
::SeqIO
->new(-file
=> $file , -format
=> 'Fasta');
30 while ( my $seq = $in->next_seq() ) {
31 die "Input sequence is protein\n" if ( $seq->alphabet eq 'protein' );
33 # create temp file, input to GENSCAN
34 my $temp_out = Bio
::SeqIO
->new(-file
=> ">temp.fa",
36 $temp_out->write_seq($seq);
38 my $file_id = $seq->display_id;
41 system "genscan $matrix temp.fa -cds > $file_id.gs.raw";
44 my $genscan = Bio
::Tools
::Genscan
->new( -file
=> "$file_id.gs.raw");
45 while ( my $gene = $genscan->next_prediction() ) {
47 my $pept = $gene->predicted_protein;
48 my $cds = $gene->predicted_cds;
49 my @exon_arr = $gene->exons;
52 my $cds_seq = Bio
::Seq
->new(-seq
=> $cds->seq,
53 -display_id
=> $cds->display_id);
54 $cds_out->write_seq($cds_seq);
57 if ( defined $pept ) {
58 my $pept_seq = Bio
::Seq
->new(-seq
=> $pept->seq,
59 -display_id
=> $pept->display_id);
60 $pept_out->write_seq($pept_seq);
63 for my $exon (@exon_arr) {
64 my $desc = $exon->strand . " " . $exon->start . "-" . $exon->end .
65 " " . $exon->primary_tag . " " . "GENSCAN_predicted_$i";
66 my $exon_seq = Bio
::Seq
->new(-seq
=> $seq->subseq($exon->start,
68 -display_id
=> $seq->display_id,
70 $exons_out->write_seq($exon_seq);
74 unlink "$file_id.gs.raw";
80 Function : run genscan on all nucleotide sequences in a multiple fasta file
81 Output : <file>.gs.pept, <file>.gs.cds, <file>.gs.exons