Fixed issue where ambiguous taxa names got the same internal ID
[bioperl-live.git] / examples / revcom_dir.pl
blob0bfa4b487195eb223cc22873479ed9ce71d53daa
1 #!/usr/bin/perl
3 ################################################################################
4 #11-17-2001
5 #Jianwen Fang (jwfang1999@yahoo.com)
7 #This program writes the reverse complement of every sequence file in the current directory
8 #and saves each result in the same directory, using the same name with the extension ".rev"
9 ###############################################################################
12 use strict;
13 use Bio::Seq;
14 use Bio::SeqIO;
# File-level state. @format, $inputFormat and $outputFormat are read by
# getRevcom(), so their names must stay as they are.
my @files = ();
my $folder = '.';
my $inputFormat;
my $outputFormat;
my $numSeq = 0;    # start at 0 so the summary is correct when nothing matches

#Fasta       FASTA format
#EMBL        EMBL format
#GenBank     GenBank format
#GCG         GCG format
#raw         Raw format (one sequence per line, no ID)
my @format = ('Fasta', 'EMBL', 'GenBank', 'GCG', 'Raw');

# Ask for both formats up front; each answer is an index into @format.
$inputFormat = askFormat(
    "\nWhat is the format of the original sequence files?\n",
    scalar(@format));
$outputFormat = askFormat(
    "\nWhat is the format of the reverse complement sequence files you want?\n",
    scalar(@format));

# Take a snapshot of the directory before writing anything into it, so files
# created during this run are never picked up as input.
opendir(my $dirHandle, $folder)
    or die "cannot open folder $folder: $!\n";
@files = grep { !/^\.\.?$/ } readdir($dirHandle);
closedir($dirHandle);

foreach my $file (@files)
{
    next unless $file =~ /seq/i;       # only names that look like sequence files
    next unless -f "$folder/$file";    # ignore directories and other non-files
    next if $file =~ /\.rev\z/;        # ignore output left behind by an earlier run

    getRevcom($file);
    $numSeq++;
}

print "$numSeq reverse complement sequences have been saved in current directory\n";
exit;

############################################################################
# askFormat($question, $numFormats)
#
# Prints $question followed by the format menu, reads one line from STDIN and
# returns it as a number once it is a whole number in 0 .. $numFormats - 1.
# Any other answer is rejected and the question is asked again.
# Dies if STDIN reaches end of input before a valid answer is given.
############################################################################
sub askFormat
{
    my ($question, $numFormats) = @_;
    my $maxChoice = $numFormats - 1;

    while (1)
    {
        print($question);
        print("type 0 for Fasta; 1 for EMBL; 2 for GenBank; 3 for GCG; 4 for Raw\n");

        my $answer = <STDIN>;
        die "no format selected (end of input)\n" unless defined $answer;

        # Strip the newline and any stray blanks around the number.
        $answer =~ s/^\s+|\s+$//g;

        if ($answer =~ /^\d+$/ && $answer <= $maxChoice)
        {
            return $answer + 0;
        }
        print "please enter a number between 0 and $maxChoice\n";
    }
}
############################################################################
# getRevcom($seqFile)
#
# Reads every sequence in $seqFile (parsed as $format[$inputFormat]) and writes
# the reverse complement of each one, in the same order, to a file named after
# the input with its last extension replaced by ".rev" (written as
# $format[$outputFormat]). A name without an extension just gets ".rev"
# appended. The output file name is printed before the file is created.
#
# Returns the number of sequences written. Returns nothing (after a warning)
# when the input holds no readable sequence or when the output name would be
# the same as the input name.
############################################################################
sub getRevcom
{
    my $seqFile = $_[0];

    # Drop the last ".ext" only when something precedes the dot, so that
    # "seq" becomes "seq.rev" and ".seqrc" becomes ".seqrc.rev" instead of
    # both collapsing to a bare "rev" / ".rev".
    (my $outSeqFile = $seqFile) =~ s{(?<=[^/])\.[^./]*\z}{};
    $outSeqFile .= '.rev';

    # Never truncate the file that is about to be read.
    if ($outSeqFile eq $seqFile)
    {
        warn "skipping $seqFile: output would overwrite the input\n";
        return;
    }

    my $in = Bio::SeqIO->new('-file'=>$seqFile, '-format'=>$format[$inputFormat]);

    my $out;
    my $written = 0;
    while (my $seq = $in->next_seq())
    {
        # Create the output lazily so an unreadable input leaves no empty file.
        unless ($out)
        {
            print "$outSeqFile\n";
            $out = Bio::SeqIO->new('-file'=>">$outSeqFile", '-format'=>$format[$outputFormat]);
        }
        $out->write_seq($seq->revcom());
        $written++;
    }

    unless ($written)
    {
        warn "skipping $seqFile: no sequence could be read\n";
        return;
    }

    return $written;
}