XML::Parser::PerlSAX required for these tests
[bioperl-live.git] / examples / make_primers.pl
blob1fec90888994fe8408405aa2041c3faaae8e0873
1 #!/usr/bin/perl -w
2 # Author: cckim@stanford.edu
4 # Description: This program designs primers for constructing knockouts
5 # of genes by transformation of PCR products (ref: Datsenko & Wanner,
6 # PNAS 2000). A tab-delimited file containing ORF START STOP is read,
7 # and primers flanking the start & stop coordinates are designed based
8 # on the user-designated sequence file. In addition, primers flanking
9 # the knockout regions are chosen for PCR screening purposes once the
10 # knockout is generated. The script uses Bioperl in order to
11 # determine the primer sequences, which requires getting subsequences
12 # and reverse complementing some of the objects.
14 # make_primers.pl
15 # Purpose: Design primers for the Wanner method of PCR product-based knockouts
16 # Input: FASTA sequence file, tab-delimited coordinates file
17 # Output: Primer output file
18 # July 4, 2001
19 # Charles C. Kim
21 ###########
22 # MODULES #
23 ###########
24 use Bio::Seq;
25 use Getopt::Std;
27 #############
28 # VARIABLES #
29 #############
30 $upgap = 0; # the number of nt upstream of the 5' end to include in the deletion
31 $downgap = 0; # the number of nucleotides downstream of the 3' end to include
32 # in the deletion
33 $oligolength = 40; # the length of the homologous region on each primer
34 $seqfile = ''; # don't specify these filenames unless you want to run
35 $coordfile = ''; # the program on these filenames exclusively
36 $outfile = ''; #
37 %fiveprime_primers = (
38 "P1" => "GTGTAGGCTGGAGCTGCTTC",
40 %threeprime_primers = (
41 "P2" => "CATATGAATATCCTCCTTAG",
42 "P4" => "ATTCCGGGGATCCGTCGACC",
45 #########
46 # FILES #
47 #########
48 getopts('s:c:o:'); # sequence file, coordinates file, output file
50 $seqfile = $opt_s if $opt_s;
51 $coordfile = $opt_c if $opt_c;
52 $outfile = $opt_o if $opt_o;
54 &open_readfile(*SEQFILE, 'sequence', $seqfile);
55 &open_readfile(*COORDFILE, 'coordinate', $coordfile);
56 &open_writefile(*PRIMERFILE, 'output', $outfile);
58 ########
59 # MAIN #
60 ########
62 $seq = '';
63 $count = 0;
64 while (<SEQFILE>) {
65 if (/>/) {
66 $count++;
67 if ($count > 1) {
68 die "More than one sequence present in the input file\n";
70 next;
72 chomp($_);
73 $_ =~ tr/gatc/GATC/;
74 $seq .= $_;
76 close SEQFILE;
78 $seq = Bio::Seq-> new('-seq'=>$seq );
80 while (<COORDFILE>) {
81 chomp($_);
82 next if !$_;
83 (my $name, my $start, my $stop) = split(/\t/, $_);
84 if ($start < $stop) {
85 $upprimer = $seq->subseq($start-$oligolength-$upgap, $start-1-$upgap);
86 $downprimer = $seq->subseq($stop+1+$downgap,$stop+$oligolength+$downgap);
87 $downprimer = Bio::Seq->new('-seq'=>$downprimer);
88 $downprimer = $downprimer->revcom();
89 $downprimer = $downprimer->seq();
90 $uppcr = $seq->subseq($start-$oligolength-$upgap-20,$start-1-$upgap-$oligolength);
91 $downpcr = $seq->subseq($stop+1+$downgap+$oligolength,$stop+$oligolength+$downgap+20);
92 $downpcr = Bio::Seq->new('-seq'=>$downpcr);
93 $downpcr = $downpcr->revcom();
94 $downpcr = $downpcr->seq();
96 elsif ($start > $stop) {
97 $upprimer = $seq->subseq($start+$upgap+1,$start+$oligolength+$upgap);
98 $downprimer = $seq->subseq($stop-$oligolength-$downgap, $stop-1-$downgap);
99 $upprimer = Bio::Seq->new('-seq'=>$upprimer);
100 $upprimer = $upprimer->revcom();
101 $upprimer = $upprimer->seq();
102 $uppcr = $seq->subseq($start+$oligolength+$upgap+1,$start+$oligolength+$upgap+20);
103 $downpcr = $seq->subseq($stop-$oligolength-$downgap-20,$stop-1-$downgap-$oligolength);
104 $uppcr = Bio::Seq->new('-seq'=>$uppcr);
105 $uppcr = $uppcr->revcom();
106 $uppcr = $uppcr->seq();
108 else { die "Problem with start and stop coordinates\n"; }
109 print PRIMERFILE "$name\n";
110 print PRIMERFILE "5'pcr\t$uppcr\n";
111 print PRIMERFILE "3'pcr\t$downpcr\n";
112 print PRIMERFILE "\tExpected wildtype product size: ",abs($start-$stop)+121," bp\n";
113 foreach $entry (sort keys %fiveprime_primers) {
114 print PRIMERFILE "5'+$entry\t$upprimer$fiveprime_primers{$entry}\n";
116 foreach $entry (sort keys %threeprime_primers) {
117 print PRIMERFILE "3'+$entry\t$downprimer$threeprime_primers{$entry}\n";
119 print PRIMERFILE "\n";
120 $upprimer = '';
121 $downprimer = '';
122 $uppcr = '';
123 $downpcr = '';
127 ###############
128 # SUBROUTINES #
129 ###############
131 sub open_readfile {
132 my $filehandle = $_[0];
133 my $filetype = $_[1] if $_[1];
134 my $filename = $_[2] if $_[2];
135 unless ($filename) {
136 print "Enter $filetype filename: ";
137 chomp ($filename=<STDIN>);
139 unless (-e $filename) { die "$filename not found\n"; }
140 open($filehandle,$filename) or die "Couldn't open $filename\n";
141 $filehandle = '';
142 $filetype = '';
143 $filename = '';
146 sub open_writefile {
147 my $filehandle = $_[0];
148 my $filetype = $_[1] if $_[1];
149 my $filename = $_[2] if $_[2];
150 unless ($filename) {
151 print "Enter $filetype filename: ";
152 chomp ($filename=<STDIN>);
154 if (-e $filename) {
155 print "$filename already exists! Overwrite (Y/N)? ";
156 chomp ($_ = <STDIN>);
157 while (/[^yn]/i) {
158 print 'Y or N, please: ';
159 chomp ($_ = <STDIN>);
161 if (/n/i) { die "$filename not overwritten.\n"; }
162 else { open($filehandle, ">$filename") or die "Couldn't open $filename\n"; }
164 else { open($filehandle, ">$filename") or die "Couldn't open $filename\n"; }
165 $filehandle = '';
166 $filetype = '';
167 $filename = '';