[bug 2637]
[bioperl-live.git] / examples / make_primers.pl
blob31e92e37e997443bff44136c97e9fe5b9377bc8d
1 #!/usr/bin/perl -w
2 # $Id$
3 # Author: cckim@stanford.edu
5 # Description: This program designs primers for constructing knockouts
6 # of genes by transformation of PCR products (ref: Datsenko & Wanner,
7 # PNAS 2000). A tab-delimtied file containing ORF START STOP is read,
8 # and primers flanking the start & stop coordinates are designed based
9 # on the user-designated sequence file. In addition, primers flanking
10 # the knockout regions are chosen for PCR screening purposes once the
11 # knockout is generated. The script uses Bioperl in order to
12 # determine the primer sequences, which requires getting subsequences
13 # and reverse complementing some of the objects.
15 # make_primers.pl
16 # Purpose: Design primers for the Wanner method of PCR product-based knockouts
17 # Input: FASTA sequence file, tab-delimited coordinates file
18 # Output: Primer output file
19 # July 4, 2001
20 # Charles C. Kim
22 ###########
23 # MODULES #
24 ###########
25 use Bio::Seq;
26 use Getopt::Std;
28 #############
29 # VARIABLES #
30 #############
31 $upgap = 0; # the number of nt upstream of the 5' end to include in the deletion
32 $downgap = 0; # the number of nucleotides downstream of the 3' end to include
33 # in the deletion
34 $oligolength = 40; # the length of the homologous region on each primer
35 $seqfile = ''; # don't specify these filenames unless you want to run
36 $coordfile = ''; # the program on these filenames exclusively
37 $outfile = ''; #
38 %fiveprime_primers = (
39 "P1" => "GTGTAGGCTGGAGCTGCTTC",
41 %threeprime_primers = (
42 "P2" => "CATATGAATATCCTCCTTAG",
43 "P4" => "ATTCCGGGGATCCGTCGACC",
46 #########
47 # FILES #
48 #########
49 getopts('s:c:o:'); # sequence file, coordinates file, output file
51 $seqfile = $opt_s if $opt_s;
52 $coordfile = $opt_c if $opt_c;
53 $outfile = $opt_o if $opt_o;
55 &open_readfile(*SEQFILE, 'sequence', $seqfile);
56 &open_readfile(*COORDFILE, 'coordinate', $coordfile);
57 &open_writefile(*PRIMERFILE, 'output', $outfile);
59 ########
60 # MAIN #
61 ########
63 $seq = '';
64 $count = 0;
65 while (<SEQFILE>) {
66 if (/>/) {
67 $count++;
68 if ($count > 1) {
69 die "More than one sequence present in the input file\n";
71 next;
73 chomp($_);
74 $_ =~ tr/gatc/GATC/;
75 $seq .= $_;
77 close SEQFILE;
79 $seq = Bio::Seq-> new('-seq'=>$seq );
81 while (<COORDFILE>) {
82 chomp($_);
83 next if !$_;
84 (my $name, my $start, my $stop) = split(/\t/, $_);
85 if ($start < $stop) {
86 $upprimer = $seq->subseq($start-$oligolength-$upgap, $start-1-$upgap);
87 $downprimer = $seq->subseq($stop+1+$downgap,$stop+$oligolength+$downgap);
88 $downprimer = Bio::Seq->new('-seq'=>$downprimer);
89 $downprimer = $downprimer->revcom();
90 $downprimer = $downprimer->seq();
91 $uppcr = $seq->subseq($start-$oligolength-$upgap-20,$start-1-$upgap-$oligolength);
92 $downpcr = $seq->subseq($stop+1+$downgap+$oligolength,$stop+$oligolength+$downgap+20);
93 $downpcr = Bio::Seq->new('-seq'=>$downpcr);
94 $downpcr = $downpcr->revcom();
95 $downpcr = $downpcr->seq();
97 elsif ($start > $stop) {
98 $upprimer = $seq->subseq($start+$upgap+1,$start+$oligolength+$upgap);
99 $downprimer = $seq->subseq($stop-$oligolength-$downgap, $stop-1-$downgap);
100 $upprimer = Bio::Seq->new('-seq'=>$upprimer);
101 $upprimer = $upprimer->revcom();
102 $upprimer = $upprimer->seq();
103 $uppcr = $seq->subseq($start+$oligolength+$upgap+1,$start+$oligolength+$upgap+20);
104 $downpcr = $seq->subseq($stop-$oligolength-$downgap-20,$stop-1-$downgap-$oligolength);
105 $uppcr = Bio::Seq->new('-seq'=>$uppcr);
106 $uppcr = $uppcr->revcom();
107 $uppcr = $uppcr->seq();
109 else { die "Problem with start and stop coordinates\n"; }
110 print PRIMERFILE "$name\n";
111 print PRIMERFILE "5'pcr\t$uppcr\n";
112 print PRIMERFILE "3'pcr\t$downpcr\n";
113 print PRIMERFILE "\tExpected wildtype product size: ",abs($start-$stop)+121," bp\n";
114 foreach $entry (sort keys %fiveprime_primers) {
115 print PRIMERFILE "5'+$entry\t$upprimer$fiveprime_primers{$entry}\n";
117 foreach $entry (sort keys %threeprime_primers) {
118 print PRIMERFILE "3'+$entry\t$downprimer$threeprime_primers{$entry}\n";
120 print PRIMERFILE "\n";
121 $upprimer = '';
122 $downprimer = '';
123 $uppcr = '';
124 $downpcr = '';
128 ###############
129 # SUBROUTINES #
130 ###############
132 sub open_readfile {
133 my $filehandle = $_[0];
134 my $filetype = $_[1] if $_[1];
135 my $filename = $_[2] if $_[2];
136 unless ($filename) {
137 print "Enter $filetype filename: ";
138 chomp ($filename=<STDIN>);
140 unless (-e $filename) { die "$filename not found\n"; }
141 open($filehandle,$filename) or die "Couldn't open $filename\n";
142 $filehandle = '';
143 $filetype = '';
144 $filename = '';
147 sub open_writefile {
148 my $filehandle = $_[0];
149 my $filetype = $_[1] if $_[1];
150 my $filename = $_[2] if $_[2];
151 unless ($filename) {
152 print "Enter $filetype filename: ";
153 chomp ($filename=<STDIN>);
155 if (-e $filename) {
156 print "$filename already exists! Overwrite (Y/N)? ";
157 chomp ($_ = <STDIN>);
158 while (/[^yn]/i) {
159 print 'Y or N, please: ';
160 chomp ($_ = <STDIN>);
162 if (/n/i) { die "$filename not overwritten.\n"; }
163 else { open($filehandle, ">$filename") or die "Couldn't open $filename\n"; }
165 else { open($filehandle, ">$filename") or die "Couldn't open $filename\n"; }
166 $filehandle = '';
167 $filetype = '';
168 $filename = '';