squash warning
[bioperl-live.git] / examples / subsequence.cgi
blob2851b5ee71598eb3a3cba8770f60e25024c6888b
1 #!/usr/bin/perl
3 # see http://zfish.nichd.nih.gov/tools/subsequence.cgi
5 # uncomment and modify the next two lines
6 # if your perl is in a nonstandard directory
7 #use lib '/disk3/local/lib/perl5/site_perl';
8 #use lib '/disk3/local/lib/perl5/';
10 use CGI qw/:standard :html3/;
11 use Bio::DB::GenBank;
12 use File::Temp;
13 use FileHandle;
15 print header,
16 start_html(-title => 'find subsequence of large GenBank entries',-author => 'Jonathan_Epstein\@nih.gov');
17 print_form() unless param;
18 print_results() if param;
20 sub print_results {
21 $gb = new Bio::DB::GenBank;
22 $accession = param('accession');
23 eval {
24 $seq = $gb->get_Seq_by_acc($accession); # Accession Number
26 if ($@) {
27 print "***ERROR: accession $accession not found***\n";
28 return;
30 $segment_start = param('start');
31 $segment_end = param('length_or_end_value');
32 $segment_end = $segment_start+$segment_end-1 if param('length_or_end_choice') eq 'Length';
33 if ($segment_end<$segment_start || $segment_start<0) {
34 print "***ERROR: invalid segment start and end values:$segment_start,$segment_end***\n";
35 return;
37 $len = $seq->length();
38 if ($segment_end>$len) {
39 print "***ERROR: maximum length $len exceeded***\n";
40 return;
42 $subseq = $seq->subseq ($segment_start,$segment_end);
44 $name = "subsequence of $accession";
45 $strand = "+";
46 $strand = "-" if (param('reverse'));
48 # For some reason, there seems to be a problem if you use the file
49 # handle provided by File::Temp. Similarly, there's a problem if you
50 # pass a filename to BioPerl below rather than a file handle. However,
51 # constructing our own file handle and then passing it to BioPerl works
52 # fine.
53 (undef, $filename) = File::Temp::tempfile();
54 $fh = new FileHandle "> $filename";
55 $seqoutlong = Bio::SeqIO->new( '-format' => 'Fasta',-fh => $fh);
56 $seqobj = Bio::PrimarySeq->new ( -seq => $subseq,
57 -id => $name . "[length:$len]:" . $segment_start . "-" . $segment_end . "(" . $strand . "strand)",
58 -moltype => 'dna'
60 $seqobj = $seqobj->revcom if ($strand ne "+");
61 $seqoutlong->write_seq($seqobj);
62 $fh->close;
63 undef $fh;
65 # Now we parse the FASTA file which was just generated, and perform
66 # some simple conversions to HTML.
67 open (TEMPORARY, "<$filename") or die "unable to open temporary file $filename\n";
68 print "<tt>\n";
69 while (<TEMPORARY>) {
70 print $_;
71 print "<br>\n";
73 close TEMPORARY;
74 print "</tt>\n";
75 unlink $filename;
78 sub print_form {
79 print p("This web page permits you to extract a short subsequence of DNA from a large GenBank entry. This is especially useful in an era of huge \"contigs\" of genomic DNA, where you only want to extract a few hundred base pairs for subsequent analysis.\n");
81 print p,"This program also illustrates the power of ",a({-href => 'http://www.BioPerl.org/'}, "BioPerl"), ", a powerful set of tools for molecular biology analysis. The ", a({-href => 'subsequence.pl.txt'}, "source code"), " for this program is less than 90 lines long.\n";
83 print p,"You must specify the GenBank accession number along with a start position. You may specify either the length of the subsequence you wish to extract or, equivalently, the endpoint.\n";
85 print "The sequence may be reverse-complemented if you wish, e.g., the reverse complement of <font color=green>ATCGC</font> is <font color=yellow>GCGAT</font>.\n";
87 print p,"To test this web page, try accession NT_004002, start 50000, length 400.\n";
89 print start_form,table(
90 Tr(td("Enter your GenBank accession"),td(textfield(-name => 'accession',-size => 20))),
91 Tr(td("Start position"),td(textfield(-name => 'start',-size => 10))),
92 Tr(td("Specify length or end position"), td(radio_group (-name => 'length_or_end_choice',-values => [Length, End], default => Length))),
93 Tr(td("Length or end position"), td(textfield (-name => length_or_end_value,-size => 20))),
94 Tr(td("Reverse complement?"), td(checkbox (-name => 'reverse')))),
95 submit ("Find my subsequence");
97 print hr(),"Credits: Jonathan Epstein (Jonathan_Epstein\@nih.gov)";