3 # see http://zfish.nichd.nih.gov/tools/subsequence.cgi
5 # uncomment and modify the next two lines
6 # if your perl is in a nonstandard directory
7 #use lib '/disk3/local/lib/perl5/site_perl';
8 #use lib '/disk3/local/lib/perl5/';
10 use CGI qw
/:standard :html3/;
16 start_html
(-title
=> 'find subsequence of large GenBank entries',-author
=> 'Jonathan_Epstein\@nih.gov');
17 print_form
() unless param
;
18 print_results
() if param
;
21 $gb = new Bio
::DB
::GenBank
;
22 $accession = param
('accession');
24 $seq = $gb->get_Seq_by_acc($accession); # Accession Number
27 print "***ERROR: accession $accession not found***\n";
30 $segment_start = param
('start');
31 $segment_end = param
('length_or_end_value');
32 $segment_end = $segment_start+$segment_end-1 if param
('length_or_end_choice') eq 'Length';
33 if ($segment_end<$segment_start || $segment_start<0) {
34 print "***ERROR: invalid segment start and end values:$segment_start,$segment_end***\n";
37 $len = $seq->length();
38 if ($segment_end>$len) {
39 print "***ERROR: maximum length $len exceeded***\n";
42 $subseq = $seq->subseq ($segment_start,$segment_end);
44 $name = "subsequence of $accession";
46 $strand = "-" if (param
('reverse'));
48 # For some reason, there seems to be a problem if you use the file
49 # handle provided by File::Temp. Similarly, there's a problem if you
50 # pass a filename to BioPerl below rather than a file handle. However,
51 # constructing our own file handle and then passing it to BioPerl works
53 (undef, $filename) = File
::Temp
::tempfile
();
54 $fh = new FileHandle
"> $filename";
55 $seqoutlong = Bio
::SeqIO
->new( '-format' => 'Fasta',-fh
=> $fh);
56 $seqobj = Bio
::PrimarySeq
->new ( -seq
=> $subseq,
57 -id
=> $name . "[length:$len]:" . $segment_start . "-" . $segment_end . "(" . $strand . "strand)",
60 $seqobj = $seqobj->revcom if ($strand ne "+");
61 $seqoutlong->write_seq($seqobj);
65 # Now we parse the FASTA file which was just generated, and perform
66 # some simple conversions to HTML.
67 open (TEMPORARY
, "<$filename") or die "unable to open temporary file $filename\n";
79 print p
("This web page permits you to extract a short subsequence of DNA from a large GenBank entry. This is especially useful in an era of huge \"contigs\" of genomic DNA, where you only want to extract a few hundred base pairs for subsequent analysis.\n");
81 print p
,"This program also illustrates the power of ",a
({-href
=> 'http://www.BioPerl.org/'}, "BioPerl"), ", a powerful set of tools for molecular biology analysis. The ", a
({-href
=> 'subsequence.pl.txt'}, "source code"), " for this program is less than 90 lines long.\n";
83 print p
,"You must specify the GenBank accession number along with a start position. You may specify either the length of the subsequence you wish to extract or, equivalently, the endpoint.\n";
85 print "The sequence may be reverse-complemented if you wish, e.g., the reverse complement of <font color=green>ATCGC</font> is <font color=yellow>GCGAT</font>.\n";
87 print p
,"To test this web page, try accession NT_004002, start 50000, length 400.\n";
89 print start_form
,table
(
90 Tr
(td
("Enter your GenBank accession"),td
(textfield
(-name
=> 'accession',-size
=> 20))),
91 Tr
(td
("Start position"),td
(textfield
(-name
=> 'start',-size
=> 10))),
92 Tr
(td
("Specify length or end position"), td
(radio_group
(-name
=> 'length_or_end_choice',-values => [Length
, End
], default => Length
))),
93 Tr
(td
("Length or end position"), td
(textfield
(-name
=> length_or_end_value
,-size
=> 20))),
94 Tr
(td
("Reverse complement?"), td
(checkbox
(-name
=> 'reverse')))),
95 submit
("Find my subsequence");
97 print hr
(),"Credits: Jonathan Epstein (Jonathan_Epstein\@nih.gov)";