tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / examples / sirna / rnai_finder.cgi
blobe3edbdb70631f18a96a0eb43cd2f206246447289
1 #!/usr/bin/perl -w
3 =pod
5 =head1 NAME
7 rnai_finder.cgi
9 =head1 DESCRIPTION
11 A CGI script using the Bio::Tools::SiRNA package to design RNAi reagents.
12 Retrieves sequences from NCBI and generates output in graphic and tabular form.
14 =head1 INSTALLATION
16 To use this script, place it in an appropriate cgi-bin directory on a web server.
17 The script needs to write its graphic maps to a temporary directory. Please update
18 $TMPDIR and $TMPURL to suit your local configuation.
20 =head1 AUTHOR
22 Donald Jackson (donald.jackson@bms.com)
24 =head1 SEE ALSO
26 L<Bio::Tools::SIRNA>, L<Bio::Graphics::Panel>, L<Bio::DB::NCBIHelper>, L<CGI>
28 =cut
30 use Bio::Tools::SiRNA;
32 use Bio::Graphics::Panel;
33 use Bio::DB::NCBIHelper;
34 use Bio::Seq::RichSeq; # for hand-entry
35 use Bio::SeqFeature::Generic;
37 use GD::Text::Align;
38 use Clone qw(clone);
40 use CGI;
41 use CGI::Carp qw (fatalsToBrowser carpout);
43 my $q = CGI->new;
47 # define a bunch of constants
48 my %COLORRANKS = ( 1 => 'red',
49 2 => 'orchid',
50 3 => 'blue' );
51 my $TMPDIR = '/var/www/htdocs/tmp/';
52 my $TMPURL = '/tmp/';
53 my $ATGPAD = 75; # how far from start do we wait?
54 my $NOLIGOS = 3;
56 my $log = $TMPDIR . 'RNAiFinder.log';
57 open (LOG, ">>$log") or die $!;
58 carpout(LOG);
60 print $q->header,
61 $q->start_html;
63 print $q->h1('RNAi Finder');
66 if ($q->param('Design')) {
67 if ($q->param('accession') and !$q->param('seq')) {
68 $target = get_target();
70 else {
71 $target = make_target();
73 get_rnai($target);
75 else {
76 get_settings();
80 sub get_settings {
81 print <<EOM1;
82 <P>Oligos are designed as described on the <A HREF="http://www.mpibpc.gwdg.de/abteilungen/100/105/sirna.html" TARGET="Tuschl">Tuschl lab web page</A> and are ranked as follows:
83 <UL>
84 <LI><B>New:</B> Selecting 'Pol3-compatible targets' looks for oligos with the
85 pattern NAR(N17)YNN which can be synthesized or expressed from a Pol3 promoter.
86 <br>This selection <b>overrides</b> the 'Cutoff' rank.
87 <LI>Oligos with Rank = 1 (best) match the AAN(19)TT rule.
88 <LI>Oligos with Rank = 2 match the AAN(21) rule
89 <LI>Oligos with Rank = 3 match the NAN(21) rule.
90 </UL>
91 <P>If percent GC and specificity are similar, Rank 1 oligos are better. All 3 prime overhangs are converted to TT; the rest of the sequence is transcribed into RNA</P>
93 <h3>Modifications to published rules:</h3>
94 <ul>
95 <li>
96 Runs of 3 or more consecutive Gs on either strand are skipped - these can cause problems in synthesis.
97 <li>Users may choose to exclude oligos that overlap single nucleotide polymorphisms (ON by default). SNP data comes from the NCBI dbSNP database.
98 <li>'Low-complexity' regions (such as runs of a single nucleotide) are also excluded.
99 </ul>
101 EOM1
103 print $q->start_form;
104 print $q->h2('Enter your sequence and other parameters:'), "\n";
105 print $q->p('The values already here are DEFAULTS - you should change them to suit YOUR sequence');
106 print $q->start_table();
107 print $q->TR( $q->td({-align=> 'left'},
109 $q->textfield(-name => 'mingc', -default => '0.40'),
110 $q->textfield(-name => 'maxgc', -default => '0.60'),
113 $q->td({-align=> 'left'},
114 $q->popup_menu(-name => 'worstrank',
115 -values => [1,2,3],
116 -default => 2,
118 $q->b('OR'),
119 $q->checkbox(-name => 'pol3',
120 -label => 'Pol3 compatible',
121 -default => 0,
125 print $q->TR( $q->th({-align=> 'left'}, 'Exclude oligos with SNPs?'),
126 $q->td($q->radio_group(-name => 'avoid_snps',
127 -values => [1,0],
128 -default => 1,
129 -labels => {1 => 'Yes', 0 => 'No'}
133 print $q->TR( $q->th({-align=> 'left'}, 'Sequence Name:'),
134 $q->td({-align=> 'left'},$q->textfield('accession')),
135 $q->td({-align=> 'left'},
136 $q->em( q(Enter an accession and you won't have to enter the <br>sequence or start/stop. Use accessions beginning with NM_ if possible.))),
139 print $q->TR( $q->th({-align=> 'left'}, ['Position of initiator ATG:',
140 'NT after start to exclude:',
141 'Position of Stop codon:' ]));
142 print $q->TR( $q->td({-align=> 'left'},
143 [$q->textfield(-name => 'cdstart', -default => 1),
144 $q->textfield(-name => 'atgpad', -default => $ATGPAD),
145 $q->textfield('cdend'), ]));
146 print $q->TR( $q->th({-align=> 'left'}, ['Minimum Fraction GC:',
147 'Maximum Fraction GC:',
148 'Rank cutoff',
149 ]));
150 print $q->TR($q->th({-align=> 'left', -colspan=>2},'cDNA Sequence in plain text or FASTA format'),
151 $q->td( $q->a({-href =>'Fasta_format.html', -target => 'Fasta_desc'}, 'What is FASTA format?')),
153 print $q->TR($q->td({-align => 'left', -colspan=>3},
154 $q->textarea( -name =>'seq',
155 -rows => 4,
156 -columns => 80,
157 -wrap => 'virtual',
158 )));
159 print $q->TR( $q->th({-align => 'left', -colspan=>3},
160 'Output options: '));
161 print $q->TR( $q->td({-align=> 'left'},
162 [ $q->checkbox(-name => 'Graphic', -checked => 'checked'),
163 $q->checkbox(-name => 'Table', -checked => 'checked'),
164 ]));
165 print $q->TR($q->td({-align=> 'left', -colspan=>3}, $q->submit('Design')));
166 print $q->end_table();
167 print $q->end_form;
171 sub get_rnai {
172 # design and output RNAi reagents
173 my ($gene) = @_;
175 my $factory = Bio::Tools::SiRNA->new( -target => $gene,
176 -tmpdir => $TMPDIR,
177 -cutoff => $q->param('worstrank') || 2,
178 -avoid_snps => $q->param('avoid_snps') || 1,
179 -min_gc => $q->param('min_gc') || 0.40,
180 -max_gc => $q->param('max_gc') || 0.60,
181 -pol3 => $q->param('pol3') || 0,
184 print $q->p('Designing Pol3-compatible oligos') if ($q->param('pol3'));
186 my @pairs = $factory->design;
188 draw_gene($gene) if ($q->param('Graphic'));
189 print_table($gene->accession, \@pairs) if ($q->param('Table'));
190 print_text($gene->accession, \@pairs) if ($q->param('Text'));
193 sub get_target {
194 my ($acc) = $q->param('accession');
195 my $gb = Bio::DB::NCBIHelper->new();
196 my $seq = $gb->get_Seq_by_acc($acc);
198 if ($seq) {
199 return $seq;
201 else {
202 print_error("Unable to retrieve sequence from GenBank using accession $acc");
203 return;
208 sub make_target {
209 # sanity chex - do we have the necessary info?
210 $q->param('seq') or print_error("Please supply a sequence", 1);
211 my $seq = $q->param('seq');
212 my $name;
214 # is sequence in fasta format?
215 if ($seq =~ /^>/) {
216 my ($head, $realseq) = split (/\n/, $seq, 2);
217 $head =~ /^>(.+?) /;
218 $name = $1;
219 $realseq =~ s/[\n|\r|\s]//g;
220 $seq = $realseq;
222 elsif ($q->param('accession')) {
223 $name = $q->param('accession');
224 $seq =~ s/[\n|\r|\s]//g;
226 else {
227 print_error('Please supply a sequence name!');
228 return;
231 $cds_start = $q->param('cds_start') || 1;
232 $cds_end = $q->param('cds_end') || length($seq);
234 # create a new Bio::Seq::RichSeq object from parameters
235 my $seqobj = Bio::Seq::RichSeq->new( -seq => $seq,
236 -accession_number => $name,
237 -molecule => 'DNA',
240 my $cds = Bio::SeqFeature::Generic->new( -start => $cds_start,
241 -end => $cds_end,
243 $cds->primary_tag('CDS');
244 $seqobj->add_SeqFeature($cds);
245 return $seqobj;
248 sub draw_gene {
249 # now draw a pretty picture
250 my ($gene) = @_;
252 my $panel = Bio::Graphics::Panel->new( -segment => $gene,
253 -width => 600,
254 -pad_top => 100,
255 -pad_bottom => 20,
256 -pad_left => 50,
257 -pad_right => 50,
258 -fontcolor => 'black',
259 -fontcolor2 => 'black',
260 -key_color => 'white',
261 -grid => 1,
262 -key_style => 'between',
263 #-gridcolor => 'lightgray',
266 my $genefeat = Bio::SeqFeature::Generic->new( -start => 1,
267 -end => $gene->length);
269 $panel->add_track( arrow => $genefeat,
270 -bump => 0,
271 -tick => 2,
272 -label => 1,
275 my %feature_classes;
277 foreach $feat($gene->top_SeqFeatures) {
278 $feature_classes{ $feat->primary_tag } ||= [];
280 push(@{ $feature_classes{ $feat->primary_tag } }, $feat);
283 # for some reason, Bio::Graphics insists on drawing subfeatures for SiRNA::Pair objects...
284 $cleanpairs = cleanup_feature($feature_classes{'SiRNA::Pair'});
286 # draw
287 $panel->add_track( transcript => $feature_classes{'gene'},
288 -bgcolor => 'green',
289 -fgcolor => 'black',
290 -fontcolor2 => 'black',
291 -key => 'Gene',
292 -bump => +1,
293 -height => 8,
294 -label => \&feature_label,
295 -description => 1,
298 $panel->add_track( transcript2 => $feature_classes{'CDS'},
299 -bgcolor => 'blue',
300 -fontcolor2 => 'black',
301 -fgcolor => 'black',
302 -key => 'CDS',
303 -bump => +1,
304 -height => 8,
305 -label => \&feature_label,
306 -description => \&feature_desc,
309 $panel->add_track( $feature_classes{'variation'},
310 -bgcolor => 'black',
311 -fgcolor => 'black',
312 -fontcolor2 => 'black',
313 -key => 'SNPs',
314 -bump => +1,
315 -height => 8,
316 -label => \&snp_label,
317 #-glyph => 'triangle',
318 -glyph => 'diamond',
319 -description => \&feature_desc,
322 $panel->add_track( generic => $feature_classes{'Excluded'},
323 -bgcolor => 'silver',
324 -fgcolor => 'black',
325 -fontcolor => 'black',
326 -fontcolor2 => 'black',
327 -key => 'Excluded Regions',
328 -bump => +1,
329 -height => 6,
330 -label => \&feature_label,
331 -description => \&feature_desc,
334 $panel->add_track(
335 generic => $cleanpairs,
336 -bgcolor => \&feature_color,
337 -fgcolor => \&feature_color,
338 -fontcolor => 'black',
339 -fontcolor2 => 'black',
340 -key => 'SiRNA Reagents',
341 -bump => +1,
342 -height => 8,
343 -label => \&feature_label,
344 -glyph => 'generic',
345 -description => \&feature_desc,
348 my $gd = $panel->gd;
349 my $black = $gd->colorAllocate(0,0,0);
350 my $txt = GD::Text::Align->new($gd);
351 $txt->set( valign => 'center', align => 'center', color => $black);
352 #$txt->set_font(['/usr/share/fonts/truetype/VERDANA.TTF',gdGiantFont ], 10);
353 $txt->set_font(gdGiantFont);
354 $txt->set_text("RNAi Reagents for ".$gene->accession );
355 $txt->draw(200, 50, 0);
357 my $pngfile = $TMPDIR . $gene->accession . '.png';
358 my $pngurl = $TMPURL . $gene->accession . '.png';
359 open (IMG, ">$pngfile") or die $!;
360 binmode IMG;
361 print IMG $gd->png;
362 close IMG;
364 # also get the imagemap boxes
365 my @pairboxes = extract_pairs($panel->boxes);
367 print $q->img({-src => $pngurl, -usemap=>"#MAP"});
368 print $q->p('Oligos are color coded: rank 1 in ',
369 $q->font({-color => $COLORRANKS{1}}, $COLORRANKS{1}),
370 ', rank 2 in ',
371 $q->font({-color => $COLORRANKS{2}}, $COLORRANKS{2}),
372 ' and rank 3 in ',
373 $q->font({-color => $COLORRANKS{3}}, $COLORRANKS{3}),
374 '. Click on an oligo to bring it up in the table below');
376 print_imagemap(@pairboxes);
380 sub feature_label {
381 my ($feature) = @_;
382 my (@notes, @label);
383 #$label = ucfirst($feature->primary_tag);
384 foreach (qw(note name product gene)) {
385 if ($feature->has_tag($_)) {
386 @notes = $feature->each_tag_value($_);
387 #$label .= ': ' . $notes[0];
388 push(@label, $notes[0]);
389 last;
392 return join(': ', @label);
393 #return $label;
396 sub feature_color {
397 my ($feature) = @_;
398 my ($rank) = $feature->each_tag_value('rank');
399 #print STDERR "Feature rank: $rank COLOR $COLORRANKS{$rank}\n";
400 return $COLORRANKS{$rank};
401 #return 'red';
405 sub print_table {
406 my ($accession, $pairs) = @_;
408 print $q->h2("RNAi Reagents for $accession");
409 print $q->start_table({-border => 1, -cellpadding => 2});
410 print $q->TR( $q->th(['Reagent #', 'Start', 'Stop', 'Rank', 'Fxn GC', 'Sense Oligo', 'Antisense Oligo', 'Target' ]) ), "\n";
413 my $i = 1;
415 foreach $pair ( sort { $a->start <=> $b->start } @$pairs ) {
416 my $sense = $pair->sense;
417 my $anti = $pair->antisense;
418 my $color = feature_color($pair);
420 # my $blasturl = "http://nunu.hpw.pri.bms.com/biocgi/versablast.pl?p=blastn&sequence=";
421 # $blasturl .= $pair->seq->seq;
422 # $blasturl .= "&action=Nucleotide Databases";
424 print
425 $q->TR( $q->td( [ $q->a({-name => 'RNAi' . $pair->start}) . $i,
426 $pair->start,
427 $pair->end,
428 $q->font({-color => $color},$pair->rank),
429 $pair->fxGC,
430 $q->tt($sense->seq),
431 $q->tt($anti->seq),
432 $q->tt($pair->seq->seq),
433 # $q->a({-href=>$blasturl,
434 # -target=>"blastn"},
435 # "BLAST this target"),
436 ] ) ),
437 "\n";
438 $i++;
440 print $q->end_table;
447 sub print_text {
448 my ($accession, $pairs ) = @_;
449 my ($pair);
451 print "RNAi reagents for $accession \n";
453 print join("\t", qw(Start Stop Rank Sense Antisense)), "\n";
454 foreach $pair (@$pairs ) {
455 my $sense = $pair->sense;
456 my $anti = $pair->antisense;
458 print join("\t", $pair->start, $pair->end, $pair->rank, $sense->seq, $anti->seq), "\n";
466 sub cleanup_feature {
467 my ($flist) = @_;
469 my ($feat, @clean, $cfeat);
471 foreach $feat(@$flist) {
472 $cfeat = clone($feat);
473 # $cfeat = $feat->clone;
474 $cfeat->flush_sub_SeqFeature;
475 push (@clean, $cfeat); # will they
477 return \@clean;
481 sub extract_pairs {
482 # get SiRNA::Pair features ONLY for imagemap
483 return ( grep {ref($_->[0]) eq "Bio::SeqFeature::SiRNA::Pair"} @_ );
486 sub print_imagemap {
487 my @items = @_;
489 print q(<MAP NAME="MAP">), "\n";
491 my $i = 1;
493 foreach $item (@items) {
494 my ($feature, $x1, $y1, $x2, $y2) = @$item;
495 my $fstart = $feature->start; # should be unique
496 my $text = 'RNAi #' . $i. ' Start=' . $feature->start . ' Rank='.$feature->rank;
497 print qq(<AREA SHAPE="RECT" COORDS="$x1,$y1,$x2,$y2" TITLE="$text" HREF="#RNAi$fstart">), "\n";
498 warn "Mouseover text: $text\n";
500 $i++;
502 print "</MAP>\n";
506 sub print_error {
507 # print error messages in big red type. Provide more graceful die/warn to end user
508 my ($msg, $fatal) = @_;
509 print $q->h3($q->font({-color=>'RED'}, $msg));
511 if ($fatal) {
512 print $q->end_html;
513 die "$msg \n";
515 else {
516 warn $msg;
520 sub dump {
521 print $q->start_ul;
523 foreach ($q->param) {
524 print $q->li($_),
525 $q->ul($q->li([ $q->param($_) ]));
529 sub snp_label {
530 # special format for SNPs
531 my ($feature) = @_;
532 my $label;
534 if ( $feature->has_tag('db_xref') ) {
535 my @notes = $feature->each_tag_value('db_xref');
536 $label .= $notes[0];
537 $label .= ' ';
539 if ( $feature->has_tag('allele') ) {
540 my ($nt1, $nt2) = $feature->each_tag_value('allele');
541 $label .= $nt1 . '->' . $nt2;
543 return $label;
546 sub feature_desc {
547 my ($feature) = @_;
548 my $desc = $feature->start;
549 $desc .= '-' . $feature->end unless ($feature->start == $feature->end);
550 return $desc;