11 A CGI script using the Bio::Tools::SiRNA package to design RNAi reagents.
12 Retrieves sequences from NCBI and generates output in graphic and tabular form.
16 To use this script, place it in an appropriate cgi-bin directory on a web server.
17 The script needs to write its graphic maps to a temporary directory. Please update
18 $TMPDIR and $TMPURL to suit your local configuation.
22 Donald Jackson (donald.jackson@bms.com)
26 L<Bio::Tools::SIRNA>, L<Bio::Graphics::Panel>, L<Bio::DB::NCBIHelper>, L<CGI>
30 use Bio
::Tools
::SiRNA
;
32 use Bio
::Graphics
::Panel
;
33 use Bio
::DB
::NCBIHelper
;
34 use Bio
::Seq
::RichSeq
; # for hand-entry
35 use Bio
::SeqFeature
::Generic
;
41 use CGI
::Carp qw
(fatalsToBrowser carpout
);
47 # define a bunch of constants
48 my %COLORRANKS = ( 1 => 'red',
51 my $TMPDIR = '/var/www/htdocs/tmp/';
53 my $ATGPAD = 75; # how far from start do we wait?
56 my $log = $TMPDIR . 'RNAiFinder.log';
57 open (LOG
, ">>$log") or die $!;
63 print $q->h1('RNAi Finder');
66 if ($q->param('Design')) {
67 if ($q->param('accession') and !$q->param('seq')) {
68 $target = get_target
();
71 $target = make_target
();
82 <P>Oligos are designed as described on the <A HREF="http://www.mpibpc.gwdg.de/abteilungen/100/105/sirna.html" TARGET="Tuschl">Tuschl lab web page</A> and are ranked as follows:
84 <LI><B>New:</B> Selecting 'Pol3-compatible targets' looks for oligos with the
85 pattern NAR(N17)YNN which can be synthesized or expressed from a Pol3 promoter.
86 <br>This selection <b>overrides</b> the 'Cutoff' rank.
87 <LI>Oligos with Rank = 1 (best) match the AAN(19)TT rule.
88 <LI>Oligos with Rank = 2 match the AAN(21) rule
89 <LI>Oligos with Rank = 3 match the NAN(21) rule.
91 <P>If percent GC and specificity are similar, Rank 1 oligos are better. All 3 prime overhangs are converted to TT; the rest of the sequence is transcribed into RNA</P>
93 <h3>Modifications to published rules:</h3>
96 Runs of 3 or more consecutive Gs on either strand are skipped - these can cause problems in synthesis.
97 <li>Users may choose to exclude oligos that overlap single nucleotide polymorphisms (ON by default). SNP data comes from the NCBI dbSNP database.
98 <li>'Low-complexity' regions (such as runs of a single nucleotide) are also excluded.
103 print $q->start_form;
104 print $q->h2('Enter your sequence and other parameters:'), "\n";
105 print $q->p('The values already here are DEFAULTS - you should change them to suit YOUR sequence');
106 print $q->start_table();
107 print $q->TR( $q->td({-align=> 'left'},
109 $q->textfield(-name => 'mingc', -default => '0.40'),
110 $q->textfield(-name => 'maxgc', -default => '0.60'),
113 $q->td({-align=> 'left'},
114 $q->popup_menu(-name => 'worstrank',
119 $q->checkbox(-name => 'pol3',
120 -label => 'Pol3 compatible',
125 print $q->TR( $q->th({-align=> 'left'}, 'Exclude oligos with SNPs?'),
126 $q->td($q->radio_group(-name => 'avoid_snps',
129 -labels => {1 => 'Yes', 0 => 'No'}
133 print $q->TR( $q->th({-align=> 'left'}, 'Sequence Name:'),
134 $q->td({-align=> 'left'},$q->textfield('accession')),
135 $q->td({-align=> 'left'},
136 $q->em( q(Enter an accession and you won't have to enter the <br>sequence or start/stop. Use accessions beginning with NM_ if possible.))),
139 print $q->TR( $q->th({-align=> 'left'}, ['Position of initiator ATG:',
140 'NT after start to exclude:',
141 'Position of Stop codon:' ]));
142 print $q->TR( $q->td({-align=> 'left'},
143 [$q->textfield(-name => 'cdstart', -default => 1),
144 $q->textfield(-name => 'atgpad', -default => $ATGPAD),
145 $q->textfield('cdend'), ]));
146 print $q->TR( $q->th({-align=> 'left'}, ['Minimum Fraction GC:',
147 'Maximum Fraction GC:',
150 print $q->TR($q->th({-align=> 'left', -colspan=>2},'cDNA Sequence in plain text or FASTA format'),
151 $q->td( $q->a({-href =>'Fasta_format.html', -target => 'Fasta_desc'}, 'What is FASTA format?')),
153 print $q->TR($q->td({-align => 'left', -colspan=>3},
154 $q->textarea( -name =>'seq',
159 print $q->TR( $q->th({-align => 'left', -colspan=>3},
160 'Output options: '));
161 print $q->TR( $q->td({-align=> 'left'},
162 [ $q->checkbox(-name => 'Graphic', -checked => 'checked'),
163 $q->checkbox(-name => 'Table', -checked => 'checked'),
165 print $q->TR($q->td({-align=> 'left', -colspan=>3}, $q->submit('Design')));
166 print $q->end_table();
172 # design and output RNAi reagents
175 my $factory = Bio::Tools::SiRNA->new( -target => $gene,
177 -cutoff => $q->param('worstrank') || 2,
178 -avoid_snps => $q->param('avoid_snps') || 1,
179 -min_gc => $q->param('min_gc') || 0.40,
180 -max_gc => $q->param('max_gc') || 0.60,
181 -pol3 => $q->param('pol3') || 0,
184 print $q->p('Designing Pol3-compatible oligos') if ($q->param('pol3'));
186 my @pairs = $factory->design;
188 draw_gene($gene) if ($q->param('Graphic'));
189 print_table($gene->accession, \@pairs) if ($q->param('Table'));
190 print_text($gene->accession, \@pairs) if ($q->param('Text'));
194 my ($acc) = $q->param('accession');
195 my $gb = Bio::DB::NCBIHelper->new();
196 my $seq = $gb->get_Seq_by_acc($acc);
202 print_error("Unable to retrieve sequence from GenBank using accession $acc");
209 # sanity chex - do we have the necessary info?
210 $q->param('seq') or print_error("Please supply a sequence", 1);
211 my $seq = $q->param('seq');
214 # is sequence in fasta format?
216 my ($head, $realseq) = split (/\n/, $seq, 2);
219 $realseq =~ s/[\n|\r|\s]//g;
222 elsif ($q->param('accession')) {
223 $name = $q->param('accession');
224 $seq =~ s/[\n|\r|\s]//g;
227 print_error('Please supply a sequence name!');
231 $cds_start = $q->param('cds_start') || 1;
232 $cds_end = $q->param('cds_end') || length($seq);
234 # create a new Bio::Seq::RichSeq object from parameters
235 my $seqobj = Bio::Seq::RichSeq->new( -seq => $seq,
236 -accession_number => $name,
240 my $cds = Bio::SeqFeature::Generic->new( -start => $cds_start,
243 $cds->primary_tag('CDS');
244 $seqobj->add_SeqFeature($cds);
249 # now draw a pretty picture
252 my $panel = Bio::Graphics::Panel->new( -segment => $gene,
258 -fontcolor => 'black',
259 -fontcolor2 => 'black',
260 -key_color => 'white',
262 -key_style => 'between',
263 #-gridcolor => 'lightgray',
266 my $genefeat = Bio::SeqFeature::Generic->new( -start => 1,
267 -end => $gene->length);
269 $panel->add_track( arrow => $genefeat,
277 foreach $feat($gene->top_SeqFeatures) {
278 $feature_classes{ $feat->primary_tag } ||= [];
280 push(@{ $feature_classes{ $feat->primary_tag } }, $feat);
283 # for some reason, Bio::Graphics insists on drawing subfeatures for SiRNA::Pair objects...
284 $cleanpairs = cleanup_feature($feature_classes{'SiRNA::Pair'});
287 $panel->add_track( transcript => $feature_classes{'gene'},
290 -fontcolor2 => 'black',
294 -label => \&feature_label,
298 $panel->add_track( transcript2 => $feature_classes{'CDS'},
300 -fontcolor2 => 'black',
305 -label => \&feature_label,
306 -description => \&feature_desc,
309 $panel->add_track( $feature_classes{'variation'},
312 -fontcolor2 => 'black',
316 -label => \&snp_label,
317 #-glyph => 'triangle',
319 -description => \&feature_desc,
322 $panel->add_track( generic => $feature_classes{'Excluded'},
323 -bgcolor => 'silver',
325 -fontcolor => 'black',
326 -fontcolor2 => 'black',
327 -key => 'Excluded Regions',
330 -label => \&feature_label,
331 -description => \&feature_desc,
335 generic => $cleanpairs,
336 -bgcolor => \&feature_color,
337 -fgcolor => \&feature_color,
338 -fontcolor => 'black',
339 -fontcolor2 => 'black',
340 -key => 'SiRNA Reagents',
343 -label => \&feature_label,
345 -description => \&feature_desc,
349 my $black = $gd->colorAllocate(0,0,0);
350 my $txt = GD::Text::Align->new($gd);
351 $txt->set( valign => 'center', align => 'center', color => $black);
352 #$txt->set_font(['/usr/share/fonts/truetype/VERDANA.TTF',gdGiantFont ], 10);
353 $txt->set_font(gdGiantFont);
354 $txt->set_text("RNAi Reagents for ".$gene->accession );
355 $txt->draw(200, 50, 0);
357 my $pngfile = $TMPDIR . $gene->accession . '.png';
358 my $pngurl = $TMPURL . $gene->accession . '.png';
359 open (IMG, ">$pngfile") or die $!;
364 # also get the imagemap boxes
365 my @pairboxes = extract_pairs($panel->boxes);
367 print $q->img({-src => $pngurl, -usemap=>"#MAP"});
368 print $q->p('Oligos are color coded: rank 1 in ',
369 $q->font({-color => $COLORRANKS{1}}, $COLORRANKS{1}),
371 $q->font({-color => $COLORRANKS{2}}, $COLORRANKS{2}),
373 $q->font({-color => $COLORRANKS{3}}, $COLORRANKS{3}),
374 '. Click on an oligo to bring it up in the table below');
376 print_imagemap(@pairboxes);
383 #$label = ucfirst($feature->primary_tag);
384 foreach (qw(note name product gene)) {
385 if ($feature->has_tag($_)) {
386 @notes = $feature->each_tag_value($_);
387 #$label .= ': ' . $notes[0];
388 push(@label, $notes[0]);
392 return join(': ', @label);
398 my ($rank) = $feature->each_tag_value('rank');
399 #print STDERR "Feature rank: $rank COLOR $COLORRANKS{$rank}\n";
400 return $COLORRANKS{$rank};
406 my ($accession, $pairs) = @_;
408 print $q->h2("RNAi Reagents for $accession");
409 print $q->start_table({-border
=> 1, -cellpadding
=> 2});
410 print $q->TR( $q->th(['Reagent #', 'Start', 'Stop', 'Rank', 'Fxn GC', 'Sense Oligo', 'Antisense Oligo', 'Target' ]) ), "\n";
415 foreach $pair ( sort { $a->start <=> $b->start } @
$pairs ) {
416 my $sense = $pair->sense;
417 my $anti = $pair->antisense;
418 my $color = feature_color
($pair);
420 # my $blasturl = "http://nunu.hpw.pri.bms.com/biocgi/versablast.pl?p=blastn&sequence=";
421 # $blasturl .= $pair->seq->seq;
422 # $blasturl .= "&action=Nucleotide Databases";
425 $q->TR( $q->td( [ $q->a({-name
=> 'RNAi' . $pair->start}) . $i,
428 $q->font({-color
=> $color},$pair->rank),
432 $q->tt($pair->seq->seq),
433 # $q->a({-href=>$blasturl,
434 # -target=>"blastn"},
435 # "BLAST this target"),
448 my ($accession, $pairs ) = @_;
451 print "RNAi reagents for $accession \n";
453 print join("\t", qw(Start Stop Rank Sense Antisense)), "\n";
454 foreach $pair (@
$pairs ) {
455 my $sense = $pair->sense;
456 my $anti = $pair->antisense;
458 print join("\t", $pair->start, $pair->end, $pair->rank, $sense->seq, $anti->seq), "\n";
466 sub cleanup_feature
{
469 my ($feat, @clean, $cfeat);
471 foreach $feat(@
$flist) {
472 $cfeat = clone
($feat);
473 # $cfeat = $feat->clone;
474 $cfeat->flush_sub_SeqFeature;
475 push (@clean, $cfeat); # will they
482 # get SiRNA::Pair features ONLY for imagemap
483 return ( grep {ref($_->[0]) eq "Bio::SeqFeature::SiRNA::Pair"} @_ );
489 print q
(<MAP NAME
="MAP">), "\n";
493 foreach $item (@items) {
494 my ($feature, $x1, $y1, $x2, $y2) = @
$item;
495 my $fstart = $feature->start; # should be unique
496 my $text = 'RNAi #' . $i. ' Start=' . $feature->start . ' Rank='.$feature->rank;
497 print qq(<AREA SHAPE
="RECT" COORDS
="$x1,$y1,$x2,$y2" TITLE
="$text" HREF
="#RNAi$fstart">), "\n";
498 warn "Mouseover text: $text\n";
507 # print error messages in big red type. Provide more graceful die/warn to end user
508 my ($msg, $fatal) = @_;
509 print $q->h3($q->font({-color
=>'RED'}, $msg));
523 foreach ($q->param) {
525 $q->ul($q->li([ $q->param($_) ]));
530 # special format for SNPs
534 if ( $feature->has_tag('db_xref') ) {
535 my @notes = $feature->each_tag_value('db_xref');
539 if ( $feature->has_tag('allele') ) {
540 my ($nt1, $nt2) = $feature->each_tag_value('allele');
541 $label .= $nt1 . '->' . $nt2;
548 my $desc = $feature->start;
549 $desc .= '-' . $feature->end unless ($feature->start == $feature->end);