3 # BioPerl module for Bio::Tools::SiRNA
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Donald Jackson, donald.jackson@bms.com
9 # Copyright Bristol-Myers Squibb
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 SiRNA - Perl object for designing small inhibitory RNAs.
21 use Bio::Tools::SiRNA;
23 my $sirna_designer = Bio::Tools::SiRNA->new( -target => $bio_seq,
26 my @pairs = $sirna_designer->design;
28 foreach $pair (@pairs) {
29 my $sense_oligo_sequence = $pair->sense->seq;
30 my $antisense_oligo_sequence = $pair->antisense->seq;
33 print join ("\t", $pair->start, $pair->end, $pair->rank,
34 $sense_oligo_sequence, $antisense_oligo_sequence), "\n";
39 Package for designing siRNA reagents.
41 Input is a L<Bio::SeqI>-compliant object (the target).
43 Output is a list of Bio::SeqFeature::SiRNA::Pair objects, which are
44 added to the feature table of the target sequence. Each
45 Bio::SeqFeature::SiRNA::Pair contains two subfeatures
46 (Bio::SeqFeature::Oligo objects) which correspond to the individual
47 oligos. These objects provide accessors for the information on the
48 individual reagent pairs.
50 This verion of Bio::Tools::SiRNA represents a major change in architecture.
51 Specific 'rulesets' for siRNA selection as developed by various groups are
52 implemented as Bio::Tools::SiRNA::Ruleset objects, which inherit from
53 Bio::Tools::SiRNA. This will make it easier to add new rule sets or modify
54 existing approaches. Currently the Tuschl and Ui-Tei (2004) rules are
55 implemented. For consistency, the Tuschl rules are implemented by default.
57 In addition, this module provides three 'extra' rules which can be added
58 above and beyond any ruleset.
64 SiRNAs that overlap known SNPs (identified as SeqFeatures with
65 primary tag = variation) can be avoided.
69 Other regions (with primary tag = 'Excluded') can also be skipped. I
70 use this with Bio::Tools::Run::Mdust to avoid low-complexity regions
71 (must be run separately), but other programs could also be used.
75 SiRNAs may also be selected in the 3 prime UTR of a gene by setting
76 $sirna_designer-E<gt>include_3pr() to true.
86 L<Bio::Tools::Run::Mdust>, L<Bio::SeqFeature::SiRNA::Pair>,
87 L<Bio::SeqFeature::SiRNA::Oligo>..
93 User feedback is an integral part of the evolution of this and other
94 Bioperl modules. Send your comments and suggestions preferably to
95 the Bioperl mailing list. Your participation is much appreciated.
97 bioperl-l@bioperl.org - General discussion
98 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
102 Please direct usage questions or support issues to the mailing list:
104 L<bioperl-l@bioperl.org>
106 rather than to the module maintainer directly. Many experienced and
107 reponsive experts will be able look at the problem and quickly
108 address it. Please include a thorough description of the problem
109 with code and data examples if at all possible.
111 =head2 Reporting Bugs
113 Report bugs to the Bioperl bug tracking system to help us keep track
114 of the bugs and their resolution. Bug reports can be submitted via
117 http://bugzilla.open-bio.org/
121 Donald Jackson (donald.jackson@bms.com)
125 The rest of the documentation details each of the object methods.
126 Internal methods are usually preceded with a _
130 package Bio
::Tools
::SiRNA
;
135 use vars
qw($AUTOLOAD);
137 use Bio::Seq::RichSeq;
138 use Bio::SeqFeature::Generic;
139 use Bio::SeqFeature::SiRNA::Oligo;
140 use Bio::SeqFeature::SiRNA::Pair;
143 use base qw(Bio::Root::Root);
146 our %COMP = ( A
=> 'T',
153 our @ARGNAMES = qw(RULES START_PAD END_PAD MIN_GC CUTOFF OLIGOS AVOID_SNPS
154 GSTRING TMPDIR TARGET DEBUG);
160 Usage : my $sirna_designer = Bio::Tools::SiRNA->new();
161 Function : Constructor for designer object
162 Returns : Bio::Tools::SiRNA object
163 Args : target - the target sequence for the SiRNAs as a Bio::Seq::RichSeq
164 start_pad - distance from the CDS start to skip (default 75)
165 end_pad - distance from the CDS end to skip (default 50)
166 include_3pr - set to true to include SiRNAs in the 3prime UTR (default false)
167 rules - rules for selecting siRNAs, currently supporting saigo and tuschl
168 min_gc - minimum GC fraction (NOT percent) (default 0.4)
169 max_gc - maximum GC fraction (NOT percent) (default 0.6)
170 cutoff - worst 'rank' accepted(default 3)
171 avoid_snps - boolean - reject oligos that overlap a variation
172 SeqFeature in the target (default true)
173 gstring - maximum allowed consecutive Gs.
174 Too many can cause problems in synthesis (default 4)
175 Note : All arguments can also be changed/accessed using autoloaded
178 my $start_pad = $sirna_designer->start_pad().
183 my ($proto, @args) = @_;
184 my $pkg = ref($proto) || $proto;
191 @args{@ARGNAMES} = $self->_rearrange(\
@ARGNAMES, @args);
193 if ($args{'RULES'}) {
194 $self->rules($args{'RULES'});
197 $self->{'start_pad'} = $args{'START_PAD'} || 75; # nt from start to mask
198 $self->{'end_pad'} = $args{'END_PAD'} || 50; # nt from end to mask
199 $self->{'include_3pr'} = $args{'INCLUDE_3PR'} || 0; # look for oligos in 3prime UTR
200 $self->{'min_gc'} = $args{'MIN_GC'} || 0.40;
201 $self->{'max_gc'} = $args{'MAX_GC'} || 0.60;
202 $self->{'cutoff'} = $args{'CUTOFF'} || 3; # highest (worst) rank wanted
203 $self->{'oligos'} = [];
204 defined($args{'AVOID_SNPS'}) ?
$self->{'avoid_snps'} = $args{'AVOID_SNPS'} :
205 $self->{'avoid_snps'} = 1; # (t/f to avoid or include reagents that cover SNPs)
206 $self->{'gstring'} = $args{'GSTRING'} || 4; # maximum allowed consecutive Gs - too many can cause problems in oligo synthesis
207 $self->{'tmpdir'} = $args{'TMPDIR'} || $ENV{'TMPDIR'} || $ENV{'TMP'} || '';
208 $self->{'debug'} = $args{'DEBUG'} || 0;
210 $self->target($args{'TARGET'}) if ($args{'TARGET'});
219 Usage : my $target_seq = $sirna_designer->target(); # get the current target
221 $sirna_designer->target($new_target_seq); # set a new target
222 Function : Set/get the target as a Bio::SeqI-compliant object
223 Returns : a Bio::SeqI-compliant object
224 Args : a Bio::SeqI-compliant object (optional)
229 my ($self, $target) = @_;
232 unless ($target->isa('Bio::SeqI')) {
233 $self->throw( -class => 'Bio::Root::BadParameter',
234 -text
=> "Target must be passed as a Bio::Seq object" );
236 if ($target->can('molecule')) {
237 ( grep { uc($target->molecule) eq $_ } qw(DNA MRNA CDNA)) or
238 $self->throw( -class => 'Bio::Root::BadParameter',
239 -text
=> "Sequences of type ". $target->molecule. " are not supported"
243 ($target->alphabet eq 'dna') or
244 $self->throw( -class => 'Bio::Root::BadParameter',
245 -text
=> "Sequences of alphabet ". $target->alphabet. " are not supported"
249 $self->{'target'} = $target;
253 elsif ($self->{'target'}) {
254 return $self->{'target'};
257 $self->throw("Target sequence not defined");
264 Usage : $sirna->rules('ruleset')
265 Purpose : set/get ruleset to use for selecting SiRNA oligo pairs.
266 Returns : not sure yet
267 Args : a ruleset name (currently supported: Tuschl, Saigo)
268 or a Bio::Tools::SiRNA::RulesetI compliant object
273 my ($self, $rules) = @_;
276 $self->_load_ruleset($rules);
278 # default: use tuschl rules
279 unless ($self->{_rules
}) {
280 $self->_load_ruleset('tuschl');
282 return $self->{_rules
};
286 my ($self, $ruleset) = @_;
288 my $rule_module = join('::', ref($self), 'Ruleset', lc($ruleset));
290 eval "require $rule_module";
293 #warn join("\n", '@INC contains:', @INC, undef);
294 $self->throw("Unable to load $rule_module: $@");
299 $self->{_rules
} = $rule_module;
300 bless($self, $rule_module); # recast as subclass
309 Usage : my @pairs = $sirna_designer->design();
310 Purpose : Design SiRNA oligo pairs.
311 Returns : A list of SiRNA pairs as Bio::SeqFeature::SiRNA::Pair objects
319 ($self->rules) or $self->throw('Unable to design siRNAs: no rule set specified');
321 # unless ( grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures ) {
322 # $self->_define_target();
325 my @oligos = $self->_get_oligos();
327 return ( grep { $_->isa('Bio::SeqFeature::SiRNA::Pair') } $self->target->top_SeqFeatures );
332 my ($feat, $cds, $left, $right);
334 my $target = $self->target or
335 $self->throw("Unable to design oligos - no target provided");
337 ($cds) = grep { $_->primary_tag eq 'CDS' } $target->top_SeqFeatures if ($target->can('top_SeqFeatures'));
340 $left = $cds->start + $self->start_pad;
341 if (!$self->include_3pr) {
342 $right = $cds->end - $self->end_pad;
345 $right = $target->length - $self->end_pad;
349 $left = 0 + $self->start_pad;
350 $right = $target->length - $self->end_pad;
354 # is there anything left?
355 if (($right - $left) < 20) {
356 $self->throw("There isn't enough sequence to design oligos. Please reduce start_pad and end_pad or supply more sequence");
358 # define target region
359 my $targregion = Bio
::SeqFeature
::Generic
->new( -start
=> $left,
361 -primary
=> 'Target' );
362 $self->target->add_SeqFeature($targregion);
364 # locate excluded regions
365 my @excluded = grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures;
367 if ($self->avoid_snps) {
368 my @snps = grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures;
369 push(@excluded, @snps);
372 $self->excluded(\
@excluded);
377 sub _get_targetregion
{
380 my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures;
381 $targregion ||= $self->_define_target;
383 $self->throw("Target region for SiRNA design not defined") unless ($targregion);
385 my $seq = $targregion->seq->seq;
386 # but this way I loose start info
387 my $targstart = $targregion->start;
389 return ($seq, $targstart);
392 # MOVE to SiRNA::Ruleset::tuschl
394 # my ($self, $rank) = @_;
395 # return $PATTERNS{$rank};
399 # # use regular expressions to pull out oligos
401 # my ($self, $rank) = @_;
402 # my $regex = $self->_regex($rank);
406 # my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures;
407 # my $seq = $targregion->seq->seq;
408 # # but this way I loose start info
409 # my $targstart = $targregion->start;
411 # # exclude masked region
412 # push(@exclude, grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures);
415 # if ($self->avoid_snps) {
416 # my @snps = grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures;
417 # push(@exclude, @snps);
420 # while ( $seq =~ /$regex/gi ) {
423 # # check for too many Gs (or Cs on the other strand)
424 # next if ( $target =~ /G{ $self->gstring,}/io );
425 # next if ( $target =~ /C{ $self->gstring,}/io );
426 # # skip Ns (for filtering)
427 # next if ( $target =~ /N/i);
429 # my $start = length($`) + $targstart;
430 # my $stop = $start + length($target) -1;
432 # my @gc = ( $target =~ /G|C/gi);
433 # my $fxGC = sprintf("%2.2f", (scalar(@gc) / length($target)));
434 # next if ($fxGC < $self->min_gc);
435 # next if ($fxGC > $self->max_gc);
437 # my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start,
440 # -seq => _get_sense($target),
441 # -source_tag => ref($self),
444 # my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start,
447 # -seq => _get_anti($target),
448 # -source_tag => ref($self),
451 # my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank => $rank,
454 # -antisense => $asense,
455 # -source_tag => ref($self),
458 # unless ($self->_has_overlap($sirna, \@exclude)) {
459 # $self->target->add_SeqFeature($sirna);
467 Usage : $sirna_designer->add_oligos($sequence, $start, $rank);
468 Purpose : Add SiRNA olgos to target Bio::Seq as Bio::SeqFeature::SiRNA::Pair objects
469 Args : Oligo sequence and start position (required), rank/score (optional)
474 my ($self, $seq, $start, $rank) = @_;
476 ($seq) or throw
('No sequence supplied for add_oligos');
477 (defined $start) or throw
('No start position specified for add_oligos');
479 my ($end) = $start + length($seq);
481 my ($sseq) = $self->_get_sense($seq);
482 my $sense = Bio
::SeqFeature
::SiRNA
::Oligo
->new( -start
=> $start,
483 -end
=> ($start + length($sseq)),
486 -source_tag
=> ref($self),
489 my $aseq = $self->_get_anti($seq);
490 my $asense = Bio
::SeqFeature
::SiRNA
::Oligo
->new( -start
=> $end,
491 -end
=> ($end - length($aseq)),
494 -source_tag
=> ref($self),
497 my $sirna = Bio
::SeqFeature
::SiRNA
::Pair
->new( -rank
=> $rank,
500 -antisense
=> $asense,
501 -source_tag
=> ref($self),
504 unless ($self->_has_overlap($sirna, $self->excluded)) {
505 $self->target->add_SeqFeature($sirna);
510 # flag any pairs that overlap an UNDESIRED feature (eg SNP)
511 # return true if there is overlap, false if not
513 my ($self, $test, $flist) = @_;
514 print STDERR
"Checking oligo at ", $test->start, " to ",$test->end, "\n"
517 foreach my $feat (@
$flist) {
518 if (($test->start <= $feat->end) and ($test->end >= $feat->start)) {
519 print STDERR
"Overlaps ", $feat->primary_tag, " at ",
520 $feat->start, " to ", $feat->end, "\n" if ($self->debug);
524 return 0; # default - no overlap
527 # MOVE to SiRNA::Ruleset::tuschl
531 # # trim off 1st 2 nt to get overhang
532 # $target =~ s/^..//;
533 # # convert T's to U's (transcribe)
534 # $target =~ s/T/U/gi;
535 # # force last 2 nt to be T's
536 # $target =~ s/..$/TT/;
543 # my @target = split(//, $target);
544 # my ($nt,@antitarget);
546 # while ($nt = pop @target) {
547 # push(@antitarget, $COMP{$nt});
549 # my $anti = join('', @antitarget);
550 # # trim off 1st 2 nt to get overhang
552 # # convert T's to U's
554 # # convert last 2 NT's to T
555 # $anti =~ s/..$/TT/;
562 my ($self, $value) = @_;
563 my $name = $AUTOLOAD;
566 return if ($name eq 'DESTROY');
569 if (defined $value) {
570 $self->{$name} = $value;
573 unless (exists $self->{$name}) {
574 $self->throw("Attribute $name not defined for ". ref($self));
577 return $self->{$name};
581 my ($self, $char) = @_;
583 return unless ($char);
585 return $COMP{ $char };