2 # BioPerl module for Bio::Tools::SiRNA
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Donald Jackson, donald.jackson@bms.com
8 # Copyright Bristol-Myers Squibb
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 SiRNA - Perl object for designing small inhibitory RNAs.
20 use Bio::Tools::SiRNA;
22 my $sirna_designer = Bio::Tools::SiRNA->new( -target => $bio_seq,
25 my @pairs = $sirna_designer->design;
27 foreach $pair (@pairs) {
28 my $sense_oligo_sequence = $pair->sense->seq;
29 my $antisense_oligo_sequence = $pair->antisense->seq;
32 print join ("\t", $pair->start, $pair->end, $pair->rank,
33 $sense_oligo_sequence, $antisense_oligo_sequence), "\n";
38 Package for designing siRNA reagents.
40 Input is a L<Bio::SeqI>-compliant object (the target).
42 Output is a list of Bio::SeqFeature::SiRNA::Pair objects, which are
43 added to the feature table of the target sequence. Each
44 Bio::SeqFeature::SiRNA::Pair contains two subfeatures
45 (Bio::SeqFeature::Oligo objects) which correspond to the individual
46 oligos. These objects provide accessors for the information on the
47 individual reagent pairs.
49 This verion of Bio::Tools::SiRNA represents a major change in architecture.
50 Specific 'rulesets' for siRNA selection as developed by various groups are
51 implemented as Bio::Tools::SiRNA::Ruleset objects, which inherit from
52 Bio::Tools::SiRNA. This will make it easier to add new rule sets or modify
53 existing approaches. Currently the Tuschl and Ui-Tei (2004) rules are
54 implemented. For consistency, the Tuschl rules are implemented by default.
56 In addition, this module provides three 'extra' rules which can be added
57 above and beyond any ruleset.
63 SiRNAs that overlap known SNPs (identified as SeqFeatures with
64 primary tag = variation) can be avoided.
68 Other regions (with primary tag = 'Excluded') can also be skipped. I
69 use this with Bio::Tools::Run::Mdust to avoid low-complexity regions
70 (must be run separately), but other programs could also be used.
74 SiRNAs may also be selected in the 3 prime UTR of a gene by setting
75 $sirna_designer-E<gt>include_3pr() to true.
85 L<Bio::Tools::Run::Mdust>, L<Bio::SeqFeature::SiRNA::Pair>,
86 L<Bio::SeqFeature::SiRNA::Oligo>..
92 User feedback is an integral part of the evolution of this and other
93 Bioperl modules. Send your comments and suggestions preferably to
94 the Bioperl mailing list. Your participation is much appreciated.
96 bioperl-l@bioperl.org - General discussion
97 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
101 Please direct usage questions or support issues to the mailing list:
103 I<bioperl-l@bioperl.org>
105 rather than to the module maintainer directly. Many experienced and
106 reponsive experts will be able look at the problem and quickly
107 address it. Please include a thorough description of the problem
108 with code and data examples if at all possible.
110 =head2 Reporting Bugs
112 Report bugs to the Bioperl bug tracking system to help us keep track
113 of the bugs and their resolution. Bug reports can be submitted via
116 https://github.com/bioperl/bioperl-live/issues
120 Donald Jackson (donald.jackson@bms.com)
124 The rest of the documentation details each of the object methods.
125 Internal methods are usually preceded with a _
129 package Bio
::Tools
::SiRNA
;
134 use vars
qw($AUTOLOAD);
136 use Bio::Seq::RichSeq;
137 use Bio::SeqFeature::Generic;
138 use Bio::SeqFeature::SiRNA::Oligo;
139 use Bio::SeqFeature::SiRNA::Pair;
142 use base qw(Bio::Root::Root);
145 our %COMP = ( A
=> 'T',
152 our @ARGNAMES = qw(RULES START_PAD END_PAD MIN_GC CUTOFF OLIGOS AVOID_SNPS
153 GSTRING TMPDIR TARGET DEBUG);
159 Usage : my $sirna_designer = Bio::Tools::SiRNA->new();
160 Function : Constructor for designer object
161 Returns : Bio::Tools::SiRNA object
162 Args : target - the target sequence for the SiRNAs as a Bio::Seq::RichSeq
163 start_pad - distance from the CDS start to skip (default 75)
164 end_pad - distance from the CDS end to skip (default 50)
165 include_3pr - set to true to include SiRNAs in the 3prime UTR (default false)
166 rules - rules for selecting siRNAs, currently supporting saigo and tuschl
167 min_gc - minimum GC fraction (NOT percent) (default 0.4)
168 max_gc - maximum GC fraction (NOT percent) (default 0.6)
169 cutoff - worst 'rank' accepted(default 3)
170 avoid_snps - boolean - reject oligos that overlap a variation
171 SeqFeature in the target (default true)
172 gstring - maximum allowed consecutive Gs.
173 Too many can cause problems in synthesis (default 4)
174 Note : All arguments can also be changed/accessed using autoloaded
177 my $start_pad = $sirna_designer->start_pad().
182 my ($proto, @args) = @_;
183 my $pkg = ref($proto) || $proto;
190 @args{@ARGNAMES} = $self->_rearrange(\
@ARGNAMES, @args);
192 if ($args{'RULES'}) {
193 $self->rules($args{'RULES'});
196 $self->{'start_pad'} = $args{'START_PAD'} || 75; # nt from start to mask
197 $self->{'end_pad'} = $args{'END_PAD'} || 50; # nt from end to mask
198 $self->{'include_3pr'} = $args{'INCLUDE_3PR'} || 0; # look for oligos in 3prime UTR
199 $self->{'min_gc'} = $args{'MIN_GC'} || 0.40;
200 $self->{'max_gc'} = $args{'MAX_GC'} || 0.60;
201 $self->{'cutoff'} = $args{'CUTOFF'} || 3; # highest (worst) rank wanted
202 $self->{'oligos'} = [];
203 defined($args{'AVOID_SNPS'}) ?
$self->{'avoid_snps'} = $args{'AVOID_SNPS'} :
204 $self->{'avoid_snps'} = 1; # (t/f to avoid or include reagents that cover SNPs)
205 $self->{'gstring'} = $args{'GSTRING'} || 4; # maximum allowed consecutive Gs - too many can cause problems in oligo synthesis
206 $self->{'tmpdir'} = $args{'TMPDIR'} || $ENV{'TMPDIR'} || $ENV{'TMP'} || '';
207 $self->{'debug'} = $args{'DEBUG'} || 0;
209 $self->target($args{'TARGET'}) if ($args{'TARGET'});
218 Usage : my $target_seq = $sirna_designer->target(); # get the current target
220 $sirna_designer->target($new_target_seq); # set a new target
221 Function : Set/get the target as a Bio::SeqI-compliant object
222 Returns : a Bio::SeqI-compliant object
223 Args : a Bio::SeqI-compliant object (optional)
228 my ($self, $target) = @_;
231 unless ($target->isa('Bio::SeqI')) {
232 $self->throw( -class => 'Bio::Root::BadParameter',
233 -text
=> "Target must be passed as a Bio::Seq object" );
235 if ($target->can('molecule')) {
236 ( grep { uc($target->molecule) eq $_ } qw(DNA MRNA CDNA)) or
237 $self->throw( -class => 'Bio::Root::BadParameter',
238 -text
=> "Sequences of type ". $target->molecule. " are not supported"
242 ($target->alphabet eq 'dna') or
243 $self->throw( -class => 'Bio::Root::BadParameter',
244 -text
=> "Sequences of alphabet ". $target->alphabet. " are not supported"
248 $self->{'target'} = $target;
252 elsif ($self->{'target'}) {
253 return $self->{'target'};
256 $self->throw("Target sequence not defined");
263 Usage : $sirna->rules('ruleset')
264 Purpose : set/get ruleset to use for selecting SiRNA oligo pairs.
265 Returns : not sure yet
266 Args : a ruleset name (currently supported: Tuschl, Saigo)
267 or a Bio::Tools::SiRNA::RulesetI compliant object
272 my ($self, $rules) = @_;
275 $self->_load_ruleset($rules);
277 # default: use tuschl rules
278 unless ($self->{_rules
}) {
279 $self->_load_ruleset('tuschl');
281 return $self->{_rules
};
285 my ($self, $ruleset) = @_;
287 my $rule_module = join('::', ref($self), 'Ruleset', lc($ruleset));
289 eval "require $rule_module";
292 #warn join("\n", '@INC contains:', @INC, undef);
293 $self->throw("Unable to load $rule_module: $@");
298 $self->{_rules
} = $rule_module;
299 bless($self, $rule_module); # recast as subclass
308 Usage : my @pairs = $sirna_designer->design();
309 Purpose : Design SiRNA oligo pairs.
310 Returns : A list of SiRNA pairs as Bio::SeqFeature::SiRNA::Pair objects
318 ($self->rules) or $self->throw('Unable to design siRNAs: no rule set specified');
320 # unless ( grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures ) {
321 # $self->_define_target();
324 my @oligos = $self->_get_oligos();
326 return ( grep { $_->isa('Bio::SeqFeature::SiRNA::Pair') } $self->target->top_SeqFeatures );
331 my ($feat, $cds, $left, $right);
333 my $target = $self->target or
334 $self->throw("Unable to design oligos - no target provided");
336 ($cds) = grep { $_->primary_tag eq 'CDS' } $target->top_SeqFeatures if ($target->can('top_SeqFeatures'));
339 $left = $cds->start + $self->start_pad;
340 if (!$self->include_3pr) {
341 $right = $cds->end - $self->end_pad;
344 $right = $target->length - $self->end_pad;
348 $left = 0 + $self->start_pad;
349 $right = $target->length - $self->end_pad;
353 # is there anything left?
354 if (($right - $left) < 20) {
355 $self->throw("There isn't enough sequence to design oligos. Please reduce start_pad and end_pad or supply more sequence");
357 # define target region
358 my $targregion = Bio
::SeqFeature
::Generic
->new( -start
=> $left,
360 -primary
=> 'Target' );
361 $self->target->add_SeqFeature($targregion);
363 # locate excluded regions
364 my @excluded = grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures;
366 if ($self->avoid_snps) {
367 my @snps = grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures;
368 push(@excluded, @snps);
371 $self->excluded(\
@excluded);
376 sub _get_targetregion
{
379 my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures;
380 $targregion ||= $self->_define_target;
382 $self->throw("Target region for SiRNA design not defined") unless ($targregion);
384 my $seq = $targregion->seq->seq;
385 # but this way I loose start info
386 my $targstart = $targregion->start;
388 return ($seq, $targstart);
391 # MOVE to SiRNA::Ruleset::tuschl
393 # my ($self, $rank) = @_;
394 # return $PATTERNS{$rank};
398 # # use regular expressions to pull out oligos
400 # my ($self, $rank) = @_;
401 # my $regex = $self->_regex($rank);
405 # my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures;
406 # my $seq = $targregion->seq->seq;
407 # # but this way I loose start info
408 # my $targstart = $targregion->start;
410 # # exclude masked region
411 # push(@exclude, grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures);
414 # if ($self->avoid_snps) {
415 # my @snps = grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures;
416 # push(@exclude, @snps);
419 # while ( $seq =~ /$regex/gi ) {
422 # # check for too many Gs (or Cs on the other strand)
423 # next if ( $target =~ /G{ $self->gstring,}/io );
424 # next if ( $target =~ /C{ $self->gstring,}/io );
425 # # skip Ns (for filtering)
426 # next if ( $target =~ /N/i);
428 # my $start = length($`) + $targstart;
429 # my $stop = $start + length($target) -1;
431 # my @gc = ( $target =~ /G|C/gi);
432 # my $fxGC = sprintf("%2.2f", (scalar(@gc) / length($target)));
433 # next if ($fxGC < $self->min_gc);
434 # next if ($fxGC > $self->max_gc);
436 # my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start,
439 # -seq => _get_sense($target),
440 # -source_tag => ref($self),
443 # my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start,
446 # -seq => _get_anti($target),
447 # -source_tag => ref($self),
450 # my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank => $rank,
453 # -antisense => $asense,
454 # -source_tag => ref($self),
457 # unless ($self->_has_overlap($sirna, \@exclude)) {
458 # $self->target->add_SeqFeature($sirna);
466 Usage : $sirna_designer->add_oligos($sequence, $start, $rank);
467 Purpose : Add SiRNA olgos to target Bio::Seq as Bio::SeqFeature::SiRNA::Pair objects
468 Args : Oligo sequence and start position (required), rank/score (optional)
473 my ($self, $seq, $start, $rank) = @_;
475 ($seq) or throw
('No sequence supplied for add_oligos');
476 (defined $start) or throw
('No start position specified for add_oligos');
478 my ($end) = $start + length($seq);
480 my ($sseq) = $self->_get_sense($seq);
481 my $sense = Bio
::SeqFeature
::SiRNA
::Oligo
->new( -start
=> $start,
482 -end
=> ($start + length($sseq)),
485 -source_tag
=> ref($self),
488 my $aseq = $self->_get_anti($seq);
489 my $asense = Bio
::SeqFeature
::SiRNA
::Oligo
->new( -start
=> $end,
490 -end
=> ($end - length($aseq)),
493 -source_tag
=> ref($self),
496 my $sirna = Bio
::SeqFeature
::SiRNA
::Pair
->new( -rank
=> $rank,
499 -antisense
=> $asense,
500 -source_tag
=> ref($self),
503 unless ($self->_has_overlap($sirna, $self->excluded)) {
504 $self->target->add_SeqFeature($sirna);
509 # flag any pairs that overlap an UNDESIRED feature (eg SNP)
510 # return true if there is overlap, false if not
512 my ($self, $test, $flist) = @_;
513 print STDERR
"Checking oligo at ", $test->start, " to ",$test->end, "\n"
516 foreach my $feat (@
$flist) {
517 if (($test->start <= $feat->end) and ($test->end >= $feat->start)) {
518 print STDERR
"Overlaps ", $feat->primary_tag, " at ",
519 $feat->start, " to ", $feat->end, "\n" if ($self->debug);
523 return 0; # default - no overlap
526 # MOVE to SiRNA::Ruleset::tuschl
530 # # trim off 1st 2 nt to get overhang
531 # $target =~ s/^..//;
532 # # convert T's to U's (transcribe)
533 # $target =~ s/T/U/gi;
534 # # force last 2 nt to be T's
535 # $target =~ s/..$/TT/;
542 # my @target = split(//, $target);
543 # my ($nt,@antitarget);
545 # while ($nt = pop @target) {
546 # push(@antitarget, $COMP{$nt});
548 # my $anti = join('', @antitarget);
549 # # trim off 1st 2 nt to get overhang
551 # # convert T's to U's
553 # # convert last 2 NT's to T
554 # $anti =~ s/..$/TT/;
561 my ($self, $value) = @_;
562 my $name = $AUTOLOAD;
565 return if ($name eq 'DESTROY');
568 if (defined $value) {
569 $self->{$name} = $value;
572 unless (exists $self->{$name}) {
573 $self->throw("Attribute $name not defined for ". ref($self));
576 return $self->{$name};
580 my ($self, $char) = @_;
582 return unless ($char);
584 return $COMP{ $char };