bump rc version
[bioperl-live.git] / Bio / Seq / PrimedSeq.pm
blob0990a2af50e84c21e705b1009f13d4adc1dafa68
2 # This is the original copyright statement. I have relied on Chad's module
3 # extensively for this module.
5 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
6 # This module is free software; you can redistribute it and/or
7 # modify it under the same terms as Perl itself.
9 # Copyright Chad Matsalla
11 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 # But I have modified lots of it, so I guess I should add:
16 # Copyright (c) 2003 bioperl, Rob Edwards. All Rights Reserved.
17 # This module is free software; you can redistribute it and/or
18 # modify it under the same terms as Perl itself.
20 # Copyright Rob Edwards
22 # You may distribute this module under the same terms as perl itself
23 # POD documentation - main docs before the code
26 =head1 NAME
28 Bio::Seq::PrimedSeq - A sequence and a pair of primers matching on it
30 =head1 SYNOPSIS
32 use Bio::Seq;
33 use Bio::Seq::PrimedSeq;
35 my $template = Bio::Seq->new( -seq => 'AGCTTTTCATTCTGACTGCAAC' );
36 my $left = Bio::Seq->new( -seq => 'AGCT' );
37 my $right = Bio::Seq->new( -seq => 'GTTGC' );
39 my $primedseq = Bio::Seq::PrimedSeq->new(
40 -seq => $template, # sequence object
41 -left_primer => $left, # sequence or primer object
42 -right_primer => $right # sequence or primer object
45 # Get the primers as Bio::SeqFeature::Primer objects
46 my @primer_objects = $primedseq->get_primer();
48 # Sequence object representing the PCR product, i.e. the section of the target
49 # sequence contained between the primers (primers included)
50 my $amplicon_seq = $primedseq->amplicon();
52 print 'Got amplicon sequence: '.$amplicon_seq->seq."\n";
53 # Amplicon should be: agctTTTCATTCTGACTgcaac
55 =head1 DESCRIPTION
57 This module was created to address the fact that a primer is more than a
58 SeqFeature and that there is a need to represent the primer-sequence complex and
59 the attributes that are associated with the complex.
61 A PrimedSeq object is initialized with a target sequence and two primers. The
62 location of the primers on the target sequence is calculated if needed so that
63 one can then get the PCR product, or amplicon sequence. From the PrimedSeq object
64 you can also retrieve information about melting temperatures and what not on each
65 of the primers and the amplicon. The L<Bio::Tools::Primer3> module uses PrimedSeq
66 objects extensively.
68 Note that this module does not simulate PCR. It assumes that the primers
69 will match in a single location on the target sequence and does not understand
70 degenerate primers.
72 =over
74 =item *
76 Providing primers as sequence objects
78 If the primers are specified as sequence objects, e.g. L<Bio::PrimarySeq> or
79 L<Bio::Seq>, they are first converted to L<Bio::SeqFeature::Primer> objects.
80 Their location on the target sequence is then calculated and added to the
81 L<Bio::SeqFeature::Primer> objects, which you can retrieve using the get_primer()
82 method.
84 =item *
86 Providing primers as primer objects
88 Because of the limitations of specifying primers as sequence objects, the
89 recommended method is to provide L<Bio::SeqFeature::Primer> objects. If you did
90 not record the location of the primers in the primer object, it will be
91 calculated.
93 =back
95 L<Bio::Seq::PrimedSeq> was initially started by Chad Matsalla, and later
96 improved on by Rob Edwards.
98 =head1 RECIPES
100 =over
102 =item 1.
104 Run Primer3 to get PrimedSeq objects:
106 use Bio::SeqIO;
107 use Bio::Tools::Run::Primer3;
109 # Read a target sequences from a FASTA file
110 my $file = shift || die "Need a file to read";
111 my $seqin = Bio::SeqIO->new(-file => $file);
112 my $seq = $seqin->next_seq;
114 # Use Primer3 to design some primers
115 my $primer3 = Bio::Tools::Run::Primer3->new(-seq => $seq);
116 my $results = $primer3->run; # default parameters
118 # Write all the results in a Genbank file
119 my $seqout = Bio::SeqIO->new(-file => ">primed_sequence.gbk",
120 -format => 'genbank');
121 while (my $primedseq = $results->next_primer) {
122 $seqout->write_seq( $primedseq->annotated_seq );
125 =item 2.
127 Create a genbank file for a sequence and its cognate primers:
129 use Bio::SeqIO;
130 use Bio::Seq::PrimedSeq;
132 # Read a FASTA file that contains the target sequence and its two primers
133 my $file = shift || die "$0 <file>";
134 my $seqin = Bio::SeqIO->new(-file => $file);
135 my ($template, $leftprimer, $rightprimer) =
136 ($seqin->next_seq, $seqin->next_seq, $seqin->next_seq);
138 # Set up a PrimedSeq object
139 my $primedseq = Bio::Seq::PrimedSeq->new(-seq => $template,
140 -left_primer => $leftprimer,
141 -right_primer => $rightprimer);
143 # Write the sequences in an output Genbank file
144 my $seqout = Bio::SeqIO->new(-file => ">primed_sequence.gbk",
145 -format => 'genbank');
146 $seqout->write_seq($primedseq->annotated_sequence);
148 =back
150 =head1 FEEDBACK
152 User feedback is an integral part of the evolution of this and other
153 Bioperl modules. Send your comments and suggestions preferably to one
154 of the Bioperl mailing lists. Your participation is much appreciated.
156 bioperl-l@bioperl.org - General discussion
157 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
159 =head2 Support
161 Please direct usage questions or support issues to the mailing list:
163 I<bioperl-l@bioperl.org>
165 rather than to the module maintainer directly. Many experienced and
166 reponsive experts will be able look at the problem and quickly
167 address it. Please include a thorough description of the problem
168 with code and data examples if at all possible.
170 =head2 Reporting Bugs
172 Report bugs to the Bioperl bug tracking system to help us keep track
173 the bugs and their resolution. Bug reports can be submitted via the
174 web:
176 https://github.com/bioperl/bioperl-live/issues
178 =head1 AUTHOR
180 Rob Edwards, redwards@utmem.edu
182 Based on a module written by Chad Matsalla, bioinformatics1@dieselwurks.com
184 =head1 APPENDIX
186 The rest of the documentation details each of the object
187 methods. Internal methods are usually preceded with a _
189 =cut
192 # Let the code begin...
194 package Bio::Seq::PrimedSeq;
196 use strict;
197 use Bio::SeqFeature::Primer;
199 use base qw(Bio::Root::Root Bio::SeqFeature::Generic);
200 # Since this module occupies the Bio::Seq::* namespace, it should probably
201 # inherit from Bio::Seq before it inherits from Bio::SeqFeature::Generic. But
202 # then, Bio::SeqI and Bio::SeqFeatureI both request a seq() method that return
203 # different things. So, being Bio::SeqI is incompatible with being Bio::SeqFeatureI
206 =head2 new
208 Title : new()
209 Usage : my $primedseq = Bio::SeqFeature::Primer->new(
210 -seq => $sequence,
211 -left_primer => $left_primer,
212 -right_primer => $right_primer
214 Function: Construct a primed sequence.
215 Returns : A Bio::Seq::PrimedSeq object
216 Args : -seq => a Bio::Seq object (required)
217 -left_primer => a Bio::SeqFeature::Primer or sequence object (required)
218 -right_primer => a Bio::SeqFeature::Primer or sequence object (required)
220 If you pass a sequence object to specify a primer, it will be used to
221 construct a Bio::SeqFeature::Primer that you can retrieve with the
222 L<get_primer> method.
224 Many other parameters can be included including all of the output
225 parameters from the primer3 program (see L<Bio::Tools::Primer3>). At
226 the moment these parameters will simply be stored and do anything.
228 =cut
230 sub new {
231 my($class,%args) = @_;
232 my $self = $class->SUPER::new(%args);
234 # Need an amplicon sequence
235 $self->{seq} = delete $args{-seq} || delete $args{-target_sequence} ||
236 $self->throw("Need to provide a sequence during PrimedSeq object construction");
237 if (! ref($self->{seq}) || ! $self->{seq}->isa('Bio::SeqI') ) {
238 $self->throw("The target_sequence must be a Bio::Seq to create this object.");
241 # Need a left and right primers
242 for my $primer ( 'left', 'right' ) {
243 $self->{$primer} = delete $args{'-'.$primer.'_primer'} ||
244 $self->throw("Need to provide both primers during PrimedSeq object construction");
245 if ( ref $self->{$primer} && $self->{$primer}->isa('Bio::PrimarySeqI') ) {
246 # Convert Bio::Seq or Bio::PrimarySeq objects to Bio::SeqFeature::Primer
247 $self->{$primer} = Bio::SeqFeature::Primer->new(-seq => $self->{$primer});
249 if (not (ref $self->{$primer} && $self->{$primer}->isa("Bio::SeqFeature::Primer"))) {
250 $self->throw("Primers must be Bio::SeqFeature::Primer objects but got a ".ref($self->{$primer}));
254 # Save remaining arguments
255 while (my ($arg, $val) = each %args) {
256 $self->{$arg} = $val;
259 # Determine primer location on target if needed
260 if ( not( $self->{'left'}->start && $self->{'left'}->end &&
261 $self->{'right'}->start && $self->{'right'}->end ) ) {
262 $self->_place_primers();
265 return $self;
269 =head2 get_primer
271 Title : get_primer();
272 Usage : my @primers = $primedseq->get_primer();
274 my $primer = $primedseq->get_primer('-left_primer');
275 Function: Get the primers associated with the PrimedSeq object.
276 Returns : A Bio::SeqFeature::Primer object
277 Args : For the left primer, use: l, left, left_primer or -left_primer
278 For the right primer, use: r, right, right_primer or -right_primer
279 For both primers [default], use: b, both, both_primers or -both_primers
281 =cut
283 sub get_primer {
284 my ($self, $arg) = @_;
285 if (! defined $arg ) {
286 return ($self->{left}, $self->{right});
287 } elsif ( $arg =~ /^-?l/ ) {
288 # What a cheat, I couldn't be bothered to write all the exact statements!
289 # Hah, now you can write 'leprechaun' to get the left primer.
290 return $self->{left}
291 } elsif ( $arg =~ /^-?r/ ) {
292 return $self->{right};
293 } elsif ( $arg =~ /^-?b/ ) {
294 return ($self->{left}, $self->{right});
299 =head2 annotated_sequence
301 Title : annotated_sequence
302 Usage : my $annotated_sequence_object = $primedseq->annotate_sequence();
303 Function: Get an annotated sequence object containg the left and right
304 primers
305 Returns : An annotated sequence object or 0 if not defined.
306 Args :
307 Note : Use this method to return a sequence object that you can write
308 out (e.g. in GenBank format). See the example above.
310 =cut
312 sub annotated_sequence {
313 my $self = shift;
314 my $seq = $self->{'seq'}; ### clone??
315 $seq->add_SeqFeature($self->{'left'});
316 $seq->add_SeqFeature($self->{'right'});
317 return $seq;
321 =head2 amplicon
323 Title : amplicon
324 Usage : my $amplicon = $primedseq->amplicon();
325 Function: Retrieve the amplicon as a sequence object. The amplicon sequnce is
326 only the section of the target sequence between the primer matches
327 (primers included).
328 Returns : A Bio::Seq object. To get the sequence use $amplicon->seq
329 Args : None
330 Note :
332 =cut
334 sub amplicon {
335 my ($self) = @_;
336 my $left = $self->{left};
337 my $right = $self->{right};
338 my $target = $self->{seq};
339 return Bio::PrimarySeq->new(
340 -id => 'Amplicon_from_'.($target->id || 'unidentified'),
341 -seq => lc( $left->seq->seq ).
342 uc( $target->subseq($left->end+1, $right->start-1) ).
343 lc( $right->seq->revcom->seq ),
348 =head2 seq
350 Title : seq
351 Usage : my $seqobj = $primedseq->seq();
352 Function: Retrieve the target sequence as a sequence object
353 Returns : A seq object. To get the sequence use $seqobj->seq
354 Args : None
355 Note :
357 =cut
359 sub seq {
360 my $self = shift;
361 return $self->{seq};
365 =head2 _place_primers
367 Title : _place_primers
368 Usage : $self->_place_primers();
369 Function: An internal method to place the primers on the sequence and
370 set up the ranges of the sequences
371 Returns : Nothing
372 Args : None
373 Note : Internal use only
375 =cut
377 sub _place_primers {
378 my $self = shift;
380 # Get the target and primer sequence strings, all in uppercase
381 my $left = $self->{left};
382 my $right = $self->{right};
383 my $target_seq = uc $self->{seq}->seq();
384 my $left_seq = uc $left->seq()->seq();
385 my $right_seq = uc $right->seq()->revcom()->seq();
387 # Locate primers on target sequence
388 my ($before, $middle, $after) = ($target_seq =~ /^(.*)$left_seq(.*)$right_seq(.*)$/);
389 if (not defined $before || not defined $middle || not defined $after) {
390 if ($target_seq !~ /$left_seq/) {
391 $self->throw("Could not place left primer on target");
393 if ($target_seq !~ /$right_seq/) {
394 $self->throw("Could not place right primer on target")
398 # Save location information in primer object
399 my $left_location = length($before) + 1;
400 my $right_location = length($target_seq) - length($after);
402 $left->start($left_location);
403 $left->end($left_location + $left->seq->length - 1);
404 $left->strand(1);
405 $right->start($right_location - $right->seq->length + 1);
406 $right->end($right_location);
407 $right->strand(-1);
409 # If Primer3 information was recorded, compare it to what we calculated
410 if ( exists($left->{PRIMER_LEFT}) || exists($right->{PRIMER_RIGHT}) || exists($self->{PRIMER_PRODUCT_SIZE}) ) {
412 # Bio::Seq::PrimedSeq positions
413 my $amplicon_size = length($left_seq) + length($middle) + length($right_seq);
414 $left_location = $left_location.','.length($left_seq);
415 $right_location = $right_location.','.length($right_seq);
417 # Primer3 positions
418 my $primer_product = $self->{PRIMER_PRODUCT_SIZE};
419 my $primer_left = $left->{PRIMER_LEFT};
420 my $primer_right = $right->{PRIMER_RIGHT};
422 if ( defined($primer_left) && (not $primer_left eq $left_location) ) {
423 $self->warn("Got |$primer_left| from primer3 but calculated ".
424 "|$left_location| for the left primer.");
426 if ( defined($primer_right) && (not $primer_right eq $right_location) ) {
427 $self->warn("Got |$primer_right| from primer3 but calculated ".
428 "|$right_location| for the right primer.");
430 if ( defined($primer_product) && (not $primer_product eq $amplicon_size) ) {
431 $self->warn("Got |$primer_product| from primer3 but calculated ".
432 "|$amplicon_size| for the size.");