sync w/ main trunk
[bioperl-live.git] / Bio / Seq / SequenceTrace.pm
blob5b5acdad0b8d0303fb5137141cb76a1e257215ed
1 # $Id$
3 # BioPerl module for Bio::Seq::SequenceTrace
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com
9 # Copyright Chad Matsalla
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Seq::SequenceTrace - Bioperl object packaging a sequence with its trace
19 =head1 SYNOPSIS
21 # example code here
23 =head1 DESCRIPTION
25 This object stores a sequence with its trace.
27 =head1 FEEDBACK
29 =head2 Mailing Lists
31 User feedback is an integral part of the evolution of this and other
32 Bioperl modules. Send your comments and suggestions preferably to one
33 of the Bioperl mailing lists. Your participation is much appreciated.
35 bioperl-l@bioperl.org - General discussion
36 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
38 =head2 Support
40 Please direct usage questions or support issues to the mailing list:
42 L<bioperl-l@bioperl.org>
44 rather than to the module maintainer directly. Many experienced and
45 reponsive experts will be able look at the problem and quickly
46 address it. Please include a thorough description of the problem
47 with code and data examples if at all possible.
49 =head2 Reporting Bugs
51 Report bugs to the Bioperl bug tracking system to help us keep track
52 the bugs and their resolution. Bug reports can be submitted via the
53 web:
55 http://bugzilla.open-bio.org/
57 =head1 AUTHOR - Chad Matsalla
59 Email bioinformatics@dieselwurks.com
62 The rest of the documentation details each of the object methods.
63 Internal methods are usually preceded with a _
65 =cut
68 package Bio::Seq::SequenceTrace;
71 use strict;
72 use Bio::Seq::QualI;
73 use Bio::PrimarySeqI;
74 use Bio::PrimarySeq;
75 use Bio::Seq::PrimaryQual;
77 use base qw(Bio::Root::Root Bio::Seq::Quality Bio::Seq::TraceI);
79 =head2 new()
81 Title : new()
82 Usage : $st = Bio::Seq::SequenceTrace->new
83 ( -swq => Bio::Seq::SequenceWithQuality,
84 -trace_a => \@trace_values_for_a_channel,
85 -trace_t => \@trace_values_for_t_channel,
86 -trace_g => \@trace_values_for_g_channel,
87 -trace_c => \@trace_values_for_c_channel,
88 -accuracy_a => \@a_accuracies,
89 -accuracy_t => \@t_accuracies,
90 -accuracy_g => \@g_accuracies,
91 -accuracy_c => \@c_accuracies,
92 -peak_indices => '0 5 10 15 20 25 30 35'
94 Function: Returns a new Bio::Seq::SequenceTrace object from basic
95 constructors.
96 Returns : a new Bio::Seq::SequenceTrace object
97 Arguments: I think that these are all describes in the usage above.
99 =cut
101 sub new {
102 my ($class, @args) = @_;
103 my $self = $class->SUPER::new(@args);
104 # default: turn OFF the warnings
105 $self->{supress_warnings} = 1;
106 my($swq,$peak_indices,$trace_a,$trace_t,
107 $trace_g,$trace_c,$acc_a,$acc_t,$acc_g,$acc_c) =
108 $self->_rearrange([qw(
110 PEAK_INDICES
111 TRACE_A
112 TRACE_T
113 TRACE_G
114 TRACE_C
115 ACCURACY_A
116 ACCURACY_T
117 ACCURACY_G
118 ACCURACY_C )], @args);
119 # first, deal with the sequence and quality information
120 if ($swq && ref($swq) eq "Bio::Seq::Quality") {
121 $self->{swq} = $swq;
123 else {
124 $self->throw("A Bio::Seq::SequenceTrace object must be created with a
125 Bio::Seq::Quality object. You provided this type of object: "
126 .ref($swq));
128 if (!$acc_a) {
129 # this means that you probably did not provide traces and accuracies
130 # and that they need to be synthesized
131 $self->set_accuracies();
133 else {
134 $self->accuracies('a',$acc_a);
135 $self->accuracies('t',$acc_t);
136 $self->accuracies('g',$acc_g);
137 $self->accuracies('c',$acc_c);
139 if (!$trace_a) {
140 $self->_synthesize_traces();
142 else {
143 $self->trace('a',$trace_a);
144 $self->trace('t',$trace_t);
145 $self->trace('g',$trace_g);
146 $self->trace('c',$trace_c);
147 $self->peak_indices($peak_indices);
149 $self->id($self->seq_obj->id);
150 return $self;
153 sub swq_obj {
154 my $self = shift;
155 $self->warn('swq_obj() is deprecated: use seq_obj()');
156 return $self->{swq};
161 =head2 trace($base,\@new_values)
163 Title : trace($base,\@new_values)
164 Usage : @trace_Values = @{$obj->trace($base,\@new_values)};
165 Function: Returns the trace values as a reference to an array containing the
166 trace values. The individual elements of the trace array are not validated
167 and can be any numeric value.
168 Returns : A reference to an array.
169 Status :
170 Arguments: $base : which color channel would you like the trace values for?
171 - $base must be one of "A","T","G","C"
172 \@new_values : a reference to an array of values containing trace
173 data for this base
175 =cut
177 sub trace {
178 my ($self,$base_channel,$values) = @_;
179 if (!$base_channel) {
180 $self->throw('You must provide a valid base channel (atgc) to use trace()');
182 $base_channel =~ tr/A-Z/a-z/;
183 if ($base_channel !~ /[acgt]/) {
184 $self->throw('You must provide a valid base channel (atgc) to use trace()');
186 if ($values) {
187 if (ref($values) eq "ARRAY") {
188 $self->{trace}->{$base_channel} = $values;
190 else {
191 my @trace = split(' ',$values);
192 $self->{trace}->{$base_channel} = \@trace;
195 if ($self->{trace}->{$base_channel}) {
196 return $self->{trace}->{$base_channel};
198 else {
199 return;
204 =head2 peak_indices($new_indices)
206 Title : peak_indices($new_indices)
207 Usage : $indices = $obj->peak_indices($new_indices);
208 Function: Return the trace index points for this object.
209 Returns : A scalar
210 Args : If used, the trace indices will be set to the provided value.
212 =cut
214 sub peak_indices {
215 my ($self,$peak_indices)= @_;
216 if ($peak_indices) {
217 if (ref($peak_indices) eq "ARRAY") {
218 $self->{peak_indices} = $peak_indices;
220 else {
221 my @indices = split(' ',$peak_indices);
222 $self->{peak_indices} = \@indices;
225 if (!$self->{peak_indices}) {
226 my @temp = ();
227 $self->{peak_indices} = \@temp;
229 return $self->{peak_indices};
233 =head2 _reset_peak_indices()
235 Title : _rest_peak_indices()
236 Usage : $obj->_reset_peak_indices();
237 Function: Reset the peak indices.
238 Returns : Nothing.
239 Args : None.
240 Notes : When you create a sub_trace_object, the peak indices
241 will still be pointing to the apporpriate location _in the
242 original trace_. In order to fix this, the initial value must
243 be subtracted from each value here. ie. The first peak index
244 must be "1".
246 =cut
248 sub _reset_peak_indices {
249 my $self = shift;
250 my $length = $self->length();
251 my $subtractive = $self->peak_index_at(1);
252 my ($original,$new);
253 $self->peak_index_at(1,"null");
254 for (my $counter=2; $counter<= $length; $counter++) {
255 my $original = $self->peak_index_at($counter);
256 $new = $original - $subtractive;
257 $self->peak_index_at($counter,$new);
259 return;
266 =head2 peak_index_at($position)
268 Title : peak_index_at($position)
269 Usage : $peak_index = $obj->peak_index_at($postition);
270 Function: Return the trace iindex point at this position
271 Returns : A scalar
272 Args : If used, the trace index at this position will be
273 set to the provided value.
275 =cut
277 sub peak_index_at {
278 my ($self,$position,$value)= @_;
279 if ($value) {
280 if ($value eq "null") {
281 $self->peak_indices->[$position-1] = "0";
283 else {
284 $self->peak_indices->[$position-1] = $value;
287 return $self->peak_indices()->[$position-1];
290 =head2 alphabet()
292 Title : alphabet();
293 Usage : $molecule_type = $obj->alphabet();
294 Function: Get the molecule type from the PrimarySeq object.
295 Returns : What what PrimarySeq says the type of the sequence is.
296 Args : None.
298 =cut
300 sub alphabet {
301 my $self = shift;
302 return $self->{swq}->alphabet;
305 =head2 display_id()
307 Title : display_id()
308 Usage : $id_string = $obj->display_id();
309 Function: Returns the display id, aka the common name of the Quality
310 object.
311 The semantics of this is that it is the most likely string to be
312 used as an identifier of the quality sequence, and likely to have
313 "human" readability. The id is equivalent to the ID field of the
314 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
315 database. In fasta format, the >(\S+) is presumed to be the id,
316 though some people overload the id to embed other information.
317 Bioperl does not use any embedded information in the ID field,
318 and people are encouraged to use other mechanisms (accession
319 field for example, or extending the sequence object) to solve
320 this. Notice that $seq->id() maps to this function, mainly for
321 legacy/convience issues.
322 This method sets the display_id for the Quality object.
323 Returns : A string
324 Args : If a scalar is provided, it is set as the new display_id for
325 the Quality object.
326 Status : Virtual
328 =cut
330 sub display_id {
331 my ($self,$value) = @_;
332 if( defined $value) {
333 $self->{swq}->display_id($value);
335 return $self->{swq}->display_id();
339 =head2 accession_number()
341 Title : accession_number()
342 Usage : $unique_biological_key = $obj->accession_number();
343 Function: Returns the unique biological id for a sequence, commonly
344 called the accession_number. For sequences from established
345 databases, the implementors should try to use the correct
346 accession number. Notice that primary_id() provides the unique id
347 for the implemetation, allowing multiple objects to have the same
348 accession number in a particular implementation. For sequences
349 with no accession number, this method should return "unknown".
350 This method sets the accession_number for the Quality
351 object.
352 Returns : A string (the value of accession_number)
353 Args : If a scalar is provided, it is set as the new accession_number
354 for the Quality object.
355 Status : Virtual
358 =cut
360 sub accession_number {
361 my( $self, $acc ) = @_;
362 if (defined $acc) {
363 $self->{swq}->accession_number($acc);
364 } else {
365 $acc = $self->{swq}->accession_number();
366 $acc = 'unknown' unless defined $acc;
368 return $acc;
371 =head2 primary_id()
373 Title : primary_id()
374 Usage : $unique_implementation_key = $obj->primary_id();
375 Function: Returns the unique id for this object in this implementation.
376 This allows implementations to manage their own object ids in a
377 way the implementaiton can control clients can expect one id to
378 map to one object. For sequences with no accession number, this
379 method should return a stringified memory location.
380 This method sets the primary_id for the Quality
381 object.
382 Returns : A string. (the value of primary_id)
383 Args : If a scalar is provided, it is set as the new primary_id for
384 the Quality object.
386 =cut
388 sub primary_id {
389 my ($self,$value) = @_;
390 if ($value) {
391 $self->{swq}->primary_id($value);
393 return $self->{swq}->primary_id();
397 =head2 desc()
399 Title : desc()
400 Usage : $qual->desc($newval); _or_
401 $description = $qual->desc();
402 Function: Get/set description text for this Quality object.
403 Returns : A string. (the value of desc)
404 Args : If a scalar is provided, it is set as the new desc for the
405 Quality object.
407 =cut
409 sub desc {
410 # a mechanism to set the desc for the Quality object.
411 # probably will be used most often by set_common_features()
412 my ($self,$value) = @_;
413 if( defined $value) {
414 $self->{swq}->desc($value);
416 return $self->{swq}->desc();
419 =head2 id()
421 Title : id()
422 Usage : $id = $qual->id();
423 Function: Return the ID of the quality. This should normally be (and
424 actually is in the implementation provided here) just a synonym
425 for display_id().
426 Returns : A string. (the value of id)
427 Args : If a scalar is provided, it is set as the new id for the
428 Quality object.
430 =cut
432 sub id {
433 my ($self,$value) = @_;
434 if (!$self) { $self->throw("no value for self in $value"); }
435 if( defined $value ) {
436 $self->{swq}->display_id($value);
438 return $self->{swq}->display_id();
441 =head2 seq
443 Title : seq()
444 Usage : $string = $obj->seq(); _or_
445 $obj->seq("atctatcatca");
446 Function: Returns the sequence that is contained in the imbedded in the
447 PrimarySeq object within the Quality object
448 Returns : A scalar (the seq() value for the imbedded PrimarySeq object.)
449 Args : If a scalar is provided, the Quality object will
450 attempt to set that as the sequence for the imbedded PrimarySeq
451 object. Otherwise, the value of seq() for the PrimarySeq object
452 is returned.
453 Notes : This is probably not a good idea because you then should call
454 length() to make sure that the sequence and quality are of the
455 same length. Even then, how can you make sure that this sequence
456 belongs with that quality? I provided this to give you rope to
457 hang yourself with. Tie it to a strong device and use a good
458 knot.
460 =cut
462 sub seq {
463 my ($self,$value) = @_;
464 if( defined $value) {
465 $self->{swq}->seq($value);
467 return $self->{swq}->seq();
470 =head2 qual()
472 Title : qual()
473 Usage : @quality_values = @{$obj->qual()}; _or_
474 $obj->qual("10 10 20 40 50");
475 Function: Returns the quality as imbedded in the PrimaryQual object
476 within the Quality object.
477 Returns : A reference to an array containing the quality values in the
478 PrimaryQual object.
479 Args : If a scalar is provided, the Quality object will
480 attempt to set that as the quality for the imbedded PrimaryQual
481 object. Otherwise, the value of qual() for the PrimaryQual
482 object is returned.
483 Notes : This is probably not a good idea because you then should call
484 length() to make sure that the sequence and quality are of the
485 same length. Even then, how can you make sure that this sequence
486 belongs with that quality? I provided this to give you a strong
487 board with which to flagellate yourself.
489 =cut
491 sub qual {
492 my ($self,$value) = @_;
494 if( defined $value) {
495 $self->{swq}->qual($value);
497 return $self->{swq}->qual();
503 =head2 length()
505 Title : length()
506 Usage : $length = $seqWqual->length();
507 Function: Get the length of the Quality sequence/quality.
508 Returns : Returns the length of the sequence and quality
509 Args : None.
511 =cut
513 sub length {
514 my $self = shift;
515 return $self->seq_obj->length;
519 =head2 qual_obj
521 Title : qual_obj($different_obj)
522 Usage : $qualobj = $seqWqual->qual_obj(); _or_
523 $qualobj = $seqWqual->qual_obj($ref_to_primaryqual_obj);
524 Function: Get the Qualilty object that is imbedded in the
525 Quality object or if a reference to a PrimaryQual object
526 is provided, set this as the PrimaryQual object imbedded in the
527 Quality object.
528 Returns : A reference to a Bio::Seq::Quality object.
530 Identical to L<seq_obj>.
532 =cut
534 sub qual_obj {
535 my ($self,$value) = @_;
536 # return $self->{swq}->qual_obj($value);
537 return $self->{swq};
541 =head2 seq_obj
543 Title : seq_obj()
544 Usage : $seqobj = $seqWqual->seq_obj(); _or_
545 $seqobj = $seqWqual->seq_obj($ref_to_primary_seq_obj);
546 Function: Get the PrimarySeq object that is imbedded in the
547 Quality object or if a reference to a PrimarySeq object is
548 provided, set this as the PrimarySeq object imbedded in the
549 Quality object.
550 Returns : A reference to a Bio::PrimarySeq object.
552 =cut
554 sub seq_obj {
555 my ($self,$value) = @_;
556 return $self->{swq};
559 =head2 _set_descriptors
561 Title : _set_descriptors()
562 Usage : $seqWqual->_qual_obj($qual,$seq,$id,$acc,$pid,$desc,$given_id,
563 $alphabet);
564 Function: Set the descriptors for the Quality object. Try to
565 match the descriptors in the PrimarySeq object and in the
566 PrimaryQual object if descriptors were not provided with
567 construction.
568 Returns : Nothing.
569 Args : $qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet as found
570 in the new() method.
571 Notes : Really only intended to be called by the new() method. If
572 you want to invoke a similar function try
573 set_common_descriptors().
575 =cut
578 sub _set_descriptors {
579 my ($self,$qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet) = @_;
580 $self->{swq}->_seq_descriptors($qual,$seq,$id,$acc,$pid,
581 $desc,$given_id,$alphabet);
584 =head2 subseq($start,$end)
586 Title : subseq($start,$end)
587 Usage : $subsequence = $obj->subseq($start,$end);
588 Function: Returns the subseq from start to end, where the first base
589 is 1 and the number is inclusive, ie 1-2 are the first two
590 bases of the sequence.
591 Returns : A string.
592 Args : Two positions.
594 =cut
596 sub subseq {
597 my ($self,@args) = @_;
598 # does a single value work?
599 return $self->{swq}->subseq(@args);
602 =head2 baseat($position)
604 Title : baseat($position)
605 Usage : $base_at_position_6 = $obj->baseat("6");
606 Function: Returns a single base at the given position, where the first
607 base is 1 and the number is inclusive, ie 1-2 are the first two
608 bases of the sequence.
609 Returns : A scalar.
610 Args : A position.
612 =cut
614 sub baseat {
615 my ($self,$val) = @_;
616 return $self->{swq}->subseq($val,$val);
619 =head2 subqual($start,$end)
621 Title : subqual($start,$end)
622 Usage : @qualities = @{$obj->subqual(10,20);
623 Function: returns the quality values from $start to $end, where the
624 first value is 1 and the number is inclusive, ie 1-2 are the
625 first two bases of the sequence. Start cannot be larger than
626 end but can be equal.
627 Returns : A reference to an array.
628 Args : a start position and an end position
630 =cut
632 sub subqual {
633 my ($self,@args) = @_;
634 return $self->{swq}->subqual(@args);
637 =head2 qualat($position)
639 Title : qualat($position)
640 Usage : $quality = $obj->qualat(10);
641 Function: Return the quality value at the given location, where the
642 first value is 1 and the number is inclusive, ie 1-2 are the
643 first two bases of the sequence. Start cannot be larger than
644 end but can be equal.
645 Returns : A scalar.
646 Args : A position.
648 =cut
650 sub qualat {
651 my ($self,$val) = @_;
652 return $self->{swq}->qualat($val);
655 =head2 sub_peak_index($start,$end)
657 Title : sub_peak_index($start,$end)
658 Usage : @peak_indices = @{$obj->sub_peak_index(10,20);
659 Function: returns the trace index values from $start to $end, where the
660 first value is 1 and the number is inclusive, ie 1-2 are the
661 first two trace indices for this channel.
662 Returns : A reference to an array.
663 Args : a start position and an end position
665 =cut
667 sub sub_peak_index {
668 my ($self,$start,$end) = @_;
669 if( $start > $end ){
670 $self->throw("in sub_peak_index, start [$start] has to be greater than end [$end]");
673 if( $start <= 0 || $end > $self->length ) {
674 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
677 # remove one from start, and then length is end-start
679 $start--;
680 $end--;
681 my @sub_peak_index_array = @{$self->{peak_indices}}[$start..$end];
683 # return substr $self->seq(), $start, ($end-$start);
684 return \@sub_peak_index_array;
688 =head2 sub_trace($start,$end)
690 Title : sub_trace($base_channel,$start,$end)
691 Usage : @trace_values = @{$obj->sub_trace('a',10,20)};
692 Function: returns the trace values from $start to $end, where the
693 first value is 1 and the number is inclusive, ie 1-2 are the
694 first two bases of the sequence. Start cannot be larger than
695 end but can be e_peak_index.
696 Returns : A reference to an array.
697 Args : a start position and an end position
699 =cut
701 sub sub_trace {
702 my ($self,$base_channel,$start,$end) = @_;
703 if( $start > $end ){
704 $self->throw("in sub_trace, start [$start] has to be greater than end [$end]");
707 if( $start <= 0 || $end > $self->trace_length() ) {
708 $self->throw("You have to have start positive and length less than the total length of traces [$start:$end] Total ".$self->trace_length."");
711 # remove one from start, and then length is end-start
713 $start--;
714 $end--;
715 my @sub_peak_index_array = @{$self->trace($base_channel)}[$start..$end];
717 # return substr $self->seq(), $start, ($end-$start);
718 return \@sub_peak_index_array;
722 =head2 trace_length()
724 Title : trace_length()
725 Usage : $trace_length = $obj->trace_length();
726 Function: Return the length of the trace if all four traces (atgc)
727 are the same. Otherwise, throw an error.
728 Returns : A scalar.
729 Args : none
731 =cut
733 sub trace_length {
734 my $self = shift;
735 if ( !$self->trace('a') || !$self->trace('t') || !$self->trace('g') || !$self->trace('c') ) {
736 $self->warn("One or more of the trace channels are missing. Cannot give you a length.");
739 my $lengtha = scalar(@{$self->trace('a')});
740 my $lengtht = scalar(@{$self->trace('t')});
741 my $lengthg = scalar(@{$self->trace('g')});
742 my $lengthc = scalar(@{$self->trace('c')});
743 if (($lengtha == $lengtht) && ($lengtha == $lengthg) && ($lengtha == $lengthc) ) {
744 return $lengtha;
746 $self->warn("Not all of the trace indices are the same length".
747 " Here are their lengths: a: $lengtha t:$lengtht ".
748 " g: $lengthg c: $lengthc");
753 =head2 sub_trace_object($start,$end)
755 Title : sub_trace_object($start,$end)
756 Usage : $smaller_object = $object->sub_trace_object('1','100');
757 Function: Get a subset of the sequence, its quality, and its trace.
758 Returns : A reference to a Bio::Seq::SequenceTrace object
759 Args : a start position and an end position
760 Notes :
761 - the start and end position refer to the positions of _bases_.
762 - for example, to get a sub SequenceTrace for bases 5-10,
763 use this routine.
764 - you will get the bases, qualities, and the trace values
765 - you can then use this object to synthesize a new scf
766 using seqIO::scf.
768 =cut
770 sub sub_trace_object {
771 my ($self,$start,$end) = @_;
772 my ($start2,$end2);
773 my @subs = @{$self->sub_peak_index($start,$end)};
774 $start2 = shift(@subs);
775 $end2 = pop(@subs);
776 my $new_object = Bio::Seq::SequenceTrace->new(
777 -swq => Bio::Seq::Quality->new(
778 -seq => $self->subseq($start,$end),
779 -qual => $self->subqual($start,$end),
780 -id => $self->id()
782 -trace_a => $self->sub_trace('a',$start2,$end2),
783 -trace_t => $self->sub_trace('t',$start2,$end2),
784 -trace_g => $self->sub_trace('g',$start2,$end2),
785 -trace_c => $self->sub_trace('c',$start2,$end2),
786 -peak_indices => $self->sub_peak_index($start,$end)
789 $new_object->set_accuracies();
790 $new_object->_reset_peak_indices();
791 return $new_object;
794 =head2 _synthesize_traces()
796 Title : _synthesize_traces()
797 Usage : $obj->_synthesize_traces();
798 Function: Synthesize false traces for this object.
799 Returns : Nothing.
800 Args : None.
801 Notes : This method is intended to be invoked when this
802 object is created with a SWQ object- that is to say that
803 there is a sequence and a set of qualities but there was
804 no actual trace data.
806 =cut
808 sub _synthesize_traces {
809 my ($self) = shift;
810 $self->peak_indices(qw());
811 #ml my $version = 2;
812 # the user should be warned if traces already exist
815 #ml ( my $sequence = $self->seq() ) =~ tr/a-z/A-Z/;
816 #ml my @quals = @{$self->qual()};
817 #ml my $info;
818 # build the ramp for the first base.
819 # a ramp looks like this "1 4 13 29 51 71 80 71 51 29 13 4 1" times the quality score.
820 # REMEMBER: A C G T
821 # note to self-> smooth this thing out a bit later
822 my $ramp_data;
823 @{$ramp_data->{'ramp'}} = qw( 1 4 13 29 51 75 80 75 51 29 13 4 1 );
824 # the width of the ramp
825 $ramp_data->{'ramp_width'} = scalar(@{$ramp_data->{'ramp'}});
826 # how far should the peaks overlap?
827 $ramp_data->{'ramp_overlap'} = 1;
828 # where should the peaks be located?
829 $ramp_data->{'peak_at'} = 7;
830 $ramp_data->{'ramp_total_length'} =
831 $self->seq_obj()->length() * $ramp_data->{'ramp_width'}
832 - $self->seq_obj()->length() * $ramp_data->{'ramp_overlap'};
833 my $pos;
834 my $total_length = $ramp_data->{ramp_total_length};
835 $self->initialize_traces("0",$total_length+2);
836 # now populate them
837 my ($current_base,$place_base_at,$peak_quality,$ramp_counter,$current_ramp,$ramp_position);
838 #ml my $sequence_length = $self->length();
839 my $half_ramp = int($ramp_data->{'ramp_width'}/2);
840 for ($pos = 0; $pos<$self->length();$pos++) {
841 $current_base = uc $self->seq_obj()->subseq($pos+1,$pos+1);
842 # print("Synthesizing the ramp for $current_base\n");
843 my $all_bases = "ATGC";
844 $peak_quality = $self->qual_obj()->qualat($pos+1);
845 # where should the peak for this base be placed? Modeled after a mktrace scf
846 $place_base_at = ($pos * $ramp_data->{'ramp_width'}) -
847 ($pos * $ramp_data->{'ramp_overlap'}) -
848 $half_ramp + $ramp_data->{'ramp_width'} - 1;
849 # print("Placing this base at this position: $place_base_at\n");
850 push @{$self->peak_indices()},$place_base_at;
851 $ramp_position = $place_base_at - $half_ramp;
852 if ($current_base =~ "N" ) {
853 $current_base = "A";
855 for ($current_ramp = 0; $current_ramp < $ramp_data->{'ramp_width'}; $current_ramp++) {
856 # print("Placing a trace value here: $current_base ".($ramp_position+$current_ramp+1)." ".$peak_quality*$ramp_data->{'ramp'}->[$current_ramp]."\n");
857 $self->trace_value_at($current_base,$ramp_position+$current_ramp+1,$peak_quality*$ramp_data->{'ramp'}->[$current_ramp]);
859 $self->peak_index_at($pos+1,
860 $place_base_at+1
862 #ml my $other_bases = $self->_get_other_bases($current_base);
863 # foreach ( split('',$other_bases) ) {
864 # push @{$self->{'text'}->{"v3_base_accuracy"}->{$_}},0;
873 =head2 _dump_traces($transformed)
875 Title : _dump_traces("transformed")
876 Usage : &_dump_traces($ra,$rc,$rg,$rt);
877 Function: Used in debugging. Prints all traces one beside each other.
878 Returns : Nothing.
879 Args : References to the arrays containing the traces for A,C,G,T.
880 Notes : Beats using dumpValue, I'll tell ya. Much better then using
881 join' ' too.
882 - if a scalar is included as an argument (any scalar), this
883 procedure will dump the _delta'd trace. If you don't know what
884 that means you should not be using this.
886 =cut
889 sub _dump_traces {
890 my ($self) = @_;
891 my (@sA,@sT,@sG,@sC);
892 print ("Count\ta\tc\tg\tt\n");
893 my $length = $self->trace_length();
894 for (my $curr=1; $curr <= $length; $curr++) {
895 print(($curr-1)."\t".$self->trace_value_at('a',$curr).
896 "\t".$self->trace_value_at('c',$curr).
897 "\t".$self->trace_value_at('g',$curr).
898 "\t".$self->trace_value_at('t',$curr)."\n");
900 return;
903 =head2 _initialize_traces()
905 Title : _initialize_traces()
906 Usage : $trace_object->_initialize_traces();
907 Function: Creates empty arrays to hold synthetic trace values.
908 Returns : Nothing.
909 Args : None.
911 =cut
913 sub initialize_traces {
914 my ($self,$value,$length) = @_;
915 foreach (qw(a t g c)) {
916 my @temp;
917 for (my $count=0; $count<$length; $count++) {
918 $temp[$count] = $value;
920 $self->trace($_,\@temp);
924 =head2 trace_value_at($channel,$position)
926 Title : trace_value_at($channel,$position)
927 Usage : $value = $trace_object->trace_value_at($channel,$position);
928 Function: What is the value of the trace for this base at this position?
929 Returns : A scalar represnting the trace value here.
930 Args : a base channel (a,t,g,c)
931 a position ( < $trace_object->trace_length() )
933 =cut
935 sub trace_value_at {
936 my ($self,$channel,$position,$value) = @_;
937 if ($value) {
938 $self->trace($channel)->[$position] = $value;
940 return $self->sub_trace($channel,($position),($position))->[0];
943 sub _deprecated_get_scf_version_2_base_structure {
944 # this sub is deprecated- check inside SeqIO::scf
945 my $self = shift;
946 my (@structure,$current);
947 my $length = $self->length();
948 for ($current=1; $current <= $self->length() ; $current++) {
949 my $base_here = $self->seq_obj()->subseq($current,$current);
950 $base_here = lc($base_here);
951 my $probabilities;
952 $probabilities->{$base_here} = $self->qual_obj()->qualat($current);
953 my $other_bases = "atgc";
954 my $empty = "";
955 $other_bases =~ s/$base_here/$empty/e;
956 foreach ( split('',$other_bases) ) {
957 $probabilities->{$_} = "0";
959 @structure = (
960 @structure,
961 $self->peak_index_at($current),
962 $probabilities->{'a'},
963 $probabilities->{'t'},
964 $probabilities->{'g'},
965 $probabilities->{'c'}
969 return \@structure;
972 sub _deprecated_get_scf_version_3_base_structure {
973 my $self = shift;
974 my $structure;
975 $structure = join('',$self->peak_indices());
976 return $structure;
980 =head2 accuracies($channel,$position)
982 Title : trace_value_at($channel,$position)
983 Usage : $value = $trace_object->trace_value_at($channel,$position);
984 Function: What is the value of the trace for this base at this position?
985 Returns : A scalar representing the trace value here.
986 Args : a base channel (a,t,g,c)
987 a position ( < $trace_object->trace_length() )
989 =cut
992 sub accuracies {
993 my ($self,$channel,$value) = @_;
994 if ($value) {
995 if (ref($value) eq "ARRAY") {
996 $self->{accuracies}->{$channel} = $value;
998 else {
999 my @acc = split(' ',$value);
1000 $self->{accuracies}->{$channel} = \@acc;
1003 return $self->{accuracies}->{$channel};
1007 =head2 set_accuracies()
1009 Title : set_sccuracies()
1010 Usage : $trace_object->set_accuracies();
1011 Function: Take a sequence's quality and synthesize proper scf-style
1012 base accuracies that can then be accessed with
1013 accuracies("a") or something like it.
1014 Returns : Nothing.
1015 Args : None.
1017 =cut
1019 sub set_accuracies {
1020 my $self = shift;
1021 my $count = 0;
1022 my $length = $self->length();
1023 for ($count=1; $count <= $length; $count++) {
1024 my $base_here = $self->seq_obj()->subseq($count,$count);
1025 my $qual_here = $self->qual_obj()->qualat($count);
1026 $self->accuracy_at($base_here,$count,$qual_here);
1027 my $other_bases = $self->_get_other_bases($base_here);
1028 foreach (split('',$other_bases)) {
1029 $self->accuracy_at($_,$count,"null");
1035 =head2 scf_dump()
1037 Title : scf_dump()
1038 Usage : $trace_object->scf_dump();
1039 Function: Prints out the contents of the structures representing
1040 the SequenceTrace in a manner similar to io_lib's scf_dump.
1041 Returns : Nothing. Prints out the contents of the structures
1042 used to represent the sequence and its trace.
1043 Args : None.
1044 Notes : Used in debugging, obviously.
1046 =cut
1048 sub scf_dump {
1049 my $self = shift;
1050 my $count;
1051 for ($count=1;$count<=$self->length();$count++) {
1052 my $base_here = lc($self->seq_obj()->subseq($count,$count));
1053 print($base_here." ".sprintf("%05d",$self->peak_index_at($count))."\t");
1054 foreach (sort qw(a c g t)) {
1055 print(sprintf("%03d",$self->accuracy_at($_,$count))."\t");
1057 print("\n");
1059 $self->_dump_traces();
1062 =head2 _get_other_bases($this_base)
1064 Title : _get_other_bases($this_base)
1065 Usage : $other_bases = $trace_object->_get_other_bases($this_base);
1066 Function: A utility routine to return bases other then the one provided.
1067 I was doing this over and over so I put it here.
1068 Returns : Three of a,t,g and c.
1069 Args : A base (atgc)
1070 Notes : $obj->_get_other_bases("a") returns "tgc"
1072 =cut
1074 sub _get_other_bases {
1075 my ($self,$this_base) = @_;
1076 $this_base = lc($this_base);
1077 my $all_bases = "atgc";
1078 my $empty = "";
1079 $all_bases =~ s/$this_base/$empty/e;
1080 return $all_bases;
1084 =head2 accuracy_at($base,$position)
1086 Title : accuracy_at($base,$position)
1087 Usage : $accuracy = $trace_object->accuracy_at($base,$position);
1088 Function:
1089 Returns : Returns the accuracy of finding $base at $position.
1090 Args : 1. a base channel (atgc) 2. a value to _set_ the accuracy
1091 Notes : $obj->_get_other_bases("a") returns "tgc"
1093 =cut
1096 sub accuracy_at {
1097 my ($self,$base,$position,$value) = @_;
1098 $base = lc($base);
1099 if ($value) {
1100 if ($value eq "null") {
1101 $self->{accuracies}->{$base}->[$position-1] = "0";
1103 else {
1104 $self->{accuracies}->{$base}->[$position-1] = $value;
1107 return $self->{accuracies}->{$base}->[$position-1];