add latest changes to, um, Changes
[bioperl-live.git] / Bio / Seq / SequenceTrace.pm
blob5f00549b4f55dbe860fdbb6e346c948a43d1ba56
2 # BioPerl module for Bio::Seq::SequenceTrace
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com
8 # Copyright Chad Matsalla
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Seq::SequenceTrace - Bioperl object packaging a sequence with its trace
18 =head1 SYNOPSIS
20 # example code here
22 =head1 DESCRIPTION
24 This object stores a sequence with its trace.
26 =head1 FEEDBACK
28 =head2 Mailing Lists
30 User feedback is an integral part of the evolution of this and other
31 Bioperl modules. Send your comments and suggestions preferably to one
32 of the Bioperl mailing lists. Your participation is much appreciated.
34 bioperl-l@bioperl.org - General discussion
35 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
37 =head2 Support
39 Please direct usage questions or support issues to the mailing list:
41 I<bioperl-l@bioperl.org>
43 rather than to the module maintainer directly. Many experienced and
44 reponsive experts will be able look at the problem and quickly
45 address it. Please include a thorough description of the problem
46 with code and data examples if at all possible.
48 =head2 Reporting Bugs
50 Report bugs to the Bioperl bug tracking system to help us keep track
51 the bugs and their resolution. Bug reports can be submitted via the
52 web:
54 https://github.com/bioperl/bioperl-live/issues
56 =head1 AUTHOR - Chad Matsalla
58 Email bioinformatics@dieselwurks.com
61 The rest of the documentation details each of the object methods.
62 Internal methods are usually preceded with a _
64 =cut
67 package Bio::Seq::SequenceTrace;
70 use strict;
71 use Bio::Seq::QualI;
72 use Bio::PrimarySeqI;
73 use Bio::PrimarySeq;
74 use Bio::Seq::PrimaryQual;
76 use base qw(Bio::Root::Root Bio::Seq::Quality Bio::Seq::TraceI);
78 =head2 new()
80 Title : new()
81 Usage : $st = Bio::Seq::SequenceTrace->new
82 ( -swq => Bio::Seq::SequenceWithQuality,
83 -trace_a => \@trace_values_for_a_channel,
84 -trace_t => \@trace_values_for_t_channel,
85 -trace_g => \@trace_values_for_g_channel,
86 -trace_c => \@trace_values_for_c_channel,
87 -accuracy_a => \@a_accuracies,
88 -accuracy_t => \@t_accuracies,
89 -accuracy_g => \@g_accuracies,
90 -accuracy_c => \@c_accuracies,
91 -peak_indices => '0 5 10 15 20 25 30 35'
93 Function: Returns a new Bio::Seq::SequenceTrace object from basic
94 constructors.
95 Returns : a new Bio::Seq::SequenceTrace object
96 Arguments: I think that these are all describes in the usage above.
98 =cut
100 sub new {
101 my ($class, @args) = @_;
102 my $self = $class->SUPER::new(@args);
103 # default: turn OFF the warnings
104 $self->{supress_warnings} = 1;
105 my($swq,$peak_indices,$trace_a,$trace_t,
106 $trace_g,$trace_c,$acc_a,$acc_t,$acc_g,$acc_c) =
107 $self->_rearrange([qw(
109 PEAK_INDICES
110 TRACE_A
111 TRACE_T
112 TRACE_G
113 TRACE_C
114 ACCURACY_A
115 ACCURACY_T
116 ACCURACY_G
117 ACCURACY_C )], @args);
118 # first, deal with the sequence and quality information
119 if ($swq && ref($swq) eq "Bio::Seq::Quality") {
120 $self->{swq} = $swq;
122 else {
123 $self->throw("A Bio::Seq::SequenceTrace object must be created with a
124 Bio::Seq::Quality object. You provided this type of object: "
125 .ref($swq));
127 if (!$acc_a) {
128 # this means that you probably did not provide traces and accuracies
129 # and that they need to be synthesized
130 $self->set_accuracies();
132 else {
133 $self->accuracies('a',$acc_a);
134 $self->accuracies('t',$acc_t);
135 $self->accuracies('g',$acc_g);
136 $self->accuracies('c',$acc_c);
138 if (!$trace_a) {
139 $self->_synthesize_traces();
141 else {
142 $self->trace('a',$trace_a);
143 $self->trace('t',$trace_t);
144 $self->trace('g',$trace_g);
145 $self->trace('c',$trace_c);
146 $self->peak_indices($peak_indices);
148 $self->id($self->seq_obj->id);
149 return $self;
152 sub swq_obj {
153 my $self = shift;
154 $self->warn('swq_obj() is deprecated: use seq_obj()');
155 return $self->{swq};
160 =head2 trace($base,\@new_values)
162 Title : trace($base,\@new_values)
163 Usage : @trace_Values = @{$obj->trace($base,\@new_values)};
164 Function: Returns the trace values as a reference to an array containing the
165 trace values. The individual elements of the trace array are not validated
166 and can be any numeric value.
167 Returns : A reference to an array.
168 Status :
169 Arguments: $base : which color channel would you like the trace values for?
170 - $base must be one of "A","T","G","C"
171 \@new_values : a reference to an array of values containing trace
172 data for this base
174 =cut
176 sub trace {
177 my ($self,$base_channel,$values) = @_;
178 if (!$base_channel) {
179 $self->throw('You must provide a valid base channel (atgc) to use trace()');
181 $base_channel =~ tr/A-Z/a-z/;
182 if ($base_channel !~ /[acgt]/) {
183 $self->throw('You must provide a valid base channel (atgc) to use trace()');
185 if ($values) {
186 if (ref($values) eq "ARRAY") {
187 $self->{trace}->{$base_channel} = $values;
189 else {
190 my @trace = split(' ',$values);
191 $self->{trace}->{$base_channel} = \@trace;
194 if ($self->{trace}->{$base_channel}) {
195 return $self->{trace}->{$base_channel};
197 else {
198 return;
203 =head2 peak_indices($new_indices)
205 Title : peak_indices($new_indices)
206 Usage : $indices = $obj->peak_indices($new_indices);
207 Function: Return the trace index points for this object.
208 Returns : A scalar
209 Args : If used, the trace indices will be set to the provided value.
211 =cut
213 sub peak_indices {
214 my ($self,$peak_indices)= @_;
215 if ($peak_indices) {
216 if (ref($peak_indices) eq "ARRAY") {
217 $self->{peak_indices} = $peak_indices;
219 else {
220 my @indices = split(' ',$peak_indices);
221 $self->{peak_indices} = \@indices;
224 if (!$self->{peak_indices}) {
225 my @temp = ();
226 $self->{peak_indices} = \@temp;
228 return $self->{peak_indices};
232 =head2 _reset_peak_indices()
234 Title : _rest_peak_indices()
235 Usage : $obj->_reset_peak_indices();
236 Function: Reset the peak indices.
237 Returns : Nothing.
238 Args : None.
239 Notes : When you create a sub_trace_object, the peak indices
240 will still be pointing to the apporpriate location _in the
241 original trace_. In order to fix this, the initial value must
242 be subtracted from each value here. ie. The first peak index
243 must be "1".
245 =cut
247 sub _reset_peak_indices {
248 my $self = shift;
249 my $length = $self->length();
250 my $subtractive = $self->peak_index_at(1);
251 my ($original,$new);
252 $self->peak_index_at(1,"null");
253 for (my $counter=2; $counter<= $length; $counter++) {
254 my $original = $self->peak_index_at($counter);
255 $new = $original - $subtractive;
256 $self->peak_index_at($counter,$new);
258 return;
265 =head2 peak_index_at($position)
267 Title : peak_index_at($position)
268 Usage : $peak_index = $obj->peak_index_at($postition);
269 Function: Return the trace iindex point at this position
270 Returns : A scalar
271 Args : If used, the trace index at this position will be
272 set to the provided value.
274 =cut
276 sub peak_index_at {
277 my ($self,$position,$value)= @_;
278 if ($value) {
279 if ($value eq "null") {
280 $self->peak_indices->[$position-1] = "0";
282 else {
283 $self->peak_indices->[$position-1] = $value;
286 return $self->peak_indices()->[$position-1];
289 =head2 alphabet()
291 Title : alphabet();
292 Usage : $molecule_type = $obj->alphabet();
293 Function: Get the molecule type from the PrimarySeq object.
294 Returns : What what PrimarySeq says the type of the sequence is.
295 Args : None.
297 =cut
299 sub alphabet {
300 my $self = shift;
301 return $self->{swq}->alphabet;
304 =head2 display_id()
306 Title : display_id()
307 Usage : $id_string = $obj->display_id();
308 Function: Returns the display id, aka the common name of the Quality
309 object.
310 The semantics of this is that it is the most likely string to be
311 used as an identifier of the quality sequence, and likely to have
312 "human" readability. The id is equivalent to the ID field of the
313 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
314 database. In fasta format, the >(\S+) is presumed to be the id,
315 though some people overload the id to embed other information.
316 Bioperl does not use any embedded information in the ID field,
317 and people are encouraged to use other mechanisms (accession
318 field for example, or extending the sequence object) to solve
319 this. Notice that $seq->id() maps to this function, mainly for
320 legacy/convience issues.
321 This method sets the display_id for the Quality object.
322 Returns : A string
323 Args : If a scalar is provided, it is set as the new display_id for
324 the Quality object.
325 Status : Virtual
327 =cut
329 sub display_id {
330 my ($self,$value) = @_;
331 if( defined $value) {
332 $self->{swq}->display_id($value);
334 return $self->{swq}->display_id();
338 =head2 accession_number()
340 Title : accession_number()
341 Usage : $unique_biological_key = $obj->accession_number();
342 Function: Returns the unique biological id for a sequence, commonly
343 called the accession_number. For sequences from established
344 databases, the implementors should try to use the correct
345 accession number. Notice that primary_id() provides the unique id
346 for the implemetation, allowing multiple objects to have the same
347 accession number in a particular implementation. For sequences
348 with no accession number, this method should return "unknown".
349 This method sets the accession_number for the Quality
350 object.
351 Returns : A string (the value of accession_number)
352 Args : If a scalar is provided, it is set as the new accession_number
353 for the Quality object.
354 Status : Virtual
357 =cut
359 sub accession_number {
360 my( $self, $acc ) = @_;
361 if (defined $acc) {
362 $self->{swq}->accession_number($acc);
363 } else {
364 $acc = $self->{swq}->accession_number();
365 $acc = 'unknown' unless defined $acc;
367 return $acc;
370 =head2 primary_id()
372 Title : primary_id()
373 Usage : $unique_implementation_key = $obj->primary_id();
374 Function: Returns the unique id for this object in this implementation.
375 This allows implementations to manage their own object ids in a
376 way the implementaiton can control clients can expect one id to
377 map to one object. For sequences with no accession number, this
378 method should return a stringified memory location.
379 This method sets the primary_id for the Quality
380 object.
381 Returns : A string. (the value of primary_id)
382 Args : If a scalar is provided, it is set as the new primary_id for
383 the Quality object.
385 =cut
387 sub primary_id {
388 my ($self,$value) = @_;
389 if ($value) {
390 $self->{swq}->primary_id($value);
392 return $self->{swq}->primary_id();
396 =head2 desc()
398 Title : desc()
399 Usage : $qual->desc($newval); _or_
400 $description = $qual->desc();
401 Function: Get/set description text for this Quality object.
402 Returns : A string. (the value of desc)
403 Args : If a scalar is provided, it is set as the new desc for the
404 Quality object.
406 =cut
408 sub desc {
409 # a mechanism to set the desc for the Quality object.
410 # probably will be used most often by set_common_features()
411 my ($self,$value) = @_;
412 if( defined $value) {
413 $self->{swq}->desc($value);
415 return $self->{swq}->desc();
418 =head2 id()
420 Title : id()
421 Usage : $id = $qual->id();
422 Function: Return the ID of the quality. This should normally be (and
423 actually is in the implementation provided here) just a synonym
424 for display_id().
425 Returns : A string. (the value of id)
426 Args : If a scalar is provided, it is set as the new id for the
427 Quality object.
429 =cut
431 sub id {
432 my ($self,$value) = @_;
433 if (!$self) { $self->throw("no value for self in $value"); }
434 if( defined $value ) {
435 $self->{swq}->display_id($value);
437 return $self->{swq}->display_id();
440 =head2 seq
442 Title : seq()
443 Usage : $string = $obj->seq(); _or_
444 $obj->seq("atctatcatca");
445 Function: Returns the sequence that is contained in the imbedded in the
446 PrimarySeq object within the Quality object
447 Returns : A scalar (the seq() value for the imbedded PrimarySeq object.)
448 Args : If a scalar is provided, the Quality object will
449 attempt to set that as the sequence for the imbedded PrimarySeq
450 object. Otherwise, the value of seq() for the PrimarySeq object
451 is returned.
452 Notes : This is probably not a good idea because you then should call
453 length() to make sure that the sequence and quality are of the
454 same length. Even then, how can you make sure that this sequence
455 belongs with that quality? I provided this to give you rope to
456 hang yourself with. Tie it to a strong device and use a good
457 knot.
459 =cut
461 sub seq {
462 my ($self,$value) = @_;
463 if( defined $value) {
464 $self->{swq}->seq($value);
466 return $self->{swq}->seq();
469 =head2 qual()
471 Title : qual()
472 Usage : @quality_values = @{$obj->qual()}; _or_
473 $obj->qual("10 10 20 40 50");
474 Function: Returns the quality as imbedded in the PrimaryQual object
475 within the Quality object.
476 Returns : A reference to an array containing the quality values in the
477 PrimaryQual object.
478 Args : If a scalar is provided, the Quality object will
479 attempt to set that as the quality for the imbedded PrimaryQual
480 object. Otherwise, the value of qual() for the PrimaryQual
481 object is returned.
482 Notes : This is probably not a good idea because you then should call
483 length() to make sure that the sequence and quality are of the
484 same length. Even then, how can you make sure that this sequence
485 belongs with that quality? I provided this to give you a strong
486 board with which to flagellate yourself.
488 =cut
490 sub qual {
491 my ($self,$value) = @_;
493 if( defined $value) {
494 $self->{swq}->qual($value);
496 return $self->{swq}->qual();
502 =head2 length()
504 Title : length()
505 Usage : $length = $seqWqual->length();
506 Function: Get the length of the Quality sequence/quality.
507 Returns : Returns the length of the sequence and quality
508 Args : None.
510 =cut
512 sub length {
513 my $self = shift;
514 return $self->seq_obj->length;
518 =head2 qual_obj
520 Title : qual_obj($different_obj)
521 Usage : $qualobj = $seqWqual->qual_obj(); _or_
522 $qualobj = $seqWqual->qual_obj($ref_to_primaryqual_obj);
523 Function: Get the Qualilty object that is imbedded in the
524 Quality object or if a reference to a PrimaryQual object
525 is provided, set this as the PrimaryQual object imbedded in the
526 Quality object.
527 Returns : A reference to a Bio::Seq::Quality object.
529 Identical to L<seq_obj>.
531 =cut
533 sub qual_obj {
534 my ($self,$value) = @_;
535 # return $self->{swq}->qual_obj($value);
536 return $self->{swq};
540 =head2 seq_obj
542 Title : seq_obj()
543 Usage : $seqobj = $seqWqual->seq_obj(); _or_
544 $seqobj = $seqWqual->seq_obj($ref_to_primary_seq_obj);
545 Function: Get the PrimarySeq object that is imbedded in the
546 Quality object or if a reference to a PrimarySeq object is
547 provided, set this as the PrimarySeq object imbedded in the
548 Quality object.
549 Returns : A reference to a Bio::PrimarySeq object.
551 =cut
553 sub seq_obj {
554 my ($self,$value) = @_;
555 return $self->{swq};
558 =head2 _set_descriptors
560 Title : _set_descriptors()
561 Usage : $seqWqual->_qual_obj($qual,$seq,$id,$acc,$pid,$desc,$given_id,
562 $alphabet);
563 Function: Set the descriptors for the Quality object. Try to
564 match the descriptors in the PrimarySeq object and in the
565 PrimaryQual object if descriptors were not provided with
566 construction.
567 Returns : Nothing.
568 Args : $qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet as found
569 in the new() method.
570 Notes : Really only intended to be called by the new() method. If
571 you want to invoke a similar function try
572 set_common_descriptors().
574 =cut
577 sub _set_descriptors {
578 my ($self,$qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet) = @_;
579 $self->{swq}->_seq_descriptors($qual,$seq,$id,$acc,$pid,
580 $desc,$given_id,$alphabet);
583 =head2 subseq($start,$end)
585 Title : subseq($start,$end)
586 Usage : $subsequence = $obj->subseq($start,$end);
587 Function: Returns the subseq from start to end, where the first base
588 is 1 and the number is inclusive, ie 1-2 are the first two
589 bases of the sequence.
590 Returns : A string.
591 Args : Two positions.
593 =cut
595 sub subseq {
596 my ($self,@args) = @_;
597 # does a single value work?
598 return $self->{swq}->subseq(@args);
601 =head2 baseat($position)
603 Title : baseat($position)
604 Usage : $base_at_position_6 = $obj->baseat("6");
605 Function: Returns a single base at the given position, where the first
606 base is 1 and the number is inclusive, ie 1-2 are the first two
607 bases of the sequence.
608 Returns : A scalar.
609 Args : A position.
611 =cut
613 sub baseat {
614 my ($self,$val) = @_;
615 return $self->{swq}->subseq($val,$val);
618 =head2 subqual($start,$end)
620 Title : subqual($start,$end)
621 Usage : @qualities = @{$obj->subqual(10,20);
622 Function: returns the quality values from $start to $end, where the
623 first value is 1 and the number is inclusive, ie 1-2 are the
624 first two bases of the sequence. Start cannot be larger than
625 end but can be equal.
626 Returns : A reference to an array.
627 Args : a start position and an end position
629 =cut
631 sub subqual {
632 my ($self,@args) = @_;
633 return $self->{swq}->subqual(@args);
636 =head2 qualat($position)
638 Title : qualat($position)
639 Usage : $quality = $obj->qualat(10);
640 Function: Return the quality value at the given location, where the
641 first value is 1 and the number is inclusive, ie 1-2 are the
642 first two bases of the sequence. Start cannot be larger than
643 end but can be equal.
644 Returns : A scalar.
645 Args : A position.
647 =cut
649 sub qualat {
650 my ($self,$val) = @_;
651 return $self->{swq}->qualat($val);
654 =head2 sub_peak_index($start,$end)
656 Title : sub_peak_index($start,$end)
657 Usage : @peak_indices = @{$obj->sub_peak_index(10,20);
658 Function: returns the trace index values from $start to $end, where the
659 first value is 1 and the number is inclusive, ie 1-2 are the
660 first two trace indices for this channel.
661 Returns : A reference to an array.
662 Args : a start position and an end position
664 =cut
666 sub sub_peak_index {
667 my ($self,$start,$end) = @_;
668 if( $start > $end ){
669 $self->throw("in sub_peak_index, start [$start] has to be greater than end [$end]");
672 if( $start <= 0 || $end > $self->length ) {
673 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
676 # remove one from start, and then length is end-start
678 $start--;
679 $end--;
680 my @sub_peak_index_array = @{$self->{peak_indices}}[$start..$end];
682 # return substr $self->seq(), $start, ($end-$start);
683 return \@sub_peak_index_array;
687 =head2 sub_trace($start,$end)
689 Title : sub_trace($base_channel,$start,$end)
690 Usage : @trace_values = @{$obj->sub_trace('a',10,20)};
691 Function: returns the trace values from $start to $end, where the
692 first value is 1 and the number is inclusive, ie 1-2 are the
693 first two bases of the sequence. Start cannot be larger than
694 end but can be e_peak_index.
695 Returns : A reference to an array.
696 Args : a start position and an end position
698 =cut
700 sub sub_trace {
701 my ($self,$base_channel,$start,$end) = @_;
702 if( $start > $end ){
703 $self->throw("in sub_trace, start [$start] has to be greater than end [$end]");
706 if( $start <= 0 || $end > $self->trace_length() ) {
707 $self->throw("You have to have start positive and length less than the total length of traces [$start:$end] Total ".$self->trace_length."");
710 # remove one from start, and then length is end-start
712 $start--;
713 $end--;
714 my @sub_peak_index_array = @{$self->trace($base_channel)}[$start..$end];
716 # return substr $self->seq(), $start, ($end-$start);
717 return \@sub_peak_index_array;
721 =head2 trace_length()
723 Title : trace_length()
724 Usage : $trace_length = $obj->trace_length();
725 Function: Return the length of the trace if all four traces (atgc)
726 are the same. Otherwise, throw an error.
727 Returns : A scalar.
728 Args : none
730 =cut
732 sub trace_length {
733 my $self = shift;
734 if ( !$self->trace('a') || !$self->trace('t') || !$self->trace('g') || !$self->trace('c') ) {
735 $self->warn("One or more of the trace channels are missing. Cannot give you a length.");
738 my $lengtha = scalar(@{$self->trace('a')});
739 my $lengtht = scalar(@{$self->trace('t')});
740 my $lengthg = scalar(@{$self->trace('g')});
741 my $lengthc = scalar(@{$self->trace('c')});
742 if (($lengtha == $lengtht) && ($lengtha == $lengthg) && ($lengtha == $lengthc) ) {
743 return $lengtha;
745 $self->warn("Not all of the trace indices are the same length".
746 " Here are their lengths: a: $lengtha t:$lengtht ".
747 " g: $lengthg c: $lengthc");
752 =head2 sub_trace_object($start,$end)
754 Title : sub_trace_object($start,$end)
755 Usage : $smaller_object = $object->sub_trace_object('1','100');
756 Function: Get a subset of the sequence, its quality, and its trace.
757 Returns : A reference to a Bio::Seq::SequenceTrace object
758 Args : a start position and an end position
759 Notes :
760 - the start and end position refer to the positions of _bases_.
761 - for example, to get a sub SequenceTrace for bases 5-10,
762 use this routine.
763 - you will get the bases, qualities, and the trace values
764 - you can then use this object to synthesize a new scf
765 using seqIO::scf.
767 =cut
769 sub sub_trace_object {
770 my ($self,$start,$end) = @_;
771 my ($start2,$end2);
772 my @subs = @{$self->sub_peak_index($start,$end)};
773 $start2 = shift(@subs);
774 $end2 = pop(@subs);
775 my $new_object = Bio::Seq::SequenceTrace->new(
776 -swq => Bio::Seq::Quality->new(
777 -seq => $self->subseq($start,$end),
778 -qual => $self->subqual($start,$end),
779 -id => $self->id()
781 -trace_a => $self->sub_trace('a',$start2,$end2),
782 -trace_t => $self->sub_trace('t',$start2,$end2),
783 -trace_g => $self->sub_trace('g',$start2,$end2),
784 -trace_c => $self->sub_trace('c',$start2,$end2),
785 -peak_indices => $self->sub_peak_index($start,$end)
788 $new_object->set_accuracies();
789 $new_object->_reset_peak_indices();
790 return $new_object;
793 =head2 _synthesize_traces()
795 Title : _synthesize_traces()
796 Usage : $obj->_synthesize_traces();
797 Function: Synthesize false traces for this object.
798 Returns : Nothing.
799 Args : None.
800 Notes : This method is intended to be invoked when this
801 object is created with a SWQ object- that is to say that
802 there is a sequence and a set of qualities but there was
803 no actual trace data.
805 =cut
807 sub _synthesize_traces {
808 my ($self) = shift;
809 $self->peak_indices(qw());
810 #ml my $version = 2;
811 # the user should be warned if traces already exist
814 #ml ( my $sequence = $self->seq() ) =~ tr/a-z/A-Z/;
815 #ml my @quals = @{$self->qual()};
816 #ml my $info;
817 # build the ramp for the first base.
818 # a ramp looks like this "1 4 13 29 51 71 80 71 51 29 13 4 1" times the quality score.
819 # REMEMBER: A C G T
820 # note to self-> smooth this thing out a bit later
821 my $ramp_data;
822 @{$ramp_data->{'ramp'}} = qw( 1 4 13 29 51 75 80 75 51 29 13 4 1 );
823 # the width of the ramp
824 $ramp_data->{'ramp_width'} = scalar(@{$ramp_data->{'ramp'}});
825 # how far should the peaks overlap?
826 $ramp_data->{'ramp_overlap'} = 1;
827 # where should the peaks be located?
828 $ramp_data->{'peak_at'} = 7;
829 $ramp_data->{'ramp_total_length'} =
830 $self->seq_obj()->length() * $ramp_data->{'ramp_width'}
831 - $self->seq_obj()->length() * $ramp_data->{'ramp_overlap'};
832 my $pos;
833 my $total_length = $ramp_data->{ramp_total_length};
834 $self->initialize_traces("0",$total_length+2);
835 # now populate them
836 my ($current_base,$place_base_at,$peak_quality,$ramp_counter,$current_ramp,$ramp_position);
837 #ml my $sequence_length = $self->length();
838 my $half_ramp = int($ramp_data->{'ramp_width'}/2);
839 for ($pos = 0; $pos<$self->length();$pos++) {
840 $current_base = uc $self->seq_obj()->subseq($pos+1,$pos+1);
841 # print("Synthesizing the ramp for $current_base\n");
842 my $all_bases = "ATGC";
843 $peak_quality = $self->qual_obj()->qualat($pos+1);
844 # where should the peak for this base be placed? Modeled after a mktrace scf
845 $place_base_at = ($pos * $ramp_data->{'ramp_width'}) -
846 ($pos * $ramp_data->{'ramp_overlap'}) -
847 $half_ramp + $ramp_data->{'ramp_width'} - 1;
848 # print("Placing this base at this position: $place_base_at\n");
849 push @{$self->peak_indices()},$place_base_at;
850 $ramp_position = $place_base_at - $half_ramp;
851 if ($current_base =~ "N" ) {
852 $current_base = "A";
854 for ($current_ramp = 0; $current_ramp < $ramp_data->{'ramp_width'}; $current_ramp++) {
855 # print("Placing a trace value here: $current_base ".($ramp_position+$current_ramp+1)." ".$peak_quality*$ramp_data->{'ramp'}->[$current_ramp]."\n");
856 $self->trace_value_at($current_base,$ramp_position+$current_ramp+1,$peak_quality*$ramp_data->{'ramp'}->[$current_ramp]);
858 $self->peak_index_at($pos+1,
859 $place_base_at+1
861 #ml my $other_bases = $self->_get_other_bases($current_base);
862 # foreach ( split('',$other_bases) ) {
863 # push @{$self->{'text'}->{"v3_base_accuracy"}->{$_}},0;
872 =head2 _dump_traces($transformed)
874 Title : _dump_traces("transformed")
875 Usage : &_dump_traces($ra,$rc,$rg,$rt);
876 Function: Used in debugging. Prints all traces one beside each other.
877 Returns : Nothing.
878 Args : References to the arrays containing the traces for A,C,G,T.
879 Notes : Beats using dumpValue, I'll tell ya. Much better then using
880 join' ' too.
881 - if a scalar is included as an argument (any scalar), this
882 procedure will dump the _delta'd trace. If you don't know what
883 that means you should not be using this.
885 =cut
888 sub _dump_traces {
889 my ($self) = @_;
890 my (@sA,@sT,@sG,@sC);
891 print ("Count\ta\tc\tg\tt\n");
892 my $length = $self->trace_length();
893 for (my $curr=1; $curr <= $length; $curr++) {
894 print(($curr-1)."\t".$self->trace_value_at('a',$curr).
895 "\t".$self->trace_value_at('c',$curr).
896 "\t".$self->trace_value_at('g',$curr).
897 "\t".$self->trace_value_at('t',$curr)."\n");
899 return;
902 =head2 _initialize_traces()
904 Title : _initialize_traces()
905 Usage : $trace_object->_initialize_traces();
906 Function: Creates empty arrays to hold synthetic trace values.
907 Returns : Nothing.
908 Args : None.
910 =cut
912 sub initialize_traces {
913 my ($self,$value,$length) = @_;
914 foreach (qw(a t g c)) {
915 my @temp;
916 for (my $count=0; $count<$length; $count++) {
917 $temp[$count] = $value;
919 $self->trace($_,\@temp);
923 =head2 trace_value_at($channel,$position)
925 Title : trace_value_at($channel,$position)
926 Usage : $value = $trace_object->trace_value_at($channel,$position);
927 Function: What is the value of the trace for this base at this position?
928 Returns : A scalar represnting the trace value here.
929 Args : a base channel (a,t,g,c)
930 a position ( < $trace_object->trace_length() )
932 =cut
934 sub trace_value_at {
935 my ($self,$channel,$position,$value) = @_;
936 if ($value) {
937 $self->trace($channel)->[$position] = $value;
939 return $self->sub_trace($channel,($position),($position))->[0];
942 sub _deprecated_get_scf_version_2_base_structure {
943 # this sub is deprecated- check inside SeqIO::scf
944 my $self = shift;
945 my (@structure,$current);
946 my $length = $self->length();
947 for ($current=1; $current <= $self->length() ; $current++) {
948 my $base_here = $self->seq_obj()->subseq($current,$current);
949 $base_here = lc($base_here);
950 my $probabilities;
951 $probabilities->{$base_here} = $self->qual_obj()->qualat($current);
952 my $other_bases = "atgc";
953 my $empty = "";
954 $other_bases =~ s/$base_here/$empty/e;
955 foreach ( split('',$other_bases) ) {
956 $probabilities->{$_} = "0";
958 @structure = (
959 @structure,
960 $self->peak_index_at($current),
961 $probabilities->{'a'},
962 $probabilities->{'t'},
963 $probabilities->{'g'},
964 $probabilities->{'c'}
968 return \@structure;
971 sub _deprecated_get_scf_version_3_base_structure {
972 my $self = shift;
973 my $structure;
974 $structure = join('',$self->peak_indices());
975 return $structure;
979 =head2 accuracies($channel,$position)
981 Title : trace_value_at($channel,$position)
982 Usage : $value = $trace_object->trace_value_at($channel,$position);
983 Function: What is the value of the trace for this base at this position?
984 Returns : A scalar representing the trace value here.
985 Args : a base channel (a,t,g,c)
986 a position ( < $trace_object->trace_length() )
988 =cut
991 sub accuracies {
992 my ($self,$channel,$value) = @_;
993 if ($value) {
994 if (ref($value) eq "ARRAY") {
995 $self->{accuracies}->{$channel} = $value;
997 else {
998 my @acc = split(' ',$value);
999 $self->{accuracies}->{$channel} = \@acc;
1002 return $self->{accuracies}->{$channel};
1006 =head2 set_accuracies()
1008 Title : set_sccuracies()
1009 Usage : $trace_object->set_accuracies();
1010 Function: Take a sequence's quality and synthesize proper scf-style
1011 base accuracies that can then be accessed with
1012 accuracies("a") or something like it.
1013 Returns : Nothing.
1014 Args : None.
1016 =cut
1018 sub set_accuracies {
1019 my $self = shift;
1020 my $count = 0;
1021 my $length = $self->length();
1022 for ($count=1; $count <= $length; $count++) {
1023 my $base_here = $self->seq_obj()->subseq($count,$count);
1024 my $qual_here = $self->qual_obj()->qualat($count);
1025 $self->accuracy_at($base_here,$count,$qual_here);
1026 my $other_bases = $self->_get_other_bases($base_here);
1027 foreach (split('',$other_bases)) {
1028 $self->accuracy_at($_,$count,"null");
1034 =head2 scf_dump()
1036 Title : scf_dump()
1037 Usage : $trace_object->scf_dump();
1038 Function: Prints out the contents of the structures representing
1039 the SequenceTrace in a manner similar to io_lib's scf_dump.
1040 Returns : Nothing. Prints out the contents of the structures
1041 used to represent the sequence and its trace.
1042 Args : None.
1043 Notes : Used in debugging, obviously.
1045 =cut
1047 sub scf_dump {
1048 my $self = shift;
1049 my $count;
1050 for ($count=1;$count<=$self->length();$count++) {
1051 my $base_here = lc($self->seq_obj()->subseq($count,$count));
1052 print($base_here." ".sprintf("%05d",$self->peak_index_at($count))."\t");
1053 foreach (sort qw(a c g t)) {
1054 print(sprintf("%03d",$self->accuracy_at($_,$count))."\t");
1056 print("\n");
1058 $self->_dump_traces();
1061 =head2 _get_other_bases($this_base)
1063 Title : _get_other_bases($this_base)
1064 Usage : $other_bases = $trace_object->_get_other_bases($this_base);
1065 Function: A utility routine to return bases other then the one provided.
1066 I was doing this over and over so I put it here.
1067 Returns : Three of a,t,g and c.
1068 Args : A base (atgc)
1069 Notes : $obj->_get_other_bases("a") returns "tgc"
1071 =cut
1073 sub _get_other_bases {
1074 my ($self,$this_base) = @_;
1075 $this_base = lc($this_base);
1076 my $all_bases = "atgc";
1077 my $empty = "";
1078 $all_bases =~ s/$this_base/$empty/e;
1079 return $all_bases;
1083 =head2 accuracy_at($base,$position)
1085 Title : accuracy_at($base,$position)
1086 Usage : $accuracy = $trace_object->accuracy_at($base,$position);
1087 Function:
1088 Returns : Returns the accuracy of finding $base at $position.
1089 Args : 1. a base channel (atgc) 2. a value to _set_ the accuracy
1090 Notes : $obj->_get_other_bases("a") returns "tgc"
1092 =cut
1095 sub accuracy_at {
1096 my ($self,$base,$position,$value) = @_;
1097 $base = lc($base);
1098 if ($value) {
1099 if ($value eq "null") {
1100 $self->{accuracies}->{$base}->[$position-1] = "0";
1102 else {
1103 $self->{accuracies}->{$base}->[$position-1] = $value;
1106 return $self->{accuracies}->{$base}->[$position-1];