Bio::Seq::SeqWithQuality: remove deprecated class
[bioperl-live.git] / lib / Bio / Seq / Quality.pm
blobbf6d56a13c3eb14c22b71b67d2eaa262a3d7936e
2 # BioPerl module for Bio::Seq::Quality
4 # Please direct questions and support issues to
5 # <bioperl-l@bioperl.org>
7 # Cared for by Heikki Lehvaslaiho
9 # Copyright Heikki Lehvaslaiho
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Seq::Quality - Implementation of sequence with residue quality
18 and trace values
20 =head1 SYNOPSIS
22 use Bio::Seq::Quality;
24 # input can be space delimited string or array ref
25 my $qual = '0 1 2 3 4 5 6 7 8 9 11 12';
26 my $trace = '0 5 10 15 20 25 30 35 40 45 50 55';
28 my $seq = Bio::Seq::Quality->new
29 ( -qual => $qual,
30 -trace_indices => $trace,
31 -seq => 'atcgatcgatcg',
32 -id => 'human_id',
33 -accession_number => 'S000012',
34 -verbose => -1 # to silence deprecated methods
37 my $quals = $seq->qual; # array ref
38 my $traces = $seq->trace; # array ref
40 my $quals = $seq->qual_text; # string
41 my $traces = $seq->trace_text; # string
44 # get sub values
45 $quals = $seq->subqual(2, 3); # array ref
46 $traces = $seq->subtrace(2, 3); # array ref
47 $quals = $seq->subqual_text(2, 3); # string
48 $quals = $seq->subtrace_text(2, 3); # string
50 # set sub values
51 $seq->subqual(2, 3, "9 9");
52 $seq->subtrace(2, 3, "9 9");
56 =head1 DESCRIPTION
58 This object stores base quality values together with the sequence
59 string.
61 The implementation is based on Bio::Seq::Meta::Array. qual() and
62 trace() are base methods to store and retrieve information that have
63 extensions to retrieve values as a scalar (e.g. qual_text() ), or get
64 or set subvalues (e.g. subqual() ). See L<Bio::Seq::MetaI> for more
65 details.
67 All the functional code is in Bio::Seq::Meta::Array.
69 There deprecated methods that are included for compatibility with
70 Bio::Seq::SeqWithQuality. These will print a warning unless verbosity
71 of the object is set to be less than zero.
73 =head1 SEE ALSO
75 L<Bio::Seq::MetaI>,
76 L<Bio::Seq::Meta::Array>
78 =head1 FEEDBACK
80 =head2 Mailing Lists
82 User feedback is an integral part of the evolution of this and other
83 Bioperl modules. Send your comments and suggestions preferably to one
84 of the Bioperl mailing lists. Your participation is much appreciated.
86 bioperl-l@bioperl.org - General discussion
87 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
89 =head2 Support
91 Please direct usage questions or support issues to the mailing list:
93 I<bioperl-l@bioperl.org>
95 rather than to the module maintainer directly. Many experienced and
96 reponsive experts will be able look at the problem and quickly
97 address it. Please include a thorough description of the problem
98 with code and data examples if at all possible.
100 =head2 Reporting Bugs
102 Report bugs to the Bioperl bug tracking system to help us keep track
103 the bugs and their resolution. Bug reports can be submitted via the
104 web:
106 https://github.com/bioperl/bioperl-live/issues
108 =head1 AUTHOR - Heikki Lehvaslaiho
110 Email heikki-at-bioperl-dot-org
112 =head1 CONTRIBUTORS
114 Chad Matsalla, bioinformatics at dieselwurks dot com
116 Dan Bolser, dan dot bolser at gmail dot com
118 =head1 APPENDIX
120 The rest of the documentation details each of the object methods.
121 Internal methods are usually preceded with a _
123 =cut
126 # Let the code begin...
129 package Bio::Seq::Quality;
130 use strict;
132 use base qw(Bio::Seq::Meta::Array);
134 ## Is this the right place (and way) to define this?
135 our $MASK_CHAR = 'X';
136 our $DEFAULT_NAME = 'DEFAULT';
137 our $GAP = '-';
138 our $META_GAP = ' ';
140 =head2 new
142 Title : new
143 Usage : $metaseq = Bio::Seq::Quality->new
144 ( -qual => '0 1 2 3 4 5 6 7 8 9 11 12',
145 -trace => '0 5 10 15 20 25 30 35 40 45 50 55',
146 -seq => 'atcgatcgatcg',
147 -id => 'human_id',
148 -accession_number => 'S000012',
150 Function: Constructor for Bio::Seq::Quality class. Note that you can
151 provide an empty quality and trace strings.
153 Returns : a new Bio::Seq::Quality object
155 =cut
158 sub new {
159 my ($class, @args) = @_;
161 my $self = $class->SUPER::new(@args);
163 my($meta, $qual, $trace, $trace_indices, $trace_data) =
164 $self->_rearrange([qw(META
165 QUAL
166 TRACE
167 TRACE_INDICES
168 TRACE_DATA)],
169 @args);
171 $self->{'_meta'}->{$DEFAULT_NAME} = [];
172 $self->{'_meta'}->{'trace'} = [];
173 $self->{trace_data} = $trace_data;
175 $meta && $self->meta($meta);
176 $qual && $self->qual($qual);
177 $trace && $self->named_meta('trace', $trace);
178 $trace_indices && $self->named_meta('trace', $trace_indices);
180 return $self;
185 ## QUAL
187 =head2 qual
189 Title : qual
190 Usage : $qual_values = $obj->qual($values_string);
191 Function:
193 Get and set method for the meta data starting from residue
194 position one. Since it is dependent on the length of the
195 sequence, it needs to be manipulated after the sequence.
197 The length of the returned value always matches the length
198 of the sequence.
200 Returns : reference to an array of meta data
201 Args : new value, string or array ref or Bio::Seq::PrimaryQual, optional
203 Setting quality values resets the clear range.
205 =cut
207 sub qual {
208 my $self = shift;
209 my $value = shift;
210 $value = $value->qual
211 if ref($value) and ref($value) ne 'ARRAY' and
212 $value->isa('Bio::Seq::PrimaryQual');
213 $self->_empty_cache if $value;
214 return $self->named_meta($DEFAULT_NAME, $value);
217 =head2 qual_text
219 Title : qual_text
220 Usage : $qual_values = $obj->qual_text($values_arrayref);
221 Function: Variant of meta() and qual() guarantied to return a string
222 representation of meta data. For details, see L<meta>.
223 Returns : a string
224 Args : new value, optional
226 =cut
228 sub qual_text {
229 return join ' ', @{shift->named_submeta($DEFAULT_NAME, @_)};
232 =head2 subqual
234 Title : subqual
235 Usage : $subset_of_qual_values = $obj->subqual(10, 20, $value_string);
236 $subset_of_qual_values = $obj->subqual(10, undef, $value_string);
237 Function:
239 Get and set method for meta data for subsequences.
241 Numbering starts from 1 and the number is inclusive, ie 1-2
242 are the first two residue of the sequence. Start cannot be
243 larger than end but can be equal.
245 If the second argument is missing the returned values
246 should extend to the end of the sequence.
248 Returns : A reference to an array
249 Args : integer, start position
250 integer, end position, optional when a third argument present
251 new value, optional
253 =cut
255 sub subqual {
256 shift->named_submeta($DEFAULT_NAME, @_);
259 =head2 subqual_text
261 Title : subqual_text
262 Usage : $meta_values = $obj->subqual_text(20, $value_string);
263 Function: Variant of subqual() returning a stringified
264 representation of meta data. For details, see L<Bio::Seq::MetaI>.
265 Returns : a string
266 Args : new value, optional
268 =cut
270 sub subqual_text {
271 return join ' ', @{shift->named_submeta($DEFAULT_NAME, @_)};
274 =head2 quality_length
276 Title : quality_length()
277 Usage : $qual_len = $obj->quality_length();
278 Function: return the number of elements in the quality array
279 Returns : integer
280 Args : -
282 =cut
284 sub quality_length {
285 my ($self) = @_;
286 return $self->named_meta_length($DEFAULT_NAME);
289 =head2 quality_is_flush
291 Title : quality_is_flush
292 Usage : $quality_is_flush = $obj->quality_is_flush()
293 Function: Boolean to tell if the trace length equals the sequence length.
294 Returns true if force_flush() is set.
295 Returns : boolean 1 or 0
296 Args : none
298 =cut
300 sub quality_is_flush {
301 return shift->is_flush('quality');
306 ## TRACE
308 =head2 trace
310 Title : trace
311 Usage : $trace_values = $obj->trace($values_string);
312 Function:
314 Get and set method for the meta data starting from residue
315 position one. Since it is dependent on the length of the
316 sequence, it needs to be manipulated after the sequence.
318 The length of the returned value always matches the length
319 of the sequence.
321 Returns : reference to an array of meta data
322 Args : new value, string or array ref, optional
324 =cut
326 sub trace {
327 my $self = shift;
328 my $value = shift;
329 return $self->named_meta('trace', $value);
332 =head2 trace_text
334 Title : trace_text
335 Usage : $trace_values = $obj->trace_text($values_arrayref);
336 Function: Variant of meta() and trace() guarantied to return a string
337 representation of meta data. For details, see L<meta>.
338 Returns : a string
339 Args : new value, optional
341 =cut
343 sub trace_text {
344 return join ' ', @{shift->named_submeta('trace', @_)};
347 =head2 subtrace
349 Title : subtrace
350 Usage : $subset_of_trace_values = $obj->subtrace(10, 20, $value_string);
351 $subset_of_trace_values = $obj->subtrace(10, undef, $value_string);
352 Function:
354 Get and set method for meta data for subsequences.
356 Numbering starts from 1 and the number is inclusive, ie 1-2
357 are the first two residue of the sequence. Start cannot be
358 larger than end but can be equal.
360 If the second argument is missing the returned values
361 should extend to the end of the sequence.
363 Returns : A reference to an array
364 Args : integer, start position
365 integer, end position, optional when a third argument present
366 new value, optional
369 =cut
371 sub subtrace {
372 return shift->named_submeta('trace', @_);
375 =head2 subtrace_text
377 Title : subtrace_text
378 Usage : $meta_values = $obj->subtrace_text(20, $value_string);
379 Function: Variant of subtrace() returning a stringified
380 representation of meta data. For details, see L<Bio::Seq::MetaI>.
381 Returns : a string
382 Args : new value, optional
384 =cut
386 sub subtrace_text {
387 return join ' ', @{shift->named_submeta('trace', @_)};
390 =head2 trace_length
392 Title : trace_length()
393 Usage : $trace_len = $obj->trace_length();
394 Function: return the number of elements in the trace set
395 Returns : integer
396 Args : -
398 =cut
400 sub trace_length {
401 my ($self) = @_;
402 return $self->named_meta_length('trace');
405 =head2 trace_is_flush
407 Title : trace_is_flush
408 Usage : $trace_is_flush = $obj->trace_is_flush()
409 Function: Boolean to tell if the trace length equals the sequence length.
410 Returns true if force_flush() is set.
411 Returns : boolean 1 or 0
412 Args : none
414 =cut
416 sub trace_is_flush {
417 return shift->is_flush('trace');
422 =head2 get_trace_graph
424 Title : get_trace_graph
425 Usage : @trace_values = $obj->get_trace_graph( -trace => 'a',
426 -scale => 100)
427 Function : Returns array of raw trace values for a trace file, or
428 false if no trace data exists. Requires a value for trace
429 to get either the a, g, c or t trace information, and an
430 optional value for scale, which rescales the data between
431 0 and the provided value, a scale value of '0' performs no
432 scaling
433 Returns : Array or 0
434 Args : string, trace to retrieve, one of a, g, c or t integer,
435 scale, for scaling of trace between 0 and scale, or 0 for
436 no scaling, optional
438 =cut
440 sub get_trace_graph
442 my $self = shift;
443 my($trace, $scale) =
444 $self->_rearrange([qw(TRACE
445 SCALE
447 @_);
448 unless (defined($self->{trace_data})) { return 0 }
449 unless (grep { lc($trace) eq $_ } ('a', 'g', 'c', 't')) { return 0 }
450 $trace = lc($trace) . "_trace";
451 my @trace_data = exists $self->{trace_data}->{$trace} &&
452 ref $self->{trace_data}->{$trace} eq 'ARRAY' ?
453 @{$self->{trace_data}->{$trace}} : ();
454 my $max = $self->{trace_data}->{max_height};
455 if (defined($scale) and $scale != 0)
457 @trace_data = map { $_ / $max * $scale } @trace_data;
459 return @trace_data;
463 =head2 threshold
465 Title : threshold
466 Usage : $qual->threshold($value);
467 Function: Sets the quality threshold.
468 Returns : an integer
469 Args : new value, optional
471 Value used by *clear_range* method below.
473 =cut
475 sub threshold {
476 my $self = shift;
477 my $value = shift;
478 if (defined $value) {
479 $self->throw("Threshold needs to be an integer [$value]")
480 unless $value =~ /^[-+]?\d+$/;
481 $self->_empty_cache
482 if defined $self->{_threshold} and $self->{_threshold} ne $value;
483 $self->{_threshold} = $value;
485 return $self->{_threshold};
489 =head2 mask_below_threshold
491 Title : mask_below_threshold
492 Usage : $count = $obj->count_clear_ranges($threshold);
493 Function: Counts number of ranges in the sequence where quality
494 values are above the threshold
495 Returns : count integer
496 Args : threshold integer, optional
498 Set threshold first using method L<threshold>.
500 =cut
502 sub mask_below_threshold {
503 my $self = shift;
504 my $threshold = shift;
506 $self->threshold($threshold) if defined $threshold;
508 # populate the cache if needed
509 $self->_find_clear_ranges unless defined $self->{_ranges};
511 my $maskSeq = $self->seq;
512 my $maskQual = $self->qual;
514 ## There must be a more efficient way than this!
515 for(my $i=0; $i<length($maskSeq); $i++){
516 #print join ("\t", $i, $maskQual->[$i]), "\n";
517 substr($maskSeq, $i, 1, $MASK_CHAR)
518 if $maskQual->[$i] < $self->{_threshold};
521 ## This is the *wrong* way to do it!
522 #for my $r (@{$self->{_ranges}} ){
523 # substr($maskSeq, $r->{start}, $r->{length}, $MASK_CHAR x $r->{length});
526 return $maskSeq;
529 =head2 count_clear_ranges
531 Title : count_clear_ranges
532 Usage : $count = $obj->count_clear_ranges($threshold);
533 Function: Counts number of ranges in the sequence where quality
534 values are above the threshold
535 Returns : count integer
536 Args : threshold integer, optional
538 Set threshold first using method L<threshold>.
540 =cut
542 sub count_clear_ranges {
543 my $self = shift;
544 my $threshold = shift;
545 $self->threshold($threshold) if defined $threshold;
547 # populate the cache if needed
548 $self->_find_clear_ranges unless defined $self->{_ranges};
550 return scalar @{$self->{_ranges}};
553 =head2 clear_ranges_length
555 Title : clear_ranges_length
556 Usage : $total_lenght = $obj->clear_ranges_length($threshold);
557 Function: Return number of residues with quality values above
558 the threshold in all clear ranges
559 Returns : an integer
560 Args : threshold, optional
562 Set threshold first using method L<threshold>.
564 I think this method needs a better name! count_high_quality_bases? or
565 sum_clear_ranges?
567 =cut
569 sub clear_ranges_length {
570 my $self = shift;
571 my $threshold = shift;
572 $self->threshold($threshold) if defined $threshold;
574 # populate the cache if needed
575 $self->_find_clear_ranges unless defined $self->{_ranges};
577 my $sum;
578 map {$sum += $_->{length}} @{$self->{_ranges}};
579 return $sum;
582 =head2 get_clear_range
584 Title : get_clear_range
585 Usage : $newqualobj = $obj->get_clear_range($threshold);
586 Function: Return longest subsequence that has quality values above
587 the given threshold, or a default value of 13
588 Returns : a new Bio::Seq::Quality object
589 Args : threshold, optional
591 Set threshold first using method L<threshold>.
593 Note, this method could be implemented using some gaussian smoothing
594 of the quality scores. Currently one base below the threshold is
595 enough to end the clear range.
597 =cut
599 sub get_clear_range {
600 my $self = shift;
601 my $threshold = shift;
602 $self->threshold($threshold) if defined $threshold;
604 # populate the cache if needed
605 $self->_find_clear_ranges unless defined $self->{_ranges};
607 # fix for bug 2847
608 return unless defined $self->{_ranges};
610 # pick the longest
611 for (sort {$b->{length} <=> $a->{length} } @{$self->{_ranges}} ){
612 my $newqualobj = Bio::Seq::Quality->new(
613 -seq => $self->subseq( $_->{start}, $_->{end}),
614 -qual => $self->subqual($_->{start}, $_->{end}),
615 -id => $self->id);
617 $newqualobj->threshold($threshold);
619 return $newqualobj;
625 =head2 get_all_clean_ranges
627 Title : get_all_clean_ranges
628 Usage : @ranges = $obj->get_all_clean_ranges($minlength);
629 Function: Return all ranges where quality values are above
630 the threshold. Original ordering.
631 Returns : an ordered array of new Bio::Seq::Quality objects
632 Args : minimum length , optional
634 Set threshold first using method L<threshold>.
636 =cut
638 sub get_all_clean_ranges {
639 my $self = shift;
640 my $minl = shift || 0;
642 $self->throw("Mimimum length needs to be zero or a positive integer [$minl]")
643 unless $minl =~ /^\+?\d+$/;
645 # populate the cache if needed
646 $self->_find_clear_ranges unless defined $self->{_ranges};
648 # return in the order of occurrence
649 my @ranges;
650 for my $r (sort {$b->{start} <=> $a->{start} } @{$self->{_ranges}} ){
651 next if $r->{length} < $minl;
653 ## Constructor should allow "-threshold => ..."!
654 push @ranges, Bio::Seq::Quality->new
655 ( -seq => $self->subseq( $r->{start}, $r->{end}),
656 -qual => $self->subqual($r->{start}, $r->{end}),
657 -id => $self->id
660 return @ranges;
665 # _find_clear_ranges: where range/threshold calculations happen
668 sub _find_clear_ranges {
669 my $self = shift;
670 my $qual = $self->qual;
672 $self->throw("You need to set the threshold value first")
673 unless defined $self->threshold;
675 my $threshold = $self->threshold;
677 my $rangeFlag = 0;
679 for(my $i=0; $i<@$qual; $i++){
680 ## Are we currently within a clear range or not?
681 if($rangeFlag){
682 ## Did we just leave the clear range?
683 if($qual->[$i]<$threshold){
684 ## Log the range
685 my $range;
686 $range->{end} = $i-1;
687 $range->{start} = $rangeFlag;
688 $range->{length} = $i - $rangeFlag;
689 push @{$self->{_ranges}}, $range;
690 ## and reset the range flag.
691 $rangeFlag = 0;
693 ## else nothing changes
695 else{
696 ## Did we just enter a clear range?
697 if($qual->[$i]>=$threshold){
698 ## then set the range flag!
699 $rangeFlag = $i;
701 ## else nothing changes
704 ## Did we exit the last clear range?
705 if($rangeFlag){
706 my $i = scalar(@$qual);
707 ## Log the range
708 my $range;
709 $range->{end} = $i-1;
710 $range->{start} = $rangeFlag;
711 $range->{length} = $i - $rangeFlag;
712 push @{$self->{_ranges}}, $range;
719 sub _empty_cache {
720 my $self = shift;
721 undef $self->{_ranges};
725 ################## deprecated methods ##################
727 sub trace_indices {
728 my $self = shift;
729 return $self->named_meta('trace');
732 sub trace_index_at {
733 my ($self, $val) =@_;
734 return shift @{$self->named_submeta('trace', $val, $val)};
738 sub sub_trace_index {
739 my $self = shift;
740 return $self->named_submeta('trace', @_);
744 sub qualat {
745 my ($self, $val) =@_;
746 return shift @{$self->submeta($val, $val)};
750 sub baseat {
751 my ($self,$val) = @_;
752 return $self->subseq($val,$val);