maint: restructure to use Dist::Zilla
[bioperl-live.git] / lib / Bio / Seq / PrimaryQual.pm
blob6a93d34897773bfc452a715c137f5bf5899bee57
2 # bioperl module for Bio::PrimaryQual
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com>
8 # Copyright Chad Matsalla
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object
18 =head1 SYNOPSIS
20 use Bio::Seq::PrimaryQual;
22 # you can use either a space-delimited string for quality
24 my $string_quals = "10 20 30 40 50 40 30 20 10";
25 my $qualobj = Bio::Seq::PrimaryQual->new(
26 -qual => $string_quals,
27 -id => 'QualityFragment-12',
28 -accession_number => 'X78121',
31 # _or_ you can use an array of quality values
33 my @q2 = split/ /,$string_quals;
34 $qualobj = Bio::Seq::PrimaryQual->new(
35 -qual => \@q2,
36 -primary_id => 'chads primary_id',
37 -desc => 'chads desc',
38 -accession_number => 'chads accession_number',
39 -id => 'chads id'
42 # to get the quality values out:
44 my @quals = @{$qualobj->qual()};
46 # to give _new_ quality values
48 my $newqualstring = "50 90 1000 20 12 0 0";
49 $qualobj->qual($newqualstring);
52 =head1 DESCRIPTION
54 This module provides a mechanism for storing quality
55 values. Much more useful as part of
56 Bio::Seq::SeqWithQuality where these quality values
57 are associated with the sequence information.
59 =head1 FEEDBACK
61 =head2 Mailing Lists
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to one
65 of the Bioperl mailing lists. Your participation is much appreciated.
67 bioperl-l@bioperl.org - General discussion
68 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
70 =head2 Support
72 Please direct usage questions or support issues to the mailing list:
74 I<bioperl-l@bioperl.org>
76 rather than to the module maintainer directly. Many experienced and
77 reponsive experts will be able look at the problem and quickly
78 address it. Please include a thorough description of the problem
79 with code and data examples if at all possible.
81 =head2 Reporting Bugs
83 Report bugs to the Bioperl bug tracking system to help us keep track
84 the bugs and their resolution. Bug reports can be submitted via the
85 web:
87 https://github.com/bioperl/bioperl-live/issues
89 =head1 AUTHOR - Chad Matsalla
91 Email bioinformatics@dieselwurks.com
93 =head1 APPENDIX
95 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
97 =cut
100 package Bio::Seq::PrimaryQual;
102 use strict;
104 use base qw(Bio::Root::Root Bio::Seq::QualI);
106 our $MATCHPATTERN = '0-9eE\.\s+-';
109 =head2 new()
111 Title : new()
112 Usage : $qual = Bio::Seq::PrimaryQual->new(
113 -qual => '10 20 30 40 50 50 20 10',
114 -id => 'human_id',
115 -accession_number => 'AL000012',
118 Function: Returns a new Bio::Seq::PrimaryQual object from basic
119 constructors, being a string _or_ a reference to an array for the
120 sequence and strings for id and accession_number. Note that you
121 can provide an empty quality string.
122 Returns : a new Bio::Seq::PrimaryQual object
124 =cut
126 sub new {
127 my ($class, @args) = @_;
128 my $self = $class->SUPER::new(@args);
130 # default: turn ON the warnings (duh)
131 my($qual,$id,$acc,$pid,$desc,$given_id,$header) =
132 $self->_rearrange([qw(QUAL
133 DISPLAY_ID
134 ACCESSION_NUMBER
135 PRIMARY_ID
136 DESC
138 HEADER
140 @args);
141 if( defined $id && defined $given_id ) {
142 if( $id ne $given_id ) {
143 $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]");
146 if( defined $given_id ) { $id = $given_id; }
148 # note: the sequence string may be empty
149 $self->qual(defined($qual) ? $qual : []);
150 $header && $self->header($header);
151 $id && $self->display_id($id);
152 $acc && $self->accession_number($acc);
153 $pid && $self->primary_id($pid);
154 $desc && $self->desc($desc);
156 return $self;
160 =head2 qual()
162 Title : qual()
163 Usage : @quality_values = @{$obj->qual()};
164 Function: Get or set the quality as a reference to an array containing the
165 quality values. An error is generated if the quality scores are
166 invalid, see validate_qual().
167 Returns : A reference to an array.
169 =cut
171 sub qual {
172 my ($self,$value) = @_;
174 if( ! defined $value || length($value) == 0 ) {
175 $self->{'qual'} ||= [];
176 } elsif( ref($value) =~ /ARRAY/i ) {
177 # if the user passed in a reference to an array
178 $self->{'qual'} = $value;
179 } else {
180 $self->validate_qual($value, 1);
181 $value =~ s/^\s+//;
182 $self->{'qual'} = [split(/\s+/,$value)];
185 return $self->{'qual'};
189 =head2 seq()
191 Title : seq()
192 Usager : $sequence = $obj->seq();
193 Function : Returns the quality numbers as a space-separated string.
194 Returns : Single string.
195 Args : None.
197 =cut
199 sub seq {
200 return join ' ', @{ shift->qual };
204 =head2 validate_qual($qualstring)
206 Title : validate_qual($qualstring)
207 Usage : print("Valid.") if { &validate_qual($self, $quality_string); }
208 Function: Test that the given quality string is valid. It is expected to
209 contain space-delimited numbers that can be parsed using split /\d+/.
210 However, this validation takes shortcuts and only tests that the
211 string contains characters valid in numbers: 0-9 . eE +-
212 Note that empty quality strings are valid too.
213 Returns : 1 for a valid sequence, 0 otherwise
214 Args : - Scalar containing the quality string to validate.
215 - Boolean to optionally throw an error if validation failed
217 =cut
219 sub validate_qual {
220 my ($self, $qualstr, $throw) = @_;
221 if ( (defined $qualstr ) &&
222 ($qualstr !~ /^[$MATCHPATTERN]*$/) ) {
223 if ($throw) {
224 $self->throw("Failed validation of quality score from '".
225 (defined($self->id)||'[unidentified sequence]')."'. No numeric ".
226 "value found.\n");
228 return 0;
230 return 1;
234 =head2 subqual($start,$end)
236 Title : subqual($start,$end)
237 Usage : @subset_of_quality_values = @{$obj->subqual(10,40)};
238 Function: returns the quality values from $start to $end, where the
239 first value is 1 and the number is inclusive, ie 1-2 are the
240 first two bases of the sequence. Start cannot be larger than
241 end but can be equal.
242 Returns : A reference to an array.
243 Args : a start position and an end position
245 =cut
247 sub subqual {
248 my ($self,$start,$end) = @_;
250 if( $start > $end ){
251 $self->throw("in subqual, start [$start] has to be greater than end [$end]");
254 if( $start <= 0 || $end > $self->length ) {
255 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
258 # remove one from start, and then length is end-start
260 $start--;
261 $end--;
262 my @sub_qual_array = @{$self->{qual}}[$start..$end];
264 # return substr $self->seq(), $start, ($end-$start);
265 return \@sub_qual_array;
270 =head2 display_id()
272 Title : display_id()
273 Usage : $id_string = $obj->display_id();
274 Function: returns the display id, aka the common name of the Quality
275 object.
276 The semantics of this is that it is the most likely string to be
277 used as an identifier of the quality sequence, and likely to have
278 "human" readability. The id is equivalent to the ID field of the
279 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
280 database. In fasta format, the >(\S+) is presumed to be the id,
281 though some people overload the id to embed other information.
282 Bioperl does not use any embedded information in the ID field,
283 and people are encouraged to use other mechanisms (accession
284 field for example, or extending the sequence object) to solve
285 this. Notice that $seq->id() maps to this function, mainly for
286 legacy/convience issues
287 Returns : A string
288 Args : None
290 =cut
292 sub display_id {
293 my ($obj,$value) = @_;
294 if( defined $value) {
295 $obj->{'display_id'} = $value;
297 return $obj->{'display_id'};
301 =head2 header()
303 Title : header()
304 Usage : $header = $obj->header();
305 Function: Get/set the header that the user wants printed for this
306 quality object.
307 Returns : A string
308 Args : None
310 =cut
312 sub header {
313 my ($obj,$value) = @_;
314 if( defined $value) {
315 $obj->{'header'} = $value;
317 return $obj->{'header'};
322 =head2 accession_number()
324 Title : accession_number()
325 Usage : $unique_biological_key = $obj->accession_number();
326 Function: Returns the unique biological id for a sequence, commonly
327 called the accession_number. For sequences from established
328 databases, the implementors should try to use the correct
329 accession number. Notice that primary_id() provides the unique id
330 for the implementation, allowing multiple objects to have the same
331 accession number in a particular implementation. For sequences
332 with no accession number, this method should return "unknown".
333 Returns : A string
334 Args : None
336 =cut
338 sub accession_number {
339 my( $obj, $acc ) = @_;
341 if (defined $acc) {
342 $obj->{'accession_number'} = $acc;
343 } else {
344 $acc = $obj->{'accession_number'};
345 $acc = 'unknown' unless defined $acc;
347 return $acc;
351 =head2 primary_id()
353 Title : primary_id()
354 Usage : $unique_implementation_key = $obj->primary_id();
355 Function: Returns the unique id for this object in this implementation.
356 This allows implementations to manage their own object ids in a
357 way the implementation can control clients can expect one id to
358 map to one object. For sequences with no accession number, this
359 method should return a stringified memory location.
360 Returns : A string
361 Args : None
363 =cut
365 sub primary_id {
366 my ($obj,$value) = @_;
367 if( defined $value) {
368 $obj->{'primary_id'} = $value;
370 return $obj->{'primary_id'};
374 =head2 desc()
376 Title : desc()
377 Usage : $qual->desc($newval);
378 $description = $qual->desc();
379 Function: Get/set description text for a qual object
380 Example :
381 Returns : Value of desc
382 Args : newvalue (optional)
384 =cut
386 sub desc {
387 my ($obj,$value) = @_;
388 if( defined $value) {
389 $obj->{'desc'} = $value;
391 return $obj->{'desc'};
395 =head2 id()
397 Title : id()
398 Usage : $id = $qual->id();
399 Function: Return the ID of the quality. This should normally be (and
400 actually is in the implementation provided here) just a synonym
401 for display_id().
402 Returns : A string.
403 Args : None.
405 =cut
407 sub id {
408 my ($self,$value) = @_;
409 if( defined $value ) {
410 return $self->display_id($value);
412 return $self->display_id();
416 =head2 length()
418 Title : length()
419 Usage : $length = $qual->length();
420 Function: Return the length of the array holding the quality values.
421 Under most circumstances, this should match the number of quality
422 values but no validation is done when the PrimaryQual object is
423 constructed and non-digits could be put into this array. Is this
424 a bug? Just enough rope...
425 Returns : A scalar (the number of elements in the quality array).
426 Args : None.
428 =cut
430 sub length {
431 my $self = shift;
432 if (ref($self->{qual}) ne "ARRAY") {
433 $self->warn("{qual} is not an array here. Why? It appears to be ".ref($self->{qual})."(".$self->{qual}."). Good thing this can _never_ happen.");
435 return scalar(@{$self->{qual}});
439 =head2 qualat()
441 Title : qualat
442 Usage : $quality = $obj->qualat(10);
443 Function: Return the quality value at the given location, where the
444 first value is 1 and the number is inclusive, ie 1-2 are the first
445 two bases of the sequence. Start cannot be larger than end but can
446 be equal.
447 Returns : A scalar.
448 Args : A position.
450 =cut
452 sub qualat {
453 my ($self,$val) = @_;
454 my @qualat = @{$self->subqual($val,$val)};
455 if (scalar(@qualat) == 1) {
456 return $qualat[0];
457 } else {
458 $self->throw("qualat() provided more than one quality.");
462 =head2 to_string()
464 Title : to_string()
465 Usage : $quality = $obj->to_string();
466 Function: Return a textual representation of what the object contains.
467 For this module, this function will return:
468 qual
469 display_id
470 accession_number
471 primary_id
472 desc
474 length
475 Returns : A scalar.
476 Args : None.
478 =cut
480 sub to_string {
481 my ($self,$out,$result) = shift;
482 $out = "qual: ".join(',',@{$self->qual()});
483 foreach (qw(display_id accession_number primary_id desc id length)) {
484 $result = $self->$_();
485 if (!$result) { $result = "<unset>"; }
486 $out .= "$_: $result\n";
488 return $out;
492 sub to_string_automatic {
493 my ($self,$sub_result,$out) = shift;
494 foreach (sort keys %$self) {
495 print("Working on $_\n");
496 eval { $self->$_(); };
497 if ($@) { $sub_result = ref($_); }
498 elsif (!($sub_result = $self->$_())) {
499 $sub_result = "<unset>";
501 if (ref($sub_result) eq "ARRAY") {
502 print("This thing ($_) is an array!\n");
503 $sub_result = join(',',@$sub_result);
505 $out .= "$_: ".$sub_result."\n";
507 return $out;