3 # bioperl module for Bio::PrimaryQual
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com>
9 # Copyright Chad Matsalla
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object
21 use Bio::Seq::PrimaryQual;
23 # you can use either a space-delimited string for quality
25 my $string_quals = "10 20 30 40 50 40 30 20 10";
26 my $qualobj = Bio::Seq::PrimaryQual->new
27 ( '-qual' => $string_quals,
28 '-id' => 'QualityFragment-12',
29 '-accession_number' => 'X78121',
32 # _or_ you can use an array of quality values
34 my @q2 = split/ /,$string_quals;
35 $qualobj = Bio::Seq::PrimaryQual->new( '-qual' => \@q2,
36 '-primary_id' => 'chads primary_id',
37 '-desc' => 'chads desc',
38 '-accession_number' => 'chads accession_number',
42 # to get the quality values out:
44 my @quals = @{$qualobj->qual()};
46 # to give _new_ quality values
48 my $newqualstring = "50 90 1000 20 12 0 0";
49 $qualobj->qual($newqualstring);
54 This module provides a mechanism for storing quality
55 values. Much more useful as part of
56 Bio::Seq::SeqWithQuality where these quality values
57 are associated with the sequence information.
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to one
65 of the Bioperl mailing lists. Your participation is much appreciated.
67 bioperl-l@bioperl.org - General discussion
68 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
72 Please direct usage questions or support issues to the mailing list:
74 I<bioperl-l@bioperl.org>
76 rather than to the module maintainer directly. Many experienced and
77 reponsive experts will be able look at the problem and quickly
78 address it. Please include a thorough description of the problem
79 with code and data examples if at all possible.
83 Report bugs to the Bioperl bug tracking system to help us keep track
84 the bugs and their resolution. Bug reports can be submitted via the
87 http://bugzilla.open-bio.org/
89 =head1 AUTHOR - Chad Matsalla
91 Email bioinformatics@dieselwurks.com
95 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
100 # Let the code begin...
102 package Bio
::Seq
::PrimaryQual
;
103 use vars
qw(%valid_type);
107 use base qw(Bio::Root::Root Bio::Seq::QualI);
113 Usage : $qual = Bio::Seq::PrimaryQual->new
114 ( -qual => '10 20 30 40 50 50 20 10',
116 -accession_number => 'AL000012',
119 Function: Returns a new Bio::Seq::PrimaryQual object from basic
120 constructors, being a string _or_ a reference to an array for the
121 sequence and strings for id and accession_number. Note that you
122 can provide an empty quality string.
123 Returns : a new Bio::Seq::PrimaryQual object
131 my ($class, @args) = @_;
132 my $self = $class->SUPER::new
(@args);
134 # default: turn ON the warnings (duh)
135 my($qual,$id,$acc,$pid,$desc,$given_id,$header) =
136 $self->_rearrange([qw(QUAL
145 if( defined $id && defined $given_id ) {
146 if( $id ne $given_id ) {
147 $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]");
150 if( defined $given_id ) { $id = $given_id; }
152 # note: the sequence string may be empty
153 $self->qual(defined($qual) ?
$qual : []);
154 $header && $self->header($header);
155 $id && $self->display_id($id);
156 $acc && $self->accession_number($acc);
157 $pid && $self->primary_id($pid);
158 $desc && $self->desc($desc);
166 Usage : @quality_values = @{$obj->qual()};
167 Function: Returns the quality as a reference to an array containing the
168 quality values. The individual elements of the quality array are
169 not validated and can be any numeric value.
170 Returns : A reference to an array.
175 my ($self,$value) = @_;
177 if( ! defined $value || length($value) == 0 ) {
178 $self->{'qual'} ||= [];
179 } elsif( ref($value) =~ /ARRAY/i ) {
180 # if the user passed in a reference to an array
181 $self->{'qual'} = $value;
182 } elsif(! $self->validate_qual($value)){
183 $self->throw("Attempting to set the quality to [$value] which does not look healthy");
186 $self->{'qual'} = [split(/\s+/,$value)];
189 return $self->{'qual'};
192 =head2 validate_qual($qualstring)
194 Title : validate_qual($qualstring)
195 Usage : print("Valid.") if { &validate_qual($self,$qualities); }
196 Function: Make sure that the quality, if it has length > 0, contains at
197 least one digit. Note that quality strings are parsed into arrays
198 using split/\d+/,$quality_string, so make sure that your quality
199 scalar looks like this if you want it to be parsed properly.
200 Returns : 1 for a valid sequence (WHY? Shouldn\'t it return 0? <boggle>)
201 Args : a scalar (any scalar, why PrimarySeq author?) and a scalar
202 containing the string to validate.
207 # how do I validate quality values?
208 # \d+\s+\d+..., I suppose
209 my ($self,$qualstr) = @_;
210 # why the CORE?? -- (Because Bio::PrimarySeqI namespace has a
211 # length method, you have to qualify
212 # which length to use)
213 return 0 if (!defined $qualstr || CORE
::length($qualstr) <= 0);
214 return 1 if( $qualstr =~ /\d/);
219 =head2 subqual($start,$end)
221 Title : subqual($start,$end)
222 Usage : @subset_of_quality_values = @{$obj->subqual(10,40)};
223 Function: returns the quality values from $start to $end, where the
224 first value is 1 and the number is inclusive, ie 1-2 are the
225 first two bases of the sequence. Start cannot be larger than
226 end but can be equal.
227 Returns : A reference to an array.
228 Args : a start position and an end position
234 my ($self,$start,$end) = @_;
237 $self->throw("in subqual, start [$start] has to be greater than end [$end]");
240 if( $start <= 0 || $end > $self->length ) {
241 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
244 # remove one from start, and then length is end-start
248 my @sub_qual_array = @
{$self->{qual
}}[$start..$end];
250 # return substr $self->seq(), $start, ($end-$start);
251 return \
@sub_qual_array;
258 Usage : $id_string = $obj->display_id();
259 Function: returns the display id, aka the common name of the Quality
261 The semantics of this is that it is the most likely string to be
262 used as an identifier of the quality sequence, and likely to have
263 "human" readability. The id is equivalent to the ID field of the
264 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
265 database. In fasta format, the >(\S+) is presumed to be the id,
266 though some people overload the id to embed other information.
267 Bioperl does not use any embedded information in the ID field,
268 and people are encouraged to use other mechanisms (accession
269 field for example, or extending the sequence object) to solve
270 this. Notice that $seq->id() maps to this function, mainly for
271 legacy/convience issues
278 my ($obj,$value) = @_;
279 if( defined $value) {
280 $obj->{'display_id'} = $value;
282 return $obj->{'display_id'};
289 Usage : $header = $obj->header();
290 Function: Get/set the header that the user wants printed for this
298 my ($obj,$value) = @_;
299 if( defined $value) {
300 $obj->{'header'} = $value;
302 return $obj->{'header'};
306 =head2 accession_number()
308 Title : accession_number()
309 Usage : $unique_biological_key = $obj->accession_number();
310 Function: Returns the unique biological id for a sequence, commonly
311 called the accession_number. For sequences from established
312 databases, the implementors should try to use the correct
313 accession number. Notice that primary_id() provides the unique id
314 for the implemetation, allowing multiple objects to have the same
315 accession number in a particular implementation. For sequences
316 with no accession number, this method should return "unknown".
322 sub accession_number
{
323 my( $obj, $acc ) = @_;
326 $obj->{'accession_number'} = $acc;
328 $acc = $obj->{'accession_number'};
329 $acc = 'unknown' unless defined $acc;
337 Usage : $unique_implementation_key = $obj->primary_id();
338 Function: Returns the unique id for this object in this implementation.
339 This allows implementations to manage their own object ids in a
340 way the implementaiton can control clients can expect one id to
341 map to one object. For sequences with no accession number, this
342 method should return a stringified memory location.
349 my ($obj,$value) = @_;
350 if( defined $value) {
351 $obj->{'primary_id'} = $value;
353 return $obj->{'primary_id'};
360 Usage : $qual->desc($newval);
361 $description = $qual->desc();
362 Function: Get/set description text for a qual object
364 Returns : Value of desc
365 Args : newvalue (optional)
370 my ($obj,$value) = @_;
371 if( defined $value) {
372 $obj->{'desc'} = $value;
374 return $obj->{'desc'};
381 Usage : $id = $qual->id();
382 Function: Return the ID of the quality. This should normally be (and
383 actually is in the implementation provided here) just a synonym
391 my ($self,$value) = @_;
392 if( defined $value ) {
393 return $self->display_id($value);
395 return $self->display_id();
401 Usage : $length = $qual->length();
402 Function: Return the length of the array holding the quality values.
403 Under most circumstances, this should match the number of quality
404 values but no validation is done when the PrimaryQual object is
405 constructed and non-digits could be put into this array. Is this
406 a bug? Just enough rope...
407 Returns : A scalar (the number of elements in the quality array).
414 if (ref($self->{qual
}) ne "ARRAY") {
415 $self->warn("{qual} is not an array here. Why? It appears to be ".ref($self->{qual
})."(".$self->{qual
}."). Good thing this can _never_ happen.");
417 return scalar(@
{$self->{qual
}});
420 =head2 qualat($position)
422 Title : qualat($position)
423 Usage : $quality = $obj->qualat(10);
424 Function: Return the quality value at the given location, where the
425 first value is 1 and the number is inclusive, ie 1-2 are the first
426 two bases of the sequence. Start cannot be larger than end but can
434 my ($self,$val) = @_;
435 my @qualat = @
{$self->subqual($val,$val)};
436 if (scalar(@qualat) == 1) {
440 $self->throw("AAAH! qualat provided more then one quality.");
447 Usage : $quality = $obj->to_string();
448 Function: Return a textual representation of what the object contains.
449 For this module, this function will return:
463 my ($self,$out,$result) = shift;
464 $out = "qual: ".join(',',@
{$self->qual()});
465 foreach (qw(display_id accession_number primary_id desc id)) {
466 $result = $self->$_();
467 if (!$result) { $result = "<unset>"; }
468 $out .= "$_: $result\n";
474 sub to_string_automatic
{
475 my ($self,$sub_result,$out) = shift;
476 foreach (sort keys %$self) {
477 print("Working on $_\n");
478 eval { $self->$_(); };
479 if ($@
) { $sub_result = ref($_); }
480 elsif (!($sub_result = $self->$_())) {
481 $sub_result = "<unset>";
483 if (ref($sub_result) eq "ARRAY") {
484 print("This thing ($_) is an array!\n");
485 $sub_result = join(',',@
$sub_result);
487 $out .= "$_: ".$sub_result."\n";