tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Seq / PrimaryQual.pm
blob9dc5dd3ea34a19ca7063da90ec6b0aa72444d162
1 # $Id$
3 # bioperl module for Bio::PrimaryQual
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com>
9 # Copyright Chad Matsalla
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object
19 =head1 SYNOPSIS
21 use Bio::Seq::PrimaryQual;
23 # you can use either a space-delimited string for quality
25 my $string_quals = "10 20 30 40 50 40 30 20 10";
26 my $qualobj = Bio::Seq::PrimaryQual->new
27 ( '-qual' => $string_quals,
28 '-id' => 'QualityFragment-12',
29 '-accession_number' => 'X78121',
32 # _or_ you can use an array of quality values
34 my @q2 = split/ /,$string_quals;
35 $qualobj = Bio::Seq::PrimaryQual->new( '-qual' => \@q2,
36 '-primary_id' => 'chads primary_id',
37 '-desc' => 'chads desc',
38 '-accession_number' => 'chads accession_number',
39 '-id' => 'chads id'
42 # to get the quality values out:
44 my @quals = @{$qualobj->qual()};
46 # to give _new_ quality values
48 my $newqualstring = "50 90 1000 20 12 0 0";
49 $qualobj->qual($newqualstring);
52 =head1 DESCRIPTION
54 This module provides a mechanism for storing quality
55 values. Much more useful as part of
56 Bio::Seq::SeqWithQuality where these quality values
57 are associated with the sequence information.
59 =head1 FEEDBACK
61 =head2 Mailing Lists
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to one
65 of the Bioperl mailing lists. Your participation is much appreciated.
67 bioperl-l@bioperl.org - General discussion
68 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
70 =head2 Support
72 Please direct usage questions or support issues to the mailing list:
74 I<bioperl-l@bioperl.org>
76 rather than to the module maintainer directly. Many experienced and
77 reponsive experts will be able look at the problem and quickly
78 address it. Please include a thorough description of the problem
79 with code and data examples if at all possible.
81 =head2 Reporting Bugs
83 Report bugs to the Bioperl bug tracking system to help us keep track
84 the bugs and their resolution. Bug reports can be submitted via the
85 web:
87 http://bugzilla.open-bio.org/
89 =head1 AUTHOR - Chad Matsalla
91 Email bioinformatics@dieselwurks.com
93 =head1 APPENDIX
95 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
97 =cut
100 # Let the code begin...
102 package Bio::Seq::PrimaryQual;
103 use vars qw(%valid_type);
104 use strict;
107 use base qw(Bio::Root::Root Bio::Seq::QualI);
110 =head2 new()
112 Title : new()
113 Usage : $qual = Bio::Seq::PrimaryQual->new
114 ( -qual => '10 20 30 40 50 50 20 10',
115 -id => 'human_id',
116 -accession_number => 'AL000012',
119 Function: Returns a new Bio::Seq::PrimaryQual object from basic
120 constructors, being a string _or_ a reference to an array for the
121 sequence and strings for id and accession_number. Note that you
122 can provide an empty quality string.
123 Returns : a new Bio::Seq::PrimaryQual object
125 =cut
130 sub new {
131 my ($class, @args) = @_;
132 my $self = $class->SUPER::new(@args);
134 # default: turn ON the warnings (duh)
135 my($qual,$id,$acc,$pid,$desc,$given_id,$header) =
136 $self->_rearrange([qw(QUAL
137 DISPLAY_ID
138 ACCESSION_NUMBER
139 PRIMARY_ID
140 DESC
142 HEADER
144 @args);
145 if( defined $id && defined $given_id ) {
146 if( $id ne $given_id ) {
147 $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]");
150 if( defined $given_id ) { $id = $given_id; }
152 # note: the sequence string may be empty
153 $self->qual(defined($qual) ? $qual : []);
154 $header && $self->header($header);
155 $id && $self->display_id($id);
156 $acc && $self->accession_number($acc);
157 $pid && $self->primary_id($pid);
158 $desc && $self->desc($desc);
160 return $self;
163 =head2 qual()
165 Title : qual()
166 Usage : @quality_values = @{$obj->qual()};
167 Function: Returns the quality as a reference to an array containing the
168 quality values. The individual elements of the quality array are
169 not validated and can be any numeric value.
170 Returns : A reference to an array.
172 =cut
174 sub qual {
175 my ($self,$value) = @_;
177 if( ! defined $value || length($value) == 0 ) {
178 $self->{'qual'} ||= [];
179 } elsif( ref($value) =~ /ARRAY/i ) {
180 # if the user passed in a reference to an array
181 $self->{'qual'} = $value;
182 } elsif(! $self->validate_qual($value)){
183 $self->throw("Attempting to set the quality to [$value] which does not look healthy");
184 } else {
185 $value =~ s/^\s+//;
186 $self->{'qual'} = [split(/\s+/,$value)];
189 return $self->{'qual'};
192 =head2 validate_qual($qualstring)
194 Title : validate_qual($qualstring)
195 Usage : print("Valid.") if { &validate_qual($self,$qualities); }
196 Function: Make sure that the quality, if it has length > 0, contains at
197 least one digit. Note that quality strings are parsed into arrays
198 using split/\d+/,$quality_string, so make sure that your quality
199 scalar looks like this if you want it to be parsed properly.
200 Returns : 1 for a valid sequence (WHY? Shouldn\'t it return 0? <boggle>)
201 Args : a scalar (any scalar, why PrimarySeq author?) and a scalar
202 containing the string to validate.
204 =cut
206 sub validate_qual {
207 # how do I validate quality values?
208 # \d+\s+\d+..., I suppose
209 my ($self,$qualstr) = @_;
210 # why the CORE?? -- (Because Bio::PrimarySeqI namespace has a
211 # length method, you have to qualify
212 # which length to use)
213 return 0 if (!defined $qualstr || CORE::length($qualstr) <= 0);
214 return 1 if( $qualstr =~ /\d/);
216 return 0;
219 =head2 subqual($start,$end)
221 Title : subqual($start,$end)
222 Usage : @subset_of_quality_values = @{$obj->subqual(10,40)};
223 Function: returns the quality values from $start to $end, where the
224 first value is 1 and the number is inclusive, ie 1-2 are the
225 first two bases of the sequence. Start cannot be larger than
226 end but can be equal.
227 Returns : A reference to an array.
228 Args : a start position and an end position
230 =cut
233 sub subqual {
234 my ($self,$start,$end) = @_;
236 if( $start > $end ){
237 $self->throw("in subqual, start [$start] has to be greater than end [$end]");
240 if( $start <= 0 || $end > $self->length ) {
241 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
244 # remove one from start, and then length is end-start
246 $start--;
247 $end--;
248 my @sub_qual_array = @{$self->{qual}}[$start..$end];
250 # return substr $self->seq(), $start, ($end-$start);
251 return \@sub_qual_array;
255 =head2 display_id()
257 Title : display_id()
258 Usage : $id_string = $obj->display_id();
259 Function: returns the display id, aka the common name of the Quality
260 object.
261 The semantics of this is that it is the most likely string to be
262 used as an identifier of the quality sequence, and likely to have
263 "human" readability. The id is equivalent to the ID field of the
264 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
265 database. In fasta format, the >(\S+) is presumed to be the id,
266 though some people overload the id to embed other information.
267 Bioperl does not use any embedded information in the ID field,
268 and people are encouraged to use other mechanisms (accession
269 field for example, or extending the sequence object) to solve
270 this. Notice that $seq->id() maps to this function, mainly for
271 legacy/convience issues
272 Returns : A string
273 Args : None
275 =cut
277 sub display_id {
278 my ($obj,$value) = @_;
279 if( defined $value) {
280 $obj->{'display_id'} = $value;
282 return $obj->{'display_id'};
286 =head2 header()
288 Title : header()
289 Usage : $header = $obj->header();
290 Function: Get/set the header that the user wants printed for this
291 quality object.
292 Returns : A string
293 Args : None
295 =cut
297 sub header {
298 my ($obj,$value) = @_;
299 if( defined $value) {
300 $obj->{'header'} = $value;
302 return $obj->{'header'};
306 =head2 accession_number()
308 Title : accession_number()
309 Usage : $unique_biological_key = $obj->accession_number();
310 Function: Returns the unique biological id for a sequence, commonly
311 called the accession_number. For sequences from established
312 databases, the implementors should try to use the correct
313 accession number. Notice that primary_id() provides the unique id
314 for the implemetation, allowing multiple objects to have the same
315 accession number in a particular implementation. For sequences
316 with no accession number, this method should return "unknown".
317 Returns : A string
318 Args : None
320 =cut
322 sub accession_number {
323 my( $obj, $acc ) = @_;
325 if (defined $acc) {
326 $obj->{'accession_number'} = $acc;
327 } else {
328 $acc = $obj->{'accession_number'};
329 $acc = 'unknown' unless defined $acc;
331 return $acc;
334 =head2 primary_id()
336 Title : primary_id()
337 Usage : $unique_implementation_key = $obj->primary_id();
338 Function: Returns the unique id for this object in this implementation.
339 This allows implementations to manage their own object ids in a
340 way the implementaiton can control clients can expect one id to
341 map to one object. For sequences with no accession number, this
342 method should return a stringified memory location.
343 Returns : A string
344 Args : None
346 =cut
348 sub primary_id {
349 my ($obj,$value) = @_;
350 if( defined $value) {
351 $obj->{'primary_id'} = $value;
353 return $obj->{'primary_id'};
357 =head2 desc()
359 Title : desc()
360 Usage : $qual->desc($newval);
361 $description = $qual->desc();
362 Function: Get/set description text for a qual object
363 Example :
364 Returns : Value of desc
365 Args : newvalue (optional)
367 =cut
369 sub desc {
370 my ($obj,$value) = @_;
371 if( defined $value) {
372 $obj->{'desc'} = $value;
374 return $obj->{'desc'};
378 =head2 id()
380 Title : id()
381 Usage : $id = $qual->id();
382 Function: Return the ID of the quality. This should normally be (and
383 actually is in the implementation provided here) just a synonym
384 for display_id().
385 Returns : A string.
386 Args : None.
388 =cut
390 sub id {
391 my ($self,$value) = @_;
392 if( defined $value ) {
393 return $self->display_id($value);
395 return $self->display_id();
398 =head2 length()
400 Title : length()
401 Usage : $length = $qual->length();
402 Function: Return the length of the array holding the quality values.
403 Under most circumstances, this should match the number of quality
404 values but no validation is done when the PrimaryQual object is
405 constructed and non-digits could be put into this array. Is this
406 a bug? Just enough rope...
407 Returns : A scalar (the number of elements in the quality array).
408 Args : None.
410 =cut
412 sub length {
413 my $self = shift;
414 if (ref($self->{qual}) ne "ARRAY") {
415 $self->warn("{qual} is not an array here. Why? It appears to be ".ref($self->{qual})."(".$self->{qual}."). Good thing this can _never_ happen.");
417 return scalar(@{$self->{qual}});
420 =head2 qualat($position)
422 Title : qualat($position)
423 Usage : $quality = $obj->qualat(10);
424 Function: Return the quality value at the given location, where the
425 first value is 1 and the number is inclusive, ie 1-2 are the first
426 two bases of the sequence. Start cannot be larger than end but can
427 be equal.
428 Returns : A scalar.
429 Args : A position.
431 =cut
433 sub qualat {
434 my ($self,$val) = @_;
435 my @qualat = @{$self->subqual($val,$val)};
436 if (scalar(@qualat) == 1) {
437 return $qualat[0];
439 else {
440 $self->throw("AAAH! qualat provided more then one quality.");
444 =head2 to_string()
446 Title : to_string()
447 Usage : $quality = $obj->to_string();
448 Function: Return a textual representation of what the object contains.
449 For this module, this function will return:
450 qual
451 display_id
452 accession_number
453 primary_id
454 desc
456 length
457 Returns : A scalar.
458 Args : None.
460 =cut
462 sub to_string {
463 my ($self,$out,$result) = shift;
464 $out = "qual: ".join(',',@{$self->qual()});
465 foreach (qw(display_id accession_number primary_id desc id)) {
466 $result = $self->$_();
467 if (!$result) { $result = "<unset>"; }
468 $out .= "$_: $result\n";
470 return $out;
474 sub to_string_automatic {
475 my ($self,$sub_result,$out) = shift;
476 foreach (sort keys %$self) {
477 print("Working on $_\n");
478 eval { $self->$_(); };
479 if ($@) { $sub_result = ref($_); }
480 elsif (!($sub_result = $self->$_())) {
481 $sub_result = "<unset>";
483 if (ref($sub_result) eq "ARRAY") {
484 print("This thing ($_) is an array!\n");
485 $sub_result = join(',',@$sub_result);
487 $out .= "$_: ".$sub_result."\n";
489 return $out;