* sync with trunk
[bioperl-live.git] / Bio / LiveSeq / SeqI.pm
blob9179f71bb06a2dd19c9e39d448fd60f325f82d38
1 # $Id$
3 # bioperl module for Bio::LiveSeq::SeqI
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
7 # Copyright Joseph Insana
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::LiveSeq::SeqI - Abstract sequence interface class for LiveSeq
17 =head1 SYNOPSIS
19 # documentation needed
21 =head1 DESCRIPTION
23 This class implements BioPerl PrimarySeqI interface for Live Seq objects.
25 One of the main difference in LiveSequence compared to traditional
26 "string" sequences is that coordinate systems are flexible. Typically
27 gene nucleotide numbering starts from 1 at the first character of the
28 initiator codon (A in ATG). This means that negative positions are
29 possible and common!
31 Secondly, the sequence manipulation methods do not return a new
32 sequence object but change the current object. The current status can
33 be written out to BioPerl sequence objects.
35 =head1 FEEDBACK
37 =head2 Mailing Lists
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to one
41 of the Bioperl mailing lists. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 =head2 Reporting Bugs
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 the bugs and their resolution. Bug reports can be submitted via the
50 web:
52 http://bugzilla.open-bio.org/
54 =head1 AUTHOR - Joseph A.L. Insana
56 Email: Insana@ebi.ac.uk, jinsana@gmx.net
58 =head1 APPENDIX
60 The rest of the documentation details each of the object
61 methods. Internal methods are usually preceded with a _
63 Some note on the terminology/notation of method names:
64 label: a unique pointer to a single nucleotide
65 position: the position of a nucleotide according to a particular coordinate
66 system (e.g. counting downstream from a particular label taken as
67 number 1)
68 base: the one letter code for a nucleotide (i.e.: "a" "t" "c" "g")
70 a base is the "value" that an "element" of a "chain" can assume
71 (see documentation on the Chain datastructure if interested)
73 =cut
76 # Let the code begin...
78 package Bio::LiveSeq::SeqI;
79 use strict;
80 use Bio::Tools::CodonTable; # for the translate() function
82 use base qw(Bio::Root::Root Bio::LiveSeq::ChainI Bio::PrimarySeqI);
84 =head2 seq
86 Title : seq
87 Usage : $string = $obj->seq()
88 Function: Returns the complete sequence of an object as a string of letters.
89 Suggested cases are upper case for proteins and lower case for
90 DNA sequence (IUPAC standard),
91 Returns : a string
94 =cut
96 sub seq {
97 my $self = shift;
98 my ($start,$end) = ($self->start(),$self->end());
99 if ($self->strand() == 1) {
100 return $self->{'seq'}->down_chain2string($start,undef,$end);
101 } else { # reverse strand
102 my $str = $self->{'seq'}->up_chain2string($start,undef,$end);
103 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
104 return $str;
108 =head2 all_labels
110 Title : all_labels
111 Usage : @labels = $obj->all_labels()
112 Function: all the labels of every nucleotide an object is composed of
113 Returns : an array of labels
114 Args : none
116 =cut
118 sub all_labels {
119 my $self = shift;
120 my ($start,$end) = ($self->start(),$self->end());
121 my $labels;
122 if ($self->strand() == 1) {
123 $labels=$self->{'seq'}->down_labels($start,$end);
124 } else {
125 $labels=$self->{'seq'}->up_labels($start,$end);
127 return (@{$labels});
130 =head2 labelsubseq
132 Title : labelsubseq
133 Usage : $dna->labelsubseq();
134 : $dna->labelsubseq($startlabel);
135 : $dna->labelsubseq($startlabel,$length);
136 : $dna->labelsubseq($startlabel,undef,$endlabel);
137 e.g. : $dna->labelsubseq(4,undef,8);
138 Function: prints the sequence as string. The difference between labelsubseq
139 and normal subseq is that it uses /labels/ as arguments, instead
140 than positions. This allows for faster and more efficient lookup,
141 skipping the (usually) lengthy conversion of positions into labels.
142 This is expecially useful for manipulating with high power
143 LiveSeq objects, knowing the labels and exploiting their
144 usefulness.
145 Returns : a string
146 Errorcode -1
147 Args : without arguments it returns the entire sequence
148 with a startlabel it returns the sequence downstream that label
149 if a length is specified, it returns only that number of bases
150 if an endlabel is specified, it overrides the length argument
151 and prints instead up to that label (included)
152 Defaults: $startlabel defaults to the beginning of the entire sequence
153 $endlabel defaults to the end of the entire sequence
155 =cut
157 # NOTE: unsecuremode is to be used /ONLY/ if sure of the start and end labels, expecially that they follow each other in the correct order!!!!
159 sub labelsubseq {
160 my ($self,$start,$length,$end,$unsecuremode) = @_;
161 if (defined $unsecuremode && $unsecuremode eq "unsecuremoderequested")
162 { # to skip security checks (faster)
163 unless ($start) {
164 $start=$self->start;
166 if ($end) {
167 if ($end == $start) {
168 $length=1;
169 undef $end;
170 } else {
171 undef $length;
173 } else {
174 unless ($length) {
175 $end=$self->end;
178 } else {
179 if ($start) {
180 unless ($self->{'seq'}->valid($start)) {
181 $self->warn("Start label not valid"); return (-1);
184 if ($end) {
185 if ($end == $start) {
186 $length=1;
187 undef $end;
188 } else {
189 unless ($self->{'seq'}->valid($end)) {
190 $self->warn("End label not valid"); return (-1);
192 unless ($self->follows($start,$end) == 1) {
193 $self->warn("End label does not follow Start label!"); return (-1);
195 undef $length;
199 if ($self->strand() == 1) {
200 return $self->{'seq'}->down_chain2string($start,$length,$end);
201 } else { # reverse strand
202 my $str = $self->{'seq'}->up_chain2string($start,$length,$end);
203 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
204 return $str;
208 =head2 subseq
210 Title : subseq
211 Usage : $substring = $obj->subseq(10,40);
212 : $substring = $obj->subseq(10,undef,4);
213 Function: returns the subseq from start to end, where the first base
214 is 1 and the number is inclusive, ie 1-2 are the first two
215 bases of the sequence
217 Start cannot be larger than end but can be equal.
219 Allows for negative numbers $obj->subseq(-10,-1). By
220 definition, there is no 0!
221 -5 -1 1 5
222 gctagcgcccaac atggctcgctg
224 This allows to retrieve sequences upstream from given position.
226 The precedence is from left to right: if END is given LENGTH is
227 ignored.
229 Examples: $obj->subseq(-10,undef,10) returns 10 elements before position 1
230 $obj->subseq(4,8) returns elements from the 4th to the 8th, inclusive
232 Returns : a string
233 Errorcode: -1
234 Args : start, integer, defaults to start of the sequence
235 end, integer, '' or undef, defaults to end of the sequence
236 length, integer, '' or undef
237 an optional strand (1 or -1) 4th argument
238 if strand argument is not given, it will default to the object
239 argment. This argument is useful when a call is issued from a child
240 of a parent object containing the subseq method
242 =cut
245 # check the fact about reverse strand!
246 # is it feasible? Is it correct? Should we do it? How about exons? Does it
247 # work when you ask subseq of an exon?
248 # eliminated now (Mon night)
249 sub subseq {
250 ##my ($self,$pos1,$pos2,$length,$strand) = @_;
251 my ($self,$pos1,$pos2,$length,$strand) = @_;
252 ##unless (defined ($strand)) { # if optional [strand] argument not given
253 ## $strand=$self->strand;
255 $strand=$self->strand;
256 my ($str,$startlabel,$endlabel);
257 if (defined ($length)) {
258 if ($length < 1) {
259 $self->warn("No sense asking for a subseq of length < 1");
260 return (-1);
263 unless (defined ($pos1)) {
264 #print "\n##### DEBUG pos1 not defined\n";
265 $startlabel=$self->start;
266 } else {
267 if ($pos1 == 0) { # if position = 0 complain
268 $self->warn("Position cannot be 0!"); return (-1);
270 ##if ($strand == 1) { # CHECK THIS!
271 if ((defined ($pos2))&&($pos1>$pos2)) {
272 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
274 ##} else { # CHECK THIS!
275 ## if ((defined ($pos2))&&($pos1<$pos2)) {
276 ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!)"; return (-1);
277 ## }
279 $startlabel=$self->label($pos1);
280 if ($startlabel < 1) {
281 $self->warn("position $pos1 not valid as start of subseq!"); return (-1);
284 unless (defined ($pos2)) {
285 #print "\n##### pos2 not defined\n";
286 unless (defined ($length)) {
287 $endlabel=$self->end;
289 } else {
290 if ($pos2 == 0) { # if position = 0 complain
291 $self->warn("Position cannot be 0!"); return (-1);
293 undef $length;
294 ##if ($strand == 1) { # CHECK THIS!
295 if ((defined ($pos1))&&($pos1>$pos2)) {
296 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
298 ##} else { # CHECK THIS!
299 ## if ((defined ($pos1))&&($pos1<$pos2)) {
300 ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!"); return (-1);
301 ## }
303 $endlabel=$self->label($pos2);
304 if ($endlabel < 1) {
305 $self->warn("position $pos2 not valid as end of subseq!"); return (-1);
308 #print "\n ####DEBUG: start $startlabel end $endlabel length $length strand $strand\n";
310 if ($strand == 1) {
311 $str = $self->{'seq'}->down_chain2string($startlabel,$length,$endlabel);
312 } else { # reverse strand
313 $str = $self->{'seq'}->up_chain2string($startlabel,$length,$endlabel);
314 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
316 return $str;
319 =head2 length
321 Title : length
322 Usage : $seq->length();
323 Function: returns the number of nucleotides (or the number of aminoacids)
324 in the entire sequence
325 Returns : an integer
326 Errorcode -1
327 Args : none
329 =cut
331 sub length {
332 my $self=shift;
333 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
334 if ($strand == 1) {
335 return $self->{'seq'}->down_subchain_length($start,$end);
336 } else {
337 return $self->{'seq'}->up_subchain_length($start,$end);
341 =head2 display_id
343 Title : display_id
344 Usage : $id_string = $obj->display_id();
345 Function: returns the display id, alias the common name of the object
347 The semantics of this is that it is the most likely string
348 to be used as an identifier of the sequence, and likely to
349 have "human" readability. The id is equivalent to the ID
350 field of the GenBank/EMBL databanks and the id field of the
351 Swissprot/sptrembl database. In fasta format, the >(\S+) is
352 presumed to be the id, though some people overload the id
353 to embed other information.
355 See also: accession_number
356 Returns : a string
357 Args : none
359 =cut
361 sub display_id {
362 my ($self,$value) = @_;
363 if(defined $value) {
364 $self->{'display_id'} = $value;
366 return $self->{'display_id'};
370 =head2 accession_number
372 Title : accession_number
373 Usage : $unique_biological_key = $obj->accession_number;
374 Function: Returns the unique biological id for a sequence, commonly
375 called the accession_number.
376 Notice that primary_id() provides the unique id for the
377 implemetation, allowing multiple objects to have the same accession
378 number in a particular implementation.
380 For objects with no accession_number this method returns "unknown".
381 Returns : a string
382 Args : none
384 =cut
386 sub accession_number {
387 my ($self,$value) = @_;
388 if (defined $value) {
389 $self->{'accession_number'} = $value;
391 unless (exists $self->{'accession_number'}) {
392 return "unknown";
393 } else {
394 return $self->{'accession_number'};
398 =head2 primary_id
400 Title : primary_id
401 Usage : $unique_implementation_key = $obj->primary_id;
402 Function: Returns the unique id for this object in this
403 implementation. This allows implementations to manage their own
404 object ids in a way the implementation can control. Clients can
405 expect one id to map to one object.
407 For sequences with no primary_id, this method returns
408 a stringified memory location.
410 Returns : A string
411 Args : None
413 =cut
416 sub primary_id {
417 my ($self,$value) = @_;
418 if(defined $value) {
419 $self->{'primary_id'} = $value;
421 unless (exists $self->{'primary_id'}) {
422 return "$self";
423 } else {
424 return $self->{'primary_id'};
428 =head2 change
430 Title : change
431 Usage : $substring = $obj->change('AA', 10);
432 Function: changes, modifies, mutates the LiveSequence
433 Examples:
434 $obj->change('', 10); delete nucleotide #10
435 $obj->change('', 10, 2); delete two nucleotides starting from #10
436 $obj->change('G', 10); change nuc #10 to 'G'
437 $obj->change('GA', 10, 4); replace #10 and 3 following with 'GA'
438 $obj->change('GA', 10, 2)); is same as $obj->change('GA', 10);
439 $obj->change('GA', 10, 0 ); insert 'GA' before nucleotide at #10
440 $obj->change('GA', 10, 1); GA inserted before #10, #10 deleted
441 $obj->change('GATC', 10, 2); GATC inserted before #10, #10&#11 deleted
442 $obj->change('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted
445 Returns : a string of deleted bases (if any) or 1 (everything OK)
446 Errorcode: -1
447 Args : seq, string, or '' ('' = undef = 0 = deletion)
448 start, integer
449 length, integer (optional)
451 =cut
453 sub change {
454 &positionchange;
457 =head2 positionchange
459 Title : positionchange
460 Function: Exactly like change. I.e. change() defaults to positionchange()
462 =cut
464 sub positionchange {
465 my ($self,$newseq,$position,$length)=@_;
466 unless ($position) {
467 $self->warn("Position not given or position 0");
468 return (-1);
470 my $label=$self->label($position);
471 unless ($label > 0) { # label not found or error
472 $self->warn("No valid label found at that position!");
473 return (-1);
475 return ($self->labelchange($newseq,$label,$length));
478 =head2 labelchange
480 Title : labelchange
481 Function: Exactly like change but uses a /label/ instead than a position
482 as second argument. This allows for multiple changes in a LiveSeq
483 without the burden of recomputing positions. I.e. for a multiple
484 change in two different points of the LiveSeq, the approach would
485 be the following: fetch the correct labels out of the two different
486 positions (method: label($position)) and then use the labelchange()
487 method to modify the sequence using those labels instead than
488 relying on the positions (that would have modified after the
489 first change).
491 =cut
493 sub labelchange {
494 my ($self,$newseq,$label,$length)=@_;
495 unless ($self->valid($label)) {
496 if ($self->{'seq'}->valid($label)) {
497 #$self->warn("Label \'$label\' not valid for executing a LiveSeq change for the object asked but it's ok for DNAlevel change, reverting to that");
498 shift @_;
499 return($self->{'seq'}->labelchange(@_));
500 } else {
501 $self->warn("Label \'$label\' not valid for executing a LiveSeq change");
502 return (-1);
505 unless ($newseq) { # it means this is a simple deletion
506 if (defined($length)) {
507 unless ($length >= 0) {
508 $self->warn("No sense having length < 0 in a deletion");
509 return (-1);
511 } else {
512 $self->warn("Length not defined for deletion!");
513 return (-1);
515 return $self->_delete($label,$length);
517 my $newseqlength=CORE::length($newseq);
518 if (defined($length)) {
519 unless ($length >= 0) {
520 $self->warn("No sense having length < 0 in a change()");
521 return (-1);
523 } else {
524 $length=$newseqlength; # defaults to pointmutation(s)
526 if ($length == 0) { # it means this is a simple insertion, length def&==0
527 my ($insertbegin,$insertend)=$self->_praeinsert($label,$newseq);
528 if ($insertbegin == -1) {
529 return (-1);
530 } else {
531 return (1);
534 if ($newseqlength == $length) { # it means this is simple pointmutation(s)
535 return $self->_mutate($label,$newseq,$length);
537 # if we arrived here then change is complex mixture
538 my $strand=$self->strand();
539 my $afterendlabel=$self->label($length+1,$label,$strand); # get the label at $length+1 positions after $label
540 unless ($afterendlabel > 0) { # label not found or error
541 $self->warn("No valid afterendlabel found for executing the complex mutation!");
542 return (-1);
544 my $deleted=$self->_delete($label,$length); # first delete length nucs
545 if ($deleted == -1) { # if errors
546 return (-1);
547 } else { # then insert the newsequence
548 my ($insertbegin,$insertend)=$self->_praeinsert($afterendlabel,$newseq);
549 if ($insertbegin == -1) {
550 return (-1);
551 } else {
552 return (1);
557 # internal methods for change()
559 # arguments: label for beginning of deletion, new sequence to insert
560 # returns: labels of beginning and end of the inserted sequence
561 # errorcode: -1
562 sub _praeinsert {
563 my ($self,$label,$newseq)=@_;
564 my ($insertbegin,$insertend);
565 my $strand=$self->strand();
566 if ($strand == 1) {
567 ($insertbegin,$insertend)=($self->{'seq'}->praeinsert_string($newseq,$label));
568 } else { # since it's reverse strand and we insert in forward direction....
569 $newseq=reverse($newseq);
570 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
571 ($insertend,$insertbegin)=($self->{'seq'}->postinsert_string($newseq,$label));
573 if (($insertbegin==0)||($insertend==0)) {
574 $self->warn("Some error occurred while inserting!");
575 return (-1);
576 } else {
577 return ($insertbegin,$insertend);
581 # arguments: label for beginning of deletion, length of deletion
582 # returns: string of deleted bases
583 # errorcode: -1
584 sub _delete {
585 my ($self,$label,$length)=@_;
586 my $strand=$self->strand();
587 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
588 unless ($endlabel > 0) { # label not found or error
589 $self->warn("No valid endlabel found for executing the deletion!");
590 return (-1);
592 # this is important in Transcript to fix exon structure
593 $self->_deletecheck($label,$endlabel);
594 my $deletedseq;
595 if ($strand == 1) {
596 $deletedseq=$self->{'seq'}->splice_chain($label,undef,$endlabel);
597 } else {
598 $deletedseq=$self->{'seq'}->splice_chain($endlabel,undef,$label);
599 $deletedseq=reverse($deletedseq); # because we are on reverse strand and we cut anyway
600 # in forward direction
601 $deletedseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
603 return ($deletedseq);
606 # empty function, overridden in Transcript, not useful here
607 sub _deletecheck {
610 # arguments: label for beginning of mutation, newsequence, number of mutations
611 # returns: 1 all OK
612 # errorcode: -1
613 sub _mutate {
614 my ($self,$label,$newseq,$length)=@_; # length is equal to length(newseq)
615 my ($i,$base,$nextlabel);
616 my @labels; # array of labels
617 my $strand=$self->strand();
618 if ($length == 1) { # special cases first
619 @labels=($label);
620 } else {
621 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
622 unless ($endlabel > 0) { # label not found or error
623 $self->warn("No valid endlabel found for executing the mutation!");
624 return (-1);
626 if ($length == 2) { # another special case
627 @labels=($label,$endlabel);
628 } else { # more than 3 bases changed
629 # this wouldn't work for Transcript
630 #my $labelsarrayref;
631 #if ($strand == 1) {
632 #$labelsarrayref=$self->{'seq'}->down_labels($label,$endlabel);
633 #} else {
634 #$labelsarrayref=$self->{'seq'}->up_labels($label,$endlabel);
636 #@labels=@{$labelsarrayref};
637 #if ($length != scalar(@labels)) { # not enough labels returned
638 #$self->warn("Not enough valid labels found for executing the mutation!");
639 #return (-1);
642 # this should be more general
643 @labels=($label); # put the first one
644 while ($label != $endlabel) {
645 $nextlabel=$self->label(2,$label,$strand); # retrieve the next label
646 push (@labels,$nextlabel);
647 $label=$nextlabel; # move on reference
651 if ($strand == -1) { # only for reverse strand
652 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
654 my $errorcheck; # if not equal to $length after summing for all changes, error did occurr
655 $i = 0;
656 foreach $base (split(//,$newseq)) {
657 $errorcheck += $self->{'seq'}->set_value_at_label($base,$labels[$i]);
658 $i++;
660 if ($errorcheck != $length) {
661 $self->warn("Some error occurred while mutating!");
662 return (-1);
663 } else {
664 return (1);
668 =head2 valid
670 Title : valid
671 Usage : $boolean = $obj->valid($label)
672 Function: tests if a label exists inside the object
673 Returns : boolean
674 Args : label
676 =cut
678 # argument: label
679 # returns: 1 YES 0 NO
680 sub valid {
681 my ($self,$label)=@_;
682 my $checkme;
683 my @labels=$self->all_labels;
684 foreach $checkme (@labels) {
685 if ($label == $checkme) {
686 return (1); # found
689 return (0); # not found
693 =head2 start
695 Title : start
696 Usage : $startlabel=$obj->start()
697 Function: returns the label of the first nucleotide of the object (exon, CDS)
698 Returns : label
699 Args : none
701 =cut
703 sub start {
704 my ($self) = @_;
705 return $self->{'start'}; # common for all classes BUT DNA (which redefines it) and Transcript (that takes the information from the Exons)
708 =head2 end
710 Title : end
711 Usage : $endlabel=$obj->end()
712 Function: returns the label of the last nucleotide of the object (exon, CDS)
713 Returns : label
714 Args : none
716 =cut
718 sub end {
719 my ($self) = @_;
720 return $self->{'end'};
723 =head2 strand
725 Title : strand
726 Usage : $strand=$obj->strand()
727 $obj->strand($strand)
728 Function: gets or sets strand information, being 1 or -1 (forward or reverse)
729 Returns : -1 or 1
730 Args : none OR -1 or 1
732 =cut
734 sub strand {
735 my ($self,$strand) = @_;
736 if ($strand) {
737 if (($strand != 1)&&($strand != -1)) {
738 $self->warn("strand information not changed because strand identifier not valid");
739 } else {
740 $self->{'strand'} = $strand;
743 return $self->{'strand'};
746 =head2 alphabet
748 Title : alphabet
749 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
750 Function: Returns the type of sequence being one of
751 'dna', 'rna' or 'protein'. This is case sensitive.
753 Returns : a string either 'dna','rna','protein'.
754 Args : none
756 =cut
759 sub alphabet {
760 my %valid_type = map {$_, 1} qw( dna rna protein );
761 my ($self,$value) = @_;
762 if (defined $value) {
763 $value = 'dna' if $value =~ /dna/i;
764 $value = 'rna' if $value =~ /rna/i;
765 unless ( $valid_type{$value} ) {
766 $self->warn("Molecular type '$value' is not a valid type");
768 $self->{'alphabet'} = $value;
770 return $self->{'alphabet'};
773 =head2 coordinate_start
775 Title : coordinate_start
776 Usage : $coordstartlabel=$obj->coordinate_start()
777 : $coordstartlabel=$obj->coordinate_start($label)
778 Function: returns and optionally sets the first label of the coordinate
779 system used
780 For some objects only labels inside the object or in frame (for
781 Translation objects) will be allowed to get set as coordinate start
783 Returns : label. It returns 0 if label not found.
784 Errorcode -1
785 Args : an optional reference $label that is position 1
787 =cut
790 sub coordinate_start {
791 my ($self,$label) = @_;
792 if ($label) {
793 if ($self->valid($label)) {
794 $self->{'coordinate_start'} = $label;
795 } else {
796 $self->warn("The label you are trying to set as coordinate_start is not valid for this object");
799 my $coord_start = $self->{'coordinate_start'};
800 if ($coord_start) {
801 return $coord_start;
802 } else {
803 return $self->start();
807 =head2 label
809 Title : label
810 Usage : $seq->label($position)
811 : $seq->label($position,$firstlabel)
812 Examples: $nextlabel=$seq->label(2,$label) -> retrieves the following label
813 : $prevlabel=$seq->label(-1,$label) -> retrieves the preceding label
815 Function: returns the label of the nucleotide at $position from current
816 coordinate start
817 Returns : a label. It returns 0 if label not found.
818 Errorcode -1
819 Args : a position,
820 an optional reference $firstlabel that is to be used as position 1
821 an optional strand (1 or -1) argument
822 if strand argument is not given, it will default to the object
823 argument. This argument is useful when a call is issued from a child
824 of a parent object containing the subseq method
826 =cut
829 sub label {
830 my ($self,$position,$firstlabel,$strand)=@_;
831 my $label;
832 unless (defined ($firstlabel)) {
833 $firstlabel=$self->coordinate_start;
835 unless ($position) { # if position = 0 complain ?
836 $self->warn("Position not given or position 0");
837 return (-1);
839 unless (defined ($strand)) { # if optional [strand] argument not given
840 $strand=$self->strand;
842 if ($strand == 1) {
843 if ($position > 0) {
844 $label=$self->{'seq'}->down_get_label_at_pos($position,$firstlabel)
845 } else { # if < 0
846 $label=$self->{'seq'}->up_get_label_at_pos(1 - $position,$firstlabel)
848 } else {
849 if ($position > 0) {
850 $label=$self->{'seq'}->up_get_label_at_pos($position,$firstlabel)
851 } else { # if < 0
852 $label=$self->{'seq'}->down_get_label_at_pos(1 - $position,$firstlabel)
855 return $label;
859 =head2 position
861 Title : position
862 Usage : $seq->position($label)
863 : $seq->position($label,$firstlabel)
864 Function: returns the position of nucleotide at $label
865 Returns : the position of the label from current coordinate start
866 Errorcode 0
867 Args : a label pointing to a certain nucleotide (e.g. start of exon)
868 an optional "firstlabel" as reference to count from
869 an optional strand (1 or -1) argument
870 if strand argument is not given, it will default to the object
871 argument. This argument is useful when a call is issued from a child
872 of a parent object containing the subseq method
874 =cut
877 sub position {
878 my ($self,$label,$firstlabel,$strand)=@_;
879 unless (defined ($strand)) { # if optional [strand] argument not given
880 $strand=$self->strand;
882 unless (defined ($firstlabel)) {
883 $firstlabel=$self->coordinate_start;
885 unless ($self->valid($label)) {
886 $self->warn("label not valid");
887 return (0);
889 if ($firstlabel == $label) {
890 return (1);
892 my ($coordpos,$position0,$position);
893 $position0=$self->{'seq'}->down_get_pos_of_label($label);
894 $coordpos=$self->{'seq'}->down_get_pos_of_label($firstlabel);
895 $position=$position0-$coordpos+1;
896 if ($position <= 0) {
897 $position--;
899 if ($strand == -1) {
900 #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",1-$position;
901 return (1-$position);
902 } else {
903 #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",$position;
904 return ($position);
908 =head2 follows
910 Title : follows
911 Usage : $seq->follows($firstlabel,$secondlabel)
912 : $seq->follows($firstlabel,$secondlabel,$strand)
913 Function: checks if SECONDlabel follows FIRSTlabel, undependent of the strand
914 i.e. it checks downstream for forward strand and
915 upstream for reverse strand
916 Returns : 1 or 0
917 Errorcode -1
918 Args : two labels
919 an optional strand (1 or -1) argument
920 if strand argument is not given, it will default to the object
921 argument. This argument is useful when a call is issued from a child
922 of a parent object containing the subseq method
924 =cut
927 # wraparound to is_downstream and is_upstream that chooses the correct one
928 # depending on the strand
929 sub follows {
930 my ($self,$firstlabel,$secondlabel,$strand)=@_;
931 unless (defined ($strand)) { # if optional [strand] argument not given
932 $strand=$self->strand;
934 if ($strand == 1) {
935 return ($self->{'seq'}->is_downstream($firstlabel,$secondlabel));
936 } else {
937 return ($self->{'seq'}->is_upstream($firstlabel,$secondlabel));
941 #=head2 translate
943 # Title : translate
944 # Usage : $protein_seq = $obj->translate
945 # Function: Provides the translation of the DNA sequence
946 # using full IUPAC ambiguities in DNA/RNA and amino acid codes.
948 # The resulting translation is identical to EMBL/TREMBL database
949 # translations.
951 # Returns : a string
952 # Args : character for terminator (optional) defaults to '*'
953 # character for unknown amino acid (optional) defaults to 'X'
954 # frame (optional) valid values 0, 1, 3, defaults to 0
955 # codon table id (optional) defaults to 1
957 #=cut
959 #sub translate {
960 # my ($self) = shift;
961 # return ($self->translate_string($self->seq,@_));
964 #=head2 translate_string
966 # Title : translate_string
967 # Usage : $protein_seq = $obj->translate_string("attcgtgttgatcgatta");
968 # Function: Like translate, but can be used to translate subsequences after
969 # having retrieved them as string.
970 # Args : 1st argument is a string. Optional following arguments: like in
971 # the translate method
973 #=cut
976 #sub translate_string {
977 # my($self) = shift;
978 # my($seq) = shift;
979 # my($stop, $unknown, $frame, $tableid) = @_;
980 # my($i, $len, $output) = (0,0,'');
981 # my($codon) = "";
982 # my $aa;
985 # ## User can pass in symbol for stop and unknown codons
986 # unless(defined($stop) and $stop ne '') { $stop = "*"; }
987 # unless(defined($unknown) and $unknown ne '') { $unknown = "X"; }
988 # unless(defined($frame) and $frame ne '') { $frame = 0; }
990 # ## the codon table ID
991 # if ($self->translation_table) {
992 # $tableid = $self->translation_table;
994 # unless(defined($tableid) and $tableid ne '') { $tableid = 1; }
996 # ##Error if monomer is "Amino"
997 # $self->warn("Can't translate an amino acid sequence.")
998 # if (defined $self->alphabet && $self->alphabet eq 'protein');
1000 # ##Error if frame is not 0, 1 or 2
1001 # $self->warn("Valid values for frame are 0, 1, 2, not [$frame].")
1002 # unless ($frame == 0 or $frame == 1 or $frame == 2);
1004 # #thows a warning if ID is invalid
1005 # my $codonTable = Bio::Tools::CodonTable->new( -id => $tableid);
1007 # # deal with frame offset.
1008 # if( $frame ) {
1009 # $seq = substr ($seq,$frame);
1012 # for $codon ( grep { CORE::length == 3 } split(/(.{3})/, $seq) ) {
1013 # my $aa = $codonTable->translate($codon);
1014 # if ($aa eq '*') {
1015 # $output .= $stop;
1017 # elsif ($aa eq 'X') {
1018 # $output .= $unknown;
1020 # else {
1021 # $output .= $aa ;
1022 # }
1024 # #if( substr($output,-1,1) eq $stop ) {
1025 # # chop $output;
1026 # #}
1028 # return ($output);
1031 =head2 gene
1033 Title : gene
1034 Usage : my $gene=$obj->gene;
1035 Function: Gets or sets the reference to the LiveSeq::Gene object.
1036 Objects that are features of a LiveSeq Gene will have this
1037 attribute set automatically.
1039 Returns : reference to an object of class Gene
1040 Note : if Gene object is not set, this method will return 0;
1041 Args : none or reference to object of class Bio::LiveSeq::Gene
1043 =cut
1045 sub gene {
1046 my ($self,$value) = @_;
1047 if (defined $value) {
1048 $self->{'gene'} = $value;
1050 unless (exists $self->{'gene'}) {
1051 return (0);
1052 } else {
1053 return $self->{'gene'};
1057 =head2 obj_valid
1059 Title : obj_valid
1060 Usage : if ($obj->obj_valid) {do something;}
1061 Function: Checks if start and end labels are still valid for the ojbect,
1062 i.e. tests if the LiveSeq object is still valid
1063 Returns : boolean
1064 Args : none
1066 =cut
1068 sub obj_valid {
1069 my $self=shift;
1070 unless (($self->{'seq'}->valid($self->start()))&&($self->{'seq'}->valid($self->end()))) {
1071 return (0);
1073 return (1);
1076 =head2 name
1078 Title : name
1079 Usage : $name = $obj->name;
1080 : $name = $obj->name("ABCD");
1081 Function: Returns or sets the name of the object.
1082 If there is no name, it will return "unknown";
1083 Returns : A string
1084 Args : None
1086 =cut
1088 sub name {
1089 my ($self,$value) = @_;
1090 if (defined $value) {
1091 $self->{'name'} = $value;
1093 unless (exists $self->{'name'}) {
1094 return "unknown";
1095 } else {
1096 return $self->{'name'};
1100 =head2 desc
1102 Title : desc
1103 Usage : $desc = $obj->desc;
1104 : $desc = $obj->desc("ABCD");
1105 Function: Returns or sets the description of the object.
1106 If there is no description, it will return "unknown";
1107 Returns : A string
1108 Args : None
1110 =cut
1112 sub desc {
1113 my ($self,$value) = @_;
1114 if (defined $value) {
1115 $self->{'desc'} = $value;
1117 unless (exists $self->{'desc'}) {
1118 return "unknown";
1119 } else {
1120 return $self->{'desc'};
1124 =head2 source
1126 Title : source
1127 Usage : $name = $obj->source;
1128 : $name = $obj->source("Homo sapiens");
1129 Function: Returns or sets the organism that is source of the object.
1130 If there is no source, it will return "unknown";
1131 Returns : A string
1132 Args : None
1134 =cut
1136 sub source {
1137 my ($self,$value) = @_;
1138 if (defined $value) {
1139 $self->{'source'} = $value;
1141 unless (exists $self->{'source'}) {
1142 return "unknown";
1143 } else {
1144 return $self->{'source'};
1148 sub delete_Obj {
1149 my $self = shift;
1150 my @values= values %{$self};
1151 my @keys= keys %{$self};
1153 foreach my $key ( @keys ) {
1154 delete $self->{$key};
1156 foreach my $value ( @values ) {
1157 if (index(ref($value),"LiveSeq") != -1) { # object case
1158 eval {
1159 # delete $self->{$value};
1160 $value->delete_Obj;
1162 } elsif (index(ref($value),"ARRAY") != -1) { # array case
1163 my @array=@{$value};
1164 my $element;
1165 foreach $element (@array) {
1166 eval {
1167 $element->delete_Obj;
1170 } elsif (index(ref($value),"HASH") != -1) { # object case
1171 my %hash=%{$value};
1172 my $element;
1173 foreach $element (%hash) {
1174 eval {
1175 $element->delete_Obj;
1180 return(1);