Merge branch 'master' of github.com:bioperl/bioperl-live
[bioperl-live.git] / Bio / LiveSeq / SeqI.pm
blob0197f73797fcf8af19ae963fde3b1aeffcb683b6
2 # bioperl module for Bio::LiveSeq::SeqI
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
8 # Copyright Joseph Insana
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::LiveSeq::SeqI - Abstract sequence interface class for LiveSeq
18 =head1 SYNOPSIS
20 # documentation needed
22 =head1 DESCRIPTION
24 This class implements BioPerl PrimarySeqI interface for Live Seq objects.
26 One of the main difference in LiveSequence compared to traditional
27 "string" sequences is that coordinate systems are flexible. Typically
28 gene nucleotide numbering starts from 1 at the first character of the
29 initiator codon (A in ATG). This means that negative positions are
30 possible and common!
32 Secondly, the sequence manipulation methods do not return a new
33 sequence object but change the current object. The current status can
34 be written out to BioPerl sequence objects.
36 =head1 FEEDBACK
38 =head2 Mailing Lists
40 User feedback is an integral part of the evolution of this and other
41 Bioperl modules. Send your comments and suggestions preferably to one
42 of the Bioperl mailing lists. Your participation is much appreciated.
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 =head2 Support
49 Please direct usage questions or support issues to the mailing list:
51 I<bioperl-l@bioperl.org>
53 rather than to the module maintainer directly. Many experienced and
54 reponsive experts will be able look at the problem and quickly
55 address it. Please include a thorough description of the problem
56 with code and data examples if at all possible.
58 =head2 Reporting Bugs
60 Report bugs to the Bioperl bug tracking system to help us keep track
61 the bugs and their resolution. Bug reports can be submitted via the
62 web:
64 https://github.com/bioperl/bioperl-live/issues
66 =head1 AUTHOR - Joseph A.L. Insana
68 Email: Insana@ebi.ac.uk, jinsana@gmx.net
70 =head1 APPENDIX
72 The rest of the documentation details each of the object
73 methods. Internal methods are usually preceded with a _
75 Some note on the terminology/notation of method names:
76 label: a unique pointer to a single nucleotide
77 position: the position of a nucleotide according to a particular coordinate
78 system (e.g. counting downstream from a particular label taken as
79 number 1)
80 base: the one letter code for a nucleotide (i.e.: "a" "t" "c" "g")
82 a base is the "value" that an "element" of a "chain" can assume
83 (see documentation on the Chain datastructure if interested)
85 =cut
88 # Let the code begin...
90 package Bio::LiveSeq::SeqI;
91 use strict;
92 use Bio::Tools::CodonTable; # for the translate() function
94 use base qw(Bio::Root::Root Bio::LiveSeq::ChainI Bio::PrimarySeqI);
96 =head2 seq
98 Title : seq
99 Usage : $string = $obj->seq()
100 Function: Returns the complete sequence of an object as a string of letters.
101 Suggested cases are upper case for proteins and lower case for
102 DNA sequence (IUPAC standard),
103 Returns : a string
106 =cut
108 sub seq {
109 my $self = shift;
110 my ($start,$end) = ($self->start(),$self->end());
111 if ($self->strand() == 1) {
112 return $self->{'seq'}->down_chain2string($start,undef,$end);
113 } else { # reverse strand
114 my $str = $self->{'seq'}->up_chain2string($start,undef,$end);
115 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
116 return $str;
120 =head2 all_labels
122 Title : all_labels
123 Usage : @labels = $obj->all_labels()
124 Function: all the labels of every nucleotide an object is composed of
125 Returns : an array of labels
126 Args : none
128 =cut
130 sub all_labels {
131 my $self = shift;
132 my ($start,$end) = ($self->start(),$self->end());
133 my $labels;
134 if ($self->strand() == 1) {
135 $labels=$self->{'seq'}->down_labels($start,$end);
136 } else {
137 $labels=$self->{'seq'}->up_labels($start,$end);
139 return (@{$labels});
142 =head2 labelsubseq
144 Title : labelsubseq
145 Usage : $dna->labelsubseq();
146 : $dna->labelsubseq($startlabel);
147 : $dna->labelsubseq($startlabel,$length);
148 : $dna->labelsubseq($startlabel,undef,$endlabel);
149 e.g. : $dna->labelsubseq(4,undef,8);
150 Function: prints the sequence as string. The difference between labelsubseq
151 and normal subseq is that it uses /labels/ as arguments, instead
152 than positions. This allows for faster and more efficient lookup,
153 skipping the (usually) lengthy conversion of positions into labels.
154 This is especially useful for manipulating with high power
155 LiveSeq objects, knowing the labels and exploiting their
156 usefulness.
157 Returns : a string
158 Errorcode -1
159 Args : without arguments it returns the entire sequence
160 with a startlabel it returns the sequence downstream that label
161 if a length is specified, it returns only that number of bases
162 if an endlabel is specified, it overrides the length argument
163 and prints instead up to that label (included)
164 Defaults: $startlabel defaults to the beginning of the entire sequence
165 $endlabel defaults to the end of the entire sequence
167 =cut
169 # NOTE: unsecuremode is to be used /ONLY/ if sure of the start and end labels, especially that they follow each other in the correct order!!!!
171 sub labelsubseq {
172 my ($self,$start,$length,$end,$unsecuremode) = @_;
173 if (defined $unsecuremode && $unsecuremode eq "unsecuremoderequested")
174 { # to skip security checks (faster)
175 unless ($start) {
176 $start=$self->start;
178 if ($end) {
179 if ($end == $start) {
180 $length=1;
181 undef $end;
182 } else {
183 undef $length;
185 } else {
186 unless ($length) {
187 $end=$self->end;
190 } else {
191 if ($start) {
192 unless ($self->{'seq'}->valid($start)) {
193 $self->warn("Start label not valid"); return (-1);
196 if ($end) {
197 if ($end == $start) {
198 $length=1;
199 undef $end;
200 } else {
201 unless ($self->{'seq'}->valid($end)) {
202 $self->warn("End label not valid"); return (-1);
204 unless ($self->follows($start,$end) == 1) {
205 $self->warn("End label does not follow Start label!"); return (-1);
207 undef $length;
211 if ($self->strand() == 1) {
212 return $self->{'seq'}->down_chain2string($start,$length,$end);
213 } else { # reverse strand
214 my $str = $self->{'seq'}->up_chain2string($start,$length,$end);
215 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
216 return $str;
220 =head2 subseq
222 Title : subseq
223 Usage : $substring = $obj->subseq(10,40);
224 : $substring = $obj->subseq(10,undef,4);
225 Function: returns the subseq from start to end, where the first base
226 is 1 and the number is inclusive, ie 1-2 are the first two
227 bases of the sequence
229 Start cannot be larger than end but can be equal.
231 Allows for negative numbers $obj->subseq(-10,-1). By
232 definition, there is no 0!
233 -5 -1 1 5
234 gctagcgcccaac atggctcgctg
236 This allows one to retrieve sequences upstream from given position.
238 The precedence is from left to right: if END is given LENGTH is
239 ignored.
241 Examples: $obj->subseq(-10,undef,10) returns 10 elements before position 1
242 $obj->subseq(4,8) returns elements from the 4th to the 8th, inclusive
244 Returns : a string
245 Errorcode: -1
246 Args : start, integer, defaults to start of the sequence
247 end, integer, '' or undef, defaults to end of the sequence
248 length, integer, '' or undef
249 an optional strand (1 or -1) 4th argument
250 if strand argument is not given, it will default to the object
251 argment. This argument is useful when a call is issued from a child
252 of a parent object containing the subseq method
254 =cut
257 # check the fact about reverse strand!
258 # is it feasible? Is it correct? Should we do it? How about exons? Does it
259 # work when you ask subseq of an exon?
260 # eliminated now (Mon night)
261 sub subseq {
262 ##my ($self,$pos1,$pos2,$length,$strand) = @_;
263 my ($self,$pos1,$pos2,$length,$strand) = @_;
264 ##unless (defined ($strand)) { # if optional [strand] argument not given
265 ## $strand=$self->strand;
267 $strand=$self->strand;
268 my ($str,$startlabel,$endlabel);
269 if (defined ($length)) {
270 if ($length < 1) {
271 $self->warn("No sense asking for a subseq of length < 1");
272 return (-1);
275 unless (defined ($pos1)) {
276 #print "\n##### DEBUG pos1 not defined\n";
277 $startlabel=$self->start;
278 } else {
279 if ($pos1 == 0) { # if position = 0 complain
280 $self->warn("Position cannot be 0!"); return (-1);
282 ##if ($strand == 1) { # CHECK THIS!
283 if ((defined ($pos2))&&($pos1>$pos2)) {
284 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
286 ##} else { # CHECK THIS!
287 ## if ((defined ($pos2))&&($pos1<$pos2)) {
288 ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!)"; return (-1);
289 ## }
291 $startlabel=$self->label($pos1);
292 if ($startlabel < 1) {
293 $self->warn("position $pos1 not valid as start of subseq!"); return (-1);
296 unless (defined ($pos2)) {
297 #print "\n##### pos2 not defined\n";
298 unless (defined ($length)) {
299 $endlabel=$self->end;
301 } else {
302 if ($pos2 == 0) { # if position = 0 complain
303 $self->warn("Position cannot be 0!"); return (-1);
305 undef $length;
306 ##if ($strand == 1) { # CHECK THIS!
307 if ((defined ($pos1))&&($pos1>$pos2)) {
308 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
310 ##} else { # CHECK THIS!
311 ## if ((defined ($pos1))&&($pos1<$pos2)) {
312 ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!"); return (-1);
313 ## }
315 $endlabel=$self->label($pos2);
316 if ($endlabel < 1) {
317 $self->warn("position $pos2 not valid as end of subseq!"); return (-1);
320 #print "\n ####DEBUG: start $startlabel end $endlabel length $length strand $strand\n";
322 if ($strand == 1) {
323 $str = $self->{'seq'}->down_chain2string($startlabel,$length,$endlabel);
324 } else { # reverse strand
325 $str = $self->{'seq'}->up_chain2string($startlabel,$length,$endlabel);
326 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
328 return $str;
331 =head2 length
333 Title : length
334 Usage : $seq->length();
335 Function: returns the number of nucleotides (or the number of aminoacids)
336 in the entire sequence
337 Returns : an integer
338 Errorcode -1
339 Args : none
341 =cut
343 sub length {
344 my $self=shift;
345 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
346 if ($strand == 1) {
347 return $self->{'seq'}->down_subchain_length($start,$end);
348 } else {
349 return $self->{'seq'}->up_subchain_length($start,$end);
353 =head2 display_id
355 Title : display_id
356 Usage : $id_string = $obj->display_id();
357 Function: returns the display id, alias the common name of the object
359 The semantics of this is that it is the most likely string
360 to be used as an identifier of the sequence, and likely to
361 have "human" readability. The id is equivalent to the ID
362 field of the GenBank/EMBL databanks and the id field of the
363 Swissprot/sptrembl database. In fasta format, the >(\S+) is
364 presumed to be the id, though some people overload the id
365 to embed other information.
367 See also: accession_number
368 Returns : a string
369 Args : none
371 =cut
373 sub display_id {
374 my ($self,$value) = @_;
375 if(defined $value) {
376 $self->{'display_id'} = $value;
378 return $self->{'display_id'};
382 =head2 accession_number
384 Title : accession_number
385 Usage : $unique_biological_key = $obj->accession_number;
386 Function: Returns the unique biological id for a sequence, commonly
387 called the accession_number.
388 Notice that primary_id() provides the unique id for the
389 implemetation, allowing multiple objects to have the same accession
390 number in a particular implementation.
392 For objects with no accession_number this method returns "unknown".
393 Returns : a string
394 Args : none
396 =cut
398 sub accession_number {
399 my ($self,$value) = @_;
400 if (defined $value) {
401 $self->{'accession_number'} = $value;
403 unless (exists $self->{'accession_number'}) {
404 return "unknown";
405 } else {
406 return $self->{'accession_number'};
410 =head2 primary_id
412 Title : primary_id
413 Usage : $unique_implementation_key = $obj->primary_id;
414 Function: Returns the unique id for this object in this
415 implementation. This allows implementations to manage their own
416 object ids in a way the implementation can control. Clients can
417 expect one id to map to one object.
419 For sequences with no primary_id, this method returns
420 a stringified memory location.
422 Returns : A string
423 Args : None
425 =cut
428 sub primary_id {
429 my ($self,$value) = @_;
430 if(defined $value) {
431 $self->{'primary_id'} = $value;
433 unless (exists $self->{'primary_id'}) {
434 return "$self";
435 } else {
436 return $self->{'primary_id'};
440 =head2 change
442 Title : change
443 Usage : $substring = $obj->change('AA', 10);
444 Function: changes, modifies, mutates the LiveSequence
445 Examples:
446 $obj->change('', 10); delete nucleotide #10
447 $obj->change('', 10, 2); delete two nucleotides starting from #10
448 $obj->change('G', 10); change nuc #10 to 'G'
449 $obj->change('GA', 10, 4); replace #10 and 3 following with 'GA'
450 $obj->change('GA', 10, 2)); is same as $obj->change('GA', 10);
451 $obj->change('GA', 10, 0 ); insert 'GA' before nucleotide at #10
452 $obj->change('GA', 10, 1); GA inserted before #10, #10 deleted
453 $obj->change('GATC', 10, 2); GATC inserted before #10, #10&#11 deleted
454 $obj->change('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted
457 Returns : a string of deleted bases (if any) or 1 (everything OK)
458 Errorcode: -1
459 Args : seq, string, or '' ('' = undef = 0 = deletion)
460 start, integer
461 length, integer (optional)
463 =cut
465 sub change {
466 &positionchange;
469 =head2 positionchange
471 Title : positionchange
472 Function: Exactly like change. I.e. change() defaults to positionchange()
474 =cut
476 sub positionchange {
477 my ($self,$newseq,$position,$length)=@_;
478 unless ($position) {
479 $self->warn("Position not given or position 0");
480 return (-1);
482 my $label=$self->label($position);
483 unless ($label > 0) { # label not found or error
484 $self->warn("No valid label found at that position!");
485 return (-1);
487 return ($self->labelchange($newseq,$label,$length));
490 =head2 labelchange
492 Title : labelchange
493 Function: Exactly like change but uses a /label/ instead than a position
494 as second argument. This allows for multiple changes in a LiveSeq
495 without the burden of recomputing positions. I.e. for a multiple
496 change in two different points of the LiveSeq, the approach would
497 be the following: fetch the correct labels out of the two different
498 positions (method: label($position)) and then use the labelchange()
499 method to modify the sequence using those labels instead than
500 relying on the positions (that would have modified after the
501 first change).
503 =cut
505 sub labelchange {
506 my ($self,$newseq,$label,$length)=@_;
507 unless ($self->valid($label)) {
508 if ($self->{'seq'}->valid($label)) {
509 #$self->warn("Label \'$label\' not valid for executing a LiveSeq change for the object asked but it's ok for DNAlevel change, reverting to that");
510 shift @_;
511 return($self->{'seq'}->labelchange(@_));
512 } else {
513 $self->warn("Label \'$label\' not valid for executing a LiveSeq change");
514 return (-1);
517 unless ($newseq) { # it means this is a simple deletion
518 if (defined($length)) {
519 unless ($length >= 0) {
520 $self->warn("No sense having length < 0 in a deletion");
521 return (-1);
523 } else {
524 $self->warn("Length not defined for deletion!");
525 return (-1);
527 return $self->_delete($label,$length);
529 my $newseqlength=CORE::length($newseq);
530 if (defined($length)) {
531 unless ($length >= 0) {
532 $self->warn("No sense having length < 0 in a change()");
533 return (-1);
535 } else {
536 $length=$newseqlength; # defaults to pointmutation(s)
538 if ($length == 0) { # it means this is a simple insertion, length def&==0
539 my ($insertbegin,$insertend)=$self->_praeinsert($label,$newseq);
540 if ($insertbegin == -1) {
541 return (-1);
542 } else {
543 return (1);
546 if ($newseqlength == $length) { # it means this is simple pointmutation(s)
547 return $self->_mutate($label,$newseq,$length);
549 # if we arrived here then change is complex mixture
550 my $strand=$self->strand();
551 my $afterendlabel=$self->label($length+1,$label,$strand); # get the label at $length+1 positions after $label
552 unless ($afterendlabel > 0) { # label not found or error
553 $self->warn("No valid afterendlabel found for executing the complex mutation!");
554 return (-1);
556 my $deleted=$self->_delete($label,$length); # first delete length nucs
557 if ($deleted eq -1) { # if errors
558 return (-1);
559 } else { # then insert the newsequence
560 my ($insertbegin,$insertend)=$self->_praeinsert($afterendlabel,$newseq);
561 if ($insertbegin == -1) {
562 return (-1);
563 } else {
564 return (1);
569 # internal methods for change()
571 # arguments: label for beginning of deletion, new sequence to insert
572 # returns: labels of beginning and end of the inserted sequence
573 # errorcode: -1
574 sub _praeinsert {
575 my ($self,$label,$newseq)=@_;
576 my ($insertbegin,$insertend);
577 my $strand=$self->strand();
578 if ($strand == 1) {
579 ($insertbegin,$insertend)=($self->{'seq'}->praeinsert_string($newseq,$label));
580 } else { # since it's reverse strand and we insert in forward direction....
581 $newseq=reverse($newseq);
582 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
583 ($insertend,$insertbegin)=($self->{'seq'}->postinsert_string($newseq,$label));
585 if (($insertbegin==0)||($insertend==0)) {
586 $self->warn("Some error occurred while inserting!");
587 return (-1);
588 } else {
589 return ($insertbegin,$insertend);
593 # arguments: label for beginning of deletion, length of deletion
594 # returns: string of deleted bases
595 # errorcode: -1
596 sub _delete {
597 my ($self,$label,$length)=@_;
598 my $strand=$self->strand();
599 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
600 unless ($endlabel > 0) { # label not found or error
601 $self->warn("No valid endlabel found for executing the deletion!");
602 return (-1);
604 # this is important in Transcript to fix exon structure
605 $self->_deletecheck($label,$endlabel);
606 my $deletedseq;
607 if ($strand == 1) {
608 $deletedseq=$self->{'seq'}->splice_chain($label,undef,$endlabel);
609 } else {
610 $deletedseq=$self->{'seq'}->splice_chain($endlabel,undef,$label);
611 $deletedseq=reverse($deletedseq); # because we are on reverse strand and we cut anyway
612 # in forward direction
613 $deletedseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
615 return ($deletedseq);
618 # empty function, overridden in Transcript, not useful here
619 sub _deletecheck {
622 # arguments: label for beginning of mutation, newsequence, number of mutations
623 # returns: 1 all OK
624 # errorcode: -1
625 sub _mutate {
626 my ($self,$label,$newseq,$length)=@_; # length is equal to length(newseq)
627 my ($i,$base,$nextlabel);
628 my @labels; # array of labels
629 my $strand=$self->strand();
630 if ($length == 1) { # special cases first
631 @labels=($label);
632 } else {
633 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
634 unless ($endlabel > 0) { # label not found or error
635 $self->warn("No valid endlabel found for executing the mutation!");
636 return (-1);
638 if ($length == 2) { # another special case
639 @labels=($label,$endlabel);
640 } else { # more than 3 bases changed
641 # this wouldn't work for Transcript
642 #my $labelsarrayref;
643 #if ($strand == 1) {
644 #$labelsarrayref=$self->{'seq'}->down_labels($label,$endlabel);
645 #} else {
646 #$labelsarrayref=$self->{'seq'}->up_labels($label,$endlabel);
648 #@labels=@{$labelsarrayref};
649 #if ($length != scalar(@labels)) { # not enough labels returned
650 #$self->warn("Not enough valid labels found for executing the mutation!");
651 #return (-1);
654 # this should be more general
655 @labels=($label); # put the first one
656 while ($label != $endlabel) {
657 $nextlabel=$self->label(2,$label,$strand); # retrieve the next label
658 push (@labels,$nextlabel);
659 $label=$nextlabel; # move on reference
663 if ($strand == -1) { # only for reverse strand
664 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
666 my $errorcheck; # if not equal to $length after summing for all changes, error did occurr
667 $i = 0;
668 foreach $base (split(//,$newseq)) {
669 $errorcheck += $self->{'seq'}->set_value_at_label($base,$labels[$i]);
670 $i++;
672 if ($errorcheck != $length) {
673 $self->warn("Some error occurred while mutating!");
674 return (-1);
675 } else {
676 return (1);
680 =head2 valid
682 Title : valid
683 Usage : $boolean = $obj->valid($label)
684 Function: tests if a label exists inside the object
685 Returns : boolean
686 Args : label
688 =cut
690 # argument: label
691 # returns: 1 YES 0 NO
692 sub valid {
693 my ($self,$label)=@_;
694 my $checkme;
695 my @labels=$self->all_labels;
696 foreach $checkme (@labels) {
697 if ($label == $checkme) {
698 return (1); # found
701 return (0); # not found
705 =head2 start
707 Title : start
708 Usage : $startlabel=$obj->start()
709 Function: returns the label of the first nucleotide of the object (exon, CDS)
710 Returns : label
711 Args : none
713 =cut
715 sub start {
716 my ($self) = @_;
717 return $self->{'start'}; # common for all classes BUT DNA (which redefines it) and Transcript (that takes the information from the Exons)
720 =head2 end
722 Title : end
723 Usage : $endlabel=$obj->end()
724 Function: returns the label of the last nucleotide of the object (exon, CDS)
725 Returns : label
726 Args : none
728 =cut
730 sub end {
731 my ($self) = @_;
732 return $self->{'end'};
735 =head2 strand
737 Title : strand
738 Usage : $strand=$obj->strand()
739 $obj->strand($strand)
740 Function: gets or sets strand information, being 1 or -1 (forward or reverse)
741 Returns : -1 or 1
742 Args : none OR -1 or 1
744 =cut
746 sub strand {
747 my ($self,$strand) = @_;
748 if ($strand) {
749 if (($strand != 1)&&($strand != -1)) {
750 $self->warn("strand information not changed because strand identifier not valid");
751 } else {
752 $self->{'strand'} = $strand;
755 return $self->{'strand'};
758 =head2 alphabet
760 Title : alphabet
761 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
762 Function: Returns the type of sequence being one of
763 'dna', 'rna' or 'protein'. This is case sensitive.
765 Returns : a string either 'dna','rna','protein'.
766 Args : none
768 =cut
771 sub alphabet {
772 my %valid_type = map {$_, 1} qw( dna rna protein );
773 my ($self,$value) = @_;
774 if (defined $value) {
775 $value = 'dna' if $value =~ /dna/i;
776 $value = 'rna' if $value =~ /rna/i;
777 unless ( $valid_type{$value} ) {
778 $self->warn("Molecular type '$value' is not a valid type");
780 $self->{'alphabet'} = $value;
782 return $self->{'alphabet'};
785 =head2 coordinate_start
787 Title : coordinate_start
788 Usage : $coordstartlabel=$obj->coordinate_start()
789 : $coordstartlabel=$obj->coordinate_start($label)
790 Function: returns and optionally sets the first label of the coordinate
791 system used
792 For some objects only labels inside the object or in frame (for
793 Translation objects) will be allowed to get set as coordinate start
795 Returns : label. It returns 0 if label not found.
796 Errorcode -1
797 Args : an optional reference $label that is position 1
799 =cut
802 sub coordinate_start {
803 my ($self,$label) = @_;
804 if ($label) {
805 if ($self->valid($label)) {
806 $self->{'coordinate_start'} = $label;
807 } else {
808 $self->warn("The label you are trying to set as coordinate_start is not valid for this object");
811 my $coord_start = $self->{'coordinate_start'};
812 if ($coord_start) {
813 return $coord_start;
814 } else {
815 return $self->start();
819 =head2 label
821 Title : label
822 Usage : $seq->label($position)
823 : $seq->label($position,$firstlabel)
824 Examples: $nextlabel=$seq->label(2,$label) -> retrieves the following label
825 : $prevlabel=$seq->label(-1,$label) -> retrieves the preceding label
827 Function: returns the label of the nucleotide at $position from current
828 coordinate start
829 Returns : a label. It returns 0 if label not found.
830 Errorcode -1
831 Args : a position,
832 an optional reference $firstlabel that is to be used as position 1
833 an optional strand (1 or -1) argument
834 if strand argument is not given, it will default to the object
835 argument. This argument is useful when a call is issued from a child
836 of a parent object containing the subseq method
838 =cut
841 sub label {
842 my ($self,$position,$firstlabel,$strand)=@_;
843 my $label;
844 unless (defined ($firstlabel)) {
845 $firstlabel=$self->coordinate_start;
847 unless ($position) { # if position = 0 complain ?
848 $self->warn("Position not given or position 0");
849 return (-1);
851 unless (defined ($strand)) { # if optional [strand] argument not given
852 $strand=$self->strand;
854 if ($strand == 1) {
855 if ($position > 0) {
856 $label=$self->{'seq'}->down_get_label_at_pos($position,$firstlabel)
857 } else { # if < 0
858 $label=$self->{'seq'}->up_get_label_at_pos(1 - $position,$firstlabel)
860 } else {
861 if ($position > 0) {
862 $label=$self->{'seq'}->up_get_label_at_pos($position,$firstlabel)
863 } else { # if < 0
864 $label=$self->{'seq'}->down_get_label_at_pos(1 - $position,$firstlabel)
867 return $label;
871 =head2 position
873 Title : position
874 Usage : $seq->position($label)
875 : $seq->position($label,$firstlabel)
876 Function: returns the position of nucleotide at $label
877 Returns : the position of the label from current coordinate start
878 Errorcode 0
879 Args : a label pointing to a certain nucleotide (e.g. start of exon)
880 an optional "firstlabel" as reference to count from
881 an optional strand (1 or -1) argument
882 if strand argument is not given, it will default to the object
883 argument. This argument is useful when a call is issued from a child
884 of a parent object containing the subseq method
886 =cut
889 sub position {
890 my ($self,$label,$firstlabel,$strand)=@_;
891 unless (defined ($strand)) { # if optional [strand] argument not given
892 $strand=$self->strand;
894 unless (defined ($firstlabel)) {
895 $firstlabel=$self->coordinate_start;
897 unless ($self->valid($label)) {
898 $self->warn("label not valid");
899 return (0);
901 if ($firstlabel == $label) {
902 return (1);
904 my ($coordpos,$position0,$position);
905 $position0=$self->{'seq'}->down_get_pos_of_label($label);
906 $coordpos=$self->{'seq'}->down_get_pos_of_label($firstlabel);
907 $position=$position0-$coordpos+1;
908 if ($position <= 0) {
909 $position--;
911 if ($strand == -1) {
912 #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",1-$position;
913 return (1-$position);
914 } else {
915 #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",$position;
916 return ($position);
920 =head2 follows
922 Title : follows
923 Usage : $seq->follows($firstlabel,$secondlabel)
924 : $seq->follows($firstlabel,$secondlabel,$strand)
925 Function: checks if SECONDlabel follows FIRSTlabel, undependent of the strand
926 i.e. it checks downstream for forward strand and
927 upstream for reverse strand
928 Returns : 1 or 0
929 Errorcode -1
930 Args : two labels
931 an optional strand (1 or -1) argument
932 if strand argument is not given, it will default to the object
933 argument. This argument is useful when a call is issued from a child
934 of a parent object containing the subseq method
936 =cut
939 # wraparound to is_downstream and is_upstream that chooses the correct one
940 # depending on the strand
941 sub follows {
942 my ($self,$firstlabel,$secondlabel,$strand)=@_;
943 unless (defined ($strand)) { # if optional [strand] argument not given
944 $strand=$self->strand;
946 if ($strand == 1) {
947 return ($self->{'seq'}->is_downstream($firstlabel,$secondlabel));
948 } else {
949 return ($self->{'seq'}->is_upstream($firstlabel,$secondlabel));
953 #=head2 translate
955 # Title : translate
956 # Usage : $protein_seq = $obj->translate
957 # Function: Provides the translation of the DNA sequence
958 # using full IUPAC ambiguities in DNA/RNA and amino acid codes.
960 # The resulting translation is identical to EMBL/TREMBL database
961 # translations.
963 # Returns : a string
964 # Args : character for terminator (optional) defaults to '*'
965 # character for unknown amino acid (optional) defaults to 'X'
966 # frame (optional) valid values 0, 1, 3, defaults to 0
967 # codon table id (optional) defaults to 1
969 #=cut
971 #sub translate {
972 # my ($self) = shift;
973 # return ($self->translate_string($self->seq,@_));
976 #=head2 translate_string
978 # Title : translate_string
979 # Usage : $protein_seq = $obj->translate_string("attcgtgttgatcgatta");
980 # Function: Like translate, but can be used to translate subsequences after
981 # having retrieved them as string.
982 # Args : 1st argument is a string. Optional following arguments: like in
983 # the translate method
985 #=cut
988 #sub translate_string {
989 # my($self) = shift;
990 # my($seq) = shift;
991 # my($stop, $unknown, $frame, $tableid) = @_;
992 # my($i, $len, $output) = (0,0,'');
993 # my($codon) = "";
994 # my $aa;
997 # ## User can pass in symbol for stop and unknown codons
998 # unless(defined($stop) and $stop ne '') { $stop = "*"; }
999 # unless(defined($unknown) and $unknown ne '') { $unknown = "X"; }
1000 # unless(defined($frame) and $frame ne '') { $frame = 0; }
1002 # ## the codon table ID
1003 # if ($self->translation_table) {
1004 # $tableid = $self->translation_table;
1006 # unless(defined($tableid) and $tableid ne '') { $tableid = 1; }
1008 # ##Error if monomer is "Amino"
1009 # $self->warn("Can't translate an amino acid sequence.")
1010 # if (defined $self->alphabet && $self->alphabet eq 'protein');
1012 # ##Error if frame is not 0, 1 or 2
1013 # $self->warn("Valid values for frame are 0, 1, 2, not [$frame].")
1014 # unless ($frame == 0 or $frame == 1 or $frame == 2);
1016 # #thows a warning if ID is invalid
1017 # my $codonTable = Bio::Tools::CodonTable->new( -id => $tableid);
1019 # # deal with frame offset.
1020 # if( $frame ) {
1021 # $seq = substr ($seq,$frame);
1024 # for $codon ( grep { CORE::length == 3 } split(/(.{3})/, $seq) ) {
1025 # my $aa = $codonTable->translate($codon);
1026 # if ($aa eq '*') {
1027 # $output .= $stop;
1029 # elsif ($aa eq 'X') {
1030 # $output .= $unknown;
1032 # else {
1033 # $output .= $aa ;
1036 # #if( substr($output,-1,1) eq $stop ) {
1037 # # chop $output;
1038 # #}
1040 # return ($output);
1043 =head2 gene
1045 Title : gene
1046 Usage : my $gene=$obj->gene;
1047 Function: Gets or sets the reference to the LiveSeq::Gene object.
1048 Objects that are features of a LiveSeq Gene will have this
1049 attribute set automatically.
1051 Returns : reference to an object of class Gene
1052 Note : if Gene object is not set, this method will return 0;
1053 Args : none or reference to object of class Bio::LiveSeq::Gene
1055 =cut
1057 sub gene {
1058 my ($self,$value) = @_;
1059 if (defined $value) {
1060 $self->{'gene'} = $value;
1062 unless (exists $self->{'gene'}) {
1063 return (0);
1064 } else {
1065 return $self->{'gene'};
1069 =head2 obj_valid
1071 Title : obj_valid
1072 Usage : if ($obj->obj_valid) {do something;}
1073 Function: Checks if start and end labels are still valid for the ojbect,
1074 i.e. tests if the LiveSeq object is still valid
1075 Returns : boolean
1076 Args : none
1078 =cut
1080 sub obj_valid {
1081 my $self=shift;
1082 unless (($self->{'seq'}->valid($self->start()))&&($self->{'seq'}->valid($self->end()))) {
1083 return (0);
1085 return (1);
1088 =head2 name
1090 Title : name
1091 Usage : $name = $obj->name;
1092 : $name = $obj->name("ABCD");
1093 Function: Returns or sets the name of the object.
1094 If there is no name, it will return "unknown";
1095 Returns : A string
1096 Args : None
1098 =cut
1100 sub name {
1101 my ($self,$value) = @_;
1102 if (defined $value) {
1103 $self->{'name'} = $value;
1105 unless (exists $self->{'name'}) {
1106 return "unknown";
1107 } else {
1108 return $self->{'name'};
1112 =head2 desc
1114 Title : desc
1115 Usage : $desc = $obj->desc;
1116 : $desc = $obj->desc("ABCD");
1117 Function: Returns or sets the description of the object.
1118 If there is no description, it will return "unknown";
1119 Returns : A string
1120 Args : None
1122 =cut
1124 sub desc {
1125 my ($self,$value) = @_;
1126 if (defined $value) {
1127 $self->{'desc'} = $value;
1129 unless (exists $self->{'desc'}) {
1130 return "unknown";
1131 } else {
1132 return $self->{'desc'};
1136 =head2 source
1138 Title : source
1139 Usage : $name = $obj->source;
1140 : $name = $obj->source("Homo sapiens");
1141 Function: Returns or sets the organism that is source of the object.
1142 If there is no source, it will return "unknown";
1143 Returns : A string
1144 Args : None
1146 =cut
1148 sub source {
1149 my ($self,$value) = @_;
1150 if (defined $value) {
1151 $self->{'source'} = $value;
1153 unless (exists $self->{'source'}) {
1154 return "unknown";
1155 } else {
1156 return $self->{'source'};
1160 sub delete_Obj {
1161 my $self = shift;
1162 my @values= values %{$self};
1163 my @keys= keys %{$self};
1165 foreach my $key ( @keys ) {
1166 delete $self->{$key};
1168 foreach my $value ( @values ) {
1169 if (index(ref($value),"LiveSeq") != -1) { # object case
1170 eval {
1171 # delete $self->{$value};
1172 $value->delete_Obj;
1174 } elsif (index(ref($value),"ARRAY") != -1) { # array case
1175 my @array=@{$value};
1176 my $element;
1177 foreach $element (@array) {
1178 eval {
1179 $element->delete_Obj;
1182 } elsif (index(ref($value),"HASH") != -1) { # object case
1183 my %hash=%{$value};
1184 my $element;
1185 foreach $element (%hash) {
1186 eval {
1187 $element->delete_Obj;
1192 return(1);