3 # bioperl module for Bio::LiveSeq::SeqI
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
7 # Copyright Joseph Insana
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::LiveSeq::SeqI - Abstract sequence interface class for LiveSeq
19 # documentation needed
23 This class implements BioPerl PrimarySeqI interface for Live Seq objects.
25 One of the main difference in LiveSequence compared to traditional
26 "string" sequences is that coordinate systems are flexible. Typically
27 gene nucleotide numbering starts from 1 at the first character of the
28 initiator codon (A in ATG). This means that negative positions are
31 Secondly, the sequence manipulation methods do not return a new
32 sequence object but change the current object. The current status can
33 be written out to BioPerl sequence objects.
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to one
41 of the Bioperl mailing lists. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 the bugs and their resolution. Bug reports can be submitted via the
52 http://bugzilla.open-bio.org/
54 =head1 AUTHOR - Joseph A.L. Insana
56 Email: Insana@ebi.ac.uk, jinsana@gmx.net
60 The rest of the documentation details each of the object
61 methods. Internal methods are usually preceded with a _
63 Some note on the terminology/notation of method names:
64 label: a unique pointer to a single nucleotide
65 position: the position of a nucleotide according to a particular coordinate
66 system (e.g. counting downstream from a particular label taken as
68 base: the one letter code for a nucleotide (i.e.: "a" "t" "c" "g")
70 a base is the "value" that an "element" of a "chain" can assume
71 (see documentation on the Chain datastructure if interested)
76 # Let the code begin...
78 package Bio
::LiveSeq
::SeqI
;
80 use Bio
::Tools
::CodonTable
; # for the translate() function
82 use base
qw(Bio::Root::Root Bio::LiveSeq::ChainI Bio::PrimarySeqI);
87 Usage : $string = $obj->seq()
88 Function: Returns the complete sequence of an object as a string of letters.
89 Suggested cases are upper case for proteins and lower case for
90 DNA sequence (IUPAC standard),
98 my ($start,$end) = ($self->start(),$self->end());
99 if ($self->strand() == 1) {
100 return $self->{'seq'}->down_chain2string($start,undef,$end);
101 } else { # reverse strand
102 my $str = $self->{'seq'}->up_chain2string($start,undef,$end);
103 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
111 Usage : @labels = $obj->all_labels()
112 Function: all the labels of every nucleotide an object is composed of
113 Returns : an array of labels
120 my ($start,$end) = ($self->start(),$self->end());
122 if ($self->strand() == 1) {
123 $labels=$self->{'seq'}->down_labels($start,$end);
125 $labels=$self->{'seq'}->up_labels($start,$end);
133 Usage : $dna->labelsubseq();
134 : $dna->labelsubseq($startlabel);
135 : $dna->labelsubseq($startlabel,$length);
136 : $dna->labelsubseq($startlabel,undef,$endlabel);
137 e.g. : $dna->labelsubseq(4,undef,8);
138 Function: prints the sequence as string. The difference between labelsubseq
139 and normal subseq is that it uses /labels/ as arguments, instead
140 than positions. This allows for faster and more efficient lookup,
141 skipping the (usually) lengthy conversion of positions into labels.
142 This is expecially useful for manipulating with high power
143 LiveSeq objects, knowing the labels and exploiting their
147 Args : without arguments it returns the entire sequence
148 with a startlabel it returns the sequence downstream that label
149 if a length is specified, it returns only that number of bases
150 if an endlabel is specified, it overrides the length argument
151 and prints instead up to that label (included)
152 Defaults: $startlabel defaults to the beginning of the entire sequence
153 $endlabel defaults to the end of the entire sequence
157 # NOTE: unsecuremode is to be used /ONLY/ if sure of the start and end labels, expecially that they follow each other in the correct order!!!!
160 my ($self,$start,$length,$end,$unsecuremode) = @_;
161 if (defined $unsecuremode && $unsecuremode eq "unsecuremoderequested")
162 { # to skip security checks (faster)
167 if ($end == $start) {
180 unless ($self->{'seq'}->valid($start)) {
181 $self->warn("Start label not valid"); return (-1);
185 if ($end == $start) {
189 unless ($self->{'seq'}->valid($end)) {
190 $self->warn("End label not valid"); return (-1);
192 unless ($self->follows($start,$end) == 1) {
193 $self->warn("End label does not follow Start label!"); return (-1);
199 if ($self->strand() == 1) {
200 return $self->{'seq'}->down_chain2string($start,$length,$end);
201 } else { # reverse strand
202 my $str = $self->{'seq'}->up_chain2string($start,$length,$end);
203 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
211 Usage : $substring = $obj->subseq(10,40);
212 : $substring = $obj->subseq(10,undef,4);
213 Function: returns the subseq from start to end, where the first base
214 is 1 and the number is inclusive, ie 1-2 are the first two
215 bases of the sequence
217 Start cannot be larger than end but can be equal.
219 Allows for negative numbers $obj->subseq(-10,-1). By
220 definition, there is no 0!
222 gctagcgcccaac atggctcgctg
224 This allows to retrieve sequences upstream from given position.
226 The precedence is from left to right: if END is given LENGTH is
229 Examples: $obj->subseq(-10,undef,10) returns 10 elements before position 1
230 $obj->subseq(4,8) returns elements from the 4th to the 8th, inclusive
234 Args : start, integer, defaults to start of the sequence
235 end, integer, '' or undef, defaults to end of the sequence
236 length, integer, '' or undef
237 an optional strand (1 or -1) 4th argument
238 if strand argument is not given, it will default to the object
239 argment. This argument is useful when a call is issued from a child
240 of a parent object containing the subseq method
245 # check the fact about reverse strand!
246 # is it feasible? Is it correct? Should we do it? How about exons? Does it
247 # work when you ask subseq of an exon?
248 # eliminated now (Mon night)
250 ##my ($self,$pos1,$pos2,$length,$strand) = @_;
251 my ($self,$pos1,$pos2,$length,$strand) = @_;
252 ##unless (defined ($strand)) { # if optional [strand] argument not given
253 ## $strand=$self->strand;
255 $strand=$self->strand;
256 my ($str,$startlabel,$endlabel);
257 if (defined ($length)) {
259 $self->warn("No sense asking for a subseq of length < 1");
263 unless (defined ($pos1)) {
264 #print "\n##### DEBUG pos1 not defined\n";
265 $startlabel=$self->start;
267 if ($pos1 == 0) { # if position = 0 complain
268 $self->warn("Position cannot be 0!"); return (-1);
270 ##if ($strand == 1) { # CHECK THIS!
271 if ((defined ($pos2))&&($pos1>$pos2)) {
272 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
274 ##} else { # CHECK THIS!
275 ## if ((defined ($pos2))&&($pos1<$pos2)) {
276 ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!)"; return (-1);
279 $startlabel=$self->label($pos1);
280 if ($startlabel < 1) {
281 $self->warn("position $pos1 not valid as start of subseq!"); return (-1);
284 unless (defined ($pos2)) {
285 #print "\n##### pos2 not defined\n";
286 unless (defined ($length)) {
287 $endlabel=$self->end;
290 if ($pos2 == 0) { # if position = 0 complain
291 $self->warn("Position cannot be 0!"); return (-1);
294 ##if ($strand == 1) { # CHECK THIS!
295 if ((defined ($pos1))&&($pos1>$pos2)) {
296 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
298 ##} else { # CHECK THIS!
299 ## if ((defined ($pos1))&&($pos1<$pos2)) {
300 ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!"); return (-1);
303 $endlabel=$self->label($pos2);
305 $self->warn("position $pos2 not valid as end of subseq!"); return (-1);
308 #print "\n ####DEBUG: start $startlabel end $endlabel length $length strand $strand\n";
311 $str = $self->{'seq'}->down_chain2string($startlabel,$length,$endlabel);
312 } else { # reverse strand
313 $str = $self->{'seq'}->up_chain2string($startlabel,$length,$endlabel);
314 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
322 Usage : $seq->length();
323 Function: returns the number of nucleotides (or the number of aminoacids)
324 in the entire sequence
333 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
335 return $self->{'seq'}->down_subchain_length($start,$end);
337 return $self->{'seq'}->up_subchain_length($start,$end);
344 Usage : $id_string = $obj->display_id();
345 Function: returns the display id, alias the common name of the object
347 The semantics of this is that it is the most likely string
348 to be used as an identifier of the sequence, and likely to
349 have "human" readability. The id is equivalent to the ID
350 field of the GenBank/EMBL databanks and the id field of the
351 Swissprot/sptrembl database. In fasta format, the >(\S+) is
352 presumed to be the id, though some people overload the id
353 to embed other information.
355 See also: accession_number
362 my ($self,$value) = @_;
364 $self->{'display_id'} = $value;
366 return $self->{'display_id'};
370 =head2 accession_number
372 Title : accession_number
373 Usage : $unique_biological_key = $obj->accession_number;
374 Function: Returns the unique biological id for a sequence, commonly
375 called the accession_number.
376 Notice that primary_id() provides the unique id for the
377 implemetation, allowing multiple objects to have the same accession
378 number in a particular implementation.
380 For objects with no accession_number this method returns "unknown".
386 sub accession_number
{
387 my ($self,$value) = @_;
388 if (defined $value) {
389 $self->{'accession_number'} = $value;
391 unless (exists $self->{'accession_number'}) {
394 return $self->{'accession_number'};
401 Usage : $unique_implementation_key = $obj->primary_id;
402 Function: Returns the unique id for this object in this
403 implementation. This allows implementations to manage their own
404 object ids in a way the implementation can control. Clients can
405 expect one id to map to one object.
407 For sequences with no primary_id, this method returns
408 a stringified memory location.
417 my ($self,$value) = @_;
419 $self->{'primary_id'} = $value;
421 unless (exists $self->{'primary_id'}) {
424 return $self->{'primary_id'};
431 Usage : $substring = $obj->change('AA', 10);
432 Function: changes, modifies, mutates the LiveSequence
434 $obj->change('', 10); delete nucleotide #10
435 $obj->change('', 10, 2); delete two nucleotides starting from #10
436 $obj->change('G', 10); change nuc #10 to 'G'
437 $obj->change('GA', 10, 4); replace #10 and 3 following with 'GA'
438 $obj->change('GA', 10, 2)); is same as $obj->change('GA', 10);
439 $obj->change('GA', 10, 0 ); insert 'GA' before nucleotide at #10
440 $obj->change('GA', 10, 1); GA inserted before #10, #10 deleted
441 $obj->change('GATC', 10, 2); GATC inserted before #10, #10 deleted
442 $obj->change('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted
445 Returns : a string of deleted bases (if any) or 1 (everything OK)
447 Args : seq, string, or '' ('' = undef = 0 = deletion)
449 length, integer (optional)
457 =head2 positionchange
459 Title : positionchange
460 Function: Exactly like change. I.e. change() defaults to positionchange()
465 my ($self,$newseq,$position,$length)=@_;
467 $self->warn("Position not given or position 0");
470 my $label=$self->label($position);
471 unless ($label > 0) { # label not found or error
472 $self->warn("No valid label found at that position!");
475 return ($self->labelchange($newseq,$label,$length));
481 Function: Exactly like change but uses a /label/ instead than a position
482 as second argument. This allows for multiple changes in a LiveSeq
483 without the burden of recomputing positions. I.e. for a multiple
484 change in two different points of the LiveSeq, the approach would
485 be the following: fetch the correct labels out of the two different
486 positions (method: label($position)) and then use the labelchange()
487 method to modify the sequence using those labels instead than
488 relying on the positions (that would have modified after the
494 my ($self,$newseq,$label,$length)=@_;
495 unless ($self->valid($label)) {
496 if ($self->{'seq'}->valid($label)) {
497 #$self->warn("Label \'$label\' not valid for executing a LiveSeq change for the object asked but it's ok for DNAlevel change, reverting to that");
499 return($self->{'seq'}->labelchange(@_));
501 $self->warn("Label \'$label\' not valid for executing a LiveSeq change");
505 unless ($newseq) { # it means this is a simple deletion
506 if (defined($length)) {
507 unless ($length >= 0) {
508 $self->warn("No sense having length < 0 in a deletion");
512 $self->warn("Length not defined for deletion!");
515 return $self->_delete($label,$length);
517 my $newseqlength=CORE
::length($newseq);
518 if (defined($length)) {
519 unless ($length >= 0) {
520 $self->warn("No sense having length < 0 in a change()");
524 $length=$newseqlength; # defaults to pointmutation(s)
526 if ($length == 0) { # it means this is a simple insertion, length def&==0
527 my ($insertbegin,$insertend)=$self->_praeinsert($label,$newseq);
528 if ($insertbegin == -1) {
534 if ($newseqlength == $length) { # it means this is simple pointmutation(s)
535 return $self->_mutate($label,$newseq,$length);
537 # if we arrived here then change is complex mixture
538 my $strand=$self->strand();
539 my $afterendlabel=$self->label($length+1,$label,$strand); # get the label at $length+1 positions after $label
540 unless ($afterendlabel > 0) { # label not found or error
541 $self->warn("No valid afterendlabel found for executing the complex mutation!");
544 my $deleted=$self->_delete($label,$length); # first delete length nucs
545 if ($deleted == -1) { # if errors
547 } else { # then insert the newsequence
548 my ($insertbegin,$insertend)=$self->_praeinsert($afterendlabel,$newseq);
549 if ($insertbegin == -1) {
557 # internal methods for change()
559 # arguments: label for beginning of deletion, new sequence to insert
560 # returns: labels of beginning and end of the inserted sequence
563 my ($self,$label,$newseq)=@_;
564 my ($insertbegin,$insertend);
565 my $strand=$self->strand();
567 ($insertbegin,$insertend)=($self->{'seq'}->praeinsert_string($newseq,$label));
568 } else { # since it's reverse strand and we insert in forward direction....
569 $newseq=reverse($newseq);
570 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
571 ($insertend,$insertbegin)=($self->{'seq'}->postinsert_string($newseq,$label));
573 if (($insertbegin==0)||($insertend==0)) {
574 $self->warn("Some error occurred while inserting!");
577 return ($insertbegin,$insertend);
581 # arguments: label for beginning of deletion, length of deletion
582 # returns: string of deleted bases
585 my ($self,$label,$length)=@_;
586 my $strand=$self->strand();
587 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
588 unless ($endlabel > 0) { # label not found or error
589 $self->warn("No valid endlabel found for executing the deletion!");
592 # this is important in Transcript to fix exon structure
593 $self->_deletecheck($label,$endlabel);
596 $deletedseq=$self->{'seq'}->splice_chain($label,undef,$endlabel);
598 $deletedseq=$self->{'seq'}->splice_chain($endlabel,undef,$label);
599 $deletedseq=reverse($deletedseq); # because we are on reverse strand and we cut anyway
600 # in forward direction
601 $deletedseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
603 return ($deletedseq);
606 # empty function, overridden in Transcript, not useful here
610 # arguments: label for beginning of mutation, newsequence, number of mutations
614 my ($self,$label,$newseq,$length)=@_; # length is equal to length(newseq)
615 my ($i,$base,$nextlabel);
616 my @labels; # array of labels
617 my $strand=$self->strand();
618 if ($length == 1) { # special cases first
621 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
622 unless ($endlabel > 0) { # label not found or error
623 $self->warn("No valid endlabel found for executing the mutation!");
626 if ($length == 2) { # another special case
627 @labels=($label,$endlabel);
628 } else { # more than 3 bases changed
629 # this wouldn't work for Transcript
632 #$labelsarrayref=$self->{'seq'}->down_labels($label,$endlabel);
634 #$labelsarrayref=$self->{'seq'}->up_labels($label,$endlabel);
636 #@labels=@{$labelsarrayref};
637 #if ($length != scalar(@labels)) { # not enough labels returned
638 #$self->warn("Not enough valid labels found for executing the mutation!");
642 # this should be more general
643 @labels=($label); # put the first one
644 while ($label != $endlabel) {
645 $nextlabel=$self->label(2,$label,$strand); # retrieve the next label
646 push (@labels,$nextlabel);
647 $label=$nextlabel; # move on reference
651 if ($strand == -1) { # only for reverse strand
652 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
654 my $errorcheck; # if not equal to $length after summing for all changes, error did occurr
656 foreach $base (split(//,$newseq)) {
657 $errorcheck += $self->{'seq'}->set_value_at_label($base,$labels[$i]);
660 if ($errorcheck != $length) {
661 $self->warn("Some error occurred while mutating!");
671 Usage : $boolean = $obj->valid($label)
672 Function: tests if a label exists inside the object
679 # returns: 1 YES 0 NO
681 my ($self,$label)=@_;
683 my @labels=$self->all_labels;
684 foreach $checkme (@labels) {
685 if ($label == $checkme) {
689 return (0); # not found
696 Usage : $startlabel=$obj->start()
697 Function: returns the label of the first nucleotide of the object (exon, CDS)
705 return $self->{'start'}; # common for all classes BUT DNA (which redefines it) and Transcript (that takes the information from the Exons)
711 Usage : $endlabel=$obj->end()
712 Function: returns the label of the last nucleotide of the object (exon, CDS)
720 return $self->{'end'};
726 Usage : $strand=$obj->strand()
727 $obj->strand($strand)
728 Function: gets or sets strand information, being 1 or -1 (forward or reverse)
730 Args : none OR -1 or 1
735 my ($self,$strand) = @_;
737 if (($strand != 1)&&($strand != -1)) {
738 $self->warn("strand information not changed because strand identifier not valid");
740 $self->{'strand'} = $strand;
743 return $self->{'strand'};
749 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
750 Function: Returns the type of sequence being one of
751 'dna', 'rna' or 'protein'. This is case sensitive.
753 Returns : a string either 'dna','rna','protein'.
760 my %valid_type = map {$_, 1} qw( dna rna protein );
761 my ($self,$value) = @_;
762 if (defined $value) {
763 $value = 'dna' if $value =~ /dna/i;
764 $value = 'rna' if $value =~ /rna/i;
765 unless ( $valid_type{$value} ) {
766 $self->warn("Molecular type '$value' is not a valid type");
768 $self->{'alphabet'} = $value;
770 return $self->{'alphabet'};
773 =head2 coordinate_start
775 Title : coordinate_start
776 Usage : $coordstartlabel=$obj->coordinate_start()
777 : $coordstartlabel=$obj->coordinate_start($label)
778 Function: returns and optionally sets the first label of the coordinate
780 For some objects only labels inside the object or in frame (for
781 Translation objects) will be allowed to get set as coordinate start
783 Returns : label. It returns 0 if label not found.
785 Args : an optional reference $label that is position 1
790 sub coordinate_start
{
791 my ($self,$label) = @_;
793 if ($self->valid($label)) {
794 $self->{'coordinate_start'} = $label;
796 $self->warn("The label you are trying to set as coordinate_start is not valid for this object");
799 my $coord_start = $self->{'coordinate_start'};
803 return $self->start();
810 Usage : $seq->label($position)
811 : $seq->label($position,$firstlabel)
812 Examples: $nextlabel=$seq->label(2,$label) -> retrieves the following label
813 : $prevlabel=$seq->label(-1,$label) -> retrieves the preceding label
815 Function: returns the label of the nucleotide at $position from current
817 Returns : a label. It returns 0 if label not found.
820 an optional reference $firstlabel that is to be used as position 1
821 an optional strand (1 or -1) argument
822 if strand argument is not given, it will default to the object
823 argument. This argument is useful when a call is issued from a child
824 of a parent object containing the subseq method
830 my ($self,$position,$firstlabel,$strand)=@_;
832 unless (defined ($firstlabel)) {
833 $firstlabel=$self->coordinate_start;
835 unless ($position) { # if position = 0 complain ?
836 $self->warn("Position not given or position 0");
839 unless (defined ($strand)) { # if optional [strand] argument not given
840 $strand=$self->strand;
844 $label=$self->{'seq'}->down_get_label_at_pos($position,$firstlabel)
846 $label=$self->{'seq'}->up_get_label_at_pos(1 - $position,$firstlabel)
850 $label=$self->{'seq'}->up_get_label_at_pos($position,$firstlabel)
852 $label=$self->{'seq'}->down_get_label_at_pos(1 - $position,$firstlabel)
862 Usage : $seq->position($label)
863 : $seq->position($label,$firstlabel)
864 Function: returns the position of nucleotide at $label
865 Returns : the position of the label from current coordinate start
867 Args : a label pointing to a certain nucleotide (e.g. start of exon)
868 an optional "firstlabel" as reference to count from
869 an optional strand (1 or -1) argument
870 if strand argument is not given, it will default to the object
871 argument. This argument is useful when a call is issued from a child
872 of a parent object containing the subseq method
878 my ($self,$label,$firstlabel,$strand)=@_;
879 unless (defined ($strand)) { # if optional [strand] argument not given
880 $strand=$self->strand;
882 unless (defined ($firstlabel)) {
883 $firstlabel=$self->coordinate_start;
885 unless ($self->valid($label)) {
886 $self->warn("label not valid");
889 if ($firstlabel == $label) {
892 my ($coordpos,$position0,$position);
893 $position0=$self->{'seq'}->down_get_pos_of_label($label);
894 $coordpos=$self->{'seq'}->down_get_pos_of_label($firstlabel);
895 $position=$position0-$coordpos+1;
896 if ($position <= 0) {
900 #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",1-$position;
901 return (1-$position);
903 #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",$position;
911 Usage : $seq->follows($firstlabel,$secondlabel)
912 : $seq->follows($firstlabel,$secondlabel,$strand)
913 Function: checks if SECONDlabel follows FIRSTlabel, undependent of the strand
914 i.e. it checks downstream for forward strand and
915 upstream for reverse strand
919 an optional strand (1 or -1) argument
920 if strand argument is not given, it will default to the object
921 argument. This argument is useful when a call is issued from a child
922 of a parent object containing the subseq method
927 # wraparound to is_downstream and is_upstream that chooses the correct one
928 # depending on the strand
930 my ($self,$firstlabel,$secondlabel,$strand)=@_;
931 unless (defined ($strand)) { # if optional [strand] argument not given
932 $strand=$self->strand;
935 return ($self->{'seq'}->is_downstream($firstlabel,$secondlabel));
937 return ($self->{'seq'}->is_upstream($firstlabel,$secondlabel));
944 # Usage : $protein_seq = $obj->translate
945 # Function: Provides the translation of the DNA sequence
946 # using full IUPAC ambiguities in DNA/RNA and amino acid codes.
948 # The resulting translation is identical to EMBL/TREMBL database
952 # Args : character for terminator (optional) defaults to '*'
953 # character for unknown amino acid (optional) defaults to 'X'
954 # frame (optional) valid values 0, 1, 3, defaults to 0
955 # codon table id (optional) defaults to 1
960 # my ($self) = shift;
961 # return ($self->translate_string($self->seq,@_));
964 #=head2 translate_string
966 # Title : translate_string
967 # Usage : $protein_seq = $obj->translate_string("attcgtgttgatcgatta");
968 # Function: Like translate, but can be used to translate subsequences after
969 # having retrieved them as string.
970 # Args : 1st argument is a string. Optional following arguments: like in
971 # the translate method
976 #sub translate_string {
979 # my($stop, $unknown, $frame, $tableid) = @_;
980 # my($i, $len, $output) = (0,0,'');
985 # ## User can pass in symbol for stop and unknown codons
986 # unless(defined($stop) and $stop ne '') { $stop = "*"; }
987 # unless(defined($unknown) and $unknown ne '') { $unknown = "X"; }
988 # unless(defined($frame) and $frame ne '') { $frame = 0; }
990 # ## the codon table ID
991 # if ($self->translation_table) {
992 # $tableid = $self->translation_table;
994 # unless(defined($tableid) and $tableid ne '') { $tableid = 1; }
996 # ##Error if monomer is "Amino"
997 # $self->warn("Can't translate an amino acid sequence.")
998 # if (defined $self->alphabet && $self->alphabet eq 'protein');
1000 # ##Error if frame is not 0, 1 or 2
1001 # $self->warn("Valid values for frame are 0, 1, 2, not [$frame].")
1002 # unless ($frame == 0 or $frame == 1 or $frame == 2);
1004 # #thows a warning if ID is invalid
1005 # my $codonTable = Bio::Tools::CodonTable->new( -id => $tableid);
1007 # # deal with frame offset.
1009 # $seq = substr ($seq,$frame);
1012 # for $codon ( grep { CORE::length == 3 } split(/(.{3})/, $seq) ) {
1013 # my $aa = $codonTable->translate($codon);
1017 # elsif ($aa eq 'X') {
1018 # $output .= $unknown;
1024 # #if( substr($output,-1,1) eq $stop ) {
1034 Usage : my $gene=$obj->gene;
1035 Function: Gets or sets the reference to the LiveSeq::Gene object.
1036 Objects that are features of a LiveSeq Gene will have this
1037 attribute set automatically.
1039 Returns : reference to an object of class Gene
1040 Note : if Gene object is not set, this method will return 0;
1041 Args : none or reference to object of class Bio::LiveSeq::Gene
1046 my ($self,$value) = @_;
1047 if (defined $value) {
1048 $self->{'gene'} = $value;
1050 unless (exists $self->{'gene'}) {
1053 return $self->{'gene'};
1060 Usage : if ($obj->obj_valid) {do something;}
1061 Function: Checks if start and end labels are still valid for the ojbect,
1062 i.e. tests if the LiveSeq object is still valid
1070 unless (($self->{'seq'}->valid($self->start()))&&($self->{'seq'}->valid($self->end()))) {
1079 Usage : $name = $obj->name;
1080 : $name = $obj->name("ABCD");
1081 Function: Returns or sets the name of the object.
1082 If there is no name, it will return "unknown";
1089 my ($self,$value) = @_;
1090 if (defined $value) {
1091 $self->{'name'} = $value;
1093 unless (exists $self->{'name'}) {
1096 return $self->{'name'};
1103 Usage : $desc = $obj->desc;
1104 : $desc = $obj->desc("ABCD");
1105 Function: Returns or sets the description of the object.
1106 If there is no description, it will return "unknown";
1113 my ($self,$value) = @_;
1114 if (defined $value) {
1115 $self->{'desc'} = $value;
1117 unless (exists $self->{'desc'}) {
1120 return $self->{'desc'};
1127 Usage : $name = $obj->source;
1128 : $name = $obj->source("Homo sapiens");
1129 Function: Returns or sets the organism that is source of the object.
1130 If there is no source, it will return "unknown";
1137 my ($self,$value) = @_;
1138 if (defined $value) {
1139 $self->{'source'} = $value;
1141 unless (exists $self->{'source'}) {
1144 return $self->{'source'};
1150 my @values= values %{$self};
1151 my @keys= keys %{$self};
1153 foreach my $key ( @keys ) {
1154 delete $self->{$key};
1156 foreach my $value ( @values ) {
1157 if (index(ref($value),"LiveSeq") != -1) { # object case
1159 # delete $self->{$value};
1162 } elsif (index(ref($value),"ARRAY") != -1) { # array case
1163 my @array=@
{$value};
1165 foreach $element (@array) {
1167 $element->delete_Obj;
1170 } elsif (index(ref($value),"HASH") != -1) { # object case
1173 foreach $element (%hash) {
1175 $element->delete_Obj;