4 # bioperl module for Bio::LiveSeq::Chain
6 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
8 # Copyright Joseph Insana
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
17 Bio::LiveSeq::Chain - DoubleChain DataStructure for Perl
25 This is a general purpose module (that's why it's not in object-oriented
26 form) that introduces a novel datastructure in PERL. It implements
27 the "double linked chain". The elements of the chain can contain basically
28 everything. From chars to strings, from object references to arrays or hashes.
29 It is used in the LiveSequence project to create a dynamical DNA sequence,
30 easier to manipulate and change. It's use is mainly for sequence variation
31 analysis but it could be used - for example - in e-cell projects.
32 The Chain module in itself doesn't have any biological bias, so can be
33 used for any programming purpose.
35 Each element of the chain (with the exclusion of the first and the last of the
36 chain) is connected to other two elements (the PREVious and the NEXT one).
37 There is no absolute position (like in an array), hence if positions are
38 important, they need to be computed (methods are provided).
39 Otherwise it's easy to keep track of the elements with their "LABELs".
40 There is one LABEL (think of it as a pointer) to each ELEMENT. The labels
41 won't change after insertions or deletions of the chain. So it's
42 always possible to retrieve an element even if the chain has been
43 modified by successive insertions or deletions.
44 From this the high potential profit for bioinformatics: dealing with
45 sequences in a way that doesn't have to rely on positions, without
46 the need of constantly updating them if the sequence changes, even
49 =head1 AUTHOR - Joseph A.L. Insana
51 Email: Insana@ebi.ac.uk, jinsana@gmx.net
55 The rest of the documentation details each of the object
56 methods. Internal methods are usually preceded with a _
60 # Let the code begin...
62 # DoubleChain Data Structure for PERL
63 # by Joseph A.L. Insana - Deathson - Filius Mortis - Fal Mortais
64 # insana@ebi.ac.uk, jinsana@gmx.net
66 package Bio
::LiveSeq
::Chain
;
69 # **** performance concerns
70 # *??* create hash2dchain ???? (with hashkeys used for label)
71 # **????** how about using array of arrays instead than hash of arrays??
73 # further strict complaints:
74 # in verbose $string assignment around line 721 ???
76 # TERMINOLOGY update, naming convention:
77 # "chain" the datastructure
78 # "element" the individual units that compose a chain
79 # "label" the unique name of a single element
80 # "position" the position of an element into the chain according to a
81 # particular coordinate system (e.g. counting from the start)
82 # "value" what is stored in a single element
84 use Carp
qw(croak cluck carp);
85 use Bio
::Root
::Version
;
87 use integer
; # WARNING: this is to increase performance
88 # a little bit of attention has to be given if float need to
89 # be stored as elements of the array
90 # the use of this "integer" affects all operations but not
91 # assignments. So float CAN be assigned as elements of the chain
92 # BUT, if you assign $z=-1.8;, $z will be equal to -1 because
93 # "-" counts as a unary operation!
95 =head2 _updown_chain2string
98 Usage : $string = Bio::LiveSeq::Chain::chain2string("down",$chain,6,9)
99 Function: reads the contents of the chain, outputting a string
102 : down_chain2string($chain) -> all the chain from begin to end
103 : down_chain2string($chain,6) -> from 6 to the end
104 : down_chain2string($chain,6,4) -> from 6, going on 4 elements
105 : down_chain2string($chain,6,"",10) -> from 6 to 10
106 : up_chain2string($chain,10,"",6) -> from 10 to 6 upstream
107 Defaults: start=first element; if len undef, goes to last
108 if last undef, goes to end
109 if last defined, it overrides len (undefining it)
111 Args : "up"||"down" as first argument to specify the reading direction
112 reference (to the chain)
113 [first] [len] [last] optional integer arguments to specify how
114 much and from (and to) where to read
118 # methods rewritten 2.61
119 sub up_chain2string
{
120 _updown_chain2string
("up",@_);
122 sub down_chain2string
{
123 _updown_chain2string
("down",@_);
126 sub _updown_chain2string
{
127 my ($direction,$chain,$first,$len,$last)=@_;
128 unless($chain) { cluck
"no chain input"; return (-1); }
129 my $begin=$chain->{'begin'}; # the label of the BEGIN element
130 my $end=$chain->{'end'}; # the label of the END element
133 if ($direction eq "up") {
134 $flow=2; # used to determine the direction of chain navigation
135 unless ($first) { $first=$end; } # if undef or 0, use $end
136 } else { # defaults to "down"
137 $flow=1; # used to determine the direction of chain navigation
138 unless ($first) { $first=$begin; } # if undef or 0, use $begin
141 unless($chain->{$first}) {
142 cluck
"label for first not defined"; return (-1); }
143 if ($last) { # if last is defined, it gets priority and len is not used
144 unless($chain->{$last}) {
145 cluck
"label for last not defined"; return (-1); }
147 warn "Warning chain2string: argument LAST:$last overriding LEN:$len!";
151 if ($direction eq "up") {
152 $last=$begin; # if last not defined, go 'till begin (or upto len elements)
154 $last=$end; # if last not defined, go 'till end (or upto len elements)
159 my $label=$first; my $i=1;
160 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef
161 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
163 # proceed for len elements or until last, whichever comes first
164 # if $len undef goes till end
165 while (($label) && ($label != $afterlast) && ($i <= ($len || $i + 1))) {
166 @array=@
{$chain->{$label}};
167 $string .= $array[0];
168 $label = $array[$flow];
171 return ($string); # if chain is interrupted $string won't be complete
174 =head2 _updown_labels
177 Usage : @labels = Bio::LiveSeq::Chain::_updown_labels("down",$chain,4,16)
178 Function: returns all the labels in a chain or those between two
179 specified ones (termed "first" and "last")
180 Returns : a reference to an array containing the labels
181 Args : "up"||"down" as first argument to specify the reading direction
182 reference (to the chain)
183 [first] [last] (integer for the starting and eneding labels)
188 # arguments: CHAIN_REF [FIRSTLABEL] [LASTLABEL]
189 # returns: reference to array containing the labels
191 my ($chain,$first,$last)=@_;
192 _updown_labels
("down",$chain,$first,$last);
195 my ($chain,$first,$last)=@_;
196 _updown_labels
("up",$chain,$first,$last);
198 # arguments: "up"||"down" CHAIN_REF [FIRSTLABEL] [LASTLABEL]
199 # returns: reference to array containing the labels
201 my ($direction,$chain,$first,$last)=@_;
202 unless($chain) { cluck
"no chain input"; return (0); }
203 my $begin=$chain->{'begin'}; # the label of the BEGIN element
204 my $end=$chain->{'end'}; # the label of the END element
206 if ($direction eq "up") { $flow=2;
207 unless ($first) { $first=$end; }
208 unless ($last) { $last=$begin; }
210 unless ($last) { $last=$end; }
211 unless ($first) { $first=$begin; }
213 unless($chain->{$first}) { warn "not existing label $first"; return (0); }
214 unless($chain->{$last}) { warn "not existing label $last"; return (0); }
216 my $label=$first; my @labels;
217 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef
218 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
220 while (($label)&&($label != $afterlast)) {
221 push(@labels,$label);
222 $label=$chain->{$label}[$flow];
224 return (\
@labels); # if chain is interrupted @labels won't be complete
231 Usage : $start = Bio::LiveSeq::Chain::start()
232 Returns : the label marking the start of the chain
240 unless($chain) { cluck
"no chain input"; return (-1); }
241 return ($chain->{'begin'});
247 Usage : $end = Bio::LiveSeq::Chain::end()
248 Returns : the label marking the end of the chain
256 unless($chain) { cluck
"no chain input"; return (-1); }
257 return ($chain->{'end'});
263 Usage : $check = Bio::LiveSeq::Chain::label_exists($chain,$label)
264 Function: It checks if a label is defined, i.e. if an element is there or
266 Returns : 1 if the label exists, 0 if it is not there, -1 error
268 Args : reference to the chain, integer
273 my ($chain,$label)=@_;
274 unless($chain) { cluck
"no chain input"; return (-1); }
275 if ($label && $chain->{$label}) { return (1); } else { return (0) };
279 =head2 down_get_pos_of_label
281 Title : down_get_pos_of_label
282 Usage : $position = Bio::LiveSeq::Chain::down_get_pos_of_label($chain,$label,$first)
283 Function: returns the position of $label counting from $first, i.e. taking
284 $first as 1 of coordinate system. If $first is not specified it will
285 count from the start of the chain.
288 Args : reference to the chain, integer (the label of interest)
289 optional: integer (a different label that will be taken as the
290 first one, i.e. the one to count from)
291 Note: It counts "downstream". To proceed backward use up_get_pos_of_label
295 sub down_get_pos_of_label
{
296 #down_chain2string($_[0],$_[2],undef,$_[1],"counting");
297 my ($chain,$label,$first)=@_;
298 _updown_count
("down",$chain,$first,$label);
300 sub up_get_pos_of_label
{
301 #up_chain2string($_[0],$_[2],undef,$_[1],"counting");
302 my ($chain,$label,$first)=@_;
303 _updown_count
("up",$chain,$first,$label);
306 =head2 down_subchain_length
308 Title : down_subchain_length
309 Usage : $length = Bio::LiveSeq::Chain::down_subchain_length($chain,$first,$last)
310 Function: returns the length of the chain between the labels "first" and "last", included
313 Args : reference to the chain, integer, integer
314 Note: It counts "downstream". To proceed backward use up_subchain_length
318 # arguments: chain_ref [first] [last]
319 # returns the length of the chain between first and last (included)
320 sub down_subchain_length
{
321 #down_chain2string($_[0],$_[1],undef,$_[2],"counting");
322 my ($chain,$first,$last)=@_;
323 _updown_count
("down",$chain,$first,$last);
325 sub up_subchain_length
{
326 #up_chain2string($_[0],$_[1],undef,$_[2],"counting");
327 my ($chain,$first,$last)=@_;
328 _updown_count
("up",$chain,$first,$last);
331 # arguments: DIRECTION CHAIN_REF FIRSTLABEL LASTLABEL
334 my ($direction,$chain,$first,$last)=@_;
335 unless($chain) { cluck
"no chain input"; return (0); }
336 my $begin=$chain->{'begin'}; # the label of the BEGIN element
337 my $end=$chain->{'end'}; # the label of the END element
339 if ($direction eq "up") { $flow=2;
340 unless ($first) { $first=$end; }
341 unless ($last) { $last=$begin; }
343 unless ($last) { $last=$end; }
344 unless ($first) { $first=$begin; }
346 unless($chain->{$first}) { warn "not existing label $first"; return (0); }
347 unless($chain->{$last}) { warn "not existing label $last"; return (0); }
349 my $label=$first; my $count;
350 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef
351 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
353 while (($label)&&($label != $afterlast)) {
355 $label=$chain->{$label}[$flow];
357 return ($count); # if chain is interrupted, $i will be up to the breaking point
363 Usage : $errorcode=Bio::LiveSeq::Chain::invert_chain($chain)
364 Function: completely inverts the order of the chain elements; begin is swapped with end and all links updated (PREV&NEXT fields swapped)
365 Returns : 1 if all OK, 0 if errors
367 Args : reference to the chain
373 unless($chain) { cluck
"no chain input"; return (0); }
374 my $begin=$chain->{'begin'}; # the name of the first element
375 my $end=$chain->{'end'}; # the name of the last element
377 $label=$begin; # starts from the beginning
378 while ($label) { # proceed with linked elements, swapping PREV and NEXT
379 @array=@
{$chain->{$label}};
380 ($chain->{$label}[1],$chain->{$label}[2])=($array[2],$array[1]); # swap
381 $label = $array[1]; # go to the next one
383 # now swap begin and end fields
384 ($chain->{'begin'},$chain->{'end'})=($end,$begin);
385 return (1); # that's it
388 # warning that method has changed name
389 #sub mutate_element {
390 #croak "Warning: old method name. Please update code to 'set_value_at_label'\n";
391 # &set_value_at_label;
394 =head2 down_get_value_at_pos
396 Title : down_get_value_at_pos
397 Usage : $value = Bio::LiveSeq::Chain::down_get_value_at_pos($chain,$position,$first)
398 Function: used to access the value of the chain at a particular position instead than directly with a label pointer. It will count the position from the start of the chain or from the label $first, if $first is specified
399 Returns : whatever is stored in the element of the chain
401 Args : reference to the chain, integer, [integer]
402 Note: It works "downstream". To proceed backward use up_get_value_at_pos
406 #sub get_value_at_pos {
407 #croak "Please use instead: down_get_value_at_pos";
408 ##&down_get_value_at_pos;
410 sub down_get_value_at_pos
{
411 my ($chain,$position,$first)=@_;
412 my $label=down_get_label_at_pos
($chain,$position,$first);
413 # check place of change
414 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
415 warn "not existing element $label"; return (0); }
416 return _get_value
($chain,$label);
418 sub up_get_value_at_pos
{
419 my ($chain,$position,$first)=@_;
420 my $label=up_get_label_at_pos
($chain,$position,$first);
421 # check place of change
422 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
423 warn "not existing element $label"; return (0); }
424 return _get_value
($chain,$label);
427 =head2 down_set_value_at_pos
429 Title : down_set_value_at_pos
430 Usage : $errorcode = Bio::LiveSeq::Chain::down_set_value_at_pos($chain,$newvalue,$position,$first)
431 Function: used to store a new value inside an element of the chain at a particular position instead than directly with a label pointer. It will count the position from the start of the chain or from the label $first, if $first is specified
434 Args : reference to the chain, newvalue, integer, [integer]
435 (newvalue can be: integer, string, object reference, hash ref)
436 Note: It works "downstream". To proceed backward use up_set_value_at_pos
437 Note2: If the $newvalue is undef, it will delete the contents of the
438 element but it won't remove the element from the chain.
442 #sub set_value_at_pos {
443 #croak "Please use instead: down_set_value_at_pos";
444 ##&down_set_value_at_pos;
446 sub down_set_value_at_pos
{
447 my ($chain,$value,$position,$first)=@_;
448 my $label=down_get_label_at_pos
($chain,$position,$first);
449 # check place of change
450 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
451 warn "not existing element $label"; return (0); }
452 _set_value
($chain,$label,$value);
455 sub up_set_value_at_pos
{
456 my ($chain,$value,$position,$first)=@_;
457 my $label=up_get_label_at_pos
($chain,$position,$first);
458 # check place of change
459 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
460 warn "not existing element $label"; return (0); }
461 _set_value
($chain,$label,$value);
466 =head2 down_set_value_at_label
468 Title : down_set_value_at_label
469 Usage : $errorcode = Bio::LiveSeq::Chain::down_set_value_at_label($chain,$newvalue,$label)
470 Function: used to store a new value inside an element of the chain defined by its label.
473 Args : reference to the chain, newvalue, integer
474 (newvalue can be: integer, string, object reference, hash ref)
475 Note: It works "downstream". To proceed backward use up_set_value_at_label
476 Note2: If the $newvalue is undef, it will delete the contents of the
477 element but it won't remove the element from the chain.
481 sub set_value_at_label
{
482 my ($chain,$value,$label)=@_;
483 unless($chain) { cluck
"no chain input"; return (0); }
485 # check place of change
486 unless($chain->{$label}) { # complain if label doesn't exist
487 warn "not existing element $label"; return (0); }
488 _set_value
($chain,$label,$value);
492 =head2 down_get_value_at_label
494 Title : down_get_value_at_label
495 Usage : $value = Bio::LiveSeq::Chain::down_get_value_at_label($chain,$label)
496 Function: used to access the value of the chain from one element defined by its label.
497 Returns : whatever is stored in the element of the chain
499 Args : reference to the chain, integer
500 Note: It works "downstream". To proceed backward use up_get_value_at_label
504 sub get_value_at_label
{
506 unless($chain) { cluck
"no chain input"; return (0); }
507 my $label = $_[1]; # the name of the element
509 # check place of change
510 unless($chain->{$label}) { # complain if label doesn't exist
511 warn "not existing label $label"; return (0); }
512 return _get_value
($chain,$label);
515 # arguments: CHAIN_REF LABEL VALUE
517 my ($chain,$label,$value)=@_;
518 $chain->{$label}[0]=$value;
520 # arguments: CHAIN_REF LABEL
522 my ($chain,$label)=@_;
523 return $chain->{$label}[0];
526 =head2 down_get_label_at_pos
528 Title : down_get_label_at_pos
529 Usage : $label = Bio::LiveSeq::Chain::down_get_label_at_pos($chain,$position,$first)
530 Function: used to retrieve the label of an an element of the chain at a particular position. It will count the position from the start of the chain or from the label $first, if $first is specified
533 Args : reference to the chain, integer, [integer]
534 Note: It works "downstream". To proceed backward use up_get_label_at_pos
538 # arguments: CHAIN_REF POSITION [FIRST]
539 # returns: LABEL of element found counting from FIRST
540 sub down_get_label_at_pos
{
541 _updown_get_label_at_pos
("down",@_);
543 sub up_get_label_at_pos
{
544 _updown_get_label_at_pos
("up",@_);
547 # arguments: [DIRECTION] CHAIN_REF POSITION [FIRST]
548 # Default DIRECTION="down"
549 # if FIRST is undefined, FIRST=START (if DIRECTION=down) or FIRST=END (up)
551 sub _updown_get_label_at_pos
{
552 my ($direction,$chain,$position,$first)=@_;
553 unless($chain) { cluck
"no chain input"; return (0); }
554 my $begin=$chain->{'begin'}; # the label of the BEGIN element
555 my $end=$chain->{'end'}; # the label of the END element
557 if ($direction eq "up") { $flow=2; unless ($first) { $first=$end; }
558 } else { $flow=1; unless ($first) { $first=$begin; } }
559 unless($chain->{$first}) { warn "not existing label $first"; return (0); }
563 while ($i < $position) {
564 $label=$chain->{$label}[$flow];
566 unless ($label) { return (0); } # chain ended before position reached
571 # for english_concerned, latin_unconcerned people
572 sub preinsert_string
{ &praeinsert_string
}
573 sub preinsert_array
{ &praeinsert_array
}
575 # praeinsert_string CHAIN_REF STRING [POSITION]
576 # the chars of STRING are passed to praeinsert_array
577 # the chars are inserted in CHAIN, before POSITION
578 # if POSITION is undef, default is to prepend the string to the beginning
579 # i.e. POSITION is START of CHAIN
580 sub praeinsert_string
{
581 my @string=split(//,$_[1]);
582 praeinsert_array
($_[0],\
@string,$_[2]);
585 # postinsert_string CHAIN_REF STRING [POSITION]
586 # the chars of STRING are passed to postinsert_array
587 # the chars are inserted in CHAIN, after POSITION
588 # if POSITION is undef, default is to append the string to the end
589 # i.e. POSITION is END of CHAIN
590 sub postinsert_string
{
591 my @string=split(//,$_[1]);
592 postinsert_array
($_[0],\
@string,$_[2]);
595 # praeinsert_array CHAIN_REF ARRAY_REF [POSITION]
596 # the elements of ARRAY are inserted in CHAIN, before POSITION
597 # if POSITION is undef, default is to prepend the elements to the beginning
598 # i.e. POSITION is START of CHAIN
599 sub praeinsert_array
{
600 _praepostinsert_array
($_[0],"prae",$_[1],$_[2]);
603 # postinsert_array CHAIN_REF ARRAY_REF [POSITION]
604 # the elements of ARRAY are inserted in CHAIN, after POSITION
605 # if POSITION is undef, default is to append the elements to the end
606 # i.e. POSITION is END of CHAIN
607 sub postinsert_array
{
608 _praepostinsert_array
($_[0],"post",$_[1],$_[2]);
612 =head2 _praepostinsert_array
614 Title : _praepostinsert_array
615 Usage : ($insbegin,$insend) = Bio::LiveSeq::Chain::_praepostinsert_array($chainref,"post",$arrayref,$position)
616 Function: the elements of the array specified by $arrayref are inserted (creating a new subchain) in the chain specified by $chainref, before or after (depending on the "prae"||"post" keyword passed as second argument) the specified position.
617 Returns : two labels: the first and the last of the inserted subchain
618 Defaults: if no position is specified, the new chain will be inserted after
619 (post) the first element of the chain
621 Args : chainref, "prae"||"post", arrayref, integer (position)
625 # returns: 0 if errors, otherwise returns references of begin and end of
627 sub _praepostinsert_array
{
629 unless($chain) { cluck
"no chain input"; return (0); }
630 my $praepost=$_[1] || "post"; # defaults to post
633 my $begin=$chain->{'begin'}; # the name of the first element of the chain
634 my $end=$chain->{'end'}; # the name of the the last element of the chain
635 # check if prae or post insertion and prepare accordingly
636 if ($praepost eq "prae") {
638 unless (($position eq 0)||($position)) { $position=$begin; } # if undef, use $begin
641 unless (($position eq 0)||($position)) { $position=$end; } # if undef, use $end
643 # check place of insertion
644 unless($chain->{$position}) { # complain if position doesn't exist
645 warn ("Warning _praepostinsert_array: not existing element $position");
649 # check if there are elements to insert
650 my $elements=$_[2]; # reference to the array containing the new elements
651 my $elements_count=scalar(@
{$elements});
652 unless ($elements_count) {
653 warn ("Warning _praepostinsert_array: no elements input"); return (0); }
655 # create new chainelements with offset=firstfree(chain)
656 my ($insertbegin,$insertend)=_create_chain_elements
($chain,$elements);
659 #print "Executing ${praepost}insertion of $elements_count elements ('@{$elements}') at position: $position\n";
661 # attach the new chain to the old chain
662 # 4 cases: prae@begin, prae@middle, post@middle, post@end
663 # NOTE: in case of double joinings always join wisely so not to
664 # delete the PREV/NEXT attribute before it is needed
667 if ($position==$begin) { # 1st case: prae@begin
668 $noerror=_join_chain_elements
($chain,$insertend,$begin);
669 $chain->{'begin'}=$insertbegin;
670 } else { # 2nd case: prae@middle
671 $noerror=_join_chain_elements
($chain,up_element
($chain,$position),$insertbegin);
672 $noerror=_join_chain_elements
($chain,$insertend,$position);
675 if ($position==$end) { # 4th case: post@end
676 $noerror=_join_chain_elements
($chain,$end,$insertbegin);
677 $chain->{'end'}=$insertend;
678 } else { # 3rd case: post@middle # note the order of joins (important)
679 $noerror=_join_chain_elements
($chain,$insertend,down_element
($chain,$position));
680 $noerror=_join_chain_elements
($chain,$position,$insertbegin);
682 } else { # this should never happen
683 die "_praepostinsert_array: Something went very wrong";
686 # check for errors and return begin,end of insertion
688 return ($insertbegin,$insertend);
689 } else { # something went wrong with the joinings
690 warn "Warning _praepostinsert_array: Joining of insertion failed";
695 # create new chain elements with offset=firstfree
696 # arguments: CHAIN_REF ARRAY_REF
697 # returns: pointers to BEGIN and END of new chained elements created
698 # returns 0 if error(s) encountered
699 sub _create_chain_elements
{
702 warn ("Warning _create_chain_elements: no chain input"); return (0); }
704 my $array_count=scalar(@
{$arrayref});
705 unless ($array_count) {
706 warn ("Warning _create_chain_elements: no elements input"); return (0); }
707 my $begin=$chain->{'firstfree'};
710 foreach $element (@
{$arrayref}) {
712 $chain->{$i}=[$element,$i+1,$i-1];
715 $chain->{'firstfree'}=$i+1; # what a new added element should be called
716 $chain->{'size'} += $end-$begin+1; # increase size of chain
717 # leave sticky edges (to be joined by whoever called this subroutine)
718 $chain->{$begin}[2]=undef;
719 $chain->{$end}[1]=undef;
720 return ($begin,$end); # return pointers to first and last of the newelements
723 # argument: CHAIN_REF ELEMENT
724 # returns: name of DOWN/NEXT element (the downstream one)
725 # returns -1 if error encountered (e.g. chain or elements undefined)
726 # returns 0 if there's no DOWN element
728 _updown_element
("down",@_);
730 # argument: CHAIN_REF ELEMENT
731 # returns: name of UP/PREV element (the upstream one)
732 # returns -1 if error encountered (e.g. chain or elements undefined)
733 # returns 0 if there's no UP element
735 _updown_element
("up",@_);
738 # used by both is_up_element and down_element
739 sub _updown_element
{
740 my $direction=$_[0] || "down"; # defaults to downstream
742 if ($direction eq "up") {
743 $flow=2; # used to determine the direction of chain navigation
745 $flow=1; # used to determine the direction of chain navigation
749 warn ("Warning ${direction}_element: no chain input"); return (-1); }
750 my $me = $_[2]; # the name of the element
751 my $it = $chain->{$me}[$flow]; # the prev||next one, upstream||downstream
753 return ($it); # return the name of prev||next element
755 return (0); # there is no prev||next element ($it is undef)
759 # used by both is_downstream and is_upstream
760 sub _is_updownstream
{
761 my $direction=$_[0] || "down"; # defaults to downstream
763 if ($direction eq "up") {
764 $flow=2; # used to determine the direction of chain navigation
766 $flow=1; # used to determine the direction of chain navigation
770 warn ("Warning is_${direction}stream: no chain input"); return (-1); }
771 my $first=$_[2]; # the name of the first element
772 my $second=$_[3]; # the name of the first element
773 if ($first==$second) {
774 warn ("Warning is_${direction}stream: first==second!!"); return (0); }
775 unless($chain->{$first}) {
776 warn ("Warning is_${direction}stream: first element not defined"); return (-1); }
777 unless($chain->{$second}) {
778 warn ("Warning is_${direction}stream: second element not defined"); return (-1); }
782 while (($label)&&(!($found))) { # searches till the end or till found
783 if ($label==$second) {
786 @array=@
{$chain->{$label}};
787 $label = $array[$flow]; # go to the prev||next one, upstream||downstream
794 Title : is_downstream
795 Usage : Bio::LiveSeq::Chain::is_downstream($chainref,$firstlabel,$secondlabel)
796 Function: checks if SECONDlabel follows FIRSTlabel
797 It runs downstream the elements of the chain from FIRST searching
799 Returns : 1 if SECOND is found /after/ FIRST; 0 otherwise (i.e. if it
800 reaches the end of the chain without having found it)
802 Args : two labels (integer)
807 _is_updownstream
("down",@_);
813 Usage : Bio::LiveSeq::Chain::is_upstream($chainref,$firstlabel,$secondlabel)
814 Function: checks if SECONDlabel follows FIRSTlabel
815 It runs upstream the elements of the chain from FIRST searching
817 Returns : 1 if SECOND is found /after/ FIRST; 0 otherwise (i.e. if it
818 reaches the end of the chain without having found it)
820 Args : two labels (integer)
825 _is_updownstream
("up",@_);
831 Usage : @errorcodes = Bio::LiveSeq::Chain::check_chain()
832 Function: a wraparound to a series of check for consistency of the chain
833 It will check for boundaries, size, backlinking and forwardlinking
834 Returns : array of 4 warn codes, each can be 1 (all ok) or 0 (something wrong)
837 Note : this is slow and through. It is not really needed. It is mostly
838 a code-developer tool.
845 warn ("Warning check_chain: no chain input"); return (-1); }
846 my ($warnbound,$warnsize,$warnbacklink,$warnforlink);
847 $warnbound=&_boundcheck
; # passes on the arguments of the subroutine
848 $warnsize=&_sizecheck
;
849 $warnbacklink=&_downlinkcheck
;
850 $warnforlink=&_uplinkcheck
;
851 return ($warnbound,$warnsize,$warnbacklink,$warnforlink);
854 # consistency check for forwardlinks walking upstream
855 # argument: a chain reference
856 # returns: 1 all OK 0 problems
858 _updownlinkcheck
("up",@_);
861 # consistency check for backlinks walking downstream
862 # argument: a chain reference
863 # returns: 1 all OK 0 problems
865 _updownlinkcheck
("down",@_);
868 # consistency check for links, common to _uplinkcheck and _downlinkcheck
869 # argument: "up"||"down", check_ref
870 # returns: 1 all OK 0 problems
871 sub _updownlinkcheck
{
872 my $direction=$_[0] || "down"; # defaults to downstream
876 warn ("Warning _${direction}linkcheck: no chain input"); return (0); }
877 my $begin=$chain->{'begin'}; # the name of the first element
878 my $end=$chain->{'end'}; # the name of the last element
879 my ($label,@array,$me,$it,$itpoints);
880 if ($direction eq "up") {
881 $flow=2; # used to determine the direction of chain navigation
883 $label=$end; # start from end
885 $flow=1; # used to determine the direction of chain navigation
887 $label=$begin; # start from beginning
891 while ($label) { # proceed with linked elements, checking neighbours
893 @array=@
{$chain->{$label}};
894 $label = $array[$flow]; # go to the next one
896 if ($it) { # no sense in checking if next one not defined (END element)
897 @array=@
{$chain->{$label}};
898 $itpoints=$array[$wolf];
899 unless ($me==$itpoints) {
900 warn "Warning: ${direction}LinkCheck: LINK wrong in $it, that doesn't point back to me ($me). It points to $itpoints\n";
908 # consistency check for size of chain
909 # argument: a chain reference
910 # returns: 1 all OK 0 wrong size
914 warn ("Warning _sizecheck: no chain input"); return (0); }
915 my $begin=$chain->{'begin'}; # the name of the first element
918 my $size=$chain->{'size'};
921 while ($label) { # proceed with linked elements, counting
922 @array=@
{$chain->{$label}};
923 $label = $array[1]; # go to the next one
926 if ($size != $count) {
927 warn "Size check reports error: assumed size: $size, real size: $count ";
934 # consistency check for begin and end (boundaries)
935 # argument: a chain reference
936 # returns: 1 all OK 0 problems
940 warn ("Warning _boundcheck: no chain input"); return (0); }
941 my $begin=$chain->{'begin'}; # the name of the first element
942 my $end=$chain->{'end'}; # the name of the (supposedly) last element
945 # check SYNC of beginning
946 if (($begin)&&($chain->{$begin})) { # if the BEGIN points to existing element
947 if ($chain->{$begin}[2]) { # if BEGIN element has PREV not undef
948 warn "Warning: BEGIN element has PREV field defined \n";
949 warn "\tWDEBUG begin: $begin\t";
950 warn "\tWDEBUG begin's PREV: $chain->{$begin}[2] \n";
954 warn "Warning: BEGIN key of chain does not point to existing element!\n";
955 warn "\tWDEBUG begin: $begin\n";
959 if (($end)&&($chain->{$end})) { # if the END points to an existing element
960 if ($chain->{$end}[1]) { # if END element has NEXT not undef
961 warn "Warning: END element has NEXT field defined \n";
962 warn "\tWDEBUG end: $end\t";
963 warn "\tWDEBUG end's NEXT: $chain->{$end}[1] \n";
967 warn "Warning: END key of chain does not point to existing element!\n";
968 warn "\tWDEBUG end: $end\n";
974 # arguments: chain_ref
975 # returns: the size of the chain (the number of elements)
976 # return code -1: unexistant chain, errors...
980 warn ("Warning chain_length: no chain input"); return (-1); }
981 my $size=$chain->{'size'};
989 # arguments: chain ref, first element name, second element name
990 # returns: 1 or 0 (1 ok, 0 errors)
991 sub _join_chain_elements
{
994 warn ("Warning _join_chain_elements: no chain input"); return (0); }
997 unless(($leftelem)&&($rightelem)) {
998 warn ("Warning _join_chain_elements: element arguments??"); return (0); }
999 if (($chain->{$leftelem})&&($chain->{$rightelem})) { # if the elements exist
1000 $chain->{$leftelem}[1]=$rightelem;
1001 $chain->{$rightelem}[2]=$leftelem;
1004 warn ("Warning _join_chain_elements: elements not defined");
1011 Title : splice_chain
1012 Usage : @errorcodes = Bio::LiveSeq::Chain::splice_chain($chainref,$first,$length,$last)
1013 Function: removes the elements designated by FIRST and LENGTH from a chain.
1014 The chain shrinks accordingly. If LENGTH is omitted, removes
1015 everything from FIRST onward.
1016 If END is specified, LENGTH is ignored and instead the removal
1017 occurs from FIRST to LAST.
1018 Returns : the elements removed as a string
1020 Args : chainref, integer, integer, integer
1027 warn ("Warning splice_chain: no chain input"); return (-1); }
1028 my $begin=$chain->{'begin'}; # the name of the first element
1029 my $end=$chain->{'end'}; # the name of the (supposedly) last element
1031 unless (($first eq 0)||($first)) { $first=$begin; } # if undef, use $begin
1034 my (@array, $string);
1035 my ($beforecut,$aftercut);
1037 unless($chain->{$first}) {
1038 warn ("Warning splice_chain: first element not defined"); return (-1); }
1039 if ($last) { # if last is defined, it gets priority and len is not used
1040 unless($chain->{$last}) {
1041 warn ("Warning splice_chain: last element not defined"); return (-1); }
1043 warn ("Warning splice_chain: argument LAST:$last overriding LEN:$len!");
1047 $last=$end; # if last not defined, go 'till end (or to len, whichever 1st)
1050 $beforecut=$chain->{$first}[2]; # what's the element before 1st deleted?
1051 # if it is undef then it means we are splicing since the beginning
1055 my $afterlast=$chain->{$last}[1]; # if $last=$end $afterlast should be undef
1056 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
1058 # proceed for len elements or until the end, whichever comes first
1059 # if len undef goes till last
1060 while (($label)&&($label != $afterlast) && ($i <= ($len || $i + 1))) {
1061 @array=@
{$chain->{$label}};
1062 $string .= $array[0];
1063 $aftercut = $array[1]; # what's the element next last deleted?
1064 # also used as savevar to change label posdeletion
1065 delete $chain->{$label}; # this can be deleted now
1066 $label=$aftercut; # label is updated using the savevar
1070 # Now fix the chain (sticky edges, fields)
1071 # 4 cases: cut in the middle, cut from beginning, cut till end, cut all
1072 #print "\n\tstickyDEBUG beforecut: $beforecut "; # DEBUG
1073 #print "\taftercut: $aftercut \n"; # DEBUG
1075 if ($aftercut) { # 1st case, middle cut
1076 _join_chain_elements
($chain,$beforecut,$aftercut);
1077 } else { # 3rd case, end cut
1078 $chain->{'end'}=$beforecut; # update the END field
1079 $chain->{$beforecut}[1]=undef; # since we cut till the end
1082 if ($aftercut) { # 2nd case, begin cut
1083 $chain->{'begin'}=$aftercut; # update the BEGIN field
1084 $chain->{$aftercut}[2]=undef; # since we cut from beginning
1085 } else { # 4th case, all has been cut
1086 $chain->{'begin'}=undef;
1087 $chain->{'end'}=undef;
1090 $chain->{'size'}=($chain->{'size'}) - $i + 1; # update the SIZE field
1096 # arguments: CHAIN_REF POSITION [FIRST]
1097 # returns: element counting POSITION from FIRST or from START if FIRST undef
1098 # i.e. returns the element at POSITION counting from FIRST
1099 #sub element_at_pos {
1100 #croak "Warning: old method name. Please update code to 'down_get_label_at_position'\n";
1101 ##&down_element_at_pos;
1103 #sub up_element_at_pos {
1105 ##my @array=up_chain2string($_[0],$_[2],$_[1],undef,"elements");
1106 ##return $array[-1];
1107 #croak "old method name. Update code to: up_get_label_at_position";
1108 ##&up_get_label_at_pos;
1110 #sub down_element_at_pos {
1112 ##my @array=down_chain2string($_[0],$_[2],$_[1],undef,"elements");
1113 ##return $array[-1];
1114 #croak "old method name. Update code to: down_get_label_at_position";
1115 ##&down_get_label_at_pos;
1118 # arguments: CHAIN_REF ELEMENT [FIRST]
1119 # returns: the position of ELEMENT counting from FIRST or from START
1120 #i if FIRST is undef
1121 # i.e. returns the Number of elements between FIRST and ELEMENT
1122 # i.e. returns the position of element taking FIRST as 1 of coordinate system
1123 #sub pos_of_element {
1124 #croak ("Warning: old and ambiguous method name. Please update code to 'down_get_pos_of_label'\n");
1125 ##&down_pos_of_element;
1127 #sub up_pos_of_element {
1128 #croak ("Warning: old method name. Please update code to 'up_get_pos_of_label'\n");
1129 ##up_chain2string($_[0],$_[2],undef,$_[1],"counting");
1131 #sub down_pos_of_element {
1132 #croak ("Warning: old method name. Please update code to 'down_get_pos_of_label'\n");
1133 ##down_chain2string($_[0],$_[2],undef,$_[1],"counting");
1136 # wraparounds to calculate length of subchain from first to last
1137 # arguments: chain_ref [first] [last]
1138 #sub subchain_length {
1139 #croak "Warning: old method name. Please update code to 'down_subchain_length'\n";
1140 ##&down_subchain_length;
1143 # wraparounds to have elements output
1144 # same arguments as chain2string
1145 # returns label|name of every element
1147 #croak ("Warning: method no more supported. Please update code to 'down_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
1151 #croak ("Warning: method no more supported. Please update code to 'up_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
1152 ##up_chain2string($_[0],$_[1],$_[2],$_[3],"elements");
1154 #sub down_elements {
1155 #croak ("Warning: method no more supported. Please update code to 'down_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
1156 ##down_chain2string($_[0],$_[1],$_[2],$_[3],"elements");
1159 # wraparounds to have verbose output
1160 # same arguments as chain2string
1161 # returns the chain in a very verbose way
1162 sub chain2string_verbose
{
1163 carp
"Warning: method no more supported.\n";
1164 &old_down_chain2string_verbose
;
1166 sub up_chain2string_verbose
{
1167 carp
"Warning: method no more supported.\n";
1168 old_up_chain2string
($_[0],$_[1],$_[2],$_[3],"verbose");
1170 sub down_chain2string_verbose
{
1171 carp
"Warning: method no more supported.\n";
1172 old_down_chain2string
($_[0],$_[1],$_[2],$_[3],"verbose");
1176 #croak ("Warning: old method name. Please update code to 'down_chain2string'\n");
1177 ##&down_chain2string;
1179 sub old_up_chain2string
{
1180 old_updown_chain2string
("up",@_);
1182 sub old_down_chain2string
{
1183 old_updown_chain2string
("down",@_);
1186 # common to up_chain2string and down_chain2string
1187 # arguments: "up"||"down" chain_ref [first] [len] [last] [option]
1188 # [option] can be any of "verbose", "counting", "elements"
1190 # defaults: start = first element; if len undef, goes to last
1191 # if last undef, goes to end
1192 # if last def it overrides len (that gets undef)
1194 # example usage: down_chain2string($chain) -> all the chain from begin to end
1195 # example usage: down_chain2string($chain,6) -> from 6 to the end
1196 # example usage: down_chain2string($chain,6,4) -> from 6, going on 4 elements
1197 # example usage: down_chain2string($chain,6,"",10) -> from 6 to 10
1198 # example usage: up_chain2string($chain,10,"",6) -> from 10 to 6 upstream
1199 sub old_updown_chain2string
{
1200 my ($direction,$chain,$first,$len,$last,$option)=@_;
1202 warn ("Warning chain2string: no chain input"); return (-1); }
1203 my $begin=$chain->{'begin'}; # the name of the BEGIN element
1204 my $end=$chain->{'end'}; # the name of the END element
1206 if ($direction eq "up") {
1207 $flow=2; # used to determine the direction of chain navigation
1208 unless ($first) { $first=$end; } # if undef or 0, use $end
1209 } else { # defaults to "down"
1210 $flow=1; # used to determine the direction of chain navigation
1211 unless ($first) { $first=$begin; } # if undef or 0, use $begin
1214 unless($chain->{$first}) {
1215 warn ("Warning chain2string: first element not defined"); return (-1); }
1216 if ($last) { # if last is defined, it gets priority and len is not used
1217 unless($chain->{$last}) {
1218 warn ("Warning chain2string: last element not defined"); return (-1); }
1220 warn ("Warning chain2string: argument LAST:$last overriding LEN:$len!");
1224 if ($direction eq "up") {
1225 $last=$begin; # if last not defined, go 'till begin (or upto len elements)
1227 $last=$end; # if last not defined, go 'till end (or upto len elements)
1230 my (@array, $string, $count);
1231 # call for verbosity (by way of chain2string_verbose);
1232 my $verbose=0; my $elements=0; my @elements; my $counting=0;
1233 if ($option) { # keep strict happy
1234 if ($option eq "verbose") { $verbose=1; }
1235 if ($option eq "elements") { $elements=1; }
1236 if ($option eq "counting") { $counting=1; }
1240 print "BEGIN=$begin"; print " END=$end"; print " SIZE=$chain->{'size'}";
1241 print " FIRSTFREE=$chain->{'firstfree'} \n";
1246 my $afterlast=$chain->{$last}[$flow]; # if $last=$end $afterlast should be undef
1247 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
1249 # proceed for len elements or until last, whichever comes first
1250 # if $len undef goes till end
1251 while (($label)&&($label != $afterlast) && ($i <= ($len || $i + 1))) {
1252 @array=@
{$chain->{$label}};
1254 $string .= "$array[2]_${label}_$array[1]=$array[0] ";
1256 } elsif ($elements) {
1257 push (@elements,$label); # returning element names/references/identifiers
1258 } elsif ($counting) {
1261 $string .= $array[0]; # returning element content
1263 $label = $array[$flow]; # go to next||prev i.e. downstream||upstream
1266 #DEBUG#print "len: $len, first: $first, last: $last, afterlast=$afterlast \n";
1267 if ($verbose) { print "TOTALprinted: $count\n"; }
1270 } elsif ($elements) {
1278 # --------> deleted, no more supported <--------
1279 # creation of a single linked list/chain from a string
1280 # basically could be recreated by taking the *2chain methods and
1281 # omitting to set the 3rd field (label 2) containing the back links
1284 # creation of a double linked list/chain from a string
1285 # returns reference to a hash containing the chain
1286 # arguments: STRING [OFFSET]
1287 # defaults: OFFSET defaults to 1 if undef
1288 # the chain will contain as elements the single characters in the string
1290 my @string=split(//,$_[0]);
1291 array2chain
(\
@string,$_[1]);
1297 Usage : $chainref = Bio::LiveSeq::Chain::array2chain($arrayref,$offset)
1298 Function: creation of a double linked chain from an array
1299 Returns : reference to a hash containing the chain
1300 Defaults: OFFSET defaults to 1 if undef
1302 Args : a reference to an array containing the elements to be chainlinked
1303 an optional integer > 0 (this will be the starting count for
1304 the chain labels instead than having them begin from "1")
1310 my $array_count=scalar(@
{$arrayref});
1311 unless ($array_count) {
1312 warn ("Warning array2chain: no elements input"); return (0); }
1314 if (defined $begin) {
1316 warn "Warning array2chain: Zero or Negative offsets not allowed"; return (0); }
1320 my ($element,%hash);
1321 $hash{'begin'}=$begin;
1323 foreach $element (@
{$arrayref}) {
1325 # hash with keys begin..end pointing to the arrays
1326 $hash{$i}=[$element,$i+1,$i-1];
1330 $hash{firstfree
}=$i+1; # what a new added element should be called
1331 $hash{size
}=$end-$begin+1; # how many elements in the chain
1333 # eliminate pointers to unexisting elements
1334 $hash{$begin}[2]=undef;
1335 $hash{$end}[1]=undef;