nexml.t: Added a missing XML::Twig requirement.
[bioperl-live.git] / Bio / Tree / Statistics.pm
blobeb77a69e1a4ca1b0bddcf6b1a9296bc0c751f35c
2 # BioPerl module for Bio::Tree::Statistics
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tree::Statistics - Calculate certain statistics for a Tree
18 =head1 SYNOPSIS
20 use Bio::Tree::Statistics;
22 =head1 DESCRIPTION
24 This should be where Tree statistics are calculated. It was
25 previously where statistics from a Coalescent simulation.
27 It now contains several methods for calculating L<Tree-Trait
28 statistics>.
30 =head1 FEEDBACK
32 =head2 Mailing Lists
34 User feedback is an integral part of the evolution of this and other
35 Bioperl modules. Send your comments and suggestions preferably to
36 the Bioperl mailing list. Your participation is much appreciated.
38 bioperl-l@bioperl.org - General discussion
39 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41 =head2 Support
43 Please direct usage questions or support issues to the mailing list:
45 I<bioperl-l@bioperl.org>
47 rather than to the module maintainer directly. Many experienced and
48 reponsive experts will be able look at the problem and quickly
49 address it. Please include a thorough description of the problem
50 with code and data examples if at all possible.
52 =head2 Reporting Bugs
54 Report bugs to the Bioperl bug tracking system to help us keep track
55 of the bugs and their resolution. Bug reports can be submitted via
56 the web:
58 https://github.com/bioperl/bioperl-live/issues
60 =head1 AUTHOR - Jason Stajich
62 Email jason AT bioperl.org
64 =head1 CONTRIBUTORS
66 Heikki Lehvaslaiho, heikki at bioperl dot org
68 =head1 APPENDIX
70 The rest of the documentation details each of the object methods.
71 Internal methods are usually preceded with a _
73 =cut
76 # Let the code begin...
79 package Bio::Tree::Statistics;
80 use strict;
83 use base qw(Bio::Root::Root);
85 =head2 new
87 Title : new
88 Usage : my $obj = Bio::Tree::Statistics->new();
89 Function: Builds a new Bio::Tree::Statistics object
90 Returns : Bio::Tree::Statistics
91 Args :
93 =head2 assess_bootstrap
95 Title : assess_bootstrap
96 Usage : my $tree_with_bs = $stats->assess_bootstrap(\@bs_trees);
97 Function: Calculates the bootstrap for internal nodes based on
98 Returns : L<Bio::Tree::TreeI>
99 Args : Arrayref of L<Bio::Tree::TreeI>s
101 =cut
103 sub assess_bootstrap{
104 my ($self,$bs_trees,$guide_tree) = @_;
105 my @consensus;
107 # internal nodes are defined by their children
109 my (%lookup,%internal);
110 my $i = 0;
111 for my $tree ( $guide_tree, @$bs_trees ) {
112 # Do this as a top down approach, can probably be
113 # improved by caching internal node states, but not going
114 # to worry about it right now.
116 my @allnodes = $tree->get_nodes;
117 my @internalnodes = grep { ! $_->is_Leaf } @allnodes;
118 for my $node ( @internalnodes ) {
119 my @tips = sort map { $_->id }
120 grep { $_->is_Leaf() } $node->get_all_Descendents;
121 my $id = "(".join(",", @tips).")";
122 if( $i == 0 ) {
123 $internal{$id} = $node->internal_id;
124 } else {
125 $lookup{$id}++;
128 $i++;
130 my @save;
131 for my $l ( keys %lookup ) {
132 if( defined $internal{$l} ) {#&& $lookup{$l} > $min_seen ) {
133 my $intnode = $guide_tree->find_node(-internal_id => $internal{$l});
134 $intnode->bootstrap(sprintf("%d",100 * $lookup{$l} / $i));
137 return $guide_tree;
141 =head2 cherries
143 Example : cherries($tree, $node);
144 Description: Count number of paired leaf nodes
145 in a binary tree
146 Returns : integer
147 Exceptions :
148 Args : 1. Bio::Tree::TreeI object
149 2. Bio::Tree::NodeI object within the tree, optional
151 Commonly used statistics assume a binary tree, but this methods
152 returns a value even for trees with polytomies.
154 =cut
156 sub cherries ($;$) {
157 my $self = shift;
158 my $tree = shift;
159 my $node = shift || $tree->get_root_node;
161 my $cherries = 0;
162 my @descs = $node->each_Descendent;
164 if ($descs[0]->is_Leaf and $descs[1]->is_Leaf) {
165 if ($descs[3]) { #polytomy at leaf level
166 $cherries = 0;
167 } else {
168 $cherries = 1;
170 } else {
171 # recurse
172 foreach my $desc (@descs) {
173 $cherries += $self->cherries($tree, $desc);
176 return $cherries;
180 =head2 Tree-Trait statistics
182 The following methods produce descriptors of trait distribution among
183 leaf nodes within the trees. They require that a trait has been set
184 for each leaf node. The tag methods of Bio::Tree::Node are used to
185 store them as key/value pairs. In this way, one tree can store more
186 than one trait.
188 Trees have method add_traits() to set trait values from a file. See the
189 add_trait() method in L<Bio::Tree::TreeFunctionsI>.
191 =head2 fitch
193 Example : fitch($tree, $key, $node);
194 Description: Calculates Parsimony Score (PS) and internal trait
195 values using the Fitch 1971 parsimony algorithm for
196 the subtree a defined by the (internal) node.
197 Node defaults to the root.
198 Returns : true on success
199 Exceptions : leaf nodes have to have the trait defined
200 Args : 1. Bio::Tree::TreeI object
201 2. trait name string
202 3. Bio::Tree::NodeI object within the tree, optional
204 Runs first L<fitch_up> that calculates parsimony scores and then
205 L<fitch_down> that should resolve most of the trait/character state
206 ambiguities.
208 Fitch, W.M., 1971. Toward defining the course of evolution: minimal
209 change for a specific tree topology. Syst. Zool. 20, 406-416.
211 You can access calculated parsimony values using:
213 $score = $node->->get_tag_values('ps_score');
215 and the trait value with:
217 $traitvalue = $node->->get_tag_values('ps_trait'); # only the first
218 @traitvalues = $node->->get_tag_values('ps_trait');
220 Note that there can be more that one trait value, especially for the
221 root node.
223 =cut
225 sub fitch {
226 my $self = shift;
227 my $tree = shift;
228 my $key = shift || $self->throw("Trait name is needed");
229 my $node = shift || $tree->get_root_node;
231 $self->fitch_up($tree, $key, $node);
232 $self->fitch_down($tree, $node);
236 =head2 ps
238 Example : ps($tree, $key, $node);
239 Description: Calculates Parsimony Score (PS) from Fitch 1971
240 parsimony algorithm for the subtree as defined
241 by the (internal) node.
242 Node defaults to the root.
243 Returns : integer, 1 < PS < n, where n is number of branches
244 Exceptions : leaf nodes have to have the trait defined
245 Args : 1. Bio::Tree::TreeI object
246 2. trait name string
247 3. Bio::Tree::NodeI object within the tree, optional
249 This is the first half of the Fitch algorithm that is enough for
250 calculating the resolved parsimony values. The trait/chararacter
251 states are commonly left in ambiguous state. To resolve them, run
252 L<fitch_down>.
254 =cut
256 sub ps { shift->fitch_up(@_) }
259 =head2 fitch_up
261 Example : fitch_up($tree, $key, $node);
262 Description: Calculates Parsimony Score (PS) from the Fitch 1971
263 parsimony algorithm for the subtree as defined
264 by the (internal) node.
265 Node defaults to the root.
266 Returns : integer, 1< PS < n, where n is number of branches
267 Exceptions : leaf nodes have to have the trait defined
268 Args : 1. Bio::Tree::TreeI object
269 2. trait name string
270 3. Bio::Tree::NodeI object within the tree, optional
272 This is a more generic name for L<ps> and indicates that it performs
273 the first bottom-up tree traversal that calculates the parsimony score
274 but usually leaves trait/character states ambiguous. If you are
275 interested in internal trait states, running L<fitch_down> should
276 resolve most of the ambiguities.
278 =cut
280 sub fitch_up {
281 my $self = shift;
282 my $tree = shift;
283 my $key = shift || $self->throw("Trait name is needed");
284 my $node = shift || $tree->get_root_node;
286 if ($node->is_Leaf) {
287 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
288 unless $node->has_tag($key);
289 $node->set_tag_value('ps_trait', $node->get_tag_values($key) );
290 $node->set_tag_value('ps_score', 0 );
291 return; # end of recursion
294 foreach my $child ($node->each_Descendent) {
295 $self->fitch_up($tree, $key, $child);
298 my %intersection;
299 my %union;
300 my $score;
302 foreach my $child ($node->each_Descendent) {
303 foreach my $trait ($child->get_tag_values('ps_trait') ) {
304 $intersection{$trait}++ if $union{$trait};
305 $union{$trait}++;
307 $score += $child->get_tag_values('ps_score');
310 if (keys %intersection) {
311 $node->set_tag_value('ps_trait', keys %intersection);
312 $node->set_tag_value('ps_score', $score);
313 } else {
314 $node->set_tag_value('ps_trait', keys %union);
315 $node->set_tag_value('ps_score', $score+1);
318 if ($self->verbose) {
319 print "-- node --------------------------\n";
320 print "iID: ", $node->internal_id, " (", $node->id, ")\n";
321 print "Trait: ", join (', ', $node->get_tag_values('ps_trait') ), "\n";
322 print "length :", scalar($node->get_tag_values('ps_score')) , "\n";
324 return scalar $node->get_tag_values('ps_score');
328 =head2 fitch_down
330 Example : fitch_down($tree, $node);
331 Description: Runs the second pass from Fitch 1971
332 parsimony algorithm to resolve ambiguous
333 trait states left by first pass.
334 by the (internal) node.
335 Node defaults to the root.
336 Returns : true
337 Exceptions : dies unless the trait is defined in all nodes
338 Args : 1. Bio::Tree::TreeI object
339 2. Bio::Tree::NodeI object within the tree, optional
341 Before running this method you should have ran L<fitch_up> (alias to
342 L<ps> ). Note that it is not guaranteed that all states are completely
343 resolved.
345 =cut
347 sub fitch_down {
348 my $self = shift;
349 my $tree = shift;
350 my $node = shift || $tree->get_root_node;
352 my $key = 'ps_trait';
353 $self->throw ("ERROR: ". $node->internal_id. " needs a value for $key")
354 unless $node->has_tag($key);
356 my $nodev;
357 foreach my $trait ($node->get_tag_values($key) ) {
358 $nodev->{$trait}++;
361 foreach my $child ($node->each_Descendent) {
362 next if $child->is_Leaf; # end of recursion
364 my $intersection;
365 foreach my $trait ($child->get_tag_values($key) ) {
366 $intersection->{$trait}++ if $nodev->{$trait};
369 $self->fitch_down($tree, $child);
370 $child->set_tag_value($key, keys %$intersection);
372 return 1; # success
376 =head2 persistence
378 Example : persistence($tree, $node);
379 Description: Calculates the persistence
380 for node in the subtree defined by the (internal)
381 node. Node defaults to the root.
382 Returns : int, number of generations trait value has to remain same
383 Exceptions : all the nodes need to have the trait defined
384 Args : 1. Bio::Tree::TreeI object
385 2. Bio::Tree::NodeI object within the tree, optional
387 Persistence measures the stability that the trait value has in a
388 tree. It expresses the number of generations the trait value remains
389 the same. All the decendants of the root in the same generation have
390 to share the same value.
392 Depends on Fitch's parsimony score (PS).
394 =cut
396 sub _persistence {
397 my $self = shift;
398 my $tree = shift;
399 my $node = shift;
400 my $value = shift || $self->throw("Value is needed");
403 my $key = 'ps_trait';
405 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
407 return 0 unless $node->get_tag_values($key) eq $value; # wrong value
408 return 1 if $node->is_Leaf; # end of recursion
410 my $persistence = 10000000; # an arbitrarily large number
411 foreach my $child ($node->each_Descendent) {
412 my $pers = $self->_persistence($tree, $child, $value);
413 $persistence = $pers if $pers < $persistence;
415 return $persistence + 1;
418 sub persistence {
419 my $self = shift;
420 my $tree = shift;
421 my $node = shift || $tree->get_root_node;
422 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
424 my $key = 'ps_trait';
425 my $value = $node->get_tag_values($key);
427 #calculate
428 my $persistence = $self->_persistence($tree, $node, $value);
429 $node->set_tag_value('persistance', $persistence);
430 return $persistence;
434 =head2 count_subclusters
436 Example : count_clusters($tree, $node);
437 Description: Calculates the number of sub-clusters
438 in the subtree defined by the (internal)
439 node. Node defaults to the root.
440 Returns : int, count
441 Exceptions : all the nodes need to have the trait defined
442 Args : 1. Bio::Tree::TreeI object
443 2. Bio::Tree::NodeI object within the tree, optional
445 Depends on Fitch's parsimony score (PS).
447 =cut
449 sub _count_subclusters {
450 my $self = shift;
451 my $tree = shift;
452 my $node = shift;
453 my $value = shift || $self->throw("Value is needed");
455 my $key = 'ps_trait';
457 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
458 unless $node->has_tag($key);
460 if ($node->get_tag_values($key) eq $value) {
461 if ($node->get_tag_values('ps_score') == 0) {
462 return 0;
463 } else {
464 my $count = 0;
465 foreach my $child ($node->each_Descendent) {
466 $count += $self->_count_subclusters($tree, $child, $value);
468 return $count;
471 return 1;
474 sub count_subclusters {
475 my $self = shift;
476 my $tree = shift;
477 my $node = shift || $tree->get_root_node;
478 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
480 my $key = 'ps_trait';
481 my $value = $node->get_tag_values($key);
483 return $self->_count_subclusters($tree, $node, $value);
487 =head2 count_leaves
489 Example : count_leaves($tree, $node);
490 Description: Calculates the number of leaves with same trait
491 value as root in the subtree defined by the (internal)
492 node. Requires an unbroken line of identical trait values.
493 Node defaults to the root.
494 Returns : int, number of leaves with this trait value
495 Exceptions : all the nodes need to have the trait defined
496 Args : 1. Bio::Tree::TreeI object
497 2. Bio::Tree::NodeI object within the tree, optional
499 Depends on Fitch's parsimony score (PS).
501 =cut
503 sub _count_leaves {
504 my $self = shift;
505 my $tree = shift;
506 my $node = shift || $tree->get_root_node;
507 my $value = shift;
509 my $key = 'ps_trait';
511 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
512 unless $node->has_tag($key);
514 if ($node->get_tag_values($key) eq $value) {
515 #print $node->id, ": ", $node->get_tag_values($key), "\n";
516 return 1 if $node->is_Leaf; # end of recursion
518 my $count = 0;
519 foreach my $child ($node->each_Descendent) {
520 $count += $self->_count_leaves($tree, $child, $value);
522 return $count;
524 return 0;
527 sub count_leaves {
528 my $self = shift;
529 my $tree = shift;
530 my $node = shift || $tree->get_root_node;
531 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
533 my $key = 'ps_trait';
534 my $value = $node->get_tag_values($key);
536 return $self->_count_leaves($tree, $node, $value);
540 =head2 phylotype_length
542 Example : phylotype_length($tree, $node);
543 Description: Sums up the branch lengths within phylotype
544 exluding the subclusters where the trait values
545 are different
546 Returns : float, length
547 Exceptions : all the nodes need to have the trait defined
548 Args : 1. Bio::Tree::TreeI object
549 2. Bio::Tree::NodeI object within the tree, optional
551 Depends on Fitch's parsimony score (PS).
553 =cut
555 sub _phylotype_length {
556 my $self = shift;
557 my $tree = shift;
558 my $node = shift;
559 my $value = shift;
561 my $key = 'ps_trait';
563 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
564 unless $node->has_tag($key);
566 return 0 if $node->get_tag_values($key) ne $value;
567 return $node->branch_length if $node->is_Leaf; # end of recursion
569 my $length = 0;
570 foreach my $child ($node->each_Descendent) {
571 my $sub_len = $self->_phylotype_length($tree, $child, $value);
572 $length += $sub_len;
573 $length += $child->branch_length if not $child->is_Leaf and $sub_len;
575 return $length;
578 sub phylotype_length {
579 my $self = shift;
580 my $tree = shift;
581 my $node = shift || $tree->get_root_node;
583 my $key = 'ps_trait';
584 my $value = $node->get_tag_values($key);
586 return $self->_phylotype_length($tree, $node, $value);
590 =head2 sum_of_leaf_distances
592 Example : sum_of_leaf_distances($tree, $node);
593 Description: Sums up the branch lengths from root to leaf
594 exluding the subclusters where the trait values
595 are different
596 Returns : float, length
597 Exceptions : all the nodes need to have the trait defined
598 Args : 1. Bio::Tree::TreeI object
599 2. Bio::Tree::NodeI object within the tree, optional
601 Depends on Fitch's parsimony score (PS).
603 =cut
605 sub _sum_of_leaf_distances {
606 my $self = shift;
607 my $tree = shift;
608 my $node = shift;
609 my $value = shift;
611 my $key = 'ps_trait';
613 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
614 unless $node->has_tag($key);
615 return 0 if $node->get_tag_values($key) ne $value;
616 #return $node->branch_length if $node->is_Leaf; # end of recursion
617 return 0 if $node->is_Leaf; # end of recursion
619 my $length = 0;
620 foreach my $child ($node->each_Descendent) {
621 $length += $self->_count_leaves($tree, $child, $value) * $child->branch_length +
622 $self->_sum_of_leaf_distances($tree, $child, $value);
624 return $length;
627 sub sum_of_leaf_distances {
628 my $self = shift;
629 my $tree = shift;
630 my $node = shift || $tree->get_root_node;
632 my $key = 'ps_trait';
633 my $value = $node->get_tag_values($key);
635 return $self->_sum_of_leaf_distances($tree, $node, $value);
639 =head2 genetic_diversity
641 Example : genetic_diversity($tree, $node);
642 Description: Diversity is the sum of root to leaf distances
643 within the phylotype normalised by number of leaf
644 nodes
645 Returns : float, value of genetic diversity
646 Exceptions : all the nodes need to have the trait defined
647 Args : 1. Bio::Tree::TreeI object
648 2. Bio::Tree::NodeI object within the tree, optional
650 Depends on Fitch's parsimony score (PS).
652 =cut
654 sub genetic_diversity {
655 my $self = shift;
656 my $tree = shift;
657 my $node = shift || $tree->get_root_node;
659 return $self->sum_of_leaf_distances($tree, $node) /
660 $self->count_leaves($tree, $node);
664 =head2 statratio
666 Example : statratio($tree, $node);
667 Description: Ratio of the stem length and the genetic diversity of the
668 phylotype L<genetic_diversity>
669 Returns : float, separation score
670 Exceptions : all the nodes need to have the trait defined
671 Args : 1. Bio::Tree::TreeI object
672 2. Bio::Tree::NodeI object within the tree, optional
674 Statratio gives a measure of separation and variability within the phylotype.
675 Larger values identify more rapidly evolving and recent phylotypes.
677 Depends on Fitch's parsimony score (PS).
679 =cut
681 sub statratio {
682 my $self = shift;
683 my $tree = shift;
684 my $node = shift || $tree->get_root_node;
686 my $div = $self->genetic_diversity($tree, $node);
687 return 0 if $div == 0;
688 return $node->branch_length / $div;
693 =head2 ai
695 Example : ai($tree, $key, $node);
696 Description: Calculates the Association Index (AI) of Whang et
697 al. 2001 for the subtree defined by the (internal)
698 node. Node defaults to the root.
699 Returns : real
700 Exceptions : leaf nodes have to have the trait defined
701 Args : 1. Bio::Tree::TreeI object
702 2. trait name string
703 3. Bio::Tree::NodeI object within the tree, optional
705 Association index (AI) gives a more fine grained results than PS since
706 the result is a real number. ~0 E<lt>= AI.
708 Wang, T.H., Donaldson, Y.K., Brettle, R.P., Bell, J.E., Simmonds, P.,
709 2001. Identification of shared populations of human immunodeficiency
710 Virus Type 1 infecting microglia and tissue macrophages outside the
711 central nervous system. J. Virol. 75 (23), 11686-11699.
713 =cut
715 sub _node_ai {
716 my $self = shift;
717 my $node = shift;
718 my $key = shift;
720 my $traits;
721 my $leaf_count = 0;
722 for my $desc ( $node->get_all_Descendents ) {
723 next unless $desc->is_Leaf;
724 $leaf_count++;
725 $self->throw ("Node ". $desc->id. " needs a value for trait [$key]")
726 unless $desc->has_tag($key);
727 my $trait = $desc->get_tag_values($key);
728 $traits->{$trait}++;
730 my $most_common = 0;
731 foreach ( keys %$traits) {
732 $most_common = $traits->{$_} if $traits->{$_} > $most_common;
734 return sprintf "%1.6f", (1 - ($most_common/$leaf_count) ) / (2**($leaf_count-1) );
737 sub ai {
738 my $self = shift;
739 my $tree = shift;
740 my $key = shift || $self->throw("Trait name is needed");
741 my $start_node = shift || $tree->get_root_node;
742 return unless $start_node;
744 my $sum = 0;
745 for my $node ( $start_node->get_all_Descendents ) {
746 next if $node->is_Leaf;
747 $sum += $self->_node_ai($node, $key);
749 return $sum;
753 =head2 mc
755 Example : mc($tree, $key, $node);
756 Description: Calculates the Monophyletic Clade (MC) size statistics
757 for the subtree a defined by the (internal) node.
758 Node defaults to the root;
759 Returns : hashref with trait values as keys
760 Exceptions : leaf nodes have to have the trait defined
761 Args : 1. Bio::Tree::TreeI object
762 2. trait name string
763 3. Bio::Tree::NodeI object within the tree, optional
765 Monophyletic Clade (MC) size statistics by Salemi at al 2005. It is
766 calculated for each trait value. 1 E<lt>= MC E<lt>= nx, where nx is the
767 number of tips with value x:
769 pick the internal node with maximim value for
770 number of of tips with only trait x
772 MC was defined by Parker et al 2008.
774 Salemi, M., Lamers, S.L., Yu, S., de Oliveira, T., Fitch, W.M., McGrath, M.S.,
775 2005. Phylodynamic analysis of Human Immunodeficiency Virus Type 1 in
776 distinct brain compartments provides a model for the neuropathogenesis of
777 AIDS. J. Virol. 79 (17), 11343-11352.
779 Parker, J., Rambaut A., Pybus O., 2008. Correlating viral phenotypes
780 with phylogeny: Accounting for phylogenetic uncertainty Infection,
781 Genetics and Evolution 8 (2008), 239-246.
783 =cut
785 sub _node_mc {
786 my $self = shift;
787 my $node = shift;
788 my $key = shift;
790 my $traits;
791 my $leaf_count = 0;
792 for my $node2 ( $node->get_all_Descendents ) {
793 next unless $node2->is_Leaf;
794 $leaf_count++;
795 my $trait = $node2->get_tag_values($key);
796 $traits->{$trait}++;
798 return $traits;
801 sub mc {
802 my $self = shift;
803 my $tree = shift;
804 my $key = shift || die "Trait name is needed";
805 my $start_node = shift || $tree->get_root_node;
806 return unless $start_node;
808 my $sum; # hashref, keys are trait values
809 my $keys; # hashref, keys are trait values
810 foreach my $node ( $start_node->get_all_Descendents ) {
811 next if $node->is_Leaf;
812 my $traits = $self->_node_mc($node, $key);
813 if (scalar keys %$traits == 1) {
814 my ($value) = keys %$traits;
815 no warnings;
816 $sum->{$value} = $traits->{$value}
817 if $sum->{$value} < $traits->{$value};
818 } else {
819 map { $keys->{$_} = 1 } keys %$traits;
822 # check for cases where there are no clusters
823 foreach my $value (keys %$keys) {
824 $sum->{$value} = 1 unless defined $sum->{$value};
826 return $sum;