3 # BioPerl module for Bio::Tree::RandomFactory
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason@bioperl.org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::Tree::RandomFactory - TreeFactory for generating Random Trees
21 use Bio::Tree::RandomFactory
23 my $factory = Bio::Tree::RandomFactory->new( -taxa => \@taxonnames,
26 # or for anonymous samples
28 my $factory = Bio::Tree::RandomFactory->new( -num_taxa => 6,
32 my $tree = $factory->next_tree;
36 Builds a random tree every time next_tree is called or up to -maxcount times.
38 This module was originally written for Coalescent simulations see
39 L<Bio::PopGen::Simulation::Coalescent>. I've left the next_tree
40 method intact although it is not generating random trees in the
41 phylogenetic sense. I would be happy for someone to provide
42 alternative implementations which can be used here. As written it
43 will generate random topologies but the branch lengths are built from
44 assumptions in the coalescent and are not appropriate for phylogenetic
47 This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
49 Hudson, R. R. 1990. Gene genealogies and the coalescent
50 process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford
51 surveys in evolutionary biology. Vol. 7. Oxford University
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to
62 the Bioperl mailing list. Your participation is much appreciated.
64 bioperl-l@bioperl.org - General discussion
65 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
69 Please direct usage questions or support issues to the mailing list:
71 I<bioperl-l@bioperl.org>
73 rather than to the module maintainer directly. Many experienced and
74 reponsive experts will be able look at the problem and quickly
75 address it. Please include a thorough description of the problem
76 with code and data examples if at all possible.
80 Report bugs to the Bioperl bug tracking system to help us keep track
81 of the bugs and their resolution. Bug reports can be submitted via
84 http://bugzilla.open-bio.org/
86 =head1 AUTHOR - Jason Stajich
88 Email jason-AT-bioperl.org
92 Matthew Hahn, E<lt>matthew.hahn@duke.eduE<gt>
97 The rest of the documentation details each of the object methods.
98 Internal methods are usually preceded with a _
103 # Let the code begin...
106 package Bio
::Tree
::RandomFactory
;
107 use vars
qw($PRECISION_DIGITS $DefaultNodeType %Defaults);
110 $PRECISION_DIGITS = 3; # Precision for the branchlength
111 $DefaultNodeType = 'Bio::Tree::Node';
112 %Defaults = ('YuleRate' => 1.0, # as set by Sanderson in Rates
113 'Speciation' => 1.0, #
114 'DefaultTreeMethod' => 'yule',
117 use Bio::Tools::RandomDistFunctions;
120 use base qw(Bio::Root::Root Bio::Factory::TreeFactoryI);
125 Usage : my $factory = Bio::Tree::RandomFactory->new(-samples => \@samples,
127 Function: Initializes a Bio::Tree::RandomFactory object
128 Returns : Bio::Tree::RandomFactory
129 Args : -nodetype => Type of Nodes to create [default Bio::Tree::Node]
130 -maxcount => [optional] Maximum num trees to create
131 -randtype => Type of random trees so far support
132 - yule/backward_yule/BY [default]
134 - birthdeath_forward/BDF
135 - birthdeath_backwards/BDB
138 ONE of the following must be specified
139 -taxa => $arrayref of taxa names
140 -num_taxa => integer indicating number of taxa in the tree
145 my ($class,@args) = @_;
146 my $self = $class->SUPER::new
(@args);
148 $self->{'_treecounter'} = 0;
149 $self->{'_maxcount'} = 0;
150 my ($nodetype,$randtype,
151 $maxcount, $samps,$samplesize,
152 $taxa, $num_taxa) = $self->_rearrange([qw(NODETYPE
161 $nodetype ||= $DefaultNodeType;
162 $self->nodetype($nodetype);
163 $taxa = $samps if defined $samps && ! defined $taxa;
164 $num_taxa = $samplesize if $samplesize && ! $num_taxa;
165 if( ! defined $taxa ) {
166 if( ! defined $num_taxa || $num_taxa <= 0 ) {
167 $self->throw("Must specify a valid num_taxa if parameter -TAXA is not specified");
169 foreach ( 1..$num_taxa ) { push @taxa, "Taxon$_"; }
171 if( ref($taxa) !~ /ARRAY/i ) {
172 $self->throw("Must specify a valid ARRAY reference to the parameter -TAXA, did you forget a leading '\\'? for $taxa");
178 defined $maxcount && $self->maxcount($maxcount);
179 $self->{'_count'} = 0;
186 Usage : my $tree = $factory->next_tree
187 Function: Returns a random tree based on the initialized number of nodes
188 NOTE: if maxcount is not specified on initialization or
189 set to a valid integer, subsequent calls to next_tree will
190 continue to return random trees and never return undef
192 Returns : Bio::Tree::TreeI object
199 my ($self,%options) = @_;
200 return if $self->maxcount &&
201 $self->{'_count'}++ >= $self->maxcount;
202 my $rand_type = $options{'randtype'} || $self->random_tree_method;
203 my $nodetype = $self->nodetype;
206 if( $rand_type =~ /(birthdeath_forward|birth|BDF)/i ) {
208 } elsif ( $rand_type =~ /(birthdeath_backward|BDB)/i ) {
209 $treearray = $self->rand_birthdeath_backwards_tree;
210 } elsif( $rand_type =~ /(BY|backwards_yule)/i ||
211 $rand_type =~ /^yule/i ) {
212 my $speciation = $options{'speciation'}; # can be undef
213 $treearray = $self->rand_yule_c_tree($speciation);
215 $self->warn("unrecognized random type $rand_type");
219 foreach my $n ( @
$treearray ) {
220 for my $k ( qw(desc1 desc2) ) {
221 next unless defined $n->{$k};
222 push @
{$n->{'descendents'}}, $nodes[$n->{$k}];
225 $nodetype->new(-id
=> $n->{'nodenum'},
226 -branch_length
=> $n->{'time'},
227 -descendents
=> $n->{'descendents'},
230 my $T = Bio
::Tree
::Tree
->new(-root
=> pop @nodes );
238 Usage : $obj->maxcount($newval)
240 Returns : Maxcount value
241 Args : newvalue (optional)
247 my ($self,$value) = @_;
248 if( defined $value) {
249 if( $value =~ /^(\d+)/ ) {
250 $self->{'_maxcount'} = $1;
252 $self->warn("Must specify a valid Positive integer to maxcount");
253 $self->{'_maxcount'} = 0;
256 return $self->{'_maxcount'};
260 =head2 reset_tree_count
262 Title : reset_tree_count
263 Usage : $factory->reset_tree_count;
264 Function: Reset the tree counter
272 shift->{'_count'} = 0;
278 Usage : $obj->taxa($newval)
279 Function: Set the leaf node names
280 Returns : value of taxa
281 Args : Arrayref of Taxon names
287 my ($self,$value) = @_;
288 if( defined $value) {
289 if( ref($value) !~ /ARRAY/i ) {
290 $self->warn("Must specify a valid array ref to the method 'taxa'");
293 $self->{'_taxa'} = $value;
294 $self->{'_num_taxa'} = scalar @
$value;
296 return $self->{'_taxa'};
303 Usage : $obj->num_taxa($newval)
304 Function: Get the number of Taxa
305 Returns : value of num_taxa
313 return $self->{'_num_taxa'};
317 *num_samples
= \
&num_taxa
;
323 Usage : my $rfloat = $node->random($size)
324 Function: Generates a random number between 0 and $size
325 This is abstracted so that someone can override and provide their
326 own special RNG. This is expected to be a uniform RNG.
327 Returns : Floating point random
328 Args : $maximum size for random number (defaults to 1)
334 my ($self,$max) = @_;
339 =head2 random_tree_method
341 Title : random_tree_method
342 Usage : $obj->random_tree_method($newval)
345 Returns : value of random_tree_method (a scalar)
346 Args : on set, new value (a scalar or undef, optional)
351 sub random_tree_method
{
354 return $self->{'random_tree_method'} = shift if @_;
355 return $self->{'random_tree_method'} || $Defaults{'DefaultTreeMethod'};
361 Usage : $obj->nodetype($newval)
364 Returns : value of nodetype (a scalar)
365 Args : on set, new value (a scalar or undef, optional)
371 my ($self,$value) = @_;
372 if( defined $value) {
373 eval "require $value";
374 if( $@
) { $self->throw("$@: Unrecognized Node type for ".ref($self).
377 my $a = bless {},$value;
378 unless( $a->isa('Bio::Tree::NodeI') ) {
379 $self->throw("Must provide a valid Bio::Tree::NodeI or child class to SeqFactory Not $value");
381 $self->{'nodetype'} = $value;
383 return $self->{'nodetype'};
387 # The assignment of times are based on Mike Sanderson's r8s code
388 # The topology assignment code is based on Richard Hudson's
392 sub rand_yule_c_tree
{
393 my ($self,$speciation) = @_;
394 $speciation ||= $Defaults{'Speciation'};
395 my $n_taxa = $self->num_taxa;
396 my $taxa = $self->taxa || [];
397 my $nodetype = $self->nodetype;
399 my $randfuncs = Bio
::Tools
::RandomDistFunctions
->new();
400 my $rate = $Defaults{'YuleRate'};
401 my (@tree,@list,@times,$i,$in);
402 my $max = 2 * $n_taxa - 1;
403 for($in=0;$in < $max; $in++ ) {
404 push @tree, { 'nodenum' => "Node$in" };
407 for($in=0;$in < $n_taxa;$in++) {
408 $tree[$in]->{'time'} = 0;
409 $tree[$in]->{'desc1'} = undef;
410 $tree[$in]->{'desc2'} = undef;
411 if( my $r = $taxa->[$in] ) {
412 $tree[$in]->{'nodenum'} = $r;
417 for( $i = 0; $i < $n_taxa - 1; $i++ ) {
418 # draw random interval times
419 push @times, $randfuncs->rand_birth_distribution($speciation);
421 # sort smallest to largest
422 @times = sort {$a <=> $b} @times;
423 # topology generation
424 for ($in = $n_taxa; $in > 1; $in-- ) {
425 my $time = shift @times;
427 my $pick = int $self->random($in);
428 my $nodeindex = $list[$pick];
429 $tree[$list[$pick]]->{'time'} = $time;
430 my $swap = 2 * $n_taxa - $in;
431 $tree[$swap]->{'desc1'} = $nodeindex;
432 $list[$pick] = $list[$in-1];
434 $pick = int rand($in - 1);
435 $nodeindex = $list[$pick];
436 $tree[$list[$pick]]->{'time'} = $time;
437 $tree[$swap]->{'desc2'} = $nodeindex;
438 $list[$pick] = $swap;
440 $tree[-1]->{'time'} = shift @times;
446 sub rand_birthdeath_backwards_tree
{
448 my $n_taxa = $self->num_taxa;
449 my $taxa = $self->taxa || [];
451 my $randfuncs = Bio
::Tools
::RandomDistFunctions
->new();
452 my $rate = $Defaults{'YuleRate'};
453 my (@tree,@list,@times,$i,$in);
454 my $max = 2 * $n_taxa - 1;
455 for($in=0;$in < $max; $in++ ) {
456 push @tree, { 'nodenum' => "Node$in" };
459 for($in=0;$in < $n_taxa;$in++) {
460 $tree[$in]->{'time'} = 0;
461 $tree[$in]->{'desc1'} = undef;
462 $tree[$in]->{'desc2'} = undef;
463 if( my $r = $taxa->[$in] ) {
464 # deal with pre-labeled nodes
465 $tree[$in]->{'nodenum'} = $r;
471 # topology generation
472 for ($in = $n_taxa; $in > 1; $in-- ) {
473 my $pick = int $self->random($in);
474 my $nodeindex = $list[$pick];
475 my $swap = 2 * $n_taxa - $in;
476 $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);;
477 $tree[$list[$pick]]->{'time'} = $time;
478 $tree[$swap]->{'desc1'} = $nodeindex;
479 $list[$pick] = $list[$in-1];
481 $pick = int rand($in - 1);
482 $nodeindex = $list[$pick];
483 $tree[$list[$pick]]->{'time'} = $time;
484 $tree[$swap]->{'desc2'} = $nodeindex;
485 $list[$pick] = $swap;
487 my $root = $tree[-1];
488 $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);;
489 $root->{'time'} = $time;
491 # Normalize times by the root node...
492 for my $node ( @tree ) {
493 $node->{'time'} /= $root->{'time'};
499 # The assignment of times are based on Mike Sanderson's r8s code
500 # The topology assignment code is based on Richard Hudson's
503 sub rand_birth_death_tree
{
504 # Still need to finish
505 # my ($self,$spec_rate,$extinct_rate,$char_rate) = @_;
506 # my $n_taxa = $self->num_taxa;
507 # my $dt = 0.1 / $n_taxa;
509 # my $max = 3 * $n_taxa - 1;
512 # for($in=0;$in < $size;$in++) {
513 # push @tree, { 'nodenum' => $taxa->[$in] || "Node$in",
521 # while( $n_taxa > 1 ) {
522 # if ( event($dt * $spec_rate, $n_taxa) ) {
523 # my $pick = int $self->random($n_taxa);
524 # my $pick2 = int $self->random($n_taxa);
525 # while( $pick2 == $pick ) {
526 # $pick2 = int $self->random($n_taxa);
530 # $tree[$swap]->{'desc1'} = $nodeindex;
536 # $list[$pick] = $list[$in-1];
538 # $pick = int rand($in - 1);
539 # $nodeindex = $list[$pick];
540 # $tree[$swap]->{'desc2'} = $nodeindex;
541 # $list[$pick] = $swap;
542 # $tree[$swap]->{'time'} = $times[$ix++];