trust your DB implementation, particularly if ancestor data are already available
[bioperl-live.git] / Bio / Tree / RandomFactory.pm
blobe478777555358b3c529d38d8c3ef982bbe6f59af
2 # BioPerl module for Bio::Tree::RandomFactory
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tree::RandomFactory - TreeFactory for generating Random Trees
18 =head1 SYNOPSIS
20 use Bio::Tree::RandomFactory
21 my @taxonnames;
22 my $factory = Bio::Tree::RandomFactory->new( -taxa => \@taxonnames,
23 -maxcount => 10);
25 # or for anonymous samples
27 my $factory = Bio::Tree::RandomFactory->new( -num_taxa => 6,
28 -maxcount => 50);
31 my $tree = $factory->next_tree;
33 =head1 DESCRIPTION
35 Builds a random tree every time next_tree is called or up to -maxcount times.
37 This module was originally written for Coalescent simulations see
38 L<Bio::PopGen::Simulation::Coalescent>. I've left the next_tree
39 method intact although it is not generating random trees in the
40 phylogenetic sense. I would be happy for someone to provide
41 alternative implementations which can be used here. As written it
42 will generate random topologies but the branch lengths are built from
43 assumptions in the coalescent and are not appropriate for phylogenetic
44 analyses.
46 This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
48 Hudson, R. R. 1990. Gene genealogies and the coalescent
49 process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford
50 surveys in evolutionary biology. Vol. 7. Oxford University
51 Press, New York
53 Sanderson, M ...
55 =head1 FEEDBACK
57 =head2 Mailing Lists
59 User feedback is an integral part of the evolution of this and other
60 Bioperl modules. Send your comments and suggestions preferably to
61 the Bioperl mailing list. Your participation is much appreciated.
63 bioperl-l@bioperl.org - General discussion
64 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
66 =head2 Support
68 Please direct usage questions or support issues to the mailing list:
70 I<bioperl-l@bioperl.org>
72 rather than to the module maintainer directly. Many experienced and
73 reponsive experts will be able look at the problem and quickly
74 address it. Please include a thorough description of the problem
75 with code and data examples if at all possible.
77 =head2 Reporting Bugs
79 Report bugs to the Bioperl bug tracking system to help us keep track
80 of the bugs and their resolution. Bug reports can be submitted via
81 the web:
83 https://github.com/bioperl/bioperl-live/issues
85 =head1 AUTHOR - Jason Stajich
87 Email jason-AT-bioperl.org
89 =head1 CONTRIBUTORS
91 Matthew Hahn, E<lt>matthew.hahn@duke.eduE<gt>
92 Mike Sanderson
94 =head1 APPENDIX
96 The rest of the documentation details each of the object methods.
97 Internal methods are usually preceded with a _
99 =cut
102 # Let the code begin...
105 package Bio::Tree::RandomFactory;
106 use vars qw($PRECISION_DIGITS $DefaultNodeType %Defaults);
107 use strict;
109 $PRECISION_DIGITS = 3; # Precision for the branchlength
110 $DefaultNodeType = 'Bio::Tree::Node';
111 %Defaults = ('YuleRate' => 1.0, # as set by Sanderson in Rates
112 'Speciation' => 1.0, #
113 'DefaultTreeMethod' => 'yule',
116 use Bio::Tools::RandomDistFunctions;
117 use Bio::Tree::Tree;
119 use base qw(Bio::Root::Root Bio::Factory::TreeFactoryI);
121 =head2 new
123 Title : new
124 Usage : my $factory = Bio::Tree::RandomFactory->new(-samples => \@samples,
125 -maxcount=> $N);
126 Function: Initializes a Bio::Tree::RandomFactory object
127 Returns : Bio::Tree::RandomFactory
128 Args : -nodetype => Type of Nodes to create [default Bio::Tree::Node]
129 -maxcount => [optional] Maximum num trees to create
130 -randtype => Type of random trees so far support
131 - yule/backward_yule/BY [default]
132 - forward_yule/FY
133 - birthdeath_forward/BDF
134 - birthdeath_backwards/BDB
137 ONE of the following must be specified
138 -taxa => $arrayref of taxa names
139 -num_taxa => integer indicating number of taxa in the tree
141 =cut
143 sub new{
144 my ($class,@args) = @_;
145 my $self = $class->SUPER::new(@args);
147 $self->{'_treecounter'} = 0;
148 $self->{'_maxcount'} = 0;
149 my ($nodetype,$randtype,
150 $maxcount, $samps,$samplesize,
151 $taxa, $num_taxa) = $self->_rearrange([qw(NODETYPE
152 RANDTYPE
153 MAXCOUNT
154 SAMPLES
155 SAMPLE_SIZE
156 TAXA
157 NUM_TAXA)],
158 @args);
159 my @taxa;
160 $nodetype ||= $DefaultNodeType;
161 $self->nodetype($nodetype);
162 $taxa = $samps if defined $samps && ! defined $taxa;
163 $num_taxa = $samplesize if $samplesize && ! $num_taxa;
164 if( ! defined $taxa ) {
165 if( ! defined $num_taxa || $num_taxa <= 0 ) {
166 $self->throw("Must specify a valid num_taxa if parameter -TAXA is not specified");
168 foreach ( 1..$num_taxa ) { push @taxa, "Taxon$_"; }
169 } else {
170 if( ref($taxa) !~ /ARRAY/i ) {
171 $self->throw("Must specify a valid ARRAY reference to the parameter -TAXA, did you forget a leading '\\'? for $taxa");
173 @taxa = @$taxa;
176 $self->taxa(\@taxa);
177 defined $maxcount && $self->maxcount($maxcount);
178 $self->{'_count'} = 0;
179 return $self;
182 =head2 next_tree
184 Title : next_tree
185 Usage : my $tree = $factory->next_tree
186 Function: Returns a random tree based on the initialized number of nodes
187 NOTE: if maxcount is not specified on initialization or
188 set to a valid integer, subsequent calls to next_tree will
189 continue to return random trees and never return undef
191 Returns : Bio::Tree::TreeI object
192 Args : none
194 =cut
197 sub next_tree{
198 my ($self,%options) = @_;
199 return if $self->maxcount &&
200 $self->{'_count'}++ >= $self->maxcount;
201 my $rand_type = $options{'randtype'} || $self->random_tree_method;
202 my $nodetype = $self->nodetype;
203 my $treearray;
205 if( $rand_type =~ /(birthdeath_forward|birth|BDF)/i ) {
207 } elsif ( $rand_type =~ /(birthdeath_backward|BDB)/i ) {
208 $treearray = $self->rand_birthdeath_backwards_tree;
209 } elsif( $rand_type =~ /(BY|backwards_yule)/i ||
210 $rand_type =~ /^yule/i ) {
211 my $speciation = $options{'speciation'}; # can be undef
212 $treearray = $self->rand_yule_c_tree($speciation);
213 } else {
214 $self->warn("unrecognized random type $rand_type");
217 my @nodes = ();
218 foreach my $n ( @$treearray ) {
219 for my $k ( qw(desc1 desc2) ) {
220 next unless defined $n->{$k};
221 push @{$n->{'descendents'}}, $nodes[$n->{$k}];
223 push @nodes,
224 $nodetype->new(-id => $n->{'nodenum'},
225 -branch_length => $n->{'time'},
226 -descendents => $n->{'descendents'},
229 my $T = Bio::Tree::Tree->new(-root => pop @nodes );
230 return $T;
234 =head2 maxcount
236 Title : maxcount
237 Usage : $obj->maxcount($newval)
238 Function:
239 Returns : Maxcount value
240 Args : newvalue (optional)
243 =cut
245 sub maxcount{
246 my ($self,$value) = @_;
247 if( defined $value) {
248 if( $value =~ /^(\d+)/ ) {
249 $self->{'_maxcount'} = $1;
250 } else {
251 $self->warn("Must specify a valid Positive integer to maxcount");
252 $self->{'_maxcount'} = 0;
255 return $self->{'_maxcount'};
259 =head2 reset_tree_count
261 Title : reset_tree_count
262 Usage : $factory->reset_tree_count;
263 Function: Reset the tree counter
264 Returns : none
265 Args : none
268 =cut
270 sub reset_count{
271 shift->{'_count'} = 0;
274 =head2 taxa
276 Title : taxa
277 Usage : $obj->taxa($newval)
278 Function: Set the leaf node names
279 Returns : value of taxa
280 Args : Arrayref of Taxon names
283 =cut
285 sub taxa {
286 my ($self,$value) = @_;
287 if( defined $value) {
288 if( ref($value) !~ /ARRAY/i ) {
289 $self->warn("Must specify a valid array ref to the method 'taxa'");
290 $value = [];
292 $self->{'_taxa'} = $value;
293 $self->{'_num_taxa'} = scalar @$value;
295 return $self->{'_taxa'};
299 =head2 num_taxa
301 Title : num_taxa
302 Usage : $obj->num_taxa($newval)
303 Function: Get the number of Taxa
304 Returns : value of num_taxa
305 Args : none
308 =cut
310 sub num_taxa {
311 my ($self) = @_;
312 return $self->{'_num_taxa'};
315 # alias old methods
316 *num_samples = \&num_taxa;
317 *samples = \&taxa;
319 =head2 random
321 Title : random
322 Usage : my $rfloat = $node->random($size)
323 Function: Generates a random number between 0 and $size
324 This is abstracted so that someone can override and provide their
325 own special RNG. This is expected to be a uniform RNG.
326 Returns : Floating point random
327 Args : $maximum size for random number (defaults to 1)
330 =cut
332 sub random{
333 my ($self,$max) = @_;
334 return rand($max);
338 =head2 random_tree_method
340 Title : random_tree_method
341 Usage : $obj->random_tree_method($newval)
342 Function:
343 Example :
344 Returns : value of random_tree_method (a scalar)
345 Args : on set, new value (a scalar or undef, optional)
348 =cut
350 sub random_tree_method{
351 my $self = shift;
353 return $self->{'random_tree_method'} = shift if @_;
354 return $self->{'random_tree_method'} || $Defaults{'DefaultTreeMethod'};
357 =head2 nodetype
359 Title : nodetype
360 Usage : $obj->nodetype($newval)
361 Function:
362 Example :
363 Returns : value of nodetype (a scalar)
364 Args : on set, new value (a scalar or undef, optional)
367 =cut
369 sub nodetype{
370 my ($self,$value) = @_;
371 if( defined $value) {
372 eval "require $value";
373 if( $@ ) { $self->throw("$@: Unrecognized Node type for ".ref($self).
374 "'$value'");}
376 my $a = bless {},$value;
377 unless( $a->isa('Bio::Tree::NodeI') ) {
378 $self->throw("Must provide a valid Bio::Tree::NodeI or child class to SeqFactory Not $value");
380 $self->{'nodetype'} = $value;
382 return $self->{'nodetype'};
386 # The assignment of times are based on Mike Sanderson's r8s code
387 # The topology assignment code is based on Richard Hudson's
388 # make_trees
391 sub rand_yule_c_tree {
392 my ($self,$speciation) = @_;
393 $speciation ||= $Defaults{'Speciation'};
394 my $n_taxa = $self->num_taxa;
395 my $taxa = $self->taxa || [];
396 my $nodetype = $self->nodetype;
398 my $randfuncs = Bio::Tools::RandomDistFunctions->new();
399 my $rate = $Defaults{'YuleRate'};
400 my (@tree,@list,@times,$i,$in);
401 my $max = 2 * $n_taxa - 1;
402 for($in=0;$in < $max; $in++ ) {
403 push @tree, { 'nodenum' => "Node$in" };
405 # setup leaf nodes
406 for($in=0;$in < $n_taxa;$in++) {
407 $tree[$in]->{'time'} = 0;
408 $tree[$in]->{'desc1'} = undef;
409 $tree[$in]->{'desc2'} = undef;
410 if( my $r = $taxa->[$in] ) {
411 $tree[$in]->{'nodenum'} = $r;
413 push @list, $in;
416 for( $i = 0; $i < $n_taxa - 1; $i++ ) {
417 # draw random interval times
418 push @times, $randfuncs->rand_birth_distribution($speciation);
420 # sort smallest to largest
421 @times = sort {$a <=> $b} @times;
422 # topology generation
423 for ($in = $n_taxa; $in > 1; $in-- ) {
424 my $time = shift @times;
426 my $pick = int $self->random($in);
427 my $nodeindex = $list[$pick];
428 $tree[$list[$pick]]->{'time'} = $time;
429 my $swap = 2 * $n_taxa - $in;
430 $tree[$swap]->{'desc1'} = $nodeindex;
431 $list[$pick] = $list[$in-1];
433 $pick = int rand($in - 1);
434 $nodeindex = $list[$pick];
435 $tree[$list[$pick]]->{'time'} = $time;
436 $tree[$swap]->{'desc2'} = $nodeindex;
437 $list[$pick] = $swap;
439 $tree[-1]->{'time'} = shift @times;
440 return \@tree;
445 sub rand_birthdeath_backwards_tree {
446 my ($self) = @_;
447 my $n_taxa = $self->num_taxa;
448 my $taxa = $self->taxa || [];
450 my $randfuncs = Bio::Tools::RandomDistFunctions->new();
451 my $rate = $Defaults{'YuleRate'};
452 my (@tree,@list,@times,$i,$in);
453 my $max = 2 * $n_taxa - 1;
454 for($in=0;$in < $max; $in++ ) {
455 push @tree, { 'nodenum' => "Node$in" };
457 # setup leaf nodes
458 for($in=0;$in < $n_taxa;$in++) {
459 $tree[$in]->{'time'} = 0;
460 $tree[$in]->{'desc1'} = undef;
461 $tree[$in]->{'desc2'} = undef;
462 if( my $r = $taxa->[$in] ) {
463 # deal with pre-labeled nodes
464 $tree[$in]->{'nodenum'} = $r;
466 push @list, $in;
468 my ($time) = (0);
470 # topology generation
471 for ($in = $n_taxa; $in > 1; $in-- ) {
472 my $pick = int $self->random($in);
473 my $nodeindex = $list[$pick];
474 my $swap = 2 * $n_taxa - $in;
475 $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);;
476 $tree[$list[$pick]]->{'time'} = $time;
477 $tree[$swap]->{'desc1'} = $nodeindex;
478 $list[$pick] = $list[$in-1];
480 $pick = int rand($in - 1);
481 $nodeindex = $list[$pick];
482 $tree[$list[$pick]]->{'time'} = $time;
483 $tree[$swap]->{'desc2'} = $nodeindex;
484 $list[$pick] = $swap;
486 my $root = $tree[-1];
487 $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);;
488 $root->{'time'} = $time;
490 # Normalize times by the root node...
491 for my $node ( @tree ) {
492 $node->{'time'} /= $root->{'time'};
494 return \@tree;
498 # The assignment of times are based on Mike Sanderson's r8s code
499 # The topology assignment code is based on Richard Hudson's
500 # make_trees
502 sub rand_birth_death_tree {
503 # Still need to finish
504 # my ($self,$spec_rate,$extinct_rate,$char_rate) = @_;
505 # my $n_taxa = $self->num_taxa;
506 # my $dt = 0.1 / $n_taxa;
507 # my @tree;
508 # my $max = 3 * $n_taxa - 1;
509 # # setup leaf nodes
511 # for($in=0;$in < $size;$in++) {
512 # push @tree, { 'nodenum' => $taxa->[$in] || "Node$in",
513 # 'time' => 0,
514 # 'desc1' => undef,
515 # 'desc2' => undef,
516 # };
518 # my $time = $dt;
519 # my $idx = 0;
520 # while( $n_taxa > 1 ) {
521 # if ( event($dt * $spec_rate, $n_taxa) ) {
522 # my $pick = int $self->random($n_taxa);
523 # my $pick2 = int $self->random($n_taxa);
524 # while( $pick2 == $pick ) {
525 # $pick2 = int $self->random($n_taxa);
527 # to finish....
529 # $tree[$swap]->{'desc1'} = $nodeindex;
535 # $list[$pick] = $list[$in-1];
537 # $pick = int rand($in - 1);
538 # $nodeindex = $list[$pick];
539 # $tree[$swap]->{'desc2'} = $nodeindex;
540 # $list[$pick] = $swap;
541 # $tree[$swap]->{'time'} = $times[$ix++];