tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Tree / RandomFactory.pm
blob41e374ff96bffa9686231bf709c219d9e1c05047
1 # $Id$
3 # BioPerl module for Bio::Tree::RandomFactory
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason@bioperl.org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tree::RandomFactory - TreeFactory for generating Random Trees
19 =head1 SYNOPSIS
21 use Bio::Tree::RandomFactory
22 my @taxonnames;
23 my $factory = Bio::Tree::RandomFactory->new( -taxa => \@taxonnames,
24 -maxcount => 10);
26 # or for anonymous samples
28 my $factory = Bio::Tree::RandomFactory->new( -num_taxa => 6,
29 -maxcount => 50);
32 my $tree = $factory->next_tree;
34 =head1 DESCRIPTION
36 Builds a random tree every time next_tree is called or up to -maxcount times.
38 This module was originally written for Coalescent simulations see
39 L<Bio::PopGen::Simulation::Coalescent>. I've left the next_tree
40 method intact although it is not generating random trees in the
41 phylogenetic sense. I would be happy for someone to provide
42 alternative implementations which can be used here. As written it
43 will generate random topologies but the branch lengths are built from
44 assumptions in the coalescent and are not appropriate for phylogenetic
45 analyses.
47 This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
49 Hudson, R. R. 1990. Gene genealogies and the coalescent
50 process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford
51 surveys in evolutionary biology. Vol. 7. Oxford University
52 Press, New York
54 Sanderson, M ...
56 =head1 FEEDBACK
58 =head2 Mailing Lists
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to
62 the Bioperl mailing list. Your participation is much appreciated.
64 bioperl-l@bioperl.org - General discussion
65 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
67 =head2 Support
69 Please direct usage questions or support issues to the mailing list:
71 I<bioperl-l@bioperl.org>
73 rather than to the module maintainer directly. Many experienced and
74 reponsive experts will be able look at the problem and quickly
75 address it. Please include a thorough description of the problem
76 with code and data examples if at all possible.
78 =head2 Reporting Bugs
80 Report bugs to the Bioperl bug tracking system to help us keep track
81 of the bugs and their resolution. Bug reports can be submitted via
82 the web:
84 http://bugzilla.open-bio.org/
86 =head1 AUTHOR - Jason Stajich
88 Email jason-AT-bioperl.org
90 =head1 CONTRIBUTORS
92 Matthew Hahn, E<lt>matthew.hahn@duke.eduE<gt>
93 Mike Sanderson
95 =head1 APPENDIX
97 The rest of the documentation details each of the object methods.
98 Internal methods are usually preceded with a _
100 =cut
103 # Let the code begin...
106 package Bio::Tree::RandomFactory;
107 use vars qw($PRECISION_DIGITS $DefaultNodeType %Defaults);
108 use strict;
110 $PRECISION_DIGITS = 3; # Precision for the branchlength
111 $DefaultNodeType = 'Bio::Tree::Node';
112 %Defaults = ('YuleRate' => 1.0, # as set by Sanderson in Rates
113 'Speciation' => 1.0, #
114 'DefaultTreeMethod' => 'yule',
117 use Bio::Tools::RandomDistFunctions;
118 use Bio::Tree::Tree;
120 use base qw(Bio::Root::Root Bio::Factory::TreeFactoryI);
122 =head2 new
124 Title : new
125 Usage : my $factory = Bio::Tree::RandomFactory->new(-samples => \@samples,
126 -maxcount=> $N);
127 Function: Initializes a Bio::Tree::RandomFactory object
128 Returns : Bio::Tree::RandomFactory
129 Args : -nodetype => Type of Nodes to create [default Bio::Tree::Node]
130 -maxcount => [optional] Maximum num trees to create
131 -randtype => Type of random trees so far support
132 - yule/backward_yule/BY [default]
133 - forward_yule/FY
134 - birthdeath_forward/BDF
135 - birthdeath_backwards/BDB
138 ONE of the following must be specified
139 -taxa => $arrayref of taxa names
140 -num_taxa => integer indicating number of taxa in the tree
142 =cut
144 sub new{
145 my ($class,@args) = @_;
146 my $self = $class->SUPER::new(@args);
148 $self->{'_treecounter'} = 0;
149 $self->{'_maxcount'} = 0;
150 my ($nodetype,$randtype,
151 $maxcount, $samps,$samplesize,
152 $taxa, $num_taxa) = $self->_rearrange([qw(NODETYPE
153 RANDTYPE
154 MAXCOUNT
155 SAMPLES
156 SAMPLE_SIZE
157 TAXA
158 NUM_TAXA)],
159 @args);
160 my @taxa;
161 $nodetype ||= $DefaultNodeType;
162 $self->nodetype($nodetype);
163 $taxa = $samps if defined $samps && ! defined $taxa;
164 $num_taxa = $samplesize if $samplesize && ! $num_taxa;
165 if( ! defined $taxa ) {
166 if( ! defined $num_taxa || $num_taxa <= 0 ) {
167 $self->throw("Must specify a valid num_taxa if parameter -TAXA is not specified");
169 foreach ( 1..$num_taxa ) { push @taxa, "Taxon$_"; }
170 } else {
171 if( ref($taxa) !~ /ARRAY/i ) {
172 $self->throw("Must specify a valid ARRAY reference to the parameter -TAXA, did you forget a leading '\\'? for $taxa");
174 @taxa = @$taxa;
177 $self->taxa(\@taxa);
178 defined $maxcount && $self->maxcount($maxcount);
179 $self->{'_count'} = 0;
180 return $self;
183 =head2 next_tree
185 Title : next_tree
186 Usage : my $tree = $factory->next_tree
187 Function: Returns a random tree based on the initialized number of nodes
188 NOTE: if maxcount is not specified on initialization or
189 set to a valid integer, subsequent calls to next_tree will
190 continue to return random trees and never return undef
192 Returns : Bio::Tree::TreeI object
193 Args : none
195 =cut
198 sub next_tree{
199 my ($self,%options) = @_;
200 return if $self->maxcount &&
201 $self->{'_count'}++ >= $self->maxcount;
202 my $rand_type = $options{'randtype'} || $self->random_tree_method;
203 my $nodetype = $self->nodetype;
204 my $treearray;
206 if( $rand_type =~ /(birthdeath_forward|birth|BDF)/i ) {
208 } elsif ( $rand_type =~ /(birthdeath_backward|BDB)/i ) {
209 $treearray = $self->rand_birthdeath_backwards_tree;
210 } elsif( $rand_type =~ /(BY|backwards_yule)/i ||
211 $rand_type =~ /^yule/i ) {
212 my $speciation = $options{'speciation'}; # can be undef
213 $treearray = $self->rand_yule_c_tree($speciation);
214 } else {
215 $self->warn("unrecognized random type $rand_type");
218 my @nodes = ();
219 foreach my $n ( @$treearray ) {
220 for my $k ( qw(desc1 desc2) ) {
221 next unless defined $n->{$k};
222 push @{$n->{'descendents'}}, $nodes[$n->{$k}];
224 push @nodes,
225 $nodetype->new(-id => $n->{'nodenum'},
226 -branch_length => $n->{'time'},
227 -descendents => $n->{'descendents'},
230 my $T = Bio::Tree::Tree->new(-root => pop @nodes );
231 return $T;
235 =head2 maxcount
237 Title : maxcount
238 Usage : $obj->maxcount($newval)
239 Function:
240 Returns : Maxcount value
241 Args : newvalue (optional)
244 =cut
246 sub maxcount{
247 my ($self,$value) = @_;
248 if( defined $value) {
249 if( $value =~ /^(\d+)/ ) {
250 $self->{'_maxcount'} = $1;
251 } else {
252 $self->warn("Must specify a valid Positive integer to maxcount");
253 $self->{'_maxcount'} = 0;
256 return $self->{'_maxcount'};
260 =head2 reset_tree_count
262 Title : reset_tree_count
263 Usage : $factory->reset_tree_count;
264 Function: Reset the tree counter
265 Returns : none
266 Args : none
269 =cut
271 sub reset_count{
272 shift->{'_count'} = 0;
275 =head2 taxa
277 Title : taxa
278 Usage : $obj->taxa($newval)
279 Function: Set the leaf node names
280 Returns : value of taxa
281 Args : Arrayref of Taxon names
284 =cut
286 sub taxa {
287 my ($self,$value) = @_;
288 if( defined $value) {
289 if( ref($value) !~ /ARRAY/i ) {
290 $self->warn("Must specify a valid array ref to the method 'taxa'");
291 $value = [];
293 $self->{'_taxa'} = $value;
294 $self->{'_num_taxa'} = scalar @$value;
296 return $self->{'_taxa'};
300 =head2 num_taxa
302 Title : num_taxa
303 Usage : $obj->num_taxa($newval)
304 Function: Get the number of Taxa
305 Returns : value of num_taxa
306 Args : none
309 =cut
311 sub num_taxa {
312 my ($self) = @_;
313 return $self->{'_num_taxa'};
316 # alias old methods
317 *num_samples = \&num_taxa;
318 *samples = \&taxa;
320 =head2 random
322 Title : random
323 Usage : my $rfloat = $node->random($size)
324 Function: Generates a random number between 0 and $size
325 This is abstracted so that someone can override and provide their
326 own special RNG. This is expected to be a uniform RNG.
327 Returns : Floating point random
328 Args : $maximum size for random number (defaults to 1)
331 =cut
333 sub random{
334 my ($self,$max) = @_;
335 return rand($max);
339 =head2 random_tree_method
341 Title : random_tree_method
342 Usage : $obj->random_tree_method($newval)
343 Function:
344 Example :
345 Returns : value of random_tree_method (a scalar)
346 Args : on set, new value (a scalar or undef, optional)
349 =cut
351 sub random_tree_method{
352 my $self = shift;
354 return $self->{'random_tree_method'} = shift if @_;
355 return $self->{'random_tree_method'} || $Defaults{'DefaultTreeMethod'};
358 =head2 nodetype
360 Title : nodetype
361 Usage : $obj->nodetype($newval)
362 Function:
363 Example :
364 Returns : value of nodetype (a scalar)
365 Args : on set, new value (a scalar or undef, optional)
368 =cut
370 sub nodetype{
371 my ($self,$value) = @_;
372 if( defined $value) {
373 eval "require $value";
374 if( $@ ) { $self->throw("$@: Unrecognized Node type for ".ref($self).
375 "'$value'");}
377 my $a = bless {},$value;
378 unless( $a->isa('Bio::Tree::NodeI') ) {
379 $self->throw("Must provide a valid Bio::Tree::NodeI or child class to SeqFactory Not $value");
381 $self->{'nodetype'} = $value;
383 return $self->{'nodetype'};
387 # The assignment of times are based on Mike Sanderson's r8s code
388 # The topology assignment code is based on Richard Hudson's
389 # make_trees
392 sub rand_yule_c_tree {
393 my ($self,$speciation) = @_;
394 $speciation ||= $Defaults{'Speciation'};
395 my $n_taxa = $self->num_taxa;
396 my $taxa = $self->taxa || [];
397 my $nodetype = $self->nodetype;
399 my $randfuncs = Bio::Tools::RandomDistFunctions->new();
400 my $rate = $Defaults{'YuleRate'};
401 my (@tree,@list,@times,$i,$in);
402 my $max = 2 * $n_taxa - 1;
403 for($in=0;$in < $max; $in++ ) {
404 push @tree, { 'nodenum' => "Node$in" };
406 # setup leaf nodes
407 for($in=0;$in < $n_taxa;$in++) {
408 $tree[$in]->{'time'} = 0;
409 $tree[$in]->{'desc1'} = undef;
410 $tree[$in]->{'desc2'} = undef;
411 if( my $r = $taxa->[$in] ) {
412 $tree[$in]->{'nodenum'} = $r;
414 push @list, $in;
417 for( $i = 0; $i < $n_taxa - 1; $i++ ) {
418 # draw random interval times
419 push @times, $randfuncs->rand_birth_distribution($speciation);
421 # sort smallest to largest
422 @times = sort {$a <=> $b} @times;
423 # topology generation
424 for ($in = $n_taxa; $in > 1; $in-- ) {
425 my $time = shift @times;
427 my $pick = int $self->random($in);
428 my $nodeindex = $list[$pick];
429 $tree[$list[$pick]]->{'time'} = $time;
430 my $swap = 2 * $n_taxa - $in;
431 $tree[$swap]->{'desc1'} = $nodeindex;
432 $list[$pick] = $list[$in-1];
434 $pick = int rand($in - 1);
435 $nodeindex = $list[$pick];
436 $tree[$list[$pick]]->{'time'} = $time;
437 $tree[$swap]->{'desc2'} = $nodeindex;
438 $list[$pick] = $swap;
440 $tree[-1]->{'time'} = shift @times;
441 return \@tree;
446 sub rand_birthdeath_backwards_tree {
447 my ($self) = @_;
448 my $n_taxa = $self->num_taxa;
449 my $taxa = $self->taxa || [];
451 my $randfuncs = Bio::Tools::RandomDistFunctions->new();
452 my $rate = $Defaults{'YuleRate'};
453 my (@tree,@list,@times,$i,$in);
454 my $max = 2 * $n_taxa - 1;
455 for($in=0;$in < $max; $in++ ) {
456 push @tree, { 'nodenum' => "Node$in" };
458 # setup leaf nodes
459 for($in=0;$in < $n_taxa;$in++) {
460 $tree[$in]->{'time'} = 0;
461 $tree[$in]->{'desc1'} = undef;
462 $tree[$in]->{'desc2'} = undef;
463 if( my $r = $taxa->[$in] ) {
464 # deal with pre-labeled nodes
465 $tree[$in]->{'nodenum'} = $r;
467 push @list, $in;
469 my ($time) = (0);
471 # topology generation
472 for ($in = $n_taxa; $in > 1; $in-- ) {
473 my $pick = int $self->random($in);
474 my $nodeindex = $list[$pick];
475 my $swap = 2 * $n_taxa - $in;
476 $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);;
477 $tree[$list[$pick]]->{'time'} = $time;
478 $tree[$swap]->{'desc1'} = $nodeindex;
479 $list[$pick] = $list[$in-1];
481 $pick = int rand($in - 1);
482 $nodeindex = $list[$pick];
483 $tree[$list[$pick]]->{'time'} = $time;
484 $tree[$swap]->{'desc2'} = $nodeindex;
485 $list[$pick] = $swap;
487 my $root = $tree[-1];
488 $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);;
489 $root->{'time'} = $time;
491 # Normalize times by the root node...
492 for my $node ( @tree ) {
493 $node->{'time'} /= $root->{'time'};
495 return \@tree;
499 # The assignment of times are based on Mike Sanderson's r8s code
500 # The topology assignment code is based on Richard Hudson's
501 # make_trees
503 sub rand_birth_death_tree {
504 # Still need to finish
505 # my ($self,$spec_rate,$extinct_rate,$char_rate) = @_;
506 # my $n_taxa = $self->num_taxa;
507 # my $dt = 0.1 / $n_taxa;
508 # my @tree;
509 # my $max = 3 * $n_taxa - 1;
510 # # setup leaf nodes
512 # for($in=0;$in < $size;$in++) {
513 # push @tree, { 'nodenum' => $taxa->[$in] || "Node$in",
514 # 'time' => 0,
515 # 'desc1' => undef,
516 # 'desc2' => undef,
517 # };
519 # my $time = $dt;
520 # my $idx = 0;
521 # while( $n_taxa > 1 ) {
522 # if ( event($dt * $spec_rate, $n_taxa) ) {
523 # my $pick = int $self->random($n_taxa);
524 # my $pick2 = int $self->random($n_taxa);
525 # while( $pick2 == $pick ) {
526 # $pick2 = int $self->random($n_taxa);
528 # to finish....
530 # $tree[$swap]->{'desc1'} = $nodeindex;
536 # $list[$pick] = $list[$in-1];
538 # $pick = int rand($in - 1);
539 # $nodeindex = $list[$pick];
540 # $tree[$swap]->{'desc2'} = $nodeindex;
541 # $list[$pick] = $swap;
542 # $tree[$swap]->{'time'} = $times[$ix++];