2 # -*-Perl-*- (for my emacs)
9 bp_heterogeneity_test - a test for distinguishing between selection and population expansion.
13 heterogenetity_test -mut_1/--mutsyn synonymous_mut_count -mut_2/--mutnon nonsyn_mut_count -s/--smaplesize sample_size [-i/--iterations iterations] [-o/--observed observed_D] [-v/--verbose] [--silent ] [-m/--method tajimaD or fuD] [--precision]
17 This is an implementation of the Heterogenetity test as described in
18 Hahn MW, Rausher MD, and Cunningham CW. 2002. Genetics 161(1):11-20.
22 Options in brackets above are optional
24 -s or --samplesize samplesize
25 -mut_1 or --mutsyn synonymous mutation count
26 -mut_2 or --mutnon nonsynonmous mutation count
27 -i or --iterations number of iterations
28 -o or --observed observed D
29 -m or --method tajimaD or fuD for Tajima's D or Fu and Li's D
30 -v or --verbose print out extra verbose messages
31 --silent Be extra quiet
32 --precision Level of precision - specify the number of digits
35 =head2 AUTHOR Matthew Hahn E<lt>matthew.hahn-at-duke.eduE<gt>
37 For more information contact:
39 Matthew Hahn, E<lt>matthew.hahn-at-duke.eduE<gt>
40 Jason Stajich E<lt>jason-at-bioperl-dot-orgE<gt>
45 use Bio
::PopGen
::Simulation
::Coalescent
;
46 use Bio
::PopGen
::Statistics
;
47 use Bio
::PopGen
::Individual
;
48 use Bio
::PopGen
::Genotype
;
51 my $mut_count_1 = 10; # synonymous
52 my $mut_count_2 = 20; # non-synonymous
55 my $observedD = undef;
58 my $precision = '4'; # Let's make the random precision between
59 # 0->1 to 1000th digits
62 's|samplesize|samp_size:i' => \
$sample_size,
63 'mut_1|mutsyn:i' => \
$mut_count_1,
64 'mut_2|mutnon:i' => \
$mut_count_2,
65 'i|iterations:i' => \
$iterations,
66 'o|obsered|observedD:f' => \
$observedD,
67 'v|verbose' => \
$verbose,
68 'm|method:s' => \
$method,
70 'silent' => sub { $verbose = -1; },
71 'p|precision:i' => \
$precision,
79 if( $method ne 'fuD' and $method ne 'tajimaD' ) {
80 die("available methods are [fu and li's D] (fuD) and Tajima's D (tajimaD)");
83 printf("sample size is %d iteration count = %d\n", $sample_size,
86 my $sim = new Bio
::PopGen
::Simulation
::Coalescent
87 (-sample_size
=> $sample_size);
89 for(my $iter = 0; $iter < $iterations; $iter++ ) {
90 my $tree = $sim->next_tree($sample_size);
92 if( $mut_count_1 > 0 ) {
93 $sim->add_Mutations($tree,$mut_count_1,$precision);
95 my @leaves = $tree->get_leaf_nodes;
96 # the outgroup is just an individual with the ancestral state
98 my $outgroup = new Bio
::PopGen
::Individual
();
99 foreach my $m ( $leaves[0]->get_marker_names ) {
100 $outgroup->add_Genotype(Bio
::PopGen
::Genotype
->new
104 if( $method eq 'fuD' ) {
105 $f1 = Bio
::PopGen
::Statistics
->fu_and_li_D(\
@leaves,[$outgroup]);
106 } elsif( $method eq 'tajimaD' ) {
107 $f1 = Bio
::PopGen
::Statistics
->tajima_D(\
@leaves);
109 print "(mutation count = $mut_count_1) D=$f1\n"
114 if( $mut_count_2 > 0 ) {
115 $sim->add_Mutations($tree,$mut_count_2,$precision);
116 my @leaves = $tree->get_leaf_nodes;
117 # the outgroup is just an individual with the ancestral state
119 my $outgroup = new Bio
::PopGen
::Individual
();
120 foreach my $m ( $leaves[0]->get_marker_names ) {
121 $outgroup->add_Genotype(Bio
::PopGen
::Genotype
->new
125 if( $method eq 'fuD' ) {
126 $f2 = Bio
::PopGen
::Statistics
->fu_and_li_D(\
@leaves,[$outgroup]);
127 } elsif( $method eq 'tajimaD' ) {
128 $f2 = Bio
::PopGen
::Statistics
->tajima_D(\
@leaves);
130 print "(mutation count = $mut_count_2) D=$f2\n" if( $verbose >= 0);
133 my $deltaD = ( $f1 - $f2 );
134 push @D_distribution, $deltaD;
135 if( $iter % 10 == 0 && $iter > 0 ) {
136 print STDERR
"iter = $iter\n";
140 if( defined $observedD && $iterations > 1 ) {
141 my @sortedD = sort { $a <=> $b } @D_distribution;
143 for($i = 0; $i < scalar @sortedD; $i++ ) {
144 if( $sortedD[$i] > $observedD ) {
149 printf( "index %d value=%.4f out of %d total (obs=%.4f)\n",
150 $i, $sortedD[$i], scalar @sortedD, $observedD);