tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / scripts / popgen / heterogeneity_test.PLS
blob4b937d1ab345a348c88937b4c88ac874ed9fb470
1 #!/usr/bin/perl -w
2 # -*-Perl-*- (for my emacs)
3 # $Id$
5 use strict;
7 =head1 NAME
9 heterogeneity_test - a test for distinguishing between selection and population expansion.
11 =head1 SYNOPSIS
13 heterogenetity_test -mut_1/--mutsyn synonymous_mut_count -mut_2/--mutnon nonsyn_mut_count -s/--smaplesize sample_size [-i/--iterations iterations] [-o/--observed observed_D] [-v/--verbose] [--silent ] [-m/--method tajimaD or fuD] [--precision]
15 =head2 DESCRIPTION
17 This is an implementation of the Heterogenetity test as described in
18 Hahn MW, Rausher MD, and Cunningham CW. 2002. Genetics 161(1):11-20.
20 =head2 OPTIONS
22 Options in brackets above are optional
24 -s or --samplesize samplesize
25 -mut_1 or --mutsyn synonymous mutation count
26 -mut_2 or --mutnon nonsynonmous mutation count
27 -i or --iterations number of iterations
28 -o or --observed observed D
29 -m or --method tajimaD or fuD for Tajima's D or Fu and Li's D
30 -v or --verbose print out extra verbose messages
31 --silent Be extra quiet
32 --precision Level of precision - specify the number of digits
33 (default 4)
35 =head2 AUTHOR Matthew Hahn E<lt>matthew.hahn-at-duke.eduE<gt>
37 For more information contact:
39 Matthew Hahn, E<lt>matthew.hahn-at-duke.eduE<gt>
40 Jason Stajich E<lt>jason-at-bioperl-dot-orgE<gt>
42 =cut
44 use Getopt::Long;
45 use Bio::PopGen::Simulation::Coalescent;
46 use Bio::PopGen::Statistics;
47 use Bio::PopGen::Individual;
48 use Bio::PopGen::Genotype;
50 my $sample_size = 4;
51 my $mut_count_1 = 10; # synonymous
52 my $mut_count_2 = 20; # non-synonymous
53 my $iterations = 1;
54 my $verbose = 0;
55 my $observedD = undef;
56 my $method = 'fuD';
57 my $help = 0;
58 my $precision = '4'; # Let's make the random precision between
59 # 0->1 to 1000th digits
61 GetOptions(
62 's|samplesize|samp_size:i' => \$sample_size,
63 'mut_1|mutsyn:i' => \$mut_count_1,
64 'mut_2|mutnon:i' => \$mut_count_2,
65 'i|iterations:i' => \$iterations,
66 'o|obsered|observedD:f' => \$observedD,
67 'v|verbose' => \$verbose,
68 'm|method:s' => \$method,
69 'h|help' => \$help,
70 'silent' => sub { $verbose = -1; },
71 'p|precision:i' => \$precision,
74 if( $help ) {
75 system("perldoc",$0);
76 exit(0);
79 if( $method ne 'fuD' and $method ne 'tajimaD' ) {
80 die("available methods are [fu and li's D] (fuD) and Tajima's D (tajimaD)");
82 my @D_distribution;
83 printf("sample size is %d iteration count = %d\n", $sample_size,
84 $iterations);
86 my $sim = new Bio::PopGen::Simulation::Coalescent
87 (-sample_size => $sample_size);
89 for(my $iter = 0; $iter < $iterations; $iter++ ) {
90 my $tree = $sim->next_tree($sample_size);
91 my $f1 = 0;
92 if( $mut_count_1 > 0 ) {
93 $sim->add_Mutations($tree,$mut_count_1,$precision);
95 my @leaves = $tree->get_leaf_nodes;
96 # the outgroup is just an individual with the ancestral state
97 # (no mutations)
98 my $outgroup = new Bio::PopGen::Individual();
99 foreach my $m ( $leaves[0]->get_marker_names ) {
100 $outgroup->add_Genotype(Bio::PopGen::Genotype->new
101 (-marker_name=> $m,
102 -alleles => [ 0 ]));
104 if( $method eq 'fuD' ) {
105 $f1 = Bio::PopGen::Statistics->fu_and_li_D(\@leaves,[$outgroup]);
106 } elsif( $method eq 'tajimaD' ) {
107 $f1 = Bio::PopGen::Statistics->tajima_D(\@leaves);
109 print "(mutation count = $mut_count_1) D=$f1\n"
110 if( $verbose >= 0);
113 my $f2 = 0;
114 if( $mut_count_2 > 0 ) {
115 $sim->add_Mutations($tree,$mut_count_2,$precision);
116 my @leaves = $tree->get_leaf_nodes;
117 # the outgroup is just an individual with the ancestral state
118 # (no mutations)
119 my $outgroup = new Bio::PopGen::Individual();
120 foreach my $m ( $leaves[0]->get_marker_names ) {
121 $outgroup->add_Genotype(Bio::PopGen::Genotype->new
122 (-marker_name=> $m,
123 -alleles => [ 0 ]));
125 if( $method eq 'fuD' ) {
126 $f2 = Bio::PopGen::Statistics->fu_and_li_D(\@leaves,[$outgroup]);
127 } elsif( $method eq 'tajimaD' ) {
128 $f2 = Bio::PopGen::Statistics->tajima_D(\@leaves);
130 print "(mutation count = $mut_count_2) D=$f2\n" if( $verbose >= 0);
133 my $deltaD = ( $f1 - $f2 );
134 push @D_distribution, $deltaD;
135 if( $iter % 10 == 0 && $iter > 0 ) {
136 print STDERR "iter = $iter\n";
140 if( defined $observedD && $iterations > 1 ) {
141 my @sortedD = sort { $a <=> $b } @D_distribution;
142 my $i;
143 for($i = 0; $i < scalar @sortedD; $i++ ) {
144 if( $sortedD[$i] > $observedD ) {
145 last;
149 printf( "index %d value=%.4f out of %d total (obs=%.4f)\n",
150 $i, $sortedD[$i], scalar @sortedD, $observedD);