[bug 2637]
[bioperl-live.git] / examples / popgen / parse_calc_stats.pl
blob2f6a7ac8af82487a9e6071296b9fd9cd3a662c40
1 #!/usr/bin/perl -w
2 # Author: Jason Stajich, jason@bioperl.org
3 # $Id$
4 # $Revision: 6576 $
6 use strict;
8 use Bio::PopGen::IO;
9 use Bio::PopGen::Statistics;
10 use Bio::PopGen::Population;
12 my $io = new Bio::PopGen::IO(-format => 'prettybase',
13 # the Bio::Root::IO->catfile is only
14 # to make file access platform independent
15 -file => Bio::Root::IO->catfile
16 (qw( t data popstats.prettybase)));
18 # This is an example of how to read in data from Bio::PopGen::IO
19 # We're going to make 2 lists, @outgroup, @ingroup
20 # @outgroup is a single individual which is named 'out'
21 # @ingroup is the set of individuals we are testing
22 my (@ingroup,@outgroup);
23 while( my $ind = $io->next_individual ) {
24 if($ind->unique_id =~ /out/) {
25 push @outgroup, $ind;
26 } else {
27 push @ingroup, $ind;
31 # We'll get the names of all the markers (or sites)
32 # that this individual has genotypes for
33 my @marker_names = $ingroup[0]->get_marker_names();
35 # the number of sites is the same as the number of markers
36 # we assume that all the individuals have the same number of sites
37 # or that this data is 'aligned' if these were derived from a
38 # multiple sequence alignment
39 my $sitecount = scalar @marker_names;
41 foreach my $ind ( @ingroup ) {
42 # here let's print out the individual name and all their alleles
43 # for all the markers
44 # like this
45 # Name: INDIVIDUALNAME
46 # A1,A2 B1,B2,...
47 print "Name: ", $ind->unique_id,"\n";
48 print "\t";
49 foreach my $marker ( @marker_names ) {
50 for my $genotype ( $ind->get_Genotypes($marker) ) {
51 my @alleles = $genotype->get_Alleles();
52 # In this example these are actually single alleles anyways...
53 print join(",", @alleles), " ";
56 print "\n";
58 # There is a more compact way to write that
59 print "Name: ", $ind->unique_id,
60 "\n\t", join(" ", map { join(",",$_->get_Alleles) }
61 map { $ind->get_Genotypes($_) } @marker_names),"\n";
62 print "--\n";
65 # We can compute some statistics about these individuals
66 # (underlying assumption is that they are unrelated...)
68 print "Pi: ",Bio::PopGen::Statistics->pi(\@ingroup), "\n";
69 print "Theta: ",Bio::PopGen::Statistics->theta(\@ingroup), "\n";
71 # we can also treat them like a population
72 my $ingroup_pop = new Bio::PopGen::Population(-individuals => \@ingroup);
74 print "Pi: ",Bio::PopGen::Statistics->pi($ingroup_pop), "\n";
75 print "Theta: ",Bio::PopGen::Statistics->theta($ingroup_pop), "\n";
81 # You can also simulate individuals from a coalescent
82 use Bio::PopGen::Simulation::Coalescent;
84 my $ssize = 5;
85 my $sim = new Bio::PopGen::Simulation::Coalescent(-sample_size => $ssize);
86 my $tree = $sim->next_tree;
87 my $mutcount = 100;
88 $sim->add_Mutations($tree, $mutcount);
90 # The leaves are the simulated individuals
91 my @leaves = $tree->get_leaf_nodes;
93 # We can use the Stats module either like Bio::PopGen::Statistics->XXX
94 # or like this:
95 my $stats = new Bio::PopGen::Statistics;
96 # $stats->verbose(1);
97 print "Coalescent pi: ", $stats->pi(\@leaves), "\n";
98 print "Coalescent theta: ", $stats->theta(\@leaves), "\n";
99 my $coalescent_pop = new Bio::PopGen::Population(-individuals => \@leaves);
101 print "Coalescent pi: ", $stats->pi($coalescent_pop), "\n";
102 print "Coalescent theta: ", $stats->theta($coalescent_pop), "\n";