Merge branch 'master' of github.com:bioperl/bioperl-live
[bioperl-live.git] / examples / popgen / parse_calc_stats.pl
blob70751ec1142ca7f517dadf3a339b5a7aef33b8fb
1 #!/usr/bin/perl
2 # Author: Jason Stajich, jason@bioperl.org
3 # $Revision: 6576 $
5 use strict;
7 use Bio::PopGen::IO;
8 use Bio::PopGen::Statistics;
9 use Bio::PopGen::Population;
11 my $io = new Bio::PopGen::IO(-format => 'prettybase',
12 # the Bio::Root::IO->catfile is only
13 # to make file access platform independent
14 -file => Bio::Root::IO->catfile
15 (qw( t data popstats.prettybase)));
17 # This is an example of how to read in data from Bio::PopGen::IO
18 # We're going to make 2 lists, @outgroup, @ingroup
19 # @outgroup is a single individual which is named 'out'
20 # @ingroup is the set of individuals we are testing
21 my (@ingroup,@outgroup);
22 while( my $ind = $io->next_individual ) {
23 if($ind->unique_id =~ /out/) {
24 push @outgroup, $ind;
25 } else {
26 push @ingroup, $ind;
30 # We'll get the names of all the markers (or sites)
31 # that this individual has genotypes for
32 my @marker_names = $ingroup[0]->get_marker_names();
34 # the number of sites is the same as the number of markers
35 # we assume that all the individuals have the same number of sites
36 # or that this data is 'aligned' if these were derived from a
37 # multiple sequence alignment
38 my $sitecount = scalar @marker_names;
40 foreach my $ind ( @ingroup ) {
41 # here let's print out the individual name and all their alleles
42 # for all the markers
43 # like this
44 # Name: INDIVIDUALNAME
45 # A1,A2 B1,B2,...
46 print "Name: ", $ind->unique_id,"\n";
47 print "\t";
48 foreach my $marker ( @marker_names ) {
49 for my $genotype ( $ind->get_Genotypes($marker) ) {
50 my @alleles = $genotype->get_Alleles();
51 # In this example these are actually single alleles anyways...
52 print join(",", @alleles), " ";
55 print "\n";
57 # There is a more compact way to write that
58 print "Name: ", $ind->unique_id,
59 "\n\t", join(" ", map { join(",",$_->get_Alleles) }
60 map { $ind->get_Genotypes($_) } @marker_names),"\n";
61 print "--\n";
64 # We can compute some statistics about these individuals
65 # (underlying assumption is that they are unrelated...)
67 print "Pi: ",Bio::PopGen::Statistics->pi(\@ingroup), "\n";
68 print "Theta: ",Bio::PopGen::Statistics->theta(\@ingroup), "\n";
70 # we can also treat them like a population
71 my $ingroup_pop = new Bio::PopGen::Population(-individuals => \@ingroup);
73 print "Pi: ",Bio::PopGen::Statistics->pi($ingroup_pop), "\n";
74 print "Theta: ",Bio::PopGen::Statistics->theta($ingroup_pop), "\n";
80 # You can also simulate individuals from a coalescent
81 use Bio::PopGen::Simulation::Coalescent;
83 my $ssize = 5;
84 my $sim = new Bio::PopGen::Simulation::Coalescent(-sample_size => $ssize);
85 my $tree = $sim->next_tree;
86 my $mutcount = 100;
87 $sim->add_Mutations($tree, $mutcount);
89 # The leaves are the simulated individuals
90 my @leaves = $tree->get_leaf_nodes;
92 # We can use the Stats module either like Bio::PopGen::Statistics->XXX
93 # or like this:
94 my $stats = new Bio::PopGen::Statistics;
95 # $stats->verbose(1);
96 print "Coalescent pi: ", $stats->pi(\@leaves), "\n";
97 print "Coalescent theta: ", $stats->theta(\@leaves), "\n";
98 my $coalescent_pop = new Bio::PopGen::Population(-individuals => \@leaves);
100 print "Coalescent pi: ", $stats->pi($coalescent_pop), "\n";
101 print "Coalescent theta: ", $stats->theta($coalescent_pop), "\n";