2 # BioPerl module for Bio::PopGen::Population
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::PopGen::Population - A population of individuals
20 use Bio::PopGen::Population;
21 use Bio::PopGen::Individual;
22 my $population = Bio::PopGen::Population->new();
23 my $ind = Bio::PopGen::Individual->new(-unique_id => 'id');
24 $population->add_Individual($ind);
26 for my $ind ( $population->get_Individuals ) {
27 # iterate through the individuals
30 for my $name ( $population->get_marker_names ) {
31 my $marker = $population->get_Marker($name);
34 my $num_inds = $population->get_number_individuals;
36 my $homozygote_f = $population->get_Frequency_Homozygotes;
37 my $heterozygote_f = $population->get_Frequency_Heterozygotes;
39 # make a population haploid by making fake chromosomes through
40 # haplotypes -- ala allele 1 is on chrom 1 and allele 2 is on chrom 2
41 # the number of individuals created will thus be 2 x number in
43 my $happop = $population->haploid_population;
48 This is a collection of individuals. We'll have ways of generating
49 L<Bio::PopGen::MarkerI> objects out so we can calculate allele_frequencies
50 for implementing the various statistical tests.
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to
58 the Bioperl mailing list. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65 Please direct usage questions or support issues to the mailing list:
67 I<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
70 reponsive experts will be able look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 of the bugs and their resolution. Bug reports can be submitted via
80 https://github.com/bioperl/bioperl-live/issues
82 =head1 AUTHOR - Jason Stajich
84 Email jason-at-bioperl.org
88 Matthew Hahn, matthew.hahn-at-duke.edu
92 The rest of the documentation details each of the object methods.
93 Internal methods are usually preceded with a _
98 # Let the code begin...
101 package Bio
::PopGen
::Population
;
104 # Object preamble - inherits from Bio::Root::Root
106 use Bio
::PopGen
::Marker
;
107 use Bio
::PopGen
::Genotype
;
109 use base
qw(Bio::Root::Root Bio::PopGen::PopulationI);
114 Usage : my $obj = Bio::PopGen::Population->new();
115 Function: Builds a new Bio::PopGen::Population object
116 Returns : an instance of Bio::PopGen::Population
117 Args : -individuals => array ref of individuals (optional)
118 -name => population name (optional)
119 -source => a source tag (optional)
120 -description => a short description string of the population (optional)
125 my($class,@args) = @_;
127 my $self = $class->SUPER::new
(@args);
128 $self->{'_individuals'} = [];
129 my ($name,$source,$description,
130 $inds,$checkisa) = $self->_rearrange([qw(NAME
135 if( defined $inds ) {
136 if( ref($inds) !~ /ARRAY/i ) {
137 $self->warn("Need to provide a value array ref for the -individuals initialization flag");
139 $self->add_Individual(@
$inds);
143 defined $name && $self->name($name);
144 defined $source && $self->source($source);
145 defined $description && $self->description($description);
146 $self->{'_checkisa'} = defined $checkisa ?
$checkisa : $CheckISA;
154 Usage : my $name = $pop->name
155 Function: Get the population name
156 Returns : string representing population name
157 Args : [optional] string representing population name
164 return $self->{'_name'} = shift if @_;
165 return $self->{'_name'};
171 Usage : my $description = $pop->description
172 Function: Get the population description
173 Returns : string representing population description
174 Args : [optional] string representing population description
181 return $self->{'_description'} = shift if @_;
182 return $self->{'_description'};
188 Usage : my $source = $pop->source
189 Function: Get the population source
190 Returns : string representing population source
191 Args : [optional] string representing population source
198 return $self->{'_source'} = shift if @_;
199 return $self->{'_source'};
205 Usage : my $annotation_collection = $pop->annotation;
206 Function: Get/set a Bio::AnnotationCollectionI for this population
207 Returns : Bio::AnnotationCollectionI object
208 Args : [optional set] Bio::AnnotationCollectionI object
213 my ($self, $arg) = @_;
214 return $self->{_annotation
} unless $arg;
215 $self->throw("Bio::AnnotationCollectionI required for argument") unless
216 ref($arg) && $arg->isa('Bio::AnnotationCollectionI');
217 return $self->{_annotation
} = $arg;
220 =head2 set_Allele_Frequency
222 Title : set_Allele_Frequency
223 Usage : $population->set_Allele_Frequency('marker' => { 'allele1' => 0.1});
224 Function: Sets an allele frequency for a Marker for this Population
225 This allows the Population to not have individual individual
226 genotypes but rather a set of overall allele frequencies
227 Returns : Count of the number of markers
228 Args : -name => (string) marker name
229 -allele => (string) allele name
230 -frequency => (double) allele frequency - must be between 0 and 1
232 -frequencies => { 'marker1' => { 'allele1' => 0.01,
239 sub set_Allele_Frequency
{
240 my ($self,@args) = @_;
241 my ($name,$allele, $frequency,
242 $frequencies) = $self->_rearrange([qw(NAME
247 if( defined $frequencies ) { # this supercedes the res
248 if( ref($frequencies) =~ /HASH/i ) {
249 my ($markername,$alleles);
250 while( ($markername,$alleles) = each %$frequencies ) {
251 $self->{'_allele_freqs'}->{$markername} =
252 Bio
::PopGen
::Marker
->new(-name
=> $markername,
253 -allele_freq
=> $alleles);
256 $self->throw("Must provide a valid hashref for the -frequencies option");
259 unless( defined $self->{'_allele_freqs'}->{$name} ) {
260 $self->{'_allele_freqs'}->{$name} =
261 Bio
::PopGen
::Marker
->new(-name
=> $name);
263 $self->{'_allele_freqs'}->{$name}->add_Allele_Frequency($allele,$frequency);
265 return scalar keys %{$self->{'_allele_freqs'}};
269 =head2 add_Individual
271 Title : add_Individual
272 Usage : $population->add_Individual(@individuals);
273 Function: Add individuals to a population
274 Returns : count of the current number in the object
275 Args : Array of Individuals
281 my ($self,@inds) = @_;
282 foreach my $i ( @inds ) {
283 next if ! defined $i;
285 unless( $self->{'_checkisa'} ?
$i->isa('Bio::PopGen::IndividualI') : 1 ) {
286 $self->warn("cannot add an individual ($i) which is not a Bio::PopGen::IndividualI");
290 push @
{$self->{'_individuals'}}, @inds;
291 $self->{'_cached_markernames'} = undef;
292 $self->{'_allele_freqs'} = {};
293 return scalar @
{$self->{'_individuals'} || []};
297 =head2 remove_Individuals
299 Title : remove_Individuals
300 Usage : $population->remove_Individuals(@ids);
301 Function: Remove individual(s) to a population
302 Returns : count of the current number in the object
307 sub remove_Individuals
{
308 my ($self,@names) = @_;
310 my %namehash; # O(1) lookup will be faster I think
311 foreach my $n ( @names ) { $namehash{$n}++ }
313 foreach my $ind ( @
{$self->{'_individuals'} || []} ) {
314 unshift @tosplice, $i if( $namehash{$ind->unique_id} );
317 foreach my $index ( @tosplice ) {
318 splice(@
{$self->{'_individuals'}}, $index,1);
320 $self->{'_cached_markernames'} = undef;
321 $self->{'_allele_freqs'} = {};
322 return scalar @
{$self->{'_individuals'} || []};
325 =head2 get_Individuals
327 Title : get_Individuals
328 Usage : my @inds = $pop->get_Individuals();
329 Function: Return the individuals, alternatively restrict by a criteria
330 Returns : Array of Bio::PopGen::IndividualI objects
331 Args : none if want all the individuals OR,
332 -unique_id => To get an individual with a specific id
333 -marker => To only get individuals which have a genotype specific
334 for a specific marker name
340 my ($self,@args) = @_;
341 my @inds = @
{$self->{'_individuals'} || []};
343 if( @args ) { # save a little time here if @args is empty
344 my ($id,$marker) = $self->_rearrange([qw(UNIQUE_ID MARKER)], @args);
348 @inds = grep { $_->unique_id eq $id } @inds;
349 } elsif (defined $marker) {
350 @inds = grep { $_->has_Marker($marker) } @inds;
358 Title : get_Genotypes
359 Usage : my @genotypes = $pop->get_Genotypes(-marker => $name)
360 Function: Get the genotypes for all the individuals for a specific
362 Returns : Array of Bio::PopGen::GenotypeI objects
363 Args : -marker => name of the marker
369 my ($self,@args) = @_;
370 my ($name) = $self->_rearrange([qw(MARKER)],@args);
371 if( defined $name ) {
372 return grep { defined $_ } map { $_->get_Genotypes(-marker
=> $name) }
373 @
{$self->{'_individuals'} || []}
375 $self->warn("You needed to have provided a valid -marker value");
380 =head2 get_marker_names
382 Title : get_marker_names
383 Usage : my @names = $pop->get_marker_names;
384 Function: Get the names of the markers
385 Returns : Array of strings
386 Args : [optional] boolean flag to ignore internal cache status
391 sub get_marker_names
{
392 my ($self,$force) = @_;
393 return @
{$self->{'_cached_markernames'} || []}
394 if( ! $force && defined $self->{'_cached_markernames'});
396 foreach my $n ( map { $_->get_marker_names } $self->get_Individuals() ) {
399 my @nms = keys %unique;
400 if( $nms[0] =~ /^(Site|Codon)/ ) {
401 # sort by site or codon number and do it in
402 # a schwartzian transformation baby!
403 @nms = map { $_->[1] }
404 sort { $a->[0] <=> $b->[0] }
405 map { [$_ =~ /^(?:Codon|Site)-(\d+)/, $_] } @nms;
407 $self->{'_cached_markernames'} = [ @nms ];
408 return @
{$self->{'_cached_markernames'} || []};
415 Usage : my $marker = $population->get_Marker($name)
416 Function: Get a Bio::PopGen::Marker object based on this population
417 Returns : Bio::PopGen::MarkerI object
418 Args : name of the marker
424 my ($self,$markername) = @_;
426 # setup some caching too
427 if( defined $self->{'_allele_freqs'} &&
428 defined ($marker = $self->{'_allele_freqs'}->{$markername}) ) {
429 # marker is now set to the stored value
431 my @genotypes = $self->get_Genotypes(-marker
=> $markername);
432 $marker = Bio
::PopGen
::Marker
->new(-name
=> $markername);
435 $self->warn("No genotypes for Marker $markername in the population");
439 for my $al ( map { $_->get_Alleles} @genotypes ) {
444 foreach my $allele ( keys %alleles ) {
445 $marker->add_Allele_Frequency($allele, $alleles{$allele}/$count);
446 $marker->{_marker_coverage
} = $count/2;
449 $self->{'_allele_freqs'}->{$markername} = $marker;
455 =head2 get_number_individuals
457 Title : get_number_individuals
458 Usage : my $count = $pop->get_number_individuals;
459 Function: Get the count of the number of individuals
460 Returns : integer >= 0
466 sub get_number_individuals
{
467 my ($self,$markername) = @_;
469 if( $self->{'_forced_set_individuals'} ) {
470 return $self->{'_forced_set_individuals'};
473 unless( defined $markername ) {
474 return scalar @
{$self->{'_individuals'} || []};
477 foreach my $individual ( @
{$self->{'_individuals'} || []} ) {
478 $number++ if( $individual->has_Marker($markername));
484 =head2 set_number_individuals
486 Title : set_number_individuals
487 Usage : $pop->set_number_individuals($num);
488 Function: Fixes the number of individuals, call this with
490 Only use this if you know what you are doing,
491 this is only relavent when you are just adding
492 allele frequency data for a population and want to
493 calculate something like theta
495 Args : individual count, calling it with undef or 0
496 will reset the value to return a number
497 calculated from the number of individuals
498 stored for this population.
502 sub set_number_individuals
{
503 my ($self,$indcount) = @_;
504 return $self->{'_forced_set_individuals'} = $indcount;
508 =head2 get_Frequency_Homozygotes
510 Title : get_Frequency_Homozygotes
511 Usage : my $freq = $pop->get_Frequency_Homozygotes;
512 Function: Calculate the frequency of homozygotes in the population
513 Returns : fraction between 0 and 1
519 sub get_Frequency_Homozygotes
{
520 my ($self,$marker,$allelename) = @_;
521 my ($homozygote_count) = 0;
522 return 0 if ! defined $marker || ! defined $allelename;
523 $marker = $marker->name if( defined $marker &&
525 ( $self->{'_checkisa'} ?
526 $marker->isa('Bio::PopGen::MarkerI') : 1));
527 my $total = $self->get_number_individuals($marker);
528 foreach my $genotype ( $self->get_Genotypes($marker) ) {
529 my %alleles = map { $_ => 1} $genotype->get_Alleles();
530 # what to do for non-diploid situations?
531 if( $alleles{$allelename} ) {
532 $homozygote_count++ if( keys %alleles == 1);
535 return $total ?
$homozygote_count / $total : 0;
538 =head2 get_Frequency_Heterozygotes
540 Title : get_Frequency_Heterozygotes
541 Usage : my $freq = $pop->get_Frequency_Homozygotes;
542 Function: Calculate the frequency of homozygotes in the population
543 Returns : fraction between 0 and 1
549 sub get_Frequency_Heterozygotes
{
550 my ($self,$marker,$allelename) = @_;
551 my ($heterozygote_count) = 0;
552 return 0 if ! defined $marker || ! defined $allelename;
553 $marker = $marker->name if( defined $marker && ref($marker) &&
554 ($self->{'_checkisa'} ?
555 $marker->isa('Bio::PopGen::MarkerI') : 1));
557 $self->warn("Passed in a ".ref($marker). " to has_Marker, expecting either a string or a Bio::PopGen::MarkerI");
560 my $total = $self->get_number_individuals($marker);
562 foreach my $genotype ( $self->get_Genotypes($marker) ) {
563 my %alleles = map { $_ => 1} $genotype->get_Alleles();
564 # what to do for non-diploid situations?
565 if( $alleles{$allelename} ) {
566 $heterozygote_count++ if( keys %alleles == 2);
569 return $total ?
$heterozygote_count / $total : 0;
572 =head2 haploid_population
574 Title : haploid_population
575 Usage : my $pop = $population->haploid_population;
576 Function: Make a new population where all the individuals
577 are haploid - effectively an individual out of each
578 chromosome an individual has.
579 Returns : L<Bio::PopGen::PopulationI>
585 sub haploid_population
{
588 my @marker_names = $self->get_marker_names;
590 for my $ind ( $self->get_Individuals ) {
592 my $id = $ind->unique_id;
593 # separate genotypes into 'chromosomes'
594 for my $marker_name( @marker_names ) {
595 my ($genotype) = $ind->get_Genotypes(-marker
=> $marker_name);
597 for my $allele ( $genotype->get_Alleles ) {
598 push @
{$chromosomes[$i]},
599 Bio
::PopGen
::Genotype
->new(-marker_name
=> $marker_name,
600 -individual_id
=> $id.".$i",
601 -alleles
=> [$allele]);
605 for my $chrom ( @chromosomes ) {
606 my $copyind = ref($ind)->new(-unique_id
=> $id.".1",
607 -genotypes
=> $chrom);
611 my $population = ref($self)->new(-name
=> $self->name,
612 -source
=> $self->source,
613 -description
=> $self->description,
614 -individuals
=> \
@inds);