2 # BioPerl module for Bio::PopGen::IO::prettybase
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::PopGen::IO::prettybase - Extract individual allele data from PrettyBase format
20 Do not use directly, use through the Bio::PopGen::IO driver
24 This object will parse comma delimited PrettyBase output. PrettyBase
25 is defined by the SeattleSNPs http://pga.gs.washington.edu/
27 This is expected to be tab delimited (you can vary with the
28 field_delimiter flag SITE SAMPLE ALLELE1 ALLELE2
30 There are 2 initialization parameters, the delimiter
31 (-field_delimiter) [default 'tab'] and a boolean -no_header which
32 specifies if there is no header line to read in. All lines starting
33 with '#' will be skipped
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 of the bugs and their resolution. Bug reports can be submitted via
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Jason Stajich
67 Email jason-at-bioperl.org
71 Matthew Hahn, matthew.hahn-at-duke.edu
75 The rest of the documentation details each of the object methods.
76 Internal methods are usually preceded with a _
81 # Let the code begin...
84 package Bio
::PopGen
::IO
::prettybase
;
85 use vars
qw($FieldDelim $Header);
88 ($FieldDelim,$Header) =( '\t',0);
91 use Bio::PopGen::Individual;
92 use Bio::PopGen::Population;
93 use Bio::PopGen::Genotype;
95 use base qw(Bio::PopGen::IO);
100 Usage : my $obj = Bio::PopGen::IO::prettybase->new();
101 Function: Builds a new Bio::PopGen::IO::prettybase object
102 Returns : an instance of Bio::PopGen::IO::prettybase
103 Args : -field_delimiter => a field delimiter character or regexp (default is /\t/ )
104 -header => boolean if the file will have a header and parser should
105 skip first line in the file (default is false)
106 -convert_indel_states => convert alleles which are longer than one character
107 to an 'I' meaning insert state, and alleles which are
108 '-' to a delete state.
114 my($self, @args) = @_;
117 $header) = $self->_rearrange([qw(FIELD_DELIMITER
121 $self->flag('header', defined $header ?
$header : $Header);
122 $self->flag('field_delimiter',defined $fieldsep ?
$fieldsep : $FieldDelim);
123 $self->{'_header'} = undef;
124 $self->{'_parsed_individiuals'} = [];
125 $self->{'_parsed'} = 0;
126 $self->flag('convert_indel',$conv_indels || 0);
133 Usage : $obj->flag($flagname,$newval)
134 Function: Get/Set the flag value
135 Returns : value of a flag (a boolean)
136 Args : A flag name, currently we expect
137 'header', 'field_delimiter', or 'allele_delimiter'
138 on set, new value (a boolean or undef, optional)
145 my $fieldname = shift;
146 return unless defined $fieldname;
148 return $self->{'_flag'}->{$fieldname} = shift if @_;
149 return $self->{'_flag'}->{$fieldname};
153 =head2 next_individual
155 Title : next_individual
156 Usage : my $ind = $popgenio->next_individual;
157 Function: Retrieve the next individual from a dataset
158 Returns : Bio::PopGen::IndividualI object
164 sub next_individual
{
166 unless( $self->{'_parsed'} ) {
167 $self->_parse_prettybase;
169 return $self->{'_parsed_individiuals'}->[$self->{'_iterator'}++];
174 =head2 next_population
176 Title : next_population
177 Usage : my $ind = $popgenio->next_population;
178 Function: Retrieve the next population from a dataset
179 Returns : Bio::PopGen::PopulationI object
181 Note : Many implementation will not implement this
185 # Plan is to just return the whole dataset as a single population by
186 # default I think - people would then have each population in a separate
192 while( my $ind = $self->next_individual ) {
196 Bio
::PopGen
::Population
->new(-individuals
=> \
@inds);
200 sub _parse_prettybase
{
203 my $convert_indels = $self->flag('convert_indel');
204 while( defined( $_ = $self->_readline) ) {
205 next if( /^\s*\#/ || /^\s+$/ || ! length($_) );
207 my ($site,$sample,@alleles) = split($self->flag('field_delimiter'),$_);
208 if( ! defined $sample ) {
209 warn("sample id is undefined for $_");
212 for my $allele ( @alleles ) {
215 if( $convert_indels ) {
216 if( length($allele) > 1 ) {
217 # we have an insert state
219 } elsif( $allele eq '-' ) {
220 # have a delete state
226 my $g = Bio
::PopGen
::Genotype
->new(-alleles
=> \
@alleles,
227 -marker_name
=> $site,
228 -individual_id
=> $sample);
231 if( ! defined $inds{$sample} ) {
232 $inds{$sample} = Bio
::PopGen
::Individual
->new(-unique_id
=> $sample);
234 $inds{$sample}->add_Genotype($g);
236 $self->{'_parsed_individiuals'} = [ values %inds ];
237 $self->{'_parsed'} = 1;
242 =head2 write_individual
244 Title : write_individual
245 Usage : $popgenio->write_individual($ind);
246 Function: Write an individual out in the file format
248 Args : L<Bio::PopGen::PopulationI> object(s)
252 sub write_individual
{
253 my ($self,@inds) = @_;
254 foreach my $ind ( @inds ) {
255 if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
256 $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object");
259 foreach my $marker ( $ind->get_marker_names ) {
260 my $g = $ind->get_Genotypes(-marker
=> $marker);
261 next unless defined $g;
262 $self->_print( join("\t", $marker, $ind->unique_id,
263 $g->get_Alleles), "\n");
271 =head2 write_population
273 Title : write_population
274 Usage : $popgenio->write_population($pop);
275 Function: Write a population out in the file format
277 Args : L<Bio::PopGen::PopulationI> object(s)
278 Note : Many implementation will not implement this
282 sub write_population
{
283 my ($self,@pops) = @_;
284 foreach my $pop ( @pops ) {
285 if (! ref($pop) || ! $pop->isa('Bio::PopGen::PopulationI') ) {
286 $self->warn("Cannot write an object that is not a Bio::PopGen::PopulationI object");
289 my @mnames = $pop->get_marker_names;
290 foreach my $ind ( $pop->get_Individuals ) {
291 if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
292 $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object");
295 foreach my $marker ( @mnames ) {
296 my $g = $ind->get_Genotypes(-marker
=> $marker);
297 next unless defined $g;
298 $self->_print( join("\t", $marker, $ind->unique_id,
299 $g->get_Alleles), "\n");