changes all issue tracking in preparation for switch to github issues
[bioperl-live.git] / Bio / PopGen / IO / prettybase.pm
blobeda58e8f145656fe431e45e9a1079cb49eea6283
2 # BioPerl module for Bio::PopGen::IO::prettybase
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::PopGen::IO::prettybase - Extract individual allele data from PrettyBase format
18 =head1 SYNOPSIS
20 Do not use directly, use through the Bio::PopGen::IO driver
22 =head1 DESCRIPTION
24 This object will parse comma delimited PrettyBase output. PrettyBase
25 is defined by the SeattleSNPs http://pga.gs.washington.edu/
27 This is expected to be tab delimited (you can vary with the
28 field_delimiter flag SITE SAMPLE ALLELE1 ALLELE2
30 There are 2 initialization parameters, the delimiter
31 (-field_delimiter) [default 'tab'] and a boolean -no_header which
32 specifies if there is no header line to read in. All lines starting
33 with '#' will be skipped
35 =head1 FEEDBACK
37 =head2 Mailing Lists
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 =head2 Support
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
57 =head2 Reporting Bugs
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 of the bugs and their resolution. Bug reports can be submitted via
61 the web:
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Jason Stajich
67 Email jason-at-bioperl.org
69 =head1 CONTRIBUTORS
71 Matthew Hahn, matthew.hahn-at-duke.edu
73 =head1 APPENDIX
75 The rest of the documentation details each of the object methods.
76 Internal methods are usually preceded with a _
78 =cut
81 # Let the code begin...
84 package Bio::PopGen::IO::prettybase;
85 use vars qw($FieldDelim $Header);
86 use strict;
88 ($FieldDelim,$Header) =( '\t',0);
91 use Bio::PopGen::Individual;
92 use Bio::PopGen::Population;
93 use Bio::PopGen::Genotype;
95 use base qw(Bio::PopGen::IO);
97 =head2 new
99 Title : new
100 Usage : my $obj = Bio::PopGen::IO::prettybase->new();
101 Function: Builds a new Bio::PopGen::IO::prettybase object
102 Returns : an instance of Bio::PopGen::IO::prettybase
103 Args : -field_delimiter => a field delimiter character or regexp (default is /\t/ )
104 -header => boolean if the file will have a header and parser should
105 skip first line in the file (default is false)
106 -convert_indel_states => convert alleles which are longer than one character
107 to an 'I' meaning insert state, and alleles which are
108 '-' to a delete state.
109 (default is false)
111 =cut
113 sub _initialize {
114 my($self, @args) = @_;
115 my ($fieldsep,
116 $conv_indels,
117 $header) = $self->_rearrange([qw(FIELD_DELIMITER
118 CONVERT_INDEL_STATES
119 HEADER)],@args);
121 $self->flag('header', defined $header ? $header : $Header);
122 $self->flag('field_delimiter',defined $fieldsep ? $fieldsep : $FieldDelim);
123 $self->{'_header'} = undef;
124 $self->{'_parsed_individiuals'} = [];
125 $self->{'_parsed'} = 0;
126 $self->flag('convert_indel',$conv_indels || 0);
127 return 1;
130 =head2 flag
132 Title : flag
133 Usage : $obj->flag($flagname,$newval)
134 Function: Get/Set the flag value
135 Returns : value of a flag (a boolean)
136 Args : A flag name, currently we expect
137 'header', 'field_delimiter', or 'allele_delimiter'
138 on set, new value (a boolean or undef, optional)
141 =cut
143 sub flag{
144 my $self = shift;
145 my $fieldname = shift;
146 return unless defined $fieldname;
148 return $self->{'_flag'}->{$fieldname} = shift if @_;
149 return $self->{'_flag'}->{$fieldname};
153 =head2 next_individual
155 Title : next_individual
156 Usage : my $ind = $popgenio->next_individual;
157 Function: Retrieve the next individual from a dataset
158 Returns : Bio::PopGen::IndividualI object
159 Args : none
162 =cut
164 sub next_individual {
165 my ($self) = @_;
166 unless( $self->{'_parsed'} ) {
167 $self->_parse_prettybase;
169 return $self->{'_parsed_individiuals'}->[$self->{'_iterator'}++];
174 =head2 next_population
176 Title : next_population
177 Usage : my $ind = $popgenio->next_population;
178 Function: Retrieve the next population from a dataset
179 Returns : Bio::PopGen::PopulationI object
180 Args : none
181 Note : Many implementation will not implement this
183 =cut
185 # Plan is to just return the whole dataset as a single population by
186 # default I think - people would then have each population in a separate
187 # file.
189 sub next_population{
190 my ($self) = @_;
191 my @inds;
192 while( my $ind = $self->next_individual ) {
193 push @inds, $ind;
195 return unless @inds;
196 Bio::PopGen::Population->new(-individuals => \@inds);
200 sub _parse_prettybase {
201 my $self = shift;
202 my %inds;
203 my $convert_indels = $self->flag('convert_indel');
204 while( defined( $_ = $self->_readline) ) {
205 next if( /^\s*\#/ || /^\s+$/ || ! length($_) );
207 my ($site,$sample,@alleles) = split($self->flag('field_delimiter'),$_);
208 if( ! defined $sample ) {
209 warn("sample id is undefined for $_");
210 next;
212 for my $allele ( @alleles ) {
213 $allele =~ s/^\s+//;
214 $allele =~ s/\s+$//;
215 if( $convert_indels ) {
216 if( length($allele) > 1 ) {
217 # we have an insert state
218 $allele = 'I';
219 } elsif( $allele eq '-' ) {
220 # have a delete state
221 $allele = 'D';
226 my $g = Bio::PopGen::Genotype->new(-alleles => \@alleles,
227 -marker_name => $site,
228 -individual_id=> $sample);
231 if( ! defined $inds{$sample} ) {
232 $inds{$sample} = Bio::PopGen::Individual->new(-unique_id => $sample);
234 $inds{$sample}->add_Genotype($g);
236 $self->{'_parsed_individiuals'} = [ values %inds ];
237 $self->{'_parsed'} = 1;
238 return;
242 =head2 write_individual
244 Title : write_individual
245 Usage : $popgenio->write_individual($ind);
246 Function: Write an individual out in the file format
247 Returns : none
248 Args : L<Bio::PopGen::PopulationI> object(s)
250 =cut
252 sub write_individual{
253 my ($self,@inds) = @_;
254 foreach my $ind ( @inds ) {
255 if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
256 $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object");
257 next;
259 foreach my $marker ( $ind->get_marker_names ) {
260 my $g = $ind->get_Genotypes(-marker=> $marker);
261 next unless defined $g;
262 $self->_print( join("\t", $marker, $ind->unique_id,
263 $g->get_Alleles), "\n");
271 =head2 write_population
273 Title : write_population
274 Usage : $popgenio->write_population($pop);
275 Function: Write a population out in the file format
276 Returns : none
277 Args : L<Bio::PopGen::PopulationI> object(s)
278 Note : Many implementation will not implement this
280 =cut
282 sub write_population{
283 my ($self,@pops) = @_;
284 foreach my $pop ( @pops ) {
285 if (! ref($pop) || ! $pop->isa('Bio::PopGen::PopulationI') ) {
286 $self->warn("Cannot write an object that is not a Bio::PopGen::PopulationI object");
287 next;
289 my @mnames = $pop->get_marker_names;
290 foreach my $ind ( $pop->get_Individuals ) {
291 if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
292 $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object");
293 next;
295 foreach my $marker ( @mnames ) {
296 my $g = $ind->get_Genotypes(-marker=> $marker);
297 next unless defined $g;
298 $self->_print( join("\t", $marker, $ind->unique_id,
299 $g->get_Alleles), "\n");