3 # BioPerl module for Bio::PopGen::IO::csv
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason-at-bioperl.org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::PopGen::IO::csv -Extract individual allele data from a CSV parser
21 #Do not use directly, use through the Bio::PopGen::IO driver
24 my $io = Bio::PopGen::IO->new(-format => 'csv',
27 # Some IO might support reading in a population at a time
30 while( my $ind = $io->next_individual ) {
31 push @population, $ind;
36 This object will parse comma delimited format (CSV) or whatever
37 delimiter you specify. It currently doesn't handle the more complex
38 quote escaped CSV format. There are 3 initialization parameters,
39 the delimiter (-field_delimiter) [default ','], (-allele_delimiter)
40 [default ' ']. The third initialization parameter is a boolean
41 -no_header which specifies if there is no header line to read in. All lines starting with '#' will be skipped
43 When no_header is not specific the data is assumed to be of the following form.
44 Having a header line this
45 SAMPLE,MARKERNAME1,MARKERNAME2,...
47 and each data line having the form (diploid data)
48 SAMP1,101 102,100 90,a b
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to
58 the Bioperl mailing list. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65 Please direct usage questions or support issues to the mailing list:
67 L<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
70 reponsive experts will be able look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 of the bugs and their resolution. Bug reports can be submitted via
80 http://bugzilla.open-bio.org/
82 =head1 AUTHOR - Jason Stajich
84 Email jason-at-bioperl.org
88 Matthew Hahn, matthew.hahn-at-duke.edu
92 The rest of the documentation details each of the object methods.
93 Internal methods are usually preceded with a _
98 # Let the code begin...
101 package Bio
::PopGen
::IO
::csv
;
102 use vars
qw($FieldDelim $AlleleDelim $NoHeader);
105 ($FieldDelim,$AlleleDelim,$NoHeader) =( ',', '\s+',0);
107 # Object preamble - inherits from Bio::Root::Root
110 use Bio::PopGen::Individual;
111 use Bio::PopGen::Population;
112 use Bio::PopGen::Genotype;
114 use base qw(Bio::PopGen::IO);
119 Usage : my $obj = Bio::PopGen::IO::csv->new();
120 Function: Builds a new Bio::PopGen::IO::csv object
121 Returns : an instance of Bio::PopGen::IO::csv
122 Args : [optional, these are the current defaults]
123 -field_delimiter => ','
124 -allele_delimiter=> '\s+'
131 my($self, @args) = @_;
132 my ($fieldsep,$all_sep,
133 $noheader) = $self->_rearrange([qw(FIELD_DELIMITER
138 $self->flag('no_header', defined $noheader ?
$noheader : $NoHeader);
139 $self->flag('field_delimiter',defined $fieldsep ?
$fieldsep : $FieldDelim);
140 $self->flag('allele_delimiter',defined $all_sep ?
$all_sep : $AlleleDelim);
142 $self->{'_header'} = undef;
149 Usage : $obj->flag($flagname,$newval)
150 Function: Get/Set the flag value
151 Returns : value of a flag (a boolean)
152 Args : A flag name, currently we expect
153 'no_header', 'field_delimiter', or 'allele_delimiter'
154 on set, new value (a boolean or undef, optional)
161 my $fieldname = shift;
162 return unless defined $fieldname;
164 return $self->{'_flag'}->{$fieldname} = shift if @_;
165 return $self->{'_flag'}->{$fieldname};
169 =head2 next_individual
171 Title : next_individual
172 Usage : my $ind = $popgenio->next_individual;
173 Function: Retrieve the next individual from a dataset
174 Returns : L<Bio::PopGen::IndividualI> object
182 while( defined( $_ = $self->_readline) ) {
183 next if( /^\s*\#/ || /^\s+$/ || ! length($_) );
186 return if ! defined $_;
187 if( $self->flag('no_header') ||
188 defined $self->{'_header'} ) {
190 #########new (allows field delim to be the same as the allele delim
192 my ($samp,@marker_results);
194 if($self->flag('field_delimiter') ne $self->flag('allele_delimiter')){
196 ($samp,@marker_results) = split($self->flag('field_delimiter'),$_);
200 my $fielddelim = $self->flag('field_delimiter');
201 my $alleledelim = $self->flag('allele_delimiter');
203 ($samp) = /(^.+?)$fielddelim/;
206 (@marker_results) = /([\d|\w]+$alleledelim[\d|\w]+)/g;
213 foreach my $m ( @marker_results ) {
217 if( defined $self->{'_header'} ) {
218 $markername = $self->{'_header'}->[$i];
220 $markername = "Marker$i";
222 $self->debug( "markername is $markername alleles are $m\n");
224 my @alleles = split($self->flag('allele_delimiter'), $m);
226 $m = Bio
::PopGen
::Genotype
->new(-alleles
=> \
@alleles,
227 -marker_name
=> $markername,
228 -individual_id
=> $samp);
231 return Bio
::PopGen
::Individual
->new(-unique_id
=> $samp,
232 -genotypes
=> \
@marker_results);
235 $self->{'_header'} = [split($self->flag('field_delimiter'),$_)];
236 return $self->next_individual; # rerun loop again
242 =head2 next_population
244 Title : next_population
245 Usage : my $ind = $popgenio->next_population;
246 Function: Retrieve the next population from a dataset
247 Returns : L<Bio::PopGen::PopulationI> object
249 Note : Many implementation will not implement this
253 # Plan is to just return the whole dataset as a single population by
254 # default I think - people would then have each population in a separate
260 while( my $ind = $self->next_individual ) {
263 Bio
::PopGen
::Population
->new(-individuals
=> \
@inds);
269 =head2 write_individual
271 Title : write_individual
272 Usage : $popgenio->write_individual($ind);
273 Function: Write an individual out in the file format
275 Args : L<Bio::PopGen::PopulationI> object(s)
279 sub write_individual
{
280 my ($self,@inds) = @_;
281 my $fielddelim = $self->flag('field_delimiter');
282 my $alleledelim= $self->flag('allele_delimiter');
284 foreach my $ind ( @inds ) {
285 if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
286 $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object ($ind)");
289 # we'll go ahead and sort these until
290 # we have a better way to insure a consistent order
291 my @marker_names = sort $ind->get_marker_names;
292 if( ! $self->flag('no_header') &&
293 ! $self->flag('header_written') ) {
294 $self->_print(join($fielddelim, ('SAMPLE', @marker_names)), "\n");
295 $self->flag('header_written',1);
297 $self->_print( join($fielddelim, $ind->unique_id,
298 # we're chaining map here, pay attention and read
299 # starting with the last map
301 # we'll turn genotypes into allele pairs
302 # which will be separated by the allele delimiter
303 map { join($alleledelim,$_->get_Alleles) }
304 # marker names will be sorted so we don't
305 # have to worry about this between individuals
306 # unless the individual set you pass in has
307 # a mixed set of markers...
308 # this will turn marker names into Genotypes
309 map {$ind->get_Genotypes(-marker
=> $_)}
310 @marker_names), "\n")
314 =head2 write_population
316 Title : write_population
317 Usage : $popgenio->write_population($pop);
318 Function: Write a population out in the file format
320 Args : L<Bio::PopGen::PopulationI> object(s)
321 Note : Many implementation will not implement this
325 sub write_population
{
326 my ($self,@pops) = @_;
327 my $fielddelim = $self->flag('field_delimiter');
328 # my $alleledelim= $self->flag('allele_delimiter');
329 my $alleledelim = ' ';
330 foreach my $pop ( @pops ) {
331 if (! ref($pop) || ! $pop->isa('Bio::PopGen::PopulationI') ) {
332 $self->warn("Cannot write an object that is not a Bio::PopGen::PopulationI object");
335 # we'll go ahead and sort these until
336 # we have a better way to insure a consistent order
337 my @marker_names = sort $pop->get_marker_names;
338 if( ! $self->flag('no_header') &&
339 ! $self->flag('header_written') ) {
340 $self->_print( join($fielddelim, ('SAMPLE', @marker_names)),
342 $self->flag('header_written',1);
344 foreach my $ind ( $pop->get_Individuals ) {
345 $self->_print( join($fielddelim, $ind->unique_id,
346 # we're chaining map here, pay attention
347 # and read starting with the last map
349 # we'll turn genotypes into allele pairs
350 # which will be separated by the allele
352 map { join($alleledelim,$_->get_Alleles) }
353 # marker names will be sorted so we don't
354 # have to worry about this between individuals
355 # unless the individual set you pass in has
356 # a mixed set of markers...
357 # this will turn marker names into Genotypes
358 map {$ind->get_Genotypes(-marker
=> $_)}
359 @marker_names), "\n");