2 # BioPerl module for Bio::PopGen::IO::hapmap
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Rich Dobson <r.j.dobson-at-qmul.ac.uk>
8 # Copyright Rich Dobson
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::PopGen::IO::hapmap - A parser for HapMap output data
20 # Do not use directly, use through the Bio::PopGen::IO driver
23 my $io = Bio::PopGen::IO->new(-format => 'hapmap',
24 -file => 'data.hapmap');
26 # Some IO might support reading in a population at a time
29 while( my $ind = $io->next_individual ) {
30 push @population, $ind;
35 A driver module for Bio::PopGen::IO for parsing hapmap data.
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50 Please direct usage questions or support issues to the mailing list:
52 I<bioperl-l@bioperl.org>
54 rather than to the module maintainer directly. Many experienced and
55 reponsive experts will be able look at the problem and quickly
56 address it. Please include a thorough description of the problem
57 with code and data examples if at all possible.
61 Report bugs to the Bioperl bug tracking system to help us keep track
62 of the bugs and their resolution. Bug reports can be submitted via
65 https://github.com/bioperl/bioperl-live/issues
67 =head1 AUTHOR - Rich Dobson
69 Email r.j.dobson-at-qmul.ac.uk
73 Jason Stajich, jason-at-bioperl.org
77 The rest of the documentation details each of the object methods.
78 Internal methods are usually preceded with a _
83 # Let the code begin...
85 package Bio
::PopGen
::IO
::hapmap
;
86 use vars
qw($FieldDelim $AlleleDelim $NoHeader $StartingCol);
89 ($FieldDelim,$AlleleDelim,$NoHeader,$StartingCol) =( '\s+','',0,11);
91 use Bio::PopGen::Individual;
92 use Bio::PopGen::Population;
93 use Bio::PopGen::Genotype;
95 use base qw(Bio::PopGen::IO);
101 Usage : my $obj = Bio::PopGen::IO::hapmap->new();
102 Function: Builds a new Bio::PopGen::IO::hapmap object
103 Returns : an instance of Bio::PopGen::IO::hapmap
104 Args : [optional, these are the current defaults]
105 -field_delimiter => ','
106 -allele_delimiter=> '\s+'
108 -starting_column => 11
115 my($self, @args) = @_;
117 $Bio::PopGen
::Genotype
::BlankAlleles
='';
119 my ($fieldsep,$all_sep,
120 $noheader, $start_col) = $self->_rearrange([qw(FIELD_DELIMITER
126 $self->flag('no_header', defined $noheader ?
$noheader : $NoHeader);
127 $self->flag('field_delimiter',defined $fieldsep ?
$fieldsep : $FieldDelim);
128 $self->flag('allele_delimiter',defined $all_sep ?
$all_sep : $AlleleDelim);
129 $self->starting_column(defined $start_col ?
$start_col : $StartingCol );
131 $self->{'_header'} = undef;
139 Usage : $obj->flag($flagname,$newval)
140 Function: Get/Set the flag value
141 Returns : value of a flag (a boolean)
142 Args : A flag name, currently we expect
143 'no_header', 'field_delimiter', or 'allele_delimiter'
144 on set, new value (a boolean or undef, optional)
151 my $fieldname = shift;
152 return unless defined $fieldname;
153 return $self->{'_flag'}->{$fieldname} = shift if @_;
154 return $self->{'_flag'}->{$fieldname};
161 my (@cols,@rows,@idheader);
162 while ($_ = $self->_readline){
164 next if( /^\s*\#/ || /^\s+$/ || ! length($_) );
165 if( /^rs\#\s+alleles\s+chrom\s+pos\s+strand/ ) {
166 @idheader = split $self->flag('field_delimiter');
168 push @cols, [split $self->flag('field_delimiter')];
171 my $startingcol = $self->starting_column;
173 $self->{'_header'} = [ map { $_->[0] } @cols];
174 for my $n ($startingcol.. $#{ $cols[ 0 ]}) {
175 my $column = [ $idheader[$n],
176 map{ $_->[ $n ] } @cols ];
177 push (@rows, $column);
179 $self->{'_pivot'} = [@rows];
184 =head2 next_individual
186 Title : next_individual
187 Usage : my $ind = $popgenio->next_individual;
188 Function: Retrieve the next individual from a dataset
189 Returns : A Bio::PopGen::IndividualI object
192 See L<Bio::PopGen::IndividualI>
196 sub next_individual
{
198 unless($self->{'_pivot'}){
199 #if it's the first time then pivot the table and store.
200 #Lines will now be read from the stored pivot version of the input file
204 $_ = $self->{'_pivot'}->[$self->{'_i'}++];
206 return unless defined $_;
208 # Store all the marker related info. Now that the pivot has taken
209 # place this is in the first few lines of the file Maybe this
210 # should be put in a marker object. Doesn't seem to fit too well
213 my ($samp,@marker_results) = @
$_;
215 # at some point use all this info
217 foreach my $m ( @marker_results ) {
221 if( defined $self->{'_header'} ) {
222 $markername = $self->{'_header'}->[$i-1];
224 $markername = "Marker$i";
227 my @alleles = split($self->flag('allele_delimiter'), $m);
228 if( @alleles != 2 ) {
229 $self->warn("$m for $samp\n");
231 $m = Bio
::PopGen
::Genotype
->new(-alleles
=> \
@alleles,
232 -marker_name
=> $markername,
233 -marker_type
=> 'S', # Guess hapmap only has SNP data
234 -individual_id
=> $samp);
239 return new Bio
::PopGen
::Individual
(-unique_id
=> $samp,
240 -genotypes
=> \
@marker_results);
244 =head2 next_population
246 Title : next_population
247 Usage : my $ind = $popgenio->next_population;
248 Function: Retrieve the next population from a dataset
249 Returns : Bio::PopGen::PopulationI object
251 Note : Many implementation will not implement this
253 See L<Bio::PopGen::PopulationI>
257 sub next_population
{
260 while( my $ind = $self->next_individual ) {
263 Bio
::PopGen
::Population
->new(-individuals
=> \
@inds);
266 =head2 write_individual
268 Title : write_individual
269 Usage : $popgenio->write_individual($ind);
270 Function: Write an individual out in the file format
271 NOT SUPPORTED BY hapmap format
273 Args : Bio::PopGen::PopulationI object(s)
275 See L<Bio::PopGen::PopulationI>
279 sub write_individual
{
280 my ($self,@inds) = @_;
282 # data from hapmap is output, not input, so
283 # we don't need a method for writing and input file
285 $self->throw_not_implemented();
288 =head2 write_population
290 Title : write_population
291 Usage : $popgenio->write_population($pop);
292 Function: Write a population out in the file format
293 NOT SUPPORTED BY hapmap format
295 Args : Bio::PopGen::PopulationI object(s)
296 Note : Many implementation will not implement this
298 See L<Bio::PopGen::PopulationI>
302 sub write_population
{
303 my ($self,@inds) = @_;
304 $self->throw_not_implemented();
308 =head2 starting_column
310 Title : starting_column
311 Usage : $obj->starting_column($newval)
312 Function: Column where data starts
314 Returns : value of starting_column (a scalar)
315 Args : on set, new value (a scalar or undef, optional)
322 return $self->{'starting_column'} = shift if @_;
323 return $self->{'starting_column'};