changes all issue tracking in preparation for switch to github issues
[bioperl-live.git] / Bio / PopGen / IO / hapmap.pm
blobe1d7095e1ed4479e594dcafd16ef859fb2b796e4
2 # BioPerl module for Bio::PopGen::IO::hapmap
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Rich Dobson <r.j.dobson-at-qmul.ac.uk>
8 # Copyright Rich Dobson
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::PopGen::IO::hapmap - A parser for HapMap output data
18 =head1 SYNOPSIS
20 # Do not use directly, use through the Bio::PopGen::IO driver
22 use Bio::PopGen::IO;
23 my $io = Bio::PopGen::IO->new(-format => 'hapmap',
24 -file => 'data.hapmap');
26 # Some IO might support reading in a population at a time
28 my @population;
29 while( my $ind = $io->next_individual ) {
30 push @population, $ind;
33 =head1 DESCRIPTION
35 A driver module for Bio::PopGen::IO for parsing hapmap data.
37 =head1 FEEDBACK
39 =head2 Mailing Lists
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 =head2 Support
50 Please direct usage questions or support issues to the mailing list:
52 I<bioperl-l@bioperl.org>
54 rather than to the module maintainer directly. Many experienced and
55 reponsive experts will be able look at the problem and quickly
56 address it. Please include a thorough description of the problem
57 with code and data examples if at all possible.
59 =head2 Reporting Bugs
61 Report bugs to the Bioperl bug tracking system to help us keep track
62 of the bugs and their resolution. Bug reports can be submitted via
63 the web:
65 https://github.com/bioperl/bioperl-live/issues
67 =head1 AUTHOR - Rich Dobson
69 Email r.j.dobson-at-qmul.ac.uk
71 =head1 CONTRIBUTORS
73 Jason Stajich, jason-at-bioperl.org
75 =head1 APPENDIX
77 The rest of the documentation details each of the object methods.
78 Internal methods are usually preceded with a _
80 =cut
83 # Let the code begin...
85 package Bio::PopGen::IO::hapmap;
86 use vars qw($FieldDelim $AlleleDelim $NoHeader $StartingCol);
87 use strict;
89 ($FieldDelim,$AlleleDelim,$NoHeader,$StartingCol) =( '\s+','',0,11);
91 use Bio::PopGen::Individual;
92 use Bio::PopGen::Population;
93 use Bio::PopGen::Genotype;
95 use base qw(Bio::PopGen::IO);
98 =head2 new
100 Title : new
101 Usage : my $obj = Bio::PopGen::IO::hapmap->new();
102 Function: Builds a new Bio::PopGen::IO::hapmap object
103 Returns : an instance of Bio::PopGen::IO::hapmap
104 Args : [optional, these are the current defaults]
105 -field_delimiter => ','
106 -allele_delimiter=> '\s+'
107 -no_header => 0,
108 -starting_column => 11
110 =cut
113 sub _initialize {
115 my($self, @args) = @_;
117 $Bio::PopGen::Genotype::BlankAlleles='';
119 my ($fieldsep,$all_sep,
120 $noheader, $start_col) = $self->_rearrange([qw(FIELD_DELIMITER
121 ALLELE_DELIMITER
122 NO_HEADER
123 STARTING_COLUMN)],
124 @args);
126 $self->flag('no_header', defined $noheader ? $noheader : $NoHeader);
127 $self->flag('field_delimiter',defined $fieldsep ? $fieldsep : $FieldDelim);
128 $self->flag('allele_delimiter',defined $all_sep ? $all_sep : $AlleleDelim);
129 $self->starting_column(defined $start_col ? $start_col : $StartingCol );
131 $self->{'_header'} = undef;
132 return 1;
136 =head2 flag
138 Title : flag
139 Usage : $obj->flag($flagname,$newval)
140 Function: Get/Set the flag value
141 Returns : value of a flag (a boolean)
142 Args : A flag name, currently we expect
143 'no_header', 'field_delimiter', or 'allele_delimiter'
144 on set, new value (a boolean or undef, optional)
146 =cut
148 sub flag {
150 my $self = shift;
151 my $fieldname = shift;
152 return unless defined $fieldname;
153 return $self->{'_flag'}->{$fieldname} = shift if @_;
154 return $self->{'_flag'}->{$fieldname};
158 sub _pivot {
159 my ($self) = @_;
161 my (@cols,@rows,@idheader);
162 while ($_ = $self->_readline){
163 chomp($_);
164 next if( /^\s*\#/ || /^\s+$/ || ! length($_) );
165 if( /^rs\#\s+alleles\s+chrom\s+pos\s+strand/ ) {
166 @idheader = split $self->flag('field_delimiter');
167 } else {
168 push @cols, [split $self->flag('field_delimiter')];
171 my $startingcol = $self->starting_column;
173 $self->{'_header'} = [ map { $_->[0] } @cols];
174 for my $n ($startingcol.. $#{ $cols[ 0 ]}) {
175 my $column = [ $idheader[$n],
176 map{ $_->[ $n ] } @cols ];
177 push (@rows, $column);
179 $self->{'_pivot'} = [@rows];
180 $self->{'_i'} = 0;
184 =head2 next_individual
186 Title : next_individual
187 Usage : my $ind = $popgenio->next_individual;
188 Function: Retrieve the next individual from a dataset
189 Returns : A Bio::PopGen::IndividualI object
190 Args : none
192 See L<Bio::PopGen::IndividualI>
194 =cut
196 sub next_individual {
197 my ($self) = @_;
198 unless($self->{'_pivot'}){
199 #if it's the first time then pivot the table and store.
200 #Lines will now be read from the stored pivot version of the input file
201 $self->_pivot;
204 $_ = $self->{'_pivot'}->[$self->{'_i'}++];
206 return unless defined $_;
208 # Store all the marker related info. Now that the pivot has taken
209 # place this is in the first few lines of the file Maybe this
210 # should be put in a marker object. Doesn't seem to fit too well
211 # though
213 my ($samp,@marker_results) = @$_;
215 # at some point use all this info
216 my $i = 1;
217 foreach my $m ( @marker_results ) {
218 $m =~ s/^\s+//;
219 $m =~ s/\s+$//;
220 my $markername;
221 if( defined $self->{'_header'} ) {
222 $markername = $self->{'_header'}->[$i-1];
223 } else {
224 $markername = "Marker$i";
227 my @alleles = split($self->flag('allele_delimiter'), $m);
228 if( @alleles != 2 ) {
229 $self->warn("$m for $samp\n");
230 } else {
231 $m = Bio::PopGen::Genotype->new(-alleles => \@alleles,
232 -marker_name => $markername,
233 -marker_type => 'S', # Guess hapmap only has SNP data
234 -individual_id => $samp);
236 $i++;
239 return new Bio::PopGen::Individual(-unique_id => $samp,
240 -genotypes => \@marker_results);
244 =head2 next_population
246 Title : next_population
247 Usage : my $ind = $popgenio->next_population;
248 Function: Retrieve the next population from a dataset
249 Returns : Bio::PopGen::PopulationI object
250 Args : none
251 Note : Many implementation will not implement this
253 See L<Bio::PopGen::PopulationI>
255 =cut
257 sub next_population {
258 my ($self) = @_;
259 my @inds;
260 while( my $ind = $self->next_individual ) {
261 push @inds, $ind;
263 Bio::PopGen::Population->new(-individuals => \@inds);
266 =head2 write_individual
268 Title : write_individual
269 Usage : $popgenio->write_individual($ind);
270 Function: Write an individual out in the file format
271 NOT SUPPORTED BY hapmap format
272 Returns : none
273 Args : Bio::PopGen::PopulationI object(s)
275 See L<Bio::PopGen::PopulationI>
277 =cut
279 sub write_individual {
280 my ($self,@inds) = @_;
282 # data from hapmap is output, not input, so
283 # we don't need a method for writing and input file
285 $self->throw_not_implemented();
288 =head2 write_population
290 Title : write_population
291 Usage : $popgenio->write_population($pop);
292 Function: Write a population out in the file format
293 NOT SUPPORTED BY hapmap format
294 Returns : none
295 Args : Bio::PopGen::PopulationI object(s)
296 Note : Many implementation will not implement this
298 See L<Bio::PopGen::PopulationI>
300 =cut
302 sub write_population {
303 my ($self,@inds) = @_;
304 $self->throw_not_implemented();
308 =head2 starting_column
310 Title : starting_column
311 Usage : $obj->starting_column($newval)
312 Function: Column where data starts
313 Example :
314 Returns : value of starting_column (a scalar)
315 Args : on set, new value (a scalar or undef, optional)
317 =cut
319 sub starting_column{
320 my $self = shift;
322 return $self->{'starting_column'} = shift if @_;
323 return $self->{'starting_column'};