nexml.t: Added a missing XML::Twig requirement.
[bioperl-live.git] / Bio / PopGen / TagHaplotype.pm
blob7d37c5d3fab97fa8a8dde1342da44f653d8c2317
1 # module Bio::PopGen::TagHaplotype.pm
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Pedro M. Gomez-Fabre <pgf18872-at-gsk-dot-com>
7 # Copyright Pedro M. Gomez-Fabre
9 # You may distribute this module under the same term as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::PopGen::TagHaplotype.pm - Haplotype tag object.
18 =head1 SYNOPSIS
20 use Bio::PopGen::TagHaplotype;
22 my $obj = Bio::PopGen::TagHaplotype -> new($hap);
24 =head1 DESCRIPTION
26 This module take as input a haplotype and try toe get the minimal set
27 of SNP that define the haplotype. This module can be use alone. But
28 due to the tagging haplotype process is exponential one. My suggestion
29 is that before to use this module you pass your data under Select.mp
30 module also on this folder. In any case if, you provide an haplotype
31 the module will try to find the answer to your question.
33 =head1 CONSTRUCTORS
35 my $obj = Bio::PopGen::TagHaplotype -> new($hap);
37 were $hap is the reference to an array of array with the haplotype.
39 $hap= [[0, 0, 0],
40 [1, 0, 0],
41 [0, 1, 1]
44 =head1 FEEDBACK
46 =head2 Mailing Lists
48 User feedback is an integral part of the evolution of this and other
49 Bioperl modules. Send your comments and suggestions preferably to
50 the Bioperl mailing list. Your participation is much appreciated.
52 bioperl-l@bioperl.org - General discussion
53 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
55 =head2 Support
57 Please direct usage questions or support issues to the mailing list:
59 I<bioperl-l@bioperl.org>
61 rather than to the module maintainer directly. Many experienced and
62 reponsive experts will be able look at the problem and quickly
63 address it. Please include a thorough description of the problem
64 with code and data examples if at all possible.
66 =head2 Reporting Bugs
68 Report bugs to the Bioperl bug tracking system to help us keep track
69 of the bugs and their resolution. Bug reports can be submitted via
70 the web:
72 https://github.com/bioperl/bioperl-live/issues
74 =head1 AUTHOR - Pedro M. Gomez-Fabre
76 Email pgf18872-at-gsk-dot-com
78 =cut
81 # Let the code begin...
83 package Bio::PopGen::TagHaplotype;
84 use strict;
86 use Data::Dumper;
87 use Storable qw(dclone);
89 use base qw(Bio::Root::Root);
91 my $USAGE = <<EOF
92 Usage:
93 Bio::PopGen::TagHaplotype->new(-haplotype_block => \$hapblockref)
95 EOF
98 =head2 new
100 Title : new
101 Function: constructor of the class.
102 Returns : self hash
103 Args : input haplotype (array of array)
104 Status : public
106 =cut
108 #------------------------
109 sub new{
110 #------------------------
111 my ($class, @args) = @_;
113 my $self = $class->SUPER::new(@args);
115 my ($haplotype_block) = $self->_rearrange([qw(HAPLOTYPE_BLOCK)],@args);
117 if ($haplotype_block) {
118 $self->haplotype_block($haplotype_block);
120 else{
121 $self->throw("haplotype has not been supplied\n$USAGE");
124 # check that the haplotype block is well formed.
125 for (my $i=0; $i<$#$haplotype_block+1; $i++){
126 if ( $#{$haplotype_block->[0]} !=
127 $#{$haplotype_block->[$i]} ){
129 $self->throw("The haplotype matrix is not well formed (Not squared)");
133 # make the calculation
134 my $tag_list = _scan_snp( $self ->haplotype_block );
136 if ($tag_list){
137 $self ->tag_list($tag_list);
139 else {
140 $self ->tag_list(undef);
143 if ( defined $self->tag_list){
144 $self ->tag_length(scalar @{$self->tag_list});
146 else {
147 $self ->tag_length(0); #"NO TAGS FOUND!"
150 return $self;
153 =head2 haplotype_block
155 Title : haplotype_block
156 Usage : my $haplotype_block = $TagHaplotype->haplotype_block();
157 Function: Get the haplotype block for a haplotype tagging selection
158 Returns : reference of array
159 Args : reference of array with haplotype pattern
162 =cut
164 sub haplotype_block{
165 my ($self) =shift;
166 return $self->{'_haplotype_block'} = shift if @_;
167 return $self->{'_haplotype_block'};
171 =head2 input_block
173 Title : input_block
174 Usage : $obj->input_block()
175 Function: returns haplotype block. By now will produce the same output than
176 $self->haplotype_block. but for compatiblity, this method is kept.
177 This method is deprecated.
178 Returns : reference to array of array with the haplotype input value
179 Args : none
180 Status : public
182 =cut
184 #------------------------
185 sub input_block{
186 #------------------------
187 my $self = shift;
189 $self->warn(ref($self). "::input_block - deprecated method. Use haplotype_block() instead.");
190 return $self->haplotype_block;
193 =head2 tag_list
195 Title : tag_list
196 Usage : $obj->tag_list()
197 Function: returns the list of SNPs combination that identify the
198 haplotype. All combinations are displayed as arrays
199 Returns : reference to array of array.
200 Args : none
201 Status : public
203 =cut
205 #------------------------
206 sub tag_list{
207 #------------------------
208 my ($self) = shift;
209 return $self->{'_tag_list'}= shift if @_;
210 return $self->{'_tag_list'};
213 =head2 tag_length
215 Title : tag_length
216 Usage : $obj->tag_length()
217 Function: returns the length of the tag.
218 Returns : scalar
219 Args : none
220 Status : public
222 =cut
224 #------------------------
225 sub tag_length{
226 #------------------------
227 my ($self) =shift;
228 return $self ->{'_tag_length'} = shift if @_;
229 return $self ->{'_tag_length'};
232 =head2 _scan_snp
234 Title : _scan_snp
235 Usage : internal
236 Function: scan sets increasing the length until find a non degenerated
237 pattern.
238 Returns : scalar
239 Args : none
240 Status : private
242 =cut
244 #------------------------
245 sub _scan_snp{
246 #------------------------
247 my ($hap)=@_;
249 my $hap_length = scalar @{$hap->[0]}; ## store the haplotype length
251 for my $i(1..$hap_length){
253 my $list = _gen_comb($hap_length, $i);
255 my $snp_collection = _scan_combinations($hap, $list);
257 # if there is any element on the collection.
258 # We have reached our goal and
259 # we can stop the calculation.
260 if($#$snp_collection>-1){
261 return $snp_collection;
266 =head2 _gen_comb
268 Title : _gen_comb
269 Usage : internal
270 Function: we supply the length of the haplotype and the length of the
271 word we need to find and the functions returns the possible
272 list of combinations.
273 Returns : scalar
274 Args : none
275 Status : private
277 =cut
279 #------------------------
280 sub _gen_comb{
281 #------------------------
283 my ($hap_length,$n) = @_;
285 my @array = (); # list with all elements we have to combine
288 for(0..$hap_length-1){ push @array, $_ };
291 # we need some parameters to create the combination list.
292 # This parameters can be changed if we can modify the list values
295 my $m = -1; # this parameter start the calculation at value
296 # m+1 on the recursive cicle.
298 my $value = []; ## seems to have not too much sense here, but is
299 ## needed on the recursion and need to be started
300 ## from here
301 my $list = [];
303 _generateCombinations ( \@array, \$m, \$n, $value, $list);
305 return $list;
309 =head2 _generateCombinations
311 Title : _generateCombinations
312 Usage : internal
313 Function: Recursive function that produce all combinations for a set
315 i.e.:
317 1, 2, 3, 4
319 and word of B<3> will produce:
321 1, 2, 3
322 1, 2, 4
323 1, 3, 4
324 2, 3, 4
326 Returns :
327 Args : none
328 Status : private
330 =cut
332 #------------------------
333 sub _generateCombinations{
334 #------------------------
335 my ($rarr, $rm, $rn, $rvalue,$rlist)=@_;
337 for (my $i = ($$rm+1); $i<scalar @$rarr; $i++){
338 push (my @value2,@$rvalue,$rarr->[$i]);
339 if (scalar @value2<$$rn){
340 _generateCombinations($rarr,\$i, $rn, \@value2, $rlist);
342 if (scalar @value2==$$rn){
343 push @$rlist, [@value2];
345 if(scalar @value2>$$rn){
346 last;
351 # take the list of combinations
352 # i.e.: 1 2 3
353 # 1 2 4
354 # 1 3 4
355 # 2 3 4
357 # generate a sub array from the haplotype with the snp tag for the combination
358 # and check all haplotypes for these columns.
359 # if two haplotypes have the same value. we can not define the haplotype
360 # without ambiguity.
361 # Will return a list of valid combinations (SNP Tags)
364 =head2 _scan_combinations
366 Title : _scan_combinations
367 Usage : internal
368 Function: take the haplotype and a list of possible combination
369 for that length. Generate a subset and scan it to find if
370 the information is enought to define the haplotype set.
371 Returns :
372 Args : none
373 Status : private
375 =cut
377 #------------------------
378 sub _scan_combinations {
379 #------------------------
381 my($hap,$list) = @_;
383 my $valid_combination = undef;
385 # we have to check every snp combinations from the list
386 for my $i (0..$#$list){
388 # extract from the big array the one we will use for tag calculations
389 my $subArray = _get_subArray ($hap, $list->[$i]);
391 my $degeneration = _deg_test($subArray);
393 if(!$degeneration){
394 push @$valid_combination, [@{$list->[$i]}];
397 return $valid_combination;
400 # return 1 if two arrays are degenerated (same haplotype)
401 #------------------------
402 sub _deg_test{
403 #------------------------
405 my ($hap)= @_;
407 # for every sub array we compare each element with the rest
408 for my $c1(0..$#$hap){
409 for my $c2($c1+1..$#$hap){
410 my $degeneration = compare_arrays($hap->[$c1], $hap->[$c2]);
411 if ($degeneration){
412 # if the two arrays are the same
413 return 1;
419 #------------------------
420 sub _get_subArray {
421 #------------------------
422 my($hap, $combination) =@_;
424 my $out = []; # output array to be tested
426 for my $i (0..$#$hap){
427 foreach(@$combination){
428 push @{$out->[$i]}, $hap->[$i][$_];
431 return $out;
435 # take two arrays and compare their values
436 # Returns : 1 if the two values are the same
437 # 0 if the values are different
440 #------------------------
441 sub compare_arrays {
442 #------------------------
443 my ($first, $second) = @_;
444 return 0 unless @$first == @$second;
445 for (my $i = 0; $i < @$first; $i++) {
446 return 0 if $first->[$i] ne $second->[$i];
448 return 1;