1 # module Bio::PopGen::TagHaplotype.pm
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Pedro M. Gomez-Fabre <pgf18872-at-gsk-dot-com>
7 # Copyright Pedro M. Gomez-Fabre
9 # You may distribute this module under the same term as perl itself
12 # POD documentation - main docs before the code
16 Bio::PopGen::TagHaplotype.pm - Haplotype tag object.
20 use Bio::PopGen::TagHaplotype;
22 my $obj = Bio::PopGen::TagHaplotype -> new($hap);
26 This module take as input a haplotype and try toe get the minimal set
27 of SNP that define the haplotype. This module can be use alone. But
28 due to the tagging haplotype process is exponential one. My suggestion
29 is that before to use this module you pass your data under Select.mp
30 module also on this folder. In any case if, you provide an haplotype
31 the module will try to find the answer to your question.
35 my $obj = Bio::PopGen::TagHaplotype -> new($hap);
37 were $hap is the reference to an array of array with the haplotype.
48 User feedback is an integral part of the evolution of this and other
49 Bioperl modules. Send your comments and suggestions preferably to
50 the Bioperl mailing list. Your participation is much appreciated.
52 bioperl-l@bioperl.org - General discussion
53 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
57 Please direct usage questions or support issues to the mailing list:
59 I<bioperl-l@bioperl.org>
61 rather than to the module maintainer directly. Many experienced and
62 reponsive experts will be able look at the problem and quickly
63 address it. Please include a thorough description of the problem
64 with code and data examples if at all possible.
68 Report bugs to the Bioperl bug tracking system to help us keep track
69 of the bugs and their resolution. Bug reports can be submitted via
72 https://github.com/bioperl/bioperl-live/issues
74 =head1 AUTHOR - Pedro M. Gomez-Fabre
76 Email pgf18872-at-gsk-dot-com
81 # Let the code begin...
83 package Bio
::PopGen
::TagHaplotype
;
87 use Storable
qw(dclone);
89 use base
qw(Bio::Root::Root);
93 Bio::PopGen::TagHaplotype->new(-haplotype_block => \$hapblockref)
101 Function: constructor of the class.
103 Args : input haplotype (array of array)
108 #------------------------
110 #------------------------
111 my ($class, @args) = @_;
113 my $self = $class->SUPER::new
(@args);
115 my ($haplotype_block) = $self->_rearrange([qw(HAPLOTYPE_BLOCK)],@args);
117 if ($haplotype_block) {
118 $self->haplotype_block($haplotype_block);
121 $self->throw("haplotype has not been supplied\n$USAGE");
124 # check that the haplotype block is well formed.
125 for (my $i=0; $i<$#$haplotype_block+1; $i++){
126 if ( $#{$haplotype_block->[0]} !=
127 $#{$haplotype_block->[$i]} ){
129 $self->throw("The haplotype matrix is not well formed (Not squared)");
133 # make the calculation
134 my $tag_list = _scan_snp
( $self ->haplotype_block );
137 $self ->tag_list($tag_list);
140 $self ->tag_list(undef);
143 if ( defined $self->tag_list){
144 $self ->tag_length(scalar @
{$self->tag_list});
147 $self ->tag_length(0); #"NO TAGS FOUND!"
153 =head2 haplotype_block
155 Title : haplotype_block
156 Usage : my $haplotype_block = $TagHaplotype->haplotype_block();
157 Function: Get the haplotype block for a haplotype tagging selection
158 Returns : reference of array
159 Args : reference of array with haplotype pattern
166 return $self->{'_haplotype_block'} = shift if @_;
167 return $self->{'_haplotype_block'};
174 Usage : $obj->input_block()
175 Function: returns haplotype block. By now will produce the same output than
176 $self->haplotype_block. but for compatiblity, this method is kept.
177 This method is deprecated.
178 Returns : reference to array of array with the haplotype input value
184 #------------------------
186 #------------------------
189 $self->warn(ref($self). "::input_block - deprecated method. Use haplotype_block() instead.");
190 return $self->haplotype_block;
196 Usage : $obj->tag_list()
197 Function: returns the list of SNPs combination that identify the
198 haplotype. All combinations are displayed as arrays
199 Returns : reference to array of array.
205 #------------------------
207 #------------------------
209 return $self->{'_tag_list'}= shift if @_;
210 return $self->{'_tag_list'};
216 Usage : $obj->tag_length()
217 Function: returns the length of the tag.
224 #------------------------
226 #------------------------
228 return $self ->{'_tag_length'} = shift if @_;
229 return $self ->{'_tag_length'};
236 Function: scan sets increasing the length until find a non degenerated
244 #------------------------
246 #------------------------
249 my $hap_length = scalar @
{$hap->[0]}; ## store the haplotype length
251 for my $i(1..$hap_length){
253 my $list = _gen_comb
($hap_length, $i);
255 my $snp_collection = _scan_combinations
($hap, $list);
257 # if there is any element on the collection.
258 # We have reached our goal and
259 # we can stop the calculation.
260 if($#$snp_collection>-1){
261 return $snp_collection;
270 Function: we supply the length of the haplotype and the length of the
271 word we need to find and the functions returns the possible
272 list of combinations.
279 #------------------------
281 #------------------------
283 my ($hap_length,$n) = @_;
285 my @array = (); # list with all elements we have to combine
288 for(0..$hap_length-1){ push @array, $_ };
291 # we need some parameters to create the combination list.
292 # This parameters can be changed if we can modify the list values
295 my $m = -1; # this parameter start the calculation at value
296 # m+1 on the recursive cicle.
298 my $value = []; ## seems to have not too much sense here, but is
299 ## needed on the recursion and need to be started
303 _generateCombinations
( \
@array, \
$m, \
$n, $value, $list);
309 =head2 _generateCombinations
311 Title : _generateCombinations
313 Function: Recursive function that produce all combinations for a set
319 and word of B<3> will produce:
332 #------------------------
333 sub _generateCombinations
{
334 #------------------------
335 my ($rarr, $rm, $rn, $rvalue,$rlist)=@_;
337 for (my $i = ($$rm+1); $i<scalar @
$rarr; $i++){
338 push (my @value2,@
$rvalue,$rarr->[$i]);
339 if (scalar @value2<$$rn){
340 _generateCombinations
($rarr,\
$i, $rn, \
@value2, $rlist);
342 if (scalar @value2==$$rn){
343 push @
$rlist, [@value2];
345 if(scalar @value2>$$rn){
351 # take the list of combinations
357 # generate a sub array from the haplotype with the snp tag for the combination
358 # and check all haplotypes for these columns.
359 # if two haplotypes have the same value. we can not define the haplotype
361 # Will return a list of valid combinations (SNP Tags)
364 =head2 _scan_combinations
366 Title : _scan_combinations
368 Function: take the haplotype and a list of possible combination
369 for that length. Generate a subset and scan it to find if
370 the information is enought to define the haplotype set.
377 #------------------------
378 sub _scan_combinations
{
379 #------------------------
383 my $valid_combination = undef;
385 # we have to check every snp combinations from the list
386 for my $i (0..$#$list){
388 # extract from the big array the one we will use for tag calculations
389 my $subArray = _get_subArray
($hap, $list->[$i]);
391 my $degeneration = _deg_test
($subArray);
394 push @
$valid_combination, [@
{$list->[$i]}];
397 return $valid_combination;
400 # return 1 if two arrays are degenerated (same haplotype)
401 #------------------------
403 #------------------------
407 # for every sub array we compare each element with the rest
408 for my $c1(0..$#$hap){
409 for my $c2($c1+1..$#$hap){
410 my $degeneration = compare_arrays
($hap->[$c1], $hap->[$c2]);
412 # if the two arrays are the same
419 #------------------------
421 #------------------------
422 my($hap, $combination) =@_;
424 my $out = []; # output array to be tested
426 for my $i (0..$#$hap){
427 foreach(@
$combination){
428 push @
{$out->[$i]}, $hap->[$i][$_];
435 # take two arrays and compare their values
436 # Returns : 1 if the two values are the same
437 # 0 if the values are different
440 #------------------------
442 #------------------------
443 my ($first, $second) = @_;
444 return 0 unless @
$first == @
$second;
445 for (my $i = 0; $i < @
$first; $i++) {
446 return 0 if $first->[$i] ne $second->[$i];