Merge pull request #202 from RoyChaudhuri/master
[bioperl-live.git] / scripts / popgen / bp_composite_LD.pl
blob742cb71a22a67ea6b69eba2ebfbb86fb530e5243
1 #!/usr/bin/perl
2 # -*-Perl-*-
4 =head1 NAME
6 bp_composite_LD -i filename.prettybase.txt --sortbyld E<gt> outfile
8 =head1 SYNOPSIS
10 bp_composite_LD -i filename.prettybase [-o out.LD] [-f prettybase/csv] [--sortbyld] [--noconvertindels]
12 =head2 DESCRIPTION
14 This a script which allows an easy way to calculate composite LD.
16 =head2 OPTIONS
18 -i or --in filename
20 -f or --format genotype format (prettybase or CSV)
22 --sortbyld To see data sorted by LD instead of just all the
23 site1/site2 pair LD values.
25 -o or --out output filename, otherwise will print to STDOUT
27 --noconvert (applicable for prettybase format file only)
28 if specified will NOT attempt to convert indel
29 states to 'I' and delete states ('-') to 'D'.
31 -h or --help see this documentation
33 =head2 AUTHOR Jason Stajich, Matthew Hahn
35 For more information contact:
37 Matthew Hahn, E<lt>matthew.hahn-at-duke.eduE<gt>
38 Jason Stajich E<lt>jason-at-bioperl-dot-orgE<gt>
41 =cut
43 use strict;
44 use warnings;
46 use Bio::PopGen::IO;
47 use Bio::PopGen::Statistics;
48 use Bio::PopGen::Population;
50 use Getopt::Long;
52 my ($file,$outfile,$sortbyld,$format,$noconvert,$verbose);
53 $format = 'prettybase'; # default format is prettybase
54 GetOptions(
55 'i|in:s' => \$file, # pass the filename as
56 'o|out:s' => \$outfile,
57 'f|format:s' => \$format,
58 'sortbyld' => \$sortbyld,
59 'noconvert' => \$noconvert,
60 'v|verbose' => \$verbose,
61 'h|help' => sub { system('perldoc', $0);
62 exit; },
65 if( ! $file ) {
66 $file = shift @ARGV; # if no -i specified
69 my $io = Bio::PopGen::IO->new(-format => $format,
70 -verbose=> $verbose,
71 -CONVERT_INDEL_STATES => ! $noconvert,
72 -file => $file);
74 my $stats = Bio::PopGen::Statistics->new(-verbose => $verbose);
75 my $pop = $io->next_population;
77 my %LD_matrix = $stats->composite_LD($pop);
79 # sites can be ordered by sorting their names
81 my @sites;
82 my $out;
83 if( $outfile ) {
84 open $out, '>', $outfile or die "Could not write file '$outfile': $!\n";
85 } else {
86 $out = \*STDOUT;
88 foreach my $site1 ( sort keys %LD_matrix ) {
89 foreach my $site2 ( sort keys %{ $LD_matrix{$site1} } ) {
90 my $LD = $LD_matrix{$site1}->{$site2}; # LD for site1,site2
91 if( $sortbyld ) {
92 push @sites, [ $site1,$site2,@$LD];
93 } else {
94 printf $out "%s,%s - LD=%.4f chisq=%.4f\n",$site1,$site2,@$LD;
99 if( $sortbyld ) {
100 foreach my $s ( sort { $b->[3] <=> $a->[3] } @sites ) {
101 my ($site1,$site2,$ld,$chisq) = @$s;
102 print $out "$site1,$site2 - LD=$ld, chisq=$chisq\n";