modified: tasks/common.wdl
[GalaxyCodeBases.git] / tools / simulators / popudepth / getsnpwm.pl
blob428bdec53b46fe5d9e52bc01382247c6ac8bfdb6
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use Time::HiRes qw ( gettimeofday tv_interval );
5 use Galaxy::ShowHelp;
7 $main::VERSION=0.1.2;
9 our $opts='i:o:bv';
10 our($opt_i, $opt_o, $opt_v,$opt_b);
12 our $help=<<EOH;
13 \t-i Add_ref files list (./indsnp.lst)
14 \t-o Output PWM file (snp.pwm)
15 \t-v show verbose info to STDOUT
16 \t-b No pause for batch runs
17 EOH
18 # \t-d dbSNP SQLite data file (_tdbSNP.sqlite)
20 ShowHelp();
22 $opt_i='./indsnp.lst' if ! defined $opt_i;
23 $opt_o='snp.pwm' if ! $opt_o;
25 print STDERR "From [$opt_i] to [$opt_o]\n";
26 if (! $opt_b) {print STDERR 'press [Enter] to continue...'; <>;}
28 #http://doc.bioperl.org/bioperl-live/Bio/Tools/IUPAC.html#BEGIN1
29 my %IUB = ( A => [qw(A)],
30 C => [qw(C)],
31 G => [qw(G)],
32 T => [qw(T)],
33 U => [qw(U)],
34 M => [qw(A C)],
35 R => [qw(A G)],
36 W => [qw(A T)],
37 S => [qw(C G)],
38 Y => [qw(C T)],
39 K => [qw(G T)],
40 V => [qw(A C G)],
41 H => [qw(A C T)],
42 D => [qw(A G T)],
43 B => [qw(C G T)],
44 X => [qw(G A T C)],
45 N => [qw(G A T C)]
48 my $start_time = [gettimeofday];
50 my @files;
51 open L,'<',$opt_i or die "Error opening $opt_i: $!\n";
52 while (<L>) {
53 chomp;
54 if (-s $_) {push @files,$_;}
55 else {warn "[!]$_ not available.\n";}
57 close L;
58 warn '[!]Total: ',scalar @files," files.\n";
60 my $snpcount;
61 open O,'>',$opt_o or die "Error opening $opt_o: $!\n";
62 for my $file (@files) {
63 open I,'<',$file or die "Error opening $file: $!\n";
64 while (<I>) {
65 chomp;
66 my ($chr,$pos,$bases) = split /\t/;
67 ++$snpcount;
68 my @base = split / /,$bases;
69 print O "$chr\t$pos\t$base[0]\t";
70 #if ($opt_v) {
71 # $bases =~ s/ //g;
72 # print ">$chr\t$pos\t$base[0]\t$bases\n";
74 print ">$chr\t$pos\t" if $opt_v;
75 my ($d,$i,%counter,%stat);
76 $stat{1}=$stat{2}=$stat{3}=$stat{4}=0;
77 for (@base) {
78 my $arr=$IUB{$_} or next; # So, $i != @base;
79 $d = @$arr;
80 print "$_$d" if $opt_v;
81 ++$stat{$d};
82 $d = 1/$d;
83 for (@$arr) {
84 $counter{$_} += $d;
85 #$i += $d;
87 ++$i;
89 print O "$i,$stat{1},$stat{2},$stat{3},$stat{4}\t";
90 @base=();
91 for (sort {$counter{$b} <=> $counter{$a}} keys %counter) {
92 push @base,sprintf('%s:%f',"$_",$counter{$_}/$i);
94 print O join(',',@base),"\n";
95 print "\n$i,$stat{1},$stat{2},$stat{3},$stat{4}\t",join(',',@base),"\n\n" if $opt_v;
97 close I;
99 close O;
101 warn "[!]Total: $snpcount SNPs.\n[!]Done !\n";
103 my $stop_time = [gettimeofday];
104 #$|=1;
105 print STDERR "\n Time Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s)\n";