modified: tasks/common.wdl
[GalaxyCodeBases.git] / tools / linkage / exmarkerseq.pl
blob01c0555810e38ba074b472f0933601e011173d71
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use lib '/nas/RD_09C/resequencing/soft/lib';
5 use GalaxyXS::ChromByte 1.02;# ':all';
7 unless (@ARGV > 0) {
8 print "perl $0 <chr.nfo> <fabychr_path> <marker_len> <marker_file> <parent_snp> <out_file>\n";
9 exit 0;
12 my ($chrnfo,$fabychr_path,$marker_len,$marker_file,$psnp_file,$out_file)=@ARGV;
13 $marker_len=int $marker_len;
14 die "[x]marker_len must > 10 !\n" if $marker_len < 10;
15 open M,'<',$marker_file or die $!;
16 <M>;
17 my $t=tell M;
18 $_=<M>;
19 my $ChrID=(split /\t/)[0];
20 seek M,$t,0;
22 my ($ChrLen,$ChrMem,$SNPMem)=(0);
23 open ChrNFO,'<',$chrnfo or die "[x]Error opening $chrnfo: $!\n";
24 while (<ChrNFO>) {
25 #next if /^#/;
26 my ($chrid,$len)=split /\t/;
27 next if $chrid ne $ChrID;
28 $ChrLen=$len;
30 close ChrNFO;
31 die "[x]Cannot find correct ChrLen for [$ChrID] !\n" if $ChrLen<1;
32 $ChrMem=initchr($ChrLen);
33 $SNPMem=initchr($ChrLen); # OK, leave the 8th bit and alloc a new byte !
35 my ($seq,@seq);
36 open F,'<',$fabychr_path."/$ChrID.fa" or die $!;
37 while(<F>) {
38 chomp;
39 my ($id)=split / /,$_,2;
40 $id =~ s/^>//;
41 die "[x]Wrong RefSeq ! ($ChrID != $id)\n" if $ChrID ne $id;
42 $/=">";
43 $seq=<F>;
44 chomp $seq;
45 $seq =~ s/\s//g;
46 $/="\n";
48 close F;
50 @seq=split //,$seq;
51 $seq='';
53 my %bIUB = ( A => 1,
54 C => 4,
55 G => 8,
56 T => 2,
57 M => 5,
58 R => 9,
59 W => 3,
60 S => 12,
61 Y => 6,
62 K => 10,
63 V => 13,
64 H => 7,
65 D => 11,
66 B => 14,
67 N => 15
69 my %bIUBex = (
70 U => 2,
71 X => 15,
73 my (%ubIUB,%rbIUB);
74 #$rbIUB{$bIUB{$k}}=$_ for keys %bIUB;
75 @rbIUB{ values %bIUB } = keys %bIUB;
77 #@hash1{ keys %hash2 } = values %hash2;
78 @bIUB{ keys %bIUBex } = values %bIUBex;
80 for my $k (keys %bIUB) {
81 $ubIUB{$k}=16*$bIUB{$k};
84 for my $pos (1..$ChrLen) {
85 my $v=0;
86 my $base=shift @seq; # to save running memory as $seq[$pos-1]
87 if (exists $bIUB{$base}) {
88 $v=$bIUB{$base};
89 #warn $seq[$pos-1]," $v\t$pos\n";
90 } else {
91 warn "[!]$pos -> ",$base;
93 setbase($ChrMem,$pos,$v) if $v;
95 @seq=();
97 open PSNP,'<',$psnp_file or die "[x]Error opening $psnp_file: $!\n";
98 while (<PSNP>) {
99 next if /^#/;
100 my ($chr,$pos,$ref,$ga,undef,undef,$gb)=split /\s+/;
101 my @bases=split //,$ga.$gb;
102 my $v;
103 $v |= $bIUB{$_} for @bases;
104 #print STDERR "$ref [$ga.$gb] $v\t";
105 #$v = orbase($ChrMem,$pos,$v) if $v && ($v != 240);
106 #warn "$v\n";
107 setbase($SNPMem,$pos,$v) if $v && ($v != 15);
109 close PSNP;
111 my @poses;
112 open O,'>',$out_file or die $!;
113 while (<M>) {
114 my $pos=(split /\t/)[1];
115 =pod
116 my ($left,$right)=($pos-$marker_len,$pos+$marker_len);
117 my $marker_seq=substr($seq,$left-1,$marker_len).'N'.substr($seq,$pos,$marker_len);
118 print O ">${ChrID}_${pos}m\n$marker_seq\n";
119 =cut
120 # my $v=getbase($ChrMem,$pos);
121 #my $base=$v & 15;
122 # my $snp=($v & 240) >> 4; # $v >> 4 is wrong !
123 my $base=getbase($ChrMem,$pos);
124 my $snp=getbase($SNPMem,$pos);
125 #warn "[$snp][$base]\t$pos\n";
126 if ($snp) {
127 setbase($ChrMem,$pos,$snp);
128 setbase($SNPMem,$pos,15);
130 push @poses,$pos;
132 close M;
133 for my $pos (@poses) {
134 my ($left,$right)=($pos-$marker_len,$pos+$marker_len);
135 my $marker_seq;
136 for my $ipos ($left..$right) {
137 #my $v=getbase($ChrMem,$ipos);
138 my $base=getbase($ChrMem,$ipos);
139 my $snp=getbase($SNPMem,$ipos);
140 $base=$rbIUB{$base};
141 if ($snp==15) { # or $ipos == $pos
142 $base = lc $base;
144 $marker_seq .= $base; #."($snp)";
146 print O ">${ChrID}_${pos}m${marker_len}\n$marker_seq\n";
148 close O;
150 __END__
151 ./exmarkerseq.pl chr.nfo ./fabychr/ 45 ./dat20101214/Chr01.marker ./psnp/parent_Chr01.snp eChr01.marker
152 cat chrorder | while read a; do ./exmarkerseq.pl chr.nfo ./fabychr/ 45 ./dat20101214/${a}.marker ./psnp/parent_$a.snp ex45${a}.marker;done &
154 #!/bin/sh
155 #$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH,BLASTDB
156 #$ -cwd -r y -l vf=8g
157 #$ -o /dev/null -e /dev/null
158 #$ -S /bin/bash -t 1-12
159 SEEDFILE=./chrorder
160 a=$(sed -n -e "$SGE_TASK_ID p" $SEEDFILE)
161 eval ./exmarkerseq.pl chr.nfo ./fabychr/ 25 ./dat20101214/${a}.marker ./psnp/parent_$a.snp ./out20110106/ex25${a}.marker