modified: tasks/common.wdl
[GalaxyCodeBases.git] / tools / linkage / blast2pos.pl
blob2994c982004c2e61e4c97f70efda953642fe9228
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use lib '/nas/RD_09C/resequencing/soft/lib';
5 use Galaxy::ShowHelp;
6 use Time::HiRes qw ( gettimeofday tv_interval );
7 use Data::Dump qw(ddx);
9 $main::VERSION=0.0.1;
11 our $opts='i:o:n:g:v:r:c:l:p:bqd';
12 our($opt_i, $opt_n, $opt_g, $opt_r, $opt_c, $opt_l, $opt_o, $opt_v, $opt_b, $opt_q, $opt_d, $opt_h, $opt_p);
14 our $help=<<EOH;
15 \t-i Blast filted list (./markerblastf.lst)
16 \t-l linkage map list (./linkagemap.lst)
17 \t-p marker dat (./markerpospa64.dat)
18 \t-n N zone file (Pa64.Nzone)
19 \t-o Output file
20 \t-v Verbose level (undef=0)
21 \t-b No pause for batch runs
22 \t-d Debug Mode, for test only
23 EOH
25 ShowHelp();
27 $opt_i='./markerblastf.lst' if ! $opt_i;
28 $opt_p='./markerpospa64.dat' if ! $opt_p;
29 $opt_l='./linkagemap.lst' if ! $opt_l;
30 $opt_n='Pa64.Nzone' if ! $opt_n;
31 $opt_v=0 if ! $opt_v;
33 print STDERR "From [$opt_i] to [$opt_o] refer to [$opt_n][$opt_l][$opt_p]\n";
34 print STDERR "DEBUG Mode on !\n" if $opt_d;
35 print STDERR "Verbose Mode [$opt_v] !\n" if $opt_v;
36 unless ($opt_b) {print STDERR 'press [Enter] to continue...'; <>;}
38 my $start_time = [gettimeofday];
39 #BEGIN
40 my %Scaffords; # \%( $chr->[$sPosRel,$scM,$sWeight] )
41 my %LinkageMap;
43 open L,'<',$opt_l or die "Error: $!\n";
44 while (<L>) {
45 chomp;
46 my ($ChrID,$File)=split /\t/;
47 open LM,'<',$File or die "Error: $!\n";
48 while (<LM>) {
49 next if /^#/;
50 my ($Chr,$Pos,$cM)=split /\t/;
51 die "[x][$ChrID] not match [$File] !\n" if $Chr ne $ChrID;
52 $LinkageMap{$Chr}{$Pos}=$cM;
55 close L;
57 my (%cMPos,%cMlst);
58 open L,'<',$opt_p or die "Error: $!\n";
59 while (<L>) {
60 next if /^#/;
61 chomp;
62 my ($Marker,$Chr,$cM,$Pos,$strand,$weight)=split /\t/;
63 $cMPos{$Chr}{$cM}=$Pos;
65 close L;
66 for my $chr (keys %cMPos) {
67 $cMlst{$chr} = [sort {$a <=> $b} keys %{$cMPos{$chr}}];
69 my $Err=50/135; # +- 100 * 0.5/135
70 sub cMtoPos($$) {
71 my ($Chr,$cM,$Strand)=@_;
72 ($Chr,$Strand)=split /\t/,$Chr;
73 my ($thecMlower,$thecMUpper)=(0,0);
74 if ($cM > $cMlst{$Chr}->[0]) {
75 for (@{$cMlst{$Chr}}) {
76 $thecMUpper=$_;
77 last if $_ > $cM;
78 next if $_ == $cM;
79 $thecMlower=$thecMUpper;
81 } else {
82 $thecMUpper=$cMlst{$Chr}->[1];
84 my ($thePoslower,$thePosUpper)=($cMPos{$Chr}{$thecMlower},$cMPos{$Chr}{$thecMUpper});
85 die "[x]Pos Equal @ $Chr,$cM,$thecMlower,$thecMUpper" if $thePosUpper==$thePoslower;
86 my $BPtocM=($thePosUpper-$thePoslower)/($thecMUpper-$thecMlower);
87 die "[x]Pos Order Error @ $Chr,$thecMlower,$thecMUpper" if $BPtocM <= 0; # So, deal with unorder mannually ...
88 my $Pos=($cM-$thecMlower)*$BPtocM+$thePoslower;
89 my $ErrR=$Err*$BPtocM;
90 my @R=($Pos,$ErrR,$thePoslower,$thePosUpper);
91 $_=int(0.5+10*$_)/10 for @R;
92 return [@R,$thecMlower,$thecMUpper];
95 sub getRel($$$$$) {
96 my ($Qid,$Qs,$Qe,$Ss,$Se,$BTOP)=@_;
97 my ($mChr,$mPos,$mSiderLen)=@{&splitMarkerid($Qid)};
98 my $cM=$LinkageMap{$mChr}{$mPos};
99 unless (defined $cM) {
100 warn "[!]Cannot find Marker $Qid @ $mChr,$mPos !\n";
101 $cM=0;
103 my $LeftBPalongQ=$mSiderLen-$Qs+1;
104 my $WalkingOn=$LeftBPalongQ;
105 my @btop0=split /(\D+)/,$BTOP;
106 # $ perl -le '$a="45YT9-Ac-c-c-11TC4";@b=split /(\D+)/,$a;print "[",join("|",@b),"]"'
107 # [45|YT|9|-Ac-c-c-|11|TC|4]
108 my @btop=();
109 for (@btop0) {
110 if (/\d+/) {
111 push @btop,$_;
112 } else {
113 my @bin=split /([\w-]{2})/;
114 # $ perl -le '$a="-Ac-c-c-";@b=split /([\w-]{2})/,$a,0;print "[",join("|",@b),"]"'
115 # [|-A||c-||c-||c-]
116 for (@bin) {
117 next unless $_;
118 push @btop,$_;
122 my $LeftBPalongS=0;
123 for (@btop) {
124 if ($WalkingOn <= 0) {
125 print STDERR "$Qid [$_] " if $opt_v and $WalkingOn == 0;
126 last;
128 print STDERR "-$Qid [$_]-" if $opt_v>1;
129 if (/\d/) {
130 $LeftBPalongS += $_;
131 $WalkingOn -= $_;
132 } else {
133 my @op=split //;
134 if (/-/) {
135 --$WalkingOn if $op[1] eq '-' and $op[0] ne '-';
136 ++$LeftBPalongS if $op[0] eq '-' and $op[1] ne '-';
137 } else {
138 --$WalkingOn;
139 ++$LeftBPalongS;
142 print STDERR "-$WalkingOn $LeftBPalongS\n" if $opt_v>1;
143 my $NOP;
145 my $strand;
146 if ($Ss < $Se) {
147 $strand=1;
148 $mChr .= "\t+";
149 } else {
150 $strand=-1;
151 $mChr .= "\t-";
153 my $Spos=$Ss + $strand*$LeftBPalongS;
154 warn "$mChr,$Spos,$cM\n" if $opt_v;
155 return [$mChr,$Spos,$cM];
157 sub splitMarkerid($) {
158 my $MarkerID=$_[0];
159 my ($mChr,$mPos,$mSiderLen)=split /[_m]/,$MarkerID,3;
160 return [$mChr,$mPos,$mSiderLen];
162 sub pushScaf($) {
163 my ($Qid,$Sid,$Pidentity,$AlnLen,$identical,$mismatches,$Gap,$Qs,$Qe,$Ss,$Se,$E,$bitScore,$BTOP,$Hit)=@{$_[0]};
164 my ($sPosRel,$scM,$sWeight,$Count)=(0,0,0,0);
165 my ($mChr,$mPos,$mSiderLen)=@{&splitMarkerid($Qid)};
166 if ($Ss < $Se) {
167 $mChr .= "\t+";
168 } else {
169 $mChr .= "\t-";
171 my $weight=-log($E)/$Hit;
172 my %Dat;
173 if (exists $Scaffords{$Sid}) {
174 %Dat=%{$Scaffords{$Sid}};
175 if (exists $Scaffords{$Sid}{$mChr}) {
176 ($sPosRel,$scM,$sWeight,$Count)=@{$Dat{$mChr}};
178 } else {
179 %Dat=($mChr=>[$sPosRel,$scM,$sWeight,$Count]);
181 my ($chr,$pos,$cM)=@{&getRel($Qid,$Qs,$Qe,$Ss,$Se,$BTOP)};
182 $sPosRel += $pos*$weight;
183 $scM += $cM*$weight;
184 $sWeight += $weight;
185 ++$Count;
186 $Dat{$mChr}=[$sPosRel,$scM,$sWeight,$Count];
187 $Scaffords{$Sid}=\%Dat;
190 open L,'<',$opt_i or die "Error: $!\n";
191 while (<L>) {
192 chomp;
193 my ($ChrID,$File)=split /\t/;
194 open IN,'<',$File or die "Error: $!\n";
195 while (<IN>) {
196 next if /^#/;
197 # Fields: query id, subject id, % identity, alignment length, identical, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score, BTOP
198 #-outfmt '6 qseqid sseqid pident length nident mismatch gapopen qstart qend sstart send evalue bitscore btop'
199 #Chr01_457584m45 Scaffold011460 95.45 88 87 4 0 1 88 1020 933 1e-34 147 16CA2WA25MA6WA35 1
200 chomp;
201 my @Dat=split /\t/;
202 my ($Qid,$Sid,$Pidentity,$AlnLen,$identical,$mismatches,$Gap,$Qs,$Qe,$Ss,$Se,$E,$bitScore,$BTOP,$Hit)=@Dat;
203 next if $Sid =~ /^chr/i; # Just skip ...
204 &pushScaf(\@Dat);
206 close IN;
208 ddx \%Scaffords if $opt_v>1;
209 open O,'>',$opt_o or die "Error: $!\n";
210 print O "#ScaffordID\tScaffordAnchorOffect\tChrAnchored\tStrand\tcM\tPosT\tPosErr\tWeight,Count,thecMlower,thecMUpper,thePoslower,thePosUpper\n";
211 for my $ScaffID (keys %Scaffords) {
212 my @dat=(); # [key,value(weight)]
213 for (keys %{$Scaffords{$ScaffID}}) {
214 push @dat,[$_,${$Scaffords{$ScaffID}{$_}}[2]];
216 @dat = sort {$b->[1] <=> $a->[1]} @dat;
217 my ($sPosRel,$scM,$sWeight,$Count)=@{$Scaffords{$ScaffID}{$dat[0][0]}};
218 $sPosRel=int(0.5+10*$sPosRel/$sWeight)/10;
219 $scM=int(0.5+1000*$scM/$sWeight)/1000;
220 $Count=-$Count; # just flag
221 my ($pos,$err,$thePoslower,$thePosUpper,$thecMlower,$thecMUpper)=@{&cMtoPos($dat[0][0],$scM)};
222 $Scaffords{$ScaffID}=[$dat[0][0],$sPosRel,$scM,$sWeight,$Count];
223 print O join("\t",$ScaffID,$sPosRel,$dat[0][0],$scM,$pos,$err,"$sWeight,$Count,$thecMlower,$thecMUpper,$thePoslower,$thePosUpper"),"\n";
225 close O;
226 ddx \%Scaffords if $opt_v>1;
228 __END__
229 grep -h caff ./out/f3545Chr*.pa64 > f3545ChrScaff.pa64
230 ./relocate.pl -bi f3545ChrScaff.pa64 -o f3545ChrScaff.pos