modified: FGI/OYK.pm
[GalaxyCodeBases.git] / perl / etc / justonce / oyk / FGI / OYK.pm
blob8ed20289acbe37a430f7875d8e3d1865210ab105
1 package main;
2 use strict;
3 use warnings;
4 use Data::Dump qw(ddx);
6 my $SgeQueue = 'fgi.q';
7 my $SgeProject = 'fgiccs';
8 my $SgeJobPrefix = 'nip'.(time() % 9999);
10 sub Scutadapt($$$$) {
11 my ($cwd,$cnt,$flst,$outP) = @_;
12 my $SHcutadapt = <<"END_SH";
13 #!/bin/bash
14 #\$ -S /bin/bash
15 #\$ -q $SgeQueue -P $SgeProject
16 #\$ -N p0${SgeJobPrefix}cut
17 #\$ -l vf=600M,num_proc=2
18 #\$ -binding linear:3
19 #\$ -cwd -r y
20 #\$ -v PERL5LIB,PATH,LD_LIBRARY_PATH
21 #\$ -e /dev/null -o /dev/null
23 #\$ -t 1-$cnt
25 cd $cwd
27 if [ -t 1 ] && [ "x\$SKIP" = 'x1' ]; then
28 while read -u 9 THELINE; do
29 read -ra INDAT <<<"\$THELINE"
30 if [ -e "\${INDAT[1]}.fq.gz" ]; then
31 ln -si \${INDAT[1]}.fq.gz $outP/\${INDAT[0]}.fq.gz
32 elif [ -e "\${INDAT[1]}_1.fq.gz" ] && [ -e "\${INDAT[1]}_2.fq.gz" ]; then
33 echo "[!]Cannot use SKIP mode for PE, using Read 1 only [\${INDAT[1]}_1.fq.gz] !"
34 ln -si \${INDAT[1]}_1.fq.gz $outP/\${INDAT[0]}.fq.gz
35 else
36 echo "[x]Cannot find .fq.gz file(s) for [\${INDAT[1]}(_[12])?.fq.gz] !"
38 done 9<$flst
39 exit 0
42 INFILE=`sed -n "\${SGE_TASK_ID}p" $flst`
43 read -ra INDAT <<<"\$INFILE"
45 ADAPTERSTR='-g GAACGACATGGCTACGATCCGACTT -a AAGTCGGAGGCCAAGCGGTCTTAGGAAGACAA -A AAGTCGGATCGTAGCCATGTCGTTC -G TTGTCTTCCTAAGACCGCTTGGCCTCCGACTT'
47 if [ -e "\${INDAT[1]}.fq.gz" ]; then
48 cutadapt -j 2 -m 1 -O 6 -n 2 \$ADAPTERSTR --interleaved -o $outP/\${INDAT[0]}.fq.gz \${INDAT[1]}.fq.gz >$outP/\${INDAT[0]}.log
49 elif [ -e "\${INDAT[1]}_1.fq.gz" ] && [ -e "\${INDAT[1]}_2.fq.gz" ]; then
50 cutadapt -j 2 -m 1 -O 6 -n 2 \$ADAPTERSTR --interleaved -o $outP/\${INDAT[0]}.fq.gz \${INDAT[1]}_1.fq.gz \${INDAT[1]}_2.fq.gz >$outP/\${INDAT[0]}.log
51 else
52 echo "[x]Cannot find .fq.gz file(s) for [\${INDAT[1]}(_[12])?.fq.gz] !"
53 exit 1
56 if [ ! -s $outP/\${INDAT[0]}.fq.gz ]; then
57 exit 1
59 END_SH
60 return $SHcutadapt;
63 sub Sbwamem($$$$$$$) {
64 my ($cwd,$cnt,$flst,$outP,$FQprefix,$pRef,$SHcutadapt) = @_;
65 my $SHbwa = <<"END_SH";
66 #!/bin/bash
67 #\$ -S /bin/bash
68 #\$ -q $SgeQueue -P $SgeProject
69 #\$ -N p1${SgeJobPrefix}bwa -hold_jid p0${SgeJobPrefix}cut
70 #\$ -l vf=6G,num_proc=7
71 #\$ -binding linear:12
72 #\$ -cwd -r y
73 #\$ -v PERL5LIB,PATH,LD_LIBRARY_PATH
74 #\$ -e /dev/null -o /dev/null
76 #\$ -t 1-$cnt
78 cd $cwd
80 INFILE=`sed -n "\${SGE_TASK_ID}p" $flst`
81 read -ra INDAT <<<"\$INFILE"
83 if [ ! -s $FQprefix/\${INDAT[0]}.fq.gz ]; then
84 bash $SHcutadapt
87 bwa mem -t 12 -Y $pRef -R "\@RG\\tID:\${INDAT[0]}\\tSM:\${INDAT[0]}" -p $FQprefix/\${INDAT[0]}.fq.gz 2>$outP/\${INDAT[0]}.log | samtools view -bS - | samtools sort -m 2G -T $outP/\${INDAT[0]}.tmp -o $outP/\${INDAT[0]}.bam
89 samtools index $outP/\${INDAT[0]}.bam
91 if [ ! -s $outP/\${INDAT[0]}.bam ]; then
92 exit 1
94 END_SH
95 return $SHbwa;
98 sub Smpileup($$$$$$$$$) {
99 my ($cwd,$cnt,$flst,$outP,$BAMprefix,$LSTprefix,$pRef,$OYKprefix,$theMode) = @_;
100 my $Smpileup = <<"END_SH";
101 #!/bin/bash
102 #\$ -S /bin/bash
103 #\$ -q $SgeQueue -P $SgeProject
104 #\$ -N p2${SgeJobPrefix}mplp -hold_jid p1${SgeJobPrefix}bwa
105 #\$ -l vf=500M,num_proc=1
106 #\$ -binding linear:2
107 #\$ -cwd -r y
108 #\$ -v PERL5LIB,PATH,LD_LIBRARY_PATH
109 #\$ -e /dev/null -o /dev/null
111 #\$ -t 1-$cnt
113 cd $cwd
115 INFILE=`sed -n "\${SGE_TASK_ID}p" $flst`
116 read -ra INDAT <<<"\$INFILE"
118 samtools mpileup -b $LSTprefix/p\${INDAT[2]}.bams.lst -d 4000 -Q 20 -f $pRef -v -t 'DP,AD,ADF,ADR,SP,INFO/AD,INFO/ADF,INFO/ADR' -p -o $outP/\${INDAT[2]}.vcf.gz
119 bcftools call -Oz -v -m $outP/\${INDAT[2]}.vcf.gz -o $outP/\${INDAT[2]}.snp.gz
120 bcftools index $outP/\${INDAT[2]}.vcf.gz &
121 bcftools index $outP/\${INDAT[2]}.snp.gz
124 bcftools query -f '%CHROM\\t%POS\\t%REF,%ALT\\t%QUAL[\\t%TGT;%AD]\\n' -S $LSTprefix/p\${INDAT[2]}.M.lst -i'POS=501' $outP/\${INDAT[2]}.snp.gz >$OYKprefix/p\${INDAT[2]}.M.tsv
125 bcftools query -f '%CHROM\\t%POS\\t%REF,%ALT\\t%QUAL[\\t%TGT;%AD]\\n' -S $LSTprefix/p\${INDAT[2]}.F.lst -i'POS=501' $outP/\${INDAT[2]}.snp.gz >$OYKprefix/p\${INDAT[2]}.F.tsv
126 bcftools query -f '%CHROM\\t%POS\\t%REF,%ALT\\t%QUAL[\\t%TGT;%AD]\\n' -S $LSTprefix/p\${INDAT[2]}.C.lst -i'POS=501' $outP/\${INDAT[2]}.snp.gz >$OYKprefix/p\${INDAT[2]}.C.tsv
128 ./oyka.pl $theMode $OYKprefix/p\${INDAT[2]}.M.tsv $OYKprefix/p\${INDAT[2]}.F.tsv $OYKprefix/p\${INDAT[2]}.C.tsv $OYKprefix/r\${INDAT[2]}
130 if [ ! -s $outP/\${INDAT[2]}.snp.gz ]; then
131 exit 1
133 END_SH
134 return $Smpileup;