4 use Data
::Dump
qw(ddx);
6 my $SgeQueue = 'fgi.q';
7 my $SgeProject = 'fgiccs';
8 my $SgeJobPrefix = 'nip'.(time() % 9999);
11 my ($cwd,$cnt,$flst,$outP) = @_;
12 my $SHcutadapt = <<"END_SH";
15 #\$ -q $SgeQueue -P $SgeProject
16 #\$ -N p0${SgeJobPrefix}cut
17 #\$ -l vf=600M,num_proc=2
20 #\$ -v PERL5LIB,PATH,LD_LIBRARY_PATH
21 #\$ -e /dev/null -o /dev/null
27 if [ -t 1 ] && [ "x\$SKIP" = 'x1' ]; then
28 while read -u 9 THELINE; do
29 read -ra INDAT <<<"\$THELINE"
30 if [ -e "\${INDAT[1]}.fq.gz" ]; then
31 ln -si \${INDAT[1]}.fq.gz $outP/\${INDAT[0]}.fq.gz
32 elif [ -e "\${INDAT[1]}_1.fq.gz" ] && [ -e "\${INDAT[1]}_2.fq.gz" ]; then
33 echo "[!]Cannot use SKIP mode for PE, using Read 1 only [\${INDAT[1]}_1.fq.gz] !"
34 ln -si \${INDAT[1]}_1.fq.gz $outP/\${INDAT[0]}.fq.gz
36 echo "[x]Cannot find .fq.gz file(s) for [\${INDAT[1]}(_[12])?.fq.gz] !"
42 INFILE=`sed -n "\${SGE_TASK_ID}p" $flst`
43 read -ra INDAT <<<"\$INFILE"
45 ADAPTERSTR='-g GAACGACATGGCTACGATCCGACTT -a AAGTCGGAGGCCAAGCGGTCTTAGGAAGACAA -A AAGTCGGATCGTAGCCATGTCGTTC -G TTGTCTTCCTAAGACCGCTTGGCCTCCGACTT'
47 if [ -e "\${INDAT[1]}.fq.gz" ]; then
48 cutadapt -j 2 -m 1 -O 6 -n 2 \$ADAPTERSTR --interleaved -o $outP/\${INDAT[0]}.fq.gz \${INDAT[1]}.fq.gz >$outP/\${INDAT[0]}.log
49 elif [ -e "\${INDAT[1]}_1.fq.gz" ] && [ -e "\${INDAT[1]}_2.fq.gz" ]; then
50 cutadapt -j 2 -m 1 -O 6 -n 2 \$ADAPTERSTR --interleaved -o $outP/\${INDAT[0]}.fq.gz \${INDAT[1]}_1.fq.gz \${INDAT[1]}_2.fq.gz >$outP/\${INDAT[0]}.log
52 echo "[x]Cannot find .fq.gz file(s) for [\${INDAT[1]}(_[12])?.fq.gz] !"
56 if [ ! -s $outP/\${INDAT[0]}.fq.gz ]; then
63 sub Sbwamem($$$$$$$) {
64 my ($cwd,$cnt,$flst,$outP,$FQprefix,$pRef,$SHcutadapt) = @_;
65 my $SHbwa = <<"END_SH
";
68 #\$ -q $SgeQueue -P $SgeProject
69 #\$ -N p1${SgeJobPrefix}bwa -hold_jid p0${SgeJobPrefix}cut
70 #\$ -l vf=6G,num_proc=7
71 #\$ -binding linear:12
73 #\$ -v PERL5LIB,PATH,LD_LIBRARY_PATH
74 #\$ -e /dev/null -o /dev/null
80 INFILE=`sed -n "\
${SGE_TASK_ID
}p
" $flst`
81 read -ra INDAT <<<"\
$INFILE"
83 if [ ! -s $FQprefix/\${INDAT[0]}.fq.gz ]; then
87 bwa mem -t 12 -Y $pRef -R "\
@RG\\tID
:\
${INDAT
[0]}\\tSM
:\
${INDAT
[0]}" -p $FQprefix/\${INDAT[0]}.fq.gz 2>$outP/\${INDAT[0]}.log | samtools view -bS - | samtools sort -m 2G -T $outP/\${INDAT[0]}.tmp -o $outP/\${INDAT[0]}.bam
89 samtools index $outP/\${INDAT[0]}.bam
91 if [ ! -s $outP/\${INDAT[0]}.bam ]; then
98 sub Smpileup($$$$$$$$$) {
99 my ($cwd,$cnt,$flst,$outP,$BAMprefix,$LSTprefix,$pRef,$OYKprefix,$theMode) = @_;
100 my $Smpileup = <<"END_SH";
103 #\$ -q $SgeQueue -P $SgeProject
104 #\$ -N p2${SgeJobPrefix}mplp -hold_jid p1${SgeJobPrefix}bwa
105 #\$ -l vf=500M,num_proc=1
106 #\$ -binding linear:2
108 #\$ -v PERL5LIB,PATH,LD_LIBRARY_PATH
109 #\$ -e /dev/null -o /dev/null
115 INFILE
=`sed -n "\${SGE_TASK_ID}p" $flst`
116 read -ra INDAT
<<<"\$INFILE"
118 samtools mpileup
-b
$LSTprefix/p\${INDAT[2]}.bams.lst -d 4000 -Q 20 -f $pRef -v -t 'DP,AD,ADF,ADR,SP,INFO/AD
,INFO
/ADF,INFO/ADR
' -p -o $outP/\${INDAT[2]}.vcf.gz
119 bcftools call -Oz -v -m $outP/\${INDAT[2]}.vcf.gz -o $outP/\${INDAT[2]}.snp.gz
120 bcftools index $outP/\${INDAT[2]}.vcf.gz &
121 bcftools index $outP/\${INDAT[2]}.snp.gz
124 bcftools query -f '%CHROM\\t
%POS\\t
%REF,%ALT\\t
%QUAL[\\t
%TGT;%AD]\\n
' -S $LSTprefix/p\${INDAT[2]}.M.lst -i'POS
=501' $outP/\${INDAT[2]}.snp.gz >$OYKprefix/p\${INDAT[2]}.M.tsv
125 bcftools query -f '%CHROM\\t
%POS\\t
%REF,%ALT\\t
%QUAL[\\t
%TGT;%AD]\\n
' -S $LSTprefix/p\${INDAT[2]}.F.lst -i'POS
=501' $outP/\${INDAT[2]}.snp.gz >$OYKprefix/p\${INDAT[2]}.F.tsv
126 bcftools query -f '%CHROM\\t
%POS\\t
%REF,%ALT\\t
%QUAL[\\t
%TGT;%AD]\\n
' -S $LSTprefix/p\${INDAT[2]}.C.lst -i'POS
=501' $outP/\${INDAT[2]}.snp.gz >$OYKprefix/p\${INDAT[2]}.C.tsv
128 ./oyka.pl $theMode $OYKprefix/p\${INDAT[2]}.M.tsv $OYKprefix/p\${INDAT[2]}.F.tsv $OYKprefix/p\${INDAT[2]}.C.tsv $OYKprefix/r\${INDAT[2]}
130 if [ ! -s $outP/\${INDAT[2]}.snp.gz ]; then