modified: diffout.py
[GalaxyCodeBases.git] / perl / bsim / simbsvf.pl
blob5a9177e3e23608c92ea6e9451e9eaecfc2b24bca
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump qw(ddx);
5 use Cwd qw(abs_path getcwd);
6 use Parallel::ForkManager;
8 use FindBin 1.51 qw($RealBin);
9 use lib "$RealBin";
10 use simVirusInserts; # 同时输出甲基化与非甲基化的结果。
11 #use simVirusInsertsOld; # 只输出100%甲基化的reads(四种碱基的)
13 my $RefNratioMax = 0.01; # /Nn/
14 our $RefMratioMax = 0.02; # masked as lower case in *.mfa.gz
16 die "Simulate Directional libraries of Bisulfite-Sequencing data.\nUsage: $0 <Host> <Virus> <Outprefix> [ReadLen=90 [Ticks.ini]]\nInvoke as: mkdir sim90 && cd sim90 && $0 && cd ..\n" if @ARGV <3;
18 my ($Reff,$Virf,$outp,$ReadLen,$TicksINI)=@ARGV;
19 $ReadLen = 90 unless defined $ReadLen;
21 #$Reff='hs_ref_GRCh38.p2_chr18.mfa.gz';
22 #$Virf='HBV.AJ507799.2.fa.gz';
24 my $Refh = openfile($Reff);
25 my $Refstr = getRefChr1st($Refh);
26 my $RefLen = length $Refstr;
27 close $Refh;
28 my $Virfh = openfile($Virf);
29 my $Virstr = getRefChr1st($Virfh);
30 my $VirLen = length $Virstr;
31 close $Virfh;
32 $Virstr .= $Virstr; # circle
34 open INI,'>',$outp.'.ini' or die $!;
35 my $Refabsf = abs_path($Reff);
36 my $Virabsf = abs_path($Virf);
37 my $Cwdabs = abs_path(getcwd()."/../run");
38 print INI <<"CONTENT";
39 [RefFiles]
40 HostRef=$Refabsf
41 VirusRef=$Virabsf
43 [Output]
44 WorkDir=$Cwdabs
45 ProjectID=$outp
47 CONTENT
49 my (%fqFiles,%Para,%Merge);
51 my @ReadLen = qw(50 90 150);
52 my @VirFragLens = qw(5 10 15 20 30 50 75 90 100 120 150 180 200 250 280 300 350 400 450 500 550 600);
53 my @PEins = qw(250 500 750);
55 my $maxPEins = 750;
57 my ($pRefticks,$pVirticks);
58 if (defined $TicksINI and -f $TicksINI) {
59 print STDERR "Loading Ticks from [$TicksINI]: ";
60 my $inih = openfile($TicksINI);
61 while (<$inih>) {
62 chomp;
63 if (/^Refticks=\d/) {
64 my @dat = split /=/,$_;
65 @dat = split /,/,$dat[1];
66 $pRefticks = \@dat;
67 print STDERR "Ref:",scalar @dat,', ';
68 } elsif (/^Virticks=\d/) {
69 my @dat = split /=/,$_;
70 @dat = split /,/,$dat[1];
71 $pVirticks = \@dat;
72 print STDERR "Vir:",scalar @dat,', ';
75 print STDERR "\b\b.\n";
76 } else {
77 my $SeqReadLen = $ReadLen[-1];
78 my $RefBorder = $maxPEins + 1000;
79 $pRefticks = getticks($RefBorder,$Refstr,$RefLen,$maxPEins,$RefNratioMax);
80 $pVirticks = getticks($VirFragLens[-1],$Virstr,$VirLen,int(0.9+ 0.5*$VirFragLens[-1]),$RefNratioMax);
82 #$Para{pRefticks} = $pRefticks;
83 #$Para{pVirticks} = $pVirticks;
85 my $pm = Parallel::ForkManager->new(8); # https://metacpan.org/pod/Parallel::ForkManager#Data-structure-retrieval
86 #@ReadLen = (50, 90); @VirFragLens = (10, 20); @PEins = (250, 500);
88 # data structure retrieval and handling
89 $pm -> run_on_finish ( # called BEFORE the first call to start()
90 sub {
91 my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $data_structure_reference) = @_;
92 # retrieve data structure from child
93 if (defined($data_structure_reference)) { # children are not forced to send anything
94 my @Dat = @{$data_structure_reference}; # child passed a array reference
95 #print ":@Dat\n";
96 $fqFiles{$Dat[0]} = [$Dat[1],$Dat[2]];
97 #取消混合的 push @{$Merge{$Dat[3]}{$Dat[4]}},$Dat[0];
98 } else { # problems occurring during storage or retrieval will throw a warning
99 print qq|[x]No message received from child process $pid!\n|;
104 for my $vf (@VirFragLens) {
105 for my $rl (@ReadLen) {
106 DOIT:
107 for my $pi (@PEins) {
108 next if $pi < $rl *4/3;
109 %Para = (
110 PEinsertLen => $pi,
111 SeqReadLen => $rl,
112 RefNratioMax => $RefNratioMax,
113 RefLen => $RefLen,
114 VirLen => $VirLen,
115 VirFrag => $vf,
116 OutPrefix => "${outp}_i${pi}v${vf}r${rl}",
117 pRefticks => $pRefticks,
118 pVirticks => $pVirticks,
120 $pm->start and next DOIT;
121 dosim($Refstr,$Virstr,\%Para);
122 #$fqFiles{$Para{OutPrefix}} = [$Para{PEinsertLen},$Para{VirFrag}];
123 #push @{$Merge{$pi}{$vf}},$Para{OutPrefix};
124 $pm->finish(0, [$Para{OutPrefix},$Para{PEinsertLen},$Para{VirFrag},$pi,$vf]);
128 $pm->wait_all_children;
130 my @fps = sort keys %fqFiles;
131 print INI "[DataFiles]\n";
132 for my $i (0 .. $#fps) {
133 print INI "F$fps[$i].1=",abs_path($fps[$i].'.1.fq.gz'),"\n";
134 print INI "F$fps[$i].2=",abs_path($fps[$i].'.2.fq.gz'),"\n";
136 for my $pi (@PEins) {
137 for my $vf (@VirFragLens) {
138 next unless exists $Merge{$pi}{$vf};
139 my @OutPrefix = @{$Merge{$pi}{$vf}};
140 print INI "Mv${vf}i${pi}.1=",join(',',(map {abs_path($_.'.1.fq.gz')} @OutPrefix)),"\n";
141 print INI "Mv${vf}i${pi}.2=",join(',',(map {abs_path($_.'.2.fq.gz')} @OutPrefix)),"\n";
144 print INI "\n[InsertSizes]\n";
145 for my $i (0 .. $#fps) {
146 print INI "F$fps[$i]=",$fqFiles{$fps[$i]}->[0],"\nF$fps[$i].SD=",10+$i/100,"\n";
148 my $i=0;
149 for my $pi (@PEins) {
150 for my $vf (@VirFragLens) {
151 next unless exists $Merge{$pi}{$vf};
152 ++$i;
153 print INI "Mv${vf}i${pi}=",500,"\nMv${vf}i${pi}.SD=",$i/10,"\n";
156 print INI "\n[Simed]\n";
157 for my $i (0 .. $#fps) {
158 print INI "F$fps[$i].VirFrag=",$fqFiles{$fps[$i]}->[1],"\n";
160 for my $pi (@PEins) {
161 for my $vf (@VirFragLens) {
162 next unless exists $Merge{$pi}{$vf};
163 print INI "Mv${vf}i${pi}.VirFrag=$vf\n";
166 print INI 'Refticks=',join(',',@$pRefticks),"\n";
167 print INI 'Virticks=',join(',',@$pVirticks),"\n";
168 close INI;
170 __END__