modified: tasks/common.wdl
[GalaxyCodeBases.git] / tools / simulators / fa2fqideal.pl
blob25dae5afb105cd3640111e989c63b24a795a037a
1 #!/bin/env perl
2 use lib '/share/raid010/resequencing/soft/lib';
3 use lib 'E:/BGI/toGit/perlib/etc';
4 use strict;
5 use warnings;
6 use Time::HiRes qw ( gettimeofday tv_interval );
7 use Galaxy::ShowHelp;
9 $main::VERSION=0.1.0;
11 our $opts='i:o:l:s:n:v:bd';
12 our($opt_i, $opt_o, $opt_v, $opt_b, $opt_d, $opt_l, $opt_s, $opt_n);
14 our $desc='';
15 our $help=<<EOH;
16 \t-i Genome FASTA file (./genome.fa)
17 \t-l Reads Length (76)
18 \t-n iNsert size (500)
19 \t-s Slip distance (8)
20 \t-o Output FQ prefix (./out)-N-RL-Sd_{1,2}.fa
21 \t-v Verbose level (undef=0)
22 \t-b No pause for batch runs
23 \t-d Debug Mode, for test only
24 EOH
26 ShowHelp();
28 $opt_i='./genome.fa' if ! $opt_i;
29 no warnings;
30 $opt_v=int $opt_v;
31 $opt_n=int $opt_n;
32 $opt_l=int $opt_l;
33 $opt_s=int $opt_s;
34 use warnings;
35 $opt_n=500 if ! $opt_n;
36 $opt_l=76 if ! $opt_l;
37 $opt_s=8 if ! $opt_s;
38 $opt_o='./out' if ! $opt_o;
39 my $outfile="${opt_o}-${opt_n}-${opt_l}-${opt_s}";
40 my $lenB=$opt_n-$opt_l;
41 my $dep=int(20*$opt_l/$opt_s)/10;
43 my $maxN=int(.3*$opt_l);
45 print STDERR "From [$opt_i] to [${opt_o}]-${opt_n}-${opt_l}-${opt_s}_{1,2}.fa, [$opt_n][$opt_l][$opt_s]\nPE depth will be [$dep].\n";
46 print STDERR "DEBUG Mode on !\n" if $opt_d;
47 print STDERR "Verbose Mode [$opt_v] !\n" if $opt_v;
48 unless ($opt_b) {print STDERR 'press [Enter] to continue...'; <>;}
50 sub revcom($) {
51 my $str = $_[0];
52 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
53 my $rev = reverse $str;
54 #$rev =~ tr/[](){}<>/][)(}{></;
55 return $rev;
57 my $start_time = [gettimeofday];
58 #BEGIN
59 open G,'<',$opt_i or die "Error opening $opt_i: $!\n";
60 open OA,'>',$outfile.'_1.fa' or die "Error opening ${outfile}_1.fa: $!\n";
61 open OB,'>',$outfile.'_2.fa' or die "Error opening ${outfile}_2.fa: $!\n";
62 my ($str,$a,$b,$t);
63 while (<G>) {
64 s/^>//;
65 my $title = $_;
66 my $seqname = $1 if($title =~ /^(\S+)/);
67 print STDERR "[!]Seq. >$seqname:\b";
68 $/=">";
69 my $seq=<G>;
70 chomp $seq;
71 $seq=~s/\s//g;
72 $/="\n";
73 my $Len=length $seq;
74 #$seq='';
75 print STDERR "\t$Len .\b";
76 for (my $i=0;$i<$Len-$opt_n;$i+=$opt_s) {
77 $str=substr $seq,$i,$opt_n;
78 $a=substr $str,0,$opt_l;
79 $t=$a=~tr/Nn/nN/;
80 next if $t > $maxN;
81 $b=substr $str,$lenB;
82 $t=$b=~tr/Nn/nN/;
83 next if $t > $maxN;
84 $b=revcom($b);
85 $t=$i/$opt_s;
86 print OA ">${seqname}_",$t,'/1 ',$i+1,"\n$a\n";
87 print OB ">${seqname}_",$t,'/2 ',$i+$lenB+1,"\n$b\n";
89 warn "-\n";
91 close OA;
92 close OB;
93 close G;
95 #END
96 my $stop_time = [gettimeofday];
98 print STDERR "\nTime Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s).\n";
99 __END__