2 use lib
'/share/raid010/resequencing/soft/lib';
3 use lib
'E:/BGI/toGit/perlib/etc';
6 use Time
::HiRes qw
( gettimeofday tv_interval
);
14 our $opts='i:q:o:s:l:r:b';
15 our($opt_i, $opt_q, $opt_s, $opt_l, $opt_r, $opt_o, $opt_b);
17 #our $desc='1.filter fq, 2.stats';
21 \t-o Output Prefix for (./out)_{1,2}.fq
22 \t-s Insert_Size (500)
25 \t-b No pause for batch runs
30 die "[x]Must specify FASTA file !\n" if ! $opt_i;
31 die "[x]-i $opt_i not exists !\n" unless -f
$opt_i;
32 $opt_s = 500 unless $opt_s;
33 $opt_r = 75 unless $opt_r;
34 $opt_l = 50 unless $opt_l;
35 $opt_o = './out' unless $opt_o;
39 print STDERR
"From [$opt_i],[$opt_q] with [$opt_s,$opt_r,$opt_l] to [$opt_o]_{1,2}.fq\n";
40 unless ($opt_b) {print STDERR
'press [Enter] to continue...'; <>;}
42 my $start_time = [gettimeofday
];
45 if ($opt_i =~ /\.gz$/) {
46 $fafile = "gzip -dc $opt_i |";
47 } elsif ($opt_i =~ /\.bz2$/) {
48 $fafile = "bzip2 -dc $opt_i |";
49 } else { $fafile=$opt_i; }
51 if ($opt_q =~ /\.gz$/) {
52 $qfile = "gzip -dc $opt_q |";
53 } elsif ($opt_q =~ /\.bz2$/) {
54 $qfile = "bzip2 -dc $opt_q |";
55 } else { $qfile=$opt_q; }
58 Bio
::SeqIO
->new( -file
=> $fafile,
63 Bio
::SeqIO
->new( -file
=> $qfile,
67 open FQ1
,'>',$opt_o.'_1.fq' or die "[x]Error on ${opt_o}_{1,2}.fq: $!\n";
68 open FQ2
,'>',$opt_o.'_2.fq';
72 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
73 my $rev = reverse $str;
74 #$rev =~ tr/[](){}<>/][)(}{></;
80 #$Q = 10 * log( 1 + 10 ** (ord($sq) - 64) / 10.0) ) / log(10);
83 #$sQ = -10 * log($e / (1 - $e)) / log(10);
84 $Q=chr($q+64); # version 1.3+ of the GA pipeline encodes Phred quality scores from 0-62 using ASCII 64-126
90 print STDERR
'[!]Begin: ';
91 my ($Count,$as,$bs,$aq,$bq,$t)=(0);
93 my $seq_obj = $in_seq_obj->next_seq || last;
94 my $qual_obj = $in_qual_obj->next_seq;
95 die "[x]foo!\n" unless $seq_obj->id eq $qual_obj->id;
96 my $id = $seq_obj->id;
97 my $seq = $seq_obj->seq;
98 my $qual = &Qstr
($qual_obj->qual);
100 next if $len < $min_r * 2;
101 if ($len <= 2*$opt_r) {
103 $as=substr $seq,0,$t;
104 $bs=&revcom
(substr $seq,$t-1);
105 $aq=substr $qual,0,$t;
106 $bq=reverse(substr $qual,$t-1);
107 print FQ1
"\@${id}_${len}/1\n$as\n+\n$aq\n";
108 print FQ2
"\@${id}_${len}/2\n$bs\n+\n$bq\n";
109 } else { # $len mod $opt_l is another border, skipped
111 while ($len-$s > $opt_s) {
112 $as=substr $seq,$s,$opt_r;
113 $bs=&revcom
(substr $seq , $s+$opt_s-$opt_r , $opt_r);
114 $aq=substr $qual,$s,$opt_r;
115 $bq=reverse(substr $qual , $s+$opt_s-$opt_r , $opt_r);
116 if (length $bs < 33) {
120 print FQ1
"\@${id}_${len}_${s}/1\n$as\n+\n$aq\n";
121 print FQ2
"\@${id}_${len}_${s}/2\n$bs\n+\n$bq\n";
126 print STDERR
'.' unless $Count % 1000;
128 warn "\n[!] Total $Count PE out.\n";
134 my $stop_time = [gettimeofday
];
136 print STDERR
"\nTime Elapsed:\t",tv_interval
( $start_time, $stop_time )," second(s).\n";
138 find
./oryza_sativa__indica_cultivar_group/fasta
.oryza_sativa__indica_cultivar_group
.*.gz
|perl
-lane
'(undef,$p,$f)=split /\//;@t=split /\./,$f;print "-i ./$p/$f -q ./$p/qual.$t[1].$t[2].gz -o ./fq/fa9311_$t[2] 2>./fq/fa9311_$t[2].log"' > cmd
.lst
142 #$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
143 #$ -cwd -r y -l vf=116M
144 #$ -o /dev/null -e /dev/null
145 #$ -S /bin/bash -t 1-13
147 SEED
=$(sed
-n
-e
"$SGE_TASK_ID p" $SEEDFILE)
148 eval perl
./fa2fq
.pl
-b
$SEED
150 ./soappe.pl 495,505 200 ./fq
/fa9311_001_1.fq ./fq
/fa9311_001_2.fq ./9311/9311_main_chr.fa.index ./soap/t1
152 find
./fq/*_1
.fq
|perl
-lane
'(undef,$p,$f)=split /\//;@t=split /_1\./,$f;print "./$p/$f ./$p/$t[0]_2.fq ./9311/9311_main_chr.fa.index ./soap/$t[0]"' > cmds
.lst
156 #$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
157 #$ -cwd -r y -l vf=1.8g
158 #$ -o /dev/null -e /dev/null
160 #$ -S /bin/bash -t 1-13
162 SEED
=$(sed
-n
-e
"$SGE_TASK_ID p" $SEEDFILE)
163 eval perl
./soappe
.pl
495,505 200 $SEED
167 use Bio
::Seq
::Quality
;
171 die "pass a fasta and a fasta-quality file\n"
175 my ($seq_infile,$qual_infile)
176 = (scalar @ARGV == 1) ?
($ARGV[0], "$ARGV[0].qual") : @ARGV;
178 ## Create input objects for both a seq (fasta) and qual file
181 Bio
::SeqIO
->new( -file
=> $seq_infile,
186 Bio
::SeqIO
->new( -file
=> $qual_infile,
191 Bio
::SeqIO
->new( -format
=> 'fastq'
195 ## create objects for both a seq and its associated qual
196 my $seq_obj = $in_seq_obj->next_seq || last;
197 my $qual_obj = $in_qual_obj->next_seq;
204 ## Here we use seq and qual object methods feed info for new BSQ
208 new
( -id
=> $seq_obj->id,
209 -seq
=> $seq_obj->seq,
210 -qual
=> $qual_obj->qual,
214 $out_fastq_obj->write_fastq($bsq_obj);
217 my $seqin = Bio
::SeqIO
->new(-file
=> "/usr/local/bin/gunzip -c $infile |",
218 -format
=> $informat);
220 my $seqout = Bio
::SeqIO
->new(-file
=> ">$outfile",