6 use Time
::HiRes qw
( gettimeofday tv_interval
);
9 #use Fcntl qw(:DEFAULT :flock);
13 our $opts='i:o:s:d:r:bvfk:p:m:t:n';
14 our($opt_i, $opt_o, $opt_s, $opt_r, $opt_d, $opt_v, $opt_f, $opt_b, $opt_k, $opt_p, $opt_m, $opt_t, $opt_n);
17 \t-i Input Indel files path (./indel) [ *.gz/*.bz2 files are OK ]
18 \t-f Input SV files in single file.
19 \t-s Specie name for shareing SQLite data file (human)
20 \t [Specie name MUST be ths SAME throughout !]
21 \t-d GFF SQLite data file (_dbGFF.sqlite)
22 \t-o Output SQLite data file (_result_indel.sqlite)
23 \t-r Output Result txt file (_result_indel.txt)
24 \t-v show verbose info to STDOUT
25 \t-b No pause for batch runs
26 \t-k KOG (./Glyma1.KOG)
27 \t-p PANTHER (./Glyma.PANTHER)
28 \t-m PFAM (./Glyma.PFAM)
29 \t-t Samples list (./samples.list) [sample\\s+]
32 # \t-d dbSNP SQLite data file (_tdbSNP.sqlite)
36 $opt_i='./indel' if ! defined $opt_i;
37 $opt_d='_dbGFF.sqlite' if ! $opt_d;
38 $opt_r='_result_indel.txt' if ! $opt_r;
39 $opt_o='_result_indel.sqlite' if ! $opt_o;
40 $opt_s='human' if ! $opt_s;
41 $opt_k='./Glyma1.KOG' if ! $opt_k;
42 $opt_p='./Glyma.PANTHER' if ! $opt_p;
43 $opt_m='./Glyma.PFAM' if ! $opt_m;
44 $opt_t='./samples.list' if ! $opt_t;
46 print STDERR
"From [$opt_i] [$opt_d], $opt_k,$opt_p,$opt_m,$opt_t\n to [$opt_o] [$opt_r]\tSpecie:[$opt_s]\n";
47 if ($opt_n) {print STDERR
"Mono Mode.\n"}
48 if ($opt_f) {print STDERR
"File Input Mode.\n"}
49 else {print STDERR
"Dir. Input Mode.\n"}
50 if (! $opt_b) {print STDERR
'press [Enter] to continue...'; <>;}
52 my (@Samples,$Samples);
53 open( SAMP
,'<',$opt_t) or die "Error: $!\n";
56 my ($sample)=split /\s+/;
57 push @Samples,$sample;
60 $Samples=join '^',@Samples;
66 getHash
($opt_p,\
%hPa);
67 getHash
($opt_m,\
%hPf);
69 my( $file ,$hash) = @_;
70 open (F
,"$file") or die $!;
73 my($key,$a,$b)=split /\s+/,$_,3;
75 my $value = "#$a\t$b";
76 #print "$key\t$value\n";
77 $$hash{$key} = $value;
83 if ($opt_f) {@files=($opt_i);}
85 opendir INDIR
,$opt_i or die "Error: $!\n";
86 @files = grep /\.gz$|\.indel\.final$|\.bzip2$/i,readdir INDIR
;
90 my $start_time = [gettimeofday
];
98 my $dbh = DBI
->connect('dbi:SQLite:dbname='.$opt_o,'','',\
%attr)
100 my $dbhd = DBI
->connect('dbi:SQLite:dbname='.$opt_d,'','',\
%attr)
103 our $sthd=$dbhd->prepare( "SELECT DISTINCT primary_inf,name,start,end FROM gff$opt_s WHERE
104 chrid = :1 AND start <= :2 AND end >= :2" );
109 CREATE TABLE IF NOT EXISTS reindel
{---}
117 for (split /;/,$sql) {
121 $dbh->do($_) or die $dbh->errstr;
125 my $sth = $dbh->prepare( "INSERT INTO reindel$opt_s
126 ( chrid,pos,delta,het,primary_inf,anno_name ) VALUES ( ?,?,?,?,?,? )" );
128 sub check_mRNA_is_dup
($$) {
129 my ($mRNA_arr,$elm_arr)=@_;
130 my ($mRNA_start,$mRNA_end,$mRNA_name)=@
$mRNA_arr[2,3,1]; # primary_inf,name,start,end
131 for my $item (@
$elm_arr) {
133 if ($$item[0] =~ /gene/i) {
134 $mRNA_name=~s/\.\d+$//;
137 if ( $name eq $mRNA_name ) {
138 warn "[GFF][!]: element($$item[2]-$$item[3]) outsides mRNA:$mRNA_name($mRNA_start-$mRNA_end)"
139 if ( ($mRNA_start > $$item[2] or $mRNA_end < $$item[3]) and $$item[0] !~ /gene/i);
148 my ($rv,$res,$restr);
153 my ($chrid,$pos)=split /\t/,$line;
164 #$delta =~ /^(\D+?)(\d+)$/;
165 #my ($type,$count,$endpos) = ($1,$2,$pos);
166 #if ($type =~ /I/i) { $endpos=$pos+$count; } # well, I/D/etc.
167 # elsif ($type =~ /D/i) { $endpos=$pos-$count; }
168 #my ($start,$end)=sort {$a <=> $b} ($pos,$endpos);
169 # ($start,$end) = sort {$a <=> $b} ($start,$end);
170 #warn "$chrid,$svtype,$start,$end\n" if $start > $end;
171 #print "$chrid,$svtype,$start,$end\n"; sleep 1;
172 $sthd->execute($chrid,$pos);
173 $rv = $sthd->fetchall_arrayref;
175 # warn "[GFF][x] No infor. for chr:$chrid:$start-$end\n";
176 $sth->execute($chrid,$pos,'','','UNKNOWN','N/A');
178 print OUTFILE
"$line\tUnknown\tN/A\t$tt\t$tt\t$tt\n";
181 my (@Elements,@mRNA,@EXON,@CDS,@UTR,@Gene,@Intron)=(); # $opt_n
183 #my ($primary_inf,$name)=@$_;
184 if ($$_[0] =~ /mRNA/i) {push @mRNA,$_;}
185 if ($$_[0] =~ /exon/i) {push @EXON,$_;}
186 if ($$_[0] =~ /CDS/i) {push @CDS,$_;push @Elements,$_;}
187 if ($$_[0] =~ /UTR/i) {push @UTR,$_;push @Elements,$_;}
188 if ($$_[0] =~ /Gene/i) {push @Gene,$_;}
189 if ($$_[0] =~ /Intron/i) {push @Intron,$_;push @Elements,$_;}
190 else {push @Elements,$_;}
192 unless ($opt_n) { # NOT WORKING !!!
193 for my $mRNA_arr (@mRNA) {
194 next if ! defined $mRNA_arr;
195 if ( check_mRNA_is_dup
($mRNA_arr,\
@Elements) ) { $mRNA_arr = undef; next;}
197 for my $mRNA_arr (@Gene) {
198 next if ! defined $mRNA_arr;
199 if ( check_mRNA_is_dup
($mRNA_arr,\
@Elements) ) { $mRNA_arr = undef; next;}
201 for my $mRNA_arr (@EXON) {
202 next if ! defined $mRNA_arr;
203 if ( check_mRNA_is_dup
($mRNA_arr,\
@Elements) ) { $mRNA_arr = undef; next;}
206 # now, no duped mRNA.
207 for $res (@CDS,@Intron,@UTR,@EXON,@mRNA,@Gene) {
208 next if ! defined $res;
212 $kk=(defined $hK{$key})?
$hK{$key}:".\t.";
213 $pp=(defined $hPa{$key})?
$hPa{$key}:".\t.";
214 $mm=(defined $hPf{$key})?
$hPf{$key}:".\t.";
215 $restr="$line\t$$res[0]\t$$res[1]\t$kk\t$pp\t$mm\n";
216 print OUTFILE
$restr;
217 ++$Pinf{$chrid}{$$res[0]};
219 $sth->execute($chrid,$pos,'','',@
$res[0,1]);
226 my ($t,%pid,$infile,$outfile)=(0);
227 open(OUTFILE
,">",$opt_r) or die "Error: $!\n";
228 print OUTFILE
"ChrID\tPos\tUp200\tDown200\t$Samples\tType\tGene\tKOG_ID\tKOG_nfo\tPANTHER_ID\tPANTHER_nfo\tPFAM_ID\tPFAM_nfo\n";
231 # next unless /chr\w{1,2}\./; # only 22+2+1
233 #next if /_random\./;
234 $_ = $opt_i.'/'.$_ unless $opt_f;
235 print STDERR
"parsing [$_] ...\t{$t}\n";
237 open( $infile,"-|","bzip2 -dc $_") or die "Error: $!\n";
239 open( $infile,"-|","gzip -dc $_") or die "Error: $!\n";
240 } else {open( $infile,"<",$_) or die "Error: $!\n";}
241 readindel
($infile,$sth); # close file within.
245 print OUTFILE "\n__END__\nSummary:\n";
246 for my $chr (sort keys %Pinf) { # {$a <=> $b}
247 print OUTFILE "chr:$chr\t";
248 for (sort keys %{$Pinf{$chr}}) {
249 print OUTFILE $_,":",$Pinf{$chr}{$_},"\t";
255 print OUTFILE "\n__END__\nSummary:\n#Chr\t";
257 print OUTFILE "$_\t" for (sort keys %Ainf);
259 for my $chr (sort keys %Pinf) { # {$a <=> $b}
260 print OUTFILE "$chr\t";
261 for (sort keys %Ainf) {
262 if (defined $Pinf{$chr}{$_}) {$out=$Pinf{$chr}{$_};}
264 print OUTFILE $out,"\t";
272 CREATE INDEX IF NOT EXISTS svcse
{---} ON reindel
{---}(chrid
,pos);
273 CREATE INDEX IF NOT EXISTS svpi
{---} ON reindel
{---}(primary_inf
);
274 CREATE INDEX IF NOT EXISTS svt
{---} ON reindel
{---}(het
);
275 /; # chrid,pos,delta,het,primary_inf,anno_name
276 for (split /;/,$sql) {
280 $dbh->do($_) or warn $dbh->errstr;
289 #my @threads = threads->list();
290 #$_->join() for @threads;
292 my $stop_time = [gettimeofday
];
295 print STDERR
"\n Time Elapsed:\t",tv_interval
( $start_time, $stop_time )," second(s)\n";
297 print STDERR
"\033[32;1m Please use [$opt_s] as Specie name in later steps.\033[0;0m\n";