3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
12 #use Galaxy::ChromString;
13 use Data
::Dump
qw(ddx);
15 die "Usage: $0 <db_file> <bwa ann file> <infile> <out>\n" if @ARGV<4;
21 warn "From [$infs] to [$outfs] with [$dbfile,$annfile]\n";
23 open D
,'<',$dbfile or die;
24 open A
,'<',$annfile or die;
25 my $IN = openfile
($infs);
26 open O
,'>',$outfs or die;
27 print O
"# From [$infs] to [$outfs] with [$dbfile,$annfile]\n";
29 my $dbh = DBI
->connect(
30 "dbi:SQLite:dbname=:memory:","","",
31 {RaiseError
=> 0, PrintError
=> 1, AutoCommit
=> 0}
32 ) or die $DBI::errstr
;
34 CREATE TABLE IF NOT EXISTS gff
43 for (split /;/,$sql) {
45 $dbh->do($_) or die $dbh->errstr;
49 my $sth = $dbh->prepare( "INSERT INTO gff ( chrid,primary_inf,start,end,strand,frame,name ) VALUES ( ?,?,?,?,?,?,? )" );
51 warn "[!]Begin $annfile.\n";
52 my (%ChrLen,$ChrDat,@IDs,@Lens);
55 my (undef,$id) = split / /; # gi|477502308|ref|NW_004457745.1|
56 if ($id =~ /ref\|([^|]+)\|/) {
60 my (undef,$len) = split / /;
66 #$ChrDat = ChrStrInit(\@IDs,\@Lens);
74 warn "[!]Begin $dbfile.\n";
76 my (%GeneDat, %Gene2Chr, $strand, $seqname, $flag);
78 next if /^(#|((\s)*$))/;
79 my ($primary, $start, $end, $frame);
80 if (/^\[([^]]*)\] ([+-]) ([^ ]+) (.+)$/) {
81 #ddx $Gene2Chr{$secname};
84 $seqname = $3; # NW_004465862.1
86 #print "[$secname, $strand, $seqname, $flag]\n";
87 die "[$_]" if length $secname == 0;
90 ($primary, $start, $end, $frame) = split /\t/,$_;
91 if ( $primary eq 'CDS' or $primary eq 'mRNA' ) {
92 push @
{$GeneDat{"$secname\n$strand"}{$primary}},[$start, $end, $frame];
93 ++$Gene2Chr{$secname}{$seqname};
94 $sth->execute( $seqname, $primary, $start, $end, $strand, $frame, $secname );
96 for my $i ( $start .. $end ) {
97 if ( ChrStrANDBase($ChrDat, $seqname, $i, $bitflag{$primary}) > 0 ) {
98 ChrStrORBase($ChrDat, $seqname, $i, ($bitflag{$primary} << 4) );
100 ChrStrORBase($ChrDat, $seqname, $i, $bitflag{$primary});
104 #ddx $GeneDat{"$secname\n$strand"};
106 #ddx $$ChrDat{$seqname};
112 CREATE INDEX IF NOT EXISTS cseGFF ON gff
(chrid
,start
,end
);
113 CREATE INDEX IF NOT EXISTS nGFF ON gff
(name
);
115 for (split /;/,$sql) {
117 $dbh->do($_) or warn $dbh->errstr;
120 my $sthi = $dbh->prepare( "SELECT * FROM gff WHERE chrid=? AND ? BETWEEN start AND end" );
122 sub check_mRNA_is_dup
($$) {
123 my ($mRNA_arr,$elm_arr)=@_;
124 my ($mRNA_start,$mRNA_end,$mRNA_name)=@
$mRNA_arr[2,3,6];
125 for my $item (@
$elm_arr) {
126 if ( $$item[6] eq $mRNA_name ) {
127 warn "ERR: CDS/UTR($$item[2]-$$item[3]) outsides mRNA:$mRNA_name($mRNA_start-$mRNA_end) !" if ( $mRNA_start > $$item[2] or $mRNA_end < $$item[3] );
134 warn "[!]Begin $infs.\n";
138 if (/^#CHROM\tPOS\t/) {
141 #@bamnames = split /\t/;
142 #@bamnames = splice @bamnames, 9;
143 #print O "# Bams: ",join(',',@bamnames),"\n";
144 #print "[@bamnames]\n";
149 my ($chr, $pos, @data) = split /\t/; # undef, $refbase, $altbases, $QUAL, undef, $INFO, $FORMAT,
150 if ($chr =~ /ref\|([^|]+)\|/) {
153 $sthi->execute( $chr, $pos ) or die $sthi->errstr;
154 my $qres = $sthi->fetchall_arrayref;
156 print O
join("\t", $chr, $pos, 'NA', @data),"\n";
157 #print join("\t", $chr, $pos, 'NA'),"\n";
162 if ($$_[1] =~ /mRNA/) {push @mRNA, join(',',@
$_[6,2,3,4,5]);}
163 else {push @CDS, join(',',@
$_[6,2,3,4,5]);}
166 print O
join("\t", $chr, $pos, 'CDS', @data, join(';',@CDS)),"\n";
167 #print join("\t", $chr, $pos, 'CDS'),"\n";
169 } elsif (@mRNA > 0) {
170 print O
join("\t", $chr, $pos, 'mRNA', @data, join(';',@mRNA)),"\n";
171 #print join("\t", $chr, $pos, 'mRNA'),"\n";
179 perl ano_zones
.pl Dasnov3
.db Dasnov3
.ann bam2
.bcgv
.vcf
.gz bam2
.bcgv
.vcf
.ano
181 perl ano_zones
.pl Dasnov3
.db Dasnov3
.ann bam2q20
.snp
.lst bam2q20
.snp
.ano
182 perl ano_zones
.pl Dasnov3
.db Dasnov3
.ann bam2
.bam
.depth
.gz bam2
.bam
.depth
.ano
184 grep Total_Bases sai
/*.fqstat
| awk
-F
" " 'BEGIN {x=0} {x+=$2} END {print x}'
185 grep -P
"\tNA\t" bam2q20
.snp
.ano
| wc
-l
186 grep -P
"\tCDS\t" bam2q20
.snp
.ano
| wc
-l
187 grep -P
"\tmRNA\t" bam2q20
.snp
.ano
| wc
-l
189 grep -P
"\tNA\t" bam2
.bam
.depth
.ano
| wc
-l
190 grep -P
"\tNA\t" bam2
.bam
.depth
.ano
| awk
-F
" " 'BEGIN {x=0} {x+=$4} END {print x}'
191 grep -P
"\tCDS\t" bam2
.bam
.depth
.ano
| wc
-l
192 grep -P
"\tCDS\t" bam2
.bam
.depth
.ano
| awk
-F
" " 'BEGIN {x=0} {x+=$4} END {print x}'
193 grep -P
"\tmRNA\t" bam2
.bam
.depth
.ano
| wc
-l
194 grep -P
"\tmRNA\t" bam2
.bam
.depth
.ano
| awk
-F
" " 'BEGIN {x=0} {x+=$4} END {print x}'