renamed: etc/Server/clr/etc/NetworkManager/system-connections/YourWiFiSSIDhere...
[GalaxyCodeBases.git] / R / vcfdensity.Rscript
blob8ea35038e588ec87592ac0d7e49c09949b92374f
1 #!/usr/bin/env Rscript
3 argv <- commandArgs(T)
5 if (!interactive()) {
6         if (is.null(argv) | length(argv)<3) {
7                 cat("Usage: ./vcfdensity.Rscript <WinSize> <input.vcf.gz> <output.prefix>\n")
8                 q(status=1)
9         } else {
10                 WinSize <- as.integer(argv[1])
11                 if (is.na(WinSize)) WinSize <- 0L
12                 InVCFgz <- trimws(argv[2],'both')
13                 OutP <- trimws(argv[3],'both')
14                 cat("[!] WinSize=[",WinSize,"], [",InVCFgz,"]->[",OutP,"].*\n",sep='')
15                 if (!file.exists(InVCFgz)) {
16                         cat("[x] File not found:[", InVCFgz, "] !\n",sep='')
17                         q(status=-1)
18                 }
19                 if ( WinSize < 1L ) {
20                         cat("[x] WinSize=[", WinSize, "] is smaller than [1] !\n",sep='')
21                         q(status=-1)
22                 }
23         }
26 library('data.table')
27 library('zoo')
28 library('utils')
29 library('graphics')
30 library('grDevices')
32 classes <- c(V1="factor",V2="integer")
33 tabAll <- fread(paste0("gzip -dc ",InVCFgz,"|awk '!/^#|\tINDEL;/'"),header=F,stringsAsFactors=T,sep="\t",autostart=100,select=c(1,2), colClasses=classes, data.table=T,verbose=F)
35 setnames(tabAll,1,'Chr')
36 setnames(tabAll,2,'Pos')
37 print(head(tabAll))
39 dorolling <- function(x, rollwin) {
40         chrdat <- integer(max(x))
41         chrdat[x] <- 1L
42         res0 <- rollapply(chrdat, rollwin, sum, by = rollwin)
43         return(res0)
45 resArr <- tabAll[, dorolling(Pos,rollwin=WinSize), by=Chr]
46 resAll <- unlist(resArr$V1,use.names=F)
47 cat("[!] Stat done.\n")
49 tbl <- table(resAll)
50 print(tbl)
51 write.table(tbl, paste0(OutP,".tsv"), sep = "\t", quote=F,row.names=F,col.names=F)
53 reshist <- hist(resAll,plot=F)
54 pdf(file=paste0(OutP,".pdf"),title='Histogram of VCF Density')
55 plot(reshist,freq=F,main='Histogram of SNP windowed density',xlab=paste0('SNP Count in every ',WinSize,' bps'))
56 dev.off()
58 resPhist <- hist(resAll[resAll!=0],plot=F)
59 pdf(file=paste0(OutP,".nonZero.pdf"),title='Histogram of VCF Density')
60 plot(resPhist,freq=F,main='Histogram of SNP windowed density (+)',xlab=paste0('SNP Count in every ',WinSize,' bps'))
61 dev.off()