6 if (is.null(argv) | length(argv)<3) {
7 cat("Usage: ./vcfdensity.Rscript <WinSize> <input.vcf.gz> <output.prefix>\n")
10 WinSize <- as.integer(argv[1])
11 if (is.na(WinSize)) WinSize <- 0L
12 InVCFgz <- trimws(argv[2],'both')
13 OutP <- trimws(argv[3],'both')
14 cat("[!] WinSize=[",WinSize,"], [",InVCFgz,"]->[",OutP,"].*\n",sep='')
15 if (!file.exists(InVCFgz)) {
16 cat("[x] File not found:[", InVCFgz, "] !\n",sep='')
20 cat("[x] WinSize=[", WinSize, "] is smaller than [1] !\n",sep='')
32 classes <- c(V1="factor",V2="integer")
33 tabAll <- fread(paste0("gzip -dc ",InVCFgz,"|awk '!/^#|\tINDEL;/'"),header=F,stringsAsFactors=T,sep="\t",autostart=100,select=c(1,2), colClasses=classes, data.table=T,verbose=F)
35 setnames(tabAll,1,'Chr')
36 setnames(tabAll,2,'Pos')
39 dorolling <- function(x, rollwin) {
40 chrdat <- integer(max(x))
42 res0 <- rollapply(chrdat, rollwin, sum, by = rollwin)
45 resArr <- tabAll[, dorolling(Pos,rollwin=WinSize), by=Chr]
46 resAll <- unlist(resArr$V1,use.names=F)
47 cat("[!] Stat done.\n")
51 write.table(tbl, paste0(OutP,".tsv"), sep = "\t", quote=F,row.names=F,col.names=F)
53 reshist <- hist(resAll,plot=F)
54 pdf(file=paste0(OutP,".pdf"),title='Histogram of VCF Density')
55 plot(reshist,freq=F,main='Histogram of SNP windowed density',xlab=paste0('SNP Count in every ',WinSize,' bps'))
58 resPhist <- hist(resAll[resAll!=0],plot=F)
59 pdf(file=paste0(OutP,".nonZero.pdf"),title='Histogram of VCF Density')
60 plot(resPhist,freq=F,main='Histogram of SNP windowed density (+)',xlab=paste0('SNP Count in every ',WinSize,' bps'))