3 # Copyright (c) 2017 Leiden University Medical Center
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 Int timeMinutes = 1 + ceil(size(inputFile, "G"))
30 String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0"
33 String outputGz = outputDir + "/" + basename(inputFile) + ".gz"
37 mkdir -p "$(dirname ~{outputGz})"
38 bgzip -c ~{inputFile} > ~{outputGz}
39 tabix ~{outputGz} -p ~{type}
43 File compressed = outputGz
44 File index = outputGz + ".tbi"
48 time_minutes: timeMinutes
54 inputFile: {description: "The file to be compressed and indexed.", category: "required"}
55 outputDir: {description: "The directory in which the output will be placed.", category: "required"}
56 type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"}
57 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
58 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
69 String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
74 mkdir -p "~{outputDir}"
75 ln -s ~{inputFile} "~{outputDir}/$(basename ~{inputFile})"
77 "~{outputDir}/$(basename ~{inputFile})"
81 File outputIndex = outputDir + "/" + basename(inputFile) + ".fai"
91 inputFile: {description: "The input fasta file.", category: "required"}
92 outputDir: {description: "Output directory path.", category: "required"}
93 memory: {description: "The amount of memory available to the job.", category: "advanced"}
94 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
97 outputIndex: {description: "Index of the input fasta file."}
109 Int? excludeSpecificFilter
110 Boolean appendReadNumber = false
111 Boolean outputQuality = false
112 Int? compressionLevel
116 Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) # size() defaults to bytes; measure in gigabytes so the estimate is ~2 minutes per GB.
117 String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
122 ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \
123 ~{"-2 " + outputRead2} \
124 ~{"-0 " + outputRead0} \
125 ~{"-f " + includeFilter} \
126 ~{"-F " + excludeFilter} \
127 ~{"-G " + excludeSpecificFilter} \
128 ~{true="-N" false="-n" appendReadNumber} \
129 ~{true="-O" false="" outputQuality} \
130 ~{"-c " + compressionLevel} \
131 ~{"--threads " + threads} \
136 File read1 = outputRead1
137 File? read2 = outputRead2
138 File? read0 = outputRead0
145 time_minutes: timeMinutes
150 inputBam: {description: "The bam file to process.", category: "required"}
151 outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"}
152 outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"}
153 outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequencing).", category: "advanced"}
154 includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"}
155 excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`", category: "advanced"}
156 excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"}
157 appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`", category: "advanced"}
158 outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"}
159 threads: {description: "The number of threads to use.", category: "advanced"}
160 memory: {description: "The amount of memory this job will use.", category: "advanced"}
161 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
162 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
163 category: "advanced"}
167 task FilterShortReadsBam {
172 Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8)
173 String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
176 String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai")
180 mkdir -p "$(dirname ~{outputPathBam})"
181 samtools view -h ~{bamFile} | \
182 awk 'length($10) > 30 || $1 ~/^@/' | \
183 samtools view -bS -> ~{outputPathBam}
184 samtools index ~{outputPathBam} ~{outputPathBamIndex}
188 File filteredBam = outputPathBam
189 File filteredBamIndex = outputPathBamIndex
194 time_minutes: timeMinutes
199 bamFile: {description: "The bam file to process.", category: "required"}
200 outputPathBam: {description: "The filtered bam file.", category: "common"}
201 memory: {description: "The amount of memory this job will use.", category: "advanced"}
202 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
203 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
212 String memory = "256M" # Only 40.5 MiB used for 150G bam file.
213 Int timeMinutes = 1 + ceil(size(inputBam, "G"))
214 String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
219 mkdir -p "$(dirname ~{outputPath})"
220 samtools flagstat ~{inputBam} > ~{outputPath}
224 File flagstat = outputPath
229 time_minutes: timeMinutes
235 inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"}
236 outputPath: {description: "The location the output should be written to.", category: "required"}
237 memory: {description: "The amount of memory needed for the job.", category: "advanced"}
238 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
239 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
240 category: "advanced"}
247 String? outputBamPath
249 Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4)
250 String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
253 # Select_first is needed, otherwise womtool validate fails.
254 String outputPath = select_first([outputBamPath, basename(bamFile)])
255 String bamIndexPath = sub(outputPath, "\.bam$", ".bai")
260 # Make sure outputBamPath does not exist.
261 if [ ! -f ~{outputPath} ]
263 mkdir -p "$(dirname ~{outputPath})"
264 ln ~{bamFile} ~{outputPath}
266 samtools index ~{outputPath} ~{bamIndexPath}
271 File indexedBam = outputPath
272 File index = bamIndexPath
277 time_minutes: timeMinutes
283 bamFile: {description: "The BAM file for which an index should be made.", category: "required"}
284 outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.",
286 memory: {description: "The amount of memory needed for the job.", category: "advanced"}
287 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
288 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
289 category: "advanced"}
298 Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2)
299 String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
304 mkdir -p "$(dirname ~{outputBamPath})"
305 samtools markdup ~{inputBam} ~{outputBamPath}
309 File outputBam = outputBamPath
314 time_minutes: timeMinutes
319 inputBam: {description: "The BAM file to be processed.", category: "required"}
320 outputBamPath: {description: "The location of the output BAM file.", category: "required"}
321 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
322 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
323 category: "advanced"}
329 Array[File]+ bamFiles
330 String outputBamPath = "merged.bam"
334 Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2)
335 String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
337 String indexPath = sub(outputBamPath, "\.bam$",".bai")
339 # Samtools uses additional threads for merge.
342 mkdir -p "$(dirname ~{outputBamPath})"
344 --threads ~{threads - 1} \
345 ~{true="-f" false="" force} \
346 ~{outputBamPath} ~{sep=' ' bamFiles}
347 samtools index ~{outputBamPath} ~{indexPath}
351 File outputBam = outputBamPath
352 File outputBamIndex = indexPath
358 time_minutes: timeMinutes
363 bamFiles: {description: "The BAM files to merge.", category: "required"}
364 outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"}
365 force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"}
366 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
367 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
368 category: "advanced"}
375 String outputPath = basename(inputBam, ".bam") + ".sorted.bam" # basename() strips a literal suffix (not a regex), so pass ".bam" unescaped.
376 Boolean sortByName = false
377 Int compressionLevel = 1
379 Int memoryPerThreadGb = 4
380 Int memoryGb = 1 + threads * memoryPerThreadGb
381 String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
382 Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3)
385 # Select first needed as outputPath is optional input. (bug in cromwell)
386 String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai")
390 mkdir -p "$(dirname ~{outputPath})"
392 -l ~{compressionLevel} \
393 ~{true="-n" false="" sortByName} \
394 ~{"--threads " + threads} \
395 -m ~{memoryPerThreadGb}G \
400 ~{outputPath} ~{bamIndexPath}
404 File outputBam = outputPath
405 File outputBamIndex = bamIndexPath
410 memory: "~{memoryGb}G"
412 time_minutes: timeMinutes
417 inputBam: {description: "The input SAM file.", category: "required"}
418 outputPath: {description: "Output directory path + output file.", category: "required"}
419 sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"}
420 compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
421 memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"}
422 memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"}
423 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
424 threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"}
425 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
428 outputBam: {description: "Sorted BAM file."}
435 String outputFilePath = "indexed.vcf.gz"
437 Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2)
438 String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0"
440 # FIXME: It is better to do the indexing on VCF creation. Not in a separate task. With file localization this gets hairy fast.
443 mkdir -p "$(dirname ~{outputFilePath})"
444 if [ ! -f ~{outputFilePath} ]
446 ln ~{inputFile} ~{outputFilePath}
448 tabix ~{outputFilePath} -p ~{type}
452 File indexedFile = outputFilePath
453 File index = outputFilePath + ".tbi"
457 time_minutes: timeMinutes
463 inputFile: {description: "The file to be indexed.", category: "required"}
464 outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.",
466 type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"}
467 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
468 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
469 category: "advanced"}
477 String outputFileName = "view.bam"
478 Boolean uncompressedBamOutput = false
481 Int? excludeSpecificFilter
486 Int timeMinutes = 1 + ceil(size(inFile, "G") * 5)
487 String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
489 String outputIndexPath = basename(outputFileName) + ".bai"
491 # Always output to bam and output header
494 mkdir -p "$(dirname ~{outputFileName})"
496 ~{"-T " + referenceFasta} \
497 ~{"-o " + outputFileName} \
498 ~{true="-u " false="" uncompressedBamOutput} \
499 ~{"-f " + includeFilter} \
500 ~{"-F " + excludeFilter} \
501 ~{"-G " + excludeSpecificFilter} \
502 ~{"-q " + MAPQthreshold} \
503 ~{"--threads " + (threads - 1)} \
505 samtools index ~{outputFileName} ~{outputIndexPath}
509 File outputBam = outputFileName
510 File outputBamIndex = outputIndexPath
516 time_minutes: timeMinutes
522 inFile: {description: "A BAM, SAM or CRAM file.", category: "required"}
523 referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"}
524 outputFileName: {description: "The location the output BAM file should be written.", category: "common"}
525 uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"}
526 includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"}
527 excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"}
528 excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"}
529 MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"}
530 threads: {description: "The number of threads to use.", category: "advanced"}
531 memory: {description: "The amount of memory this job will use.", category: "advanced"}
532 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
533 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
534 category: "advanced"}