3 # Copyright (c) 2017 Leiden University Medical Center
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 task BedToIntervalList {
27 String outputPath = "regions.interval_list"
32 String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
37 mkdir -p "$(dirname ~{outputPath})"
38 picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
46 File intervalList = outputPath
51 time_minutes: timeMinutes
57 bedFile: {description: "A bed file.", category: "required"}
58 dict: {description: "A sequence dict file.", category: "required"}
59 outputPath: {description: "The location the output interval list should be written to.", category: "advanced"}
60 memory: {description: "The amount of memory this job will use.", category: "advanced"}
61 javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
63 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
64 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
69 task CollectMultipleMetrics {
74 File referenceFastaDict
75 File referenceFastaFai
78 Boolean collectAlignmentSummaryMetrics = true
79 Boolean collectInsertSizeMetrics = true
80 Boolean qualityScoreDistribution = true
81 Boolean meanQualityByCycle = true
82 Boolean collectBaseDistributionByCycle = true
83 Boolean collectGcBiasMetrics = true
84 #FIXME: Boolean rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999
85 Boolean collectSequencingArtifactMetrics = true
86 Boolean collectQualityYieldMetrics = true
88 Int memoryMb = javaXmxMb + 512
90 # Additional * 2 because picard multiple metrics reads the reference fasta twice.
91 Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6)
92 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
98 mkdir -p "$(dirname ~{basename})"
99 picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \
100 CollectMultipleMetrics \
102 R=~{referenceFasta} \
105 ~{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \
106 ~{true="PROGRAM=CollectInsertSizeMetrics" false="" collectInsertSizeMetrics} \
107 ~{true="PROGRAM=QualityScoreDistribution" false="" qualityScoreDistribution} \
108 ~{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \
109 ~{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \
110 ~{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \
111 ~{true="PROGRAM=CollectSequencingArtifactMetrics" false=""
112 collectSequencingArtifactMetrics} \
113 ~{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics}
117 File? alignmentSummary = basename + ".alignment_summary_metrics"
118 File? baitBiasDetail = basename + ".bait_bias_detail_metrics"
119 File? baitBiasSummary = basename + ".bait_bias_summary_metrics"
120 File? baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics"
121 File? baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf"
122 File? errorSummary = basename + ".error_summary_metrics"
123 File? gcBiasDetail = basename + ".gc_bias.detail_metrics"
124 File? gcBiasPdf = basename + ".gc_bias.pdf"
125 File? gcBiasSummary = basename + ".gc_bias.summary_metrics"
126 File? insertSizeHistogramPdf = basename + ".insert_size_histogram.pdf"
127 File? insertSize = basename + ".insert_size_metrics"
128 File? preAdapterDetail = basename + ".pre_adapter_detail_metrics"
129 File? preAdapterSummary = basename + ".pre_adapter_summary_metrics"
130 File? qualityByCycle = basename + ".quality_by_cycle_metrics"
131 File? qualityByCyclePdf = basename + ".quality_by_cycle.pdf"
132 File? qualityDistribution = basename + ".quality_distribution_metrics"
133 File? qualityDistributionPdf = basename + ".quality_distribution.pdf"
134 File? qualityYield = basename + ".quality_yield_metrics"
135 # Using a glob is easier. But will lead to very ugly output directories.
136 Array[File] allStats = select_all([
140 baseDistributionByCycle,
141 baseDistributionByCyclePdf,
146 insertSizeHistogramPdf,
153 qualityDistributionPdf,
160 time_minutes: timeMinutes
161 memory: "~{memoryMb}M"
166 inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"}
167 inputBamIndex: {description: "The index of the input BAM file.", category: "required"}
168 referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
169 referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
170 category: "required"}
171 referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
172 basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"}
173 collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.",
174 category: "advanced"}
175 collectInsertSizeMetrics: {description: "Equivalent to the `PROGRAM=CollectInsertSizeMetrics` argument.",
176 category: "advanced"}
177 qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.",
178 category: "advanced"}
179 meanQualityByCycle: {description: "Equivalent to the `PROGRAM=MeanQualityByCycle` argument.", category: "advanced"}
180 collectBaseDistributionByCycle: {description: "Equivalent to the `PROGRAM=CollectBaseDistributionByCycle` argument.",
181 category: "advanced"}
182 collectGcBiasMetrics: {description: "Equivalent to the `PROGRAM=CollectGcBiasMetrics` argument.", category: "advanced"}
183 collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.",
184 category: "advanced"}
185 collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.",
186 category: "advanced"}
187 memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
188 javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
189 category: "advanced"}
190 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
191 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
192 category: "advanced"}
196 task CollectRnaSeqMetrics {
202 String strandSpecificity = "NONE"
205 String javaXmx = "8G"
206 # With 6 minutes per G there were several timeouts.
207 Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12)
208 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
213 mkdir -p "$(dirname ~{basename})"
214 picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
215 CollectRnaSeqMetrics \
217 O=~{basename}.RNA_Metrics \
218 CHART_OUTPUT=~{basename}.RNA_Metrics.pdf \
219 STRAND_SPECIFICITY=~{strandSpecificity} \
220 REF_FLAT=~{refRefflat}
224 File? chart = basename + ".RNA_Metrics.pdf"
225 File metrics = basename + ".RNA_Metrics"
230 time_minutes: timeMinutes
236 inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"}
237 inputBamIndex: {description: "The index of the input BAM file.", category: "required"}
238 refRefflat: {description: "A refflat file containing gene annotations.", category: "required"}
239 basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"}
240 strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics.",
243 memory: {description: "The amount of memory this job will use.", category: "advanced"}
244 javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
245 category: "advanced"}
246 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
247 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
248 category: "advanced"}
252 task CollectTargetedPcrMetrics {
257 File referenceFastaDict
258 File referenceFastaFai
259 File ampliconIntervals
260 Array[File]+ targetIntervals
264 String javaXmx = "3G"
265 Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6)
266 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
271 mkdir -p "$(dirname ~{basename})"
272 picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
273 CollectTargetedPcrMetrics \
275 R=~{referenceFasta} \
276 AMPLICON_INTERVALS=~{ampliconIntervals} \
277 TARGET_INTERVALS=~{sep=" TARGET_INTERVALS=" targetIntervals} \
278 O=~{basename}.targetPcrMetrics \
279 PER_BASE_COVERAGE=~{basename}.targetPcrPerBaseCoverage \
280 PER_TARGET_COVERAGE=~{basename}.targetPcrPerTargetCoverage
284 File perTargetCoverage = basename + ".targetPcrPerTargetCoverage"
285 File perBaseCoverage = basename + ".targetPcrPerBaseCoverage"
286 File metrics = basename + ".targetPcrMetrics"
291 time_minutes: timeMinutes
297 inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"}
298 inputBamIndex: {description: "The index of the input BAM file.", category: "required"}
299 referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
300 referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
301 category: "required"}
302 referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
303 ampliconIntervals: {description: "An interval list describing the coordinates of the amplicons sequenced.",
304 category: "required"}
305 targetIntervals: {description: "An interval list describing the coordinates of the targets sequenced.",
306 category: "required"}
307 basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"}
309 memory: {description: "The amount of memory this job will use.", category: "advanced"}
310 javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
311 category: "advanced"}
312 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
313 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
314 category: "advanced"}
318 task CreateSequenceDictionary {
324 String javaXmx = "2G"
325 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
330 mkdir -p "~{outputDir}"
331 picard -Xmx~{javaXmx} \
332 -XX:ParallelGCThreads=1 \
333 CreateSequenceDictionary \
334 REFERENCE=~{inputFile} \
335 OUTPUT="~{outputDir}/$(basename ~{inputFile}).dict"
339 File outputDict = outputDir + "/" + basename(inputFile) + ".dict"
349 inputFile: {description: "The input fasta file.", category: "required"}
350 outputDir: {description: "Output directory path.", category: "required"}
351 memory: {description: "The amount of memory available to the job.", category: "advanced"}
352 javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
353 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
356 outputDict: {description: "Dictionary of the input fasta file."}
360 # Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs
361 task GatherBamFiles {
363 Array[File]+ inputBams
364 Array[File]+ inputBamsIndex
367 Int memoryMb = javaXmxMb + 512
369 Int? compressionLevel
370 Boolean createMd5File = false
371 # One minute per input gigabyte.
372 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1)
373 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
378 mkdir -p "$(dirname ~{outputBamPath})"
379 picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \
381 INPUT=~{sep=' INPUT=' inputBams} \
382 OUTPUT=~{outputBamPath} \
383 ~{"COMPRESSION_LEVEL=" + compressionLevel} \
385 CREATE_MD5_FILE=~{true="true" false="false" createMd5File}
389 File outputBam = outputBamPath
390 File outputBamIndex = sub(outputBamPath, "\.bam$", ".bai")
391 File? outputBamMd5 = outputBamPath + ".md5"
396 time_minutes: timeMinutes
397 memory: "~{memoryMb}M"
402 inputBams: {description: "The BAM files to be merged together.", category: "required"}
403 inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"}
404 outputBamPath: {description: "The path where the merged BAM file will be written.", category: "required"}
405 compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"}
406 createMd5File: {description: "Whether to create an md5 file of the output BAM.", category: "advanced"}
407 memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
408 javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
409 category: "advanced"}
410 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
411 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
412 category: "advanced"}
418 Array[File]+ inputVcfs
419 Array[File]+ inputVcfIndexes
420 String outputVcfPath = "out.vcf.gz"
423 String javaXmx = "4G"
424 Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2)
425 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
430 mkdir -p "$(dirname ~{outputVcfPath})"
431 picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
433 INPUT=~{sep=' INPUT=' inputVcfs} \
434 OUTPUT=~{outputVcfPath}
438 File outputVcf = outputVcfPath
444 time_minutes: timeMinutes
449 inputVcfs: {description: "The VCF files to be merged together.", category: "required"}
450 inputVcfIndexes: {description: "The indexes of the input VCF files.", category: "required"}
451 outputVcfPath: {description: "The path where the merged VCF file will be written.", category: "required"}
453 memory: {description: "The amount of memory this job will use.", category: "advanced"}
454 javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
455 category: "advanced"}
456 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
457 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
458 category: "advanced"}
462 # Mark duplicate reads to avoid counting non-independent observations
463 task MarkDuplicates {
465 Array[File]+ inputBams
468 Int compressionLevel = 1
469 Boolean createMd5File = false
470 Boolean useJdkInflater = true # Slightly faster than the intel one.
471 # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater.
472 # NOTE: this might change in the future when the intel deflater is updated!
473 Boolean useJdkDeflater = true
475 # In GATK Best practices pipeline MarkDuplicates is given a 7G VM.
476 # https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L1040
477 Int javaXmxMb = 6656 # 6.5G
478 String memoryMb = javaXmxMb + 512
480 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8)
481 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
483 # The program default for READ_NAME_REGEX is appropriate in nearly every case.
484 # Sometimes we wish to supply "null" in order to turn off optical duplicate detection
485 # This can be desirable if you don't mind the estimated library size being wrong and
486 # optical duplicate detection is taking >7 days and failing
487 String? read_name_regex
490 # Task is assuming query-sorted input so that the Secondary and Supplementary reads get
491 # marked correctly. This works because the output of BWA is query-grouped and therefore,
492 # so is the output of MergeBamAlignment. While query-grouped isn't actually query-sorted,
493 # it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname"
497 mkdir -p "$(dirname ~{outputBamPath})"
498 picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \
500 INPUT=~{sep=' INPUT=' inputBams} \
501 OUTPUT=~{outputBamPath} \
502 METRICS_FILE=~{metricsPath} \
503 COMPRESSION_LEVEL=~{compressionLevel} \
504 VALIDATION_STRINGENCY=SILENT \
505 ~{"READ_NAME_REGEX=" + read_name_regex} \
506 OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \
509 ADD_PG_TAG_TO_READS=false \
510 CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \
511 USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
512 USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater}
516 File outputBam = outputBamPath
517 File outputBamIndex = sub(outputBamPath, "\.bam$", ".bai")
518 File? outputBamMd5 = outputBamPath + ".md5"
519 File metricsFile = metricsPath
524 time_minutes: timeMinutes
525 memory: "~{memoryMb}M"
530 inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"}
531 outputBamPath: {description: "The location where the output BAM file should be written.", category: "required"}
532 metricsPath: {description: "The location where the output metrics file should be written.", category: "required"}
533 read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"}
534 createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"}
535 useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
536 useJdkDeflater: {description: "True, uses the java deflater to compress the BAM files. False, uses the optimized intel deflater.", category: "advanced"}
537 compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"}
538 memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
539 javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
540 category: "advanced"}
541 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
542 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
543 category: "advanced"}
547 # Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
550 Array[File]+ inputVCFs
551 Array[File]+ inputVCFsIndexes
555 String javaXmx = "4G"
556 Int timeMinutes = 1 + ceil(size(inputVCFs, "G") * 2)
557 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
558 Int compressionLevel = 1
559 Boolean useJdkInflater = true # Slightly faster than the intel one.
560 # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater.
561 # NOTE: this might change in the future when the intel deflater is updated!
562 Boolean useJdkDeflater = true
566 # Using MergeVcfs instead of GatherVcfs so we can create indices
567 # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket
571 mkdir -p "$(dirname ~{outputVcfPath})"
572 picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
574 INPUT=~{sep=' INPUT=' inputVCFs} \
575 OUTPUT=~{outputVcfPath} \
576 COMPRESSION_LEVEL=~{compressionLevel} \
577 USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
578 USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater}
582 File outputVcf = outputVcfPath
583 File outputVcfIndex = outputVcfPath + ".tbi"
588 time_minutes: timeMinutes
594 inputVCFs: {description: "The VCF files to be merged.", category: "required"}
595 inputVCFsIndexes: {description: "The indexes of the VCF files.", category: "required"}
596 outputVcfPath: {description: "The location the output VCF file should be written to.", category: "required"}
598 memory: {description: "The amount of memory this job will use.", category: "advanced"}
599 javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
600 category: "advanced"}
601 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
602 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
603 category: "advanced"}
604 useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
605 useJdkDeflater: {description: "True, uses the java deflater to compress the VCF file. False, uses the optimized intel deflater.", category: "advanced"}
606 compressionLevel: {description: "The compression level at which the VCF file is written.", category: "advanced"}
614 Boolean paired = true
616 String memory = "17G"
617 String javaXmx = "16G" # High memory default to avoid crashes.
618 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
622 String outputRead1 = sub(basename(inputBam), "\.[bs]am$", "") + "_R1.fastq.gz" # basename() only strips a literal suffix, so the extension is removed with sub().
623 String outputRead2 = sub(basename(inputBam), "\.[bs]am$", "") + "_R2.fastq.gz"
624 String outputUnpaired = sub(basename(inputBam), "\.[bs]am$", "") + "_unpaired.fastq.gz"
628 picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
631 ~{"FASTQ=" + outputRead1} \
632 ~{if paired then "SECOND_END_FASTQ=" + outputRead2 else ""} \
633 ~{if paired then "UNPAIRED_FASTQ=" + outputUnpaired else ""}
637 File read1 = outputRead1
638 File? read2 = if paired then outputRead2 else NONE
639 File? unpairedRead = if paired then outputUnpaired else NONE
648 task ScatterIntervalList {
654 String javaXmx = "3G"
655 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
661 picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
663 SCATTER_COUNT=~{scatter_count} \
664 SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \
667 INPUT=~{interval_list} \
672 Array[File] out = glob("scatter_list/*/*.interval_list")
673 Int interval_count = read_int(stdout())
686 Boolean sortByName = false
687 Boolean createMd5File = false
688 Int maxRecordsInRam = 500000
689 Int compressionLevel = 1
691 # Default ram of 4 GB. Using 125001.0 to prevent an answer of
692 # 4.000000001 which gets rounded to 5.
693 # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778
694 Int XmxGb = ceil(maxRecordsInRam / 125001.0)
695 Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3)
696 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
701 mkdir -p "$(dirname ~{outputPath})"
702 picard -Xmx~{XmxGb}G -XX:ParallelGCThreads=1 SortSam \
704 OUTPUT=~{outputPath} \
705 MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \
706 SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \
708 COMPRESSION_LEVEL=~{compressionLevel} \
709 VALIDATION_STRINGENCY=SILENT \
710 CREATE_MD5_FILE=~{true="true" false="false" createMd5File}
715 File outputBam = outputPath
716 File outputBamIndex = sub(outputPath, "\.bam$", ".bai")
721 memory: "~{1 + XmxGb}G"
722 time_minutes: timeMinutes
727 inputBam: {description: "The unsorted input BAM file", category: "required"}
728 outputPath: {description: "The location the output BAM file should be written to.", category: "required"}
729 XmxGb: {description: "The maximum memory available to picard SortSam in gigabytes. The job's memory request is set 1G higher than this to accommodate JVM overhead.",
730 category: "advanced"}
731 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
732 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
733 category: "advanced"}
739 Array[File]+ vcfFiles
744 String javaXmx = "8G"
745 Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5)
746 String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
752 mkdir -p "$(dirname ~{outputVcfPath})"
753 picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
755 I=~{sep=" I=" vcfFiles} \
756 ~{"SEQUENCE_DICTIONARY=" + dict} \
761 File outputVcf = outputVcfPath
762 File outputVcfIndex = outputVcfPath + ".tbi"
767 time_minutes: timeMinutes
773 vcfFiles: {description: "The VCF files to merge and sort.", category: "required"}
774 outputVcfPath: {description: "The location the sorted VCF files should be written to.", category: "required"}
775 dict: {description: "A sequence dictionary matching the VCF files.", category: "advanced"}
777 memory: {description: "The amount of memory this job will use.", category: "advanced"}
778 javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
779 category: "advanced"}
780 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
781 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
782 category: "advanced"}
789 String outputPath = "./picard/renamed.vcf"
792 String javaXmx = "8G"
793 Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2)
794 String dockerImage = "quay.io/biocontainers/picard:2.19.0--0"
799 mkdir -p "$(dirname ~{outputPath})"
800 picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
804 NEW_SAMPLE_NAME=~{newSampleName}
808 File renamedVcf = outputPath
813 time_minutes: timeMinutes
819 inputVcf: {description: "The VCF file to process.", category: "required"}
820 outputPath: {description: "The location the output VCF file should be written.", category: "common"}
821 newSampleName: {description: "A string to replace the old sample name.", category: "required"}
822 memory: {description: "The memory required to run the programs", category: "advanced"}
823 javaXmx: {description: "The max. memory allocated for JAVA", category: "advanced"}
824 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
825 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}