3 # Copyright (c) 2017 Leiden University Medical Center
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 Int compressionLevel = 1
30 # The time estimate in minutes does not work well for higher numbers of threads.
31 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads
32 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
34 String bamIndexPath = sub(outputPath, "\.bam$", ".csi")
38 mkdir -p "$(dirname ~{outputPath})"
40 --threads ~{threads} \
41 -l ~{compressionLevel} \
43 ~{outputPath} ~{sep=' ' inputBams}
44 # sambamba creates an index for us.
45 mv ~{outputPath}.csi ~{bamIndexPath}
48 File outputBam = outputPath
49 File outputBamIndex = bamIndexPath
54 memory: "~{memoryMb}M"
55 time_minutes: timeMinutes
64 Int compressionLevel = 1
65 # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1.
66 Int sortBufferSize = 2048
67 Int ioBufferSize = 128
68 Boolean removeDuplicates = false
73 # Sambamba scales like this: 1 thread is fully utilized (1.0).
74 # 2 threads -> 1.8 utilized, 3 -> 2.4, 4 -> 2.7.
75 # Using 2 threads reduces wall clock time by more than 40%.
77 # According to the manual, sambamba markdup uses sortBufferSize + 2 times ioBufferSize.
78 # Added 1024 MB as a margin of safety. Real-life use with this setting uses 2.7 GiB.
79 Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize
80 # The time estimate in minutes does not work well for higher numbers of threads.
81 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads
82 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
85 String bamIndexPath = sub(outputPath, "\.bam$", ".bai")
89 mkdir -p "$(dirname ~{outputPath})"
91 --nthreads ~{threads} \
92 -l ~{compressionLevel} \
93 ~{true="-r" false="" removeDuplicates} \
94 ~{"--hash-table-size " + hashTableSize} \
95 ~{"--overflow-list-size " + overFlowListSize} \
96 ~{"--sort-buffer-size " + sortBufferSize} \
97 ~{"--io-buffer-size " + ioBufferSize} \
98 ~{sep=' ' inputBams} ~{outputPath}
99 # sambamba creates an index for us.
100 mv ~{outputPath}.bai ~{bamIndexPath}
104 File outputBam = outputPath
105 File outputBamIndex = bamIndexPath
110 memory: "~{memoryMb}M"
111 time_minutes: timeMinutes
117 inputBams: {description: "The input BAM files.", category: "required"}
118 outputPath: {description: "Output directory path + output file.", category: "required"}
119 compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
120 sortBufferSize: {description: "The amount of memory in MB allocated to the sort buffer.", category: "advanced"}
121 ioBufferSize: {description: "The amount of memory in MB allocated to each IO buffer. Sambamba uses two IO buffers.", category: "advanced"}
122 removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"}
123 hashTableSize: {description: "Sets sambamba's hash table size.", category: "advanced"}
124 overFlowListSize: {description: "Sets sambamba's overflow list size.", category: "advanced"}
125 threads: {description: "The number of threads that will be used for this task.", category: "advanced"}
126 memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"}
127 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
128 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
131 outputBam: {description: "Sorted BAM file."}
132 outputBamIndex: {description: "Sorted BAM file index."}
139 String outputPath = basename(inputBam, "\.bam") + ".sorted.bam"
140 Boolean sortByName = false
141 Int compressionLevel = 1
143 Int memoryPerThreadGb = 4
145 Int memoryGb = 1 + threads * memoryPerThreadGb
146 Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3)
147 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
150 # select_first is needed because outputPath is treated as an optional input (bug in Cromwell).
151 String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai")
155 mkdir -p "$(dirname ~{outputPath})"
157 -l ~{compressionLevel} \
158 ~{true="-n" false="" sortByName} \
159 ~{"--nthreads " + threads} \
160 -m ~{memoryPerThreadGb}G \
163 # sambamba creates an index for us.
164 mv ~{outputPath}.bai ~{bamIndexPath}
168 File outputBam = outputPath
169 File outputBamIndex = bamIndexPath
174 memory: "~{memoryGb}G"
176 time_minutes: timeMinutes
181 inputBam: {description: "The input BAM file.", category: "required"}
182 outputPath: {description: "Output directory path + output file.", category: "required"}
183 sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"}
184 compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
185 memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"}
186 threads: {description: "The number of threads that will be used for this task.", category: "advanced"}
187 memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"}
188 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
189 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
192 outputBam: {description: "Sorted BAM file."}
193 outputBamIndex: {description: "Sorted BAM file index."}