1 ##Copyright Broad Institute, 2018
3 ## This WDL converts paired FASTQ to uBAM and adds read group information
5 ## Requirements/expectations :
6 ## - Paired-end sequencing data in FASTQ format (one file per orientation)
7 ## - The following metadata descriptors per sample:
8 ## ```readgroup fastq_pair1_file_path fastq_pair2_file_path sample_name library_name platform_unit run_date platform_name sequencing_center```
11 ## - Set of unmapped BAMs, one per read group
12 ## - File of a list of the generated unmapped BAMs
14 ## Cromwell version support
15 ## - Successfully tested on v32
16 ## - Does not work on versions < v23 due to output syntax
18 ## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
19 ## For program versions, see docker containers.
22 ## This script is released under the WDL source code license (BSD-3) (see LICENSE in
23 ## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
24 ## be subject to different licenses. Users are responsible for checking that they are
25 ## authorized to run all programs before running this script. Please see the docker
26 ## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
27 ## licensing information pertaining to the included programs.
30 workflow ConvertPairedFastQsToUnmappedBamWf {
# Per-read-group metadata, supplied as parallel arrays. The scatter below
# indexes every array with the same i, taken from range(length(readgroup_name)),
# so each array needs at least as many elements as readgroup_name.
32 Array[String] sample_name
35 Array[String] readgroup_name
36 Array[String] library_name
37 Array[String] platform_unit
38 Array[String] run_date
39 Array[String] platform_name
40 Array[String] sequencing_center
# Optional overrides: select_first picks the override when it is set and
# otherwise falls back to the literal default listed after it.
44 String? gatk_docker_override
45 String gatk_docker = select_first([gatk_docker_override, "broadinstitute/gatk:latest"])
46 String? gatk_path_override
47 String gatk_path = select_first([gatk_path_override, "gatk"])
# Left optional here and forwarded unchanged to the conversion task.
48 Int? preemptible_attempts
50 # Convert multiple pairs of input FASTQs in parallel: one scatter shard per entry in readgroup_name
51 scatter (i in range(length(readgroup_name))) {
53 # Convert the i-th pair of FASTQs to a uBAM, passing the i-th element of each metadata array
54 call PairedFastQsToUnmappedBAM {
56 sample_name = sample_name[i],
59 readgroup_name = readgroup_name[i],
60 library_name = library_name[i],
61 platform_unit = platform_unit[i],
62 run_date = run_date[i],
63 platform_name = platform_name[i],
64 sequencing_center = sequencing_center[i],
65 gatk_path = gatk_path,
67 preemptible_attempts = preemptible_attempts
71 # Create a file listing the generated uBAMs (the output_bam values gathered from all scatter shards)
74 array_of_files = PairedFastQsToUnmappedBAM.output_bam,
75 fofn_name = ubam_list_name,
79 # Outputs that will be retained when execution is complete:
# the uBAMs gathered from the scatter, plus the list file produced by CreateFoFN
81 Array[File] output_bams = PairedFastQsToUnmappedBAM.output_bam
82 File unmapped_bam_list = CreateFoFN.fofn_list
88 # Convert a pair of FASTQs to uBAM
89 task PairedFastQsToUnmappedBAM {
# Runs the GATK launcher at gatk_path with the Java heap capped at 3000 MB
# (-Xmx3000m) and writes <readgroup_name>.unmapped.bam, passing the read-group
# metadata inputs through as command-line options.
99 String sequencing_center
# Optional; the runtime section substitutes 3 when this is unset.
104 Int? preemptible_attempts
109 ${gatk_path} --java-options "-Xmx3000m" \
112 --FASTQ2 ${fastq_2} \
113 --OUTPUT ${readgroup_name}.unmapped.bam \
114 --READ_GROUP_NAME ${readgroup_name} \
115 --SAMPLE_NAME ${sample_name} \
116 --LIBRARY_NAME ${library_name} \
117 --PLATFORM_UNIT ${platform_unit} \
118 --RUN_DATE ${run_date} \
119 --PLATFORM ${platform_name} \
120 --SEQUENCING_CENTER ${sequencing_center}
# Runtime requests: each select_first falls back to the literal default
# (memory 10, local-disk size 100, 3 preemptible attempts) when the
# corresponding optional input is unset.
124 memory: select_first([machine_mem_gb, 10]) + " GB"
126 disks: "local-disk " + select_first([disk_space_gb, 100]) + " HDD"
127 preemptible: select_first([preemptible_attempts, 3])
# Must stay in sync with the --OUTPUT path passed to the command.
130 File output_bam = "${readgroup_name}.unmapped.bam"
# Entries are declared as String (not File); write_lines writes them to a
# temporary file, one entry per line, and the command then renames that file
# to <fofn_name>.list.
136 Array[String] array_of_files
143 mv ${write_lines(array_of_files)} ${fofn_name}.list
# The list file created by the mv above.
146 File fofn_list = "${fofn_name}.list"