From f500b64eed3d819d994487a512175dda783744f1 Mon Sep 17 00:00:00 2001
From: Lucas Taniguti
Date: Sun, 17 Dec 2023 22:28:37 -0300
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20HAPPY=20task=20and=20test=20f?=
 =?UTF-8?q?iles?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Note: happy.wdl and run.sh were edited by hand after export (hap.py region
flags corrected to -R/-T, comments added). Hunk sizes and the diffstat were
updated to match; the abbreviated blob ids on the two `index` lines still
describe the pre-edit contents.

 tasks/happy/happy.wdl                        | 88 ++++++++++++++++++++
 tests/tasks/happy/happy/inputs.json          |  9 ++
 tests/tasks/happy/happy/run.sh               |  7 ++
 tests/tasks/happy/happy/test_happy_task.yaml | 28 +++++++
 4 files changed, 132 insertions(+)
 create mode 100644 tasks/happy/happy.wdl
 create mode 100644 tests/tasks/happy/happy/inputs.json
 create mode 100644 tests/tasks/happy/happy/run.sh
 create mode 100644 tests/tasks/happy/happy/test_happy_task.yaml

diff --git a/tasks/happy/happy.wdl b/tasks/happy/happy.wdl
new file mode 100644
index 0000000..2f75cb6
--- /dev/null
+++ b/tasks/happy/happy.wdl
@@ -0,0 +1,88 @@
+version 1.0
+
+# Benchmark a query VCF against a truth VCF with hap.py.
+#
+# Inputs:
+#   truth_vcf, query_vcf  VCFs given to hap.py as its two positional arguments.
+#   prefix                Report prefix passed to -o; every output name starts with it.
+#   fasta                 Reference FASTA passed via --reference.
+#   fasta_fai             Not referenced by the command; declared so the engine stages
+#                         the index with the other inputs (presumably so hap.py finds
+#                         it next to the FASTA -- confirm for your backend).
+#   regions_bed           Optional; passed via -R.
+#   targets_bed           Optional; passed via -T.
+#   false_positives_bed   Optional; passed via --false-positives.
+#   stratification_tsv    Optional; passed via --stratification.
+#   cpus                  Passed to --threads and requested as the runtime cpu count.
+#   stub                  If true, touch empty placeholder outputs and exit before the benchmarking run.
+task HAPPY {
+    input {
+        File truth_vcf
+        File query_vcf
+        String prefix
+        File fasta
+        File fasta_fai
+        File? regions_bed
+        File? targets_bed
+        File? false_positives_bed
+        File? stratification_tsv
+
+        Int cpus
+        Boolean stub = false
+    }
+
+
+    command <<<
+        hap.py -v > version.txt # It's not working as expected. Just prints Hap.py
+
+        # Stub mode: create every prefix-named output as an empty file and stop.
+        if [ ~{stub} == "true" ]; then
+            touch ~{prefix}.summary.csv \
+                ~{prefix}.roc.all.csv.gz \
+                ~{prefix}.roc.Locations.INDEL.csv.gz \
+                ~{prefix}.roc.Locations.INDEL.PASS.csv.gz \
+                ~{prefix}.roc.Locations.SNP.csv.gz \
+                ~{prefix}.roc.Locations.SNP.PASS.csv.gz \
+                ~{prefix}.extended.csv \
+                ~{prefix}.runinfo.json \
+                ~{prefix}.metrics.json.gz \
+                ~{prefix}.vcf.gz \
+                ~{prefix}.vcf.gz.tbi
+            exit 0
+        fi
+
+        # Optional inputs expand to nothing when unset: in WDL 1.0 a placeholder whose
+        # concatenation involves an undefined optional is replaced by an empty string.
+        # -R / -T are hap.py's short options for --restrict-regions / --target-regions.
+        hap.py \
+            ~{truth_vcf} \
+            ~{query_vcf} \
+            ~{"--reference " + fasta} \
+            ~{"--threads " + cpus} \
+            ~{"-R " + regions_bed} \
+            ~{"-T " + targets_bed} \
+            ~{"--false-positives " + false_positives_bed} \
+            ~{"--stratification " + stratification_tsv} \
+            -o ~{prefix}
+    >>>
+
+    output {
+        File summary_csv = "~{prefix}.summary.csv"
+        File roc_all_csv = "~{prefix}.roc.all.csv.gz"
+        File roc_indel_locations_csv = "~{prefix}.roc.Locations.INDEL.csv.gz"
+        File roc_indel_locations_pass_csv = "~{prefix}.roc.Locations.INDEL.PASS.csv.gz"
+        File roc_snp_locations_csv = "~{prefix}.roc.Locations.SNP.csv.gz"
+        File roc_snp_locations_pass_csv = "~{prefix}.roc.Locations.SNP.PASS.csv.gz"
+        File extended_csv = "~{prefix}.extended.csv"
+        File runinfo = "~{prefix}.runinfo.json"
+        File metrics_json = "~{prefix}.metrics.json.gz"
+        File vcf = "~{prefix}.vcf.gz"
+        File tbi = "~{prefix}.vcf.gz.tbi"
+        File version = "version.txt"
+    }
+
+    runtime {
+        docker: "quay.io/biocontainers/hap.py:0.3.14--py27h5c5a3ab_0"
+        cpu: cpus
+    }
+}
diff --git a/tests/tasks/happy/happy/inputs.json b/tests/tasks/happy/happy/inputs.json
new file mode 100644
index 0000000..57d3577
--- /dev/null
+++ b/tests/tasks/happy/happy/inputs.json
@@ -0,0 +1,9 @@
+{
+    "truth_vcf": "truth.vcf.gz",
+    "query_vcf": "query.vcf.gz",
+    "prefix": "benchmark-outputs",
+    "fasta": "reference.fasta",
+    "fasta_fai": "reference.fasta.fai",
+    "cpus": 4,
+    "stub": true
+}
diff --git a/tests/tasks/happy/happy/run.sh b/tests/tasks/happy/happy/run.sh
new file mode 100644
index 0000000..d744a49
--- /dev/null
+++ b/tests/tasks/happy/happy/run.sh
@@ -0,0 +1,7 @@
+set -e
+
+# Empty placeholders are enough: inputs.json sets "stub": true, so the task
+# exits before the benchmarking hap.py call would read them.
+touch truth.vcf.gz query.vcf.gz reference.fasta reference.fasta.fai
+
+miniwdl run --task HAPPY -i tests/tasks/happy/happy/inputs.json tasks/happy/happy.wdl
diff --git a/tests/tasks/happy/happy/test_happy_task.yaml b/tests/tasks/happy/happy/test_happy_task.yaml
new file mode 100644
index 0000000..6cd1009
--- /dev/null
+++ b/tests/tasks/happy/happy/test_happy_task.yaml
@@ -0,0 +1,28 @@
+- name: Check if HAPPY produces the expected outputs
+  tags:
+    - happy/happy
+  command: bash tests/tasks/happy/happy/run.sh
+  files:
+    - path: _LAST/out/summary_csv/benchmark-outputs.summary.csv
+    - path: _LAST/out/roc_all_csv/benchmark-outputs.roc.all.csv.gz
+    - path: _LAST/out/roc_indel_locations_csv/benchmark-outputs.roc.Locations.INDEL.csv.gz
+    - path: _LAST/out/roc_indel_locations_pass_csv/benchmark-outputs.roc.Locations.INDEL.PASS.csv.gz
+    - path: _LAST/out/roc_snp_locations_csv/benchmark-outputs.roc.Locations.SNP.csv.gz
+    - path: _LAST/out/roc_snp_locations_pass_csv/benchmark-outputs.roc.Locations.SNP.PASS.csv.gz
+    - path: _LAST/out/extended_csv/benchmark-outputs.extended.csv
+    - path: _LAST/out/runinfo/benchmark-outputs.runinfo.json
+    - path: _LAST/out/metrics_json/benchmark-outputs.metrics.json.gz
+    - path: _LAST/out/vcf/benchmark-outputs.vcf.gz
+    - path: _LAST/out/tbi/benchmark-outputs.vcf.gz.tbi
+    - path: _LAST/out/version/version.txt
+      contains:
+        - "Hap.py"
+    - path: _LAST/command
+      contains:
+        - "hap.py \\"
+        - "-o benchmark-outputs"
+        - "--threads 4"
+      contains_regex:
+        - "--reference .+\\/reference.fasta"
+        - ".+\\/truth.vcf.gz"
+        - ".+\\/query.vcf.gz"