diff --git a/workflow/envs/rtg-tools.yaml b/workflow/envs/rtg-tools.yaml
new file mode 100644
index 0000000..98873c8
--- /dev/null
+++ b/workflow/envs/rtg-tools.yaml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - rtg-tools =3.12.1
\ No newline at end of file
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index f86a593..c97332b 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -41,7 +41,7 @@ coverages = {
 
 common_src = [
     workflow.source_path("../scripts/common/__init__.py"),
-    workflow.source_path("../scripts/common/happy_report.py"),
+    workflow.source_path("../scripts/common/classification.py"),
 ]
 
 
@@ -396,3 +396,5 @@ if "variant-calls" in config:
     wildcard_constraints:
         callset="|".join(config["variant-calls"]),
         classification="|".join(["fp", "fn"]),
+        comparison="genotype|existence",
+        vartype="snvs|indels",
diff --git a/workflow/rules/eval.smk b/workflow/rules/eval.smk
index 6120c24..38633ee 100644
--- a/workflow/rules/eval.smk
+++ b/workflow/rules/eval.smk
@@ -78,38 +78,46 @@ checkpoint stat_truth:
         "../scripts/stat-truth.py"
 
 
+rule generate_sdf:
+    input:
+        genome="resources/reference/genome.fasta",
+        genome_index="resources/reference/genome.fasta.fai",
+    output:
+        directory("resources/reference/genome-sdf"),
+    log:
+        "logs/rtg-tools/sdf.log",
+    conda:
+        "../envs/rtg-tools.yaml"
+    shell:
+        "rtg format --output {output} {input.genome} &> {log}"
+
+
 rule benchmark_variants:
     input:
         truth=get_stratified_truth(),
-        truth_idx=get_stratified_truth(".csi"),
+        truth_idx=get_stratified_truth(".tbi"),
         query="results/stratified-variants/{callset}/{cov}.vcf.gz",
-        genome="resources/reference/genome.fasta",
-        genome_index="resources/reference/genome.fasta.fai",
+        query_index="results/stratified-variants/{callset}/{cov}.vcf.gz.tbi",
+        genome="resources/reference/genome-sdf",
     output:
-        multiext(
-            "results/happy/{callset}/{cov}/report",
-            ".runinfo.json",
-            ".vcf.gz",
-            ".summary.csv",
-            ".extended.csv",
-            ".metrics.json.gz",
-            ".roc.all.csv.gz",
-            ".roc.Locations.INDEL.csv.gz",
-            ".roc.Locations.INDEL.PASS.csv.gz",
-            ".roc.Locations.SNP.csv.gz",
-        ),
-    params:
-        prefix=get_happy_prefix,
-        engine="vcfeval",
+        "results/vcfeval/{callset}/{cov}/output.vcf.gz",
     log:
-        "logs/happy/{callset}/{cov}.log",
-    wrapper:
-        "v1.7.2/bio/hap.py/hap.py"
+        "logs/vcfeval/{callset}/{cov}.log",
+    params:
+        output=lambda w, output: os.path.dirname(output[0]),
+    conda:
+        "../envs/rtg-tools.yaml"
+    threads: 32
+    shell:
+        "rm -r {params.output}; rtg vcfeval --threads {threads} --ref-overlap --all-records "
+        "--output-mode ga4gh --baseline {input.truth} --calls {input.query} "
+        "--output {params.output} --template {input.genome} &> {log}"
 
 
 rule calc_precision_recall:
     input:
-        calls="results/happy/{callset}/{cov}/report.vcf.gz",
+        calls="results/vcfeval/{callset}/{cov}/output.vcf.gz",
+        idx="results/vcfeval/{callset}/{cov}/output.vcf.gz.tbi",
         common_src=common_src,
     output:
         snvs="results/precision-recall/callsets/{callset}/{cov}.{vartype}.tsv",
@@ -186,7 +194,7 @@ rule report_precision_recall:
 
 rule extract_fp_fn:
     input:
-        calls="results/happy/{callset}/{cov}/report.vcf.gz",
+        calls="results/vcfeval/{callset}/{cov}/output.vcf.gz",
         common_src=common_src,
     output:
         "results/fp-fn/callsets/{cov}/{callset}/{classification}.tsv",
diff --git a/workflow/rules/utils.smk b/workflow/rules/utils.smk
index ec384e4..83d6779 100644
--- a/workflow/rules/utils.smk
+++ b/workflow/rules/utils.smk
@@ -14,8 +14,6 @@ rule index_vcf_variants:
         "{prefix}.vcf.gz",
     output:
         "{prefix}.vcf.gz.tbi",
-    params:
-        extra="--tbi",
     log:
         "logs/index-vcf-variants/{prefix}.log",
     wrapper:
diff --git a/workflow/scripts/calc-precision-recall.py b/workflow/scripts/calc-precision-recall.py
index 4f9a8d8..3db4071 100644
--- a/workflow/scripts/calc-precision-recall.py
+++ b/workflow/scripts/calc-precision-recall.py
@@ -9,7 +9,7 @@
 import pandas as pd
 import pysam
 
-from common.happy_report import CompareExactGenotype, CompareExistence, Class
+from common.classification import CompareExactGenotype, CompareExistence, Class
 
 
 class Classifications:
@@ -90,6 +90,7 @@ def collect_results(vartype):
     ]
 
 
-vartype = "SNP" if snakemake.wildcards.vartype == "snvs" else "INDEL"
+assert snakemake.wildcards.vartype in ["snvs", "indels"]
+vartype = "SNV" if snakemake.wildcards.vartype == "snvs" else "INDEL"
 
 collect_results(vartype).to_csv(snakemake.output[0], sep="\t", index=False)
diff --git a/workflow/scripts/common/happy_report.py b/workflow/scripts/common/classification.py
similarity index 92%
rename from workflow/scripts/common/happy_report.py
rename to workflow/scripts/common/classification.py
index 55707cb..540c863 100644
--- a/workflow/scripts/common/happy_report.py
+++ b/workflow/scripts/common/classification.py
@@ -52,7 +52,20 @@ def get_decision(record, sample):
 
 
 def get_vartypes(record):
-    return record.samples[0]["BVT"], record.samples[1]["BVT"]
+    ref_allele = record.alleles[0]
+    alts_truth = alt_alleles(record, 0)
+    alts_query = alt_alleles(record, 1)
+    is_snv = lambda alt: len(alt) == 1 and len(ref_allele) == 1
+
+    def vartype(alts):
+        _is_snv = set(map(is_snv, alts))
+        if not _is_snv:
+            return None
+        # we expect different types
+        # assert len(_is_snv) == 1, f"{record.pos}, {_is_snv}"
+        return "SNV" if all(_is_snv) else "INDEL"
+
+    return vartype(alts_truth), vartype(alts_query)
 
 
 def alt_alleles(record, sample):
diff --git a/workflow/scripts/extract-fp-fn.py b/workflow/scripts/extract-fp-fn.py
index 802149f..db902a9 100644
--- a/workflow/scripts/extract-fp-fn.py
+++ b/workflow/scripts/extract-fp-fn.py
@@ -7,7 +7,7 @@
 import csv
 import pysam
 
-from common.happy_report import CompareExistence, Class, is_het
+from common.classification import CompareExistence, Class, is_het
 
 cmp = CompareExistence()
 varfile = pysam.VariantFile(snakemake.input.calls)