diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml
index 65ad7bbb..d74b0dfa 100644
--- a/.github/workflows/github-actions.yml
+++ b/.github/workflows/github-actions.yml
@@ -1,6 +1,6 @@
 name: DeepRVAT
 run-name: DeepRVAT 🧬🧪💻🧑‍🔬
-on: [ push ]
+on: [push]
 
 jobs:
   DeepRVAT-Pipeline-Smoke-Tests:
@@ -77,7 +77,6 @@ jobs:
           --snakefile ${{ github.workspace }}/pipelines/seed_gene_discovery.snakefile --show-failed-logs
         shell: micromamba-shell {0}
 
-
   DeepRVAT-Preprocessing-Pipeline-Smoke-Tests:
     runs-on: ubuntu-latest
     steps:
@@ -143,6 +142,10 @@ jobs:
           cache-environment: true
           cache-downloads: true
 
+      - name: Install biotoolbox
+        run: cpanm Bio::ToolBox@1.691 --force
+        shell: micromamba-shell {0}
+
       - name: Install DeepRVAT
         run: pip install -e ${{ github.workspace }}
         shell: micromamba-shell {0}
@@ -162,6 +165,20 @@ jobs:
           -O workdir/reference/GRCh38.primary_assembly.genome.fa.gz \
           && gzip -d workdir/reference/GRCh38.primary_assembly.genome.fa.gz
 
+      - name: Cache GTF file
+        id: cache-gtf
+        uses: actions/cache@v4
+        with:
+          path: example/preprocess/workdir/reference
+          key: ${{ runner.os }}-reference-gtf
+
+      - name: Download gtf data
+        if: steps.cache-gtf.outputs.cache-hit != 'true'
+        run: |
+          cd ${{ github.workspace }}/example/preprocess && \
+          wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/gencode.v44.annotation.gtf.gz \
+          -O workdir/reference/gencode.v44.annotation.gtf.gz
+
       - name: Run preprocessing pipeline
         run: |
           python -m snakemake -j 2 --directory ${{ github.workspace }}/example/preprocess \
@@ -169,12 +186,10 @@ jobs:
           --configfile ${{ github.workspace }}/pipelines/config/deeprvat_preprocess_config.yaml --show-failed-logs
         shell: micromamba-shell {0}
 
-
   DeepRVAT-Preprocessing-Pipeline-Tests-With-QC:
     runs-on: ubuntu-latest
     needs: DeepRVAT-Preprocessing-Pipeline-Smoke-Tests
     steps:
-
       - name: Check out repository code
         uses: actions/checkout@v4
       - uses: mamba-org/setup-micromamba@v1.8.0
@@ -184,6 +199,10 @@ jobs:
           cache-environment: true
           cache-downloads: true
 
+      - name: Install biotoolbox
+        run: cpanm Bio::ToolBox@1.691 --force
+        shell: micromamba-shell {0}
+
       - name: Install DeepRVAT
         run: pip install -e ${{ github.workspace }}
         shell: micromamba-shell {0}
@@ -203,6 +222,20 @@ jobs:
           -O workdir/reference/GRCh38.primary_assembly.genome.fa.gz \
           && gzip -d workdir/reference/GRCh38.primary_assembly.genome.fa.gz
 
+      - name: Cache GTF file
+        id: cache-gtf
+        uses: actions/cache@v4
+        with:
+          path: example/preprocess/workdir/reference
+          key: ${{ runner.os }}-reference-gtf
+
+      - name: Download gtf data
+        if: steps.cache-gtf.outputs.cache-hit != 'true'
+        run: |
+          cd ${{ github.workspace }}/example/preprocess && \
+          wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/gencode.v44.annotation.gtf.gz \
+          -O workdir/reference/gencode.v44.annotation.gtf.gz
+
       - name: Run preprocessing pipeline
         run: |
           python -m snakemake -j 2 --directory ${{ github.workspace }}/example/preprocess \
diff --git a/deeprvat_preprocessing_env.yml b/deeprvat_preprocessing_env.yml
index 771b3a29..09b80c06 100644
--- a/deeprvat_preprocessing_env.yml
+++ b/deeprvat_preprocessing_env.yml
@@ -14,3 +14,7 @@ dependencies:
   - snakemake=7.17.1
   - bcftools=1.17
   - samtools=1.17
+  - bedtools=2.31.1
+  - perl-app-cpanminus=1.7047
+  - bedops=2.4.41
+  - gcc=13.2.0
diff --git a/docs/_static/preprocess_rulegraph_no_qc.svg b/docs/_static/preprocess_rulegraph_no_qc.svg
index 6ddf980d..d41f4483 100644
--- a/docs/_static/preprocess_rulegraph_no_qc.svg
+++ b/docs/_static/preprocess_rulegraph_no_qc.svg
@@ -1,28 +1,28 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
-<!-- Generated by graphviz version 9.0.0 (20230911.1827)
+<!-- Generated by graphviz version 10.0.1 (20240210.2158)
  -->
 <!-- Title: snakemake_dag Pages: 1 -->
-<svg width="362pt" height="548pt"
- viewBox="0.00 0.00 362.12 548.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 544)">
+<svg width="362pt" height="692pt"
+ viewBox="0.00 0.00 362.12 692.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 688)">
 <title>snakemake_dag</title>
-<polygon fill="white" stroke="none" points="-4,4 -4,-544 358.12,-544 358.12,4 -4,4"/>
+<polygon fill="white" stroke="none" points="-4,4 -4,-688 358.12,-688 358.12,4 -4,4"/>
 <!-- 0 -->
 <g id="node1" class="node">
 <title>0</title>
-<path fill="none" stroke="#d8b456" stroke-width="2" d="M222,-36C222,-36 192,-36 192,-36 186,-36 180,-30 180,-24 180,-24 180,-12 180,-12 180,-6 186,0 192,0 192,0 222,0 222,0 228,0 234,-6 234,-12 234,-12 234,-24 234,-24 234,-30 228,-36 222,-36"/>
+<path fill="none" stroke="black" stroke-width="2" d="M222,-36C222,-36 192,-36 192,-36 186,-36 180,-30 180,-24 180,-24 180,-12 180,-12 180,-6 186,0 192,0 192,0 222,0 222,0 228,0 234,-6 234,-12 234,-12 234,-24 234,-24 234,-30 228,-36 222,-36"/>
 <text text-anchor="middle" x="207" y="-14.12" font-family="sans" font-size="10.00">all</text>
 </g>
 <!-- 1 -->
 <g id="node2" class="node">
 <title>1</title>
-<path fill="none" stroke="#56d86b" stroke-width="2" d="M166.5,-108C166.5,-108 87.5,-108 87.5,-108 81.5,-108 75.5,-102 75.5,-96 75.5,-96 75.5,-84 75.5,-84 75.5,-78 81.5,-72 87.5,-72 87.5,-72 166.5,-72 166.5,-72 172.5,-72 178.5,-78 178.5,-84 178.5,-84 178.5,-96 178.5,-96 178.5,-102 172.5,-108 166.5,-108"/>
+<path fill="none" stroke="black" stroke-width="2" d="M166.5,-108C166.5,-108 87.5,-108 87.5,-108 81.5,-108 75.5,-102 75.5,-96 75.5,-96 75.5,-84 75.5,-84 75.5,-78 81.5,-72 87.5,-72 87.5,-72 166.5,-72 166.5,-72 172.5,-72 178.5,-78 178.5,-84 178.5,-84 178.5,-96 178.5,-96 178.5,-102 172.5,-108 166.5,-108"/>
 <text text-anchor="middle" x="127" y="-86.12" font-family="sans" font-size="10.00">combine_genotypes</text>
 </g>
 <!-- 1&#45;&gt;0 -->
-<g id="edge2" class="edge">
+<g id="edge1" class="edge">
 <title>1&#45;&gt;0</title>
 <path fill="none" stroke="grey" stroke-width="2" d="M147.19,-71.34C156.21,-63.44 167.04,-53.96 176.99,-45.26"/>
 <polygon fill="grey" stroke="grey" stroke-width="2" points="179.04,-48.11 184.26,-38.9 174.43,-42.85 179.04,-48.11"/>
@@ -30,7 +30,7 @@
 <!-- 2 -->
 <g id="node3" class="node">
 <title>2</title>
-<path fill="none" stroke="#56d89a" stroke-width="2" d="M162.75,-180C162.75,-180 91.25,-180 91.25,-180 85.25,-180 79.25,-174 79.25,-168 79.25,-168 79.25,-156 79.25,-156 79.25,-150 85.25,-144 91.25,-144 91.25,-144 162.75,-144 162.75,-144 168.75,-144 174.75,-150 174.75,-156 174.75,-156 174.75,-168 174.75,-168 174.75,-174 168.75,-180 162.75,-180"/>
+<path fill="none" stroke="black" stroke-width="2" d="M162.75,-180C162.75,-180 91.25,-180 91.25,-180 85.25,-180 79.25,-174 79.25,-168 79.25,-168 79.25,-156 79.25,-156 79.25,-150 85.25,-144 91.25,-144 91.25,-144 162.75,-144 162.75,-144 168.75,-144 174.75,-150 174.75,-156 174.75,-156 174.75,-168 174.75,-168 174.75,-174 168.75,-180 162.75,-180"/>
 <text text-anchor="middle" x="127" y="-158.12" font-family="sans" font-size="10.00">preprocess_no_qc</text>
 </g>
 <!-- 2&#45;&gt;1 -->
@@ -42,17 +42,17 @@
 <!-- 3 -->
 <g id="node4" class="node">
 <title>3</title>
-<path fill="none" stroke="#70d856" stroke-width="2" d="M342.12,-252C342.12,-252 281.88,-252 281.88,-252 275.88,-252 269.88,-246 269.88,-240 269.88,-240 269.88,-228 269.88,-228 269.88,-222 275.88,-216 281.88,-216 281.88,-216 342.12,-216 342.12,-216 348.12,-216 354.12,-222 354.12,-228 354.12,-228 354.12,-240 354.12,-240 354.12,-246 348.12,-252 342.12,-252"/>
+<path fill="none" stroke="black" stroke-width="2" d="M342.12,-252C342.12,-252 281.88,-252 281.88,-252 275.88,-252 269.88,-246 269.88,-240 269.88,-240 269.88,-228 269.88,-228 269.88,-222 275.88,-216 281.88,-216 281.88,-216 342.12,-216 342.12,-216 348.12,-216 354.12,-222 354.12,-228 354.12,-228 354.12,-240 354.12,-240 354.12,-246 348.12,-252 342.12,-252"/>
 <text text-anchor="middle" x="312" y="-230.12" font-family="sans" font-size="10.00">add_variant_ids</text>
 </g>
 <!-- 3&#45;&gt;0 -->
-<g id="edge3" class="edge">
+<g id="edge2" class="edge">
 <title>3&#45;&gt;0</title>
 <path fill="none" stroke="grey" stroke-width="2" d="M303.23,-215.12C285.16,-178.3 243.65,-93.69 221.51,-48.58"/>
 <polygon fill="grey" stroke="grey" stroke-width="2" points="224.7,-47.13 217.15,-39.69 218.42,-50.21 224.7,-47.13"/>
 </g>
 <!-- 3&#45;&gt;2 -->
-<g id="edge8" class="edge">
+<g id="edge6" class="edge">
 <title>3&#45;&gt;2</title>
 <path fill="none" stroke="grey" stroke-width="2" d="M269.09,-216.76C244.56,-207.48 213.43,-195.7 186.56,-185.54"/>
 <polygon fill="grey" stroke="grey" stroke-width="2" points="187.97,-182.33 177.38,-182.06 185.5,-188.88 187.97,-182.33"/>
@@ -60,7 +60,7 @@
 <!-- 4 -->
 <g id="node5" class="node">
 <title>4</title>
-<path fill="none" stroke="#d85656" stroke-width="2" d="M241.5,-324C241.5,-324 156.5,-324 156.5,-324 150.5,-324 144.5,-318 144.5,-312 144.5,-312 144.5,-300 144.5,-300 144.5,-294 150.5,-288 156.5,-288 156.5,-288 241.5,-288 241.5,-288 247.5,-288 253.5,-294 253.5,-300 253.5,-300 253.5,-312 253.5,-312 253.5,-318 247.5,-324 241.5,-324"/>
+<path fill="none" stroke="black" stroke-width="2" d="M241.5,-324C241.5,-324 156.5,-324 156.5,-324 150.5,-324 144.5,-318 144.5,-312 144.5,-312 144.5,-300 144.5,-300 144.5,-294 150.5,-288 156.5,-288 156.5,-288 241.5,-288 241.5,-288 247.5,-288 253.5,-294 253.5,-300 253.5,-300 253.5,-312 253.5,-312 253.5,-318 247.5,-324 241.5,-324"/>
 <text text-anchor="middle" x="199" y="-302.12" font-family="sans" font-size="10.00">concatenate_variants</text>
 </g>
 <!-- 4&#45;&gt;3 -->
@@ -69,62 +69,62 @@
 <path fill="none" stroke="grey" stroke-width="2" d="M227.81,-287.15C241.35,-278.76 257.72,-268.62 272.41,-259.52"/>
 <polygon fill="grey" stroke="grey" stroke-width="2" points="274.01,-262.65 280.67,-254.41 270.33,-256.7 274.01,-262.65"/>
 </g>
-<!-- 9 -->
-<g id="node10" class="node">
-<title>9</title>
-<path fill="none" stroke="#d88556" stroke-width="2" d="M239.5,-252C239.5,-252 130.5,-252 130.5,-252 124.5,-252 118.5,-246 118.5,-240 118.5,-240 118.5,-228 118.5,-228 118.5,-222 124.5,-216 130.5,-216 130.5,-216 239.5,-216 239.5,-216 245.5,-216 251.5,-222 251.5,-228 251.5,-228 251.5,-240 251.5,-240 251.5,-246 245.5,-252 239.5,-252"/>
+<!-- 12 -->
+<g id="node13" class="node">
+<title>12</title>
+<path fill="none" stroke="black" stroke-width="2" d="M239.5,-252C239.5,-252 130.5,-252 130.5,-252 124.5,-252 118.5,-246 118.5,-240 118.5,-240 118.5,-228 118.5,-228 118.5,-222 124.5,-216 130.5,-216 130.5,-216 239.5,-216 239.5,-216 245.5,-216 251.5,-222 251.5,-228 251.5,-228 251.5,-240 251.5,-240 251.5,-246 245.5,-252 239.5,-252"/>
 <text text-anchor="middle" x="185" y="-230.12" font-family="sans" font-size="10.00">create_parquet_variant_ids</text>
 </g>
-<!-- 4&#45;&gt;9 -->
-<g id="edge14" class="edge">
-<title>4&#45;&gt;9</title>
+<!-- 4&#45;&gt;12 -->
+<g id="edge18" class="edge">
+<title>4&#45;&gt;12</title>
 <path fill="none" stroke="grey" stroke-width="2" d="M195.47,-287.34C194.15,-280.75 192.62,-273.08 191.13,-265.67"/>
 <polygon fill="grey" stroke="grey" stroke-width="2" points="194.57,-264.99 189.17,-255.87 187.7,-266.36 194.57,-264.99"/>
 </g>
 <!-- 5 -->
 <g id="node6" class="node">
 <title>5</title>
-<path fill="none" stroke="#9fd856" stroke-width="2" d="M201,-396C201,-396 171,-396 171,-396 165,-396 159,-390 159,-384 159,-384 159,-372 159,-372 159,-366 165,-360 171,-360 171,-360 201,-360 201,-360 207,-360 213,-366 213,-372 213,-372 213,-384 213,-384 213,-390 207,-396 201,-396"/>
-<text text-anchor="middle" x="186" y="-374.12" font-family="sans" font-size="10.00">variants</text>
+<path fill="none" stroke="black" stroke-width="2" d="M202,-396C202,-396 172,-396 172,-396 166,-396 160,-390 160,-384 160,-384 160,-372 160,-372 160,-366 166,-360 172,-360 172,-360 202,-360 202,-360 208,-360 214,-366 214,-372 214,-372 214,-384 214,-384 214,-390 208,-396 202,-396"/>
+<text text-anchor="middle" x="187" y="-374.12" font-family="sans" font-size="10.00">variants</text>
 </g>
 <!-- 5&#45;&gt;4 -->
 <g id="edge10" class="edge">
 <title>5&#45;&gt;4</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M189.28,-359.34C190.5,-352.75 191.93,-345.08 193.3,-337.67"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="196.74,-338.35 195.12,-327.88 189.86,-337.07 196.74,-338.35"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M190.03,-359.34C191.16,-352.75 192.47,-345.08 193.74,-337.67"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="197.18,-338.33 195.42,-327.89 190.28,-337.15 197.18,-338.33"/>
 </g>
 <!-- 6 -->
 <g id="node7" class="node">
 <title>6</title>
-<path fill="none" stroke="#56d8c9" stroke-width="2" d="M137,-468C137,-468 103,-468 103,-468 97,-468 91,-462 91,-456 91,-456 91,-444 91,-444 91,-438 97,-432 103,-432 103,-432 137,-432 137,-432 143,-432 149,-438 149,-444 149,-444 149,-456 149,-456 149,-462 143,-468 137,-468"/>
-<text text-anchor="middle" x="120" y="-446.12" font-family="sans" font-size="10.00">normalize</text>
+<path fill="none" stroke="black" stroke-width="2" d="M165,-468C165,-468 131,-468 131,-468 125,-468 119,-462 119,-456 119,-456 119,-444 119,-444 119,-438 125,-432 131,-432 131,-432 165,-432 165,-432 171,-432 177,-438 177,-444 177,-444 177,-456 177,-456 177,-462 171,-468 165,-468"/>
+<text text-anchor="middle" x="148" y="-446.12" font-family="sans" font-size="10.00">normalize</text>
 </g>
 <!-- 6&#45;&gt;5 -->
 <g id="edge11" class="edge">
 <title>6&#45;&gt;5</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M136.65,-431.34C143.79,-423.77 152.29,-414.75 160.21,-406.35"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="162.73,-408.78 167.04,-399.1 157.64,-403.98 162.73,-408.78"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M157.84,-431.34C161.74,-424.34 166.32,-416.12 170.69,-408.28"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="173.74,-409.99 175.55,-399.55 167.63,-406.58 173.74,-409.99"/>
 </g>
-<!-- 10 -->
-<g id="node11" class="node">
-<title>10</title>
-<path fill="none" stroke="#ced856" stroke-width="2" d="M114,-324C114,-324 84,-324 84,-324 78,-324 72,-318 72,-312 72,-312 72,-300 72,-300 72,-294 78,-288 84,-288 84,-288 114,-288 114,-288 120,-288 126,-294 126,-300 126,-300 126,-312 126,-312 126,-318 120,-324 114,-324"/>
+<!-- 13 -->
+<g id="node14" class="node">
+<title>13</title>
+<path fill="none" stroke="black" stroke-width="2" d="M114,-324C114,-324 84,-324 84,-324 78,-324 72,-318 72,-312 72,-312 72,-300 72,-300 72,-294 78,-288 84,-288 84,-288 114,-288 114,-288 120,-288 126,-294 126,-300 126,-300 126,-312 126,-312 126,-318 120,-324 114,-324"/>
 <text text-anchor="middle" x="99" y="-302.12" font-family="sans" font-size="10.00">sparsify</text>
 </g>
-<!-- 6&#45;&gt;10 -->
-<g id="edge15" class="edge">
-<title>6&#45;&gt;10</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M117.34,-431.02C113.87,-407.54 107.74,-366.11 103.53,-337.64"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="107.01,-337.21 102.08,-327.83 100.08,-338.24 107.01,-337.21"/>
+<!-- 6&#45;&gt;13 -->
+<g id="edge19" class="edge">
+<title>6&#45;&gt;13</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M141.79,-431.02C133.62,-407.33 119.14,-365.35 109.31,-336.86"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="112.71,-336.01 106.14,-327.7 106.1,-338.29 112.71,-336.01"/>
 </g>
 <!-- 7 -->
 <g id="node8" class="node">
 <title>7</title>
-<path fill="none" stroke="#5682d8" stroke-width="2" d="M76,-540C76,-540 12,-540 12,-540 6,-540 0,-534 0,-528 0,-528 0,-516 0,-516 0,-510 6,-504 12,-504 12,-504 76,-504 76,-504 82,-504 88,-510 88,-516 88,-516 88,-528 88,-528 88,-534 82,-540 76,-540"/>
+<path fill="none" stroke="black" stroke-width="2" d="M76,-540C76,-540 12,-540 12,-540 6,-540 0,-534 0,-528 0,-528 0,-516 0,-516 0,-510 6,-504 12,-504 12,-504 76,-504 76,-504 82,-504 88,-510 88,-516 88,-516 88,-528 88,-528 88,-534 82,-540 76,-540"/>
 <text text-anchor="middle" x="44" y="-518.12" font-family="sans" font-size="10.00">extract_samples</text>
 </g>
 <!-- 7&#45;&gt;2 -->
-<g id="edge6" class="edge">
+<g id="edge7" class="edge">
 <title>7&#45;&gt;2</title>
 <path fill="none" stroke="grey" stroke-width="2" d="M44,-503.07C44,-476.13 44,-423.62 44,-379 44,-379 44,-379 44,-305 44,-260.18 75.2,-216.95 99.34,-190.2"/>
 <polygon fill="grey" stroke="grey" stroke-width="2" points="101.81,-192.68 106.1,-182.99 96.7,-187.89 101.81,-192.68"/>
@@ -132,38 +132,80 @@
 <!-- 7&#45;&gt;6 -->
 <g id="edge12" class="edge">
 <title>7&#45;&gt;6</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M63.18,-503.34C71.66,-495.52 81.82,-486.16 91.19,-477.53"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="93.35,-480.3 98.34,-470.95 88.61,-475.15 93.35,-480.3"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M70.51,-503.15C82.74,-494.93 97.46,-485.02 110.77,-476.06"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="112.68,-478.99 119.02,-470.5 108.77,-473.18 112.68,-478.99"/>
 </g>
 <!-- 8 -->
 <g id="node9" class="node">
 <title>8</title>
-<path fill="none" stroke="#56b1d8" stroke-width="2" d="M160.12,-540C160.12,-540 117.88,-540 117.88,-540 111.88,-540 105.88,-534 105.88,-528 105.88,-528 105.88,-516 105.88,-516 105.88,-510 111.88,-504 117.88,-504 117.88,-504 160.12,-504 160.12,-504 166.12,-504 172.12,-510 172.12,-516 172.12,-516 172.12,-528 172.12,-528 172.12,-534 166.12,-540 160.12,-540"/>
-<text text-anchor="middle" x="139" y="-518.12" font-family="sans" font-size="10.00">index_fasta</text>
+<path fill="none" stroke="black" stroke-width="2" d="M239.12,-612C239.12,-612 196.88,-612 196.88,-612 190.88,-612 184.88,-606 184.88,-600 184.88,-600 184.88,-588 184.88,-588 184.88,-582 190.88,-576 196.88,-576 196.88,-576 239.12,-576 239.12,-576 245.12,-576 251.12,-582 251.12,-588 251.12,-588 251.12,-600 251.12,-600 251.12,-606 245.12,-612 239.12,-612"/>
+<text text-anchor="middle" x="218" y="-590.12" font-family="sans" font-size="10.00">index_fasta</text>
 </g>
 <!-- 8&#45;&gt;6 -->
 <g id="edge13" class="edge">
 <title>8&#45;&gt;6</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M134.21,-503.34C132.4,-496.67 130.28,-488.89 128.25,-481.39"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="131.65,-480.56 125.65,-471.82 124.89,-482.39 131.65,-480.56"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M217.08,-575.3C215.48,-556.61 211.08,-526.74 199,-504 194.03,-494.64 186.92,-485.78 179.59,-478.08"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="182.07,-475.62 172.48,-471.12 177.17,-480.62 182.07,-475.62"/>
 </g>
-<!-- 9&#45;&gt;0 -->
-<g id="edge1" class="edge">
-<title>9&#45;&gt;0</title>
+<!-- 9 -->
+<g id="node10" class="node">
+<title>9</title>
+<path fill="none" stroke="black" stroke-width="2" d="M178.12,-540C178.12,-540 117.88,-540 117.88,-540 111.88,-540 105.88,-534 105.88,-528 105.88,-528 105.88,-516 105.88,-516 105.88,-510 111.88,-504 117.88,-504 117.88,-504 178.12,-504 178.12,-504 184.12,-504 190.12,-510 190.12,-516 190.12,-516 190.12,-528 190.12,-528 190.12,-534 184.12,-540 178.12,-540"/>
+<text text-anchor="middle" x="148" y="-518.12" font-family="sans" font-size="10.00">expand_regions</text>
+</g>
+<!-- 8&#45;&gt;9 -->
+<g id="edge15" class="edge">
+<title>8&#45;&gt;9</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M200.34,-575.34C192.69,-567.69 183.56,-558.56 175.08,-550.08"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="177.59,-547.64 168.04,-543.04 172.64,-552.59 177.59,-547.64"/>
+</g>
+<!-- 9&#45;&gt;6 -->
+<g id="edge14" class="edge">
+<title>9&#45;&gt;6</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M148,-503.34C148,-496.75 148,-489.08 148,-481.67"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="151.5,-481.93 148,-471.93 144.5,-481.93 151.5,-481.93"/>
+</g>
+<!-- 10 -->
+<g id="node11" class="node">
+<title>10</title>
+<path fill="none" stroke="black" stroke-width="2" d="M155,-612C155,-612 115,-612 115,-612 109,-612 103,-606 103,-600 103,-600 103,-588 103,-588 103,-582 109,-576 115,-576 115,-576 155,-576 155,-576 161,-576 167,-582 167,-588 167,-588 167,-600 167,-600 167,-606 161,-612 155,-612"/>
+<text text-anchor="middle" x="135" y="-590.12" font-family="sans" font-size="10.00">create_bed</text>
+</g>
+<!-- 10&#45;&gt;9 -->
+<g id="edge16" class="edge">
+<title>10&#45;&gt;9</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M138.28,-575.34C139.5,-568.75 140.93,-561.08 142.3,-553.67"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="145.74,-554.35 144.12,-543.88 138.86,-553.07 145.74,-554.35"/>
+</g>
+<!-- 11 -->
+<g id="node12" class="node">
+<title>11</title>
+<path fill="none" stroke="black" stroke-width="2" d="M150,-684C150,-684 120,-684 120,-684 114,-684 108,-678 108,-672 108,-672 108,-660 108,-660 108,-654 114,-648 120,-648 120,-648 150,-648 150,-648 156,-648 162,-654 162,-660 162,-660 162,-672 162,-672 162,-678 156,-684 150,-684"/>
+<text text-anchor="middle" x="135" y="-662.12" font-family="sans" font-size="10.00">fiter_gtf</text>
+</g>
+<!-- 11&#45;&gt;10 -->
+<g id="edge17" class="edge">
+<title>11&#45;&gt;10</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M135,-647.34C135,-640.75 135,-633.08 135,-625.67"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="138.5,-625.93 135,-615.93 131.5,-625.93 138.5,-625.93"/>
+</g>
+<!-- 12&#45;&gt;0 -->
+<g id="edge3" class="edge">
+<title>12&#45;&gt;0</title>
 <path fill="none" stroke="grey" stroke-width="2" d="M186.84,-215.12C190.59,-178.62 199.17,-95.13 203.84,-49.74"/>
 <polygon fill="grey" stroke="grey" stroke-width="2" points="207.3,-50.29 204.84,-39.99 200.34,-49.58 207.3,-50.29"/>
 </g>
-<!-- 9&#45;&gt;2 -->
+<!-- 12&#45;&gt;2 -->
 <g id="edge5" class="edge">
-<title>9&#45;&gt;2</title>
+<title>12&#45;&gt;2</title>
 <path fill="none" stroke="grey" stroke-width="2" d="M170.36,-215.34C164.23,-207.93 156.95,-199.14 150.12,-190.9"/>
 <polygon fill="grey" stroke="grey" stroke-width="2" points="152.84,-188.7 143.76,-183.23 147.45,-193.17 152.84,-188.7"/>
 </g>
-<!-- 10&#45;&gt;2 -->
-<g id="edge7" class="edge">
-<title>10&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M100.22,-287.1C101.69,-269.01 104.64,-240.33 110,-216 111.66,-208.48 113.93,-200.5 116.28,-193.08"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="119.53,-194.39 119.37,-183.79 112.89,-192.17 119.53,-194.39"/>
+<!-- 13&#45;&gt;2 -->
+<g id="edge8" class="edge">
+<title>13&#45;&gt;2</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M99.88,-287.08C101.03,-268.98 103.58,-240.28 109,-216 110.69,-208.44 113.07,-200.44 115.55,-193.02"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="118.8,-194.32 118.84,-183.73 112.21,-191.99 118.8,-194.32"/>
 </g>
 </g>
 </svg>
diff --git a/docs/_static/preprocess_rulegraph_with_qc.svg b/docs/_static/preprocess_rulegraph_with_qc.svg
index 167d7839..7de3c73f 100644
--- a/docs/_static/preprocess_rulegraph_with_qc.svg
+++ b/docs/_static/preprocess_rulegraph_with_qc.svg
@@ -1,271 +1,313 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
-<!-- Generated by graphviz version 9.0.0 (20230911.1827)
+<!-- Generated by graphviz version 10.0.1 (20240210.2158)
  -->
 <!-- Title: snakemake_dag Pages: 1 -->
-<svg width="621pt" height="548pt"
- viewBox="0.00 0.00 620.50 548.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 544)">
+<svg width="620pt" height="692pt"
+ viewBox="0.00 0.00 619.50 692.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 688)">
 <title>snakemake_dag</title>
-<polygon fill="white" stroke="none" points="-4,4 -4,-544 616.5,-544 616.5,4 -4,4"/>
+<polygon fill="white" stroke="none" points="-4,4 -4,-688 615.5,-688 615.5,4 -4,4"/>
 <!-- 0 -->
 <g id="node1" class="node">
 <title>0</title>
-<path fill="none" stroke="#68d856" stroke-width="2" d="M449,-36C449,-36 419,-36 419,-36 413,-36 407,-30 407,-24 407,-24 407,-12 407,-12 407,-6 413,0 419,0 419,0 449,0 449,0 455,0 461,-6 461,-12 461,-12 461,-24 461,-24 461,-30 455,-36 449,-36"/>
-<text text-anchor="middle" x="434" y="-14.12" font-family="sans" font-size="10.00">all</text>
+<path fill="none" stroke="black" stroke-width="2" d="M448,-36C448,-36 418,-36 418,-36 412,-36 406,-30 406,-24 406,-24 406,-12 406,-12 406,-6 412,0 418,0 418,0 448,0 448,0 454,0 460,-6 460,-12 460,-12 460,-24 460,-24 460,-30 454,-36 448,-36"/>
+<text text-anchor="middle" x="433" y="-14.12" font-family="sans" font-size="10.00">all</text>
 </g>
 <!-- 1 -->
 <g id="node2" class="node">
 <title>1</title>
-<path fill="none" stroke="#56d8c1" stroke-width="2" d="M333.5,-108C333.5,-108 254.5,-108 254.5,-108 248.5,-108 242.5,-102 242.5,-96 242.5,-96 242.5,-84 242.5,-84 242.5,-78 248.5,-72 254.5,-72 254.5,-72 333.5,-72 333.5,-72 339.5,-72 345.5,-78 345.5,-84 345.5,-84 345.5,-96 345.5,-96 345.5,-102 339.5,-108 333.5,-108"/>
-<text text-anchor="middle" x="294" y="-86.12" font-family="sans" font-size="10.00">combine_genotypes</text>
+<path fill="none" stroke="black" stroke-width="2" d="M332.5,-108C332.5,-108 253.5,-108 253.5,-108 247.5,-108 241.5,-102 241.5,-96 241.5,-96 241.5,-84 241.5,-84 241.5,-78 247.5,-72 253.5,-72 253.5,-72 332.5,-72 332.5,-72 338.5,-72 344.5,-78 344.5,-84 344.5,-84 344.5,-96 344.5,-96 344.5,-102 338.5,-108 332.5,-108"/>
+<text text-anchor="middle" x="293" y="-86.12" font-family="sans" font-size="10.00">combine_genotypes</text>
 </g>
 <!-- 1&#45;&gt;0 -->
-<g id="edge2" class="edge">
+<g id="edge1" class="edge">
 <title>1&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M329.69,-71.15C349.6,-61.2 374.43,-48.78 394.87,-38.57"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="396.24,-41.79 403.62,-34.19 393.11,-35.53 396.24,-41.79"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M328.69,-71.15C348.6,-61.2 373.43,-48.78 393.87,-38.57"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="395.24,-41.79 402.62,-34.19 392.11,-35.53 395.24,-41.79"/>
 </g>
 <!-- 2 -->
 <g id="node3" class="node">
 <title>2</title>
-<path fill="none" stroke="#88d856" stroke-width="2" d="M302.5,-180C302.5,-180 223.5,-180 223.5,-180 217.5,-180 211.5,-174 211.5,-168 211.5,-168 211.5,-156 211.5,-156 211.5,-150 217.5,-144 223.5,-144 223.5,-144 302.5,-144 302.5,-144 308.5,-144 314.5,-150 314.5,-156 314.5,-156 314.5,-168 314.5,-168 314.5,-174 308.5,-180 302.5,-180"/>
-<text text-anchor="middle" x="263" y="-158.12" font-family="sans" font-size="10.00">preprocess_with_qc</text>
+<path fill="none" stroke="black" stroke-width="2" d="M301.5,-180C301.5,-180 222.5,-180 222.5,-180 216.5,-180 210.5,-174 210.5,-168 210.5,-168 210.5,-156 210.5,-156 210.5,-150 216.5,-144 222.5,-144 222.5,-144 301.5,-144 301.5,-144 307.5,-144 313.5,-150 313.5,-156 313.5,-156 313.5,-168 313.5,-168 313.5,-174 307.5,-180 301.5,-180"/>
+<text text-anchor="middle" x="262" y="-158.12" font-family="sans" font-size="10.00">preprocess_with_qc</text>
 </g>
 <!-- 2&#45;&gt;1 -->
 <g id="edge4" class="edge">
 <title>2&#45;&gt;1</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M270.82,-143.34C273.88,-136.43 277.48,-128.31 280.91,-120.56"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="284,-122.23 284.85,-111.67 277.6,-119.4 284,-122.23"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M269.82,-143.34C272.88,-136.43 276.48,-128.31 279.91,-120.56"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="283,-122.23 283.85,-111.67 276.6,-119.4 283,-122.23"/>
 </g>
 <!-- 3 -->
 <g id="node4" class="node">
 <title>3</title>
-<path fill="none" stroke="#d89556" stroke-width="2" d="M591.12,-252C591.12,-252 530.88,-252 530.88,-252 524.88,-252 518.88,-246 518.88,-240 518.88,-240 518.88,-228 518.88,-228 518.88,-222 524.88,-216 530.88,-216 530.88,-216 591.12,-216 591.12,-216 597.12,-216 603.12,-222 603.12,-228 603.12,-228 603.12,-240 603.12,-240 603.12,-246 597.12,-252 591.12,-252"/>
-<text text-anchor="middle" x="561" y="-230.12" font-family="sans" font-size="10.00">add_variant_ids</text>
+<path fill="none" stroke="black" stroke-width="2" d="M590.12,-252C590.12,-252 529.88,-252 529.88,-252 523.88,-252 517.88,-246 517.88,-240 517.88,-240 517.88,-228 517.88,-228 517.88,-222 523.88,-216 529.88,-216 529.88,-216 590.12,-216 590.12,-216 596.12,-216 602.12,-222 602.12,-228 602.12,-228 602.12,-240 602.12,-240 602.12,-246 596.12,-252 590.12,-252"/>
+<text text-anchor="middle" x="560" y="-230.12" font-family="sans" font-size="10.00">add_variant_ids</text>
 </g>
 <!-- 3&#45;&gt;0 -->
-<g id="edge3" class="edge">
+<g id="edge2" class="edge">
 <title>3&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M550.39,-215.12C528.45,-178.14 477.89,-92.96 451.21,-48"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="454.33,-46.39 446.21,-39.58 448.31,-49.97 454.33,-46.39"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M549.39,-215.12C527.45,-178.14 476.89,-92.96 450.21,-48"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="453.33,-46.39 445.21,-39.58 447.31,-49.97 453.33,-46.39"/>
 </g>
 <!-- 3&#45;&gt;2 -->
-<g id="edge5" class="edge">
+<g id="edge6" class="edge">
 <title>3&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M517.97,-218.29C515.28,-217.48 512.61,-216.71 510,-216 448.89,-199.31 378.12,-184.59 328.03,-174.92"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="328.9,-171.52 318.42,-173.08 327.58,-178.4 328.9,-171.52"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M516.97,-218.29C514.28,-217.48 511.61,-216.71 509,-216 447.89,-199.31 377.12,-184.59 327.03,-174.92"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="327.9,-171.52 317.42,-173.08 326.58,-178.4 327.9,-171.52"/>
 </g>
 <!-- 4 -->
 <g id="node5" class="node">
 <title>4</title>
-<path fill="none" stroke="#56d0d8" stroke-width="2" d="M600.5,-324C600.5,-324 515.5,-324 515.5,-324 509.5,-324 503.5,-318 503.5,-312 503.5,-312 503.5,-300 503.5,-300 503.5,-294 509.5,-288 515.5,-288 515.5,-288 600.5,-288 600.5,-288 606.5,-288 612.5,-294 612.5,-300 612.5,-300 612.5,-312 612.5,-312 612.5,-318 606.5,-324 600.5,-324"/>
-<text text-anchor="middle" x="558" y="-302.12" font-family="sans" font-size="10.00">concatenate_variants</text>
+<path fill="none" stroke="black" stroke-width="2" d="M599.5,-324C599.5,-324 514.5,-324 514.5,-324 508.5,-324 502.5,-318 502.5,-312 502.5,-312 502.5,-300 502.5,-300 502.5,-294 508.5,-288 514.5,-288 514.5,-288 599.5,-288 599.5,-288 605.5,-288 611.5,-294 611.5,-300 611.5,-300 611.5,-312 611.5,-312 611.5,-318 605.5,-324 599.5,-324"/>
+<text text-anchor="middle" x="557" y="-302.12" font-family="sans" font-size="10.00">concatenate_variants</text>
 </g>
 <!-- 4&#45;&gt;3 -->
 <g id="edge14" class="edge">
 <title>4&#45;&gt;3</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M558.76,-287.34C559.04,-280.75 559.37,-273.08 559.69,-265.67"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="563.17,-266.07 560.1,-255.93 556.18,-265.77 563.17,-266.07"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M557.76,-287.34C558.04,-280.75 558.37,-273.08 558.69,-265.67"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="562.17,-266.07 559.1,-255.93 555.18,-265.77 562.17,-266.07"/>
 </g>
-<!-- 9 -->
-<g id="node10" class="node">
-<title>9</title>
-<path fill="none" stroke="#d8b456" stroke-width="2" d="M488.5,-252C488.5,-252 379.5,-252 379.5,-252 373.5,-252 367.5,-246 367.5,-240 367.5,-240 367.5,-228 367.5,-228 367.5,-222 373.5,-216 379.5,-216 379.5,-216 488.5,-216 488.5,-216 494.5,-216 500.5,-222 500.5,-228 500.5,-228 500.5,-240 500.5,-240 500.5,-246 494.5,-252 488.5,-252"/>
-<text text-anchor="middle" x="434" y="-230.12" font-family="sans" font-size="10.00">create_parquet_variant_ids</text>
+<!-- 12 -->
+<g id="node13" class="node">
+<title>12</title>
+<path fill="none" stroke="black" stroke-width="2" d="M487.5,-252C487.5,-252 378.5,-252 378.5,-252 372.5,-252 366.5,-246 366.5,-240 366.5,-240 366.5,-228 366.5,-228 366.5,-222 372.5,-216 378.5,-216 378.5,-216 487.5,-216 487.5,-216 493.5,-216 499.5,-222 499.5,-228 499.5,-228 499.5,-240 499.5,-240 499.5,-246 493.5,-252 487.5,-252"/>
+<text text-anchor="middle" x="433" y="-230.12" font-family="sans" font-size="10.00">create_parquet_variant_ids</text>
 </g>
-<!-- 4&#45;&gt;9 -->
-<g id="edge19" class="edge">
-<title>4&#45;&gt;9</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M526.39,-287.15C511.23,-278.6 492.85,-268.22 476.49,-258.99"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="478.62,-256.17 468.19,-254.3 475.18,-262.26 478.62,-256.17"/>
+<!-- 4&#45;&gt;12 -->
+<g id="edge23" class="edge">
+<title>4&#45;&gt;12</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M525.39,-287.15C510.23,-278.6 491.85,-268.22 475.49,-258.99"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="477.62,-256.17 467.19,-254.3 474.18,-262.26 477.62,-256.17"/>
 </g>
 <!-- 5 -->
 <g id="node6" class="node">
 <title>5</title>
-<path fill="none" stroke="#d85656" stroke-width="2" d="M536,-396C536,-396 506,-396 506,-396 500,-396 494,-390 494,-384 494,-384 494,-372 494,-372 494,-366 500,-360 506,-360 506,-360 536,-360 536,-360 542,-360 548,-366 548,-372 548,-372 548,-384 548,-384 548,-390 542,-396 536,-396"/>
+<path fill="none" stroke="black" stroke-width="2" d="M536,-396C536,-396 506,-396 506,-396 500,-396 494,-390 494,-384 494,-384 494,-372 494,-372 494,-366 500,-360 506,-360 506,-360 536,-360 536,-360 542,-360 548,-366 548,-372 548,-372 548,-384 548,-384 548,-390 542,-396 536,-396"/>
 <text text-anchor="middle" x="521" y="-374.12" font-family="sans" font-size="10.00">variants</text>
 </g>
 <!-- 5&#45;&gt;4 -->
 <g id="edge15" class="edge">
 <title>5&#45;&gt;4</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M530.34,-359.34C534.03,-352.34 538.38,-344.12 542.52,-336.28"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="545.54,-338.05 547.12,-327.58 539.36,-334.78 545.54,-338.05"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M530.08,-359.34C533.68,-352.34 537.91,-344.12 541.94,-336.28"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="544.95,-338.09 546.41,-327.59 538.72,-334.89 544.95,-338.09"/>
 </g>
 <!-- 6 -->
 <g id="node7" class="node">
 <title>6</title>
-<path fill="none" stroke="#56b1d8" stroke-width="2" d="M217,-468C217,-468 183,-468 183,-468 177,-468 171,-462 171,-456 171,-456 171,-444 171,-444 171,-438 177,-432 183,-432 183,-432 217,-432 217,-432 223,-432 229,-438 229,-444 229,-444 229,-456 229,-456 229,-462 223,-468 217,-468"/>
-<text text-anchor="middle" x="200" y="-446.12" font-family="sans" font-size="10.00">normalize</text>
+<path fill="none" stroke="black" stroke-width="2" d="M220,-468C220,-468 186,-468 186,-468 180,-468 174,-462 174,-456 174,-456 174,-444 174,-444 174,-438 180,-432 186,-432 186,-432 220,-432 220,-432 226,-432 232,-438 232,-444 232,-444 232,-456 232,-456 232,-462 226,-468 220,-468"/>
+<text text-anchor="middle" x="203" y="-446.12" font-family="sans" font-size="10.00">normalize</text>
 </g>
 <!-- 6&#45;&gt;5 -->
 <g id="edge16" class="edge">
 <title>6&#45;&gt;5</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M229.81,-442.81C276.43,-433.1 369.37,-413.56 448,-396 458.65,-393.62 470.16,-390.98 480.83,-388.49"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="481.41,-391.95 490.35,-386.27 479.81,-385.14 481.41,-391.95"/>
-</g>
-<!-- 10 -->
-<g id="node11" class="node">
-<title>10</title>
-<path fill="none" stroke="#5673d8" stroke-width="2" d="M70,-324C70,-324 40,-324 40,-324 34,-324 28,-318 28,-312 28,-312 28,-300 28,-300 28,-294 34,-288 40,-288 40,-288 70,-288 70,-288 76,-288 82,-294 82,-300 82,-300 82,-312 82,-312 82,-318 76,-324 70,-324"/>
-<text text-anchor="middle" x="55" y="-302.12" font-family="sans" font-size="10.00">sparsify</text>
-</g>
-<!-- 6&#45;&gt;10 -->
-<g id="edge20" class="edge">
-<title>6&#45;&gt;10</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M170.25,-439.87C147.45,-431.57 116.5,-417.33 96,-396 80.18,-379.54 69.67,-355.87 63.26,-337.08"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="66.62,-336.1 60.29,-327.6 59.95,-338.19 66.62,-336.1"/>
-</g>
-<!-- 11 -->
-<g id="node12" class="node">
-<title>11</title>
-<path fill="none" stroke="#56d882" stroke-width="2" d="M125.12,-252C125.12,-252 82.88,-252 82.88,-252 76.88,-252 70.88,-246 70.88,-240 70.88,-240 70.88,-228 70.88,-228 70.88,-222 76.88,-216 82.88,-216 82.88,-216 125.12,-216 125.12,-216 131.12,-216 137.12,-222 137.12,-228 137.12,-228 137.12,-240 137.12,-240 137.12,-246 131.12,-252 125.12,-252"/>
-<text text-anchor="middle" x="104" y="-230.12" font-family="sans" font-size="10.00">qc_varmiss</text>
-</g>
-<!-- 6&#45;&gt;11 -->
-<g id="edge21" class="edge">
-<title>6&#45;&gt;11</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M187.96,-431.08C172.51,-407.42 145.72,-364.08 129,-324 121.11,-305.09 114.87,-282.91 110.6,-265.42"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="114.03,-264.71 108.35,-255.77 107.21,-266.3 114.03,-264.71"/>
-</g>
-<!-- 12 -->
-<g id="node13" class="node">
-<title>12</title>
-<path fill="none" stroke="#56d8a2" stroke-width="2" d="M180,-324C180,-324 150,-324 150,-324 144,-324 138,-318 138,-312 138,-312 138,-300 138,-300 138,-294 144,-288 150,-288 150,-288 180,-288 180,-288 186,-288 192,-294 192,-300 192,-300 192,-312 192,-312 192,-318 186,-324 180,-324"/>
-<text text-anchor="middle" x="165" y="-302.12" font-family="sans" font-size="10.00">qc_hwe</text>
-</g>
-<!-- 6&#45;&gt;12 -->
-<g id="edge22" class="edge">
-<title>6&#45;&gt;12</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M195.57,-431.02C189.76,-407.44 179.48,-365.73 172.46,-337.25"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="175.91,-336.65 170.12,-327.78 169.12,-338.32 175.91,-336.65"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M232.96,-442.74C279.32,-433.02 371.23,-413.56 449,-396 459.31,-393.67 470.42,-391.09 480.77,-388.65"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="481.47,-392.08 490.39,-386.36 479.85,-385.27 481.47,-392.08"/>
 </g>
 <!-- 13 -->
 <g id="node14" class="node">
 <title>13</title>
-<path fill="none" stroke="#bed856" stroke-width="2" d="M260.88,-252C260.88,-252 205.12,-252 205.12,-252 199.12,-252 193.12,-246 193.12,-240 193.12,-240 193.12,-228 193.12,-228 193.12,-222 199.12,-216 205.12,-216 205.12,-216 260.88,-216 260.88,-216 266.88,-216 272.88,-222 272.88,-228 272.88,-228 272.88,-240 272.88,-240 272.88,-246 266.88,-252 260.88,-252"/>
-<text text-anchor="middle" x="233" y="-230.12" font-family="sans" font-size="10.00">qc_read_depth</text>
+<path fill="none" stroke="black" stroke-width="2" d="M79,-252C79,-252 49,-252 49,-252 43,-252 37,-246 37,-240 37,-240 37,-228 37,-228 37,-222 43,-216 49,-216 49,-216 79,-216 79,-216 85,-216 91,-222 91,-228 91,-228 91,-240 91,-240 91,-246 85,-252 79,-252"/>
+<text text-anchor="middle" x="64" y="-230.12" font-family="sans" font-size="10.00">sparsify</text>
 </g>
 <!-- 6&#45;&gt;13 -->
-<g id="edge23" class="edge">
+<g id="edge24" class="edge">
 <title>6&#45;&gt;13</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M202.76,-431.12C208.39,-394.62 221.26,-311.13 228.26,-265.74"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="231.7,-266.39 229.77,-255.97 224.78,-265.32 231.7,-266.39"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M173.2,-432.98C139.19,-412.77 85.51,-374.46 64,-324 56.22,-305.74 56.25,-283.4 58.21,-265.66"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="61.65,-266.26 59.6,-255.87 54.72,-265.27 61.65,-266.26"/>
 </g>
 <!-- 14 -->
 <g id="node15" class="node">
 <title>14</title>
-<path fill="none" stroke="#d8d356" stroke-width="2" d="M342,-396C342,-396 260,-396 260,-396 254,-396 248,-390 248,-384 248,-384 248,-372 248,-372 248,-366 254,-360 260,-360 260,-360 342,-360 342,-360 348,-360 354,-366 354,-372 354,-372 354,-384 354,-384 354,-390 348,-396 342,-396"/>
-<text text-anchor="middle" x="301" y="-374.12" font-family="sans" font-size="10.00">qc_allelic_imbalance</text>
+<path fill="none" stroke="black" stroke-width="2" d="M127.12,-324C127.12,-324 84.88,-324 84.88,-324 78.88,-324 72.88,-318 72.88,-312 72.88,-312 72.88,-300 72.88,-300 72.88,-294 78.88,-288 84.88,-288 84.88,-288 127.12,-288 127.12,-288 133.12,-288 139.12,-294 139.12,-300 139.12,-300 139.12,-312 139.12,-312 139.12,-318 133.12,-324 127.12,-324"/>
+<text text-anchor="middle" x="106" y="-302.12" font-family="sans" font-size="10.00">qc_varmiss</text>
 </g>
 <!-- 6&#45;&gt;14 -->
-<g id="edge24" class="edge">
+<g id="edge25" class="edge">
 <title>6&#45;&gt;14</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M225.75,-431.15C237.62,-422.93 251.92,-413.02 264.84,-404.06"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="266.59,-407.11 272.81,-398.54 262.6,-401.36 266.59,-407.11"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M190.71,-431.02C174.24,-406.9 144.82,-363.83 125.36,-335.34"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="128.42,-333.62 119.89,-327.34 122.64,-337.57 128.42,-333.62"/>
+</g>
+<!-- 15 -->
+<g id="node16" class="node">
+<title>15</title>
+<path fill="none" stroke="black" stroke-width="2" d="M199,-324C199,-324 169,-324 169,-324 163,-324 157,-318 157,-312 157,-312 157,-300 157,-300 157,-294 163,-288 169,-288 169,-288 199,-288 199,-288 205,-288 211,-294 211,-300 211,-300 211,-312 211,-312 211,-318 205,-324 199,-324"/>
+<text text-anchor="middle" x="184" y="-302.12" font-family="sans" font-size="10.00">qc_hwe</text>
+</g>
+<!-- 6&#45;&gt;15 -->
+<g id="edge26" class="edge">
+<title>6&#45;&gt;15</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M200.59,-431.02C197.45,-407.54 191.91,-366.11 188.1,-337.64"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="191.58,-337.29 186.79,-327.84 184.65,-338.21 191.58,-337.29"/>
 </g>
 <!-- 16 -->
 <g id="node17" class="node">
 <title>16</title>
-<path fill="none" stroke="#9fd856" stroke-width="2" d="M426.75,-396C426.75,-396 385.25,-396 385.25,-396 379.25,-396 373.25,-390 373.25,-384 373.25,-384 373.25,-372 373.25,-372 373.25,-366 379.25,-360 385.25,-360 385.25,-360 426.75,-360 426.75,-360 432.75,-360 438.75,-366 438.75,-372 438.75,-372 438.75,-384 438.75,-384 438.75,-390 432.75,-396 426.75,-396"/>
-<text text-anchor="middle" x="406" y="-374.12" font-family="sans" font-size="10.00">qc_indmiss</text>
+<path fill="none" stroke="black" stroke-width="2" d="M259.88,-252C259.88,-252 204.12,-252 204.12,-252 198.12,-252 192.12,-246 192.12,-240 192.12,-240 192.12,-228 192.12,-228 192.12,-222 198.12,-216 204.12,-216 204.12,-216 259.88,-216 259.88,-216 265.88,-216 271.88,-222 271.88,-228 271.88,-228 271.88,-240 271.88,-240 271.88,-246 265.88,-252 259.88,-252"/>
+<text text-anchor="middle" x="232" y="-230.12" font-family="sans" font-size="10.00">qc_read_depth</text>
 </g>
 <!-- 6&#45;&gt;16 -->
-<g id="edge26" class="edge">
+<g id="edge27" class="edge">
 <title>6&#45;&gt;16</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M229.93,-439.88C260.4,-430.47 309.22,-415.02 360.3,-396.85"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="361.19,-400.25 369.42,-393.58 358.83,-393.67 361.19,-400.25"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M205.43,-431.2C208.73,-406.96 214.83,-362.2 220,-324 222.63,-304.59 225.57,-282.8 227.88,-265.64"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="231.35,-266.11 229.21,-255.73 224.41,-265.17 231.35,-266.11"/>
+</g>
+<!-- 17 -->
+<g id="node18" class="node">
+<title>17</title>
+<path fill="none" stroke="black" stroke-width="2" d="M344,-396C344,-396 262,-396 262,-396 256,-396 250,-390 250,-384 250,-384 250,-372 250,-372 250,-366 256,-360 262,-360 262,-360 344,-360 344,-360 350,-360 356,-366 356,-372 356,-372 356,-384 356,-384 356,-390 350,-396 344,-396"/>
+<text text-anchor="middle" x="303" y="-374.12" font-family="sans" font-size="10.00">qc_allelic_imbalance</text>
+</g>
+<!-- 6&#45;&gt;17 -->
+<g id="edge28" class="edge">
+<title>6&#45;&gt;17</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M228.49,-431.15C240.25,-422.93 254.4,-413.02 267.2,-404.06"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="268.89,-407.15 275.07,-398.55 264.87,-401.42 268.89,-407.15"/>
+</g>
+<!-- 19 -->
+<g id="node20" class="node">
+<title>19</title>
+<path fill="none" stroke="black" stroke-width="2" d="M427.75,-396C427.75,-396 386.25,-396 386.25,-396 380.25,-396 374.25,-390 374.25,-384 374.25,-384 374.25,-372 374.25,-372 374.25,-366 380.25,-360 386.25,-360 386.25,-360 427.75,-360 427.75,-360 433.75,-360 439.75,-366 439.75,-372 439.75,-372 439.75,-384 439.75,-384 439.75,-390 433.75,-396 427.75,-396"/>
+<text text-anchor="middle" x="407" y="-374.12" font-family="sans" font-size="10.00">qc_indmiss</text>
+</g>
+<!-- 6&#45;&gt;19 -->
+<g id="edge30" class="edge">
+<title>6&#45;&gt;19</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M232.77,-439.93C263,-430.58 311.4,-415.21 361.53,-397.15"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="362.56,-400.5 370.76,-393.79 360.17,-393.92 362.56,-400.5"/>
 </g>
 <!-- 7 -->
 <g id="node8" class="node">
 <title>7</title>
-<path fill="none" stroke="#5692d8" stroke-width="2" d="M102,-540C102,-540 38,-540 38,-540 32,-540 26,-534 26,-528 26,-528 26,-516 26,-516 26,-510 32,-504 38,-504 38,-504 102,-504 102,-504 108,-504 114,-510 114,-516 114,-516 114,-528 114,-528 114,-534 108,-540 102,-540"/>
-<text text-anchor="middle" x="70" y="-518.12" font-family="sans" font-size="10.00">extract_samples</text>
+<path fill="none" stroke="black" stroke-width="2" d="M76,-540C76,-540 12,-540 12,-540 6,-540 0,-534 0,-528 0,-528 0,-516 0,-516 0,-510 6,-504 12,-504 12,-504 76,-504 76,-504 82,-504 88,-510 88,-516 88,-516 88,-528 88,-528 88,-534 82,-540 76,-540"/>
+<text text-anchor="middle" x="44" y="-518.12" font-family="sans" font-size="10.00">extract_samples</text>
 </g>
 <!-- 7&#45;&gt;2 -->
-<g id="edge12" class="edge">
+<g id="edge7" class="edge">
 <title>7&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M54.46,-503.11C33.84,-477.46 0,-427.92 0,-379 0,-379 0,-379 0,-305 0,-260.47 11.01,-243.53 46,-216 89.32,-181.91 151.43,-169.34 198.11,-164.9"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="198.14,-168.41 207.82,-164.1 197.57,-161.43 198.14,-168.41"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M36.15,-503.01C25.29,-476.43 7,-424.86 7,-379 7,-379 7,-379 7,-305 7,-264.36 -0.87,-244.61 28,-216 51.55,-192.66 136.28,-177.86 197.01,-169.99"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="197.08,-173.51 206.57,-168.8 196.21,-166.57 197.08,-173.51"/>
 </g>
 <!-- 7&#45;&gt;6 -->
-<g id="edge17" class="edge">
+<g id="edge19" class="edge">
 <title>7&#45;&gt;6</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M103.14,-503.15C120.05,-494.05 140.78,-482.89 158.72,-473.23"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="160.22,-476.39 167.37,-468.57 156.9,-470.23 160.22,-476.39"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M84.53,-503.15C108.04,-492.81 137.58,-479.8 161.28,-469.37"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="162.57,-472.62 170.31,-465.39 159.75,-466.22 162.57,-472.62"/>
 </g>
 <!-- 8 -->
 <g id="node9" class="node">
 <title>8</title>
-<path fill="none" stroke="#d87556" stroke-width="2" d="M221.12,-540C221.12,-540 178.88,-540 178.88,-540 172.88,-540 166.88,-534 166.88,-528 166.88,-528 166.88,-516 166.88,-516 166.88,-510 172.88,-504 178.88,-504 178.88,-504 221.12,-504 221.12,-504 227.12,-504 233.12,-510 233.12,-516 233.12,-516 233.12,-528 233.12,-528 233.12,-534 227.12,-540 221.12,-540"/>
-<text text-anchor="middle" x="200" y="-518.12" font-family="sans" font-size="10.00">index_fasta</text>
+<path fill="none" stroke="black" stroke-width="2" d="M189.12,-612C189.12,-612 146.88,-612 146.88,-612 140.88,-612 134.88,-606 134.88,-600 134.88,-600 134.88,-588 134.88,-588 134.88,-582 140.88,-576 146.88,-576 146.88,-576 189.12,-576 189.12,-576 195.12,-576 201.12,-582 201.12,-588 201.12,-588 201.12,-600 201.12,-600 201.12,-606 195.12,-612 189.12,-612"/>
+<text text-anchor="middle" x="168" y="-590.12" font-family="sans" font-size="10.00">index_fasta</text>
 </g>
 <!-- 8&#45;&gt;6 -->
-<g id="edge18" class="edge">
+<g id="edge17" class="edge">
 <title>8&#45;&gt;6</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M200,-503.34C200,-496.75 200,-489.08 200,-481.67"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="203.5,-481.93 200,-471.93 196.5,-481.93 203.5,-481.93"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M171.36,-575.17C174.86,-557.13 180.7,-528.48 187,-504 188.94,-496.46 191.25,-488.39 193.51,-480.87"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="196.81,-482.04 196.41,-471.46 190.12,-479.98 196.81,-482.04"/>
 </g>
-<!-- 9&#45;&gt;0 -->
-<g id="edge1" class="edge">
-<title>9&#45;&gt;0</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M434,-215.12C434,-178.62 434,-95.13 434,-49.74"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="437.5,-50 434,-40 430.5,-50 437.5,-50"/>
+<!-- 9 -->
+<g id="node10" class="node">
+<title>9</title>
+<path fill="none" stroke="black" stroke-width="2" d="M268.12,-540C268.12,-540 207.88,-540 207.88,-540 201.88,-540 195.88,-534 195.88,-528 195.88,-528 195.88,-516 195.88,-516 195.88,-510 201.88,-504 207.88,-504 207.88,-504 268.12,-504 268.12,-504 274.12,-504 280.12,-510 280.12,-516 280.12,-516 280.12,-528 280.12,-528 280.12,-534 274.12,-540 268.12,-540"/>
+<text text-anchor="middle" x="238" y="-518.12" font-family="sans" font-size="10.00">expand_regions</text>
 </g>
-<!-- 9&#45;&gt;2 -->
-<g id="edge7" class="edge">
-<title>9&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M390.41,-215.15C368.58,-206.22 341.92,-195.31 318.63,-185.77"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="320.19,-182.63 309.61,-182.08 317.54,-189.11 320.19,-182.63"/>
+<!-- 8&#45;&gt;9 -->
+<g id="edge20" class="edge">
+<title>8&#45;&gt;9</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M185.66,-575.34C193.31,-567.69 202.44,-558.56 210.92,-550.08"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="213.36,-552.59 217.96,-543.04 208.41,-547.64 213.36,-552.59"/>
 </g>
-<!-- 10&#45;&gt;2 -->
-<g id="edge13" class="edge">
-<title>10&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M50.87,-287.06C47.46,-267.31 45.51,-235.75 62,-216 79.43,-195.12 146.02,-180.46 197.83,-171.93"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="198.33,-175.39 207.65,-170.36 197.23,-168.48 198.33,-175.39"/>
+<!-- 9&#45;&gt;6 -->
+<g id="edge18" class="edge">
+<title>9&#45;&gt;6</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M229.17,-503.34C225.67,-496.34 221.56,-488.12 217.64,-480.28"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="220.91,-478.99 213.3,-471.61 214.65,-482.12 220.91,-478.99"/>
 </g>
-<!-- 11&#45;&gt;2 -->
-<g id="edge10" class="edge">
-<title>11&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M138.11,-217.98C159.19,-208.7 186.66,-196.61 210.48,-186.12"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="211.67,-189.42 219.41,-182.19 208.85,-183.01 211.67,-189.42"/>
+<!-- 10 -->
+<g id="node11" class="node">
+<title>10</title>
+<path fill="none" stroke="black" stroke-width="2" d="M271,-612C271,-612 231,-612 231,-612 225,-612 219,-606 219,-600 219,-600 219,-588 219,-588 219,-582 225,-576 231,-576 231,-576 271,-576 271,-576 277,-576 283,-582 283,-588 283,-588 283,-600 283,-600 283,-606 277,-612 271,-612"/>
+<text text-anchor="middle" x="251" y="-590.12" font-family="sans" font-size="10.00">create_bed</text>
+</g>
+<!-- 10&#45;&gt;9 -->
+<g id="edge21" class="edge">
+<title>10&#45;&gt;9</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M247.72,-575.34C246.5,-568.75 245.07,-561.08 243.7,-553.67"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="247.14,-553.07 241.88,-543.88 240.26,-554.35 247.14,-553.07"/>
+</g>
+<!-- 11 -->
+<g id="node12" class="node">
+<title>11</title>
+<path fill="none" stroke="black" stroke-width="2" d="M266,-684C266,-684 236,-684 236,-684 230,-684 224,-678 224,-672 224,-672 224,-660 224,-660 224,-654 230,-648 236,-648 236,-648 266,-648 266,-648 272,-648 278,-654 278,-660 278,-660 278,-672 278,-672 278,-678 272,-684 266,-684"/>
+<text text-anchor="middle" x="251" y="-662.12" font-family="sans" font-size="10.00">fiter_gtf</text>
+</g>
+<!-- 11&#45;&gt;10 -->
+<g id="edge22" class="edge">
+<title>11&#45;&gt;10</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M251,-647.34C251,-640.75 251,-633.08 251,-625.67"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="254.5,-625.93 251,-615.93 247.5,-625.93 254.5,-625.93"/>
+</g>
+<!-- 12&#45;&gt;0 -->
+<g id="edge3" class="edge">
+<title>12&#45;&gt;0</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M433,-215.12C433,-178.62 433,-95.13 433,-49.74"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="436.5,-50 433,-40 429.5,-50 436.5,-50"/>
 </g>
 <!-- 12&#45;&gt;2 -->
-<g id="edge11" class="edge">
+<g id="edge9" class="edge">
 <title>12&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M164.97,-287.09C165.76,-267.95 169.47,-237.46 184,-216 191.58,-204.81 202.38,-195.35 213.58,-187.67"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="215.21,-190.78 221.79,-182.47 211.47,-184.87 215.21,-190.78"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M389.41,-215.15C367.58,-206.22 340.92,-195.31 317.63,-185.77"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="319.19,-182.63 308.61,-182.08 316.54,-189.11 319.19,-182.63"/>
 </g>
 <!-- 13&#45;&gt;2 -->
-<g id="edge6" class="edge">
+<g id="edge13" class="edge">
 <title>13&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M240.57,-215.34C243.5,-208.51 246.93,-200.5 250.21,-192.83"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="253.41,-194.25 254.14,-183.68 246.98,-191.5 253.41,-194.25"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M91.7,-222.67C97.73,-220.45 104.07,-218.13 110,-216 138.74,-205.68 170.64,-194.51 197.76,-185.1"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="198.69,-188.49 207,-181.91 196.4,-181.87 198.69,-188.49"/>
 </g>
 <!-- 14&#45;&gt;2 -->
-<g id="edge8" class="edge">
+<g id="edge11" class="edge">
 <title>14&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M300.23,-359.38C298.63,-329.53 294.04,-267.24 282,-216 280.22,-208.41 277.7,-200.41 275.08,-192.99"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="278.38,-191.82 271.6,-183.68 271.83,-194.27 278.38,-191.82"/>
-</g>
-<!-- 15 -->
-<g id="node16" class="node">
-<title>15</title>
-<path fill="none" stroke="#56d863" stroke-width="2" d="M473.12,-324C473.12,-324 340.88,-324 340.88,-324 334.88,-324 328.88,-318 328.88,-312 328.88,-312 328.88,-300 328.88,-300 328.88,-294 334.88,-288 340.88,-288 340.88,-288 473.12,-288 473.12,-288 479.12,-288 485.12,-294 485.12,-300 485.12,-300 485.12,-312 485.12,-312 485.12,-318 479.12,-324 473.12,-324"/>
-<text text-anchor="middle" x="407" y="-302.12" font-family="sans" font-size="10.00">process_individual_missingness</text>
+<path fill="none" stroke="grey" stroke-width="2" d="M116.14,-287.3C127.8,-268.06 148.4,-237.22 172,-216 184.23,-205 199.1,-195.14 213.2,-186.99"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="214.59,-190.22 221.63,-182.3 211.19,-184.1 214.59,-190.22"/>
 </g>
 <!-- 15&#45;&gt;2 -->
-<g id="edge9" class="edge">
+<g id="edge12" class="edge">
 <title>15&#45;&gt;2</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M388.76,-287.02C363.88,-262.48 319.08,-218.3 290.22,-189.84"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="292.82,-187.49 283.25,-182.96 287.91,-192.48 292.82,-187.49"/>
+<path fill="none" stroke="grey" stroke-width="2" d="M179.01,-287.1C174.79,-268.23 171.15,-238.22 183,-216 189.06,-204.63 198.76,-195.27 209.32,-187.75"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="211.01,-190.83 217.52,-182.47 207.22,-184.94 211.01,-190.83"/>
 </g>
-<!-- 16&#45;&gt;15 -->
-<g id="edge25" class="edge">
-<title>16&#45;&gt;15</title>
-<path fill="none" stroke="grey" stroke-width="2" d="M406.25,-359.34C406.35,-352.75 406.46,-345.08 406.56,-337.67"/>
-<polygon fill="grey" stroke="grey" stroke-width="2" points="410.06,-337.98 406.7,-327.93 403.06,-337.88 410.06,-337.98"/>
+<!-- 16&#45;&gt;2 -->
+<g id="edge10" class="edge">
+<title>16&#45;&gt;2</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M239.57,-215.34C242.5,-208.51 245.93,-200.5 249.21,-192.83"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="252.41,-194.25 253.14,-183.68 245.98,-191.5 252.41,-194.25"/>
+</g>
+<!-- 17&#45;&gt;2 -->
+<g id="edge5" class="edge">
+<title>17&#45;&gt;2</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M301.8,-359.38C299.5,-329.53 293.54,-267.24 281,-216 279.15,-208.43 276.61,-200.43 273.98,-193.01"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="277.29,-191.85 270.51,-183.7 270.73,-194.3 277.29,-191.85"/>
+</g>
+<!-- 18 -->
+<g id="node19" class="node">
+<title>18</title>
+<path fill="none" stroke="black" stroke-width="2" d="M472.12,-324C472.12,-324 339.88,-324 339.88,-324 333.88,-324 327.88,-318 327.88,-312 327.88,-312 327.88,-300 327.88,-300 327.88,-294 333.88,-288 339.88,-288 339.88,-288 472.12,-288 472.12,-288 478.12,-288 484.12,-294 484.12,-300 484.12,-300 484.12,-312 484.12,-312 484.12,-318 478.12,-324 472.12,-324"/>
+<text text-anchor="middle" x="406" y="-302.12" font-family="sans" font-size="10.00">process_individual_missingness</text>
+</g>
+<!-- 18&#45;&gt;2 -->
+<g id="edge8" class="edge">
+<title>18&#45;&gt;2</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M387.76,-287.02C362.88,-262.48 318.08,-218.3 289.22,-189.84"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="291.82,-187.49 282.25,-182.96 286.91,-192.48 291.82,-187.49"/>
+</g>
+<!-- 19&#45;&gt;18 -->
+<g id="edge29" class="edge">
+<title>19&#45;&gt;18</title>
+<path fill="none" stroke="grey" stroke-width="2" d="M406.75,-359.34C406.65,-352.75 406.54,-345.08 406.44,-337.67"/>
+<polygon fill="grey" stroke="grey" stroke-width="2" points="409.94,-337.88 406.3,-327.93 402.94,-337.98 409.94,-337.88"/>
 </g>
 </g>
 </svg>
diff --git a/docs/preprocessing.md b/docs/preprocessing.md
index b9564cf1..ba8f4497 100644
--- a/docs/preprocessing.md
+++ b/docs/preprocessing.md
@@ -72,16 +72,23 @@ sparse_dir_name : sparse
 
 # Expected to be found in working_dir/reference_dir
 reference_fasta_file : GRCh38.primary_assembly.genome.fa
+gtf_file : gencode.v44.annotation.gtf.gz
+
+# Max memory used by convert2bed
+convert2bed_max_mem: 64G
+
+# Increase the BED entry by the same number of base pairs in each direction
+region_expand: 3000
 
 # You can specify a different zcat cmd for example gzcat here, default zcat
 zcat_cmd:
-   ```
-
+  
+```
 The config above would use the following directory structure:
 
 ```shell
 parent_directory
-`-- workdir
+-- workdir
     |-- norm
     |   |-- bcf
     |   |-- sparse
@@ -151,13 +158,19 @@ wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/GRCh38
 gzip -d workdir/reference/GRCh38.primary_assembly.genome.fa.gz
 ```
 
-4. Run with the example config
+4. Download the gtf file
+
+```shell
+wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/gencode.v44.annotation.gtf.gz -P workdir/reference
+```
+
+5. Run with the example config
 
 ```shell
 snakemake -j 1 --snakefile ../../pipelines/preprocess_with_qc.snakefile --configfile ../../pipelines/config/deeprvat_preprocess_config.yaml
 ```
 
-5. Enjoy the preprocessed data 🎉
+6. Enjoy the preprocessed data 🎉
 
 ```shell
 ls -l workdir/preprocesed
@@ -195,13 +208,20 @@ wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/GRCh38
 gzip -d workdir/reference/GRCh38.primary_assembly.genome.fa.gz
 ```
 
-4. Run with the example config
+4. Download the gtf file
+
+```shell
+wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/gencode.v44.annotation.gtf.gz -P workdir/reference
+```
+
+
+5. Run with the example config
 
 ```shell
 snakemake -j 1 --snakefile ../../pipelines/preprocess_no_qc.snakefile --configfile ../../pipelines/config/deeprvat_preprocess_config.yaml
 ```
 
-5. Enjoy the preprocessed data 🎉
+6. Enjoy the preprocessed data 🎉
 
 ```shell
 ls -l workdir/preprocesed
diff --git a/example/preprocess/data/vcf/test_vcf_data_c21_b1.vcf.gz b/example/preprocess/data/vcf/test_vcf_data_c21_b1.vcf.gz
index df2edae1..5294f3c6 100644
Binary files a/example/preprocess/data/vcf/test_vcf_data_c21_b1.vcf.gz and b/example/preprocess/data/vcf/test_vcf_data_c21_b1.vcf.gz differ
diff --git a/example/preprocess/data/vcf/test_vcf_data_c21_b1.vcf.gz.tbi b/example/preprocess/data/vcf/test_vcf_data_c21_b1.vcf.gz.tbi
new file mode 100644
index 00000000..63f5991c
Binary files /dev/null and b/example/preprocess/data/vcf/test_vcf_data_c21_b1.vcf.gz.tbi differ
diff --git a/example/preprocess/data/vcf/test_vcf_data_c22_b1.vcf.gz b/example/preprocess/data/vcf/test_vcf_data_c22_b1.vcf.gz
index 6228dc90..3a70b5b1 100644
Binary files a/example/preprocess/data/vcf/test_vcf_data_c22_b1.vcf.gz and b/example/preprocess/data/vcf/test_vcf_data_c22_b1.vcf.gz differ
diff --git a/example/preprocess/data/vcf/test_vcf_data_c22_b1.vcf.gz.tbi b/example/preprocess/data/vcf/test_vcf_data_c22_b1.vcf.gz.tbi
new file mode 100644
index 00000000..85c1cb3e
Binary files /dev/null and b/example/preprocess/data/vcf/test_vcf_data_c22_b1.vcf.gz.tbi differ
diff --git a/example/preprocess/workdir/reference/gencode.v44.annotation.gtf.gz b/example/preprocess/workdir/reference/gencode.v44.annotation.gtf.gz
new file mode 100644
index 00000000..f1796e6a
Binary files /dev/null and b/example/preprocess/workdir/reference/gencode.v44.annotation.gtf.gz differ
diff --git a/pipelines/config/deeprvat_preprocess_config.yaml b/pipelines/config/deeprvat_preprocess_config.yaml
index 335dd6c9..881c53a4 100644
--- a/pipelines/config/deeprvat_preprocess_config.yaml
+++ b/pipelines/config/deeprvat_preprocess_config.yaml
@@ -24,6 +24,13 @@ sparse_dir_name : sparse
 
 # Expected to be found in working_dir/reference_dir
 reference_fasta_file : GRCh38.primary_assembly.genome.fa
+gtf_file : gencode.v44.annotation.gtf.gz
+
+# Max memory used by convert2bed
+convert2bed_max_mem: 64G
+
+# Increase the BED entry by the same number of base pairs in each direction
+region_expand: 3000
 
 # You can specify a different zcat cmd for example gzcat here, default zcat
 zcat_cmd:
\ No newline at end of file
diff --git a/pipelines/preprocessing/preprocess.snakefile b/pipelines/preprocessing/preprocess.snakefile
index 20eb0c89..865c3ebe 100644
--- a/pipelines/preprocessing/preprocess.snakefile
+++ b/pipelines/preprocessing/preprocess.snakefile
@@ -35,6 +35,13 @@ qc_allelic_imbalance_dir = qc_dir / "allelic_imbalance"
 qc_duplicate_vars_dir = qc_dir / "duplicate_vars"
 qc_filtered_samples_dir = qc_dir / "filtered_samples"
 
+gtf_workdir = working_dir / "gtf"  # directory holding the GTF-derived files defined below
+
+gtf_file = reference_dir / config["gtf_file"]  # annotation file named by the "gtf_file" config key
+gtf_filtered_file = gtf_workdir / f"{gtf_file.stem}_filtered_genes.gtf"  # output of rule fiter_gtf; NOTE(review): .stem presumably drops only the last suffix (".gz") - confirm
+bed_file = gtf_workdir / f"{gtf_file.stem}_filtered_genes.bed"  # output of rule create_bed
+expanded_bed = gtf_workdir / f"{gtf_file.stem}_filtered_expanded_regions.bed"  # output of rule expand_regions, input of rule normalize
+
 vcf_stems, vcf_files, vcf_look_up = deeprvat_preprocess.parse_file_path_list(config["vcf_files_list"])
 chromosomes = config["included_chromosomes"]
 
@@ -56,12 +63,48 @@ rule normalize:
         samplefile=norm_dir / "samples_chr.csv",
         fasta=fasta_file,
         fastaindex=fasta_index_file,
+        expanded_bed=expanded_bed,
     params:
         vcf_file=lambda wildcards: vcf_look_up[wildcards.vcf_stem],
     output:
         bcf_file=bcf_dir / "{vcf_stem}.bcf",
     shell:
-        f"""{load_bcftools} bcftools view --samples-file {{input.samplefile}} --output-type u {{params.vcf_file}} | bcftools view --include 'COUNT(GT="alt") > 0' --output-type u | bcftools norm -m-both -f {{input.fasta}} --output-type b --output {{output.bcf_file}}"""
+        f"""{load_bcftools} bcftools view -R "{{input.expanded_bed}}" "{{params.vcf_file}}" --output-type u \
+        | bcftools view --samples-file {{input.samplefile}} --output-type u  \
+        | bcftools view --include 'COUNT(GT="alt") > 0' --output-type u \
+        | bcftools norm -m-both -f {{input.fasta}} --output-type b --output {{output.bcf_file}}"""
+
+
+rule fiter_gtf:  # NOTE(review): "fiter" looks like a typo for "filter"; name kept so existing references to the rule stay valid
+    input:
+        gtf_file,  # annotation GTF located via config["gtf_file"]
+    output:
+        gtf_filtered_file,  # get_features.pl result for --feature "gene" with gene_type=protein_coding
+    shell:
+        'get_features.pl --in "{input}" --out "{output}" --include "gene_type=protein_coding" --feature "gene" --gtf'
+
+
+rule create_bed:  # runs convert2bed (gtf -> bed), feeding the input on stdin and capturing stdout as the output
+    input:
+        gtf_filtered_file,  # produced by rule fiter_gtf
+    output:
+        bed_file
+    params:
+        maxmem=config["convert2bed_max_mem"]  # forwarded to convert2bed as --max-mem
+    shell:
+        'convert2bed --max-mem={params.maxmem} --input=gtf --output=bed  < "{input}" > "{output}"'
+
+
+rule expand_regions:  # runs `bedtools slop` on the BED file and writes the result to expanded_bed
+    input:
+        bed=bed_file,  # produced by rule create_bed
+        faidx=fasta_index_file,  # passed to bedtools slop as the genome file (-g)
+    params:
+        region_expand=config["region_expand"],  # passed as -b; per the config comment, base pairs added in each direction
+    output:
+        expanded_bed
+    shell:
+        'bedtools slop -i "{input.bed}" -g "{input.faidx}" -b {params.region_expand}  > "{output}"'
 
 
 rule index_fasta: