# workflow/Snakefile

"""
Snakefile for doing the higher stages of data processing (everything beyond build_raw)
This includes:
- building the tcm
- dsp parameter generation
- building dsp
- hit pars generation
- building hit
- building evt
- the same for partition level tiers
"""

from pathlib import Path
import os
import sys
import glob
from datetime import datetime
from collections import OrderedDict
import logging

from dbetto import AttrsDict
from legendmeta import LegendMetadata
from legenddataflow import CalGrouping, execenv, utils
from legenddataflow.patterns import get_pattern_tier

# Resolve variable placeholders in the Snakemake config, then wrap it so that
# entries can be read as attributes (e.g. config.legend_metadata_version below).
utils.subst_vars_in_snakemake_config(workflow, config)
config = AttrsDict(config)

# The names below are workflow-level globals. Several of them are not used
# again in this file and are presumably consumed by the included rule files.
check_in_cycle = True
configs = utils.config_path(config)
chan_maps = utils.chan_map_path(config)
meta = utils.metadata_path(config)
det_status = utils.det_status_path(config)
basedir = workflow.basedir

# Timestamp taken once when the workflow is parsed.
# NOTE(review): datetime.now() returns local time, but the format string ends
# in a literal "Z" (the UTC designator); the onsuccess handler uses UTC for the
# same format. Confirm which is intended before changing it.
time = datetime.now().strftime("%Y%m%dT%H%M%SZ")

# NOTE: this will attempt a clone of legend-metadata, if the directory does not exist
metadata = LegendMetadata(meta)
# Pin the metadata to a specific version only when the config requests one.
if "legend_metadata_version" in config:
    metadata.checkout(config.legend_metadata_version)

# Calibration grouping built from cal_groupings.yaml in the detector-status
# directory.
part = CalGrouping(config, Path(det_status) / "cal_groupings.yaml")


# Global regular-expression constraints applied to wildcards of these names in
# every rule of the workflow:
#   experiment - one or more word characters
#   period     - "p" followed by exactly two digits
#   run        - "r" followed by exactly three digits
#   datatype   - exactly three word characters
#   timestamp  - eight digits, "T", six digits, "Z"
wildcard_constraints:
    experiment=r"\w+",
    period=r"p\d{2}",
    run=r"r\d{3}",
    datatype=r"\w{3}",
    timestamp=r"\d{8}T\d{6}Z",


# Rule definitions are split across the files below; Snakemake resolves each
# include path relative to this Snakefile and evaluates it in the workflow's
# shared namespace.
# NOTE(review): names used later in this file but not defined here (e.g.
# get_filelist, log_path, autogen_output, the *_par_catalog objects) are
# presumably provided by these includes; the include order may matter, so
# confirm before reordering.
include: "rules/channel_merge.smk"
include: "rules/filelist_gen.smk"
include: "rules/chanlist_gen.smk"
include: "rules/common.smk"
include: "rules/main.smk"
include: "rules/tcm.smk"
include: "rules/dsp_pars_geds.smk"
include: "rules/dsp_pars_spms.smk"
include: "rules/dsp.smk"
include: "rules/psp_pars_geds.smk"
include: "rules/psp.smk"
include: "rules/hit_pars_geds.smk"
include: "rules/hit.smk"
include: "rules/pht_pars_geds.smk"
include: "rules/pht_pars_geds_fast.smk"
include: "rules/pht.smk"
include: "rules/ann.smk"
include: "rules/evt.smk"
include: "rules/skm.smk"
include: "rules/blinding_calibration.smk"
include: "rules/qc_phy.smk"


# Rules listed here are always executed on the host running Snakemake instead
# of being submitted as cluster/cloud jobs.
# gen_filelist is defined at the bottom of this file; autogen_output is not
# defined in this file (presumably it comes from one of the included rule files).
localrules:
    gen_filelist,
    autogen_output,


onstart:
    # Start-up hook: announce the run, warm up heavy Python packages and
    # (re)write the per-tier parameter validity catalogs.
    print("INFO: starting workflow")

    # Import the heavy packages once up front so that parallel jobs do not race
    # while initialising them.
    # https://numba.readthedocs.io/en/stable/developer/caching.html#cache-sharing
    if not workflow.touch:
        # NOTE(review): the two strings are joined without an explicit space,
        # so this relies on execenv_pyexe() returning a command line that
        # already ends in a separator - confirm.
        warmup_cmd = (
            execenv.execenv_pyexe(config, "python")
            + "-c 'import dspeed, lgdo, matplotlib, pygama'"
        )
        shell(warmup_cmd)

    # Log parameter catalogs in validity files.
    # The catalog objects are not defined in this file and may be absent, so
    # each one is looked up lazily; a missing name surfaces as NameError inside
    # the try block below, exactly where it would for a direct reference.
    catalog_lookups = (
        ("hit", lambda: hit_par_catalog),
        ("pht", lambda: pht_par_catalog),
        ("dsp", lambda: dsp_par_catalog),
        ("psp", lambda: psp_par_catalog),
    )
    for tier, lookup in catalog_lookups:
        validity_file = Path(utils.pars_path(config)) / tier / "validity.yaml"
        # Drop any stale catalog before a fresh one is written.
        if validity_file.is_file():
            validity_file.unlink()
        try:
            validity_file.parent.mkdir(parents=True, exist_ok=True)
            lookup().write_to(validity_file)
        except NameError:
            print(f"No {tier} parameter catalog found")


onsuccess:
    # Success hook: write a timestamped report directory (HTML report, DAG and
    # rule-graph dumps) and then remove temporary *.gen files and file lists.
    from datetime import timezone
    from snakemake.report import auto_report
    from snakemake_interface_report_plugins.registry import ReportPluginRegistry
    import asyncio

    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted string is identical.
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    # NOTE(review): log_path is not imported in this file (other path helpers
    # are accessed through `utils.`); presumably it is brought into the shared
    # namespace by an included rules file - confirm, or switch to
    # utils.log_path if that helper exists.
    rep_dir = Path(f"{log_path(config)}/report-{stamp}")
    rep_dir.mkdir(parents=True, exist_ok=True)

    for form in ["html"]:  # , "pdf", "tex"
        report_plugin = ReportPluginRegistry().get_plugin(form)
        # NOTE(review): settings_cls is used as-is and `path` is assigned on
        # it directly, i.e. no settings instance is created here - confirm this
        # is the intended way to configure the report plugin.
        report_settings = report_plugin.settings_cls
        report_settings.path = rep_dir / f"report.{form}"
        asyncio.run(
            auto_report(workflow.persistence.dag, report_plugin, report_settings)
        )

    # Dump the job DAG and the rule graph as text next to the report.
    with (rep_dir / "dag.txt").open("w") as f:
        f.write(str(workflow.persistence.dag))

    with (rep_dir / "rg.txt").open("w") as f:
        f.write(str(workflow.persistence.dag.rule_dot()))

    # remove .gen files
    for file in glob.glob("*.gen"):
        if os.path.isfile(file):
            os.remove(file)

    # Remove the generated file lists, then the directory itself.
    # os.rmdir only succeeds on an empty directory, so anything the glob did
    # not remove (sub-directories, dot-files) will make it raise.
    filelist_dir = utils.filelist_path(config)
    for file in glob.glob(os.path.join(filelist_dir, "*")):
        if os.path.isfile(file):
            os.remove(file)
    if os.path.exists(filelist_dir):
        os.rmdir(filelist_dir)


# NOTE(review): the docstring describes this rule as a "checkpoint", but it is
# declared with the plain `rule` keyword - confirm which is intended.
rule gen_filelist:
    """Generate file list.

    This rule is used as a "checkpoint", so when it is run it will update the
    DAG based on the files it finds. It does this by taking in the search
    pattern, using this to find all the files that match this pattern, deriving
    the keys from the files found and generating the list of new files needed.
    """
    # Input function, evaluated per job with that job's wildcards.
    # get_filelist is not defined in this file (presumably it comes from an
    # included rules file). It is given the raw-tier file pattern (built with
    # check_in_cycle=False) plus two YAML files from the detector-status
    # directory: ignored_daq_cycles.yaml and runlists.yaml.
    input:
        lambda wildcards: get_filelist(
            wildcards,
            config,
            get_pattern_tier(config, "raw", check_in_cycle=False),
            ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml",
            analysis_runs_file=Path(det_status) / "runlists.yaml",
        ),
    # One "<label>-<tier>.filelist" file in the file-list directory; temp()
    # lets Snakemake delete it once no remaining job needs it.
    output:
        temp(Path(utils.filelist_path(config)) / "{label}-{tier}.filelist"),
    script:
        "src/legenddataflow/scripts/write_filelist.py"