
Commit 290cfca: pre compile channelmaps and det statuses

1 parent: 5191229

File tree: 7 files changed (+119 −50 lines)

workflow/Snakefile (+5 −1)

@@ -18,10 +18,11 @@ from datetime import datetime
 from collections import OrderedDict
 import logging
 
-from dbetto import AttrsDict
+from dbetto import AttrsDict, TextDB
 from legendmeta import LegendMetadata
 from legenddataflow import CalGrouping, execenv, utils
 from legenddataflow.patterns import get_pattern_tier
+from legenddataflow.pre_compile_catalog import pre_compile_catalog
 
 utils.subst_vars_in_snakemake_config(workflow, config)
 config = AttrsDict(config)
@@ -33,6 +34,9 @@ meta = utils.metadata_path(config)
 det_status = utils.det_status_path(config)
 basedir = workflow.basedir
 
+det_status_textdb = pre_compile_catalog(Path(det_status) / "statuses")
+channelmap_textdb = pre_compile_catalog(Path(chan_maps) / "channelmaps")
+
 time = datetime.now().strftime("%Y%m%dT%H%M%SZ")
 
 # NOTE: this will attempt a clone of legend-metadata, if the directory does not exist
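These catalogs are built once, when Snakemake parses the workflow, and are then shared by every rule below: input lambdas that Snakemake re-evaluates during DAG construction hit an in-memory lookup instead of re-reading validity YAML from disk. A minimal sketch of the resulting query (path and timestamp illustrative, not from the commit):

```python
from pathlib import Path

from legenddataflow.pre_compile_catalog import pre_compile_catalog

# hypothetical checkout path; the real one comes from the dataflow config
det_status = Path("/data/legend-detstatus")

det_status_textdb = pre_compile_catalog(det_status / "statuses")

# the detector-status map valid at this timestamp, served from memory
status_map = det_status_textdb.valid_for("20230101T000000Z", system="cal")
```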

workflow/rules/chanlist_gen.smk (+33 −18)

@@ -3,6 +3,7 @@
 import os
 import random
 import re
+from pathlib import Path
 
 from legenddataflow.FileKey import ChannelProcKey
 from legenddataflow.patterns import (
@@ -11,28 +12,42 @@ from legenddataflow.patterns import (
 )
 from legenddataflow import execenv_pyexe
 from legenddataflow.utils import filelist_path
+from dbetto import TextDB
+from dbetto.catalog import Catalog
 
 
-def get_chanlist(config, keypart, workflow, det_status, chan_maps, system):
+def get_chanlist(config, keypart, workflow, det_status, channelmap, system):
     key = ChannelProcKey.parse_keypart(keypart)
 
-    flist_path = filelist_path(config)
-    os.makedirs(flist_path, exist_ok=True)
-    output_file = os.path.join(
-        flist_path,
-        f"all-{key.experiment}-{key.period}-{key.run}-{key.datatype}-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}",
-    )
-
-    os.system(
-        execenv_pyexe(config, "create-chankeylist")
-        + f"--det-status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} "
-        f"--datatype {key.datatype} --output-file {output_file} --system {system}"
-    )
-
-    with open(output_file) as r:
-        chan_list = r.read().splitlines()
-    os.remove(output_file)
-    return chan_list
+    if isinstance(det_status, (str, Path)):
+        det_status = TextDB(det_status, lazy=True)
+
+    if isinstance(channelmap, (str, Path)):
+        channelmap = TextDB(channelmap, lazy=True)
+
+    if isinstance(det_status, TextDB):
+        status_map = det_status.statuses.on(key.timestamp, system=key.datatype)
+    else:
+        status_map = det_status.valid_for(key.timestamp, system=key.datatype)
+    if isinstance(channelmap, TextDB):
+        chmap = channelmap.channelmaps.on(key.timestamp)
+    else:
+        chmap = channelmap.valid_for(key.timestamp)
+
+    # only restrict to a certain system (geds, spms, ...)
+    channels = []
+    for channel in chmap.map("system", unique=False)[system].map("name"):
+        if channel not in status_map:
+            msg = f"{channel} is not found in the status map (on {key.timestamp})"
+            raise RuntimeError(msg)
+        if status_map[channel].processable is False:
+            continue
+        channels.append(channel)
+
+    if len(channels) == 0:
+        print("WARNING: No channels found")  # noqa: T201
+
+    return channels
 
 
 def get_par_chanlist(
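`get_chanlist` now resolves the channel list entirely in-process: it accepts either on-disk metadata paths (wrapped in a lazy `TextDB`) or the precompiled `Catalog` objects from the Snakefile, rather than shelling out to `create-chankeylist` and round-tripping through a temporary file. A usage sketch (the keypart string is illustrative):

```python
# det_status_textdb / channelmap_textdb are the precompiled Catalogs
# built at the top of the Snakefile
channels = get_chanlist(
    config,
    "all-l200-p03-r000-cal-20230101T000000Z-channels",  # illustrative keypart
    workflow,
    det_status_textdb,
    channelmap_textdb,
    system="geds",
)
# -> list of detector names flagged processable at that timestamp
```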

workflow/rules/channel_merge.smk (+8 −8)

@@ -15,8 +15,8 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None, system="geds"):
             f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels",
             tier,
             basedir,
-            det_status,
-            chan_maps,
+            det_status_textdb,
+            channelmap_textdb,
             system=system,
         ),
     output:
@@ -37,8 +37,8 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None, system="geds"):
             f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels",
             tier,
             basedir,
-            det_status,
-            chan_maps,
+            det_status_textdb,
+            channelmap_textdb,
             system=system,
             name="objects",
             extension="pkl",
@@ -68,8 +68,8 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None, system="geds"):
             f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels",
             tier,
             basedir,
-            det_status,
-            chan_maps,
+            det_status_textdb,
+            channelmap_textdb,
             system=system,
         ),
     output:
@@ -97,8 +97,8 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None, system="geds"):
             f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels",
             lh5_tier,
             basedir,
-            det_status,
-            chan_maps,
+            det_status_textdb,
+            channelmap_textdb,
             system=system,
             extension="lh5" if lh5_merge is True else "yaml",
         ),
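`build_merge_rules` is invoked once per tier to emit the merge rules, so the four input lambdas above now all share the same two precompiled catalogs instead of re-reading metadata per wildcard combination. A hypothetical invocation, mirroring the signature shown above:

```python
# generate the merge rules for the dsp tier, merging par files into LH5
build_merge_rules("dsp", lh5_merge=True, lh5_tier="dsp", system="geds")
```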

workflow/rules/common.smk (+4 −1)

@@ -122,5 +122,8 @@ def get_search_pattern(tier):
 
 
 def get_table_name(metadata, config, datatype, timestamp, detector, tier):
-    chmap = metadata.channelmap(timestamp, system=datatype)
+    if isinstance(metadata, Catalog):
+        chmap = metadata.valid_for(timestamp, system=datatype)
+    else:
+        chmap = metadata.channelmap(timestamp, system=datatype)
     return config.table_format[tier].format(ch=chmap[detector].daq.rawid)
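`get_table_name` thus accepts either a `LegendMetadata`-style object (queried via `.channelmap()`) or a precompiled `Catalog` (queried via `.valid_for()`). An illustrative call, with a made-up detector name and timestamp:

```python
raw_table = get_table_name(
    channelmap_textdb,  # precompiled Catalog from the Snakefile
    config,
    "cal",
    "20230101T000000Z",
    "V05266A",  # hypothetical detector name
    "raw",
)
# formats config.table_format["raw"] with the detector's DAQ rawid
```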

workflow/rules/dsp_pars_geds.smk (+41 −16)

@@ -27,7 +27,12 @@ rule build_pars_dsp_tau_geds:
         datatype="cal",
         channel="{channel}",
         raw_table_name=lambda wildcards: get_table_name(
-            metadata, config, "cal", wildcards.timestamp, wildcards.channel, "raw"
+            channelmap_textdb,
+            config,
+            "cal",
+            wildcards.timestamp,
+            wildcards.channel,
+            "raw",
         ),
     output:
         decay_const=temp(get_pattern_pars_tmp_channel(config, "dsp", "decay_constant")),
@@ -57,14 +62,19 @@ rule build_pars_evtsel_geds:
             filelist_path(config), "all-{experiment}-{period}-{run}-cal-raw.filelist"
         ),
         pulser_file=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"),
-        database=get_pattern_pars_tmp_channel(config, "dsp", "decay_constant"),
+        database=rules.build_pars_dsp_tau_geds.output.decay_const,
         raw_cal_curve=get_blinding_curve_file,
     params:
         timestamp="{timestamp}",
         datatype="cal",
         channel="{channel}",
         raw_table_name=lambda wildcards: get_table_name(
-            metadata, config, "cal", wildcards.timestamp, wildcards.channel, "raw"
+            channelmap_textdb,
+            config,
+            "cal",
+            wildcards.timestamp,
+            wildcards.channel,
+            "raw",
         ),
     output:
         peak_file=temp(
@@ -97,14 +107,19 @@ rule build_pars_dsp_nopt_geds:
         files=os.path.join(
             filelist_path(config), "all-{experiment}-{period}-{run}-fft-raw.filelist"
         ),
-        database=get_pattern_pars_tmp_channel(config, "dsp", "decay_constant"),
-        inplots=get_pattern_plts_tmp_channel(config, "dsp", "decay_constant"),
+        database=rules.build_pars_dsp_tau_geds.output.decay_const,
+        inplots=rules.build_pars_dsp_tau_geds.output.plots,
     params:
         timestamp="{timestamp}",
         datatype="cal",
         channel="{channel}",
         raw_table_name=lambda wildcards: get_table_name(
-            metadata, config, "cal", wildcards.timestamp, wildcards.channel, "raw"
+            channelmap_textdb,
+            config,
+            "cal",
+            wildcards.timestamp,
+            wildcards.channel,
+            "raw",
         ),
     output:
         dsp_pars_nopt=temp(
@@ -137,15 +152,20 @@ rule build_pars_dsp_dplms_geds:
         fft_files=os.path.join(
             filelist_path(config), "all-{experiment}-{period}-{run}-fft-raw.filelist"
        ),
-        peak_file=get_pattern_pars_tmp_channel(config, "dsp", "peaks", extension="lh5"),
-        database=get_pattern_pars_tmp_channel(config, "dsp", "noise_optimization"),
-        inplots=get_pattern_plts_tmp_channel(config, "dsp", "noise_optimization"),
+        peak_file=rules.build_pars_evtsel_geds.output.peak_file,
+        database=rules.build_pars_dsp_nopt_geds.output.dsp_pars_nopt,
+        inplots=rules.build_pars_dsp_nopt_geds.output.plots,
     params:
         timestamp="{timestamp}",
         datatype="cal",
         channel="{channel}",
         raw_table_name=lambda wildcards: get_table_name(
-            metadata, config, "cal", wildcards.timestamp, wildcards.channel, "raw"
+            channelmap_textdb,
+            config,
+            "cal",
+            wildcards.timestamp,
+            wildcards.channel,
+            "raw",
         ),
     output:
         dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp", "dplms")),
@@ -176,15 +196,20 @@
 # This rule builds the optimal energy filter parameters for the dsp using calibration dsp files
 rule build_pars_dsp_eopt_geds:
     input:
-        peak_file=get_pattern_pars_tmp_channel(config, "dsp", "peaks", extension="lh5"),
-        decay_const=get_pattern_pars_tmp_channel(config, "dsp", "dplms"),
-        inplots=get_pattern_plts_tmp_channel(config, "dsp", "dplms"),
+        peak_file=rules.build_pars_evtsel_geds.output.peak_file,
+        decay_const=rules.build_pars_dsp_dplms_geds.output.dsp_pars,
+        inplots=rules.build_pars_dsp_dplms_geds.output.plots,
     params:
         timestamp="{timestamp}",
         datatype="cal",
         channel="{channel}",
         raw_table_name=lambda wildcards: get_table_name(
-            metadata, config, "cal", wildcards.timestamp, wildcards.channel, "raw"
+            channelmap_textdb,
+            config,
+            "cal",
+            wildcards.timestamp,
+            wildcards.channel,
+            "raw",
         ),
     output:
         dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp_eopt")),
@@ -246,8 +271,8 @@ rule build_svm_dsp_geds:
 
 rule build_pars_dsp_svm_geds:
     input:
-        dsp_pars=get_pattern_pars_tmp_channel(config, "dsp_eopt"),
-        svm_file=get_pattern_pars(config, "dsp", "svm", extension="pkl"),
+        dsp_pars=rules.build_pars_dsp_eopt_geds.output.dsp_pars,
+        svm_file=rules.build_svm_dsp_geds.output.dsp_pars,
     output:
         dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp")),
     log:
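Throughout this file, inputs switch from re-deriving temp-file patterns to referencing the producing rule's named outputs through Snakemake's `rules` object, which keeps each path defined in exactly one place and makes the dependency explicit. A minimal, self-contained sketch of the pattern (rule and file names hypothetical):

```python
rule make_constants:
    output:
        decay_const="pars/{channel}-decay_constant.yaml",
        plots="plts/{channel}-decay_constant.pkl",
    shell:
        "touch {output.decay_const} {output.plots}"


rule use_constants:
    input:
        # resolves to the same path string as make_constants' decay_const output
        database=rules.make_constants.output.decay_const,
    output:
        "pars/{channel}-dsp.yaml",
    shell:
        "cp {input.database} {output}"
```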

workflow/rules/dsp_pars_spms.smk (+6 −6)

@@ -17,13 +17,13 @@ rule build_pars_dsp_tau_spms:
            config,
            f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-{wildcards.datatype}-{wildcards.timestamp}-channels",
            workflow,
-            det_status,
-            chan_maps,
+            det_status_textdb,
+            channelmap_textdb,
            system="spms",
        ),
        raw_table_names=lambda wildcards: [
            get_table_name(
-                metadata,
+                channelmap_textdb,
                config,
                wildcards.datatype,
                wildcards.timestamp,
@@ -34,8 +34,8 @@
                config,
                f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-{wildcards.datatype}-{wildcards.timestamp}-channels",
                workflow,
-                det_status,
-                chan_maps,
+                det_status_textdb,
+                channelmap_textdb,
                system="spms",
            )
        ],
@@ -44,7 +44,7 @@
    output:
        patt.get_pattern_pars(config, "dsp", name="spms", datatype="{datatype}"),
    log:
-        patt.get_pattern_log(config, "pars_spms", time),
+        patt.get_pattern_log(config, "pars_spms", time, datatype="{datatype}"),
    group:
        "par-dsp"
    shell:
legenddataflow/pre_compile_catalog.py (new file, +22)

@@ -0,0 +1,22 @@
+from datetime import datetime, timezone
+from pathlib import Path
+
+from dbetto import TextDB
+from dbetto.catalog import Catalog
+
+
+def pre_compile_catalog(validity_path: str | Path):
+    if isinstance(validity_path, str):
+        validity_path = Path(validity_path)
+    catalog = Catalog.read_from(validity_path / "validity.yaml")
+    entries = {}
+    textdb = TextDB(validity_path, lazy=False)
+    for system in catalog.entries:
+        entries[system] = []
+        for entry in catalog.entries[system]:
+            db = textdb.on(
+                datetime.fromtimestamp(entry.valid_from, tz=timezone.utc), system=system
+            )
+            new_entry = Catalog.Entry(entry.valid_from, db)
+            entries[system].append(new_entry)
+    return Catalog(entries)
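The function walks every validity entry, resolves the metadata state at that entry's `valid_from` timestamp with `TextDB.on(...)`, and stores the loaded result back into a fresh `Catalog`. Downstream code (see `get_chanlist` above) then treats the two flavours uniformly: a lazy `TextDB` is queried with `.on(timestamp)` and resolves validity at call time, while a precompiled `Catalog` answers `.valid_for(timestamp)` from entries that already hold the loaded metadata. A sketch of the contrast (path and timestamp illustrative):

```python
from pathlib import Path

from dbetto import TextDB
from legenddataflow.pre_compile_catalog import pre_compile_catalog

# hypothetical metadata checkout
chan_maps = Path("/data/legend-metadata/hardware/configuration")

# lazy: validity files are read and parsed at every query
chmap = TextDB(chan_maps, lazy=True).channelmaps.on("20230101T000000Z")

# precompiled: the same lookup against entries resolved once up front
chmap = pre_compile_catalog(chan_maps / "channelmaps").valid_for("20230101T000000Z")
```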
