From 4c16dff54a6da929008888d1f86dbcb0c62d7312 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 12 Mar 2025 21:22:18 +0100 Subject: [PATCH 1/3] specify instreams and crate for aux --- .pre-commit-config.yaml | 2 +- .../src/legenddataflow/scripts/tier/raw_orca.py | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8f713bef..074c838b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -78,7 +78,7 @@ repos: rev: "v2.3.0" hooks: - id: codespell - args: ["-L", "nd,unparseable,compiletime,livetime,fom,puls"] + args: ["-L", "nd,unparseable,compiletime,livetime,fom,puls,crate"] - repo: https://github.com/shellcheck-py/shellcheck-py rev: "v0.10.0.1" diff --git a/workflow/src/legenddataflow/scripts/tier/raw_orca.py b/workflow/src/legenddataflow/scripts/tier/raw_orca.py index 9ee5a0f1..c4f954b4 100644 --- a/workflow/src/legenddataflow/scripts/tier/raw_orca.py +++ b/workflow/src/legenddataflow/scripts/tier/raw_orca.py @@ -27,7 +27,7 @@ def build_tier_raw_orca() -> None: configs = TextDB(args.configs, lazy=True) config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_raw" + "tier_raw_orca" ] build_log(config_dict, args.log) @@ -63,11 +63,25 @@ def build_tier_raw_orca() -> None: spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) Props.add_to(all_config, spm_config) + if "muon_config" in list(channel_dict): + muon_config = Props.read_from(channel_dict["muon_config"]) + muon_channels = list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["muon"] + .map("daq.rawid") + ) + top_key = next(iter(muon_config)) + muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted( + muon_channels + ) + Props.add_to(all_config, muon_config) + if "auxs_config" in list(channel_dict): aux_config = Props.read_from(channel_dict["auxs_config"]) aux_channels = list( chmap.channelmaps.on(args.timestamp) .map("system", unique=False)["auxs"] + .map("daq.crate", unique=False)[1] .map("daq.rawid") ) aux_channels += list( From 43242ae2fa0cf93b5db7562ad2191096c0769020 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 12 Mar 2025 21:42:54 +0100 Subject: [PATCH 2/3] add rules for decoding gzip and bzip orca files --- workflow/rules/raw.smk | 56 +++++++++++++++++++ workflow/src/legenddataflow/FileKey.py | 7 ++- .../src/legenddataflow/create_pars_keylist.py | 9 ++- .../legenddataflow/scripts/tier/raw_fcio.py | 8 ++- .../legenddataflow/scripts/tier/raw_orca.py | 21 +++---- 5 files changed, 83 insertions(+), 18 deletions(-) diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 25f9b37e..86001173 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -48,6 +48,62 @@ rule build_raw_orca: "{params.ro_input} {output}" +rule build_raw_orca_bz2: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(config, extension="orca.bz2"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(config, "tier_raw", time), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + execenv_pyexe(config, "build-tier-raw-orca") + "--log {log} " + f"--configs {ro(configs)} " + f"--chan-maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" + + +rule build_raw_orca_gzip: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(config, extension="orca.gz"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(config, "tier_raw", time), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + execenv_pyexe(config, "build-tier-raw-orca") + "--log {log} " + f"--configs {ro(configs)} " + f"--chan-maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" + + rule build_raw_fcio: """ This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file diff --git a/workflow/src/legenddataflow/FileKey.py b/workflow/src/legenddataflow/FileKey.py index 6857e05a..43162a24 100644 --- a/workflow/src/legenddataflow/FileKey.py +++ b/workflow/src/legenddataflow/FileKey.py @@ -32,7 +32,12 @@ def regex_from_filepattern(filepattern): f.append(f"(?P={wildcard})") else: wildcards.append(wildcard) - f.append(f"(?P<{wildcard}>.+)") + if wildcard == "ext": + f.append( + f"(?P<{wildcard}>.*)" + ) # this means ext will capture everything after 1st dot + else: + f.append(f"(?P<{wildcard}>" + r"[^\.\/]+)") last = match.end() f.append(re.escape(filepattern[last:])) f.append("$") diff --git a/workflow/src/legenddataflow/create_pars_keylist.py b/workflow/src/legenddataflow/create_pars_keylist.py index b779b128..45e1a304 100644 --- a/workflow/src/legenddataflow/create_pars_keylist.py +++ b/workflow/src/legenddataflow/create_pars_keylist.py @@ -53,9 +53,14 @@ def generate_par_keylist(keys): @staticmethod def match_entries(entry1, entry2): - datatype2 = ProcessingFileKey.get_filekey_from_filename(entry2.file[0]).datatype + datatype2 = ProcessingFileKey.get_filekey_from_filename( + Path(entry2.file[0]).name + ).datatype for entry in entry1.file: - if ProcessingFileKey.get_filekey_from_filename(entry).datatype == datatype2: + if ( + ProcessingFileKey.get_filekey_from_filename(Path(entry).name).datatype + == datatype2 + ): pass else: entry2.file.append(entry) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_fcio.py b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py index 67942b93..a351d758 100644 --- a/workflow/src/legenddataflow/scripts/tier/raw_fcio.py +++ b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py @@ -47,4 +47,10 @@ def build_tier_raw_fcio() -> None: if "muon_config" in channel_dict: raise NotImplementedError() - build_raw(args.input, out_spec=all_config, filekey=args.output, **settings) + build_raw( + args.input, + out_spec=all_config, + in_stream_type="Flashcam", + filekey=args.output, + **settings, + ) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_orca.py b/workflow/src/legenddataflow/scripts/tier/raw_orca.py index c4f954b4..47ba3919 100644 --- a/workflow/src/legenddataflow/scripts/tier/raw_orca.py +++ b/workflow/src/legenddataflow/scripts/tier/raw_orca.py @@ -100,17 +100,10 @@ def build_tier_raw_orca() -> None: ) Props.add_to(all_config, aux_config) - if "muon_config" in list(channel_dict): - muon_config = Props.read_from(channel_dict["muon_config"]) - muon_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["muon"] - .map("daq.rawid") - ) - top_key = next(iter(muon_config)) - muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted( - muon_channels - ) - Props.add_to(all_config, muon_config) - - build_raw(args.input, out_spec=all_config, filekey=args.output, **settings) + build_raw( + args.input, + out_spec=all_config, + in_stream_type="ORCA", + filekey=args.output, + **settings, + ) From 9a6f95535b2d06a1c83a2a48ba2e7785109869c7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 12 Mar 2025 21:53:16 +0100 Subject: [PATCH 3/3] reuse orca rule --- workflow/rules/raw.smk | 50 ++---------------------------------------- 1 file changed, 2 insertions(+), 48 deletions(-) diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 86001173..e001047b 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -48,60 +48,14 @@ rule build_raw_orca: "{params.ro_input} {output}" -rule build_raw_orca_bz2: - """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file - """ +use rule build_raw_orca as build_raw_orca_bz2 with: input: get_pattern_tier_daq(config, extension="orca.bz2"), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle), - log: - get_pattern_log(config, "tier_raw", time), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - execenv_pyexe(config, "build-tier-raw-orca") + "--log {log} " - f"--configs {ro(configs)} " - f"--chan-maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" -rule build_raw_orca_gzip: - """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file - """ +use rule build_raw_orca as build_raw_orca_gz with: input: get_pattern_tier_daq(config, extension="orca.gz"), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle), - log: - get_pattern_log(config, "tier_raw", time), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - execenv_pyexe(config, "build-tier-raw-orca") + "--log {log} " - f"--configs {ro(configs)} " - f"--chan-maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" rule build_raw_fcio: