Skip to content

Commit fd6970b

Browse files
committed
add rules for decoding gzip and bzip orca files
1 parent d684cc6 commit fd6970b

File tree

3 files changed

+69
-3
lines changed

3 files changed

+69
-3
lines changed

workflow/rules/raw.smk

+56
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,62 @@ rule build_raw_orca:
4848
"{params.ro_input} {output}"
4949

5050

51+
rule build_raw_orca_bz2:
52+
"""
53+
This rule runs build_raw on a bzip2-compressed ORCA file (file.orca.bz2) and outputs a raw file
54+
"""
55+
input:
56+
get_pattern_tier_daq(config, extension="orca.bz2"),
57+
params:
58+
timestamp="{timestamp}",
59+
datatype="{datatype}",
60+
ro_input=lambda _, input: ro(input),
61+
output:
62+
get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle),
63+
log:
64+
get_pattern_log(config, "tier_raw", time),
65+
group:
66+
"tier-raw"
67+
resources:
68+
mem_swap=110,
69+
runtime=300,
70+
shell:
71+
execenv_pyexe(config, "build-tier-raw-orca") + "--log {log} "
72+
f"--configs {ro(configs)} "
73+
f"--chan-maps {ro(chan_maps)} "
74+
"--datatype {params.datatype} "
75+
"--timestamp {params.timestamp} "
76+
"{params.ro_input} {output}"
77+
78+
79+
rule build_raw_orca_gzip:
80+
"""
81+
This rule runs build_raw on a gzip-compressed ORCA file (file.orca.gz) and outputs a raw file
82+
"""
83+
input:
84+
get_pattern_tier_daq(config, extension="orca.gz"),
85+
params:
86+
timestamp="{timestamp}",
87+
datatype="{datatype}",
88+
ro_input=lambda _, input: ro(input),
89+
output:
90+
get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle),
91+
log:
92+
get_pattern_log(config, "tier_raw", time),
93+
group:
94+
"tier-raw"
95+
resources:
96+
mem_swap=110,
97+
runtime=300,
98+
shell:
99+
execenv_pyexe(config, "build-tier-raw-orca") + "--log {log} "
100+
f"--configs {ro(configs)} "
101+
f"--chan-maps {ro(chan_maps)} "
102+
"--datatype {params.datatype} "
103+
"--timestamp {params.timestamp} "
104+
"{params.ro_input} {output}"
105+
106+
51107
rule build_raw_fcio:
52108
"""
53109
This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file

workflow/src/legenddataflow/FileKey.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,12 @@ def regex_from_filepattern(filepattern):
3232
f.append(f"(?P={wildcard})")
3333
else:
3434
wildcards.append(wildcard)
35-
f.append(f"(?P<{wildcard}>.+)")
35+
if wildcard == "ext":
36+
f.append(
37+
f"(?P<{wildcard}>.*)"
38+
) # this means ext will capture everything after 1st dot
39+
else:
40+
f.append(f"(?P<{wildcard}>" + r"[^\.\/]+)")
3641
last = match.end()
3742
f.append(re.escape(filepattern[last:]))
3843
f.append("$")

workflow/src/legenddataflow/create_pars_keylist.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,14 @@ def generate_par_keylist(keys):
5353

5454
@staticmethod
5555
def match_entries(entry1, entry2):
56-
datatype2 = ProcessingFileKey.get_filekey_from_filename(entry2.file[0]).datatype
56+
datatype2 = ProcessingFileKey.get_filekey_from_filename(
57+
Path(entry2.file[0]).name
58+
).datatype
5759
for entry in entry1.file:
58-
if ProcessingFileKey.get_filekey_from_filename(entry).datatype == datatype2:
60+
if (
61+
ProcessingFileKey.get_filekey_from_filename(Path(entry).name).datatype
62+
== datatype2
63+
):
5964
pass
6065
else:
6166
entry2.file.append(entry)

0 commit comments

Comments
 (0)