Skip to content

Commit

Permalink
Merge pull request #559 from opencybersecurityalliance/k2-fix-typeuid…
Browse files Browse the repository at this point in the history
…-scalar

fix eventId mapping in transform_dataframe
  • Loading branch information
subbyte authored Jul 26, 2024
2 parents 3c7789d + da95775 commit 1581e27
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 21 deletions.
39 changes: 21 additions & 18 deletions packages/kestrel_core/src/kestrel/frontend/completor.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,24 +132,27 @@ def do_complete(

# handle optional components
if ast:
stmt = ast.children[-1].children[0]
cmd = (
stmt.children[1].data.value
if stmt.data.value == "assignment"
else stmt.data.value
)
if cmd == "disp":
for clause in ("attr_clause", "limit_clause", "offset_clause"):
if not list(stmt.find_data(clause)):
suggestions.append("ATTR")
elif cmd in ("expression", "find") and not list(
stmt.find_data("where_clause")
):
suggestions.append("WHERE")
elif cmd in ("get", "find") and not list(stmt.find_data("timerange")):
suggestions.append("START")
elif cmd == "apply" and not list(stmt.find_data("args")):
suggestions.append("WITH")
if ast.children:
stmt = ast.children[-1].children[0]
cmd = (
stmt.children[1].data.value
if stmt.data.value == "assignment"
else stmt.data.value
)
if cmd == "disp":
for clause in ("attr_clause", "limit_clause", "offset_clause"):
if not list(stmt.find_data(clause)):
suggestions.append("ATTR")
elif cmd in ("expression", "find") and not list(
stmt.find_data("where_clause")
):
suggestions.append("WHERE")
elif cmd in ("get", "find") and not list(stmt.find_data("timerange")):
suggestions.append("START")
elif cmd == "apply" and not list(stmt.find_data("args")):
suggestions.append("WITH")
else:
suggestions = ["DISP", "APPLY", "EXPLAIN", "INFO", "SAVE", "DESCRIBE"]

suggestions = [x for x in set(suggestions) if x]
_p = last_word_prefix
Expand Down
9 changes: 7 additions & 2 deletions packages/kestrel_core/src/kestrel/interface/codegen/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def __init__(

@typechecked
def _map_identifier_field(self, field) -> ColumnElement:
if self.data_mapping and not self.is_subquery:
if self.data_mapping:
comps = translate_comparison_to_native(self.data_mapping, field, "", None)
if len(comps) > 1:
raise InvalidMappingWithMultipleIdentifierFields(comps)
Expand All @@ -155,10 +155,15 @@ def _render_comp(self, comp: FBasicComparison) -> BinaryExpression:
*[self._map_identifier_field(field) for field in comp.fields]
)
rendered_comp = comp2func[comp.op](col, comp.value)
elif self.data_mapping and not self.is_subquery: # translation needed
elif self.data_mapping:
comps = translate_comparison_to_native(
self.data_mapping, comp.field, comp.op, comp.value
)
if self.is_subquery:
# do not translate field
# only translate value
comps = [(comp.field, op, value) for (_, op, value) in comps]

translated_comps = (
(
~comp2func[op](column(field), value)
Expand Down
13 changes: 12 additions & 1 deletion packages/kestrel_core/src/kestrel/mapping/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,18 @@ def translate_dataframe(df: DataFrame, to_native_nested_map: dict) -> DataFrame:
transformer_name = transformer_names.pop()
if isinstance(transformer_name, dict):
# Not actually a named function; it's a literal value map
df[col] = df[col].replace(transformer_name)
value_map = {}
for k, vl in transformer_name.items():
if len(vl) > 1:
raise NotImplementedError(
"Multiple to OCSF value mapping"
)
else:
value_map[k] = vl[0]
# use .apply instead of .replace to handle type correctly
df[col] = df[col].apply(
lambda x: value_map[x] if x in value_map else x
)
else:
s = run_transformer_on_series(
transformer_name, df[col].dropna()
Expand Down

0 comments on commit 1581e27

Please sign in to comment.