Skip to content

Commit

Permalink
Merge pull request #569 from opencybersecurityalliance/k2-fix-nonexis…
Browse files Browse the repository at this point in the history
…t-projection

Fix nonexistent projection
  • Loading branch information
subbyte authored Jul 29, 2024
2 parents eebe248 + 4232cdf commit e29704c
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 1 deletion.
4 changes: 4 additions & 0 deletions packages/kestrel_core/src/kestrel/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ class InvalidProjectEntityFromEntity(KestrelError):
pass


class EntityNotFound(KestrelError):
    """Raised when an entity projection matches no column of the data source."""


class InvalidMappingWithMultipleIdentifierFields(KestrelError):
    """Error for a mapping that has multiple identifier fields.

    NOTE(review): meaning inferred from the class name only -- confirm
    against the raise sites.
    """

Expand Down
8 changes: 8 additions & 0 deletions packages/kestrel_core/src/kestrel/interface/codegen/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import sqlalchemy
from kestrel.exceptions import (
EntityNotFound,
InvalidAttributes,
InvalidMappingWithMultipleIdentifierFields,
InvalidProjectEntityFromEntity,
Expand Down Expand Up @@ -36,6 +37,7 @@
translate_comparison_to_native,
translate_projection_to_native,
)
from kestrel.mapping.utils import get_type_from_projection
from pandas import DataFrame
from pandas.io.sql import SQLTable, pandasSQL_builder
from sqlalchemy import and_, asc, column, desc, or_, select, tuple_
Expand Down Expand Up @@ -313,6 +315,12 @@ def add_ProjectEntity(self, proj: ProjectEntity) -> None:
for col in self.source_schema
if col.startswith(prefix)
]
if not pairs and self.source_schema != ["*"]:
# self.source_schema == ["*"] is used in virtual cache (EXPLAIN)
entity_type = get_type_from_projection(proj.ocsf_field)
raise EntityNotFound(
f"No '{entity_type}' found at '{proj.ocsf_field}.*' against the data source."
)

if pairs:
self.projected_schema = [ocsf_field for _, ocsf_field in pairs]
Expand Down
20 changes: 20 additions & 0 deletions packages/kestrel_core/src/kestrel/mapping/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import yaml
from kestrel.utils import list_folder_files

MAPPING_MODULE = "kestrel.mapping"

# cache of the type mapping, filled lazily by get_type_from_projection()
type_mapping = {}


def get_type_from_projection(proj) -> str:
    """Return the entity type mapped to a projection field.

    The mapping is merged from every YAML file that ``list_folder_files``
    yields for the ``types`` folder of ``MAPPING_MODULE``. It is cached in the
    module-level ``type_mapping`` dict, so the files are only read while that
    cache is empty.

    Parameters:
        proj: the projection (e.g., an OCSF field name) used as the lookup
            key into the type mapping.

    Returns:
        The mapped entity type, or ``"unknown_entity"`` when the projection
        has no entry in the mapping.
    """
    if not type_mapping:
        for f in list_folder_files(MAPPING_MODULE, "types", extension="yaml"):
            # Explicit encoding so parsing does not depend on the locale.
            with open(f, "r", encoding="utf-8") as fp:
                # safe_load() returns None for an empty document; treat it
                # as an empty mapping instead of crashing in update().
                type_mapping.update(yaml.safe_load(fp) or {})
    return type_mapping.get(proj, "unknown_entity")
8 changes: 8 additions & 0 deletions packages/kestrel_core/tests/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from kestrel.frontend.parser import parse_kestrel_and_update_irgraph
from kestrel.ir.graph import IRGraph, IRGraphEvaluable
from kestrel.ir.instructions import Construct, SerializableDataFrame
from kestrel.exceptions import EntityNotFound


@pytest.fixture
Expand Down Expand Up @@ -372,3 +373,10 @@ def test_explain_find_event_to_entity(process_creation_events):
# cache.sql will use "*" as columns for __setitem__ in virtual cache
# so the result is different from test_cache_sqlite::test_explain_find_event_to_entity
assert stmt == f"WITH es AS \n(SELECT DISTINCT * \nFROM {construct.id.hex}v), \nprocs AS \n(SELECT DISTINCT * \nFROM es \nWHERE device.os = \'Linux\')\n SELECT DISTINCT * \nFROM procs"


def test_get_nonexist_entity(process_creation_events):
    """FIND on an entity type absent from the data must raise EntityNotFound."""
    huntflow = "reg = FIND reg_key RESPONDED es WHERE device.os = 'Linux' DISP reg"
    with Session() as session:
        # Seed the session with the fixture graph that defines ``es``.
        session.irgraph = process_creation_events
        with pytest.raises(EntityNotFound):
            session.execute(huntflow)
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ def to_html_blocks(d: Display) -> Iterable[str]:
if isinstance(d, DataFrame):
d = d.replace("", numpy.nan).dropna(axis="columns", how="all")
escaped_df = d.map(lambda x: x.replace("$", "\\$") if isinstance(x, str) else x)
yield escaped_df.to_html(index=False, na_rep="")
if escaped_df.empty:
yield "<div><i>Nothing Found :-(</i></div>"
else:
yield escaped_df.to_html(index=False, na_rep="")
elif isinstance(d, GraphExplanation):
for graphlet in d.graphlets:
graph = IRGraph(graphlet.graph)
Expand Down

0 comments on commit e29704c

Please sign in to comment.