Skip to content

Commit

Permalink
Update cos_agent lib with generic HostHealth rules (#232)
Browse files Browse the repository at this point in the history
* Remove host_health.rules file in favour of generic alert rules
* Inject generic alert rules via `cos_agent`
* Import central rule groups from cos-lib
  • Loading branch information
MichaelThamm authored Feb 5, 2025
1 parent 4498342 commit 4de78c4
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 29 deletions.
9 changes: 6 additions & 3 deletions lib/charms/grafana_agent/v0/cos_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def __init__(self, *args):

import pydantic
from cosl import DashboardPath40UID, JujuTopology, LZMABase64
from cosl.rules import AlertRules
from cosl.rules import AlertRules, generic_alert_groups
from ops.charm import RelationChangedEvent
from ops.framework import EventBase, EventSource, Object, ObjectEvents
from ops.model import ModelError, Relation
Expand All @@ -254,7 +254,7 @@ class _MetricsEndpointDict(TypedDict):

LIBID = "dc15fa84cef84ce58155fb84f6c6213a"
LIBAPI = 0
LIBPATCH = 17
LIBPATCH = 18

PYDEPS = ["cosl >= 0.0.50", "pydantic"]

Expand All @@ -268,7 +268,6 @@ class _MetricsEndpointDict(TypedDict):
logger = logging.getLogger(__name__)
SnapEndpoint = namedtuple("SnapEndpoint", "owner, name")


# Note: MutableMapping is imported from the typing module and not collections.abc
# because subscripting collections.abc.MutableMapping was added in python 3.9, but
# most of our charms are based on 20.04, which has python 3.8.
Expand Down Expand Up @@ -732,6 +731,10 @@ def _metrics_alert_rules(self) -> Dict:
query_type="promql", topology=JujuTopology.from_charm(self._charm)
)
alert_rules.add_path(self._metrics_rules, recursive=self._recursive)
alert_rules.add(
generic_alert_groups.application_rules,
group_name_prefix=JujuTopology.from_charm(self._charm).identifier,
)
return alert_rules.as_dict()

@property
Expand Down
25 changes: 0 additions & 25 deletions src/prometheus_alert_rules/host_health.rules

This file was deleted.

30 changes: 29 additions & 1 deletion tests/scenario/test_cos_agent_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
COSAgentProvider,
COSAgentRequirer,
)
from cosl.rules import generic_alert_groups
from ops.charm import CharmBase
from ops.framework import Framework
from ops.testing import Context, PeerRelation, State, SubordinateRelation
Expand Down Expand Up @@ -119,6 +120,25 @@ def requirer_ctx(requirer_charm):
return Context(charm_type=requirer_charm, meta=requirer_charm.META)


def test_cos_agent_injects_generic_alerts(provider_ctx):
    """Check that a relation-changed event publishes the generic alert groups.

    The provider context is run with a single ``cos-agent`` subordinate
    relation. The JSON stored in the local unit data under
    ``CosAgentPeersUnitData.KEY`` is then decoded, and its
    ``metrics_alert_rules`` groups are compared against
    ``generic_alert_groups.application_rules``.
    """
    # GIVEN a cos-agent subordinate relation
    relation = SubordinateRelation("cos-agent")

    # WHEN the relation_changed event fires
    changed_event = provider_ctx.on.relation_changed(relation=relation, remote_unit=1)
    initial_state = State(relations=[relation])
    final_state = provider_ctx.run(changed_event, initial_state)

    # Decode what the charm wrote into its own unit databag for this relation.
    unit_data = final_state.get_relation(relation.id).local_unit_data
    published = json.loads(unit_data[CosAgentPeersUnitData.KEY])

    # THEN the metrics_alert_rules groups should only contain the generic alert groups
    actual_groups = published["metrics_alert_rules"]["groups"]
    expected_groups = generic_alert_groups.application_rules["groups"]
    assert actual_groups == expected_groups


def test_cos_agent_changed_no_remote_data(provider_ctx):
cos_agent = SubordinateRelation("cos-agent")

Expand All @@ -130,7 +150,15 @@ def test_cos_agent_changed_no_remote_data(provider_ctx):
config = json.loads(
state_out.get_relation(cos_agent.id).local_unit_data[CosAgentPeersUnitData.KEY]
)
assert config["metrics_alert_rules"] == {}

# The cos_agent lib injects generic (HostHealth) alert rules, so filter them out before asserting
config["metrics_alert_rules"]["groups"] = [
group
for group in config["metrics_alert_rules"]["groups"]
if "_HostHealth_" not in group["name"]
]

assert config["metrics_alert_rules"] == {"groups": []}
assert config["log_alert_rules"] == {}
assert len(config["dashboards"]) == 1
assert len(config["metrics_scrape_jobs"]) == 1
Expand Down

0 comments on commit 4de78c4

Please sign in to comment.