diff --git a/lib/charms/grafana_agent/v0/cos_agent.py b/lib/charms/grafana_agent/v0/cos_agent.py index a33d0b9..ed8ea6f 100644 --- a/lib/charms/grafana_agent/v0/cos_agent.py +++ b/lib/charms/grafana_agent/v0/cos_agent.py @@ -235,7 +235,7 @@ def __init__(self, *args): import pydantic from cosl import DashboardPath40UID, JujuTopology, LZMABase64 -from cosl.rules import AlertRules +from cosl.rules import AlertRules, generic_alert_groups from ops.charm import RelationChangedEvent from ops.framework import EventBase, EventSource, Object, ObjectEvents from ops.model import ModelError, Relation @@ -254,7 +254,7 @@ class _MetricsEndpointDict(TypedDict): LIBID = "dc15fa84cef84ce58155fb84f6c6213a" LIBAPI = 0 -LIBPATCH = 17 +LIBPATCH = 18 PYDEPS = ["cosl >= 0.0.50", "pydantic"] @@ -268,7 +268,6 @@ class _MetricsEndpointDict(TypedDict): logger = logging.getLogger(__name__) SnapEndpoint = namedtuple("SnapEndpoint", "owner, name") - # Note: MutableMapping is imported from the typing module and not collections.abc # because subscripting collections.abc.MutableMapping was added in python 3.9, but # most of our charms are based on 20.04, which has python 3.8. @@ -732,6 +731,10 @@ def _metrics_alert_rules(self) -> Dict: query_type="promql", topology=JujuTopology.from_charm(self._charm) ) alert_rules.add_path(self._metrics_rules, recursive=self._recursive) + alert_rules.add( + generic_alert_groups.application_rules, + group_name_prefix=JujuTopology.from_charm(self._charm).identifier, + ) return alert_rules.as_dict() @property diff --git a/src/prometheus_alert_rules/host_health.rules b/src/prometheus_alert_rules/host_health.rules deleted file mode 100644 index f4a3814..0000000 --- a/src/prometheus_alert_rules/host_health.rules +++ /dev/null @@ -1,25 +0,0 @@ -groups: -- name: HostHealth - rules: - - alert: HostDown - expr: up < 1 - for: 5m - labels: - severity: critical - annotations: - summary: Host '{{ $labels.instance }}' is down. - description: >- - Host '{{ $labels.instance }}' is down. - VALUE = {{ $value }} - LABELS = {{ $labels }} - - alert: HostUnavailable - expr: absent(up) - for: 5m - labels: - severity: critical - annotations: - summary: Metrics not received from host '{{ $labels.instance }}'. - description: >- - The metrics endpoint for host '{{ $labels.instance }}' is unreachable. - VALUE = {{ $value }} - LABELS = {{ $labels }} diff --git a/tests/scenario/test_cos_agent_e2e.py b/tests/scenario/test_cos_agent_e2e.py index ac7eab5..2e57ae1 100644 --- a/tests/scenario/test_cos_agent_e2e.py +++ b/tests/scenario/test_cos_agent_e2e.py @@ -9,6 +9,7 @@ COSAgentProvider, COSAgentRequirer, ) +from cosl.rules import generic_alert_groups from ops.charm import CharmBase from ops.framework import Framework from ops.testing import Context, PeerRelation, State, SubordinateRelation @@ -119,6 +120,25 @@ def requirer_ctx(requirer_charm): return Context(charm_type=requirer_charm, meta=requirer_charm.META) +def test_cos_agent_injects_generic_alerts(provider_ctx): + # GIVEN a cos-agent subordinate relation + cos_agent = SubordinateRelation("cos-agent") + + # WHEN the relation_changed event fires + state_out = provider_ctx.run( + provider_ctx.on.relation_changed(relation=cos_agent, remote_unit=1), + State(relations=[cos_agent]), + ) + + config = json.loads( + state_out.get_relation(cos_agent.id).local_unit_data[CosAgentPeersUnitData.KEY] + ) + # THEN the metrics_alert_rules groups should only contain the generic alert groups + assert ( + config["metrics_alert_rules"]["groups"] == generic_alert_groups.application_rules["groups"] + ) + + def test_cos_agent_changed_no_remote_data(provider_ctx): cos_agent = SubordinateRelation("cos-agent") @@ -130,7 +150,15 @@ def test_cos_agent_changed_no_remote_data(provider_ctx): config = json.loads( state_out.get_relation(cos_agent.id).local_unit_data[CosAgentPeersUnitData.KEY] ) - assert config["metrics_alert_rules"] == {} + + # the cos_agent lib injects generic (HostHealth) alert rules and should be filtered for the test + config["metrics_alert_rules"]["groups"] = [ + group + for group in config["metrics_alert_rules"]["groups"] + if "_HostHealth_" not in group["name"] + ] + + assert config["metrics_alert_rules"] == {"groups": []} assert config["log_alert_rules"] == {} assert len(config["dashboards"]) == 1 assert len(config["metrics_scrape_jobs"]) == 1