diff --git a/aiven_poke/cluster.py b/aiven_poke/cluster.py index 6e37fed..68a3dc7 100644 --- a/aiven_poke/cluster.py +++ b/aiven_poke/cluster.py @@ -2,11 +2,12 @@ import logging from collections import defaultdict -from k8s.base import Model +from k8s.base import Model, Equality from k8s.client import NotFound from k8s.fields import Field from k8s.models.common import ObjectMeta from k8s.models.namespace import Namespace +from k8s.models.secret import Secret from prometheus_client import Summary from aiven_poke.settings import Settings @@ -50,17 +51,21 @@ def __init__(self, settings: Settings): self._latency = Summary("k8s_latency_seconds", "Kubernetes latency", ["action", "resource"]) @functools.lru_cache - def get_slack_channel(self, team): - if self._settings.override_slack_channel is not None: - return self._settings.override_slack_channel + def get_namespace(self, team): try: with self._latency.labels("get", "namespace").time(): - namespace = Namespace.get(team) + return Namespace.get(team) except NotFound: if not team.startswith("team"): - return self.get_slack_channel("team" + team) + return self.get_namespace("team" + team) if not team.startswith("team-"): - return self.get_slack_channel("team-" + team[4:]) + return self.get_namespace("team-" + team[4:]) + + @functools.lru_cache + def get_slack_channel(self, team): + if self._settings.override_slack_channel is not None: + return self._settings.override_slack_channel + namespace = self.get_namespace(team) annotations = namespace.metadata.annotations slack_channel = annotations.get(SLACK_CHANNEL_KEY) if not slack_channel: @@ -80,3 +85,17 @@ def get_cluster_topics(self, gauge): gauge.inc() LOG.info("%d namespaces with topics found in cluster", len(namespaced_topics)) return namespaced_topics + + @functools.lru_cache + def get_aiven_secrets_by_name(self, team): + namespace = self.get_namespace(team) + aiven_secrets_by_name = {} + with self._latency.labels("list", "secret").time(): + secrets = Secret.find(namespace=namespace.metadata.name, labels={ + "type": Equality("aivenator.aiven.nais.io") + }) + for secret in secrets: + service_user = secret.metadata.annotations.get("kafka.aiven.nais.io/serviceUser") + if service_user: + aiven_secrets_by_name[service_user] = secret + return aiven_secrets_by_name diff --git a/aiven_poke/main.py b/aiven_poke/main.py index 52dab63..d0dc01a 100644 --- a/aiven_poke/main.py +++ b/aiven_poke/main.py @@ -11,7 +11,7 @@ from aiven_poke.aiven import AivenKafka from aiven_poke.cluster import Cluster from aiven_poke.endpoints import start_server -from aiven_poke.models import TeamTopic +from aiven_poke.models import TeamTopic, ExpiringUser from aiven_poke.settings import Settings from aiven_poke.slack import Poke @@ -50,6 +50,7 @@ def main(): _init_logging() settings = Settings() cluster = Cluster(settings) + cluster_name = os.getenv("NAIS_CLUSTER_NAME", "unknown") server = start_server() exit_code = 0 try: @@ -60,14 +61,14 @@ def main(): topic_gauge = Gauge("number_of_topics", "Number of topics found", ["source"]) team_gauge = Gauge("number_of_teams", "Number of teams with topics", ["source"]) expiring_users_gauge = Gauge("number_of_expiring_users", - "Number of service users with expiring credentials") + "Number of service users with expiring credentials", ["cluster"]) aiven = AivenKafka(settings.aiven_token.get_secret_value(), settings.main_project) - poke = Poke(settings) + poke = Poke(settings, cluster_name) if settings.topics_enabled: handle_topics(aiven, poke, cluster, team_gauge, topic_gauge) if settings.expiring_users_enabled: - handle_expiring_users(aiven, poke, cluster, expiring_users_gauge) + handle_expiring_users(aiven, poke, cluster, expiring_users_gauge.labels(cluster_name)) if settings.push_gateway_address: push_to_gateway(settings.push_gateway_address, job='aiven-poke', registry=REGISTRY) @@ -83,6 +84,10 @@ def main(): sys.exit(exit_code) +def _is_secret_protected(secret): + return secret.metadata.annotations.get("aivenator.aiven.nais.io/protected") == "true" + + def handle_expiring_users(aiven, poke, cluster, expiring_users_gauge): users = aiven.get_users() expiring_users_per_team = defaultdict(list) @@ -90,7 +95,13 @@ def handle_expiring_users(aiven, poke, cluster, expiring_users_gauge): for user in users: if user.expiring_cert_not_valid_after_time is None: continue - expiring_users_per_team[user.team].append(user) + aiven_secrets = cluster.get_aiven_secrets_by_name(user.team) + secret = aiven_secrets.get(user.username) + if not secret: + continue + expiring_user = ExpiringUser(user.team, user.username, _is_secret_protected(secret), + user.expiring_cert_not_valid_after_time) + expiring_users_per_team[user.team].append(expiring_user) count += 1 expiring_users_gauge.set(count) diff --git a/aiven_poke/models.py b/aiven_poke/models.py index f39b92d..f838039 100644 --- a/aiven_poke/models.py +++ b/aiven_poke/models.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from datetime import datetime @dataclass(frozen=True) @@ -8,4 +9,12 @@ class TeamTopic: topics: frozenset[str] def __post_init__(self): - object.__setattr__(self, "topics", frozenset(self.topics)) \ No newline at end of file + object.__setattr__(self, "topics", frozenset(self.topics)) + + +@dataclass +class ExpiringUser: + team: str + username: str + is_protected: bool + expiring_cert_not_valid_after_time: datetime diff --git a/aiven_poke/slack/__init__.py b/aiven_poke/slack/__init__.py index 515d145..4694138 100644 --- a/aiven_poke/slack/__init__.py +++ b/aiven_poke/slack/__init__.py @@ -17,14 +17,15 @@ class Poke: - def __init__(self, settings: Settings): - self.settings = settings + def __init__(self, settings: Settings, cluster_name): + self._cluster_name = cluster_name + self._settings = settings self._latency = Summary("slack_request_latency_seconds", "Slack requests latency") def topics(self, missing: Iterable[TeamTopic]): """Poke the teams with topics missing in the cluster""" for team_topic in missing: - payload = create_topic_payload(team_topic, self.settings.main_project) + payload = create_topic_payload(team_topic, self._settings.main_project) self.post_payload(payload) channel = team_topic.slack_channel topics = ", ".join(team_topic.topics) @@ -34,16 +35,16 @@ def users(self, expiring_users: MutableMapping[str, list[User]], slack_channels: """Poke the teams with users with expiring credentials""" for team, users in expiring_users.items(): channel = slack_channels[team] - payload = create_user_payload(team, channel, users, self.settings.main_project) + payload = create_user_payload(team, channel, users, self._settings.main_project, self._cluster_name) self.post_payload(payload) usernames = ", ".join(user.username for user in users) LOG.info("Notified %s about expiring users: %s", channel, usernames) def post_payload(self, payload): - if self.settings.webhook_enabled and self.settings.webhook_url is not None: + if self._settings.webhook_enabled and self._settings.webhook_url is not None: data = dataclasses.asdict(payload) with self._latency.time(): - resp = SESSION.post(self.settings.webhook_url, json=data) + resp = SESSION.post(self._settings.webhook_url, json=data) if LOG.isEnabledFor(logging.DEBUG): dumped = dump.dump_all(resp).decode('utf-8') LOG.debug(dumped) @@ -53,6 +54,7 @@ def post_payload(self, payload): if __name__ == '__main__': logging.basicConfig(level=logging.DEBUG) - tt = TeamTopic("aura", "#pig-aiven", frozenset(("aura.test-topic", "aura.topic-test", "aura.probably-a-test-too"))) - poke = Poke(Settings()) + tt = TeamTopic("nais", "#jk-tullekanal", + frozenset(("nais.test-topic", "nais.topic-test", "nais.probably-a-test-too"))) + poke = Poke(Settings(), "test") poke.topics([tt]) diff --git a/aiven_poke/slack/users.py b/aiven_poke/slack/users.py index c815b46..27b63d1 100644 --- a/aiven_poke/slack/users.py +++ b/aiven_poke/slack/users.py @@ -1,11 +1,12 @@ import textwrap -from .payload import Payload, Attachment, Color, Header, Text, TextType, TextSection, FieldsSection +from .payload import Payload, Attachment, Color, Header, Text, TextType, TextSection +from ..models import ExpiringUser RENEW_DOC = "https://doc.nais.io/how-to-guides/persistence/kafka/renew-credentials-for-non-nais/" FALLBACK = " ".join(textwrap.dedent("""\ - Your team has users with expiring credentials in the {main_project} pool. + Your team has users with expiring credentials in the {main_project} pool, defined in {cluster_name}. Consult the nais documentation to find out how to fix these. The following users needs attention: {usernames} @@ -16,7 +17,8 @@ When the certificates are getting close to expiring, warnings are issued to avoid interruptions in service. The applications using the affected users needs to get new credentials before the expiry date. """).splitlines()) -USERS_HEADER = "*Found service users in the {main_project} pool belonging to team `{team}` with expiring credentials*" +USERS_HEADER = ("*Found service users defined in `{cluster_name}`, for the `{main_project}` pool," + " with expiring credentials*") SOLUTION_HEADER = "*Solution*" SOLUTION_PROTECTED = " ".join(textwrap.dedent(f"""\ For users used by legacy applications not running in the nais platform, consult the documentation on @@ -28,21 +30,37 @@ NEEDS_HELP = " ".join(textwrap.dedent("""\ If you need help, reach out in <#C5KUST8N6|nais> """).splitlines()) +USER_ROW = "`{username}`{protected}: Expires after {expires}" -def create_user_payload(team, channel, users, main_project): - users_header = USERS_HEADER.format(main_project=main_project, team=team) +def create_user_payload(team, channel, users: list[ExpiringUser], main_project, cluster_name): usernames = [user.username for user in users] - fallback_text = FALLBACK.format(main_project=main_project, usernames="\n* ".join(usernames)) + fallback_text = FALLBACK.format(main_project=main_project, usernames="\n* ".join(usernames), + cluster_name=cluster_name) + users_header = USERS_HEADER.format(main_project=main_project, team=team, cluster_name=cluster_name) + blocks = [ + Header(Text(TextType.PLAIN, MAIN_HEADER)), + TextSection(Text(TextType.MRKDWN, WHAT_IS_THIS)), + TextSection(Text(TextType.MRKDWN, users_header)), + ] + + rows = [] + for user in users: + params = { + "username": user.username, + "protected": " (non-nais)" if user.is_protected else "", + "expires": user.expiring_cert_not_valid_after_time, + } + rows.append(USER_ROW.format(**params)) + blocks.append(TextSection(Text(TextType.MRKDWN, "\n".join(rows)))) + + blocks.extend([ + TextSection(Text(TextType.MRKDWN, SOLUTION_HEADER)), + TextSection(Text(TextType.MRKDWN, SOLUTION_PROTECTED.format(team=team))), + TextSection(Text(TextType.MRKDWN, SOLUTION_NAIS_APP.format(team=team))), + TextSection(Text(TextType.MRKDWN, NEEDS_HELP)), + ]) + return Payload(channel, attachments=[ - Attachment(Color.WARNING, fallback_text, blocks=[ - Header(Text(TextType.PLAIN, MAIN_HEADER)), - TextSection(Text(TextType.MRKDWN, WHAT_IS_THIS)), - TextSection(Text(TextType.MRKDWN, users_header)), - FieldsSection([Text(TextType.MRKDWN, "`{}`".format(u)) for u in usernames]), - TextSection(Text(TextType.MRKDWN, SOLUTION_HEADER)), - TextSection(Text(TextType.MRKDWN, SOLUTION_PROTECTED.format(team=team))), - TextSection(Text(TextType.MRKDWN, SOLUTION_NAIS_APP.format(team=team))), - TextSection(Text(TextType.MRKDWN, NEEDS_HELP)), - ]) + Attachment(Color.WARNING, fallback_text, blocks=blocks) ]) diff --git a/tests/test_slack.py b/tests/test_slack.py index fc32ff6..f536ec9 100644 --- a/tests/test_slack.py +++ b/tests/test_slack.py @@ -22,6 +22,6 @@ def payload(self): def test_post_payload(self, payload): with mock.patch("aiven_poke.slack.SESSION") as m: settings = Settings(aiven_token="fake_token", webhook_url=WEBHOOK_URL) - poke = Poke(settings) + poke = Poke(settings, "test") poke.post_payload(payload) m.post.assert_called()