Skip to content

Commit

Permalink
Add more detailed output about users
Browse files Browse the repository at this point in the history
  • Loading branch information
mortenlj committed Mar 7, 2024
1 parent 3098da3 commit 84122a7
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 38 deletions.
33 changes: 26 additions & 7 deletions aiven_poke/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
import logging
from collections import defaultdict

from k8s.base import Model
from k8s.base import Model, Equality
from k8s.client import NotFound
from k8s.fields import Field
from k8s.models.common import ObjectMeta
from k8s.models.namespace import Namespace
from k8s.models.secret import Secret
from prometheus_client import Summary

from aiven_poke.settings import Settings
Expand Down Expand Up @@ -50,17 +51,21 @@ def __init__(self, settings: Settings):
self._latency = Summary("k8s_latency_seconds", "Kubernetes latency", ["action", "resource"])

@functools.lru_cache
def get_slack_channel(self, team):
if self._settings.override_slack_channel is not None:
return self._settings.override_slack_channel
def get_namespace(self, team):
try:
with self._latency.labels("get", "namespace").time():
namespace = Namespace.get(team)
return Namespace.get(team)
except NotFound:
if not team.startswith("team"):
return self.get_slack_channel("team" + team)
return self.get_namespace("team" + team)
if not team.startswith("team-"):
return self.get_slack_channel("team-" + team[4:])
return self.get_namespace("team-" + team[4:])

@functools.lru_cache
def get_slack_channel(self, team):
if self._settings.override_slack_channel is not None:
return self._settings.override_slack_channel
namespace = self.get_namespace(team)
annotations = namespace.metadata.annotations
slack_channel = annotations.get(SLACK_CHANNEL_KEY)
if not slack_channel:
Expand All @@ -80,3 +85,17 @@ def get_cluster_topics(self, gauge):
gauge.inc()
LOG.info("%d namespaces with topics found in cluster", len(namespaced_topics))
return namespaced_topics

@functools.lru_cache
def get_aiven_secrets_by_name(self, team):
namespace = self.get_namespace(team)
aiven_secrets_by_name = {}
with self._latency.labels("list", "secret").time():
secrets = Secret.find(namespace=namespace.metadata.name, labels={
"type": Equality("aivenator.aiven.nais.io")
})
for secret in secrets:
service_user = secret.metadata.annotations.get("kafka.aiven.nais.io/serviceUser")
if service_user:
aiven_secrets_by_name[service_user] = secret
return aiven_secrets_by_name
21 changes: 16 additions & 5 deletions aiven_poke/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from aiven_poke.aiven import AivenKafka
from aiven_poke.cluster import Cluster
from aiven_poke.endpoints import start_server
from aiven_poke.models import TeamTopic
from aiven_poke.models import TeamTopic, ExpiringUser
from aiven_poke.settings import Settings
from aiven_poke.slack import Poke

Expand Down Expand Up @@ -50,6 +50,7 @@ def main():
_init_logging()
settings = Settings()
cluster = Cluster(settings)
cluster_name = os.getenv("NAIS_CLUSTER_NAME", "unknown")
server = start_server()
exit_code = 0
try:
Expand All @@ -60,14 +61,14 @@ def main():
topic_gauge = Gauge("number_of_topics", "Number of topics found", ["source"])
team_gauge = Gauge("number_of_teams", "Number of teams with topics", ["source"])
expiring_users_gauge = Gauge("number_of_expiring_users",
"Number of service users with expiring credentials")
"Number of service users with expiring credentials", ["cluster"])

aiven = AivenKafka(settings.aiven_token.get_secret_value(), settings.main_project)
poke = Poke(settings)
poke = Poke(settings, cluster_name)
if settings.topics_enabled:
handle_topics(aiven, poke, cluster, team_gauge, topic_gauge)
if settings.expiring_users_enabled:
handle_expiring_users(aiven, poke, cluster, expiring_users_gauge)
handle_expiring_users(aiven, poke, cluster, expiring_users_gauge.labels(cluster_name))

if settings.push_gateway_address:
push_to_gateway(settings.push_gateway_address, job='aiven-poke', registry=REGISTRY)
Expand All @@ -83,14 +84,24 @@ def main():
sys.exit(exit_code)


def _is_secret_protected(secret):
return secret.metadata.annotations.get("aivenator.aiven.nais.io/protected") == "true"


def handle_expiring_users(aiven, poke, cluster, expiring_users_gauge):
users = aiven.get_users()
expiring_users_per_team = defaultdict(list)
count = 0
for user in users:
if user.expiring_cert_not_valid_after_time is None:
continue
expiring_users_per_team[user.team].append(user)
aiven_secrets = cluster.get_aiven_secrets_by_name(user.team)
secret = aiven_secrets.get(user.username)
if not secret:
continue
expiring_user = ExpiringUser(user.team, user.username, _is_secret_protected(secret),
user.expiring_cert_not_valid_after_time)
expiring_users_per_team[user.team].append(expiring_user)
count += 1
expiring_users_gauge.set(count)

Expand Down
11 changes: 10 additions & 1 deletion aiven_poke/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataclasses import dataclass
from datetime import datetime


@dataclass(frozen=True)
Expand All @@ -8,4 +9,12 @@ class TeamTopic:
topics: frozenset[str]

def __post_init__(self):
object.__setattr__(self, "topics", frozenset(self.topics))
object.__setattr__(self, "topics", frozenset(self.topics))


@dataclass
class ExpiringUser:
team: str
username: str
is_protected: bool
expiring_cert_not_valid_after_time: datetime
18 changes: 10 additions & 8 deletions aiven_poke/slack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@


class Poke:
def __init__(self, settings: Settings):
self.settings = settings
def __init__(self, settings: Settings, cluster_name):
self._cluster_name = cluster_name
self._settings = settings
self._latency = Summary("slack_request_latency_seconds", "Slack requests latency")

def topics(self, missing: Iterable[TeamTopic]):
"""Poke the teams with topics missing in the cluster"""
for team_topic in missing:
payload = create_topic_payload(team_topic, self.settings.main_project)
payload = create_topic_payload(team_topic, self._settings.main_project)
self.post_payload(payload)
channel = team_topic.slack_channel
topics = ", ".join(team_topic.topics)
Expand All @@ -34,16 +35,16 @@ def users(self, expiring_users: MutableMapping[str, list[User]], slack_channels:
"""Poke the teams with users with expiring credentials"""
for team, users in expiring_users.items():
channel = slack_channels[team]
payload = create_user_payload(team, channel, users, self.settings.main_project)
payload = create_user_payload(team, channel, users, self._settings.main_project, self._cluster_name)
self.post_payload(payload)
usernames = ", ".join(user.username for user in users)
LOG.info("Notified %s about expiring users: %s", channel, usernames)

def post_payload(self, payload):
if self.settings.webhook_enabled and self.settings.webhook_url is not None:
if self._settings.webhook_enabled and self._settings.webhook_url is not None:
data = dataclasses.asdict(payload)
with self._latency.time():
resp = SESSION.post(self.settings.webhook_url, json=data)
resp = SESSION.post(self._settings.webhook_url, json=data)
if LOG.isEnabledFor(logging.DEBUG):
dumped = dump.dump_all(resp).decode('utf-8')
LOG.debug(dumped)
Expand All @@ -53,6 +54,7 @@ def post_payload(self, payload):

if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
tt = TeamTopic("aura", "#pig-aiven", frozenset(("aura.test-topic", "aura.topic-test", "aura.probably-a-test-too")))
poke = Poke(Settings())
tt = TeamTopic("nais", "#jk-tullekanal",
frozenset(("nais.test-topic", "nais.topic-test", "nais.probably-a-test-too")))
poke = Poke(Settings(), "test")
poke.topics([tt])
50 changes: 34 additions & 16 deletions aiven_poke/slack/users.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import textwrap

from .payload import Payload, Attachment, Color, Header, Text, TextType, TextSection, FieldsSection
from .payload import Payload, Attachment, Color, Header, Text, TextType, TextSection
from ..models import ExpiringUser

RENEW_DOC = "https://doc.nais.io/how-to-guides/persistence/kafka/renew-credentials-for-non-nais/"

FALLBACK = " ".join(textwrap.dedent("""\
Your team has users with expiring credentials in the {main_project} pool.
Your team has users with expiring credentials in the {main_project} pool, defined in {cluster_name}.
Consult the nais documentation to find out how to fix these.
The following users needs attention:
{usernames}
Expand All @@ -16,7 +17,8 @@
When the certificates are getting close to expiring, warnings are issued to avoid interruptions in service.
The applications using the affected users needs to get new credentials before the expiry date.
""").splitlines())
USERS_HEADER = "*Found service users in the {main_project} pool belonging to team `{team}` with expiring credentials*"
USERS_HEADER = ("*Found service users defined in `{cluster_name}`, for the `{main_project}` pool,"
" with expiring credentials*")
SOLUTION_HEADER = "*Solution*"
SOLUTION_PROTECTED = " ".join(textwrap.dedent(f"""\
For users used by legacy applications not running in the nais platform, consult the documentation on
Expand All @@ -28,21 +30,37 @@
NEEDS_HELP = " ".join(textwrap.dedent("""\
If you need help, reach out in <#C5KUST8N6|nais>
""").splitlines())
USER_ROW = "`{username}`{protected}: Expires after {expires}"


def create_user_payload(team, channel, users, main_project):
users_header = USERS_HEADER.format(main_project=main_project, team=team)
def create_user_payload(team, channel, users: list[ExpiringUser], main_project, cluster_name):
usernames = [user.username for user in users]
fallback_text = FALLBACK.format(main_project=main_project, usernames="\n* ".join(usernames))
fallback_text = FALLBACK.format(main_project=main_project, usernames="\n* ".join(usernames),
cluster_name=cluster_name)
users_header = USERS_HEADER.format(main_project=main_project, team=team, cluster_name=cluster_name)
blocks = [
Header(Text(TextType.PLAIN, MAIN_HEADER)),
TextSection(Text(TextType.MRKDWN, WHAT_IS_THIS)),
TextSection(Text(TextType.MRKDWN, users_header)),
]

rows = []
for user in users:
params = {
"username": user.username,
"protected": " (non-nais)" if user.is_protected else "",
"expires": user.expiring_cert_not_valid_after_time,
}
rows.append(USER_ROW.format(**params))
blocks.append(TextSection(Text(TextType.MRKDWN, "\n".join(rows))))

blocks.extend([
TextSection(Text(TextType.MRKDWN, SOLUTION_HEADER)),
TextSection(Text(TextType.MRKDWN, SOLUTION_PROTECTED.format(team=team))),
TextSection(Text(TextType.MRKDWN, SOLUTION_NAIS_APP.format(team=team))),
TextSection(Text(TextType.MRKDWN, NEEDS_HELP)),
])

return Payload(channel, attachments=[
Attachment(Color.WARNING, fallback_text, blocks=[
Header(Text(TextType.PLAIN, MAIN_HEADER)),
TextSection(Text(TextType.MRKDWN, WHAT_IS_THIS)),
TextSection(Text(TextType.MRKDWN, users_header)),
FieldsSection([Text(TextType.MRKDWN, "`{}`".format(u)) for u in usernames]),
TextSection(Text(TextType.MRKDWN, SOLUTION_HEADER)),
TextSection(Text(TextType.MRKDWN, SOLUTION_PROTECTED.format(team=team))),
TextSection(Text(TextType.MRKDWN, SOLUTION_NAIS_APP.format(team=team))),
TextSection(Text(TextType.MRKDWN, NEEDS_HELP)),
])
Attachment(Color.WARNING, fallback_text, blocks=blocks)
])
2 changes: 1 addition & 1 deletion tests/test_slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ def payload(self):
def test_post_payload(self, payload):
with mock.patch("aiven_poke.slack.SESSION") as m:
settings = Settings(aiven_token="fake_token", webhook_url=WEBHOOK_URL)
poke = Poke(settings)
poke = Poke(settings, "test")
poke.post_payload(payload)
m.post.assert_called()

0 comments on commit 84122a7

Please sign in to comment.