diff --git a/health-check/.gitignore b/health-check/.gitignore index c2d9455011c1..ca6c93b7c530 100644 --- a/health-check/.gitignore +++ b/health-check/.gitignore @@ -10,3 +10,7 @@ __pycache__ **/config/grafana/dashboards/supportconfig_with_logs.json .vscode/ + +# Generated files +**/exporters/config.yml +**/exporters/metrics diff --git a/health-check/src/uyuni_health_check/config.py b/health-check/src/uyuni_health_check/config.py index 970046522004..27b015a83340 100644 --- a/health-check/src/uyuni_health_check/config.py +++ b/health-check/src/uyuni_health_check/config.py @@ -1,3 +1,9 @@ +""" +Module that contains functionality related to reading `config.toml` +and getting paths used for configuration, templating, or building +containers +""" + import functools import os from typing import Any, Dict, List @@ -10,12 +16,16 @@ CONFIG_DIR = os.path.join(BASE_DIR, "config") TEMPLATES_DIR = os.path.join(CONFIG_DIR, "templates") CONTAINERS_DIR = os.path.join(BASE_DIR, "containers") -CONFIG_TOML_PATH = os.environ.get("HEALTH_CHECK_TOML", os.path.join(BASE_DIR, "config.toml")) +CONFIG_TOML_PATH = os.environ.get( + "HEALTH_CHECK_TOML", os.path.join(BASE_DIR, "config.toml") +) + @functools.lru_cache def _init_jinja_env() -> jinja2.Environment: return jinja2.Environment(loader=jinja2.FileSystemLoader(TEMPLATES_DIR)) + @functools.lru_cache def parse_config() -> Dict: if not os.path.exists(CONFIG_TOML_PATH): @@ -25,56 +35,67 @@ def parse_config() -> Dict: conf = tomli.load(f) return conf + def get_json_template_filepath(json_relative_path: str) -> str: return os.path.join(TEMPLATES_DIR, json_relative_path) + def load_jinja_template(template: str) -> jinja2.Template: return _init_jinja_env().get_template(template) + def load_dockerfile_dir(dockerfile_dir: str) -> str: return os.path.join(CONTAINERS_DIR, dockerfile_dir) + def get_config_dir_path(component: str) -> str: return os.path.join(CONFIG_DIR, component) -def load_prop(property: str) -> Any: + +def load_prop(property_path: 
str) -> Any: res = parse_config().copy() - for prop_part in property.split('.'): + for prop_part in property_path.split("."): try: res = res[prop_part] except Exception as e: raise ValueError( - f"Invalid config lookup ({property}); trying to get {prop_part} from {res}" + f"Invalid config lookup ({property_path}); trying to get {prop_part} from {res}" ) from e return res + def write_config(component: str, config_file_path: str, content: str, is_json=False): basedir = Path(get_config_dir_path(component)) if not basedir.exists(): basedir.mkdir(parents=True) file_path = os.path.join(basedir, config_file_path) - with open(file_path, "w") as file: + with open(file_path, "w", encoding="UTF-8") as file: if is_json: json.dump(content, file, indent=4) else: file.write(content) + def get_config_file_path(component): return os.path.join(get_config_dir_path(component), "config.yaml") + def get_sources_dir(component): return os.path.join(BASE_DIR, component) + def get_grafana_config_dir(): return os.path.join(CONFIG_DIR, "grafana") + def get_prometheus_config_dir(): return os.path.join(CONFIG_DIR, "prometheus") + def get_all_container_image_names() -> List[str]: res = [] conf = parse_config().copy() for section in conf.values(): if "image" in section: res.append(section.get("image")) - return res \ No newline at end of file + return res diff --git a/health-check/src/uyuni_health_check/config.toml b/health-check/src/uyuni_health_check/config.toml index 1334a4b9c3f4..b65595c07135 100644 --- a/health-check/src/uyuni_health_check/config.toml +++ b/health-check/src/uyuni_health_check/config.toml @@ -10,10 +10,6 @@ container_name = "health_check_loki" jobs = ["cobbler", "postgresql", "rhn", "apache"] image = "docker.io/grafana/loki" -[logcli] -logcli_container_name = "uyuni_health_check_logcli" -logcli_image_name = "logcli" - [exporter] container_name = "health_check_supportconfig_exporter" image = "localhost/supportconfig-exporter" diff --git 
a/health-check/src/uyuni_health_check/containers/logcli/Dockerfile b/health-check/src/uyuni_health_check/containers/logcli/Dockerfile deleted file mode 100644 index bc962bd3bb62..000000000000 --- a/health-check/src/uyuni_health_check/containers/logcli/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM opensuse/leap:latest - - -COPY logcli-linux-amd64 /usr/bin/logcli -RUN chmod a+x /usr/bin/logcli - -ENTRYPOINT ["logcli"] diff --git a/health-check/src/uyuni_health_check/containers/manager.py b/health-check/src/uyuni_health_check/containers/manager.py index bf557e400e70..8cc9ab0e7a25 100644 --- a/health-check/src/uyuni_health_check/containers/manager.py +++ b/health-check/src/uyuni_health_check/containers/manager.py @@ -1,3 +1,5 @@ +"""Module that contains podman-related functionality""" + from typing import List from uyuni_health_check import config from uyuni_health_check.utils import run_command, console @@ -12,12 +14,18 @@ def podman(cmd: List[str], verbose=False, raise_exc=True) -> List: return run_command(["podman"] + cmd, verbose, raise_exc) -def build_image(name: str, containerfile_path: str, build_args: List[str] | None = None, verbose: bool = False) -> None: +def build_image( + name: str, + containerfile_path: str, + build_args: List[str] | None = None, + verbose: bool = False, +) -> None: """ Build a container image """ podman_args = ["build", "-t", f"{name}"] if build_args: + # pylint: disable-next=expression-not-assigned [podman_args.append(f'--build-arg="{param}"') for param in build_args] podman_args.append(containerfile_path) @@ -28,7 +36,11 @@ def image_exists(image): """ Check if the image is present in podman images result """ - stdout, _, _ = podman(["images", "--quiet", "-f", f"reference={image}"], verbose=False, raise_exc=False) + stdout, _, _ = podman( + ["images", "--quiet", "-f", f"reference={image}"], + verbose=False, + raise_exc=False, + ) return stdout.strip() != "" @@ -36,7 +48,9 @@ def network_exists(network): """ Check if the podman network 
is up and running """ - _, _, returncode = podman(["network", "exists", f"{network}"], verbose=False, raise_exc=False) + _, _, returncode = podman( + ["network", "exists", f"{network}"], verbose=False, raise_exc=False + ) return returncode == 0 diff --git a/health-check/src/uyuni_health_check/containers/promtail/complete_checker.py b/health-check/src/uyuni_health_check/containers/promtail/complete_checker.py index 733535c45fa6..1cfd5ffb2f33 100644 --- a/health-check/src/uyuni_health_check/containers/promtail/complete_checker.py +++ b/health-check/src/uyuni_health_check/containers/promtail/complete_checker.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +"""Check when Promtail has finished processing logs""" import os import re @@ -8,10 +9,10 @@ import logging -path_list= "" +path_list = "" positions_file = "/tmp/positions.yaml" -logging.basicConfig(filename='/var/log/complete_checker.log', level=logging.INFO) +logging.basicConfig(filename="/var/log/complete_checker.log", level=logging.INFO) logger = logging.getLogger(__name__) @@ -23,42 +24,40 @@ def complete() -> bool: time.sleep(1) logger.info("the positions file is present!") - while True: - with open(positions_file) as f: + with open(positions_file, encoding="UTF-8") as f: logger.info("before opening positions file") data = f.read() - #fpath_pos_list = re.findall(r'([\w\/\.-]+\.log(?:\.gz)?)\s*:\s*"(\d+)"', data) fpath_pos_list = re.findall(r'([\w\/\.-]+\.log)\s*:\s*"(\d+)"', data) - logger.info(f"matches in path and pos list: {fpath_pos_list}") + logger.info("matches in path and pos list: %s", fpath_pos_list) if fpath_pos_list: break time.sleep(5) - - with open(positions_file) as f: + + with open(positions_file, encoding="UTF-8") as f: for fpath_size in fpath_pos_list: log_file_path = fpath_size[0] log_file_pos = int(fpath_size[1]) file_size = os.path.getsize(log_file_path) if log_file_pos != file_size: - logging.info(f"Final of file not reached yet for: {log_file_path}") + logging.info("Final of file not reached 
yet for: %s", log_file_path) return False - + logging.info("Promtail completed processing!") return True -def push_flag_to_loki(loki_url="http://health_check_loki:3100", job_name="promtail-complete-job", flag="complete"): + +def push_flag_to_loki( + loki_url="http://health_check_loki:3100", + job_name="promtail-complete-job", + flag="complete", +): log_entry = { "streams": [ { - "stream": { - "job": job_name, - "flag": flag - }, - "values": [ - [str(int(time.time() * 1e9)), "Promtail finished!d"] - ] + "stream": {"job": job_name, "flag": flag}, + "values": [[str(int(time.time() * 1e9)), "Promtail finished!d"]], } ] } @@ -66,7 +65,7 @@ def push_flag_to_loki(loki_url="http://health_check_loki:3100", job_name="promta response = requests.post( f"{loki_url}/loki/api/v1/push", headers={"Content-Type": "application/json"}, - data=json.dumps(log_entry) + data=json.dumps(log_entry), ) if response.status_code == 204: @@ -74,15 +73,15 @@ def push_flag_to_loki(loki_url="http://health_check_loki:3100", job_name="promta else: print("Failed to push log to Loki:", response.text) + if __name__ == "__main__": - logging.basicConfig(filename='/var/log/complete_checker.log', level=logging.INFO) + logging.basicConfig(filename="/var/log/complete_checker.log", level=logging.INFO) logger = logging.getLogger(__name__) - logger.info('Started') - while(1): + logger.info("Started") + while True: if complete(): break time.sleep(10) push_flag_to_loki() - diff --git a/health-check/src/uyuni_health_check/containers/promtail/run.py b/health-check/src/uyuni_health_check/containers/promtail/run.py index 96b6832d82c4..c3c4352ace27 100644 --- a/health-check/src/uyuni_health_check/containers/promtail/run.py +++ b/health-check/src/uyuni_health_check/containers/promtail/run.py @@ -1,19 +1,28 @@ #!/usr/bin/env python3.11 +"""Manage the Promtail process""" import subprocess import time -import os -import signal + def is_process_running(process_name): try: - subprocess.run(["pgrep", "-f", process_name], 
check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + subprocess.run( + ["pgrep", "-f", process_name], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) return True except subprocess.CalledProcessError: return False + def launch_process(command): - return subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return subprocess.Popen( + command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + promtail_command = "promtail --config.file=/etc/promtail/config.yml" promtail_process = launch_process(promtail_command) @@ -25,6 +34,6 @@ def launch_process(command): if not is_process_running("promtail"): print("Promtail process is not running. Relaunching...") promtail_process = launch_process(promtail_command) - + # Delay between checks to prevent constant CPU usage time.sleep(10) diff --git a/health-check/src/uyuni_health_check/exporters/exporter.py b/health-check/src/uyuni_health_check/exporters/exporter.py index 02e5a200429f..0c93542504b9 100644 --- a/health-check/src/uyuni_health_check/exporters/exporter.py +++ b/health-check/src/uyuni_health_check/exporters/exporter.py @@ -1,3 +1,5 @@ +"""Module that manages the supportconfig exporter container""" + from uyuni_health_check import config from uyuni_health_check.utils import console from uyuni_health_check.containers.manager import ( @@ -10,12 +12,12 @@ def prepare_exporter(supportconfig_path: str, verbose: bool): """ - Build the prometheus exporter image and deploy it on the server + Build the exporter image and deploy it on the server :param server: the Uyuni server to deploy the exporter on """ - exporter_name = config.load_prop('exporter.container_name') - image = config.load_prop('exporter.image') + exporter_name = config.load_prop("exporter.container_name") + image = config.load_prop("exporter.image") console.log("[bold]Deploying supportconfig exporter") if container_is_running(f"{exporter_name}"): @@ -26,11 +28,7 @@ def 
prepare_exporter(supportconfig_path: str, verbose: bool): if not image_exists(image): console.log(f"[bold]Building {image} image") - build_image( - image, - config.load_dockerfile_dir("exporter"), - verbose=verbose - ) + build_image(image, config.load_dockerfile_dir("exporter"), verbose=verbose) console.log(f"[green]The {image} image was built successfully") console.log(f"[bold]Deploying {exporter_name} container") @@ -39,7 +37,7 @@ def prepare_exporter(supportconfig_path: str, verbose: bool): "--replace", "--detach", "--network", - config.load_prop('podman.network_name'), + config.load_prop("podman.network_name"), "--publish", "9000:9000", "--volume", diff --git a/health-check/src/uyuni_health_check/exporters/static_metrics.py b/health-check/src/uyuni_health_check/exporters/static_metrics.py index d009cd733301..d432eb07c9b5 100644 --- a/health-check/src/uyuni_health_check/exporters/static_metrics.py +++ b/health-check/src/uyuni_health_check/exporters/static_metrics.py @@ -1,13 +1,15 @@ +"""Module for collecting metrics that are single-value integers""" + import os import re from abc import ABC, abstractmethod # SupportConfigMetricsCollector automatically exposes all of the following metrics -# as instance variables and merges them in the metrics.json +# as instance variables and merges them in the metrics.json metrics_config = { "tomcat_xmx_size": { "filepath": "spacewalk-debug/conf/tomcat/tomcat/conf.d/tomcat_java_opts.conf", - "pattern": r'-Xmx(\d)+([kKmMgG])', + "pattern": r"-Xmx(\d)+([kKmMgG])", }, "max_threads": { "filepath": "spacewalk-debug/conf/tomcat/tomcat/server.xml", @@ -19,85 +21,86 @@ }, "java_salt_batch_size": { "filepath": "spacewalk-debug/conf/rhn/rhn/rhn.conf", - "pattern": r'^java.salt_batch_size\s*=\s*(\d+)\s*$', + "pattern": r"^java.salt_batch_size\s*=\s*(\d+)\s*$", "default": 200, }, "taskomatic_channel_repodata_workers": { "filepath": "spacewalk-debug/conf/rhn/rhn/rhn.conf", - "pattern": 
r'^java.taskomatic_channel_repodata_workers\s*=\s*(\d+)\s*$', + "pattern": r"^java.taskomatic_channel_repodata_workers\s*=\s*(\d+)\s*$", "default": 2, }, "org_quartz_threadpool_threadcount": { "filepath": "spacewalk-debug/conf/rhn/rhn/rhn.conf", - "pattern": r'^org.quartz.threadPool.threadCount\s*=\s*(\d+)\s*$', + "pattern": r"^org.quartz.threadPool.threadCount\s*=\s*(\d+)\s*$", "default": 20, }, "org_quartz_scheduler_idlewaittime": { "filepath": "spacewalk-debug/conf/rhn/rhn/rhn.conf", - "pattern": r'^org.quartz.scheduler.idleWaitTime\s*=\s*(\d+)\s*$', + "pattern": r"^org.quartz.scheduler.idleWaitTime\s*=\s*(\d+)\s*$", "default": 5000, }, "taskomatic_minion_action_executor_parallel_threads": { "filepath": "spacewalk-debug/conf/rhn/rhn/rhn.conf", - "pattern": r'^taskomatic.minion_action_executor.parallel_threads\s*=\s*(\d+)\s*$', + "pattern": r"^taskomatic.minion_action_executor.parallel_threads\s*=\s*(\d+)\s*$", "default": 1, }, "java_message_queue_thread_pool_size": { "filepath": "spacewalk-debug/conf/rhn/rhn/rhn.conf", - "pattern": r'^java.message_queue_thread_pool_size\s*=\s*(\d+)\s*$', + "pattern": r"^java.message_queue_thread_pool_size\s*=\s*(\d+)\s*$", "default": 5, }, "salt_thread_pool": { "filepath": "plugin-saltconfiguration.txt", - "pattern": r'^thread_pool\s*:\s*(\d+)\s*$', + "pattern": r"^thread_pool\s*:\s*(\d+)\s*$", "default": 100, }, "salt_pub_hwm": { "filepath": "plugin-saltconfiguration.txt", - "pattern": r'^pub_hwm\s*:\s*(\d+)\s*$', + "pattern": r"^pub_hwm\s*:\s*(\d+)\s*$", "default": 1000, }, "cpu_count": { "filepath": "hardware.txt", - "pattern": r'CPU\(s\):\s+(\d+)', + "pattern": r"CPU\(s\):\s+(\d+)", "label": "hw", }, "mem_total": { "filepath": "basic-health-check.txt", - "pattern": r'Mem:\s+(\d+)\s+\d+\s+\d+', + "pattern": r"Mem:\s+(\d+)\s+\d+\s+\d+", "label": "memory", }, "mem_used": { "filepath": "basic-health-check.txt", - "pattern": r'Mem:\s+\d+\s+(\d+)\s+\d+', + "pattern": r"Mem:\s+\d+\s+(\d+)\s+\d+", "label": "memory", }, "mem_free": { 
"filepath": "basic-health-check.txt", - "pattern": r'Mem:\s+\d+\s+\d+\s+(\d+)', + "pattern": r"Mem:\s+\d+\s+\d+\s+(\d+)", "label": "memory", }, "swap_total": { "filepath": "basic-health-check.txt", - "pattern": r'Swap:\s+(\d+)\s+\d+\s+\d+', + "pattern": r"Swap:\s+(\d+)\s+\d+\s+\d+", "label": "memory", }, "swap_used": { "filepath": "basic-health-check.txt", - "pattern": r'Swap:\s+\d+\s+(\d+)\s+\d+', + "pattern": r"Swap:\s+\d+\s+(\d+)\s+\d+", "label": "memory", }, "swap_free": { "filepath": "basic-health-check.txt", - "pattern": r'Swap:\s+\d+\s+\d+\s+(\d+)', + "pattern": r"Swap:\s+\d+\s+\d+\s+(\d+)", "label": "memory", }, "major_version": { "filepath": "basic-environment.txt", - "pattern": r'^SUSE Manager release (\d)' - } + "pattern": r"^SUSE Manager release (\d)", + }, } + class StaticMetric(ABC): def __init__(self, name, supportconfig_path, filepath): self.name = name @@ -111,14 +114,21 @@ def is_present(self): def get_value(self): pass + class LogFileStaticMetric(StaticMetric): - def __init__(self, name, supportconfig_path, filepath, pattern, default = None): + """ + A representation of a single metric that is captured by a regex and is an integer. 
+ """ + + def __init__(self, name, supportconfig_path, filepath, pattern, default=None): super().__init__(name, supportconfig_path, filepath) self.pattern = pattern self.default = default def get_value(self): - with open(os.path.join(self.supportconfig_path, self.filepath)) as f: + with open( + os.path.join(self.supportconfig_path, self.filepath), encoding="UTF-8" + ) as f: content = f.read() pattern = re.compile(self.pattern, flags=re.MULTILINE) match = re.search(pattern, content) @@ -131,25 +141,30 @@ def get_value(self): xmx_unit = match.group(2) if match else None if xmx_value is None or xmx_unit is None: return 0 - if xmx_unit == 'm' or xmx_unit == 'M': + if xmx_unit == "m" or xmx_unit == "M": return int(xmx_value) - if xmx_unit == 'k' or xmx_unit == 'K': + if xmx_unit == "k" or xmx_unit == "K": return int(xmx_value) * 1024 - if xmx_unit == 'g' or xmx_unit == 'G': + if xmx_unit == "g" or xmx_unit == "G": return int(xmx_value) * 1024 * 1024 else: return int(match.group(1)) - def is_present(self): - return super().is_present() class StaticMetricFactory: @staticmethod def create_metric(name, supportconfig_path, filepath, pattern, default=None): return LogFileStaticMetric(name, supportconfig_path, filepath, pattern, default) + def create_static_metrics_collection(supportconfig_path): return { - name: StaticMetricFactory.create_metric(name, supportconfig_path, config["filepath"], config["pattern"], config.get("default")) + name: StaticMetricFactory.create_metric( + name, + supportconfig_path, + config["filepath"], + config["pattern"], + config.get("default"), + ) for name, config in metrics_config.items() - } \ No newline at end of file + } diff --git a/health-check/src/uyuni_health_check/exporters/supportconfig_exporter.py b/health-check/src/uyuni_health_check/exporters/supportconfig_exporter.py index 2eb3dfe6c10e..45d1ec462abe 100644 --- a/health-check/src/uyuni_health_check/exporters/supportconfig_exporter.py +++ 
b/health-check/src/uyuni_health_check/exporters/supportconfig_exporter.py @@ -1,6 +1,10 @@ # SPDX-FileCopyrightText: 2023 SUSE LLC # # SPDX-License-Identifier: Apache-2.0 +""" +Main supportconfig exporter module that collects metrics into a json file +and serves the file by using an HTTP server. +""" from collections import defaultdict, namedtuple import os @@ -18,8 +22,9 @@ import static_metrics from static_metrics import metrics_config + def sigterm_handler(**kwargs): - del kwargs # unused + del kwargs  # unused print("Detected SIGTERM. Exiting.") sys.exit(0) @@ -28,12 +33,16 @@ class SupportConfigMetricsCollector: + """A collector that collects metrics, and exports them into a file.""" + def __init__(self, supportconfig_path=None): if not supportconfig_path: raise ValueError("A 'supportconfig_path' must be set via config.yml file") self.supportconfig_path = supportconfig_path - self.static_metrics_collection = static_metrics.create_static_metrics_collection(self.supportconfig_path) + self.static_metrics_collection = ( + static_metrics.create_static_metrics_collection(self.supportconfig_path) + ) self.disk_layout = [] self.salt_keys = [] self.salt_jobs = [] @@ -47,16 +56,16 @@ def __init__(self, supportconfig_path=None): self.fs_mount_out_of_space = -1 self.roles = [ { - 'name':'master', - 'value': 0, + "name": "master", + "value": 0, }, { - 'name':'proxy', - 'value': 0, + "name": "proxy", + "value": 0, }, { - 'name': 'client', - 'value': 0, + "name": "client", + "value": 0, }, ] self.parse() @@ -81,12 +90,14 @@ def parse(self): def parse_shared_buffers_to_mem_ratio(self, memory: int): """ - Parse the ratio of PostgreSQL's shared_buffers property to the amount of RAM. + Parse the ratio of PostgreSQL's shared_buffers property to the amount of RAM. 
""" - filename = os.path.join(self.supportconfig_path, "spacewalk-debug/database/postgresql.conf") + filename = os.path.join( + self.supportconfig_path, "spacewalk-debug/database/postgresql.conf" + ) if not os.path.exists(filename): return - with open(filename) as f: + with open(filename, encoding="UTF-8") as f: content = f.read() regex = r"^shared_buffers\s*=\s*(\d+)(\w+)$" pattern = re.compile(regex, flags=re.MULTILINE) @@ -97,13 +108,15 @@ def parse_shared_buffers_to_mem_ratio(self, memory: int): buffer_unit = match.group(2) if not shared_buffers.isnumeric(): - print(f"Error when parsing shared_buffers; expected int, got: {shared_buffers}") + print( + f"Error when parsing shared_buffers; expected int, got: {shared_buffers}" + ) return shared_buffers = int(shared_buffers) match buffer_unit.lower(): case "kb": - ... # conversion done + ... # conversion done case "mb": shared_buffers *= 1024 case "gb": @@ -120,10 +133,12 @@ def parse_prefork_c_params(self): if not os.path.exists(filename): return - with open(filename) as f: + with open(filename, encoding="UTF-8") as f: content = f.read() # Parse contents of server-tuning.conf first - file_regex = r"(?s)/etc/apache2/server-tuning.conf(.*?)\[ Configuration File \]" + file_regex = ( + r"(?s)/etc/apache2/server-tuning.conf(.*?)\[ Configuration File \]" + ) # Then, parse the MaxRequestWorkers property from the prefork part max_req_regex = r"(?s)(.*?)MaxRequestWorkers\s+(\d+)$" # Finally, parse ServerLimit prop @@ -145,20 +160,24 @@ def parse_prefork_c_params(self): max_clients = max_req_match.groups()[-1] self.max_clients = int(max_clients) except ValueError: - print(f"Error when parsing max_clients; expected int, got: {max_clients}") + print( + f"Error when parsing max_clients; expected int, got: {max_clients}" + ) if server_lim_match: try: server_limit = server_lim_match.groups()[-1] self.server_limit = int(server_limit) except ValueError: - print(f"Error when parsing ServerLimit; expected int, got: {server_limit}") + 
print( + f"Error when parsing ServerLimit; expected int, got: {server_limit}" + ) def parse_roles(self): mapping = { - 'plugin-susemanagerclient.txt': 'client', - 'plugin-susemanagerproxy.txt': 'proxy', - 'plugin-susemanager.txt': 'master' + "plugin-susemanagerclient.txt": "client", + "plugin-susemanagerproxy.txt": "proxy", + "plugin-susemanager.txt": "master", } for file, role in mapping.items(): @@ -184,21 +203,17 @@ def _check_vol_params(self, mount: str, min_size_gb: int, fs: Dict) -> Dict: Can return multiple duplicate returns, e.g. when checking for two non-existent mounts /foo/bar and /foo/baz, both of which have common path /, then the result will contain two "/" entries. However, /foo/bar might have different min_size req., so - the result for the two entries might be different, e.g. '/foo/bar' reduces to '/', which - fulfills the min requirement for '/foo/bar' but not for '/foo/baz'. + the result for the two entries might be different, e.g. "/foo/bar" reduces to "/", which + fulfills the min requirement for "/foo/bar" but not for "/foo/baz". See self.parse_disk_layout for more information about disk layout parsing. - Return a dict with 'too_small' and 'out_of_space' props, where 1 = True and 0 = False. + Return a dict with "too_small" and "out_of_space" props, where 1 = True and 0 = False. This is necessary for Grafana alerts, which cannot work with True/False. 
""" - res = { - 'mount': mount, - 'too_small': -1, - 'out_of_space': -1 - } + res = {"mount": mount, "too_small": -1, "out_of_space": -1} if mount not in fs: - if mount == '/': + if mount == "/": return {} mount = os.path.dirname(mount) return self._check_vol_params(mount, min_size_gb, fs) @@ -210,17 +225,23 @@ def _check_vol_params(self, mount: str, min_size_gb: int, fs: Dict) -> Dict: # size format: 2T, or 560G size = disk["size"] if size[-1:].isnumeric(): - size, unit = size, 'n/a' + unit = "n/a" else: size, unit = float(size[:-1]), size[-1:] match unit.lower(): - case 'k': size /= (1024 * 1024) - case 'm': size /= 1024 - case 'g': ... # already in GB - case 't': size *= 1024 - case 'n/a': ... #no unit - case _: print(f"Error when parsing shared buffer unit: {unit}") + case "k": + size /= 1024 * 1024 + case "m": + size /= 1024 + case "g": + ... # already in GB + case "t": + size *= 1024 + case "n/a": + ... # no unit + case _: + print(f"Error when parsing shared buffer unit: {unit}") res["too_small"] = 1 if min_size_gb > size else 0 return res @@ -230,7 +251,7 @@ def _parse_path_data(self, mounts: Dict) -> Tuple: Reduce _check_vol_params output dictionary to "is there any mount that is too small" and "is there any mount that is running out of space". 
""" - too_small, out_of_space = 0,0 + too_small, out_of_space = 0, 0 for mount_data_list in mounts.values(): for mount in mount_data_list: too_small = max(mount["too_small"], too_small) @@ -249,27 +270,50 @@ def _gen_mounts_for_checking(self) -> Dict: 5: { "master": [], "proxy": [], - } + }, } - path_conf = namedtuple('PathConf', ['mount', 'min_size_gb', 'alternate_mount'], defaults=(None, None, None)) + path_conf = namedtuple( + "PathConf", + ["mount", "min_size_gb", "alternate_mount"], + defaults=(None, None, None), + ) # MLM 4.x, master - res[4]["master"].append(path_conf('/', 40)) - res[4]["master"].append(path_conf('/var/lib/pgsql', 50, '/pgsql_storage')) - res[4]["master"].append(path_conf('/var/spacewalk', 100, '/manager_storage')) - res[4]["master"].append(path_conf('/var/cache', 10)) + res[4]["master"].append(path_conf("/", 40)) + res[4]["master"].append(path_conf("/var/lib/pgsql", 50, "/pgsql_storage")) + res[4]["master"].append(path_conf("/var/spacewalk", 100, "/manager_storage")) + res[4]["master"].append(path_conf("/var/cache", 10)) # MLM 4.x, proxy - res[4]["proxy"].append(path_conf('/srv', 100)) - res[4]["proxy"].append(path_conf('/var/cache', 100)) + res[4]["proxy"].append(path_conf("/srv", 100)) + res[4]["proxy"].append(path_conf("/var/cache", 100)) # MLM 5.x, master - res[5]["master"].append(path_conf('/', 20)) - res[5]["master"].append(path_conf('/var/lib/containers/storage/volumes/var-pgsql', 50, '/pgsql_storage')) - res[5]["master"].append(path_conf('/var/lib/containers/storage/volumes/var-pgsql', 100, '/manager_storage')) - res[5]["master"].append(path_conf('/var/lib/containers/storage/volumes/var-cache', 10)) + res[5]["master"].append(path_conf("/", 20)) + res[5]["master"].append( + path_conf( + "/var/lib/containers/storage/volumes/var-pgsql", 50, "/pgsql_storage" + ) + ) + res[5]["master"].append( + path_conf( + "/var/lib/containers/storage/volumes/var-pgsql", 100, "/manager_storage" + ) + ) + res[5]["master"].append( + 
path_conf("/var/lib/containers/storage/volumes/var-cache", 10) + ) # MLM 5.x, proxy - res[5]["proxy"].append(path_conf('/', 40,)) - res[5]["proxy"].append(path_conf('/var/lib/containers/storage/volumes/srv-www', 100)) - res[5]["proxy"].append(path_conf('/var/lib/containers/storage/volumes/var-cache', 100)) + res[5]["proxy"].append( + path_conf( + "/", + 40, + ) + ) + res[5]["proxy"].append( + path_conf("/var/lib/containers/storage/volumes/srv-www", 100) + ) + res[5]["proxy"].append( + path_conf("/var/lib/containers/storage/volumes/var-cache", 100) + ) return res def check_space_on_fs(self): @@ -282,30 +326,42 @@ def check_space_on_fs(self): role = [role for role in self.roles if role["value"] == 1] if not role: - print("Cannot determine filesystem requirements; cannot determine server role (master, minion, proxy?)") + print( + "Cannot determine filesystem requirements; cannot determine server role (master, minion, proxy?)" + ) return role = role.pop()["name"] - paths = self._gen_mounts_for_checking().get(self.major_version, {}).get(role, {}) + paths = ( + self._gen_mounts_for_checking().get(self.major_version, {}).get(role, {}) + ) if not paths: print("Cannot determine filesystem requirements") return for path in paths: - mount = path.alternate_mount if path.alternate_mount and (path.alternate_mount in fs) else path.mount + mount = ( + path.alternate_mount + if path.alternate_mount and (path.alternate_mount in fs) + else path.mount + ) fs_obj = self._check_vol_params(mount, path.min_size_gb, fs) if not fs_obj: print(f"Could not find {mount}") continue mounts[fs_obj["mount"]].append(fs_obj) - self.fs_mount_insufficient, self.fs_mount_out_of_space = self._parse_path_data(mounts) + self.fs_mount_insufficient, self.fs_mount_out_of_space = self._parse_path_data( + mounts + ) def parse_disk_layout(self): - diskinfo_path = os.path.join(self.supportconfig_path, "spacewalk-debug/diskinfo") + diskinfo_path = os.path.join( + self.supportconfig_path, "spacewalk-debug/diskinfo" 
+ ) if not os.path.isfile(diskinfo_path): return - with open(diskinfo_path) as f: + with open(diskinfo_path, encoding="UTF-8") as f: # skip header next(f) for line in f: @@ -313,22 +369,27 @@ def parse_disk_layout(self): cols = line.split() if len(cols) < 6: continue - self.disk_layout.append({ - "mounted on": cols[5], - "size": cols[1], - "available": cols[3], - "use %": cols[4], - "filesystem": cols[0], - }) + self.disk_layout.append( + { + "mounted on": cols[5], + "size": cols[1], + "available": cols[3], + "use %": cols[4], + "filesystem": cols[0], + } + ) def exists_salt_configuration_file(self): - if os.path.isfile(os.path.join(self.supportconfig_path, "plugin-saltconfiguration.txt")): + if os.path.isfile( + os.path.join(self.supportconfig_path, "plugin-saltconfiguration.txt") + ): return True - + def read_salt_configuration(self): content = None with open( - os.path.join(self.supportconfig_path, "plugin-saltconfiguration.txt") + os.path.join(self.supportconfig_path, "plugin-saltconfiguration.txt"), + encoding="UTF-8", ) as f: content = f.read() attrs_to_expose = [ @@ -341,19 +402,24 @@ def read_salt_configuration(self): for attr in attrs_to_expose: prop = { "name": attr, - "value": int(re.findall(f"^{attr}: ([0-9]+)$", content, re.MULTILINE)[-1]), + "value": int( + re.findall(f"^{attr}: ([0-9]+)$", content, re.MULTILINE)[-1] + ), } ret.append(prop) return ret def exists_salt_keys_file(self): - if os.path.isfile(os.path.join(self.supportconfig_path, "plugin-saltminionskeys.txt")): + if os.path.isfile( + os.path.join(self.supportconfig_path, "plugin-saltminionskeys.txt") + ): return True def read_salt_keys(self): content = None with open( - os.path.join(self.supportconfig_path, "plugin-saltminionskeys.txt") + os.path.join(self.supportconfig_path, "plugin-saltminionskeys.txt"), + encoding="UTF-8", ) as f: content = f.read() ret = [] @@ -372,30 +438,37 @@ def read_salt_keys(self): for i, key_type in enumerate(key_types): prop = { "name": key_type, - "value": 
parsed[i].strip().split("\n") if parsed[i].strip() else [] + "value": parsed[i].strip().split("\n") if parsed[i].strip() else [], } prop["length"] = len(prop["value"]) ret.append(prop) return ret - + def exists_salt_jobs_file(self): if os.path.isfile(os.path.join(self.supportconfig_path, "plugin-saltjobs.txt")): return True def read_salt_jobs(self): content = None - with open(os.path.join(self.supportconfig_path, "plugin-saltjobs.txt")) as f: + with open( + os.path.join(self.supportconfig_path, "plugin-saltjobs.txt"), + encoding="UTF-8", + ) as f: content = f.read() res = [] job_matches = re.findall( - "^'([0-9]+)':[\s\S]*?Function:\s+([\w.]+)[\s\S]*?StartTime:\s+(\d{4},\s\w{3}\s\d{2}\s\d{2}:\d{2}:\d{2}\.\d{6})", content, re.MULTILINE + r"^'([0-9]+)':[\s\S]*?Function:\s+([\w.]+)[\s\S]*?StartTime:\s+(\d{4},\s\w{3}\s\d{2}\s\d{2}:\d{2}:\d{2}\.\d{6})", + content, + re.MULTILINE, ) for job_match in job_matches: - res.append({ - "id": job_match[0], - "fun": job_match[1], - "start_time": job_match[2], - }) + res.append( + { + "id": job_match[0], + "fun": job_match[1], + "start_time": job_match[2], + } + ) return res def get_static_metrics(self): @@ -427,24 +500,36 @@ def merge_metrics(self): def _append_static_properties(self, res_dict: Dict): for metric_name, metric_obj in metrics_config.items(): - self._append_value_to_dict(res_dict, metric_obj.get("label", "config"), metric_name) + self._append_value_to_dict( + res_dict, metric_obj.get("label", "config"), metric_name + ) def parse_num_of_channels(self) -> None: """ Approximate the number of active channels by counting reposync log files modified within 24h of the most recently modified file. 
""" - reposync_log_path = Path(f"{self.supportconfig_path}/spacewalk-debug/rhn-logs/rhn/reposync") + reposync_log_path = Path( + f"{self.supportconfig_path}/spacewalk-debug/rhn-logs/rhn/reposync" + ) if not reposync_log_path.exists(): return - log_files = sorted(reposync_log_path.iterdir(), key=os.path.getmtime, reverse=True) + log_files = sorted( + reposync_log_path.iterdir(), key=os.path.getmtime, reverse=True + ) most_recent_mtime = os.path.getmtime(log_files[0]) one_day_seconds = 86400 - log_files = [log_f for log_f in log_files if most_recent_mtime - os.path.getmtime(log_f) <= one_day_seconds] + log_files = [ + log_f + for log_f in log_files + if most_recent_mtime - os.path.getmtime(log_f) <= one_day_seconds + ] self.num_of_channels = len(log_files) - def _append_value_to_dict(self, res_dict: Dict, dict_property: str, prop: str) -> None: + def _append_value_to_dict( + self, res_dict: Dict, dict_property: str, prop: str + ) -> None: """ Mutate res_dict such that any `self.prop`, if it exists, is added to res_dict[dict_property] as a {"name": prop, "value": self.prop} object. 
@@ -465,20 +550,22 @@ def write_metrics(self): with open(filename, "w", encoding="UTF-8") as f: json.dump(metrics, f, indent=4) + class Handler(http.server.SimpleHTTPRequestHandler): def __init__(self, *args, **kwargs): super().__init__(*args, directory="/opt/metrics", **kwargs) + def main(): print("Supportconfig Exporter started") - if os.path.exists("config.yml"): - with open("config.yml", "r") as config_file: - try: - config = yaml.safe_load(config_file) - port = int(config["port"]) - supportconfig_path = config["supportconfig_path"] - except yaml.YAMLError as error: - print(f"Could not load {config_file}: {error}") + if not os.path.exists("config.yml"): + print("Could not find config.yml") + exit(1) + + with open("config.yml", "r", encoding="UTF-8") as config_file: + config = yaml.safe_load(config_file) + port = int(config["port"]) + supportconfig_path = config["supportconfig_path"] collector = SupportConfigMetricsCollector(supportconfig_path) collector.write_metrics() @@ -486,5 +573,6 @@ def main(): print("serving at port", port) httpd.serve_forever() + if __name__ == "__main__": main() diff --git a/health-check/src/uyuni_health_check/grafana/grafana_manager.py b/health-check/src/uyuni_health_check/grafana/grafana_manager.py index cbe69c9c437c..4ed4db0d2d8e 100644 --- a/health-check/src/uyuni_health_check/grafana/grafana_manager.py +++ b/health-check/src/uyuni_health_check/grafana/grafana_manager.py @@ -1,14 +1,16 @@ +"""A module that manages the Grafana container""" + import json from uyuni_health_check import config from uyuni_health_check.utils import console from uyuni_health_check.containers.manager import ( - console, build_image, image_exists, container_is_running, podman, ) + def prepare_grafana(from_datetime: str, to_datetime: str, verbose: bool): name = config.load_prop("grafana.container_name") image = config.load_prop("grafana.image") @@ -20,9 +22,13 @@ def prepare_grafana(from_datetime: str, to_datetime: str, verbose: bool): 
build_grafana_image(image, verbose) grafana_cfg = config.get_config_dir_path("grafana") - console.log("GRAFANA CFG DIR: ",grafana_cfg) - grafana_dasthboard_template = config.get_json_template_filepath("grafana_dashboard/supportconfig_with_logs.template.json") - render_grafana_dashboard_cfg(grafana_dasthboard_template, from_datetime, to_datetime) + console.log("GRAFANA CFG DIR: ", grafana_cfg) + grafana_dashboard_template = config.get_json_template_filepath( + "grafana_dashboard/supportconfig_with_logs.template.json" + ) + render_grafana_dashboard_cfg( + grafana_dashboard_template, from_datetime, to_datetime + ) podman( [ @@ -45,9 +51,10 @@ def prepare_grafana(from_datetime: str, to_datetime: str, verbose: bool): name, image, ], - verbose + verbose, ) + def build_grafana_image(image: str, verbose: bool): if image_exists(image): return @@ -57,13 +64,18 @@ def build_grafana_image(image: str, verbose: bool): build_image(image, image_path, verbose=verbose) console.log(f"[green]The {image} image was built successfully") -def render_grafana_dashboard_cfg(grafana_dashboard_template, from_datetime, to_datetime): + +def render_grafana_dashboard_cfg( + grafana_dashboard_template, from_datetime, to_datetime +): """ Render grafana dashboard file """ - with open(grafana_dashboard_template, 'r') as f: + with open(grafana_dashboard_template, "r", encoding="UTF-8") as f: data = json.load(f) data["time"]["from"] = from_datetime data["time"]["to"] = to_datetime - config.write_config("grafana", "dashboards/supportconfig_with_logs.json", data, is_json=True) \ No newline at end of file + config.write_config( + "grafana", "dashboards/supportconfig_with_logs.json", data, is_json=True + ) diff --git a/health-check/src/uyuni_health_check/loki/logs_gatherer.py b/health-check/src/uyuni_health_check/loki/logs_gatherer.py deleted file mode 100644 index 78d50ec51b94..000000000000 --- a/health-check/src/uyuni_health_check/loki/logs_gatherer.py +++ /dev/null @@ -1,110 +0,0 @@ -from
uyuni_health_check.containers.manager import podman -from uyuni_health_check.utils import run_command, HealthException, console -from uyuni_health_check.outputter import outputter -from uyuni_health_check import config -from rich.markdown import Markdown -from datetime import datetime, timedelta -from rich import print -import json -from json.decoder import JSONDecodeError - - -def show_full_error_logs(from_datetime, to_datetime, since, console: "Console", loki=None): - """ - Get and show the error logs stats - """ - print( - Markdown(f"- Getting summary of errors in logs") - ) - print() - query = f"{{job=~\".+\"}} |~ \"(?i)error|(?i)severe|(?i)critical|(?i)fatal\"" - stdout, stderr = query_loki(from_dt=from_datetime, to_dt=to_datetime, since = since, query=query) - lines = stdout.strip().split("\n") - json_objects = [] - - if len(lines[0]) != 0: - for line in lines: - try: - json_data = json.loads(line) - json_objects.append(json_data) - except json.JSONDecodeError as e: - console.print(f"[red]Failed to parse JSON:[/red] {e}") - console.print(f"[yellow]Raw output:[/yellow] {line}") - - combined_json = json.dumps(json_objects, indent=4) - outputter.print_paginated_json(combined_json) - - -def show_error_logs_stats(from_datetime, to_datetime, since, console: "Console", loki=None): - """ - Get and show the error logs stats - """ - print( - Markdown(f"- Getting summary of errors in logs") - ) - print() - query = f'count_over_time({{job=~".+"}} |~ "(?i)error|(?i)severe|(?i)critical|(?i)fatal" [{since}d])' - # Returns a JSON. 
- stdout, stderr = query_loki(from_dt=from_datetime, to_dt=to_datetime, since = since, query=query) - - try: - data = json.loads(stdout) - except JSONDecodeError: - raise HealthException(f"Invalid logcli response: {stdout}") - - outputter.show(data) - -def query_loki(from_dt, to_dt, since, query): - - loki_container_name = config.load_prop('loki.loki_container_name') - loki_port = config.load_prop('loki.loki_port') - loki_url = f"http://{loki_container_name}:{loki_port}" - - network_name = config.load_prop('podman.network_name') - logcli_container_name = config.load_prop('logcli.logcli_container_name') - - logcli_image_name = config.load_prop('logcli.logcli_image_name') - - podman_args = [ - "run", - "--rm", - "--replace", - "--network", - network_name, - "--name", - logcli_container_name, - logcli_image_name, - "query", - "--quiet", - "--output=jsonl", - f"--addr={loki_url}", - "--limit=150", - ] - - if to_dt and not from_dt: - # Doesn't make sense to have to without from - raise HealthException - - if from_dt: - podman_args.extend([ - f"--from={from_dt}", - ]) - - if to_dt: - # to_dt has priority over since if the two parameters are present - podman_args.extend([ - f"--to={to_dt}" - ]) - - else: - # Since should always have a default value - podman_args.extend([ - f"--since={since}" - ]) - else: - # The default "from" is "since" days ago. Since should always have a default value. 
- from_time = (datetime.utcnow() - timedelta(days=since)).isoformat() - podman_args.append(query) - stdout, stderr, _ = podman(cmd=podman_args) - return [stdout, stderr] - diff --git a/health-check/src/uyuni_health_check/loki/loki_manager.py b/health-check/src/uyuni_health_check/loki/loki_manager.py index 961cfc29a2c1..9ed3da971c43 100644 --- a/health-check/src/uyuni_health_check/loki/loki_manager.py +++ b/health-check/src/uyuni_health_check/loki/loki_manager.py @@ -1,3 +1,5 @@ +"""Module that controls the Loki and Promtail containers""" + import io import os import requests @@ -20,7 +22,7 @@ LOKI_WAIT_TIMEOUT = 120 -def download_component_build_image(image:str, verbose=False): +def download_component_build_image(image: str, verbose=False): if image_exists(image): return @@ -130,24 +132,33 @@ def render_promtail_cfg(supportconfig_path=None, promtail_template=None): config.write_config("promtail", "config.yaml", promtail_template.render(**opts)) - -def check_series_in_loki(loki_url, job_name="promtail-complete-job", flag="complete", message="Promtail finished!d"): +def check_series_in_loki( + loki_url, + job_name="promtail-complete-job", + flag="complete", + message="Promtail finished!d", +): query = f'{{job="{job_name}", flag="{flag}"}} |= "{message}"' end = int(time.time()) start = end - 60 * 60 response = requests.get( f"{loki_url}/loki/api/v1/query", - params={"query": query, "start": start * 1_000_000_000, "end": end * 1_000_000_000} + params={ + "query": query, + "start": start * 1_000_000_000, + "end": end * 1_000_000_000, + }, ) if response.status_code == 200: data = response.json() - return len(data['data']['result']) > 0 + return len(data["data"]["result"]) > 0 else: print("Failed to query Loki:", response.text) return False + def wait_promtail_init(): loki_url = "http://localhost:3100" start_time = time.time() @@ -162,6 +173,7 @@ def wait_promtail_init(): time.sleep(10) console.log("Promtail finished processing logs") + def wait_loki_init(verbose=False): 
""" Try to figure out when loki is ready to answer our requests. @@ -169,7 +181,7 @@ def wait_loki_init(verbose=False): - loki to be up - promtail to have read the logs and the loki ingester having handled them """ - metrics = None + metrics = {} timeout = False request_message_bytes_sum = 0 loki_ingester_chunk_entries_count = 0 @@ -206,13 +218,13 @@ def wait_loki_init(verbose=False): if response.status_code == 200: content = response.content.decode() request_message_bytes_sum = re.findall( - 'loki_request_message_bytes_sum{.*"loki_api_v1_push"} (\d+)', content + r'loki_request_message_bytes_sum{.*"loki_api_v1_push"} (\d+)', content ) request_message_bytes_sum = ( int(request_message_bytes_sum[0]) if request_message_bytes_sum else 0 ) loki_ingester_chunk_entries_count = re.findall( - "loki_ingester_chunk_entries_count (\d+)", content + r"loki_ingester_chunk_entries_count (\d+)", content ) loki_ingester_chunk_entries_count = ( int(loki_ingester_chunk_entries_count[0]) @@ -225,14 +237,14 @@ def wait_loki_init(verbose=False): console.log("promtail metrics 9081 status code", response.status_code) if response.status_code == 200: content = response.content.decode() - active = re.findall("promtail_targets_active_total (\d+)", content) + active = re.findall(r"promtail_targets_active_total (\d+)", content) encoded_bytes_total = re.findall( - "promtail_encoded_bytes_total{.*} (\d+)", content + r"promtail_encoded_bytes_total{.*} (\d+)", content ) sent_bytes_total = re.findall( - "promtail_sent_bytes_total{.*} (\d+)", content + r"promtail_sent_bytes_total{.*} (\d+)", content ) - active_files = re.findall("promtail_files_active_total (\d+)", content) + active_files = re.findall(r"promtail_files_active_total (\d+)", content) lags = re.findall( 'promtail_stream_lag_seconds{filename="([^"]+)".*} ([0-9.]+)', content ) @@ -260,7 +272,7 @@ def wait_loki_init(verbose=False): # check if promtail is ready if verbose: console.log("Waiting for promtail to be ready") - response = 
requests.get(f"http://localhost:9081/ready") + response = requests.get("http://localhost:9081/ready") if verbose: console.log("promtail ready 9081 status code", response.status_code) if response.status_code == 200: @@ -270,7 +282,6 @@ def wait_loki_init(verbose=False): else: ready = False - # check timeout if (time.time() - start_time) > LOKI_WAIT_TIMEOUT: timeout = True @@ -283,6 +294,3 @@ def wait_loki_init(verbose=False): console.print(loki_ingester_chunk_entries_count) console.print(request_message_bytes_sum) console.log("[bold]Loki and promtail are now ready to receive requests") - - - \ No newline at end of file diff --git a/health-check/src/uyuni_health_check/main.py b/health-check/src/uyuni_health_check/main.py index 8a8f06002442..4de87ee6f2e4 100644 --- a/health-check/src/uyuni_health_check/main.py +++ b/health-check/src/uyuni_health_check/main.py @@ -1,4 +1,5 @@ """Main module for the health check tool""" + import click import os from rich.markdown import Markdown @@ -8,7 +9,10 @@ from uyuni_health_check.utils import console, HealthException from uyuni_health_check.loki.loki_manager import run_loki from uyuni_health_check.exporters import exporter -from uyuni_health_check.containers.manager import create_podman_network, clean_containers +from uyuni_health_check.containers.manager import ( + create_podman_network, + clean_containers, +) @click.group() @@ -24,7 +28,7 @@ help="Show more stdout, including image building", ) @click.pass_context -def cli(ctx:click.Context, supportconfig_path: str, verbose: bool): +def cli(ctx: click.Context, supportconfig_path: str, verbose: bool): ctx.ensure_object(dict) ctx.obj["verbose"] = verbose ctx.obj["supportconfig_path"] = supportconfig_path @@ -98,7 +102,7 @@ def clean(ctx: click.Context): def main(): console.print(Markdown("# Uyuni Health Check")) - cli() # pylint: disable=no-value-for-parameter + cli() # pylint: disable=no-value-for-parameter if __name__ == "__main__": diff --git 
a/health-check/src/uyuni_health_check/metrics.py b/health-check/src/uyuni_health_check/metrics.py deleted file mode 100644 index 321e65f13812..000000000000 --- a/health-check/src/uyuni_health_check/metrics.py +++ /dev/null @@ -1,129 +0,0 @@ -import json -import re -import time -from datetime import datetime, timedelta -from json.decoder import JSONDecodeError - -import requests -from rich import print -from rich.columns import Columns -from rich.console import Console -from rich.markdown import Markdown -from rich.panel import Panel -from rich.table import Table -from rich.text import Text - -from uyuni_health_check.utils import HealthException, run_command, console -from uyuni_health_check.containers.manager import podman - - -def show_supportconfig_metrics(metrics: dict, console: "Console"): - if metrics: - tables = [] - tables.append(show_salt_jobs_summary(metrics)) - tables.append(show_salt_keys_summary(metrics)) - tables.append(show_salt_master_configuration_summary(metrics)) - console.print(Columns(tables), justify="center") - else: - console.print( - "[yellow]Some metrics are still missing. 
Wait some seconds and execute again", - justify="center", - ) - -def show_salt_jobs_summary(metrics: dict): - table = Table(show_header=True, header_style="bold magenta") - table.add_column("Salt function name") - table.add_column("Total") - - for metric, value in sorted( - metrics["salt_jobs"].items(), reverse=True, key=lambda item: item[1] - ): - table.add_row(metric, str(int(value))) - - return table - - -def show_salt_keys_summary(metrics: dict): - table = Table(show_header=True, header_style="bold magenta") - table.add_column("Salt keys") - table.add_column("Total") - - for metric, value in sorted( - metrics["salt_keys"].items(), reverse=True, key=lambda item: item[1] - ): - table.add_row(metric, str(int(value))) - - return table - - -def show_salt_master_configuration_summary(metrics: dict): - table = Table(show_header=True, header_style="bold magenta") - table.add_column("Salt Master Configuration") - table.add_column("Value") - - for metric, value in sorted( - metrics["salt_master_config"].items(), reverse=True, key=lambda item: item[1] - ): - table.add_row(metric, str(int(value))) - - return table - - -def _fetch_metrics_from_exporter( - console: "Console", host="localhost", port=9000, max_retries=5 -): - for i in range(max_retries): - try: - metrics_raw = requests.get(f"http://{host}:{port}").content.decode() - return metrics_raw - except requests.exceptions.RequestException as exc: - if i < max_retries - 1: - time.sleep(1) - console.log("[italic]retrying...") - else: - console.log( - "[italic red]There was an error while fetching metrics from exporter[/italic red]" - ) - print(f"{exc}") - exit(1) - - -def fetch_metrics_from_supportconfig_exporter( - console: "Console", host="localhost", port=9000, max_retries=5 -): - if not host: - host = "localhost" - - metrics_raw = _fetch_metrics_from_exporter(console, host, port, max_retries) - - salt_jobs = re.findall(r'salt_jobs{fun="(.+)",jid="(.+)"} (.+)', metrics_raw) - salt_keys = 
re.findall(r'salt_keys{name="(.+)"} (.+)', metrics_raw) - salt_master_config = re.findall( - r'salt_master_config{name="(.+)"} (.+)', metrics_raw - ) - - if not salt_jobs or not salt_keys or not salt_master_config: - console.log( - "[yellow]Some metrics might be still missing. Wait some seconds and execute again" - ) - - metrics = { - "salt_jobs": {}, - "salt_keys": {}, - "salt_master_config": {}, - } - - for m in salt_jobs: - if m[0] in metrics["salt_jobs"]: - metrics["salt_jobs"][m[0]] += 1 - else: - metrics["salt_jobs"][m[0]] = 1 - - for m in salt_master_config: - metrics["salt_master_config"][m[0]] = float(m[1]) - - for m in salt_keys: - metrics["salt_keys"][m[0]] = float(m[1]) - - console.log("[green]metrics have been successfully collected") - return metrics diff --git a/health-check/src/uyuni_health_check/outputter/outputter.py b/health-check/src/uyuni_health_check/outputter/outputter.py deleted file mode 100644 index e5788aadd755..000000000000 --- a/health-check/src/uyuni_health_check/outputter/outputter.py +++ /dev/null @@ -1,40 +0,0 @@ -from rich.panel import Panel -from rich.markdown import Markdown -from rich.panel import Panel -from rich.table import Table -from rich.text import Text -from uyuni_health_check.utils import console - -def show(data): - if data: - console.print( - Panel( - Text( - f"Ooops! Errors found!", - justify="center", - ) - ), - style="italic red blink", - ) - table = Table(show_header=True, header_style="bold magenta", expand=True) - table.add_column("File") - table.add_column("Errors") - - for metric in data: - table.add_row(metric["metric"]["filename"], metric["values"][-1][1]) - - console.print(table) - else: - console.print( - Panel( - Text( - f"Good news! 
No errors detected in logs.", - justify="center", - ) - ), - style="italic green", - ) - -def print_paginated_json(data): - with console.pager(): - console.print_json(data) diff --git a/health-check/src/uyuni_health_check/prometheus/prometheus_manager.py b/health-check/src/uyuni_health_check/prometheus/prometheus_manager.py deleted file mode 100644 index 9214bfb99b82..000000000000 --- a/health-check/src/uyuni_health_check/prometheus/prometheus_manager.py +++ /dev/null @@ -1,31 +0,0 @@ -from uyuni_health_check import config -from uyuni_health_check.containers.manager import ( - console, - container_is_running, - podman, -) -from uyuni_health_check.utils import console - - -def prepare_prometheus(verbose=False): - if container_is_running("uyuni-health-check-prometheus"): - console.log( - "Skipped as the uyuni-health-check-prometheus container is already running" - ) - else: - podman_command = [ - "run", - "-d", - "--replace", - "--network", - "health-check-network", - "-p", - "9090:9090", - "-v", - f"{config.get_prometheus_config_dir()}:/etc/prometheus/", - "--name", - "uyuni-health-check-prometheus", - "docker.io/prom/prometheus", - ] - console.log(" ".join(podman_command)) - podman(podman_command) diff --git a/health-check/src/uyuni_health_check/utils.py b/health-check/src/uyuni_health_check/utils.py index 0aabfe4c146f..c2f51bcc582f 100644 --- a/health-check/src/uyuni_health_check/utils.py +++ b/health-check/src/uyuni_health_check/utils.py @@ -1,3 +1,5 @@ +"""Utils module for various utility functions""" + from datetime import datetime, timedelta import subprocess from typing import List @@ -8,16 +10,17 @@ console = Console() + def validate_date(ctx: click.Context, param: str, date: str | None) -> str | None: del ctx, param if not date: return try: - datetime.fromisoformat(date.replace('Z', '+00:00')) + datetime.fromisoformat(date.replace("Z", "+00:00")) return date - except ValueError: - raise click.BadParameter("Date must be in ISO8601 format") + except 
ValueError as e: + raise click.BadParameter("Date must be in ISO8601 format") from e def get_dates(since: int) -> tuple: @@ -39,6 +42,7 @@ def run_command(cmd: List[str], verbose=False, raise_exc=True) -> List: stderr=subprocess.PIPE, stdin=subprocess.DEVNULL, universal_newlines=True, + check=False, ) stdout, stderr, retcode = process.stdout, process.stderr, process.returncode @@ -49,15 +53,19 @@ def run_command(cmd: List[str], verbose=False, raise_exc=True) -> List: return [stdout, stderr, retcode] + def _handle_text_from_process(verbose: bool, *objs: str): if verbose: for obj in objs: - console.log(Text.from_ansi(obj.strip())) + console.log(Text.from_ansi(obj.strip())) + def _check_retcode(retcode: int): match retcode: - case 0: ... # success - case 127: raise OSError("Command not found; podman is required") + case 0: + ... # success + case 127: + raise OSError("Command not found; podman is required") case _: raise HealthException("An error happened while running Podman")