Skip to content

Commit

Permalink
monitor: compute image age from zfs creation property (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
delan committed Jan 2, 2025
1 parent 326cd78 commit 0f09e84
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 7 deletions.
6 changes: 6 additions & 0 deletions monitor/src/get-snapshot-creation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env zsh
# usage: get-snapshot-creation.sh <pool/path/to/zvol@snapshot>
#
# Prints the `creation` property of the given ZFS snapshot on stdout.
set -euo pipefail -o bsdecho

# With `set -u`, a missing argument aborts the script here.
target=$1
shift

# -H: no header line; -p: raw (parsable) numeric value; -o value: only the value column.
zfs get -H -p -o value creation "$target"
3 changes: 2 additions & 1 deletion monitor/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,10 +403,11 @@ fn monitor_thread() -> eyre::Result<()> {
busy,
excess_idle,
wanted,
image_age,
},
) in profile_runner_counts.iter()
{
info!("profile {key}: {healthy}/{target} healthy runners ({idle} idle, {reserved} reserved, {busy} busy, {started_or_crashed} started or crashed, {excess_idle} excess idle, {wanted} wanted)");
info!("profile {key}: image age {image_age:?}, {healthy}/{target} healthy runners ({idle} idle, {reserved} reserved, {busy} busy, {started_or_crashed} started or crashed, {excess_idle} excess idle, {wanted} wanted)");
}
for (_id, runner) in runners.iter() {
runner.log_info();
Expand Down
30 changes: 28 additions & 2 deletions monitor/src/profile.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
use std::process::Command;
use std::{
process::Command,
time::{Duration, SystemTime, UNIX_EPOCH},
};

use jane_eyre::eyre;
use jane_eyre::eyre::{self, Context};
use serde::{Deserialize, Serialize};
use tracing::{debug, info};

use crate::{
data::get_profile_data_path,
libvirt::update_screenshot,
runner::{Runner, Runners, Status},
zfs::snapshot_creation_time_unix,
DOTENV,
};

Expand All @@ -30,6 +34,7 @@ pub struct RunnerCounts {
pub busy: usize,
pub excess_idle: usize,
pub wanted: usize,
pub image_age: Option<Duration>,
}

impl Profile {
Expand Down Expand Up @@ -96,6 +101,7 @@ impl Profile {
busy: self.busy_runner_count(runners),
excess_idle: self.excess_idle_runner_count(runners),
wanted: self.wanted_runner_count(runners),
image_age: self.image_age().ok().flatten(),
}
}

Expand Down Expand Up @@ -166,6 +172,26 @@ impl Profile {
}
}

/// Returns how long ago this profile's base image snapshot was created.
///
/// Returns `Ok(None)` when the snapshot creation time cannot be determined;
/// that failure is logged at debug level rather than propagated. If the
/// snapshot's creation time is later than the current system time (for
/// example after a clock adjustment), the age is clamped to zero.
///
/// # Errors
///
/// Fails only if the system clock reads earlier than the Unix epoch.
pub fn image_age(&self) -> eyre::Result<Option<Duration>> {
    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .wrap_err("Failed to get current time")?;
    let creation_time =
        match snapshot_creation_time_unix(&self.base_vm_name, &self.base_image_snapshot) {
            Ok(result) => result,
            Err(error) => {
                debug!(
                    self.base_vm_name,
                    ?error,
                    "Failed to get snapshot creation time"
                );
                return Ok(None);
            }
        };

    // `Duration - Duration` panics on underflow, so a snapshot timestamp that
    // is ahead of our clock would take down the caller; saturate at zero instead.
    Ok(Some(now.saturating_sub(creation_time)))
}

pub fn update_screenshot(&self) {
if let Err(error) = self.try_update_screenshot() {
debug!(
Expand Down
3 changes: 2 additions & 1 deletion monitor/src/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub struct Dotenv {
pub github_api_suffix: String,
pub libvirt_prefix: String,
pub zfs_prefix: String,
// SERVO_CI_ZFS_CLONE_PREFIX not used
pub zfs_clone_prefix: String,
pub monitor_data_path: Option<String>,
// SERVO_CI_ZVOL_BLOCK_DEVICE_TIMEOUT not used
pub monitor_poll_interval: Duration,
Expand Down Expand Up @@ -54,6 +54,7 @@ impl Dotenv {
github_api_suffix: env_string("SERVO_CI_GITHUB_API_SUFFIX"),
libvirt_prefix: env_string("SERVO_CI_LIBVIRT_PREFIX"),
zfs_prefix: env_string("SERVO_CI_ZFS_PREFIX"),
zfs_clone_prefix: env_string("SERVO_CI_ZFS_CLONE_PREFIX"),
monitor_data_path: env_option_string("SERVO_CI_MONITOR_DATA_PATH"),
monitor_poll_interval: env_duration_secs("SERVO_CI_MONITOR_POLL_INTERVAL"),
api_cache_timeout: env_duration_secs("SERVO_CI_API_CACHE_TIMEOUT"),
Expand Down
46 changes: 43 additions & 3 deletions monitor/src/zfs.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use core::str;
use std::process::{Command, Stdio};
use std::{
process::{Command, Stdio},
time::Duration,
};

use jane_eyre::eyre::{self, Context};
use jane_eyre::eyre::{self, bail, eyre, Context, OptionExt};

use crate::DOTENV;
use crate::{shell::SHELL, DOTENV};

pub fn list_runner_volumes() -> eyre::Result<Vec<String>> {
let output = Command::new("../list-runner-volumes.sh")
Expand All @@ -26,3 +29,40 @@ pub fn list_runner_volumes() -> eyre::Result<Vec<String>> {

Ok(result.collect())
}

/// Queries the creation time of a ZFS snapshot and returns it as a [`Duration`].
///
/// The snapshot queried is `{zfs_clone_prefix}/{zvol_name}@{snapshot_name}`,
/// where the prefix comes from `DOTENV`. The bundled `get-snapshot-creation.sh`
/// script is run through the shared `SHELL`, and its stdout is parsed as a
/// whole number of seconds (callers treat this as time since the Unix epoch).
///
/// # Errors
///
/// Fails if the `SHELL` mutex is poisoned, the script cannot be run, it exits
/// unsuccessfully (stdout and stderr are included in the error), or its
/// output is not valid UTF-8, lacks a trailing newline, or is not a `u64`.
pub fn snapshot_creation_time_unix(zvol_name: &str, snapshot_name: &str) -> eyre::Result<Duration> {
    let dataset_and_snapshot = format!("{}/{zvol_name}@{snapshot_name}", DOTENV.zfs_clone_prefix);
    // Kept as a single expression so the lock guard is released at the end of
    // this statement, exactly as before.
    let output = SHELL
        .lock()
        .map_err(|e| eyre!("Mutex poisoned: {e:?}"))?
        .run(
            include_str!("get-snapshot-creation.sh"),
            [dataset_and_snapshot],
        )?
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()?
        .wait_with_output()?;
    if !output.status.success() {
        // Show each stream as text when it is valid UTF-8, otherwise as raw bytes.
        let stdout = str::from_utf8(&output.stdout).map_err(|_| output.stdout.clone());
        let stderr = str::from_utf8(&output.stderr).map_err(|_| output.stderr.clone());
        bail!(
            "Command exited with status {}: stdout {:?}, stderr {:?}",
            output.status,
            stdout,
            stderr
        );
    }
    // The script prints exactly one line: the creation time in seconds.
    let result = str::from_utf8(&output.stdout)
        .wrap_err("Failed to decode UTF-8")?
        .strip_suffix('\n')
        .ok_or_eyre("Failed to strip trailing newline")?
        .parse::<u64>()
        .wrap_err("Failed to parse as u64")?;

    Ok(Duration::from_secs(result))
}

0 comments on commit 0f09e84

Please sign in to comment.