Merge pull request #6 from mrc-ide/mrc-6197

Capture output when running task
mrc-ide · Jan 29, 2025 · 760db73
2 parents feec72e + 7e5a48b
commit 760db73
Show file tree

Hide file tree

Showing 10 changed files with 111 additions and 15 deletions.
diff --git a/src/hipercow/cli.py b/src/hipercow/cli.py
@@ -1,7 +1,7 @@
 import click
 
 from hipercow import root
-from hipercow.task import task_status
+from hipercow.task import task_log, task_status
 from hipercow.task_create import task_create
 from hipercow.task_eval import task_eval
 
@@ -29,6 +29,17 @@ def status(task_id: str):
     click.echo(task_status(r, task_id))
 
 
+@task.command()
+@click.option("--filename", is_flag=True)
+@click.argument("task_id")
+def log(task_id: str, *, filename=False):
+    r = root.open_root()
+    if filename:
+        click.echo(r.path_task_log(task_id))
+    else:
+        click.echo(task_log(r, task_id))
+
+
 @task.command()
 @click.argument("cmd", nargs=-1)
 def create(cmd: tuple[str]):
@@ -40,6 +51,7 @@ def create(cmd: tuple[str]):
 
 @task.command()
 @click.argument("task_id")
-def eval(task_id: str):
+@click.option("--capture/--no-capture", default=False)
+def eval(task_id: str, *, capture: bool):
     r = root.open_root()
-    task_eval(r, task_id)
+    task_eval(r, task_id, capture=capture)
diff --git a/src/hipercow/root.py b/src/hipercow/root.py
@@ -41,7 +41,7 @@ def __init__(self, path: str | Path) -> None:
         self.path = path
 
     def path_task(self, task_id: str) -> Path:
-        return self.path / "tasks" / task_id[:2] / task_id[2:]
+        return self.path / "hipercow" / "tasks" / task_id[:2] / task_id[2:]
 
     def path_task_times(self, task_id: str) -> Path:
         return self.path_task(task_id) / "times"
@@ -52,6 +52,9 @@ def path_task_data(self, task_id: str) -> Path:
     def path_task_result(self, task_id: str) -> Path:
         return self.path_task(task_id) / "result"
 
+    def path_task_log(self, task_id: str) -> Path:
+        return self.path_task(task_id) / "log"
+
 
 def open_root(path: None | str | Path = None) -> Root:
     root = find_file_descend("hipercow", path or Path.cwd())

diff --git a/src/hipercow/task.py b/src/hipercow/task.py
@@ -59,6 +59,16 @@ def task_status(root: Root, task_id: str) -> TaskStatus:
     return TaskStatus.CREATED
 
 
+def task_log(root: Root, task_id: str) -> str:
+    path = root.path_task_log(task_id)
+    if not path.exists():
+        status = task_status(root, task_id)
+        msg = f"Task log for '{task_id}' does not exist (status: {status})"
+        raise Exception(msg)
+    with path.open() as f:
+        return f.read()
+
+
 def set_task_status(root: Root, task_id: str, status: TaskStatus):
     file_create(root.path_task(task_id) / STATUS_FILE_MAP[status])
 

diff --git a/src/hipercow/task_eval.py b/src/hipercow/task_eval.py
@@ -1,6 +1,5 @@
 import os
 import pickle
-import subprocess
 import time
 from dataclasses import dataclass
 
@@ -12,6 +11,7 @@
     set_task_status,
     task_status,
 )
+from hipercow.util import subprocess_run
 
 
 @dataclass
@@ -21,11 +21,12 @@ class TaskResult:
     data: object
 
 
-def task_eval(root: Root, task_id: str) -> None:
-    task_eval_data(root, TaskData.read(root, task_id))
+def task_eval(root: Root, task_id: str, *, capture: bool = False) -> None:
+    data = TaskData.read(root, task_id)
+    task_eval_data(root, data, capture=capture)
 
 
-def task_eval_data(root: Root, data: TaskData) -> None:
+def task_eval_data(root: Root, data: TaskData, *, capture: bool) -> None:
     task_id = data.task_id
     status = task_status(root, task_id)
     if not status.is_runnable():
@@ -38,7 +39,7 @@ def task_eval_data(root: Root, data: TaskData) -> None:
     set_task_status(root, task_id, TaskStatus.RUNNING)
 
     assert data.method == "shell"  # noqa: S101
-    res = task_eval_shell(root, data)
+    res = task_eval_shell(root, data, capture=capture)
 
     t_end = time.time()
 
@@ -52,10 +53,11 @@ def task_eval_data(root: Root, data: TaskData) -> None:
     set_task_status(root, task_id, status)
 
 
-def task_eval_shell(root: Root, data: TaskData) -> TaskResult:
+def task_eval_shell(root: Root, data: TaskData, *, capture=False) -> TaskResult:
     cmd = data.data["cmd"]
     env = dict(os.environ, **data.envvars)
     path = root.path / data.path
-    res = subprocess.run(cmd, check=False, env=env, cwd=path)
+    filename = root.path_task_log(data.task_id) if capture else None
+    res = subprocess_run(cmd, check=False, env=env, cwd=path, filename=filename)
     success = res.returncode == 0
     return TaskResult(data.task_id, success, None)
diff --git a/src/hipercow/util.py b/src/hipercow/util.py
@@ -1,4 +1,5 @@
 import os
+import subprocess
 from contextlib import contextmanager
 from pathlib import Path
 
@@ -34,3 +35,13 @@ def transient_working_directory(path):
 
 def file_create(path: str | Path) -> None:
     Path(path).open("a").close()
+
+
+def subprocess_run(cmd, *, filename: Path | None = None, check=False, **kwargs):
+    if filename is None:
+        return subprocess.run(cmd, **kwargs, check=check)
+    else:
+        with filename.open("wb") as f:
+            return subprocess.run(
+                cmd, check=check, stderr=subprocess.STDOUT, stdout=f, **kwargs
+            )
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -47,3 +47,23 @@ def test_can_run_task(tmp_path):
         # something.  I've checked with the capsys fixture and that
         # does not seem to have it either.
         assert task.task_status(r, task_id) == task.TaskStatus.SUCCESS
+
+
+def test_can_save_and_read_log(tmp_path):
+    runner = CliRunner()
+    with runner.isolated_filesystem(temp_dir=tmp_path):
+        root.init(".")
+        r = root.open_root()
+        res = runner.invoke(cli.create, ["echo", "hello", "world"])
+        task_id = res.stdout.strip()
+
+        res = runner.invoke(cli.eval, [task_id, "--capture"])
+        assert res.exit_code == 0
+
+        res = runner.invoke(cli.log, task_id)
+        assert res.exit_code == 0
+        assert res.output == "hello world\n\n"
+
+        res = runner.invoke(cli.log, [task_id, "--filename"])
+        assert res.exit_code == 0
+        assert res.output.strip() == str(r.path_task_log(task_id))
diff --git a/tests/test_create.py b/tests/test_create.py
@@ -11,7 +11,8 @@ def test_create_simple_task(tmp_path):
     with transient_working_directory(tmp_path):
         tid = tc.task_create_shell(["echo", "hello world"])
     assert re.match("^[0-9a-f]{32}$", tid)
-    assert (tmp_path / "tasks" / tid[:2] / tid[2:] / "data").exists()
+    path_data = tmp_path / "hipercow" / "tasks" / tid[:2] / tid[2:] / "data"
+    assert path_data.exists()
     d = TaskData.read(root.open_root(tmp_path), tid)
     assert isinstance(d, TaskData)
     assert d.task_id == tid

diff --git a/tests/test_task.py b/tests/test_task.py
@@ -1,6 +1,8 @@
+import pytest
+
 from hipercow import root
 from hipercow import task_create as tc
-from hipercow.task import TaskStatus, set_task_status, task_status
+from hipercow.task import TaskStatus, set_task_status, task_log, task_status
 from hipercow.util import transient_working_directory
 
 
@@ -33,5 +35,13 @@ def test_that_missing_tasks_have_missing_status(tmp_path):
     assert task_status(r, "a" * 32) == TaskStatus.MISSING
 
 
+def test_that_missing_tasks_error_on_log_read(tmp_path):
+    root.init(tmp_path)
+    r = root.open_root(tmp_path)
+    task_id = "a" * 32
+    with pytest.raises(Exception, match="Task log for '.+' does not exist"):
+        task_log(r, task_id)
+
+
 def test_can_convert_to_nice_string():
     assert str(TaskStatus.CREATED) == "created"
diff --git a/tests/test_task_eval.py b/tests/test_task_eval.py
@@ -2,7 +2,7 @@
 
 from hipercow import root
 from hipercow import task_create as tc
-from hipercow.task import TaskStatus, task_status
+from hipercow.task import TaskStatus, task_log, task_status
 from hipercow.task_eval import task_eval
 from hipercow.util import transient_working_directory
 
@@ -26,3 +26,17 @@ def test_cant_run_complete_task(tmp_path):
     msg = f"Can't run '{tid}', which has status 'success'"
     with pytest.raises(Exception, match=msg):
         task_eval(r, tid)
+
+
+def test_can_capture_output_to_auto_file(tmp_path):
+    root.init(tmp_path)
+    r = root.open_root(tmp_path)
+    with transient_working_directory(tmp_path):
+        tid = tc.task_create_shell(["echo", "hello world"])
+    task_eval(r, tid, capture=True)
+
+    path = r.path_task_log(tid)
+    with path.open("r") as f:
+        assert f.read().strip() == "hello world"
+
+    assert task_log(r, tid) == "hello world\n"
diff --git a/tests/test_util.py b/tests/test_util.py
@@ -1,6 +1,10 @@
 from pathlib import Path
 
-from hipercow.util import find_file_descend, transient_working_directory
+from hipercow.util import (
+    find_file_descend,
+    subprocess_run,
+    transient_working_directory,
+)
 
 
 def test_find_descend(tmp_path):
@@ -19,3 +23,12 @@ def test_transient_working_directory(tmp_path):
         assert Path.cwd() == here
     with transient_working_directory(tmp_path):
         assert Path.cwd() == tmp_path
+
+
+def test_run_process_and_capture_output(tmp_path):
+    path = tmp_path / "output"
+    res = subprocess_run(["echo", "hello"], filename=path)
+    assert res.returncode == 0
+    assert path.exists()
+    with open(path) as f:
+        assert f.read().strip() == "hello"