diff --git a/tests/ert/unit_tests/scheduler/bin/bsub b/tests/ert/unit_tests/scheduler/bin/bsub
index 9a467e7e998..3b7df88de22 100755
--- a/tests/ert/unit_tests/scheduler/bin/bsub
+++ b/tests/ert/unit_tests/scheduler/bin/bsub
@@ -6,6 +6,8 @@ name="STDIN"
 jobid="${RANDOM}"
 jobdir="${PYTEST_TMP_PATH:-.}/mock_jobs/${jobid}"
 mkdir -p "${jobdir}"
+command_invocation_file="${jobdir}/complete_command_invocation"
+echo "$0 $@" > "$command_invocation_file"
 
 while getopts "o:e:J:q:R:n:P:" opt
 do
diff --git a/tests/ert/unit_tests/scheduler/bin/qsub b/tests/ert/unit_tests/scheduler/bin/qsub
index d1a9cf28b87..516f98f368e 100755
--- a/tests/ert/unit_tests/scheduler/bin/qsub
+++ b/tests/ert/unit_tests/scheduler/bin/qsub
@@ -7,6 +7,8 @@ jobid="test${RANDOM}.localhost"
 jobdir="${PYTEST_TMP_PATH:-.}/mock_jobs/${jobid}"
 mkdir -p "${jobdir}"
 
+command_invocation_file="${jobdir}/complete_command_invocation"
+echo "$0 $@" > "$command_invocation_file"
 job_env_file="${jobdir}/env"
 touch $job_env_file
 
diff --git a/tests/ert/unit_tests/scheduler/bin/sbatch.py b/tests/ert/unit_tests/scheduler/bin/sbatch.py
index aed8e41eb11..006f77aeb21 100644
--- a/tests/ert/unit_tests/scheduler/bin/sbatch.py
+++ b/tests/ert/unit_tests/scheduler/bin/sbatch.py
@@ -40,6 +40,9 @@ def main() -> None:
     jobdir.mkdir(parents=True, exist_ok=True)
     (jobdir / "script").write_text(args.script, encoding="utf-8")
     (jobdir / "name").write_text(args.job_name, encoding="utf-8")
+    (jobdir / "complete_command_invocation").write_text(
+        shlex.join(sys.argv), encoding="utf-8"
+    )
     env_file = jobdir / "env"
     if args.ntasks:
         env_file.write_text(
diff --git a/tests/ert/unit_tests/scheduler/test_lsf_driver.py b/tests/ert/unit_tests/scheduler/test_lsf_driver.py
index be110913799..cd867dc22fc 100644
--- a/tests/ert/unit_tests/scheduler/test_lsf_driver.py
+++ b/tests/ert/unit_tests/scheduler/test_lsf_driver.py
@@ -19,6 +19,8 @@
 from hypothesis import strategies as st
 
 from ert.config import QueueConfig
+from ert.config.queue_config import _parse_realization_memory_str
+from ert.mode_definitions import ENSEMBLE_EXPERIMENT_MODE
 from ert.scheduler import LsfDriver, create_driver
 from ert.scheduler.driver import SIGNAL_OFFSET
 from ert.scheduler.lsf_driver import (
@@ -39,6 +41,7 @@
     parse_bjobs,
     parse_bjobs_exec_hosts,
 )
+from tests.ert.ui_tests.cli.run_cli import run_cli
 from tests.ert.utils import poll, wait_until
 
 from .conftest import mock_bin
@@ -1364,3 +1367,60 @@ async def finished(iens: int, returncode: int):
     # a controlled fashion:
     if (tmp_path / "trap_handle_installed").exists():
         wait_until((tmp_path / "was_killed").exists, timeout=4)
+
+
+@pytest.mark.integration_test
+@pytest.mark.usefixtures("copy_poly_case")
+def test_queue_options_are_propagated_from_config_to_bsub(monkeypatch):
+    """
+    This end to end test is here to verify that queue_options are correctly
+    propagated all the way from ert config to the cluster.
+    """
+    mock_bin(monkeypatch, os.getcwd())
+    expected_queue = "foo_bar_queue"
+    expected_resource_string = "location=foo_bar_location"
+    expected_realization_memory = "9GB"
+    expected_project_code = "foo_bar_project"
+    expected_excluded_hosts = "foo_host,bar_host"
+    expected_num_cpu = 98
+    expected_memory_mb = (
+        _parse_realization_memory_str(expected_realization_memory) // 1024**2
+    )
+    expected_host_filter = " && ".join(
+        f"hname!='{host_name}'" for host_name in expected_excluded_hosts.split(",")
+    )
+
+    with open("poly.ert", "a", encoding="utf-8") as f:
+        f.write(
+            dedent(
+                f"""\
+                NUM_CPU {expected_num_cpu}
+                REALIZATION_MEMORY {expected_realization_memory}
+                QUEUE_SYSTEM LSF
+                QUEUE_OPTION LSF LSF_QUEUE {expected_queue}
+                QUEUE_OPTION LSF LSF_RESOURCE {expected_resource_string}
+                QUEUE_OPTION LSF PROJECT_CODE {expected_project_code}
+                QUEUE_OPTION LSF EXCLUDE_HOST {expected_excluded_hosts}
+                NUM_REALIZATIONS 1
+                """
+            )
+        )
+    run_cli(ENSEMBLE_EXPERIMENT_MODE, "--disable-monitoring", "poly.ert")
+    # Mirror the mock bsub script's "${PYTEST_TMP_PATH:-.}" fallback.
+    mock_jobs_dir = Path(os.environ.get("PYTEST_TMP_PATH", ".")) / "mock_jobs"
+    # There is only one realization in this test
+    job_dir = next(mock_jobs_dir.iterdir())
+    complete_command_invocation = (job_dir / "complete_command_invocation").read_text(
+        encoding="utf-8"
+    )
+
+    assert f"-q {expected_queue}" in complete_command_invocation
+    assert f"-P {expected_project_code}" in complete_command_invocation
+    assert f"-n {expected_num_cpu}" in complete_command_invocation
+
+    complete_resource_requirement = (job_dir / "resource_requirement").read_text(
+        encoding="utf-8"
+    )
+    assert expected_resource_string in complete_resource_requirement
+    assert f"rusage[mem={expected_memory_mb}]" in complete_resource_requirement
+    assert f"select[{expected_host_filter}]" in complete_resource_requirement
diff --git a/tests/ert/unit_tests/scheduler/test_openpbs_driver.py b/tests/ert/unit_tests/scheduler/test_openpbs_driver.py
index cb39f0046a3..1cac32a4890 100644
--- a/tests/ert/unit_tests/scheduler/test_openpbs_driver.py
+++ b/tests/ert/unit_tests/scheduler/test_openpbs_driver.py
@@ -14,6 +14,7 @@
 from hypothesis import strategies as st
 
 from ert.cli.main import ErtCliError
+from ert.config.queue_config import _parse_realization_memory_str
 from ert.mode_definitions import ENSEMBLE_EXPERIMENT_MODE
 from ert.scheduler.openpbs_driver import (
     JOB_STATES,
@@ -606,3 +607,52 @@ def test_openpbs_driver_with_poly_example_failing_poll_fails_ert_and_propagates_
             "poly.ert",
         )
     assert "RuntimeError: Status polling failed" in caplog.text
+
+
+@pytest.mark.integration_test
+@pytest.mark.usefixtures("copy_poly_case")
+def test_queue_options_are_propagated_from_config_to_qsub(monkeypatch):
+    """
+    This end to end test is here to verify that queue_options are correctly
+    propagated all the way from ert config to the cluster.
+    """
+    mock_bin(monkeypatch, os.getcwd())
+    expected_queue = "foo_bar_queue"
+    expected_realization_memory = "9GB"
+    expected_project_code = "foo_bar_project"
+    expected_cluster_label = "foo_bar_cluster"
+    expected_num_cpu = 98
+    expected_memory_mb = (
+        _parse_realization_memory_str(expected_realization_memory) // 1024**2
+    )
+
+    with open("poly.ert", "a", encoding="utf-8") as f:
+        f.write(
+            dedent(
+                f"""\
+                NUM_CPU {expected_num_cpu}
+                REALIZATION_MEMORY {expected_realization_memory}
+                QUEUE_SYSTEM TORQUE
+                QUEUE_OPTION TORQUE QUEUE {expected_queue}
+                QUEUE_OPTION TORQUE CLUSTER_LABEL {expected_cluster_label}
+                QUEUE_OPTION TORQUE PROJECT_CODE {expected_project_code}
+                NUM_REALIZATIONS 1
+                """
+            )
+        )
+    run_cli(ENSEMBLE_EXPERIMENT_MODE, "--disable-monitoring", "poly.ert")
+    # Mirror the mock qsub script's "${PYTEST_TMP_PATH:-.}" fallback.
+    mock_jobs_dir = Path(os.environ.get("PYTEST_TMP_PATH", ".")) / "mock_jobs"
+    # There is only one realization in this test
+    job_dir = next(mock_jobs_dir.iterdir())
+    complete_command_invocation = (job_dir / "complete_command_invocation").read_text(
+        encoding="utf-8"
+    )
+
+    assert f"-q {expected_queue}" in complete_command_invocation
+    assert f"-A {expected_project_code}" in complete_command_invocation
+    assert (
+        f"-l ncpus={expected_num_cpu}:mem={expected_memory_mb}mb"
+        in complete_command_invocation
+    )
+    assert f"-l {expected_cluster_label}" in complete_command_invocation
diff --git a/tests/ert/unit_tests/scheduler/test_slurm_driver.py b/tests/ert/unit_tests/scheduler/test_slurm_driver.py
index 3e1703976b0..0004d155633 100644
--- a/tests/ert/unit_tests/scheduler/test_slurm_driver.py
+++ b/tests/ert/unit_tests/scheduler/test_slurm_driver.py
@@ -7,13 +7,17 @@
 import sys
 from contextlib import ExitStack as does_not_raise
 from pathlib import Path
+from textwrap import dedent
 
 import pytest
 from hypothesis import given
 from hypothesis import strategies as st
 
+from ert.config.queue_config import _parse_realization_memory_str
+from ert.mode_definitions import ENSEMBLE_EXPERIMENT_MODE
 from ert.scheduler import SlurmDriver
 from ert.scheduler.slurm_driver import _seconds_to_slurm_time_format
+from tests.ert.ui_tests.cli.run_cli import run_cli
 from tests.ert.utils import poll
 
 from .conftest import mock_bin
@@ -457,3 +461,59 @@ async def test_slurm_uses_sacct(
 
     # Make sure sacct was tried:
     assert "scontrol failed, trying sacct" in caplog.text
+
+
+@pytest.mark.integration_test
+@pytest.mark.usefixtures("copy_poly_case")
+def test_queue_options_are_propagated_from_config_to_sbatch(monkeypatch):
+    """
+    This end to end test is here to verify that queue_options are correctly
+    propagated all the way from ert config to the cluster.
+    """
+    mock_bin(monkeypatch, os.getcwd())
+    expected_partition = "foo_bar_partition"
+    expected_realization_memory = "9GB"
+    expected_project_code = "foo_bar_project"
+    expected_exclude_hosts = "not_foohost,not_barhost"
+    expected_include_hosts = "foohost,barhost"
+    expected_max_runtime = 99
+    expected_num_cpu = 98
+    expected_memory_mb = (
+        _parse_realization_memory_str(expected_realization_memory) // 1024**2
+    )
+    expected_time = _seconds_to_slurm_time_format(expected_max_runtime)
+
+    with open("poly.ert", "a", encoding="utf-8") as f:
+        f.write(
+            dedent(
+                f"""\
+                NUM_CPU {expected_num_cpu}
+                REALIZATION_MEMORY {expected_realization_memory}
+                QUEUE_SYSTEM SLURM
+                QUEUE_OPTION SLURM PARTITION {expected_partition}
+                QUEUE_OPTION SLURM INCLUDE_HOST {expected_include_hosts}
+                QUEUE_OPTION SLURM EXCLUDE_HOST {expected_exclude_hosts}
+                QUEUE_OPTION SLURM PROJECT_CODE {expected_project_code}
+                QUEUE_OPTION SLURM MAX_RUNTIME {expected_max_runtime}
+                NUM_REALIZATIONS 1
+                """
+            )
+        )
+    run_cli(ENSEMBLE_EXPERIMENT_MODE, "--disable-monitoring", "poly.ert")
+    # Fall back to the cwd when PYTEST_TMP_PATH is unset, as the shell mocks do.
+    mock_jobs_dir = Path(os.environ.get("PYTEST_TMP_PATH", ".")) / "mock_jobs"
+    # There is only one realization in this test
+    job_dir = next(mock_jobs_dir.iterdir())
+    complete_command_invocation = (job_dir / "complete_command_invocation").read_text(
+        encoding="utf-8"
+    )
+
+    assert f"--ntasks={expected_num_cpu}" in complete_command_invocation
+    assert f"--mem={expected_memory_mb}M" in complete_command_invocation
+
+    assert f"--nodelist={expected_include_hosts}" in complete_command_invocation
+    assert f"--exclude={expected_exclude_hosts}" in complete_command_invocation
+    assert f"--time={expected_time}" in complete_command_invocation
+
+    assert f"--partition={expected_partition}" in complete_command_invocation
+    assert f"--account={expected_project_code}" in complete_command_invocation