Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a bug in the runtime when sbatch is used #26772

Merged
merged 3 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions runtime/src/launch/slurm-srun/launch-slurm-srun.c
Original file line number Diff line number Diff line change
Expand Up @@ -360,14 +360,24 @@ static char* chpl_launch_create_command(int argc, char* argv[],

// if were running a batch job
if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL || generate_sbatch_script) {
slurmFilename = (char*)chpl_mem_allocMany(
(strlen(baseSBATCHFilename) + snprintf(NULL, 0, "%d", (int)mypid) + 1),
sizeof(char), CHPL_RT_MD_FILENAME, -1, 0);

const int filename_size = strlen(baseSBATCHFilename) +
snprintf(NULL, 0, "%d", (int)mypid) + 1;

if (filename_size <= 0) {
chpl_internal_error("An unexpected error occured while computing \
sbatch filename size");
}

slurmFilename = (char*)chpl_mem_allocMany(filename_size, sizeof(char),
CHPL_RT_MD_FILENAME, -1, 0);

// set the sbatch filename
snprintf(slurmFilename, strlen(slurmFilename), "%s%d", baseSBATCHFilename,
(int)mypid);
const int ret = snprintf(slurmFilename, filename_size, "%s%d",
baseSBATCHFilename, (int)mypid);

// open the batch file and create the header

slurmFile = fopen(slurmFilename, "w");
fprintf(slurmFile, "#!/bin/sh\n\n");

Expand Down Expand Up @@ -648,10 +658,11 @@ static void chpl_launch_cleanup(void) {
// remove sbatch file unless it was explicitly generated by the user
if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL && !generate_sbatch_script) {
if (unlink(slurmFilename)) {
char* msg = (char*)chpl_mem_allocMany(
(strlen(slurmFilename) + strlen(strerror(errno)) + 36),
sizeof(char), CHPL_RT_MD_FILENAME, -1, 0);
snprintf(msg, strlen(msg), "Error removing temporary file '%s': %s",
const int msg_size = strlen(slurmFilename) +
strlen(strerror(errno)) + 36;
char* msg = (char*)chpl_mem_allocMany(msg_size, sizeof(char),
CHPL_RT_MD_FILENAME, -1, 0);
snprintf(msg, msg_size, "Error removing temporary file '%s': %s",
slurmFilename, strerror(errno));
chpl_warning(msg, 0, 0);
chpl_mem_free(msg, 0, 0);
Expand Down
2 changes: 2 additions & 0 deletions util/test/sub_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2488,6 +2488,8 @@ def run_compileline(flag, lookingfor):
# only notify for a failed execution if launching the test was successful
elif (not launcher_error):
sys.stdout.write('%s[Error execution failed for %s]\n'%(futuretest,test_name))
sys.stdout.write('[Execution output was as follows:]\n')
sys.stdout.write(trim_output(output))

if exectimeout or status != 0 or exec_status != 0:
break
Expand Down