forked from SchedMD/slurm
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathjob_submit.lua.example
224 lines (197 loc) · 6.22 KB
/
job_submit.lua.example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
--[[
--Example job_submit.lua file for Slurm
--For more information check:
-- https://slurm.schedmd.com/job_submit_plugins.html
--For the list of available fields check:
-- src/plugins/job_submit/lua/job_submit_lua.c
--]]
--[[ Tag passed as the leading %s argument of the messages logged below --]]
log_prefix = "slurm_job_submit"
--[[
-- Look for the literal text `arg` inside `str`.
--
-- str: string to search; may be nil
-- arg: text to look for. It is matched literally (plain search), so
--      characters that are magic in Lua patterns (such as % . * + - ?)
--      carry no special meaning and cannot raise a pattern error.
--
-- Returns the start and end index of the first occurrence (truthy),
-- nil when `arg` does not occur, or false when `str` is nil.
--]]
function _find_in_str(str, arg)
    if str == nil then
        return false
    end
    --[[ init = 1, plain = true: substring search, no pattern matching --]]
    return string.find(str, arg, 1, true)
end
--[[
-- Send one formatted message both to the end user (slurm.log_user)
-- and to the debug log (slurm.log_debug).
--
-- fmt: format string
-- ...: values substituted into fmt
--
-- Different messages logged to end user should be associated with a
-- unique return code, to make those properly displayed in case
-- of modification of array job.
--
-- Returning a message to user from slurm_job_modify is supported
-- since Slurm 23.02, using it in older versions will result in
-- an error message in slurmctld logs.
--]]
function _log_user_and_debug(fmt, ...)
    --[[
    -- Forward the varargs as they are instead of packing them into a
    -- table and unpacking it again: this keeps nil arguments in place
    -- and needs neither table.unpack (Lua 5.2+) nor the global unpack
    -- (Lua 5.1), so it works on every Lua version.
    --]]
    slurm.log_user(fmt, ...)
    slurm.log_debug(fmt, ...)
end
--[[
-- Do not allow interactive jobs longer than 4 hours except for
-- certain users.
--
-- job_desc: job description; the 'script' and 'time_limit' fields are read
-- uid:      id of the user the request is checked for
--
-- Returns slurm.SUCCESS when the request is acceptable and
-- slurm.ESLURM_INVALID_TIME_LIMIT when an interactive request has no
-- time limit or one above 4 * 60.
--]]
function validate_interactive_job(job_desc, uid)
    --[[ A job carrying a script is a batch job: no limit applies --]]
    if job_desc.script ~= nil then
        return slurm.SUCCESS
    end

    --[[ Exempt users; extend with e.g. "or uid == SpecialUser" --]]
    if uid == 0 then
        _log_user_and_debug("Interactive job allowed for uid: %u", uid)
        return slurm.SUCCESS
    end

    local requested = job_desc.time_limit
    local complaint = nil
    if requested == slurm.NO_VAL then
        complaint = "You must request a time limit within 4 hours for interactive jobs"
    elseif requested > (4 * 60) then
        complaint = "Interactive jobs for time longer than 4h forbidden"
    end

    if complaint == nil then
        return slurm.SUCCESS
    end
    _log_user_and_debug(complaint)
    return slurm.ESLURM_INVALID_TIME_LIMIT
end
--[[
-- Job submission hook.
--
-- job_desc:   description of the job being submitted; may be modified
--             here (the partition field is rewritten below)
-- part_list:  table of partitions; each entry is read for its 'name',
--             'flag_default' and 'priority_job_factor' fields
-- submit_uid: id of the submitting user
--
-- Returns slurm.SUCCESS to accept the job, or the error code of the
-- first policy that rejects it.
--]]
function slurm_job_submit(job_desc, part_list, submit_uid)
    --[[
    -- Don't block any activity from root.
    -- This may make reproduction of user errors difficult
    --]]
    if submit_uid == 0 then
        return slurm.SUCCESS
    end

    local rc = validate_interactive_job(job_desc, submit_uid)
    if rc ~= slurm.SUCCESS then
        return rc
    end

    --[[
    -- Change partition to GPU if job requested any GPU
    -- depending on option used this may be visible in
    -- different job_desc field:
    --   --gres=gpu:        -> tres_per_node
    --   --gpus-per-task    -> tres_per_task
    --   --gpus-per-socket  -> tres_per_socket
    --   --gpus             -> tres_per_job
    --]]
    if _find_in_str(job_desc['tres_per_node'], "gpu") or
       _find_in_str(job_desc['tres_per_task'], "gpu") or
       _find_in_str(job_desc['tres_per_socket'], "gpu") or
       _find_in_str(job_desc['tres_per_job'], "gpu") then
        job_desc.partition = 'gpu'
        _log_user_and_debug("%s: Set partition to: %s",
                            log_prefix,
                            job_desc.partition)
    end

    --[[
    -- Memory policy: a memory request is mandatory and must not be 0.
    -- Every rejection is reported through _log_user_and_debug so that
    -- it reaches the user and the debug log alike.
    --]]
    if job_desc['min_mem_per_node'] ~= nil then
        --[[ Forbid unlimited --mem if memory specified --]]
        if job_desc['min_mem_per_node'] == 0 then
            _log_user_and_debug("%s: --mem=0 is not allowed",
                                log_prefix)
            return slurm.ESLURM_ACCOUNTING_POLICY
        end
    elseif job_desc['min_mem_per_cpu'] == nil then
        --[[ Enforce --mem specification --]]
        _log_user_and_debug("%s: Neither --mem nor --mem-per-cpu specified",
                            log_prefix)
        return slurm.ESLURM_ACCOUNTING_POLICY
    elseif job_desc['min_mem_per_cpu'] == 0 then
        _log_user_and_debug("%s: --mem-per-cpu=0 is not allowed",
                            log_prefix)
        return slurm.ESLURM_ACCOUNTING_POLICY
    end

    --[[ Forbid usage of MAINT job name --]]
    if job_desc['name'] == "MAINT" then
        _log_user_and_debug("%s: JobName=MAINT reserved. Please use other name.",
                            log_prefix)
        return slurm.ERROR
    end

    --[[
    -- If the job names no partition and no default partition exists,
    -- pick the partition with the highest priority_job_factor from
    -- part_list.
    -- NOTE(review): presumably part_list only holds partitions this
    -- user has access to - confirm against the plugin documentation.
    --]]
    if job_desc['partition'] == nil then
        local new_partition = nil
        local top_priority = -1
        local inx = 0
        --[[ pairs() order is unspecified; the maximum does not depend on it --]]
        for _, part in pairs(part_list) do
            slurm.log_debug2("%s: part name[%d]:%s",
                             log_prefix,
                             inx,
                             part['name'])
            inx = inx + 1
            --[[
            -- Don't change partition if the default
            -- is defined in the slurm.conf
            --]]
            if part['flag_default'] ~= 0 then
                top_priority = -1
                break
            end
            local priority = part['priority_job_factor']
            if priority ~= nil then
                slurm.log_debug3("%s: last priority is %d",
                                 log_prefix,
                                 priority)
                if priority > top_priority then
                    top_priority = priority
                    new_partition = part.name
                end
            end
        end
        --[[ top_priority stays at -1 when nothing was (or may be) chosen --]]
        if top_priority >= 0 then
            _log_user_and_debug("%s: setting default partition value: %s",
                                log_prefix,
                                new_partition)
            job_desc.partition = new_partition
        end
    end

    return slurm.SUCCESS
end
--[[
-- Job modification hook.
--
-- job_desc:   description of the requested update
-- job_ptr:    record of the job being updated; only its 'script'
--             field is read here
-- part_list:  table of partitions (unused here)
-- modify_uid: id of the user requesting the update
--
-- Returns slurm.SUCCESS to accept the update, or the error code of
-- the first policy that rejects it.
--
-- While working on that it's important to understand that
-- modification of a job array metarecord may differ from specific
-- element modification. When a job is not yet split into tasks it
-- will be treated as one element.
--]]
function slurm_job_modify(job_desc, job_ptr, part_list, modify_uid)
    --[[ Don't block/modify any update from root --]]
    if modify_uid == 0 then
        return slurm.SUCCESS
    end

    --[[
    -- Re-check the interactive time limit only when this update changes
    -- the time limit of a job that has no batch script. Running the
    -- check on every update rejects requests that do not touch the
    -- time limit at all and applies the interactive limit to batch jobs.
    -- NOTE(review): assumes an update that leaves the time limit alone
    -- carries slurm.NO_VAL in job_desc.time_limit, and that
    -- job_ptr.script is set for batch jobs - confirm against the
    -- job_submit/lua plugin of the deployed Slurm version. If
    -- job_ptr.script is unavailable the job is treated as interactive.
    --]]
    local new_limit = job_desc['time_limit']
    if new_limit ~= nil and new_limit ~= slurm.NO_VAL and
       (job_ptr == nil or job_ptr['script'] == nil) then
        local rc = validate_interactive_job(job_desc, modify_uid)
        if rc ~= slurm.SUCCESS then
            return rc
        end
    end

    --[[
    -- Forbid unlimited --mem in update
    -- for instance: scontrol update job=XX MinMemoryNode=0
    -- (a field that is not part of the update is nil and never equals 0)
    --]]
    if job_desc['min_mem_per_node'] == 0 then
        _log_user_and_debug("%s: Updating MinMemPerNode=0 is not allowed",
                            log_prefix)
        return slurm.ESLURM_ACCOUNTING_POLICY
    end

    if job_desc['min_mem_per_cpu'] == 0 then
        _log_user_and_debug("%s: Updating MinMemoryCPU=0 is not allowed",
                            log_prefix)
        return slurm.ESLURM_ACCOUNTING_POLICY
    end

    --[[ Forbid usage of MAINT job name --]]
    if job_desc['name'] == "MAINT" then
        _log_user_and_debug("%s: Updating JobName to MAINT is not allowed from non-root user",
                            log_prefix)
        --[[
        -- slurm.ERROR is displayed as 'Unspecified error'. The message
        -- above reaches the user only on Slurm 23.02 or newer; older
        -- versions limit slurm.log_user to job_submit.
        --]]
        return slurm.ERROR
    end

    return slurm.SUCCESS
end