From 1dd52f2660985501ab825365d3e86582209652b0 Mon Sep 17 00:00:00 2001 From: Gennaro Oliva <oliva.g@na.icar.cnr.it> Date: Fri, 6 Nov 2015 12:45:05 +0100 Subject: [PATCH] Imported Upstream version 15.08.3 --- .gitignore | 3 + META | 4 +- NEWS | 53 +++ contribs/pam_slurm_adopt/README | 91 ++-- contribs/pam_slurm_adopt/pam_slurm_adopt.c | 429 +++++++++++++----- doc/html/Makefile.am | 1 - doc/html/Makefile.in | 1 - doc/html/burst_buffer.shtml | 7 +- doc/html/download.shtml | 4 +- doc/html/lll.gif | Bin 1794 -> 0 bytes doc/html/news.shtml | 14 +- doc/html/quickstart_admin.shtml | 13 +- doc/html/resource_limits.shtml | 12 +- doc/html/team.shtml | 5 +- doc/html/tres.shtml | 6 +- doc/man/man1/sacct.1 | 5 +- doc/man/man1/scontrol.1 | 14 + doc/man/man5/burst_buffer.conf.5 | 6 +- doc/man/man5/slurm.conf.5 | 15 +- etc/slurmd.service.in | 1 + slurm.spec | 6 +- src/api/job_info.c | 3 +- src/common/assoc_mgr.c | 2 + src/common/eio.c | 12 +- src/common/gres.c | 2 +- src/common/read_config.c | 23 +- src/common/slurm_protocol_defs.c | 6 + src/common/stepd_api.c | 13 +- src/db_api/job_report_functions.c | 10 +- .../accounting_storage/mysql/as_mysql_job.c | 40 +- .../mysql/as_mysql_jobacct_process.c | 18 +- .../mysql/as_mysql_rollup.c | 49 +- .../cray/acct_gather_energy_cray.c | 6 + .../ibmaem/acct_gather_energy_ibmaem.c | 1 + .../burst_buffer/cray/burst_buffer_cray.c | 258 ++++++++++- src/plugins/job_submit/lua/job_submit_lua.c | 36 +- .../elasticsearch/jobcomp_elasticsearch.c | 16 +- src/plugins/sched/backfill/backfill.c | 89 +++- src/sacct/print.c | 12 +- src/scontrol/update_part.c | 5 + src/slurmctld/agent.c | 70 ++- src/slurmctld/job_mgr.c | 31 +- src/slurmctld/licenses.c | 3 +- src/slurmctld/node_scheduler.c | 10 +- src/slurmctld/proc_req.c | 3 + src/slurmctld/reservation.c | 3 +- src/slurmctld/slurmctld.h | 10 +- src/slurmctld/step_mgr.c | 121 +++-- src/slurmd/common/proctrack.c | 7 + src/slurmd/slurmd/req.c | 29 +- src/slurmd/slurmstepd/req.c | 16 +- src/smap/smap.c | 15 +- src/squeue/print.c | 14 +- src/squeue/squeue.c | 3 +- testsuite/expect/globals | 31 ++ testsuite/expect/inc21.21_tests | 5 +- testsuite/expect/inc22.1.1 | 4 +- testsuite/expect/inc22.1.3 | 4 +- testsuite/expect/test1.77 | 11 +- testsuite/expect/test2.15 | 3 + testsuite/expect/test21.21 | 5 + testsuite/expect/test22.1 | 16 +- testsuite/expect/test4.13 | 27 +- 63 files changed, 1280 insertions(+), 452 deletions(-) delete mode 100644 doc/html/lll.gif diff --git a/.gitignore b/.gitignore index 65476f3ba..7589a8fe8 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,9 @@ Makefile /etc/cgroup.release_common.example /etc/init.d.slurm /etc/init.d.slurmdbd +/etc/slurmctld.service +/etc/slurmd.service +/etc/slurmdbd.service /libtool /slurm/slurm.h /slurm/stamp-h2 diff --git a/META b/META index 9872fa58c..58972f13a 100644 --- a/META +++ b/META @@ -9,8 +9,8 @@ Name: slurm Major: 15 Minor: 08 - Micro: 2 - Version: 15.08.2 + Micro: 3 + Version: 15.08.3 Release: 1 ## diff --git a/NEWS b/NEWS index 868e82166..090ea4bb2 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,53 @@ This file describes changes in recent versions of Slurm. It primarily documents those changes that are of interest to users and administrators. +* Changes in Slurm 15.08.3 +========================== + -- Correct Slurm's RPM build if Munge is not installed. + -- Job array termination status email ExitCode based upon highest exit code + from any task in the job array rather than the last task. Also change the + state from "Ended" or "Failed" to "Mixed" where appropriate. 
+ -- Squeue recombines pending job array records only if their name and partition + are identical. + -- Fix some minor leaks in the job info and step info API. + -- Export missing QOS id when filling in association with the association + manager. + -- Fix invalid reference if a lua job_submit plugin references a default qos + when a user doesn't exist in the database. + -- Use association enforcement in the lua plugin. + -- Fix a few spots missing defines of accounting_enforce or acct_db_conn + in the plugins. + -- Show requested TRES in scontrol show jobs when job is pending. + -- Improve sched/backfill support for job features, especially XOR construct. + -- Correct scheduling logic for job features option with XOR construct that + could delay a job's initiation. + -- Remove unneeded frees when creating a tres string. + -- Send a tres_alloc_str for the batch step + -- Fix incorrect check for slurmdb_find_tres_count_in_string in various places, + it needed to check for INFINITE64 instead of zero. + -- Don't allow scontrol to create partitions with the name "DEFAULT". + -- burst_buffer/cray: Change error from "invalid request" to "permssion denied" + if a non-authorized user tries to create/destroy a persistent buffer. + -- PrologFlags work: Setting a flag of "Contain" implicitly sets the "Alloc" + flag. Fix code path which could prevent execution of the Prolog when the + "Alloc" or "Contain" flag were set. + -- Fix for acct_gather_energy/cray|ibmaem to work with missed enum. + -- MYSQL - When inserting a job and begin_time is 0 do not set it to + submit_time. 0 means the job isn't eligible yet so we need to treat it so. + -- MYSQL - Don't display ineligible jobs when querying for a window of time. + -- Fix creation of advanced reservation of cores on nodes which are DOWN. + -- Return permission denied if regular user tries to release job held by an + administrator. + -- MYSQL - Fix rollups for multiple jobs running by the same association + in an hour counting multiple times. + -- Burstbuffer/Cray plugin - Fix for persistent burst buffer use. + Don't call paths if no #DW options. + -- Modifications to pam_slurm_adopt to work correctly for the "extern" step. + -- Alphabetize debugflags when printing them out. + -- Fix systemd's slurmd service from killing slurmstepds on shutdown. + -- Fixed counter of not indexed jobs, error_cnt post-increment changed to + pre-increment. + * Changes in Slurm 15.08.2 ========================== -- Fix for tracking node state when jobs that have been allocated exclusive @@ -499,6 +546,10 @@ documents those changes that are of interest to users and administrators. has been modified to accept a "Pack" and "NoPack" option. These options can be used to override the CR_PACK_NODE configuration option. +* Changes in Slurm 14.11.11 +=========================== + -- Fix systemd's slurmd service from killing slurmstepds on shutdown. + * Changes in Slurm 14.11.10 =========================== -- Fix truncation of job reason in squeue. @@ -544,6 +595,8 @@ documents those changes that are of interest to users and administrators. -- Fix issue with sacct, printing 0_0 for array's that had finished in the database but the start record hadn't made it yet. -- Fix sacct -j, (nothing but a comma) to not return all jobs. + -- Prevent slurmstepd from core dumping if /proc/<pid>/stat has + unexpected format. 
* Changes in Slurm 14.11.9 ========================== diff --git a/contribs/pam_slurm_adopt/README b/contribs/pam_slurm_adopt/README index c4658db31..15799630e 100644 --- a/contribs/pam_slurm_adopt/README +++ b/contribs/pam_slurm_adopt/README @@ -14,7 +14,7 @@ DESCRIPTION This module attempts to determine the job which originated this connection. The module is configurable; these are the default steps: - 1) Check the local stepds for a count of jobs owned by the non-root user + 1) Check the local stepd for a count of jobs owned by the non-root user a) If none, deny (option action_no_jobs) b) If only one, adopt the process into that job c) If multiple, continue @@ -38,36 +38,72 @@ This module has the following options (* = default): a service or similar, it will be tracked and killed by Slurm when the job exits. This sounds bad because it is bad. - 1* = let the connection through without adoption + 1* = Let the connection through without adoption 0 = I am crazy. I want random services to die when root jobs exit. I also like it when RPC calls block for a while then time out. action_no_jobs - The action to perform if the user has no jobs on the node - ignore = let the connection through without adoption - deny* = deny the connection + ignore = Do nothing. Fall through to the next pam module + deny* = Deny the connection - action_unknown - The action to perform when the RPC call does not locate the - source job and the user has multiple jobs on the node to - choose from + action_unknown - The action to perform when the user has multiple jobs on + the node *and* the RPC call does not locate the source job. + If the RPC mechanism works properly in your environment, + this option will likely be relevant *only* when connecting + from a login node. - any* = pick a job in a (somewhat) random fashion - ignore = let the connection through without adoption - deny = deny the connection + newest* = Pick the newest job on the node. The "newest" job is chosen + based on the mtime of the job's step_extern cgroup; asking + Slurm would require an RPC to the controller. The user can ssh + in but may be adopted into a job that exits earlier than the + job they intended to check on. The ssh connection will at + least be subject to appropriate limits and the user can be + informed of better ways to accomplish their objectives if this + becomes a problem + user = Use the /slurm/uid_$UID cgroups. Not all cgroups set + appropriate limits at this level so this may not be very + effective. Additionally, job accounting at this level is + impossible as is automatic cleanup of stray processes when the + job exits. This settings is not recommended. + allow = Let the connection through without adoption + deny = Deny the connection - action_adopt_failure - The action to perform if the job is unable to be - adopted into a job for whatever reason + action_adopt_failure - The action to perform if the process is unable to be + adopted into an identified job for whatever reason - ignore = let the connection through without adoption - deny* = deny the connection + allow* = Let the connection through without adoption + deny = Deny the connection + action_generic_failure - The action to perform it there certain failures + such as inability to talk to the local slurmd or + if the kernel doesn't offer the correct facilities + + ignore* = Do nothing. Fall through to the next pam module + allow = Let the connection through without adoption + deny = Deny the connection log_level - See SlurmdDebug in slurm.conf(5) for available options. 
The default log_level is info. +SLURM.CONF CONFIGURATION + For best results, all relevant cgroups plugins (e.g. proctrack/cgroup) should + be enabled in slurm.conf. At least one must be enabled for this module to be + even somewhat useful. + + PrologFlags=contain must be set in slurm.conf. This sets up the "extern" step + into which ssh-launched processes will be adopted. + + **** IMPORTANT **** + PrologFlags=contain must be in place *before* using this module. + The module bases its checks on local steps that have already been launched. If + the user has no steps on the node, such as the extern step, the module will + assume that the user has no jobs allocated to the node. Depending on your + configuration of the pam module, you might deny *all* user ssh attempts. + NOTES This module and the related RPC call currently support Linux systems which have network connection information available through /proc/net/tcp{,6}. A @@ -79,20 +115,21 @@ NOTES Slurm is tracking. IPv6 is supported by the RPC data structure itself and the code which sends it - or receives it. Sending the RPC call to an IPv6 address is not currently + and receives it. Sending the RPC call to an IPv6 address is not currently supported by Slurm. Once support is added, remove the relevant check in - slurm_network_callerid (). + slurm_network_callerid(). - proctrack/cgroup is recommended on Linux. + One future action_unknown idea is an option to pick the job with the longest + time remaining. This is not yet implemented. FIREWALLS, IP ADDRESSES, ETC. - slurmd should be accessible on any IP address that a user might launch ssh. - The RPC call to determine the source job must be able to reach the slurmd port - on that particular IP address. + slurmd should be accessible on any IP address from which a user might launch + ssh. The RPC call to determine the source job must be able to reach the slurmd + port on that particular IP address. - If there is no slurmd on the source node, it is better to have the RPC call be - rejected rather than silently dropped. This will allow better responsiveness - to the RPC initiator. + If there is no slurmd on the source node, such as on a login node, it is + better to have the RPC call be rejected rather than silently dropped. This + will allow better responsiveness to the RPC initiator. EXAMPLES / SUGGESTED USAGE Use of this module is recommended on any compute node. @@ -100,10 +137,12 @@ EXAMPLES / SUGGESTED USAGE Add the following line to the appropriate file in /etc/pam.d, such as system-auth or sshd: - account required pam_slurm_adopt.so + account sufficient pam_slurm_adopt.so - If you always want to allow access for an administrative group (eg, wheel), - stack the pam_access module ahead of pam_slurm: + If you always want to allow access for an administrative group (e.g. wheel), + stack the pam_access module after pam_slurm_adopt. A success with + pam_slurm_adopt is sufficient to allow access but the pam_access module can + allow others, such as staff, access even without jobs. 
account sufficient pam_slurm_adopt.so account required pam_access.so diff --git a/contribs/pam_slurm_adopt/pam_slurm_adopt.c b/contribs/pam_slurm_adopt/pam_slurm_adopt.c index 8af62b7ce..100003c25 100644 --- a/contribs/pam_slurm_adopt/pam_slurm_adopt.c +++ b/contribs/pam_slurm_adopt/pam_slurm_adopt.c @@ -65,6 +65,8 @@ #include "slurm/slurm.h" #include "src/common/slurm_xlator.h" #include "src/common/slurm_protocol_api.h" +#include "src/common/xcgroup_read_config.c" +#include "src/slurmd/common/xcgroup.c" /* This definition would probably be good to centralize somewhere */ #ifndef MAXHOSTNAMELEN @@ -72,16 +74,21 @@ #endif typedef enum { - CALLERID_ACTION_ANY, + CALLERID_ACTION_NEWEST, + CALLERID_ACTION_ALLOW, CALLERID_ACTION_IGNORE, - CALLERID_ACTION_DENY + CALLERID_ACTION_DENY, + CALLERID_ACTION_USER } callerid_action_t; /* module options */ static struct { - int single_job_skip_rpc; + int single_job_skip_rpc; /* Undocumented. If 1 and there is only 1 user + * job, adopt it and skip RPC. If 0, *always* + * try RPC even in single job situations. + * Unlikely to ever be set to 0. */ int ignore_root; - int action_no_jobs; + callerid_action_t action_no_jobs; callerid_action_t action_unknown; callerid_action_t action_adopt_failure; callerid_action_t action_generic_failure; @@ -93,27 +100,85 @@ static void _init_opts(void) opts.single_job_skip_rpc = 1; opts.ignore_root = 1; opts.action_no_jobs = CALLERID_ACTION_DENY; - opts.action_unknown = CALLERID_ACTION_ANY; - opts.action_adopt_failure = CALLERID_ACTION_IGNORE; - opts.action_generic_failure = CALLERID_ACTION_IGNORE; + opts.action_unknown = CALLERID_ACTION_NEWEST; + opts.action_adopt_failure = CALLERID_ACTION_ALLOW; + opts.action_generic_failure = CALLERID_ACTION_ALLOW; opts.log_level = LOG_LEVEL_INFO; } -static int _adopt_process(pid_t pid, uint32_t job_id) +/* Returns SLURM_SUCCESS if opts.action_adopt_failure == CALLERID_ACTION_ALLOW + * or if the process can be adopted into any cgroup. The admin might not have + * configured all the cgroups, so we'll assume that a success in one means they + * were adopted into all the configured ones. A TODO item is to only attempt + * adoption into the configured cgroups. + * + * If job_id==NO_VAL, the process will be adopted into the uid_%u cgroups only. + */ +static int _adopt_process(pid_t pid, uint32_t job_id, uid_t uid) { - /* TODO: add this pid to plugins for task, container, accounting, etc - * need more code here ... */ - info("_adopt_process(%d, %u): UNIMPLEMENTED", pid, job_id); + xcgroup_t cg; + xcgroup_ns_t ns; + int i, rc, cgroup_type_count = 5; + char *cgroup_types[] = + { "memory", "cpuset", "cpuacct", "freezer", "device" }; + char path[PATH_MAX]; + + /* Set default return code based on settings */ + rc = opts.action_adopt_failure == CALLERID_ACTION_ALLOW ? 
+ PAM_SUCCESS : PAM_PERM_DENIED; + + debug3("Calling _adopt_process(%d, %u, %u)", pid, job_id, uid); + + /* job_id == NO_VAL indicates that we should use the uid_%s cgroup */ + if (job_id == NO_VAL) + snprintf(path, PATH_MAX, "/slurm/uid_%u", uid); + else + snprintf(path, PATH_MAX, "/slurm/uid_%u/job_%d/step_extern", + uid, job_id); + + for (i = 0; i < cgroup_type_count; i++) { + if (xcgroup_ns_load(slurm_cgroup_conf, &ns, cgroup_types[i]) + != SLURM_SUCCESS) { + info("_adopt_process(%d, %u, %u): xcgroup_ns_load failed for %s", + pid, job_id, uid, cgroup_types[i]); + continue; + } + if (xcgroup_load(&ns, &cg, path) != SLURM_SUCCESS) { + info("_adopt_process(%d, %u, %u): xcgroup_load failed for cgroup %s, path %s", + pid, job_id, uid, cgroup_types[i], path); + continue; + } + if (xcgroup_set_uint64_param(&cg, "tasks", (uint64_t)pid) + != SLURM_SUCCESS) { + info("_adopt_process(%d, %u, %u): adding pid %d to %s/tasks failed", + pid, job_id, uid, pid, cg.path); + continue; + } + debug("_adopt_process(%d, %u, %u): pid %d adopted into %s", + pid, job_id, uid, pid, cg.path); + /* We will consider one success to be good enough */ + rc = PAM_SUCCESS; + } - /* TODO: change my primary gid to the job's group, if possible */ - return SLURM_SUCCESS; + if (rc == PAM_SUCCESS) + info("Process %d adopted into job %u", pid, job_id); + else + info("Process %d adoption FAILED for all cgroups of job %u", + pid, job_id); + + /* TODO: Change my primary gid to the job's group after + * https://bugzilla.mindrot.org/show_bug.cgi?id=2380 is merged. + * If you are reading this message and nothing has been done with + * that bug, please consider adding a "I would like this too" + * comment. + */ + return rc; } /* Returns negative number on failure. Failures are likely to occur if a step * exits; this is not a problem. */ static uid_t _get_job_uid(step_loc_t *stepd) { - /* BUG: uid_t on Linux is unsigned but stepd_get_uid can return -1 */ uid_t uid = -1; int fd; uint16_t protocol_version; @@ -131,7 +196,6 @@ static uid_t _get_job_uid(step_loc_t *stepd) close(fd); /* The step may have exited. Not a big concern. */ - /* BUG: uid_t on Linux is unsigned but stepd_get_uid can return -1 */ if ((int32_t)uid == -1) debug3("unable to determine uid of step %u.%u on %s", stepd->jobid, stepd->stepid, stepd->nodename); @@ -139,12 +203,35 @@ static uid_t _get_job_uid(step_loc_t *stepd) return uid; } +/* Return mtime of a cgroup. If we can't read the right cgroup information, + * return 0. 
That results in a (somewhat) random choice of job */ +static time_t _cgroup_creation_time(char *uidcg, uint32_t job_id) +{ + char path[PATH_MAX]; + struct stat statbuf; + + if (snprintf(path, PATH_MAX, "%s/job_%u", uidcg, job_id) >= PATH_MAX) { + info("snprintf: '%s/job_%u' longer than PATH_MAX of %d", + uidcg, job_id, PATH_MAX); + return 0; + } + + if (stat(path, &statbuf) != 0) { + info("Couldn't stat path '%s'", path); + return 0; + } + + return statbuf.st_mtime; +} + static int _indeterminate_multiple(pam_handle_t *pamh, List steps, uid_t uid, uint32_t *job_id) { ListIterator itr = NULL; - int rc = SLURM_FAILURE; + int rc = PAM_PERM_DENIED; step_loc_t *stepd = NULL; + time_t most_recent = 0, cgroup_time = 0; + char uidcg[PATH_MAX]; if (opts.action_unknown == CALLERID_ACTION_DENY) { debug("Denying due to action_unknown=deny"); @@ -153,22 +240,43 @@ static int _indeterminate_multiple(pam_handle_t *pamh, List steps, uid_t uid, PAM_MODULE_NAME ": unable to determine source job"); return PAM_PERM_DENIED; - } else if (opts.action_unknown == CALLERID_ACTION_IGNORE) { - debug("Allowing due to action_unknown=ignore"); + } else if (opts.action_unknown == CALLERID_ACTION_USER) { + debug("Using uid_%u cgroups due to action_unknown=user", uid); + *job_id = (uint32_t)NO_VAL; return PAM_SUCCESS; } + if (snprintf(uidcg, PATH_MAX, "%s/memory/slurm/uid_%u", + slurm_cgroup_conf->cgroup_mountpoint, uid) >= PATH_MAX) { + info("snprintf: '%s/memory/slurm/uid_%u' longer than PATH_MAX of %d", + slurm_cgroup_conf->cgroup_mountpoint, uid, PATH_MAX); + /* Make the uidcg an empty string. This will effectively switch + * to a (somewhat) random selection of job rather than picking + * the latest, but how did you overflow PATH_MAX chars anyway? + */ + uidcg[0] = '\0'; + } + itr = list_iterator_create(steps); while ((stepd = list_next(itr))) { - if (uid == _get_job_uid(stepd)) { - *job_id = stepd->jobid; - rc = SLURM_SUCCESS; - break; + /* Only use container steps from this user */ + if (stepd->stepid == SLURM_EXTERN_CONT && + uid == _get_job_uid(stepd)) { + cgroup_time = _cgroup_creation_time( + uidcg, stepd->jobid); + /* Return the newest job_id, according to cgroup + * creation. Hopefully this is a good way to do this */ + if (cgroup_time > most_recent) { + most_recent = cgroup_time; + *job_id = stepd->jobid; + rc = PAM_SUCCESS; + } } } - /* No jobs from this user exist on this node */ - if (rc != SLURM_SUCCESS) { + /* No jobs from this user exist on this node. This should have been + * caught earlier but wasn't for some reason. 
*/ + if (rc != PAM_SUCCESS) { if (opts.action_no_jobs == CALLERID_ACTION_DENY) { debug("uid %u owns no jobs => deny", uid); send_user_msg(pamh, @@ -177,9 +285,9 @@ static int _indeterminate_multiple(pam_handle_t *pamh, List steps, uid_t uid, ": you have no active jobs on this node"); rc = PAM_PERM_DENIED; } else { - debug("uid %u owns no jobs but action_no_jobs=ignore", + debug("uid %u owns no jobs but action_no_jobs=allow", uid); - rc = PAM_IGNORE; + rc = PAM_SUCCESS; } } @@ -187,10 +295,36 @@ static int _indeterminate_multiple(pam_handle_t *pamh, List steps, uid_t uid, return rc; } -static int _single_job_check(List steps, uid_t uid, uint32_t *job_id) +static int _action_unknown(pam_handle_t *pamh, struct passwd *pwd, List steps) +{ + int rc; + uint32_t job_id; + + if (opts.action_unknown == CALLERID_ACTION_ALLOW) { + debug("Allowing due to action_unknown=allow"); + return PAM_SUCCESS; + } + + /* Both the single job check and the RPC call have failed to ascertain + * the correct job to adopt this into. Time for drastic measures */ + rc = _indeterminate_multiple(pamh, steps, pwd->pw_uid, &job_id); + if (rc == PAM_SUCCESS) { + info("action_unknown: Picked job %u", job_id); + rc = _adopt_process(getpid(), job_id, pwd->pw_uid); + } else { + /* This pam module was worthless, apparently */ + debug("_indeterminate_multiple failed to find a job to adopt this into"); + } + + return rc; +} + +/* _user_job_count returns the count of jobs owned by the user AND sets job_id + * to the last job from the user that is found */ +static int _user_job_count(List steps, uid_t uid, uint32_t *job_id) { ListIterator itr = NULL; - int user_job_cnt = 0, rc = SLURM_FAILURE; + int user_job_cnt = 0; step_loc_t *stepd = NULL; *job_id = (uint32_t)NO_VAL; @@ -203,19 +337,12 @@ static int _single_job_check(List steps, uid_t uid, uint32_t *job_id) if (*job_id != stepd->jobid) { user_job_cnt++; *job_id = stepd->jobid; - rc = SLURM_SUCCESS; } } - if(user_job_cnt > 1) { - debug3("_single_job_check: uid %u has multiple jobs on this node", - uid); - rc = SLURM_FAILURE; - break; - } } list_iterator_destroy(itr); - return rc; + return user_job_cnt; } static int _rpc_network_callerid(struct callerid_conn *conn, char *user_name, @@ -255,6 +382,54 @@ static int _rpc_network_callerid(struct callerid_conn *conn, char *user_name, } } +static int _try_rpc(struct passwd *pwd) +{ + uint32_t job_id; + int rc; + char ip_src_str[INET6_ADDRSTRLEN]; + struct callerid_conn conn; + + /* Gather network information for RPC call. */ + debug("Checking file descriptors for network socket"); + + /* Check my fds for a network socket */ + if (callerid_get_own_netinfo(&conn) != SLURM_SUCCESS) { + /* If this failed, the RPC will surely fail. If we continued + * we'd have to fill in junk for lots of variables. Return so + * that action_unknown will happen */ + error("Unable to find network socket"); + if (opts.action_generic_failure == CALLERID_ACTION_DENY) + return PAM_PERM_DENIED; + else + return PAM_IGNORE; + } + + if (inet_ntop(conn.af, &conn.ip_src, ip_src_str, INET6_ADDRSTRLEN) + == NULL) { + /* This is really odd. 
If this failed, other functions are so + * likely to fail that we might as well skip the RPC */ + error("inet_ntop failed"); + if (opts.action_generic_failure == CALLERID_ACTION_DENY) + return PAM_PERM_DENIED; + else + return PAM_IGNORE; + } + + /* Ask the slurmd at the source IP address about this connection */ + rc = _rpc_network_callerid(&conn, pwd->pw_name, &job_id); + if (rc == SLURM_SUCCESS) { + rc = _adopt_process(getpid(), job_id, pwd->pw_uid); + return rc; + } + + info("From %s port %d as %s: unable to determine source job", + ip_src_str, + conn.port_src, + pwd->pw_name); + + return PAM_IGNORE; +} + /* Use the pam logging function for now since normal logging is not yet * initialized */ log_level_t _parse_log_level(pam_handle_t *pamh, const char *log_level_str) @@ -314,22 +489,50 @@ static void _parse_opts(pam_handle_t *pamh, int argc, const char **argv) char *v; for (; argc-- > 0; ++argv) { - if (!strncasecmp(*argv, "single_job_skip_rpc=0", 18)) + if (!strncasecmp(*argv, "single_job_skip_rpc=0", 21)) opts.single_job_skip_rpc = 0; else if (!strncasecmp(*argv, "ignore_root=0", 13)) opts.ignore_root = 0; - else if (!strncasecmp(*argv,"action_unknown=",15)) { + else if (!strncasecmp(*argv,"action_no_jobs=",15)) { + v = (char *)(15 + *argv); + if (!strncasecmp(v, "deny", 4)) + opts.action_no_jobs = CALLERID_ACTION_DENY; + else if (!strncasecmp(v, "ignore", 6)) + opts.action_no_jobs = CALLERID_ACTION_IGNORE; + else { + pam_syslog(pamh, + LOG_ERR, + "unrecognized action_no_jobs=%s, setting to 'deny'", + v); + } + } else if (!strncasecmp(*argv,"action_unknown=",15)) { v = (char *)(15 + *argv); - if (!strncasecmp(v, "ignore", 6)) - opts.action_unknown = CALLERID_ACTION_IGNORE; - else if (!strncasecmp(v, "any", 3)) - opts.action_unknown = CALLERID_ACTION_ANY; + if (!strncasecmp(v, "allow", 5)) + opts.action_unknown = CALLERID_ACTION_ALLOW; + else if (!strncasecmp(v, "newest", 6)) + opts.action_unknown = CALLERID_ACTION_NEWEST; else if (!strncasecmp(v, "deny", 4)) opts.action_unknown = CALLERID_ACTION_DENY; + else if (!strncasecmp(v, "user", 4)) + opts.action_unknown = CALLERID_ACTION_USER; else { pam_syslog(pamh, LOG_ERR, - "unrecognized action_unknown=%s, setting to 'any'", + "unrecognized action_unknown=%s, setting to 'newest'", + v); + } + } else if (!strncasecmp(*argv,"action_generic_failure=",23)) { + v = (char *)(23 + *argv); + if (!strncasecmp(v, "allow", 5)) + opts.action_generic_failure = CALLERID_ACTION_ALLOW; + else if (!strncasecmp(v, "ignore", 6)) + opts.action_generic_failure = CALLERID_ACTION_IGNORE; + else if (!strncasecmp(v, "deny", 4)) + opts.action_generic_failure = CALLERID_ACTION_DENY; + else { + pam_syslog(pamh, + LOG_ERR, + "unrecognized action_generic_failure=%s, setting to 'allow'", v); } } else if (!strncasecmp(*argv, "log_level=", 10)) { @@ -349,6 +552,18 @@ static void _log_init(log_level_t level) log_init(PAM_MODULE_NAME, logopts, LOG_AUTHPRIV, NULL); } +static int _load_cgroup_config() +{ + slurm_cgroup_conf = xmalloc(sizeof(slurm_cgroup_conf_t)); + bzero(slurm_cgroup_conf, sizeof(slurm_cgroup_conf_t)); + if (read_slurm_cgroup_conf(slurm_cgroup_conf) != SLURM_SUCCESS) { + info("read_slurm_cgroup_conf failed"); + return SLURM_FAILURE; + } + return SLURM_SUCCESS; +} + + /* Parse arguments, etc then get my socket address/port information. 
Attempt to * adopt this process into a job in the following order: * 1) If the user has only one job on the node, pick that one @@ -360,20 +575,32 @@ static void _log_init(log_level_t level) PAM_EXTERN int pam_sm_acct_mgmt(pam_handle_t *pamh, int flags __attribute__((unused)), int argc, const char **argv) { - int retval = PAM_IGNORE, rc = PAM_IGNORE; + int retval = PAM_IGNORE, rc, bufsize, user_jobs; char *user_name; - struct callerid_conn conn; uint32_t job_id; - char ip_src_str[INET6_ADDRSTRLEN]; List steps = NULL; struct passwd pwd, *pwd_result; char *buf = NULL; - int bufsize; _init_opts(); _parse_opts(pamh, argc, argv); _log_init(opts.log_level); + switch (opts.action_generic_failure) { + case CALLERID_ACTION_DENY: + rc = PAM_PERM_DENIED; + break; + case CALLERID_ACTION_ALLOW: + rc = PAM_SUCCESS; + break; + case CALLERID_ACTION_IGNORE: + rc = PAM_IGNORE; + break; + /* Newer gcc versions warn if enum cases are missing */ + default: + error("The code is broken!!!!"); + } + retval = pam_get_item(pamh, PAM_USER, (void *) &user_name); if (user_name == NULL || retval != PAM_SUCCESS) { pam_syslog(pamh, LOG_ERR, "No username in PAM_USER? Fail!"); @@ -384,9 +611,9 @@ PAM_EXTERN int pam_sm_acct_mgmt(pam_handle_t *pamh, int flags * basic check that shouldn't be 100% relied on */ if (!opts.ignore_root && (opts.action_unknown == CALLERID_ACTION_DENY || - opts.action_no_jobs != CALLERID_ACTION_IGNORE || - opts.action_adopt_failure != CALLERID_ACTION_IGNORE || - opts.action_generic_failure != CALLERID_ACTION_IGNORE + opts.action_no_jobs != CALLERID_ACTION_ALLOW || + opts.action_adopt_failure != CALLERID_ACTION_ALLOW || + opts.action_generic_failure != CALLERID_ACTION_ALLOW )) { /* Let's get verbose */ info("==============================="); @@ -398,7 +625,7 @@ PAM_EXTERN int pam_sm_acct_mgmt(pam_handle_t *pamh, int flags opts.ignore_root = 1; } - /* Ignoring root is probably best but the admin can allow it*/ + /* Ignoring root is probably best but the admin can allow it */ if (!strcmp(user_name, "root")) { if (opts.ignore_root) { info("Ignoring root user"); @@ -428,34 +655,22 @@ PAM_EXTERN int pam_sm_acct_mgmt(pam_handle_t *pamh, int flags return PAM_SESSION_ERR; } - /* Check my fds for a network socket */ - if (callerid_get_own_netinfo(&conn) != SLURM_SUCCESS) { - /* We could press on for the purposes of the single- or - * multi-job checks, but the RPC will surely fail. If we - * continued we'd have to fill in junk for lots of variables */ - error("Unable to find network socket"); - rc = PAM_IGNORE; - goto cleanup; - } + if (_load_cgroup_config() != SLURM_SUCCESS) + return rc; - if (inet_ntop(conn.af, &conn.ip_src, ip_src_str, INET6_ADDRSTRLEN) - == NULL) { - /* This is really odd. If this failed, other functions are so - * likely to fail that we might as well exit */ - error("inet_ntop failed"); - rc = PAM_IGNORE; - goto cleanup; - } - - /* Get a list of steps on the node. A failure here likely means failures - * everywhere so exit on failure or if no local jobs exist */ + /* Check if there are any steps on the node from any user. A failure here + * likely means failures everywhere so exit on failure or if no local jobs + * exist. */ steps = stepd_available(NULL, NULL); if (!steps) { - error("Error obtaining local step information. Fail."); - rc = PAM_IGNORE; + error("Error obtaining local step information."); goto cleanup; - } else if (list_count(steps) == 0) { - info("No steps on this node from any user"); + } + + /* Check to see if this user has only one job on the node. 
If so, choose + * that job and adopt this process into it (unless configured not to) */ + user_jobs = _user_job_count(steps, pwd.pw_uid, &job_id); + if (user_jobs == 0) { if (opts.action_no_jobs == CALLERID_ACTION_DENY) { send_user_msg(pamh, "Access denied by " @@ -463,70 +678,38 @@ PAM_EXTERN int pam_sm_acct_mgmt(pam_handle_t *pamh, int flags ": you have no active jobs on this node"); rc = PAM_PERM_DENIED; } else { - info("uid %u owns no jobs but action_no_jobs=ignore", + debug("uid %u owns no jobs but action_no_jobs=ignore", pwd.pw_uid); rc = PAM_IGNORE; } - goto cleanup; - } - - /* Check to see if this user has only one job on the node. If so, choose - * that job and adopt this process into it (unless configured not to) */ - if (opts.single_job_skip_rpc) { - if (_single_job_check(steps, pwd.pw_uid, &job_id) - == SLURM_SUCCESS) { - debug("From %s port %d as %s: _single_job_check succeeded", - ip_src_str, - conn.port_src, - user_name); - - info("From %s port %d as %s: member of job %u", - ip_src_str, - conn.port_src, + } else if (user_jobs == 1) { + if (opts.single_job_skip_rpc) { + info("Connection by user %s: user has only one job %u", user_name, job_id); - rc = _adopt_process(getpid(), job_id); + rc = _adopt_process(getpid(), job_id, pwd.pw_uid); goto cleanup; - } else { - debug("From %s port %d as %s: _single_job_check failed", - ip_src_str, - conn.port_src, - user_name); } + } else { + debug("uid %u has %d jobs", pwd.pw_uid, user_jobs); } - /* Single job check failed or wasn't used. Ask the slurmd (if any) at - * the source IP address about this connection */ - rc = _rpc_network_callerid(&conn, user_name, &job_id); - if (rc == SLURM_SUCCESS) { - rc = _adopt_process(getpid(), job_id); + /* Single job check turned up nothing (or we skipped it). Make RPC call + * to slurmd at source IP. If it can tell us the job, the function calls + * _adopt_process */ + rc = _try_rpc(&pwd); + if (rc == PAM_SUCCESS || rc == PAM_PERM_DENIED) goto cleanup; - } - - info("From %s port %d as %s: unable to determine source job", - ip_src_str, - conn.port_src, - user_name); - /* Both the single job check and the RPC call have failed to ascertain - * the correct job to adopt this into. Time for drastic measures */ - rc = _indeterminate_multiple(pamh, steps, pwd.pw_uid, &job_id); - if (rc == SLURM_SUCCESS) { - info("From %s port %d as %s: picked job %u", - ip_src_str, - conn.port_src, - user_name, - job_id); - rc = _adopt_process(getpid(), job_id); - } else { - /* This pam module was worthless, apparently */ - debug("_indeterminate_multiple failed to find a job to adopt this into"); - } + /* The source of the connection either didn't reply or couldn't + * determine the job ID at the source. 
Proceed to action_unknown */ + rc = _action_unknown(pamh, &pwd, steps); cleanup: FREE_NULL_LIST(steps); xfree(buf); + xfree(slurm_cgroup_conf); return rc; } diff --git a/doc/html/Makefile.am b/doc/html/Makefile.am index 4c9488eef..90cc9f757 100644 --- a/doc/html/Makefile.am +++ b/doc/html/Makefile.am @@ -120,7 +120,6 @@ html_DATA = \ ibm_pe_fig2.png \ k_function.gif \ linuxstyles.css \ - lll.gif \ mc_support.gif \ plane_ex1.gif \ plane_ex2.gif \ diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in index 5c85cd8f3..38c6fcbe8 100644 --- a/doc/html/Makefile.in +++ b/doc/html/Makefile.in @@ -550,7 +550,6 @@ html_DATA = \ ibm_pe_fig2.png \ k_function.gif \ linuxstyles.css \ - lll.gif \ mc_support.gif \ plane_ex1.gif \ plane_ex2.gif \ diff --git a/doc/html/burst_buffer.shtml b/doc/html/burst_buffer.shtml index 5be39365c..90ffad134 100644 --- a/doc/html/burst_buffer.shtml +++ b/doc/html/burst_buffer.shtml @@ -174,7 +174,10 @@ A sample batch script follows:</p> <p><b>NOTE:</b> The ability to create and destroy persistent burst buffers may be limited by the "Flags" option in the burst_buffer.conf file. -By default only privileged users can create or destroy persistent burst buffers.</p> +See the burst_buffer.conf man page for more information. +By default only <a href="user_permissions.html">privileged users</a> +(Slurm operators and administrators) +can create or destroy persistent burst buffers.</p> <h2><a name="interactive">Interactive Job Options</a></h2> @@ -267,6 +270,6 @@ $ scontrol create reservation StartTime=noon duration=60 \ BurstBuffer=cray:20G </pre> -<p style="text-align:center;">Last modified 18 September 2015</p> +<p style="text-align:center;">Last modified 29 October 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/download.shtml b/doc/html/download.shtml index 249c44351..129212365 100644 --- a/doc/html/download.shtml +++ b/doc/html/download.shtml @@ -29,7 +29,7 @@ a message.</li> <li><b>MUNGE</b> (recommended)<br> In order to compile the "auth/munge" authentication plugin for Slurm, you will need to build and install MUNGE, available from -<a href="https://code.google.com/p/munge/">https://code.google.com/p/munge/</a> and +<a href="http://dun.github.io/munge/">http://dun.github.io/munge/</a> and <a href="http://packages.debian.org/src:munge">Debian</a> and <a href="http://fedoraproject.org/">Fedora</a> and <a href="http://packages.ubuntu.com/src:munge">Ubuntu</a>.</li> @@ -397,6 +397,6 @@ Slurm-based HPC supercomputers. 
The website of Slurm-web, with screenshots:<br> </ul> -<p style="text-align:center;">Last modified 27 July 2015</p> +<p style="text-align:center;">Last modified 19 October 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/lll.gif b/doc/html/lll.gif deleted file mode 100644 index d1227248e0ac713e2206273b4009d96d3f5184b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1794 zcmeH`{ZA7I7=W*(A1BswM@xHr-#JjL3^*rBg=kx73-tpiY-*9MrMfYXIq=kU7FY{c zK+|mCPKQae;pRDYbP;9~W2{-RR5KZYhy)jPR+&~~25!zS*-Y2_C-&3cJbCi`_9k!g z>MM1{N(&00KokICu~;Y+^M|5-f6VWX`dH?DzL?K9@AbyK-l*3*?`5Kl_c6o77-pVf z9(z3V9#7o&JoIClo~K#gqv;vieb3D@<91KDse5i}hGLnZsE|F5ea3E|u-oI|x04g3 z-A~3%hDb6VK9UHLgr6Wn1Zy8bcnPcD%Hp$Hy;dt@HTg^|UXzJ28GR<B*JNajMz4{z z$7o=T29LquH5eEJ8?-^g7&IP@hS9L5H5#`@>Cq@@jnb`DdX!39$p)pwX(jH)aT;e$ z;kX@FxD^Uo!J1Mi><Y}Sz$gV44-&&D46|eLKqw5cV@Ny*1hFFsiNpi4BM^x|1O$-~ zYbykqAdrMW0s^fdNPw(OAV7iu0RmPKh_?{{2mr7GfC*sR0Q^7y?FI7Kb?h$yU<`l} zWRR)KP8aSMJ-F%`O;p+b>2uVS>61|1@zTJ^*p1Y6t-R!MNhAcgM42zcU+ejzwDoO3 z<>=hulswCiLmyckDqv+h?>16xwa6E*;_?qD%X-A9{&+<=(z+#+%jGsL%Aq5J8_YR5 zOV*s>xl@aF<)7V=yJ>45zfJbcL|1K>?|gNtn0UBUvaI%Qhx;Z898=|1i4%E(vxBOS z2OiDh-!GFDd{`z!k-DA-8yfP@L(}?#v4LX=pQ#_7p5XF?7ZwyJ`ugrJ!s(~@;y=FX zUv3;S=O^q_ckxF4epU_4RSo7Njn^$VAMv&6DhlD;!N=43ptQo+Y}$(6PIMyBZ05YT zB~4z~@&X(=cw}L9(`@kU8<x?of_6IUfDbuBYEg4<jX+db`eqPz?C5vNo1WDP1(Nc@ zP_w?m9L%;g!=0ig+jG9@Olp5{%iH&(x=fqd;pC?a_62$R2z*Vp^!kD;rFOcnOZ@Yz zBCTxi6R9vOqeb;vR(r;<6W)KZ#<gg*?Kw|U3~-KZ*<lX9l~xiC=s~Q>EI^%W0#2Bu zTN%-gJWH@vyOlF0PaJvPoyj?ljtcgmIzFf#hjr2wcO2Se-Of4Y;7Y^T1O3Xrmd?}; z@b~h>N*WVL@Y%^Atcp~ZEx|8b;)-$yVp{GZ?c7vn;_K(9o_wS2ea=5yB!Ve%@$}?q zVy*)i6{On+)xz}yc<+ItQ&>fo@$zKX;i`zE0U8^*G<tEBqk42`4TbfhP2uVg+F@3g zElD=ulvpov_QN{WEHCAq>egLphrmX?xOmPnc4_y5s!L)lbe@CN8Lj1#>NPk&rD6}~ z8u}?8EKgGA^y{-f$f?<vyeZ@4RdLY|_cf<)>^UFc6xdovGHu|}a`<)t>&dK_)=)<( zi+|;gpiSmLf;h+Ql&JY!FuP6KUk-1i`@1uD=Q+!DoBukWAbNOoAs|TI4yjW*MUC&G zZNO1VS|trra$@lDFkfb?!nKmkFVd+aD#+q8I)>`jWg9nR{9ru|cgyQ5KADr7+*7*r zJ=a_HcQ`Ktj~;F}*SJ!)nWtRxhPvV46oGTbqzCt6!mI;3+y87nVEyfotiBo6A=`^< z%9GYk7j`9P)-42*52Aw7yH`FByX1}ZtP5!XzM!D>r5iOuMO~l=y|VE;+W7wcZI;w; zhljgg)=E!41r;k`X|}!Tms=58@4Cx@s^*riFDK2alJT&Z94N~=w;bFsZCT41s(-@8 jgvwyR(Q&9O<H2O0BoatMOHSMxnkk9?bUKN{0ZRS_cnvjd diff --git a/doc/html/news.shtml b/doc/html/news.shtml index c6ae18310..999402d17 100644 --- a/doc/html/news.shtml +++ b/doc/html/news.shtml @@ -69,21 +69,21 @@ to coordinate activities. Future development plans includes: <!-- Universitat Jaume I & Universitat Politecnica de Valencia --> <li>Add support for <a href="http://slurm.schedmd.com/SUG14/remote_gpu.pdf">Remote CUDA (rCUDA)</a></li> -<li>Integration with - <a href="http://en.wikipedia.org/wiki/FlexNet_Publisher">FLEXlm - (Flexnet Publisher)</a> license management.</li> <li>Distributed architecture to support the management of resources with Intel Knight's Landing processors.</li> -<li>IP communications over InfiniBand network for improved performance.</li> <li>Fault-tolerance and jobs dynamic adaptation through communication protocol between Slurm, MPI libraries and the application.</li> +<li>Improved support for provisioning and virtualization.</li> +<!-- <li>IP communications over InfiniBand network for improved performance.</li> +<li>Integration with + <a href="http://en.wikipedia.org/wiki/FlexNet_Publisher">FLEXlm + (Flexnet Publisher)</a> license management.</li> <li>Improved support for high-throughput computing (e.g. 
multiple slurmctld daemons on a single cluster).</li> <li>Add Kerberos credential support including credential forwarding and refresh.</li> -<li>Improved support for provisioning and virtualization.</li> -<li>Provide a web-based Slurm administration tool.</li> +<li>Provide a web-based Slurm administration tool.</li> --> </ul> -<p style="text-align:center;">Last modified 31 August 2015</p> +<p style="text-align:center;">Last modified 22 October 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/quickstart_admin.shtml b/doc/html/quickstart_admin.shtml index aed44e392..c79de0f9e 100644 --- a/doc/html/quickstart_admin.shtml +++ b/doc/html/quickstart_admin.shtml @@ -38,11 +38,13 @@ those directories will have access rights equal to read/write/execute for everyone minus the umask value (e.g. umask=0022 generates directories with permissions of "drwxr-r-x" and mask=0000 generates directories with permissions of "drwxrwrwx" which is a security problem).</li> +<li>Type <i>ldconfig -n <library_location></i> so that the Slurm libraries +can be found by applications that intend to use Slurm APIs directly.</li> <li>Install the configuration file in <i><sysconfdir>/slurm.conf</i>.<br> NOTE: You will need to install this configuration file on all nodes of the cluster.</li> <li>Start the <i>slurmctld</i> and <i>slurmd</i> daemons.</li> </ol> -<p>NOTE: Items 3 through 6 can be replaced with</p> +<p>NOTE: Items 3 through 8 can be replaced with</p> <ol> <li><i>rpmbuild -ta slurm*.tar.bz2</i></li> <li><i>rpm --install <the rpm files></i></li> @@ -63,6 +65,13 @@ See the README and INSTALL files in the source distribution for more details. <li>Type <i>make</i> to compile Slurm.</li> <li>Type <i>make install</i> to install the programs, documentation, libraries, header files, etc.</li> +<li>Type <i>ldconfig -n <library_location></i> so that the Slurm libraries +can be found by applications that intend to use Slurm APIs directly. The +library location will be a subdirectory of PREFIX (described below) and depend +upon the system type and configuration, typically lib, lib64 and/or lib32. +For example, if PREFIX is "/usr" and the subdirectory is "lib64" then you would +find that a file named "/usr/lib64/libslurm.so" was installed and the command +"ldconfig -n /usr/lib64" should be executed.</li> </ol> <p>A full list of <i>configure</i> options will be returned by the command <i>configure --help</i>. 
The most commonly used arguments to the @@ -840,6 +849,6 @@ options such as mysql and gui tools via a configuration menu.</p> </pre> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 16 June 2015</p> +<p style="text-align:center;">Last modified 23 October 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/resource_limits.shtml b/doc/html/resource_limits.shtml index 447c25595..41f6ba596 100644 --- a/doc/html/resource_limits.shtml +++ b/doc/html/resource_limits.shtml @@ -9,10 +9,10 @@ is strongly recommended before use of this document.</p> Maui and Moab are not integrated with Slurm's resource limits, but should use their own resource limits mechanisms.</p> -<h2>Hierachy</h2> +<h2>Hierarchy</h2> <p>Slurm's hierarchical limits are enforced in the following order - with Job QOS and Partition QOS order being reversable by using the QOS + with Job QOS and Partition QOS order being reversible by using the QOS flag 'OverPartQOS':</p> <ol> <li>Partition QOS limit</li> @@ -24,7 +24,7 @@ but should use their own resource limits mechanisms.</p> <li>None</li> </ol> -<p>Note: If limits are defined at multiple points in this hierachy, +<p>Note: If limits are defined at multiple points in this hierarchy, the point in this list where the limit is first defined will be used. Consider the following example:</p> <ul> @@ -171,7 +171,7 @@ specified then no limit will apply.</p> <li><b>MaxTRESMinsPerJob=</b> A limit of TRES minutes to be used by a job. If this limit is reached the job will be killed if not running in - Safe mode, othewise the job will pend until enough time is given to + Safe mode, otherwise the job will pend until enough time is given to complete the job. </li> @@ -224,7 +224,7 @@ specified then no limit will apply.</p> children have to the above system. Can also be the string "parent", when used on a user this means that the parent association is used for fairshare. If Fairshare=parent is set on an account, that - account's children will be effectively reparented for fairshare + account's children will be effectively re-parented for fairshare calculations to the first parent of their parent that is not Fairshare=parent. Limits remain the same, only it's fairshare value is affected. @@ -265,6 +265,6 @@ data maintained in the Slurm database. More information can be found in the <a href="priority_multifactor.html">priority/multifactor</a> plugin description.</p> -<p style="text-align: center;">Last modified 24 September 2015</p> +<p style="text-align: center;">Last modified 19 October 2015</p> </ul></body></html> diff --git a/doc/html/team.shtml b/doc/html/team.shtml index 725de28e4..281f0765b 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -203,6 +203,7 @@ Lead Slurm developers are: <li>Aleksej Saushev</li> <li>Uwe Sauter (High Performance Computing Center Stuttgart, Germany)</li> <li>Chris Scheller (University of Michigan)</li> +<li>Alejandro (Alex) Sanchez (SchedMD)</li> <li>Rod Schultz (Bull)</li> <li>Samuel Senoner (Vienna University of Technology, Austria)</li> <li>David Singleton</li> @@ -229,7 +230,7 @@ Lead Slurm developers are: <li>Daniel M. 
Weeks (Rensselaer Polytechnic Institute)</li> <li>Nathan Weeks (Iowa State University)</li> <li>Andy Wettstein (University of Chicago)</li> -<li>Tim Wickberg (George Washington University)</li> +<li>Tim Wickberg (SchedMD)</li> <li>Chandler Wilkerson (Rice University)</li> <li>Ramiro Brito Willmersdorf (Universidade Federal de Pemambuco, Brazil)</li> <li>Jay Windley (Linux NetworX)</li> @@ -241,6 +242,6 @@ Lead Slurm developers are: <!-- INDIVIDUALS, PLEASE KEEP IN ALPHABETICAL ORDER --> </ul> -<p style="text-align:center;">Last modified 19 October 2015</p> +<p style="text-align:center;">Last modified 26 October 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/tres.shtml b/doc/html/tres.shtml index f59b34cc2..b4d982070 100644 --- a/doc/html/tres.shtml +++ b/doc/html/tres.shtml @@ -98,13 +98,13 @@ with: number of allocated CPUs. </p> -<p><b>NOTE:</b> TRESBillingWeights is only used when calcuating fairshare and +<p><b>NOTE:</b> TRESBillingWeights is only used when calculating fairshare and doesn't affect job priority directly as it is currently not used for the size of the job. If you want TRES' to play a role in the job's priority then refer to the PriorityWeightTRES option. </p> -<p><b>NOTE:</b> As with PriorityWeightTRES only TRES definied in +<p><b>NOTE:</b> As with PriorityWeightTRES only TRES defined in AccountingStorageTRES are available for TRESBillingWeights. </p> </li> @@ -128,6 +128,6 @@ the PriorityWeightTRES option. for the requested TRES types. More information about these reports can be found on the <a href="sreport.html">sreport manpage</a>. </p> -<p style="text-align:center;">Last modified 8 September 2015</p> +<p style="text-align:center;">Last modified 19 October 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index bd0d2ed2a..ab628dc93 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -145,9 +145,8 @@ ReqCPUFreqGov ReqCPUS ReqGRES ReqMem ReqNodes ReqTRES Reservation ReservationId Reserved ResvCPU ResvCPURAW Start State Submit Suspended SystemCPU -Timelimit TotalCPU TRESAlloc TRESReq -UID User UserCPU WCKey -WCKeyID +Timelimit TotalCPU UID User +UserCPU WCKey WCKeyID .ft 1 .fi diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index 3060f74e7..b2a60fe11 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -1607,6 +1607,20 @@ exclusive use by the Slurm compute node daemons (slurmd, slurmstepd). The combined memory limit, in megabytes, on this node for the Slurm compute node daemons (slurmd, slurmstepd). +.TP +The meaning of the memory information is as follows: + +.TP +\fIRealMemory\fP +The total memory, in MB, on the node. + +.TP +\fIAllocMem\fP +The total memory, in MB, currently allocated by jobs on the node. + +.TP +\fIFreeMem\fP +The total memory, in MB, currently free on the node as reported by the OS. .SH "ENVIRONMENT VARIABLES" .PP diff --git a/doc/man/man5/burst_buffer.conf.5 b/doc/man/man5/burst_buffer.conf.5 index 0d620c333..5bfa727ea 100644 --- a/doc/man/man5/burst_buffer.conf.5 +++ b/doc/man/man5/burst_buffer.conf.5 @@ -66,14 +66,16 @@ Supported options include: .TP \fBDisablePersistent\fR Prevents regular users from being able to create and destroy persistent burst buffers. -This is the default behaviour, only privileged users can create or destroy persistent burst buffers. +This is the default behaviour, only privileged users (Slurm operators and +administrators) can create or destroy persistent burst buffers. 
.TP \fBEmulateCray\fR Emulating a Cray DataWarp system using the dw_wlm_cli script in the burst_buffer/cray plugin. .TP \fBEnablePersistent\fR Enables regular users to create and destroy persistent burst buffers. -By default, only privileged users can create or destroy persistent burst buffers. +By default, only privileged users (Slurm operators and administrators) can +create or destroy persistent burst buffers. .RE .TP diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 004fa76d6..53f4833d9 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1113,7 +1113,9 @@ Also see \fBDefaultStorageHost\fR. The fully qualified file name where job completion records are written when the \fBJobCompType\fR is "jobcomp/filetxt" or the database where job completion records are stored when the \fBJobCompType\fR is a -database. +database or an url with format http://yourelasticserver:port where job +completion records are indexed when the \fBJobCompType\fR is +"jobcomp/elasticsearch". Also see \fBDefaultStorageLoc\fR. .TP @@ -1133,7 +1135,7 @@ Also see \fBDefaultStoragePort\fR. \fBJobCompType\fR The job completion logging mechanism type. Acceptable values at present include "jobcomp/none", "jobcomp/filetxt", -"jobcomp/mysql", and "jobcomp/script"". +"jobcomp/mysql", "jobcomp/elasticsearch" and "jobcomp/script"". The default value is "jobcomp/none", which means that upon job completion the record of the job is purged from the system. If using the accounting infrastructure this plugin may not be of interest since the information @@ -1146,7 +1148,9 @@ parameter. The value "jobcomp/script" indicates that a script specified by the \fBJobCompLoc\fR parameter is to be executed with environment variables indicating the job information. - +The value "jobcomp/elasticsearch" indicates that a record of the job +should be written to an Elasticsearch server specified by the +\fBJobCompLoc\fR parameter. .TP \fBJobCompUser\fR The user account for accessing the job completion database. @@ -1999,10 +2003,12 @@ Currently supported options are: \fBAlloc\fR If set, the Prolog script will be executed at job allocation. By default, Prolog is executed just before the task is launched. Therefore, when salloc -is started, no Prolog is executed. \fBAlloc\fR is useful for preparing things +is started, no Prolog is executed. Alloc is useful for preparing things before a user starts to use any allocated resources. In particular, this flag is needed on a Cray system when cluster compatibility mode is enabled. + +\fBNOTE: Use of the Alloc flag will increase the time required to start jobs.\fR .TP \fBContain\fR At job allocation time, use the ProcTrack plugin to create a job container @@ -2010,6 +2016,7 @@ on all allocated compute nodes. This container may be used for user processes not launched under Slurm control, for example the PAM module may place processes launch through a direct user login into this container. +Setting the Contain implicitly sets the Alloc flag. .TP \fBNoHold\fR If set, the Alloc flag should also be set. 
This will allow for salloc to not diff --git a/etc/slurmd.service.in b/etc/slurmd.service.in index 3b1061f65..b4703812d 100644 --- a/etc/slurmd.service.in +++ b/etc/slurmd.service.in @@ -7,6 +7,7 @@ ConditionPathExists=@sysconfdir@/slurm.conf Type=forking ExecStart=@sbindir@/slurmd PIDFile=/var/run/slurmd.pid +KillMode=process [Install] WantedBy=multi-user.target diff --git a/slurm.spec b/slurm.spec index 28ff791f9..586c47f40 100644 --- a/slurm.spec +++ b/slurm.spec @@ -726,6 +726,8 @@ test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/launch_slurm.so && echo %{_libdir}/slurm/launch_slurm.so >> $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/launch_aprun.so && echo %{_libdir}/slurm/launch_aprun.so >> $LIST +test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/mpi_mvapich.so && + echo %{_libdir}/slurm/mpi_mvapich.so >> $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/power_cray.so && echo %{_libdir}/slurm/power_cray.so >> $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/select_bluegene.so && @@ -740,6 +742,8 @@ test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/job_container_none.so && echo %{_libdir}/slurm/job_container_none.so >> $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/task_cray.so && echo %{_libdir}/slurm/task_cray.so >> $LIST +test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/slurmctld_nonstop.so && + echo %{_libdir}/slurm/slurmctld_nonstop.so >> $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/switch_cray.so && echo %{_libdir}/slurm/switch_cray.so >> $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/proctrack_cray.so && @@ -946,7 +950,6 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/slurm/mpi_mpich1_shmem.so %{_libdir}/slurm/mpi_mpichgm.so %{_libdir}/slurm/mpi_mpichmx.so -%{_libdir}/slurm/mpi_mvapich.so %{_libdir}/slurm/mpi_openmpi.so %{_libdir}/slurm/mpi_pmi2.so %endif @@ -973,7 +976,6 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/slurm/select_cons_res.so %{_libdir}/slurm/select_linear.so %{_libdir}/slurm/select_serial.so -%{_libdir}/slurm/slurmctld_nonstop.so %{_libdir}/slurm/switch_generic.so %{_libdir}/slurm/switch_none.so %{_libdir}/slurm/task_none.so diff --git a/src/api/job_info.c b/src/api/job_info.c index 6a64b639b..97cfc0ffb 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -693,7 +693,8 @@ line6: /****** Line 16 ******/ /* Tres should already of been converted at this point from simple */ snprintf(tmp_line, sizeof(tmp_line), "TRES=%s", - job_ptr->tres_alloc_str); + job_ptr->tres_alloc_str ? 
job_ptr->tres_alloc_str : + job_ptr->tres_req_str); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index e74ec3ec3..2ef983d60 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -2280,6 +2280,8 @@ extern int assoc_mgr_fill_in_assoc(void *db_conn, if (!assoc->cluster) assoc->cluster = ret_assoc->cluster; + assoc->def_qos_id = ret_assoc->def_qos_id; + if (!assoc->grp_tres_mins) assoc->grp_tres_mins = ret_assoc->grp_tres_mins; if (!assoc->grp_tres_run_mins) diff --git a/src/common/eio.c b/src/common/eio.c index ba6a7efe7..a57e2bbb8 100644 --- a/src/common/eio.c +++ b/src/common/eio.c @@ -176,12 +176,18 @@ int eio_message_socket_accept(eio_obj_t *obj, List objs) (socklen_t *)&len)) < 0) { if (errno == EINTR) continue; - if (errno == EAGAIN || - errno == ECONNABORTED || - errno == EWOULDBLOCK) { + if ((errno == EAGAIN) || + (errno == ECONNABORTED) || + (errno == EWOULDBLOCK)) { return SLURM_SUCCESS; } error("Error on msg accept socket: %m"); + if ((errno == EMFILE) || + (errno == ENFILE) || + (errno == ENOBUFS) || + (errno == ENOMEM)) { + return SLURM_SUCCESS; + } obj->shutdown = true; return SLURM_SUCCESS; } diff --git a/src/common/gres.c b/src/common/gres.c index 37b220250..6d04fef86 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -6623,7 +6623,7 @@ extern char *gres_2_tres_str(List gres_list, bool is_job, bool locked) continue; /* not tracked */ if (slurmdb_find_tres_count_in_string( - tres_str, tres_rec->id)) + tres_str, tres_rec->id) != INFINITE64) continue; /* already handled */ /* New gres */ diff --git a/src/common/read_config.c b/src/common/read_config.c index 0f852d67d..3ff463b9f 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -4381,7 +4381,7 @@ extern uint16_t prolog_str2flags(char *prolog_flags) if (strcasecmp(tok, "Alloc") == 0) rc |= PROLOG_FLAG_ALLOC; else if (strcasecmp(tok, "Contain") == 0) - rc |= PROLOG_FLAG_CONTAIN; + rc |= (PROLOG_FLAG_ALLOC | PROLOG_FLAG_CONTAIN); else if (strcasecmp(tok, "NoHold") == 0) rc |= PROLOG_FLAG_NOHOLD; else { @@ -4443,6 +4443,11 @@ extern char * debug_flags2str(uint64_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "BurstBuffer"); } + if (debug_flags & DEBUG_FLAG_CPU_FREQ) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "CpuFrequency"); + } if (debug_flags & DEBUG_FLAG_CPU_BIND) { if (rc) xstrcat(rc, ","); @@ -4563,6 +4568,11 @@ extern char * debug_flags2str(uint64_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "NoRealTime"); } + if (debug_flags & DEBUG_FLAG_POWER) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "Power"); + } if (debug_flags & DEBUG_FLAG_PRIO) { if (rc) xstrcat(rc, ","); @@ -4629,16 +4639,7 @@ extern char * debug_flags2str(uint64_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "Wiki"); } - if (debug_flags & DEBUG_FLAG_CPU_FREQ) { - if (rc) - xstrcat(rc, ","); - xstrcat(rc, "CpuFrequency"); - } - if (debug_flags & DEBUG_FLAG_POWER) { - if (rc) - xstrcat(rc, ","); - xstrcat(rc, "Power"); - } + return rc; } diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 73f3fb277..b562c1424 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -675,6 +675,7 @@ extern void slurm_free_job_info_members(job_info_t * job) xfree(job->array_task_str); xfree(job->batch_host); xfree(job->batch_script); + xfree(job->burst_buffer); xfree(job->command); xfree(job->comment); xfree(job->dependency); @@ -701,6 +702,7 @@ extern void slurm_free_job_info_members(job_info_t * job) 
xfree(job->std_in); xfree(job->std_out); xfree(job->tres_alloc_str); + xfree(job->tres_req_str); xfree(job->wckey); xfree(job->work_dir); } @@ -2880,6 +2882,7 @@ extern void slurm_free_job_step_info_members (job_step_info_t * msg) { if (msg != NULL) { xfree(msg->ckpt_dir); + xfree(msg->gres); xfree(msg->name); xfree(msg->network); xfree(msg->nodes); @@ -2979,6 +2982,7 @@ extern void slurm_free_node_info_members(node_info_t * node) select_g_select_nodeinfo_free(node->select_nodeinfo); node->select_nodeinfo = NULL; xfree(node->version); + xfree(node->tres_fmt_str); /* Do NOT free node, it is an element of an array */ } } @@ -3023,12 +3027,14 @@ extern void slurm_free_partition_info_members(partition_info_t * part) xfree(part->allow_groups); xfree(part->allow_qos); xfree(part->alternate); + xfree(part->billing_weights_str); xfree(part->deny_accounts); xfree(part->deny_qos); xfree(part->name); xfree(part->nodes); xfree(part->node_inx); xfree(part->qos_char); + xfree(part->tres_fmt_str); } } diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c index fbab63098..b0f2fe4eb 100644 --- a/src/common/stepd_api.c +++ b/src/common/stepd_api.c @@ -93,7 +93,7 @@ _slurm_authorized_user() * Should be called when a connect() to a socket returns ECONNREFUSED. * Presumably the ECONNREFUSED means that nothing is attached to the listening * side of the unix domain socket. - * If the socket is at least five minutes old, go ahead an unlink it. + * If the socket is at least 10 minutes old, then unlink it. */ static void _handle_stray_socket(const char *socket_name) @@ -120,7 +120,7 @@ _handle_stray_socket(const char *socket_name) } now = time(NULL); - if ((now - buf.st_mtime) > 300) { + if ((now - buf.st_mtime) > 600) { /* remove the socket */ if (unlink(socket_name) == -1) { if (errno != ENOENT) { @@ -165,14 +165,15 @@ _step_connect(const char *directory, const char *nodename, xstrfmtcat(name, "%s/%s_%u.%u", directory, nodename, jobid, stepid); strcpy(addr.sun_path, name); - len = strlen(addr.sun_path)+1 + sizeof(addr.sun_family); + len = strlen(addr.sun_path) + 1 + sizeof(addr.sun_family); if (connect(fd, (struct sockaddr *) &addr, len) < 0) { - error("%s: connect() failed dir %s node %s job %u step %u %m", + /* Can indicate race condition at step termination */ + debug("%s: connect() failed dir %s node %s step %u.%u %m", __func__, directory, nodename, jobid, stepid); if (errno == ECONNREFUSED) { _handle_stray_socket(name); - if (stepid == NO_VAL) + if (stepid == SLURM_BATCH_SCRIPT) _handle_stray_script(directory, jobid); } xfree(name); @@ -186,7 +187,7 @@ _step_connect(const char *directory, const char *nodename, static char * -_guess_nodename() +_guess_nodename(void) { char host[256]; char *nodename = NULL; diff --git a/src/db_api/job_report_functions.c b/src/db_api/job_report_functions.c index f12523968..2e0ecdc59 100644 --- a/src/db_api/job_report_functions.c +++ b/src/db_api/job_report_functions.c @@ -215,8 +215,9 @@ static List _process_grouped_report( if (!job->elapsed) continue; - if (!(count = slurmdb_find_tres_count_in_string( - job->tres_alloc_str, tres_id))) + if ((count = slurmdb_find_tres_count_in_string( + job->tres_alloc_str, tres_id)) + == INFINITE64) continue; tmp = xstrdup_printf("%"PRIu64, count); @@ -469,8 +470,9 @@ no_objects: while ((job_group = list_next(local_itr))) { uint64_t count; - if (!(count = slurmdb_find_tres_count_in_string( - job->tres_alloc_str, tres_id)) || + if (((count = slurmdb_find_tres_count_in_string( + job->tres_alloc_str, tres_id)) + == INFINITE64) || (count < 
job_group->min_size) || (count > job_group->max_size)) continue; diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.c b/src/plugins/accounting_storage/mysql/as_mysql_job.c index 3060590ca..04a704253 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_job.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_job.c @@ -458,8 +458,6 @@ no_rollup_change: gres_alloc = slurm_add_slash_to_quotes(job_ptr->gres_alloc); if (!job_ptr->db_index) { - if (!begin_time) - begin_time = submit_time; query = xstrdup_printf( "insert into \"%s_%s\" " "(id_job, mod_time, id_array_job, id_array_task, " @@ -977,6 +975,12 @@ extern int as_mysql_step_start(mysql_conn_t *mysql_conn, */ snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->gres); nodes = tasks = 1; + if (!step_ptr->tres_alloc_str) + xstrfmtcat(step_ptr->tres_alloc_str, + "%s%u=%u,%u=%u", + step_ptr->tres_alloc_str ? "," : "", + TRES_CPU, 1, + TRES_NODE, 1); } else { char *ionodes = NULL, *temp_nodes = NULL; char temp_bit[BUF_SIZE]; @@ -1001,14 +1005,14 @@ extern int as_mysql_step_start(mysql_conn_t *mysql_conn, if (step_ptr->cpu_count) tasks = step_ptr->cpu_count; else { - if (!(tasks = slurmdb_find_tres_count_in_string( - step_ptr->tres_alloc_str, - TRES_CPU))) { - if (!(tasks = - slurmdb_find_tres_count_in_string( - step_ptr->job_ptr-> - tres_alloc_str, - TRES_CPU))) + if ((tasks = slurmdb_find_tres_count_in_string( + step_ptr->tres_alloc_str, + TRES_CPU)) == INFINITE64) { + if ((tasks = + slurmdb_find_tres_count_in_string( + step_ptr->job_ptr-> + tres_alloc_str, + TRES_CPU)) == INFINITE64) tasks = step_ptr->job_ptr-> total_nodes; } @@ -1144,14 +1148,14 @@ extern int as_mysql_step_complete(mysql_conn_t *mysql_conn, if (step_ptr->cpu_count) tasks = step_ptr->cpu_count; else { - if (!(tasks = slurmdb_find_tres_count_in_string( - step_ptr->tres_alloc_str, - TRES_CPU))) { - if (!(tasks = - slurmdb_find_tres_count_in_string( - step_ptr->job_ptr-> - tres_alloc_str, - TRES_CPU))) + if ((tasks = slurmdb_find_tres_count_in_string( + step_ptr->tres_alloc_str, + TRES_CPU)) == INFINITE64) { + if ((tasks = + slurmdb_find_tres_count_in_string( + step_ptr->job_ptr-> + tres_alloc_str, + TRES_CPU)) == INFINITE64) tasks = step_ptr->job_ptr-> total_nodes; } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c index 377366629..cbce5a510 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c @@ -947,11 +947,13 @@ static int _cluster_get_jobs(mysql_conn_t *mysql_conn, job->track_steps = 1; else if (step && (xstrcmp(step->stepname, job->jobname) || - ((j_cpus = slurmdb_find_tres_count_in_string( - job->tres_alloc_str, TRES_CPU)) && - (s_cpus = slurmdb_find_tres_count_in_string( - step->tres_alloc_str, TRES_CPU)) && - j_cpus != s_cpus))) + (((j_cpus = slurmdb_find_tres_count_in_string( + job->tres_alloc_str, TRES_CPU)) + != INFINITE64) && + ((s_cpus = slurmdb_find_tres_count_in_string( + step->tres_alloc_str, TRES_CPU)) + != INFINITE64) && + j_cpus != s_cpus))) job->track_steps = 1; } skip_steps: @@ -1463,7 +1465,8 @@ extern int setup_job_cond_limits(slurmdb_job_cond_t *job_cond, job_cond->usage_start); else xstrfmtcat(*extra, - "(t1.time_eligible < %ld " + "(t1.time_eligible " + "&& t1.time_eligible < %ld " "&& (t1.time_end >= %ld " "|| t1.time_end = 0)))", job_cond->usage_end, @@ -1474,7 +1477,8 @@ extern int setup_job_cond_limits(slurmdb_job_cond_t *job_cond, else 
xstrcat(*extra, " where ("); xstrfmtcat(*extra, - "(t1.time_eligible < %ld))", + "(t1.time_eligible && " + "t1.time_eligible < %ld))", job_cond->usage_end); } } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_rollup.c b/src/plugins/accounting_storage/mysql/as_mysql_rollup.c index 5c2befc51..faa4324cb 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_rollup.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_rollup.c @@ -307,6 +307,21 @@ static void _add_tres_2_list(List tres_list, char *tres_str, int seconds) return; } +/* This will destroy the *loc_tres given after it is transfered */ +static void _transfer_loc_tres(List *loc_tres, local_id_usage_t *usage) +{ + if (!usage) + return; + + if (!usage->loc_tres) { + usage->loc_tres = *loc_tres; + *loc_tres = NULL; + } else { + _add_job_alloc_time_to_cluster(usage->loc_tres, *loc_tres); + FREE_NULL_LIST(*loc_tres); + } +} + static void _add_tres_time_2_list(List tres_list, char *tres_str, int type, int seconds, int suspend_seconds, bool times_count) @@ -1109,7 +1124,8 @@ extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn, "left outer join \"%s_%s\" as step on " "job.job_db_inx=step.job_db_inx " "and (step.id_step>=0) " - "where (job.time_eligible < %ld && " + "where (job.time_eligible && " + "job.time_eligible < %ld && " "(job.time_end >= %ld || " "job.time_end = 0)) " "group by job.job_db_inx " @@ -1242,15 +1258,11 @@ extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn, /* do the cluster allocated calculation */ calc_cluster: - if (!a_usage) - loc_tres = list_create( - _destroy_local_tres_usage); - else { - if (!a_usage->loc_tres) - a_usage->loc_tres = list_create( - _destroy_local_tres_usage); - loc_tres = a_usage->loc_tres; - } + /* We need to have this clean for each job + * since we add the time to the cluster + * individually. + */ + loc_tres = list_create(_destroy_local_tres_usage); _add_tres_time_2_list(loc_tres, row[JOB_REQ_TRES], TIME_ALLOC, seconds, @@ -1299,8 +1311,7 @@ extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn, /* first figure out the reservation */ if (resv_id) { if (seconds <= 0) { - if (!a_usage) - FREE_NULL_LIST(loc_tres); + _transfer_loc_tres(&loc_tres, a_usage); continue; } /* Since we have already added the @@ -1345,8 +1356,8 @@ extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn, loc_tres, TIME_ALLOC, loc_seconds, 1); } - if (!a_usage) - FREE_NULL_LIST(loc_tres); + + _transfer_loc_tres(&loc_tres, a_usage); continue; } @@ -1355,8 +1366,7 @@ extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn, ever happen. */ if (!c_usage) { - if (!a_usage) - FREE_NULL_LIST(loc_tres); + _transfer_loc_tres(&loc_tres, a_usage); continue; } @@ -1377,9 +1387,10 @@ extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn, loc_tres); } - /* The loc_tres isn't needed after this */ - if (!a_usage) - FREE_NULL_LIST(loc_tres); + /* The loc_tres isn't needed after this so + * transfer to the association and go on our + * merry way. 
*/ + _transfer_loc_tres(&loc_tres, a_usage); /* now reserved time */ if (!row_start || (row_start >= c_usage->start)) { diff --git a/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c b/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c index 7ed6983a2..ee51e3713 100644 --- a/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c +++ b/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c @@ -270,22 +270,28 @@ extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, int rc = SLURM_SUCCESS; acct_gather_energy_t *energy = (acct_gather_energy_t *)data; time_t *last_poll = (time_t *)data; + uint16_t *sensor_cnt = (uint16_t *)data; xassert(_run_in_daemon()); switch (data_type) { case ENERGY_DATA_JOULES_TASK: + case ENERGY_DATA_NODE_ENERGY_UP: if (local_energy->current_watts == NO_VAL) energy->consumed_energy = NO_VAL; else _get_joules_task(energy); break; case ENERGY_DATA_STRUCT: + case ENERGY_DATA_NODE_ENERGY: memcpy(energy, local_energy, sizeof(acct_gather_energy_t)); break; case ENERGY_DATA_LAST_POLL: *last_poll = local_energy->poll_time; break; + case ENERGY_DATA_SENSOR_CNT: + *sensor_cnt = 1; + break; default: error("acct_gather_energy_p_get_data: unknown enum %d", data_type); diff --git a/src/plugins/acct_gather_energy/ibmaem/acct_gather_energy_ibmaem.c b/src/plugins/acct_gather_energy/ibmaem/acct_gather_energy_ibmaem.c index 572b1a178..043fe08a3 100644 --- a/src/plugins/acct_gather_energy/ibmaem/acct_gather_energy_ibmaem.c +++ b/src/plugins/acct_gather_energy/ibmaem/acct_gather_energy_ibmaem.c @@ -275,6 +275,7 @@ extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, switch (data_type) { case ENERGY_DATA_JOULES_TASK: + case ENERGY_DATA_NODE_ENERGY_UP: if (local_energy->current_watts == NO_VAL) energy->consumed_energy = NO_VAL; else diff --git a/src/plugins/burst_buffer/cray/burst_buffer_cray.c b/src/plugins/burst_buffer/cray/burst_buffer_cray.c index c6e13eedf..f9cb9dd90 100644 --- a/src/plugins/burst_buffer/cray/burst_buffer_cray.c +++ b/src/plugins/burst_buffer/cray/burst_buffer_cray.c @@ -109,6 +109,19 @@ static bb_state_t bb_state; static uint32_t last_persistent_id = 1; static char * state_save_loc = NULL; +/* These are defined here so when we link with something other than + * the slurmctld we will have these symbols defined. They will get + * overwritten when linking with the slurmctld. 
+ */ +#if defined (__APPLE__) +int accounting_enforce __attribute__((weak_import)) = 0; +void *acct_db_conn __attribute__((weak_import)) = NULL; +#else +int accounting_enforce = 0; +void *acct_db_conn = NULL; +#endif + + /* Description of each Cray DW configuration entry */ typedef struct bb_configs { @@ -200,6 +213,7 @@ static void * _destroy_persistent(void *x); static void _free_create_args(create_buf_data_t *create_args); static void _free_script_argv(char **script_argv); static bb_job_t *_get_bb_job(struct job_record *job_ptr); +static bool _have_dw_cmd_opts(bb_job_t *bb_job); static void _job_queue_del(void *x); static bb_configs_t *_json_parse_configs_array(json_object *jobj, char *key, int *num); @@ -231,14 +245,16 @@ static void _purge_bb_files(uint32_t job_id, struct job_record *job_ptr); static void _purge_vestigial_bufs(void); static void _python2json(char *buf); static void _recover_bb_state(void); +static int _queue_setup(struct job_record *job_ptr, bb_job_t *bb_job); static int _queue_stage_in(struct job_record *job_ptr, bb_job_t *bb_job); static int _queue_stage_out(struct job_record *job_ptr); static void _queue_teardown(uint32_t job_id, uint32_t user_id, bool hurry); static void _reset_buf_state(uint32_t user_id, uint32_t job_id, char *name, - int new_state); + int new_state, uint64_t buf_size); static void _save_bb_state(void); static void _set_assoc_mgr_ptrs(bb_alloc_t *bb_alloc); static void * _start_pre_run(void *x); +static void * _start_setup(void *x); static void * _start_stage_in(void *x); static void * _start_stage_out(void *x); static void * _start_teardown(void *x); @@ -357,7 +373,7 @@ static void _test_config(void) } } -/* Allocate resources to a job and begin stage-in */ +/* Allocate resources to a job and begin setup/stage-in */ static int _alloc_job_bb(struct job_record *job_ptr, bb_job_t *bb_job, bool job_ready) { @@ -384,7 +400,11 @@ static int _alloc_job_bb(struct job_record *job_ptr, bb_job_t *bb_job, } } } else { - bb_job->state = BB_STATE_STAGED_IN; + /* Job uses persistent burst buffer, just run setup */ + if (bb_job->state < BB_STATE_STAGING_IN) { + bb_job->state = BB_STATE_STAGING_IN; + rc = _queue_setup(job_ptr, bb_job); + } } return rc; @@ -495,6 +515,7 @@ static bb_job_t *_get_bb_job(struct job_record *job_ptr) bb_job->buf_ptr[inx].size = tmp_cnt; bb_job->buf_ptr[inx].state = BB_STATE_PENDING; bb_job->buf_ptr[inx].type = bb_type; + bb_job->persist_add += tmp_cnt; } else if (!strncmp(tok, "destroy_persistent", 17) || !strncmp(tok, "delete_persistent", 16)) { have_bb = true; @@ -839,6 +860,10 @@ static void _recover_bb_state(void) bb_alloc = bb_find_name_rec(name, user_id, &bb_state); } if (bb_alloc) { + if (bb_state.bb_config.debug_flag) { + info("Recovered burst buffer %s from user %u", + bb_alloc->name, bb_alloc->user_id); + } xfree(bb_alloc->account); bb_alloc->account = account; account = NULL; @@ -1220,6 +1245,145 @@ static int _write_file(char *file_name, char *buf) return SLURM_SUCCESS; } +static int _queue_setup(struct job_record *job_ptr, bb_job_t *bb_job) +{ + char *hash_dir = NULL, *job_dir = NULL; + char *client_nodes_file_nid = NULL; + char **setup_argv; + stage_args_t *stage_args; + int hash_inx = job_ptr->job_id % 10; + pthread_attr_t stage_attr; + pthread_t stage_tid = 0; + int rc = SLURM_SUCCESS; + + xstrfmtcat(hash_dir, "%s/hash.%d", state_save_loc, hash_inx); + (void) mkdir(hash_dir, 0700); + xstrfmtcat(job_dir, "%s/job.%u", hash_dir, job_ptr->job_id); + if (job_ptr->sched_nodes) { + xstrfmtcat(client_nodes_file_nid, 
"%s/client_nids", job_dir); + if (_write_nid_file(client_nodes_file_nid, + job_ptr->sched_nodes, job_ptr->job_id)) + xfree(client_nodes_file_nid); + } + setup_argv = xmalloc(sizeof(char *) * 20); /* NULL terminated */ + setup_argv[0] = xstrdup("dw_wlm_cli"); + setup_argv[1] = xstrdup("--function"); + setup_argv[2] = xstrdup("setup"); + setup_argv[3] = xstrdup("--token"); + xstrfmtcat(setup_argv[4], "%u", job_ptr->job_id); + setup_argv[5] = xstrdup("--caller"); + setup_argv[6] = xstrdup("SLURM"); + setup_argv[7] = xstrdup("--user"); + xstrfmtcat(setup_argv[8], "%d", job_ptr->user_id); + setup_argv[9] = xstrdup("--capacity"); + xstrfmtcat(setup_argv[10], "%s:%s", + bb_state.bb_config.default_pool, + bb_get_size_str(bb_job->total_size)); + setup_argv[11] = xstrdup("--job"); + xstrfmtcat(setup_argv[12], "%s/script", job_dir); + if (client_nodes_file_nid) { +#if defined(HAVE_NATIVE_CRAY) + setup_argv[13] = xstrdup("--nidlistfile"); +#else + setup_argv[13] = xstrdup("--nodehostnamefile"); +#endif + setup_argv[14] = xstrdup(client_nodes_file_nid); + } + + stage_args = xmalloc(sizeof(stage_args_t)); + stage_args->job_id = job_ptr->job_id; + stage_args->timeout = bb_state.bb_config.stage_in_timeout; + stage_args->args1 = setup_argv; +/* stage_args->args2 = NULL; Nothing to stage-in */ + + slurm_attr_init(&stage_attr); + if (pthread_attr_setdetachstate(&stage_attr, PTHREAD_CREATE_DETACHED)) + error("pthread_attr_setdetachstate error %m"); + while (pthread_create(&stage_tid, &stage_attr, _start_setup, + stage_args)) { + if (errno != EAGAIN) { + error("%s: pthread_create: %m", __func__); + _start_setup(stage_args); /* Do in-line */ + break; + } + usleep(100000); + } + slurm_attr_destroy(&stage_attr); + + xfree(hash_dir); + xfree(job_dir); + xfree(client_nodes_file_nid); + return rc; +} + +static void *_start_setup(void *x) +{ + stage_args_t *stage_args; + char **setup_argv, *resp_msg = NULL, *op = NULL; + int rc = SLURM_SUCCESS, status = 0, timeout; + slurmctld_lock_t job_write_lock = + { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; + struct job_record *job_ptr; + bb_job_t *bb_job; + DEF_TIMERS; + + stage_args = (stage_args_t *) x; + setup_argv = stage_args->args1; + + if (stage_args->timeout) + timeout = stage_args->timeout * 1000; + else + timeout = DEFAULT_OTHER_TIMEOUT * 1000; + op = "setup"; + START_TIMER; + resp_msg = bb_run_script("setup", + bb_state.bb_config.get_sys_state, + setup_argv, timeout, &status); + END_TIMER; + if (DELTA_TIMER > 500000) { /* 0.5 secs */ + info("%s: setup for job %u ran for %s", + __func__, stage_args->job_id, TIME_STR); + } else if (bb_state.bb_config.debug_flag) { + debug("%s: setup for job %u ran for %s", + __func__, stage_args->job_id, TIME_STR); + } + _log_script_argv(setup_argv, resp_msg); + if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) { + error("%s: setup for job %u status:%u response:%s", + __func__, stage_args->job_id, status, resp_msg); + rc = SLURM_ERROR; + } + lock_slurmctld(job_write_lock); + job_ptr = find_job_record(stage_args->job_id); + if (!job_ptr) { + error("%s: unable to find job record for job %u", + __func__, stage_args->job_id); + } else if (rc == SLURM_SUCCESS) { + pthread_mutex_lock(&bb_state.bb_mutex); + bb_job = bb_job_find(&bb_state, stage_args->job_id); + if (bb_job) + bb_job->state = BB_STATE_STAGED_IN; + pthread_mutex_unlock(&bb_state.bb_mutex); + } else { + xfree(job_ptr->state_desc); + job_ptr->state_reason = FAIL_BURST_BUFFER_OP; + xstrfmtcat(job_ptr->state_desc, "%s: %s: %s", + plugin_type, op, resp_msg); + job_ptr->priority = 
0; /* Hold job */ + pthread_mutex_lock(&bb_state.bb_mutex); + bb_job = bb_job_find(&bb_state, stage_args->job_id); + if (bb_job) + bb_job->state = BB_STATE_COMPLETE; + pthread_mutex_unlock(&bb_state.bb_mutex); + } + unlock_slurmctld(job_write_lock); + + xfree(resp_msg); + _free_script_argv(setup_argv); + xfree(stage_args); + return NULL; +} + static int _queue_stage_in(struct job_record *job_ptr, bb_job_t *bb_job) { char *hash_dir = NULL, *job_dir = NULL; @@ -1818,6 +1982,8 @@ static int _test_size_limit(struct job_record *job_ptr, bb_job_t *bb_job) xassert(bb_job); add_space = bb_job->total_size + bb_job->persist_add; + if (add_space > bb_state.total_space) + return 1; resv_bb = job_test_bb_resv(job_ptr, now); if (resv_bb) { @@ -1832,9 +1998,11 @@ static int _test_size_limit(struct job_record *job_ptr, bb_job_t *bb_job) resv_space += resv_bb_ptr->used_space; } } + if ((add_space + resv_space) > bb_state.total_space) + return 1; add_total_space_needed = bb_state.used_space + add_space + resv_space - - bb_state.total_space; + bb_state.total_space; needed_gres_ptr = xmalloc(sizeof(needed_gres_t) * bb_job->gres_cnt); for (i = 0; i < bb_job->gres_cnt; i++) { needed_gres_ptr[i].name = xstrdup(bb_job->gres_ptr[i].name); @@ -2103,7 +2271,7 @@ static int _parse_bb_opts(struct job_descriptor *job_desc, uint64_t *bb_size, info("%s: User %d disabled from creating " "persistent burst buffer", __func__, submit_uid); - rc = ESLURM_INVALID_BURST_BUFFER_REQUEST; + rc = ESLURM_BURST_BUFFER_PERMISSION; break; } else if (!strncmp(tok, "create_persistent", 17)) { have_bb = true; @@ -2135,7 +2303,7 @@ static int _parse_bb_opts(struct job_descriptor *job_desc, uint64_t *bb_size, info("%s: User %d disabled from destroying " "persistent burst buffer", __func__, submit_uid); - rc = ESLURM_INVALID_BURST_BUFFER_REQUEST; + rc = ESLURM_BURST_BUFFER_PERMISSION; break; } else if (!strncmp(tok, "destroy_persistent", 17)) { have_bb = true; @@ -2635,6 +2803,25 @@ fini: xfree(data_buf); close(path_fd); } +/* Return true if #DW options (excludes #BB options) */ +static bool _have_dw_cmd_opts(bb_job_t *bb_job) +{ + int i; + bb_buf_t *bb_buf; + + xassert(bb_job); + if (bb_job->total_size) + return true; + + for (i = 0, bb_buf = bb_job->buf_ptr; i < bb_job->buf_cnt; + i++, bb_buf++) { + if (!bb_buf->create && !bb_buf->destroy) + return true; + } + + return false; +} + /* * Secondary validation of a job submit request with respect to burst buffer * options. Performed after establishing job ID and creating script file. 
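The setup path added above follows the plugin's existing pattern for slow dw_wlm_cli calls: _queue_setup() builds the argument vector, then hands it to a detached worker thread (_start_setup()), retrying pthread_create() on EAGAIN and running the worker in-line if thread creation fails for any other reason. Below is a minimal, self-contained sketch of that spawn-with-fallback pattern; the names worker, work_args and spawn_detached are generic placeholders rather than symbols from the plugin, and the sketch checks pthread_create()'s return value directly instead of errno.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Placeholder payload, standing in for the plugin's stage_args_t. */
struct work_args {
	unsigned int job_id;
};

/* Placeholder worker, standing in for _start_setup(); owns and frees args. */
static void *worker(void *x)
{
	struct work_args *args = x;

	printf("running setup for job %u\n", args->job_id);
	free(args);
	return NULL;
}

/* Spawn a detached worker; retry on EAGAIN, run in-line as a last resort. */
static void spawn_detached(struct work_args *args)
{
	pthread_attr_t attr;
	pthread_t tid;
	int rc;

	pthread_attr_init(&attr);
	if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
		perror("pthread_attr_setdetachstate");
	/* pthread_create() returns the error code directly. */
	while ((rc = pthread_create(&tid, &attr, worker, args))) {
		if (rc != EAGAIN) {
			fprintf(stderr, "pthread_create: %s\n", strerror(rc));
			worker(args);	/* do the work in-line instead */
			break;
		}
		usleep(100000);		/* transient thread shortage: retry */
	}
	pthread_attr_destroy(&attr);
}

int main(void)
{
	struct work_args *args = malloc(sizeof(*args));

	if (!args)
		return 1;
	args->job_id = 1234;
	spawn_detached(args);
	sleep(1);	/* give a detached worker time to finish */
	return 0;
}

Falling back to an in-line call keeps the setup request from being dropped when the daemon cannot create another thread, at the cost of blocking the caller for the duration of the script.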
@@ -2683,6 +2870,11 @@ extern int bb_p_job_validate2(struct job_record *job_ptr, char **err_msg) return rc; } + if (!_have_dw_cmd_opts(bb_job)) { + pthread_mutex_unlock(&bb_state.bb_mutex); + return rc; + } + if (bb_state.bb_config.debug_flag) { info("%s: %s: %s", plugin_type, __func__, jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf))); @@ -2975,10 +3167,7 @@ extern int bb_p_job_test_stage_in(struct job_record *job_ptr, bool test_only) if ((test_only == false) && (_test_size_limit(job_ptr, bb_job) == 0) && (_alloc_job_bb(job_ptr, bb_job, false) == SLURM_SUCCESS)) { - if (bb_job->total_size == 0) - rc = 1; /* Persistent only, space available */ - else - rc = 0; /* Stage-in job buffer now */ + rc = 0; /* Setup/stage-in in progress */ } } else if (bb_job->state == BB_STATE_STAGING_IN) { rc = 0; @@ -3353,6 +3542,13 @@ static int _create_bufs(struct job_record *job_ptr, bb_job_t *bb_job, info("Attempt by job %u to create duplicate " "persistent burst buffer named %s", job_ptr->job_id, buf_ptr->name); + if (bb_job->persist_add >= bb_alloc->size) { + bb_job->persist_add -= bb_alloc->size; + } else { + error("%s: Persistent buffer size underflow for job %u", + __func__, job_ptr->job_id); + bb_job->persist_add = 0; + } continue; } rc++; @@ -3487,8 +3683,16 @@ static bool _test_persistent_use_ready(bb_job_t *bb_job, return true; } +/* Reset data structures based upon a change in buffer state + * IN user_id - User effected + * IN job_id - Job effected + * IN name - Buffer name + * IN new_state - New buffer state + * IN buf_size - Size of created burst buffer only, used to decrement remaining + * space requirement for the job + */ static void _reset_buf_state(uint32_t user_id, uint32_t job_id, char *name, - int new_state) + int new_state, uint64_t buf_size) { bb_buf_t *buf_ptr; bb_job_t *bb_job; @@ -3514,6 +3718,17 @@ static void _reset_buf_state(uint32_t user_id, uint32_t job_id, char *name, if ((old_state == BB_STATE_DELETING) && (new_state == BB_STATE_PENDING)) bb_limit_rem(user_id, buf_ptr->size, &bb_state); + if ((old_state == BB_STATE_ALLOCATING) && + (new_state == BB_STATE_ALLOCATED) && + ((name[0] < '0') || (name[0] > '9'))) { + if (bb_job->persist_add >= buf_size) { + bb_job->persist_add -= buf_size; + } else { + error("%s: Persistent buffer size underflow for job %u", + __func__, job_id); + bb_job->persist_add = 0; + } + } break; } @@ -3607,9 +3822,8 @@ static void *_create_persistent(void *x) resp_msg = NULL; } pthread_mutex_lock(&bb_state.bb_mutex); - _reset_buf_state(create_args->user_id, - create_args->job_id, - create_args->name, BB_STATE_PENDING); + _reset_buf_state(create_args->user_id, create_args->job_id, + create_args->name, BB_STATE_PENDING, 0); bb_state.last_update_time = time(NULL); pthread_mutex_unlock(&bb_state.bb_mutex); unlock_slurmctld(job_write_lock); @@ -3624,9 +3838,9 @@ static void *_create_persistent(void *x) __func__, create_args->job_id); } pthread_mutex_lock(&bb_state.bb_mutex); - _reset_buf_state(create_args->user_id, - create_args->job_id, create_args->name, - BB_STATE_ALLOCATED); + _reset_buf_state(create_args->user_id, create_args->job_id, + create_args->name, BB_STATE_ALLOCATED, + create_args->size); bb_alloc = bb_alloc_name_rec(&bb_state, create_args->name, create_args->user_id); bb_alloc->size = create_args->size; @@ -3751,9 +3965,8 @@ static void *_destroy_persistent(void *x) plugin_type, __func__, resp_msg); } pthread_mutex_lock(&bb_state.bb_mutex); - _reset_buf_state(destroy_args->user_id, - destroy_args->job_id, destroy_args->name, - BB_STATE_PENDING); + 
_reset_buf_state(destroy_args->user_id, destroy_args->job_id, + destroy_args->name, BB_STATE_PENDING, 0); bb_state.last_update_time = time(NULL); pthread_mutex_unlock(&bb_state.bb_mutex); unlock_slurmctld(job_write_lock); @@ -3762,9 +3975,8 @@ static void *_destroy_persistent(void *x) { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; pthread_mutex_lock(&bb_state.bb_mutex); - _reset_buf_state(destroy_args->user_id, - destroy_args->job_id, destroy_args->name, - BB_STATE_DELETED); + _reset_buf_state(destroy_args->user_id, destroy_args->job_id, + destroy_args->name, BB_STATE_DELETED, 0); /* Modify internal buffer record for purging */ if (bb_alloc) { diff --git a/src/plugins/job_submit/lua/job_submit_lua.c b/src/plugins/job_submit/lua/job_submit_lua.c index 2a08b4e40..e5aaebf5a 100755 --- a/src/plugins/job_submit/lua/job_submit_lua.c +++ b/src/plugins/job_submit/lua/job_submit_lua.c @@ -118,6 +118,18 @@ time_t last_lua_resv_update = (time_t) 0; static pthread_mutex_t lua_lock = PTHREAD_MUTEX_INITIALIZER; #endif +/* These are defined here so when we link with something other than + * the slurmctld we will have these symbols defined. They will get + * overwritten when linking with the slurmctld. + */ +#if defined (__APPLE__) +int accounting_enforce __attribute__((weak_import)) = 0; +void *acct_db_conn __attribute__((weak_import)) = NULL; +#else +int accounting_enforce = 0; +void *acct_db_conn = NULL; +#endif + /*****************************************************************************\ * We've provided a simple example of the type of things you can do with this * plugin. If you develop another plugin that may be of interest to others @@ -228,8 +240,8 @@ static char *_get_default_account(uint32_t user_id) memset(&user, 0, sizeof(slurmdb_user_rec_t)); user.uid = user_id; - if (assoc_mgr_fill_in_user(acct_db_conn, - &user, 0, NULL) != SLURM_ERROR) { + if (assoc_mgr_fill_in_user(acct_db_conn, &user, accounting_enforce, + NULL) != SLURM_ERROR) { return user.default_acct; } else { return NULL; @@ -240,9 +252,8 @@ static char *_get_default_account(uint32_t user_id) static char *_get_default_qos(uint32_t user_id, char *account, char *partition) { slurmdb_assoc_rec_t assoc; - slurmdb_assoc_rec_t *assoc_ptr; slurmdb_qos_rec_t qos; - uint32_t qos_id; + uint32_t qos_id = 0; memset(&assoc, 0, sizeof(slurmdb_assoc_rec_t)); assoc.uid = user_id; @@ -252,21 +263,18 @@ static char *_get_default_qos(uint32_t user_id, char *account, char *partition) } else { assoc.acct = _get_default_account(user_id); } - if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc, 0, - &assoc_ptr, false) != SLURM_ERROR) { - qos_id = assoc_ptr->def_qos_id; - } else { - return NULL; - } - if (!qos_id) { + if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc, accounting_enforce, + NULL, false) != SLURM_ERROR) + qos_id = assoc.def_qos_id; + + if (!qos_id) return NULL; - } memset(&qos, 0, sizeof(slurmdb_qos_rec_t)); qos.id = qos_id; - if (assoc_mgr_fill_in_qos(acct_db_conn, - &qos, 0, NULL, false) != SLURM_ERROR) { + if (assoc_mgr_fill_in_qos(acct_db_conn, &qos, accounting_enforce, + NULL, false) != SLURM_ERROR) { return qos.name; } else { return NULL; diff --git a/src/plugins/jobcomp/elasticsearch/jobcomp_elasticsearch.c b/src/plugins/jobcomp/elasticsearch/jobcomp_elasticsearch.c index f4225f71b..fd0fe769b 100644 --- a/src/plugins/jobcomp/elasticsearch/jobcomp_elasticsearch.c +++ b/src/plugins/jobcomp/elasticsearch/jobcomp_elasticsearch.c @@ -110,6 +110,18 @@ const uint32_t plugin_version = SLURM_VERSION_NUMBER; 
"\"nodes\":\"%s\",\"total_cpus\":%lu,\"total_nodes\":%lu,"\ "\"derived_exitcode\":%lu,\"exitcode\":%lu,\"state\":\"%s\"" +/* These are defined here so when we link with something other than + * the slurmctld we will have these symbols defined. They will get + * overwritten when linking with the slurmctld. + */ +#if defined (__APPLE__) +int accounting_enforce __attribute__((weak_import)) = 0; +void *acct_db_conn __attribute__((weak_import)) = NULL; +#else +int accounting_enforce = 0; +void *acct_db_conn = NULL; +#endif + /* Type for error string table entries */ typedef struct { int xe_number; @@ -312,7 +324,7 @@ static int _index_job(const char *jobcomp) static int error_cnt = 0; if (log_url == NULL) { - if (((error_cnt++) % 100) == 0) { + if (((++error_cnt) % 100) == 0) { /* Periodically log errors */ error("%s: Unable to save job state for %d " "jobs, caching data", @@ -394,7 +406,7 @@ static int _index_job(const char *jobcomp) curl_global_cleanup(); if (rc == SLURM_ERROR) { - if (((error_cnt++) % 100) == 0) { + if (((++error_cnt) % 100) == 0) { /* Periodically log errors */ error("%s: Unable to save job state for %d " "jobs, caching data", diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index 5b7c48286..16612a857 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -151,7 +151,7 @@ static void _load_config(void); static bool _many_pending_rpcs(void); static bool _more_work(time_t last_backfill_time); static uint32_t _my_sleep(int usec); -static int _num_feature_count(struct job_record *job_ptr); +static int _num_feature_count(struct job_record *job_ptr, bool *has_xor); static void _reset_job_time_limit(struct job_record *job_ptr, time_t now, node_space_map_t *node_space); static int _start_job(struct job_record *job_ptr, bitstr_t *avail_bitmap); @@ -268,7 +268,7 @@ static bool _many_pending_rpcs(void) } /* test if job has feature count specification */ -static int _num_feature_count(struct job_record *job_ptr) +static int _num_feature_count(struct job_record *job_ptr, bool *has_xor) { struct job_details *detail_ptr = job_ptr->details; int rc = 0; @@ -282,6 +282,8 @@ static int _num_feature_count(struct job_record *job_ptr) while ((feat_ptr = (struct feature_record *) list_next(feat_iter))) { if (feat_ptr->count) rc++; + if (feat_ptr->op_code == FEATURE_OP_XOR) + *has_xor = true; } list_iterator_destroy(feat_iter); @@ -298,11 +300,15 @@ static int _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes, bitstr_t *exc_core_bitmap) { - bitstr_t *tmp_bitmap; + bitstr_t *low_bitmap = NULL, *tmp_bitmap = NULL; int rc = SLURM_SUCCESS; - int feat_cnt = _num_feature_count(job_ptr); + bool has_xor = false; + int feat_cnt = _num_feature_count(job_ptr, &has_xor); + struct job_details *detail_ptr = job_ptr->details; List preemptee_candidates = NULL; List preemptee_job_list = NULL; + ListIterator feat_iter; + struct feature_record *feat_ptr; if (feat_cnt) { /* Ideally schedule the job feature by feature, @@ -312,9 +318,6 @@ static int _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap, * one feature count. It should work fairly well * in cases where there are multiple feature * counts. 
*/ - struct job_details *detail_ptr = job_ptr->details; - ListIterator feat_iter; - struct feature_record *feat_ptr; int i = 0, list_size; uint16_t *feat_cnt_orig = NULL, high_cnt = 0; @@ -355,6 +358,77 @@ static int _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap, } list_iterator_destroy(feat_iter); xfree(feat_cnt_orig); + } else if (has_xor) { + /* Cache the feature information and test the individual + * features, one at a time */ + struct feature_record feature_base; + List feature_cache = detail_ptr->feature_list; + time_t low_start = 0; + + detail_ptr->feature_list = list_create(NULL); + feature_base.count = 0; + feature_base.op_code = FEATURE_OP_END; + list_append(detail_ptr->feature_list, &feature_base); + + tmp_bitmap = bit_copy(*avail_bitmap); + feat_iter = list_iterator_create(feature_cache); + while ((feat_ptr = + (struct feature_record *) list_next(feat_iter))) { + feature_base.name = feat_ptr->name; + if ((job_req_node_filter(job_ptr, *avail_bitmap) == + SLURM_SUCCESS) && + (bit_set_count(*avail_bitmap) >= min_nodes)) { + preemptee_candidates = + slurm_find_preemptable_jobs(job_ptr); + rc = select_g_job_test(job_ptr, *avail_bitmap, + min_nodes, max_nodes, + req_nodes, + SELECT_MODE_WILL_RUN, + preemptee_candidates, + &preemptee_job_list, + exc_core_bitmap); + FREE_NULL_LIST(preemptee_job_list); + if ((rc == SLURM_SUCCESS) && + ((low_start == 0) || + (low_start > job_ptr->start_time))) { + low_start = job_ptr->start_time; + low_bitmap = *avail_bitmap; + *avail_bitmap = NULL; + } + } + FREE_NULL_BITMAP(*avail_bitmap); + *avail_bitmap = bit_copy(tmp_bitmap); + } + list_iterator_destroy(feat_iter); + FREE_NULL_BITMAP(tmp_bitmap); + if (low_start) { + job_ptr->start_time = low_start; + rc = SLURM_SUCCESS; + *avail_bitmap = low_bitmap; + } else { + rc = ESLURM_NODES_BUSY; + FREE_NULL_BITMAP(low_bitmap); + } + + /* Restore the original feature information */ + list_destroy(detail_ptr->feature_list); + detail_ptr->feature_list = feature_cache; + } else if (detail_ptr->feature_list) { + if ((job_req_node_filter(job_ptr, *avail_bitmap) != + SLURM_SUCCESS) || + (bit_set_count(*avail_bitmap) < min_nodes)) { + rc = ESLURM_NODES_BUSY; + } else { + preemptee_candidates = + slurm_find_preemptable_jobs(job_ptr); + rc = select_g_job_test(job_ptr, *avail_bitmap, + min_nodes, max_nodes, req_nodes, + SELECT_MODE_WILL_RUN, + preemptee_candidates, + &preemptee_job_list, + exc_core_bitmap); + FREE_NULL_LIST(preemptee_job_list); + } } else { /* Try to schedule the job. First on dedicated nodes * then on shared nodes (if so configured). 
*/ @@ -399,7 +473,6 @@ static int _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap, FREE_NULL_LIST(preemptee_candidates); return rc; - } /* Terminate backfill_agent */ diff --git a/src/sacct/print.c b/src/sacct/print.c index 35b187958..2abcb50f8 100644 --- a/src/sacct/print.c +++ b/src/sacct/print.c @@ -247,9 +247,9 @@ void print_fields(type_t type, void *object) if (step->stats.cpu_min != NO_VAL) got_stats = true; - if (!(step_cpu_tres_rec_count = - slurmdb_find_tres_count_in_string( - step->tres_alloc_str, TRES_CPU))) + if ((step_cpu_tres_rec_count = + slurmdb_find_tres_count_in_string( + step->tres_alloc_str, TRES_CPU)) == INFINITE64) step_cpu_tres_rec_count = slurmdb_find_tres_count_in_string( (job->tres_alloc_str && @@ -267,6 +267,12 @@ void print_fields(type_t type, void *object) break; } + if ((uint64_t)cpu_tres_rec_count == INFINITE64) + cpu_tres_rec_count = 0; + + if ((uint64_t)step_cpu_tres_rec_count == INFINITE64) + step_cpu_tres_rec_count = 0; + list_iterator_reset(print_fields_itr); while((field = list_next(print_fields_itr))) { char *tmp_char = NULL, id[FORMAT_STRING_SIZE]; diff --git a/src/scontrol/update_part.c b/src/scontrol/update_part.c index 79424ad16..43d9fbe43 100644 --- a/src/scontrol/update_part.c +++ b/src/scontrol/update_part.c @@ -448,7 +448,12 @@ scontrol_create_part (int argc, char *argv[]) exit_code = 1; error("PartitionName must be given."); return 0; + } else if (strcasecmp(part_msg.name, "default") == 0) { + exit_code = 1; + error("PartitionName cannot be \"DEFAULT\"."); + return 0; } + if (update_cnt == 0) { exit_code = 1; error("No parameters specified"); diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c index 53cbb1da8..6e04f96a2 100644 --- a/src/slurmctld/agent.c +++ b/src/slurmctld/agent.c @@ -1644,17 +1644,54 @@ static void _set_job_time(struct job_record *job_ptr, uint16_t mail_type, static void _set_job_term_info(struct job_record *job_ptr, uint16_t mail_type, char *buf, int buf_len) { - uint16_t base_state = job_ptr->job_state & JOB_STATE_BASE; - buf[0] = '\0'; + if ((mail_type == MAIL_JOB_END) || (mail_type == MAIL_JOB_FAIL)) { - if (WIFEXITED(job_ptr->exit_code)) { - int exit_code = WEXITSTATUS(job_ptr->exit_code); - snprintf(buf, buf_len, ", %s, ExitCode %d", - job_state_string(base_state), exit_code); + uint16_t base_state; + uint32_t exit_status_min, exit_status_max; + int exit_code_min, exit_code_max; + + base_state = job_ptr->job_state & JOB_STATE_BASE; + if (job_ptr->array_recs) { + exit_status_min = job_ptr->array_recs->min_exit_code; + exit_status_max = job_ptr->array_recs->max_exit_code; + if (WIFEXITED(exit_status_min) && + WIFEXITED(exit_status_max)) { + char *state_string; + exit_code_min = WEXITSTATUS(exit_status_min); + exit_code_max = WEXITSTATUS(exit_status_max); + if ((exit_code_min == 0) && (exit_code_max > 0)) + state_string = "Mixed"; + else { + state_string = + job_state_string(base_state); + } + snprintf(buf, buf_len, ", %s, ExitCode [%d-%d]", + state_string, exit_code_min, + exit_code_max); + } else if (WIFSIGNALED(exit_status_max)) { + exit_code_max = WTERMSIG(exit_status_max); + snprintf(buf, buf_len, ", %s, MaxSignal [%d]", + "Mixed", exit_code_max); + } else if (WIFEXITED(exit_status_max)) { + exit_code_max = WEXITSTATUS(exit_status_max); + snprintf(buf, buf_len, ", %s, MaxExitCode [%d]", + "Mixed", exit_code_max); + } else { + snprintf(buf, buf_len, ", %s", + job_state_string(base_state)); + } } else { - snprintf(buf, buf_len, ", %s", - job_state_string(base_state)); + exit_status_max = 
job_ptr->exit_code; + if (WIFEXITED(exit_status_max)) { + exit_code_max = WEXITSTATUS(exit_status_max); + snprintf(buf, buf_len, ", %s, ExitCode %d", + job_state_string(base_state), + exit_code_max); + } else { + snprintf(buf, buf_len, ", %s", + job_state_string(base_state)); + } } } else if (buf_len > 0) { buf[0] = '\0'; @@ -1676,9 +1713,24 @@ extern void mail_job_info (struct job_record *job_ptr, uint16_t mail_type) else mi->user_name = xstrdup(job_ptr->mail_user); + /* Use job array master record, if available */ + if ((job_ptr->array_task_id != NO_VAL) && !job_ptr->array_recs) { + struct job_record *master_job_ptr; + master_job_ptr = find_job_record(job_ptr->array_job_id); + if (master_job_ptr && master_job_ptr->array_recs) + job_ptr = master_job_ptr; + } + _set_job_time(job_ptr, mail_type, job_time, sizeof(job_time)); _set_job_term_info(job_ptr, mail_type, term_msg, sizeof(term_msg)); - if (job_ptr->array_task_id != NO_VAL) { + if (job_ptr->array_recs) { + mi->message = xstrdup_printf("SLURM Job_id=%u_* (%u) Name=%s " + "%s%s%s", + job_ptr->array_job_id, + job_ptr->job_id, job_ptr->name, + _mail_type_str(mail_type), + job_time, term_msg); + } else if (job_ptr->array_task_id != NO_VAL) { mi->message = xstrdup_printf("SLURM Job_id=%u_%u (%u) Name=%s " "%s%s%s", job_ptr->array_job_id, diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 071770c26..78fc1e913 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -175,6 +175,7 @@ static void _dump_job_state(struct job_record *dump_job_ptr, Buf buffer); static int _find_batch_dir(void *x, void *key); static void _get_batch_job_dir_ids(List batch_dirs); static time_t _get_last_state_write_time(void); +static void _job_array_comp(struct job_record *job_ptr, bool was_running); static int _job_create(job_desc_msg_t * job_specs, int allocate, int will_run, struct job_record **job_rec_ptr, uid_t submit_uid, char **err_msg, uint16_t protocol_version); @@ -4617,8 +4618,10 @@ extern int job_str_signal(char *job_id_str, uint16_t signal, uint16_t flags, /* Master job record, even wihtout tasks, * counts as one job record */ job_count -= (orig_task_cnt - 1); - } else + } else { + _job_array_comp(job_ptr, false); job_count -= (orig_task_cnt - new_task_count); + } /* Set the task_cnt here since * job_completion_logger needs the total @@ -7273,7 +7276,6 @@ extern void job_set_req_tres( if (!assoc_mgr_locked) assoc_mgr_lock(&locks); - xfree(job_ptr->tres_req_cnt); job_ptr->tres_req_cnt = xmalloc(sizeof(uint64_t) * g_tres_count); if (job_ptr->details) { @@ -7335,6 +7337,7 @@ extern void job_set_alloc_tres(struct job_record *job_ptr, xfree(job_ptr->tres_alloc_str); xfree(job_ptr->tres_alloc_cnt); + xfree(job_ptr->tres_fmt_alloc_str); /* We only need to do this on non-pending jobs */ if (IS_JOB_PENDING(job_ptr)) @@ -7342,7 +7345,6 @@ extern void job_set_alloc_tres(struct job_record *job_ptr, if (!assoc_mgr_locked) assoc_mgr_lock(&locks); - xfree(job_ptr->tres_alloc_cnt); job_ptr->tres_alloc_cnt = xmalloc( sizeof(uint64_t) * slurmctld_tres_cnt); @@ -7381,11 +7383,9 @@ extern void job_set_alloc_tres(struct job_record *job_ptr, true); /* now that the array is filled lets make the string from it */ - xfree(job_ptr->tres_alloc_str); job_ptr->tres_alloc_str = assoc_mgr_make_tres_str_from_array( job_ptr->tres_alloc_cnt, TRES_STR_FLAG_SIMPLE, true); - xfree(job_ptr->tres_fmt_alloc_str); job_ptr->tres_fmt_alloc_str = assoc_mgr_make_tres_str_from_array( job_ptr->tres_alloc_cnt, 0, true); @@ -10602,11 +10602,11 @@ static int 
_update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs, job_ptr->state_reason = WAIT_HELD; xfree(job_ptr->state_desc); } - } else if (job_specs->priority == INFINITE - && job_ptr->state_reason != WAIT_HELD_USER) { - /* If the job was already released ignore another - * release request. - */ + } else if ((job_ptr->priority != 0) && + (job_specs->priority == INFINITE) && + (job_ptr->state_reason != WAIT_HELD_USER)) { + /* If the job was already released, ignore another + * release request. */ debug("%s: job %d already release ignoring request", __func__, job_ptr->job_id); } else { @@ -11958,7 +11958,7 @@ static void _purge_missing_jobs(int node_inx, time_t now) node_boot_time = node_ptr->boot_time - (msg_timeout + 5); } batch_startup_time = now - batch_start_timeout; - batch_startup_time -= msg_timeout; + batch_startup_time -= MIN(DEFAULT_MSG_TIMEOUT, msg_timeout); job_iterator = list_iterator_create(job_list); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { @@ -12645,7 +12645,7 @@ extern bool job_array_start_test(struct job_record *job_ptr) return true; } -static void _job_array_comp(struct job_record *job_ptr) +static void _job_array_comp(struct job_record *job_ptr, bool was_running) { struct job_record *base_job_ptr; uint32_t status; @@ -12672,7 +12672,8 @@ static void _job_array_comp(struct job_record *job_ptr) MAX(status, base_job_ptr-> array_recs->max_exit_code); } - if (base_job_ptr->array_recs->tot_run_tasks) + if (was_running && + base_job_ptr->array_recs->tot_run_tasks) base_job_ptr->array_recs->tot_run_tasks--; base_job_ptr->array_recs->tot_comp_tasks++; } @@ -12695,6 +12696,8 @@ extern void job_completion_logger(struct job_record *job_ptr, bool requeue) (void) bb_g_job_cancel(job_ptr); } + _job_array_comp(job_ptr, true); + if (!IS_JOB_RESIZING(job_ptr) && ((job_ptr->array_task_id == NO_VAL) || test_job_array_finished(job_ptr->array_job_id))) { @@ -12721,8 +12724,6 @@ extern void job_completion_logger(struct job_record *job_ptr, bool requeue) } } - _job_array_comp(job_ptr); - g_slurm_jobcomp_write(job_ptr); /* When starting the resized job everything is taken care of diff --git a/src/slurmctld/licenses.c b/src/slurmctld/licenses.c index 85818bcf0..abb95b2ca 100644 --- a/src/slurmctld/licenses.c +++ b/src/slurmctld/licenses.c @@ -896,9 +896,8 @@ extern char *licenses_2_tres_str(List license_list) continue; /* not tracked */ if (slurmdb_find_tres_count_in_string( - tres_str, tres_rec->id)) + tres_str, tres_rec->id) != INFINITE64) continue; /* already handled */ - /* New license */ xstrfmtcat(tres_str, "%s%u=%"PRIu64, tres_str ? 
"," : "", diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 8564a6d2e..2c82dd1d5 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1002,7 +1002,7 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size, #if 0 { char *tmp_str = bitmap2node_name(job_ptr->details->req_node_bitmap); - info("job %u requires %d:%d:%d nodes %s err:%u", + info("job %u requires %d:%d:%d req_nodes:%s err:%u", job_ptr->job_id, min_nodes, req_nodes, max_nodes, tmp_str, error_code); xfree(tmp_str); @@ -1492,13 +1492,15 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, tried_sched = false; /* need to test these nodes */ if ((switch_record_cnt > 1) && - ((i+1) < node_set_size)) { + ((i+1) < node_set_size) && + (min_feature == max_feature)) { /* Keep accumulating to optimize topology */ continue; } - if ((shared || preempt_flag) && - ((i+1) < node_set_size) && + if ((shared || preempt_flag) && + ((i+1) < node_set_size) && + (min_feature == max_feature) && (node_set_ptr[i].weight == node_set_ptr[i+1].weight)) { /* Keep accumulating so we can pick the diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 0353756fb..914c87954 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -2123,9 +2123,12 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg, bool locked) batch_step.name = "batch"; batch_step.select_jobinfo = job_ptr->select_jobinfo; + step_set_alloc_tres(&batch_step, 1, false, false); + jobacct_storage_g_step_start(acct_db_conn, &batch_step); jobacct_storage_g_step_complete(acct_db_conn, &batch_step); FREE_NULL_BITMAP(batch_step.step_node_bitmap); + xfree(batch_step.tres_alloc_str); } #ifdef HAVE_FRONT_END diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c index 06c8d4535..abb2e27b9 100644 --- a/src/slurmctld/reservation.c +++ b/src/slurmctld/reservation.c @@ -3742,7 +3742,8 @@ static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, FREE_NULL_BITMAP(feature_bitmap); } - if ((resv_desc_ptr->flags & RESERVE_FLAG_MAINT) == 0) { + if (((resv_desc_ptr->flags & RESERVE_FLAG_MAINT) == 0) && + ((resv_desc_ptr->flags & RESERVE_FLAG_SPEC_NODES) == 0)) { /* Nodes must be available */ bit_and(node_bitmap, avail_node_bitmap); } diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 81a7830ce..7b175d33c 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -2124,7 +2124,7 @@ extern void start_power_mgr(pthread_t *thread_id); * IN node_name - name of node which has completed epilog */ extern int step_epilog_complete(struct job_record *job_ptr, - char *node_name); + char *node_name); /* * step_partial_comp - Note the completion of a job step on at least @@ -2138,6 +2138,14 @@ extern int step_epilog_complete(struct job_record *job_ptr, extern int step_partial_comp(step_complete_msg_t *req, uid_t uid, int *rem, uint32_t *max_rc); +/* + * step_set_alloc_tres - set the tres up when allocating the step. + * Only set when job is running. 
+ * NOTE: job write lock must be locked before calling this */ +extern void step_set_alloc_tres( + struct step_record *step_ptr, uint32_t node_count, + bool assoc_mgr_locked, bool make_formatted); + /* Update time stamps for job step suspend */ extern void suspend_job_step(struct job_record *job_ptr); diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 7570abce1..45c5377ee 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -2114,16 +2114,12 @@ step_create(job_step_create_request_msg_t *step_specs, bitstr_t *nodeset; int cpus_per_task, ret_code, i; uint32_t node_count = 0; - uint64_t cpu_count, tres_count; time_t now = time(NULL); char *step_node_list = NULL; uint32_t orig_cpu_count; List step_gres_list = (List) NULL; dynamic_plugin_data_t *select_jobinfo = NULL; uint32_t task_dist; - char *tmp_tres_str = NULL; - assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, - READ_LOCK, NO_LOCK, NO_LOCK }; #ifdef HAVE_ALPS_CRAY uint32_t resv_id = 0; @@ -2534,46 +2530,8 @@ step_create(job_step_create_request_msg_t *step_specs, select_g_step_start(step_ptr); -#ifdef HAVE_BG_L_P - /* Only L and P use this code */ - if (step_ptr->job_ptr->details) - cpu_count = (uint64_t)step_ptr->job_ptr->details->min_cpus; - else - cpu_count = (uint64_t)step_ptr->job_ptr->cpu_cnt; -#else - if (!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) - cpu_count = (uint64_t)step_ptr->job_ptr->total_cpus; - else - cpu_count = (uint64_t)step_ptr->cpu_count; -#endif - xfree(step_ptr->tres_alloc_str); - - tres_count = (uint64_t)step_ptr->pn_min_memory; - if (tres_count & MEM_PER_CPU) { - tres_count &= (~MEM_PER_CPU); - tres_count *= cpu_count; - } else - tres_count *= node_count; - xstrfmtcat(step_ptr->tres_alloc_str, - "%s%u=%"PRIu64",%u=%"PRIu64",%u=%u", - step_ptr->tres_alloc_str ? "," : "", - TRES_CPU, cpu_count, - TRES_MEM, tres_count, - TRES_NODE, node_count); - - if ((tmp_tres_str = gres_2_tres_str(step_ptr->gres_list, 0, true))) { - xstrfmtcat(step_ptr->tres_alloc_str, "%s%s", - step_ptr->tres_alloc_str ? "," : "", - tmp_tres_str); - xfree(tmp_tres_str); - } - - xfree(step_ptr->tres_fmt_alloc_str); - assoc_mgr_lock(&locks); - step_ptr->tres_fmt_alloc_str = slurmdb_make_tres_string_from_simple( - step_ptr->tres_alloc_str, assoc_mgr_tres_list); - assoc_mgr_unlock(&locks); + step_set_alloc_tres(step_ptr, node_count, false, true); jobacct_storage_g_step_start(acct_db_conn, step_ptr); return SLURM_SUCCESS; @@ -3402,6 +3360,83 @@ extern int step_partial_comp(step_complete_msg_t *req, uid_t uid, return SLURM_SUCCESS; } +/* + * step_set_alloc_tres - set the tres up when allocating the step. + * Only set when job is running. 
+ * NOTE: job write lock must be locked before calling this */ +extern void step_set_alloc_tres( + struct step_record *step_ptr, uint32_t node_count, + bool assoc_mgr_locked, bool make_formatted) +{ + uint64_t cpu_count = 1, mem_count = 1; + char *tmp_tres_str = NULL; + + xassert(step_ptr); + + xfree(step_ptr->tres_alloc_str); + xfree(step_ptr->tres_fmt_alloc_str); + + if ((step_ptr->step_id == SLURM_BATCH_SCRIPT) && + step_ptr->job_ptr->job_resrcs) { + /* get the cpus and memory on the first node */ + if (step_ptr->job_ptr->job_resrcs->cpus) + cpu_count = step_ptr->job_ptr->job_resrcs->cpus[0]; + if (step_ptr->job_ptr->job_resrcs->memory_allocated) + mem_count = step_ptr->job_ptr->job_resrcs-> + memory_allocated[0]; + } else { +#ifdef HAVE_BG_L_P + /* Only L and P use this code */ + if (step_ptr->job_ptr->details) + cpu_count = + (uint64_t)step_ptr->job_ptr->details->min_cpus; + else + cpu_count = (uint64_t)step_ptr->job_ptr->cpu_cnt; +#else + if (!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) + cpu_count = (uint64_t)step_ptr->job_ptr->total_cpus; + else + cpu_count = (uint64_t)step_ptr->cpu_count; +#endif + mem_count = (uint64_t)step_ptr->pn_min_memory; + if (mem_count & MEM_PER_CPU) { + mem_count &= (~MEM_PER_CPU); + mem_count *= cpu_count; + } else + mem_count *= node_count; + } + + xstrfmtcat(step_ptr->tres_alloc_str, + "%s%u=%"PRIu64",%u=%"PRIu64",%u=%u", + step_ptr->tres_alloc_str ? "," : "", + TRES_CPU, cpu_count, + TRES_MEM, mem_count, + TRES_NODE, node_count); + + if ((tmp_tres_str = gres_2_tres_str(step_ptr->gres_list, 0, true))) { + xstrfmtcat(step_ptr->tres_alloc_str, "%s%s", + step_ptr->tres_alloc_str ? "," : "", + tmp_tres_str); + xfree(tmp_tres_str); + } + + if (make_formatted) { + assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, + READ_LOCK, NO_LOCK, NO_LOCK }; + if (!assoc_mgr_locked) + assoc_mgr_lock(&locks); + + step_ptr->tres_fmt_alloc_str = + slurmdb_make_tres_string_from_simple( + step_ptr->tres_alloc_str, assoc_mgr_tres_list); + + if (!assoc_mgr_locked) + assoc_mgr_unlock(&locks); + } + + return; +} + /* convert a range of nodes allocated to a step to a hostlist with * names of those nodes */ static hostlist_t _step_range_to_hostlist(struct step_record *step_ptr, diff --git a/src/slurmd/common/proctrack.c b/src/slurmd/common/proctrack.c index 68b88c7ca..995c6533f 100644 --- a/src/slurmd/common/proctrack.c +++ b/src/slurmd/common/proctrack.c @@ -223,6 +223,13 @@ static bool _test_core_dumping(char* stat_fname) /* split into "PID (cmd" and "<rest>" */ str_ptr = (char *)strrchr(proc_stat, ')'); + if (str_ptr == NULL) { + error("\ +%s: unexpected format of %s (%s) bracket missing?", __func__, + stat_fname, proc_stat); + xfree(proc_stat); + return false; + } *str_ptr = '\0'; /* replace trailing ')' with NULL */ /* parse these two strings separately, skipping the leading "(". 
*/ memset (cmd, 0, sizeof(cmd)); diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 0cbfdef9b..7f5988cca 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -800,10 +800,12 @@ _forkexec_slurmstepd(uint16_t type, void *req, #ifndef SLURMSTEPD_MEMCHECK i = read(to_slurmd[0], &rc, sizeof(int)); if (i < 0) { - error("Can not read return code from slurmstepd: %m"); + error("\ +%s: Can not read return code from slurmstepd got %d: %m", __func__, i); rc = SLURM_FAILURE; } else if (i != sizeof(int)) { - error("slurmstepd failed to send return code"); + error("\ +%s: slurmstepd failed to send return code got %d: %m", __func__, i); rc = SLURM_FAILURE; } else { int delta_time = time(NULL) - start_time; @@ -1724,16 +1726,15 @@ static void _rpc_prolog(slurm_msg_t *msg) rc = ESLURMD_PROLOG_FAILED; } - if (slurmctld_conf.prolog_flags & PROLOG_FLAG_CONTAIN) - _make_prolog_mem_container(msg); - - if (container_g_create(req->job_id)) - error("container_g_create(%u): %m", req->job_id); - slurm_mutex_lock(&prolog_mutex); first_job_run = !slurm_cred_jobid_cached(conf->vctx, req->job_id); - if (first_job_run) { + if (slurmctld_conf.prolog_flags & PROLOG_FLAG_CONTAIN) + _make_prolog_mem_container(msg); + + if (container_g_create(req->job_id)) + error("container_g_create(%u): %m", req->job_id); + slurm_cred_insert_jobid(conf->vctx, req->job_id); _add_job_running_prolog(req->job_id); slurm_mutex_unlock(&prolog_mutex); @@ -2518,7 +2519,7 @@ _rpc_health_check(slurm_msg_t *msg) * slurmctld in hopes of avoiding having the node set DOWN due to * slurmd paging and not being able to respond in a timely fashion. */ if (slurm_send_rc_msg(msg, rc) < 0) { - error("Error responding to ping: %m"); + error("Error responding to health check: %m"); send_registration_msg(SLURM_SUCCESS, false); } @@ -2563,7 +2564,7 @@ _rpc_acct_gather_update(slurm_msg_t *msg) * due to slurmd paging and not being able to respond in a * timely fashion. 
*/ if (slurm_send_rc_msg(msg, rc) < 0) { - error("Error responding to ping: %m"); + error("Error responding to account gather: %m"); send_registration_msg(SLURM_SUCCESS, false); } } else { @@ -2669,7 +2670,7 @@ _signal_jobstep(uint32_t jobid, uint32_t stepid, uid_t req_uid, fd = stepd_connect(conf->spooldir, conf->node_name, jobid, stepid, &protocol_version); if (fd == -1) { - debug("signal for nonexistant %u.%u stepd_connect failed: %m", + debug("signal for nonexistent %u.%u stepd_connect failed: %m", jobid, stepid); return ESLURM_INVALID_JOB_ID; } @@ -2756,7 +2757,7 @@ _rpc_checkpoint_tasks(slurm_msg_t *msg) fd = stepd_connect(conf->spooldir, conf->node_name, req->job_id, req->job_step_id, &protocol_version); if (fd == -1) { - debug("checkpoint for nonexistant %u.%u stepd_connect " + debug("checkpoint for nonexistent %u.%u stepd_connect " "failed: %m", req->job_id, req->job_step_id); rc = ESLURM_INVALID_JOB_ID; goto done; @@ -2802,7 +2803,7 @@ _rpc_terminate_tasks(slurm_msg_t *msg) fd = stepd_connect(conf->spooldir, conf->node_name, req->job_id, req->job_step_id, &protocol_version); if (fd == -1) { - debug("kill for nonexistant job %u.%u stepd_connect " + debug("kill for nonexistent job %u.%u stepd_connect " "failed: %m", req->job_id, req->job_step_id); rc = ESLURM_INVALID_JOB_ID; goto done; diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c index 92d7c95c6..7a6ebb623 100644 --- a/src/slurmd/slurmstepd/req.c +++ b/src/slurmd/slurmstepd/req.c @@ -153,7 +153,7 @@ _create_socket(const char *name) if (bind(fd, (struct sockaddr *) &addr, len) < 0) return -2; - if (listen(fd, 5) < 0) + if (listen(fd, 32) < 0) return -3; return fd; @@ -274,7 +274,7 @@ msg_thr_create(stepd_step_rec_t *job) * This gives connection threads a chance to complete any pending * RPCs before the slurmstepd exits. 
*/ -static void _wait_for_connections() +static void _wait_for_connections(void) { struct timespec ts = {0, 0}; int rc = 0; @@ -326,12 +326,18 @@ _msg_socket_accept(eio_obj_t *obj, List objs) (socklen_t *)&len)) < 0) { if (errno == EINTR) continue; - if (errno == EAGAIN - || errno == ECONNABORTED - || errno == EWOULDBLOCK) { + if ((errno == EAGAIN) || + (errno == ECONNABORTED) || + (errno == EWOULDBLOCK)) { return SLURM_SUCCESS; } error("Error on msg accept socket: %m"); + if ((errno == EMFILE) || + (errno == ENFILE) || + (errno == ENOBUFS) || + (errno == ENOMEM)) { + return SLURM_SUCCESS; + } obj->shutdown = true; return SLURM_SUCCESS; } diff --git a/src/smap/smap.c b/src/smap/smap.c index 1501acb96..021b38df7 100644 --- a/src/smap/smap.c +++ b/src/smap/smap.c @@ -184,13 +184,11 @@ int main(int argc, char *argv[]) if (params.cluster_dims == 4) { startx = width; - COLS -= 2; - width = COLS - width; + width = COLS - width - 2; height = LINES; } else if (params.cluster_dims == 3) { startx = width; - COLS -= 2; - width = COLS - width; + width = COLS - width - 2; height = LINES; } else { startx = 0; @@ -476,11 +474,8 @@ static void *_resize_handler(int sig) delwin(text_win); endwin(); - COLS = 0; - LINES = 0; initscr(); doupdate(); /* update now to make sure we get the new size */ - getmaxyx(stdscr, LINES, COLS); if (params.cluster_dims == 4) { height = dim_size[2] * dim_size[3] + dim_size[2] + 3; @@ -508,13 +503,11 @@ static void *_resize_handler(int sig) if (params.cluster_dims == 4) { startx = width; - COLS -= 2; - width = COLS - width; + width = COLS - width - 2; height = LINES; } else if (params.cluster_dims == 3) { startx = width; - COLS -= 2; - width = COLS - width; + width = COLS - width - 2; height = LINES; } else { startx = 0; diff --git a/src/squeue/print.c b/src/squeue/print.c index a0e0bd245..b33ad4f86 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -202,13 +202,20 @@ static void _combine_pending_array_tasks(List job_list) bitmap_size = bit_size(task_bitmap); task_iterator = list_iterator_create(job_list); while ((task_rec_ptr = list_next(task_iterator))) { - if (!IS_JOB_PENDING(task_rec_ptr->job_ptr) || - (task_rec_ptr == job_rec_ptr) || + if (!IS_JOB_PENDING(task_rec_ptr->job_ptr)) + continue; /* Not pending */ + if ((task_rec_ptr == job_rec_ptr) || (task_rec_ptr->job_ptr->array_job_id != job_rec_ptr->job_ptr->array_job_id) || (task_rec_ptr->job_ptr->array_task_id >= bitmap_size)) - continue; + continue; /* Different job array ID */ + if (xstrcmp(task_rec_ptr->job_ptr->name, + job_rec_ptr->job_ptr->name)) + continue; /* Different name */ + if (xstrcmp(task_rec_ptr->job_ptr->partition, + job_rec_ptr->job_ptr->partition)) + continue; /* Different partition */ /* Combine this task into master job array record */ update_cnt++; bit_set(task_bitmap, @@ -959,6 +966,7 @@ int _print_job_reason_list(job_info_t * job, int width, bool right, reason = job_reason_string(job->state_reason); xstrfmtcat(reason_fmt, "(%s)", reason); _print_str(reason_fmt, width, right, true); + xfree(reason_fmt); } else { char *nodes = xstrdup(job->nodes); char *ionodes = NULL; diff --git a/src/squeue/squeue.c b/src/squeue/squeue.c index 4b08787e9..15b5a8c30 100644 --- a/src/squeue/squeue.c +++ b/src/squeue/squeue.c @@ -169,7 +169,8 @@ _get_window_width( void ) static int _print_job ( bool clear_old ) { - static job_info_msg_t * old_job_ptr = NULL, * new_job_ptr; + static job_info_msg_t *old_job_ptr; + job_info_msg_t *new_job_ptr; int error_code; uint16_t show_flags = 0; diff --git 
a/testsuite/expect/globals b/testsuite/expect/globals index 98920eb8a..325eec79e 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -1880,6 +1880,37 @@ proc test_emulated { } { return $emulated } +################################################################ +# +# Proc: test_killonbadexit +# +# Purpose: Determine if KillOnBadExit is configured to be 1. +# +# +# Returns 1 if KillOnBadExit is 1. +# +################################################################ + +proc test_killonbadexit { } { + global scontrol bin_bash bin_grep + + log_user 0 + set killonbadexit 0 + spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep KillOnBadExit" + expect { + -re "KillOnBadExit *= *1" { + set killonbadexit 1 + exp_continue + } + eof { + wait + } + } + log_user 1 + + return $killonbadexit +} + ################################################################ # # Proc: get_cycle_count diff --git a/testsuite/expect/inc21.21_tests b/testsuite/expect/inc21.21_tests index 438067b4a..5b4f0e944 100644 --- a/testsuite/expect/inc21.21_tests +++ b/testsuite/expect/inc21.21_tests @@ -213,7 +213,7 @@ sleep 10" set running 0 spawn $squeue -h -o "\%t \%r" expect { - -re "PD.Assoc*" { + -re "PD." { incr pending exp_continue } @@ -440,6 +440,9 @@ proc inc21_21_grpwall { test_type limit } { send_user "\n====== Test $test_type" send_user "(Within: inc21.21_tests function: inc21_21_grpwall) ======\n" + # Wait for old jobs to clean up + sleep 2 + # Since wall is a decayed variable lets reset it to make sure the test # gets exactly what we would expect. reset_qos_usage "" $test_qos diff --git a/testsuite/expect/inc22.1.1 b/testsuite/expect/inc22.1.1 index 050c2c16f..58a86a099 100644 --- a/testsuite/expect/inc22.1.1 +++ b/testsuite/expect/inc22.1.1 @@ -37,7 +37,7 @@ proc inc22_1_1 {} { - global account1 accounts users cluster_cpus job1_start user1 + global account1 accounts users cluster_cpus job0_start user1 global node0_down_start node0_cpus node1_cpus cluster sreport global exit_code wc_key_track wckey1 sql_rem @@ -53,7 +53,7 @@ proc inc22_1_1 {} { set end_str [timestamp -format %Y-%m-%dT%X -seconds $period_end] set reported [expr ($period_end - $period_start) * $cluster_cpus] set down [expr ($period_end-$node0_down_start) * $node0_cpus] - set alloc_sec [expr ($period_end-$job1_start) * $node1_cpus] + set alloc_sec [expr ($period_end-$job0_start) * $node1_cpus] set wckey_alloc_sec $alloc_sec set resv 0 set idle [expr $reported - ($down + $alloc_sec + $resv)] diff --git a/testsuite/expect/inc22.1.3 b/testsuite/expect/inc22.1.3 index 0b9740a9f..66ffb452b 100644 --- a/testsuite/expect/inc22.1.3 +++ b/testsuite/expect/inc22.1.3 @@ -39,7 +39,7 @@ proc inc22_1_3 { } { global sreport exit_code node0_down_start node0_down_end node0_cpus global node1_cpus cluster my_pid accounts users cluster_cpus - global job1_start job1_end job1_cpus job1_alloc job2_start + global job0_start job1_end job1_cpus job1_alloc job2_start global job2_end job2_cpus job2_alloc job2_elig job3_start global job3_end job3_cpus job3_alloc job3_elig acct1_alloc acct2_alloc global acct3_alloc total_alloc wckey1_alloc user1_wckey1_alloc @@ -59,7 +59,7 @@ proc inc22_1_3 { } { set reported [expr ($period_end - $period_start) * $cluster_cpus] set down [expr ($node0_down_end - $node0_down_start) * $node0_cpus] - set alloc_sec [expr ($job1_end-$job1_start) * $job1_cpus] + set alloc_sec [expr ($job1_end-$job0_start) * $job1_cpus] set alloc_sec [expr $alloc_sec + (($job2_end-$job2_start) * $job2_cpus)] set alloc_sec [expr $alloc_sec 
+ (($job3_end-$job3_start) * $job3_cpus)] set wckey_alloc_sec1 [expr $job1_alloc + $job3_alloc] diff --git a/testsuite/expect/test1.77 b/testsuite/expect/test1.77 index b9011e777..3ad56b353 100755 --- a/testsuite/expect/test1.77 +++ b/testsuite/expect/test1.77 @@ -38,6 +38,7 @@ set number "\[0-9\]+" set name "banana" set my_cmd ./mycmd set file_in "test$test_id.in" +set file_out "test$test_id.out" # Set env path to SLURM_JOB_NAME set env(SLURM_JOB_NAME) "zebra" @@ -77,13 +78,11 @@ send_user "SUCCESS\n" # Tests -J using sbatch # set found 0 -set sbatch_pid [spawn $sbatch -J $name --wrap=env] +set sbatch_pid [spawn $sbatch -J $name -o $file_out --wrap=env] expect { -re "Submitted batch job ($number)" { set job_id $expect_out(1,string) exp_continue - - } timeout { send_user "\nFAILURE: sbatch not responding\n" @@ -94,8 +93,8 @@ expect { wait } } -set wait_file [wait_for_file slurm-$job_id.out] -spawn $bin_grep SLURM_JOB_NAME=banana slurm-$job_id.out +set wait_file [wait_for_file $file_out] +spawn $bin_grep SLURM_JOB_NAME=banana $file_out expect { -re "SLURM_JOB_NAME=($alpha_numeric)" { send_user "Job name matched $expect_out(1,string)\n" @@ -112,7 +111,7 @@ if {$found != 1} { exit 1 } if {$exit_code == 0} { - exec $bin_rm -f slurm-$job_id.out + exec $bin_rm -f $file_out send_user "SUCCESS\n" } exec rm -f slurm-$job_id.out diff --git a/testsuite/expect/test2.15 b/testsuite/expect/test2.15 index 87ad3d6fd..9b5b008fc 100755 --- a/testsuite/expect/test2.15 +++ b/testsuite/expect/test2.15 @@ -52,6 +52,9 @@ if { [test_xcpu] } { } elseif {[test_launch_poe]} { send_user "\nWARNING: This test is incompatible with POE systems\n" exit 0 +} elseif {[test_killonbadexit]} { + send_user "\nWARNING: This test is incompatible with KillOnBadExit=1\n" + exit 0 } set node_cnt 2 diff --git a/testsuite/expect/test21.21 b/testsuite/expect/test21.21 index 50d9a328b..5508c3342 100755 --- a/testsuite/expect/test21.21 +++ b/testsuite/expect/test21.21 @@ -83,6 +83,11 @@ array set acct_mod_assoc_test_vals { print_header $test_id +if { [string compare [priority_type] multifactor] } { + send_user "\nWARNING: test only compatible with priority/multifactor plugin\n" + exit $exit_code +} + # Determine what the selecttype param is if {[test_select_type_params "CR_CORE"]} { set selectparam 1 diff --git a/testsuite/expect/test22.1 b/testsuite/expect/test22.1 index 1d30b6567..652500daa 100755 --- a/testsuite/expect/test22.1 +++ b/testsuite/expect/test22.1 @@ -102,17 +102,21 @@ set period_end [exec date -d $end_date +%s] set start_str [timestamp -format %Y-%m-%dT%X -seconds $period_start] set end_str [timestamp -format %Y-%m-%dT%X -seconds $period_end] +#job0 - we really want this to look like job1 but run right before hand. 
+set job0_start $period_start +set job0_run 1200 +set job0_end [expr $job0_start + $job0_run] #job1 -set job1_start $period_start -set job1_run 3900 -set job1_end [expr $job1_start+$job1_run] +set job1_start [expr $period_start + 1200] +set job1_run 2700 +set job1_end [expr $job1_start + $job1_run] # This will give us the correct time we ran for set job1_diff_str [timestamp -format %X -seconds [expr $midnight+$job1_run]] set job1_start_str [timestamp -format %Y-%m-%dT%X -seconds $job1_start] set job1_end_str [timestamp -format %Y-%m-%dT%X -seconds $job1_end] set job1_nodes $node1 set job1_cpus $node1_cpus -set job1_alloc [expr $job1_run * $job1_cpus] +set job1_alloc [expr ($job1_run + $job0_run) * $job1_cpus] set job1_acct $account1 #job2 @@ -141,7 +145,8 @@ set job3_start $job2_end set job3_run 3900 set job3_end [expr $job3_start+$job3_run] # This will give us the correct time we ran for -set job3_diff_str [timestamp -format %X -seconds [expr $midnight+$job1_run]] +set job3_diff_str [timestamp -format %X -seconds [expr $midnight+$job3_run]] + set job3_start_str [timestamp -format %Y-%m-%dT%X -seconds $job3_start] set job3_end_str [timestamp -format %Y-%m-%dT%X -seconds $job3_end] #run on just node0 @@ -331,6 +336,7 @@ puts $file "on duplicate key update period_start=VALUES(period_start), period_en #now we will put in a job running for an hour and 5 minutes puts $file "insert into job_table (jobid, associd, wckey, wckeyid, uid, gid, `partition`, blockid, cluster, account, eligible, submit, start, end, suspended, name, track_steps, state, comp_code, priority, req_cpus, tres_alloc, nodelist, kill_requid, qos, deleted) values" +puts $file "('65536', '$user1acct1', '$wckey1', '$user1wckey1', '$uid', '$gid', 'debug', '', '$cluster', '$job1_acct', $job0_start, $job0_start, $job0_start, $job0_end, '0', 'test_job1', '0', '3', '0', '$job1_cpus', $job1_cpus, '1=$job1_cpus', '$job1_nodes', '0', '0', '0')" puts $file "('65537', '$user1acct1', '$wckey1', '$user1wckey1', '$uid', '$gid', 'debug', '', '$cluster', '$job1_acct', $job1_start, $job1_start, $job1_start, $job1_end, '0', 'test_job1', '0', '3', '0', '$job1_cpus', $job1_cpus, '1=$job1_cpus', '$job1_nodes', '0', '0', '0')" puts $file ", ('65538', '$user2acct3', '$wckey1', '$user2wckey1', '$uid', '$gid', 'debug', '', '$cluster', '$job2_acct', $job2_elig, $job2_elig, $job2_start, $job2_end, '0', 'test_job2', '0', '3', '0', '$job2_cpus', '$job2_cpus', '1=$job2_cpus', '$job2_nodes', '0', '0', '0')" puts $file ", ('65539', '$user1acct2', '$wckey1', '$user1wckey1', '$uid', '$gid', 'debug', '', '$cluster', '$job3_acct', $job3_elig, $job3_elig, $job3_start, $job3_end, '0', 'test_job3', '0', '3', '0', '$job3_cpus', '$job3_cpus', '1=$job3_cpus', '$job3_nodes', '0', '0', '0')" diff --git a/testsuite/expect/test4.13 b/testsuite/expect/test4.13 index a762c40d9..b92689806 100755 --- a/testsuite/expect/test4.13 +++ b/testsuite/expect/test4.13 @@ -52,7 +52,7 @@ array set node_info { FreeMem freemem Gres gres NodeAddr nodeaddr - NodeName nodehost + NodeHostName nodehost RealMemory memory State statelong Sockets sockets @@ -103,8 +103,7 @@ foreach option [array names node_info] { spawn $scontrol show node $test_node expect { -re "\\m$option=($alpha_numeric_dot)\\M" { - set node_sinfo_vals($node_info($option)) \ - $expect_out(1,string) + set node_sinfo_vals($node_info($option)) $expect_out(1,string) exp_continue } timeout { @@ -122,11 +121,6 @@ set first_option 1 foreach option [array names node_sinfo_vals] { set match 0 - if {$first_option == 1} { - set first_option 
0 - } else { - log_user 0 - } spawn $sinfo -n$test_node -O$option -h expect { -nocase -re "$node_sinfo_vals($option)" { @@ -145,10 +139,9 @@ foreach option [array names node_sinfo_vals] { if {$match != 1} { send_user "\nFAILURE: Node information $option output does " send_user "not match sinfo output. " - send_user "Looking for value: $node_sinfo_vals($option)\n" - exit 1 + send_user "Looking for value: $option = $node_sinfo_vals($option)\n" + set exit_code 1 } - log_user 1 } # @@ -218,11 +211,6 @@ set first_option 1 foreach option [array names part_sinfo_vals] { set match 0 - if {$first_option == 1} { - set $first_option 0 - } else { - log_user 0 - } spawn $sinfo -p$test_part -O$option -h expect { -nocase -re "$part_sinfo_vals($option)" { @@ -243,15 +231,16 @@ foreach option [array names part_sinfo_vals] { send_user "output does not match sinfo output. " send_user "Looking for value: " send_user "$option = $part_sinfo_vals($option)\n" - cleanup - exit 1 + set exit_code 1 } - log_user 1 } cleanup if {$exit_code == 0} { send_user "\nSUCCESS\n" +} else { + send_user "\nFAILURE\n" } + exit $exit_code -- GitLab