diff --git a/BUILD.NOTES b/BUILD.NOTES index 20ef2e186eebc0090b3d4081277da68f654fe597..46285a0d58f75cc8cec8599fa980f3240895f5d5 100644 --- a/BUILD.NOTES +++ b/BUILD.NOTES @@ -63,7 +63,12 @@ Linux cluster (See BlueGene and AIX specific notes below for some differences). Use SourceForge admin tool to add new release, including changelog. BlueGene build notes: -3. Use the rpm make target to create the new RPMs. This requires a .rpmmacros +0. If on a bgp system and you want sview, export these variables: + export CFLAGS="-I/opt/gnome/lib/gtk-2.0/include -I/opt/gnome/lib/glib-2.0/include $CFLAGS" + export LIBS="-L/usr/X11R6/lib64 $LIBS" + export CMD_LDFLAGS='-L/usr/X11R6/lib64' + export PKG_CONFIG_PATH="/opt/gnome/lib64/pkgconfig/:$PKG_CONFIG_PATH" +1. Use the rpm make target to create the new RPMs. This requires a .rpmmacros (.rpmrc for newer versions of rpmbuild) file containing: %_prefix /usr %_slurm_sysconfdir /etc/slurm @@ -89,6 +94,7 @@ To build and run on AIX: then making the buildfarm directory the first one in my PATH. Also, make certain that the "proctrack" rpm is installed. 1. export OBJECT_MODE=32 + export PKG_CONFIG="/usr/bin/pkg-config" 2. Build with: ./configure --enable-debug --prefix=/opt/freeware \ --sysconfdir=/opt/freeware/etc/slurm \ diff --git a/META b/META index b9f60e14189d5b6a874166c039606c3e7cd303b2..3ef94ea782975eac1235ba8abb00ffad6ea29534 100644 --- a/META +++ b/META @@ -3,9 +3,9 @@ Api_revision: 0 Major: 2 Meta: 1 - Micro: 7 + Micro: 8 Minor: 1 Name: slurm Release: 1 Release_tags: dist - Version: 2.1.7 + Version: 2.1.8 diff --git a/NEWS b/NEWS index 85f38030421d7f17e888909d5aa312bce6ced267..85c97fe69df2fc3bc9859946bfc137997c6a00b7 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,43 @@ documents those changes that are of interest to users and admins. * Changes in SLURM 2.1.8 ======================== + -- Update BUILD.NOTES for AIX and bgp systems on how to get sview to + build correctly. + -- Update man page for scontrol when nodes are in the "MIXED" state. + -- Better error messages for sacctmgr. + -- Fix bug in allocation of CPUs with select/cons_res and --cpus-per-task + option. + -- Fix bug in dependency support for afterok and afternotok options to ensure + that the job's exit status gets checked for dependent jobs prior to purging + completed job records. + -- Fix bug in sched/backfill that could set an incorrect expected start time + for a job. + -- BLUEGENE - Fix for systems that have midplanes defined in the database + that don't actually exist. + -- Accounting - Fixed bug where a rollback was not possible when removing + an object. + -- Fix possible scontrol stack corruption when listing jobs with a very long + job or working directory name (over 511 characters). + -- Ensure that SPANK environment variables set by salloc or sbatch get + propagated to the Prolog on all nodes by setting SLURM_SPANK_* environment + variables for srun's use. + -- In sched/wiki2 - Add support for the MODIFYJOB command to alter a job's + comment field. + -- When a cluster first registers with the SlurmDBD, only send nodes in a + non-usable state. Previously all nodes were sent. + -- Alter sacct to be able to query jobs by association id. + -- Update scontrol documentation to state that ExitCode is not alterable. + -- Update documentation about ReturnToService and silently rebooting nodes. + -- When combining --ntasks-per-node and --exclusive in an allocation request, + the correct thing now happens: the allocation gets the entire node but + runs only ntasks-per-node tasks.
+ -- Fix accounting transaction logs when deleting associations to put the + ids instead of the lfts which could change over time. + -- Fix support for salloc, sbatch and srun's --hint option to avoid allocating + a job more sockets per node or more cores per socket than desired. Also + when --hint=compute_bound or --hint=memory_bound then avoid allocating more + than one task per hyperthread (a change in behavior, but almost certainly + a preferable mode of operation). * Changes in SLURM 2.1.7 ======================== @@ -5000,4 +5037,4 @@ documents those changes that are of interest to users and admins. -- Change directory to /tmp in slurmd if daemonizing. -- Logfiles are reopened on reconfigure. -$Id: NEWS 20049 2010-04-16 00:34:17Z da $ +$Id: NEWS 20218 2010-05-07 21:29:56Z jette $ diff --git a/contribs/lua/proctrack.lua b/contribs/lua/proctrack.lua index 61e6b45f979849ecbcf82dfa601527d2f3a6185c..29b23701b4fb6d91a1e531a8afaadad8b9957a1a 100644 --- a/contribs/lua/proctrack.lua +++ b/contribs/lua/proctrack.lua @@ -14,6 +14,8 @@ require "posix" -- --########################################################################-- +local use_release_agent = false + function slurm_container_create (job) local id = cpuset_id_create (job) local cpu_list = cpumap:convert_ids (job.CPUs) @@ -32,7 +34,7 @@ end function slurm_container_signal (id, signo) log_verbose ("slurm_container_signal(%d, %d)\n", id, signo) cpuset_kill (id, signo) - return 0 + return slurm.SUCCESS end function slurm_container_destroy (id) @@ -49,7 +51,7 @@ function slurm_container_find (pid) return id end end - return -1 + return slurm.FAILURE end function slurm_container_has_pid (id, pid) @@ -69,7 +71,7 @@ function slurm_container_wait (id) posix.sleep (s) s = (2*s <= 30) and 2*s or 30 -- Wait a max of 30s end - return 0 + return slurm.SUCCESS end function slurm_container_get_pids (id) @@ -135,6 +137,9 @@ function cpuset_create (name, cpus) posix.umask (mask) cpuset_set_f (path, "cpus", cpus) cpuset_set_f (path, "mems") + if (use_release_agent == true) then + cpuset_set_f (path, "notify_on_release", 1) + end return true end @@ -159,9 +164,15 @@ function cpuset_kill (name, signo) if (not cpuset_exists (name)) then return end local path = string.format ("%s/%s/tasks", cpuset_dir, name) - for pid in io.lines (cpuset_dir .. "/" .. name .. "/tasks") do - log_debug ("Sending signal %d to pid %d", signo, pid) - posix.kill (pid, signo) + + local path_fh = io.open(path) + if path_fh then + while true do + local pid = path_fh:read() + if pid == nil then break end + log_debug ("Sending signal %d to pid %d", signo, pid) + posix.kill (pid, signo) + end end end @@ -200,7 +211,7 @@ function cpuset_id_create (job) end -- Add the lower 16 bits of the stepid: - id = id truncate_to_n_bits (job.stepid, 16) + id = id + truncate_to_n_bits (job.stepid, 16) -- Must truncate result to 32bits until SLURM's job container -- id is no longer represented by uint32_t : @@ -222,12 +233,33 @@ function cpuset_has_pid (id, process_id) return false end +function pid_is_thread (process_id) + local pid_status_path = string.format ("/proc/%d/status",process_id) + local pid_status_fh = io.open(pid_status_path) + + if pid_status_fh then + while true do + local pid_status_line=pid_status_fh:read() + if pid_status_line == nil then break end + if string.match(pid_status_line,'^Tgid:%s+' .. process_id .. 
'$') then return false end + end + end + return true +end + function cpuset_pids (id) local pids = {} if (cpuset_exists (id)) then local path = string.format ("%s/%s/tasks", cpuset_dir, id) - for task in io.lines (path) do - table.insert (pids, task) + local path_fh = io.open(path) + if path_fh then + while true do + local task=path_fh:read() + if task == nil then break end + if not ( pid_is_thread(task) ) then + table.insert (pids, task) + end + end end end return pids @@ -317,33 +349,18 @@ function cpumap_create () return cpu_map end -function v_log_msg (l, fmt, ...) - slurm.log (l, string.format (fmt, ...)) -end - -function log_msg (fmt, ...) - v_log_msg (0, fmt, ...) -end - -function log_verbose (fmt, ...) - v_log_msg (1, fmt, ...) -end - -function log_debug (fmt, ...) - v_log_msg (2, fmt, ...) -end - -function log_err (fmt, ...) - slurm.error (string.format (fmt, ...)) -end - - --########################################################################-- -- -- Initialization code: -- --########################################################################-- +log_msg = slurm.log_info +log_verbose = slurm.log_verbose +log_debug = slurm.log_debug +log_err = slurm.error + + cpuset_dir = get_cpuset_dir () if cpuset_dir == nil then print "cpuset must be mounted" @@ -355,8 +372,8 @@ root_cpuset.mems = cpuset_read (cpuset_dir, "mems") cpumap = cpumap_create () -slurm.log (string.format ("initialized: root cpuset = %s\n", cpuset_dir)) +log_msg ("initialized: root cpuset = %s\n", cpuset_dir) -return 0 +return slurm.SUCCESS -- vi: filetype=lua ts=4 sw=4 expandtab diff --git a/doc/html/accounting.shtml b/doc/html/accounting.shtml index f4952fcb55819f6f01883d71172c85bc08b5c018..6f4804bb3cdeefe9958c930bb4d2d8a165d31473 100644 --- a/doc/html/accounting.shtml +++ b/doc/html/accounting.shtml @@ -230,7 +230,7 @@ referenced or recorded.</p> <ul> <li><b>AccountingStorageEnforce</b>: This option contains a comma separated list of options you may want to - enforce. The valid options are + enforce. The valid options are any comma separated combination of <ul> <li>associations - This will prevent users from running jobs if their <i>association</i> is not in the database. This option will diff --git a/doc/html/team.shtml b/doc/html/team.shtml index 0a6d9a507e644471a28c98a9bd5506efaf9d8222..b87e9fdf4570b6f7c8f031c6ba472897c5ac20ae 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -11,7 +11,6 @@ Linux NetworX and many other contributors. <ul> <li>Morris Jette (LLNL, Project leader)</li> <li>Danny Auble (LLNL)</li> -<li>Chris Holmes (HP)</li> <li>Don Lipari (LLNL)</li> </ul> @@ -23,6 +22,7 @@ Linux NetworX and many other contributors. <li>Ernest Artiaga (Barcelona Supercomputer Center, Spain)</li> <li>Susanne Balle (HP)</li> <li>Anton Blanchard (Samba)</li> +<li>Janne Blomqvist (Aalto University, Finland)</li> <li>David Bremer (LLNL)</li> <li>Hongjia Cao (National University of Defense Techonogy, China)</li> <li>Daniel Christians (HP)</li> @@ -35,9 +35,11 @@ Linux NetworX and many other contributors. 
<li>Kent Engström (National Supercomputer Centre, Sweden)</li> <li>Jim Garlick (LLNL)</li> <li>Didier Gazen (Laboratoire d'Aerologie, France)</li> +<li>Yiannis Georgiou (Bull)</li> <li>Mark Grondona (LLNL)</li> <li>Takao Hatazaki (HP, Japan)</li> <li>Matthieu Hautreux (CEA, France)</li> +<li>Chris Holmes (HP)</li> <li>David Höppner</li> <li>Nathan Huff (North Dakota State University)</li> <li>David Jackson (Adaptive Computing)</li> diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index 9d8f219b6aabfccf710a8f8a558cd81ef6d38a7f..fd452fc797984e88c148c2451971d144771a47aa 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -352,6 +352,14 @@ wckey names. Space characters are not allowed in the \f2wckey_list\fP. Default is all wckeys\&. +.TP +\f3\-x \fP\f2associd_list\fP\f3,\fP \f3\-\-associations\fP\f3=\fP\f2assoc_list\fP +Displays the statistics only for the jobs running under the +association ids specified by the \f2assoc_list\fP operand, which is a +comma\-separated list of association ids. +Space characters are not allowed in the \f2assoc_list\fP. Default is +all associations\&. + .TP \f3\-X \fP\f3,\fP \f3\-\-allocations\fP Only show cumulative statistics for each job, not the intermediate steps. diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index b649d091c9748d4bb19c6eef4219874b423b93e3..7431f49c81335c1ba04e680e1c4880035e926b7f 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -173,6 +173,9 @@ The hierarchy may have an arbitrary depth. \fIassociation\fP The entity used to group information consisting of four parameters: \fIaccount\fR, \fIcluster\fR, \fIpartition (optional)\fR, and \fIuser\fR. +Used only with the \fIlist\fR or \fIshow\fR command. Add, modify, and +delete should be done to a user, account or cluster entity. This will +in\-turn update the underlying associations. .TP \fIcluster\fP diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index f0dcb8c9744b9074e4e1436e330da2e1110d5777..beb5d4b2db413689fdf2639219c7c10d8b043bc5 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -365,11 +365,11 @@ Bind tasks according to application hints .TP .B compute_bound Select settings for compute bound applications: -use all cores in each socket +use all cores in each socket, one thread per core .TP .B memory_bound Select settings for memory bound applications: -use only one core in each socket +use only one core in each socket, one thread per core .TP .B [no]multithread [don't] use extra threads with in-core multi-threading diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 65925e84386c1bbbba208532d19c51544988696f..53e286a599d024059b7d362a8cfa6ebe1424ef1c 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -380,11 +380,11 @@ Bind tasks according to application hints .TP .B compute_bound Select settings for compute bound applications: -use all cores in each socket +use all cores in each socket, one thread per core .TP .B memory_bound Select settings for memory bound applications: -use only one core in each socket +use only one core in each socket, one thread per core .TP .B [no]multithread [don't] use extra threads with in-core multi-threading diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index 626466e3ffa2bcfee27e74ef427a6e98ff3336db..553c1c911beef244d25934ef53674ea33d2f5969 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -374,12 +374,6 @@ Set the job's list of excluded node. 
Multiple node names may be specified using simple node range expressions (e.g. "lx[10\-20]"). Value may be cleared with blank data value, "ExcNodeList=". .TP -\fIExitCode\fP=<exit>:<sig> -Exit status reported for the job by the wait() function. -The first number is the exit code, typically as set by the exit() function. -The second number of the signal that caused the process to terminate if -it was terminated by a signal. -.TP \fIFeatures\fP=<features> Set the job's required node features. The list of features may include multiple feature names separated @@ -546,6 +540,12 @@ The time the job is expected to terminate based on the job's time limit. When the job ends sooner, this field will be updated with the actual end time. .TP +\fIExitCode\fP=<exit>:<sig> +Exit status reported for the job by the wait() function. +The first number is the exit code, typically as set by the exit() function. +The second number is the signal that caused the process to terminate if +it was terminated by a signal. +.TP \fIJobState\fP The current state of the job. .TP @@ -602,8 +602,11 @@ Use quotes to enclose a reason having more than one word. .TP \fIState\fP=<state> Identify the state to be assigned to the node. Possible values are "NoResp", -"ALLOC", "ALLOCATED", "DOWN", "DRAIN", "FAIL", "FAILING", "IDLE", "MAINT", -"POWER_DOWN", "POWER_UP", or "RESUME". +"ALLOC", "ALLOCATED", "DOWN", "DRAIN", "FAIL", "FAILING", "IDLE", +"MIXED", "MAINT", "POWER_DOWN", "POWER_UP", or "RESUME". +If a node is in a "MIXED" state, it usually means the node is in +multiple states. For instance, if only part of the node is "ALLOCATED" +and the rest of the node is "IDLE", the state will be "MIXED". If you want to remove a node from service, you typically want to set it's state to "DRAIN". "FAILING" is similar to "DRAIN" except that some applications will diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 364c54043f23431b3f6532ee7c00a144b048e85a..8fd76ecc55620de004fad450bf2686a853479770 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -404,11 +404,11 @@ Bind tasks according to application hints .TP .B compute_bound Select settings for compute bound applications: -use all cores in each socket +use all cores in each socket, one thread per core .TP .B memory_bound Select settings for memory bound applications: -use only one core in each socket +use only one core in each socket, one thread per core .TP .B [no]multithread [don't] use extra threads with in-core multi-threading diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index e6f25ed1fecaf0bbcb8447e35db2f57399df5dd3..0dbeeec9a71ad8108aef376910a2f7256b0d973d 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1183,7 +1183,8 @@ and resumes communications). A DOWN node will become available for use upon registration with a valid configuration only if it was set DOWN due to being non\-responsive. If the node was set DOWN for any other reason (low memory, prolog failure, -epilog failure, etc.), its state will not automatically be changed. +epilog failure, silently rebooting, etc.), its state will not automatically +be changed.
.TP \fB2\fR A DOWN node will become available for use upon registration with a diff --git a/slurm.spec b/slurm.spec index abcaec43da0bdf2cb4450e058fb33f5df3415471..d03fd7ce5bf85c48aa648abf8e8d1604b54a600c 100644 --- a/slurm.spec +++ b/slurm.spec @@ -83,14 +83,14 @@ %endif Name: slurm -Version: 2.1.7 +Version: 2.1.8 Release: 1%{?dist} Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-2.1.7.tar.bz2 +Source: slurm-2.1.8.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ @@ -352,7 +352,7 @@ Gives the ability for SLURM to use Berkeley Lab Checkpoint/Restart ############################################################################# %prep -%setup -n slurm-2.1.7 +%setup -n slurm-2.1.8 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index f72214ce470336cc92c8f4aacc028c90186ce192..3a62b6a3578fd04d548f3e023c3dfe52c1f2a2f5 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -623,12 +623,9 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ * default=0 */ uint32_t max_nodes; /* maximum number of nodes usable by job, * default=0 */ - uint16_t min_sockets; /* minimum number of sockets per node required - * by job, default=0 */ - uint16_t min_cores; /* minimum number of cores per cpu required - * by job, default=0 */ - uint16_t min_threads; /* minimum number of threads per core required - * by job, default=0 */ + uint16_t min_sockets; /* number of sockets per node required by job */ + uint16_t min_cores; /* number of cores per cpu required by job */ + uint16_t min_threads; /* number of threads per core required by job */ uint16_t cpus_per_task; /* number of processors required for each task */ uint16_t ntasks_per_node;/* number of tasks to invoke on each node */ uint16_t ntasks_per_socket;/* number of tasks to invoke on each socket */ diff --git a/src/api/job_info.c b/src/api/job_info.c index a68522cdb19900f0aaf3f9f9bd32a348a41e1fce..9dd924ce8494b14891c5cce8d8e2ecf2f7c0ec82 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -581,7 +581,8 @@ line13: xstrcat(out, " "); else xstrcat(out, "\n "); - sprintf(tmp_line, "Command=%s", job_ptr->command); + snprintf(tmp_line, sizeof(tmp_line), "Command=%s", + job_ptr->command); xstrcat(out, tmp_line); } if (one_liner) @@ -590,7 +591,7 @@ line13: xstrcat(out, "\n "); /****** Line 17 ******/ - sprintf(tmp_line, "WorkDir=%s", job_ptr->work_dir); + snprintf(tmp_line, sizeof(tmp_line), "WorkDir=%s", job_ptr->work_dir); xstrcat(out, tmp_line); #ifdef HAVE_BG diff --git a/src/common/plugstack.c b/src/common/plugstack.c index b4a53dbcbfa6b2906bc5e6b7e1b5f2b35c9a8435..f748867dd1330a06f9cbaa72f2683eaaf0ee9d16 100644 --- a/src/common/plugstack.c +++ b/src/common/plugstack.c @@ -1297,7 +1297,7 @@ static int _opt_find(struct spank_plugin_opt *p, static struct spank_plugin_opt *_find_remote_option_by_name(const char *str) { - struct spank_plugin_opt *opt; + struct spank_plugin_opt *opt = NULL; struct opt_find_args args; char buf[256]; char *name; @@ -1318,8 +1318,10 @@ static struct spank_plugin_opt *_find_remote_option_by_name(const char args.optname = buf; args.plugin_name = name; - opt = list_find_first(option_cache, (ListFindF) _opt_find, &args); - + if (option_cache) { + opt = list_find_first(option_cache, (ListFindF) _opt_find, + &args); + } if (opt == NULL) { error("warning: plugin \"%s\" option \"%s\" not found.", name, buf); diff 
--git a/src/common/proc_args.c b/src/common/proc_args.c index d68811a5c04b39826a43e7cecaa9e2750c0895f4..252f68e39d6059bb7a3820d7e2d2d05abd97f2dd 100644 --- a/src/common/proc_args.c +++ b/src/common/proc_args.c @@ -483,6 +483,7 @@ bool verify_socket_core_thread_count(const char *arg, int *min_sockets, { bool tmp_val,ret_val; int i,j; + int max_sockets, max_cores, max_threads; const char *cur_ptr = arg; char buf[3][48]; /* each can hold INT64_MAX - INT64_MAX */ buf[0][0] = '\0'; @@ -519,13 +520,21 @@ bool verify_socket_core_thread_count(const char *arg, int *min_sockets, ret_val = true; tmp_val = get_resource_arg_range(&buf[0][0], "first arg of -B", - min_sockets, NULL, true); + min_sockets, &max_sockets, true); + if ((*min_sockets == 1) && (max_sockets == INT_MAX)) + *min_sockets = NO_VAL; /* Use full range of values */ ret_val = ret_val && tmp_val; + tmp_val = get_resource_arg_range(&buf[1][0], "second arg of -B", - min_cores, NULL, true); + min_cores, &max_cores, true); + if ((*min_cores == 1) && (max_cores == INT_MAX)) + *min_cores = NO_VAL; /* Use full range of values */ ret_val = ret_val && tmp_val; + tmp_val = get_resource_arg_range(&buf[2][0], "third arg of -B", - min_threads, NULL, true); + min_threads, &max_threads, true); + if ((*min_threads == 1) && (max_threads == INT_MAX)) + *min_threads = NO_VAL; /* Use full range of values */ ret_val = ret_val && tmp_val; return ret_val; @@ -566,20 +575,20 @@ bool verify_hint(const char *arg, int *min_sockets, int *min_cores, " help show this help message\n"); return 1; } else if (strcasecmp(tok, "compute_bound") == 0) { - *min_sockets = 1; - *min_cores = 1; + *min_sockets = NO_VAL; + *min_cores = NO_VAL; + *min_threads = 1; *cpu_bind_type |= CPU_BIND_TO_CORES; } else if (strcasecmp(tok, "memory_bound") == 0) { - *min_cores = 1; + *min_cores = 1; + *min_threads = 1; *cpu_bind_type |= CPU_BIND_TO_CORES; } else if (strcasecmp(tok, "multithread") == 0) { - *min_threads = 1; + *min_threads = NO_VAL; *cpu_bind_type |= CPU_BIND_TO_THREADS; } else if (strcasecmp(tok, "nomultithread") == 0) { *min_threads = 1; *cpu_bind_type |= CPU_BIND_TO_THREADS; - if (*ntasks_per_core == NO_VAL) - *ntasks_per_core = 1; } else { error("unrecognized --hint argument \"%s\", " "see --hint=help", tok); diff --git a/src/common/slurm_resource_info.c b/src/common/slurm_resource_info.c index 6896fe0c19620ac72b8121b1651253132cdc5033..c6c543f8fabca26fad6871172e8b3e904c3f06dd 100644 --- a/src/common/slurm_resource_info.c +++ b/src/common/slurm_resource_info.c @@ -91,8 +91,9 @@ static int _isvalue(char *arg) { * given this number given the number of cpus_per_task and * maximum sockets, cores, threads. Note that the value of * cpus is the lowest-level logical processor (LLLP). - * IN min_sockets - Job requested min sockets - * IN min_cores - Job requested min cores + * IN socket_cnt - Job requested socket count + * IN core_cnt - Job requested core count + * IN threads_cnt - Job requested thread count * IN cpus_per_task - Job requested cpus per task * IN ntaskspernode - number of tasks per node * IN ntaskspersocket- number of tasks per socket @@ -106,8 +107,9 @@ static int _isvalue(char *arg) { * * Note: currently only used in the select/linear plugin. 
*/ -int slurm_get_avail_procs(const uint16_t min_sockets, - const uint16_t min_cores, +int slurm_get_avail_procs(const uint16_t socket_cnt, + const uint16_t core_cnt, + const uint16_t thread_cnt, uint16_t cpus_per_task, const uint16_t ntaskspernode, const uint16_t ntaskspersocket, @@ -124,9 +126,18 @@ int slurm_get_avail_procs(const uint16_t min_sockets, uint16_t avail_cpus = 0, max_cpus = 0; uint16_t allocated_cpus = 0, allocated_cores = 0, allocated_sockets = 0; uint16_t max_avail_cpus = 0xffff; /* for alloc_* accounting */ + uint16_t min_sockets = 1, max_sockets = 0xffff; + uint16_t min_cores = 1, max_cores = 0xffff; + uint16_t min_threads = 1, max_threads = 0xffff; int i; /* pick defaults for any unspecified items */ + if (socket_cnt != (uint16_t) NO_VAL) + min_sockets = max_sockets = socket_cnt; + if (core_cnt != (uint16_t) NO_VAL) + min_cores = max_cores = core_cnt; + if (thread_cnt != (uint16_t) NO_VAL) + min_threads = max_threads = thread_cnt; if (cpus_per_task <= 0) cpus_per_task = 1; if (*threads <= 0) @@ -141,8 +152,8 @@ int slurm_get_avail_procs(const uint16_t min_sockets, allocated_sockets++; } #if(DEBUG) - info("get_avail_procs %u %s MIN User_ sockets %u cores %u", - job_id, name, min_sockets, min_cores); + info("get_avail_procs %u %s User_ sockets %u cores %u threads %u", + job_id, name, socket_cnt, core_cnt, thread_cnt); info("get_avail_procs %u %s HW_ sockets %u cores %u threads %u", job_id, name, *sockets, *cores, *threads); info("get_avail_procs %u %s Ntask node %u sockets %u core %u", @@ -200,6 +211,11 @@ int slurm_get_avail_procs(const uint16_t min_sockets, } } + /*** honor socket/core/thread maximums ***/ + *sockets = MIN(*sockets, max_sockets); + *cores = MIN(*cores, max_cores); + *threads = MIN(*threads, max_threads); + if (min_sockets > *sockets) { *cpus = 0; } else { @@ -251,6 +267,11 @@ int slurm_get_avail_procs(const uint16_t min_sockets, if (min_sockets > *sockets) *cpus = 0; + /*** honor socket/core/thread maximums ***/ + *sockets = MIN(*sockets, max_sockets); + *cores = MIN(*cores, max_cores); + *threads = MIN(*threads, max_threads); + /*** compute an overall maximum cpu count honoring ntasks* ***/ max_cpus = *threads; if (ntaskspercore > 0) { diff --git a/src/common/slurm_resource_info.h b/src/common/slurm_resource_info.h index c1a0743ffd54174d81693eb4e4b1b7c16b1f7596..b9f6a4fdb60302726339c709b7b395da96c9a034 100644 --- a/src/common/slurm_resource_info.h +++ b/src/common/slurm_resource_info.h @@ -50,8 +50,9 @@ # endif /* HAVE_INTTYPES_H */ #endif -int slurm_get_avail_procs(const uint16_t minsockets, - const uint16_t mincores, +int slurm_get_avail_procs(const uint16_t socket_cnt, + const uint16_t core_cnt, + const uint16_t thread_cnt, const uint16_t cpuspertask, const uint16_t ntaskspernode, const uint16_t ntaskspersocket, diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c index 50ee94fe35f637607ff4e4796aaf3da33f4cf2a7..fa0285a815af1b7993e95b21cc54a90e4f4bd892 100644 --- a/src/common/slurmdbd_defs.c +++ b/src/common/slurmdbd_defs.c @@ -1360,6 +1360,11 @@ static int _send_fini_msg(void) Buf buffer; dbd_fini_msg_t req; + /* If the connection is already gone, we don't need to send a + fini. 
*/ + if(_fd_writeable(slurmdbd_fd) == -1) + return SLURM_SUCCESS; + buffer = init_buf(1024); pack16((uint16_t) DBD_FINI, buffer); req.commit = 0; diff --git a/src/database/mysql_common.h b/src/database/mysql_common.h index ea416df26c4b80053659b0038fbb728cb9346860..c0bebd48571fe0374b3fa80e9b63c7aef81c4e29 100644 --- a/src/database/mysql_common.h +++ b/src/database/mysql_common.h @@ -60,6 +60,7 @@ #include <mysqld_error.h> typedef struct { + char *auto_incr_query; MYSQL *db_conn; bool rollback; List update_list; diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index d5be23ee51a95728c1da23bdd36d84e34e68735a..52eac1bcb2a2c9bfa4c7451e259904c5dfcee3b1 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -2363,7 +2363,7 @@ static int _remove_common(mysql_conn_t *mysql_conn, MYSQL_ROW row; time_t day_old = now - DELETE_SEC_BACK; bool has_jobs = false; - char *tmp_name_char = _fix_double_quotes(name_char); + char *tmp_name_char = NULL; /* If we have jobs associated with this we do not want to * really delete it for accounting purposes. This is for @@ -2385,9 +2385,13 @@ static int _remove_common(mysql_conn_t *mysql_conn, /* we want to remove completely all that is less than a day old */ if(!has_jobs && table != assoc_table) { query = xstrdup_printf("delete from %s where creation_time>%d " - "&& (%s);" - "alter table %s AUTO_INCREMENT=0;", - table, day_old, name_char, table); + "&& (%s);", + table, day_old, name_char); + /* Make sure the next id we get doesn't create holes + * in the ids. */ + xstrfmtcat(mysql_conn->auto_incr_query, + "alter table %s AUTO_INCREMENT=0;", + table); } if(table != assoc_table) @@ -2396,6 +2400,16 @@ static int _remove_common(mysql_conn_t *mysql_conn, "where deleted=0 && (%s);", table, now, name_char); + /* If we are removing assocs use the assoc_char since the + name_char has lft between statements that can change over + time. The assoc_char has the actual ids of the assocs + which never change. + */ + if(type == DBD_REMOVE_ASSOCS && assoc_char) + tmp_name_char = _fix_double_quotes(assoc_char); + else + tmp_name_char = _fix_double_quotes(name_char); + xstrfmtcat(query, "insert into %s (timestamp, action, name, actor) " "values (%d, %d, \"%s\", \"%s\");", @@ -2598,12 +2612,13 @@ just_update: "fairshare=1, max_jobs=NULL, " "max_nodes_per_job=NULL, " "max_wall_duration_per_job=NULL, " - "max_cpu_mins_per_job=NULL " - "where (%s);" - "alter table %s AUTO_INCREMENT=0;", + "max_cpu_mins_per_job=NULL where (%s);", assoc_table, now, - loc_assoc_char, - assoc_table); + loc_assoc_char); + /* Make sure the next id we get doesn't create holes in the ids. 
*/ + xstrfmtcat(mysql_conn->auto_incr_query, + "alter table %s AUTO_INCREMENT=0;", + assoc_table); if(table != assoc_table) xfree(loc_assoc_char); @@ -2615,6 +2630,7 @@ just_update: if(mysql_conn->rollback) { mysql_db_rollback(mysql_conn->db_conn); } + xfree(mysql_conn->auto_incr_query); list_flush(mysql_conn->update_list); } @@ -3665,7 +3681,8 @@ extern void *acct_storage_p_get_connection(bool make_agent, int conn_num, if(!mysql_db_info) init(); - debug2("acct_storage_p_get_connection: request new connection"); + debug2("acct_storage_p_get_connection: request new connection %d", + rollback); mysql_conn->rollback = rollback; mysql_conn->conn = conn_num; @@ -3690,6 +3707,7 @@ extern int acct_storage_p_close_connection(mysql_conn_t **mysql_conn) acct_storage_p_commit((*mysql_conn), 0); mysql_close_db_connection(&(*mysql_conn)->db_conn); + xfree((*mysql_conn)->auto_incr_query); list_destroy((*mysql_conn)->update_list); xfree((*mysql_conn)); @@ -3708,8 +3726,32 @@ extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit) if(mysql_db_rollback(mysql_conn->db_conn)) error("rollback failed"); } else { - if(mysql_db_commit(mysql_conn->db_conn)) - error("commit failed"); + int rc = SLURM_SUCCESS; + /* Since any use of altering a tables + AUTO_INCREMENT will make it so you can't + rollback, save it until right at the end. + for now we also want to check if + update_list exists since I didn't want to + alter the code all sorts since it is + different in 2.2. + */ + if(mysql_conn->auto_incr_query + && list_count(mysql_conn->update_list)) { + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, + mysql_conn->auto_incr_query); + rc = mysql_db_query( + mysql_conn->db_conn, + mysql_conn->auto_incr_query); + } + + if(rc != SLURM_SUCCESS) { + if(mysql_db_rollback(mysql_conn->db_conn)) + error("rollback failed"); + } else { + if(mysql_db_commit(mysql_conn->db_conn)) + error("commit failed"); + } } } @@ -3833,7 +3875,7 @@ extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit) if(get_qos_count) _set_qos_cnt(mysql_conn->db_conn); } - + xfree(mysql_conn->auto_incr_query); list_flush(mysql_conn->update_list); return SLURM_SUCCESS; diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c index af64285b2a9c4e81207cb63ecc35a28efe7ebe23..0280748e002f3b93cd98e181b0ddf152316b0b1b 100644 --- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c @@ -551,26 +551,21 @@ extern int setup_job_cond_limits(mysql_conn_t *mysql_conn, if(!job_cond) return 0; - /* THIS ASSOCID CHECK ALWAYS NEEDS TO BE FIRST!!!!!!! 
*/ if(job_cond->associd_list && list_count(job_cond->associd_list)) { set = 0; - xstrfmtcat(*extra, ", %s as t3 where (", assoc_table); + if(*extra) + xstrcat(*extra, " && ("); + else + xstrcat(*extra, " where ("); itr = list_iterator_create(job_cond->associd_list); while((object = list_next(itr))) { if(set) xstrcat(*extra, " || "); - xstrfmtcat(*extra, "t3.id=%s", object); + xstrfmtcat(*extra, "t1.associd='%s'", object); set = 1; } list_iterator_destroy(itr); xstrcat(*extra, ")"); - table_level="t3"; - /* just incase the association is gone */ - if(set) - xstrcat(*extra, " || "); - xstrfmtcat(*extra, "t3.id is null) && " - "(t2.lft between t3.lft and t3.rgt " - "|| t2.lft is null)"); } if(job_cond->acct_list && list_count(job_cond->acct_list)) { diff --git a/src/plugins/proctrack/lua/proctrack_lua.c b/src/plugins/proctrack/lua/proctrack_lua.c index 00d1dad41386bf122cf3e4baf5a942c434df6e4f..b6a529edae94e65a155a029f3d4a3598f8ba9bfa 100644 --- a/src/plugins/proctrack/lua/proctrack_lua.c +++ b/src/plugins/proctrack/lua/proctrack_lua.c @@ -52,6 +52,7 @@ #include <stdlib.h> #include <unistd.h> #include <dlfcn.h> +#include <pthread.h> #include <slurm/slurm.h> #include <slurm/slurm_errno.h> @@ -61,6 +62,7 @@ #include <lualib.h> #include "src/common/log.h" +#include "src/common/macros.h" #include "src/slurmd/slurmstepd/slurmstepd_job.h" @@ -71,6 +73,14 @@ const uint32_t plugin_version = 90; static const char lua_script_path[] = DEFAULT_SCRIPT_DIR "/proctrack.lua"; static lua_State *L = NULL; +/* + * Mutex for protecting multi-threaded access to this plugin. + * (Only 1 thread at a time should be in here) + */ +#ifdef WITH_PTHREADS +static pthread_mutex_t lua_lock = PTHREAD_MUTEX_INITIALIZER; +#endif + /* * Lua interface to SLURM log facility: */ @@ -133,6 +143,29 @@ static int lua_register_slurm_output_functions () */ lua_newtable (L); luaL_register (L, NULL, slurm_functions); + + /* + * Create more user-friendly lua versions of SLURM log functions. + */ + luaL_loadstring (L, "slurm.error (string.format(unpack({...})))"); + lua_setfield (L, -2, "log_error"); + luaL_loadstring (L, "slurm.log (0, string.format(unpack({...})))"); + lua_setfield (L, -2, "log_info"); + luaL_loadstring (L, "slurm.log (1, string.format(unpack({...})))"); + lua_setfield (L, -2, "log_verbose"); + luaL_loadstring (L, "slurm.log (2, string.format(unpack({...})))"); + lua_setfield (L, -2, "log_debug"); + luaL_loadstring (L, "slurm.log (3, string.format(unpack({...})))"); + lua_setfield (L, -2, "log_debug2"); + + /* + * slurm.SUCCESS and slurm.FAILURE + */ + lua_pushnumber (L, -1); + lua_setfield (L, -2, "FAILURE"); + lua_pushnumber (L, 0); + lua_setfield (L, -2, "SUCCESS"); + lua_setglobal (L, "slurm"); return 0; } @@ -181,6 +214,11 @@ static int check_lua_script_functions () return (rc); } +/* + * NOTE: The init callback should never be called multiple times, + * let alone called from multiple threads. Therefore, locking + * is unecessary here. 
+ */ int init (void) { int rc = SLURM_SUCCESS; @@ -275,173 +313,237 @@ static int lua_job_table_create (slurmd_job_t *job) int slurm_container_create (slurmd_job_t *job) { + int rc = SLURM_ERROR; double id; + + slurm_mutex_lock (&lua_lock); + /* * All lua script functions should have been verified during * initialization: */ lua_getglobal (L, "slurm_container_create"); if (lua_isnil (L, -1)) - return SLURM_FAILURE; + goto out; lua_job_table_create (job); - if (lua_pcall (L, 1, 1, 0) != 0) - return error ("proctrack/lua: %s: slurm_container_create: %s", - lua_script_path, lua_tostring (L, -1)); + if (lua_pcall (L, 1, 1, 0) != 0) { + error ("proctrack/lua: %s: slurm_container_create: %s", + lua_script_path, lua_tostring (L, -1)); + goto out; + } /* * Get the container id off the stack: */ if (lua_isnil (L, -1)) { + error ("proctrack/lua: slurm_container_create did not return id"); lua_pop (L, -1); - return (-1); + goto out; } id = lua_tonumber (L, -1); job->cont_id = id; info ("job->cont_id = %u (%.0f) \n", job->cont_id, id); lua_pop (L, -1); - return (0); + + rc = SLURM_SUCCESS; +out: + slurm_mutex_unlock (&lua_lock); + return rc; } int slurm_container_add (slurmd_job_t *job, pid_t pid) { - int rc; + int rc = SLURM_ERROR; + + slurm_mutex_lock (&lua_lock); lua_getglobal (L, "slurm_container_add"); if (lua_isnil (L, -1)) - return SLURM_FAILURE; + goto out; lua_job_table_create (job); lua_pushnumber (L, job->cont_id); lua_pushnumber (L, pid); - if (lua_pcall (L, 3, 1, 0) != 0) - return error ("running lua function 'slurm_container_add': %s", - lua_tostring (L, -1)); + if (lua_pcall (L, 3, 1, 0) != 0) { + error ("running lua function 'slurm_container_add': %s", + lua_tostring (L, -1)); + goto out; + } rc = lua_tonumber (L, -1); lua_pop (L, -1); +out: + slurm_mutex_unlock (&lua_lock); return (rc); } int slurm_container_signal (uint32_t id, int sig) { - int rc; + int rc = SLURM_ERROR; + + slurm_mutex_lock (&lua_lock); + lua_getglobal (L, "slurm_container_signal"); if (lua_isnil (L, -1)) - return SLURM_FAILURE; + goto out; lua_pushnumber (L, id); lua_pushnumber (L, sig); - if (lua_pcall (L, 2, 1, 0) != 0) - return error ("running lua function 'slurm_container_signal': %s", - lua_tostring (L, -1)); + if (lua_pcall (L, 2, 1, 0) != 0) { + error ("running lua function 'slurm_container_signal': %s", + lua_tostring (L, -1)); + goto out; + } rc = lua_tonumber (L, -1); lua_pop (L, -1); +out: + slurm_mutex_unlock (&lua_lock); return (rc); } int slurm_container_destroy (uint32_t id) { - int rc; + int rc = SLURM_ERROR; + + slurm_mutex_lock (&lua_lock); + lua_getglobal (L, "slurm_container_destroy"); if (lua_isnil (L, -1)) - return SLURM_FAILURE; + goto out; lua_pushnumber (L, id); - if (lua_pcall (L, 1, 1, 0) != 0) - return error ("running lua function 'slurm_container_destroy': %s", - lua_tostring (L, -1)); + if (lua_pcall (L, 1, 1, 0) != 0) { + error ("running lua function 'slurm_container_destroy': %s", + lua_tostring (L, -1)); + goto out; + } rc = lua_tonumber (L, -1); lua_pop (L, -1); + +out: + slurm_mutex_unlock (&lua_lock); return (rc); } uint32_t slurm_container_find (pid_t pid) { - uint32_t id; + uint32_t id = (uint32_t) SLURM_ERROR; + + slurm_mutex_lock (&lua_lock); + lua_getglobal (L, "slurm_container_find"); if (lua_isnil (L, -1)) - return SLURM_FAILURE; + goto out; lua_pushnumber (L, pid); - if (lua_pcall (L, 1, 1, 0) != 0) - return error ("running lua function 'slurm_container_find': %s", - lua_tostring (L, -1)); + if (lua_pcall (L, 1, 1, 0) != 0) { + error ("running lua function 
'slurm_container_find': %s", + lua_tostring (L, -1)); + goto out; + } id = (uint32_t) lua_tonumber (L, -1); lua_pop (L, -1); + +out: + slurm_mutex_unlock (&lua_lock); + return (id); } bool slurm_container_has_pid (uint32_t id, pid_t pid) { - int rc; + int rc = 0; + + slurm_mutex_lock (&lua_lock); + lua_getglobal (L, "slurm_container_has_pid"); if (lua_isnil (L, -1)) - return SLURM_FAILURE; + goto out; lua_pushnumber (L, id); lua_pushnumber (L, pid); - if (lua_pcall (L, 2, 1, 0) != 0) - return error ("running lua function 'slurm_container_has_pid': %s", - lua_tostring (L, -1)); + if (lua_pcall (L, 2, 1, 0) != 0) { + error ("running lua function 'slurm_container_has_pid': %s", + lua_tostring (L, -1)); + goto out; + } rc = lua_toboolean (L, -1); lua_pop (L, -1); + +out: + slurm_mutex_unlock (&lua_lock); return (rc == 1); } int slurm_container_wait (uint32_t id) { - int rc; + int rc = SLURM_ERROR; + + slurm_mutex_lock (&lua_lock); + lua_getglobal (L, "slurm_container_wait"); if (lua_isnil (L, -1)) - return SLURM_FAILURE; + goto out; lua_pushnumber (L, id); - if (lua_pcall (L, 1, 1, 0) != 0) - return error ("running lua function 'slurm_container_wait': %s", - lua_tostring (L, -1)); + if (lua_pcall (L, 1, 1, 0) != 0) { + error ("running lua function 'slurm_container_wait': %s", + lua_tostring (L, -1)); + goto out; + } rc = lua_tonumber (L, -1); lua_pop (L, -1); +out: + slurm_mutex_unlock (&lua_lock); return (rc); } int slurm_container_get_pids (uint32_t cont_id, pid_t **pids, int *npids) { + int rc = SLURM_ERROR; int i = 0; int t = 0; pid_t *p; + *npids = 0; + + slurm_mutex_lock (&lua_lock); + lua_getglobal (L, "slurm_container_get_pids"); if (lua_isnil (L, -1)) - return SLURM_FAILURE; + goto out; lua_pushnumber (L, cont_id); - if (lua_pcall (L, 1, 1, 0) != 0) - return (error ("%s: %s: %s", - "proctrack/lua", - __func__, - lua_tostring (L, -1))); + if (lua_pcall (L, 1, 1, 0) != 0) { + error ("%s: %s: %s", + "proctrack/lua", + __func__, + lua_tostring (L, -1)); + goto out; + } /* * list of PIDs should be returned in a table from the lua * script. 
If a table wasn't returned then generate an error: */ - if (!lua_istable(L, -1)) - return (error ("%s: %s: function should return a table", - "proctrack/lua", - __func__)); + if (!lua_istable(L, -1)) { + error ("%s: %s: function should return a table", + "proctrack/lua", + __func__); + goto out; + } /* * Save absolute position of table in lua stack @@ -468,6 +570,10 @@ int slurm_container_get_pids (uint32_t cont_id, pid_t **pids, int *npids) lua_pop (L, 1); *pids = p; - return SLURM_SUCCESS; + + rc = SLURM_SUCCESS; +out: + slurm_mutex_unlock (&lua_lock); + return rc; } diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index 8c194fa6ea7522a369a05b842e11fa205d942186..af28f7821f4a1130b3a328817deeacb1f6f778bf 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -121,6 +121,9 @@ static bool _job_is_completing(void); static bool _more_work(void); static int _num_feature_count(struct job_record *job_ptr); static int _start_job(struct job_record *job_ptr, bitstr_t *avail_bitmap); +static bool _test_resv_overlap(node_space_map_t *node_space, + bitstr_t *use_bitmap, uint32_t start_time, + uint32_t end_reserve); static int _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes); @@ -421,7 +424,11 @@ static void _attempt_backfill(void) if (part_ptr == NULL) { part_ptr = find_part_record(job_ptr->partition); - xassert(part_ptr); + if (part_ptr == NULL) { + error("Could not find partition %s for job %u", + job_ptr->partition, job_ptr->job_id); + continue; + } job_ptr->part_ptr = part_ptr; error("partition pointer reset for job %u, part %s", job_ptr->job_id, job_ptr->partition); @@ -536,14 +543,20 @@ static void _attempt_backfill(void) } job_ptr->start_time = MAX(job_ptr->start_time, start_res); + last_job_update = now; if (job_ptr->start_time <= now) { int rc = _start_job(job_ptr, resv_bitmap); - if (rc == ESLURM_ACCOUNTING_POLICY) + if (rc == ESLURM_ACCOUNTING_POLICY) { + /* Unknown future start time, just skip job */ continue; - else if (rc != SLURM_SUCCESS) + } else if (rc != SLURM_SUCCESS) { /* Planned to start job, but something bad * happended. */ break; + } else { + /* Started this job, move to next one */ + continue; + } } if (job_ptr->start_time > (now + BACKFILL_WINDOW)) { /* Starts too far in the future to worry about */ @@ -555,10 +568,19 @@ static void _attempt_backfill(void) break; } + end_reserve = job_ptr->start_time + (time_limit * 60); + if (_test_resv_overlap(node_space, avail_bitmap, + job_ptr->start_time, end_reserve)) { + /* This job overlaps with an existing reservation for + * job to be backfill scheduled, which the sched + * plugin does not know about. Try again later. */ + later_start = job_ptr->start_time; + goto TRY_LATER; + } + /* * Add reservation to scheduling table */ - end_reserve = job_ptr->start_time + (time_limit * 60); bit_not(avail_bitmap); _add_reservation(job_ptr->start_time, end_reserve, avail_bitmap, node_space, &node_space_recs); @@ -721,3 +743,31 @@ static void _add_reservation(uint32_t start_time, uint32_t end_reserve, break; } } + +/* + * Determine if the resource specification for a new job overlaps with a + * reservation that the backfill scheduler has made for a job to be + * started in the future. 
+ * IN use_bitmap - nodes to be allocated + * IN start_time - start time of job + * IN end_reserve - end time of job + */ +static bool _test_resv_overlap(node_space_map_t *node_space, + bitstr_t *use_bitmap, uint32_t start_time, + uint32_t end_reserve) +{ + bool overlap = false; + int j; + + for (j=0; ; ) { + if ((node_space[j].end_time > start_time) && + (node_space[j].begin_time < end_reserve) && + (!bit_super_set(use_bitmap, node_space[j].avail_bitmap))) { + overlap = true; + break; + } + if ((j = node_space[j].next) == 0) + break; + } + return overlap; +} diff --git a/src/plugins/sched/wiki2/job_modify.c b/src/plugins/sched/wiki2/job_modify.c index 6f77e3606009bc79295f85438be627fc95a9b069..fd1d01df3083823cfd182fa19b4f2d46848e3bf2 100644 --- a/src/plugins/sched/wiki2/job_modify.c +++ b/src/plugins/sched/wiki2/job_modify.c @@ -62,7 +62,8 @@ static int _job_modify(uint32_t jobid, char *bank_ptr, char *depend_ptr, char *new_hostlist, uint32_t new_node_cnt, char *part_name_ptr, uint32_t new_time_limit, char *name_ptr, - char *start_ptr, char *feature_ptr, char *env_ptr) + char *start_ptr, char *feature_ptr, char *env_ptr, + char *comment_ptr) { struct job_record *job_ptr; time_t now = time(NULL); @@ -78,6 +79,13 @@ static int _job_modify(uint32_t jobid, char *bank_ptr, return ESLURM_DISABLED; } + if (comment_ptr) { + info("wiki: change job %u comment %s", jobid, comment_ptr); + xfree(job_ptr->comment); + job_ptr->comment = xstrdup(comment_ptr); + last_job_update = now; + } + if (depend_ptr) { int rc = update_job_dependency(job_ptr, depend_ptr); if (rc == SLURM_SUCCESS) { @@ -90,7 +98,6 @@ static int _job_modify(uint32_t jobid, char *bank_ptr, } } - if (env_ptr) { bool have_equal = false; char old_sep[1]; @@ -393,16 +400,24 @@ host_fini: if (rc) { } /* Modify a job: - * CMD=MODIFYJOB ARG=<jobid> PARTITION=<name> NODES=<number> - * DEPEND=afterany:<jobid> TIMELIMT=<seconds> BANK=<name> - * JOBNAME=<name> MINSTARTTIME=<uts> RFEATURES=<features> - * VARIABLELIST=<env_vars> + * CMD=MODIFYJOB ARG=<jobid> + * [BANK=<name>] + * [COMMENT=<whatever>;] + * [DEPEND=afterany:<jobid>] + * [JOBNAME=<name>] + * [MINSTARTTIME=<uts>] + * [NODES=<number>] + * [PARTITION=<name>] + * [RFEATURES=<features>] + * [TIMELIMT=<seconds>] + * [VARIABLELIST=<env_vars>] + * * RET 0 on success, -1 on failure */ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) { char *arg_ptr, *bank_ptr, *depend_ptr, *nodes_ptr, *start_ptr; char *host_ptr, *name_ptr, *part_ptr, *time_ptr, *tmp_char; - char *feature_ptr, *env_ptr; + char *comment_ptr, *feature_ptr, *env_ptr; int i, slurm_rc; uint32_t jobid, new_node_cnt = 0, new_time_limit = 0; static char reply_msg[128]; @@ -429,6 +444,7 @@ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) return -1; } bank_ptr = strstr(cmd_ptr, "BANK="); + comment_ptr = strstr(cmd_ptr, "COMMENT="); depend_ptr = strstr(cmd_ptr, "DEPEND="); host_ptr = strstr(cmd_ptr, "HOSTLIST="); name_ptr = strstr(cmd_ptr, "JOBNAME="); @@ -443,6 +459,32 @@ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) bank_ptr += 5; null_term(bank_ptr); } + if (comment_ptr) { + comment_ptr[7] = ':'; + comment_ptr += 8; + if (comment_ptr[0] == '\"') { + comment_ptr++; + for (i=0; ; i++) { + if (comment_ptr[i] == '\0') + break; + if (comment_ptr[i] == '\"') { + comment_ptr[i] = '\0'; + break; + } + } + } else if (comment_ptr[0] == '\'') { + comment_ptr++; + for (i=0; ; i++) { + if (comment_ptr[i] == '\0') + break; + if (comment_ptr[i] == '\'') { + comment_ptr[i] = 
'\0'; + break; + } + } + } else + null_term(comment_ptr); + } if (depend_ptr) { depend_ptr[6] = ':'; depend_ptr += 7; @@ -510,10 +552,10 @@ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) null_term(env_ptr); } - /* Look for any un-parsed "=" */ + /* Look for any un-parsed "=" ignoring anything after VARIABLELIST + * which is expected to contain "=" in its value*/ tmp_char = strchr(cmd_ptr, '='); - if (( env_ptr && (tmp_char < env_ptr)) || - (!env_ptr && tmp_char)) { + if (tmp_char && (!env_ptr || (env_ptr > tmp_char))) { tmp_char[0] = '\0'; while (tmp_char[-1] && (!isspace(tmp_char[-1]))) tmp_char--; @@ -523,7 +565,7 @@ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) lock_slurmctld(job_write_lock); slurm_rc = _job_modify(jobid, bank_ptr, depend_ptr, host_ptr, new_node_cnt, part_ptr, new_time_limit, name_ptr, - start_ptr, feature_ptr, env_ptr); + start_ptr, feature_ptr, env_ptr, comment_ptr); unlock_slurmctld(job_write_lock); if (slurm_rc != SLURM_SUCCESS) { *err_code = -700; diff --git a/src/plugins/select/bluegene/plugin/bg_block_info.c b/src/plugins/select/bluegene/plugin/bg_block_info.c index cce43054034ea56c0dc351d049a25dafe2b48cdf..f0d296fdc463d4ec5be1ff70076f084257014028 100644 --- a/src/plugins/select/bluegene/plugin/bg_block_info.c +++ b/src/plugins/select/bluegene/plugin/bg_block_info.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * bg_block_info.c - bluegene block information from the db2 database. * - * $Id: bg_block_info.c 19270 2010-01-19 19:46:45Z da $ + * $Id: bg_block_info.c 20097 2010-04-20 16:42:16Z da $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -184,6 +184,12 @@ extern int block_ready(struct job_record *job_ptr) } else if ((bg_record->user_uid == job_ptr->user_id) && (bg_record->state == RM_PARTITION_READY)) { + /* Clear the state just incase we + missed it somehow. + */ + job_ptr->job_state &= (~JOB_CONFIGURING); + last_job_update = time(NULL); + rc = 1; } else if (bg_record->user_uid != job_ptr->user_id) rc = 0; diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index acd5c679d349e306f2677bcfc8034878b53f2529..e8139e4a061fef5a1822854b70333a197023abd8 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -59,15 +59,7 @@ _STMT_START { \ pthread_mutex_t create_dynamic_mutex = PTHREAD_MUTEX_INITIALIZER; -/* This list is for the test_job_list function because we will be - * adding and removing blocks off the bg_lists->job_running and don't want - * to ruin that list in submit_job it should = bg_lists->job_running - * otherwise it should be a copy of that list. 
- */ -List job_block_test_list = NULL; - static void _rotate_geo(uint16_t *req_geometry, int rot_cnt); -static int _bg_record_sort_aval_inc(bg_record_t* rec_a, bg_record_t* rec_b); static int _get_user_groups(uint32_t user_id, uint32_t group_id, gid_t *groups, int max_groups, int *ngroups); static int _test_image_perms(char *image_name, List image_list, @@ -126,62 +118,6 @@ static void _rotate_geo(uint16_t *req_geometry, int rot_cnt) } } -/* - * Comparator used for sorting blocks smallest to largest - * - * returns: -1: rec_a < rec_b 0: rec_a == rec_b 1: rec_a > rec_b - * - */ -static int _bg_record_sort_aval_inc(bg_record_t* rec_a, bg_record_t* rec_b) -{ - if((rec_a->job_running == BLOCK_ERROR_STATE) - && (rec_b->job_running != BLOCK_ERROR_STATE)) - return 1; - else if((rec_a->job_running != BLOCK_ERROR_STATE) - && (rec_b->job_running == BLOCK_ERROR_STATE)) - return -1; - else if(!rec_a->job_ptr && rec_b->job_ptr) - return 1; - else if(rec_a->job_ptr && !rec_b->job_ptr) - return -1; - else if(rec_a->job_ptr && rec_b->job_ptr) { - if(rec_a->job_ptr->start_time > rec_b->job_ptr->start_time) - return 1; - else if(rec_a->job_ptr->start_time < rec_b->job_ptr->start_time) - return -1; - } - - return bg_record_cmpf_inc(rec_a, rec_b); -} - -/* - * Comparator used for sorting blocks smallest to largest - * - * returns: -1: rec_a > rec_b 0: rec_a == rec_b 1: rec_a < rec_b - * - */ -static int _bg_record_sort_aval_dec(bg_record_t* rec_a, bg_record_t* rec_b) -{ - if((rec_a->job_running == BLOCK_ERROR_STATE) - && (rec_b->job_running != BLOCK_ERROR_STATE)) - return -1; - else if((rec_a->job_running != BLOCK_ERROR_STATE) - && (rec_b->job_running == BLOCK_ERROR_STATE)) - return 1; - else if(!rec_a->job_ptr && rec_b->job_ptr) - return -1; - else if(rec_a->job_ptr && !rec_b->job_ptr) - return 1; - else if(rec_a->job_ptr && rec_b->job_ptr) { - if(rec_a->job_ptr->start_time > rec_b->job_ptr->start_time) - return -1; - else if(rec_a->job_ptr->start_time < rec_b->job_ptr->start_time) - return 1; - } - - return bg_record_cmpf_inc(rec_a, rec_b); -} - /* * Get a list of groups associated with a specific user_id * Return 0 on success, -1 on failure @@ -360,6 +296,13 @@ static bg_record_t *_find_matching_block(List block_list, /* If test_only we want to fall through to tell the scheduler that it is runnable just not right now. 
*/ + + /* The job running could be reset so set it back up + here if there is a job_ptr + */ + if(bg_record->job_ptr) + bg_record->job_running = bg_record->job_ptr->job_id; + debug3("%s job_running = %d", bg_record->bg_block_id, bg_record->job_running); /*block is messed up some how (BLOCK_ERROR_STATE) @@ -724,18 +667,18 @@ static int _dynamically_request(List block_list, int *blocks_added, list_of_lists = list_create(NULL); if(user_req_nodes) - list_append(list_of_lists, job_block_test_list); + list_append(list_of_lists, bg_lists->job_running); else { list_append(list_of_lists, block_list); - if(job_block_test_list == bg_lists->job_running && - list_count(block_list) != list_count(bg_lists->booted)) { + if(list_count(block_list) != list_count(bg_lists->booted)) { list_append(list_of_lists, bg_lists->booted); if(list_count(bg_lists->booted) - != list_count(job_block_test_list)) - list_append(list_of_lists, job_block_test_list); + != list_count(bg_lists->job_running)) + list_append(list_of_lists, + bg_lists->job_running); } else if(list_count(block_list) - != list_count(job_block_test_list)) { - list_append(list_of_lists, job_block_test_list); + != list_count(bg_lists->job_running)) { + list_append(list_of_lists, bg_lists->job_running); } } itr = list_iterator_create(list_of_lists); @@ -765,20 +708,14 @@ static int _dynamically_request(List block_list, int *blocks_added, list_append(block_list, bg_record); (*blocks_added) = 1; } else { - if(job_block_test_list - == bg_lists->job_running) { - if(configure_block(bg_record) - == SLURM_ERROR) { - destroy_bg_record( - bg_record); - error("_dynamically_" - "request: " - "unable to " - "configure " - "block"); - rc = SLURM_ERROR; - break; - } + if(configure_block(bg_record) + == SLURM_ERROR) { + destroy_bg_record(bg_record); + error("_dynamically_request: " + "unable to configure " + "block"); + rc = SLURM_ERROR; + break; } list_append(block_list, bg_record); print_bg_record(bg_record); @@ -793,7 +730,7 @@ static int _dynamically_request(List block_list, int *blocks_added, continue; } list_sort(block_list, - (ListCmpF)_bg_record_sort_aval_dec); + (ListCmpF)bg_record_sort_aval_inc); rc = SLURM_SUCCESS; break; @@ -1061,15 +998,40 @@ static int _find_best_block_match(List block_list, if(is_test) { List new_blocks = NULL; - List job_list = NULL; + List job_list = list_create(NULL); + ListIterator itr = NULL; debug("trying with empty machine"); - slurm_mutex_lock(&block_state_mutex); - if(job_block_test_list == bg_lists->job_running) - job_list = copy_bg_list(job_block_test_list); - else - job_list = job_block_test_list; - slurm_mutex_unlock(&block_state_mutex); - list_sort(job_list, (ListCmpF)_bg_record_sort_aval_inc); + + /* Here we need to make sure the blocks in the + job list are those in the block list so go + through and grab them and add them to a + separate list. + */ + itr = list_iterator_create(block_list); + while((bg_record = list_next(itr))) { + if(bg_record->job_running != NO_JOB_RUNNING) + list_append(job_list, bg_record); + /* Since the error blocks are at the + end we only really need to look at + the first one to make sure it will + work, so don't add more than one to + the job list. + We do need to check for at least + one error block because that lets + us know not to hold up the entire + machine for a job that won't run + until the error is removed which + could be a very long time. 
+ */ + if(bg_record->job_running == BLOCK_ERROR_STATE) + break; + } + list_iterator_destroy(itr); + + /* Block list is already in the correct order, + earliest avaliable first, + so the job list will also be. No need to + sort. */ while(1) { bool track_down_nodes = true; /* this gets altered in @@ -1077,9 +1039,8 @@ static int _find_best_block_match(List block_list, for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) request.geometry[i] = req_geometry[i]; - bg_record = list_pop(job_list); - if(bg_record) { - if(bg_record->job_ptr) + if((bg_record = list_pop(job_list))) { + if(bg_record->job_ptr) { debug2("taking off %d(%s) " "started at %d " "ends at %d", @@ -1089,8 +1050,21 @@ static int _find_best_block_match(List block_list, start_time, bg_record->job_ptr-> end_time); - else if(bg_record->job_running - == BLOCK_ERROR_STATE) + /* Mark the block as + not running a job, + this should + correspond to the + pointer in the + block_list. We + only look at the + job_running var so + don't remove the + job_ptr. + */ + bg_record->job_running = + NO_JOB_RUNNING; + } else if(bg_record->job_running + == BLOCK_ERROR_STATE) debug2("taking off (%s) " "which is in an error " "state", @@ -1106,7 +1080,6 @@ static int _find_best_block_match(List block_list, if(!(new_blocks = create_dynamic_block( block_list, &request, job_list, track_down_nodes))) { - destroy_bg_record(bg_record); if(errno == ESLURM_INTERCONNECT_FAILURE || !list_count(job_list)) { char *nodes; @@ -1133,10 +1106,9 @@ static int _find_best_block_match(List block_list, if(!(*found_bg_record)) { error("got an empty list back"); list_destroy(new_blocks); - if(bg_record) { - destroy_bg_record(bg_record); + if(bg_record) continue; - } else { + else { rc = SLURM_ERROR; break; } @@ -1145,36 +1117,16 @@ static int _find_best_block_match(List block_list, (*found_bg_record)->bitmap); if(bg_record) { + (*found_bg_record)->job_running = + bg_record->job_running; (*found_bg_record)->job_ptr = bg_record->job_ptr; - destroy_bg_record(bg_record); } - - if(job_block_test_list - != bg_lists->job_running) { - list_append(block_list, - (*found_bg_record)); - while((bg_record = - list_pop(new_blocks))) { - if(block_exist_in_list( - block_list, - bg_record)) - destroy_bg_record( - bg_record); - else { - list_append(block_list, - bg_record); -// print_bg_record(bg_record); - } - } - } - list_destroy(new_blocks); break; } - if(job_block_test_list == bg_lists->job_running) - list_destroy(job_list); + list_destroy(job_list); goto end_it; } else { @@ -1474,8 +1426,6 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, if(bg_conf->layout_mode == LAYOUT_DYNAMIC) slurm_mutex_lock(&create_dynamic_mutex); - job_block_test_list = bg_lists->job_running; - select_g_select_jobinfo_get(job_ptr->select_jobinfo, SELECT_JOBDATA_CONN_TYPE, &conn_type); if(conn_type == SELECT_NAV) { @@ -1503,7 +1453,7 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, buf, sizeof(buf), SELECT_PRINT_MIXED); - debug("bluegene:submit_job: %u mode=%d %s nodes=%u-%u-%u", + debug("bluegene:submit_job: %u mode=%d %s nodes=%u-%u-%u", job_ptr->job_id, local_mode, buf, min_nodes, req_nodes, max_nodes); select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, @@ -1540,7 +1490,7 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, /* just remove the preemptable jobs now since we are treating this as a run now deal */ preempt: - list_sort(block_list, 
(ListCmpF)_bg_record_sort_aval_dec); + list_sort(block_list, (ListCmpF)bg_record_sort_aval_inc); rc = _find_best_block_match(block_list, &blocks_added, job_ptr, slurm_block_bitmap, min_nodes, diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index d69e4addda3c0623c4e21230326786b0bf04f6e8..839f27178a765720623ff736d66e16b6f734a4f6 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -2,7 +2,7 @@ * bg_job_run.c - blue gene job execution (e.g. initiation and termination) * functions. * - * $Id: bg_job_run.c 20011 2010-04-13 19:10:21Z da $ + * $Id: bg_job_run.c 20097 2010-04-20 16:42:16Z da $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -547,6 +547,10 @@ static void _sync_agent(bg_update_t *bg_update_ptr) list_push(bg_lists->booted, bg_record); if(bg_record->state == RM_PARTITION_READY) { + if(bg_record->job_ptr) { + bg_record->job_ptr->job_state &= (~JOB_CONFIGURING); + last_job_update = time(NULL); + } if(bg_record->user_uid != bg_update_ptr->job_ptr->user_id) { int set_user_rc = SLURM_SUCCESS; diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.c b/src/plugins/select/bluegene/plugin/bg_record_functions.c index fe79d6e5f42c6b6917e80fde1786f3cb362db2ae..dea4d824864c121e44a717638e0f1226929aa691 100644 --- a/src/plugins/select/bluegene/plugin/bg_record_functions.c +++ b/src/plugins/select/bluegene/plugin/bg_record_functions.c @@ -522,6 +522,34 @@ extern int bg_record_cmpf_inc(bg_record_t* rec_a, bg_record_t* rec_b) return 0; } +/* + * Comparator used for sorting blocks from earliest available to latest + * + * returns: -1: rec_a < rec_b 0: rec_a == rec_b 1: rec_a > rec_b + * + */ +extern int bg_record_sort_aval_inc(bg_record_t* rec_a, bg_record_t* rec_b) +{ + if((rec_a->job_running == BLOCK_ERROR_STATE) + && (rec_b->job_running != BLOCK_ERROR_STATE)) + return 1; + else if((rec_a->job_running != BLOCK_ERROR_STATE) + && (rec_b->job_running == BLOCK_ERROR_STATE)) + return -1; + else if(!rec_a->job_ptr && rec_b->job_ptr) + return -1; + else if(rec_a->job_ptr && !rec_b->job_ptr) + return 1; + else if(rec_a->job_ptr && rec_b->job_ptr) { + if(rec_a->job_ptr->end_time > rec_b->job_ptr->end_time) + return 1; + else if(rec_a->job_ptr->end_time < rec_b->job_ptr->end_time) + return -1; + } + + return bg_record_cmpf_inc(rec_a, rec_b); +} + /* if looking at the main list this should have some nice * block_state_mutex locks around it.
*/ diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.h b/src/plugins/select/bluegene/plugin/bg_record_functions.h index 26ae7daea6d7af6d9b032ceb155849fff253848e..45e2b2a9dba51e42735a55cd0ff0533293b8b230 100644 --- a/src/plugins/select/bluegene/plugin/bg_record_functions.h +++ b/src/plugins/select/bluegene/plugin/bg_record_functions.h @@ -122,6 +122,7 @@ extern void process_nodes(bg_record_t *bg_reord, bool startup); extern List copy_bg_list(List in_list); extern void copy_bg_record(bg_record_t *fir_record, bg_record_t *sec_record); extern int bg_record_cmpf_inc(bg_record_t *rec_a, bg_record_t *rec_b); +extern int bg_record_sort_aval_inc(bg_record_t* rec_a, bg_record_t* rec_b); /* return bg_record from a bg_list */ extern bg_record_t *find_bg_record_in_list(List my_list, char *bg_block_id); diff --git a/src/plugins/select/bluegene/plugin/dynamic_block.c b/src/plugins/select/bluegene/plugin/dynamic_block.c index bca33d8e548299edfd6e0b77080a49a3f6bb65bb..ff6c65e0a304b744fe5313a49c6d0842e4b21d2a 100644 --- a/src/plugins/select/bluegene/plugin/dynamic_block.c +++ b/src/plugins/select/bluegene/plugin/dynamic_block.c @@ -179,6 +179,9 @@ extern List create_dynamic_block(List block_list, break; } + /* Sort the list so the small blocks are in the order + * of ionodes. */ + list_sort(block_list, (ListCmpF)bg_record_cmpf_inc); request->conn_type = SELECT_SMALL; new_blocks = list_create(destroy_bg_record); /* check only blocks that are free and small */ @@ -209,6 +212,8 @@ extern List create_dynamic_block(List block_list, == SLURM_SUCCESS) goto finished; + /* Re-sort the list back to the original order. */ + list_sort(block_list, (ListCmpF)bg_record_sort_aval_inc); list_destroy(new_blocks); new_blocks = NULL; debug2("small block not able to be placed inside others"); @@ -612,8 +617,8 @@ static int _breakup_blocks(List block_list, List new_blocks, int cnodes = request->procs / bg_conf->cpu_ratio; int curr_bp_bit = -1; - debug2("proc count = %d cnodes = %d size = %d", - request->procs, cnodes, request->size); + debug2("cpu_count= %d cnodes=%d o_free=%d o_small=%d", + request->procs, cnodes, only_free, only_small); switch(cnodes) { case 16: @@ -651,6 +656,7 @@ static int _breakup_blocks(List block_list, List new_blocks, /* check for free blocks on the first and second time */ if(only_free && (bg_record->state != RM_PARTITION_FREE)) continue; + /* check small blocks first */ if(only_small && (bg_record->node_cnt > bg_conf->bp_node_cnt)) continue; diff --git a/src/plugins/select/bluegene/plugin/state_test.c b/src/plugins/select/bluegene/plugin/state_test.c index 03c3595bcb77899c99fc8611dea5afe4d049231c..2c81188f9ddd4f107415ad97128fff53d9dece6c 100644 --- a/src/plugins/select/bluegene/plugin/state_test.c +++ b/src/plugins/select/bluegene/plugin/state_test.c @@ -2,7 +2,7 @@ * state_test.c - Test state of Bluegene base partitions and switches. * DRAIN nodes in SLURM that are not usable. * - * $Id: state_test.c 19998 2010-04-12 19:18:46Z da $ + * $Id: state_test.c 20152 2010-04-27 18:01:39Z da $ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. * Copyright (C) 2008-2009 Lawrence Livermore National Security. @@ -321,7 +321,13 @@ static int _test_down_nodecards(rm_BP_t *bp_ptr, bool slurmctld_locked) goto clean_up; } - node_name = _get_bp_node_name(bp_ptr); + /* The node_name will only be NULL if this system doesn't + really have the node. 
+ */ + if(!(node_name = _get_bp_node_name(bp_ptr))) { + rc = SLURM_ERROR; + goto clean_up; + } if((rc = bridge_get_data(ncard_list, RM_NodeCardListSize, &num)) != STATUS_OK) { diff --git a/src/plugins/select/cons_res/dist_tasks.c b/src/plugins/select/cons_res/dist_tasks.c index f83e37f23e9ea8cc3456835bfd298e64eb7b4359..9714117c8c445931e694bacde10b2607c2dce8a2 100644 --- a/src/plugins/select/cons_res/dist_tasks.c +++ b/src/plugins/select/cons_res/dist_tasks.c @@ -71,7 +71,7 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr) { bool over_subscribe = false; - uint32_t n, i, tid, maxtasks; + uint32_t n, i, tid, maxtasks, l; uint16_t *avail_cpus; job_resources_t *job_res = job_ptr->job_resrcs; if (!job_res || !job_res->cpus) { @@ -83,6 +83,9 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr) avail_cpus = job_res->cpus; job_res->cpus = xmalloc(job_res->nhosts * sizeof(uint16_t)); + if (job_ptr->details->cpus_per_task > 1) + maxtasks = maxtasks / job_ptr->details->cpus_per_task; + for (tid = 0, i = 0; (tid < maxtasks); i++) { /* cycle counter */ bool space_remaining = false; if (over_subscribe) { @@ -96,10 +99,12 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr) for (n = 0; ((n < job_res->nhosts) && (tid < maxtasks)); n++) { if ((i < avail_cpus[n]) || over_subscribe) { tid++; - if (job_res->cpus[n] < avail_cpus[n]) - job_res->cpus[n]++; - if ((i + 1) < avail_cpus[n]) - space_remaining = true; + for (l = 0; l < job_ptr->details->cpus_per_task; l++) { + if (job_res->cpus[n] < avail_cpus[n]) + job_res->cpus[n]++; + if ((i + 1) < avail_cpus[n]) + space_remaining = true; + } } } if (!space_remaining) { @@ -192,9 +197,17 @@ static void _block_sync_core_bitmap(struct job_record *job_ptr, alloc_cores = true; #endif - if (job_ptr->details && job_ptr->details->mc_ptr && - job_ptr->details->mc_ptr->ntasks_per_core) - ntasks_per_core = job_ptr->details->mc_ptr->ntasks_per_core; + if (job_ptr->details && job_ptr->details->mc_ptr) { + if (job_ptr->details->mc_ptr->ntasks_per_core) { + ntasks_per_core = job_ptr->details->mc_ptr-> + ntasks_per_core; + } + if ((job_ptr->details->mc_ptr->min_threads != + (uint16_t) NO_VAL) && + (job_ptr->details->mc_ptr->min_threads < ntasks_per_core)) { + ntasks_per_core = job_ptr->details->mc_ptr->min_threads; + } + } size = bit_size(job_res->node_bitmap); csize = bit_size(job_res->core_bitmap); @@ -276,9 +289,17 @@ static void _cyclic_sync_core_bitmap(struct job_record *job_ptr, alloc_cores = true; #endif core_map = job_res->core_bitmap; - if (job_ptr->details && job_ptr->details->mc_ptr && - job_ptr->details->mc_ptr->ntasks_per_core) - ntasks_per_core = job_ptr->details->mc_ptr->ntasks_per_core; + if (job_ptr->details && job_ptr->details->mc_ptr) { + if (job_ptr->details->mc_ptr->ntasks_per_core) { + ntasks_per_core = job_ptr->details->mc_ptr-> + ntasks_per_core; + } + if ((job_ptr->details->mc_ptr->min_threads != + (uint16_t) NO_VAL) && + (job_ptr->details->mc_ptr->min_threads < ntasks_per_core)) { + ntasks_per_core = job_ptr->details->mc_ptr->min_threads; + } + } sock_size = select_node_record[0].sockets; sock_start = xmalloc(sock_size * sizeof(uint32_t)); diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index 596577adf6c5f04c4e250c52483fe590ddd4df3b..af31c8f1c6a1420cf4c9e3bc4141986b442cebf2 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -138,12 +138,21 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, uint16_t 
i, c, sockets = select_node_record[node_i].sockets; uint16_t cores_per_socket = select_node_record[node_i].cores; uint16_t threads_per_core = select_node_record[node_i].vpus; - uint16_t min_cores = 0, min_sockets = 0, ntasks_per_socket = 0; + uint16_t min_cores = 1, min_sockets = 1, ntasks_per_socket = 0; + uint16_t max_cores = 0xffff, max_sockets = 0xffff, max_threads = 0xffff; uint16_t ntasks_per_core = 0xffff; if (job_ptr->details && job_ptr->details->mc_ptr) { - min_cores = job_ptr->details->mc_ptr->min_cores; - min_sockets = job_ptr->details->mc_ptr->min_sockets; + if (job_ptr->details->mc_ptr->min_cores != (uint16_t) NO_VAL) { + min_cores = job_ptr->details->mc_ptr->min_cores; + max_cores = job_ptr->details->mc_ptr->min_cores; + } + if (job_ptr->details->mc_ptr->min_sockets != (uint16_t) NO_VAL){ + min_sockets = job_ptr->details->mc_ptr->min_sockets; + max_sockets = job_ptr->details->mc_ptr->min_sockets; + } + if (job_ptr->details->mc_ptr->min_threads != (uint16_t) NO_VAL) + max_threads = job_ptr->details->mc_ptr->min_threads; if (job_ptr->details->mc_ptr->ntasks_per_core) { ntasks_per_core = job_ptr->details->mc_ptr-> ntasks_per_core; @@ -249,10 +258,22 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, goto fini; } + /* check max_cores and max_sockets */ c = 0; for (i = 0; i < sockets; i++) { + if (free_cores[i] > max_cores) { + /* remove extra cores from this socket */ + uint16_t tmp = free_cores[i] - max_cores; + free_core_count -= tmp; + free_cores[i] -= tmp; + } if (free_cores[i] > 0) c++; + if (free_cores[i] && c > max_sockets) { + /* remove extra sockets from use */ + free_core_count -= free_cores[i]; + free_cores[i] = 0; + } } if (free_core_count < 1) { /* no available resources on this node */ @@ -260,7 +281,6 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, goto fini; } - /* Step 3: Compute task-related data: * ntasks_per_socket, ntasks_per_node and cpus_per_task * to determine the number of tasks to run on this node @@ -270,7 +290,7 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, */ avail_cpus = 0; num_tasks = 0; - threads_per_core = MIN(threads_per_core, ntasks_per_core); + threads_per_core = MIN(threads_per_core, max_threads); for (i = 0; i < sockets; i++) { uint16_t tmp = free_cores[i] * threads_per_core; avail_cpus += tmp; @@ -279,7 +299,11 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map, else num_tasks += tmp; } - if (job_ptr->details->ntasks_per_node) + + /* If job requested exclusive rights to the node don't do the + min here since it will make it so we don't allocate the + entire node. 
*/ + if (job_ptr->details->ntasks_per_node && job_ptr->details->shared) num_tasks = MIN(num_tasks, job_ptr->details->ntasks_per_node); if (cpus_per_task < 2) { @@ -371,12 +395,21 @@ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, uint16_t i, c, sockets = select_node_record[node_i].sockets; uint16_t cores_per_socket = select_node_record[node_i].cores; uint16_t threads_per_core = select_node_record[node_i].vpus; - uint16_t min_cores = 0, min_sockets = 0; + uint16_t min_cores = 1, min_sockets = 1; + uint16_t max_cores = 0xffff, max_sockets = 0xffff, max_threads = 0xffff; uint16_t ntasks_per_core = 0xffff; if (!cpu_type && job_ptr->details && job_ptr->details->mc_ptr) { - min_cores = job_ptr->details->mc_ptr->min_cores; - min_sockets = job_ptr->details->mc_ptr->min_sockets; + if (job_ptr->details->mc_ptr->min_cores != (uint16_t) NO_VAL) { + min_cores = job_ptr->details->mc_ptr->min_cores; + max_cores = job_ptr->details->mc_ptr->min_cores; + } + if (job_ptr->details->mc_ptr->min_sockets != (uint16_t) NO_VAL){ + min_sockets = job_ptr->details->mc_ptr->min_sockets; + max_sockets = job_ptr->details->mc_ptr->min_sockets; + } + if (job_ptr->details->mc_ptr->min_threads != (uint16_t) NO_VAL) + max_threads = job_ptr->details->mc_ptr->min_threads; if (job_ptr->details->mc_ptr->ntasks_per_core) { ntasks_per_core = job_ptr->details->mc_ptr-> ntasks_per_core; @@ -463,10 +496,22 @@ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, goto fini; } + /* Step 2b: check max_cores per socket and max_sockets per node */ c = 0; for (i = 0; i < sockets; i++) { + if (free_cores[i] > max_cores) { + /* remove extra cores from this socket */ + uint16_t tmp = free_cores[i] - max_cores; + free_core_count -= tmp; + free_cores[i] -= tmp; + } if (free_cores[i] > 0) c++; + if (free_cores[i] && (c > max_sockets)) { + /* remove extra sockets from use */ + free_core_count -= free_cores[i]; + free_cores[i] = 0; + } } if (free_core_count < 1) { /* no available resources on this node */ @@ -474,7 +519,6 @@ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, goto fini; } - /* Step 3: Compute task-related data: use ntasks_per_core, * ntasks_per_node and cpus_per_task to determine * the number of tasks to run on this node @@ -482,7 +526,10 @@ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, * Note: cpus_per_task and ntasks_per_core need to play nice * 2 tasks_per_core vs. 
2 cpus_per_task */ - threads_per_core = MIN(threads_per_core, ntasks_per_core); + if (cpu_type) + max_threads = threads_per_core; + else + threads_per_core = MIN(threads_per_core, max_threads); num_tasks = avail_cpus = threads_per_core; i = job_ptr->details->mc_ptr->ntasks_per_core; if (!cpu_type && i > 0) @@ -490,9 +537,12 @@ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, /* convert from PER_CORE to TOTAL_FOR_NODE */ avail_cpus *= free_core_count; - num_tasks *= free_core_count; + num_tasks *= free_core_count; - if (job_ptr->details->ntasks_per_node) + /* If job requested exclusive rights to the node don't do the + min here since it will make it so we don't allocate the + entire node */ + if (job_ptr->details->ntasks_per_node && job_ptr->details->shared) num_tasks = MIN(num_tasks, job_ptr->details->ntasks_per_node); if (cpus_per_task < 2) { @@ -525,6 +575,7 @@ uint16_t _allocate_cores(struct job_record *job_ptr, bitstr_t *core_map, avail_cpus = 0; } } + /* clear leftovers */ if (c < core_end) bit_nclear(core_map, c, core_end-1); @@ -1690,9 +1741,12 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, /* This is the case if -O/--overcommit is true */ if (job_ptr->num_procs == job_ptr->details->min_nodes) { struct multi_core_data *mc_ptr = job_ptr->details->mc_ptr; - job_ptr->num_procs *= MAX(1, mc_ptr->min_threads); - job_ptr->num_procs *= MAX(1, mc_ptr->min_cores); - job_ptr->num_procs *= MAX(1, mc_ptr->min_sockets); + if (mc_ptr->min_threads != (uint16_t) NO_VAL) + job_ptr->num_procs *= MAX(1, mc_ptr->min_threads); + if (mc_ptr->min_cores != (uint16_t) NO_VAL) + job_ptr->num_procs *= MAX(1, mc_ptr->min_cores); + if (mc_ptr->min_sockets != (uint16_t) NO_VAL) + job_ptr->num_procs *= MAX(1, mc_ptr->min_sockets); } debug3("cons_res: cr_job_test: evaluating job %u on %u nodes", diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index 62500a7b125fe9728b1a3fd92785f37d64fc553b..7278eea888b3f7916b396f7da14257fc4abe9608 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -1065,9 +1065,9 @@ static struct multi_core_data * _create_default_mc(void) { struct multi_core_data *mc_ptr; mc_ptr = xmalloc(sizeof(struct multi_core_data)); - mc_ptr->min_sockets = 1; - mc_ptr->min_cores = 1; - mc_ptr->min_threads = 1; + mc_ptr->min_sockets = (uint16_t) NO_VAL; + mc_ptr->min_cores = (uint16_t) NO_VAL; + mc_ptr->min_threads = (uint16_t) NO_VAL; /* mc_ptr is initialized to zero by xmalloc*/ /* mc_ptr->ntasks_per_socket = 0; */ /* mc_ptr->ntasks_per_core = 0; */ diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 59a6c700cfefa0753fec792d2168cabc72aa7976..69e76d73001a67fd5d65b305f0cc8ea1a0c8b9ee 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -392,10 +392,9 @@ static uint16_t _get_avail_cpus(struct job_record *job_ptr, int index) uint16_t avail_cpus; uint16_t cpus, sockets, cores, threads; uint16_t cpus_per_task = 1; - uint16_t ntasks_per_node = 0, ntasks_per_socket = 0; - uint16_t ntasks_per_core = 0; + uint16_t ntasks_per_node = 0, ntasks_per_socket, ntasks_per_core; + uint16_t min_sockets, min_cores, min_threads; multi_core_data_t *mc_ptr = NULL; - int min_sockets = 0, min_cores = 0; if (job_ptr->details == NULL) return (uint16_t) 0; @@ -407,6 +406,15 @@ static uint16_t _get_avail_cpus(struct job_record *job_ptr, int index) if ((mc_ptr = 
job_ptr->details->mc_ptr)) { ntasks_per_socket = mc_ptr->ntasks_per_socket; ntasks_per_core = mc_ptr->ntasks_per_core; + min_sockets = mc_ptr->min_sockets; + min_cores = mc_ptr->min_cores; + min_threads = mc_ptr->min_threads; + } else { + ntasks_per_socket = 0; + ntasks_per_core = 0; + min_sockets = (uint16_t) NO_VAL; + min_cores = (uint16_t) NO_VAL; + min_threads = (uint16_t) NO_VAL; } node_ptr = &(select_node_ptr[index]); @@ -428,7 +436,7 @@ static uint16_t _get_avail_cpus(struct job_record *job_ptr, int index) #endif avail_cpus = slurm_get_avail_procs( - min_sockets, min_cores, cpus_per_task, + min_sockets, min_cores, min_threads, cpus_per_task, ntasks_per_node, ntasks_per_socket, ntasks_per_core, &cpus, &sockets, &cores, &threads, NULL, SELECT_TYPE_INFO_NONE, diff --git a/src/sacct/options.c b/src/sacct/options.c index c5959b72926c9323d193f5059394b305fbcee2ba..77d4d41bec69872157f5b34ddde7eab2978dfac8 100644 --- a/src/sacct/options.c +++ b/src/sacct/options.c @@ -484,6 +484,8 @@ sacct [<OPTION>] \n\ -V, --version: Print version. \n\ -W, --wckeys: \n\ Only send data about these wckeys. Default is all. \n\ + -x, --associations: \n\ + Only send data about these association id. Default is all.\n\ -X, --allocations: \n\ Only show cumulative statistics for each job, not the \n\ intermediate steps. \n\ @@ -650,6 +652,7 @@ void parse_command_line(int argc, char **argv) {"verbose", 0, 0, 'v'}, {"version", 0, 0, 'V'}, {"wckeys", 1, 0, 'W'}, + {"associations", 1, 0, 'x'}, {0, 0, 0, 0}}; params.opt_uid = getuid(); @@ -661,7 +664,7 @@ void parse_command_line(int argc, char **argv) while (1) { /* now cycle through the command line */ c = getopt_long(argc, argv, - "aA:bcC:dDeE:f:g:hi:I:j:lLnN:o:OpPr:s:S:Ttu:vVW:X", + "aA:bcC:dDeE:f:g:hi:I:j:lLnN:o:OpPr:s:S:Ttu:vVW:x:X", long_options, &optionIndex); if (c == -1) break; @@ -839,6 +842,13 @@ void parse_command_line(int argc, char **argv) case 'V': print_slurm_version(); exit(0); + case 'x': + if(!job_cond->associd_list) + job_cond->associd_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(job_cond->associd_list, optarg); + break; + case 't': case 'X': params.opt_allocs = 1; diff --git a/src/sacctmgr/account_functions.c b/src/sacctmgr/account_functions.c index cb415032e193600eff459f03739961a1d14efd31..133b00e57fd2bb8365c702a2a0c4a33bd720d959 100644 --- a/src/sacctmgr/account_functions.c +++ b/src/sacctmgr/account_functions.c @@ -878,7 +878,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) assoc_list); } else { exit_code=1; - fprintf(stderr, " Problem adding accounts\n"); + fprintf(stderr, " Problem adding accounts: %s\n", + slurm_strerror(rc)); rc = SLURM_ERROR; notice_thread_fini(); goto end_it; @@ -895,7 +896,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) } else { exit_code=1; fprintf(stderr, - " error: Problem adding account associations\n"); + " error: Problem adding account associations: %s\n", + slurm_strerror(rc)); rc = SLURM_ERROR; } @@ -907,7 +909,6 @@ end_it: destroy_acct_association_rec(start_assoc); destroy_acct_account_rec(start_acct); - return rc; } diff --git a/src/sacctmgr/archive_functions.c b/src/sacctmgr/archive_functions.c index ba57aaddd2d3e81cabc9bb47c815e1f9ab034acd..bf7e0be1a623196d6e68dbb0eec97b7789d57fc5 100644 --- a/src/sacctmgr/archive_functions.c +++ b/src/sacctmgr/archive_functions.c @@ -425,7 +425,8 @@ extern int sacctmgr_archive_dump(int argc, char *argv[]) } } else { exit_code=1; - fprintf(stderr, " Problem dumping archive\n"); + fprintf(stderr, " Problem dumping archive: %s\n", + 
slurm_strerror(rc)); rc = SLURM_ERROR; } destroy_acct_archive_cond(arch_cond); @@ -504,7 +505,8 @@ extern int sacctmgr_archive_load(int argc, char *argv[]) } } else { exit_code=1; - fprintf(stderr, " Problem loading archive file\n"); + fprintf(stderr, " Problem loading archive file: %s\n", + slurm_strerror(rc)); rc = SLURM_ERROR; } diff --git a/src/sacctmgr/cluster_functions.c b/src/sacctmgr/cluster_functions.c index a7b25e8e65deabf01ba17bbb4bf064ad689b5cfa..60bf6252f2d5365ec007fbb55e8ceec67bad93e5 100644 --- a/src/sacctmgr/cluster_functions.c +++ b/src/sacctmgr/cluster_functions.c @@ -404,7 +404,8 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) } } else { exit_code=1; - fprintf(stderr, " Problem adding clusters\n"); + fprintf(stderr, " Problem adding clusters: %s\n", + slurm_strerror(rc)); } end_it: list_destroy(cluster_list); diff --git a/src/sacctmgr/file_functions.c b/src/sacctmgr/file_functions.c index ff461f86f85c0c2530adf13875180c7d37309878..f9a49389dee0a9f3dcacb56f44aee5763de09936 100644 --- a/src/sacctmgr/file_functions.c +++ b/src/sacctmgr/file_functions.c @@ -2278,7 +2278,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) if(rc != SLURM_SUCCESS) { exit_code=1; fprintf(stderr, - " Problem adding cluster\n"); + " Problem adding cluster: %s\n", + slurm_strerror(rc)); rc = SLURM_ERROR; _destroy_sacctmgr_file_opts(file_opts); break; @@ -2698,7 +2699,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) } } else { exit_code=1; - fprintf(stderr, " Problem with requests.\n"); + fprintf(stderr, " Problem with requests: %s\n", + slurm_strerror(rc)); } list_destroy(format_list); diff --git a/src/sacctmgr/qos_functions.c b/src/sacctmgr/qos_functions.c index 696e211ac3ef82a8ead7b29907e25a61d90a5f9c..faa0769f62c803e782dab2fa6829747ceb812d42 100644 --- a/src/sacctmgr/qos_functions.c +++ b/src/sacctmgr/qos_functions.c @@ -448,7 +448,8 @@ extern int sacctmgr_add_qos(int argc, char *argv[]) } } else { exit_code=1; - fprintf(stderr, " Problem adding QOS.\n"); + fprintf(stderr, " Problem adding QOS: %s\n", + slurm_strerror(rc)); rc = SLURM_ERROR; } diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c index e686b97a7ee67725b5a41d2d0cd7ec3b3bfb1043..6f47691d1c96c3ede2965286e0a7df2f31af1e5d 100644 --- a/src/sacctmgr/user_functions.c +++ b/src/sacctmgr/user_functions.c @@ -1045,8 +1045,11 @@ extern int sacctmgr_add_user(int argc, char *argv[]) xstrdup(list_peek(wckey_cond->name_list)); wckey_cond->cluster_list = assoc_cond->cluster_list; wckey_cond->user_list = assoc_cond->user_list; - local_wckey_list = acct_storage_g_get_wckeys( - db_conn, my_uid, wckey_cond); + if(!(local_wckey_list = acct_storage_g_get_wckeys( + db_conn, my_uid, wckey_cond))) + info("If you are a coordinator ignore " + "the previous error"); + wckey_cond->cluster_list = NULL; wckey_cond->user_list = NULL; @@ -1371,7 +1374,8 @@ no_default: wckey_list); } else { exit_code=1; - fprintf(stderr, " Problem adding users\n"); + fprintf(stderr, " Problem adding users: %s\n", + slurm_strerror(rc)); rc = SLURM_ERROR; notice_thread_fini(); goto end_it; @@ -1388,7 +1392,8 @@ no_default: } } else { exit_code=1; - fprintf(stderr, " Problem adding user associations\n"); + fprintf(stderr, " Problem adding user associations: %s\n", + slurm_strerror(rc)); rc = SLURM_ERROR; } @@ -1470,7 +1475,8 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) } } else { exit_code=1; - fprintf(stderr, " Problem adding coordinator\n"); + fprintf(stderr, " Problem adding coordinator: %s\n", + 
slurm_strerror(rc)); rc = SLURM_ERROR; } diff --git a/src/salloc/opt.c b/src/salloc/opt.c index aab3a2c759e0f17a4c8d68debc55b54f13ae3dc7..3253ec5739997f3126e21366b67753994a45d299 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -562,7 +562,7 @@ _get_int(const char *arg, const char *what) void set_options(const int argc, char **argv) { - int opt_char, option_index = 0; + int opt_char, option_index = 0, max_val = 0; char *tmp; static struct option long_options[] = { {"account", required_argument, 0, 'A'}, @@ -982,17 +982,26 @@ void set_options(const int argc, char **argv) case LONG_OPT_SOCKETSPERNODE: get_resource_arg_range( optarg, "sockets-per-node", &opt.min_sockets_per_node, - NULL, true ); + &max_val, true ); + if ((opt.min_sockets_per_node == 1) && + (max_val == INT_MAX)) + opt.min_sockets_per_node = NO_VAL; break; case LONG_OPT_CORESPERSOCKET: get_resource_arg_range( optarg, "cores-per-socket", &opt.min_cores_per_socket, - NULL, true ); + &max_val, true ); + if ((opt.min_cores_per_socket == 1) && + (max_val == INT_MAX)) + opt.min_cores_per_socket = NO_VAL; break; case LONG_OPT_THREADSPERCORE: get_resource_arg_range( optarg, "threads-per-core", &opt.min_threads_per_core, - NULL, true ); + &max_val, true ); + if ((opt.min_threads_per_core == 1) && + (max_val == INT_MAX)) + opt.min_threads_per_core = NO_VAL; break; case LONG_OPT_NTASKSPERNODE: opt.ntasks_per_node = _get_int(optarg, diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 567f6189323aa9ce0d9eecdb5866d5371ec50dc0..09d63983904777293538e0ab5402a1a3fd7bbc0f 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -102,6 +102,7 @@ static void _signal_while_allocating(int signo); static void _job_complete_handler(srun_job_complete_msg_t *msg); static void _set_exit_code(void); static void _set_rlimits(char **env); +static void _set_spank_env(void); static void _set_submit_dir_env(void); static void _timeout_handler(srun_timeout_msg_t *msg); static void _user_msg_handler(srun_user_msg_t *msg); @@ -167,6 +168,7 @@ int main(int argc, char *argv[]) exit(error_exit); } + _set_spank_env(); _set_submit_dir_env(); if (opt.cwd && chdir(opt.cwd)) { error("chdir(%s): %m", opt.cwd); @@ -428,6 +430,19 @@ static void _set_exit_code(void) } } +/* Propagate SPANK environment via SLURM_SPANK_ environment variables */ +static void _set_spank_env(void) +{ + int i; + + for (i=0; i<opt.spank_job_env_size; i++) { + if (setenvfs("SLURM_SPANK_%s", opt.spank_job_env[i]) < 0) { + error("unable to set %s in environment", + opt.spank_job_env[i]); + } + } +} + /* Set SLURM_SUBMIT_DIR environment variable with current state */ static void _set_submit_dir_env(void) { @@ -553,11 +568,11 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->ntasks_per_core = opt.ntasks_per_core; /* node constraints */ - if (opt.min_sockets_per_node > -1) + if (opt.min_sockets_per_node != NO_VAL) desc->min_sockets = opt.min_sockets_per_node; - if (opt.min_cores_per_socket > -1) + if (opt.min_cores_per_socket != NO_VAL) desc->min_cores = opt.min_cores_per_socket; - if (opt.min_threads_per_core > -1) + if (opt.min_threads_per_core != NO_VAL) desc->min_threads = opt.min_threads_per_core; if (opt.no_kill) diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index e6cd19540eae83903e4ef04cfad57733ee8f7347..8c963d0518dc6e9e02ecdbb22effeba521cdf539 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -1068,7 +1068,7 @@ static void _opt_pbs_batch_script(const char *file, const void *body, int size) static void _set_options(int argc, char **argv) { - int 
opt_char, option_index = 0; + int opt_char, option_index = 0, max_val = 0; char *tmp; struct option *optz = spank_option_table_create(long_options); @@ -1423,17 +1423,26 @@ static void _set_options(int argc, char **argv) case LONG_OPT_SOCKETSPERNODE: get_resource_arg_range( optarg, "sockets-per-node", &opt.min_sockets_per_node, - NULL, true ); + &max_val, true ); + if ((opt.min_sockets_per_node == 1) && + (max_val == INT_MAX)) + opt.min_sockets_per_node = NO_VAL; break; case LONG_OPT_CORESPERSOCKET: get_resource_arg_range( optarg, "cores-per-socket", &opt.min_cores_per_socket, - NULL, true ); + &max_val, true ); + if ((opt.min_cores_per_socket == 1) && + (max_val == INT_MAX)) + opt.min_cores_per_socket = NO_VAL; break; case LONG_OPT_THREADSPERCORE: get_resource_arg_range( optarg, "threads-per-core", &opt.min_threads_per_core, - NULL, true ); + &max_val, true ); + if ((opt.min_threads_per_core == 1) && + (max_val == INT_MAX)) + opt.min_threads_per_core = NO_VAL; break; case LONG_OPT_NTASKSPERNODE: opt.ntasks_per_node = _get_int(optarg, diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index 6560415096e8ca7d413674b39b2d1c259c0149bd..9c60d4ec80a409982e12b08a05a1b66389d128ec 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -69,6 +69,7 @@ static char *script_wrap(char *command_string); static void _set_exit_code(void); static void _set_prio_process_env(void); static int _set_rlimit_env(void); +static void _set_spank_env(void); static void _set_submit_dir_env(void); static int _set_umask_env(void); @@ -134,6 +135,7 @@ int main(int argc, char *argv[]) } _set_prio_process_env(); + _set_spank_env(); _set_submit_dir_env(); _set_umask_env(); slurm_init_job_desc_msg(&desc); @@ -283,11 +285,11 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) desc->ntasks_per_core = opt.ntasks_per_core; /* node constraints */ - if (opt.min_sockets_per_node > -1) + if (opt.min_sockets_per_node != NO_VAL) desc->min_sockets = opt.min_sockets_per_node; - if (opt.min_cores_per_socket > -1) + if (opt.min_cores_per_socket != NO_VAL) desc->min_cores = opt.min_cores_per_socket; - if (opt.min_threads_per_core > -1) + if (opt.min_threads_per_core != NO_VAL) desc->min_threads = opt.min_threads_per_core; if (opt.no_kill) @@ -353,6 +355,19 @@ static void _set_exit_code(void) } } +/* Propagate SPANK environment via SLURM_SPANK_ environment variables */ +static void _set_spank_env(void) +{ + int i; + + for (i=0; i<opt.spank_job_env_size; i++) { + if (setenvfs("SLURM_SPANK_%s", opt.spank_job_env[i]) < 0) { + error("unable to set %s in environment", + opt.spank_job_env[i]); + } + } +} + /* Set SLURM_SUBMIT_DIR environment variable with current state */ static void _set_submit_dir_env(void) { diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index e3b15059c9d99b2ccc8eec6f10b9e7194ebefbc0..f7c373206cdcbe582b2506f68ac0c3281d177791 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2928,11 +2928,6 @@ static int _validate_job_create_req(job_desc_msg_t * job_desc) strlen(job_desc->work_dir)); return ESLURM_PATHNAME_TOO_LONG; } - if (!valid_spank_job_env(job_desc->spank_job_env, - job_desc->spank_job_env_size, - job_desc->user_id)) { - return EINVAL; - } return SLURM_SUCCESS; } @@ -3283,30 +3278,18 @@ _set_multi_core_data(job_desc_msg_t * job_desc) { multi_core_data_t * mc_ptr; - if (((job_desc->min_sockets == (uint16_t) NO_VAL) || - (job_desc->min_sockets == (uint16_t) 1)) && - ((job_desc->min_cores == (uint16_t) NO_VAL) || - (job_desc->min_cores == (uint16_t) 1)) && - 
((job_desc->min_threads == (uint16_t) NO_VAL) || - (job_desc->min_threads == (uint16_t) 1)) && + if ((job_desc->min_sockets == (uint16_t) NO_VAL) && + (job_desc->min_cores == (uint16_t) NO_VAL) && + (job_desc->min_threads == (uint16_t) NO_VAL) && (job_desc->ntasks_per_socket == (uint16_t) NO_VAL) && (job_desc->ntasks_per_core == (uint16_t) NO_VAL) && (job_desc->plane_size == (uint16_t) NO_VAL)) return NULL; mc_ptr = xmalloc(sizeof(multi_core_data_t)); - if (job_desc->min_sockets != (uint16_t) NO_VAL) - mc_ptr->min_sockets = job_desc->min_sockets; - else - mc_ptr->min_sockets = 1; - if (job_desc->min_cores != (uint16_t) NO_VAL) - mc_ptr->min_cores = job_desc->min_cores; - else - mc_ptr->min_cores = 1; - if (job_desc->min_threads != (uint16_t) NO_VAL) - mc_ptr->min_threads = job_desc->min_threads; - else - mc_ptr->min_threads = 1; + mc_ptr->min_sockets = job_desc->min_sockets; + mc_ptr->min_cores = job_desc->min_cores; + mc_ptr->min_threads = job_desc->min_threads; if (job_desc->ntasks_per_socket != (uint16_t) NO_VAL) mc_ptr->ntasks_per_socket = job_desc->ntasks_per_socket; else @@ -3983,22 +3966,10 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, } else if (!_valid_job_min_mem(job_desc_msg)) return ESLURM_INVALID_TASK_MEMORY; - if (job_desc_msg->min_sockets == (uint16_t) NO_VAL) - job_desc_msg->min_sockets = 1; /* default socket count of 1 */ - if (job_desc_msg->min_cores == (uint16_t) NO_VAL) - job_desc_msg->min_cores = 1; /* default core count of 1 */ - if (job_desc_msg->min_threads == (uint16_t) NO_VAL) - job_desc_msg->min_threads = 1; /* default thread count of 1 */ if (job_desc_msg->min_nodes == NO_VAL) job_desc_msg->min_nodes = 1; /* default node count of 1 */ if (job_desc_msg->num_procs == NO_VAL) job_desc_msg->num_procs = job_desc_msg->min_nodes; - if (job_desc_msg->min_sockets == (uint16_t) NO_VAL) - job_desc_msg->min_sockets = 1; /* default socket count of 1 */ - if (job_desc_msg->min_cores == (uint16_t) NO_VAL) - job_desc_msg->min_cores = 1; /* default core count of 1 */ - if (job_desc_msg->min_threads == (uint16_t) NO_VAL) - job_desc_msg->min_threads = 1; /* default thread count of 1 */ if (job_desc_msg->job_min_cpus == (uint16_t) NO_VAL) job_desc_msg->job_min_cpus = 1; /* default 1 cpu per node */ @@ -6434,7 +6405,27 @@ extern bool job_independent(struct job_record *job_ptr, int will_run) { struct job_details *detail_ptr = job_ptr->details; time_t now = time(NULL); - int rc; + int depend_rc; + bool send_acct_rec = false; + + /* Test dependencies first so we can cancel jobs before dependent + * jobs records get purged (e.g. 
afterok, afternotok) */ + depend_rc = test_job_dependency(job_ptr); + if (depend_rc == 1) { + job_ptr->state_reason = WAIT_DEPENDENCY; + xfree(job_ptr->state_desc); + return false; + } else if (depend_rc == 2) { + time_t now = time(NULL); + info("Job dependency can't be satisfied, cancelling job %u", + job_ptr->job_id); + job_ptr->job_state = JOB_CANCELLED; + xfree(job_ptr->state_desc); + job_ptr->start_time = now; + job_ptr->end_time = now; + job_completion_logger(job_ptr); + return false; + } if (detail_ptr && (detail_ptr->begin_time > now)) { job_ptr->state_reason = WAIT_TIME; @@ -6448,45 +6439,27 @@ extern bool job_independent(struct job_record *job_ptr, int will_run) return false; /* not yet time */ } - rc = test_job_dependency(job_ptr); - if (rc == 0) { - bool send_acct_rec = false; - if (job_ptr->state_reason == WAIT_DEPENDENCY) { - job_ptr->state_reason = WAIT_NO_REASON; - xfree(job_ptr->state_desc); - } - if (detail_ptr && (detail_ptr->begin_time == 0)) { - detail_ptr->begin_time = now; - send_acct_rec = true; - } else if (job_ptr->state_reason == WAIT_TIME) { - job_ptr->state_reason = WAIT_NO_REASON; - xfree(job_ptr->state_desc); - send_acct_rec = true; - } - if (send_acct_rec && !will_run) { - /* We want to record when a job becomes eligible in - * order to calculate reserved time (a measure of - * system over-subscription), job really is not - * starting now */ - jobacct_storage_g_job_start( - acct_db_conn, slurmctld_cluster_name, job_ptr); - } - return true; - } else if (rc == 1) { - job_ptr->state_reason = WAIT_DEPENDENCY; + /* Job is eligible to start now */ + if (job_ptr->state_reason == WAIT_DEPENDENCY) { + job_ptr->state_reason = WAIT_NO_REASON; xfree(job_ptr->state_desc); - return false; - } else { /* rc == 2 */ - time_t now = time(NULL); - info("Job dependency can't be satisfied, cancelling job %u", - job_ptr->job_id); - job_ptr->job_state = JOB_CANCELLED; + } + if (detail_ptr && (detail_ptr->begin_time == 0)) { + detail_ptr->begin_time = now; + send_acct_rec = true; + } else if (job_ptr->state_reason == WAIT_TIME) { + job_ptr->state_reason = WAIT_NO_REASON; xfree(job_ptr->state_desc); - job_ptr->start_time = now; - job_ptr->end_time = now; - job_completion_logger(job_ptr); - return false; + send_acct_rec = true; + } + if (send_acct_rec && !will_run) { + /* We want to record when a job becomes eligible in order to + * calculate reserved time (a measure of system over- + * subscription), job really is not starting now */ + jobacct_storage_g_job_start(acct_db_conn, + slurmctld_cluster_name, job_ptr); } + return true; } /* @@ -7879,7 +7852,7 @@ _copy_job_record_to_job_desc(struct job_record *job_ptr) job_desc->max_nodes = details->max_nodes; job_desc->min_sockets = mc_ptr->min_sockets; job_desc->min_cores = mc_ptr->min_cores; - job_desc->min_threads = mc_ptr->min_threads;; + job_desc->min_threads = mc_ptr->min_threads; job_desc->cpus_per_task = details->cpus_per_task; job_desc->ntasks_per_node = details->ntasks_per_node; job_desc->ntasks_per_socket = mc_ptr->ntasks_per_socket; diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index bf70191f62ff37fdfe393da6369e6d346339c21c..a2ffe2c0f0e9ae61fa2666d5c6580fbc5c4dadcf 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -704,7 +704,7 @@ extern int test_job_dependency(struct job_record *job_ptr) { ListIterator depend_iter; struct depend_spec *dep_ptr; - bool failure = false; + bool failure = false, depends = false; struct job_queue *job_queue = NULL; int i, now,
job_queue_size = 0; struct job_record *qjob_ptr; @@ -742,7 +742,7 @@ extern int test_job_dependency(struct job_record *job_ptr) if (now) list_delete_item(depend_iter); else - break; + depends = true; } else if (dep_ptr->job_ptr->job_id != dep_ptr->job_id) { /* job is gone, dependency lifted */ list_delete_item(depend_iter); @@ -750,16 +750,16 @@ extern int test_job_dependency(struct job_record *job_ptr) if (!IS_JOB_PENDING(dep_ptr->job_ptr)) list_delete_item(depend_iter); else - break; + depends = true; } else if (dep_ptr->depend_type == SLURM_DEPEND_AFTER_ANY) { if (IS_JOB_FINISHED(dep_ptr->job_ptr)) list_delete_item(depend_iter); else - break; + depends = true; } else if (dep_ptr->depend_type == SLURM_DEPEND_AFTER_NOT_OK) { if (!IS_JOB_FINISHED(dep_ptr->job_ptr)) - break; - if (!IS_JOB_COMPLETE(dep_ptr->job_ptr)) + depends = true; + else if (!IS_JOB_COMPLETE(dep_ptr->job_ptr)) list_delete_item(depend_iter); else { failure = true; @@ -767,8 +767,8 @@ extern int test_job_dependency(struct job_record *job_ptr) } } else if (dep_ptr->depend_type == SLURM_DEPEND_AFTER_OK) { if (!IS_JOB_FINISHED(dep_ptr->job_ptr)) - break; - if (IS_JOB_COMPLETE(dep_ptr->job_ptr)) + depends = true; + else if (IS_JOB_COMPLETE(dep_ptr->job_ptr)) list_delete_item(depend_iter); else { failure = true; @@ -781,7 +781,7 @@ extern int test_job_dependency(struct job_record *job_ptr) if (failure) return 2; - if (dep_ptr) + if (depends) return 1; return 0; } @@ -1148,6 +1148,7 @@ static char **_build_env(struct job_record *job_ptr) name = gid_to_string((uid_t) job_ptr->group_id); setenvf(&my_env, "SLURM_JOB_GROUP", "%s", name); xfree(name); + setenvf(&my_env, "SLURM_JOBID", "%u", job_ptr->job_id); setenvf(&my_env, "SLURM_JOB_ID", "%u", job_ptr->job_id); setenvf(&my_env, "SLURM_JOB_NAME", "%s", job_ptr->name); setenvf(&my_env, "SLURM_JOB_NODELIST", "%s", job_ptr->nodes); diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 0cb2f6886fd8529f85c771a1f61ed1ca8f0859cb..34130d4129af46b9dad9c07b8f183ff8402457fc 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -2217,6 +2217,7 @@ extern int send_nodes_to_accounting(time_t event_time) int rc = SLURM_SUCCESS, i = 0; struct node_record *node_ptr = NULL; uint32_t node_scaling = 0; + char *reason = NULL; slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK, WRITE_LOCK }; @@ -2226,10 +2227,15 @@ extern int send_nodes_to_accounting(time_t event_time) /* send nodes not in not 'up' state */ node_ptr = node_record_table_ptr; for (i = 0; i < node_record_count; i++, node_ptr++) { + if(node_ptr->reason) + reason = node_ptr->reason; + else + reason = "First Registration"; if (node_ptr->name == '\0' || (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr) && !IS_NODE_DOWN(node_ptr))) { - /* on some systems we need to make sure there + /* At this point, the node appears to be up, + but on some systems we need to make sure there aren't some part of a node in an error state. 
*/ if(node_ptr->select_nodeinfo) { uint16_t err_cpus = 0; @@ -2266,17 +2272,15 @@ extern int send_nodes_to_accounting(time_t event_time) acct_db_conn, slurmctld_cluster_name, &send_node, event_time, - NULL); - - continue; + reason); } - } else - continue; - } - rc = clusteracct_storage_g_node_down(acct_db_conn, - slurmctld_cluster_name, - node_ptr, event_time, - NULL); + } + } else + rc = clusteracct_storage_g_node_down( + acct_db_conn, + slurmctld_cluster_name, + node_ptr, event_time, + reason); if (rc == SLURM_ERROR) break; } diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index cfa3db85caa10d242c31f5f6555fbff4033eb820..77052471feac33f2708f76c478f0962774495b8b 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1414,25 +1414,31 @@ extern int job_req_node_filter(struct job_record *job_ptr, continue; } if (mc_ptr && - ((mc_ptr->min_sockets > config_ptr->sockets) || - (mc_ptr->min_cores > config_ptr->cores) || - (mc_ptr->min_threads > config_ptr->threads))){ + (((mc_ptr->min_sockets > config_ptr->sockets) && + (mc_ptr->min_sockets != (uint16_t) NO_VAL)) || + ((mc_ptr->min_cores > config_ptr->cores) && + (mc_ptr->min_cores != (uint16_t) NO_VAL)) || + ((mc_ptr->min_threads > config_ptr->threads) && + (mc_ptr->min_threads != (uint16_t) NO_VAL)))) { bit_clear(avail_bitmap, i); continue; } } else { if ((detail_ptr->job_min_cpus > node_ptr->cpus) || ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > - node_ptr->real_memory) || + node_ptr->real_memory) || (detail_ptr->job_min_tmp_disk > node_ptr->tmp_disk)) { bit_clear(avail_bitmap, i); continue; } if (mc_ptr && - ((mc_ptr->min_sockets > node_ptr->sockets) || - (mc_ptr->min_cores > node_ptr->cores) || - (mc_ptr->min_threads > node_ptr->threads))) { + (((mc_ptr->min_sockets > node_ptr->sockets) && + (mc_ptr->min_sockets != (uint16_t) NO_VAL)) || + ((mc_ptr->min_cores > node_ptr->cores) && + (mc_ptr->min_cores != (uint16_t) NO_VAL)) || + ((mc_ptr->min_threads > node_ptr->threads) && + (mc_ptr->min_threads != (uint16_t) NO_VAL)))) { bit_clear(avail_bitmap, i); continue; } @@ -1539,10 +1545,13 @@ static int _build_node_list(struct job_record *job_ptr, config_ptr->real_memory) || (detail_ptr->job_min_tmp_disk > config_ptr->tmp_disk)) config_filter = 1; - if (mc_ptr && - ((mc_ptr->min_sockets > config_ptr->sockets ) || - (mc_ptr->min_cores > config_ptr->cores ) || - (mc_ptr->min_threads > config_ptr->threads ))) + if (mc_ptr && + (((mc_ptr->min_sockets > config_ptr->sockets) && + (mc_ptr->min_sockets != (uint16_t) NO_VAL)) || + ((mc_ptr->min_cores > config_ptr->cores) && + (mc_ptr->min_cores != (uint16_t) NO_VAL)) || + ((mc_ptr->min_threads > config_ptr->threads) && + (mc_ptr->min_threads != (uint16_t) NO_VAL)))) config_filter = 1; /* since nodes can register with more resources than defined */ @@ -1695,10 +1704,13 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr, node_con->real_memory) && (job_con->job_min_tmp_disk <= node_con->tmp_disk)) job_ok = 1; - if (mc_ptr && - ((mc_ptr->min_sockets <= node_con->sockets) && - (mc_ptr->min_cores <= node_con->cores ) && - (mc_ptr->min_threads <= node_con->threads))) + if (mc_ptr && + (((mc_ptr->min_sockets <= node_con->sockets) || + (mc_ptr->min_sockets == (uint16_t) NO_VAL)) && + ((mc_ptr->min_cores <= node_con->cores) || + (mc_ptr->min_cores == (uint16_t) NO_VAL)) && + ((mc_ptr->min_threads <= node_con->threads) || + (mc_ptr->min_threads == (uint16_t) NO_VAL)))) job_mc_ptr_ok = 1; if (job_ok && (!mc_ptr || job_mc_ptr_ok)) continue; @@ 
-1721,10 +1733,13 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr, node_ptr->real_memory) && (job_con->job_min_tmp_disk <= node_ptr->tmp_disk)) job_ok = 1; - if (mc_ptr && - ((mc_ptr->min_sockets <= node_ptr->sockets) && - (mc_ptr->min_cores <= node_ptr->cores ) && - (mc_ptr->min_threads <= node_ptr->threads))) + if (mc_ptr && + (((mc_ptr->min_sockets <= node_ptr->sockets) || + (mc_ptr->min_sockets == (uint16_t) NO_VAL)) && + ((mc_ptr->min_cores <= node_ptr->cores) || + (mc_ptr->min_cores == (uint16_t) NO_VAL)) && + ((mc_ptr->min_threads <= node_ptr->threads) || + (mc_ptr->min_threads == (uint16_t) NO_VAL)))) job_mc_ptr_ok = 1; if (job_ok && (!mc_ptr || job_mc_ptr_ok)) continue; diff --git a/src/squeue/sort.c b/src/squeue/sort.c index 1c7e0c0ebd234b31674b5ea2f608c31bdb8e4ab4..d05390cd7595cf43a18bc5d753dffcc3c8d0a7ab 100644 --- a/src/squeue/sort.c +++ b/src/squeue/sort.c @@ -165,6 +165,9 @@ void sort_job_list(List job_list) list_sort(job_list, _sort_job_by_reservation); else if (params.sort[i] == 'z') list_sort(job_list, _sort_job_by_num_sct); + else + error("Invalid sort specification: %c", + params.sort[i]); } } diff --git a/src/srun/opt.c b/src/srun/opt.c index f730ac6fd93b45d2934e0ee0f66b568a372064f2..430588a1f8bdd92135f5e1dde30649e29e38e30a 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -182,6 +182,8 @@ #define LONG_OPT_SIGNAL 0x14e #define LONG_OPT_DEBUG_SLURMD 0x14f +extern char **environ; + /*---- global variables, defined in opt.h ----*/ int _verbose; opt_t opt; @@ -708,7 +710,7 @@ _get_int(const char *arg, const char *what, bool positive) static void set_options(const int argc, char **argv) { - int opt_char, option_index = 0; + int opt_char, option_index = 0, max_val = 0; struct utsname name; static struct option long_options[] = { {"attach", no_argument, 0, 'a'}, @@ -1302,17 +1304,26 @@ static void set_options(const int argc, char **argv) case LONG_OPT_SOCKETSPERNODE: get_resource_arg_range( optarg, "sockets-per-node", &opt.min_sockets_per_node, - NULL, true ); + &max_val, true ); + if ((opt.min_sockets_per_node == 1) && + (max_val == INT_MAX)) + opt.min_sockets_per_node = NO_VAL; break; case LONG_OPT_CORESPERSOCKET: get_resource_arg_range( optarg, "cores-per-socket", &opt.min_cores_per_socket, - NULL, true); + &max_val, true ); + if ((opt.min_cores_per_socket == 1) && + (max_val == INT_MAX)) + opt.min_cores_per_socket = NO_VAL; break; case LONG_OPT_THREADSPERCORE: get_resource_arg_range( optarg, "threads-per-core", &opt.min_threads_per_core, - NULL, true ); + &max_val, true ); + if ((opt.min_threads_per_core == 1) && + (max_val == INT_MAX)) + opt.min_threads_per_core = NO_VAL; break; case LONG_OPT_NTASKSPERNODE: opt.ntasks_per_node = _get_int(optarg, "ntasks-per-node", @@ -1936,6 +1947,33 @@ static bool _opt_verify(void) return verified; } +/* Initialize the the spank_job_env based upon environment variables set + * via salloc or sbatch commands */ +extern void init_spank_env(void) +{ + int i; + char *name, *eq, *value; + + if (environ == NULL) + return; + + for (i=0; environ[i]; i++) { + if (strncmp(environ[i], "SLURM_SPANK_", 12)) + continue; + name = xstrdup(environ[i] + 12); + eq = strchr(name, (int)'='); + if (eq == NULL) { + xfree(name); + break; + } + eq[0] = '\0'; + value = eq + 1; + spank_set_job_env(name, value, 1); + xfree(name); + } + +} + /* Functions used by SPANK plugins to read and write job environment * variables for use within job's Prolog and/or Epilog */ extern char *spank_get_job_env(const char *name) diff --git a/src/srun/opt.h 
b/src/srun/opt.h index f0c1213ce46a58d7f097ed9fa5caf8dbcfb5821b..8ad26cdc794aa699834ab1f44f6601d64b9cecbe 100644 --- a/src/srun/opt.h +++ b/src/srun/opt.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * opt.h - definitions for srun option processing - * $Id: opt.h 19316 2010-01-26 00:12:13Z jette $ + * $Id: opt.h 20173 2010-04-29 16:14:33Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008-2009 Lawrence Livermore National Security. @@ -257,4 +257,8 @@ extern int spank_set_job_env(const char *name, const char *value, int overwrite); extern int spank_unset_job_env(const char *name); +/* Initialize the spank_job_env based upon environment variables set + * via salloc or sbatch commands */ +extern void init_spank_env(void); + #endif /* _HAVE_OPT_H */ diff --git a/src/srun/srun.c b/src/srun/srun.c index abfc55af8def5f289814e985706dbdc44b09591c..8182ba6078d2a3fc8bc83a9c40fbe44ca281bbca 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -209,6 +209,7 @@ int srun(int ac, char **av) /* Initialize plugin stack, read options from plugins, etc. */ + init_spank_env(); if (spank_init(NULL) < 0) { error("Plug-in initialization failed"); exit(error_exit); } @@ -754,11 +755,11 @@ static int _validate_relative(resource_allocation_response_msg_t *resp) if (opt.relative_set && ((opt.relative + opt.min_nodes) > resp->node_cnt)) { if (opt.nodes_set_opt) { /* -N command line option used */ - error("--relative and --nodes option incompatable " + error("--relative and --nodes option incompatible " "with count of allocated nodes (%d+%d>%d)", opt.relative, opt.min_nodes, resp->node_cnt); } else { /* SLURM_NNODES option used */ - error("--relative and SLURM_NNODES option incompatable " + error("--relative and SLURM_NNODES option incompatible " "with count of allocated nodes (%d+%d>%d)", opt.relative, opt.min_nodes, resp->node_cnt); } diff --git a/src/sview/block_info.c b/src/sview/block_info.c index f45e2d0bef2afb926d8169b415362152793a8c33..0c681b0184fe1758e22f4c51fb918490f27decd5 100644 --- a/src/sview/block_info.c +++ b/src/sview/block_info.c @@ -521,6 +521,13 @@ static List _create_block_list(partition_info_msg_t *part_info_ptr, block_ptr->color_inx = atoi(block_ptr->bg_block_name+3); #endif + /* on some systems they make their own blocks named + whatever they want, so doing this fixes what could + be a negative number. + */ + if(block_ptr->color_inx < 0) + block_ptr->color_inx = i; + block_ptr->color_inx %= sview_colors_cnt; block_ptr->nodes diff --git a/testsuite/expect/test7.7.prog.c b/testsuite/expect/test7.7.prog.c index 1a0bed977d9e09400f015627124916b514606bf5..2c29c5b8bfc1d6b7e34d371c357d3b6114b2e60f 100644 --- a/testsuite/expect/test7.7.prog.c +++ b/testsuite/expect/test7.7.prog.c @@ -370,7 +370,7 @@ static void _modify_job(long my_job_id) /* "NODES=2 " */ /* "DEPEND=afterany:3 " */ /* "INVALID=123 " */ - /* "VARIABLELIST=TEST_ENV1=test_val1 " */ + "COMMENT=test123 " "VARIABLELIST=TEST_ENV1=test_val1,TEST_ENV2=test_val2 " "TIMELIMIT=10 BANK=test_bank", (uint32_t) now, my_job_id);