diff --git a/META b/META index 8193c01263490597baa98372233d067ec82a5569..391a9e1b176a8237641177e90e01834092894047 100644 --- a/META +++ b/META @@ -7,8 +7,8 @@ Name: slurm Major: 16 Minor: 05 - Micro: 0 - Version: 16.05.0 + Micro: 2 + Version: 16.05.2 Release: 1 # Include leading zero for all pre-releases diff --git a/NEWS b/NEWS index 08a221bfbdddfaf8d46a91293a3ce8b82bde17de..ef64b02979b85852efe00a92f9344888b9b58e27 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,90 @@ This file describes changes in recent versions of Slurm. It primarily documents those changes that are of interest to users and administrators. +* Changes in Slurm 16.05.2 +========================== + -- CRAY - Fix issue where the proctrack plugin could hang if the container + id wasn't able to be made. + -- Move test for job wait reason value of BurstBufferResources and + BurstBufferStageIn later in the scheduling logic. + -- Document which srun options apply to only job, only step, or job and step + allocations. + -- Use more compatible function to get thread name (>= 2.6.11). + -- Fix order of job then step id when noting cleaning flag being set. + -- Make it so the extern step sends a message with accounting information + back to the slurmctld. + -- Make it so the extern step calls the select_g_step_start|finish functions. + -- Don't print error when extern step is canceled because job is ending. + -- Handle a few error codes when dealing with the extern step to make sure + we have the pids added to the system correctly. + -- Add support for job dependencies with job array expressions. Previous logic + required listing each task of job array individually. + -- Make sure tres_cnt is set before creating a slurmdb_assoc_usage_t. + -- Prevent backfill scheduler from starting a second "singleton" job if another + one started during a backfill sleep. + -- Fix for invalid array pointer when creating advanced reservation when job + allocations span heterogeneous nodes (differing core or socket counts). + -- Fix hostlist_ranged_string_xmalloc_dims to correctly not put brackets on + hostlists when brackets == 0. + -- Make sure we don't get brackets when making a range of reserved ports + for a step. + -- Change fatal to an error if port ranges aren't correct when reading state + for steps. + +* Changes in Slurm 16.05.1 +========================== + -- Fix __cplusplus macro in spank.h to allow compilation with C++. + -- Fix compile issue with older glibc < 2.12 + -- Fix for starting batch step with mpi/pmix plugin. + -- Fix for "scontrol -dd show job" with respect to displaying the specific + CPUs allocated to a job on each node. Prior logic would only display + the CPU information for the first node in the job allocation. + -- Print correct return code on failure to update active node features + through sview. + -- Allow QOS timelimit to override partition timelimit when EnforcePartLimits + is set to all/any. + -- Make it so qsub will do a "basename" on a wrapped command for the output + and error files. + -- Fix issue where slurmd could core when running the ipmi energy plugin. + -- Documentation - clean up typos. + -- Add logic so that slurmstepd can be launched under valgrind. + -- Increase buffer size to read /proc/*/stat files. + -- Fix for tracking job resource allocation when slurmctld is reconfigured + while Cray Node Health Check (NHC) is running. Previous logic would fail to + record the job's allocation then perform release operation upon NHC + completion, resulting in underflow error messages. 
+ -- Make "scontrol show daemons" work with long node names. + -- CRAY - Collect energy using a uint64_t instead of uint32_t. + -- Fix incorrect if statements when determining if the user has a default + account or wckey. + -- Prevent job stuck in configuring state if slurmctld daemon restarted while + PrologSlurmctld is running. Also re-issue burst_buffer/pre-load operation + as needed. + -- Correct task affinity support for FreeBSD. + -- Fix for task affinity on KNL in SNC2/Flat mode. + -- Recalculate a job's memory allocation after node reboot if job requests all + of a node's memory and FastSchedule=0 is configured. Intel KNL memory size + can change on reboot with various MCDRAM modes. + -- Fix small memory leak when printing HealthCheckNodeState. + -- Eliminate memory leaks when AuthInfo is configured. + -- Improve sdiag output description in man page. + -- Cray/capmc_resume script modify a node's features (as needed) when the + reinit (reboot) command is issued rather than wait for the nodes to change + to the "on" state. + -- Correctly print ranges when using step values in job arrays. + -- Allow from file names / paths over 256 characters when launching steps, + as well as spaces in the executable name. + -- job_submit.license.lua example modified to send message back to user. + -- Document job --mem=0 option means all memory on a node. + -- Set SLURM_JOB_QOS environment variable to QOS name instead of description. + -- knl_cray.conf file option of CnselectPath added. + -- node_features/knl_cray plugin modified to get current node NUMA and MCDRAM + modes using cnselect command rather than capmc command. + -- liblua - add SLES12 paths to runtime search list. + -- Fix qsub default output and error files for task arrays. + -- Fix qsub to set job_name correctly when wrapping a script (-b y) + -- Cray - set EnforcePartLimits=any in slurm.conf template. + * Changes in Slurm 16.05.0 ========================== -- Update seff to fix warnings with ncpus, and list slurm-perlapi dependency @@ -245,6 +329,26 @@ documents those changes that are of interest to users and administrators. * Changes in Slurm 15.08.13 =========================== + -- Fix issue where slurmd could core when running the ipmi energy plugin. + -- Print correct return code on failure to update node features through sview. + -- Documentation - cleanup typos. + -- Add logic so that slurmstepd can be launched under valgrind. + -- Increase buffer size to read /proc/*/stat files. + -- MYSQL - Handle ER_HOST_IS_BLOCKED better by failing when it occurs instead + of continuously printing the message over and over as the problem will + most likely not resolve itself. + -- Add --disable-bluegene to configure. This will make it so Slurm + can work on a BGAS node. + -- Prevent job stuck in configuring state if slurmctld daemon restarted while + PrologSlurmctld is running. + -- Handle association correctly if using FAIR_TREE as well as shares=Parent + -- Fix race condition when setting priority of a job and the association + doesn't have a parent. + -- MYSQL - Fix issue with adding a reservation if the name has single quotes in + it. + -- Correctly print ranges when using step values in job arrays. + -- Fix for invalid array pointer when creating advanced reservation when job + allocations span heterogeneous nodes (differing core or socket counts). 
* Changes in Slurm 15.08.12 =========================== diff --git a/auxdir/x_ac_bluegene.m4 b/auxdir/x_ac_bluegene.m4 index 34837a403b98919f3d08c107eb8229e498122904..207c07d4af864cf484c8cd66b1e98de49bc3fea9 100644 --- a/auxdir/x_ac_bluegene.m4 +++ b/auxdir/x_ac_bluegene.m4 @@ -10,12 +10,28 @@ # If found define HAVE_BG and HAVE_FRONT_END and others ##***************************************************************************** - -AC_DEFUN([X_AC_BGL], +AC_DEFUN([X_AC_BG], [ ac_real_bluegene_loaded=no ac_bluegene_loaded=no + AC_MSG_CHECKING([whether BG is explicitly disabled]) + AC_ARG_ENABLE( + [bluegene], + AS_HELP_STRING(--disable-bluegene,Disable Bluegene support for BGAS nodes (or wherever you run a Slurm on a bluegene system not wanting it to act like a Bluegene)), + [ case "$enableval" in + yes) ac_bluegene_loaded=no ;; + no) ac_bluegene_loaded=yes ;; + *) AC_MSG_RESULT([doh!]) + AC_MSG_ERROR([bad value "$enableval" for --disable-bluegene]) ;; + esac ] + ) + + AC_MSG_RESULT([${ac_bluegene_loaded=yes}]) +]) + +AC_DEFUN([X_AC_BGL], +[ AC_ARG_WITH(db2-dir, AS_HELP_STRING(--with-db2-dir=PATH,Specify path to parent directory of DB2 library), [ trydb2dir=$withval ]) # test for bluegene emulation mode @@ -34,7 +50,10 @@ AC_DEFUN([X_AC_BGL], *) AC_MSG_ERROR([bad value "$enableval" for --enable-bgl-emulation]) ;; esac ]) - if test "x$bluegene_emulation" = "xyes" -o "x$bgl_emulation" = "xyes"; then + # Skip if already set + if test "x$ac_bluegene_loaded" = "xyes" ; then + bg_default_dirs="" + elif test "x$bluegene_emulation" = "xyes" -o "x$bgl_emulation" = "xyes"; then AC_DEFINE(HAVE_3D, 1, [Define to 1 if 3-dimensional architecture]) AC_DEFINE(SYSTEM_DIMENSIONS, 3, [3-dimensional architecture]) AC_DEFINE(HAVE_BG, 1, [Define to 1 if emulating or running on Blue Gene system]) diff --git a/auxdir/x_ac_databases.m4 b/auxdir/x_ac_databases.m4 index 0779dc9c17666fb78224159e52bd472abb8a8957..e1390db4a4f2b103826e35d2363c52bfd960af13 100644 --- a/auxdir/x_ac_databases.m4 +++ b/auxdir/x_ac_databases.m4 @@ -18,7 +18,7 @@ AC_DEFUN([X_AC_DATABASES], AC_ARG_WITH( [mysql_config], AS_HELP_STRING(--with-mysql_config=PATH, - Specify path to mysql_config binary), + Specify path of directory where mysql_config binary exists), [_x_ac_mysql_bin="$withval"]) if test x$_x_ac_mysql_bin = xno; then diff --git a/configure b/configure index b26eb02380fc3fc668b0bacbd649e89095283a6e..777c6491a3a38c88d243b3d17da5ba95a0772599 100755 --- a/configure +++ b/configure @@ -987,6 +987,7 @@ enable_option_checking enable_silent_rules enable_maintainer_mode with_rpath +enable_bluegene with_db2_dir enable_bluegene_emulation enable_bgl_emulation @@ -1707,6 +1708,9 @@ Optional Features: --enable-maintainer-mode enable make rules and dependencies not useful (and sometimes confusing) to the casual installer + --disable-bluegene Disable Bluegene support for BGAS nodes (or wherever + you run a Slurm on a bluegene system not wanting it + to act like a Bluegene) --enable-bluegene-emulation deprecated use --enable-bgl-emulation --enable-bgl-emulation Run SLURM in BGL mode on a non-bluegene system @@ -1788,7 +1792,8 @@ Optional Packages: --with-freeipmi=PATH Specify path to freeipmi installation --with-rrdtool=PATH Specify path to rrdtool-devel installation --with-mysql_config=PATH - Specify path to mysql_config binary + Specify path of directory where mysql_config binary + exists --with-alps-emulation Run SLURM against an emulated ALPS system - requires option cray.conf [default=no] --with-cray_dir=PATH Specify path to Cray file installation - 
/opt/cray @@ -3697,6 +3702,30 @@ fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_with_rpath" >&5 $as_echo "$ac_with_rpath" >&6; } + + + ac_real_bluegene_loaded=no + ac_bluegene_loaded=no + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether BG is explicitly disabled" >&5 +$as_echo_n "checking whether BG is explicitly disabled... " >&6; } + # Check whether --enable-bluegene was given. +if test "${enable_bluegene+set}" = set; then : + enableval=$enable_bluegene; case "$enableval" in + yes) ac_bluegene_loaded=no ;; + no) ac_bluegene_loaded=yes ;; + *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: doh!" >&5 +$as_echo "doh!" >&6; } + as_fn_error $? "bad value \"$enableval\" for --disable-bluegene" "$LINENO" 5 ;; + esac + +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${ac_bluegene_loaded=yes}" >&5 +$as_echo "${ac_bluegene_loaded=yes}" >&6; } + + DEPDIR="${am__leading_dot}deps" ac_config_commands="$ac_config_commands depfiles" @@ -4738,9 +4767,6 @@ fi - ac_real_bluegene_loaded=no - ac_bluegene_loaded=no - # Check whether --with-db2-dir was given. if test "${with_db2_dir+set}" = set; then : @@ -4770,7 +4796,10 @@ if test "${enable_bgl_emulation+set}" = set; then : fi - if test "x$bluegene_emulation" = "xyes" -o "x$bgl_emulation" = "xyes"; then + # Skip if already set + if test "x$ac_bluegene_loaded" = "xyes" ; then + bg_default_dirs="" + elif test "x$bluegene_emulation" = "xyes" -o "x$bgl_emulation" = "xyes"; then $as_echo "#define HAVE_3D 1" >>confdefs.h diff --git a/configure.ac b/configure.ac index 1f2f901b7afb67e8b1bee27c5a162888ad8e4514..869a98935a08080fe88e30eeb2704a243a3fa6a2 100644 --- a/configure.ac +++ b/configure.ac @@ -30,6 +30,9 @@ AC_CONFIG_HEADERS([slurm/slurm.h]) dnl This needs to be close to the front to set CFLAGS=-m64 X_AC_RPATH + +X_AC_BG + X_AC_BGL dnl we need to know if this is a bgl in the Makefile.am to do diff --git a/contribs/cray/capmc_resume.c b/contribs/cray/capmc_resume.c index 1e10bb3f3e8881c835a4de2b2cd770c32ebe784b..e61fb63312337a8ff2ed0bda76b66ed686cd248a 100644 --- a/contribs/cray/capmc_resume.c +++ b/contribs/cray/capmc_resume.c @@ -103,6 +103,7 @@ static s_p_options_t knl_conf_file_options[] = { {"CapmcPath", S_P_STRING}, {"CapmcPollFreq", S_P_UINT32}, {"CapmcTimeout", S_P_UINT32}, + {"CnselectPath", S_P_STRING}, {"DefaultMCDRAM", S_P_STRING}, {"DefaultNUMA", S_P_STRING}, {"LogFile", S_P_STRING}, @@ -692,26 +693,24 @@ int main(int argc, char *argv[]) xfree(mcdram_mode); xfree(numa_mode); - /* Wait for all nodes to change state to "on" */ - _wait_all_nodes_on(); - if ((argc == 3) && !syscfg_path) { slurm_init_update_node_msg(&node_msg); node_msg.node_names = argv[1]; node_msg.features_act = argv[2]; rc = slurm_update_node(&node_msg); + if (rc != SLURM_SUCCESS) { + error("%s: slurm_update_node(\'%s\', \'%s\'): %s\n", + prog_name, argv[1], argv[2], + slurm_strerror(slurm_get_errno())); + } } - if (rc == SLURM_SUCCESS) { - exit(0); - } else { - error("%s: slurm_update_node(\'%s\', \'%s\'): %s\n", - prog_name, argv[1], argv[2], - slurm_strerror(slurm_get_errno())); - exit(1); - } + /* Wait for all nodes to change state to "on" */ + _wait_all_nodes_on(); bit_free(node_bitmap); xfree(prog_name); - exit(0); + if (rc == SLURM_SUCCESS) + exit(0); + exit(1); } diff --git a/contribs/cray/capmc_suspend.c b/contribs/cray/capmc_suspend.c index 3161cebaf3724878141bcf687e6c395f688bc90c..330400a0d5b1f2c7d16361bd316d2bffd7563bae 100644 --- a/contribs/cray/capmc_suspend.c +++ b/contribs/cray/capmc_suspend.c @@ -99,6 +99,7 @@ static s_p_options_t 
knl_conf_file_options[] = { {"CapmcPath", S_P_STRING}, {"CapmcPollFreq", S_P_UINT32}, {"CapmcTimeout", S_P_UINT32}, + {"CnselectPath", S_P_STRING}, {"DefaultMCDRAM", S_P_STRING}, {"DefaultNUMA", S_P_STRING}, {"LogFile", S_P_STRING}, diff --git a/contribs/cray/slurm.conf.template b/contribs/cray/slurm.conf.template index 52e3f66f01efd3606110c08dc63ae768b20e5c97..38ca31e42e4923b381e5a155d902a7d55cacbd81 100644 --- a/contribs/cray/slurm.conf.template +++ b/contribs/cray/slurm.conf.template @@ -17,7 +17,7 @@ AuthType=auth/munge CoreSpecPlugin=cray CryptoType=crypto/munge #DisableRootJobs=NO -#EnforcePartLimits=NO +EnforcePartLimits=any #Epilog= #EpilogSlurmctld= #FirstJobId=1 diff --git a/contribs/lua/job_submit.license.lua b/contribs/lua/job_submit.license.lua index 4a68a6014cb075afca15398539e78b5b2a6b32ab..76a4e471b4ab263a2c8def574d09e714c0cb8ad0 100644 --- a/contribs/lua/job_submit.license.lua +++ b/contribs/lua/job_submit.license.lua @@ -42,6 +42,7 @@ function slurm_job_submit ( job_desc, part_list, submit_uid ) if bad_license_count > 0 then slurm.log_info("slurm_job_submit: for user %u, invalid licenses value: %s", job_desc.user_id, job_desc.licenses) + slurm.log_user("Invalid licenses value: %s", job_desc.licenses) return slurm.ESLURM_INVALID_LICENSES end diff --git a/contribs/seff/seff b/contribs/seff/seff index a2ecff4b8a00906ca6f6429bce378a9259e6e855..3f5d58b78d7db98e32c512c28bdca62008fcdcb8 100755 --- a/contribs/seff/seff +++ b/contribs/seff/seff @@ -10,7 +10,7 @@ use POSIX qw/pow/; use Sys::Hostname; use Slurmdb ':all'; use Slurm ':all'; -use Data::Dumper; +#use Data::Dumper; my $VERSION = "2.1"; diff --git a/contribs/seff/smail.in b/contribs/seff/smail.in index 8a44f828f0ed0dddf5160b3ddfb3b879be7c9716..32fc820fce7d9aaa9b07a601d18d34a0663697f3 100755 --- a/contribs/seff/smail.in +++ b/contribs/seff/smail.in @@ -40,7 +40,7 @@ if [ -n "$status" ]; then # Fork a child so sleep is asynchronous. 
{ sleep 60 - $SEFF $jobid | $MAIL "$1" "${array[0]},${array[1]}" "$3" + $SEFF $jobid | $MAIL "$@" } & else $MAIL "$@" diff --git a/contribs/torque/qstat.pl b/contribs/torque/qstat.pl index a040d6db9e614209738aa288b2b0e9049b8b2701..b108f871814cee01cea3d35ff3a6f47c94cfbe6a 100755 --- a/contribs/torque/qstat.pl +++ b/contribs/torque/qstat.pl @@ -227,8 +227,13 @@ if(defined($queueList)) { print_job_brief($job, $line); $line++; } + $rc = 0; + } + + # return 0 even if no records printed when using -u flag + if (@userIds) { + $rc = 0; } - $rc = 0; } # Exit with status code diff --git a/contribs/torque/qsub.pl b/contribs/torque/qsub.pl index 294dbb385898c46b1d38066801cffefcfc64fa02..84619cbcbef0786e2a13cf4b122c2ac9cb6330bd 100755 --- a/contribs/torque/qsub.pl +++ b/contribs/torque/qsub.pl @@ -50,6 +50,7 @@ use autouse 'Pod::Usage' => qw(pod2usage); use Slurm ':all'; use Switch; use English; +use File::Basename; my ($start_time, $account, @@ -136,7 +137,7 @@ my $script; my $use_job_name = "sbatch"; if ($ARGV[0]) { - $use_job_name = $ARGV[0]; + $use_job_name = basename($ARGV[0]); foreach (@ARGV) { $script .= "$_ "; } @@ -249,19 +250,27 @@ if($interactive) { if (!$join_output) { if ($err_path) { $command .= " -e $err_path"; - } elsif ($job_name) { - $command .= " -e $job_name.e%j"; } else { - $command .= " -e $use_job_name.e%j"; + if ($job_name) { + $command .= " -e $job_name.e%j"; + } else { + $command .= " -e $use_job_name.e%j"; + } + + $command .= ".%a" if $array; } } if ($out_path) { $command .= " -o $out_path"; - } elsif ($job_name) { - $command .= " -o $job_name.o%j"; } else { - $command .= " -o $use_job_name.o%j"; + if ($job_name) { + $command .= " -o $job_name.o%j"; + } else { + $command .= " -o $use_job_name.o%j"; + } + + $command .= ".%a" if $array; } # The job size specification may be within the batch script, @@ -364,6 +373,7 @@ if ($requeue) { if ($script) { if ($wrap && $wrap =~ 'y') { + $command .= " -J $use_job_name" if !$job_name; $command .=" --wrap=\"$script\""; } else { $command .= " $script"; diff --git a/doc/html/cray_alps.shtml b/doc/html/cray_alps.shtml index 6931ad055c551ef9de81d92a69892346e860e4b9..b8283358e21b4fbdf3647b88fc1a42586d4cd42d 100644 --- a/doc/html/cray_alps.shtml +++ b/doc/html/cray_alps.shtml @@ -27,7 +27,7 @@ Use Slurm's <i>sbatch</i> or <i>salloc</i> commands to create a resource allocation in ALPS. Then use ALPS' <i>aprun</i> command to launch parallel jobs within the resource allocation. -The resource allocation is terminated once the the batch script or the +The resource allocation is terminated once the batch script or the <i>salloc</i> command terminates. 
Slurm includes a <i>launch/aprun</i> plugin that allow users to use <i>srun</i> to wrap <i>aprun</i> and translate <i>srun</i> options diff --git a/doc/html/crypto_plugins.shtml b/doc/html/crypto_plugins.shtml index b020303c0fff28e338d142f5dd58790eb64ec653..e03e8691dae23f53014ab9c081a5135a82815b23 100644 --- a/doc/html/crypto_plugins.shtml +++ b/doc/html/crypto_plugins.shtml @@ -118,7 +118,7 @@ by crypto_read_private_key() or crypto_read_public_key().</p> <p class="commandline">char *crypto_str_error(void);</p> <p style="margin-left:.2in"><b>Description</b>: Return a string -describing the last error generated by the the cryptographic software.</p> +describing the last error generated by the cryptographic software.</p> <p style="margin-left:.2in"><b>Returns</b>: A pointer to a string.</p> <p class="commandline">int crypto_sign (void *key, char *buffer, int buf_size, diff --git a/doc/html/documentation.shtml b/doc/html/documentation.shtml index b113d2b40bf72c60c79877dc4a2387e2fdd53c53..ed25eb9561178538f7acc7db4f4cd3baa7ec3906 100644 --- a/doc/html/documentation.shtml +++ b/doc/html/documentation.shtml @@ -3,7 +3,9 @@ <h1> Documentation</h1> <p><b>NOTE: This documentation is for Slurm version @SLURM_VERSION@.<br> -Documentation for other versions of Slurm is distributed with the code</b></p> +Documentation for older versions of Slurm are distributed with the source, or +may be found in the <a href="http://slurm.schedmd.com/archive/">archive</a>. +</b></p> <p>Also see <a href="tutorials.html">Tutorials</a> and <a href="publications.html">Publications and Presentations</a>.</p> diff --git a/doc/html/download.shtml b/doc/html/download.shtml index 03a5575cbb46ff6cc6367dd745ff207f7dada388..0feeb5d7aaab83eea4616afa2b1c58b42a974033 100644 --- a/doc/html/download.shtml +++ b/doc/html/download.shtml @@ -71,7 +71,7 @@ It may be included directly in a future release of Slurm.</li><br> <li><b>Debuggers</b> and debugging tools</li> <ul> -<li><a href="http://www.totalviewtech.com/"><b>TotalView</b></a> +<li><a href="http://www.roguewave.com/products-services/totalview"><b>TotalView</b></a> is a GUI-based source code debugger well suited for parallel applications.</li> <li><a href="http://padb.pittman.org.uk/"><b>Padb</b></a> is a job inspection tool for examining and debugging parallel programs, primarily it simplifies the process of gathering stack traces but also supports a wide range of other functions. diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml index c53232f08b0f31fb4cc045787fb1a11fa02353a0..8f4e3e81fb558f7f6586c6f3c3781275ead38abb 100644 --- a/doc/html/faq.shtml +++ b/doc/html/faq.shtml @@ -739,7 +739,7 @@ should either be removed or altered (e.g. SLURM_NNODES, SLURM_NODELIST and SLURM_NPROCS). The <i>scontrol</i> command will generate a script that can be executed to reset local environment variables. -You must retain the SLURM_JOBID environment variable in order for the +You must retain the SLURM_JOB_ID environment variable in order for the <i>srun</i> command to gather information about the job's current state and specify the desired node and/or task count in subsequent <i>srun</i> invocations. A new accounting record is generated when a job is resized showing the to have @@ -748,10 +748,10 @@ An example is shown below.</p> <pre> #!/bin/bash srun my_big_job -scontrol update JobId=$SLURM_JOBID NumNodes=2 -. slurm_job_${SLURM_JOBID}_resize.sh +scontrol update JobId=$SLURM_JOB_ID NumNodes=2 +. 
slurm_job_${SLURM_JOB_ID}_resize.sh srun -N2 my_small_job -rm slurm_job_${SLURM_JOBID}_resize.* +rm slurm_job_${SLURM_JOB_ID}_resize.* </pre> <p><b>Increasing a job's size</b><br> @@ -822,9 +822,9 @@ icrm2 icrm3 icrm4 -$ salloc -N4 --dependency=expand:$SLURM_JOBID bash +$ salloc -N4 --dependency=expand:$SLURM_JOB_ID bash salloc: Granted job allocation 65543 -$ scontrol update jobid=$SLURM_JOBID NumNodes=0 +$ scontrol update jobid=$SLURM_JOB_ID NumNodes=0 To rese. Slurm environment variables, execute For bash or sh shells: . ./slurm_job_65543_resize.sh For csh shells: source ./slurm_job_65543_resize.csh @@ -832,11 +832,11 @@ $ exit exit salloc: Relinquishing job allocation 65543 -$ scontrol update jobid=$SLURM_JOBID NumNodes=ALL +$ scontrol update jobid=$SLURM_JOB_ID NumNodes=ALL To rese. Slurm environment variables, execute For bash or sh shells: . ./slurm_job_65542_resize.sh For csh shells: source ./slurm_job_65542_resize.csh -$ . ./slurm_job_$SLURM_JOBID_resize.sh +$ . ./slurm_job_$SLURM_JOB_ID_resize.sh $ srun hostname icrm1 @@ -2013,14 +2013,14 @@ export PATH=/bin:/home/slurm/linux/bin LOG=/home/slurm/linux/log/logslurmepilog echo "Start `date`" >> $LOG 2>&1 -echo "Job $SLURM_JOBID exitcode $SLURM_JOB_EXIT_CODE2" >> $LOG 2>&1 +echo "Job $SLURM_JOB_ID exitcode $SLURM_JOB_EXIT_CODE2" >> $LOG 2>&1 exitcode=`echo $SLURM_JOB_EXIT_CODE2|awk '{split($0, a, ":"); print a[1]}'` >> $LOG 2>&1 if [ "$exitcode" == "8" ]; then echo "Found REQUEUE_EXIT_CODE: $REQUEUE_EXIT_CODE" >> $LOG 2>&1 - scontrol requeuehold state=SpecialExit $SLURM_JOBID >> $LOG 2>&1 + scontrol requeuehold state=SpecialExit $SLURM_JOB_ID >> $LOG 2>&1 echo $? >> $LOG 2>&1 else - echo "Job $SLURM_JOBID exit all right" >> $LOG 2>&1 + echo "Job $SLURM_JOB_ID exit all right" >> $LOG 2>&1 fi echo "Done `date`" >> $LOG 2>&1 @@ -2166,6 +2166,6 @@ followed by an optional comma separated list of cluster names to operate on.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 25 April 2016</p> +<p style="text-align:center;">Last modified 22 June 2016</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/ibm-pe.shtml b/doc/html/ibm-pe.shtml index 2bdc7716d16d6c117f93f1174c60909e087414e8..a472c24a5192a7148e2db29bc79fcc095ce1d702 100644 --- a/doc/html/ibm-pe.shtml +++ b/doc/html/ibm-pe.shtml @@ -334,7 +334,7 @@ note the following environment variables:</p> <h3>Gang Scheduling</h3> <p>Slurm can be configured to gang schedule (time slice) parallel jobs by -alternately suspending and resuming them. Depending upon the the number of +alternately suspending and resuming them. Depending upon the number of jobs configured to time slice and the time slice interval (as specified in the <i>slurm.conf</i> file using the <b>OverSubscribe</b> and <b>SchedulerTimeSlice</b> options), the job may experience communication timeouts. Set the environment @@ -453,7 +453,7 @@ Slurm (without jobs). The procedure is as follows:</p> <h3>Job Scheduling</h3> <p>Slurm can be configured to gang schedule (time slice) parallel jobs by -alternately suspending and resuming them. Depending upon the the number of +alternately suspending and resuming them. Depending upon the number of jobs configured to time slice and the time slice interval (as specified in the <i>slurm.conf</i> file using the <b>OverSubscribe</b> and <b>SchedulerTimeSlice</b> options), the job may experience communication timeouts. 
Set the environment diff --git a/doc/html/job_array.shtml b/doc/html/job_array.shtml index 6ef79d3430198cf491d4fa7d4d1734f361c76e33..468ad760b3352bd9865b6ff2d8fbc4e3363d627a 100644 --- a/doc/html/job_array.shtml +++ b/doc/html/job_array.shtml @@ -55,20 +55,20 @@ will generate a job array containing three jobs. If the sbatch command responds< then the environment variables will be set as follows:<br> <br> -SLURM_JOBID=36<br> +SLURM_JOB_ID=36<br> SLURM_ARRAY_JOB_ID=36<br> SLURM_ARRAY_TASK_ID=1<br> <br> -SLURM_JOBID=37<br> +SLURM_JOB_ID=37<br> SLURM_ARRAY_JOB_ID=36<br> SLURM_ARRAY_TASK_ID=2<br> <br> -SLURM_JOBID=38<br> +SLURM_JOB_ID=38<br> SLURM_ARRAY_JOB_ID=36<br> SLURM_ARRAY_TASK_ID=3<br> </p> -<p>All Slurm commands and APIs recognize the SLURM_JOBID value. +<p>All Slurm commands and APIs recognize the SLURM_JOB_ID value. Most commands also recognize the SLURM_ARRAY_JOB_ID plus SLURM_ARRAY_TASK_ID values separated by an underscore as identifying an element of a job array. Using the example above, "37" or "36_2" would be equivalent ways to identify @@ -321,6 +321,6 @@ manage large job counts. Each task of the job array will have a unique Slurm "job_id", but all will have the same "array_job_id" value.</p> -<p style="text-align:center;">Last modified 14 August 2014</p> +<p style="text-align:center;">Last modified 22 June 2016</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/mail.shtml b/doc/html/mail.shtml index 60436d02b810ea6698fd1789bebf558d16f8cbac..852169a9a6a04c53cb1bfb28bf7a88129b3ec507 100644 --- a/doc/html/mail.shtml +++ b/doc/html/mail.shtml @@ -16,12 +16,12 @@ Any person subscribed to this mailing list may post to it. user community to share information.</p> <p>You can join either list - by <a href="http://lists.schedmd.com/cgi-bin/dada/mail.cgi/list">subscribing + by <a href="https://lists.schedmd.com/cgi-bin/dada/mail.cgi/list">subscribing here</a>. <p> You can unsubscribe by logging -into <a href="http://lists.schedmd.com/cgi-bin/dada/mail.cgi/profile_login/">your +into <a href="https://lists.schedmd.com/cgi-bin/dada/mail.cgi/profile_login/">your mailing list profile</a>, or email <a href="mailto:unsubscribe@schedmd.com?Subject=Unsubscribe">unsubscribe@schedmd.com</a>. </p> diff --git a/doc/html/maui.shtml b/doc/html/maui.shtml index 76e4ad72e82644808b6555452f92a9330f213096..2d7c8cd59f8370b6f5e24d7d33edf5694d6acd0d 100644 --- a/doc/html/maui.shtml +++ b/doc/html/maui.shtml @@ -136,7 +136,7 @@ This use of this key is essential to insure that a user not build his own program to cancel other user's jobs in Slurm. This should be no more than 32-bit unsigned integer and match -the the encryption key in Maui (<i>--with-key</i> on the +the encryption key in Maui (<i>--with-key</i> on the configure line) or Moab (<i>KEY</i> parameter in the <i>moab-private.cfg</i> file). Note tha. 
Slurm's wiki plugin does not include a mechanism diff --git a/doc/html/meetings.shtml b/doc/html/meetings.shtml index eeee5e87d4b39bfd0e870d9022731d149a08a727..a8adcfd424da1fb0d693b6c002c1edd22fcf7964 100644 --- a/doc/html/meetings.shtml +++ b/doc/html/meetings.shtml @@ -6,8 +6,8 @@ 26-27 September 2016<br> Athens, Greece<br> Host: <a href="http://www.grnet.gr/">Greek Research and Technology Network (GRNET)</a></p> -<p>More information coming soon.</p> -<!-- <a href="slurm_ug_cfp.html">Call for Abstracts: Due 1 June 2015</a><br> --> +<!-- <p>More information coming soon.</p> --> +<p><a href="slurm_ug_cfp.html">Call for Abstracts: Due 30 June 2016</a></p> <!-- <a href="slurm_ug_agenda.html">Meeting agenda, registration information, etc.</a><br> --> <!-- <a href="slurm_ug_agenda.html#hotels">Meeting hotels</a><br> --> @@ -44,6 +44,6 @@ Host: Bull</p> Paris, France<br> Host: CEA</p> -<p style="text-align:center;">Last modified 31 December 2015</p> +<p style="text-align:center;">Last modified 23 June 2016</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/power_save.shtml b/doc/html/power_save.shtml index 971094b901f9b6bf6ea4e8e41f59a5240aa03f96..662de7df5105f3d181b5fcba141a68b1f9415397 100644 --- a/doc/html/power_save.shtml +++ b/doc/html/power_save.shtml @@ -196,8 +196,8 @@ available).</p> when node zero of the allocation is ready for use and pre-processing can be performed as needed before using <i>srun</i> to launch job steps. Waiting for all nodes to be booted can be accomplished by adding the -command "<i>scontrol wait_job $SLURM_JOBID</i>" within the script or by -adding that command to the the system <i>Prolog</i> or <i>PrologSlurmctld</i> +command "<i>scontrol wait_job $SLURM_JOB_ID</i>" within the script or by +adding that command to the system <i>Prolog</i> or <i>PrologSlurmctld</i> as configured in <i>slurm.conf</i>, which would create the delay for all jobs on the system. Insure that the <i>Prolog</i> code is zero to avoid draining the node @@ -255,6 +255,6 @@ and perform the following actions: <li>Boot the appropriate image for each node</li> </ol> -<p style="text-align:center;">Last modified 15 April 2015</p> +<p style="text-align:center;">Last modified 22 June 2016</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/qos.shtml b/doc/html/qos.shtml index 74b6d6b225fb29207e8b621fdd2954b52341dc7d..905521f35436d4b74a7a0e36aba5371aa161d236 100644 --- a/doc/html/qos.shtml +++ b/doc/html/qos.shtml @@ -121,7 +121,7 @@ override the requested partition's MaxNodes limit. override the requested partition's MinNodes limit. <li><b>OverPartQOS</b> If set, jobs using this QOS will be able to -override any limits used by the the requested partition's QOS limits. +override any limits used by the requested partition's QOS limits. <li><b>PartitionTimeLimit</b> If set, jobs using this QOS will be able to override the requested partition's TimeLimit. diff --git a/doc/html/schedplugins.shtml b/doc/html/schedplugins.shtml index 4fcde0d0453d97bdbf4726e23a98c93a77bc7eaf..492086ebe683ff0fe22118d58f7472037e269978 100644 --- a/doc/html/schedplugins.shtml +++ b/doc/html/schedplugins.shtml @@ -12,7 +12,7 @@ The <b>backfill</b> scheduler let. Slurm establish the initial job priority and can periodically alter job priorities to change their order within the queue. The <b>wiki</b> scheduler establishes an initial priority of zero (held) for all jobs. These jobs only begin execution when the <b>wiki</b> scheduler -explicitly raises the their priority (releasing them). 
+explicitly raises their priority (releasing them). Developers may use the model that best fits their needs. Note that a separate <a href="selectplugins.html">node selection plugin</a> is available for controlling that aspect of scheduling.</p> diff --git a/doc/html/selectplugins.shtml b/doc/html/selectplugins.shtml index ab5b697c5ab44810def084d1ec4df2d67da1b797..5381727b14a7584e0a4e4f65f6936d98d1e0f275 100644 --- a/doc/html/selectplugins.shtml +++ b/doc/html/selectplugins.shtml @@ -564,6 +564,20 @@ be sent to the job.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, the plugin should return a Slurm error code.</p> + +<p class="commandline">int select_p_job_mem_confirm (struct job_record *job_ptr);</p> +<p style="margin-left:.2in"><b>Description</b>: Confirm that a job's memory +allocation is still valid after a node is restarted. This is an issue if the +job is allocated all of the memory on a node and that node is restarted with a +different memory size than at the time it is allocated to the job. This would +mostly be an issue on an Intel KNL node where the memory size would vary with +the MCDRAM cache mode.</p> +<p style="margin-left:.2in"><b>Arguments</b>:<br> +<span class="commandline"> job_ptr</span> (input) pointer +to the job to be validated.</p> +<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On +failure, the plugin should return a Slurm error code.</p> + <p class="commandline">int select_p_job_suspend (struct job_record *job_ptr, bool indf_susp);</p> <p style="margin-left:.2in"><b>Description</b>: Suspend the specified job. @@ -779,6 +793,6 @@ cnodelist (e.g. on a BGQ it would look something like '[00000x11331]').</br> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 10 December 2015</p> +<p style="text-align:center;">Last modified 11 June 2016</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/slurm_ug_cfp.shtml b/doc/html/slurm_ug_cfp.shtml index 80af201d1ebe69ce898833a390aea560aff6cf50..30ca1da411c96cd67e1e8badee39e3a198773d1a 100644 --- a/doc/html/slurm_ug_cfp.shtml +++ b/doc/html/slurm_ug_cfp.shtml @@ -1,16 +1,17 @@ <!--#include virtual="header.txt"--> <h1>CALL FOR ABSTRACTS</h1> -<p>Slurm User Group Meeting 2015<br> -15-16 September 2015<br> -Washington DC, USA</p> +<p>Slurm User Group Meeting 2016<br> +26-27 September 2016<br> +Athens, Greece</p> <p>You are invited to submit an abstract of a tutorial, technical presentation -or site report to be given at the Slurm User Group Meeting 2015. This event is +or site report to be given at the Slurm User Group Meeting 2016. This event is sponsored and organized by -<a href="http://www.gwu.edu/">The George Washington University</a> and +<a href="http://www.grnet.gr/">Greek Research and Technology Network (GRNET)</a> and <a href="http://www.schedmd.com/">SchedMD</a>. 
-It will be held in Washington DC, USA on 15-16 September 2015.</p> +It will be held at the Technopolis, 100 Pireos Street in Athens, Greece +on 26-27 September 2016.</p> <p>This international event is opened to everyone who wants to: <ul> @@ -26,23 +27,22 @@ or tutorial about Slurm is invited to send an abstract to <a href="mailto:slugc@schedmd.com">slugc@schedmd.com</a>.</p> <p><b>Important Dates:</b><br> -1 June 2015: Abstracts due<br> -15 June 2015: Notification of acceptance<br> -15-16 September 2015: Slurm User Group Meeting 2015<br> +30 June 2016: Abstracts due<br> +15 July 2016: Notification of acceptance<br> +26-27 September 2016: Slurm User Group Meeting<br> </p> <p><b>Program Committee:</b><br> +Vangelis Floros (GRNET)<br> Yiannis Georgiou (Bull)<br> Brian Gilmer (Cray)<br> Matthieu Hautreux (CEA)<br> -Morris Jette (SchedMD)<br> -Bruce Pfaff (NASA Goddard Space Flight Center)<br> -Tim Wickberg (The George Washington University)</p> +Morris Jette (SchedMD)</p> <!-- <p><a href="slurm_ug_registration.html">Registration information</a></p> --> -<p style="text-align:center;">Last modified 20 March 2015</p> +<p style="text-align:center;">Last modified 5 July 2016</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/team.shtml b/doc/html/team.shtml index 335226d33881ab9e87b07f7435c020d9c372a642..5ba7f3468233899370fbb7c300a67b7698318b0e 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -87,6 +87,7 @@ Lead Slurm developers are: <li>Chris Dunlap (Lawrence Livermore National Laboratory)</li> <br> <li>Phil Eckert (Lawrence Livermore National Laboratory)</li> +<li>Robbert Eggermont (Delft University of Technology, Netherlands)</li> <li>Joey Ekstrom (Lawrence Livermore National Laboratory/Brigham Young University)</li> <li>Andrew Elwell</li> <li>Josh England (TGS Management Corporation)</li> diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index 7a744304e2e8540b2da4373305e729cf0c4cbc14..2a708a643bd000d56eab34fa1d2d0eb9dd172a03 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -189,7 +189,7 @@ in\-turn update the underlying associations. .TP \fIcluster\fP The \fIClusterName\fR parameter in the \fIslurm.conf\fR configuration -file, used to differentiate accounts from on different machines. +file, used to differentiate accounts on different machines. .TP \fIconfiguration\fP @@ -719,11 +719,6 @@ controller is placed here. When a slurmctld registers with the database the port the controller is listening on is placed here. -.TP -\fITRES\fP -Trackable RESources (BB (Burst buffer), CPU, Energy, GRES, License, Memory, and -Node) this cluster is accounting for. - .TP \fIFlags\fP Attributes possessed by the cluster. @@ -744,6 +739,12 @@ The numeric value of the select plugin the cluster is using. \fIRPC\fP When a slurmctld registers with the database the rpc version the controller is running is placed here. + +.TP +\fITRES\fP +Trackable RESources (BB (Burst buffer), CPU, Energy, GRES, License, Memory, and +Node) this cluster is accounting for. + .P NOTE: You can also view the information about the root association for the cluster. The Association format fields are described @@ -851,10 +852,6 @@ The name of the cluster event happened on. \fIClusterNodes\fP The hostlist of nodes on a cluster in a cluster event. -.TP -\fITRES\fP -Number of TRES involved with the event. - .TP \fIDuration\fP Time period the event was around for. @@ -892,6 +889,10 @@ On a node event this is the formatted state of the node during the event. 
On a node event this is the numeric value of the state of the node during the event. +.TP +\fITRES\fP +Number of TRES involved with the event. + .TP \fIUser\fP On a node event this is the user who caused the event to happen. @@ -967,7 +968,7 @@ override the requested partition's MinNodes limit. .TP \fIOverPartQOS\fP If set jobs using this QOS will be able to -override any limits used by the the requested partition's QOS limits. +override any limits used by the requested partition's QOS limits. .TP \fIPartitionTimeLimit\fP If set jobs using this QOS will be able to diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 548e63d8d071329ede4418c3ebd039cbe0ad9f98..1ee90a10d9493550a0a41ee4e3f9f64ba94ad71f 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -21,7 +21,7 @@ section). If no command is specified, then the value of \fBSallocDefaultCommand\fR is not set, then \fBsalloc\fR runs the user's default shell. -The following document describes the the influence of various options on the +The following document describes the influence of various options on the allocation of cpus to jobs and tasks. .br http://slurm.schedmd.com/cpu_management.html @@ -548,7 +548,7 @@ name is the name of the "command" specified on the command line. .TP \fB\-\-jobid\fR=<\fIjobid\fR> Allocate resources as the specified job id. -NOTE: Only valid for user root. +NOTE: Only valid for users root and SlurmUser. .TP \fB\-K\fR, \fB\-\-kill\-command\fR[=\fIsignal\fR] @@ -717,8 +717,13 @@ This parameter would generally be used if whole nodes are allocated to jobs (\fBSelectType=select/linear\fR). Also see \fB\-\-mem\-per\-cpu\fR. \fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. -NOTE: A memory size specification is treated as a special case and grants -the job access to all of the memory on each node. + +NOTE: A memory size specification of zero is treated as a special case and +grants the job access to all of the memory on each node. +If the job is allocated multiple nodes in a heterogeneous cluster, the memory +limit on each node will be that of the node in the allocation with the smallest +memory size (same limit will apply to every node in the job's allocation). + NOTE: Enforcement of memory limits currently relies upon the task/cgroup plugin or enabling of accounting, which samples memory use on a periodic basis (data need not be stored, just collected). In both cases memory use is based upon diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 29591e32ebdb76a5840d216d589b7e6a3d6cdd6b..0f5e0d088c9f4f26de4d2467e60cd577612087ba 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -651,7 +651,7 @@ read on sbatch's standard input. .TP \fB\-\-jobid\fR=<\fIjobid\fR> Allocate resources as the specified job id. -NOTE: Only valid for user root. +NOTE: Only valid for users root and SlurmUser. .TP \fB\-k\fR, \fB\-\-no\-kill\fR @@ -827,8 +827,13 @@ This parameter would generally be used if whole nodes are allocated to jobs (\fBSelectType=select/linear\fR). Also see \fB\-\-mem\-per\-cpu\fR. \fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. -NOTE: A memory size specification is treated as a special case and grants -the job access to all of the memory on each node. + +NOTE: A memory size specification of zero is treated as a special case and +grants the job access to all of the memory on each node. 
+If the job is allocated multiple nodes in a heterogeneous cluster, the memory +limit on each node will be that of the node in the allocation with the smallest +memory size (same limit will apply to every node in the job's allocation). + NOTE: Enforcement of memory limits currently relies upon the task/cgroup plugin or enabling of accounting, which samples memory use on a periodic basis (data need not be stored, just collected). In both cases memory use is based upon diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index f1d45df0172ebc25f1534634ce397f4d3f0fc7bf..612d7a2c5ddceb27b9e6660bbc73b0750bc822a7 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -986,7 +986,7 @@ value. Also see \fIAvailableFeatures\fP. Typically \fIActiveFeatures\fP will be identical to \fIAvailableFeatures\fP; however \fIActiveFeatures\fP may be configured as a subset of the -\fIAvailableFeatures\fP. For example, a node may be booted in mulitple +\fIAvailableFeatures\fP. For example, a node may be booted in multiple configurations. In that case, all possible configurations may be identified as \fIAvailableFeatures\fP, while \fIActiveFeatures\fP would identify the current node configuration. diff --git a/doc/man/man1/sdiag.1 b/doc/man/man1/sdiag.1 index 42dd620f8220de923aaaa152020f22f6bc05ec71..58832bd7f3fb6a6ff17816a6f2fd36e9fffd107b 100644 --- a/doc/man/man1/sdiag.1 +++ b/doc/man/man1/sdiag.1 @@ -159,21 +159,34 @@ jobs are which makes the backfilling algorithm heavier. .TP \fBDepth Mean\fR Mean of processed jobs during backfilling scheduling cycles since last reset. +Jobs which are found to be ineligible to run when examined by the backfill +scheduler are not counted (e.g. jobs submitted to multiple partitions and +already started, jobs which have reached a QOS or account limit such as +maximum running jobs for an account, etc). .TP \fBDepth Mean (try sched)\fR Mean of processed jobs during backfilling scheduling cycles since last reset. -It counts only processes with a chance to run waiting for available resources. -These jobs are which makes the backfilling algorithm heavier. +Jobs which are found to be ineligible to run when examined by the backfill +scheduler are not counted (e.g. jobs submitted to multiple partitions and +already started, jobs which have reached a QOS or account limit such as +maximum running jobs for an account, etc). .TP \fBLast queue length\fR -Number of jobs pending to be processed by backfilling algorithm. A job appears -as much times as partitions it requested. +Number of jobs pending to be processed by backfilling algorithm. +A job once for each partition it requested. +A pending job array will normally be counted as one job (tasks of a job array +which have already been started/requeued or individually modified will already +have individual job records and are each counted as a separate job). .TP \fBQueue length Mean\fR Mean of jobs pending to be processed by backfilling algorithm. +A job once for each partition it requested. +A pending job array will normally be counted as one job (tasks of a job array +which have already been started/requeued or individually modified will already +have individual job records and are each counted as a separate job). 
.LP The fourth and fifth blocks of information report the most frequently issued diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1 index c28c9cd8ea32658800d94c58cf3c2993cc260a5a..8186484bac8b5f58620836410cee9527843912ac 100644 --- a/doc/man/man1/sinfo.1 +++ b/doc/man/man1/sinfo.1 @@ -79,7 +79,7 @@ Don't convert units from their original type (e.g. 2048M won't be converted to .TP \fB\-N\fR, \fB\-\-Node\fR -Print information in a node\-oriented format. +Print information in a node\-oriented format with one line per node. The default is to print information in a partition\-oriented format. This is ignored if the \fB\-\-format\fR option is specified. diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1 index 2f56b2681d3cd30c644dec2a2f6a45f1d0fc4c86..5a9c6f80310b72deb8861a20da08f4d7c32cb85c 100644 --- a/doc/man/man1/squeue.1 +++ b/doc/man/man1/squeue.1 @@ -157,7 +157,7 @@ Generic resources (gres) required by the job or step. .TP \fB%B\fR Executing (batch) host. For an allocated session, this is the host on which -the session is executing (i.e. the node from which the the \fBsrun\fR or the +the session is executing (i.e. the node from which the \fBsrun\fR or the \fBsalloc\fR command was executed). For a batch job, this is the node executing the batch script. In the case of a typical Linux cluster, this would be the compute node zero of the allocation. In the case of a BlueGene or a Cray @@ -184,7 +184,7 @@ Minimum size of temporary disk space (in MB) requested by the job. Number of nodes allocated to the job or the minimum number of nodes required by a pending job. The actual number of nodes allocated to a pending job may exceed this number if the job specified a node range count (e.g. -minimum and maximum node counts) or the the job specifies a processor +minimum and maximum node counts) or the job specifies a processor count instead of a node count and the cluster contains nodes with varying processor counts. As a job is completing this number will reflect the current number of nodes allocated. diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index baeff438e47863cccf877280e29e15c323b441e7..26ef41a5db19e75915658d0f571ac918c58e180d 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -10,7 +10,7 @@ srun \- Run parallel jobs Run a parallel job on cluster managed by Slurm. If necessary, srun will first create a resource allocation in which to run the parallel job. -The following document describes the the influence of various options on the +The following document describes the influence of various options on the allocation of cpus to jobs and tasks. .br http://slurm.schedmd.com/cpu_management.html @@ -35,6 +35,8 @@ Bind each task to NICs which are closest to the allocated CPUs. .TP \fBv\fR Verbose mode. Log how tasks are bound to GPU and NIC devices. +.TP +This option applies to job allocations. .RE .TP @@ -42,7 +44,7 @@ Verbose mode. Log how tasks are bound to GPU and NIC devices. Charge resources used by this job to specified account. The \fIaccount\fR is an arbitrary string. The account name may be changed after job submission using the \fBscontrol\fR -command. +command. This option applies to job allocations. .TP \fB\-\-acctg\-freq\fR @@ -96,7 +98,7 @@ interference with the job). .br Smaller (non\-zero) values have a greater impact upon job performance, but a value of 30 seconds is not likely to be noticeable for -applications having less than 10,000 tasks. +applications having less than 10,000 tasks. This option applies job allocations. 
.RE .TP @@ -126,18 +128,19 @@ CR_Core, CR_Core_Memory, CR_Socket, or CR_Socket_Memory for this option to be honored. This option is not supported on BlueGene systems (select/bluegene plugin is configured). -If not specified, the scontrol show job will display 'ReqS:C:T=*:*:*'. +If not specified, the scontrol show job will display 'ReqS:C:T=*:*:*'. This +option applies to job allocations. .TP \fB\-\-bb\fR=<\fIspec\fR> Burst buffer specification. The form of the specification is system dependent. -Also see \fB\-\-bbf\fR. +Also see \fB\-\-bbf\fR. This option applies to job allocations. .TP \fB\-\-bbf\fR=<\fIfile_name\fR> Path of file containing burst buffer specification. The form of the specification is system dependent. -Also see \fB\-\-bb\fR. +Also see \fB\-\-bb\fR. This option applies to job allocations. .TP \fB\-\-bcast\fR[=<\fIdest_path\fR>] @@ -147,7 +150,8 @@ file path. If no path is specified, copy the file to a file named "slurm_bcast_<job_id>.<step_id>" in the current working. For example, "srun \-\-bcast=/tmp/mine \-N3 a.out" will copy the file "a.out" from your current directory to the file "/tmp/mine" on each of the three -allocated compute nodes and execute that file. +allocated compute nodes and execute that file. This option applies to step +allocations. .TP \fB\-\-begin\fR=<\fItime\fR> @@ -190,6 +194,8 @@ Slurm scheduler (e.g., 60 seconds with the default sched/builtin). \- If a date is specified without a year (e.g., MM/DD) then the current year is assumed, unless the combination of MM/DD and HH:MM:SS has already passed for that year, in which case the next year is used. +.br +This option applies to job allocations. .RE .TP @@ -198,7 +204,7 @@ Specifies the interval between creating checkpoints of the job step. By default, the job step will have no checkpoints created. Acceptable time formats include "minutes", "minutes:seconds", "hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and -"days\-hours:minutes:seconds". +"days\-hours:minutes:seconds". This option applies to job and step allocations. .TP \fB\-\-checkpoint\-dir\fR=<\fIdirectory\fR> @@ -206,11 +212,12 @@ Specifies the directory into which the job or job step's checkpoint should be written (used by the checkpoint/blcr and checkpoint/xlch plugins only). The default value is the current working directory. Checkpoint files will be of the form "<job_id>.ckpt" for jobs -and "<job_id>.<step_id>.ckpt" for job steps. +and "<job_id>.<step_id>.ckpt" for job steps. This option applies to job and +step allocations. .TP \fB\-\-comment\fR=<\fIstring\fR> -An arbitrary comment. +An arbitrary comment. This option applies to job allocations. .TP \fB\-\-compress\fR[=\fItype\fR] @@ -218,7 +225,8 @@ Compress file before sending it to compute hosts. The optional argument specifies the data compression library to be used. Supported values are "lz4" (default) and "zlib". Some compression libraries may be unavailable on some systems. -For use with the \fB\-\-bcast\fR option. +For use with the \fB\-\-bcast\fR option. This option applies to step +allocations. .TP \fB\-C\fR, \fB\-\-constraint\fR=<\fIlist\fR> @@ -274,19 +282,22 @@ of "rack1" and four nodes must be allocated from nodes with the feature \fBWARNING\fR: When srun is executed from within salloc or sbatch, the constraint value can only contain a single feature name. None of the other operators are currently supported for job steps. +.br +This option applies to job and step allocations. .TP \fB\-\-contiguous\fR If set, then the allocated nodes must form a contiguous set. 
Not honored with the \fBtopology/tree\fR or \fBtopology/3d_torus\fR -plugins, both of which can modify the node ordering. -Not honored for a job step's allocation. +plugins, both of which can modify the node ordering. This option applies to job +allocations. .TP \fB\-\-cores\-per\-socket\fR=<\fIcores\fR> Restrict node selection to nodes with at least the specified number of cores per socket. See additional information under \fB\-B\fR option -above when task/affinity plugin is enabled. +above when task/affinity plugin is enabled. This option applies to job +allocations. .TP \fB\-\-cpu_bind\fR=[{\fIquiet,verbose\fR},]\fItype\fR @@ -431,6 +442,8 @@ task/cgroup plugin only. .B help Show help message for cpu_bind .RE +.TP +This option applies to job and step allocations. .RE .TP @@ -532,6 +545,8 @@ with linuxproc as the ProctrackType can cause jobs to run too quickly before Accounting is able to poll for job information. As a result not all of accounting information will be present. +This option applies to job and step allocations. + .RE .TP @@ -561,6 +576,8 @@ threads per CPU for a total of two tasks. there are configurations and options which can result in inconsistent allocations when \-c has a value greater than \-c on salloc or sbatch. +This option applies to job allocations. + .TP \fB\-\-deadline\fR=<\fIOPT\fR> remove the job if no ending is possible before @@ -575,6 +592,8 @@ MM/DD[/YY]\-HH:MM[:SS] .br YYYY\-MM\-DD[THH:MM[:SS]]] +This option applies to job allocations. + .TP \fB\-d\fR, \fB\-\-dependency\fR=<\fIdependency_list\fR> Defer the start of this job until the specified dependencies have been @@ -590,7 +609,8 @@ different users. The value may be changed after job submission using the scontrol command. Once a job dependency fails due to the termination state of a preceding job, the dependent job will never be run, even if the preceding job is requeued and -has a different termination state in a subsequent execution. +has a different termination state in a subsequent execution. This option applies +to job allocations. .PD .RS .TP @@ -629,7 +649,8 @@ sharing the same job name and user have terminated. Have the remote processes do a chdir to \fIpath\fR before beginning execution. The default is to chdir to the current working directory of the \fBsrun\fR process. The path can be specified as full path or -relative path to the directory where the command is executed. +relative path to the directory where the command is executed. This +option applies to job allocations. .TP \fB\-e\fR, \fB\-\-error\fR=<\fImode\fR> @@ -639,13 +660,14 @@ redirects stderr to the same file as stdout, if one is specified. The \fB\-\-error\fR option is provided to allow stdout and stderr to be redirected to different locations. See \fBIO Redirection\fR below for more options. -If the specified file already exists, it will be overwritten. +If the specified file already exists, it will be overwritten. This option +applies to job and step allocations. .TP \fB\-E\fR, \fB\-\-preserve-env\fR Pass the current values of environment variables SLURM_NNODES and SLURM_NTASKS through to the \fIexecutable\fR, rather than computing them -from commandline parameters. +from commandline parameters. This option applies to job allocations. .TP \fB\-\-epilog\fR=<\fIexecutable\fR> @@ -654,13 +676,13 @@ The command line arguments for \fIexecutable\fR will be the command and arguments of the job step. If \fIexecutable\fR is "none", then no srun epilog will be run. This parameter overrides the SrunEpilog parameter in slurm.conf. 
This parameter is completely independent from -the Epilog parameter in slurm.conf. +the Epilog parameter in slurm.conf. This option applies to job allocations. .TP \fB\-\-exclusive[=user|mcs]\fR -This option has two slightly different meanings for job and job step -allocations. +This option applies to job and job step allocations, and has two slightly +different meanings for each one. When used to initiate a job, the job allocation cannot share nodes with other running jobs (or just other users with the "=user" option or "=mcs" option). The default shared/exclusive behavior depends on system configuration and the @@ -703,13 +725,14 @@ By default all environment variables are propagated. With "\-\-export=NONE" no environment variables will be propagated unless explicitly listed (e.g., "\-\-export=NONE,PATH=/bin,SHELL=/bin/bash"). Regardless of this setting, the appropriate "SLURM_*" task environment variables are always exported to the -environment. +environment. This option applies to job allocations. .TP \fB\-\-gid\fR=<\fIgroup\fR> If \fBsrun\fR is run as root, and the \fB\-\-gid\fR option is used, submit the job with \fIgroup\fR's group access permissions. \fIgroup\fR -may be the group name or the numerical group ID. +may be the group name or the numerical group ID. This option applies to +job allocations. .TP \fB\-\-gres\fR=<\fIlist\fR> @@ -724,11 +747,12 @@ A list of available generic consumable resources will be printed and the command will exit if the option argument is "help". Examples of use include "\-\-gres=gpu:2,mic=1", "\-\-gres=gpu:kepler:2", and "\-\-gres=help". -NOTE: By default, a job step is allocated all of the generic resources that -have allocated to the job. To change the behavior so that each job step is -allocated no generic resources, explicitly set the value of \-\-gres to specify -zero counts for each generic resource OR set "\-\-gres=none" OR set the -SLURM_STEP_GRES environment variable to "none". +NOTE: This option applies to job and step allocations. By default, a job step +is allocated all of the generic resources that have allocated to the job. +To change the behavior so that each job step is allocated no generic resources, +explicitly set the value of \-\-gres to specify zero counts for each generic +resource OR set "\-\-gres=none" OR set the SLURM_STEP_GRES environment variable +to "none". .TP \fB\-\-gres\-flags\fR=enforce\-binding @@ -741,13 +765,14 @@ sockets, however the application performance may be improved due to improved communication speed. Requires the node to be configured with more than one socket and resource filtering will be performed on a per\-socket basis. -Applies to job allocation only. +This option applies to job allocations. .TP \fB\-H, \-\-hold\fR Specify the job is to be submitted in a held state (priority of zero). A held job can now be released using scontrol to reset its priority -(e.g. "\fIscontrol release <job_id>\fR"). +(e.g. "\fIscontrol release <job_id>\fR"). This option applies to job +allocations. .TP \fB\-h\fR, \fB\-\-help\fR @@ -773,6 +798,8 @@ Only supported with the task/affinity plugin. .TP .B help show this help message +.TP +This option applies to job allocations. .RE .TP @@ -785,7 +812,8 @@ By default, \fB\-\-immediate\fR is off, and the command will block until resources become available. Since this option's argument is optional, for proper parsing the single letter option must be followed immediately with the value and not include a -space between them. For example "\-I60" and not "\-I 60". +space between them. 
For example "\-I60" and not "\-I 60". This option applies +to job and step allocations. .TP \fB\-i\fR, \fB\-\-input\fR=<\fImode\fR> @@ -794,7 +822,7 @@ Specify how stdin is to redirected. By default, redirects stdin from the terminal all tasks. See \fBIO Redirection\fR below for more options. For OS X, the poll() function does not support stdin, so input from -a terminal is not possible. +a terminal is not possible. This option applies to job and step allocations. .TP \fB\-J\fR, \fB\-\-job\-name\fR=<\fIjobname\fR> @@ -804,13 +832,15 @@ is the supplied \fBexecutable\fR program's name. NOTE: This information may be written to the slurm_jobacct.log file. This file is space delimited so if a space is used in the \fIjobname\fR name it will cause problems in properly displaying the contents of the slurm_jobacct.log file when the -\fBsacct\fR command is used. +\fBsacct\fR command is used. This option applies to job and step allocations. .TP \fB\-\-jobid\fR=<\fIjobid\fR> Initiate a job step under an already allocated job with job id \fIid\fR. Using this option will cause \fBsrun\fR to behave exactly as if the -SLURM_JOB_ID environment variable was set. +SLURM_JOB_ID environment variable was set. This option applies to job and step +allocations. +NOTE: For job allocations, this is only valid for users root and SlurmUser. .TP \fB\-K\fR, \fB\-\-kill\-on\-bad\-exit\fR[=0|1] @@ -824,13 +854,13 @@ Note: This option takes precedence over the \fB\-W\fR, \fB\-\-wait\fR option to terminate the job immediately if a task exits with a non\-zero exit code. Since this option's argument is optional, for proper parsing the single letter option must be followed immediately with the value and -not include a space between them. For example "\-K1" and not "\-K 1". +not include a space between them. For example "\-K1" and not "\-K 1". This +option applies to job allocations. .TP \fB\-k\fR, \fB\-\-no\-kill\fR Do not automatically terminate a job if one of the nodes it has been -allocated fails. This option is only recognized on a job allocation, -not for the submission of individual job steps. +allocated fails. This option applies to job and step allocations. The job will assume all responsibilities for fault\-tolerance. Tasks launch using this option will not be considered terminated (e.g. \fB\-K\fR, \fB\-\-kill\-on\-bad\-exit\fR and @@ -843,18 +873,18 @@ The default action is to terminate the job upon node failure. \fB\-\-launch-cmd\fR Print external launch command instead of running job normally through Slurm. This option is only valid if using something other than the -\fIlaunch/slurm\fR plugin. +\fIlaunch/slurm\fR plugin. This option applies to step allocations. .TP \fB\-\-launcher\-opts\fR=<\fIoptions\fR> Options for the external launcher if using something other than the -\fIlaunch/slurm\fR plugin. +\fIlaunch/slurm\fR plugin. This option applies to step allocations. .TP \fB\-l\fR, \fB\-\-label\fR Prepend task number to lines of stdout/err. The \fB\-\-label\fR option will prepend lines of output with the remote -task id. +task id. This option applies to step allocations. .TP \fB\-L\fR, \fB\-\-licenses\fR=<\fBlicense\fR> @@ -863,7 +893,7 @@ nodes of the cluster) which must be allocated to this job. License names can be followed by a colon and count (the default count is one). Multiple license names should be comma separated (e.g. -"\-\-licenses=foo:4,bar"). +"\-\-licenses=foo:4,bar"). This option applies to job allocations. 
.TP .na @@ -1012,6 +1042,8 @@ Rather than packing a job step's tasks as tightly as possible on the nodes, distribute them evenly. This user option will supersede the SelectTypeParameters CR_Pack_Nodes configuration parameter. +.TP +This option applies to job and step allocations. .RE .TP @@ -1023,19 +1055,22 @@ and teardown completed), TIME_LIMIT, TIME_LIMIT_90 (reached 90 percent of time l TIME_LIMIT_80 (reached 80 percent of time limit), and TIME_LIMIT_50 (reached 50 percent of time limit). Multiple \fItype\fR values may be specified in a comma separated list. -The user to be notified is indicated with \fB\-\-mail\-user\fR. +The user to be notified is indicated with \fB\-\-mail\-user\fR. This option +applies to job allocations. .TP \fB\-\-mail\-user\fR=<\fIuser\fR> User to receive email notification of state changes as defined by \fB\-\-mail\-type\fR. -The default value is the submitting user. +The default value is the submitting user. This option applies to job +allocations. .TP \fB\-\-mcs\-label\fR=<\fImcs\fR> Used only when the mcs/group plugin is enabled. This parameter is a group among the groups of the user. -Default value is calculated by the Plugin mcs if it's enabled. +Default value is calculated by the Plugin mcs if it's enabled. This option +applies to job allocations. .TP \fB\-\-mem\fR=<\fIMB\fR> @@ -1050,14 +1085,21 @@ to the amount of memory allocated to the job, but not remove any of the job's memory allocation from being available to other job steps. Also see \fB\-\-mem\-per\-cpu\fR. \fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. -NOTE: A memory size specification is treated as a special case and grants -the job access to all of the memory on each node. + +NOTE: A memory size specification of zero is treated as a special case and +grants the job access to all of the memory on each node. +If the job is allocated multiple nodes in a heterogeneous cluster, the memory +limit on each node will be that of the node in the allocation with the smallest +memory size (same limit will apply to every node in the job's allocation). + NOTE: Enforcement of memory limits currently relies upon the task/cgroup plugin or enabling of accounting, which samples memory use on a periodic basis (data need not be stored, just collected). In both cases memory use is based upon the job's Resident Set Size (RSS). A task may exceed the memory limit until the next periodic accounting sample. +This option applies to job and step allocations. + .TP \fB\-\-mem\-per\-cpu\fR=<\fIMB\fR> Minimum memory required per allocated CPU in MegaBytes. @@ -1080,7 +1122,8 @@ Specifying a memory limit of zero for a job step will restrict the job step to the amount of memory allocated to the job, but not remove any of the job's memory allocation from being available to other job steps. Also see \fB\-\-mem\fR. -\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. +\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. This option +applies to job and step allocations. .TP \fB\-\-mem_bind\fR=[{\fIquiet,verbose\fR},]\fItype\fR @@ -1146,17 +1189,21 @@ with [0-9] so they are seen as numerical values by srun. .TP .B help show this help message +.TP +This option applies to job and step allocations. .RE .TP \fB\-\-mincpus\fR=<\fIn\fR> -Specify a minimum number of logical cpus/processors per node. +Specify a minimum number of logical cpus/processors per node. This option +applies to job allocations. .TP \fB\-\-msg\-timeout\fR=<\fIseconds\fR> Modify the job launch message timeout. 
The default value is \fBMessageTimeout\fR in the Slurm configuration file slurm.conf. Changes to this are typically not recommended, but could be useful to diagnose problems. +This option applies to job allocations. .TP \fB\-\-mpi\fR=<\fImpi_type\fR> @@ -1211,6 +1258,8 @@ force "libpmix" usage. .B none No special MPI processing. This is the default and works with many other versions of MPI. +.TP +This option applies to step allocations. .RE .TP @@ -1219,7 +1268,8 @@ Run a job with different programs and different arguments for each task. In this case, the executable program specified is actually a configuration file specifying the executable and arguments for each task. See \fBMULTIPLE PROGRAM CONFIGURATION\fR -below for details on the configuration file contents. +below for details on the configuration file contents. This option applies to +step allocations. .TP \fB\-N\fR, \fB\-\-nodes\fR=<\fIminnodes\fR[\-\fImaxnodes\fR]> @@ -1245,14 +1295,15 @@ The job will be allocated as many nodes as possible within the range specified and without delaying the initiation of the job. The node count specification may include a numeric value followed by a suffix of "k" (multiplies numeric value by 1,024) or "m" (multiplies numeric value by -1,048,576). +1,048,576). This option applies to job and step allocations. .TP \fB\-n\fR, \fB\-\-ntasks\fR=<\fInumber\fR> Specify the number of tasks to run. Request that \fBsrun\fR allocate resources for \fIntasks\fR tasks. The default is one task per node, but note -that the \fB\-\-cpus\-per\-task\fR option will change this default. +that the \fB\-\-cpus\-per\-task\fR option will change this default. This option +applies to job and step allocations. .TP \fB\-\-network\fR=<\fItype\fR> @@ -1398,6 +1449,8 @@ connections are established for each protocol (LAPI and MPI) and each task. If there are two networks and four tasks on the node then a total of 32 connections are established (2 instances x 2 protocols x 2 networks x 4 tasks). +.TP +This option applies to job and step allocations. .RE .TP @@ -1408,7 +1461,7 @@ by 100. The adjustment range is from \-10000 (highest priority) to 10000 (lowest priority). Only privileged users can specify a negative adjustment. NOTE: This option is presently ignored if \fISchedulerType=sched/wiki\fR or -\fISchedulerType=sched/wiki2\fR. +\fISchedulerType=sched/wiki2\fR. This option applies to job allocations. .TP \fB\-\-ntasks\-per\-core\fR=<\fIntasks\fR> @@ -1421,7 +1474,8 @@ to bind the tasks to specific core unless \fB\-\-cpu_bind=none\fR is specified. NOTE: This option is not supported unless \fISelectTypeParameters=CR_Core\fR or -\fISelectTypeParameters=CR_Core_Memory\fR is configured. +\fISelectTypeParameters=CR_Core_Memory\fR is configured. This option applies to +job allocations. .TP \fB\-\-ntasks\-per\-node\fR=<\fIntasks\fR> @@ -1439,7 +1493,8 @@ a hybrid MPI/OpenMP app where only one MPI "task/rank" should be assigned to each node while allowing the OpenMP portion to utilize all of the parallelism present in the node, or submitting a single setup/cleanup/monitoring job to each node of a pre\-existing -allocation as one step in a larger job script. +allocation as one step in a larger job script. This option applies to job +allocations. .TP \fB\-\-ntasks\-per\-socket\fR=<\fIntasks\fR> @@ -1452,11 +1507,12 @@ to bind the tasks to specific sockets unless \fB\-\-cpu_bind=none\fR is specified. NOTE: This option is not supported unless \fISelectTypeParameters=CR_Socket\fR or -\fISelectTypeParameters=CR_Socket_Memory\fR is configured. 
+\fISelectTypeParameters=CR_Socket_Memory\fR is configured. This option applies +to job allocations. .TP \fB\-O\fR, \fB\-\-overcommit\fR -Overcommit resources. +Overcommit resources. This option applies to job and step allocations. When applied to job allocation, only one CPU is allocated to the job per node and options used to specify the number of tasks per node, socket, core, etc. are ignored. @@ -1482,13 +1538,14 @@ If the specified file already exists, it will be overwritten. .br If \fB\-\-error\fR is not also specified on the command line, both -stdout and stderr will directed to the file specified by \fB\-\-output\fR. +stdout and stderr will directed to the file specified by \fB\-\-output\fR. This +option applies to job and step allocations. .TP \fB\-\-open\-mode\fR=<\fIappend|truncate\fR> Open the output and error files using append or truncate mode as specified. The default value is specified by the system configuration parameter -\fIJobFileAppend\fR. +\fIJobFileAppend\fR. This option applies to job allocations. .TP \fB\-p\fR, \fB\-\-partition\fR=<\fIpartition_names\fR> @@ -1499,7 +1556,7 @@ than one partition, specify their names in a comma separate list and the one offering earliest initiation will be used with no regard given to the partition name ordering (although higher priority partitions will be considered first). When the job is initiated, the name of the partition used will be placed first -in the job record partition string. +in the job record partition string. This option applies to job allocations. .TP \fB\-\-power\fR=<\fIflags\fR> @@ -1507,12 +1564,14 @@ Comma separated list of power management plugin options. Currently available flags include: level (all nodes allocated to the job should have identical power caps, may be disabled by the Slurm configuration option PowerParameters=job_no_level). +This option applies to job allocations. .TP \fB\-\-priority\fR=<value> Request a specific job priority. May be subject to configuration specific constraints. -Only Slurm operators and administrators can set the priority of a job. +Only Slurm operators and administrators can set the priority of a job. This +option applies to job allocations. .TP \fB\-\-profile\fR=<all|none|[energy[,|task[,|filesystem[,|network]]]]> @@ -1545,6 +1604,9 @@ Filesystem data is collected. .TP \fBNetwork\fR Network (InfiniBand) data is collected. + +.TP +This option applies to job and step allocations. .RE .TP @@ -1554,7 +1616,7 @@ The command line arguments for \fIexecutable\fR will be the command and arguments of the job step. If \fIexecutable\fR is "none", then no srun prolog will be run. This parameter overrides the SrunProlog parameter in slurm.conf. This parameter is completely independent from -the Prolog parameter in slurm.conf. +the Prolog parameter in slurm.conf. This option applies to job allocations. .TP \fB\-\-propagate\fR[=\fIrlimits\fR] @@ -1600,6 +1662,8 @@ The maximum resident set size .TP \fBSTACK\fR The maximum stack size +.TP +This option applies to job allocations. .RE .TP @@ -1610,18 +1674,20 @@ Implicitly sets \fB\-\-error\fR and \fB\-\-output\fR to /dev/null for all tasks except task zero, which may cause those tasks to exit immediately (e.g. shells will typically exit immediately in that situation). -Not currently supported on AIX platforms. +Not currently supported on AIX platforms. This option applies to step +allocations. .TP \fB\-Q\fR, \fB\-\-quiet\fR -Suppress informational messages from srun. Errors will still be displayed. +Suppress informational messages from srun. 
Errors will still be displayed. This +option applies to job and step allocations. .TP \fB\-q\fR, \fB\-\-quit\-on\-interrupt\fR Quit immediately on single SIGINT (Ctrl\-C). Use of this option disables the status feature normally available when \fBsrun\fR receives a single Ctrl\-C and causes \fBsrun\fR to instead immediately terminate the -running job. +running job. This option applies to step allocations. .TP \fB\-\-qos\fR=<\fIqos\fR> @@ -1629,7 +1695,7 @@ Request a quality of service for the job. QOS values can be defined for each user/cluster/account association in the Slurm database. Users will be limited to their association's defined set of qos's when the Slurm configuration parameter, AccountingStorageEnforce, includes -"qos" in it's definition. +"qos" in it's definition. This option applies to job allocations. .TP \fB\-r\fR, \fB\-\-relative\fR=<\fIn\fR> @@ -1643,13 +1709,14 @@ fatal error when not running within a prior allocation (i.e. when SLURM_JOB_ID is not set). The default for \fIn\fR is 0. If the value of \fB\-\-nodes\fR exceeds the number of nodes identified with the \fB\-\-relative\fR option, a warning message will be -printed and the \fB\-\-relative\fR option will take precedence. +printed and the \fB\-\-relative\fR option will take precedence. This option +applies to step allocations. .TP \fB\-\-reboot\fR Force the allocated nodes to reboot before starting the job. This is only supported with some system configurations and will otherwise be -silently ignored. +silently ignored. This option applies to job allocations. .TP \fB\-\-resv\-ports\fR @@ -1658,16 +1725,18 @@ of port they want to reserve. The parameter MpiParams=ports=12000-12999 must be specified in \fIslurm.conf\fR. If not specified the default reserve number of ports equal to the number of tasks. If the number of reserved ports is zero no ports is reserved. -Used for OpenMPI. +Used for OpenMPI. This option applies to job and step allocations. .TP \fB\-\-reservation\fR=<\fIname\fR> -Allocate resources for the job from the named reservation. +Allocate resources for the job from the named reservation. This option applies +to job allocations. .TP \fB\-\-restart\-dir\fR=<\fIdirectory\fR> Specifies the directory from which the job or job step's checkpoint should -be read (used by the checkpoint/blcrm and checkpoint/xlch plugins only). +be read (used by the checkpoint/blcrm and checkpoint/xlch plugins only). This +option applies to job allocations. \fB\-\-share\fR The \fB\-\-share\fR option has been replaced by the \fB\-\-oversubscribe\fR @@ -1683,7 +1752,7 @@ partition's \fBOverSubscribe\fR option takes precedence over the job's option. This option may result in the allocation being granted sooner than if the \-\-oversubscribe option was not set and allow higher system utilization, but application performance will likely suffer due to competition for resources. -Also see the \-\-exclusive option. +Also see the \-\-exclusive option. This option applies to step allocations. .TP \fB\-S\fR, \fB\-\-core\-spec\fR=<\fInum\fR> @@ -1694,7 +1763,8 @@ Default value is dependent upon the node's configured CoreSpecCount value. If a value of zero is designated and the Slurm configuration option AllowSpecResourcesUsage is enabled, the job will be allowed to override CoreSpecCount and use the specialized resources on nodes it is allocated. -This option can not be used with the \fB\-\-thread\-spec\fR option. +This option can not be used with the \fB\-\-thread\-spec\fR option. This option +applies to job allocations. 
.TP \fB\-\-signal\fR=<\fIsig_num\fR>[@<\fIsig_time\fR>] @@ -1706,7 +1776,7 @@ be sent up to 60 seconds earlier than specified. \fIsig_time\fR must have an integer value between 0 and 65535. By default, no signal is sent before the job's end time. If a \fIsig_num\fR is specified without any \fIsig_time\fR, -the default time will be 60 seconds. +the default time will be 60 seconds. This option applies to job allocations. .TP \fB\-\-slurmd\-debug\fR=<\fIlevel\fR> @@ -1733,13 +1803,14 @@ Log errors and verbose informational messages .RE The slurmd debug information is copied onto the stderr of -the job. By default only errors are displayed. +the job. By default only errors are displayed. This option applies to job and +step allocations. .TP \fB\-\-sockets\-per\-node\fR=<\fIsockets\fR> Restrict node selection to nodes with at least the specified number of sockets. See additional information under \fB\-B\fR option above when -task/affinity plugin is enabled. +task/affinity plugin is enabled. This option applies to job allocations. .TP \fB\-\-switches\fR=<\fIcount\fR>[@<\fImax\-time\fR>] @@ -1755,7 +1826,8 @@ Acceptable time formats include "minutes", "minutes:seconds", The job's maximum time delay may be limited by the system administrator using the \fBSchedulerParameters\fR configuration parameter with the \fBmax_switch_wait\fR parameter option. -The default max\-time is the max_switch_wait SchedulerParameters. +The default max\-time is the max_switch_wait SchedulerParameters. This option +applies to job allocations. .TP \fB\-T\fR, \fB\-\-threads\fR=<\fInthreads\fR> @@ -1766,7 +1838,7 @@ thread per allocated node up to a maximum of 60 concurrent threads. Specifying this option limits the number of concurrent threads to \fInthreads\fR (less than or equal to 60). This should only be used to set a low thread count for testing on -very small memory computers. +very small memory computers. This option applies to job allocations. .TP \fB\-t\fR, \fB\-\-time\fR=<\fItime\fR> @@ -1782,7 +1854,8 @@ and second values are rounded up to the next minute. A time limit of zero requests that no time limit be imposed. Acceptable time formats include "minutes", "minutes:seconds", "hours:minutes:seconds", -"days\-hours", "days\-hours:minutes" and "days\-hours:minutes:seconds". +"days\-hours", "days\-hours:minutes" and "days\-hours:minutes:seconds". This +option applies to job and step allocations. .TP \fB\-\-task\-epilog\fR=<\fIexecutable\fR> @@ -1790,7 +1863,8 @@ The \fBslurmstepd\fR daemon will run \fIexecutable\fR just after each task terminates. This will be executed before any TaskEpilog parameter in slurm.conf is executed. This is meant to be a very short\-lived program. If it fails to terminate within a few seconds, it will be -killed along with any descendant processes. +killed along with any descendant processes. This option applies to step +allocations. .TP \fB\-\-task\-prolog\fR=<\fIexecutable\fR> @@ -1801,7 +1875,7 @@ Besides the normal environment variables, this has SLURM_TASK_PID available to identify the process ID of the task being started. Standard output from this program of the form "export NAME=value" will be used to set environment variables -for the task being spawned. +for the task being spawned. This option applies to step allocations. .TP \fB\-\-test\-only\fR @@ -1811,14 +1885,16 @@ the job. This limits \fBsrun's\fR behavior to just return information; no job is actually submitted. 
EXCEPTION: On Bluegene/Q systems on when running within an existing job allocation, this disables the use of "runjob" to launch tasks. The program -will be executed directly by the slurmd daemon. +will be executed directly by the slurmd daemon. This option applies to job +allocations. .TP \fB\-\-thread\-spec\fR=<\fInum\fR> Count of specialized threads per node reserved by the job for system operations and not used by the application. The application will not use these threads, but will be charged for their allocation. -This option can not be used with the \fB\-\-core\-spec\fR option. +This option can not be used with the \fB\-\-core\-spec\fR option. This option +applies to job allocations. .TP \fB\-\-threads\-per\-core\fR=<\fIthreads\fR> @@ -1826,7 +1902,7 @@ Restrict node selection to nodes with at least the specified number of threads per core. NOTE: "Threads" refers to the number of processing units on each core rather than the number of application tasks to be launched per core. See additional information under \fB\-B\fR option above when task/affinity -plugin is enabled. +plugin is enabled. This option applies to job allocations. .TP \fB\-\-time\-min\fR=<\fItime\fR> @@ -1839,11 +1915,12 @@ This is performed by a backfill scheduling algorithm to allocate resources otherwise reserved for higher priority jobs. Acceptable time formats include "minutes", "minutes:seconds", "hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and -"days\-hours:minutes:seconds". +"days\-hours:minutes:seconds". This option applies to job allocations. .TP \fB\-\-tmp\fR=<\fIMB\fR> -Specify a minimum amount of temporary disk space. +Specify a minimum amount of temporary disk space. This option applies to job +allocations. .TP \fB\-u\fR, \fB\-\-unbuffered\fR @@ -1851,7 +1928,8 @@ By default the connection between slurmstepd and the user launched application is over a pipe. The stdio output written by the application is buffered by the glibc until it is flushed or the output is set as unbuffered. See setbuf(3). If this option is specified the tasks are executed with -a pseudo terminal so that the application output is unbuffered. +a pseudo terminal so that the application output is unbuffered. This option +applies to step allocations. .TP \fB\-\-usage\fR Display brief help message and exit. @@ -1864,7 +1942,8 @@ to check access permissions for the target partition. User root may use this option to run jobs as a normal user in a RootOnly partition for example. If run as root, \fBsrun\fR will drop its permissions to the uid specified after node allocation is -successful. \fIuser\fR may be the user name or numerical user ID. +successful. \fIuser\fR may be the user name or numerical user ID. This option +applies to job and step allocations. .TP \fB\-V\fR, \fB\-\-version\fR @@ -1874,7 +1953,7 @@ Display version information and exit. \fB\-v\fR, \fB\-\-verbose\fR Increase the verbosity of srun's informational messages. Multiple \fB\-v\fR's will further increase srun's verbosity. By default only -errors will be displayed. +errors will be displayed. This option applies to job and step allocations. .TP \fB\-W\fR, \fB\-\-wait\fR=<\fIseconds\fR> @@ -1886,7 +1965,7 @@ option can be useful to insure that a job is terminated in a timely fashion in the event that one or more tasks terminate prematurely. Note: The \fB\-K\fR, \fB\-\-kill\-on\-bad\-exit\fR option takes precedence over \fB\-W\fR, \fB\-\-wait\fR to terminate the job immediately if a task -exits with a non\-zero exit code. +exits with a non\-zero exit code. 
This option applies to job allocations. .TP \fB\-w\fR, \fB\-\-nodelist\fR=<\fIhost1,host2,...\fR or \fIfilename\fR> @@ -1901,12 +1980,13 @@ by the supplied host list, additional resources will be allocated on other nodes as needed. Rather than repeating a host name multiple times, an asterisk and a repetition count may be appended to a host name. For example -"host1,host1" and "host1*2" are equivalent. +"host1,host1" and "host1*2" are equivalent. This option applies to job and step +allocations. .TP \fB\-\-wckey\fR=<\fIwckey\fR> Specify wckey to be used with job. If TrackWCKey=no (default) in the -slurm.conf this value is ignored. +slurm.conf this value is ignored. This option applies to job allocations. .TP \fB\-X\fR, \fB\-\-disable\-status\fR @@ -1914,13 +1994,14 @@ Disable the display of task status when srun receives a single SIGINT (Ctrl\-C). Instead immediately forward the SIGINT to the running job. Without this option a second Ctrl\-C in one second is required to forcibly terminate the job and \fBsrun\fR will immediately exit. May also be -set via the environment variable SLURM_DISABLE_STATUS. +set via the environment variable SLURM_DISABLE_STATUS. This option applies to +job allocations. .TP \fB\-x\fR, \fB\-\-exclude\fR=<\fIhost1,host2,...\fR or \fIfilename\fR> Request that a specific list of hosts not be included in the resources allocated to this job. The host list will be assumed to be a filename -if it contains a "/"character. +if it contains a "/"character. This option applies to job allocations. .TP \fB\-Z\fR, \fB\-\-no\-allocate\fR @@ -1928,7 +2009,8 @@ Run the specified tasks on a set of nodes without creating a Slurm "job" in the Slurm queue structure, bypassing the normal resource allocation step. The list of nodes must be specified with the \fB\-w\fR, \fB\-\-nodelist\fR option. This is a privileged option -only available for the users "SlurmUser" and "root". +only available for the users "SlurmUser" and "root". This option applies to job +allocations. .PP The following options support Blue Gene systems, but may be @@ -1937,12 +2019,14 @@ applicable to other systems as well. .TP \fB\-\-blrts\-image\fR=<\fIpath\fR> Path to blrts image for bluegene block. BGL only. -Default from \fIblugene.conf\fR if not set. +Default from \fIblugene.conf\fR if not set. This option applies to job +allocations. .TP \fB\-\-cnload\-image\fR=<\fIpath\fR> Path to compute node image for bluegene block. BGP only. -Default from \fIblugene.conf\fR if not set. +Default from \fIblugene.conf\fR if not set. This option applies to job +allocations. .TP \fB\-\-conn\-type\fR=<\fItype\fR> @@ -1958,7 +2042,7 @@ for virtual node mode, and HTC_L for Linux mode. For systems that allow a different connection type per dimension you can supply a comma separated list of connection types may be specified, one for each dimension (i.e. M,T,T,T will give you a torus connection is all -dimensions expect the first). +dimensions expect the first). This option applies to job allocations. .TP \fB\-g\fR, \fB\-\-geometry\fR=<\fIXxYxZ\fR> | <\fIAxXxYxZ\fR> @@ -1967,33 +2051,39 @@ systems there are three numbers giving dimensions in the X, Y and Z directions, while on BlueGene/Q systems there are four numbers giving dimensions in the A, X, Y and Z directions and can not be used to allocate sub-blocks. For example "\-\-geometry=1x2x3x4", specifies a block of nodes having -1 x 2 x 3 x 4 = 24 nodes (actually midplanes on BlueGene). +1 x 2 x 3 x 4 = 24 nodes (actually midplanes on BlueGene). This option applies +to job allocations. 
.TP \fB\-\-ioload\-image\fR=<\fIpath\fR> Path to io image for bluegene block. BGP only. -Default from \fIblugene.conf\fR if not set. +Default from \fIblugene.conf\fR if not set. This option applies to job +allocations. .TP \fB\-\-linux\-image\fR=<\fIpath\fR> Path to linux image for bluegene block. BGL only. -Default from \fIblugene.conf\fR if not set. +Default from \fIblugene.conf\fR if not set. This option applies to job +allocations. .TP \fB\-\-mloader\-image\fR=<\fIpath\fR> Path to mloader image for bluegene block. -Default from \fIblugene.conf\fR if not set. +Default from \fIblugene.conf\fR if not set. This option applies to job +allocations. .TP \fB\-R\fR, \fB\-\-no\-rotate\fR Disables rotation of the job's requested geometry in order to fit an appropriate block. By default the specified geometry can rotate in three dimensions. +This option applies to job allocations. .TP \fB\-\-ramdisk\-image\fR=<\fIpath\fR> Path to ramdisk image for bluegene block. BGL only. -Default from \fIblugene.conf\fR if not set. +Default from \fIblugene.conf\fR if not set. This option applies to job +allocations. .PP .B srun diff --git a/doc/man/man5/knl.conf.5 b/doc/man/man5/knl.conf.5 index 85831960753b98a8fd07daf3edc2d17e848f3d89..f1abe8195816e3de75b3e7d4ad9dac5789474ada 100644 --- a/doc/man/man5/knl.conf.5 +++ b/doc/man/man5/knl.conf.5 @@ -96,7 +96,13 @@ This parameter is used only by the "knl_cray" plugin. \fBCapmcTimeout\fR Time limit for the \fBcapmc\fR program to return status information milliseconds. The default value is 10000 milliseconds and the minumum value is 1000 milliseconds. -This parameter is used only by the "knl_cray.conf" plugin. +This parameter is used only by the "knl_cray" plugin. + +.TP +\fBCnselectPath\fR +Fully qualified path to the \fBcnselect\fR program. +The default value is "/opt/cray/sdb/default/bin/cnselect". +This parameter is used only by the "knl_cray" plugin. .TP \fBDefaultMCDRAM\fR @@ -128,6 +134,7 @@ This is option is used only by the campc_suspend and campc_resume programs Fully qualified path to Intel's \fBsyscfg\fR program, which identifies current KNL configuration by viewing BIOS settings. If not defined, the current BIOS setting will not be available. +This parameter is not used by the "knl_cray" plugin. .SH "EXAMPLE" .LP diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 3c148b721d5d2cf2090b628ab8c02fdf45bc7ce3..ebbdc212e3f953ece09f3aaeef5c5704b1a084cb 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1,4 +1,4 @@ -.TH "slurm.conf" "5" "Slurm Configuration File" "April 2016" "Slurm Configuration File" +.TH "slurm.conf" "5" "Slurm Configuration File" "June 2016" "Slurm Configuration File" .SH "NAME" slurm.conf \- Slurm configuration file @@ -3362,6 +3362,8 @@ exclusive and since they decrease scheduling flexibility are not generally recommended (select no more than one of them). \fBCpusets\fR and \fBSched\fR are mutually exclusive (select only one of them). +All TaskPluginParam options are supported on FreeBSD except \fBCpusets\fR. +The \fBSched\fR option uses cpuset_setaffinity() on FreeBSD, not sched_setaffinity(). 
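The FreeBSD note above states that the Sched option goes through cpuset_setaffinity() rather than sched_setaffinity(). A minimal, self-contained sketch (not Slurm code) of the two calls, pinning the calling process to CPU 0 on either platform; the CPU number and the bare-bones error handling are illustrative only:

    #define _GNU_SOURCE            /* for sched_setaffinity() on Linux */
    #include <stdio.h>

    #ifdef __FreeBSD__
    #include <sys/param.h>
    #include <sys/cpuset.h>
    #else
    #include <sched.h>
    #endif

    int main(void)
    {
    #ifdef __FreeBSD__
        cpuset_t mask;
        CPU_ZERO(&mask);
        CPU_SET(0, &mask);
        /* FreeBSD: bind the calling process (id -1 == current) to CPU 0. */
        if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
                               sizeof(mask), &mask) != 0) {
            perror("cpuset_setaffinity");
            return 1;
        }
    #else
        cpu_set_t mask;
        CPU_ZERO(&mask);
        CPU_SET(0, &mask);
        /* Linux: bind the calling process (pid 0 == current) to CPU 0. */
        if (sched_setaffinity(0, sizeof(mask), &mask) != 0) {
            perror("sched_setaffinity");
            return 1;
        }
    #endif
        printf("bound to CPU 0\n");
        return 0;
    }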
.RS .TP 10 diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index f5ef5ee1c3adc69040c7a16d660762765492ccd9..19a7f71f4e91b712840e260369b39c09158c67b8 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -996,6 +996,8 @@ enum ctx_keys { #define BACKFILL_TEST 0x00000008 /* Backfill test in progress */ #define GRES_ENFORCE_BIND 0x00000010 /* Enforce CPU/GRES binding */ #define TEST_NOW_ONLY 0x00000020 /* Test for immediately start only */ +#define NODE_MEM_CALC 0x00000040 /* Per-node memory limit calculated */ +#define NODE_REBOOT 0x00000080 /* Waiting for node reboot */ /*****************************************************************************\ * SLURM HOSTLIST FUNCTIONS diff --git a/slurm/slurm_errno.h b/slurm/slurm_errno.h index d667c422320b778e6595c3ccc51c25ad3b6bbee9..a3e8b5e76c0d6e6797131803445480f0f45eef67 100644 --- a/slurm/slurm_errno.h +++ b/slurm/slurm_errno.h @@ -188,6 +188,7 @@ enum { ESLURM_POWER_RESERVED, ESLURM_INVALID_POWERCAP, ESLURM_INVALID_MCS_LABEL, + ESLURM_BURST_BUFFER_WAIT = 2100, /* switch specific error codes, specific values defined in plugin module */ ESLURM_SWITCH_MIN = 3000, diff --git a/slurm/spank.h b/slurm/spank.h index 2db5ec499bd4bdde7e672ba07545b6528ce44954..42855cd1c869ac65e82a090e75d307d0c65da095 100644 --- a/slurm/spank.h +++ b/slurm/spank.h @@ -417,7 +417,7 @@ extern void slurm_debug2 (const char *format, ...) extern void slurm_debug3 (const char *format, ...) __attribute__ ((format (printf, 1, 2))); -#ifdef __cpluscplus +#ifdef __cplusplus } #endif diff --git a/src/api/allocate.c b/src/api/allocate.c index 359e478d092bc8c65d34fef47333714c3e097336..137f3c8200d977867701e199b0f43a783cb1d3c9 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -749,12 +749,15 @@ static void _destroy_allocation_response_socket(listen_t *listen) static int _handle_msg(slurm_msg_t *msg, resource_allocation_response_msg_t **resp) { - uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + char *auth_info = slurm_get_auth_info(); + uid_t req_uid; uid_t uid = getuid(); uid_t slurm_uid = (uid_t) slurm_get_slurm_user_id(); int rc = 0; + req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info); + xfree(auth_info); + if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) { error ("Security violation, slurm message from uid %u", (unsigned int) req_uid); diff --git a/src/api/allocate_msg.c b/src/api/allocate_msg.c index e60791c3d834f310e1b07472b592d6ee0423d38a..6f0b9bd786c7175bec79057b3fdaff27705db4d0 100644 --- a/src/api/allocate_msg.c +++ b/src/api/allocate_msg.c @@ -248,12 +248,15 @@ static void _handle_suspend(struct allocation_msg_thread *msg_thr, static void _handle_msg(void *arg, slurm_msg_t *msg) { + char *auth_info = slurm_get_auth_info(); struct allocation_msg_thread *msg_thr = (struct allocation_msg_thread *)arg; - uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + uid_t req_uid; uid_t uid = getuid(); + req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info); + xfree(auth_info); + if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) { error ("Security violation, slurm message from uid %u", (unsigned int) req_uid); diff --git a/src/api/job_info.c b/src/api/job_info.c index 305c2edafd1a639a8bc8150dc3b18baf80649a2f..d40d35485988ff16d25983055e53c56c54da0a8f 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -684,8 +684,8 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) } xstrcat(out, line_end); } - } else if (job_resrcs && job_resrcs->core_bitmap - && ((last = 
bit_fls(job_resrcs->core_bitmap) != -1))) { + } else if (job_resrcs && job_resrcs->core_bitmap && + ((last = bit_fls(job_resrcs->core_bitmap)) != -1)) { hl = hostlist_create(job_resrcs->nodes); if (!hl) { error("slurm_sprint_job_info: hostlist_create: %s", diff --git a/src/api/node_info.c b/src/api/node_info.c index 2060964633caf9908e5deadf88c1f2a356fb8fd4..e58111d4aa103cf50f5a17a5f903884144db1d0c 100644 --- a/src/api/node_info.c +++ b/src/api/node_info.c @@ -535,7 +535,7 @@ extern int slurm_load_node_single (node_info_msg_t **resp, * configured sensors on the target machine * IN host - name of node to query, NULL if localhost * IN delta - Use cache if data is newer than this in seconds - * OUT nb_sensors - number of sensors + * OUT sensors_cnt - number of sensors * OUT energy - array of acct_gather_energy_t structures on success or * NULL other wise * RET 0 on success or a slurm error code @@ -552,6 +552,12 @@ extern int slurm_get_node_energy(char *host, uint16_t delta, uint32_t cluster_flags = slurmdb_setup_cluster_flags(); char *this_addr; + xassert(sensor_cnt); + xassert(energy); + + *sensor_cnt = 0; + *energy = NULL; + slurm_msg_t_init(&req_msg); slurm_msg_t_init(&resp_msg); diff --git a/src/api/step_launch.c b/src/api/step_launch.c index 2c871f826cee8bab3ad37f9ccdd3b653857edd34..0d4c8a165682d6ea5735b8b17380afe19ad8a556 100644 --- a/src/api/step_launch.c +++ b/src/api/step_launch.c @@ -966,6 +966,12 @@ static int _connect_srun_cr(char *addr) unsigned int sa_len; int fd, rc; +#ifdef UNIX_PATH_MAX + if (addr && (strlen(addr) > UNIX_PATH_MAX)) { + error("%s: socket path name too long (%s)", __func__, addr); + return -1; + } +#endif fd = socket(AF_UNIX, SOCK_STREAM, 0); if (fd < 0) { error("failed creating cr socket: %m"); @@ -1493,13 +1499,16 @@ _task_user_managed_io_handler(struct step_launch_state *sls, static void _handle_msg(void *arg, slurm_msg_t *msg) { + char *auth_info = slurm_get_auth_info(); struct step_launch_state *sls = (struct step_launch_state *)arg; - uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + uid_t req_uid; uid_t uid = getuid(); srun_user_msg_t *um; int rc; + req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info); + xfree(auth_info); + if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) { error ("Security violation, slurm message from uid %u", (unsigned int) req_uid); diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index 16fd1298f1ca282af22f397aa580e0f864ade302..51c73be6162647ae0dac832123d3292eb43e3fb0 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -315,8 +315,11 @@ static void _normalize_assoc_shares_fair_tree( { slurmdb_assoc_rec_t *fs_assoc = assoc; double shares_norm = 0.0; - if (assoc->shares_raw == SLURMDB_FS_USE_PARENT) + + if ((assoc->shares_raw == SLURMDB_FS_USE_PARENT) + && assoc->usage->fs_assoc_ptr) fs_assoc = assoc->usage->fs_assoc_ptr; + if (fs_assoc->usage->level_shares) shares_norm = (double)fs_assoc->shares_raw / @@ -723,7 +726,7 @@ static slurmdb_assoc_rec_t* _find_assoc_parent( } /* locks should be put in place before calling this function - * ASSOC_WRITE, USER_WRITE */ + * ASSOC_WRITE, USER_WRITE, TRES_READ */ static int _set_assoc_parent_and_user(slurmdb_assoc_rec_t *assoc, int reset) { @@ -903,7 +906,7 @@ static void _set_children_level_shares(slurmdb_assoc_rec_t *assoc, /* transfer slurmdb assoc list to be assoc_mgr assoc list */ /* locks should be put in place before calling this function - * ASSOC_WRITE, USER_WRITE */ + * ASSOC_WRITE, USER_WRITE, TRES_READ */ 
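The job_info.c hunk above corrects an operator-precedence bug: without the added parentheses, "!=" binds before "=", so the variable receives the comparison result rather than the bit index returned by bit_fls(). A small stand-alone illustration of the same pitfall, using a hypothetical last_set_bit() in place of bit_fls():

    #include <stdio.h>

    /* Hypothetical stand-in for bit_fls(): index of the highest set bit,
     * or -1 if no bit is set. */
    static int last_set_bit(unsigned int v)
    {
        int i = -1;
        while (v) {
            v >>= 1;
            i++;
        }
        return i;
    }

    int main(void)
    {
        unsigned int bits = 0x28;   /* highest set bit is bit 5 */
        int last;

        /* Buggy form: '!=' is evaluated first, so 'last' gets the
         * boolean result of the comparison. */
        if ((last = last_set_bit(bits) != -1))
            printf("buggy: last = %d\n", last);   /* prints 1 */

        /* Fixed form: parenthesize the assignment, then compare. */
        if ((last = last_set_bit(bits)) != -1)
            printf("fixed: last = %d\n", last);   /* prints 5 */

        return 0;
    }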
static int _post_assoc_list(void) { slurmdb_assoc_rec_t *assoc = NULL; diff --git a/src/common/bitstring.c b/src/common/bitstring.c index 4834f707a155cd6031487069b4ff317ead995dab..355a4e4eb8a5ec40509f71e332ec89ef24d6415b 100644 --- a/src/common/bitstring.c +++ b/src/common/bitstring.c @@ -48,6 +48,7 @@ #include "src/common/bitstring.h" #include "src/common/log.h" #include "src/common/macros.h" +#include "src/common/xassert.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -1091,47 +1092,68 @@ bit_unfmt(bitstr_t *b, char *str) * bitfmt2int - convert a string describing bitmap (output from bit_fmt, * e.g. "0-30,45,50-60") into an array of integer (start/end) pairs * terminated by -1 (e.g. "0, 30, 45, 45, 50, 60, -1") + * Also supports the "1-17:4" step format ("1, 5, 9, 13, 17, -1"). * input: bitmap string as produced by bitstring.c : bitfmt * output: an array of integers * NOTE: the caller must xfree the returned memory */ -int32_t * -bitfmt2int (char *bit_str_ptr) +int32_t *bitfmt2int(char *bit_str_ptr) { int32_t *bit_int_ptr, i, bit_inx, size, sum, start_val; + char *tmp = NULL; + int32_t start_task_id = -1; + int32_t end_task_id = -1; + int32_t step = -1; if (bit_str_ptr == NULL) return NULL; - size = strlen (bit_str_ptr) + 1; - bit_int_ptr = xmalloc ( sizeof (int32_t) * - (size * 2 + 1)); /* more than enough space */ - - bit_inx = sum = 0; - start_val = -1; - for (i = 0; i < size; i++) { - if (bit_str_ptr[i] >= '0' && - bit_str_ptr[i] <= '9'){ - sum = (sum * 10) + (bit_str_ptr[i] - '0'); - } - - else if (bit_str_ptr[i] == '-') { - start_val = sum; - sum = 0; - } - - else if (bit_str_ptr[i] == ',' || - bit_str_ptr[i] == '\0') { - if (i == 0) - break; - if (start_val == -1) + if (!(xstrchr(bit_str_ptr, ':'))) { + size = strlen(bit_str_ptr) + 1; + /* more than enough space */ + bit_int_ptr = xmalloc(sizeof(int32_t) * (size * 2 + 1)); + bit_inx = sum = 0; + start_val = -1; + for (i = 0; i < size; i++) { + if (bit_str_ptr[i] >= '0' && + bit_str_ptr[i] <= '9') { + sum = (sum * 10) + (bit_str_ptr[i] - '0'); + } else if (bit_str_ptr[i] == '-') { start_val = sum; - bit_int_ptr[bit_inx++] = start_val; - bit_int_ptr[bit_inx++] = sum; - start_val = -1; - sum = 0; + sum = 0; + } else if (bit_str_ptr[i] == ',' || + bit_str_ptr[i] == '\0') { + if (i == 0) + break; + if (start_val == -1) + start_val = sum; + bit_int_ptr[bit_inx++] = start_val; + bit_int_ptr[bit_inx++] = sum; + start_val = -1; + sum = 0; + } + } + xassert(bit_inx < (size * 2 + 1)); + } else { /* handle step format */ + start_task_id = strtol(bit_str_ptr, &tmp, 10); + if (*tmp != '-') + return NULL; + end_task_id = strtol(tmp + 1, &tmp, 10); + if (*tmp != ':') + return NULL; + step = strtol(tmp + 1, &tmp, 10); + if (*tmp != '\0') + return NULL; + if (end_task_id < start_task_id || step <= 0) + return NULL; + + size = ((end_task_id - start_task_id) / step) + 1; + bit_int_ptr = xmalloc(sizeof(int32_t) * (size * 2 + 1)); + bit_inx = 0; + for(i = start_task_id; i < end_task_id; i += step) { + bit_int_ptr[bit_inx++] = i; /* start of pair */ + bit_int_ptr[bit_inx++] = i; /* end of pair */ } } - assert(bit_inx < (size*2+1)); bit_int_ptr[bit_inx] = -1; return bit_int_ptr; } diff --git a/src/common/eio.c b/src/common/eio.c index 93ec62ef0cdfbdbc91c40a42cb98d7b1a2ae7bd5..6bb6e9637e4856a9bd589cb05f4611d9629f9198 100644 --- a/src/common/eio.c +++ b/src/common/eio.c @@ -82,6 +82,7 @@ struct eio_handle_components { int magic; #endif int fds[2]; + pthread_mutex_t shutdown_mutex; time_t shutdown_time; uint16_t shutdown_wait; List 
obj_list; @@ -120,6 +121,7 @@ eio_handle_t *eio_handle_create(uint16_t shutdown_wait) eio->obj_list = list_create(eio_obj_destroy); eio->new_objs = list_create(eio_obj_destroy); + slurm_mutex_init(&eio->shutdown_mutex); eio->shutdown_wait = DEFAULT_EIO_SHUTDOWN_WAIT; if (shutdown_wait > 0) eio->shutdown_wait = shutdown_wait; @@ -135,6 +137,7 @@ void eio_handle_destroy(eio_handle_t *eio) close(eio->fds[1]); FREE_NULL_LIST(eio->obj_list); FREE_NULL_LIST(eio->new_objs); + slurm_mutex_destroy(&eio->shutdown_mutex); xassert(eio->magic = ~EIO_MAGIC); xfree(eio); @@ -229,7 +232,9 @@ int eio_signal_shutdown(eio_handle_t *eio) { char c = 1; + slurm_mutex_lock(&eio->shutdown_mutex); eio->shutdown_time = time(NULL); + slurm_mutex_unlock(&eio->shutdown_mutex); if (eio && (write(eio->fds[1], &c, sizeof(char)) != 1)) return error("eio_handle_signal_shutdown: write; %m"); return 0; @@ -281,6 +286,7 @@ int eio_handle_mainloop(eio_handle_t *eio) eio_obj_t **map = NULL; unsigned int maxnfds = 0, nfds = 0; unsigned int n = 0; + time_t shutdown_time; xassert (eio != NULL); xassert (eio->magic == EIO_MAGIC); @@ -314,7 +320,10 @@ int eio_handle_mainloop(eio_handle_t *eio) xassert(nfds <= maxnfds + 1); - if (_poll_internal(pollfds, nfds, eio->shutdown_time) < 0) + slurm_mutex_lock(&eio->shutdown_mutex); + shutdown_time = eio->shutdown_time; + slurm_mutex_unlock(&eio->shutdown_mutex); + if (_poll_internal(pollfds, nfds, shutdown_time) < 0) goto error; if (pollfds[nfds-1].revents & POLLIN) @@ -322,9 +331,11 @@ int eio_handle_mainloop(eio_handle_t *eio) _poll_dispatch(pollfds, nfds - 1, map, eio->obj_list); - if (eio->shutdown_time - && difftime(time(NULL), eio->shutdown_time) - >= eio->shutdown_wait) { + slurm_mutex_lock(&eio->shutdown_mutex); + shutdown_time = eio->shutdown_time; + slurm_mutex_unlock(&eio->shutdown_mutex); + if (shutdown_time && + (difftime(time(NULL), shutdown_time)>=eio->shutdown_wait)) { error("%s: Abandoning IO %d secs after job shutdown " "initiated", __func__, eio->shutdown_wait); break; diff --git a/src/common/hostlist.c b/src/common/hostlist.c index 5cd0cc43b003c00f9594e3aca27c5f140717af40..8173620269ca55a488fb1799e29562f053b21b18 100644 --- a/src/common/hostlist.c +++ b/src/common/hostlist.c @@ -76,7 +76,6 @@ #include "src/common/working_cluster.h" #include "src/common/xassert.h" #include "src/common/xmalloc.h" -#include "src/common/xstring.h" /* * Define slurm-specific aliases for use by plugins, see slurm_xlator.h @@ -1117,7 +1116,7 @@ static int hostrange_hn_within(hostrange_t hr, hostname_t hn) * which case we return true. Otherwise, there is no * possibility that [hn] matches [hr]. */ - if (xstrcmp (hn->hostname, hr->prefix) == 0) + if (strcmp (hn->hostname, hr->prefix) == 0) return 1; else return 0; @@ -1135,7 +1134,7 @@ static int hostrange_hn_within(hostrange_t hr, hostname_t hn) * If hostrange and hostname prefixes don't match, then * there is way the hostname falls within the range [hr]. */ - if (xstrcmp(hr->prefix, hn->prefix) != 0) { + if (strcmp(hr->prefix, hn->prefix) != 0) { int len1, len2, ldiff; int dims = slurmdb_setup_cluster_name_dims(); @@ -1179,7 +1178,7 @@ static int hostrange_hn_within(hostrange_t hr, hostname_t hn) hn->num = strtoul(hn->suffix, NULL, 10); /* Now compare them and see if they match */ - if (xstrcmp(hr->prefix, hn->prefix) != 0) + if (strcmp(hr->prefix, hn->prefix) != 0) return 0; } else return 0; @@ -2701,12 +2700,13 @@ static int _is_bracket_needed(hostlist_t hl, int i) * Assumes hostlist is locked. 
*/ static int -_get_bracketed_list(hostlist_t hl, int *start, const size_t n, char *buf) +_get_bracketed_list(hostlist_t hl, int *start, const size_t n, char *buf, + int brackets) { hostrange_t *hr = hl->hr; int i = *start; int m, len = 0; - int bracket_needed = _is_bracket_needed(hl, i); + int bracket_needed = brackets ? _is_bracket_needed(hl, i) : 0; int zeropad = 0; if (is_cray_system()) { @@ -3299,7 +3299,8 @@ notbox: for (i = 0; i < hl->nranges && len < n;) { if (i) buf[len++] = ','; - len += _get_bracketed_list(hl, &i, n - len, buf + len); + len += _get_bracketed_list(hl, &i, n - len, buf + len, + brackets); } } @@ -3501,7 +3502,7 @@ char *hostlist_next_range(hostlist_iterator_t i) buf_size = 8192; buf = malloc(buf_size); if (buf && - (_get_bracketed_list(i->hl, &j, buf_size, buf) == buf_size)) { + (_get_bracketed_list(i->hl, &j, buf_size, buf, 1) == buf_size)) { buf_size *= 2; buf = realloc(buf, buf_size); } diff --git a/src/common/log.c b/src/common/log.c index 7f2c92030c746a3879fdb802b1d6f4e90e1fa2a8..76ba9ad6e00bb2234f81dfd80e60ae3ca3cd112c 100644 --- a/src/common/log.c +++ b/src/common/log.c @@ -54,6 +54,10 @@ # include "config.h" #endif +#if HAVE_SYS_PRCTL_H +# include <sys/prctl.h> +#endif + #include <stdio.h> #if HAVE_STRING_H @@ -686,10 +690,17 @@ set_idbuf(char *idbuf) int max_len = 12; /* handles current longest thread name */ gettimeofday(&now, NULL); - if (pthread_getname_np(pthread_self(), thread_name, NAMELEN)) { +#if HAVE_SYS_PRCTL_H + if (prctl(PR_GET_NAME, thread_name, NULL, NULL, NULL) < 0) { error("failed to get thread name: %m"); - return; + max_len = 0; + thread_name[0] = '\0'; } +#else + /* skip printing thread name if not available */ + max_len = 0; + thread_name[0] = '\0'; +#endif sprintf(idbuf, "%.15s.%-6d %5d %-*s %p", slurm_ctime(&now.tv_sec) + 4, (int)now.tv_usec, (int)getpid(), max_len, thread_name, diff --git a/src/common/node_select.c b/src/common/node_select.c index 84906bdeba30c4032152d7e929b7335116f85cda..a93a258cb21859f122910a3d336fc4554f992b65 100644 --- a/src/common/node_select.c +++ b/src/common/node_select.c @@ -80,6 +80,7 @@ const char *node_select_syms[] = { "select_p_job_expand", "select_p_job_resized", "select_p_job_signal", + "select_p_job_mem_confirm", "select_p_job_fini", "select_p_job_suspend", "select_p_job_resume", @@ -679,6 +680,21 @@ extern int select_g_job_signal(struct job_record *job_ptr, int signal) (job_ptr, signal); } +/* + * Confirm that a job's memory allocation is still valid after a node is + * restarted. This is an issue if the job is allocated all of the memory on a + * node and that node is restarted with a different memory size than at the time + * it is allocated to the job. This would mostly be an issue on an Intel KNL + * node where the memory size would vary with the MCDRAM cache mode. + */ +extern int select_g_job_mem_confirm(struct job_record *job_ptr) +{ + if (slurm_select_init(0) < 0) + return SLURM_ERROR; + + return (*(ops[select_context_default].job_mem_confirm)) (job_ptr); +} + /* * Note termination of job is starting. Executed from slurmctld. 
* IN job_ptr - pointer to job being terminated diff --git a/src/common/node_select.h b/src/common/node_select.h index d682071018d150227acd1ae21121f77e34445dc3..af7be2a6381c5a301cf6aea709df1cab1bd5573f 100644 --- a/src/common/node_select.h +++ b/src/common/node_select.h @@ -157,6 +157,7 @@ typedef struct slurm_select_ops { struct node_record *node_ptr); int (*job_signal) (struct job_record *job_ptr, int signal); + int (*job_mem_confirm) (struct job_record *job_ptr); int (*job_fini) (struct job_record *job_ptr); int (*job_suspend) (struct job_record *job_ptr, bool indf_susp); @@ -606,6 +607,15 @@ extern int select_g_job_fini(struct job_record *job_ptr); */ extern int select_g_job_signal(struct job_record *job_ptr, int signal); +/* + * Confirm that a job's memory allocation is still valid after a node is + * restarted. This is an issue if the job is allocated all of the memory on a + * node and that node is restarted with a different memory size than at the time + * it is allocated to the job. This would mostly be an issue on an Intel KNL + * node where the memory size would vary with the MCDRAM cache mode. + */ +extern int select_g_job_mem_confirm(struct job_record *job_ptr); + /* * Suspend a job. Executed from slurmctld. * IN job_ptr - pointer to job being suspended diff --git a/src/common/proc_args.c b/src/common/proc_args.c index 974dc82082dd4aec4600df78f46aed23d146bf43..7da6bce44d74cc55b52ccc92835ae439a4da7869 100644 --- a/src/common/proc_args.c +++ b/src/common/proc_args.c @@ -843,16 +843,14 @@ bool verify_socket_core_thread_count(const char *arg, int *min_sockets, cpu_bind_type_t *cpu_bind_type) { bool tmp_val,ret_val; - int i,j; + int i, j; int max_sockets = 0, max_cores = 0, max_threads = 0; const char *cur_ptr = arg; char buf[3][48]; /* each can hold INT64_MAX - INT64_MAX */ - buf[0][0] = '\0'; - buf[1][0] = '\0'; - buf[2][0] = '\0'; - for (j=0;j<3;j++) { - for (i=0;i<47;i++) { + memset(buf, 0, sizeof(buf)); + for (j = 0; j < 3; j++) { + for (i = 0; i < 47; i++) { if (*cur_ptr == '\0' || *cur_ptr ==':') break; buf[j][i] = *cur_ptr++; @@ -860,7 +858,6 @@ bool verify_socket_core_thread_count(const char *arg, int *min_sockets, if (*cur_ptr == '\0') break; xassert(*cur_ptr == ':'); - buf[j][i] = '\0'; cur_ptr++; } /* if cpu_bind_type doesn't already have a auto preference, choose @@ -878,7 +875,6 @@ bool verify_socket_core_thread_count(const char *arg, int *min_sockets, *cpu_bind_type |= CPU_BIND_TO_THREADS; } } - buf[j][i] = '\0'; ret_val = true; tmp_val = get_resource_arg_range(&buf[0][0], "first arg of -B", diff --git a/src/common/slurm_acct_gather.c b/src/common/slurm_acct_gather.c index e15cd414216e1688d4714648d2fa7f0f4500d7d7..8db8397e853f6d9db3d701789c26be67b870eb59 100644 --- a/src/common/slurm_acct_gather.c +++ b/src/common/slurm_acct_gather.c @@ -42,7 +42,8 @@ #include "src/common/slurm_strcasestr.h" #include "src/common/xstring.h" -bool acct_gather_suspended = false; +static bool acct_gather_suspended = false; +static pthread_mutex_t suspended_mutex = PTHREAD_MUTEX_INITIALIZER; static bool inited = 0; @@ -244,10 +245,23 @@ extern int acct_gather_check_acct_freq_task( extern void acct_gather_suspend_poll(void) { + slurm_mutex_lock(&suspended_mutex); acct_gather_suspended = true; + slurm_mutex_unlock(&suspended_mutex); } extern void acct_gather_resume_poll(void) { + slurm_mutex_lock(&suspended_mutex); acct_gather_suspended = false; + slurm_mutex_unlock(&suspended_mutex); +} + +extern bool acct_gather_suspend_test(void) +{ + bool rc; + slurm_mutex_lock(&suspended_mutex); + rc = 
acct_gather_suspended; + slurm_mutex_unlock(&suspended_mutex); + return rc; } diff --git a/src/common/slurm_acct_gather.h b/src/common/slurm_acct_gather.h index e6533a92a0148523160a393270639f5b933e6b55..eda1d90697e5a23510adb3b184082526bce19b17 100644 --- a/src/common/slurm_acct_gather.h +++ b/src/common/slurm_acct_gather.h @@ -57,8 +57,6 @@ #include "slurm_acct_gather_infiniband.h" #include "slurm_acct_gather_filesystem.h" -extern bool acct_gather_suspended; - extern int acct_gather_conf_init(void); extern int acct_gather_conf_destroy(void); @@ -69,5 +67,6 @@ extern int acct_gather_check_acct_freq_task( uint32_t job_mem_lim, char *acctg_freq); extern void acct_gather_suspend_poll(void); extern void acct_gather_resume_poll(void); +extern bool acct_gather_suspend_test(void); #endif diff --git a/src/common/slurm_acct_gather_energy.c b/src/common/slurm_acct_gather_energy.c index 56bbf4a5059664ca9c810a22515b93aa1bd06a89..e743ed934be5a4ccca85f836f892c4d755a1bd06 100644 --- a/src/common/slurm_acct_gather_energy.c +++ b/src/common/slurm_acct_gather_energy.c @@ -106,7 +106,7 @@ static void *_watch_node(void *arg) (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - while (init_run && acct_gather_profile_running) { + while (init_run && acct_gather_profile_test()) { /* Do this until shutdown is requested */ slurm_mutex_lock(&g_context_lock); (*(ops.set_data))(ENERGY_DATA_PROFILE, &delta); diff --git a/src/common/slurm_acct_gather_filesystem.c b/src/common/slurm_acct_gather_filesystem.c index 04f0a23e9a9d96904cb2e6cb3aa429beb909d568..fb74ee363053f2812314292084f7131478523f33 100644 --- a/src/common/slurm_acct_gather_filesystem.c +++ b/src/common/slurm_acct_gather_filesystem.c @@ -94,7 +94,7 @@ static void *_watch_node(void *arg) (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - while (init_run && acct_gather_profile_running) { + while (init_run && acct_gather_profile_test()) { /* Do this until shutdown is requested */ slurm_mutex_lock(&g_context_lock); (*(ops.node_update))(); diff --git a/src/common/slurm_acct_gather_infiniband.c b/src/common/slurm_acct_gather_infiniband.c index cefc5b9a28dcd58c34a94841f4afacbcc9f6fb31..bc5e9ee999a2b94eb16ca45901ae25bdb3ff7cfe 100644 --- a/src/common/slurm_acct_gather_infiniband.c +++ b/src/common/slurm_acct_gather_infiniband.c @@ -96,7 +96,7 @@ static void *_watch_node(void *arg) (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - while (init_run && acct_gather_profile_running) { + while (init_run && acct_gather_profile_test()) { /* Do this until shutdown is requested */ slurm_mutex_lock(&g_context_lock); (*(ops.node_update))(); diff --git a/src/common/slurm_acct_gather_profile.c b/src/common/slurm_acct_gather_profile.c index c016c0102d33f6632a78cdcb61c5e54dcbc34f42..3d576bd3b4b0f339a15bda4e69e7508c66cb4939 100644 --- a/src/common/slurm_acct_gather_profile.c +++ b/src/common/slurm_acct_gather_profile.c @@ -108,7 +108,9 @@ static const char *syms[] = { }; acct_gather_profile_timer_t acct_gather_profile_timer[PROFILE_CNT]; -bool acct_gather_profile_running = false; + +static bool acct_gather_profile_running = false; +static pthread_mutex_t profile_running_mutex = PTHREAD_MUTEX_INITIALIZER; static slurm_acct_gather_profile_ops_t ops; static plugin_context_t *g_context = NULL; @@ -141,13 +143,13 @@ static void *_timer_thread(void *args) (void) 
pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); DEF_TIMERS; - while (init_run && acct_gather_profile_running) { + while (init_run && acct_gather_profile_test()) { slurm_mutex_lock(&g_context_lock); START_TIMER; now = time(NULL); for (i=0; i<PROFILE_CNT; i++) { - if (acct_gather_suspended) { + if (acct_gather_suspend_test()) { /* Handle suspended time as if it * didn't happen */ if (!acct_gather_profile_timer[i].freq) @@ -168,6 +170,8 @@ static void *_timer_thread(void *args) if (!acct_gather_profile_timer[i].freq || (diff < acct_gather_profile_timer[i].freq)) continue; + if (!acct_gather_profile_test()) + break; /* Shutting down */ debug2("profile signalling type %s", acct_gather_profile_type_t_name(i)); @@ -186,6 +190,10 @@ static void *_timer_thread(void *args) usleep(USLEEP_TIME - DELTA_TIMER); } + for (i=0; i < PROFILE_CNT; i++) { + pthread_cond_destroy(&acct_gather_profile_timer[i].notify); + } + return NULL; } @@ -427,11 +435,14 @@ extern int acct_gather_profile_startpoll(char *freq, char *freq_def) if (acct_gather_profile_init() < 0) return SLURM_ERROR; + slurm_mutex_lock(&profile_running_mutex); if (acct_gather_profile_running) { + slurm_mutex_unlock(&profile_running_mutex); error("acct_gather_profile_startpoll: poll already started!"); return retval; } acct_gather_profile_running = true; + slurm_mutex_unlock(&profile_running_mutex); (*(ops.get))(ACCT_GATHER_PROFILE_RUNNING, &profile); xassert(profile != ACCT_GATHER_PROFILE_NOT_SET); @@ -504,20 +515,21 @@ extern void acct_gather_profile_endpoll(void) { int i; + slurm_mutex_lock(&profile_running_mutex); if (!acct_gather_profile_running) { + slurm_mutex_unlock(&profile_running_mutex); debug2("acct_gather_profile_startpoll: poll already ended!"); return; } - acct_gather_profile_running = false; + slurm_mutex_unlock(&profile_running_mutex); for (i=0; i < PROFILE_CNT; i++) { /* end remote threads */ slurm_mutex_lock(&acct_gather_profile_timer[i].notify_mutex); pthread_cond_signal(&acct_gather_profile_timer[i].notify); slurm_mutex_unlock(&acct_gather_profile_timer[i].notify_mutex); - pthread_cond_destroy(&acct_gather_profile_timer[i].notify); - acct_gather_profile_timer[i].freq = 0; + switch (i) { case PROFILE_ENERGY: break; @@ -672,3 +684,13 @@ extern bool acct_gather_profile_g_is_active(uint32_t type) return (*(ops.is_active))(type); } + +extern bool acct_gather_profile_test(void) +{ + bool rc; + slurm_mutex_lock(&profile_running_mutex); + rc = acct_gather_profile_running; + slurm_mutex_unlock(&profile_running_mutex); + return rc; +} + diff --git a/src/common/slurm_acct_gather_profile.h b/src/common/slurm_acct_gather_profile.h index f1c0c62ac7b43da0aa87965a3238a7eee308819b..e77c514954936cd8055e054bac64cee090de59fc 100644 --- a/src/common/slurm_acct_gather_profile.h +++ b/src/common/slurm_acct_gather_profile.h @@ -95,7 +95,6 @@ typedef struct { } acct_gather_profile_timer_t; extern acct_gather_profile_timer_t acct_gather_profile_timer[PROFILE_CNT]; -extern bool acct_gather_profile_running; /* * Load the plugin @@ -113,6 +112,9 @@ extern char *acct_gather_profile_to_string(uint32_t profile); /* translate string of words to uint32_t filled in with bits set to profile */ extern uint32_t acct_gather_profile_from_string(char *profile_str); +/* Return true if acct_gather_profile_running flag is set */ +extern bool acct_gather_profile_test(void); + extern char *acct_gather_profile_type_to_string(uint32_t series); extern uint32_t acct_gather_profile_type_from_string(char *series_str); diff --git a/src/common/slurm_errno.c 
b/src/common/slurm_errno.c index 688a26854a8fdf9f4826f1c20dafb3dc82d35165..7898e4bb0cea5a6edaf434d27289038eaa5969c0 100644 --- a/src/common/slurm_errno.c +++ b/src/common/slurm_errno.c @@ -325,6 +325,8 @@ static slurm_errtab_t slurm_errtab[] = { "Required powercap is not valid, check min/max values"}, { ESLURM_INVALID_MCS_LABEL, "Invalid mcs_label specified" }, + { ESLURM_BURST_BUFFER_WAIT, + "Waiting for burst buffer" }, /* slurmd error codes */ { ESLRUMD_PIPE_ERROR_ON_TASK_SPAWN, diff --git a/src/common/slurm_jobacct_gather.c b/src/common/slurm_jobacct_gather.c index dcfd103d2ccf8068fec83f68eff9be1792fac326..9756796b82b68515322aabf524b8b1f143c4411f 100644 --- a/src/common/slurm_jobacct_gather.c +++ b/src/common/slurm_jobacct_gather.c @@ -110,6 +110,7 @@ static slurm_jobacct_gather_ops_t ops; static plugin_context_t *g_context = NULL; static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; static bool init_run = false; +static pthread_mutex_t init_run_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_t watch_tasks_thread_id = 0; static int freq = 0; @@ -119,6 +120,7 @@ static uint64_t cont_id = NO_VAL64; static pthread_mutex_t task_list_lock = PTHREAD_MUTEX_INITIALIZER; static bool jobacct_shutdown = true; +static pthread_mutex_t jobacct_shutdown_mutex = PTHREAD_MUTEX_INITIALIZER; static bool plugin_polling = true; static uint32_t jobacct_job_id = 0; @@ -178,6 +180,15 @@ unpack_error: return SLURM_ERROR; } +static bool _jobacct_shutdown_test(void) +{ + bool rc; + slurm_mutex_lock(&jobacct_shutdown_mutex); + rc = jobacct_shutdown; + slurm_mutex_unlock(&jobacct_shutdown_mutex); + return rc; +} + static void _poll_data(bool profile) { /* Update the data */ @@ -193,6 +204,14 @@ static void _task_sleep(int rem) rem = sleep(rem); /* subject to interupt */ } +static bool _init_run_test(void) +{ + bool rc; + slurm_mutex_lock(&init_run_mutex); + rc = init_run; + slurm_mutex_unlock(&init_run_mutex); + return rc; +} /* _watch_tasks() -- monitor slurm jobs and track their memory usage * @@ -218,7 +237,8 @@ static void *_watch_tasks(void *arg) * spawned, which would prevent a valid checkpoint/restart * with some systems */ _task_sleep(1); - while (init_run && !jobacct_shutdown && acct_gather_profile_running) { + while (_init_run_test() && !_jobacct_shutdown_test() && + acct_gather_profile_test()) { /* Do this until shutdown is requested */ slurm_mutex_lock(&acct_gather_profile_timer[type].notify_mutex); pthread_cond_wait( @@ -241,7 +261,7 @@ extern int jobacct_gather_init(void) char *type = NULL; int retval=SLURM_SUCCESS; - if (slurmdbd_conf || (init_run && g_context)) + if (slurmdbd_conf || (_init_run_test() && g_context)) return retval; slurm_mutex_lock(&g_context_lock); @@ -264,7 +284,9 @@ extern int jobacct_gather_init(void) goto done; } + slurm_mutex_lock(&init_run_mutex); init_run = true; + slurm_mutex_unlock(&init_run_mutex); /* only print the WARNING messages if in the slurmctld */ if (!run_in_daemon("slurmctld")) @@ -303,7 +325,9 @@ extern int jobacct_gather_fini(void) slurm_mutex_lock(&g_context_lock); if (g_context) { + slurm_mutex_lock(&init_run_mutex); init_run = false; + slurm_mutex_unlock(&init_run_mutex); if (watch_tasks_thread_id) { pthread_cancel(watch_tasks_thread_id); @@ -329,12 +353,13 @@ extern int jobacct_gather_startpoll(uint16_t frequency) if (jobacct_gather_init() < 0) return SLURM_ERROR; - if (!jobacct_shutdown) { + if (!_jobacct_shutdown_test()) { error("jobacct_gather_startpoll: poll already started!"); return retval; } - + 
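The changes in slurm_acct_gather_profile.c and slurm_jobacct_gather.c above follow one pattern: the shared booleans (acct_gather_profile_running, init_run, jobacct_shutdown) become static, and every reader and writer goes through a small accessor that takes a dedicated mutex. A minimal stand-alone sketch of that idiom, using plain pthreads and hypothetical names (polling_active, polling_test, polling_set) instead of Slurm's slurm_mutex_* wrappers:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static bool polling_active = false;
static pthread_mutex_t polling_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Reader: take the mutex, copy the flag, release, return the copy. */
static bool polling_test(void)
{
        bool rc;

        pthread_mutex_lock(&polling_mutex);
        rc = polling_active;
        pthread_mutex_unlock(&polling_mutex);
        return rc;
}

/* Writer: the same mutex protects every update of the flag. */
static void polling_set(bool value)
{
        pthread_mutex_lock(&polling_mutex);
        polling_active = value;
        pthread_mutex_unlock(&polling_mutex);
}

int main(void)
{
        polling_set(true);
        printf("polling while active:   %d\n", polling_test());
        polling_set(false);
        printf("polling after shutdown: %d\n", polling_test());
        return 0;
}

The payoff is that the watch threads and the start/end-poll paths never observe a half-updated flag, at the cost of one uncontended lock per check.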
slurm_mutex_lock(&jobacct_shutdown_mutex); jobacct_shutdown = false; + slurm_mutex_unlock(&jobacct_shutdown_mutex); freq = frequency; @@ -364,7 +389,9 @@ extern int jobacct_gather_endpoll(void) if (jobacct_gather_init() < 0) return SLURM_ERROR; + slurm_mutex_lock(&jobacct_shutdown_mutex); jobacct_shutdown = true; + slurm_mutex_unlock(&jobacct_shutdown_mutex); slurm_mutex_lock(&task_list_lock); FREE_NULL_LIST(task_list); @@ -386,7 +413,7 @@ extern int jobacct_gather_add_task(pid_t pid, jobacct_id_t *jobacct_id, if (!plugin_polling) return SLURM_SUCCESS; - if (jobacct_shutdown) + if (_jobacct_shutdown_test()) return SLURM_ERROR; jobacct = jobacctinfo_create(jobacct_id); @@ -422,7 +449,7 @@ error: extern jobacctinfo_t *jobacct_gather_stat_task(pid_t pid) { - if (!plugin_polling || jobacct_shutdown) + if (!plugin_polling || _jobacct_shutdown_test()) return NULL; else if (pid) { struct jobacctinfo *jobacct = NULL; @@ -476,7 +503,7 @@ extern jobacctinfo_t *jobacct_gather_remove_task(pid_t pid) * mainly for updating energy consumption */ _poll_data(1); - if (jobacct_shutdown) + if (_jobacct_shutdown_test()) return NULL; slurm_mutex_lock(&task_list_lock); diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 5575c0afdcf7a854b8c7530e36a68363e9b89509..e634f91242b276b565bd110e9a89e5e74cb57b4f 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -1750,7 +1750,7 @@ char *slurm_get_accounting_storage_pass(void) * returns the auth_info from slurmctld_conf object (AuthInfo parameter) * cache value in local buffer for best performance * WARNING: The return of this function can be used in many different - * places and SHOULD NOT BE FREED! + * RET char * - AuthInfo value, MUST be xfreed by caller */ extern char *slurm_get_auth_info(void) { @@ -3397,11 +3397,13 @@ total_return: static int _unpack_msg_uid(Buf buffer) { int uid = -1; - void *auth_cred = NULL; + void *auth_cred = NULL, *auth_info; if ((auth_cred = g_slurm_auth_unpack(buffer)) == NULL) return uid; - uid = (int) g_slurm_auth_get_uid(auth_cred, slurm_get_auth_info()); + auth_info = slurm_get_auth_info(); + uid = (int) g_slurm_auth_get_uid(auth_cred, auth_info); + xfree(auth_info); g_slurm_auth_destroy(auth_cred); return uid; diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 85a0b4b9cf4e096e3c9c07914ba5a4e8d566ef9a..40a8e307ce8787e68f749ef62bfe96c1fd478770 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -1999,7 +1999,7 @@ extern char *health_check_node_state_str(uint32_t node_state) if ((node_state & HEALTH_CHECK_NODE_ANY) == HEALTH_CHECK_NODE_ANY) { if (state_str[0]) xstrcat(state_str, ","); - state_str = xstrdup("ANY"); + xstrcat(state_str, "ANY"); return state_str; } diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 163da321c985208082dc934dfe596e0dbd565058..9954882e07ec0948ecc465bb7fe49c5ca077f845 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -5438,13 +5438,12 @@ _pack_composite_msg(composite_msg_t *msg, Buf buffer, uint16_t protocol_version) pack16(tmp_info->msg_index, buffer); if (!tmp_info->auth_cred) { - char *auth = slurm_get_auth_info(); + char *auth_info = slurm_get_auth_info(); /* FIXME: this should handle the - _global_auth_key() as well. - */ + * _global_auth_key() as well. 
*/ tmp_info->auth_cred = - g_slurm_auth_create(NULL, 2, auth); - xfree(auth); + g_slurm_auth_create(NULL, 2, auth_info); + xfree(auth_info); } g_slurm_auth_pack(tmp_info->auth_cred, buffer); @@ -5484,7 +5483,7 @@ _unpack_composite_msg(composite_msg_t **msg, Buf buffer, int i, rc; slurm_msg_t *tmp_info; composite_msg_t *object_ptr = NULL; - char *auth = slurm_get_auth_info(); + char *auth_info = slurm_get_auth_info(); xassert(msg); object_ptr = xmalloc(sizeof(composite_msg_t)); @@ -5516,7 +5515,7 @@ _unpack_composite_msg(composite_msg_t **msg, Buf buffer, goto unpack_error; rc = g_slurm_auth_verify( - tmp_info->auth_cred, NULL, 2, auth); + tmp_info->auth_cred, NULL, 2, auth_info); if (rc != SLURM_SUCCESS) { error("authentication: %s ", @@ -5528,12 +5527,12 @@ _unpack_composite_msg(composite_msg_t **msg, Buf buffer, list_append(object_ptr->msg_list, tmp_info); } } - xfree(auth); + xfree(auth_info); return SLURM_SUCCESS; unpack_error: slurm_free_composite_msg(object_ptr); *msg = NULL; - xfree(auth); + xfree(auth_info); return SLURM_ERROR; } diff --git a/src/common/slurmdb_defs.c b/src/common/slurmdb_defs.c index 3c326e71c8d41545376ed647d17b69325aa4d532..da989358198017cd530f5f03d7e639e109d02b3b 100644 --- a/src/common/slurmdb_defs.c +++ b/src/common/slurmdb_defs.c @@ -582,8 +582,14 @@ extern slurmdb_step_rec_t *slurmdb_create_step_rec() extern slurmdb_assoc_usage_t *slurmdb_create_assoc_usage(int tres_cnt) { - slurmdb_assoc_usage_t *usage = - xmalloc(sizeof(slurmdb_assoc_usage_t)); + slurmdb_assoc_usage_t *usage; + int alloc_size; + + if (!tres_cnt) + fatal("%s: You need to give a tres_cnt to call this function", + __func__); + + usage = xmalloc(sizeof(slurmdb_assoc_usage_t)); usage->level_shares = NO_VAL; usage->shares_norm = NO_VAL64; @@ -593,13 +599,13 @@ extern slurmdb_assoc_usage_t *slurmdb_create_assoc_usage(int tres_cnt) usage->level_fs = 0; usage->fs_factor = 0; - if (tres_cnt) { - int alloc_size = sizeof(uint64_t) * tres_cnt; - usage->tres_cnt = tres_cnt; - usage->grp_used_tres = xmalloc(alloc_size); - usage->grp_used_tres_run_secs = xmalloc(alloc_size); - usage->usage_tres_raw = xmalloc(sizeof(long double) * tres_cnt); - } + usage->tres_cnt = tres_cnt; + + alloc_size = sizeof(uint64_t) * tres_cnt; + usage->grp_used_tres = xmalloc(alloc_size); + usage->grp_used_tres_run_secs = xmalloc(alloc_size); + + usage->usage_tres_raw = xmalloc(sizeof(long double) * tres_cnt); return usage; } diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c index bad03d62611fb0493377f636bbdc65608e79ad35..53f04db8a00e7f5b837256be4f8e83c6e62535a7 100644 --- a/src/common/stepd_api.c +++ b/src/common/stepd_api.c @@ -223,6 +223,7 @@ stepd_connect(const char *directory, const char *nodename, int fd = -1; int rc; void *auth_cred; + char *auth_info; Buf buffer; int len; @@ -243,7 +244,9 @@ stepd_connect(const char *directory, const char *nodename, buffer = init_buf(0); /* Create an auth credential */ - auth_cred = g_slurm_auth_create(NULL, 2, slurm_get_auth_info()); + auth_info = slurm_get_auth_info(); + auth_cred = g_slurm_auth_create(NULL, 2, auth_info); + xfree(auth_info); if (auth_cred == NULL) { error("Creating authentication credential: %s", g_slurm_auth_errstr(g_slurm_auth_errno(NULL))); diff --git a/src/common/xlua.c b/src/common/xlua.c index 4c35cbc7ba64a85f7714f73f6823c670b4f91acf..a587226235e1cab3daf7d59dd543410d73130aa8 100644 --- a/src/common/xlua.c +++ b/src/common/xlua.c @@ -51,9 +51,11 @@ int xlua_dlopen(void) !dlopen("liblua-5.2.so", RTLD_NOW | RTLD_GLOBAL) && !dlopen("liblua5.2.so", RTLD_NOW 
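A recurring change in slurm_protocol_api.c, slurm_protocol_pack.c, stepd_api.c and several plugins below is that the string returned by slurm_get_auth_info() is now owned by the caller and must be xfreed after use, instead of being treated as a shared cached pointer. A rough stand-alone sketch of that ownership pattern, with a hypothetical get_auth_info_copy() and plain malloc/free rather than Slurm's xmalloc family:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for the configured AuthInfo value. */
static const char *config_auth_info = "socket=/var/run/munge/munge.socket.2";

/* Hypothetical getter: returns a fresh copy that the caller owns. */
static char *get_auth_info_copy(void)
{
        return strdup(config_auth_info);
}

int main(void)
{
        char *auth_info = get_auth_info_copy();
        char *sock;

        if (!auth_info)
                return 1;
        sock = strstr(auth_info, "socket=");
        if (sock)
                printf("munge socket: %s\n", sock + strlen("socket="));
        free(auth_info);        /* every call site frees its own copy */
        return 0;
}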
| RTLD_GLOBAL) && !dlopen("liblua5.2.so.0", RTLD_NOW | RTLD_GLOBAL) && + !dlopen("liblua.so.5.2", RTLD_NOW | RTLD_GLOBAL) && !dlopen("liblua-5.1.so", RTLD_NOW | RTLD_GLOBAL) && !dlopen("liblua5.1.so", RTLD_NOW | RTLD_GLOBAL) && - !dlopen("liblua5.1.so.0", RTLD_NOW | RTLD_GLOBAL)) { + !dlopen("liblua5.1.so.0", RTLD_NOW | RTLD_GLOBAL) && + !dlopen("liblua.so.5.1", RTLD_NOW | RTLD_GLOBAL) ) { return error("Failed to open liblua.so: %s", dlerror()); } return SLURM_SUCCESS; diff --git a/src/common/xstring.c b/src/common/xstring.c index 9fe336251677e500603eafe673ce120a6e3c56e6..f75bdbca62c1419422032663115a3bc2a0c492d2 100644 --- a/src/common/xstring.c +++ b/src/common/xstring.c @@ -415,12 +415,14 @@ char * xstrndup(const char *str, size_t n) long int xstrntol(const char *str, char **endptr, size_t n, int base) { long int number = 0; - char new_str[n+1]; + char new_str[n+1], *new_endptr = NULL; memcpy(new_str, str, n); new_str[n] = '\0'; - number = strtol(new_str, endptr, base); + number = strtol(new_str, &new_endptr, base); + if (endptr) + *endptr = ((char *)str) + (new_endptr - new_str); return number; } diff --git a/src/database/mysql_common.c b/src/database/mysql_common.c index 967ac41dca86c52ed6a4aeeb0733227f72901d73..142a82c34bb71ff036a0e4fee06bef9575130586 100644 --- a/src/database/mysql_common.c +++ b/src/database/mysql_common.c @@ -151,17 +151,21 @@ static int _mysql_query_internal(MYSQL *db_conn, char *query) } error("mysql_query failed: %d %s\n%s", errno, err_str, query); if (errno == ER_LOCK_WAIT_TIMEOUT) { + /* FIXME: If we get ER_LOCK_WAIT_TIMEOUT here we need + * to restart the connections, but it appears restarting + * the calling program is the only way to handle this. + * If anyone in the future figures out a way to handle + * this, super. Until then we will need to restart the + * calling program if you ever get this error. + */ fatal("mysql gave ER_LOCK_WAIT_TIMEOUT as an error. " "The only way to fix this is restart the " "calling program"); + } else if (errno == ER_HOST_IS_BLOCKED) { + fatal("MySQL gave ER_HOST_IS_BLOCKED as an error. " + "You will need to call 'mysqladmin flush-hosts' " + "to regain connectivity."); } - /* FIXME: If we get ER_LOCK_WAIT_TIMEOUT here we need - * to restart the connections, but it appears restarting - * the calling program is the only way to handle this. - * If anyone in the future figures out a way to handle - * this, super. Until then we will need to restart the - * calling program if you ever get this error. 
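The xstrntol() change above runs strtol() on a NUL-terminated copy of the bounded input and then translates the end pointer back into the caller's original string, so *endptr no longer points at a stack temporary. A small sketch of the same idea under a hypothetical name (bounded_strtol), libc only:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse at most n leading bytes of str as a long.  The conversion runs
 * on a local copy, then *endptr is mapped back into the caller's string
 * so it never points at the temporary buffer. */
static long bounded_strtol(const char *str, char **endptr, size_t n, int base)
{
        char buf[64], *local_end = NULL;
        long number;

        if (n >= sizeof(buf))
                n = sizeof(buf) - 1;
        memcpy(buf, str, n);
        buf[n] = '\0';

        number = strtol(buf, &local_end, base);
        if (endptr)
                *endptr = (char *) str + (local_end - buf);
        return number;
}

int main(void)
{
        const char *range = "123-456";
        char *end = NULL;
        long first = bounded_strtol(range, &end, 3, 10);

        printf("first=%ld rest=\"%s\"\n", first, end);  /* 123 and "-456" */
        return 0;
}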
- */ rc = SLURM_ERROR; } end_it: diff --git a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c index 96b38d117254e468208ce81f534e1d8a832b7e5e..ec56e2e6dff44cd7c5f3c37e2fa91c82de453806 100644 --- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c +++ b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c @@ -1426,13 +1426,13 @@ extern int filetxt_jobacct_process_archive(slurmdb_archive_cond_t *arch_cond) } /* reopen new logfile in append mode, since slurmctld may write it */ - if (freopen(filein, "a", new_logfile) == NULL) { + if ((new_logfile = freopen(filein, "a", new_logfile)) == NULL) { perror("reopening new logfile"); goto finished2; } while (fgets(line, BUFFER_SIZE, fd)) { - if (fputs(line, new_logfile)<0) { + if (fputs(line, new_logfile) < 0) { perror("writing final records"); goto finished2; } @@ -1441,7 +1441,8 @@ extern int filetxt_jobacct_process_archive(slurmdb_archive_cond_t *arch_cond) printf("%d jobs expired.\n", list_count(exp_list)); finished2: - fclose(new_logfile); + if (new_logfile) + fclose(new_logfile); if (!file_err) { if (unlink(old_logfile_name) == -1) error("Unable to unlink old logfile %s: %m", diff --git a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c index e279f7cb2247f8b27b0ea218d4d668c234e83966..904f7432f16f160d08d666a84c440905f8bf4da9 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c @@ -410,7 +410,7 @@ static int _make_sure_users_have_default( break; } } - if (!rc != SLURM_SUCCESS) + if (rc != SLURM_SUCCESS) break; list_iterator_reset(clus_itr); } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_resv.c b/src/plugins/accounting_storage/mysql/as_mysql_resv.c index 28d0649dbffedcbde4b77f8ce89946795ec5292d..bfe7b9a38e3edf07c2679394573416dd73e4bf92 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_resv.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_resv.c @@ -98,9 +98,11 @@ static int _setup_resv_limits(slurmdb_reservation_rec_t *resv, } if (resv->name) { + char *tmp_char = slurm_add_slash_to_quotes(resv->name); xstrcat(*cols, ", resv_name"); - xstrfmtcat(*vals, ", '%s'", resv->name); - xstrfmtcat(*extra, ", resv_name='%s'", resv->name); + xstrfmtcat(*vals, ", '%s'", tmp_char); + xstrfmtcat(*extra, ", resv_name='%s'", tmp_char); + xfree(tmp_char); } if (resv->nodes) { diff --git a/src/plugins/accounting_storage/mysql/as_mysql_wckey.c b/src/plugins/accounting_storage/mysql/as_mysql_wckey.c index 0495dcee31b4a07b00355d8166a7810e178acc24..8ddd686d051f35ba1280f38c0c6ea00a7cc2771f 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_wckey.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_wckey.c @@ -184,7 +184,7 @@ static int _make_sure_users_have_default( break; } } - if (!rc != SLURM_SUCCESS) + if (rc != SLURM_SUCCESS) break; list_iterator_reset(clus_itr); } diff --git a/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c b/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c index 3af815d813647624dc88909e7e419e0c817117bd..ab10d27d2d646b14e6dadf87df4b1f961e5f7110 100644 --- a/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c +++ b/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c @@ -85,9 +85,9 @@ enum { extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl); -static uint32_t _get_latest_stats(int type) +static uint64_t 
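The as_mysql_resv.c hunk above escapes single quotes in the reservation name before it is embedded in a quoted SQL literal. A stand-alone sketch of that escaping step with a hypothetical helper, not Slurm's slurm_add_slash_to_quotes():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return a copy of "in" with a backslash inserted before every single
 * quote, sized for the worst case where every character is a quote.
 * The caller frees the result. */
static char *add_slash_to_quotes(const char *in)
{
        char *out = malloc(2 * strlen(in) + 1);
        char *p = out;

        if (!out)
                return NULL;
        while (*in) {
                if (*in == '\'')
                        *p++ = '\\';
                *p++ = *in++;
        }
        *p = '\0';
        return out;
}

int main(void)
{
        char *escaped = add_slash_to_quotes("weekend 'maintenance' window");

        if (escaped) {
                printf("resv_name='%s'\n", escaped);
                free(escaped);
        }
        return 0;
}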
_get_latest_stats(int type) { - uint32_t data = 0; + uint64_t data = 0; int fd; FILE *fp = NULL; char *file_name; @@ -117,7 +117,7 @@ static uint32_t _get_latest_stats(int type) num_read = read(fd, sbuf, (sizeof(sbuf) - 1)); if (num_read > 0) { sbuf[num_read] = '\0'; - sscanf(sbuf, "%u", &data); + sscanf(sbuf, "%"PRIu64, &data); } fclose(fp); @@ -147,8 +147,8 @@ static void _get_joules_task(acct_gather_energy_t *energy) return; now = time(NULL); - curr_energy = (uint64_t) _get_latest_stats(GET_ENERGY); - curr_power = _get_latest_stats(GET_POWER); + curr_energy = _get_latest_stats(GET_ENERGY); + curr_power = (uint32_t) _get_latest_stats(GET_POWER); if (energy->previous_consumed_energy) { diff_energy = curr_energy - energy->previous_consumed_energy; diff --git a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c index 9865738c53b444084bab38f83d7db489c77d0226..ab32a0595cbee623b2ee60bb3b834d8d34859569 100644 --- a/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c +++ b/src/plugins/acct_gather_energy/ipmi/acct_gather_energy_ipmi.c @@ -834,8 +834,8 @@ static int _get_joules_task(uint16_t delta) acct_gather_energy_t *new, *old; /* sensors list */ - acct_gather_energy_t *energies; - uint16_t sensor_cnt; + acct_gather_energy_t *energies = NULL; + uint16_t sensor_cnt = 0; if (slurm_get_node_energy(NULL, delta, &sensor_cnt, &energies)) { error("_get_joules_task: can't get info from slurmd"); diff --git a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c index a3e839ee775a943ea6faffb541971190661590d9..9650e014ab964fb0dc609e19ee5d72bb7f2c2fb1 100644 --- a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c +++ b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c @@ -248,7 +248,7 @@ static void _hardware(void) if (!xstrncmp(buf, "physical id", sizeof("physical id") - 1)) { sscanf(buf, "physical id\t: %d", &pkg); - if (pkg > MAX_PKGS) + if (pkg >= MAX_PKGS) fatal("Slurm can only handle %d sockets for " "rapl, you seem to have more than that. 
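The acct_gather_energy/cray change above widens the value read from the energy/power counter files to uint64_t and parses it with the <inttypes.h> format macros, so accumulated joule counts larger than 32 bits are preserved. A minimal sketch of that parse (SCNu64 for scanning, PRIu64 for printing) with a made-up counter string:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
        /* Made-up counter contents larger than UINT32_MAX. */
        const char *sbuf = "5000000123";
        uint64_t energy = 0;

        if (sscanf(sbuf, "%" SCNu64, &energy) == 1)
                printf("accumulated energy: %" PRIu64 " J\n", energy);
        return 0;
}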
" "Update src/plugins/acct_gather_energy/" diff --git a/src/plugins/crypto/munge/crypto_munge.c b/src/plugins/crypto/munge/crypto_munge.c index 14947c82ef5eab7bfdf9ef8ecc4929cd81784d93..8010d7cd9fb561b7114300379947bde4410b445f 100644 --- a/src/plugins/crypto/munge/crypto_munge.c +++ b/src/plugins/crypto/munge/crypto_munge.c @@ -115,17 +115,17 @@ static uid_t slurm_user = 0; static char *_auth_opts_to_socket(void) { char *socket = NULL, *sep, *tmp; - char *opts = slurm_get_auth_info(); + char *auth_info = slurm_get_auth_info(); - if (opts) { - tmp = strstr(opts, "socket="); + if (auth_info) { + tmp = strstr(auth_info, "socket="); if (tmp) { /* New format */ socket = xstrdup(tmp + 7); sep = strchr(socket, ','); if (sep) sep[0] = '\0'; } - xfree(opts); + xfree(auth_info); } return socket; diff --git a/src/plugins/launch/aprun/launch_aprun.c b/src/plugins/launch/aprun/launch_aprun.c index cb9a9b2ceeb7dde9ecd959fbed17260510bd1a7a..028eb41a469d2dafb7a75c0d844a14be7615f45c 100644 --- a/src/plugins/launch/aprun/launch_aprun.c +++ b/src/plugins/launch/aprun/launch_aprun.c @@ -316,12 +316,15 @@ static void _handle_timeout(srun_timeout_msg_t *timeout_msg) static void _handle_msg(slurm_msg_t *msg) { static uint32_t slurm_uid = NO_VAL; - uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + char *auth_info = slurm_get_auth_info(); + uid_t req_uid; uid_t uid = getuid(); job_step_kill_msg_t *ss; srun_user_msg_t *um; + req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info); + xfree(auth_info); + if (slurm_uid == NO_VAL) slurm_uid = slurm_get_slurm_user_id(); if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) { diff --git a/src/plugins/launch/runjob/launch_runjob.c b/src/plugins/launch/runjob/launch_runjob.c index 6778c25f6a94bed75e01146d0a36ee882139cc8e..6bd21ba6caa0f87fd868c339530025071ad0362a 100644 --- a/src/plugins/launch/runjob/launch_runjob.c +++ b/src/plugins/launch/runjob/launch_runjob.c @@ -135,12 +135,15 @@ static void _handle_msg(slurm_msg_t *msg) { static uint32_t slurm_uid = NO_VAL; - uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + char *auth_info = slurm_get_auth_info(); + uid_t req_uid; uid_t uid = getuid(); job_step_kill_msg_t *ss; srun_user_msg_t *um; + req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info); + xfree(auth_info); + if (slurm_uid == NO_VAL) slurm_uid = slurm_get_slurm_user_id(); if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) { diff --git a/src/plugins/mpi/pmi2/agent.c b/src/plugins/mpi/pmi2/agent.c index f4adf2c75ef0ef768801a9fc84871b98c26ca2f8..843d4fe7441b33395ea0e0862c9ee7fbc6f601b4 100644 --- a/src/plugins/mpi/pmi2/agent.c +++ b/src/plugins/mpi/pmi2/agent.c @@ -56,6 +56,7 @@ #include "src/common/slurm_xlator.h" #include "src/common/eio.h" +#include "src/common/macros.h" #include "src/common/slurm_mpi.h" #include "src/common/xstring.h" #include "src/slurmd/slurmstepd/slurmstepd_job.h" @@ -70,7 +71,8 @@ static int *initialized = NULL; static int *finalized = NULL; static eio_handle_t *pmi2_handle; -static volatile int _agent_running; +static volatile bool agent_running; +static pthread_mutex_t agent_mutex = PTHREAD_MUTEX_INITIALIZER; static bool _tree_listen_readable(eio_obj_t *obj); static int _tree_listen_read(eio_obj_t *obj, List objs); @@ -256,7 +258,7 @@ _handle_pmi1_init(int fd, int lrank) debug3("mpi/pmi2: in _handle_pmi1_init"); while ( (n = read(fd, buf, 64)) < 0 && errno == EINTR); - if (n < 0) { + if ((n < 0) || (n >= 64)) { error("mpi/pmi2: failed to read PMI1 
init command"); return SLURM_ERROR; } @@ -304,7 +306,9 @@ _agent(void * unused) eio_obj_t *tree_listen_obj, *task_obj; int i; - _agent_running = 1; + slurm_mutex_lock(&agent_mutex); + agent_running = true; + slurm_mutex_unlock(&agent_mutex); pmi2_handle = eio_handle_create(0); @@ -330,10 +334,22 @@ _agent(void * unused) eio_handle_destroy(pmi2_handle); - _agent_running = 0; + slurm_mutex_lock(&agent_mutex); + agent_running = false; + slurm_mutex_unlock(&agent_mutex); + return NULL; } +static bool _agent_running_test(void) +{ + bool rc; + slurm_mutex_lock(&agent_mutex); + rc = agent_running; + slurm_mutex_unlock(&agent_mutex); + return rc; +} + /* * start the PMI2 agent thread */ @@ -360,7 +376,7 @@ pmi2_start_agent(void) (unsigned long) pmi2_agent_tid); /* wait for the agent to start */ - while (!_agent_running) { + while (!_agent_running_test()) { sched_yield(); } @@ -377,7 +393,7 @@ pmi2_stop_agent(void) if (pmi2_handle != NULL) { eio_signal_shutdown(pmi2_handle); /* wait for the agent to finish */ - while (_agent_running ) { + while (_agent_running_test()) { sched_yield(); } } diff --git a/src/plugins/mpi/pmi2/info.c b/src/plugins/mpi/pmi2/info.c index 8c9dbfef23e074932620f43d36f0c8b4c775a751..459485d8065482a72f5e5f520681a68696b9df5f 100644 --- a/src/plugins/mpi/pmi2/info.c +++ b/src/plugins/mpi/pmi2/info.c @@ -84,7 +84,7 @@ enqueue_nag_req(int fd, int rank, char *key) req = xmalloc(sizeof(nag_req_t)); req->fd = fd; req->rank = rank; - strncpy(req->key, key, PMI2_MAX_KEYLEN); + strncpy(req->key, key, (PMI2_MAX_KEYLEN - 1)); /* Insure NULL at end */ /* insert in the head */ req->next = nag_req_list; diff --git a/src/plugins/mpi/pmi2/spawn.c b/src/plugins/mpi/pmi2/spawn.c index 1bca9c5eed73176848beb992ca6803cbddca1755..8b81787872da0864fb9dcc56c8eff8c5a47722ff 100644 --- a/src/plugins/mpi/pmi2/spawn.c +++ b/src/plugins/mpi/pmi2/spawn.c @@ -153,8 +153,10 @@ spawn_req_pack(spawn_req_t *req, Buf buf) int i, j; spawn_subcmd_t *subcmd; void *auth_cred; + char *auth_info = slurm_get_auth_info(); - auth_cred = g_slurm_auth_create(NULL, 2, slurm_get_auth_info()); + auth_cred = g_slurm_auth_create(NULL, 2, auth_info); + xfree(auth_info); if (auth_cred == NULL) { error("authentication: %s", g_slurm_auth_errstr(g_slurm_auth_errno(NULL)) ); @@ -196,6 +198,7 @@ spawn_req_unpack(spawn_req_t **req_ptr, Buf buf) uint32_t temp32; int i, j; void *auth_cred; + char *auth_info; uid_t auth_uid, my_uid; auth_cred = g_slurm_auth_unpack(buf); @@ -204,7 +207,9 @@ spawn_req_unpack(spawn_req_t **req_ptr, Buf buf) g_slurm_auth_errstr(g_slurm_auth_errno(NULL)) ); return SLURM_ERROR; } - auth_uid = g_slurm_auth_get_uid(auth_cred, slurm_get_auth_info()); + auth_info = slurm_get_auth_info(); + auth_uid = g_slurm_auth_get_uid(auth_cred, auth_info); + xfree(auth_info); (void) g_slurm_auth_destroy(auth_cred); my_uid = getuid(); if ((auth_uid != 0) && (auth_uid != my_uid)) { diff --git a/src/plugins/mpi/pmix/mpi_pmix.c b/src/plugins/mpi/pmix/mpi_pmix.c index e3f29c71189e75b709aa4e5ae3af745f09a31ac3..0f5bb97f35fb51d82e72c6c60896fe047ebed545 100644 --- a/src/plugins/mpi/pmix/mpi_pmix.c +++ b/src/plugins/mpi/pmix/mpi_pmix.c @@ -89,6 +89,9 @@ int p_mpi_hook_slurmstepd_prefork(const stepd_step_rec_t *job, char ***env) pmixp_debug_hang(0); PMIXP_DEBUG("start"); + if (job->batch) + return SLURM_SUCCESS; + if (SLURM_SUCCESS != (ret = pmixp_stepd_init(job, env))) { PMIXP_ERROR("pmixp_stepd_init() failed"); goto err_ext; diff --git a/src/plugins/node_features/knl_cray/node_features_knl_cray.c 
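The mpi/pmi2 hunks above add two defensive checks: a read() that fills the whole command buffer is rejected as over-long, and the key copied into a fixed-size field is limited to one byte less than the field so it stays NUL terminated. A stand-alone sketch of both checks with hypothetical names and sizes, plain libc/POSIX:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define KEYLEN 64

/* Reject a read() that fills the whole buffer: the data may have been
 * truncated and there would be no room left for a terminator. */
static int read_command(int fd, char *buf, size_t buflen)
{
        ssize_t n = read(fd, buf, buflen);

        if ((n < 0) || ((size_t) n >= buflen))
                return -1;
        buf[n] = '\0';
        return 0;
}

/* Copy at most KEYLEN-1 bytes and terminate explicitly; strncpy() alone
 * does not guarantee a trailing NUL. */
static void copy_key(char dst[KEYLEN], const char *src)
{
        strncpy(dst, src, KEYLEN - 1);
        dst[KEYLEN - 1] = '\0';
}

int main(void)
{
        char cmd[KEYLEN], key[KEYLEN];
        int fd = open("/dev/null", O_RDONLY);

        if ((fd >= 0) && (read_command(fd, cmd, sizeof(cmd)) == 0))
                printf("command: \"%s\"\n", cmd);
        if (fd >= 0)
                close(fd);

        copy_key(key, "kvs-key-0");
        printf("key: %s\n", key);
        return 0;
}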
b/src/plugins/node_features/knl_cray/node_features_knl_cray.c index df2ebe1686d3caa8e165f94c25fbbf671604c875..75a23d868da013139b0bb124e30b2720338a494f 100644 --- a/src/plugins/node_features/knl_cray/node_features_knl_cray.c +++ b/src/plugins/node_features/knl_cray/node_features_knl_cray.c @@ -56,6 +56,7 @@ #include "slurm/slurm.h" #include "src/common/assoc_mgr.h" +#include "src/common/bitstring.h" #include "src/common/fd.h" #include "src/common/gres.h" #include "src/common/list.h" @@ -88,8 +89,8 @@ #define KNL_QUAD 0x0010 #define KNL_MCDRAM_FLAG 0xff00 #define KNL_CACHE 0x0100 -#define KNL_SPLIT 0x0200 -#define KNL_EQUAL 0x0400 +#define KNL_EQUAL 0x0200 +#define KNL_SPLIT 0x0400 #define KNL_FLAT 0x0800 /* These are defined here so when we link with something other than @@ -135,6 +136,7 @@ const uint32_t plugin_version = SLURM_VERSION_NUMBER; static char *capmc_path = NULL; static uint32_t capmc_poll_freq = 45; /* capmc state polling frequency */ static uint32_t capmc_timeout = 0; /* capmc command timeout in msec */ +static char *cnselect_path = NULL; static bool debug_flag = false; static uint16_t allow_mcdram = KNL_MCDRAM_FLAG; static uint16_t allow_numa = KNL_NUMA_FLAG; @@ -159,6 +161,7 @@ static s_p_options_t knl_conf_file_options[] = { {"CapmcPath", S_P_STRING}, {"CapmcPollFreq", S_P_UINT32}, {"CapmcTimeout", S_P_UINT32}, + {"CnselectPath", S_P_STRING}, {"DefaultMCDRAM", S_P_STRING}, {"DefaultNUMA", S_P_STRING}, {"LogFile", S_P_STRING}, @@ -179,6 +182,13 @@ typedef struct mcdram_cfg { uint16_t mcdram_pct; } mcdram_cfg_t; +typedef struct mcdram_cfg2 { + int hbm_pct; + char *mcdram_cfg; + char *nid_str; + bitstr_t *node_bitmap; +} mcdram_cfg2_t; + typedef struct numa_cap { uint32_t nid; char *numa_cfg; @@ -189,6 +199,12 @@ typedef struct numa_cfg { char *numa_cfg; } numa_cfg_t; +typedef struct numa_cfg2 { + char *nid_str; + bitstr_t *node_bitmap; + char *numa_cfg; +} numa_cfg2_t; + static s_p_hashtbl_t *_config_make_tbl(char *filename); static void _free_script_argv(char **script_argv); static mcdram_cap_t *_json_parse_mcdram_cap_array(json_object *jobj, char *key, @@ -199,9 +215,9 @@ static void _json_parse_mcdram_cap_object(json_object *jobj, mcdram_cap_t *ent); static void _json_parse_mcdram_cfg_object(json_object *jobj, mcdram_cfg_t *ent); static numa_cap_t *_json_parse_numa_cap_array(json_object *jobj, char *key, int *num); +static void _json_parse_numa_cap_object(json_object *jobj, numa_cap_t *ent); static numa_cfg_t *_json_parse_numa_cfg_array(json_object *jobj, char *key, int *num); -static void _json_parse_numa_cap_object(json_object *jobj, numa_cap_t *ent); static void _json_parse_numa_cfg_object(json_object *jobj, numa_cfg_t *ent); static int _knl_mcdram_bits_cnt(uint16_t mcdram_num); static uint16_t _knl_mcdram_parse(char *mcdram_str, char *sep); @@ -211,17 +227,24 @@ static int _knl_numa_bits_cnt(uint16_t numa_num); static uint16_t _knl_numa_parse(char *numa_str, char *sep); static char *_knl_numa_str(uint16_t numa_num); static uint16_t _knl_numa_token(char *token); +static mcdram_cfg2_t *_load_current_mcdram(int *num); +static numa_cfg2_t *_load_current_numa(int *num); +static char *_load_mcdram_type(int hbm_pct); +static char *_load_numa_type(char *type); static void _log_script_argv(char **script_argv, char *resp_msg); static void _mcdram_cap_free(mcdram_cap_t *mcdram_cap, int mcdram_cap_cnt); static void _mcdram_cap_log(mcdram_cap_t *mcdram_cap, int mcdram_cap_cnt); static void _mcdram_cfg_free(mcdram_cfg_t *mcdram_cfg, int mcdram_cfg_cnt); +static void 
_mcdram_cfg2_free(mcdram_cfg2_t *mcdram_cfg2, int mcdram_cfg2_cnt); static void _mcdram_cfg_log(mcdram_cfg_t *mcdram_cfg, int mcdram_cfg_cnt); static void _merge_strings(char **node_features, char *node_cfg, uint16_t allow_types); static void _numa_cap_free(numa_cap_t *numa_cap, int numa_cap_cnt); static void _numa_cap_log(numa_cap_t *numa_cap, int numa_cap_cnt); static void _numa_cfg_free(numa_cfg_t *numa_cfg, int numa_cfg_cnt); +static void _numa_cfg2_free(numa_cfg2_t *numa_cfg, int numa_cfg2_cnt); static void _numa_cfg_log(numa_cfg_t *numa_cfg, int numa_cfg_cnt); +static void _numa_cfg2_log(numa_cfg2_t *numa_cfg, int numa_cfg2_cnt); static uint64_t _parse_size(char *size_str); static char *_run_script(char *cmd_path, char **script_argv, int *status); static void _strip_knl_opts(char **features); @@ -690,6 +713,133 @@ static mcdram_cap_t *_json_parse_mcdram_cap_array(json_object *jobj, char *key, return ents; } +/* Return NID string for all nodes with specified MCDRAM mode (HBM percentage). + * NOTE: Information not returned for nodes which are not up + * NOTE: xfree() the return value. */ +static char *_load_mcdram_type(int hbm_pct) +{ + char **script_argv, *resp_msg; + int i, status = 0; + DEF_TIMERS; + + if (hbm_pct < 0) /* Unsupported configuration on this system */ + return NULL; + script_argv = xmalloc(sizeof(char *) * 4); /* NULL terminated */ + script_argv[0] = xstrdup("cnselect"); + script_argv[1] = xstrdup("-e"); + xstrfmtcat(script_argv[2], "hbmcachepct.eq.%d", hbm_pct); + START_TIMER; + resp_msg = _run_script(cnselect_path, script_argv, &status); + END_TIMER; + if (debug_flag) { + info("%s: %s %s %s ran for %s", __func__, + script_argv[0], script_argv[1], script_argv[2], TIME_STR); + } + if (resp_msg == NULL) { + debug("%s: %s %s %s returned no information", + __func__, script_argv[0], script_argv[1], script_argv[2]); + } else { + i = strlen(resp_msg); + if (resp_msg[i-1] == '\n') + resp_msg[i-1] = '\0'; + } + _log_script_argv(script_argv, resp_msg); + _free_script_argv(script_argv); + if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) { + error("%s: %s %s %s status:%u response:%s", __func__, + script_argv[0], script_argv[1], script_argv[2], + status, resp_msg); + } + return resp_msg; +} + +/* Return table of MCDRAM modes and NID string identifying nodes with that mode. + * Use _mcdram_cfg2_free() to release returned data structure */ +static mcdram_cfg2_t *_load_current_mcdram(int *num) +{ + mcdram_cfg2_t *mcdram_cfg; + int i; + + mcdram_cfg = xmalloc(sizeof(mcdram_cfg2_t) * 4); + + for (i = 0; i < 4; i++) { + mcdram_cfg[i].hbm_pct = mcdram_pct[i]; + mcdram_cfg[i].mcdram_cfg = _knl_mcdram_str(KNL_CACHE << i); + mcdram_cfg[i].nid_str = _load_mcdram_type(mcdram_cfg[i].hbm_pct); + if (mcdram_cfg[i].nid_str && mcdram_cfg[i].nid_str[0]) { + mcdram_cfg[i].node_bitmap = bit_alloc(100000); + (void) bit_unfmt(mcdram_cfg[i].node_bitmap, + mcdram_cfg[i].nid_str); + } + } + *num = 4; + return mcdram_cfg; +} + +/* Return NID string for all nodes with specified NUMA mode. + * NOTE: Information not returned for nodes which are not up + * NOTE: xfree() the return value. 
*/ +static char *_load_numa_type(char *type) +{ + char **script_argv, *resp_msg; + int i, status = 0; + DEF_TIMERS; + + script_argv = xmalloc(sizeof(char *) * 4); /* NULL terminated */ + script_argv[0] = xstrdup("cnselect"); + script_argv[1] = xstrdup("-e"); + xstrfmtcat(script_argv[2], "numa_cfg.eq.%s", type); + START_TIMER; + resp_msg = _run_script(cnselect_path, script_argv, &status); + END_TIMER; + if (debug_flag) { + info("%s: %s %s %s ran for %s", __func__, + script_argv[0], script_argv[1], script_argv[2], TIME_STR); + } + if (resp_msg == NULL) { + debug("%s: %s %s %s returned no information", + __func__, script_argv[0], script_argv[1], script_argv[2]); + } else { + i = strlen(resp_msg); + if (resp_msg[i-1] == '\n') + resp_msg[i-1] = '\0'; + } + _log_script_argv(script_argv, resp_msg); + _free_script_argv(script_argv); + if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) { + error("%s: %s %s %s status:%u response:%s", __func__, + script_argv[0], script_argv[1], script_argv[2], + status, resp_msg); + } + return resp_msg; +} + +/* Return table of NUMA modes and NID string identifying nodes with that mode. + * Use _numa_cfg2_free() to release returned data structure */ +static numa_cfg2_t *_load_current_numa(int *num) +{ + numa_cfg2_t *numa_cfg2; + int i; + + numa_cfg2 = xmalloc(sizeof(numa_cfg2_t) * 5); + numa_cfg2[0].numa_cfg = xstrdup("a2a"); + numa_cfg2[1].numa_cfg = xstrdup("snc2"); + numa_cfg2[2].numa_cfg = xstrdup("snc4"); + numa_cfg2[3].numa_cfg = xstrdup("hemi"); + numa_cfg2[4].numa_cfg = xstrdup("quad"); + + for (i = 0; i < 5; i++) { + numa_cfg2[i].nid_str = _load_numa_type(numa_cfg2[i].numa_cfg); + if (numa_cfg2[i].nid_str && numa_cfg2[i].nid_str[0]) { + numa_cfg2[i].node_bitmap = bit_alloc(100000); + (void) bit_unfmt(numa_cfg2[i].node_bitmap, + numa_cfg2[i].nid_str); + } + } + *num = 5; + return numa_cfg2; +} + static mcdram_cfg_t *_json_parse_mcdram_cfg_array(json_object *jobj, char *key, int *num) { @@ -812,6 +962,20 @@ static void _mcdram_cfg_free(mcdram_cfg_t *mcdram_cfg, int mcdram_cfg_cnt) xfree(mcdram_cfg); } +static void _mcdram_cfg2_free(mcdram_cfg2_t *mcdram_cfg2, int mcdram_cfg2_cnt) +{ + int i; + + if (!mcdram_cfg2) + return; + for (i = 0; i < mcdram_cfg2_cnt; i++) { + xfree(mcdram_cfg2[i].mcdram_cfg); + FREE_NULL_BITMAP(mcdram_cfg2[i].node_bitmap); + xfree(mcdram_cfg2[i].nid_str); + } + xfree(mcdram_cfg2); +} + static void _mcdram_cfg_log(mcdram_cfg_t *mcdram_cfg, int mcdram_cfg_cnt) { int i; @@ -826,6 +990,19 @@ static void _mcdram_cfg_log(mcdram_cfg_t *mcdram_cfg, int mcdram_cfg_cnt) } } +static void _mcdram_cfg2_log(mcdram_cfg2_t *mcdram_cfg2, int mcdram_cfg2_cnt) +{ + int i; + + if (!mcdram_cfg2) + return; + for (i = 0; i < mcdram_cfg2_cnt; i++) { + info("MCDRAM_CFG[%d]: nid_str:%s mcdram_cfg:%s hbm_pct:%d", + i, mcdram_cfg2[i].nid_str, mcdram_cfg2[i].mcdram_cfg, + mcdram_cfg2[i].hbm_pct); + } +} + static void _numa_cap_free(numa_cap_t *numa_cap, int numa_cap_cnt) { int i; @@ -862,6 +1039,20 @@ static void _numa_cfg_free(numa_cfg_t *numa_cfg, int numa_cfg_cnt) xfree(numa_cfg); } +static void _numa_cfg2_free(numa_cfg2_t *numa_cfg2, int numa_cfg2_cnt) +{ + int i; + + if (!numa_cfg2) + return; + for (i = 0; i < numa_cfg2_cnt; i++) { + xfree(numa_cfg2[i].nid_str); + xfree(numa_cfg2[i].numa_cfg); + FREE_NULL_BITMAP(numa_cfg2[i].node_bitmap); + } + xfree(numa_cfg2); +} + static void _numa_cfg_log(numa_cfg_t *numa_cfg, int numa_cfg_cnt) { int i; @@ -874,6 +1065,18 @@ static void _numa_cfg_log(numa_cfg_t *numa_cfg, int numa_cfg_cnt) } } +static void 
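The two loaders added above (_load_mcdram_type() and _load_numa_type()) both run cnselect with an expression such as hbmcachepct.eq.N or numa_cfg.eq.MODE, strip the trailing newline from its output, and hand back the resulting NID list. A rough sketch of that query-and-trim step; it uses popen() only to keep the example short, whereas the plugin builds an argv and forks through its _run_script() helper, and the command below is a stand-in:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Run a command, capture its stdout, strip the trailing newline and
 * return a heap copy (or NULL on failure). */
static char *run_query(const char *cmd)
{
        char buf[4096];
        size_t len;
        FILE *fp = popen(cmd, "r");

        if (!fp)
                return NULL;
        len = fread(buf, 1, sizeof(buf) - 1, fp);
        pclose(fp);
        buf[len] = '\0';
        if (len && (buf[len - 1] == '\n'))
                buf[len - 1] = '\0';
        return strdup(buf);
}

int main(void)
{
        /* Stand-in for "cnselect -e hbmcachepct.eq.100" or
         * "cnselect -e numa_cfg.eq.a2a" on a real Cray system. */
        char *nids = run_query("echo 12-15,20");

        if (nids) {
                printf("matching nids: %s\n", nids);
                free(nids);
        }
        return 0;
}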
_numa_cfg2_log(numa_cfg2_t *numa_cfg2, int numa_cfg2_cnt) +{ + int i; + + if (!numa_cfg2) + return; + for (i = 0; i < numa_cfg2_cnt; i++) { + info("NUMA_CFG[%d]: nid_str:%s numa_cfg:%s", + i, numa_cfg2[i].nid_str, numa_cfg2[i].numa_cfg); + } +} + /* Run a script and return its stdout plus exit status */ static char *_run_script(char *cmd_path, char **script_argv, int *status) { @@ -1100,7 +1303,7 @@ static void _update_all_node_features( if (numa_cfg) { for (i = 0; i < numa_cfg_cnt; i++) { snprintf(node_name, sizeof(node_name), - "%s%.*d", prefix, width, numa_cfg[i].nid); + "%s%.*u", prefix, width, numa_cfg[i].nid); node_ptr = find_node_record(node_name); if (node_ptr) { _merge_strings(&node_ptr->features_act, @@ -1286,6 +1489,7 @@ extern int init(void) (void) s_p_get_string(&capmc_path, "CapmcPath", tbl); (void) s_p_get_uint32(&capmc_poll_freq, "CapmcPollFreq", tbl); (void) s_p_get_uint32(&capmc_timeout, "CapmcTimeout", tbl); + (void) s_p_get_string(&cnselect_path, "CnselectPath", tbl); if (s_p_get_string(&tmp_str, "DefaultMCDRAM", tbl)) { default_mcdram = _knl_mcdram_parse(tmp_str, ","); if (_knl_mcdram_bits_cnt(default_mcdram) != 1) { @@ -1311,6 +1515,8 @@ extern int init(void) if (!capmc_path) capmc_path = xstrdup("/opt/cray/capmc/default/bin/capmc"); capmc_timeout = MAX(capmc_timeout, 500); + if (!cnselect_path) + cnselect_path = xstrdup("/opt/cray/sdb/default/bin/cnselect"); if (!syscfg_path) verbose("SyscfgPath is not configured"); @@ -1329,6 +1535,7 @@ extern int init(void) info("CapmcPath=%s", capmc_path); info("CapmcPollFreq=%u sec", capmc_poll_freq); info("CapmcTimeout=%u msec", capmc_timeout); + info("CnselectPath=%s", cnselect_path); info("DefaultMCDRAM=%s DefaultNUMA=%s", default_mcdram_str, default_numa_str); info("SyscfgPath=%s", syscfg_path); @@ -1349,6 +1556,7 @@ extern int fini(void) xfree(allowed_uid); allowed_uid_cnt = 0; xfree(capmc_path); + xfree(cnselect_path); capmc_timeout = 0; debug_flag = false; xfree(mcdram_per_node); @@ -1371,15 +1579,17 @@ extern int node_features_p_get_node(char *node_list) { json_object *j; json_object_iter iter; - int i, status = 0, rc = SLURM_SUCCESS; + int i, k, status = 0, rc = SLURM_SUCCESS; DEF_TIMERS; char *resp_msg, **script_argv; mcdram_cap_t *mcdram_cap = NULL; mcdram_cfg_t *mcdram_cfg = NULL; + mcdram_cfg2_t *mcdram_cfg2 = NULL; numa_cap_t *numa_cap = NULL; numa_cfg_t *numa_cfg = NULL; - int mcdram_cap_cnt = 0, mcdram_cfg_cnt = 0; - int numa_cap_cnt = 0, numa_cfg_cnt = 0; + numa_cfg2_t *numa_cfg2 = NULL; + int mcdram_cap_cnt = 0, mcdram_cfg_cnt = 0, mcdram_cfg2_cnt = 0; + int numa_cap_cnt = 0, numa_cfg_cnt = 0, numa_cfg2_cnt = 0; struct node_record *node_ptr; hostlist_t host_list; char *node_name; @@ -1476,6 +1686,8 @@ extern int node_features_p_get_node(char *node_list) } json_object_put(j); /* Frees json memory */ + mcdram_cfg2 = _load_current_mcdram(&mcdram_cfg2_cnt); + /* * Load available NUMA capabilities */ @@ -1559,11 +1771,54 @@ extern int node_features_p_get_node(char *node_list) } json_object_put(j); /* Frees json memory */ + numa_cfg2 = _load_current_numa(&numa_cfg2_cnt); + if (debug_flag) { _mcdram_cap_log(mcdram_cap, mcdram_cap_cnt); _mcdram_cfg_log(mcdram_cfg, mcdram_cfg_cnt); + _mcdram_cfg2_log(mcdram_cfg2, mcdram_cfg2_cnt); _numa_cap_log(numa_cap, numa_cap_cnt); _numa_cfg_log(numa_cfg, numa_cfg_cnt); + _numa_cfg2_log(numa_cfg2, numa_cfg2_cnt); + } + for (i = 0; i < mcdram_cfg_cnt; i++) { + for (k = 0; k < mcdram_cfg2_cnt; k++) { + if (!mcdram_cfg2[k].node_bitmap || + !bit_test(mcdram_cfg2[k].node_bitmap, + 
mcdram_cfg[i].nid)) + continue; + if (mcdram_cfg[i].mcdram_pct != + mcdram_cfg2[k].hbm_pct) { + debug("%s: HBM mismatch between capmc and cnselect for nid %u (%u != %d)", + __func__, mcdram_cfg[i].nid, + mcdram_cfg[i].mcdram_pct, + mcdram_cfg2[k].hbm_pct); + mcdram_cfg[i].mcdram_pct=mcdram_cfg2[k].hbm_pct; + xfree(mcdram_cfg[i].mcdram_cfg); + mcdram_cfg[i].mcdram_cfg = + xstrdup(mcdram_cfg2[k].mcdram_cfg); + } + break; + } + } + for (i = 0; i < numa_cfg_cnt; i++) { + for (k = 0; k < numa_cfg2_cnt; k++) { + if (!numa_cfg2[k].node_bitmap || + !bit_test(numa_cfg2[k].node_bitmap, + numa_cfg[i].nid)) + continue; + if (xstrcmp(numa_cfg[i].numa_cfg, + numa_cfg2[k].numa_cfg)) { + debug("%s: NUMA mismatch between capmc and cnselect for nid %u (%s != %s)", + __func__, numa_cfg[i].nid, + numa_cfg[i].numa_cfg, + numa_cfg2[k].numa_cfg); + xfree(numa_cfg[i].numa_cfg); + numa_cfg[i].numa_cfg = + xstrdup(numa_cfg2[k].numa_cfg); + } + break; + } } START_TIMER; @@ -1609,8 +1864,10 @@ extern int node_features_p_get_node(char *node_list) fini: _mcdram_cap_free(mcdram_cap, mcdram_cap_cnt); _mcdram_cfg_free(mcdram_cfg, mcdram_cfg_cnt); + _mcdram_cfg2_free(mcdram_cfg2, mcdram_cfg2_cnt); _numa_cap_free(numa_cap, numa_cap_cnt); _numa_cfg_free(numa_cfg, numa_cfg_cnt); + _numa_cfg2_free(numa_cfg2, numa_cfg2_cnt); return rc; } @@ -1619,6 +1876,8 @@ fini: _mcdram_cap_free(mcdram_cap, mcdram_cap_cnt); * avail_modes IN/OUT - append available modes, must be xfreed * current_mode IN/OUT - append current modes, must be xfreed * + * NOTE: Not applicable on Cray systems; can be used on other systems. + * * NOTES about syscfg (from Intel): * To display the BIOS Parameters: * >> syscfg /d biossettings <"BIOS variable Name"> @@ -1629,6 +1888,7 @@ fini: _mcdram_cap_free(mcdram_cap, mcdram_cap_cnt); */ extern void node_features_p_node_state(char **avail_modes, char **current_mode) { +#if 0 char *avail_states = NULL, *cur_state = NULL; char *resp_msg, *argv[10], *avail_sep = "", *cur_sep = "", *tok; int status = 0; @@ -1752,6 +2012,7 @@ extern void node_features_p_node_state(char **avail_modes, char **current_mode) } else { *current_mode = cur_state; } +#endif } /* Test if a job's feature specification is valid */ @@ -1779,6 +2040,18 @@ extern int node_features_p_job_valid(char *job_features) if (numa_cnt > 1) /* Multiple NUMA options */ return ESLURM_INVALID_KNL; + /* snc4 only allowed with cache today due to invalid config information + * reported by kernel to hwloc, then to Slurm */ + if (!job_numa) { + job_numa = default_numa; + } + if (!job_mcdram) { + job_mcdram = default_mcdram; + } + if (job_numa == KNL_SNC4 && job_mcdram != KNL_CACHE) { + return ESLURM_INVALID_KNL; + } + return SLURM_SUCCESS; } diff --git a/src/plugins/priority/multifactor/priority_multifactor.c b/src/plugins/priority/multifactor/priority_multifactor.c index 617303a158e1c621f30c7dd510c1f2fef0315648..5843b6d788d5ccd3bcd173d5fc30ffa1f3897ebe 100644 --- a/src/plugins/priority/multifactor/priority_multifactor.c +++ b/src/plugins/priority/multifactor/priority_multifactor.c @@ -445,8 +445,7 @@ static int _set_children_usage_efctv(List children_list) */ static double _get_fairshare_priority(struct job_record *job_ptr) { - slurmdb_assoc_rec_t *job_assoc = - (slurmdb_assoc_rec_t *)job_ptr->assoc_ptr; + slurmdb_assoc_rec_t *job_assoc; slurmdb_assoc_rec_t *fs_assoc = NULL; double priority_fs = 0.0; assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, @@ -455,7 +454,12 @@ static double _get_fairshare_priority(struct job_record *job_ptr) if 
(!calc_fairshare) return 0; + assoc_mgr_lock(&locks); + + job_assoc = (slurmdb_assoc_rec_t *)job_ptr->assoc_ptr; + if (!job_assoc) { + assoc_mgr_unlock(&locks); error("Job %u has no association. Unable to " "compute fairshare.", job_ptr->job_id); return 0; @@ -467,8 +471,6 @@ static double _get_fairshare_priority(struct job_record *job_ptr) else fs_assoc = job_assoc; - assoc_mgr_lock(&locks); - if (fuzzy_equal(fs_assoc->usage->usage_efctv, NO_VAL)) priority_p_set_assoc_usage(fs_assoc); diff --git a/src/plugins/proctrack/cray/proctrack_cray.c b/src/plugins/proctrack/cray/proctrack_cray.c index 838029f9034a96483a3c3ab7dd57134283dacb4c..e1c2d7aac41ca22cdc849c30079dcebdea91af3a 100644 --- a/src/plugins/proctrack/cray/proctrack_cray.c +++ b/src/plugins/proctrack/cray/proctrack_cray.c @@ -85,22 +85,25 @@ static void *_create_container_thread(void *args) { stepd_step_rec_t *job = (stepd_step_rec_t *)args; - if ((job->cont_id = (uint64_t)job_create(0, job->uid, 0)) - == (jid_t)-1) { - error ("Failed to create job container: %m"); - return NULL; - } + job->cont_id = (uint64_t)job_create(0, job->uid, 0); /* Signal the container_create we are done */ slurm_mutex_lock(¬ify_mutex); + + /* We need to signal failure or not */ pthread_cond_signal(¬ify); + /* Don't unlock the notify_mutex here, wait, it is not needed * and can cause deadlock if done. */ - /* Wait around for something else to be added and then exit - when that takes place. - */ - pthread_cond_wait(¬ify, ¬ify_mutex); + if (job->cont_id == (jid_t)-1) + error("Failed to create job container: %m"); + else + /* Wait around for something else to be added and then exit + when that takes place. + */ + pthread_cond_wait(¬ify, ¬ify_mutex); + slurm_mutex_unlock(¬ify_mutex); return NULL; @@ -187,10 +190,10 @@ extern int proctrack_p_create(stepd_step_rec_t *job) pthread_cond_wait(¬ify, ¬ify_mutex); slurm_mutex_unlock(¬ify_mutex); slurm_mutex_unlock(&thread_mutex); - - debug("proctrack_p_create: created jid " - "0x%08lx thread 0x%08lx", - job->cont_id, threadid); + if (job->cont_id != (jid_t)-1) + debug("proctrack_p_create: created jid " + "0x%08lx thread 0x%08lx", + job->cont_id, threadid); } else error("proctrack_p_create: already have a cont_id"); diff --git a/src/plugins/proctrack/linuxproc/kill_tree.c b/src/plugins/proctrack/linuxproc/kill_tree.c index 83d792afef096f0e1679329277ac8d7f01c9f1db..48dac22790a6a842b2acaf3240cea266c9696d53 100644 --- a/src/plugins/proctrack/linuxproc/kill_tree.c +++ b/src/plugins/proctrack/linuxproc/kill_tree.c @@ -124,7 +124,8 @@ static void _push_to_hashtbl(pid_t ppid, pid_t pid, static int _get_myname(char *s) { - char path[PATH_MAX], rbuf[1024]; + char path[PATH_MAX], *rbuf; + ssize_t buf_used; int fd; sprintf(path, "/proc/%ld/stat", (long)getpid()); @@ -132,16 +133,21 @@ static int _get_myname(char *s) error("Cannot open /proc/getpid()/stat"); return -1; } - if (read(fd, rbuf, 1024) <= 0) { + rbuf = xmalloc(4096); + buf_used = read(fd, rbuf, 4096); + if ((buf_used <= 0) || (buf_used >= 4096)) { error("Cannot read /proc/getpid()/stat"); + xfree(rbuf); close(fd); return -1; } close(fd); if (sscanf(rbuf, "%*d %s ", s) != 1) { error("Cannot get the command name from /proc/getpid()/stat"); + xfree(rbuf); return -1; } + xfree(rbuf); return 0; } @@ -149,7 +155,8 @@ static xppid_t **_build_hashtbl(void) { DIR *dir; struct dirent *de; - char path[PATH_MAX], *endptr, *num, rbuf[1024]; + char path[PATH_MAX], *endptr, *num, *rbuf; + ssize_t buf_used; char myname[1024], cmd[1024]; char state; int fd; @@ -167,6 +174,7 @@ static 
xppid_t **_build_hashtbl(void) hashtbl = (xppid_t **)xmalloc(HASH_LEN * sizeof(xppid_t *)); slurm_seterrno(0); + rbuf = xmalloc(4096); while ((de = readdir(dir)) != NULL) { num = de->d_name; if ((num[0] < '0') || (num[0] > '9')) @@ -183,7 +191,8 @@ static xppid_t **_build_hashtbl(void) if ((fd = open(path, O_RDONLY)) < 0) { continue; } - if (read(fd, rbuf, 1024) <= 0) { + buf_used = read(fd, rbuf, 4096); + if ((buf_used <= 0) || (buf_used >= 4096)) { close(fd); continue; } @@ -202,6 +211,7 @@ static xppid_t **_build_hashtbl(void) _push_to_hashtbl((pid_t)ppid, (pid_t)pid, xstrcmp(myname, cmd), cmd, hashtbl); } + xfree(rbuf); closedir(dir); return hashtbl; } @@ -317,39 +327,49 @@ extern int kill_proc_tree(pid_t top, int sig) */ extern pid_t find_ancestor(pid_t process, char *process_name) { - char path[PATH_MAX], rbuf[1024]; + char path[PATH_MAX], *rbuf; + ssize_t buf_used; int fd; long pid, ppid; + rbuf = xmalloc_nz(4096); pid = ppid = (long)process; do { if (ppid <= 1) { - return 0; + pid = 0; + break; } sprintf(path, "/proc/%ld/stat", ppid); if ((fd = open(path, O_RDONLY)) < 0) { - return 0; + pid = 0; + break; } - if (read(fd, rbuf, 1024) <= 0) { + memset(rbuf, 0, 4096); + buf_used = read(fd, rbuf, 4096); + if ((buf_used <= 0) || (buf_used >= 4096)) { close(fd); - return 0; + pid = 0; + break; } close(fd); if (sscanf(rbuf, "%ld %*s %*s %ld", &pid, &ppid) != 2) { - return 0; + pid = 0; + break; } sprintf(path, "/proc/%ld/cmdline", pid); if ((fd = open(path, O_RDONLY)) < 0) { continue; } - if (read(fd, rbuf, 1024) <= 0) { + buf_used = read(fd, rbuf, 4096); + if ((buf_used <= 0) || (buf_used >= 4096)) { close(fd); continue; } close(fd); } while (!strstr(rbuf, process_name)); + xfree(rbuf); return pid; } diff --git a/src/plugins/proctrack/pgid/proctrack_pgid.c b/src/plugins/proctrack/pgid/proctrack_pgid.c index 869136b165523aa7b39a5dc1cf853a2487170b02..196db6168d027f4691cb3d6c2c8ca426e3fbf0c2 100644 --- a/src/plugins/proctrack/pgid/proctrack_pgid.c +++ b/src/plugins/proctrack/pgid/proctrack_pgid.c @@ -196,7 +196,8 @@ proctrack_p_get_pids(uint64_t cont_id, pid_t **pids, int *npids) { DIR *dir; struct dirent *de; - char path[PATH_MAX], *endptr, *num, rbuf[1024]; + char path[PATH_MAX], *endptr, *num, *rbuf; + ssize_t buf_used; char cmd[1024]; char state; int fd, rc = SLURM_SUCCESS; @@ -209,6 +210,7 @@ proctrack_p_get_pids(uint64_t cont_id, pid_t **pids, int *npids) rc = SLURM_ERROR; goto fini; } + rbuf = xmalloc(4096); while ((de = readdir(dir)) != NULL) { num = de->d_name; if ((num[0] < '0') || (num[0] > '9')) @@ -223,7 +225,8 @@ proctrack_p_get_pids(uint64_t cont_id, pid_t **pids, int *npids) if ((fd = open(path, O_RDONLY)) < 0) { continue; } - if (read(fd, rbuf, 1024) <= 0) { + buf_used = read(fd, rbuf, 4096); + if ((buf_used <= 0) || (buf_used >= 4096)) { close(fd); continue; } @@ -243,6 +246,7 @@ proctrack_p_get_pids(uint64_t cont_id, pid_t **pids, int *npids) xrealloc(pid_array, sizeof(pid_t) * (pid_count + 1)); pid_array[pid_count++] = pid; } + xfree(rbuf); closedir(dir); fini: *pids = pid_array; diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index 9167352c7f430a3cb0e0947888fc3f15dd771c52..95f3fb66735a68a495694103e21b9999d4e96089 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -1345,6 +1345,11 @@ next_task: continue; if (!avail_front_end(job_ptr)) continue; /* No available frontend */ + if (!job_independent(job_ptr, 0)) { + /* No longer independent + * (e.g. 
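The kill_tree.c and proctrack_pgid.c changes above move the /proc/<pid>/stat reads from a 1 KB stack buffer to a 4 KB heap buffer and treat a read that fills the buffer completely as a failure, since the record may have been truncated. A stand-alone Linux-only sketch of that pattern, reading the calling process's own stat file with plain calloc instead of xmalloc:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define STAT_BUF_SIZE 4096

int main(void)
{
        char path[64], comm[64];
        char *rbuf;
        ssize_t buf_used;
        int fd;

        snprintf(path, sizeof(path), "/proc/%ld/stat", (long) getpid());
        if ((fd = open(path, O_RDONLY)) < 0)
                return 1;

        rbuf = calloc(1, STAT_BUF_SIZE);   /* zeroed, so always terminated */
        buf_used = read(fd, rbuf, STAT_BUF_SIZE);
        close(fd);

        /* A read that fills the buffer completely may be truncated. */
        if ((buf_used <= 0) || (buf_used >= STAT_BUF_SIZE)) {
                free(rbuf);
                return 1;
        }
        if (sscanf(rbuf, "%*d %63s", comm) == 1)
                printf("command field: %s\n", comm);
        free(rbuf);
        return 0;
}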
another singleton started) */ + continue; + } job_ptr->time_limit = save_time_limit; job_ptr->part_ptr = part_ptr; @@ -1503,13 +1508,16 @@ next_task: } if ((job_ptr->start_time <= now) && ((bb = bb_g_job_test_stage_in(job_ptr, true)) != 1)) { - xfree(job_ptr->state_desc); - if (bb == -1) { + if (job_ptr->state_reason != WAIT_NO_REASON) { + ; + } else if (bb == -1) { + xfree(job_ptr->state_desc); job_ptr->state_reason = WAIT_BURST_BUFFER_RESOURCE; job_ptr->start_time = bb_g_job_get_est_start(job_ptr); } else { /* bb == 0 */ + xfree(job_ptr->state_desc); job_ptr->state_reason=WAIT_BURST_BUFFER_STAGING; job_ptr->start_time = now + 1; } diff --git a/src/plugins/select/alps/select_alps.c b/src/plugins/select/alps/select_alps.c index 68c5330164e30d0ee3c72935936386ad1a9f154c..43cdd2e8d0448dc05a86a70d0f4b6b67e51c73a1 100644 --- a/src/plugins/select/alps/select_alps.c +++ b/src/plugins/select/alps/select_alps.c @@ -437,6 +437,11 @@ extern int select_p_job_signal(struct job_record *job_ptr, int signal) return other_job_signal(job_ptr, signal); } +extern int select_p_job_mem_confirm(struct job_record *job_ptr) +{ + return SLURM_SUCCESS; +} + extern int select_p_job_fini(struct job_record *job_ptr) { if (job_ptr == NULL) diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c index f9eb2bf0fc17f70ea80213c66c3b8198c3fe99e1..7560831a2830167bb65d20aed1ff2c1ce0ca5f34 100644 --- a/src/plugins/select/bluegene/select_bluegene.c +++ b/src/plugins/select/bluegene/select_bluegene.c @@ -1752,6 +1752,11 @@ extern int select_p_job_signal(struct job_record *job_ptr, int signal) return SLURM_SUCCESS; } +extern int select_p_job_mem_confirm(struct job_record *job_ptr) +{ + return SLURM_SUCCESS; +} + extern int select_p_job_fini(struct job_record *job_ptr) { int rc = SLURM_ERROR; diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index 9a8718a27792d50d1098f39b64688f576d46b06b..2ba7a1c048a3ca5ad9cc6b6561aeb76c475578d8 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -3081,6 +3081,10 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *node_bitmap, job_ptr->job_id, bit_set_count(node_bitmap)); } + if ((details_ptr->pn_min_memory == 0) && + (select_fast_schedule == 0)) + job_ptr->bit_flags |= NODE_MEM_CALC; /* To be calculated */ + orig_map = bit_copy(node_bitmap); avail_cores = _make_core_bitmap(node_bitmap, job_ptr->details->core_spec); diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index b82b1c659b076b6d21345011583b2e9a2eec425b..589159cd3af25cc02224620dd286eb0c992dfcb5 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -868,8 +868,13 @@ static int _add_job_to_res(struct job_record *job_ptr, int action) break; } if (!p_ptr) { + char *part_name; + if (job_ptr->part_ptr) + part_name = job_ptr->part_ptr->name; + else + part_name = job_ptr->partition; error("cons_res: could not find cr partition %s", - job_ptr->part_ptr->name); + part_name); return SLURM_ERROR; } if (!p_ptr->row) { @@ -2329,6 +2334,42 @@ extern int select_p_job_signal(struct job_record *job_ptr, int signal) return SLURM_SUCCESS; } +extern int select_p_job_mem_confirm(struct job_record *job_ptr) +{ + int i_first, i_last, i, offset; + uint32_t avail_mem, lowest_mem = 0; + + xassert(job_ptr); + + if (((job_ptr->bit_flags & NODE_MEM_CALC) == 0) || + (select_fast_schedule != 0)) + return 
SLURM_SUCCESS; + if ((job_ptr->details == NULL) || + (job_ptr->job_resrcs == NULL) || + (job_ptr->job_resrcs->node_bitmap == NULL) || + (job_ptr->job_resrcs->memory_allocated == NULL)) + return SLURM_ERROR; + i_first = bit_ffs(job_ptr->job_resrcs->node_bitmap); + if (i_first >= 0) + i_last = bit_fls(job_ptr->job_resrcs->node_bitmap); + else + i_last = i_first - 1; + for (i = i_first, offset = 0; i <= i_last; i++) { + if (!bit_test(job_ptr->job_resrcs->node_bitmap, i)) + continue; + avail_mem = select_node_record[i].real_memory - + select_node_record[i].mem_spec_limit; + job_ptr->job_resrcs->memory_allocated[offset] = avail_mem; + select_node_usage[i].alloc_memory = avail_mem; + if ((offset == 0) || (lowest_mem > avail_mem)) + lowest_mem = avail_mem; + offset++; + } + job_ptr->details->pn_min_memory = lowest_mem; + + return SLURM_SUCCESS; +} + extern int select_p_job_fini(struct job_record *job_ptr) { xassert(job_ptr); @@ -2787,14 +2828,17 @@ extern int select_p_reconfigure(void) } else if (_job_cleaning(job_ptr)) { cleaning_job_cnt++; run_time = (int) difftime(now, job_ptr->end_time); - info("Job %u is cleaning (Node Health Check running for %d secs)", - job_ptr->job_id, run_time); - /* Ideally we want to avoid using this job's resources - * until Node Health Check completes, but current logic - * (line below commented out) will let release resources - * from hung NHC for use by other jobs with - * "scontrol reconfig" command. */ - //_add_job_to_res(job_ptr, 0); + if (run_time >= 300) { + info("Job %u NHC hung for %d secs, releasing " + "resources now, may underflow later)", + job_ptr->job_id, run_time); + /* If/when NHC completes, it will release + * resources that are not marked as allocated + * to this job without line below. */ + //_add_job_to_res(job_ptr, 0); + } else { + _add_job_to_res(job_ptr, 0); + } } } list_iterator_destroy(job_iterator); diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c index 03449e866496308b0c47c9c20fb8ff24779ca151..3f902dc88c804e7732abcff95effc8601f6dfb9f 100644 --- a/src/plugins/select/cray/select_cray.c +++ b/src/plugins/select/cray/select_cray.c @@ -2057,6 +2057,13 @@ extern int select_p_job_signal(struct job_record *job_ptr, int signal) return other_job_signal(job_ptr, signal); } +extern int select_p_job_mem_confirm(struct job_record *job_ptr) +{ + xassert(job_ptr); + + return other_job_mem_confirm(job_ptr); +} + extern int select_p_job_fini(struct job_record *job_ptr) { select_jobinfo_t *jobinfo = job_ptr->select_jobinfo->data; @@ -2190,7 +2197,7 @@ extern int select_p_step_start(struct step_record *step_ptr) #endif jobinfo = step_ptr->job_ptr->select_jobinfo->data; - if (jobinfo->npc) { + if (jobinfo->npc && (step_ptr->step_id != SLURM_EXTERN_CONT)) { int i; select_jobinfo_t *step_jobinfo = step_ptr->select_jobinfo->data; select_nodeinfo_t *nodeinfo; diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index c02e36fa25b62240497cf7245265ca8788ea4cfd..cc16cdc824d5f155dca69faa9d0cb92dbeb33206 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -3743,6 +3743,11 @@ extern int select_p_job_signal(struct job_record *job_ptr, int signal) return SLURM_SUCCESS; } +extern int select_p_job_mem_confirm(struct job_record *job_ptr) +{ + return SLURM_SUCCESS; +} + /* * Note termination of job is starting. Executed from slurmctld. 
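select_p_job_mem_confirm() above rewrites a job's per-node memory figures when the job asked for all of a node's memory and FastSchedule=0 is in effect: usable memory is recomputed as real_memory minus mem_spec_limit on every allocated node, and the smallest value becomes the job's pn_min_memory. A toy sketch of that recalculation with made-up node data, not Slurm's node or job structures:

#include <stdint.h>
#include <stdio.h>

struct node_mem {
        uint32_t real_memory;           /* MB visible after (re)boot */
        uint32_t mem_spec_limit;        /* MB reserved for system use */
};

int main(void)
{
        /* e.g. two KNL nodes whose usable memory changed with MCDRAM mode */
        struct node_mem nodes[] = {
                { .real_memory =  96000, .mem_spec_limit = 2000 },
                { .real_memory = 112000, .mem_spec_limit = 2000 },
        };
        uint32_t allocated[2], lowest_mem = 0;
        int i;

        for (i = 0; i < 2; i++) {
                uint32_t avail = nodes[i].real_memory -
                                 nodes[i].mem_spec_limit;

                allocated[i] = avail;   /* per-node figure for the job */
                if ((i == 0) || (lowest_mem > avail))
                        lowest_mem = avail;
        }
        printf("pn_min_memory = %u MB\n", lowest_mem);
        for (i = 0; i < 2; i++)
                printf("node %d: %u MB allocated\n", i, allocated[i]);
        return 0;
}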
* IN job_ptr - pointer to job being terminated diff --git a/src/plugins/select/other/other_select.c b/src/plugins/select/other/other_select.c index 68babf73a45a3ec2af16abe72f4f7d6beec68405..0fb3286f0fb59ae3e596dab918d006d79d5c937e 100644 --- a/src/plugins/select/other/other_select.c +++ b/src/plugins/select/other/other_select.c @@ -77,6 +77,7 @@ const char *node_select_syms[] = { "select_p_job_expand", "select_p_job_resized", "select_p_job_signal", + "select_p_job_mem_confirm", "select_p_job_fini", "select_p_job_suspend", "select_p_job_resume", @@ -356,11 +357,23 @@ extern int other_job_resized(struct job_record *job_ptr, extern int other_job_signal(struct job_record *job_ptr, int signal) { if (other_select_init() < 0) - return -1; + return SLURM_ERROR; return (*(ops.job_signal))(job_ptr, signal); } +/* + * Pass job memory allocation confirmation request to other plugin. + * IN job_ptr - job to be signalled + */ +extern int other_job_mem_confirm(struct job_record *job_ptr) +{ + if (other_select_init() < 0) + return SLURM_ERROR; + + return (*(ops.job_mem_confirm))(job_ptr); +} + /* * Note termination of job is starting. Executed from slurmctld. * IN job_ptr - pointer to job being terminated diff --git a/src/plugins/select/other/other_select.h b/src/plugins/select/other/other_select.h index d53eb226becf696ab719edb2892271754736f3a1..51b080fef78b656899f9f8133fc5c0340b1ab905 100644 --- a/src/plugins/select/other/other_select.h +++ b/src/plugins/select/other/other_select.h @@ -212,6 +212,12 @@ extern int other_job_resized(struct job_record *job_ptr, */ extern int other_job_signal(struct job_record *job_ptr, int signal); +/* + * Pass job memory allocation confirmation request to other plugin. + * IN job_ptr - job to be signalled + */ +extern int other_job_mem_confirm(struct job_record *job_ptr); + /* * Note termination of job is starting. Executed from slurmctld. 
* IN job_ptr - pointer to job being terminated diff --git a/src/plugins/select/serial/select_serial.c b/src/plugins/select/serial/select_serial.c index 9e98b28c66f326e4da89bada97dc01b0d042131f..6a044d24dd35fef88f6b64db89c453473a3e709d 100644 --- a/src/plugins/select/serial/select_serial.c +++ b/src/plugins/select/serial/select_serial.c @@ -1711,6 +1711,11 @@ extern int select_p_job_signal(struct job_record *job_ptr, int signal) return SLURM_SUCCESS; } +extern int select_p_job_mem_confirm(struct job_record *job_ptr) +{ + return SLURM_SUCCESS; +} + extern int select_p_job_fini(struct job_record *job_ptr) { xassert(job_ptr); diff --git a/src/plugins/task/affinity/affinity.c b/src/plugins/task/affinity/affinity.c index d4f4a558c0d958fd09bcb07cb64918a3f286dc34..4cbfd1f36cdddad4dd4532dec1e87872002b3b98 100644 --- a/src/plugins/task/affinity/affinity.c +++ b/src/plugins/task/affinity/affinity.c @@ -290,12 +290,34 @@ int get_cpuset(cpu_set_t *mask, stepd_step_rec_t *job) return false; } +/* For sysctl() functions */ +#if defined(__FreeBSD__) || defined(__NetBSD__) +#include <sys/types.h> +#include <sys/sysctl.h> +#endif + +#define BUFFLEN 127 + /* Return true if Power7 processor */ static bool _is_power_cpu(void) { if (is_power == -1) { +#if defined(__FreeBSD__) || defined(__NetBSD__) + + char buffer[BUFFLEN+1]; + size_t len = BUFFLEN; + + if ( sysctlbyname("hw.model", buffer, &len, NULL, 0) == 0 ) + is_power = ( strstr(buffer, "POWER7") != NULL ); + else { + error("_get_is_power: sysctl could not retrieve hw.model"); + return false; + } + +#elif defined(__linux__) + FILE *cpu_info_file; - char buffer[128]; + char buffer[BUFFLEN+1]; char* _cpuinfo_path = "/proc/cpuinfo"; cpu_info_file = fopen(_cpuinfo_path, "r"); if (cpu_info_file == NULL) { @@ -312,6 +334,14 @@ static bool _is_power_cpu(void) } } fclose(cpu_info_file); + +#else + +/* Assuming other platforms don't support sysctlbyname() or /proc/cpuinfo */ +#warning "Power7 check not implemented for this platform." + is_power = 0; + +#endif } if (is_power == 1) @@ -335,7 +365,11 @@ void reset_cpuset(cpu_set_t *new_mask, cpu_set_t *cur_mask) if (slurm_getaffinity(1, sizeof(full_mask), &full_mask)) { /* Try to get full CPU mask from process init */ CPU_ZERO(&full_mask); +#ifdef __FreeBSD__ + CPU_OR(&full_mask, cur_mask); +#else CPU_OR(&full_mask, &full_mask, cur_mask); +#endif } CPU_ZERO(&newer_mask); for (cur_offset = 0; cur_offset < CPU_SETSIZE; cur_offset++) { @@ -360,7 +394,10 @@ int slurm_setaffinity(pid_t pid, size_t size, const cpu_set_t *mask) int rval; char mstr[1 + CPU_SETSIZE / 4]; -#ifdef SCHED_GETAFFINITY_THREE_ARGS +#ifdef __FreeBSD__ + rval = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, + pid, size, mask); +#elif defined(SCHED_GETAFFINITY_THREE_ARGS) rval = sched_setaffinity(pid, size, mask); #else rval = sched_setaffinity(pid, mask); @@ -378,7 +415,19 @@ int slurm_getaffinity(pid_t pid, size_t size, cpu_set_t *mask) char mstr[1 + CPU_SETSIZE / 4]; CPU_ZERO(mask); -#ifdef SCHED_GETAFFINITY_THREE_ARGS + + /* + * The FreeBSD cpuset API is a superset of the Linux API. + * In addition to PIDs, it supports threads, interrupts, + * jails, and potentially other objects. The first two arguments + * to cpuset_*etaffinity() below indicate that the third argument + * is a PID. -1 indicates the PID of the calling process. + * Linux sched_*etaffinity() uses 0 for this. 
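+ *
+ * Illustrative sketch only (not part of this change): pinning the calling
+ * process to CPU 0 on each platform, using the cpu_set_t/cpuset_t typedef
+ * from affinity.h and the 3-argument glibc form on Linux:
+ *
+ *	cpu_set_t set;
+ *	CPU_ZERO(&set);
+ *	CPU_SET(0, &set);
+ * #ifdef __FreeBSD__
+ *	cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
+ *			   sizeof(set), &set);
+ * #else
+ *	sched_setaffinity(0, sizeof(set), &set);
+ * #endif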
+ */ +#ifdef __FreeBSD__ + rval = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, + pid, size, mask); +#elif defined(SCHED_GETAFFINITY_THREE_ARGS) rval = sched_getaffinity(pid, size, mask); #else rval = sched_getaffinity(pid, mask); diff --git a/src/plugins/task/affinity/affinity.h b/src/plugins/task/affinity/affinity.h index eb40f3847329b6b10dbde50fbd39f01a8d38d211..2a0a656588cf8bbe18a16694fb68ba5fa8debcb4 100644 --- a/src/plugins/task/affinity/affinity.h +++ b/src/plugins/task/affinity/affinity.h @@ -32,6 +32,18 @@ * with SLURM; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ + +/* + * FreeBSD and Linux affinity functions have a slightly different interface + * and are defined in different headers. See platform-dependencies in + * affinity.c. + */ +#ifdef __FreeBSD__ +#include <sys/param.h> +#include <sys/cpuset.h> +typedef cpuset_t cpu_set_t; +#endif + #ifdef HAVE_CONFIG_H # include "config.h" #endif diff --git a/src/plugins/task/affinity/cpuset.c b/src/plugins/task/affinity/cpuset.c index 470fc78b667c4e0ce03402f076ad811f35eb367a..1af5b2ffc97898dd56bc981e80f027dd45a8568c 100644 --- a/src/plugins/task/affinity/cpuset.c +++ b/src/plugins/task/affinity/cpuset.c @@ -234,7 +234,8 @@ int slurm_set_cpuset(char *base, char *path, pid_t pid, size_t size, if (fd < 0) { error("open(%s): %m", file_path); } else { - rc = read(fd, mstr, sizeof(mstr)); + memset(mstr, 0, sizeof(mstr)); + rc = read(fd, mstr, sizeof(mstr)-1); /* Insure NULL at end */ close(fd); if (rc < 1) { error("read(%s): %m", file_path); @@ -305,7 +306,8 @@ int slurm_get_cpuset(char *path, pid_t pid, size_t size, cpu_set_t *mask) error("open(%s): %m", file_path); return SLURM_ERROR; } - rc = read(fd, mstr, sizeof(mstr)); + memset(mstr, 0, sizeof(mstr)); + rc = read(fd, mstr, sizeof(mstr)-1); /* Insure NULL at end */ close(fd); if (rc < 1) { error("read(%s): %m", file_path); diff --git a/src/sattach/sattach.c b/src/sattach/sattach.c index e0ea7e5fa5e4317e0abf59c0bd82e07c64a73d5c..9833d1c4ec5e788a3c3ba5aa50ab8d476d759e79 100644 --- a/src/sattach/sattach.c +++ b/src/sattach/sattach.c @@ -284,6 +284,7 @@ static slurm_cred_t *_generate_fake_cred(uint32_t jobid, uint32_t stepid, slurm_cred_arg_t arg; slurm_cred_t *cred; + memset(&arg, 0, sizeof(slurm_cred_arg_t)); arg.jobid = jobid; arg.stepid = stepid; arg.uid = uid; @@ -577,13 +578,16 @@ _exit_handler(message_thread_state_t *mts, slurm_msg_t *exit_msg) static void _handle_msg(void *arg, slurm_msg_t *msg) { - message_thread_state_t *mts = (message_thread_state_t *)arg; - uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); static uid_t slurm_uid; static bool slurm_uid_set = false; + message_thread_state_t *mts = (message_thread_state_t *)arg; + char *auth_info = slurm_get_auth_info(); + uid_t req_uid; uid_t uid = getuid(); + req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info); + xfree(auth_info); + if (!slurm_uid_set) { slurm_uid = slurm_get_slurm_user_id(); slurm_uid_set = true; diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index f4b3ccd33934cbaaf71c63f9bc22736fb1d173ef..b74dfb9cea3a5a18e80f377b50889c84d5713ed8 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -547,16 +547,20 @@ static void _print_daemons (void) { slurm_ctl_conf_info_msg_t *conf; - char me[MAX_SLURM_NAME], *b, *c, *n, *token, *save_ptr = NULL; + char node_name_short[MAX_SLURM_NAME]; + char 
node_name_long[MAX_SLURM_NAME]; + char *b, *c, *n, *token, *save_ptr = NULL; int actld = 0, ctld = 0, d = 0; char daemon_list[] = "slurmctld slurmd"; slurm_conf_init(NULL); conf = slurm_conf_lock(); - gethostname_short(me, MAX_SLURM_NAME); + gethostname_short(node_name_short, MAX_SLURM_NAME); + gethostname(node_name_long, MAX_SLURM_NAME); if ((b = conf->backup_controller)) { - if ((xstrcmp(b, me) == 0) || + if ((xstrcmp(b, node_name_short) == 0) || + (xstrcmp(b, node_name_long) == 0) || (xstrcasecmp(b, "localhost") == 0)) ctld = 1; } @@ -565,7 +569,8 @@ _print_daemons (void) c = xstrdup(conf->control_machine); token = strtok_r(c, ",", &save_ptr); while (token) { - if ((xstrcmp(token, me) == 0) || + if ((xstrcmp(token, node_name_short) == 0) || + (xstrcmp(token, node_name_long) == 0) || (xstrcasecmp(token, "localhost") == 0)) { ctld = 1; break; @@ -576,7 +581,7 @@ _print_daemons (void) } slurm_conf_unlock(); - if ((n = slurm_conf_get_nodename(me))) { + if ((n = slurm_conf_get_nodename(node_name_short))) { d = 1; xfree(n); } else if ((n = slurm_conf_get_aliased_nodename())) { diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c index df69d3a588e579d266da2b3474a1e16d1f4ed319..08daed982056d74c81309e0fc5be6630d0a4fa4f 100644 --- a/src/slurmctld/backup.c +++ b/src/slurmctld/backup.c @@ -398,8 +398,10 @@ static int _background_process_msg(slurm_msg_t * msg) if (msg->msg_type != REQUEST_PING) { bool super_user = false; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + char *auth_info = slurm_get_auth_info(); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info); + + xfree(auth_info); if ((uid == 0) || (uid == getuid())) super_user = true; diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 7dfbcd34b15d1c8ced1076d768d170df0be9deaf..c936f1c54cb97e78076b04edb419de77b5651300 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -465,6 +465,8 @@ int main(int argc, char *argv[]) while (1) { /* initialization for each primary<->backup switch */ + xfree(slurmctld_config.auth_info); + slurmctld_config.auth_info = slurm_get_auth_info(); slurmctld_config.shutdown_time = (time_t) 0; slurmctld_config.resume_backup = false; @@ -740,6 +742,7 @@ int main(int argc, char *argv[]) #endif + xfree(slurmctld_config.auth_info); xfree(slurmctld_cluster_name); if (cnt) { info("Slurmctld shutdown completing with %d active agent " diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index a9945c3cd80f4539ad86949d5c114e4793f6fe7f..b47749f8f88635d4316a5ae990af59b9dccdf398 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -4246,6 +4246,7 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, (error_code == ESLURM_RESERVATION_NOT_USABLE) || (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) || (error_code == ESLURM_POWER_NOT_AVAIL) || + (error_code == ESLURM_BURST_BUFFER_WAIT) || (error_code == ESLURM_POWER_RESERVED)) { /* Not fatal error, but job can't be scheduled right now */ if (immediate) { @@ -5225,7 +5226,8 @@ static int _part_access_check(struct part_record *part_ptr, if ((part_ptr->state_up & PARTITION_SCHED) && (job_desc->time_limit != NO_VAL) && - (job_desc->time_limit > part_ptr->max_time)) { + (job_desc->time_limit > part_ptr->max_time) && + (!qos_ptr || !(qos_ptr->flags & QOS_FLAG_PART_TIME_LIMIT))) { info("_part_access_check: Job time limit (%u) exceeds limit of " "partition %s(%u)", job_desc->time_limit, part_ptr->name, part_ptr->max_time); @@ -7354,6 +7356,36 @@ static 
bool _test_nodes_ready(struct job_record *job_ptr) } #endif +/* + * Modify a job's memory limit if allocated all memory on a node and the node + * reboots, possibly with a different memory size (e.g. KNL MCDRAM mode changed) + */ +extern void job_validate_mem(struct job_record *job_ptr) +{ + uint64_t tres_count; + + if ((job_ptr->bit_flags & NODE_MEM_CALC) && + (slurmctld_conf.fast_schedule == 0)) { + select_g_job_mem_confirm(job_ptr); + tres_count = (uint64_t)job_ptr->details->pn_min_memory; + if (tres_count & MEM_PER_CPU) { + tres_count &= (~MEM_PER_CPU); + tres_count *= job_ptr->tres_alloc_cnt[TRES_ARRAY_CPU]; + } else { + tres_count *= job_ptr->tres_alloc_cnt[TRES_ARRAY_NODE]; + } + job_ptr->tres_alloc_cnt[TRES_ARRAY_MEM] = tres_count; + job_ptr->tres_alloc_str = + assoc_mgr_make_tres_str_from_array( + job_ptr->tres_alloc_cnt, TRES_STR_FLAG_SIMPLE, true); + + job_ptr->tres_fmt_alloc_str = + assoc_mgr_make_tres_str_from_array( + job_ptr->tres_alloc_cnt, TRES_STR_CONVERT_UNITS, true); + jobacct_storage_job_start_direct(acct_db_conn, job_ptr); + } +} + /* * job_time_limit - terminate jobs which have exceeded their time limit * global: job_list - pointer global job list @@ -7398,6 +7430,12 @@ void job_time_limit(void) info("%s: Configuration for job %u is complete", __func__, job_ptr->job_id); job_config_fini(job_ptr); + if (job_ptr->bit_flags & NODE_REBOOT) { + job_ptr->bit_flags &= (~NODE_REBOOT); + job_validate_mem(job_ptr); + if (job_ptr->batch_flag) + launch_job(job_ptr); + } } #endif /* This needs to be near the top of the loop, checks every diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index d8d3e7b972e2a1eaa8d700e930d766d2849d1d73..df11cb9aba4d094a5ee157e15f0f6d5aaef1f635 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -137,6 +137,7 @@ static pthread_mutex_t sched_mutex = PTHREAD_MUTEX_INITIALIZER; static int sched_pend_thread = 0; static bool sched_running = false; static struct timeval sched_last = {0, 0}; +static uint32_t max_array_size = NO_VAL; #ifdef HAVE_ALPS_CRAY static int sched_min_interval = 1000000; #else @@ -835,7 +836,8 @@ next_part: part_ptr = (struct part_record *) info("sched: Allocate JobId=%u Partition=%s NodeList=%s #CPUs=%u", job_ptr->job_id, job_ptr->part_ptr->name, job_ptr->nodes, job_ptr->total_cpus); - if (job_ptr->details->prolog_running == 0) { + if ((job_ptr->details->prolog_running == 0) && + ((job_ptr->bit_flags & NODE_REBOOT) == 0)) { launch_msg = build_launch_job_msg(job_ptr, msg->protocol_version); } @@ -1093,7 +1095,7 @@ static int _schedule(uint32_t job_limit) ListIterator job_iterator = NULL, part_iterator = NULL; List job_queue = NULL; int failed_part_cnt = 0, failed_resv_cnt = 0, job_cnt = 0; - int error_code, bb, i, j, part_cnt, time_limit, pend_time; + int error_code, i, j, part_cnt, time_limit, pend_time; uint32_t job_depth = 0, array_task_id; job_queue_rec_t *job_queue_rec; struct job_record *job_ptr = NULL; @@ -1714,30 +1716,6 @@ next_task: last_job_sched_start = MAX(last_job_sched_start, job_ptr->start_time); - bb = bb_g_job_test_stage_in(job_ptr, false); - if (bb != 1) { - if (bb == 0) { - job_ptr->state_reason = - WAIT_BURST_BUFFER_STAGING; - } else { - job_ptr->state_reason = - WAIT_BURST_BUFFER_RESOURCE; - } - if (job_ptr->start_time == 0) { - job_ptr->start_time = last_job_sched_start; - bb_wait_cnt++; - } - xfree(job_ptr->state_desc); - last_job_update = now; - debug3("sched: JobId=%u. State=%s. Reason=%s. 
" - "Priority=%u.", - job_ptr->job_id, - job_state_string(job_ptr->job_state), - job_reason_string(job_ptr->state_reason), - job_ptr->priority); - continue; - } - if (deadline_time_limit) { save_time_limit = job_ptr->time_limit; job_ptr->time_limit = deadline_time_limit; @@ -1757,6 +1735,18 @@ next_task: job_reason_string(job_ptr->state_reason), job_ptr->priority, job_ptr->partition); fail_by_part = true; + } else if (error_code == ESLURM_BURST_BUFFER_WAIT) { + if (job_ptr->start_time == 0) { + job_ptr->start_time = last_job_sched_start; + bb_wait_cnt++; + } + debug3("sched: JobId=%u. State=%s. Reason=%s. " + "Priority=%u.", + job_ptr->job_id, + job_state_string(job_ptr->job_state), + job_reason_string(job_ptr->state_reason), + job_ptr->priority); + continue; } else if ((error_code == ESLURM_RESERVATION_BUSY) || (error_code == ESLURM_RESERVATION_NOT_USABLE)) { if (job_ptr->resv_ptr && @@ -1821,8 +1811,10 @@ next_task: #endif if (job_ptr->batch_flag == 0) srun_allocate(job_ptr->job_id); - else if (job_ptr->details->prolog_running == 0) + else if ((job_ptr->details->prolog_running == 0) && + ((job_ptr->bit_flags & NODE_REBOOT) == 0)) { launch_job(job_ptr); + } rebuild_job_part_list(job_ptr); job_cnt++; if (is_job_array_head && @@ -2645,6 +2637,75 @@ extern int test_job_dependency(struct job_record *job_ptr) return results; } +/* Given a new job dependency specification, expand job array specifications + * into a collection of task IDs that update_job_dependency can parse. + * (e.g. "after:123_[4-5]" to "after:123_4:123_5") + * Returns NULL if not valid job array specification. + * Returned value must be xfreed. */ +static char *_xlate_array_dep(char *new_depend) +{ + char *new_array_dep = NULL, *array_tmp, *jobid_ptr = NULL, *sep; + bitstr_t *array_bitmap; + int i; + uint32_t job_id; + int32_t t, t_first, t_last; + + if (strstr(new_depend, "_[") == NULL) + return NULL; /* No job array expressions */ + + if (max_array_size == NO_VAL) { + slurm_ctl_conf_t *conf; + conf = slurm_conf_lock(); + max_array_size = conf->max_array_sz; + slurm_conf_unlock(); + } + + for (i = 0; new_depend[i]; i++) { + xstrfmtcat(new_array_dep, "%c", new_depend[i]); + if ((new_depend[i] >= '0') && (new_depend[i] <= '9')) { + if (jobid_ptr == NULL) + jobid_ptr = new_depend + i; + } else if ((new_depend[i] == '_') && (new_depend[i+1] == '[') && + (jobid_ptr != NULL)) { + job_id = (uint32_t) atol(jobid_ptr); + i += 2; /* Skip over "_[" */ + array_tmp = xstrdup(new_depend + i); + sep = strchr(array_tmp, ']'); + if (sep) + sep[0] = '\0'; + array_bitmap = bit_alloc(max_array_size); + if ((sep == NULL) || + (bit_unfmt(array_bitmap, array_tmp) != 0) || + ((t_first = bit_ffs(array_bitmap)) == -1)) { + /* Invalid format */ + xfree(array_tmp); + bit_free(array_bitmap); + xfree(new_array_dep); + return NULL; + } + i += (sep - array_tmp); /* Move to location of ']' */ + xfree(array_tmp); + t_last = bit_fls(array_bitmap); + for (t = t_first; t <= t_last; t++) { + if (!bit_test(array_bitmap, t)) + continue; + if (t == t_first) { + xstrfmtcat(new_array_dep, "%d", t); + } else { + xstrfmtcat(new_array_dep, ":%u_%d", + job_id, t); + } + } + bit_free(array_bitmap); + jobid_ptr = NULL; + } else { + jobid_ptr = NULL; + } + } + + return new_array_dep; +} + /* * Parse a job dependency string and use it to establish a "depend_spec" * list of dependencies. 
We accept both old format (a single job ID) and @@ -2659,7 +2720,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) uint16_t depend_type = 0; uint32_t job_id = 0; uint32_t array_task_id; - char *tok = new_depend, *sep_ptr, *sep_ptr2 = NULL; + char *tok, *new_array_dep, *sep_ptr, *sep_ptr2 = NULL; List new_depend_list = NULL; struct depend_spec *dep_ptr; struct job_record *dep_job_ptr; @@ -2680,10 +2741,12 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) } new_depend_list = list_create(_depend_list_del); - + if ((new_array_dep = _xlate_array_dep(new_depend))) + tok = new_array_dep; + else + tok = new_depend; /* validate new dependency string */ while (rc == SLURM_SUCCESS) { - /* test singleton dependency flag */ if ( strncasecmp(tok, "singleton", 9) == 0 ) { depend_type = SLURM_DEPEND_SINGLETON; @@ -2902,6 +2965,7 @@ extern int update_job_dependency(struct job_record *job_ptr, char *new_depend) } else { FREE_NULL_LIST(new_depend_list); } + xfree(new_array_dep); return rc; } @@ -3618,6 +3682,7 @@ static void *_wait_boot(void *arg) lock_slurmctld(job_write_lock); prolog_running_decr(job_ptr); + job_validate_mem(job_ptr); unlock_slurmctld(job_write_lock); return NULL; @@ -3800,6 +3865,7 @@ extern void prolog_running_decr(struct job_record *job_ptr) job_ptr->job_state &= ~JOB_CONFIGURING; if (job_ptr->batch_flag && + ((job_ptr->bit_flags & NODE_REBOOT) == 0) && (IS_JOB_RUNNING(job_ptr) || IS_JOB_SUSPENDED(job_ptr))) { launch_job(job_ptr); } diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index b77d99b59700af7537707e79be6d7f2b0796258b..238939322fd3b41a4fd9b33cd7c0a5d0727da489 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -2148,7 +2148,7 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, bitstr_t **select_node_bitmap, char *unavail_node_str, char **err_msg) { - int error_code = SLURM_SUCCESS, i, node_set_size = 0; + int bb, error_code = SLURM_SUCCESS, i, node_set_size = 0; bitstr_t *select_bitmap = NULL; struct node_set *node_set_ptr = NULL; struct part_record *part_ptr = NULL; @@ -2215,6 +2215,17 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, return ESLURM_JOB_HELD; } + bb = bb_g_job_test_stage_in(job_ptr, test_only); + if (bb != 1) { + xfree(job_ptr->state_desc); + last_job_update = now; + if (bb == 0) + job_ptr->state_reason = WAIT_BURST_BUFFER_STAGING; + else + job_ptr->state_reason = WAIT_BURST_BUFFER_RESOURCE; + return ESLURM_BURST_BUFFER_WAIT; + } + /* build sets of usable nodes based upon their configuration */ can_reboot = node_features_g_user_update(job_ptr->user_id); error_code = _build_node_list(job_ptr, &node_set_ptr, &node_set_size, @@ -2576,7 +2587,7 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, bit_overlap(job_ptr->node_bitmap, power_node_bitmap) || !bit_super_set(job_ptr->node_bitmap, avail_node_bitmap)) { job_ptr->job_state |= JOB_CONFIGURING; -} + } /* Request asynchronous launch of a prolog for a * non batch job. */ @@ -2713,6 +2724,31 @@ static void _launch_prolog(struct job_record *job_ptr) agent_arg_ptr->msg_type = REQUEST_LAUNCH_PROLOG; agent_arg_ptr->msg_args = (void *) prolog_msg_ptr; + /* At least on a Cray we have to treat this as a real step, so + * this is where to do it. 
+ */ + if (slurmctld_conf.prolog_flags & PROLOG_FLAG_CONTAIN) { + struct step_record step_rec; + slurm_step_layout_t layout; + + memset(&step_rec, 0, sizeof(step_rec)); + memset(&layout, 0, sizeof(layout)); + +#ifdef HAVE_FRONT_END + layout.node_list = job_ptr->front_end_ptr->name; +#else + layout.node_list = job_ptr->nodes; +#endif + layout.node_cnt = agent_arg_ptr->node_count; + + step_rec.step_layout = &layout; + step_rec.step_id = SLURM_EXTERN_CONT; + step_rec.job_ptr = job_ptr; + step_rec.name = "external"; + + select_g_step_start(&step_rec); + } + /* Launch the RPC via agent */ agent_queue_request(agent_arg_ptr); } diff --git a/src/slurmctld/port_mgr.c b/src/slurmctld/port_mgr.c index 1d55b5069888c2443201f58359a878183ca4e48c..0ee4d0c625c450d48dff3e64b1b593869bce71a8 100644 --- a/src/slurmctld/port_mgr.c +++ b/src/slurmctld/port_mgr.c @@ -87,13 +87,16 @@ static void _rebuild_port_array(struct step_record *step_ptr) char *tmp_char; hostlist_t hl; - i = strlen(step_ptr->resv_ports); - tmp_char = xmalloc(i+3); - sprintf(tmp_char, "[%s]", step_ptr->resv_ports); + tmp_char = xstrdup_printf("[%s]", step_ptr->resv_ports); hl = hostlist_create(tmp_char); - if (!hl) - fatal("Invalid reserved ports: %s", step_ptr->resv_ports); xfree(tmp_char); + if (!hl) { + error("Step %u.%u has invalid reserved ports: %s", + step_ptr->job_ptr->job_id, step_ptr->step_id, + step_ptr->resv_ports); + xfree(step_ptr->resv_ports); + return; + } step_ptr->resv_port_array = xmalloc(sizeof(int) * step_ptr->resv_port_cnt); @@ -222,9 +225,13 @@ extern int resv_port_alloc(struct step_record *step_ptr) { int i, port_inx; int *port_array = NULL; - char port_str[16], *tmp_str; + char port_str[16]; hostlist_t hl; static int last_port_alloc = 0; + static int dims = -1; + + if (dims == -1) + dims = slurmdb_setup_cluster_name_dims(); if (step_ptr->resv_port_cnt > port_resv_cnt) { info("step %u.%u needs %u reserved ports, but only %d exist", @@ -264,18 +271,11 @@ extern int resv_port_alloc(struct step_record *step_ptr) hostlist_push_host(hl, port_str); } hostlist_sort(hl); - step_ptr->resv_ports = hostlist_ranged_string_xmalloc(hl); + /* get the ranged string with no brackets on it */ + step_ptr->resv_ports = hostlist_ranged_string_xmalloc_dims(hl, dims, 0); hostlist_destroy(hl); step_ptr->resv_port_array = port_array; - if (step_ptr->resv_ports[0] == '[') { - /* Remove brackets from hostlist */ - step_ptr->resv_ports[i-1] = '\0'; - tmp_str = xstrdup(step_ptr->resv_ports + 1); - xfree(step_ptr->resv_ports); - step_ptr->resv_ports = tmp_str; - } - debug("reserved ports %s for step %u.%u", step_ptr->resv_ports, step_ptr->job_ptr->job_id, step_ptr->step_id); diff --git a/src/slurmctld/power_save.c b/src/slurmctld/power_save.c index ac2fc312e9e64812cae8f3ab19e62566d1bd17d8..1ac60064fa3f2c068f059f9f20aa5ea668ef5f69 100644 --- a/src/slurmctld/power_save.c +++ b/src/slurmctld/power_save.c @@ -285,6 +285,7 @@ extern int power_job_reboot(struct job_record *job_ptr) if (nodes) { job_ptr->job_state |= JOB_CONFIGURING; job_ptr->wait_all_nodes = 1; + job_ptr->bit_flags |= NODE_REBOOT; if (job_ptr->details && job_ptr->details->features && node_features_g_user_update(job_ptr->user_id)) { features = node_features_g_job_xlate( diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 7ac3cc7d6f7ed562630c8db4e72fbf3c79725412..1b4c7f0b24d1f7dc02cf59eacf00e4139dda7d65 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -225,7 +225,7 @@ void slurmctld_req(slurm_msg_t *msg, connection_arg_t *arg) /* Just to validate 
the cred */ rpc_uid = (uint32_t) g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + slurmctld_config.auth_info); if (g_slurm_auth_errno(msg->auth_cred) != SLURM_SUCCESS) { error("Bad authentication: %s", g_slurm_auth_errstr(g_slurm_auth_errno(msg->auth_cred))); @@ -968,7 +968,8 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) /* Locks: Read config, write job, write node, read partition */ slurmctld_lock_t job_write_lock = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); int immediate = job_desc_msg->immediate; bool do_unlock = false; bool job_waiting = false; @@ -1123,13 +1124,8 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg) alloc_msg.account = xstrdup(job_ptr->account); if (job_ptr->qos_ptr) { slurmdb_qos_rec_t *qos; - qos = (slurmdb_qos_rec_t *)job_ptr->qos_ptr; - if (xstrcmp(qos->description, - "Normal QOS default") == 0) - alloc_msg.qos = xstrdup("normal"); - else - alloc_msg.qos = xstrdup(qos->description); + alloc_msg.qos = xstrdup(qos->name); } if (job_ptr->resv_name) alloc_msg.resv_name = xstrdup(job_ptr->resv_name); @@ -1180,7 +1176,8 @@ static void _slurm_rpc_dump_conf(slurm_msg_t * msg) /* Locks: Read config, partition*/ slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_BUILD_INFO from uid=%d", @@ -1223,7 +1220,8 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg) /* Locks: Read config job, write partition (for hiding) */ slurmctld_lock_t job_read_lock = { READ_LOCK, READ_LOCK, NO_LOCK, WRITE_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug3("Processing RPC: REQUEST_JOB_INFO from uid=%d", uid); @@ -1237,7 +1235,7 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg) pack_all_jobs(&dump, &dump_size, job_info_request_msg->show_flags, g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()), + slurmctld_config.auth_info), NO_VAL, msg->protocol_version); unlock_slurmctld(job_read_lock); END_TIMER2("_slurm_rpc_dump_jobs"); @@ -1272,7 +1270,8 @@ static void _slurm_rpc_dump_jobs_user(slurm_msg_t * msg) /* Locks: Read config job, write node (for hiding) */ slurmctld_lock_t job_read_lock = { READ_LOCK, READ_LOCK, NO_LOCK, WRITE_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug3("Processing RPC: REQUEST_JOB_USER_INFO from uid=%d", uid); @@ -1280,7 +1279,7 @@ static void _slurm_rpc_dump_jobs_user(slurm_msg_t * msg) pack_all_jobs(&dump, &dump_size, job_info_request_msg->show_flags, g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()), + slurmctld_config.auth_info), job_info_request_msg->user_id, msg->protocol_version); unlock_slurmctld(job_read_lock); END_TIMER2("_slurm_rpc_dump_job_user"); @@ -1313,7 +1312,8 @@ static void _slurm_rpc_dump_job_single(slurm_msg_t * msg) /* Locks: Read config, job, and node info */ slurmctld_lock_t job_read_lock = { READ_LOCK, READ_LOCK, NO_LOCK, READ_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = 
g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug3("Processing RPC: REQUEST_JOB_INFO_SINGLE from uid=%d", uid); @@ -1322,7 +1322,7 @@ static void _slurm_rpc_dump_job_single(slurm_msg_t * msg) rc = pack_one_job(&dump, &dump_size, job_id_msg->job_id, job_id_msg->show_flags, g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()), + slurmctld_config.auth_info), msg->protocol_version); unlock_slurmctld(job_read_lock); END_TIMER2("_slurm_rpc_dump_job_single"); @@ -1353,7 +1353,8 @@ static void _slurm_rpc_get_shares(slurm_msg_t *msg) shares_response_msg_t resp_msg; slurm_msg_t response_msg; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_SHARE_INFO from uid=%d", uid); @@ -1382,7 +1383,8 @@ static void _slurm_rpc_get_priority_factors(slurm_msg_t *msg) priority_factors_response_msg_t resp_msg; slurm_msg_t response_msg; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_PRIORITY_FACTORS from uid=%d", uid); @@ -1412,7 +1414,8 @@ static void _slurm_rpc_end_time(slurm_msg_t * msg) /* Locks: Read job */ slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_JOB_END_TIME from uid=%d", uid); @@ -1448,7 +1451,8 @@ static void _slurm_rpc_dump_front_end(slurm_msg_t * msg) /* Locks: Read config, read node */ slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug3("Processing RPC: REQUEST_FRONT_END_INFO from uid=%d", uid); @@ -1494,7 +1498,8 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg) * select plugins) */ slurmctld_lock_t node_write_lock = { READ_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug3("Processing RPC: REQUEST_NODE_INFO from uid=%d", uid); @@ -1551,7 +1556,8 @@ static void _slurm_rpc_dump_node_single(slurm_msg_t * msg) /* Locks: Read config, read node */ slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug3("Processing RPC: REQUEST_NODE_INFO_SINGLE from uid=%d", uid); @@ -1606,7 +1612,8 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg) /* Locks: Read configuration and partition */ slurmctld_lock_t part_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_PARTITION_INFO uid=%d", uid); @@ -1659,7 +1666,8 @@ static void _slurm_rpc_epilog_complete(slurm_msg_t *msg, /* Locks: Read configuration, write job, write node */ slurmctld_lock_t job_write_lock = { READ_LOCK, 
WRITE_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); epilog_complete_msg_t *epilog_msg = (epilog_complete_msg_t *) msg->data; struct job_record *job_ptr; @@ -1747,7 +1755,8 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) /* Locks: Read config, write job, write node */ slurmctld_lock_t job_write_lock = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); struct job_record *job_ptr; START_TIMER; @@ -1862,7 +1871,8 @@ static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg) slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); struct job_record *job_ptr; char jbuf[JBUFSIZ]; @@ -1962,7 +1972,8 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg, bool locked) slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); bool job_requeue = false; bool dump_job = false, dump_node = false, run_sched = false; struct job_record *job_ptr = NULL; @@ -2202,7 +2213,8 @@ static void _slurm_rpc_job_step_create(slurm_msg_t * msg) /* Locks: Write jobs, read nodes */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; if (slurmctld_conf.debug_flags & DEBUG_FLAG_STEPS) @@ -2310,7 +2322,8 @@ static void _slurm_rpc_job_step_get_info(slurm_msg_t * msg) /* Locks: Read config, job, write partition (for filtering) */ slurmctld_lock_t job_read_lock = { READ_LOCK, READ_LOCK, NO_LOCK, WRITE_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; if (slurmctld_conf.debug_flags & DEBUG_FLAG_STEPS) @@ -2400,7 +2413,8 @@ static void _slurm_rpc_job_will_run(slurm_msg_t * msg) /* Locks: Write job, read node, read partition */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); uint16_t port; /* dummy value */ slurm_addr_t resp_addr; will_run_response_msg_t *resp = NULL; @@ -2494,7 +2508,8 @@ static void _slurm_rpc_node_registration(slurm_msg_t * msg, /* Locks: Read config, write job, write node */ slurmctld_lock_t job_write_lock = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: MESSAGE_NODE_REGISTRATION_STATUS from uid=%d", @@ -2568,7 +2583,8 @@ static void _slurm_rpc_job_alloc_info(slurm_msg_t * msg) /* Locks: Read config, job, read node */ slurmctld_lock_t job_read_lock = { READ_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, 
slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_JOB_ALLOCATION_INFO from uid=%d", uid); @@ -2645,7 +2661,8 @@ static void _slurm_rpc_job_alloc_info_lite(slurm_msg_t * msg) /* Locks: Read config, job, read node */ slurmctld_lock_t job_read_lock = { READ_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_JOB_ALLOCATION_INFO_LITE from uid=%d", @@ -2725,11 +2742,7 @@ static void _slurm_rpc_job_alloc_info_lite(slurm_msg_t * msg) if (job_ptr->qos_ptr) { slurmdb_qos_rec_t *qos; qos = (slurmdb_qos_rec_t *)job_ptr->qos_ptr; - if (xstrcmp(qos->description, - "Normal QOS default") == 0) - job_info_resp_msg.qos = xstrdup("normal"); - else - job_info_resp_msg.qos=xstrdup(qos->description); + job_info_resp_msg.qos = xstrdup(qos->name); } job_info_resp_msg.resv_name = xstrdup(job_ptr->resv_name); job_info_resp_msg.select_jobinfo = @@ -2785,7 +2798,8 @@ static void _slurm_rpc_job_sbcast_cred(slurm_msg_t * msg) /* Locks: Read config, job, read node */ slurmctld_lock_t job_read_lock = { READ_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_JOB_SBCAST_CRED from uid=%d", uid); @@ -2907,7 +2921,8 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) /* Locks: Write configuration, job, node and partition */ slurmctld_lock_t config_write_lock = { WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; info("Processing RPC: REQUEST_RECONFIGURE from uid=%d", uid); @@ -2958,7 +2973,8 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) static void _slurm_rpc_takeover(slurm_msg_t * msg) { int error_code = SLURM_SUCCESS; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); /* We could authenticate here, if desired */ if (!validate_super_user(uid)) { @@ -2981,7 +2997,8 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) int error_code = SLURM_SUCCESS, i; uint16_t options = 0; shutdown_msg_t *shutdown_msg = (shutdown_msg_t *) msg->data; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); /* Locks: Read node */ slurmctld_lock_t node_read_lock = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; @@ -3051,7 +3068,8 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) static void _slurm_rpc_shutdown_controller_immediate(slurm_msg_t * msg) { int error_code = SLURM_SUCCESS; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); if (!validate_super_user(uid)) { error("Security violation, SHUTDOWN_IMMEDIATE RPC from uid=%d", @@ -3079,7 +3097,8 @@ static void _slurm_rpc_step_complete(slurm_msg_t *msg, bool locked) /* Locks: Write job, write node */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = 
g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); bool dump_job = false, dump_node = false; /* init */ @@ -3175,7 +3194,8 @@ static void _slurm_rpc_step_layout(slurm_msg_t *msg) /* Locks: Read config job, write node */ slurmctld_lock_t job_read_lock = { READ_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); struct job_record *job_ptr = NULL; struct step_record *step_ptr = NULL; @@ -3237,7 +3257,8 @@ static void _slurm_rpc_step_update(slurm_msg_t *msg) /* Locks: Write job */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); int rc; START_TIMER; @@ -3269,7 +3290,8 @@ static void _slurm_rpc_submit_batch_job(slurm_msg_t * msg) /* Locks: Write job, read node, read partition */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); char *err_msg = NULL; START_TIMER; @@ -3444,7 +3466,8 @@ static void _slurm_rpc_update_job(slurm_msg_t * msg) /* Locks: Write job, read node, read partition */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_UPDATE_JOB from uid=%d", uid); @@ -3542,7 +3565,8 @@ static void _slurm_rpc_update_front_end(slurm_msg_t * msg) /* Locks: write node */ slurmctld_lock_t node_write_lock = { NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_UPDATE_FRONT_END from uid=%d", uid); @@ -3586,7 +3610,8 @@ static void _slurm_rpc_update_node(slurm_msg_t * msg) /* Locks: Write job and write node */ slurmctld_lock_t node_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_UPDATE_NODE from uid=%d", uid); @@ -3634,7 +3659,8 @@ static void _slurm_rpc_update_layout(slurm_msg_t * msg) int shrink_size; /* Locks: Write job and write node */ - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_UPDATE_LAYOUT from uid=%d", uid); @@ -3678,7 +3704,8 @@ static void _slurm_rpc_update_partition(slurm_msg_t * msg) * NOTE: job write lock due to gang scheduler support */ slurmctld_lock_t part_write_lock = { READ_LOCK, WRITE_LOCK, READ_LOCK, WRITE_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_UPDATE_PARTITION from uid=%d", uid); @@ -3729,7 +3756,8 @@ static void 
_slurm_rpc_update_powercap(slurm_msg_t * msg) /* Locks: write configuration, read node */ slurmctld_lock_t config_write_lock = { WRITE_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_UPDATE_POWERCAP from uid=%d", uid); @@ -3795,7 +3823,8 @@ static void _slurm_rpc_delete_partition(slurm_msg_t * msg) /* Locks: write job, read node, write partition */ slurmctld_lock_t part_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, WRITE_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_DELETE_PARTITION from uid=%d", uid); @@ -3838,7 +3867,8 @@ static void _slurm_rpc_resv_create(slurm_msg_t * msg) /* Locks: write node, read partition */ slurmctld_lock_t node_write_lock = { NO_LOCK, NO_LOCK, WRITE_LOCK, READ_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_CREATE_RESERVATION from uid=%d", uid); @@ -3895,7 +3925,8 @@ static void _slurm_rpc_resv_update(slurm_msg_t * msg) /* Locks: write node, read partition */ slurmctld_lock_t node_write_lock = { NO_LOCK, NO_LOCK, WRITE_LOCK, READ_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_UPDATE_RESERVATION from uid=%d", uid); @@ -3938,7 +3969,8 @@ static void _slurm_rpc_resv_delete(slurm_msg_t * msg) /* Locks: read job, write node */ slurmctld_lock_t node_write_lock = { NO_LOCK, READ_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_DELETE_RESERVATION from uid=%d", uid); @@ -3983,7 +4015,8 @@ static void _slurm_rpc_resv_show(slurm_msg_t * msg) /* Locks: read node */ slurmctld_lock_t node_read_lock = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); slurm_msg_t response_msg; char *dump; int dump_size; @@ -4075,7 +4108,8 @@ static void _slurm_rpc_update_block(slurm_msg_t * msg) int error_code = SLURM_SUCCESS; DEF_TIMERS; update_block_msg_t *block_desc_ptr = (update_block_msg_t *) msg->data; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); char *name = NULL; START_TIMER; @@ -4188,7 +4222,8 @@ static void _slurm_rpc_block_info(slurm_msg_t * msg) slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; DEF_TIMERS; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_BLOCK_INFO from uid=%d", uid); @@ -4231,7 +4266,8 @@ static void _slurm_rpc_block_info(slurm_msg_t * msg) /* get node select info plugin */ static void _slurm_rpc_burst_buffer_info(slurm_msg_t * msg) { - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, 
slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); void *resp_buffer = NULL; int resp_buffer_size = 0; int error_code = SLURM_SUCCESS; @@ -4284,7 +4320,8 @@ inline static void _slurm_rpc_suspend(slurm_msg_t * msg) /* Locks: write job and node */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); char *op; START_TIMER; @@ -4336,7 +4373,8 @@ inline static void _slurm_rpc_top_job(slurm_msg_t * msg) /* Locks: write job */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); debug("Processing RPC: REQUEST_TOP_JOB from uid=%u", (unsigned int)uid); @@ -4364,7 +4402,8 @@ inline static void _slurm_rpc_requeue(slurm_msg_t * msg) /* Locks: write job and node */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; @@ -4405,7 +4444,8 @@ inline static void _slurm_rpc_checkpoint(slurm_msg_t * msg) /* Locks: write job lock, read node lock */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); char *op; START_TIMER; @@ -4489,7 +4529,8 @@ inline static void _slurm_rpc_checkpoint_comp(slurm_msg_t * msg) /* Locks: read job */ slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("Processing RPC: REQUEST_CHECKPOINT_COMP from uid=%d", uid); @@ -4519,7 +4560,8 @@ inline static void _slurm_rpc_checkpoint_task_comp(slurm_msg_t * msg) /* Locks: read job */ slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); ckpt_ptr = (checkpoint_task_comp_msg_t *) msg->data; START_TIMER; @@ -4785,7 +4827,8 @@ static int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, inline static void _slurm_rpc_trigger_clear(slurm_msg_t * msg) { int rc; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); trigger_info_msg_t * trigger_ptr = (trigger_info_msg_t *) msg->data; DEF_TIMERS; @@ -4800,7 +4843,8 @@ inline static void _slurm_rpc_trigger_clear(slurm_msg_t * msg) inline static void _slurm_rpc_trigger_get(slurm_msg_t * msg) { - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); trigger_info_msg_t *resp_data; trigger_info_msg_t * trigger_ptr = (trigger_info_msg_t *) msg->data; slurm_msg_t response_msg; @@ -4825,8 +4869,10 @@ inline static void _slurm_rpc_trigger_get(slurm_msg_t * msg) inline static void _slurm_rpc_trigger_set(slurm_msg_t * msg) { int rc; - uid_t uid = 
g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); - gid_t gid = g_slurm_auth_get_gid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); + gid_t gid = g_slurm_auth_get_gid(msg->auth_cred, + slurmctld_config.auth_info); trigger_info_msg_t * trigger_ptr = (trigger_info_msg_t *) msg->data; DEF_TIMERS; @@ -4842,7 +4888,8 @@ inline static void _slurm_rpc_trigger_set(slurm_msg_t * msg) inline static void _slurm_rpc_trigger_pull(slurm_msg_t * msg) { int rc; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); trigger_info_msg_t * trigger_ptr = (trigger_info_msg_t *) msg->data; DEF_TIMERS; @@ -4942,7 +4989,8 @@ inline static void _slurm_rpc_job_notify(slurm_msg_t * msg) /* Locks: read job */ slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); job_notify_msg_t * notify_msg = (job_notify_msg_t *) msg->data; struct job_record *job_ptr; DEF_TIMERS; @@ -4971,7 +5019,8 @@ inline static void _slurm_rpc_job_notify(slurm_msg_t * msg) inline static void _slurm_rpc_set_debug_flags(slurm_msg_t *msg) { - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); slurmctld_lock_t config_write_lock = { WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; set_debug_flags_msg_t *request_msg = @@ -5014,7 +5063,8 @@ inline static void _slurm_rpc_set_debug_flags(slurm_msg_t *msg) inline static void _slurm_rpc_set_debug_level(slurm_msg_t *msg) { int debug_level, old_debug_level; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); slurmctld_lock_t config_write_lock = { WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; set_debug_level_msg_t *request_msg = @@ -5070,7 +5120,8 @@ inline static void _slurm_rpc_set_debug_level(slurm_msg_t *msg) inline static void _slurm_rpc_set_schedlog_level(slurm_msg_t *msg) { int schedlog_level, old_schedlog_level; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; set_debug_level_msg_t *request_msg = @@ -5121,7 +5172,8 @@ inline static void _slurm_rpc_set_schedlog_level(slurm_msg_t *msg) inline static void _slurm_rpc_accounting_update_msg(slurm_msg_t *msg) { int rc = SLURM_SUCCESS; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); accounting_update_msg_t *update_ptr = (accounting_update_msg_t *) msg->data; bool sent_rc = false; @@ -5179,7 +5231,8 @@ inline static void _slurm_rpc_accounting_update_msg(slurm_msg_t *msg) inline static void _slurm_rpc_reboot_nodes(slurm_msg_t * msg) { int rc; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); #ifndef HAVE_FRONT_END int i; struct node_record *node_ptr; @@ -5243,7 +5296,8 @@ inline static void _slurm_rpc_reboot_nodes(slurm_msg_t * msg) inline static void _slurm_rpc_accounting_first_reg(slurm_msg_t *msg) { - uid_t uid = 
g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); time_t event_time = time(NULL); DEF_TIMERS; @@ -5263,7 +5317,8 @@ inline static void _slurm_rpc_accounting_first_reg(slurm_msg_t *msg) inline static void _slurm_rpc_accounting_register_ctld(slurm_msg_t *msg) { - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); DEF_TIMERS; @@ -5290,7 +5345,8 @@ inline static void _slurm_rpc_dump_spank(slurm_msg_t * msg) /* Locks: read job */ slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); slurm_msg_t response_msg; DEF_TIMERS; @@ -5402,7 +5458,8 @@ inline static void _slurm_rpc_dump_stats(slurm_msg_t * msg) int dump_size; stats_info_request_msg_t *request_msg; slurm_msg_t response_msg; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); request_msg = (stats_info_request_msg_t *)msg->data; @@ -5453,7 +5510,8 @@ _slurm_rpc_dump_licenses(slurm_msg_t * msg) int dump_size; slurm_msg_t response_msg; license_info_request_msg_t *lic_req_msg; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("%s: Processing RPC: REQUEST_LICENSE_INFO uid=%d", @@ -5523,7 +5581,8 @@ _slurm_rpc_kill_job2(slurm_msg_t *msg) int cc; kill = (job_step_kill_msg_t *)msg->data; - uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; info("%s: REQUEST_KILL_JOB job %s uid %d", @@ -5786,7 +5845,8 @@ static void _slurm_rpc_assoc_mgr_info(slurm_msg_t * msg) char *dump = NULL; int dump_size = 0; slurm_msg_t response_msg; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, + slurmctld_config.auth_info); START_TIMER; debug2("%s: Processing RPC: REQUEST_ASSOC_MGR_INFO uid=%d", diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 271ce971a020ccb4d1b67a7263d3f85210cf4e78..bd77ba960c32f72613396eab45a319324655d5d5 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -133,7 +133,7 @@ static int _restore_part_state(List old_part_list, char *old_def_part_name, uint16_t flags); static void _stat_slurm_dirs(void); static int _sync_nodes_to_comp_job(void); -static int _sync_nodes_to_jobs(void); +static int _sync_nodes_to_jobs(bool reconfig); static int _sync_nodes_to_active_job(struct job_record *job_ptr); static void _sync_nodes_to_suspended_job(struct job_record *job_ptr); static void _sync_part_prio(void); @@ -891,6 +891,8 @@ int read_slurm_conf(int recover, bool reconfig) /* initialization */ START_TIMER; + xfree(slurmctld_config.auth_info); + slurmctld_config.auth_info = slurm_get_auth_info(); if (reconfig) { /* in order to re-use job state information, * update nodes_completing string (based on node bitmaps) */ @@ -1069,7 +1071,7 @@ int read_slurm_conf(int recover, bool reconfig) _gres_reconfig(reconfig); reset_job_bitmaps(); /* must follow select_g_job_init() */ - (void) _sync_nodes_to_jobs(); + (void) _sync_nodes_to_jobs(reconfig); 
(void) sync_job_files(); _purge_old_node_state(old_node_table_ptr, old_node_record_count); _purge_old_part_state(old_part_list, old_def_part_name); @@ -2028,7 +2030,7 @@ static int _preserve_plugins(slurm_ctl_conf_t * ctl_conf_ptr, * RET count of nodes having state changed * Note: Operates on common variables, no arguments */ -static int _sync_nodes_to_jobs(void) +static int _sync_nodes_to_jobs(bool reconfig) { struct job_record *job_ptr; ListIterator job_iterator; @@ -2036,9 +2038,14 @@ static int _sync_nodes_to_jobs(void) job_iterator = list_iterator_create(job_list); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { - if (job_ptr->details && job_ptr->details->prolog_running && - !IS_JOB_CONFIGURING(job_ptr)) + if (!reconfig && + job_ptr->details && job_ptr->details->prolog_running) { job_ptr->details->prolog_running = 0; + if (IS_JOB_CONFIGURING(job_ptr)) { + (void) prolog_slurmctld(job_ptr); + (void) bb_g_job_begin(job_ptr); + } + } if (job_ptr->node_bitmap == NULL) ; diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c index 0597b278b6bdea12893e31da34031c535bf16735..91c54c390d8a3d0bf611afcebfe7cd7f1ff21852 100644 --- a/src/slurmctld/reservation.c +++ b/src/slurmctld/reservation.c @@ -4043,7 +4043,7 @@ static void _check_job_compatibility(struct job_record *job_ptr, { uint32_t total_nodes; bitstr_t *full_node_bitmap; - int i_core, i_node; + int i_core, i_node, res_inx; int start = 0; int rep_count = 0; job_resources_t *job_res = job_ptr->job_resrcs; @@ -4068,10 +4068,10 @@ static void _check_job_compatibility(struct job_record *job_ptr, _create_cluster_core_bitmap(core_bitmap); i_node = 0; + res_inx = 0; while (i_node < total_nodes) { - int cores_in_a_node = (job_res->sockets_per_node[i_node] * - job_res->cores_per_socket[i_node]); - + int cores_in_a_node = (job_res->sockets_per_node[res_inx] * + job_res->cores_per_socket[res_inx]); int repeat_node_conf = job_res->sock_core_rep_count[rep_count++]; int node_bitmap_inx; @@ -4082,6 +4082,7 @@ static void _check_job_compatibility(struct job_record *job_ptr, #endif i_node += repeat_node_conf; + res_inx++; while (repeat_node_conf--) { int allocated; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index dba1df43fcb086fee1b35cf1b1cdca520504996f..86f4528d0fd9988d81fb3a71ea820bffa43b19ff 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -146,12 +146,13 @@ \*****************************************************************************/ typedef struct slurmctld_config { + char * auth_info; + time_t boot_time; int daemonize; bool resume_backup; - time_t boot_time; - time_t shutdown_time; + bool scheduling_disabled; int server_thread_count; - bool scheduling_disabled; + time_t shutdown_time; slurm_cred_ctx_t cred_ctx; #ifdef WITH_PTHREADS @@ -1555,6 +1556,12 @@ extern void job_set_alloc_tres( */ extern int job_update_tres_cnt(struct job_record *job_ptr, int node_inx); +/* + * Modify a job's memory limit if allocated all memory on a node and that node + * reboots, possibly with a different memory size (e.g. 
KNL MCDRAM mode changed) + */ +extern void job_validate_mem(struct job_record *job_ptr); + /* * check_job_step_time_limit - terminate jobsteps which have exceeded * their time limit diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index d82f6878b1cd1a4e91730647c02cf6f8d8842bac..fc634b454c1746a5e95b79e2c74cac3191203ee3 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -742,7 +742,7 @@ int job_step_complete(uint32_t job_id, uint32_t step_id, uid_t uid, return ESLURM_INVALID_JOB_ID; if (step_ptr->step_id == SLURM_EXTERN_CONT) - return SLURM_SUCCESS; + return select_g_step_finish(step_ptr, true); /* If the job is already cleaning we have already been here * before, so just return. */ @@ -752,7 +752,7 @@ int job_step_complete(uint32_t job_id, uint32_t step_id, uid_t uid, if (cleaning) { /* Step hasn't finished cleanup yet. */ debug("%s: Cleaning flag already set for " "job step %u.%u, no reason to cleanup again.", - __func__, step_ptr->step_id, step_ptr->job_ptr->job_id); + __func__, job_ptr->job_id, step_ptr->step_id); return SLURM_SUCCESS; } diff --git a/src/slurmd/common/proctrack.c b/src/slurmd/common/proctrack.c index 995c6533f117a5c3f2b0b19c8b2649e6520feeac..1f813733b2b52385bb8799304f1f65cbbd743dff 100644 --- a/src/slurmd/common/proctrack.c +++ b/src/slurmd/common/proctrack.c @@ -179,9 +179,22 @@ extern int proctrack_g_create(stepd_step_rec_t * job) */ extern int proctrack_g_add(stepd_step_rec_t * job, pid_t pid) { + int i = 0, max_retry = 3, rc; + if (slurm_proctrack_init() < 0) return SLURM_ERROR; + /* Sometimes a plugin is transient in adding a pid, so lets + * try a few times before we call it quits. + */ + while ((rc = (*(ops.add)) (job, pid)) != SLURM_SUCCESS) { + if (i++ > max_retry) + break; + debug("%s: %u.%u couldn't add pid %u, sleeping and trying again", + __func__, job->jobid, job->stepid, pid); + sleep(1); + } + return (*(ops.add)) (job, pid); } diff --git a/src/slurmd/common/slurmstepd_init.h b/src/slurmd/common/slurmstepd_init.h index 60a0e2a1a0450ddcc06cbbe77a909c091dd90c6c..1f7c3c4ed98e7b43ef8267fb85e6960234abfbc5 100644 --- a/src/slurmd/common/slurmstepd_init.h +++ b/src/slurmd/common/slurmstepd_init.h @@ -48,9 +48,12 @@ #include "src/slurmd/slurmstepd/slurmstepd_job.h" #include "src/slurmd/slurmd/slurmd.h" -/* If you want to run memcheck on slurmstepd switch this comment */ -//#define SLURMSTEPD_MEMCHECK 1 -#undef SLURMSTEPD_MEMCHECK +/* If you want to run memory checks on slurmstepd switch this comment */ +#define SLURMSTEPD_MEMCHECK 0 /* Run slurmstepd without memory checks */ +//#define SLURMSTEPD_MEMCHECK 1 /* Run slurmstepd with memcheck */ +//#define SLURMSTEPD_MEMCHECK 2 /* Run slurmstepd with valgrind/memcheck */ +//#define SLURMSTEPD_MEMCHECK 3 /* Run slurmstepd with valgrind/drd */ +//#define SLURMSTEPD_MEMCHECK 4 /* Run slurmstepd with valgrind/helgrind */ typedef enum slurmd_step_tupe { LAUNCH_BATCH_JOB = 0, diff --git a/src/slurmd/common/xcpuinfo.c b/src/slurmd/common/xcpuinfo.c index 0789745d65f49dcd119760dbe54aa4b63b1e773d..20ba00304a42450c5afde175f9059cce8bb47255 100644 --- a/src/slurmd/common/xcpuinfo.c +++ b/src/slurmd/common/xcpuinfo.c @@ -199,10 +199,11 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, hwloc_obj_type_t objtype[LAST_OBJ]; unsigned idx[LAST_OBJ]; int nobj[LAST_OBJ]; + bitstr_t *used_socket = NULL; int actual_cpus; int macid; int absid; - int actual_boards = 1, depth; + int actual_boards = 1, depth, tot_socks = 0, used_sock_offset; int i; debug2("hwloc_topology_init"); @@ -265,11 +266,19 @@ 
get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, * KNL NUMA with no cores are NOT counted. */ nobj[SOCKET] = 0; depth = hwloc_get_type_depth(topology, objtype[SOCKET]); + used_socket = bit_alloc(1024); for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); i++) { obj = hwloc_get_obj_by_depth(topology, depth, i); - if ((obj->type == objtype[SOCKET]) && - (_core_child_count(topology, obj) > 0)) - nobj[SOCKET]++; + if (obj->type == objtype[SOCKET]) { + if (_core_child_count(topology, obj) > 0) { + nobj[SOCKET]++; + bit_set(used_socket, tot_socks); + } + if (++tot_socks >= 1024) { /* Bitmap size */ + fatal("Socket count exceeds 1024, expand data structure size"); + break; + } + } } nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]); /* @@ -321,7 +330,10 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, (*p_block_map_inv)[i] = i; } /* create map with hwloc */ + used_sock_offset = 0; for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) { + if (!bit_test(used_socket, i)) + continue; for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) { for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) { /* get hwloc_obj by indexes */ @@ -330,8 +342,9 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, if (!obj) continue; macid = obj->os_index; - absid = idx[SOCKET]*nobj[CORE]*nobj[PU] - + idx[CORE]*nobj[PU] + absid = used_sock_offset * + nobj[CORE] * nobj[PU] + + idx[CORE] * nobj[PU] + idx[PU]; if ((macid >= actual_cpus) || @@ -345,9 +358,10 @@ get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, (*p_block_map_inv)[macid] = absid; } } + used_sock_offset++; } } - + FREE_NULL_BITMAP(used_socket); hwloc_topology_destroy(topology); /* update output parameters */ diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index e9d94858041d59b38d80102d69fc703976e56d42..0cf389f4a9bd9086bce9a59fe3375035412edd65 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -755,7 +755,7 @@ _forkexec_slurmstepd(uint16_t type, void *req, return SLURM_FAILURE; } else if (pid > 0) { int rc = SLURM_SUCCESS; -#ifndef SLURMSTEPD_MEMCHECK +#if (SLURMSTEPD_MEMCHECK == 0) int i; time_t start_time = time(NULL); #endif @@ -779,7 +779,7 @@ _forkexec_slurmstepd(uint16_t type, void *req, /* If running under valgrind/memcheck, this pipe doesn't work * correctly so just skip it. 
*/ -#ifndef SLURMSTEPD_MEMCHECK +#if (SLURMSTEPD_MEMCHECK == 0) i = read(to_slurmd[0], &rc, sizeof(int)); if (i < 0) { error("%s: Can not read return code from slurmstepd " @@ -821,11 +821,79 @@ _forkexec_slurmstepd(uint16_t type, void *req, error("close read to_slurmd in parent: %m"); return rc; } else { -#ifndef SLURMSTEPD_MEMCHECK - char *const argv[2] = { (char *)conf->stepd_loc, NULL}; -#else +#if (SLURMSTEPD_MEMCHECK == 1) + /* memcheck test of slurmstepd, option #1 */ char *const argv[3] = {"memcheck", (char *)conf->stepd_loc, NULL}; +#elif (SLURMSTEPD_MEMCHECK == 2) + /* valgrind test of slurmstepd, option #2 */ + uint32_t job_id = 0, step_id = 0; + char log_file[256]; + char *const argv[13] = {"valgrind", "--tool=memcheck", + "--error-limit=no", + "--leak-check=summary", + "--show-reachable=yes", + "--max-stackframe=16777216", + "--num-callers=20", + "--child-silent-after-fork=yes", + "--track-origins=yes", + log_file, (char *)conf->stepd_loc, + NULL}; + if (type == LAUNCH_BATCH_JOB) { + job_id = ((batch_job_launch_msg_t *)req)->job_id; + step_id = ((batch_job_launch_msg_t *)req)->step_id; + } else if (type == LAUNCH_TASKS) { + job_id = ((launch_tasks_request_msg_t *)req)->job_id; + step_id = ((launch_tasks_request_msg_t *)req)->job_step_id; + } + snprintf(log_file, sizeof(log_file), + "--log-file=/tmp/slurmstepd_valgrind_%u.%u", + job_id, step_id); +#elif (SLURMSTEPD_MEMCHECK == 3) + /* valgrind/drd test of slurmstepd, option #3 */ + uint32_t job_id = 0, step_id = 0; + char log_file[256]; + char *const argv[10] = {"valgrind", "--tool=drd", + "--error-limit=no", + "--max-stackframe=16777216", + "--num-callers=20", + "--child-silent-after-fork=yes", + log_file, (char *)conf->stepd_loc, + NULL}; + if (type == LAUNCH_BATCH_JOB) { + job_id = ((batch_job_launch_msg_t *)req)->job_id; + step_id = ((batch_job_launch_msg_t *)req)->step_id; + } else if (type == LAUNCH_TASKS) { + job_id = ((launch_tasks_request_msg_t *)req)->job_id; + step_id = ((launch_tasks_request_msg_t *)req)->job_step_id; + } + snprintf(log_file, sizeof(log_file), + "--log-file=/tmp/slurmstepd_valgrind_%u.%u", + job_id, step_id); +#elif (SLURMSTEPD_MEMCHECK == 4) + /* valgrind/helgrind test of slurmstepd, option #4 */ + uint32_t job_id = 0, step_id = 0; + char log_file[256]; + char *const argv[10] = {"valgrind", "--tool=helgrind", + "--error-limit=no", + "--max-stackframe=16777216", + "--num-callers=20", + "--child-silent-after-fork=yes", + log_file, (char *)conf->stepd_loc, + NULL}; + if (type == LAUNCH_BATCH_JOB) { + job_id = ((batch_job_launch_msg_t *)req)->job_id; + step_id = ((batch_job_launch_msg_t *)req)->step_id; + } else if (type == LAUNCH_TASKS) { + job_id = ((launch_tasks_request_msg_t *)req)->job_id; + step_id = ((launch_tasks_request_msg_t *)req)->job_step_id; + } + snprintf(log_file, sizeof(log_file), + "--log-file=/tmp/slurmstepd_valgrind_%u.%u", + job_id, step_id); +#else + /* no memory checking, default */ + char *const argv[2] = { (char *)conf->stepd_loc, NULL}; #endif int i; int failed = 0; @@ -1161,7 +1229,7 @@ _rpc_launch_tasks(slurm_msg_t *msg) /* It is always 0 for front end systems */ nodeid = nodelist_find(req->complete_nodelist, conf->node_name); #endif - req_uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + req_uid = g_slurm_auth_get_uid(msg->auth_cred, conf->auth_info); memcpy(&req->orig_addr, &msg->orig_addr, sizeof(slurm_addr_t)); super_user = _slurm_authorized_user(req_uid); @@ -1720,7 +1788,7 @@ static void _rpc_prolog(slurm_msg_t *msg) if (req == NULL) return; - 
req_uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + req_uid = g_slurm_auth_get_uid(msg->auth_cred, conf->auth_info); if (!_slurm_authorized_user(req_uid)) { error("REQUEST_LAUNCH_PROLOG request from uid %u", (unsigned int) req_uid); @@ -1811,7 +1879,7 @@ _rpc_batch_job(slurm_msg_t *msg, bool new_msg) if (new_msg) { uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); if (!_slurm_authorized_user(req_uid)) { error("Security violation, batch launch RPC from uid %d", req_uid); @@ -2016,7 +2084,7 @@ _rpc_job_notify(slurm_msg_t *msg) { job_notify_msg_t *req = msg->data; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); uid_t job_uid; List steps; ListIterator i; @@ -2159,7 +2227,7 @@ static void _rpc_reconfig(slurm_msg_t *msg) { uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); if (!_slurm_authorized_user(req_uid)) error("Security violation, reconfig RPC from uid %d", @@ -2174,7 +2242,7 @@ static void _rpc_shutdown(slurm_msg_t *msg) { uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); forward_wait(msg); if (!_slurm_authorized_user(req_uid)) @@ -2195,7 +2263,7 @@ _rpc_reboot(slurm_msg_t *msg) reboot_msg_t *reboot_msg; slurm_ctl_conf_t *cfg; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); int exit_code; if (!_slurm_authorized_user(req_uid)) @@ -2507,7 +2575,7 @@ _rpc_ping(slurm_msg_t *msg) { int rc = SLURM_SUCCESS; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); static bool first_msg = true; if (!_slurm_authorized_user(req_uid)) { @@ -2558,7 +2626,7 @@ _rpc_health_check(slurm_msg_t *msg) { int rc = SLURM_SUCCESS; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); if (!_slurm_authorized_user(req_uid)) { error("Security violation, health check RPC from uid %d", @@ -2594,7 +2662,7 @@ _rpc_acct_gather_update(slurm_msg_t *msg) { int rc = SLURM_SUCCESS; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); static bool first_msg = true; if (!_slurm_authorized_user(req_uid)) { @@ -2651,7 +2719,7 @@ _rpc_acct_gather_energy(slurm_msg_t *msg) { int rc = SLURM_SUCCESS; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); static bool first_msg = true; if (!_slurm_authorized_user(req_uid)) { @@ -2772,7 +2840,7 @@ _rpc_signal_tasks(slurm_msg_t *msg) { int rc = SLURM_SUCCESS; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); kill_tasks_msg_t *req = (kill_tasks_msg_t *) msg->data; uint32_t flag; uint32_t sig; @@ -2803,7 +2871,7 @@ _rpc_checkpoint_tasks(slurm_msg_t *msg) int fd; int rc = SLURM_SUCCESS; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); checkpoint_tasks_msg_t *req = (checkpoint_tasks_msg_t *) msg->data; uint16_t protocol_version; uid_t uid; @@ -2870,7 +2938,7 @@ _rpc_terminate_tasks(slurm_msg_t *msg) goto done2; } - req_uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + req_uid = g_slurm_auth_get_uid(msg->auth_cred, conf->auth_info); if ((req_uid != uid) && (!_slurm_authorized_user(req_uid))) { debug("kill req from uid %ld for job %u.%u owned by uid %ld", @@ -2911,7 +2979,7 @@ _rpc_step_complete(slurm_msg_t *msg) /* step completion messages 
are only allowed from other slurmstepd, so only root or SlurmUser is allowed here */ - req_uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + req_uid = g_slurm_auth_get_uid(msg->auth_cred, conf->auth_info); if (!_slurm_authorized_user(req_uid)) { debug("step completion from uid %ld for job %u.%u", (long) req_uid, req->job_id, req->job_step_id); @@ -2945,7 +3013,7 @@ static int _rpc_step_complete_aggr(slurm_msg_t *msg) { int rc; - uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, conf->auth_info); if (!_slurm_authorized_user(uid)) { error("Security violation: step_complete_aggr from uid %d", @@ -3073,7 +3141,7 @@ _rpc_stat_jobacct(slurm_msg_t *msg) debug3("Entering _rpc_stat_jobacct"); /* step completion messages are only allowed from other slurmstepd, so only root or SlurmUser is allowed here */ - req_uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + req_uid = g_slurm_auth_get_uid(msg->auth_cred, conf->auth_info); fd = stepd_connect(conf->spooldir, conf->node_name, req->job_id, req->step_id, &protocol_version); @@ -3206,7 +3274,7 @@ _rpc_network_callerid(slurm_msg_t *msg) rc = _callerid_find_job(conn, &job_id); if (rc == SLURM_SUCCESS) { /* We found the job */ - req_uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + req_uid = g_slurm_auth_get_uid(msg->auth_cred, conf->auth_info); if (!_slurm_authorized_user(req_uid)) { /* Requestor is not root or SlurmUser */ job_uid = _get_job_uid(job_id); @@ -3246,7 +3314,7 @@ _rpc_list_pids(slurm_msg_t *msg) debug3("Entering _rpc_list_pids"); /* step completion messages are only allowed from other slurmstepd, * so only root or SlurmUser is allowed here */ - req_uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + req_uid = g_slurm_auth_get_uid(msg->auth_cred, conf->auth_info); job_uid = _get_job_uid(req->job_id); @@ -3313,7 +3381,7 @@ static void _rpc_timelimit(slurm_msg_t *msg) { uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); kill_job_msg_t *req = msg->data; int nsteps, rc; @@ -3615,8 +3683,8 @@ static int _rpc_file_bcast(slurm_msg_t *msg) file_bcast_msg_t *req = msg->data; file_bcast_info_t key; - key.uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); - key.gid = g_slurm_auth_get_gid(msg->auth_cred, slurm_get_auth_info()); + key.uid = g_slurm_auth_get_uid(msg->auth_cred, conf->auth_info); + key.gid = g_slurm_auth_get_gid(msg->auth_cred, conf->auth_info); key.fname = req->fname; rc = _valid_sbcast_cred(req, key.uid, req->block_no, &key.job_id); @@ -3919,7 +3987,7 @@ _rpc_reattach_tasks(slurm_msg_t *msg) debug2("_rpc_reattach_tasks: nodeid %d in the job step", nodeid); - req_uid = g_slurm_auth_get_uid(msg->auth_cred, slurm_get_auth_info()); + req_uid = g_slurm_auth_get_uid(msg->auth_cred, conf->auth_info); if ((req_uid != uid) && (!_slurm_authorized_user(req_uid))) { error("uid %ld attempt to attach to job %u.%u owned by %ld", (long) req_uid, req->job_id, req->job_step_id, @@ -4288,7 +4356,7 @@ _rpc_signal_job(slurm_msg_t *msg) { signal_job_msg_t *req = msg->data; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); uid_t job_uid; List steps; ListIterator i; @@ -4452,7 +4520,7 @@ _rpc_suspend_job(slurm_msg_t *msg) int time_slice = -1; suspend_int_msg_t *req = msg->data; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); List steps; ListIterator i; 
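/*
 * Illustrative sketch, not part of the patch: the slurmd handlers above share
 * this authorization pattern.  The AuthInfo string is no longer fetched (and
 * leaked) per RPC with slurm_get_auth_info(); the copy cached in
 * conf->auth_info by _read_config() is handed to the auth plugin instead, and
 * privileged RPCs are then gated on root/SlurmUser through
 * _slurm_authorized_user().  The handler name and return values below are
 * assumptions for the example; the calls themselves mirror the hunks above.
 */
static int _example_privileged_rpc(slurm_msg_t *msg)
{
	uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred,
					     conf->auth_info);

	if (!_slurm_authorized_user(req_uid)) {
		error("Security violation, example RPC from uid %d",
		      (int) req_uid);
		return ESLURM_USER_ID_MISSING;
	}

	return SLURM_SUCCESS;
}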
step_loc_t *stepd; @@ -4653,7 +4721,7 @@ _rpc_abort_job(slurm_msg_t *msg) { kill_job_msg_t *req = msg->data; uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); job_env_t job_env; debug("_rpc_abort_job, uid = %d", uid); @@ -4875,7 +4943,7 @@ _rpc_complete_batch(slurm_msg_t *msg) int i, rc, msg_rc; slurm_msg_t resp_msg; uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); complete_batch_script_msg_t *req = msg->data; static int running_serial = -1; uint16_t msg_type; @@ -4960,7 +5028,7 @@ _rpc_terminate_job(slurm_msg_t *msg) int rc = SLURM_SUCCESS; kill_job_msg_t *req = msg->data; uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); int nsteps = 0; int delay; // slurm_ctl_conf_t *cf; @@ -5359,7 +5427,7 @@ _rpc_update_time(slurm_msg_t *msg) { int rc = SLURM_SUCCESS; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); if ((req_uid != conf->slurm_user_id) && (req_uid != 0)) { rc = ESLURM_USER_ID_MISSING; @@ -6205,7 +6273,7 @@ _rpc_forward_data(slurm_msg_t *msg) } req_uid = (uint32_t)g_slurm_auth_get_uid(msg->auth_cred, - slurm_get_auth_info()); + conf->auth_info); /* * although always in localhost, we still convert it to network * byte order, to make it consistent with pack/unpack. diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index f632f1d35e5134153bca3b7ea6ce823027ebf5c9..762f8381ca27cf7c644f03ac49ad08f25ae9ad01 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -817,6 +817,10 @@ _read_config(void) slurm_mutex_lock(&conf->config_mutex); cf = slurm_conf_lock(); + xfree(conf->auth_info); + conf->auth_info = xstrdup(cf->authinfo); + + xfree(conf->chos_loc); conf->chos_loc = xstrdup(cf->chos_loc); conf->last_update = time(NULL); @@ -1246,6 +1250,7 @@ _destroy_conf(void) xfree(conf->acct_gather_filesystem_type); xfree(conf->acct_gather_infiniband_type); xfree(conf->acct_gather_profile_type); + xfree(conf->auth_info); xfree(conf->block_map); xfree(conf->block_map_inv); xfree(conf->chos_loc); diff --git a/src/slurmd/slurmd/slurmd.h b/src/slurmd/slurmd/slurmd.h index 68970f4116be7faa0107a6e471a4b1cbb246cca2..a9fce71535805fe537895abea389c826cd0e9931 100644 --- a/src/slurmd/slurmd/slurmd.h +++ b/src/slurmd/slurmd/slurmd.h @@ -82,6 +82,7 @@ typedef struct slurmd_config { char *prog; /* Program basename */ char ***argv; /* pointer to argument vector */ int *argc; /* pointer to argument count */ + char *auth_info; /* AuthInfo for msg authentication */ char *chos_loc; /* Chroot OS wrapper path */ char *cluster_name; /* conf ClusterName */ char *hostname; /* local hostname */ diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index bfb0fa324ca8dad662d835fa58fd621a8fbed2a2..f5b6e336a7d5431894163a1ff3664b372ad64987 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -990,6 +990,7 @@ static int _spawn_job_container(stepd_step_rec_t *job) jobacct_id_t jobacct_id; int status = 0; pid_t pid; + int rc = SLURM_SUCCESS; debug2("%s: Before call to spank_init()", __func__); if (spank_init(job) < 0) { @@ -1019,11 +1020,16 @@ static int _spawn_job_container(stepd_step_rec_t *job) } else if (pid < 0) { error("fork: %m"); _set_job_state(job, SLURMSTEPD_STEP_ENDING); - return SLURM_ERROR; + rc = SLURM_ERROR; + goto fail1; } job->pgid = pid; - proctrack_g_add(job, pid); + if ((rc = proctrack_g_add(job, pid)) != SLURM_SUCCESS) { + error("%s: Step %u.%u unable 
to add pid %d to the proctrack plugin", + __func__, job->jobid, job->stepid, pid); + goto fail1; + } jobacct_id.nodeid = job->nodeid; jobacct_id.taskid = job->nodeid; /* Treat node ID as global task ID */ @@ -1072,12 +1078,16 @@ static int _spawn_job_container(stepd_step_rec_t *job) * condition starting another job on these CPUs. */ while (_send_pending_exit_msgs(job)) {;} +fail1: debug2("%s: Before call to spank_fini()", __func__); if (spank_fini(job) < 0) error("spank_fini failed"); debug2("%s: After call to spank_fini()", __func__); - return SLURM_SUCCESS; + _set_job_state(job, SLURMSTEPD_STEP_ENDING); + _send_step_complete_msgs(job); + + return rc; } /* diff --git a/src/slurmd/slurmstepd/pdebug.c b/src/slurmd/slurmstepd/pdebug.c index e1b4b345a0a8744257da8bc4781ae7a84db83e8b..bffccd69dfa69bb34b2d668be7e886f9e02ce721 100644 --- a/src/slurmd/slurmstepd/pdebug.c +++ b/src/slurmd/slurmstepd/pdebug.c @@ -142,24 +142,32 @@ pdebug_stop_current(stepd_step_rec_t *job) static bool _pid_to_wake(pid_t pid) { #ifdef CLONE_PTRACE - char proc_stat[1024], proc_name[22], state[1], *str_ptr; + char *proc_stat, proc_name[22], state[1], *str_ptr; int len, proc_fd, ppid, pgrp, session, tty, tpgid; long unsigned flags; sprintf (proc_name, "/proc/%d/stat", (int) pid); if ((proc_fd = open(proc_name, O_RDONLY, 0)) == -1) return false; /* process is now gone */ - len = read(proc_fd, proc_stat, sizeof(proc_stat)); + proc_stat = xmalloc(4096); + len = read(proc_fd, proc_stat, 4096); close(proc_fd); - if (len < 14) + if (len < 14) { + xfree(proc_stat); return false; + } /* skip over "PID (CMD) " */ - if ((str_ptr = (char *)strrchr(proc_stat, ')')) == NULL) + if ((str_ptr = (char *)strrchr(proc_stat, ')')) == NULL) { + xfree(proc_stat); return false; + } if (sscanf(str_ptr + 2, "%c %d %d %d %d %d %lu ", - state, &ppid, &pgrp, &session, &tty, &tpgid, &flags) != 7) + state, &ppid, &pgrp, &session, &tty, &tpgid, &flags) != 7) { + xfree(proc_stat); return false; + } + xfree(proc_stat); if ((flags & CLONE_PTRACE) == 0) return true; return false; diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c index 0238ad0c3442dce2a9b164b314cffd095b921bd5..3e6ff16ada2829bc593280b3baa3c773f1c1ef7d 100644 --- a/src/slurmd/slurmstepd/req.c +++ b/src/slurmd/slurmstepd/req.c @@ -404,6 +404,7 @@ _handle_accept(void *arg) int rc; uid_t uid; gid_t gid; + char *auth_info; debug3("Entering _handle_accept (new thread)"); xfree(arg); @@ -426,18 +427,21 @@ _handle_accept(void *arg) free_buf(buffer); goto fail; } - rc = g_slurm_auth_verify(auth_cred, NULL, 2, slurm_get_auth_info()); + auth_info = slurm_get_auth_info(); + rc = g_slurm_auth_verify(auth_cred, NULL, 2, auth_info); if (rc != SLURM_SUCCESS) { error("Verifying authentication credential: %s", g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred))); + xfree(auth_info); (void) g_slurm_auth_destroy(auth_cred); free_buf(buffer); goto fail; } /* Get the uid & gid from the credential, then destroy it. 
*/ - uid = g_slurm_auth_get_uid(auth_cred, slurm_get_auth_info()); - gid = g_slurm_auth_get_gid(auth_cred, slurm_get_auth_info()); + uid = g_slurm_auth_get_uid(auth_cred, auth_info); + gid = g_slurm_auth_get_gid(auth_cred, auth_info); + xfree(auth_info); debug3(" Identity: uid=%d, gid=%d", uid, gid); g_slurm_auth_destroy(auth_cred); free_buf(buffer); @@ -791,7 +795,8 @@ _handle_signal_container(int fd, stepd_step_rec_t *job, uid_t uid) ptr = getenvp(job->env, "SLURM_STEP_KILLED_MSG_NODE_ID"); if (ptr) target_node_id = atoi(ptr); - if ((job->nodeid == target_node_id) && (msg_sent == 0) && + if ((job->stepid != SLURM_EXTERN_CONT) && + (job->nodeid == target_node_id) && (msg_sent == 0) && (job->state < SLURMSTEPD_STEP_ENDING)) { time_t now = time(NULL); char entity[24], time_str[24]; @@ -1088,8 +1093,10 @@ _handle_terminate(int fd, stepd_step_rec_t *job, uid_t uid) } if (proctrack_g_signal(job->cont_id, SIGKILL) < 0) { - rc = -1; - errnum = errno; + if (errno != ESRCH) { /* No error if process already gone */ + rc = -1; + errnum = errno; + } verbose("Error sending SIGKILL signal to %u.%u: %m", job->jobid, job->stepid); } else { @@ -1321,9 +1328,20 @@ static int _handle_add_extern_pid_internal(stepd_step_rec_t *job, pid_t pid) jobacct_id.nodeid = job->nodeid; jobacct_id.job = job; - proctrack_g_add(job, pid); - task_g_add_pid(pid); - jobacct_gather_add_task(pid, &jobacct_id, 1); + if (proctrack_g_add(job, pid) != SLURM_SUCCESS) { + error("%s: Job %u can't add pid %d to proctrack plugin in the extern_step.", __func__, job->jobid, pid); + return SLURM_FAILURE; + } + + if (task_g_add_pid(pid) != SLURM_SUCCESS) { + error("%s: Job %u can't add pid %d to task plugin in the extern_step.", __func__, job->jobid, pid); + return SLURM_FAILURE; + } + + if (jobacct_gather_add_task(pid, &jobacct_id, 1) != SLURM_SUCCESS) { + error("%s: Job %u can't add pid %d to jobacct_gather plugin in the extern_step.", __func__, job->jobid, pid); + return SLURM_FAILURE; + } /* spawn a thread that will wait on the pid given */ slurm_attr_init(&attr); diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c index 08254071d24d8459605c2741b55502d5b0b45a82..ae13ff0f1d6adcef99b5ab39609888c355a350c1 100644 --- a/src/slurmd/slurmstepd/slurmstepd.c +++ b/src/slurmd/slurmstepd/slurmstepd.c @@ -347,7 +347,7 @@ _send_ok_to_slurmd(int sock) { /* If running under valgrind/memcheck, this pipe doesn't work correctly * so just skip it. */ -#ifndef SLURMSTEPD_MEMCHECK +#if (SLURMSTEPD_MEMCHECK == 0) int ok = SLURM_SUCCESS; safe_write(sock, &ok, sizeof(int)); return; @@ -361,7 +361,7 @@ _send_fail_to_slurmd(int sock) { /* If running under valgrind/memcheck, this pipe doesn't work correctly * so just skip it. */ -#ifndef SLURMSTEPD_MEMCHECK +#if (SLURMSTEPD_MEMCHECK == 0) int fail = SLURM_FAILURE; if (errno) @@ -379,7 +379,7 @@ _got_ack_from_slurmd(int sock) { /* If running under valgrind/memcheck, this pipe doesn't work correctly * so just skip it. 
*/ -#ifndef SLURMSTEPD_MEMCHECK +#if (SLURMSTEPD_MEMCHECK == 0) int ok; safe_read(sock, &ok, sizeof(int)); return; diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index 531b57527add6acf8154cda2ff82fa2c8ce77cea..66c66ee4c00d631a7fb4cdee342f58b956d2c75c 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -353,6 +353,8 @@ stepd_step_rec_create(launch_tasks_request_msg_t *msg, uint16_t protocol_version slurm_set_addr(&resp_addr, msg->resp_port[nodeid % msg->num_resp_port], NULL); + } else { + memset(&resp_addr, 0, sizeof(slurm_addr_t)); } job->user_managed_io = msg->user_managed_io; if (!msg->io_port) @@ -362,6 +364,8 @@ stepd_step_rec_create(launch_tasks_request_msg_t *msg, uint16_t protocol_version slurm_set_addr(&io_addr, msg->io_port[nodeid % msg->num_io_port], NULL); + } else { + memset(&io_addr, 0, sizeof(slurm_addr_t)); } srun = srun_info_create(msg->cred, &resp_addr, &io_addr, diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c index de1d2e041a8d61113a4312fc84f3211987ff79da..fdbea7325444a86d1eb6b810d9acf132d091187c 100644 --- a/src/slurmd/slurmstepd/task.c +++ b/src/slurmd/slurmstepd/task.c @@ -86,6 +86,7 @@ #include "src/common/log.h" #include "src/common/plugstack.h" #include "src/common/slurm_mpi.h" +#include "src/common/strlcpy.h" #include "src/common/switch.h" #include "src/common/xsignal.h" #include "src/common/xstring.h" @@ -281,70 +282,56 @@ _run_script_and_set_env(const char *name, const char *path, } /* Given a program name, translate it to a fully qualified pathname as needed - * based upon the PATH environment variable and current working directory */ -extern char *build_path(char* fname, char **prog_env, char *cwd) + * based upon the PATH environment variable and current working directory + * Returns xmalloc()'d string that must be xfree()'d */ +extern char *build_path(char *fname, char **prog_env, char *cwd) { - int i; char *path_env = NULL, *dir; - char *file_name, *file_path; + char *file_name; struct stat stat_buf; - int len = 256; + int len = PATH_MAX; - file_name = (char *)xmalloc(len); - /* make copy of file name (end at white space) */ - snprintf(file_name, len, "%s", fname); - for (i = 0; i < len; i++) { - if (file_name[i] == '\0') - break; - if (!isspace(file_name[i])) - continue; - file_name[i] = '\0'; - break; - } + if (!fname) + return NULL; + + file_name = (char *) xmalloc(len); /* check if already absolute path */ - if (file_name[0] == '/') + if (fname[0] == '/') { + /* copy and ensure null termination */ + strlcpy(file_name, fname, len); return file_name; - if (file_name[0] == '.') { - file_path = (char *)xmalloc(len); + } + + if (fname[0] == '.') { if (cwd) { - snprintf(file_path, len, "%s/%s", cwd, file_name); + snprintf(file_name, len, "%s/%s", cwd, fname); } else { - dir = (char *)xmalloc(len); + dir = (char *) xmalloc(len); if (!getcwd(dir, len)) error("getcwd failed: %m"); - snprintf(file_path, len, "%s/%s", dir, file_name); + snprintf(file_name, len, "%s/%s", dir, fname); xfree(dir); } - xfree(file_name); - return file_path; + return file_name; } /* search for the file using PATH environment variable */ - for (i = 0; ; i++) { - if (prog_env[i] == NULL) - return file_name; - if (xstrncmp(prog_env[i], "PATH=", 5)) - continue; - path_env = xstrdup(&prog_env[i][5]); - break; - } + path_env = xstrdup(getenvp(prog_env, "PATH")); - file_path = (char *)xmalloc(len); dir = strtok(path_env, ":"); while (dir) { - snprintf(file_path, len, "%s/%s", 
dir, file_name); - if ((stat(file_path, &stat_buf) == 0) + snprintf(file_name, len, "%s/%s", dir, fname); + if ((stat(file_name, &stat_buf) == 0) && (! S_ISDIR(stat_buf.st_mode))) break; dir = strtok(NULL, ":"); } if (dir == NULL) /* not found */ - snprintf(file_path, len, "%s", file_name); + strlcpy(file_name, fname, len); - xfree(file_name); xfree(path_env); - return file_path; + return file_name; } static int diff --git a/src/sview/node_info.c b/src/sview/node_info.c index be22e68134bb980b2b2387dba47c2c9d71035a44..9a8a46afa903039b9d07b302db37a47776aeeb6f 100644 --- a/src/sview/node_info.c +++ b/src/sview/node_info.c @@ -1193,7 +1193,7 @@ extern int update_active_features_node(GtkDialog *dialog, const char *nodelist, g_free(edit); goto end_it; } - if ((rc = slurm_update_node(node_msg) == SLURM_SUCCESS)) { + if ((rc = slurm_update_node(node_msg)) == SLURM_SUCCESS) { edit = g_strdup_printf( "Node(s) %s updated successfully.", nodelist); @@ -1283,7 +1283,7 @@ extern int update_avail_features_node(GtkDialog *dialog, const char *nodelist, g_free(edit); goto end_it; } - if ((rc = slurm_update_node(node_msg) == SLURM_SUCCESS)) { + if ((rc = slurm_update_node(node_msg)) == SLURM_SUCCESS) { edit = g_strdup_printf( "Node(s) %s updated successfully.", nodelist); diff --git a/testsuite/expect/inc21.21_tests b/testsuite/expect/inc21.21_tests index 39dd209a771de63472bd674e47e30804940ae80d..4f3562c045d4b05fe4c06ca79b993f9d3558fd6c 100644 --- a/testsuite/expect/inc21.21_tests +++ b/testsuite/expect/inc21.21_tests @@ -60,7 +60,7 @@ proc inc21_21_good { test_type limit } { } set matches 0 - spawn $srun -v -t10 $add [lindex $limit 0][lindex $limit 1] \ + spawn $srun -v -t1 $add [lindex $limit 0][lindex $limit 1] \ --account=$ta -I $bin_id expect { -re "launching ($number)" { @@ -635,7 +635,7 @@ proc inc21_21_grpwall { test_type limit } { set matches 0 send_user "Sleeping for a bit...hang tight\n" - spawn $srun -v -t1 [lindex $limit 0][lindex $limit 1] --account=$ta \ + spawn $srun -v [lindex $limit 0][lindex $limit 1] --account=$ta \ -I $file_in expect { -re "launching ($number)" { @@ -672,7 +672,7 @@ proc inc21_21_grpwall { test_type limit } { } set matches 0 - spawn $srun -v -t1 [lindex $limit 0][lindex $limit 1] --account=$ta \ + spawn $srun -v [lindex $limit 0][lindex $limit 1] --account=$ta \ -I $bin_id expect { -re "Job violates accounting/QOS policy" { diff --git a/testsuite/expect/test1.92 b/testsuite/expect/test1.92 index 03c94517b4de07a24f0db8ac5f56d7a76171f677..2fa69ac5e195cf97c601a32e88193d73f123da61 100755 --- a/testsuite/expect/test1.92 +++ b/testsuite/expect/test1.92 @@ -58,7 +58,7 @@ exit 0 # Create an allocation # set timeout $max_job_delay -set salloc_pid [spawn $salloc -N2 --ntasks-per-node=2 --verbose -t2 $bin_bash] +set salloc_pid [spawn $salloc -N2 --ntasks-per-node=2 --verbose -t4 $bin_bash] expect { -re "salloc: Granted job allocation ($number)" { set job_id $expect_out(1,string) diff --git a/testsuite/expect/test7.2 b/testsuite/expect/test7.2 index ee038dcab02d9579c7c028264c89e83883c4a807..fbea17a3186dc067dea1bb7f1284d158c4cf1a4f 100755 --- a/testsuite/expect/test7.2 +++ b/testsuite/expect/test7.2 @@ -39,6 +39,12 @@ set file_prog_get "test$test_id.prog" print_header $test_id +# PMI library not in Cray RPM +if {[test_cray]} { + send_user "\nWARNING: This test is incompatible with Cray systems\n" + exit $exit_code +} + # # Delete left-over program and rebuild it. #
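/*
 * Usage sketch for the reworked build_path() in src/slurmd/slurmstepd/task.c
 * above (illustrative only; the caller and the job fields used here are
 * assumptions, not taken from the patch).  The function now always returns an
 * xmalloc()'d buffer of up to PATH_MAX bytes that the caller must xfree():
 * absolute names are copied through with strlcpy(), names beginning with '.'
 * are resolved against cwd (or getcwd()), and anything else is looked up on
 * the PATH entry of the supplied environment via getenvp().
 */
static void _example_resolve_command(stepd_step_rec_t *job)
{
	/* job->argv, job->env and job->cwd are assumed here for illustration */
	char *resolved = build_path(job->argv[0], job->env, job->cwd);

	if (resolved) {
		debug2("%s: command resolved to %s", __func__, resolved);
		xfree(resolved);	/* caller owns the returned buffer */
	}
}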