From a995d6b63dde9d6bf0e1589873d1a1757eb349c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= <remi@rezib.org> Date: Mon, 11 May 2015 23:09:01 +0200 Subject: [PATCH] Imported Upstream version 14.11.6 --- META | 4 +- NEWS | 96 +++++++++ README.rst | 4 +- RELEASE_NOTES | 3 +- auxdir/x_ac_debug.m4 | 23 +-- config.h.in | 9 + configure | 30 +-- configure.ac | 4 +- contribs/cray/opt_modulefiles_slurm.in | 2 +- contribs/pmi2/pmi2.h | 4 +- contribs/pmi2/slurm/pmi2.h | 4 +- contribs/torque/qsub.pl | 14 +- doc/html/accounting.shtml | 37 +++- doc/html/bluegene.shtml | 11 +- doc/html/cpu_management.shtml | 6 +- doc/html/cray_alps.shtml | 22 ++- doc/html/documentation.shtml | 3 +- doc/html/download.shtml | 5 +- doc/html/elastic_computing.shtml | 4 +- doc/html/faq.shtml | 5 +- doc/html/high_throughput.shtml | 5 +- doc/html/job_array.shtml | 4 +- doc/html/job_exit_code.shtml | 6 +- doc/html/maui.shtml | 6 +- doc/html/mc_support.shtml | 183 ++++++++---------- doc/html/meetings.shtml | 6 +- doc/html/moab.shtml | 10 +- doc/html/mpi_guide.shtml | 46 ++--- doc/html/multi_cluster.shtml | 4 +- doc/html/news.shtml | 28 +-- doc/html/platforms.shtml | 37 +--- doc/html/power_save.shtml | 23 +-- doc/html/preempt.shtml | 4 +- doc/html/quickstart_admin.shtml | 30 +-- doc/html/reservations.shtml | 4 +- doc/html/rpc.shtml | 22 +-- doc/html/slurm_ug_cfp.shtml | 29 +-- doc/html/team.shtml | 2 + doc/html/testimonials.shtml | 18 +- doc/html/troubleshoot.shtml | 16 +- doc/man/man1/sacct.1 | 44 +++-- doc/man/man1/sacctmgr.1 | 10 +- doc/man/man1/salloc.1 | 19 +- doc/man/man1/sattach.1 | 3 +- doc/man/man1/sbatch.1 | 20 +- doc/man/man1/sbcast.1 | 2 +- doc/man/man1/scancel.1 | 2 +- doc/man/man1/scontrol.1 | 9 +- doc/man/man1/sdiag.1 | 3 +- doc/man/man1/sh5util.1 | 3 +- doc/man/man1/sinfo.1 | 8 +- doc/man/man1/slurm.1 | 16 +- doc/man/man1/smap.1 | 4 +- doc/man/man1/sprio.1 | 2 +- doc/man/man1/squeue.1 | 4 +- doc/man/man1/sreport.1 | 2 +- doc/man/man1/srun.1 | 26 ++- doc/man/man1/srun_cr.1 | 2 +- doc/man/man1/sshare.1 | 7 +- doc/man/man1/sstat.1 | 2 +- doc/man/man1/strigger.1 | 4 +- doc/man/man1/sview.1 | 3 +- doc/man/man3/slurm_allocate_resources.3 | 5 +- doc/man/man3/slurm_checkpoint_error.3 | 2 +- doc/man/man3/slurm_clear_trigger.3 | 2 +- doc/man/man3/slurm_complete_job.3 | 3 +- doc/man/man3/slurm_free_ctl_conf.3 | 2 +- doc/man/man3/slurm_free_front_end_info_msg.3 | 2 +- doc/man/man3/slurm_free_job_info_msg.3 | 3 +- .../slurm_free_job_step_info_response_msg.3 | 2 +- doc/man/man3/slurm_free_node_info.3 | 3 +- doc/man/man3/slurm_free_partition_info.3 | 3 +- doc/man/man3/slurm_get_errno.3 | 3 +- doc/man/man3/slurm_hostlist_create.3 | 2 +- doc/man/man3/slurm_job_step_create.3 | 2 +- doc/man/man3/slurm_kill_job.3 | 3 +- doc/man/man3/slurm_load_reservations.3 | 3 +- doc/man/man3/slurm_reconfigure.3 | 3 +- doc/man/man3/slurm_resume.3 | 2 +- doc/man/man3/slurm_slurmd_status.3 | 2 +- doc/man/man3/slurm_step_ctx_create.3 | 2 +- doc/man/man3/slurm_step_launch.3 | 4 +- doc/man/man3/slurm_update_job.3 | 2 +- doc/man/man5/acct_gather.conf.5 | 2 +- doc/man/man5/bluegene.conf.5 | 2 +- doc/man/man5/cgroup.conf.5 | 3 +- doc/man/man5/cray.conf.5 | 3 +- doc/man/man5/ext_sensors.conf.5 | 5 +- doc/man/man5/gres.conf.5 | 3 +- doc/man/man5/nonstop.conf.5 | 5 +- doc/man/man5/slurm.conf.5 | 61 ++++-- doc/man/man5/slurmdbd.conf.5 | 7 +- doc/man/man5/topology.conf.5 | 2 +- doc/man/man5/wiki.conf.5 | 3 +- doc/man/man8/slurmctld.8 | 3 +- doc/man/man8/slurmd.8 | 2 +- doc/man/man8/slurmdbd.8 | 3 +- doc/man/man8/slurmstepd.8 | 4 +- 
doc/man/man8/spank.8 | 12 +- slurm.spec | 20 +- src/common/cbuf.c | 28 +-- src/common/gres.c | 18 +- src/common/layouts_mgr.c | 36 ++-- src/common/pack.c | 36 ++-- src/common/plugstack.c | 2 +- src/common/slurm_protocol_defs.h | 6 +- src/common/slurm_protocol_pack.c | 42 +--- src/common/xcgroup_read_config.c | 1 + src/common/xstring.c | 11 +- src/database/mysql_common.c | 20 ++ src/database/mysql_common.h | 1 + .../mysql/as_mysql_archive.c | 100 +++++++--- .../accounting_storage/mysql/as_mysql_assoc.c | 5 +- .../mysql/as_mysql_rollup.c | 91 ++++++--- .../accounting_storage/mysql/as_mysql_usage.c | 6 +- .../slurmdbd/accounting_storage_slurmdbd.c | 26 +-- src/plugins/job_submit/lua/job_submit_lua.c | 4 + .../jobacct_gather/common/common_jag.c | 4 +- src/plugins/priority/multifactor/fair_tree.c | 18 +- .../multifactor/priority_multifactor.c | 17 +- .../multifactor/priority_multifactor.h | 2 +- src/plugins/proctrack/lua/proctrack_lua.c | 8 +- src/plugins/sched/backfill/backfill.c | 21 +- src/plugins/sched/wiki/get_nodes.c | 2 +- src/plugins/sched/wiki2/get_nodes.c | 2 +- src/plugins/select/alps/basil_interface.c | 6 + src/plugins/select/alps/basil_interface.h | 1 + src/plugins/select/alps/select_alps.c | 40 +++- src/plugins/select/cons_res/job_test.c | 29 +++ src/plugins/select/cray/select_cray.c | 83 ++++++-- src/plugins/switch/cray/util.c | 6 + src/sacct/sacct.h | 2 +- src/sbatch/opt.c | 21 +- src/slurmctld/agent.h | 2 + src/slurmctld/controller.c | 14 +- src/slurmctld/job_mgr.c | 152 ++++++++------- src/slurmctld/job_scheduler.c | 56 +++++- src/slurmctld/node_mgr.c | 33 +++- src/slurmctld/node_scheduler.c | 5 + src/slurmctld/ping_nodes.c | 31 ++- src/slurmctld/proc_req.c | 10 +- src/slurmctld/slurmctld.h | 2 +- src/slurmd/slurmd/get_mach_stat.c | 28 ++- src/slurmd/slurmd/req.c | 7 +- src/slurmd/slurmd/slurmd.c | 4 +- src/slurmdbd/proc_req.c | 7 + src/squeue/print.c | 6 +- src/sreport/cluster_reports.c | 40 ++-- src/sreport/user_reports.c | 17 +- src/srun/libsrun/opt.c | 3 +- src/sshare/process.c | 5 +- src/sshare/sshare.c | 21 +- src/sshare/sshare.h | 6 +- src/sview/sview.c | 6 +- src/sview/sview.h | 4 +- testsuite/expect/Makefile.am | 1 + testsuite/expect/Makefile.in | 1 + testsuite/expect/README | 1 + testsuite/expect/test20.12 | 154 +++++++++++++++ testsuite/expect/test24.1.prog.c | 2 +- testsuite/expect/test24.3.prog.c | 2 +- testsuite/expect/test24.4.prog.c | 2 +- testsuite/expect/test7.11 | 2 +- 163 files changed, 1617 insertions(+), 865 deletions(-) create mode 100755 testsuite/expect/test20.12 diff --git a/META b/META index e13ff98e7..92da708db 100644 --- a/META +++ b/META @@ -9,8 +9,8 @@ Name: slurm Major: 14 Minor: 11 - Micro: 5 - Version: 14.11.5 + Micro: 6 + Version: 14.11.6 Release: 1 ## diff --git a/NEWS b/NEWS index 653655514..bed57bbef 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,102 @@ This file describes changes in recent versions of Slurm. It primarily documents those changes that are of interest to users and administrators. +* Changes in Slurm 14.11.6 +========================== + -- If SchedulerParameters value of bf_min_age_reserve is configured, then + a newly submitted job can start immediately even if there is a higher + priority non-runnable job which has been waiting for less time than + bf_min_age_reserve. + -- qsub wrapper modified to export "all" with -V option + -- RequeueExit and RequeueExitHold configuration parameters modified to accept + numeric ranges. For example "RequeueExit=1,2,3,4" and "RequeueExit=1-4" are + equivalent. 
+ -- Correct the job array specification parser to accept brackets in job array + expression (e.g. "123_[4,7-9]"). + -- Fix for misleading job submit failure errors sent to users. The previous error + could indicate why specific nodes could not be used (e.g. too small memory) + when other nodes could be used, but were not for another reason. + -- Fix squeue --array to correctly display the array elements when the + % separator is specified at array submission time. + -- Fix priority not being calculated correctly due to memory issues. + -- Fix a transient pending reason 'JobId=job_id has invalid QOS'. + -- A non-administrator change to job priority will not be persistent except + for holding the job. Users wanting to change a job priority on a persistent + basis should reset its "nice" value. + -- Print buffer sizes as unsigned values when failing to pack messages. + -- Fix race condition where sprio would print factors without weights applied. + -- Document the sacct option JobIDRaw which for arrays prints the jobid instead + of the arrayTaskId. + -- Allow users to modify MinCPUsNode, MinMemoryNode and MinTmpDiskNode of + their own jobs. + -- Increase the jobid print field in SQUEUE_FORMAT in + opt_modulefiles_slurm.in. + -- Enable compiling without optimizations and with debugging symbols by + default. Disable this by configuring with --disable-debug. + -- job_submit/lua plugin: Add mail_type and mail_user fields. + -- Correct output message from sshare. + -- Use the standard statvfs(2) syscall if available, in preference to the + non-standard statfs. + -- Add a new option -U/--Users to sshare to display only user + information; parents and ancestors are not printed. + -- Purge 50000 records at a time so that locks can be released periodically. + -- Fix potentially uninitialized variables. + -- ALPS - Fix issue where a frontend node could become unresponsive and never be + added back into the system. + -- Gate epilog complete messages as done with other messages. + -- If we have more than a certain number of agents (50), wait longer when gating + RPCs. + -- FrontEnd - Ping non-responding or down nodes. + -- switch/cray: If CR_PACK_NODES is configured, then set the environment + variable "PMI_CRAY_NO_SMP_ENV=1". + -- Fix invalid memory reference in SlurmDBD when putting a node up. + -- Allow opening of plugstack.conf even when it is a symlink. + -- Fix scontrol reboot so that rebooted nodes will not be set down with reason + 'Node xyz unexpectedly rebooted' but will be correctly put back into service. + -- CRAY - Throttle the post NHC operations so as to not hog the job write lock + if many steps/jobs finish at once. + -- Disable changes to GRES count while jobs are running on the node. + -- CRAY - Fix issue with scontrol reconfig. + -- slurmd: Remove wrong reporting of "Error reading step ... memory limit". + The logic was treating success as an error. + -- Eliminate "Node ping apparently hung" error messages. + -- Fix average CPU frequency calculation. + -- When allocating resources with a resolution of sockets, charge the job for all + CPUs on allocated sockets rather than just the CPUs on used cores. + -- Prevent slurmdbd error if a cluster is added or removed while a rollup is in progress. + Removing a cluster can cause slurmdbd to abort. Adding a cluster can cause + the slurmdbd rollup to hang. + -- sview - When right-clicking on a tab, make sure we don't display the page + list, but only the column list. + -- FRONTEND - If doing a clean start, make sure the nodes are brought up in the + database.
+ -- MySQL - Fix issue when using TrackSlurmctldDown and nodes are down at + the same time; don't double bill the down time. + -- MySQL - Various memory leak fixes. + -- sreport - Fix Energy displays. + -- Fix node manager logic to keep an unexpectedly rebooted node in state + NODE_STATE_DOWN even if already down when rebooted. + -- Fix for array jobs submitted to multiple partitions not starting. + -- CRAY - Enable ALPS mpp compatibility code in sbatch for native Slurm. + -- ALPS - Move basil_inventory to a less confusing function. + -- Add SchedulerParameters option of "sched_max_job_start=" to limit the + number of jobs that can be started in any single execution of the main + scheduling logic. + -- Fixed compiler warnings generated by gcc version >= 4.6. + -- sbatch now stops parsing the script for "#SBATCH" directives after the first + command, which matches the documentation. + -- Overwrite SLURM_JOB_NAME in sbatch if it already exists in the environment, + using the name specified on the command line with --job-name. + -- Remove xmalloc_nz from unpack functions. If the unpack ever failed, the + free afterwards would not have zeroed out memory on the variables that + didn't get unpacked. + -- Improve database interaction from the controller. + -- Fix for data shift when loading job archives. + -- ALPS - Added new SchedulerParameters=inventory_interval to specify how + often an inventory request is handled. + -- ALPS - Don't run a release on a reservation on the slurmctld for a batch + job. This is already handled on the stepd when the script finishes. + * Changes in Slurm 14.11.5 ========================== -- Correct the squeue command taking into account that a node can diff --git a/README.rst b/README.rst index e39191907..b939bdb65 100644 --- a/README.rst +++ b/README.rst @@ -4,9 +4,9 @@ Slurm Workload Manager This is the Slurm Workload Manager. Slurm is an open-source cluster resource management and job scheduling system that strives to be simple, scalable, portable, fault-tolerant, and -interconnect agnostic. SLURM currently has been tested only under Linux. +interconnect agnostic. Slurm currently has been tested only under Linux. -As a cluster resource manager, SLURM provides three key functions. First, +As a cluster resource manager, Slurm provides three key functions. First, it allocates exclusive and/or non-exclusive access to resources (compute nodes) to users for some duration of time so they can perform work. Second, it provides a framework for starting, executing, and diff --git a/RELEASE_NOTES b/RELEASE_NOTES index f848dd226..d7ed26713 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -22,7 +22,8 @@ Slurm can be upgraded from version 2.6 or 14.03 to version 14.11 without loss of jobs or other state information. Upgrading directly from an earlier version of Slurm will result in loss of state information. -If using SPANK plugins, they should be recompiled against this version. +If using SPANK plugins that use the Slurm APIs, they should be recompiled when +upgrading Slurm to a new major release.
HIGHLIGHTS diff --git a/auxdir/x_ac_debug.m4 b/auxdir/x_ac_debug.m4 index 2dc010d4e..50f67289a 100644 --- a/auxdir/x_ac_debug.m4 +++ b/auxdir/x_ac_debug.m4 @@ -24,7 +24,7 @@ AC_DEFUN([X_AC_DEBUG], [ AC_MSG_CHECKING([whether or not developer options are enabled]) AC_ARG_ENABLE( [developer], - AS_HELP_STRING(--enable-developer,enable developer options (-Werror - also sets --enable-debug as well)), + AS_HELP_STRING(--enable-developer,enable developer options (asserts, -Werror - also sets --enable-debug as well)), [ case "$enableval" in yes) x_ac_developer=yes ;; no) x_ac_developer=no ;; @@ -34,34 +34,35 @@ AC_DEFUN([X_AC_DEBUG], [ ] ) if test "$x_ac_developer" = yes; then - test "$GCC" = yes && CFLAGS="$CFLAGS -Werror" - test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Werror" - # automatically turn on --enable-debug if being a developer - x_ac_debug=yes + test "$GCC" = yes && CFLAGS="$CFLAGS -Werror" + test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Werror" + # automatically turn on --enable-debug if being a developer + x_ac_debug=yes + else + AC_DEFINE([NDEBUG], [1], + [Define to 1 if you are building a production release.] + ) fi AC_MSG_RESULT([${x_ac_developer=no}]) AC_MSG_CHECKING([whether debugging is enabled]) AC_ARG_ENABLE( [debug], - AS_HELP_STRING(--enable-debug,enable debugging code for development (automatically set if --enable-developer is used)), + AS_HELP_STRING(--disable-debug,disable debugging symbols and compile with optimizations), [ case "$enableval" in yes) x_ac_debug=yes ;; no) x_ac_debug=no ;; *) AC_MSG_RESULT([doh!]) AC_MSG_ERROR([bad value "$enableval" for --enable-debug]) ;; esac - ] + ], + [x_ac_debug=yes] ) if test "$x_ac_debug" = yes; then # you will most likely get a -O2 in you compile line, but the last option # is the only one that is looked at. test "$GCC" = yes && CFLAGS="$CFLAGS -Wall -g -O0 -fno-strict-aliasing" test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Wall -g -O0 -fno-strict-aliasing" - else - AC_DEFINE([NDEBUG], [1], - [Define to 1 if you are building a production release.] - ) fi AC_MSG_RESULT([${x_ac_debug=no}]) diff --git a/config.h.in b/config.h.in index c86bbbdd9..f363aa07e 100644 --- a/config.h.in +++ b/config.h.in @@ -278,6 +278,12 @@ /* Define to 1 if you have the <socket.h> header file. */ #undef HAVE_SOCKET_H +/* Define to 1 if you have the `statfs' function. */ +#undef HAVE_STATFS + +/* Define to 1 if you have the `statvfs' function. */ +#undef HAVE_STATVFS + /* Define to 1 if you have the <stdbool.h> header file. */ #undef HAVE_STDBOOL_H @@ -341,6 +347,9 @@ /* Define to 1 if you have the <sys/statfs.h> header file. */ #undef HAVE_SYS_STATFS_H +/* Define to 1 if you have the <sys/statvfs.h> header file. */ +#undef HAVE_SYS_STATVFS_H + /* Define to 1 if you have the <sys/stat.h> header file. 
*/ #undef HAVE_SYS_STAT_H diff --git a/configure b/configure index 11097dc17..e9cc9a4ba 100755 --- a/configure +++ b/configure @@ -1691,10 +1691,10 @@ Optional Features: Run SLURM in an emulated Cray mode --enable-native-cray Run SLURM natively on a Cray without ALPS --enable-cray-network Run SLURM on a non-Cray system with a Cray network - --enable-developer enable developer options (-Werror - also sets - --enable-debug as well) - --enable-debug enable debugging code for development (automatically - set if --enable-developer is used) + --enable-developer enable developer options (asserts, -Werror - also + sets --enable-debug as well) + --disable-debug disable debugging symbols and compile with + optimizations --enable-memory-leak-debug enable memory leak debugging code for development --enable-front-end enable slurmd operation on a front-end @@ -18252,7 +18252,7 @@ for ac_header in mcheck.h values.h socket.h sys/socket.h \ pty.h utmp.h \ sys/syslog.h linux/sched.h \ kstat.h paths.h limits.h sys/statfs.h sys/ptrace.h \ - sys/termios.h float.h + sys/termios.h float.h sys/statvfs.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` @@ -19515,6 +19515,8 @@ for ac_func in \ get_current_dir_name \ faccessat \ eaccess \ + statvfs \ + statfs \ do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` @@ -22550,10 +22552,14 @@ $as_echo "doh!" >&6; } fi if test "$x_ac_developer" = yes; then - test "$GCC" = yes && CFLAGS="$CFLAGS -Werror" - test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Werror" - # automatically turn on --enable-debug if being a developer - x_ac_debug=yes + test "$GCC" = yes && CFLAGS="$CFLAGS -Werror" + test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Werror" + # automatically turn on --enable-debug if being a developer + x_ac_debug=yes + else + +$as_echo "#define NDEBUG 1" >>confdefs.h + fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${x_ac_developer=no}" >&5 $as_echo "${x_ac_developer=no}" >&6; } @@ -22570,6 +22576,8 @@ $as_echo "doh!" >&6; } as_fn_error $? "bad value \"$enableval\" for --enable-debug" "$LINENO" 5 ;; esac +else + x_ac_debug=yes fi @@ -22578,10 +22586,6 @@ fi # is the only one that is looked at. 
test "$GCC" = yes && CFLAGS="$CFLAGS -Wall -g -O0 -fno-strict-aliasing" test "$GXX" = yes && CXXFLAGS="$CXXFLAGS -Wall -g -O0 -fno-strict-aliasing" - else - -$as_echo "#define NDEBUG 1" >>confdefs.h - fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${x_ac_debug=no}" >&5 $as_echo "${x_ac_debug=no}" >&6; } diff --git a/configure.ac b/configure.ac index 4d0d62a30..87d3d061d 100644 --- a/configure.ac +++ b/configure.ac @@ -129,7 +129,7 @@ AC_CHECK_HEADERS(mcheck.h values.h socket.h sys/socket.h \ pty.h utmp.h \ sys/syslog.h linux/sched.h \ kstat.h paths.h limits.h sys/statfs.h sys/ptrace.h \ - sys/termios.h float.h + sys/termios.h float.h sys/statvfs.h ) AC_HEADER_SYS_WAIT AC_HEADER_TIME @@ -203,6 +203,8 @@ AC_CHECK_FUNCS( \ get_current_dir_name \ faccessat \ eaccess \ + statvfs \ + statfs \ ) AC_CHECK_DECLS([hstrerror, strsignal, sys_siglist]) diff --git a/contribs/cray/opt_modulefiles_slurm.in b/contribs/cray/opt_modulefiles_slurm.in index cc309a8a8..b8c039633 100644 --- a/contribs/cray/opt_modulefiles_slurm.in +++ b/contribs/cray/opt_modulefiles_slurm.in @@ -36,7 +36,7 @@ prepend-path PERL5LIB "$slurmdir/$perldir" # other useful environment variables setenv SINFO_FORMAT {%9P %5a %8s %.10l %.6c %.6z %.7D %10T %N} -setenv SQUEUE_FORMAT {%.6i %.8u %.7a %.14j %.3t %9r %19S %.10M %.10L %.5D %.4C} +setenv SQUEUE_FORMAT {%.8i %.8u %.7a %.14j %.3t %9r %19S %.10M %.10L %.5D %.4C} setenv SQUEUE_ALL {yes} ;# show hidden partitions, too setenv SQUEUE_SORT {-t,e,S} diff --git a/contribs/pmi2/pmi2.h b/contribs/pmi2/pmi2.h index e72a985ff..ddcc36c88 100644 --- a/contribs/pmi2/pmi2.h +++ b/contribs/pmi2/pmi2.h @@ -369,7 +369,7 @@ int PMI2_Job_GetRank(int* rank); PMI2_Info_GetSize - get the number of processes on the node Output parameters: - . rank - the rank of this job + . size - the number of processes on the node Return values: Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. @@ -383,7 +383,7 @@ int PMI2_Info_GetSize(int* size); . jobid - job id of the job to connect to Output parameters: - . conn - connection structure used to exteblish communication with + . conn - connection structure used to establish communication with the remote job Return values: diff --git a/contribs/pmi2/slurm/pmi2.h b/contribs/pmi2/slurm/pmi2.h index e72a985ff..ddcc36c88 100644 --- a/contribs/pmi2/slurm/pmi2.h +++ b/contribs/pmi2/slurm/pmi2.h @@ -369,7 +369,7 @@ int PMI2_Job_GetRank(int* rank); PMI2_Info_GetSize - get the number of processes on the node Output parameters: - . rank - the rank of this job + . size - the number of processes on the node Return values: Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. @@ -383,7 +383,7 @@ int PMI2_Info_GetSize(int* size); . jobid - job id of the job to connect to Output parameters: - . conn - connection structure used to exteblish communication with + . 
conn - connection structure used to establish communication with the remote job Return values: diff --git a/contribs/torque/qsub.pl b/contribs/torque/qsub.pl index 4235484b8..d2da69a66 100755 --- a/contribs/torque/qsub.pl +++ b/contribs/torque/qsub.pl @@ -244,12 +244,10 @@ if ($variable_list) { } } } else { - my $separator = ""; if ($export_env) { - $command .= " --export="; + $command .= " --export=all"; } else { $command .= " --export=none"; - $separator = ","; } # The logic below ignores quoted commas, but the quotes must be escaped @@ -260,16 +258,14 @@ if ($variable_list) { foreach my $part (@parts) { my ($key, $value) = $part =~ /(.*)=(.*)/; if (defined($key) && defined($value)) { - $command .= "$separator"; - $command .= "$key=$value"; - $separator = ","; + $command .= ",$key=$value"; } elsif (defined($ENV{$part})) { - $command .= "$separator"; - $command .= "$part=$ENV{$part}"; - $separator = ","; + $command .= ",$part=$ENV{$part}"; } } } +} elsif ($export_env && ! $interactive) { + $command .= " --export=all"; } $command .= " --account='$group_list'" if $group_list; diff --git a/doc/html/accounting.shtml b/doc/html/accounting.shtml index ec3131358..f78ff6d55 100644 --- a/doc/html/accounting.shtml +++ b/doc/html/accounting.shtml @@ -114,6 +114,8 @@ If you plan to restrict access to accounting records (e.g. only permit a user to view records of his jobs), then all users should have consistent names and IDs.</p> +<p><b>NOTE:</b> Only lowercase usernames are supported. + <p>The best way to insure security of the data is by authenticating communications to the SlurmDBD and we recommend <a href="https://code.google.com/p/munge/">MUNGE</a> for that purpose. @@ -202,12 +204,33 @@ checking for mysql_config... /usr/bin/mysql_config MySQL test program built properly. </pre> -<p>Note that at least the first time running the slurmdbd with MySQL you need -to make sure your my.cnf file has innodb_buffer_pool_size equal to at least 64 -megabytes. You can accomplish this by adding the line:<br> -<i>innodb_buffer_pool_size=64M</i><br> -under the [mysqld] reference in the my.cnf file and restarting the mysqld. -This is needed when converting large tables over to the new database schema.</p> +<p><b>NOTE:</b> Before running the slurmdbd for the first time, review the +current setting for MySQL's +<a href="http://dev.mysql.com/doc/refman/5.0/en/innodb-parameters.html +#sysvar_innodb_buffer_pool_size">innodb_buffer_pool_size</a>. +Consider setting this +value large enough to handle the size of the database. This helps when +converting large tables over to the new database schema and when purging +old records.</p> + +<p> +ex. +<pre> +mysql> SHOW VARIABLES LIKE 'innodb_buffer_pool_size'; ++-------------------------+-----------+ +| Variable_name | Value | ++-------------------------+-----------+ +| innodb_buffer_pool_size | 134217728 | ++-------------------------+-----------+ +1 row in set (0.00 sec) + +$cat my.cnf +... +[mysqld] +<i>innodb_buffer_pool_size=128M</i> +... +</pre> +</p> <h2>Slurm Accounting Configuration After Build</h2> @@ -788,7 +811,7 @@ as deleted. If an entity has existed for less than 1 day, the entity will be removed completely. 
This is meant to clean up after typographic errors.</p> -<p style="text-align:center;">Last modified 30 April 2014</p> +<p style="text-align:center;">Last modified 6 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/bluegene.shtml b/doc/html/bluegene.shtml index 117383094..e3febe5ed 100644 --- a/doc/html/bluegene.shtml +++ b/doc/html/bluegene.shtml @@ -341,16 +341,15 @@ started runjob_mux </li> <li><b>WARNING! You need at least IBM driver V1R1M1 efix 008 or this method will not work. Previous versions would load the old -plugin (presumably still in memory) other than the new one. As of SLURM -2.4.2 when the plugin is loaded it will print the version of SLURM -which should let you know if the new one is loaded or not.</b> -<p>This method allows no job loss using the +plugin (presumably still in memory) other than the new one. +Slurm will print its version when the plugin is loaded for validation.</b><br> +This method allows for no job loss using the IBM <i>runjob_mux_refresh_config</i> command. This should reload the plugin and all should be good afterwards. After doing this you may see some warning/error messages about the current running jobs when finishing not being known. This is expected and can usually be ignored.</li> -</ul></p> +</ul> <b>Notes about sub-block allocations:</b> <p> @@ -856,6 +855,6 @@ scheduling logic, etc. </p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 21 December 2012</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/cpu_management.shtml b/doc/html/cpu_management.shtml index 835f9a76e..62ac751c4 100644 --- a/doc/html/cpu_management.shtml +++ b/doc/html/cpu_management.shtml @@ -31,9 +31,7 @@ Architectures</a><br> <a href="dist_plane.html">Plane distribution</a></p> <p>This document describes Slurm CPU management for conventional Linux clusters only. For -information on Cray and IBM BlueGene systems, please refer to the appropriate documents.</p> -<p>The information and examples in this document have been verified on Slurm version 2.4.0. Some -information may not be valid for previous Slurm versions.</p><br> +information on Cray and IBM BlueGene systems, please refer to the appropriate documents.</p><br> <a name="Section1"></a> <h2>CPU Management Steps performed by Slurm</h2> <p>Slurm uses four basic steps to manage CPU resources for a job/step:</p> @@ -3538,6 +3536,6 @@ and binding for this job. For example, task id 2 is bound to CPU id 2. <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 7 February 2012</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/cray_alps.shtml b/doc/html/cray_alps.shtml index cc31a6c8d..fdb934eb4 100644 --- a/doc/html/cray_alps.shtml +++ b/doc/html/cray_alps.shtml @@ -21,7 +21,7 @@ You should be familiar with the Slurm's mode of operation on Linux clusters before studying the differences in Cray system operation described in this document.</p> -<p>Since version 2.3, Slurm is designed to operate as a job scheduler over +<p>Slurm is designed to operate as a job scheduler over Cray's Application Level Placement Scheduler (ALPS). Use Slurm's <i>sbatch</i> or <i>salloc</i> commands to create a resource allocation in ALPS. @@ -29,7 +29,7 @@ Then use ALPS' <i>aprun</i> command to launch parallel jobs within the resource allocation. 
The resource allocation is terminated once the the batch script or the <i>salloc</i> command terminates. -With 2.5 there is a <i>launch/aprun</i> plugin that allow users to use +Slurm includes a <i>launch/aprun</i> plugin that allow users to use <i>srun</i> to wrap <i>aprun</i> and translate <i>srun</i> options into the equivalent <i>aprun</i> options. Not all <i>srun</i> options can be translated so there are options that are not available. @@ -176,9 +176,6 @@ batch job with a node count specification of zero.</p> <pre> sbatch -N0 pre_process.bash </pre> -<p><b>Note</b>: Support for Cray job allocations with zero compute nodes was -added to Slurm version 2.4. Earlier versions of Slurm will return an error for -zero compute node job requests.</p> <p><b>Note</b>: Job allocations with zero compute nodes can only be made in SLURM partitions explicitly configured with <b>MinNodes=0</b> (the default minimum node count for a partition is one compute node).</p> @@ -425,6 +422,19 @@ cores or threads). the plugin name was changed to <i>select/alps</i> to allow for <a href='cray.shtml'>Native Slurm</a> on a Cray.</b></p> +<p>If you are experiencing performance issues with many jobs you may + consider using the <i>slurm.conf</i> + option <i>SchedulerParameters=inventory_interval=#</i> option. + On a Cray system using Slurm on top of ALPS this limits the amount of times + a Basil Inventory call is made. Normally this call happens every scheduling + consideration to attempt to close a node state change window with respects to + what ALPS has. This call is rather slow, so making it less + frequently improves performance dramatically, but in the situation + where a node changes state the window is as large as this setting. + In an HTC environment this setting is a must and we advise around 10 + seconds. +</p> + <p>Note that the system topology is based upon information gathered from the ALPS database and is based upon the ALPS_NIDORDER configuration in <i>/etc/sysconfig/alps</i>. 
Excerpts of a <i>slurm.conf</i> file for @@ -698,6 +708,6 @@ Stop the <i>slurmctld</i> daemon, delete the files, and restart the daemon.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 29 October 2014</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/documentation.shtml b/doc/html/documentation.shtml index 68d664367..0fb542c1c 100644 --- a/doc/html/documentation.shtml +++ b/doc/html/documentation.shtml @@ -11,6 +11,7 @@ Documentation for other versions of Slurm is distributed with the code</b></p> <h2>Slurm Users</h2> <ul> <li><a href="quickstart.html">Quick Start User Guide</a></li> +<li><a href="pdfs/summary.pdf">Command/option Summary (two pages)</a></li> <li><a href="man_index.html">Man Pages</a></li> <li><a href="rosetta.html">Rosetta Stone of Workload Managers</a></li> <li><a href="job_array.html">Job Array Support</a></li> @@ -131,6 +132,6 @@ Documentation for other versions of Slurm is distributed with the code</b></p> </li> </ul> -<p style="text-align:center;">Last modified 17 November 2014</p> +<p style="text-align:center;">Last modified 25 March 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/download.shtml b/doc/html/download.shtml index 05510c4f2..228e4f7f4 100644 --- a/doc/html/download.shtml +++ b/doc/html/download.shtml @@ -165,8 +165,7 @@ from a real system, or a synthetic trace, are used.<br> Pluggable Authentication Module (PAM) for restricting access to compute nodes where Slurm performs workload management. Access to the node is restricted to user root and users who have been allocated resources on that node. -NOTE: pam_slurm is included within the Slurm distribution for version 2.1 -or higher. +NOTE: pam_slurm is included within the Slurm distribution. For earlier Slurm versions, pam_slurm is available for download from<br> <a href="http://www.schedmd.com/download/extras/pam_slurm-1.6.tar.bz2"> http://www.schedmd.com/download/extras/pam_slurm-1.6.tar.bz2</a><br> @@ -358,6 +357,6 @@ easy and elegantly manner. </ul> -<p style="text-align:center;">Last modified 22 January 2015</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/elastic_computing.shtml b/doc/html/elastic_computing.shtml index 9942be0aa..43de974bf 100644 --- a/doc/html/elastic_computing.shtml +++ b/doc/html/elastic_computing.shtml @@ -4,7 +4,7 @@ <h2>Overview</h2> -<p>Slurm version 2.4 has the ability to support a cluster that grows and +<p>Slurm has the ability to support a cluster that grows and shrinks on demand, typically relying upon a service such as <a href="http://aws.amazon.com/ec2/">Amazon Elastic Computing Cloud (Amazon EC2)</a> for resources. @@ -165,6 +165,6 @@ expands (adds resources).</li> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 15 May 2012</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml index d069239e0..ba7a6e3f0 100644 --- a/doc/html/faq.shtml +++ b/doc/html/faq.shtml @@ -708,8 +708,7 @@ http://groups.google.com/group/slurm-devel</a></p> <p><a name="job_size"><b>24. Can I change my job's size after it has started running?</b></a><br> -Support to decrease the size of a running job was added to Slurm version 2.2. -The ability to increase the size of a running job was added to Slurm version 2.3. 
+Slurm supports the ability to both increase and decrease the size of running jobs. While the size of a pending job may be changed with few restrictions, several significant restrictions apply to changing the size of a running job as noted below: @@ -2018,6 +2017,6 @@ add MICs to Slurm queues if necessary, restart the host, use MICs via Slurm.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 16 January 2015</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/high_throughput.shtml b/doc/html/high_throughput.shtml index f667a293d..e5a39a0aa 100644 --- a/doc/html/high_throughput.shtml +++ b/doc/html/high_throughput.shtml @@ -74,9 +74,6 @@ be set quite high for memory size, open file count and stack size.</p> <h2>Slurm Configuration</h2> -<p>NOTE: Substantial changes were made in Slurm version 2.4 to support higher -throughput rates. Version 2.5 includes more enhancements.</p> - <p>Several Slurm configuration parameters should be adjusted to reflect the needs of high throughput computing. The changes described below will not be possible in all environments, but these are the configuration @@ -209,6 +206,6 @@ speedup can be achieved by setting the CommitDelay option in the <li><b>PurgeSuspendAfter</b>=1month</li> </ul> -<p style="text-align:center;">Last modified 23 December 2014</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/job_array.shtml b/doc/html/job_array.shtml index f0ed6cfd8..6ef79d343 100644 --- a/doc/html/job_array.shtml +++ b/doc/html/job_array.shtml @@ -255,9 +255,7 @@ $ squeue <h2>Job Dependencies</h2> -<p>Job dependencies for individual job array elements are supported in Slurm -version 2.6.4 and later. -A job which is to be dependent upon an entire job array should specify +<p>A job which is to be dependent upon an entire job array should specify itself dependent upon the ArrayJobID. Since each array element can have a different exit code, the interpretation of the <i>afterok</i> and <i>afternotok</i> clauses will be based upon the highest diff --git a/doc/html/job_exit_code.shtml b/doc/html/job_exit_code.shtml index 0c63582c5..3d88b0389 100644 --- a/doc/html/job_exit_code.shtml +++ b/doc/html/job_exit_code.shtml @@ -57,8 +57,8 @@ script returns an exit code of zero, indicating success. In many cases, a user may not be able to ascertain the success or failure of a job until after they have examined the job's output files.</p> -<p>A new job field, the "derived exit code", has been added to the job -record in Slurm 2.2. It is initially set to the value of the highest +<p>The job includes a "derived exit code" field. +It is initially set to the value of the highest exit code returned by all of the job's steps (srun invocations). 
The job's derived exit code is determined by the Slurm control daemon and sent to the database when the accounting_storage plugin is @@ -118,6 +118,6 @@ JobID NNodes State ExitCode DerivedExitCode Comment 123 1 COMPLETED 0:0 49:0 out of memory </PRE> -<p style="text-align:center;">Last modified 26 July 2012</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/maui.shtml b/doc/html/maui.shtml index 24fe16b63..fb0a34975 100644 --- a/doc/html/maui.shtml +++ b/doc/html/maui.shtml @@ -116,8 +116,8 @@ includes a description of keywords presently only supported by the sched/wiki2 plugin for use with the Moab Scheduler.</p> -<p>Slurm version 2.0 and higher have internal scheduling capabilities -that are not compatible with Maui. +<p>Slurm has some internal scheduling capabilities which are not compatible +with Maui. <ol> <li>Do not configure Slurm to use the "priority/multifactor" plugin as it would set job priorities which conflict with those set by Maui.</li> @@ -180,6 +180,6 @@ HidePartitionJobs=debug <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 8 May 2009</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/mc_support.shtml b/doc/html/mc_support.shtml index 6566457eb..6549aceb8 100644 --- a/doc/html/mc_support.shtml +++ b/doc/html/mc_support.shtml @@ -5,7 +5,7 @@ <h2>Contents</h2> <UL> <LI> <a href=#defs>Definitions</a> -<LI> <a href=#flags>Overview of new srun flags</a> +<LI> <a href=#flags>Overview of srun flags</a> <LI> <a href=#motivation>Motivation behind high-level srun flags</a> <LI> <a href=#utilities>Extensions to sinfo/squeue/scontrol</a> <LI> <a href=#config>Configuration settings in slurm.conf</a> @@ -14,9 +14,17 @@ <a name=defs> <h2>Definitions</h2></a> -<P> <b>Socket/Core/Thread</b> - Figure 1 illustrates the notion of -Socket, Core and Thread as it is defined in Slurm's multi-core/multi-thread -support documentation.</p> +<dl> +<dt><b>BaseBoard</b> +<dd>Also called motherboard. +<dt><b>LDom</b> +<dd>Locality domain or NUMA domain. May be equivalent to BaseBoard or Socket. +<dt><b>Socket/Core/Thread</b> +<dd>Figure 1 illustrates the notion of Socket, Core and Thread as it is defined +in Slurm's multi-core/multi-thread support documentation. +<dt><b>CPU</b> +<dd>Depending upon system configuration, this can be either a core or a thread. +</dl> <center> <img src="mc_support.gif"> @@ -41,12 +49,12 @@ allowing a process to run on more than one logical processor. </dl> <a name=flags> -<h2>Overview of new srun flags</h2></a> +<h2>Overview of srun flags</h2></a> -<p> Several flags have been defined to allow users to +<p>Many flags have been defined to allow users to better take advantage of this architecture by explicitly specifying the number of sockets, cores, and threads required -by their application. Table 1 summarizes the new multi-core flags. +by their application. Table 1 summarizes these options. <P> <table border=1 cellspacing=0 cellpadding=4> @@ -77,13 +85,13 @@ by their application. Table 1 summarizes the new multi-core flags. 
<td> Combined shortcut option for --sockets-per-node, --cores-per_cpu, --threads-per_core </td></tr> <tr><td colspan=2> -<b><a href="#srun_dist">New Distributions</b> +<b><a href="#srun_dist">Task Distribution Options</b> </td></tr> <tr> <td> -m / --distribution </td> - <td> Distributions of: block | cyclic | hostfile + <td> Distributions of: arbitrary | block | cyclic | <a href="dist_plane.html"><u>plane=<i>x</i></u></a> - | <u>[block|cyclic]:[block|cyclic]</u> + | <u>[block|cyclic]:[block|cyclic|fcyclic]</u> </td></tr> <tr><td colspan=2> <b><a href="#srun_consres">Memory as a consumable resource</a></b> @@ -100,6 +108,9 @@ by their application. Table 1 summarizes the new multi-core flags. <b><a href="#srun_ntasks">Task invocation control</a></b> </td></tr> <tr> + <td> --cpus-per-task=<i>CPUs</i></td> + <td> number of CPUs required per task +</td></tr> <td> --ntasks-per-node=<i>ntasks</i></td> <td> number of tasks to invoke on each node </td></tr> @@ -108,6 +119,9 @@ by their application. Table 1 summarizes the new multi-core flags. </td></tr> <td> --ntasks-per-core=<i>ntasks</i></td> <td> number of tasks to invoke on each core +</td></tr> + <td> --overcommit</td> + <td> Permit more than one task per CPU </td></tr> <tr><td colspan=2> <b><a href="#srun_hints">Application hints</a></b> @@ -122,15 +136,26 @@ by their application. Table 1 summarizes the new multi-core flags. <td> --hint=[no]multithread</td> <td> [don't] use extra threads with in-core multi-threading </td></tr> +<tr><td colspan=2> +<b><a href="#srun_hints">Resources reserved for system use</a></b> +</td></tr> +<tr> + <td> --core-spec=<i>cores</i></td> + <td> Count of cores to reserve for system use +</td></tr> + <td> --thread-spec=<i>threads</i></td> + <td> Count of threads to reserve for system use (future) +</td></tr> </table> <p> <center> -Table 1: New srun flags to support the multi-core/multi-threaded environment +Table 1: srun flags to support the multi-core/multi-threaded environment </center> <p>It is important to note that many of these flags are only meaningful if the processes' have some affinity to specific CPUs and (optionally) memory. +Inconsistent options generally result in errors. Task affinity is configured using the TaskPlugin parameter in the slurm.conf file. Several options exist for the TaskPlugin depending upon system architecture and available software, any of them except "task/none" will find tasks to CPUs. 
@@ -155,6 +180,20 @@ See the "Task Launch" section if generating slurm.conf via mask_cpu:<i><list></i> specify a CPU ID binding mask for each task where <i><list></i> is <i><mask1>,<mask2>,...<maskN></i> + rank_ldom bind task by rank to CPUs in a NUMA + locality domain + map_ldom:<i><list></i> specify a NUMA locality domain ID + for each task where <i><list></i> is + <i><ldom1>,<ldom2>,...<ldomN></i> + rank_ldom bind task by rank to CPUs in a NUMA + locality domain where <i><list></i> is + <i><ldom1>,<ldom2>,...<ldomN></i> + mask_ldom:<i><list></i> specify a NUMA locality domain ID mask + for each task where <i><list></i> is + <i><ldom1>,<ldom2>,...<ldomN></i> + boards auto-generated masks bind to boards + ldoms auto-generated masks bind to NUMA locality + domains sockets auto-generated masks bind to sockets cores auto-generated masks bind to cores threads auto-generated masks bind to threads @@ -164,8 +203,8 @@ See the "Task Launch" section if generating slurm.conf via <p> The affinity can be either set to either a specific logical processor (socket, core, threads) or at a coarser granularity than the lowest level of logical processor (core or thread). -In the later case the processes are allowed to roam within a specific -socket or core. +In the later case the processes are allowed to utilize multiple processors +within a specific socket or core. <p>Examples:</p> @@ -188,9 +227,9 @@ of nodes, sockets, cores, and threads:</p> <PRE> -B --extra-node-info=<i>S[:C[:T]]</i> Expands to: - --sockets-per-node=<i>S</i> number of sockets per node to allocate - --cores-per-socket=<i>C</i> number of cores per socket to allocate - --threads-per-core=<i>T</i> number of threads per core to allocate + --sockets-per-node=<i>S</i> number of sockets per node to allocate + --cores-per-socket=<i>C</i> number of cores per socket to allocate + --threads-per-core=<i>T</i> number of threads per core to allocate each field can be 'min' or wildcard '*' <font face="serif">Total cpus requested = (<i>Nodes</i>) x (<i>S</i> x <i>C</i> x <i>T</i>)</font> @@ -231,32 +270,37 @@ note: compare the above with the previous corresponding --cpu_bind=... examples <p>See also 'srun --help' and 'man srun'</p> <a name="srun_dist"> -<h3>New distributions: Extensions to -m / --distribution</h3></a> +<h3>Task distribution options: Extensions to -m / --distribution</h3></a> <p>The -m / --distribution option for distributing processes across nodes has been extended to also describe the distribution within the lowest level of logical processors. Available distributions include: <br> -block | cyclic | hostfile | <u>plane=<i>x</i></u> | <u>[block|cyclic]:[block|cyclic]</u>) +arbitrary | block | cyclic | <u>plane=<i>x</i></u> | <u>[block|cyclic]:[block|cyclic|fcyclic]</u> </p> -<p>The new <A HREF="dist_plane.html">plane distribution</A> (plane=<i>x</i>) -results in a block cyclic distribution of blocksize equal to <i>x</i>. +<p>The <A HREF="dist_plane.html">plane distribution</A> (plane=<i>x</i>) +results in a block:cyclic distribution of blocksize equal to <i>x</i>. In the following we use "lowest level of logical processors" to describe sockets, cores or threads depending of the architecture. 
-The new distribution divides +The distribution divides the cluster into planes (including a number of the lowest level of logical processors on each node) and then schedule first within each plane and then across planes.</p> -<p>For the two dimensional distributions ([block|cyclic]:[block|cyclic]), +<p>For the two dimensional distributions ([block|cyclic]:[block|cyclic|fcyclic]), the second distribution (after ":") allows users to specify a distribution method for processes within a node and applies to the lowest level of logical -processors (sockets, core or thread depending on the architecture).</p> +processors (sockets, core or thread depending on the architecture). +When a task requires more than one CPU, the <i>cyclic</i> will allocate all +of those CPUs as a group (i.e. within the same socket if possible) while +<i>fcyclic</i> would distribute each of those CPU of the in a cyclic fashion +across sockets.</p> -<p>The binding is enabled automatically when high level flags are used as long as the task/affinity plug-in -is enabled. To disable binding at the job level use --cpu_bind=no.</p> +<p>The binding is enabled automatically when high level flags are used as long +as the task/affinity plug-in is enabled. To disable binding at the job level +use --cpu_bind=no.</p> <p>The distribution flags can be combined with the other switches: @@ -429,7 +473,7 @@ flags much easier to use.</li> <p>Also as illustrated in the example below it is much simpler to specify a different layout using the high-level flags since users do not have to -recalculate mask or CPU IDs. The new approach is very effortless compared to +recalculate mask or CPU IDs. This approach is much simpler than rearranging the mask or map.</p> <p>Given a 32-process MPI job and a four dual-socket dual-core node @@ -437,11 +481,11 @@ cluster, we want to use a block distribution across the four nodes and then a cyclic distribution within the node across the physical processors. We have had several requests from users that they would like this distribution to be the default distribution on multi-core clusters. Below we show how to obtain the -wanted layout using 1) the new high-level flags and 2) --cpubind</p> +wanted layout using 1) the high-level flags and 2) --cpubind</p> <h3>High-Level flags</h3> -<p>Using Slurm's new high-level flag, users can obtain the above layout with:</p> +<p>Using Slurm's high-level flag, users can obtain the above layout with:</p> <DL> <DL> @@ -605,7 +649,7 @@ numbering.)</p> <h3>Block map_cpu on a system with cyclic core numbering</h3> <p>If users do not check their system's core numbering before specifying -the map_cpu list and thereby do not realize that the new system has cyclic core +the map_cpu list and thereby do not realize that the system has cyclic core numbering instead of block numbering then they will not get the expected layout.. For example, if they decide to re-use their mpirun command from above:</p> @@ -854,8 +898,9 @@ TaskPlugin=task/affinity # enable task affinity # "TaskPlugin" : Define a task launch plugin. This may be used to # provide resource management within a node (e.g. pinning # tasks to specific processors). Permissible values are: -# "task/none" : no task launch actions, the default. 
# "task/affinity" : CPU affinity support +# "task/cgroup" : bind tasks to resources using Linux cgroup +# "task/none" : no task launch actions, the default # # Example: # @@ -874,82 +919,10 @@ FastSchedule=1 NodeName=dualcore[01-16] CPUs=4 CoresPerSocket=2 ThreadsPerCore=1 </PRE> -<p>Below is a more complete description of the configuration possible -using NodeName: - -<PRE> -# -# o Node configuration -# -# The configuration information of nodes (or machines) to be managed -# by Slurm is described here. The only required value in this section -# of the config file is the "NodeName" field, which specifies the -# hostnames of the node or nodes to manage. It is recommended, however, -# that baseline values for the node configuration be established -# using the following parameters (see slurm.config(5) for more info): -# -# "NodeName" : The only required node configuration parameter, NodeName -# specifies a node or set of nodes to be managed by SLURM. -# The special NodeName of "DEFAULT" may be used to establish -# default node configuration parameters for subsequent node -# records. Typically this would be the string that -# `/bin/hostname -s` would return on the node. However -# NodeName may be an arbitrary string if NodeHostname is -# used (see below). -# -# "Feature" : comma separated list of "features" for the given node(s) -# -# "NodeAddr" : preferred address for contacting the node. This may be -# either a name or IP address. -# -# "NodeHostname" -# : the string that `/bin/hostname -s` would return on the -# node. In other words, NodeName may be the name other than -# the real hostname. -# -# "RealMemory" : Amount of real memory (in Megabytes) -# -# "CPUs" : Number of logical processors on the node. -# If CPUs is omitted, it will be inferred from: -# Sockets, CoresPerSocket, and ThreadsPerCore. -# -# "Sockets" : Number of physical processor sockets/chips on the node. -# If Sockets is omitted, it will be inferred from: -# CPUs, CoresPerSocket, and ThreadsPerCore. -# -# "CoresPerSocket" -# : Number of cores in a single physical processor socket -# The CoresPerSocket value describes physical cores, not -# the logical number of processors per socket. -# The default value is 1. -# -# "ThreadsPerCore" -# : Number of logical threads in a single physical core. -# The default value is 1. -# -# "State" : Initial state (IDLE, DOWN, etc.) -# -# "TmpDisk" : Temporary disk space available on node -# -# "Weight" : Priority of node for scheduling purposes -# -# If any of the above values are set for a node or group of nodes, and -# that node checks in to the slurm controller with less than the -# configured resources, the node's state will be set to DOWN, in order -# to avoid scheduling any jobs on a possibly misconfigured machine. 
-# -# Example Node configuration: -# -# NodeName=DEFAULT CPUs=2 TmpDisk=64000 State=UNKNOWN -# NodeName=host[0-25] NodeAddr=ehost[0-25] Weight=16 -# NodeName=host26 NodeAddr=ehost26 Weight=32 Feature=graphics_card -# NodeName=dualcore01 CPUs=4 CoresPerSocket=2 ThreadsPerCore=1 -# NodeName=dualcore02 CPUs=4o Sockets=2 CoresPerSocket=2 ThreadsPerCore=1 -# NodeName=multicore03 CPUs=64 Sockets=8 CoresPerSocket=4 ThreadsPerCore=2 -</PRE> - +<p>For a more complete description of the various node configuration options +see the slurm.conf man page.</p> <!--------------------------------------------------------------------------> -<p style="text-align:center;">Last modified 21 April 2014</p> +<p style="text-align:center;">Last modified 1 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/meetings.shtml b/doc/html/meetings.shtml index 72b112c50..b316be0f5 100644 --- a/doc/html/meetings.shtml +++ b/doc/html/meetings.shtml @@ -6,8 +6,8 @@ 15-16 September 2015<br> Washington DC, USA<br> Host: <a href="http://www.gwu.edu/">The George Washington University</a></p> -<p>More information to come soon.</p> -<!--<a href="slurm_ug_cfp.html">Call for Abstracts: Due 6 June 2014</a><br>--> +<!-- <p>More information to come soon.</p>--> +<a href="slurm_ug_cfp.html">Call for Abstracts: Due 1 June 2015</a><br> <!-- <a href="slurm_ug_agenda.html#agenda">Meeting agenda</a><br> --> <!-- <a href="slurm_ug_agenda.html#registration">Meeting registration</a><br> --> <!-- <a href="slurm_ug_agenda.html#hotels">Meeting hotels</a><br> --> @@ -40,6 +40,6 @@ Host: Bull</p> Paris, France<br> Host: CEA</p> -<p style="text-align:center;">Last modified 2 March 2015</p> +<p style="text-align:center;">Last modified 20 March 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/moab.shtml b/doc/html/moab.shtml index bce2f1d66..11dfde764 100644 --- a/doc/html/moab.shtml +++ b/doc/html/moab.shtml @@ -37,8 +37,8 @@ SchedulerPort=7321 <p>Running multiple jobs per mode can be accomplished in two different ways. The <i>SelectType=select/cons_res</i> parameter can be used to let -SLURM allocate the individual processors, memory, and other -consumable resources (in Slurm version 1.2.1 or higher). +Slurm allocate the individual processors, memory, and other +consumable resources. Alternately, <i>SelectType=select/linear</i> or <i>SelectType=select/bluegene</i> can be used with the <i>Shared=yes</i> or <i>Shared=force</i> parameter in @@ -46,8 +46,8 @@ partition configuration specifications.</p> <p>The default value of <i>SchedulerPort</i> is 7321.</p> -<p>Slurm version 2.0 and higher have internal scheduling capabilities -that are not compatible with Moab. +<p>Slurm has some internal scheduling capabilities which are not compatible +with Moab. <ol> <li>Do not configure Slurm to use the "priority/multifactor" plugin as it would set job priorities which conflict with those set by Moab.</li> @@ -281,6 +281,6 @@ Write the output to a file with the same name as the user in the <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 14 December 2009</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/mpi_guide.shtml b/doc/html/mpi_guide.shtml index 581eacd47..cce20654c 100644 --- a/doc/html/mpi_guide.shtml +++ b/doc/html/mpi_guide.shtml @@ -56,9 +56,8 @@ with Slurm are provided below. <p>The current versions of Slurm and Open MPI support task launch using the <span class="commandline">srun</span> command. 
-It relies upon Slurm version 2.0 (or higher) managing reservations of -communication ports for use by the Open MPI version 1.5 (or higher). - +It relies upon Slurm managing reservations of communication ports for use by +Open MPI version 1.5 and later.</p> <p>If OpenMPI is configured with <i>--with-pmi</i> either pmi or pmi2 the OMPI jobs can be launched directly using the srun command. This is @@ -369,39 +368,28 @@ documentation for "CQ or QP Creation failure".</p> <hr size=4 width="100%"> -<h2><a name="mvapich2" href="http://nowlab.cse.ohio-state.edu/projects/mpi-iba"><b>MVAPICH2</b></a></h2> +<h2><a name="mvapich2" href="http://mvapich.cse.ohio-state.edu/static/media/mvapich/mvapich2-2.0-userguide.html"><b>MVAPICH2</b></a></h2> <p>MVAPICH2 supports launching multithreaded programs by Slurm as well as mpirun_rsh. Please note that if you intend to use use srun, you need to build MVAPICH2 -with Slurm support. -Please refer to the -<a href="http://mvapich.cse.ohio-state.edu/static/media/mvapich/mvapich2-2.0-userguide.html#x1-100004.3.2"> -MVAPICH2 User Guide</a> for details. -You can also use Slurm for only resource allocation (using the salloc or sbatch -command) and launch the jobs using mpirun_rsh. -However, please note that mpirun_rsh does not accept "-env" parameter, so -the command would be something like this:</p> +with Slurm support, using a configure command of this sort:</p> <pre> -$ mpirun_rsh -np 2 -hostfile <path_to_hostfile> \ - MV2_USE_CUDA=1 MV2_ENABLE_AFFINITY=0 ./mpi <application> +$ ./configure --with-pmi=pmi2 --with-pm=slurm </pre> -<p>Slurm must be configured to use the <i>none</i> MPI plugin to establish -communications between the launched tasks. -This can be accomplished either using the Slurm configuration parameter -<i>MpiDefault=none</i> in <b>slurm.conf</b> or srun's <i>--mpi=none</i> option. -<b>Do not use Slurm's MVAPICH plugin for MVAPICH2.</b> -The program must also be linked with -Slurm's implementation of the PMI library so that tasks can communicate -host and port information at startup. (The system administrator can add -these option to the mpicc and mpif77 commands directly, so the user will not -need to bother). +<p>Use of Slurm's <i>pmi2</i> plugin provides substantially higher performance and +scalability than Slurm's <i>pmi</i> plugin. +If <i>pmi2</i> is not configured to be Slurm's default MPI plugin at your site, +this can be specified using the srun command's "--mpi=pmi2" option as shown +below or with the environment variable setting of "SLURM_MPI_TYPE=pmi2".</p> <pre> -$ mpicc -L<path_to_slurm_lib> -lpmi ... -$ srun -n16 --mpi=none a.out +$ srun -n16 --mpi=pmi2 a.out </pre> +<p>For more information, please see the +<a href="http://mvapich.cse.ohio-state.edu/static/media/mvapich/mvapich2-2.0-userguide.html">MVAPICH2 User Guide</a>.</p> + <hr size=4 width="100%"> @@ -458,8 +446,8 @@ Non-MPI jobs started in this configuration will lack the mechanism to launch more than one task per node unless srun's <i>--mpi=none</i> option is used.</p> <p>If you are using MPICH P4 (<i>DEFAULT_DEVICE=ch_p4</i> in -the mpirun script) and Slurm version 1.2.11 or newer, -then it is recommended that you apply the patch in the SLURM +the mpirun script), +then it is recommended that you apply the patch in the Slurm distribution's file <i>contribs/mpich1.slurm.patch</i>. Follow directions within the file to rebuild MPICH. Applications must be relinked with the new library.
@@ -524,6 +512,6 @@ $ srun -N4 -n16 a.out <hr size=4 width="100%"> -<p style="text-align:center;">Last modified 28 November 2013</p> +<p style="text-align:center;">Last modified 20 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/multi_cluster.shtml b/doc/html/multi_cluster.shtml index 828137afe..464e49aab 100644 --- a/doc/html/multi_cluster.shtml +++ b/doc/html/multi_cluster.shtml @@ -3,7 +3,7 @@ <h1>Multi-Cluster Operation</h1> <p>A cluster is comprised of all the nodes managed by a single slurmctld -daemon. Slurm version 2.2 offers the ability to target commands to other +daemon. Slurm offers the ability to target commands to other clusters instead of, or in addition to, the local cluster on which the command is invoked. When this behavior is enabled, users can submit jobs to one or many clusters and receive status from those remote @@ -56,6 +56,6 @@ See <a href="accounting.html">accounting</a> for details.</p> option will become active for all of the clusters listed by the <b>"sacctmgr show clusters"</b> command.</p> -<p style="text-align:center;">Last modified 14 February 2011</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/news.shtml b/doc/html/news.shtml index 3ac1941ff..6a73a076d 100644 --- a/doc/html/news.shtml +++ b/doc/html/news.shtml @@ -40,15 +40,23 @@ Major enhancements include: Major enhancements to include: <ul> <!-- SchedMD led --> -<li>Add support for burst buffers, data storage available for before, during - and/or after job computation in support of data staging, checkpoint, - etc.</li> <li>Convert charging from being based upon CPU time allocated to a more general <i>system billing unit</i>, which can be computed as a function of - many different resources (e.g. CPU, memory, power, GPUs, etc.).</li> -<li>Improve recovery time for communication failures when large numbers of - nodes fail simultaneously.</li> -<li>Permit disabling of swap space use.</li> + many different resources (e.g. CPU, memory, power, GPUs, etc.). A job's + consumption of all these resources will be logged in Slurm's database.</li> +<li>Add the ability for a compute node to be allocated to multiple jobs, but + restricted to a single user.</li> +<li>Support added for cluster-wide power capping.</li> +<li>A partition can now have an associated Quality Of Service (QOS). This will + allow a partition to have all of the limits available to a QOS.</li> +<li>Add support for QOS-based job preemption to be used with job suspend/resume + mechanism.</li> +<li>Add support for burst buffers, data storage available for before, during + and/or after job computation in support of data staging, checkpoint, + etc. Plugins provied for Cray Data Warp and a generic script-based + interface.</li> +<li>Add support for optimized job allocations with respect to SGI Hypercube + topology.</li> <!-- Universitat Jaume I & Universitat Politecnica de Valencia --> <li>Add support for <a href="http://slurm.schedmd.com/SUG14/remote_gpu.pdf">Remote CUDA (rCUDA)</a></li> @@ -70,8 +78,6 @@ have not been finalized. Anyone desiring to perform Slurm development should notify <a href="mailto:slurm-dev@schedmd.com">slurm-dev@schedmd.com</a> to coordinate activities. 
Future development plans includes: <ul> -<li>Energy consumption added as a factor in fair-share scheduling.</li> -<li>Energy aware scheduling added with respect to power caps.</li> <li>Improved support for GPU affinity with respect to CPUs and network resources.</li> <li>Integration with @@ -81,7 +87,7 @@ to coordinate activities. Future development plans includes: MIC processors.</li> <li>IP communications over InfiniBand network for improved performance.</li> <li>Fault-tolerance and jobs dynamic adaptation through communication protocol - between Slurm , MPI libraries and the application.</li> + between Slurm, MPI libraries and the application.</li> <li>Improved support for high-throughput computing (e.g. multiple slurmctld daemons on a single cluster).</li> <li>Add Kerberos credential support including credential forwarding @@ -89,6 +95,6 @@ to coordinate activities. Future development plans includes: <li>Improved support for provisioning and virtualization.</li> <li>Provide a web-based Slurm administration tool.</li> </ul> -<p style="text-align:center;">Last modified 17 November 2014</p> +<p style="text-align:center;">Last modified 31 March 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/platforms.shtml b/doc/html/platforms.shtml index 99dac3bc2..acde10887 100644 --- a/doc/html/platforms.shtml +++ b/doc/html/platforms.shtml @@ -6,47 +6,18 @@ <li><b>AIX</b>—Slurm support for AIX has been thoroughly tested, but we know of no AIX installations using Slurm after 2008. See <a href="ibm.html">IBM AIX User and Administrator Guide</a> for more information.</li> -<li><b>FreeBSD</b>—Slurm support added in version 2.5.5.</li> +<li><b>FreeBSD</b>—Fully supported.</li> <li><b>Linux</b>—Slurm has been thoroughly tested on most popular Linux distributions using i386, ia64, and x86_64 architectures.</li> -<li><b>NetBSD</b>—Slurm support added in version 2.4.</li> +<li><b>NetBSD</b>—Fully supported.</li> <li><b>OS X</b>—Slurm has run OS X in the past, but the current OS X linker does not support Slurm plugins. (OS X dynamically linked objects can be called by the main program. They may not call functions in the main program or other dynamically linked objects, which are features required by Slurm.)</li> -<li><b>Solaris</b>—Slurm support for Solaris (OpenSolaris build 119) was -added in version 2.1.</li> -<li><b>Other</b>—Slurm ports to other systems will be gratefully accepted.</li> -</ul> -<h2>Interconnects</h2> -<ul> -<li><b>BlueGene</b>—Slurm support for IBM's BlueGene/L, BlueGene/P and -BlueGene/Q systems has been thoroughly tested. See -<a href="bluegene.html">Blue Gene User and Administrator Guide</a> -for more information.</li> -<li><b>Cray XC</b>—Operates as a workload manager natively on a - Cray system. See -<li><a href="cray.html">Cray User and Administrator Guide with Native -Slurm</a> for more information.</li> -<li><b>Cray XT and XE</b>—Operates as a scheduler on top of Cray's -ALPS/BASIL software. Supported added in Slurm version 2.3. Note that Cray's -ALPS/BASIL software necessitates some changes in Slurm behavior. 
See -<li><a href="cray_alps.html">Cray User and Administrator Guide with -ALPS</a> for more information.</li> -<li><b>Ethernet</b>—Ethernet requires no special support from Slurm and has -been thoroughly tested.</li> -<li><b>IBM Infiniband/Torrent</b>—Slurm supports IBM's NRT (Network -Resource Table) interface and -<a href="ibm-pe.html">IBM's PE (Parallel Environment)</a>.</li> -<li><b>Infiniband</b>—Infiniband support has been thoroughly tested.</li> -<li><b>Myrinet</b>—Myrinet, MPICH-GM and MPICH-MX are supported.</li> -<li><b>Quadrics Elan</b>—Slurm support for Quadrics Elan 3 and Elan 4 switches -are available in all versions of Slurm and have been thoroughly tested.</li> -<li><b>Sun Constellation</b>—Resource allocation has been optimized -for the three-dimensional torus interconnect.</li> +<li><b>Solaris</b>—OpenSolaris is fully supported.</li> <li><b>Other</b>—Slurm ports to other systems will be gratefully accepted.</li> </ul> -<p style="text-align:center;">Last modified 27 March 2013</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/power_save.shtml b/doc/html/power_save.shtml index 0cbd1a5be..76adda3b2 100644 --- a/doc/html/power_save.shtml +++ b/doc/html/power_save.shtml @@ -4,11 +4,9 @@ <p>Slurm provides an integrated power saving mechanism for idle nodes. Nodes that remain idle for a configurable period of time can be placed -in a power saving mode. +in a power saving mode, which can reduce power consumption or fully power down +the node. The nodes will be restored to normal operation once work is assigned to them. -Beginning with version 2.0.0, nodes can be fully powered down. -Earlier releases of Slurm do not support the powering down of nodes, -only support of reducing their performance and thus their power consumption. For example, power saving can be accomplished using a <i>cpufreq</i> governor that can change CPU frequency and voltage (note that the <i>cpufreq</i> driver must be enabled in the Linux kernel configuration). @@ -203,17 +201,10 @@ as configured in <i>slurm.conf</i>, which would create the delay for all jobs on the system. Insure that the <i>Prolog</i> code is zero to avoid draining the node (do not use the scontrol exit code to avoid draining the node on error, -for example if the job is explicitly cancelled during startup). -Note that the <i>scontrol wait_job</i> command was added to Slurm version 2.2. -When using earlier versions of Slurm, one may execute "<i>srun /bin/true</i>" -or some other command first to insure that all nodes are booted and ready -for use.</p> - -<p>The <i>salloc</i> and <i>srun</i> commands which create a resource -allocation automatically wait for the nodes to power up in Slurm version 2.2. -When using earlier versions of Slurm, <i>salloc</i> will return immediately -after a resource allocation is made, and one can execute "<i>srun /bin/true</i>" -to ensure that all nodes are booted and ready for use.</p> +for example if the job is explicitly cancelled during startup).</p> + +<p>The <i>salloc</i> and <i>srun</i> commands, which create a resource +allocation, automatically wait for the nodes to power up.</p> <p>Execution of the <i>salloc</i> command also triggers execution of the <i>Prolog</i> script if the <i>Alloc</i> flag is set in <i>PrologFlags</i>. 
In this case <i>salloc</i> @@ -263,6 +254,6 @@ and perform the following actions: <li>Boot the appropriate image for each node</li> </ol> -<p style="text-align:center;">Last modified 12 August 2014</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/preempt.shtml b/doc/html/preempt.shtml index f4785c1e0..17621bc16 100644 --- a/doc/html/preempt.shtml +++ b/doc/html/preempt.shtml @@ -366,8 +366,6 @@ $ squeue <P> <B>More intelligence in the select plugins</B>: This implementation of preemption relies on intelligent job placement by the <I>select</I> plugins. -In Slurm version 2.0 preemptive placement support was added to the -SelectType plugins, but there is still room for improvement. </P><P> Take the following example: </P> @@ -410,6 +408,6 @@ order to support ideal placements such as this, which can quickly complicate the design. Any and all help is welcome here! </P> -<p style="text-align:center;">Last modified 3 March 2014</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/quickstart_admin.shtml b/doc/html/quickstart_admin.shtml index 14697b45a..f012a8c57 100644 --- a/doc/html/quickstart_admin.shtml +++ b/doc/html/quickstart_admin.shtml @@ -722,18 +722,22 @@ See <i>testsuite/expect/README</i> for more information.</p> <a name="upgrade"><h2>Upgrades</h2></a> <p>Background: The Slurm version numbers contain three digits, which represent -the major, minor and micro release numbers in that order (e.g. 2.5.3 is -major=2, minor=5, micro=3). +the major, minor and micro release numbers in that order (e.g. 14.11.3 is +major=14, minor=11, micro=3). +The major release number indicates the last two digits in the year of release +and the minor release number indicates the month of release. +Thus version 14.11.x was initially released in November 2014. Changes in the RPCs (remote procedure calls) and state files will only be made -if the major and/or minor release number changes. +if the major and/or minor release number changes, which typically happens +about once every nine months or so. Slurm's MPI libraries may also change if the major and/or minor release number change, requiring applications be re-linked (behavior may vary depending upon the MPI implementation used and the specific Slurm changes between releases). Locally developed Slurm plugins may also require modification. Slurm daemons will support RPCs and state files from the two previous minor -releases (e.g. a version 2.6.x SlurmDBD will support slurmctld daemons and -commands with a version of 2.4.x or 2.5.x). -This means that upgrading at least once each year is strongly recommended. +releases (e.g. a version 15.08.x SlurmDBD will support slurmctld daemons and +commands with a version of 14.03.x or 14.11.x). +This means that upgrading at least once each year is recommended. Otherwise, intermediate upgrades will be required to preserve state information. Changes in the micro release number generally represent only bug fixes, but may also include very minor enhancements.</p> @@ -741,7 +745,7 @@ but may also include very minor enhancements.</p> <p>If the SlurmDBD daemon is used, it must be at the same or higher minor release number as the Slurmctld daemons. In other words, when changing the version to a higher release number (e.g -from 2.4 to 2.5) <b>always upgrade the SlurmDBD daemon first</b>. +from 14.11.x to 15.08.x) <b>always upgrade the SlurmDBD daemon first</b>. 
Database table changes may be required for the upgrade, for example adding new fields to existing tables. If the database contains a large number of entries, <b>the SlurmDBD daemon @@ -755,13 +759,13 @@ although rolling upgrades are also possible (i.e. upgrading the head node(s) first then upgrading the compute and login nodes later at various times). Also see the note above about reverse compatibility.</p> -<p>Pretty much each new major and/or minor release of Slurm (e.g. 2.4.x to 2.5.x) +<p>Pretty much each new major and/or minor release of Slurm (e.g. 14.11.x to 15.08.x) involves changes to the state files with new data structures, new options, etc. -Slurm permits upgrades of up to two major or minor updates (e.g. 2.4.x or 2.5.x -to 2.6.x) without loss of jobs or other state information, but the state +Slurm permits upgrades of up to two major or minor updates (e.g. 14.03.x or 14.11.x +to 15.08.x) without loss of jobs or other state information, but the state information from older state files versions will not be recognized and will be discarded, resulting in loss of all running and pending jobs. -State files are not recognized when downgrading (e.g. from 2.5.x to 2.4.x) +State files are not recognized when downgrading (e.g. from 15.08.x to 14.11.x) and will be discarded, resulting in loss of all running and pending jobs. Therefore when upgrading Slurm (more precisely, the slurmctld daemon), saving the <i>StateSaveLocation</i> (as defined in <i>slurm.conf</i>) @@ -771,7 +775,7 @@ recover the jobs. Jobs submitted under the new version will not be in those state files, but it can let you recover most jobs. An exception to this is that jobs may be lost when installing new pre-release -versions (e.g. 2.5.0-pre1 to 2.5.0-pre2). +versions (e.g. 15.08.0-pre1 to 15.08.0-pre2). We'll try to note these cases in the NEWS file. Contents of major releases are also described in the RELEASE_NOTES file.</p> @@ -827,6 +831,6 @@ options such as mysql and gui tools via a configuration menu.</p> </pre> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 3 February 2015</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/reservations.shtml b/doc/html/reservations.shtml index 2d432ce75..30152f2f7 100644 --- a/doc/html/reservations.shtml +++ b/doc/html/reservations.shtml @@ -155,7 +155,7 @@ identify the partition from which to select the nodes or _one_ feature that every selected node must contain.</p> <p>On a smaller system, one might want to reserve cores rather than -whole nodes. Slurm provides a core reservation capability in version 2.6. +whole nodes. This capability permits the administrator to identify the core count to be reserved on each node as shown in the examples below.<br> <b>NOTE:</b> Core reservations are not available on IBM BlueGene or @@ -354,7 +354,7 @@ considering the initiation of jobs. This will prevent the initiation of some jobs which would complete execution before a reservation given fewer jobs to time-slice with.</p> -<p style="text-align: center;">Last modified 9 September 2014</p> +<p style="text-align: center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/rpc.shtml b/doc/html/rpc.shtml index 4391b8e9d..9b17161b1 100644 --- a/doc/html/rpc.shtml +++ b/doc/html/rpc.shtml @@ -14,8 +14,8 @@ The reason for format changes is typically new fields being added for new Slurm capabilities. 
Slurm communicatins support the most current release plus the two previous release major and/or minor releases. -For example, Slurm version 2.6.x daemons can communicate with Slurm commands -from version 2.4.x, 2.5.x and 2.6.x. +For example, Slurm version 15.08.x daemons can communicate with Slurm commands +from version 14.03.x, 14.11.x and 15.08.x. The same is true for state save files. Slurm can be upgraded through two major or minor releases without loss of data since the older state files are still recognized. @@ -55,20 +55,20 @@ slurmctld daemon, job specifications that are unknown to the old sbatch should be set by the slurmctld daemon to reasonable values. An example of the changes required are shown below. In this trivial example, we want to add a new "max_nodes" filed to the -message for Slurm vestion 2.6.x.</p> +message for Slurm vestion 15.08.x.</p> <pre> /* * Original code in Slurm v2.5.x */ -if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { +if (protocol_version >= SLURM_14_11_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); pack32(msg->user_id, buffer); pack32(msg->min_nodes, buffer); -} else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { +} else if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); pack32(msg->user_id, buffer); -} else if (protocol_version >= SLURM_2_3_PROTOCOL_VERSION) { +} else if (protocol_version >= SLURM_13_08_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); } else { error("pack_whatever_msg: protocol_version " @@ -77,18 +77,18 @@ if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { </pre> <pre> /* - * New code in Slurm v2.6.x + * New code in Slurm v15.08.x */ -if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { +if (protocol_version >= SLURM_15_08_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); pack32(msg->user_id, buffer); pack32(msg->min_nodes, buffer); pack32(msg->max_nodes, buffer); -} else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { +} else if (protocol_version >= SLURM_14_11_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); pack32(msg->user_id, buffer); pack32(msg->min_nodes, buffer); -} else if (protocol_version >= SLURM_2_4_PROTOCOL_VERSION) { +} else if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) { pack32(msg->job_id, buffer); pack32(msg->user_id, buffer); } else { @@ -97,6 +97,6 @@ if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { } </pre> -<p style="text-align:center;">Last modified 22 August 2013</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/slurm_ug_cfp.shtml b/doc/html/slurm_ug_cfp.shtml index d2826211a..80af201d1 100644 --- a/doc/html/slurm_ug_cfp.shtml +++ b/doc/html/slurm_ug_cfp.shtml @@ -1,15 +1,16 @@ <!--#include virtual="header.txt"--> <h1>CALL FOR ABSTRACTS</h1> -<p>Slurm User Group Meeting 2014<br> -23-24 September 2014<br> -Lugano, Switzerland</p> +<p>Slurm User Group Meeting 2015<br> +15-16 September 2015<br> +Washington DC, USA</p> <p>You are invited to submit an abstract of a tutorial, technical presentation -or site report to be given at the Slurm User Group Meeting 2014. This event is +or site report to be given at the Slurm User Group Meeting 2015. This event is sponsored and organized by -The <a href="http://cscs.ch/">Swiss National Supercomputing Centre</a> and -will be held in Lugano, Switzerland on 23-24 September 2014.</p> +<a href="http://www.gwu.edu/">The George Washington University</a> and +<a href="http://www.schedmd.com/">SchedMD</a>. 
+It will be held in Washington DC, USA on 15-16 September 2015.</p> <p>This international event is opened to everyone who wants to: <ul> @@ -22,26 +23,26 @@ Slurm</a>, a highly scalable Resource Manager and Job Scheduler</li> <p>Everyone who wants to present their own usage, developments, site report, or tutorial about Slurm is invited to send an abstract to -<a href="mailto:sugc@schedmd.com">sugc@schedmd.com</a>.</p> +<a href="mailto:slugc@schedmd.com">slugc@schedmd.com</a>.</p> <p><b>Important Dates:</b><br> -16 June 2014: Abstracts due<br> -27 June 2014: Notification of acceptance<br> -23-24 September 2014: Slurm User Group Meeting 2014<br> +1 June 2015: Abstracts due<br> +15 June 2015: Notification of acceptance<br> +15-16 September 2015: Slurm User Group Meeting 2015<br> </p> <p><b>Program Committee:</b><br> Yiannis Georgiou (Bull)<br> +Brian Gilmer (Cray)<br> Matthieu Hautreux (CEA)<br> Morris Jette (SchedMD)<br> -Donald Lipari (LLNL, Lawrence Livermore National Laboratory)<br> -Colin McMurtrie (CSCS, Swiss National Supercomputing Centre)<br> -Stephen Trofinoff (CSCS, Swiss National Supercomputing Centre)</p> +Bruce Pfaff (NASA Goddard Space Flight Center)<br> +Tim Wickberg (The George Washington University)</p> <!-- <p><a href="slurm_ug_registration.html">Registration information</a></p> --> -<p style="text-align:center;">Last modified 5 June 2014</p> +<p style="text-align:center;">Last modified 20 March 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/team.shtml b/doc/html/team.shtml index 3814e28ed..d85298102 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -81,6 +81,7 @@ Lead Slurm developers are: <li>Josh England (TGS Management Corporation)</li> <li>Kent Engström (National Supercomputer Centre, Sweden)</li> <br> +<li>Roland Fehrenbacher (Q-Leap Networks, Germany)</li> <li>Carles Fenoy (Barcelona Supercomputing Center, Spain)</li> <li>Broi Franco (ION)</li> <li>Damien François (Université catholique de Louvain, Belgium)</li> @@ -187,6 +188,7 @@ Lead Slurm developers are: <li>Uwe Sauter (High Performance Computing Center Stuttgart, Germany)</li> <li>Chris Scheller (University of Michigan)</li> <li>Rod Schultz (Bull)</li> +<li>Samuel Senoner (Vienna University of Technology, Austria)</li> <li>David Singleton</li> <li>Filip Skalski (University of Warsaw, Poland)</li> <li>Jason Sollom (Cray)</li> diff --git a/doc/html/testimonials.shtml b/doc/html/testimonials.shtml index f07a98f02..5f6dbd53f 100644 --- a/doc/html/testimonials.shtml +++ b/doc/html/testimonials.shtml @@ -1,6 +1,22 @@ <!--#include virtual="header.txt"--> <h1>Customer Testimonials</h1> +<HR SIZE=4> + +<i> +"With Oxford providing HPC not just to researchers within the +University, but to local businesses and in collaborative projects, +such as the T2K and NQIT projects, the SLURM scheduler really was the +best option to ensure different service level agreements can be +supported. If you look at the Top500 list of the World's fastest +supercomputers, they're now starting to move to SLURM. 
The scheduler +was specifically requested by the University to support GPUs and the +heterogeneous estate of different CPUs, which the previous TORQUE +scheduler couldn't, so this forms quite an important part of the +overall HPC facility."<br><br> +<a href=http://www.hpcwire.com/off-the-wire/new-hpc-cluster-to-benefit-the-university-of-oxford/>Julian Fielden, Managing Director at OCF</a> +</i> +<HR SIZE=4> <i> "In 2010, when we embarked upon our mission to port Slurm to our Cray XT and XE @@ -152,6 +168,6 @@ Bill Celmaster, XC Program Manager, Hewlett-Packard Company </i> <HR SIZE=4> -<p style="text-align:center;">Last modified 23 January 2013</p> +<p style="text-align:center;">Last modified 14 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/troubleshoot.shtml b/doc/html/troubleshoot.shtml index 265b4dae3..ed562ae8e 100644 --- a/doc/html/troubleshoot.shtml +++ b/doc/html/troubleshoot.shtml @@ -130,7 +130,11 @@ and "<i>scontrol update NodeName=<node> State=resume</i>"). This permits other jobs to use the node, but leaves the non-killable process in place. If the process should ever complete the I/O, the pending SIGKILL -should terminate it immediately.</li> +should terminate it immediately. <b>-OR-</b></li> +<li>Use the <b>UnkillableStepProgram</b> and <b>UnkillableStepTimeout</b> +configuration parameters to automatically respond to processes which can not +be killed, by sending email or rebooting the node. For more information, +see the <i>slurm.conf</i> documentation.</li> </ol> <p class="footer"><a href="#top">top</a></p> @@ -208,13 +212,13 @@ must exist.</li> <li>Check that compatible versions of Slurm exists on all of the nodes (execute "<i>sinfo -V</i>" or "<i>rpm -qa | grep slurm</i>"). The Slurm version numbers contain three digits, which represent -the major, minor and micro release numbers in that order (e.g. 2.5.3 is -major=2, minor=5, micro=3). +the major, minor and micro release numbers in that order (e.g. 14.11.3 is +major=14, minor=11, micro=3). Changes in the RPCs (remote procedure calls) and state files will only be made if the major and/or minor release number changes. Slurm daemons will support RPCs and state files from the two previous minor or -releases (e.g. a version 2.6.x SlurmDBD will support slurmctld daemons and -commands with a version of 2.4.x or 2.5.x).</li> +releases (e.g. a version 15.08.x SlurmDBD will support slurmctld daemons and +commands with a version of 14.03.x or 14.11.x).</li> </ol> <p class="footer"><a href="#top">top</a></p> @@ -301,6 +305,6 @@ partition 000. </ol> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 6 January 20124</p> +<p style="text-align:center;">Last modified 15 April 2015</p> <!--#include virtual="footer.txt"--> diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index 7d21b7fd9..141f64b6e 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -1,4 +1,4 @@ -.TH SACCT "1" "August 2013" "sacct 2.6" "Slurm components" +.TH sacct "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" sacct \- displays accounting data for all jobs and job steps in the @@ -119,24 +119,24 @@ Print a list of fields that can be specified with the \f3\-\-format\fP option. 
.ft 3 Fields available: -AllocCPUS Account AssocID AveCPU -AveCPUFreq AveDiskRead AveDiskWrite AvePages -AveRSS AveVMSize BlockID Cluster -Comment ConsumedEnergy CPUTime CPUTimeRAW -DerivedExitCode Elapsed Eligible End -ExitCode GID Group JobID -JobName Layout MaxDiskRead MaxDiskReadNode -MaxDiskReadTask MaxDiskWrite MaxDiskWriteNode MaxDiskWriteTask -MaxPages MaxPagesNode MaxPagesTask MaxRSS -MaxRSSNode MaxRSSTask MaxVMSize MaxVMSizeNode -MaxVMSizeTask MinCPU MinCPUNode MinCPUTask -NCPUS NNodes NodeList NTasks -Priority Partition QOSRAW ReqCPUFreq -ReqCPUs ReqMem Reservation ReservationId -Reserved ResvCPU ResvCPURAW Start -State Submit Suspended SystemCPU -Timelimit TotalCPU UID User -UserCPU WCKey WCKeyID +AllocCPUS Account AssocID AveCPU +AveCPUFreq AveDiskRead AveDiskWrite AvePages +AveRSS AveVMSize BlockID Cluster +Comment ConsumedEnergy CPUTime CPUTimeRAW +DerivedExitCode Elapsed Eligible End +ExitCode GID Group JobID +JobIDRaw JobName Layout MaxDiskRead +MaxDiskReadNode MaxDiskReadTask MaxDiskWrite MaxDiskWriteNode +MaxDiskWriteTask MaxPages MaxPagesNode MaxPagesTask +MaxRSS MaxRSSNode MaxRSSTask MaxVMSize +MaxVMSizeNode MaxVMSizeTask MinCPU MinCPUNode +MinCPUTask NCPUS NNodes NodeList +NTasks Priority Partition QOSRAW +ReqCPUFreq ReqCPUs ReqMem Reservation +ReservationId Reserved ResvCPU ResvCPURAW +Start State Submit Suspended +SystemCPU Timelimit TotalCPU UID +User UserCPU WCKey WCKeyID .ft 1 .fi @@ -555,6 +555,12 @@ It is in the form: \f2job.jobstep\fP\c \&. +.TP +\f3JobIDRaw\fP +In case of job array print the jobId instead of the ArrayJobId. +For non job arrays the output is the jobId in the format \f2job.jobstep\fP\c +\&. + .TP \f3JobName\fP The name of the job or job step. The \f3slurm_accounting.log\fP file diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index a5e149351..79aaa64ba 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -1,4 +1,4 @@ -.TH SACCTMGR "1" "April 2009" "sacctmgr 2.0" "Slurm components" +.TH sacctmgr "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" sacctmgr \- Used to view and modify Slurm account information. @@ -224,7 +224,7 @@ List of transactions that have occurred during a given time period. .TP \fIuser\fR -The login name. +The login name. Only lowercase usernames are supported. .TP \fIwckeys\fR @@ -243,12 +243,6 @@ job to be initiated. The default QOS this association and its children should have. This is overridden if set directly on a user. To clear a previously set value use the modify command with a new value of \-1. -.P -NOTE: When read in from the slurmctld the default qos is checked against and if -the default qos isn't in the list of valid QOS for that association -and the association only has access to 1 QOS that will become the -default, otherwise, no default will be set. This should only happen -when removing a QOS from a <= 2.1 sacctmgr. .TP \fIFairshare\fP=<fairshare number | parent> diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 0183395eb..93a0e9652 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -1,4 +1,4 @@ -.TH "salloc" "1" "SLURM 14.11" "November 2014" "SLURM Commands" +.TH salloc "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, @@ -447,7 +447,7 @@ Multiple license names should be comma separated (e.g. 
.TP \fB\-m\fR, \fB\-\-distribution\fR= -<\fIblock\fR|\fIcyclic\fR|\fIarbitrary\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR]> +\fIarbitrary\fR|<\fIblock\fR|\fIcyclic\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR|\fIfcyclic\fR]> Specify alternate distribution methods for remote processes. In salloc, this only sets environment variables that will be used by @@ -531,6 +531,15 @@ that consecutive tasks share a socket. The cyclic distribution method will distribute tasks to sockets such that consecutive tasks are distributed over consecutive sockets (in a round\-robin fashion). +Tasks requiring more than one CPU will have all of those CPUs allocated on a +single socket if possible. +.TP +.B fcyclic +The fcyclic distribution method will distribute tasks to sockets such +that consecutive tasks are distributed over consecutive sockets (in a +round\-robin fashion). +Tasks requiring more than one CPU will have each CPUs allocated in a cyclic +fashion across sockets. .RE .TP @@ -823,7 +832,7 @@ Create a IP version 4 connection for LAPI communications on one switch network for each task. .TP \fBInstances=2,US,LAPI,MPI\fR -Create two user space connections each for LAPI and MPI communcations on every +Create two user space connections each for LAPI and MPI communications on every switch network for each task. Note that SN_ALL is the default option so every switch network is used. Also note that Instances=2 specifies that two connections are established for each protocol (LAPI and MPI) and each task. @@ -1111,8 +1120,8 @@ This option has been replaced by \fB\-\-immediate\fR=<\fIseconds\fR>. .TP \fB\-w\fR, \fB\-\-nodelist\fR=<\fInode name list\fR> Request a specific list of hosts. -Unless constrained by the maximum node count, the job will contain -\fIall\fR of these hosts. +The job will contain \fIall\fR of these hosts and possibly additional hosts +as needed to satisfy resource requirements. The list may be specified as a comma\-separated list of hosts, a range of hosts (host[1\-5,7,...] for example), or a filename. The host list will be assumed to be a filename if it contains a "/" character. diff --git a/doc/man/man1/sattach.1 b/doc/man/man1/sattach.1 index f35ddf5fb..6484ddd1e 100644 --- a/doc/man/man1/sattach.1 +++ b/doc/man/man1/sattach.1 @@ -1,4 +1,5 @@ -.TH "sattach" "1" "SLURM 2.2" "June 2010" "SLURM Commands" +.TH sattach "1" "Slurm Commands" "April 2015" "Slurm Commands" + .SH "NAME" .LP sattach \- Attach to a SLURM job step. diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index ea5c67413..28b038219 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -1,4 +1,4 @@ -.TH "sbatch" "1" "SLURM 14.11" "November 2014" "SLURM Commands" +.TH sbatch "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" sbatch \- Submit a batch script to SLURM. @@ -11,9 +11,6 @@ sbatch submits a batch script to SLURM. The batch script may be given to sbatch through a file name on the command line, or if no file name is specified, sbatch will read in a script from standard input. The batch script may contain options preceded with "#SBATCH" before any executable commands in the script. -After the first line of the batch script, which typically identifies the -shell to be used, sbatch will stop processing options at the first line which -does NOT begin with "#SBATCH". sbatch exits immediately after the script is successfully transferred to the SLURM controller and assigned a SLURM job ID. The batch script is not @@ -542,7 +539,7 @@ between clusters. 
.TP \fB\-m\fR, \fB\-\-distribution\fR= -<\fIblock\fR|\fIcyclic\fR|\fIarbitrary\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR]> +\fIarbitrary\fR|<\fIblock\fR|\fIcyclic\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR|\fIfcyclic\fR]> Specify alternate distribution methods for remote processes. In sbatch, this only sets environment variables that will be used by @@ -626,6 +623,15 @@ that consecutive tasks share a socket. The cyclic distribution method will distribute tasks to sockets such that consecutive tasks are distributed over consecutive sockets (in a round\-robin fashion). +Tasks requiring more than one CPU will have all of those CPUs allocated on a +single socket if possible. +.TP +.B fcyclic +The fcyclic distribution method will distribute tasks to sockets such +that consecutive tasks are distributed over consecutive sockets (in a +round\-robin fashion). +Tasks requiring more than one CPU will have each CPUs allocated in a cyclic +fashion across sockets. .RE .TP @@ -1283,8 +1289,8 @@ errors will be displayed. .TP \fB\-w\fR, \fB\-\-nodelist\fR=<\fInode name list\fR> Request a specific list of hosts. -Unless constrained by the maximum node count, the job will contain -\fIall\fR of these hosts. +The job will contain \fIall\fR of these hosts and possibly additional hosts +as needed to satisfy resource requirements. The list may be specified as a comma\-separated list of hosts, a range of hosts (host[1\-5,7,...] for example), or a filename. The host list will be assumed to be a filename if it contains a "/" character. diff --git a/doc/man/man1/sbcast.1 b/doc/man/man1/sbcast.1 index 3236742b6..631957f45 100644 --- a/doc/man/man1/sbcast.1 +++ b/doc/man/man1/sbcast.1 @@ -1,4 +1,4 @@ -.TH SBCAST "1" "April 2006" "sbcast 2.0" "Slurm components" +.TH sbcast "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" sbcast \- transmit a file to the nodes allocated to a SLURM job. diff --git a/doc/man/man1/scancel.1 b/doc/man/man1/scancel.1 index 84f71ce17..8fe5958fe 100644 --- a/doc/man/man1/scancel.1 +++ b/doc/man/man1/scancel.1 @@ -1,4 +1,4 @@ -.TH SCANCEL "1" "January 2013" "scancel 2.6" "Slurm components" +.TH scancel "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" scancel \- Used to signal jobs or job steps that are under the control of Slurm. diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index 6735b00b6..8f0325225 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -1,4 +1,4 @@ -.TH SCONTROL "1" "September 2014" "scontrol 14.11" "Slurm components" +.TH scontrol "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" scontrol \- Used view and modify Slurm configuration and state. @@ -577,17 +577,14 @@ of the cluster) as described in salloc/sbatch/srun man pages. .TP \fIMinCPUsNode\fP=<count> Set the job's minimum number of CPUs per node to the specified value. -Only the Slurm administrator or root can change this parameter. .TP \fIMinMemoryCPU\fP=<megabytes> Set the job's minimum real memory required per allocated CPU to the specified -value. Only the Slurm administrator or root can change this parameter. -Either \fIMinMemoryCPU\fP or \fIMinMemoryNode\fP may be set, but not both. +value. Either \fIMinMemoryCPU\fP or \fIMinMemoryNode\fP may be set, but not both. .TP \fIMinMemoryNode\fP=<megabytes> Set the job's minimum real memory required per node to the specified value. Either \fIMinMemoryCPU\fP or \fIMinMemoryNode\fP may be set, but not both. -Only the Slurm administrator or root can change this parameter. 
.TP \fIMinTmpDiskNode\fP=<megabytes> Set the job's minimum temporary disk space required per node to the specified value. @@ -1232,7 +1229,7 @@ not possible to also explicitly specify allowed accounts. .TP \fICoreCnt\fP=<num> -This option is only suported when SelectType=select/cons_res. Identify number of +This option is only supported when SelectType=select/cons_res. Identify number of cores to be reserved. If NodeCnt is used, this is the total number of cores to reserve where cores per node is CoreCnt/NodeCnt. If a nodelist is used, this should be an array of core numbers by node: Nodes=node[1\-5] CoreCnt=2,2,3,3,4 diff --git a/doc/man/man1/sdiag.1 b/doc/man/man1/sdiag.1 index fd4bf1ecf..18460a6a0 100644 --- a/doc/man/man1/sdiag.1 +++ b/doc/man/man1/sdiag.1 @@ -1,4 +1,5 @@ -.TH "sdiag" "1" "SLURM 2.4" "December 2011" "SLURM Commands" +.TH sdiag "1" "Slurm Commands" "April 2015" "Slurm Commands" + .SH "NAME" .LP sdiag \- Scheduling diagnostic tool for SLURM diff --git a/doc/man/man1/sh5util.1 b/doc/man/man1/sh5util.1 index ec36c3af0..1b9811af5 100644 --- a/doc/man/man1/sh5util.1 +++ b/doc/man/man1/sh5util.1 @@ -1,4 +1,5 @@ -.TH "sh5util" "1" "SLURM 2.6" "June 2013" "SLURM Commands" +.TH sh5util "1" "Slurm Commands" "April 2015" "Slurm Commands" + .SH "NAME" .LP sh5util \- Tool for merging HDF5 files from the acct_gather_profile diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1 index c87dcedc1..038e6eb17 100644 --- a/doc/man/man1/sinfo.1 +++ b/doc/man/man1/sinfo.1 @@ -1,4 +1,4 @@ -.TH SINFO "1" "August 2013" "sinfo 14.03" "Slurm components" +.TH sinfo "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" sinfo \- view information about SLURM nodes and partitions. @@ -111,7 +111,7 @@ when running with various options are In the above format strings, the use of "#" represents the maximum length of any partition name or node list to be printed. A pass is made over the records to be printed to establish the size in order -to allign the sinfo output, then a second pass is made over the records to +to align the sinfo output, then a second pass is made over the records to print them. Note that the literal character "#" itself is not a valid field length specification, but is only used to document this behaviour. @@ -254,7 +254,7 @@ size of field .TP \fB\-p <partition>\fR, \fB\-\-partition=<partition>\fR -Print information only about the specified partition(s). Mutliple partitions +Print information only about the specified partition(s). Multiple partitions are separated by commas. .TP @@ -418,7 +418,7 @@ Size of temporary disk space in megabytes on these nodes. .SH "NODE STATE CODES" .PP Node state codes are shortened as required for the field size. -These node states may be followed by a special characater to identify +These node states may be followed by a special character to identify state flags associated with the node. The following node sufficies and states are used: .TP 4 diff --git a/doc/man/man1/slurm.1 b/doc/man/man1/slurm.1 index c98f0536c..111dae7b5 100644 --- a/doc/man/man1/slurm.1 +++ b/doc/man/man1/slurm.1 @@ -1,10 +1,10 @@ -.TH SLURM "1" "March 2009" "slurm 2.0" "Slurm system" +.TH Slurm "1" "Slurm System" "April 2015" "Slurm System" .SH "NAME" -slurm \- SLURM system overview. +Slurm \- Slurm Workload Manager overview. 
.SH "DESCRIPTION" -The Simple Linux Utility for Resource Management (SLURM) is an open source, +The Slurm Workload Manager is an open source, fault-tolerant, and highly scalable cluster management and job scheduling system for large and small Linux clusters. SLURM requires no kernel modifications for its operation and is relatively self-contained. As a cluster resource manager, @@ -19,11 +19,11 @@ gang scheduling (time sharing for parallel jobs), backfill scheduling, resource limits by user or bank account, and sophisticated multifactor job prioritization algorithms. -SLURM has a centralized manager, \fBslurmctld\fR, to monitor resources and +Slurm has a centralized manager, \fBslurmctld\fR, to monitor resources and work. There may also be a backup manager to assume those responsibilities in the event of failure. Each compute server (node) has a \fBslurmd\fR daemon, which can be compared to a remote shell: it waits for work, executes that work, returns -status, and waits for more work. An optional \fBslurmDBD\fR (SLURM DataBase Daemon) +status, and waits for more work. An optional \fBslurmdbd\fR (SLURM DataBase Daemon) can be used for accounting purposes and to maintain resource limit information. Basic user tools include \fBsrun\fR to initiate jobs, @@ -32,7 +32,7 @@ status, and \fBsqueue\fR to report the status of jobs. There is also an administ tool \fBscontrol\fR available to monitor and/or modify configuration and state information. APIs are available for all functions. -SLURM configuration is maintained in the \fBslurm.conf\fR file. +Slurm configuration is maintained in the \fBslurm.conf\fR file. Man pages are available for all SLURM commands, daemons, APIs, plus the \fBslurm.conf\fR file. @@ -50,12 +50,12 @@ Copyright (C) 2010\-2013 SchedMD LLC. This file is part of SLURM, a resource management program. For details, see <http://slurm.schedmd.com/>. .LP -SLURM is free software; you can redistribute it and/or modify it under +Slurm is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. .LP -SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +Slurm is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. diff --git a/doc/man/man1/smap.1 b/doc/man/man1/smap.1 index de20921c2..2ec354974 100644 --- a/doc/man/man1/smap.1 +++ b/doc/man/man1/smap.1 @@ -1,4 +1,4 @@ -.TH SMAP "1" "August 2013" "smap 14.03" "Slurm components" +.TH smap "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" smap \- graphically view information about SLURM jobs, partitions, and set @@ -371,7 +371,7 @@ Clear all partitions created. .SH "NODE STATE CODES" .PP Node state codes are shortened as required for the field size. -These node states may be followed by a special characater to identify +These node states may be followed by a special character to identify state flags associated with the node. 
The following node sufficies and states are used: .TP 4 diff --git a/doc/man/man1/sprio.1 b/doc/man/man1/sprio.1 index 075a2db70..c76b9508a 100644 --- a/doc/man/man1/sprio.1 +++ b/doc/man/man1/sprio.1 @@ -1,4 +1,4 @@ -.TH SPRIO "1" "May 2011" "sprio 2.3" "SLURM commands" +.TH sprio "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" sprio \- view the factors that comprise a job's scheduling priority diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1 index d623e1b14..c90d1163e 100644 --- a/doc/man/man1/squeue.1 +++ b/doc/man/man1/squeue.1 @@ -1,4 +1,4 @@ -.TH SQUEUE "1" "August 2014" "squeue 14.01" "Slurm components" +.TH squeue "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" squeue \- view information about jobs located in the SLURM scheduling queue. @@ -987,7 +987,7 @@ The job's constraints can not be satisfied. The job's earliest start time has not yet been reached. .TP \fBBlockFreeAction\fR -An IBM BlueGene block is being freedand can not allow more jobs to start. +An IBM BlueGene block is being freed and can not allow more jobs to start. .TP \fBBlockMaxError\fR An IBM BlueGene block has too many cnodes in error state to allow more jobs to start. diff --git a/doc/man/man1/sreport.1 b/doc/man/man1/sreport.1 index 701361b19..7c7a97ad4 100644 --- a/doc/man/man1/sreport.1 +++ b/doc/man/man1/sreport.1 @@ -1,4 +1,4 @@ -.TH SREPORT "1" "October 2010" "sreport 2.0" "Slurm components" +.TH sreport "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" sreport \- Generate reports from the slurm accounting data. diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index ae5a03791..27d4c09db 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -1,4 +1,4 @@ -.TH "srun" "1" "SLURM 14.11" "November 2014" "SLURM Commands" +.TH srun "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" srun \- Run parallel jobs @@ -586,7 +586,7 @@ See \fBEXAMPLE\fR below. .TP \fB\-\-export\fR=<\fIenvironment variables | NONE\fR> -Identify which environment variables are propagated to the laucnhed application. +Identify which environment variables are propagated to the launched application. Multiple environment variable names should be comma separated. Environment variable names may be specified to propagate the current value of those variables (e.g. "\-\-export=EDITOR") or specific values @@ -743,7 +743,7 @@ Multiple license names should be comma separated (e.g. .TP \fB\-m\fR, \fB\-\-distribution\fR= -<\fIblock\fR|\fIcyclic\fR|\fIarbitrary\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR]> +\fIarbitrary\fR|<\fIblock\fR|\fIcyclic\fR|\fIplane=<options>\fR[:\fIblock\fR|\fIcyclic\fR|\fIfcyclic\fR]> Specify alternate distribution methods for remote processes. This option controls the assignment of tasks to the nodes on which @@ -752,6 +752,7 @@ to tasks for binding (task affinity). The first distribution method (before the ":") controls the distribution of resources across nodes. The optional second distribution method (after the ":") controls the distribution of resources across sockets within a node. + Note that with select/cons_res, the number of cpus allocated on each socket and node may be different. Refer to http://slurm.schedmd.com/mc_support.html @@ -824,6 +825,15 @@ that consecutive tasks share a socket. The cyclic distribution method will distribute tasks to sockets such that consecutive tasks are distributed over consecutive sockets (in a round\-robin fashion). 
+Tasks requiring more than one CPU will have all of those CPUs allocated on a +single socket if possible. +.TP +.B fcyclic +The fcyclic distribution method will distribute tasks to sockets such +that consecutive tasks are distributed over consecutive sockets (in a +round\-robin fashion). +Tasks requiring more than one CPU will have each CPUs allocated in a cyclic +fashion across sockets. .RE .TP @@ -1612,10 +1622,10 @@ Specify a minimum amount of temporary disk space. .TP \fB\-u\fR, \fB\-\-unbuffered\fR -By default the connection between slurmdstepd and the user launched application +By default the connection between slurmstepd and the user launched application is over a pipe. The stdio output written by the application is buffered by the glibc until it is flushed or the output is set as unbuffered. -See setbuf(3).If this option is specified the tasks are executed with +See setbuf(3). If this option is specified the tasks are executed with a pseudo terminal so that the application output is unbuffered. .TP \fB\-\-usage\fR @@ -1656,8 +1666,8 @@ exits with a non\-zero exit code. .TP \fB\-w\fR, \fB\-\-nodelist\fR=<\fIhost1,host2,...\fR or \fIfilename\fR> Request a specific list of hosts. -Unless constrained by the maximum node count, the job will contain -\fIall\fR of these hosts. +The job will contain \fIall\fR of these hosts and possibly additional hosts +as needed to satisfy resource requirements. The list may be specified as a comma\-separated list of hosts, a range of hosts (host[1\-5,7,...] for example), or a filename. The host list will be assumed to be a filename if it contains a "/" character. @@ -1665,7 +1675,7 @@ If you specify a minimum node or processor count larger than can be satisfied by the supplied host list, additional resources will be allocated on other nodes as needed. Rather than repeating a host name multiple times, an asterisk and -a repitition count may be appended to a host name. For example +a repetition count may be appended to a host name. For example "host1,host1" and "host1*2" are equivalent. .TP diff --git a/doc/man/man1/srun_cr.1 b/doc/man/man1/srun_cr.1 index a8b802286..a180e790f 100644 --- a/doc/man/man1/srun_cr.1 +++ b/doc/man/man1/srun_cr.1 @@ -1,4 +1,4 @@ -.TH SRUN_CR "1" "March 2009" "srun_cr 2.0" "slurm components" +.TH srun_cr "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" srun_cr \- run parallel jobs with checkpoint/restart support diff --git a/doc/man/man1/sshare.1 b/doc/man/man1/sshare.1 index e3a493dc0..d1e8c522b 100644 --- a/doc/man/man1/sshare.1 +++ b/doc/man/man1/sshare.1 @@ -1,4 +1,4 @@ -.TH SSHARE "1" "March 2012" "sshare 2.0" "SLURM Commands" +.TH sshare "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" sshare \- Tool for listing the shares of associations to a cluster. @@ -52,6 +52,11 @@ Output will be '|' delimited without a '|' at the end. \fB\-u\fR, \fB\-\-users=\fR Display information for specific users (comma separated list). +.TP +\fB\-U\fR, \fB\-\-Users\fR +If specified only the users information are printed, the parent +and ancestors are not displayed. + .TP \fB\-v\fR, \fB\-\-verbose\fR Display more information about the specified options. 
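As an example of the new -U/--Users flag documented above, and assuming the relevant users already have associations in the accounting database (the names alice and bob are placeholders):

  # Full share tree for two users, including parent and ancestor accounts
  $ sshare -u alice,bob

  # Only the per-user lines, with parent and ancestor associations omitted
  $ sshare -U -u alice,bob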
diff --git a/doc/man/man1/sstat.1 b/doc/man/man1/sstat.1 index e7c2f131c..550f2393b 100644 --- a/doc/man/man1/sstat.1 +++ b/doc/man/man1/sstat.1 @@ -1,4 +1,4 @@ -.TH SSTAT "1" "August 2011" "sstat 2.3" "Slurm components" +.TH sstat "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" sstat \- Display various status information diff --git a/doc/man/man1/strigger.1 b/doc/man/man1/strigger.1 index 4f6b09843..608619ca5 100644 --- a/doc/man/man1/strigger.1 +++ b/doc/man/man1/strigger.1 @@ -1,4 +1,4 @@ -.TH STRIGGER "1" "June 2015" "strigger 14.03" "Slurm components" +.TH strigger "1" "Slurm Commands" "April 2015" "Slurm Commands" .SH "NAME" strigger \- Used set, get or clear Slurm trigger information. @@ -173,7 +173,7 @@ with the \fB\-\-jobid\fR option. When the \fB\-\-jobid\fR option is used in conjunction with the \fB\-\-up\fR, \fB\-\-down\fR or \fB\-\-drained\fR option, all nodes allocated to that job will considered the nodes used as a -trigger event.Since this option's argument is optional, for proper +trigger event. Since this option's argument is optional, for proper parsing the single letter option must be followed immediately with the value and not include a space between them. For example "\-ntux" and not "\-n tux". diff --git a/doc/man/man1/sview.1 b/doc/man/man1/sview.1 index 727d01b09..ce7eb76fa 100644 --- a/doc/man/man1/sview.1 +++ b/doc/man/man1/sview.1 @@ -1,4 +1,5 @@ -.TH "sview" "1" "SLURM 2.6" "October 2013" "SLURM Commands" +.TH sview "1" "Slurm Commands" "April 2015" "Slurm Commands" + .SH "NAME" .LP sview \- graphical user interface to view and modify SLURM state. diff --git a/doc/man/man3/slurm_allocate_resources.3 b/doc/man/man3/slurm_allocate_resources.3 index 6ab386671..d7b82005c 100644 --- a/doc/man/man3/slurm_allocate_resources.3 +++ b/doc/man/man3/slurm_allocate_resources.3 @@ -1,4 +1,5 @@ -.TH "Slurm API" "3" "April 2006" "Morris Jette" "Slurm job initiation functions" +.TH "Slurm API" "3" "Slurm job initiation functions" "April 2015" "Slurm job initiation functions" + .SH "NAME" slurm_allocate_resources, slurm_allocate_resources_blocking, slurm_allocation_msg_thr_create, slurm_allocation_msg_thr_destroy, @@ -131,7 +132,7 @@ Specifies the pointer to the structure to be created and filled in by the functi \fIslurm_confirm_allocation\fP or \fIslurm_job_will_run\fP. .TP \fIslurm_alloc_msg_thr_ptr\fP -Specigies the pointer to the structure created and returned by the +Specifies the pointer to the structure created and returned by the function \fIslurm_allocation_msg_thr_create\fP. Must be destroyed with function \fIslurm_allocation_msg_thr_destroy\fP. 
.TP diff --git a/doc/man/man3/slurm_checkpoint_error.3 b/doc/man/man3/slurm_checkpoint_error.3 index f31bf77c0..e7d97bd55 100644 --- a/doc/man/man3/slurm_checkpoint_error.3 +++ b/doc/man/man3/slurm_checkpoint_error.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "March 2009" "Morris Jette" "Slurm checkpoint functions" +.TH "Slurm API" "3" "Slurm checkpoint functions" "April 2015" "Slurm checkpoint functions" .SH "NAME" slurm_checkpoint_able, slurm_checkpoint_complete, slurm_checkpoint_create, diff --git a/doc/man/man3/slurm_clear_trigger.3 b/doc/man/man3/slurm_clear_trigger.3 index 4befda10e..33983dd32 100644 --- a/doc/man/man3/slurm_clear_trigger.3 +++ b/doc/man/man3/slurm_clear_trigger.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "October 2012" "Slurm API 2.5" "Slurm event trigger management functions" +.TH "Slurm API" "3" "Slurm event trigger management functions" "April 2015" "Slurm event trigger management functions" .SH "NAME" diff --git a/doc/man/man3/slurm_complete_job.3 b/doc/man/man3/slurm_complete_job.3 index f79993795..37848e43c 100644 --- a/doc/man/man3/slurm_complete_job.3 +++ b/doc/man/man3/slurm_complete_job.3 @@ -1,4 +1,5 @@ -.TH "Slurm API" "3" "March 2009" "Morris Jette" "Slurm job completion calls" +.TH "Slurm API" "3" "Slurm job completion functions" "April 2015" "Slurm job completion functions" + .SH "NAME" slurm_complete_job \- Slurm job completion call .SH "SYNTAX" diff --git a/doc/man/man3/slurm_free_ctl_conf.3 b/doc/man/man3/slurm_free_ctl_conf.3 index 60b358734..e94da7377 100644 --- a/doc/man/man3/slurm_free_ctl_conf.3 +++ b/doc/man/man3/slurm_free_ctl_conf.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "April 2007" "Morris Jette" "Slurm informational calls" +.TH "Slurm API" "3" "Slurm informational functions" "April 2015" "Slurm informational functions" .SH "NAME" slurm_free_ctl_conf, slurm_load_ctl_conf, diff --git a/doc/man/man3/slurm_free_front_end_info_msg.3 b/doc/man/man3/slurm_free_front_end_info_msg.3 index 17ddb2ebf..2f8271482 100644 --- a/doc/man/man3/slurm_free_front_end_info_msg.3 +++ b/doc/man/man3/slurm_free_front_end_info_msg.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "December 2010" "Morris Jette" "Slurm front end node informational calls" +.TH "Slurm API" "3" "Slurm front end node informational functions" "April 2015" "Slurm front end node informational functions" .SH "NAME" slurm_free_front_end_info_msg, slurm_load_front_end, diff --git a/doc/man/man3/slurm_free_job_info_msg.3 b/doc/man/man3/slurm_free_job_info_msg.3 index ba89de978..6f05a85bd 100644 --- a/doc/man/man3/slurm_free_job_info_msg.3 +++ b/doc/man/man3/slurm_free_job_info_msg.3 @@ -1,4 +1,5 @@ -.TH "Slurm API" "3" "January 2013" "Morris Jette" "Slurm job information reporting functions" +.TH "Slurm API" "3" "Slurm job information reporting functions" "April 2015" "Slurm job information reporting functions" + .SH "NAME" slurm_free_job_alloc_info_response_msg, slurm_free_job_info_msg, slurm_get_end_time, slurm_get_rem_time, slurm_get_select_jobinfo, diff --git a/doc/man/man3/slurm_free_job_step_info_response_msg.3 b/doc/man/man3/slurm_free_job_step_info_response_msg.3 index 5064a07ae..2e54956a0 100644 --- a/doc/man/man3/slurm_free_job_step_info_response_msg.3 +++ b/doc/man/man3/slurm_free_job_step_info_response_msg.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "September 2006" "Morris Jette" "Slurm job step information functions" +.TH "Slurm API" "3" "Slurm job step information functions" "April 2015" "Slurm job step information functions" .SH "NAME" slurm_free_job_step_info_response_msg, slurm_get_job_steps, diff --git 
a/doc/man/man3/slurm_free_node_info.3 b/doc/man/man3/slurm_free_node_info.3 index 8eb4d04ce..1049e42fa 100644 --- a/doc/man/man3/slurm_free_node_info.3 +++ b/doc/man/man3/slurm_free_node_info.3 @@ -1,4 +1,5 @@ -.TH "Slurm API" "3" "January 2013" "Morris Jette" "Slurm node informational calls" +.TH "Slurm API" "3" "Slurm node informational functions" "April 2015" "Slurm node informational functions" + .SH "NAME" slurm_free_node_info_msg, slurm_load_node, slurm_load_node_single, slurm_print_node_info_msg, slurm_print_node_table, diff --git a/doc/man/man3/slurm_free_partition_info.3 b/doc/man/man3/slurm_free_partition_info.3 index 2fcd692cf..30d688690 100644 --- a/doc/man/man3/slurm_free_partition_info.3 +++ b/doc/man/man3/slurm_free_partition_info.3 @@ -1,4 +1,5 @@ -.TH "Slurm API" "3" "September 2006" "Morris Jette" "Slurm partition information reporting functions" +.TH "Slurm API" "3" "Slurm partition information functions" "April 2015" "Slurm partition information functions" + .SH "NAME" slurm_free_partition_info_msg, slurm_load_partitions, slurm_print_partition_info, slurm_print_partition_info_msg diff --git a/doc/man/man3/slurm_get_errno.3 b/doc/man/man3/slurm_get_errno.3 index 94d8412f9..85f4008fa 100644 --- a/doc/man/man3/slurm_get_errno.3 +++ b/doc/man/man3/slurm_get_errno.3 @@ -1,4 +1,5 @@ -.TH "Slurm API" "3" "April 2006" "Morris Jette" "Slurm error handling functions" +.TH "Slurm API" "3" "Slurm error handling functions" "April 2015" "Slurm error handling functions" + .SH "NAME" slurm_get_errno, slurm_perror, slurm_strerror \- Slurm error handling functions .SH "SYNTAX" diff --git a/doc/man/man3/slurm_hostlist_create.3 b/doc/man/man3/slurm_hostlist_create.3 index 1ac92b543..99f5ae05c 100644 --- a/doc/man/man3/slurm_hostlist_create.3 +++ b/doc/man/man3/slurm_hostlist_create.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "September 2006" "Morris Jette" "Slurm host list support functions" +.TH "Slurm API" "3" "Slurm host list functions" "April 2015" "Slurm host list functions" .SH "NAME" slurm_hostlist_create, slurm_hostlist_shift, slurm_hostlist_destroy diff --git a/doc/man/man3/slurm_job_step_create.3 b/doc/man/man3/slurm_job_step_create.3 index 0d9844fc5..e84fe861c 100644 --- a/doc/man/man3/slurm_job_step_create.3 +++ b/doc/man/man3/slurm_job_step_create.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "March 2007" "Morris Jette" "Slurm job step initiation functions" +.TH "Slurm API" "3" "Slurm job step initiation functions" "April 2015" "Slurm job step initiation functions" .SH "NAME" slurm_free_job_step_create_response_msg, slurm_job_step_create diff --git a/doc/man/man3/slurm_kill_job.3 b/doc/man/man3/slurm_kill_job.3 index ef072b4e2..355a23f82 100644 --- a/doc/man/man3/slurm_kill_job.3 +++ b/doc/man/man3/slurm_kill_job.3 @@ -1,4 +1,5 @@ -.TH "Slurm API" "3" "November 2003" "Morris Jette" "Slurm job signal calls" +.TH "Slurm API" "3" "Slurm job signal functions" "April 2015" "Slurm job signal functions" + .SH "NAME" slurm_kill_job, slurm_kill_job_step, .br diff --git a/doc/man/man3/slurm_load_reservations.3 b/doc/man/man3/slurm_load_reservations.3 index 3f09852f4..1dc80e052 100644 --- a/doc/man/man3/slurm_load_reservations.3 +++ b/doc/man/man3/slurm_load_reservations.3 @@ -1,4 +1,5 @@ -.TH "Slurm API" "3" "January 2009" "David Bremer" "Slurm reservation information reporting functions" +.TH "Slurm API" "3" "Slurm reservation information functions" "April 2015" "Slurm reservation information functions" + .SH "NAME" slurm_load_reservations, slurm_free_reservation_info_msg, 
slurm_print_reservation_info, slurm_sprint_reservation_info, diff --git a/doc/man/man3/slurm_reconfigure.3 b/doc/man/man3/slurm_reconfigure.3 index d8bc4fdde..0d745b466 100644 --- a/doc/man/man3/slurm_reconfigure.3 +++ b/doc/man/man3/slurm_reconfigure.3 @@ -1,4 +1,5 @@ -.TH "Slurm API" "3" "May 2009" "Morris Jette" "Slurm administrative calls" +.TH "Slurm API" "3" "Slurm administrative functions" "April 2015" "Slurm administrative functions" + .SH "NAME" slurm_create_partition, slurm_create_reservation, slurm_delete_partition, slurm_delete_reservation, diff --git a/doc/man/man3/slurm_resume.3 b/doc/man/man3/slurm_resume.3 index dc518f316..d87c719ac 100644 --- a/doc/man/man3/slurm_resume.3 +++ b/doc/man/man3/slurm_resume.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "August 2014" "Morris Jette" "Slurm suspend, resume and requeue functions" +.TH "Slurm API" "3" "Slurm suspend, resume and requeue functions" "April 2015" "Slurm suspend, resume and requeue functions" .SH "NAME" slurm_suspend, slurm_suspend2, slurm_resume, slurm_resume2, slurm_requeue, diff --git a/doc/man/man3/slurm_slurmd_status.3 b/doc/man/man3/slurm_slurmd_status.3 index 96104b05c..3b080f3b9 100644 --- a/doc/man/man3/slurm_slurmd_status.3 +++ b/doc/man/man3/slurm_slurmd_status.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "Oct 2008" "Danny Auble" "Slurmd status functions" +.TH "Slurm API" "3" "Slurmd status functions" "April 2015" "Slurmd status functions" .SH "NAME" diff --git a/doc/man/man3/slurm_step_ctx_create.3 b/doc/man/man3/slurm_step_ctx_create.3 index e080c55d7..3eb594e64 100644 --- a/doc/man/man3/slurm_step_ctx_create.3 +++ b/doc/man/man3/slurm_step_ctx_create.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "March 2007" "Morris Jette" "Slurm job step context functions" +.TH "Slurm API" "3" "Slurm job step context functions" "April 2015" "Slurm job step context functions" .SH "NAME" slurm_step_ctx_create, slurm_step_ctx_create_no_alloc, diff --git a/doc/man/man3/slurm_step_launch.3 b/doc/man/man3/slurm_step_launch.3 index 351db7221..2e2bedc41 100644 --- a/doc/man/man3/slurm_step_launch.3 +++ b/doc/man/man3/slurm_step_launch.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "July 2008" "Morris Jette" "Slurm job step launch functions" +.TH "Slurm API" "3" "Slurm job step launch functions" "April 2015" "Slurm job step launch functions" .SH "NAME" @@ -69,7 +69,7 @@ the job step to be launched. .SH "DESCRIPTION" .LP -\fBslurm_step_launch_params_t_init\fR Iinitialize a user-allocated +\fBslurm_step_launch_params_t_init\fR initialize a user-allocated slurm_step_launch_params_t structure with default values. default values. This function will NOT allocate any new memory. 
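By way of illustration only (not part of this patch; it assumes the slurm.h API shipped with this release, and the function and variable names added here are hypothetical), the initialization call described above is typically used like this:

    #include <slurm/slurm.h>

    /* Minimal sketch: the caller owns the structure;
     * slurm_step_launch_params_t_init() only fills in defaults and
     * allocates no new memory. */
    static void example_prepare_launch_params(void)
    {
            slurm_step_launch_params_t params;

            slurm_step_launch_params_t_init(&params);
            /* ...set the desired fields here, then pass &params to
             * slurm_step_launch()... */
    }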
.LP diff --git a/doc/man/man3/slurm_update_job.3 b/doc/man/man3/slurm_update_job.3 index fbcbc2a52..9302c986e 100644 --- a/doc/man/man3/slurm_update_job.3 +++ b/doc/man/man3/slurm_update_job.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "August 2014" "Morris Jette" "Slurm job and step update functions" +.TH "Slurm API" "3" "Slurm job and step update functions" "April 2015" "Slurm job and step update functions" .SH "NAME" slurm_init_job_desc_msg, slurm_init_update_step_msg, diff --git a/doc/man/man5/acct_gather.conf.5 b/doc/man/man5/acct_gather.conf.5 index b1b72a3fc..9b643c7a2 100644 --- a/doc/man/man5/acct_gather.conf.5 +++ b/doc/man/man5/acct_gather.conf.5 @@ -1,4 +1,4 @@ -.TH "acct_gather.conf" "5" "May 2013" "acct_gather.conf 1.0" "Slurm acct_gather.configuration file" +.TH "acct_gather.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" .SH "NAME" acct_gather.conf \- Slurm configuration file for the acct_gather plugins diff --git a/doc/man/man5/bluegene.conf.5 b/doc/man/man5/bluegene.conf.5 index e8e96c00a..cd07ced6d 100644 --- a/doc/man/man5/bluegene.conf.5 +++ b/doc/man/man5/bluegene.conf.5 @@ -1,4 +1,4 @@ -.TH "bluegene.conf" "5" "August 2011" "bluegene.conf 2.3" "SLURM configuration file" +.TH "bluegene.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" .SH "NAME" bluegene.conf \- SLURM configuration file for BlueGene systems diff --git a/doc/man/man5/cgroup.conf.5 b/doc/man/man5/cgroup.conf.5 index 39827d62c..3e6338c7c 100644 --- a/doc/man/man5/cgroup.conf.5 +++ b/doc/man/man5/cgroup.conf.5 @@ -1,5 +1,4 @@ -.TH "cgroup.conf" "5" "July 2013" "cgroup.conf 2.6" \ -"Slurm cgroup configuration file" +.TH "cgroup.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" .SH "NAME" cgroup.conf \- Slurm configuration file for the cgroup support diff --git a/doc/man/man5/cray.conf.5 b/doc/man/man5/cray.conf.5 index d69793823..713c939e4 100644 --- a/doc/man/man5/cray.conf.5 +++ b/doc/man/man5/cray.conf.5 @@ -1,4 +1,4 @@ -.TH "cray.conf" "5" "December 2013" "cray.conf 2.6" "Slurm configuration file" +.TH "cray.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" .SH "NAME" cray.conf \- Slurm configuration file for the Cray\-specific information @@ -82,7 +82,6 @@ The default value is NULL, which will load the user name from the \fImy.cnf\fR f \fBSubAllocate\fR=Yes Only allocate requested node resources instead of the whole node. In both cases the user will be charged for the entire node. -This is the Slurm <=2.5 behavior. .TP \fBSyncTimeout\fR=<seconds> diff --git a/doc/man/man5/ext_sensors.conf.5 b/doc/man/man5/ext_sensors.conf.5 index 46a94558a..086cf257f 100644 --- a/doc/man/man5/ext_sensors.conf.5 +++ b/doc/man/man5/ext_sensors.conf.5 @@ -1,5 +1,4 @@ -.TH "ext_sensors.conf" "5" "February 2013" "ext_sensors.conf 2.2" \ -"Slurm external sensors plugin configuration file" +.TH "ext_sensors.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" .SH "NAME" ext_sensors.conf \- Slurm configuration file for the external sensors plugin @@ -111,4 +110,4 @@ details. 
.SH "SEE ALSO" .LP -\fBslurm.conf\fR(5) +\fBslurm.conf\fR(5) diff --git a/doc/man/man5/gres.conf.5 b/doc/man/man5/gres.conf.5 index f8b2a0002..01c4531f2 100644 --- a/doc/man/man5/gres.conf.5 +++ b/doc/man/man5/gres.conf.5 @@ -1,4 +1,5 @@ -.TH "gres.conf" "5" "April 2014" "gres.conf 14.11" "Slurm configuration file" +.TH "gres.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" + .SH "NAME" gres.conf \- Slurm configuration file for generic resource management. diff --git a/doc/man/man5/nonstop.conf.5 b/doc/man/man5/nonstop.conf.5 index a9b36c467..3eeab3cce 100644 --- a/doc/man/man5/nonstop.conf.5 +++ b/doc/man/man5/nonstop.conf.5 @@ -1,4 +1,5 @@ -.TH "nonstop.conf" "5" "February 2014" "nonstop.conf 14.03" "Slurm configuration file" +.TH "nonstop.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" + .SH "NAME" nonstop.conf \- Slurm configuration file for fault-tolerant computing. @@ -69,7 +70,7 @@ If a job requires replacement resources and none are immediately available, then permit a job to extend its time limit by the length of time required to secure replacement resources up to the number of minutes specified by \fBTimeLimitDelay\fR. -This option will only take effect if no hot spare resouces are available at +This option will only take effect if no hot spare resources are available at the time replacement resources are requested. This time limit extension is in addition to the value calculated using the \fBTimeLimitExtend\fR. diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index c15aa2871..9a9264eca 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1,4 +1,4 @@ -.TH "slurm.conf" "5" "May 2014" "slurm.conf 14.11" "Slurm configuration file" +.TH "slurm.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" .SH "NAME" slurm.conf \- Slurm configuration file @@ -939,7 +939,7 @@ or "jobacct_gather/cgroup" must be configured. \fBNOTE:\fR Changing this configuration parameter changes the contents of the messages between Slurm daemons. Any previously running job steps are managed by a slurmstepd daemon that will persist through the lifetime of -that job step and not change it's communication prototol. Only change this +that job step and not change it's communication protocol. Only change this configuration parameter when there are no running job steps. .TP @@ -1282,7 +1282,7 @@ May not exceed 65533. \fBMemLimitEnforce\fR If set to "no" then Slurm will not terminate the job or the job step if they exceeds the value requested using the --mem-per-cpu option of -salloc/sbatch/srun. This is usefull if jobs need to specify --mem-per-cpu +salloc/sbatch/srun. This is useful if jobs need to specify --mem-per-cpu for scheduling but they should not be terminate if they exceed the estimated value. The default value is 'yes', terminate the job/step if exceed the requested memory. @@ -1866,18 +1866,21 @@ the definitions in the slurm.conf file. .TP \fBRequeueExit\fR Enables automatic job requeue for jobs which exit with the specified values. -Separate multiple exit code by a comma. Jobs will be put back in to pending -state and later scheduled again. Restarted jobs will have the environment -variable \fBSLURM_RESTART_COUNT\fP set to the number of times the job has been -restarted. +Separate multiple exit code by a comma and/or specify numeric ranges using a +"\-" separator (e.g. "RequeueExit=1\-9,18") +Jobs will be put back in to pending state and later scheduled again. 
+Restarted jobs will have the environment variable \fBSLURM_RESTART_COUNT\fP +set to the number of times the job has been restarted. .TP \fBRequeueExitHold\fR Enables automatic requeue of jobs into pending state in hold, meaning their -priority is zero. Separate multiple exit code by a comma. These jobs are put -in the \fBJOB_SPECIAL_EXIT\fP exit state. Restarted jobs will have the -environment variable \fBSLURM_RESTART_COUNT\fP set to the number of times the -job has been restarted. +priority is zero. +Separate multiple exit codes with a comma and/or specify numeric ranges using a +"\-" separator (e.g. "RequeueExitHold=10\-12,16"). +These jobs are put in the \fBJOB_SPECIAL_EXIT\fP exit state. +Restarted jobs will have the environment variable \fBSLURM_RESTART_COUNT\fP +set to the number of times the job has been restarted. .TP \fBResumeProgram\fR @@ -2099,7 +2102,10 @@ This option applies only to \fBSchedulerType=sched/backfill\fR. .TP \fBbf_min_age_reserve=#\fR The backfill scheduler will not reserve resources for pending jobs until they -have been pending for at least the specified number of seconds. +have been runnable for at least the specified number of seconds. +In addition, jobs waiting for less than the specified number of seconds will +not prevent a newly submitted job from starting immediately, even if the newly +submitted job has a lower priority. This can be valuable if jobs lack time limits or all time limits have the same value. The default value is zero, which will reserve resources for any pending job @@ -2179,6 +2185,15 @@ on such hardware, Slurm will consider each NUMA node within the socket as a separate socket by default. Use the Ignore_NUMA option to report the correct socket count, but \fBnot\fR optimize resource allocations on the NUMA nodes. .TP +\fBinventory_interval=#\fR +On a Cray system using Slurm on top of ALPS, this limits the number of times +a Basil Inventory call is made. Normally this call happens on every scheduling +pass to attempt to close a node state change window with respect to +what ALPS has. This call is rather slow, so making it less frequent improves +performance dramatically, but when a node changes state the +window is as large as this setting. In an HTC environment this setting is +essential and a value of around 10 seconds is advised. +.TP \fBkill_invalid_depend\fR If a job has an invalid dependency and it can never run terminate it and set its state to be JOB_CANCELLED. By default the job stays pending @@ -2250,6 +2265,11 @@ held so that the scheduler can dispatch it to another host. How frequently, in seconds, the main scheduling loop will execute and test all pending jobs. The default value is 60 seconds. +.TP +\fBsched_max_job_start=#\fR +The maximum number of jobs that the main scheduling logic will start in any +single execution. +The default value is zero, which imposes no limit. .RE .TP @@ -2459,11 +2479,16 @@ Also see the partition configuration parameter \fBLLN\fR use the least loaded nodes in selected partitions. .TP \fBCR_Pack_Nodes\fR -Rather than evenly distributing a job's tasks evenly across it's allocated -nodes, pack them as tightly as possible on the nodes. For example, a job -starting ten tasks acrosss two nodes each without this option will start -five tasks on each of the two nodes. With this option, eight tasks will be -started on the first node and two on the second node. +If a job allocation contains more resources than will be used for launching +tasks (e.g.
if whole nodes are allocated to a job), then rather than +distributing a job's tasks evenly across its allocated nodes, pack them as +tightly as possible on these nodes. +For example, consider a job allocation containing two \fBentire\fR nodes with +eight CPUs each. +If the job starts ten tasks across those two nodes without this option, it will +start five tasks on each of the two nodes. +With this option, eight tasks will be started on the first node and two tasks +on the second node. .TP \fBCR_Socket\fR Sockets are consumable resources. @@ -3097,7 +3122,7 @@ memory limit. For example, if a job's real memory limit is 500MB and VSizeFactor is set to 101 then the job will be killed if its real memory exceeds 500MB or its virtual memory exceeds 505MB (101 percent of the real memory limit). -The default valus is 0, which disables enforcement of virtual memory limits. +The default value is 0, which disables enforcement of virtual memory limits. The value may not exceed 65533 percent. .TP diff --git a/doc/man/man5/slurmdbd.conf.5 b/doc/man/man5/slurmdbd.conf.5 index 720af0630..55844519b 100644 --- a/doc/man/man5/slurmdbd.conf.5 +++ b/doc/man/man5/slurmdbd.conf.5 @@ -1,4 +1,5 @@ -.TH "slurmdbd.conf" "21" "July 2014" "slurmdbd.conf 14.11" "Slurm configuration file" +.TH "slurmdbd.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" + .SH "NAME" slurmdbd.conf \- Slurm Database Daemon (SlurmDBD) configuration file @@ -54,7 +55,7 @@ reservation data, no otherwise. Default is no. This script can be executed every time a rollup happens (every hour, day and month), depending on the Purge*After options. This script is used to transfer accounting records out of the database into an archive. It is -used in place of the internal process used to acrhive objects. +used in place of the internal process used to archive objects. The script is executed with a no arguments, The following environment variables are set. .RS @@ -207,7 +208,7 @@ SQL statements/queries when dealing with wckeys in the database. .TP \fBDebugLevel\fR The level of detail to provide the Slurm Database Daemon's logs. -The default valus is \fBinfo\fR. +The default value is \fBinfo\fR. .RS .TP 10 \fBquiet\fR diff --git a/doc/man/man5/topology.conf.5 b/doc/man/man5/topology.conf.5 index b15577f23..2aea3f139 100644 --- a/doc/man/man5/topology.conf.5 +++ b/doc/man/man5/topology.conf.5 @@ -1,4 +1,4 @@ -.TH "topology.conf" "5" "December 2011" "topology.conf 2.0" "Slurm configuration file" +.TH "topology.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" .SH "NAME" topology.conf \- Slurm configuration file for defining the network topology diff --git a/doc/man/man5/wiki.conf.5 b/doc/man/man5/wiki.conf.5 index b700ee485..96bfa481b 100644 --- a/doc/man/man5/wiki.conf.5 +++ b/doc/man/man5/wiki.conf.5 @@ -1,4 +1,5 @@ -.TH "wiki.conf" "5" "December 2007" "wiki.conf 2.0" "Slurm configuration file" +.TH "wiki.conf" "5" "Slurm Configuration File" "April 2015" "Slurm Configuration File" + .SH "NAME" wiki.conf \- Slurm configuration file for wiki and wiki2 scheduler plugins .SH "DESCRIPTION" diff --git a/doc/man/man8/slurmctld.8 b/doc/man/man8/slurmctld.8 index 85a5e58f2..7e62ec694 100644 --- a/doc/man/man8/slurmctld.8 +++ b/doc/man/man8/slurmctld.8 @@ -1,4 +1,5 @@ -.TH SLURMCTLD "8" "March 2010" "slurmctld 2.2" "Slurm components" +.TH slurmctld "8" "Slurm Daemon" "April 2015" "Slurm Daemon" + .SH "NAME" slurmctld \- The central management daemon of Slurm.
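Purely as an illustration of the slurm.conf options documented in the slurm.conf.5 changes above (the values below are hypothetical and not part of this patch), a site might combine the new exit-code range syntax and SchedulerParameters options as follows:

    RequeueExit=1-4,18
    RequeueExitHold=10-12,16
    SchedulerParameters=bf_min_age_reserve=600,inventory_interval=10,sched_max_job_start=100
    SelectTypeParameters=CR_Core_Memory,CR_Pack_Nodes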
.SH "SYNOPSIS" diff --git a/doc/man/man8/slurmd.8 b/doc/man/man8/slurmd.8 index bd9c96555..7b497bd30 100644 --- a/doc/man/man8/slurmd.8 +++ b/doc/man/man8/slurmd.8 @@ -1,4 +1,4 @@ -.TH SLURMD "8" "March 2012" "slurmd 2.4" "Slurm components" +.TH slurmd "8" "Slurm Daemon" "April 2015" "Slurm Daemon" .SH "NAME" slurmd \- The compute node daemon for SLURM. diff --git a/doc/man/man8/slurmdbd.8 b/doc/man/man8/slurmdbd.8 index e78e0de9b..2b7ef3c9e 100644 --- a/doc/man/man8/slurmdbd.8 +++ b/doc/man/man8/slurmdbd.8 @@ -1,4 +1,5 @@ -.TH slurmdbd "8" "March 2010" "slurmdbd 2.2" "Slurm components" +.TH slurmdbd "8" "Slurm Daemon" "April 2015" "Slurm Daemon" + .SH "NAME" slurmdbd \- Slurm Database Daemon. diff --git a/doc/man/man8/slurmstepd.8 b/doc/man/man8/slurmstepd.8 index cbe761443..d36dc49e0 100644 --- a/doc/man/man8/slurmstepd.8 +++ b/doc/man/man8/slurmstepd.8 @@ -1,4 +1,5 @@ -.TH SLURMSTEPD "8" "September 2006" "slurmstepd 2.0" "Slurm components" +.TH slurmstepd "8" "Slurm Component" "April 2015" "Slurm Component" + .SH "NAME" slurmstepd \- The job step manager for SLURM. .SH "SYNOPSIS" @@ -10,6 +11,7 @@ and terminates when the job step does. It is responsible for managing input and output (stdin, stdout and stderr) for the job step along with its accounting and signal processing. \fBslurmstepd\fR should not be initiated by users or system administrators. + .SH "COPYING" Copyright (C) 2006 The Regents of the University of California. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). diff --git a/doc/man/man8/spank.8 b/doc/man/man8/spank.8 index e3b0896c5..62c5dfdc2 100644 --- a/doc/man/man8/spank.8 +++ b/doc/man/man8/spank.8 @@ -1,4 +1,4 @@ -.TH "SPANK" "8" "December 2014" "SPANK" "SLURM plug\-in architecture for Node and job (K)control" +.TH SPANK "8" "Slurm Component" "April 2015" "Slurm Component" .SH "NAME" \fBSPANK\fR \- SLURM Plug\-in Architecture for Node and job (K)control @@ -18,7 +18,8 @@ the \fBSPANK\fR infrastructure provides administrators and other developers a low cost, low effort ability to dynamically modify the runtime behavior of SLURM job launch. .LP -\fBNote\fR: \fBSPANK\fR plugins must be recompiled for each new version. +\fBNote\fR: \fBSPANK\fR plugins using the Slurm APIs need to be recompiled when +upgrading Slurm to a new major release. .LP .SH "SPANK PLUGINS" @@ -181,8 +182,11 @@ are (int *, char ***). See \fBspank.h\fR for more details, and \fBEXAMPLES\fR below for an example of \fBspank_get_item\fR usage. .LP -\fBSPANK\fR plugins may also use the \fBspank_getenv\fR, -\fBspank_setenv\fR, and \fBspank_unsetenv\fR functions to +\fBSPANK\fR functions in the \fBlocal\fB and \fBallocator\fR environment should +use the \fBgetenv\fR, \fBsetenv\fR, and \fBunsetenv\fR functions to view and +modify the job's environment. +\fBSPANK\fR functions in the \fBremote\fR environment should use the +\fBspank_getenv\fR, \fBspank_setenv\fR, and \fBspank_unsetenv\fR functions to view and modify the job's environment. 
\fBspank_getenv\fR searches the job's environment for the environment variable \fIvar\fR and copies the current value into a buffer \fIbuf\fR diff --git a/slurm.spec b/slurm.spec index 4b67bbfcc..146524383 100644 --- a/slurm.spec +++ b/slurm.spec @@ -16,7 +16,7 @@ # --with cray %_with_cray 1 build for a Cray system without ALPS # --with cray_alps %_with_cray_alps 1 build for a Cray system with ALPS # --with cray_network %_with_cray_network 1 build for a non-Cray system with a Cray network -# --with debug %_with_debug 1 enable extra debugging within Slurm +# --without debug %_without_debug 1 don't compile with debugging symbols # --with lua %_with_lua 1 build Slurm lua bindings (proctrack only for now) # --without munge %_without_munge 1 don't build auth-munge RPM # --with mysql %_with_mysql 1 require mysql support @@ -45,7 +45,6 @@ %slurm_without_opt cray %slurm_without_opt cray_alps %slurm_without_opt cray_network -%slurm_without_opt debug %slurm_without_opt sun_const %slurm_without_opt salloc_background %slurm_without_opt multiple_slurmd @@ -65,6 +64,9 @@ # Use readline by default on all systems %slurm_with_opt readline +# Use debug by default on all systems +%slurm_with_opt debug + # Build with PAM by default on linux %ifos linux %slurm_with_opt pam @@ -414,7 +416,7 @@ Gives the ability for Slurm to use Berkeley Lab Checkpoint/Restart %build %configure \ - %{?slurm_with_debug:--enable-debug} \ + %{!?slurm_with_debug:--disable-debug} \ %{?slurm_with_partial_attach:--enable-partial-attach} \ %{?slurm_with_sun_const:--enable-sun-const} \ %{?with_db2_dir:--with-db2-dir=%{?with_db2_dir}} \ @@ -581,10 +583,10 @@ test -f $RPM_BUILD_ROOT/etc/init.d/slurm && echo /etc/init.d/slurm >> $LIST test -f $RPM_BUILD_ROOT/usr/sbin/rcslurm && echo /usr/sbin/rcslurm >> $LIST -test -f $RPM_BUILD_ROOT/lib/systemd/system/slurmctld.service && - echo /lib/systemd/system/slurmctld.service >> $LIST -test -f $RPM_BUILD_ROOT/lib/systemd/system/slurmd.service && - echo /lib/systemd/system/slurmd.service >> $LIST +test -f $RPM_BUILD_ROOT/usr/lib/systemd/system/slurmctld.service && + echo /usr/lib/systemd/system/slurmctld.service >> $LIST +test -f $RPM_BUILD_ROOT/usr/lib/systemd/system/slurmd.service && + echo /usr/lib/systemd/system/slurmd.service >> $LIST test -f $RPM_BUILD_ROOT/opt/modulefiles/slurm/%{version}-%{release} && echo /opt/modulefiles/slurm/%{version}-%{release} >> $LIST @@ -656,8 +658,8 @@ test -f $RPM_BUILD_ROOT/etc/init.d/slurmdbd && echo /etc/init.d/slurmdbd >> $LIST test -f $RPM_BUILD_ROOT/usr/sbin/rcslurmdbd && echo /usr/sbin/rcslurmdbd >> $LIST -test -f $RPM_BUILD_ROOT/lib/systemd/system/slurmdbd.service && - echo /lib/systemd/system/slurmdbd.service >> $LIST +test -f $RPM_BUILD_ROOT/usr/lib/systemd/system/slurmdbd.service && + echo /usr/lib/systemd/system/slurmdbd.service >> $LIST LIST=./sql.files touch $LIST diff --git a/src/common/cbuf.c b/src/common/cbuf.c index e847eb009..df00baf94 100644 --- a/src/common/cbuf.c +++ b/src/common/cbuf.c @@ -14,15 +14,15 @@ * Software Foundation; either version 2 of the License, or (at your option) * any later version. * - * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under - * certain conditions as described in each individual source file, and - * distribute linked combinations including the two. You must obey the GNU - * General Public License in all respects for all of the code used other than - * OpenSSL. 
If you modify file(s) with this exception, you may extend this - * exception to your version of the file(s), but you are not obligated to do + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do * so. If you do not wish to do so, delete this exception statement from your - * version. If you delete this exception statement from all source files in + * version. If you delete this exception statement from all source files in * the program, then also delete it here. * * LSD-Tools is distributed in the hope that it will be useful, but WITHOUT @@ -712,7 +712,8 @@ cbuf_peek_line (cbuf_t src, char *dstbuf, int len, int lines) if (m > 0) { pdst = dstbuf; l = cbuf_reader(src, m, (cbuf_iof) cbuf_put_mem, &pdst); - assert(l == m); + if (l) + assert(l == m); } assert(m < len); dstbuf[m] = '\0'; @@ -748,7 +749,8 @@ cbuf_read_line (cbuf_t src, char *dstbuf, int len, int lines) if (m > 0) { pdst = dstbuf; l = cbuf_reader(src, m, (cbuf_iof) cbuf_put_mem, &pdst); - assert(l == m); + if (l) + assert(l == m); } assert(m < len); dstbuf[m] = '\0'; @@ -788,7 +790,8 @@ cbuf_replay_line (cbuf_t src, char *dstbuf, int len, int lines) if (m > 0) { pdst = dstbuf; l = cbuf_replayer(src, m, (cbuf_iof) cbuf_put_mem, &pdst); - assert(l == m); + if (l) + assert(l == m); } /* Append newline if needed and space allows. */ @@ -895,7 +898,8 @@ cbuf_write_line (cbuf_t dst, char *srcbuf, int *ndropped) */ if (ncopy > 0) { n = cbuf_writer(dst, ncopy, (cbuf_iof) cbuf_get_mem, &psrc, &d); - assert(n == ncopy); + if (n) + assert(n == ncopy); ndrop += d; } /* Append newline if needed. diff --git a/src/common/gres.c b/src/common/gres.c index 146edc5b8..305991875 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -1628,11 +1628,23 @@ extern int _node_config_validate(char *node_name, char *orig_config, context_ptr->gres_type, node_name, gres_data->gres_cnt_found, gres_cnt); } - gres_data->gres_cnt_found = gres_cnt; - updated_config = true; + if ((gres_data->gres_cnt_found != NO_VAL) && + (gres_data->gres_cnt_alloc != 0)) { + if (reason_down && (*reason_down == NULL)) { + xstrfmtcat(*reason_down, + "%s count changed and jobs are " + "using them (%u != %u)", + context_ptr->gres_type, + gres_data->gres_cnt_found, gres_cnt); + } + rc = EINVAL; + } else { + gres_data->gres_cnt_found = gres_cnt; + updated_config = true; + } } if (updated_config == false) - return SLURM_SUCCESS; + return rc; if ((set_cnt == 0) && (set_cnt != gres_data->topo_cnt)) { /* Need to clear topology info */ diff --git a/src/common/layouts_mgr.c b/src/common/layouts_mgr.c index 857f82ced..3fffa6459 100644 --- a/src/common/layouts_mgr.c +++ b/src/common/layouts_mgr.c @@ -86,12 +86,12 @@ static void layouts_conf_spec_free(void* x) /* * layout ops - operations associated to layout plugins * - * This struct is populated while opening the plugin and linking the + * This struct is populated while opening the plugin and linking the * associated symbols. See layout_syms description for the name of the "public" * symbols associated to this structure fields. 
* * Notes : the layouts plugins are able to access the entities hashtable in order - * to read/create/modify entities as necessary during the load_entities and + * to read/create/modify entities as necessary during the load_entities and * build_layout API calls. * */ @@ -141,10 +141,10 @@ static void _layout_plugins_destroy(layout_plugin_t *lp) { * like the key str itself and custom destroy/dump functions. * * The layouts manager keeps an hash table of the various keydefs and use - * the factorized details while parsing the configuration and creating the + * the factorized details while parsing the configuration and creating the * entity_data_t structs associated to the entities. * - * Note custom_* functions are used if they are not NULL* and type equals + * Note custom_* functions are used if they are not NULL* and type equals * L_T_CUSTOM */ typedef struct layouts_keydef_st { @@ -166,7 +166,7 @@ static const char* layouts_keydef_idfunc(void* item) } /* - * layouts_mgr_t - the main structure holding all the layouts, entities and + * layouts_mgr_t - the main structure holding all the layouts, entities and * shared keydefs as well as conf elements and plugins details. */ typedef struct layouts_mgr_st { @@ -326,7 +326,8 @@ static void _entity_add_data(entity_t* e, const char* key, void* data) freefunc = hkey->custom_destroy; } rc = entity_add_data(e, hkey->key, data, freefunc); - xassert(rc); + if (rc) + xassert(rc); } /*****************************************************************************\ @@ -413,7 +414,8 @@ static int _slurm_layouts_init_layouts_walk_helper(void* x, void* arg) layout_init(plugin->layout, spec->name, spec->type, 0, plugin->ops->spec->struct_type); inserted_item = xhash_add(mgr->layouts, plugin->layout); - xassert(inserted_item == plugin->layout); + if (inserted_item) + xassert(inserted_item == plugin->layout); _slurm_layouts_init_keydef(mgr->keydefs, plugin->ops->spec->keyspec, plugin); @@ -668,9 +670,11 @@ static int _layouts_read_config_post(layout_plugin_t* plugin, } xfree(root_nodename); root_node = xtree_add_child(tree, NULL, e, XTREE_APPEND); - xassert(root_node); + if (root_node) + xassert(root_node); inserted_node = list_append(e->nodes, root_node); - xassert(inserted_node == root_node); + if (inserted_node) + xassert(inserted_node == root_node); break; } return SLURM_SUCCESS; @@ -745,7 +749,7 @@ static int _layouts_read_config(layout_plugin_t* plugin) "skipping...", i); continue; } - + /* look for the entity in the entities hash table*/ e = xhash_get(mgr->entities, e_name); if (!e) { @@ -830,7 +834,7 @@ static int _layouts_read_config(layout_plugin_t* plugin) goto cleanup; } } - + rc = SLURM_SUCCESS; cleanup: @@ -891,12 +895,14 @@ uint8_t _layouts_build_xtree_walk(xtree_node_t* node, } free(enclosed_name); enclosed_node = xtree_add_child(p->tree, node, - enclosed_e, + enclosed_e, XTREE_APPEND); - xassert(enclosed_node); + if (enclosed_node) + xassert(enclosed_node); inserted_node = list_append(enclosed_e->nodes, enclosed_node); - xassert(inserted_node == enclosed_node); + if (inserted_node) + xassert(inserted_node == enclosed_node); } hostlist_destroy(enclosed_hostlist); } @@ -1169,7 +1175,7 @@ int slurm_layouts_load_config(void) /* init entity structure on the heap */ entity = (entity_t*) xmalloc(sizeof(struct entity_st)); - entity_init(entity, node_ptr->name, 0); + entity_init(entity, node_ptr->name, 0); entity->ptr = node_ptr; /* add to mgr entity hashtable */ diff --git a/src/common/pack.c b/src/common/pack.c index ff221ab03..b925fe71d 100644 --- 
a/src/common/pack.c +++ b/src/common/pack.c @@ -100,7 +100,7 @@ Buf create_buf(char *data, int size) Buf my_buf; if (size > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, size, MAX_BUF_SIZE); return NULL; } @@ -126,7 +126,7 @@ void free_buf(Buf my_buf) void grow_buf (Buf buffer, int size) { if ((buffer->size + size) > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, (buffer->size + size), MAX_BUF_SIZE); return; } @@ -141,7 +141,7 @@ Buf init_buf(int size) Buf my_buf; if (size > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, size, MAX_BUF_SIZE); return NULL; } @@ -177,7 +177,7 @@ void pack_time(time_t val, Buf buffer) if (remaining_buf(buffer) < sizeof(n64)) { if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, (buffer->size + BUF_SIZE), MAX_BUF_SIZE); return; @@ -223,7 +223,7 @@ void packdouble(double val, Buf buffer) nl = HTON_uint64(uval.u); if (remaining_buf(buffer) < sizeof(nl)) { if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, (buffer->size + BUF_SIZE), MAX_BUF_SIZE); return; @@ -271,7 +271,7 @@ void pack64(uint64_t val, Buf buffer) if (remaining_buf(buffer) < sizeof(nl)) { if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, (buffer->size + BUF_SIZE), MAX_BUF_SIZE); return; @@ -310,7 +310,7 @@ void pack32(uint32_t val, Buf buffer) if (remaining_buf(buffer) < sizeof(nl)) { if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, (buffer->size + BUF_SIZE), MAX_BUF_SIZE); return; @@ -436,7 +436,7 @@ void pack16(uint16_t val, Buf buffer) if (remaining_buf(buffer) < sizeof(ns)) { if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, (buffer->size + BUF_SIZE), MAX_BUF_SIZE); return; @@ -474,7 +474,7 @@ void pack8(uint8_t val, Buf buffer) { if (remaining_buf(buffer) < sizeof(uint8_t)) { if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, (buffer->size + BUF_SIZE), MAX_BUF_SIZE); return; @@ -511,13 +511,13 @@ void packmem(char *valp, uint32_t size_val, Buf buffer) uint32_t ns = htonl(size_val); if (size_val > MAX_PACK_MEM_LEN) { - error("%s: Buffer to be packed is too large (%u > %d)", + error("%s: Buffer to be packed is too large (%u > %u)", __func__, size_val, MAX_PACK_MEM_LEN); return; } if (remaining_buf(buffer) < (sizeof(ns) + size_val)) { if ((buffer->size + size_val + BUF_SIZE) > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, (buffer->size + size_val + BUF_SIZE), MAX_BUF_SIZE); return; @@ -556,7 +556,7 @@ int unpackmem_ptr(char **valp, uint32_t * size_valp, Buf buffer) buffer->processed += sizeof(ns); if (*size_valp > MAX_PACK_MEM_LEN) { - error("%s: Buffer to be unpacked is too large (%u > %d)", + error("%s: Buffer 
to be unpacked is too large (%u > %u)", __func__, *size_valp, MAX_PACK_MEM_LEN); return SLURM_ERROR; } @@ -591,7 +591,7 @@ int unpackmem(char *valp, uint32_t * size_valp, Buf buffer) buffer->processed += sizeof(ns); if (*size_valp > MAX_PACK_MEM_LEN) { - error("%s: Buffer to be unpacked is too large (%u > %d)", + error("%s: Buffer to be unpacked is too large (%u > %u)", __func__, *size_valp, MAX_PACK_MEM_LEN); return SLURM_ERROR; } @@ -626,7 +626,7 @@ int unpackmem_xmalloc(char **valp, uint32_t * size_valp, Buf buffer) buffer->processed += sizeof(ns); if (*size_valp > MAX_PACK_MEM_LEN) { - error("%s: Buffer to be unpacked is too large (%u > %d)", + error("%s: Buffer to be unpacked is too large (%u > %u)", __func__, *size_valp, MAX_PACK_MEM_LEN); return SLURM_ERROR; } @@ -662,7 +662,7 @@ int unpackmem_malloc(char **valp, uint32_t * size_valp, Buf buffer) *size_valp = ntohl(ns); buffer->processed += sizeof(ns); if (*size_valp > MAX_PACK_MEM_LEN) { - error("%s: Buffer to be unpacked is too large (%u > %d)", + error("%s: Buffer to be unpacked is too large (%u > %u)", __func__, *size_valp, MAX_PACK_MEM_LEN); return SLURM_ERROR; } @@ -694,7 +694,7 @@ void packstr_array(char **valp, uint32_t size_val, Buf buffer) if (remaining_buf(buffer) < sizeof(ns)) { if ((buffer->size + BUF_SIZE) > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, (buffer->size + BUF_SIZE), MAX_BUF_SIZE); return; @@ -734,7 +734,7 @@ int unpackstr_array(char ***valp, uint32_t * size_valp, Buf buffer) buffer->processed += sizeof(ns); if (*size_valp > MAX_PACK_ARRAY_LEN) { - error("%s: Buffer to be unpacked is too large (%u > %d)", + error("%s: Buffer to be unpacked is too large (%u > %u)", __func__, *size_valp, MAX_PACK_ARRAY_LEN); return SLURM_ERROR; } @@ -759,7 +759,7 @@ void packmem_array(char *valp, uint32_t size_val, Buf buffer) { if (remaining_buf(buffer) < size_val) { if ((buffer->size + size_val + BUF_SIZE) > MAX_BUF_SIZE) { - error("%s: Buffer size limit exceeded (%d > %d)", + error("%s: Buffer size limit exceeded (%u > %u)", __func__, (buffer->size + size_val + BUF_SIZE), MAX_BUF_SIZE); return; diff --git a/src/common/plugstack.c b/src/common/plugstack.c index 1a2a4eabe..8ec118869 100644 --- a/src/common/plugstack.c +++ b/src/common/plugstack.c @@ -553,7 +553,7 @@ static int _spank_stack_load(struct spank_stack *stack, const char *path) * Try to open plugstack.conf. A missing config file is not an * error, but is equivalent to an empty file. 
*/ - if (!(fp = safeopen(path, "r", SAFEOPEN_NOCREATE))) { + if (!(fp = safeopen(path, "r", SAFEOPEN_NOCREATE|SAFEOPEN_LINK_OK))) { if (errno == ENOENT) return (0); error("spank: Failed to open %s: %m", path); diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index c8a4f2576..95a7770ed 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -1330,9 +1330,11 @@ extern char *rpc_num2string(uint16_t opcode); int rc; \ while (remaining > 0) { \ rc = read(fd, ptr, remaining); \ - if ((rc == 0) && (remaining == size)) \ + if ((rc == 0) && (remaining == size)) { \ + debug("%s:%d: %s: safe_read EOF", \ + __FILE__, __LINE__, __CURRENT_FUNC__); \ goto rwfail; \ - else if (rc == 0) { \ + } else if (rc == 0) { \ debug("%s:%d: %s: safe_read (%d of %d) EOF", \ __FILE__, __LINE__, __CURRENT_FUNC__, \ remaining, (int)size); \ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 0f2cda4b8..a8fcc8747 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -3335,15 +3335,8 @@ _unpack_node_info_msg(node_info_msg_t ** msg, Buf buffer, safe_unpack32(&((*msg)->node_scaling), buffer); safe_unpack_time(&((*msg)->last_update), buffer); - if (protocol_version == SLURM_PROTOCOL_VERSION) { - node = (*msg)->node_array = - xmalloc_nz(sizeof(node_info_t) * - (*msg)->record_count); - } else { - node = (*msg)->node_array = - xmalloc(sizeof(node_info_t) * - (*msg)->record_count); - } + node = (*msg)->node_array = + xmalloc(sizeof(node_info_t) * (*msg)->record_count); /* load individual job info */ for (i = 0; i < (*msg)->record_count; i++) { @@ -4655,15 +4648,8 @@ _unpack_job_step_info_response_msg(job_step_info_response_msg_t** msg, safe_unpack_time(&(*msg)->last_update, buffer); safe_unpack32(&(*msg)->job_step_count, buffer); - if (protocol_version == SLURM_PROTOCOL_VERSION) { - step = (*msg)->job_steps = - xmalloc_nz(sizeof(job_step_info_t) * - (*msg)->job_step_count); - } else { - step = (*msg)->job_steps = - xmalloc(sizeof(job_step_info_t) * - (*msg)->job_step_count); - } + step = (*msg)->job_steps = xmalloc(sizeof(job_step_info_t) * + (*msg)->job_step_count); for (i = 0; i < (*msg)->job_step_count; i++) if (_unpack_job_step_info_members(&step[i], buffer, @@ -4704,15 +4690,8 @@ _unpack_job_info_msg(job_info_msg_t ** msg, Buf buffer, safe_unpack32(&((*msg)->record_count), buffer); safe_unpack_time(&((*msg)->last_update), buffer); - if (protocol_version == SLURM_PROTOCOL_VERSION) { - job = (*msg)->job_array = - xmalloc_nz(sizeof(job_info_t) * - (*msg)->record_count); - } else { - job = (*msg)->job_array = - xmalloc(sizeof(job_info_t) * - (*msg)->record_count); - } + job = (*msg)->job_array = xmalloc(sizeof(job_info_t) * + (*msg)->record_count); /* load individual job info */ for (i = 0; i < (*msg)->record_count; i++) { if (_unpack_job_info_members(&job[i], buffer, @@ -9558,13 +9537,8 @@ extern int slurm_unpack_block_info_msg( safe_unpack32(&(buf->record_count), buffer); safe_unpack_time(&(buf->last_update), buffer); - if (protocol_version == SLURM_PROTOCOL_VERSION) { - buf->block_array = xmalloc_nz(sizeof(block_info_t) * - buf->record_count); - } else { - buf->block_array = xmalloc(sizeof(block_info_t) * - buf->record_count); - } + buf->block_array = xmalloc(sizeof(block_info_t) * + buf->record_count); for (i=0; i<buf->record_count; i++) { if (slurm_unpack_block_info_members( diff --git a/src/common/xcgroup_read_config.c b/src/common/xcgroup_read_config.c index 151d71355..6e388106b 100644 --- 
a/src/common/xcgroup_read_config.c +++ b/src/common/xcgroup_read_config.c @@ -123,6 +123,7 @@ static void conf_get_float (s_p_hashtbl_t *t, char *name, float *fp) return; if (str_to_float (str, fp) < 0) fatal ("cgroup.conf: Invalid value '%s' for %s", str, name); + xfree(str); } /* diff --git a/src/common/xstring.c b/src/common/xstring.c index 79b1aed45..2e367d92b 100644 --- a/src/common/xstring.c +++ b/src/common/xstring.c @@ -119,7 +119,8 @@ static void makespace(char **str, int needed) xrealloc(*str, new_size); actual_size = xsize(*str); - xassert(actual_size == new_size); + if (actual_size) + xassert(actual_size == new_size); } } } @@ -348,8 +349,8 @@ char * xbasename(char *path) */ char * xstrdup(const char *str) { - size_t siz, - rsiz; + size_t siz; + size_t rsiz; char *result; if (str == NULL) { @@ -359,8 +360,8 @@ char * xstrdup(const char *str) result = (char *)xmalloc(siz); rsiz = strlcpy(result, str, siz); - - xassert(rsiz == siz-1); + if (rsiz) + xassert(rsiz == siz-1); return result; } diff --git a/src/database/mysql_common.c b/src/database/mysql_common.c index 4cb13b8ca..4f55b5af9 100644 --- a/src/database/mysql_common.c +++ b/src/database/mysql_common.c @@ -251,6 +251,7 @@ static int _mysql_make_table_current(mysql_conn_t *mysql_conn, char *table_name, if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) { xfree(query); xfree(old_index); + FREE_NULL_LIST(keys_list); return SLURM_ERROR; } xfree(query); @@ -762,6 +763,25 @@ extern int mysql_db_query(mysql_conn_t *mysql_conn, char *query) return rc; } +/* + * Executes a single delete sql query. + * Returns the number of deleted rows, <0 for failure. + */ +extern int mysql_db_delete_affected_rows(mysql_conn_t *mysql_conn, char *query) +{ + int rc = SLURM_SUCCESS; + + if (!mysql_conn || !mysql_conn->db_conn) { + fatal("You haven't inited this storage yet."); + return 0; /* For CLANG false positive */ + } + slurm_mutex_lock(&mysql_conn->lock); + if (!(rc = _mysql_query_internal(mysql_conn->db_conn, query))) + rc = mysql_affected_rows(mysql_conn->db_conn); + slurm_mutex_unlock(&mysql_conn->lock); + return rc; +} + extern int mysql_db_ping(mysql_conn_t *mysql_conn) { int rc; diff --git a/src/database/mysql_common.h b/src/database/mysql_common.h index 054bfd21a..e3adb3ccf 100644 --- a/src/database/mysql_common.h +++ b/src/database/mysql_common.h @@ -104,6 +104,7 @@ extern int mysql_db_get_db_connection(mysql_conn_t *mysql_conn, char *db_name, extern int mysql_db_close_db_connection(mysql_conn_t *mysql_conn); extern int mysql_db_cleanup(); extern int mysql_db_query(mysql_conn_t *mysql_conn, char *query); +extern int mysql_db_delete_affected_rows(mysql_conn_t *mysql_conn, char *query); extern int mysql_db_ping(mysql_conn_t *mysql_conn); extern int mysql_db_commit(mysql_conn_t *mysql_conn); extern int mysql_db_rollback(mysql_conn_t *mysql_conn); diff --git a/src/plugins/accounting_storage/mysql/as_mysql_archive.c b/src/plugins/accounting_storage/mysql/as_mysql_archive.c index e960e1de7..c3d8ad608 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_archive.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_archive.c @@ -48,6 +48,9 @@ #define SLURMDBD_2_5_VERSION 11 /* slurm version 2.5 */ +#define MAX_PURGE_LIMIT 50000 /* Number of records that are purged at a time + so that locks can be periodically released. 
*/ + typedef struct { char *cluster_nodes; char *cpu_count; @@ -472,12 +475,12 @@ static void _pack_local_job(local_job_t *object, packstr(object->name, buffer); packstr(object->nodelist, buffer); packstr(object->node_inx, buffer); - packstr(object->partition, buffer); - packstr(object->priority, buffer); - packstr(object->qos, buffer); - packstr(object->req_cpus, buffer); - packstr(object->req_mem, buffer); - packstr(object->resvid, buffer); + packstr(object->partition, buffer); /* priority */ + packstr(object->priority, buffer); /* qos */ + packstr(object->qos, buffer); /* req_cpus */ + packstr(object->req_cpus, buffer); /* req_mem */ + packstr(object->req_mem, buffer); /* resvid */ + packstr(object->resvid, buffer); /* partition */ packstr(object->start, buffer); packstr(object->state, buffer); packstr(object->submit, buffer); @@ -495,6 +498,21 @@ static int _unpack_local_job(local_job_t *object, { uint32_t tmp32; + /* For protocols <= 14_11, job_req_inx and it's corresponding enum, + * were out of sync. This caused the following variables to have the + * corresponding values: + * job->partition = priority + * job->priority = qos + * job->qos = req_cpus + * job->req_cpus = req_mem + * job->req_mem = resvid + * job->resvid = partition + * + * The values were packed in the above order. To unpack the values + * into the correct variables, the unpacking order is changed to + * accomodate the shift in values. job->partition is unpacked before + * job->start instead of after job->node_inx. */ + if (rpc_version >= SLURM_14_11_PROTOCOL_VERSION) { unpackstr_ptr(&object->account, &tmp32, buffer); unpackstr_ptr(&object->alloc_cpus, &tmp32, buffer); @@ -517,12 +535,12 @@ static int _unpack_local_job(local_job_t *object, unpackstr_ptr(&object->name, &tmp32, buffer); unpackstr_ptr(&object->nodelist, &tmp32, buffer); unpackstr_ptr(&object->node_inx, &tmp32, buffer); - unpackstr_ptr(&object->partition, &tmp32, buffer); unpackstr_ptr(&object->priority, &tmp32, buffer); unpackstr_ptr(&object->qos, &tmp32, buffer); unpackstr_ptr(&object->req_cpus, &tmp32, buffer); unpackstr_ptr(&object->req_mem, &tmp32, buffer); unpackstr_ptr(&object->resvid, &tmp32, buffer); + unpackstr_ptr(&object->partition, &tmp32, buffer); unpackstr_ptr(&object->start, &tmp32, buffer); unpackstr_ptr(&object->state, &tmp32, buffer); unpackstr_ptr(&object->submit, &tmp32, buffer); @@ -550,12 +568,12 @@ static int _unpack_local_job(local_job_t *object, unpackstr_ptr(&object->name, &tmp32, buffer); unpackstr_ptr(&object->nodelist, &tmp32, buffer); unpackstr_ptr(&object->node_inx, &tmp32, buffer); - unpackstr_ptr(&object->partition, &tmp32, buffer); unpackstr_ptr(&object->priority, &tmp32, buffer); unpackstr_ptr(&object->qos, &tmp32, buffer); unpackstr_ptr(&object->req_cpus, &tmp32, buffer); unpackstr_ptr(&object->req_mem, &tmp32, buffer); unpackstr_ptr(&object->resvid, &tmp32, buffer); + unpackstr_ptr(&object->partition, &tmp32, buffer); unpackstr_ptr(&object->start, &tmp32, buffer); unpackstr_ptr(&object->state, &tmp32, buffer); unpackstr_ptr(&object->submit, &tmp32, buffer); @@ -583,11 +601,11 @@ static int _unpack_local_job(local_job_t *object, unpackstr_ptr(&object->name, &tmp32, buffer); unpackstr_ptr(&object->nodelist, &tmp32, buffer); unpackstr_ptr(&object->node_inx, &tmp32, buffer); - unpackstr_ptr(&object->partition, &tmp32, buffer); unpackstr_ptr(&object->priority, &tmp32, buffer); unpackstr_ptr(&object->qos, &tmp32, buffer); unpackstr_ptr(&object->req_cpus, &tmp32, buffer); unpackstr_ptr(&object->resvid, &tmp32, buffer); + 
unpackstr_ptr(&object->partition, &tmp32, buffer); unpackstr_ptr(&object->start, &tmp32, buffer); unpackstr_ptr(&object->state, &tmp32, buffer); unpackstr_ptr(&object->submit, &tmp32, buffer); @@ -1633,12 +1651,12 @@ static uint32_t _archive_jobs(mysql_conn_t *mysql_conn, char *cluster_name, job.name = row[JOB_REQ_NAME]; job.nodelist = row[JOB_REQ_NODELIST]; job.node_inx = row[JOB_REQ_NODE_INX]; - job.partition = row[JOB_REQ_PARTITION]; - job.priority = row[JOB_REQ_PRIORITY]; - job.qos = row[JOB_REQ_QOS]; - job.req_cpus = row[JOB_REQ_REQ_CPUS]; - job.req_mem = row[JOB_REQ_REQ_MEM]; - job.resvid = row[JOB_REQ_RESVID]; + job.partition = row[JOB_REQ_PARTITION]; /* priority */ + job.priority = row[JOB_REQ_PRIORITY]; /* qos */ + job.qos = row[JOB_REQ_QOS]; /* cpus_req */ + job.req_cpus = row[JOB_REQ_REQ_CPUS]; /* mem_req */ + job.req_mem = row[JOB_REQ_REQ_MEM]; /* id_resv */ + job.resvid = row[JOB_REQ_RESVID]; /* partition */ job.start = row[JOB_REQ_START]; job.state = row[JOB_REQ_STATE]; job.submit = row[JOB_REQ_SUBMIT]; @@ -2232,11 +2250,16 @@ static int _execute_archive(mysql_conn_t *mysql_conn, return rc; } query = xstrdup_printf("delete from \"%s_%s\" where " - "time_start <= %ld && time_end != 0", - cluster_name, event_table, curr_end); + "time_start <= %ld && time_end != 0 " + "LIMIT %d", + cluster_name, event_table, curr_end, + MAX_PURGE_LIMIT); if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); - rc = mysql_db_query(mysql_conn, query); + + while ((rc = mysql_db_delete_affected_rows( + mysql_conn, query)) > 0); + xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't remove old event data"); @@ -2269,11 +2292,15 @@ exit_events: return rc; } query = xstrdup_printf("delete from \"%s_%s\" where " - "time_start <= %ld && time_end != 0", - cluster_name, suspend_table, curr_end); + "time_start <= %ld && time_end != 0 " + "LIMIT %d", + cluster_name, suspend_table, curr_end, + MAX_PURGE_LIMIT); if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); - rc = mysql_db_query(mysql_conn, query); + + while ((rc = mysql_db_delete_affected_rows( + mysql_conn, query)) > 0); xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't remove old suspend data"); @@ -2307,11 +2334,16 @@ exit_suspend: } query = xstrdup_printf("delete from \"%s_%s\" where " - "time_start <= %ld && time_end != 0", - cluster_name, step_table, curr_end); + "time_start <= %ld && time_end != 0 " + "LIMIT %d", + cluster_name, step_table, curr_end, + MAX_PURGE_LIMIT); if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); - rc = mysql_db_query(mysql_conn, query); + + while ((rc = mysql_db_delete_affected_rows( + mysql_conn, query)) > 0); + xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't remove old step data"); @@ -2345,11 +2377,15 @@ exit_steps: query = xstrdup_printf("delete from \"%s_%s\" " "where time_submit <= %ld " - "&& time_end != 0", - cluster_name, job_table, curr_end); + "&& time_end != 0 LIMIT %d", + cluster_name, job_table, curr_end, + MAX_PURGE_LIMIT); if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); - rc = mysql_db_query(mysql_conn, query); + + while ((rc = mysql_db_delete_affected_rows( + mysql_conn, query)) > 0); + xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't remove old job data"); @@ -2383,11 +2419,15 @@ exit_jobs: query = xstrdup_printf("delete from \"%s_%s\" " "where time_start <= %ld " - "&& time_end != 0", - cluster_name, resv_table, curr_end); + "&& 
time_end != 0 LIMIT %d", + cluster_name, resv_table, curr_end, + MAX_PURGE_LIMIT); if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); - rc = mysql_db_query(mysql_conn, query); + + while ((rc = mysql_db_delete_affected_rows( + mysql_conn, query)) > 0); + xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't remove old resv data"); diff --git a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c index 1bef5a234..f50b10ba3 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c @@ -890,6 +890,7 @@ static int _modify_unset_users(mysql_conn_t *mysql_conn, list_iterator_destroy(qos_itr); if (delta_itr) list_iterator_destroy(delta_itr); + FREE_NULL_LIST(delta_qos_list); if (list_count(mod_assoc->qos_list) || !list_count(assoc->qos_list)) modified = 1; @@ -2981,6 +2982,7 @@ end_it: if (rc != SLURM_ERROR) { _make_sure_users_have_default(mysql_conn, added_user_list); + FREE_NULL_LIST(added_user_list); if (txn_query) { xstrcat(txn_query, ";"); @@ -3037,8 +3039,7 @@ end_it: list_destroy(assoc_list); } } else { - if (added_user_list) - list_destroy(added_user_list); + FREE_NULL_LIST(added_user_list); xfree(txn_query); reset_mysql_conn(mysql_conn); } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_rollup.c b/src/plugins/accounting_storage/mysql/as_mysql_rollup.c index d4ce6cbb4..2ce6a065f 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_rollup.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_rollup.c @@ -228,13 +228,13 @@ static int _process_cluster_usage(mysql_conn_t *mysql_conn, total_used = c_usage->a_cpu + c_usage->d_cpu + c_usage->pd_cpu; - /* info("We now have (%"PRIu64"+%"PRIu64"+" */ - /* "%"PRIu64")(%"PRIu64") " */ - /* "?= %"PRIu64"", */ - /* c_usage->a_cpu, c_usage->d_cpu, */ - /* c_usage->pd_cpu, total_used, */ - /* c_usage->total_time); */ } + /* info("Cluster %s now has (%"PRIu64"+%"PRIu64"+" */ + /* "%"PRIu64")(%"PRIu64") ?= %"PRIu64"", */ + /* cluster_name, */ + /* c_usage->a_cpu, c_usage->d_cpu, */ + /* c_usage->pd_cpu, total_used, */ + /* c_usage->total_time); */ c_usage->i_cpu = c_usage->total_time - total_used - c_usage->r_cpu; /* sanity check just to make sure we have a @@ -323,6 +323,8 @@ static local_cluster_usage_t *_setup_cluster_usage(mysql_conn_t *mysql_conn, MYSQL_RES *result = NULL; MYSQL_ROW row; int i = 0; + ListIterator c_itr = NULL; + local_cluster_usage_t *loc_c_usage; char *event_req_inx[] = { "node_name", @@ -367,7 +369,7 @@ static local_cluster_usage_t *_setup_cluster_usage(mysql_conn_t *mysql_conn, return NULL; } xfree(query); - + c_itr = list_iterator_create(cluster_down_list); while ((row = mysql_fetch_row(result))) { time_t row_start = slurm_atoul(row[EVENT_REQ_START]); time_t row_end = slurm_atoul(row[EVENT_REQ_END]); @@ -395,8 +397,6 @@ static local_cluster_usage_t *_setup_cluster_usage(mysql_conn_t *mysql_conn, * for the entire period. */ if (state || !c_usage) { - local_cluster_usage_t *loc_c_usage; - loc_c_usage = xmalloc( sizeof(local_cluster_usage_t)); loc_c_usage->cpu_count = row_cpu; @@ -431,8 +431,8 @@ static local_cluster_usage_t *_setup_cluster_usage(mysql_conn_t *mysql_conn, time period we would already have it. 
*/ if (c_usage) { - int local_start = row_start; - int local_end = row_end; + time_t local_start = row_start; + time_t local_end = row_end; int seconds; if (c_usage->start > local_start) local_start = c_usage->start; @@ -440,20 +440,56 @@ static local_cluster_usage_t *_setup_cluster_usage(mysql_conn_t *mysql_conn, local_end = c_usage->end; seconds = (local_end - local_start); if (seconds > 0) { - /* info("node %s adds " */ - /* "(%d)(%d-%d) * %d = %d " */ - /* "to %d", */ + /* info("%p node %s adds " */ + /* "(%d)(%ld-%ld) * %d = %"PRIu64" " */ + /* "to %"PRIu64" (%s - %s)", */ + /* c_usage, */ /* row[EVENT_REQ_NAME], */ /* seconds, */ /* local_end, local_start, */ /* row_cpu, */ - /* seconds * row_cpu, */ - /* row_cpu); */ - c_usage->d_cpu += seconds * row_cpu; + /* seconds * (uint64_t)row_cpu, */ + /* c_usage->d_cpu, */ + /* slurm_ctime(&local_start), */ + /* slurm_ctime(&local_end)); */ + c_usage->d_cpu += seconds * (uint64_t)row_cpu; + /* Now remove this time if there was a + disconnected slurmctld during the + down time. + */ + list_iterator_reset(c_itr); + while ((loc_c_usage = list_next(c_itr))) { + int temp_end = row_end; + int temp_start = row_start; + if (loc_c_usage->start > local_start) + temp_start = loc_c_usage->start; + if (loc_c_usage->end < temp_end) + temp_end = loc_c_usage->end; + seconds = (temp_end - temp_start); + if (seconds < 1) + continue; + + seconds *= row_cpu; + if (seconds >= loc_c_usage->total_time) + loc_c_usage->total_time = 0; + else + loc_c_usage->total_time -= + seconds; + + /* info("Node %s was down for " */ + /* "%d seconds while " */ + /* "cluster %s's slurmctld " */ + /* "wasn't responding %"PRIu64, */ + /* row[EVENT_REQ_NAME], */ + /* seconds, cluster_name, */ + /* loc_c_usage->total_time); */ + } } } } mysql_free_result(result); + list_iterator_destroy(c_itr); + return c_usage; } @@ -852,15 +888,18 @@ extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn, if (loc_c_usage->end < temp_end) temp_end = loc_c_usage->end; loc_seconds = (temp_end - temp_start); - if (loc_seconds > 0) { - /* info(" Job %u was running for " */ - /* "%"PRIu64" seconds while " */ - /* "cluster %s's slurmctld " */ - /* "wasn't responding", */ - /* job_id, */ - /* (uint64_t) */ - /* (seconds * row_acpu), */ - /* cluster_name); */ + if (loc_seconds < 1) + continue; + + loc_seconds *= row_acpu; + /* info(" Job %u was running for " */ + /* "%d seconds while " */ + /* "cluster %s's slurmctld " */ + /* "wasn't responding", */ + /* job_id, loc_seconds, cluster_name); */ + if (loc_seconds >= loc_c_usage->total_time) + loc_c_usage->total_time = 0; + else { loc_c_usage->total_time -= loc_seconds * row_acpu; } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_usage.c b/src/plugins/accounting_storage/mysql/as_mysql_usage.c index fe846f24b..7763cb1f9 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_usage.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_usage.c @@ -926,6 +926,7 @@ extern int as_mysql_roll_usage(mysql_conn_t *mysql_conn, { int rc = SLURM_SUCCESS; int rolledup = 0; + int roll_started = 0; char *cluster_name = NULL; ListIterator itr; pthread_mutex_t rolledup_lock = PTHREAD_MUTEX_INITIALIZER; @@ -984,14 +985,15 @@ extern int as_mysql_roll_usage(mysql_conn_t *mysql_conn, (void *)local_rollup)) fatal("pthread_create: %m"); slurm_attr_destroy(&rollup_attr); + roll_started++; } slurm_mutex_lock(&rolledup_lock); list_iterator_destroy(itr); slurm_mutex_unlock(&as_mysql_cluster_list_lock); - while (rolledup < list_count(as_mysql_cluster_list)) { + while 
(rolledup < roll_started) { pthread_cond_wait(&rolledup_cond, &rolledup_lock); - debug2("Got %d rolled up", rolledup); + debug2("Got %d of %d rolled up", rolledup, roll_started); } slurm_mutex_unlock(&rolledup_lock); debug2("Everything rolled up"); diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c index dc4250d85..f886033b5 100644 --- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c +++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c @@ -529,7 +529,7 @@ extern int acct_storage_p_commit(void *db_conn, bool commit) { slurmdbd_msg_t req; dbd_fini_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_fini_msg_t)); @@ -552,7 +552,7 @@ extern int acct_storage_p_add_users(void *db_conn, uint32_t uid, { slurmdbd_msg_t req; dbd_list_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_list_msg_t)); get_msg.my_list = user_list; @@ -574,7 +574,7 @@ extern int acct_storage_p_add_coord(void *db_conn, uint32_t uid, { slurmdbd_msg_t req; dbd_acct_coord_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_acct_coord_msg_t)); get_msg.acct_list = acct_list; @@ -596,7 +596,7 @@ extern int acct_storage_p_add_accts(void *db_conn, uint32_t uid, { slurmdbd_msg_t req; dbd_list_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_list_msg_t)); get_msg.my_list = acct_list; @@ -617,7 +617,7 @@ extern int acct_storage_p_add_clusters(void *db_conn, uint32_t uid, { slurmdbd_msg_t req; dbd_list_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_list_msg_t)); get_msg.my_list = cluster_list; @@ -639,7 +639,7 @@ extern int acct_storage_p_add_associations(void *db_conn, uint32_t uid, { slurmdbd_msg_t req; dbd_list_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_list_msg_t)); get_msg.my_list = association_list; @@ -660,7 +660,7 @@ extern int acct_storage_p_add_qos(void *db_conn, uint32_t uid, { slurmdbd_msg_t req; dbd_list_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_list_msg_t)); get_msg.my_list = qos_list; @@ -681,7 +681,7 @@ extern int acct_storage_p_add_res(void *db_conn, uint32_t uid, { slurmdbd_msg_t req; dbd_list_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_list_msg_t)); get_msg.my_list = res_list; @@ -702,7 +702,7 @@ extern int acct_storage_p_add_wckeys(void *db_conn, uint32_t uid, { slurmdbd_msg_t req; dbd_list_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_list_msg_t)); get_msg.my_list = wckey_list; @@ -723,7 +723,7 @@ extern int acct_storage_p_add_reservation(void *db_conn, { slurmdbd_msg_t req; dbd_rec_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_rec_msg_t)); get_msg.rec = resv; @@ -1094,7 +1094,7 @@ extern int acct_storage_p_modify_reservation(void *db_conn, { slurmdbd_msg_t req; dbd_rec_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_rec_msg_t)); get_msg.rec = resv; @@ -1466,7 +1466,7 @@ extern int acct_storage_p_remove_reservation(void *db_conn, { slurmdbd_msg_t req; 
dbd_rec_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_rec_msg_t)); get_msg.rec = resv; @@ -2093,7 +2093,7 @@ extern int acct_storage_p_roll_usage(void *db_conn, { slurmdbd_msg_t req; dbd_roll_usage_msg_t get_msg; - int rc, resp_code; + int rc, resp_code = SLURM_SUCCESS; memset(&get_msg, 0, sizeof(dbd_roll_usage_msg_t)); get_msg.end = sent_end; diff --git a/src/plugins/job_submit/lua/job_submit_lua.c b/src/plugins/job_submit/lua/job_submit_lua.c index 4ef7b0f8f..a8bbdb0e3 100644 --- a/src/plugins/job_submit/lua/job_submit_lua.c +++ b/src/plugins/job_submit/lua/job_submit_lua.c @@ -509,6 +509,10 @@ static int _job_req_field(const struct job_descriptor *job_desc, lua_pushnumber (L, job_desc->group_id); } else if (!strcmp(name, "licenses")) { lua_pushstring (L, job_desc->licenses); + } else if (!strcmp(name, "mail_type")) { + lua_pushnumber (L, job_desc->mail_type); + } else if (!strcmp(name, "mail_user")) { + lua_pushstring (L, job_desc->mail_user); } else if (!strcmp(name, "max_cpus")) { lua_pushnumber (L, job_desc->max_cpus); } else if (!strcmp(name, "max_nodes")) { diff --git a/src/plugins/jobacct_gather/common/common_jag.c b/src/plugins/jobacct_gather/common/common_jag.c index 84b6775c7..d8889ebca 100644 --- a/src/plugins/jobacct_gather/common/common_jag.c +++ b/src/plugins/jobacct_gather/common/common_jag.c @@ -74,9 +74,9 @@ static uint32_t _update_weighted_freq(struct jobacctinfo *jobacct, jobacct->current_weighted_freq = jobacct->current_weighted_freq + jobacct->this_sampled_cputime * thisfreq; - if (jobacct->last_total_cputime) { + if (jobacct->tot_cpu) { return (jobacct->current_weighted_freq / - jobacct->last_total_cputime); + jobacct->tot_cpu); } else return thisfreq; } diff --git a/src/plugins/priority/multifactor/fair_tree.c b/src/plugins/priority/multifactor/fair_tree.c index ef70af815..967e79dcc 100644 --- a/src/plugins/priority/multifactor/fair_tree.c +++ b/src/plugins/priority/multifactor/fair_tree.c @@ -44,7 +44,7 @@ #include "fair_tree.h" -static void _ft_decay_apply_new_usage(struct job_record *job, time_t *start); +static int _ft_decay_apply_new_usage(struct job_record *job, time_t *start); static void _apply_priority_fs(void); /* Fair Tree code called from the decay thread loop */ @@ -88,18 +88,14 @@ static void _ft_set_assoc_usage_efctv(slurmdb_association_rec_t *assoc) /* Apply usage with decay factor. Call standard functions */ -static void _ft_decay_apply_new_usage(struct job_record *job, time_t *start) +static int _ft_decay_apply_new_usage(struct job_record *job, time_t *start) { - if (!decay_apply_new_usage(job, start)) - return; - - /* Priority 0 is reserved for held jobs. Also skip priority - * calculation for non-pending jobs. */ - if ((job->priority == 0) || !IS_JOB_PENDING(job)) - return; + /* Always return SUCCESS so that list_for_each will + * continue processing list of jobs. For this reason, + * don't call decay_apply_new_usage() directly. 
*/ + decay_apply_new_usage(job, start); - set_priority_factors(*start, job); - last_job_update = time(NULL); + return SLURM_SUCCESS; } diff --git a/src/plugins/priority/multifactor/priority_multifactor.c b/src/plugins/priority/multifactor/priority_multifactor.c index f65bce826..498e259cb 100644 --- a/src/plugins/priority/multifactor/priority_multifactor.c +++ b/src/plugins/priority/multifactor/priority_multifactor.c @@ -1136,14 +1136,19 @@ static void _ticket_based_decay(List job_list, time_t start_time) } -static void _decay_apply_new_usage_and_weighted_factors( +static int _decay_apply_new_usage_and_weighted_factors( struct job_record *job_ptr, time_t *start_time_ptr) { + /* Always return SUCCESS so that list_for_each will + * continue processing list of jobs. */ + if (!decay_apply_new_usage(job_ptr, start_time_ptr)) - return; + return SLURM_SUCCESS; decay_apply_weighted_factors(job_ptr, start_time_ptr); + + return SLURM_SUCCESS; } @@ -1867,9 +1872,12 @@ extern bool decay_apply_new_usage(struct job_record *job_ptr, } -extern void decay_apply_weighted_factors(struct job_record *job_ptr, +extern int decay_apply_weighted_factors(struct job_record *job_ptr, time_t *start_time_ptr) { + /* Always return SUCCESS so that list_for_each will + * continue processing list of jobs. */ + /* * Priority 0 is reserved for held * jobs. Also skip priority @@ -1878,13 +1886,14 @@ extern void decay_apply_weighted_factors(struct job_record *job_ptr, if ((job_ptr->priority == 0) || (!IS_JOB_PENDING(job_ptr) && !(flags & PRIORITY_FLAGS_CALCULATE_RUNNING))) - return; + return SLURM_SUCCESS; job_ptr->priority = _get_priority_internal(*start_time_ptr, job_ptr); last_job_update = time(NULL); debug2("priority for job %u is now %u", job_ptr->job_id, job_ptr->priority); + return SLURM_SUCCESS; } diff --git a/src/plugins/priority/multifactor/priority_multifactor.h b/src/plugins/priority/multifactor/priority_multifactor.h index 43a448d30..65785e14f 100644 --- a/src/plugins/priority/multifactor/priority_multifactor.h +++ b/src/plugins/priority/multifactor/priority_multifactor.h @@ -55,7 +55,7 @@ extern double priority_p_calc_fs_factor( long double usage_efctv, long double shares_norm); extern bool decay_apply_new_usage( struct job_record *job_ptr, time_t *start_time_ptr); -extern void decay_apply_weighted_factors( +extern int decay_apply_weighted_factors( struct job_record *job_ptr, time_t *start_time_ptr); extern void set_assoc_usage_norm(slurmdb_association_rec_t *assoc); extern void set_priority_factors(time_t start_time, struct job_record *job_ptr); diff --git a/src/plugins/proctrack/lua/proctrack_lua.c b/src/plugins/proctrack/lua/proctrack_lua.c index 5eedd0b15..a0c91118a 100644 --- a/src/plugins/proctrack/lua/proctrack_lua.c +++ b/src/plugins/proctrack/lua/proctrack_lua.c @@ -231,8 +231,12 @@ int init (void) * by any lua scripts. 
*/ if (!dlopen("liblua.so", RTLD_NOW | RTLD_GLOBAL) && - !dlopen("liblua5.1.so", RTLD_NOW | RTLD_GLOBAL) && - !dlopen("liblua5.1.so.0", RTLD_NOW | RTLD_GLOBAL)) { + !dlopen("liblua-5.2.so", RTLD_NOW | RTLD_GLOBAL) && + !dlopen("liblua5.2.so", RTLD_NOW | RTLD_GLOBAL) && + !dlopen("liblua5.2.so.0", RTLD_NOW | RTLD_GLOBAL) && + !dlopen("liblua-5.1.so", RTLD_NOW | RTLD_GLOBAL) && + !dlopen("liblua5.1.so", RTLD_NOW | RTLD_GLOBAL) && + !dlopen("liblua5.1.so.0", RTLD_NOW | RTLD_GLOBAL)) { return (error("Failed to open liblua.so: %s", dlerror())); } diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index e5f3ab0c1..3f2a789d8 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -761,7 +761,7 @@ static int _attempt_backfill(void) * Needs to be done with the node-state lock taken. */ START_TIMER; - if (select_g_reconfigure()) { + if (select_g_update_block(NULL)) { debug4("backfill: not scheduling due to ALPS"); return SLURM_SUCCESS; } @@ -1294,7 +1294,8 @@ next_task: /* Update the database if job time limit * changed and move to next job */ - if (save_time_limit != job_ptr->time_limit) + if (save_time_limit != job_ptr->time_limit && + (!with_slurmdbd || job_ptr->db_index)) jobacct_storage_g_job_start(acct_db_conn, job_ptr); job_start_cnt++; @@ -1438,6 +1439,7 @@ static int _start_job(struct job_record *job_ptr, bitstr_t *resv_bitmap) { int rc; bitstr_t *orig_exc_nodes = NULL; + bool is_job_array_head = false; static uint32_t fail_jobid = 0; if (job_ptr->details->exc_node_bitmap) { @@ -1445,8 +1447,21 @@ static int _start_job(struct job_record *job_ptr, bitstr_t *resv_bitmap) bit_or(job_ptr->details->exc_node_bitmap, resv_bitmap); } else job_ptr->details->exc_node_bitmap = bit_copy(resv_bitmap); - + if (job_ptr->array_recs) + is_job_array_head = true; rc = select_nodes(job_ptr, false, NULL, NULL); + if (is_job_array_head && job_ptr->details) { + struct job_record *base_job_ptr; + base_job_ptr = find_job_record(job_ptr->array_job_id); + if (base_job_ptr && base_job_ptr != job_ptr + && base_job_ptr->array_recs) { + FREE_NULL_BITMAP( + base_job_ptr->details->exc_node_bitmap); + if (orig_exc_nodes) + base_job_ptr->details->exc_node_bitmap = + bit_copy(orig_exc_nodes); + } + } if (job_ptr->details) { /* select_nodes() might cancel the job! */ FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap); job_ptr->details->exc_node_bitmap = orig_exc_nodes; diff --git a/src/plugins/sched/wiki/get_nodes.c b/src/plugins/sched/wiki/get_nodes.c index 57a1e6c73..99f119436 100644 --- a/src/plugins/sched/wiki/get_nodes.c +++ b/src/plugins/sched/wiki/get_nodes.c @@ -86,7 +86,7 @@ extern int get_nodes(char *cmd_ptr, int *err_code, char **err_msg) * This relies on the above write lock for the node state. */ lock_slurmctld(node_write_lock); - if (select_g_reconfigure()) { + if (select_g_update_block(NULL)) { unlock_slurmctld(node_write_lock); *err_code = -720; *err_msg = "Unable to run ALPS inventory"; diff --git a/src/plugins/sched/wiki2/get_nodes.c b/src/plugins/sched/wiki2/get_nodes.c index f303f13ae..11fdb5887 100644 --- a/src/plugins/sched/wiki2/get_nodes.c +++ b/src/plugins/sched/wiki2/get_nodes.c @@ -99,7 +99,7 @@ extern int get_nodes(char *cmd_ptr, int *err_code, char **err_msg) * This relies on the above write lock for the node state. 
*/ lock_slurmctld(node_write_lock); - if (select_g_reconfigure()) { + if (select_g_update_block(NULL)) { unlock_slurmctld(node_write_lock); *err_code = -720; *err_msg = "Unable to run ALPS inventory"; diff --git a/src/plugins/select/alps/basil_interface.c b/src/plugins/select/alps/basil_interface.c index 4ca740443..561bf6367 100644 --- a/src/plugins/select/alps/basil_interface.c +++ b/src/plugins/select/alps/basil_interface.c @@ -229,6 +229,12 @@ extern int basil_inventory(void) time_t now = time(NULL); static time_t slurm_alps_mismatch_time = (time_t) 0; static bool logged_sync_timeout = false; + static time_t last_inv_run = 0; + + if ((now - last_inv_run) < inv_interval) + return SLURM_SUCCESS; + + last_inv_run = now; inv = get_full_inventory(version); if (inv == NULL) { diff --git a/src/plugins/select/alps/basil_interface.h b/src/plugins/select/alps/basil_interface.h index faa0cc76b..e00e53cd6 100644 --- a/src/plugins/select/alps/basil_interface.h +++ b/src/plugins/select/alps/basil_interface.h @@ -25,6 +25,7 @@ #include "src/slurmctld/slurmctld.h" extern int dim_size[3]; +extern int inv_interval; /** * struct select_jobinfo - data specific to Cray node selection plugin diff --git a/src/plugins/select/alps/select_alps.c b/src/plugins/select/alps/select_alps.c index b40ec455b..120aa5cba 100644 --- a/src/plugins/select/alps/select_alps.c +++ b/src/plugins/select/alps/select_alps.c @@ -52,8 +52,10 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> +#include <stdlib.h> #include "src/common/slurm_xlator.h" /* Must be first */ +#include "src/common/slurm_strcasestr.h" #include "other_select.h" #include "basil_interface.h" #include "cray_config.h" @@ -117,6 +119,8 @@ char *uid_to_string (uid_t uid) { return NULL; } # define SIGRTMIN SIGUSR2+1 #endif +int inv_interval = 0; + /* All current (2011) XT/XE installations have a maximum dimension of 3, * smaller systems deploy a 2D Torus which has no connectivity in * X-dimension. We know the highest system dimensions possible here @@ -167,6 +171,27 @@ static bool _zero_size_job ( struct job_record *job_ptr ) return false; } +static void _set_inv_interval(void) +{ + char *tmp_ptr, *sched_params = slurm_get_sched_params(); + int i; + + if (sched_params) { + if (sched_params && + (tmp_ptr = slurm_strcasestr(sched_params, + "inventory_interval="))) { + /* 0123456789012345 */ + i = atoi(tmp_ptr + 19); + if (i < 0) + error("ignoring SchedulerParameters: " + "inventory_interval of %d", i); + else + inv_interval = i; + } + xfree(sched_params); + } +} + /* * init() is called when the plugin is loaded, before any other functions * are called. Put global initialization here. @@ -190,6 +215,7 @@ extern int init ( void ) fatal("SelectTypeParams=other_cons_res is not valid " "for select/alps"); } + _set_inv_interval(); } create_config(); @@ -410,7 +436,12 @@ extern int select_p_job_fini(struct job_record *job_ptr) { if (job_ptr == NULL) return SLURM_SUCCESS; - if ((slurmctld_primary || (job_ptr->job_state == (uint16_t)NO_VAL)) + + /* Don't run the release in the controller for batch jobs. It is + * handled on the stepd end. 
+ */ + if (((slurmctld_primary && !job_ptr->batch_flag) || + (job_ptr->job_state == (uint16_t)NO_VAL)) && !_zero_size_job(job_ptr) && (do_basil_release(job_ptr) != SLURM_SUCCESS)) return SLURM_ERROR; @@ -843,6 +874,9 @@ extern char *select_p_select_jobinfo_xstrdup(select_jobinfo_t *jobinfo, extern int select_p_update_block(update_block_msg_t *block_desc_ptr) { + if (slurmctld_primary && basil_inventory()) + return SLURM_ERROR; + return other_update_block(block_desc_ptr); } @@ -880,8 +914,8 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data) extern int select_p_reconfigure(void) { - if (slurmctld_primary && basil_inventory()) - return SLURM_ERROR; + _set_inv_interval(); + return other_reconfigure(); } diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index e2432c331..86e24a5bb 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -2806,6 +2806,35 @@ alloc_job: */ job_ptr->total_cpus += select_node_record[i].cpus; } + } else if (cr_type & CR_SOCKET) { + int ci = 0; + int s, last_s, sock_cnt = 0; + first = bit_ffs(job_res->node_bitmap); + if (first != -1) + last = bit_fls(job_res->node_bitmap); + else + last = first - 1; + job_ptr->total_cpus = 0; + for (i = first; i <= last; i++) { + if (!bit_test(job_res->node_bitmap, i)) + continue; + sock_cnt = 0; + for (s = 0; s < select_node_record[i].sockets; s++) { + last_s = -1; + for (c = 0; c<select_node_record[i].cores; c++){ + if (bit_test(job_res->core_bitmap, ci)){ + if (s != last_s) { + sock_cnt++; + last_s = s; + } + } + ci++; + } + } + job_ptr->total_cpus += (sock_cnt * + select_node_record[i].cores * + select_node_record[i].vpus); + } } else if (build_cnt >= 0) job_ptr->total_cpus = build_cnt; else diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c index 0b4c61092..6d08d3767 100644 --- a/src/plugins/select/cray/select_cray.c +++ b/src/plugins/select/cray/select_cray.c @@ -151,8 +151,16 @@ static uint32_t blade_cnt = 0; static pthread_mutex_t blade_mutex = PTHREAD_MUTEX_INITIALIZER; static time_t last_npc_update; -#ifdef HAVE_NATIVE_CRAY +static int active_post_nhc_cnt = 0; +static pthread_mutex_t throttle_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t throttle_cond = PTHREAD_COND_INITIALIZER; +#if defined(HAVE_NATIVE_CRAY_GA) && !defined(HAVE_CRAY_NETWORK) +static size_t topology_num_nodes = 0; +static alpsc_topology_t *topology = NULL; +#endif + +#ifdef HAVE_NATIVE_CRAY /* Used for aeld communication */ alpsc_ev_app_t *app_list = NULL; // List of running/suspended apps @@ -868,6 +876,32 @@ static void _set_job_running_restore(select_jobinfo_t *jobinfo) last_npc_update = time(NULL); } +/* These functions prevent the fini's of jobs and steps from keeping + * the slurmctld write locks constantly set after the nhc is run, + * which can prevent other RPCs and system functions from being + * processed. For example, a steady stream of step or job completions + * can prevent squeue from responding or jobs from being scheduled.
*/ +static void _throttle_start(void) +{ + slurm_mutex_lock(&throttle_mutex); + while (1) { + if (active_post_nhc_cnt == 0) { + active_post_nhc_cnt++; + break; + } + pthread_cond_wait(&throttle_cond, &throttle_mutex); + } + slurm_mutex_unlock(&throttle_mutex); + usleep(100); +} +static void _throttle_fini(void) +{ + slurm_mutex_lock(&throttle_mutex); + active_post_nhc_cnt--; + pthread_cond_broadcast(&throttle_cond); + slurm_mutex_unlock(&throttle_mutex); +} + static void *_job_fini(void *args) { struct job_record *job_ptr = (struct job_record *)args; @@ -899,6 +933,7 @@ static void *_job_fini(void *args) /***********/ xfree(nhc_info.nodelist); + _throttle_start(); lock_slurmctld(job_write_lock); if (job_ptr->magic == JOB_MAGIC) { select_jobinfo_t *jobinfo = NULL; @@ -914,6 +949,7 @@ static void *_job_fini(void *args) "this should never happen", nhc_info.jobid); unlock_slurmctld(job_write_lock); + _throttle_fini(); return NULL; } @@ -960,6 +996,7 @@ static void *_step_fini(void *args) xfree(nhc_info.nodelist); + _throttle_start(); lock_slurmctld(job_write_lock); if (!step_ptr->job_ptr) { error("For some reason we don't have a job_ptr for " @@ -991,6 +1028,7 @@ static void *_step_fini(void *args) post_job_step(step_ptr); } unlock_slurmctld(job_write_lock); + _throttle_fini(); return NULL; } @@ -1117,6 +1155,11 @@ extern int fini ( void ) _free_blade(&blade_array[i]); xfree(blade_array); +#if defined(HAVE_NATIVE_CRAY_GA) && !defined(HAVE_CRAY_NETWORK) + if (topology) + free(topology); +#endif + slurm_mutex_unlock(&blade_mutex); return other_select_fini(); @@ -1443,23 +1486,27 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) #if defined(HAVE_NATIVE_CRAY_GA) && !defined(HAVE_CRAY_NETWORK) int nn, end_nn, last_nn = 0; bool found = 0; - alpsc_topology_t *topology = NULL; - size_t num_nodes; char *err_msg = NULL; - if (alpsc_get_topology(&err_msg, &topology, &num_nodes)) { - if (err_msg) { - error("(%s: %d: %s) Could not get system " - "topology info: %s", - THIS_FILE, __LINE__, __FUNCTION__, err_msg); - free(err_msg); - } else { - error("(%s: %d: %s) Could not get system " - "topology info: No error message present.", - THIS_FILE, __LINE__, __FUNCTION__); + if (!topology) { + if (alpsc_get_topology(&err_msg, &topology, + &topology_num_nodes)) { + if (err_msg) { + error("(%s: %d: %s) Could not get system " + "topology info: %s", + THIS_FILE, __LINE__, + __FUNCTION__, err_msg); + free(err_msg); + } else { + error("(%s: %d: %s) Could not get system " + "topology info: No error " + "message present.", + THIS_FILE, __LINE__, __FUNCTION__); + } + return SLURM_ERROR; } - return SLURM_ERROR; } + #endif slurm_mutex_lock(&blade_mutex); @@ -1493,7 +1540,7 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) } #if defined(HAVE_NATIVE_CRAY_GA) && !defined(HAVE_CRAY_NETWORK) - end_nn = num_nodes; + end_nn = topology_num_nodes; start_again: @@ -1509,7 +1556,7 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) break; } } - if (end_nn != num_nodes) { + if (end_nn != topology_num_nodes) { /* already looped */ fatal("Node %s(%d) isn't found on the system", node_ptr->name, nodeinfo->nid); @@ -1548,10 +1595,6 @@ extern int select_p_node_init(struct node_record *node_ptr, int node_cnt) /* give back the memory */ xrealloc(blade_array, sizeof(blade_info_t) * blade_cnt); -#if defined(HAVE_NATIVE_CRAY_GA) && !defined(HAVE_CRAY_NETWORK) - free(topology); -#endif - slurm_mutex_unlock(&blade_mutex); return other_node_init(node_ptr, node_cnt); diff 
--git a/src/plugins/switch/cray/util.c b/src/plugins/switch/cray/util.c index 3648b3a36..2473382b4 100644 --- a/src/plugins/switch/cray/util.c +++ b/src/plugins/switch/cray/util.c @@ -46,6 +46,7 @@ #include "switch_cray.h" #include "slurm/slurm.h" +#include "src/common/slurm_protocol_api.h" #include "src/common/slurm_step_layout.h" #include "src/common/xstring.h" @@ -174,6 +175,11 @@ int set_job_env(stepd_step_rec_t *job, slurm_cray_jobinfo_t *sw_job) non_smp = 1; break; } + if ((non_smp == 0) && + (slurm_get_select_type_param() & CR_PACK_NODES)) { + CRAY_INFO("Non-SMP ordering identified; CR_PACK_NODES"); + non_smp = 1; + } rc = env_array_overwrite_fmt(&job->env, PMI_CRAY_NO_SMP_ENV, "%d", non_smp); if (rc == 0) { diff --git a/src/sacct/sacct.h b/src/sacct/sacct.h index c05e99ebf..02dd2784a 100644 --- a/src/sacct/sacct.h +++ b/src/sacct/sacct.h @@ -69,7 +69,7 @@ #define BRIEF_COMP_FIELDS "jobid,uid,state" #define DEFAULT_FIELDS "jobid,jobname,partition,account,alloccpus,state,exitcode" #define DEFAULT_COMP_FIELDS "jobid,uid,jobname,partition,nnodes,nodelist,state,end" -#define LONG_FIELDS "jobid,jobname,partition,maxvmsize,maxvmsizenode,maxvmsizetask,avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode,maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks,alloccpus,elapsed,state,exitcode,avecpufreq,reqcpufreq,reqmem,consumedenergy,maxdiskread,maxdiskreadnode,maxdiskreadtask,avediskread,maxdiskwrite,maxdiskwritenode,maxdiskwritetask,avediskwrite,allocgres,reqgres" +#define LONG_FIELDS "jobid,jobidraw,jobname,partition,maxvmsize,maxvmsizenode,maxvmsizetask,avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode,maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks,alloccpus,elapsed,state,exitcode,avecpufreq,reqcpufreq,reqmem,consumedenergy,maxdiskread,maxdiskreadnode,maxdiskreadtask,avediskread,maxdiskwrite,maxdiskwritenode,maxdiskwritetask,avediskwrite,allocgres,reqgres" #define LONG_COMP_FIELDS "jobid,uid,jobname,partition,nnodes,nodelist,state,start,end,timelimit" diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 9ae4f693a..e6f5a3b18 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -61,6 +61,7 @@ # include <limits.h> #endif +#include <ctype.h> #include <fcntl.h> #include <stdarg.h> /* va_start */ #include <stdio.h> @@ -1030,7 +1031,7 @@ static void _opt_batch_script(const char * file, const void *body, int size) argv = xmalloc(sizeof(char *)); argv[0] = "sbatch"; - while((line = _next_line(body, size, &state)) != NULL) { + while ((line = _next_line(body, size, &state)) != NULL) { lineno++; if (!strncmp(line, magic_word1, magic_word_len1)) ptr = line + magic_word_len1; @@ -1043,7 +1044,19 @@ static void _opt_batch_script(const char * file, const void *body, int size) warned = 1; } } else { + /* Stop parsing script if not a comment */ + bool is_cmd = false; + for (i = 0; line[i]; i++) { + if (isspace(line[i])) + continue; + if (line[i] == '#') + break; + is_cmd = true; + break; + } xfree(line); + if (is_cmd) + break; continue; } @@ -2077,7 +2090,7 @@ static void _parse_pbs_resource_list(char *rl) opt.ntasks_per_node = _get_int(temp, "mpiprocs"); xfree(temp); } -#ifdef HAVE_ALPS_CRAY +#if defined(HAVE_ALPS_CRAY) || defined(HAVE_NATIVE_CRAY) /* * NB: no "mppmem" here since it specifies per-PE memory units, * whereas SLURM uses per-node and per-CPU memory units. 
@@ -2117,7 +2130,7 @@ static void _parse_pbs_resource_list(char *rl) opt.ntasks_set = true; } xfree(temp); -#endif /* HAVE_ALPS_CRAY */ +#endif /* HAVE_ALPS_CRAY || HAVE_NATIVE_CRAY */ } else if (!strncasecmp(rl+i, "naccelerators=", 14)) { i += 14; temp = _get_pbs_option_value(rl, &i, ','); @@ -2277,7 +2290,7 @@ static bool _opt_verify(void) if ((opt.job_name == NULL) && (opt.script_argc > 0)) opt.job_name = base_name(opt.script_argv[0]); if (opt.job_name) - setenv("SLURM_JOB_NAME", opt.job_name, 0); + setenv("SLURM_JOB_NAME", opt.job_name, 1); /* check for realistic arguments */ if (opt.ntasks < 0) { diff --git a/src/slurmctld/agent.h b/src/slurmctld/agent.h index 6b5fdaf85..6173d19ed 100644 --- a/src/slurmctld/agent.h +++ b/src/slurmctld/agent.h @@ -51,6 +51,8 @@ * total thread count is product of * MAX_AGENT_CNT and * (AGENT_THREAD_COUNT + 2) */ +#define LOTS_OF_AGENTS_CNT 50 +#define LOTS_OF_AGENTS ((get_agent_count() <= LOTS_OF_AGENTS_CNT) ? 0 : 1) typedef struct agent_arg { uint32_t node_count; /* number of nodes to communicate diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index e7a788a93..7ddb2558b 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1338,7 +1338,7 @@ static void _queue_reboot_msg(void) node_ptr->reason = xstrdup("Scheduled reboot"); bit_clear(avail_node_bitmap, i); bit_clear(idle_node_bitmap, i); - node_ptr->last_response = now; + node_ptr->last_response = now + slurm_get_resume_timeout(); } if (reboot_agent_args != NULL) { hostlist_uniq(reboot_agent_args->hostlist); @@ -1378,7 +1378,6 @@ static void *_slurmctld_background(void *no_data) static time_t last_ctld_bu_ping; static time_t last_uid_update; static time_t last_reboot_msg_time; - static bool ping_msg_sent = false; time_t now; int no_resp_msg_interval, ping_interval, purge_job_interval; int group_time, group_force; @@ -1549,21 +1548,10 @@ static void *_slurmctld_background(void *no_data) ping_nodes_now) && is_ping_done()) { now = time(NULL); last_ping_node_time = now; - ping_msg_sent = false; ping_nodes_now = false; lock_slurmctld(node_write_lock); ping_nodes(); unlock_slurmctld(node_write_lock); - } else if ((difftime(now, last_ping_node_time) >= - ping_interval) && !is_ping_done() && - !ping_msg_sent) { - /* log failure once per ping_nodes() call, - * no error if node state update request - * processed while the ping is in progress */ - error("Node ping apparently hung, " - "many nodes may be DOWN or configured " - "SlurmdTimeout should be increased"); - ping_msg_sent = true; } if (slurmctld_conf.inactive_limit && diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index aa4f03022..ad07d1c08 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -125,6 +125,7 @@ List job_list = NULL; /* job_record list */ time_t last_job_update; /* time of last update to job records */ /* Local variables */ +static int bf_min_age_reserve = 0; static uint32_t highest_prio = 0; static uint32_t lowest_prio = TOP_PRIORITY; static int hash_table_size = 0; @@ -3755,7 +3756,9 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, struct job_record **job_pptr, char **err_msg, uint16_t protocol_version) { - static int defer_sched = -1; + static time_t sched_update = 0; + static int defer_sched = 0; + char *sched_params, *tmp_ptr; int error_code, i; bool no_alloc, top_prio, test_only, too_fragmented, independent; struct job_record *job_ptr; @@ -3821,12 +3824,21 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, else 
too_fragmented = false; - if (defer_sched == -1) { - char *sched_params = slurm_get_sched_params(); + if (sched_update != slurmctld_conf.last_update) { + sched_update = slurmctld_conf.last_update; + sched_params = slurm_get_sched_params(); if (sched_params && strstr(sched_params, "defer")) defer_sched = 1; else defer_sched = 0; + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_min_age_reserve="))) { + bf_min_age_reserve = atoi(tmp_ptr + 19); + if (bf_min_age_reserve < 0) + bf_min_age_reserve = 0; + } else { + bf_min_age_reserve = 0; + } xfree(sched_params); } if (defer_sched == 1) @@ -3837,6 +3849,7 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, else top_prio = true; /* don't bother testing, * it is not runable anyway */ + if (immediate && (too_fragmented || (!top_prio) || (!independent))) { job_ptr->job_state = JOB_FAILED; job_ptr->exit_code = 1; @@ -4804,13 +4817,13 @@ static int _get_job_parts(job_desc_msg_t * job_desc, part_ptr = list_peek(part_ptr_list); } if (part_ptr == NULL) { - info("_valid_job_part: invalid partition specified: %s", - job_desc->partition); + info("%s: invalid partition specified: %s", + __func__, job_desc->partition); return ESLURM_INVALID_PARTITION_NAME; } } else { if (default_part_loc == NULL) { - error("_valid_job_part: default partition not set"); + error("%s: default partition not set", __func__); return ESLURM_DEFAULT_PARTITION_NOT_SET; } part_ptr = default_part_loc; @@ -5639,13 +5652,21 @@ static bool _parse_array_tok(char *tok, bitstr_t *array_bitmap, uint32_t max) char *end_ptr = NULL; int i, first, last, step = 1; + if (tok[0] == '[') /* Strip leading "[" */ + tok++; first = strtol(tok, &end_ptr, 10); + if (end_ptr[0] == ']') /* Strip trailing "]" */ + end_ptr++; if (first < 0) return false; if (end_ptr[0] == '-') { last = strtol(end_ptr + 1, &end_ptr, 10); + if (end_ptr[0] == ']') /* Strip trailing "]" */ + end_ptr++; if (end_ptr[0] == ':') { step = strtol(end_ptr + 1, &end_ptr, 10); + if (end_ptr[0] == ']') /* Strip trailing "]" */ + end_ptr++; if ((end_ptr[0] != '\0') && (end_ptr[0] != '%')) return false; if (step <= 0) @@ -7202,7 +7223,7 @@ static int _list_find_job_old(void *job_entry, void *key) struct job_record *job_ptr = (struct job_record *)job_entry; uint16_t cleaning = 0; - if (IS_JOB_COMPLETING(job_ptr)) { + if (IS_JOB_COMPLETING(job_ptr) && !LOTS_OF_AGENTS) { kill_age = now - (slurmctld_conf.kill_wait + 2 * slurm_get_msg_timeout()); if (job_ptr->time_last_active < kill_age) { @@ -8536,6 +8557,8 @@ extern void sync_job_priorities(void) static bool _top_priority(struct job_record *job_ptr) { struct job_details *detail_ptr = job_ptr->details; + time_t now = time(NULL); + int pend_time; bool top; #ifdef HAVE_BG @@ -8576,6 +8599,15 @@ static bool _top_priority(struct job_record *job_ptr) * indicative of job requeue */ continue; } + + if (bf_min_age_reserve) { + if (job_ptr2->details->begin_time == 0) + continue; + pend_time = difftime(now, job_ptr2-> + details->begin_time); + if (pend_time < bf_min_age_reserve) + continue; + } if (!acct_policy_job_runnable_state(job_ptr2) || !misc_policy_job_runnable_state(job_ptr2) || !part_policy_job_runnable_state(job_ptr2) || @@ -9221,19 +9253,14 @@ static int _update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs, if ((job_specs->pn_min_cpus != (uint16_t) NO_VAL) && (job_specs->pn_min_cpus != 0)) { - if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) + + if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) { error_code = ESLURM_JOB_NOT_PENDING; - else if 
(authorized - || (detail_ptr->pn_min_cpus - > job_specs->pn_min_cpus)) { + }else { detail_ptr->pn_min_cpus = job_specs->pn_min_cpus; info("update_job: setting pn_min_cpus to %u for " "job_id %u", job_specs->pn_min_cpus, job_ptr->job_id); - } else { - error("Attempt to increase pn_min_cpus for job %u", - job_ptr->job_id); - error_code = ESLURM_ACCESS_DENIED; } } if (error_code != SLURM_SUCCESS) @@ -9555,7 +9582,12 @@ static int _update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs, job_ptr->direct_set_prio = 0; set_job_prio(job_ptr); } else { - job_ptr->direct_set_prio = 1; + if (admin || (job_specs->priority == 0)) { + /* Only administrator can make + * persistent change to a job's + * priority, except holding a job */ + job_ptr->direct_set_prio = 1; + } job_ptr->priority = job_specs->priority; } info("sched: update_job: setting priority to %u for " @@ -9614,13 +9646,14 @@ static int _update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs, goto fini; if (job_specs->pn_min_memory != NO_VAL) { - if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) + + if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) { error_code = ESLURM_JOB_NOT_PENDING; - else if (job_specs->pn_min_memory - == detail_ptr->pn_min_memory) + } else if (job_specs->pn_min_memory + == detail_ptr->pn_min_memory) { debug("sched: update_job: new memory limit identical " "to old limit for job %u", job_ptr->job_id); - else if (authorized) { + } else { char *entity; if (job_specs->pn_min_memory & MEM_PER_CPU) entity = "cpu"; @@ -9636,32 +9669,21 @@ static int _update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs, * since if set by a super user it be set correctly */ job_ptr->limit_set_pn_min_memory = acct_policy_limit_set.pn_min_memory; - } else { - error("sched: Attempt to modify pn_min_memory for " - "job %u", job_ptr->job_id); - error_code = ESLURM_ACCESS_DENIED; } } if (error_code != SLURM_SUCCESS) goto fini; if (job_specs->pn_min_tmp_disk != NO_VAL) { - if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) + + if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) { error_code = ESLURM_JOB_NOT_PENDING; - else if (authorized - || (detail_ptr->pn_min_tmp_disk - > job_specs->pn_min_tmp_disk)) { + } else { detail_ptr->pn_min_tmp_disk = job_specs->pn_min_tmp_disk; info("sched: update_job: setting job_min_tmp_disk to " "%u for job_id %u", job_specs->pn_min_tmp_disk, job_ptr->job_id); - } else { - - error("sched: Attempt to modify pn_min_tmp_disk " - "for job %u", - job_ptr->job_id); - error_code = ESLURM_ACCESS_DENIED; } } if (error_code != SLURM_SUCCESS) @@ -11513,6 +11535,8 @@ void job_fini (void) xfree(job_hash); xfree(job_array_hash_j); xfree(job_array_hash_t); + xfree(requeue_exit); + xfree(requeue_exit_hold); } /* Record the start of one job array task */ @@ -13797,48 +13821,38 @@ init_requeue_policy(void) static int32_t * _make_requeue_array(char *conf_buf, uint32_t *num) { - char *p; - char *p0; - char cnum[12]; - int cc; - int n; - int32_t *ar; + hostset_t hs; + char *tok = NULL, *end_ptr = NULL; + int32_t *ar = NULL, cc = 0; + long val; - if (conf_buf == NULL) { - *num = 0; - return NULL; - } - - info("%s: exit values: %s", __func__, conf_buf); + *num = 0; + if (conf_buf == NULL) + return ar; - p0 = p = xstrdup(conf_buf); - /* First tokenize the string removing , - */ - for (cc = 0; p[cc] != 0; cc++) { - if (p[cc] == ',') - p[cc] = ' '; + xstrfmtcat(tok, "[%s]", conf_buf); + hs = hostset_create(tok); + xfree(tok); + if (!hs) { + error("%s: exit values: %s", __func__, conf_buf); 
+ return ar; } - /* Count the number of exit values - */ - cc = 0; - while (sscanf(p, "%s%n", cnum, &n) != EOF) { - ++cc; - p += n; - } - - ar = xmalloc(cc * sizeof(int)); + debug("%s: exit values: %s", __func__, conf_buf); - cc = 0; - p = p0; - while (sscanf(p, "%s%n", cnum, &n) != EOF) { - ar[cc] = atoi(cnum); - ++cc; - p += n; + ar = xmalloc(sizeof(int32_t) * hostset_count(hs)); + while ((tok = hostset_shift(hs))) { + val = strtol(tok, &end_ptr, 10); + if ((end_ptr[0] == '\0') && (val >= 0)) { + ar[cc++] = val; + } else { + error("%s: exit values: %s (%s)", + __func__, conf_buf, tok); + } + free(tok); } - *num = cc; - xfree(p0); + hostset_destroy(hs); return ar; } diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 6334b86df..35765ab80 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -761,7 +761,7 @@ extern int schedule(uint32_t job_limit) ListIterator job_iterator = NULL, part_iterator = NULL; List job_queue = NULL; int failed_part_cnt = 0, failed_resv_cnt = 0, job_cnt = 0; - int error_code, i, j, part_cnt, time_limit; + int error_code, i, j, part_cnt, time_limit, pend_time; uint32_t job_depth = 0; job_queue_rec_t *job_queue_rec; struct job_record *job_ptr = NULL; @@ -785,6 +785,8 @@ extern int schedule(uint32_t job_limit) static bool wiki_sched = false; static bool fifo_sched = false; static int sched_timeout = 0; + static int sched_max_job_start = 0; + static int bf_min_age_reserve = 0; static int def_job_limit = 100; static int max_jobs_per_part = 0; static int defer_rpc_cnt = 0; @@ -851,6 +853,15 @@ extern int schedule(uint32_t job_limit) batch_sched_delay = 3; } + if (sched_params && + (tmp_ptr = strstr(sched_params, "bf_min_age_reserve="))) { + bf_min_age_reserve = atoi(tmp_ptr + 19); + if (bf_min_age_reserve < 0) + bf_min_age_reserve = 0; + } else { + bf_min_age_reserve = 0; + } + if (sched_params && (tmp_ptr=strstr(sched_params, "build_queue_timeout="))) /* 01234567890123456789 */ @@ -916,12 +927,22 @@ extern int schedule(uint32_t job_limit) sched_interval = 60; } + if (sched_params && + (tmp_ptr=strstr(sched_params, "sched_max_job_start="))) + sched_max_job_start = atoi(tmp_ptr + 20); + if (sched_interval < 0) { + error("Invalid sched_max_job_start: %d", + sched_max_job_start); + sched_max_job_start = 0; + } + xfree(sched_params); sched_update = slurmctld_conf.last_update; info("SchedulerParameters=default_queue_depth=%d," - "max_rpc_cnt=%d,max_sched_time=%d,partition_job_depth=%d", + "max_rpc_cnt=%d,max_sched_time=%d,partition_job_depth=%d," + "sched_max_job_start=%d", def_job_limit, defer_rpc_cnt, sched_timeout, - max_jobs_per_part); + max_jobs_per_part, sched_max_job_start); } if ((defer_rpc_cnt > 0) && @@ -971,7 +992,7 @@ extern int schedule(uint32_t job_limit) * by the node health checker). * This relies on the above write lock for the node state. 
*/ - if (select_g_reconfigure()) { + if (select_g_update_block(NULL)) { unlock_slurmctld(job_write_lock); debug4("sched: not scheduling due to ALPS"); goto out; } @@ -1080,6 +1101,10 @@ next_task: debug("sched: loop taking too long, breaking out"); break; } + if (sched_max_job_start && (job_cnt >= sched_max_job_start)) { + debug("sched: sched_max_job_start reached, breaking out"); + break; + } if ((job_ptr->array_task_id != NO_VAL) || job_ptr->array_recs) { if ((reject_array_job_id == job_ptr->array_job_id) && @@ -1202,10 +1227,11 @@ next_task: if (job_ptr->qos_id) { slurmdb_association_rec_t *assoc_ptr; assoc_ptr = (slurmdb_association_rec_t *)job_ptr->assoc_ptr; - if (assoc_ptr && - !bit_test(assoc_ptr->usage->valid_qos, - job_ptr->qos_id) && - !job_ptr->limit_set_qos) { + if (assoc_ptr + && (accounting_enforce & ACCOUNTING_ENFORCE_QOS) + && !bit_test(assoc_ptr->usage->valid_qos, + job_ptr->qos_id) + && !job_ptr->limit_set_qos) { debug("sched: JobId=%u has invalid QOS", job_ptr->job_id); xfree(job_ptr->state_desc); @@ -1324,6 +1350,20 @@ next_task: } } + if (fail_by_part && bf_min_age_reserve) { + /* Consider other jobs in this partition if + * job has been waiting for less than + * bf_min_age_reserve time */ + if (job_ptr->details->begin_time == 0) { + fail_by_part = false; + } else { + pend_time = difftime(now, + job_ptr->details->begin_time); + if (pend_time < bf_min_age_reserve) + fail_by_part = false; + } + } + if (fail_by_part) { /* do not schedule more jobs in this partition * or on nodes in this partition */ diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index dd1190540..9a227848e 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -2290,7 +2290,9 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg, ((slurmctld_conf.ret2service == 2) || !xstrcmp(node_ptr->reason, "Scheduled reboot") || ((slurmctld_conf.ret2service == 1) && - !xstrcmp(node_ptr->reason, "Not responding")))) { + !xstrcmp(node_ptr->reason, "Not responding") && + (node_ptr->boot_time < + node_ptr->last_response)))) { if (reg_msg->job_count) { node_ptr->node_state = NODE_STATE_ALLOCATED | node_flags; @@ -2314,15 +2316,22 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg, } else if (node_ptr->last_response && (node_ptr->boot_time > node_ptr->last_response) && (slurmctld_conf.ret2service != 2)) { - if (!node_ptr->reason) { + if (!node_ptr->reason || + (node_ptr->reason && + !xstrcmp(node_ptr->reason, "Not responding"))) { + if (node_ptr->reason) + xfree(node_ptr->reason); node_ptr->reason_time = now; node_ptr->reason_uid = slurm_get_slurm_user_id(); node_ptr->reason = xstrdup( "Node unexpectedly rebooted"); } - info("Node %s unexpectedly rebooted", - reg_msg->node_name); + info("%s: Node %s unexpectedly rebooted boot_time %d " + "last response %d", + __func__, reg_msg->node_name, + (int)node_ptr->boot_time, + (int)node_ptr->last_response); _make_node_down(node_ptr, now); kill_running_job_by_node_name(reg_msg->node_name); last_node_update = now; @@ -2708,6 +2717,17 @@ extern int validate_nodes_via_front_end( if (reg_msg->energy) memcpy(node_ptr->energy, reg_msg->energy, sizeof(acct_gather_energy_t)); + + if (slurmctld_init_db && + !IS_NODE_DOWN(node_ptr) && + !IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) { + /* reason information is handled in + clusteracct_storage_g_node_up() + */ + clusteracct_storage_g_node_up( + acct_db_conn, node_ptr, now); + } + } if (reg_hostlist) { @@ -2750,14 +2770,13 @@ static void
_node_did_resp(front_end_record_t *fe_ptr) time_t now = time(NULL); fe_ptr->last_response = now; -#ifndef HAVE_ALPS_CRAY - /* This is handled by the select/cray plugin */ + if (IS_NODE_NO_RESPOND(fe_ptr)) { info("Node %s now responding", fe_ptr->name); last_front_end_update = now; fe_ptr->node_state &= (~NODE_STATE_NO_RESPOND); } -#endif + node_flags = fe_ptr->node_state & NODE_STATE_FLAGS; if (IS_NODE_UNKNOWN(fe_ptr)) { last_front_end_update = now; diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 4e22f12ba..a5869b95f 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -2528,6 +2528,11 @@ static int _build_node_list(struct job_record *job_ptr, return rc; } + /* Clear message about any nodes which fail to satisfy specific + * job requirements as there are some nodes which can be used */ + if (err_msg) + xfree(*err_msg); + /* If any nodes are powered down, put them into a new node_set * record with a higher scheduling weight. This means we avoid * scheduling jobs on powered down nodes where possible. */ diff --git a/src/slurmctld/ping_nodes.c b/src/slurmctld/ping_nodes.c index 9ed86ce5e..1b2d7c6f5 100644 --- a/src/slurmctld/ping_nodes.c +++ b/src/slurmctld/ping_nodes.c @@ -62,9 +62,12 @@ /* Request that nodes re-register at most every MAX_REG_FREQUENCY pings */ #define MAX_REG_FREQUENCY 20 +/* Log an error for ping that takes more than 100 seconds to complete */ +#define PING_TIMEOUT 100 + static pthread_mutex_t lock_mutex = PTHREAD_MUTEX_INITIALIZER; static int ping_count = 0; - +static time_t ping_start = 0; /* * is_ping_done - test if the last node ping cycle has completed. @@ -74,11 +77,21 @@ static int ping_count = 0; */ bool is_ping_done (void) { + static bool ping_msg_sent = false; bool is_done = true; slurm_mutex_lock(&lock_mutex); - if (ping_count) + if (ping_count) { is_done = false; + if (!ping_msg_sent && + (difftime(time(NULL), ping_start) >= PING_TIMEOUT)) { + error("Node ping apparently hung, " + "many nodes may be DOWN or configured " + "SlurmdTimeout should be increased"); + ping_msg_sent = true; + } + } else + ping_msg_sent = false; slurm_mutex_unlock(&lock_mutex); return is_done; @@ -94,6 +107,7 @@ void ping_begin (void) { slurm_mutex_lock(&lock_mutex); ping_count++; + ping_start = time(NULL); slurm_mutex_unlock(&lock_mutex); } @@ -110,6 +124,7 @@ void ping_end (void) ping_count--; else fatal ("ping_count < 0"); + ping_start = 0; slurm_mutex_unlock(&lock_mutex); } @@ -233,11 +248,13 @@ void ping_nodes (void) (front_end_ptr->last_response >= still_live_time)) continue; - /* Do not keep pinging down nodes since this can induce - * huge delays in hierarchical communication fail-over */ - if (IS_NODE_NO_RESPOND(front_end_ptr) && - IS_NODE_DOWN(front_end_ptr)) - continue; + /* The problems that exist on a normal system with + * hierarchical communication don't exist on a + * front-end system, so it is ok to ping none + * responding or down front-end nodes. 
*/ + /* if (IS_NODE_NO_RESPOND(front_end_ptr) && */ + /* IS_NODE_DOWN(front_end_ptr)) */ + /* continue; */ if (ping_agent_args->protocol_version > front_end_ptr->protocol_version) diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index d76def7db..ed6a20b23 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -568,7 +568,10 @@ static void _throttle_start(int *active_rpc_cnt) pthread_cond_wait(&throttle_cond, &throttle_mutex); } slurm_mutex_unlock(&throttle_mutex); - usleep(1); + if (LOTS_OF_AGENTS) + usleep(1000); + else + usleep(1); } static void _throttle_fini(int *active_rpc_cnt) { @@ -1590,6 +1593,7 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg) * the epilog denoting the completion of a job it its entirety */ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg) { + static int active_rpc_cnt = 0; static time_t config_update = 0; static bool defer_sched = false; DEF_TIMERS; @@ -1619,11 +1623,13 @@ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg) config_update = slurmctld_conf.last_update; } + _throttle_start(&active_rpc_cnt); lock_slurmctld(job_write_lock); if (job_epilog_complete(epilog_msg->job_id, epilog_msg->node_name, epilog_msg->return_code)) run_scheduler = true; unlock_slurmctld(job_write_lock); + _throttle_fini(&active_rpc_cnt); END_TIMER2("_slurm_rpc_epilog_complete"); if (epilog_msg->return_code) @@ -1646,7 +1652,7 @@ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg) * calls can be very high for large machine or large number * of managed jobs. */ - if (!defer_sched) + if (!LOTS_OF_AGENTS && !defer_sched) (void) schedule(0); /* Has own locking */ schedule_node_save(); /* Has own locking */ schedule_job_save(); /* Has own locking */ diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index e5c71d015..624f98376 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -556,7 +556,7 @@ struct job_record { * by the job, decremented while job is * completing (N/A for bluegene * systems) */ - uint16_t cr_enabled; /* specify if if Consumable Resources + uint16_t cr_enabled; /* specify if Consumable Resources * is enabled. 
Needed since CR deals * with a finer granularity in its * node/cpu scheduling (available cpus diff --git a/src/slurmd/slurmd/get_mach_stat.c b/src/slurmd/slurmd/get_mach_stat.c index 942940136..d7c5eb1a0 100644 --- a/src/slurmd/slurmd/get_mach_stat.c +++ b/src/slurmd/slurmd/get_mach_stat.c @@ -81,6 +81,10 @@ #include <sys/utsname.h> +#ifdef HAVE_SYS_STATVFS_H +# include <sys/statvfs.h> +#endif + #ifdef HAVE_SYS_STATFS_H # include <sys/statfs.h> #else @@ -212,7 +216,29 @@ extern int get_tmp_disk(uint32_t *tmp_disk, char *tmp_fs) { int error_code = 0; -#ifdef HAVE_SYS_VFS_H + +#if defined(HAVE_STATVFS) + struct statvfs stat_buf; + uint64_t total_size = 0; + char *tmp_fs_name = tmp_fs; + + *tmp_disk = 0; + total_size = 0; + + if (tmp_fs_name == NULL) + tmp_fs_name = "/tmp"; + if (statvfs(tmp_fs_name, &stat_buf) == 0) { + total_size = stat_buf.f_blocks * stat_buf.f_frsize; + total_size /= 1024 * 1024; + } + else if (errno != ENOENT) { + error_code = errno; + error ("get_tmp_disk: error %d executing statvfs on %s", + errno, tmp_fs_name); + } + *tmp_disk += (uint32_t)total_size; + +#elif defined(HAVE_STATFS) struct statfs stat_buf; long total_size; float page_size; diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 8405e6322..3c16d7e61 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -1983,9 +1983,10 @@ _load_job_limits(void) if (fd == -1) continue; /* step completed */ - if (!stepd_get_mem_limits(fd, stepd->protocol_version, - &stepd_mem_info)) { - error("Error reading step %u.%u memory limits", + if (stepd_get_mem_limits(fd, stepd->protocol_version, + &stepd_mem_info) != SLURM_SUCCESS) { + error("Error reading step %u.%u memory limits from " + "slurmstepd", stepd->jobid, stepd->stepid); close(fd); continue; diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index eac93d1ac..c334e884b 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -2049,8 +2049,8 @@ static int _memory_spec_init(void) pid_t pid; if (conf->mem_spec_limit == 0) { - info ("Resource spec: system memory limit not configured " - "for this node"); + info ("Resource spec: Reserved system memory limit not " + "configured for this node"); return SLURM_SUCCESS; } if (init_system_memory_cgroup() != SLURM_SUCCESS) { diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c index 1fffe4bdc..4bffdba12 100644 --- a/src/slurmdbd/proc_req.c +++ b/src/slurmdbd/proc_req.c @@ -2777,10 +2777,15 @@ static int _node_state(slurmdbd_conn_t *slurmdbd_conn, _node_state_string(node_state_msg->new_state), node_state_msg->reason, (long)node_state_msg->event_time); + /* clusteracct_storage_g_node_up can change the reason + * field so copy it to avoid memory issues. 
+ */ + node_ptr.reason = xstrdup(node_state_msg->reason); rc = clusteracct_storage_g_node_up( slurmdbd_conn->db_conn, &node_ptr, node_state_msg->event_time); + xfree(node_ptr.reason); } else { debug2("DBD_NODE_STATE: NODE:%s STATE:%s " "REASON:%s UID:%u TIME:%ld", @@ -3450,6 +3455,8 @@ static int _remove_res(slurmdbd_conn_t *slurmdbd_conn, pack16((uint16_t) DBD_GOT_LIST, *out_buffer); slurmdbd_pack_list_msg(&list_msg, slurmdbd_conn->rpc_version, DBD_GOT_LIST, *out_buffer); + FREE_NULL_LIST(list_msg.my_list); + return rc; } diff --git a/src/squeue/print.c b/src/squeue/print.c index d90858cd3..9a21f217b 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -351,11 +351,15 @@ int print_job_from_format(squeue_job_rec_t *job_rec_ptr, List list) xfree(job_rec_ptr->job_ptr->partition); job_rec_ptr->job_ptr->partition = xstrdup(job_rec_ptr-> part_name); - + } if (job_rec_ptr->job_ptr->array_task_str && params.array_flag) { + char *p; + if (max_array_size == -1) max_array_size = slurm_get_max_array_size(); + if ((p = strchr(job_rec_ptr->job_ptr->array_task_str, '%'))) + *p = 0; bitmap = bit_alloc(max_array_size); bit_unfmt(bitmap, job_rec_ptr->job_ptr->array_task_str); xfree(job_rec_ptr->job_ptr->array_task_str); diff --git a/src/sreport/cluster_reports.c b/src/sreport/cluster_reports.c index 9b7f2bc39..c1217511e 100644 --- a/src/sreport/cluster_reports.c +++ b/src/sreport/cluster_reports.c @@ -508,11 +508,17 @@ static int _setup_print_fields_list(List format_list) else field->len = 15; field->print_routine = print_fields_str; - } else if (!strncasecmp("Energy", object, MAX(command_len, 1))) { + } else if (!strncasecmp("Energy", object, + MAX(command_len, 1))) { field->type = PRINT_CLUSTER_ENERGY; field->name = xstrdup("Energy"); - field->len = 10; - field->print_routine = print_fields_int; + if (time_format == SLURMDB_REPORT_TIME_SECS_PER + || time_format == SLURMDB_REPORT_TIME_MINS_PER + || time_format == SLURMDB_REPORT_TIME_HOURS_PER) + field->len = 18; + else + field->len = 10; + field->print_routine = slurmdb_report_print_time; } else { exit_code=1; fprintf(stderr, " Unknown field '%s'\n", object); @@ -742,14 +748,17 @@ extern int cluster_account_by_user(int argc, char *argv[]) field->print_routine( field, slurmdb_report_assoc->cpu_secs, - slurmdb_report_cluster->cpu_secs, + slurmdb_report_cluster-> + cpu_secs, (curr_inx == field_count)); break; case PRINT_CLUSTER_ENERGY: field->print_routine( field, - slurmdb_report_assoc->consumed_energy, - slurmdb_report_cluster->consumed_energy, + slurmdb_report_assoc-> + consumed_energy, + slurmdb_report_cluster-> + consumed_energy, (curr_inx == field_count)); break; default: @@ -903,14 +912,17 @@ extern int cluster_user_by_account(int argc, char *argv[]) field->print_routine( field, slurmdb_report_user->cpu_secs, - slurmdb_report_cluster->cpu_secs, + slurmdb_report_cluster-> + cpu_secs, (curr_inx == field_count)); break; case PRINT_CLUSTER_ENERGY: field->print_routine( field, - slurmdb_report_user->consumed_energy, - slurmdb_report_cluster->consumed_energy, + slurmdb_report_user-> + consumed_energy, + slurmdb_report_cluster-> + consumed_energy, (curr_inx == field_count)); break; default: @@ -1064,14 +1076,17 @@ extern int cluster_user_by_wckey(int argc, char *argv[]) field->print_routine( field, slurmdb_report_user->cpu_secs, - slurmdb_report_cluster->cpu_secs, + slurmdb_report_cluster-> + cpu_secs, (curr_inx == field_count)); break; case PRINT_CLUSTER_ENERGY: field->print_routine( field, - slurmdb_report_user->consumed_energy, - 
slurmdb_report_cluster->consumed_energy, + slurmdb_report_user-> + consumed_energy, + slurmdb_report_cluster-> + consumed_energy, (curr_inx == field_count)); break; @@ -1235,6 +1250,7 @@ extern int cluster_utilization(int argc, char *argv[]) break; case PRINT_CLUSTER_ENERGY: field->print_routine(field, + total_acct.consumed_energy, total_acct.consumed_energy, (curr_inx == field_count)); diff --git a/src/sreport/user_reports.c b/src/sreport/user_reports.c index c35780079..3c09c3da4 100644 --- a/src/sreport/user_reports.c +++ b/src/sreport/user_reports.c @@ -225,8 +225,13 @@ static int _setup_print_fields_list(List format_list) }else if (!strncasecmp("Energy", object, MAX(command_len, 1))) { field->type = PRINT_USER_ENERGY; field->name = xstrdup("Energy"); - field->len = 10; - field->print_routine = print_fields_int; + if (time_format == SLURMDB_REPORT_TIME_SECS_PER + || time_format == SLURMDB_REPORT_TIME_MINS_PER + || time_format == SLURMDB_REPORT_TIME_HOURS_PER) + field->len = 18; + else + field->len = 10; + field->print_routine = slurmdb_report_print_time; } else { exit_code=1; fprintf(stderr, " Unknown field '%s'\n", object); @@ -372,13 +377,17 @@ extern int user_top(int argc, char *argv[]) field->print_routine( field, slurmdb_report_user->cpu_secs, - slurmdb_report_cluster->cpu_secs, + slurmdb_report_cluster-> + cpu_secs, (curr_inx == field_count)); break; case PRINT_USER_ENERGY: field->print_routine( field, - slurmdb_report_user->consumed_energy, + slurmdb_report_user-> + consumed_energy, + slurmdb_report_cluster-> + consumed_energy, (curr_inx ==field_count)); break; default: diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c index a5a9db7aa..d3c5d8837 100644 --- a/src/srun/libsrun/opt.c +++ b/src/srun/libsrun/opt.c @@ -1778,7 +1778,8 @@ static void _opt_args(int argc, char **argv) #endif /* make sure we have allocated things correctly */ - xassert((command_pos + command_args) <= opt.argc); + if (command_args) + xassert((command_pos + command_args) <= opt.argc); for (i = command_pos; i < opt.argc; i++) { if (!rest || !rest[i-command_pos]) diff --git a/src/sshare/process.c b/src/sshare/process.c index 6cd0b4539..873bcdefd 100644 --- a/src/sshare/process.c +++ b/src/sshare/process.c @@ -42,7 +42,7 @@ extern int long_flag; -extern int process(shares_response_msg_t *resp) +extern int process(shares_response_msg_t *resp, uint16_t options) { uint32_t flags = slurmctld_conf.priority_flags; int rc = SLURM_SUCCESS; @@ -213,6 +213,9 @@ extern int process(shares_response_msg_t *resp) char *tmp_char = NULL; char *local_acct = NULL; + if ((options & PRINT_USERS_ONLY) && share->user == 0) + continue; + while ((field = list_next(itr2))) { switch(field->type) { case PRINT_ACCOUNT: diff --git a/src/sshare/sshare.c b/src/sshare/sshare.c index 8e95aeea2..5315bf6bf 100644 --- a/src/sshare/sshare.c +++ b/src/sshare/sshare.c @@ -70,6 +70,7 @@ main (int argc, char *argv[]) char *temp = NULL; int option_index; bool all_users = 0; + uint16_t options = 0; static struct option long_options[] = { {"accounts", 1, 0, 'A'}, @@ -81,6 +82,7 @@ main (int argc, char *argv[]) {"parsable", 0, 0, 'p'}, {"parsable2",0, 0, 'P'}, {"users", 1, 0, 'u'}, + {"Users", 0, 0, 'U'}, {"verbose", 0, 0, 'v'}, {"version", 0, 0, 'V'}, {"help", 0, 0, OPT_LONG_HELP}, @@ -96,7 +98,7 @@ main (int argc, char *argv[]) slurm_conf_init(NULL); log_init("sshare", opts, SYSLOG_FACILITY_DAEMON, NULL); - while((opt_char = getopt_long(argc, argv, "aA:hlM:npPqu:t:vV", + while((opt_char = getopt_long(argc, argv, "aA:hlM:npPqUu:t:vV", long_options,
&option_index)) != -1) { switch (opt_char) { case (int)'?': @@ -153,6 +155,9 @@ main (int argc, char *argv[]) list_create(slurm_destroy_char); _addto_name_char_list(req_msg.user_list, optarg, 0); break; + case 'U': + options |= PRINT_USERS_ONLY; + break; case 'v': quiet_flag = -1; verbosity++; @@ -207,11 +212,13 @@ main (int argc, char *argv[]) } if (req_msg.acct_list && list_count(req_msg.acct_list)) { - fprintf(stderr, "Accounts requested:\n"); - ListIterator itr = list_iterator_create(req_msg.acct_list); - while((temp = list_next(itr))) - fprintf(stderr, "\t: %s\n", temp); - list_iterator_destroy(itr); + if (verbosity) { + fprintf(stderr, "Accounts requested:\n"); + ListIterator itr = list_iterator_create(req_msg.acct_list); + while((temp = list_next(itr))) + fprintf(stderr, "\t: %s\n", temp); + list_iterator_destroy(itr); + } } else { if (req_msg.acct_list && list_count(req_msg.acct_list)) { @@ -236,7 +243,7 @@ main (int argc, char *argv[]) } /* do stuff with it */ - process(resp_msg); + process(resp_msg, options); slurm_free_shares_response_msg(resp_msg); diff --git a/src/sshare/sshare.h b/src/sshare/sshare.h index 324050251..3cf9964f7 100644 --- a/src/sshare/sshare.h +++ b/src/sshare/sshare.h @@ -87,6 +87,10 @@ #define CKPT_WAIT 10 #define MAX_INPUT_FIELDS 128 +/* Print only the users and not the hierarchy. + */ +#define PRINT_USERS_ONLY 0x01 + typedef enum { SSHARE_TIME_SECS, SSHARE_TIME_MINS, @@ -100,6 +104,6 @@ extern sshare_time_format_t time_format; extern char *time_format_string; extern List clusters; -extern int process(shares_response_msg_t *msg); +extern int process(shares_response_msg_t *msg, uint16_t options); #endif diff --git a/src/sview/sview.c b/src/sview/sview.c index c8626946d..75c42a117 100644 --- a/src/sview/sview.c +++ b/src/sview/sview.c @@ -1421,7 +1421,7 @@ extern void toggle_tab_visiblity(GtkToggleButton *toggle_button, return; } -extern void tab_pressed(GtkWidget *widget, GdkEventButton *event, +extern gboolean tab_pressed(GtkWidget *widget, GdkEventButton *event, display_data_t *display_data) { signal_params_t signal_params; @@ -1431,10 +1431,10 @@ extern void tab_pressed(GtkWidget *widget, GdkEventButton *event, /* single click with the right mouse button? 
*/ gtk_notebook_set_current_page(GTK_NOTEBOOK(main_notebook), display_data->extra); - if ((display_data->extra != TAB_PAGE) && (event->button == 3)) { + if ((display_data->extra != TAB_PAGE) && (event->button == 3)) right_button_pressed(NULL, NULL, event, &signal_params, TAB_CLICKED); - } + return true; } extern void close_tab(GtkWidget *widget, GdkEventButton *event, diff --git a/src/sview/sview.h b/src/sview/sview.h index 5f8a8e2f9..07c2c4ee7 100644 --- a/src/sview/sview.h +++ b/src/sview/sview.h @@ -368,8 +368,8 @@ extern void print_grid(int dir); extern void refresh_main(GtkAction *action, gpointer user_data); extern void toggle_tab_visiblity(GtkToggleButton *toggle_button, display_data_t *display_data); -extern void tab_pressed(GtkWidget *widget, GdkEventButton *event, - display_data_t *display_data); +extern gboolean tab_pressed(GtkWidget *widget, GdkEventButton *event, + display_data_t *display_data); extern void close_tab(GtkWidget *widget, GdkEventButton *event, display_data_t *display_data); diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am index e2d2e4c51..7b915e4c1 100644 --- a/testsuite/expect/Makefile.am +++ b/testsuite/expect/Makefile.am @@ -409,6 +409,7 @@ EXTRA_DIST = \ test20.9 \ test20.10 \ test20.11 \ + test20.12 \ test21.1 \ test21.2 \ test21.3 \ diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in index 1f9001eb7..033cfd52e 100644 --- a/testsuite/expect/Makefile.in +++ b/testsuite/expect/Makefile.in @@ -793,6 +793,7 @@ EXTRA_DIST = \ test20.9 \ test20.10 \ test20.11 \ + test20.12 \ test21.1 \ test21.2 \ test21.3 \ diff --git a/testsuite/expect/README b/testsuite/expect/README index 9687c124d..bf1f0289f 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -585,6 +585,7 @@ test20.8 Test the qalter -N option for changing the name of a job. test20.9 Test for the qalter --man option test20.10 Test for the qrerun --help option test20.11 Test for the qrerun --man option +test20.12 Test for qsub -V/-v arguments and their interaction with sbatch scripts test21.# Testing of sacctmgr commands and options. diff --git a/testsuite/expect/test20.12 b/testsuite/expect/test20.12 new file mode 100755 index 000000000..883493969 --- /dev/null +++ b/testsuite/expect/test20.12 @@ -0,0 +1,154 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test for the qsub -V and -v options +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. 
+############################################################################
+source ./globals
+
+set test_id "20.12"
+set file_in "test$test_id.input"
+set file_out "test$test_id.output"
+set exit_code 0
+set variable1_value "DOYOUWANTTO"
+set variable2_value "BUILDASNOWMAN"
+
+print_header $test_id
+
+if {[file executable $qsub] == 0} {
+	send_user "\nWARNING: qsub was not found\n"
+	exit 0
+}
+
+proc make_random_var_name {} {
+	set randvar "RANDOM_VARIABLE_"
+	append randvar [expr int(rand() * 1000 + 1000 )]
+	return $randvar
+}
+
+proc run_qsub { variable1_name variable1_expect_val variable2_name variable2_expect_val qsub_extra_options extra_batch_header } {
+	global file_in
+	global file_out
+	global exit_code
+	global test_var_value
+	global bin_cat
+	global bin_echo
+	global bin_rm
+	global qsub
+	global number
+	global env
+	global variable1_value
+	global variable2_value
+	global subtest_name
+
+	send_user "\n==============================================\n"
+	send_user "$subtest_name\n"
+	send_user "==============================================\n"
+
+	set job_id 0
+	exec rm -f $file_out
+	make_bash_script $file_in "$extra_batch_header\n$bin_echo $variable1_name=\$$variable1_name\n$bin_echo $variable2_name=\$$variable2_name\n"
+
+	# Set our 2 test variables
+	set env($variable1_name) $variable1_value
+	set env($variable2_name) $variable2_value
+
+	set qsub_base_options [concat -l walltime=1:00 -o $file_out $file_in]
+	set qsub_options [concat $qsub_extra_options $qsub_base_options]
+
+	eval spawn $qsub [lrange $qsub_options 0 end]
+
+	unset env($variable1_name)
+	unset env($variable2_name)
+
+	expect {
+		-re "($number)" {
+			set job_id $expect_out(1,string)
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: qsub not responding\n"
+			set exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+	if {$job_id == 0} {
+		send_user "\nFAILURE: qsub failed to submit job\n"
+		exit 1
+	}
+
+	if {[wait_for_file $file_out] != 0} {
+		send_user "\nFAILURE: qsub output file not found\n"
+		catch {exec $qdel $job_id}
+		exit 1
+	}
+	set matches 0
+	spawn $bin_cat $file_out
+	expect {
+		"$variable1_name=$variable1_expect_val\r\n" {
+			incr matches
+			exp_continue
+		}
+		"$variable2_name=$variable2_expect_val\r\n" {
+			incr matches
+			exp_continue
+		}
+		eof {
+			wait
+		}
+	}
+
+	if {$matches != 2} {
+		send_user "\nFAILURE: one or more environment variables failed to match their expected values for sub test '$subtest_name'\n"
+		set exit_code 1
+	}
+
+	if {$exit_code == 0} {
+		exec $bin_rm -f $file_in $file_out
+	} else {
+		exit $exit_code
+	}
+}
+
+# Test -V
+set subtest_name "Test -V"
+run_qsub [make_random_var_name] $variable1_value [make_random_var_name] $variable2_value "-V" ""
+
+# Test -V w/sbatch --export=NONE
+set subtest_name "Test -V w/sbatch --export=NONE"
+run_qsub [make_random_var_name] $variable1_value [make_random_var_name] $variable2_value "-V" "#SBATCH --export=NONE"
+
+# Test -V, -v var
+set subtest_name "Test -V, -v var"
+set var1_name [make_random_var_name]
+set var2_name [make_random_var_name]
+run_qsub $var1_name $variable1_value $var2_name $variable2_value "-V -v $var1_name" ""
+unset var1_name var2_name
+
+# Test -V, -v var w/sbatch --export=NONE
+set subtest_name "Test -V, -v var w/sbatch --export=NONE"
+set var1_name [make_random_var_name]
+set var2_name [make_random_var_name]
+run_qsub $var1_name $variable1_value $var2_name $variable2_value "-V -v $var1_name" "#SBATCH --export=NONE"
+unset var1_name var2_name
+
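+# The remaining sub-tests additionally pass an explicit value via -v var=val
+# ("LETITGOOO" is simply the literal used by this test); the value given on
+# the command line is expected to take precedence over the value the variable
+# holds in the submitting environment.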
+# Test -V, -v var=val
+set subtest_name "Test -V, -v var=val"
+set var1_name [make_random_var_name]
+set var2_name [make_random_var_name]
+run_qsub $var1_name "LETITGOOO" $var2_name $variable2_value "-V -v $var1_name=LETITGOOO" ""
+unset var1_name var2_name
+
+# Test -V, -v var=val w/sbatch --export=NONE
+set subtest_name "Test -V, -v var=val w/sbatch --export=NONE"
+set var1_name [make_random_var_name]
+set var2_name [make_random_var_name]
+run_qsub $var1_name "LETITGOOO" $var2_name $variable2_value "-V -v $var1_name=LETITGOOO" "#SBATCH --export=NONE"
+
+send_user "\nSUCCESS\n"
+exit 0
diff --git a/testsuite/expect/test24.1.prog.c b/testsuite/expect/test24.1.prog.c
index 177e4f280..564407770 100644
--- a/testsuite/expect/test24.1.prog.c
+++ b/testsuite/expect/test24.1.prog.c
@@ -329,7 +329,7 @@ int main (int argc, char **argv)
 	sleep(1);
 	memset(&resp, 0, sizeof(shares_response_msg_t));
 	resp.assoc_shares_list = assoc_mgr_get_shares(NULL, 0, NULL, NULL);
-	process(&resp);
+	process(&resp, 0);
 
 	/* free memory */
 	if (slurm_priority_fini() != SLURM_SUCCESS)
diff --git a/testsuite/expect/test24.3.prog.c b/testsuite/expect/test24.3.prog.c
index 1bc9692c4..34b4f708d 100644
--- a/testsuite/expect/test24.3.prog.c
+++ b/testsuite/expect/test24.3.prog.c
@@ -437,7 +437,7 @@ int main (int argc, char **argv)
 	sleep(1);
 	memset(&resp, 0, sizeof(shares_response_msg_t));
 	resp.assoc_shares_list = assoc_mgr_get_shares(NULL, 0, NULL, NULL);
-	process(&resp);
+	process(&resp, 0);
 
 	/* free memory */
 	if (slurm_priority_fini() != SLURM_SUCCESS)
diff --git a/testsuite/expect/test24.4.prog.c b/testsuite/expect/test24.4.prog.c
index 2d4b3f4ec..e6863538a 100644
--- a/testsuite/expect/test24.4.prog.c
+++ b/testsuite/expect/test24.4.prog.c
@@ -490,7 +490,7 @@ int main (int argc, char **argv)
 	sleep(1);
 	memset(&resp, 0, sizeof(shares_response_msg_t));
 	resp.assoc_shares_list = assoc_mgr_get_shares(NULL, 0, NULL, NULL);
-	process(&resp);
+	process(&resp, 0);
 
 	/* free memory */
 	if (slurm_priority_fini() != SLURM_SUCCESS)
diff --git a/testsuite/expect/test7.11 b/testsuite/expect/test7.11
index ed3adfb25..7ea39f099 100755
--- a/testsuite/expect/test7.11
+++ b/testsuite/expect/test7.11
@@ -91,7 +91,7 @@ if {[file exists $spank_conf_file]} {
 	expect {
 		-re "test${test_id}" {
 			send_user "\nFAILURE: spank plugin includes vestigial test${test_id}\n"
-			send_user "         You probably should manually remove it from $spank_conf_file.\n"
+			send_user "         You probably should manually remove it from $spank_conf_file\n"
 			send_user "         It was probably left over from some previous test failure.\n"
 			exit 1
 		}
-- 
GitLab