diff --git a/META b/META index f881ff2f76657ca3e38aa547e81e2019a7775852..3bc0fd1c42755aba71b63fbc944784a70a08cf49 100644 --- a/META +++ b/META @@ -9,8 +9,8 @@ Name: slurm Major: 14 Minor: 03 - Micro: 6 - Version: 14.03.6 + Micro: 7 + Version: 14.03.7 Release: 1 ## diff --git a/NEWS b/NEWS index c29d21213b516ab6be63b4c3f85b83cd13eb4d79..4e8c2019cb61bde27ee184496ef3dbd823f1e84e 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,82 @@ This file describes changes in recent versions of Slurm. It primarily documents those changes that are of interest to users and admins. +* Changes in Slurm 14.03.7 +========================== + -- Correct typos in man pages. + -- Add note to MaxNodesPerUser about multiple jobs running on the same node + counting as multiple nodes. + -- PerlAPI - fix renamed call from slurm_api_set_conf_file to + slurm_conf_reinit. + -- Fix gres race condition that could result in a job deallocation error message. + -- Correct NumCPUs count for jobs with --exclusive option. + -- When creating a reservation with CoreCnt, check that Slurm uses + SelectType=select/cons_res, otherwise don't send the request to slurmctld + and return an error. + -- Save the state of scheduled node reboots so they will not be lost should the + slurmctld restart. + -- In select/cons_res plugin - Ensure the node count does not exceed the task + count. + -- switch/nrt - Unload tables rather than windows at job end, to release CAU. + -- When HealthCheckNodeState is configured as IDLE, don't run the + HealthCheckProgram for nodes in any state other than IDLE. + -- Minor sanity check to verify the string sent in isn't NULL when using + bit_unfmt. + -- CRAY NATIVE - Fix issue on heavy systems to only run the NHC once per + job/step completion. + -- Remove unneeded step cleanup for pending steps. + -- Fix issue where if a batch job was manually requeued the batch step + information wasn't stored in accounting. + -- When a job is released from a requeue hold state, clean up its previous + exit code. + -- Correct the srun man page about how the output from the user application + is sent to srun. + -- Increase the timeout of the main thread while waiting for the i/o thread. + Allow up to 180 seconds for the i/o thread to complete. + -- When using sacct -c to read the job completion data, compute the correct + job elapsed time. + -- Perl package: Define some missing node states. + -- When using AccountingStorageType=accounting_storage/mysql, zero out the + database index for the array elements, avoiding duplicate database values. + -- Reword the explanation of cputime and cputimeraw in the sacct man page. + -- JobCompType allows "jobcomp/mysql" as a valid name but the code used + "job_comp/mysql", setting an incorrect default database. + -- Try to load libslurm.so only when necessary. + -- When nodes are scheduled for reboot, set state to DOWN rather than FUTURE so + they are still visible to sinfo. State is set to IDLE after reboot completes. + -- Apply BatchStartTimeout configuration to task launch and avoid aborting + srun commands due to long running Prolog scripts. + -- Fix minor memory leaks when freeing node_info_t structure. + -- Fix various memory leaks in sview. + -- If a batch script is requeued, its running steps get the correct exit + code/signal; previously it was always -2. + -- If a step's exit code hasn't been set, sacct displays -2 instead of + treating it as a signal and exit code. + -- Send the calculated step_rc for the batch step instead of the raw status, + as is done for normal steps. 
+ -- If a job times out, set the exit code in accounting to 1 instead of the + signal 1. + -- Update the acct_gather.conf.5 man page, removing the reference to + InfinibandOFEDFrequency. + -- Fix gang scheduling for jobs submitted to multiple partitions. + -- Enable srun to submit a job to multiple partitions. + -- Update slurm.conf man page. When the Epilog or Prolog fails, the node state + is set to DRAIN. + -- Start a job in the highest priority partition possible, even if it requires + preempting other jobs and delaying initiation, rather than using a lower + priority partition. Previous logic would preempt lower priority jobs, but + then might start the job in a lower priority partition and not use the + resources released by the preempted jobs. + -- Fix SelectTypeParameters=CR_PACK_NODES for srun making both the job and step + resource allocations. + -- BGQ - Make it possible to pack multiple tasks on a core when not using + the entire cnode. + -- MYSQL - if unable to connect to mysqld, close the connection that was initialized. + -- DBD - when connecting, make sure we wait MessageTimeout + 5 seconds, since the + timeout when talking to the database is the same timeout, so a race + condition could occur in the requesting client when receiving the response + if the database is unresponsive. + * Changes in Slurm 14.03.6 ========================== -- Added examples to demonstrate the use of the sacct -T option to the man diff --git a/auxdir/Makefile.am b/auxdir/Makefile.am index c4ee546a97a1524222802bcba978cc5c5185fd10..e493002488521afe78ab57bfe32dcbf8ca935cd9 100644 --- a/auxdir/Makefile.am +++ b/auxdir/Makefile.am @@ -7,6 +7,7 @@ EXTRA_DIST = \ acx_pthread.m4 \ slurm.m4 \ + test-driver \ type_socklen_t.m4 \ x_ac__system_configuration.m4 \ x_ac_affinity.m4 \ diff --git a/auxdir/Makefile.in b/auxdir/Makefile.in index d29775c589e2cc54ab43f7e40031fd4a038e0709..69c63957063eb20bba1035aa12dd0b5de75f2daf 100644 --- a/auxdir/Makefile.in +++ b/auxdir/Makefile.in @@ -388,6 +388,7 @@ top_srcdir = @top_srcdir@ EXTRA_DIST = \ acx_pthread.m4 \ slurm.m4 \ + test-driver \ type_socklen_t.m4 \ x_ac__system_configuration.m4 \ x_ac_affinity.m4 \ diff --git a/auxdir/test-driver b/auxdir/test-driver new file mode 100755 index 0000000000000000000000000000000000000000..d30605660a0612aa12702dd7e0d0a3c86e7f7dad --- /dev/null +++ b/auxdir/test-driver @@ -0,0 +1,139 @@ +#! /bin/sh +# test-driver - basic testsuite driver script. + +scriptversion=2013-07-13.22; # UTC + +# Copyright (C) 2011-2013 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. 
+ +# This file is maintained in Automake, please report +# bugs to <bug-automake@gnu.org> or send patches to +# <automake-patches@gnu.org>. + +# Make unconditional expansion of undefined variables an error. This +# helps a lot in preventing typo-related bugs. +set -u + +usage_error () +{ + echo "$0: $*" >&2 + print_usage >&2 + exit 2 +} + +print_usage () +{ + cat <<END +Usage: + test-driver --test-name=NAME --log-file=PATH --trs-file=PATH + [--expect-failure={yes|no}] [--color-tests={yes|no}] + [--enable-hard-errors={yes|no}] [--] + TEST-SCRIPT [TEST-SCRIPT-ARGUMENTS] +The '--test-name', '--log-file' and '--trs-file' options are mandatory. +END +} + +test_name= # Used for reporting. +log_file= # Where to save the output of the test script. +trs_file= # Where to save the metadata of the test run. +expect_failure=no +color_tests=no +enable_hard_errors=yes +while test $# -gt 0; do + case $1 in + --help) print_usage; exit $?;; + --version) echo "test-driver $scriptversion"; exit $?;; + --test-name) test_name=$2; shift;; + --log-file) log_file=$2; shift;; + --trs-file) trs_file=$2; shift;; + --color-tests) color_tests=$2; shift;; + --expect-failure) expect_failure=$2; shift;; + --enable-hard-errors) enable_hard_errors=$2; shift;; + --) shift; break;; + -*) usage_error "invalid option: '$1'";; + *) break;; + esac + shift +done + +missing_opts= +test x"$test_name" = x && missing_opts="$missing_opts --test-name" +test x"$log_file" = x && missing_opts="$missing_opts --log-file" +test x"$trs_file" = x && missing_opts="$missing_opts --trs-file" +if test x"$missing_opts" != x; then + usage_error "the following mandatory options are missing:$missing_opts" +fi + +if test $# -eq 0; then + usage_error "missing argument" +fi + +if test $color_tests = yes; then + # Keep this in sync with 'lib/am/check.am:$(am__tty_colors)'. + red='[0;31m' # Red. + grn='[0;32m' # Green. + lgn='[1;32m' # Light green. + blu='[1;34m' # Blue. + mgn='[0;35m' # Magenta. + std='[m' # No color. +else + red= grn= lgn= blu= mgn= std= +fi + +do_exit='rm -f $log_file $trs_file; (exit $st); exit $st' +trap "st=129; $do_exit" 1 +trap "st=130; $do_exit" 2 +trap "st=141; $do_exit" 13 +trap "st=143; $do_exit" 15 + +# Test script is run here. +"$@" >$log_file 2>&1 +estatus=$? +if test $enable_hard_errors = no && test $estatus -eq 99; then + estatus=1 +fi + +case $estatus:$expect_failure in + 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; + 0:*) col=$grn res=PASS recheck=no gcopy=no;; + 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; + 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; + *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; + *:*) col=$red res=FAIL recheck=yes gcopy=yes;; +esac + +# Report outcome to console. +echo "${col}${res}${std}: $test_name" + +# Register the test result, and other relevant metadata. 
+echo ":test-result: $res" > $trs_file +echo ":global-test-result: $res" >> $trs_file +echo ":recheck: $recheck" >> $trs_file +echo ":copy-in-global-log: $gcopy" >> $trs_file + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c index 901704d5ee17f0df1ae0b969ffe55260c751490b..debd0e3fc8d1dce22404c8d28e742a6ae4643633 100644 --- a/contribs/pam/pam_slurm.c +++ b/contribs/pam/pam_slurm.c @@ -422,7 +422,9 @@ extern void libpam_slurm_init (void) SLURM_API_REVISION, SLURM_API_AGE) >= sizeof(libslurmname) ) { _log_msg (LOG_ERR, "Unable to write libslurmname\n"); - } else if (!(slurm_h = dlopen(libslurmname, RTLD_NOW|RTLD_GLOBAL))) { + } else if ((slurm_h = dlopen(libslurmname, RTLD_NOW|RTLD_GLOBAL))) { + return; + } else { _log_msg (LOG_INFO, "Unable to dlopen %s: %s\n", libslurmname, dlerror ()); } @@ -430,7 +432,9 @@ extern void libpam_slurm_init (void) if (snprintf(libslurmname, sizeof(libslurmname), "libslurm.so.%d", SLURM_API_CURRENT) >= sizeof(libslurmname) ) { _log_msg (LOG_ERR, "Unable to write libslurmname\n"); - } else if (!(slurm_h = dlopen(libslurmname, RTLD_NOW|RTLD_GLOBAL))) { + } else if ((slurm_h = dlopen(libslurmname, RTLD_NOW|RTLD_GLOBAL))) { + return; + } else { _log_msg (LOG_INFO, "Unable to dlopen %s: %s\n", libslurmname, dlerror ()); } diff --git a/contribs/perlapi/libslurm/perl/Slurm.xs b/contribs/perlapi/libslurm/perl/Slurm.xs index 4e40f284e99efeabd4f8e5ac938c9dda473189bf..cf7730e4370f1011efb7058a14b502fb5712de40 100644 --- a/contribs/perlapi/libslurm/perl/Slurm.xs +++ b/contribs/perlapi/libslurm/perl/Slurm.xs @@ -52,7 +52,7 @@ slurm_t slurm_new(char *conf_file=NULL) CODE: if(conf_file) { - slurm_api_set_conf_file(conf_file); + slurm_conf_reinit(conf_file); } RETVAL = new_slurm(); if (RETVAL == NULL) { diff --git a/contribs/perlapi/libslurm/perl/lib/Slurm/Constant.pm b/contribs/perlapi/libslurm/perl/lib/Slurm/Constant.pm index 63ba80349e65994e7a038e9dd1ba5c7cdec4154f..3208e6ed6cd53fa7f8770e9e20fe78739fa5a6bf 100644 --- a/contribs/perlapi/libslurm/perl/lib/Slurm/Constant.pm +++ b/contribs/perlapi/libslurm/perl/lib/Slurm/Constant.pm @@ -174,9 +174,17 @@ This package export constants for use with Slurm. 
This includes enumerations and =over 2 -=item * NODE_STATE_BASE 0x00ff +=item * NODE_STATE_BASE 0x000f -=item * NODE_STATE_FLAGS 0xff00 +=item * NODE_STATE_FLAGS 0xfff0 + +=item * NODE_STATE_NET 0x0010 + +=item * NODE_STATE_RES 0x0020 + +=item * NODE_STATE_UNDRAIN 0x0040 + +=item * NODE_STATE_CLOUD 0x0080 =item * NODE_RESUME 0x0100 diff --git a/contribs/perlapi/libslurm/perl/slurm-perl.h b/contribs/perlapi/libslurm/perl/slurm-perl.h index b8c8a2cedd1eef7de4a234d56e7ae034f84c0e68..e89b376aba98242c59920c5307c2d4e20a092d95 100644 --- a/contribs/perlapi/libslurm/perl/slurm-perl.h +++ b/contribs/perlapi/libslurm/perl/slurm-perl.h @@ -16,7 +16,7 @@ extern void slurm_xfree(void **, const char *, int, const char *); extern void *slurm_xmalloc(size_t, const char *, int, const char *); -extern void slurm_api_set_conf_file(char *pathname); +extern void slurm_conf_reinit(char *pathname); extern void slurm_api_clear_config(void); extern void slurm_list_iterator_destroy(ListIterator itr); diff --git a/doc/html/download.shtml b/doc/html/download.shtml index 1ffead136f7a73539fb84395e58451d32df0dcd3..e341f1a88bf72ef5e1732665ce9ee619ca70de99 100644 --- a/doc/html/download.shtml +++ b/doc/html/download.shtml @@ -1,21 +1,21 @@ <!--#include virtual="header.txt"--> <h1>Download</h1> -<p>SLURM source can be downloaded from +<p>Slurm source can be downloaded from <a href="http://www.schedmd.com/#repos"> http://www.schedmd.com/#repos</a><br> -SLURM has also been packaged for +Slurm has also been packaged for <a href="http://packages.debian.org/src:slurm-llnl">Debian</a> and <a href="http://packages.ubuntu.com/src:slurm-llnl">Ubuntu</a> (both named <i>slurm-llnl</i>).</p> -<p>A <a href="http://www.bsc.es/marenostrum-support-services/services/slurm-simulator">SLURM simulator</a> +<p>A <a href="http://www.bsc.es/marenostrum-support-services/services/slurm-simulator">Slurm simulator</a> is available to assess various scheduling policies. Under simulation jobs are not actually executed. Instead a job execution trace from a real system or a synthetic trace are used.</p> <!-- -SLURM interface to PHP +Slurm interface to PHP https://github.com/jcftang/slurm/commits/php-slurm http://thammuz.tchpc.tcd.ie/mirrors/php-slurm/1.0/ Development by Peter Vermeulen with help from staff of @@ -31,7 +31,7 @@ a message.</li> <ul> <li><a href="http://www.theether.org/authd">authd</a></li> <li><b>MUNGE</b> (recommended)<br> -In order to compile the "auth/munge" authentication plugin for SLURM, +In order to compile the "auth/munge" authentication plugin for Slurm, you will need to build and install MUNGE, available from <a href="https://code.google.com/p/munge/">https://code.google.com/p/munge/</a> and <a href="http://packages.debian.org/src:munge">Debian</a> and @@ -39,13 +39,13 @@ you will need to build and install MUNGE, available from <a href="http://packages.ubuntu.com/src:munge">Ubuntu</a>.</li> </ul><br> -<li><b>Authentication</b> tools for users that work with SLURM.</li> +<li><b>Authentication</b> tools for users that work with Slurm.</li> <ul> <li><a href="http://sourceforge.net/projects/auks/">AUKS</a><br> AUKS is an utility designed to ease Kerberos V credential support addition -to non-interactive applications, like batch systems (SLURM, LSF, Torque, etc.). -It includes a plugin for the SLURM workload manager. AUKS is not used as -an authentication plugin by the SLURM code itself, but provides a mechanism +to non-interactive applications, like batch systems (Slurm, LSF, Torque, etc.). 
+It includes a plugin for the Slurm workload manager. AUKS is not used as +an authentication plugin by the Slurm code itself, but provides a mechanism for the application to manage Kerberos V credentials.</li> </ul><br> @@ -85,17 +85,17 @@ Download it from <a href="http://www.openssl.org/">http://www.openssl.org/</a>.< </ul><br> <li><b>DRMAA (Distributed Resource Management Application API)</b><br> -<a href="http://apps.man.poznan.pl/trac/slurm-drmaa">PSNC DRMAA</a> for SLURM +<a href="http://apps.man.poznan.pl/trac/slurm-drmaa">PSNC DRMAA</a> for Slurm is an implementation of <a href="http://www.gridforum.org/">Open Grid Forum</a> <a href="http://www.drmaa.org/">DRMAA 1.0</a> (Distributed Resource Management Application API) <a href="http://www.ogf.org/documents/GFD.133.pdf">specification</a> for submission -and control of jobs to <href="http://slurm.schedmd.com">SLURM</a>. +and control of jobs to <href="http://slurm.schedmd.com">Slurm</a>. Using DRMAA, grid applications builders, portal developers and ISVs can use the same high-level API to link their software with different cluster/resource management systems.</li><br> <li><b>Hostlist</b><br> -A Python program used for manipulation of SLURM hostlists including +A Python program used for manipulation of Slurm hostlists including functions such as intersection and difference. Download the code from:<br> <a href="http://www.nsc.liu.se/~kent/python-hostlist"> http://www.nsc.liu.se/~kent/python-hostlist</a><br><br> @@ -125,10 +125,10 @@ generated using the <b>ib2slurm</b> tool found here: <li><b>I/O Watchdog</b><br> A facility for monitoring user applications, most notably parallel jobs, for <i>hangs</i> which typically have a side-effect of ceasing all write -activity. This faciltiy attempts to monitor all write activity of an +activity. This facility attempts to monitor all write activity of an application and trigger a set of user-defined actions when write activity as ceased for a configurable period of time. A SPANK plugin is provided -for use with SLURM. See the README and man page in the package for more +for use with Slurm. See the README and man page in the package for more details. Download the latest source from:<br> <a href="http://io-watchdog.googlecode.com/files/io-watchdog-0.6.tar.bz2"> http://io-watchdog.googlecode.com/files/io-watchdog-0.6.tar.bz2</a></li><br> @@ -151,14 +151,14 @@ http://io-watchdog.googlecode.com/files/io-watchdog-0.6.tar.bz2</a></li><br> <li><b>PAM Module (pam_slurm)</b><br> Pluggable Authentication Module (PAM) for restricting access to compute nodes -where SLURM performs workload management. Access to the node is restricted to +where Slurm performs workload management. Access to the node is restricted to user root and users who have been allocated resources on that node. -NOTE: pam_slurm is included within the SLURM distribution for version 2.1 +NOTE: pam_slurm is included within the Slurm distribution for version 2.1 or higher. 
-For earlier SLURM versions, pam_slurm is available for download from<br> +For earlier Slurm versions, pam_slurm is available for download from<br> <a href="http://www.schedmd.com/download/extras/pam_slurm-1.6.tar.bz2"> http://www.schedmd.com/download/extras/pam_slurm-1.6.tar.bz2</a><br> -SLURM's PAM module has also been packaged for +Slurm's PAM module has also been packaged for <a href="http://packages.debian.org/src:libpam-slurm">Debian</a> and <a href="http://packages.ubuntu.com/src:libpam-slurm">Ubuntu</a> (both named <i>libpam-slurm</i>).</li><br> @@ -169,7 +169,7 @@ SLURM's PAM module has also been packaged for <a href="http://www.ogf.org/">Open Grid Forum</a> Advance Reservation API</li> <li><a href="http://web.mit.edu/star/cluster/docs/latest/index.html">StarCluster</a> cloud computing toolkit has a -<a href="https://github.com/jlafon/StarCluster">SLURM port available</a>. +<a href="https://github.com/jlafon/StarCluster">Slurm port available</a>. <li><a href="http://www.platform.com/">Load Sharing Facility (LSF)</a></li> <li><a href="http://www.clusterresources.com/pages/products/maui-cluster-scheduler.php"> Maui Scheduler</a></li> @@ -184,11 +184,11 @@ https://github.com/pedmon/slurm_showq</a>.<br><br> <li><b>Scripting interfaces</b> <ul> -<li>A <b>Perl</b> interface is included in the SLURM distribution in the +<li>A <b>Perl</b> interface is included in the Slurm distribution in the <i>contribs/perlapi</i> directory and packaged in the <i>perapi</i> RPM.</li> <li><a href="http://www.gingergeeks.co.uk/pyslurm/">PySlurm</a> is a -Python/Pyrex module to interface with SLURM. +Python/Pyrex module to interface with Slurm. There is also a Python module to expand and collect hostlist expressions available at <a href="http://www.nsc.liu.se/~kent/python-hostlist/"> http://www.nsc.liu.se/~kent/python-hostlist/</a>.</li> @@ -201,20 +201,20 @@ SQL Alchemy bindings for your slurmdbd database.</li> --> <li><a href="http://www.lua.org/">Lua</a> may be used to implement a -SLURM process tracking plugin. +Slurm process tracking plugin. The Lua script available in <i>contribs/lua/protrack.lua</i> implements containers using CPUSETs. </ul><br> <li><b>SPANK Plugins</b><br> SPANK provides a very generic interface for stackable plug-ins which -may be used to dynamically modify the job launch code in SLURM. SPANK -plugins may be built without access to SLURM source code. They need -only be compiled against SLURM‘s spank.h header file, added to the +may be used to dynamically modify the job launch code in Slurm. SPANK +plugins may be built without access to Slurm source code. They need +only be compiled against Slurm‘s spank.h header file, added to the SPANK config file plugstack.conf, and they will be loaded at runtime during the next job launch. Thus, the SPANK infrastructure provides administrators and other developers a low cost, low effort ability to -dynamically modify the runtime behavior of SLURM job launch. +dynamically modify the runtime behavior of Slurm job launch. 
An assortment of SPANK plugins are available from<br> <a href="http://code.google.com/p/slurm-spank-plugins/"> http://code.google.com/p/slurm-spank-plugins/</a>.<br> @@ -223,7 +223,7 @@ repository with the following command:<br> <i>svn checkout http://slurm-spank-plugins.googlecode.com/svn/trunk/ slurm-plugins</i></li><br> <li><b>Sqlog</b><br> -A set of scripts that leverages SLURM's job completion logging facility +A set of scripts that leverages Slurm's job completion logging facility to provide information about what jobs were running at any point in the past as well as what resources they used. Download the code from:<br> <a href="http://sqlog.googlecode.com">http://sqlog.googlecode.com</a></li><br> @@ -248,6 +248,70 @@ For more information, see <a href="http://ubmod.sourceforge.net/resource-manager-slurm.html">UDMod home page</a> and <a href="https://github.com/ubccr/ubmod">source code</a>.</li><br> +<li><b>STUBL (Slurm Tools and UBiLities)</b><br> +STUBL is a collection of supplemental tools and utility scripts for Slurm.<br> +<a href="https://github.com/ubccr/stubl">STUBL home page</a>.<br><br> +<dl> +<dt>pbs2sbatch</dt> +<dd>Converts PBS directives to equivalent Slurm sbatch directives. Accommodates + old UB CCR-specific PBS tags like IB1, IB2, etc.</dd> +<dt>pbs2slurm</dt> +<dd>A script that attempts to convert PBS scripts into corresponding sbatch + scripts. It will convert PBS directives as well as PBS environment variables + and will insert bash code to create a SLURM_NODEFILE that is consistent with + the PBS_NODEFILE.</dd> +<dt>slurmbf</dt> +<dd>Analogous to the PBS "showbf -S" command.</dd> +<dt>snodes</dt> +<dd>A customized version of sinfo. Displays node information in an + easy-to-interpret format. Filters can be applied to view (1) specific nodes, + (2) nodes in a specific partition, or (3) nodes in a specific state.</dd> +<dt>sqstat</dt> +<dd>A customized version of squeue that produces output analogous to the PBS + qstat and xqstat commands (requires clush).</dd> +<dt>fisbatch</dt> +<dd>Friendly Interactive sbatch. A customized version of sbatch that provides a + user-friendly interface to an interactive job with X11 forwarding enabled. + It is analogous to the PBS "qsub -I -X" command. This code was adopted from + srun.x11 (requires clush).</dd> +<dt>sranks</dt> +<dd>A command that lists the overall priorities and associated priority + components of queued jobs in ascending order. Top-ranked jobs will be given + priority by the scheduler but lower ranked jobs may get slotted in first if + they fit into the scheduler's backfill window.</dd> +<dt>sqelp</dt> +<dd>A customized version of squeue that only prints a double-quote if the + information in a column is the same from row to row. Some users find this + type of formatting easier to visually digest.</dd> +<dt>sjeff</dt> +<dd>Determines the efficiency of one or more running jobs. Inefficient jobs are + highlighted in red text (requires clush).</dd> +<dt>sueff</dt> +<dd>Determines the overall efficiency of the running jobs of one or more users. + Users that are inefficient are highlighted in red text (requires clush).</dd> +<dt>yasqr</dt> +<dd>Yet Another Squeue Replacement. 
Fixes squeue bugs in earlier versions of + Slurm.</dd> +<dt>sgetscr</dt> +<dd>Retrieves the Slurm/sbatch script and environment files for a job that is + queued or running.</dd> +<dt>snacct</dt> +<dd>Retrieves Slurm accounting information for a given node and for a given + period of time.</dd> +<dt>suacct</dt> +<dd>Retrieves Slurm accounting information for a given user's jobs for a given + period of time.</dd> +<dt>slist</dt> +<dd>Retrieves Slurm accounting and node information for a running or completed + job (requires clush).</dd> +<dt>slogs</dt> +<dd>Retrieves resource usage and accounting information for a user or list of + users. For each job that was run after the given start date, the following + information is gathered from the Slurm accounting logs: Number of CPUs, + Start Time, Elapsed Time, Amount of RAM Requested, Average RAM Used, and + Max RAM Used.</dd> +</dl></li><br> + <li><b>Slurmmon</b><br> Slurmmon is a system for gathering and plotting data about Slurm scheduling and job characteristics. It currently simply sends the data to ganglia, but it @@ -272,6 +336,6 @@ easy and elegantly manner. </ul> -<p style="text-align:center;">Last modified 30 April 2014</p> +<p style="text-align:center;">Last modified 23 July 2014</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/ibm-pe.shtml b/doc/html/ibm-pe.shtml index 7604aeb3b0ceb48d41663549bccb04a6bfcbd553..a55ae91ddddefc92d2289882b5f5e12d72150fca 100644 --- a/doc/html/ibm-pe.shtml +++ b/doc/html/ibm-pe.shtml @@ -446,7 +446,7 @@ Slurm (without jobs). The procedure is as follows:</p> <li>Cancel all jobs.</li> <li>Change the dynamic window count on the compute nodes.</li> <li>Restart Slurm daemons without preserving state -(e.g. "/etc/init.d/slurm/startclean" or initiate the daemons using the +(e.g. "/etc/init.d/slurm startclean" or initiate the daemons using the "-c" option).</li> </ol> @@ -476,6 +476,23 @@ Units (CAU) is disabled and an error message to that effect will be generated in response to such a request. In addition, version 1200 or higher of IBM's NRT API is required to support this functionality.</p> +<h3>Cold Starting</h3> + +<p>If the slurmctld daemon is cold started (without saved state), then +information about previously allocated network resources is lost. +Slurm will release those resources to the best of its ability based upon +information available from the Network Resource Table (NRT) library functions. +These function provide sufficient information to release all resources +<b>except</b> for CAU on a Torrent network (e.g. a PERCS system). 
+In order to release CAU, it is necessary to cold start the Protocol Network +Services Daemon (PNSD) on compute nodes following the sequence shown below.</p> +<pre> +Stop Slurm daemons: /etc/init.d/slurm stop +Stop PNSD: stopsrc -s pnsd +Start PNSD clean: startsrc -s pnsd -a -c +Start Slurm daemons clean: /etc/init.d/slurm startclean +</pre> + <h3>Design Notes</h3> <p>It is necessary for all nodes that can be used for scheduling a single job @@ -550,6 +567,6 @@ startsrc -s pnsd -a -D <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 29 May 2013</p></td> +<p style="text-align:center;">Last modified 22 July 2013</p></td> <!--#include virtual="footer.txt"--> diff --git a/doc/html/mail.shtml b/doc/html/mail.shtml index b02eb499b3d59408c03bb0114407622d9a0fabcd..ebe968c7cb600c066a4d5fd9caaca4df956cd19c 100644 --- a/doc/html/mail.shtml +++ b/doc/html/mail.shtml @@ -3,9 +3,12 @@ <h1>Mailing Lists</h1> <p>We maintain two Slurm mailing lists:</p> <ul> -<li><b>slurm-announce@schedmd.com</b> is designated for communications about SLURM releases -[low traffic].</li> -<li><b>slurm-dev@schedmd.com</b> is designated for communications to SLURM developers +<li><b>slurm-announce@schedmd.com</b> is designated for communications about +Slurm releases. The ability to post to this list is restricted to a few Slurm +developers [low traffic].</li> +<li><b>slurm-dev@schedmd.com</b> is designated for communications to Slurm +developers and is typically used by Slurm users or system administrators. +Any person subscribed to this mailing list may post to it. [high traffic at times].</li> </ul> <p>To subscribe to either list, vist the link below, sign up and get involved!<p> @@ -18,6 +21,6 @@ You can likewise unsubscribe from either list at the same link. and<br> <a href="http://groups.google.com/group/slurm-devel">http://groups.google.com/group/slurm-devel</a></p> -<p style="text-align:center;">Last modified 3 April 2013</p> +<p style="text-align:center;">Last modified 5 August 2014</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/meetings.shtml b/doc/html/meetings.shtml index 9d0900ec1c4ec9c1216ceac4b8b7dbb4cf1ed844..7f8c46e2b260897002a1b0b8a33d1d988aba9de6 100644 --- a/doc/html/meetings.shtml +++ b/doc/html/meetings.shtml @@ -7,8 +7,9 @@ Lugano, Switzerland<br> Host: <a href="http://cscs.ch/">Swiss National Supercomputing Centre</a></p> <!--<a href="slurm_ug_cfp.html">Call for Abstracts: Due 6 June 2014</a><br>--> -<a href="slurm_ug_agenda.html">Meeting agenda</a><br> -<!--<a href="slurm_ug_registration.html">Registration information</a>--></p> +<a href="slurm_ug_agenda.html#agenda">Meeting agenda</a><br> +<a href="slurm_ug_agenda.html#registration">Meeting registration</a><br> +<a href="slurm_ug_agenda.html#hotels">Meeting hotels</a><br> <br> <h2>Previous Meetings</h2> diff --git a/doc/html/power_save.shtml b/doc/html/power_save.shtml index 91b0cb039cae434faf060666fabf7928c22dc877..dfe70d7e54d6ac6d39d3b9c4c3a4609b0f061a10 100644 --- a/doc/html/power_save.shtml +++ b/doc/html/power_save.shtml @@ -102,7 +102,7 @@ List of partitions with nodes to never place in power saving mode. Multiple partitions may be specified using a comma separator. By default, no nodes are excluded.</li> -<li><b>BatchStartTime</b>: +<li><b>BatchStartTimeout</b>: Specifies how long to wait after a batch job start request is issued before we expect the batch job to be running on the compute node. 
Depending upon how nodes are returned to service, this value may need to @@ -263,6 +263,6 @@ and perform the following actions: <li>Boot the appropriate image for each node</li> </ol> -<p style="text-align:center;">Last modified 10 December 2012</p> +<p style="text-align:center;">Last modified 12 August 2014</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/qos.shtml b/doc/html/qos.shtml index fd8b58ae95a0d3aa83704c6d4677ebe8543b4803..8fc4ae07cd2e398db5ca2879db7e9a9a384d7835 100644 --- a/doc/html/qos.shtml +++ b/doc/html/qos.shtml @@ -176,6 +176,9 @@ Jobs submitted requesting more nodes than the QOS limit will pend until they conform (possibly indefinitely); to allow such jobs to run may require changing this limit with sacctmgr. See DenyOnLimits below to deny these jobs at submission. +Each job's node allocation is counted separately +(i.e. if a single node has resources allocated to two jobs, this is counted +as two allocated nodes). </li> <li><b>MaxSubmitJobsPerUser</b> diff --git a/doc/html/quickstart.shtml b/doc/html/quickstart.shtml index c038309ace28c1d8ab3ea7e65936420a6d95f267..610bea0cbdd6604dde22aed42e349a7a2e3f5999 100644 --- a/doc/html/quickstart.shtml +++ b/doc/html/quickstart.shtml @@ -3,18 +3,18 @@ <h1>Quick Start User Guide</h1> <h2>Overview</h2> -<p>The Simple Linux Utility for Resource Management (SLURM) is an open source, +<p>The Simple Linux Utility for Resource Management (Slurm) is an open source, fault-tolerant, and highly scalable cluster management and job scheduling system -for large and small Linux clusters. SLURM requires no kernel modifications for +for large and small Linux clusters. Slurm requires no kernel modifications for its operation and is relatively self-contained. As a cluster workload manager, -SLURM has three key functions. First, it allocates exclusive and/or non-exclusive +Slurm has three key functions. First, it allocates exclusive and/or non-exclusive access to resources (compute nodes) to users for some duration of time so they can perform work. Second, it provides a framework for starting, executing, and monitoring work (normally a parallel job) on the set of allocated nodes. Finally, it arbitrates contention for resources by managing a queue of pending work.</p> <h2>Architecture</h2> -<p>As depicted in Figure 1, SLURM consists of a <b>slurmd</b> daemon running on +<p>As depicted in Figure 1, Slurm consists of a <b>slurmd</b> daemon running on each compute node and a central <b>slurmctld</b> daemon running on a management node (with optional fail-over twin). The <b>slurmd</b> daemons provide fault-tolerant hierarchical communications. @@ -26,11 +26,11 @@ All of the commands can run anywhere in the cluster.</p> <div class="figure"> <img src="arch.gif" width=550></br> - Figure 1. SLURM components + Figure 1. Slurm components </div> -<p>The entities managed by these SLURM daemons, shown in Figure 2, include -<b>nodes</b>, the compute resource in SLURM, +<p>The entities managed by these Slurm daemons, shown in Figure 2, include +<b>nodes</b>, the compute resource in Slurm, <b>partitions</b>, which group nodes into logical (possibly overlapping) sets, <b>jobs</b>, or allocations of resources assigned to a user for a specified amount of time, and @@ -46,13 +46,13 @@ or several job steps may independently use a portion of the allocation.</p> <div class="figure"> <img src="entities.gif" width=500></br> - Figure 2. SLURM entities + Figure 2. 
Slurm entities </div> <p class="footer"><a href="#top">top</a></p> <h2>Commands</h2> -<p>Man pages exist for all SLURM daemons, commands, and API functions. The command +<p>Man pages exist for all Slurm daemons, commands, and API functions. The command option <span class="commandline">--help</span> also provides a brief summary of options. Note that the command options are all case sensitive.</p> @@ -81,15 +81,15 @@ or running job or job step. It can also be used to send an arbitrary signal to all processes associated with a running job or job step.</p> <p><span class="commandline"><b>scontrol</b></span> is the administrative tool -used to view and/or modify SLURM state. Note that many <span class="commandline">scontrol</span> +used to view and/or modify Slurm state. Note that many <span class="commandline">scontrol</span> commands can only be executed as user root.</p> <p><span class="commandline"><b>sinfo</b></span> reports the state of partitions -and nodes managed by SLURM. It has a wide variety of filtering, sorting, and formatting +and nodes managed by Slurm. It has a wide variety of filtering, sorting, and formatting options.</p> <p><span class="commandline"><b>smap</b></span> reports state information for -jobs, partitions, and nodes managed by SLURM, but graphically displays the +jobs, partitions, and nodes managed by Slurm, but graphically displays the information to reflect network topology.</p> <p><span class="commandline"><b>squeue</b></span> reports the state of jobs or @@ -112,7 +112,7 @@ view event triggers. Event triggers include things such as nodes going down or jobs approaching their time limit.</p> <p><span class="commandline"><b>sview</b></span> is a graphical user interface to -get and update state information for jobs, partitions, and nodes managed by SLURM.</p> +get and update state information for jobs, partitions, and nodes managed by Slurm.</p> <p class="footer"><a href="#top">top</a></p> @@ -306,12 +306,12 @@ using the <span class="commandline">srun</span> command to launch the tasks differ, see <a href="#mpi">MPI</a> details below). Finally the shell created by <span class="commandline">salloc</span> would be terminated using the <i>exit</i> command. -SLURM does not automatically migrate executable or data files +Slurm does not automatically migrate executable or data files to the nodes allocated to a job. Either the files must exists on local disk or in some global file system (e.g. NFS or Lustre). We provide the tool <span class="commandline">sbcast</span> to transfer -files to local storage on allocated nodes using SLURM's hierarchical +files to local storage on allocated nodes using Slurm's hierarchical communications. In this example we use <span class="commandline">sbcast</span> to transfer the executable program <i>a.out</i> to <i>/tmp/joe.a.out</i> on local storage @@ -345,27 +345,41 @@ JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) <p class="footer"><a href="#top">top</a></p> +<h2>Best Practices, Large Job Counts</h2> + +<p>Consider putting related work into a single Slurm job with multiple job +steps both for performance reasons and ease of management. +Each Slurm job can contain a multitude of job steps and the overhead in +Slurm for managing job steps is much lower than that of individual jobs.</p> + +<p><a href="job_array.html">Job arrays</a> are an efficient mechanism of +managing a collection of batch jobs with identical resource requirements. 
+Most Slurm commands can manage job arrays either as individual elements (tasks) +or as a single entity (e.g. delete an entire job array in a single command).</p> + +<p class="footer"><a href="#top">top</a></p> + <h2><a name="mpi">MPI</a></h2> <p>MPI use depends upon the type of MPI being used. There are three fundamentally different modes of operation used by these various MPI implementation. <ol> -<li>SLURM directly launches the tasks and performs initialization +<li>Slurm directly launches the tasks and performs initialization of communications (Quadrics MPI, MPICH2, MPICH-GM, MPICH-MX, MVAPICH, MVAPICH2 and some MPICH1 modes).</li> -<li>SLURM creates a resource allocation for the job and then -mpirun launches tasks using SLURM's infrastructure (OpenMPI, +<li>Slurm creates a resource allocation for the job and then +mpirun launches tasks using Slurm's infrastructure (OpenMPI, LAM/MPI and HP-MPI).</li> -<li>SLURM creates a resource allocation for the job and then -mpirun launches tasks using some mechanism other than SLURM, +<li>Slurm creates a resource allocation for the job and then +mpirun launches tasks using some mechanism other than Slurm, such as SSH or RSH (BlueGene MPI and some MPICH1 modes). -These tasks initiated outside of SLURM's monitoring -or control. SLURM's epilog should be configured to purge +These tasks initiated outside of Slurm's monitoring +or control. Slurm's epilog should be configured to purge these tasks when the job's allocation is relinquished. </li> </ol> <p>Links to instructions for using several varieties of MPI -with SLURM are provided below. +with Slurm are provided below. <ul> <li><a href="mpi_guide.html#bluegene_mpi">BlueGene MPI</a></li> <li><a href="mpi_guide.html#hp_mpi">HP-MPI</a></li> @@ -381,6 +395,6 @@ with SLURM are provided below. <li><a href="mpi_guide.html#quadrics_mpi">Quadrics MPI</a></li> </ul></p> -<p style="text-align:center;">Last modified 2 October 2012</p> +<p style="text-align:center;">Last modified 13 August 2014</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/reservations.shtml b/doc/html/reservations.shtml index e7a509b6fd7f6f131707bf71630c3984deff4dde..5b5a0a1556181938f2acadd1b135b7899d0bb921 100644 --- a/doc/html/reservations.shtml +++ b/doc/html/reservations.shtml @@ -157,7 +157,10 @@ feature that every selected node must contain.</p> <p>On a smaller system, one might want to reserve cores rather than whole nodes. Slurm provides a core reservation capability in version 2.6. This capability permits the administrator to identify the core count to be -reserved on each node as snown in the examples below.</p> +reserved on each node as snown in the examples below.<br> +<b>NOTE:</b> Core reservations are not available on IBM BlueGene or +Cray/ALPS systems, nor when the system is configured to use the select/linear +or select/serial plugins.</p> <pre> # Create a two core reservation for user alan $ scontrol create reservation StartTime=now Duration=60 \ @@ -318,7 +321,7 @@ considering the initiation of jobs. 
This will prevent the initiation of some jobs which would complete execution before a reservation given fewer jobs to time-slice with.</p> -<p style="text-align: center;">Last modified 13 August 2013</p> +<p style="text-align: center;">Last modified 29 July 2014</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/slurm_ug_agenda.shtml b/doc/html/slurm_ug_agenda.shtml index 112c62042fe7bd0eaf2e956f9fb2746602096a12..aa846085c2a580567ec0f310d423f65f4c199580 100644 --- a/doc/html/slurm_ug_agenda.shtml +++ b/doc/html/slurm_ug_agenda.shtml @@ -4,7 +4,8 @@ <p>Hosted by the <a href="http:///www.cscs.ch">Swiss National Supercomputing Centre</a> -<h1>Agenda</h1> + +<a name="agenda"><h1>Agenda</h1></a> <p>The 2014 Slurm User Group Meeting will be held on September 23 and 24 in Lugano, Switzerland. @@ -13,17 +14,23 @@ and site reports. The <a href="#schedule">Schedule</a> amd <a href="#abstracts">Abstracts</a> are shown below.</p> -<h2>Meeting Information</h2> <p>The meeting will be held at the <a href="http://www.lugano-tourism.ch/en/129/default.aspx"> Convention Centre Lugano</a>, Lugano, Switzerland.</p> -<h2>Hotel Information</h2> + +<a name="registration"><h1>Registration</h1></a> +<p>The conference cost is $350 per person for registration by 23 August and +$400 per person for late registration. +This includes presentations, tutorials, lunch and snacks on both days, +plus dinner on Tuesday evening.<br> +<a href="https://www.eventbrite.com/e/slurm-user-group-meeting-2014-tickets-12322056603">Register here</a>.</p> + + +<a name="hotels"><h1>Hotel Information</h1></a> <p>Hotels may be booked through the Lugano Convention Centre (Palazzo dei Congressi).<br> -<a href="https://www.aec-internet.it/booking_engine/prenota_congresso.htm?graph_be=4&n_tappe=1&headvar=ok&lingua_int=eng&id_stile=7434&id_congresso=54&id_canale=704">Hotel booking</a>. +<a href="https://www.aec-internet.it/booking_engine/prenota_congresso.htm?graph_be=4&n_tappe=1&headvar=ok&lingua_int=eng&id_stile=7434&id_congresso=54&id_canale=704">Hotel booking</a>.</p> -<h2>Registration</h2> -<p>Information will be made available later.</p> <a name="schedule"><h1>Schedule</h1></a> @@ -44,17 +51,17 @@ Convention Centre Lugano</a>, Lugano, Switzerland.</p> </tr> <tr> - <td>08:30 - 08:45</td> + <td>08:30 - 08:40</td> <td>Welcome</td> - <td>TBD (CSCS)</td> + <td>Colin McMurtrie (CSCS)</td> <td>Welcome to Slurm User Group Meeting</td> </tr> <tr> - <td>08:45 - 09:30</td> + <td>08:40 - 09:30</td> <td>Keynote</td> - <td>TBD</td> - <td>TBD</td> + <td>Prof. Felix Schürmann (EPFL)</td> + <td>European Human Brain Project</td> </tr> <tr> @@ -152,9 +159,12 @@ Convention Centre Lugano</a>, Lugano, Switzerland.</p> </tr> <tr> - <td bgcolor="#F0F1C9">17:00 - </td> + <td bgcolor="#F0F1C9">19:00 - </td> <td bgcolor="#F0F1C9">Dinner</td> - <td colspan="2" bgcolor="#F0F1C9">TBD</td> + <td colspan="2" bgcolor="#F0F1C9">Restaurant Pizzeria Cantinone<br> +Piazza Cioccaro 8<br> +Lugano<br> +tel +41(0)91 923 10 68</td> </tr> </table> @@ -310,8 +320,14 @@ Convention Centre Lugano</a>, Lugano, Switzerland.</p> <h2>September 23, 2014</h2> +<h3>Keynote</h3> +<p>Prof. Felix Schürmann (Ecole Polytechnique Fédérale de Lausanne)</p> +<p>Dr. 
Felix Schürmann is adjunct professor at the Ecole Polytechnique +Fédérale de Lausanne, co-director of the Blue Brain Project +and involved in several research challenges of the European Human Brain Project.</p> + <h3>Overview of Slurm Versions 14.03 and 14.11</h3> -<p> Morris Jette (SchedMD), Yiannis Georgiou (Bull)</p> +<p>Morris Jette (SchedMD), Yiannis Georgiou (Bull)</p> <p>This presentation will describe new capabilities provided in Slurm versions 14.03 (released March 2014) and planned for version 14.11 (to be released in November 2014). Major enhancements in version 14.03 @@ -849,6 +865,6 @@ Convention Centre Lugano</a>, Lugano, Switzerland.</p> system to dynamically allocate disk space on our test high-IOPS SSD scratch system.</p> -<p style="text-align:center;">Last modified 11 July 2014</p> +<p style="text-align:center;">Last modified 19 August 2014</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/team.shtml b/doc/html/team.shtml index f64ac8dacee121c1434eff9781e792bcc574c941..c18330fd62fcbae4b4608eb49ea0c6538f511cd4 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -50,6 +50,7 @@ Lead Slurm developers are: <li>Franco Broi (ION)</li> <li>Bill Brophy (Bull)</li> <li>John Brunelle (Harvard University FAS Research Computing)</li> +<li>Andrew E. Bruno (University at Buffalo)</li> <br> <li>Luis Cabellos (Instituto de Fisica de Cantabria, Spain)</li> <li>Thomas Cadeau (Bull)</li> @@ -124,6 +125,7 @@ Lead Slurm developers are: <li>Donald Lipari (Lawrence Livermore National Laboratory)</li> <br> <li>Komoto Masahiro</li> +<li>L. Shawn Matott (University at Buffalo)</li> <li>Steven McDougall (SiCortex)</li> <li>Donna Mecozzi (Lawrence Livermore National Laboratory)</li> <li>Bjørn-Helge Mevik (University of Oslo, Norway)</li> @@ -199,6 +201,6 @@ Lead Slurm developers are: <!-- INDIVIDUALS, PLEASE KEEP IN ALPHABETICAL ORDER --> </ul> -<p style="text-align:center;">Last modified 25 April 2014</p> +<p style="text-align:center;">Last modified 23 July 2014</p> <!--#include virtual="footer.txt"--> diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index e4a7770bb3335a26ac41ffc3683564bc585ea5ce..d1940b4bfe09f4eebc63b7aee0fa14740f7c1a8e 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -66,7 +66,7 @@ The default input file is the file named in the .TP "10" \f3\-a\fP\f3,\fP \f3\-\-allusers\fP Displays all users jobs when run by user root or if \fBPrivateData\fP is not -configurred to \fBjobs\fP. +configured to \fBjobs\fP. Otherwise display the current user's jobs .IP @@ -476,13 +476,13 @@ Note: Only in case of exclusive job allocation this value reflects the jobs' real energy consumption. .TP -\f3cputime\fP -Formatted number of cpu seconds a process was allocated. +\f3CPUTime\fP +Formatted (Elapsed time * CPU) count used by a job or step. .TP -\f3cputimeraw\fP -How much cpu time process was allocated in second format, not formatted -like above. +\f3CPUTimeRaw\fP +Unlike above non formatted (Elapsed time * CPU) count for a job or step. +Units are cpu-seconds. .TP \f3DerivedExitCode\fP @@ -879,7 +879,7 @@ used with \-S (\-\-starttime) and \-E (\-\-endtime). When the \-T option is used, the start time of the job will be the specified \-S value if the job was started before the specified time, otherwise the time will be the job's start time. The end time will be the specified \-E -option if the job ends after the specifed time, otherwise it will be +option if the job ends after the specified time, otherwise it will be the jobs end time. 
NOTE: If no \-s (\-\-state) option is given sacct will diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index 47f5561ad5d0f0b719fc826274fd90e01f266cc4..b083fd08d8d7209f59f08f0715aabd4ebd08ee50 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -318,6 +318,10 @@ Resource plugin. Maximum number of nodes running jobs are able to be allocated in aggregate for this association and all associations which are children of this association. To clear a previously set value use the modify command with a new value of \-1. +.P +NOTE: Each job's node allocation is counted separately (i.e. if a +single node has resources allocated to two jobs, this is counted as +two allocated nodes). .TP \fIGrpSubmitJobs\fP=<max jobs> @@ -610,6 +614,10 @@ this association and all associations which are children of this association. \fIGrpNodes\fP Maximum number of nodes running jobs are able to be allocated in aggregate for this association and all associations which are children of this association. +.P +NOTE: Each job's node allocation is counted separately (i.e. if a +single node has resources allocated to two jobs, this is counted as +two allocated nodes). .TP \fIGrpSubmitJobs\fP @@ -1022,6 +1030,10 @@ Maximum number of running jobs in aggregate for this QOS. \fIGrpNodes\fP Maximum number of nodes running jobs are able to be allocated in aggregate for this QOS. +.P +NOTE: Each job's node allocation is counted separately (i.e. if a +single node has resources allocated to two jobs, this is counted as +two allocated nodes). .TP \fIGrpSubmitJobs\fP @@ -1060,6 +1072,10 @@ Maximum number of nodes each job is able to use. .TP \fIMaxNodesPerUser\fP Maximum number of nodes each user is able to use. +.P +NOTE: Each job's node allocation is counted separately (i.e. if a +single node has resources allocated to two jobs, this is counted as +two allocated nodes). .TP \fIMaxSubmitJobs\fP @@ -1160,6 +1176,10 @@ To clear a previously set value use the modify command with a new value of \-1. Maximum number of nodes running jobs are able to be allocated in aggregate for this QOS. To clear a previously set value use the modify command with a new value of \-1. +.P +NOTE: Each job's node allocation is counted separately (i.e. if a +single node has resources allocated to two jobs, this is counted as +two allocated nodes). .TP \fIGrpSubmitJobs\fP @@ -1212,6 +1232,10 @@ To clear a previously set value use the modify command with a new value of \-1. \fIMaxNodesPerUser\fP Maximum number of nodes each user is able to use. To clear a previously set value use the modify command with a new value of \-1. +.P +NOTE: Each job's node allocation is counted separately (i.e. if a +single node has resources allocated to two jobs, this is counted as +two allocated nodes). .TP \fIMaxSubmitJobs\fP @@ -1613,6 +1637,10 @@ association and all associations which are children of this association. Maximum number of nodes running jobs are able to be allocated in aggregate for this association and all associations which are children of this association. +.P +NOTE: Each job's node allocation is counted separately (i.e. if a +single node has resources allocated to two jobs, this is counted as +two allocated nodes). .TP \fIGrpSubmitJobs=\fP Maximum number of jobs which can be in a pending or @@ -1689,6 +1717,10 @@ association and all associations which are children of this association. 
Maximum number of nodes running jobs are able to be allocated in aggregate for this association and all associations which are children of this association. +.P +NOTE: Each job's node allocation is counted separately (i.e. if a +single node has resources allocated to two jobs, this is counted as +two allocated nodes). .TP \fIGrpSubmitJobs=\fP Maximum number of jobs which can be in a pending or diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 955dc7d110b7778958e26333d4346d05060577f6..f78676c4255ec90120d8c01389a3926419d418e7 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -316,7 +316,7 @@ Bind by mapping CPU IDs to tasks as specified where <list> is <cpuid1>,<cpuid2>,...<cpuidN>. The mapping is specified for a node and identical mapping is applied to the tasks on every node (i.e. the lowest task ID on each node is mapped to the -first CPU ID specifed in the list, etc.). +first CPU ID specified in the list, etc.). CPU IDs are interpreted as decimal values unless they are preceded with '0x' in which case they are interpreted as hexadecimal values. Not supported unless the entire node is allocated to the job. @@ -326,7 +326,7 @@ Bind by setting CPU masks on tasks as specified where <list> is <mask1>,<mask2>,...<maskN>. The mapping is specified for a node and identical mapping is applied to the tasks on every node (i.e. the lowest task ID on each node is mapped to the -first mask specifed in the list, etc.). +first mask specified in the list, etc.). CPU masks are \fBalways\fR interpreted as hexadecimal values but can be preceded with an optional '0x'. Not supported unless the entire node is allocated to the job. diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 38cddb3275ab76943c369d0c0da0c831f232d49a..069febf4b13b54f9efb1d92e0d5ffae8f737cc69 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -1,4 +1,4 @@ -.TH "sbatch" "1" "SLURM 14.03" "December 2014" "SLURM Commands" +.TH "sbatch" "1" "SLURM 14.03" "August 2014" "SLURM Commands" .SH "NAME" sbatch \- Submit a batch script to SLURM. @@ -341,7 +341,7 @@ Bind by mapping CPU IDs to tasks as specified where <list> is <cpuid1>,<cpuid2>,...<cpuidN>. The mapping is specified for a node and identical mapping is applied to the tasks on every node (i.e. the lowest task ID on each node is mapped to the -first CPU ID specifed in the list, etc.). +first CPU ID specified in the list, etc.). CPU IDs are interpreted as decimal values unless they are preceded with '0x' in which case they are interpreted as hexadecimal values. Not supported unless the entire node is allocated to the job. @@ -351,7 +351,7 @@ Bind by setting CPU masks on tasks as specified where <list> is <mask1>,<mask2>,...<maskN>. The mapping is specified for a node and identical mapping is applied to the tasks on every node (i.e. the lowest task ID on each node is mapped to the -first mask specifed in the list, etc.). +first mask specified in the list, etc.). CPU masks are \fBalways\fR interpreted as hexadecimal values but can be preceded with an optional '0x'. Not supported unless the entire node is allocated to the job. @@ -1521,10 +1521,10 @@ Same as \fB\-a, \-\-array\fR \fBSBATCH_BLRTS_IMAGE\fR Same as \fB\-\-blrts\-image\fR .TP -\fBSLURM_CHECKPOINT\fR +\fBSBATCH_CHECKPOINT\fR Same as \fB\-\-checkpoint\fR .TP -\fBSLURM_CHECKPOINT_DIR\fR +\fBSBATCH_CHECKPOINT_DIR\fR Same as \fB\-\-checkpoint\-dir\fR .TP \fBSBATCH_CLUSTERS\fR or \fBSLURM_CLUSTERS\fR @@ -1669,6 +1669,7 @@ Job array ID (index) number. 
.TP \fBSLURM_ARRAY_JOB_ID\fR Job array's master job ID number. +.TP \fBSLURM_CHECKPOINT_IMAGE_DIR\fR Directory into which checkpoint images should be written if specified on the execute line. diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index 3e12b1add175c9319e7c962143420d300a29a9a1..f66c277c67a754da6e053522bae79a1fbbcac860 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -1276,9 +1276,10 @@ adding a '+' or '\-' sign before the '=' sign. For example: Flags\-=DAILY (NOTE: this shortcut is not supported for all flags). Currently supported flags include: .RS -.TP 12 +.TP 14 \fIFIRST_CORES\fR Use the lowest numbered cores on a node only. +.TP \fILICENSE_ONLY\fR This is a reservation for licenses only and not compute nodes. If this flag is set, a job using this reservation may use the associated diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 6acd971f4ba17dbbcdacb82f8b7cc8524ae95ef9..272f9fbf3bcdd4983aef4b5c5950be9113bd4cd3 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -313,7 +313,7 @@ Bind by mapping CPU IDs to tasks as specified where <list> is <cpuid1>,<cpuid2>,...<cpuidN>. The mapping is specified for a node and identical mapping is applied to the tasks on every node (i.e. the lowest task ID on each node is mapped to the -first CPU ID specifed in the list, etc.). +first CPU ID specified in the list, etc.). CPU IDs are interpreted as decimal values unless they are preceded with '0x' in which case they are interpreted as hexadecimal values. Not supported unless the entire node is allocated to the job. @@ -323,7 +323,7 @@ Bind by setting CPU masks on tasks as specified where <list> is <mask1>,<mask2>,...<maskN>. The mapping is specified for a node and identical mapping is applied to the tasks on every node (i.e. the lowest task ID on each node is mapped to the -first mask specifed in the list, etc.). +first mask specified in the list, etc.). CPU masks are \fBalways\fR interpreted as hexadecimal values but can be preceded with an optional '0x'. Not supported unless the entire node is allocated to the job. @@ -1218,7 +1218,7 @@ SLURM build time. \fB\-o\fR, \fB\-\-output\fR=<\fImode\fR> Specify the mode for stdout redirection. By default in interactive mode, .B srun -collects stdout from all tasks and line buffers this output to +collects stdout from all tasks and sends this output via TCP/IP to the attached terminal. With \fB\-\-output\fR stdout may be redirected to a file, to one file per task, or to /dev/null. See section \fBIO Redirection\fR below for the various forms of \fImode\fR. diff --git a/doc/man/man5/acct_gather.conf.5 b/doc/man/man5/acct_gather.conf.5 index 28fd17a04c88b85eeb3b2addd020bba6b1a13de2..ab0bb7f88b49dec55ce460d5576a90cf14d5a33d 100644 --- a/doc/man/man5/acct_gather.conf.5 +++ b/doc/man/man5/acct_gather.conf.5 @@ -123,10 +123,6 @@ Options used for AcctGatherInfinbandType/ofed are as follows: .RS .TP 10 -\fBInfinibandOFEDFrequency\fR=<number> -This parameter is the number of seconds between the calls to infiniband counters. - -.TP \fBInfinibandOFEDPort\fR=<number> This parameter represents the port number of the local Infiniband card that we are willing to monitor. The default port is 1. 
@@ -155,8 +151,6 @@ ProfileHDF5Dir=/app/slurm/profile_data .br # Parameters for AcctGatherInfiniband/ofed plugin .br -InfinibandOFEDFrequency=4 -.br InfinibandOFEDPort=1 .br diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 6fb1f9422c3018dac337b4a9dfb265a0a7cabee1..72622a5f21da45d8839dbc840e34f7b187846252 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -317,6 +317,15 @@ allocation. The default value is 10 (seconds). Larger values may be required if more time is required to execute the \fBProlog\fR, load user environment variables (for Moab spawned jobs), or if the slurmd daemon gets paged from memory. +.br +.br +\fBNote\fR: The test for a job being successfully launched is only performed when +the Slurm daemon on the compute node registers state with the slurmctld daemon +on the head node, which happens fairly rarely. +Therefore a job will not necessarily be terminated if its start time exceeds +\fBBatchStartTimeout\fR. +This configuration parameter is also applied to task launch to avoid aborting +\fBsrun\fR commands due to long running Prolog scripts. .TP \fBCacheGroups\fR @@ -1547,7 +1556,7 @@ enable user login, etc. By default there is no prolog. Any configured script is expected to complete execution quickly (in less time than \fBMessageTimeout\fR). If the prolog fails (returns a non\-zero exit code), this will result in the -node being set to a DOWN state and the job requeued to executed on another node. +node being set to a DRAIN state and the job requeued to execute on another node. See \fBProlog and Epilog Scripts\fR for more information. .TP @@ -1813,8 +1822,8 @@ and resumes communications). \fB1\fR A DOWN node will become available for use upon registration with a valid configuration only if it was set DOWN due to being non\-responsive. -If the node was set DOWN for any other reason (low memory, prolog failure, -epilog failure, unexpected reboot, etc.), its state will not automatically +If the node was set DOWN for any other reason (low memory, +unexpected reboot, etc.), its state will not automatically be changed. .TP \fB2\fR @@ -3742,11 +3751,11 @@ allocation. The Epilog, on the other hand, always runs on every node of an allocation when the allocation is released. If the Epilog fails (returns a non\-zero exit code), this will result in the -node being set to a DOWN state. +node being set to a DRAIN state. If the EpilogSlurmctld fails (returns a non\-zero exit code), this will only be logged. If the Prolog fails (returns a non\-zero exit code), this will result in the -node being set to a DOWN state and the job requeued to executed on another node. +node being set to a DRAIN state and the job requeued to execute on another node. If the PrologSlurmctld fails (returns a non\-zero exit code), this will result in the job requeued to executed on another node if possible. Only batch jobs can be requeued.
Interactive jobs (salloc and srun) will be cancelled if the diff --git a/src/api/step_io.c b/src/api/step_io.c index ccc02bd94aa8888af7bebc598267b85948c0e944..0e147abdd92f9386889321751ca5ac1253c70574 100644 --- a/src/api/step_io.c +++ b/src/api/step_io.c @@ -1223,7 +1223,10 @@ client_io_handler_finish(client_io_t *cio) return SLURM_SUCCESS; eio_signal_shutdown(cio->eio); - _delay_kill_thread(cio->ioid, 60); + /* Make the thread timeout consistent with + * EIO_SHUTDOWN_WAIT + */ + _delay_kill_thread(cio->ioid, 180); if (pthread_join(cio->ioid, NULL) < 0) { error("Waiting for client io pthread: %m"); return SLURM_ERROR; diff --git a/src/api/step_launch.c b/src/api/step_launch.c index 3d67083a742fcfbf9d494ba1ba81a77e46fc9998..be45f9bd5555a37750a8f888f11d0b264c87062d 100644 --- a/src/api/step_launch.c +++ b/src/api/step_launch.c @@ -1588,6 +1588,13 @@ static int _launch_tasks(slurm_step_ctx_t *ctx, hostlist_destroy(hl); } + /* Extend timeout based upon BatchStartTime to permit for a long + * running Prolog */ + if (timeout <= 0) { + timeout = (slurm_get_msg_timeout() + + slurm_get_batch_start_timeout()) * 1000; + } + slurm_msg_t_init(&msg); msg.msg_type = REQUEST_LAUNCH_TASKS; msg.data = launch_msg; diff --git a/src/common/bitstring.c b/src/common/bitstring.c index ace1cc4839b816a9a9d9c97d16f46590707010c4..51a7b467f4301e0cc8bb2c0cf89d4b32ccec20c1 100644 --- a/src/common/bitstring.c +++ b/src/common/bitstring.c @@ -1026,7 +1026,7 @@ bit_unfmt(bitstr_t *b, char *str) int rc = 0; _assert_bitstr_valid(b); - if (str[0] == '\0') /* no bits set */ + if (!str || str[0] == '\0') /* no bits set */ return rc; intvec = bitfmt2int(str); if (intvec == NULL) diff --git a/src/common/eio.c b/src/common/eio.c index eb6cb18ea285f52ae0215c4f48f6ec2b48b88bfa..88c99654cb4bfa04a48afabc180ee7dc5782f307 100644 --- a/src/common/eio.c +++ b/src/common/eio.c @@ -56,7 +56,7 @@ /* How many seconds to wait after eio_signal_shutdown() is called before * terminating the job and abandoning any I/O remaining to be processed */ -#define EIO_SHUTDOWN_WAIT 60 +#define EIO_SHUTDOWN_WAIT 180 /* * outside threads can stick new objects on the new_objs List and diff --git a/src/common/gres.c b/src/common/gres.c index 9428287b6750784b698a385061dd6e1b89037fbe..eebca88ed914c6c3ce238c8eb4db2e70c14e7216 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -2937,10 +2937,14 @@ static bool _cores_on_gres(bitstr_t *core_bitmap, return true; for (i = 0; i < node_gres_ptr->topo_cnt; i++) { + if (!node_gres_ptr->topo_gres_bitmap[i]) + continue; if (bit_size(node_gres_ptr->topo_gres_bitmap[i]) < gres_inx) continue; if (!bit_test(node_gres_ptr->topo_gres_bitmap[i], gres_inx)) continue; + if (!node_gres_ptr->topo_cpus_bitmap[i]) + return true; if (bit_size(node_gres_ptr->topo_cpus_bitmap[i]) != bit_size(core_bitmap)) break; @@ -2980,8 +2984,8 @@ extern void gres_plugin_job_clear(List job_gres_list) xfree(job_state_ptr->gres_bit_alloc); xfree(job_state_ptr->gres_bit_step_alloc); xfree(job_state_ptr->gres_cnt_step_alloc); + job_state_ptr->node_cnt = 0; } - job_state_ptr->node_cnt = 0; list_iterator_destroy(job_gres_iter); } diff --git a/src/common/proc_args.c b/src/common/proc_args.c index f16b4c7f1d8c71545c67ee0cfedaf1771bfa755f..3571528daa86a6f14b67814fc87981c8735b6827 100644 --- a/src/common/proc_args.c +++ b/src/common/proc_args.c @@ -1167,7 +1167,7 @@ extern void bg_figure_nodes_tasks(int *min_nodes, int *max_nodes, "for you.", *ntasks_per_node, node_cnt, ntpn); *ntasks_per_node = ntpn; - } else if ((node_cnt * ntpn) > *ntasks) { + } else if 
(!overcommit && ((node_cnt * ntpn) > *ntasks)) { ntpn = (*ntasks + node_cnt - 1) / node_cnt; while (!_check_is_pow_of_2(ntpn)) ntpn++; diff --git a/src/common/read_config.c b/src/common/read_config.c index 33d9981bffe61cc3029f8532d9002b5126e3c522..c23afdce0d152063f605e79bc983b78857021c25 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -2943,7 +2943,7 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_string(&conf->job_comp_loc, "JobCompLoc", hashtbl)) { if (default_storage_loc) conf->job_comp_loc = xstrdup(default_storage_loc); - else if (!strcmp(conf->job_comp_type, "job_comp/mysql")) + else if (!strcmp(conf->job_comp_type, "jobcomp/mysql")) conf->job_comp_loc = xstrdup(DEFAULT_JOB_COMP_DB); else conf->job_comp_loc = xstrdup(DEFAULT_JOB_COMP_LOC); diff --git a/src/common/slurm_jobcomp.h b/src/common/slurm_jobcomp.h index 0f62f452c52faf81f989b0e923f3ea5fe6d88a29..e0899327d8d24aa91d1032d4ba424d27d7a47ddf 100644 --- a/src/common/slurm_jobcomp.h +++ b/src/common/slurm_jobcomp.h @@ -61,6 +61,7 @@ typedef struct { char *partition; char *start_time; char *end_time; + time_t elapsed_time; uint32_t uid; char *uid_name; uint32_t gid; diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index e543f5bdfd84e4b4adc18c2d9a8e7ea29e12fc82..6c435ca032c7e7236200e4112da73edae50d6ecf 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -62,6 +62,7 @@ #include "src/common/job_options.h" #include "src/common/forward.h" #include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_ext_sensors.h" #include "src/common/slurm_acct_gather_energy.h" #include "src/plugins/select/bluegene/bg_enums.h" @@ -2312,10 +2313,13 @@ extern void slurm_free_node_info_members(node_info_t * node) { if (node) { xfree(node->arch); + acct_gather_energy_destroy(node->energy); + ext_sensors_destroy(node->ext_sensors); xfree(node->features); + xfree(node->gres); xfree(node->name); - xfree(node->node_hostname); xfree(node->node_addr); + xfree(node->node_hostname); xfree(node->os); xfree(node->reason); select_g_select_nodeinfo_free(node->select_nodeinfo); diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c index 1d09e33d4582fa60639a2a4b1b3f4cdabcb1a51b..af5b93eca2cc9103530702396f971b05a8fc27bd 100644 --- a/src/common/slurmdbd_defs.c +++ b/src/common/slurmdbd_defs.c @@ -1565,7 +1565,14 @@ static int _send_init_msg() return rc; } - read_timeout = slurm_get_msg_timeout() * 1000; + /* Add 35 seconds here to make sure the DBD has enough time to + process the request. 30 seconds is defined in + src/database/mysql_common.c in mysql_db_get_db_connection + as the time to wait for a mysql connection and 5 seconds to + avoid a race condition since it could time out at the + same rate and not leave any time to send the response back. 
+ */ + read_timeout = (slurm_get_msg_timeout() + 35) * 1000; rc = _get_return_code(SLURM_PROTOCOL_VERSION, read_timeout); if (tmp_errno) errno = tmp_errno; @@ -2125,12 +2132,12 @@ static void *_agent(void *x) break; } list_iterator_destroy(agent_itr); - buffer = pack_slurmdbd_msg(&list_req, - SLURM_PROTOCOL_VERSION); + buffer = pack_slurmdbd_msg( + &list_req, SLURM_PROTOCOL_VERSION); } else if (cnt > 1) { list_msg.my_list = agent_list; - buffer = pack_slurmdbd_msg(&list_req, - SLURM_PROTOCOL_VERSION); + buffer = pack_slurmdbd_msg( + &list_req, SLURM_PROTOCOL_VERSION); } else buffer = (Buf) list_peek(agent_list); } else @@ -2161,7 +2168,8 @@ static void *_agent(void *x) rc = _handle_mult_rc_ret(SLURM_PROTOCOL_VERSION, read_timeout); } else { - rc = _get_return_code(SLURM_PROTOCOL_VERSION, read_timeout); + rc = _get_return_code(SLURM_PROTOCOL_VERSION, + read_timeout); if (rc == EAGAIN) { if (agent_shutdown) { slurm_mutex_unlock(&slurmdbd_lock); diff --git a/src/database/mysql_common.c b/src/database/mysql_common.c index b199b79e97a89b78752872350df90b2dd5d62e79..6c282d4a45e4ada685739ea7d667a85822579a84 100644 --- a/src/database/mysql_common.c +++ b/src/database/mysql_common.c @@ -664,6 +664,11 @@ extern int mysql_db_get_db_connection(mysql_conn_t *mysql_conn, char *db_name, fatal("mysql_init failed: %s", mysql_error(mysql_conn->db_conn)); } else { + /* If this ever changes you will need to alter + * src/common/slurmdbd_defs.c function _send_init_msg to + * handle a different timeout when polling for the + * response. + */ unsigned int my_timeout = 30; #ifdef MYSQL_OPT_RECONNECT my_bool reconnect = 1; @@ -696,6 +701,8 @@ extern int mysql_db_get_db_connection(mysql_conn_t *mysql_conn, char *db_name, } rc = ESLURM_DB_CONNECTION; + mysql_close(mysql_conn->db_conn); + mysql_conn->db_conn = NULL; break; } } else { diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c index 9dd881682f7042d24c68256b1539b350f903f83f..e62a6afd4ba59b4e2c6750fef91366ce6f615b00 100644 --- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c +++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c @@ -650,6 +650,7 @@ extern int jobacct_storage_p_job_complete(void *db_conn, char buf[BUFFER_SIZE]; uint16_t job_state; int duration; + uint32_t exit_code; if (!storage_init) { debug("jobacct init was not called or it failed"); @@ -675,11 +676,19 @@ extern int jobacct_storage_p_job_complete(void *db_conn, duration = job_ptr->end_time - job_ptr->start_time; } + exit_code = job_ptr->exit_code; + if (exit_code == 1) { + /* This wasn't signalled, it was set by Slurm so don't + * treat it like a signal. 
+ */ + exit_code = 256; + } + /* leave the requid as a %d since we want to see if it is -1 in stats */ snprintf(buf, BUFFER_SIZE, "%d %d %u %u %u", JOB_TERMINATED, duration, - job_state, job_ptr->requid, job_ptr->exit_code); + job_state, job_ptr->requid, exit_code); return _print_record(job_ptr, job_ptr->end_time, buf); } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.c b/src/plugins/accounting_storage/mysql/as_mysql_job.c index 0c559879b57f5f8f174dd84b12c6b8b0fde1b693..ff6a166f854fd8f30153322ebcab6c90183eeb7f 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_job.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_job.c @@ -728,6 +728,7 @@ extern int as_mysql_job_complete(mysql_conn_t *mysql_conn, char *query = NULL, *nodes = NULL; int rc = SLURM_SUCCESS, job_state; time_t submit_time, end_time; + uint32_t exit_code = 0; if (!job_ptr->db_index && ((!job_ptr->details || !job_ptr->details->submit_time) @@ -832,9 +833,17 @@ extern int as_mysql_job_complete(mysql_conn_t *mysql_conn, xfree(comment); } + exit_code = job_ptr->exit_code; + if (exit_code == 1) { + /* This wasn't signalled, it was set by Slurm so don't + * treat it like a signal. + */ + exit_code = 256; + } + xstrfmtcat(query, ", exit_code=%d, kill_requid=%d where job_db_inx=%d;", - job_ptr->exit_code, job_ptr->requid, + exit_code, job_ptr->requid, job_ptr->db_index); debug3("%d(%s:%d) query\n%s", diff --git a/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c b/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c index 43560ba6bdee0971b809903fdf864179a8841b24..d3b6d1e6b4a791c9d10832dc99e43e18ea4274ac 100644 --- a/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c +++ b/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c @@ -46,6 +46,7 @@ #include "src/common/slurm_jobcomp.h" #include "src/common/xmalloc.h" +#include "src/common/parse_time.h" #include "filetxt_jobcomp_process.h" #define BUFFER_SIZE 4096 @@ -92,6 +93,8 @@ static jobcomp_job_rec_t *_parse_line(List job_info_list) filetxt_jobcomp_info_t *jobcomp_info = NULL; jobcomp_job_rec_t *job = xmalloc(sizeof(jobcomp_job_rec_t)); char *temp = NULL; + time_t start_time; + time_t end_time; itr = list_iterator_create(job_info_list); while((jobcomp_info = list_next(itr))) { @@ -101,8 +104,10 @@ static jobcomp_job_rec_t *_parse_line(List job_info_list) job->partition = xstrdup(jobcomp_info->val); } else if (!strcasecmp("StartTime", jobcomp_info->name)) { job->start_time = xstrdup(jobcomp_info->val); + start_time = parse_time(job->start_time, 1); } else if (!strcasecmp("EndTime", jobcomp_info->name)) { job->end_time = xstrdup(jobcomp_info->val); + end_time = parse_time(job->end_time, 1); } else if (!strcasecmp("Userid", jobcomp_info->name)) { temp = strstr(jobcomp_info->val, "("); if (!temp) { @@ -160,6 +165,7 @@ static jobcomp_job_rec_t *_parse_line(List job_info_list) jobcomp_info->val); } } + job->elapsed_time = end_time - start_time; list_iterator_destroy(itr); return job; diff --git a/src/plugins/jobcomp/mysql/mysql_jobcomp_process.c b/src/plugins/jobcomp/mysql/mysql_jobcomp_process.c index 71353037b3e953d9b05d5a5bcd744f6b0224e27e..7e91188f4d455a09791b3671f8296ea899af9212 100644 --- a/src/plugins/jobcomp/mysql/mysql_jobcomp_process.c +++ b/src/plugins/jobcomp/mysql/mysql_jobcomp_process.c @@ -146,6 +146,9 @@ extern List mysql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond) time_str, sizeof(time_str)); + job->elapsed_time = atoi(row[JOBCOMP_REQ_ENDTIME]) + - atoi(row[JOBCOMP_REQ_STARTTIME]); + job->end_time = xstrdup(time_str); if 
(row[JOBCOMP_REQ_UID]) job->uid = slurm_atoul(row[JOBCOMP_REQ_UID]); diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index ababedfe358dc0d7fd1f1f00b2a7e32d5113cfad..40a97e22eec34ea848387770dd373fb9c93aaef8 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -769,7 +769,9 @@ static int _attempt_backfill(void) xfree(job_queue_rec); if (!IS_JOB_PENDING(job_ptr)) - continue; /* started in other partition */ + continue; /* started in another partition */ + if (job_ptr->preempt_in_progress) + continue; /* scheduled in another partition */ if (!avail_front_end(job_ptr)) continue; /* No available frontend for this job */ if (job_ptr->array_task_id != NO_VAL) { diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index cc33112fbe74b3b5a716b8376c11f4d1509ec4c5..2833e37c1d7ca98c0cb14c508cca794c83289b7a 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -1861,11 +1861,9 @@ static int _choose_nodes(struct job_record *job_ptr, bitstr_t *node_map, } } - /* NOTE: details->min_cpus is 1 by default, - * Only reset max_nodes if user explicitly sets a proc count */ - if ((job_ptr->details->min_cpus > 1) && - (max_nodes > job_ptr->details->min_cpus)) - max_nodes = job_ptr->details->min_cpus; + if ((job_ptr->details->num_tasks > 1) && + (max_nodes > job_ptr->details->num_tasks)) + max_nodes = job_ptr->details->num_tasks; origmap = bit_copy(node_map); @@ -2172,6 +2170,10 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *node_bitmap, FREE_NULL_BITMAP(free_cores); FREE_NULL_BITMAP(avail_cores); xfree(cpu_count); + if (select_debug_flags & DEBUG_FLAG_CPU_BIND) { + info("cons_res: cr_job_test: test 0 fail: " + "waiting for switches"); + } return SLURM_ERROR; } if (cr_type == CR_MEMORY) { @@ -2660,6 +2662,8 @@ alloc_job: last = bit_fls(job_res->node_bitmap); job_ptr->total_cpus = 0; for (i = first; i <= last; i++) { + if (!bit_test(job_res->node_bitmap, i)) + continue; job_ptr->total_cpus += select_node_record[i].cpus; } } else if (build_cnt >= 0) diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index 82734b36e5cc5512b343ceb1f9cb6ed48e2a940e..0a86ceaa4cdd3dab239e74cc07c9dca5a0946fba 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -2048,7 +2048,6 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap, if (!job_ptr->details) return EINVAL; - if (job_ptr->details->core_spec && job_ptr->details->whole_node == 0) { info("Setting Exclusive mode for job %u with CoreSpec=%u", job_ptr->job_id, job_ptr->details->core_spec); @@ -2373,6 +2372,7 @@ extern int select_p_select_nodeinfo_set(struct job_record *job_ptr) if (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr)) return SLURM_SUCCESS; + gres_plugin_job_clear(job_ptr->gres_list); rc = _add_job_to_res(job_ptr, 0); gres_plugin_job_state_log(job_ptr->gres_list, job_ptr->job_id); diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c index 28d66bc917448c00704cb03c2295d7d2c4209bed..a96ff5a387a2dafbc2dc658e748cd8697ddc1d8f 100644 --- a/src/plugins/select/cray/select_cray.c +++ b/src/plugins/select/cray/select_cray.c @@ -1647,12 +1647,12 @@ extern int select_p_job_fini(struct job_record *job_ptr) } if (jobinfo->cleaning == 1) - debug("Cleaning flag already set for job %u, " - "running nhc anyway", 
job_ptr->job_id); - else + error("Cleaning flag already set for job %u, " + "this should never happen", job_ptr->job_id); + else { jobinfo->cleaning = 1; - - _spawn_cleanup_thread(job_ptr, _job_fini); + _spawn_cleanup_thread(job_ptr, _job_fini); + } return SLURM_SUCCESS; } @@ -1806,12 +1806,13 @@ extern int select_p_step_finish(struct step_record *step_ptr) /* } */ if (jobinfo->cleaning == 1) - debug("Cleaning flag already set for job step %u.%u, " - "running nhc anyway", + error("Cleaning flag already set for job step %u.%u, " + "this should never happen.", step_ptr->step_id, step_ptr->job_ptr->job_id); - else + else { jobinfo->cleaning = 1; - _spawn_cleanup_thread(step_ptr, _step_fini); + _spawn_cleanup_thread(step_ptr, _step_fini); + } return SLURM_SUCCESS; } diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index d8e14c935153acab6320e6c1a16bf2dc55825f69..225746e17694e4523dbee7b53d5f1ef95815229b 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -2905,6 +2905,7 @@ extern int select_p_job_begin(struct job_record *job_ptr) slurm_mutex_lock(&cr_mutex); if (cr_ptr == NULL) _init_node_cr(); + gres_plugin_job_clear(job_ptr->gres_list); if (rc == SLURM_SUCCESS) rc = _add_job_to_nodes(cr_ptr, job_ptr, "select_p_job_begin", 1); gres_plugin_job_state_log(job_ptr->gres_list, job_ptr->job_id); diff --git a/src/plugins/select/serial/select_serial.c b/src/plugins/select/serial/select_serial.c index 26040d50af2a008969ce3beed3c93f14993f54d5..0a981397e063b781d13c73c847a6b902f3fc171a 100644 --- a/src/plugins/select/serial/select_serial.c +++ b/src/plugins/select/serial/select_serial.c @@ -1911,6 +1911,7 @@ extern int select_p_select_nodeinfo_set(struct job_record *job_ptr) if (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr)) return SLURM_SUCCESS; + gres_plugin_job_clear(job_ptr->gres_list); rc = _add_job_to_res(job_ptr, 0); gres_plugin_job_state_log(job_ptr->gres_list, job_ptr->job_id); diff --git a/src/plugins/switch/nrt/nrt.c b/src/plugins/switch/nrt/nrt.c index 5f7e0873c4ec3d5c37cda44b7c9f00ebfd124f8e..c0fb737c3622ac690b8fdd7a5a9e18795ec85b41 100644 --- a/src/plugins/switch/nrt/nrt.c +++ b/src/plugins/switch/nrt/nrt.c @@ -4,7 +4,7 @@ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. - * Copyright (C) 2011-2012 SchedMD LLC. + * Copyright (C) 2011-2014 SchedMD LLC. * Original switch/federation plugin written by Jason King <jking@llnl.gov> * Largely re-written for NRT support by Morris Jette <jette@schedmd.com> * @@ -311,9 +311,6 @@ static void _pack_tableinfo(nrt_tableinfo_t *tableinfo, Buf buf, slurm_nrt_jobinfo_t *jp, uint16_t protocol_version); static char * _state_str(win_state_t state); -static int _unload_window(char *adapter_name, nrt_adapter_t adapter_type, - nrt_job_key_t job_key, - nrt_window_id_t window_id, int retry); static int _unload_window_all_jobs(char *adapter_name, nrt_adapter_t adapter_type, nrt_window_id_t window_id); @@ -3892,60 +3889,6 @@ nrt_load_table(slurm_nrt_jobinfo_t *jp, int uid, int pid, char *job_name) return SLURM_SUCCESS; } -/* - * Try up to "retry" times to unload a window. 
- */ -static int -_unload_window(char *adapter_name, nrt_adapter_t adapter_type, - nrt_job_key_t job_key, nrt_window_id_t window_id, int retry) -{ - int err, i; - nrt_cmd_clean_window_t clean_window; - nrt_cmd_unload_window_t unload_window; - - for (i = 0; i < retry; i++) { - if (i > 0) { - usleep(100000); - } else { - unload_window.adapter_name = adapter_name; - unload_window.adapter_type = adapter_type; - unload_window.job_key = job_key; - unload_window.window_id = window_id; - } - if (debug_flags & DEBUG_FLAG_SWITCH) { - info("nrt_cmd_wrap(unload_window, %s, %s, %u, %hu)", - adapter_name, _adapter_type_str(adapter_type), - job_key, window_id); - } - - err = nrt_cmd_wrap(NRT_VERSION, NRT_CMD_UNLOAD_WINDOW, - &unload_window); - if (err == NRT_SUCCESS) - return SLURM_SUCCESS; - debug("Unable to unload window for job_key %u, " - "nrt_unload_window(%s, %s): %s", - job_key, adapter_name, _adapter_type_str(adapter_type), - nrt_err_str(err)); - - if (i == 0) { - clean_window.adapter_name = adapter_name; - clean_window.adapter_type = adapter_type; - clean_window.leave_inuse_or_kill = KILL; - clean_window.window_id = window_id; - } - err = nrt_cmd_wrap(NRT_VERSION, NRT_CMD_CLEAN_WINDOW, - &clean_window); - if (err == NRT_SUCCESS) - return SLURM_SUCCESS; - error("Unable to clean window for job_key %u, " - "nrt_clean_window(%s, %u): %s", - job_key, adapter_name, adapter_type, nrt_err_str(err)); - if (err != NRT_EAGAIN) - break; - } - - return SLURM_FAILURE; -} static int _unload_window_all_jobs(char *adapter_name, nrt_adapter_t adapter_type, nrt_window_id_t window_id) @@ -3996,9 +3939,6 @@ static int _unload_job_table(slurm_nrt_jobinfo_t *jp) unload_table.job_key = jp->job_key; for (i = 0; i < jp->tables_per_task; i++) { - if (jp->tableinfo[i].adapter_type != NRT_HFI) - continue; - unload_table.context_id = jp->tableinfo[i].context_id; unload_table.table_id = jp->tableinfo[i].table_id; if (debug_flags & DEBUG_FLAG_SWITCH) { @@ -4020,59 +3960,6 @@ static int _unload_job_table(slurm_nrt_jobinfo_t *jp) return rc; } -static int _unload_job_windows(slurm_nrt_jobinfo_t *jp) -{ - nrt_window_id_t window_id = 0; - int err, i, j, rc = SLURM_SUCCESS; - int retry = 15; - - if (!my_lpar_id_set && !my_network_id_set) - _get_my_id(); - - for (i = 0; i < jp->tables_per_task; i++) { - for (j = 0; j < jp->tableinfo[i].table_length; j++) { - if (jp->tableinfo[i].adapter_type == NRT_IB) { - nrt_ib_task_info_t *ib_tbl_ptr; - ib_tbl_ptr = (nrt_ib_task_info_t *) - jp->tableinfo[i].table; - ib_tbl_ptr += j; - if (ib_tbl_ptr->node_number != my_network_id) - continue; - window_id = ib_tbl_ptr->win_id; - } else if (jp->tableinfo[i].adapter_type == NRT_HFI) { - nrt_hfi_task_info_t *hfi_tbl_ptr; - hfi_tbl_ptr = (nrt_hfi_task_info_t *) - jp->tableinfo[i].table; - hfi_tbl_ptr += j; - if (hfi_tbl_ptr->lpar_id != my_lpar_id) - continue; - window_id = hfi_tbl_ptr->win_id; - } else if ((jp->tableinfo[i].adapter_type==NRT_HPCE) || - (jp->tableinfo[i].adapter_type==NRT_KMUX)) { - nrt_hpce_task_info_t *hpce_tbl_ptr; - hpce_tbl_ptr = (nrt_hpce_task_info_t *) - jp->tableinfo[i].table; - hpce_tbl_ptr += j; - if (hpce_tbl_ptr->node_number != my_network_id) - continue; - window_id = hpce_tbl_ptr->win_id; - } else { - fatal("nrt_unload_window: invalid adapter " - "type: %s", - _adapter_type_str(jp->tableinfo[i]. 
- adapter_type)); - } - err = _unload_window(jp->tableinfo[i].adapter_name, - jp->tableinfo[i].adapter_type, - jp->job_key, - window_id, retry); - if (err != NRT_SUCCESS) - rc = SLURM_ERROR; - } - } - return rc; -} - /* Assumes that, on error, new switch state information will be * read from node. * @@ -4081,8 +3968,6 @@ static int _unload_job_windows(slurm_nrt_jobinfo_t *jp) extern int nrt_unload_table(slurm_nrt_jobinfo_t *jp) { - int rc, rc1, rc2; - if ((jp == NULL) || (jp->magic == NRT_NULL_MAGIC)) { debug2("(%s: %d: %s) job->switch_job was NULL", THIS_FILE, __LINE__, __FUNCTION__); @@ -4096,15 +3981,7 @@ nrt_unload_table(slurm_nrt_jobinfo_t *jp) _print_jobinfo(jp); } - if (jp->user_space) { - rc1 = _unload_job_windows(jp); - rc2 = _unload_job_table(jp); - rc = MAX(rc1, rc2); - } else { - rc = _unload_job_table(jp); - } - - return rc; + return _unload_job_table(jp); } extern int diff --git a/src/sacct/print.c b/src/sacct/print.c index 44930447ec70941247b187d7a96fd666599e57a2..086ea128ae28f58bd78666b0ff9ce12815a2b0b5 100644 --- a/src/sacct/print.c +++ b/src/sacct/print.c @@ -516,8 +516,7 @@ void print_fields(type_t type, void *object) tmp_int = step->elapsed; break; case JOBCOMP: - tmp_int = job_comp->end_time - - job_comp->start_time; + tmp_int = job_comp->elapsed_time; break; default: tmp_int = NO_VAL; @@ -577,11 +576,13 @@ void print_fields(type_t type, void *object) default: break; } - if (WIFSIGNALED(tmp_int)) - tmp_int2 = WTERMSIG(tmp_int); - tmp_int = WEXITSTATUS(tmp_int); - if (tmp_int >= 128) - tmp_int -= 128; + if (tmp_int != NO_VAL) { + if (WIFSIGNALED(tmp_int)) + tmp_int2 = WTERMSIG(tmp_int); + tmp_int = WEXITSTATUS(tmp_int); + if (tmp_int >= 128) + tmp_int -= 128; + } snprintf(outbuf, sizeof(outbuf), "%d:%d", tmp_int, tmp_int2); diff --git a/src/scontrol/create_res.c b/src/scontrol/create_res.c index 27399ce1e3dc46293265dba84a9b84401f698b22..7490e2ac31447fdf9f6b0a4c3f87170ce4f55b86 100644 --- a/src/scontrol/create_res.c +++ b/src/scontrol/create_res.c @@ -36,6 +36,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ +#define _GNU_SOURCE #include "src/scontrol/scontrol.h" #include "src/slurmctld/reservation.h" @@ -76,7 +77,7 @@ static char * _process_plus_minus(char plus_or_minus, char *src) /* * _parse_flags is used to parse the Flags= option. It handles - * daily, weekly, static_alloc, part_nodes, and maint, optionally + * daily, weekly, static_alloc, part_nodes, and maint, optionally * preceded by + or -, separated by a comma but no spaces. */ static uint32_t _parse_flags(const char *flagstr, const char *msg) @@ -301,8 +302,32 @@ scontrol_parse_res_options(int argc, char *argv[], const char *msg, strncasecmp(tag, "CPUCount", MAX(taglen,5)) == 0) { char *endptr = NULL, *core_cnt, *tok, *ptrptr = NULL; + char *type; int node_inx = 0; + type = slurm_get_select_type(); + if (strcasestr(type, "cray")) { + int param; + param = slurm_get_select_type_param(); + if (! 
(param & CR_OTHER_CONS_RES)) { + error("CoreCnt or CPUCnt is only " + "supported when " + "SelectTypeParameters " + "includes OTHER_CONS_RES"); + xfree(type); + return -1; + } + } else { + if (strcasestr(type, "cons_res") == NULL) { + error("CoreCnt or CPUCnt is only " + "supported when " + "SelectType includes " + "select/cons_res"); + xfree(type); + return -1; + } + } + xfree(type); core_cnt = xstrdup(val); tok = strtok_r(core_cnt, ",", &ptrptr); while (tok) { diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 1e7f5b42475a802d5059a56a7784c96314c727f8..5a682335d61545fc98f648b4b05b93db958192fe 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1278,6 +1278,10 @@ static void _queue_reboot_msg(void) want_nodes_reboot = true; continue; } + if (IS_NODE_COMPLETING(node_ptr)) { + want_nodes_reboot = true; + continue; + } if (IS_NODE_IDLE(node_ptr) && !IS_NODE_NO_RESPOND(node_ptr) && !IS_NODE_POWER_UP(node_ptr)) /* only active idle nodes */ want_reboot = true; @@ -1298,8 +1302,10 @@ static void _queue_reboot_msg(void) } hostlist_push(reboot_agent_args->hostlist, node_ptr->name); reboot_agent_args->node_count++; - node_ptr->node_state = NODE_STATE_FUTURE | - (node_ptr->node_state & NODE_STATE_FLAGS); + node_ptr->node_state &= ~NODE_STATE_MAINT; + node_ptr->node_state &= NODE_STATE_FLAGS; + node_ptr->node_state |= NODE_STATE_DOWN; + node_ptr->reason = xstrdup("Scheduled reboot"); bit_clear(avail_node_bitmap, i); bit_clear(idle_node_bitmap, i); node_ptr->last_response = now; @@ -1312,6 +1318,7 @@ static void _queue_reboot_msg(void) xfree(host_str); agent_queue_request(reboot_agent_args); last_node_update = now; + schedule_node_save(); } } diff --git a/src/slurmctld/gang.c b/src/slurmctld/gang.c index b7f98ced17c20614e9a518c2c2e3eac7e4611ee5..16030d7e0d782846d70929290387a770dd7d9dc2 100644 --- a/src/slurmctld/gang.c +++ b/src/slurmctld/gang.c @@ -1043,6 +1043,7 @@ static void _scan_slurm_job_list(void) struct gs_part *p_ptr; int i; ListIterator job_iterator; + char *part_name; if (!job_list) { /* no jobs */ if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) @@ -1062,10 +1063,15 @@ static void _scan_slurm_job_list(void) if (IS_JOB_SUSPENDED(job_ptr) && (job_ptr->priority == 0)) continue; /* not suspended by us */ + if (job_ptr->part_ptr && job_ptr->part_ptr->name) + part_name = job_ptr->part_ptr->name; + else + part_name = job_ptr->partition; + if (IS_JOB_SUSPENDED(job_ptr) || IS_JOB_RUNNING(job_ptr)) { /* are we tracking this job already? */ p_ptr = list_find_first(gs_part_list, _find_gs_part, - job_ptr->partition); + part_name); if (!p_ptr) /* no partition */ continue; i = _find_job_index(p_ptr, job_ptr->job_id); @@ -1094,8 +1100,7 @@ static void _scan_slurm_job_list(void) /* if the job is not pending, suspended, or running, then * it's completing or completed.
Make sure we've released * this job */ - p_ptr = list_find_first(gs_part_list, _find_gs_part, - job_ptr->partition); + p_ptr = list_find_first(gs_part_list, _find_gs_part, part_name); if (!p_ptr) /* no partition */ continue; _remove_job_from_part(job_ptr->job_id, p_ptr, false); @@ -1216,13 +1221,17 @@ extern int gs_job_start(struct job_record *job_ptr) { struct gs_part *p_ptr; uint16_t job_state; + char *part_name; if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) info("gang: entering gs_job_start for job %u", job_ptr->job_id); /* add job to partition */ + if (job_ptr->part_ptr && job_ptr->part_ptr->name) + part_name = job_ptr->part_ptr->name; + else + part_name = job_ptr->partition; pthread_mutex_lock(&data_mutex); - p_ptr = list_find_first(gs_part_list, _find_gs_part, - job_ptr->partition); + p_ptr = list_find_first(gs_part_list, _find_gs_part, part_name); if (p_ptr) { job_state = _add_job_to_part(p_ptr, job_ptr); /* if this job is running then check for preemption */ @@ -1236,7 +1245,7 @@ extern int gs_job_start(struct job_record *job_ptr) * uninterupted (what else can we do?) */ error("gang: could not find partition %s for job %u", - job_ptr->partition, job_ptr->job_id); + part_name, job_ptr->job_id); } _preempt_job_dequeue(); /* MUST BE OUTSIDE OF data_mutex lock */ @@ -1287,12 +1296,16 @@ extern void gs_wake_jobs(void) extern int gs_job_fini(struct job_record *job_ptr) { struct gs_part *p_ptr; + char *part_name; if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) info("gang: entering gs_job_fini for job %u", job_ptr->job_id); + if (job_ptr->part_ptr && job_ptr->part_ptr->name) + part_name = job_ptr->part_ptr->name; + else + part_name = job_ptr->partition; pthread_mutex_lock(&data_mutex); - p_ptr = list_find_first(gs_part_list, _find_gs_part, - job_ptr->partition); + p_ptr = list_find_first(gs_part_list, _find_gs_part, part_name); if (!p_ptr) { pthread_mutex_unlock(&data_mutex); if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 016078def8b25ef0aad8a2ebcf8880058a903897..21ec0ea2ecd488089abf1602a4e557c7460a6e11 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -3259,6 +3259,11 @@ static void _create_job_array(struct job_record *job_ptr, if (!bit_test(job_specs->array_bitmap, i)) continue; job_ptr_new = _job_rec_copy(job_ptr); + /* Make sure the db_index is zero + * for array elements in case the + * first element had the index assigned. 
+ */ + job_ptr_new->db_index = 0; if (!job_ptr_new) break; job_ptr_new->array_job_id = job_ptr->job_id; @@ -3296,7 +3301,11 @@ static int _select_nodes_parts(struct job_record *job_ptr, bool test_only, rc = select_nodes(job_ptr, test_only, select_node_bitmap); if ((rc != ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE) && - (rc != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE)) + (rc != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) && + (rc != ESLURM_NODES_BUSY)) + break; + if ((job_ptr->preempt_in_progress) && + (rc != ESLURM_NODES_BUSY)) break; } list_iterator_destroy(iter); @@ -3692,8 +3701,8 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t flags, } else { _signal_job(job_ptr, signal); } - verbose("job_signal %u of running job %u successful", - signal, job_id); + verbose("job_signal %u of running job %u successful 0x%x", + signal, job_id, job_ptr->job_state); return SLURM_SUCCESS; } @@ -5039,9 +5048,9 @@ extern int validate_job_create_req(job_desc_msg_t * job_desc) _test_strlen(job_desc->req_nodes, "req_nodes", 1024*64) || _test_strlen(job_desc->reservation, "reservation", 1024) || _test_strlen(job_desc->script, "script", 1024 * 1024 * 4) || - _test_strlen(job_desc->std_err, "std_err", MAXPATHLEN) || - _test_strlen(job_desc->std_in, "std_in", MAXPATHLEN) || - _test_strlen(job_desc->std_out, "std_out", MAXPATHLEN) || + _test_strlen(job_desc->std_err, "std_err", MAXPATHLEN) || + _test_strlen(job_desc->std_in, "std_in", MAXPATHLEN) || + _test_strlen(job_desc->std_out, "std_out", MAXPATHLEN) || _test_strlen(job_desc->wckey, "wckey", 1024) || _test_strlen(job_desc->work_dir, "work_dir", MAXPATHLEN)) return ESLURM_PATHNAME_TOO_LONG; @@ -8678,6 +8687,7 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) job_specs->job_id); job_ptr->state_reason = WAIT_NO_REASON; job_ptr->job_state &= ~JOB_SPECIAL_EXIT; + job_ptr->exit_code = 0; xfree(job_ptr->state_desc); } else if ((job_ptr->priority == 0) && (job_specs->priority != INFINITE)) { diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 35055876786d8bdd736b692ffa19bdf489cc681c..051d6e8e0c1e93bb628a0594b7a946bf0eb4da35 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -266,6 +266,7 @@ extern List build_job_queue(bool clear_start, bool backfill) job_queue = list_create(_job_queue_rec_del); job_iterator = list_iterator_create(job_list); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { + job_ptr->preempt_in_progress = false; /* initialize */ if (!_job_runnable_test1(job_ptr, clear_start)) continue; @@ -950,7 +951,7 @@ extern int schedule(uint32_t job_limit) while (1) { if (fifo_sched) { if (job_ptr && part_iterator && - IS_JOB_PENDING(job_ptr)) /*started in other part?*/ + IS_JOB_PENDING(job_ptr)) /* test job in next part */ goto next_part; job_ptr = (struct job_record *) list_next(job_iterator); if (!job_ptr) @@ -996,9 +997,11 @@ next_part: part_ptr = (struct part_record *) continue; } if (!IS_JOB_PENDING(job_ptr)) - continue; /* started in other partition */ + continue; /* started in another partition */ job_ptr->part_ptr = part_ptr; } + if (job_ptr->preempt_in_progress) + continue; /* scheduled in another partition */ if ((time(NULL) - sched_start) >= sched_timeout) { debug("sched: loop taking too long, breaking out"); break; diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 4bf8014d18334d729988f365eaa40d1415eae2b2..3dac824b6c3b08a4e9b8e42b59d7df15c5bb105d 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -496,6 +496,8 @@ 
extern int load_all_node_state ( bool state_only ) node_ptr->real_memory = real_memory; node_ptr->tmp_disk = tmp_disk; } + if (node_state & NODE_STATE_MAINT) + node_ptr->node_state |= NODE_STATE_MAINT; if (node_state & NODE_STATE_POWER_UP) { if (power_save_mode) { node_ptr->node_state |= @@ -702,8 +704,7 @@ extern void pack_all_node (char **buffer_ptr, int *buffer_size, if (((show_flags & SHOW_ALL) == 0) && (uid != 0) && (_node_is_hidden(node_ptr))) hidden = true; - else if (IS_NODE_FUTURE(node_ptr) && - !IS_NODE_MAINT(node_ptr)) /* reboot req sent */ + else if (IS_NODE_FUTURE(node_ptr)) hidden = true; else if (IS_NODE_CLOUD(node_ptr) && IS_NODE_POWER_SAVE(node_ptr)) @@ -787,8 +788,7 @@ extern void pack_one_node (char **buffer_ptr, int *buffer_size, if (((show_flags & SHOW_ALL) == 0) && (uid != 0) && (_node_is_hidden(node_ptr))) hidden = true; - else if (IS_NODE_FUTURE(node_ptr) && - !IS_NODE_MAINT(node_ptr)) /* reboot req sent */ + else if (IS_NODE_FUTURE(node_ptr)) hidden = true; else if (IS_NODE_CLOUD(node_ptr) && IS_NODE_POWER_SAVE(node_ptr)) @@ -2078,10 +2078,9 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg, } } else if (IS_NODE_DOWN(node_ptr) && ((slurmctld_conf.ret2service == 2) || + !xstrcmp(node_ptr->reason, "Scheduled reboot") || ((slurmctld_conf.ret2service == 1) && - (node_ptr->reason != NULL) && - (strncmp(node_ptr->reason, "Not responding", 14) - == 0)))) { + !xstrcmp(node_ptr->reason, "Not responding")))) { if (reg_msg->job_count) { node_ptr->node_state = NODE_STATE_ALLOCATED | node_flags; @@ -2439,10 +2438,11 @@ extern int validate_nodes_via_front_end( } } else if (IS_NODE_DOWN(node_ptr) && ((slurmctld_conf.ret2service == 2) || + !xstrcmp(node_ptr->reason, + "Scheduled reboot") || ((slurmctld_conf.ret2service == 1) && - (node_ptr->reason != NULL) && - (strncmp(node_ptr->reason, - "Not responding", 14) == 0)))) { + !xstrcmp(node_ptr->reason, + "Not responding")))) { update_node_state = true; *newly_up = true; if (node_ptr->run_job_cnt) { @@ -2554,9 +2554,10 @@ static void _node_did_resp(front_end_record_t *fe_ptr) fe_ptr->node_state = NODE_STATE_IDLE | node_flags; } if (IS_NODE_DOWN(fe_ptr) && - (slurmctld_conf.ret2service == 1) && - (fe_ptr->reason != NULL) && - (strncmp(fe_ptr->reason, "Not responding", 14) == 0)) { + ((slurmctld_conf.ret2service == 2) || + !xstrcmp(fe_ptr->reason, "Scheduled reboot") || + ((slurmctld_conf.ret2service == 1) && + !xstrcmp(fe_ptr->reason, "Not responding")))) { last_front_end_update = now; fe_ptr->node_state = NODE_STATE_IDLE | node_flags; info("node_did_resp: node %s returned to service", @@ -2605,9 +2606,10 @@ static void _node_did_resp(struct node_record *node_ptr) } } if (IS_NODE_DOWN(node_ptr) && - (slurmctld_conf.ret2service == 1) && - (node_ptr->reason != NULL) && - (strncmp(node_ptr->reason, "Not responding", 14) == 0)) { + ((slurmctld_conf.ret2service == 2) || + !xstrcmp(node_ptr->reason, "Scheduled reboot") || + ((slurmctld_conf.ret2service == 1) && + !xstrcmp(node_ptr->reason, "Not responding")))) { node_ptr->last_idle = now; node_ptr->node_state = NODE_STATE_IDLE | node_flags; info("node_did_resp: node %s returned to service", diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index ec86c166fa32ef0a78492e5aea7f3de7acc270d1..3081b94f5d37da7e583b5204e32c1cacb0efe5fd 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -818,7 +818,7 @@ _get_req_features(struct node_set *node_set_ptr, int node_set_size, /* _pick_best_nodes() is destructive of 
the node_set * data structure, so we need to make a copy and then * purge it */ - for (i=0; i<node_set_size; i++) { + for (i = 0; i < node_set_size; i++) { if (!_match_feature(feat_ptr->name, node_set_ptr+i)) continue; @@ -1694,7 +1694,7 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, * free up. total_cpus is set within _get_req_features */ job_ptr->cpu_cnt = job_ptr->total_cpus; - if (!test_only && preemptee_job_list && (error_code == SLURM_SUCCESS)){ + if (!test_only && preemptee_job_list && (error_code == SLURM_SUCCESS)) { struct job_details *detail_ptr = job_ptr->details; time_t now = time(NULL); bool kill_pending = true; @@ -1710,6 +1710,7 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, if ((error_code == ESLURM_NODES_BUSY) && (detail_ptr->preempt_start_time == 0)) { detail_ptr->preempt_start_time = now; + job_ptr->preempt_in_progress = true; } } if (error_code) { @@ -1756,6 +1757,7 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, * memory. */ FREE_NULL_BITMAP(job_ptr->node_bitmap); xfree(job_ptr->nodes); + job_ptr->exit_code = 0; job_ptr->node_bitmap = select_bitmap; diff --git a/src/slurmctld/ping_nodes.c b/src/slurmctld/ping_nodes.c index c7b27935888e1b4abbb7657b4458809e96f11e96..10bb24965d2ad0a782d5303533ae25b5fdb40b02 100644 --- a/src/slurmctld/ping_nodes.c +++ b/src/slurmctld/ping_nodes.c @@ -372,6 +372,14 @@ extern void run_health_check(void) char *host_str = NULL; agent_arg_t *check_agent_args = NULL; + /* Sync plugin internal data with + * node select_nodeinfo. This is important + * after reconfig otherwise select_nodeinfo + * will not return the correct number of + * allocated cpus. + */ + select_g_select_nodeinfo_set_all(); + check_agent_args = xmalloc (sizeof (agent_arg_t)); check_agent_args->msg_type = REQUEST_HEALTH_CHECK; check_agent_args->retry = 0; @@ -381,7 +389,8 @@ extern void run_health_check(void) i < front_end_node_cnt; i++, front_end_ptr++) { if (IS_NODE_NO_RESPOND(front_end_ptr)) continue; - hostlist_push_host(check_agent_args->hostlist, front_end_ptr->name); + hostlist_push_host(check_agent_args->hostlist, + front_end_ptr->name); check_agent_args->node_count++; } #else @@ -410,9 +419,20 @@ extern void run_health_check(void) NODE_STATE_ALLOCATED, &cpus_used); } + /* Here the node state is inferred from + * the cpus allocated on it. + * - cpus_used == 0 + * means node is idle + * - cpus_used < cpus_total + * means the node is in mixed state + * else cpus_used == cpus_total + * means the node is allocated + */ if (cpus_used == 0) { if (!(node_states & HEALTH_CHECK_NODE_IDLE)) continue; + if (!IS_NODE_IDLE(node_ptr)) + continue; } else if (cpus_used < cpus_total) { if (!(node_states & HEALTH_CHECK_NODE_MIXED)) continue; diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index dfa2be9250399e7a1bf76b17c5f5fe07da337f96..b0a3a793347b0002cd797d323f4b44a84b11996c 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -1803,9 +1803,12 @@ static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg) * ran so we already finished the last instance of the job so * this would be put on the requeued instance which is * incorrect. + * NOTE: Do not use IS_JOB_PENDING since that doesn't take + * into account the COMPLETING FLAG which is valid, but not + * always set yet when the step exits normally. 
*/ if (association_based_accounting && job_ptr - && !IS_JOB_PENDING(job_ptr)) { + && (job_ptr->job_state != JOB_PENDING)) { struct step_record batch_step; memset(&batch_step, 0, sizeof(struct step_record)); batch_step.job_ptr = job_ptr; @@ -4593,6 +4596,10 @@ inline static void _slurm_rpc_reboot_nodes(slurm_msg_t * msg) node_ptr->node_state |= NODE_STATE_MAINT; want_nodes_reboot = true; } + + if (want_nodes_reboot == true) + schedule_node_save(); + unlock_slurmctld(node_write_lock); FREE_NULL_BITMAP(bitmap); rc = SLURM_SUCCESS; diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c index 6d58834f0e3388cafbc444ccca8b45185121d967..94b94e9f5577f2549589fb8ee2b2301b12483272 100644 --- a/src/slurmctld/reservation.c +++ b/src/slurmctld/reservation.c @@ -3811,7 +3811,7 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when, if (slurmctld_conf.debug_flags & DEBUG_FLAG_RESERVATION) { char *nodes = bitmap2node_name(*node_bitmap); info("job_test_resv: job:%u reservation:%s nodes:%s", - job_ptr->job_id, nodes, job_ptr->resv_name); + job_ptr->job_id, job_ptr->resv_name, nodes); xfree(nodes); } diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index f26cbc77055a4d82855e9039b73e0d482f47faf3..6881ee73327c4832179780865b6aeddffcd82b06 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -595,6 +595,9 @@ struct job_record { struct part_record *part_ptr; /* pointer to the partition record */ time_t pre_sus_time; /* time job ran prior to last suspend */ time_t preempt_time; /* job preemption signal time */ + bool preempt_in_progress; /* Premption of other jobs in progress + * in order to start this job, + * (Internal use only, don't save) */ uint32_t priority; /* relative priority of the job, * zero == held (don't initiate) */ uint32_t *priority_array; /* partition based priority */ diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 6e91257f5a5094a730e31f9cd3cff89096c8f2d4..3bc1b63f0baa9242f2381fa7fb7be52b3a129f1a 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -212,6 +212,26 @@ static void _internal_step_complete( struct job_record *job_ptr, struct step_record *step_ptr, bool terminated) { + uint16_t cleaning = 0; + + /* No reason to complete a step that hasn't started yet. */ + if (step_ptr->step_id == INFINITE) + return; + + /* If the job is already cleaning we have already been here + before, so just return. + */ + select_g_select_jobinfo_get(step_ptr->select_jobinfo, + SELECT_JOBDATA_CLEANING, + &cleaning); + if (cleaning) { /* Step hasn't finished cleanup yet. */ + debug("%s: Cleaning flag already set for " + "job step %u.%u, no reason to cleanup again.", + __func__, step_ptr->step_id, + step_ptr->job_ptr->job_id); + return; + } + jobacct_storage_g_step_complete(acct_db_conn, step_ptr); job_ptr->derived_ec = MAX(job_ptr->derived_ec, step_ptr->exit_code); @@ -253,14 +273,13 @@ extern void delete_step_records (struct job_record *job_ptr) &cleaning); if (cleaning) /* Step hasn't finished cleanup yet. 
*/ continue; + _internal_step_complete(job_ptr, step_ptr, true); } - _internal_step_complete(job_ptr, step_ptr, true); list_remove (step_iterator); _free_step_rec(step_ptr); } list_iterator_destroy(step_iterator); - gres_plugin_job_clear(job_ptr->gres_list); } @@ -360,9 +379,11 @@ dump_step_desc(job_step_create_request_msg_t *step_spec) step_spec->user_id, step_spec->job_id, step_spec->min_nodes, step_spec->max_nodes, step_spec->cpu_count); - debug3(" cpu_freq=%u num_tasks=%u relative=%u task_dist=%u node_list=%s", + debug3(" cpu_freq=%u num_tasks=%u relative=%u task_dist=%u plane=%u", step_spec->cpu_freq, step_spec->num_tasks, step_spec->relative, - step_spec->task_dist, step_spec->node_list); + step_spec->task_dist, step_spec->plane_size); + debug3(" node_list=%s constraints=%s", + step_spec->node_list, step_spec->features); debug3(" host=%s port=%u name=%s network=%s exclusive=%u", step_spec->host, step_spec->port, step_spec->name, step_spec->network, step_spec->exclusive); @@ -371,9 +392,8 @@ dump_step_desc(job_step_create_request_msg_t *step_spec) debug3(" mem_per_%s=%u resv_port_cnt=%u immediate=%u no_kill=%u", mem_type, mem_value, step_spec->resv_port_cnt, step_spec->immediate, step_spec->no_kill); - debug3(" overcommit=%d time_limit=%u gres=%s constraints=%s", - step_spec->overcommit, step_spec->time_limit, step_spec->gres, - step_spec->features); + debug3(" overcommit=%d time_limit=%u gres=%s", + step_spec->overcommit, step_spec->time_limit, step_spec->gres); } @@ -3022,7 +3042,12 @@ extern int step_partial_comp(step_complete_msg_t *req, uid_t uid, info("step_partial_comp: JobID=%u invalid", req->job_id); return ESLURM_INVALID_JOB_ID; } - if (IS_JOB_PENDING(job_ptr)) { + + /* If we are requeuing the job the completing flag will be set + * but the state will be Pending, so don't use IS_JOB_PENDING + * which won't see the completing flag. 
+ */ + if (job_ptr->job_state == JOB_PENDING) { info("step_partial_comp: JobID=%u pending", req->job_id); return ESLURM_JOB_PENDING; } diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index e3a14368d6292f8fb6dd5b092422b7154b903478..0ca6236b4be244dc74d3d5d99dcb634bff0181f8 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -399,7 +399,7 @@ batch_finish(stepd_step_rec_t *job, int rc) (job->stepid == SLURM_BATCH_SCRIPT)) { verbose("job %u completed with slurm_rc = %d, job_rc = %d", job->jobid, rc, step_complete.step_rc); - _send_complete_batch_script_msg(job, rc, job->task[0]->estatus); + _send_complete_batch_script_msg(job, rc, step_complete.step_rc); } else { _wait_for_children_slurmstepd(job); verbose("job %u.%u completed with slurm_rc = %d, job_rc = %d", diff --git a/src/srun/libsrun/launch.c b/src/srun/libsrun/launch.c index e94f3e56806ec592bbc8be8fa4542aafa397faba..ff82d60cfee0105efedc12fc0b898f12751d3574 100644 --- a/src/srun/libsrun/launch.c +++ b/src/srun/libsrun/launch.c @@ -210,14 +210,20 @@ extern int launch_common_create_job_step(srun_job_t *job, bool use_all_cpus, else job->ctx_params.gres = getenv("SLURM_STEP_GRES"); - if (use_all_cpus) - job->ctx_params.cpu_count = job->cpu_count; - else if (opt.overcommit) - job->ctx_params.cpu_count = job->ctx_params.min_nodes; - else if (opt.cpus_set) + if (opt.overcommit) { + if (use_all_cpus) /* job allocation created by srun */ + job->ctx_params.cpu_count = job->cpu_count; + else + job->ctx_params.cpu_count = job->ctx_params.min_nodes; + } else if (opt.cpus_set) { job->ctx_params.cpu_count = opt.ntasks * opt.cpus_per_task; - else + } else if (opt.ntasks_set) { job->ctx_params.cpu_count = opt.ntasks; + } else if (use_all_cpus) { /* job allocation created by srun */ + job->ctx_params.cpu_count = job->cpu_count; + } else { + job->ctx_params.cpu_count = opt.ntasks; + } job->ctx_params.cpu_freq = opt.cpu_freq; job->ctx_params.relative = (uint16_t)opt.relative; diff --git a/src/sview/block_info.c b/src/sview/block_info.c index ac72eb1f413d674cce57a8d6d4fdd033cc0dd8ba..df7096b5eebc3dfc0854db0af87cdb383f09aa8b 100644 --- a/src/sview/block_info.c +++ b/src/sview/block_info.c @@ -1145,10 +1145,12 @@ display_it: list_iterator_destroy(itr); change_grid_color(grid_button_list, -1, -1, MAKE_WHITE, true, 0); - } else + } else { highlight_grid(GTK_TREE_VIEW(display_widget), SORTID_NODE_INX, SORTID_COLOR_INX, grid_button_list); + gtk_tree_path_free(path); + } if (view == ERROR_VIEW && display_widget) { gtk_widget_destroy(display_widget); diff --git a/src/sview/common.c b/src/sview/common.c index ea6878194a3046d7de56acf09e78f9fc6055ad87..8f554fe5d7995af85e25d458ff272d7283ada4d7 100644 --- a/src/sview/common.c +++ b/src/sview/common.c @@ -522,6 +522,9 @@ static void _add_col_to_treeview(GtkTreeView *tree_view, } else renderer = gtk_cell_renderer_text_new(); + if (model) + g_object_unref(model); + gtk_tree_view_column_pack_start(col, renderer, TRUE); g_object_set_data(G_OBJECT(renderer), "column", @@ -1379,9 +1382,6 @@ extern gboolean key_pressed(GtkTreeView *tree_view, GdkEventKey *event, const signal_params_t *signal_params) { - GtkTreePath *path = NULL; - GtkTreeViewColumn *column; - control_key_in_effect = FALSE; enter_key_in_effect = FALSE; @@ -1392,8 +1392,6 @@ extern gboolean key_pressed(GtkTreeView *tree_view, each_t each; GtkTreeSelection *selection = NULL; - gtk_tree_view_get_cursor(GTK_TREE_VIEW(tree_view), - &path, &column); selection = gtk_tree_view_get_selection(tree_view); memset(&each, 0, 
sizeof(each_t)); each.tree_view = tree_view; @@ -1882,6 +1880,9 @@ extern void set_for_update(GtkTreeModel *model, int updated) } } } + + if (path) + gtk_tree_path_free(path); } extern void remove_old(GtkTreeModel *model, int updated) diff --git a/src/sview/front_end_info.c b/src/sview/front_end_info.c index d811588418d56529aa4c01d3bdd0731e42b52ae3..3c93285e9540dbe530c32427e1c68d80ffcadb9c 100644 --- a/src/sview/front_end_info.c +++ b/src/sview/front_end_info.c @@ -710,6 +710,7 @@ display_it: highlight_grid(GTK_TREE_VIEW(display_widget), SORTID_NODE_INX, SORTID_COLOR_INX, grid_button_list); + gtk_tree_path_free(path); } if (view == ERROR_VIEW && display_widget) { diff --git a/src/sview/job_info.c b/src/sview/job_info.c index d6104ab371a29f19f3b0e628cb03bc1d98229fde..7e8e957aa89edac275c5b1ec7946b169553c9566 100644 --- a/src/sview/job_info.c +++ b/src/sview/job_info.c @@ -3480,6 +3480,7 @@ display_it: highlight_grid(GTK_TREE_VIEW(display_widget), SORTID_NODE_INX, SORTID_COLOR_INX, grid_button_list); + gtk_tree_path_free(path); } if (view == ERROR_VIEW && display_widget) { diff --git a/src/sview/node_info.c b/src/sview/node_info.c index 47e836600cf40583570bcf421f4e73178775740d..6175a65efd80c36a1010aa8b295724bdabaf80ee 100644 --- a/src/sview/node_info.c +++ b/src/sview/node_info.c @@ -1492,9 +1492,11 @@ display_it: color_inx, color_set_flag, true, 0); xfree(color_inx); xfree(color_set_flag); - } else + } else { highlight_grid(GTK_TREE_VIEW(display_widget), SORTID_POS, (int)NO_VAL, grid_button_list); + gtk_tree_path_free(path); + } if (view == ERROR_VIEW && display_widget) { gtk_widget_destroy(display_widget); diff --git a/src/sview/part_info.c b/src/sview/part_info.c index 407a9d50a9be220402ab19fb587cf632efc09a1d..9c181b7aacaf5db4170534b6e43f9269b64d6ec0 100644 --- a/src/sview/part_info.c +++ b/src/sview/part_info.c @@ -2490,10 +2490,12 @@ display_it: MAKE_WHITE, true, 0); xfree(color_inx); xfree(color_set_flag); - } else + } else { highlight_grid(GTK_TREE_VIEW(display_widget), SORTID_NODE_INX, SORTID_COLOR_INX, grid_button_list); + gtk_tree_path_free(path); + } if (view == ERROR_VIEW && display_widget) { gtk_widget_destroy(display_widget); diff --git a/src/sview/resv_info.c b/src/sview/resv_info.c index 4f38d775c7bddedf90840fe89eece676d36a71d7..38be75c22f4adff47d813340f667c3892d1143da 100644 --- a/src/sview/resv_info.c +++ b/src/sview/resv_info.c @@ -1184,10 +1184,12 @@ display_it: list_iterator_destroy(itr); change_grid_color(grid_button_list, -1, -1, MAKE_WHITE, true, 0); - } else + } else { highlight_grid(GTK_TREE_VIEW(display_widget), SORTID_NODE_INX, SORTID_COLOR_INX, grid_button_list); + gtk_tree_path_free(path); + } if (view == ERROR_VIEW && display_widget) { gtk_widget_destroy(display_widget); diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am index 563b4bbf689df2de4eb13eada1d553c857a443dd..5b3afe2343691931b57bd8262a1df09ac9a023ae 100644 --- a/testsuite/expect/Makefile.am +++ b/testsuite/expect/Makefile.am @@ -120,6 +120,7 @@ EXTRA_DIST = \ test1.95.prog.upc \ test1.96 \ test1.96.prog.c \ + test1.97 \ test2.1 \ test2.2 \ test2.3 \ @@ -425,6 +426,8 @@ EXTRA_DIST = \ test21.28 \ test21.29 \ test21.30 \ + test21.31 \ + test21.32 \ inc21.30.1 \ inc21.30.2 \ inc21.30.3 \ diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in index c112aba43a4227d8cf86018dedc9e86585239a5f..895a1dff48a5c89435b56602ef2ea868e1b89e7d 100644 --- a/testsuite/expect/Makefile.in +++ b/testsuite/expect/Makefile.in @@ -504,6 +504,7 @@ EXTRA_DIST = \ test1.95.prog.upc \ test1.96 
\ test1.96.prog.c \ + test1.97 \ test2.1 \ test2.2 \ test2.3 \ @@ -809,6 +810,8 @@ EXTRA_DIST = \ test21.28 \ test21.29 \ test21.30 \ + test21.31 \ + test21.32 \ inc21.30.1 \ inc21.30.2 \ inc21.30.3 \ diff --git a/testsuite/expect/README b/testsuite/expect/README index fec60341782cfa4b1b2a3e72336c29a94fa318f0..e9b8a97641c55b2b0ec51d2627f938f067720154 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -209,6 +209,7 @@ test1.93 Test of LAM-MPI functionality test1.94 Test of MPICH2 task spawn logic test1.95 Basic UPC (Unified Parallel C) test via srun. test1.96 Basic SHMEM test via srun. +test1.97 Test that --ntasks-per-node and -c options are enforced **NOTE** The above tests for multiple processor/partition systems only test2.# Testing of scontrol options (to be run as unprivileged user). @@ -609,6 +610,7 @@ test21.28 sacctmgr abort delete test21.29 sacctmgr clear (modify) QoS values test21.30 sacctmgr test if the QoS values are enforced test21.31 sacctmgr modify Resource values +test21.32 Validate that mod qos =,+=,-= change the preempt value test22.# Testing of sreport commands and options. diff --git a/testsuite/expect/globals b/testsuite/expect/globals index 965ac0d1c0205150a26af09ab0a1ef4f009dc47f..20db5f577fc89edd7b35bc2271e4b13674ec8d9c 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -1648,13 +1648,13 @@ proc test_select_type { } { ################################################################ proc test_select_type_params { } { - global scontrol bin_bash bin_grep alpha_numeric_under + global scontrol bin_bash bin_grep alpha_numeric_comma log_user 0 set params "" spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SelectTypeParameters" expect { - -re "SelectTypeParameters *= *($alpha_numeric_under)" { + -re "SelectTypeParameters *= *($alpha_numeric_comma)" { set params $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/globals_accounting b/testsuite/expect/globals_accounting index 20c458a67908d80961522761e9ee569845cddad4..1ab04afff80d4a3e64d803fca431e85324c94f56 100644 --- a/testsuite/expect/globals_accounting +++ b/testsuite/expect/globals_accounting @@ -1284,6 +1284,55 @@ proc add_qos {name} { return $exit_code } + +# +# Modify QoS +# +proc mod_qos { qos_name qos_mod_val_in} { + + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set expected 0 + set wcommand "where name=$qos_name" + set scommand "set" + array set qos_mod_vals $qos_mod_val_in + + if { ![string length $qos_name] } { + send_user "FAILURE: we need a name to modify\n" + return 1 + } + + foreach desc [array names qos_mod_vals] { + + if { ![string compare $qos_mod_vals($desc) " "] } { + set $qos_mod_vals($desc) "''" + } + + set scommand "$scommand $desc=$qos_mod_vals($desc)" + } + + set change_cnt 0 + set my_pid [eval spawn $sacctmgr -i modify qos $wcommand $scommand] + expect { + -re "Modified qos" { + incr change_cnt + } + timeout { + send_user "\nFAILURE sacctmgr not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$change_cnt==0} { + send_user "\nFAILURE: sacctmgr did not change qos $qos_name\n" + set exit_code 1 + } + return $exit_code +} + # # Use sacctmgr to remove the test QoS # diff --git a/testsuite/expect/test1.97 b/testsuite/expect/test1.97 new file mode 100755 index 0000000000000000000000000000000000000000..171aa3b8409cd2440e189359c6c5ce6bfaf67e60 --- /dev/null +++ b/testsuite/expect/test1.97 @@ -0,0 +1,363 @@ +#!/usr/bin/expect +############################################################################ +#
Purpose: Test of SLURM functionality +# Test that the --ntasks-per-node and -c options are enforced +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2014 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc. +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id 1.97 +set num_nodes 3 +set nodelist "" +set cputot 0 +set ntasks 0 +set ntaskpn 0 +set exit_code 0 +set sys_homo 0 +set file_in "test$test_id\_sc" +array set nodes {} +array set tasks {} + +print_header $test_id + +if {![string match *CR_PACK_NODES* [test_select_type_params]]} { + send_user "\nWARNING: this test requires " + send_user "SelectTypeParameters=CR_PACK_NODES\n" + exit 1 +} + +proc check_node_config { } { + + global scontrol nodelist sys_homo number exit_code + + set match 0 + set low 0 + set tmp 0 + set same 0 + log_user 0 + spawn $scontrol show nodes $nodelist + expect { + -re "CPUTot=($number)" { + if {$match != 0} { + set tmp $expect_out(1,string) + if {$tmp < $low} { + set low $tmp + } elseif {$tmp == $low} { + incr same 1 + } + } else { + set low $expect_out(1,string) + } + incr match 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + log_user 1 + + if {$match != 3} { + send_user "\nFAILURE: could not determine node config\n" + exit 1 + } elseif {$same == 2} { + set sys_homo 1 + return $low + } else { + return $low + } + +} + +proc check_tasks_all {ntaskspn tasks} { + + global scontrol exit_code + + array set ntasks $tasks + set match 0 + for {set i 0} {$i<3} {incr i 1} { + if {$ntasks($i) == $ntaskspn} { + incr match 1 + } + } + + if {$match != 3} { + send_user "\nFAILURE: incorrect number of tasks were set\n" + set exit_code 1 + } +} + +proc check_tasks_off {ntaskspn tasks offset} { + + global scontrol exit_code + + array set ntasks $tasks + set match 0 + for {set i 0} {$i<2} {incr i 1} { + if {$ntasks($i) == $ntaskspn} { + incr match 1 + } + } + + if {$ntasks($i) == [expr $ntaskspn - $offset]} { + incr match 1 + } + + if {$match != 3} { + send_user "\nFAILURE: incorrect number of tasks were set $match != 3\n" + set exit_code 1 + } +} + +proc check_cpu_all {nodes job_cpus ncpus} { + + global scontrol exit_code + + array set nnodes $nodes + array set jcpus $job_cpus + set match 0 + for {set i 0} {$i<3} {incr i 1} { + spawn $scontrol show nodes $nnodes($i) + expect { + -re "CPUTot=[expr 
$jcpus($i) * $ncpus]" { + incr match 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + } + + if {$match != 3} { + send_user "\nFAILURE: incorrect number of cpus were set\n" + set exit_code 1 + } +} + +proc check_cpu_off {nodes job_cpus ncpus} { + + global scontrol exit_code + + array set nnodes $nodes + array set jcpus $job_cpus + set match 0 + for {set i 0} {$i<2} {incr i 1} { + spawn $scontrol show nodes $nnodes($i) + expect { + -re "CPUTot=[expr $jcpus($i) * $ncpus]" { + incr match 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + } + + spawn $scontrol show nodes $nnodes($i) + expect { + -re "CPUTot=[expr ($jcpus($i) * $ncpus) + $ncpus]" { + incr match 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if {$match != 3} { + send_user "\nFAILURE: incorrect number of cpus were set $match != 3\n" + set exit_code 1 + } +} + +proc submit_cpu {ntasks ncpus} { + + global srun bin_printenv nodelist exit_code num_nodes tasks nodes + global bin_bash number alpha_numeric_nodelist wait_for_job + + # Wait awhile for the jobs to cleanup + sleep 2 + + set x 0 + spawn $bin_bash -c "$srun -N$num_nodes -n$ntasks -w$nodelist -c$ncpus --exclusive $bin_printenv SLURMD_NODENAME | sort -n | uniq -c" + expect { + -re "($number) ($alpha_numeric_nodelist)" { + set tasks($x) $expect_out(1,string) + set nodes($x) $expect_out(2,string) + incr x 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if {$x != 3} { + send_user "\nFAILURE: srun did not submit the jobs correctly $x != 3\n" + exit 1 + } +} + +proc submit_tasks {ntasks ntaskpn} { + + global srun bin_printenv nodelist exit_code num_nodes tasks nodes bin_bash + global number alpha_numeric_nodelist + + # Wait awhile for the jobs to clean up + sleep 2 + + set x 0 + spawn $bin_bash -c "$srun -N$num_nodes -n$ntasks --ntasks-per-node=$ntaskpn -w$nodelist --exclusive $bin_printenv SLURMD_NODENAME | sort -n | uniq -c" + expect { + -re "($number) ($alpha_numeric_nodelist)" { + set tasks($x) $expect_out(1,string) + set nodes($x) $expect_out(2,string) + incr x 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if {$x != 3} { + send_user "\nFAILURE: srun did not submit the jobs correctlty $x != 3\n" + exit 1 + } +} + +######################## Test Starts Here ######################## + +make_bash_script $file_in "true" + +# Submit an exclusive job to get a nodelist +set tmp_id 0 +spawn $sbatch -N3 -o/dev/null --exclusive $file_in +expect { + -re "Submitted batch job ($number)" { + set tmp_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch is not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +wait_for_job $tmp_id DONE + +spawn $scontrol show job $tmp_id +expect { + -re "NodeList=($alpha_numeric_nodelist)" { + set nodelist $expect_out(1,string) + exp_continue + } + -re "NumCPUs=($number)" { + set cputot $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch is not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +############# Test by CPU ############# +send_user "====================Testing CPUs per Task====================\n\n" +set ncpuspt [check_node_config] + +# Submit job 
with just one cpu per task +submit_cpu $cputot 1 +check_cpu_all [array get nodes] [array get tasks] 1 + +# Submit job with the lowest cpu count of the 3 nodes +submit_cpu [expr $cputot/$ncpuspt] $ncpuspt +check_cpu_all [array get nodes] [array get tasks] $ncpuspt + +if {!$sys_homo} { + # Submit job with lowest cpu count of the 3 nodes and set tasks to 1 + # less than the number of cpus (this test only works on heterogeneous systems) + submit_cpu [expr ($cputot/$ncpuspt) - 1] $ncpuspt + check_cpu_off [array get nodes] [array get tasks] $ncpuspt +} + +############# Test by node task ############# +send_user "====================Testing Tasks per Node====================\n\n" +set ntask [expr $num_nodes * [check_node_config]] +set ntaskpn [check_node_config] + +# Submit job with ntasks-per-node set to the lowest cpu count +submit_tasks $ntask $ntaskpn +check_tasks_all $ntaskpn [array get tasks] + +# Submit job with one fewer ntasks to see that tasks are spread +# across all nodes +submit_tasks [expr $ntask -1] $ntaskpn +check_tasks_off $ntaskpn [array get tasks] 1 + +# Submit job with two fewer ntasks to see that tasks are spread +# across all nodes +submit_tasks [expr $ntask -2] $ntaskpn +check_tasks_off $ntaskpn [array get tasks] 2 + +if {$exit_code == 0} { + exec $bin_rm $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test21.32 b/testsuite/expect/test21.32 new file mode 100755 index 0000000000000000000000000000000000000000..26de344f205f5ae09fc73e3bbf1977ba90b3c1b3 --- /dev/null +++ b/testsuite/expect/test21.32 @@ -0,0 +1,195 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Validate that mod QoS modifies the preempt QoS when using =,+=, +# -= +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2014 SchedMD LLC +# Written by Nathan Yee <nyee32@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc. +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals +source ./globals_accounting + +set test_id 21.32 +set exit_code 0 +set qos_test(0) "test$test_id\_qos_0" +set qos_test(1) "test$test_id\_qos_1" +set qos_test(2) "test$test_id\_qos_2" +set qos_test(3) "test$test_id\_qos_3" +set qos_test(4) "test$test_id\_qos_4" +set access_err 0 + +print_header $test_id + +set qos_names_str 0 +foreach inx [array names qos_test] { + if { $qos_names_str != 0 } { + set qos_names_str "$qos_names_str,$qos_test($inx)" + } else { + set qos_names_str "$qos_test($inx)" + } +} + +proc reset_qos { } { + + global sacctmgr qos_main exit_code qos_test + + set removed 0 + spawn $sacctmgr -i mod qos $qos_test(0) set preempt='' + expect { + "Modified qos" { + set removed 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if {$removed != 1} { + send_user "\nFAILURE: qos $qos_main\'s preempt qos were not " + send_user "removed\n" + set exit_code 1 + } + +} + +proc _local_mod_qos { preempt_qos } { + + global sacctmgr exit_code qos_test + + set mod 0 + spawn $sacctmgr -i mod qos $qos_test(0) set preempt$preempt_qos + expect { + -re "Modified qos" { + set mod 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if {$mod != 1} { + send_user "\nFAILURE: QoS was not modified\n" + set exit_code 1 + } +} + +proc check_pre { preempt_qos } { + + global sacctmgr exit_code alpha_numeric_under qos_test + + set match 0 + spawn $sacctmgr show qos $qos_test(0) format=preempt%-80 --noheader + expect { + -re "$preempt_qos" { + set match 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr is not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if {$match != 1} { + send_user "\nFAILURE: preempted QoS do not match what is expected\n" + set exit_code 1 + } +} + +######################### Test Begins ######################### + +# clean it up (and check to make sure we can do this test +remove_qos $qos_names_str +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} + +# Add a few QoS +add_qos $qos_names_str +if { $exit_code } { + remove_qos $qos_names_str + send_user "\nFAILURE\n" + exit $exit_code +} + +# Add a preempt qos with = +send_user "Add a preempt qos with =\n" +_local_mod_qos "=$qos_test(1)" +check_pre $qos_test(1) + +# Now clear the preempt qos +reset_qos +check_pre " " + +# Add multiple QoSs with = +send_user "Add multiple QoSs with =\n" +_local_mod_qos "=$qos_test(1),$qos_test(2)" +check_pre "$qos_test(1),$qos_test(2)" +reset_qos +check_pre " " + +# Add multiple QoSs with += +send_user "Add multiple QoSs with +=\n" +_local_mod_qos "=$qos_test(1)" +_local_mod_qos "+=$qos_test(2)" +check_pre "$qos_test(1),$qos_test(2)" +_local_mod_qos "+=$qos_test(3),$qos_test(4)" +check_pre "$qos_test(1),$qos_test(2),$qos_test(3),$qos_test(4)" +reset_qos +check_pre " " + +# Remove some of the QoS with -= +send_user "Add multiple QoSs with -=\n" +_local_mod_qos "=$qos_test(1),$qos_test(2),$qos_test(3),$qos_test(4)" +check_pre "$qos_test(1),$qos_test(2),$qos_test(3),$qos_test(4)" +_local_mod_qos "-=$qos_test(2)" +check_pre "$qos_test(1),$qos_test(3),$qos_test(4)" +_local_mod_qos "-=$qos_test(4)" +check_pre "$qos_test(1),$qos_test(3)" +_local_mod_qos "-=$qos_test(1),$qos_test(3)" +check_pre " " + +remove_qos $qos_names_str + +if {$exit_code == 0} { + send_user 
"\nSUCCCESS\n" +} else { + send_user "\nFAILURE\n" +}