From c72edc36e4d27736b7e331583b0c74aac21f0517 Mon Sep 17 00:00:00 2001 From: Gennaro Oliva <oliva.g@na.icar.cnr.it> Date: Wed, 24 Feb 2016 22:52:38 +0100 Subject: [PATCH] Imported Upstream version 15.08.8 --- META | 4 +- NEWS | 80 ++- auxdir/slurm.m4 | 6 +- config.h.in | 3 - configure | 6 +- configure.ac | 2 +- contribs/cray/csm/gres.conf.j2 | 2 +- contribs/cray/csm/slurmconfgen_smw.py | 151 +++--- contribs/lua/job_submit.lua | 2 +- contribs/pam/pam_slurm.c | 2 +- contribs/perlapi/libslurm/perl/Slurm.xs | 2 +- .../libslurm/perl/lib/Slurm/Constant.pm | 2 +- contribs/torque/qalter.pl | 464 +++++++++--------- contribs/torque/qrerun.pl | 268 +++++----- doc/html/download.shtml | 4 +- doc/html/reservations.shtml | 8 +- doc/html/team.shtml | 8 +- doc/man/man1/sacct.1 | 16 +- doc/man/man1/sacctmgr.1 | 2 +- doc/man/man1/scontrol.1 | 16 +- doc/man/man1/sreport.1 | 4 + doc/man/man5/slurm.conf.5 | 20 +- slurm.spec | 11 +- slurm/slurm.h.in | 16 +- src/api/allocate.c | 2 +- src/api/config_info.c | 94 ++-- src/api/step_io.c | 2 +- src/common/assoc_mgr.c | 30 +- src/common/callerid.c | 14 +- src/common/callerid.h | 6 + src/common/eio.c | 2 +- src/common/env.c | 2 +- src/common/forward.c | 4 + src/common/gres.c | 80 +-- src/common/gres.h | 2 +- src/common/log.c | 4 +- src/common/read_config.c | 2 +- src/common/slurm_protocol_api.c | 38 +- src/common/slurm_protocol_defs.c | 21 +- src/common/slurm_protocol_interface.h | 8 - .../slurm_protocol_socket_implementation.c | 11 +- src/common/slurmdbd_defs.c | 2 +- src/common/stepd_api.c | 9 +- .../accounting_storage/mysql/as_mysql_assoc.c | 128 ++--- .../mysql/as_mysql_jobacct_process.c | 10 +- .../mysql/as_mysql_rollup.c | 50 +- .../accounting_storage/mysql/as_mysql_user.c | 2 +- .../cray/acct_gather_energy_cray.c | 12 + .../rapl/acct_gather_energy_rapl.c | 12 + .../hdf5/sh5util/libsh5util_old/hdf5_api.c | 38 +- .../hdf5/sh5util/sh5util.c | 6 +- .../burst_buffer/common/burst_buffer_common.c | 5 + .../burst_buffer/cray/burst_buffer_cray.c | 6 +- src/plugins/job_submit/lua/job_submit_lua.c | 0 src/plugins/mpi/mpich1_p4/mpich1_p4.c | 2 +- src/plugins/mpi/mvapich/mvapich.c | 2 +- src/plugins/power/common/power_common.c | 5 + src/plugins/power/cray/power_cray.c | 73 ++- src/plugins/sched/wiki2/msg.c | 2 +- src/plugins/select/cons_res/job_test.c | 5 +- src/plugins/select/cons_res/select_cons_res.c | 19 +- src/plugins/select/cray/select_cray.c | 2 +- src/plugins/slurmctld/dynalloc/msg.c | 2 +- src/plugins/task/affinity/affinity.h | 2 +- .../topology/hypercube/topology_hypercube.c | 9 +- src/sacct/options.c | 2 +- src/salloc/opt.c | 2 +- src/salloc/salloc.c | 2 +- src/sbatch/opt.c | 2 +- src/sbatch/sbatch.c | 2 +- src/scancel/scancel.c | 60 ++- src/scontrol/info_assoc_mgr.c | 111 ++++- src/scontrol/scontrol.c | 7 +- src/scontrol/scontrol.h | 2 +- src/slurmctld/acct_policy.c | 22 +- src/slurmctld/backup.c | 1 - src/slurmctld/controller.c | 75 ++- src/slurmctld/job_mgr.c | 44 +- src/slurmctld/job_scheduler.c | 15 +- src/slurmctld/node_mgr.c | 22 + src/slurmctld/partition_mgr.c | 4 +- src/slurmctld/reservation.c | 32 +- src/slurmctld/state_save.c | 4 +- src/slurmd/slurmd/req.c | 2 +- src/slurmd/slurmstepd/io.c | 2 +- src/slurmd/slurmstepd/mgr.c | 2 +- src/slurmd/slurmstepd/req.c | 4 +- src/slurmd/slurmstepd/task.c | 3 +- src/slurmdbd/backup.c | 2 +- src/slurmdbd/rpc_mgr.c | 2 +- src/sreport/cluster_reports.c | 186 +++++-- src/sreport/common.c | 110 +++++ src/sreport/resv_reports.c | 17 +- src/sreport/sreport.h | 19 + src/sreport/user_reports.c | 28 ++ 
src/srun/libsrun/allocate.c | 4 +- src/srun/libsrun/multi_prog.c | 3 +- src/srun/libsrun/opt.c | 2 +- src/srun/srun_pty.c | 6 +- testsuite/expect/inc21.21_tests | 62 ++- testsuite/expect/test1.97 | 6 + 101 files changed, 1700 insertions(+), 993 deletions(-) mode change 100644 => 100755 contribs/torque/qalter.pl mode change 100644 => 100755 contribs/torque/qrerun.pl mode change 100755 => 100644 src/plugins/job_submit/lua/job_submit_lua.c diff --git a/META b/META index 9bb718378..b591566a9 100644 --- a/META +++ b/META @@ -9,8 +9,8 @@ Name: slurm Major: 15 Minor: 08 - Micro: 7 - Version: 15.08.7 + Micro: 8 + Version: 15.08.8 Release: 1 ## diff --git a/NEWS b/NEWS index 6b15de9ba..904a298c4 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,82 @@ This file describes changes in recent versions of Slurm. It primarily documents those changes that are of interest to users and administrators. +* Changes in Slurm 15.08.9 ========================== + +* Changes in Slurm 15.08.8 +========================== + -- Backfill scheduling properly synchronized with Cray Node Health Check. + Prior logic could result in highest priority job getting improperly + postponed. + -- Make it so daemons also support TopologyParam=NoInAddrAny. + -- If scancel is operating on a large number of jobs and RPC responses from + slurmctld daemon are slow then introduce a delay in sending the cancel job + requests from scancel in order to reduce load on slurmctld. + -- Remove redundant logic when updating a job's task count. + -- MySQL - Fix querying jobs with reservations when the IDs have rolled. + -- Perl - Fix use of uninitialized variable in slurm_job_step_get_pids. + -- Launch batch job requesting --reboot after the boot completes. + -- Move debug messages like "not the right user" from association manager + to debug3 when trying to find the correct association. + -- Fix incorrect logic when querying assoc_mgr information. + -- Move debug messages to debug3 notifying a gres_bit_alloc was NULL for + gres types without a file. + -- Sanity check patch to set up variables for RAPL if in a race for it. + -- GRES - Fix minor typecast issues. + -- burst_buffer/cray - Increase size of intermediate variable used to store + buffer byte size read from DW instance from 32 to 64-bits to avoid overflow + and reporting invalid buffer sizes. + -- Allow an existing reservation with running jobs to be modified without + Flags=IGNORE_JOBS. + -- srun - don't attempt to execve() a directory with a name matching the + requested command. + -- Do not automatically relocate an advanced reservation for individual cores + that spans multiple nodes when nodes in that reservation go down (e.g. + a 1 core reservation on node "tux1" will be moved if node "tux1" goes + down, but a reservation containing 2 cores on node "tux1" and 3 cores on + "tux2" will not be moved if node "tux1" goes down). Advanced reservations for + whole nodes will be moved by default for down nodes. + -- Avoid possible double free of memory (and likely abort) for slurmctld in + background mode. + -- contribs/cray/csm/slurmconfgen_smw.py - avoid including repurposed compute + nodes in configs. + -- Support AuthInfo in slurmdbd.conf that is different from the value in + slurm.conf. + -- Fix build on FreeBSD 10. + -- Fix hdf5 build on ppc64 by using correct fprintf formatting for types. + -- Fix cosmetic printing of NO_VALs in scontrol show assoc_mgr. + -- Fix perl api for newer perl versions. + -- Fix for jobs requesting cpus-per-task (eg. -c3) that exceed the number of + cpus on a core.
+ -- Remove unneeded perl files from the .spec file. + -- Flesh out filters for scontrol show assoc_mgr. + -- Add function to remove assoc_mgr_info_request_t members without freeing + structure. + -- Fix build on some non-glibc systems by updating includes. + -- Add new PowerParameters options of get_timeout and set_timeout. The default + set_timeout was increased from 5 seconds to 30 seconds. Also re-read current + power caps periodically or after any failed "set" operation. + -- Fix slurmdbd segfault when listing users with blank user condition. + -- Save the ClusterName to a file in StateSaveLocation, and use that to + verify the state directory belongs to the given cluster at startup to avoid + corruption from multiple clusters attempting to share a state directory. + -- MYSQL - Fix issue when rerolling monthly data to work off the correct time + period. This would only hit you if you rerolled a 15.08 prior to this + commit. + -- If FastSchedule=0 is used, make sure TRES are set up correctly in accounting. + -- Fix sreport's truncation of columns with large TRES when not using + a parsing option. + -- Make sure the count of boards is restored when slurmctld has option -R. + -- When determining if a job can fit into a TRES time limit after resources + have been selected, set the time limit appropriately if the job didn't + request one. + -- Fix inadequate locks when updating a partition's TRES. + -- Add new assoc_limit_continue flag to SchedulerParameters. + -- Avoid race in acct_gather_energy_cray if energy requested before available. + -- MYSQL - Avoid having multiple default accounts when a user is added to + a new account and making it a default all at once. + * Changes in Slurm 15.08.7 ========================== -- sched/backfill: If a job can not be started within the configured @@ -156,7 +232,7 @@ documents those changes that are of interest to users and administrators. -- Fix formatting for sacct with variables that switched from uint32_t to uint64_t. -- Fix a typo in sacct man page. - -- Set up extern step to track any childern of an ssh if it leaves anything + -- Set up extern step to track any children of an ssh if it leaves anything else behind. -- Prevent slurmdbd divide by zero if no associations defined at rollup time. -- Multifactor - Add sanity check to make sure pending jobs are handled @@ -6339,7 +6415,7 @@ documents those changes that are of interest to users and administrators. -- Add slurm_*_trigger.3 man pages for event trigger APIs. -- Fix bug in job preemption logic that would free allocated memory twice. -- Fix spelling issues (from Gennaro Oliva) - -- Fix issue when changing parents of an account in accounting all childern + -- Fix issue when changing parents of an account in accounting all children weren't always sent to their respected slurmctlds until a restart. -- Restore support for srun/salloc/sbatch option --hint=nomultithread to bind tasks to cores rather than threads (broken in slurm v2.1.0-pre5). diff --git a/auxdir/slurm.m4 b/auxdir/slurm.m4 index 8aafeb080..06c37e619 100644 --- a/auxdir/slurm.m4 +++ b/auxdir/slurm.m4 @@ -1,6 +1,4 @@ ##***************************************************************************** -## $Id$ -##***************************************************************************** # AUTHOR: # Mark A.
Grondona <mgrondona@llnl.gov> # @@ -168,7 +166,7 @@ for name in CURRENT REVISION AGE; do eval SLURM_API_$name=$API done SLURM_API_MAJOR=`expr $SLURM_API_CURRENT - $SLURM_API_AGE` -SLURM_API_VERSION=`printf "0x%02x%02x%02x" $((10#$SLURM_API_MAJOR)) $((10#$SLURM_API_AGE)) $((10#$SLURM_API_REVISION))` +SLURM_API_VERSION=`printf "0x%02x%02x%02x" ${SLURM_API_MAJOR#0} ${SLURM_API_AGE#0} ${SLURM_API_REVISION#0}` AC_DEFINE_UNQUOTED(SLURM_API_VERSION, $SLURM_API_VERSION, [Define the API's version]) AC_DEFINE_UNQUOTED(SLURM_API_CURRENT, $SLURM_API_CURRENT, [API current version]) @@ -196,7 +194,7 @@ RELEASE="`perl -ne 'print,exit if s/^\s*RELEASE:\s*(\S*).*/\1/i' $srcdir/META`" # NOTE: SLURM_VERSION_NUMBER excludes any non-numeric component # (e.g. "pre1" in the MICRO), but may be suitable for the user determining # how to use the APIs or other differences. -SLURM_VERSION_NUMBER="`printf "0x%02x%02x%02x" $((10#$SLURM_MAJOR)) $((10#$SLURM_MINOR)) $((10#$SLURM_MICRO))`" +SLURM_VERSION_NUMBER="`printf "0x%02x%02x%02x" ${SLURM_MAJOR#0} ${SLURM_MINOR#0} ${SLURM_MICRO#0}`" AC_DEFINE_UNQUOTED(SLURM_VERSION_NUMBER, $SLURM_VERSION_NUMBER, [SLURM Version Number]) AC_SUBST(SLURM_VERSION_NUMBER) diff --git a/config.h.in b/config.h.in index 2b7f84d82..e50f0dbe1 100644 --- a/config.h.in +++ b/config.h.in @@ -383,9 +383,6 @@ /* Define to 1 if you have the <sys/systemcfg.h> header file. */ #undef HAVE_SYS_SYSTEMCFG_H -/* Define to 1 if you have the <sys/termios.h> header file. */ -#undef HAVE_SYS_TERMIOS_H - /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H diff --git a/configure b/configure index 8fef719cf..289933a29 100755 --- a/configure +++ b/configure @@ -2931,7 +2931,7 @@ for name in CURRENT REVISION AGE; do eval SLURM_API_$name=$API done SLURM_API_MAJOR=`expr $SLURM_API_CURRENT - $SLURM_API_AGE` -SLURM_API_VERSION=`printf "0x%02x%02x%02x" $((10#$SLURM_API_MAJOR)) $((10#$SLURM_API_AGE)) $((10#$SLURM_API_REVISION))` +SLURM_API_VERSION=`printf "0x%02x%02x%02x" ${SLURM_API_MAJOR#0} ${SLURM_API_AGE#0} ${SLURM_API_REVISION#0}` cat >>confdefs.h <<_ACEOF @@ -2983,7 +2983,7 @@ RELEASE="`perl -ne 'print,exit if s/^\s*RELEASE:\s*(\S*).*/\1/i' $srcdir/META`" # NOTE: SLURM_VERSION_NUMBER excludes any non-numeric component # (e.g. "pre1" in the MICRO), but may be suitable for the user determining # how to use the APIs or other differences. 
-SLURM_VERSION_NUMBER="`printf "0x%02x%02x%02x" $((10#$SLURM_MAJOR)) $((10#$SLURM_MINOR)) $((10#$SLURM_MICRO))`" +SLURM_VERSION_NUMBER="`printf "0x%02x%02x%02x" ${SLURM_MAJOR#0} ${SLURM_MINOR#0} ${SLURM_MICRO#0}`" cat >>confdefs.h <<_ACEOF #define SLURM_VERSION_NUMBER $SLURM_VERSION_NUMBER @@ -18367,7 +18367,7 @@ for ac_header in mcheck.h values.h socket.h sys/socket.h \ pty.h utmp.h \ sys/syslog.h linux/sched.h \ kstat.h paths.h limits.h sys/statfs.h sys/ptrace.h \ - sys/termios.h float.h sys/statvfs.h + float.h sys/statvfs.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` diff --git a/configure.ac b/configure.ac index fedf35407..fb34d1666 100644 --- a/configure.ac +++ b/configure.ac @@ -134,7 +134,7 @@ AC_CHECK_HEADERS(mcheck.h values.h socket.h sys/socket.h \ pty.h utmp.h \ sys/syslog.h linux/sched.h \ kstat.h paths.h limits.h sys/statfs.h sys/ptrace.h \ - sys/termios.h float.h sys/statvfs.h + float.h sys/statvfs.h ) AC_HEADER_SYS_WAIT AC_HEADER_TIME diff --git a/contribs/cray/csm/gres.conf.j2 b/contribs/cray/csm/gres.conf.j2 index 938747715..a0bc68759 100644 --- a/contribs/cray/csm/gres.conf.j2 +++ b/contribs/cray/csm/gres.conf.j2 @@ -5,5 +5,5 @@ # # See the gres.conf man page for more information. # -{% for node in nodes.values() %}{% for gres in node.Gres %}NodeName={{ node.NodeName }} Name={{ gres.Name }} {% if gres.File %}File={{ gres.File }}{% else %}Count={{ gres.Count }}{% endif %} +{% for node in nodes.values() %}{% for gres in node.Gres %}NodeName={{ node.NodeName }} Name={{ gres.name }} {% if gres.file %}File={{ gres.file }}{% else %}Count={{ gres.count }}{% endif %} {% endfor %}{% endfor %} diff --git a/contribs/cray/csm/slurmconfgen_smw.py b/contribs/cray/csm/slurmconfgen_smw.py index 7e5e8c23f..993447c6d 100644 --- a/contribs/cray/csm/slurmconfgen_smw.py +++ b/contribs/cray/csm/slurmconfgen_smw.py @@ -6,6 +6,7 @@ import argparse import os +import re import subprocess import sys import time @@ -14,37 +15,37 @@ from jinja2 import Environment, FileSystemLoader NAME = 'slurmconfgen_smw.py' + class Gres(object): """ A class for generic resources """ def __init__(self, name, count): """ Initialize a gres with the given name and count """ - self.Name = name - self.Count = count + self.name = name + self.count = count if name == 'gpu': if count == 1: - self.File = '/dev/nvidia0' + self.file = '/dev/nvidia0' else: - self.File = '/dev/nvidia[0-{0}]'.format(count - 1) + self.file = '/dev/nvidia[0-{0}]'.format(count - 1) elif name == 'mic': if count == 1: - self.File = '/dev/mic0' + self.file = '/dev/mic0' else: - self.File = '/dev/mic[0-{0}]'.format(count - 1) + self.file = '/dev/mic[0-{0}]'.format(count - 1) else: - self.File = None + self.file = None def __eq__(self, other): """ Check if two gres are equal """ - return (self.Name == other.Name and self.Count == other.Count and - self.File == other.File) + return (self.name == other.name and self.count == other.count and + self.file == other.file) def __str__(self): """ Return a gres string suitable for slurm.conf """ - if self.Count == 1: - return self.Name + if self.count == 1: + return self.name else: - return '{0}:{1}'.format(self.Name, self.Count) - + return '{0}:{1}'.format(self.name, self.count) def parse_args(): @@ -64,7 +65,63 @@ def parse_args(): return parser.parse_args() -def get_inventory(partition): +def get_repurposed_computes(partition): + """ Gets a list of repurposed compute nodes for the given partition. """ + print 'Getting list of repurposed compute nodes...' 
+ try: + xtcliout = subprocess.check_output(['/opt/cray/hss/default/bin/xtcli', + 'status', '-m', partition], + stderr=subprocess.STDOUT) + repurposed = [] + for line in xtcliout.splitlines(): + cname = re.match( + r'\s*(c\d+-\d+c[0-2]s(?:\d|1[0-5])n[0-3]):\s+service', + line) + if cname: + repurposed.append(cname.group(1)) + + return repurposed + except subprocess.CalledProcessError: + return [] + + +def get_node(nodexml): + """ Convert node XML into a node dictionary """ + cores = int(nodexml.find('cores').text) + sockets = int(nodexml.find('sockets').text) + memory = int(nodexml.find('memory/sizeGB').text) * 1024 + + node = {'cname': nodexml.find('cname').text, + 'nid': int(nodexml.find('nic').text), + 'CoresPerSocket': cores / sockets, + 'RealMemory': memory, + 'Sockets': sockets, + 'ThreadsPerCore': int(nodexml.find('hyper_threads').text)} + + # Determine the generic resources + craynetwork = 4 + gpu = 0 + mic = 0 + for accelxml in nodexml.findall( + 'accelerator_list/accelerator/type'): + if accelxml.text == 'GPU': + gpu += 1 + elif accelxml.text == 'MIC': + mic += 1 + craynetwork = 2 + else: + print ('WARNING: accelerator type {0} unknown' + .format(accelxml.text)) + + node['Gres'] = [Gres('craynetwork', craynetwork)] + if gpu > 0: + node['Gres'].append(Gres('gpu', gpu)) + if mic > 0: + node['Gres'].append(Gres('mic', mic)) + return node + + +def get_inventory(partition, repurposed): """ Gets a hardware inventory for the given partition. Returns the node dictionary """ print 'Gathering hardware inventory...' @@ -87,39 +144,14 @@ def get_inventory(partition): # Loop through nodes in this module for nodexml in modulexml.findall('node_list/node'): - nid = int(nodexml.find('nic').text) - cores = int(nodexml.find('cores').text) - sockets = int(nodexml.find('sockets').text) - memory = int(nodexml.find('memory/sizeGB').text) * 1024 - - node = {'CoresPerSocket': cores / sockets, - 'RealMemory': memory, - 'Sockets': sockets, - 'ThreadsPerCore': int(nodexml.find('hyper_threads').text)} - - # Determine the generic resources - craynetwork = 4 - gpu = 0 - mic = 0 - for accelxml in nodexml.findall( - 'accelerator_list/accelerator/type'): - if accelxml.text == 'GPU': - gpu += 1 - elif accelxml.text == 'MIC': - mic += 1 - craynetwork = 2 - else: - print ('WARNING: accelerator type {0} unknown' - .format(accelxml.text)) - - node['Gres'] = [Gres('craynetwork', craynetwork)] - if gpu > 0: - node['Gres'].append(Gres('gpu', gpu)) - if mic > 0: - node['Gres'].append(Gres('mic', mic)) + node = get_node(nodexml) + if node['cname'] in repurposed: + print ('Skipping repurposed compute node {}' + .format(node['cname'])) + continue # Add to output data structures - nodes[nid] = node + nodes[node['nid']] = node return nodes @@ -227,7 +259,7 @@ def get_gres_types(nodes): """ Get a set of gres types """ grestypes = set() for node in nodes.values(): - grestypes.update([gres.Name for gres in node['Gres']]) + grestypes.update([gres.name for gres in node['Gres']]) return grestypes @@ -235,8 +267,9 @@ def main(): """ Get hardware info, format it, and write to slurm.conf and gres.conf """ args = parse_args() - # Get info from xthwinv and xtcli - nodes = get_inventory(args.partition) + # Get info from cnode and xthwinv + repurposed = get_repurposed_computes(args.partition) + nodes = get_inventory(args.partition, repurposed) nodelist = rli_compress([int(nid) for nid in nodes]) compact_nodes(nodes) defmem, maxmem = get_mem_per_cpu(nodes) @@ -247,22 +280,22 @@ def main(): print 'Writing Slurm configuration to 
{0}...'.format(conffile) with open(conffile, 'w') as outfile: outfile.write(jinjaenv.get_template('slurm.conf.j2').render( - script=sys.argv[0], - date=time.asctime(), - controlmachine=args.controlmachine, - grestypes=get_gres_types(nodes), - defmem=defmem, - maxmem=maxmem, - nodes=nodes, - nodelist=nodelist)) + script=sys.argv[0], + date=time.asctime(), + controlmachine=args.controlmachine, + grestypes=get_gres_types(nodes), + defmem=defmem, + maxmem=maxmem, + nodes=nodes, + nodelist=nodelist)) gresfilename = os.path.join(args.output, 'gres.conf') print 'Writing gres configuration to {0}...'.format(gresfilename) with open(gresfilename, 'w') as gresfile: gresfile.write(jinjaenv.get_template('gres.conf.j2').render( - script=sys.argv[0], - date=time.asctime(), - nodes=nodes)) + script=sys.argv[0], + date=time.asctime(), + nodes=nodes)) print 'Done.' diff --git a/contribs/lua/job_submit.lua b/contribs/lua/job_submit.lua index 278a3e11e..2c3b403fc 100644 --- a/contribs/lua/job_submit.lua +++ b/contribs/lua/job_submit.lua @@ -3,7 +3,7 @@ Example lua script demonstrating the SLURM job_submit/lua interface. This is only an example, not meant for use in its current form. - Leave the function names, arguments, local varialbes and setmetatable + Leave the function names, arguments, local variables and setmetatable set up logic in each function unchanged. Change only the logic after the line containing "*** YOUR LOGIC GOES BELOW ***". diff --git a/contribs/pam/pam_slurm.c b/contribs/pam/pam_slurm.c index e01c928c7..f75eb55d7 100644 --- a/contribs/pam/pam_slurm.c +++ b/contribs/pam/pam_slurm.c @@ -382,7 +382,7 @@ _send_denial_msg(pam_handle_t *pamh, struct _options *opts, /* Construct msg to send to app. */ n = snprintf(str, sizeof(str), - "%sAccess denied: user %s (uid=%d) has no active jobs.%s", + "%sAccess denied: user %s (uid=%d) has no active jobs on this node.%s", opts->msg_prefix, user, uid, opts->msg_suffix); if ((n < 0) || (n >= sizeof(str))) _log_msg(LOG_ERR, "exceeded buffer for pam_conv message"); diff --git a/contribs/perlapi/libslurm/perl/Slurm.xs b/contribs/perlapi/libslurm/perl/Slurm.xs index 41b67493a..0463d2a10 100644 --- a/contribs/perlapi/libslurm/perl/Slurm.xs +++ b/contribs/perlapi/libslurm/perl/Slurm.xs @@ -1564,7 +1564,7 @@ HV * slurm_job_step_get_pids(slurm_t self, uint32_t job_id, uint32_t step_id, char *nodelist=NULL) PREINIT: int rc; - job_step_pids_response_msg_t *resp_msg; + job_step_pids_response_msg_t *resp_msg = NULL; CODE: if (self); /* this is needed to avoid a warning about unused variables. But if we take slurm_t self diff --git a/contribs/perlapi/libslurm/perl/lib/Slurm/Constant.pm b/contribs/perlapi/libslurm/perl/lib/Slurm/Constant.pm index 21111ae4b..d80e1f8fe 100644 --- a/contribs/perlapi/libslurm/perl/lib/Slurm/Constant.pm +++ b/contribs/perlapi/libslurm/perl/lib/Slurm/Constant.pm @@ -20,7 +20,7 @@ sub _get_constants { } else { $val = int($val); } - $const{$name} = sub () { $val }; + $const{$name} = sub { $val }; } $got = 1; } diff --git a/contribs/torque/qalter.pl b/contribs/torque/qalter.pl old mode 100644 new mode 100755 index e5ac6a462..d8c9fbed5 --- a/contribs/torque/qalter.pl +++ b/contribs/torque/qalter.pl @@ -1,232 +1,232 @@ -#! 
/usr/bin/perl -w -############################################################################### -# -# qalter - PBS wrapper for changing job status using scontrol -# -############################################################################### - -use strict; -use FindBin; -use Getopt::Long 2.24 qw(:config no_ignore_case); -use lib "${FindBin::Bin}/../lib/perl"; -use autouse 'Pod::Usage' => qw(pod2usage); -use Slurm ':all'; -use Slurmdb ':all'; # needed for getting the correct cluster dims -use Switch; - -# ------------------------------------------------------------------ -# This makes the assumption job_id will always be the last argument -# ------------------------------------------------------------------- -my $job_id = $ARGV[$#ARGV]; -my ( - $err, - $new_name, - $output, - $rerun, - $resp, - $slurm, - $man, - $help -); - -# Remove this -my $scontrol = "/usr/slurm/bin/scontrol"; - -# ------------------------------ -# Parse Command Line Arguments -# ------------------------------ -GetOptions( - 'N=s' => \$new_name, - 'r=s' => \$rerun, - 'o=s' => \$output, - 'help|?' => \$help, - 'man' => \$man - ) - or pod2usage(2); - -pod2usage(0) if $help; - -if ($man) -{ - if ($< == 0) # Cannot invoke perldoc as root - { - my $id = eval { getpwnam("nobody") }; - $id = eval { getpwnam("nouser") } unless defined $id; - $id = -2 unless defined $id; - $< = $id; - } - $> = $<; # Disengage setuid - $ENV{PATH} = "/bin:/usr/bin"; # Untaint PATH - delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; - if ($0 =~ /^([-\/\w\.]+)$/) { - $0 = $1; # Untaint $0 - } else { - die "Illegal characters were found in \$0 ($0)\n"; - } - pod2usage(-exitstatus => 0, -verbose => 2); -} - -# ---------------------- -# Check input arguments -# ---------------------- -if (@ARGV < 1) { - pod2usage(-message=>"Missing Job ID", -verbose=>0); -} else { - $slurm = Slurm::new(); - $resp = $slurm->get_end_time($job_id); - if (not defined($resp)) { - pod2usage(-message=>"Job id $job_id not valid!", -verbose=>0); - } - if ((not defined($new_name)) and (not defined($rerun)) and (not defined($output))) { - pod2usage(-message=>"no argument given!", -verbose=>0); - } -} - -# -------------------------------------------- -# Use Slurm's Perl API to change name of a job -# -------------------------------------------- -if ($new_name) { - my %update = (); - - $update{job_id} = $job_id; - $update{name} = $new_name; - if (Slurm->update_job(\%update)) { - $err = Slurm->get_errno(); - $resp = Slurm->strerror($err); - pod2usage(-message=>"Job id $job_id name change error: $resp", -verbose=>0); - exit(1); - } -} - -# --------------------------------------------------- -# Use Slurm's Perl API to change the requeue job flag -# --------------------------------------------------- -if ($rerun) { - my %update = (); - - $update{job_id} = $job_id; - if (($rerun eq "n") || ($rerun eq "N")) { - $update{requeue} = 0; - } else { - $update{requeue} = 1; - } - if (Slurm->update_job(\%update)) { - $err = Slurm->get_errno(); - $resp = Slurm->strerror($err); - pod2usage(-message=>"Job id $job_id requeue error: $resp", -verbose=>0); - exit(1); - } -} - -# ------------------------------------------------------------ -# Use Slurm's Perl API to change Comment string -# Comment is used to relocate an output log file -# ------------------------------------------------------------ -if ($output) { - # Example: - # $comment="on:16337,stdout=/gpfsm/dhome/lgerner/tmp/slurm-16338.out,stdout=~lgerner/tmp/new16338.out"; - # - my $comment; - my %update = (); - - # 
--------------------------------------- - # Get current comment string from job_id - # --------------------------------------- - my($job) = $slurm->load_job($job_id); - $comment = $$job{'job_array'}[0]->{comment}; - - # ---------------- - # Split at stdout - # ---------------- - if ($comment) { - my(@outlog) = split("stdout", $comment); - - # --------------------------------- - # Only 1 stdout argument add a ',' - # --------------------------------- - if ($#outlog < 2) { - $outlog[1] .= "," - } - - # ------------------------------------------------ - # Add new log file location to the comment string - # ------------------------------------------------ - $outlog[2] = "=".$output; - $comment = join("stdout", @outlog); - } else { - $comment = "stdout=$output"; - } - - # ------------------------------------------------- - # Make sure that "%j" is changed to current $job_id - # ------------------------------------------------- - $comment =~ s/%j/$job_id/g ; - - # ----------------------------------------------------- - # Update comment and print usage if there is a response - # ----------------------------------------------------- - $update{job_id} = $job_id; - $update{comment} = $comment; - if (Slurm->update_job(\%update)) { - $err = Slurm->get_errno(); - $resp = Slurm->strerror($err); - pod2usage(-message=>"Job id $job_id comment change error: $resp", -verbose=>0); - exit(1); - } -} -exit(0); - -############################################################################## - -__END__ - -=head1 NAME - -B<qalter> - alter a job name, the job rerun flag or the job output file name. - -=head1 SYNOPSIS - -qalter [-N Name] - [-r y|n] - [-o output file] - <job ID> - -=head1 DESCRIPTION - -The B<qalter> updates job name, job rerun flag or job output(stdout) log location. - -It is aimed to be feature-compatible with PBS' qsub. - -=head1 OPTIONS - -=over 4 - -=item B<-N> - -Update job name in the queue - -=item B<-r> - -Alter a job rerunnable flag. "y" will allow a qrerun to be issued. "n" disable qrerun option. - -=item B<-o> - -Alter a job output log file name (stdout). - -The job log will be move/rename after the job has B<terminated>. - -=item B<-?> | B<--help> - -brief help message - -=item B<-man> - -full documentation - -=back - -=head1 SEE ALSO - -qrerun(1) qsub(1) -=cut - +#! /usr/bin/perl -w +############################################################################### +# +# qalter - PBS wrapper for changing job status using scontrol +# +############################################################################### + +use strict; +use FindBin; +use Getopt::Long 2.24 qw(:config no_ignore_case); +use lib "${FindBin::Bin}/../lib/perl"; +use autouse 'Pod::Usage' => qw(pod2usage); +use Slurm ':all'; +use Slurmdb ':all'; # needed for getting the correct cluster dims +use Switch; + +# ------------------------------------------------------------------ +# This makes the assumption job_id will always be the last argument +# ------------------------------------------------------------------- +my $job_id = $ARGV[$#ARGV]; +my ( + $err, + $new_name, + $output, + $rerun, + $resp, + $slurm, + $man, + $help +); + +# Remove this +my $scontrol = "/usr/slurm/bin/scontrol"; + +# ------------------------------ +# Parse Command Line Arguments +# ------------------------------ +GetOptions( + 'N=s' => \$new_name, + 'r=s' => \$rerun, + 'o=s' => \$output, + 'help|?' 
=> \$help, + 'man' => \$man + ) + or pod2usage(2); + +pod2usage(0) if $help; + +if ($man) +{ + if ($< == 0) # Cannot invoke perldoc as root + { + my $id = eval { getpwnam("nobody") }; + $id = eval { getpwnam("nouser") } unless defined $id; + $id = -2 unless defined $id; + $< = $id; + } + $> = $<; # Disengage setuid + $ENV{PATH} = "/bin:/usr/bin"; # Untaint PATH + delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; + if ($0 =~ /^([-\/\w\.]+)$/) { + $0 = $1; # Untaint $0 + } else { + die "Illegal characters were found in \$0 ($0)\n"; + } + pod2usage(-exitstatus => 0, -verbose => 2); +} + +# ---------------------- +# Check input arguments +# ---------------------- +if (@ARGV < 1) { + pod2usage(-message=>"Missing Job ID", -verbose=>0); +} else { + $slurm = Slurm::new(); + $resp = $slurm->get_end_time($job_id); + if (not defined($resp)) { + pod2usage(-message=>"Job id $job_id not valid!", -verbose=>0); + } + if ((not defined($new_name)) and (not defined($rerun)) and (not defined($output))) { + pod2usage(-message=>"no argument given!", -verbose=>0); + } +} + +# -------------------------------------------- +# Use Slurm's Perl API to change name of a job +# -------------------------------------------- +if ($new_name) { + my %update = (); + + $update{job_id} = $job_id; + $update{name} = $new_name; + if (Slurm->update_job(\%update)) { + $err = Slurm->get_errno(); + $resp = Slurm->strerror($err); + pod2usage(-message=>"Job id $job_id name change error: $resp", -verbose=>0); + exit(1); + } +} + +# --------------------------------------------------- +# Use Slurm's Perl API to change the requeue job flag +# --------------------------------------------------- +if ($rerun) { + my %update = (); + + $update{job_id} = $job_id; + if (($rerun eq "n") || ($rerun eq "N")) { + $update{requeue} = 0; + } else { + $update{requeue} = 1; + } + if (Slurm->update_job(\%update)) { + $err = Slurm->get_errno(); + $resp = Slurm->strerror($err); + pod2usage(-message=>"Job id $job_id requeue error: $resp", -verbose=>0); + exit(1); + } +} + +# ------------------------------------------------------------ +# Use Slurm's Perl API to change Comment string +# Comment is used to relocate an output log file +# ------------------------------------------------------------ +if ($output) { + # Example: + # $comment="on:16337,stdout=/gpfsm/dhome/lgerner/tmp/slurm-16338.out,stdout=~lgerner/tmp/new16338.out"; + # + my $comment; + my %update = (); + + # --------------------------------------- + # Get current comment string from job_id + # --------------------------------------- + my($job) = $slurm->load_job($job_id); + $comment = $$job{'job_array'}[0]->{comment}; + + # ---------------- + # Split at stdout + # ---------------- + if ($comment) { + my(@outlog) = split("stdout", $comment); + + # --------------------------------- + # Only 1 stdout argument add a ',' + # --------------------------------- + if ($#outlog < 2) { + $outlog[1] .= "," + } + + # ------------------------------------------------ + # Add new log file location to the comment string + # ------------------------------------------------ + $outlog[2] = "=".$output; + $comment = join("stdout", @outlog); + } else { + $comment = "stdout=$output"; + } + + # ------------------------------------------------- + # Make sure that "%j" is changed to current $job_id + # ------------------------------------------------- + $comment =~ s/%j/$job_id/g ; + + # ----------------------------------------------------- + # Update comment and print usage if there is a response + # 
----------------------------------------------------- + $update{job_id} = $job_id; + $update{comment} = $comment; + if (Slurm->update_job(\%update)) { + $err = Slurm->get_errno(); + $resp = Slurm->strerror($err); + pod2usage(-message=>"Job id $job_id comment change error: $resp", -verbose=>0); + exit(1); + } +} +exit(0); + +############################################################################## + +__END__ + +=head1 NAME + +B<qalter> - alter a job name, the job rerun flag or the job output file name. + +=head1 SYNOPSIS + +qalter [-N Name] + [-r y|n] + [-o output file] + <job ID> + +=head1 DESCRIPTION + +The B<qalter> updates job name, job rerun flag or job output(stdout) log location. + +It is aimed to be feature-compatible with PBS' qsub. + +=head1 OPTIONS + +=over 4 + +=item B<-N> + +Update job name in the queue + +=item B<-r> + +Alter a job rerunnable flag. "y" will allow a qrerun to be issued. "n" disable qrerun option. + +=item B<-o> + +Alter a job output log file name (stdout). + +The job log will be move/rename after the job has B<terminated>. + +=item B<-?> | B<--help> + +brief help message + +=item B<-man> + +full documentation + +=back + +=head1 SEE ALSO + +qrerun(1) qsub(1) +=cut + diff --git a/contribs/torque/qrerun.pl b/contribs/torque/qrerun.pl old mode 100644 new mode 100755 index 2cc1c5419..67b9681f0 --- a/contribs/torque/qrerun.pl +++ b/contribs/torque/qrerun.pl @@ -1,134 +1,134 @@ -#! /usr/bin/perl -w -############################################################################### -# -# qrerun - PBS wrapper to cancel and resubmit a job -# -############################################################################### -# This file is part of SLURM, a resource management program. -# For details, see <http://slurm.schedmd.com/>. -# Please also read the included file: DISCLAIMER. -# -# SLURM is free software; you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation; either version 2 of the License, or (at your option) -# any later version. -# -# In addition, as a special exception, the copyright holders give permission -# to link the code of portions of this program with the OpenSSL library under -# certain conditions as described in each individual source file, and -# distribute linked combinations including the two. You must obey the GNU -# General Public License in all respects for all of the code used other than -# OpenSSL. If you modify file(s) with this exception, you may extend this -# exception to your version of the file(s), but you are not obligated to do -# so. If you do not wish to do so, delete this exception statement from your -# version. If you delete this exception statement from all source files in -# the program, then also delete it here. -# -# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. -# -# You should have received a copy of the GNU General Public License along -# with SLURM; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Based off code with permission copyright 2006, 2007 Cluster Resources, Inc. 
-############################################################################### -use strict; -use FindBin; -use Getopt::Long 2.24 qw(:config no_ignore_case); -use lib "${FindBin::Bin}/../lib/perl"; -use autouse 'Pod::Usage' => qw(pod2usage); -use Slurm ':all'; -use Slurmdb ':all'; # needed for getting the correct cluster dims -use Switch; - -# Parse Command Line Arguments -my ( - $help, $man, - $err, $pid, $resp -); - -GetOptions( - 'help|?' => \$help, - '--man' => \$man, - ) or pod2usage(2); - -pod2usage(2) if $help; -# Handle man page flag -if ($man) -{ - if ($< == 0) # Cannot invoke perldoc as root - { - my $id = eval { getpwnam("nobody") }; - $id = eval { getpwnam("nouser") } unless defined $id; - $id = -2 unless defined $id; - $< = $id; - } - $> = $<; # Disengage setuid - $ENV{PATH} = "/bin:/usr/bin"; # Untaint PATH - delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; - if ($0 =~ /^([-\/\w\.]+)$/) { $0 = $1; } # Untaint $0 - else { die "Illegal characters were found in \$0 ($0)\n"; } - pod2usage(-exitstatus => 0, -verbose => 2); -} - - -# This makes the assumption JOBID will always be the last argument -my $job_id = $ARGV[$#ARGV]; - -if (@ARGV < 1) { - pod2usage(-message=>"Invalid Argument", -verbose=>1); - exit(1); -} - -if (Slurm->requeue($job_id)) { - $err = Slurm->get_errno(); - $resp = Slurm->strerror($err); - pod2usage(-message=>"Job id $job_id rerun error: $resp", -verbose=>0); - exit(1); -} -exit(0); - -__END__ - -=head1 NAME - -B<qrerun> - To rerun a job is to terminate the job and return the job to the queued state in the execution queue in which the job currently resides. -If a job is marked as not rerunable then the rerun request will fail for that job. - -See the option on the qsub and qalter commands. - -It is aimed to be feature-compatible with PBS' qsub. - -=head1 SYNOPSIS - -B<qrerun> [-? | --help] [--man] [--verbose] <job_id> - -=head1 DESCRIPTION - -The B<qrerun> command directs that the specified job is to be rerun if possible. - -=head1 OPTIONS - -=over 4 - -=item B<-? | --help> - -a brief help message - -=item B<--man> - -full documentation - -=back - -=head1 EXIT STATUS - -On success, B<qrerun> will exit with a value of zero. On failure, B<qrerun> will exit with a value greater than zero. - -=head1 SEE ALSO - -qalter(1) qsub(1) -=cut +#! /usr/bin/perl -w +############################################################################### +# +# qrerun - PBS wrapper to cancel and resubmit a job +# +############################################################################### +# This file is part of SLURM, a resource management program. +# For details, see <http://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# In addition, as a special exception, the copyright holders give permission +# to link the code of portions of this program with the OpenSSL library under +# certain conditions as described in each individual source file, and +# distribute linked combinations including the two. You must obey the GNU +# General Public License in all respects for all of the code used other than +# OpenSSL. If you modify file(s) with this exception, you may extend this +# exception to your version of the file(s), but you are not obligated to do +# so. 
If you do not wish to do so, delete this exception statement from your +# version. If you delete this exception statement from all source files in +# the program, then also delete it here. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Based off code with permission copyright 2006, 2007 Cluster Resources, Inc. +############################################################################### +use strict; +use FindBin; +use Getopt::Long 2.24 qw(:config no_ignore_case); +use lib "${FindBin::Bin}/../lib/perl"; +use autouse 'Pod::Usage' => qw(pod2usage); +use Slurm ':all'; +use Slurmdb ':all'; # needed for getting the correct cluster dims +use Switch; + +# Parse Command Line Arguments +my ( + $help, $man, + $err, $pid, $resp +); + +GetOptions( + 'help|?' => \$help, + '--man' => \$man, + ) or pod2usage(2); + +pod2usage(2) if $help; +# Handle man page flag +if ($man) +{ + if ($< == 0) # Cannot invoke perldoc as root + { + my $id = eval { getpwnam("nobody") }; + $id = eval { getpwnam("nouser") } unless defined $id; + $id = -2 unless defined $id; + $< = $id; + } + $> = $<; # Disengage setuid + $ENV{PATH} = "/bin:/usr/bin"; # Untaint PATH + delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; + if ($0 =~ /^([-\/\w\.]+)$/) { $0 = $1; } # Untaint $0 + else { die "Illegal characters were found in \$0 ($0)\n"; } + pod2usage(-exitstatus => 0, -verbose => 2); +} + + +# This makes the assumption JOBID will always be the last argument +my $job_id = $ARGV[$#ARGV]; + +if (@ARGV < 1) { + pod2usage(-message=>"Invalid Argument", -verbose=>1); + exit(1); +} + +if (Slurm->requeue($job_id)) { + $err = Slurm->get_errno(); + $resp = Slurm->strerror($err); + pod2usage(-message=>"Job id $job_id rerun error: $resp", -verbose=>0); + exit(1); +} +exit(0); + +__END__ + +=head1 NAME + +B<qrerun> - To rerun a job is to terminate the job and return the job to the queued state in the execution queue in which the job currently resides. +If a job is marked as not rerunable then the rerun request will fail for that job. + +See the option on the qsub and qalter commands. + +It is aimed to be feature-compatible with PBS' qsub. + +=head1 SYNOPSIS + +B<qrerun> [-? | --help] [--man] [--verbose] <job_id> + +=head1 DESCRIPTION + +The B<qrerun> command directs that the specified job is to be rerun if possible. + +=head1 OPTIONS + +=over 4 + +=item B<-? | --help> + +a brief help message + +=item B<--man> + +full documentation + +=back + +=head1 EXIT STATUS + +On success, B<qrerun> will exit with a value of zero. On failure, B<qrerun> will exit with a value greater than zero. 
+ +=head1 SEE ALSO + +qalter(1) qsub(1) +=cut diff --git a/doc/html/download.shtml b/doc/html/download.shtml index 129212365..42498db16 100644 --- a/doc/html/download.shtml +++ b/doc/html/download.shtml @@ -369,6 +369,7 @@ tar -xf json-c-0.12.tar</li> <ul> <li>If you have current build tools<br> cd json-c-0.12<br> +export CFLAGS=-Wno-error=unused-but-set-variable<br> ./configure --prefix=DESIRED_PATH<br> make<br> make install</li> @@ -378,6 +379,7 @@ cd json-c-0.12<br> mv aclocal.m4 aclocal.m4.orig<br> mv ltmain.sh ltmain.sh.orig<br> ./autogen.sh<br> +export CFLAGS=-Wno-error=unused-but-set-variable<br> ./configure --prefix=DESIRED_JSON_PATH<br> make<br> make install</li> @@ -397,6 +399,6 @@ Slurm-based HPC supercomputers. The website of Slurm-web, with screenshots:<br> </ul> -<p style="text-align:center;">Last modified 19 October 2015</p> +<p style="text-align:center;">Last modified 16 February 2016</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/reservations.shtml b/doc/html/reservations.shtml index a63e47f20..16a030cb4 100644 --- a/doc/html/reservations.shtml +++ b/doc/html/reservations.shtml @@ -358,7 +358,11 @@ $ scontrol create reservation user=operator nodes=tux8 \ to jobs are automatically replaced with new idle nodes. The effect of this is to always maintain a constant size pool of resources. This is accomplished by using a "replace" flag as shown in the example below. -This option is not supported on IBM Bluegene systems.</p> +This option is not supported on IBM Bluegene systems or for reservations +of individual cores which span more than one node rather than full nodes +(e.g. a 1 core reservation on node "tux1" will be moved if node "tux1" goes +down, but a reservation containing 2 cores on node "tux1" and 3 cores on "tux2" +will not be moved if node "tux1" goes down).</p> <pre> $ scontrol create reservation starttime=now duration=60 \ users=foo nodecnt=2 flags=replace @@ -422,7 +426,7 @@ considering the initiation of jobs. This will prevent the initiation of some jobs which would complete execution before a reservation given fewer jobs to time-slice with.</p> -<p style="text-align: center;">Last modified 20 October 2015</p> +<p style="text-align: center;">Last modified 28 January 2016</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/team.shtml b/doc/html/team.shtml index 5e2b3f04f..3300dce4f 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -7,7 +7,10 @@ Over 170 individuals have contributed to the project. Lead Slurm developers are: <ul> <li>Danny Auble (SchedMD)</li> +<li>Brian Christiansen (SchedMD)</li> <li>Morris Jette (SchedMD)</li> +<li>Alejandro (Alex) Sanchez (SchedMD)</li> +<li>Tim Wickberg (SchedMD)</li> </ul> <p>Slurm contributors include:</p> @@ -70,7 +73,6 @@ Lead Slurm developers are: <li>Sourav Chakraborty (The Ohio State University)</li> <li>François Chevallier (CEA)</li> <li>Daniel Christians (HP)</li> -<li>Brian Christiansen (SchedMD)</li> <li>Gilles Civario (Bull)</li> <li>Chuck Clouston (Bull)</li> <li>J.T.
Conklin</li> @@ -205,7 +207,6 @@ Lead Slurm developers are: <li>Aleksej Saushev</li> <li>Uwe Sauter (High Performance Computing Center Stuttgart, Germany)</li> <li>Chris Scheller (University of Michigan)</li> -<li>Alejandro (Alex) Sanchez (SchedMD)</li> <li>Rod Schultz (Bull)</li> <li>Samuel Senoner (Vienna University of Technology, Austria)</li> <li>David Singleton</li> @@ -228,11 +229,12 @@ Lead Slurm developers are: <br> <li>Garrison Vaughan</li> <br> +<li>Yu Watanabe</li> <li>Pythagoras Watson (Lawrence Livermore National Laboratory)</li> <li>Daniel M. Weeks (Rensselaer Polytechnic Institute)</li> <li>Nathan Weeks (Iowa State University)</li> <li>Andy Wettstein (University of Chicago)</li> -<li>Tim Wickberg (SchedMD)</li> +<li>Jeff White</li> <li>Chandler Wilkerson (Rice University)</li> <li>Ramiro Brito Willmersdorf (Universidade Federal de Pemambuco, Brazil)</li> <li>Jay Windley (Linux NetworX)</li> diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index 5566ba040..7cc85bb13 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -437,7 +437,7 @@ Print all fields listed below. .TP \f3AllocCPUs\fP -Count of allocated CPUs. Equivalant to \f3NCPUS\fP. +Count of allocated CPUs. Equivalent to \f3NCPUS\fP. .TP \f3AllocGRES\fP @@ -535,7 +535,7 @@ The format of this fields output is as follows: .RS .PD "0" .HP -\f2[DD\-[hh:]]mm:ss\fP +\f2[DD\-[HH:]]MM:SS\fP .PD .RE .IP @@ -587,8 +587,8 @@ It is in the form: .TP \f3JobIDRaw\fP -In case of job array print the jobId instead of the ArrayJobId. -For non job arrays the output is the jobId in the format \f2job.jobstep\fP\c +In case of job array print the JobId instead of the ArrayJobId. +For non job arrays the output is the JobId in the format \f2job.jobstep\fP\c \&. .TP @@ -679,7 +679,7 @@ The task ID where the mincpu occurred. .TP \f3NCPUS\fP -Count of allocated CPUs. Equivalant to \f3AllocCPUS\fP +Count of allocated CPUs. Equivalent to \f3AllocCPUS\fP Total number of CPUs allocated to the job. @@ -952,8 +952,8 @@ option if the job ends after the specified time, otherwise it will be the jobs end time. NOTE: If no \-s (\-\-state) option is given sacct will -display jobs that ran durning the specified time, otherwise it returns -jobs that were in the state requested durning that period of time. +display jobs that ran during the specified time, otherwise it returns +jobs that were in the state requested during that period of time. Without \-T (normal operation) sacct output would be like this. .RS @@ -975,7 +975,7 @@ By adding the \-T option the job's start and end times are truncated to reflect only the time requested. If a job started after the start time requested or finished before the end time requested those times are not altered. The \-T option -is useful when determining exact run times durning any given period. +is useful when determining exact run times during any given period. .RS .PP .nf diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index f15023156..780b729e2 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -397,7 +397,7 @@ Valid <operator> values include: Set \fIQosLevel\fP to the specified value. \fBNote:\fR the QOS that can be used at a given account in the hierarchy are inherited by the children of that account. By assigning QOS with the \fB=\fR sign only the assigned QOS can be used by the -account and its childern. +account and its children. .TP \fB+=\fR Add the specified <qos> value to the current \fIQosLevel\fP. 
The account will diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index 7928dbc89..254bee7bc 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -357,10 +357,10 @@ contents of the slurm.conf configuration file. .TP \fBshow\fP \fIENTITY\fP \fIID\fP Display the state of the specified entity with the specified identification. -\fIENTITY\fP may be \fIaliases\fP, \fIcache\fP, \fIconfig\fP, \fIdaemons\fP, \fIfrontend\fP, -\fIjob\fP, \fInode\fP, \fIpartition\fP, \fIpowercap\fP, \fIreservation\fP, \fIslurmd\fP, -\fIstep\fP, \fItopology\fP, \fIhostlist\fP, \fIhostlistsorted\fP or -\fIhostnames\fP +\fIENTITY\fP may be \fIaliases\fP, \fIassoc_mgr\fP, \fIburstbuffer\fP, +\fIconfig\fP, \fIdaemons\fP, \fIfrontend\fP, \fIjob\fP, \fInode\fP, +\fIpartition\fP, \fIpowercap\fP, \fIreservation\fP, \fIslurmd\fP, \fIstep\fP, +\fItopology\fP, \fIhostlist\fP, \fIhostlistsorted\fP or \fIhostnames\fP (also \fIblock\fP or \fIsubmp\fP on BlueGene systems). \fIID\fP can be used to identify a specific element of the identified entity: job ID, node name, partition name, reservation name, or job step ID for @@ -374,8 +374,12 @@ named nodes will be shown. \fINodeHostname\fP (useful to get the list of virtual nodes associated with a real node in a configuration where multiple slurmd daemons execute on a single compute node). -\fIcache\fP displays the current contents of the slurmctld's internal cache -for users and associations. +\fIassoc_mgr\fP displays the current contents of the slurmctld's internal cache +for users, associations and/or qos. The \fIID\fP may be users=<user1>,[...,<userN>], +accounts=<acct1>,[...,<acctN>], qos=<qos1>,[...,<qosN>] and/or +flags=<users,assoc,qos>, used to filter the desired section to be displayed. If +no flags are specified, all sections are displayed. +\fIburstbuffer\fP displays the current status of the BurstBuffer plugin. \fIconfig\fP displays parameter names from the configuration files in mixed case (e.g. SlurmdPort=7003) while derived parameters names are in upper case only (e.g. SLURM_VERSION). diff --git a/doc/man/man1/sreport.1 b/doc/man/man1/sreport.1 index fcd485284..5cebcdf00 100644 --- a/doc/man/man1/sreport.1 +++ b/doc/man/man1/sreport.1 @@ -165,6 +165,10 @@ hierarchical tree. Starting with the specified account or the root account by default this report will list the underlying usage with a sum on each level. Use the 'tree' option to span the tree for better visibility. +NOTE: If there were reservations allowing a whole account any +any idle time in the reservation given to the association for the +account, not the user associations in the account, so it can be +possible a parent account can be larger than the sum of it's children. .TP .B cluster UserUtilizationByAccount This report will display users by account in order of utilization without diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 02d85c878..1aefabf04 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1571,6 +1571,12 @@ maximum power consumption. The default value is 50 percent. Supported by the power/cray plugin. .TP +\fBget_timeout=#\fR +Amount of time allowed to get power state information in milliseconds. +The default value is 5,000 milliseconds or 5 seconds. +Supported by the power/cray plugin and represents the time allowed for the +capmc command to respond to various "get" options. 
+.TP \fBincrease_rate=#\fR Specifies the maximum rate of change in the power cap for a node where the actual power usage is within \fBupper_threshold\fR (see below) of the power cap. @@ -1604,6 +1610,12 @@ be increased to the maximum. The default value is 300 seconds. Supported by the power/cray plugin. .TP +\fBset_timeout=#\fR +Amount of time allowed to set power state information in milliseconds. +The default value is 30,000 milliseconds or 30 seconds. +Supported by the power/cray plugin and represents the time allowed for the +capmc command to respond to various "set" options. +.TP \fBset_watts=#\fR Specifies the power limit to be set on every compute nodes managed by Slurm. Every node gets this same power cap and there is no variation through time @@ -2366,6 +2378,12 @@ The interpretation of this parameter varies by \fBSchedulerType\fR. Multiple options may be comma separated. .RS .TP +\fBassoc_limit_continue\fR +If set, continue to schedule lower priority jobs in each partition if the +higher priority jobs cannot start due to association limits. Setting this +can improve throughput and utilization, but may potentially starve larger +jobs and prevent them from launching indefinitely. +.TP \fBbatch_sched_delay=#\fR How long, in seconds, the scheduling of batch jobs can be delayed. This can be useful in a high\-throughput environment in which batch jobs are @@ -2391,7 +2409,7 @@ extended period of time. Setting this option will cause the backfill scheduler to continue processing pending jobs from its original job list after releasing locks even if job or node state changes. -This can result in lower priority jobs from being backfill scheduled instead +This can result in lower priority jobs being backfill scheduled instead of newly arrived higher priority jobs, but will permit more queued jobs to be considered for backfill scheduling. .TP diff --git a/slurm.spec b/slurm.spec index 7180ec0e5..59c3ecc46 100644 --- a/slurm.spec +++ b/slurm.spec @@ -398,7 +398,7 @@ Perl tool to print Slurm job state information. The output is designed to give information on the resource usage and availablilty, as well as information about jobs that are currently active on the machine.
This output is built using the Slurm utilities, sinfo, squeue and scontrol, the man pages for these -utilites will provide more information and greater depth of understanding +utilities will provide more information and greater depth of understanding %if %{slurm_with pam} %package pam_slurm @@ -696,13 +696,6 @@ test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/accounting_storage_mysql.so && test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/jobcomp_mysql.so && echo %{_libdir}/slurm/jobcomp_mysql.so >> $LIST -LIST=./perlapi.files -touch $LIST -test -f $RPM_BUILD_ROOT/%{_perldir}/auto/Slurm/Slurm.bs && - echo $RPM_BUILD_ROOT/%{_perldir}/auto/Slurm/Slurm.bs >> $LIST -test -f $RPM_BUILD_ROOT/%{_perldir}/auto/Slurmdb/Slurmdb.bs && - echo $RPM_BUILD_ROOT/%{_perldir}/auto/Slurmdb/Slurmdb.bs >> $LIST - LIST=./plugins.files touch $LIST test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/acct_gather_energy_cray.so && @@ -885,7 +878,7 @@ rm -rf $RPM_BUILD_ROOT %endif ############################################################################# -%files -f perlapi.files perlapi +%files perlapi %defattr(-,root,root) %{_perldir}/Slurm.pm %{_perldir}/Slurm/Bitstr.pm diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 3e643ed21..77b978164 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -534,7 +534,7 @@ enum job_state_reason { * (burst buffer) */ WAIT_QOS_MAX_BB_MINS_PER_JOB,/* QOS MaxTRESMinsPerJob exceeded * (burst buffer) */ - WAIT_QOS_MIN_BB, /* QOS MinTRESPerJob not reached + WAIT_QOS_MIN_BB /* QOS MinTRESPerJob not reached * (burst buffer) */ }; @@ -598,7 +598,7 @@ enum select_jobdata_type { SELECT_JOBDATA_CONFIRMED, /* data-> uint8_t ALPS reservation confirmed */ SELECT_JOBDATA_CLEANING, /* data-> uint16_t if the job is in * cleaning state or not. */ - SELECT_JOBDATA_NETWORK, /* data-> char * network info */ + SELECT_JOBDATA_NETWORK /* data-> char * network info */ }; enum select_nodedata_type { @@ -614,7 +614,7 @@ enum select_nodedata_type { * freed with xfree */ SELECT_NODEDATA_RACK_MP, /* data-> char * needs to be * freed with xfree */ - SELECT_NODEDATA_MEM_ALLOC, /* data-> uint32_t */ + SELECT_NODEDATA_MEM_ALLOC /* data-> uint32_t */ }; enum select_print_mode { @@ -634,7 +634,7 @@ enum select_print_mode { SELECT_PRINT_RAMDISK_IMAGE,/* Print just the RAMDISK IMAGE */ SELECT_PRINT_REBOOT, /* Print just the REBOOT */ SELECT_PRINT_RESV_ID, /* Print just Cray/BASIL reservation ID */ - SELECT_PRINT_START_LOC, /* Print just the start location */ + SELECT_PRINT_START_LOC /* Print just the start location */ }; enum select_node_cnt { @@ -703,7 +703,7 @@ enum acct_energy_type { ENERGY_DATA_LAST_POLL, ENERGY_DATA_SENSOR_CNT, ENERGY_DATA_NODE_ENERGY, - ENERGY_DATA_NODE_ENERGY_UP, + ENERGY_DATA_NODE_ENERGY_UP }; /* @@ -764,7 +764,7 @@ typedef enum task_dist_states { SLURM_DIST_CORECFULL = 0x0300, SLURM_DIST_NO_LLLP = 0x1000, - SLURM_DIST_UNKNOWN = 0x2000, + SLURM_DIST_UNKNOWN = 0x2000 } task_dist_states_t; #define SLURM_DIST_STATE_BASE 0x00FFFF @@ -851,7 +851,7 @@ typedef enum accel_bind_type { /* accelerator binding from --accel_bind= */ ACCEL_BIND_VERBOSE = 0x01, /* 'v' verbose */ ACCEL_BIND_CLOSEST_GPU = 0x02, /* 'g' Use closest GPU to the CPU */ ACCEL_BIND_CLOSEST_MIC = 0x04, /* 'm' Use closest NIC to CPU */ - ACCEL_BIND_CLOSEST_NIC = 0x08, /* 'n' Use closest NIC to CPU */ + ACCEL_BIND_CLOSEST_NIC = 0x08 /* 'n' Use closest NIC to CPU */ } accel_bind_type_t; /* The last entry in node_states must be STATE_END, keep in sync with @@ -3177,6 +3177,8 @@ extern void slurm_free_license_info_msg PARAMS((license_info_msg_t *)); extern 
int slurm_load_assoc_mgr_info PARAMS((assoc_mgr_info_request_msg_t *, assoc_mgr_info_msg_t **)); extern void slurm_free_assoc_mgr_info_msg PARAMS ((assoc_mgr_info_msg_t *)); +extern void slurm_free_assoc_mgr_info_request_members PARAMS ( + (assoc_mgr_info_request_msg_t *)); extern void slurm_free_assoc_mgr_info_request_msg PARAMS ( (assoc_mgr_info_request_msg_t *)); diff --git a/src/api/allocate.c b/src/api/allocate.c index ac36cca60..c8b910dfe 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -47,7 +47,7 @@ #include <stdio.h> #include <unistd.h> #include <sys/types.h> -#include <sys/poll.h> +#include <poll.h> #include <stdbool.h> #include <time.h> #include <netinet/in.h> /* for ntohs() */ diff --git a/src/api/config_info.c b/src/api/config_info.c index 3f10490c5..c969fdd77 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -286,12 +286,12 @@ void slurm_write_ctl_conf ( slurm_ctl_conf_info_msg_t * slurm_ctl_conf_ptr, fprintf(fp, " Default=YES"); if (p[i].def_mem_per_cpu & MEM_PER_CPU) { - if (p[i].def_mem_per_cpu != MEM_PER_CPU) - fprintf(fp, "DefMemPerCPU=%"PRIu32"", - p[i].def_mem_per_cpu & (~MEM_PER_CPU)); - } else if (p[i].def_mem_per_cpu != 0) - fprintf(fp, "DefMemPerNode=%"PRIu32"", - p[i].def_mem_per_cpu); + if (p[i].def_mem_per_cpu != MEM_PER_CPU) + fprintf(fp, "DefMemPerCPU=%"PRIu32"", + p[i].def_mem_per_cpu & (~MEM_PER_CPU)); + } else if (p[i].def_mem_per_cpu != 0) + fprintf(fp, "DefMemPerNode=%"PRIu32"", + p[i].def_mem_per_cpu); if (!p[i].allow_accounts && p[i].deny_accounts) fprintf(fp, "DenyAccounts=%s", p[i].deny_accounts); @@ -301,8 +301,8 @@ void slurm_write_ctl_conf ( slurm_ctl_conf_info_msg_t * slurm_ctl_conf_ptr, if (p[i].default_time != (uint32_t) NO_VAL) { if (p[i].default_time == INFINITE) - fprintf(fp, "DefaultTime=UNLIMITED"); - else { + fprintf(fp, "DefaultTime=UNLIMITED"); + else { char time_line[32]; secs2time_str(p[i].default_time * 60, time_line, sizeof(time_line)); @@ -326,54 +326,54 @@ void slurm_write_ctl_conf ( slurm_ctl_conf_info_msg_t * slurm_ctl_conf_ptr, fprintf(fp, " LLN=YES"); if (p[i].max_cpus_per_node != INFINITE) - fprintf(fp, " MaxCPUsPerNode=%"PRIu32"", + fprintf(fp, " MaxCPUsPerNode=%"PRIu32"", p[i].max_cpus_per_node); if (p[i].max_mem_per_cpu & MEM_PER_CPU) { - if (p[i].max_mem_per_cpu != MEM_PER_CPU) - fprintf(fp, " MaxMemPerCPU=%"PRIu32"", - p[i].max_mem_per_cpu & (~MEM_PER_CPU)); - } else if (p[i].max_mem_per_cpu != 0) - fprintf(fp, " MaxMemPerNode=%"PRIu32"", + if (p[i].max_mem_per_cpu != MEM_PER_CPU) + fprintf(fp, " MaxMemPerCPU=%"PRIu32"", + p[i].max_mem_per_cpu & (~MEM_PER_CPU)); + } else if (p[i].max_mem_per_cpu != 0) + fprintf(fp, " MaxMemPerNode=%"PRIu32"", p[i].max_mem_per_cpu); if (p[i].max_nodes != INFINITE) { char tmp1[16]; - if (cluster_flags & CLUSTER_FLAG_BG) - convert_num_unit((float)p[i].max_nodes, tmp1, - sizeof(tmp1), UNIT_NONE, - CONVERT_NUM_UNIT_EXACT); - else - snprintf(tmp1, sizeof(tmp1), "%u", + if (cluster_flags & CLUSTER_FLAG_BG) + convert_num_unit((float)p[i].max_nodes, tmp1, + sizeof(tmp1), UNIT_NONE, + CONVERT_NUM_UNIT_EXACT); + else + snprintf(tmp1, sizeof(tmp1), "%u", p[i].max_nodes); - fprintf(fp, "MaxNodes=%s", tmp1); - } + fprintf(fp, "MaxNodes=%s", tmp1); + } if (p[i].max_time != INFINITE) { - char time_line[32]; - secs2time_str(p[i].max_time * 60, time_line, - sizeof(time_line)); - fprintf(fp, " MaxTime=%s", time_line); - } + char time_line[32]; + secs2time_str(p[i].max_time * 60, time_line, + sizeof(time_line)); + fprintf(fp, " MaxTime=%s", time_line); + } if (p[i].min_nodes != 1) 
{ char tmp1[16]; if (cluster_flags & CLUSTER_FLAG_BG) - convert_num_unit((float)p[i].min_nodes, tmp1, + convert_num_unit((float)p[i].min_nodes, tmp1, sizeof(tmp1), UNIT_NONE, - CONVERT_NUM_UNIT_EXACT); - else - snprintf(tmp1, sizeof(tmp1), "%u", + CONVERT_NUM_UNIT_EXACT); + else + snprintf(tmp1, sizeof(tmp1), "%u", p[i].min_nodes); - fprintf(fp, " MinNodes=%s", tmp1); + fprintf(fp, " MinNodes=%s", tmp1); } if (p[i].nodes != NULL) fprintf(fp, " Nodes=%s", p[i].nodes); - if (p[i].preempt_mode != (uint16_t) NO_VAL) - fprintf(fp, " PreemptMode=%s", + if (p[i].preempt_mode != (uint16_t) NO_VAL) + fprintf(fp, " PreemptMode=%s", preempt_mode_string(p[i].preempt_mode)); if (p[i].priority != 1) @@ -389,18 +389,18 @@ void slurm_write_ctl_conf ( slurm_ctl_conf_info_msg_t * slurm_ctl_conf_ptr, fprintf(fp, " RootOnly=YES"); if (p[i].cr_type & CR_CORE) - fprintf(fp, " SelectTypeParameters=CR_CORE"); - else if (p[i].cr_type & CR_SOCKET) - fprintf(fp, " SelectTypeParameters=CR_SOCKET"); + fprintf(fp, " SelectTypeParameters=CR_CORE"); + else if (p[i].cr_type & CR_SOCKET) + fprintf(fp, " SelectTypeParameters=CR_SOCKET"); force = p[i].max_share & SHARED_FORCE; - val = p[i].max_share & (~SHARED_FORCE); - if (val == 0) - fprintf(fp, " Shared=EXCLUSIVE"); - else if (force) { - fprintf(fp, " Shared=FORCE:%u", val); - } else if (val != 1) - fprintf(fp, " Shared=YES:%u", val); + val = p[i].max_share & (~SHARED_FORCE); + if (val == 0) + fprintf(fp, " Shared=EXCLUSIVE"); + else if (force) { + fprintf(fp, " Shared=FORCE:%u", val); + } else if (val != 1) + fprintf(fp, " Shared=YES:%u", val); if (p[i].state_up == PARTITION_UP) fprintf(fp, " State=UP"); @@ -414,8 +414,8 @@ void slurm_write_ctl_conf ( slurm_ctl_conf_info_msg_t * slurm_ctl_conf_ptr, fprintf(fp, " State=UNKNOWN"); if (p[i].billing_weights_str != NULL) - fprintf(fp, " TRESBillingWeights=%s", - p[i].billing_weights_str); + fprintf(fp, " TRESBillingWeights=%s", + p[i].billing_weights_str); fprintf(fp, "\n"); } @@ -851,7 +851,7 @@ extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_acct_gather_type); list_append(ret_list, key_pair); - key_pair = xmalloc(sizeof(config_key_pair_t)); + key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobAcctGatherParams"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_acct_gather_params); list_append(ret_list, key_pair); diff --git a/src/api/step_io.c b/src/api/step_io.c index 2ab5e84a4..4c67d34f3 100644 --- a/src/api/step_io.c +++ b/src/api/step_io.c @@ -34,7 +34,7 @@ #include <sys/types.h> #include <sys/socket.h> #include <sys/select.h> -#include <sys/poll.h> +#include <poll.h> #include <arpa/inet.h> #include <errno.h> #include <fcntl.h> diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index f2de7d1d4..699bd1b5f 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -207,11 +207,13 @@ static slurmdb_assoc_rec_t *_find_assoc_rec( while (assoc_ptr) { if ((!assoc->user && (assoc->uid == NO_VAL)) && (assoc_ptr->user || (assoc_ptr->uid != NO_VAL))) { - debug("we are looking for a nonuser association"); + debug3("%s: we are looking for a nonuser association", + __func__); goto next; } else if ((!assoc_ptr->user && (assoc_ptr->uid == NO_VAL)) && (assoc->user || (assoc->uid != NO_VAL))) { - debug("we are looking for a user association"); + debug3("%s: we are looking for a user association", + __func__); goto next; } else if (assoc->user && assoc_ptr->user && ((assoc->uid == NO_VAL) || @@ -220,21 +222,21 @@ 
static slurmdb_assoc_rec_t *_find_assoc_rec( * associations, so use the name instead */ if (strcasecmp(assoc->user, assoc_ptr->user)) { - debug("2 not the right user %u != %u", - assoc->uid, assoc_ptr->uid); + debug3("%s: 2 not the right user %u != %u", + __func__, assoc->uid, assoc_ptr->uid); goto next; } } else if (assoc->uid != assoc_ptr->uid) { - debug("not the right user %u != %u", - assoc->uid, assoc_ptr->uid); + debug3("%s: not the right user %u != %u", + __func__, assoc->uid, assoc_ptr->uid); goto next; } if (assoc->acct && (!assoc_ptr->acct || strcasecmp(assoc->acct, assoc_ptr->acct))) { - debug("not the right account %s != %s", - assoc->acct, assoc_ptr->acct); + debug3("%s: not the right account %s != %s", + __func__, assoc->acct, assoc_ptr->acct); goto next; } @@ -242,7 +244,7 @@ static slurmdb_assoc_rec_t *_find_assoc_rec( if (!assoc_mgr_cluster_name && assoc->cluster && (!assoc_ptr->cluster || strcasecmp(assoc->cluster, assoc_ptr->cluster))) { - debug("not the right cluster"); + debug3("%s: not the right cluster", __func__); goto next; } @@ -250,7 +252,7 @@ static slurmdb_assoc_rec_t *_find_assoc_rec( && (!assoc_ptr->partition || strcasecmp(assoc->partition, assoc_ptr->partition))) { - debug("not the right partition"); + debug3("%s: not the right partition", __func__); goto next; } @@ -3170,9 +3172,9 @@ no_assocs: while ((tmp_char = list_next(qos_itr))) if ((qos_rec = list_find_first( assoc_mgr_qos_list, - slurmdb_find_tres_in_list, - &tmp_char))) - list_append(ret_list, user_rec); + slurmdb_find_qos_in_list_by_name, + tmp_char))) + list_append(ret_list, qos_rec); tmp_list = ret_list; } else tmp_list = assoc_mgr_qos_list; @@ -3204,7 +3206,7 @@ no_qos: if (user_itr) { while ((tmp_char = list_next(user_itr))) - if (xstrcasecmp(tmp_char, user_rec->name)) + if (!xstrcasecmp(tmp_char, user_rec->name)) break; list_iterator_reset(user_itr); /* not correct user */ diff --git a/src/common/callerid.c b/src/common/callerid.c index d9e13aa4c..c3408e442 100644 --- a/src/common/callerid.c +++ b/src/common/callerid.c @@ -48,11 +48,19 @@ #endif /* HAVE_CONFIG_H */ -//#ifndef _GNU_SOURCE -//#define _GNU_SOURCE -//#endif +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <arpa/inet.h> #include <ctype.h> + +#ifdef __FreeBSD__ +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#define s6_addr32 __u6_addr.__u6_addr32 +#endif + #if HAVE_DIRENT_H # include <dirent.h> #endif diff --git a/src/common/callerid.h b/src/common/callerid.h index 1f264e519..86e707c88 100644 --- a/src/common/callerid.h +++ b/src/common/callerid.h @@ -38,6 +38,12 @@ #define _SLURM_CALLERID_H #include <arpa/inet.h> +#include <sys/types.h> + +#ifdef __FreeBSD__ +#include <sys/socket.h> +#include <netinet/in.h> +#endif typedef struct callerid_conn { uint32_t port_dst; diff --git a/src/common/eio.c b/src/common/eio.c index a57e2bbb8..7a9b73089 100644 --- a/src/common/eio.c +++ b/src/common/eio.c @@ -39,7 +39,7 @@ # include <config.h> #endif -#include <sys/poll.h> +#include <poll.h> #include <sys/types.h> #include <sys/socket.h> #include <unistd.h> diff --git a/src/common/env.c b/src/common/env.c index 53b553943..ffd34a3f3 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -49,7 +49,7 @@ #include <strings.h> #include <unistd.h> #include <signal.h> -#include <sys/poll.h> +#include <poll.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/param.h> /* MAXPATHLEN */ diff --git a/src/common/forward.c b/src/common/forward.c index 6bd2f2f2b..49a6a062f 100644 --- a/src/common/forward.c +++ 
b/src/common/forward.c @@ -215,6 +215,10 @@ void *_forward_thread(void *arg) goto cleanup; } + /* These messages don't have a return message, but if + * we got here things worked out so make note of the + * list of nodes as success. + */ if ((fwd_msg->header.msg_type == REQUEST_SHUTDOWN) || (fwd_msg->header.msg_type == REQUEST_RECONFIGURE) || (fwd_msg->header.msg_type == REQUEST_REBOOT_NODES)) { diff --git a/src/common/gres.c b/src/common/gres.c index 590090aca..dc039540f 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -67,6 +67,12 @@ # include <string.h> #endif /* HAVE_CONFIG_H */ +#ifdef __FreeBSD__ +#include <sys/param.h> +#include <sys/cpuset.h> +typedef cpuset_t cpu_set_t; +#endif + #include <sched.h> #include <stdio.h> #include <stdlib.h> @@ -3930,8 +3936,8 @@ static int _job_alloc(void *job_gres_data, void *node_gres_data, char *gres_name, uint32_t job_id, char *node_name, bitstr_t *core_bitmap) { - int i, j, k, sz1, sz2; - uint32_t gres_cnt; + int j, k, sz1, sz2; + uint64_t gres_cnt, i; gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data; gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data; bool type_array_updated = false; @@ -3974,10 +3980,13 @@ static int _job_alloc(void *job_gres_data, void *node_gres_data, */ gres_cnt = job_gres_ptr->gres_cnt_alloc; i = node_gres_ptr->gres_cnt_alloc + gres_cnt; - i -= node_gres_ptr->gres_cnt_avail; - if (i > 0) { - error("gres/%s: job %u node %s overallocated resources by %d", - gres_name, job_id, node_name, i); + + if (i > node_gres_ptr->gres_cnt_avail) { + error("gres/%s: job %u node %s overallocated resources by %" + PRIu64", (%"PRIu64" > %"PRIu64")", + gres_name, job_id, node_name, + i - node_gres_ptr->gres_cnt_avail, + i, node_gres_ptr->gres_cnt_avail); /* proceed with request, give job what's available */ } @@ -4000,9 +4009,10 @@ static int _job_alloc(void *job_gres_data, void *node_gres_data, node_gres_ptr->gres_cnt_alloc += bit_set_count(node_gres_ptr->gres_bit_alloc); } else if (node_gres_ptr->gres_bit_alloc) { - gres_cnt = MIN(bit_size(node_gres_ptr->gres_bit_alloc), - bit_size(job_gres_ptr-> - gres_bit_alloc[node_offset])); + gres_cnt = (uint64_t)MIN( + bit_size(node_gres_ptr->gres_bit_alloc), + bit_size(job_gres_ptr-> + gres_bit_alloc[node_offset])); for (i = 0; i < gres_cnt; i++) { if (bit_test(job_gres_ptr-> gres_bit_alloc[node_offset], i) && @@ -4018,7 +4028,7 @@ static int _job_alloc(void *job_gres_data, void *node_gres_data, i = bit_size(node_gres_ptr->gres_bit_alloc); if (i < node_gres_ptr->gres_cnt_avail) { error("gres/%s: node %s gres bitmap size bad " - "(%d < %"PRIu64")", + "(%"PRIu64" < %"PRIu64")", gres_name, node_name, i, node_gres_ptr->gres_cnt_avail); node_gres_ptr->gres_bit_alloc = @@ -4234,10 +4244,11 @@ static int _job_dealloc(void *job_gres_data, void *node_gres_data, int node_offset, char *gres_name, uint32_t job_id, char *node_name) { - int i, j, k, len, gres_cnt, sz1, sz2; + int i, j, len, sz1, sz2; gres_job_state_t *job_gres_ptr = (gres_job_state_t *) job_gres_data; gres_node_state_t *node_gres_ptr = (gres_node_state_t *) node_gres_data; bool type_array_updated = false; + uint64_t gres_cnt, k; /* * Validate data structures. 
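For context on the _job_alloc() hunk above: the old code folded the overallocation test into signed 32-bit arithmetic ("i -= avail; if (i > 0)"), which can wrap for large GRES counts and truncates 64-bit values. The patch widens the counters to uint64_t and tests by direct comparison instead of subtraction. A minimal standalone sketch of the safe pattern, with illustrative names rather than Slurm's:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Report overallocation without subtracting into a signed int.
     * With uint64_t, "alloc + requested > avail" cannot go negative,
     * and the shortfall "sum - avail" is computed only when valid. */
    static void check_overalloc(uint64_t alloc, uint64_t requested,
                                uint64_t avail)
    {
        uint64_t sum = alloc + requested;
        if (sum > avail)
            printf("overallocated by %"PRIu64" (%"PRIu64" > %"PRIu64")\n",
                   sum - avail, sum, avail);
    }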
Either job_gres_data->node_cnt and @@ -4291,8 +4302,11 @@ static int _job_dealloc(void *job_gres_data, void *node_gres_data, node_gres_ptr->gres_cnt_alloc -= job_gres_ptr->gres_cnt_alloc; } else { node_gres_ptr->gres_cnt_alloc = 0; - error("gres/%s: job %u node %s gres count underflow", - gres_name, job_id, node_name); + error("gres/%s: job %u node %s gres count underflow " + "(%"PRIu64" %"PRIu64")", + gres_name, job_id, node_name, + node_gres_ptr->gres_cnt_alloc, + job_gres_ptr->gres_cnt_alloc); } if (job_gres_ptr->gres_bit_alloc && @@ -4300,21 +4314,25 @@ static int _job_dealloc(void *job_gres_data, void *node_gres_data, node_gres_ptr->topo_gres_bitmap && node_gres_ptr->topo_gres_cnt_alloc) { for (i = 0; i < node_gres_ptr->topo_cnt; i++) { - sz1 = bit_size(job_gres_ptr->gres_bit_alloc[node_offset]); + sz1 = bit_size( + job_gres_ptr->gres_bit_alloc[node_offset]); sz2 = bit_size(node_gres_ptr->topo_gres_bitmap[i]); if (sz1 != sz2) continue; - gres_cnt = bit_overlap(job_gres_ptr-> - gres_bit_alloc[node_offset], - node_gres_ptr-> - topo_gres_bitmap[i]); + gres_cnt = (uint64_t)bit_overlap( + job_gres_ptr->gres_bit_alloc[node_offset], + node_gres_ptr->topo_gres_bitmap[i]); if (node_gres_ptr->topo_gres_cnt_alloc[i] >= gres_cnt) { node_gres_ptr->topo_gres_cnt_alloc[i] -= gres_cnt; } else { error("gres/%s: job %u dealloc node %s topo " - "gres count underflow", gres_name, job_id, - node_name); + "gres count underflow " + "(%"PRIu64" %"PRIu64")", + gres_name, job_id, + node_name, + node_gres_ptr->topo_gres_cnt_alloc[i], + gres_cnt); node_gres_ptr->topo_gres_cnt_alloc[i] = 0; } if ((node_gres_ptr->type_cnt == 0) || @@ -4332,9 +4350,12 @@ static int _job_dealloc(void *job_gres_data, void *node_gres_data, gres_cnt; } else { error("gres/%s: job %u dealloc node %s " - "type %s gres count underflow", + "type %s gres count underflow " + "(%"PRIu64" %"PRIu64")", gres_name, job_id, node_name, - node_gres_ptr->type_model[j]); + node_gres_ptr->type_model[j], + node_gres_ptr->type_cnt_alloc[i], + gres_cnt); node_gres_ptr->type_cnt_alloc[j] = 0; } } @@ -4981,8 +5002,8 @@ static uint64_t _step_test(void *step_gres_data, void *job_gres_data, gres_cnt = NO_VAL64; } else { /* Note: We already validated the gres count above */ - debug("gres/%s: %s %u.%u gres_bit_alloc is NULL", - gres_name, __func__, job_id, step_id); + debug3("gres/%s: %s %u.%u gres_bit_alloc is NULL", + gres_name, __func__, job_id, step_id); gres_cnt = NO_VAL64; } @@ -5571,13 +5592,16 @@ static bitstr_t * _get_usable_gres(int context_inx) gres_slurmd_conf_t *gres_slurmd_conf; int gres_inx = 0; - CPU_ZERO(&mask); -#ifdef SCHED_GETAFFINITY_THREE_ARGS +#ifdef __FreeBSD__ + rc = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, + sizeof(mask), &mask); +#elif defined SCHED_GETAFFINITY_THREE_ARGS rc = sched_getaffinity(0, sizeof(mask), &mask); #else rc = sched_getaffinity(0, &mask); #endif + if (rc) { error("sched_getaffinity error: %m"); return usable_gres; @@ -5880,8 +5904,8 @@ static int _step_alloc(void *step_gres_data, void *job_gres_data, if ((job_gres_ptr->gres_bit_alloc == NULL) || (job_gres_ptr->gres_bit_alloc[node_offset] == NULL)) { - debug("gres/%s: %s gres_bit_alloc for %u.%u is NULL", - gres_name, __func__, job_id, step_id); + debug3("gres/%s: %s gres_bit_alloc for %u.%u is NULL", + gres_name, __func__, job_id, step_id); return SLURM_SUCCESS; } diff --git a/src/common/gres.h b/src/common/gres.h index d87ec31c0..e8f6696e6 100644 --- a/src/common/gres.h +++ b/src/common/gres.h @@ -60,7 +60,7 @@ typedef struct gres_slurmd_conf { 
uint64_t count; /* Specific CPUs associated with this configuration record */ - uint16_t cpu_cnt; + uint32_t cpu_cnt; char *cpus; bitstr_t *cpus_bitmap; /* Using LOCAL mapping */ diff --git a/src/common/log.c b/src/common/log.c index decae2441..b97c1e8fc 100644 --- a/src/common/log.c +++ b/src/common/log.c @@ -70,12 +70,12 @@ # include <stdlib.h> /* for abort() */ #endif -#include <sys/poll.h> +#include <poll.h> +#include <unistd.h> #include <sys/socket.h> #include <sys/stat.h> #include <sys/time.h> #include <sys/types.h> -#include <sys/unistd.h> #include "slurm/slurm_errno.h" #include "src/common/fd.h" diff --git a/src/common/read_config.c b/src/common/read_config.c index 7e67121ed..ef5d1661a 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -4098,7 +4098,7 @@ _validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) long_suspend_time); } else if ((long_suspend_time > -1) && (!strcmp(conf->select_type, "select/bluegene"))) { - error("SuspendTime (power save mode) incomptible with " + error("SuspendTime (power save mode) incompatible with " "select/bluegene"); return SLURM_ERROR; } else diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index c5db87977..b3cbf64b5 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -99,7 +99,6 @@ static char *_global_auth_key(void); static void _remap_slurmctld_errno(void); static int _unpack_msg_uid(Buf buffer); static bool _is_port_ok(int, uint16_t); -static void _slurm_set_addr_any(slurm_addr_t * slurm_address, uint16_t port); #if _DEBUG static void _print_data(char *data, int len); @@ -1703,9 +1702,13 @@ extern char *slurm_get_auth_info(void) char *auth_info; slurm_ctl_conf_t *conf; - conf = slurm_conf_lock(); - auth_info = xstrdup(conf->authinfo); - slurm_conf_unlock(); + if (slurmdbd_conf) { + auth_info = xstrdup(slurmdbd_conf->auth_info); + } else { + conf = slurm_conf_lock(); + auth_info = xstrdup(conf->authinfo); + slurm_conf_unlock(); + } return auth_info; } @@ -2777,26 +2780,11 @@ slurm_init_msg_engine_ports(uint16_t *ports) slurm_fd_t slurm_init_msg_engine_addrname_port(char *addr_name, uint16_t port) { slurm_addr_t addr; - static uint32_t bind_addr = NO_VAL; - - if (bind_addr == NO_VAL) { -#ifdef BIND_SPECIFIC_ADDR - bind_addr = 1; -#else - char *topology_params = slurm_get_topology_param(); - if (topology_params && - slurm_strcasestr(topology_params, "NoInAddrAny")) - bind_addr = 1; - else - bind_addr = 0; - xfree(topology_params); -#endif - } if (addr_name) slurm_set_addr(&addr, port, addr_name); else - _slurm_set_addr_any(&addr, port); + slurm_setup_sockaddr(&addr, port); return slurm_init_msg_engine(&addr); } @@ -3733,16 +3721,6 @@ size_t slurm_read_stream_timeout(slurm_fd_t open_fd, char *buffer, * address conversion and management functions \**********************************************************************/ -/* slurm_set_addr_any - * initialized the slurm_address with the supplied port on INADDR_ANY - * OUT slurm_address - slurm_addr_t to be filled in - * IN port - port in host order - */ -static void _slurm_set_addr_any(slurm_addr_t * slurm_address, uint16_t port) -{ - slurm_set_addr_uint(slurm_address, port, SLURM_INADDR_ANY); -} - /* slurm_set_addr * initializes the slurm_address with the supplied port and host name * OUT slurm_address - slurm_addr_t to be filled in diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index a5684ccc8..ee3ea6a3b 100644 --- a/src/common/slurm_protocol_defs.c +++ 
b/src/common/slurm_protocol_defs.c @@ -3289,11 +3289,13 @@ extern void slurm_free_trigger_msg(trigger_info_msg_t *msg) { int i; - for (i=0; i<msg->record_count; i++) { - xfree(msg->trigger_array[i].res_id); - xfree(msg->trigger_array[i].program); + if (msg->trigger_array) { + for (i = 0; i < msg->record_count; i++) { + xfree(msg->trigger_array[i].res_id); + xfree(msg->trigger_array[i].program); + } + xfree(msg->trigger_array); } - xfree(msg->trigger_array); xfree(msg); } @@ -4239,7 +4241,7 @@ slurm_free_assoc_mgr_info_msg(assoc_mgr_info_msg_t *msg) xfree(msg); } -extern void slurm_free_assoc_mgr_info_request_msg( +extern void slurm_free_assoc_mgr_info_request_members( assoc_mgr_info_request_msg_t *msg) { if (!msg) @@ -4248,6 +4250,15 @@ extern void slurm_free_assoc_mgr_info_request_msg( FREE_NULL_LIST(msg->acct_list); FREE_NULL_LIST(msg->qos_list); FREE_NULL_LIST(msg->user_list); +} + +extern void slurm_free_assoc_mgr_info_request_msg( + assoc_mgr_info_request_msg_t *msg) +{ + if (!msg) + return; + + slurm_free_assoc_mgr_info_request_members(msg); xfree(msg); } diff --git a/src/common/slurm_protocol_interface.h b/src/common/slurm_protocol_interface.h index 72089526b..b3b2fef8e 100644 --- a/src/common/slurm_protocol_interface.h +++ b/src/common/slurm_protocol_interface.h @@ -171,14 +171,6 @@ extern int slurm_recv_timeout ( slurm_fd_t open_fd, char *buffer , /***************************/ /* slurm address functions */ /***************************/ -/* build a slurm address bassed upon ip address and port number - * OUT slurm_address - the constructed slurm_address - * IN port - port to be used - * IN ip_address - the IP address to connect with - */ -extern void slurm_set_addr_uint ( slurm_addr_t * slurm_address , - uint16_t port , uint32_t ip_address ) ; - /* build a slurm address bassed upon host name and port number * OUT slurm_address - the constructed slurm_address * IN port - port to be used diff --git a/src/common/slurm_protocol_socket_implementation.c b/src/common/slurm_protocol_socket_implementation.c index b938f44fc..154f2b8cf 100644 --- a/src/common/slurm_protocol_socket_implementation.c +++ b/src/common/slurm_protocol_socket_implementation.c @@ -49,7 +49,7 @@ #include <netdb.h> #include <errno.h> #include <netinet/in.h> -#include <sys/poll.h> +#include <poll.h> #include <sys/time.h> #include <sys/types.h> #include <signal.h> @@ -710,15 +710,6 @@ static int _slurm_vfcntl(int fd, int cmd, va_list va ) } } -/* sets the fields of a slurm_addr_t */ -extern void slurm_set_addr_uint (slurm_addr_t *addr, uint16_t port, - uint32_t ipaddr) -{ - addr->sin_family = AF_SLURM ; - addr->sin_port = htons(port); - addr->sin_addr.s_addr = htonl(ipaddr); -} - extern void slurm_set_addr_char (slurm_addr_t * addr, uint16_t port, char *host) { struct hostent * he = NULL; diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c index ee06be5b7..055a0b9e3 100644 --- a/src/common/slurmdbd_defs.c +++ b/src/common/slurmdbd_defs.c @@ -54,7 +54,7 @@ #include <pthread.h> #include <stdio.h> #include <syslog.h> -#include <sys/poll.h> +#include <poll.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/socket.h> diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c index f425dfff1..397ceff47 100644 --- a/src/common/stepd_api.c +++ b/src/common/stepd_api.c @@ -973,12 +973,14 @@ stepd_stat_jobacct(int fd, uint16_t protocol_version, int rc = SLURM_SUCCESS; int tasks = 0; + /* NULL return indicates that accounting is disabled */ + if (!(resp->jobacct = jobacctinfo_create(NULL))) + return 
rc; + debug("Entering stepd_stat_jobacct for job %u.%u", sent->job_id, sent->step_id); - safe_write(fd, &req, sizeof(int)); - /* Receive the jobacct struct and return */ - resp->jobacct = jobacctinfo_create(NULL); + safe_write(fd, &req, sizeof(int)); /* Do not attempt reading data until there is something to read. * Avoid locking the jobacct_gather plugin early and creating @@ -986,6 +988,7 @@ stepd_stat_jobacct(int fd, uint16_t protocol_version, if (wait_fd_readable(fd, 300)) goto rwfail; + /* Fill in the jobacct struct and return */ rc = jobacctinfo_getinfo(resp->jobacct, JOBACCT_DATA_PIPE, &fd, protocol_version); diff --git a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c index 728b3f156..b087b5847 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_assoc.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_assoc.c @@ -2314,6 +2314,8 @@ extern int as_mysql_add_assocs(mysql_conn_t *mysql_conn, uint32_t uid, List local_cluster_list = NULL; List added_user_list = NULL; bool is_coord = false; + slurmdb_update_object_t *update_object = NULL; + List assoc_list_tmp = NULL; if (!assoc_list) { error("No association list given"); @@ -2757,25 +2759,28 @@ extern int as_mysql_add_assocs(mysql_conn_t *mysql_conn, uint32_t uid, } - if (!moved_parent) { - slurmdb_update_object_t *update_object = NULL; + /* Since we are already removed all the items from assoc_list + * we need to work off the update_list from here on out. + */ + itr = list_iterator_create(mysql_conn->update_list);; + while ((update_object = list_next(itr))) { + if (!update_object->objects || + !list_count(update_object->objects)) + continue; + if (update_object->type == SLURMDB_ADD_ASSOC) + break; + } + list_iterator_destroy(itr); - itr = list_iterator_create( - mysql_conn->update_list);; - while ((update_object = list_next(itr))) { - if (!update_object->objects - || !list_count(update_object->objects)) - continue; - if (update_object->type == SLURMDB_ADD_ASSOC) - break; - } - list_iterator_destroy(itr); + if (update_object && update_object->objects + && list_count(update_object->objects)) + assoc_list_tmp = update_object->objects; + + if (assoc_list_tmp) { + ListIterator itr2 = list_iterator_create(assoc_list_tmp); - if (update_object && update_object->objects - && list_count(update_object->objects)) { + if (!moved_parent) { char *cluster_name; - ListIterator itr2 = - list_iterator_create(update_object->objects); slurm_mutex_lock(&as_mysql_cluster_list_lock); itr = list_iterator_create(as_mysql_cluster_list); @@ -2797,78 +2802,28 @@ extern int as_mysql_add_assocs(mysql_conn_t *mysql_conn, uint32_t uid, } list_iterator_destroy(itr); slurm_mutex_unlock(&as_mysql_cluster_list_lock); - list_iterator_destroy(itr2); } - } - /* now reset all the other defaults accordingly. (if needed) */ - itr = list_iterator_create(assoc_list); - while ((object = list_next(itr))) { - if ((object->is_def != 1) || !object->cluster - || !object->acct || !object->user) - continue; + /* make sure we don't have any other default accounts */ + list_iterator_reset(itr2); + while ((object = list_next(itr2))) { + if ((object->is_def != 1) || !object->cluster + || !object->acct || !object->user) + continue; - if ((rc = _reset_default_assoc( - mysql_conn, object, &query, moved_parent ? 0 : 1)) - != SLURM_SUCCESS) { - xfree(query); - goto end_it; + if ((rc = _reset_default_assoc( + mysql_conn, object, + &query, moved_parent ? 
0 : 1)) + != SLURM_SUCCESS) { + xfree(query); + goto end_it; + } } - - /* xstrfmtcat(query, "update \"%s_%s\" set is_def=0, " */ - /* "mod_time=%ld " */ - /* "where (user='%s' && acct!='%s' && is_def=1);", */ - /* object->cluster, assoc_table, (long)now, */ - /* object->user, object->acct); */ - - /* if (!moved_parent) { */ - /* MYSQL_RES *result = NULL; */ - /* MYSQL_ROW row; */ - /* /\* If moved parent all the associations will be sent */ - /* so no need to do this extra step. Else, this has */ - /* to be done one at a time so we can send */ - /* the updated assocs back to the slurmctlds */ - /* *\/ */ - /* xstrfmtcat(query, "select id_assoc from \"%s_%s\" " */ - /* "where (user='%s' && acct!='%s' " */ - /* "&& is_def=1);", */ - /* object->cluster, assoc_table, */ - /* object->user, object->acct); */ - /* debug("%d(%s:%d) query\n%s", */ - /* mysql_conn->conn, THIS_FILE, */ - /* __LINE__, query); */ - /* if (!(result = mysql_db_query_ret( */ - /* mysql_conn, query, 1))) { */ - /* xfree(query); */ - /* rc = SLURM_ERROR; */ - /* goto end_it; */ - /* } */ - /* xfree(query); */ - - /* while ((row = mysql_fetch_row(result))) { */ - /* slurmdb_assoc_rec_t *mod_assoc = xmalloc( */ - /* sizeof(slurmdb_assoc_rec_t)); */ - /* slurmdb_init_assoc_rec(mod_assoc, 0); */ - - /* mod_assoc->id = slurm_atoul(row[0]); */ - /* mod_assoc->is_def = 0; */ - - /* if (addto_update_list(mysql_conn->update_list,*/ - /* SLURMDB_MODIFY_ASSOC, */ - /* mod_assoc) */ - /* != SLURM_SUCCESS) { */ - /* slurmdb_destroy_assoc_rec( */ - /* mod_assoc); */ - /* error("couldn't add to " */ - /* "the update list"); */ - /* rc = SLURM_ERROR; */ - /* break; */ - /* } */ - /* } */ - /* mysql_free_result(result); */ - /* } */ + list_iterator_destroy(itr2); + /* This temp list is no longer needed */ + assoc_list_tmp = NULL; } - list_iterator_destroy(itr); + if (query) { if (debug_flags & DEBUG_FLAG_DB_ASSOC) DB_DEBUG(mysql_conn->conn, "query\n%s", query); @@ -2897,7 +2852,6 @@ end_it: } } if (moved_parent) { - List assoc_list = NULL; ListIterator itr = NULL; slurmdb_assoc_rec_t *assoc = NULL; slurmdb_assoc_cond_t assoc_cond; @@ -2914,7 +2868,7 @@ end_it: memset(&assoc_cond, 0, sizeof(slurmdb_assoc_cond_t)); assoc_cond.cluster_list = local_cluster_list; - if (!(assoc_list = + if (!(assoc_list_tmp = as_mysql_get_assocs(mysql_conn, uid, NULL))) { FREE_NULL_LIST(local_cluster_list); return rc; @@ -2927,7 +2881,7 @@ end_it: So we are just going to delete each item as it comes out since we are moving it to the update_list. 
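The restructuring above fixes an ordering problem: by the time as_mysql_add_assocs() reaches this point, the records have already been moved off the caller's assoc_list and into mysql_conn->update_list (as the new comment notes), so re-walking assoc_list to reset other default associations could iterate a drained list. The rewritten code instead locates the SLURMDB_ADD_ASSOC update object and walks its objects list. In outline, using the same Slurm list calls as the hunk (a fragment, not a standalone program):

    ListIterator itr = list_iterator_create(mysql_conn->update_list);
    slurmdb_update_object_t *upd;

    while ((upd = list_next(itr))) {
        /* skip empty update objects; stop at the added associations */
        if (upd->objects && list_count(upd->objects) &&
            (upd->type == SLURMDB_ADD_ASSOC))
            break;
    }
    list_iterator_destroy(itr);
    /* upd->objects, if found, stands in for the consumed assoc_list */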
*/ - itr = list_iterator_create(assoc_list); + itr = list_iterator_create(assoc_list_tmp); while ((assoc = list_next(itr))) { if (addto_update_list(mysql_conn->update_list, SLURMDB_MODIFY_ASSOC, @@ -2935,7 +2889,7 @@ end_it: list_remove(itr); } list_iterator_destroy(itr); - FREE_NULL_LIST(assoc_list); + FREE_NULL_LIST(assoc_list_tmp); } } else { FREE_NULL_LIST(added_user_list); diff --git a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c index cbce5a510..63ad8ba2e 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_jobacct_process.c @@ -458,7 +458,15 @@ static int _cluster_get_jobs(mysql_conn_t *mysql_conn, "left join \"%s_%s\" as t2 " "on t1.id_assoc=t2.id_assoc " "left join \"%s_%s\" as t3 " - " on t1.id_resv=t3.id_resv ", + "on t1.id_resv=t3.id_resv && " + "((t1.time_start && " + "(t3.time_start < t1.time_start && " + "(t3.time_end >= t1.time_start || " + "t3.time_end = 0))) || " + "((t3.time_start < t1.time_submit && " + "(t3.time_end >= t1.time_submit || " + "t3.time_end = 0)) || " + "(t3.time_start > t1.time_submit)))", job_fields, cluster_name, job_table, cluster_name, assoc_table, cluster_name, resv_table); diff --git a/src/plugins/accounting_storage/mysql/as_mysql_rollup.c b/src/plugins/accounting_storage/mysql/as_mysql_rollup.c index aadc43a25..6cd08bbd9 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_rollup.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_rollup.c @@ -1624,28 +1624,28 @@ extern int as_mysql_nonhour_rollup(mysql_conn_t *mysql_conn, uint16_t track_wckey = slurm_get_track_wckey(); char *unit_name; - if (!slurm_localtime_r(&curr_start, &start_tm)) { - error("Couldn't get localtime from start %ld", curr_start); - return SLURM_ERROR; - } - - start_tm.tm_sec = 0; - start_tm.tm_min = 0; - start_tm.tm_hour = 0; - start_tm.tm_isdst = -1; + while (curr_start < end) { + if (!slurm_localtime_r(&curr_start, &start_tm)) { + error("Couldn't get localtime from start %ld", + curr_start); + return SLURM_ERROR; + } + start_tm.tm_sec = 0; + start_tm.tm_min = 0; + start_tm.tm_hour = 0; + start_tm.tm_isdst = -1; - if (run_month) { - unit_name = "month"; - start_tm.tm_mday = 1; - start_tm.tm_mon++; - } else { - unit_name = "day"; - start_tm.tm_mday++; - } + if (run_month) { + unit_name = "month"; + start_tm.tm_mday = 1; + start_tm.tm_mon++; + } else { + unit_name = "day"; + start_tm.tm_mday++; + } - curr_end = slurm_mktime(&start_tm); + curr_end = slurm_mktime(&start_tm); - while (curr_start < end) { if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "curr %s is now %ld-%ld", @@ -1666,6 +1666,7 @@ extern int as_mysql_nonhour_rollup(mysql_conn_t *mysql_conn, cluster_name, run_month ? assoc_day_table : assoc_hour_table, curr_end, curr_start, now); + /* We group on deleted here so if there are no entries we don't get an error, just nothing is returned. 
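On the as_mysql_nonhour_rollup() restructure above: the old version computed curr_end once before entering the loop and then, at the bottom of each pass, always advanced by a single day (start_tm.tm_mday++), even when rolling up by month. Moving the slurm_localtime_r()/slurm_mktime() step to the top of the loop applies the correct increment (tm_mon for monthly, tm_mday for daily) on every iteration, and tm_isdst = -1 lets mktime() renormalize across daylight-saving transitions. A small self-contained sketch of the corrected boundary advance, using plain libc where Slurm uses its locked wrappers:

    #include <stdbool.h>
    #include <time.h>

    /* Advance period_start to the next local-midnight day or month
     * boundary.  mktime() renormalizes out-of-range fields (tm_mon may
     * become 12) and tm_isdst = -1 resolves daylight-saving time. */
    static time_t next_boundary(time_t period_start, bool by_month)
    {
        struct tm tm;

        localtime_r(&period_start, &tm);
        tm.tm_sec = tm.tm_min = tm.tm_hour = 0;
        tm.tm_isdst = -1;
        if (by_month) {
            tm.tm_mday = 1;
            tm.tm_mon++;
        } else {
            tm.tm_mday++;
        }
        return mktime(&tm);
    }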
Else we get a bunch of NULL's @@ -1726,17 +1727,6 @@ extern int as_mysql_nonhour_rollup(mysql_conn_t *mysql_conn, } curr_start = curr_end; - if (!slurm_localtime_r(&curr_start, &start_tm)) { - error("Couldn't get localtime from %s start %ld", - unit_name, curr_start); - return SLURM_ERROR; - } - start_tm.tm_sec = 0; - start_tm.tm_min = 0; - start_tm.tm_hour = 0; - start_tm.tm_mday++; - start_tm.tm_isdst = -1; - curr_end = slurm_mktime(&start_tm); } /* info("stop start %s", slurm_ctime2(&curr_start)); */ diff --git a/src/plugins/accounting_storage/mysql/as_mysql_user.c b/src/plugins/accounting_storage/mysql/as_mysql_user.c index c99a9bf36..cf6738a4e 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_user.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_user.c @@ -1309,7 +1309,7 @@ empty: assoc_itr = list_iterator_create(assoc_list); while ((user = list_next(itr))) { while ((assoc = list_next(assoc_itr))) { - if (strcmp(assoc->user, user->name)) + if (xstrcmp(assoc->user, user->name)) continue; /* Set up the default. This is needed * for older versions primarily that diff --git a/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c b/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c index ee51e3713..3af815d81 100644 --- a/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c +++ b/src/plugins/acct_gather_energy/cray/acct_gather_energy_cray.c @@ -83,6 +83,8 @@ enum { GET_POWER }; +extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl); + static uint32_t _get_latest_stats(int type) { uint32_t data = 0; @@ -274,6 +276,12 @@ extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, xassert(_run_in_daemon()); + if (!local_energy) { + debug("%s: trying to get data %d, but no local_energy yet.", + __func__, data_type); + acct_gather_energy_p_conf_set(NULL); + } + switch (data_type) { case ENERGY_DATA_JOULES_TASK: case ENERGY_DATA_NODE_ENERGY_UP: @@ -338,6 +346,10 @@ extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl) if (!_run_in_daemon()) return; + /* Already been here, we shouldn't need to visit again */ + if (local_energy) + return; + if (!flag_init) { flag_init = 1; local_energy = acct_gather_energy_alloc(1); diff --git a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c index dab425782..8a0b6bf00 100644 --- a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c +++ b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c @@ -139,6 +139,8 @@ static char hostname[MAXHOSTNAMELEN]; static int nb_pkg = 0; +extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl); + static char *_msr_string(int which) { if (which == MSR_RAPL_POWER_UNIT) @@ -486,6 +488,12 @@ extern int acct_gather_energy_p_get_data(enum acct_energy_type data_type, xassert(_run_in_daemon()); + if (!local_energy) { + debug("%s: trying to get data %d, but no local_energy yet.", + __func__, data_type); + acct_gather_energy_p_conf_set(NULL); + } + switch (data_type) { case ENERGY_DATA_JOULES_TASK: case ENERGY_DATA_NODE_ENERGY_UP: @@ -551,6 +559,10 @@ extern void acct_gather_energy_p_conf_set(s_p_hashtbl_t *tbl) if (!_run_in_daemon()) return; + /* Already been here, we shouldn't need to visit again */ + if (local_energy) + return; + _hardware(); for (i = 0; i < nb_pkg; i++) pkg_fd[i] = _open_msr(pkg2cpu[i]); diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/hdf5_api.c b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/hdf5_api.c index 
4350e9419..473d20ae6 100644 --- a/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/hdf5_api.c +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/libsh5util_old/hdf5_api.c @@ -195,9 +195,9 @@ static int moffset; // General variable used by insert macros * var variable name * prf prefix for series (usually ',' */ -#define PUT_UINT_SUM(fp, var, prfx) \ - fprintf(fp, "%s%ld,%ld,%ld,%ld", prfx, \ - var.min, var.ave, var.max, var.total); +#define PUT_UINT_SUM(fp, var, prfx) \ + fprintf(fp, "%s%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64"", \ + prfx, var.min, var.ave, var.max, var.total); /* Macro to put an int min,ave,max,total for a variable to extract file * * Parameters @@ -418,9 +418,10 @@ static void _energy_extract_series( } n_items = size_data / sizeof(profile_energy_t); for (ix=0; ix < n_items; ix++) { - fprintf(fp, "%d,%d,%s,%s,%s,%ld,%ld,%ld\n", job, step, node, - series, energy_data[ix].tod, energy_data[ix].time, - energy_data[ix].power, energy_data[ix].cpu_freq); + fprintf(fp, "%d,%d,%s,%s,%s,%ld,%"PRIu64",%"PRIu64"\n", + job, step, node, series, energy_data[ix].tod, + energy_data[ix].time, energy_data[ix].power, + energy_data[ix].cpu_freq); } return; } @@ -436,7 +437,7 @@ static void _energy_extract_total( "Min_CPU Frequency,Ave_CPU Frequency," "Max_CPU Frequency,Total_CPU Frequency\n"); } - fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, + fprintf(fp, "%d,%d,%s,%s,%s,%"PRIu64"", job, step, node, series, energy_data->start_time, energy_data->elapsed_time); PUT_UINT_SUM(fp, energy_data->power, ","); PUT_UINT_SUM(fp, energy_data->cpu_freq, ","); @@ -723,8 +724,8 @@ static void _io_extract_series( } n_items = size_data / sizeof(profile_io_t); for (ix=0; ix < n_items; ix++) { - fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%.3f,%ld,%.3f\n", - job, step, node, series, + fprintf(fp,"%d,%d,%s,%s,%s,%ld,%"PRIu64",%.3f,%"PRIu64 + ",%.3f\n", job, step, node, series, io_data[ix].tod, io_data[ix].time, io_data[ix].reads, io_data[ix].read_size, io_data[ix].writes, io_data[ix].write_size); @@ -746,7 +747,7 @@ static void _io_extract_total( "Min_Write_Megabytes,Ave_Write_Megabytes," "Max_Write_Megabytes,Total_Write_Megabytes\n"); } - fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, + fprintf(fp, "%d,%d,%s,%s,%s,%"PRIu64"", job, step, node, series, io_data->start_time, io_data->elapsed_time); PUT_UINT_SUM(fp, io_data->reads, ","); PUT_DBL_SUM(fp, io_data->read_size, ","); @@ -1035,8 +1036,8 @@ static void _network_extract_series( } n_items = size_data / sizeof(profile_network_t); for (ix=0; ix < n_items; ix++) { - fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%.3f,%ld,%.3f\n", - job, step, node,series, + fprintf(fp,"%d,%d,%s,%s,%s,%ld,%"PRIu64",%.3f,%"PRIu64 + ",%.3f\n", job, step, node, series, network_data[ix].tod, network_data[ix].time, network_data[ix].packets_in, network_data[ix].size_in, network_data[ix].packets_out, @@ -1061,7 +1062,7 @@ static void _network_extract_total( "Min_Megabytes_Out,Ave_Megabytes_Out," "Max_Megabytes_Out,Total_Megabytes_Out\n"); } - fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, + fprintf(fp, "%d,%d,%s,%s,%s,%"PRIu64"", job, step, node, series, network_data->start_time, network_data->elapsed_time); PUT_UINT_SUM(fp, network_data->packets_in, ","); PUT_DBL_SUM(fp, network_data->size_in, ","); @@ -1428,14 +1429,15 @@ static void _task_extract_series( } n_items = size_data / sizeof(profile_task_t); for (ix=0; ix < n_items; ix++) { - fprintf(fp,"%d,%d,%s,%s,%s,%ld,%ld,%ld,%.3f", + fprintf(fp,"%d,%d,%s,%s,%s,%ld,%"PRIu64",%"PRIu64",%.3f", job, step, node, 
series, task_data[ix].tod, task_data[ix].time, task_data[ix].cpu_freq, task_data[ix].cpu_time, task_data[ix].cpu_utilization); - fprintf(fp,",%ld,%ld,%ld,%.3f,%.3f\n", task_data[ix].rss, - task_data[ix].vm_size, task_data[ix].pages, - task_data[ix].read_size, task_data[ix].write_size); + fprintf(fp,",%"PRIu64",%"PRIu64",%"PRIu64",%.3f,%.3f\n", + task_data[ix].rss, task_data[ix].vm_size, + task_data[ix].pages, task_data[ix].read_size, + task_data[ix].write_size); } return; } @@ -1462,7 +1464,7 @@ static void _task_extract_total( "Min_Write_Megabytes,Ave_Write_Megabytes," "Max_Write_Megabytes,Total_Write_Megabytes\n"); } - fprintf(fp, "%d,%d,%s,%s,%s,%ld", job, step, node, series, + fprintf(fp, "%d,%d,%s,%s,%s,%"PRIu64"", job, step, node, series, task_data->start_time, task_data->elapsed_time); PUT_UINT_SUM(fp, task_data->cpu_freq, ","); PUT_UINT_SUM(fp, task_data->cpu_time, ","); diff --git a/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c index 9992ec0f9..54edd2e50 100644 --- a/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c +++ b/src/plugins/acct_gather_profile/hdf5/sh5util/sh5util.c @@ -1227,7 +1227,7 @@ static void _item_analysis_uint(hsize_t nb_tables, hid_t *tables, if (group_mode) { fprintf(output_file, "%s,%"PRIu64",%s %s,%"PRIu64",%s %s," - "%"PRIu64",%"PRIu64",%lf,%"PRIu64, + "%"PRIu64",%"PRIu64",%lf,%zu", step_name, et, names[min_idx], nodes[min_idx], min_val, names[max_idx], nodes[max_idx], max_val, @@ -1235,7 +1235,7 @@ static void _item_analysis_uint(hsize_t nb_tables, hid_t *tables, } else { fprintf(output_file, "%s,%"PRIu64",%s,%"PRIu64",%s,%"PRIu64",%" - PRIu64",%lf,%"PRIu64, + PRIu64",%lf,%zu", step_name, et, nodes[min_idx], min_val, nodes[max_idx], max_val, @@ -1324,7 +1324,7 @@ static void _item_analysis_double(hsize_t nb_tables, hid_t *tables, } fprintf(output_file, - "%s,%"PRIu64",%s,%lf,%s,%lf,%lf,%lf,%"PRIu64, + "%s,%"PRIu64",%s,%lf,%s,%lf,%lf,%lf,%zu", step_name, et, names[min_idx], min_val, names[max_idx], max_val, sum, avg, nb_series_in_smp); diff --git a/src/plugins/burst_buffer/common/burst_buffer_common.c b/src/plugins/burst_buffer/common/burst_buffer_common.c index 70e6f3fdb..4ec8d48ec 100644 --- a/src/plugins/burst_buffer/common/burst_buffer_common.c +++ b/src/plugins/burst_buffer/common/burst_buffer_common.c @@ -52,6 +52,11 @@ #include <sys/types.h> #include <unistd.h> +#ifdef __FreeBSD__ +#define POLLRDHUP POLLHUP +#include <signal.h> +#endif + #include "slurm/slurm.h" #include "slurm/slurmdb.h" diff --git a/src/plugins/burst_buffer/cray/burst_buffer_cray.c b/src/plugins/burst_buffer/cray/burst_buffer_cray.c index b07552374..4187a5eab 100644 --- a/src/plugins/burst_buffer/cray/burst_buffer_cray.c +++ b/src/plugins/burst_buffer/cray/burst_buffer_cray.c @@ -133,7 +133,7 @@ typedef struct bb_configs { */ typedef struct bb_instances { uint32_t id; - uint32_t bytes; + uint64_t bytes; char *label; } bb_instances_t; @@ -786,7 +786,7 @@ static void _recover_bb_state(void) uint16_t protocol_version = (uint16_t)NO_VAL; uint32_t data_size = 0, rec_count = 0, name_len = 0; uint32_t id = 0, user_id = 0; - uint64_t size; + uint64_t size = 0; int i, state_fd; char *account = NULL, *name = NULL, *partition = NULL, *qos = NULL; char *end_ptr = NULL; @@ -4399,7 +4399,7 @@ _parse_instance_capacity(json_object *instance, bb_instances_t *ent) { enum json_type type; struct json_object_iter iter; - int x; + int64_t x; json_object_object_foreachC(instance, iter) { type = json_object_get_type(iter.val); diff 
--git a/src/plugins/job_submit/lua/job_submit_lua.c b/src/plugins/job_submit/lua/job_submit_lua.c old mode 100755 new mode 100644 diff --git a/src/plugins/mpi/mpich1_p4/mpich1_p4.c b/src/plugins/mpi/mpich1_p4/mpich1_p4.c index d065409b0..1d1e3c73c 100644 --- a/src/plugins/mpi/mpich1_p4/mpich1_p4.c +++ b/src/plugins/mpi/mpich1_p4/mpich1_p4.c @@ -43,7 +43,7 @@ #include <fcntl.h> #include <signal.h> #include <stdlib.h> -#include <sys/poll.h> +#include <poll.h> #include <sys/types.h> #include <sys/socket.h> diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c index 586d2f751..af0acdca9 100644 --- a/src/plugins/mpi/mvapich/mvapich.c +++ b/src/plugins/mpi/mvapich/mvapich.c @@ -50,7 +50,7 @@ #include <sys/socket.h> #include <netinet/in.h> #include <strings.h> -#include <sys/poll.h> +#include <poll.h> #include <sys/time.h> #include <arpa/inet.h> diff --git a/src/plugins/power/common/power_common.c b/src/plugins/power/common/power_common.c index acdde02dd..923262d32 100644 --- a/src/plugins/power/common/power_common.c +++ b/src/plugins/power/common/power_common.c @@ -50,6 +50,11 @@ #include <sys/types.h> #include <unistd.h> +#ifdef __FreeBSD__ +#define POLLRDHUP POLLHUP +#include <signal.h> +#endif + #include "slurm/slurm.h" #include "src/common/list.h" diff --git a/src/plugins/power/cray/power_cray.c b/src/plugins/power/cray/power_cray.c index 54f3a41fd..4fa67431d 100644 --- a/src/plugins/power/cray/power_cray.c +++ b/src/plugins/power/cray/power_cray.c @@ -67,8 +67,10 @@ #define DEFAULT_CAPMC_PATH "/opt/cray/capmc/default/bin/capmc" #define DEFAULT_CAP_WATTS 0 #define DEFAULT_DECREASE_RATE 50 +#define DEFAULT_GET_TIMEOUT 5000 #define DEFAULT_INCREASE_RATE 20 #define DEFAULT_LOWER_THRESHOLD 90 +#define DEFAULT_SET_TIMEOUT 30000 #define DEFAULT_UPPER_THRESHOLD 95 #define DEFAULT_RECENT_JOB 300 @@ -139,10 +141,13 @@ static uint32_t decrease_rate = DEFAULT_DECREASE_RATE; static uint32_t increase_rate = DEFAULT_INCREASE_RATE; static uint32_t job_level = NO_VAL; static time_t last_cap_read = 0; +static time_t last_limits_read = 0; static uint32_t lower_threshold = DEFAULT_LOWER_THRESHOLD; static uint32_t recent_job = DEFAULT_RECENT_JOB; static uint32_t upper_threshold = DEFAULT_UPPER_THRESHOLD; static bool stop_power = false; +static int get_timeout = DEFAULT_GET_TIMEOUT; +static int set_timeout = DEFAULT_SET_TIMEOUT; static pthread_t power_thread = 0; static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t term_lock = PTHREAD_MUTEX_INITIALIZER; @@ -299,6 +304,17 @@ static void _load_config(void) else job_level = NO_VAL; + if ((tmp_ptr = strstr(sched_params, "get_timeout="))) { + get_timeout = atoi(tmp_ptr + 12); + if (get_timeout < 1) { + error("PowerParameters: get_timeout=%u invalid", + get_timeout); + get_timeout = DEFAULT_GET_TIMEOUT; + } + } else { + get_timeout = DEFAULT_GET_TIMEOUT; + } + if ((tmp_ptr = strstr(sched_params, "lower_threshold="))) { lower_threshold = atoi(tmp_ptr + 16); if (lower_threshold < 1) { @@ -321,6 +337,17 @@ static void _load_config(void) recent_job = DEFAULT_RECENT_JOB; } + if ((tmp_ptr = strstr(sched_params, "set_timeout="))) { + set_timeout = atoi(tmp_ptr + 12); + if (set_timeout < 1) { + error("PowerParameters: set_timeout=%u invalid", + set_timeout); + set_timeout = DEFAULT_SET_TIMEOUT; + } + } else { + set_timeout = DEFAULT_SET_TIMEOUT; + } + if ((tmp_ptr = strstr(sched_params, "set_watts="))) { set_watts = strtol(tmp_ptr + 10, &end_ptr, 10); if ((end_ptr[0] == 'k') || (end_ptr[0] == 'K')) { @@ 
-352,15 +379,15 @@ static void _load_config(void) else if (job_level == 1) level_str = "job_level,"; info("PowerParameters=balance_interval=%d,capmc_path=%s," - "cap_watts=%u,decrease_rate=%u,increase_rate=%u,%s" - "lower_threashold=%u,recent_job=%u,set_watts=%u," - "upper_threshold=%u", + "cap_watts=%u,decrease_rate=%u,get_timeout=%d," + "increase_rate=%u,%slower_threashold=%u,recent_job=%u," + "set_timeout=%d,set_watts=%u,upper_threshold=%u", balance_interval, capmc_path, cap_watts, decrease_rate, - increase_rate, level_str, lower_threshold, recent_job, - set_watts, upper_threshold); + get_timeout, increase_rate, level_str, lower_threshold, + recent_job, set_timeout, set_watts, upper_threshold); } - last_cap_read = 0; /* Read node power limits again */ + last_limits_read = 0; /* Read node power limits again */ } static void _get_capabilities(void) @@ -382,8 +409,8 @@ static void _get_capabilities(void) script_argv[2] = NULL; START_TIMER; - cmd_resp = power_run_script("capmc", capmc_path, script_argv, 5000, - NULL, &status); + cmd_resp = power_run_script("capmc", capmc_path, script_argv, + get_timeout, NULL, &status); END_TIMER; if (status != 0) { error("%s: capmc %s: %s", @@ -697,8 +724,8 @@ static void _get_caps(void) script_argv[4] = NULL; START_TIMER; - cmd_resp = power_run_script("capmc", capmc_path, script_argv, 5000, - NULL, &status); + cmd_resp = power_run_script("capmc", capmc_path, script_argv, + get_timeout, NULL, &status); END_TIMER; if (status != 0) { error("%s: capmc %s: %s", @@ -919,8 +946,8 @@ static void _get_nodes_ready(void) script_argv[2] = NULL; START_TIMER; - cmd_resp = power_run_script("capmc", capmc_path, script_argv, 5000, - NULL, &status); + cmd_resp = power_run_script("capmc", capmc_path, script_argv, + get_timeout, NULL, &status); END_TIMER; if (status != 0) { error("%s: capmc %s: %s", __func__, script_argv[1], cmd_resp); @@ -1052,8 +1079,8 @@ static void _get_node_energy_counter(void) script_argv[4] = NULL; START_TIMER; - cmd_resp = power_run_script("capmc", capmc_path, script_argv, 5000, - NULL, &status); + cmd_resp = power_run_script("capmc", capmc_path, script_argv, + get_timeout, NULL, &status); END_TIMER; if (status != 0) { error("%s: capmc %s %s %s: %s", __func__, @@ -1257,16 +1284,18 @@ extern void *_power_agent(void *args) if (wait_time < balance_interval) continue; - if (last_cap_read == 0) { /* On first pass only */ - /* Read initial power caps for every node */ + wait_time = difftime(now, last_cap_read); + if (wait_time > 300) { /* Every 5 minutes */ + /* Read current power caps for every node */ _get_caps(); /* Has node write lock */ + last_cap_read = time(NULL); } - wait_time = difftime(now, last_cap_read); + wait_time = difftime(now, last_limits_read); if (wait_time > 600) { /* Every 10 minutes */ /* Read min/max power for every node */ _get_capabilities(); /* Has node write lock */ - last_cap_read = time(NULL); + last_limits_read = time(NULL); } _get_node_energy_counter(); /* Has node write lock */ _get_nodes_ready(); /* Has node write lock */ @@ -1296,6 +1325,8 @@ static void _clear_node_caps(void) i++, node_ptr++) { if (!node_ptr->power) continue; + if (node_ptr->power->state != 1) /* Not ready, no change */ + continue; node_ptr->power->new_cap_watts = 0; } } @@ -1611,13 +1642,14 @@ static void _set_power_caps(void) xstrcat(json, "\n ]\n}\n"); START_TIMER; cmd_resp = power_run_script("capmc", capmc_path, script_argv, - 5000, json, &status); + set_timeout, json, &status); END_TIMER; if (status != 0) { error("%s: capmc %s %s: %s", __func__, 
script_argv[1], script_argv[2], cmd_resp); xfree(cmd_resp); + last_cap_read = 0; /* Read node caps again */ return; } else if (debug_flag & DEBUG_FLAG_POWER) { info("%s: capmc %s %s %s", @@ -1651,13 +1683,14 @@ static void _set_power_caps(void) xstrcat(json, "\n ]\n}\n"); START_TIMER; cmd_resp = power_run_script("capmc", capmc_path, script_argv, - 5000, json, &status); + set_timeout, json, &status); END_TIMER; if (status != 0) { error("%s: capmc %s %s: %s", __func__, script_argv[1], script_argv[2], cmd_resp); xfree(cmd_resp); + last_cap_read = 0; /* Read node caps again */ return; } else if (debug_flag & DEBUG_FLAG_POWER) { info("%s: capmc %s %s %s", diff --git a/src/plugins/sched/wiki2/msg.c b/src/plugins/sched/wiki2/msg.c index 143e5b97e..a3a990b6c 100644 --- a/src/plugins/sched/wiki2/msg.c +++ b/src/plugins/sched/wiki2/msg.c @@ -42,7 +42,7 @@ #include "src/slurmctld/locks.h" #include "src/plugins/sched/wiki2/crypto.h" #include "src/plugins/sched/wiki2/msg.h" -#include <sys/poll.h> +#include <poll.h> #define _DEBUG 0 diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index 912485fc0..2fc96ea76 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -233,8 +233,9 @@ static uint16_t _allocate_sc(struct job_record *job_ptr, bitstr_t *core_map, } if ((mc_ptr->ntasks_per_core != (uint16_t) INFINITE) && (mc_ptr->ntasks_per_core)) { - ncpus_per_core = mc_ptr->ntasks_per_core; - ncpus_per_core *= cpus_per_task; + ncpus_per_core = MIN(threads_per_core, + (mc_ptr->ntasks_per_core * + cpus_per_task)); } if ((mc_ptr->threads_per_core != (uint16_t) NO_VAL) && (mc_ptr->threads_per_core < ncpus_per_core)) { diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index 050c5dde2..969daa5f5 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -1721,6 +1721,22 @@ static time_t _guess_job_end(struct job_record * job_ptr, time_t now) return end_time; } +/* Return TRUE if job is in the processing of cleaning up. + * This is used for Cray systems to indicate the Node Health Check (NHC) + * is still running. Until NHC completes, the job's resource use persists + * the select/cons_res plugin data structures. */ +static bool _job_cleaning(struct job_record *job_ptr) +{ + uint16_t cleaning = 0; + + select_g_select_jobinfo_get(job_ptr->select_jobinfo, + SELECT_JOBDATA_CLEANING, + &cleaning); + if (cleaning) + return true; + return false; +} + /* _will_run_test - determine when and where a pending job can start, removes * jobs from node table at termination time and run _test_job() after * each one. Used by SLURM's sched/backfill plugin and Moab. 
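The _job_cleaning() helper above exists because, on Cray systems, a job that has finished still holds its allocation until Node Health Check (NHC) completes, and the backfill will-run test must count such jobs as occupying resources or a higher-priority job can be planned onto nodes that are not actually free. A toy model of the resulting filter, kept self-contained rather than using Slurm's job_record and IS_JOB_* macros:

    #include <stdbool.h>
    #include <stdio.h>

    /* A job still occupies its nodes if it is running, suspended, or
     * finished but waiting on Node Health Check (the CLEANING flag). */
    struct job {
        int id;
        bool running, suspended, cleaning;
    };

    static bool holds_resources(const struct job *j)
    {
        return j->running || j->suspended || j->cleaning;
    }

    int main(void)
    {
        struct job jobs[] = {
            {101, true,  false, false},  /* running              */
            {102, false, false, true },  /* finished, NHC active */
            {103, false, false, false},  /* finished, NHC done   */
        };
        unsigned i;

        for (i = 0; i < sizeof(jobs) / sizeof(jobs[0]); i++)
            if (holds_resources(&jobs[i]))
                printf("job %d still occupies its nodes\n", jobs[i].id);
        return 0;
    }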
*/ @@ -1786,7 +1802,8 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, job_iterator = list_iterator_create(job_list); while ((tmp_job_ptr = (struct job_record *) list_next(job_iterator))) { if (!IS_JOB_RUNNING(tmp_job_ptr) && - !IS_JOB_SUSPENDED(tmp_job_ptr)) + !IS_JOB_SUSPENDED(tmp_job_ptr) && + !_job_cleaning(tmp_job_ptr)) continue; if (tmp_job_ptr->end_time == 0) { error("Job %u has zero end_time", tmp_job_ptr->job_id); diff --git a/src/plugins/select/cray/select_cray.c b/src/plugins/select/cray/select_cray.c index 7d0777262..55bbd87b3 100644 --- a/src/plugins/select/cray/select_cray.c +++ b/src/plugins/select/cray/select_cray.c @@ -51,7 +51,7 @@ #endif #include <stdio.h> -#include <sys/poll.h> +#include <poll.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> diff --git a/src/plugins/slurmctld/dynalloc/msg.c b/src/plugins/slurmctld/dynalloc/msg.c index 6cabdd737..10f195c0c 100644 --- a/src/plugins/slurmctld/dynalloc/msg.c +++ b/src/plugins/slurmctld/dynalloc/msg.c @@ -43,7 +43,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/poll.h> +#include <poll.h> #include <sys/socket.h> #include <netinet/in.h> diff --git a/src/plugins/task/affinity/affinity.h b/src/plugins/task/affinity/affinity.h index d8b44da48..4d81cd4f6 100644 --- a/src/plugins/task/affinity/affinity.h +++ b/src/plugins/task/affinity/affinity.h @@ -52,7 +52,7 @@ #include <sys/wait.h> #include <sys/stat.h> #include <sys/param.h> -#include <sys/poll.h> +#include <poll.h> #include <sys/stat.h> #include <fcntl.h> #include <pwd.h> diff --git a/src/plugins/topology/hypercube/topology_hypercube.c b/src/plugins/topology/hypercube/topology_hypercube.c index 963d76a3f..d1ebf2a8f 100644 --- a/src/plugins/topology/hypercube/topology_hypercube.c +++ b/src/plugins/topology/hypercube/topology_hypercube.c @@ -1085,8 +1085,12 @@ static void _generate_hilbert_integers(void) switch_data * switch_ptr = switch_data_table; int counter, switch_rank; int i, j; - unsigned int hilbert[hypercube_dimensions]; - + unsigned int *hilbert; + + if (hypercube_dimensions <= 0) + return; + + hilbert = xmalloc(sizeof(unsigned int) * hypercube_dimensions); for (i = 0; i < switch_data_cnt; i++, switch_ptr++) { for (j = 0; j < hypercube_dimensions; j++) { hilbert[j] = switch_ptr->coordinates[j]; @@ -1104,6 +1108,7 @@ static void _generate_hilbert_integers(void) } switch_ptr->rank = switch_rank; } + xfree(hilbert); } diff --git a/src/sacct/options.c b/src/sacct/options.c index 3d4ffd099..bc6e80ad1 100644 --- a/src/sacct/options.c +++ b/src/sacct/options.c @@ -320,7 +320,7 @@ sacct [<OPTION>] \n \ -j, --jobs: \n\ Format is <job(.step)>. Display information about this \n\ job or comma-separated list of jobs. The default is all \n\ - jobs. Adding .step will display the specfic job step of \n\ + jobs. Adding .step will display the specific job step of \n\ that job. (A step id of 'batch' will display the \n\ information about the batch step.) 
\n\ -k, --timelimit-min: \n\ diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 546da28e2..062a39ec0 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -385,7 +385,7 @@ static void _opt_default() opt.req_switch = -1; opt.wait4switch = -1; - opt.nice = 0; + opt.nice = NO_VAL; opt.priority = 0; } diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 20443c399..38d703718 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -675,7 +675,7 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) if (opt.licenses) desc->licenses = xstrdup(opt.licenses); desc->network = opt.network; - if (opt.nice) + if (opt.nice != NO_VAL) desc->nice = NICE_OFFSET + opt.nice; if (opt.priority) desc->priority = opt.priority; diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index b59e1628f..663dfef3c 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -408,7 +408,7 @@ static void _opt_default() opt.ckpt_interval_str = NULL; opt.ckpt_dir = slurm_get_checkpoint_dir(); - opt.nice = 0; + opt.nice = NO_VAL; opt.priority = 0; opt.test_only = false; diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index 0940c239a..9bd574c75 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -398,7 +398,7 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->task_dist = opt.distribution; desc->network = opt.network; - if (opt.nice) + if (opt.nice != NO_VAL) desc->nice = NICE_OFFSET + opt.nice; if (opt.priority) desc->priority = opt.priority; diff --git a/src/scancel/scancel.c b/src/scancel/scancel.c index a9c094528..11c42e994 100644 --- a/src/scancel/scancel.c +++ b/src/scancel/scancel.c @@ -64,15 +64,17 @@ #include "src/common/list.h" #include "src/common/log.h" #include "src/common/read_config.h" +#include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_defs.h" +#include "src/common/timers.h" #include "src/common/xstring.h" #include "src/common/xmalloc.h" #include "src/scancel/scancel.h" #define MAX_CANCEL_RETRY 10 -#define MAX_THREADS 2 - +#define MAX_THREADS 10 +static void _add_delay(void); static int _cancel_jobs (int filter_cnt); static void *_cancel_job_id (void *cancel_info); static void *_cancel_step_id (void *cancel_info); @@ -106,6 +108,9 @@ static pthread_attr_t attr; static int num_active_threads = 0; static pthread_mutex_t num_active_threads_lock; static pthread_cond_t num_active_threads_cond; +static pthread_mutex_t max_delay_lock; +static uint32_t max_resp_time = 0; +static int request_count = 0; int main (int argc, char *argv[]) @@ -650,6 +655,41 @@ static int _cancel_jobs(int filter_cnt) return rc; } +/* scancel can cancel huge numbers of jobs from a single command line using + * pthreads for parallelism. Add a delay if there are many RPCs and response + * delays get excessive, in order to avoid causing a denial of service attack. */ +static void _add_delay(void) +{ + static int target_resp_time = -1; + static int delay_time = 10000, previous_delay = 0; + int my_delay; + + pthread_mutex_lock(&max_delay_lock); + if (target_resp_time < 0) { + target_resp_time = slurm_get_msg_timeout() / 4; + target_resp_time = MAX(target_resp_time, 3); + target_resp_time = MIN(target_resp_time, 5); + target_resp_time *= 1000000; + debug("%s: target response time = %d", __func__, + target_resp_time); + } + if ((++request_count < MAX_THREADS) || + (max_resp_time <= target_resp_time)) { + pthread_mutex_unlock(&max_delay_lock); + return; + } + + /* Maximum delay of 1 second.
Start at 10 msec with Fibonacci backoff */ + my_delay = MIN((delay_time + previous_delay), 1000000); + previous_delay = delay_time; + delay_time = my_delay; + pthread_mutex_unlock(&max_delay_lock); + + info("%s: adding delay in RPC send of %d usec", __func__, my_delay); + usleep(my_delay); + return; +} + static void * _cancel_job_id (void *ci) { @@ -658,6 +698,7 @@ _cancel_job_id (void *ci) bool sig_set = true; uint16_t flags = 0; char *job_type = ""; + DEF_TIMERS; if (cancel_info->sig == (uint16_t) NO_VAL) { cancel_info->sig = SIGKILL; @@ -698,8 +739,15 @@ _cancel_job_id (void *ci) } for (i = 0; i < MAX_CANCEL_RETRY; i++) { + _add_delay(); + START_TIMER; error_code = slurm_kill_job2(cancel_info->job_id_str, cancel_info->sig, flags); + END_TIMER; + pthread_mutex_lock(&max_delay_lock); + max_resp_time = MAX(max_resp_time, DELTA_TIMER); + pthread_mutex_unlock(&max_delay_lock); + if ((error_code == 0) || (errno != ESLURM_TRANSITION_STATE_NO_UPDATE)) break; @@ -744,6 +792,7 @@ _cancel_step_id (void *ci) uint32_t job_id = cancel_info->job_id; uint32_t step_id = cancel_info->step_id; bool sig_set = true; + DEF_TIMERS; if (cancel_info->sig == (uint16_t) NO_VAL) { cancel_info->sig = SIGKILL; @@ -775,6 +824,8 @@ _cancel_step_id (void *ci) cancel_info->job_id_str, step_id); } + _add_delay(); + START_TIMER; if ((!sig_set) || opt.ctld) error_code = slurm_kill_job_step(job_id, step_id, cancel_info->sig); @@ -783,6 +834,11 @@ _cancel_step_id (void *ci) else error_code = slurm_signal_job_step(job_id, step_id, cancel_info->sig); + END_TIMER; + pthread_mutex_lock(&max_delay_lock); + max_resp_time = MAX(max_resp_time, DELTA_TIMER); + pthread_mutex_unlock(&max_delay_lock); + if ((error_code == 0) || ((errno != ESLURM_TRANSITION_STATE_NO_UPDATE) && (errno != ESLURM_JOB_PENDING))) diff --git a/src/scontrol/info_assoc_mgr.c b/src/scontrol/info_assoc_mgr.c index ac842e6e5..1dc33d4a4 100644 --- a/src/scontrol/info_assoc_mgr.c +++ b/src/scontrol/info_assoc_mgr.c @@ -37,9 +37,11 @@ \*****************************************************************************/ #include "scontrol.h" +#include "src/common/slurm_strcasestr.h" static uint32_t tres_cnt = 0; static char **tres_names = NULL; +static uint32_t req_flags = 0; static void _print_tres_line(const char *name, uint64_t *limits, uint64_t *used, uint64_t divider, bool last) @@ -82,7 +84,7 @@ endit: printf("%s", new_line_char); } -static void _print_assoc_mgr_info(const char *name, assoc_mgr_info_msg_t *msg) +static void _print_assoc_mgr_info(assoc_mgr_info_msg_t *msg) { ListIterator itr; slurmdb_user_rec_t *user_rec; @@ -98,7 +100,8 @@ static void _print_assoc_mgr_info(const char *name, assoc_mgr_info_msg_t *msg) tres_names = msg->tres_names; if (!msg->user_list || !list_count(msg->user_list)) { - printf("\nNo users currently cached in Slurm.\n\n"); + if (req_flags & ASSOC_MGR_INFO_FLAG_USERS) + printf("\nNo users currently cached in Slurm.\n\n"); } else { printf("\nUser Records\n\n"); @@ -116,7 +119,9 @@ static void _print_assoc_mgr_info(const char *name, assoc_mgr_info_msg_t *msg) } if (!msg->assoc_list || !list_count(msg->assoc_list)) { - printf("\nNo associations currently cached in Slurm.\n\n"); + if (req_flags & ASSOC_MGR_INFO_FLAG_ASSOC) + printf("\nNo associations currently " + "cached in Slurm.\n\n"); } else { printf("\nAssociation Records\n\n"); @@ -145,14 +150,19 @@ static void _print_assoc_mgr_info(const char *name, assoc_mgr_info_msg_t *msg) "%u/%.2f/%u/%.2f%s", assoc_rec->shares_raw, assoc_rec->usage->shares_norm, - assoc_rec->usage->level_shares, + 
(assoc_rec->usage->level_shares == NO_VAL) ? + 1 : assoc_rec->usage->level_shares, assoc_rec->usage->fs_factor, new_line_char); printf("UsageRaw/Norm/Efctv=%.2Lf/%.2Lf/%.2Lf%s", assoc_rec->usage->usage_raw, - assoc_rec->usage->usage_norm, - assoc_rec->usage->usage_efctv, + (assoc_rec->usage->usage_norm == + (long double)NO_VAL) ? + 1 : assoc_rec->usage->usage_norm, + (assoc_rec->usage->usage_efctv == + (long double)NO_VAL) ? + 1 : assoc_rec->usage->usage_efctv, new_line_char); if (assoc_rec->parent_acct) @@ -257,7 +267,8 @@ static void _print_assoc_mgr_info(const char *name, assoc_mgr_info_msg_t *msg) } if (!msg->qos_list || !list_count(msg->qos_list)) { - printf("\nNo QOS currently cached in Slurm.\n\n"); + if (req_flags & ASSOC_MGR_INFO_FLAG_QOS) + printf("\nNo QOS currently cached in Slurm.\n\n"); } else { printf("\nQOS Records\n\n"); @@ -374,40 +385,90 @@ static void _print_assoc_mgr_info(const char *name, assoc_mgr_info_msg_t *msg) * */ -extern void scontrol_print_assoc_mgr_info(const char *name) +extern void scontrol_print_assoc_mgr_info(int argc, char *argv[]) { - int cc; + char *tag = NULL, *val = NULL; + int cc, tag_len, i; assoc_mgr_info_request_msg_t req; assoc_mgr_info_msg_t *msg = NULL; - /* FIXME: add more filtering in the future */ memset(&req, 0, sizeof(assoc_mgr_info_request_msg_t)); - req.flags = ASSOC_MGR_INFO_FLAG_ASSOC | ASSOC_MGR_INFO_FLAG_USERS | - ASSOC_MGR_INFO_FLAG_QOS; - if (name) { - req.user_list = list_create(NULL); - list_append(req.user_list, (char *)name); + + for (i = 0; i < argc; ++i) { + tag = argv[i]; + tag_len = strlen(tag); + val = strchr(argv[i], '='); + if (val) { + tag_len = val - argv[i]; + val++; + } + + /* Create each list only on first use; if the user repeats an + * entity, later values are appended to the same list. + */ + if (!val || !val[0]) { + fprintf(stderr, "No value given for option %s\n", tag); + goto endit; + } else if (!strncasecmp(tag, "accounts", MAX(tag_len, 1))) { + if (!req.acct_list) + req.acct_list = list_create(slurm_destroy_char); + slurm_addto_char_list(req.acct_list, val); + } else if (!strncasecmp(tag, "flags", MAX(tag_len, 1))) { + if (slurm_strcasestr(val, "users")) + req.flags |= ASSOC_MGR_INFO_FLAG_USERS; + if (slurm_strcasestr(val, "assoc")) + req.flags |= ASSOC_MGR_INFO_FLAG_ASSOC; + if (slurm_strcasestr(val, "qos")) + req.flags |= ASSOC_MGR_INFO_FLAG_QOS; + + if (!req.flags) { + fprintf(stderr, "invalid flag '%s', " + "valid options are " + "'Assoc, QOS, and/or Users'\n", + val); + goto endit; + } + } else if (!strncasecmp(tag, "qos", MAX(tag_len, 1))) { + if (!req.qos_list) + req.qos_list = list_create(slurm_destroy_char); + slurm_addto_char_list(req.qos_list, val); + } else if (!strncasecmp(tag, "users", MAX(tag_len, 1))) { + if (!req.user_list) + req.user_list = list_create(slurm_destroy_char); + slurm_addto_char_list(req.user_list, val); + } else { + exit_code = 1; + if (quiet_flag != 1) + fprintf(stderr, "invalid entity: %s for keyword" + ":show assoc_mgr\n", tag); + goto endit; + } } + + if (!req.flags) + req.flags = ASSOC_MGR_INFO_FLAG_ASSOC | + ASSOC_MGR_INFO_FLAG_USERS | + ASSOC_MGR_INFO_FLAG_QOS; + + req_flags = req.flags; + /* call the controller to get the meat */ cc = slurm_load_assoc_mgr_info(&req, &msg); - FREE_NULL_LIST(req.user_list); - - if (cc != SLURM_PROTOCOL_SUCCESS) { + if (cc == SLURM_PROTOCOL_SUCCESS) { + /* print the info + */ + _print_assoc_mgr_info(msg); + } else { /* Hosed, crap out.
*/ exit_code = 1; if (quiet_flag != 1) slurm_perror("slurm_load_assoc_mgr_info error"); - return; } - /* print the info - */ - _print_assoc_mgr_info(name, msg); - - /* free at last - */ slurm_free_assoc_mgr_info_msg(msg); +endit: + slurm_free_assoc_mgr_info_request_members(&req); return; } diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index b8b0eef6c..3fce0ce46 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -1475,10 +1475,11 @@ _show_it (int argc, char *argv[]) return; } - if (strncasecmp (argv[1], "layouts", MAX(tag_len, 2)) != 0) + if (strncasecmp (argv[1], "layouts", MAX(tag_len, 2)) == 0 || + strncasecmp (argv[1], "assoc_mgr", MAX(tag_len, 2)) == 0) allow_opt = true; - if (argc > 3 && allow_opt) { + if (argc > 3 && !allow_opt) { exit_code = 1; if (quiet_flag != 1) fprintf(stderr, @@ -1510,7 +1511,7 @@ _show_it (int argc, char *argv[]) scontrol_print_burst_buffer (); } else if (!strncasecmp(tag, "assoc_mgr", MAX(tag_len, 2)) || !strncasecmp(tag, "cache", MAX(tag_len, 2))) { - scontrol_print_assoc_mgr_info(val); + scontrol_print_assoc_mgr_info(argc - 2, argv + 2); } else if (strncasecmp (tag, "config", MAX(tag_len, 1)) == 0) { _print_config (val); } else if (strncasecmp (tag, "daemons", MAX(tag_len, 1)) == 0) { diff --git a/src/scontrol/scontrol.h b/src/scontrol/scontrol.h index 7f14e88e0..3349eaac2 100644 --- a/src/scontrol/scontrol.h +++ b/src/scontrol/scontrol.h @@ -137,7 +137,7 @@ extern int scontrol_load_partitions (partition_info_msg_t ** part_info_pptr); extern int scontrol_load_block (block_info_msg_t **block_info_pptr); extern void scontrol_pid_info(pid_t job_pid); -extern void scontrol_print_assoc_mgr_info(const char *name); +extern void scontrol_print_assoc_mgr_info(int argc, char *argv[]); extern void scontrol_print_burst_buffer(void); extern void scontrol_print_completing (void); extern void scontrol_print_completing_job(job_info_t *job_ptr, diff --git a/src/slurmctld/acct_policy.c b/src/slurmctld/acct_policy.c index 8313858fb..76eb71c42 100644 --- a/src/slurmctld/acct_policy.c +++ b/src/slurmctld/acct_policy.c @@ -608,8 +608,10 @@ static void _set_time_limit(uint32_t *time_limit, uint32_t part_max_time, else (*time_limit) = MIN(limit_max_time, part_max_time); - (*limit_set_time) = 1; - } else if ((*limit_set_time) && ((*time_limit) > limit_max_time)) + if (limit_set_time) + (*limit_set_time) = 1; + } else if (limit_set_time && (*limit_set_time) && + ((*time_limit) > limit_max_time)) (*time_limit) = limit_max_time; } @@ -2524,6 +2526,7 @@ extern bool acct_policy_job_runnable_post_select( uint64_t tres_usage_mins[slurmctld_tres_cnt]; uint64_t tres_run_mins[slurmctld_tres_cnt]; uint64_t job_tres_time_limit[slurmctld_tres_cnt]; + uint32_t time_limit; bool rc = true; bool safe_limits = false; int i, tres_pos; @@ -2534,6 +2537,7 @@ extern bool acct_policy_job_runnable_post_select( READ_LOCK, NO_LOCK, NO_LOCK }; xassert(job_ptr); + xassert(job_ptr->part_ptr); xassert(tres_req_cnt); /* check to see if we are enforcing associations */ @@ -2566,10 +2570,16 @@ extern bool acct_policy_job_runnable_post_select( memset(tres_run_mins, 0, sizeof(tres_run_mins)); memset(tres_usage_mins, 0, sizeof(tres_usage_mins)); memset(job_tres_time_limit, 0, sizeof(job_tres_time_limit)); - for (i=0; i<slurmctld_tres_cnt; i++) { - job_tres_time_limit[i] = (uint64_t)job_ptr->time_limit * - tres_req_cnt[i]; - } + + /* time_limit may be NO_VAL if the partition does not have + * a DefaultTime, in which case the partition max_time should + * be used instead */ + 
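/*
 * A self-contained sketch (not Slurm's _set_time_limit(), which handles
 * more cases) of the fallback the hunk below applies: resolve an effective
 * time limit from the job, else the partition's DefaultTime, else its
 * MaxTime, and only then multiply by the per-TRES request.  NO_VAL mirrors
 * the unset sentinel from slurm.h; resolve_time_limit() is a hypothetical
 * helper name used only for this illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define NO_VAL (0xfffffffe)

static uint32_t resolve_time_limit(uint32_t job_limit, uint32_t part_default,
                                   uint32_t part_max)
{
    if (job_limit != NO_VAL)
        return job_limit;      /* job supplied its own limit */
    if (part_default != NO_VAL)
        return part_default;   /* fall back to partition DefaultTime */
    return part_max;           /* no DefaultTime: use partition MaxTime */
}

int main(void)
{
    uint32_t tres_req_cnt = 16;    /* e.g. 16 CPUs requested */
    uint32_t limit = resolve_time_limit(NO_VAL, NO_VAL, 60);
    uint64_t job_tres_time_limit = (uint64_t)limit * tres_req_cnt;

    /* Without the fallback, NO_VAL * tres_req_cnt would wildly
     * overstate the per-TRES time limit. */
    printf("effective limit %u min -> %llu cpu-minutes\n",
           limit, (unsigned long long)job_tres_time_limit);
    return 0;
}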
time_limit = job_ptr->time_limit; + _set_time_limit(&time_limit, job_ptr->part_ptr->max_time, + job_ptr->part_ptr->default_time, NULL); + + for (i=0; i<slurmctld_tres_cnt; i++) + job_tres_time_limit[i] = (uint64_t)time_limit * tres_req_cnt[i]; slurmdb_init_qos_rec(&qos_rec, 0, INFINITE); diff --git a/src/slurmctld/backup.c b/src/slurmctld/backup.c index eb4cb1646..1db689af5 100644 --- a/src/slurmctld/backup.c +++ b/src/slurmctld/backup.c @@ -380,7 +380,6 @@ static void *_background_rpc_mgr(void *no_data) (slurmctld_config.shutdown_time == 0)) slurmctld_config.shutdown_time = time(NULL); - slurm_free_msg_data(msg->msg_type, msg->data); slurm_free_msg(msg); slurm_close(newsockfd); /* close new socket */ diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 3f1d1713e..6ceebf0bc 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -128,12 +128,6 @@ * check-in before we ping them */ #define SHUTDOWN_WAIT 2 /* Time to wait for backup server shutdown */ -#if (0) -/* If defined and FastSchedule=0 in slurm.conf, then report the CPU count that a - * node registers with rather than the CPU count defined for the node in slurm.conf */ -#define SLURM_NODE_ACCT_REGISTER 1 -#endif - /**************************************************************************\ * To test for memory leaks, set MEMORY_LEAK_DEBUG to 1 using * "configure --enable-memory-leak-debug" then execute @@ -239,6 +233,7 @@ static void * _slurmctld_rpc_mgr(void *no_data); static void * _slurmctld_signal_hand(void *no_data); static void _test_thread_limit(void); inline static void _update_cred_key(void); +static void _verify_clustername(void); static void _update_nice(void); inline static void _usage(char *prog_name); static bool _valid_controller(void); @@ -277,6 +272,11 @@ int main(int argc, char *argv[]) slurm_conf_reinit(slurm_conf_filename); update_logging(); + + /* verify clustername from conf matches value in spool dir + * exit if inconsistent to protect state files from corruption */ + _verify_clustername(); + _update_nice(); _kill_old_slurmctld(); @@ -1175,12 +1175,12 @@ static int _accounting_cluster_ready(void) time_t event_time = time(NULL); bitstr_t *total_node_bitmap = NULL; char *cluster_nodes = NULL, *cluster_tres_str; - slurmctld_lock_t node_read_lock = { - NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; + slurmctld_lock_t node_write_lock = { + NO_LOCK, NO_LOCK, WRITE_LOCK, WRITE_LOCK }; assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; - lock_slurmctld(node_read_lock); + lock_slurmctld(node_write_lock); /* Now get the names of all the nodes on the cluster at this time and send it also. */ @@ -1197,7 +1197,7 @@ static int _accounting_cluster_ready(void) assoc_mgr_tres_list, TRES_STR_FLAG_SIMPLE); assoc_mgr_unlock(&locks); - unlock_slurmctld(node_read_lock); + unlock_slurmctld(node_write_lock); rc = clusteracct_storage_g_cluster_tres(acct_db_conn, cluster_nodes, @@ -1903,7 +1903,9 @@ static void *_slurmctld_background(void *no_data) if (difftime(now, last_node_acct) >= PERIODIC_NODE_ACCT) { /* Report current node state to account for added - * or reconfigured nodes */ + * or reconfigured nodes. Locks are done + * inside _accounting_cluster_ready, don't + * lock here. 
*/ now = time(NULL); last_node_acct = now; _accounting_cluster_ready(); @@ -2148,7 +2150,6 @@ extern void set_cluster_tres(bool assoc_mgr_locked) if (node_ptr->name == '\0') continue; -#ifdef SLURM_NODE_ACCT_REGISTER if (slurmctld_conf.fast_schedule) { cpu_count += node_ptr->config_ptr->cpus; mem_count += node_ptr->config_ptr->real_memory; @@ -2156,11 +2157,7 @@ extern void set_cluster_tres(bool assoc_mgr_locked) cpu_count += node_ptr->cpus; mem_count += node_ptr->real_memory; } -#else - cpu_count += node_ptr->config_ptr->cpus; - mem_count += node_ptr->config_ptr->real_memory; -#endif cluster_cpus += cpu_count; if (mem_tres) mem_tres->count += mem_count; @@ -2531,6 +2528,52 @@ static void _update_nice(void) error("Unable to reset nice value to %d: %m", new_nice); } +/* Verify that ClusterName from slurm.conf matches the state directory. + * If mismatched, exit to protect state files from corruption. + * If the clustername file does not exist, create it. */ +static void _verify_clustername(void) +{ + FILE *fp; + char *filename = NULL; + char name[512]; + xstrfmtcat(filename, "%s/clustername", + slurmctld_conf.state_save_location); + + if ((fp = fopen(filename, "r"))) { + /* read value and compare */ + fgets(name, sizeof(name), fp); + if (xstrcmp(name, slurmctld_conf.cluster_name)) { + fatal("CLUSTER NAME MISMATCH.\n" + "slurmctld has been started with \"" + "ClusterName=%s\", but read \"%s\" from " + "the state files in StateSaveLocation.\n" + "Running multiple clusters from a shared " + "StateSaveLocation WILL CAUSE CORRUPTION.\n" + "Remove %s to override this safety check if " + "this is intentional (e.g., the ClusterName " + "has changed).", + slurmctld_conf.cluster_name, name, filename); + exit(1); + } + } else if (slurmctld_conf.cluster_name) { + debug("creating clustername file: %s", filename); + if (!(fp = fopen(filename, "w"))) { + fatal("%s: failed to create file %s", + __FUNCTION__, filename); + exit(1); + } + + if (fputs(slurmctld_conf.cluster_name, fp) < 0) { + fatal("%s: failed to write to file %s", + __FUNCTION__, filename); + exit(1); + } + } + + xfree(filename); + if (fp) fclose(fp); +} + /* Kill the currently running slurmctld * NOTE: No need to lock the config data since we are still single-threaded */ static void _kill_old_slurmctld(void) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index d592d3262..6689f1913 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -446,8 +446,8 @@ static struct job_record *_create_job_record(int *error_code, uint32_t num_jobs) struct job_details *detail_ptr; if ((job_count + num_jobs) >= slurmctld_conf.max_job_cnt) { - error("_create_job_record: MaxJobCount reached (%u)", - slurmctld_conf.max_job_cnt); + error("%s: MaxJobCount limit from slurm.conf reached (%u)", + __func__, slurmctld_conf.max_job_cnt); } job_count += num_jobs; @@ -4012,17 +4012,14 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, struct job_record *job_ptr; time_t now = time(NULL); - if (job_specs->array_bitmap) { + if (job_specs->array_bitmap) i = bit_set_count(job_specs->array_bitmap); - if ((job_count + i) >= slurmctld_conf.max_job_cnt) { - info("%s: MaxJobCount limit reached (%d + %d >= %u)", - __func__, job_count, i, - slurmctld_conf.max_job_cnt); - return EAGAIN; - } - } else if (job_count >= slurmctld_conf.max_job_cnt) { - info("%s: MaxJobCount limit reached (%u)", - __func__, slurmctld_conf.max_job_cnt); + else + i = 1; + + if ((job_count + i) >= slurmctld_conf.max_job_cnt) { + error("%s: MaxJobCount limit from
slurm.conf reached (%u)", + __func__, slurmctld_conf.max_job_cnt); return EAGAIN; } @@ -4756,7 +4753,6 @@ extern int prolog_complete(uint32_t job_id, { struct job_record *job_ptr; - debug("completing prolog for job %u", job_id); job_ptr = find_job_record(job_id); if (job_ptr == NULL) { info("prolog_complete: invalid JobId=%u", job_id); @@ -10446,28 +10442,6 @@ static int _update_job(struct job_record *job_ptr, job_desc_msg_t * job_specs, info("update_job: setting num_tasks to %u for " "job_id %u", job_specs->num_tasks, job_ptr->job_id); - if (detail_ptr->cpus_per_task) { - uint32_t new_cpus = detail_ptr->num_tasks - / detail_ptr->cpus_per_task; - if ((new_cpus < detail_ptr->min_cpus) || - (!detail_ptr->overcommit && - (new_cpus > detail_ptr->min_cpus))) { - detail_ptr->min_cpus = new_cpus; - detail_ptr->max_cpus = new_cpus; - info("update_job: setting " - "min_cpus to %u for " - "job_id %u", detail_ptr->min_cpus, - job_ptr->job_id); - /* Always use the - * acct_policy_limit_set.* - * since if set by a - * super user it be set correctly */ - job_ptr->limit_set. - tres[TRES_ARRAY_CPU] = - acct_policy_limit_set. - tres[TRES_ARRAY_CPU]; - } - } } } if (error_code != SLURM_SUCCESS) diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 3c1cfeea1..b7b12c96d 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -998,6 +998,7 @@ static int _schedule(uint32_t job_limit) static time_t sched_update = 0; static bool wiki_sched = false; static bool fifo_sched = false; + static bool assoc_limit_continue = false; static int sched_timeout = 0; static int sched_max_job_start = 0; static int bf_min_age_reserve = 0; @@ -1054,6 +1055,11 @@ static int _schedule(uint32_t job_limit) sched_params = slurm_get_sched_params(); + if (sched_params && + (strstr(sched_params, "assoc_limit_continue"))) + assoc_limit_continue = true; + else + assoc_limit_continue = false; if (sched_params && (tmp_ptr=strstr(sched_params, "batch_sched_delay="))) @@ -1660,7 +1666,9 @@ next_task: } else if (error_code == ESLURM_ACCOUNTING_POLICY) { debug3("sched: JobId=%u delayed for accounting policy", job_ptr->job_id); - fail_by_part = true; + /* potentially stall the queue */ + if (!assoc_limit_continue) + fail_by_part = true; } else if ((error_code != ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE) && (error_code != ESLURM_NODE_NOT_AVAIL) && @@ -3317,8 +3325,9 @@ static void *_wait_boot(void *arg) unlock_slurmctld(job_write_lock); } while (wait_node_cnt); - if (job_ptr->details) - job_ptr->details->prolog_running--; + lock_slurmctld(job_write_lock); + prolog_running_decr(job_ptr); + unlock_slurmctld(job_write_lock); return NULL; } diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index eae435ebb..d7e386cff 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -557,6 +557,27 @@ extern int load_all_node_state ( bool state_only ) node_ptr->reason_time = reason_time; node_ptr->reason_uid = reason_uid; } + if (!slurmctld_conf.fast_schedule) { + /* Accounting will need to know the + * last state here otherwise we will + * report incorrect information + * waiting for the node to register. 
*/ + node_ptr->cpus = cpus; + node_ptr->boards = boards; + node_ptr->sockets = sockets; + node_ptr->cores = cores; + node_ptr->core_spec_cnt = + core_spec_cnt; + xfree(node_ptr->cpu_spec_list); + node_ptr->cpu_spec_list = + cpu_spec_list; + cpu_spec_list = NULL; /* Nothing to free */ + node_ptr->threads = threads; + node_ptr->real_memory = real_memory; + node_ptr->mem_spec_limit = + mem_spec_limit; + node_ptr->tmp_disk = tmp_disk; + } node_ptr->gres_list = gres_list; gres_list = NULL; /* Nothing to free */ } else { @@ -603,6 +624,7 @@ extern int load_all_node_state ( bool state_only ) node_ptr->part_cnt = 0; xfree(node_ptr->part_pptr); node_ptr->cpus = cpus; + node_ptr->boards = boards; node_ptr->sockets = sockets; node_ptr->cores = cores; node_ptr->core_spec_cnt = core_spec_cnt; diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index 648f5aa69..dbf9fade9 100644 --- a/src/slurmctld/partition_mgr.c +++ b/src/slurmctld/partition_mgr.c @@ -135,7 +135,9 @@ static void _calc_part_tres(struct part_record *part_ptr) } /* - * Calcuate and populate the number of tres' for all partitions. + * Calculate and populate the number of tres' for all + * partitions. Partition write and Node read lock should be set before + * calling this. */ extern void set_partition_tres() { diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c index 196a9dbb0..0a1909d1b 100644 --- a/src/slurmctld/reservation.c +++ b/src/slurmctld/reservation.c @@ -155,7 +155,7 @@ static uint32_t _get_job_duration(struct job_record *job_ptr); static bool _is_account_valid(char *account); static bool _is_resv_used(slurmctld_resv_t *resv_ptr); static bool _job_overlap(time_t start_time, uint32_t flags, - bitstr_t *node_bitmap); + bitstr_t *node_bitmap, char *resv_name); static List _list_dup(List license_list); static int _open_resv_state_file(char **state_file); static void _pack_resv(slurmctld_resv_t *resv_ptr, Buf buffer, @@ -1581,10 +1581,12 @@ unpack_error: /* * Test if a new/updated reservation request will overlap running jobs + * Ignore jobs already running in that specific reservation + * resv_name IN - Name of existing reservation or NULL * RET true if overlap */ static bool _job_overlap(time_t start_time, uint32_t flags, - bitstr_t *node_bitmap) + bitstr_t *node_bitmap, char *resv_name) { ListIterator job_iterator; struct job_record *job_ptr; @@ -1600,7 +1602,9 @@ static bool _job_overlap(time_t start_time, uint32_t flags, while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if (IS_JOB_RUNNING(job_ptr) && (job_ptr->end_time > start_time) && - (bit_overlap(job_ptr->node_bitmap, node_bitmap) > 0)) { + (bit_overlap(job_ptr->node_bitmap, node_bitmap) > 0) && + ((resv_name == NULL) || + (xstrcmp(resv_name, job_ptr->resv_name) != 0))) { overlap = true; break; } @@ -2092,7 +2096,7 @@ extern int create_resv(resv_desc_msg_t *resv_desc_ptr) if (!(resv_desc_ptr->flags & RESERVE_FLAG_IGN_JOBS) && !resv_desc_ptr->core_cnt && _job_overlap(resv_desc_ptr->start_time, - resv_desc_ptr->flags, node_bitmap)) { + resv_desc_ptr->flags, node_bitmap, NULL)) { info("Reservation request overlaps jobs"); rc = ESLURM_NODES_BUSY; goto bad_parse; @@ -2614,7 +2618,7 @@ extern int update_resv(resv_desc_msg_t *resv_desc_ptr) goto update_failure; } if (_job_overlap(resv_ptr->start_time, resv_ptr->flags, - resv_ptr->node_bitmap)) { + resv_ptr->node_bitmap, resv_desc_ptr->name)) { info("Reservation %s request overlaps jobs", resv_desc_ptr->name); error_code = ESLURM_NODES_BUSY; @@ -2982,7 +2986,7 @@ static void
_rebuild_core_bitmap(slurmctld_resv_t *resv_ptr) ListIterator job_iterator; struct job_record *job_ptr; - info("Core_bitmap for reservation %s no longer valid, cores addded or removed, rebuilding", + info("Core_bitmap for reservation %s no longer valid, cores added or removed, rebuilding", resv_ptr->name); core_cnt = bit_set_count(resv_ptr->core_bitmap); /* Cores needed */ @@ -3265,10 +3269,15 @@ static void _resv_node_replace(slurmctld_resv_t *resv_ptr) resv_desc.start_time = resv_ptr->start_time; resv_desc.end_time = resv_ptr->end_time; resv_desc.features = resv_ptr->features; + if (!resv_ptr->full_nodes) { + resv_desc.core_cnt = xmalloc(sizeof(uint32_t) * 2); + resv_desc.core_cnt[0] = resv_ptr->core_cnt; + } resv_desc.node_cnt = xmalloc(sizeof(uint32_t) * 2); resv_desc.node_cnt[0] = add_nodes; i = _select_nodes(&resv_desc, &resv_ptr->part_ptr, &new_bitmap, &core_bitmap); + xfree(resv_desc.core_cnt); xfree(resv_desc.node_cnt); xfree(resv_desc.node_list); xfree(resv_desc.partition); @@ -3324,6 +3333,7 @@ static void _validate_node_choice(slurmctld_resv_t *resv_ptr) resv_desc_msg_t resv_desc; if ((resv_ptr->node_bitmap == NULL) || + (!resv_ptr->full_nodes && (resv_ptr->node_cnt > 1)) || (resv_ptr->flags & RESERVE_FLAG_SPEC_NODES) || (resv_ptr->flags & RESERVE_FLAG_STATIC)) return; @@ -3345,10 +3355,15 @@ static void _validate_node_choice(slurmctld_resv_t *resv_ptr) resv_desc.start_time = resv_ptr->start_time; resv_desc.end_time = resv_ptr->end_time; resv_desc.features = resv_ptr->features; + if (!resv_ptr->full_nodes) { + resv_desc.core_cnt = xmalloc(sizeof(uint32_t) * 2); + resv_desc.core_cnt[0] = resv_ptr->core_cnt; + } resv_desc.node_cnt = xmalloc(sizeof(uint32_t) * 2); resv_desc.node_cnt[0]= resv_ptr->node_cnt - i; i = _select_nodes(&resv_desc, &resv_ptr->part_ptr, &tmp_bitmap, &core_bitmap); + xfree(resv_desc.core_cnt); xfree(resv_desc.node_cnt); xfree(resv_desc.node_list); xfree(resv_desc.partition); @@ -3681,8 +3696,8 @@ static int _select_nodes(resv_desc_msg_t *resv_desc_ptr, (end_relative <= resv_desc_ptr->start_time)) continue; if (!resv_ptr->core_bitmap && !resv_ptr->full_nodes) { - error("Reservation has no core_bitmap and " - "full_nodes is zero"); + error("Reservation %s has no core_bitmap and " + "full_nodes is zero", resv_ptr->name); resv_ptr->full_nodes = 1; } if (resv_ptr->full_nodes || !resv_desc_ptr->core_cnt) { @@ -4242,6 +4257,7 @@ extern void job_claim_resv(struct job_record *job_ptr) resv_ptr = (slurmctld_resv_t *) list_find_first (resv_list, _find_resv_name, job_ptr->resv_name); if (!resv_ptr || + (!resv_ptr->full_nodes && (resv_ptr->node_cnt > 1)) || !(resv_ptr->flags & RESERVE_FLAG_REPLACE) || (resv_ptr->flags & RESERVE_FLAG_SPEC_NODES) || (resv_ptr->flags & RESERVE_FLAG_STATIC)) diff --git a/src/slurmctld/state_save.c b/src/slurmctld/state_save.c index 07cd0a396..20832ac41 100644 --- a/src/slurmctld/state_save.c +++ b/src/slurmctld/state_save.c @@ -174,11 +174,9 @@ extern void *slurmctld_state_save(void *no_data) double save_delay; bool run_save; int save_count; - int cc; #if HAVE_SYS_PRCTL_H - cc = prctl(PR_SET_NAME, "slurmctld_sstate", NULL, NULL, NULL); - if (cc < 0) { + if (prctl(PR_SET_NAME, "slurmctld_sstate", NULL, NULL, NULL) < 0) { error("%s: cannot set my name to %s %m", __func__, "slurmctld_sstate"); } diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index e6a8d6d0a..309c91d85 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -50,7 +50,7 @@ #include <stdlib.h> #include <string.h> #include <sys/param.h> -#include 
<sys/poll.h> +#include <poll.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/un.h> diff --git a/src/slurmd/slurmstepd/io.c b/src/slurmd/slurmstepd/io.c index 5f73eec4b..266d2addf 100644 --- a/src/slurmd/slurmstepd/io.c +++ b/src/slurmd/slurmstepd/io.c @@ -64,7 +64,7 @@ # include <utmp.h> #endif -#include <sys/poll.h> +#include <poll.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/stat.h> diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index b7b5f4624..342a31149 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -62,7 +62,7 @@ #include <sys/wait.h> #include <sys/stat.h> #include <sys/param.h> -#include <sys/poll.h> +#include <poll.h> #include <unistd.h> #include <pwd.h> #include <grp.h> diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c index 83c074765..4dfae0975 100644 --- a/src/slurmd/slurmstepd/req.c +++ b/src/slurmd/slurmstepd/req.c @@ -1227,7 +1227,7 @@ static void _block_on_pid(pid_t pid) sleep(1); } -/* Wait for the pid given and when it ends get and childern it might +/* Wait for the pid given and when it ends get and children it might * of left behind and wait on them instead. */ static void *_wait_extern_pid(void *args) @@ -1259,7 +1259,7 @@ static void *_wait_extern_pid(void *args) } acct_gather_profile_g_task_end(pid); - /* See if we have any childern of init left and add them to track. */ + /* See if we have any children of init left and add them to track. */ proctrack_g_get_pids(job->cont_id, &pids, &npids); for (i = 0; i < npids; i++) { snprintf(proc_stat_file, 256, "/proc/%d/stat", pids[i]); diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c index 5f497aeeb..eae10d09e 100644 --- a/src/slurmd/slurmstepd/task.c +++ b/src/slurmd/slurmstepd/task.c @@ -325,7 +325,8 @@ _build_path(char* fname, char **prog_env) dir = strtok(path_env, ":"); while (dir) { snprintf(file_path, len, "%s/%s", dir, file_name); - if (stat(file_path, &stat_buf) == 0) + if ((stat(file_path, &stat_buf) == 0) + && (! S_ISDIR(stat_buf.st_mode))) break; dir = strtok(NULL, ":"); } diff --git a/src/slurmdbd/backup.c b/src/slurmdbd/backup.c index ff105dae0..ab6776a32 100644 --- a/src/slurmdbd/backup.c +++ b/src/slurmdbd/backup.c @@ -36,7 +36,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
\*****************************************************************************/ -#include <sys/poll.h> +#include <poll.h> #include "src/common/xmalloc.h" #include "src/common/slurm_protocol_defs.h" diff --git a/src/slurmdbd/rpc_mgr.c b/src/slurmdbd/rpc_mgr.c index 107548605..f449c063c 100644 --- a/src/slurmdbd/rpc_mgr.c +++ b/src/slurmdbd/rpc_mgr.c @@ -43,7 +43,7 @@ #include <arpa/inet.h> #include <pthread.h> #include <signal.h> -#include <sys/poll.h> +#include <poll.h> #include <sys/time.h> #include <sys/types.h> #include <sys/socket.h> diff --git a/src/sreport/cluster_reports.c b/src/sreport/cluster_reports.c index ec2280722..c816b19d1 100644 --- a/src/sreport/cluster_reports.c +++ b/src/sreport/cluster_reports.c @@ -60,16 +60,6 @@ enum { PRINT_CLUSTER_TRES_NAME, }; -typedef enum { - GROUP_BY_ACCOUNT, - GROUP_BY_ACCOUNT_JOB_SIZE, - GROUP_BY_ACCOUNT_JOB_SIZE_DURATION, - GROUP_BY_USER, - GROUP_BY_USER_JOB_SIZE, - GROUP_BY_USER_JOB_SIZE_DURATION, - GROUP_BY_NONE -} report_grouping_t; - static List print_fields_list = NULL; /* types are of print_field_t */ @@ -132,7 +122,8 @@ static int _set_wckey_cond(int *start, int argc, char *argv[], set = 1; } else if (!strncasecmp (argv[i], "End", MAX(command_len, 1))) { wckey_cond->usage_end = parse_time(argv[i]+end, 1); - wckey_cond->usage_end = sanity_check_endtime(wckey_cond->usage_end); + wckey_cond->usage_end = sanity_check_endtime( + wckey_cond->usage_end); set = 1; } else if (!strncasecmp (argv[i], "Format", MAX(command_len, 1))) { @@ -550,6 +541,32 @@ static int _setup_print_fields_list(List format_list) return SLURM_SUCCESS; } +static void _set_usage_column_width(List print_fields_list, + List slurmdb_report_cluster_list) +{ + print_field_t *field, *usage_field = NULL, *energy_field = NULL; + ListIterator itr; + + xassert(print_fields_list); + xassert(slurmdb_report_cluster_list); + + itr = list_iterator_create(print_fields_list); + while ((field = list_next(itr))) { + switch (field->type) { + case PRINT_CLUSTER_AMOUNT_USED: + usage_field = field; + break; + case PRINT_CLUSTER_ENERGY: + energy_field = field; + break; + } + } + list_iterator_destroy(itr); + + sreport_set_usage_column_width(usage_field, energy_field, + slurmdb_report_cluster_list); +} + static List _get_cluster_list(int argc, char *argv[], uint32_t *total_time, char *report_name, List format_list) { @@ -732,7 +749,7 @@ extern int cluster_account_by_user(int argc, char *argv[]) xmalloc(sizeof(slurmdb_assoc_cond_t)); slurmdb_cluster_cond_t cluster_cond; ListIterator itr = NULL; - ListIterator itr2 = NULL; + ListIterator tres_itr = NULL; ListIterator cluster_itr = NULL; List format_list = list_create(slurm_destroy_char); List slurmdb_report_cluster_list = NULL; @@ -794,10 +811,13 @@ extern int cluster_account_by_user(int argc, char *argv[]) "----------------------------------------\n"); } + _set_usage_column_width(print_fields_list, slurmdb_report_cluster_list); + print_fields_header(print_fields_list); list_sort(slurmdb_report_cluster_list, (ListCmpF)sort_cluster_dec); + tres_itr = list_iterator_create(tres_list); cluster_itr = list_iterator_create(slurmdb_report_cluster_list); while ((slurmdb_report_cluster = list_next(cluster_itr))) { //list_sort(slurmdb_report_cluster->assoc_list, @@ -811,8 +831,8 @@ extern int cluster_account_by_user(int argc, char *argv[]) itr = list_iterator_create(slurmdb_report_cluster->assoc_list); while ((slurmdb_report_assoc = list_next(itr))) { slurmdb_tres_rec_t *tres; - itr2 = list_iterator_create(tres_list); - while ((tres = list_next(itr2))) { + 
list_iterator_reset(tres_itr); + while ((tres = list_next(tres_itr))) { if (tres->id == NO_VAL) continue; _cluster_account_by_user_tres_report( @@ -821,11 +841,11 @@ extern int cluster_account_by_user(int argc, char *argv[]) slurmdb_report_assoc, tree_list); } - list_iterator_destroy(itr2); } list_iterator_destroy(itr); } list_iterator_destroy(cluster_itr); + list_iterator_destroy(tres_itr); end_it: slurmdb_destroy_assoc_cond(assoc_cond); @@ -998,12 +1018,12 @@ extern int cluster_user_by_account(int argc, char *argv[]) "----------------------------------------\n"); } + _set_usage_column_width(print_fields_list, slurmdb_report_cluster_list); + print_fields_header(print_fields_list); cluster_itr = list_iterator_create(slurmdb_report_cluster_list); while ((slurmdb_report_cluster = list_next(cluster_itr))) { - list_sort(slurmdb_report_cluster->user_list, - (ListCmpF)sort_user_dec); itr = list_iterator_create(slurmdb_report_cluster->user_list); while ((slurmdb_report_user = list_next(itr))) { slurmdb_tres_rec_t *tres; @@ -1191,12 +1211,12 @@ extern int cluster_user_by_wckey(int argc, char *argv[]) "----------------------------------------\n"); } + _set_usage_column_width(print_fields_list, slurmdb_report_cluster_list); + print_fields_header(print_fields_list); cluster_itr = list_iterator_create(slurmdb_report_cluster_list); while ((slurmdb_report_cluster = list_next(cluster_itr))) { - list_sort(slurmdb_report_cluster->user_list, - (ListCmpF)sort_user_dec); itr = list_iterator_create(slurmdb_report_cluster->user_list); while ((slurmdb_report_user = list_next(itr))) { slurmdb_tres_rec_t *tres; @@ -1221,9 +1241,12 @@ end_it: return rc; } +/* Note the accounting_list in the cluster variable must already be + * processed/summed before calling this function. + */ static void _cluster_util_tres_report(slurmdb_tres_rec_t *tres, - slurmdb_cluster_rec_t *cluster, - uint32_t total_time, List total_tres_acct) + slurmdb_cluster_rec_t *cluster, + uint32_t total_time) { slurmdb_cluster_accounting_rec_t *total_acct; slurmdb_cluster_accounting_rec_t *total_energy; @@ -1237,7 +1260,7 @@ static void _cluster_util_tres_report(slurmdb_tres_rec_t *tres, uint64_t energy_cnt = 0; if (!(total_acct = list_find_first( - total_tres_acct, + cluster->accounting_list, slurmdb_find_cluster_accting_tres_in_list, &tres->id))) { debug2("error, no %s%s%s(%d) TRES!", @@ -1248,9 +1271,7 @@ static void _cluster_util_tres_report(slurmdb_tres_rec_t *tres, return; } - total_reported = total_acct->alloc_secs + total_acct->down_secs - + total_acct->pdown_secs + total_acct->idle_secs - + total_acct->resv_secs; + total_reported = total_acct->tres_rec.alloc_secs; /* ENERGY could be 0 if there is no power cap set, so just say * we reported the whole thing in that case. 
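Since the patch moves the five-bucket summation out of _cluster_util_tres_report() and into its caller, a minimal sketch of that identity may help here; struct acct_bucket and total_reported() are simplified stand-ins for this illustration only (the real record is slurmdb_cluster_accounting_rec_t, with the pre-summed total landing in tres_rec.alloc_secs):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for slurmdb_cluster_accounting_rec_t. */
struct acct_bucket {
    uint64_t alloc_secs, down_secs, pdown_secs, idle_secs, resv_secs;
};

/* Sum the five usage buckets once, as the utilization report now expects
 * the caller to have done, so printing code reads a single total. */
static uint64_t total_reported(const struct acct_bucket *a)
{
    return a->alloc_secs + a->down_secs + a->pdown_secs +
           a->idle_secs + a->resv_secs;
}

int main(void)
{
    struct acct_bucket a = { 3600, 60, 0, 300, 40 };

    printf("reported = %llu secs\n",
           (unsigned long long)total_reported(&a));
    return 0;
}

Summing once up front also lets the report size every usage column from the same precomputed totals before any row is printed.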
@@ -1313,9 +1334,10 @@ static void _cluster_util_tres_report(slurmdb_tres_rec_t *tres, /* For backward compatibility with pre-TRES logic, * get energy_cnt here */ tres_energy = TRES_ENERGY; - if ((total_energy = list_find_first(total_tres_acct, - slurmdb_find_cluster_accting_tres_in_list, - &tres_energy))) + if ((total_energy = list_find_first( + cluster->accounting_list, + slurmdb_find_cluster_accting_tres_in_list, + &tres_energy))) energy_cnt = total_energy->tres_rec.count; field->print_routine(field, energy_cnt, energy_cnt, (curr_inx == field_count)); @@ -1351,10 +1373,12 @@ extern int cluster_utilization(int argc, char *argv[]) uint32_t total_time = 0; List cluster_list = NULL; List format_list = list_create(slurm_destroy_char); + slurmdb_cluster_accounting_rec_t total_acct; + print_field_t *field; + slurmdb_tres_rec_t *tres; print_fields_list = list_create(destroy_print_field); - if (!(cluster_list = _get_cluster_list(argc, argv, &total_time, "Cluster Utilization", format_list))) @@ -1373,12 +1397,10 @@ extern int cluster_utilization(int argc, char *argv[]) _setup_print_fields_list(format_list); FREE_NULL_LIST(format_list); - print_fields_header(print_fields_list); - + memset(&total_acct, 0, sizeof(slurmdb_cluster_accounting_rec_t)); itr = list_iterator_create(cluster_list); while ((cluster = list_next(itr))) { slurmdb_cluster_accounting_rec_t *accting = NULL; - slurmdb_tres_rec_t *tres; List total_tres_acct = NULL; if (!cluster->accounting_list @@ -1392,19 +1414,109 @@ extern int cluster_utilization(int argc, char *argv[]) } list_iterator_destroy(itr3); - itr3 = list_iterator_create(total_tres_acct); - while ((accting = list_next(itr3))) { + /* Swap out the accounting list for the total tres + * acct list. This way we can figure out what the + * largest number is before we have to print the + * columns. 
+ */ + FREE_NULL_LIST(cluster->accounting_list); + cluster->accounting_list = total_tres_acct; + total_tres_acct = NULL; + + itr2 = list_iterator_create(tres_list); + while ((tres = list_next(itr2))) { + if (tres->id == NO_VAL) + continue; + + if (!(accting = list_find_first( + cluster->accounting_list, + slurmdb_find_cluster_accting_tres_in_list, + &tres->id))) { + continue; + } + accting->tres_rec.count /= accting->tres_rec.rec_count; + + total_acct.alloc_secs = MAX(total_acct.alloc_secs, + accting->alloc_secs); + total_acct.down_secs = MAX(total_acct.down_secs, + accting->down_secs); + total_acct.idle_secs = MAX(total_acct.idle_secs, + accting->idle_secs); + total_acct.resv_secs = MAX(total_acct.resv_secs, + accting->resv_secs); + total_acct.over_secs = MAX(total_acct.over_secs, + accting->over_secs); + total_acct.pdown_secs = MAX(total_acct.pdown_secs, + accting->pdown_secs); + + accting->tres_rec.alloc_secs = + accting->alloc_secs + + accting->down_secs + + accting->pdown_secs + + accting->idle_secs + + accting->resv_secs; + + total_acct.tres_rec.alloc_secs = MAX( + total_acct.tres_rec.alloc_secs, + accting->tres_rec.alloc_secs); } list_iterator_destroy(itr3); + } + + itr = list_iterator_create(print_fields_list); + while ((field = list_next(itr))) { + switch (field->type) { + case PRINT_CLUSTER_TRES_ALLOC: + sreport_set_usage_col_width( + field, total_acct.alloc_secs); + break; + case PRINT_CLUSTER_TRES_DOWN: + sreport_set_usage_col_width( + field, total_acct.down_secs); + break; + case PRINT_CLUSTER_TRES_IDLE: + sreport_set_usage_col_width( + field, total_acct.idle_secs); + break; + case PRINT_CLUSTER_TRES_RESV: + sreport_set_usage_col_width( + field, total_acct.resv_secs); + break; + case PRINT_CLUSTER_TRES_OVER: + sreport_set_usage_col_width( + field, total_acct.over_secs); + break; + case PRINT_CLUSTER_TRES_PLAN_DOWN: + sreport_set_usage_col_width( + field, total_acct.pdown_secs); + break; + case PRINT_CLUSTER_TRES_REPORTED: + sreport_set_usage_col_width( + field, total_acct.tres_rec.alloc_secs); + break; + case PRINT_CLUSTER_ENERGY: + sreport_set_usage_col_width( + field, total_acct.alloc_secs); + break; + } + } + list_iterator_destroy(itr); + + print_fields_header(print_fields_list); + + itr = list_iterator_create(cluster_list); + while ((cluster = list_next(itr))) { + if (!cluster->accounting_list || + !list_count(cluster->accounting_list)) + continue; itr2 = list_iterator_create(tres_list); while ((tres = list_next(itr2))) { if (tres->id == NO_VAL) continue; - _cluster_util_tres_report(tres, cluster, total_time, - total_tres_acct); + _cluster_util_tres_report(tres, cluster, total_time); } list_iterator_destroy(itr2); } @@ -1559,6 +1671,8 @@ extern int cluster_wckey_by_user(int argc, char *argv[]) "----------------------------------------\n"); } + _set_usage_column_width(print_fields_list, slurmdb_report_cluster_list); + print_fields_header(print_fields_list); list_sort(slurmdb_report_cluster_list, (ListCmpF)sort_cluster_dec); diff --git a/src/sreport/common.c b/src/sreport/common.c index 9a7977c36..562c9f9a1 100644 --- a/src/sreport/common.c +++ b/src/sreport/common.c @@ -466,3 +466,113 @@ extern void sreport_set_tres_recs(slurmdb_tres_rec_t **cluster_tres_rec, tres_rec_in->id); } } + +extern void sreport_set_usage_col_width(print_field_t *field, uint64_t number) +{ + uint64_t order, max_order; + + //info("got %p %"PRIu64, field, number); + if (!field) + return; + + order = 100000000; + max_order = order * 100000000000000000; + + /* smallest usage width we want to print; if this changes,
change order and + * max_order appropriately. */ + field->len = 8; + + while (order < max_order) { + if (number < order) + break; + + field->len++; + order *= 10; + } + + if (time_format == SLURMDB_REPORT_TIME_SECS_PER + || time_format == SLURMDB_REPORT_TIME_MINS_PER + || time_format == SLURMDB_REPORT_TIME_HOURS_PER) + field->len += 9; +} + +extern void sreport_set_usage_column_width(print_field_t *usage_field, + print_field_t *energy_field, + List slurmdb_report_cluster_list) +{ + uint64_t max_usage = 0, max_energy = 0; + ListIterator tres_itr, cluster_itr; + slurmdb_report_cluster_rec_t *slurmdb_report_cluster = NULL; + + xassert(slurmdb_report_cluster_list); + + tres_itr = list_iterator_create(tres_list); + cluster_itr = list_iterator_create(slurmdb_report_cluster_list); + while ((slurmdb_report_cluster = list_next(cluster_itr))) { + slurmdb_tres_rec_t *tres, *tres_rec; + List use_list = slurmdb_report_cluster->tres_list; + + /* The first association will always be the largest + * count of any TRES, so just peek at it. If the + * cluster doesn't have associations for some reason + * use the cluster's main one which has the total time. + */ + + if (slurmdb_report_cluster->assoc_list) { + slurmdb_report_assoc_rec_t *slurmdb_report = + list_peek(slurmdb_report_cluster->assoc_list); + if (slurmdb_report) + use_list = slurmdb_report->tres_list; + } else if (slurmdb_report_cluster->user_list) { + slurmdb_report_user_rec_t *slurmdb_report; + list_sort(slurmdb_report_cluster->user_list, + (ListCmpF)sort_user_dec); + slurmdb_report = + list_peek(slurmdb_report_cluster->user_list); + if (slurmdb_report) + use_list = slurmdb_report->tres_list; + } else { + error("%s: unknown type of slurmdb_report_cluster " + "given for cluster %s", + __func__, slurmdb_report_cluster->name); + continue; + } + + if (energy_field) { + uint32_t tres_id = TRES_CPU; + if ((tres_rec = list_find_first( + use_list, + slurmdb_find_tres_in_list, + &tres_id))) { + max_usage = MAX(max_usage, + tres_rec->alloc_secs); + } + tres_id = TRES_ENERGY; + if ((tres_rec = list_find_first( + use_list, + slurmdb_find_tres_in_list, + &tres_id))) { + max_energy = MAX(max_energy, + tres_rec->alloc_secs); + } + } else { + list_iterator_reset(tres_itr); + while ((tres = list_next(tres_itr))) { + if (tres->id == NO_VAL) + continue; + if (!(tres_rec = list_find_first( + use_list, + slurmdb_find_tres_in_list, + &tres->id))) + continue; + max_usage = MAX(max_usage, + tres_rec->alloc_secs); + } + } + } + list_iterator_destroy(tres_itr); + list_iterator_destroy(cluster_itr); + + sreport_set_usage_col_width(usage_field, max_usage); + sreport_set_usage_col_width(energy_field, max_energy); +} diff --git a/src/sreport/resv_reports.c b/src/sreport/resv_reports.c index 9a19fbb42..ec451a6f3 100644 --- a/src/sreport/resv_reports.c +++ b/src/sreport/resv_reports.c @@ -57,15 +57,6 @@ enum { PRINT_RESV_TRES_USAGE, }; -typedef enum { - GROUP_BY_ACCOUNT, - GROUP_BY_ACCOUNT_JOB_SIZE, - GROUP_BY_ACCOUNT_JOB_SIZE_DURATION, - GROUP_BY_USER, - GROUP_BY_USER_JOB_SIZE, - GROUP_BY_USER_JOB_SIZE_DURATION, - GROUP_BY_NONE -} report_grouping_t; static List print_fields_list = NULL; /* types are of print_field_t */ @@ -228,9 +219,9 @@ static int _setup_print_fields_list(List format_list) if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) - field->len = 20; + field->len = 29; else - field->len = 9; + field->len = 20; field->print_routine = slurmdb_report_print_time; } else if
(!strncasecmp("Associations", object, MAX(command_len, 2))) { @@ -261,9 +252,9 @@ static int _setup_print_fields_list(List format_list) if (time_format == SLURMDB_REPORT_TIME_SECS_PER || time_format == SLURMDB_REPORT_TIME_MINS_PER || time_format == SLURMDB_REPORT_TIME_HOURS_PER) - field->len = 20; + field->len = 29; else - field->len = 9; + field->len = 20; field->print_routine = slurmdb_report_print_time; } else if (!strncasecmp("Name", object, MAX(command_len, 2))) { diff --git a/src/sreport/sreport.h b/src/sreport/sreport.h index a9519cd57..68d6347d5 100644 --- a/src/sreport/sreport.h +++ b/src/sreport/sreport.h @@ -90,6 +90,16 @@ #define CKPT_WAIT 10 #define MAX_INPUT_FIELDS 128 +typedef enum { + GROUP_BY_ACCOUNT, + GROUP_BY_ACCOUNT_JOB_SIZE, + GROUP_BY_ACCOUNT_JOB_SIZE_DURATION, + GROUP_BY_USER, + GROUP_BY_USER_JOB_SIZE, + GROUP_BY_USER_JOB_SIZE_DURATION, + GROUP_BY_NONE +} report_grouping_t; + extern slurmdb_report_time_format_t time_format; extern char *time_format_string; extern char *command_name; @@ -125,4 +135,13 @@ extern void sreport_set_tres_recs(slurmdb_tres_rec_t **cluster_tres_rec, List cluster_tres_list, List tres_list, slurmdb_tres_rec_t *tres_rec_in); +/* Since usage columns can get big, instead of always giving a 20 + * column spacing, figure it out here. + */ +extern void sreport_set_usage_col_width(print_field_t *field, uint64_t number); + +extern void sreport_set_usage_column_width(print_field_t *usage_field, + print_field_t *energy_field, + List slurmdb_report_cluster_list); + #endif /* HAVE_SREPORT_H */ diff --git a/src/sreport/user_reports.c b/src/sreport/user_reports.c index 120867925..8071268ab 100644 --- a/src/sreport/user_reports.c +++ b/src/sreport/user_reports.c @@ -361,6 +361,32 @@ static void _user_top_tres_report(slurmdb_tres_rec_t *tres, printf("\n"); } +static void _set_usage_column_width(List print_fields_list, + List slurmdb_report_cluster_list) +{ + print_field_t *field, *usage_field = NULL, *energy_field = NULL; + ListIterator itr; + + xassert(print_fields_list); + xassert(slurmdb_report_cluster_list); + + itr = list_iterator_create(print_fields_list); + while ((field = list_next(itr))) { + switch (field->type) { + case PRINT_USER_USED: + usage_field = field; + break; + case PRINT_USER_ENERGY: + energy_field = field; + break; + } + } + list_iterator_destroy(itr); + + sreport_set_usage_column_width(usage_field, energy_field, + slurmdb_report_cluster_list); +} + extern int user_top(int argc, char *argv[]) { int rc = SLURM_SUCCESS; @@ -422,6 +448,8 @@ extern int user_top(int argc, char *argv[]) "----------------------------------------\n"); } + _set_usage_column_width(print_fields_list, slurmdb_report_cluster_list); + print_fields_header(print_fields_list); cluster_itr = list_iterator_create(slurmdb_report_cluster_list); diff --git a/src/srun/libsrun/allocate.c b/src/srun/libsrun/allocate.c index f5318afda..d74df0b58 100644 --- a/src/srun/libsrun/allocate.c +++ b/src/srun/libsrun/allocate.c @@ -43,7 +43,7 @@ #include <stdlib.h> #include <unistd.h> -#include <sys/poll.h> +#include <poll.h> #include <sys/types.h> #include <pwd.h> @@ -719,7 +719,7 @@ job_desc_msg_create_from_opts (void) } j->user_id = opt.uid; j->dependency = opt.dependency; - if (opt.nice) + if (opt.nice != NO_VAL) j->nice = NICE_OFFSET + opt.nice; if (opt.priority) j->priority = opt.priority; diff --git a/src/srun/libsrun/multi_prog.c b/src/srun/libsrun/multi_prog.c index 8e69ae872..a67df0476 100644 --- a/src/srun/libsrun/multi_prog.c +++ b/src/srun/libsrun/multi_prog.c @@ -99,7 +99,8 
@@ _build_path(char* fname) dir = strtok_r(path_env, ":", &ptrptr); while (dir) { snprintf(file_path, sizeof(file_path), "%s/%s", dir, file_name); - if (stat(file_path, &buf) == 0) + if ((stat(file_path, &buf) == 0) + && (! S_ISDIR(buf.st_mode))) break; dir = strtok_r(NULL, ":", &ptrptr); } diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c index 113f64bed..e16e4ff6e 100644 --- a/src/srun/libsrun/opt.c +++ b/src/srun/libsrun/opt.c @@ -547,7 +547,7 @@ static void _opt_default(void) opt.launcher_opts = NULL; opt.launch_cmd = false; - opt.nice = 0; + opt.nice = NO_VAL; opt.priority = 0; opt.sicp_mode = 0; opt.power_flags = 0; diff --git a/src/srun/srun_pty.c b/src/srun/srun_pty.c index ccc1aee9e..a1a1f80fb 100644 --- a/src/srun/srun_pty.c +++ b/src/srun/srun_pty.c @@ -45,14 +45,10 @@ #include <pthread.h> #endif -#ifdef HAVE_SYS_TERMIOS_H -# include <sys/termios.h> -#endif - #include <signal.h> #include <string.h> #include <sys/ioctl.h> -#include <sys/poll.h> +#include <poll.h> #include "slurm/slurm_errno.h" diff --git a/testsuite/expect/inc21.21_tests b/testsuite/expect/inc21.21_tests index e22088bbe..0abc7aee7 100644 --- a/testsuite/expect/inc21.21_tests +++ b/testsuite/expect/inc21.21_tests @@ -286,6 +286,9 @@ proc inc21_21_submit_test { limit } { [array get acct_mod_assoc_test_vals] \ [array get acct_mod_acct_vals]] if { $exit_code } { + # Clear the limits + set acct_mod_assoc_test_vals($limit_job) "-1" + set acct_mod_assoc_test_vals($limit_sub) "-1" return $exit_code } @@ -336,11 +339,16 @@ proc inc21_21_submit_test { limit } { set exit_code 1 break } - + # We need to sleep because of the way the scheduler works; + # if we don't sleep then we could race the scheduler and see + # jobs in an unexpected state + sleep 1 } if { $exit_code } { + # Clear the limits + set acct_mod_assoc_test_vals($limit_job) "-1" + set acct_mod_assoc_test_vals($limit_sub) "-1" return $exit_code } @@ -377,12 +385,12 @@ proc inc21_21_submit_test { limit } { } if { $exit_code } { + # Clear the limits + set acct_mod_assoc_test_vals($limit_job) "-1" + set acct_mod_assoc_test_vals($limit_sub) "-1" return $exit_code } - # sleep the Schedule cycle default is 4 - sleep 4 - set matches 0 set mypid [spawn $squeue -A$ta -h -o "\%i \%t \%r"] expect { @@ -416,6 +424,17 @@ proc inc21_21_submit_test { limit } { } } + if { $matches != 4 } { + send_user "\nFAILURE: jobs are not in the expected state " + send_user "($matches != 4)" + print_err $limit "inc21_21_submit_test" + set exit_code 1 + # Clear the limits + set acct_mod_assoc_test_vals($limit_job) "-1" + set acct_mod_assoc_test_vals($limit_sub) "-1" + return $exit_code + } + # Test to make sure that the grpsubmit and maxsubmit # are enforced with job arrays @@ -460,10 +479,16 @@ proc inc21_21_submit_test { limit } { break } + # We need to sleep because of the way the scheduler works; + # if we don't sleep then we could race the scheduler and see + # jobs in an unexpected state + sleep 1 } if { $exit_code } { + # Clear the limits + set acct_mod_assoc_test_vals($limit_job) "-1" + set acct_mod_assoc_test_vals($limit_sub) "-1" return $exit_code } @@ -500,9 +525,38 @@ proc inc21_21_submit_test { limit } { } if { $exit_code } { + # Clear the limits + set acct_mod_assoc_test_vals($limit_job) "-1" + set acct_mod_assoc_test_vals($limit_sub) "-1" return $exit_code } + set matches 0 + set mypid [spawn $squeue -A$ta -h -o "\%i \%t \%r"] + expect { + -re "($job_id(2)|$job_id(3))_\\\[0\\\].PD.AssocMaxJobsLimit" { + incr matches + exp_continue + } + -re "($job_id(2)|$job_id(3))_\\\[0\\\].PD.AssocGrpJobsLimit" { + incr matches + exp_continue + } + -re "($job_id(0)|$job_id(1))_0.R.None" {
incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: squeue not responding " + print_err $limit "inc21_21_submit_test" + slow_kill $mypid + set exit_code 1 + } + eof { + wait + } + } + spawn $scancel --quiet --account=$ta expect { eof { diff --git a/testsuite/expect/test1.97 b/testsuite/expect/test1.97 index 0a975815c..d11f82c19 100755 --- a/testsuite/expect/test1.97 +++ b/testsuite/expect/test1.97 @@ -232,6 +232,9 @@ proc submit_cpu {ntasks ncpus} { set x 0 spawn $bin_bash -c "$srun -N$num_nodes -n$ntasks -w$nodelist -c$ncpus --exclusive $bin_printenv SLURMD_NODENAME | $bin_sort -V | $bin_uniq -c" expect { + -re "job ($number)" { + exp_continue + } -re "($number) ($alpha_numeric_nodelist)" { set tasks($x) $expect_out(1,string) set nodes($x) $expect_out(2,string) @@ -265,6 +268,9 @@ proc submit_tasks {ntasks ntaskpn} { set x 0 spawn $bin_bash -c "$srun -N$num_nodes -n$ntasks --ntasks-per-node=$ntaskpn -w$nodelist --exclusive $bin_printenv SLURMD_NODENAME | $bin_sort -V | $bin_uniq -c" expect { + -re "job ($number)" { + exp_continue + } -re "($number) ($alpha_numeric_nodelist)" { set tasks($x) $expect_out(1,string) set nodes($x) $expect_out(2,string) -- GitLab
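A closing note on the scancel change earlier in this patch: the _add_delay() throttle grows its sleep in a Fibonacci-like progression, starting at 10 msec and capped at 1 second. Below is a minimal sketch of just that progression, under the assumption that the constants match the patch; next_delay() is a hypothetical name used only for this illustration.

#include <stdio.h>

#define MAX_DELAY_USEC 1000000    /* cap the delay at 1 second */

/* Hypothetical helper mirroring the patch constants: 10 msec start,
 * Fibonacci-style growth, 1 second ceiling. */
static int next_delay(int *delay, int *previous)
{
    int my_delay = *delay + *previous;

    if (my_delay > MAX_DELAY_USEC)
        my_delay = MAX_DELAY_USEC;
    *previous = *delay;
    *delay = my_delay;
    return my_delay;
}

int main(void)
{
    int delay = 10000, previous = 0;    /* start at 10 msec */

    for (int i = 0; i < 12; i++)
        printf("RPC %2d: sleep %d usec\n", i + 1,
               next_delay(&delay, &previous));
    return 0;
}

With these constants the sequence runs 10, 20, 30, 50, 80, 130, ... msec until it pins at 1 second, which backs off quickly under load without ever stalling a single cancellation for more than a second.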