From e05b5ba4246d03a2f19b3e7a6f53bb67000b2740 Mon Sep 17 00:00:00 2001 From: Mehdi Dogguy <mehdi@debian.org> Date: Mon, 8 Sep 2014 21:31:56 +0200 Subject: [PATCH] Imported Upstream version 1.3.10 --- META | 4 +- Makefile.in | 1 + NEWS | 38 +- auxdir/Makefile.in | 1 + auxdir/x_ac_bluegene.m4 | 66 +- config.h.in | 6 + configure | 204 +++- configure.ac | 14 +- contribs/Makefile.in | 1 + contribs/perlapi/Makefile.in | 1 + contribs/phpext/Makefile.in | 1 + contribs/python/Makefile.in | 1 + contribs/python/hostlist/Makefile.in | 1 + contribs/python/hostlist/test/Makefile.in | 1 + contribs/torque/Makefile.in | 1 + doc/Makefile.in | 1 + doc/html/Makefile.in | 1 + doc/html/faq.shtml | 19 +- doc/html/schedplugins.shtml | 2 +- doc/html/team.shtml | 5 +- doc/man/Makefile.in | 1 + doc/man/man1/sacct.1 | 14 +- doc/man/man1/sacctmgr.1 | 516 +++++---- doc/man/man1/salloc.1 | 4 +- doc/man/man1/sbatch.1 | 4 +- doc/man/man1/scontrol.1 | 4 +- doc/man/man1/sinfo.1 | 2 +- doc/man/man1/squeue.1 | 4 +- doc/man/man3/slurm_free_job_info_msg.3 | 2 +- doc/man/man3/slurm_slurmd_status.3 | 1 + doc/man/man5/slurm.conf.5 | 9 +- doc/man/man5/slurmdbd.conf.5 | 4 +- slurm.spec | 6 +- src/Makefile.in | 1 + src/api/Makefile.in | 1 + src/common/Makefile.in | 1 + src/common/assoc_mgr.c | 80 +- src/common/env.c | 58 +- src/common/env.h | 3 +- src/common/hostlist.c | 117 +- src/common/hostlist.h | 24 +- src/common/slurm_accounting_storage.c | 242 +++- src/common/slurm_accounting_storage.h | 17 +- src/common/slurm_rlimits_info.c | 90 +- src/common/slurm_rlimits_info.h | 1 + src/database/Makefile.in | 1 + src/plugins/Makefile.in | 1 + src/plugins/accounting_storage/Makefile.in | 1 + .../accounting_storage/filetxt/Makefile.in | 1 + .../filetxt/accounting_storage_filetxt.c | 3 +- .../accounting_storage/mysql/Makefile.in | 1 + .../mysql/accounting_storage_mysql.c | 273 +++-- .../accounting_storage/none/Makefile.in | 1 + .../none/accounting_storage_none.c | 3 +- .../accounting_storage/pgsql/Makefile.in | 1 + 
.../pgsql/accounting_storage_pgsql.c | 3 +- .../accounting_storage/slurmdbd/Makefile.in | 1 + .../slurmdbd/accounting_storage_slurmdbd.c | 3 +- src/plugins/auth/Makefile.in | 1 + src/plugins/auth/authd/Makefile.in | 1 + src/plugins/auth/munge/Makefile.in | 1 + src/plugins/auth/none/Makefile.in | 1 + src/plugins/checkpoint/Makefile.in | 1 + src/plugins/checkpoint/aix/Makefile.in | 1 + src/plugins/checkpoint/none/Makefile.in | 1 + src/plugins/checkpoint/ompi/Makefile.in | 1 + src/plugins/checkpoint/xlch/Makefile.in | 1 + src/plugins/crypto/Makefile.in | 1 + src/plugins/crypto/munge/Makefile.in | 1 + src/plugins/crypto/openssl/Makefile.in | 1 + src/plugins/jobacct_gather/Makefile.in | 1 + src/plugins/jobacct_gather/aix/Makefile.in | 1 + src/plugins/jobacct_gather/linux/Makefile.in | 1 + src/plugins/jobacct_gather/none/Makefile.in | 1 + src/plugins/jobcomp/Makefile.in | 1 + src/plugins/jobcomp/filetxt/Makefile.in | 1 + src/plugins/jobcomp/mysql/Makefile.in | 1 + src/plugins/jobcomp/none/Makefile.in | 1 + src/plugins/jobcomp/pgsql/Makefile.in | 1 + src/plugins/jobcomp/script/Makefile.in | 1 + src/plugins/mpi/Makefile.in | 1 + src/plugins/mpi/lam/Makefile.in | 1 + src/plugins/mpi/mpich1_p4/Makefile.in | 1 + src/plugins/mpi/mpich1_shmem/Makefile.in | 1 + src/plugins/mpi/mpichgm/Makefile.in | 1 + src/plugins/mpi/mpichmx/Makefile.in | 1 + src/plugins/mpi/mvapich/Makefile.in | 1 + src/plugins/mpi/none/Makefile.in | 1 + src/plugins/mpi/openmpi/Makefile.in | 1 + src/plugins/proctrack/Makefile.in | 1 + src/plugins/proctrack/aix/Makefile.in | 1 + src/plugins/proctrack/linuxproc/Makefile.in | 1 + src/plugins/proctrack/pgid/Makefile.in | 1 + src/plugins/proctrack/rms/Makefile.in | 1 + src/plugins/proctrack/sgi_job/Makefile.in | 1 + src/plugins/sched/Makefile.in | 1 + src/plugins/sched/backfill/Makefile.in | 1 + src/plugins/sched/builtin/Makefile.in | 1 + src/plugins/sched/gang/Makefile.in | 1 + src/plugins/sched/hold/Makefile.in | 1 + src/plugins/sched/wiki/Makefile.in | 1 + 
src/plugins/sched/wiki2/Makefile.in | 1 + src/plugins/sched/wiki2/get_jobs.c | 36 +- src/plugins/select/Makefile.in | 1 + src/plugins/select/bluegene/Makefile.in | 1 + .../bluegene/block_allocator/Makefile.in | 1 + .../block_allocator/block_allocator.c | 22 +- .../bluegene/block_allocator/bridge_linker.c | 50 +- .../bluegene/block_allocator/bridge_linker.h | 4 +- .../select/bluegene/plugin/Makefile.in | 1 + .../bluegene/plugin/bg_switch_connections.c | 12 +- src/plugins/select/bluegene/plugin/bluegene.c | 8 +- src/plugins/select/bluegene/plugin/bluegene.h | 6 +- .../select/bluegene/plugin/select_bluegene.c | 4 +- .../select/bluegene/plugin/state_test.c | 14 +- src/plugins/select/bluegene/wrap_rm_api.h | 6 + src/plugins/select/cons_res/Makefile.in | 1 + src/plugins/select/linear/Makefile.in | 1 + src/plugins/switch/Makefile.in | 1 + src/plugins/switch/elan/Makefile.in | 1 + src/plugins/switch/federation/Makefile.in | 1 + src/plugins/switch/none/Makefile.in | 1 + src/plugins/task/Makefile.in | 1 + src/plugins/task/affinity/Makefile.in | 1 + src/plugins/task/none/Makefile.in | 1 + src/sacct/Makefile.in | 1 + src/sacctmgr/Makefile.in | 1 + src/sacctmgr/account_functions.c | 5 - src/sacctmgr/association_functions.c | 149 +-- src/sacctmgr/cluster_functions.c | 16 +- src/sacctmgr/common.c | 17 - src/sacctmgr/file_functions.c | 276 ++--- src/sacctmgr/qos_functions.c | 2 +- src/sacctmgr/sacctmgr.c | 27 +- src/sacctmgr/sacctmgr.h | 17 +- src/sacctmgr/user_functions.c | 2 - src/salloc/Makefile.in | 1 + src/salloc/salloc.c | 57 +- src/sattach/Makefile.in | 1 + src/sbatch/Makefile.in | 1 + src/sbatch/opt.c | 4 - src/sbatch/sbatch.c | 19 +- src/sbcast/Makefile.in | 1 + src/scancel/Makefile.in | 1 + src/scontrol/Makefile.in | 1 + src/scontrol/scontrol.c | 6 +- src/sinfo/Makefile.in | 1 + src/slurmctld/Makefile.in | 1 + src/slurmctld/acct_policy.c | 2 +- src/slurmctld/controller.c | 1 + src/slurmctld/job_mgr.c | 51 +- src/slurmctld/node_scheduler.c | 13 +- src/slurmctld/proc_req.c 
| 5 +- src/slurmctld/read_config.c | 3 +- src/slurmctld/step_mgr.c | 23 +- src/slurmd/Makefile.in | 1 + src/slurmd/slurmd/Makefile.am | 23 +- src/slurmd/slurmd/Makefile.in | 72 +- src/slurmd/slurmd/req.c | 76 +- src/slurmd/slurmd/slurmd.c | 18 +- src/slurmd/slurmstepd/Makefile.in | 1 + src/slurmd/slurmstepd/mgr.c | 40 +- src/slurmd/slurmstepd/slurmstepd.c | 19 +- src/slurmd/slurmstepd/task.c | 4 +- src/slurmd/slurmstepd/ulimits.c | 6 +- src/slurmdbd/Makefile.in | 1 + src/smap/Makefile.in | 1 + src/squeue/Makefile.in | 1 + src/squeue/opts.c | 69 +- src/squeue/print.c | 25 +- src/squeue/squeue.h | 2 +- src/sreport/Makefile.in | 1 + src/sreport/cluster_reports.c | 11 + src/srun/Makefile.in | 1 + src/srun/allocate.c | 4 +- src/srun/opt.c | 5 - src/srun/srun.c | 10 +- src/sstat/Makefile.in | 1 + src/strigger/Makefile.in | 1 + src/sview/Makefile.in | 1 + testsuite/Makefile.in | 1 + testsuite/expect/Makefile.am | 35 +- testsuite/expect/Makefile.in | 36 +- testsuite/expect/README | 19 +- testsuite/expect/globals | 5 +- testsuite/expect/test1.40 | 138 +++ testsuite/expect/test1.42 | 42 +- testsuite/expect/test1.90 | 1 + testsuite/expect/test15.14 | 35 +- testsuite/expect/test15.23 | 14 +- testsuite/expect/test15.25 | 142 +++ testsuite/expect/test17.18 | 36 +- testsuite/expect/test17.21 | 15 +- testsuite/expect/test21.10 | 4 +- testsuite/expect/test21.11 | 2 +- testsuite/expect/test21.12 | 2 +- testsuite/expect/test21.21 | 279 +++++ testsuite/expect/test21.22 | 1027 +++++++++++++++++ testsuite/expect/test21.5 | 2 +- testsuite/expect/test21.6 | 2 +- testsuite/expect/test6.13 | 10 + testsuite/slurm_unit/Makefile.in | 1 + testsuite/slurm_unit/api/Makefile.in | 1 + testsuite/slurm_unit/api/manual/Makefile.in | 1 + testsuite/slurm_unit/common/Makefile.in | 1 + testsuite/slurm_unit/slurmctld/Makefile.in | 1 + testsuite/slurm_unit/slurmd/Makefile.in | 1 + 207 files changed, 3725 insertions(+), 1314 deletions(-) create mode 100755 testsuite/expect/test1.40 create mode 100755 
testsuite/expect/test15.25 create mode 100755 testsuite/expect/test21.21 create mode 100755 testsuite/expect/test21.22 diff --git a/META b/META index 9af55c73d..c604e3a1a 100644 --- a/META +++ b/META @@ -3,9 +3,9 @@ Api_revision: 0 Major: 1 Meta: 1 - Micro: 9 + Micro: 10 Minor: 3 Name: slurm Release: 1 Release_tags: dist - Version: 1.3.9 + Version: 1.3.10 diff --git a/Makefile.in b/Makefile.in index b06636b44..c2bc353e8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -213,6 +213,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/NEWS b/NEWS index 75fdbd9b6..b85698313 100644 --- a/NEWS +++ b/NEWS @@ -3,7 +3,41 @@ documents those changes that are of interest to users and admins. * Changes in SLURM 1.3.10 ========================= - + -- Fix several bugs in the hostlist functions: + - Fix hostset_insert_range() to do proper accounting of hl->nhosts (count). + - Avoid assertion failure when calling hostset_create(NULL). + - Fix return type of hostlist and hostset string functions from size_t to + ssize_t. + - Add check for NULL return from hostlist_create(). + - Rewrite of hostrange_hn_within(), avoids reporting "tst0" in the hostlist + "tst". + -- Modify squeue to accept "--nodes=<hostlist>" rather than + "--node=<node_name>" and report all jobs with any allocated nodes from set + of nodes specified. From Par Anderson, National Supercomputer Centre, + Sweden. + -- Fix bug preventing use of TotalView debugger with TaskProlog configured or + srun's --task-prolog option. + -- Improve reliability of batch job requeue logic in the event that the slurmd + daemon is temporarily non-responsive (for longer than the configured + MessageTimeout value but less than the SlurmdTimeout value). 
+ -- In sched/wiki2 (Moab) report a job's MAXNODES (maximum number of permitted + nodes). + -- Fixed SLURM_TASKS_PER_NODE to live up more to its name on an allocation. + Will now contain the number of tasks per node instead of the number of CPUs + per node. This is only for a resource allocation. Job steps already have + the environment variable set correctly. + -- Configuration parameter PropagateResourceLimits has new option of "NONE". + -- User's --propagate options take precedence over PropagateResourceLimits + configuration parameter in both srun and sbatch commands. + -- When Moab is in use (salloc or sbatch is executed with the --get-user-env + option to be more specific), load the user's default resource limits rather + than propagating the Moab daemon's limits. + -- Fix bug in slurmctld restart logic for recovery of batch jobs that are + initiated as a job step rather than an independent job (used for LSF). + -- Fix bug that can cause slurmctld restart to fail, bug introduced in SLURM + version 1.3.9. From Eygene Ryabinkin, Kurchatov Institute, Russia. + -- Permit slurmd configuration parameters to be set to new values from + previously unset values. * Changes in SLURM 1.3.9 ======================== @@ -3544,4 +3578,4 @@ documents those changes that are of interest to users and admins. -- Change directory to /tmp in slurmd if daemonizing. -- Logfiles are reopened on reconfigure. 
-$Id: NEWS 15393 2008-10-13 21:02:25Z da $ +$Id: NEWS 15572 2008-11-03 23:14:27Z jette $ diff --git a/auxdir/Makefile.in b/auxdir/Makefile.in index 12a7cce90..96949443d 100644 --- a/auxdir/Makefile.in +++ b/auxdir/Makefile.in @@ -171,6 +171,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/auxdir/x_ac_bluegene.m4 b/auxdir/x_ac_bluegene.m4 index 24d743be3..e5ffc38c1 100644 --- a/auxdir/x_ac_bluegene.m4 +++ b/auxdir/x_ac_bluegene.m4 @@ -1,5 +1,5 @@ ##***************************************************************************** -## $Id: x_ac_bluegene.m4 14087 2008-05-20 19:35:45Z da $ +## $Id: x_ac_bluegene.m4 15552 2008-10-31 19:53:03Z da $ ##***************************************************************************** # AUTHOR: # Morris Jette <jette1@llnl.gov> @@ -13,7 +13,7 @@ ##***************************************************************************** -AC_DEFUN([X_AC_BLUEGENE], +AC_DEFUN([X_AC_BGL], [ AC_ARG_WITH(db2, AS_HELP_STRING(--with-db2-dir=PATH,Specify path to DB2 library's parent directory), [ trydb2dir=$withval ]) @@ -69,7 +69,7 @@ AC_DEFUN([X_AC_BLUEGENE], have_bg_files=yes saved_LDFLAGS="$LDFLAGS" LDFLAGS="$saved_LDFLAGS $bg_ldflags" - AC_LINK_IFELSE([AC_LANG_PROGRAM([[ int rm_set_serial(char *); ]], [[ rm_set_serial(""); ]])],[have_bg_files=yes],[AC_MSG_ERROR(There is a problem linking to the bluegene api.)]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ int rm_set_serial(char *); ]], [[ rm_set_serial(""); ]])],[have_bg_files=yes],[AC_MSG_ERROR(There is a problem linking to the BG/L api.)]) LDFLAGS="$saved_LDFLAGS" fi @@ -89,3 +89,63 @@ AC_DEFUN([X_AC_BLUEGENE], AC_SUBST(BG_INCLUDES) ]) + +AC_DEFUN([X_AC_BGP], +[ + # Skip if already set + if test ! 
-z "$have_bg_files" ; then + bg_default_dirs="" + else + bg_default_dirs="/bgsys/drivers/ppcfloor" + fi + + libname=bgpbridge + + for bg_dir in $trydb2dir "" $bg_default_dirs; do + # Skip directories that don't exist + if test ! -z "$bg_dir" -a ! -d "$bg_dir" ; then + continue; + fi + + soloc=$bg_dir/lib64/lib$libname.so + # Search for required BG API libraries in the directory + if test -z "$have_bg_ar" -a -f "$soloc" ; then + have_bgp_ar=yes + bg_ldflags="$bg_ldflags -L$bg_dir/lib64 -L/usr/lib64 -Wl,--unresolved-symbols=ignore-in-shared-libs -l$libname" + fi + + # Search for headers in the directory + if test -z "$have_bg_hdr" -a -f "$bg_dir/include/rm_api.h" ; then + have_bgp_hdr=yes + bg_includes="-I$bg_dir/include" + fi + done + + if test ! -z "$have_bgp_ar" -a ! -z "$have_bgp_hdr" ; then + AC_DEFINE(HAVE_BG, 1, [Define to 1 if emulating or running on Blue Gene system]) + AC_DEFINE(HAVE_FRONT_END, 1, [Define to 1 if running slurmd on front-end only]) + # ac_with_readline="no" + # Test to make sure the api is good + saved_LDFLAGS="$LDFLAGS" + LDFLAGS="$saved_LDFLAGS $bg_ldflags" + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ int rm_set_serial(char *); ]], [[ rm_set_serial(""); ]])],[have_bgp_files=yes],[AC_MSG_ERROR(There is a problem linking to the BG/P api.)]) + LDFLAGS="$saved_LDFLAGS" + fi + + if test ! 
-z "$have_bgp_files" ; then + BG_INCLUDES="$bg_includes" + AC_DEFINE(HAVE_BG_FILES, 1, [Define to 1 if have Blue Gene files]) + AC_DEFINE(HAVE_BGP_FILES, 1, [Define to 1 if have BG/P files]) + + AC_DEFINE_UNQUOTED(BG_BRIDGE_SO, "$soloc", [Define the BG_BRIDGE_SO value]) + + AC_MSG_CHECKING(for BG serial value) + bg_serial="BGP" + AC_ARG_WITH(bg-serial, + AS_HELP_STRING(--with-bg-serial=NAME,set BG_SERIAL value [[BGP]]), [bg_serial="$withval"]) + AC_MSG_RESULT($bg_serial) + AC_DEFINE_UNQUOTED(BG_SERIAL, "$bg_serial", [Define the BG_SERIAL value]) + fi + + AC_SUBST(BG_INCLUDES) +]) diff --git a/config.h.in b/config.h.in index 0c6cf45fe..8edf2e9ac 100644 --- a/config.h.in +++ b/config.h.in @@ -18,6 +18,9 @@ /* Define to 1 if emulating or running on Blue Gene system */ #undef HAVE_BG +/* Define to 1 if have BG/P files */ +#undef HAVE_BGP_FILES + /* Define to 1 if have Blue Gene files */ #undef HAVE_BG_FILES @@ -361,6 +364,9 @@ /* Define the project's minor version. */ #undef SLURM_MINOR +/* Define Slurm installation prefix */ +#undef SLURM_PREFIX + /* Define the project's version string. */ #undef SLURM_VERSION diff --git a/configure b/configure index bd46c5d6a..d222a0afd 100755 --- a/configure +++ b/configure @@ -938,6 +938,7 @@ DEBUG_MODULES_FALSE SLURMCTLD_PORT SLURMD_PORT SLURMDBD_PORT +SLURM_PREFIX ELAN_LIBS HAVE_ELAN_TRUE HAVE_ELAN_FALSE @@ -1594,6 +1595,7 @@ Optional Packages: --with-tags[=TAGS] include additional configurations [automatic] --with-db2-dir=PATH Specify path to DB2 library's parent directory --with-bg-serial=NAME set BG_SERIAL value BGL + --with-bg-serial=NAME set BG_SERIAL value BGP --with-xcpu=PATH specify path to XCPU directory --with-pkg-config=PATH Specify path to pkg-config binary --with-mysql_config=PATH @@ -7120,7 +7122,7 @@ ia64-*-hpux*) ;; *-*-irix6*) # Find out which ABI we are using. 
- echo '#line 7123 "configure"' > conftest.$ac_ext + echo '#line 7125 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -9226,11 +9228,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:9229: $lt_compile\"" >&5) + (eval echo "\"\$as_me:9231: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:9233: \$? = $ac_status" >&5 + echo "$as_me:9235: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -9516,11 +9518,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:9519: $lt_compile\"" >&5) + (eval echo "\"\$as_me:9521: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:9523: \$? = $ac_status" >&5 + echo "$as_me:9525: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -9620,11 +9622,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:9623: $lt_compile\"" >&5) + (eval echo "\"\$as_me:9625: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:9627: \$? = $ac_status" >&5 + echo "$as_me:9629: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -11997,7 +11999,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 12000 "configure" +#line 12002 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12097,7 +12099,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 12100 "configure" +#line 12102 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -14498,11 +14500,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:14501: $lt_compile\"" >&5) + (eval echo "\"\$as_me:14503: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:14505: \$? = $ac_status" >&5 + echo "$as_me:14507: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -14602,11 +14604,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:14605: $lt_compile\"" >&5) + (eval echo "\"\$as_me:14607: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:14609: \$? = $ac_status" >&5 + echo "$as_me:14611: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -16200,11 +16202,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:16203: $lt_compile\"" >&5) + (eval echo "\"\$as_me:16205: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:16207: \$? = $ac_status" >&5 + echo "$as_me:16209: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -16304,11 +16306,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:16307: $lt_compile\"" >&5) + (eval echo "\"\$as_me:16309: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:16311: \$? = $ac_status" >&5 + echo "$as_me:16313: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -18524,11 +18526,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:18527: $lt_compile\"" >&5) + (eval echo "\"\$as_me:18529: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:18531: \$? = $ac_status" >&5 + echo "$as_me:18533: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. 
@@ -18814,11 +18816,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:18817: $lt_compile\"" >&5) + (eval echo "\"\$as_me:18819: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:18821: \$? = $ac_status" >&5 + echo "$as_me:18823: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -18918,11 +18920,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:18921: $lt_compile\"" >&5) + (eval echo "\"\$as_me:18923: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:18925: \$? = $ac_status" >&5 + echo "$as_me:18927: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -24767,8 +24769,8 @@ else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - { { echo "$as_me:$LINENO: error: There is a problem linking to the bluegene api." >&5 -echo "$as_me: error: There is a problem linking to the bluegene api." >&2;} + { { echo "$as_me:$LINENO: error: There is a problem linking to the BG/L api." >&5 +echo "$as_me: error: There is a problem linking to the BG/L api." >&2;} { (exit 1); exit 1; }; } fi @@ -24816,6 +24818,138 @@ _ACEOF + # Skip if already set + if test ! -z "$have_bg_files" ; then + bg_default_dirs="" + else + bg_default_dirs="/bgsys/drivers/ppcfloor" + fi + + libname=bgpbridge + + for bg_dir in $trydb2dir "" $bg_default_dirs; do + # Skip directories that don't exist + if test ! -z "$bg_dir" -a ! 
-d "$bg_dir" ; then + continue; + fi + + soloc=$bg_dir/lib64/lib$libname.so + # Search for required BG API libraries in the directory + if test -z "$have_bg_ar" -a -f "$soloc" ; then + have_bgp_ar=yes + bg_ldflags="$bg_ldflags -L$bg_dir/lib64 -L/usr/lib64 -Wl,--unresolved-symbols=ignore-in-shared-libs -l$libname" + fi + + # Search for headers in the directory + if test -z "$have_bg_hdr" -a -f "$bg_dir/include/rm_api.h" ; then + have_bgp_hdr=yes + bg_includes="-I$bg_dir/include" + fi + done + + if test ! -z "$have_bgp_ar" -a ! -z "$have_bgp_hdr" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_BG 1 +_ACEOF + + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FRONT_END 1 +_ACEOF + + # ac_with_readline="no" + # Test to make sure the api is good + saved_LDFLAGS="$LDFLAGS" + LDFLAGS="$saved_LDFLAGS $bg_ldflags" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + int rm_set_serial(char *); +int +main () +{ + rm_set_serial(""); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + have_bgp_files=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + { { echo "$as_me:$LINENO: error: There is a problem linking to the BG/P api." >&5 +echo "$as_me: error: There is a problem linking to the BG/P api." 
>&2;} + { (exit 1); exit 1; }; } +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS="$saved_LDFLAGS" + fi + + if test ! -z "$have_bgp_files" ; then + BG_INCLUDES="$bg_includes" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_BG_FILES 1 +_ACEOF + + +cat >>confdefs.h <<\_ACEOF +#define HAVE_BGP_FILES 1 +_ACEOF + + + +cat >>confdefs.h <<_ACEOF +#define BG_BRIDGE_SO "$soloc" +_ACEOF + + + { echo "$as_me:$LINENO: checking for BG serial value" >&5 +echo $ECHO_N "checking for BG serial value... $ECHO_C" >&6; } + bg_serial="BGP" + +# Check whether --with-bg-serial was given. +if test "${with_bg_serial+set}" = set; then + withval=$with_bg_serial; bg_serial="$withval" +fi + + { echo "$as_me:$LINENO: result: $bg_serial" >&5 +echo "${ECHO_T}$bg_serial" >&6; } + +cat >>confdefs.h <<_ACEOF +#define BG_SERIAL "$bg_serial" +_ACEOF + + fi + + + + # This is here to avoid a bug in the gcc compiler 3.4.6 # Without this flag there is a bug when pointing to other functions # and then using them. It is also advised to set the flag if there @@ -26102,6 +26236,21 @@ _ACEOF +if test "x$prefix" == "xNONE" ; then + +cat >>confdefs.h <<_ACEOF +#define SLURM_PREFIX "/usr/local" +_ACEOF + +else + +cat >>confdefs.h <<_ACEOF +#define SLURM_PREFIX "$prefix" +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for rms_prgcreate in -lrmscall" >&5 echo $ECHO_N "checking for rms_prgcreate in -lrmscall... 
$ECHO_C" >&6; } @@ -28332,6 +28481,7 @@ DEBUG_MODULES_FALSE!$DEBUG_MODULES_FALSE$ac_delim SLURMCTLD_PORT!$SLURMCTLD_PORT$ac_delim SLURMD_PORT!$SLURMD_PORT$ac_delim SLURMDBD_PORT!$SLURMDBD_PORT$ac_delim +SLURM_PREFIX!$SLURM_PREFIX$ac_delim ELAN_LIBS!$ELAN_LIBS$ac_delim HAVE_ELAN_TRUE!$HAVE_ELAN_TRUE$ac_delim HAVE_ELAN_FALSE!$HAVE_ELAN_FALSE$ac_delim @@ -28354,7 +28504,6 @@ MUNGE_CPPFLAGS!$MUNGE_CPPFLAGS$ac_delim MUNGE_LDFLAGS!$MUNGE_LDFLAGS$ac_delim WITH_MUNGE_TRUE!$WITH_MUNGE_TRUE$ac_delim WITH_MUNGE_FALSE!$WITH_MUNGE_FALSE$ac_delim -AUTHD_LIBS!$AUTHD_LIBS$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -28396,6 +28545,7 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +AUTHD_LIBS!$AUTHD_LIBS$ac_delim AUTHD_CFLAGS!$AUTHD_CFLAGS$ac_delim WITH_AUTHD_TRUE!$WITH_AUTHD_TRUE$ac_delim WITH_AUTHD_FALSE!$WITH_AUTHD_FALSE$ac_delim @@ -28403,7 +28553,7 @@ UTIL_LIBS!$UTIL_LIBS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 5; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 6; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 diff --git a/configure.ac b/configure.ac index 3c84ab00f..b3e51995b 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -# $Id: configure.ac 15324 2008-10-07 00:16:53Z da $ +# $Id: configure.ac 15551 2008-10-31 19:47:35Z da $ # This file is to be processed with autoconf to generate a configure script dnl Prologue @@ -133,7 +133,8 @@ LDFLAGS="$LDFLAGS " CFLAGS="$CFLAGS $PTHREAD_CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" -X_AC_BLUEGENE +X_AC_BGL +X_AC_BGP X_AC_CFLAGS X_AC_XCPU X_AC_SLURM_SEMAPHORE @@ -169,6 +170,15 @@ dnl X_AC_SLURM_PORTS([6817], [6818], [6819]) +dnl add SLURM_PREFIX to config.h +dnl +if test "x$prefix" == "xNONE" ; then + AC_DEFINE_UNQUOTED(SLURM_PREFIX, "/usr/local", [Define Slurm 
installation prefix]) +else + AC_DEFINE_UNQUOTED(SLURM_PREFIX, "$prefix", [Define Slurm installation prefix]) +fi +AC_SUBST(SLURM_PREFIX) + dnl check for whether to include Elan support dnl X_AC_ELAN diff --git a/contribs/Makefile.in b/contribs/Makefile.in index a3bfe7509..901ac4182 100644 --- a/contribs/Makefile.in +++ b/contribs/Makefile.in @@ -182,6 +182,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/contribs/perlapi/Makefile.in b/contribs/perlapi/Makefile.in index b8783c521..672005002 100644 --- a/contribs/perlapi/Makefile.in +++ b/contribs/perlapi/Makefile.in @@ -170,6 +170,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/contribs/phpext/Makefile.in b/contribs/phpext/Makefile.in index 6f94c4de3..55b80355e 100644 --- a/contribs/phpext/Makefile.in +++ b/contribs/phpext/Makefile.in @@ -170,6 +170,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/contribs/python/Makefile.in b/contribs/python/Makefile.in index 54f0bb278..9b6fb5d42 100644 --- a/contribs/python/Makefile.in +++ b/contribs/python/Makefile.in @@ -182,6 +182,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/contribs/python/hostlist/Makefile.in 
b/contribs/python/hostlist/Makefile.in index 64645d4e9..0b11691ae 100644 --- a/contribs/python/hostlist/Makefile.in +++ b/contribs/python/hostlist/Makefile.in @@ -183,6 +183,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/contribs/python/hostlist/test/Makefile.in b/contribs/python/hostlist/test/Makefile.in index 118f5d3a1..cd1584721 100644 --- a/contribs/python/hostlist/test/Makefile.in +++ b/contribs/python/hostlist/test/Makefile.in @@ -170,6 +170,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/contribs/torque/Makefile.in b/contribs/torque/Makefile.in index 048d4a630..61cb08158 100644 --- a/contribs/torque/Makefile.in +++ b/contribs/torque/Makefile.in @@ -177,6 +177,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/doc/Makefile.in b/doc/Makefile.in index 73ab4da8b..97976e866 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -182,6 +182,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in index 54b78c500..9b93fd11f 100644 --- a/doc/html/Makefile.in +++ b/doc/html/Makefile.in @@ -181,6 +181,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ 
SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml index 2063ed8f2..7d2317340 100644 --- a/doc/html/faq.shtml +++ b/doc/html/faq.shtml @@ -27,6 +27,8 @@ allocated to a SLURM job?</a></li> 30s ago" followed by "srun Job Failed"?</a></li> <li><a href="#memlock">Why is my MPI job failing due to the locked memory (memlock) limit being too low?</a></li> +<li><a href="#inactive">Why is my batch job that launches no job steps being +killed?</a></li> </ol> <h2>For Administrators</h2> <ol> @@ -489,7 +491,7 @@ to execute a parallel job when one of the tasks has exited is not normally productive. This behavior can be changed using srun's <i>--wait=<time></i> option to either change the timeout period or disable the timeout altogether. See srun's man page -for details. +for details.</p> <p><a name="memlock"><b>18. Why is my MPI job failing due to the locked memory (memlock) limit being too low?</b></a><br> @@ -510,7 +512,18 @@ locked memory limit limit to be unlimited on the compute nodes (see full advantage of this limit (e.g. by adding something like <i>"ulimit -l unlimited"</i> to the <i>/etc/init.d/slurm</i> script used to initiate <i>slurmd</i>). -Related information about <a href="#pam">PAM</a> is also available. +Related information about <a href="#pam">PAM</a> is also available.</p> + +<p><a name="inactive"><b>19. Why is my batch job that launches no +job steps being killed?</b></a><br> +SLURM has a configuration parameter <i>InactiveLimit</i> intended +to kill jobs that do not spawn any job steps for a configurable +period of time. Your system administrator may modify the <i>InactiveLimit</i> +to satisfy your needs. Alternately, you can just spawn a job step +at the beginning of your script to execute in the background. It +will be purged when your script exits or your job otherwise terminates.
+A line of this sort near the beginning of your script should suffice:<br> +<i>srun -N1 -n1 sleep 999999 &</i></p> <p class="footer"><a href="#top">top</a></p> @@ -1018,6 +1031,6 @@ Index: src/slurmctld/ping_nodes.c <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 7 October 2008</p> +<p style="text-align:center;">Last modified 24 October 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/schedplugins.shtml b/doc/html/schedplugins.shtml index 112727338..94250edce 100644 --- a/doc/html/schedplugins.shtml +++ b/doc/html/schedplugins.shtml @@ -14,7 +14,7 @@ The <b>wiki</b> scheduler establishes an initial priority of zero (held) for all jobs. These jobs only begin execution when the <b>wiki</b> scheduler explicitly raises the their priority (releasing them). Developers may use the model that best fits their needs. -Note that a separate <a href="selectplugins.html" class="nav">node selection plugin</a> +Note that a separate <a href="selectplugins.html">node selection plugin</a> is available for controlling that aspect of scheduling.</p> <p>SLURM scheduler plugins are SLURM plugins that implement the SLURM scheduler diff --git a/doc/html/team.shtml b/doc/html/team.shtml index e4c8934f2..403a9c3c8 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -53,10 +53,11 @@ Networking, Italy)</li> <li>Daniel Palermo (HP)</li> <li>Dan Phung (LLNL/Columbia University)</li> <li>Ashley Pittman (Quadrics)</li> -<li>Vijay Ramasubramanian (University of Maryland) </li> +<li>Vijay Ramasubramanian (University of Maryland)</li> <li>Andy Riebs (HP)</li> <li>Asier Roa (Barcelona Supercomputer Center, Spain)<li> <li>Miguel Ros (Barcelona Supercomputer Center, Spain)<li> +<li>Eygene Ryabinkin (Kurchatov Institute, Russia)</li> <li>Federico Sacerdoti (D.E. 
Shaw)<li> <li>Jeff Squyres (LAM MPI)</li> <li>Prashanth Tamraparni (HP, India)</li> @@ -67,6 +68,6 @@ Networking, Italy)</li> <li>Anne-Marie Wunderlin (Bull)</li> </ul> -<p style="text-align:center;">Last modified 10 October 2008</p> +<p style="text-align:center;">Last modified 1 November 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/man/Makefile.in b/doc/man/Makefile.in index 5ae613a5e..00f431810 100644 --- a/doc/man/Makefile.in +++ b/doc/man/Makefile.in @@ -178,6 +178,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index ca501a074..9b3e95389 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -84,7 +84,7 @@ option is also specified. \f3\-C \fP\f2cluster_list\fP\f3,\fP \f3\-\-cluster\fP\f3=\fP\f2cluster_list\fP Displays the statistics only for the jobs started on the clusters specified by the \f2cluster_list\fP operand, which is a comma\-separated list of clusters. -Space characters are not allowed in the \f2cluster_list\fP. -1 for +Space characters are not allowed in the \f2cluster_list\fP. \-1 for all clusters, default is current cluster you are executing the sacct command on\&. @@ -190,7 +190,7 @@ instead of the current SLURM job accounting log file. Displays the statistics only for the jobs started with the GID specified by the \f2gid_list\fP operand, which is a comma\-separated list of gids. Space characters are not allowed in the \f2gid_list\fP. -Default is no restrictions. This is virtually the same as the --group +Default is no restrictions. This is virtually the same as the \-\-group option\&. .TP @@ -198,7 +198,7 @@ option\&. 
Displays the statistics only for the jobs started with the GROUP specified by the \f2group_list\fP operand, which is a comma\-separated list of groups. Space characters are not allowed in the \f2group_list\fP. -Default is no restrictions. This is virtually the same as the --gid option\&. +Default is no restrictions. This is virtually the same as the \-\-gid option\&. .TP \f3\-h \fP\f3,\fP \f3\-\-help\fP @@ -389,16 +389,16 @@ Intermediate steps are displayed by default. Displays the statistics only for the jobs started by the specified \f2uid_list\fP operand, which is a comma\-separated list of uids. Space characters are not allowed in the \f2uid_list\fP. --1 for all uids, default is current uid. If run as user root default -is all users. This is virtually the same as the --user option\&. +\-1 for all uids, default is current uid. If run as user root default +is all users. This is virtually the same as the \-\-user option\&. .TP \f3\-u \fP\f2user_list\fP\f3,\fP \f3\-\-user\fP\f3=\fP\f2user_list\fP Displays the statistics only for the jobs started by the specified \f2user_list\fP operand, which is a comma\-separated list of users. Space characters are not allowed in the \f2user_list\fP. --1 for all uids, default is current uid. If run as user root default -is all users. This is virtually the same as the --uid option\&. +\-1 for all uids, default is current uid. If run as user root default +is all users. This is virtually the same as the \-\-uid option\&. .TP \f3\-\-usage\fP diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index cc6669ec9..dacf52d3e 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -110,9 +110,10 @@ Identical to the \fBquit\fR command. Display a description of sacctmgr options and commands. .TP -\fBlist\fR <\fIENTITY\fR> [with <\fISPECS\fR>] -Display information about the specified entities. -By default, all entities are displayed. 
+\fBlist\fR <\fIENTITY\fR> [<\fISPECS\fR>] +Display information about the specified entity. +By default, all entries are displayed; you can narrow results by +specifying SPECS in your query. Identical to the \fBshow\fR command. .TP @@ -137,9 +138,10 @@ Terminate the execution of sacctmgr. Identical to the \fBexit\fR command. .TP -\fBshow\fR <\fIENTITY\fR> [with <\fISPECS\fR>] -Display information about the specified entities. -By default, all entities are displayed. +\fBshow\fR <\fIENTITY\fR> [<\fISPECS\fR>] +Display information about the specified entity. +By default, all entries are displayed; you can narrow results by +specifying SPECS in your query. Identical to the \fBlist\fR command. .TP @@ -156,8 +158,7 @@ Display the version number of sacctmgr. \fB!!\fP Repeat the last command. -.TP -\fBENTITIES\fR +.SH "ENTITIES" .TP \fIaccount\fP @@ -197,17 +198,7 @@ List of transactions that have occurred during a given time period. \fIuser\fR The login name. - -.TP -\fBSPECIFICATIONS FOR ACCOUNTS\fR - -.TP -\fICluster\fP=<cluster> -Specific cluster to add account to. Default is all in system. - -.TP -\fIDescription\fP=<description> -An arbitrary string describing an account. +.SH "GENERAL SPECIFICATIONS FOR ASSOCIATION BASED ENTITIES" .TP \fIFairshare\fP=<fairshare> @@ -219,7 +210,7 @@ To clear a previously set value use the modify command with a new value of \-1. Maximum number of CPU hours running jobs are able to be allocated in aggregate for this association and all association which are children of this association. To clear a previously set value use the modify command with a new -value of \-1. (NOTE: this limit is not currently enforced in SLURM. +value of \-1. (NOTE: This limit is not currently enforced in SLURM. You can still set this, but have to wait for future versions of SLURM before it is enforced.) @@ -228,7 +219,7 @@ before it is enforced.)
Maximum number of CPUs running jobs are able to be allocated in aggregate for this association and all association which are children of this association. To clear a previously set value use the modify command with a new -value of \-1. (NOTE: this limit is not currently enforced in SLURM. +value of \-1. (NOTE: This limit is not currently enforced in SLURM. You can still set this, but have to wait for future versions of SLURM before it is enforced.) @@ -263,7 +254,7 @@ Maximum number of CPU minutes each job is able to use in this account. This is overridden if set directly on a user. Default is the cluster's limit. To clear a previously set value use the modify command with a new -value of \-1. (NOTE: this limit is not currently enforced in SLURM. +value of \-1. (NOTE: This limit is not currently enforced in SLURM. You can still set this, but have to wait for future versions of SLURM before it is enforced.) @@ -273,7 +264,7 @@ Maximum number of CPUs each job is able to use in this account. This is overridden if set directly on a user. Default is the cluster's limit. To clear a previously set value use the modify command with a new -value of \-1. (NOTE: this limit is not currently enforced in SLURM. +value of \-1. (NOTE: This limit is not currently enforced in SLURM. You can still set this, but have to wait for future versions of SLURM before it is enforced.) @@ -309,29 +300,13 @@ Default is the cluster's limit. The value is recorded in minutes with rounding as needed. To clear a previously set value use the modify command with a new value of \-1. -.TP -\fIName\fP=<name> -The name of a bank account. - -.TP -\fIOrganization\fP=<org> -Organization to which the account belongs. - -.TP -\fIParent\fP=<parent> -Parent account of this account. Default is no parent, a top level account. - -.TP -\fIPartition\fP=<name> -Name of SLURM partition these limits apply to. - .TP \fIQosLevel\fP<operator><comma separated list of qos names> (For use with MOAB only.) 
Specify the default Quality of Service's that jobs are able to run at -for this account. To get a list of vaild QOS's use 'sacctmgr list qos'. +for this account. To get a list of valid QOS's use 'sacctmgr list qos'. This value will override it's parents value and push down to it's -childern as the new default. Setting a QosLevel to '' (two single +children as the new default. Setting a QosLevel to '' (two single quotes with nothing between them) restores it's default setting. You can also use the operator += and \-= to add or remove certain QOS's from a QOS list. @@ -349,108 +324,117 @@ Add the specified <qos> value to the current \fIQosLevel\fP . Remove the specified <qos> value from the current \fIQosLevel\fP. .RE +.SH "SPECIFICATIONS FOR ACCOUNTS" .TP -\fBSPECIFICATIONS FOR CLUSTERS\fR +\fICluster\fP=<cluster> +Specific cluster to add account to. Default is all in system. .TP -\fIFairshare\fP=<fairshare> -Number used in conjunction with other accounts to determine job priority. -To clear a previously set value use the modify command with a new value of \-1. +\fIDescription\fP=<description> +An arbitrary string describing an account. .TP -\fIGrpCPUMins\fP=<max cpu hours> -Maximum number of CPU hours running jobs are able to be allocated in aggregate for -this association and all association which are children of this association. -To clear a previously set value use the modify command with a new -value of \-1. (NOTE: this limit is not currently enforced in SLURM. -You can still set this, but have to wait for future versions of SLURM -before it is enforced.) +\fIName\fP=<name> +The name of a bank account. .TP -\fIGrpCPUs\fP=<max cpus> -Maximum number of CPUs running jobs are able to be allocated in aggregate for -this association and all association which are children of this association. -To clear a previously set value use the modify command with a new -value of \-1. (NOTE: this limit is not currently enforced in SLURM. 
-You can still set this, but have to wait for future versions of SLURM -before it is enforced.) +\fIOrganization\fP=<org> +Organization to which the account belongs. .TP -\fIGrpJobs\fP=<max cpus> -Maximum number of running jobs in aggregate for -this association and all association which are children of this association. -To clear a previously set value use the modify command with a new value of \-1. +\fIParent\fP=<parent> +Parent account of this account. Default is the root account, a top +level account. + +.SH "" +NOTE: If using the WithAssoc option you can also query against +association specific information to view only certain associations +this account may have. These extra options can be found in the +\fISPECIFICATIONS FOR ASSOCIATIONS\fP section. You can also use the +general specifications list above in the \fIGENERAL SPECIFICATIONS FOR +ASSOCIATION BASED ENTITIES\fP section. + +.SH "LIST/SHOW ACCOUNTS FORMAT OPTIONS" .TP -\fIGrpNodes\fP=<max nodes> -Maximum number of nodes running jobs are able to be allocated in aggregate for -this association and all association which are children of this association. -To clear a previously set value use the modify command with a new value of \-1. +\fIAccount\fP +The name of a bank account. .TP -\fIGrpSubmitJobs\fP=<max jobs> -Maximum number of jobs which can be in a pending or running state at any time -in aggregate for this association and all association which are children of -this association. -To clear a previously set value use the modify command with a new value of \-1. +\fIDescription\fP +An arbitrary string describing an account. .TP -\fIGrpWall\fP=<max wall> -Maximum wall clock time running jobs are able to be allocated in aggregate for -this association and all association which are children of this association. -To clear a previously set value use the modify command with a new value of \-1. +\fIOrganization\fP +Organization to which the account belongs. 
.TP -\fIMaxCPUMins\fP=<max cpu minutes> -Maximum number of CPU minutes each job is able to use in this account. -This is overridden if set directly on an account or user. -Default is no limit. -To clear a previously set value use the modify command with a new -value of \-1. (NOTE: this limit is not currently enforced in SLURM. -You can still set this, but have to wait for future versions of SLURM -before it is enforced.) - +\fICoordinators\fP +List of users that are a coordinator of the account. (Only filled in +when using the WithCoordinator option.) + +.SH "" +NOTE: If using the WithAssoc option you can also view the information +about the various associations the account may have on all the +clusters in the system. The Association format fields are described +in the \fILIST/SHOW ASSOCIATION FORMAT OPTIONS\fP section. +.RE + + +.SH "SPECIFICATIONS FOR ASSOCIATIONS" + .TP -\fIMaxCPUs\fP=<max cpus> -Maximum number of cpus each job is able to use in this account. -This is overridden if set directly on an account or user. -Default is no limit. -To clear a previously set value use the modify command with a new -value of \-1. (NOTE: this limit is not currently enforced in SLURM. -You can still set this, but have to wait for future versions of SLURM -before it is enforced.) +\fICluster\fP=<comma separated list of cluster names> +List the associations of the cluster(s). .TP -\fIMaxJobs\fP=<max jobs> -Maximum number of jobs each user is allowed to run at one time in this account. -This is overridden if set directly on an account or user. -Default is no limit. -To clear a previously set value use the modify command with a new value of \-1. +\fIAccount\fP=<comma separated list of account names> +List the associations of the account(s). .TP -\fIMaxNodes\fP=<max nodes> -Maximum number of nodes each job is able to use in this account. -This is overridden if set directly on an account or user. -Default is no limit. 
-To clear a previously set value use the modify command with a new value of \-1. -This is a c\-node limit on BlueGene systems. +\fIUser\fP=<comma separated list of user names> +List the associations of the user(s). .TP -\fIMaxSubmitJobs\fP=<max jobs> -Maximum number of jobs which can this account can have in a pending or running -state at any time. -To clear a previously set value use the modify command with a new value of \-1. +\fIPartition\fP=<comma separated list of partition names> +List the associations of the partition(s). + +.SH "" +NOTE: You can also use the general specifications list above in the +\fIGENERAL SPECIFICATIONS FOR ASSOCIATION BASED ENTITIES\fP section. + +\fBOther options unique for listing associations:\fP .TP -\fIMaxWall\fP=<max wall> -Maximum wall clock time each job is able to use in this account. -This is overridden if set directly on an account or user. -Default is no limit. -<max wall> format is <min> or <min>:<sec> or <hr>:<min>:<sec> or -<days>\-<hr>:<min>:<sec> or <days>\-<hr>. -The value is recorded in minutes with rounding as needed. -To clear a previously set value use the modify command with a new value of \-1. +\fITree\fP +Display account names in a hierarchical fashion. + +.TP +\fIWithDeleted\fP +Display information with previously deleted data. + +.TP +\fIWithSubAccounts\fP +Display information with subaccounts. Only really valuable when used +with the account= option. This will display all the subaccount +associations along with the accounts listed in the option. + +.TP +\fIWOPInfo\fP +Display information without parent information. (i.e. parent id, and +parent account name.) This option also invokes WOPLIMITS. + +.TP +\fIWOPLimits\fP +Display information without hierarchical parent limits. (i.e. will +only display limits where they are set instead of propagating them +from the parent.) + +.RE + + +.SH "SPECIFICATIONS FOR CLUSTERS" .TP \fIName\fP=<name> @@ -458,31 +442,41 @@ The name of a cluster. 
This should be equal to the \fIClusterName\fR parameter in the \fIslurm.conf\fR configuration file for some Slurm\-managed cluster. +.SH "" +NOTE: You can also use the general specifications list above in the +\fIGENERAL SPECIFICATIONS FOR ASSOCIATION BASED ENTITIES\fP section. + +.RE + +.SH "LIST/SHOW CLUSTER FORMAT OPTIONS" + .TP -\fIQosLevel\fP<operator><comma separated list of qos names> -(For use with MOAB only.) -Specify the default Quality of Service's that jobs are able to run at -for this cluster. To get a list of vaild QOS's use 'sacctmgr list qos'. -This value is overridden if a child has a QOS value directly set. -Setting a QosLevel to '' (two single quotes with nothing between them) -restores it's default setting. You can also use the operator += and -\-= to add or remove certain QOS's from a QOS list. -Valid <operator> values include: -.RS -.TP 5 -\fB=\fR -Set \fIQosLevel\fP to the specified value. +\fICluster\fP +The name of the cluster. + .TP -\fB+=\fR -Add the specified <qos> value to the current \fIQosLevel\fP . +\fIControl Host\fP +When a slurmctld registers with the database the ip address of the +controller is placed here. + .TP -\fB\-=\fR -Remove the specified <qos> value from the current \fIQosLevel\fP. +\fIControl Port\fP +When a slurmctld registers with the database the port the controller +is listening on is placed here. + +.TP +\fIRPC\fP +When a slurmctld registers with the database the rpc version the controller +is running is placed here. + +.SH "" +NOTE: You can also view the information about the root association for +the cluster. The Association format fields are described +in the \fILIST/SHOW ASSOCIATION FORMAT OPTIONS\fP section. .RE -.TP -\fBSPECIFICATIONS FOR COORDINATOR\fR +.SH "SPECIFICATIONS FOR COORDINATOR" .TP \fIAccounts\fP=<comma separated list of account names> Account name to add this user as a coordinator to. @@ -490,81 +484,95 @@ Account name to add this user as a coordinator to. 
\fINames\fP=<comma separated list of user names> Names of coordinators. +.SH "" +NOTE: To list coordinators use the WithCoordinator options with list +account or list user. +.RE -.TP -\fBSPECIFICATIONS FOR QOS\fR + +.SH "SPECIFICATIONS FOR QOS" .TP \fIDescription\fP=<description> An arbitrary string describing an account. .TP \fINames\fP=<qos> Names of qos. +.RE + + +.SH "SPECIFICATIONS FOR TRANSACTIONS" .TP -\fBSPECIFICATIONS FOR USERS\fR +\fIAccounts\fP=<comma separated list of account names> +Only print out the transactions affecting specified accounts. .TP -\fIAccount\fP=<account> -Account name to add this user to. +\fIAction\fP=<Specific action the list will display> .TP -\fIAdminLevel\fP=<level> -Admin level of user. Valid levels are None, Operator, and Admin. +\fIActor\fP=<Specific name the list will display> .TP -\fICluster\fP=<cluster> -Specific cluster to add user to the account on. Default is all in system. +\fIClusters\fP=<comma separated list of cluster names> +Only print out the transactions affecting specified clusters. .TP -\fIDefaultAccount\fP=<account> -Identify the default bank account name to be used for a job if none is -specified at submission time. +\fIEndTime\fP=<Date and time where list should end> .TP -\fIFairshare\fP=<fairshare> -Number used in conjunction with other users in the same account to -determine job priority. -To clear a previously set value use the modify command with a new value of \-1. +\fIStartTime\fP=<Date and time where list should begin> .TP -\fIMaxCPUMins\fP=<max cpu minutes> -Maximum number of CPU minutes each job is able to use for this user. -To clear a previously set value use the modify command with a new -value of \-1. (NOTE: this limit is not currently enforced in SLURM. -You can still set this, but have to wait for future versions of SLURM -before it is enforced.) +\fIUsers\fP=<comma separated list of user names> +Only print out the transactions affecting specified users. 
.TP -\fIMaxCPUs\fP=<max cpus> -Maximum number of CPUs each job is able to use for this user. -Default is the account's limit. -To clear a previously set value use the modify command with a new -value of \-1. (NOTE: this limit is not currently enforced in SLURM. -You can still set this, but have to wait for future versions of SLURM -before it is enforced.) +\fIWithAssoc\fP +Get information about which associations were affected by the transactions. +.RE + +.SH "LIST/SHOW TRANSACTIONS FORMAT OPTIONS" .TP -\fIMaxJobs\fP=<max jobs> -Maximum number of jobs each user is allowed to run at one time for this user. -Default is the account's limit. -To clear a previously set value use the modify command with a new value of \-1. +\fIAction\fP .TP -\fIMaxNodes\fP=<max nodes> -Maximum number of nodes this user can allocate in each job using the -account specified. -Default is the account's limit. -This is a c\-node limit on BlueGene systems. +\fIActor\fP .TP -\fIMaxWall\fP=<max wall> -Maximum wall clock time this user can use in each job using the -account specified. -Default is the account's limit. -<max wall> format is <min> or <min>:<sec> or <hr>:<min>:<sec> or -<days>\-<hr>:<min>:<sec> or <days>\-<hr>. -The is recorded in minutes with rounding as needed. -To clear a previously set value use the modify command with a new value of \-1. +\fIInfo\fP + +.TP +\fITimeStamp\fP + +.TP +\fIWhere\fP + +.SH "" +NOTE: If using the WithAssoc option you can also view the information +about the various associations the transaction affected. The +Association format fields are described +in the \fILIST/SHOW ASSOCIATION FORMAT OPTIONS\fP section. +.RE + +.SH "SPECIFICATIONS FOR USERS" + +.TP +\fIAccount\fP=<account> +Account name to add this user to. + +.TP +\fIAdminLevel\fP=<level> +Admin level of user. Valid levels are None, Operator, and Admin. + +.TP +\fICluster\fP=<cluster> +Specific cluster to add user to the account on. Default is all in system.
+ +.TP +\fIDefaultAccount\fP=<account> +Identify the default bank account name to be used for a job if none is +specified at submission time. .TP \fIName\fP=<name> @@ -574,27 +582,40 @@ Name of user. \fIPartition\fP=<name> Name of SLURM partition these limits apply to. +.SH "" +NOTE: If using the WithAssoc option you can also query against +association specific information to view only certain associations +this user may have. These extra options can be found in the +\fISPECIFICATIONS FOR ASSOCIATIONS\fP section. You can also use the +general specifications list above in the \fIGENERAL SPECIFICATIONS FOR +ASSOCIATION BASED ENTITIES\fP section. +.RE + +.SH "LIST/SHOW USER FORMAT OPTIONS" + .TP -\fIQosLevel\fP<operator><comma separated list of qos names> -(For use with MOAB only.) -Specify the default Quality of Service's that jobs are able to run at -for this user. To get a list of vaild QOS's use 'sacctmgr list qos'. -This value will override it's parents value. -Setting a QosLevel to '' (two single quotes with nothing between them) -restores it's default setting. You can also use the operator += and -\-= to add or remove certain QOS's from a QOS list. -.RS -.TP 5 -\fB=\fR -Set \fIQosLevel\fP to the specified value. +\fIAdminLevel\fP +Admin level of user. + .TP -\fB+=\fR -Add the specified <qos> value to the current \fIQosLevel\fP . +\fIDefaultAccount\fP +The user's default account. + .TP -\fB\-=\fR -Remove the specified <qos> value from the current \fIQosLevel\fP. -.RE +\fICoordinators\fP +List of users that are a coordinator of the account. (Only filled in +when using the WithCoordinator option.) + +.TP +\fIUser\fP +The name of a user. +.SH "" +NOTE: If using the WithAssoc option you can also view the information +about the various associations the user may have on all the +clusters in the system. The Association format fields are described +in the \fILIST/SHOW ASSOCIATION FORMAT OPTIONS\fP section.
+.RE .SH "FLAT FILE DUMP AND LOAD" sacctmgr has the capability to load and dump SLURM association data to and @@ -640,60 +661,61 @@ To edit/create a file start with a cluster line for the new cluster Anything included on this line will be the defaults for all associations on this cluster. These options are as follows... .TP -GrpCPUMins= +\fIGrpCPUMins=\fP Maximum number of CPU hours running jobs are able to be allocated in aggregate for this association and all association -which are children of this association. (NOTE: this limit is not +which are children of this association. (NOTE: This limit is not currently enforced in SLURM. You can still set this, but have to wait for future versions of SLURM before it is enforced.) .TP -GrpCPUs= +\fIGrpCPUs=\fP Maximum number of CPUs running jobs are able to be allocated in aggregate for this association and all association which -are children of this association. (NOTE: this limit is not currently +are children of this association. (NOTE: This limit is not currently enforced in SLURM. You can still set this, but have to wait for future versions of SLURM before it is enforced.) .TP -GrpJobs= +\fIGrpJobs=\fP Maximum number of running jobs in aggregate for this association and all association which are children of this association. .TP -GrpNodes= +\fIGrpNodes=\fP Maximum number of nodes running jobs are able to be allocated in aggregate for this association and all association which are children of this association. .TP -GrpSubmitJobs= +\fIGrpSubmitJobs=\fP Maximum number of jobs which can be in a pending or running state at any time in aggregate for this association and all association which are children of this association. .TP -GrpWall= +\fIGrpWall=\fP Maximum wall clock time running jobs are able to be allocated in aggregate for this association and all association which are children of this association. .TP -FairShare= +\fIFairShare=\fP To be used with a scheduler like MOAB to determine priority. 
.TP -MaxJobs= +\fIMaxJobs=\fP Maximum number of jobs the children of this account can run. .TP -MaxNodesPerJob= +\fIMaxNodesPerJob=\fP Maximum number of nodes per job the children of this account can run. .TP -MaxProcSecondsPerJob= +\fIMaxProcSecondsPerJob=\fP Maximum cpu seconds children of this accounts jobs can run. .TP -MaxWallDurationPerJob= +\fIMaxWallDurationPerJob=\fP Maximum time (not related to job size) children of this accounts jobs can run. .TP -QOS= +\fIQOS=\fP Comma separated list of Quality of Service names (Defined in sacctmgr). .TP Followed by Accounts you want in this fashion... +.na \fBParent\ \-\ root\fP (Defined by default) .br \fBAccount\ \-\ cs\fP:MaxNodesPerJob=5:MaxJobs=4:MaxProcSecondsPerJob=20:FairShare=399:MaxWallDurationPerJob=40:Description='Computer Science':Organization='LC' @@ -701,6 +723,7 @@ Followed by Accounts you want in this fashion... \fBParent\ \-\ cs\fP .br \fBAccount\ \-\ test\fP:MaxNodesPerJob=1:MaxJobs=1:MaxProcSecondsPerJob=1:FairShare=1:MaxWallDurationPerJob=1:Description='Test Account':Organization='Test' +.ad .TP Any of the options after a ':' can be left out and they can be in any order. @@ -709,61 +732,61 @@ BEEN CREATED before the account line in this fashion... .TP All account options are .TP -Description= +\fIDescription=\fP A brief description of the account. .TP -GrpCPUMins= +\fIGrpCPUMins=\fP Maximum number of CPU hours running jobs are able to be allocated in aggregate for this association and all association -which are children of this association. (NOTE: this limit is not +which are children of this association. (NOTE: This limit is not currently enforced in SLURM. You can still set this, but have to wait for future versions of SLURM before it is enforced.) .TP -GrpCPUs= +\fIGrpCPUs=\fP Maximum number of CPUs running jobs are able to be allocated in aggregate for this association and all association which -are children of this association. 
(NOTE: this limit is not currently +are children of this association. (NOTE: This limit is not currently enforced in SLURM. You can still set this, but have to wait for future versions of SLURM before it is enforced.) .TP -GrpJobs= +\fIGrpJobs=\fP Maximum number of running jobs in aggregate for this association and all association which are children of this association. .TP -GrpNodes= +\fIGrpNodes=\fP Maximum number of nodes running jobs are able to be allocated in aggregate for this association and all association which are children of this association. .TP -GrpSubmitJobs= +\fIGrpSubmitJobs=\fP Maximum number of jobs which can be in a pending or running state at any time in aggregate for this association and all association which are children of this association. .TP -GrpWall= +\fIGrpWall=\fP Maximum wall clock time running jobs are able to be allocated in aggregate for this association and all association which are children of this association. .TP -FairShare= +\fIFairShare=\fP To be used with a scheduler like MOAB to determine priority. .TP -MaxJobs= +\fIMaxJobs=\fP Maximum number of jobs the children of this account can run. .TP -MaxNodesPerJob= +\fIMaxNodesPerJob=\fP Maximum number of nodes per job the children of this account can run. .TP -MaxProcSecondsPerJob= +\fIMaxProcSecondsPerJob=\fP Maximum cpu seconds children of this accounts jobs can run. .TP -MaxWallDurationPerJob= +\fIMaxWallDurationPerJob=\fP Maximum time (not related to job size) children of this accounts jobs can run. .TP -Organization= +\fIOrganization=\fP Name of organization that owns this account. .TP -QOS(=,+=,\-=) +\fIQOS(=,+=,\-=)\fP Comma separated list of Quality of Service names (Defined in sacctmgr). .TP @@ -771,42 +794,44 @@ Comma separated list of Quality of Service names (Defined in sacctmgr).
To add users to a account add a line like this after a Parent \- line \fBParent\ \-\ test\fP .br +.na \fBUser\ \-\ adam\fP:MaxNodesPerJob=2:MaxJobs=3:MaxProcSecondsPerJob=4:FairShare=1:MaxWallDurationPerJob=1:AdminLevel=Operator:Coordinator='test' +.ad .TP All user options are .TP -AdminLevel= +\fIAdminLevel=\fP Type of admin this user is (Administrator, Operator) .br \fBMust be defined on the first occurrence of the user.\fP .TP -Coordinator= +\fICoordinator=\fP Comma separated list of accounts this user is coordinator over .br \fBMust be defined on the first occurrence of the user.\fP .TP -DefaultAccount= +\fIDefaultAccount=\fP system wide default account name .br \fBMust be defined on the first occurrence of the user.\fP .TP -FairShare= +\fIFairShare=\fP To be used with a scheduler like MOAB to determine priority. .TP -MaxJobs= +\fIMaxJobs=\fP Maximum number of jobs this user can run. .TP -MaxNodesPerJob= +\fIMaxNodesPerJob=\fP Maximum number of nodes per job this user can run. .TP -MaxProcSecondsPerJob= +\fIMaxProcSecondsPerJob=\fP Maximum cpu seconds this user can run per job. .TP -MaxWallDurationPerJob= +\fIMaxWallDurationPerJob=\fP Maximum time (not related to job size) this user can run. .TP -QOS(=,+=,\-=) +\fIQOS(=,+=,\-=)\fP Comma separated list of Quality of Service names (Defined in sacctmgr). .RE @@ -827,15 +852,18 @@ Comma separated list of Quality of Service names (Defined in sacctmgr). > sacctmgr modify user name=adam cluster=tux account=physics set maxjobs=2 maxtime=30:00 .br -> sacctmgr dump cluster=tux tux_data_file +> sacctmgr list associations cluster=tux format=Account,Cluster,User,Fairshare tree withd +.br +> sacctmgr list transactions StartTime=11/03\-10:30:00 format=Timestamp,Action,Actor +.br> sacctmgr dump cluster=tux tux_data_file .br > sacctmgr load tux_data_file .br .br When modifying an object placing the key words 'set' and the -optional 'where' is crtical to perform correctly below are examples to -produce correct results. 
As a rule of thumb any thing you put infront +optional 'where' is critical to perform correctly below are examples to +produce correct results. As a rule of thumb any thing you put in front of the set will be used as a quantifier. If you want to put a quantifier after the key word 'set' you should use the key word 'where'. @@ -859,15 +887,15 @@ right> sacctmgr modify user name=adam set fairshare=10 where cluster=tux .br (For use with MOAB only) When changing qos for something only use the '=' operator when wanting -to explitally set the qos to something. In most cases you will want -to use the '+=' or '-=' operator to either add to or remove from the +to explicitly set the qos to something. In most cases you will want +to use the '+=' or '\-=' operator to either add to or remove from the existing qos already in place. .br .br If a user already has qos of normal,standby for a parent or it was explicitly set you should use qos+=expedite to add this to the list in -this fashon. +this fashion. .br .br @@ -876,7 +904,7 @@ this fashon. .br If you are looking to only add the qos expedite to only a certain -accoun and or cluster you can do that by specifing them in the +account and or cluster you can do that by specifying them in the sacctmgr line. .br diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index c4451cfaa..9b53adbcb 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -149,7 +149,7 @@ change directory to \fIpath\fR before beginning execution. .TP \fB\-\-exclusive\fR The job allocation cannot share nodes with other running jobs. This is -the oposite of \-\-shared, whichever option is seen last on the command line +the opposite of \-\-share, whichever option is seen last on the command line will win. (The default shared/exclusive behaviour depends on system configuration.) @@ -510,7 +510,7 @@ Suppress informational messages from salloc. Errors will still be displayed. 
\fB\-s\fR, \fB\-\-share\fR The job allocation can share nodes with other running jobs. (The default shared/exclusive behaviour depends on system configuration.) -This may result the allocation being granted sooner than if the \-\-shared +This may result in the allocation being granted sooner than if the \-\-share option was not set and allow higher system utilization, but application performance will likely suffer due to competition for resources within a node. diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 45290f9ce..563d53fc1 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -146,7 +146,7 @@ See the \fB\-\-input\fR option for filename specification options. .TP \fB\-\-exclusive\fR The job allocation cannot share nodes with other running jobs. This is -the oposite of \-\-shared, whichever option is seen last on the command line +the opposite of \-\-share, whichever option is seen last on the command line will win. (The default shared/exclusive behaviour depends on system configuration.) @@ -587,7 +587,7 @@ behavior on the cluster. \fB\-s\fR, \fB\-\-share\fR The job allocation can share nodes with other running jobs. (The default shared/exclusive behaviour depends on system configuration.) -This may result the allocation being granted sooner than if the \-\-shared +This may result in the allocation being granted sooner than if the \-\-share option was not set and allow higher system utilization, but application performance will likely suffer due to competition for resources within a node. diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index 34343bd2e..e384256e5 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -114,7 +114,7 @@ This is the default behavior. .TP \fBnotify\fP \fIjob_id\fP \fImessage\fP -Send a message to standard output of the srun command associated with the +Send a message to standard error of the srun command associated with the specified \fIjob_id\fP. 
.TP @@ -562,7 +562,7 @@ JobId=65539 UserId=1500 JobState=PENDING TimeLimit=0:20:00 .br scontrol: update JobId=65539 TimeLimit=30:00 Priority=500 .br -scontrol: show hosts tux[1-3] +scontrol: show hostnames tux[1-3] .br tux1 .br diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1 index 146b4dec2..d0c73845f 100644 --- a/doc/man/man1/sinfo.1 +++ b/doc/man/man1/sinfo.1 @@ -473,7 +473,7 @@ debug* up 30:00 0/8/0/8 adev[0-7] Report more complete information about the partition debug: .nf -> sinfo \-\-long \-\-partition=debug +> sinfo --long --partition=debug PARTITION AVAIL TIMELIMIT JOB_SIZE ROOT SHARE GROUPS NODES STATE NODELIST debug* up 30:00 8 no no all 8 idle dev[0-7] .fi diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1 index 9941d0e60..28a453d12 100644 --- a/doc/man/man1/squeue.1 +++ b/doc/man/man1/squeue.1 @@ -50,8 +50,8 @@ Report more of the available information for the selected jobs or job steps, subject to any constraints specified. .TP -\fB\-n <node_name>\fR, \fB\-\-node=<node_name>\fR -Report only on jobs allocated to the specified node. +\fB\-n <hostlist>\fR, \fB\-\-nodes=<hostlist>\fR +Report only on jobs allocated to the specified node or list of nodes. This may either be the \fBNodeName\fR or \fBNodeHostname\fR as defined in \fBslurm.conf(5)\fR in the event that they differ. A node_name of \fBlocalhost\fR is mapped to the current host name. diff --git a/doc/man/man3/slurm_free_job_info_msg.3 b/doc/man/man3/slurm_free_job_info_msg.3 index ce1b8dabd..ff193a185 100644 --- a/doc/man/man3/slurm_free_job_info_msg.3 +++ b/doc/man/man3/slurm_free_job_info_msg.3 @@ -186,7 +186,7 @@ greater than the last time changes where made to that information, new information is not returned. Otherwise all the configuration. job, node, or partition records are returned. .SH "DESCRIPTION" -.JP +.LP \fBslurm_free_resource_allocation_response_msg\fR Free slurm resource allocation response message. 
.LP diff --git a/doc/man/man3/slurm_slurmd_status.3 b/doc/man/man3/slurm_slurmd_status.3 index 87c4badc1..60ef74bcb 100644 --- a/doc/man/man3/slurm_slurmd_status.3 +++ b/doc/man/man3/slurm_slurmd_status.3 @@ -3,6 +3,7 @@ .SH "NAME" slurm_free_slurmd_status, slurm_load_slurmd_status, slurm_print_slurmd_status +\- Slurmd status functions .SH "SYNTAX" .LP diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index b5afb15d8..61fb4269d 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -338,7 +338,7 @@ each individual node. \fB2\fR Consider the configuration of each node to be that specified in the slurm.conf configuration file and any node with less resources -than configured will not be set DOWN. +than configured will \fBnot\fR be set DOWN. This can be useful for testing purposes. .RE @@ -720,6 +720,9 @@ options may not be supported on some systems): \fBALL\fR All limits listed below .TP +\fBNONE\fR +No limits listed below +.TP \fBAS\fR The maximum address space for a processes .TP @@ -957,7 +960,7 @@ The default value is "root". .TP \fBSlurmctldDebug\fR The level of detail to provide \fBslurmctld\fR daemon's logs. -Values from 0 to 7 are legal, with `0' being "quiet" operation and `7' +Values from 0 to 9 are legal, with `0' being "quiet" operation and `9' being insanely verbose. The default value is 3. @@ -992,7 +995,7 @@ May not exceed 65533. .TP \fBSlurmdDebug\fR The level of detail to provide \fBslurmd\fR daemon's logs. -Values from 0 to 7 are legal, with `0' being "quiet" operation and `7' being +Values from 0 to 9 are legal, with `0' being "quiet" operation and `9' being insanely verbose. The default value is 3. diff --git a/doc/man/man5/slurmdbd.conf.5 b/doc/man/man5/slurmdbd.conf.5 index ab40162d9..b7c695f6a 100644 --- a/doc/man/man5/slurmdbd.conf.5 +++ b/doc/man/man5/slurmdbd.conf.5 @@ -94,8 +94,8 @@ slurm.conf file. .TP \fBDebugLevel\fR The level of detail to provide the Slurm Database Daemon's logs. 
-Values from 0 to 7 are legal, with `0' being "quiet" operation and -`7' being insanely verbose. +Values from 0 to 9 are legal, with `0' being "quiet" operation and +`9' being insanely verbose. The default value is 3. .TP diff --git a/slurm.spec b/slurm.spec index 7f2f983f3..c94e1ea6a 100644 --- a/slurm.spec +++ b/slurm.spec @@ -71,14 +71,14 @@ %endif Name: slurm -Version: 1.3.9 +Version: 1.3.10 Release: 1%{?dist} Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-1.3.9.tar.bz2 +Source: slurm-1.3.10.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ @@ -256,7 +256,7 @@ SLURM process tracking plugin for SGI job containers. ############################################################################# %prep -%setup -n slurm-1.3.9 +%setup -n slurm-1.3.10 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ diff --git a/src/Makefile.in b/src/Makefile.in index a7366a69a..ead7a79fe 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -182,6 +182,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/api/Makefile.in b/src/api/Makefile.in index 59d5d3dc0..a820b5d8e 100644 --- a/src/api/Makefile.in +++ b/src/api/Makefile.in @@ -230,6 +230,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/common/Makefile.in b/src/common/Makefile.in index e8dddce1a..97f26bb38 100644 --- a/src/common/Makefile.in +++ b/src/common/Makefile.in @@ -277,6 +277,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO 
= @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index 1e4461899..3030490d3 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -147,21 +147,43 @@ static int _local_update_assoc_qos_list(acct_association_rec_t *assoc, static int _set_assoc_parent_and_user(acct_association_rec_t *assoc, List assoc_list) { + static acct_association_rec_t *last_acct_parent = NULL; + static acct_association_rec_t *last_parent = NULL; + if(!assoc || !assoc_list) { error("you didn't give me an association"); return SLURM_ERROR; } if(assoc->parent_id) { - acct_association_rec_t *assoc2 = NULL; - ListIterator itr = list_iterator_create(assoc_list); - while((assoc2 = list_next(itr))) { - if(assoc2->id == assoc->parent_id) { - assoc->parent_assoc_ptr = assoc2; - break; + /* To speed things up we are first looking if we have + a parent_id to look for. If that doesn't work see + if the last parent we had was what we are looking + for. Then if that isn't panning out look at the + last account parent. If still we don't have it we + will look for it in the list. 
If it isn't there we + will just add it to the parent and call it good + */ + if(last_parent && assoc->parent_id == last_parent->id) { + assoc->parent_assoc_ptr = last_parent; + } else if(last_acct_parent + && assoc->parent_id == last_acct_parent->id) { + assoc->parent_assoc_ptr = last_acct_parent; + } else { + acct_association_rec_t *assoc2 = NULL; + ListIterator itr = list_iterator_create(assoc_list); + while((assoc2 = list_next(itr))) { + if(assoc2->id == assoc->parent_id) { + assoc->parent_assoc_ptr = assoc2; + if(assoc->user) + last_parent = assoc2; + else + last_acct_parent = assoc2; + break; + } } + list_iterator_destroy(itr); } - list_iterator_destroy(itr); } if(assoc->user) { @@ -380,8 +402,12 @@ static int _refresh_local_association_list(void *db_conn, int enforce) curr_itr = list_iterator_create(current_assocs); local_itr = list_iterator_create(local_association_list); - /* add limitss */ + + /* add used limits We only look for the user associations to + * do the parents since a parent may have moved */ while((curr_assoc = list_next(curr_itr))) { + if(!curr_assoc->user) + continue; while((assoc = list_next(local_itr))) { if(assoc->id == curr_assoc->id) break; @@ -389,9 +415,13 @@ static int _refresh_local_association_list(void *db_conn, int enforce) if(!assoc) continue; - assoc->used_jobs = curr_assoc->used_jobs; - assoc->used_submit_jobs = curr_assoc->used_submit_jobs; - assoc->used_shares = curr_assoc->used_shares; + + while(assoc->parent_assoc_ptr) { + assoc->used_jobs += curr_assoc->used_jobs; + assoc->used_submit_jobs += curr_assoc->used_submit_jobs; + assoc->used_shares += curr_assoc->used_shares; + assoc = assoc->parent_assoc_ptr; + } list_iterator_reset(local_itr); } @@ -938,9 +968,35 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update) */ if(parents_changed) { list_iterator_reset(itr); - while((object = list_next(itr))) + while((object = list_next(itr))) { + /* reset the limits because since a parent + changed we could have 
different usage + */ + if(!object->user) { + object->used_jobs = 0; + object->used_submit_jobs = 0; + object->used_shares = 0; + } _set_assoc_parent_and_user( object, local_association_list); + } + /* Now that we have set up the parents correctly we + can update the used limits + */ + list_iterator_reset(itr); + while((object = list_next(itr))) { + if(!object->user) + continue; + + rec = object; + while(object->parent_assoc_ptr) { + object->used_jobs += rec->used_jobs; + object->used_submit_jobs += + rec->used_submit_jobs; + object->used_shares += rec->used_shares; + object = object->parent_assoc_ptr; + } + } } list_iterator_destroy(itr); diff --git a/src/common/env.c b/src/common/env.c index 44e9216c2..f477bc54d 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -61,6 +61,7 @@ #include "src/common/xstring.h" #include "src/common/node_select.h" #include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_step_layout.h" /* * Define slurm-specific aliases for use by plugins, see slurm_xlator.h @@ -591,7 +592,7 @@ int setup_env(env_t *env) rc = SLURM_FAILURE; } - if (env->task_count + if (env->task_count && setenvf (&env->env, "SLURM_TASKS_PER_NODE", "%s", env->task_count)) { error ("Can't set SLURM_TASKS_PER_NODE env variable"); @@ -779,12 +780,15 @@ extern char *uint32_compressed_to_str(uint32_t array_len, * SLURM_JOBID * SLURM_NNODES * SLURM_NODELIST - * SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node + * SLURM_TASKS_PER_NODE */ void -env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc) +env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, + job_desc_msg_t *desc) { char *bgl_part_id = NULL, *tmp; + slurm_step_layout_t *step_layout = NULL; + uint32_t num_tasks = desc->num_tasks; env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id); env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", @@ -796,6 +800,8 @@ env_array_for_job(char ***dest, const 
resource_allocation_response_msg_t *alloc) alloc->cpus_per_node, alloc->cpu_count_reps); env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); + xfree(tmp); + #ifdef HAVE_AIX /* this puts the "poe" command into batch mode */ env_array_overwrite(dest, "LOADLBATCH", "yes"); @@ -815,8 +821,20 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc) env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", alloc->job_id); env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", alloc->node_cnt); env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", alloc->node_list); + + if(num_tasks == NO_VAL) + num_tasks = desc->num_procs; + step_layout = slurm_step_layout_create(alloc->node_list, + alloc->cpus_per_node, + alloc->cpu_count_reps, + alloc->node_cnt, + num_tasks, + desc->task_dist, + desc->plane_size); + tmp = _uint16_array_to_str(step_layout->node_cnt, + step_layout->tasks); + slurm_step_layout_destroy(step_layout); env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp); - xfree(tmp); } @@ -840,7 +858,8 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc) * SLURM_JOBID * SLURM_NNODES * SLURM_NODELIST - * SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node + * SLURM_NPROCS + * SLURM_TASKS_PER_NODE */ extern void env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, @@ -849,6 +868,7 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, char *tmp; uint32_t num_nodes = 0; int i; + slurm_step_layout_t *step_layout = NULL; /* there is no explicit node count in the batch structure, so we need to calculate the node count */ @@ -863,6 +883,7 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, batch->cpus_per_node, batch->cpu_count_reps); env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); + env_array_overwrite_fmt(dest, "ENVIRONMENT", "BATCH"); if (node_name) env_array_overwrite_fmt(dest, "HOSTNAME", "%s", 
node_name); @@ -875,8 +896,23 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id); env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", num_nodes); env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes); + if(batch->nprocs) { + xfree(tmp); + env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u", + batch->nprocs); + step_layout = slurm_step_layout_create(batch->nodes, + batch->cpus_per_node, + batch->cpu_count_reps, + num_nodes, + batch->nprocs, + (uint16_t) + SLURM_DIST_BLOCK, + (uint16_t)NO_VAL); + tmp = _uint16_array_to_str(step_layout->node_cnt, + step_layout->tasks); + slurm_step_layout_destroy(step_layout); + } env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp); - xfree(tmp); } @@ -1358,11 +1394,12 @@ static char **_load_env_cache(const char *username) */ char **env_array_user_default(const char *username, int timeout, int mode) { - char *line = NULL, *last = NULL, name[128], *value, *buffer; + char *line = NULL, *last = NULL, name[MAXPATHLEN], *value, *buffer; char **env = NULL; char *starttoken = "XXXXSLURMSTARTPARSINGHEREXXXX"; char *stoptoken = "XXXXSLURMSTOPPARSINGHEREXXXXX"; char cmdstr[256], *env_loc = NULL; + char stepd_path[MAXPATHLEN]; int fildes[2], found, fval, len, rc, timeleft; int buf_read, buf_rem, config_timeout; pid_t child; @@ -1375,6 +1412,8 @@ char **env_array_user_default(const char *username, int timeout, int mode) return NULL; } + snprintf(stepd_path, sizeof(stepd_path), "%s/sbin/slurmstepd", + SLURM_PREFIX); config_timeout = slurm_get_env_timeout(); if (config_timeout == 0) /* just read directly from cache */ return _load_env_cache(username); @@ -1383,7 +1422,10 @@ char **env_array_user_default(const char *username, int timeout, int mode) fatal("Could not locate command: /bin/su"); if (stat("/bin/echo", &buf)) fatal("Could not locate command: /bin/echo"); - if (stat("/bin/env", &buf) == 0) + if (stat(stepd_path, &buf) == 
0) { + snprintf(name, sizeof(name), "%s getenv", stepd_path); + env_loc = name; + } else if (stat("/bin/env", &buf) == 0) env_loc = "/bin/env"; else if (stat("/usr/bin/env", &buf) == 0) env_loc = "/usr/bin/env"; diff --git a/src/common/env.h b/src/common/env.h index d47b90cf7..29e097a4d 100644 --- a/src/common/env.h +++ b/src/common/env.h @@ -105,7 +105,8 @@ int setup_env(env_t *env); * ? probably only needed for users... */ void env_array_for_job(char ***dest, - const resource_allocation_response_msg_t *alloc); + const resource_allocation_response_msg_t *alloc, + job_desc_msg_t *desc); /* * Set in "dest" the environment variables relevant to a SLURM batch diff --git a/src/common/hostlist.c b/src/common/hostlist.c index 2b9cf4a1b..43b73b8f9 100644 --- a/src/common/hostlist.c +++ b/src/common/hostlist.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * $Id: hostlist.c 13672 2008-03-19 23:10:58Z jette $ + * $Id: hostlist.c 15414 2008-10-14 22:38:17Z jette $ ***************************************************************************** * $LSDId: hostlist.c,v 1.14 2003/10/14 20:11:54 grondo Exp $ ***************************************************************************** @@ -1016,19 +1016,48 @@ static hostrange_t hostrange_intersect(hostrange_t h1, hostrange_t h2) */ static int hostrange_hn_within(hostrange_t hr, hostname_t hn) { - int retval = 0; + if (hr->singlehost) { + /* + * If the current hostrange [hr] is a `singlehost' (no valid + * numeric suffix (lo and hi)), then the hostrange [hr] + * stores just one host with name == hr->prefix. + * + * Thus the full hostname in [hn] must match hr->prefix, in + * which case we return true. Otherwise, there is no + * possibility that [hn] matches [hr]. 
+ */ + if (strcmp (hn->hostname, hr->prefix) == 0) + return 1; + else + return 0; + } - if (strcmp(hr->prefix, hn->prefix) == 0) { - if (!hostname_suffix_is_valid(hn)) { - if (hr->singlehost) - retval = 1; - } else if (hn->num <= hr->hi && hn->num >= hr->lo) { + /* + * Now we know [hr] is not a "singlehost", so hostname + * better have a valid numeric suffix, or there is no + * way we can match + */ + if (!hostname_suffix_is_valid (hn)) + return 0; + + /* + * If hostrange and hostname prefixes don't match, then + * there is no way the hostname falls within the range [hr]. + */ + if (strcmp(hr->prefix, hn->prefix) != 0) + return 0; + + /* + * Finally, check whether [hn], with a valid numeric suffix, + * falls within the range of [hr]. + */ + if (hn->num <= hr->hi && hn->num >= hr->lo) { int width = hostname_suffix_width(hn); int num = hn->num; - retval = _width_equiv(hr->lo, &hr->width, num, &width); - } + return (_width_equiv(hr->lo, &hr->width, num, &width)); } - return retval; + + return 0; } @@ -2251,6 +2280,7 @@ static void hostlist_coalesce(hostlist_t hl) /* attempt to join ranges at loc and loc-1 in a hostlist */ /* delete duplicates, return the number of hosts deleted */ /* assumes that the hostlist hl has been locked by caller */ +/* returns -1 if no range join occurred */ static int _attempt_range_join(hostlist_t hl, int loc) { int ndup; @@ -2290,7 +2320,7 @@ void hostlist_uniq(hostlist_t hl) } -size_t hostlist_deranged_string(hostlist_t hl, size_t n, char *buf) +ssize_t hostlist_deranged_string(hostlist_t hl, size_t n, char *buf) { int i; int len = 0; @@ -2544,7 +2574,7 @@ _test_box(void) } #endif -size_t hostlist_ranged_string(hostlist_t hl, size_t n, char *buf) +ssize_t hostlist_ranged_string(hostlist_t hl, size_t n, char *buf) { int i = 0; int len = 0; @@ -2857,12 +2887,14 @@ void hostset_destroy(hostset_t set) /* inserts a single range object into a hostset * Assumes that the set->hl lock is already held + * Updates hl->nhosts */ static int 
hostset_insert_range(hostset_t set, hostrange_t hr) { - int i, n = 0; + int i = 0; int inserted = 0; - int retval = 0; + int nhosts = 0; + int ndups = 0; hostlist_t hl; hl = set->hl; @@ -2870,24 +2902,25 @@ static int hostset_insert_range(hostset_t set, hostrange_t hr) if (hl->size == hl->nranges && !hostlist_expand(hl)) return 0; - retval = hostrange_count(hr); + nhosts = hostrange_count(hr); for (i = 0; i < hl->nranges; i++) { if (hostrange_cmp(hr, hl->hr[i]) <= 0) { - n = hostrange_join(hr, hl->hr[i]); - if (n >= 0) { + if ((ndups = hostrange_join(hr, hl->hr[i])) >= 0) hostlist_delete_range(hl, i); - hl->nhosts -= n; - } + else if (ndups < 0) + ndups = 0; hostlist_insert_range(hl, hr, i); /* now attempt to join hr[i] and hr[i-1] */ if (i > 0) { - int m = _attempt_range_join(hl, i); - n += m; + int m; + if ((m = _attempt_range_join(hl, i)) > 0) + ndups += m; } + hl->nhosts += nhosts - ndups; inserted = 1; break; } @@ -2895,10 +2928,17 @@ static int hostset_insert_range(hostset_t set, hostrange_t hr) if (inserted == 0) { hl->hr[hl->nranges++] = hostrange_copy(hr); - n = _attempt_range_join(hl, hl->nranges - 1); + hl->nhosts += nhosts; + if (hl->nranges > 1) { + if ((ndups = _attempt_range_join(hl, hl->nranges - 1)) <= 0) + ndups = 0; + } } - return retval - n; + /* + * Return the number of unique hosts inserted + */ + return nhosts - ndups; } int hostset_insert(hostset_t set, const char *hosts) @@ -2939,6 +2979,30 @@ static int hostset_find_host(hostset_t set, const char *host) return retval; } +int hostset_intersects(hostset_t set, const char *hosts) +{ + int retval = 0; + hostlist_t hl; + char *hostname; + + assert(set->hl->magic == HOSTLIST_MAGIC); + + hl = hostlist_create(hosts); + if (!hl) /* malloc failure */ + return retval; + + while ((hostname = hostlist_pop(hl)) != NULL) { + retval += hostset_find_host(set, hostname); + free(hostname); + if (retval) + break; + } + + hostlist_destroy(hl); + + return retval; +} + int hostset_within(hostset_t set, const 
char *hosts) { int nhosts, nfound; @@ -2947,7 +3011,8 @@ int hostset_within(hostset_t set, const char *hosts) assert(set->hl->magic == HOSTLIST_MAGIC); - hl = hostlist_create(hosts); + if (!(hl = hostlist_create(hosts))) + return (0); nhosts = hostlist_count(hl); nfound = 0; @@ -2996,12 +3061,12 @@ int hostset_count(hostset_t set) return hostlist_count(set->hl); } -size_t hostset_ranged_string(hostset_t set, size_t n, char *buf) +ssize_t hostset_ranged_string(hostset_t set, size_t n, char *buf) { return hostlist_ranged_string(set->hl, n, buf); } -size_t hostset_deranged_string(hostset_t set, size_t n, char *buf) +ssize_t hostset_deranged_string(hostset_t set, size_t n, char *buf) { return hostlist_deranged_string(set->hl, n, buf); } diff --git a/src/common/hostlist.h b/src/common/hostlist.h index bc4061cc8..4fe69800c 100644 --- a/src/common/hostlist.h +++ b/src/common/hostlist.h @@ -1,5 +1,5 @@ /*****************************************************************************\ - * $Id: hostlist.h 13672 2008-03-19 23:10:58Z jette $ + * $Id: hostlist.h 15445 2008-10-17 16:39:54Z jette $ ***************************************************************************** * $LSDId: hostlist.h,v 1.4 2003/09/19 21:37:34 grondo Exp $ ***************************************************************************** @@ -40,6 +40,8 @@ #ifndef _HOSTLIST_H #define _HOSTLIST_H +#include <unistd.h> /* load ssize_t definition */ + /* max size of internal hostrange buffer */ #define MAXHOSTRANGELEN 8192 @@ -319,7 +321,7 @@ void hostlist_uniq(hostlist_t hl); * hostlist_ranged_string() will write a bracketed hostlist representation * where possible. */ -size_t hostlist_ranged_string(hostlist_t hl, size_t n, char *buf); +ssize_t hostlist_ranged_string(hostlist_t hl, size_t n, char *buf); /* hostlist_deranged_string(): * @@ -330,7 +332,7 @@ size_t hostlist_ranged_string(hostlist_t hl, size_t n, char *buf); * hostlist_deranged_string() will not attempt to write a bracketed * hostlist representation. 
Every hostname will be explicitly written. */ -size_t hostlist_deranged_string(hostlist_t hl, size_t n, char *buf); +ssize_t hostlist_deranged_string(hostlist_t hl, size_t n, char *buf); /* ----[ hostlist utility functions ]---- */ @@ -433,6 +435,12 @@ int hostset_insert(hostset_t set, const char *hosts); */ int hostset_delete(hostset_t set, const char *hosts); +/* hostset_intersects(): + * Return 1 if any of the hosts specified by "hosts" are within the hostset "set" + * Return 0 if all host in "hosts" is not in the hostset "set" + */ +int hostset_intersects(hostset_t set, const char *hosts); + /* hostset_within(): * Return 1 if all hosts specified by "hosts" are within the hostset "set" * Retrun 0 if every host in "hosts" is not in the hostset "set" @@ -444,6 +452,11 @@ int hostset_within(hostset_t set, const char *hosts); */ char * hostset_shift(hostset_t set); +/* hostset_pop(): + * hostset equivalent to hostlist_pop() + */ +char *hostset_pop(hostset_t set); + /* hostset_shift_range(): * hostset eqivalent to hostlist_shift_range() */ @@ -465,4 +478,9 @@ int hostset_find(hostset_t set, const char *hostname); char * hostset_nth(hostset_t set, int n); +/* hostset_ranged_string(): + * hostset equivelent to hostlist_ranged_string(); + */ +ssize_t hostset_ranged_string(hostset_t set, size_t n, char *buf); + #endif /* !_HOSTLIST_H */ diff --git a/src/common/slurm_accounting_storage.c b/src/common/slurm_accounting_storage.c index 8ce4ae862..fa5facb17 100644 --- a/src/common/slurm_accounting_storage.c +++ b/src/common/slurm_accounting_storage.c @@ -136,7 +136,8 @@ typedef struct slurm_acct_storage_ops { int (*c_get_usage) (void *db_conn, uint32_t uid, void *cluster_rec, time_t start, time_t end); - int (*register_ctld) (char *cluster, uint16_t port); + int (*register_ctld) (void *db_conn, char *cluster, + uint16_t port); int (*job_start) (void *db_conn, char *cluster_name, struct job_record *job_ptr); int (*job_complete) (void *db_conn, @@ -332,6 +333,90 @@ static int 
_acct_storage_context_destroy(slurm_acct_storage_context_t *c) return SLURM_SUCCESS; } +/* + * Comparator used for sorting immediate childern of acct_hierarchical_recs + * + * returns: -1: assoc_a > assoc_b 0: assoc_a == assoc_b 1: assoc_a < assoc_b + * + */ + +static int _sort_childern_list(acct_hierarchical_rec_t *assoc_a, + acct_hierarchical_rec_t *assoc_b) +{ + int diff = 0; + + /* first just check the lfts and rgts if a lft is inside of the + * others lft and rgt just return it is less + */ + if(assoc_a->assoc->lft > assoc_b->assoc->lft + && assoc_a->assoc->lft < assoc_b->assoc->rgt) + return 1; + + /* check to see if this is a user association or an account. + * We want the accounts at the bottom + */ + if(assoc_a->assoc->user && !assoc_b->assoc->user) + return -1; + else if(!assoc_a->assoc->user && assoc_b->assoc->user) + return 1; + + diff = strcmp(assoc_a->sort_name, assoc_b->sort_name); + + if (diff < 0) + return -1; + else if (diff > 0) + return 1; + + return 0; + +} + +static int _sort_acct_hierarchical_rec_list(List acct_hierarchical_rec_list) +{ + acct_hierarchical_rec_t *acct_hierarchical_rec = NULL; + ListIterator itr; + + if(!list_count(acct_hierarchical_rec_list)) + return SLURM_SUCCESS; + + list_sort(acct_hierarchical_rec_list, (ListCmpF)_sort_childern_list); + + itr = list_iterator_create(acct_hierarchical_rec_list); + while((acct_hierarchical_rec = list_next(itr))) { + if(list_count(acct_hierarchical_rec->childern)) + _sort_acct_hierarchical_rec_list( + acct_hierarchical_rec->childern); + } + list_iterator_destroy(itr); + + return SLURM_SUCCESS; +} + +static int _append_hierarchical_childern_ret_list( + List ret_list, List acct_hierarchical_rec_list) +{ + acct_hierarchical_rec_t *acct_hierarchical_rec = NULL; + ListIterator itr; + + if(!ret_list) + return SLURM_ERROR; + + if(!list_count(acct_hierarchical_rec_list)) + return SLURM_SUCCESS; + + itr = list_iterator_create(acct_hierarchical_rec_list); + while((acct_hierarchical_rec = 
list_next(itr))) { + list_append(ret_list, acct_hierarchical_rec->assoc); + + if(list_count(acct_hierarchical_rec->childern)) + _append_hierarchical_childern_ret_list( + ret_list, acct_hierarchical_rec->childern); + } + list_iterator_destroy(itr); + + return SLURM_SUCCESS; +} + extern void destroy_acct_user_rec(void *object) { acct_user_rec_t *acct_user = (acct_user_rec_t *)object; @@ -661,6 +746,21 @@ extern void destroy_acct_print_tree(void *object) } } +extern void destroy_acct_hierarchical_rec(void *object) +{ + /* Most of this is pointers to something else that will be + * destroyed elsewhere. + */ + acct_hierarchical_rec_t *acct_hierarchical_rec = + (acct_hierarchical_rec_t *)object; + if(acct_hierarchical_rec) { + if(acct_hierarchical_rec->childern) { + list_destroy(acct_hierarchical_rec->childern); + } + xfree(acct_hierarchical_rec); + } +} + extern void init_acct_association_rec(acct_association_rec_t *assoc) { if(!assoc) @@ -929,7 +1029,7 @@ extern void pack_acct_used_limits(void *in, uint16_t rpc_version, Buf buffer) extern int unpack_acct_used_limits(void **object, uint16_t rpc_version, Buf buffer) { - acct_used_limits_t *object_ptr = xmalloc(sizeof(shares_used_object_t)); + acct_used_limits_t *object_ptr = xmalloc(sizeof(acct_used_limits_t)); *object = (void *)object_ptr; @@ -963,7 +1063,8 @@ extern void pack_update_shares_used(void *in, uint16_t rpc_version, Buf buffer) pack32(object->shares_used, buffer); } -extern int unpack_update_shares_used(void **object, uint16_t rpc_version, Buf buffer) +extern int unpack_update_shares_used(void **object, uint16_t rpc_version, + Buf buffer) { shares_used_object_t *object_ptr = xmalloc(sizeof(shares_used_object_t)); @@ -4160,6 +4261,104 @@ extern acct_admin_level_t str_2_acct_admin_level(char *level) } } +/* This reorders the list into a alphabetical hierarchy returned in a + * separate list. 
The orginal list is not affected */ +extern List get_hierarchical_sorted_assoc_list(List assoc_list) +{ + List acct_hierarchical_rec_list = + get_acct_hierarchical_rec_list(assoc_list); + List ret_list = list_create(NULL); + + _append_hierarchical_childern_ret_list(ret_list, + acct_hierarchical_rec_list); + list_destroy(acct_hierarchical_rec_list); + + return ret_list; +} + +extern List get_acct_hierarchical_rec_list(List assoc_list) +{ + acct_hierarchical_rec_t *par_arch_rec = NULL; + acct_hierarchical_rec_t *last_acct_parent = NULL; + acct_hierarchical_rec_t *last_parent = NULL; + acct_hierarchical_rec_t *arch_rec = NULL; + acct_association_rec_t *assoc = NULL; + List total_assoc_list = list_create(NULL); + List arch_rec_list = + list_create(destroy_acct_hierarchical_rec); + ListIterator itr, itr2; + + itr = list_iterator_create(assoc_list); + itr2 = list_iterator_create(total_assoc_list); + + while((assoc = list_next(itr))) { + arch_rec = + xmalloc(sizeof(acct_hierarchical_rec_t)); + arch_rec->childern = + list_create(destroy_acct_hierarchical_rec); + arch_rec->assoc = assoc; + + /* To speed things up we are first looking if we have + a parent_id to look for. If that doesn't work see + if the last parent we had was what we are looking + for. Then if that isn't panning out look at the + last account parent. If still we don't have it we + will look for it in the list. 
If it isn't there we + will just add it to the parent and call it good + */ + if(!assoc->parent_id) { + arch_rec->sort_name = assoc->cluster; + + list_append(arch_rec_list, arch_rec); + list_append(total_assoc_list, arch_rec); + + continue; + } + + if(assoc->user) + arch_rec->sort_name = assoc->user; + else + arch_rec->sort_name = assoc->acct; + + if(last_parent && assoc->parent_id == last_parent->assoc->id) { + par_arch_rec = last_parent; + } else if(last_acct_parent + && assoc->parent_id == last_acct_parent->assoc->id) { + par_arch_rec = last_acct_parent; + } else { + list_iterator_reset(itr2); + while((par_arch_rec = list_next(itr2))) { + if(assoc->parent_id + == par_arch_rec->assoc->id) { + if(assoc->user) + last_parent = par_arch_rec; + else + last_parent + = last_acct_parent + = par_arch_rec; + break; + } + } + } + + if(!par_arch_rec) { + list_append(arch_rec_list, arch_rec); + last_parent = last_acct_parent = arch_rec; + } else + list_append(par_arch_rec->childern, arch_rec); + + list_append(total_assoc_list, arch_rec); + } + list_iterator_destroy(itr); + list_iterator_destroy(itr2); + + list_destroy(total_assoc_list); +// info("got %d", list_count(arch_rec_list)); + _sort_acct_hierarchical_rec_list(arch_rec_list); + + return arch_rec_list; +} + /* IN/OUT: tree_list a list of acct_print_tree_t's */ extern char *get_tree_acct_name(char *name, char *parent, char *cluster, List tree_list) @@ -4167,27 +4366,28 @@ extern char *get_tree_acct_name(char *name, char *parent, char *cluster, ListIterator itr = NULL; acct_print_tree_t *acct_print_tree = NULL; acct_print_tree_t *par_acct_print_tree = NULL; - static char *ret_name = NULL; - static char *last_name = NULL, *last_cluster = NULL; - if(!tree_list) return NULL; itr = list_iterator_create(tree_list); while((acct_print_tree = list_next(itr))) { - if(!strcmp(name, acct_print_tree->name)) { - ret_name = acct_print_tree->print_name; + /* we don't care about users in this list. 
They are + only there so we don't leak memory */ + if(acct_print_tree->user) + continue; + + if(!strcmp(name, acct_print_tree->name)) break; - } else if(parent && !strcmp(parent, acct_print_tree->name)) { + else if(parent && !strcmp(parent, acct_print_tree->name)) par_acct_print_tree = acct_print_tree; - } + } list_iterator_destroy(itr); if(parent && acct_print_tree) - return ret_name; - + return acct_print_tree->print_name; + acct_print_tree = xmalloc(sizeof(acct_print_tree_t)); acct_print_tree->name = xstrdup(name); if(par_acct_print_tree) @@ -4197,20 +4397,16 @@ extern char *get_tree_acct_name(char *name, char *parent, char *cluster, acct_print_tree->spaces = xstrdup(""); /* user account */ - if(name[0] == '|') + if(name[0] == '|') { acct_print_tree->print_name = xstrdup_printf( "%s%s", acct_print_tree->spaces, parent); - else + acct_print_tree->user = 1; + } else acct_print_tree->print_name = xstrdup_printf( "%s%s", acct_print_tree->spaces, name); - list_append(tree_list, acct_print_tree); - - ret_name = acct_print_tree->print_name; - last_name = name; - last_cluster = cluster; - + return acct_print_tree->print_name; } @@ -4718,11 +4914,13 @@ extern int clusteracct_storage_g_get_usage( (db_conn, uid, cluster_rec, start, end); } -extern int clusteracct_storage_g_register_ctld(char *cluster, uint16_t port) +extern int clusteracct_storage_g_register_ctld( + void *db_conn, char *cluster, uint16_t port) { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; - return (*(g_acct_storage_context->ops.register_ctld))(cluster, port); + return (*(g_acct_storage_context->ops.register_ctld)) + (db_conn, cluster, port); } /* diff --git a/src/common/slurm_accounting_storage.h b/src/common/slurm_accounting_storage.h index dfc7d8fa4..d0f74ac5a 100644 --- a/src/common/slurm_accounting_storage.h +++ b/src/common/slurm_accounting_storage.h @@ -413,8 +413,15 @@ typedef struct { char *name; char *print_name; char *spaces; + uint16_t user; } acct_print_tree_t; +typedef struct { 
+ acct_association_rec_t *assoc; + char *sort_name; + List childern; +} acct_hierarchical_rec_t; + extern void destroy_acct_user_rec(void *object); extern void destroy_acct_account_rec(void *object); extern void destroy_acct_coord_rec(void *object); @@ -437,6 +444,7 @@ extern void destroy_acct_update_object(void *object); extern void destroy_acct_used_limits(void *object); extern void destroy_update_shares_rec(void *object); extern void destroy_acct_print_tree(void *object); +extern void destroy_acct_hierarchical_rec(void *object); extern void init_acct_association_rec(acct_association_rec_t *assoc); extern void init_acct_qos_rec(acct_qos_rec_t *qos); @@ -513,6 +521,12 @@ extern uint32_t str_2_acct_qos(List qos_list, char *level); extern char *acct_admin_level_str(acct_admin_level_t level); extern acct_admin_level_t str_2_acct_admin_level(char *level); +/* The next two functions have pointers to assoc_list so do not + * destroy assoc_list before using the list returned from this function. 
+ */ +extern List get_hierarchical_sorted_assoc_list(List assoc_list); +extern List get_acct_hierarchical_rec_list(List assoc_list); + /* IN/OUT: tree_list a list of acct_print_tree_t's */ extern char *get_tree_acct_name(char *name, char *parent, char *cluster, List tree_list); @@ -813,7 +827,8 @@ extern int clusteracct_storage_g_cluster_procs(void *db_conn, uint32_t procs, time_t event_time); -extern int clusteracct_storage_g_register_ctld(char *cluster, uint16_t port); +extern int clusteracct_storage_g_register_ctld( + void *db_conn, char *cluster, uint16_t port); /* * get info from the storage diff --git a/src/common/slurm_rlimits_info.c b/src/common/slurm_rlimits_info.c index 5dce456b6..b79b872ac 100644 --- a/src/common/slurm_rlimits_info.c +++ b/src/common/slurm_rlimits_info.c @@ -107,6 +107,7 @@ get_slurm_rlimits_info( void ) #define RLIMIT_ "RLIMIT_" #define LEN_RLIMIT_ (sizeof( RLIMIT_ ) - 1) #define RLIMIT_DELIMS ", \t\n" + /* * Parse a comma separated list of RLIMIT names. * @@ -117,21 +118,27 @@ int parse_rlimits( char *rlimits_str, int propagate_flag ) { slurm_rlimits_info_t *rli; /* ptr iterator for rlimits_info[] */ - char *tp; /* token ptr */ - bool found; + char *tp; /* token ptr */ + bool found; + bool propagate_none = false; char *rlimits_str_dup; xassert( rlimits_str ); - if (strcmp( rlimits_str, "ALL" ) == 0) { - /* - * Propagate flag value applies to all rlimits - */ - for (rli = rlimits_info; rli->name; rli++) + if (strcmp(rlimits_str, "NONE") == 0) { + propagate_none = true; + propagate_flag = !propagate_flag; + } + + if (propagate_none || strcmp( rlimits_str, "ALL" ) == 0) { + /* + * Propagate flag value applies to all rlimits + */ + for (rli = rlimits_info; rli->name; rli++) rli->propagate_flag = propagate_flag; rlimits_were_parsed = TRUE; - return( 0 ); - } + return( 0 ); + } /* * Since parse_rlimits may be called multiple times, we @@ -142,39 +149,52 @@ parse_rlimits( char *rlimits_str, int propagate_flag ) for (rli = rlimits_info; 
rli->name; rli++) rli->propagate_flag = -1; - rlimits_str_dup = xstrdup( rlimits_str ); - if ((tp = strtok( rlimits_str_dup, RLIMIT_DELIMS )) != NULL) { - do { - found = FALSE; - for (rli = rlimits_info; rli->name; rli++) { - /* - * Accept either "RLIMIT_CORE" or "CORE" - */ - if (strncmp( tp, RLIMIT_, LEN_RLIMIT_ ) == 0) - tp += LEN_RLIMIT_; - if (strcmp( tp, rli->name )) - continue; - rli->propagate_flag = propagate_flag; - found = TRUE; - break; - } - if (found == FALSE) { - error( "Bad rlimit name: %s\n", tp ); + rlimits_str_dup = xstrdup( rlimits_str ); + if ((tp = strtok( rlimits_str_dup, RLIMIT_DELIMS )) != NULL) { + do { + found = FALSE; + for (rli = rlimits_info; rli->name; rli++) { + /* + * Accept either "RLIMIT_CORE" or "CORE" + */ + if (strncmp( tp, RLIMIT_, LEN_RLIMIT_ ) == 0) + tp += LEN_RLIMIT_; + if (strcmp( tp, rli->name )) + continue; + rli->propagate_flag = propagate_flag; + found = TRUE; + break; + } + if (found == FALSE) { + error( "Bad rlimit name: %s\n", tp ); xfree( rlimits_str_dup ); - return( -1 ); - } - } while ((tp = strtok( NULL, RLIMIT_DELIMS ))); - } + return( -1 ); + } + } while ((tp = strtok( NULL, RLIMIT_DELIMS ))); + } xfree( rlimits_str_dup ); - /* - * Any rlimits that weren't in the 'rlimits_str' parameter get the + /* + * Any rlimits that weren't in the 'rlimits_str' parameter get the * opposite propagate flag value. - */ + */ for (rli = rlimits_info; rli->name; rli++) if (rli->propagate_flag == -1) - rli->propagate_flag = ( ! propagate_flag ); + rli->propagate_flag = ( ! 
propagate_flag ); rlimits_were_parsed = TRUE; return( 0 ); } + +extern void print_rlimits(void) +{ + slurm_rlimits_info_t *rli; /* ptr iterator for rlimits_info[] */ + struct rlimit rlp; + + for (rli = rlimits_info; rli->name; rli++) { + if (getrlimit(rli->resource, &rlp) == 0) { + printf("SLURM_RLIMIT_%s=%lu\n", rli->name, + (unsigned long) rlp.rlim_cur); + } + } +} diff --git a/src/common/slurm_rlimits_info.h b/src/common/slurm_rlimits_info.h index 404fbe143..1cffac395 100644 --- a/src/common/slurm_rlimits_info.h +++ b/src/common/slurm_rlimits_info.h @@ -58,5 +58,6 @@ extern slurm_rlimits_info_t *get_slurm_rlimits_info( void ); extern int parse_rlimits( char *rlimits_str, int propagate_flag ); +extern void print_rlimits( void ); #endif /*__SLURM_RLIMITS_INFO_H__*/ diff --git a/src/database/Makefile.in b/src/database/Makefile.in index 6afcf8b7b..e03bb214f 100644 --- a/src/database/Makefile.in +++ b/src/database/Makefile.in @@ -204,6 +204,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/Makefile.in b/src/plugins/Makefile.in index 97b1039e4..bc8895762 100644 --- a/src/plugins/Makefile.in +++ b/src/plugins/Makefile.in @@ -182,6 +182,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/accounting_storage/Makefile.in b/src/plugins/accounting_storage/Makefile.in index c8a98efcc..20d35d8c2 100644 --- a/src/plugins/accounting_storage/Makefile.in +++ b/src/plugins/accounting_storage/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ 
+SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/accounting_storage/filetxt/Makefile.in b/src/plugins/accounting_storage/filetxt/Makefile.in index 672f6b887..c4e2a20be 100644 --- a/src/plugins/accounting_storage/filetxt/Makefile.in +++ b/src/plugins/accounting_storage/filetxt/Makefile.in @@ -205,6 +205,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c index fd8ed1ba2..2a4698362 100644 --- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c +++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c @@ -432,7 +432,8 @@ extern int clusteracct_storage_p_node_up(void *db_conn, return SLURM_SUCCESS; } -extern int clusteracct_storage_p_register_ctld(char *cluster, +extern int clusteracct_storage_p_register_ctld(void *db_conn, + char *cluster, uint16_t port) { return SLURM_SUCCESS; diff --git a/src/plugins/accounting_storage/mysql/Makefile.in b/src/plugins/accounting_storage/mysql/Makefile.in index b3f1531ee..a12bd05c2 100644 --- a/src/plugins/accounting_storage/mysql/Makefile.in +++ b/src/plugins/accounting_storage/mysql/Makefile.in @@ -210,6 +210,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index 6f0616557..98791aa59 100644 --- 
a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -600,7 +600,36 @@ static int _setup_association_cond_limits(acct_association_cond_t *assoc_cond, if(!assoc_cond) return 0; - if(assoc_cond->with_sub_accts) { + /* we need to check this first so we can update the + with_sub_accts if needed since this the qos_list is a + parent thing + */ + if(assoc_cond->qos_list && list_count(assoc_cond->qos_list)) { + /* we have to do the same thing as with_sub_accts does + first since we are looking for something that is + really most likely a parent thing */ + assoc_cond->with_sub_accts = 1; + prefix = "t2"; + xstrfmtcat(*extra, ", %s as t2 where " + "(t1.lft between t2.lft and t2.rgt) && (", + assoc_table); + set = 0; + itr = list_iterator_create(assoc_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(*extra, " || "); + xstrfmtcat(*extra, + "(%s.qos like '%%,%s' " + "|| %s.qos like '%%,%s,%%' " + "|| %s.delta_qos like '%%,+%s' " + "|| %s.delta_qos like '%%,+%s,%%')", + prefix, object, prefix, object, + prefix, object, prefix, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(*extra, ") &&"); + } else if(assoc_cond->with_sub_accts) { prefix = "t2"; xstrfmtcat(*extra, ", %s as t2 where " "(t1.lft between t2.lft and t2.rgt) &&", @@ -895,26 +924,6 @@ static int _setup_association_cond_limits(acct_association_cond_t *assoc_cond, xstrcat(*extra, ")"); } - if(assoc_cond->qos_list && list_count(assoc_cond->qos_list)) { - set = 0; - xstrcat(*extra, " && ("); - itr = list_iterator_create(assoc_cond->qos_list); - while((object = list_next(itr))) { - if(set) - xstrcat(*extra, " || "); - xstrfmtcat(*extra, - "(%s.qos like '%%,%s' " - "|| %s.qos like '%%,%s,%%' " - "|| %s.delta_qos like '%%,+%s' " - "|| %s.delta_qos like '%%,+%s,%%')", - prefix, object, prefix, object, - prefix, object, prefix, object); - set = 1; - } - list_iterator_destroy(itr); - xstrcat(*extra, 
")"); - } - if(assoc_cond->parent_acct_list && list_count(assoc_cond->parent_acct_list)) { set = 0; @@ -932,6 +941,41 @@ static int _setup_association_cond_limits(acct_association_cond_t *assoc_cond, } return set; } + +static uint32_t _get_parent_id( + mysql_conn_t *mysql_conn, char *parent, char *cluster) +{ + uint32_t parent_id = 0; + MYSQL_RES *result = NULL; + MYSQL_ROW row; + char *query = NULL; + + xassert(parent); + xassert(cluster); + + query = xstrdup_printf("select id from %s where user='' " + "and deleted = 0 and acct=\"%s\" " + "and cluster=\"%s\";", + assoc_table, parent, cluster); + debug4("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + + if(!(result = mysql_db_query_ret(mysql_conn->db_conn, query, 1))) { + xfree(query); + return 0; + } + xfree(query); + + if((row = mysql_fetch_row(result))) { + if(row[0]) + parent_id = atoi(row[0]); + } else + error("no association for parent %s on cluster %s", + parent, cluster); + mysql_free_result(result); + + return parent_id; +} + /* This function will take the object given and free it later so it * needed to be removed from a list if in one before */ @@ -2590,9 +2634,15 @@ extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit) } xfree(query); while((row = mysql_fetch_row(result))) { + msg.rpc_version = atoi(row[3]); + if(msg.rpc_version > SLURMDBD_VERSION) { + error("%s at %s(%s) ver %s > %u, can't update", + row[2], row[0], row[1], row[3], + SLURMDBD_VERSION); + continue; + } debug("sending updates to %s at %s(%s) ver %s", row[2], row[0], row[1], row[3]); - msg.rpc_version = atoi(row[3]); slurm_msg_t_init(&req); slurm_set_addr_char(&req.address, atoi(row[1]), row[0]); req.msg_type = ACCOUNTING_UPDATE_MSG; @@ -3167,12 +3217,13 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, char *user_name = NULL; char *tmp_char = NULL; int assoc_id = 0; - int incr = 0, my_left = 0; + int incr = 0, my_left = 0, my_par_id = 0; int affect_rows = 0; int moved_parent = 0; MYSQL_RES 
*result = NULL; MYSQL_ROW row; char *old_parent = NULL, *old_cluster = NULL; + char *last_parent = NULL, *last_cluster = NULL; char *massoc_req_inx[] = { "id", "parent_acct", @@ -3361,10 +3412,8 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, my_left = atoi(row[0]); mysql_free_result(sel_result); //info("left is %d", my_left); - xfree(old_parent); - xfree(old_cluster); - old_parent = xstrdup(parent); - old_cluster = xstrdup(object->cluster); + old_parent = parent; + old_cluster = object->cluster; incr = 0; } incr += 2; @@ -3461,6 +3510,26 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, object->id = assoc_id; + + /* get the parent id only if we haven't moved the + * parent since we get the total list if that has + * happened */ + if(!moved_parent && + (!last_parent || !last_cluster + || strcmp(parent, last_parent) + || strcmp(object->cluster, last_cluster))) { + uint32_t tmp32 = 0; + if((tmp32 = _get_parent_id(mysql_conn, + parent, + object->cluster))) { + my_par_id = tmp32; + + last_parent = parent; + last_cluster = object->cluster; + } + } + object->parent_id = my_par_id; + if(_addto_update_list(mysql_conn->update_list, ACCT_ADD_ASSOC, object) == SLURM_SUCCESS) { list_remove(itr); @@ -3516,9 +3585,6 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, end_it: - xfree(old_parent); - xfree(old_cluster); - if(rc != SLURM_ERROR) { if(txn_query) { xstrcat(txn_query, ";"); @@ -6553,6 +6619,7 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, acct_association_cond_t *assoc_cond) { #ifdef HAVE_MYSQL + //DEF_TIMERS; char *query = NULL; char *extra = NULL; char *tmp = NULL; @@ -6569,14 +6636,12 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, uint32_t parent_mnpj = INFINITE; uint32_t parent_mwpj = INFINITE; uint64_t parent_mcmpj = INFINITE; + char *parent_acct = NULL; char *parent_qos = NULL; char *parent_delta_qos = NULL; char *last_acct = NULL; - char 
*last_acct_parent = NULL; char *last_cluster = NULL; - char *last_cluster2 = NULL; - uint32_t user_parent_id = 0; - uint32_t acct_parent_id = 0; + uint32_t parent_id = 0; uint16_t private_data = 0; acct_user_rec_t user; @@ -6692,6 +6757,7 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, with_usage = assoc_cond->with_usage; without_parent_limits = assoc_cond->without_parent_limits; without_parent_info = assoc_cond->without_parent_info; + empty: xfree(tmp); xstrfmtcat(tmp, "t1.%s", assoc_req_inx[i]); @@ -6741,8 +6807,9 @@ empty: mysql_free_result(result); } + //START_TIMER; query = xstrdup_printf("select distinct %s from %s as t1%s " - "order by lft;", + "order by cluster,lft;", tmp, assoc_table, extra); xfree(tmp); xfree(extra); @@ -6761,7 +6828,7 @@ empty: xmalloc(sizeof(acct_association_rec_t)); MYSQL_RES *result2 = NULL; MYSQL_ROW row2; - + list_append(assoc_list, assoc); assoc->id = atoi(row[ASSOC_REQ_ID]); @@ -6808,41 +6875,11 @@ empty: assoc_cond->usage_start, assoc_cond->usage_end); } - + parent_acct = row[ASSOC_REQ_ACCT]; if(!without_parent_info && row[ASSOC_REQ_PARENT][0]) { -/* info("got %s?=%s and %s?=%s", */ -/* row[ASSOC_REQ_PARENT], last_acct_parent, */ -/* row[ASSOC_REQ_CLUSTER], last_cluster); */ - if(!last_acct_parent || !last_cluster - || strcmp(row[ASSOC_REQ_PARENT], last_acct_parent) - || strcmp(row[ASSOC_REQ_CLUSTER], last_cluster)) { - query = xstrdup_printf( - "select id from %s where user='' " - "and deleted = 0 and acct=\"%s\" " - "and cluster=\"%s\";", - assoc_table, row[ASSOC_REQ_PARENT], - row[ASSOC_REQ_CLUSTER]); - debug4("%d(%d) query\n%s", - mysql_conn->conn, __LINE__, query); - - if(!(result2 = mysql_db_query_ret( - mysql_conn->db_conn, - query, 1))) { - xfree(query); - break; - } - xfree(query); - if((row2 = mysql_fetch_row(result2))) { - last_acct_parent = - row[ASSOC_REQ_PARENT]; - last_cluster = row[ASSOC_REQ_CLUSTER]; - acct_parent_id = atoi(row2[0]); - } - mysql_free_result(result2); - } 
assoc->parent_acct = xstrdup(row[ASSOC_REQ_PARENT]); - assoc->parent_id = acct_parent_id; + parent_acct = row[ASSOC_REQ_PARENT]; } if(row[ASSOC_REQ_PART][0]) @@ -6852,15 +6889,16 @@ empty: else assoc->fairshare = 1; - if((!last_acct || !last_cluster2 - || strcmp(row[ASSOC_REQ_ACCT], last_acct) - || strcmp(row[ASSOC_REQ_CLUSTER], last_cluster2))) { + if(!without_parent_info && + (!last_acct || !last_cluster + || strcmp(parent_acct, last_acct) + || strcmp(row[ASSOC_REQ_CLUSTER], last_cluster))) { query = xstrdup_printf( "call get_parent_limits(\"%s\", " "\"%s\", \"%s\", %u);" "select @par_id, @mj, @msj, @mcpj, " "@mnpj, @mwpj, @mcmpj, @qos, @delta_qos;", - assoc_table, row[ASSOC_REQ_ACCT], + assoc_table, parent_acct, row[ASSOC_REQ_CLUSTER], without_parent_limits); debug4("%d(%d) query\n%s", @@ -6873,11 +6911,11 @@ empty: xfree(query); if(!(row2 = mysql_fetch_row(result2))) { - user_parent_id = 0; + parent_id = 0; goto no_parent_limits; } - user_parent_id = atoi(row2[ASSOC2_REQ_PARENT_ID]); + parent_id = atoi(row2[ASSOC2_REQ_PARENT_ID]); if(!without_parent_limits) { if(row2[ASSOC2_REQ_MCMPJ]) parent_mcmpj = @@ -6933,8 +6971,8 @@ empty: else parent_msj = INFINITE; } - last_acct = row[ASSOC_REQ_ACCT]; - last_cluster2 = row[ASSOC_REQ_CLUSTER]; + last_acct = parent_acct; + last_cluster = row[ASSOC_REQ_CLUSTER]; no_parent_limits: mysql_free_result(result2); } @@ -6979,23 +7017,29 @@ empty: * and a delta qos so if you have a qos don't worry * about the delta. 
*/ + if(row[ASSOC_REQ_QOS][0]) slurm_addto_char_list(assoc->qos_list, row[ASSOC_REQ_QOS]+1); else { - if(parent_qos) - slurm_addto_char_list(assoc->qos_list, - parent_qos+1); /* if qos is set on the association itself do not worry about the deltas */ + + /* add the parents first */ + if(parent_qos) + slurm_addto_char_list(assoc->qos_list, + parent_qos+1); + + /* then add the parents delta */ + if(parent_delta_qos) + slurm_addto_char_list(delta_qos_list, + parent_delta_qos+1); + /* now add the associations */ if(row[ASSOC_REQ_DELTA_QOS][0]) slurm_addto_char_list( delta_qos_list, row[ASSOC_REQ_DELTA_QOS]+1); - if(parent_delta_qos) - slurm_addto_char_list(delta_qos_list, - parent_delta_qos+1); } /* Sometimes we want to see exactly what is here in @@ -7004,6 +7048,7 @@ empty: */ if(with_raw_qos && list_count(delta_qos_list)) { list_transfer(assoc->qos_list, delta_qos_list); + list_flush(delta_qos_list); } else if(list_count(delta_qos_list)) { ListIterator curr_qos_itr = list_iterator_create(assoc->qos_list); @@ -7031,21 +7076,20 @@ empty: break; } } - list_iterator_reset(curr_qos_itr); - if(!curr_qos) + if(!curr_qos) { list_append(assoc->qos_list, xstrdup(new_qos+1)); + } + list_iterator_reset(curr_qos_itr); } } - + list_iterator_destroy(new_qos_itr); list_iterator_destroy(curr_qos_itr); list_flush(delta_qos_list); } - /* don't do this unless this is an user association */ - if(assoc->user && assoc->parent_id != acct_parent_id) - assoc->parent_id = user_parent_id; + assoc->parent_id = parent_id; //info("parent id is %d", assoc->parent_id); //log_assoc_rec(assoc); @@ -7056,7 +7100,7 @@ empty: xfree(parent_delta_qos); xfree(parent_qos); - + //END_TIMER2("get_associations"); return assoc_list; #else return NULL; @@ -8205,10 +8249,57 @@ extern int clusteracct_storage_p_node_up(mysql_conn_t *mysql_conn, #endif } -extern int clusteracct_storage_p_register_ctld(char *cluster, +/* This is only called when not running from the slurmdbd so we can + * assumes some things like 
rpc_version. + */ +extern int clusteracct_storage_p_register_ctld(mysql_conn_t *mysql_conn, + char *cluster, uint16_t port) { - return SLURM_SUCCESS; +#ifdef HAVE_MYSQL + char *query = NULL; + char *address = NULL; + char hostname[255]; + time_t now = time(NULL); + + if(slurmdbd_conf) + fatal("clusteracct_storage_g_register_ctld " + "should never be called from the slurmdbd."); + + if(_check_connection(mysql_conn) != SLURM_SUCCESS) + return SLURM_ERROR; + + info("Registering slurmctld for cluster %s at port %u in database.", + cluster, port); + gethostname(hostname, sizeof(hostname)); + + /* check if we are running on the backup controller */ + if(slurmctld_conf.backup_controller + && !strcmp(slurmctld_conf.backup_controller, hostname)) { + address = slurmctld_conf.backup_addr; + } else + address = slurmctld_conf.control_addr; + + query = xstrdup_printf( + "update %s set deleted=0, mod_time=%d, " + "control_host='%s', control_port=%u, rpc_version=%d " + "where name='%s';", + cluster_table, now, address, port, SLURMDBD_VERSION, cluster); + xstrfmtcat(query, + "insert into %s " + "(timestamp, action, name, actor, info) " + "values (%d, %d, \"%s\", \"%s\", \"%s %u\");", + txn_table, + now, DBD_MODIFY_CLUSTERS, cluster, + slurmctld_conf.slurm_user_name, address, port); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + + return mysql_db_query(mysql_conn->db_conn, query); + +#else + return SLURM_ERROR; +#endif } extern int clusteracct_storage_p_cluster_procs(mysql_conn_t *mysql_conn, diff --git a/src/plugins/accounting_storage/none/Makefile.in b/src/plugins/accounting_storage/none/Makefile.in index 4d62249e9..38bb59ace 100644 --- a/src/plugins/accounting_storage/none/Makefile.in +++ b/src/plugins/accounting_storage/none/Makefile.in @@ -204,6 +204,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = 
@SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/accounting_storage/none/accounting_storage_none.c b/src/plugins/accounting_storage/none/accounting_storage_none.c index ab95986c8..b80c2d37d 100644 --- a/src/plugins/accounting_storage/none/accounting_storage_none.c +++ b/src/plugins/accounting_storage/none/accounting_storage_none.c @@ -279,7 +279,8 @@ extern int clusteracct_storage_p_node_up(void *db_conn, return SLURM_SUCCESS; } -extern int clusteracct_storage_p_register_ctld(char *cluster, +extern int clusteracct_storage_p_register_ctld(void *db_conn, + char *cluster, uint16_t port) { return SLURM_SUCCESS; diff --git a/src/plugins/accounting_storage/pgsql/Makefile.in b/src/plugins/accounting_storage/pgsql/Makefile.in index 7bb7cda71..e9ec8ea92 100644 --- a/src/plugins/accounting_storage/pgsql/Makefile.in +++ b/src/plugins/accounting_storage/pgsql/Makefile.in @@ -209,6 +209,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c index 35c4d74f9..074164b67 100644 --- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c +++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c @@ -1033,7 +1033,8 @@ extern int clusteracct_storage_p_node_up(PGconn *acct_pgsql_db, #endif } -extern int clusteracct_storage_p_register_ctld(char *cluster, +extern int clusteracct_storage_p_register_ctld(PGconn *acct_pgsql_db, + char *cluster, uint16_t port) { return SLURM_SUCCESS; diff --git a/src/plugins/accounting_storage/slurmdbd/Makefile.in b/src/plugins/accounting_storage/slurmdbd/Makefile.in index 66483726f..964f57db2 100644 --- a/src/plugins/accounting_storage/slurmdbd/Makefile.in +++ 
b/src/plugins/accounting_storage/slurmdbd/Makefile.in @@ -205,6 +205,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c index 38916d9b2..00dc68ae0 100644 --- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c +++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c @@ -1115,7 +1115,8 @@ extern int clusteracct_storage_p_cluster_procs(void *db_conn, return SLURM_SUCCESS; } -extern int clusteracct_storage_p_register_ctld(char *cluster, +extern int clusteracct_storage_p_register_ctld(void *db_conn, + char *cluster, uint16_t port) { slurmdbd_msg_t msg; diff --git a/src/plugins/auth/Makefile.in b/src/plugins/auth/Makefile.in index c4610851e..493144ceb 100644 --- a/src/plugins/auth/Makefile.in +++ b/src/plugins/auth/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/auth/authd/Makefile.in b/src/plugins/auth/authd/Makefile.in index 8c37e3d26..088eca683 100644 --- a/src/plugins/auth/authd/Makefile.in +++ b/src/plugins/auth/authd/Makefile.in @@ -203,6 +203,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/auth/munge/Makefile.in b/src/plugins/auth/munge/Makefile.in index 850d9f69e..7d1ec2e57 100644 --- 
a/src/plugins/auth/munge/Makefile.in +++ b/src/plugins/auth/munge/Makefile.in @@ -204,6 +204,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/auth/none/Makefile.in b/src/plugins/auth/none/Makefile.in index 8ef4b2079..80c72fe0c 100644 --- a/src/plugins/auth/none/Makefile.in +++ b/src/plugins/auth/none/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/checkpoint/Makefile.in b/src/plugins/checkpoint/Makefile.in index 6a27134d7..dead96c38 100644 --- a/src/plugins/checkpoint/Makefile.in +++ b/src/plugins/checkpoint/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/checkpoint/aix/Makefile.in b/src/plugins/checkpoint/aix/Makefile.in index 73c445d25..4015bb920 100644 --- a/src/plugins/checkpoint/aix/Makefile.in +++ b/src/plugins/checkpoint/aix/Makefile.in @@ -207,6 +207,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/checkpoint/none/Makefile.in b/src/plugins/checkpoint/none/Makefile.in index af3c69428..cad3af7ab 100644 --- a/src/plugins/checkpoint/none/Makefile.in +++ b/src/plugins/checkpoint/none/Makefile.in @@ -202,6 +202,7 @@ 
SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/checkpoint/ompi/Makefile.in b/src/plugins/checkpoint/ompi/Makefile.in index d2a48ab72..0e2c3e196 100644 --- a/src/plugins/checkpoint/ompi/Makefile.in +++ b/src/plugins/checkpoint/ompi/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/checkpoint/xlch/Makefile.in b/src/plugins/checkpoint/xlch/Makefile.in index a873989cc..42cd308ea 100644 --- a/src/plugins/checkpoint/xlch/Makefile.in +++ b/src/plugins/checkpoint/xlch/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/crypto/Makefile.in b/src/plugins/crypto/Makefile.in index 87447b847..0ef0f9092 100644 --- a/src/plugins/crypto/Makefile.in +++ b/src/plugins/crypto/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/crypto/munge/Makefile.in b/src/plugins/crypto/munge/Makefile.in index 16c4f449f..89ed70b0c 100644 --- a/src/plugins/crypto/munge/Makefile.in +++ b/src/plugins/crypto/munge/Makefile.in @@ -204,6 +204,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = 
@SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/crypto/openssl/Makefile.in b/src/plugins/crypto/openssl/Makefile.in index 6c06edf1c..81b74f2f7 100644 --- a/src/plugins/crypto/openssl/Makefile.in +++ b/src/plugins/crypto/openssl/Makefile.in @@ -209,6 +209,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/jobacct_gather/Makefile.in b/src/plugins/jobacct_gather/Makefile.in index 81976b36f..db8074826 100644 --- a/src/plugins/jobacct_gather/Makefile.in +++ b/src/plugins/jobacct_gather/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/jobacct_gather/aix/Makefile.in b/src/plugins/jobacct_gather/aix/Makefile.in index 2e74f286e..a10ba903c 100644 --- a/src/plugins/jobacct_gather/aix/Makefile.in +++ b/src/plugins/jobacct_gather/aix/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/jobacct_gather/linux/Makefile.in b/src/plugins/jobacct_gather/linux/Makefile.in index bb7a009b9..552c0de01 100644 --- a/src/plugins/jobacct_gather/linux/Makefile.in +++ b/src/plugins/jobacct_gather/linux/Makefile.in @@ -204,6 +204,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = 
@SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/jobacct_gather/none/Makefile.in b/src/plugins/jobacct_gather/none/Makefile.in index 00d16a399..d068842ff 100644 --- a/src/plugins/jobacct_gather/none/Makefile.in +++ b/src/plugins/jobacct_gather/none/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/jobcomp/Makefile.in b/src/plugins/jobcomp/Makefile.in index 70c8f3dbe..ed937b96b 100644 --- a/src/plugins/jobcomp/Makefile.in +++ b/src/plugins/jobcomp/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/jobcomp/filetxt/Makefile.in b/src/plugins/jobcomp/filetxt/Makefile.in index 860015aaa..66f53ffec 100644 --- a/src/plugins/jobcomp/filetxt/Makefile.in +++ b/src/plugins/jobcomp/filetxt/Makefile.in @@ -203,6 +203,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/jobcomp/mysql/Makefile.in b/src/plugins/jobcomp/mysql/Makefile.in index bddbb69dd..e90c0e90b 100644 --- a/src/plugins/jobcomp/mysql/Makefile.in +++ b/src/plugins/jobcomp/mysql/Makefile.in @@ -203,6 +203,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = 
@SSL_CPPFLAGS@ diff --git a/src/plugins/jobcomp/none/Makefile.in b/src/plugins/jobcomp/none/Makefile.in index 5d4d646c0..1de2df8c7 100644 --- a/src/plugins/jobcomp/none/Makefile.in +++ b/src/plugins/jobcomp/none/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/jobcomp/pgsql/Makefile.in b/src/plugins/jobcomp/pgsql/Makefile.in index 71612054e..571ec965b 100644 --- a/src/plugins/jobcomp/pgsql/Makefile.in +++ b/src/plugins/jobcomp/pgsql/Makefile.in @@ -203,6 +203,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/jobcomp/script/Makefile.in b/src/plugins/jobcomp/script/Makefile.in index 2463aab8b..3c9136ac9 100644 --- a/src/plugins/jobcomp/script/Makefile.in +++ b/src/plugins/jobcomp/script/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/mpi/Makefile.in b/src/plugins/mpi/Makefile.in index 5bed871f5..8a92ad404 100644 --- a/src/plugins/mpi/Makefile.in +++ b/src/plugins/mpi/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/mpi/lam/Makefile.in b/src/plugins/mpi/lam/Makefile.in index 554b35b32..0f003b858 100644 --- 
a/src/plugins/mpi/lam/Makefile.in +++ b/src/plugins/mpi/lam/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/mpi/mpich1_p4/Makefile.in b/src/plugins/mpi/mpich1_p4/Makefile.in index 3d9ef1ecc..7b9a0d5c9 100644 --- a/src/plugins/mpi/mpich1_p4/Makefile.in +++ b/src/plugins/mpi/mpich1_p4/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/mpi/mpich1_shmem/Makefile.in b/src/plugins/mpi/mpich1_shmem/Makefile.in index 8f832878a..9dffc9d58 100644 --- a/src/plugins/mpi/mpich1_shmem/Makefile.in +++ b/src/plugins/mpi/mpich1_shmem/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/mpi/mpichgm/Makefile.in b/src/plugins/mpi/mpichgm/Makefile.in index 23a04991e..c5004cdd2 100644 --- a/src/plugins/mpi/mpichgm/Makefile.in +++ b/src/plugins/mpi/mpichgm/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/mpi/mpichmx/Makefile.in b/src/plugins/mpi/mpichmx/Makefile.in index 05febabcd..d40cf452a 100644 --- a/src/plugins/mpi/mpichmx/Makefile.in +++ b/src/plugins/mpi/mpichmx/Makefile.in @@ -202,6 +202,7 @@ 
SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/mpi/mvapich/Makefile.in b/src/plugins/mpi/mvapich/Makefile.in index 0bcd876e3..f838a8325 100644 --- a/src/plugins/mpi/mvapich/Makefile.in +++ b/src/plugins/mpi/mvapich/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/mpi/none/Makefile.in b/src/plugins/mpi/none/Makefile.in index b4ef574dd..77b5324f9 100644 --- a/src/plugins/mpi/none/Makefile.in +++ b/src/plugins/mpi/none/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/mpi/openmpi/Makefile.in b/src/plugins/mpi/openmpi/Makefile.in index e156ebc2f..127621bf1 100644 --- a/src/plugins/mpi/openmpi/Makefile.in +++ b/src/plugins/mpi/openmpi/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/proctrack/Makefile.in b/src/plugins/proctrack/Makefile.in index 02e355b88..a78faad3c 100644 --- a/src/plugins/proctrack/Makefile.in +++ b/src/plugins/proctrack/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = 
@SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/proctrack/aix/Makefile.in b/src/plugins/proctrack/aix/Makefile.in index 599c8f658..449862c50 100644 --- a/src/plugins/proctrack/aix/Makefile.in +++ b/src/plugins/proctrack/aix/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/proctrack/linuxproc/Makefile.in b/src/plugins/proctrack/linuxproc/Makefile.in index 2034e44b7..851cc9962 100644 --- a/src/plugins/proctrack/linuxproc/Makefile.in +++ b/src/plugins/proctrack/linuxproc/Makefile.in @@ -203,6 +203,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/proctrack/pgid/Makefile.in b/src/plugins/proctrack/pgid/Makefile.in index f8a41c911..b44deb356 100644 --- a/src/plugins/proctrack/pgid/Makefile.in +++ b/src/plugins/proctrack/pgid/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/proctrack/rms/Makefile.in b/src/plugins/proctrack/rms/Makefile.in index 4e2607fa9..97b5d4174 100644 --- a/src/plugins/proctrack/rms/Makefile.in +++ b/src/plugins/proctrack/rms/Makefile.in @@ -203,6 +203,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = 
@SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/proctrack/sgi_job/Makefile.in b/src/plugins/proctrack/sgi_job/Makefile.in index 6ad660d31..89035d60e 100644 --- a/src/plugins/proctrack/sgi_job/Makefile.in +++ b/src/plugins/proctrack/sgi_job/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/sched/Makefile.in b/src/plugins/sched/Makefile.in index f91deddde..2440d811d 100644 --- a/src/plugins/sched/Makefile.in +++ b/src/plugins/sched/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/sched/backfill/Makefile.in b/src/plugins/sched/backfill/Makefile.in index cd1bb54d8..fc91512c9 100644 --- a/src/plugins/sched/backfill/Makefile.in +++ b/src/plugins/sched/backfill/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/sched/builtin/Makefile.in b/src/plugins/sched/builtin/Makefile.in index f157306dc..27a7d63a7 100644 --- a/src/plugins/sched/builtin/Makefile.in +++ b/src/plugins/sched/builtin/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/sched/gang/Makefile.in 
b/src/plugins/sched/gang/Makefile.in index a71c67159..ccc5c3195 100644 --- a/src/plugins/sched/gang/Makefile.in +++ b/src/plugins/sched/gang/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/sched/hold/Makefile.in b/src/plugins/sched/hold/Makefile.in index 4f1130aa2..453378553 100644 --- a/src/plugins/sched/hold/Makefile.in +++ b/src/plugins/sched/hold/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/sched/wiki/Makefile.in b/src/plugins/sched/wiki/Makefile.in index 7efbdab57..a29baf0c8 100644 --- a/src/plugins/sched/wiki/Makefile.in +++ b/src/plugins/sched/wiki/Makefile.in @@ -204,6 +204,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/sched/wiki2/Makefile.in b/src/plugins/sched/wiki2/Makefile.in index fdd07feb3..aa31a9954 100644 --- a/src/plugins/sched/wiki2/Makefile.in +++ b/src/plugins/sched/wiki2/Makefile.in @@ -207,6 +207,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/sched/wiki2/get_jobs.c b/src/plugins/sched/wiki2/get_jobs.c index 709b572ff..b878fbad6 100644 --- a/src/plugins/sched/wiki2/get_jobs.c +++ 
b/src/plugins/sched/wiki2/get_jobs.c @@ -55,6 +55,7 @@ static uint32_t _get_job_end_time(struct job_record *job_ptr); static char * _get_job_features(struct job_record *job_ptr); static uint32_t _get_job_min_disk(struct job_record *job_ptr); static uint32_t _get_job_min_mem(struct job_record *job_ptr); +static uint32_t _get_job_max_nodes(struct job_record *job_ptr); static uint32_t _get_job_min_nodes(struct job_record *job_ptr); static char * _get_job_state(struct job_record *job_ptr); static uint32_t _get_job_submit_time(struct job_record *job_ptr); @@ -92,13 +93,14 @@ reject_msg_t reject_msgs[REJECT_MSG_MAX]; * NOTE: OR operator not supported * [HOSTLIST=<node1:node2>;] list of required nodes, if any * [STARTDATE=<uts>;] earliest start time, if any + * [MAXNODES=<nodes>;] maximum number of nodes, 0 if no limit * [TASKLIST=<node1:node2>;] nodes in use, if running or completing * [REJMESSAGE=<str>;] reason job is not running, if any * UPDATETIME=<uts>; time last active * [FLAGS=INTERACTIVE;] set if interactive (not batch) job * WCLIMIT=<secs>; wall clock time limit, seconds * TASKS=<cpus>; CPUs required - * NODES=<nodes>; count of nodes required + * NODES=<nodes>; count of nodes required or allocated * DPROCS=<cpus_per_task>; count of CPUs required per task * QUEUETIME=<uts>; submission time * STARTTIME=<uts>; time execution started @@ -270,6 +272,11 @@ static char * _dump_job(struct job_record *job_ptr, time_t update_time) job_ptr->details->begin_time); xstrcat(buf, tmp); } + if (job_ptr->details) { + snprintf(tmp, sizeof(tmp), + "MAXNODES=%u;", _get_job_max_nodes(job_ptr)); + xstrcat(buf, tmp); + } } else if (!IS_JOB_FINISHED(job_ptr)) { char *hosts; hosts = slurm_job2moab_task_list(job_ptr); @@ -458,6 +465,33 @@ static uint32_t _get_job_min_disk(struct job_record *job_ptr) return (uint32_t) 0; } +static uint32_t _get_job_max_nodes(struct job_record *job_ptr) +{ + uint32_t max_nodes = 0; + + if (job_ptr->job_state > JOB_PENDING) { + /* return actual count of 
currently allocated nodes. + * NOTE: gets decremented to zero while job is completing */ + return job_ptr->node_cnt; + } + + if ((job_ptr->details == NULL) || (job_ptr->part_ptr == NULL)) + return max_nodes; /* should never reach here */ + + if (job_ptr->details->max_nodes) { + max_nodes = job_ptr->details->max_nodes; + if (job_ptr->part_ptr->max_nodes != INFINITE) { + max_nodes = MIN(max_nodes, + job_ptr->part_ptr->max_nodes); + } + } else if (job_ptr->part_ptr->max_nodes == INFINITE) + max_nodes = 0; /* no limits on job or partition */ + else /* use partition limit */ + max_nodes = job_ptr->part_ptr->max_nodes; + + return max_nodes; +} + static uint32_t _get_job_min_nodes(struct job_record *job_ptr) { if (job_ptr->job_state > JOB_PENDING) { diff --git a/src/plugins/select/Makefile.in b/src/plugins/select/Makefile.in index 0a7883657..1528c0132 100644 --- a/src/plugins/select/Makefile.in +++ b/src/plugins/select/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/select/bluegene/Makefile.in b/src/plugins/select/bluegene/Makefile.in index 03cd7fcc5..3fca2c689 100644 --- a/src/plugins/select/bluegene/Makefile.in +++ b/src/plugins/select/bluegene/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/select/bluegene/block_allocator/Makefile.in b/src/plugins/select/bluegene/block_allocator/Makefile.in index 65d67a3ef..a48c96e76 100644 --- a/src/plugins/select/bluegene/block_allocator/Makefile.in +++ b/src/plugins/select/bluegene/block_allocator/Makefile.in @@ -197,6 +197,7 @@ 
SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.c b/src/plugins/select/bluegene/block_allocator/block_allocator.c index c2981c764..3337f76eb 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.c +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.c @@ -1,7 +1,7 @@ /*****************************************************************************\ - * block_allocator.c - Assorted functions for layout of bglblocks, + * block_allocator.c - Assorted functions for layout of bluegene blocks, * wiring, mapping for smap, etc. - * $Id: block_allocator.c 15191 2008-09-26 15:25:46Z da $ + * $Id: block_allocator.c 15551 2008-10-31 19:47:35Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -808,7 +808,7 @@ extern void ba_init(node_info_msg_t *node_info_ptr) int end[BA_SYSTEM_DIMENSIONS]; #ifdef HAVE_BG_FILES - rm_BGL_t *bg = NULL; + my_bluegene_t *bg = NULL; rm_size3D_t bp_size; int rc = 0; #endif /* HAVE_BG_FILES */ @@ -907,7 +907,7 @@ node_info_error: if (have_db2 && ((DIM_SIZE[X]==0) || (DIM_SIZE[Y]==0) || (DIM_SIZE[Z]==0))) { if ((rc = bridge_get_bg(&bg)) != STATUS_OK) { - error("bridge_get_BGL(): %d", rc); + error("bridge_get_BG(): %d", rc); return; } @@ -921,7 +921,7 @@ node_info_error: error("bridge_get_data(RM_Msize): %d", rc); } if ((rc = bridge_free_bg(bg)) != STATUS_OK) - error("bridge_free_BGL(): %d", rc); + error("bridge_free_BG(): %d", rc); } #endif @@ -1906,7 +1906,7 @@ extern char *bg_err_str(status_t inx) extern int set_bp_map(void) { #ifdef HAVE_BG_FILES - static rm_BGL_t *bg = NULL; + static my_bluegene_t *bg = NULL; int rc; rm_BP_t *my_bp = NULL; ba_bp_map_t *bp_map = NULL; @@ -1932,7 +1932,7 @@ extern int set_bp_map(void) } if ((rc = bridge_get_bg(&bg)) != STATUS_OK) { - error("bridge_get_BGL(): %d", rc); + error("bridge_get_BG(): %d", rc); return -1; } @@ -2001,7 +2001,7 @@ extern int set_bp_map(void) } if ((rc = bridge_free_bg(bg)) != STATUS_OK) - error("bridge_free_BGL(): %s", rc); + error("bridge_free_BG(): %d", rc); #endif _bp_map_initialized = true; @@ -3586,7 +3586,7 @@ static int _set_external_wires(int dim, int count, ba_node_t* source, ba_node_t* target) { #ifdef HAVE_BG_FILES - rm_BGL_t *bg = NULL; + my_bluegene_t *bg = NULL; int rc; int i; rm_wire_t *my_wire = NULL; @@ -3604,7 +3604,7 @@ static int _set_external_wires(int dim, int count, ba_node_t* source, } if ((rc = bridge_get_bg(&bg)) != STATUS_OK) { - error("bridge_get_BGL(): %d", rc); + error("bridge_get_BG(): %d", rc); return -1; } @@ -3747,7 +3747,7 @@ static int _set_external_wires(int dim, int count, ba_node_t* source, _port_enum(to_port)); } if ((rc = bridge_free_bg(bg)) != STATUS_OK) - error("bridge_free_BGL(): %s", rc); + error("bridge_free_BG(): %d", 
rc); #else diff --git a/src/plugins/select/bluegene/block_allocator/bridge_linker.c b/src/plugins/select/bluegene/block_allocator/bridge_linker.c index f2a0428b4..70fd6685f 100644 --- a/src/plugins/select/bluegene/block_allocator/bridge_linker.c +++ b/src/plugins/select/bluegene/block_allocator/bridge_linker.c @@ -42,7 +42,7 @@ typedef struct { /* all the rm functions */ status_t (*set_serial)(const rm_serial_t serial); - status_t (*get_BGL)(rm_BGL_t **bgl); + status_t (*get_bg)(my_bluegene_t **bg); status_t (*add_partition)(rm_partition_t *partition); status_t (*get_partition)(pm_partition_id_t pid, rm_partition_t **partition); @@ -66,7 +66,7 @@ typedef struct { status_t (*new_partition)(rm_partition_t **partition); status_t (*free_partition)(rm_partition_t *partition); status_t (*free_job)(rm_job_t *job); - status_t (*free_bgl)(rm_BGL_t *bgl); + status_t (*free_bg)(my_bluegene_t *bg); status_t (*free_partition_list)(rm_partition_list_t *part_list); status_t (*free_job_list)(rm_job_list_t *job_list); status_t (*free_nodecard_list)(rm_nodecard_list_t *nc_list); @@ -98,7 +98,7 @@ int _get_syms(int n_syms, const char *names[], void *ptrs[]) { int i, count; void *db_handle = NULL; - +#ifndef HAVE_BGP_FILES #ifdef BG_DB2_SO db_handle = dlopen (BG_DB2_SO, RTLD_LAZY); if (!db_handle) { @@ -110,6 +110,7 @@ int _get_syms(int n_syms, const char *names[], void *ptrs[]) #else fatal("No BG_DB2_SO is set, can't run."); #endif +#endif // HAVE_BGP_FILES #ifdef BG_BRIDGE_SO handle = dlopen (BG_BRIDGE_SO, RTLD_LAZY); @@ -138,6 +139,40 @@ int _get_syms(int n_syms, const char *names[], void *ptrs[]) extern int bridge_init() { +#ifdef HAVE_BGP_FILES + static const char *syms[] = { + "rm_set_serial", + "rm_get_BGP", + "rm_add_partition", + "rm_get_partition", + "rm_get_partition_info", + "rm_modify_partition", + "rm_set_part_owner", + "rm_add_part_user", + "rm_remove_part_user", + "rm_remove_partition", + "rm_get_partitions", + "rm_get_partitions_info", + "rm_get_job", + "rm_get_jobs", 
+ "rm_remove_job", + "rm_get_nodecards", + "rm_new_partition", + "rm_free_partition", + "rm_free_job", + "rm_free_BGP", + "rm_free_partition_list", + "rm_free_job_list", + "rm_free_nodecard_list", + "rm_get_data", + "rm_set_data", + "jm_signal_job", + "jm_cancel_job", + "pm_create_partition", + "pm_destroy_partition", + "setSayMessageParams" + }; +#else static const char *syms[] = { "rm_set_serial", "rm_get_BGL", @@ -170,6 +205,7 @@ extern int bridge_init() "pm_destroy_partition", "setSayMessageParams" }; +#endif int n_syms = sizeof( syms ) / sizeof( char * ); int rc; @@ -200,14 +236,14 @@ extern int bridge_fini() return SLURM_ERROR; } -extern status_t bridge_get_bg(rm_BGL_t **bgl) +extern status_t bridge_get_bg(my_bluegene_t **bg) { int rc = CONNECTION_ERROR; if(!bridge_init()) return rc; slurm_mutex_lock(&api_file_mutex); - rc = (*(bridge_api.get_BGL))(bgl); + rc = (*(bridge_api.get_bg))(bg); slurm_mutex_unlock(&api_file_mutex); return rc; } @@ -440,14 +476,14 @@ extern status_t bridge_free_job(rm_job_t *job) } -extern status_t bridge_free_bg(rm_BGL_t *bgl) +extern status_t bridge_free_bg(my_bluegene_t *bg) { int rc = CONNECTION_ERROR; if(!bridge_init()) return rc; slurm_mutex_lock(&api_file_mutex); - rc = (*(bridge_api.free_bgl))(bgl); + rc = (*(bridge_api.free_bg))(bg); slurm_mutex_unlock(&api_file_mutex); return rc; diff --git a/src/plugins/select/bluegene/block_allocator/bridge_linker.h b/src/plugins/select/bluegene/block_allocator/bridge_linker.h index ed2b1dfdd..74c5e0c94 100644 --- a/src/plugins/select/bluegene/block_allocator/bridge_linker.h +++ b/src/plugins/select/bluegene/block_allocator/bridge_linker.h @@ -70,7 +70,7 @@ extern bool have_db2; extern int bridge_init(); extern int bridge_fini(); -extern status_t bridge_get_bg(rm_BGL_t **bgl); +extern status_t bridge_get_bg(my_bluegene_t **bg); extern status_t bridge_add_block(rm_partition_t *partition); extern status_t bridge_get_block(pm_partition_id_t pid, rm_partition_t **partition); @@ -95,7 +95,7 @@ 
extern status_t bridge_get_nodecards(rm_bp_id_t bpid, extern status_t bridge_new_block(rm_partition_t **partition); extern status_t bridge_free_block(rm_partition_t *partition); extern status_t bridge_free_job(rm_job_t *job); -extern status_t bridge_free_bg(rm_BGL_t *bgl); +extern status_t bridge_free_bg(my_bluegene_t *bg); extern status_t bridge_free_block_list(rm_partition_list_t *part_list); extern status_t bridge_free_job_list(rm_job_list_t *job_list); extern status_t bridge_free_nodecard_list(rm_nodecard_list_t *nc_list); diff --git a/src/plugins/select/bluegene/plugin/Makefile.in b/src/plugins/select/bluegene/plugin/Makefile.in index 3e66184e5..a4c38a2a8 100644 --- a/src/plugins/select/bluegene/plugin/Makefile.in +++ b/src/plugins/select/bluegene/plugin/Makefile.in @@ -240,6 +240,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/select/bluegene/plugin/bg_switch_connections.c b/src/plugins/select/bluegene/plugin/bg_switch_connections.c index 32fac65a0..146bc5df2 100644 --- a/src/plugins/select/bluegene/plugin/bg_switch_connections.c +++ b/src/plugins/select/bluegene/plugin/bg_switch_connections.c @@ -2,7 +2,7 @@ * bg_switch_connections.c - Blue Gene switch management functions, * establish switch connections * - * $Id: bg_switch_connections.c 11400 2007-04-24 18:50:38Z da $ + * $Id: bg_switch_connections.c 15551 2008-10-31 19:47:35Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -41,11 +41,12 @@ #ifdef HAVE_BG_FILES -static int _get_bp_by_location(rm_BGL_t* my_bg, +static int _get_bp_by_location(my_bluegene_t* my_bg, int* curr_coord, rm_BP_t** bp); -static int _get_switches_by_bpid(rm_BGL_t* my_bg, const char *bpid, +static int _get_switches_by_bpid(my_bluegene_t* my_bg, const char *bpid, rm_switch_t **curr_switch); + //static int _set_switch(rm_switch_t* curr_switch, ba_connection_t *int_wire); static int _add_switch_conns(rm_switch_t* curr_switch, ba_switch_t *ba_switch); @@ -58,7 +59,8 @@ static int _used_switches(ba_node_t *ba_node); * "NextBP" goes to, but we don't know, so we have to do this. */ #ifdef HAVE_BG_FILES -static int _get_bp_by_location(rm_BGL_t* my_bg, int* curr_coord, rm_BP_t** bp) +static int _get_bp_by_location(my_bluegene_t* my_bg, int* curr_coord, + rm_BP_t** bp) { static int bp_num = 0; int i, rc; @@ -105,7 +107,7 @@ static int _get_bp_by_location(rm_BGL_t* my_bg, int* curr_coord, rm_BP_t** bp) } static int _get_switches_by_bpid( - rm_BGL_t* my_bg, const char *bpid, + my_bluegene_t* my_bg, const char *bpid, rm_switch_t *coord_switch[BA_SYSTEM_DIMENSIONS]) { static int switch_num = 0; diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index b12227dca..4cd60fc62 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * bluegene.c - blue gene node configuration processing module. * - * $Id: bluegene.c 15370 2008-10-09 23:00:27Z da $ + * $Id: bluegene.c 15551 2008-10-31 19:47:35Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -49,7 +49,7 @@ char* bg_conf = NULL; /* Global variables */ -rm_BGL_t *bg = NULL; +my_bluegene_t *bg = NULL; List bg_list = NULL; /* total list of bg_record entries */ List bg_curr_block_list = NULL; /* current bg blocks in bluegene.conf*/ @@ -111,7 +111,7 @@ extern int init_bg(void) info("Attempting to contact MMCS"); if ((rc = bridge_get_bg(&bg)) != STATUS_OK) { - fatal("init_bg: rm_get_BGL(): %s", bg_err_str(rc)); + fatal("init_bg: rm_get_BG(): %s", bg_err_str(rc)); return SLURM_ERROR; } @@ -196,7 +196,7 @@ extern void fini_bg(void) #ifdef HAVE_BG_FILES if(bg) if ((rc = bridge_free_bg(bg)) != STATUS_OK) - error("bridge_free_BGL(): %s", bg_err_str(rc)); + error("bridge_free_BG(): %s", bg_err_str(rc)); #endif ba_fini(); } diff --git a/src/plugins/select/bluegene/plugin/bluegene.h b/src/plugins/select/bluegene/plugin/bluegene.h index e5afba4c0..0d7dc0ed3 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.h +++ b/src/plugins/select/bluegene/plugin/bluegene.h @@ -1,7 +1,7 @@ /*****************************************************************************\ * bluegene.h - header for blue gene configuration processing module. * - * $Id: bluegene.h 13924 2008-04-23 06:24:55Z da $ + * $Id: bluegene.h 15551 2008-10-31 19:47:35Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -51,7 +51,11 @@ typedef enum bg_layout_type { /* Global variables */ +#ifdef HAVE_BGP_FILES +extern rm_BGP_t *bg; +#else extern rm_BGL_t *bg; +#endif extern char *default_blrtsimage; extern char *default_linuximage; extern char *default_mloaderimage; diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index 433d5705a..6b6eae369 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * select_bluegene.c - node selection plugin for Blue Gene system. * - * $Id: select_bluegene.c 14952 2008-09-03 16:08:14Z da $ + * $Id: select_bluegene.c 15551 2008-10-31 19:47:35Z da $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -274,7 +274,7 @@ extern int select_p_state_save(char *dir_name) slurm_mutex_lock(&block_state_mutex); itr = list_iterator_create(bg_list); while((bg_record = list_next(itr))) { - /* on real bgl systems we only want to keep track of + /* on real bluegene systems we only want to keep track of * the blocks in an error state */ #ifdef HAVE_BG_FILES diff --git a/src/plugins/select/bluegene/plugin/state_test.c b/src/plugins/select/bluegene/plugin/state_test.c index ac651efa0..b1d7cb63f 100644 --- a/src/plugins/select/bluegene/plugin/state_test.c +++ b/src/plugins/select/bluegene/plugin/state_test.c @@ -2,7 +2,7 @@ * state_test.c - Test state of Bluegene base partitions and switches. * DRAIN nodes in SLURM that are not usable. 
* - * $Id: state_test.c 12928 2007-12-28 21:59:29Z da $ + * $Id: state_test.c 15551 2008-10-31 19:47:35Z da $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -55,7 +55,7 @@ #ifdef HAVE_BG_FILES /* Find the specified BlueGene node ID and drain it from SLURM */ -static void _configure_node_down(rm_bp_id_t bp_id, rm_BGL_t *bg) +static void _configure_node_down(rm_bp_id_t bp_id, my_bluegene_t *bg) { int bp_num, i, rc; rm_bp_id_t bpid; @@ -162,7 +162,7 @@ static char *_convert_bp_state(rm_BP_state_t state) } /* Test for nodes that are not UP in MMCS and DRAIN them in SLURM */ -static void _test_down_nodes(rm_BGL_t *bg) +static void _test_down_nodes(my_bluegene_t *bg) { int bp_num, i, rc; rm_BP_t *my_bp; @@ -246,7 +246,7 @@ static void _test_down_nodes(rm_BGL_t *bg) /* Test for switches that are not UP in MMCS, * when found DRAIN them in SLURM and configure their base partition DOWN */ -static void _test_down_switches(rm_BGL_t *bg) +static void _test_down_switches(my_bluegene_t *bg) { int switch_num, i, rc; rm_switch_t *my_switch; @@ -331,12 +331,12 @@ extern bool node_already_down(char *node_name) extern void test_mmcs_failures(void) { #ifdef HAVE_BG_FILES - rm_BGL_t *bg; + my_bluegene_t *bg; int rc; if ((rc = bridge_get_bg(&bg)) != STATUS_OK) { - error("bridge_get_BGL(): %s", bg_err_str(rc)); + error("bridge_get_BG(): %s", bg_err_str(rc)); return; } @@ -344,7 +344,7 @@ extern void test_mmcs_failures(void) _test_down_switches(bg); _test_down_nodes(bg); if ((rc = bridge_free_bg(bg)) != STATUS_OK) - error("bridge_free_BGL(): %s", bg_err_str(rc)); + error("bridge_free_BG(): %s", bg_err_str(rc)); #endif } diff --git a/src/plugins/select/bluegene/wrap_rm_api.h b/src/plugins/select/bluegene/wrap_rm_api.h index e882f1281..bbb001488 100644 --- a/src/plugins/select/bluegene/wrap_rm_api.h +++ 
b/src/plugins/select/bluegene/wrap_rm_api.h @@ -57,4 +57,10 @@ #endif +#ifdef HAVE_BGP_FILES +typedef rm_BGP_t my_bluegene_t; +#else +typedef rm_BGL_t my_bluegene_t; +#endif + #endif diff --git a/src/plugins/select/cons_res/Makefile.in b/src/plugins/select/cons_res/Makefile.in index 4e1ec9662..46e8aca56 100644 --- a/src/plugins/select/cons_res/Makefile.in +++ b/src/plugins/select/cons_res/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/select/linear/Makefile.in b/src/plugins/select/linear/Makefile.in index b97714262..6eeefaec2 100644 --- a/src/plugins/select/linear/Makefile.in +++ b/src/plugins/select/linear/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/switch/Makefile.in b/src/plugins/switch/Makefile.in index a7d38ed25..a4587d1df 100644 --- a/src/plugins/switch/Makefile.in +++ b/src/plugins/switch/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/switch/elan/Makefile.in b/src/plugins/switch/elan/Makefile.in index 6fbf77a27..f98e50069 100644 --- a/src/plugins/switch/elan/Makefile.in +++ b/src/plugins/switch/elan/Makefile.in @@ -208,6 +208,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = 
@SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/switch/federation/Makefile.in b/src/plugins/switch/federation/Makefile.in index 11c2462b2..e0f5f292f 100644 --- a/src/plugins/switch/federation/Makefile.in +++ b/src/plugins/switch/federation/Makefile.in @@ -211,6 +211,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/switch/none/Makefile.in b/src/plugins/switch/none/Makefile.in index ba48b744f..f68cbd1a4 100644 --- a/src/plugins/switch/none/Makefile.in +++ b/src/plugins/switch/none/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/task/Makefile.in b/src/plugins/task/Makefile.in index 56b248579..deabe6e02 100644 --- a/src/plugins/task/Makefile.in +++ b/src/plugins/task/Makefile.in @@ -184,6 +184,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/task/affinity/Makefile.in b/src/plugins/task/affinity/Makefile.in index 7b4b935a6..c52ed708d 100644 --- a/src/plugins/task/affinity/Makefile.in +++ b/src/plugins/task/affinity/Makefile.in @@ -214,6 +214,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/plugins/task/none/Makefile.in 
b/src/plugins/task/none/Makefile.in index 11fc8d070..a8a0b0cd6 100644 --- a/src/plugins/task/none/Makefile.in +++ b/src/plugins/task/none/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/sacct/Makefile.in b/src/sacct/Makefile.in index 5f7bea047..6ea7cd17d 100644 --- a/src/sacct/Makefile.in +++ b/src/sacct/Makefile.in @@ -201,6 +201,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/sacctmgr/Makefile.in b/src/sacctmgr/Makefile.in index c90956531..d40fb0a99 100644 --- a/src/sacctmgr/Makefile.in +++ b/src/sacctmgr/Makefile.in @@ -203,6 +203,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/sacctmgr/account_functions.c b/src/sacctmgr/account_functions.c index eb656fd7f..86c3e9ba7 100644 --- a/src/sacctmgr/account_functions.c +++ b/src/sacctmgr/account_functions.c @@ -76,13 +76,8 @@ static int _set_cond(int *start, int argc, char *argv[], } else if (!end && !strncasecmp (argv[i], "WithRawQOS", 5)) { assoc_cond->with_raw_qos = 1; - } else if (!end && !strncasecmp (argv[i], "WOPInfo", 4)) { - assoc_cond->without_parent_info = 1; } else if (!end && !strncasecmp (argv[i], "WOPLimits", 4)) { assoc_cond->without_parent_limits = 1; - } else if (!end && - !strncasecmp (argv[i], "WithSubAccounts", 5)) { - assoc_cond->with_sub_accts = 1; } else if(!end && !strncasecmp(argv[i], "where", 5)) { continue; } else if(!end diff 
--git a/src/sacctmgr/association_functions.c b/src/sacctmgr/association_functions.c index fcbedb90e..e797463e6 100644 --- a/src/sacctmgr/association_functions.c +++ b/src/sacctmgr/association_functions.c @@ -242,153 +242,6 @@ static int _set_cond(int *start, int argc, char *argv[], return set; } -/* - * Comparator used for sorting immediate childern of sacctmgr_assocs - * - * returns: -1: assoc_a > assoc_b 0: assoc_a == assoc_b 1: assoc_a < assoc_b - * - */ - -static int _sort_childern_list(sacctmgr_assoc_t *assoc_a, - sacctmgr_assoc_t *assoc_b) -{ - int diff = 0; - - /* first just check the lfts and rgts if a lft is inside of the - * others lft and rgt just return it is less - */ - if(assoc_a->assoc->lft > assoc_b->assoc->lft - && assoc_a->assoc->lft < assoc_b->assoc->rgt) - return 1; - - /* check to see if this is a user association or an account. - * We want the accounts at the bottom - */ - if(assoc_a->assoc->user && !assoc_b->assoc->user) - return -1; - else if(!assoc_a->assoc->user && assoc_b->assoc->user) - return 1; - - diff = strcmp(assoc_a->sort_name, assoc_b->sort_name); - if (diff < 0) - return -1; - else if (diff > 0) - return 1; - - return 0; - -} - -static int _sort_sacctmgr_assoc_list(List sacctmgr_assoc_list) -{ - sacctmgr_assoc_t *sacctmgr_assoc = NULL; - ListIterator itr; - - if(!list_count(sacctmgr_assoc_list)) - return SLURM_SUCCESS; - - list_sort(sacctmgr_assoc_list, (ListCmpF)_sort_childern_list); - - itr = list_iterator_create(sacctmgr_assoc_list); - while((sacctmgr_assoc = list_next(itr))) { - if(list_count(sacctmgr_assoc->childern)) - _sort_sacctmgr_assoc_list(sacctmgr_assoc->childern); - } - list_iterator_destroy(itr); - - return SLURM_SUCCESS; -} - -static int _append_ret_list(List ret_list, List sacctmgr_assoc_list) -{ - sacctmgr_assoc_t *sacctmgr_assoc = NULL; - ListIterator itr; - - if(!ret_list) - return SLURM_ERROR; - - if(!list_count(sacctmgr_assoc_list)) - return SLURM_SUCCESS; - - itr = list_iterator_create(sacctmgr_assoc_list); 
- while((sacctmgr_assoc = list_next(itr))) { - list_append(ret_list, sacctmgr_assoc->assoc); - - if(list_count(sacctmgr_assoc->childern)) - _append_ret_list(ret_list, sacctmgr_assoc->childern); - } - list_iterator_destroy(itr); - - return SLURM_SUCCESS; -} - -static List _sort_assoc_list(List assoc_list) -{ - List sacctmgr_assoc_list = sacctmgr_get_hierarchical_list(assoc_list); - List ret_list = list_create(NULL); - - _append_ret_list(ret_list, sacctmgr_assoc_list); - list_destroy(sacctmgr_assoc_list); - - return ret_list; -} - -extern List sacctmgr_get_hierarchical_list(List assoc_list) -{ - sacctmgr_assoc_t *par_sacctmgr_assoc = NULL; - sacctmgr_assoc_t *sacctmgr_assoc = NULL; - acct_association_rec_t *assoc = NULL; - List total_assoc_list = list_create(NULL); - List sacctmgr_assoc_list = list_create(destroy_sacctmgr_assoc); - ListIterator itr, itr2; - - itr = list_iterator_create(assoc_list); - itr2 = list_iterator_create(total_assoc_list); - - while((assoc = list_next(itr))) { - sacctmgr_assoc = xmalloc(sizeof(sacctmgr_assoc_t)); - sacctmgr_assoc->childern = list_create(destroy_sacctmgr_assoc); - sacctmgr_assoc->assoc = assoc; - - if(!assoc->parent_id) { - sacctmgr_assoc->sort_name = assoc->cluster; - - list_append(sacctmgr_assoc_list, sacctmgr_assoc); - list_append(total_assoc_list, sacctmgr_assoc); - - list_iterator_reset(itr2); - continue; - } - - while((par_sacctmgr_assoc = list_next(itr2))) { - if(assoc->parent_id == par_sacctmgr_assoc->assoc->id) - break; - } - - if(assoc->user) - sacctmgr_assoc->sort_name = assoc->user; - else - sacctmgr_assoc->sort_name = assoc->acct; - - if(!par_sacctmgr_assoc) - list_append(sacctmgr_assoc_list, sacctmgr_assoc); - else - list_append(par_sacctmgr_assoc->childern, - sacctmgr_assoc); - - list_append(total_assoc_list, sacctmgr_assoc); - list_iterator_reset(itr2); - } - list_iterator_destroy(itr); - list_iterator_destroy(itr2); - - list_destroy(total_assoc_list); -// info("got %d", list_count(sacctmgr_assoc_list)); - 
_sort_sacctmgr_assoc_list(sacctmgr_assoc_list); - - return sacctmgr_assoc_list; -} - extern int sacctmgr_list_association(int argc, char *argv[]) { int rc = SLURM_SUCCESS; @@ -613,7 +466,7 @@ extern int sacctmgr_list_association(int argc, char *argv[]) return SLURM_ERROR; } first_list = assoc_list; - assoc_list = _sort_assoc_list(first_list); + assoc_list = get_hierarchical_sorted_assoc_list(first_list); itr = list_iterator_create(assoc_list); itr2 = list_iterator_create(print_fields_list); diff --git a/src/sacctmgr/cluster_functions.c b/src/sacctmgr/cluster_functions.c index 74a99cdae..d5453ee92 100644 --- a/src/sacctmgr/cluster_functions.c +++ b/src/sacctmgr/cluster_functions.c @@ -834,13 +834,13 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) { acct_user_cond_t user_cond; acct_user_rec_t *user = NULL; - sacctmgr_assoc_t *sacctmgr_assoc = NULL; + acct_hierarchical_rec_t *acct_hierarchical_rec = NULL; acct_association_rec_t *assoc = NULL; acct_association_cond_t assoc_cond; List assoc_list = NULL; List acct_list = NULL; List user_list = NULL; - List sacctmgr_assoc_list = NULL; + List acct_hierarchical_rec_list = NULL; char *cluster_name = NULL; char *file_name = NULL; char *user_name = NULL; @@ -938,7 +938,7 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) return SLURM_ERROR; } - sacctmgr_assoc_list = sacctmgr_get_hierarchical_list(assoc_list); + acct_hierarchical_rec_list = get_acct_hierarchical_rec_list(assoc_list); acct_list = acct_storage_g_get_accounts(db_conn, my_uid, NULL); @@ -977,8 +977,8 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) line = xstrdup_printf("Cluster - %s", cluster_name); - sacctmgr_assoc = list_peek(sacctmgr_assoc_list); - assoc = sacctmgr_assoc->assoc; + acct_hierarchical_rec = list_peek(acct_hierarchical_rec_list); + assoc = acct_hierarchical_rec->assoc; if(strcmp(assoc->acct, "root")) fprintf(stderr, "Root association not on the top it was %s\n", assoc->acct); @@ -992,12 +992,12 @@ extern int 
sacctmgr_dump_cluster (int argc, char *argv[]) } info("%s", line); - print_file_sacctmgr_assoc_list( - fd, sacctmgr_assoc_list, user_list, acct_list); + print_file_acct_hierarchical_rec_list( + fd, acct_hierarchical_rec_list, user_list, acct_list); xfree(cluster_name); xfree(file_name); - list_destroy(sacctmgr_assoc_list); + list_destroy(acct_hierarchical_rec_list); list_destroy(assoc_list); fclose(fd); diff --git a/src/sacctmgr/common.c b/src/sacctmgr/common.c index 7d9e693bb..9faa2ec1c 100644 --- a/src/sacctmgr/common.c +++ b/src/sacctmgr/common.c @@ -98,20 +98,6 @@ static char *_get_qos_list_str(List qos_list) return qos_char; } -extern void destroy_sacctmgr_assoc(void *object) -{ - /* Most of this is pointers to something else that will be - * destroyed elsewhere. - */ - sacctmgr_assoc_t *sacctmgr_assoc = (sacctmgr_assoc_t *)object; - if(sacctmgr_assoc) { - if(sacctmgr_assoc->childern) { - list_destroy(sacctmgr_assoc->childern); - } - xfree(sacctmgr_assoc); - } -} - extern int parse_option_end(char *option) { int end = 0; @@ -308,9 +294,6 @@ extern acct_association_rec_t *sacctmgr_find_account_base_assoc(char *account, assoc_cond.user_list = list_create(NULL); list_append(assoc_cond.user_list, ""); -// info("looking for %s %s in %d", account, cluster, -// list_count(sacctmgr_association_list)); - assoc_list = acct_storage_g_get_associations(db_conn, my_uid, &assoc_cond); diff --git a/src/sacctmgr/file_functions.c b/src/sacctmgr/file_functions.c index 173a4daa4..8a4e110c3 100644 --- a/src/sacctmgr/file_functions.c +++ b/src/sacctmgr/file_functions.c @@ -115,21 +115,21 @@ static int _init_sacctmgr_file_opts(sacctmgr_file_opts_t *file_opts) file_opts->admin = ACCT_ADMIN_NOTSET; - file_opts->fairshare = 1; - - file_opts->grp_cpu_mins = INFINITE; - file_opts->grp_cpus = INFINITE; - file_opts->grp_jobs = INFINITE; - file_opts->grp_nodes = INFINITE; - file_opts->grp_submit_jobs = INFINITE; - file_opts->grp_wall = INFINITE; - - file_opts->max_cpu_mins_pj = INFINITE; - 
file_opts->max_cpus_pj = INFINITE; - file_opts->max_jobs = INFINITE; - file_opts->max_nodes_pj = INFINITE; - file_opts->max_submit_jobs = INFINITE; - file_opts->max_wall_pj = INFINITE; + file_opts->fairshare = NO_VAL; + + file_opts->grp_cpu_mins = NO_VAL; + file_opts->grp_cpus = NO_VAL; + file_opts->grp_jobs = NO_VAL; + file_opts->grp_nodes = NO_VAL; + file_opts->grp_submit_jobs = NO_VAL; + file_opts->grp_wall = NO_VAL; + + file_opts->max_cpu_mins_pj = NO_VAL; + file_opts->max_cpus_pj = NO_VAL; + file_opts->max_jobs = NO_VAL; + file_opts->max_nodes_pj = NO_VAL; + file_opts->max_submit_jobs = NO_VAL; + file_opts->max_wall_pj = NO_VAL; return SLURM_SUCCESS; } @@ -781,112 +781,6 @@ static int _print_out_assoc(List assoc_list, bool user, bool add) return rc; } -static int _mod_cluster(sacctmgr_file_opts_t *file_opts, - acct_cluster_rec_t *cluster) -{ - int changed = 0; - acct_association_rec_t mod_assoc; - acct_association_cond_t assoc_cond; - char *my_info = NULL; - - init_acct_association_rec(&mod_assoc); - memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); - - if(cluster->root_assoc->fairshare != file_opts->fairshare) { - mod_assoc.fairshare = file_opts->fairshare; - changed = 1; - xstrfmtcat(my_info, - "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", - " Changed fairshare", "Cluster", - cluster->name, - cluster->root_assoc->fairshare, - file_opts->fairshare); - } - if(cluster->root_assoc->max_cpu_mins_pj != - file_opts->max_cpu_mins_pj) { - mod_assoc.max_cpu_mins_pj = - file_opts->max_cpu_mins_pj; - changed = 1; - xstrfmtcat(my_info, - "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", - " Changed MaxCPUMinsPerJob", "Cluster", - cluster->name, - cluster->root_assoc->max_cpu_mins_pj, - file_opts->max_cpu_mins_pj); - } - if(cluster->root_assoc->max_jobs != file_opts->max_jobs) { - mod_assoc.max_jobs = file_opts->max_jobs; - changed = 1; - xstrfmtcat(my_info, - "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", - " Changed MaxJobs", "Cluster", - cluster->name, - 
cluster->root_assoc->max_jobs, - file_opts->max_jobs); - } - if(cluster->root_assoc->max_nodes_pj != file_opts->max_nodes_pj) { - mod_assoc.max_nodes_pj = file_opts->max_nodes_pj; - changed = 1; - xstrfmtcat(my_info, - "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", - " Changed MaxNodesPerJob", "Cluster", - cluster->name, - cluster->root_assoc->max_nodes_pj, - file_opts->max_nodes_pj); - } - if(cluster->root_assoc->max_wall_pj != - file_opts->max_wall_pj) { - mod_assoc.max_wall_pj = - file_opts->max_wall_pj; - changed = 1; - xstrfmtcat(my_info, - "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", - " Changed MaxWallDurationPerJob", "Cluster", - cluster->name, - cluster->root_assoc->max_wall_pj, - file_opts->max_wall_pj); - } - - if(changed) { - List ret_list = NULL; - - assoc_cond.cluster_list = list_create(NULL); - assoc_cond.acct_list = list_create(NULL); - - list_push(assoc_cond.cluster_list, cluster->name); - list_push(assoc_cond.acct_list, "root"); - - notice_thread_init(); - ret_list = acct_storage_g_modify_associations( - db_conn, my_uid, - &assoc_cond, - &mod_assoc); - notice_thread_fini(); - - list_destroy(assoc_cond.cluster_list); - list_destroy(assoc_cond.acct_list); - -/* if(ret_list && list_count(ret_list)) { */ -/* char *object = NULL; */ -/* ListIterator itr = list_iterator_create(ret_list); */ -/* printf(" Modified cluster defaults for " */ -/* "associations...\n"); */ -/* while((object = list_next(itr))) */ -/* printf(" %s\n", object); */ -/* list_iterator_destroy(itr); */ -/* } */ - - if(ret_list) { - printf("%s", my_info); - list_destroy(ret_list); - } else - changed = 0; - xfree(my_info); - } - - return changed; -} - static int _mod_acct(sacctmgr_file_opts_t *file_opts, acct_account_rec_t *acct, char *parent) { @@ -1132,7 +1026,8 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, static int _mod_assoc(sacctmgr_file_opts_t *file_opts, acct_association_rec_t *assoc, - sacctmgr_mod_type_t mod_type) + sacctmgr_mod_type_t mod_type, + char *parent) { int 
changed = 0; acct_association_rec_t mod_assoc; @@ -1161,7 +1056,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, init_acct_association_rec(&mod_assoc); memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); - if(assoc->fairshare != file_opts->fairshare) { + if((file_opts->fairshare != NO_VAL) + && (assoc->fairshare != file_opts->fairshare)) { mod_assoc.fairshare = file_opts->fairshare; changed = 1; xstrfmtcat(my_info, @@ -1172,7 +1068,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->fairshare); } - if(assoc->grp_cpu_mins != file_opts->grp_cpu_mins) { + if((file_opts->grp_cpu_mins != NO_VAL) + && (assoc->grp_cpu_mins != file_opts->grp_cpu_mins)) { mod_assoc.grp_cpu_mins = file_opts->grp_cpu_mins; changed = 1; xstrfmtcat(my_info, @@ -1183,7 +1080,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->grp_cpu_mins); } - if(assoc->grp_cpus != file_opts->grp_cpus) { + if((file_opts->grp_cpus != NO_VAL) + && (assoc->grp_cpus != file_opts->grp_cpus)) { mod_assoc.grp_cpus = file_opts->grp_cpus; changed = 1; xstrfmtcat(my_info, @@ -1194,7 +1092,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->grp_cpus); } - if(assoc->grp_jobs != file_opts->grp_jobs) { + if((file_opts->grp_jobs != NO_VAL) + && (assoc->grp_jobs != file_opts->grp_jobs)) { mod_assoc.grp_jobs = file_opts->grp_jobs; changed = 1; xstrfmtcat(my_info, @@ -1205,7 +1104,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->grp_jobs); } - if(assoc->grp_nodes != file_opts->grp_nodes) { + if((file_opts->grp_nodes != NO_VAL) + && (assoc->grp_nodes != file_opts->grp_nodes)) { mod_assoc.grp_nodes = file_opts->grp_nodes; changed = 1; xstrfmtcat(my_info, @@ -1216,7 +1116,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->grp_nodes); } - if(assoc->grp_submit_jobs != file_opts->grp_submit_jobs) { + if((file_opts->grp_submit_jobs != NO_VAL) + && (assoc->grp_submit_jobs != file_opts->grp_submit_jobs)) { 
mod_assoc.grp_submit_jobs = file_opts->grp_submit_jobs; changed = 1; xstrfmtcat(my_info, @@ -1227,7 +1128,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->grp_submit_jobs); } - if(assoc->grp_wall != file_opts->grp_wall) { + if((file_opts->grp_wall != NO_VAL) + && (assoc->grp_wall != file_opts->grp_wall)) { mod_assoc.grp_wall = file_opts->grp_wall; changed = 1; xstrfmtcat(my_info, @@ -1237,8 +1139,9 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, assoc->grp_wall, file_opts->grp_wall); } - - if(assoc->max_cpu_mins_pj != file_opts->max_cpu_mins_pj) { + + if((file_opts->max_cpu_mins_pj != NO_VAL) + && (assoc->max_cpu_mins_pj != file_opts->max_cpu_mins_pj)) { mod_assoc.max_cpu_mins_pj = file_opts->max_cpu_mins_pj; changed = 1; @@ -1250,7 +1153,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->max_cpu_mins_pj); } - if(assoc->max_cpus_pj != file_opts->max_cpus_pj) { + if((file_opts->max_cpus_pj != NO_VAL) + && (assoc->max_cpus_pj != file_opts->max_cpus_pj)) { mod_assoc.max_cpus_pj = file_opts->max_cpus_pj; changed = 1; xstrfmtcat(my_info, @@ -1261,7 +1165,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->max_cpus_pj); } - if(assoc->max_jobs != file_opts->max_jobs) { + if((file_opts->max_jobs != NO_VAL) + && (assoc->max_jobs != file_opts->max_jobs)) { mod_assoc.max_jobs = file_opts->max_jobs; changed = 1; xstrfmtcat(my_info, @@ -1272,7 +1177,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->max_jobs); } - if(assoc->max_nodes_pj != file_opts->max_nodes_pj) { + if((file_opts->max_nodes_pj != NO_VAL) + && (assoc->max_nodes_pj != file_opts->max_nodes_pj)) { mod_assoc.max_nodes_pj = file_opts->max_nodes_pj; changed = 1; xstrfmtcat(my_info, @@ -1282,8 +1188,9 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, assoc->max_nodes_pj, file_opts->max_nodes_pj); } - - if(assoc->max_submit_jobs != file_opts->max_submit_jobs) { + + if((file_opts->max_submit_jobs != NO_VAL) + && 
(assoc->max_submit_jobs != file_opts->max_submit_jobs)) { mod_assoc.max_submit_jobs = file_opts->max_submit_jobs; changed = 1; xstrfmtcat(my_info, @@ -1294,7 +1201,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->max_submit_jobs); } - if(assoc->max_wall_pj != file_opts->max_wall_pj) { + if((file_opts->max_wall_pj != NO_VAL) + && (assoc->max_wall_pj != file_opts->max_wall_pj)) { mod_assoc.max_wall_pj = file_opts->max_wall_pj; changed = 1; xstrfmtcat(my_info, @@ -1304,6 +1212,16 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, assoc->max_wall_pj, file_opts->max_wall_pj); } + if(assoc->parent_acct && parent && strcmp(assoc->parent_acct, parent)) { + mod_assoc.parent_acct = parent; + changed = 1; + xstrfmtcat(my_info, + "%-30.30s for %-7.7s %-10.10s %8s -> %s\n", + " Changed Parent", + type, name, + assoc->parent_acct, + parent); + } if(assoc->qos_list && list_count(assoc->qos_list) && file_opts->qos_list && list_count(file_opts->qos_list)) { @@ -1364,11 +1282,9 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, assoc_cond.cluster_list = list_create(NULL); list_push(assoc_cond.cluster_list, assoc->cluster); - if(mod_type >= MOD_ACCT) { - assoc_cond.acct_list = list_create(NULL); - list_push(assoc_cond.acct_list, assoc->acct); - } - + assoc_cond.acct_list = list_create(NULL); + list_push(assoc_cond.acct_list, assoc->acct); + if(mod_type == MOD_USER) { assoc_cond.user_list = list_create(NULL); list_push(assoc_cond.user_list, assoc->user); @@ -1540,10 +1456,12 @@ static acct_association_rec_t *_set_assoc_up(sacctmgr_file_opts_t *file_opts, assoc->grp_submit_jobs = file_opts->grp_submit_jobs; assoc->grp_wall = file_opts->grp_wall; + assoc->max_cpu_mins_pj = file_opts->max_cpu_mins_pj; + assoc->max_cpus_pj = file_opts->max_cpus_pj; assoc->max_jobs = file_opts->max_jobs; assoc->max_nodes_pj = file_opts->max_nodes_pj; + assoc->max_submit_jobs = file_opts->max_submit_jobs; assoc->max_wall_pj = file_opts->max_wall_pj; - 
assoc->max_cpu_mins_pj = file_opts->max_cpu_mins_pj; if(file_opts->qos_list && list_count(file_opts->qos_list)) assoc->qos_list = copy_char_list(file_opts->qos_list); @@ -1552,27 +1470,28 @@ static acct_association_rec_t *_set_assoc_up(sacctmgr_file_opts_t *file_opts, return assoc; } -static int _print_file_sacctmgr_assoc_childern(FILE *fd, - List sacctmgr_assoc_list, +static int _print_file_acct_hierarchical_rec_childern(FILE *fd, + List acct_hierarchical_rec_list, List user_list, List acct_list) { ListIterator itr = NULL; - sacctmgr_assoc_t *sacctmgr_assoc = NULL; + acct_hierarchical_rec_t *acct_hierarchical_rec = NULL; char *line = NULL; acct_user_rec_t *user_rec = NULL; acct_account_rec_t *acct_rec = NULL; - itr = list_iterator_create(sacctmgr_assoc_list); - while((sacctmgr_assoc = list_next(itr))) { - if(sacctmgr_assoc->assoc->user) { + itr = list_iterator_create(acct_hierarchical_rec_list); + while((acct_hierarchical_rec = list_next(itr))) { + if(acct_hierarchical_rec->assoc->user) { user_rec = sacctmgr_find_user_from_list( - user_list, sacctmgr_assoc->assoc->user); + user_list, acct_hierarchical_rec->assoc->user); line = xstrdup_printf( - "User - %s", sacctmgr_assoc->sort_name); - if(sacctmgr_assoc->assoc->partition) + "User - %s", acct_hierarchical_rec->sort_name); + if(acct_hierarchical_rec->assoc->partition) xstrfmtcat(line, ":Partition='%s'", - sacctmgr_assoc->assoc->partition); + acct_hierarchical_rec-> + assoc->partition); if(user_rec) { xstrfmtcat(line, ":DefaultAccount='%s'", user_rec->default_acct); @@ -1615,9 +1534,10 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, } } else { acct_rec = sacctmgr_find_account_from_list( - acct_list, sacctmgr_assoc->assoc->acct); + acct_list, acct_hierarchical_rec->assoc->acct); line = xstrdup_printf( - "Account - %s", sacctmgr_assoc->sort_name); + "Account - %s", + acct_hierarchical_rec->sort_name); if(acct_rec) { xstrfmtcat(line, ":Description='%s'", acct_rec->description); @@ -1626,7 +1546,8 @@ static 
int _print_file_sacctmgr_assoc_childern(FILE *fd, } } - print_file_add_limits_to_line(&line, sacctmgr_assoc->assoc); + print_file_add_limits_to_line(&line, + acct_hierarchical_rec->assoc); if(fprintf(fd, "%s\n", line) < 0) { exit_code=1; @@ -1636,7 +1557,7 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, info("%s", line); } list_iterator_destroy(itr); - print_file_sacctmgr_assoc_list(fd, sacctmgr_assoc_list, + print_file_acct_hierarchical_rec_list(fd, acct_hierarchical_rec_list, user_list, acct_list); return SLURM_SUCCESS; @@ -1708,33 +1629,35 @@ extern int print_file_add_limits_to_line(char **line, } -extern int print_file_sacctmgr_assoc_list(FILE *fd, - List sacctmgr_assoc_list, +extern int print_file_acct_hierarchical_rec_list(FILE *fd, + List acct_hierarchical_rec_list, List user_list, List acct_list) { ListIterator itr = NULL; - sacctmgr_assoc_t *sacctmgr_assoc = NULL; + acct_hierarchical_rec_t *acct_hierarchical_rec = NULL; - itr = list_iterator_create(sacctmgr_assoc_list); - while((sacctmgr_assoc = list_next(itr))) { + itr = list_iterator_create(acct_hierarchical_rec_list); + while((acct_hierarchical_rec = list_next(itr))) { /* info("got here %d with %d from %s %s", */ -/* depth, list_count(sacctmgr_assoc->childern), */ -/* sacctmgr_assoc->assoc->acct, sacctmgr_assoc->assoc->user); */ - if(!list_count(sacctmgr_assoc->childern)) +/* depth, list_count(acct_hierarchical_rec->childern), */ +/* acct_hierarchical_rec->assoc->acct, + acct_hierarchical_rec->assoc->user); */ + if(!list_count(acct_hierarchical_rec->childern)) continue; if(fprintf(fd, "Parent - %s\n", - sacctmgr_assoc->assoc->acct) < 0) { + acct_hierarchical_rec->assoc->acct) < 0) { error("Can't write to file"); return SLURM_ERROR; } info("%s - %s", "Parent", - sacctmgr_assoc->assoc->acct); + acct_hierarchical_rec->assoc->acct); /* info("sending %d from %s", */ -/* list_count(sacctmgr_assoc->childern), */ -/* sacctmgr_assoc->assoc->acct); */ - _print_file_sacctmgr_assoc_childern( - fd, 
sacctmgr_assoc->childern, user_list, acct_list); +/* list_count(acct_hierarchical_rec->childern), */ +/* acct_hierarchical_rec->assoc->acct); */ + _print_file_acct_hierarchical_rec_childern( + fd, acct_hierarchical_rec->childern, + user_list, acct_list); } list_iterator_destroy(itr); @@ -2021,7 +1944,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) } set = 1; } else { - set = _mod_cluster(file_opts, cluster); + set = _mod_assoc(file_opts, cluster->root_assoc, + MOD_CLUSTER, parent); } _destroy_sacctmgr_file_opts(file_opts); @@ -2121,7 +2045,6 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) !sacctmgr_find_account_base_assoc_from_list( acct_assoc_list, file_opts->name, cluster_name)) { - acct2 = sacctmgr_find_account_from_list( mod_acct_list, file_opts->name); @@ -2156,6 +2079,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) } else { debug2("already modified this account"); } + assoc2 = sacctmgr_find_association_from_list( mod_assoc_list, NULL, file_opts->name, @@ -2169,8 +2093,10 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) list_append(mod_assoc_list, assoc2); assoc2->cluster = xstrdup(cluster_name); assoc2->acct = xstrdup(file_opts->name); + assoc2->parent_acct = + xstrdup(assoc->parent_acct); if(_mod_assoc(file_opts, - assoc, MOD_ACCT)) + assoc, MOD_ACCT, parent)) set = 1; } else { debug2("already modified this assoc"); @@ -2274,7 +2200,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) assoc2->partition = xstrdup(file_opts->part); if(_mod_assoc(file_opts, - assoc, MOD_USER)) + assoc, MOD_USER, parent)) set = 1; } else { debug2("already modified this assoc"); diff --git a/src/sacctmgr/qos_functions.c b/src/sacctmgr/qos_functions.c index 7177594da..5ca7686ea 100644 --- a/src/sacctmgr/qos_functions.c +++ b/src/sacctmgr/qos_functions.c @@ -451,7 +451,7 @@ extern int sacctmgr_list_qos(int argc, char *argv[]) _set_cond(&i, argc, argv, qos_cond, format_list); if(exit_code) { - 
destroy_acct_txn_cond(qos_cond); + destroy_acct_qos_cond(qos_cond); list_destroy(format_list); return SLURM_ERROR; } else if(!list_count(format_list)) { diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c index 87cb5ab1a..d3db004aa 100644 --- a/src/sacctmgr/sacctmgr.c +++ b/src/sacctmgr/sacctmgr.c @@ -705,8 +705,8 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ \n\ <SPECS> are different for each command entity pair. \n\ list account - Clusters=, Descriptions=, Format=, Names=, \n\ - Organizations=, Parents=, WithCoor=, \n\ - WithSubAccounts, and WithAssocs \n\ + Organizations=, Parents=, WithAssocs, \n\ + WithCoordinators, WithRawQOS, and WOPLimits \n\ add account - Clusters=, Description=, Fairshare=, \n\ GrpCPUMins=, GrpCPUs=, GrpJobs=, GrpNodes=, \n\ GrpSubmitJob=, GrpWall=, MaxCPUMins=, MaxJobs=,\n\ @@ -749,11 +749,12 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ Preemptee=, Preemptor=, Priority=, and Names= \n\ delete qos - Descriptions=, Ids=, and Names= \n\ \n\ - list transactions - Actor=, EndTime, \n\ - Format=, ID=, and Start= \n\ + list transactions - Accounts=, Action=, Actor=, Clusters=, End=, \n\ + Format=, ID=, Start=, User=, and WithAssoc \n\ \n\ list user - AdminLevel=, DefaultAccounts=, Format=, Names=,\n\ - QosLevel=, WithCoor=, and WithAssocs \n\ + QosLevel=, WithAssocs, WithCoordinators, \n\ + WithRawQOS, and WOPLimits \n\ add user - Accounts=, AdminLevel=, Clusters=, \n\ DefaultAccount=, Fairshare=, MaxCPUMins= \n\ MaxCPUs=, MaxJobs=, MaxNodes=, MaxWall=, \n\ @@ -774,20 +775,26 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ \n\ Association - Account, Cluster, Fairshare, GrpCPUMins, \n\ GrpCPUs, GrpJobs, GrpNodes, GrpSubmitJob, \n\ - GrpWall, ID, LFT, MaxCPUSecs, MaxJobs, \n\ - MaxNodes, MaxWall, QOS, ParentID, \n\ - ParentName, Partition, RGT, User \n\ + GrpWall, ID, LFT, MaxCPUs, MaxCPUMins, \n\ + MaxJobs, MaxNodes, MaxSubmitJobs, MaxWall, QOS,\n\ + ParentID, ParentName, Partition, RawQOS, RGT, \n\ + User \n\ \n\ Cluster - Cluster, ControlHost, 
ControlPort, Fairshare \n\ - MaxCPUSecs, MaxJobs, MaxNodes, MaxWall \n\ + GrpCPUMins, GrpCPUs, GrpJobs, GrpNodes, \n\ + GrpSubmitJob, GrpWall, MaxCPUs, MaxCPUMins, \n\ + MaxJobs, MaxNodes, MaxSubmitJobs, MaxWall \n\ \n\ QOS - Description, ID, Name \n\ \n\ - Transactions - Action, Actor, ID, Info, TimeStamp, Where \n\ + Transactions - Action, Actor, Info, TimeStamp, Where \n\ \n\ User - AdminLevel, CoordinatorList, DefaultAccount, \n\ User \n\ \n\ + Account/User WithAssoc option will also honor \n\ + all of the options for Association. \n\ + \n\ \n\ All commands entitys, and options are case-insensitive. \n\n"); diff --git a/src/sacctmgr/sacctmgr.h b/src/sacctmgr/sacctmgr.h index 38b94879a..0e4ebb3d4 100644 --- a/src/sacctmgr/sacctmgr.h +++ b/src/sacctmgr/sacctmgr.h @@ -86,13 +86,6 @@ #define CKPT_WAIT 10 #define MAX_INPUT_FIELDS 128 -typedef struct { - acct_association_rec_t *assoc; - char *sort_name; - List childern; -} sacctmgr_assoc_t; - - extern char *command_name; extern int exit_code; /* sacctmgr's exit code, =1 on any error at any time */ extern int exit_flag; /* program to terminate if =1 */ @@ -130,15 +123,9 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]); extern int sacctmgr_delete_coord(int argc, char *argv[]); extern int sacctmgr_delete_qos(int argc, char *argv[]); -/* this has pointers to assoc_list so do not destroy assoc_list before - * using the list returned from this function. 
- */ -extern List sacctmgr_get_hierarchical_list(List assoc_list); - extern int sacctmgr_dump_cluster(int argc, char *argv[]); /* common.c */ -extern void destroy_sacctmgr_assoc(void *object); extern int parse_option_end(char *option); extern char *strip_quotes(char *option, int *increased); extern int notice_thread_init(); @@ -195,8 +182,8 @@ extern acct_cluster_rec_t *sacctmgr_find_cluster_from_list( extern int print_file_add_limits_to_line(char **line, acct_association_rec_t *assoc); -extern int print_file_sacctmgr_assoc_list(FILE *fd, - List sacctmgr_assoc_list, +extern int print_file_acct_hierarchical_rec_list(FILE *fd, + List acct_hierarchical_rec_list, List user_list, List acct_list); diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c index 5a7b96113..e2c43dc72 100644 --- a/src/sacctmgr/user_functions.c +++ b/src/sacctmgr/user_functions.c @@ -82,8 +82,6 @@ static int _set_cond(int *start, int argc, char *argv[], } else if (!end && !strncasecmp (argv[i], "WithRawQOS", 5)) { assoc_cond->with_raw_qos = 1; - } else if (!end && !strncasecmp (argv[i], "WOPInfo", 4)) { - assoc_cond->without_parent_info = 1; } else if (!end && !strncasecmp (argv[i], "WOPLimits", 4)) { assoc_cond->without_parent_limits = 1; } else if(!end && !strncasecmp(argv[i], "where", 5)) { diff --git a/src/salloc/Makefile.in b/src/salloc/Makefile.in index 03315a66a..8a8321a94 100644 --- a/src/salloc/Makefile.in +++ b/src/salloc/Makefile.in @@ -198,6 +198,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index dbe87b405..1f1f8e7dd 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -41,11 +41,12 @@ #include <slurm/slurm.h> -#include "src/common/xstring.h" +#include "src/common/env.h" +#include 
"src/common/read_config.h" +#include "src/common/slurm_rlimits_info.h" #include "src/common/xmalloc.h" #include "src/common/xsignal.h" -#include "src/common/read_config.h" -#include "src/common/env.h" +#include "src/common/xstring.h" #include "src/salloc/salloc.h" #include "src/salloc/opt.h" @@ -79,6 +80,7 @@ static void _ignore_signal(int signo); static void _exit_on_signal(int signo); static void _signal_while_allocating(int signo); static void _job_complete_handler(srun_job_complete_msg_t *msg); +static void _set_rlimits(char **env); static void _timeout_handler(srun_timeout_msg_t *msg); static void _user_msg_handler(srun_user_msg_t *msg); static void _ping_handler(srun_ping_msg_t *msg); @@ -140,6 +142,7 @@ int main(int argc, char *argv[]) opt.get_user_env_mode); if (env == NULL) exit(1); /* error already logged */ + _set_rlimits(env); } /* @@ -149,6 +152,14 @@ int main(int argc, char *argv[]) if (fill_job_desc_from_opts(&desc) == -1) { exit(1); } + if (opt.gid != (gid_t) -1) { + if (setgid(opt.gid) < 0) + fatal("setgid: %m"); + } + if (opt.uid != (uid_t) -1) { + if (setuid(opt.uid) < 0) + fatal("setuid: %m"); + } callbacks.ping = _ping_handler; callbacks.timeout = _timeout_handler; @@ -232,7 +243,7 @@ int main(int argc, char *argv[]) /* * Run the user's command. 
*/ - env_array_for_job(&env, alloc); + env_array_for_job(&env, alloc, &desc); /* Add default task count for srun, if not already set */ if (opt.nprocs_set) env_array_append_fmt(&env, "SLURM_NPROCS", "%d", opt.nprocs); @@ -246,6 +257,7 @@ int main(int argc, char *argv[]) } if (opt.network) env_array_append_fmt(&env, "SLURM_NETWORK", "%s", opt.network); + env_array_set_environment(env); env_array_free(env); pthread_mutex_lock(&allocation_state_lock); @@ -547,6 +559,43 @@ static void _node_fail_handler(srun_node_fail_msg_t *msg) error("Node failure on %s", msg->nodelist); } +static void _set_rlimits(char **env) +{ + slurm_rlimits_info_t *rli; + char env_name[25] = "SLURM_RLIMIT_"; + char *env_value, *p; + struct rlimit r; + //unsigned long env_num; + rlim_t env_num; + + for (rli=get_slurm_rlimits_info(); rli->name; rli++) { + if (rli->propagate_flag != PROPAGATE_RLIMITS) + continue; + strcpy(&env_name[sizeof("SLURM_RLIMIT_")-1], rli->name); + env_value = getenvp(env, env_name); + if (env_value == NULL) + continue; + unsetenvp(env, env_name); + if (getrlimit(rli->resource, &r) < 0) { + error("getrlimit(%s): %m", env_name+6); + continue; + } + env_num = strtol(env_value, &p, 10); + if (p && (p[0] != '\0')) { + error("Invalid environment %s value %s", + env_name, env_value); + continue; + } + if (r.rlim_cur == env_num) + continue; + r.rlim_cur = (rlim_t) env_num; + if (setrlimit(rli->resource, &r) < 0) { + error("setrlimit(%s): %m", env_name+6); + continue; + } + } +} + #ifdef HAVE_BG /* returns 1 if job and nodes are ready for job to begin, 0 otherwise */ static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc) diff --git a/src/sattach/Makefile.in b/src/sattach/Makefile.in index 14d5c2fd7..778c65fb4 100644 --- a/src/sattach/Makefile.in +++ b/src/sattach/Makefile.in @@ -199,6 +199,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ 
SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/sbatch/Makefile.in b/src/sbatch/Makefile.in index edd178e68..e3609ea9c 100644 --- a/src/sbatch/Makefile.in +++ b/src/sbatch/Makefile.in @@ -198,6 +198,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index b3e18bef0..65d1fce50 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -1891,10 +1891,6 @@ static bool _opt_verify(void) else setenvf(NULL, "SLURM_OPEN_MODE", "t"); } - if (opt.propagate && parse_rlimits( opt.propagate, PROPAGATE_RLIMITS)) { - error( "--propagate=%s is not valid.", opt.propagate ); - verified = false; - } if (opt.dependency) setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency); diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index 70e39c54f..852cbfbd9 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * sbatch.c - Submit a SLURM batch script. * - * $Id: sbatch.c 15034 2008-09-09 20:24:34Z jette $ + * $Id: sbatch.c 15505 2008-10-27 17:39:44Z jette $ ***************************************************************************** * Copyright (C) 2006-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -91,7 +91,13 @@ int main(int argc, char *argv[]) fatal("sbatch parameter parsing"); } - (void) _set_rlimit_env(); + if (opt.get_user_env_time < 0) { + /* Moab does not propage the user's resource limits, so + * slurmd determines the values at the same time that it + * gets the user's default environment variables. 
*/ + (void) _set_rlimit_env(); + } + set_prio_process_env(); set_umask_env(); slurm_init_job_desc_msg(&desc); @@ -469,8 +475,15 @@ static int _set_rlimit_env(void) slurm_conf_lock(); slurm_conf_unlock(); + /* Modify limits with any command-line options */ + if (opt.propagate && parse_rlimits( opt.propagate, PROPAGATE_RLIMITS)) + fatal( "--propagate=%s is not valid.", opt.propagate ); + for (rli = get_slurm_rlimits_info(); rli->name != NULL; rli++ ) { + if (rli->propagate_flag != PROPAGATE_RLIMITS) + continue; + if (getrlimit (rli->resource, rlim) < 0) { error ("getrlimit (RLIMIT_%s): %m", rli->name); rc = SLURM_FAILURE; @@ -505,7 +518,7 @@ static int _set_rlimit_env(void) if (rlim->rlim_cur < rlim->rlim_max) { rlim->rlim_cur = rlim->rlim_max; if (setrlimit (RLIMIT_NOFILE, rlim) < 0) - return (error ("Unable to increase max no. files: %m")); + return (error("Unable to increase max no. files: %m")); } return rc; diff --git a/src/sbcast/Makefile.in b/src/sbcast/Makefile.in index a415e04a0..9b66e4a19 100644 --- a/src/sbcast/Makefile.in +++ b/src/sbcast/Makefile.in @@ -201,6 +201,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/scancel/Makefile.in b/src/scancel/Makefile.in index 635087993..7e149c5e1 100644 --- a/src/scancel/Makefile.in +++ b/src/scancel/Makefile.in @@ -200,6 +200,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/scontrol/Makefile.in b/src/scontrol/Makefile.in index f546570c6..354254e94 100644 --- a/src/scontrol/Makefile.in +++ b/src/scontrol/Makefile.in @@ -201,6 +201,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = 
@SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index 387091e15..8118836f1 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -1075,7 +1075,8 @@ scontrol [<OPTION>] [<COMMAND>] \n\ delete <SPECIFICATIONS> delete the specified partition, kill its jobs\n\ exit terminate scontrol \n\ help print this description of use. \n\ - hide do not display information about hidden partitions.\n\ + hide do not display information about hidden \n\ + partitions \n\ listpids <job_id<.step>> List pids associated with the given jobid, or\n\ all jobs if no id is given (This will only \n\ display the processes on the node which the \n\ @@ -1090,13 +1091,12 @@ scontrol [<OPTION>] [<COMMAND>] \n\ quit terminate this command. \n\ reconfigure re-read configuration files. \n\ requeue <job_id> re-queue a batch job \n\ - setdebug <LEVEL> reset slurmctld debug level \n\ + setdebug <level> set slurmctld debug level \n\ show <ENTITY> [<ID>] display state of identified entity, default \n\ is all records. \n\ shutdown <OPTS> shutdown slurm daemons \n\ suspend <job_id> susend specified job \n\ resume <job_id> resume previously suspended job \n\ - setdebug <level> set slurmctld debug level \n\ update <SPECIFICATIONS> update job, node, partition, or bluegene \n\ block/subbp configuration \n\ verbose enable detailed logging. 
\n\ diff --git a/src/sinfo/Makefile.in b/src/sinfo/Makefile.in index 75e74ad02..a867a7974 100644 --- a/src/sinfo/Makefile.in +++ b/src/sinfo/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/slurmctld/Makefile.in b/src/slurmctld/Makefile.in index f846590c0..509d3fd2c 100644 --- a/src/slurmctld/Makefile.in +++ b/src/slurmctld/Makefile.in @@ -206,6 +206,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/slurmctld/acct_policy.c b/src/slurmctld/acct_policy.c index e5e068ea8..dcf8a2245 100644 --- a/src/slurmctld/acct_policy.c +++ b/src/slurmctld/acct_policy.c @@ -119,7 +119,7 @@ extern void acct_policy_job_fini(struct job_record *job_ptr) { acct_association_rec_t *assoc_ptr = NULL; - if (!accounting_enforce || !_valid_job_assoc(job_ptr)) + if (!job_ptr->assoc_ptr || !accounting_enforce) return; assoc_ptr = job_ptr->assoc_ptr; diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index dd6f5c3e8..d9285f636 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -425,6 +425,7 @@ int main(int argc, char *argv[]) info("Running as primary controller"); clusteracct_storage_g_register_ctld( + acct_db_conn, slurmctld_cluster_name, slurmctld_conf.slurmctld_port); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index c945e97d3..9622bb341 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -766,8 +766,7 @@ static int _load_job_state(Buf buffer) if (IS_JOB_PENDING(job_ptr)) job_ptr->start_time = now; job_ptr->end_time = now; - if(job_ptr->assoc_id) - 
jobacct_storage_g_job_complete(acct_db_conn, job_ptr); + job_completion_logger(job_ptr); } else { info("Recovered job %u", job_id); job_ptr->assoc_ptr = (void *) assoc_ptr; @@ -3642,18 +3641,19 @@ void reset_job_bitmaps(void) FREE_NULL_BITMAP(job_ptr->node_bitmap); if ((job_ptr->nodes_completing) && (node_name2bitmap(job_ptr->nodes_completing, - false, &job_ptr->node_bitmap))) { + false, &job_ptr->node_bitmap))) { error("Invalid nodes (%s) for job_id %u", job_ptr->nodes_completing, job_ptr->job_id); job_fail = true; } else if ((job_ptr->node_bitmap == NULL) && job_ptr->nodes && (node_name2bitmap(job_ptr->nodes, false, - &job_ptr->node_bitmap))) { + &job_ptr->node_bitmap))) { error("Invalid nodes (%s) for job_id %u", job_ptr->nodes, job_ptr->job_id); job_fail = true; } + _reset_step_bitmaps(job_ptr); build_node_details(job_ptr); /* set: num_cpu_groups, * cpu_count_reps, node_cnt, * cpus_per_node, node_addr */ @@ -3661,8 +3661,6 @@ void reset_job_bitmaps(void) if (_reset_detail_bitmaps(job_ptr)) job_fail = true; - _reset_step_bitmaps(job_ptr); - if ((job_ptr->kill_on_step_done) && (list_count(job_ptr->step_list) <= 1)) { info("Single job step done, job is complete"); @@ -3754,8 +3752,12 @@ static void _reset_step_bitmaps(struct job_record *job_ptr) job_ptr->job_id, step_ptr->step_id); delete_step_record (job_ptr, step_ptr->step_id); } - if (step_ptr->step_node_bitmap) - step_alloc_lps(step_ptr); + if ((step_ptr->step_node_bitmap == NULL) && + (step_ptr->batch_step == 0)) { + error("Missing node_list for step_id %u.%u", + job_ptr->job_id, step_ptr->step_id); + delete_step_record (job_ptr, step_ptr->step_id); + } } list_iterator_destroy (step_iterator); @@ -5112,6 +5114,7 @@ void job_fini (void) extern void job_completion_logger(struct job_record *job_ptr) { int base_state; + xassert(job_ptr); if (accounting_enforce == ACCOUNTING_ENFORCE_WITH_LIMITS) @@ -5134,6 +5137,24 @@ extern void job_completion_logger(struct job_record *job_ptr) g_slurm_jobcomp_write(job_ptr); + 
if(!job_ptr->assoc_id) { + acct_association_rec_t assoc_rec; + /* Just incase we turned on accounting after we + started the job + */ + bzero(&assoc_rec, sizeof(acct_association_rec_t)); + assoc_rec.acct = job_ptr->account; + assoc_rec.partition = job_ptr->partition; + assoc_rec.uid = job_ptr->user_id; + + if(!(assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec, + accounting_enforce, + (acct_association_rec_t **) + &job_ptr->assoc_ptr))) { + job_ptr->assoc_id = assoc_rec.id; + } + } + /* * This means the job wasn't ever eligible, but we want to * keep track of all jobs, so we will set the db_inx to @@ -5886,6 +5907,7 @@ extern int job_cancel_by_assoc_id(uint32_t assoc_id) if ((job_ptr->assoc_id != assoc_id) || IS_JOB_FINISHED(job_ptr)) continue; + job_ptr->assoc_ptr = NULL; info("Association deleted, cancelling job %u", job_ptr->job_id); job_signal(job_ptr->job_id, SIGKILL, 0, 0); @@ -5933,10 +5955,21 @@ extern int update_job_account(char *module, struct job_record *job_ptr, assoc_rec.acct = NULL; assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec, accounting_enforce, &assoc_ptr); + if(!assoc_ptr) { + debug("%s: we didn't have an association for account " + "'%s' and user '%s', and we can't seem to find " + "a default one either. Keeping new account " + "'%s'. This will produce trash in accounting. 
" + "If this is not what you desire please put " + "AccountStorageEnforce=1 in your slurm.conf " + "file.", module, new_account, + job_ptr->user_id, new_account); + assoc_rec.acct = new_account; + } } xfree(job_ptr->account); - if (assoc_rec.acct[0] != '\0') { + if (assoc_rec.acct && assoc_rec.acct[0] != '\0') { job_ptr->account = xstrdup(assoc_rec.acct); info("%s: setting account to %s for job_id %u", module, assoc_rec.acct, job_ptr->job_id); diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 48307eb7b..6dee2be37 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1409,17 +1409,17 @@ extern void build_node_details(struct job_record *job_ptr) job_ptr->node_cnt = hostlist_count(host_list); xrealloc(job_ptr->cpus_per_node, - (sizeof(uint32_t) * job_ptr->node_cnt)); + (sizeof(uint32_t) * job_ptr->node_cnt)); xrealloc(job_ptr->cpu_count_reps, - (sizeof(uint32_t) * job_ptr->node_cnt)); + (sizeof(uint32_t) * job_ptr->node_cnt)); xrealloc(job_ptr->node_addr, - (sizeof(slurm_addr) * job_ptr->node_cnt)); + (sizeof(slurm_addr) * job_ptr->node_cnt)); job_ptr->alloc_lps_cnt = job_ptr->node_cnt; xrealloc(job_ptr->alloc_lps, - (sizeof(uint32_t) * job_ptr->node_cnt)); + (sizeof(uint32_t) * job_ptr->node_cnt)); xrealloc(job_ptr->used_lps, - (sizeof(uint32_t) * job_ptr->node_cnt)); + (sizeof(uint32_t) * job_ptr->node_cnt)); while ((this_node_name = hostlist_shift(host_list))) { node_ptr = find_node_record(this_node_name); @@ -1492,8 +1492,7 @@ extern void build_node_details(struct job_record *job_ptr) } job_ptr->num_cpu_groups = cpu_inx + 1; job_ptr->total_procs = total_procs; - if (job_ptr->used_lps) /* reset counters */ - _alloc_step_cpus(job_ptr); + _alloc_step_cpus(job_ptr); /* reset counters */ } /* diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 55d46b0d5..a527549fe 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -1933,8 +1933,9 @@ static void 
_slurm_rpc_submit_batch_job(slurm_msg_t * msg) if (job_ptr) { /* Active job allocation */ #ifdef HAVE_FRONT_END /* Limited job step support */ - /* Non-super users not permitted to run job steps on front-end. - * A single slurmd can not handle a heavy load. */ + /* Non-super users not permitted to run job steps on + * front-end. A single slurmd can not handle a heavy + * load. */ if (!validate_super_user(uid)) { info("Attempt to execute batch job step by uid=%u", (unsigned int) uid); diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 4c25afdfe..990d1ecc9 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -776,7 +776,8 @@ int read_slurm_conf(int recover) if (node_record_count < 1) { error("read_slurm_conf: no nodes configured."); - _purge_old_node_state(old_node_table_ptr, old_node_record_count); + _purge_old_node_state(old_node_table_ptr, + old_node_record_count); return EINVAL; } diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 5037e3e3a..4eeead5e4 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * step_mgr.c - manage the job step information of slurm - * $Id: step_mgr.c 15194 2008-09-26 20:15:00Z da $ + * $Id: step_mgr.c 15550 2008-10-31 18:52:47Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -712,6 +712,9 @@ extern void step_alloc_lps(struct step_record *step_ptr) int i_node, i_first, i_last; int job_node_inx = -1, step_node_inx = -1; + if (step_ptr->step_layout == NULL) /* batch step */ + return; + i_first = bit_ffs(job_ptr->node_bitmap); i_last = bit_fls(job_ptr->node_bitmap); if (i_first == -1) /* empty bitmap */ @@ -840,7 +843,8 @@ step_create(job_step_create_request_msg_t *step_specs, step_specs->mem_per_task = 0; } else if (step_specs->mem_per_task) { if (slurmctld_conf.max_mem_per_task && - (step_specs->mem_per_task > slurmctld_conf.max_mem_per_task)) + (step_specs->mem_per_task > + slurmctld_conf.max_mem_per_task)) return ESLURM_INVALID_TASK_MEMORY; } else step_specs->mem_per_task = slurmctld_conf.def_mem_per_task; @@ -860,11 +864,16 @@ step_create(job_step_create_request_msg_t *step_specs, return ESLURM_TASKDIST_ARBITRARY_UNSUPPORTED; } - if ((step_specs->host && (strlen(step_specs->host) > MAX_STR_LEN)) || - (step_specs->node_list && (strlen(step_specs->node_list) > MAX_STR_LEN)) || - (step_specs->network && (strlen(step_specs->network) > MAX_STR_LEN)) || - (step_specs->name && (strlen(step_specs->name) > MAX_STR_LEN)) || - (step_specs->ckpt_path && (strlen(step_specs->ckpt_path) > MAX_STR_LEN))) + if ((step_specs->host && + (strlen(step_specs->host) > MAX_STR_LEN)) || + (step_specs->node_list && + (strlen(step_specs->node_list) > MAX_STR_LEN)) || + (step_specs->network && + (strlen(step_specs->network) > MAX_STR_LEN)) || + (step_specs->name && + (strlen(step_specs->name) > MAX_STR_LEN)) || + (step_specs->ckpt_path && + (strlen(step_specs->ckpt_path) > MAX_STR_LEN))) return ESLURM_PATHNAME_TOO_LONG; /* if the overcommit flag is checked we 0 out the cpu_count diff --git a/src/slurmd/Makefile.in b/src/slurmd/Makefile.in index 408c46fd0..3f7652751 100644 --- a/src/slurmd/Makefile.in +++ b/src/slurmd/Makefile.in @@ -182,6 +182,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = 
@SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/slurmd/slurmd/Makefile.am b/src/slurmd/slurmd/Makefile.am index bab7c9b77..0e1a14502 100644 --- a/src/slurmd/slurmd/Makefile.am +++ b/src/slurmd/slurmd/Makefile.am @@ -4,7 +4,6 @@ AUTOMAKE_OPTIONS = foreign sbin_PROGRAMS = slurmd -noinst_PROGRAMS = slurmd.test CPPFLAGS = -DLIBSLURM_SO=\"$(libdir)/libslurm.so\" INCLUDES = -I$(top_srcdir) @@ -14,8 +13,6 @@ slurmd_LDADD = \ $(top_builddir)/src/api/libslurm.o -ldl \ $(PLPA_LIBS) -slurmd_test_LDADD = $(slurmd_LDADD) - SLURMD_SOURCES = \ slurmd.c slurmd.h \ req.c req.h \ @@ -35,13 +32,7 @@ SLURMD_SOURCES = \ $(top_builddir)/src/slurmd/common/task_plugin.h \ $(top_builddir)/src/slurmd/common/reverse_tree.h -slurmd_SOURCES = \ - $(SLURMD_SOURCES) \ - config.c - -slurmd_test_SOURCES = \ - $(SLURMD_SOURCES) \ - testconfig.c +slurmd_SOURCES = $(SLURMD_SOURCES) if HAVE_AIX # We need to set maxdata back to 0 because this effects the "max memory size" @@ -51,18 +42,6 @@ else slurmd_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) endif -slurmd_test_LDFLAGS = $(slurmd_LDFLAGS) - - -config.c: Makefile - @( echo "char *slurm_stepd_path = \"$(prefix)/sbin/slurmstepd\";"\ - ) > config.c - -testconfig.c: Makefile - @( stepdir=`cd $(top_builddir)/src/slurmd/slurmstepd && pwd`;\ - echo "char *slurm_stepd_path = \"$$stepdir/slurmstepd\";"\ - ) > testconfig.c - force: $(slurmd_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/slurmd/slurmd/Makefile.in b/src/slurmd/slurmd/Makefile.in index 9a70905e6..13c7f21a5 100644 --- a/src/slurmd/slurmd/Makefile.in +++ b/src/slurmd/slurmd/Makefile.in @@ -37,7 +37,6 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ sbin_PROGRAMS = slurmd$(EXEEXT) -noinst_PROGRAMS = slurmd.test$(EXEEXT) subdir = src/slurmd/slurmd DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 @@ -71,13 
+70,13 @@ CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h CONFIG_CLEAN_FILES = am__installdirs = "$(DESTDIR)$(sbindir)" sbinPROGRAMS_INSTALL = $(INSTALL_PROGRAM) -PROGRAMS = $(noinst_PROGRAMS) $(sbin_PROGRAMS) +PROGRAMS = $(sbin_PROGRAMS) am__objects_1 = slurmd.$(OBJEXT) req.$(OBJEXT) get_mach_stat.$(OBJEXT) \ read_proc.$(OBJEXT) reverse_tree_math.$(OBJEXT) xcpu.$(OBJEXT) \ proctrack.$(OBJEXT) setproctitle.$(OBJEXT) \ slurmstepd_init.$(OBJEXT) run_script.$(OBJEXT) \ task_plugin.$(OBJEXT) -am_slurmd_OBJECTS = $(am__objects_1) config.$(OBJEXT) +am_slurmd_OBJECTS = $(am__objects_1) slurmd_OBJECTS = $(am_slurmd_OBJECTS) am__DEPENDENCIES_1 = slurmd_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \ @@ -85,14 +84,6 @@ slurmd_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \ slurmd_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(slurmd_LDFLAGS) \ $(LDFLAGS) -o $@ -am_slurmd_test_OBJECTS = $(am__objects_1) testconfig.$(OBJEXT) -slurmd_test_OBJECTS = $(am_slurmd_test_OBJECTS) -am__DEPENDENCIES_2 = $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/api/libslurm.o $(am__DEPENDENCIES_1) -slurmd_test_DEPENDENCIES = $(am__DEPENDENCIES_2) -slurmd_test_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ - $(slurmd_test_LDFLAGS) $(LDFLAGS) -o $@ DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp am__depfiles_maybe = depfiles @@ -105,8 +96,8 @@ CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ -SOURCES = $(slurmd_SOURCES) $(slurmd_test_SOURCES) -DIST_SOURCES = $(slurmd_SOURCES) $(slurmd_test_SOURCES) +SOURCES = $(slurmd_SOURCES) +DIST_SOURCES = $(slurmd_SOURCES) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) 
$(TEXINFOS) $(EXTRA_DIST) @@ -214,6 +205,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ @@ -284,7 +276,6 @@ slurmd_LDADD = \ $(top_builddir)/src/api/libslurm.o -ldl \ $(PLPA_LIBS) -slurmd_test_LDADD = $(slurmd_LDADD) SLURMD_SOURCES = \ slurmd.c slurmd.h \ req.c req.h \ @@ -304,20 +295,12 @@ SLURMD_SOURCES = \ $(top_builddir)/src/slurmd/common/task_plugin.h \ $(top_builddir)/src/slurmd/common/reverse_tree.h -slurmd_SOURCES = \ - $(SLURMD_SOURCES) \ - config.c - -slurmd_test_SOURCES = \ - $(SLURMD_SOURCES) \ - testconfig.c - +slurmd_SOURCES = $(SLURMD_SOURCES) @HAVE_AIX_FALSE@slurmd_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) # We need to set maxdata back to 0 because this effects the "max memory size" # ulimit, and the ulimit is inherited by child processes. @HAVE_AIX_TRUE@slurmd_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) -Wl,-bmaxdata:0x0 -slurmd_test_LDFLAGS = $(slurmd_LDFLAGS) all: all-am .SUFFIXES: @@ -351,13 +334,6 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -clean-noinstPROGRAMS: - @list='$(noinst_PROGRAMS)'; for p in $$list; do \ - f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ - echo " rm -f $$p $$f"; \ - rm -f $$p $$f ; \ - done install-sbinPROGRAMS: $(sbin_PROGRAMS) @$(NORMAL_INSTALL) test -z "$(sbindir)" || $(MKDIR_P) "$(DESTDIR)$(sbindir)" @@ -389,9 +365,6 @@ clean-sbinPROGRAMS: slurmd$(EXEEXT): $(slurmd_OBJECTS) $(slurmd_DEPENDENCIES) @rm -f slurmd$(EXEEXT) $(slurmd_LINK) $(slurmd_OBJECTS) $(slurmd_LDADD) $(LIBS) -slurmd.test$(EXEEXT): $(slurmd_test_OBJECTS) $(slurmd_test_DEPENDENCIES) - @rm -f slurmd.test$(EXEEXT) - $(slurmd_test_LINK) $(slurmd_test_OBJECTS) 
$(slurmd_test_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -399,7 +372,6 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/config.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/get_mach_stat.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proctrack.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/read_proc.Po@am__quote@ @@ -410,7 +382,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurmd.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slurmstepd_init.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task_plugin.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testconfig.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xcpu.Po@am__quote@ .c.o: @@ -616,8 +587,8 @@ maintainer-clean-generic: @echo "it deletes files that may require special tools to rebuild." clean: clean-am -clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ - clean-sbinPROGRAMS mostlyclean-am +clean-am: clean-generic clean-libtool clean-sbinPROGRAMS \ + mostlyclean-am distclean: distclean-am -rm -rf ./$(DEPDIR) @@ -676,29 +647,20 @@ uninstall-am: uninstall-sbinPROGRAMS .MAKE: install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ - clean-libtool clean-noinstPROGRAMS clean-sbinPROGRAMS ctags \ - distclean distclean-compile distclean-generic \ - distclean-libtool distclean-tags distdir dvi dvi-am html \ - html-am info info-am install install-am install-data \ - install-data-am install-dvi install-dvi-am install-exec \ - install-exec-am install-html install-html-am install-info \ - install-info-am install-man install-pdf install-pdf-am \ - install-ps install-ps-am install-sbinPROGRAMS install-strip \ - installcheck installcheck-am installdirs maintainer-clean \ + clean-libtool clean-sbinPROGRAMS ctags distclean \ + distclean-compile distclean-generic 
distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-sbinPROGRAMS install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags uninstall uninstall-am uninstall-sbinPROGRAMS -config.c: Makefile - @( echo "char *slurm_stepd_path = \"$(prefix)/sbin/slurmstepd\";"\ - ) > config.c - -testconfig.c: Makefile - @( stepdir=`cd $(top_builddir)/src/slurmd/slurmstepd && pwd`;\ - echo "char *slurm_stepd_path = \"$$stepdir/slurmstepd\";"\ - ) > testconfig.c - force: $(slurmd_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 9d91d9a92..e7673c33f 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -89,8 +89,6 @@ #define MAXHOSTNAMELEN 64 #endif -extern char *slurm_stepd_path; - typedef struct { int ngids; gid_t *gids; @@ -558,8 +556,11 @@ _forkexec_slurmstepd(slurmd_step_type_t type, void *req, error("close read to_slurmd in parent: %m"); return rc; } else { + char slurm_stepd_path[MAXPATHLEN]; char *const argv[2] = { slurm_stepd_path, NULL}; int failed = 0; + snprintf(slurm_stepd_path, sizeof(slurm_stepd_path), + "%s/sbin/slurmstepd", SLURM_PREFIX); /* * Child forks and exits */ @@ -966,7 +967,7 @@ _rpc_batch_job(slurm_msg_t *msg) int rc = SLURM_SUCCESS; uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); char *bg_part_id = NULL; - bool replied = false; + bool replied = false; slurm_addr *cli = &msg->orig_addr; if (!_slurm_authorized_user(req_uid)) { @@ -979,17 +980,35 @@ _rpc_batch_job(slurm_msg_t *msg) if (slurm_cred_revoked(conf->vctx, req->cred)) { 
error("Job %u already killed, do not launch batch job", req->job_id); - rc = ESLURMD_CREDENTIAL_REVOKED; /* job already ran */ + rc = ESLURMD_CREDENTIAL_REVOKED; /* job already ran */ + goto done; } if ((req->step_id != SLURM_BATCH_SCRIPT) && (req->step_id != 0)) first_job_run = false; - + /* * Insert jobid into credential context to denote that * we've now "seen" an instance of the job */ if (first_job_run) { + /* BlueGene prolog waits for partition boot and is very slow. + * On any system we might need to load environment variables + * for Moab (see --get-user-env), which could also be slow. + * Just reply now and send a separate kill job request if the + * prolog or launch fail. */ + replied = true; + if (slurm_send_rc_msg(msg, rc) < 1) { + /* The slurmctld is no longer waiting for a reply. + * This typically indicates that the slurmd was + * blocked from memory and/or CPUs and the slurmctld + * has requeued the batch job request. */ + error("Could not confirm batch launch for job %u, " + "aborting request", req->job_id); + rc = SLURM_COMMUNICATIONS_SEND_ERROR; + goto done; + } + slurm_cred_insert_jobid(conf->vctx, req->job_id); /* @@ -999,14 +1018,6 @@ _rpc_batch_job(slurm_msg_t *msg) SELECT_DATA_BLOCK_ID, &bg_part_id); - /* BlueGene prolog waits for partition boot and is very slow. - * On any system we might need to load environment variables - * for Moab (see --get-user-env), which could also be slow. - * Just reply now and send a separate kill job request if the - * prolog or launch fail. 
*/ - slurm_send_rc_msg(msg, rc); - replied = true; - rc = _run_prolog(req->job_id, req->uid, bg_part_id); xfree(bg_part_id); if (rc) { @@ -1050,14 +1061,39 @@ _rpc_batch_job(slurm_msg_t *msg) debug3("_rpc_batch_job: call to _forkexec_slurmstepd"); rc = _forkexec_slurmstepd(LAUNCH_BATCH_JOB, (void *)req, cli, NULL, (hostset_t)NULL); - debug3("_rpc_batch_job: return from _forkexec_slurmstepd"); + debug3("_rpc_batch_job: return from _forkexec_slurmstepd: %d", rc); slurm_mutex_unlock(&launch_mutex); + /* On a busy system, slurmstepd may take a while to respond, + * if the job was cancelled in the interim, run through the + * abort logic below */ + if (slurm_cred_revoked(conf->vctx, req->cred)) { + info("Job %u killed while launch was in progress", + req->job_id); + sleep(1); /* give slurmstepd time to create + * the communication socket */ + _terminate_all_steps(req->job_id, true); + rc = ESLURMD_CREDENTIAL_REVOKED; + goto done; + } + done: - if (!replied) - slurm_send_rc_msg(msg, rc); - else if (rc != 0) { + if (!replied) { + if (slurm_send_rc_msg(msg, rc) < 1) { + /* The slurmctld is no longer waiting for a reply. + * This typically indicates that the slurmd was + * blocked from memory and/or CPUs and the slurmctld + * has requeued the batch job request. 
*/ + error("Could not confirm batch launch for job %u, " + "aborting request", req->job_id); + rc = SLURM_COMMUNICATIONS_SEND_ERROR; + } else { + /* No need to initiate separate reply below */ + rc = SLURM_SUCCESS; + } + } + if (rc != SLURM_SUCCESS) { /* prolog or job launch failure, * tell slurmctld that the job failed */ if (req->step_id == SLURM_BATCH_SCRIPT) @@ -1067,9 +1103,11 @@ _rpc_batch_job(slurm_msg_t *msg) } /* - * If job prolog failed, indicate failure to slurmctld + * If job prolog failed or we could not reply, + * initiate message to slurmctld with current state */ - if (rc == ESLURMD_PROLOG_FAILED) + if ((rc == ESLURMD_PROLOG_FAILED) || + (rc == SLURM_COMMUNICATIONS_SEND_ERROR)) send_registration_msg(rc, false); } diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 2e54f4f8b..dfd5e2619 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmd/slurmd.c - main slurm node server daemon - * $Id: slurmd.c 15006 2008-09-08 20:47:15Z da $ + * $Id: slurmd.c 15572 2008-11-03 23:14:27Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -95,7 +95,6 @@ /* global, copied to STDERR_FILENO in tasks before the exec */ int devnull = -1; slurmd_conf_t * conf; -extern char *slurm_stepd_path; /* * count of active threads @@ -526,16 +525,11 @@ _fill_registration_msg(slurm_node_registration_status_msg_t *msg) return; } -static inline int +static inline void _free_and_set(char **confvar, char *newval) { - if (newval) { - if (*confvar) - xfree(*confvar); - *confvar = newval; - return 1; - } else - return 0; + xfree(*confvar); + *confvar = newval; } /* Replace first "%h" in path string with actual hostname. 
@@ -895,7 +889,7 @@ _slurmd_init() struct rlimit rlim; slurm_ctl_conf_t *cf; struct stat stat_buf; - + char slurm_stepd_path[MAXPATHLEN]; /* * Process commandline arguments first, since one option may be * an alternate location for the slurm config file. @@ -977,6 +971,8 @@ _slurmd_init() fd_set_close_on_exec(devnull); /* make sure we have slurmstepd installed */ + snprintf(slurm_stepd_path, sizeof(slurm_stepd_path), + "%s/sbin/slurmstepd", SLURM_PREFIX); if (stat(slurm_stepd_path, &stat_buf)) { fatal("Unable to find slurmstepd file at %s", slurm_stepd_path); diff --git a/src/slurmd/slurmstepd/Makefile.in b/src/slurmd/slurmstepd/Makefile.in index 61bdc4cb7..7b1bbc1d3 100644 --- a/src/slurmd/slurmstepd/Makefile.in +++ b/src/slurmd/slurmstepd/Makefile.in @@ -207,6 +207,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 8a423f349..79262d29a 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/mgr.c - job manager functions for slurmstepd - * $Id: mgr.c 14994 2008-09-05 21:31:37Z da $ + * $Id: mgr.c 15502 2008-10-24 22:27:32Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. 
@@ -184,7 +184,6 @@ static int _wait_for_any_task(slurmd_job_t *job, bool waitflag); static void _setargs(slurmd_job_t *job); static void _random_sleep(slurmd_job_t *job); -static char *_sprint_task_cnt(batch_job_launch_msg_t *msg); static int _run_script_as_user(const char *name, const char *path, slurmd_job_t *job, int max_wait, char **env); @@ -285,12 +284,6 @@ slurmd_job_t * mgr_launch_batch_job_setup(batch_job_launch_msg_t *msg, slurm_addr *cli) { slurmd_job_t *job = NULL; - char buf[MAXHOSTRANGELEN]; - hostlist_t hl = hostlist_create(msg->nodes); - if (!hl) - return NULL; - - hostlist_ranged_string(hl, MAXHOSTRANGELEN, buf); if (!(job = job_batch_job_create(msg))) { error("job_batch_job_create() failed: %m"); @@ -314,14 +307,11 @@ mgr_launch_batch_job_setup(batch_job_launch_msg_t *msg, slurm_addr *cli) /* this is the new way of setting environment variables */ env_array_for_batch_job(&job->env, msg, conf->node_name); - /* this is the old way of setting environment variables */ - job->envtp->nprocs = msg->nprocs; + /* this is the old way of setting environment variables (but + * needed) */ job->envtp->overcommit = msg->overcommit; job->envtp->select_jobinfo = msg->select_jobinfo; - job->envtp->nhosts = hostlist_count(hl); - hostlist_destroy(hl); - job->envtp->nodelist = xstrdup(buf); - job->envtp->task_count = _sprint_task_cnt(msg); + return job; cleanup2: @@ -1352,7 +1342,7 @@ _make_batch_script(batch_job_launch_msg_t *msg, char *path) FILE *fp = NULL; char script[MAXPATHLEN]; - snprintf(script, 1024, "%s/%s", path, "script"); + snprintf(script, 1024, "%s/%s", path, "slurm_script"); again: if ((fp = safeopen(script, "w", SAFEOPEN_CREATE_ONLY)) == NULL) { @@ -1388,26 +1378,6 @@ _make_batch_script(batch_job_launch_msg_t *msg, char *path) } -static char * -_sprint_task_cnt(batch_job_launch_msg_t *msg) -{ - int i; - char *task_str = xstrdup(""); - char tmp[16], *comma = ""; - for (i=0; i<msg->num_cpu_groups; i++) { - if (i == 1) - comma = ","; - if 
(msg->cpu_count_reps[i] > 1) - sprintf(tmp, "%s%d(x%d)", comma, msg->cpus_per_node[i], - msg->cpu_count_reps[i]); - else - sprintf(tmp, "%s%d", comma, msg->cpus_per_node[i]); - xstrcat(task_str, tmp); - } - - return task_str; -} - static void _send_launch_failure (launch_tasks_request_msg_t *msg, slurm_addr *cli, int rc) { diff --git a/src/slurmd/slurmstepd/slurmstepd.c b/src/slurmd/slurmstepd/slurmstepd.c index 40d126f71..28b8cc4cf 100644 --- a/src/slurmd/slurmstepd/slurmstepd.c +++ b/src/slurmd/slurmstepd/slurmstepd.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/slurmstepd.c - SLURM job-step manager. - * $Id: slurmstepd.c 13672 2008-03-19 23:10:58Z jette $ + * $Id: slurmstepd.c 15505 2008-10-27 17:39:44Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -48,6 +48,7 @@ #include "src/common/xmalloc.h" #include "src/common/xsignal.h" #include "src/common/slurm_jobacct_gather.h" +#include "src/common/slurm_rlimits_info.h" #include "src/common/switch.h" #include "src/common/stepd_api.h" @@ -64,6 +65,7 @@ static int _init_from_slurmd(int sock, char **argv, slurm_addr **_cli, slurm_addr **_self, slurm_msg_t **_msg, int *_ngids, gid_t **_gids); +static void _dump_user_env(void); static void _send_ok_to_slurmd(int sock); static void _send_fail_to_slurmd(int sock); static slurmd_job_t *_step_setup(slurm_addr *cli, slurm_addr *self, @@ -76,6 +78,7 @@ int slurmstepd_blocked_signals[] = { /* global variable */ slurmd_conf_t * conf; +extern char ** environ; int main (int argc, char *argv[]) @@ -88,6 +91,12 @@ main (int argc, char *argv[]) gid_t *gids; int rc = 0; + if ((argc == 2) && (strcmp(argv[1], "getenv") == 0)) { + print_rlimits(); + _dump_user_env(); + exit(0); + } + xsignal_block(slurmstepd_blocked_signals); conf = xmalloc(sizeof(*conf)); conf->argv = &argv; @@ -371,3 +380,11 @@ _step_cleanup(slurmd_job_t *job, slurm_msg_t *msg, int rc) xfree(msg); } + +static void _dump_user_env(void) +{ + int i; + + for (i=0; environ[i]; i++) + printf("%s\n",environ[i]); +} diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c index a16c21901..cf666b54e 100644 --- a/src/slurmd/slurmstepd/task.c +++ b/src/slurmd/slurmstepd/task.c @@ -381,8 +381,6 @@ exec_task(slurmd_job_t *job, int i, int waitfd) log_fini(); exit(1); } - - pdebug_stop_current(job); } #ifdef HAVE_PTY_H @@ -417,6 +415,8 @@ exec_task(slurmd_job_t *job, int i, int waitfd) job->task_prolog, job); } + if (!job->batch) + pdebug_stop_current(job); if (job->env == NULL) { debug("job->env is NULL"); job->env = (char **)xmalloc(sizeof(char *)); diff --git a/src/slurmd/slurmstepd/ulimits.c b/src/slurmd/slurmstepd/ulimits.c index 08eda040d..a46d0c605 100644 --- a/src/slurmd/slurmstepd/ulimits.c +++ b/src/slurmd/slurmstepd/ulimits.c @@ -1,6 +1,6 @@ 
/*****************************************************************************\ * src/slurmd/slurmstepd/ulimits.c - set user limits for job - * $Id: ulimits.c 13672 2008-03-19 23:10:58Z jette $ + * $Id: ulimits.c 15505 2008-10-27 17:39:44Z jette $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -192,8 +192,8 @@ _set_limit(char **env, slurm_rlimits_info_t *rli) * Nothing to do if the rlimit won't change */ if (r.rlim_cur == (rlim_t) env_value) { - debug2( "_set_limit: %s setrlimit %s is unnecessary (same val)", - u_req_propagate?"user":"conf", rlimit_name ); + debug2( "_set_limit: %s setrlimit %s no change in value: %u", + u_req_propagate?"user":"conf", rlimit_name, r.rlim_cur); return SLURM_SUCCESS; } diff --git a/src/slurmdbd/Makefile.in b/src/slurmdbd/Makefile.in index 4da8d7e0e..8f725dcef 100644 --- a/src/slurmdbd/Makefile.in +++ b/src/slurmdbd/Makefile.in @@ -200,6 +200,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/smap/Makefile.in b/src/smap/Makefile.in index ac8923c35..3e2a65e20 100644 --- a/src/smap/Makefile.in +++ b/src/smap/Makefile.in @@ -215,6 +215,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/squeue/Makefile.in b/src/squeue/Makefile.in index 1e87dec53..ca617dd25 100644 --- a/src/squeue/Makefile.in +++ b/src/squeue/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = 
@SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/squeue/opts.c b/src/squeue/opts.c index d295de2d7..820516583 100644 --- a/src/squeue/opts.c +++ b/src/squeue/opts.c @@ -1,7 +1,7 @@ /****************************************************************************\ * opts.c - srun command line option parsing * - * $Id: opts.c 14150 2008-05-29 00:14:29Z jette $ + * $Id: opts.c 15448 2008-10-17 21:49:29Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -64,7 +64,7 @@ /* getopt_long options, integers but not characters */ #define OPT_LONG_HELP 0x100 #define OPT_LONG_USAGE 0x101 -#define OPT_LONG_HIDE 0x102 +#define OPT_LONG_HIDE 0x102 /* FUNCTIONS */ static List _build_job_list( char* str ); @@ -92,7 +92,6 @@ parse_command_line( int argc, char* argv[] ) char *env_val = NULL; int opt_char; int option_index; - hostlist_t host_list; static struct option long_options[] = { {"all", no_argument, 0, 'a'}, {"noheader", no_argument, 0, 'h'}, @@ -100,6 +99,7 @@ parse_command_line( int argc, char* argv[] ) {"jobs", optional_argument, 0, 'j'}, {"long", no_argument, 0, 'l'}, {"node", required_argument, 0, 'n'}, + {"nodes", required_argument, 0, 'n'}, {"format", required_argument, 0, 'o'}, {"partitions", required_argument, 0, 'p'}, {"steps", optional_argument, 0, 's'}, @@ -154,15 +154,15 @@ parse_command_line( int argc, char* argv[] ) params.long_list = true; break; case (int) 'n': - xfree(params.node); - params.node = xstrdup(optarg); - host_list = hostlist_create(params.node); - if (!host_list) { - error("'%s' invalid entry for --node", - optarg); + if (params.nodes) + hostset_destroy(params.nodes); + + params.nodes = hostset_create(optarg); + if (params.nodes == NULL) { + error("'%s' invalid entry for 
--nodes", + optarg); exit(1); } - hostlist_destroy(host_list); break; case (int) 'o': xfree(params.format); @@ -236,18 +236,36 @@ parse_command_line( int argc, char* argv[] ) exit(1); } - if ( params.node ) { + if ( params.nodes ) { char *name1 = NULL; - if (strcasecmp("localhost", params.node) == 0) { - xfree(params.node); - params.node = xmalloc(128); - gethostname_short(params.node, 128); - } - name1 = slurm_conf_get_nodename(params.node); - if (name1) { - xfree(params.node); - params.node = xstrdup(name1); + char *name2 = NULL; + hostset_t nodenames = hostset_create(NULL); + if (nodenames == NULL) + fatal("malloc failure"); + + while ( hostset_count(params.nodes) > 0 ) { + name1 = hostset_pop(params.nodes); + + /* localhost = use current host name */ + if ( strcasecmp("localhost", name1) == 0 ) { + name2 = xmalloc(128); + gethostname_short(name2, 128); + } else { + /* translate NodeHostName to NodeName */ + name2 = slurm_conf_get_nodename(name1); + + /* use NodeName if translation failed */ + if ( name2 == NULL ) + name2 = xstrdup(name1); + } + hostset_insert(nodenames, name2); + free(name1); + xfree(name2); } + + /* Replace params.nodename with the new one */ + hostset_destroy(params.nodes); + params.nodes = nodenames; } if ( ( params.partitions == NULL ) && @@ -706,6 +724,13 @@ _print_options() enum job_states *state_id; squeue_job_step_t *job_step_id; uint32_t *job_id; + char hostlist[8192]; + + if (params.nodes) { + hostset_ranged_string(params.nodes, sizeof(hostlist)-1, + hostlist); + } else + hostlist[0] = '\0'; printf( "-----------------------------\n" ); printf( "all = %s\n", params.all_flag ? 
"true" : "false"); @@ -714,7 +739,7 @@ _print_options() printf( "job_flag = %d\n", params.job_flag ); printf( "jobs = %s\n", params.jobs ); printf( "max_procs = %d\n", params.max_procs ) ; - printf( "node = %s\n", params.node ) ; + printf( "nodes = %s\n", hostlist ) ; printf( "partitions = %s\n", params.partitions ) ; printf( "sort = %s\n", params.sort ) ; printf( "states = %s\n", params.states ) ; @@ -994,7 +1019,7 @@ Usage: squeue [OPTIONS]\n\ -j, --jobs comma separated list of jobs\n\ to view, default is all\n\ -l, --long long report\n\ - -n, --node=node_name name of single node to view, default is \n\ + -n, --nodes=hostlist list of nodes to view, default is \n\ all nodes\n\ -o, --format=format format specification\n\ -p, --partitions=partitions comma separated list of partitions\n\ diff --git a/src/squeue/print.c b/src/squeue/print.c index 6b4a016ca..6edb4a72d 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -59,7 +59,6 @@ static int _adjust_completing (job_info_t *j, node_info_msg_t **ni); static int _filter_job(job_info_t * job); static int _filter_step(job_step_info_t * step); static int _get_node_cnt(job_info_t * job); -static bool _node_in_list(char *node_name, char *node_list); static int _nodes_in_list(char *node_list); static int _print_str(char *str, int width, bool right, bool cut_output); @@ -490,8 +489,6 @@ int _print_job_time_start(job_info_t * job, int width, bool right, { if (job == NULL) /* Print the Header instead */ _print_str("START", width, right, true); - else if (job->job_state == JOB_PENDING) - _print_time((time_t) 0, 0, width, right); else _print_time(job->start_time, 0, width, right); if (suffix) @@ -777,18 +774,6 @@ static int _nodes_in_list(char *node_list) return count; } -static bool _node_in_list(char *node_name, char *node_list) -{ - bool rc; - hostset_t host_set = hostset_create(node_list); - if (hostset_within(host_set, node_name) == 0) - rc = false; - else - rc = true; - hostset_destroy(host_set); - return rc; -} - 
int _print_job_shared(job_info_t * job, int width, bool right_justify, char* suffix) { @@ -1309,8 +1294,9 @@ static int _filter_job(job_info_t * job) return 4; } - if ((params.node) - && (!_node_in_list(params.node, job->nodes))) + if ((params.nodes) + && ((job->nodes == NULL) + || (!hostset_intersects(params.nodes, job->nodes)))) return 5; if (params.user_list) { @@ -1383,8 +1369,9 @@ static int _filter_step(job_step_info_t * step) return 3; } - if ((params.node) - && (!_node_in_list(params.node, step->nodes))) + if ((params.nodes) + && ((step->nodes == NULL) + || (!hostset_intersects(params.nodes, step->nodes)))) return 5; if (params.user_list) { diff --git a/src/squeue/squeue.h b/src/squeue/squeue.h index 0227512ac..034880648 100644 --- a/src/squeue/squeue.h +++ b/src/squeue/squeue.h @@ -84,7 +84,7 @@ struct squeue_parameters { int verbose; char* jobs; - char* node; + hostset_t nodes; char* partitions; char* states; char* steps; diff --git a/src/sreport/Makefile.in b/src/sreport/Makefile.in index 408e7d90b..b472594ce 100644 --- a/src/sreport/Makefile.in +++ b/src/sreport/Makefile.in @@ -201,6 +201,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/sreport/cluster_reports.c b/src/sreport/cluster_reports.c index 2797f630a..62fedd053 100644 --- a/src/sreport/cluster_reports.c +++ b/src/sreport/cluster_reports.c @@ -867,6 +867,7 @@ extern int cluster_account_by_user(int argc, char *argv[]) ListIterator cluster_itr = NULL; List format_list = list_create(slurm_destroy_char); List assoc_list = NULL; + List first_list = NULL; List cluster_list = NULL; List sreport_cluster_list = list_create(destroy_sreport_cluster_rec); List tree_list = NULL; @@ -914,6 +915,9 @@ extern int cluster_account_by_user(int argc, char *argv[]) goto end_it; } + first_list = 
assoc_list; + assoc_list = get_hierarchical_sorted_assoc_list(first_list); + /* set up the structures for easy retrieval later */ itr = list_iterator_create(cluster_list); assoc_itr = list_iterator_create(assoc_list); @@ -990,6 +994,8 @@ extern int cluster_account_by_user(int argc, char *argv[]) cluster_list = NULL; list_destroy(assoc_list); assoc_list = NULL; + list_destroy(first_list); + first_list = NULL; if(print_fields_have_header) { char start_char[20]; @@ -1133,6 +1139,11 @@ end_it: assoc_list = NULL; } + if(first_list) { + list_destroy(first_list); + first_list = NULL; + } + if(cluster_list) { list_destroy(cluster_list); cluster_list = NULL; diff --git a/src/srun/Makefile.in b/src/srun/Makefile.in index 0f9dbabe6..87d077da0 100644 --- a/src/srun/Makefile.in +++ b/src/srun/Makefile.in @@ -201,6 +201,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 56d8b499e..444ef3e82 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/srun/allocate.c - srun functions for managing node allocations - * $Id: allocate.c 15262 2008-10-01 22:58:26Z jette $ + * $Id: allocate.c 15578 2008-11-04 01:04:21Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -590,7 +590,7 @@ create_job_step(srun_job_t *job) otermf = xsignal(SIGTERM, _intr_handler); oquitf = xsignal(SIGQUIT, _intr_handler); } else - info("Job step creation still disabled, retrying"); + verbose("Job step creation still disabled, retrying"); sleep(MIN((i*10), 60)); } if (i > 0) { diff --git a/src/srun/opt.c b/src/srun/opt.c index bdeb159cc..c9d547e4e 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -2014,11 +2014,6 @@ static bool _opt_verify(void) if ((opt.egid != (gid_t) -1) && (opt.egid != opt.gid)) opt.gid = opt.egid; - if (opt.propagate && parse_rlimits( opt.propagate, PROPAGATE_RLIMITS)) { - error( "--propagate=%s is not valid.", opt.propagate ); - verified = false; - } - if (opt.immediate) { char *sched_name = slurm_get_sched_type(); if (strcmp(sched_name, "sched/wiki") == 0) { diff --git a/src/srun/srun.c b/src/srun/srun.c index 4187b9ab2..adbb15d77 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -2,7 +2,8 @@ * srun.c - user interface to allocate resources, submit jobs, and execute * parallel jobs. ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Mark Grondona <grondona@llnl.gov>, et. al. * LLNL-CODE-402394. 
@@ -618,8 +619,15 @@ static int _set_rlimit_env(void) char name[64], *format; slurm_rlimits_info_t *rli; + /* Modify limits with any command-line options */ + if (opt.propagate && parse_rlimits( opt.propagate, PROPAGATE_RLIMITS)) + fatal( "--propagate=%s is not valid.", opt.propagate ); + for (rli = get_slurm_rlimits_info(); rli->name != NULL; rli++ ) { + if (rli->propagate_flag != PROPAGATE_RLIMITS) + continue; + if (getrlimit (rli->resource, rlim) < 0) { error ("getrlimit (RLIMIT_%s): %m", rli->name); rc = SLURM_FAILURE; diff --git a/src/sstat/Makefile.in b/src/sstat/Makefile.in index c06d4277d..978f3f100 100644 --- a/src/sstat/Makefile.in +++ b/src/sstat/Makefile.in @@ -201,6 +201,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/strigger/Makefile.in b/src/strigger/Makefile.in index 1e03e8571..0182c39af 100644 --- a/src/strigger/Makefile.in +++ b/src/strigger/Makefile.in @@ -201,6 +201,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/src/sview/Makefile.in b/src/sview/Makefile.in index 5ddd6b9ab..4ac9f03e2 100644 --- a/src/sview/Makefile.in +++ b/src/sview/Makefile.in @@ -219,6 +219,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/testsuite/Makefile.in b/testsuite/Makefile.in index e0e5c2fca..665e64e38 100644 --- a/testsuite/Makefile.in +++ b/testsuite/Makefile.in @@ -189,6 +189,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = 
@SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am index 61bf0d5f8..353740b77 100644 --- a/testsuite/expect/Makefile.am +++ b/testsuite/expect/Makefile.am @@ -29,6 +29,8 @@ EXTRA_DIST = \ test1.14 \ test1.15 \ test1.16 \ + test1.17 \ + test1.18 \ test1.19 \ test1.20 \ test1.21 \ @@ -46,16 +48,19 @@ EXTRA_DIST = \ test1.32 \ test1.32.prog.c \ test1.33 \ + test1.34 \ test1.35 \ test1.36 \ test1.38 \ test1.39 \ test1.39.prog.c \ + test1.40 \ test1.41 \ test1.42 \ test1.43 \ test1.44 \ test1.46 \ + test1.47 \ test1.48 \ test1.49 \ test1.50 \ @@ -65,6 +70,8 @@ EXTRA_DIST = \ test1.55 \ test1.56 \ test1.57 \ + test1.58 \ + test1.59 \ test1.80 \ test1.81 \ test1.82 \ @@ -81,7 +88,7 @@ EXTRA_DIST = \ test1.91 \ test1.91.prog.c \ test1.92 \ - test1.92.bash \ + test1.93 \ test2.1 \ test2.2 \ test2.3 \ @@ -156,6 +163,8 @@ EXTRA_DIST = \ test7.9 \ test7.9.prog.c \ test7.10 \ + test7.11 \ + test7.11.prog.c \ test8.1 \ test8.2 \ test8.3 \ @@ -231,6 +240,7 @@ EXTRA_DIST = \ test15.22 \ test15.23 \ test15.24 \ + test15.25 \ test16.1 \ test16.2 \ test16.3 \ @@ -268,6 +278,7 @@ EXTRA_DIST = \ test17.29 \ test17.31 \ test17.32 \ + test17.33 \ test19.1 \ test19.2 \ test19.3 \ @@ -275,6 +286,28 @@ EXTRA_DIST = \ test19.5 \ test19.6 \ test19.7 \ + test21.1 \ + test21.2 \ + test21.3 \ + test21.4 \ + test21.5 \ + test21.6 \ + test21.7 \ + test21.8 \ + test21.9 \ + test21.10 \ + test21.11 \ + test21.12 \ + test21.13 \ + test21.14 \ + test21.15 \ + test21.16 \ + test21.17 \ + test21.18 \ + test21.19 \ + test21.20 \ + test21.21 \ + test21.22 \ usleep distclean-local: diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in index e8c11f705..066d5ed60 100644 --- a/testsuite/expect/Makefile.in +++ b/testsuite/expect/Makefile.in @@ -170,6 +170,7 @@ SLURM_API_VERSION = 
@SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ @@ -262,6 +263,8 @@ EXTRA_DIST = \ test1.14 \ test1.15 \ test1.16 \ + test1.17 \ + test1.18 \ test1.19 \ test1.20 \ test1.21 \ @@ -279,16 +282,19 @@ EXTRA_DIST = \ test1.32 \ test1.32.prog.c \ test1.33 \ + test1.34 \ test1.35 \ test1.36 \ test1.38 \ test1.39 \ test1.39.prog.c \ + test1.40 \ test1.41 \ test1.42 \ test1.43 \ test1.44 \ test1.46 \ + test1.47 \ test1.48 \ test1.49 \ test1.50 \ @@ -298,6 +304,8 @@ EXTRA_DIST = \ test1.55 \ test1.56 \ test1.57 \ + test1.58 \ + test1.59 \ test1.80 \ test1.81 \ test1.82 \ @@ -314,7 +322,7 @@ EXTRA_DIST = \ test1.91 \ test1.91.prog.c \ test1.92 \ - test1.92.bash \ + test1.93 \ test2.1 \ test2.2 \ test2.3 \ @@ -389,6 +397,8 @@ EXTRA_DIST = \ test7.9 \ test7.9.prog.c \ test7.10 \ + test7.11 \ + test7.11.prog.c \ test8.1 \ test8.2 \ test8.3 \ @@ -464,6 +474,7 @@ EXTRA_DIST = \ test15.22 \ test15.23 \ test15.24 \ + test15.25 \ test16.1 \ test16.2 \ test16.3 \ @@ -501,6 +512,7 @@ EXTRA_DIST = \ test17.29 \ test17.31 \ test17.32 \ + test17.33 \ test19.1 \ test19.2 \ test19.3 \ @@ -508,6 +520,28 @@ EXTRA_DIST = \ test19.5 \ test19.6 \ test19.7 \ + test21.1 \ + test21.2 \ + test21.3 \ + test21.4 \ + test21.5 \ + test21.6 \ + test21.7 \ + test21.8 \ + test21.9 \ + test21.10 \ + test21.11 \ + test21.12 \ + test21.13 \ + test21.14 \ + test21.15 \ + test21.16 \ + test21.17 \ + test21.18 \ + test21.19 \ + test21.20 \ + test21.21 \ + test21.22 \ usleep all: all-am diff --git a/testsuite/expect/README b/testsuite/expect/README index 15b285078..eefe4e3c3 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -134,10 +134,9 @@ test1.37 Test of srun --tasks-per-node option. test1.38 Test srun handling of SIGINT to get task status or kill the job (--quit-on-interrupt option). 
test1.39 Test of linux light-weight core files. -test1.40 REMOVED +test1.40 Test of job account (--account option). test1.41 Validate SLURM debugger infrastructure (--debugger-test option). -test1.42 Test of account number and job dependencies (--account, - and --depedency options). +test1.42 Test of job dependencies (--dependency option). test1.43 Test of slurm_job_will_run API, (srun --test-only option). test1.44 Read srun's stdout slowly and test for lost data. test1.45 REMOVED @@ -395,8 +394,7 @@ test15.10 Confirm that a job allocates the proper procesor count (--tasks) test15.11 Test of --nice and --job-name options. test15.12 Verify node configuration specification (--constraint option). test15.13 Verify the appropriate job environment variables are set -test15.14 Test of account number and job dependencies (--account - and --depedency options). +test15.14 Test of job dependencies (--dependency option). test15.15 Test of user signal upon allocation (--bell and --no-bell options) test15.16 Verify that SLURM directed environment variables are processed: SALLOC_BELL and SALLOC_NO_BELL (can't really confirm from Expect) @@ -411,9 +409,9 @@ test15.21 Test of contiguous option with multiple nodes (--contiguous option). test15.22 Test of partition specification on job submission (--partition option). test15.23 Test of environment variables that control salloc actions: - SALLOC_ACCOUNT, SALLOC_DEBUG and SALLOC_TIMELIMIT + SALLOC_DEBUG and SALLOC_TIMELIMIT test15.24 Test of --overcommit option. - +test15.25 Test of job account (--account option). test16.# Testing of sattach options.
====================================== @@ -455,8 +453,8 @@ test17.15 Verify that user user limits are propagated to the job test17.16 Verify that command line arguments get forwarded to job script test17.17 Confirm that node sharing flags are respected (--nodelist and --share options) -test17.18 Test of account number and job dependencies (--account, --begin - and --depedency options) +test17.18 Test of job dependencies and deferred begin time (--dependency + and --begin options). test17.19 Test the launch of a batch job within an existing job allocation. This logic is used by LSF test17.20 Test of mail options (--mail-type and --mail-user options) @@ -518,3 +516,6 @@ test21.17 sacctmgr modify user test21.18 sacctmgr modify multiple users test21.19 sacctmgr add and delete coordinator test21.20 sacctmgr add and modify QoS +test21.21 sacctmgr add an account to this cluster and try using it with + salloc, sbatch and srun. +test21.22 sacctmgr load file diff --git a/testsuite/expect/globals b/testsuite/expect/globals index a9003d3df..e2eb32332 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -1141,9 +1141,10 @@ proc check_acct_associations { } { # ################################################################ proc check_accounting_admin_level { } { - global sacctmgr alpha alpha_numeric_under bin_id + global sacctmgr alpha alpha_numeric_under bin_id - set admin_level "" + set admin_level "" + set user_name "" log_user 0 diff --git a/testsuite/expect/test1.40 b/testsuite/expect/test1.40 new file mode 100755 index 000000000..355decfe4 --- /dev/null +++ b/testsuite/expect/test1.40 @@ -0,0 +1,138 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of job account (--account option). +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..."
otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2004-2007 The Regents of the University of California. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Morris Jette <jette1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "1.40" +set file_in "test$test_id.input" +set exit_code 0 +set job_id1 0 +set job_id2 0 + +print_header $test_id + +if {[test_assoc_enforced]} { + send_user "\nWARNING: This test will not work when associations are enforced.\n" + exit $exit_code +} +if {[test_account_storage]} { + send_user "\nWARNING: This test will not work when invalid account names are subject to change.\n" + exit $exit_code +} + +global env +set env(SLURM_ACCOUNT) QA_ACCT + +# +# Build input script file +# +make_bash_script $file_in "$bin_sleep 5" + +# +# Spawn an sbatch job that just sleeps for a while +# +set timeout $max_job_delay +set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null --account=MY_ACCT -t1 $file_in] +expect { + -re "Submitted batch job ($number)" { + set job_id1 $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + slow_kill $sbatch_pid + set exit_code 1 + } + eof { + wait + } +} +if {$job_id1 == 0} { + send_user "\nFAILURE: batch submit failure\n" + exit 1 +} + +# +# Spawn an srun job that should get account from SLURM_ACCOUNT env var +# +set match_acct 0 +set srun_pid [spawn $srun -v $scontrol show job $job_id1] +expect { + -re "launching ($number).0" { + set job_id2 $expect_out(1,string) + exp_continue + } + -re "Account=MY_ACCT" { + set match_acct 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + slow_kill $srun_pid + set exit_code 1 + } + eof { + wait + } +} +if {$match_acct == 0} { + send_user "\nFAILURE: Account information not processed\n" + set exit_code 1 +} + +# +# Confirm account info within second job based SLURM_ACCOUNT +# +set match_acct 0 +spawn $scontrol show job $job_id2 +expect { + -re "Account=QA_ACCT" { + set match_acct 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if 
{$match_acct == 0} { + send_user "\nFAILURE: Account information not processed\n" + set exit_code 1 +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test1.42 b/testsuite/expect/test1.42 index 644dfafe4..010755d1b 100755 --- a/testsuite/expect/test1.42 +++ b/testsuite/expect/test1.42 @@ -1,8 +1,7 @@ #!/usr/bin/expect ############################################################################ # Purpose: Test of SLURM functionality -# Test of account number and job dependencies (--account, and -# --depedency options). +# Test of job dependencies (--dependency option). # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR @@ -40,32 +39,24 @@ set job_id2 0 print_header $test_id -if {[test_assoc_enforced]} { - send_user "\nWARNING: This test will not work when associations are enforced.\n" - exit $exit_code -} - -global env -set env(SLURM_ACCOUNT) QA_ACCT - # # Build input script file # -make_bash_script $file_in "$bin_sleep 5" +make_bash_script $file_in "$bin_sleep 30" # -# Spawn a srun batch job that just sleeps for a while +# Spawn an sbatch job that just sleeps for a while # set timeout $max_job_delay -set srun_pid [spawn $sbatch --output=/dev/null --error=/dev/null --account=MY_ACCT -t1 $file_in] +set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t1 $file_in] expect { -re "Submitted batch job ($number)" { set job_id1 $expect_out(1,string) exp_continue } timeout { - send_user "\nFAILURE: srun not responding\n" - slow_kill $srun_pid + send_user "\nFAILURE: sbatch not responding\n" + slow_kill $sbatch_pid set exit_code 1 } eof { @@ -80,7 +71,6 @@ if {$job_id1 == 0} { # # Submit a dependent job # -set match_acct 0 set match_state 0 set srun_pid [spawn $srun -v --dependency=afterany:$job_id1 $scontrol show job $job_id1] expect { @@ -92,10 +82,6 @@ expect { set match_state 1 exp_continue 
} - -re "Account=MY_ACCT" { - set match_acct 1 - exp_continue - } timeout { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid @@ -105,20 +91,14 @@ expect { wait } } -if {$match_acct == 0} { - send_user "\nFAILURE: Account information not processed\n" - set exit_code 1 -} if {$match_state == 0} { send_user "\nFAILURE: Dependent job not completed\n" set exit_code 1 } # -# Confirm dependency info within second job as well as environment -# variable based SLURM account +# Confirm dependency info within second job # -set match_acct 0 set match_jobid 0 spawn $scontrol show job $job_id2 expect { @@ -126,10 +106,6 @@ expect { set match_jobid $expect_out(1,string) exp_continue } - -re "Account=QA_ACCT" { - set match_acct 1 - exp_continue - } timeout { send_user "\nFAILURE: scontrol not responding\n" set exit_code 1 @@ -138,10 +114,6 @@ expect { wait } } -if {$match_acct == 0} { - send_user "\nFAILURE: Account information not processed\n" - set exit_code 1 -} if {$match_jobid != $job_id1} { send_user "\nFAILURE: Dependency information not processed\n" set exit_code 1 diff --git a/testsuite/expect/test1.90 b/testsuite/expect/test1.90 index 2b8f25897..d3b2bb27a 100755 --- a/testsuite/expect/test1.90 +++ b/testsuite/expect/test1.90 @@ -554,6 +554,7 @@ if {$exit_code == 0} { } else { send_user "\nNOTE: This test can fail if the node configuration in slurm.conf \n" send_user " (sockets, cores, threads) differs from the actual configuration\n" + send_user " SPANK plugins (e.g. auto-affinity.so)\n" } exit $exit_code diff --git a/testsuite/expect/test15.14 b/testsuite/expect/test15.14 index 85c7d17bf..07e87f0c8 100755 --- a/testsuite/expect/test15.14 +++ b/testsuite/expect/test15.14 @@ -1,8 +1,7 @@ #!/usr/bin/expect ############################################################################ # Purpose: Test of SLURM functionality -# Test of account number and job dependencies (--account -# and --depedency options). +# Test of job dependencies (--depedency option). 
# # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR @@ -41,14 +40,6 @@ set job_acct "TEST_ACCT" print_header $test_id -if {[test_assoc_enforced]} { - send_user "\nWARNING: This test will not work when associations are enforced.\n" - exit $exit_code -} - -global env -set env(SALLOC_ACCOUNT) QA_ACCT - # # Build input script file # @@ -58,7 +49,7 @@ make_bash_script $file_in "$bin_sleep 10" # Spawn a srun batch job that just sleeps for a while # set timeout $max_job_delay -spawn $sbatch --output=/dev/null --error=/dev/null --account=$job_acct -t1 $file_in +set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null -t1 $file_in] expect { -re "Submitted batch job ($number)" { set job_id1 $expect_out(1,string) @@ -66,6 +57,7 @@ expect { } timeout { send_user "\nFAILURE: sbatch not responding\n" + slow_kill $sbatch_pid set exit_code 1 } eof { @@ -80,7 +72,6 @@ if {$job_id1 == 0} { # # Submit a dependent job # -set match_acct 0 set match_state 0 set salloc_pid [spawn $salloc --dependency=afterany:$job_id1 $srun $scontrol show job $job_id1] expect { @@ -92,10 +83,6 @@ expect { set match_state 1 exp_continue } - -re "Account=$job_acct" { - set match_acct 1 - exp_continue - } timeout { send_user "\nFAILURE: salloc not responding\n" if {$job_id2 != 0} { @@ -108,20 +95,14 @@ expect { wait } } -if {$match_acct == 0} { - send_user "\nFAILURE: Account information not processed\n" - set exit_code 1 -} if {$match_state == 0} { send_user "\nFAILURE: Dependent job not completed\n" set exit_code 1 } # -# Confirm dependency info within second job as well as environment -# variable based SLURM account +# Confirm dependency info within second job # -set match_acct 0 set match_jobid 0 spawn $scontrol show job $job_id2 expect { @@ -129,10 +110,6 @@ expect { set match_jobid $expect_out(1,string) exp_continue } - -re "Account=QA_ACCT" { - set match_acct 1 - exp_continue - } timeout { send_user 
"\nFAILURE: scontrol not responding\n" set exit_code 1 @@ -141,10 +118,6 @@ expect { wait } } -if {$match_acct == 0} { - send_user "\nFAILURE: Account information not processed\n" - set exit_code 1 -} if {$match_jobid != $job_id1} { send_user "\nFAILURE: Dependency information not processed\n" set exit_code 1 diff --git a/testsuite/expect/test15.23 b/testsuite/expect/test15.23 index e1b6d7e79..fd29ab303 100755 --- a/testsuite/expect/test15.23 +++ b/testsuite/expect/test15.23 @@ -2,7 +2,7 @@ ############################################################################ # Purpose: Test of SLURM functionality # Verify environment variables controlling salloc are processed: -# SALLOC_ACCOUNT, SALLOC_DEBUG and SALLOC_TIMELIMIT +# SALLOC_DEBUG and SALLOC_TIMELIMIT # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR @@ -36,7 +36,6 @@ set test_id "15.23" set exit_code 0 set job_id 0 set timeout $max_job_delay -set job_acct "TEST_ACCT" print_header $test_id @@ -49,7 +48,6 @@ if {[test_assoc_enforced]} { # Set target environment variables # global env -set env(SALLOC_ACCOUNT) $job_acct set env(SALLOC_DEBUG) 2 set env(SALLOC_TIMELIMIT) 2 @@ -66,6 +64,7 @@ expect { -re "Granted job allocation ($number)" { set job_id $expect_out(1,string) send "$scontrol show job $job_id\n" + send "exit\n" exp_continue } -re "TimeLimit=($number):($number):" { @@ -78,11 +77,6 @@ expect { } exp_continue } - -re "Account=$job_acct" { - incr matches - send "exit\n" - exp_continue - } timeout { send_user "\nFAILURE: srun not responding\n" if {$job_id != 0} { @@ -100,8 +94,8 @@ if {$job_id == 0} { send_user "\nFAILURE: did not get job_id\n" exit 1 } -if {$matches != 3} { - send_user "\nFAILURE: processed $matches of 3 environment variables\n" +if {$matches != 2} { + send_user "\nFAILURE: processed $matches of 2 environment variables\n" set exit_code 1 } diff --git a/testsuite/expect/test15.25 
b/testsuite/expect/test15.25 new file mode 100755 index 000000000..7b18ba289 --- /dev/null +++ b/testsuite/expect/test15.25 @@ -0,0 +1,142 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of job account(--account option). +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2004-2007 The Regents of the University of California. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Morris Jette <jette1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+############################################################################ +source ./globals + +set test_id "15.25" +set file_in "test$test_id.input" +set exit_code 0 +set job_id1 0 +set job_id2 0 +set job_acct "TEST_ACCT" + +print_header $test_id + +if {[test_assoc_enforced]} { + send_user "\nWARNING: This test will not work when associations are enforced.\n" + exit $exit_code +} +if {[test_account_storage]} { + send_user "\nWARNING: This test will not work when invalid account names are subject to change.\n" + exit $exit_code +} + +global env +set env(SALLOC_ACCOUNT) $job_acct + +# +# Build input script file +# +make_bash_script $file_in "$bin_sleep 10" + +# +# Spawn a srun batch job that just sleeps for a while +# +set timeout $max_job_delay +set sbatch_pid [spawn $sbatch --output=/dev/null --error=/dev/null --account=$job_acct -t1 $file_in] +expect { + -re "Submitted batch job ($number)" { + set job_id1 $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + slow_kill $sbatch_pid + set exit_code 1 + } + eof { + wait + } +} +if {$job_id1 == 0} { + send_user "\nFAILURE: batch submit failure\n" + exit 1 +} + +# +# Submit a second job that gets account from SALLOC_ACCOUNT environment variable +# +set match_acct 0 +set salloc_pid [spawn $salloc --dependency=afterany:$job_id1 $srun $scontrol show job $job_id1] +expect { + -re "Granted job allocation ($number)" { + set job_id2 $expect_out(1,string) + exp_continue + } + -re "Account=$job_acct" { + set match_acct 1 + exp_continue + } + timeout { + send_user "\nFAILURE: salloc not responding\n" + if {$job_id2 != 0} { + cancel_job $job_id + } + slow_kill [expr 0 - $salloc_pid] + set exit_code 1 + } + eof { + wait + } +} +if {$match_acct == 0} { + send_user "\nFAILURE: Account information not processed\n" + set exit_code 1 +} + +# +# Confirm second job's as account name +# +set match_acct 0 +spawn $scontrol show job $job_id2 +expect { + -re "Account=$job_acct" { + set 
match_acct 1 + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$match_acct == 0} { + send_user "\nFAILURE: Account information not processed\n" + set exit_code 1 +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test17.18 b/testsuite/expect/test17.18 index 0a6068941..5273ba50b 100755 --- a/testsuite/expect/test17.18 +++ b/testsuite/expect/test17.18 @@ -1,8 +1,8 @@ #!/usr/bin/expect ############################################################################ # Purpose: Test of SLURM functionality -# Test of account number and job dependencies (--account, --begin -# and --depedency options). +# Test of job dependencies and deferred begin time (--depedency +# and --begin options). # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR @@ -38,18 +38,9 @@ set file_out "test$test_id.output" set exit_code 0 set job_id1 0 set job_id2 0 -set job_acct "QA_ACCT" print_header $test_id -if {[test_assoc_enforced]} { - send_user "\nWARNING: This test will not work when associations are enforced.\n" - exit $exit_code -} - -global env -set env(SBATCH_ACCOUNT) QA_ACCT - # # Build input script file # @@ -60,7 +51,7 @@ make_bash_script $file_in "$bin_sleep 5" # Spawn a batch job that just sleeps for a while # set timeout $max_job_delay -spawn $sbatch --output=/dev/null --error=/dev/null --account=MY_ACCT -t1 $file_in +spawn $sbatch --output=/dev/null --error=/dev/null -t1 $file_in expect { -re "Submitted batch job ($number)" { set job_id1 $expect_out(1,string) @@ -85,7 +76,6 @@ if {$job_id1 == 0} { # exec $bin_rm -f $file_in make_bash_script $file_in "$scontrol show job $job_id1" -set match_acct 0 set match_state 0 set timeout 30 spawn $sbatch --dependency=afterany:$job_id1 --output=$file_out $file_in @@ -127,28 +117,18 @@ expect { set 
match_state 1 exp_continue } - -re "Account=MY_ACCT" { - set match_acct 1 - exp_continue - } eof { wait } } -if {$match_acct == 0} { - send_user "\nFAILURE: Account information not processed\n" - set exit_code 1 -} if {$match_state == 0} { send_user "\nFAILURE: Dependent job not completed\n" set exit_code 1 } # -# Confirm dependency info within second job as well as environment -# variable based SLURM account +# Confirm dependency info within second job # -set match_acct 0 set match_jobid 0 spawn $scontrol show job $job_id2 expect { @@ -156,10 +136,6 @@ expect { set match_jobid $expect_out(1,string) exp_continue } - -re "Account=QA_ACCT" { - set match_acct 1 - exp_continue - } timeout { send_user "\nFAILURE: scontrol not responding\n" set exit_code 1 @@ -168,10 +144,6 @@ expect { wait } } -if {$match_acct == 0} { - send_user "\nFAILURE: Account information not processed\n" - set exit_code 1 -} if {$match_jobid != $job_id1} { send_user "\nFAILURE: Dependency information not processed\n" set exit_code 1 diff --git a/testsuite/expect/test17.21 b/testsuite/expect/test17.21 index a2e2cac84..b000c98c8 100755 --- a/testsuite/expect/test17.21 +++ b/testsuite/expect/test17.21 @@ -36,20 +36,13 @@ set test_id "17.21" set exit_code 0 set file_in "test$test_id.input" set file_out "test$test_id.output" -set job_acct "TEST_ACCT" set job_name "TEST_NAME" set delay 10 print_header $test_id -if {[test_assoc_enforced]} { - send_user "\nWARNING: Using users default account instead of $job_acct since associations are enforced.\n" - set job_acct [get_default_acct 0] -} - make_bash_script $file_in " #SBATCH --job-name=$job_name -#SBATCH --account=$job_acct $bin_sleep $delay " @@ -76,10 +69,6 @@ expect { incr matches exp_continue } - -re "Account=$job_acct" { - incr matches - exp_continue - } timeout { send_user "\nFAILURE: scontrol not responding\n" set exit_code 1 @@ -90,8 +79,8 @@ expect { } } cancel_job $job_id -if {$matches != 2} { - send_user "\nFAILURE: did not set job name and 
account from batch script\n" +if {$matches != 1} { + send_user "\nFAILURE: did not set job name from batch script\n" set exit_code 1 } diff --git a/testsuite/expect/test21.10 b/testsuite/expect/test21.10 index ae9cd61f1..71d8e1ac5 100755 --- a/testsuite/expect/test21.10 +++ b/testsuite/expect/test21.10 @@ -106,7 +106,7 @@ if { [string compare [check_accounting_admin_level] "Administrator"] } { set nothing 0 set matches 0 -set sadel_pid [spawn $sacctmgr -i $del $clu $tc1] +set my_pid [spawn $sacctmgr -i $del $clu $tc1] expect { -re "privilege to perform this action" { @@ -269,7 +269,7 @@ expect { } timeout { send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid + slow_kill $sadel_pid set exit_code 1 } eof { diff --git a/testsuite/expect/test21.11 b/testsuite/expect/test21.11 index c23e308aa..68f75cd42 100755 --- a/testsuite/expect/test21.11 +++ b/testsuite/expect/test21.11 @@ -134,7 +134,7 @@ expect { } timeout { send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid + slow_kill $sadel_pid set exit_code 1 } eof { diff --git a/testsuite/expect/test21.12 b/testsuite/expect/test21.12 index d66fab3a2..4566dcaca 100755 --- a/testsuite/expect/test21.12 +++ b/testsuite/expect/test21.12 @@ -136,7 +136,7 @@ expect { } timeout { send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid + slow_kill $sadel_pid set exit_code 1 } eof { diff --git a/testsuite/expect/test21.21 b/testsuite/expect/test21.21 new file mode 100755 index 000000000..6f8c60475 --- /dev/null +++ b/testsuite/expect/test21.21 @@ -0,0 +1,279 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr add an account to this cluster and try using it with +# salloc, sbatch and srun. +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." 
otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.21" +set exit_code 0 +set file_in "test.$test_id.input" +set test_acct "test_acct" +set timeout 60 +print_header $test_id + +# +# Check accounting config and bail if not found. 
+# +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +if { [string compare [check_accounting_admin_level] "Administrator"] } { + send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME set admin=admin.\n" + exit 0 +} + +# +# Identify the user and his current default account +# +set acct_name "" +set user_name "" +spawn $bin_id -u -n +expect { + -re "($alpha_numeric_under)" { + set user_name $expect_out(1,string) + exp_continue + } + eof { + wait + } +} +set s_pid [spawn $sacctmgr show user $user_name] +expect { + -re "$user_name *($alpha_numeric_under)" { + set acct_name $expect_out(1,string) + exp_continue + } + timeout { + send_user "FAILURE: sacctmgr add not responding\n" + slow_kill $s_pid + exit 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to add an account +# +set aamatches 0 +set sadd_pid [spawn $sacctmgr -i add account $test_acct] +expect { + -re "Adding Account" { + incr aamatches + exp_continue + } + -re "Nothing new added" { + send_user "\nWARNING: vestigial account $test_acct found\n" + incr aamatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} +if {$aamatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account.\n" + exit 1 +} + +# +# Add self to this new account +# +set sadd_pid [spawn $sacctmgr -i create user name=$user_name account=$test_acct] +expect { + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Spawn a job via salloc using this account +# +set job_id 0 +set matches 0 +set timeout $max_job_delay +spawn $salloc -N1 --account=$test_acct +expect { + -re "Granted job allocation ($number)" { + set job_id $expect_out(1,string) + send "$scontrol show job $job_id\n" + send "exit\n" 
+ exp_continue + } + -re "Account=$test_acct" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: salloc not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: salloc failed to initiate job\n" + set exit_code 1 +} elseif {$matches != 1} { + send_user "\nFAILURE: salloc failed to use desired account\n" + set exit_code 1 +} + +# +# Spawn a job via sbatch using this account +# +make_bash_script $file_in "$bin_id" +set job_id 0 +spawn $sbatch -N1 --account=$test_acct --output=none $file_in +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: did not get sbatch job_id\n" + set exit_code 1 +} else { + set matches 0 + spawn $scontrol show job $job_id + expect { + -re "Account=$test_acct" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$matches != 1} { + send_user "\nFAILURE: sbatch failed to use specified account\n" + set exit_code 1 + } +} + +# +# Spawn a job via srun using this account +# +set job_id 0 +spawn $srun -N1 -v --account=$test_acct $bin_id +expect { + -re "launching ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: did not get srun job_id\n" + set exit_code 1 +} else { + set matches 0 + spawn $scontrol show job $job_id + expect { + -re "Account=$test_acct" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: scontrol not responding\n" + set exit_code 1 + } + eof { + wait + } + } + if {$matches != 1} { + send_user "\nFAILURE: srun failed to use specified account\n" + set exit_code 1 + } +} + +# +# Use sacctmgr 
to delete the test account +# +set damatches 0 +set sadel_pid [spawn $sacctmgr -i delete account $test_acct] +expect { + -re "Deleting account" { + incr damatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} +if {$damatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account\n" + set exit_code 1 +} + +if {$exit_code == 0} { + exec $bin_rm -f $file_in + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test21.22 b/testsuite/expect/test21.22 new file mode 100755 index 000000000..ebb231e65 --- /dev/null +++ b/testsuite/expect/test21.22 @@ -0,0 +1,1027 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr load data file +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. 
+# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.22" +set test_acct "test_acct" +set exit_code 0 +set file_in "test$test_id.input" +set file_in2 "test$test_id.input2" +set file_in3 "test$test_id.input3" +set file_out "test$test_id.output" +set file_out2 "test$test_id.output2" +set file_out3 "test$test_id.output3" +set lis list +set as assoc +set fmt format +set cln clean +set tre tree +set was withassoc +set wco withcoordinator +set par Parent +set roo root +set clu Cluster +set cl1 clust1 +set acc Account +set nams names +set nm1 scienceacct +set nm2 physicsacct +set nm3 theoryacct +set nm4 appliedacct +set dsc Description +set ds1 scienceacct +set ds2 physicsacct +set ds3 theoryacct +set ds4 appliedacct +set org Organization +set or1 scienceorg +set or2 physicsorg +set or3 theoryorg +set or4 appliedorg +set usr user +set us1 u1ser +set us2 u2ser +set us3 u3ser +set us4 u4ser +set qs QOS +set qs1 normal +set pat parent +set al AdminLevel +set aln None +set ala Administrator +set alo Operator +set coo Coordinator +set col CoordinatorList +set dac DefaultAccount +set pts Partitions +set pde pdebug +set pba pbatch +set fs Fairshare +set gm GrpCPUMins +set gc GrpCPUs +set gj GrpJobs +set gs GrpSubmitJobs +set gn GrpNodes +set gw GrpWall +set mm MaxCPUMins +set mc MaxCPUs +set mj MaxJobs +set ms MaxSubmitJobs +set mn MaxNodes +set mw MaxWall +set fs1 1000 +set fs2 2375 +set fs3 3240 +set fs4 4321 +set fs5 5678 +set fs6 6789 +set gm1 1100 +set gc1 10 +set gj1 120 +set gs1 130 +set gn1 140 +set gw1 60 +set gt1 01:00:00 +set mc1 150 +set mm1 110000 +set mj1 160 +set ms1 170 +set mn1 180 +set mw1 70 +set mt1 01:10:00 +set gm2 2000 +set gc2 20 +set gj2 210 +set gs2 220 +set gn2 230 +set gw2 120 +set gt2 
02:00:00 +set mc2 240 +set mm2 220000 +set mj2 250 +set ms2 260 +set mn2 270 +set mw2 140 +set mt2 02:20:00 +set gm3 3300 +set gc3 30 +set gj3 310 +set gs3 320 +set gn3 330 +set gw3 180 +set gt3 03:00:00 +set mc3 340 +set mm3 330000 +set mj3 350 +set ms3 360 +set mn3 370 +set mw3 210 +set mt3 03:30:00 +set gm4 4000 +set gc4 40 +set gj4 410 +set gs4 420 +set gn4 430 +set gw4 240 +set gt4 04:00:00 +set mc4 440 +set mm4 420000 +set mj4 450 +set ms4 460 +set mn4 470 +set mw4 280 +set mt4 04:40:00 +set gm5 5500 +set gc5 50 +set gj5 510 +set gs5 520 +set gn5 530 +set gw5 300 +set gt5 05:00:00 +set mc5 540 +set mm5 550000 +set mj5 550 +set ms5 560 +set mn5 570 +set mw5 350 +set mt5 05:50:00 +set gm6 6600 +set gc6 60 +set gj6 610 +set gs6 620 +set gn6 630 +set gw6 1440 +set gt6 1-00:00:00 +set mc6 640 +set mm6 660000 +set mj6 650 +set ms6 660 +set mn6 670 +set mw6 2880 +set mt6 2-00:00:00 +set access_err 0 +set timeout 60 +#set user_name "id -u -n" + +print_header $test_id + +# +# Check accounting config and bail if not found. 
+# +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +if { [string compare [check_accounting_admin_level] "Administrator"] } { + send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n" + exit 0 +} + +# +# Identify the user and his current default account +# +set acct_name "" +set user_name "" +spawn $bin_id -u -n +expect { + -re "($alpha_numeric_under)" { + set user_name $expect_out(1,string) + exp_continue + } + eof { + wait + } +} +set s_pid [spawn $sacctmgr show user $user_name] +expect { + -re "$user_name *($alpha_numeric_under)" { + set acct_name $expect_out(1,string) + exp_continue + } + timeout { + send_user "FAILURE: sacctmgr add not responding\n" + slow_kill $s_pid + exit 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to add an account +# +set aamatches 0 +set sadd_pid [spawn $sacctmgr -i add account $test_acct] +expect { + -re "Adding Account" { + incr aamatches + exp_continue + } + -re "Nothing new added" { + send_user "\nWARNING: vestigial account $test_acct found\n" + incr aamatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} +if {$aamatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account.\n" + exit 1 +} + +# +# Add self to this new account +# +set sadd_pid [spawn $sacctmgr -i create user name=$user_name account=$test_acct] +expect { + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +set timeout 60 + +# +# Delete left-over input script files +# +exec $bin_rm -f $file_in +exec $bin_rm -f $file_in2 +exec $bin_rm -f $file_in3 + +# +# Build input script file - to create original associations +# +exec echo "$clu - 
$cl1:$fs=$fs6:$gm=$gm6:$gc=$gc6:$gj=$gj6:$gn=$gn6:$gs=$gs6:$gw=$gw6:$mm=$mm6:$mc=$mc6:$mj=$mj6:$mn=$mn6:$ms=$ms6:$mw=$mw6:$qs=$qs1" >>$file_in +exec echo "$par - $roo" >>$file_in +exec echo "$acc - $nm1:$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in +exec echo "$acc - $nm2:$dsc=$ds2:$org=$or2:$fs=$fs4:$gm=$gm4:$gc=$gc4:$gj=$gj4:$gn=$gn4:$gs=$gs4:$gw=$gw4:$mm=$mm4:$mc=$mc4:$mj=$mj4:$mn=$mn4:$ms=$ms4:$mw=$mw4:$qs=$qs1" >>$file_in +exec echo "$par - $nm1" >>$file_in +exec echo "$acc - $nm3:$dsc=$ds3:$org=$or3:$fs=$fs3:$gm=$gm3:$gc=$gc3:$gj=$gj3:$gn=$gn3:$gs=$gs3:$gw=$gw3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in +exec echo "$par - $nm2" >>$file_in +exec echo "$usr - $us1:$coo=$nm2:$dac=$nm1:$fs=$fs1:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1:$al=$alo" >>$file_in +exec echo "$par - $nm3" >>$file_in +exec echo "$usr - $us2:$coo=$nm3:$dac=$nm2:$fs=$fs2:$mm=$mm1:$mc=$mc1:$mj=$mj1:$mn=$mn1:$ms=$ms1:$mw=$mw1:$qs=$qs1:$al=$ala" >>$file_in + +# +# Second input file - to modify and add associations to the original +# +exec echo "$clu - $cl1" >>$file_in2 +exec echo "$par - $roo" >>$file_in2 +exec echo "$acc - $nm1" >>$file_in2 +exec echo "$acc - $nm3:$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in2 +exec echo "$par - $nm1" >>$file_in2 +exec echo "$acc - $nm2" >>$file_in2 +exec echo "$par - $nm2" >>$file_in2 +exec echo "$usr - $us3:$coo=$nm1,$nm2,$nm3:$dac=$nm1:$fs=$fs2:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1:$al=$ala" >>$file_in2 +exec echo "$par - $nm3" >>$file_in2 +exec echo "$usr - $us2:$dac=$nm1:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1:$al=$alo" >>$file_in2 +exec echo "$usr - 
$us3:$dac=$nm1:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in2 +exec echo "$par - $nm1" >>$file_in2 +exec echo "$usr - $us3:$dac=$nm2:$fs=$fs2:$mm=$mm1:$mc=$mc1:$mj=$mj1:$mn=$mn1:$ms=$ms1:$mw=$mw1:$qs=$qs1" >>$file_in2 + +# +# Third input file - to replace all previous +# +exec echo "$clu - $cl1:$fs=$fs6:$gm=$gm6:$gc=$gc6:$gj=$gj6:$gn=$gn6:$gs=$gs6:$gw=$gw6:$mm=$mm6:$mc=$mc6:$mj=$mj6:$mn=$mn6:$ms=$ms6:$mw=$mw6:$qs=$qs1" >>$file_in3 +exec echo "$par - $roo" >>$file_in3 +exec echo "$acc - $nm1:$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in3 +exec echo "$acc - $nm3:$dsc=$ds3:$org=$or3:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in3 +exec echo "$par - $nm1" >>$file_in3 +exec echo "$acc - $nm3:$dsc=$ds1:$org=$or1:$fs=$fs5:$gm=$gm5:$gc=$gc5:$gj=$gj5:$gn=$gn5:$gs=$gs5:$gw=$gw5:$mm=$mm5:$mc=$mc5:$mj=$mj5:$mn=$mn5:$ms=$ms5:$mw=$mw5:$qs=$qs1" >>$file_in3 +exec echo "$acc - $nm2:$dsc=$ds2:$org=$or2:$fs=$fs4:$gm=$gm4:$gc=$gc4:$gj=$gj4:$gn=$gn4:$gs=$gs4:$gw=$gw4:$mm=$mm4:$mc=$mc4:$mj=$mj4:$mn=$mn4:$ms=$ms4:$mw=$mw4:$qs=$qs1:$al=$alo" >>$file_in3 +exec echo "$usr - $us3:$coo=$nm1,$nm2,$nm3:$dac=$nm2:$fs=$fs2:$mm=$mm1:$mc=$mc1:$mj=$mj1:$mn=$mn1:$ms=$ms1:$mw=$mw1:$qs=$qs1:$al=$ala" >>$file_in3 +exec echo "$par - $nm2" >>$file_in3 +exec echo "$usr - $us1:$coo=$nm2:$dac=$nm1:$fs=$fs1:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1:$al=$alo" >>$file_in3 +exec echo "$usr - $us3:$dac=$nm1:$fs=$fs2:$mm=$mm2:$mc=$mc2:$mj=$mj2:$mn=$mn2:$ms=$ms2:$mw=$mw2:$qs=$qs1" >>$file_in3 +exec echo "$par - $nm3" >>$file_in3 +exec echo "$usr - $us2:$coo=$nm3:$dac=$nm1:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in3 +exec echo "$usr - 
$us3:$dac=$nm1:$fs=$fs3:$mm=$mm3:$mc=$mc3:$mj=$mj3:$mn=$mn3:$ms=$ms3:$mw=$mw3:$qs=$qs1" >>$file_in3 + +# +# Check accounting config and bail if not found. +# +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +if { [string compare [check_accounting_admin_level] "Administrator"] } { + send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n" + exit 0 +} + + +# +# Use sacctmgr to remove the test cluster +# +proc _remove_cluster {name} { + global access_err sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 + } + + set my_pid [spawn $sacctmgr -i delete cluster $name] + expect { + -re "privilege to perform this action" { + set access_err 1 + exp_continue + } + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Deleting clusters" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + if {$access_err != 0} { + return 1 + } + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n" + incr exit_code 1 + } + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: 
Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code +} + +# +# Use sacctmgr to remove an account +# +proc _remove_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting account" + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 + } + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + set check "Deleting account associations" + } + + set my_pid [eval spawn $sacctmgr -i delete account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code +} + +# +# Use sacctmgr to remove an user +# +proc _remove_user { acct user } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting user" + + if { ![string length $user] } { + send_user "FAILURE: we need a name to remove\n" + return 1 + } + + set command "$user" + + if { [string length $acct] } { + set command "$command account=$acct" + set check "Deleting user associations" + } + + set my_pid [eval spawn $sacctmgr -i delete user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknown problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code +} + +#make sure we have a clean system and permission to do this work +_remove_user "" "$us1,$us2,$us3" +_remove_acct "" "$nm1,$nm2,$nm3" +_remove_cluster "$cl1" +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} + +set matches 0 +set my_pid [spawn $sacctmgr -i load $file_in] +expect { + -re "(Unable to read *.* * No such file or directory)" { + send_user "FAILURE: there was a problem with the file\n" + incr exit_code 1 + } + -re "error" { + send_user "FAILURE: there was a problem with the file\n" + incr exit_code 1 + } + -re "Problem with requests" { + send_user "FAILURE: there was a problem with the request\n" + incr exit_code 1 + } + -re "(sacctmgr: For cluster $cl1)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 1} { + send_user "\nFAILURE: File load 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +# +# Use sacctmgr to list the test original association additions +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p $lis $as $clu=$cl1 $fmt=$clu,$acc,$usr,$pts,$fs,$gm,$gc,$gj,$gn,$gs,$gw,$mc,$mm,$mj,$mn,$ms,$mw,$qs] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($cl1.$roo...$fs6.$gm6.$gc6.$gj6.$gn6.$gs6.$gt6.$mc6.$mm6.$mj6.$mn6.$ms6.$mt6.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$roo.$roo..1.......$mc6.$mm6.$mj6.$mn6.$ms6.$mt6.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm2...$fs4.$gm4.$gc4.$gj4.$gn4.$gs4.$gt4.$mc4.$mm4.$mj4.$mn4.$ms4.$mt4.$qs1)" { + incr matches + exp_continue + } + -re 
"($cl1.$nm2.$us1..$fs1.......$mc2.$mm2.$mj2.$mn2.$ms2.$mt2.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm1...$fs5.$gm5.$gc5.$gj5.$gn5.$gs5.$gt5.$mc5.$mm5.$mj5.$mn5.$ms5.$mt5.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm3...$fs3.$gm3.$gc3.$gj3.$gn3.$gs3.$gt3.$mc3.$mm3.$mj3.$mn3.$ms3.$mt3.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm3.$us2..$fs2.......$mc1.$mm1.$mj1.$mn1.$ms1.$mt1.$qs1)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 7} { + send_user "\nFAILURE: Associations addition 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +# +# Use sacctmgr to list the test original Admin Levels and Account Coordinators +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p $lis $usr $fmt=$usr,$col,$ala $wco] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($us1.$nm2.$alo)" { + incr matches + exp_continue + } + -re "($us2.$nm3.$ala)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 2} { + send_user "\nFAILURE: Adminstrator and coordinator 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +set matches 0 +set my_pid [spawn $sacctmgr -i load $file_in2] +expect { + -re "(Unable to read *.* * No such file or directory)" { + send_user "FAILURE: there was a problem with the file\n" + incr exit_code 1 + } + -re "error" { + send_user "FAILURE: there was a problem with the file\n" + incr exit_code 1 + } + -re "Problem with requests" { + send_user "FAILURE: there was a problem with the request\n" + incr exit_code 1 + } + -re "(sacctmgr: For cluster $cl1)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr 
list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 1} { + send_user "\nFAILURE: File load 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +# +# Use sacctmgr to list the test modifications to the original association +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p $lis $as $clu=$cl1 $fmt=$clu,$acc,$usr,$pts,$fs,$gm,$gc,$gj,$gn,$gs,$gw,$mc,$mm,$mj,$mn,$ms,$mw,$qs] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($cl1.$roo...$fs6.$gm6.$gc6.$gj6.$gn6.$gs6.$gt6.$mc6.$mm6.$mj6.$mn6.$ms6.$mt6.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$roo.$roo..1.......$mc6.$mm6.$mj6.$mn6.$ms6.$mt6.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm1...$fs5.$gm5.$gc5.$gj5.$gn5.$gs5.$gt5.$mc5.$mm5.$mj5.$mn5.$ms5.$mt5.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm1.$us3..$fs2.......$mc1.$mm1.$mj1.$mn1.$ms1.$mt1.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm2...$fs4.$gm4.$gc4.$gj4.$gn4.$gs4.$gt4.$mc4.$mm4.$mj4.$mn4.$ms4.$mt4.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm2.$us1..$fs1.......$mc2.$mm2.$mj2.$mn2.$ms2.$mt2.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm2.$us3..$fs2.......$mc2.$mm2.$mj2.$mn2.$ms2.$mt2.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm3...$fs5.$gm5.$gc5.$gj5.$gn5.$gs5.$gt5.$mc5.$mm5.$mj5.$mn5.$ms5.$mt5.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm3.$us2..$fs3.......$mc3.$mm3.$mj3.$mn3.$ms3.$mt3.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm3.$us3..$fs3.......$mc3.$mm3.$mj3.$mn3.$ms3.$mt3.$qs1)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 10} { + send_user "\nFAILURE: Association modification 1 incorrect with only $matches.\n" + 
incr exit_code 1 +} + +# +# Use sacctmgr to list the test modification Admin Levels and Account Coordinators +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p $lis $usr $fmt=$usr,$col,$ala $wco] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($us1.$nm2.$alo)" { + incr matches + exp_continue + } + -re "($us2.$nm3.$alo)" { + incr matches + exp_continue + } + -re "($us3.$nm2,$nm1,$nm3.$ala)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 3} { + send_user "\nFAILURE: Adminstrator and coordinator 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +set matches 0 +set my_pid [spawn $sacctmgr -i load $file_in3 clean] +expect { + -re "(Unable to read *.* * No such file or directory)" { + send_user "FAILURE: there was a problem with the file\n" + incr exit_code 1 + } + -re "error" { + send_user "FAILURE: there was a problem with the file\n" + incr exit_code 1 + } + -re "Problem with requests" { + send_user "FAILURE: there was a problem with the request\n" + incr exit_code 1 + } + -re "(sacctmgr: For cluster $cl1)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 1} { + send_user "\nFAILURE: File load 1 incorrect with only $matches.\n" + incr exit_code 1 +} + + +# +# Use sacctmgr to list the test modifications to the original association +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p $lis $as $clu=$cl1 $fmt=$clu,$acc,$usr,$pts,$fs,$gm,$gc,$gj,$gn,$gs,$gw,$mc,$mm,$mj,$mn,$ms,$mw,$qs] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re 
"($cl1.$roo...$fs6.$gm6.$gc6.$gj6.$gn6.$gs6.$gt6.$mc6.$mm6.$mj6.$mn6.$ms6.$mt6.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$roo.$roo..1.......$mc6.$mm6.$mj6.$mn6.$ms6.$mt6.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm1...$fs5.$gm5.$gc5.$gj5.$gn5.$gs5.$gt5.$mc5.$mm5.$mj5.$mn5.$ms5.$mt5.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm1.$us3..$fs2.......$mc1.$mm1.$mj1.$mn1.$ms1.$mt1.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm2...$fs4.$gm4.$gc4.$gj4.$gn4.$gs4.$gt4.$mc4.$mm4.$mj4.$mn4.$ms4.$mt4.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm2.$us1..$fs1.......$mc2.$mm2.$mj2.$mn2.$ms2.$mt2.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm2.$us3..$fs2.......$mc2.$mm2.$mj2.$mn2.$ms2.$mt2.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm3...$fs5.$gm5.$gc5.$gj5.$gn5.$gs5.$gt5.$mc5.$mm5.$mj5.$mn5.$ms5.$mt5.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm3.$us2..$fs3.......$mc3.$mm3.$mj3.$mn3.$ms3.$mt3.$qs1)" { + incr matches + exp_continue + } + -re "($cl1.$nm3.$us3..$fs3.......$mc3.$mm3.$mj3.$mn3.$ms3.$mt3.$qs1)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 10} { + send_user "\nFAILURE: Clean association addition 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +# +# Use sacctmgr to list the test modification Admin Levels and Account Coordinators +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p $lis $usr $fmt=$usr,$col,$ala $wco] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($us1.$nm2.$alo)" { + incr matches + exp_continue + } + -re "($us2.$nm3.$alo)" { + incr matches + exp_continue + } + -re "($us3.$nm2,$nm1,$nm3.$ala)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations 
not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 3} { + send_user "\nFAILURE: Adminstrator and coordinator 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +# This is the end below here + +incr exit_code [_remove_user "" "$us1,$us2,$us3"] +incr exit_code [_remove_acct "" "$nm1,$nm2,$nm3"] +incr exit_code [_remove_cluster "$cl1"] + +# +# Delete left-over input script files +# +exec $bin_rm -f $file_in +exec $bin_rm -f $file_in2 +exec $bin_rm -f $file_in3 + +# +# Use sacctmgr to delete the test account +# +set damatches 0 +set sadel_pid [spawn $sacctmgr -i delete account $test_acct] +expect { + -re "Deleting account" { + incr damatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} +if {$damatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} else { + send_user "\nFAILURE\n" +} +exit $exit_code + diff --git a/testsuite/expect/test21.5 b/testsuite/expect/test21.5 index ce86a2716..4f83bef5e 100755 --- a/testsuite/expect/test21.5 +++ b/testsuite/expect/test21.5 @@ -94,7 +94,7 @@ if { [string compare [check_accounting_admin_level] "Administrator"] } { set nothing 0 set matches 0 -set sadel_pid [spawn $sacctmgr -i $del $clu $tc1] +set my_pid [spawn $sacctmgr -i $del $clu $tc1] expect { -re "privilege to perform this action" { diff --git a/testsuite/expect/test21.6 b/testsuite/expect/test21.6 index 5a2e4a040..b73800444 100755 --- a/testsuite/expect/test21.6 +++ b/testsuite/expect/test21.6 @@ -97,7 +97,7 @@ if { [string compare [check_accounting_admin_level] "Administrator"] } { set nothing 0 set matches 0 -set sadel_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2,$tc3] +set my_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2,$tc3] expect { -re "privilege to perform this action" { diff --git 
a/testsuite/expect/test6.13 b/testsuite/expect/test6.13 index a2945d5bf..cb3934ca3 100755 --- a/testsuite/expect/test6.13 +++ b/testsuite/expect/test6.13 @@ -155,6 +155,7 @@ if {[wait_for_file $file_out] == 0} { # exec rm -f $file_in $file_out make_bash_script $file_in " + echo BEGIN $bin_sleep 10 $bin_sleep 10 echo FINI @@ -230,6 +231,7 @@ if {[wait_for_job $job_id DONE] != 0} { # Test output file # if {[wait_for_file $file_out] == 0} { + set matches 0 spawn cat $file_out expect { -re "FINI" { @@ -237,10 +239,18 @@ if {[wait_for_file $file_out] == 0} { set exit_code 1 exp_continue } + -re "CANCELLED" { + set matches 1 + exp_continue + } eof { wait } } + if {$matches != 1} { + send_user "\nFAILURE: Missing JOB CANCELLED error message\n" + set exit_code 1 + } } else { set exit_code 1 } diff --git a/testsuite/slurm_unit/Makefile.in b/testsuite/slurm_unit/Makefile.in index e4e4fe3c7..6697112ab 100644 --- a/testsuite/slurm_unit/Makefile.in +++ b/testsuite/slurm_unit/Makefile.in @@ -182,6 +182,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/testsuite/slurm_unit/api/Makefile.in b/testsuite/slurm_unit/api/Makefile.in index 25a05ccff..d5d7b66f0 100644 --- a/testsuite/slurm_unit/api/Makefile.in +++ b/testsuite/slurm_unit/api/Makefile.in @@ -202,6 +202,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/testsuite/slurm_unit/api/manual/Makefile.in b/testsuite/slurm_unit/api/manual/Makefile.in index 9f721d2e5..a9f8c0c2d 100644 --- a/testsuite/slurm_unit/api/manual/Makefile.in +++ b/testsuite/slurm_unit/api/manual/Makefile.in @@ -224,6 +224,7 @@ SLURM_API_VERSION 
= @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/testsuite/slurm_unit/common/Makefile.in b/testsuite/slurm_unit/common/Makefile.in index b56d8fd59..e6136caff 100644 --- a/testsuite/slurm_unit/common/Makefile.in +++ b/testsuite/slurm_unit/common/Makefile.in @@ -210,6 +210,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/testsuite/slurm_unit/slurmctld/Makefile.in b/testsuite/slurm_unit/slurmctld/Makefile.in index aae4a7566..01fa98251 100644 --- a/testsuite/slurm_unit/slurmctld/Makefile.in +++ b/testsuite/slurm_unit/slurmctld/Makefile.in @@ -173,6 +173,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ diff --git a/testsuite/slurm_unit/slurmd/Makefile.in b/testsuite/slurm_unit/slurmd/Makefile.in index c78fa4e54..5231f2e21 100644 --- a/testsuite/slurm_unit/slurmd/Makefile.in +++ b/testsuite/slurm_unit/slurmd/Makefile.in @@ -173,6 +173,7 @@ SLURM_API_VERSION = @SLURM_API_VERSION@ SLURM_MAJOR = @SLURM_MAJOR@ SLURM_MICRO = @SLURM_MICRO@ SLURM_MINOR = @SLURM_MINOR@ +SLURM_PREFIX = @SLURM_PREFIX@ SLURM_VERSION = @SLURM_VERSION@ SO_LDFLAGS = @SO_LDFLAGS@ SSL_CPPFLAGS = @SSL_CPPFLAGS@ -- GitLab